jekyll-import 0.12.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/jekyll-import.rb +10 -8
- data/lib/jekyll-import/importer.rb +1 -1
- data/lib/jekyll-import/importers.rb +1 -1
- data/lib/jekyll-import/importers/behance.rb +20 -20
- data/lib/jekyll-import/importers/blogger.rb +108 -118
- data/lib/jekyll-import/importers/csv.rb +7 -7
- data/lib/jekyll-import/importers/drupal6.rb +5 -6
- data/lib/jekyll-import/importers/drupal7.rb +7 -13
- data/lib/jekyll-import/importers/drupal_common.rb +57 -59
- data/lib/jekyll-import/importers/easyblog.rb +30 -30
- data/lib/jekyll-import/importers/enki.rb +28 -29
- data/lib/jekyll-import/importers/ghost.rb +46 -33
- data/lib/jekyll-import/importers/google_reader.rb +9 -9
- data/lib/jekyll-import/importers/joomla.rb +32 -32
- data/lib/jekyll-import/importers/joomla3.rb +41 -39
- data/lib/jekyll-import/importers/jrnl.rb +16 -17
- data/lib/jekyll-import/importers/marley.rb +25 -26
- data/lib/jekyll-import/importers/mephisto.rb +26 -26
- data/lib/jekyll-import/importers/mt.rb +76 -75
- data/lib/jekyll-import/importers/posterous.rb +30 -29
- data/lib/jekyll-import/importers/rss.rb +13 -10
- data/lib/jekyll-import/importers/s9y.rb +16 -17
- data/lib/jekyll-import/importers/s9y_database.rb +98 -89
- data/lib/jekyll-import/importers/textpattern.rb +18 -17
- data/lib/jekyll-import/importers/tmp.rb +0 -0
- data/lib/jekyll-import/importers/tumblr.rb +146 -143
- data/lib/jekyll-import/importers/typo.rb +31 -31
- data/lib/jekyll-import/importers/wordpress.rb +100 -100
- data/lib/jekyll-import/importers/wordpressdotcom.rb +70 -60
- data/lib/jekyll-import/util.rb +24 -24
- data/lib/jekyll-import/version.rb +1 -1
- data/lib/jekyll/commands/import.rb +32 -35
- metadata +14 -13
@@ -12,31 +12,32 @@ module JekyllImport
|
|
12
12
|
Keywords \
|
13
13
|
FROM textpattern \
|
14
14
|
WHERE Status = '4' OR \
|
15
|
-
Status = '5'"
|
15
|
+
Status = '5'".freeze
|
16
16
|
|
17
17
|
def self.require_deps
|
18
|
-
JekyllImport.require_with_fallback(%w
|
18
|
+
JekyllImport.require_with_fallback(%w(
|
19
19
|
rubygems
|
20
20
|
sequel
|
21
|
+
mysql2
|
21
22
|
fileutils
|
22
23
|
safe_yaml
|
23
|
-
|
24
|
+
))
|
24
25
|
end
|
25
26
|
|
26
27
|
def self.specify_options(c)
|
27
|
-
c.option
|
28
|
-
c.option
|
29
|
-
c.option
|
30
|
-
c.option
|
28
|
+
c.option "dbname", "--dbname DB", "Database name"
|
29
|
+
c.option "user", "--user USER", "Database user name"
|
30
|
+
c.option "password", "--password PW", "Database user's password"
|
31
|
+
c.option "host", "--host HOST", 'Database host name (default: "localhost")'
|
31
32
|
end
|
32
33
|
|
33
34
|
def self.process(options)
|
34
|
-
dbname = options.fetch(
|
35
|
-
user = options.fetch(
|
36
|
-
pass = options.fetch(
|
37
|
-
host = options.fetch(
|
35
|
+
dbname = options.fetch("dbname")
|
36
|
+
user = options.fetch("user")
|
37
|
+
pass = options.fetch("password", "")
|
38
|
+
host = options.fetch("host", "localhost")
|
38
39
|
|
39
|
-
db = Sequel.
|
40
|
+
db = Sequel.mysql2(dbname, :user => user, :password => pass, :host => host, :encoding => "utf8")
|
40
41
|
|
41
42
|
FileUtils.mkdir_p "_posts"
|
42
43
|
|
@@ -47,15 +48,15 @@ module JekyllImport
|
|
47
48
|
date = post[:Posted]
|
48
49
|
content = post[:Body]
|
49
50
|
|
50
|
-
name = [date.strftime("%Y-%m-%d"), slug].join(
|
51
|
+
name = [date.strftime("%Y-%m-%d"), slug].join("-") + ".textile"
|
51
52
|
|
52
53
|
# Get the relevant fields as a hash, delete empty fields and convert
|
53
54
|
# to YAML for the header.
|
54
55
|
data = {
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
56
|
+
"layout" => "post",
|
57
|
+
"title" => title.to_s,
|
58
|
+
"tags" => post[:Keywords].split(","),
|
59
|
+
}.delete_if { |_k, v| v.nil? || v == "" }.to_yaml
|
59
60
|
|
60
61
|
# Write out the data and content to file.
|
61
62
|
File.open("_posts/#{name}", "w") do |f|
|
File without changes
|
@@ -2,7 +2,7 @@ module JekyllImport
|
|
2
2
|
module Importers
|
3
3
|
class Tumblr < Importer
|
4
4
|
def self.require_deps
|
5
|
-
JekyllImport.require_with_fallback(%w
|
5
|
+
JekyllImport.require_with_fallback(%w(
|
6
6
|
rubygems
|
7
7
|
fileutils
|
8
8
|
open-uri
|
@@ -11,23 +11,23 @@ module JekyllImport
|
|
11
11
|
uri
|
12
12
|
time
|
13
13
|
jekyll
|
14
|
-
|
14
|
+
))
|
15
15
|
end
|
16
16
|
|
17
17
|
def self.specify_options(c)
|
18
|
-
c.option
|
19
|
-
c.option
|
20
|
-
c.option
|
21
|
-
c.option
|
22
|
-
c.option
|
18
|
+
c.option "url", "--url URL", "Tumblr URL"
|
19
|
+
c.option "format", "--format FORMAT", 'Output format (default: "html")'
|
20
|
+
c.option "grab_images", "--grab_images", "Whether to grab images (default: false)"
|
21
|
+
c.option "add_highlights", "--add_highlights", "Whether to add highlights (default: false)"
|
22
|
+
c.option "rewrite_urls", "--rewrite_urls", "Whether to rewrite URLs (default: false)"
|
23
23
|
end
|
24
24
|
|
25
25
|
def self.process(options)
|
26
|
-
url = options.fetch(
|
27
|
-
format = options.fetch(
|
28
|
-
grab_images = options.fetch(
|
29
|
-
add_highlights = options.fetch(
|
30
|
-
rewrite_urls = options.fetch(
|
26
|
+
url = options.fetch("url")
|
27
|
+
format = options.fetch("format", "html")
|
28
|
+
grab_images = options.fetch("grab_images", false)
|
29
|
+
add_highlights = options.fetch("add_highlights", false)
|
30
|
+
rewrite_urls = options.fetch("rewrite_urls", false)
|
31
31
|
|
32
32
|
@grab_images = grab_images
|
33
33
|
FileUtils.mkdir_p "_posts/tumblr"
|
@@ -51,52 +51,51 @@ module JekyllImport
|
|
51
51
|
if rewrite_urls
|
52
52
|
posts += batch
|
53
53
|
else
|
54
|
-
batch.each {|post| write_post(post, format == "md", add_highlights)}
|
54
|
+
batch.each { |post| write_post(post, format == "md", add_highlights) }
|
55
55
|
end
|
56
|
-
|
57
56
|
end until blog["posts"].size < per_page
|
58
57
|
|
59
58
|
# Rewrite URLs, create redirects and write out out posts if necessary
|
60
59
|
if rewrite_urls
|
61
60
|
posts = rewrite_urls_and_redirects posts
|
62
|
-
posts.each {|post| write_post(post, format == "md", add_highlights)}
|
61
|
+
posts.each { |post| write_post(post, format == "md", add_highlights) }
|
63
62
|
end
|
64
63
|
end
|
65
64
|
|
66
65
|
private
|
66
|
+
class << self
|
67
|
+
def extract_json(contents)
|
68
|
+
beginning = contents.index("{")
|
69
|
+
ending = contents.rindex("}") + 1
|
70
|
+
json = contents[beginning...ending] # Strip Tumblr's JSONP chars.
|
71
|
+
JSON.parse(json)
|
72
|
+
end
|
67
73
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
json = contents[beginning...ending] # Strip Tumblr's JSONP chars.
|
72
|
-
blog = JSON.parse(json)
|
73
|
-
end
|
74
|
-
|
75
|
-
# Writes a post out to disk
|
76
|
-
def self.write_post(post, use_markdown, add_highlights)
|
77
|
-
content = post[:content]
|
74
|
+
# Writes a post out to disk
|
75
|
+
def write_post(post, use_markdown, add_highlights)
|
76
|
+
content = post[:content]
|
78
77
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
78
|
+
if content
|
79
|
+
if use_markdown
|
80
|
+
content = html_to_markdown content
|
81
|
+
if add_highlights
|
82
|
+
tumblr_url = URI.parse(post[:slug]).path
|
83
|
+
redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
|
84
|
+
FileUtils.mkdir_p redirect_dir
|
85
|
+
content = add_syntax_highlights(content, redirect_dir)
|
86
|
+
end
|
87
87
|
end
|
88
|
-
end
|
89
88
|
|
90
|
-
|
91
|
-
|
89
|
+
File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
|
90
|
+
f.puts post[:header].to_yaml + "---\n" + content
|
91
|
+
end
|
92
92
|
end
|
93
93
|
end
|
94
|
-
end
|
95
94
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
95
|
+
# Converts each type of Tumblr post to a hash with all required
|
96
|
+
# data for Jekyll.
|
97
|
+
def post_to_hash(post, format)
|
98
|
+
case post["type"]
|
100
99
|
when "regular"
|
101
100
|
title = post["regular-title"]
|
102
101
|
content = post["regular-body"]
|
@@ -107,7 +106,7 @@ module JekyllImport
|
|
107
106
|
content << "<br/>" + post["link-description"]
|
108
107
|
end
|
109
108
|
when "photo"
|
110
|
-
title = post["slug"].
|
109
|
+
title = post["slug"].tr("-", " ")
|
111
110
|
if post["photos"].size > 1
|
112
111
|
content = ""
|
113
112
|
post["photos"].each do |post_photo|
|
@@ -137,7 +136,7 @@ module JekyllImport
|
|
137
136
|
title = post["conversation-title"]
|
138
137
|
content = "<section><dialog>"
|
139
138
|
post["conversation"].each do |line|
|
140
|
-
content << "<dt>#{line[
|
139
|
+
content << "<dt>#{line["label"]}</dt><dd>#{line["phrase"]}</dd>"
|
141
140
|
end
|
142
141
|
content << "</dialog></section>"
|
143
142
|
when "video"
|
@@ -153,123 +152,126 @@ module JekyllImport
|
|
153
152
|
when "answer"
|
154
153
|
title = post["question"]
|
155
154
|
content = post["answer"]
|
155
|
+
end
|
156
|
+
date = Date.parse(post["date"]).to_s
|
157
|
+
title = Nokogiri::HTML(title).text
|
158
|
+
title = "no title" if title.empty?
|
159
|
+
slug = if post["slug"] && post["slug"].strip != ""
|
160
|
+
post["slug"]
|
161
|
+
elsif title && title.downcase.gsub(%r![^a-z0-9\-]!, "") != "" && title != "no title"
|
162
|
+
slug = title.downcase.strip.tr(" ", "-").gsub(%r![^a-z0-9\-]!, "")
|
163
|
+
slug.length > 200 ? slug.slice(0..200) : slug
|
164
|
+
else
|
165
|
+
post["id"]
|
166
|
+
end
|
167
|
+
{
|
168
|
+
:name => "#{date}-#{slug}.#{format}",
|
169
|
+
:header => {
|
170
|
+
"layout" => "post",
|
171
|
+
"title" => title,
|
172
|
+
"date" => Time.parse(post["date"]).xmlschema,
|
173
|
+
"tags" => (post["tags"] || []),
|
174
|
+
"tumblr_url" => post["url-with-slug"],
|
175
|
+
},
|
176
|
+
:content => content,
|
177
|
+
:url => post["url"],
|
178
|
+
:slug => post["url-with-slug"],
|
179
|
+
}
|
156
180
|
end
|
157
|
-
date = Date.parse(post['date']).to_s
|
158
|
-
title = Nokogiri::HTML(title).text
|
159
|
-
title = "no title" if title.empty?
|
160
|
-
slug = if post["slug"] && post["slug"].strip != ""
|
161
|
-
post["slug"]
|
162
|
-
elsif title && title.downcase.gsub(/[^a-z0-9\-]/, '') != '' && title != 'no title'
|
163
|
-
slug = title.downcase.strip.gsub(' ', '-').gsub(/[^a-z0-9\-]/, '')
|
164
|
-
slug.length > 200 ? slug.slice(0..200) : slug
|
165
|
-
else
|
166
|
-
slug = post['id']
|
167
|
-
end
|
168
|
-
{
|
169
|
-
:name => "#{date}-#{slug}.#{format}",
|
170
|
-
:header => {
|
171
|
-
"layout" => "post",
|
172
|
-
"title" => title,
|
173
|
-
"date" => Time.parse(post['date']).xmlschema,
|
174
|
-
"tags" => (post["tags"] or []),
|
175
|
-
"tumblr_url" => post["url-with-slug"]
|
176
|
-
},
|
177
|
-
:content => content,
|
178
|
-
:url => post["url"],
|
179
|
-
:slug => post["url-with-slug"],
|
180
|
-
}
|
181
|
-
end
|
182
181
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
182
|
+
# Attempts to fetch the largest version of a photo available for a post.
|
183
|
+
# If that file fails, it tries the next smaller size until all available
|
184
|
+
# photo URLs are exhausted. If they all fail, the import is aborted.
|
185
|
+
def fetch_photo(post)
|
186
|
+
sizes = post.keys.map { |k| k.gsub("photo-url-", "").to_i }
|
187
|
+
sizes.sort! { |a, b| b <=> a }
|
189
188
|
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
189
|
+
_ext_key, ext_val = post.find do |k, v|
|
190
|
+
k =~ %r!^photo-url-! && v.split("/").last =~ %r!\.!
|
191
|
+
end
|
192
|
+
ext = "." + ext_val.split(".").last
|
194
193
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
194
|
+
sizes.each do |size|
|
195
|
+
url = post["photo-url"] || post["photo-url-#{size}"]
|
196
|
+
next if url.nil?
|
197
|
+
begin
|
198
|
+
return "<img src=\"#{save_photo(url, ext)}\"/>"
|
199
|
+
rescue OpenURI::HTTPError
|
200
|
+
puts "Failed to grab photo"
|
201
|
+
end
|
202
202
|
end
|
203
|
+
|
204
|
+
abort "Failed to fetch photo for post #{post["url"]}"
|
203
205
|
end
|
204
206
|
|
205
|
-
|
206
|
-
|
207
|
+
# Create a Hash of old urls => new urls, for rewriting and
|
208
|
+
# redirects, and replace urls in each post. Instantiate Jekyll
|
209
|
+
# site/posts to get the correct permalink format.
|
210
|
+
def rewrite_urls_and_redirects(posts)
|
211
|
+
site = Jekyll::Site.new(Jekyll.configuration({}))
|
212
|
+
urls = Hash[posts.map do |post|
|
213
|
+
# Create an initial empty file for the post so that
|
214
|
+
# we can instantiate a post object.
|
215
|
+
File.write("_posts/tumblr/#{post[:name]}", "")
|
216
|
+
tumblr_url = URI.parse(URI.encode(post[:slug])).path
|
217
|
+
jekyll_url = if Jekyll.const_defined? :Post
|
218
|
+
Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
|
219
|
+
else
|
220
|
+
Jekyll::Document.new(File.expand_path("_posts/tumblr/#{post[:name]}"), :site => site, :collection => site.posts).url
|
221
|
+
end
|
222
|
+
redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
|
223
|
+
FileUtils.mkdir_p redirect_dir
|
224
|
+
File.open(redirect_dir + "index.html", "w") do |f|
|
225
|
+
f.puts "<html><head><link rel=\"canonical\" href=\"" \
|
226
|
+
"#{jekyll_url}\"><meta http-equiv=\"refresh\" content=\"0; " \
|
227
|
+
"url=#{jekyll_url}\"></head><body></body></html>"
|
228
|
+
end
|
229
|
+
[tumblr_url, jekyll_url]
|
230
|
+
end]
|
231
|
+
posts.map do |post|
|
232
|
+
urls.each do |tumblr_url, jekyll_url|
|
233
|
+
post[:content].gsub!(%r!#{tumblr_url}!i, jekyll_url)
|
234
|
+
end
|
235
|
+
post
|
236
|
+
end
|
237
|
+
end
|
207
238
|
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
# Create an initial empty file for the post so that
|
215
|
-
# we can instantiate a post object.
|
216
|
-
File.write("_posts/tumblr/#{post[:name]}", "")
|
217
|
-
tumblr_url = URI.parse(URI.encode(post[:slug])).path
|
218
|
-
jekyll_url = if Jekyll.const_defined? :Post
|
219
|
-
Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
|
220
|
-
else
|
221
|
-
Jekyll::Document.new(File.expand_path("_posts/tumblr/#{post[:name]}"), site: site, collection: site.posts).url
|
222
|
-
end
|
223
|
-
redirect_dir = tumblr_url.sub(/\//, "") + "/"
|
224
|
-
FileUtils.mkdir_p redirect_dir
|
225
|
-
File.open(redirect_dir + "index.html", "w") do |f|
|
226
|
-
f.puts "<html><head><link rel=\"canonical\" href=\"" +
|
227
|
-
"#{jekyll_url}\"><meta http-equiv=\"refresh\" content=\"0; " +
|
228
|
-
"url=#{jekyll_url}\"></head><body></body></html>"
|
239
|
+
# Convert preserving HTML tables as per the markdown docs.
|
240
|
+
def html_to_markdown(content)
|
241
|
+
preserve = %w(table tr th td)
|
242
|
+
preserve.each do |tag|
|
243
|
+
content.gsub!(%r!<#{tag}!i, "$$" + tag)
|
244
|
+
content.gsub!(%r!<\/#{tag}!i, "||" + tag)
|
229
245
|
end
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
post[:content].gsub!(/#{tumblr_url}/i, jekyll_url)
|
246
|
+
content = Nokogiri::HTML(content.gsub("'", "''")).text
|
247
|
+
preserve.each do |tag|
|
248
|
+
content.gsub!("$$" + tag, "<" + tag)
|
249
|
+
content.gsub!("||" + tag, "</" + tag)
|
235
250
|
end
|
236
|
-
|
237
|
-
}
|
238
|
-
end
|
239
|
-
|
240
|
-
# Convert preserving HTML tables as per the markdown docs.
|
241
|
-
def self.html_to_markdown(content)
|
242
|
-
preserve = ["table", "tr", "th", "td"]
|
243
|
-
preserve.each do |tag|
|
244
|
-
content.gsub!(/<#{tag}/i, "$$" + tag)
|
245
|
-
content.gsub!(/<\/#{tag}/i, "||" + tag)
|
251
|
+
content
|
246
252
|
end
|
247
|
-
content = Nokogiri::HTML(content.gsub("'", "''")).text
|
248
|
-
preserve.each do |tag|
|
249
|
-
content.gsub!("$$" + tag, "<" + tag)
|
250
|
-
content.gsub!("||" + tag, "</" + tag)
|
251
|
-
end
|
252
|
-
content
|
253
|
-
end
|
254
253
|
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
254
|
+
# Adds pygments highlight tags to code blocks in posts that use
|
255
|
+
# markdown format. This doesn't guess the language of the code
|
256
|
+
# block, so you should modify this to suit your own content.
|
257
|
+
# For example, my code block only contain Python and JavaScript,
|
258
|
+
# so I can assume the block is JavaScript if it contains a
|
259
|
+
# semi-colon.
|
260
|
+
def add_syntax_highlights(content, redirect_dir)
|
262
261
|
lines = content.split("\n")
|
263
|
-
block
|
262
|
+
block = false
|
263
|
+
indent = %r!^ !
|
264
|
+
lang = nil
|
265
|
+
start = nil
|
264
266
|
lines.each_with_index do |line, i|
|
265
267
|
if !block && line =~ indent
|
266
268
|
block = true
|
267
269
|
lang = "python"
|
268
270
|
start = i
|
269
271
|
elsif block
|
270
|
-
lang = "javascript" if line =~
|
272
|
+
lang = "javascript" if line =~ %r!;$!
|
271
273
|
block = line =~ indent && i < lines.size - 1 # Also handle EOF
|
272
|
-
|
274
|
+
unless block
|
273
275
|
lines[start] = "{% highlight #{lang} %}"
|
274
276
|
lines[i - 1] = "{% endhighlight %}"
|
275
277
|
end
|
@@ -280,10 +282,10 @@ module JekyllImport
|
|
280
282
|
lines.join("\n")
|
281
283
|
end
|
282
284
|
|
283
|
-
|
285
|
+
def save_photo(url, ext)
|
284
286
|
if @grab_images
|
285
|
-
path = "tumblr_files/#{url.split(
|
286
|
-
path += ext unless path =~
|
287
|
+
path = "tumblr_files/#{url.split("/").last}"
|
288
|
+
path += ext unless path =~ %r!#{ext}$!
|
287
289
|
FileUtils.mkdir_p "tumblr_files"
|
288
290
|
|
289
291
|
# Don't fetch if we've already cached this file
|
@@ -295,6 +297,7 @@ module JekyllImport
|
|
295
297
|
end
|
296
298
|
url
|
297
299
|
end
|
300
|
+
end
|
298
301
|
end
|
299
302
|
end
|
300
303
|
end
|