jekyll-import 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/jekyll-import.rb +10 -8
- data/lib/jekyll-import/importer.rb +1 -1
- data/lib/jekyll-import/importers.rb +1 -1
- data/lib/jekyll-import/importers/behance.rb +20 -20
- data/lib/jekyll-import/importers/blogger.rb +108 -118
- data/lib/jekyll-import/importers/csv.rb +7 -7
- data/lib/jekyll-import/importers/drupal6.rb +5 -6
- data/lib/jekyll-import/importers/drupal7.rb +7 -13
- data/lib/jekyll-import/importers/drupal_common.rb +57 -59
- data/lib/jekyll-import/importers/easyblog.rb +30 -30
- data/lib/jekyll-import/importers/enki.rb +28 -29
- data/lib/jekyll-import/importers/ghost.rb +46 -33
- data/lib/jekyll-import/importers/google_reader.rb +9 -9
- data/lib/jekyll-import/importers/joomla.rb +32 -32
- data/lib/jekyll-import/importers/joomla3.rb +41 -39
- data/lib/jekyll-import/importers/jrnl.rb +16 -17
- data/lib/jekyll-import/importers/marley.rb +25 -26
- data/lib/jekyll-import/importers/mephisto.rb +26 -26
- data/lib/jekyll-import/importers/mt.rb +76 -75
- data/lib/jekyll-import/importers/posterous.rb +30 -29
- data/lib/jekyll-import/importers/rss.rb +13 -10
- data/lib/jekyll-import/importers/s9y.rb +16 -17
- data/lib/jekyll-import/importers/s9y_database.rb +98 -89
- data/lib/jekyll-import/importers/textpattern.rb +18 -17
- data/lib/jekyll-import/importers/tmp.rb +0 -0
- data/lib/jekyll-import/importers/tumblr.rb +146 -143
- data/lib/jekyll-import/importers/typo.rb +31 -31
- data/lib/jekyll-import/importers/wordpress.rb +100 -100
- data/lib/jekyll-import/importers/wordpressdotcom.rb +70 -60
- data/lib/jekyll-import/util.rb +24 -24
- data/lib/jekyll-import/version.rb +1 -1
- data/lib/jekyll/commands/import.rb +32 -35
- metadata +14 -13
@@ -12,31 +12,32 @@ module JekyllImport
|
|
12
12
|
Keywords \
|
13
13
|
FROM textpattern \
|
14
14
|
WHERE Status = '4' OR \
|
15
|
-
Status = '5'"
|
15
|
+
Status = '5'".freeze
|
16
16
|
|
17
17
|
def self.require_deps
|
18
|
-
JekyllImport.require_with_fallback(%w
|
18
|
+
JekyllImport.require_with_fallback(%w(
|
19
19
|
rubygems
|
20
20
|
sequel
|
21
|
+
mysql2
|
21
22
|
fileutils
|
22
23
|
safe_yaml
|
23
|
-
|
24
|
+
))
|
24
25
|
end
|
25
26
|
|
26
27
|
def self.specify_options(c)
|
27
|
-
c.option
|
28
|
-
c.option
|
29
|
-
c.option
|
30
|
-
c.option
|
28
|
+
c.option "dbname", "--dbname DB", "Database name"
|
29
|
+
c.option "user", "--user USER", "Database user name"
|
30
|
+
c.option "password", "--password PW", "Database user's password"
|
31
|
+
c.option "host", "--host HOST", 'Database host name (default: "localhost")'
|
31
32
|
end
|
32
33
|
|
33
34
|
def self.process(options)
|
34
|
-
dbname = options.fetch(
|
35
|
-
user = options.fetch(
|
36
|
-
pass = options.fetch(
|
37
|
-
host = options.fetch(
|
35
|
+
dbname = options.fetch("dbname")
|
36
|
+
user = options.fetch("user")
|
37
|
+
pass = options.fetch("password", "")
|
38
|
+
host = options.fetch("host", "localhost")
|
38
39
|
|
39
|
-
db = Sequel.
|
40
|
+
db = Sequel.mysql2(dbname, :user => user, :password => pass, :host => host, :encoding => "utf8")
|
40
41
|
|
41
42
|
FileUtils.mkdir_p "_posts"
|
42
43
|
|
@@ -47,15 +48,15 @@ module JekyllImport
|
|
47
48
|
date = post[:Posted]
|
48
49
|
content = post[:Body]
|
49
50
|
|
50
|
-
name = [date.strftime("%Y-%m-%d"), slug].join(
|
51
|
+
name = [date.strftime("%Y-%m-%d"), slug].join("-") + ".textile"
|
51
52
|
|
52
53
|
# Get the relevant fields as a hash, delete empty fields and convert
|
53
54
|
# to YAML for the header.
|
54
55
|
data = {
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
56
|
+
"layout" => "post",
|
57
|
+
"title" => title.to_s,
|
58
|
+
"tags" => post[:Keywords].split(","),
|
59
|
+
}.delete_if { |_k, v| v.nil? || v == "" }.to_yaml
|
59
60
|
|
60
61
|
# Write out the data and content to file.
|
61
62
|
File.open("_posts/#{name}", "w") do |f|
|
File without changes
|
@@ -2,7 +2,7 @@ module JekyllImport
|
|
2
2
|
module Importers
|
3
3
|
class Tumblr < Importer
|
4
4
|
def self.require_deps
|
5
|
-
JekyllImport.require_with_fallback(%w
|
5
|
+
JekyllImport.require_with_fallback(%w(
|
6
6
|
rubygems
|
7
7
|
fileutils
|
8
8
|
open-uri
|
@@ -11,23 +11,23 @@ module JekyllImport
|
|
11
11
|
uri
|
12
12
|
time
|
13
13
|
jekyll
|
14
|
-
|
14
|
+
))
|
15
15
|
end
|
16
16
|
|
17
17
|
def self.specify_options(c)
|
18
|
-
c.option
|
19
|
-
c.option
|
20
|
-
c.option
|
21
|
-
c.option
|
22
|
-
c.option
|
18
|
+
c.option "url", "--url URL", "Tumblr URL"
|
19
|
+
c.option "format", "--format FORMAT", 'Output format (default: "html")'
|
20
|
+
c.option "grab_images", "--grab_images", "Whether to grab images (default: false)"
|
21
|
+
c.option "add_highlights", "--add_highlights", "Whether to add highlights (default: false)"
|
22
|
+
c.option "rewrite_urls", "--rewrite_urls", "Whether to rewrite URLs (default: false)"
|
23
23
|
end
|
24
24
|
|
25
25
|
def self.process(options)
|
26
|
-
url = options.fetch(
|
27
|
-
format = options.fetch(
|
28
|
-
grab_images = options.fetch(
|
29
|
-
add_highlights = options.fetch(
|
30
|
-
rewrite_urls = options.fetch(
|
26
|
+
url = options.fetch("url")
|
27
|
+
format = options.fetch("format", "html")
|
28
|
+
grab_images = options.fetch("grab_images", false)
|
29
|
+
add_highlights = options.fetch("add_highlights", false)
|
30
|
+
rewrite_urls = options.fetch("rewrite_urls", false)
|
31
31
|
|
32
32
|
@grab_images = grab_images
|
33
33
|
FileUtils.mkdir_p "_posts/tumblr"
|
@@ -51,52 +51,51 @@ module JekyllImport
|
|
51
51
|
if rewrite_urls
|
52
52
|
posts += batch
|
53
53
|
else
|
54
|
-
batch.each {|post| write_post(post, format == "md", add_highlights)}
|
54
|
+
batch.each { |post| write_post(post, format == "md", add_highlights) }
|
55
55
|
end
|
56
|
-
|
57
56
|
end until blog["posts"].size < per_page
|
58
57
|
|
59
58
|
# Rewrite URLs, create redirects and write out posts if necessary
|
60
59
|
if rewrite_urls
|
61
60
|
posts = rewrite_urls_and_redirects posts
|
62
|
-
posts.each {|post| write_post(post, format == "md", add_highlights)}
|
61
|
+
posts.each { |post| write_post(post, format == "md", add_highlights) }
|
63
62
|
end
|
64
63
|
end
|
65
64
|
|
66
65
|
private
|
66
|
+
class << self
|
67
|
+
def extract_json(contents)
|
68
|
+
beginning = contents.index("{")
|
69
|
+
ending = contents.rindex("}") + 1
|
70
|
+
json = contents[beginning...ending] # Strip Tumblr's JSONP chars.
|
71
|
+
JSON.parse(json)
|
72
|
+
end
|
67
73
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
json = contents[beginning...ending] # Strip Tumblr's JSONP chars.
|
72
|
-
blog = JSON.parse(json)
|
73
|
-
end
|
74
|
-
|
75
|
-
# Writes a post out to disk
|
76
|
-
def self.write_post(post, use_markdown, add_highlights)
|
77
|
-
content = post[:content]
|
74
|
+
# Writes a post out to disk
|
75
|
+
def write_post(post, use_markdown, add_highlights)
|
76
|
+
content = post[:content]
|
78
77
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
78
|
+
if content
|
79
|
+
if use_markdown
|
80
|
+
content = html_to_markdown content
|
81
|
+
if add_highlights
|
82
|
+
tumblr_url = URI.parse(post[:slug]).path
|
83
|
+
redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
|
84
|
+
FileUtils.mkdir_p redirect_dir
|
85
|
+
content = add_syntax_highlights(content, redirect_dir)
|
86
|
+
end
|
87
87
|
end
|
88
|
-
end
|
89
88
|
|
90
|
-
|
91
|
-
|
89
|
+
File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
|
90
|
+
f.puts post[:header].to_yaml + "---\n" + content
|
91
|
+
end
|
92
92
|
end
|
93
93
|
end
|
94
|
-
end
|
95
94
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
95
|
+
# Converts each type of Tumblr post to a hash with all required
|
96
|
+
# data for Jekyll.
|
97
|
+
def post_to_hash(post, format)
|
98
|
+
case post["type"]
|
100
99
|
when "regular"
|
101
100
|
title = post["regular-title"]
|
102
101
|
content = post["regular-body"]
|
@@ -107,7 +106,7 @@ module JekyllImport
|
|
107
106
|
content << "<br/>" + post["link-description"]
|
108
107
|
end
|
109
108
|
when "photo"
|
110
|
-
title = post["slug"].
|
109
|
+
title = post["slug"].tr("-", " ")
|
111
110
|
if post["photos"].size > 1
|
112
111
|
content = ""
|
113
112
|
post["photos"].each do |post_photo|
|
@@ -137,7 +136,7 @@ module JekyllImport
|
|
137
136
|
title = post["conversation-title"]
|
138
137
|
content = "<section><dialog>"
|
139
138
|
post["conversation"].each do |line|
|
140
|
-
content << "<dt>#{line[
|
139
|
+
content << "<dt>#{line["label"]}</dt><dd>#{line["phrase"]}</dd>"
|
141
140
|
end
|
142
141
|
content << "</dialog></section>"
|
143
142
|
when "video"
|
@@ -153,123 +152,126 @@ module JekyllImport
|
|
153
152
|
when "answer"
|
154
153
|
title = post["question"]
|
155
154
|
content = post["answer"]
|
155
|
+
end
|
156
|
+
date = Date.parse(post["date"]).to_s
|
157
|
+
title = Nokogiri::HTML(title).text
|
158
|
+
title = "no title" if title.empty?
|
159
|
+
slug = if post["slug"] && post["slug"].strip != ""
|
160
|
+
post["slug"]
|
161
|
+
elsif title && title.downcase.gsub(%r![^a-z0-9\-]!, "") != "" && title != "no title"
|
162
|
+
slug = title.downcase.strip.tr(" ", "-").gsub(%r![^a-z0-9\-]!, "")
|
163
|
+
slug.length > 200 ? slug.slice(0..200) : slug
|
164
|
+
else
|
165
|
+
post["id"]
|
166
|
+
end
|
167
|
+
{
|
168
|
+
:name => "#{date}-#{slug}.#{format}",
|
169
|
+
:header => {
|
170
|
+
"layout" => "post",
|
171
|
+
"title" => title,
|
172
|
+
"date" => Time.parse(post["date"]).xmlschema,
|
173
|
+
"tags" => (post["tags"] || []),
|
174
|
+
"tumblr_url" => post["url-with-slug"],
|
175
|
+
},
|
176
|
+
:content => content,
|
177
|
+
:url => post["url"],
|
178
|
+
:slug => post["url-with-slug"],
|
179
|
+
}
|
156
180
|
end
|
157
|
-
date = Date.parse(post['date']).to_s
|
158
|
-
title = Nokogiri::HTML(title).text
|
159
|
-
title = "no title" if title.empty?
|
160
|
-
slug = if post["slug"] && post["slug"].strip != ""
|
161
|
-
post["slug"]
|
162
|
-
elsif title && title.downcase.gsub(/[^a-z0-9\-]/, '') != '' && title != 'no title'
|
163
|
-
slug = title.downcase.strip.gsub(' ', '-').gsub(/[^a-z0-9\-]/, '')
|
164
|
-
slug.length > 200 ? slug.slice(0..200) : slug
|
165
|
-
else
|
166
|
-
slug = post['id']
|
167
|
-
end
|
168
|
-
{
|
169
|
-
:name => "#{date}-#{slug}.#{format}",
|
170
|
-
:header => {
|
171
|
-
"layout" => "post",
|
172
|
-
"title" => title,
|
173
|
-
"date" => Time.parse(post['date']).xmlschema,
|
174
|
-
"tags" => (post["tags"] or []),
|
175
|
-
"tumblr_url" => post["url-with-slug"]
|
176
|
-
},
|
177
|
-
:content => content,
|
178
|
-
:url => post["url"],
|
179
|
-
:slug => post["url-with-slug"],
|
180
|
-
}
|
181
|
-
end
|
182
181
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
182
|
+
# Attempts to fetch the largest version of a photo available for a post.
|
183
|
+
# If that file fails, it tries the next smaller size until all available
|
184
|
+
# photo URLs are exhausted. If they all fail, the import is aborted.
|
185
|
+
def fetch_photo(post)
|
186
|
+
sizes = post.keys.map { |k| k.gsub("photo-url-", "").to_i }
|
187
|
+
sizes.sort! { |a, b| b <=> a }
|
189
188
|
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
189
|
+
_ext_key, ext_val = post.find do |k, v|
|
190
|
+
k =~ %r!^photo-url-! && v.split("/").last =~ %r!\.!
|
191
|
+
end
|
192
|
+
ext = "." + ext_val.split(".").last
|
194
193
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
194
|
+
sizes.each do |size|
|
195
|
+
url = post["photo-url"] || post["photo-url-#{size}"]
|
196
|
+
next if url.nil?
|
197
|
+
begin
|
198
|
+
return "<img src=\"#{save_photo(url, ext)}\"/>"
|
199
|
+
rescue OpenURI::HTTPError
|
200
|
+
puts "Failed to grab photo"
|
201
|
+
end
|
202
202
|
end
|
203
|
+
|
204
|
+
abort "Failed to fetch photo for post #{post["url"]}"
|
203
205
|
end
|
204
206
|
|
205
|
-
|
206
|
-
|
207
|
+
# Create a Hash of old urls => new urls, for rewriting and
|
208
|
+
# redirects, and replace urls in each post. Instantiate Jekyll
|
209
|
+
# site/posts to get the correct permalink format.
|
210
|
+
def rewrite_urls_and_redirects(posts)
|
211
|
+
site = Jekyll::Site.new(Jekyll.configuration({}))
|
212
|
+
urls = Hash[posts.map do |post|
|
213
|
+
# Create an initial empty file for the post so that
|
214
|
+
# we can instantiate a post object.
|
215
|
+
File.write("_posts/tumblr/#{post[:name]}", "")
|
216
|
+
tumblr_url = URI.parse(URI.encode(post[:slug])).path
|
217
|
+
jekyll_url = if Jekyll.const_defined? :Post
|
218
|
+
Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
|
219
|
+
else
|
220
|
+
Jekyll::Document.new(File.expand_path("_posts/tumblr/#{post[:name]}"), :site => site, :collection => site.posts).url
|
221
|
+
end
|
222
|
+
redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
|
223
|
+
FileUtils.mkdir_p redirect_dir
|
224
|
+
File.open(redirect_dir + "index.html", "w") do |f|
|
225
|
+
f.puts "<html><head><link rel=\"canonical\" href=\"" \
|
226
|
+
"#{jekyll_url}\"><meta http-equiv=\"refresh\" content=\"0; " \
|
227
|
+
"url=#{jekyll_url}\"></head><body></body></html>"
|
228
|
+
end
|
229
|
+
[tumblr_url, jekyll_url]
|
230
|
+
end]
|
231
|
+
posts.map do |post|
|
232
|
+
urls.each do |tumblr_url, jekyll_url|
|
233
|
+
post[:content].gsub!(%r!#{tumblr_url}!i, jekyll_url)
|
234
|
+
end
|
235
|
+
post
|
236
|
+
end
|
237
|
+
end
|
207
238
|
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
# Create an initial empty file for the post so that
|
215
|
-
# we can instantiate a post object.
|
216
|
-
File.write("_posts/tumblr/#{post[:name]}", "")
|
217
|
-
tumblr_url = URI.parse(URI.encode(post[:slug])).path
|
218
|
-
jekyll_url = if Jekyll.const_defined? :Post
|
219
|
-
Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
|
220
|
-
else
|
221
|
-
Jekyll::Document.new(File.expand_path("_posts/tumblr/#{post[:name]}"), site: site, collection: site.posts).url
|
222
|
-
end
|
223
|
-
redirect_dir = tumblr_url.sub(/\//, "") + "/"
|
224
|
-
FileUtils.mkdir_p redirect_dir
|
225
|
-
File.open(redirect_dir + "index.html", "w") do |f|
|
226
|
-
f.puts "<html><head><link rel=\"canonical\" href=\"" +
|
227
|
-
"#{jekyll_url}\"><meta http-equiv=\"refresh\" content=\"0; " +
|
228
|
-
"url=#{jekyll_url}\"></head><body></body></html>"
|
239
|
+
# Convert preserving HTML tables as per the markdown docs.
|
240
|
+
def html_to_markdown(content)
|
241
|
+
preserve = %w(table tr th td)
|
242
|
+
preserve.each do |tag|
|
243
|
+
content.gsub!(%r!<#{tag}!i, "$$" + tag)
|
244
|
+
content.gsub!(%r!<\/#{tag}!i, "||" + tag)
|
229
245
|
end
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
post[:content].gsub!(/#{tumblr_url}/i, jekyll_url)
|
246
|
+
content = Nokogiri::HTML(content.gsub("'", "''")).text
|
247
|
+
preserve.each do |tag|
|
248
|
+
content.gsub!("$$" + tag, "<" + tag)
|
249
|
+
content.gsub!("||" + tag, "</" + tag)
|
235
250
|
end
|
236
|
-
|
237
|
-
}
|
238
|
-
end
|
239
|
-
|
240
|
-
# Convert preserving HTML tables as per the markdown docs.
|
241
|
-
def self.html_to_markdown(content)
|
242
|
-
preserve = ["table", "tr", "th", "td"]
|
243
|
-
preserve.each do |tag|
|
244
|
-
content.gsub!(/<#{tag}/i, "$$" + tag)
|
245
|
-
content.gsub!(/<\/#{tag}/i, "||" + tag)
|
251
|
+
content
|
246
252
|
end
|
247
|
-
content = Nokogiri::HTML(content.gsub("'", "''")).text
|
248
|
-
preserve.each do |tag|
|
249
|
-
content.gsub!("$$" + tag, "<" + tag)
|
250
|
-
content.gsub!("||" + tag, "</" + tag)
|
251
|
-
end
|
252
|
-
content
|
253
|
-
end
|
254
253
|
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
254
|
+
# Adds pygments highlight tags to code blocks in posts that use
|
255
|
+
# markdown format. This doesn't guess the language of the code
|
256
|
+
# block, so you should modify this to suit your own content.
|
257
|
+
# For example, my code blocks only contain Python and JavaScript,
|
258
|
+
# so I can assume the block is JavaScript if it contains a
|
259
|
+
# semi-colon.
|
260
|
+
def add_syntax_highlights(content, redirect_dir)
|
262
261
|
lines = content.split("\n")
|
263
|
-
block
|
262
|
+
block = false
|
263
|
+
indent = %r!^ !
|
264
|
+
lang = nil
|
265
|
+
start = nil
|
264
266
|
lines.each_with_index do |line, i|
|
265
267
|
if !block && line =~ indent
|
266
268
|
block = true
|
267
269
|
lang = "python"
|
268
270
|
start = i
|
269
271
|
elsif block
|
270
|
-
lang = "javascript" if line =~
|
272
|
+
lang = "javascript" if line =~ %r!;$!
|
271
273
|
block = line =~ indent && i < lines.size - 1 # Also handle EOF
|
272
|
-
|
274
|
+
unless block
|
273
275
|
lines[start] = "{% highlight #{lang} %}"
|
274
276
|
lines[i - 1] = "{% endhighlight %}"
|
275
277
|
end
|
@@ -280,10 +282,10 @@ module JekyllImport
|
|
280
282
|
lines.join("\n")
|
281
283
|
end
|
282
284
|
|
283
|
-
|
285
|
+
def save_photo(url, ext)
|
284
286
|
if @grab_images
|
285
|
-
path = "tumblr_files/#{url.split(
|
286
|
-
path += ext unless path =~
|
287
|
+
path = "tumblr_files/#{url.split("/").last}"
|
288
|
+
path += ext unless path =~ %r!#{ext}$!
|
287
289
|
FileUtils.mkdir_p "tumblr_files"
|
288
290
|
|
289
291
|
# Don't fetch if we've already cached this file
|
@@ -295,6 +297,7 @@ module JekyllImport
|
|
295
297
|
end
|
296
298
|
url
|
297
299
|
end
|
300
|
+
end
|
298
301
|
end
|
299
302
|
end
|
300
303
|
end
|