jekyll-import 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/lib/jekyll-import.rb +2 -0
  3. data/lib/jekyll-import/importer.rb +5 -3
  4. data/lib/jekyll-import/importers.rb +3 -0
  5. data/lib/jekyll-import/importers/behance.rb +7 -6
  6. data/lib/jekyll-import/importers/blogger.rb +23 -38
  7. data/lib/jekyll-import/importers/csv.rb +6 -5
  8. data/lib/jekyll-import/importers/drupal6.rb +7 -5
  9. data/lib/jekyll-import/importers/drupal7.rb +15 -13
  10. data/lib/jekyll-import/importers/drupal_common.rb +55 -31
  11. data/lib/jekyll-import/importers/easyblog.rb +8 -8
  12. data/lib/jekyll-import/importers/enki.rb +14 -12
  13. data/lib/jekyll-import/importers/ghost.rb +4 -1
  14. data/lib/jekyll-import/importers/google_reader.rb +4 -4
  15. data/lib/jekyll-import/importers/joomla.rb +9 -9
  16. data/lib/jekyll-import/importers/joomla3.rb +15 -15
  17. data/lib/jekyll-import/importers/jrnl.rb +11 -9
  18. data/lib/jekyll-import/importers/marley.rb +12 -10
  19. data/lib/jekyll-import/importers/mephisto.rb +15 -15
  20. data/lib/jekyll-import/importers/mt.rb +16 -13
  21. data/lib/jekyll-import/importers/posterous.rb +12 -9
  22. data/lib/jekyll-import/importers/roller.rb +277 -0
  23. data/lib/jekyll-import/importers/rss.rb +18 -6
  24. data/lib/jekyll-import/importers/s9y.rb +3 -1
  25. data/lib/jekyll-import/importers/s9y_database.rb +38 -53
  26. data/lib/jekyll-import/importers/textpattern.rb +6 -4
  27. data/lib/jekyll-import/importers/tumblr.rb +101 -107
  28. data/lib/jekyll-import/importers/typo.rb +29 -27
  29. data/lib/jekyll-import/importers/wordpress.rb +47 -59
  30. data/lib/jekyll-import/importers/wordpressdotcom.rb +27 -32
  31. data/lib/jekyll-import/util.rb +2 -1
  32. data/lib/jekyll-import/version.rb +3 -1
  33. data/lib/jekyll/commands/import.rb +4 -7
  34. metadata +40 -40
  35. data/lib/jekyll-import/importers/tmp.rb +0 -0
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module JekyllImport
2
4
  module Importers
3
5
  class TextPattern < Importer
@@ -12,7 +14,7 @@ module JekyllImport
12
14
  Keywords \
13
15
  FROM textpattern \
14
16
  WHERE Status = '4' OR \
15
- Status = '5'".freeze
17
+ Status = '5'"
16
18
 
17
19
  def self.require_deps
18
20
  JekyllImport.require_with_fallback(%w(
@@ -25,10 +27,10 @@ module JekyllImport
25
27
  end
26
28
 
27
29
  def self.specify_options(c)
28
- c.option "dbname", "--dbname DB", "Database name"
29
- c.option "user", "--user USER", "Database user name"
30
+ c.option "dbname", "--dbname DB", "Database name"
31
+ c.option "user", "--user USER", "Database user name"
30
32
  c.option "password", "--password PW", "Database user's password"
31
- c.option "host", "--host HOST", 'Database host name (default: "localhost")'
33
+ c.option "host", "--host HOST", 'Database host name (default: "localhost")'
32
34
  end
33
35
 
34
36
  def self.process(options)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module JekyllImport
2
4
  module Importers
3
5
  class Tumblr < Importer
@@ -15,11 +17,11 @@ module JekyllImport
15
17
  end
16
18
 
17
19
  def self.specify_options(c)
18
- c.option "url", "--url URL", "Tumblr URL"
19
- c.option "format", "--format FORMAT", 'Output format (default: "html")'
20
- c.option "grab_images", "--grab_images", "Whether to grab images (default: false)"
20
+ c.option "url", "--url URL", "Tumblr URL"
21
+ c.option "format", "--format FORMAT", 'Output format (default: "html")'
22
+ c.option "grab_images", "--grab_images", "Whether to grab images (default: false)"
21
23
  c.option "add_highlights", "--add_highlights", "Whether to add highlights (default: false)"
22
- c.option "rewrite_urls", "--rewrite_urls", "Whether to rewrite URLs (default: false)"
24
+ c.option "rewrite_urls", "--rewrite_urls", "Whether to rewrite URLs (default: false)"
23
25
  end
24
26
 
25
27
  def self.process(options)
@@ -34,20 +36,23 @@ module JekyllImport
34
36
  url += "/api/read/json/"
35
37
  per_page = 50
36
38
  posts = []
39
+
37
40
  # Two passes are required so that we can rewrite URLs.
38
41
  # First pass builds up an array of each post as a hash.
39
42
  begin
40
43
  current_page = (current_page || -1) + 1
41
- feed_url = url + "?num=#{per_page}&start=#{current_page * per_page}"
42
- puts "Fetching #{feed_url}"
43
- feed = open(feed_url)
44
+ feed_url = "#{url}?num=#{per_page}&start=#{current_page * per_page}"
45
+ Jekyll.logger.info "Fetching #{feed_url}"
46
+
47
+ feed = URI.parse(feed_url).open
44
48
  contents = feed.readlines.join("\n")
45
- blog = extract_json(contents)
46
- puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
49
+ blog = extract_json(contents)
50
+ Jekyll.logger.info "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
51
+
47
52
  batch = blog["posts"].map { |post| post_to_hash(post, format) }
48
53
 
49
- # If we're rewriting, save the posts for later. Otherwise, go ahead and
50
- # dump these to disk now
54
+ # If we're rewriting, save the posts for later. Otherwise, go ahead and dump these to
55
+ # disk now
51
56
  if rewrite_urls
52
57
  posts += batch
53
58
  else
@@ -62,33 +67,31 @@ module JekyllImport
62
67
  end
63
68
  end
64
69
 
65
- private
66
70
  class << self
67
71
  def extract_json(contents)
68
72
  beginning = contents.index("{")
69
- ending = contents.rindex("}") + 1
70
- json = contents[beginning...ending] # Strip Tumblr's JSONP chars.
71
- JSON.parse(json)
73
+ ending = contents.rindex("}") + 1
74
+ json_data = contents[beginning...ending] # Strip Tumblr's JSONP chars.
75
+ JSON.parse(json_data)
72
76
  end
73
77
 
74
78
  # Writes a post out to disk
75
79
  def write_post(post, use_markdown, add_highlights)
76
80
  content = post[:content]
81
+ return unless content
77
82
 
78
- if content
79
- if use_markdown
80
- content = html_to_markdown content
81
- if add_highlights
82
- tumblr_url = URI.parse(post[:slug]).path
83
- redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
84
- FileUtils.mkdir_p redirect_dir
85
- content = add_syntax_highlights(content, redirect_dir)
86
- end
83
+ if use_markdown
84
+ content = html_to_markdown content
85
+ if add_highlights
86
+ tumblr_url = URI.parse(post[:slug]).path
87
+ redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
88
+ FileUtils.mkdir_p redirect_dir
89
+ content = add_syntax_highlights(content, redirect_dir)
87
90
  end
91
+ end
88
92
 
89
- File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
90
- f.puts post[:header].to_yaml + "---\n" + content
91
- end
93
+ File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
94
+ f.puts post[:header].to_yaml + "---\n" + content
92
95
  end
93
96
  end
94
97
 
@@ -97,73 +100,66 @@ module JekyllImport
97
100
  def post_to_hash(post, format)
98
101
  case post["type"]
99
102
  when "regular"
100
- title = post["regular-title"]
101
- content = post["regular-body"]
103
+ title, content = post.values_at("regular-title", "regular-body")
102
104
  when "link"
103
- title = post["link-text"] || post["link-url"]
105
+ title = post["link-text"] || post["link-url"]
104
106
  content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
105
- unless post["link-description"].nil?
106
- content << "<br/>" + post["link-description"]
107
- end
107
+ content << "<br/>#{post["link-description"]}" unless post["link-description"].nil?
108
108
  when "photo"
109
109
  title = post["slug"].tr("-", " ")
110
110
  if post["photos"].size > 1
111
- content = ""
111
+ content = +""
112
112
  post["photos"].each do |post_photo|
113
113
  photo = fetch_photo post_photo
114
- content << photo + "<br/>"
114
+ content << "#{photo}<br/>"
115
115
  content << post_photo["caption"]
116
116
  end
117
117
  else
118
118
  content = fetch_photo post
119
119
  end
120
- content << "<br/>" + post["photo-caption"]
120
+ content << "<br/>#{post["photo-caption"]}"
121
121
  when "audio"
122
122
  if !post["id3-title"].nil?
123
- title = post["id3-title"]
124
- content = post["audio-player"] + "<br/>" + post["audio-caption"]
123
+ title, content = post.values_at("id3-title", "audio-player")
124
+ content << "<br/>#{post["audio-caption"]}"
125
125
  else
126
- title = post["audio-caption"]
127
- content = post["audio-player"]
126
+ title, content = post.values_at("audio-caption", "audio-player")
128
127
  end
129
128
  when "quote"
130
- title = post["quote-text"]
129
+ title = post["quote-text"]
131
130
  content = "<blockquote>#{post["quote-text"]}</blockquote>"
132
- unless post["quote-source"].nil?
133
- content << "&#8212;" + post["quote-source"]
134
- end
131
+ content << "&#8212;#{post["quote-source"]}" unless post["quote-source"].nil?
135
132
  when "conversation"
136
- title = post["conversation-title"]
133
+ title = post["conversation-title"]
137
134
  content = "<section><dialog>"
138
135
  post["conversation"].each do |line|
139
136
  content << "<dt>#{line["label"]}</dt><dd>#{line["phrase"]}</dd>"
140
137
  end
141
138
  content << "</dialog></section>"
142
139
  when "video"
143
- title = post["video-title"]
144
- content = post["video-player"]
140
+ title, content = post.values_at("video-title", "video-player")
145
141
  unless post["video-caption"].nil?
146
142
  if content
147
- content << "<br/>" + post["video-caption"]
143
+ content << "<br/>#{post["video-caption"]}"
148
144
  else
149
145
  content = post["video-caption"]
150
146
  end
151
147
  end
152
148
  when "answer"
153
- title = post["question"]
154
- content = post["answer"]
149
+ title, content = post.values_at("question", "answer")
155
150
  end
156
- date = Date.parse(post["date"]).to_s
151
+
152
+ date = Date.parse(post["date"]).to_s
157
153
  title = Nokogiri::HTML(title).text
158
154
  title = "no title" if title.empty?
159
- slug = if post["slug"] && post["slug"].strip != ""
160
- post["slug"]
161
- elsif title && title.downcase.gsub(%r![^a-z0-9\-]!, "") != "" && title != "no title"
162
- slug = title.downcase.strip.tr(" ", "-").gsub(%r![^a-z0-9\-]!, "")
163
- slug.length > 200 ? slug.slice(0..200) : slug
164
- else
165
- post["id"]
166
- end
155
+ slug = if post["slug"] && post["slug"].strip != ""
156
+ post["slug"]
157
+ elsif title && title.downcase.gsub(%r![^a-z0-9\-]!, "") != "" && title != "no title"
158
+ slug = title.downcase.strip.tr(" ", "-").gsub(%r![^a-z0-9\-]!, "")
159
+ slug.length > 200 ? slug.slice(0..200) : slug
160
+ else
161
+ post["id"]
162
+ end
167
163
  {
168
164
  :name => "#{date}-#{slug}.#{format}",
169
165
  :header => {
@@ -180,8 +176,8 @@ module JekyllImport
180
176
  end
181
177
 
182
178
  # Attempts to fetch the largest version of a photo available for a post.
183
- # If that file fails, it tries the next smaller size until all available
184
- # photo URLs are exhausted. If they all fail, the import is aborted.
179
+ # If that file fails, it tries the next smaller size until all available photo URLs are
180
+ # exhausted. If they all fail, the import is aborted.
185
181
  def fetch_photo(post)
186
182
  sizes = post.keys.map { |k| k.gsub("photo-url-", "").to_i }
187
183
  sizes.sort! { |a, b| b <=> a }
@@ -194,30 +190,30 @@ module JekyllImport
194
190
  sizes.each do |size|
195
191
  url = post["photo-url"] || post["photo-url-#{size}"]
196
192
  next if url.nil?
193
+
197
194
  begin
198
- return "<img src=\"#{save_photo(url, ext)}\"/>"
195
+ return +"<img src=\"#{save_photo(url, ext)}\"/>"
199
196
  rescue OpenURI::HTTPError
200
- puts "Failed to grab photo"
197
+ Jekyll.logger.warn "Failed to grab photo"
201
198
  end
202
199
  end
203
200
 
204
201
  abort "Failed to fetch photo for post #{post["url"]}"
205
202
  end
206
203
 
207
- # Create a Hash of old urls => new urls, for rewriting and
208
- # redirects, and replace urls in each post. Instantiate Jekyll
209
- # site/posts to get the correct permalink format.
204
+ # Create a Hash of old urls => new urls, for rewriting and redirects, and replace urls in
205
+ # each post. Instantiate Jekyll site/posts to get the correct permalink format.
210
206
  def rewrite_urls_and_redirects(posts)
211
207
  site = Jekyll::Site.new(Jekyll.configuration({}))
212
208
  urls = Hash[posts.map do |post|
213
- # Create an initial empty file for the post so that
214
- # we can instantiate a post object.
215
- File.write("_posts/tumblr/#{post[:name]}", "")
209
+ # Create an initial empty file for the post so that we can instantiate a post object.
210
+ relative_path = "_posts/tumblr/#{post[:name]}"
211
+ File.write(relative_path, "")
216
212
  tumblr_url = URI.parse(URI.encode(post[:slug])).path
217
213
  jekyll_url = if Jekyll.const_defined? :Post
218
- Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
214
+ Jekyll::Post.new(site, site.source, "", "tumblr/#{post[:name]}").url
219
215
  else
220
- Jekyll::Document.new(File.expand_path("_posts/tumblr/#{post[:name]}"), :site => site, :collection => site.posts).url
216
+ Jekyll::Document.new(site.in_source_dir(relative_path), :site => site, :collection => site.posts).url
221
217
  end
222
218
  redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
223
219
  FileUtils.mkdir_p redirect_dir
@@ -240,63 +236,61 @@ module JekyllImport
240
236
  def html_to_markdown(content)
241
237
  preserve = %w(table tr th td)
242
238
  preserve.each do |tag|
243
- content.gsub!(%r!<#{tag}!i, "$$" + tag)
244
- content.gsub!(%r!<\/#{tag}!i, "||" + tag)
239
+ content.gsub!(%r!<#{tag}!i, "$$#{tag}")
240
+ content.gsub!(%r!<\/#{tag}!i, "||#{tag}")
245
241
  end
246
242
  content = Nokogiri::HTML(content.gsub("'", "''")).text
247
243
  preserve.each do |tag|
248
- content.gsub!("$$" + tag, "<" + tag)
249
- content.gsub!("||" + tag, "</" + tag)
244
+ content.gsub!("$$#{tag}", "<#{tag}")
245
+ content.gsub!("||#{tag}", "</#{tag}")
250
246
  end
251
247
  content
252
248
  end
253
249
 
254
- # Adds pygments highlight tags to code blocks in posts that use
255
- # markdown format. This doesn't guess the language of the code
256
- # block, so you should modify this to suit your own content.
257
- # For example, my code block only contain Python and JavaScript,
258
- # so I can assume the block is JavaScript if it contains a
259
- # semi-colon.
250
+ # Adds pygments highlight tags to code blocks in posts that use markdown format.
251
+ # This doesn't guess the language of the code block, so you should modify this to suit your
252
+ # own content.
253
+ # For example, my code block only contain Python and JavaScript, so I can assume the block
254
+ # is JavaScript if it contains a semi-colon.
260
255
  def add_syntax_highlights(content, redirect_dir)
261
- lines = content.split("\n")
262
- block = false
263
- indent = %r!^ !
264
- lang = nil
265
- start = nil
266
- lines.each_with_index do |line, i|
267
- if !block && line =~ indent
268
- block = true
269
- lang = "python"
270
- start = i
271
- elsif block
272
- lang = "javascript" if line =~ %r!;$!
273
- block = line =~ indent && i < lines.size - 1 # Also handle EOF
274
- unless block
275
- lines[start] = "{% highlight #{lang} %}"
276
- lines[i - 1] = "{% endhighlight %}"
256
+ lines = content.split("\n")
257
+ block = false
258
+ indent = %r!^ !
259
+ lang = nil
260
+ start = nil
261
+ lines.each_with_index do |line, i|
262
+ if !block && line =~ indent
263
+ block = true
264
+ lang = "python"
265
+ start = i
266
+ elsif block
267
+ lang = "javascript" if line =~ %r!;$!
268
+ block = line =~ indent && i < lines.size - 1 # Also handle EOF
269
+ unless block
270
+ lines[start] = "{% highlight #{lang} %}"
271
+ lines[i - 1] = "{% endhighlight %}"
272
+ end
273
+ FileUtils.cp(redirect_dir + "index.html", redirect_dir + "../" + "index.html")
274
+ lines[i] = lines[i].sub(indent, "")
277
275
  end
278
- FileUtils.cp(redirect_dir + "index.html", redirect_dir + "../" + "index.html")
279
- lines[i] = lines[i].sub(indent, "")
280
276
  end
277
+ lines.join("\n")
281
278
  end
282
- lines.join("\n")
283
- end
284
279
 
285
280
  def save_photo(url, ext)
286
- if @grab_images
281
+ return url unless @grab_images
282
+
287
283
  path = "tumblr_files/#{url.split("/").last}"
288
284
  path += ext unless path =~ %r!#{ext}$!
289
285
  FileUtils.mkdir_p "tumblr_files"
290
286
 
291
287
  # Don't fetch if we've already cached this file
292
288
  unless File.size? path
293
- puts "Fetching photo #{url}"
294
- File.open(path, "wb") { |f| f.write(open(url).read) }
289
+ Jekyll.logger.info "Fetching photo #{url}"
290
+ File.open(path, "wb") { |f| f.write(URI.parse(url).read) }
295
291
  end
296
- url = "/" + path
292
+ "/#{path}"
297
293
  end
298
- url
299
- end
300
294
  end
301
295
  end
302
296
  end
@@ -1,21 +1,23 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module JekyllImport
2
4
  module Importers
3
5
  class Typo < Importer
4
6
  # This SQL *should* work for both MySQL and PostgreSQL.
5
- SQL = <<-EOS.freeze
6
- SELECT c.id id,
7
- c.title title,
8
- c.permalink slug,
9
- c.body body,
10
- c.extended extended,
11
- c.published_at date,
12
- c.state state,
13
- c.keywords keywords,
14
- COALESCE(tf.name, 'html') filter
15
- FROM contents c
16
- LEFT OUTER JOIN text_filters tf
17
- ON c.text_filter_id = tf.id
18
- EOS
7
+ SQL = <<~SQL
8
+ SELECT c.id id,
9
+ c.title title,
10
+ c.permalink slug,
11
+ c.body body,
12
+ c.extended extended,
13
+ c.published_at date,
14
+ c.state state,
15
+ c.keywords keywords,
16
+ COALESCE(tf.name, 'html') filter
17
+ FROM contents c
18
+ LEFT OUTER JOIN text_filters tf
19
+ ON c.text_filter_id = tf.id
20
+ SQL
19
21
 
20
22
  def self.require_deps
21
23
  JekyllImport.require_with_fallback(%w(
@@ -29,11 +31,11 @@ module JekyllImport
29
31
  end
30
32
 
31
33
  def self.specify_options(c)
32
- c.option "server", "--server TYPE", 'Server type ("mysql" or "postgres")'
33
- c.option "dbname", "--dbname DB", "Database name"
34
- c.option "user", "--user USER", "Database user name"
34
+ c.option "server", "--server TYPE", 'Server type ("mysql" or "postgres")'
35
+ c.option "dbname", "--dbname DB", "Database name"
36
+ c.option "user", "--user USER", "Database user name"
35
37
  c.option "password", "--password PW", "Database user's password (default: '')"
36
- c.option "host", "--host HOST", "Database host name"
38
+ c.option "host", "--host HOST", "Database host name"
37
39
  end
38
40
 
39
41
  def self.process(options)
@@ -55,19 +57,19 @@ module JekyllImport
55
57
  db[SQL].each do |post|
56
58
  next unless post[:state] =~ %r!published!i
57
59
 
58
- if post[:slug].nil?
59
- post[:slug] = "no slug"
60
- end
60
+ post[:slug] = "no slug" if post[:slug].nil?
61
61
 
62
62
  if post[:extended]
63
63
  post[:body] << "\n<!-- more -->\n"
64
64
  post[:body] << post[:extended]
65
65
  end
66
66
 
67
- name = [ format("%.04d", post[:date].year),
68
- format("%.02d", post[:date].month),
69
- format("%.02d", post[:date].day),
70
- post[:slug].strip, ].join("-")
67
+ name = [
68
+ format("%.04d", post[:date].year),
69
+ format("%.02d", post[:date].month),
70
+ format("%.02d", post[:date].day),
71
+ post[:slug].strip,
72
+ ].join("-")
71
73
 
72
74
  # Can have more than one text filter in this field, but we just want
73
75
  # the first one for this.
@@ -75,8 +77,8 @@ module JekyllImport
75
77
 
76
78
  File.open("_posts/#{name}", "w") do |f|
77
79
  f.puts({ "layout" => "post",
78
- "title" => (post[:title] && post[:title].to_s.force_encoding("UTF-8")),
79
- "tags" => (post[:keywords] && post[:keywords].to_s.force_encoding("UTF-8")),
80
+ "title" => (post[:title]&.to_s&.force_encoding("UTF-8")),
81
+ "tags" => (post[:keywords]&.to_s&.force_encoding("UTF-8")),
80
82
  "typo_id" => post[:id], }.delete_if { |_k, v| v.nil? || v == "" }.to_yaml)
81
83
  f.puts "---"
82
84
  f.puts post[:body].delete("\r")