jekyll-import 0.14.0 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/lib/jekyll-import.rb +2 -0
  3. data/lib/jekyll-import/importer.rb +5 -3
  4. data/lib/jekyll-import/importers.rb +3 -0
  5. data/lib/jekyll-import/importers/behance.rb +7 -6
  6. data/lib/jekyll-import/importers/blogger.rb +23 -38
  7. data/lib/jekyll-import/importers/csv.rb +6 -5
  8. data/lib/jekyll-import/importers/drupal6.rb +7 -5
  9. data/lib/jekyll-import/importers/drupal7.rb +15 -13
  10. data/lib/jekyll-import/importers/drupal_common.rb +55 -31
  11. data/lib/jekyll-import/importers/easyblog.rb +8 -8
  12. data/lib/jekyll-import/importers/enki.rb +14 -12
  13. data/lib/jekyll-import/importers/ghost.rb +4 -1
  14. data/lib/jekyll-import/importers/google_reader.rb +4 -4
  15. data/lib/jekyll-import/importers/joomla.rb +9 -9
  16. data/lib/jekyll-import/importers/joomla3.rb +15 -15
  17. data/lib/jekyll-import/importers/jrnl.rb +11 -9
  18. data/lib/jekyll-import/importers/marley.rb +12 -10
  19. data/lib/jekyll-import/importers/mephisto.rb +15 -15
  20. data/lib/jekyll-import/importers/mt.rb +16 -13
  21. data/lib/jekyll-import/importers/posterous.rb +12 -9
  22. data/lib/jekyll-import/importers/roller.rb +277 -0
  23. data/lib/jekyll-import/importers/rss.rb +18 -6
  24. data/lib/jekyll-import/importers/s9y.rb +3 -1
  25. data/lib/jekyll-import/importers/s9y_database.rb +38 -53
  26. data/lib/jekyll-import/importers/textpattern.rb +6 -4
  27. data/lib/jekyll-import/importers/tumblr.rb +101 -107
  28. data/lib/jekyll-import/importers/typo.rb +29 -27
  29. data/lib/jekyll-import/importers/wordpress.rb +47 -59
  30. data/lib/jekyll-import/importers/wordpressdotcom.rb +27 -32
  31. data/lib/jekyll-import/util.rb +2 -1
  32. data/lib/jekyll-import/version.rb +3 -1
  33. data/lib/jekyll/commands/import.rb +4 -7
  34. metadata +40 -40
  35. data/lib/jekyll-import/importers/tmp.rb +0 -0
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module JekyllImport
2
4
  module Importers
3
5
  class TextPattern < Importer
@@ -12,7 +14,7 @@ module JekyllImport
12
14
  Keywords \
13
15
  FROM textpattern \
14
16
  WHERE Status = '4' OR \
15
- Status = '5'".freeze
17
+ Status = '5'"
16
18
 
17
19
  def self.require_deps
18
20
  JekyllImport.require_with_fallback(%w(
@@ -25,10 +27,10 @@ module JekyllImport
25
27
  end
26
28
 
27
29
  def self.specify_options(c)
28
- c.option "dbname", "--dbname DB", "Database name"
29
- c.option "user", "--user USER", "Database user name"
30
+ c.option "dbname", "--dbname DB", "Database name"
31
+ c.option "user", "--user USER", "Database user name"
30
32
  c.option "password", "--password PW", "Database user's password"
31
- c.option "host", "--host HOST", 'Database host name (default: "localhost")'
33
+ c.option "host", "--host HOST", 'Database host name (default: "localhost")'
32
34
  end
33
35
 
34
36
  def self.process(options)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module JekyllImport
2
4
  module Importers
3
5
  class Tumblr < Importer
@@ -15,11 +17,11 @@ module JekyllImport
15
17
  end
16
18
 
17
19
  def self.specify_options(c)
18
- c.option "url", "--url URL", "Tumblr URL"
19
- c.option "format", "--format FORMAT", 'Output format (default: "html")'
20
- c.option "grab_images", "--grab_images", "Whether to grab images (default: false)"
20
+ c.option "url", "--url URL", "Tumblr URL"
21
+ c.option "format", "--format FORMAT", 'Output format (default: "html")'
22
+ c.option "grab_images", "--grab_images", "Whether to grab images (default: false)"
21
23
  c.option "add_highlights", "--add_highlights", "Whether to add highlights (default: false)"
22
- c.option "rewrite_urls", "--rewrite_urls", "Whether to rewrite URLs (default: false)"
24
+ c.option "rewrite_urls", "--rewrite_urls", "Whether to rewrite URLs (default: false)"
23
25
  end
24
26
 
25
27
  def self.process(options)
@@ -34,20 +36,23 @@ module JekyllImport
34
36
  url += "/api/read/json/"
35
37
  per_page = 50
36
38
  posts = []
39
+
37
40
  # Two passes are required so that we can rewrite URLs.
38
41
  # First pass builds up an array of each post as a hash.
39
42
  begin
40
43
  current_page = (current_page || -1) + 1
41
- feed_url = url + "?num=#{per_page}&start=#{current_page * per_page}"
42
- puts "Fetching #{feed_url}"
43
- feed = open(feed_url)
44
+ feed_url = "#{url}?num=#{per_page}&start=#{current_page * per_page}"
45
+ Jekyll.logger.info "Fetching #{feed_url}"
46
+
47
+ feed = URI.parse(feed_url).open
44
48
  contents = feed.readlines.join("\n")
45
- blog = extract_json(contents)
46
- puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
49
+ blog = extract_json(contents)
50
+ Jekyll.logger.info "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
51
+
47
52
  batch = blog["posts"].map { |post| post_to_hash(post, format) }
48
53
 
49
- # If we're rewriting, save the posts for later. Otherwise, go ahead and
50
- # dump these to disk now
54
+ # If we're rewriting, save the posts for later. Otherwise, go ahead and dump these to
55
+ # disk now
51
56
  if rewrite_urls
52
57
  posts += batch
53
58
  else
@@ -62,33 +67,31 @@ module JekyllImport
62
67
  end
63
68
  end
64
69
 
65
- private
66
70
  class << self
67
71
  def extract_json(contents)
68
72
  beginning = contents.index("{")
69
- ending = contents.rindex("}") + 1
70
- json = contents[beginning...ending] # Strip Tumblr's JSONP chars.
71
- JSON.parse(json)
73
+ ending = contents.rindex("}") + 1
74
+ json_data = contents[beginning...ending] # Strip Tumblr's JSONP chars.
75
+ JSON.parse(json_data)
72
76
  end
73
77
 
74
78
  # Writes a post out to disk
75
79
  def write_post(post, use_markdown, add_highlights)
76
80
  content = post[:content]
81
+ return unless content
77
82
 
78
- if content
79
- if use_markdown
80
- content = html_to_markdown content
81
- if add_highlights
82
- tumblr_url = URI.parse(post[:slug]).path
83
- redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
84
- FileUtils.mkdir_p redirect_dir
85
- content = add_syntax_highlights(content, redirect_dir)
86
- end
83
+ if use_markdown
84
+ content = html_to_markdown content
85
+ if add_highlights
86
+ tumblr_url = URI.parse(post[:slug]).path
87
+ redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
88
+ FileUtils.mkdir_p redirect_dir
89
+ content = add_syntax_highlights(content, redirect_dir)
87
90
  end
91
+ end
88
92
 
89
- File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
90
- f.puts post[:header].to_yaml + "---\n" + content
91
- end
93
+ File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
94
+ f.puts post[:header].to_yaml + "---\n" + content
92
95
  end
93
96
  end
94
97
 
@@ -97,73 +100,66 @@ module JekyllImport
97
100
  def post_to_hash(post, format)
98
101
  case post["type"]
99
102
  when "regular"
100
- title = post["regular-title"]
101
- content = post["regular-body"]
103
+ title, content = post.values_at("regular-title", "regular-body")
102
104
  when "link"
103
- title = post["link-text"] || post["link-url"]
105
+ title = post["link-text"] || post["link-url"]
104
106
  content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
105
- unless post["link-description"].nil?
106
- content << "<br/>" + post["link-description"]
107
- end
107
+ content << "<br/>#{post["link-description"]}" unless post["link-description"].nil?
108
108
  when "photo"
109
109
  title = post["slug"].tr("-", " ")
110
110
  if post["photos"].size > 1
111
- content = ""
111
+ content = +""
112
112
  post["photos"].each do |post_photo|
113
113
  photo = fetch_photo post_photo
114
- content << photo + "<br/>"
114
+ content << "#{photo}<br/>"
115
115
  content << post_photo["caption"]
116
116
  end
117
117
  else
118
118
  content = fetch_photo post
119
119
  end
120
- content << "<br/>" + post["photo-caption"]
120
+ content << "<br/>#{post["photo-caption"]}"
121
121
  when "audio"
122
122
  if !post["id3-title"].nil?
123
- title = post["id3-title"]
124
- content = post["audio-player"] + "<br/>" + post["audio-caption"]
123
+ title, content = post.values_at("id3-title", "audio-player")
124
+ content << "<br/>#{post["audio-caption"]}"
125
125
  else
126
- title = post["audio-caption"]
127
- content = post["audio-player"]
126
+ title, content = post.values_at("audio-caption", "audio-player")
128
127
  end
129
128
  when "quote"
130
- title = post["quote-text"]
129
+ title = post["quote-text"]
131
130
  content = "<blockquote>#{post["quote-text"]}</blockquote>"
132
- unless post["quote-source"].nil?
133
- content << "&#8212;" + post["quote-source"]
134
- end
131
+ content << "&#8212;#{post["quote-source"]}" unless post["quote-source"].nil?
135
132
  when "conversation"
136
- title = post["conversation-title"]
133
+ title = post["conversation-title"]
137
134
  content = "<section><dialog>"
138
135
  post["conversation"].each do |line|
139
136
  content << "<dt>#{line["label"]}</dt><dd>#{line["phrase"]}</dd>"
140
137
  end
141
138
  content << "</dialog></section>"
142
139
  when "video"
143
- title = post["video-title"]
144
- content = post["video-player"]
140
+ title, content = post.values_at("video-title", "video-player")
145
141
  unless post["video-caption"].nil?
146
142
  if content
147
- content << "<br/>" + post["video-caption"]
143
+ content << "<br/>#{post["video-caption"]}"
148
144
  else
149
145
  content = post["video-caption"]
150
146
  end
151
147
  end
152
148
  when "answer"
153
- title = post["question"]
154
- content = post["answer"]
149
+ title, content = post.values_at("question", "answer")
155
150
  end
156
- date = Date.parse(post["date"]).to_s
151
+
152
+ date = Date.parse(post["date"]).to_s
157
153
  title = Nokogiri::HTML(title).text
158
154
  title = "no title" if title.empty?
159
- slug = if post["slug"] && post["slug"].strip != ""
160
- post["slug"]
161
- elsif title && title.downcase.gsub(%r![^a-z0-9\-]!, "") != "" && title != "no title"
162
- slug = title.downcase.strip.tr(" ", "-").gsub(%r![^a-z0-9\-]!, "")
163
- slug.length > 200 ? slug.slice(0..200) : slug
164
- else
165
- post["id"]
166
- end
155
+ slug = if post["slug"] && post["slug"].strip != ""
156
+ post["slug"]
157
+ elsif title && title.downcase.gsub(%r![^a-z0-9\-]!, "") != "" && title != "no title"
158
+ slug = title.downcase.strip.tr(" ", "-").gsub(%r![^a-z0-9\-]!, "")
159
+ slug.length > 200 ? slug.slice(0..200) : slug
160
+ else
161
+ post["id"]
162
+ end
167
163
  {
168
164
  :name => "#{date}-#{slug}.#{format}",
169
165
  :header => {
@@ -180,8 +176,8 @@ module JekyllImport
180
176
  end
181
177
 
182
178
  # Attempts to fetch the largest version of a photo available for a post.
183
- # If that file fails, it tries the next smaller size until all available
184
- # photo URLs are exhausted. If they all fail, the import is aborted.
179
+ # If that file fails, it tries the next smaller size until all available photo URLs are
180
+ # exhausted. If they all fail, the import is aborted.
185
181
  def fetch_photo(post)
186
182
  sizes = post.keys.map { |k| k.gsub("photo-url-", "").to_i }
187
183
  sizes.sort! { |a, b| b <=> a }
@@ -194,30 +190,30 @@ module JekyllImport
194
190
  sizes.each do |size|
195
191
  url = post["photo-url"] || post["photo-url-#{size}"]
196
192
  next if url.nil?
193
+
197
194
  begin
198
- return "<img src=\"#{save_photo(url, ext)}\"/>"
195
+ return +"<img src=\"#{save_photo(url, ext)}\"/>"
199
196
  rescue OpenURI::HTTPError
200
- puts "Failed to grab photo"
197
+ Jekyll.logger.warn "Failed to grab photo"
201
198
  end
202
199
  end
203
200
 
204
201
  abort "Failed to fetch photo for post #{post["url"]}"
205
202
  end
206
203
 
207
- # Create a Hash of old urls => new urls, for rewriting and
208
- # redirects, and replace urls in each post. Instantiate Jekyll
209
- # site/posts to get the correct permalink format.
204
+ # Create a Hash of old urls => new urls, for rewriting and redirects, and replace urls in
205
+ # each post. Instantiate Jekyll site/posts to get the correct permalink format.
210
206
  def rewrite_urls_and_redirects(posts)
211
207
  site = Jekyll::Site.new(Jekyll.configuration({}))
212
208
  urls = Hash[posts.map do |post|
213
- # Create an initial empty file for the post so that
214
- # we can instantiate a post object.
215
- File.write("_posts/tumblr/#{post[:name]}", "")
209
+ # Create an initial empty file for the post so that we can instantiate a post object.
210
+ relative_path = "_posts/tumblr/#{post[:name]}"
211
+ File.write(relative_path, "")
216
212
  tumblr_url = URI.parse(URI.encode(post[:slug])).path
217
213
  jekyll_url = if Jekyll.const_defined? :Post
218
- Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
214
+ Jekyll::Post.new(site, site.source, "", "tumblr/#{post[:name]}").url
219
215
  else
220
- Jekyll::Document.new(File.expand_path("_posts/tumblr/#{post[:name]}"), :site => site, :collection => site.posts).url
216
+ Jekyll::Document.new(site.in_source_dir(relative_path), :site => site, :collection => site.posts).url
221
217
  end
222
218
  redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
223
219
  FileUtils.mkdir_p redirect_dir
@@ -240,63 +236,61 @@ module JekyllImport
240
236
  def html_to_markdown(content)
241
237
  preserve = %w(table tr th td)
242
238
  preserve.each do |tag|
243
- content.gsub!(%r!<#{tag}!i, "$$" + tag)
244
- content.gsub!(%r!<\/#{tag}!i, "||" + tag)
239
+ content.gsub!(%r!<#{tag}!i, "$$#{tag}")
240
+ content.gsub!(%r!<\/#{tag}!i, "||#{tag}")
245
241
  end
246
242
  content = Nokogiri::HTML(content.gsub("'", "''")).text
247
243
  preserve.each do |tag|
248
- content.gsub!("$$" + tag, "<" + tag)
249
- content.gsub!("||" + tag, "</" + tag)
244
+ content.gsub!("$$#{tag}", "<#{tag}")
245
+ content.gsub!("||#{tag}", "</#{tag}")
250
246
  end
251
247
  content
252
248
  end
253
249
 
254
- # Adds pygments highlight tags to code blocks in posts that use
255
- # markdown format. This doesn't guess the language of the code
256
- # block, so you should modify this to suit your own content.
257
- # For example, my code block only contain Python and JavaScript,
258
- # so I can assume the block is JavaScript if it contains a
259
- # semi-colon.
250
+ # Adds pygments highlight tags to code blocks in posts that use markdown format.
251
+ # This doesn't guess the language of the code block, so you should modify this to suit your
252
+ # own content.
253
+ # For example, my code block only contain Python and JavaScript, so I can assume the block
254
+ # is JavaScript if it contains a semi-colon.
260
255
  def add_syntax_highlights(content, redirect_dir)
261
- lines = content.split("\n")
262
- block = false
263
- indent = %r!^ !
264
- lang = nil
265
- start = nil
266
- lines.each_with_index do |line, i|
267
- if !block && line =~ indent
268
- block = true
269
- lang = "python"
270
- start = i
271
- elsif block
272
- lang = "javascript" if line =~ %r!;$!
273
- block = line =~ indent && i < lines.size - 1 # Also handle EOF
274
- unless block
275
- lines[start] = "{% highlight #{lang} %}"
276
- lines[i - 1] = "{% endhighlight %}"
256
+ lines = content.split("\n")
257
+ block = false
258
+ indent = %r!^ !
259
+ lang = nil
260
+ start = nil
261
+ lines.each_with_index do |line, i|
262
+ if !block && line =~ indent
263
+ block = true
264
+ lang = "python"
265
+ start = i
266
+ elsif block
267
+ lang = "javascript" if line =~ %r!;$!
268
+ block = line =~ indent && i < lines.size - 1 # Also handle EOF
269
+ unless block
270
+ lines[start] = "{% highlight #{lang} %}"
271
+ lines[i - 1] = "{% endhighlight %}"
272
+ end
273
+ FileUtils.cp(redirect_dir + "index.html", redirect_dir + "../" + "index.html")
274
+ lines[i] = lines[i].sub(indent, "")
277
275
  end
278
- FileUtils.cp(redirect_dir + "index.html", redirect_dir + "../" + "index.html")
279
- lines[i] = lines[i].sub(indent, "")
280
276
  end
277
+ lines.join("\n")
281
278
  end
282
- lines.join("\n")
283
- end
284
279
 
285
280
  def save_photo(url, ext)
286
- if @grab_images
281
+ return url unless @grab_images
282
+
287
283
  path = "tumblr_files/#{url.split("/").last}"
288
284
  path += ext unless path =~ %r!#{ext}$!
289
285
  FileUtils.mkdir_p "tumblr_files"
290
286
 
291
287
  # Don't fetch if we've already cached this file
292
288
  unless File.size? path
293
- puts "Fetching photo #{url}"
294
- File.open(path, "wb") { |f| f.write(open(url).read) }
289
+ Jekyll.logger.info "Fetching photo #{url}"
290
+ File.open(path, "wb") { |f| f.write(URI.parse(url).read) }
295
291
  end
296
- url = "/" + path
292
+ "/#{path}"
297
293
  end
298
- url
299
- end
300
294
  end
301
295
  end
302
296
  end
@@ -1,21 +1,23 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module JekyllImport
2
4
  module Importers
3
5
  class Typo < Importer
4
6
  # This SQL *should* work for both MySQL and PostgreSQL.
5
- SQL = <<-EOS.freeze
6
- SELECT c.id id,
7
- c.title title,
8
- c.permalink slug,
9
- c.body body,
10
- c.extended extended,
11
- c.published_at date,
12
- c.state state,
13
- c.keywords keywords,
14
- COALESCE(tf.name, 'html') filter
15
- FROM contents c
16
- LEFT OUTER JOIN text_filters tf
17
- ON c.text_filter_id = tf.id
18
- EOS
7
+ SQL = <<~SQL
8
+ SELECT c.id id,
9
+ c.title title,
10
+ c.permalink slug,
11
+ c.body body,
12
+ c.extended extended,
13
+ c.published_at date,
14
+ c.state state,
15
+ c.keywords keywords,
16
+ COALESCE(tf.name, 'html') filter
17
+ FROM contents c
18
+ LEFT OUTER JOIN text_filters tf
19
+ ON c.text_filter_id = tf.id
20
+ SQL
19
21
 
20
22
  def self.require_deps
21
23
  JekyllImport.require_with_fallback(%w(
@@ -29,11 +31,11 @@ module JekyllImport
29
31
  end
30
32
 
31
33
  def self.specify_options(c)
32
- c.option "server", "--server TYPE", 'Server type ("mysql" or "postgres")'
33
- c.option "dbname", "--dbname DB", "Database name"
34
- c.option "user", "--user USER", "Database user name"
34
+ c.option "server", "--server TYPE", 'Server type ("mysql" or "postgres")'
35
+ c.option "dbname", "--dbname DB", "Database name"
36
+ c.option "user", "--user USER", "Database user name"
35
37
  c.option "password", "--password PW", "Database user's password (default: '')"
36
- c.option "host", "--host HOST", "Database host name"
38
+ c.option "host", "--host HOST", "Database host name"
37
39
  end
38
40
 
39
41
  def self.process(options)
@@ -55,19 +57,19 @@ module JekyllImport
55
57
  db[SQL].each do |post|
56
58
  next unless post[:state] =~ %r!published!i
57
59
 
58
- if post[:slug].nil?
59
- post[:slug] = "no slug"
60
- end
60
+ post[:slug] = "no slug" if post[:slug].nil?
61
61
 
62
62
  if post[:extended]
63
63
  post[:body] << "\n<!-- more -->\n"
64
64
  post[:body] << post[:extended]
65
65
  end
66
66
 
67
- name = [ format("%.04d", post[:date].year),
68
- format("%.02d", post[:date].month),
69
- format("%.02d", post[:date].day),
70
- post[:slug].strip, ].join("-")
67
+ name = [
68
+ format("%.04d", post[:date].year),
69
+ format("%.02d", post[:date].month),
70
+ format("%.02d", post[:date].day),
71
+ post[:slug].strip,
72
+ ].join("-")
71
73
 
72
74
  # Can have more than one text filter in this field, but we just want
73
75
  # the first one for this.
@@ -75,8 +77,8 @@ module JekyllImport
75
77
 
76
78
  File.open("_posts/#{name}", "w") do |f|
77
79
  f.puts({ "layout" => "post",
78
- "title" => (post[:title] && post[:title].to_s.force_encoding("UTF-8")),
79
- "tags" => (post[:keywords] && post[:keywords].to_s.force_encoding("UTF-8")),
80
+ "title" => (post[:title]&.to_s&.force_encoding("UTF-8")),
81
+ "tags" => (post[:keywords]&.to_s&.force_encoding("UTF-8")),
80
82
  "typo_id" => post[:id], }.delete_if { |_k, v| v.nil? || v == "" }.to_yaml)
81
83
  f.puts "---"
82
84
  f.puts post[:body].delete("\r")