jekyll-import 0.14.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/jekyll-import.rb +2 -0
- data/lib/jekyll-import/importer.rb +5 -3
- data/lib/jekyll-import/importers.rb +3 -0
- data/lib/jekyll-import/importers/behance.rb +7 -6
- data/lib/jekyll-import/importers/blogger.rb +23 -38
- data/lib/jekyll-import/importers/csv.rb +6 -5
- data/lib/jekyll-import/importers/drupal6.rb +7 -5
- data/lib/jekyll-import/importers/drupal7.rb +15 -13
- data/lib/jekyll-import/importers/drupal_common.rb +55 -31
- data/lib/jekyll-import/importers/easyblog.rb +8 -8
- data/lib/jekyll-import/importers/enki.rb +14 -12
- data/lib/jekyll-import/importers/ghost.rb +4 -1
- data/lib/jekyll-import/importers/google_reader.rb +4 -4
- data/lib/jekyll-import/importers/joomla.rb +9 -9
- data/lib/jekyll-import/importers/joomla3.rb +15 -15
- data/lib/jekyll-import/importers/jrnl.rb +11 -9
- data/lib/jekyll-import/importers/marley.rb +12 -10
- data/lib/jekyll-import/importers/mephisto.rb +15 -15
- data/lib/jekyll-import/importers/mt.rb +16 -13
- data/lib/jekyll-import/importers/posterous.rb +12 -9
- data/lib/jekyll-import/importers/roller.rb +277 -0
- data/lib/jekyll-import/importers/rss.rb +18 -6
- data/lib/jekyll-import/importers/s9y.rb +3 -1
- data/lib/jekyll-import/importers/s9y_database.rb +38 -53
- data/lib/jekyll-import/importers/textpattern.rb +6 -4
- data/lib/jekyll-import/importers/tumblr.rb +101 -107
- data/lib/jekyll-import/importers/typo.rb +29 -27
- data/lib/jekyll-import/importers/wordpress.rb +47 -59
- data/lib/jekyll-import/importers/wordpressdotcom.rb +27 -32
- data/lib/jekyll-import/util.rb +2 -1
- data/lib/jekyll-import/version.rb +3 -1
- data/lib/jekyll/commands/import.rb +4 -7
- metadata +40 -40
- data/lib/jekyll-import/importers/tmp.rb +0 -0
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module JekyllImport
|
2
4
|
module Importers
|
3
5
|
class TextPattern < Importer
|
@@ -12,7 +14,7 @@ module JekyllImport
|
|
12
14
|
Keywords \
|
13
15
|
FROM textpattern \
|
14
16
|
WHERE Status = '4' OR \
|
15
|
-
Status = '5'"
|
17
|
+
Status = '5'"
|
16
18
|
|
17
19
|
def self.require_deps
|
18
20
|
JekyllImport.require_with_fallback(%w(
|
@@ -25,10 +27,10 @@ module JekyllImport
|
|
25
27
|
end
|
26
28
|
|
27
29
|
def self.specify_options(c)
|
28
|
-
c.option "dbname",
|
29
|
-
c.option "user",
|
30
|
+
c.option "dbname", "--dbname DB", "Database name"
|
31
|
+
c.option "user", "--user USER", "Database user name"
|
30
32
|
c.option "password", "--password PW", "Database user's password"
|
31
|
-
c.option "host",
|
33
|
+
c.option "host", "--host HOST", 'Database host name (default: "localhost")'
|
32
34
|
end
|
33
35
|
|
34
36
|
def self.process(options)
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module JekyllImport
|
2
4
|
module Importers
|
3
5
|
class Tumblr < Importer
|
@@ -15,11 +17,11 @@ module JekyllImport
|
|
15
17
|
end
|
16
18
|
|
17
19
|
def self.specify_options(c)
|
18
|
-
c.option "url",
|
19
|
-
c.option "format",
|
20
|
-
c.option "grab_images",
|
20
|
+
c.option "url", "--url URL", "Tumblr URL"
|
21
|
+
c.option "format", "--format FORMAT", 'Output format (default: "html")'
|
22
|
+
c.option "grab_images", "--grab_images", "Whether to grab images (default: false)"
|
21
23
|
c.option "add_highlights", "--add_highlights", "Whether to add highlights (default: false)"
|
22
|
-
c.option "rewrite_urls",
|
24
|
+
c.option "rewrite_urls", "--rewrite_urls", "Whether to rewrite URLs (default: false)"
|
23
25
|
end
|
24
26
|
|
25
27
|
def self.process(options)
|
@@ -34,20 +36,23 @@ module JekyllImport
|
|
34
36
|
url += "/api/read/json/"
|
35
37
|
per_page = 50
|
36
38
|
posts = []
|
39
|
+
|
37
40
|
# Two passes are required so that we can rewrite URLs.
|
38
41
|
# First pass builds up an array of each post as a hash.
|
39
42
|
begin
|
40
43
|
current_page = (current_page || -1) + 1
|
41
|
-
feed_url
|
42
|
-
|
43
|
-
|
44
|
+
feed_url = "#{url}?num=#{per_page}&start=#{current_page * per_page}"
|
45
|
+
Jekyll.logger.info "Fetching #{feed_url}"
|
46
|
+
|
47
|
+
feed = URI.parse(feed_url).open
|
44
48
|
contents = feed.readlines.join("\n")
|
45
|
-
blog
|
46
|
-
|
49
|
+
blog = extract_json(contents)
|
50
|
+
Jekyll.logger.info "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
|
51
|
+
|
47
52
|
batch = blog["posts"].map { |post| post_to_hash(post, format) }
|
48
53
|
|
49
|
-
# If we're rewriting, save the posts for later. Otherwise, go ahead and
|
50
|
-
#
|
54
|
+
# If we're rewriting, save the posts for later. Otherwise, go ahead and dump these to
|
55
|
+
# disk now
|
51
56
|
if rewrite_urls
|
52
57
|
posts += batch
|
53
58
|
else
|
@@ -62,33 +67,31 @@ module JekyllImport
|
|
62
67
|
end
|
63
68
|
end
|
64
69
|
|
65
|
-
private
|
66
70
|
class << self
|
67
71
|
def extract_json(contents)
|
68
72
|
beginning = contents.index("{")
|
69
|
-
ending
|
70
|
-
|
71
|
-
JSON.parse(
|
73
|
+
ending = contents.rindex("}") + 1
|
74
|
+
json_data = contents[beginning...ending] # Strip Tumblr's JSONP chars.
|
75
|
+
JSON.parse(json_data)
|
72
76
|
end
|
73
77
|
|
74
78
|
# Writes a post out to disk
|
75
79
|
def write_post(post, use_markdown, add_highlights)
|
76
80
|
content = post[:content]
|
81
|
+
return unless content
|
77
82
|
|
78
|
-
if
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
content = add_syntax_highlights(content, redirect_dir)
|
86
|
-
end
|
83
|
+
if use_markdown
|
84
|
+
content = html_to_markdown content
|
85
|
+
if add_highlights
|
86
|
+
tumblr_url = URI.parse(post[:slug]).path
|
87
|
+
redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
|
88
|
+
FileUtils.mkdir_p redirect_dir
|
89
|
+
content = add_syntax_highlights(content, redirect_dir)
|
87
90
|
end
|
91
|
+
end
|
88
92
|
|
89
|
-
|
90
|
-
|
91
|
-
end
|
93
|
+
File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
|
94
|
+
f.puts post[:header].to_yaml + "---\n" + content
|
92
95
|
end
|
93
96
|
end
|
94
97
|
|
@@ -97,73 +100,66 @@ module JekyllImport
|
|
97
100
|
def post_to_hash(post, format)
|
98
101
|
case post["type"]
|
99
102
|
when "regular"
|
100
|
-
title = post
|
101
|
-
content = post["regular-body"]
|
103
|
+
title, content = post.values_at("regular-title", "regular-body")
|
102
104
|
when "link"
|
103
|
-
title
|
105
|
+
title = post["link-text"] || post["link-url"]
|
104
106
|
content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
|
105
|
-
unless post["link-description"].nil?
|
106
|
-
content << "<br/>" + post["link-description"]
|
107
|
-
end
|
107
|
+
content << "<br/>#{post["link-description"]}" unless post["link-description"].nil?
|
108
108
|
when "photo"
|
109
109
|
title = post["slug"].tr("-", " ")
|
110
110
|
if post["photos"].size > 1
|
111
|
-
content = ""
|
111
|
+
content = +""
|
112
112
|
post["photos"].each do |post_photo|
|
113
113
|
photo = fetch_photo post_photo
|
114
|
-
content << photo
|
114
|
+
content << "#{photo}<br/>"
|
115
115
|
content << post_photo["caption"]
|
116
116
|
end
|
117
117
|
else
|
118
118
|
content = fetch_photo post
|
119
119
|
end
|
120
|
-
content << "<br
|
120
|
+
content << "<br/>#{post["photo-caption"]}"
|
121
121
|
when "audio"
|
122
122
|
if !post["id3-title"].nil?
|
123
|
-
title = post
|
124
|
-
content
|
123
|
+
title, content = post.values_at("id3-title", "audio-player")
|
124
|
+
content << "<br/>#{post["audio-caption"]}"
|
125
125
|
else
|
126
|
-
title = post
|
127
|
-
content = post["audio-player"]
|
126
|
+
title, content = post.values_at("audio-caption", "audio-player")
|
128
127
|
end
|
129
128
|
when "quote"
|
130
|
-
title
|
129
|
+
title = post["quote-text"]
|
131
130
|
content = "<blockquote>#{post["quote-text"]}</blockquote>"
|
132
|
-
unless post["quote-source"].nil?
|
133
|
-
content << "—" + post["quote-source"]
|
134
|
-
end
|
131
|
+
content << "—#{post["quote-source"]}" unless post["quote-source"].nil?
|
135
132
|
when "conversation"
|
136
|
-
title
|
133
|
+
title = post["conversation-title"]
|
137
134
|
content = "<section><dialog>"
|
138
135
|
post["conversation"].each do |line|
|
139
136
|
content << "<dt>#{line["label"]}</dt><dd>#{line["phrase"]}</dd>"
|
140
137
|
end
|
141
138
|
content << "</dialog></section>"
|
142
139
|
when "video"
|
143
|
-
title = post
|
144
|
-
content = post["video-player"]
|
140
|
+
title, content = post.values_at("video-title", "video-player")
|
145
141
|
unless post["video-caption"].nil?
|
146
142
|
if content
|
147
|
-
content << "<br
|
143
|
+
content << "<br/>#{post["video-caption"]}"
|
148
144
|
else
|
149
145
|
content = post["video-caption"]
|
150
146
|
end
|
151
147
|
end
|
152
148
|
when "answer"
|
153
|
-
title = post
|
154
|
-
content = post["answer"]
|
149
|
+
title, content = post.values_at("question", "answer")
|
155
150
|
end
|
156
|
-
|
151
|
+
|
152
|
+
date = Date.parse(post["date"]).to_s
|
157
153
|
title = Nokogiri::HTML(title).text
|
158
154
|
title = "no title" if title.empty?
|
159
|
-
slug
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
155
|
+
slug = if post["slug"] && post["slug"].strip != ""
|
156
|
+
post["slug"]
|
157
|
+
elsif title && title.downcase.gsub(%r![^a-z0-9\-]!, "") != "" && title != "no title"
|
158
|
+
slug = title.downcase.strip.tr(" ", "-").gsub(%r![^a-z0-9\-]!, "")
|
159
|
+
slug.length > 200 ? slug.slice(0..200) : slug
|
160
|
+
else
|
161
|
+
post["id"]
|
162
|
+
end
|
167
163
|
{
|
168
164
|
:name => "#{date}-#{slug}.#{format}",
|
169
165
|
:header => {
|
@@ -180,8 +176,8 @@ module JekyllImport
|
|
180
176
|
end
|
181
177
|
|
182
178
|
# Attempts to fetch the largest version of a photo available for a post.
|
183
|
-
# If that file fails, it tries the next smaller size until all available
|
184
|
-
#
|
179
|
+
# If that file fails, it tries the next smaller size until all available photo URLs are
|
180
|
+
# exhausted. If they all fail, the import is aborted.
|
185
181
|
def fetch_photo(post)
|
186
182
|
sizes = post.keys.map { |k| k.gsub("photo-url-", "").to_i }
|
187
183
|
sizes.sort! { |a, b| b <=> a }
|
@@ -194,30 +190,30 @@ module JekyllImport
|
|
194
190
|
sizes.each do |size|
|
195
191
|
url = post["photo-url"] || post["photo-url-#{size}"]
|
196
192
|
next if url.nil?
|
193
|
+
|
197
194
|
begin
|
198
|
-
return "<img src=\"#{save_photo(url, ext)}\"/>"
|
195
|
+
return +"<img src=\"#{save_photo(url, ext)}\"/>"
|
199
196
|
rescue OpenURI::HTTPError
|
200
|
-
|
197
|
+
Jekyll.logger.warn "Failed to grab photo"
|
201
198
|
end
|
202
199
|
end
|
203
200
|
|
204
201
|
abort "Failed to fetch photo for post #{post["url"]}"
|
205
202
|
end
|
206
203
|
|
207
|
-
# Create a Hash of old urls => new urls, for rewriting and
|
208
|
-
#
|
209
|
-
# site/posts to get the correct permalink format.
|
204
|
+
# Create a Hash of old urls => new urls, for rewriting and redirects, and replace urls in
|
205
|
+
# each post. Instantiate Jekyll site/posts to get the correct permalink format.
|
210
206
|
def rewrite_urls_and_redirects(posts)
|
211
207
|
site = Jekyll::Site.new(Jekyll.configuration({}))
|
212
208
|
urls = Hash[posts.map do |post|
|
213
|
-
# Create an initial empty file for the post so that
|
214
|
-
|
215
|
-
File.write(
|
209
|
+
# Create an initial empty file for the post so that we can instantiate a post object.
|
210
|
+
relative_path = "_posts/tumblr/#{post[:name]}"
|
211
|
+
File.write(relative_path, "")
|
216
212
|
tumblr_url = URI.parse(URI.encode(post[:slug])).path
|
217
213
|
jekyll_url = if Jekyll.const_defined? :Post
|
218
|
-
Jekyll::Post.new(site,
|
214
|
+
Jekyll::Post.new(site, site.source, "", "tumblr/#{post[:name]}").url
|
219
215
|
else
|
220
|
-
Jekyll::Document.new(
|
216
|
+
Jekyll::Document.new(site.in_source_dir(relative_path), :site => site, :collection => site.posts).url
|
221
217
|
end
|
222
218
|
redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
|
223
219
|
FileUtils.mkdir_p redirect_dir
|
@@ -240,63 +236,61 @@ module JekyllImport
|
|
240
236
|
def html_to_markdown(content)
|
241
237
|
preserve = %w(table tr th td)
|
242
238
|
preserve.each do |tag|
|
243
|
-
content.gsub!(%r!<#{tag}!i, "
|
244
|
-
content.gsub!(%r!<\/#{tag}!i, "
|
239
|
+
content.gsub!(%r!<#{tag}!i, "$$#{tag}")
|
240
|
+
content.gsub!(%r!<\/#{tag}!i, "||#{tag}")
|
245
241
|
end
|
246
242
|
content = Nokogiri::HTML(content.gsub("'", "''")).text
|
247
243
|
preserve.each do |tag|
|
248
|
-
content.gsub!("
|
249
|
-
content.gsub!("
|
244
|
+
content.gsub!("$$#{tag}", "<#{tag}")
|
245
|
+
content.gsub!("||#{tag}", "</#{tag}")
|
250
246
|
end
|
251
247
|
content
|
252
248
|
end
|
253
249
|
|
254
|
-
# Adds pygments highlight tags to code blocks in posts that use
|
255
|
-
#
|
256
|
-
#
|
257
|
-
# For example, my code block only contain Python and JavaScript,
|
258
|
-
#
|
259
|
-
# semi-colon.
|
250
|
+
# Adds pygments highlight tags to code blocks in posts that use markdown format.
|
251
|
+
# This doesn't guess the language of the code block, so you should modify this to suit your
|
252
|
+
# own content.
|
253
|
+
# For example, my code block only contain Python and JavaScript, so I can assume the block
|
254
|
+
# is JavaScript if it contains a semi-colon.
|
260
255
|
def add_syntax_highlights(content, redirect_dir)
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
256
|
+
lines = content.split("\n")
|
257
|
+
block = false
|
258
|
+
indent = %r!^ !
|
259
|
+
lang = nil
|
260
|
+
start = nil
|
261
|
+
lines.each_with_index do |line, i|
|
262
|
+
if !block && line =~ indent
|
263
|
+
block = true
|
264
|
+
lang = "python"
|
265
|
+
start = i
|
266
|
+
elsif block
|
267
|
+
lang = "javascript" if line =~ %r!;$!
|
268
|
+
block = line =~ indent && i < lines.size - 1 # Also handle EOF
|
269
|
+
unless block
|
270
|
+
lines[start] = "{% highlight #{lang} %}"
|
271
|
+
lines[i - 1] = "{% endhighlight %}"
|
272
|
+
end
|
273
|
+
FileUtils.cp(redirect_dir + "index.html", redirect_dir + "../" + "index.html")
|
274
|
+
lines[i] = lines[i].sub(indent, "")
|
277
275
|
end
|
278
|
-
FileUtils.cp(redirect_dir + "index.html", redirect_dir + "../" + "index.html")
|
279
|
-
lines[i] = lines[i].sub(indent, "")
|
280
276
|
end
|
277
|
+
lines.join("\n")
|
281
278
|
end
|
282
|
-
lines.join("\n")
|
283
|
-
end
|
284
279
|
|
285
280
|
def save_photo(url, ext)
|
286
|
-
|
281
|
+
return url unless @grab_images
|
282
|
+
|
287
283
|
path = "tumblr_files/#{url.split("/").last}"
|
288
284
|
path += ext unless path =~ %r!#{ext}$!
|
289
285
|
FileUtils.mkdir_p "tumblr_files"
|
290
286
|
|
291
287
|
# Don't fetch if we've already cached this file
|
292
288
|
unless File.size? path
|
293
|
-
|
294
|
-
File.open(path, "wb") { |f| f.write(
|
289
|
+
Jekyll.logger.info "Fetching photo #{url}"
|
290
|
+
File.open(path, "wb") { |f| f.write(URI.parse(url).read) }
|
295
291
|
end
|
296
|
-
|
292
|
+
"/#{path}"
|
297
293
|
end
|
298
|
-
url
|
299
|
-
end
|
300
294
|
end
|
301
295
|
end
|
302
296
|
end
|
@@ -1,21 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module JekyllImport
|
2
4
|
module Importers
|
3
5
|
class Typo < Importer
|
4
6
|
# This SQL *should* work for both MySQL and PostgreSQL.
|
5
|
-
SQL =
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
7
|
+
SQL = <<~SQL
|
8
|
+
SELECT c.id id,
|
9
|
+
c.title title,
|
10
|
+
c.permalink slug,
|
11
|
+
c.body body,
|
12
|
+
c.extended extended,
|
13
|
+
c.published_at date,
|
14
|
+
c.state state,
|
15
|
+
c.keywords keywords,
|
16
|
+
COALESCE(tf.name, 'html') filter
|
17
|
+
FROM contents c
|
18
|
+
LEFT OUTER JOIN text_filters tf
|
19
|
+
ON c.text_filter_id = tf.id
|
20
|
+
SQL
|
19
21
|
|
20
22
|
def self.require_deps
|
21
23
|
JekyllImport.require_with_fallback(%w(
|
@@ -29,11 +31,11 @@ module JekyllImport
|
|
29
31
|
end
|
30
32
|
|
31
33
|
def self.specify_options(c)
|
32
|
-
c.option "server",
|
33
|
-
c.option "dbname",
|
34
|
-
c.option "user",
|
34
|
+
c.option "server", "--server TYPE", 'Server type ("mysql" or "postgres")'
|
35
|
+
c.option "dbname", "--dbname DB", "Database name"
|
36
|
+
c.option "user", "--user USER", "Database user name"
|
35
37
|
c.option "password", "--password PW", "Database user's password (default: '')"
|
36
|
-
c.option "host",
|
38
|
+
c.option "host", "--host HOST", "Database host name"
|
37
39
|
end
|
38
40
|
|
39
41
|
def self.process(options)
|
@@ -55,19 +57,19 @@ module JekyllImport
|
|
55
57
|
db[SQL].each do |post|
|
56
58
|
next unless post[:state] =~ %r!published!i
|
57
59
|
|
58
|
-
if post[:slug].nil?
|
59
|
-
post[:slug] = "no slug"
|
60
|
-
end
|
60
|
+
post[:slug] = "no slug" if post[:slug].nil?
|
61
61
|
|
62
62
|
if post[:extended]
|
63
63
|
post[:body] << "\n<!-- more -->\n"
|
64
64
|
post[:body] << post[:extended]
|
65
65
|
end
|
66
66
|
|
67
|
-
name = [
|
68
|
-
|
69
|
-
|
70
|
-
|
67
|
+
name = [
|
68
|
+
format("%.04d", post[:date].year),
|
69
|
+
format("%.02d", post[:date].month),
|
70
|
+
format("%.02d", post[:date].day),
|
71
|
+
post[:slug].strip,
|
72
|
+
].join("-")
|
71
73
|
|
72
74
|
# Can have more than one text filter in this field, but we just want
|
73
75
|
# the first one for this.
|
@@ -75,8 +77,8 @@ module JekyllImport
|
|
75
77
|
|
76
78
|
File.open("_posts/#{name}", "w") do |f|
|
77
79
|
f.puts({ "layout" => "post",
|
78
|
-
"title" => (post[:title]
|
79
|
-
"tags" => (post[:keywords]
|
80
|
+
"title" => (post[:title]&.to_s&.force_encoding("UTF-8")),
|
81
|
+
"tags" => (post[:keywords]&.to_s&.force_encoding("UTF-8")),
|
80
82
|
"typo_id" => post[:id], }.delete_if { |_k, v| v.nil? || v == "" }.to_yaml)
|
81
83
|
f.puts "---"
|
82
84
|
f.puts post[:body].delete("\r")
|