jekyll-import 0.14.0 → 0.15.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/jekyll-import.rb +2 -0
- data/lib/jekyll-import/importer.rb +5 -3
- data/lib/jekyll-import/importers.rb +3 -0
- data/lib/jekyll-import/importers/behance.rb +7 -6
- data/lib/jekyll-import/importers/blogger.rb +23 -38
- data/lib/jekyll-import/importers/csv.rb +6 -5
- data/lib/jekyll-import/importers/drupal6.rb +7 -5
- data/lib/jekyll-import/importers/drupal7.rb +15 -13
- data/lib/jekyll-import/importers/drupal_common.rb +55 -31
- data/lib/jekyll-import/importers/easyblog.rb +8 -8
- data/lib/jekyll-import/importers/enki.rb +14 -12
- data/lib/jekyll-import/importers/ghost.rb +4 -1
- data/lib/jekyll-import/importers/google_reader.rb +4 -4
- data/lib/jekyll-import/importers/joomla.rb +9 -9
- data/lib/jekyll-import/importers/joomla3.rb +15 -15
- data/lib/jekyll-import/importers/jrnl.rb +11 -9
- data/lib/jekyll-import/importers/marley.rb +12 -10
- data/lib/jekyll-import/importers/mephisto.rb +15 -15
- data/lib/jekyll-import/importers/mt.rb +16 -13
- data/lib/jekyll-import/importers/posterous.rb +12 -9
- data/lib/jekyll-import/importers/roller.rb +277 -0
- data/lib/jekyll-import/importers/rss.rb +18 -6
- data/lib/jekyll-import/importers/s9y.rb +3 -1
- data/lib/jekyll-import/importers/s9y_database.rb +38 -53
- data/lib/jekyll-import/importers/textpattern.rb +6 -4
- data/lib/jekyll-import/importers/tumblr.rb +101 -107
- data/lib/jekyll-import/importers/typo.rb +29 -27
- data/lib/jekyll-import/importers/wordpress.rb +47 -59
- data/lib/jekyll-import/importers/wordpressdotcom.rb +27 -32
- data/lib/jekyll-import/util.rb +2 -1
- data/lib/jekyll-import/version.rb +3 -1
- data/lib/jekyll/commands/import.rb +4 -7
- metadata +40 -40
- data/lib/jekyll-import/importers/tmp.rb +0 -0
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module JekyllImport
|
2
4
|
module Importers
|
3
5
|
class TextPattern < Importer
|
@@ -12,7 +14,7 @@ module JekyllImport
|
|
12
14
|
Keywords \
|
13
15
|
FROM textpattern \
|
14
16
|
WHERE Status = '4' OR \
|
15
|
-
Status = '5'"
|
17
|
+
Status = '5'"
|
16
18
|
|
17
19
|
def self.require_deps
|
18
20
|
JekyllImport.require_with_fallback(%w(
|
@@ -25,10 +27,10 @@ module JekyllImport
|
|
25
27
|
end
|
26
28
|
|
27
29
|
def self.specify_options(c)
|
28
|
-
c.option "dbname",
|
29
|
-
c.option "user",
|
30
|
+
c.option "dbname", "--dbname DB", "Database name"
|
31
|
+
c.option "user", "--user USER", "Database user name"
|
30
32
|
c.option "password", "--password PW", "Database user's password"
|
31
|
-
c.option "host",
|
33
|
+
c.option "host", "--host HOST", 'Database host name (default: "localhost")'
|
32
34
|
end
|
33
35
|
|
34
36
|
def self.process(options)
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module JekyllImport
|
2
4
|
module Importers
|
3
5
|
class Tumblr < Importer
|
@@ -15,11 +17,11 @@ module JekyllImport
|
|
15
17
|
end
|
16
18
|
|
17
19
|
def self.specify_options(c)
|
18
|
-
c.option "url",
|
19
|
-
c.option "format",
|
20
|
-
c.option "grab_images",
|
20
|
+
c.option "url", "--url URL", "Tumblr URL"
|
21
|
+
c.option "format", "--format FORMAT", 'Output format (default: "html")'
|
22
|
+
c.option "grab_images", "--grab_images", "Whether to grab images (default: false)"
|
21
23
|
c.option "add_highlights", "--add_highlights", "Whether to add highlights (default: false)"
|
22
|
-
c.option "rewrite_urls",
|
24
|
+
c.option "rewrite_urls", "--rewrite_urls", "Whether to rewrite URLs (default: false)"
|
23
25
|
end
|
24
26
|
|
25
27
|
def self.process(options)
|
@@ -34,20 +36,23 @@ module JekyllImport
|
|
34
36
|
url += "/api/read/json/"
|
35
37
|
per_page = 50
|
36
38
|
posts = []
|
39
|
+
|
37
40
|
# Two passes are required so that we can rewrite URLs.
|
38
41
|
# First pass builds up an array of each post as a hash.
|
39
42
|
begin
|
40
43
|
current_page = (current_page || -1) + 1
|
41
|
-
feed_url
|
42
|
-
|
43
|
-
|
44
|
+
feed_url = "#{url}?num=#{per_page}&start=#{current_page * per_page}"
|
45
|
+
Jekyll.logger.info "Fetching #{feed_url}"
|
46
|
+
|
47
|
+
feed = URI.parse(feed_url).open
|
44
48
|
contents = feed.readlines.join("\n")
|
45
|
-
blog
|
46
|
-
|
49
|
+
blog = extract_json(contents)
|
50
|
+
Jekyll.logger.info "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
|
51
|
+
|
47
52
|
batch = blog["posts"].map { |post| post_to_hash(post, format) }
|
48
53
|
|
49
|
-
# If we're rewriting, save the posts for later. Otherwise, go ahead and
|
50
|
-
#
|
54
|
+
# If we're rewriting, save the posts for later. Otherwise, go ahead and dump these to
|
55
|
+
# disk now
|
51
56
|
if rewrite_urls
|
52
57
|
posts += batch
|
53
58
|
else
|
@@ -62,33 +67,31 @@ module JekyllImport
|
|
62
67
|
end
|
63
68
|
end
|
64
69
|
|
65
|
-
private
|
66
70
|
class << self
|
67
71
|
def extract_json(contents)
|
68
72
|
beginning = contents.index("{")
|
69
|
-
ending
|
70
|
-
|
71
|
-
JSON.parse(
|
73
|
+
ending = contents.rindex("}") + 1
|
74
|
+
json_data = contents[beginning...ending] # Strip Tumblr's JSONP chars.
|
75
|
+
JSON.parse(json_data)
|
72
76
|
end
|
73
77
|
|
74
78
|
# Writes a post out to disk
|
75
79
|
def write_post(post, use_markdown, add_highlights)
|
76
80
|
content = post[:content]
|
81
|
+
return unless content
|
77
82
|
|
78
|
-
if
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
content = add_syntax_highlights(content, redirect_dir)
|
86
|
-
end
|
83
|
+
if use_markdown
|
84
|
+
content = html_to_markdown content
|
85
|
+
if add_highlights
|
86
|
+
tumblr_url = URI.parse(post[:slug]).path
|
87
|
+
redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
|
88
|
+
FileUtils.mkdir_p redirect_dir
|
89
|
+
content = add_syntax_highlights(content, redirect_dir)
|
87
90
|
end
|
91
|
+
end
|
88
92
|
|
89
|
-
|
90
|
-
|
91
|
-
end
|
93
|
+
File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
|
94
|
+
f.puts post[:header].to_yaml + "---\n" + content
|
92
95
|
end
|
93
96
|
end
|
94
97
|
|
@@ -97,73 +100,66 @@ module JekyllImport
|
|
97
100
|
def post_to_hash(post, format)
|
98
101
|
case post["type"]
|
99
102
|
when "regular"
|
100
|
-
title = post
|
101
|
-
content = post["regular-body"]
|
103
|
+
title, content = post.values_at("regular-title", "regular-body")
|
102
104
|
when "link"
|
103
|
-
title
|
105
|
+
title = post["link-text"] || post["link-url"]
|
104
106
|
content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
|
105
|
-
unless post["link-description"].nil?
|
106
|
-
content << "<br/>" + post["link-description"]
|
107
|
-
end
|
107
|
+
content << "<br/>#{post["link-description"]}" unless post["link-description"].nil?
|
108
108
|
when "photo"
|
109
109
|
title = post["slug"].tr("-", " ")
|
110
110
|
if post["photos"].size > 1
|
111
|
-
content = ""
|
111
|
+
content = +""
|
112
112
|
post["photos"].each do |post_photo|
|
113
113
|
photo = fetch_photo post_photo
|
114
|
-
content << photo
|
114
|
+
content << "#{photo}<br/>"
|
115
115
|
content << post_photo["caption"]
|
116
116
|
end
|
117
117
|
else
|
118
118
|
content = fetch_photo post
|
119
119
|
end
|
120
|
-
content << "<br
|
120
|
+
content << "<br/>#{post["photo-caption"]}"
|
121
121
|
when "audio"
|
122
122
|
if !post["id3-title"].nil?
|
123
|
-
title = post
|
124
|
-
content
|
123
|
+
title, content = post.values_at("id3-title", "audio-player")
|
124
|
+
content << "<br/>#{post["audio-caption"]}"
|
125
125
|
else
|
126
|
-
title = post
|
127
|
-
content = post["audio-player"]
|
126
|
+
title, content = post.values_at("audio-caption", "audio-player")
|
128
127
|
end
|
129
128
|
when "quote"
|
130
|
-
title
|
129
|
+
title = post["quote-text"]
|
131
130
|
content = "<blockquote>#{post["quote-text"]}</blockquote>"
|
132
|
-
unless post["quote-source"].nil?
|
133
|
-
content << "—" + post["quote-source"]
|
134
|
-
end
|
131
|
+
content << "—#{post["quote-source"]}" unless post["quote-source"].nil?
|
135
132
|
when "conversation"
|
136
|
-
title
|
133
|
+
title = post["conversation-title"]
|
137
134
|
content = "<section><dialog>"
|
138
135
|
post["conversation"].each do |line|
|
139
136
|
content << "<dt>#{line["label"]}</dt><dd>#{line["phrase"]}</dd>"
|
140
137
|
end
|
141
138
|
content << "</dialog></section>"
|
142
139
|
when "video"
|
143
|
-
title = post
|
144
|
-
content = post["video-player"]
|
140
|
+
title, content = post.values_at("video-title", "video-player")
|
145
141
|
unless post["video-caption"].nil?
|
146
142
|
if content
|
147
|
-
content << "<br
|
143
|
+
content << "<br/>#{post["video-caption"]}"
|
148
144
|
else
|
149
145
|
content = post["video-caption"]
|
150
146
|
end
|
151
147
|
end
|
152
148
|
when "answer"
|
153
|
-
title = post
|
154
|
-
content = post["answer"]
|
149
|
+
title, content = post.values_at("question", "answer")
|
155
150
|
end
|
156
|
-
|
151
|
+
|
152
|
+
date = Date.parse(post["date"]).to_s
|
157
153
|
title = Nokogiri::HTML(title).text
|
158
154
|
title = "no title" if title.empty?
|
159
|
-
slug
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
155
|
+
slug = if post["slug"] && post["slug"].strip != ""
|
156
|
+
post["slug"]
|
157
|
+
elsif title && title.downcase.gsub(%r![^a-z0-9\-]!, "") != "" && title != "no title"
|
158
|
+
slug = title.downcase.strip.tr(" ", "-").gsub(%r![^a-z0-9\-]!, "")
|
159
|
+
slug.length > 200 ? slug.slice(0..200) : slug
|
160
|
+
else
|
161
|
+
post["id"]
|
162
|
+
end
|
167
163
|
{
|
168
164
|
:name => "#{date}-#{slug}.#{format}",
|
169
165
|
:header => {
|
@@ -180,8 +176,8 @@ module JekyllImport
|
|
180
176
|
end
|
181
177
|
|
182
178
|
# Attempts to fetch the largest version of a photo available for a post.
|
183
|
-
# If that file fails, it tries the next smaller size until all available
|
184
|
-
#
|
179
|
+
# If that file fails, it tries the next smaller size until all available photo URLs are
|
180
|
+
# exhausted. If they all fail, the import is aborted.
|
185
181
|
def fetch_photo(post)
|
186
182
|
sizes = post.keys.map { |k| k.gsub("photo-url-", "").to_i }
|
187
183
|
sizes.sort! { |a, b| b <=> a }
|
@@ -194,30 +190,30 @@ module JekyllImport
|
|
194
190
|
sizes.each do |size|
|
195
191
|
url = post["photo-url"] || post["photo-url-#{size}"]
|
196
192
|
next if url.nil?
|
193
|
+
|
197
194
|
begin
|
198
|
-
return "<img src=\"#{save_photo(url, ext)}\"/>"
|
195
|
+
return +"<img src=\"#{save_photo(url, ext)}\"/>"
|
199
196
|
rescue OpenURI::HTTPError
|
200
|
-
|
197
|
+
Jekyll.logger.warn "Failed to grab photo"
|
201
198
|
end
|
202
199
|
end
|
203
200
|
|
204
201
|
abort "Failed to fetch photo for post #{post["url"]}"
|
205
202
|
end
|
206
203
|
|
207
|
-
# Create a Hash of old urls => new urls, for rewriting and
|
208
|
-
#
|
209
|
-
# site/posts to get the correct permalink format.
|
204
|
+
# Create a Hash of old urls => new urls, for rewriting and redirects, and replace urls in
|
205
|
+
# each post. Instantiate Jekyll site/posts to get the correct permalink format.
|
210
206
|
def rewrite_urls_and_redirects(posts)
|
211
207
|
site = Jekyll::Site.new(Jekyll.configuration({}))
|
212
208
|
urls = Hash[posts.map do |post|
|
213
|
-
# Create an initial empty file for the post so that
|
214
|
-
|
215
|
-
File.write(
|
209
|
+
# Create an initial empty file for the post so that we can instantiate a post object.
|
210
|
+
relative_path = "_posts/tumblr/#{post[:name]}"
|
211
|
+
File.write(relative_path, "")
|
216
212
|
tumblr_url = URI.parse(URI.encode(post[:slug])).path
|
217
213
|
jekyll_url = if Jekyll.const_defined? :Post
|
218
|
-
Jekyll::Post.new(site,
|
214
|
+
Jekyll::Post.new(site, site.source, "", "tumblr/#{post[:name]}").url
|
219
215
|
else
|
220
|
-
Jekyll::Document.new(
|
216
|
+
Jekyll::Document.new(site.in_source_dir(relative_path), :site => site, :collection => site.posts).url
|
221
217
|
end
|
222
218
|
redirect_dir = tumblr_url.sub(%r!\/!, "") + "/"
|
223
219
|
FileUtils.mkdir_p redirect_dir
|
@@ -240,63 +236,61 @@ module JekyllImport
|
|
240
236
|
def html_to_markdown(content)
|
241
237
|
preserve = %w(table tr th td)
|
242
238
|
preserve.each do |tag|
|
243
|
-
content.gsub!(%r!<#{tag}!i, "
|
244
|
-
content.gsub!(%r!<\/#{tag}!i, "
|
239
|
+
content.gsub!(%r!<#{tag}!i, "$$#{tag}")
|
240
|
+
content.gsub!(%r!<\/#{tag}!i, "||#{tag}")
|
245
241
|
end
|
246
242
|
content = Nokogiri::HTML(content.gsub("'", "''")).text
|
247
243
|
preserve.each do |tag|
|
248
|
-
content.gsub!("
|
249
|
-
content.gsub!("
|
244
|
+
content.gsub!("$$#{tag}", "<#{tag}")
|
245
|
+
content.gsub!("||#{tag}", "</#{tag}")
|
250
246
|
end
|
251
247
|
content
|
252
248
|
end
|
253
249
|
|
254
|
-
# Adds pygments highlight tags to code blocks in posts that use
|
255
|
-
#
|
256
|
-
#
|
257
|
-
# For example, my code block only contain Python and JavaScript,
|
258
|
-
#
|
259
|
-
# semi-colon.
|
250
|
+
# Adds pygments highlight tags to code blocks in posts that use markdown format.
|
251
|
+
# This doesn't guess the language of the code block, so you should modify this to suit your
|
252
|
+
# own content.
|
253
|
+
# For example, my code block only contain Python and JavaScript, so I can assume the block
|
254
|
+
# is JavaScript if it contains a semi-colon.
|
260
255
|
def add_syntax_highlights(content, redirect_dir)
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
256
|
+
lines = content.split("\n")
|
257
|
+
block = false
|
258
|
+
indent = %r!^ !
|
259
|
+
lang = nil
|
260
|
+
start = nil
|
261
|
+
lines.each_with_index do |line, i|
|
262
|
+
if !block && line =~ indent
|
263
|
+
block = true
|
264
|
+
lang = "python"
|
265
|
+
start = i
|
266
|
+
elsif block
|
267
|
+
lang = "javascript" if line =~ %r!;$!
|
268
|
+
block = line =~ indent && i < lines.size - 1 # Also handle EOF
|
269
|
+
unless block
|
270
|
+
lines[start] = "{% highlight #{lang} %}"
|
271
|
+
lines[i - 1] = "{% endhighlight %}"
|
272
|
+
end
|
273
|
+
FileUtils.cp(redirect_dir + "index.html", redirect_dir + "../" + "index.html")
|
274
|
+
lines[i] = lines[i].sub(indent, "")
|
277
275
|
end
|
278
|
-
FileUtils.cp(redirect_dir + "index.html", redirect_dir + "../" + "index.html")
|
279
|
-
lines[i] = lines[i].sub(indent, "")
|
280
276
|
end
|
277
|
+
lines.join("\n")
|
281
278
|
end
|
282
|
-
lines.join("\n")
|
283
|
-
end
|
284
279
|
|
285
280
|
def save_photo(url, ext)
|
286
|
-
|
281
|
+
return url unless @grab_images
|
282
|
+
|
287
283
|
path = "tumblr_files/#{url.split("/").last}"
|
288
284
|
path += ext unless path =~ %r!#{ext}$!
|
289
285
|
FileUtils.mkdir_p "tumblr_files"
|
290
286
|
|
291
287
|
# Don't fetch if we've already cached this file
|
292
288
|
unless File.size? path
|
293
|
-
|
294
|
-
File.open(path, "wb") { |f| f.write(
|
289
|
+
Jekyll.logger.info "Fetching photo #{url}"
|
290
|
+
File.open(path, "wb") { |f| f.write(URI.parse(url).read) }
|
295
291
|
end
|
296
|
-
|
292
|
+
"/#{path}"
|
297
293
|
end
|
298
|
-
url
|
299
|
-
end
|
300
294
|
end
|
301
295
|
end
|
302
296
|
end
|
@@ -1,21 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module JekyllImport
|
2
4
|
module Importers
|
3
5
|
class Typo < Importer
|
4
6
|
# This SQL *should* work for both MySQL and PostgreSQL.
|
5
|
-
SQL =
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
7
|
+
SQL = <<~SQL
|
8
|
+
SELECT c.id id,
|
9
|
+
c.title title,
|
10
|
+
c.permalink slug,
|
11
|
+
c.body body,
|
12
|
+
c.extended extended,
|
13
|
+
c.published_at date,
|
14
|
+
c.state state,
|
15
|
+
c.keywords keywords,
|
16
|
+
COALESCE(tf.name, 'html') filter
|
17
|
+
FROM contents c
|
18
|
+
LEFT OUTER JOIN text_filters tf
|
19
|
+
ON c.text_filter_id = tf.id
|
20
|
+
SQL
|
19
21
|
|
20
22
|
def self.require_deps
|
21
23
|
JekyllImport.require_with_fallback(%w(
|
@@ -29,11 +31,11 @@ module JekyllImport
|
|
29
31
|
end
|
30
32
|
|
31
33
|
def self.specify_options(c)
|
32
|
-
c.option "server",
|
33
|
-
c.option "dbname",
|
34
|
-
c.option "user",
|
34
|
+
c.option "server", "--server TYPE", 'Server type ("mysql" or "postgres")'
|
35
|
+
c.option "dbname", "--dbname DB", "Database name"
|
36
|
+
c.option "user", "--user USER", "Database user name"
|
35
37
|
c.option "password", "--password PW", "Database user's password (default: '')"
|
36
|
-
c.option "host",
|
38
|
+
c.option "host", "--host HOST", "Database host name"
|
37
39
|
end
|
38
40
|
|
39
41
|
def self.process(options)
|
@@ -55,19 +57,19 @@ module JekyllImport
|
|
55
57
|
db[SQL].each do |post|
|
56
58
|
next unless post[:state] =~ %r!published!i
|
57
59
|
|
58
|
-
if post[:slug].nil?
|
59
|
-
post[:slug] = "no slug"
|
60
|
-
end
|
60
|
+
post[:slug] = "no slug" if post[:slug].nil?
|
61
61
|
|
62
62
|
if post[:extended]
|
63
63
|
post[:body] << "\n<!-- more -->\n"
|
64
64
|
post[:body] << post[:extended]
|
65
65
|
end
|
66
66
|
|
67
|
-
name = [
|
68
|
-
|
69
|
-
|
70
|
-
|
67
|
+
name = [
|
68
|
+
format("%.04d", post[:date].year),
|
69
|
+
format("%.02d", post[:date].month),
|
70
|
+
format("%.02d", post[:date].day),
|
71
|
+
post[:slug].strip,
|
72
|
+
].join("-")
|
71
73
|
|
72
74
|
# Can have more than one text filter in this field, but we just want
|
73
75
|
# the first one for this.
|
@@ -75,8 +77,8 @@ module JekyllImport
|
|
75
77
|
|
76
78
|
File.open("_posts/#{name}", "w") do |f|
|
77
79
|
f.puts({ "layout" => "post",
|
78
|
-
"title" => (post[:title]
|
79
|
-
"tags" => (post[:keywords]
|
80
|
+
"title" => (post[:title]&.to_s&.force_encoding("UTF-8")),
|
81
|
+
"tags" => (post[:keywords]&.to_s&.force_encoding("UTF-8")),
|
80
82
|
"typo_id" => post[:id], }.delete_if { |_k, v| v.nil? || v == "" }.to_yaml)
|
81
83
|
f.puts "---"
|
82
84
|
f.puts post[:body].delete("\r")
|