bunto-import 2.0.0 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +21 -21
- data/README.markdown +33 -33
- data/lib/bunto-import.rb +49 -49
- data/lib/bunto-import/importer.rb +26 -26
- data/lib/bunto-import/importers.rb +10 -10
- data/lib/bunto-import/importers/behance.rb +80 -80
- data/lib/bunto-import/importers/blogger.rb +330 -264
- data/lib/bunto-import/importers/csv.rb +96 -96
- data/lib/bunto-import/importers/drupal6.rb +53 -139
- data/lib/bunto-import/importers/drupal7.rb +54 -111
- data/lib/bunto-import/importers/drupal_common.rb +157 -0
- data/lib/bunto-import/importers/easyblog.rb +96 -96
- data/lib/bunto-import/importers/enki.rb +74 -74
- data/lib/bunto-import/importers/ghost.rb +68 -68
- data/lib/bunto-import/importers/google_reader.rb +64 -64
- data/lib/bunto-import/importers/joomla.rb +92 -90
- data/lib/bunto-import/importers/joomla3.rb +91 -91
- data/lib/bunto-import/importers/jrnl.rb +125 -125
- data/lib/bunto-import/importers/marley.rb +72 -72
- data/lib/bunto-import/importers/mephisto.rb +99 -99
- data/lib/bunto-import/importers/mt.rb +257 -257
- data/lib/bunto-import/importers/posterous.rb +130 -130
- data/lib/bunto-import/importers/rss.rb +62 -62
- data/lib/bunto-import/importers/s9y.rb +60 -60
- data/lib/bunto-import/importers/s9y_database.rb +363 -0
- data/lib/bunto-import/importers/textpattern.rb +70 -70
- data/lib/bunto-import/importers/tumblr.rb +300 -289
- data/lib/bunto-import/importers/typo.rb +88 -88
- data/lib/bunto-import/importers/wordpress.rb +372 -372
- data/lib/bunto-import/importers/wordpressdotcom.rb +207 -207
- data/lib/bunto-import/util.rb +76 -76
- data/lib/bunto-import/version.rb +3 -3
- data/lib/bunto/commands/import.rb +79 -79
- metadata +84 -54
@@ -1,289 +1,300 @@
|
|
1
|
-
module BuntoImport
|
2
|
-
module Importers
|
3
|
-
class Tumblr < Importer
|
4
|
-
def self.require_deps
|
5
|
-
BuntoImport.require_with_fallback(%w[
|
6
|
-
rubygems
|
7
|
-
fileutils
|
8
|
-
open-uri
|
9
|
-
nokogiri
|
10
|
-
json
|
11
|
-
uri
|
12
|
-
time
|
13
|
-
bunto
|
14
|
-
])
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.specify_options(c)
|
18
|
-
c.option 'url', '--url URL', 'Tumblr URL'
|
19
|
-
c.option 'format', '--format FORMAT', 'Output format (default: "html")'
|
20
|
-
c.option 'grab_images', '--grab_images', 'Whether to grab images (default: false)'
|
21
|
-
c.option 'add_highlights', '--add_highlights', 'Whether to add highlights (default: false)'
|
22
|
-
c.option 'rewrite_urls', '--rewrite_urls', 'Whether to rewrite URLs (default: false)'
|
23
|
-
end
|
24
|
-
|
25
|
-
def self.process(options)
|
26
|
-
url = options.fetch('url')
|
27
|
-
format = options.fetch('format', "html")
|
28
|
-
grab_images = options.fetch('grab_images', false)
|
29
|
-
add_highlights = options.fetch('add_highlights', false)
|
30
|
-
rewrite_urls = options.fetch('rewrite_urls', false)
|
31
|
-
|
32
|
-
@grab_images = grab_images
|
33
|
-
FileUtils.mkdir_p "_posts/tumblr"
|
34
|
-
url += "/api/read/json/"
|
35
|
-
per_page = 50
|
36
|
-
posts = []
|
37
|
-
# Two passes are required so that we can rewrite URLs.
|
38
|
-
# First pass builds up an array of each post as a hash.
|
39
|
-
begin
|
40
|
-
current_page = (current_page || -1) + 1
|
41
|
-
feed_url = url + "?num=#{per_page}&start=#{current_page * per_page}"
|
42
|
-
puts "Fetching #{feed_url}"
|
43
|
-
feed = open(feed_url)
|
44
|
-
|
45
|
-
blog =
|
46
|
-
puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
|
47
|
-
batch = blog["posts"].map { |post| post_to_hash(post, format) }
|
48
|
-
|
49
|
-
# If we're rewriting, save the posts for later. Otherwise, go ahead and
|
50
|
-
# dump these to disk now
|
51
|
-
if rewrite_urls
|
52
|
-
posts += batch
|
53
|
-
else
|
54
|
-
batch.each {|post| write_post(post, format == "md", add_highlights)}
|
55
|
-
end
|
56
|
-
|
57
|
-
end until blog["posts"].size < per_page
|
58
|
-
|
59
|
-
# Rewrite URLs, create redirects and write out posts if necessary
|
60
|
-
if rewrite_urls
|
61
|
-
posts = rewrite_urls_and_redirects posts
|
62
|
-
posts.each {|post| write_post(post, format == "md", add_highlights)}
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
private
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
content =
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
post["
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
when "
|
137
|
-
title = post["
|
138
|
-
content =
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
},
|
170
|
-
:
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
sizes.
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
1
|
+
module BuntoImport
|
2
|
+
module Importers
|
3
|
+
class Tumblr < Importer
|
4
|
+
# Loads every library this importer needs by handing the list of names to
# BuntoImport.require_with_fallback.
def self.require_deps
  dependencies = %w[rubygems fileutils open-uri nokogiri json uri time bunto]
  BuntoImport.require_with_fallback(dependencies)
end
|
16
|
+
|
17
|
+
# Registers the importer's command-line switches on +c+, which only needs to
# respond to #option(key, switch, description). Returns whatever the last
# #option call returns.
def self.specify_options(c)
  switches = [
    ['url',            '--url URL',        'Tumblr URL'],
    ['format',         '--format FORMAT',  'Output format (default: "html")'],
    ['grab_images',    '--grab_images',    'Whether to grab images (default: false)'],
    ['add_highlights', '--add_highlights', 'Whether to add highlights (default: false)'],
    ['rewrite_urls',   '--rewrite_urls',   'Whether to rewrite URLs (default: false)']
  ]
  switches.map { |key, switch, description| c.option key, switch, description }.last
end
|
24
|
+
|
25
|
+
# Imports a Tumblr blog into _posts/tumblr.
#
# options - Hash with string keys:
#           'url'            - blog base URL (required; KeyError if missing)
#           'format'         - output extension, default "html"
#           'grab_images'    - stored in @grab_images for save_photo
#           'add_highlights' - forwarded to write_post
#           'rewrite_urls'   - when true, posts are collected first, passed
#                              through rewrite_urls_and_redirects, and only
#                              then written
#
# The feed is read from "<url>/api/read/json/" in pages of 50 posts until a
# page comes back with fewer than 50 entries.
def self.process(options)
  url            = options.fetch('url')
  format         = options.fetch('format', "html")
  grab_images    = options.fetch('grab_images', false)
  add_highlights = options.fetch('add_highlights', false)
  rewrite_urls   = options.fetch('rewrite_urls', false)

  @grab_images = grab_images
  FileUtils.mkdir_p "_posts/tumblr"
  url += "/api/read/json/"
  per_page = 50
  posts = []
  current_page = 0

  # Two passes are required so that we can rewrite URLs.
  # First pass builds up an array of each post as a hash.
  loop do
    feed_url = url + "?num=#{per_page}&start=#{current_page * per_page}"
    puts "Fetching #{feed_url}"

    # Kernel#open stopped handling URLs in Ruby 3.0; prefer URI.open when
    # open-uri provides it. The block form also closes the handle.
    contents = if URI.respond_to?(:open)
                 URI.open(feed_url, &:read)
               else
                 open(feed_url, &:read)
               end
    blog = extract_json(contents)
    puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
    batch = blog["posts"].map { |post| post_to_hash(post, format) }

    # If we're rewriting, save the posts for later. Otherwise, go ahead and
    # dump these to disk now.
    if rewrite_urls
      posts.concat(batch)
    else
      batch.each { |post| write_post(post, format == "md", add_highlights) }
    end

    # A short page means the feed is exhausted.
    break if blog["posts"].size < per_page

    current_page += 1
  end

  # Rewrite URLs, create redirects and write out posts if necessary.
  if rewrite_urls
    posts = rewrite_urls_and_redirects posts
    posts.each { |post| write_post(post, format == "md", add_highlights) }
  end
end
|
65
|
+
|
66
|
+
private
|
67
|
+
|
68
|
+
# Extracts and parses the JSON object embedded in a Tumblr API response.
#
# contents - String response body; the object is taken from the first "{"
#            to the last "}", which drops Tumblr's JSONP wrapper.
#
# Returns the parsed object.
# Raises JSON::ParserError when no braces are present or the enclosed text
# is not valid JSON.
def self.extract_json(contents)
  beginning = contents.index("{")
  ending = contents.rindex("}")
  if beginning.nil? || ending.nil?
    raise JSON::ParserError, "no JSON object found in Tumblr response"
  end

  JSON.parse(contents[beginning..ending]) # Strip Tumblr's JSONP chars.
end
|
74
|
+
|
75
|
+
# Writes a post out to disk
|
76
|
+
def self.write_post(post, use_markdown, add_highlights)
|
77
|
+
content = post[:content]
|
78
|
+
|
79
|
+
if content
|
80
|
+
if use_markdown
|
81
|
+
content = html_to_markdown content
|
82
|
+
if add_highlights
|
83
|
+
tumblr_url = URI.parse(post[:slug]).path
|
84
|
+
redirect_dir = tumblr_url.sub(/\//, "") + "/"
|
85
|
+
FileUtils.mkdir_p redirect_dir
|
86
|
+
content = add_syntax_highlights(content, redirect_dir)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
|
91
|
+
f.puts post[:header].to_yaml + "---\n" + content
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# Converts one Tumblr API post into the hash used by the rest of the
# importer.
#
# post   - Hash for a single post, with string keys as returned by the API.
# format - extension used for the generated file name (e.g. "html", "md").
#
# Returns a Hash with:
#   :name    - "<date>-<slug>.<format>"
#   :header  - front matter (layout, title, date, tags, tumblr_url)
#   :content - HTML body; nil for post types without a branch below
#   :url     - post["url"]
#   :slug    - post["url-with-slug"]
#
# Missing captions and titles are treated as empty strings, and content is
# built without modifying strings owned by +post+.
def self.post_to_hash(post, format)
  case post['type']
  when "regular"
    title = post["regular-title"]
    content = post["regular-body"]
  when "link"
    title = post["link-text"] || post["link-url"]
    content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
    unless post["link-description"].nil?
      content += "<br/>" + post["link-description"]
    end
  when "photo"
    title = post["slug"].to_s.tr("-", " ")
    content = if Array(post["photos"]).size > 1
                # Photoset: every photo is followed by its own caption.
                post["photos"].map { |post_photo|
                  "#{fetch_photo(post_photo)}<br/>#{post_photo["caption"]}"
                }.join
              else
                fetch_photo post
              end
    content += "<br/>#{post["photo-caption"]}"
  when "audio"
    if !post["id3-title"].nil?
      title = post["id3-title"]
      content = "#{post["audio-player"]}<br/>#{post["audio-caption"]}"
    else
      title = post["audio-caption"]
      content = post["audio-player"]
    end
  when "quote"
    title = post["quote-text"]
    content = "<blockquote>#{post["quote-text"]}</blockquote>"
    unless post["quote-source"].nil?
      content += "—" + post["quote-source"]
    end
  when "conversation"
    title = post["conversation-title"]
    exchanges = post["conversation"].map { |line|
      "<dt>#{line['label']}</dt><dd>#{line['phrase']}</dd>"
    }
    content = "<section><dialog>#{exchanges.join}</dialog></section>"
  when "video"
    title = post["video-title"]
    content = post["video-player"]
    unless post["video-caption"].nil?
      content = content ? "#{content}<br/>#{post["video-caption"]}" : post["video-caption"]
    end
  when "answer"
    title = post["question"]
    content = post["answer"]
  end

  date = Date.parse(post['date']).to_s
  title = Nokogiri::HTML(title.to_s).text # titles may contain markup
  title = "no title" if title.empty?

  # Slug preference: Tumblr's own slug, then one derived from the title,
  # then the post id.
  slug = if post["slug"] && post["slug"].strip != ""
           post["slug"]
         elsif title.downcase.gsub(/[^a-z0-9\-]/, '') != '' && title != 'no title'
           derived = title.downcase.strip.gsub(' ', '-').gsub(/[^a-z0-9\-]/, '')
           # Long slugs are cut to indices 0..200 (201 characters).
           derived.length > 200 ? derived.slice(0..200) : derived
         else
           post['id']
         end

  {
    :name => "#{date}-#{slug}.#{format}",
    :header => {
      "layout" => "post",
      "title" => title,
      "date" => Time.parse(post['date']).xmlschema,
      "tags" => (post["tags"] || []),
      "tumblr_url" => post["url-with-slug"]
    },
    :content => content,
    :url => post["url"],
    :slug => post["url-with-slug"],
  }
end
|
182
|
+
|
183
|
+
# Attempts to fetch the largest version of a photo available for a post.
# A plain "photo-url" entry is tried first when present, then every
# "photo-url-<size>" entry from largest to smallest. If one download fails
# with OpenURI::HTTPError the next candidate is tried; if they all fail, the
# import is aborted.
#
# post - Hash for a post (or one photo of a photoset) with string keys.
#
# Returns an "<img>" tag whose src is whatever save_photo returns.
def self.fetch_photo(post)
  sized_keys = post.keys.map(&:to_s).select { |key| key.start_with?("photo-url-") }
  sizes = sized_keys.map { |key| key.sub("photo-url-", "").to_i }.uniq.sort.reverse
  candidates = ([post["photo-url"]] + sizes.map { |size| post["photo-url-#{size}"] }).compact.uniq

  # The extension comes from the first sized URL whose file name has one;
  # when none does, no extension is appended.
  _ext_key, ext_val = post.find do |key, value|
    key.to_s.start_with?("photo-url-") && value.to_s.split("/").last.to_s.include?(".")
  end
  ext = ext_val ? "." + ext_val.split(".").last : ""

  candidates.each do |url|
    begin
      return "<img src=\"#{save_photo(url, ext)}\"/>"
    rescue OpenURI::HTTPError
      puts "Failed to grab photo"
    end
  end

  abort "Failed to fetch photo for post #{post['url']}"
end
|
207
|
+
|
208
|
+
# Create a Hash of old urls => new urls, for rewriting and redirects, and
# replace urls in each post. Instantiate Bunto site/posts to get the correct
# permalink format.
#
# For every post a "<old path>/index.html" redirect page pointing at the new
# URL is written. Then every occurrence of an old path (matched literally,
# ignoring case) in a post's :content is replaced in place with the new URL.
# Posts without content are returned untouched.
#
# posts - Array of post hashes as built by post_to_hash.
#
# Returns the same post hashes.
def self.rewrite_urls_and_redirects(posts)
  site = Bunto::Site.new(Bunto.configuration({}))
  # URI.encode no longer exists on Ruby 3.0+; the RFC 2396 parser offers the
  # same escaping.
  escaper = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
  urls = Hash[posts.map { |post|
    tumblr_url = URI.parse(escaper.escape(post[:slug])).path
    bunto_url = if Bunto.const_defined? :Post
      # Create an initial empty file for the post so that
      # we can instantiate a post object.
      File.open("_posts/tumblr/#{post[:name]}", "w") { |f| f.puts }
      Bunto::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
    else
      Bunto::Document.new(File.expand_path("tumblr/#{post[:name]}"), site: site, collection: site.posts).url
    end
    redirect_dir = tumblr_url.sub(/\//, "") + "/"
    FileUtils.mkdir_p redirect_dir
    File.open(redirect_dir + "index.html", "w") do |f|
      f.puts "<html><head><link rel=\"canonical\" href=\"" +
             "#{bunto_url}\"><meta http-equiv=\"refresh\" content=\"0; " +
             "url=#{bunto_url}\"></head><body></body></html>"
    end
    [tumblr_url, bunto_url]
  }]
  posts.map { |post|
    next post if post[:content].nil?

    urls.each do |tumblr_url, bunto_url|
      next if tumblr_url.empty? # an empty pattern would match everywhere

      # Escape the path so "." or "+" match literally; the block form keeps
      # backslashes in the new URL from being read as back-references.
      post[:content].gsub!(/#{Regexp.escape(tumblr_url)}/i) { bunto_url }
    end
    post
  }
end
|
239
|
+
|
240
|
+
# Convert preserving HTML tables as per the markdown docs.
#
# Opening and closing table/tr/th/td tags are swapped for "$$tag" / "||tag"
# placeholders, the rest of the markup is reduced to its text through
# Nokogiri (with single quotes doubled first), and the placeholders are then
# turned back into tags.
#
# content - HTML String; it is not modified, so frozen strings are accepted.
#
# Returns the converted text as a new String.
def self.html_to_markdown(content)
  preserve = ["table", "tr", "th", "td"]

  masked = preserve.reduce(content) do |text, tag|
    text.gsub(/<#{tag}/i, "$$" + tag).gsub(/<\/#{tag}/i, "||" + tag)
  end

  plain = Nokogiri::HTML(masked.gsub("'", "''")).text

  preserve.reduce(plain) do |text, tag|
    text.gsub("$$" + tag, "<" + tag).gsub("||" + tag, "</" + tag)
  end
end
|
254
|
+
|
255
|
+
# Adds pygments highlight tags to code blocks in posts that use
# markdown format. This doesn't guess the language of the code
# block, so you should modify this to suit your own content.
# For example, my code block only contain Python and JavaScript,
# so I can assume the block is JavaScript if it contains a
# semi-colon.
#
# content      - markdown text to scan line by line.
# redirect_dir - directory (with trailing "/") belonging to the post; once
#                a code block has been processed, its index.html is copied
#                one level up, provided that file exists.
#
# A block starts on the first line matching the indent pattern and ends on
# the first later line that does not match it, or on the last line. The
# highlight and endhighlight tags replace the block's first line and the
# line before the one that ended it.
#
# NOTE(review): the tags overwrite existing lines instead of being inserted,
# so text on those two lines is dropped - confirm this is intended.
# NOTE(review): the indent pattern here is a single space; whitespace may
# have been collapsed from a wider indent - confirm against the real file.
#
# Returns the rewritten text.
def self.add_syntax_highlights(content, redirect_dir)
  lines = content.split("\n")
  block, indent, lang, start = false, /^ /, nil, nil
  block_processed = false

  lines.each_with_index do |line, i|
    if !block && line =~ indent
      block = true
      lang = "python"
      start = i
    elsif block
      lang = "javascript" if line =~ /;$/
      block = line =~ indent && i < lines.size - 1 # Also handle EOF
      if !block
        lines[start] = "{% highlight #{lang} %}"
        lines[i - 1] = "{% endhighlight %}"
      end
      block_processed = true
      lines[i] = lines[i].sub(indent, "")
    end
  end

  # One copy after the loop gives the same result as copying on every line,
  # and the existence check avoids Errno::ENOENT when no redirect page was
  # generated for this post.
  redirect_page = redirect_dir + "index.html"
  if block_processed && File.exist?(redirect_page)
    FileUtils.cp(redirect_page, redirect_dir + "../" + "index.html")
  end

  lines.join("\n")
end
|
282
|
+
|
283
|
+
# Downloads a photo into tumblr_files/ when @grab_images is set.
#
# url - remote URL of the photo.
# ext - extension including the leading dot (may be empty); appended to the
#       local file name unless the name already ends with it.
#
# A non-empty file already present at the target path is reused rather than
# downloaded again. The download completes before the local file is created,
# so a failed request leaves nothing behind, and the data is written in
# binary mode.
#
# Returns "/tumblr_files/<file>" when images are grabbed, otherwise +url+
# unchanged.
def self.save_photo(url, ext)
  return url unless @grab_images

  path = "tumblr_files/#{url.split('/').last}"
  path += ext unless path.end_with?(ext)
  FileUtils.mkdir_p "tumblr_files"

  # Don't fetch if we've already cached this file
  unless File.size? path
    puts "Fetching photo #{url}"
    # Kernel#open stopped handling URLs in Ruby 3.0; prefer URI.open when
    # open-uri provides it.
    data = URI.respond_to?(:open) ? URI.open(url, &:read) : open(url, &:read)
    File.open(path, "wb") { |f| f.write(data) }
  end

  "/" + path
end
|
298
|
+
end
|
299
|
+
end
|
300
|
+
end
|