bunto-import 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.markdown +32 -0
- data/lib/bunto-import.rb +49 -0
- data/lib/bunto-import/importer.rb +26 -0
- data/lib/bunto-import/importers.rb +10 -0
- data/lib/bunto-import/importers/behance.rb +80 -0
- data/lib/bunto-import/importers/blogger.rb +264 -0
- data/lib/bunto-import/importers/csv.rb +96 -0
- data/lib/bunto-import/importers/drupal6.rb +139 -0
- data/lib/bunto-import/importers/drupal7.rb +111 -0
- data/lib/bunto-import/importers/easyblog.rb +96 -0
- data/lib/bunto-import/importers/enki.rb +74 -0
- data/lib/bunto-import/importers/ghost.rb +68 -0
- data/lib/bunto-import/importers/google_reader.rb +64 -0
- data/lib/bunto-import/importers/joomla.rb +90 -0
- data/lib/bunto-import/importers/joomla3.rb +91 -0
- data/lib/bunto-import/importers/jrnl.rb +125 -0
- data/lib/bunto-import/importers/marley.rb +72 -0
- data/lib/bunto-import/importers/mephisto.rb +99 -0
- data/lib/bunto-import/importers/mt.rb +257 -0
- data/lib/bunto-import/importers/posterous.rb +130 -0
- data/lib/bunto-import/importers/rss.rb +62 -0
- data/lib/bunto-import/importers/s9y.rb +60 -0
- data/lib/bunto-import/importers/textpattern.rb +70 -0
- data/lib/bunto-import/importers/tumblr.rb +289 -0
- data/lib/bunto-import/importers/typo.rb +88 -0
- data/lib/bunto-import/importers/wordpress.rb +372 -0
- data/lib/bunto-import/importers/wordpressdotcom.rb +207 -0
- data/lib/bunto-import/util.rb +76 -0
- data/lib/bunto-import/version.rb +3 -0
- data/lib/bunto/commands/import.rb +79 -0
- metadata +374 -0
@@ -0,0 +1,88 @@
|
|
1
|
+
module BuntoImport
|
2
|
+
module Importers
|
3
|
+
class Typo < Importer
|
4
|
+
# Query that selects every row of the Typo `contents` table together with
# the name of its text filter; rows without a filter report 'html'.
# This SQL *should* work for both MySQL and PostgreSQL.
# Frozen so the shared constant cannot be modified by accident.
SQL = <<-EOS.freeze
SELECT c.id id,
c.title title,
c.permalink slug,
c.body body,
c.extended extended,
c.published_at date,
c.state state,
c.keywords keywords,
COALESCE(tf.name, 'html') filter
FROM contents c
LEFT OUTER JOIN text_filters tf
ON c.text_filter_id = tf.id
EOS
|
19
|
+
|
20
|
+
# Loads the libraries this importer needs by handing their names to
# BuntoImport.require_with_fallback.
def self.require_deps
  dependencies = %w[rubygems sequel fileutils safe_yaml]
  BuntoImport.require_with_fallback(dependencies)
end
|
28
|
+
|
29
|
+
# Registers the importer's command-line options on the command object +c+.
# Each entry is [option key, switch, help text]; they are registered in the
# order listed.
def self.specify_options(c)
  [
    ['server',   '--server TYPE', 'Server type ("mysql" or "postgres")'],
    ['dbname',   '--dbname DB',   'Database name'],
    ['user',     '--user USER',   'Database user name'],
    ['password', '--password PW', "Database user's password (default: '')"],
    ['host',     '--host HOST',   'Database host name']
  ].each do |key, switch, description|
    c.option key, switch, description
  end
end
|
36
|
+
|
37
|
+
# Writes every published Typo article to a file under _posts/.
#
# options - Hash with String keys:
#           'server'   - "mysql" or "postgres" (required)
#           'dbname'   - database name (required)
#           'user'     - database user name (required)
#           'password' - database password (default: '')
#           'host'     - database host (default: "localhost")
#
# Each file is named YYYY-MM-DD-<slug>.<first text filter> and contains a
# YAML header (layout, title, tags, typo_id; empty values omitted), a '---'
# line, and the article body with carriage returns removed.
#
# Raises KeyError when a required option is missing and RuntimeError when
# the server type is neither "mysql" nor "postgres".
def self.process(options)
  server = options.fetch('server')
  dbname = options.fetch('dbname')
  user   = options.fetch('user')
  pass   = options.fetch('password', '')
  host   = options.fetch('host', "localhost")

  FileUtils.mkdir_p '_posts'

  connection = { :user => user, :password => pass, :host => host, :encoding => 'utf8' }
  db = case server.intern
       when :postgres then Sequel.postgres(dbname, connection)
       when :mysql    then Sequel.mysql(dbname, connection)
       else raise "Unknown database server '#{server}'"
       end

  db[SQL].each do |post|
    next unless post[:state] =~ /published/i

    slug = post[:slug] || "no slug"

    # Build the body in a local copy rather than appending to the row's own
    # string. The "more" marker is only added when there is extended text:
    # an empty string is truthy in Ruby, so a bare truthiness check would
    # append the marker to posts that have nothing after it.
    body = post[:body].to_s.dup
    extended = post[:extended].to_s
    body << "\n<!-- more -->\n" << extended unless extended.empty?

    # Can have more than one text filter in this field, but we just want
    # the first one for this. Fall back to 'html' (the query's own default)
    # when the field is blank.
    extension = post[:filter].to_s.split(' ').first || 'html'

    date = post[:date]
    name = format('%04d-%02d-%02d-%s.%s',
                  date.year, date.month, date.day, slug.strip, extension)

    header = {
      'layout'  => 'post',
      'title'   => post[:title] && post[:title].to_s.force_encoding('UTF-8'),
      'tags'    => post[:keywords] && post[:keywords].to_s.force_encoding('UTF-8'),
      'typo_id' => post[:id]
    }.delete_if { |_key, value| value.nil? || value == '' }

    File.open("_posts/#{name}", 'w') do |f|
      f.puts header.to_yaml
      f.puts '---'
      f.puts body.delete("\r")
    end
  end
end
|
85
|
+
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
@@ -0,0 +1,372 @@
|
|
1
|
+
module BuntoImport
|
2
|
+
module Importers
|
3
|
+
class WordPress < Importer
|
4
|
+
|
5
|
+
# Loads the libraries this importer needs by handing their names to
# BuntoImport.require_with_fallback.
def self.require_deps
  dependencies = %w[rubygems sequel fileutils safe_yaml unidecode]
  BuntoImport.require_with_fallback(dependencies)
end
|
14
|
+
|
15
|
+
# Registers the importer's command-line options on the command object +c+.
def self.specify_options(c)
  c.option 'dbname', '--dbname DB', 'Database name (default: "")'
  c.option 'socket', '--socket SOCKET', 'Database socket (default: "")'
  c.option 'user', '--user USER', 'Database user name (default: "")'
  # The inner quotes must be escaped: written unescaped, Ruby reads two
  # adjacent string literals and the help text loses its "" default.
  c.option 'password', '--password PW', "Database user's password (default: \"\")"
  c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
  c.option 'table_prefix', '--table_prefix PREFIX', 'Table prefix name (default: "wp_")'
  c.option 'site_prefix', '--site_prefix PREFIX', 'Site prefix name (default: "")'
  c.option 'clean_entities', '--clean_entities', 'Whether to clean entities (default: true)'
  c.option 'comments', '--comments', 'Whether to import comments (default: true)'
  c.option 'categories', '--categories', 'Whether to import categories (default: true)'
  c.option 'tags', '--tags', 'Whether to import tags (default: true)'
  c.option 'more_excerpt', '--more_excerpt', 'Whether to use more excerpt (default: true)'
  c.option 'more_anchor', '--more_anchor', 'Whether to use more anchor (default: true)'
  c.option 'status', '--status STATUS,STATUS2', Array, 'Array of allowed statuses (default: ["publish"], other options: "draft", "private", "revision")'
end
|
31
|
+
|
32
|
+
# Main migrator function. Call this to perform the migration.
#
# opts:: A hash of configuration options with String keys.
#
# Connection options are:
#
# 'dbname'::         The name of the database. Default: ''
# 'user'::           The database user name. Default: ''
# 'password'::       The database user's password. Default: ''
# 'host'::           The address of the MySQL database host.
#                    Default: 'localhost'
# 'socket'::         The database socket's path. Default: nil
#
# Migration options are:
#
# 'table_prefix'::   Prefix of database tables used by WordPress.
#                    Default: 'wp_'
# 'site_prefix'::    Prefix of database tables used by WordPress
#                    Multisite, eg: 2_.
#                    Default: nil (no prefix)
# 'clean_entities':: If true, convert non-ASCII characters to HTML
#                    entities in the posts, comments, titles, and
#                    names. Requires the 'htmlentities' gem to
#                    work; it is switched off with a warning when the
#                    gem cannot be loaded. Default: true.
# 'comments'::       If true, migrate post comments too. Comments
#                    are saved in the post's YAML front matter.
#                    Default: true.
# 'categories'::     If true, save the post's categories in its
#                    YAML front matter. Default: true.
# 'tags'::           If true, save the post's tags in its
#                    YAML front matter. Default: true.
# 'more_excerpt'::   If true, when a post has no excerpt but
#                    does have a <!-- more --> tag, use the
#                    preceding post content as the excerpt.
#                    Default: true.
# 'more_anchor'::    If true, convert a <!-- more --> tag into
#                    two HTML anchors with ids "more" and
#                    "more-NNN" (where NNN is the post number).
#                    Default: true.
# 'extension'::      Set the post extension. Default: "html"
# 'status'::         Array of allowed post statuses. Only
#                    posts with matching status will be migrated.
#                    Known statuses are "publish", "draft", "private",
#                    and "revision". If this is nil or an empty
#                    array, all posts are migrated regardless of
#                    status. Default: ["publish"].
#
# The options are passed on to process_post under Symbol keys, with the
# statuses converted to Symbols.
def self.process(opts)
  options = {
    :user           => opts.fetch('user', ''),
    :pass           => opts.fetch('password', ''),
    :host           => opts.fetch('host', 'localhost'),
    :socket         => opts.fetch('socket', nil),
    :dbname         => opts.fetch('dbname', ''),
    :table_prefix   => opts.fetch('table_prefix', 'wp_'),
    :site_prefix    => opts.fetch('site_prefix', nil),
    :clean_entities => opts.fetch('clean_entities', true),
    :comments       => opts.fetch('comments', true),
    :categories     => opts.fetch('categories', true),
    :tags           => opts.fetch('tags', true),
    :more_excerpt   => opts.fetch('more_excerpt', true),
    :more_anchor    => opts.fetch('more_anchor', true),
    :extension      => opts.fetch('extension', 'html'),
    # Array() lets an explicit nil mean "no status filter" instead of
    # raising on nil.map.
    :status         => Array(opts.fetch('status', ['publish'])).map(&:to_sym) # :draft, :private, :revision
  }

  if options[:clean_entities]
    begin
      require 'htmlentities'
    rescue LoadError
      STDERR.puts "Could not require 'htmlentities', so the " +
                  ":clean_entities option is now disabled."
      options[:clean_entities] = false
    end
  end

  FileUtils.mkdir_p("_posts")
  FileUtils.mkdir_p("_drafts") if options[:status].include?(:draft)

  db = Sequel.mysql2(options[:dbname], :user => options[:user], :password => options[:pass],
                     :socket => options[:socket], :host => options[:host], :encoding => 'utf8')

  px = options[:table_prefix]
  sx = options[:site_prefix]

  # Map of page id => slug and parent id, used later to build page paths.
  page_name_query = <<-EOS
    SELECT
      posts.ID          AS `id`,
      posts.post_title  AS `title`,
      posts.post_name   AS `slug`,
      posts.post_parent AS `parent`
    FROM #{px}#{sx}posts AS `posts`
    WHERE posts.post_type = 'page'
  EOS

  page_name_list = {}
  db[page_name_query].each do |page|
    slug = page[:slug]
    slug = sluggify(page[:title]) if slug.nil? || slug.empty?
    page_name_list[page[:id]] = { :slug => slug, :parent => page[:parent] }
  end

  posts_query = <<-EOS
    SELECT
      posts.ID            AS `id`,
      posts.guid          AS `guid`,
      posts.post_type     AS `type`,
      posts.post_status   AS `status`,
      posts.post_title    AS `title`,
      posts.post_name     AS `slug`,
      posts.post_date     AS `date`,
      posts.post_date_gmt AS `date_gmt`,
      posts.post_content  AS `content`,
      posts.post_excerpt  AS `excerpt`,
      posts.comment_count AS `comment_count`,
      users.display_name  AS `author`,
      users.user_login    AS `author_login`,
      users.user_email    AS `author_email`,
      users.user_url      AS `author_url`
    FROM #{px}#{sx}posts AS `posts`
      LEFT JOIN #{px}users AS `users`
        ON posts.post_author = users.ID
  EOS

  # Status values come from the command line, so they are passed through a
  # placeholder and quoted by Sequel instead of being interpolated into the
  # SQL text.
  statuses = options[:status].map(&:to_s)
  posts = if statuses.empty?
            db[posts_query]
          else
            db["#{posts_query}    WHERE posts.post_status IN ?", statuses]
          end

  posts.each do |post|
    process_post(post, db, options, page_name_list)
  end
end
|
169
|
+
|
170
|
+
|
171
|
+
# Writes one WordPress post, page or draft to disk.
#
# post::           Row hash with the columns selected by the posts query in
#                  process (:id, :type, :status, :title, :slug, :date, ...).
# db::             Database handle; db[sql, *args] must return the rows of
#                  the query.
# options::        Symbol-keyed options hash built by process.
# page_name_list:: Hash of page id => { :slug, :parent }, used to build the
#                  directory of a page.
#
# Pages are written to <page path>/index.<extension>, drafts to
# _drafts/<slug>.md and everything else to
# _posts/YYYY-MM-DD-<slug>.<extension>. The file holds a YAML header
# (nil and empty values omitted), a '---' line, and the content passed
# through Util.wpautop.
def self.process_post(post, db, options, page_name_list)
  px = options[:table_prefix]
  sx = options[:site_prefix]
  extension = options[:extension]
  clean = options[:clean_entities]

  title = post[:title]
  title = clean_entities(title) if clean

  slug = post[:slug]
  slug = sluggify(title) if slug.nil? || slug.empty?

  date = post[:date] || Time.now
  name = "%02d-%02d-%02d-%s.%s" % [date.year, date.month, date.day,
                                   slug, extension]

  content = post[:content].to_s
  content = clean_entities(content) if clean

  excerpt = post[:excerpt].to_s

  more_pattern = /<!-- *more *-->/
  more_index = content.index(more_pattern)
  more_anchor = nil
  if more_index
    excerpt = content[0...more_index] if options[:more_excerpt] && excerpt.empty?
    if options[:more_anchor]
      # Record the anchor name so it reaches the front matter; previously
      # the value was stored in an unused local and 'more_anchor' was
      # never written.
      more_anchor = "more"
      content = content.sub(more_pattern,
                            "<a id=\"more\"></a>" +
                            "<a id=\"more-#{post[:id]}\"></a>")
    end
  end

  categories = []
  tags = []

  if options[:categories] || options[:tags]
    # The post id is bound through a placeholder rather than interpolated.
    term_query = <<-EOS
      SELECT
        terms.name AS `name`,
        ttax.taxonomy AS `type`
      FROM
        #{px}#{sx}terms AS `terms`,
        #{px}#{sx}term_relationships AS `trels`,
        #{px}#{sx}term_taxonomy AS `ttax`
      WHERE
        trels.object_id = ? AND
        trels.term_taxonomy_id = ttax.term_taxonomy_id AND
        terms.term_id = ttax.term_id
    EOS

    db[term_query, post[:id]].each do |term|
      bucket = if options[:categories] && term[:type] == "category"
                 categories
               elsif options[:tags] && term[:type] == "post_tag"
                 tags
               end
      next unless bucket
      bucket << (clean ? clean_entities(term[:name]) : term[:name])
    end
  end

  comments = []

  if options[:comments] && post[:comment_count].to_i > 0
    comment_query = <<-EOS
      SELECT
        comment_ID           AS `id`,
        comment_author       AS `author`,
        comment_author_email AS `author_email`,
        comment_author_url   AS `author_url`,
        comment_date         AS `date`,
        comment_date_gmt     AS `date_gmt`,
        comment_content      AS `content`
      FROM #{px}#{sx}comments
      WHERE
        comment_post_ID = ? AND
        comment_approved != 'spam'
    EOS

    db[comment_query, post[:id]].each do |comment|
      comcontent = comment[:content].to_s
      comcontent.force_encoding("UTF-8") if comcontent.respond_to?(:force_encoding)
      comauthor = comment[:author].to_s
      if clean
        comcontent = clean_entities(comcontent)
        comauthor = clean_entities(comauthor)
      end

      comments << {
        'id'           => comment[:id].to_i,
        'author'       => comauthor,
        'author_email' => comment[:author_email].to_s,
        'author_url'   => comment[:author_url].to_s,
        'date'         => comment[:date].to_s,
        'date_gmt'     => comment[:date_gmt].to_s,
        'content'      => comcontent,
      }
    end

    comments.sort_by! { |comment| comment['id'] }
  end

  status = post[:status].to_s

  # Get the relevant fields as a hash, delete empty fields and
  # convert to YAML for the header.
  data = {
    'layout'        => post[:type].to_s,
    'status'        => status,
    # Drafts carry no 'published' key at all; other statuses get true/false.
    'published'     => status == 'draft' ? nil : (status == 'publish'),
    'title'         => title.to_s,
    'author'        => {
      'display_name'=> post[:author].to_s,
      'login'       => post[:author_login].to_s,
      'email'       => post[:author_email].to_s,
      'url'         => post[:author_url].to_s,
    },
    'author_login'  => post[:author_login].to_s,
    'author_email'  => post[:author_email].to_s,
    'author_url'    => post[:author_url].to_s,
    'excerpt'       => excerpt,
    'more_anchor'   => more_anchor,
    'wordpress_id'  => post[:id],
    'wordpress_url' => post[:guid].to_s,
    'date'          => date.to_s,
    'date_gmt'      => post[:date_gmt].to_s,
    'categories'    => options[:categories] ? categories : nil,
    'tags'          => options[:tags] ? tags : nil,
    'comments'      => options[:comments] ? comments : nil,
  }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

  if post[:type] == 'page'
    filename = page_path(post[:id], page_name_list) + "index.#{extension}"
    FileUtils.mkdir_p(File.dirname(filename))
  elsif post[:status] == 'draft'
    filename = "_drafts/#{slug}.md"
  else
    filename = "_posts/#{name}"
  end

  # Write out the data and content to file
  File.open(filename, "w") do |f|
    f.puts data
    f.puts "---"
    f.puts Util.wpautop(content)
  end
end
|
335
|
+
|
336
|
+
|
337
|
+
# Converts +text+ to named HTML entities with HTMLEntities, then turns the
# entities for & < > " ' / back into plain characters and returns the
# result. +text+ is re-tagged as UTF-8 in place when it supports
# force_encoding.
def self.clean_entities( text )
  text.force_encoding("UTF-8") if text.respond_to?(:force_encoding)
  encoded = HTMLEntities.new.encode(text, :named)
  # We don't want to convert these, it would break all
  # HTML tags in the post and comments.
  # The entity names are assembled at run time ("&" + name + ";") so that a
  # tool which HTML-decodes this source cannot collapse a pattern into the
  # character it is meant to produce. The ampersand entity must stay first,
  # matching the original replacement order.
  [
    ["amp",  "&"],
    ["lt",   "<"],
    ["gt",   ">"],
    ["quot", '"'],
    ["apos", "'"],
    ["#47",  "/"]
  ].each do |entity, character|
    encoded.gsub!("&#{entity};", character)
  end
  encoded
end
|
352
|
+
|
353
|
+
|
354
|
+
# Builds a URL slug from +title+: transliterates it with String#to_ascii,
# lower-cases it, and joins the runs of letters and digits with single
# hyphens. A nil title yields an empty slug instead of raising.
def self.sluggify( title )
  title.to_s.to_ascii.downcase.gsub(/[^0-9A-Za-z]+/, " ").strip.gsub(" ", "-")
end
|
357
|
+
|
358
|
+
# Returns the directory path of a page: the slugs of its ancestors and of
# the page itself, outermost first, each followed by '/'. Returns "" when
# +page_id+ is not a key of +page_name_list+.
#
# page_id::        Id of the page.
# page_name_list:: Hash of page id => { :slug, :parent }.
#
# The walk stops at the first id that is unknown or already visited, so a
# parent cycle in the data ends the path instead of recursing forever.
def self.page_path( page_id, page_name_list )
  segments = []
  visited = {}
  current = page_id

  while page_name_list.key?(current) && !visited[current]
    visited[current] = true
    page = page_name_list[current]
    segments.unshift("#{page[:slug]}/")
    current = page[:parent]
  end

  segments.join("")
end
|
369
|
+
|
370
|
+
end
|
371
|
+
end
|
372
|
+
end
|