bunto-import 2.0.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +21 -21
  3. data/README.markdown +33 -33
  4. data/lib/bunto-import.rb +49 -49
  5. data/lib/bunto-import/importer.rb +26 -26
  6. data/lib/bunto-import/importers.rb +10 -10
  7. data/lib/bunto-import/importers/behance.rb +80 -80
  8. data/lib/bunto-import/importers/blogger.rb +330 -264
  9. data/lib/bunto-import/importers/csv.rb +96 -96
  10. data/lib/bunto-import/importers/drupal6.rb +53 -139
  11. data/lib/bunto-import/importers/drupal7.rb +54 -111
  12. data/lib/bunto-import/importers/drupal_common.rb +157 -0
  13. data/lib/bunto-import/importers/easyblog.rb +96 -96
  14. data/lib/bunto-import/importers/enki.rb +74 -74
  15. data/lib/bunto-import/importers/ghost.rb +68 -68
  16. data/lib/bunto-import/importers/google_reader.rb +64 -64
  17. data/lib/bunto-import/importers/joomla.rb +92 -90
  18. data/lib/bunto-import/importers/joomla3.rb +91 -91
  19. data/lib/bunto-import/importers/jrnl.rb +125 -125
  20. data/lib/bunto-import/importers/marley.rb +72 -72
  21. data/lib/bunto-import/importers/mephisto.rb +99 -99
  22. data/lib/bunto-import/importers/mt.rb +257 -257
  23. data/lib/bunto-import/importers/posterous.rb +130 -130
  24. data/lib/bunto-import/importers/rss.rb +62 -62
  25. data/lib/bunto-import/importers/s9y.rb +60 -60
  26. data/lib/bunto-import/importers/s9y_database.rb +363 -0
  27. data/lib/bunto-import/importers/textpattern.rb +70 -70
  28. data/lib/bunto-import/importers/tumblr.rb +300 -289
  29. data/lib/bunto-import/importers/typo.rb +88 -88
  30. data/lib/bunto-import/importers/wordpress.rb +372 -372
  31. data/lib/bunto-import/importers/wordpressdotcom.rb +207 -207
  32. data/lib/bunto-import/util.rb +76 -76
  33. data/lib/bunto-import/version.rb +3 -3
  34. data/lib/bunto/commands/import.rb +79 -79
  35. metadata +84 -54
@@ -1,88 +1,88 @@
1
- module BuntoImport
2
- module Importers
3
- class Typo < Importer
4
- # This SQL *should* work for both MySQL and PostgreSQL.
5
- SQL = <<-EOS
6
- SELECT c.id id,
7
- c.title title,
8
- c.permalink slug,
9
- c.body body,
10
- c.extended extended,
11
- c.published_at date,
12
- c.state state,
13
- c.keywords keywords,
14
- COALESCE(tf.name, 'html') filter
15
- FROM contents c
16
- LEFT OUTER JOIN text_filters tf
17
- ON c.text_filter_id = tf.id
18
- EOS
19
-
20
- def self.require_deps
21
- BuntoImport.require_with_fallback(%w[
22
- rubygems
23
- sequel
24
- fileutils
25
- safe_yaml
26
- ])
27
- end
28
-
29
- def self.specify_options(c)
30
- c.option 'server', '--server TYPE', 'Server type ("mysql" or "postgres")'
31
- c.option 'dbname', '--dbname DB', 'Database name'
32
- c.option 'user', '--user USER', 'Database user name'
33
- c.option 'password', '--password PW', "Database user's password (default: '')"
34
- c.option 'host', '--host HOST', 'Database host name'
35
- end
36
-
37
- def self.process(options)
38
- server = options.fetch('server')
39
- dbname = options.fetch('dbname')
40
- user = options.fetch('user')
41
- pass = options.fetch('password', '')
42
- host = options.fetch('host', "localhost")
43
-
44
- FileUtils.mkdir_p '_posts'
45
- case server.intern
46
- when :postgres
47
- db = Sequel.postgres(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
48
- when :mysql
49
- db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
50
- else
51
- raise "Unknown database server '#{server}'"
52
- end
53
- db[SQL].each do |post|
54
- next unless post[:state] =~ /published/i
55
-
56
- if post[:slug] == nil
57
- post[:slug] = "no slug"
58
- end
59
-
60
- if post[:extended]
61
- post[:body] << "\n<!-- more -->\n"
62
- post[:body] << post[:extended]
63
- end
64
-
65
- name = [ sprintf("%.04d", post[:date].year),
66
- sprintf("%.02d", post[:date].month),
67
- sprintf("%.02d", post[:date].day),
68
- post[:slug].strip ].join('-')
69
-
70
- # Can have more than one text filter in this field, but we just want
71
- # the first one for this.
72
- name += '.' + post[:filter].split(' ')[0]
73
-
74
- File.open("_posts/#{name}", 'w') do |f|
75
- f.puts({ 'layout' => 'post',
76
- 'title' => (post[:title] and post[:title].to_s.force_encoding('UTF-8')),
77
- 'tags' => (post[:keywords] and post[:keywords].to_s.force_encoding('UTF-8')),
78
- 'typo_id' => post[:id]
79
- }.delete_if { |k, v| v.nil? || v == '' }.to_yaml)
80
- f.puts '---'
81
- f.puts post[:body].delete("\r")
82
- end
83
- end
84
- end
85
-
86
- end
87
- end
88
- end
1
+ module BuntoImport
2
+ module Importers
3
+ class Typo < Importer
4
+ # This SQL *should* work for both MySQL and PostgreSQL.
5
+ SQL = <<-EOS
6
+ SELECT c.id id,
7
+ c.title title,
8
+ c.permalink slug,
9
+ c.body body,
10
+ c.extended extended,
11
+ c.published_at date,
12
+ c.state state,
13
+ c.keywords keywords,
14
+ COALESCE(tf.name, 'html') filter
15
+ FROM contents c
16
+ LEFT OUTER JOIN text_filters tf
17
+ ON c.text_filter_id = tf.id
18
+ EOS
19
+
20
+ def self.require_deps
21
+ BuntoImport.require_with_fallback(%w[
22
+ rubygems
23
+ sequel
24
+ fileutils
25
+ safe_yaml
26
+ ])
27
+ end
28
+
29
+ def self.specify_options(c)
30
+ c.option 'server', '--server TYPE', 'Server type ("mysql" or "postgres")'
31
+ c.option 'dbname', '--dbname DB', 'Database name'
32
+ c.option 'user', '--user USER', 'Database user name'
33
+ c.option 'password', '--password PW', "Database user's password (default: '')"
34
+ c.option 'host', '--host HOST', 'Database host name'
35
+ end
36
+
37
+ def self.process(options)
38
+ server = options.fetch('server')
39
+ dbname = options.fetch('dbname')
40
+ user = options.fetch('user')
41
+ pass = options.fetch('password', '')
42
+ host = options.fetch('host', "localhost")
43
+
44
+ FileUtils.mkdir_p '_posts'
45
+ case server.intern
46
+ when :postgres
47
+ db = Sequel.postgres(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
48
+ when :mysql
49
+ db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
50
+ else
51
+ raise "Unknown database server '#{server}'"
52
+ end
53
+ db[SQL].each do |post|
54
+ next unless post[:state] =~ /published/i
55
+
56
+ if post[:slug] == nil
57
+ post[:slug] = "no slug"
58
+ end
59
+
60
+ if post[:extended]
61
+ post[:body] << "\n<!-- more -->\n"
62
+ post[:body] << post[:extended]
63
+ end
64
+
65
+ name = [ sprintf("%.04d", post[:date].year),
66
+ sprintf("%.02d", post[:date].month),
67
+ sprintf("%.02d", post[:date].day),
68
+ post[:slug].strip ].join('-')
69
+
70
+ # Can have more than one text filter in this field, but we just want
71
+ # the first one for this.
72
+ name += '.' + post[:filter].split(' ')[0]
73
+
74
+ File.open("_posts/#{name}", 'w') do |f|
75
+ f.puts({ 'layout' => 'post',
76
+ 'title' => (post[:title] and post[:title].to_s.force_encoding('UTF-8')),
77
+ 'tags' => (post[:keywords] and post[:keywords].to_s.force_encoding('UTF-8')),
78
+ 'typo_id' => post[:id]
79
+ }.delete_if { |k, v| v.nil? || v == '' }.to_yaml)
80
+ f.puts '---'
81
+ f.puts post[:body].delete("\r")
82
+ end
83
+ end
84
+ end
85
+
86
+ end
87
+ end
88
+ end
@@ -1,372 +1,372 @@
1
- module BuntoImport
2
- module Importers
3
- class WordPress < Importer
4
-
5
- def self.require_deps
6
- BuntoImport.require_with_fallback(%w[
7
- rubygems
8
- sequel
9
- fileutils
10
- safe_yaml
11
- unidecode
12
- ])
13
- end
14
-
15
- def self.specify_options(c)
16
- c.option 'dbname', '--dbname DB', 'Database name (default: "")'
17
- c.option 'socket', '--socket SOCKET', 'Database socket (default: "")'
18
- c.option 'user', '--user USER', 'Database user name (default: "")'
19
- c.option 'password', '--password PW', "Database user's password (default: "")"
20
- c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
21
- c.option 'table_prefix', '--table_prefix PREFIX', 'Table prefix name (default: "wp_")'
22
- c.option 'site_prefix', '--site_prefix PREFIX', 'Site prefix name (default: "")'
23
- c.option 'clean_entities', '--clean_entities', 'Whether to clean entities (default: true)'
24
- c.option 'comments', '--comments', 'Whether to import comments (default: true)'
25
- c.option 'categories', '--categories', 'Whether to import categories (default: true)'
26
- c.option 'tags', '--tags', 'Whether to import tags (default: true)'
27
- c.option 'more_excerpt', '--more_excerpt', 'Whether to use more excerpt (default: true)'
28
- c.option 'more_anchor', '--more_anchor', 'Whether to use more anchor (default: true)'
29
- c.option 'status', '--status STATUS,STATUS2', Array, 'Array of allowed statuses (default: ["publish"], other options: "draft", "private", "revision")'
30
- end
31
-
32
- # Main migrator function. Call this to perform the migration.
33
- #
34
- # dbname:: The name of the database
35
- # user:: The database user name
36
- # pass:: The database user's password
37
- # host:: The address of the MySQL database host. Default: 'localhost'
38
- # socket:: The database socket's path
39
- # options:: A hash table of configuration options.
40
- #
41
- # Supported options are:
42
- #
43
- # :table_prefix:: Prefix of database tables used by WordPress.
44
- # Default: 'wp_'
45
- # :site_prefix:: Prefix of database tables used by WordPress
46
- # Multisite, eg: 2_.
47
- # Default: ''
48
- # :clean_entities:: If true, convert non-ASCII characters to HTML
49
- # entities in the posts, comments, titles, and
50
- # names. Requires the 'htmlentities' gem to
51
- # work. Default: true.
52
- # :comments:: If true, migrate post comments too. Comments
53
- # are saved in the post's YAML front matter.
54
- # Default: true.
55
- # :categories:: If true, save the post's categories in its
56
- # YAML front matter. Default: true.
57
- # :tags:: If true, save the post's tags in its
58
- # YAML front matter. Default: true.
59
- # :more_excerpt:: If true, when a post has no excerpt but
60
- # does have a <!-- more --> tag, use the
61
- # preceding post content as the excerpt.
62
- # Default: true.
63
- # :more_anchor:: If true, convert a <!-- more --> tag into
64
- # two HTML anchors with ids "more" and
65
- # "more-NNN" (where NNN is the post number).
66
- # Default: true.
67
- # :extension:: Set the post extension. Default: "html"
68
- # :status:: Array of allowed post statuses. Only
69
- # posts with matching status will be migrated.
70
- # Known statuses are :publish, :draft, :private,
71
- # and :revision. If this is nil or an empty
72
- # array, all posts are migrated regardless of
73
- # status. Default: [:publish].
74
- #
75
- def self.process(opts)
76
- options = {
77
- :user => opts.fetch('user', ''),
78
- :pass => opts.fetch('password', ''),
79
- :host => opts.fetch('host', 'localhost'),
80
- :socket => opts.fetch('socket', nil),
81
- :dbname => opts.fetch('dbname', ''),
82
- :table_prefix => opts.fetch('table_prefix', 'wp_'),
83
- :site_prefix => opts.fetch('site_prefix', nil),
84
- :clean_entities => opts.fetch('clean_entities', true),
85
- :comments => opts.fetch('comments', true),
86
- :categories => opts.fetch('categories', true),
87
- :tags => opts.fetch('tags', true),
88
- :more_excerpt => opts.fetch('more_excerpt', true),
89
- :more_anchor => opts.fetch('more_anchor', true),
90
- :extension => opts.fetch('extension', 'html'),
91
- :status => opts.fetch('status', ['publish']).map(&:to_sym) # :draft, :private, :revision
92
- }
93
-
94
- if options[:clean_entities]
95
- begin
96
- require 'htmlentities'
97
- rescue LoadError
98
- STDERR.puts "Could not require 'htmlentities', so the " +
99
- ":clean_entities option is now disabled."
100
- options[:clean_entities] = false
101
- end
102
- end
103
-
104
- FileUtils.mkdir_p("_posts")
105
- FileUtils.mkdir_p("_drafts") if options[:status].include? :draft
106
-
107
- db = Sequel.mysql2(options[:dbname], :user => options[:user], :password => options[:pass],
108
- :socket => options[:socket], :host => options[:host], :encoding => 'utf8')
109
-
110
- px = options[:table_prefix]
111
- sx = options[:site_prefix]
112
-
113
- page_name_list = {}
114
-
115
- page_name_query = "
116
- SELECT
117
- posts.ID AS `id`,
118
- posts.post_title AS `title`,
119
- posts.post_name AS `slug`,
120
- posts.post_parent AS `parent`
121
- FROM #{px}#{sx}posts AS `posts`
122
- WHERE posts.post_type = 'page'"
123
-
124
- db[page_name_query].each do |page|
125
- if !page[:slug] or page[:slug].empty?
126
- page[:slug] = sluggify(page[:title])
127
- end
128
- page_name_list[ page[:id] ] = {
129
- :slug => page[:slug],
130
- :parent => page[:parent]
131
- }
132
- end
133
-
134
- posts_query = "
135
- SELECT
136
- posts.ID AS `id`,
137
- posts.guid AS `guid`,
138
- posts.post_type AS `type`,
139
- posts.post_status AS `status`,
140
- posts.post_title AS `title`,
141
- posts.post_name AS `slug`,
142
- posts.post_date AS `date`,
143
- posts.post_date_gmt AS `date_gmt`,
144
- posts.post_content AS `content`,
145
- posts.post_excerpt AS `excerpt`,
146
- posts.comment_count AS `comment_count`,
147
- users.display_name AS `author`,
148
- users.user_login AS `author_login`,
149
- users.user_email AS `author_email`,
150
- users.user_url AS `author_url`
151
- FROM #{px}#{sx}posts AS `posts`
152
- LEFT JOIN #{px}users AS `users`
153
- ON posts.post_author = users.ID"
154
-
155
- if options[:status] and not options[:status].empty?
156
- status = options[:status][0]
157
- posts_query << "
158
- WHERE posts.post_status = '#{status.to_s}'"
159
- options[:status][1..-1].each do |status|
160
- posts_query << " OR
161
- posts.post_status = '#{status.to_s}'"
162
- end
163
- end
164
-
165
- db[posts_query].each do |post|
166
- process_post(post, db, options, page_name_list)
167
- end
168
- end
169
-
170
-
171
- def self.process_post(post, db, options, page_name_list)
172
- px = options[:table_prefix]
173
- sx = options[:site_prefix]
174
- extension = options[:extension]
175
-
176
- title = post[:title]
177
- if options[:clean_entities]
178
- title = clean_entities(title)
179
- end
180
-
181
- slug = post[:slug]
182
- if !slug or slug.empty?
183
- slug = sluggify(title)
184
- end
185
-
186
- date = post[:date] || Time.now
187
- name = "%02d-%02d-%02d-%s.%s" % [date.year, date.month, date.day,
188
- slug, extension]
189
- content = post[:content].to_s
190
- if options[:clean_entities]
191
- content = clean_entities(content)
192
- end
193
-
194
- excerpt = post[:excerpt].to_s
195
-
196
- more_index = content.index(/<!-- *more *-->/)
197
- more_anchor = nil
198
- if more_index
199
- if options[:more_excerpt] and
200
- (post[:excerpt].nil? or post[:excerpt].empty?)
201
- excerpt = content[0...more_index]
202
- end
203
- if options[:more_anchor]
204
- more_link = "more"
205
- content.sub!(/<!-- *more *-->/,
206
- "<a id=\"more\"></a>" +
207
- "<a id=\"more-#{post[:id]}\"></a>")
208
- end
209
- end
210
-
211
- categories = []
212
- tags = []
213
-
214
- if options[:categories] or options[:tags]
215
-
216
- cquery =
217
- "SELECT
218
- terms.name AS `name`,
219
- ttax.taxonomy AS `type`
220
- FROM
221
- #{px}#{sx}terms AS `terms`,
222
- #{px}#{sx}term_relationships AS `trels`,
223
- #{px}#{sx}term_taxonomy AS `ttax`
224
- WHERE
225
- trels.object_id = '#{post[:id]}' AND
226
- trels.term_taxonomy_id = ttax.term_taxonomy_id AND
227
- terms.term_id = ttax.term_id"
228
-
229
- db[cquery].each do |term|
230
- if options[:categories] and term[:type] == "category"
231
- if options[:clean_entities]
232
- categories << clean_entities(term[:name])
233
- else
234
- categories << term[:name]
235
- end
236
- elsif options[:tags] and term[:type] == "post_tag"
237
- if options[:clean_entities]
238
- tags << clean_entities(term[:name])
239
- else
240
- tags << term[:name]
241
- end
242
- end
243
- end
244
- end
245
-
246
- comments = []
247
-
248
- if options[:comments] and post[:comment_count].to_i > 0
249
- cquery =
250
- "SELECT
251
- comment_ID AS `id`,
252
- comment_author AS `author`,
253
- comment_author_email AS `author_email`,
254
- comment_author_url AS `author_url`,
255
- comment_date AS `date`,
256
- comment_date_gmt AS `date_gmt`,
257
- comment_content AS `content`
258
- FROM #{px}#{sx}comments
259
- WHERE
260
- comment_post_ID = '#{post[:id]}' AND
261
- comment_approved != 'spam'"
262
-
263
-
264
- db[cquery].each do |comment|
265
-
266
- comcontent = comment[:content].to_s
267
- if comcontent.respond_to?(:force_encoding)
268
- comcontent.force_encoding("UTF-8")
269
- end
270
- if options[:clean_entities]
271
- comcontent = clean_entities(comcontent)
272
- end
273
- comauthor = comment[:author].to_s
274
- if options[:clean_entities]
275
- comauthor = clean_entities(comauthor)
276
- end
277
-
278
- comments << {
279
- 'id' => comment[:id].to_i,
280
- 'author' => comauthor,
281
- 'author_email' => comment[:author_email].to_s,
282
- 'author_url' => comment[:author_url].to_s,
283
- 'date' => comment[:date].to_s,
284
- 'date_gmt' => comment[:date_gmt].to_s,
285
- 'content' => comcontent,
286
- }
287
- end
288
-
289
- comments.sort!{ |a,b| a['id'] <=> b['id'] }
290
- end
291
-
292
- # Get the relevant fields as a hash, delete empty fields and
293
- # convert to YAML for the header.
294
- data = {
295
- 'layout' => post[:type].to_s,
296
- 'status' => post[:status].to_s,
297
- 'published' => post[:status].to_s == 'draft' ? nil : (post[:status].to_s == 'publish'),
298
- 'title' => title.to_s,
299
- 'author' => {
300
- 'display_name'=> post[:author].to_s,
301
- 'login' => post[:author_login].to_s,
302
- 'email' => post[:author_email].to_s,
303
- 'url' => post[:author_url].to_s,
304
- },
305
- 'author_login' => post[:author_login].to_s,
306
- 'author_email' => post[:author_email].to_s,
307
- 'author_url' => post[:author_url].to_s,
308
- 'excerpt' => excerpt,
309
- 'more_anchor' => more_anchor,
310
- 'wordpress_id' => post[:id],
311
- 'wordpress_url' => post[:guid].to_s,
312
- 'date' => date.to_s,
313
- 'date_gmt' => post[:date_gmt].to_s,
314
- 'categories' => options[:categories] ? categories : nil,
315
- 'tags' => options[:tags] ? tags : nil,
316
- 'comments' => options[:comments] ? comments : nil,
317
- }.delete_if { |k,v| v.nil? || v == '' }.to_yaml
318
-
319
- if post[:type] == 'page'
320
- filename = page_path(post[:id], page_name_list) + "index.#{extension}"
321
- FileUtils.mkdir_p(File.dirname(filename))
322
- elsif post[:status] == 'draft'
323
- filename = "_drafts/#{slug}.md"
324
- else
325
- filename = "_posts/#{name}"
326
- end
327
-
328
- # Write out the data and content to file
329
- File.open(filename, "w") do |f|
330
- f.puts data
331
- f.puts "---"
332
- f.puts Util.wpautop(content)
333
- end
334
- end
335
-
336
-
337
- def self.clean_entities( text )
338
- if text.respond_to?(:force_encoding)
339
- text.force_encoding("UTF-8")
340
- end
341
- text = HTMLEntities.new.encode(text, :named)
342
- # We don't want to convert these, it would break all
343
- # HTML tags in the post and comments.
344
- text.gsub!("&amp;", "&")
345
- text.gsub!("&lt;", "<")
346
- text.gsub!("&gt;", ">")
347
- text.gsub!("&quot;", '"')
348
- text.gsub!("&apos;", "'")
349
- text.gsub!("/", "&#47;")
350
- text
351
- end
352
-
353
-
354
- def self.sluggify( title )
355
- title = title.to_ascii.downcase.gsub(/[^0-9A-Za-z]+/, " ").strip.gsub(" ", "-")
356
- end
357
-
358
- def self.page_path( page_id, page_name_list )
359
- if page_name_list.key?(page_id)
360
- [
361
- page_path(page_name_list[page_id][:parent],page_name_list),
362
- page_name_list[page_id][:slug],
363
- '/'
364
- ].join("")
365
- else
366
- ""
367
- end
368
- end
369
-
370
- end
371
- end
372
- end
1
+ module BuntoImport
2
+ module Importers
3
+ class WordPress < Importer
4
+
5
+ def self.require_deps
6
+ BuntoImport.require_with_fallback(%w[
7
+ rubygems
8
+ sequel
9
+ fileutils
10
+ safe_yaml
11
+ unidecode
12
+ ])
13
+ end
14
+
15
+ def self.specify_options(c)
16
+ c.option 'dbname', '--dbname DB', 'Database name (default: "")'
17
+ c.option 'socket', '--socket SOCKET', 'Database socket (default: "")'
18
+ c.option 'user', '--user USER', 'Database user name (default: "")'
19
+ c.option 'password', '--password PW', "Database user's password (default: "")"
20
+ c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
21
+ c.option 'table_prefix', '--table_prefix PREFIX', 'Table prefix name (default: "wp_")'
22
+ c.option 'site_prefix', '--site_prefix PREFIX', 'Site prefix name (default: "")'
23
+ c.option 'clean_entities', '--clean_entities', 'Whether to clean entities (default: true)'
24
+ c.option 'comments', '--comments', 'Whether to import comments (default: true)'
25
+ c.option 'categories', '--categories', 'Whether to import categories (default: true)'
26
+ c.option 'tags', '--tags', 'Whether to import tags (default: true)'
27
+ c.option 'more_excerpt', '--more_excerpt', 'Whether to use more excerpt (default: true)'
28
+ c.option 'more_anchor', '--more_anchor', 'Whether to use more anchor (default: true)'
29
+ c.option 'status', '--status STATUS,STATUS2', Array, 'Array of allowed statuses (default: ["publish"], other options: "draft", "private", "revision")'
30
+ end
31
+
32
+ # Main migrator function. Call this to perform the migration.
33
+ #
34
+ # dbname:: The name of the database
35
+ # user:: The database user name
36
+ # pass:: The database user's password
37
+ # host:: The address of the MySQL database host. Default: 'localhost'
38
+ # socket:: The database socket's path
39
+ # options:: A hash table of configuration options.
40
+ #
41
+ # Supported options are:
42
+ #
43
+ # :table_prefix:: Prefix of database tables used by WordPress.
44
+ # Default: 'wp_'
45
+ # :site_prefix:: Prefix of database tables used by WordPress
46
+ # Multisite, eg: 2_.
47
+ # Default: ''
48
+ # :clean_entities:: If true, convert non-ASCII characters to HTML
49
+ # entities in the posts, comments, titles, and
50
+ # names. Requires the 'htmlentities' gem to
51
+ # work. Default: true.
52
+ # :comments:: If true, migrate post comments too. Comments
53
+ # are saved in the post's YAML front matter.
54
+ # Default: true.
55
+ # :categories:: If true, save the post's categories in its
56
+ # YAML front matter. Default: true.
57
+ # :tags:: If true, save the post's tags in its
58
+ # YAML front matter. Default: true.
59
+ # :more_excerpt:: If true, when a post has no excerpt but
60
+ # does have a <!-- more --> tag, use the
61
+ # preceding post content as the excerpt.
62
+ # Default: true.
63
+ # :more_anchor:: If true, convert a <!-- more --> tag into
64
+ # two HTML anchors with ids "more" and
65
+ # "more-NNN" (where NNN is the post number).
66
+ # Default: true.
67
+ # :extension:: Set the post extension. Default: "html"
68
+ # :status:: Array of allowed post statuses. Only
69
+ # posts with matching status will be migrated.
70
+ # Known statuses are :publish, :draft, :private,
71
+ # and :revision. If this is nil or an empty
72
+ # array, all posts are migrated regardless of
73
+ # status. Default: [:publish].
74
+ #
75
+ def self.process(opts)
76
+ options = {
77
+ :user => opts.fetch('user', ''),
78
+ :pass => opts.fetch('password', ''),
79
+ :host => opts.fetch('host', 'localhost'),
80
+ :socket => opts.fetch('socket', nil),
81
+ :dbname => opts.fetch('dbname', ''),
82
+ :table_prefix => opts.fetch('table_prefix', 'wp_'),
83
+ :site_prefix => opts.fetch('site_prefix', nil),
84
+ :clean_entities => opts.fetch('clean_entities', true),
85
+ :comments => opts.fetch('comments', true),
86
+ :categories => opts.fetch('categories', true),
87
+ :tags => opts.fetch('tags', true),
88
+ :more_excerpt => opts.fetch('more_excerpt', true),
89
+ :more_anchor => opts.fetch('more_anchor', true),
90
+ :extension => opts.fetch('extension', 'html'),
91
+ :status => opts.fetch('status', ['publish']).map(&:to_sym) # :draft, :private, :revision
92
+ }
93
+
94
+ if options[:clean_entities]
95
+ begin
96
+ require 'htmlentities'
97
+ rescue LoadError
98
+ STDERR.puts "Could not require 'htmlentities', so the " +
99
+ ":clean_entities option is now disabled."
100
+ options[:clean_entities] = false
101
+ end
102
+ end
103
+
104
+ FileUtils.mkdir_p("_posts")
105
+ FileUtils.mkdir_p("_drafts") if options[:status].include? :draft
106
+
107
+ db = Sequel.mysql2(options[:dbname], :user => options[:user], :password => options[:pass],
108
+ :socket => options[:socket], :host => options[:host], :encoding => 'utf8')
109
+
110
+ px = options[:table_prefix]
111
+ sx = options[:site_prefix]
112
+
113
+ page_name_list = {}
114
+
115
+ page_name_query = "
116
+ SELECT
117
+ posts.ID AS `id`,
118
+ posts.post_title AS `title`,
119
+ posts.post_name AS `slug`,
120
+ posts.post_parent AS `parent`
121
+ FROM #{px}#{sx}posts AS `posts`
122
+ WHERE posts.post_type = 'page'"
123
+
124
+ db[page_name_query].each do |page|
125
+ if !page[:slug] or page[:slug].empty?
126
+ page[:slug] = sluggify(page[:title])
127
+ end
128
+ page_name_list[ page[:id] ] = {
129
+ :slug => page[:slug],
130
+ :parent => page[:parent]
131
+ }
132
+ end
133
+
134
+ posts_query = "
135
+ SELECT
136
+ posts.ID AS `id`,
137
+ posts.guid AS `guid`,
138
+ posts.post_type AS `type`,
139
+ posts.post_status AS `status`,
140
+ posts.post_title AS `title`,
141
+ posts.post_name AS `slug`,
142
+ posts.post_date AS `date`,
143
+ posts.post_date_gmt AS `date_gmt`,
144
+ posts.post_content AS `content`,
145
+ posts.post_excerpt AS `excerpt`,
146
+ posts.comment_count AS `comment_count`,
147
+ users.display_name AS `author`,
148
+ users.user_login AS `author_login`,
149
+ users.user_email AS `author_email`,
150
+ users.user_url AS `author_url`
151
+ FROM #{px}#{sx}posts AS `posts`
152
+ LEFT JOIN #{px}users AS `users`
153
+ ON posts.post_author = users.ID"
154
+
155
+ if options[:status] and not options[:status].empty?
156
+ status = options[:status][0]
157
+ posts_query << "
158
+ WHERE posts.post_status = '#{status.to_s}'"
159
+ options[:status][1..-1].each do |status|
160
+ posts_query << " OR
161
+ posts.post_status = '#{status.to_s}'"
162
+ end
163
+ end
164
+
165
+ db[posts_query].each do |post|
166
+ process_post(post, db, options, page_name_list)
167
+ end
168
+ end
169
+
170
+
171
+ def self.process_post(post, db, options, page_name_list)
172
+ px = options[:table_prefix]
173
+ sx = options[:site_prefix]
174
+ extension = options[:extension]
175
+
176
+ title = post[:title]
177
+ if options[:clean_entities]
178
+ title = clean_entities(title)
179
+ end
180
+
181
+ slug = post[:slug]
182
+ if !slug or slug.empty?
183
+ slug = sluggify(title)
184
+ end
185
+
186
+ date = post[:date] || Time.now
187
+ name = "%02d-%02d-%02d-%s.%s" % [date.year, date.month, date.day,
188
+ slug, extension]
189
+ content = post[:content].to_s
190
+ if options[:clean_entities]
191
+ content = clean_entities(content)
192
+ end
193
+
194
+ excerpt = post[:excerpt].to_s
195
+
196
+ more_index = content.index(/<!-- *more *-->/)
197
+ more_anchor = nil
198
+ if more_index
199
+ if options[:more_excerpt] and
200
+ (post[:excerpt].nil? or post[:excerpt].empty?)
201
+ excerpt = content[0...more_index]
202
+ end
203
+ if options[:more_anchor]
204
+ more_link = "more"
205
+ content.sub!(/<!-- *more *-->/,
206
+ "<a id=\"more\"></a>" +
207
+ "<a id=\"more-#{post[:id]}\"></a>")
208
+ end
209
+ end
210
+
211
+ categories = []
212
+ tags = []
213
+
214
+ if options[:categories] or options[:tags]
215
+
216
+ cquery =
217
+ "SELECT
218
+ terms.name AS `name`,
219
+ ttax.taxonomy AS `type`
220
+ FROM
221
+ #{px}#{sx}terms AS `terms`,
222
+ #{px}#{sx}term_relationships AS `trels`,
223
+ #{px}#{sx}term_taxonomy AS `ttax`
224
+ WHERE
225
+ trels.object_id = '#{post[:id]}' AND
226
+ trels.term_taxonomy_id = ttax.term_taxonomy_id AND
227
+ terms.term_id = ttax.term_id"
228
+
229
+ db[cquery].each do |term|
230
+ if options[:categories] and term[:type] == "category"
231
+ if options[:clean_entities]
232
+ categories << clean_entities(term[:name])
233
+ else
234
+ categories << term[:name]
235
+ end
236
+ elsif options[:tags] and term[:type] == "post_tag"
237
+ if options[:clean_entities]
238
+ tags << clean_entities(term[:name])
239
+ else
240
+ tags << term[:name]
241
+ end
242
+ end
243
+ end
244
+ end
245
+
246
+ comments = []
247
+
248
+ if options[:comments] and post[:comment_count].to_i > 0
249
+ cquery =
250
+ "SELECT
251
+ comment_ID AS `id`,
252
+ comment_author AS `author`,
253
+ comment_author_email AS `author_email`,
254
+ comment_author_url AS `author_url`,
255
+ comment_date AS `date`,
256
+ comment_date_gmt AS `date_gmt`,
257
+ comment_content AS `content`
258
+ FROM #{px}#{sx}comments
259
+ WHERE
260
+ comment_post_ID = '#{post[:id]}' AND
261
+ comment_approved != 'spam'"
262
+
263
+
264
+ db[cquery].each do |comment|
265
+
266
+ comcontent = comment[:content].to_s
267
+ if comcontent.respond_to?(:force_encoding)
268
+ comcontent.force_encoding("UTF-8")
269
+ end
270
+ if options[:clean_entities]
271
+ comcontent = clean_entities(comcontent)
272
+ end
273
+ comauthor = comment[:author].to_s
274
+ if options[:clean_entities]
275
+ comauthor = clean_entities(comauthor)
276
+ end
277
+
278
+ comments << {
279
+ 'id' => comment[:id].to_i,
280
+ 'author' => comauthor,
281
+ 'author_email' => comment[:author_email].to_s,
282
+ 'author_url' => comment[:author_url].to_s,
283
+ 'date' => comment[:date].to_s,
284
+ 'date_gmt' => comment[:date_gmt].to_s,
285
+ 'content' => comcontent,
286
+ }
287
+ end
288
+
289
+ comments.sort!{ |a,b| a['id'] <=> b['id'] }
290
+ end
291
+
292
+ # Get the relevant fields as a hash, delete empty fields and
293
+ # convert to YAML for the header.
294
+ data = {
295
+ 'layout' => post[:type].to_s,
296
+ 'status' => post[:status].to_s,
297
+ 'published' => post[:status].to_s == 'draft' ? nil : (post[:status].to_s == 'publish'),
298
+ 'title' => title.to_s,
299
+ 'author' => {
300
+ 'display_name'=> post[:author].to_s,
301
+ 'login' => post[:author_login].to_s,
302
+ 'email' => post[:author_email].to_s,
303
+ 'url' => post[:author_url].to_s,
304
+ },
305
+ 'author_login' => post[:author_login].to_s,
306
+ 'author_email' => post[:author_email].to_s,
307
+ 'author_url' => post[:author_url].to_s,
308
+ 'excerpt' => excerpt,
309
+ 'more_anchor' => more_anchor,
310
+ 'wordpress_id' => post[:id],
311
+ 'wordpress_url' => post[:guid].to_s,
312
+ 'date' => date.to_s,
313
+ 'date_gmt' => post[:date_gmt].to_s,
314
+ 'categories' => options[:categories] ? categories : nil,
315
+ 'tags' => options[:tags] ? tags : nil,
316
+ 'comments' => options[:comments] ? comments : nil,
317
+ }.delete_if { |k,v| v.nil? || v == '' }.to_yaml
318
+
319
+ if post[:type] == 'page'
320
+ filename = page_path(post[:id], page_name_list) + "index.#{extension}"
321
+ FileUtils.mkdir_p(File.dirname(filename))
322
+ elsif post[:status] == 'draft'
323
+ filename = "_drafts/#{slug}.md"
324
+ else
325
+ filename = "_posts/#{name}"
326
+ end
327
+
328
+ # Write out the data and content to file
329
+ File.open(filename, "w") do |f|
330
+ f.puts data
331
+ f.puts "---"
332
+ f.puts Util.wpautop(content)
333
+ end
334
+ end
335
+
336
+
337
+ def self.clean_entities( text )
338
+ if text.respond_to?(:force_encoding)
339
+ text.force_encoding("UTF-8")
340
+ end
341
+ text = HTMLEntities.new.encode(text, :named)
342
+ # We don't want to convert these, it would break all
343
+ # HTML tags in the post and comments.
344
+ text.gsub!("&amp;", "&")
345
+ text.gsub!("&lt;", "<")
346
+ text.gsub!("&gt;", ">")
347
+ text.gsub!("&quot;", '"')
348
+ text.gsub!("&apos;", "'")
349
+ text.gsub!("/", "&#47;")
350
+ text
351
+ end
352
+
353
+
354
+ def self.sluggify( title )
355
+ title = title.to_ascii.downcase.gsub(/[^0-9A-Za-z]+/, " ").strip.gsub(" ", "-")
356
+ end
357
+
358
+ def self.page_path( page_id, page_name_list )
359
+ if page_name_list.key?(page_id)
360
+ [
361
+ page_path(page_name_list[page_id][:parent],page_name_list),
362
+ page_name_list[page_id][:slug],
363
+ '/'
364
+ ].join("")
365
+ else
366
+ ""
367
+ end
368
+ end
369
+
370
+ end
371
+ end
372
+ end