bunto-import 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/LICENSE +21 -21
  3. data/README.markdown +33 -33
  4. data/lib/bunto-import.rb +49 -49
  5. data/lib/bunto-import/importer.rb +26 -26
  6. data/lib/bunto-import/importers.rb +10 -10
  7. data/lib/bunto-import/importers/behance.rb +80 -80
  8. data/lib/bunto-import/importers/blogger.rb +330 -264
  9. data/lib/bunto-import/importers/csv.rb +96 -96
  10. data/lib/bunto-import/importers/drupal6.rb +53 -139
  11. data/lib/bunto-import/importers/drupal7.rb +54 -111
  12. data/lib/bunto-import/importers/drupal_common.rb +157 -0
  13. data/lib/bunto-import/importers/easyblog.rb +96 -96
  14. data/lib/bunto-import/importers/enki.rb +74 -74
  15. data/lib/bunto-import/importers/ghost.rb +68 -68
  16. data/lib/bunto-import/importers/google_reader.rb +64 -64
  17. data/lib/bunto-import/importers/joomla.rb +92 -90
  18. data/lib/bunto-import/importers/joomla3.rb +91 -91
  19. data/lib/bunto-import/importers/jrnl.rb +125 -125
  20. data/lib/bunto-import/importers/marley.rb +72 -72
  21. data/lib/bunto-import/importers/mephisto.rb +99 -99
  22. data/lib/bunto-import/importers/mt.rb +257 -257
  23. data/lib/bunto-import/importers/posterous.rb +130 -130
  24. data/lib/bunto-import/importers/rss.rb +62 -62
  25. data/lib/bunto-import/importers/s9y.rb +60 -60
  26. data/lib/bunto-import/importers/s9y_database.rb +363 -0
  27. data/lib/bunto-import/importers/textpattern.rb +70 -70
  28. data/lib/bunto-import/importers/tumblr.rb +300 -289
  29. data/lib/bunto-import/importers/typo.rb +88 -88
  30. data/lib/bunto-import/importers/wordpress.rb +372 -372
  31. data/lib/bunto-import/importers/wordpressdotcom.rb +207 -207
  32. data/lib/bunto-import/util.rb +76 -76
  33. data/lib/bunto-import/version.rb +3 -3
  34. data/lib/bunto/commands/import.rb +79 -79
  35. metadata +84 -54
@@ -0,0 +1,363 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class S9YDatabase < Importer
4
+
5
# Loads the libraries this importer needs by handing their names to
# BuntoImport.require_with_fallback.
def self.require_deps
  dependencies = %w(rubygems sequel fileutils safe_yaml unidecode)
  BuntoImport.require_with_fallback(dependencies)
end
15
+
16
# Declares the command-line options understood by this importer.
#
# c:: The command object; every option is registered through c.option
#     with its config key, its switch and its help text.
def self.specify_options(c)
  c.option 'dbname', '--dbname DB', 'Database name (default: "")'
  c.option 'socket', '--socket SOCKET', 'Database socket (default: "")'
  c.option 'user', '--user USER', 'Database user name (default: "")'
  # The inner quotes must be escaped: unescaped, they end the literal and
  # Ruby glues the adjacent pieces together, dropping the "" from the help.
  c.option 'password', '--password PW', "Database user's password (default: \"\")"
  c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
  c.option 'table_prefix', '--table_prefix PREFIX', 'Table prefix name (default: "serendipity_")'
  c.option 'clean_entities', '--clean_entities', 'Whether to clean entities (default: true)'
  c.option 'comments', '--comments', 'Whether to import comments (default: true)'
  c.option 'categories', '--categories', 'Whether to import categories (default: true)'
  c.option 'tags', '--tags', 'Whether to import tags (default: true)'
  c.option 'drafts', '--drafts', 'Whether to export drafts as well'
  c.option 'markdown', '--markdown', 'convert into markdown format (default: false)'
  c.option 'permalinks', '--permalinks', 'preserve S9Y permalinks (default: false)'
end
31
+
32
# Main migrator function. Call this to perform the migration.
#
# Creates the "_posts" directory (and "_drafts" when drafts are exported),
# connects to the database through Sequel's mysql2 adapter and hands every
# entry row to process_post.
#
# opts:: A hash of options keyed by string (the keys declared in
#        specify_options, plus 'extension').
#
# Supported options are:
#
# 'dbname'::         The name of the database. Default: ''.
# 'user'::           The database user name. Default: ''.
# 'password'::       The database user's password. Default: ''.
# 'host'::           The address of the MySQL database host.
#                    Default: 'localhost'.
# 'socket'::         The database socket's path. Default: nil.
# 'table_prefix'::   Prefix of the Serendipity database tables.
#                    Default: 'serendipity_'.
# 'clean_entities':: If true, convert non-ASCII characters to HTML
#                    entities in the posts, comments, titles, and
#                    names. Requires the 'htmlentities' gem to
#                    work. Default: true.
# 'comments'::       If true, migrate post comments too. Comments
#                    are saved in the post's YAML front matter.
#                    Default: true.
# 'categories'::     If true, save the post's categories in its
#                    YAML front matter. Default: true.
# 'tags'::           If true, save the post's tags in its
#                    YAML front matter. Default: true.
# 'extension'::      Set the post extension. Default: "html".
# 'drafts'::         If true, export drafts as well.
#                    Default: true.
# 'markdown'::       If true, convert the content to markdown.
#                    Requires the 'reverse_markdown' gem to work.
#                    Default: false.
# 'permalinks'::     If true, save the post's original permalink in its
#                    YAML front matter. Default: false.
#
def self.process(opts)
  options = {
    :user           => opts.fetch('user', ''),
    :pass           => opts.fetch('password', ''),
    :host           => opts.fetch('host', 'localhost'),
    :socket         => opts.fetch('socket', nil),
    :dbname         => opts.fetch('dbname', ''),
    :table_prefix   => opts.fetch('table_prefix', 'serendipity_'),
    :clean_entities => opts.fetch('clean_entities', true),
    :comments       => opts.fetch('comments', true),
    :categories     => opts.fetch('categories', true),
    :tags           => opts.fetch('tags', true),
    :extension      => opts.fetch('extension', 'html'),
    :drafts         => opts.fetch('drafts', true),
    :markdown       => opts.fetch('markdown', false),
    :permalinks     => opts.fetch('permalinks', false),
  }

  # Optional features stay enabled only with whatever
  # require_if_available reports for their supporting gem.
  if options[:clean_entities]
    options[:clean_entities] = require_if_available('htmlentities', 'clean_entities')
  end

  if options[:markdown]
    options[:markdown] = require_if_available('reverse_markdown', 'markdown')
  end

  FileUtils.mkdir_p("_posts")
  FileUtils.mkdir_p("_drafts") if options[:drafts]

  db = Sequel.mysql2(options[:dbname], :user => options[:user], :password => options[:pass],
                     :socket => options[:socket], :host => options[:host], :encoding => 'utf8')

  px = options[:table_prefix]

  # Map every entry id to the slug derived from its title.
  page_name_list = {}

  page_name_query = %(
    SELECT
      entries.ID    AS `id`,
      entries.title AS `title`
    FROM #{px}entries AS `entries`
  )

  db[page_name_query].each do |page|
    page_name_list[page[:id]] = { :slug => sluggify(page[:title]) }
  end

  posts_query = "
    SELECT
      entries.ID        AS `id`,
      entries.isdraft   AS `isdraft`,
      entries.title     AS `title`,
      entries.timestamp AS `timestamp`,
      entries.body      AS `body`,
      authors.realname  AS `author`,
      authors.username  AS `author_login`,
      authors.email     AS `author_email`
    FROM #{px}entries AS `entries`
      LEFT JOIN #{px}authors AS `authors`
        ON entries.authorid = authors.authorid"

  unless options[:drafts]
    # The clause needs leading whitespace to stay separate from the ON
    # condition, and must use the `entries` alias declared above.
    posts_query += "\n    WHERE entries.isdraft = 'false'"
  end

  db[posts_query].each do |post|
    process_post(post, db, options, page_name_list)
  end
end
137
+
138
# Converts one entry row into a file with YAML front matter followed by
# the entry body.
#
# post::           The entry row (a hash keyed by symbol).
# db::             The database handle, passed on to the category,
#                  comment, tag and permalink lookups.
# options::        The option hash built by process.
# page_name_list:: Hash mapping entry ids to { :slug => ... }; consulted
#                  only for rows whose :type is 'page'.
#
# Drafts are written to "_drafts/<slug>.<extension>", everything else to
# "_posts/<date>-<slug>.<extension>".
def self.process_post(post, db, options, page_name_list)
  extension = options[:extension]

  title = post[:title]
  title = clean_entities(title) if options[:clean_entities]

  # Build the slug from the raw title, not the entity-encoded one, so
  # that entity names never leak into file names and the result matches
  # the slugs stored in page_name_list.
  slug = post[:slug]
  slug = sluggify(post[:title].to_s) if slug.to_s.empty?

  status = post[:isdraft] == 'true' ? 'draft' : 'published'

  # Time.at raises on nil, so the fallback has to be chosen before the
  # conversion rather than with `||` afterwards.
  timestamp = post[:timestamp]
  date = timestamp ? Time.at(timestamp).utc : Time.now.utc
  name = "%02d-%02d-%02d-%s.%s" % [date.year, date.month, date.day, slug, extension]

  content = post[:body].to_s
  content = clean_entities(content) if options[:clean_entities]
  content = ReverseMarkdown.convert(content) if options[:markdown]

  categories = process_categories(db, options, post)
  comments   = process_comments(db, options, post)
  tags       = process_tags(db, options, post)
  permalink  = process_permalink(db, options, post)

  # Get the relevant fields as a hash, delete empty fields and
  # convert to YAML for the header.
  # NOTE(review): the posts query in process selects no `type` column, so
  # 'layout' is empty (and therefore dropped) for rows coming from there.
  data = {
    'layout'       => post[:type].to_s,
    'status'       => status,
    'published'    => status == 'draft' ? nil : true,
    'title'        => title.to_s,
    'author'       => {
      'display_name' => post[:author].to_s,
      'login'        => post[:author_login].to_s,
      'email'        => post[:author_email].to_s
    },
    'author_login' => post[:author_login].to_s,
    'author_email' => post[:author_email].to_s,
    'date'         => date.to_s,
    'permalink'    => options[:permalinks] ? permalink : nil,
    'categories'   => options[:categories] ? categories : nil,
    'tags'         => options[:tags] ? tags : nil,
    'comments'     => options[:comments] ? comments : nil,
  }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

  if post[:type] == 'page'
    filename = page_path(post[:id], page_name_list) + "index.#{extension}"
    FileUtils.mkdir_p(File.dirname(filename))
  elsif status == 'draft'
    filename = "_drafts/#{slug}.#{extension}"
  else
    filename = "_posts/#{name}"
  end

  # Write out the data and content to file
  File.open(filename, "w") do |f|
    f.puts data
    f.puts "---"
    f.puts Util.wpautop(content)
  end
end
207
+
208
# Tries to require an optional gem.
#
# gem_name::    Name passed to `require`.
# option_name:: Name of the option that depends on the gem; only used in
#               the warning message.
#
# Returns true when the gem could be loaded. When it cannot, a warning is
# written to STDERR and false is returned so the caller can switch the
# dependent option off, as the warning announces.
def self.require_if_available(gem_name, option_name)
  require gem_name
  true
rescue LoadError
  STDERR.puts "Could not require '#{gem_name}', so the :#{option_name} option is now disabled."
  false
end
217
+
218
# Looks up the category names attached to an entry.
#
# db::      Database handle; indexed with the SQL string to get the rows.
# options:: Reads :categories, :table_prefix and :clean_entities.
# post::    The entry row; only post[:id] is used.
#
# Returns an array of names (entity-cleaned when :clean_entities is set),
# or an empty array when :categories is off.
def self.process_categories(db, options, post)
  return [] unless options[:categories]

  prefix = options[:table_prefix]

  sql = %(
    SELECT
      categories.category_name AS `name`
    FROM
      #{prefix}entrycat AS `entrycat`,
      #{prefix}category AS `categories`
    WHERE
      entrycat.entryid = '#{post[:id]}' AND
      entrycat.categoryid = categories.categoryid
  )

  db[sql].map do |row|
    options[:clean_entities] ? clean_entities(row[:name]) : row[:name]
  end
end
242
+
243
# Collects the approved comments of an entry.
#
# db::      Database handle; indexed with the SQL string to get the rows.
# options:: Reads :comments, :table_prefix and :clean_entities.
# post::    The entry row; only post[:id] is used.
#
# Returns an array of string-keyed hashes ordered by comment id, or an
# empty array when :comments is off.
def self.process_comments(db, options, post)
  return [] unless options[:comments]

  prefix = options[:table_prefix]

  sql = %(
    SELECT
      id        AS `id`,
      author    AS `author`,
      email     AS `author_email`,
      url       AS `author_url`,
      timestamp AS `date`,
      body      AS `content`
    FROM #{prefix}comments
    WHERE
      entry_id = '#{post[:id]}' AND
      status = 'approved'
  )

  collected = db[sql].map do |row|
    body   = row[:content].to_s
    author = row[:author].to_s

    body.force_encoding("UTF-8") if body.respond_to?(:force_encoding)

    if options[:clean_entities]
      body   = clean_entities(body)
      author = clean_entities(author)
    end

    {
      'id'           => row[:id].to_i,
      'author'       => author,
      'author_email' => row[:author_email].to_s,
      'author_url'   => row[:author_url].to_s,
      'date'         => row[:date].to_s,
      'content'      => body,
    }
  end

  collected.sort { |left, right| left['id'] <=> right['id'] }
end
285
+
286
# Looks up the tags attached to an entry.
#
# db::      Database handle; indexed with the SQL string to get the rows.
# options:: Reads :tags, :table_prefix and :clean_entities.
# post::    The entry row; only post[:id] is used.
#
# Returns an array of tag names (entity-cleaned when :clean_entities is
# set), or an empty array when :tags is off.
def self.process_tags(db, options, post)
  # Tags are governed by their own switch, not by the categories one.
  return [] unless options[:tags]

  px = options[:table_prefix]

  cquery = %(
    SELECT
      entrytags.tag AS `name`
    FROM
      #{px}entrytags AS `entrytags`
    WHERE
      entrytags.entryid = '#{post[:id]}'
  )

  db[cquery].each_with_object([]) do |tag, tags|
    tags << (options[:clean_entities] ? clean_entities(tag[:name]) : tag[:name])
  end
end
308
+
309
# Looks up the original permalink of an entry.
#
# db::      Database handle; indexed with the SQL string to get the rows.
# options:: Reads :permalinks and :table_prefix.
# post::    The entry row; only post[:id] is used.
#
# Returns the first matching permalink prefixed with "/", or nil when
# :permalinks is off or no permalink row exists for the entry.
def self.process_permalink(db, options, post)
  return unless options[:permalinks]

  px = options[:table_prefix]

  cquery = %(
    SELECT
      permalinks.permalink AS `permalink`
    FROM
      #{px}permalinks AS `permalinks`
    WHERE
      permalinks.entry_id = '#{post[:id]}' AND
      permalinks.type = 'entry'
  )

  db[cquery].each do |link|
    return "/#{link[:permalink]}"
  end

  # No row matched: answer nil explicitly, otherwise the value of `each`
  # (the row collection itself) would leak into the front matter.
  nil
end
328
+
329
# Encodes text with HTMLEntities (named entities) while leaving the
# characters that make up HTML markup untouched.
#
# text:: The text to encode. It is not modified; a copy is re-tagged as
#        UTF-8 before encoding, so frozen strings are accepted.
#
# Returns the encoded string.
def self.clean_entities( text )
  if text.respond_to?(:force_encoding)
    text = text.dup.force_encoding("UTF-8")
  end
  text = HTMLEntities.new.encode(text, :named)
  # We don't want to convert these, it would break all
  # HTML tags in the post and comments.
  text.gsub!("&amp;", "&")
  text.gsub!("&lt;", "<")
  text.gsub!("&gt;", ">")
  text.gsub!("&quot;", '"')
  text.gsub!("&apos;", "'")
  # Slashes belong to closing tags and URLs, so they are restored too
  # instead of being turned into entities.
  text.gsub!("&#47;", "/")
  text
end
344
+
345
# Turns a title into a URL slug: transliterates it with to_ascii,
# lowercases it, collapses every run of non-alphanumeric characters and
# joins the remaining words with dashes.
def self.sluggify( title )
  lowered = title.to_ascii.downcase
  spaced  = lowered.gsub(/[^0-9A-Za-z]+/, " ").strip
  spaced.tr(" ", "-")
end
348
+
349
# Builds the directory path for a page: the slug recorded for page_id
# followed by "/", or an empty string when the id is not in the list.
def self.page_path( page_id, page_name_list )
  return "" unless page_name_list.key?(page_id)

  "#{page_name_list[page_id][:slug]}/"
end
359
+
360
+ end
361
+ end
362
+ end
363
+
@@ -1,70 +1,70 @@
1
- module BuntoImport
2
- module Importers
3
- class TextPattern < Importer
4
- # Reads a MySQL database via Sequel and creates a post file for each post.
5
- # The only posts selected are those with a status of 4 or 5, which means
6
- # "live" and "sticky" respectively.
7
- # Other statuses are 1 => draft, 2 => hidden and 3 => pending.
8
- QUERY = "SELECT Title, \
9
- url_title, \
10
- Posted, \
11
- Body, \
12
- Keywords \
13
- FROM textpattern \
14
- WHERE Status = '4' OR \
15
- Status = '5'"
16
-
17
- def self.require_deps
18
- BuntoImport.require_with_fallback(%w[
19
- rubygems
20
- sequel
21
- fileutils
22
- safe_yaml
23
- ])
24
- end
25
-
26
- def self.specify_options(c)
27
- c.option 'dbname', '--dbname DB', 'Database name'
28
- c.option 'user', '--user USER', 'Database user name'
29
- c.option 'password', '--password PW', "Database user's password"
30
- c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
31
- end
32
-
33
- def self.process(options)
34
- dbname = options.fetch('dbname')
35
- user = options.fetch('user')
36
- pass = options.fetch('password', "")
37
- host = options.fetch('host', "localhost")
38
-
39
- db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
40
-
41
- FileUtils.mkdir_p "_posts"
42
-
43
- db[QUERY].each do |post|
44
- # Get required fields and construct Bunto compatible name.
45
- title = post[:Title]
46
- slug = post[:url_title]
47
- date = post[:Posted]
48
- content = post[:Body]
49
-
50
- name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile"
51
-
52
- # Get the relevant fields as a hash, delete empty fields and convert
53
- # to YAML for the header.
54
- data = {
55
- 'layout' => 'post',
56
- 'title' => title.to_s,
57
- 'tags' => post[:Keywords].split(',')
58
- }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
59
-
60
- # Write out the data and content to file.
61
- File.open("_posts/#{name}", "w") do |f|
62
- f.puts data
63
- f.puts "---"
64
- f.puts content
65
- end
66
- end
67
- end
68
- end
69
- end
70
- end
1
+ module BuntoImport
2
+ module Importers
3
+ class TextPattern < Importer
4
+ # Reads a MySQL database via Sequel and creates a post file for each post.
5
+ # The only posts selected are those with a status of 4 or 5, which means
6
+ # "live" and "sticky" respectively.
7
+ # Other statuses are 1 => draft, 2 => hidden and 3 => pending.
8
+ QUERY = "SELECT Title, \
9
+ url_title, \
10
+ Posted, \
11
+ Body, \
12
+ Keywords \
13
+ FROM textpattern \
14
+ WHERE Status = '4' OR \
15
+ Status = '5'"
16
+
17
# Loads the libraries this importer needs by handing their names to
# BuntoImport.require_with_fallback.
def self.require_deps
  dependencies = %w(rubygems sequel fileutils safe_yaml)
  BuntoImport.require_with_fallback(dependencies)
end
25
+
26
# Declares the command-line options understood by this importer; each
# entry is registered through c.option as key, switch and help text.
def self.specify_options(c)
  declarations = [
    ['dbname', '--dbname DB', 'Database name'],
    ['user', '--user USER', 'Database user name'],
    ['password', '--password PW', "Database user's password"],
    ['host', '--host HOST', 'Database host name (default: "localhost")'],
  ]
  # `last` keeps the return value equal to that of the final c.option call.
  declarations.map { |key, switch, help| c.option(key, switch, help) }.last
end
32
+
33
# Reads the rows returned by QUERY and writes one Textile post file per
# row into "_posts".
#
# options:: A string-keyed hash. 'dbname' and 'user' are mandatory
#           (fetch raises KeyError when they are missing); 'password'
#           defaults to "" and 'host' to "localhost".
def self.process(options)
  dbname = options.fetch('dbname')
  user   = options.fetch('user')
  pass   = options.fetch('password', "")
  host   = options.fetch('host', "localhost")

  db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

  FileUtils.mkdir_p "_posts"

  db[QUERY].each do |post|
    # Get required fields and construct Bunto compatible name.
    title   = post[:Title]
    slug    = post[:url_title]
    date    = post[:Posted]
    content = post[:Body]

    name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile"

    # A NULL Keywords column arrives as nil; to_s keeps split from
    # raising, and strip removes the blanks around each keyword.
    tags = post[:Keywords].to_s.split(',').map(&:strip)

    # Get the relevant fields as a hash, delete empty fields and convert
    # to YAML for the header.
    data = {
      'layout' => 'post',
      'title'  => title.to_s,
      'tags'   => tags
    }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

    # Write out the data and content to file.
    File.open("_posts/#{name}", "w") do |f|
      f.puts data
      f.puts "---"
      f.puts content
    end
  end
end
68
+ end
69
+ end
70
+ end