bunto-import 2.0.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +21 -21
  3. data/README.markdown +33 -33
  4. data/lib/bunto-import.rb +49 -49
  5. data/lib/bunto-import/importer.rb +26 -26
  6. data/lib/bunto-import/importers.rb +10 -10
  7. data/lib/bunto-import/importers/behance.rb +80 -80
  8. data/lib/bunto-import/importers/blogger.rb +330 -264
  9. data/lib/bunto-import/importers/csv.rb +96 -96
  10. data/lib/bunto-import/importers/drupal6.rb +53 -139
  11. data/lib/bunto-import/importers/drupal7.rb +54 -111
  12. data/lib/bunto-import/importers/drupal_common.rb +157 -0
  13. data/lib/bunto-import/importers/easyblog.rb +96 -96
  14. data/lib/bunto-import/importers/enki.rb +74 -74
  15. data/lib/bunto-import/importers/ghost.rb +68 -68
  16. data/lib/bunto-import/importers/google_reader.rb +64 -64
  17. data/lib/bunto-import/importers/joomla.rb +92 -90
  18. data/lib/bunto-import/importers/joomla3.rb +91 -91
  19. data/lib/bunto-import/importers/jrnl.rb +125 -125
  20. data/lib/bunto-import/importers/marley.rb +72 -72
  21. data/lib/bunto-import/importers/mephisto.rb +99 -99
  22. data/lib/bunto-import/importers/mt.rb +257 -257
  23. data/lib/bunto-import/importers/posterous.rb +130 -130
  24. data/lib/bunto-import/importers/rss.rb +62 -62
  25. data/lib/bunto-import/importers/s9y.rb +60 -60
  26. data/lib/bunto-import/importers/s9y_database.rb +363 -0
  27. data/lib/bunto-import/importers/textpattern.rb +70 -70
  28. data/lib/bunto-import/importers/tumblr.rb +300 -289
  29. data/lib/bunto-import/importers/typo.rb +88 -88
  30. data/lib/bunto-import/importers/wordpress.rb +372 -372
  31. data/lib/bunto-import/importers/wordpressdotcom.rb +207 -207
  32. data/lib/bunto-import/util.rb +76 -76
  33. data/lib/bunto-import/version.rb +3 -3
  34. data/lib/bunto/commands/import.rb +79 -79
  35. metadata +84 -54
@@ -0,0 +1,363 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class S9YDatabase < Importer
4
+
5
# Hands the list of libraries this importer relies on to
# BuntoImport.require_with_fallback, which is responsible for loading them.
def self.require_deps
  libraries = %w[rubygems sequel fileutils safe_yaml unidecode]
  BuntoImport.require_with_fallback(libraries)
end
15
+
16
# Registers the command-line switches understood by this importer.
#
# c - the command object being configured. Only its `option` method is
#     used; it is called once per switch with the option key, the switch
#     specification and the help text.
def self.specify_options(c)
  c.option 'dbname', '--dbname DB', 'Database name (default: "")'
  c.option 'socket', '--socket SOCKET', 'Database socket (default: "")'
  c.option 'user', '--user USER', 'Database user name (default: "")'
  # The inner quotes must be escaped. Unescaped, Ruby reads two adjacent
  # string literals and the help text loses the "" it is meant to display.
  c.option 'password', '--password PW', "Database user's password (default: \"\")"
  c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
  c.option 'table_prefix', '--table_prefix PREFIX', 'Table prefix name (default: "serendipity_")'
  c.option 'clean_entities', '--clean_entities', 'Whether to clean entities (default: true)'
  c.option 'comments', '--comments', 'Whether to import comments (default: true)'
  c.option 'categories', '--categories', 'Whether to import categories (default: true)'
  c.option 'tags', '--tags', 'Whether to import tags (default: true)'
  c.option 'drafts', '--drafts', 'Whether to export drafts as well'
  c.option 'markdown', '--markdown', 'convert into markdown format (default: false)'
  c.option 'permalinks', '--permalinks', 'preserve S9Y permalinks (default: false)'
end
31
+
32
# Main migrator function. Call this to perform the migration.
#
# opts is a hash keyed by strings. Recognised keys and their defaults:
#
# 'dbname'::         The name of the database. Default: ''
# 'user'::           The database user name. Default: ''
# 'password'::       The database user's password. Default: ''
# 'host'::           The address of the MySQL database host.
#                    Default: 'localhost'
# 'socket'::         The database socket's path. Default: nil
# 'table_prefix'::   Prefix of the Serendipity (S9Y) database tables.
#                    Default: 'serendipity_'
# 'clean_entities':: If true, convert non-ASCII characters to HTML
#                    entities in the posts, comments, titles, and
#                    names. Requires the 'htmlentities' gem to
#                    work. Default: true.
# 'comments'::       If true, migrate post comments too. Comments
#                    are saved in the post's YAML front matter.
#                    Default: true.
# 'categories'::     If true, save the post's categories in its
#                    YAML front matter. Default: true.
# 'tags'::           If true, save the post's tags in its
#                    YAML front matter. Default: true.
# 'extension'::      Set the post extension. Default: "html"
# 'drafts'::         If true, export drafts as well.
#                    Default: true.
# 'markdown'::       If true, convert the content to markdown.
#                    Requires the 'reverse_markdown' gem.
#                    Default: false
# 'permalinks'::     If true, save the post's original permalink in its
#                    YAML front matter. Default: false.
#
# Creates "_posts" (and "_drafts" when drafts are exported) in the current
# directory and passes every selected entry to process_post.
def self.process(opts)
  options = {
    :user           => opts.fetch('user', ''),
    :pass           => opts.fetch('password', ''),
    :host           => opts.fetch('host', 'localhost'),
    :socket         => opts.fetch('socket', nil),
    :dbname         => opts.fetch('dbname', ''),
    :table_prefix   => opts.fetch('table_prefix', 'serendipity_'),
    :clean_entities => opts.fetch('clean_entities', true),
    :comments       => opts.fetch('comments', true),
    :categories     => opts.fetch('categories', true),
    :tags           => opts.fetch('tags', true),
    :extension      => opts.fetch('extension', 'html'),
    :drafts         => opts.fetch('drafts', true),
    :markdown       => opts.fetch('markdown', false),
    :permalinks     => opts.fetch('permalinks', false),
  }

  # Optional features are switched on only when require_if_available
  # reports that the supporting gem could be loaded.
  if options[:clean_entities]
    options[:clean_entities] = require_if_available('htmlentities', 'clean_entities')
  end

  if options[:markdown]
    options[:markdown] = require_if_available('reverse_markdown', 'markdown')
  end

  FileUtils.mkdir_p("_posts")
  FileUtils.mkdir_p("_drafts") if options[:drafts]

  db = Sequel.mysql2(options[:dbname], :user => options[:user], :password => options[:pass],
                     :socket => options[:socket], :host => options[:host], :encoding => 'utf8')

  px = options[:table_prefix]

  # Map of entry id => { :slug => ... }, consulted by page_path.
  page_name_list = {}

  page_name_query = %(
    SELECT
      entries.ID    AS `id`,
      entries.title AS `title`
    FROM #{px}entries AS `entries`
  )

  db[page_name_query].each do |page|
    page_name_list[page[:id]] = { :slug => sluggify(page[:title]) }
  end

  posts_query = %(
    SELECT
      entries.ID        AS `id`,
      entries.isdraft   AS `isdraft`,
      entries.title     AS `title`,
      entries.timestamp AS `timestamp`,
      entries.body      AS `body`,
      authors.realname  AS `author`,
      authors.username  AS `author_login`,
      authors.email     AS `author_email`
    FROM #{px}entries AS `entries`
      LEFT JOIN #{px}authors AS `authors`
        ON entries.authorid = authors.authorid
  )

  # The filter needs separating whitespace before WHERE and must use the
  # `entries` alias declared in the FROM clause above.
  unless options[:drafts]
    posts_query << " WHERE entries.isdraft = 'false'"
  end

  db[posts_query].each do |post|
    process_post(post, db, options, page_name_list)
  end
end
137
+
138
# Writes one entry to disk as YAML front matter followed by its content.
#
# post           - row hash for the entry; reads :title, :slug, :isdraft,
#                  :timestamp, :body, :type, :id and the :author* fields.
#                  NOTE(review): the posts query visible in `process` does
#                  not select :slug or :type, so those are normally nil.
# db             - database handle passed through to the process_* helpers.
# options        - symbol-keyed option hash built by `process`.
# page_name_list - map of entry id => { :slug => ... } used for page paths.
#
# Drafts go to "_drafts/<slug>.<ext>", everything else to
# "_posts/<yyyy>-<mm>-<dd>-<slug>.<ext>"; both directories must already
# exist.
def self.process_post(post, db, options, page_name_list)
  extension = options[:extension]

  title = post[:title]
  title = clean_entities(title) if options[:clean_entities]

  slug = post[:slug]
  slug = sluggify(title) if !slug || slug.empty?

  status = post[:isdraft] == 'true' ? 'draft' : 'published'

  # Time.at(nil) raises, so the fallback to "now" has to be decided before
  # the conversion rather than with `||` afterwards.
  timestamp = post[:timestamp]
  date = timestamp ? Time.at(timestamp).utc : Time.now.utc
  name = "%02d-%02d-%02d-%s.%s" % [date.year, date.month, date.day, slug, extension]

  content = post[:body].to_s
  content = clean_entities(content) if options[:clean_entities]
  content = ReverseMarkdown.convert(content) if options[:markdown]

  categories = process_categories(db, options, post)
  comments   = process_comments(db, options, post)
  tags       = process_tags(db, options, post)
  permalink  = process_permalink(db, options, post)

  # Get the relevant fields as a hash, delete empty fields and
  # convert to YAML for the header.
  data = {
    'layout'       => post[:type].to_s,
    'status'       => status.to_s,
    'published'    => status.to_s == 'draft' ? nil : (status.to_s == 'published'),
    'title'        => title.to_s,
    'author'       => {
      'display_name' => post[:author].to_s,
      'login'        => post[:author_login].to_s,
      'email'        => post[:author_email].to_s
    },
    'author_login' => post[:author_login].to_s,
    'author_email' => post[:author_email].to_s,
    'date'         => date.to_s,
    'permalink'    => options[:permalinks] ? permalink : nil,
    'categories'   => options[:categories] ? categories : nil,
    'tags'         => options[:tags] ? tags : nil,
    'comments'     => options[:comments] ? comments : nil,
  }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

  if post[:type] == 'page'
    filename = page_path(post[:id], page_name_list) + "index.#{extension}"
    FileUtils.mkdir_p(File.dirname(filename))
  elsif status == 'draft'
    filename = "_drafts/#{slug}.#{extension}"
  else
    filename = "_posts/#{name}"
  end

  # Write out the data and content to file
  File.open(filename, "w") do |f|
    f.puts data
    f.puts "---"
    f.puts Util.wpautop(content)
  end
end
207
+
208
# Tries to load an optional gem.
#
# gem_name    - name passed to `require`.
# option_name - name of the option that depends on the gem; only used in
#               the warning message.
#
# Returns true when the gem was loaded. When `require` raises LoadError a
# warning is written to STDERR and false is returned, so the caller can
# switch the dependent option off as the warning announces.
def self.require_if_available(gem_name, option_name)
  require gem_name
  true
rescue LoadError
  STDERR.puts "Could not require '#{gem_name}', so the :#{option_name} option is now disabled."
  false
end
217
+
218
# Collects the category names attached to an entry.
#
# db      - database handle; indexed with the SQL text to get the rows.
# options - option hash; reads :categories, :table_prefix, :clean_entities.
# post    - row hash for the entry; only :id is read.
#
# Returns an array of names (passed through clean_entities when that
# option is set), or [] when categories are disabled.
def self.process_categories(db, options, post)
  return [] unless options[:categories]

  prefix = options[:table_prefix]

  sql = %(
    SELECT
      categories.category_name AS `name`
    FROM
      #{prefix}entrycat AS `entrycat`,
      #{prefix}category AS `categories`
    WHERE
      entrycat.entryid = '#{post[:id]}' AND
      entrycat.categoryid = categories.categoryid
  )

  db[sql].map do |row|
    options[:clean_entities] ? clean_entities(row[:name]) : row[:name]
  end
end
242
+
243
# Collects the approved comments of an entry, ordered by comment id.
#
# db      - database handle; indexed with the SQL text to get the rows.
# options - option hash; reads :comments, :table_prefix, :clean_entities.
# post    - row hash for the entry; only :id is read.
#
# Returns an array of string-keyed hashes ('id', 'author', 'author_email',
# 'author_url', 'date', 'content'), or [] when comments are disabled.
def self.process_comments(db, options, post)
  return [] unless options[:comments]

  prefix = options[:table_prefix]

  sql = %(
    SELECT
      id        AS `id`,
      author    AS `author`,
      email     AS `author_email`,
      url       AS `author_url`,
      timestamp AS `date`,
      body      AS `content`
    FROM #{prefix}comments
    WHERE
      entry_id = '#{post[:id]}' AND
      status = 'approved'
  )

  records = db[sql].map do |row|
    body   = row[:content].to_s
    writer = row[:author].to_s

    body.force_encoding("UTF-8") if body.respond_to?(:force_encoding)

    if options[:clean_entities]
      body   = clean_entities(body)
      writer = clean_entities(writer)
    end

    {
      'id'           => row[:id].to_i,
      'author'       => writer,
      'author_email' => row[:author_email].to_s,
      'author_url'   => row[:author_url].to_s,
      'date'         => row[:date].to_s,
      'content'      => body,
    }
  end

  records.sort { |left, right| left['id'] <=> right['id'] }
end
285
+
286
# Collects the tags attached to an entry.
#
# db      - database handle; indexed with the SQL text to get the rows.
# options - option hash; reads :tags, :table_prefix, :clean_entities.
# post    - row hash for the entry; only :id is read.
#
# Returns an array of tag names (passed through clean_entities when that
# option is set), or [] when tags are disabled.
def self.process_tags(db, options, post)
  # Tag import is governed by :tags, independently of :categories.
  return [] unless options[:tags]

  px = options[:table_prefix]

  cquery = %(
    SELECT
      entrytags.tag AS `name`
    FROM
      #{px}entrytags AS `entrytags`
    WHERE
      entrytags.entryid = '#{post[:id]}'
  )

  db[cquery].each_with_object([]) do |tag, tags|
    tags << (options[:clean_entities] ? clean_entities(tag[:name]) : tag[:name])
  end
end
308
+
309
# Looks up the original permalink of an entry.
#
# db      - database handle; indexed with the SQL text to get the rows.
# options - option hash; reads :permalinks and :table_prefix.
# post    - row hash for the entry; only :id is read.
#
# Returns the first stored permalink prefixed with "/", or nil when
# permalinks are disabled or no permalink row exists for the entry.
def self.process_permalink(db, options, post)
  return unless options[:permalinks]

  px = options[:table_prefix]

  cquery = %(
    SELECT
      permalinks.permalink AS `permalink`
    FROM
      #{px}permalinks AS `permalinks`
    WHERE
      permalinks.entry_id = '#{post[:id]}' AND
      permalinks.type = 'entry'
  )

  db[cquery].each do |link|
    return "/#{link[:permalink]}"
  end

  # Without this, an empty result would return whatever `each` returns
  # (the result set itself) and that object would end up in the front matter.
  nil
end
328
+
329
# Encodes text as named HTML entities while leaving markup characters alone.
#
# The argument is re-tagged as UTF-8 in place, then encoded through
# HTMLEntities. The entities for &, <, >, " and ' are turned back into the
# literal characters (in that order) so HTML tags in posts and comments
# survive, and finally every "/" is replaced by "&#47;".
#
# Returns the encoded string.
def self.clean_entities(text)
  text.force_encoding("UTF-8") if text.respond_to?(:force_encoding)

  encoded = HTMLEntities.new.encode(text, :named)

  # We don't want to convert these, it would break all
  # HTML tags in the post and comments.
  [
    ["&amp;", "&"],
    ["&lt;", "<"],
    ["&gt;", ">"],
    ["&quot;", '"'],
    ["&apos;", "'"],
    ["/", "&#47;"],
  ].each do |from, to|
    encoded.gsub!(from, to)
  end

  encoded
end
344
+
345
# Builds a URL slug from a title: transliterates with String#to_ascii,
# lower-cases, and joins the runs of ASCII letters and digits with "-".
def self.sluggify(title)
  words = title.to_ascii.downcase.split(/[^0-9A-Za-z]+/)
  words.reject(&:empty?).join("-")
end
348
+
349
# Returns the directory path for a page: the slug recorded for page_id in
# page_name_list followed by "/", or "" when the id is not in the list.
def self.page_path(page_id, page_name_list)
  return "" unless page_name_list.key?(page_id)

  "#{page_name_list[page_id][:slug]}/"
end
359
+
360
+ end
361
+ end
362
+ end
363
+
@@ -1,70 +1,70 @@
1
- module BuntoImport
2
- module Importers
3
- class TextPattern < Importer
4
- # Reads a MySQL database via Sequel and creates a post file for each post.
5
- # The only posts selected are those with a status of 4 or 5, which means
6
- # "live" and "sticky" respectively.
7
- # Other statuses are 1 => draft, 2 => hidden and 3 => pending.
8
- QUERY = "SELECT Title, \
9
- url_title, \
10
- Posted, \
11
- Body, \
12
- Keywords \
13
- FROM textpattern \
14
- WHERE Status = '4' OR \
15
- Status = '5'"
16
-
17
- def self.require_deps
18
- BuntoImport.require_with_fallback(%w[
19
- rubygems
20
- sequel
21
- fileutils
22
- safe_yaml
23
- ])
24
- end
25
-
26
- def self.specify_options(c)
27
- c.option 'dbname', '--dbname DB', 'Database name'
28
- c.option 'user', '--user USER', 'Database user name'
29
- c.option 'password', '--password PW', "Database user's password"
30
- c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
31
- end
32
-
33
- def self.process(options)
34
- dbname = options.fetch('dbname')
35
- user = options.fetch('user')
36
- pass = options.fetch('password', "")
37
- host = options.fetch('host', "localhost")
38
-
39
- db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
40
-
41
- FileUtils.mkdir_p "_posts"
42
-
43
- db[QUERY].each do |post|
44
- # Get required fields and construct Bunto compatible name.
45
- title = post[:Title]
46
- slug = post[:url_title]
47
- date = post[:Posted]
48
- content = post[:Body]
49
-
50
- name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile"
51
-
52
- # Get the relevant fields as a hash, delete empty fields and convert
53
- # to YAML for the header.
54
- data = {
55
- 'layout' => 'post',
56
- 'title' => title.to_s,
57
- 'tags' => post[:Keywords].split(',')
58
- }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
59
-
60
- # Write out the data and content to file.
61
- File.open("_posts/#{name}", "w") do |f|
62
- f.puts data
63
- f.puts "---"
64
- f.puts content
65
- end
66
- end
67
- end
68
- end
69
- end
70
- end
1
+ module BuntoImport
2
+ module Importers
3
+ class TextPattern < Importer
4
# Reads a MySQL database via Sequel and creates a post file for each post.
# The only posts selected are those with a status of 4 or 5, which means
# "live" and "sticky" respectively.
# Other statuses are 1 => draft, 2 => hidden and 3 => pending.
#
# Written as a frozen heredoc so the SQL does not depend on backslash line
# continuations and the shared constant cannot be mutated by accident.
QUERY = <<~SQL.freeze
  SELECT Title,
         url_title,
         Posted,
         Body,
         Keywords
  FROM textpattern
  WHERE Status = '4' OR
        Status = '5'
SQL
16
+
17
# Hands the list of libraries this importer relies on to
# BuntoImport.require_with_fallback, which is responsible for loading them.
def self.require_deps
  libraries = %w[rubygems sequel fileutils safe_yaml]
  BuntoImport.require_with_fallback(libraries)
end
25
+
26
# Registers the command-line switches understood by this importer by
# calling c.option once per switch with (key, switch spec, help text).
def self.specify_options(c)
  [
    ['dbname', '--dbname DB', 'Database name'],
    ['user', '--user USER', 'Database user name'],
    ['password', '--password PW', "Database user's password"],
    ['host', '--host HOST', 'Database host name (default: "localhost")'],
  ].each do |key, switch, help|
    c.option key, switch, help
  end
end
32
+
33
# Exports every row returned by QUERY as a Textile post under "_posts".
#
# options - string-keyed hash. 'dbname' and 'user' are read with a bare
#           `fetch`, so they are required; 'password' defaults to "" and
#           'host' to "localhost".
#
# Each post is written to "_posts/<yyyy-mm-dd>-<url_title>.textile" with a
# YAML header (layout, title, tags) followed by "---" and the body.
def self.process(options)
  dbname = options.fetch('dbname')
  user   = options.fetch('user')
  pass   = options.fetch('password', "")
  host   = options.fetch('host', "localhost")

  db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

  FileUtils.mkdir_p "_posts"

  db[QUERY].each do |post|
    # Get required fields and construct Bunto compatible name.
    title   = post[:Title]
    slug    = post[:url_title]
    date    = post[:Posted]
    content = post[:Body]

    name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile"

    # Get the relevant fields as a hash, delete empty fields and convert
    # to YAML for the header.
    data = {
      'layout' => 'post',
      'title'  => title.to_s,
      # to_s guards against a NULL Keywords column, which would otherwise
      # raise NoMethodError on nil; an empty string already yields [].
      'tags'   => post[:Keywords].to_s.split(',')
    }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

    # Write out the data and content to file.
    File.open("_posts/#{name}", "w") do |f|
      f.puts data
      f.puts "---"
      f.puts content
    end
  end
end
68
+ end
69
+ end
70
+ end