bunto-import 2.0.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +21 -21
  3. data/README.markdown +33 -33
  4. data/lib/bunto-import.rb +49 -49
  5. data/lib/bunto-import/importer.rb +26 -26
  6. data/lib/bunto-import/importers.rb +10 -10
  7. data/lib/bunto-import/importers/behance.rb +80 -80
  8. data/lib/bunto-import/importers/blogger.rb +330 -264
  9. data/lib/bunto-import/importers/csv.rb +96 -96
  10. data/lib/bunto-import/importers/drupal6.rb +53 -139
  11. data/lib/bunto-import/importers/drupal7.rb +54 -111
  12. data/lib/bunto-import/importers/drupal_common.rb +157 -0
  13. data/lib/bunto-import/importers/easyblog.rb +96 -96
  14. data/lib/bunto-import/importers/enki.rb +74 -74
  15. data/lib/bunto-import/importers/ghost.rb +68 -68
  16. data/lib/bunto-import/importers/google_reader.rb +64 -64
  17. data/lib/bunto-import/importers/joomla.rb +92 -90
  18. data/lib/bunto-import/importers/joomla3.rb +91 -91
  19. data/lib/bunto-import/importers/jrnl.rb +125 -125
  20. data/lib/bunto-import/importers/marley.rb +72 -72
  21. data/lib/bunto-import/importers/mephisto.rb +99 -99
  22. data/lib/bunto-import/importers/mt.rb +257 -257
  23. data/lib/bunto-import/importers/posterous.rb +130 -130
  24. data/lib/bunto-import/importers/rss.rb +62 -62
  25. data/lib/bunto-import/importers/s9y.rb +60 -60
  26. data/lib/bunto-import/importers/s9y_database.rb +363 -0
  27. data/lib/bunto-import/importers/textpattern.rb +70 -70
  28. data/lib/bunto-import/importers/tumblr.rb +300 -289
  29. data/lib/bunto-import/importers/typo.rb +88 -88
  30. data/lib/bunto-import/importers/wordpress.rb +372 -372
  31. data/lib/bunto-import/importers/wordpressdotcom.rb +207 -207
  32. data/lib/bunto-import/util.rb +76 -76
  33. data/lib/bunto-import/version.rb +3 -3
  34. data/lib/bunto/commands/import.rb +79 -79
  35. metadata +84 -54
@@ -1,257 +1,257 @@
1
- module BuntoImport
2
- module Importers
3
- class MT < Importer
4
-
5
- SUPPORTED_ENGINES = %{mysql postgres sqlite}
6
-
7
- STATUS_DRAFT = 1
8
- STATUS_PUBLISHED = 2
9
- MORE_CONTENT_SEPARATOR = '<!--more-->'
10
-
11
- def self.default_options
12
- {
13
- 'blog_id' => nil,
14
- 'categories' => true,
15
- 'dest_encoding' => 'utf-8',
16
- 'src_encoding' => 'utf-8',
17
- 'comments' => false
18
- }
19
- end
20
-
21
- def self.require_deps
22
- BuntoImport.require_with_fallback(%w[
23
- rubygems
24
- sequel
25
- fileutils
26
- safe_yaml
27
- ])
28
- end
29
-
30
- def self.specify_options(c)
31
- c.option 'engine', "--engine ENGINE", "Database engine, (default: 'mysql', postgres also supported)"
32
- c.option 'dbname', '--dbname DB', 'Database name'
33
- c.option 'user', '--user USER', 'Database user name'
34
- c.option 'password', '--password PW', "Database user's password, (default: '')"
35
- c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
36
- c.option 'port', '--port PORT', 'Custom database port connect to (optional)'
37
- c.option 'blog_id', '--blog_id ID', 'Specify a single Movable Type blog ID to import (default: all blogs)'
38
- c.option 'categories', '--categories', "If true, save post's categories in its YAML front matter. (default: true)"
39
- c.option 'src_encoding', '--src_encoding ENCODING', "Encoding of strings from database. (default: UTF-8)"
40
- c.option 'dest_encoding', '--dest_encoding ENCODING', "Encoding of output strings. (default: UTF-8)"
41
- c.option 'comments','--comments', "If true, output comments in _comments directory (default: false)"
42
- end
43
-
44
- # By default this migrator will include posts for all your MovableType blogs.
45
- # Specify a single blog by providing blog_id.
46
-
47
- # Main migrator function. Call this to perform the migration.
48
- #
49
- # dbname:: The name of the database
50
- # user:: The database user name
51
- # pass:: The database user's password
52
- # host:: The address of the MySQL database host. Default: 'localhost'
53
- # options:: A hash of configuration options
54
- #
55
- # Supported options are:
56
- #
57
- # blog_id:: Specify a single MovableType blog to export by providing blog_id.
58
- # Default: nil, importer will include posts for all blogs.
59
- # categories:: If true, save the post's categories in its
60
- # YAML front matter. Default: true
61
- # src_encoding:: Encoding of strings from the database. Default: UTF-8
62
- # If your output contains mangled characters, set src_encoding to
63
- # something appropriate for your database charset.
64
- # dest_encoding:: Encoding of output strings. Default: UTF-8
65
- # comments:: If true, output comments in _comments directory, like the one
66
- # mentioned at https://github.com/mpalmer/bunto-static-comments/
67
- def self.process(options)
68
- options = default_options.merge(options)
69
-
70
- comments = options.fetch('comments')
71
- posts_name_by_id = {} if comments
72
-
73
- db = database_from_opts(options)
74
-
75
- post_categories = db[:mt_placement].join(:mt_category, :category_id => :placement_category_id)
76
-
77
- FileUtils.mkdir_p "_posts"
78
-
79
- posts = db[:mt_entry]
80
- posts = posts.filter(:entry_blog_id => options['blog_id']) if options['blog_id']
81
- posts.each do |post|
82
- categories = post_categories.filter(
83
- :mt_placement__placement_entry_id => post[:entry_id]
84
- ).map {|ea| encode(ea[:category_basename], options) }
85
-
86
- file_name = post_file_name(post, options)
87
-
88
- data = post_metadata(post, options)
89
- data['categories'] = categories if !categories.empty? && options['categories']
90
- yaml_front_matter = data.delete_if { |_,v| v.nil? || v == '' }.to_yaml
91
-
92
- # save post path for comment processing
93
- posts_name_by_id[data['post_id']] = file_name if comments
94
-
95
- content = post_content(post, options)
96
-
97
- File.open("_posts/#{file_name}", "w") do |f|
98
- f.puts yaml_front_matter
99
- f.puts "---"
100
- f.puts encode(content, options)
101
- end
102
- end
103
-
104
- # process comment output, if enabled
105
- if comments
106
- FileUtils.mkdir_p "_comments"
107
-
108
- comments = db[:mt_comment]
109
- comments.each do |comment|
110
- if posts_name_by_id.key?(comment[:comment_entry_id]) # if the entry exists
111
- dir_name, base_name = comment_file_dir_and_base_name(posts_name_by_id, comment, options)
112
- FileUtils.mkdir_p "_comments/#{dir_name}"
113
-
114
- data = comment_metadata(comment, options)
115
- content = comment_content(comment, options)
116
- yaml_front_matter = data.delete_if { |_,v| v.nil? || v == '' }.to_yaml
117
-
118
- File.open("_comments/#{dir_name}/#{base_name}", "w") do |f|
119
- f.puts yaml_front_matter
120
- f.puts "---"
121
- f.puts encode(content, options)
122
- end
123
- end
124
- end
125
- end
126
-
127
- end
128
-
129
- # Extracts metadata for YAML front matter from post
130
- def self.post_metadata(post, options = default_options)
131
- metadata = {
132
- 'layout' => 'post',
133
- 'title' => encode(post[:entry_title], options),
134
- 'date' => post_date(post).strftime("%Y-%m-%d %H:%M:%S %z"),
135
- 'excerpt' => encode(post[:entry_excerpt].to_s, options),
136
- 'mt_id' => post[:entry_id],
137
- 'blog_id' => post[:entry_blog_id],
138
- 'post_id' => post[:entry_id], # for link with comments
139
- 'basename' => post[:entry_basename]
140
- }
141
- metadata['published'] = false if post[:entry_status] != STATUS_PUBLISHED
142
- metadata
143
- end
144
-
145
- # Different versions of MT used different column names
146
- def self.post_date(post)
147
- post[:entry_authored_on] || post[:entry_created_on]
148
- end
149
-
150
- # Extracts text body from post
151
- def self.extra_entry_text_empty?(post)
152
- post[:entry_text_more].nil? || post[:entry_text_more].strip.empty?
153
- end
154
-
155
- def self.post_content(post, options = default_options)
156
- if extra_entry_text_empty?(post)
157
- post[:entry_text]
158
- else
159
- post[:entry_text] + "\n\n#{MORE_CONTENT_SEPARATOR}\n\n" + post[:entry_text_more]
160
- end
161
- end
162
-
163
- def self.post_file_name(post, options = default_options)
164
- date = post_date(post)
165
- slug = post[:entry_basename]
166
- file_ext = suffix(post[:entry_convert_breaks])
167
-
168
- "#{date.strftime('%Y-%m-%d')}-#{slug}.#{file_ext}"
169
- end
170
-
171
- # Extracts metadata for YAML front matter from comment
172
- def self.comment_metadata(comment, options = default_options)
173
- metadata = {
174
- 'layout' => 'comment',
175
- 'comment_id' => comment[:comment_id],
176
- 'post_id' => comment[:comment_entry_id],
177
- 'author' => encode(comment[:comment_author], options),
178
- 'email' => comment[:comment_email],
179
- 'commenter_id' => comment[:comment_commenter_id],
180
- 'date' => comment_date(comment).strftime("%Y-%m-%d %H:%M:%S %z"),
181
- 'visible' => comment[:comment_visible] == 1,
182
- 'ip' => comment[:comment_ip],
183
- 'url' => comment[:comment_url]
184
- }
185
- metadata
186
- end
187
-
188
- # Different versions of MT used different column names
189
- def self.comment_date(comment)
190
- comment[:comment_modified_on] || comment[:comment_created_on]
191
- end
192
-
193
- def self.comment_content(comment, options = default_options)
194
- comment[:comment_text]
195
- end
196
-
197
- def self.comment_file_dir_and_base_name(posts_name_by_id, comment, options = default_options)
198
- post_basename = posts_name_by_id[comment[:comment_entry_id]].sub(/\.\w+$/, '')
199
- comment_id = comment[:comment_id]
200
-
201
- [post_basename, "#{comment_id}.markdown"]
202
- end
203
-
204
- def self.encode(str, options = default_options)
205
- if str.respond_to?(:encoding)
206
- str.encode(options['dest_encoding'], options['src_encoding'])
207
- else
208
- str
209
- end
210
- end
211
-
212
- # Ideally, this script would determine the post format (markdown,
213
- # html, etc) and create files with proper extensions. At this point
214
- # it just assumes that markdown will be acceptable.
215
- def self.suffix(entry_type)
216
- if entry_type.nil? || entry_type.include?("markdown") || entry_type.include?("__default__")
217
- # The markdown plugin I have saves this as
218
- # "markdown_with_smarty_pants", so I just look for "markdown".
219
- "markdown"
220
- elsif entry_type.include?("textile")
221
- # This is saved as "textile_2" on my installation of MT 5.1.
222
- "textile"
223
- elsif entry_type == "0" || entry_type.include?("richtext")
224
- # Richtext looks to me like it's saved as HTML, so I include it here.
225
- "html"
226
- else
227
- # Other values might need custom work.
228
- entry_type
229
- end
230
- end
231
-
232
- def self.database_from_opts(options)
233
- engine = options.fetch('engine', 'mysql')
234
- dbname = options.fetch('dbname')
235
-
236
- case engine
237
- when "sqlite"
238
- Sequel.sqlite(dbname)
239
- when "mysql", "postgres"
240
- db_connect_opts = {
241
- :host => options.fetch('host', 'localhost'),
242
- :user => options.fetch('user'),
243
- :password => options.fetch('password', '')
244
- }
245
- db_connect_opts = options['port'] if options['port']
246
- Sequel.public_send(
247
- engine,
248
- dbname,
249
- db_connect_opts
250
- )
251
- else
252
- abort("Unsupported engine: '#{engine}'. Must be one of #{SUPPORTED_ENGINES.join(', ')}")
253
- end
254
- end
255
- end
256
- end
257
- end
1
+ module BuntoImport
2
+ module Importers
3
+ class MT < Importer
4
+
5
+ SUPPORTED_ENGINES = %{mysql postgres sqlite}
6
+
7
+ STATUS_DRAFT = 1
8
+ STATUS_PUBLISHED = 2
9
+ MORE_CONTENT_SEPARATOR = '<!--more-->'
10
+
11
+ def self.default_options
12
+ {
13
+ 'blog_id' => nil,
14
+ 'categories' => true,
15
+ 'dest_encoding' => 'utf-8',
16
+ 'src_encoding' => 'utf-8',
17
+ 'comments' => false
18
+ }
19
+ end
20
+
21
+ def self.require_deps
22
+ BuntoImport.require_with_fallback(%w[
23
+ rubygems
24
+ sequel
25
+ fileutils
26
+ safe_yaml
27
+ ])
28
+ end
29
+
30
+ def self.specify_options(c)
31
+ c.option 'engine', "--engine ENGINE", "Database engine, (default: 'mysql', postgres also supported)"
32
+ c.option 'dbname', '--dbname DB', 'Database name'
33
+ c.option 'user', '--user USER', 'Database user name'
34
+ c.option 'password', '--password PW', "Database user's password, (default: '')"
35
+ c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
36
+ c.option 'port', '--port PORT', 'Custom database port connect to (optional)'
37
+ c.option 'blog_id', '--blog_id ID', 'Specify a single Movable Type blog ID to import (default: all blogs)'
38
+ c.option 'categories', '--categories', "If true, save post's categories in its YAML front matter. (default: true)"
39
+ c.option 'src_encoding', '--src_encoding ENCODING', "Encoding of strings from database. (default: UTF-8)"
40
+ c.option 'dest_encoding', '--dest_encoding ENCODING', "Encoding of output strings. (default: UTF-8)"
41
+ c.option 'comments','--comments', "If true, output comments in _comments directory (default: false)"
42
+ end
43
+
44
+ # By default this migrator will include posts for all your MovableType blogs.
45
+ # Specify a single blog by providing blog_id.
46
+
47
+ # Main migrator function. Call this to perform the migration.
48
+ #
49
+ # dbname:: The name of the database
50
+ # user:: The database user name
51
+ # pass:: The database user's password
52
+ # host:: The address of the MySQL database host. Default: 'localhost'
53
+ # options:: A hash of configuration options
54
+ #
55
+ # Supported options are:
56
+ #
57
+ # blog_id:: Specify a single MovableType blog to export by providing blog_id.
58
+ # Default: nil, importer will include posts for all blogs.
59
+ # categories:: If true, save the post's categories in its
60
+ # YAML front matter. Default: true
61
+ # src_encoding:: Encoding of strings from the database. Default: UTF-8
62
+ # If your output contains mangled characters, set src_encoding to
63
+ # something appropriate for your database charset.
64
+ # dest_encoding:: Encoding of output strings. Default: UTF-8
65
+ # comments:: If true, output comments in _comments directory, like the one
66
+ # mentioned at https://github.com/mpalmer/bunto-static-comments/
67
+ def self.process(options)
68
+ options = default_options.merge(options)
69
+
70
+ comments = options.fetch('comments')
71
+ posts_name_by_id = {} if comments
72
+
73
+ db = database_from_opts(options)
74
+
75
+ post_categories = db[:mt_placement].join(:mt_category, :category_id => :placement_category_id)
76
+
77
+ FileUtils.mkdir_p "_posts"
78
+
79
+ posts = db[:mt_entry]
80
+ posts = posts.filter(:entry_blog_id => options['blog_id']) if options['blog_id']
81
+ posts.each do |post|
82
+ categories = post_categories.filter(
83
+ :mt_placement__placement_entry_id => post[:entry_id]
84
+ ).map {|ea| encode(ea[:category_basename], options) }
85
+
86
+ file_name = post_file_name(post, options)
87
+
88
+ data = post_metadata(post, options)
89
+ data['categories'] = categories if !categories.empty? && options['categories']
90
+ yaml_front_matter = data.delete_if { |_,v| v.nil? || v == '' }.to_yaml
91
+
92
+ # save post path for comment processing
93
+ posts_name_by_id[data['post_id']] = file_name if comments
94
+
95
+ content = post_content(post, options)
96
+
97
+ File.open("_posts/#{file_name}", "w") do |f|
98
+ f.puts yaml_front_matter
99
+ f.puts "---"
100
+ f.puts encode(content, options)
101
+ end
102
+ end
103
+
104
+ # process comment output, if enabled
105
+ if comments
106
+ FileUtils.mkdir_p "_comments"
107
+
108
+ comments = db[:mt_comment]
109
+ comments.each do |comment|
110
+ if posts_name_by_id.key?(comment[:comment_entry_id]) # if the entry exists
111
+ dir_name, base_name = comment_file_dir_and_base_name(posts_name_by_id, comment, options)
112
+ FileUtils.mkdir_p "_comments/#{dir_name}"
113
+
114
+ data = comment_metadata(comment, options)
115
+ content = comment_content(comment, options)
116
+ yaml_front_matter = data.delete_if { |_,v| v.nil? || v == '' }.to_yaml
117
+
118
+ File.open("_comments/#{dir_name}/#{base_name}", "w") do |f|
119
+ f.puts yaml_front_matter
120
+ f.puts "---"
121
+ f.puts encode(content, options)
122
+ end
123
+ end
124
+ end
125
+ end
126
+
127
+ end
128
+
129
+ # Extracts metadata for YAML front matter from post
130
+ def self.post_metadata(post, options = default_options)
131
+ metadata = {
132
+ 'layout' => 'post',
133
+ 'title' => encode(post[:entry_title], options),
134
+ 'date' => post_date(post).strftime("%Y-%m-%d %H:%M:%S %z"),
135
+ 'excerpt' => encode(post[:entry_excerpt].to_s, options),
136
+ 'mt_id' => post[:entry_id],
137
+ 'blog_id' => post[:entry_blog_id],
138
+ 'post_id' => post[:entry_id], # for link with comments
139
+ 'basename' => post[:entry_basename]
140
+ }
141
+ metadata['published'] = false if post[:entry_status] != STATUS_PUBLISHED
142
+ metadata
143
+ end
144
+
145
+ # Different versions of MT used different column names
146
+ def self.post_date(post)
147
+ post[:entry_authored_on] || post[:entry_created_on]
148
+ end
149
+
150
+ # Extracts text body from post
151
+ def self.extra_entry_text_empty?(post)
152
+ post[:entry_text_more].nil? || post[:entry_text_more].strip.empty?
153
+ end
154
+
155
+ def self.post_content(post, options = default_options)
156
+ if extra_entry_text_empty?(post)
157
+ post[:entry_text]
158
+ else
159
+ post[:entry_text] + "\n\n#{MORE_CONTENT_SEPARATOR}\n\n" + post[:entry_text_more]
160
+ end
161
+ end
162
+
163
+ def self.post_file_name(post, options = default_options)
164
+ date = post_date(post)
165
+ slug = post[:entry_basename]
166
+ file_ext = suffix(post[:entry_convert_breaks])
167
+
168
+ "#{date.strftime('%Y-%m-%d')}-#{slug}.#{file_ext}"
169
+ end
170
+
171
+ # Extracts metadata for YAML front matter from comment
172
+ def self.comment_metadata(comment, options = default_options)
173
+ metadata = {
174
+ 'layout' => 'comment',
175
+ 'comment_id' => comment[:comment_id],
176
+ 'post_id' => comment[:comment_entry_id],
177
+ 'author' => encode(comment[:comment_author], options),
178
+ 'email' => comment[:comment_email],
179
+ 'commenter_id' => comment[:comment_commenter_id],
180
+ 'date' => comment_date(comment).strftime("%Y-%m-%d %H:%M:%S %z"),
181
+ 'visible' => comment[:comment_visible] == 1,
182
+ 'ip' => comment[:comment_ip],
183
+ 'url' => comment[:comment_url]
184
+ }
185
+ metadata
186
+ end
187
+
188
+ # Different versions of MT used different column names
189
+ def self.comment_date(comment)
190
+ comment[:comment_modified_on] || comment[:comment_created_on]
191
+ end
192
+
193
+ def self.comment_content(comment, options = default_options)
194
+ comment[:comment_text]
195
+ end
196
+
197
+ def self.comment_file_dir_and_base_name(posts_name_by_id, comment, options = default_options)
198
+ post_basename = posts_name_by_id[comment[:comment_entry_id]].sub(/\.\w+$/, '')
199
+ comment_id = comment[:comment_id]
200
+
201
+ [post_basename, "#{comment_id}.markdown"]
202
+ end
203
+
204
+ def self.encode(str, options = default_options)
205
+ if str.respond_to?(:encoding)
206
+ str.encode(options['dest_encoding'], options['src_encoding'])
207
+ else
208
+ str
209
+ end
210
+ end
211
+
212
+ # Ideally, this script would determine the post format (markdown,
213
+ # html, etc) and create files with proper extensions. At this point
214
+ # it just assumes that markdown will be acceptable.
215
+ def self.suffix(entry_type)
216
+ if entry_type.nil? || entry_type.include?("markdown") || entry_type.include?("__default__")
217
+ # The markdown plugin I have saves this as
218
+ # "markdown_with_smarty_pants", so I just look for "markdown".
219
+ "markdown"
220
+ elsif entry_type.include?("textile")
221
+ # This is saved as "textile_2" on my installation of MT 5.1.
222
+ "textile"
223
+ elsif entry_type == "0" || entry_type.include?("richtext")
224
+ # Richtext looks to me like it's saved as HTML, so I include it here.
225
+ "html"
226
+ else
227
+ # Other values might need custom work.
228
+ entry_type
229
+ end
230
+ end
231
+
232
+ def self.database_from_opts(options)
233
+ engine = options.fetch('engine', 'mysql')
234
+ dbname = options.fetch('dbname')
235
+
236
+ case engine
237
+ when "sqlite"
238
+ Sequel.sqlite(dbname)
239
+ when "mysql", "postgres"
240
+ db_connect_opts = {
241
+ :host => options.fetch('host', 'localhost'),
242
+ :user => options.fetch('user'),
243
+ :password => options.fetch('password', '')
244
+ }
245
+ db_connect_opts = options['port'] if options['port']
246
+ Sequel.public_send(
247
+ engine,
248
+ dbname,
249
+ db_connect_opts
250
+ )
251
+ else
252
+ abort("Unsupported engine: '#{engine}'. Must be one of #{SUPPORTED_ENGINES.join(', ')}")
253
+ end
254
+ end
255
+ end
256
+ end
257
+ end