bunto-import 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,99 @@
1
module BuntoImport
  module Importers
    # Imports articles from a Mephisto blog database into a local "_posts"
    # directory, one Markdown file per published article.
    class Mephisto < Importer
      # Exports Mephisto posts into a CSV file by running psql, then hands
      # the file over to the CSV importer.
      #
      # c - Hash of database settings, read with Symbol keys:
      #     :host     - database host (default: "localhost")
      #     :database - database name
      #     :username - database user name
      #     :filename - CSV file to write (default: "posts.csv")
      #
      # Export PGPASSWORD beforehand if you must.
      #
      # Aborts the process when psql cannot be run or exits unsuccessfully.
      def self.postgres(c)
        sql = <<-SQL
        BEGIN;
        CREATE TEMP TABLE bunto AS
          SELECT title, permalink, body, published_at, filter FROM contents
          WHERE user_id = 1 AND type = 'Article' ORDER BY published_at;
        COPY bunto TO STDOUT WITH CSV HEADER;
        ROLLBACK;
        SQL

        host     = c[:host] || "localhost"
        filename = c[:filename] || "posts.csv"

        # Pass an argument vector instead of a single shell string so that
        # values containing spaces, quotes or shell metacharacters reach psql
        # verbatim. Unset or empty settings are left out, exactly as they
        # vanished from the interpolated command line before.
        command = ["psql", "-h", host, "-c", sql.strip, c[:database], c[:username], "-o", filename]
        command = command.map(&:to_s).reject(&:empty?)

        puts command.join(" ")
        system(*command) || abort("psql export failed.")

        # NOTE(review): CSV is presumably the sibling CSV importer and is
        # assumed to read the file name from the 'file' option -- confirm.
        # The previous zero-argument call did not match the one-argument
        # `process(options)` signature used by the importers in this package.
        CSV.process("file" => filename)
      end

      # Aborts with an error message unless the mandatory 'dbname' and 'user'
      # options are present.
      #
      # options - Hash of importer options keyed by String.
      def self.validate(options)
        %w[dbname user].each do |option|
          abort "Missing mandatory option --#{option}." if options[option].nil?
        end
      end

      # Loads the libraries this importer relies on.
      def self.require_deps
        BuntoImport.require_with_fallback(%w[
          rubygems
          sequel
          fastercsv
          fileutils
        ])
      end

      # Registers the command-line options understood by this importer.
      #
      # c - the command object the options are declared on.
      def self.specify_options(c)
        c.option 'dbname', '--dbname DB', 'Database name'
        c.option 'user', '--user USER', 'Database user name'
        c.option 'password', '--password PW', "Database user's password (default: '')"
        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
      end

      # This query will pull blog posts from all entries across all blogs. If
      # you've got unpublished, deleted or otherwise hidden posts please sift
      # through the created posts to make sure nothing is accidentally published.
      QUERY = "SELECT id, \
                      permalink, \
                      body, \
                      published_at, \
                      title \
               FROM contents \
               WHERE user_id = 1 AND \
                     type = 'Article' AND \
                     published_at IS NOT NULL \
               ORDER BY published_at"

      # Reads every row returned by QUERY and writes it to
      # "_posts/<year>-<month>-<day>-<permalink>.markdown" with a YAML front
      # matter header followed by the article body.
      #
      # options - Hash of importer options keyed by String:
      #           'dbname'   - database name (required)
      #           'user'     - database user name (required)
      #           'password' - database password (default: '')
      #           'host'     - database host (default: "localhost")
      #
      # Raises KeyError when 'dbname' or 'user' is missing.
      def self.process(options)
        dbname = options.fetch('dbname')
        user   = options.fetch('user')
        pass   = options.fetch('password', '')
        host   = options.fetch('host', "localhost")

        db = Sequel.mysql(dbname, :user => user,
                                  :password => pass,
                                  :host => host,
                                  :encoding => 'utf8')

        FileUtils.mkdir_p "_posts"

        db[QUERY].each do |post|
          title   = post[:title]
          slug    = post[:permalink]
          date    = post[:published_at]
          content = post[:body]

          # Ideally, this script would determine the post format (markdown,
          # html, etc) and create files with proper extensions. At this point
          # it just assumes that markdown will be acceptable.
          name = [date.year, date.month, date.day, slug].join('-') + ".markdown"

          # QUERY selects the primary key as `id`; the former `:entry_id`
          # lookup matched no selected column, so the identifier was always
          # nil and silently dropped from the front matter.
          data = {
            'layout' => 'post',
            'title'  => title.to_s,
            'mt_id'  => post[:id],
          }.delete_if { |_, v| v.nil? || v == '' }.to_yaml

          File.open("_posts/#{name}", "w") do |f|
            f.puts data
            f.puts "---"
            f.puts content
          end
        end
      end
    end
  end
end
@@ -0,0 +1,257 @@
1
module BuntoImport
  module Importers
    # Imports Movable Type entries (and optionally their comments) from a
    # database into local "_posts" and "_comments" directories.
    class MT < Importer

      # Engine names accepted by the 'engine' option. This has to be an Array:
      # the unsupported-engine error message joins it.
      SUPPORTED_ENGINES = %w[mysql postgres sqlite].freeze

      STATUS_DRAFT = 1
      STATUS_PUBLISHED = 2
      MORE_CONTENT_SEPARATOR = '<!--more-->'

      # Returns a fresh Hash with the default value of every optional setting.
      def self.default_options
        {
          'blog_id' => nil,
          'categories' => true,
          'dest_encoding' => 'utf-8',
          'src_encoding' => 'utf-8',
          'comments' => false
        }
      end

      # Loads the libraries this importer relies on.
      def self.require_deps
        BuntoImport.require_with_fallback(%w[
          rubygems
          sequel
          fileutils
          safe_yaml
        ])
      end

      # Registers the command-line options understood by this importer.
      #
      # c - the command object the options are declared on.
      def self.specify_options(c)
        c.option 'engine', "--engine ENGINE", "Database engine, (default: 'mysql', postgres also supported)"
        c.option 'dbname', '--dbname DB', 'Database name'
        c.option 'user', '--user USER', 'Database user name'
        c.option 'password', '--password PW', "Database user's password, (default: '')"
        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
        c.option 'port', '--port PORT', 'Custom database port connect to (optional)'
        c.option 'blog_id', '--blog_id ID', 'Specify a single Movable Type blog ID to import (default: all blogs)'
        c.option 'categories', '--categories', "If true, save post's categories in its YAML front matter. (default: true)"
        c.option 'src_encoding', '--src_encoding ENCODING', "Encoding of strings from database. (default: UTF-8)"
        c.option 'dest_encoding', '--dest_encoding ENCODING', "Encoding of output strings. (default: UTF-8)"
        c.option 'comments','--comments', "If true, output comments in _comments directory (default: false)"
      end

      # By default this migrator will include posts for all your MovableType blogs.
      # Specify a single blog by providing blog_id.

      # Main migrator function. Call this to perform the migration.
      #
      # options:: A hash of configuration options, keyed by String
      #
      # Connection options are:
      #
      # engine::   Database engine: 'mysql', 'postgres' or 'sqlite'. Default: 'mysql'
      # dbname::   The name of the database (required)
      # user::     The database user name (required for mysql and postgres)
      # password:: The database user's password. Default: ''
      # host::     The address of the database host. Default: 'localhost'
      # port::     Custom database port (optional)
      #
      # Supported options are:
      #
      # blog_id::       Specify a single MovableType blog to export by providing blog_id.
      #                 Default: nil, importer will include posts for all blogs.
      # categories::    If true, save the post's categories in its
      #                 YAML front matter. Default: true
      # src_encoding::  Encoding of strings from the database. Default: UTF-8
      #                 If your output contains mangled characters, set src_encoding to
      #                 something appropriate for your database charset.
      # dest_encoding:: Encoding of output strings. Default: UTF-8
      # comments::      If true, output comments in _comments directory, like the one
      #                 mentioned at https://github.com/mpalmer/bunto-static-comments/
      def self.process(options)
        options = default_options.merge(options)

        export_comments = options.fetch('comments')
        # Maps an entry id to the file name written for it, so each comment
        # can later be stored in a directory named after its post.
        posts_name_by_id = {}

        db = database_from_opts(options)

        post_categories = db[:mt_placement].join(:mt_category, :category_id => :placement_category_id)

        FileUtils.mkdir_p "_posts"

        posts = db[:mt_entry]
        posts = posts.filter(:entry_blog_id => options['blog_id']) if options['blog_id']
        posts.each do |post|
          # Qualify the column explicitly instead of relying on Sequel
          # splitting a :table__column symbol, which newer Sequel releases no
          # longer do by default.
          categories = post_categories.filter(
            Sequel.qualify(:mt_placement, :placement_entry_id) => post[:entry_id]
          ).map { |ea| encode(ea[:category_basename], options) }

          file_name = post_file_name(post, options)

          data = post_metadata(post, options)
          data['categories'] = categories if !categories.empty? && options['categories']
          yaml_front_matter = data.delete_if { |_, v| v.nil? || v == '' }.to_yaml

          # save post path for comment processing
          posts_name_by_id[data['post_id']] = file_name if export_comments

          content = post_content(post, options)

          File.open("_posts/#{file_name}", "w") do |f|
            f.puts yaml_front_matter
            f.puts "---"
            f.puts encode(content, options)
          end
        end

        # process comment output, if enabled
        return unless export_comments

        FileUtils.mkdir_p "_comments"

        db[:mt_comment].each do |comment|
          # skip comments whose entry was not exported above
          next unless posts_name_by_id.key?(comment[:comment_entry_id])

          dir_name, base_name = comment_file_dir_and_base_name(posts_name_by_id, comment, options)
          FileUtils.mkdir_p "_comments/#{dir_name}"

          data = comment_metadata(comment, options)
          content = comment_content(comment, options)
          yaml_front_matter = data.delete_if { |_, v| v.nil? || v == '' }.to_yaml

          File.open("_comments/#{dir_name}/#{base_name}", "w") do |f|
            f.puts yaml_front_matter
            f.puts "---"
            f.puts encode(content, options)
          end
        end
      end

      # Extracts metadata for YAML front matter from post
      #
      # Adds 'published' => false when the entry status is not
      # STATUS_PUBLISHED.
      def self.post_metadata(post, options = default_options)
        metadata = {
          'layout' => 'post',
          'title' => encode(post[:entry_title], options),
          'date' => post_date(post).strftime("%Y-%m-%d %H:%M:%S %z"),
          'excerpt' => encode(post[:entry_excerpt].to_s, options),
          'mt_id' => post[:entry_id],
          'blog_id' => post[:entry_blog_id],
          'post_id' => post[:entry_id], # for link with comments
          'basename' => post[:entry_basename]
        }
        metadata['published'] = false if post[:entry_status] != STATUS_PUBLISHED
        metadata
      end

      # Different versions of MT used different column names
      def self.post_date(post)
        post[:entry_authored_on] || post[:entry_created_on]
      end

      # Returns true when the post has no extended ("more") text, i.e. the
      # column is nil or contains only whitespace.
      def self.extra_entry_text_empty?(post)
        post[:entry_text_more].nil? || post[:entry_text_more].strip.empty?
      end

      # Extracts text body from post
      #
      # When extended text is present it is appended to the main text,
      # separated by MORE_CONTENT_SEPARATOR.
      def self.post_content(post, options = default_options)
        if extra_entry_text_empty?(post)
          post[:entry_text]
        else
          # to_s guards against an entry that only has extended text
          post[:entry_text].to_s + "\n\n#{MORE_CONTENT_SEPARATOR}\n\n" + post[:entry_text_more]
        end
      end

      # Builds the "<YYYY-MM-DD>-<basename>.<ext>" file name for a post; the
      # extension is derived from the entry's text format via suffix.
      def self.post_file_name(post, options = default_options)
        date = post_date(post)
        slug = post[:entry_basename]
        file_ext = suffix(post[:entry_convert_breaks])

        "#{date.strftime('%Y-%m-%d')}-#{slug}.#{file_ext}"
      end

      # Extracts metadata for YAML front matter from comment
      def self.comment_metadata(comment, options = default_options)
        {
          'layout' => 'comment',
          'comment_id' => comment[:comment_id],
          'post_id' => comment[:comment_entry_id],
          'author' => encode(comment[:comment_author], options),
          'email' => comment[:comment_email],
          'commenter_id' => comment[:comment_commenter_id],
          'date' => comment_date(comment).strftime("%Y-%m-%d %H:%M:%S %z"),
          'visible' => comment[:comment_visible] == 1,
          'ip' => comment[:comment_ip],
          'url' => comment[:comment_url]
        }
      end

      # Different versions of MT used different column names
      def self.comment_date(comment)
        comment[:comment_modified_on] || comment[:comment_created_on]
      end

      # Extracts text body from comment
      def self.comment_content(comment, options = default_options)
        comment[:comment_text]
      end

      # Returns a two-element Array: the directory name for the comment (the
      # post's file name without its extension) and the comment's file name
      # ("<comment_id>.markdown").
      def self.comment_file_dir_and_base_name(posts_name_by_id, comment, options = default_options)
        post_basename = posts_name_by_id[comment[:comment_entry_id]].sub(/\.\w+$/, '')
        comment_id = comment[:comment_id]

        [post_basename, "#{comment_id}.markdown"]
      end

      # Converts str from options['src_encoding'] to options['dest_encoding'].
      # Values that do not respond to #encoding (nil, numbers, ...) are
      # returned unchanged.
      def self.encode(str, options = default_options)
        if str.respond_to?(:encoding)
          str.encode(options['dest_encoding'], options['src_encoding'])
        else
          str
        end
      end

      # Maps the entry's text format to a file extension. Unrecognised
      # formats are returned as-is and used as the extension.
      def self.suffix(entry_type)
        if entry_type.nil? || entry_type.include?("markdown") || entry_type.include?("__default__")
          # The markdown plugin I have saves this as
          # "markdown_with_smarty_pants", so I just look for "markdown".
          "markdown"
        elsif entry_type.include?("textile")
          # This is saved as "textile_2" on my installation of MT 5.1.
          "textile"
        elsif entry_type == "0" || entry_type.include?("richtext")
          # Richtext looks to me like it's saved as HTML, so I include it here.
          "html"
        else
          # Other values might need custom work.
          entry_type
        end
      end

      # Opens the database connection described by options.
      #
      # options - Hash keyed by String; see process for the connection
      #           options that are read.
      #
      # Returns the Sequel database object. Raises KeyError when 'dbname' (or
      # 'user' for mysql/postgres) is missing and aborts when the engine is
      # not one of SUPPORTED_ENGINES.
      def self.database_from_opts(options)
        engine = options.fetch('engine', 'mysql')
        dbname = options.fetch('dbname')

        case engine
        when "sqlite"
          Sequel.sqlite(dbname)
        when "mysql", "postgres"
          db_connect_opts = {
            :host => options.fetch('host', 'localhost'),
            :user => options.fetch('user'),
            :password => options.fetch('password', '')
          }
          # Add the port to the connection options; assigning it to the
          # variable itself would discard host, user and password.
          db_connect_opts[:port] = options['port'] if options['port']
          Sequel.public_send(
            engine,
            dbname,
            db_connect_opts
          )
        else
          abort("Unsupported engine: '#{engine}'. Must be one of #{SUPPORTED_ENGINES.join(', ')}")
        end
      end
    end
  end
end
@@ -0,0 +1,130 @@
1
module BuntoImport
  module Importers
    # Imports posts (and optionally their images) from the Posterous API
    # into a local "_posts" directory.
    class Posterous < Importer

      # Registers the command-line options understood by this importer.
      #
      # c - the command object the options are declared on.
      def self.specify_options(c)
        c.option 'email', '--email EMAIL', 'Posterous email address'
        c.option 'password', '--password PW', 'Posterous password'
        c.option 'api_token', '--token TOKEN', 'Posterous API Token'
      end

      # Loads the libraries this importer relies on. 'date' is listed because
      # process calls Date.parse.
      def self.require_deps
        BuntoImport.require_with_fallback(%w[
          rubygems
          bunto
          fileutils
          uri
          json
          net/http
          date
        ])
      end

      # Performs an authenticated GET against posterous.com, following
      # redirects.
      #
      # uri_str - request path (or the redirect location) to fetch.
      # limit   - number of redirects still allowed (default: 10).
      #
      # Uses the @email and @pass credentials stored by process.
      #
      # Returns the successful response object. Raises ArgumentError once the
      # redirect budget is used up; any other non-success response is raised
      # through response.error!.
      def self.fetch(uri_str, limit = 10)
        # You should choose better exception.
        raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password' if limit == 0

        response = nil
        Net::HTTP.start('posterous.com') do |http|
          req = Net::HTTP::Get.new(uri_str)
          req.basic_auth @email, @pass
          response = http.request(req)
        end

        case response
        when Net::HTTPSuccess     then response
        when Net::HTTPRedirection then fetch(response['location'], limit - 1)
        else response.error!
        end
      end

      # Downloads a single URL, following redirects.
      #
      # url   - absolute URL to download.
      # limit - number of redirects still allowed (default: 10).
      #
      # Returns the response body. Raises ArgumentError once the redirect
      # budget is used up; any other non-success response is raised through
      # response.error!.
      #
      # Defined at class level so it is not re-defined on every call of
      # fetch_images.
      def self.fetch_one(url, limit = 10)
        raise ArgumentError, 'HTTP redirect too deep' if limit == 0

        response = Net::HTTP.get_response(URI.parse(url))
        case response
        when Net::HTTPSuccess     then response.body
        when Net::HTTPRedirection then fetch_one(response['location'], limit - 1)
        else response.error!
        end
      end

      # Downloads the full-size version of every image into directory.
      #
      # directory - destination directory; created when missing.
      # imgs      - Array of image Hashes; img["full"]["url"] is read from
      #             each one.
      #
      # Returns an Array with the local path of every image written.
      def self.fetch_images(directory, imgs)
        FileUtils.mkdir_p directory

        imgs.map do |img|
          fullurl = img["full"]["url"]
          imgname = URI.parse(fullurl).path.split("/")[-1]
          target  = "#{directory}/#{imgname}"

          # File.open instead of Kernel#open: the name comes from a remote
          # URL and must only ever be treated as a file path.
          File.open(target, "wb") { |file| file.write fetch_one(fullurl) }
          target
        end
      end

      # Fetches every page of posts from the Posterous API and writes each
      # post to "_posts/<YYYY>-<MM>-<DD>-<slug>.html" with a YAML front
      # matter header followed by the post's HTML body.
      #
      # options - Hash of importer options:
      #           'email'     - Posterous email address (required)
      #           'password'  - Posterous password (required)
      #           'api_token' - Posterous API token (required)
      #           The following may be given under a Symbol or String key:
      #           include_imgs - download images and replace the
      #                          "posterous-content" placeholder with them
      #                          (default: false)
      #           blog         - site to import (default: 'primary')
      #           base_path    - prefix for generated image URLs
      #                          (default: '/')
      #
      # Raises KeyError when a required option is missing.
      def self.process(options)
        email     = options.fetch('email')
        pass      = options.fetch('password')
        api_token = options.fetch('api_token')

        @email, @pass, @api_token = email, pass, api_token
        defaults = { :include_imgs => false, :blog => 'primary', :base_path => '/' }
        # Previously this read `defaults.merge(opts)`, which merged the
        # still-nil local into the defaults and raised TypeError on every run.
        opts = defaults.each_with_object({}) do |(key, default), merged|
          merged[key] = options.fetch(key) { options.fetch(key.to_s, default) }
        end
        FileUtils.mkdir_p "_posts"

        posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{opts[:blog]}/posts?api_token=#{@api_token}").body)
        page = 1

        # The loop stops at the first page that comes back empty.
        while posts.any?
          posts.each do |post|
            title = post["title"]
            # to_s keeps untitled posts from crashing the import
            slug = title.to_s.gsub(/[^[:alnum:]]+/, '-').downcase
            date = Date.parse(post["display_date"])
            content = post["body_html"]
            published = !post["is_private"]
            basename = "%02d-%02d-%02d-%s" % [date.year, date.month, date.day, slug]
            name = basename + '.html'

            # Images:
            if opts[:include_imgs]
              # tolerate posts without a media section
              post_imgs = (post["media"] || {})["images"] || []
              if post_imgs.any?
                img_dir = "imgs/%s" % basename
                img_urls = self.fetch_images(img_dir, post_imgs)

                img_urls.map! do |url|
                  '<li><img src="' + opts[:base_path] + url + '"></li>'
                end
                imgcontent = "<ol>\n" + img_urls.join("\n") + "</ol>\n"

                # filter out "posterous-content", replacing with imgs:
                content = content.sub(/\<p\>\[\[posterous-content:[^\]]+\]\]\<\/p\>/, imgcontent)
              end
            end

            # Get the relevant fields as a hash, delete empty fields and convert
            # to YAML for the header
            data = {
              'layout' => 'post',
              'title' => title.to_s,
              'published' => published
            }.delete_if { |_, v| v.nil? || v == '' }.to_yaml

            # Write out the data and content to file
            File.open("_posts/#{name}", "w") do |f|
              f.puts data
              f.puts "---"
              f.puts content
            end
          end

          page += 1
          posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{opts[:blog]}/posts?api_token=#{@api_token}&page=#{page}").body)
        end
      end
    end
  end
end