bunto-import 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,99 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class Mephisto < Importer
4
# Exports Mephisto articles from a PostgreSQL database into a CSV file by
# running the `psql` command-line client.
#
# c - Hash of settings keyed by Symbol:
#     :host     - database host (default: "localhost")
#     :database - database name
#     :username - database user name
#     :filename - path of the CSV file psql writes (default: "posts.csv")
#
# Export PGPASSWORD in the environment if the database requires a password.
#
# Returns whatever CSV.process returns.
def self.postgres(c)
  require 'shellwords'

  sql = <<-SQL
    BEGIN;
    CREATE TEMP TABLE bunto AS
      SELECT title, permalink, body, published_at, filter FROM contents
      WHERE user_id = 1 AND type = 'Article' ORDER BY published_at;
    COPY bunto TO STDOUT WITH CSV HEADER;
    ROLLBACK;
  SQL

  argv = [
    "psql",
    "-h", c[:host] || "localhost",
    "-c", sql.strip,
    c[:database],
    c[:username],
    "-o", c[:filename] || "posts.csv"
  ]
  # Missing settings used to interpolate to nothing; keep dropping them so
  # the remaining arguments do not shift onto an empty placeholder.
  argv = argv.map(&:to_s).reject(&:empty?)

  # Shown for the user's benefit only; the command itself is executed as an
  # argument vector, so no shell ever parses the connection settings.
  puts Shellwords.join(argv)
  IO.popen(argv, &:read)

  # NOTE(review): `CSV.process` is not defined in this file and is called
  # without arguments -- confirm which CSV constant this resolves to and
  # whether it needs the output file name passed in.
  CSV.process
end
20
+
21
# Ensures the mandatory options were supplied.
#
# options - Hash keyed by String; 'dbname' and 'user' must both be non-nil.
#
# Terminates the process through Kernel#abort, naming the first missing
# option, when either one is absent.
def self.validate(options)
  %w[dbname user].each do |required|
    next unless options[required].nil?

    abort "Missing mandatory option --#{required}."
  end
end
28
+
29
# Loads the libraries this importer relies on by handing their names to
# BuntoImport.require_with_fallback.
def self.require_deps
  libraries = %w[rubygems sequel fastercsv fileutils]
  BuntoImport.require_with_fallback(libraries)
end
37
+
38
# Declares the command-line options understood by this importer.
#
# c - object responding to `option(name, switch, description)`.
#
# Returns the result of the last `option` call.
def self.specify_options(c)
  [
    ['dbname',   '--dbname DB',   'Database name'],
    ['user',     '--user USER',   'Database user name'],
    ['password', '--password PW', "Database user's password (default: '')"],
    ['host',     '--host HOST',   'Database host name (default: "localhost")']
  ].map { |name, switch, description| c.option(name, switch, description) }.last
end
44
+
45
# Selects the articles to import: rows of `contents` that belong to user 1,
# have type 'Article' and carry a publication date, oldest first. If you've
# got deleted or otherwise hidden posts, please sift through the created
# posts to make sure nothing is accidentally published.
QUERY = "SELECT id, \
                permalink, \
                body, \
                published_at, \
                title \
         FROM contents \
         WHERE user_id = 1 AND \
               type = 'Article' AND \
               published_at IS NOT NULL \
         ORDER BY published_at".freeze
58
+
59
# Imports Mephisto articles into the _posts directory, one Markdown file per
# row returned by QUERY.
#
# options - Hash keyed by String:
#           'dbname'   - database name (required)
#           'user'     - database user name (required)
#           'password' - database user's password (default: '')
#           'host'     - database host name (default: "localhost")
#
# Raises KeyError when 'dbname' or 'user' is missing.
def self.process(options)
  dbname = options.fetch('dbname')
  user   = options.fetch('user')
  pass   = options.fetch('password', '')
  host   = options.fetch('host', "localhost")

  db = Sequel.mysql(dbname, :user => user,
                            :password => pass,
                            :host => host,
                            :encoding => 'utf8')

  FileUtils.mkdir_p "_posts"

  db[QUERY].each do |post|
    title   = post[:title]
    slug    = post[:permalink]
    date    = post[:published_at]
    content = post[:body]

    # Ideally, this script would determine the post format (markdown,
    # html, etc) and create files with proper extensions. At this point
    # it just assumes that markdown will be acceptable.
    #
    # The date is zero-padded (2010-03-05, not 2010-3-5) so file names sort
    # chronologically and match the naming used by the other importers.
    name = "#{date.strftime('%Y-%m-%d')}-#{slug}.markdown"

    # QUERY selects no :entry_id column, so the former 'mt_id' entry was
    # always nil and always stripped; it is omitted here.
    data = {
      'layout' => 'post',
      'title'  => title.to_s
    }.delete_if { |_, v| v.nil? || v == '' }.to_yaml

    File.open("_posts/#{name}", "w") do |f|
      f.puts data
      f.puts "---"
      f.puts content
    end
  end
end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,257 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class MT < Importer
4
+
5
# Database engines accepted by the 'engine' option. This must be an Array
# (%w, not %{}): the unsupported-engine error message joins its elements.
SUPPORTED_ENGINES = %w[mysql postgres sqlite].freeze

# Entry status codes; an entry whose :entry_status differs from
# STATUS_PUBLISHED is written with `published: false`.
STATUS_DRAFT = 1
STATUS_PUBLISHED = 2
# Marker inserted between an entry's main text and its extended text.
MORE_CONTENT_SEPARATOR = '<!--more-->'
10
+
11
# Builds the option values used when the caller does not supply them.
#
# Returns a new Hash (String keys) on every call.
def self.default_options
  defaults = {}
  defaults['blog_id']       = nil
  defaults['categories']    = true
  defaults['dest_encoding'] = 'utf-8'
  defaults['src_encoding']  = 'utf-8'
  defaults['comments']      = false
  defaults
end
20
+
21
# Loads the libraries this importer relies on by handing their names to
# BuntoImport.require_with_fallback.
def self.require_deps
  libraries = %w[rubygems sequel fileutils safe_yaml]
  BuntoImport.require_with_fallback(libraries)
end
29
+
30
# Declares the command-line options understood by this importer.
#
# c - object responding to `option(name, switch, description)`.
#
# Returns the result of the last `option` call.
def self.specify_options(c)
  [
    ['engine', "--engine ENGINE", "Database engine, (default: 'mysql', postgres also supported)"],
    ['dbname', '--dbname DB', 'Database name'],
    ['user', '--user USER', 'Database user name'],
    ['password', '--password PW', "Database user's password, (default: '')"],
    ['host', '--host HOST', 'Database host name (default: "localhost")'],
    ['port', '--port PORT', 'Custom database port connect to (optional)'],
    ['blog_id', '--blog_id ID', 'Specify a single Movable Type blog ID to import (default: all blogs)'],
    ['categories', '--categories', "If true, save post's categories in its YAML front matter. (default: true)"],
    ['src_encoding', '--src_encoding ENCODING', "Encoding of strings from database. (default: UTF-8)"],
    ['dest_encoding', '--dest_encoding ENCODING', "Encoding of output strings. (default: UTF-8)"],
    ['comments', '--comments', "If true, output comments in _comments directory (default: false)"]
  ].map { |name, switch, description| c.option(name, switch, description) }.last
end
43
+
44
# Main migrator function. Call this to perform the migration.
#
# Every entry of the mt_entry table is written to _posts/; pass 'blog_id'
# to restrict the import to a single MovableType blog.
#
# options - Hash keyed by String, merged over default_options:
#           'engine', 'dbname', 'user', 'password', 'host', 'port'
#                           - connection settings, read by
#                             database_from_opts
#           'blog_id'       - import only entries of this blog.
#                             Default: nil, entries of all blogs.
#           'categories'    - if true, save the post's categories in its
#                             YAML front matter. Default: true
#           'src_encoding'  - encoding of strings from the database.
#                             Default: UTF-8. If your output contains
#                             mangled characters, set this to something
#                             appropriate for your database charset.
#           'dest_encoding' - encoding of output strings. Default: UTF-8
#           'comments'      - if true, output comments in the _comments
#                             directory, like the one mentioned at
#                             https://github.com/mpalmer/bunto-static-comments/
def self.process(options)
  settings = default_options.merge(options)

  with_comments = settings.fetch('comments')
  # Maps entry id => post file name; only needed to place comment files.
  file_names_by_entry = with_comments ? {} : nil

  db = database_from_opts(settings)
  placements = db[:mt_placement].join(:mt_category, :category_id => :placement_category_id)

  # Writes front matter, the closing "---" and the encoded body to path.
  write_document = lambda do |path, front_matter, body|
    File.open(path, "w") do |file|
      file.puts front_matter
      file.puts "---"
      file.puts encode(body, settings)
    end
  end

  FileUtils.mkdir_p "_posts"

  blog_id = settings['blog_id']
  entries = blog_id ? db[:mt_entry].filter(:entry_blog_id => blog_id) : db[:mt_entry]

  entries.each do |entry|
    category_rows = placements.filter(:mt_placement__placement_entry_id => entry[:entry_id])
    categories = category_rows.map { |row| encode(row[:category_basename], settings) }

    file_name = post_file_name(entry, settings)

    metadata = post_metadata(entry, settings)
    metadata['categories'] = categories unless categories.empty? || !settings['categories']
    front_matter = metadata.delete_if { |_, value| value.nil? || value == '' }.to_yaml

    # Remember where the post went so its comments can be filed next to it.
    file_names_by_entry[metadata['post_id']] = file_name if with_comments

    body = post_content(entry, settings)
    write_document.call("_posts/#{file_name}", front_matter, body)
  end

  # Comment output is optional.
  return unless with_comments

  FileUtils.mkdir_p "_comments"

  db[:mt_comment].each do |comment|
    # Skip comments whose entry was not imported above.
    next unless file_names_by_entry.key?(comment[:comment_entry_id])

    dir_name, base_name = comment_file_dir_and_base_name(file_names_by_entry, comment, settings)
    FileUtils.mkdir_p "_comments/#{dir_name}"

    metadata = comment_metadata(comment, settings)
    body = comment_content(comment, settings)
    front_matter = metadata.delete_if { |_, value| value.nil? || value == '' }.to_yaml

    write_document.call("_comments/#{dir_name}/#{base_name}", front_matter, body)
  end
end
128
+
129
# Builds the YAML front matter fields for an entry.
#
# post    - entry row (Hash keyed by Symbol column names).
# options - Hash providing the encodings handed to `encode`.
#
# Returns a Hash keyed by String. 'published' => false is added only when
# the entry's status is not STATUS_PUBLISHED.
def self.post_metadata(post, options = default_options)
  {
    'layout'   => 'post',
    'title'    => encode(post[:entry_title], options),
    'date'     => post_date(post).strftime("%Y-%m-%d %H:%M:%S %z"),
    'excerpt'  => encode(post[:entry_excerpt].to_s, options),
    'mt_id'    => post[:entry_id],
    'blog_id'  => post[:entry_blog_id],
    'post_id'  => post[:entry_id], # for link with comments
    'basename' => post[:entry_basename]
  }.tap do |front_matter|
    front_matter['published'] = false unless post[:entry_status] == STATUS_PUBLISHED
  end
end
144
+
145
# Picks the entry's date. Different versions of MT used different column
# names, so :entry_authored_on is preferred and :entry_created_on is the
# fallback.
def self.post_date(post)
  authored_on = post[:entry_authored_on]
  return authored_on if authored_on

  post[:entry_created_on]
end
149
+
150
# Tells whether the entry lacks extended text, i.e. :entry_text_more is nil
# or contains only whitespace.
def self.extra_entry_text_empty?(post)
  more = post[:entry_text_more]
  return true if more.nil?

  more.strip.empty?
end
154
+
155
# Extracts the text body of an entry.
#
# Returns :entry_text alone when there is no extended text; otherwise the
# entry text, MORE_CONTENT_SEPARATOR on its own paragraph, and the extended
# text.
def self.post_content(post, options = default_options)
  return post[:entry_text] if extra_entry_text_empty?(post)

  divider = "\n\n#{MORE_CONTENT_SEPARATOR}\n\n"
  post[:entry_text] + divider + post[:entry_text_more]
end
162
+
163
# Builds the file name of a post: "<YYYY-MM-DD>-<basename>.<extension>",
# with the extension chosen by `suffix` from :entry_convert_breaks.
def self.post_file_name(post, options = default_options)
  day       = post_date(post).strftime('%Y-%m-%d')
  extension = suffix(post[:entry_convert_breaks])

  format("%s-%s.%s", day, post[:entry_basename], extension)
end
170
+
171
# Builds the YAML front matter fields for a comment.
#
# comment - comment row (Hash keyed by Symbol column names).
# options - Hash providing the encodings handed to `encode`.
#
# Returns a Hash keyed by String; 'visible' is true only when
# :comment_visible equals 1.
def self.comment_metadata(comment, options = default_options)
  front_matter = { 'layout' => 'comment' }
  front_matter['comment_id']   = comment[:comment_id]
  front_matter['post_id']      = comment[:comment_entry_id]
  front_matter['author']       = encode(comment[:comment_author], options)
  front_matter['email']        = comment[:comment_email]
  front_matter['commenter_id'] = comment[:comment_commenter_id]
  front_matter['date']         = comment_date(comment).strftime("%Y-%m-%d %H:%M:%S %z")
  front_matter['visible']      = comment[:comment_visible] == 1
  front_matter['ip']           = comment[:comment_ip]
  front_matter['url']          = comment[:comment_url]
  front_matter
end
187
+
188
# Picks the comment's date: :comment_modified_on when present, otherwise
# :comment_created_on.
def self.comment_date(comment)
  modified_on = comment[:comment_modified_on]
  return modified_on if modified_on

  comment[:comment_created_on]
end
192
+
193
# Extracts the text body of a comment (its :comment_text column).
def self.comment_content(comment, options = default_options)
  body = comment[:comment_text]
  body
end
196
+
197
# Works out where a comment file is stored.
#
# posts_name_by_id - Hash mapping entry id => post file name.
# comment          - comment row; its entry must be present in the Hash.
#
# Returns [directory, file name]: the post file name without its extension,
# and "<comment id>.markdown".
def self.comment_file_dir_and_base_name(posts_name_by_id, comment, options = default_options)
  post_file_name = posts_name_by_id[comment[:comment_entry_id]]
  directory = post_file_name.sub(/\.\w+$/, '')

  [directory, "#{comment[:comment_id]}.markdown"]
end
203
+
204
# Converts a string from the source encoding to the destination encoding.
#
# str     - value to convert; anything that cannot be transcoded (nil,
#           numbers, symbols, ...) is returned unchanged.
# options - Hash with 'src_encoding' and 'dest_encoding'.
#
# Returns the transcoded String, or str itself when it has no #encode.
def self.encode(str, options = default_options)
  # Check for the method actually called: Symbol and Regexp respond to
  # #encoding but not to #encode.
  return str unless str.respond_to?(:encode)

  str.encode(options['dest_encoding'], options['src_encoding'])
end
211
+
212
# Chooses the file extension for a post from its text format value
# (:entry_convert_breaks).
#
# Returns "markdown", "textile" or "html" for the recognised formats, and
# the value itself for anything else.
def self.suffix(entry_type)
  return "markdown" if entry_type.nil?

  # The markdown plugin I have saves this as "markdown_with_smarty_pants",
  # so I just look for "markdown".
  return "markdown" if %w[markdown __default__].any? { |marker| entry_type.include?(marker) }

  # This is saved as "textile_2" on my installation of MT 5.1.
  return "textile" if entry_type.include?("textile")

  # Richtext looks to me like it's saved as HTML, so I include it here.
  return "html" if entry_type == "0" || entry_type.include?("richtext")

  # Other values might need custom work.
  entry_type
end
231
+
232
# Opens the database connection described by the options.
#
# options - Hash keyed by String:
#           'engine'   - "mysql" (default), "postgres" or "sqlite"
#           'dbname'   - database name (required)
#           'user'     - user name (required for mysql/postgres)
#           'password' - password (default: '')
#           'host'     - host name (default: 'localhost')
#           'port'     - port, only passed on when given
#
# Returns whatever the matching Sequel connection method returns.
# Raises KeyError when a required option is missing; terminates the process
# via Kernel#abort for an unsupported engine.
def self.database_from_opts(options)
  engine = options.fetch('engine', 'mysql')
  dbname = options.fetch('dbname')

  case engine
  when "sqlite"
    Sequel.sqlite(dbname)
  when "mysql", "postgres"
    db_connect_opts = {
      :host     => options.fetch('host', 'localhost'),
      :user     => options.fetch('user'),
      :password => options.fetch('password', '')
    }
    # Add the port to the connection settings; assigning it to the variable
    # itself would throw away host, user and password.
    db_connect_opts[:port] = options['port'] if options['port']
    Sequel.public_send(engine, dbname, db_connect_opts)
  else
    # Accept the engine list as an Array of names or as one
    # whitespace-separated String, so the message never fails on #join.
    supported = Array(SUPPORTED_ENGINES).flat_map(&:split).join(', ')
    abort("Unsupported engine: '#{engine}'. Must be one of #{supported}")
  end
end
255
+ end
256
+ end
257
+ end
@@ -0,0 +1,130 @@
1
+ module BuntoImport
2
+ module Importers
3
+ class Posterous < Importer
4
+
5
# Declares the command-line options understood by this importer.
#
# c - object responding to `option(name, switch, description)`.
#
# Returns the result of the last `option` call.
def self.specify_options(c)
  [
    ['email',     '--email EMAIL', 'Posterous email address'],
    ['password',  '--password PW', 'Posterous password'],
    ['api_token', '--token TOKEN', 'Posterous API Token']
  ].map { |name, switch, description| c.option(name, switch, description) }.last
end
10
+
11
# Loads the libraries this importer relies on by handing their names to
# BuntoImport.require_with_fallback.
def self.require_deps
  libraries = %w[rubygems bunto fileutils uri json net/http]
  BuntoImport.require_with_fallback(libraries)
end
21
+
22
# Performs an authenticated GET against posterous.com, following redirects.
#
# uri_str - request path passed to Net::HTTP::Get.
# limit   - number of redirects still allowed (default: 10).
#
# Uses the @email and @pass credentials for HTTP basic authentication.
#
# Returns the successful Net::HTTPResponse.
# Raises ArgumentError once the redirect allowance is used up (a more
# specific exception class would be a better fit), and whatever
# `response.error!` raises for any other response.
def self.fetch(uri_str, limit = 10)
  if limit == 0
    raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password'
  end

  response = Net::HTTP.start('posterous.com') do |http|
    request = Net::HTTP::Get.new(uri_str)
    request.basic_auth @email, @pass
    http.request(request)
  end

  return response if response.is_a?(Net::HTTPSuccess)
  return fetch(response['location'], limit - 1) if response.is_a?(Net::HTTPRedirection)

  response.error!
end
39
+
40
# Downloads the body found at url, following redirects.
#
# url   - absolute URL as a String.
# limit - number of redirects still allowed (default: 10).
#
# Returns the response body.
# Raises ArgumentError once the redirect allowance is used up, and whatever
# `response.error!` raises for any other non-success response.
def self.fetch_one(url, limit = 10)
  raise ArgumentError, 'HTTP redirect too deep' if limit == 0

  response = Net::HTTP.get_response(URI.parse(url))
  case response
  when Net::HTTPSuccess     then response.body
  when Net::HTTPRedirection then fetch_one(response['location'], limit - 1)
  else response.error!
  end
end

# Downloads the full-size version of every image into directory.
#
# directory - destination directory; created when missing.
# imgs      - Array of Hashes, each holding the image URL under
#             ["full"]["url"].
#
# Returns the Array of local paths written ("<directory>/<image name>").
def self.fetch_images(directory, imgs)
  FileUtils.mkdir_p directory

  imgs.map do |img|
    full_url   = img["full"]["url"]
    img_name   = URI.parse(full_url).path.split("/")[-1]
    local_path = directory + "/" + img_name

    # Download before opening the file so a failed request leaves no
    # empty file behind.
    img_data = fetch_one(full_url)
    # File.open, unlike Kernel#open, never treats the path as a command.
    File.open(local_path, "wb") { |file| file.write(img_data) }

    local_path
  end
end
67
+
68
# Imports every post of a Posterous site into _posts/ as HTML files,
# requesting the API page by page until an empty page comes back.
#
# options - Hash keyed by String:
#           'email'     - Posterous email address (required)
#           'password'  - Posterous password (required)
#           'api_token' - Posterous API token (required)
#           Optional overrides, accepted under Symbol or String keys:
#           include_imgs - download post images into imgs/ (default: false)
#           blog         - site to import (default: 'primary')
#           base_path    - prefix of generated image URLs (default: '/')
#
# Raises KeyError when a required option is missing.
def self.process(options)
  email     = options.fetch('email')
  pass      = options.fetch('password')
  api_token = options.fetch('api_token')

  @email, @pass, @api_token = email, pass, api_token
  defaults = { :include_imgs => false, :blog => 'primary', :base_path => '/' }
  # Take overrides from the caller's options; merging the not-yet-assigned
  # local `opts` (nil) raised a TypeError on every run.
  opts = defaults.each_with_object({}) do |(key, default), merged|
    merged[key] = options.fetch(key) { options.fetch(key.to_s, default) }
  end
  FileUtils.mkdir_p "_posts"

  posts = JSON.parse(fetch("/api/v2/users/me/sites/#{opts[:blog]}/posts?api_token=#{@api_token}").body)
  page = 1

  while posts.any?
    posts.each do |post|
      title = post["title"]
      # to_s: a post without a title still gets a (dateless-slug) file name.
      slug = title.to_s.gsub(/[^[:alnum:]]+/, '-').downcase
      date = Date.parse(post["display_date"])
      content = post["body_html"]
      published = !post["is_private"]
      basename = "%02d-%02d-%02d-%s" % [date.year, date.month, date.day, slug]
      name = basename + '.html'

      # Images:
      if opts[:include_imgs]
        post_imgs = post["media"]["images"]
        if post_imgs.any?
          img_dir = "imgs/%s" % basename
          img_urls = fetch_images(img_dir, post_imgs)

          img_items = img_urls.map do |url|
            '<li><img src="' + opts[:base_path] + url + '"></li>'
          end
          imgcontent = "<ol>\n" + img_items.join("\n") + "</ol>\n"

          # filter out "posterous-content", replacing with imgs:
          content = content.sub(/\<p\>\[\[posterous-content:[^\]]+\]\]\<\/p\>/, imgcontent)
        end
      end

      # Get the relevant fields as a hash, delete empty fields and convert
      # to YAML for the header
      data = {
        'layout'    => 'post',
        'title'     => title.to_s,
        'published' => published
      }.delete_if { |_, v| v.nil? || v == '' }.to_yaml

      # Write out the data and content to file
      File.open("_posts/#{name}", "w") do |f|
        f.puts data
        f.puts "---"
        f.puts content
      end
    end

    page += 1
    posts = JSON.parse(fetch("/api/v2/users/me/sites/#{opts[:blog]}/posts?api_token=#{@api_token}&page=#{page}").body)
  end
end
128
+ end
129
+ end
130
+ end