jekyll-import 0.1.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,67 @@
1
+ # Author: Toby DiPasquale <toby@cbcg.net>
2
+ require 'fileutils'
3
+ require 'rubygems'
4
+ require 'sequel'
5
+ require 'safe_yaml'
6
+
7
module JekyllImport
  # Imports the published posts of a Typo blog database into Jekyll's
  # `_posts` directory, one file per post.
  module Typo
    # This SQL *should* work for both MySQL and PostgreSQL.
    SQL = <<-EOS
      SELECT c.id id,
             c.title title,
             c.permalink slug,
             c.body body,
             c.extended extended,
             c.published_at date,
             c.state state,
             COALESCE(tf.name, 'html') filter
        FROM contents c
             LEFT OUTER JOIN text_filters tf
                          ON c.text_filter_id = tf.id
    EOS

    # Main migrator function. Call this to perform the migration.
    #
    # server:: Database flavour, 'postgres' or 'mysql' (String or Symbol).
    # dbname:: The name of the database
    # user::   The database user name
    # pass::   The database user's password
    # host::   The address of the database host. Default: 'localhost'
    #
    # Creates `_posts` in the current working directory and writes one file
    # per row whose state matches /published/. Raises a RuntimeError when
    # +server+ is not one of the supported flavours.
    def self.process(server, dbname, user, pass, host = 'localhost')
      FileUtils.mkdir_p '_posts'
      db = connect(server, dbname, user, pass, host)

      begin
        db[SQL].each do |post|
          # to_s keeps a NULL state from blowing up the match.
          next unless post[:state].to_s =~ /published/

          write_post(post)
        end
      ensure
        # Release the connection even when a post fails to be written.
        db.disconnect
      end
    end

    # Opens the Sequel connection for the requested database flavour.
    def self.connect(server, dbname, user, pass, host)
      settings = { :user => user, :password => pass, :host => host, :encoding => 'utf8' }

      # to_sym (unlike String#intern) also accepts a Symbol.
      case server.to_sym
      when :postgres
        Sequel.postgres(dbname, settings)
      when :mysql
        Sequel.mysql(dbname, settings)
      else
        raise "Unknown database server '#{server}'"
      end
    end

    # Writes a single post row to `_posts/<date>-<slug>.<filter>`.
    def self.write_post(post)
      slug = (post[:slug] || "no slug").strip
      date = post[:date] || Time.now

      # Work on a copy: the row must not be mutated, and body may be NULL.
      body = post[:body].to_s.dup
      if post[:extended]
        body << "\n<!-- more -->\n"
        body << post[:extended]
      end

      # Can have more than one text filter in this field, but we just want
      # the first one for this. Fall back to the same default the SQL uses.
      extension = post[:filter].to_s.split(' ').first || 'html'

      name = format('%04d-%02d-%02d-%s.%s',
                    date.year, date.month, date.day, slug, extension)

      front_matter = {
        'layout'  => 'post',
        'title'   => post[:title].to_s,
        'typo_id' => post[:id]
      }.delete_if { |_key, value| value.nil? || value == '' }

      File.open("_posts/#{name}", 'w') do |f|
        f.puts front_matter.to_yaml
        f.puts '---'
        f.puts body.delete("\r")
      end
    end

    private_class_method :connect, :write_post
  end
end
@@ -0,0 +1,296 @@
1
+ require 'rubygems'
2
+ require 'sequel'
3
+ require 'fileutils'
4
+ require 'psych'
5
+ require 'safe_yaml'
6
+
7
+ # NOTE: This converter requires Sequel and the MySQL gems.
8
+ # The MySQL gem can be difficult to install on OS X. Once you have MySQL
9
+ # installed, running the following commands should work:
10
+ # $ sudo gem install sequel
11
+ # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
12
+
13
module JekyllImport
  # Imports posts (and optionally their comments, categories and tags) from a
  # WordPress MySQL database into Jekyll's `_posts` directory.
  module WordPress

    # Matches WordPress' "read more" marker, with or without inner spaces.
    MORE_TAG = /<!-- *more *-->/

    # Markup entities that must be turned back into plain characters after
    # entity-encoding, otherwise every HTML tag in the text would be escaped.
    BASIC_ENTITIES = {
      "&amp;"  => "&",
      "&lt;"   => "<",
      "&gt;"   => ">",
      "&quot;" => '"',
      "&apos;" => "'"
    }.freeze

    BASIC_ENTITY_PATTERN = /&(?:amp|lt|gt|quot|apos);/

    # Main migrator function. Call this to perform the migration.
    #
    # dbname::  The name of the database
    # user::    The database user name
    # pass::    The database user's password
    # host::    The address of the MySQL database host. Default: 'localhost'
    # options:: A hash table of configuration options.
    #
    # Supported options are:
    #
    # :table_prefix::   Prefix of database tables used by WordPress.
    #                   Default: 'wp_'
    # :clean_entities:: If true, convert non-ASCII characters to HTML
    #                   entities in the posts, comments, titles, and
    #                   names. Requires the 'htmlentities' gem to
    #                   work. Default: true.
    # :comments::       If true, migrate post comments too. Comments
    #                   are saved in the post's YAML front matter.
    #                   Default: true.
    # :categories::     If true, save the post's categories in its
    #                   YAML front matter. Default: true.
    # :tags::           If true, save the post's tags in its
    #                   YAML front matter. Default: true.
    # :more_excerpt::   If true, when a post has no excerpt but
    #                   does have a <!-- more --> tag, use the
    #                   preceding post content as the excerpt.
    #                   Default: true.
    # :more_anchor::    If true, convert a <!-- more --> tag into
    #                   two HTML anchors with ids "more" and
    #                   "more-NNN" (where NNN is the post number).
    #                   Default: true.
    # :status::         Array of allowed post statuses. Only
    #                   posts with matching status will be migrated.
    #                   Known statuses are :publish, :draft, :private,
    #                   and :revision. If this is nil or an empty
    #                   array, all posts are migrated regardless of
    #                   status. Default: [:publish].
    #
    def self.process(dbname, user, pass, host='localhost', options={})
      options = {
        :table_prefix   => 'wp_',
        :clean_entities => true,
        :comments       => true,
        :categories     => true,
        :tags           => true,
        :more_excerpt   => true,
        :more_anchor    => true,
        :status         => [:publish] # :draft, :private, :revision
      }.merge(options)

      if options[:clean_entities]
        begin
          require 'htmlentities'
        rescue LoadError
          STDERR.puts "Could not require 'htmlentities', so the " +
                      ":clean_entities option is now disabled."
          options[:clean_entities] = false
        end
      end

      FileUtils.mkdir_p("_posts")

      db = Sequel.mysql(dbname, :user => user, :password => pass,
                        :host => host, :encoding => 'utf8')

      begin
        db[posts_query(db, options)].each do |post|
          process_post(post, db, options)
        end
      ensure
        # Release the connection even when a post fails to be written.
        db.disconnect
      end
    end


    # Converts one row of the posts query into a file in `_posts`.
    #
    # post::    Row hash produced by the posts query (keys :id, :guid, :type,
    #           :status, :title, :slug, :date, :content, :excerpt,
    #           :comment_count and the :author* fields).
    # db::      The database handle, used to look up terms and comments.
    # options:: The merged options hash described on +process+.
    def self.process_post(post, db, options)
      title = post[:title].to_s
      title = clean_entities(title) if options[:clean_entities]

      slug = post[:slug].to_s
      slug = sluggify(title) if slug.empty?
      # A title with no usable characters would otherwise give every such
      # post the same file name; the post id keeps the names distinct.
      slug = post[:id].to_s if slug.empty?

      date = post[:date] || Time.now
      name = "%04d-%02d-%02d-%s.markdown" % [date.year, date.month,
                                             date.day, slug]

      content = post[:content].to_s
      content = clean_entities(content) if options[:clean_entities]

      excerpt = post[:excerpt].to_s

      more_anchor = nil
      more_index = content.index(MORE_TAG)
      if more_index
        if options[:more_excerpt] && excerpt.empty?
          excerpt = content[0...more_index]
        end
        if options[:more_anchor]
          # Record the anchor id so it reaches the front matter below.
          more_anchor = "more"
          # Non-destructive sub: content may still be the row's own string.
          content = content.sub(MORE_TAG,
                                "<a id=\"more\"></a>" +
                                "<a id=\"more-#{post[:id]}\"></a>")
        end
      end

      categories, tags = terms_for(post, db, options)
      comments = comments_for(post, db, options)

      # Get the relevant fields as a hash, delete empty fields and
      # convert to YAML for the header.
      data = {
        'layout'        => post[:type].to_s,
        'status'        => post[:status].to_s,
        'published'     => (post[:status].to_s == "publish"),
        'title'         => title.to_s,
        'author'        => post[:author].to_s,
        'author_login'  => post[:author_login].to_s,
        'author_email'  => post[:author_email].to_s,
        'author_url'    => post[:author_url].to_s,
        'excerpt'       => excerpt,
        'more_anchor'   => more_anchor,
        'wordpress_id'  => post[:id],
        'wordpress_url' => post[:guid].to_s,
        'date'          => date,
        'categories'    => options[:categories] ? categories : nil,
        'tags'          => options[:tags] ? tags : nil,
        'comments'      => options[:comments] ? comments : nil,
      }.delete_if { |k,v| v.nil? || v == '' }.to_yaml

      # Write out the data and content to file
      File.open("_posts/#{name}", "w") do |f|
        f.puts data
        f.puts "---"
        f.puts content
      end
    end


    # Entity-encodes +text+ with the 'htmlentities' gem (named entities) and
    # then restores the five basic markup characters, so non-ASCII characters
    # end up as entities while HTML tags stay intact. The argument itself is
    # left untouched; nil is treated as an empty string.
    def self.clean_entities( text )
      text = text.to_s.dup
      text.force_encoding("UTF-8") if text.respond_to?(:force_encoding)

      encoded = HTMLEntities.new.encode(text, :named)

      # We don't want to convert these, it would break all
      # HTML tags in the post and comments. A single pass is used so that an
      # entity the author typed literally (e.g. "&lt;") comes back unchanged
      # instead of being unescaped twice. Slashes are left alone: escaping
      # them would break closing tags and URLs.
      encoded.gsub(BASIC_ENTITY_PATTERN) { |entity| BASIC_ENTITIES[entity] }
    end


    # Builds a permalink-friendly slug from +title+: lower-case ASCII letters
    # and digits separated by single hyphens. Uses the optional 'unidecode'
    # gem to transliterate non-ASCII characters when it is available.
    def self.sluggify( title )
      title = title.to_s
      begin
        require 'unidecode'
        title = title.to_ascii
      rescue LoadError
        STDERR.puts "Could not require 'unidecode'. If your post titles have non-ASCII characters, you could get nicer permalinks by installing unidecode."
      end
      title.downcase.gsub(/[^0-9A-Za-z]+/, " ").strip.gsub(" ", "-")
    end


    # Builds the SELECT that lists the posts to migrate, restricted to the
    # statuses in options[:status] when any are given.
    def self.posts_query(db, options)
      px = options[:table_prefix]

      query = "
         SELECT
           posts.ID            AS `id`,
           posts.guid          AS `guid`,
           posts.post_type     AS `type`,
           posts.post_status   AS `status`,
           posts.post_title    AS `title`,
           posts.post_name     AS `slug`,
           posts.post_date     AS `date`,
           posts.post_content  AS `content`,
           posts.post_excerpt  AS `excerpt`,
           posts.comment_count AS `comment_count`,
           users.display_name  AS `author`,
           users.user_login    AS `author_login`,
           users.user_email    AS `author_email`,
           users.user_url      AS `author_url`
         FROM #{px}posts AS `posts`
           LEFT JOIN #{px}users AS `users`
             ON posts.post_author = users.ID"

      statuses = Array(options[:status])
      return query if statuses.empty?

      # Let the database driver quote each value instead of splicing the
      # raw text into the statement.
      quoted = statuses.map { |status| db.literal(status.to_s) }
      "#{query}
         WHERE posts.post_status IN (#{quoted.join(', ')})"
    end


    # Returns [categories, tags] for +post+, honouring the :categories,
    # :tags and :clean_entities options.
    def self.terms_for(post, db, options)
      categories = []
      tags = []
      return [categories, tags] unless options[:categories] || options[:tags]

      px = options[:table_prefix]
      query =
        "SELECT
           terms.name AS `name`,
           ttax.taxonomy AS `type`
         FROM
           #{px}terms AS `terms`,
           #{px}term_relationships AS `trels`,
           #{px}term_taxonomy AS `ttax`
         WHERE
           trels.object_id = #{post[:id].to_i} AND
           trels.term_taxonomy_id = ttax.term_taxonomy_id AND
           terms.term_id = ttax.term_id"

      db[query].each do |term|
        bucket =
          if options[:categories] && term[:type] == "category"
            categories
          elsif options[:tags] && term[:type] == "post_tag"
            tags
          end
        next unless bucket

        bucket << (options[:clean_entities] ? clean_entities(term[:name]) : term[:name])
      end

      [categories, tags]
    end


    # Returns the non-spam comments of +post+ as an array of hashes sorted by
    # comment id; empty when comments are disabled or the post has none.
    def self.comments_for(post, db, options)
      return [] unless options[:comments] && post[:comment_count].to_i > 0

      px = options[:table_prefix]
      query =
        "SELECT
           comment_ID           AS `id`,
           comment_author       AS `author`,
           comment_author_email AS `author_email`,
           comment_author_url   AS `author_url`,
           comment_date         AS `date`,
           comment_date_gmt     AS `date_gmt`,
           comment_content      AS `content`
         FROM #{px}comments
         WHERE
           comment_post_ID = #{post[:id].to_i} AND
           comment_approved != 'spam'"

      comments = []
      db[query].each do |comment|
        # Copy before re-tagging the encoding so the row is not modified.
        body = comment[:content].to_s.dup
        body.force_encoding("UTF-8") if body.respond_to?(:force_encoding)
        author = comment[:author].to_s

        if options[:clean_entities]
          body = clean_entities(body)
          author = clean_entities(author)
        end

        comments << {
          'id'           => comment[:id].to_i,
          'author'       => author,
          'author_email' => comment[:author_email].to_s,
          'author_url'   => comment[:author_url].to_s,
          'date'         => comment[:date].to_s,
          'date_gmt'     => comment[:date_gmt].to_s,
          'content'      => body,
        }
      end

      comments.sort_by { |comment| comment['id'] }
    end

    private_class_method :posts_query, :terms_for, :comments_for

  end
end
@@ -0,0 +1,82 @@
1
+ # coding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'hpricot'
5
+ require 'fileutils'
6
+ require 'safe_yaml'
7
+ require 'time'
8
+
9
module JekyllImport
  # This importer takes a wordpress.xml file, which can be exported from your
  # wordpress.com blog (/wp-admin/export.php).
  module WordpressDotCom
    # Reads +filename+ and writes every <item> of the export to
    # `_<post type>s/<date>-<slug>.html` in the current working directory,
    # with a YAML header built from the item's title, categories, status,
    # type and post meta. An item that cannot be imported is reported on
    # standard output and skipped; a per-type summary is printed at the end.
    def self.process(filename = "wordpress.xml")
      import_count = Hash.new(0)
      doc = Hpricot::XML(File.read(filename))

      (doc/:channel/:item).each do |item|
        # Declared up front so the error report can print whatever was
        # already known when the item failed.
        title = nil
        name = nil

        begin
          title = item.at(:title).inner_text.strip
          permalink_title = item.at('wp:post_name').inner_text
          # Fallback to "prettified" title if post_name is empty (can happen)
          permalink_title = sluggify(title) if permalink_title == ""

          # Time.parse raises on malformed dates; keeping it inside the
          # begin block means one bad item no longer aborts the whole run.
          date = Time.parse(item.at('wp:post_date').inner_text)
          status = item.at('wp:status').inner_text
          published = (status == "publish")

          type = item.at('wp:post_type').inner_text
          tags = (item/:category).map { |c| c.inner_text }.reject { |c| c == 'Uncategorized' }.uniq

          metas = {}
          item.search("wp:postmeta").each do |meta|
            key = meta.at('wp:meta_key').inner_text
            metas[key] = meta.at('wp:meta_value').inner_text
          end

          name = "#{date.strftime('%Y-%m-%d')}-#{permalink_title}.html"
          header = {
            'layout'    => type,
            'title'     => title,
            'tags'      => tags,
            'status'    => status,
            'type'      => type,
            'published' => published,
            'meta'      => metas
          }

          FileUtils.mkdir_p "_#{type}s"
          File.open("_#{type}s/#{name}", "w") do |f|
            f.puts header.to_yaml
            f.puts '---'
            f.puts item.at('content:encoded').inner_text
          end

          import_count[type] += 1
        rescue => e
          puts "Couldn't import post!"
          puts "Title: #{title}"
          puts "Name/Slug: #{name}\n"
          puts "Error: #{e.message}"
        end
      end

      import_count.each do |key, value|
        puts "Imported #{value} #{key}s"
      end
    end

    # Turns +title+ into a lower-case slug: every run of non-alphanumeric
    # characters becomes a single hyphen, and hyphens at either end are
    # dropped so "Hello, World!" gives "hello-world" rather than
    # "hello-world-".
    def self.sluggify(title)
      title.gsub(/[^[:alnum:]]+/, '-').gsub(/\A-+|-+\z/, '').downcase
    end
  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,43 @@
1
# Shared setup for the test suite: optional coverage, test libraries, the
# importers under test, and helper methods mixed into every test case.

# Coverage has to start before any of the code under test is loaded.
if RUBY_VERSION > '1.9' && ENV["COVERAGE"] == "true"
  require 'simplecov'
  require 'simplecov-gem-adapter'
  SimpleCov.start('gem')
end

# Used by the helpers below (clear_dest and capture_stdout).
require 'fileutils'
require 'stringio'

require 'test/unit'
require 'redgreen' if RUBY_VERSION < '1.9'
require 'shoulda'
require 'rr'

Dir.glob(File.expand_path('../../lib/jekyll/jekyll-import/*', __FILE__)).each do |f|
  require f
end

# Send STDERR into the void to suppress program output messages
STDERR.reopen(test(?e, '/dev/null') ? '/dev/null' : 'NUL:')

class Test::Unit::TestCase
  include RR::Adapters::TestUnit

  # Path inside the `dest` directory that sits next to this file.
  def dest_dir(*subdirs)
    File.join(File.dirname(__FILE__), 'dest', *subdirs)
  end

  # Path inside the `source` directory that sits next to this file.
  def source_dir(*subdirs)
    File.join(File.dirname(__FILE__), 'source', *subdirs)
  end

  # Removes the `dest` directory and everything below it.
  def clear_dest
    FileUtils.rm_rf(dest_dir)
  end

  # Runs the given block with $stdout redirected to an in-memory buffer and
  # returns everything the block printed. The previous stream is kept in a
  # local variable (not a global) so nested calls each restore their own
  # stream, and it is restored even when the block raises.
  def capture_stdout
    previous_stdout = $stdout
    buffer = StringIO.new
    $stdout = buffer
    yield
    buffer.string
  ensure
    $stdout = previous_stdout
  end
end