jekyll-import 0.1.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,67 @@
1
+ # Author: Toby DiPasquale <toby@cbcg.net>
2
+ require 'fileutils'
3
+ require 'rubygems'
4
+ require 'sequel'
5
+ require 'safe_yaml'
6
+
7
module JekyllImport
  # Importer for Typo blogs: reads the `contents` table (joined with
  # `text_filters`) and writes every published row as a file under ./_posts.
  module Typo
    # This SQL *should* work for both MySQL and PostgreSQL. Aliases are
    # introduced with an explicit AS so that names such as `date` and `filter`
    # stay valid even on servers that treat them as keywords.
    SQL = <<-EOS
      SELECT c.id AS id,
             c.title AS title,
             c.permalink AS slug,
             c.body AS body,
             c.extended AS extended,
             c.published_at AS date,
             c.state AS state,
             COALESCE(tf.name, 'html') AS filter
      FROM contents c
           LEFT OUTER JOIN text_filters tf
                        ON c.text_filter_id = tf.id
    EOS

    # Runs the migration, writing one file per published post into ./_posts.
    #
    # server:: 'postgres' or 'mysql' (String or Symbol).
    # dbname:: The name of the database.
    # user::   The database user name.
    # pass::   The database user's password.
    # host::   The database host. Default: 'localhost'.
    #
    # Raises RuntimeError when +server+ names an unsupported database.
    def self.process(server, dbname, user, pass, host = 'localhost')
      FileUtils.mkdir_p '_posts'

      settings = { :user => user, :password => pass,
                   :host => host, :encoding => 'utf8' }
      db = case server.to_s.to_sym
           when :postgres then Sequel.postgres(dbname, settings)
           when :mysql    then Sequel.mysql(dbname, settings)
           else raise "Unknown database server '#{server}'"
           end

      begin
        db[SQL].each do |post|
          next unless post[:state].to_s =~ /published/
          write_post(post)
        end
      ensure
        # Release the connection even if a post fails to be written.
        db.disconnect
      end
    end

    # Writes a single row as "_posts/YYYY-MM-DD-slug.filter" consisting of a
    # YAML header, a '---' separator and the post body.
    def self.write_post(post)
      slug = post[:slug].to_s.strip
      slug = "no slug" if slug.empty?

      # Work on a copy so the row returned by the driver is never mutated
      # (and so a nil or frozen body cannot raise).
      body = post[:body].to_s.dup
      extended = post[:extended].to_s
      body << "\n<!-- more -->\n" << extended unless extended.empty?

      date = post[:date] || Time.now

      # Can have more than one text filter in this field, but we just want
      # the first one for this. Fall back to 'html' when the name is blank.
      filter = post[:filter].to_s.split(' ').first || 'html'

      name = format('%04d-%02d-%02d-%s.%s',
                    date.year, date.month, date.day, slug, filter)

      header = {
        'layout'  => 'post',
        'title'   => post[:title].to_s,
        'typo_id' => post[:id]
      }.delete_if { |_key, value| value.nil? || value == '' }

      File.open("_posts/#{name}", 'w') do |f|
        f.puts header.to_yaml
        f.puts '---'
        f.puts body.delete("\r")
      end
    end
    private_class_method :write_post
  end
end
@@ -0,0 +1,296 @@
1
+ require 'rubygems'
2
+ require 'sequel'
3
+ require 'fileutils'
4
+ require 'psych'
5
+ require 'safe_yaml'
6
+
7
+ # NOTE: This converter requires Sequel and the MySQL gems.
8
+ # The MySQL gem can be difficult to install on OS X. Once you have MySQL
9
+ # installed, running the following commands should work:
10
+ # $ sudo gem install sequel
11
+ # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
12
+
13
module JekyllImport
  # Migrates posts (and optionally their categories, tags and comments) from
  # a WordPress MySQL database into files under ./_posts.
  module WordPress
    # Entities that clean_entities turns back into raw characters after
    # encoding, so that HTML markup in posts and comments keeps working.
    # The numeric forms are included in case the encoder emits them instead
    # of the named ones.
    RAW_ENTITIES = {
      "&amp;"  => "&",
      "&lt;"   => "<",
      "&gt;"   => ">",
      "&quot;" => '"',
      "&apos;" => "'",
      "&#39;"  => "'",
      "&#47;"  => "/"
    }
    RAW_ENTITY_PATTERN = /&(?:amp|lt|gt|quot|apos|#39|#47);/

    # Main migrator function. Call this to perform the migration.
    #
    # dbname::  The name of the database
    # user::    The database user name
    # pass::    The database user's password
    # host::    The address of the MySQL database host. Default: 'localhost'
    # options:: A hash table of configuration options.
    #
    # Supported options are:
    #
    # :table_prefix::   Prefix of database tables used by WordPress.
    #                   Default: 'wp_'
    # :clean_entities:: If true, convert non-ASCII characters to HTML
    #                   entities in the posts, comments, titles, and
    #                   names. Requires the 'htmlentities' gem to
    #                   work. Default: true.
    # :comments::       If true, migrate post comments too. Comments
    #                   are saved in the post's YAML front matter.
    #                   Default: true.
    # :categories::     If true, save the post's categories in its
    #                   YAML front matter. Default: true.
    # :tags::           If true, save the post's tags in its
    #                   YAML front matter. Default: true.
    # :more_excerpt::   If true, when a post has no excerpt but
    #                   does have a <!-- more --> tag, use the
    #                   preceding post content as the excerpt.
    #                   Default: true.
    # :more_anchor::    If true, convert a <!-- more --> tag into
    #                   two HTML anchors with ids "more" and
    #                   "more-NNN" (where NNN is the post number).
    #                   Default: true.
    # :status::         Array of allowed post statuses (a single status
    #                   is accepted too). Only posts with matching status
    #                   will be migrated. Known statuses are :publish,
    #                   :draft, :private, and :revision. If this is nil
    #                   or an empty array, all posts are migrated
    #                   regardless of status. Default: [:publish].
    #
    def self.process(dbname, user, pass, host = 'localhost', options = {})
      options = {
        :table_prefix   => 'wp_',
        :clean_entities => true,
        :comments       => true,
        :categories     => true,
        :tags           => true,
        :more_excerpt   => true,
        :more_anchor    => true,
        :status         => [:publish] # :draft, :private, :revision
      }.merge(options)

      if options[:clean_entities]
        begin
          # Optional dependency: loaded lazily so the importer still works
          # (without entity cleaning) when the gem is not installed.
          require 'htmlentities'
        rescue LoadError
          STDERR.puts "Could not require 'htmlentities', so the " +
                      ":clean_entities option is now disabled."
          options[:clean_entities] = false
        end
      end

      FileUtils.mkdir_p("_posts")

      db = Sequel.mysql(dbname, :user => user, :password => pass,
                        :host => host, :encoding => 'utf8')

      begin
        db[posts_query(db, options)].each do |post|
          process_post(post, db, options)
        end
      ensure
        # Release the connection even if a post fails to be written.
        db.disconnect
      end
    end

    # Builds the SELECT that lists the posts to migrate, restricted to the
    # statuses in options[:status] when any are given. Status values are
    # quoted by the database adapter rather than pasted into the SQL as-is.
    def self.posts_query(db, options)
      px = options[:table_prefix]

      query = "
         SELECT
           posts.ID            AS `id`,
           posts.guid          AS `guid`,
           posts.post_type     AS `type`,
           posts.post_status   AS `status`,
           posts.post_title    AS `title`,
           posts.post_name     AS `slug`,
           posts.post_date     AS `date`,
           posts.post_content  AS `content`,
           posts.post_excerpt  AS `excerpt`,
           posts.comment_count AS `comment_count`,
           users.display_name  AS `author`,
           users.user_login    AS `author_login`,
           users.user_email    AS `author_email`,
           users.user_url      AS `author_url`
         FROM #{px}posts AS `posts`
           LEFT JOIN #{px}users AS `users`
             ON posts.post_author = users.ID"

      statuses = Array(options[:status])
      return query if statuses.empty?

      quoted = statuses.map { |status| db.literal(status.to_s) }
      "#{query}
         WHERE posts.post_status IN (#{quoted.join(', ')})"
    end
    private_class_method :posts_query

    # Converts one row of the posts query into
    # "_posts/YYYY-MM-DD-slug.markdown": a YAML header, a "---" separator
    # and the post content.
    #
    # post::    Hash of the columns selected by the posts query.
    # db::      Database handle used to look up terms and comments. It is
    #           not touched when those options are all disabled.
    # options:: The merged option hash described on ::process.
    def self.process_post(post, db, options)
      title = post[:title].to_s
      title = clean_entities(title) if options[:clean_entities]

      slug = post[:slug].to_s
      # Derive the fallback slug from the raw title: the cleaned title may
      # contain entity names that would leak into the file name.
      slug = sluggify(post[:title]) if slug.empty?

      date = post[:date] || Time.now
      name = "%04d-%02d-%02d-%s.markdown" % [date.year, date.month,
                                             date.day, slug]

      # Copy before editing so the row handed in by the caller is untouched.
      content = post[:content].to_s.dup
      content = clean_entities(content) if options[:clean_entities]

      excerpt = post[:excerpt].to_s

      more_anchor = nil
      more_index = content.index(/<!-- *more *-->/)
      if more_index
        if options[:more_excerpt] && excerpt.empty?
          excerpt = content[0...more_index]
        end
        if options[:more_anchor]
          # Record the anchor id so it ends up in the front matter.
          more_anchor = "more"
          content.sub!(/<!-- *more *-->/,
                       "<a id=\"more\"></a>" +
                       "<a id=\"more-#{post[:id]}\"></a>")
        end
      end

      categories, tags = terms_for(post, db, options)
      comments = comments_for(post, db, options)

      # Get the relevant fields as a hash, delete empty fields and
      # convert to YAML for the header.
      data = {
        'layout'        => post[:type].to_s,
        'status'        => post[:status].to_s,
        'published'     => (post[:status].to_s == "publish"),
        'title'         => title.to_s,
        'author'        => post[:author].to_s,
        'author_login'  => post[:author_login].to_s,
        'author_email'  => post[:author_email].to_s,
        'author_url'    => post[:author_url].to_s,
        'excerpt'       => excerpt,
        'more_anchor'   => more_anchor,
        'wordpress_id'  => post[:id],
        'wordpress_url' => post[:guid].to_s,
        'date'          => date,
        'categories'    => options[:categories] ? categories : nil,
        'tags'          => options[:tags] ? tags : nil,
        'comments'      => options[:comments] ? comments : nil
      }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

      # Write out the data and content to file
      File.open("_posts/#{name}", "w") do |f|
        f.puts data
        f.puts "---"
        f.puts content
      end
    end

    # Returns [categories, tags] for the post; both are empty arrays when
    # neither option is enabled (in which case no query is issued).
    def self.terms_for(post, db, options)
      categories = []
      tags = []
      return [categories, tags] unless options[:categories] || options[:tags]

      px = options[:table_prefix]
      cquery =
        "SELECT
           terms.name AS `name`,
           ttax.taxonomy AS `type`
         FROM
           #{px}terms AS `terms`,
           #{px}term_relationships AS `trels`,
           #{px}term_taxonomy AS `ttax`
         WHERE
           trels.object_id = '#{post[:id].to_i}' AND
           trels.term_taxonomy_id = ttax.term_taxonomy_id AND
           terms.term_id = ttax.term_id"

      db[cquery].each do |term|
        bucket = if options[:categories] && term[:type] == "category"
                   categories
                 elsif options[:tags] && term[:type] == "post_tag"
                   tags
                 end
        next unless bucket

        bucket << (options[:clean_entities] ? clean_entities(term[:name]) : term[:name])
      end

      [categories, tags]
    end
    private_class_method :terms_for

    # Returns the post's non-spam comments as an array of string-keyed
    # hashes ordered by comment id. Empty when comments are disabled or the
    # post's comment_count is zero.
    def self.comments_for(post, db, options)
      return [] unless options[:comments] && post[:comment_count].to_i > 0

      px = options[:table_prefix]
      cquery =
        "SELECT
           comment_ID           AS `id`,
           comment_author       AS `author`,
           comment_author_email AS `author_email`,
           comment_author_url   AS `author_url`,
           comment_date         AS `date`,
           comment_date_gmt     AS `date_gmt`,
           comment_content      AS `content`
         FROM #{px}comments
         WHERE
           comment_post_ID = '#{post[:id].to_i}' AND
           comment_approved != 'spam'"

      comments = db[cquery].map do |comment|
        comcontent = comment[:content].to_s.dup
        comcontent.force_encoding("UTF-8") if comcontent.respond_to?(:force_encoding)
        comcontent = clean_entities(comcontent) if options[:clean_entities]

        comauthor = comment[:author].to_s
        comauthor = clean_entities(comauthor) if options[:clean_entities]

        {
          'id'           => comment[:id].to_i,
          'author'       => comauthor,
          'author_email' => comment[:author_email].to_s,
          'author_url'   => comment[:author_url].to_s,
          'date'         => comment[:date].to_s,
          'date_gmt'     => comment[:date_gmt].to_s,
          'content'      => comcontent
        }
      end

      comments.sort_by { |comment| comment['id'] }
    end
    private_class_method :comments_for

    # Encodes +text+ with HTMLEntities (named entities) and then restores
    # the characters HTML markup is made of, so tags, attributes and URLs
    # survive unchanged. Returns a new string; +text+ itself is not
    # modified, and nil is treated as an empty string.
    def self.clean_entities(text)
      text = text.to_s.dup
      text.force_encoding("UTF-8") if text.respond_to?(:force_encoding)
      encoded = HTMLEntities.new.encode(text, :named)
      # We don't want to convert these, it would break all
      # HTML tags in the post and comments. A single pass is used so that
      # an entity written literally in the source (which the encoder
      # escapes to "&amp;lt;" and the like) comes back as that same entity
      # instead of being decoded a second time.
      encoded.gsub(RAW_ENTITY_PATTERN) { |entity| RAW_ENTITIES[entity] }
    end

    # Turns a title into a lowercase, hyphen-separated ASCII slug. When the
    # optional 'unidecode' gem is available it is used to transliterate the
    # title first. A nil title yields an empty slug.
    def self.sluggify(title)
      title = title.to_s
      begin
        require 'unidecode'
        title = title.to_ascii
      rescue LoadError
        STDERR.puts "Could not require 'unidecode'. If your post titles have non-ASCII characters, you could get nicer permalinks by installing unidecode."
      end
      title.downcase.gsub(/[^0-9A-Za-z]+/, " ").strip.gsub(" ", "-")
    end
  end
end
@@ -0,0 +1,82 @@
1
+ # coding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'hpricot'
5
+ require 'fileutils'
6
+ require 'safe_yaml'
7
+ require 'time'
8
+
9
module JekyllImport
  # This importer takes a wordpress.xml file, which can be exported from your
  # wordpress.com blog (/wp-admin/export.php).
  module WordpressDotCom
    # Reads the export file and writes every <item> to
    # "_<type>s/YYYY-MM-DD-<slug>.html" (YAML header, "---", then the
    # content). Items that cannot be imported are reported on stdout and
    # skipped; a per-type summary is printed at the end.
    #
    # filename:: Path of the export file. Default: "wordpress.xml".
    def self.process(filename = "wordpress.xml")
      import_count = Hash.new(0)
      doc = Hpricot::XML(File.read(filename))

      (doc/:channel/:item).each do |item|
        title = text_at(item, :title).strip
        name = nil
        type = nil

        begin
          permalink_title = text_at(item, 'wp:post_name')
          # Fallback to "prettified" title if post_name is empty (can happen)
          permalink_title = sluggify(title) if permalink_title.empty?

          # Parsed inside the begin block so that one item with a missing or
          # malformed date is skipped instead of aborting the whole import.
          date = Time.parse(text_at(item, 'wp:post_date'))
          status = text_at(item, 'wp:status')
          published = (status == "publish")

          type = text_at(item, 'wp:post_type')
          # Without a type the output directory would be "_s"; treat such
          # items as regular posts.
          type = 'post' if type.empty?

          tags = (item/:category).map { |c| c.inner_text }.
                                  reject { |c| c == 'Uncategorized' }.uniq

          metas = {}
          item.search("wp:postmeta").each do |meta|
            metas[text_at(meta, 'wp:meta_key')] = text_at(meta, 'wp:meta_value')
          end

          name = "#{date.strftime('%Y-%m-%d')}-#{permalink_title}.html"
          header = {
            'layout'    => type,
            'title'     => title,
            'tags'      => tags,
            'status'    => status,
            'type'      => type,
            'published' => published,
            'meta'      => metas
          }

          FileUtils.mkdir_p "_#{type}s"
          File.open("_#{type}s/#{name}", "w") do |f|
            f.puts header.to_yaml
            f.puts '---'
            f.puts text_at(item, 'content:encoded')
          end
        rescue => e
          puts "Couldn't import post!"
          puts "Title: #{title}"
          puts "Name/Slug: #{name}\n"
          puts "Error: #{e.message}"
          next
        end

        import_count[type] += 1
      end

      import_count.each do |key, value|
        puts "Imported #{value} #{key}s"
      end
    end

    # Returns the inner text of the first element under +node+ matching
    # +selector+, or an empty string when there is no such element.
    def self.text_at(node, selector)
      element = node.at(selector)
      element ? element.inner_text : ''
    end
    private_class_method :text_at

    # Builds a lowercase slug from +title+: every run of non-alphanumeric
    # characters becomes a single hyphen, and hyphens at either end are
    # dropped so that "Hello, World!" gives "hello-world".
    def self.sluggify(title)
      title.to_s.gsub(/[^[:alnum:]]+/, '-').gsub(/\A-+|-+\z/, '').downcase
    end
  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,43 @@
1
# Coverage has to be started before any library code is required.
if RUBY_VERSION > '1.9' && ENV["COVERAGE"] == "true"
  require 'simplecov'
  require 'simplecov-gem-adapter'
  SimpleCov.start('gem')
end

require 'test/unit'
require 'redgreen' if RUBY_VERSION < '1.9'
require 'shoulda'
require 'rr'

# Load every importer. Only Ruby files are picked up, and they are sorted
# because the order Dir.glob returns entries in is not guaranteed on every
# Ruby version and platform.
Dir.glob(File.expand_path('../../lib/jekyll/jekyll-import/*.rb', __FILE__)).sort.each do |f|
  require f
end

# Send STDERR into the void to suppress program output messages
STDERR.reopen(test(?e, '/dev/null') ? '/dev/null' : 'NUL:')
18
+
19
# Helpers shared by all test cases.
class Test::Unit::TestCase
  include RR::Adapters::TestUnit

  # Path of test/dest, optionally extended with +subdirs+.
  def dest_dir(*subdirs)
    File.join(File.dirname(__FILE__), 'dest', *subdirs)
  end

  # Path of test/source, optionally extended with +subdirs+.
  def source_dir(*subdirs)
    File.join(File.dirname(__FILE__), 'source', *subdirs)
  end

  # Removes the destination directory and everything below it.
  def clear_dest
    FileUtils.rm_rf(dest_dir)
  end

  # Runs the given block with $stdout redirected to an in-memory buffer and
  # returns everything written to it. The previous stream is kept in a local
  # variable (not a global), so nested captures restore the right stream,
  # and it is put back even when the block raises.
  def capture_stdout
    previous_stdout = $stdout
    buffer = StringIO.new
    $stdout = buffer
    yield
    buffer.string
  ensure
    $stdout = previous_stdout
  end
end