jekyll-import 0.1.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -0
- data/History.txt +7 -0
- data/LICENSE +21 -0
- data/README.md +4 -0
- data/Rakefile +151 -0
- data/jekyll-import.gemspec +80 -0
- data/lib/jekyll-import.rb +7 -0
- data/lib/jekyll/commands/import.rb +51 -0
- data/lib/jekyll/jekyll-import/csv.rb +26 -0
- data/lib/jekyll/jekyll-import/drupal6.rb +102 -0
- data/lib/jekyll/jekyll-import/drupal7.rb +73 -0
- data/lib/jekyll/jekyll-import/enki.rb +49 -0
- data/lib/jekyll/jekyll-import/joomla.rb +53 -0
- data/lib/jekyll/jekyll-import/marley.rb +52 -0
- data/lib/jekyll/jekyll-import/mephisto.rb +84 -0
- data/lib/jekyll/jekyll-import/mt.rb +142 -0
- data/lib/jekyll/jekyll-import/posterous.rb +111 -0
- data/lib/jekyll/jekyll-import/rss.rb +63 -0
- data/lib/jekyll/jekyll-import/s9y.rb +49 -0
- data/lib/jekyll/jekyll-import/textpattern.rb +58 -0
- data/lib/jekyll/jekyll-import/tumblr.rb +195 -0
- data/lib/jekyll/jekyll-import/typo.rb +67 -0
- data/lib/jekyll/jekyll-import/wordpress.rb +296 -0
- data/lib/jekyll/jekyll-import/wordpressdotcom.rb +82 -0
- data/test/helper.rb +43 -0
- data/test/test_mt_importer.rb +104 -0
- data/test/test_wordpress_importer.rb +9 -0
- data/test/test_wordpressdotcom_importer.rb +8 -0
- metadata +334 -0
@@ -0,0 +1,67 @@
|
|
1
|
+
# Author: Toby DiPasquale <toby@cbcg.net>
|
2
|
+
require 'fileutils'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'sequel'
|
5
|
+
require 'safe_yaml'
|
6
|
+
|
7
|
+
module JekyllImport
  # Importer for Typo blogs: reads articles from the Typo database through
  # Sequel and writes one file per published article into ./_posts.
  module Typo
    # This SQL *should* work for both MySQL and PostgreSQL.
    SQL = <<-EOS
    SELECT c.id id,
           c.title title,
           c.permalink slug,
           c.body body,
           c.extended extended,
           c.published_at date,
           c.state state,
           COALESCE(tf.name, 'html') filter
      FROM contents c
           LEFT OUTER JOIN text_filters tf
                        ON c.text_filter_id = tf.id
    EOS

    # Export every published article as a file under ./_posts.
    #
    # server:: Database flavour, 'mysql' or 'postgres' (String or Symbol).
    # dbname:: The name of the database
    # user::   The database user name
    # pass::   The database user's password
    # host::   The address of the database host. Default: 'localhost'
    #
    # Each file is named YYYY-MM-DD-slug.filter, where filter is the first
    # text filter name of the article, and contains a YAML header (layout,
    # title, typo_id) followed by '---' and the article body. When the
    # article has an extended part it is appended to the body after a
    # "<!-- more -->" marker.
    #
    # Raises RuntimeError when server is neither 'mysql' nor 'postgres'.
    def self.process(server, dbname, user, pass, host='localhost')
      FileUtils.mkdir_p '_posts'

      connection = { :user => user, :password => pass, :host => host, :encoding => 'utf8' }
      db = case server.to_sym
           when :postgres then Sequel.postgres(dbname, connection)
           when :mysql    then Sequel.mysql(dbname, connection)
           else raise "Unknown database server '#{server}'"
           end

      db[SQL].each do |post|
        next unless post[:state].to_s =~ /published/

        slug = post[:slug] || "no slug"

        # Work on a copy so the row is left untouched, and tolerate a NULL body.
        body = post[:body].to_s.dup
        extended = post[:extended].to_s
        # An empty string is truthy in Ruby: only add the marker when there
        # really is extended text to put after it.
        unless extended.strip.empty?
          body << "\n<!-- more -->\n"
          body << extended
        end

        date = post[:date]
        name = [ sprintf("%04d", date.year),
                 sprintf("%02d", date.month),
                 sprintf("%02d", date.day),
                 slug.strip ].join('-')

        # Can have more than one text filter in this field, but we just want
        # the first one for this. COALESCE only covers NULL, so fall back to
        # 'html' for an empty filter name as well.
        name += '.' + (post[:filter].to_s.split(' ').first || 'html')

        header = { 'layout'  => 'post',
                   'title'   => post[:title].to_s,
                   'typo_id' => post[:id]
                 }.delete_if { |k, v| v.nil? || v == '' }

        File.open("_posts/#{name}", 'w') do |f|
          f.puts header.to_yaml
          f.puts '---'
          f.puts body.delete("\r")
        end
      end
    end

  end
end
|
@@ -0,0 +1,296 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'sequel'
|
3
|
+
require 'fileutils'
|
4
|
+
require 'psych'
|
5
|
+
require 'safe_yaml'
|
6
|
+
|
7
|
+
# NOTE: This converter requires Sequel and the MySQL gems.
|
8
|
+
# The MySQL gem can be difficult to install on OS X. Once you have MySQL
|
9
|
+
# installed, running the following commands should work:
|
10
|
+
# $ sudo gem install sequel
|
11
|
+
# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
|
12
|
+
|
13
|
+
module JekyllImport
  # Importer for self-hosted WordPress blogs: reads posts (and optionally
  # their categories, tags and comments) from the WordPress MySQL database
  # through Sequel and writes one Markdown file per post into ./_posts.
  module WordPress

    # Entities that must be turned back into literal characters after
    # HTMLEntities has encoded a text, so that HTML markup in posts and
    # comments keeps working.
    BASIC_ENTITIES = {
      "&amp;"  => "&",
      "&lt;"   => "<",
      "&gt;"   => ">",
      "&quot;" => '"',
      "&apos;" => "'",
      "&#47;"  => "/"
    }.freeze

    # Matches exactly the keys of BASIC_ENTITIES.
    BASIC_ENTITY_PATTERN = /&(?:amp|lt|gt|quot|apos|#47);/

    # The WordPress "read more" marker inside post content.
    MORE_TAG = /<!-- *more *-->/

    # Main migrator function. Call this to perform the migration.
    #
    # dbname::  The name of the database
    # user::    The database user name
    # pass::    The database user's password
    # host::    The address of the MySQL database host. Default: 'localhost'
    # options:: A hash table of configuration options.
    #
    # Supported options are:
    #
    # :table_prefix::   Prefix of database tables used by WordPress.
    #                   Default: 'wp_'
    # :clean_entities:: If true, convert non-ASCII characters to HTML
    #                   entities in the posts, comments, titles, and
    #                   names. Requires the 'htmlentities' gem to
    #                   work. Default: true.
    # :comments::       If true, migrate post comments too. Comments
    #                   are saved in the post's YAML front matter.
    #                   Default: true.
    # :categories::     If true, save the post's categories in its
    #                   YAML front matter. Default: true.
    # :tags::           If true, save the post's tags in its
    #                   YAML front matter. Default: true.
    # :more_excerpt::   If true, when a post has no excerpt but
    #                   does have a <!-- more --> tag, use the
    #                   preceding post content as the excerpt.
    #                   Default: true.
    # :more_anchor::    If true, convert a <!-- more --> tag into
    #                   two HTML anchors with ids "more" and
    #                   "more-NNN" (where NNN is the post number).
    #                   Default: true.
    # :status::         Array of allowed post statuses. Only
    #                   posts with matching status will be migrated.
    #                   Known statuses are :publish, :draft, :private,
    #                   and :revision. If this is nil or an empty
    #                   array, all posts are migrated regardless of
    #                   status. Default: [:publish].
    #
    def self.process(dbname, user, pass, host='localhost', options={})
      options = {
        :table_prefix   => 'wp_',
        :clean_entities => true,
        :comments       => true,
        :categories     => true,
        :tags           => true,
        :more_excerpt   => true,
        :more_anchor    => true,
        :status         => [:publish] # :draft, :private, :revision
      }.merge(options)

      if options[:clean_entities]
        begin
          require 'htmlentities'
        rescue LoadError
          STDERR.puts "Could not require 'htmlentities', so the " +
                      ":clean_entities option is now disabled."
          options[:clean_entities] = false
        end
      end

      FileUtils.mkdir_p("_posts")

      db = Sequel.mysql(dbname, :user => user, :password => pass,
                        :host => host, :encoding => 'utf8')

      px = options[:table_prefix]

      posts_query = "
         SELECT
           posts.ID            AS `id`,
           posts.guid          AS `guid`,
           posts.post_type     AS `type`,
           posts.post_status   AS `status`,
           posts.post_title    AS `title`,
           posts.post_name     AS `slug`,
           posts.post_date     AS `date`,
           posts.post_content  AS `content`,
           posts.post_excerpt  AS `excerpt`,
           posts.comment_count AS `comment_count`,
           users.display_name  AS `author`,
           users.user_login    AS `author_login`,
           users.user_email    AS `author_email`,
           users.user_url      AS `author_url`
         FROM #{px}posts AS `posts`
           LEFT JOIN #{px}users AS `users`
             ON posts.post_author = users.ID"

      # nil or an empty list means "no status filter".
      statuses = Array(options[:status])
      unless statuses.empty?
        conditions = statuses.map { |status| "posts.post_status = '#{status}'" }
        posts_query << "
         WHERE " << conditions.join(" OR
           ")
      end

      db[posts_query].each do |post|
        process_post(post, db, options)
      end
    end


    # Write a single post row to _posts/YYYY-MM-DD-slug.markdown.
    #
    # post::    Row hash produced by the posts query in process.
    # db::      Database handle; db[sql] must yield row hashes.
    # options:: The fully merged options hash built by process.
    #
    # The file holds the YAML front matter (empty fields are dropped),
    # a '---' line, and the post content.
    def self.process_post(post, db, options)
      # A NULL title must not break slug generation.
      title = post[:title].to_s
      title = clean_entities(title) if options[:clean_entities]

      slug = post[:slug]
      slug = sluggify(title) if slug.nil? || slug.empty?

      date = post[:date] || Time.now
      name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month,
                                             date.day, slug]

      content = post[:content].to_s
      content = clean_entities(content) if options[:clean_entities]

      excerpt = post[:excerpt].to_s
      more_anchor = nil

      more_index = content.index(MORE_TAG)
      if more_index
        if options[:more_excerpt] && excerpt.empty?
          excerpt = content[0...more_index]
        end
        if options[:more_anchor]
          # Record the anchor id so it reaches the front matter; it was
          # previously never assigned and therefore always dropped.
          more_anchor = "more"
          content = content.sub(MORE_TAG,
                                "<a id=\"#{more_anchor}\"></a>" +
                                "<a id=\"#{more_anchor}-#{post[:id]}\"></a>")
        end
      end

      categories, tags = fetch_terms(post, db, options)
      comments = fetch_comments(post, db, options)

      # Get the relevant fields as a hash, delete empty fields and
      # convert to YAML for the header.
      data = {
        'layout'        => post[:type].to_s,
        'status'        => post[:status].to_s,
        'published'     => (post[:status].to_s == "publish"),
        'title'         => title.to_s,
        'author'        => post[:author].to_s,
        'author_login'  => post[:author_login].to_s,
        'author_email'  => post[:author_email].to_s,
        'author_url'    => post[:author_url].to_s,
        'excerpt'       => excerpt,
        'more_anchor'   => more_anchor,
        'wordpress_id'  => post[:id],
        'wordpress_url' => post[:guid].to_s,
        'date'          => date,
        'categories'    => options[:categories] ? categories : nil,
        'tags'          => options[:tags] ? tags : nil,
        'comments'      => options[:comments] ? comments : nil
      }.delete_if { |k,v| v.nil? || v == '' }.to_yaml

      # Write out the data and content to file
      File.open("_posts/#{name}", "w") do |f|
        f.puts data
        f.puts "---"
        f.puts content
      end
    end


    # Collect the category and tag names attached to a post.
    # Returns [categories, tags]; both are empty when neither the
    # :categories nor the :tags option is enabled.
    def self.fetch_terms(post, db, options)
      categories = []
      tags = []
      return [categories, tags] unless options[:categories] || options[:tags]

      px = options[:table_prefix]
      query =
        "SELECT
           terms.name AS `name`,
           ttax.taxonomy AS `type`
         FROM
           #{px}terms AS `terms`,
           #{px}term_relationships AS `trels`,
           #{px}term_taxonomy AS `ttax`
         WHERE
           trels.object_id = '#{post[:id]}' AND
           trels.term_taxonomy_id = ttax.term_taxonomy_id AND
           terms.term_id = ttax.term_id"

      db[query].each do |term|
        bucket = if options[:categories] && term[:type] == "category"
                   categories
                 elsif options[:tags] && term[:type] == "post_tag"
                   tags
                 end
        # Other taxonomies are ignored.
        next unless bucket

        bucket << (options[:clean_entities] ? clean_entities(term[:name]) : term[:name])
      end

      [categories, tags]
    end
    private_class_method :fetch_terms


    # Collect the non-spam comments of a post as an array of hashes,
    # ordered by comment id. Returns an empty array when the :comments
    # option is off or the post reports no comments.
    def self.fetch_comments(post, db, options)
      comments = []
      return comments unless options[:comments] && post[:comment_count].to_i > 0

      px = options[:table_prefix]
      query =
        "SELECT
           comment_ID           AS `id`,
           comment_author       AS `author`,
           comment_author_email AS `author_email`,
           comment_author_url   AS `author_url`,
           comment_date         AS `date`,
           comment_date_gmt     AS `date_gmt`,
           comment_content      AS `content`
         FROM #{px}comments
         WHERE
           comment_post_ID = '#{post[:id]}' AND
           comment_approved != 'spam'"

      db[query].each do |comment|
        comcontent = comment[:content].to_s
        if comcontent.respond_to?(:force_encoding)
          comcontent.force_encoding("UTF-8")
        end
        comauthor = comment[:author].to_s

        if options[:clean_entities]
          comcontent = clean_entities(comcontent)
          comauthor = clean_entities(comauthor)
        end

        comments << {
          'id'           => comment[:id].to_i,
          'author'       => comauthor,
          'author_email' => comment[:author_email].to_s,
          'author_url'   => comment[:author_url].to_s,
          'date'         => comment[:date].to_s,
          'date_gmt'     => comment[:date_gmt].to_s,
          'content'      => comcontent
        }
      end

      comments.sort! { |a, b| a['id'] <=> b['id'] }
    end
    private_class_method :fetch_comments


    # Convert the characters of text to named HTML entities (via the
    # HTMLEntities class) while leaving the markup characters alone.
    #
    # Returns a new String; the argument is not modified. nil is treated
    # as an empty string.
    def self.clean_entities( text )
      text = text.to_s.dup
      if text.respond_to?(:force_encoding)
        text.force_encoding("UTF-8")
      end
      text = HTMLEntities.new.encode(text, :named)
      # We don't want to convert these, it would break all
      # HTML tags in the post and comments. The entities are undone in a
      # single pass so that an entity the author wrote on purpose (which
      # the encoder turned into e.g. "&amp;lt;") comes back as "&lt;"
      # instead of being decoded twice into a bare "<".
      text.gsub(BASIC_ENTITY_PATTERN) { |entity| BASIC_ENTITIES[entity] }
    end


    # Build a lowercase, hyphen-separated ASCII slug from a post title.
    # Uses the optional 'unidecode' gem to transliterate non-ASCII
    # characters when it is available. nil is treated as an empty title.
    def self.sluggify( title )
      title = title.to_s
      begin
        require 'unidecode'
        title = title.to_ascii
      rescue LoadError
        STDERR.puts "Could not require 'unidecode'. If your post titles have non-ASCII characters, you could get nicer permalinks by installing unidecode."
      end
      title.downcase.gsub(/[^0-9A-Za-z]+/, " ").strip.gsub(" ", "-")
    end

  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'hpricot'
|
5
|
+
require 'fileutils'
|
6
|
+
require 'safe_yaml'
|
7
|
+
require 'time'
|
8
|
+
|
9
|
+
module JekyllImport
  # This importer takes a wordpress.xml file, which can be exported from your
  # wordpress.com blog (/wp-admin/export.php).
  module WordpressDotCom
    # Convert every <item> of the export into an HTML file with YAML front
    # matter.
    #
    # filename:: Path of the export file. Default: "wordpress.xml"
    #
    # Each item is written to _<type>s/YYYY-MM-DD-<slug>.html, where <type>
    # is the item's wp:post_type. An item whose file cannot be written is
    # reported on stdout and skipped. A summary with the number of imported
    # items per type is printed at the end.
    def self.process(filename = "wordpress.xml")
      import_count = Hash.new(0)
      doc = Hpricot::XML(File.read(filename))

      (doc/:channel/:item).each do |item|
        title = item.at(:title).inner_text.strip

        # Fallback to "prettified" title if post_name is empty (can happen)
        permalink_title = item.at('wp:post_name').inner_text
        permalink_title = sluggify(title) if permalink_title == ""

        date      = Time.parse(item.at('wp:post_date').inner_text)
        status    = item.at('wp:status').inner_text
        published = (status == "publish")
        type      = item.at('wp:post_type').inner_text
        tags      = (item/:category).map { |c| c.inner_text }.reject { |c| c == 'Uncategorized' }.uniq

        metas = {}
        item.search("wp:postmeta").each do |meta|
          metas[meta.at('wp:meta_key').inner_text] = meta.at('wp:meta_value').inner_text
        end

        name = "#{date.strftime('%Y-%m-%d')}-#{permalink_title}.html"
        header = {
          'layout'    => type,
          'title'     => title,
          'tags'      => tags,
          'status'    => status,
          'type'      => type,
          'published' => published,
          'meta'      => metas
        }

        begin
          FileUtils.mkdir_p "_#{type}s"
          File.open("_#{type}s/#{name}", "w") do |f|
            f.puts header.to_yaml
            f.puts '---'
            f.puts item.at('content:encoded').inner_text
          end
        rescue => e
          puts "Couldn't import post!"
          puts "Title: #{title}"
          puts "Name/Slug: #{name}\n"
          puts "Error: #{e.message}"
          next
        end

        import_count[type] += 1
      end

      import_count.each do |key, value|
        puts "Imported #{value} #{key}s"
      end
    end

    # Turn a title into a lowercase slug: every run of non-alphanumeric
    # characters becomes a single hyphen, and hyphens at either end are
    # dropped so that "Hello, World!" gives "hello-world" rather than
    # "hello-world-".
    def self.sluggify(title)
      title.gsub(/[^[:alnum:]]+/, '-').gsub(/\A-+|-+\z/, '').downcase
    end
  end
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# Coverage must be started before any library code is required, so this
# runs first. It is opt-in: COVERAGE must be "true" and the Ruby version
# string must compare greater than '1.9'.
coverage_enabled = RUBY_VERSION > '1.9' && ENV["COVERAGE"] == "true"

if coverage_enabled
  require 'simplecov'
  require 'simplecov-gem-adapter'
  SimpleCov.start('gem')
end
|
6
|
+
|
7
|
+
require 'test/unit'
|
8
|
+
require 'redgreen' if RUBY_VERSION < '1.9'
|
9
|
+
require 'shoulda'
|
10
|
+
require 'rr'
|
11
|
+
|
12
|
+
# Load every importer that ships in lib/jekyll/jekyll-import.
importer_glob = File.expand_path('../../lib/jekyll/jekyll-import/*', __FILE__)
Dir.glob(importer_glob).each { |importer| require importer }

# Send STDERR into the void to suppress program output messages.
# '/dev/null' is used when it exists, otherwise the 'NUL:' device.
null_device = test(?e, '/dev/null') ? '/dev/null' : 'NUL:'
STDERR.reopen(null_device)
|
18
|
+
|
19
|
+
# Shared helpers for every test case of the suite.
class Test::Unit::TestCase
  include RR::Adapters::TestUnit

  # Path below the 'dest' directory that sits next to this helper file.
  def dest_dir(*subdirs)
    File.join(File.dirname(__FILE__), 'dest', *subdirs)
  end

  # Path below the 'source' directory that sits next to this helper file.
  def source_dir(*subdirs)
    File.join(File.dirname(__FILE__), 'source', *subdirs)
  end

  # Remove the 'dest' directory and everything in it.
  def clear_dest
    FileUtils.rm_rf(dest_dir)
  end

  # Run the given block with $stdout redirected into a buffer and return
  # everything that was written to it as a String.
  #
  # The previous stream is kept in a local variable (not a global), so
  # nested calls each restore the stream they replaced; $stdout is put
  # back even when the block raises.
  def capture_stdout
    previous_stdout = $stdout
    buffer = StringIO.new
    $stdout = buffer
    yield
    buffer.string
  ensure
    $stdout = previous_stdout
  end
end
|