jekyll-import 0.1.0.beta3 → 0.1.0.beta4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +6 -14
  2. data/History.markdown +18 -0
  3. data/README.markdown +12 -1
  4. data/jekyll-import.gemspec +31 -25
  5. data/lib/jekyll-import.rb +50 -1
  6. data/lib/jekyll-import/importer.rb +11 -0
  7. data/lib/jekyll-import/importers.rb +10 -0
  8. data/lib/jekyll-import/importers/csv.rb +50 -0
  9. data/lib/jekyll-import/importers/drupal6.rb +139 -0
  10. data/lib/jekyll-import/importers/drupal7.rb +102 -0
  11. data/lib/jekyll-import/importers/enki.rb +76 -0
  12. data/lib/jekyll-import/importers/google_reader.rb +68 -0
  13. data/lib/jekyll-import/importers/joomla.rb +83 -0
  14. data/lib/jekyll-import/importers/jrnl.rb +127 -0
  15. data/lib/jekyll-import/importers/marley.rb +72 -0
  16. data/lib/jekyll-import/importers/mephisto.rb +109 -0
  17. data/lib/jekyll-import/importers/mt.rb +169 -0
  18. data/lib/jekyll-import/importers/posterous.rb +139 -0
  19. data/lib/jekyll-import/importers/rss.rb +71 -0
  20. data/lib/jekyll-import/importers/s9y.rb +67 -0
  21. data/lib/jekyll-import/importers/textpattern.rb +76 -0
  22. data/lib/jekyll-import/importers/tumblr.rb +265 -0
  23. data/lib/jekyll-import/importers/typo.rb +89 -0
  24. data/lib/jekyll-import/importers/wordpress.rb +323 -0
  25. data/lib/jekyll-import/importers/wordpressdotcom.rb +97 -0
  26. data/lib/jekyll/commands/import.rb +1 -0
  27. data/test/helper.rb +3 -1
  28. data/test/test_jrnl_importer.rb +39 -0
  29. data/test/test_mt_importer.rb +16 -16
  30. data/test/test_tumblr_importer.rb +61 -0
  31. data/test/test_wordpress_importer.rb +1 -1
  32. data/test/test_wordpressdotcom_importer.rb +1 -1
  33. metadata +53 -32
  34. data/lib/jekyll/jekyll-import/csv.rb +0 -30
  35. data/lib/jekyll/jekyll-import/drupal6.rb +0 -112
  36. data/lib/jekyll/jekyll-import/drupal7.rb +0 -74
  37. data/lib/jekyll/jekyll-import/enki.rb +0 -49
  38. data/lib/jekyll/jekyll-import/google_reader.rb +0 -61
  39. data/lib/jekyll/jekyll-import/joomla.rb +0 -53
  40. data/lib/jekyll/jekyll-import/marley.rb +0 -52
  41. data/lib/jekyll/jekyll-import/mephisto.rb +0 -84
  42. data/lib/jekyll/jekyll-import/mt.rb +0 -142
  43. data/lib/jekyll/jekyll-import/posterous.rb +0 -122
  44. data/lib/jekyll/jekyll-import/rss.rb +0 -63
  45. data/lib/jekyll/jekyll-import/s9y.rb +0 -59
  46. data/lib/jekyll/jekyll-import/textpattern.rb +0 -58
  47. data/lib/jekyll/jekyll-import/tumblr.rb +0 -242
  48. data/lib/jekyll/jekyll-import/typo.rb +0 -69
  49. data/lib/jekyll/jekyll-import/wordpress.rb +0 -299
  50. data/lib/jekyll/jekyll-import/wordpressdotcom.rb +0 -84
@@ -0,0 +1,89 @@
1
+ # Author: Toby DiPasquale <toby@cbcg.net>
2
+ module JekyllImport
3
+ module Importers
4
+ class Typo < Importer
5
+ # This SQL *should* work for both MySQL and PostgreSQL.
6
# Query returning one row per entry of Typo's `contents` table. Columns are
# aliased to the keys read by `process` (:id, :title, :slug, :body,
# :extended, :date, :state, :keywords, :filter); entries without a text
# filter get 'html' through COALESCE.
# Frozen so that the shared constant cannot be modified by accident.
SQL = <<-EOS.freeze
  SELECT c.id id,
         c.title title,
         c.permalink slug,
         c.body body,
         c.extended extended,
         c.published_at date,
         c.state state,
         c.keywords keywords,
         COALESCE(tf.name, 'html') filter
    FROM contents c
         LEFT OUTER JOIN text_filters tf
                      ON c.text_filter_id = tf.id
EOS
20
+
21
# Hands the names of the libraries this importer needs to
# JekyllImport.require_with_fallback (defined elsewhere; presumably it
# requires each one -- confirm against that helper).
def self.require_deps
  dependencies = %w[rubygems sequel fileutils safe_yaml]
  JekyllImport.require_with_fallback(dependencies)
end
29
+
30
# Declares the command-line options of the Typo importer.
#
# c - the command object; every option is declared with
#     c.option(key, switch, description).
def self.specify_options(c)
  option_specs = [
    ['server',   '--server TYPE', 'Server type ("mysql" or "postgres")'],
    ['dbname',   '--dbname DB',   'Database name'],
    ['user',     '--user USER',   'Database user name'],
    ['password', '--password PW', "Database user's password (default: '')"],
    ['host',     '--host HOST',   'Database host name']
  ]
  # map + last keeps the result of the final c.option call as return value.
  option_specs.map { |spec| c.option(*spec) }.last
end
37
+
38
# Writes every published Typo entry into the _posts/ directory.
#
# options - Hash with String keys:
#           'server'   - "mysql" or "postgres" (required)
#           'dbname'   - database name (required)
#           'user'     - database user name (required)
#           'password' - database password (default: '')
#           'host'     - database host (default: "localhost")
#
# Raises KeyError when a required key is absent and RuntimeError when the
# server type is neither "mysql" nor "postgres".
def self.process(options)
  server   = options.fetch('server')
  database = options.fetch('dbname')
  username = options.fetch('user')
  password = options.fetch('password', '')
  hostname = options.fetch('host', "localhost")

  FileUtils.mkdir_p '_posts'

  db =
    case server.intern
    when :postgres
      Sequel.postgres(database, :user => username, :password => password, :host => hostname, :encoding => 'utf8')
    when :mysql
      Sequel.mysql(database, :user => username, :password => password, :host => hostname, :encoding => 'utf8')
    else
      raise "Unknown database server '#{server}'"
    end

  db[SQL].each do |entry|
    # Only entries whose state mentions "published" are exported.
    next unless entry[:state] =~ /published/i

    entry[:slug] = "no slug" if entry[:slug].nil?

    # The extended part is appended to the body after a "more" marker.
    if entry[:extended]
      entry[:body] << "\n<!-- more -->\n" << entry[:extended]
    end

    published_on = entry[:date]
    name_parts = [
      sprintf("%.04d", published_on.year),
      sprintf("%.02d", published_on.month),
      sprintf("%.02d", published_on.day),
      entry[:slug].strip
    ]

    # Can have more than one text filter in this field, but only the first
    # one is used as the file extension.
    extension = '.' + entry[:filter].split(' ')[0]
    file_name = name_parts.join('-') + extension

    File.open("_posts/#{file_name}", 'w') do |file|
      front_matter = {
        'layout'  => 'post',
        'title'   => entry[:title] && entry[:title].to_s.force_encoding('UTF-8'),
        'tags'    => entry[:keywords] && entry[:keywords].to_s.force_encoding('UTF-8'),
        'typo_id' => entry[:id]
      }
      front_matter.delete_if { |_key, value| value.nil? || value == '' }

      file.puts front_matter.to_yaml
      file.puts '---'
      file.puts entry[:body].delete("\r")
    end
  end
end
86
+
87
+ end
88
+ end
89
+ end
@@ -0,0 +1,323 @@
1
+ # NOTE: This converter requires Sequel and the MySQL gems.
2
+ # The MySQL gem can be difficult to install on OS X. Once you have MySQL
3
+ # installed, running the following commands should work:
4
+ # $ sudo gem install sequel
5
+ # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
6
+ #
7
+ # If you are running MacPorts, the mysql2 gem can be installed like this:
8
+ # $ gem install mysql2 -- --with-mysql-lib=/opt/local/lib/mysql56/mysql/ --with-mysql-include=/opt/local/include/mysql56/mysql
9
+
10
+ module JekyllImport
11
+ module Importers
12
+ class WordPress < Importer
13
+
14
# Passes the list of library names needed by the WordPress importer to
# JekyllImport.require_with_fallback (defined elsewhere; presumably it
# requires each one -- confirm against that helper).
def self.require_deps
  needed = %w[rubygems sequel fileutils safe_yaml]
  JekyllImport.require_with_fallback(needed)
end
22
+
23
# Declares the command-line options of the WordPress importer.
#
# c - the command object; every option is declared with
#     c.option(key, switch, [type,] description).
def self.specify_options(c)
  c.option 'dbname', '--dbname DB', 'Database name (default: "")'
  c.option 'user', '--user USER', 'Database user name (default: "")'
  # The inner quotes must be escaped: written as `"...(default: "")"` Ruby
  # sees two adjacent literals and the help text loses the quotes.
  c.option 'password', '--password PW', "Database user's password (default: \"\")"
  c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
  c.option 'table_prefix', '--prefix PREFIX', 'Table prefix name (default: "wp_")'
  c.option 'clean_entities', '--clean_entities', 'Whether to clean entities (default: true)'
  c.option 'comments', '--comments', 'Whether to import comments (default: true)'
  c.option 'categories', '--categories', 'Whether to import categories (default: true)'
  c.option 'tags', '--tags', 'Whether to import tags (default: true)'
  c.option 'more_excerpt', '--more_excerpt', 'Whether to use more excerpt (default: true)'
  c.option 'more_anchor', '--more_anchor', 'Whether to use more anchor (default: true)'
  c.option 'status', '--status STATUS,STATUS2', Array, 'Array of allowed statuses (default: ["publish"], other options: "draft", "private", "revision")'
end
37
+
38
+ # Main migrator function. Call this to perform the migration.
39
+ #
40
+ # dbname:: The name of the database
41
+ # user:: The database user name
42
+ # pass:: The database user's password
43
+ # host:: The address of the MySQL database host. Default: 'localhost'
44
+ # options:: A hash table of configuration options.
45
+ #
46
+ # Supported options are:
47
+ #
48
+ # :table_prefix:: Prefix of database tables used by WordPress.
49
+ # Default: 'wp_'
50
+ # :clean_entities:: If true, convert non-ASCII characters to HTML
51
+ # entities in the posts, comments, titles, and
52
+ # names. Requires the 'htmlentities' gem to
53
+ # work. Default: true.
54
+ # :comments:: If true, migrate post comments too. Comments
55
+ # are saved in the post's YAML front matter.
56
+ # Default: true.
57
+ # :categories:: If true, save the post's categories in its
58
+ # YAML front matter.
59
+ # :tags:: If true, save the post's tags in its
60
+ # YAML front matter.
61
+ # :more_excerpt:: If true, when a post has no excerpt but
62
+ # does have a <!-- more --> tag, use the
63
+ # preceding post content as the excerpt.
64
+ # Default: true.
65
+ # :more_anchor:: If true, convert a <!-- more --> tag into
66
+ # two HTML anchors with ids "more" and
67
+ # "more-NNN" (where NNN is the post number).
68
+ # Default: true.
69
+ # :status:: Array of allowed post statuses. Only
70
+ # posts with matching status will be migrated.
71
+ # Known statuses are :publish, :draft, :private,
72
+ # and :revision. If this is nil or an empty
73
+ # array, all posts are migrated regardless of
74
+ # status. Default: [:publish].
75
+ #
76
# Connects to the WordPress MySQL database and hands every selected post
# row to process_post, which writes it below _posts/.
#
# opts - Hash with String keys, all optional:
#        'dbname', 'user', 'password' - connection settings (default: '')
#        'host'         - database host (default: 'localhost')
#        'table_prefix' - WordPress table prefix (default: 'wp_'). This is
#                         the key under which the --prefix switch is
#                         registered; the older 'prefix' key is still
#                         honoured as a fallback.
#        'clean_entities', 'comments', 'categories', 'tags',
#        'more_excerpt', 'more_anchor' - feature switches (default: true)
#        'status'       - statuses to import (default: ["publish"]); nil
#                         or an empty array selects posts of any status.
def self.process(opts)
  options = {
    :user           => opts.fetch('user', ''),
    :pass           => opts.fetch('password', ''),
    :host           => opts.fetch('host', 'localhost'),
    :dbname         => opts.fetch('dbname', ''),
    :table_prefix   => opts.fetch('table_prefix') { opts.fetch('prefix', 'wp_') },
    :clean_entities => opts.fetch('clean_entities', true),
    :comments       => opts.fetch('comments', true),
    :categories     => opts.fetch('categories', true),
    :tags           => opts.fetch('tags', true),
    :more_excerpt   => opts.fetch('more_excerpt', true),
    :more_anchor    => opts.fetch('more_anchor', true),
    # Array() turns an explicit nil into [] ("no status filter").
    :status         => Array(opts.fetch('status', ["publish"])).map(&:to_sym) # :draft, :private, :revision
  }

  if options[:clean_entities]
    begin
      require 'htmlentities'
    rescue LoadError
      # Entity cleaning is optional: degrade gracefully when the gem is absent.
      STDERR.puts "Could not require 'htmlentities', so the " +
                  ":clean_entities option is now disabled."
      options[:clean_entities] = false
    end
  end

  FileUtils.mkdir_p("_posts")

  db = Sequel.mysql2(options[:dbname], :user => options[:user], :password => options[:pass],
                     :host => options[:host], :encoding => 'utf8')

  px = options[:table_prefix]

  posts_query = "
     SELECT
       posts.ID            AS `id`,
       posts.guid          AS `guid`,
       posts.post_type     AS `type`,
       posts.post_status   AS `status`,
       posts.post_title    AS `title`,
       posts.post_name     AS `slug`,
       posts.post_date     AS `date`,
       posts.post_content  AS `content`,
       posts.post_excerpt  AS `excerpt`,
       posts.comment_count AS `comment_count`,
       users.display_name  AS `author`,
       users.user_login    AS `author_login`,
       users.user_email    AS `author_email`,
       users.user_url      AS `author_url`
     FROM #{px}posts AS `posts`
       LEFT JOIN #{px}users AS `users`
         ON posts.post_author = users.ID"

  unless options[:status].empty?
    # Status names come from the command line, so let the database layer
    # quote them instead of splicing them into the SQL verbatim.
    quoted_statuses = options[:status].map { |status| db.literal(status.to_s) }
    posts_query += "
     WHERE posts.post_status IN (#{quoted_statuses.join(', ')})"
  end

  db[posts_query].each do |post|
    process_post(post, db, options)
  end
end
143
+
144
+
145
# Converts one WordPress post row into a Jekyll file below _posts/.
#
# post    - row Hash from the posts query, read through the symbol keys
#           :id, :guid, :type, :status, :title, :slug, :date, :content,
#           :excerpt, :comment_count and the :author* fields.
# db      - database handle; db[sql] is enumerated for term and comment rows.
# options - symbol-keyed settings (:table_prefix, :clean_entities,
#           :comments, :categories, :tags, :more_excerpt, :more_anchor).
#
# Writes "_posts/<year>-<month>-<day>-<slug>.markdown" holding the YAML
# front matter, a "---" line and the post content.
def self.process_post(post, db, options)
  px = options[:table_prefix]
  # Single place deciding whether a text goes through clean_entities.
  scrub = lambda { |text| options[:clean_entities] ? clean_entities(text) : text }

  title = scrub.call(post[:title])

  slug = post[:slug]
  # to_s guards against a nil title when entity cleaning is switched off.
  slug = sluggify(title.to_s) if !slug || slug.empty?

  date = post[:date] || Time.now
  name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day, slug]

  content = scrub.call(post[:content].to_s)
  excerpt = post[:excerpt].to_s

  more_tag = /<!-- *more *-->/
  more_index = content.index(more_tag)
  more_anchor = nil
  if more_index
    # No explicit excerpt: use everything in front of the "more" tag.
    if options[:more_excerpt] && excerpt.empty?
      excerpt = content[0...more_index]
    end
    if options[:more_anchor]
      # Record the anchor id so it reaches the front matter (it used to be
      # stored in an unused local, leaving 'more_anchor' always empty).
      more_anchor = "more"
      # Non-destructive sub: never modify the string owned by the row.
      content = content.sub(more_tag,
                            "<a id=\"more\"></a>" +
                            "<a id=\"more-#{post[:id]}\"></a>")
    end
  end

  categories = []
  tags = []

  if options[:categories] || options[:tags]
    cquery =
      "SELECT
         terms.name AS `name`,
         ttax.taxonomy AS `type`
       FROM
         #{px}terms AS `terms`,
         #{px}term_relationships AS `trels`,
         #{px}term_taxonomy AS `ttax`
       WHERE
         trels.object_id = '#{post[:id]}' AND
         trels.term_taxonomy_id = ttax.term_taxonomy_id AND
         terms.term_id = ttax.term_id"

    db[cquery].each do |term|
      if options[:categories] && term[:type] == "category"
        categories << scrub.call(term[:name])
      elsif options[:tags] && term[:type] == "post_tag"
        tags << scrub.call(term[:name])
      end
    end
  end

  comments = []

  if options[:comments] && post[:comment_count].to_i > 0
    cquery =
      "SELECT
         comment_ID           AS `id`,
         comment_author       AS `author`,
         comment_author_email AS `author_email`,
         comment_author_url   AS `author_url`,
         comment_date         AS `date`,
         comment_date_gmt     AS `date_gmt`,
         comment_content      AS `content`
       FROM #{px}comments
       WHERE
         comment_post_ID = '#{post[:id]}' AND
         comment_approved != 'spam'"

    db[cquery].each do |comment|
      comcontent = comment[:content].to_s
      comcontent.force_encoding("UTF-8") if comcontent.respond_to?(:force_encoding)

      comments << {
        'id'           => comment[:id].to_i,
        'author'       => scrub.call(comment[:author].to_s),
        'author_email' => comment[:author_email].to_s,
        'author_url'   => comment[:author_url].to_s,
        'date'         => comment[:date].to_s,
        'date_gmt'     => comment[:date_gmt].to_s,
        'content'      => scrub.call(comcontent),
      }
    end

    comments.sort! { |a, b| a['id'] <=> b['id'] }
  end

  # Get the relevant fields as a hash, delete empty fields and
  # convert to YAML for the header.
  data = {
    'layout'        => post[:type].to_s,
    'status'        => post[:status].to_s,
    'published'     => (post[:status].to_s == "publish"),
    'title'         => title.to_s,
    'author'        => post[:author].to_s,
    'author_login'  => post[:author_login].to_s,
    'author_email'  => post[:author_email].to_s,
    'author_url'    => post[:author_url].to_s,
    'excerpt'       => excerpt,
    'more_anchor'   => more_anchor,
    'wordpress_id'  => post[:id],
    'wordpress_url' => post[:guid].to_s,
    'date'          => date,
    'categories'    => options[:categories] ? categories : nil,
    'tags'          => options[:tags] ? tags : nil,
    'comments'      => options[:comments] ? comments : nil,
  }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

  # Write out the data and content to file
  File.open("_posts/#{name}", "w") do |f|
    f.puts data
    f.puts "---"
    f.puts content
  end
end
292
+
293
+
294
# Encodes the text as named HTML entities via HTMLEntities, then restores
# the characters that markup itself depends on.
#
# text - the String to clean; its encoding is forced to UTF-8 in place
#        when the object supports force_encoding.
#
# Returns the encoded String.
def self.clean_entities(text)
  text.force_encoding("UTF-8") if text.respond_to?(:force_encoding)
  text = HTMLEntities.new.encode(text, :named)
  # We don't want to convert these, it would break all
  # HTML tags in the post and comments. Applied in order, "&amp;" first.
  restorations = [
    ["&amp;", "&"],
    ["&lt;", "<"],
    ["&gt;", ">"],
    ["&quot;", '"'],
    ["&apos;", "'"],
    # Restore slashes as well; encoding "/" would corrupt closing tags
    # such as </p>.
    ["&#47;", "/"]
  ]
  restorations.each { |entity, character| text.gsub!(entity, character) }
  text
end
309
+
310
+
311
# Builds a URL slug from a post title: lower-cased, with every run of
# characters outside 0-9/A-Z/a-z collapsed into a single dash and no
# leading or trailing dash. When the optional 'unidecode' library loads,
# the title is transliterated with to_ascii first; otherwise a hint is
# printed to STDERR and the title is used unchanged.
def self.sluggify(title)
  ascii_title =
    begin
      require 'unidecode'
      title.to_ascii
    rescue LoadError
      STDERR.puts "Could not require 'unidecode'. If your post titles have non-ASCII characters, you could get nicer permalinks by installing unidecode."
      title
    end

  spaced = ascii_title.downcase.gsub(/[^0-9A-Za-z]+/, " ")
  spaced.strip.tr(" ", "-")
end
320
+
321
+ end
322
+ end
323
+ end
@@ -0,0 +1,97 @@
1
+ # coding: utf-8
2
+ # This importer takes a wordpress.xml file, which can be exported from your
3
+ # wordpress.com blog (/wp-admin/export.php).
4
+
5
+ module JekyllImport
6
+ module Importers
7
+ class WordpressDotCom < Importer
8
# Passes the list of library names needed by the WordPress.com importer
# to JekyllImport.require_with_fallback (defined elsewhere; presumably it
# requires each one -- confirm against that helper).
def self.require_deps
  libraries = %w[rubygems sequel fileutils safe_yaml hpricot time]
  JekyllImport.require_with_fallback(libraries)
end
18
+
19
# Declares the single command-line option of this importer: the path of
# the exported XML file.
#
# c - the command object receiving the c.option declaration.
def self.specify_options(c)
  source_help = 'WordPress export XML file (default: "wordpress.xml")'
  c.option('source', '--source FILE', source_help)
end
22
+
23
# Reads a WordPress export file and writes one HTML file per <item> into
# a directory named after the item's post type ("_<type>s").
#
# options - Hash; 'source' is the path of the export file
#           (default: "wordpress.xml").
#
# Items that cannot be written are reported on standard output and
# skipped. A per-type summary is printed at the end.
def self.process(options)
  source = options.fetch('source', "wordpress.xml")

  import_count = Hash.new(0)
  doc = Hpricot::XML(File.read(source))

  (doc / :channel / :item).each do |item|
    title = item.at(:title).inner_text.strip
    post_name = item.at('wp:post_name').inner_text
    # Fallback to "prettified" title if post_name is empty (can happen)
    permalink_title = post_name == "" ? sluggify(title) : post_name

    date = Time.parse(item.at('wp:post_date').inner_text)
    status = item.at('wp:status').inner_text
    published = (status == "publish")

    type = item.at('wp:post_type').inner_text
    categories = item.search('category[@domain="category"]')
                     .map { |node| node.inner_text }
                     .reject { |label| label == 'Uncategorized' }
                     .uniq
    tags = item.search('category[@domain="post_tag"]')
               .map { |node| node.inner_text }
               .uniq

    metas = {}
    item.search("wp:postmeta").each do |meta|
      meta_key = meta.at('wp:meta_key').inner_text
      metas[meta_key] = meta.at('wp:meta_value').inner_text
    end

    name = "#{date.strftime('%Y-%m-%d')}-#{permalink_title}.html"
    header = {
      'layout'     => type,
      'title'      => title,
      'categories' => categories,
      'tags'       => tags,
      'status'     => status,
      'type'       => type,
      'published'  => published,
      'meta'       => metas
    }

    begin
      FileUtils.mkdir_p "_#{type}s"
      File.open("_#{type}s/#{name}", "w") do |file|
        file.puts header.to_yaml
        file.puts '---'
        file.puts item.at('content:encoded').inner_text
      end
    rescue => e
      puts "Couldn't import post!"
      puts "Title: #{title}"
      puts "Name/Slug: #{name}\n"
      puts "Error: #{e.message}"
      # Failed items are not counted.
      next
    end

    import_count[type] += 1
  end

  import_count.each do |kind, total|
    puts "Imported #{total} #{kind}s"
  end
end
91
+
92
# Turns a title into a permalink fragment: every run of non-alphanumeric
# characters becomes one dash, then the result is lower-cased. Leading and
# trailing dashes are kept.
def self.sluggify(title)
  dashed = title.gsub(/[^[:alnum:]]+/, '-')
  dashed.downcase
end
95
+ end
96
+ end
97
+ end