jekyll-import 0.1.0.beta3 → 0.1.0.beta4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +6 -14
  2. data/History.markdown +18 -0
  3. data/README.markdown +12 -1
  4. data/jekyll-import.gemspec +31 -25
  5. data/lib/jekyll-import.rb +50 -1
  6. data/lib/jekyll-import/importer.rb +11 -0
  7. data/lib/jekyll-import/importers.rb +10 -0
  8. data/lib/jekyll-import/importers/csv.rb +50 -0
  9. data/lib/jekyll-import/importers/drupal6.rb +139 -0
  10. data/lib/jekyll-import/importers/drupal7.rb +102 -0
  11. data/lib/jekyll-import/importers/enki.rb +76 -0
  12. data/lib/jekyll-import/importers/google_reader.rb +68 -0
  13. data/lib/jekyll-import/importers/joomla.rb +83 -0
  14. data/lib/jekyll-import/importers/jrnl.rb +127 -0
  15. data/lib/jekyll-import/importers/marley.rb +72 -0
  16. data/lib/jekyll-import/importers/mephisto.rb +109 -0
  17. data/lib/jekyll-import/importers/mt.rb +169 -0
  18. data/lib/jekyll-import/importers/posterous.rb +139 -0
  19. data/lib/jekyll-import/importers/rss.rb +71 -0
  20. data/lib/jekyll-import/importers/s9y.rb +67 -0
  21. data/lib/jekyll-import/importers/textpattern.rb +76 -0
  22. data/lib/jekyll-import/importers/tumblr.rb +265 -0
  23. data/lib/jekyll-import/importers/typo.rb +89 -0
  24. data/lib/jekyll-import/importers/wordpress.rb +323 -0
  25. data/lib/jekyll-import/importers/wordpressdotcom.rb +97 -0
  26. data/lib/jekyll/commands/import.rb +1 -0
  27. data/test/helper.rb +3 -1
  28. data/test/test_jrnl_importer.rb +39 -0
  29. data/test/test_mt_importer.rb +16 -16
  30. data/test/test_tumblr_importer.rb +61 -0
  31. data/test/test_wordpress_importer.rb +1 -1
  32. data/test/test_wordpressdotcom_importer.rb +1 -1
  33. metadata +53 -32
  34. data/lib/jekyll/jekyll-import/csv.rb +0 -30
  35. data/lib/jekyll/jekyll-import/drupal6.rb +0 -112
  36. data/lib/jekyll/jekyll-import/drupal7.rb +0 -74
  37. data/lib/jekyll/jekyll-import/enki.rb +0 -49
  38. data/lib/jekyll/jekyll-import/google_reader.rb +0 -61
  39. data/lib/jekyll/jekyll-import/joomla.rb +0 -53
  40. data/lib/jekyll/jekyll-import/marley.rb +0 -52
  41. data/lib/jekyll/jekyll-import/mephisto.rb +0 -84
  42. data/lib/jekyll/jekyll-import/mt.rb +0 -142
  43. data/lib/jekyll/jekyll-import/posterous.rb +0 -122
  44. data/lib/jekyll/jekyll-import/rss.rb +0 -63
  45. data/lib/jekyll/jekyll-import/s9y.rb +0 -59
  46. data/lib/jekyll/jekyll-import/textpattern.rb +0 -58
  47. data/lib/jekyll/jekyll-import/tumblr.rb +0 -242
  48. data/lib/jekyll/jekyll-import/typo.rb +0 -69
  49. data/lib/jekyll/jekyll-import/wordpress.rb +0 -299
  50. data/lib/jekyll/jekyll-import/wordpressdotcom.rb +0 -84
@@ -0,0 +1,71 @@
1
+ # Created by Kendall Buchanan (https://github.com/kendagriff) on 2011-12-22.
2
+ # Use at your own risk. The end.
3
+ #
4
+ # Usage:
5
+ # (URL)
6
+ # ruby -r 'jekyll/jekyll-import/rss' -e "JekyllImport::RSS.process(:source => 'http://yourdomain.com/your-favorite-feed.xml')"
7
+ #
8
+ # (Local file)
9
+ # ruby -r 'jekyll/jekyll-import/rss' -e "JekyllImport::RSS.process(:source => './somefile/on/your/computer.xml')"
10
+
11
+ module JekyllImport
12
+ module Importers
13
# Imports every item of an RSS feed as a Jekyll post.
#
# For each feed item a file `_posts/<YYYY-MM-DD>-<slugged-title>.html` is
# written, containing a YAML header (layout and title) followed by the
# item's description.
#
# Usage:
#   JekyllImport::Importers::RSS.process('source' => 'http://yourdomain.com/your-favorite-feed.xml')
#   JekyllImport::Importers::RSS.process('source' => './somefile/on/your/computer.xml')
class RSS < Importer
  # Declares the command-line options of this importer.
  #
  # c - the command object; only its `option` method is called.
  def self.specify_options(c)
    c.option 'source', '--source NAME', 'The RSS file or URL to import'
    # `--file` used to be the only declared flag although validation asked
    # for `--source`; it is kept as an alias so either spelling works.
    c.option 'file', '--file NAME', 'The RSS file to import (alias of --source)'
  end

  # Aborts the program unless a feed location was supplied.
  #
  # options - Hash of options; 'source' (or the alias 'file') must be set.
  def self.validate(options)
    return unless (options['source'] || options['file']).nil?

    abort "Missing mandatory option --source."
  end

  # Loads the libraries this importer needs.
  def self.require_deps
    JekyllImport.require_with_fallback(%w[
      rss/1.0
      rss/2.0
      open-uri
      fileutils
      safe_yaml
    ])
  end

  # Process the import.
  #
  # options - Hash with the key 'source' (or its alias 'file'): a URL or a
  #           local file String.
  #
  # Raises KeyError when neither key is present and RuntimeError when the
  # parser returns nothing for the source.
  #
  # Returns nothing.
  def self.process(options)
    source = options['source'] || options['file']
    raise KeyError, 'key not found: "source"' if source.nil?

    # NOTE(review): `open` receives the user-supplied string unchanged.
    content = open(source) { |io| io.read }
    # The leading `::` selects the RSS library rather than this class.
    rss = ::RSS::Parser.parse(content, false)

    raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss

    FileUtils.mkdir_p("_posts")

    rss.items.each do |item|
      formatted_date = item.date.strftime('%Y-%m-%d')
      # NOTE(review): the `$` alternative below is an end-of-line anchor,
      # not a literal dollar sign -- confirm which one was intended.
      words = item.title.split(%r{ |!|/|:|&|-|$|,}).reject { |word| word == '' }
      post_name = words.map { |word| word.downcase }.join('-')
      name = "#{formatted_date}-#{post_name}"

      header = {
        'layout' => 'post',
        'title' => item.title
      }

      File.open("_posts/#{name}.html", "w") do |f|
        f.puts header.to_yaml
        f.puts "---\n\n"
        f.puts item.description
      end
    end
  end
end
70
+ end
71
+ end
@@ -0,0 +1,67 @@
1
+ # Migrator to import entries from a Serendipity (S9Y) blog
2
+ #
3
+ # Entries can be exported from http://blog.example.com/rss.php?version=2.0&all=1
4
+ #
5
+ # Usage:
6
+ # ruby -r './s9y_rss.rb' -e 'Jekyll::S9Y.process("http://blog.example.com/rss.php?version=2.0&all=1")'
7
+
8
+ module JekyllImport
9
+ module Importers
10
+ class S9Y < Importer
11
# Declares the single command-line option of the S9Y importer.
#
# c - the command object; only its `option` method is called.
def self.specify_options(c)
  c.option('source', '--source SOURCE', 'The URL of the S9Y RSS feed')
end
14
+
15
# Aborts the program with a usage hint when no feed URL was given.
#
# options - Hash of options; the String key 'source' must be non-nil.
def self.validate(options)
  return unless options['source'].nil?

  abort "Missing mandatory option --source, e.g. --source \"http://blog.example.com/rss.php?version=2.0&all=1\""
end
20
+
21
# Loads the libraries the S9Y importer relies on.
def self.require_deps
  libraries = ['open-uri', 'rss', 'fileutils', 'safe_yaml']
  JekyllImport.require_with_fallback(libraries)
end
29
+
30
# Imports every item of a Serendipity RSS feed as a Jekyll post.
#
# Each item is written to `_posts/<year>-<month>-<day>-<slug>.markdown`
# with a YAML header followed by the item's encoded content. The slug and
# the permalink are both taken from the `/archives/...` part of the item
# link.
#
# options - Hash with the key 'source' (the Symbol :source is accepted
#           too): the location of the feed.
#
# Raises KeyError when no source is given.
def self.process(options)
  # The option declaration and `validate` use the String key, so that is
  # the primary lookup; the Symbol key keeps older direct callers working.
  source = options['source'] || options[:source]
  raise KeyError, 'key not found: "source"' if source.nil?

  FileUtils.mkdir_p("_posts")

  text = open(source) { |feed| feed.read }
  # The leading `::` is required: inside JekyllImport::Importers a bare
  # `RSS` resolves to the RSS importer class, not to the RSS library.
  rss = ::RSS::Parser.parse(text)

  rss.items.each do |item|
    post_url = item.link.match('.*(/archives/.*)')[1]
    categories = item.categories.map { |category| category.content }
    content = item.content_encoded.strip
    date = item.date
    slug = item.link.match('.*/archives/[0-9]+-(.*)\.html')[1]
    name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day, slug]

    # Empty and nil fields are dropped from the header.
    data = {
      'layout' => 'post',
      'title' => item.title,
      'categories' => categories,
      'permalink' => post_url,
      's9y_link' => item.link,
      'date' => item.date,
    }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

    # Write out the data and content to file
    File.open("_posts/#{name}", "w") do |f|
      f.puts data
      f.puts "---"
      f.puts content
    end
  end
end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,76 @@
1
+ # NOTE: This converter requires Sequel and the MySQL gems.
2
+ # The MySQL gem can be difficult to install on OS X. Once you have MySQL
3
+ # installed, running the following commands should work:
4
+ # $ sudo gem install sequel
5
+ # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
6
+
7
+ module JekyllImport
8
+ module Importers
9
+ class TextPattern < Importer
10
# SQL used to read the posts from the `textpattern` table.
# The only posts selected are those with a status of 4 or 5, which means
# "live" and "sticky" respectively.
# Other statuses are 1 => draft, 2 => hidden and 3 => pending.
# Frozen so the shared constant cannot be modified in place.
QUERY = ("SELECT Title, url_title, Posted, Body, Keywords " \
         "FROM textpattern " \
         "WHERE Status = '4' OR Status = '5'").freeze
22
+
23
# Loads the libraries the TextPattern importer relies on.
def self.require_deps
  libraries = ['rubygems', 'sequel', 'fileutils', 'safe_yaml']
  JekyllImport.require_with_fallback(libraries)
end
31
+
32
# Declares the database connection options of the TextPattern importer.
#
# c - the command object; only its `option` method is called.
def self.specify_options(c)
  c.option('dbname', '--dbname DB', 'Database name')
  c.option('user', '--user USER', 'Database user name')
  c.option('password', '--password PW', "Database user's password")
  c.option('host', '--host HOST', 'Database host name (default: "localhost")')
end
38
+
39
# Reads a MySQL database via Sequel and creates a post file for each row
# returned by QUERY.
#
# Every post is written to `_posts/<YYYY-MM-DD>-<url_title>.textile` with
# a YAML header (layout, title, tags) followed by the post body.
#
# options - Hash with the keys 'dbname' and 'user' (required), 'password'
#           (default: "") and 'host' (default: "localhost").
#
# Raises KeyError when 'dbname' or 'user' is missing.
def self.process(options)
  dbname = options.fetch('dbname')
  user   = options.fetch('user')
  # An account without a password is valid, so the password is optional.
  pass   = options.fetch('password', "")
  host   = options.fetch('host', "localhost")

  db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

  FileUtils.mkdir_p "_posts"

  db[QUERY].each do |post|
    # Get required fields and construct Jekyll compatible name.
    title = post[:Title]
    slug = post[:url_title]
    date = post[:Posted]
    content = post[:Body]

    name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile"

    # Get the relevant fields as a hash, delete empty fields and convert
    # to YAML for the header. `to_s` keeps a NULL Keywords column from
    # raising on `nil.split`.
    data = {
      'layout' => 'post',
      'title' => title.to_s,
      'tags' => post[:Keywords].to_s.split(',')
    }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

    # Write out the data and content to file.
    File.open("_posts/#{name}", "w") do |f|
      f.puts data
      f.puts "---"
      f.puts content
    end
  end
end
74
+ end
75
+ end
76
+ end
@@ -0,0 +1,265 @@
1
+ module JekyllImport
2
+ module Importers
3
+ class Tumblr < Importer
4
# Loads the libraries the Tumblr importer relies on.
def self.require_deps
  libraries = ['rubygems', 'fileutils', 'open-uri', 'nokogiri', 'json', 'uri', 'time', 'jekyll']
  JekyllImport.require_with_fallback(libraries)
end
16
+
17
# Declares the command-line options of the Tumblr importer.
#
# c - the command object; only its `option` method is called.
def self.specify_options(c)
  c.option('url', '--url URL', 'Tumblr URL')
  c.option('format', '--format FORMAT', 'Output format (default: "html")')
  c.option('grab_images', '--grab_images', 'Whether to grab images (default: false)')
  c.option('add_highlights', '--add_highlights', 'Whether to add highlights (default: false)')
  c.option('rewrite_urls', '--rewrite_urls', 'Whether to rewrite URLs (default: false)')
end
24
+
25
# Downloads all posts of a Tumblr blog, 50 per request, and writes them
# to `_posts/tumblr`.
#
# options - Hash with the key 'url' (required) and the optional keys
#           'format' (default: "html"), 'grab_images', 'add_highlights'
#           and 'rewrite_urls' (all default: false).
#
# Raises KeyError when 'url' is missing.
def self.process(options)
  url            = options.fetch('url')
  format         = options.fetch('format', "html")
  grab_images    = options.fetch('grab_images', false)
  add_highlights = options.fetch('add_highlights', false)
  rewrite_urls   = options.fetch('rewrite_urls', false)

  # Read later by save_photo.
  @grab_images = grab_images
  FileUtils.mkdir_p "_posts/tumblr"
  url += "/api/read/json/"
  per_page = 50
  posts = []
  current_page = 0
  # Two passes are required so that we can rewrite URLs.
  # First pass builds up an array of each post as a hash.
  loop do
    feed_url = url + "?num=#{per_page}&start=#{current_page * per_page}"
    puts "Fetching #{feed_url}"
    # The block form closes the connection once the body has been read.
    raw = open(feed_url) { |feed| feed.readlines.join("\n") }
    json = raw[21...-2] # Strip Tumblr's JSONP chars.
    blog = JSON.parse(json)
    puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
    batch = blog["posts"].map { |post| post_to_hash(post, format) }

    # If we're rewriting, save the posts for later. Otherwise, go ahead and
    # dump these to disk now
    if rewrite_urls
      posts += batch
    else
      batch.each { |post| write_post(post, format == "md", add_highlights) }
    end

    # A short page means this was the last one.
    break if blog["posts"].size < per_page
    current_page += 1
  end

  # Rewrite URLs, create redirects and write out posts if necessary
  if rewrite_urls
    posts = rewrite_urls_and_redirects posts
    posts.each { |post| write_post(post, format == "md", add_highlights) }
  end
end
65
+
66
+ private
67
+
68
# Writes a post out to disk as `_posts/tumblr/<name>`: the YAML header,
# a `---` line, then the content.
#
# post           - Hash with the keys :name, :header and :content.
# use_markdown   - when true the content goes through html_to_markdown.
# add_highlights - when true (and use_markdown is true) the content also
#                  goes through add_syntax_highlights.
def self.write_post(post, use_markdown, add_highlights)
  # Post types that post_to_hash does not handle carry nil content;
  # `to_s` writes them as header-only files instead of raising TypeError.
  content = post[:content].to_s
  if use_markdown
    content = html_to_markdown content
    content = add_syntax_highlights content if add_highlights
  end

  File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
    f.puts post[:header].to_yaml + "---\n" + content
  end
end
80
+
81
# Converts each type of Tumblr post to a hash with all required
# data for Jekyll.
#
# post   - Hash of one post as parsed from the Tumblr JSON feed.
# format - file extension used for the post's file name.
#
# Returns a Hash with the keys :name, :header, :content, :url and :slug.
# Title and content stay nil for post types not listed below.
def self.post_to_hash(post, format)
  case post['type']
  when "regular"
    title = post["regular-title"]
    content = post["regular-body"]
  when "link"
    title = post["link-text"] || post["link-url"]
    content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
    unless post["link-description"].nil?
      content = "#{content}<br/>#{post["link-description"]}"
    end
  when "photo"
    title = post["photo-caption"]
    content = fetch_photo post
    # Wrap the image in a link only when the post actually has one.
    unless post["photo-link-url"].nil?
      content = "<a href=\"#{post["photo-link-url"]}\">#{content}</a>"
    end
  when "audio"
    if !post["id3-title"].nil?
      title = post["id3-title"]
      content = "#{post["audio-player"]}<br/>#{post["audio-caption"]}"
    else
      title = post["audio-caption"]
      content = post["audio-player"]
    end
  when "quote"
    title = post["quote-text"]
    content = "<blockquote>#{post["quote-text"]}</blockquote>"
    unless post["quote-source"].nil?
      content = "#{content}&#8212;#{post["quote-source"]}"
    end
  when "conversation"
    title = post["conversation-title"]
    exchanges = post["conversation"].map do |line|
      "<dt>#{line['label']}</dt><dd>#{line['phrase']}</dd>"
    end
    # Tags are closed in the reverse order they were opened.
    content = "<section><dialog>#{exchanges.join}</dialog></section>"
  when "video"
    title = post["video-title"]
    content = post["video-player"]
    # Build a new string so the player markup in `post` is not modified.
    unless post["video-caption"].nil?
      content = "#{content}<br/>#{post["video-caption"]}"
    end
  end
  date = Date.parse(post['date']).to_s
  # Reduce the title to its text content.
  title = Nokogiri::HTML(title).text
  slug = if post["slug"] && post["slug"].strip != ""
    post["slug"]
  else
    # Derive a slug from the title, cut at index 200.
    title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '').slice(0..200)
  end
  {
    :name => "#{date}-#{slug}.#{format}",
    :header => {
      "layout" => "post",
      "title" => title,
      "date" => Time.parse(post['date']).xmlschema,
      "tags" => post["tags"],
      "tumblr_url" => post["url-with-slug"]
    },
    :content => content,
    :url => post["url"],
    :slug => post["url-with-slug"],
  }
end
151
+
152
# Attempts to fetch the largest version of a photo available for a post.
# If that file fails, it tries the next smaller size until all available
# photo URLs are exhausted. If they all fail, the import is aborted.
#
# post - Hash of one photo post; photo URLs are read from the key
#        "photo-url" and from the "photo-url-<size>" keys.
#
# Returns an `<img>` tag String pointing at the value save_photo returned.
def self.fetch_photo(post)
  sized_keys = post.keys.select { |key| key =~ /\Aphoto-url-\d+\z/ }
  sized_keys = sized_keys.sort_by { |key| -key[/\d+\z/].to_i }
  # An unsized "photo-url" is tried first, then every size from largest to
  # smallest; each distinct URL is attempted once.
  candidates = ([post["photo-url"]] + sized_keys.map { |key| post[key] }).compact.uniq

  # The extension comes from the first photo URL whose last path segment
  # contains a dot; it stays empty when none does.
  _ext_key, ext_val = post.find do |key, value|
    key =~ /^photo-url-/ && value.to_s.split("/").last =~ /\./
  end
  ext = ext_val ? "." + ext_val.split(".").last : ""

  candidates.each do |url|
    begin
      return "<img src=\"#{save_photo(url, ext)}\"/>"
    rescue OpenURI::HTTPError
      puts "Failed to grab photo"
    end
  end

  abort "Failed to fetch photo for post #{post['url']}"
end
176
+
177
# Create a Hash of old urls => new urls, for rewriting and
# redirects, and replace urls in each post. Instantiate Jekyll
# site/posts to get the correct permalink format.
#
# For every post an `index.html` redirect page is written into a
# directory named after the post's Tumblr path.
#
# posts - Array of post Hashes as built by post_to_hash.
#
# Returns the posts, with the Tumblr paths in each :content replaced
# in place.
def self.rewrite_urls_and_redirects(posts)
  site = Jekyll::Site.new(Jekyll.configuration({}))
  urls = Hash[posts.map { |post|
    # Create an initial empty file for the post so that
    # we can instantiate a post object. The empty block closes the
    # handle right away.
    File.open("_posts/tumblr/#{post[:name]}", "w") {}
    tumblr_url = URI.parse(post[:slug]).path
    jekyll_url = Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
    redirect_dir = tumblr_url.sub(/\//, "") + "/"
    FileUtils.mkdir_p redirect_dir
    File.open(redirect_dir + "index.html", "w") do |f|
      f.puts "<html><head><link rel=\"canonical\" href=\"" +
             "#{jekyll_url}\"><meta http-equiv=\"refresh\" content=\"0; " +
             "url=#{jekyll_url}\"></head><body></body></html>"
    end
    [tumblr_url, jekyll_url]
  }]
  # Escape each path so it is matched literally, and compile it once
  # instead of once per post.
  replacements = urls.map do |tumblr_url, jekyll_url|
    [Regexp.new(Regexp.escape(tumblr_url), Regexp::IGNORECASE), jekyll_url]
  end
  posts.map { |post|
    replacements.each do |pattern, jekyll_url|
      post[:content].gsub!(pattern, jekyll_url)
    end
    post
  }
end
204
+
205
# Reduces HTML to its text content while keeping table markup.
#
# The opening and closing forms of the table tags are swapped for
# placeholder tokens before the text is extracted and swapped back
# afterwards. The placeholder substitution modifies `content` in place.
#
# content - HTML String.
#
# Returns the converted String.
def self.html_to_markdown(content)
  kept_tags = ["table", "tr", "th", "td"]
  kept_tags.each do |name|
    content.gsub!(/<#{name}/i, "$$" + name)
    content.gsub!(/<\/#{name}/i, "||" + name)
  end
  text = Nokogiri::HTML(content.gsub("'", "''")).text
  kept_tags.each do |name|
    text.gsub!("$$" + name, "<" + name)
    text.gsub!("||" + name, "</" + name)
  end
  text
end
219
+
220
# Adds pygments highlight tags to code blocks in posts that use
# markdown format. This doesn't guess the language of the code
# block, so you should modify this to suit your own content.
# For example, my code block only contain Python and JavaScript,
# so I can assume the block is JavaScript if it contains a
# semi-colon.
#
# A block is a run of lines matching `indent`. Its first line is replaced
# by the opening tag and the line before the first non-matching line by
# the closing tag; the lines in between lose their leading indent.
#
# content - String of the post body.
#
# Returns the rewritten String.
def self.add_syntax_highlights(content)
  lines = content.split("\n")
  # NOTE(review): confirm the width of the indent pattern against the
  # repository; whitespace may have been collapsed in the reviewed copy.
  block, indent, lang, start = false, /^ /, nil, nil
  lines.each_with_index do |line, i|
    if !block && line =~ indent
      block = true
      lang = "python"
      start = i
    elsif block
      lang = "javascript" if line =~ /;$/
      block = line =~ indent && i < lines.size - 1 # Also handle EOF
      unless block
        lines[start] = "{% highlight #{lang} %}"
        lines[i - 1] = "{% endhighlight %}"
      end
      lines[i] = lines[i].sub(indent, "")
    end
  end
  lines.join("\n")
end
247
+
248
# Downloads a photo into `tumblr_files/` when image grabbing is enabled.
#
# url - String URL of the photo.
# ext - String file extension (with leading dot) appended to the local
#       file name unless the name already ends with it.
#
# Returns the local path prefixed with "/" when @grab_images is set,
# otherwise the url unchanged.
def self.save_photo(url, ext)
  return url unless @grab_images

  ext = ext.to_s
  path = "tumblr_files/#{url.split('/').last}"
  # Literal suffix check: as a regexp the dot in ".jpg" matched any character.
  path += ext unless path.end_with?(ext)
  FileUtils.mkdir_p "tumblr_files"

  # Don't fetch if we've already cached this file
  unless File.size? path
    puts "Fetching photo #{url}"
    # Download first so a failed request leaves no empty file behind, and
    # write in binary mode so image bytes are stored untranslated.
    data = open(url) { |remote| remote.read }
    File.open(path, "wb") { |f| f.write(data) }
  end
  "/" + path
end
263
+ end
264
+ end
265
+ end