jekyll-import 0.1.0.beta3 → 0.1.0.beta4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +6 -14
  2. data/History.markdown +18 -0
  3. data/README.markdown +12 -1
  4. data/jekyll-import.gemspec +31 -25
  5. data/lib/jekyll-import.rb +50 -1
  6. data/lib/jekyll-import/importer.rb +11 -0
  7. data/lib/jekyll-import/importers.rb +10 -0
  8. data/lib/jekyll-import/importers/csv.rb +50 -0
  9. data/lib/jekyll-import/importers/drupal6.rb +139 -0
  10. data/lib/jekyll-import/importers/drupal7.rb +102 -0
  11. data/lib/jekyll-import/importers/enki.rb +76 -0
  12. data/lib/jekyll-import/importers/google_reader.rb +68 -0
  13. data/lib/jekyll-import/importers/joomla.rb +83 -0
  14. data/lib/jekyll-import/importers/jrnl.rb +127 -0
  15. data/lib/jekyll-import/importers/marley.rb +72 -0
  16. data/lib/jekyll-import/importers/mephisto.rb +109 -0
  17. data/lib/jekyll-import/importers/mt.rb +169 -0
  18. data/lib/jekyll-import/importers/posterous.rb +139 -0
  19. data/lib/jekyll-import/importers/rss.rb +71 -0
  20. data/lib/jekyll-import/importers/s9y.rb +67 -0
  21. data/lib/jekyll-import/importers/textpattern.rb +76 -0
  22. data/lib/jekyll-import/importers/tumblr.rb +265 -0
  23. data/lib/jekyll-import/importers/typo.rb +89 -0
  24. data/lib/jekyll-import/importers/wordpress.rb +323 -0
  25. data/lib/jekyll-import/importers/wordpressdotcom.rb +97 -0
  26. data/lib/jekyll/commands/import.rb +1 -0
  27. data/test/helper.rb +3 -1
  28. data/test/test_jrnl_importer.rb +39 -0
  29. data/test/test_mt_importer.rb +16 -16
  30. data/test/test_tumblr_importer.rb +61 -0
  31. data/test/test_wordpress_importer.rb +1 -1
  32. data/test/test_wordpressdotcom_importer.rb +1 -1
  33. metadata +53 -32
  34. data/lib/jekyll/jekyll-import/csv.rb +0 -30
  35. data/lib/jekyll/jekyll-import/drupal6.rb +0 -112
  36. data/lib/jekyll/jekyll-import/drupal7.rb +0 -74
  37. data/lib/jekyll/jekyll-import/enki.rb +0 -49
  38. data/lib/jekyll/jekyll-import/google_reader.rb +0 -61
  39. data/lib/jekyll/jekyll-import/joomla.rb +0 -53
  40. data/lib/jekyll/jekyll-import/marley.rb +0 -52
  41. data/lib/jekyll/jekyll-import/mephisto.rb +0 -84
  42. data/lib/jekyll/jekyll-import/mt.rb +0 -142
  43. data/lib/jekyll/jekyll-import/posterous.rb +0 -122
  44. data/lib/jekyll/jekyll-import/rss.rb +0 -63
  45. data/lib/jekyll/jekyll-import/s9y.rb +0 -59
  46. data/lib/jekyll/jekyll-import/textpattern.rb +0 -58
  47. data/lib/jekyll/jekyll-import/tumblr.rb +0 -242
  48. data/lib/jekyll/jekyll-import/typo.rb +0 -69
  49. data/lib/jekyll/jekyll-import/wordpress.rb +0 -299
  50. data/lib/jekyll/jekyll-import/wordpressdotcom.rb +0 -84
@@ -0,0 +1,71 @@
1
+ # Created by Kendall Buchanan (https://github.com/kendagriff) on 2011-12-22.
2
+ # Use at your own risk. The end.
3
+ #
4
+ # Usage:
5
+ # (URL)
6
+ # ruby -r 'jekyll/jekyll-import/rss' -e "JekyllImport::RSS.process(:source => 'http://yourdomain.com/your-favorite-feed.xml')"
7
+ #
8
+ # (Local file)
9
+ # ruby -r 'jekyll/jekyll-import/rss' -e "JekyllImport::RSS.process(:source => './somefile/on/your/computer.xml')"
10
+
11
module JekyllImport
  module Importers
    # Imports every item of an RSS feed (remote URL or local file) as an HTML
    # post under _posts/.
    class RSS < Importer
      # Registers the command-line options understood by this importer.
      #
      # c - the command object; only its #option method is used.
      #
      # Returns nothing.
      def self.specify_options(c)
        c.option 'source', '--source NAME', 'The RSS file or URL to import'
        # Kept so invocations that still pass --file keep working.
        c.option 'file', '--file NAME', 'The RSS file to import'
      end

      # Aborts unless a feed location was supplied.
      #
      # options - Hash of String option names to values.
      #
      # Returns nothing.
      def self.validate(options)
        if options['source'].nil? && options['file'].nil?
          abort "Missing mandatory option --source."
        end
      end

      # Hands the list of libraries this importer needs to
      # JekyllImport.require_with_fallback.
      def self.require_deps
        JekyllImport.require_with_fallback(%w[
          rss/1.0
          rss/2.0
          open-uri
          fileutils
          safe_yaml
        ])
      end

      # Process the import.
      #
      # options - Hash with the feed location under 'source' (a URL or a
      #           local file String); 'file' is accepted as a fallback.
      #
      # Returns nothing.
      # Raises KeyError if neither 'source' nor 'file' is present.
      # Raises RuntimeError if the parser returns nothing for the feed.
      def self.process(options)
        source = options['source'] || options.fetch('file')

        content = ""
        open(source) { |s| content = s.read }
        # Second argument false: do not validate the feed while parsing.
        rss = ::RSS::Parser.parse(content, false)

        raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss

        FileUtils.mkdir_p("_posts")

        rss.items.each do |item|
          formatted_date = item.date.strftime('%Y-%m-%d')
          # "\$" is escaped so a literal dollar sign is treated as a
          # separator; a bare "$" would only be an end-of-line anchor.
          post_name = item.title.split(%r{ |!|/|:|&|-|\$|,})
                          .reject { |part| part == '' }
                          .map { |part| part.downcase }
                          .join('-')
          name = "#{formatted_date}-#{post_name}"

          header = {
            'layout' => 'post',
            'title' => item.title
          }

          File.open("_posts/#{name}.html", "w") do |f|
            f.puts header.to_yaml
            f.puts "---\n\n"
            f.puts item.description
          end
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
+ # Migrator to import entries from a Serendipity (S9Y) blog
2
+ #
3
+ # Entries can be exported from http://blog.example.com/rss.php?version=2.0&all=1
4
+ #
5
+ # Usage:
6
+ # ruby -r './s9y_rss.rb' -e 'Jekyll::S9Y.process("http://blog.example.com/rss.php?version=2.0&all=1")'
7
+
8
module JekyllImport
  module Importers
    # Imports the entries of a Serendipity (S9Y) RSS export as Markdown posts
    # under _posts/.
    class S9Y < Importer
      # Registers the command-line options understood by this importer.
      #
      # c - the command object; only its #option method is used.
      #
      # Returns nothing.
      def self.specify_options(c)
        c.option 'source', '--source SOURCE', 'The URL of the S9Y RSS feed'
      end

      # Aborts unless the feed location was supplied.
      #
      # options - Hash of String option names to values.
      #
      # Returns nothing.
      def self.validate(options)
        if options['source'].nil?
          abort "Missing mandatory option --source, e.g. --source \"http://blog.example.com/rss.php?version=2.0&all=1\""
        end
      end

      # Hands the list of libraries this importer needs to
      # JekyllImport.require_with_fallback.
      def self.require_deps
        JekyllImport.require_with_fallback(%w[
          open-uri
          rss
          fileutils
          safe_yaml
        ])
      end

      # Process the import.
      #
      # options - Hash with the feed location under the String key 'source'.
      #
      # Returns nothing.
      # Raises KeyError if 'source' is missing.
      def self.process(options)
        # Options are keyed by String everywhere else in this class, so the
        # lookup must use 'source' rather than :source.
        source = options.fetch('source')

        FileUtils.mkdir_p("_posts")

        text = ''
        open(source) { |line| text = line.read }
        # The leading :: is required: inside JekyllImport::Importers a bare
        # RSS would resolve to the sibling RSS importer class, not the
        # standard library module.
        rss = ::RSS::Parser.parse(text)

        rss.items.each do |item|
          # Both matches assume links shaped like ".../archives/<id>-<slug>.html".
          post_url = item.link.match('.*(/archives/.*)')[1]
          categories = item.categories.map { |c| c.content }
          content = item.content_encoded.strip
          date = item.date
          slug = item.link.match('.*/archives/[0-9]+-(.*)\.html')[1]
          name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day,
                                                 slug]

          # Front matter; empty fields are dropped before serialising.
          data = {
            'layout' => 'post',
            'title' => item.title,
            'categories' => categories,
            'permalink' => post_url,
            's9y_link' => item.link,
            'date' => date,
          }.delete_if { |k, v| v.nil? || v == '' }.to_yaml

          # Write out the data and content to file
          File.open("_posts/#{name}", "w") do |f|
            f.puts data
            f.puts "---"
            f.puts content
          end
        end
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ # NOTE: This converter requires Sequel and the MySQL gems.
2
+ # The MySQL gem can be difficult to install on OS X. Once you have MySQL
3
+ # installed, running the following commands should work:
4
+ # $ sudo gem install sequel
5
+ # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
6
+
7
module JekyllImport
  module Importers
    # Imports the published articles of a TextPattern MySQL database as
    # Textile posts under _posts/.
    class TextPattern < Importer
      # Reads a MySQL database via Sequel and creates a post file for each post.
      # The only posts selected are those with a status of 4 or 5, which means
      # "live" and "sticky" respectively.
      # Other statuses are 1 => draft, 2 => hidden and 3 => pending.
      QUERY = "SELECT Title, \
                      url_title, \
                      Posted, \
                      Body, \
                      Keywords \
               FROM textpattern \
               WHERE Status = '4' OR \
                     Status = '5'"

      # Hands the list of libraries this importer needs to
      # JekyllImport.require_with_fallback.
      def self.require_deps
        JekyllImport.require_with_fallback(%w[
          rubygems
          sequel
          fileutils
          safe_yaml
        ])
      end

      # Registers the command-line options understood by this importer.
      #
      # c - the command object; only its #option method is used.
      #
      # Returns nothing.
      def self.specify_options(c)
        c.option 'dbname', '--dbname DB', 'Database name'
        c.option 'user', '--user USER', 'Database user name'
        c.option 'password', '--password PW', "Database user's password"
        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
      end

      # Process the import.
      #
      # options - Hash with the String keys 'dbname' and 'user' (required),
      #           'password' (default: "") and 'host' (default: "localhost").
      #
      # Returns nothing.
      # Raises KeyError if 'dbname' or 'user' is missing.
      def self.process(options)
        dbname = options.fetch('dbname')
        user   = options.fetch('user')
        # Accounts without a password are valid, so do not insist on one.
        pass   = options.fetch('password', "")
        host   = options.fetch('host', "localhost")

        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

        FileUtils.mkdir_p "_posts"

        db[QUERY].each do |post|
          # Get required fields and construct Jekyll compatible name.
          title = post[:Title]
          slug = post[:url_title]
          date = post[:Posted]
          content = post[:Body]

          name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile"

          # Get the relevant fields as a hash, delete empty fields and convert
          # to YAML for the header. Keywords may be NULL in the database, so
          # coerce it to a String before splitting.
          data = {
            'layout' => 'post',
            'title' => title.to_s,
            'tags' => post[:Keywords].to_s.split(',')
          }.delete_if { |k, v| v.nil? || v == '' }.to_yaml

          # Write out the data and content to file.
          File.open("_posts/#{name}", "w") do |f|
            f.puts data
            f.puts "---"
            f.puts content
          end
        end
      end
    end
  end
end
@@ -0,0 +1,265 @@
1
+ module JekyllImport
2
+ module Importers
3
+ class Tumblr < Importer
4
# Hands the list of libraries the Tumblr importer needs to
# JekyllImport.require_with_fallback.
def self.require_deps
  libraries = %w[rubygems fileutils open-uri nokogiri json uri time jekyll]
  JekyllImport.require_with_fallback(libraries)
end
16
+
17
# Registers the command-line options understood by the Tumblr importer.
#
# c - the command object; only its #option method is used.
#
# Returns nothing of interest to callers.
def self.specify_options(c)
  [
    ['url', '--url URL', 'Tumblr URL'],
    ['format', '--format FORMAT', 'Output format (default: "html")'],
    ['grab_images', '--grab_images', 'Whether to grab images (default: false)'],
    ['add_highlights', '--add_highlights', 'Whether to add highlights (default: false)'],
    ['rewrite_urls', '--rewrite_urls', 'Whether to rewrite URLs (default: false)']
  ].each do |name, switch, description|
    c.option name, switch, description
  end
end
24
+
25
# Process the import: page through the blog's JSON feed and write one post
# file per Tumblr post under _posts/tumblr.
#
# options - Hash with the String keys 'url' (required), 'format'
#           (default: "html"), 'grab_images', 'add_highlights' and
#           'rewrite_urls' (all default: false).
#
# Returns nothing of interest to callers.
# Raises KeyError if 'url' is missing.
def self.process(options)
  url            = options.fetch('url')
  format         = options.fetch('format', "html")
  grab_images    = options.fetch('grab_images', false)
  add_highlights = options.fetch('add_highlights', false)
  rewrite_urls   = options.fetch('rewrite_urls', false)

  # Read later by save_photo.
  @grab_images = grab_images
  FileUtils.mkdir_p "_posts/tumblr"
  url += "/api/read/json/"
  per_page = 50
  posts = []
  current_page = 0

  # Two passes are required so that we can rewrite URLs.
  # First pass builds up an array of each post as a hash.
  loop do
    feed_url = url + "?num=#{per_page}&start=#{current_page * per_page}"
    puts "Fetching #{feed_url}"
    # Block form so the feed handle is closed as soon as it has been read.
    json = open(feed_url) do |feed|
      feed.readlines.join("\n")[21...-2] # Strip Tumblr's JSONP chars.
    end
    blog = JSON.parse(json)
    puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
    batch = blog["posts"].map { |post| post_to_hash(post, format) }

    # If we're rewriting, save the posts for later. Otherwise, go ahead and
    # dump these to disk now
    if rewrite_urls
      posts += batch
    else
      batch.each { |post| write_post(post, format == "md", add_highlights) }
    end

    # A short page means there is nothing left to fetch.
    break if blog["posts"].size < per_page
    current_page += 1
  end

  # Rewrite URLs, create redirects and write out posts if necessary
  if rewrite_urls
    posts = rewrite_urls_and_redirects posts
    posts.each { |post| write_post(post, format == "md", add_highlights) }
  end
end
65
+
66
+ private
67
+
68
+ # Writes a post out to disk
69
# Serialises one converted post to _posts/tumblr/<name>.
#
# post           - Hash with :name, :header and :content.
# use_markdown   - when truthy the content is passed through
#                  html_to_markdown first.
# add_highlights - when truthy (and use_markdown is too) the converted
#                  content is also passed through add_syntax_highlights.
#
# Returns the result of File.open.
def self.write_post(post, use_markdown, add_highlights)
  body = post[:content]

  if use_markdown
    body = html_to_markdown(body)
    body = add_syntax_highlights(body) if add_highlights
  end

  File.open("_posts/tumblr/#{post[:name]}", "w") do |out|
    front_matter = post[:header].to_yaml
    out.puts front_matter + "---\n" + body
  end
end
80
+
81
+ # Converts each type of Tumblr post to a hash with all required
82
+ # data for Jekyll.
83
# Converts one Tumblr post into the hash used by write_post and
# rewrite_urls_and_redirects.
#
# post   - Hash for a single post as parsed from the Tumblr JSON feed.
# format - String used as the extension of the generated file name.
#
# Returns a Hash with the keys :name, :header, :content, :url and :slug.
def self.post_to_hash(post, format)
  case post['type']
  when "regular"
    title = post["regular-title"]
    content = post["regular-body"]
  when "link"
    title = post["link-text"] || post["link-url"]
    content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
    unless post["link-description"].nil?
      content << "<br/>" + post["link-description"]
    end
  when "photo"
    title = post["photo-caption"]
    # Always embed the photo; wrap it in a link only when the post has one.
    content = fetch_photo post
    unless post["photo-link-url"].nil?
      content = "<a href=\"#{post["photo-link-url"]}\">#{content}</a>"
    end
  when "audio"
    if post["id3-title"].nil?
      title = post["audio-caption"]
      content = post["audio-player"]
    else
      title = post["id3-title"]
      content = post["audio-player"] + "<br/>" + post["audio-caption"]
    end
  when "quote"
    title = post["quote-text"]
    content = "<blockquote>#{post["quote-text"]}</blockquote>"
    unless post["quote-source"].nil?
      content << "&#8212;" + post["quote-source"]
    end
  when "conversation"
    title = post["conversation-title"]
    turns = post["conversation"].map do |line|
      "<dt>#{line['label']}</dt><dd>#{line['phrase']}</dd>"
    end
    # Close the tags in the reverse of the order they were opened.
    content = "<section><dialog>#{turns.join}</dialog></section>"
  when "video"
    title = post["video-title"]
    content = post["video-player"]
    unless post["video-caption"].nil?
      # += builds a new String so the caller's post hash is left untouched.
      content += "<br/>" + post["video-caption"]
    end
  end
  date = Date.parse(post['date']).to_s
  # Strip any markup from the title.
  title = Nokogiri::HTML(title).text
  slug = if post["slug"] && post["slug"].strip != ""
    post["slug"]
  else
    generated = title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
    # Cap generated slugs at 200 characters.
    generated[0, 200]
  end
  {
    :name => "#{date}-#{slug}.#{format}",
    :header => {
      "layout" => "post",
      "title" => title,
      "date" => Time.parse(post['date']).xmlschema,
      "tags" => post["tags"],
      "tumblr_url" => post["url-with-slug"]
    },
    :content => content,
    :url => post["url"],
    :slug => post["url-with-slug"],
  }
end
151
+
152
+ # Attempts to fetch the largest version of a photo available for a post.
153
+ # If that file fails, it tries the next smaller size until all available
154
+ # photo URLs are exhausted. If they all fail, the import is aborted.
155
# Tries each photo URL of a post in turn: "photo-url" first when present,
# then every "photo-url-<size>" from the largest size down. Aborts the
# import when none of them can be saved.
#
# post - Hash for a single photo post from the Tumblr JSON feed.
#
# Returns an "<img>" tag String pointing at whatever save_photo returned.
def self.fetch_photo(post)
  # Only keys of the form "photo-url-<digits>" describe a size.
  sizes = post.keys.map { |key| key.to_s[/\Aphoto-url-(\d+)\z/, 1] }
  sizes = sizes.compact.map { |digits| digits.to_i }.uniq.sort.reverse

  candidates = [post["photo-url"]] + sizes.map { |size| post["photo-url-#{size}"] }
  candidates = candidates.compact.uniq

  # Borrow the extension from any photo URL whose last path segment has one;
  # fall back to no extension instead of failing when none does.
  _key, sample = post.find do |k, v|
    k =~ /\Aphoto-url/ && v.to_s.split("/").last =~ /\./
  end
  ext = sample ? "." + sample.split(".").last : ""

  candidates.each do |url|
    begin
      return "<img src=\"#{save_photo(url, ext)}\"/>"
    rescue OpenURI::HTTPError
      puts "Failed to grab photo"
    end
  end

  abort "Failed to fetch photo for post #{post['url']}"
end
176
+
177
+ # Create a Hash of old urls => new urls, for rewriting and
178
+ # redirects, and replace urls in each post. Instantiate Jekyll
179
+ # site/posts to get the correct permalink format.
180
# Builds a map of old Tumblr paths to new Jekyll URLs, writes a redirect
# page for every old path, and replaces the old paths inside each post's
# content.
#
# posts - Array of post Hashes as produced by post_to_hash.
#
# Returns the Array of posts with their :content rewritten in place.
def self.rewrite_urls_and_redirects(posts)
  site = Jekyll::Site.new(Jekyll.configuration({}))
  urls = Hash[posts.map { |post|
    # Create an initial empty file for the post so that
    # we can instantiate a post object. The empty block makes File.open
    # close the handle straight away.
    File.open("_posts/tumblr/#{post[:name]}", "w") { }
    tumblr_url = URI.parse(post[:slug]).path
    jekyll_url = Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
    # Drop the leading slash so the redirect lives under the current directory.
    redirect_dir = tumblr_url.sub(/\//, "") + "/"
    FileUtils.mkdir_p redirect_dir
    File.open(redirect_dir + "index.html", "w") do |f|
      f.puts "<html><head><link rel=\"canonical\" href=\"" +
             "#{jekyll_url}\"><meta http-equiv=\"refresh\" content=\"0; " +
             "url=#{jekyll_url}\"></head><body></body></html>"
    end
    [tumblr_url, jekyll_url]
  }]
  posts.map { |post|
    urls.each do |tumblr_url, jekyll_url|
      # Escape the path so characters such as "." match literally, and use
      # the block form so the replacement is inserted verbatim.
      post[:content].gsub!(/#{Regexp.escape(tumblr_url)}/i) { jekyll_url }
    end
    post
  }
end
204
+
205
+ # Convert preserving HTML tables as per the markdown docs.
206
# Reduces HTML to its text via Nokogiri while keeping table markup.
# Table-related tags are swapped for placeholders before the text is
# extracted and swapped back afterwards.
#
# content - HTML String; its table tags are replaced in place.
#
# Returns the converted String.
def self.html_to_markdown(content)
  kept_tags = %w[table tr th td]

  kept_tags.each do |tag|
    content.gsub!(/<#{tag}/i, "$$" + tag)
    content.gsub!(/<\/#{tag}/i, "||" + tag)
  end

  text = Nokogiri::HTML(content.gsub("'", "''")).text

  kept_tags.inject(text) do |restored, tag|
    restored.gsub("$$" + tag, "<" + tag).gsub("||" + tag, "</" + tag)
  end
end
219
+
220
+ # Adds pygments highlight tags to code blocks in posts that use
221
+ # markdown format. This doesn't guess the language of the code
222
+ # block, so you should modify this to suit your own content.
223
+ # For example, my code blocks only contain Python and JavaScript,
224
+ # so I can assume the block is JavaScript if it contains a
225
+ # semi-colon.
226
# Wraps runs of indented lines in highlight tags.
#
# content - String of post text, one line per "\n".
#
# Returns the String with "{% highlight <lang> %}" / "{% endhighlight %}"
# markers written over the first and last line of every indented run and
# one leading indent removed from the lines in between.
def self.add_syntax_highlights(content)
  lines = content.split("\n")
  block, indent, lang, start = false, /^ /, nil, nil
  lines.each_with_index do |line, i|
    if !block && line =~ indent
      # First indented line: start tracking a block, assuming Python.
      block = true
      lang = "python"
      start = i
    elsif block
      # A trailing semicolon anywhere in the block switches it to JavaScript.
      lang = "javascript" if line =~ /;$/
      block = line =~ indent && i < lines.size - 1 # Also handle EOF
      unless block
        # NOTE(review): the markers overwrite lines[start] and lines[i - 1]
        # rather than being inserted next to them - confirm that the first
        # and last line of a run never carry code.
        lines[start] = "{% highlight #{lang} %}"
        lines[i - 1] = "{% endhighlight %}"
      end
      lines[i] = lines[i].sub(indent, "")
    end
  end
  lines.join("\n")
end
247
+
248
# Downloads a photo into tumblr_files/ when image grabbing is enabled.
#
# url - String URL of the photo.
# ext - String extension (including the dot) appended to the local file
#       name when it does not already end with it.
#
# Returns the site-relative path of the local copy when @grab_images is
# set, otherwise the url unchanged.
def self.save_photo(url, ext)
  return url unless @grab_images

  suffix = ext.to_s
  path = "tumblr_files/#{url.split('/').last}"
  # Plain suffix comparison: the extension is not a regular expression.
  path += suffix unless path.end_with?(suffix)
  FileUtils.mkdir_p "tumblr_files"

  # Don't fetch if we've already cached this file
  unless File.size? path
    puts "Fetching photo #{url}"
    # The URL comes from the remote feed, so go through URI rather than
    # Kernel#open; the block form closes the remote handle.
    data = URI.parse(url).open { |remote| remote.read }
    # Binary mode so image bytes are written untouched on every platform.
    File.open(path, "wb") { |f| f.write(data) }
  end

  "/" + path
end
263
+ end
264
+ end
265
+ end