jekyll-import 0.1.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,111 @@
1
+ require 'rubygems'
2
+ require 'jekyll'
3
+ require 'fileutils'
4
+ require 'net/http'
5
+ require 'uri'
6
+ require "json"
7
+
8
+ # ruby -r './lib/jekyll/migrators/posterous.rb' -e 'JekyllImport::Posterous.process(email, pass, api_token, blog)'
9
+
10
module JekyllImport
  # Imports the posts of a Posterous site, together with their images,
  # through the JSON API served under /api/v2 on posterous.com.
  module Posterous
    # Performs an authenticated GET against posterous.com, following
    # redirects.
    #
    # uri_str - the request path (or redirect target) String.
    # limit   - the number of redirects that may still be followed.
    #
    # Returns the successful Net::HTTPResponse.
    # Raises ArgumentError once the redirect budget is used up; any other
    # non-success response is raised through Net::HTTPResponse#error!.
    def self.fetch(uri_str, limit = 10)
      raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password' if limit == 0

      # Net::HTTP.start returns the value of its block, i.e. the response.
      response = Net::HTTP.start('posterous.com') do |http|
        req = Net::HTTP::Get.new(uri_str)
        req.basic_auth @email, @pass
        http.request(req)
      end

      case response
      when Net::HTTPSuccess     then response
      when Net::HTTPRedirection then fetch(response['location'], limit - 1)
      else response.error!
      end
    end

    # Downloads the body found at an absolute URL, following redirects.
    #
    # Defined at module level so that it exists as soon as the module is
    # loaded instead of being (re)defined by every call to fetch_images.
    #
    # url   - the absolute URL String to download.
    # limit - the number of redirects that may still be followed.
    #
    # Returns the response body String.
    # Raises ArgumentError once the redirect budget is used up.
    def self.fetch_one(url, limit = 10)
      raise ArgumentError, 'HTTP redirect too deep' if limit == 0

      response = Net::HTTP.get_response(URI.parse(url))
      case response
      when Net::HTTPSuccess     then response.body
      when Net::HTTPRedirection then fetch_one(response['location'], limit - 1)
      else response.error!
      end
    end

    # Downloads every image of a post into a directory.
    #
    # directory - the directory String the images are written to; it is
    #             created when missing.
    # imgs      - an Array of Hashes, each read as img["full"]["url"].
    #
    # Returns an Array with the path of every written file.
    def self.fetch_images(directory, imgs)
      FileUtils.mkdir_p directory

      imgs.map do |img|
        fullurl = img["full"]["url"]
        imgname = URI.parse(fullurl).path.split("/").last
        target  = "#{directory}/#{imgname}"

        # Download before opening the file so that a failed request does
        # not leave an empty file behind.
        imgdata = fetch_one(fullurl)
        File.open(target, "wb") { |file| file.write(imgdata) }
        target
      end
    end

    # Imports every post of a site into _posts/, one HTML file per post.
    #
    # email     - the account e-mail String used for HTTP basic auth.
    # pass      - the account password String.
    # api_token - the API token String appended to every request.
    # blog      - the site identifier used in the request path.
    # base_path - the String prepended to image paths in the generated HTML.
    #
    # Returns nothing.
    def self.process(email, pass, api_token, blog = 'primary', base_path = '/')
      @email, @pass, @api_token = email, pass, api_token
      FileUtils.mkdir_p "_posts"

      page  = 1
      posts = fetch_posts(blog)

      # Keep requesting pages until the API answers with an empty list.
      while posts.any?
        posts.each { |post| write_post(post, base_path) }

        page += 1
        posts = fetch_posts(blog, page)
      end
    end

    # Requests one page of posts. The first request carries no page
    # parameter; later ones append "&page=N".
    def self.fetch_posts(blog, page = nil)
      path = "/api/v2/users/me/sites/#{blog}/posts?api_token=#{@api_token}"
      path += "&page=#{page}" if page
      JSON.parse(fetch(path).body)
    end

    # Writes a single post Hash (as parsed from the API) to _posts/.
    def self.write_post(post, base_path)
      # to_s keeps posts without a title importable; the YAML header below
      # already treated the title this way.
      title     = post["title"].to_s
      slug      = title.gsub(/[^[:alnum:]]+/, '-').downcase
      date      = Date.parse(post["display_date"])
      content   = post["body_html"]
      published = !post["is_private"]
      basename  = "%02d-%02d-%02d-%s" % [date.year, date.month, date.day, slug]
      name      = basename + '.html'

      # Images: tolerate posts that carry no media information at all.
      post_imgs = (post["media"] || {})["images"] || []
      if post_imgs.any?
        img_dir  = "imgs/%s" % basename
        img_tags = fetch_images(img_dir, post_imgs).map do |url|
          '<li><img src="' + base_path + url + '"></li>'
        end
        imgcontent = "<ol>\n" + img_tags.join("\n") + "</ol>\n"

        # filter out "posterous-content", replacing with imgs:
        content = content.to_s.sub(/\<p\>\[\[posterous-content:[^\]]+\]\]\<\/p\>/, imgcontent)
      end

      # Get the relevant fields as a hash, delete empty fields and convert
      # to YAML for the header
      data = {
        'layout'    => 'post',
        'title'     => title,
        'published' => published
      }.delete_if { |_k, v| v.nil? || v == '' }.to_yaml

      # Write out the data and content to file
      File.open("_posts/#{name}", "w") do |f|
        f.puts data
        f.puts "---"
        f.puts content
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
+ # Created by Kendall Buchanan (https://github.com/kendagriff) on 2011-12-22.
2
+ # Use at your own risk. The end.
3
+ #
4
+ # Usage:
5
+ # (URL)
6
+ # ruby -r '_import/rss.rb' -e "JekyllImport::RSS.process(:source => 'http://yourdomain.com/your-favorite-feed.xml')"
7
+ #
8
+ # (Local file)
9
+ # ruby -r '_import/rss.rb' -e "JekyllImport::RSS.process(:source => './somefile/on/your/computer.xml')"
10
+
11
+ require 'rss/1.0'
12
+ require 'rss/2.0'
13
+ require 'open-uri'
14
+ require 'fileutils'
15
+ require 'safe_yaml'
16
+
17
module JekyllImport
  # Imports the items of an RSS feed into _posts/, one HTML file per item.
  module RSS
    # Checks that the mandatory options are present.
    #
    # options - the Hash of options; :source is required.
    #
    # Returns nothing. Aborts the process when :source is missing.
    def self.validate(options)
      abort "Missing mandatory option --source." unless options[:source]
    end

    # Process the import.
    #
    # options - the Hash of options:
    #           :source - a URL or a local file String.
    #
    # Raises RuntimeError when the parser returns no feed for the source.
    def self.process(options)
      validate(options)

      source = options[:source]
      rss    = ::RSS::Parser.parse(read_source(source), false)

      raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss

      # Created once up front instead of once per item.
      FileUtils.mkdir_p("_posts")

      rss.items.each do |item|
        formatted_date = item.date.strftime('%Y-%m-%d')
        # NOTE: `$` in this pattern is the end-of-line anchor, not a literal
        # dollar sign; it is left untouched so generated names do not change.
        post_name = item.title.split(%r{ |!|/|:|&|-|$|,}).reject do |part|
          part == ''
        end.map { |part| part.downcase }.join('-')
        name = "#{formatted_date}-#{post_name}"

        header = {
          'layout' => 'post',
          'title'  => item.title
        }

        File.open("_posts/#{name}.html", "w") do |f|
          f.puts header.to_yaml
          f.puts "---\n\n"
          f.puts item.description
        end
      end
    end

    # Reads the feed text from an http(s)/ftp URL or from a local file.
    #
    # Kernel#open is deliberately avoided: it runs a source that starts
    # with "|" as a shell command and no longer opens URLs on Ruby 3.
    #
    # source - a URL or a local file String.
    #
    # Returns the feed content String.
    def self.read_source(source)
      if source.to_s =~ %r{\A(?:https?|ftp)://}i
        URI.parse(source.to_s).read
      else
        File.open(source) { |file| file.read }
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # Migrator to import entries from a Serendipity (S9Y) blog
2
+ #
3
+ # Entries can be exported from http://blog.example.com/rss.php?version=2.0&all=1
4
+ #
5
+ # Usage:
6
+ # ruby -r './s9y_rss.rb' -e 'JekyllImport::S9Y.process("http://blog.example.com/rss.php?version=2.0&all=1")'
7
+
8
+ require 'open-uri'
9
+ require 'rss'
10
+ require 'fileutils'
11
+ require 'yaml'
12
+
13
module JekyllImport
  # Imports the entries of a Serendipity (S9Y) RSS export into _posts/.
  module S9Y
    # Process the import.
    #
    # file_name - a URL or a local file String of the RSS 2.0 export.
    #
    # Every item link is expected to contain "/archives/<number>-<slug>.html";
    # the slug and the permalink of the post are taken from it.
    def self.process(file_name)
      FileUtils.mkdir_p("_posts")

      # ::RSS is fully qualified: inside JekyllImport a bare `RSS` resolves
      # to the JekyllImport::RSS importer whenever that one is loaded.
      rss = ::RSS::Parser.parse(read_feed(file_name))

      rss.items.each do |item|
        post_url   = item.link.match('.*(/archives/.*)')[1]
        categories = item.categories.map { |category| category.content }
        content    = item.content_encoded.strip
        date       = item.date
        slug       = item.link.match('.*/archives/[0-9]+-(.*)\.html')[1]
        name       = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day, slug]

        data = {
          'layout'     => 'post',
          'title'      => item.title,
          'categories' => categories,
          'permalink'  => post_url,
          's9y_link'   => item.link,
          'date'       => item.date,
        }.delete_if { |_k, v| v.nil? || v == '' }.to_yaml

        # Write out the data and content to file
        File.open("_posts/#{name}", "w") do |f|
          f.puts data
          f.puts "---"
          f.puts content
        end
      end
    end

    # Reads the export from an http(s)/ftp URL or from a local file.
    #
    # Kernel#open is deliberately avoided: it runs a name that starts with
    # "|" as a shell command and no longer opens URLs on Ruby 3.
    #
    # file_name - a URL or a local file String.
    #
    # Returns the feed content String.
    def self.read_feed(file_name)
      if file_name.to_s =~ %r{\A(?:https?|ftp)://}i
        URI.parse(file_name.to_s).read
      else
        File.open(file_name, 'r') { |file| file.read }
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ require 'rubygems'
2
+ require 'sequel'
3
+ require 'fileutils'
4
+ require 'safe_yaml'
5
+
6
+ # NOTE: This converter requires Sequel and the MySQL gems.
7
+ # The MySQL gem can be difficult to install on OS X. Once you have MySQL
8
+ # installed, running the following commands should work:
9
+ # $ sudo gem install sequel
10
+ # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
11
+
12
module JekyllImport
  # Imports the articles of a TextPattern database into _posts/.
  module TextPattern
    # Reads a MySQL database via Sequel and creates a post file for each post.
    # The only posts selected are those with a status of 4 or 5, which means
    # "live" and "sticky" respectively.
    # Other statuses are 1 => draft, 2 => hidden and 3 => pending.
    QUERY = "SELECT Title, url_title, Posted, Body, Keywords " \
            "FROM textpattern " \
            "WHERE Status = '4' OR Status = '5'".freeze

    # Process the import.
    #
    # dbname - the database name String.
    # user   - the database user String.
    # pass   - the database password String.
    # host   - the database host String.
    #
    # Writes one "<date>-<url_title>.textile" file per selected row.
    def self.process(dbname, user, pass, host = 'localhost')
      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

      FileUtils.mkdir_p "_posts"

      db[QUERY].each do |post|
        # Get required fields and construct Jekyll compatible name.
        title   = post[:Title]
        slug    = post[:url_title]
        date    = post[:Posted]
        content = post[:Body]

        name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile"

        # to_s tolerates NULL keywords; surrounding blanks and empty entries
        # are dropped so "a, b" yields the tags "a" and "b".
        tags = post[:Keywords].to_s.split(',').map { |tag| tag.strip }.reject { |tag| tag.empty? }

        # Get the relevant fields as a hash, delete empty fields (including
        # an empty tag list) and convert to YAML for the header.
        data = {
          'layout' => 'post',
          'title'  => title.to_s,
          'tags'   => tags
        }.delete_if { |_k, v| v.nil? || v == '' || v == [] }.to_yaml

        # Write out the data and content to file.
        File.open("_posts/#{name}", "w") do |f|
          f.puts data
          f.puts "---"
          f.puts content
        end
      end
    end
  end
end
@@ -0,0 +1,195 @@
1
+ require 'rubygems'
2
+ require 'open-uri'
3
+ require 'fileutils'
4
+ require 'nokogiri'
5
+ require 'date'
6
+ require 'json'
7
+ require 'uri'
8
+ require 'jekyll'
9
+
10
module JekyllImport
  # Imports the posts of a Tumblr blog, read through its "/api/read/json/"
  # endpoint, into _posts/tumblr/.
  module Tumblr
    # Process the import.
    #
    # url            - the base URL String of the blog.
    # format         - the file extension of the written posts; "md" also
    #                  converts the content with html_to_markdown.
    # grab_images    - whether photos are downloaded into tumblr_files/.
    # add_highlights - whether highlight tags are added ("md" format only).
    # rewrite_urls   - whether links between posts are rewritten and
    #                  redirect pages are created.
    #
    # Returns the Array of post Hashes that were written.
    def self.process(url, format = "html", grab_images = false,
                     add_highlights = false, rewrite_urls = true)
      @grab_images = grab_images
      FileUtils.mkdir_p "_posts/tumblr"
      url += "/api/read/json/"
      per_page = 50
      posts = []
      # Two passes are required so that we can rewrite URLs.
      # First pass builds up an array of each post as a hash.
      current_page = 0
      loop do
        blog = fetch_page(url, per_page, current_page)
        puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
        posts += blog["posts"].map { |post| post_to_hash(post, format) }
        # A short page is the last page.
        break if blog["posts"].size < per_page
        current_page += 1
      end
      # Rewrite URLs and create redirects.
      posts = rewrite_urls_and_redirects posts if rewrite_urls
      # Second pass for writing post files.
      posts.each do |post|
        if format == "md"
          post[:content] = html_to_markdown post[:content]
          post[:content] = add_syntax_highlights post[:content] if add_highlights
        end
        File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
          f.puts post[:header].to_yaml + "---\n" + post[:content]
        end
      end
    end

    # The helpers below stay publicly callable, as before: a bare `private`
    # does not apply to methods defined with `def self.`.

    # Downloads one page of the feed and parses it.
    #
    # The response wraps the JSON object in a JavaScript assignment, so
    # everything from the first "{" to the last "}" is taken.
    #
    # Returns the parsed Hash.
    def self.fetch_page(url, per_page, page)
      body = URI.parse(url + "?num=#{per_page}&start=#{page * per_page}").read
      JSON.parse(body[/\{.*\}/m].to_s)
    end

    # Converts each type of Tumblr post to a hash with all required
    # data for Jekyll.
    #
    # post   - the post Hash as parsed from the feed.
    # format - the file extension String of the post file.
    #
    # Returns a Hash with the keys :name, :header, :content, :url and :slug.
    def self.post_to_hash(post, format)
      case post['type']
      when "regular"
        title = post["regular-title"]
        content = post["regular-body"]
      when "link"
        title = post["link-text"] || post["link-url"]
        content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
        unless post["link-description"].nil?
          content << "<br/>" + post["link-description"]
        end
      when "photo"
        title = post["photo-caption"]
        max_size = post.keys.map { |k| k.gsub("photo-url-", "").to_i }.max
        url = post["photo-url"] || post["photo-url-#{max_size}"]
        # The extension comes from the first sized URL whose last path
        # segment contains a dot; none is added when there is no such URL.
        sized_key = post.keys.find do |k|
          k =~ /^photo-url-/ && post[k].split("/").last =~ /\./
        end
        ext = sized_key ? "." + post[sized_key].split(".").last : ""
        content = "<img src=\"#{save_file(url, ext)}\"/>"
        unless post["photo-link-url"].nil?
          content = "<a href=\"#{post["photo-link-url"]}\">#{content}</a>"
        end
      when "audio"
        # `post` is a plain Hash, so the player is read with [] (the former
        # `post.at[...]` raised NoMethodError).
        content = post["audio-player"].to_s.dup
        if !post["id3-title"].nil?
          title = post["id3-title"]
          content << "<br/>" + post["audio-caption"] unless post["audio-caption"].nil?
        else
          title = post["audio-caption"]
        end
      when "quote"
        title = post["quote-text"]
        content = "<blockquote>#{post["quote-text"]}</blockquote>"
        unless post["quote-source"].nil?
          content << "&#8212;" + post["quote-source"]
        end
      when "conversation"
        title = post["conversation-title"]
        content = "<section><dialog>"
        post["conversation"].each do |line|
          content << "<dt>#{line['label']}</dt><dd>#{line['phrase']}</dd>"
        end
        # Close the tags in the reverse order of opening.
        content << "</dialog></section>"
      when "video"
        title = post["video-title"]
        content = post["video-player"].to_s.dup
        unless post["video-caption"].nil?
          content << "<br/>" + post["video-caption"]
        end
      end
      date = Date.parse(post['date']).to_s
      title = Nokogiri::HTML(title).text
      slug = title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
      slug = slug.slice(0..200) if slug.length > 200
      {
        :name => "#{date}-#{slug}.#{format}",
        :header => {
          "layout" => "post",
          "title" => title,
          "tags" => post["tags"],
        },
        # Unhandled post types leave the content unset; an empty, mutable
        # String keeps the later passes (gsub!, concatenation) working.
        :content => content || String.new,
        :url => post["url"],
        :slug => post["url-with-slug"],
      }
    end

    # Create a Hash of old urls => new urls, for rewriting and
    # redirects, and replace urls in each post. Instantiate Jekyll
    # site/posts to get the correct permalink format.
    #
    # posts - the Array of post Hashes built by post_to_hash.
    #
    # Returns the Array of posts with their :content rewritten in place.
    def self.rewrite_urls_and_redirects(posts)
      site = Jekyll::Site.new(Jekyll.configuration({}))
      urls = Hash[posts.map { |post|
        # Create an initial empty file for the post so that
        # we can instantiate a post object. The empty block makes sure the
        # handle is closed right away.
        File.open("_posts/tumblr/#{post[:name]}", "w") { }
        tumblr_url = URI.parse(post[:slug]).path
        jekyll_url = Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
        # Drop the leading slash so the redirect is created below the
        # current directory.
        redirect_dir = tumblr_url.sub(/\//, "") + "/"
        FileUtils.mkdir_p redirect_dir
        File.open(redirect_dir + "index.html", "w") do |f|
          f.puts "<html><head><meta http-equiv='Refresh' content='0; " +
                 "url=#{jekyll_url}'></head><body></body></html>"
        end
        [tumblr_url, jekyll_url]
      }]
      posts.map { |post|
        urls.each do |tumblr_url, jekyll_url|
          # The path is matched literally, not as a regular expression.
          post[:content].gsub!(/#{Regexp.escape(tumblr_url)}/i, jekyll_url)
        end
        post
      }
    end

    # Uses the external html2text command to convert a post's content to
    # markdown. Preserve HTML tables as per the markdown docs.
    #
    # content - the HTML String; its table tags are masked in place.
    #
    # Returns the converted String.
    def self.html_to_markdown(content)
      preserve = ["table", "tr", "th", "td"]
      preserve.each do |tag|
        content.gsub!(/<#{tag}/i, "$$" + tag)
        content.gsub!(/<\/#{tag}/i, "||" + tag)
      end
      # The content is sent through stdin instead of a quoted `echo`
      # argument, which dropped every apostrophe from the text.
      content = IO.popen(["html2text"], "r+") do |io|
        # Write from a separate thread so that reading the output can start
        # before all of the input has been consumed.
        writer = Thread.new do
          io.write(content)
          io.close_write
        end
        output = io.read
        writer.join
        output
      end
      preserve.each do |tag|
        content.gsub!("$$" + tag, "<" + tag)
        content.gsub!("||" + tag, "</" + tag)
      end
      content
    end

    # Adds pygments highlight tags to code blocks in posts that use
    # markdown format. This doesn't guess the language of the code
    # block, so you should modify this to suit your own content.
    # For example, my code block only contain Python and JavaScript,
    # so I can assume the block is JavaScript if it contains a
    # semi-colon.
    #
    # The first and the last line of an indented run are replaced by the
    # opening and closing tag.
    # NOTE(review): the indent pattern is a single leading space; confirm
    # this is the indent actually produced for code blocks.
    def self.add_syntax_highlights(content)
      lines = content.split("\n")
      block, indent, lang, start = false, /^ /, nil, nil
      lines.each_with_index do |line, i|
        if !block && line =~ indent
          block = true
          lang = "python"
          start = i
        elsif block
          lang = "javascript" if line =~ /;$/
          block = line =~ indent && i < lines.size - 1 # Also handle EOF
          if !block
            lines[start] = "{% highlight #{lang} %}"
            lines[i - 1] = "{% endhighlight %}"
          end
          lines[i] = lines[i].sub(indent, "")
        end
      end
      lines.join("\n")
    end

    # Downloads a file into tumblr_files/ when image grabbing is enabled.
    #
    # url - the URL String of the file.
    # ext - the extension String (with its dot) the local name should
    #       end with.
    #
    # Returns the local path (prefixed with "/") of the downloaded file,
    # or the unchanged url when image grabbing is disabled.
    def self.save_file(url, ext)
      return url unless @grab_images

      path = "tumblr_files/#{url.split('/').last}"
      # Literal suffix check; the former pattern treated "." as a wildcard.
      path += ext unless path.end_with?(ext)
      FileUtils.mkdir_p "tumblr_files"
      data = URI.parse(url).read
      # Binary mode keeps image bytes intact on every platform.
      File.open(path, "wb") { |f| f.write(data) }
      "/" + path
    end
  end
end