jekyll-import 0.1.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,111 @@
1
+ require 'rubygems'
2
+ require 'jekyll'
3
+ require 'fileutils'
4
+ require 'net/http'
5
+ require 'uri'
6
+ require "json"
7
+
8
+ # ruby -r './lib/jekyll/migrators/posterous.rb' -e 'JekyllImport::Posterous.process(email, pass, api_key, blog)'
9
+
10
module JekyllImport
  # Importer for Posterous blogs. Pages through the Posterous v2 posts API
  # (HTTP basic auth plus an API token), writes one HTML file per post under
  # `_posts/` and downloads each post's images under `imgs/`.
  module Posterous
    # Performs an authenticated GET against posterous.com, following
    # redirects. Uses the @email/@pass credentials stored by .process.
    #
    # uri_str - the request path (or redirect target) String.
    # limit   - how many more redirects may be followed (default: 10).
    #
    # Returns the successful Net::HTTPResponse.
    # Raises ArgumentError once the redirect budget is used up; any other
    # non-success response is raised through Net::HTTPResponse#error!.
    def self.fetch(uri_str, limit = 10)
      # You should choose better exception.
      raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password' if limit == 0

      # Net::HTTP.start returns the value of its block, i.e. the response.
      response = Net::HTTP.start('posterous.com') do |http|
        req = Net::HTTP::Get.new(uri_str)
        req.basic_auth @email, @pass
        http.request(req)
      end

      case response
      when Net::HTTPSuccess     then response
      when Net::HTTPRedirection then fetch(response['location'], limit - 1)
      else response.error!
      end
    end

    # Downloads a single (unauthenticated) URL, following redirects.
    #
    # Previously this helper was defined inside .fetch_images and therefore
    # re-defined on every call; it now lives at module level.
    #
    # url   - absolute URL String.
    # limit - how many more redirects may be followed (default: 10).
    #
    # Returns the response body String.
    # Raises ArgumentError when the redirect budget is used up; any other
    # non-success response is raised through Net::HTTPResponse#error!.
    def self.fetch_one(url, limit = 10)
      raise ArgumentError, 'HTTP redirect too deep' if limit == 0

      response = Net::HTTP.get_response(URI.parse(url))
      case response
      when Net::HTTPSuccess     then response.body
      when Net::HTTPRedirection then fetch_one(response['location'], limit - 1)
      else response.error!
      end
    end

    # Downloads every image of a post into `directory`.
    #
    # directory - target directory String (created if missing).
    # imgs      - Array of image Hashes; each is read via img["full"]["url"].
    #
    # Returns an Array of the written file paths ("<directory>/<file name>").
    def self.fetch_images(directory, imgs)
      FileUtils.mkdir_p directory

      imgs.map do |img|
        fullurl = img["full"]["url"]
        imgname = URI.parse(fullurl).path.split("/")[-1]
        target  = directory + "/" + imgname
        # Download first so a failed request does not leave an empty file.
        imgdata = fetch_one(fullurl)
        File.open(target, "wb") { |file| file.write imgdata }
        target
      end
    end

    # Imports all posts of a Posterous site.
    #
    # email     - account e-mail String used for basic auth.
    # pass      - account password String.
    # api_token - Posterous API token String.
    # blog      - site identifier used in the API path (default: 'primary').
    # base_path - prefix put in front of image paths in the generated
    #             <img> tags (default: '/').
    #
    # Returns nothing.
    def self.process(email, pass, api_token, blog = 'primary', base_path = '/')
      @email, @pass, @api_token = email, pass, api_token
      FileUtils.mkdir_p "_posts"

      endpoint = "/api/v2/users/me/sites/#{blog}/posts?api_token=#{@api_token}"
      posts = JSON.parse(fetch(endpoint).body)
      page = 1

      while posts.any?
        posts.each do |post|
          # to_s keeps an untitled post (nil title) from crashing the import.
          title = post["title"].to_s
          slug = title.gsub(/[^[:alnum:]]+/, '-').downcase
          date = Date.parse(post["display_date"])
          content = post["body_html"]
          published = !post["is_private"]
          basename = "%02d-%02d-%02d-%s" % [date.year, date.month, date.day, slug]
          name = basename + '.html'

          # Images:
          post_imgs = post["media"]["images"]
          if post_imgs.any?
            img_dir = "imgs/%s" % basename
            img_items = fetch_images(img_dir, post_imgs).map do |url|
              '<li><img src="' + base_path + url + '"></li>'
            end
            imgcontent = "<ol>\n" + img_items.join("\n") + "</ol>\n"

            # filter out "posterous-content", replacing with imgs. The block
            # form inserts the markup literally (no backslash interpretation).
            content = content.sub(/\<p\>\[\[posterous-content:[^\]]+\]\]\<\/p\>/) { imgcontent }
          end

          # Get the relevant fields as a hash, delete empty fields and convert
          # to YAML for the header
          data = {
            'layout' => 'post',
            'title' => title,
            'published' => published
          }.delete_if { |_k, v| v.nil? || v == '' }.to_yaml

          # Write out the data and content to file
          File.open("_posts/#{name}", "w") do |f|
            f.puts data
            f.puts "---"
            f.puts content
          end
        end

        # An empty page of results ends the loop.
        page += 1
        posts = JSON.parse(fetch("#{endpoint}&page=#{page}").body)
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
+ # Created by Kendall Buchanan (https://github.com/kendagriff) on 2011-12-22.
2
+ # Use at your own risk. The end.
3
+ #
4
+ # Usage:
5
+ # (URL)
6
+ # ruby -r '_import/rss.rb' -e "JekyllImport::RSS.process(:source => 'http://yourdomain.com/your-favorite-feed.xml')"
7
+ #
8
+ # (Local file)
9
+ # ruby -r '_import/rss.rb' -e "JekyllImport::RSS.process(:source => './somefile/on/your/computer.xml')"
10
+
11
+ require 'rss/1.0'
12
+ require 'rss/2.0'
13
+ require 'open-uri'
14
+ require 'fileutils'
15
+ require 'safe_yaml'
16
+
17
module JekyllImport
  # Importer that turns every item of an RSS feed (remote URL or local
  # file) into an HTML post under `_posts/`.
  module RSS
    # Checks that the mandatory :source option is present.
    #
    # options - Hash of importer options.
    #
    # Returns nothing; aborts the process when :source is missing.
    def self.validate(options)
      abort "Missing mandatory option --source." unless options[:source]
    end

    # Reads the raw feed text.
    #
    # Sources that parse to a URI type open-uri can read are fetched
    # remotely; everything else is read as a local file. This replaces
    # Kernel#open, which would run a command for a source starting with "|"
    # and no longer opens URLs on Ruby 3.
    #
    # source - a URL or a local file String.
    #
    # Returns the feed content String.
    def self.read_source(source)
      uri = begin
        URI.parse(source.to_s)
      rescue URI::InvalidURIError
        nil # e.g. a local path containing spaces
      end

      return uri.read if uri.respond_to?(:read)

      File.read(source)
    end

    # Process the import.
    #
    # options - Hash of importer options:
    #           :source - a URL or a local file String.
    #
    # Returns nothing.
    # Raises RuntimeError when the parser returns nothing for the source.
    def self.process(options)
      validate(options)

      source = options[:source]
      rss = ::RSS::Parser.parse(read_source(source), false)

      raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss

      FileUtils.mkdir_p("_posts")

      rss.items.each do |item|
        formatted_date = item.date.strftime('%Y-%m-%d')
        # `\$` is escaped so a literal dollar sign acts as a separator; the
        # unescaped form was an end-of-line anchor and left "$" in file names.
        post_name = item.title.split(%r{ |!|/|:|&|-|\$|,}).reject(&:empty?).map(&:downcase).join('-')
        name = "#{formatted_date}-#{post_name}"

        header = {
          'layout' => 'post',
          'title' => item.title
        }

        File.open("_posts/#{name}.html", "w") do |f|
          f.puts header.to_yaml
          f.puts "---\n\n"
          f.puts item.description
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # Migrator to import entries from a Serendipity (S9Y) blog
2
+ #
3
+ # Entries can be exported from http://blog.example.com/rss.php?version=2.0&all=1
4
+ #
5
+ # Usage:
6
+ # ruby -r './s9y_rss.rb' -e 'JekyllImport::S9Y.process("http://blog.example.com/rss.php?version=2.0&all=1")'
7
+
8
+ require 'open-uri'
9
+ require 'rss'
10
+ require 'fileutils'
11
+ require 'yaml'
12
+
13
module JekyllImport
  # Importer for Serendipity (S9Y) blogs based on their RSS 2.0 export.
  # Writes one Markdown-named post per feed item under `_posts/`.
  module S9Y
    # Reads the raw feed text.
    #
    # Names that parse to a URI type open-uri can read are fetched remotely;
    # everything else is read as a local file. This replaces Kernel#open,
    # which would run a command for a name starting with "|" and no longer
    # opens URLs on Ruby 3.
    #
    # file_name - a URL or a local file String.
    #
    # Returns the feed content String.
    def self.read_feed(file_name)
      uri = begin
        URI.parse(file_name.to_s)
      rescue URI::InvalidURIError
        nil # e.g. a local path containing spaces
      end

      return uri.read if uri.respond_to?(:read)

      File.read(file_name)
    end

    # Imports every item of the feed.
    #
    # file_name - a URL or a local file String pointing at the RSS export.
    #
    # Each item's link must contain "/archives/<number>-<slug>.html"; the
    # part from "/archives/" on becomes the permalink and <slug> goes into
    # the file name. A link of any other shape raises NoMethodError.
    #
    # Returns nothing.
    def self.process(file_name)
      FileUtils.mkdir_p("_posts")

      # The leading `::` matters: inside JekyllImport a bare `RSS` resolves
      # to the JekyllImport::RSS importer whenever that module is loaded.
      rss = ::RSS::Parser.parse(read_feed(file_name))

      rss.items.each do |item|
        post_url = item.link.match('.*(/archives/.*)')[1]
        categories = item.categories.map { |c| c.content }
        content = item.content_encoded.strip
        date = item.date
        slug = item.link.match('.*/archives/[0-9]+-(.*)\.html')[1]
        name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day, slug]

        # Front matter; nil or empty-string fields are dropped.
        data = {
          'layout' => 'post',
          'title' => item.title,
          'categories' => categories,
          'permalink' => post_url,
          's9y_link' => item.link,
          'date' => item.date
        }.delete_if { |_k, v| v.nil? || v == '' }.to_yaml

        # Write out the data and content to file
        File.open("_posts/#{name}", "w") do |f|
          f.puts data
          f.puts "---"
          f.puts content
        end
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ require 'rubygems'
2
+ require 'sequel'
3
+ require 'fileutils'
4
+ require 'safe_yaml'
5
+
6
+ # NOTE: This converter requires Sequel and the MySQL gems.
7
+ # The MySQL gem can be difficult to install on OS X. Once you have MySQL
8
+ # installed, running the following commands should work:
9
+ # $ sudo gem install sequel
10
+ # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
11
+
12
module JekyllImport
  # Importer for Textpattern sites: reads the posts table over Sequel/MySQL
  # and writes one Textile post per row under `_posts/`.
  module TextPattern
    # Selects only rows with a status of 4 ("live") or 5 ("sticky").
    # Other statuses are 1 => draft, 2 => hidden and 3 => pending.
    QUERY = "SELECT Title, \
url_title, \
Posted, \
Body, \
Keywords \
FROM textpattern \
WHERE Status = '4' OR \
Status = '5'"

    # Connects to the database and writes a post file for each selected row.
    #
    # dbname - database name String.
    # user   - database user String.
    # pass   - database password String.
    # host   - database host String (default: 'localhost').
    #
    # Returns whatever iterating the dataset returns; callers ignore it.
    def self.process(dbname, user, pass, host = 'localhost')
      connection = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

      FileUtils.mkdir_p "_posts"

      connection[QUERY].each do |row|
        # Jekyll-compatible file name: <date>-<slug>.textile
        stamp = row[:Posted].strftime("%Y-%m-%d")
        file_name = "#{stamp}-#{row[:url_title]}.textile"

        # Front matter; nil or empty-string fields are dropped.
        front_matter = {
          'layout' => 'post',
          'title' => row[:Title].to_s,
          'tags' => row[:Keywords].split(',')
        }
        front_matter.delete_if { |_key, value| value.nil? || value == '' }

        File.open("_posts/#{file_name}", "w") do |out|
          out.puts front_matter.to_yaml
          out.puts "---"
          out.puts row[:Body]
        end
      end
    end
  end
end
@@ -0,0 +1,195 @@
1
+ require 'rubygems'
2
+ require 'open-uri'
3
+ require 'fileutils'
4
+ require 'nokogiri'
5
+ require 'date'
6
+ require 'json'
7
+ require 'uri'
8
+ require 'jekyll'
9
+
10
module JekyllImport
  # Importer for Tumblr blogs. Reads posts through Tumblr's JSONP read API,
  # optionally downloads photos, rewrites internal links to the new Jekyll
  # URLs (leaving redirect pages behind) and writes the posts under
  # `_posts/tumblr/`.
  module Tumblr
    # Runs the import.
    #
    # url            - base URL String of the Tumblr blog.
    # format         - "html" or "md"; also used as the post file extension
    #                  (default: "html").
    # grab_images    - when true, photos are downloaded into `tumblr_files/`
    #                  (default: false).
    # add_highlights - when true and format is "md", code blocks are wrapped
    #                  in highlight tags (default: false).
    # rewrite_urls   - when true, links between posts are rewritten and
    #                  redirect pages are created (default: true).
    #
    # Returns nothing.
    def self.process(url, format = "html", grab_images = false,
                     add_highlights = false, rewrite_urls = true)
      @grab_images = grab_images
      FileUtils.mkdir_p "_posts/tumblr"
      url += "/api/read/json/"
      per_page = 50
      posts = []
      current_page = 0
      # Two passes are required so that we can rewrite URLs.
      # First pass builds up an array of each post as a hash.
      loop do
        feed = URI.parse(url + "?num=#{per_page}&start=#{current_page * per_page}").read
        # Strip Tumblr's JSONP wrapper: keep the outermost {...} object.
        blog = JSON.parse(feed[/\{.*\}/m])
        puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
        posts += blog["posts"].map { |post| post_to_hash(post, format) }
        # A short page means there is nothing left to fetch.
        break if blog["posts"].size < per_page
        current_page += 1
      end
      # Rewrite URLs and create redirects.
      posts = rewrite_urls_and_redirects posts if rewrite_urls
      # Second pass for writing post files.
      posts.each do |post|
        if format == "md"
          post[:content] = html_to_markdown post[:content]
          post[:content] = add_syntax_highlights post[:content] if add_highlights
        end
        File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
          f.puts post[:header].to_yaml + "---\n" + post[:content]
        end
      end
    end

    # The helpers below are internal to the importer. They were preceded by
    # a bare `private`, which has no effect on `def self.` methods, so they
    # have always been publicly callable and remain so.

    # Converts each type of Tumblr post to a hash with all required
    # data for Jekyll.
    #
    # post   - Hash of one post as returned by the Tumblr API.
    # format - file extension String for the generated post name.
    #
    # Returns a Hash with :name, :header, :content, :url and :slug.
    def self.post_to_hash(post, format)
      case post['type']
      when "regular"
        title = post["regular-title"]
        content = post["regular-body"]
      when "link"
        title = post["link-text"] || post["link-url"]
        content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
        unless post["link-description"].nil?
          content << "<br/>" + post["link-description"]
        end
      when "photo"
        title = post["photo-caption"]
        # Largest numeric suffix among the "photo-url-<size>" keys.
        max_size = post.keys.map { |k| k.gsub("photo-url-", "").to_i }.max
        url = post["photo-url"] || post["photo-url-#{max_size}"]
        # Take the extension from the first photo URL whose file name has one.
        ext_key = post.keys.find { |k|
          k =~ /^photo-url-/ && post[k].split("/").last =~ /\./
        }
        ext = "." + post[ext_key].split(".").last
        content = "<img src=\"#{save_file(url, ext)}\"/>"
        unless post["photo-link-url"].nil?
          content = "<a href=\"#{post["photo-link-url"]}\">#{content}</a>"
        end
      when "audio"
        # `post` is a Hash; the former `post.at[...]` raised NoMethodError.
        if !post["id3-title"].nil?
          title = post["id3-title"]
          content = post["audio-player"] + "<br/>" + post["audio-caption"]
        else
          title = post["audio-caption"]
          content = post["audio-player"]
        end
      when "quote"
        title = post["quote-text"]
        content = "<blockquote>#{post["quote-text"]}</blockquote>"
        unless post["quote-source"].nil?
          content << "&#8212;" + post["quote-source"]
        end
      when "conversation"
        title = post["conversation-title"]
        content = "<section><dialog>"
        post["conversation"].each do |line|
          content << "<dt>#{line['label']}</dt><dd>#{line['phrase']}</dd>"
        end
        # Close in reverse order of opening.
        content << "</dialog></section>"
      when "video"
        title = post["video-title"]
        content = post["video-player"]
        unless post["video-caption"].nil?
          content << "<br/>" + post["video-caption"]
        end
      end
      date = Date.parse(post['date']).to_s
      title = Nokogiri::HTML(title).text
      slug = title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
      slug = slug.slice(0..200) if slug.length > 200
      {
        :name => "#{date}-#{slug}.#{format}",
        :header => {
          "layout" => "post",
          "title" => title,
          "tags" => post["tags"],
        },
        :content => content,
        :url => post["url"],
        :slug => post["url-with-slug"],
      }
    end

    # Create a Hash of old urls => new urls, for rewriting and
    # redirects, and replace urls in each post. Instantiate Jekyll
    # site/posts to get the correct permalink format.
    #
    # posts - Array of post Hashes as built by .post_to_hash.
    #
    # Returns the same Array with every post's :content rewritten in place.
    def self.rewrite_urls_and_redirects(posts)
      site = Jekyll::Site.new(Jekyll.configuration({}))
      urls = Hash[posts.map { |post|
        # Create an initial empty file for the post so that
        # we can instantiate a post object. The empty block makes
        # File.open close the handle again straight away.
        File.open("_posts/tumblr/#{post[:name]}", "w") {}
        tumblr_url = URI.parse(post[:slug]).path
        jekyll_url = Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
        # Drop the leading slash so the redirect is created relative to
        # the current directory.
        redirect_dir = tumblr_url.sub(/\//, "") + "/"
        FileUtils.mkdir_p redirect_dir
        File.open(redirect_dir + "index.html", "w") do |f|
          f.puts "<html><head><meta http-equiv='Refresh' content='0; " +
                 "url=#{jekyll_url}'></head><body></body></html>"
        end
        [tumblr_url, jekyll_url]
      }]
      posts.map { |post|
        urls.each do |tumblr_url, jekyll_url|
          # Escape the path so "." and friends match literally; the block
          # form inserts the new URL without backslash interpretation.
          post[:content].gsub!(/#{Regexp.escape(tumblr_url)}/i) { jekyll_url }
        end
        post
      }
    end

    # Uses Python's html2text to convert a post's content to
    # markdown. Preserve HTML tables as per the markdown docs.
    #
    # The HTML is written to html2text's stdin instead of being passed
    # through `echo '...'`: the old quoting ('' for ') dropped apostrophes
    # and exposed the content to the shell.
    #
    # content - HTML String; it is modified in place while protecting tags.
    #
    # Returns the converted markdown String.
    def self.html_to_markdown(content)
      preserve = ["table", "tr", "th", "td"]
      # Hide table tags from html2text behind placeholder markers.
      preserve.each do |tag|
        content.gsub!(/<#{tag}/i, "$$" + tag)
        content.gsub!(/<\/#{tag}/i, "||" + tag)
      end
      markdown = IO.popen("html2text", "r+") do |pipe|
        # Write from a separate thread so a full pipe buffer in either
        # direction cannot block both sides.
        writer = Thread.new do
          pipe.puts content
          pipe.close_write
        end
        output = pipe.read
        writer.join
        output
      end
      # Restore the protected table tags.
      preserve.each do |tag|
        markdown.gsub!("$$" + tag, "<" + tag)
        markdown.gsub!("||" + tag, "</" + tag)
      end
      markdown
    end

    # Adds pygments highlight tags to code blocks in posts that use
    # markdown format. This doesn't guess the language of the code
    # block, so you should modify this to suit your own content.
    # For example, my code block only contain Python and JavaScript,
    # so I can assume the block is JavaScript if it contains a
    # semi-colon.
    #
    # The first and the last line of each detected block are replaced by
    # the opening and closing highlight tags.
    #
    # content - markdown String.
    #
    # Returns the markdown String with highlight tags inserted.
    def self.add_syntax_highlights(content)
      lines = content.split("\n")
      # NOTE(review): markdown code blocks are normally indented by four
      # spaces; confirm this single-space pattern is the intended one.
      block, indent, lang, start = false, /^ /, nil, nil
      lines.each_with_index do |line, i|
        if !block && line =~ indent
          block = true
          lang = "python"
          start = i
        elsif block
          lang = "javascript" if line =~ /;$/
          block = line =~ indent && i < lines.size - 1 # Also handle EOF
          if !block
            lines[start] = "{% highlight #{lang} %}"
            lines[i - 1] = "{% endhighlight %}"
          end
          lines[i] = lines[i].sub(indent, "")
        end
      end
      lines.join("\n")
    end

    # Downloads a photo when image grabbing is enabled.
    #
    # url - remote URL String of the file.
    # ext - file extension String including the leading dot.
    #
    # Returns the site-relative path of the local copy when @grab_images is
    # set, otherwise the unchanged url.
    def self.save_file(url, ext)
      if @grab_images
        path = "tumblr_files/#{url.split('/').last}"
        # Literal suffix check; as a regex the "." in ext matched any character.
        path += ext unless path.end_with?(ext)
        FileUtils.mkdir_p "tumblr_files"
        # Binary mode keeps image bytes intact on every platform.
        File.open(path, "wb") { |f| f.write(URI.parse(url).read) }
        url = "/" + path
      end
      url
    end
  end
end