jekyll-import 0.1.0.beta3 → 0.1.0.beta4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +6 -14
  2. data/History.markdown +18 -0
  3. data/README.markdown +12 -1
  4. data/jekyll-import.gemspec +31 -25
  5. data/lib/jekyll-import.rb +50 -1
  6. data/lib/jekyll-import/importer.rb +11 -0
  7. data/lib/jekyll-import/importers.rb +10 -0
  8. data/lib/jekyll-import/importers/csv.rb +50 -0
  9. data/lib/jekyll-import/importers/drupal6.rb +139 -0
  10. data/lib/jekyll-import/importers/drupal7.rb +102 -0
  11. data/lib/jekyll-import/importers/enki.rb +76 -0
  12. data/lib/jekyll-import/importers/google_reader.rb +68 -0
  13. data/lib/jekyll-import/importers/joomla.rb +83 -0
  14. data/lib/jekyll-import/importers/jrnl.rb +127 -0
  15. data/lib/jekyll-import/importers/marley.rb +72 -0
  16. data/lib/jekyll-import/importers/mephisto.rb +109 -0
  17. data/lib/jekyll-import/importers/mt.rb +169 -0
  18. data/lib/jekyll-import/importers/posterous.rb +139 -0
  19. data/lib/jekyll-import/importers/rss.rb +71 -0
  20. data/lib/jekyll-import/importers/s9y.rb +67 -0
  21. data/lib/jekyll-import/importers/textpattern.rb +76 -0
  22. data/lib/jekyll-import/importers/tumblr.rb +265 -0
  23. data/lib/jekyll-import/importers/typo.rb +89 -0
  24. data/lib/jekyll-import/importers/wordpress.rb +323 -0
  25. data/lib/jekyll-import/importers/wordpressdotcom.rb +97 -0
  26. data/lib/jekyll/commands/import.rb +1 -0
  27. data/test/helper.rb +3 -1
  28. data/test/test_jrnl_importer.rb +39 -0
  29. data/test/test_mt_importer.rb +16 -16
  30. data/test/test_tumblr_importer.rb +61 -0
  31. data/test/test_wordpress_importer.rb +1 -1
  32. data/test/test_wordpressdotcom_importer.rb +1 -1
  33. metadata +53 -32
  34. data/lib/jekyll/jekyll-import/csv.rb +0 -30
  35. data/lib/jekyll/jekyll-import/drupal6.rb +0 -112
  36. data/lib/jekyll/jekyll-import/drupal7.rb +0 -74
  37. data/lib/jekyll/jekyll-import/enki.rb +0 -49
  38. data/lib/jekyll/jekyll-import/google_reader.rb +0 -61
  39. data/lib/jekyll/jekyll-import/joomla.rb +0 -53
  40. data/lib/jekyll/jekyll-import/marley.rb +0 -52
  41. data/lib/jekyll/jekyll-import/mephisto.rb +0 -84
  42. data/lib/jekyll/jekyll-import/mt.rb +0 -142
  43. data/lib/jekyll/jekyll-import/posterous.rb +0 -122
  44. data/lib/jekyll/jekyll-import/rss.rb +0 -63
  45. data/lib/jekyll/jekyll-import/s9y.rb +0 -59
  46. data/lib/jekyll/jekyll-import/textpattern.rb +0 -58
  47. data/lib/jekyll/jekyll-import/tumblr.rb +0 -242
  48. data/lib/jekyll/jekyll-import/typo.rb +0 -69
  49. data/lib/jekyll/jekyll-import/wordpress.rb +0 -299
  50. data/lib/jekyll/jekyll-import/wordpressdotcom.rb +0 -84
@@ -1,142 +0,0 @@
1
- # Created by Nick Gerakines, open source and publically available under the
2
- # MIT license. Use this module at your own risk.
3
- # I'm an Erlang/Perl/C++ guy so please forgive my dirty ruby.
4
-
5
- require 'rubygems'
6
- require 'sequel'
7
- require 'fileutils'
8
- require 'safe_yaml'
9
-
10
- # NOTE: This converter requires Sequel and the MySQL gems.
11
- # The MySQL gem can be difficult to install on OS X. Once you have MySQL
12
- # installed, running the following commands should work:
13
- # $ sudo gem install sequel
14
- # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
15
-
16
- module JekyllImport
17
- module MT
18
-
19
- STATUS_DRAFT = 1
20
- STATUS_PUBLISHED = 2
21
- MORE_CONTENT_SEPARATOR = '<!--more-->'
22
-
23
- def self.default_options
24
- {
25
- :blog_id => nil,
26
- :categories => true,
27
- :dest_encoding => 'utf-8',
28
- :src_encoding => 'utf-8'
29
- }
30
- end
31
-
32
- # By default this migrator will include posts for all your MovableType blogs.
33
- # Specify a single blog by providing blog_id.
34
-
35
- # Main migrator function. Call this to perform the migration.
36
- #
37
- # dbname:: The name of the database
38
- # user:: The database user name
39
- # pass:: The database user's password
40
- # host:: The address of the MySQL database host. Default: 'localhost'
41
- # options:: A hash of configuration options
42
- #
43
- # Supported options are:
44
- #
45
- # :blog_id:: Specify a single MovableType blog to export by providing blog_id.
46
- # Default: nil, importer will include posts for all blogs.
47
- # :categories:: If true, save the post's categories in its
48
- # YAML front matter. Default: true
49
- # :src_encoding:: Encoding of strings from the database. Default: UTF-8
50
- # If your output contains mangled characters, set src_encoding to
51
- # something appropriate for your database charset.
52
- # :dest_encoding:: Encoding of output strings. Default: UTF-8
53
- def self.process(dbname, user, pass, host = 'localhost', options = {})
54
- options = default_options.merge(options)
55
-
56
- db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host)
57
- post_categories = db[:mt_placement].join(:mt_category, :category_id => :placement_category_id)
58
-
59
- FileUtils.mkdir_p "_posts"
60
-
61
- posts = db[:mt_entry]
62
- posts = posts.filter(:entry_blog_id => options[:blog_id]) if options[:blog_id]
63
- posts.each do |post|
64
- categories = post_categories.filter(
65
- :mt_placement__placement_entry_id => post[:entry_id]
66
- ).map {|ea| encode(ea[:category_basename], options) }
67
-
68
- file_name = post_file_name(post, options)
69
-
70
- data = post_metadata(post, options)
71
- data['categories'] = categories if !categories.empty? && options[:categories]
72
- yaml_front_matter = data.delete_if { |k,v| v.nil? || v == '' }.to_yaml
73
-
74
- content = post_content(post, options)
75
-
76
- File.open("_posts/#{file_name}", "w") do |f|
77
- f.puts yaml_front_matter
78
- f.puts "---"
79
- f.puts encode(content, options)
80
- end
81
- end
82
- end
83
-
84
- # Extracts metadata for YAML front matter from post
85
- def self.post_metadata(post, options = default_options)
86
- metadata = {
87
- 'layout' => 'post',
88
- 'title' => encode(post[:entry_title], options),
89
- 'date' => post[:entry_authored_on].strftime("%Y-%m-%d %H:%M:%S %z"),
90
- 'excerpt' => encode(post[:entry_excerpt], options),
91
- 'mt_id' => post[:entry_id]
92
- }
93
- metadata['published'] = false if post[:entry_status] != STATUS_PUBLISHED
94
- metadata
95
- end
96
-
97
- # Extracts text body from post
98
- def self.post_content(post, options = default_options)
99
- if post[:entry_text_more].strip.empty?
100
- post[:entry_text]
101
- else
102
- post[:entry_text] + "\n\n#{MORE_CONTENT_SEPARATOR}\n\n" + post[:entry_text_more]
103
- end
104
- end
105
-
106
- def self.post_file_name(post, options = default_options)
107
- date = post[:entry_authored_on]
108
- slug = post[:entry_basename]
109
- file_ext = suffix(post[:entry_convert_breaks])
110
-
111
- "#{date.strftime('%Y-%m-%d')}-#{slug}.#{file_ext}"
112
- end
113
-
114
- def self.encode(str, options = default_options)
115
- if str.respond_to?(:encoding)
116
- str.encode(options[:dest_encoding], options[:src_encoding])
117
- else
118
- str
119
- end
120
- end
121
-
122
- # Ideally, this script would determine the post format (markdown,
123
- # html, etc) and create files with proper extensions. At this point
124
- # it just assumes that markdown will be acceptable.
125
- def self.suffix(entry_type)
126
- if entry_type.nil? || entry_type.include?("markdown") || entry_type.include?("__default__")
127
- # The markdown plugin I have saves this as
128
- # "markdown_with_smarty_pants", so I just look for "markdown".
129
- "markdown"
130
- elsif entry_type.include?("textile")
131
- # This is saved as "textile_2" on my installation of MT 5.1.
132
- "textile"
133
- elsif entry_type == "0" || entry_type.include?("richtext")
134
- # Richtext looks to me like it's saved as HTML, so I include it here.
135
- "html"
136
- else
137
- # Other values might need custom work.
138
- entry_type
139
- end
140
- end
141
- end
142
- end
@@ -1,122 +0,0 @@
1
- require 'rubygems'
2
- require 'jekyll'
3
- require 'fileutils'
4
- require 'net/http'
5
- require 'uri'
6
- require "json"
7
-
8
- # ruby -r './lib/jekyll/migrators/posterous.rb' -e 'Jekyll::Posterous.process(email, pass, api_key)'
9
- # Other arguments are optional; the default values are:
10
- # * :include_imgs => false # should images be downloaded as well?
11
- # * :blog => 'primary' # blog, if you have more than one.
12
- # * :base_path => '/' # for image, if they will be served from a different host for eg.
13
-
14
- # For example, to download images as well as your posts, use the above command with
15
- # ....process(email, pass, api_key, :include_imgs => true)
16
-
17
- module JekyllImport
18
- module Posterous
19
- def self.fetch(uri_str, limit = 10)
20
- # You should choose better exception.
21
- raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password' if limit == 0
22
-
23
- response = nil
24
- Net::HTTP.start('posterous.com') do |http|
25
- req = Net::HTTP::Get.new(uri_str)
26
- req.basic_auth @email, @pass
27
- response = http.request(req)
28
- end
29
-
30
- case response
31
- when Net::HTTPSuccess then response
32
- when Net::HTTPRedirection then fetch(response['location'], limit - 1)
33
- else response.error!
34
- end
35
- end
36
-
37
- def self.fetch_images(directory, imgs)
38
- def self.fetch_one(url, limit = 10)
39
- raise ArgumentError, 'HTTP redirect too deep' if limit == 0
40
- response = Net::HTTP.get_response(URI.parse(url))
41
- case response
42
- when Net::HTTPSuccess then response.body
43
- when Net::HTTPRedirection then self.fetch_one(response['location'], limit - 1)
44
- else
45
- response.error!
46
- end
47
- end
48
-
49
- FileUtils.mkdir_p directory
50
- urls = Array.new
51
- imgs.each do |img|
52
- fullurl = img["full"]["url"]
53
- uri = URI.parse(fullurl)
54
- imgname = uri.path.split("/")[-1]
55
- imgdata = self.fetch_one(fullurl)
56
- open(directory + "/" + imgname, "wb") do |file|
57
- file.write imgdata
58
- end
59
- urls.push(directory + "/" + imgname)
60
- end
61
-
62
- return urls
63
- end
64
-
65
- def self.process(email, pass, api_token, opts={})
66
- @email, @pass, @api_token = email, pass, api_token
67
- defaults = { :include_imgs => false, :blog => 'primary', :base_path => '/' }
68
- opts = defaults.merge(opts)
69
- FileUtils.mkdir_p "_posts"
70
-
71
- posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{opts[:blog]}/posts?api_token=#{@api_token}").body)
72
- page = 1
73
-
74
- while posts.any?
75
- posts.each do |post|
76
- title = post["title"]
77
- slug = title.gsub(/[^[:alnum:]]+/, '-').downcase
78
- date = Date.parse(post["display_date"])
79
- content = post["body_html"]
80
- published = !post["is_private"]
81
- basename = "%02d-%02d-%02d-%s" % [date.year, date.month, date.day, slug]
82
- name = basename + '.html'
83
-
84
- # Images:
85
- if opts[:include_imgs]
86
- post_imgs = post["media"]["images"]
87
- if post_imgs.any?
88
- img_dir = "imgs/%s" % basename
89
- img_urls = self.fetch_images(img_dir, post_imgs)
90
-
91
- img_urls.map! do |url|
92
- '<li><img src="' + opts[:base_path] + url + '"></li>'
93
- end
94
- imgcontent = "<ol>\n" + img_urls.join("\n") + "</ol>\n"
95
-
96
- # filter out "posterous-content", replacing with imgs:
97
- content = content.sub(/\<p\>\[\[posterous-content:[^\]]+\]\]\<\/p\>/, imgcontent)
98
- end
99
- end
100
-
101
- # Get the relevant fields as a hash, delete empty fields and convert
102
- # to YAML for the header
103
- data = {
104
- 'layout' => 'post',
105
- 'title' => title.to_s,
106
- 'published' => published
107
- }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
108
-
109
- # Write out the data and content to file
110
- File.open("_posts/#{name}", "w") do |f|
111
- f.puts data
112
- f.puts "---"
113
- f.puts content
114
- end
115
- end
116
-
117
- page += 1
118
- posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{opts[:blog]}/posts?api_token=#{@api_token}&page=#{page}").body)
119
- end
120
- end
121
- end
122
- end
@@ -1,63 +0,0 @@
1
- # Created by Kendall Buchanan (https://github.com/kendagriff) on 2011-12-22.
2
- # Use at your own risk. The end.
3
- #
4
- # Usage:
5
- # (URL)
6
- # ruby -r 'jekyll/jekyll-import/rss' -e "JekyllImport::RSS.process(:source => 'http://yourdomain.com/your-favorite-feed.xml')"
7
- #
8
- # (Local file)
9
- # ruby -r 'jekyll/jekyll-import/rss' -e "JekyllImport::RSS.process(:source => './somefile/on/your/computer.xml')"
10
-
11
- require 'rss/1.0'
12
- require 'rss/2.0'
13
- require 'open-uri'
14
- require 'fileutils'
15
- require 'safe_yaml'
16
-
17
- module JekyllImport
18
- module RSS
19
- def self.validate(options)
20
- if !options[:source]
21
- abort "Missing mandatory option --source."
22
- end
23
- end
24
-
25
- # Process the import.
26
- #
27
- # source - a URL or a local file String.
28
- #
29
- # Returns nothing.
30
- def self.process(options)
31
- validate(options)
32
-
33
- source = options[:source]
34
-
35
- content = ""
36
- open(source) { |s| content = s.read }
37
- rss = ::RSS::Parser.parse(content, false)
38
-
39
- raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss
40
-
41
- rss.items.each do |item|
42
- formatted_date = item.date.strftime('%Y-%m-%d')
43
- post_name = item.title.split(%r{ |!|/|:|&|-|$|,}).map do |i|
44
- i.downcase if i != ''
45
- end.compact.join('-')
46
- name = "#{formatted_date}-#{post_name}"
47
-
48
- header = {
49
- 'layout' => 'post',
50
- 'title' => item.title
51
- }
52
-
53
- FileUtils.mkdir_p("_posts")
54
-
55
- File.open("_posts/#{name}.html", "w") do |f|
56
- f.puts header.to_yaml
57
- f.puts "---\n\n"
58
- f.puts item.description
59
- end
60
- end
61
- end
62
- end
63
- end
@@ -1,59 +0,0 @@
1
- # Migrator to import entries from an Serendipity (S9Y) blog
2
- #
3
- # Entries can be exported from http://blog.example.com/rss.php?version=2.0&all=1
4
- #
5
- # Usage:
6
- # ruby -r './s9y_rss.rb' -e 'Jekyll::S9Y.process("http://blog.example.com/rss.php?version=2.0&all=1")'
7
-
8
- require 'open-uri'
9
- require 'rss'
10
- require 'fileutils'
11
- require 'yaml'
12
-
13
- module JekyllImport
14
- module S9Y
15
- def self.validate(options)
16
- if !options[:source]
17
- abort "Missing mandatory option --source, e.g. --source \"http://blog.example.com/rss.php?version=2.0&all=1\""
18
- end
19
- end
20
-
21
- def self.process(options)
22
- validate(options)
23
-
24
- FileUtils.mkdir_p("_posts")
25
-
26
- source = options[:source]
27
-
28
- text = ''
29
- open(source) { |line| text = line.read }
30
- rss = RSS::Parser.parse(text)
31
-
32
- rss.items.each do |item|
33
- post_url = item.link.match('.*(/archives/.*)')[1]
34
- categories = item.categories.collect { |c| c.content }
35
- content = item.content_encoded.strip
36
- date = item.date
37
- slug = item.link.match('.*/archives/[0-9]+-(.*)\.html')[1]
38
- name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day,
39
- slug]
40
-
41
- data = {
42
- 'layout' => 'post',
43
- 'title' => item.title,
44
- 'categories' => categories,
45
- 'permalink' => post_url,
46
- 's9y_link' => item.link,
47
- 'date' => item.date,
48
- }.delete_if { |k,v| v.nil? || v == '' }.to_yaml
49
-
50
- # Write out the data and content to file
51
- File.open("_posts/#{name}", "w") do |f|
52
- f.puts data
53
- f.puts "---"
54
- f.puts content
55
- end
56
- end
57
- end
58
- end
59
- end
@@ -1,58 +0,0 @@
1
- require 'rubygems'
2
- require 'sequel'
3
- require 'fileutils'
4
- require 'safe_yaml'
5
-
6
- # NOTE: This converter requires Sequel and the MySQL gems.
7
- # The MySQL gem can be difficult to install on OS X. Once you have MySQL
8
- # installed, running the following commands should work:
9
- # $ sudo gem install sequel
10
- # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
11
-
12
- module JekyllImport
13
- module TextPattern
14
- # Reads a MySQL database via Sequel and creates a post file for each post.
15
- # The only posts selected are those with a status of 4 or 5, which means
16
- # "live" and "sticky" respectively.
17
- # Other statuses are 1 => draft, 2 => hidden and 3 => pending.
18
- QUERY = "SELECT Title, \
19
- url_title, \
20
- Posted, \
21
- Body, \
22
- Keywords \
23
- FROM textpattern \
24
- WHERE Status = '4' OR \
25
- Status = '5'"
26
-
27
- def self.process(dbname, user, pass, host = 'localhost')
28
- db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
29
-
30
- FileUtils.mkdir_p "_posts"
31
-
32
- db[QUERY].each do |post|
33
- # Get required fields and construct Jekyll compatible name.
34
- title = post[:Title]
35
- slug = post[:url_title]
36
- date = post[:Posted]
37
- content = post[:Body]
38
-
39
- name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile"
40
-
41
- # Get the relevant fields as a hash, delete empty fields and convert
42
- # to YAML for the header.
43
- data = {
44
- 'layout' => 'post',
45
- 'title' => title.to_s,
46
- 'tags' => post[:Keywords].split(',')
47
- }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
48
-
49
- # Write out the data and content to file.
50
- File.open("_posts/#{name}", "w") do |f|
51
- f.puts data
52
- f.puts "---"
53
- f.puts content
54
- end
55
- end
56
- end
57
- end
58
- end