jekyll-import 0.1.0.beta3 → 0.1.0.beta4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +6 -14
  2. data/History.markdown +18 -0
  3. data/README.markdown +12 -1
  4. data/jekyll-import.gemspec +31 -25
  5. data/lib/jekyll-import.rb +50 -1
  6. data/lib/jekyll-import/importer.rb +11 -0
  7. data/lib/jekyll-import/importers.rb +10 -0
  8. data/lib/jekyll-import/importers/csv.rb +50 -0
  9. data/lib/jekyll-import/importers/drupal6.rb +139 -0
  10. data/lib/jekyll-import/importers/drupal7.rb +102 -0
  11. data/lib/jekyll-import/importers/enki.rb +76 -0
  12. data/lib/jekyll-import/importers/google_reader.rb +68 -0
  13. data/lib/jekyll-import/importers/joomla.rb +83 -0
  14. data/lib/jekyll-import/importers/jrnl.rb +127 -0
  15. data/lib/jekyll-import/importers/marley.rb +72 -0
  16. data/lib/jekyll-import/importers/mephisto.rb +109 -0
  17. data/lib/jekyll-import/importers/mt.rb +169 -0
  18. data/lib/jekyll-import/importers/posterous.rb +139 -0
  19. data/lib/jekyll-import/importers/rss.rb +71 -0
  20. data/lib/jekyll-import/importers/s9y.rb +67 -0
  21. data/lib/jekyll-import/importers/textpattern.rb +76 -0
  22. data/lib/jekyll-import/importers/tumblr.rb +265 -0
  23. data/lib/jekyll-import/importers/typo.rb +89 -0
  24. data/lib/jekyll-import/importers/wordpress.rb +323 -0
  25. data/lib/jekyll-import/importers/wordpressdotcom.rb +97 -0
  26. data/lib/jekyll/commands/import.rb +1 -0
  27. data/test/helper.rb +3 -1
  28. data/test/test_jrnl_importer.rb +39 -0
  29. data/test/test_mt_importer.rb +16 -16
  30. data/test/test_tumblr_importer.rb +61 -0
  31. data/test/test_wordpress_importer.rb +1 -1
  32. data/test/test_wordpressdotcom_importer.rb +1 -1
  33. metadata +53 -32
  34. data/lib/jekyll/jekyll-import/csv.rb +0 -30
  35. data/lib/jekyll/jekyll-import/drupal6.rb +0 -112
  36. data/lib/jekyll/jekyll-import/drupal7.rb +0 -74
  37. data/lib/jekyll/jekyll-import/enki.rb +0 -49
  38. data/lib/jekyll/jekyll-import/google_reader.rb +0 -61
  39. data/lib/jekyll/jekyll-import/joomla.rb +0 -53
  40. data/lib/jekyll/jekyll-import/marley.rb +0 -52
  41. data/lib/jekyll/jekyll-import/mephisto.rb +0 -84
  42. data/lib/jekyll/jekyll-import/mt.rb +0 -142
  43. data/lib/jekyll/jekyll-import/posterous.rb +0 -122
  44. data/lib/jekyll/jekyll-import/rss.rb +0 -63
  45. data/lib/jekyll/jekyll-import/s9y.rb +0 -59
  46. data/lib/jekyll/jekyll-import/textpattern.rb +0 -58
  47. data/lib/jekyll/jekyll-import/tumblr.rb +0 -242
  48. data/lib/jekyll/jekyll-import/typo.rb +0 -69
  49. data/lib/jekyll/jekyll-import/wordpress.rb +0 -299
  50. data/lib/jekyll/jekyll-import/wordpressdotcom.rb +0 -84
@@ -0,0 +1,72 @@
1
+ module JekyllImport
2
+ module Importers
3
+ class Marley < Importer
4
# Verifies that the Marley data directory option is usable.
#
# options - Hash of importer options; reads 'marley_data_dir'.
#
# Aborts through Jekyll.logger when the option is missing and raises
# ArgumentError when it does not name an existing directory.
def self.validate(options)
  data_dir = options['marley_data_dir']
  if data_dir.nil?
    Jekyll.logger.abort_with "Missing mandatory option --marley_data_dir."
  elsif !File.directory?(data_dir)
    raise ArgumentError, "marley dir '#{data_dir}' not found"
  end
end
13
+
14
# Patterns used to pick a Marley post file apart.
#
# Returns a Hash of Symbol => Regexp with the keys :id, :title,
# :title_with_date, :published_on, :perex and :meta.
def self.regexp
  {
    id:              /^\d{0,4}-{0,1}(.*)$/,
    title:           /^#\s*(.*)\s+$/,
    title_with_date: /^#\s*(.*)\s+\(([0-9\/]+)\)$/,
    published_on:    /.*\s+\(([0-9\/]+)\)$/,
    perex:           /^([^\#\n]+\n)$/,
    meta:            /^\{\{\n(.*)\}\}\n$/mi # spans several lines ({{ ... }} block)
  }
end
23
+
24
# Loads the libraries this importer needs (fileutils and safe_yaml) through
# JekyllImport.require_with_fallback.
def self.require_deps
  dependencies = ['fileutils', 'safe_yaml']
  JekyllImport.require_with_fallback(dependencies)
end
30
+
31
# Registers the command-line option of this importer on the command object c.
def self.specify_options(c)
  c.option('marley_data_dir', '--marley_data_dir DIR', 'The dir containing your marley data')
end
34
+
35
# Converts every Marley post below the data directory into a Jekyll post.
#
# options - Hash of importer options; 'marley_data_dir' (required) is the
#           directory that is searched recursively for *.txt post files.
#
# For each post file a file _posts/YYYY-MM-DD-<dir name>.markdown is written,
# where <dir name> is the post's directory name without a leading "NNN-"
# prefix. The date comes from a "(YYYY/MM/DD)" suffix on the title line when
# present and parseable, otherwise from the mtime of the post's directory.
#
# Returns a String reporting how many posts were created.
# Raises KeyError when 'marley_data_dir' is missing.
def self.process(options)
  marley_data_dir = options.fetch('marley_data_dir')
  patterns = regexp

  FileUtils.mkdir_p "_posts"

  posts = 0
  Dir["#{marley_data_dir}/**/*.txt"].each do |path|
    # Skip directories that merely happen to be named *.txt.
    next unless File.file?(path)

    # Parsing adapted from marley's app/lib/post.rb.
    file_content = File.read(path)
    # slice! removes the {{ ... }} meta block so it cannot leak into the body.
    meta_content = file_content.slice!(patterns[:meta])
    body = file_content.sub(patterns[:title], '').sub(patterns[:perex], '').strip

    # Prefer the "# Title (date)" form; fall back to a plain "# Title" line.
    title, date_string = file_content.scan(patterns[:title_with_date]).first
    title = file_content[patterns[:title], 1] if title.nil?
    title = title.to_s.strip
    perex = file_content[patterns[:perex], 1].to_s.strip

    post_dir = File.dirname(path)
    published_on = begin
      date_string ? Date.parse(date_string) : File.mtime(post_dir)
    rescue ArgumentError
      # Unparseable date in the title: use the directory timestamp instead.
      File.mtime(post_dir)
    end

    # NOTE(review): this parses YAML taken from the post files; it relies on
    # YAML.load being safe for untrusted input (safe_yaml is loaded by
    # require_deps) -- confirm before feeding it third-party data.
    meta = meta_content ? YAML.load(meta_content[patterns[:meta], 1].to_s) : nil
    meta = {} unless meta.is_a?(Hash)
    meta['title'] = title
    meta['layout'] = 'post'

    formatted_date = published_on.strftime('%Y-%m-%d')
    post_name = File.basename(post_dir).sub(/\A\d+-/, '')

    name = "#{formatted_date}-#{post_name}"
    File.open("_posts/#{name}.markdown", "w") do |out|
      out.puts meta.to_yaml
      out.puts "---\n"
      out.puts "\n#{perex}\n\n" unless perex.empty?
      out.puts body
    end
    posts += 1
  end
  "Created #{posts} posts!"
end
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,109 @@
1
+ # Quickly hacked together by Michael Ivey
2
+ # Based on mt.rb by Nick Gerakines, open source and publicly
3
+ # available under the MIT license. Use this module at your own risk.
4
+
5
+ # NOTE: This converter requires Sequel and the MySQL gems.
6
+ # The MySQL gem can be difficult to install on OS X. Once you have MySQL
7
+ # installed, running the following commands should work:
8
+ # $ sudo gem install sequel
9
+ # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
10
+
11
+ module JekyllImport
12
+ module Importers
13
+ class Mephisto < Importer
14
+ #Accepts a hash with database config variables, exports mephisto posts into a csv
15
+ #export PGPASSWORD if you must
16
# Exports Mephisto articles from a PostgreSQL database to a CSV file with
# psql and then hands that file to the CSV importer.
#
# c - Hash of connection settings with Symbol keys:
#     :host     - database host (default: "localhost")
#     :database - database name
#     :username - database user
#     :filename - CSV file to write (default: "posts.csv")
#
# psql is started with an argument list rather than through a shell, so
# values in c cannot inject extra shell commands and the quotes inside the
# SQL need no escaping. Export PGPASSWORD if a password is required.
#
# Returns whatever CSV.process returns.
def self.postgres(c)
  sql = <<-SQL
    BEGIN;
    CREATE TEMP TABLE jekyll AS
      SELECT title, permalink, body, published_at, filter FROM contents
      WHERE user_id = 1 AND type = 'Article' ORDER BY published_at;
    COPY jekyll TO STDOUT WITH CSV HEADER;
    ROLLBACK;
  SQL
  host = c[:host] || "localhost"
  filename = c[:filename] || "posts.csv"

  # compact drops a missing database/user, just as interpolating nil into the
  # command line used to leave nothing behind.
  argv = ['psql', '-h', host, '-c', sql.strip, c[:database], c[:username], '-o', filename].compact.map(&:to_s)
  puts argv.join(' ')
  system(*argv)

  # NOTE(review): assumes the CSV importer reads its input path from the
  # 'file' option -- confirm against importers/csv.rb.
  CSV.process('file' => filename)
end
30
+
31
# Aborts the program unless both mandatory options ('dbname' and 'user')
# are present in the options Hash.
def self.validate(options)
  %w[dbname user].each do |option|
    abort "Missing mandatory option --#{option}." if options[option].nil?
  end
end
38
+
39
# Loads the libraries this importer needs through
# JekyllImport.require_with_fallback.
def self.require_deps
  dependencies = ['rubygems', 'sequel', 'fastercsv', 'fileutils']
  JekyllImport.require_with_fallback(dependencies)
end
47
+
48
# Registers the database connection options on the command object c.
def self.specify_options(c)
  c.option('dbname', '--dbname DB', 'Database name')
  c.option('user', '--user USER', 'Database user name')
  c.option('password', '--password PW', "Database user's password (default: '')")
  c.option('host', '--host HOST', 'Database host name (default: "localhost")')
end
54
+
55
+ # This query pulls only published articles (type = 'Article' with a
56
+ # published_at date) that belong to the user with id 1. Please sift
57
+ # through the created posts to make sure nothing is accidentally published.
58
# SQL that selects the articles to import, oldest first.
QUERY = [
  "SELECT id,",
  "permalink,",
  "body,",
  "published_at,",
  "title",
  "FROM contents",
  "WHERE user_id = 1 AND",
  "type = 'Article' AND",
  "published_at IS NOT NULL",
  "ORDER BY published_at"
].join(' ')
68
+
69
# Imports Mephisto articles from a MySQL database into _posts/.
#
# options - Hash of importer options (String keys):
#           'dbname'   - database name (required)
#           'user'     - database user name (required)
#           'password' - database user's password (default: '')
#           'host'     - database host name (default: "localhost")
#
# Every row returned by QUERY becomes _posts/YYYY-MM-DD-<permalink>.markdown
# holding YAML front matter (layout and title) followed by the article body.
#
# Raises KeyError when 'dbname' or 'user' is missing.
def self.process(options)
  dbname = options.fetch('dbname')
  user   = options.fetch('user')
  pass   = options.fetch('password', '')
  host   = options.fetch('host', "localhost")

  db = Sequel.mysql(dbname, :user => user,
                            :password => pass,
                            :host => host,
                            :encoding => 'utf8')

  FileUtils.mkdir_p "_posts"

  db[QUERY].each do |post|
    title   = post[:title]
    slug    = post[:permalink]
    date    = post[:published_at]
    content = post[:body]

    # Ideally, this script would determine the post format (markdown,
    # html, etc) and create files with proper extensions. At this point
    # it just assumes that markdown will be acceptable.
    # strftime zero-pads month and day so the file names sort
    # chronologically (2009-03-07, not 2009-3-7).
    name = "#{date.strftime('%Y-%m-%d')}-#{slug}.markdown"

    # QUERY selects no :entry_id column, so the former 'mt_id' entry was
    # always nil and never reached the front matter; it is left out here.
    data = {
      'layout' => 'post',
      'title'  => title.to_s
    }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

    File.open("_posts/#{name}", "w") do |f|
      f.puts data
      f.puts "---"
      f.puts content
    end
  end
end
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,169 @@
1
+ # Created by Nick Gerakines, open source and publicly available under the
2
+ # MIT license. Use this module at your own risk.
3
+ # I'm an Erlang/Perl/C++ guy so please forgive my dirty ruby.
4
+
5
+ # NOTE: This converter requires Sequel and the MySQL gems.
6
+ # The MySQL gem can be difficult to install on OS X. Once you have MySQL
7
+ # installed, running the following commands should work:
8
+ # $ sudo gem install sequel
9
+ # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
10
+
11
+ module JekyllImport
12
+ module Importers
13
+ class MT < Importer
14
+
15
# Values compared against a post's :entry_status column.
STATUS_DRAFT = 1
STATUS_PUBLISHED = 2
# Marker written between a post's main text and its extended text.
MORE_CONTENT_SEPARATOR = "<!--more-->"
18
+
19
# Default values (Symbol keys) that process merges the given options over.
def self.default_options
  {
    blog_id:       nil,
    categories:    true,
    dest_encoding: 'utf-8',
    src_encoding:  'utf-8'
  }
end
27
+
28
# Loads the libraries this importer needs through
# JekyllImport.require_with_fallback.
def self.require_deps
  dependencies = ['rubygems', 'sequel', 'fileutils', 'safe_yaml']
  JekyllImport.require_with_fallback(dependencies)
end
36
+
37
# Registers the database connection options on the command object c.
def self.specify_options(c)
  c.option('dbname', '--dbname DB', 'Database name')
  c.option('user', '--user USER', 'Database user name')
  c.option('password', '--password PW', "Database user's password, (default: '')")
  c.option('host', '--host HOST', 'Database host name (default: "localhost")')
end
43
+
44
+ # By default this migrator will include posts for all your MovableType blogs.
45
+ # Specify a single blog by providing blog_id.
46
+
47
+ # Main migrator function. Call this to perform the migration.
48
+ #
49
+ # dbname:: The name of the database
50
+ # user:: The database user name
51
+ # pass:: The database user's password
52
+ # host:: The address of the MySQL database host. Default: 'localhost'
53
+ # options:: A hash of configuration options
54
+ #
55
+ # Supported options are:
56
+ #
57
+ # :blog_id:: Specify a single MovableType blog to export by providing blog_id.
58
+ # Default: nil, importer will include posts for all blogs.
59
+ # :categories:: If true, save the post's categories in its
60
+ # YAML front matter. Default: true
61
+ # :src_encoding:: Encoding of strings from the database. Default: UTF-8
62
+ # If your output contains mangled characters, set src_encoding to
63
+ # something appropriate for your database charset.
64
+ # :dest_encoding:: Encoding of output strings. Default: UTF-8
65
# Reads the connection settings from the String keys 'dbname', 'user',
# 'password' and 'host' of options, then writes one file below _posts/ for
# every row of mt_entry (restricted to options[:blog_id] when that is set).
def self.process(options)
  database = options.fetch('dbname')
  username = options.fetch('user')
  password = options.fetch('password', "")
  hostname = options.fetch('host', "localhost")

  # From here on options also carries the Symbol-keyed defaults.
  options = default_options.merge(options)

  connection = Sequel.mysql(database, :user => username, :password => password, :host => hostname)
  category_join = connection[:mt_placement].join(:mt_category, :category_id => :placement_category_id)

  FileUtils.mkdir_p "_posts"

  entries = connection[:mt_entry]
  blog_id = options[:blog_id]
  entries = entries.filter(:entry_blog_id => blog_id) if blog_id

  entries.each do |entry|
    placements = category_join.filter(:mt_placement__placement_entry_id => entry[:entry_id])
    categories = placements.map { |row| encode(row[:category_basename], options) }

    file_name = post_file_name(entry, options)

    front_matter = post_metadata(entry, options)
    front_matter['categories'] = categories if options[:categories] && !categories.empty?
    # Empty and nil values are left out of the YAML header.
    yaml_front_matter = front_matter.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

    content = post_content(entry, options)

    File.open("_posts/#{file_name}", "w") do |out|
      out.puts yaml_front_matter
      out.puts "---"
      out.puts encode(content, options)
    end
  end
end
100
+
101
+ # Extracts metadata for YAML front matter from post
102
# post    - Hash-like row with :entry_title, :entry_excerpt, :entry_id,
#           :entry_status and the date columns read by post_date.
# options - Hash passed on to encode.
#
# Returns a Hash with 'layout', 'title', 'date', 'excerpt' and 'mt_id', plus
# 'published' => false for posts whose status is not STATUS_PUBLISHED.
def self.post_metadata(post, options = default_options)
  published = post[:entry_status] == STATUS_PUBLISHED

  metadata = {}
  metadata['layout']  = 'post'
  metadata['title']   = encode(post[:entry_title], options)
  metadata['date']    = post_date(post).strftime("%Y-%m-%d %H:%M:%S %z")
  metadata['excerpt'] = encode(post[:entry_excerpt], options)
  metadata['mt_id']   = post[:entry_id]
  metadata['published'] = false unless published
  metadata
end
113
+
114
+ # Different versions of MT used different column names
115
# Returns the post's :entry_authored_on value, or :entry_created_on when the
# former is missing.
def self.post_date(post)
  authored_on = post[:entry_authored_on]
  authored_on ? authored_on : post[:entry_created_on]
end
118
+
119
+ # Extracts text body from post
120
# True when the post's :entry_text_more is nil or contains only whitespace.
def self.extra_entry_text_empty?(post)
  extended = post[:entry_text_more]
  return true if extended.nil?
  extended.strip.empty?
end
123
+
124
# Returns the post's :entry_text; when extended text is present it is
# appended after MORE_CONTENT_SEPARATOR, set off by blank lines.
# The options argument is accepted but not used.
def self.post_content(post, options = default_options)
  return post[:entry_text] if extra_entry_text_empty?(post)

  separator = "\n\n#{MORE_CONTENT_SEPARATOR}\n\n"
  post[:entry_text] + separator + post[:entry_text_more]
end
131
+
132
# Builds the post's file name as "<YYYY-MM-DD>-<entry_basename>.<extension>",
# with the extension chosen by suffix from :entry_convert_breaks.
# The options argument is accepted but not used.
def self.post_file_name(post, options = default_options)
  published = post_date(post)
  basename  = post[:entry_basename]
  extension = suffix(post[:entry_convert_breaks])

  format('%s-%s.%s', published.strftime('%Y-%m-%d'), basename, extension)
end
139
+
140
# Transcodes str from options[:src_encoding] to options[:dest_encoding].
# Values that do not respond to #encoding (nil, numbers, ...) are returned
# unchanged.
def self.encode(str, options = default_options)
  return str unless str.respond_to?(:encoding)

  str.encode(options[:dest_encoding], options[:src_encoding])
end
147
+
148
+ # Maps the post's entry_convert_breaks value (its MT text format) to the
149
+ # file extension of the generated post. Unrecognised formats are passed
150
+ # through unchanged and might need custom work.
151
def self.suffix(entry_type)
  return "markdown" if entry_type.nil?

  # The markdown plugin I have saves this as
  # "markdown_with_smarty_pants", so I just look for "markdown".
  return "markdown" if entry_type.include?("markdown") || entry_type.include?("__default__")

  # This is saved as "textile_2" on my installation of MT 5.1.
  return "textile" if entry_type.include?("textile")

  # Richtext looks to me like it's saved as HTML, so I include it here.
  return "html" if entry_type == "0" || entry_type.include?("richtext")

  # Other values might need custom work.
  entry_type
end
167
+ end
168
+ end
169
+ end
@@ -0,0 +1,139 @@
1
+ # ruby -r './lib/jekyll/migrators/posterous.rb' -e 'Jekyll::Posterous.process(email, pass, api_key)'
2
+ # Other arguments are optional; the default values are:
3
+ # * :include_imgs => false # should images be downloaded as well?
4
+ # * :blog => 'primary' # blog, if you have more than one.
5
+ # * :base_path => '/' # for image, if they will be served from a different host for eg.
6
+
7
+ # For example, to download images as well as your posts, use the above command with
8
+ # ....process(email, pass, api_key, :include_imgs => true)
9
+
10
+ module JekyllImport
11
+ module Importers
12
+ class Posterous < Importer
13
+
14
# Registers the Posterous credential options on the command object c.
def self.specify_options(c)
  c.option('email', '--email EMAIL', 'Posterous email address')
  c.option('password', '--password PW', 'Posterous password')
  c.option('api_token', '--token TOKEN', 'Posterous API Token')
end
19
+
20
# Loads the libraries this importer needs through
# JekyllImport.require_with_fallback.
def self.require_deps
  dependencies = ['rubygems', 'jekyll', 'fileutils', 'uri', 'json', 'net/http']
  JekyllImport.require_with_fallback(dependencies)
end
30
+
31
# Performs an authenticated GET against posterous.com, using the credentials
# stored in @email and @pass, and follows redirects.
#
# uri_str - request path handed to Net::HTTP::Get
# limit   - number of redirects still allowed (default: 10)
#
# Returns the successful response. Raises ArgumentError once the redirect
# budget is used up; other responses are raised through response.error!.
def self.fetch(uri_str, limit = 10)
  # You should choose better exception.
  if limit == 0
    raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password'
  end

  response = Net::HTTP.start('posterous.com') do |http|
    request = Net::HTTP::Get.new(uri_str)
    request.basic_auth @email, @pass
    http.request(request)
  end

  case response
  when Net::HTTPSuccess
    response
  when Net::HTTPRedirection
    fetch(response['location'], limit - 1)
  else
    response.error!
  end
end
48
+
49
# Downloads the body of url, following up to limit redirects.
#
# Returns the response body. Raises ArgumentError when the redirect budget
# is used up; other responses are raised through response.error!.
def self.fetch_one(url, limit = 10)
  raise ArgumentError, 'HTTP redirect too deep' if limit == 0

  response = Net::HTTP.get_response(URI.parse(url))
  case response
  when Net::HTTPSuccess then response.body
  when Net::HTTPRedirection then fetch_one(response['location'], limit - 1)
  else
    response.error!
  end
end

# Downloads the full-size version of every image of a post.
#
# directory - directory the images are stored in (created when missing)
# imgs      - Array of Hashes; the URL is read from img["full"]["url"]
#
# Each image is saved under the last segment of its URL path.
#
# Returns an Array with the local path of every image written.
def self.fetch_images(directory, imgs)
  FileUtils.mkdir_p directory

  imgs.map do |img|
    fullurl = img["full"]["url"]
    imgname = URI.parse(fullurl).path.split("/").last
    # Download first so that a failed request leaves no empty file behind.
    imgdata = fetch_one(fullurl)

    target = "#{directory}/#{imgname}"
    # File.open rather than Kernel#open: the name is derived from a remote URL.
    File.open(target, "wb") { |file| file.write imgdata }
    target
  end
end
76
+
77
# Downloads every post of a Posterous site through the v2 API and writes each
# one to _posts/YYYY-MM-DD-<slug>.html with a YAML header (layout, title,
# published).
#
# options - Hash of importer options:
#           'email'     - Posterous account email (required)
#           'password'  - Posterous account password (required)
#           'api_token' - Posterous API token (required)
#           Optional, accepted with a Symbol or a String key:
#           :include_imgs - also download the posts' images (default: false)
#           :blog         - site to export (default: 'primary')
#           :base_path    - prefix put in front of image paths (default: '/')
#
# Raises KeyError when a required option is missing.
def self.process(options)
  email     = options.fetch('email')
  pass      = options.fetch('password')
  api_token = options.fetch('api_token')

  # fetch reads the credentials from these instance variables.
  @email, @pass, @api_token = email, pass, api_token

  defaults = { :include_imgs => false, :blog => 'primary', :base_path => '/' }
  # Build the effective settings from the caller's options; a Symbol key
  # wins over the matching String key, and the default is the last resort.
  opts = {}
  defaults.each do |key, default|
    opts[key] = options.fetch(key) { options.fetch(key.to_s, default) }
  end

  FileUtils.mkdir_p "_posts"

  posts_url = "/api/v2/users/me/sites/#{opts[:blog]}/posts?api_token=#{@api_token}"
  posts = JSON.parse(fetch(posts_url).body)
  page = 1

  # Keep requesting pages until one comes back empty.
  while posts.any?
    posts.each do |post|
      title = post["title"]
      # to_s keeps a post without a title from aborting the whole import.
      slug = title.to_s.gsub(/[^[:alnum:]]+/, '-').downcase
      date = Date.parse(post["display_date"])
      content = post["body_html"]
      published = !post["is_private"]
      basename = "%02d-%02d-%02d-%s" % [date.year, date.month, date.day, slug]
      name = basename + '.html'

      # Images:
      if opts[:include_imgs]
        post_imgs = (post["media"] || {})["images"] || []
        if post_imgs.any?
          img_dir = "imgs/%s" % basename
          img_urls = fetch_images(img_dir, post_imgs)

          img_items = img_urls.map do |url|
            '<li><img src="' + opts[:base_path] + url + '"></li>'
          end
          imgcontent = "<ol>\n" + img_items.join("\n") + "</ol>\n"

          # filter out "posterous-content", replacing with imgs:
          content = content.sub(/\<p\>\[\[posterous-content:[^\]]+\]\]\<\/p\>/, imgcontent)
        end
      end

      # Get the relevant fields as a hash, delete empty fields and convert
      # to YAML for the header
      data = {
        'layout'    => 'post',
        'title'     => title.to_s,
        'published' => published
      }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml

      # Write out the data and content to file
      File.open("_posts/#{name}", "w") do |f|
        f.puts data
        f.puts "---"
        f.puts content
      end
    end

    page += 1
    posts = JSON.parse(fetch("#{posts_url}&page=#{page}").body)
  end
end
137
+ end
138
+ end
139
+ end