jekyll-import 0.1.0.beta3 → 0.1.0.beta4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +6 -14
  2. data/History.markdown +18 -0
  3. data/README.markdown +12 -1
  4. data/jekyll-import.gemspec +31 -25
  5. data/lib/jekyll-import.rb +50 -1
  6. data/lib/jekyll-import/importer.rb +11 -0
  7. data/lib/jekyll-import/importers.rb +10 -0
  8. data/lib/jekyll-import/importers/csv.rb +50 -0
  9. data/lib/jekyll-import/importers/drupal6.rb +139 -0
  10. data/lib/jekyll-import/importers/drupal7.rb +102 -0
  11. data/lib/jekyll-import/importers/enki.rb +76 -0
  12. data/lib/jekyll-import/importers/google_reader.rb +68 -0
  13. data/lib/jekyll-import/importers/joomla.rb +83 -0
  14. data/lib/jekyll-import/importers/jrnl.rb +127 -0
  15. data/lib/jekyll-import/importers/marley.rb +72 -0
  16. data/lib/jekyll-import/importers/mephisto.rb +109 -0
  17. data/lib/jekyll-import/importers/mt.rb +169 -0
  18. data/lib/jekyll-import/importers/posterous.rb +139 -0
  19. data/lib/jekyll-import/importers/rss.rb +71 -0
  20. data/lib/jekyll-import/importers/s9y.rb +67 -0
  21. data/lib/jekyll-import/importers/textpattern.rb +76 -0
  22. data/lib/jekyll-import/importers/tumblr.rb +265 -0
  23. data/lib/jekyll-import/importers/typo.rb +89 -0
  24. data/lib/jekyll-import/importers/wordpress.rb +323 -0
  25. data/lib/jekyll-import/importers/wordpressdotcom.rb +97 -0
  26. data/lib/jekyll/commands/import.rb +1 -0
  27. data/test/helper.rb +3 -1
  28. data/test/test_jrnl_importer.rb +39 -0
  29. data/test/test_mt_importer.rb +16 -16
  30. data/test/test_tumblr_importer.rb +61 -0
  31. data/test/test_wordpress_importer.rb +1 -1
  32. data/test/test_wordpressdotcom_importer.rb +1 -1
  33. metadata +53 -32
  34. data/lib/jekyll/jekyll-import/csv.rb +0 -30
  35. data/lib/jekyll/jekyll-import/drupal6.rb +0 -112
  36. data/lib/jekyll/jekyll-import/drupal7.rb +0 -74
  37. data/lib/jekyll/jekyll-import/enki.rb +0 -49
  38. data/lib/jekyll/jekyll-import/google_reader.rb +0 -61
  39. data/lib/jekyll/jekyll-import/joomla.rb +0 -53
  40. data/lib/jekyll/jekyll-import/marley.rb +0 -52
  41. data/lib/jekyll/jekyll-import/mephisto.rb +0 -84
  42. data/lib/jekyll/jekyll-import/mt.rb +0 -142
  43. data/lib/jekyll/jekyll-import/posterous.rb +0 -122
  44. data/lib/jekyll/jekyll-import/rss.rb +0 -63
  45. data/lib/jekyll/jekyll-import/s9y.rb +0 -59
  46. data/lib/jekyll/jekyll-import/textpattern.rb +0 -58
  47. data/lib/jekyll/jekyll-import/tumblr.rb +0 -242
  48. data/lib/jekyll/jekyll-import/typo.rb +0 -69
  49. data/lib/jekyll/jekyll-import/wordpress.rb +0 -299
  50. data/lib/jekyll/jekyll-import/wordpressdotcom.rb +0 -84
@@ -0,0 +1,72 @@
1
module JekyllImport
  module Importers
    # Importer that turns the text files of a Marley data directory into
    # Jekyll posts under `_posts`.
    class Marley < Importer
      # Checks the 'marley_data_dir' option before an import is attempted.
      #
      # options - Hash of importer options keyed by String.
      #
      # Aborts through Jekyll.logger when the option is missing and raises
      # ArgumentError when it does not name an existing directory.
      def self.validate(options)
        data_dir = options['marley_data_dir']
        if data_dir.nil?
          Jekyll.logger.abort_with "Missing mandatory option --marley_data_dir."
        elsif !File.directory?(data_dir)
          raise ArgumentError, "marley dir '#{data_dir}' not found"
        end
      end

      # Regular expressions used to pick a Marley post file apart.
      #
      # Returns a Hash of Symbol => Regexp.
      def self.regexp
        { :id    => /^\d{0,4}-{0,1}(.*)$/,
          :title => /^#\s*(.*)\s+$/,
          :title_with_date => /^#\s*(.*)\s+\(([0-9\/]+)\)$/,
          :published_on => /.*\s+\(([0-9\/]+)\)$/,
          :perex => /^([^\#\n]+\n)$/,
          :meta  => /^\{\{\n(.*)\}\}\n$/mi # Multiline Regexp
        }
      end

      # Loads the libraries `process` relies on. `date` is listed because
      # `process` calls DateTime.parse.
      def self.require_deps
        JekyllImport.require_with_fallback(%w[
          fileutils
          safe_yaml
          date
        ])
      end

      # Declares the command-line switches of this importer on `c`.
      def self.specify_options(c)
        c.option 'marley_data_dir', '--marley_data_dir DIR', 'The dir containing your marley data'
      end

      # Converts every `*.txt` file below the Marley data directory into
      # `_posts/<date>-<name>.markdown`, where <name> is the name of the
      # directory holding the file with a leading "<digits>-" removed.
      #
      # options - Hash of importer options; 'marley_data_dir' is required
      #           (Hash#fetch raises KeyError when it is absent).
      #
      # Returns a String reporting how many posts were written.
      def self.process(options)
        marley_data_dir = options.fetch('marley_data_dir')
        patterns = regexp

        FileUtils.mkdir_p "_posts"

        posts = 0
        Dir["#{marley_data_dir}/**/*.txt"].each do |path|
          # Skip anything that is not a readable regular file (for example a
          # directory whose name happens to end in ".txt").
          next unless File.file?(path)

          post_dir = File.dirname(path)

          # The parsing order mirrors marley's app/lib/post.rb.
          file_content = File.read(path)
          # slice! removes the "{{ ... }}" meta section from file_content.
          meta_content = file_content.slice!(patterns[:meta])
          body = file_content.sub(patterns[:title], '').sub(patterns[:perex], '').strip

          # Prefer a "# Title (yyyy/mm/dd)" heading; fall back to a plain
          # "# Title" heading when no date is present.
          title, date_text = file_content.scan(patterns[:title_with_date]).first
          title = file_content[patterns[:title], 1] if title.nil?
          title = title.to_s.strip
          perex = file_content[patterns[:perex], 1].to_s.strip

          # Without a parseable date in the heading, use the modification
          # time of the post's directory.
          published_on = begin
            DateTime.parse(date_text)
          rescue ArgumentError, TypeError
            File.mtime(post_dir)
          end

          # NOTE(review): require_deps loads safe_yaml; confirm YAML.load runs
          # in its safe mode in the deployed environment.
          meta_yaml = meta_content && meta_content[patterns[:meta], 1]
          loaded = meta_yaml ? YAML.load(meta_yaml) : nil
          meta = loaded.is_a?(Hash) ? loaded : {}
          meta['title'] = title
          meta['layout'] = 'post'

          formatted_date = published_on.strftime('%Y-%m-%d')
          post_name = File.basename(post_dir).sub(/\A\d+-/, '')

          name = "#{formatted_date}-#{post_name}"
          File.open("_posts/#{name}.markdown", "w") do |out|
            out.puts meta.to_yaml
            out.puts "---\n"
            out.puts "\n#{perex}\n\n" unless perex.empty?
            out.puts body
          end
          posts += 1
        end
        "Created #{posts} posts!"
      end
    end
  end
end
@@ -0,0 +1,109 @@
1
+ # Quickly hacked together by Michael Ivey
2
+ # Based on mt.rb by Nick Gerakines, open source and publicly
3
+ # available under the MIT license. Use this module at your own risk.
4
+
5
+ # NOTE: This converter requires Sequel and the MySQL gems.
6
+ # The MySQL gem can be difficult to install on OS X. Once you have MySQL
7
+ # installed, running the following commands should work:
8
+ # $ sudo gem install sequel
9
+ # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
10
+
11
module JekyllImport
  module Importers
    # Importer that copies Mephisto articles out of a database into Jekyll
    # posts under `_posts`.
    class Mephisto < Importer
      # Exports Mephisto posts from PostgreSQL into a CSV file by shelling out
      # to `psql`. Export PGPASSWORD if you must.
      #
      # c - Hash of database settings. Keys read here: :host (default
      #     "localhost"), :database, :username and :filename (default
      #     "posts.csv").
      #
      # NOTE(review): the settings are interpolated into the shell command
      # unescaped, so they must come from a trusted source.
      def self.postgres(c)
        sql = <<-SQL
        BEGIN;
        CREATE TEMP TABLE jekyll AS
          SELECT title, permalink, body, published_at, filter FROM contents
          WHERE user_id = 1 AND type = 'Article' ORDER BY published_at;
        COPY jekyll TO STDOUT WITH CSV HEADER;
        ROLLBACK;
        SQL
        command = %Q(psql -h #{c[:host] || "localhost"} -c "#{sql.strip}" #{c[:database]} #{c[:username]} -o #{c[:filename] || "posts.csv"})
        puts command
        `#{command}`
        # NOTE(review): called without arguments; confirm which CSV constant
        # this resolves to and that its `process` accepts no arguments.
        CSV.process
      end

      # Aborts the program when the 'dbname' or 'user' option is missing.
      #
      # options - Hash of importer options keyed by String.
      def self.validate(options)
        %w[dbname user].each do |option|
          abort "Missing mandatory option --#{option}." if options[option].nil?
        end
      end

      # Loads the libraries this importer relies on.
      def self.require_deps
        JekyllImport.require_with_fallback(%w[
          rubygems
          sequel
          fastercsv
          fileutils
        ])
      end

      # Declares the command-line switches of this importer on `c`.
      def self.specify_options(c)
        c.option 'dbname', '--dbname DB', 'Database name'
        c.option 'user', '--user USER', 'Database user name'
        c.option 'password', '--password PW', "Database user's password (default: '')"
        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
      end

      # Selects the rows of `contents` that have type 'Article', belong to
      # user_id 1 and carry a publication date, oldest first. If you've got
      # hidden or otherwise unwanted posts, please sift through the created
      # posts to make sure nothing is accidentally published.
      QUERY = "SELECT id, \
                      permalink, \
                      body, \
                      published_at, \
                      title \
               FROM contents \
               WHERE user_id = 1 AND \
                     type = 'Article' AND \
                     published_at IS NOT NULL \
               ORDER BY published_at"

      # Reads the articles selected by QUERY over a MySQL connection and
      # writes each one to `_posts/YYYY-MM-DD-<permalink>.markdown`.
      #
      # options - Hash of importer options keyed by String: 'dbname' and
      #           'user' are required (Hash#fetch raises KeyError otherwise);
      #           'password' defaults to '' and 'host' to "localhost".
      def self.process(options)
        dbname = options.fetch('dbname')
        user   = options.fetch('user')
        pass   = options.fetch('password', '')
        host   = options.fetch('host', "localhost")

        db = Sequel.mysql(dbname, :user => user,
                                  :password => pass,
                                  :host => host,
                                  :encoding => 'utf8')

        FileUtils.mkdir_p "_posts"

        db[QUERY].each do |post|
          title = post[:title]
          slug = post[:permalink]
          date = post[:published_at]
          content = post[:body]

          # Ideally, this script would determine the post format (markdown,
          # html, etc) and create files with proper extensions. At this point
          # it just assumes that markdown will be acceptable.
          #
          # The date part is zero-padded (2013-03-05, not 2013-3-5).
          # Assumes published_at comes back as a Time/Date-like object
          # responding to #strftime -- TODO confirm for the adapter in use.
          name = "#{date.strftime('%Y-%m-%d')}-#{slug}.markdown"

          # QUERY selects no `entry_id` column, so the former 'mt_id' entry
          # was always nil and was always stripped; it is omitted here.
          data = {
            'layout' => 'post',
            'title' => title.to_s
          }.delete_if { |k, v| v.nil? || v == '' }.to_yaml

          File.open("_posts/#{name}", "w") do |f|
            f.puts data
            f.puts "---"
            f.puts content
          end
        end

      end
    end
  end
end
@@ -0,0 +1,169 @@
1
+ # Created by Nick Gerakines, open source and publicly available under the
2
+ # MIT license. Use this module at your own risk.
3
+ # I'm an Erlang/Perl/C++ guy so please forgive my dirty ruby.
4
+
5
+ # NOTE: This converter requires Sequel and the MySQL gems.
6
+ # The MySQL gem can be difficult to install on OS X. Once you have MySQL
7
+ # installed, running the following commands should work:
8
+ # $ sudo gem install sequel
9
+ # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
10
+
11
module JekyllImport
  module Importers
    # Importer that copies MovableType entries out of a MySQL database into
    # Jekyll posts under `_posts`.
    class MT < Importer

      # Values compared against the `entry_status` column.
      STATUS_DRAFT = 1
      STATUS_PUBLISHED = 2
      # Marker written between an entry's main text and its extended text.
      MORE_CONTENT_SEPARATOR = '<!--more-->'

      # Returns a fresh Hash with the default value of every Symbol-keyed
      # option (see `process`).
      def self.default_options
        {
          :blog_id => nil,
          :categories => true,
          :dest_encoding => 'utf-8',
          :src_encoding => 'utf-8'
        }
      end

      # Loads the libraries this importer relies on.
      def self.require_deps
        JekyllImport.require_with_fallback(%w[
          rubygems
          sequel
          fileutils
          safe_yaml
        ])
      end

      # Declares the command-line switches of this importer on `c`.
      def self.specify_options(c)
        c.option 'dbname', '--dbname DB', 'Database name'
        c.option 'user', '--user USER', 'Database user name'
        c.option 'password', '--password PW', "Database user's password, (default: '')"
        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
      end

      # Main migrator function. Call this to perform the migration.
      #
      # By default this migrator will include posts for all your MovableType
      # blogs. Specify a single blog by providing :blog_id.
      #
      # options:: A hash of configuration options.
      #
      # Connection settings are read from String keys:
      #
      # 'dbname'::   The name of the database (required)
      # 'user'::     The database user name (required)
      # 'password':: The database user's password. Default: ''
      # 'host'::     The address of the MySQL database host. Default: 'localhost'
      #
      # Behaviour settings are read from Symbol keys and fall back to
      # `default_options`; `specify_options` declares no switches for them:
      #
      # :blog_id::       Specify a single MovableType blog to export by providing blog_id.
      #                  Default: nil, importer will include posts for all blogs.
      # :categories::    If true, save the post's categories in its
      #                  YAML front matter. Default: true
      # :src_encoding::  Encoding of strings from the database. Default: UTF-8
      #                  If your output contains mangled characters, set src_encoding to
      #                  something appropriate for your database charset.
      # :dest_encoding:: Encoding of output strings. Default: UTF-8
      def self.process(options)
        dbname = options.fetch('dbname')
        user   = options.fetch('user')
        pass   = options.fetch('password', "")
        host   = options.fetch('host', "localhost")

        options = default_options.merge(options)

        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host)
        post_categories = db[:mt_placement].join(:mt_category, :category_id => :placement_category_id)

        FileUtils.mkdir_p "_posts"

        posts = db[:mt_entry]
        posts = posts.filter(:entry_blog_id => options[:blog_id]) if options[:blog_id]
        posts.each do |post|
          categories = post_categories.filter(
            :mt_placement__placement_entry_id => post[:entry_id]
          ).map { |ea| encode(ea[:category_basename], options) }

          file_name = post_file_name(post, options)

          data = post_metadata(post, options)
          data['categories'] = categories if !categories.empty? && options[:categories]
          # Empty and nil values are dropped from the front matter.
          yaml_front_matter = data.delete_if { |k, v| v.nil? || v == '' }.to_yaml

          content = post_content(post, options)

          File.open("_posts/#{file_name}", "w") do |f|
            f.puts yaml_front_matter
            f.puts "---"
            f.puts encode(content, options)
          end
        end
      end

      # Extracts metadata for YAML front matter from post.
      #
      # Returns a Hash; 'published' => false is added when the entry's
      # status is anything other than STATUS_PUBLISHED.
      def self.post_metadata(post, options = default_options)
        metadata = {
          'layout' => 'post',
          'title' => encode(post[:entry_title], options),
          'date' => post_date(post).strftime("%Y-%m-%d %H:%M:%S %z"),
          'excerpt' => encode(post[:entry_excerpt], options),
          'mt_id' => post[:entry_id]
        }
        metadata['published'] = false if post[:entry_status] != STATUS_PUBLISHED
        metadata
      end

      # Different versions of MT used different column names.
      #
      # Returns the entry's `entry_authored_on`, or `entry_created_on` when
      # the former is nil.
      def self.post_date(post)
        post[:entry_authored_on] || post[:entry_created_on]
      end

      # Returns true when the entry has no extended text (nil or only
      # whitespace).
      def self.extra_entry_text_empty?(post)
        post[:entry_text_more].nil? || post[:entry_text_more].strip.empty?
      end

      # Extracts text body from post.
      #
      # Returns `entry_text` as is when there is no extended text; otherwise
      # the main text, MORE_CONTENT_SEPARATOR and the extended text joined by
      # blank lines.
      def self.post_content(post, options = default_options)
        return post[:entry_text] if extra_entry_text_empty?(post)

        # to_s: an entry with extended text but a nil main text would
        # otherwise raise NoMethodError on nil.
        post[:entry_text].to_s + "\n\n#{MORE_CONTENT_SEPARATOR}\n\n" + post[:entry_text_more]
      end

      # Builds the post's file name, "YYYY-MM-DD-<basename>.<extension>",
      # with the extension chosen by `suffix`.
      def self.post_file_name(post, options = default_options)
        date = post_date(post)
        slug = post[:entry_basename]
        file_ext = suffix(post[:entry_convert_breaks])

        "#{date.strftime('%Y-%m-%d')}-#{slug}.#{file_ext}"
      end

      # Transcodes `str` from options[:src_encoding] to
      # options[:dest_encoding]. Values that do not respond to #encoding
      # (such as nil) are returned unchanged.
      def self.encode(str, options = default_options)
        if str.respond_to?(:encoding)
          str.encode(options[:dest_encoding], options[:src_encoding])
        else
          str
        end
      end

      # Maps an entry's `entry_convert_breaks` value to a file extension.
      # nil, markdown and "__default__" values give "markdown"; unrecognised
      # values are returned unchanged and might need custom work.
      def self.suffix(entry_type)
        if entry_type.nil? || entry_type.include?("markdown") || entry_type.include?("__default__")
          # The markdown plugin I have saves this as
          # "markdown_with_smarty_pants", so I just look for "markdown".
          "markdown"
        elsif entry_type.include?("textile")
          # This is saved as "textile_2" on my installation of MT 5.1.
          "textile"
        elsif entry_type == "0" || entry_type.include?("richtext")
          # Richtext looks to me like it's saved as HTML, so I include it here.
          "html"
        else
          # Other values might need custom work.
          entry_type
        end
      end
    end
  end
end
@@ -0,0 +1,139 @@
1
+ # ruby -r './lib/jekyll/migrators/posterous.rb' -e 'Jekyll::Posterous.process(email, pass, api_key)'
2
+ # Other arguments are optional; the default values are:
3
+ # * :include_imgs => false # should images be downloaded as well?
4
+ # * :blog => 'primary' # blog, if you have more than one.
5
+ # * :base_path => '/' # prefix for image URLs, e.g. if they will be served from a different host.
6
+
7
+ # For example, to download images as well as your posts, use the above command with
8
+ # ....process(email, pass, api_key, :include_imgs => true)
9
+
10
module JekyllImport
  module Importers
    # Importer that downloads posts (and optionally their images) through the
    # Posterous HTTP API and writes them as Jekyll posts under `_posts`.
    class Posterous < Importer

      # Declares the command-line switches of this importer on `c`.
      def self.specify_options(c)
        c.option 'email', '--email EMAIL', 'Posterous email address'
        c.option 'password', '--password PW', 'Posterous password'
        c.option 'api_token', '--token TOKEN', 'Posterous API Token'
      end

      # Loads the libraries this importer relies on.
      def self.require_deps
        JekyllImport.require_with_fallback(%w[
          rubygems
          jekyll
          fileutils
          uri
          json
          net/http
        ])
      end

      # Performs an authenticated GET against posterous.com, following
      # redirects.
      #
      # uri_str - The request path handed to Net::HTTP::Get.
      # limit   - Number of redirects still allowed (default 10).
      #
      # Uses the credentials `process` stored in @email and @pass.
      # Returns the successful response; raises ArgumentError once the
      # redirect budget is used up and lets `response.error!` raise for any
      # other response.
      def self.fetch(uri_str, limit = 10)
        # You should choose better exception.
        raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password' if limit == 0

        response = nil
        Net::HTTP.start('posterous.com') do |http|
          req = Net::HTTP::Get.new(uri_str)
          req.basic_auth @email, @pass
          response = http.request(req)
        end

        case response
        when Net::HTTPSuccess     then response
        when Net::HTTPRedirection then fetch(response['location'], limit - 1)
        else response.error!
        end
      end

      # Downloads the body found at `url`, following at most `limit`
      # redirects.
      #
      # Defined at class level so that it is not redefined on every
      # `fetch_images` call.
      #
      # Returns the response body; raises ArgumentError when the redirect
      # budget is used up.
      def self.fetch_one(url, limit = 10)
        raise ArgumentError, 'HTTP redirect too deep' if limit == 0
        response = Net::HTTP.get_response(URI.parse(url))
        case response
        when Net::HTTPSuccess     then response.body
        when Net::HTTPRedirection then fetch_one(response['location'], limit - 1)
        else
          response.error!
        end
      end

      # Downloads the full-size version of each image into `directory`.
      #
      # directory - Target directory; created when missing.
      # imgs      - Array of image Hashes; each is read at ["full"]["url"].
      #
      # Returns an Array with the local path of every image written.
      def self.fetch_images(directory, imgs)
        FileUtils.mkdir_p directory
        imgs.map do |img|
          fullurl = img["full"]["url"]
          imgname = URI.parse(fullurl).path.split("/")[-1]
          imgdata = fetch_one(fullurl)
          target = directory + "/" + imgname
          # File.open rather than Kernel#open: the name comes from a remote URL.
          File.open(target, "wb") do |file|
            file.write imgdata
          end
          target
        end
      end

      # Fetches every page of posts of a Posterous blog and writes each post
      # to `_posts/YYYY-MM-DD-<slug>.html`.
      #
      # options - Hash of importer options. 'email', 'password' and
      #           'api_token' (String keys) are required; Hash#fetch raises
      #           KeyError when one is absent. The Symbol keys below are
      #           optional:
      #           :include_imgs - download images as well? (default false)
      #           :blog         - which blog to read (default 'primary')
      #           :base_path    - prefix for image URLs (default '/')
      def self.process(options)
        email     = options.fetch('email')
        pass      = options.fetch('password')
        api_token = options.fetch('api_token')

        # `fetch` reads the credentials from these class-level variables.
        @email, @pass, @api_token = email, pass, api_token
        defaults = { :include_imgs => false, :blog => 'primary', :base_path => '/' }
        # Merge the caller's options over the defaults.
        opts = defaults.merge(options)
        FileUtils.mkdir_p "_posts"

        posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{opts[:blog]}/posts?api_token=#{@api_token}").body)
        page = 1

        # An empty page ends the pagination loop.
        while posts.any?
          posts.each do |post|
            title = post["title"]
            # to_s: a post without a title yields an empty slug instead of
            # raising NoMethodError on nil.
            slug = title.to_s.gsub(/[^[:alnum:]]+/, '-').downcase
            date = Date.parse(post["display_date"])
            content = post["body_html"]
            published = !post["is_private"]
            basename = "%02d-%02d-%02d-%s" % [date.year, date.month, date.day, slug]
            name = basename + '.html'

            # Images:
            if opts[:include_imgs]
              post_imgs = post["media"]["images"]
              if post_imgs.any?
                img_dir = "imgs/%s" % basename
                img_urls = self.fetch_images(img_dir, post_imgs)

                img_urls.map! do |url|
                  '<li><img src="' + opts[:base_path] + url + '"></li>'
                end
                imgcontent = "<ol>\n" + img_urls.join("\n") + "</ol>\n"

                # filter out "posterous-content", replacing with imgs:
                content = content.sub(/\<p\>\[\[posterous-content:[^\]]+\]\]\<\/p\>/, imgcontent)
              end
            end

            # Get the relevant fields as a hash, delete empty fields and convert
            # to YAML for the header
            data = {
              'layout' => 'post',
              'title' => title.to_s,
              'published' => published
            }.delete_if { |k, v| v.nil? || v == '' }.to_yaml

            # Write out the data and content to file
            File.open("_posts/#{name}", "w") do |f|
              f.puts data
              f.puts "---"
              f.puts content
            end
          end

          page += 1
          posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{opts[:blog]}/posts?api_token=#{@api_token}&page=#{page}").body)
        end
      end
    end
  end
end