RubyGems - jekyll-import - Versions diffs - 0.1.0.beta1 - Mend

jekyll-import 0.1.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

data/Gemfile +2 -0
data/History.txt +7 -0
data/LICENSE +21 -0
data/README.md +4 -0
data/Rakefile +151 -0
data/jekyll-import.gemspec +80 -0
data/lib/jekyll-import.rb +7 -0
data/lib/jekyll/commands/import.rb +51 -0
data/lib/jekyll/jekyll-import/csv.rb +26 -0
data/lib/jekyll/jekyll-import/drupal6.rb +102 -0
data/lib/jekyll/jekyll-import/drupal7.rb +73 -0
data/lib/jekyll/jekyll-import/enki.rb +49 -0
data/lib/jekyll/jekyll-import/joomla.rb +53 -0
data/lib/jekyll/jekyll-import/marley.rb +52 -0
data/lib/jekyll/jekyll-import/mephisto.rb +84 -0
data/lib/jekyll/jekyll-import/mt.rb +142 -0
data/lib/jekyll/jekyll-import/posterous.rb +111 -0
data/lib/jekyll/jekyll-import/rss.rb +63 -0
data/lib/jekyll/jekyll-import/s9y.rb +49 -0
data/lib/jekyll/jekyll-import/textpattern.rb +58 -0
data/lib/jekyll/jekyll-import/tumblr.rb +195 -0
data/lib/jekyll/jekyll-import/typo.rb +67 -0
data/lib/jekyll/jekyll-import/wordpress.rb +296 -0
data/lib/jekyll/jekyll-import/wordpressdotcom.rb +82 -0
data/test/helper.rb +43 -0
data/test/test_mt_importer.rb +104 -0
data/test/test_wordpress_importer.rb +9 -0
data/test/test_wordpressdotcom_importer.rb +8 -0
metadata +334 -0

data/lib/jekyll/jekyll-import/posterous.rb ADDED Viewed

@@ -0,0 +1,111 @@
+require 'rubygems'
+require 'jekyll'
+require 'fileutils'
+require 'net/http'
+require 'uri'
+require "json"
+# ruby -r './lib/jekyll/jekyll-import/posterous.rb' -e 'JekyllImport::Posterous.process(email, pass, api_key, blog)'
+module JekyllImport
+  # Importer for Posterous blogs, driven by the Posterous v2 JSON API.
+  #
+  # The credentials handed to .process are stored in module-level instance
+  # variables (@email, @pass, @api_token) so that .fetch can authenticate.
+  module Posterous
+    # Performs an authenticated GET against posterous.com, following redirects.
+    #
+    # uri_str - the request path (or redirect location) String.
+    # limit   - the Integer number of redirects still allowed (default: 10).
+    #
+    # Returns the successful Net::HTTPResponse.
+    # Raises ArgumentError once the redirect budget is used up; any other
+    # non-success, non-redirect response is raised through #error!.
+    def self.fetch(uri_str, limit = 10)
+      # You should choose better exception.
+      raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password' if limit == 0
+      response = nil
+      Net::HTTP.start('posterous.com') do |http|
+        req = Net::HTTP::Get.new(uri_str)
+        req.basic_auth @email, @pass
+        response = http.request(req)
+      end
+      case response
+      when Net::HTTPSuccess     then response
+      when Net::HTTPRedirection then fetch(response['location'], limit - 1)
+      else response.error!
+      end
+    end
+    # Downloads a single (unauthenticated) URL, following redirects.
+    #
+    # Defined at module level rather than inside .fetch_images so that it is
+    # not re-defined on every .fetch_images call.
+    #
+    # url   - the absolute URL String to download.
+    # limit - the Integer number of redirects still allowed (default: 10).
+    #
+    # Returns the response body String.
+    # Raises ArgumentError once the redirect budget is used up.
+    def self.fetch_one(url, limit = 10)
+      raise ArgumentError, 'HTTP redirect too deep' if limit == 0
+      response = Net::HTTP.get_response(URI.parse(url))
+      case response
+      when Net::HTTPSuccess     then response.body
+      when Net::HTTPRedirection then fetch_one(response['location'], limit - 1)
+      else response.error!
+      end
+    end
+    # Downloads the full-size version of every image into a directory.
+    #
+    # directory - the String path of the directory to create and fill.
+    # imgs      - the Array of image Hashes; each is read as img["full"]["url"].
+    #
+    # Returns the Array of local file path Strings that were written.
+    def self.fetch_images(directory, imgs)
+      FileUtils.mkdir_p directory
+      imgs.map do |img|
+        fullurl = img["full"]["url"]
+        imgname = URI.parse(fullurl).path.split("/").last
+        # Download first so a failed request does not leave an empty file.
+        imgdata = fetch_one(fullurl)
+        path = "#{directory}/#{imgname}"
+        File.open(path, "wb") { |file| file.write imgdata }
+        path
+      end
+    end
+    # Imports every post of a Posterous site into _posts/, one page at a time.
+    #
+    # email     - the account email String used for HTTP basic auth.
+    # pass      - the account password String used for HTTP basic auth.
+    # api_token - the API token String appended to every request.
+    # blog      - the site identifier String (default: 'primary').
+    # base_path - the String prefixed to local image paths in generated
+    #             <img> tags (default: '/').
+    #
+    # Returns nothing.
+    def self.process(email, pass, api_token, blog = 'primary', base_path = '/')
+      @email, @pass, @api_token = email, pass, api_token
+      FileUtils.mkdir_p "_posts"
+      posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{blog}/posts?api_token=#{@api_token}").body)
+      page = 1
+      # Keep requesting pages until the API hands back an empty list.
+      while posts.any?
+        posts.each do |post|
+          # to_s guards against untitled posts, which would otherwise abort
+          # the whole import with a NoMethodError on nil.
+          title = post["title"].to_s
+          slug = title.gsub(/[^[:alnum:]]+/, '-').downcase
+          date = Date.parse(post["display_date"])
+          content = post["body_html"]
+          published = !post["is_private"]
+          basename = "%02d-%02d-%02d-%s" % [date.year, date.month, date.day, slug]
+          name = basename + '.html'
+          # Images (a post without a media/images entry is treated as having none):
+          post_imgs = (post["media"] || {})["images"] || []
+          if post_imgs.any?
+            img_dir = "imgs/%s" % basename
+            img_urls = self.fetch_images(img_dir, post_imgs)
+            img_urls.map! do |url|
+              '<li><img src="' + base_path + url + '"></li>'
+            end
+            imgcontent = "<ol>\n" + img_urls.join("\n") + "</ol>\n"
+            # filter out "posterous-content", replacing with imgs:
+            content = content.sub(/\<p\>\[\[posterous-content:[^\]]+\]\]\<\/p\>/, imgcontent)
+          end
+          # Get the relevant fields as a hash, delete empty fields and convert
+          # to YAML for the header
+          data = {
+             'layout' => 'post',
+             'title' => title,
+             'published' => published
+           }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
+          # Write out the data and content to file
+          File.open("_posts/#{name}", "w") do |f|
+            f.puts data
+            f.puts "---"
+            f.puts content
+          end
+        end
+        page += 1
+        posts = JSON.parse(self.fetch("/api/v2/users/me/sites/#{blog}/posts?api_token=#{@api_token}&page=#{page}").body)
+      end
+    end
+  end
+end

data/lib/jekyll/jekyll-import/rss.rb ADDED Viewed

@@ -0,0 +1,63 @@
+# Created by Kendall Buchanan (https://github.com/kendagriff) on 2011-12-22.
+# Use at your own risk. The end.
+#
+# Usage:
+#   (URL)
+#   ruby -r '_import/rss.rb' -e "JekyllImport::RSS.process(:source => 'http://yourdomain.com/your-favorite-feed.xml')"
+#
+#   (Local file)
+#   ruby -r '_import/rss.rb' -e "JekyllImport::RSS.process(:source => './somefile/on/your/computer.xml')"
+require 'rss/1.0'
+require 'rss/2.0'
+require 'open-uri'
+require 'fileutils'
+require 'safe_yaml'
+module JekyllImport
+  module RSS
+    def self.validate(options)
+      if !options[:source]
+        abort "Missing mandatory option --source."
+      end
+    end
+    # Process the import.
+    #
+    # source - a URL or a local file String.
+    #
+    # Returns nothing.
+    def self.process(options)
+      validate(options)
+      source = options[:source]
+      content = ""
+      open(source) { |s| content = s.read }
+      rss = ::RSS::Parser.parse(content, false)
+      raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss
+      rss.items.each do |item|
+        formatted_date = item.date.strftime('%Y-%m-%d')
+        post_name = item.title.split(%r{ |!|/|:|&|-|$|,}).map do |i|
+          i.downcase if i != ''
+        end.compact.join('-')
+        name = "#{formatted_date}-#{post_name}"
+        header = {
+          'layout' => 'post',
+          'title' => item.title
+        }
+        FileUtils.mkdir_p("_posts")
+        File.open("_posts/#{name}.html", "w") do |f|
+          f.puts header.to_yaml
+          f.puts "---\n\n"
+          f.puts item.description
+        end
+      end
+    end
+  end
+end

data/lib/jekyll/jekyll-import/s9y.rb ADDED Viewed

@@ -0,0 +1,49 @@
+# Migrator to import entries from a Serendipity (S9Y) blog
+#
+# Entries can be exported from http://blog.example.com/rss.php?version=2.0&all=1
+#
+# Usage:
+# ruby -r './s9y_rss.rb' -e 'JekyllImport::S9Y.process("http://blog.example.com/rss.php?version=2.0&all=1")'
+require 'open-uri'
+require 'rss'
+require 'fileutils'
+require 'yaml'
+module JekyllImport
+  module S9Y
+    def self.process(file_name)
+      FileUtils.mkdir_p("_posts")
+      text = ''
+      open(file_name, 'r') { |line| text = line.read }
+      rss = RSS::Parser.parse(text)
+      rss.items.each do |item|
+        post_url = item.link.match('.*(/archives/.*)')[1]
+        categories = item.categories.collect { |c| c.content }
+        content = item.content_encoded.strip
+        date = item.date
+        slug = item.link.match('.*/archives/[0-9]+-(.*)\.html')[1]
+        name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day,
+                                               slug]
+        data = {
+          'layout' => 'post',
+          'title' => item.title,
+          'categories' => categories,
+          'permalink' => post_url,
+          's9y_link' => item.link,
+          'date' => item.date,
+        }.delete_if { |k,v| v.nil? || v == '' }.to_yaml
+        # Write out the data and content to file
+        File.open("_posts/#{name}", "w") do |f|
+          f.puts data
+          f.puts "---"
+          f.puts content
+        end
+      end
+    end
+  end
+end

data/lib/jekyll/jekyll-import/textpattern.rb ADDED Viewed

@@ -0,0 +1,58 @@
+require 'rubygems'
+require 'sequel'
+require 'fileutils'
+require 'safe_yaml'
+# NOTE: This converter requires Sequel and the MySQL gems.
+# The MySQL gem can be difficult to install on OS X. Once you have MySQL
+# installed, running the following commands should work:
+# $ sudo gem install sequel
+# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
+module JekyllImport
+  module TextPattern
+    # Reads a MySQL database via Sequel and creates a post file for each post.
+    # The only posts selected are those with a status of 4 or 5, which means
+    # "live" and "sticky" respectively.
+    # Other statuses are 1 => draft, 2 => hidden and 3 => pending.
+    QUERY = "SELECT Title, \
+                    url_title, \
+                    Posted, \
+                    Body, \
+                    Keywords \
+             FROM textpattern \
+             WHERE Status = '4' OR \
+                   Status = '5'"
+    def self.process(dbname, user, pass, host = 'localhost')
+      db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
+      FileUtils.mkdir_p "_posts"
+      db[QUERY].each do |post|
+        # Get required fields and construct Jekyll compatible name.
+        title = post[:Title]
+        slug = post[:url_title]
+        date = post[:Posted]
+        content = post[:Body]
+        name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile"
+        # Get the relevant fields as a hash, delete empty fields and convert
+        # to YAML for the header.
+        data = {
+           'layout' => 'post',
+           'title' => title.to_s,
+           'tags' => post[:Keywords].split(',')
+         }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
+        # Write out the data and content to file.
+        File.open("_posts/#{name}", "w") do |f|
+          f.puts data
+          f.puts "---"
+          f.puts content
+        end
+      end
+    end
+  end
+end

data/lib/jekyll/jekyll-import/tumblr.rb ADDED Viewed

@@ -0,0 +1,195 @@
+require 'rubygems'
+require 'open-uri'
+require 'fileutils'
+require 'nokogiri'
+require 'date'
+require 'json'
+require 'uri'
+require 'jekyll'
+module JekyllImport
+  module Tumblr
+    def self.process(url, format = "html", grab_images = false,
+                     add_highlights = false, rewrite_urls = true)
+      @grab_images = grab_images
+      FileUtils.mkdir_p "_posts/tumblr"
+      url += "/api/read/json/"
+      per_page = 50
+      posts = []
+      # Two passes are required so that we can rewrite URLs.
+      # First pass builds up an array of each post as a hash.
+      begin
+        current_page = (current_page || -1) + 1
+        feed = open(url + "?num=#{per_page}&start=#{current_page * per_page}")
+        json = feed.readlines.join("\n")[21...-2]  # Strip Tumblr's JSONP chars.
+        blog = JSON.parse(json)
+        puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
+        posts += blog["posts"].map { |post| post_to_hash(post, format) }
+      end until blog["posts"].size < per_page
+      # Rewrite URLs and create redirects.
+      posts = rewrite_urls_and_redirects posts if rewrite_urls
+      # Second pass for writing post files.
+      posts.each do |post|
+        if format == "md"
+          post[:content] = html_to_markdown post[:content]
+          post[:content] = add_syntax_highlights post[:content] if add_highlights
+        end
+        File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
+          f.puts post[:header].to_yaml + "---\n" + post[:content]
+        end
+      end
+    end
+    private
+    # Converts each type of Tumblr post to a hash with all required
+    # data for Jekyll.
+    def self.post_to_hash(post, format)
+      case post['type']
+        when "regular"
+          title = post["regular-title"]
+          content = post["regular-body"]
+        when "link"
+          title = post["link-text"] || post["link-url"]
+          content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
+          unless post["link-description"].nil?
+            content << "<br/>" + post["link-description"]
+          end
+        when "photo"
+          title = post["photo-caption"]
+          max_size = post.keys.map{ |k| k.gsub("photo-url-", "").to_i }.max
+          url = post["photo-url"] || post["photo-url-#{max_size}"]
+          ext = "." + post[post.keys.select { |k|
+            k =~ /^photo-url-/ && post[k].split("/").last =~ /\./
+          }.first].split(".").last
+          content = "<img src=\"#{save_file(url, ext)}\"/>"
+          unless post["photo-link-url"].nil?
+            content = "<a href=\"#{post["photo-link-url"]}\">#{content}</a>"
+          end
+        when "audio"
+          if !post["id3-title"].nil?
+            title = post["id3-title"]
+            content = post.at["audio-player"] + "<br/>" + post["audio-caption"]
+          else
+            title = post["audio-caption"]
+            content = post.at["audio-player"]
+          end
+        when "quote"
+          title = post["quote-text"]
+          content = "<blockquote>#{post["quote-text"]}</blockquote>"
+          unless post["quote-source"].nil?
+            content << "&#8212;" + post["quote-source"]
+          end
+        when "conversation"
+          title = post["conversation-title"]
+          content = "<section><dialog>"
+          post["conversation"].each do |line|
+            content << "<dt>#{line['label']}</dt><dd>#{line['phrase']}</dd>"
+          end
+          content << "</section></dialog>"
+        when "video"
+          title = post["video-title"]
+          content = post["video-player"]
+          unless post["video-caption"].nil?
+            content << "<br/>" + post["video-caption"]
+          end
+      end
+      date = Date.parse(post['date']).to_s
+      title = Nokogiri::HTML(title).text
+      slug = title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
+      slug = slug.slice(0..200) if slug.length > 200
+      {
+        :name => "#{date}-#{slug}.#{format}",
+        :header => {
+          "layout" => "post",
+          "title" => title,
+          "tags" => post["tags"],
+        },
+        :content => content,
+        :url => post["url"],
+        :slug => post["url-with-slug"],
+      }
+    end
+    # Create a Hash of old urls => new urls, for rewriting and
+    # redirects, and replace urls in each post. Instantiate Jekyll
+    # site/posts to get the correct permalink format.
+    def self.rewrite_urls_and_redirects(posts)
+      site = Jekyll::Site.new(Jekyll.configuration({}))
+      urls = Hash[posts.map { |post|
+        # Create an initial empty file for the post so that
+        # we can instantiate a post object.
+        File.open("_posts/tumblr/#{post[:name]}", "w")
+        tumblr_url = URI.parse(post[:slug]).path
+        jekyll_url = Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
+        redirect_dir = tumblr_url.sub(/\//, "") + "/"
+        FileUtils.mkdir_p redirect_dir
+        File.open(redirect_dir + "index.html", "w") do |f|
+          f.puts "<html><head><meta http-equiv='Refresh' content='0; " +
+                 "url=#{jekyll_url}'></head><body></body></html>"
+        end
+        [tumblr_url, jekyll_url]
+      }]
+      posts.map { |post|
+        urls.each do |tumblr_url, jekyll_url|
+          post[:content].gsub!(/#{tumblr_url}/i, jekyll_url)
+        end
+        post
+      }
+    end
+    # Uses Python's html2text to convert a post's content to
+    # markdown. Preserve HTML tables as per the markdown docs.
+    def self.html_to_markdown(content)
+      preserve = ["table", "tr", "th", "td"]
+      preserve.each do |tag|
+        content.gsub!(/<#{tag}/i, "$$" + tag)
+        content.gsub!(/<\/#{tag}/i, "||" + tag)
+      end
+      content = %x[echo '#{content.gsub("'", "''")}' | html2text]
+      preserve.each do |tag|
+        content.gsub!("$$" + tag, "<" + tag)
+        content.gsub!("||" + tag, "</" + tag)
+      end
+      content
+    end
+    # Adds pygments highlight tags to code blocks in posts that use
+    # markdown format. This doesn't guess the language of the code
+    # block, so you should modify this to suit your own content.
+    # For example, my code block only contain Python and JavaScript,
+    # so I can assume the block is JavaScript if it contains a
+    # semi-colon.
+    def self.add_syntax_highlights(content)
+      lines = content.split("\n")
+      block, indent, lang, start = false, /^    /, nil, nil
+      lines.each_with_index do |line, i|
+        if !block && line =~ indent
+          block = true
+          lang = "python"
+          start = i
+        elsif block
+          lang = "javascript" if line =~ /;$/
+          block = line =~ indent && i < lines.size - 1 # Also handle EOF
+          if !block
+            lines[start] = "{% highlight #{lang} %}"
+            lines[i - 1] = "{% endhighlight %}"
+          end
+          lines[i] = lines[i].sub(indent, "")
+        end
+      end
+      lines.join("\n")
+    end
+    def self.save_file(url, ext)
+      if @grab_images
+        path = "tumblr_files/#{url.split('/').last}"
+        path += ext unless path =~ /#{ext}$/
+        FileUtils.mkdir_p "tumblr_files"
+        File.open(path, "w") { |f| f.write(open(url).read) }
+        url = "/" + path
+      end
+      url
+    end
+  end
+end