RubyGems - jekyll-import - Versions diffs - 0.1.0.beta3 → 0.1.0.beta4 - Mend

jekyll-import 0.1.0.beta3 → 0.1.0.beta4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

checksums.yaml +6 -14
data/History.markdown +18 -0
data/README.markdown +12 -1
data/jekyll-import.gemspec +31 -25
data/lib/jekyll-import.rb +50 -1
data/lib/jekyll-import/importer.rb +11 -0
data/lib/jekyll-import/importers.rb +10 -0
data/lib/jekyll-import/importers/csv.rb +50 -0
data/lib/jekyll-import/importers/drupal6.rb +139 -0
data/lib/jekyll-import/importers/drupal7.rb +102 -0
data/lib/jekyll-import/importers/enki.rb +76 -0
data/lib/jekyll-import/importers/google_reader.rb +68 -0
data/lib/jekyll-import/importers/joomla.rb +83 -0
data/lib/jekyll-import/importers/jrnl.rb +127 -0
data/lib/jekyll-import/importers/marley.rb +72 -0
data/lib/jekyll-import/importers/mephisto.rb +109 -0
data/lib/jekyll-import/importers/mt.rb +169 -0
data/lib/jekyll-import/importers/posterous.rb +139 -0
data/lib/jekyll-import/importers/rss.rb +71 -0
data/lib/jekyll-import/importers/s9y.rb +67 -0
data/lib/jekyll-import/importers/textpattern.rb +76 -0
data/lib/jekyll-import/importers/tumblr.rb +265 -0
data/lib/jekyll-import/importers/typo.rb +89 -0
data/lib/jekyll-import/importers/wordpress.rb +323 -0
data/lib/jekyll-import/importers/wordpressdotcom.rb +97 -0
data/lib/jekyll/commands/import.rb +1 -0
data/test/helper.rb +3 -1
data/test/test_jrnl_importer.rb +39 -0
data/test/test_mt_importer.rb +16 -16
data/test/test_tumblr_importer.rb +61 -0
data/test/test_wordpress_importer.rb +1 -1
data/test/test_wordpressdotcom_importer.rb +1 -1
metadata +53 -32
data/lib/jekyll/jekyll-import/csv.rb +0 -30
data/lib/jekyll/jekyll-import/drupal6.rb +0 -112
data/lib/jekyll/jekyll-import/drupal7.rb +0 -74
data/lib/jekyll/jekyll-import/enki.rb +0 -49
data/lib/jekyll/jekyll-import/google_reader.rb +0 -61
data/lib/jekyll/jekyll-import/joomla.rb +0 -53
data/lib/jekyll/jekyll-import/marley.rb +0 -52
data/lib/jekyll/jekyll-import/mephisto.rb +0 -84
data/lib/jekyll/jekyll-import/mt.rb +0 -142
data/lib/jekyll/jekyll-import/posterous.rb +0 -122
data/lib/jekyll/jekyll-import/rss.rb +0 -63
data/lib/jekyll/jekyll-import/s9y.rb +0 -59
data/lib/jekyll/jekyll-import/textpattern.rb +0 -58
data/lib/jekyll/jekyll-import/tumblr.rb +0 -242
data/lib/jekyll/jekyll-import/typo.rb +0 -69
data/lib/jekyll/jekyll-import/wordpress.rb +0 -299
data/lib/jekyll/jekyll-import/wordpressdotcom.rb +0 -84

data/lib/jekyll-import/importers/rss.rb ADDED Viewed

@@ -0,0 +1,71 @@
+# Created by Kendall Buchanan (https://github.com/kendagriff) on 2011-12-22.
+# Use at your own risk. The end.
+#
+# Usage:
+#   (URL)
+#   ruby -r 'jekyll/jekyll-import/rss' -e "JekyllImport::RSS.process(:source => 'http://yourdomain.com/your-favorite-feed.xml')"
+#
+#   (Local file)
+#   ruby -r 'jekyll/jekyll-import/rss' -e "JekyllImport::RSS.process(:source => './somefile/on/your/computer.xml')"
+module JekyllImport
+  module Importers
+    class RSS < Importer
+      def self.specify_options(c)
+        c.option 'file', '--file NAME', 'The RSS file to import'
+      end
+      def self.validate(options)
+        if options['source'].nil?
+          abort "Missing mandatory option --source."
+        end
+      end
+      def self.require_deps
+        JekyllImport.require_with_fallback(%w[
+          rss/1.0
+          rss/2.0
+          open-uri
+          fileutils
+          safe_yaml
+        ])
+      end
+      # Process the import.
+      #
+      # source - a URL or a local file String.
+      #
+      # Returns nothing.
+      def self.process(options)
+        source = options.fetch('file')
+        content = ""
+        open(source) { |s| content = s.read }
+        rss = ::RSS::Parser.parse(content, false)
+        raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss
+        rss.items.each do |item|
+          formatted_date = item.date.strftime('%Y-%m-%d')
+          post_name = item.title.split(%r{ |!|/|:|&|-|$|,}).map do |i|
+            i.downcase if i != ''
+          end.compact.join('-')
+          name = "#{formatted_date}-#{post_name}"
+          header = {
+            'layout' => 'post',
+            'title' => item.title
+          }
+          FileUtils.mkdir_p("_posts")
+          File.open("_posts/#{name}.html", "w") do |f|
+            f.puts header.to_yaml
+            f.puts "---\n\n"
+            f.puts item.description
+          end
+        end
+      end
+    end
+  end
+end

data/lib/jekyll-import/importers/s9y.rb ADDED Viewed

@@ -0,0 +1,67 @@
+# Migrator to import entries from a Serendipity (S9Y) blog
+#
+# Entries can be exported from http://blog.example.com/rss.php?version=2.0&all=1
+#
+# Usage:
+# ruby -r './s9y_rss.rb' -e 'Jekyll::S9Y.process("http://blog.example.com/rss.php?version=2.0&all=1")'
+module JekyllImport
+  module Importers
+    class S9Y < Importer
+      def self.specify_options(c)
+        c.option 'source', '--source SOURCE', 'The URL of the S9Y RSS feed'
+      end
+      def self.validate(options)
+        if options['source'].nil?
+          abort "Missing mandatory option --source, e.g. --source \"http://blog.example.com/rss.php?version=2.0&all=1\""
+        end
+      end
+      def self.require_deps
+        JekyllImport.require_with_fallback(%w[
+          open-uri
+          rss
+          fileutils
+          safe_yaml
+        ])
+      end
+      def self.process(options)
+        source = options.fetch(:source)
+        FileUtils.mkdir_p("_posts")
+        text = ''
+        open(source) { |line| text = line.read }
+        rss = RSS::Parser.parse(text)
+        rss.items.each do |item|
+          post_url = item.link.match('.*(/archives/.*)')[1]
+          categories = item.categories.collect { |c| c.content }
+          content = item.content_encoded.strip
+          date = item.date
+          slug = item.link.match('.*/archives/[0-9]+-(.*)\.html')[1]
+          name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day,
+                                                 slug]
+          data = {
+            'layout' => 'post',
+            'title' => item.title,
+            'categories' => categories,
+            'permalink' => post_url,
+            's9y_link' => item.link,
+            'date' => item.date,
+          }.delete_if { |k,v| v.nil? || v == '' }.to_yaml
+          # Write out the data and content to file
+          File.open("_posts/#{name}", "w") do |f|
+            f.puts data
+            f.puts "---"
+            f.puts content
+          end
+        end
+      end
+    end
+  end
+end

data/lib/jekyll-import/importers/textpattern.rb ADDED Viewed

@@ -0,0 +1,76 @@
+# NOTE: This converter requires Sequel and the MySQL gems.
+# The MySQL gem can be difficult to install on OS X. Once you have MySQL
+# installed, running the following commands should work:
+# $ sudo gem install sequel
+# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
+module JekyllImport
+  module Importers
+    class TextPattern < Importer
+      # Reads a MySQL database via Sequel and creates a post file for each post.
+      # The only posts selected are those with a status of 4 or 5, which means
+      # "live" and "sticky" respectively.
+      # Other statuses are 1 => draft, 2 => hidden and 3 => pending.
+      QUERY = "SELECT Title, \
+                      url_title, \
+                      Posted, \
+                      Body, \
+                      Keywords \
+               FROM textpattern \
+               WHERE Status = '4' OR \
+                     Status = '5'"
+      def self.require_deps
+        JekyllImport.require_with_fallback(%w[
+          rubygems
+          sequel
+          fileutils
+          safe_yaml
+        ])
+      end
+      def self.specify_options(c)
+        c.option 'dbname', '--dbname DB', 'Database name'
+        c.option 'user', '--user USER', 'Database user name'
+        c.option 'password', '--password PW', "Database user's password"
+        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
+      end
+      def self.process(options)
+        dbname = options.fetch('dbname')
+        user   = options.fetch('user')
+        pass   = options.fetch('password')
+        host   = options.fetch('host', "localhost")
+        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
+        FileUtils.mkdir_p "_posts"
+        db[QUERY].each do |post|
+          # Get required fields and construct Jekyll compatible name.
+          title = post[:Title]
+          slug = post[:url_title]
+          date = post[:Posted]
+          content = post[:Body]
+          name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile"
+          # Get the relevant fields as a hash, delete empty fields and convert
+          # to YAML for the header.
+          data = {
+             'layout' => 'post',
+             'title' => title.to_s,
+             'tags' => post[:Keywords].split(',')
+           }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
+          # Write out the data and content to file.
+          File.open("_posts/#{name}", "w") do |f|
+            f.puts data
+            f.puts "---"
+            f.puts content
+          end
+        end
+      end
+    end
+  end
+end

data/lib/jekyll-import/importers/tumblr.rb ADDED Viewed

@@ -0,0 +1,265 @@
+module JekyllImport
+  module Importers
+    class Tumblr < Importer
+      def self.require_deps
+        JekyllImport.require_with_fallback(%w[
+          rubygems
+          fileutils
+          open-uri
+          nokogiri
+          json
+          uri
+          time
+          jekyll
+        ])
+      end
+      def self.specify_options(c)
+        c.option 'url', '--url URL', 'Tumblr URL'
+        c.option 'format', '--format FORMAT', 'Output format (default: "html")'
+        c.option 'grab_images', '--grab_images', 'Whether to grab images (default: false)'
+        c.option 'add_highlights', '--add_highlights', 'Whether to add highlights (default: false)'
+        c.option 'rewrite_urls', '--rewrite_urls', 'Whether to rewrite URLs (default: false)'
+      end
+      def self.process(options)
+        url            = options.fetch('url')
+        format         = options.fetch('format', "html")
+        grab_images    = options.fetch('grab_images', false)
+        add_highlights = options.fetch('add_highlights', false)
+        rewrite_urls   = options.fetch('rewrite_urls', false)
+        @grab_images = grab_images
+        FileUtils.mkdir_p "_posts/tumblr"
+        url += "/api/read/json/"
+        per_page = 50
+        posts = []
+        # Two passes are required so that we can rewrite URLs.
+        # First pass builds up an array of each post as a hash.
+        begin
+          current_page = (current_page || -1) + 1
+          feed_url = url + "?num=#{per_page}&start=#{current_page * per_page}"
+          puts "Fetching #{feed_url}"
+          feed = open(feed_url)
+          json = feed.readlines.join("\n")[21...-2]  # Strip Tumblr's JSONP chars.
+          blog = JSON.parse(json)
+          puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
+          batch = blog["posts"].map { |post| post_to_hash(post, format) }
+          # If we're rewriting, save the posts for later.  Otherwise, go ahead and
+          # dump these to disk now
+          if rewrite_urls
+            posts += batch
+          else
+            batch.each {|post| write_post(post, format == "md", add_highlights)}
+          end
+        end until blog["posts"].size < per_page
+        # Rewrite URLs, create redirects and write out out posts if necessary
+        if rewrite_urls
+          posts = rewrite_urls_and_redirects posts
+          posts.each {|post| write_post(post, format == "md", add_highlights)}
+        end
+      end
+      private
+      # Writes a post out to disk
+      def self.write_post(post, use_markdown, add_highlights)
+        content = post[:content]
+        if use_markdown
+          content = html_to_markdown content
+          content = add_syntax_highlights content if add_highlights
+        end
+        File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
+          f.puts post[:header].to_yaml + "---\n" + content
+        end
+      end
+      # Converts each type of Tumblr post to a hash with all required
+      # data for Jekyll.
+      def self.post_to_hash(post, format)
+        case post['type']
+          when "regular"
+            title = post["regular-title"]
+            content = post["regular-body"]
+          when "link"
+            title = post["link-text"] || post["link-url"]
+            content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
+            unless post["link-description"].nil?
+              content << "<br/>" + post["link-description"]
+            end
+          when "photo"
+            title = post["photo-caption"]
+            content = if post["photo-link-url"].nil?
+              "<a href=\"#{post["photo-link-url"]}\">#{content}</a>"
+            else
+              fetch_photo post
+            end
+          when "audio"
+            if !post["id3-title"].nil?
+              title = post["id3-title"]
+              content = post["audio-player"] + "<br/>" + post["audio-caption"]
+            else
+              title = post["audio-caption"]
+              content = post["audio-player"]
+            end
+          when "quote"
+            title = post["quote-text"]
+            content = "<blockquote>#{post["quote-text"]}</blockquote>"
+            unless post["quote-source"].nil?
+              content << "&#8212;" + post["quote-source"]
+            end
+          when "conversation"
+            title = post["conversation-title"]
+            content = "<section><dialog>"
+            post["conversation"].each do |line|
+              content << "<dt>#{line['label']}</dt><dd>#{line['phrase']}</dd>"
+            end
+            content << "</section></dialog>"
+          when "video"
+            title = post["video-title"]
+            content = post["video-player"]
+            unless post["video-caption"].nil?
+              content << "<br/>" + post["video-caption"]
+            end
+        end
+        date = Date.parse(post['date']).to_s
+        title = Nokogiri::HTML(title).text
+        slug = if post["slug"] && post["slug"].strip != ""
+          post["slug"]
+        else
+          slug = title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
+          slug.length > 200 ? slug.slice(0..200) : slug
+        end
+        {
+          :name => "#{date}-#{slug}.#{format}",
+          :header => {
+            "layout" => "post",
+            "title" => title,
+            "date" => Time.parse(post['date']).xmlschema,
+            "tags" => post["tags"],
+            "tumblr_url" => post["url-with-slug"]
+          },
+          :content => content,
+          :url => post["url"],
+          :slug => post["url-with-slug"],
+        }
+      end
+      # Attempts to fetch the largest version of a photo available for a post.
+      # If that file fails, it tries the next smaller size until all available
+      # photo URLs are exhausted.  If they all fail, the import is aborted.
+      def self.fetch_photo(post)
+        sizes = post.keys.map {|k| k.gsub("photo-url-", "").to_i}
+        sizes.sort! {|a,b| b <=> a}
+        ext_key, ext_val = post.find do |k,v|
+          k =~ /^photo-url-/ && v.split("/").last =~ /\./
+        end
+        ext = "." + ext_val.split(".").last
+        sizes.each do |size|
+          url = post["photo-url"] || post["photo-url-#{size}"]
+          next if url.nil?
+          begin
+            return "<img src=\"#{save_photo(url, ext)}\"/>"
+          rescue OpenURI::HTTPError => err
+            puts "Failed to grab photo"
+          end
+        end
+        abort "Failed to fetch photo for post #{post['url']}"
+      end
+      # Create a Hash of old urls => new urls, for rewriting and
+      # redirects, and replace urls in each post. Instantiate Jekyll
+      # site/posts to get the correct permalink format.
+      def self.rewrite_urls_and_redirects(posts)
+        site = Jekyll::Site.new(Jekyll.configuration({}))
+        urls = Hash[posts.map { |post|
+          # Create an initial empty file for the post so that
+          # we can instantiate a post object.
+          File.open("_posts/tumblr/#{post[:name]}", "w")
+          tumblr_url = URI.parse(post[:slug]).path
+          jekyll_url = Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
+          redirect_dir = tumblr_url.sub(/\//, "") + "/"
+          FileUtils.mkdir_p redirect_dir
+          File.open(redirect_dir + "index.html", "w") do |f|
+            f.puts "<html><head><link rel=\"canonical\" href=\"" +
+                   "#{jekyll_url}\"><meta http-equiv=\"refresh\" content=\"0; " +
+                   "url=#{jekyll_url}\"></head><body></body></html>"
+          end
+          [tumblr_url, jekyll_url]
+        }]
+        posts.map { |post|
+          urls.each do |tumblr_url, jekyll_url|
+            post[:content].gsub!(/#{tumblr_url}/i, jekyll_url)
+          end
+          post
+        }
+      end
+      # Convert preserving HTML tables as per the markdown docs.
+      def self.html_to_markdown(content)
+        preserve = ["table", "tr", "th", "td"]
+        preserve.each do |tag|
+          content.gsub!(/<#{tag}/i, "$$" + tag)
+          content.gsub!(/<\/#{tag}/i, "||" + tag)
+        end
+        content = Nokogiri::HTML(content.gsub("'", "''")).text
+        preserve.each do |tag|
+          content.gsub!("$$" + tag, "<" + tag)
+          content.gsub!("||" + tag, "</" + tag)
+        end
+        content
+      end
+      # Adds pygments highlight tags to code blocks in posts that use
+      # markdown format. This doesn't guess the language of the code
+      # block, so you should modify this to suit your own content.
+      # For example, my code block only contain Python and JavaScript,
+      # so I can assume the block is JavaScript if it contains a
+      # semi-colon.
+      def self.add_syntax_highlights(content)
+        lines = content.split("\n")
+        block, indent, lang, start = false, /^    /, nil, nil
+        lines.each_with_index do |line, i|
+          if !block && line =~ indent
+            block = true
+            lang = "python"
+            start = i
+          elsif block
+            lang = "javascript" if line =~ /;$/
+            block = line =~ indent && i < lines.size - 1 # Also handle EOF
+            if !block
+              lines[start] = "{% highlight #{lang} %}"
+              lines[i - 1] = "{% endhighlight %}"
+            end
+            FileUtils.cp(redirect_dir + "index.html", redirect_dir + "../" + "index.html")
+            lines[i] = lines[i].sub(indent, "")
+          end
+        end
+        lines.join("\n")
+      end
+      def self.save_photo(url, ext)
+        if @grab_images
+          path = "tumblr_files/#{url.split('/').last}"
+          path += ext unless path =~ /#{ext}$/
+          FileUtils.mkdir_p "tumblr_files"
+          # Don't fetch if we've already cached this file
+          unless File.size? path
+            puts "Fetching photo #{url}"
+            File.open(path, "w") { |f| f.write(open(url).read) }
+          end
+          url = "/" + path
+        end
+        url
+      end
+    end
+  end
+end