RubyGems - jekyll-import - Versions diffs - 0.1.0.beta1 - Mend

jekyll-import 0.1.0.beta1

Files changed (29) hide show

data/Gemfile +2 -0
data/History.txt +7 -0
data/LICENSE +21 -0
data/README.md +4 -0
data/Rakefile +151 -0
data/jekyll-import.gemspec +80 -0
data/lib/jekyll-import.rb +7 -0
data/lib/jekyll/commands/import.rb +51 -0
data/lib/jekyll/jekyll-import/csv.rb +26 -0
data/lib/jekyll/jekyll-import/drupal6.rb +102 -0
data/lib/jekyll/jekyll-import/drupal7.rb +73 -0
data/lib/jekyll/jekyll-import/enki.rb +49 -0
data/lib/jekyll/jekyll-import/joomla.rb +53 -0
data/lib/jekyll/jekyll-import/marley.rb +52 -0
data/lib/jekyll/jekyll-import/mephisto.rb +84 -0
data/lib/jekyll/jekyll-import/mt.rb +142 -0
data/lib/jekyll/jekyll-import/posterous.rb +111 -0
data/lib/jekyll/jekyll-import/rss.rb +63 -0
data/lib/jekyll/jekyll-import/s9y.rb +49 -0
data/lib/jekyll/jekyll-import/textpattern.rb +58 -0
data/lib/jekyll/jekyll-import/tumblr.rb +195 -0
data/lib/jekyll/jekyll-import/typo.rb +67 -0
data/lib/jekyll/jekyll-import/wordpress.rb +296 -0
data/lib/jekyll/jekyll-import/wordpressdotcom.rb +82 -0
data/test/helper.rb +43 -0
data/test/test_mt_importer.rb +104 -0
data/test/test_wordpress_importer.rb +9 -0
data/test/test_wordpressdotcom_importer.rb +8 -0
metadata +334 -0

data/lib/jekyll/jekyll-import/typo.rb ADDED Viewed

@@ -0,0 +1,67 @@
+# Author: Toby DiPasquale <toby@cbcg.net>
+require 'fileutils'
+require 'rubygems'
+require 'sequel'
+require 'safe_yaml'
+module JekyllImport
+  module Typo
+    # This SQL *should* work for both MySQL and PostgreSQL.
+    SQL = <<-EOS
+    SELECT c.id id,
+           c.title title,
+           c.permalink slug,
+           c.body body,
+           c.extended extended,
+           c.published_at date,
+           c.state state,
+           COALESCE(tf.name, 'html') filter
+      FROM contents c
+           LEFT OUTER JOIN text_filters tf
+                        ON c.text_filter_id = tf.id
+    EOS
+    def self.process server, dbname, user, pass, host='localhost'
+      FileUtils.mkdir_p '_posts'
+      case server.intern
+      when :postgres
+        db = Sequel.postgres(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
+      when :mysql
+        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
+      else
+        raise "Unknown database server '#{server}'"
+      end
+      db[SQL].each do |post|
+        next unless post[:state] =~ /published/
+        if post[:slug] == nil
+          post[:slug] = "no slug"
+        end
+        if post[:extended]
+          post[:body] << "\n<!-- more -->\n"
+          post[:body] << post[:extended]
+        end
+        name = [ sprintf("%.04d", post[:date].year),
+                 sprintf("%.02d", post[:date].month),
+                 sprintf("%.02d", post[:date].day),
+                 post[:slug].strip ].join('-')
+        # Can have more than one text filter in this field, but we just want
+        # the first one for this.
+        name += '.' + post[:filter].split(' ')[0]
+        File.open("_posts/#{name}", 'w') do |f|
+          f.puts({ 'layout'   => 'post',
+                   'title'    => post[:title].to_s,
+                   'typo_id'  => post[:id]
+                 }.delete_if { |k, v| v.nil? || v == '' }.to_yaml)
+          f.puts '---'
+          f.puts post[:body].delete("\r")
+        end
+      end
+    end
+  end
+end

data/lib/jekyll/jekyll-import/wordpress.rb ADDED Viewed

@@ -0,0 +1,296 @@
+require 'rubygems'
+require 'sequel'
+require 'fileutils'
+require 'psych'
+require 'safe_yaml'
+# NOTE: This converter requires Sequel and the MySQL gems.
+# The MySQL gem can be difficult to install on OS X. Once you have MySQL
+# installed, running the following commands should work:
+# $ sudo gem install sequel
+# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
+module JekyllImport
+  module WordPress
+    # Main migrator function. Call this to perform the migration.
+    #
+    # dbname::  The name of the database
+    # user::    The database user name
+    # pass::    The database user's password
+    # host::    The address of the MySQL database host. Default: 'localhost'
+    # options:: A hash table of configuration options.
+    #
+    # Supported options are:
+    #
+    # :table_prefix::   Prefix of database tables used by WordPress.
+    #                   Default: 'wp_'
+    # :clean_entities:: If true, convert non-ASCII characters to HTML
+    #                   entities in the posts, comments, titles, and
+    #                   names. Requires the 'htmlentities' gem to
+    #                   work. Default: true.
+    # :comments::       If true, migrate post comments too. Comments
+    #                   are saved in the post's YAML front matter.
+    #                   Default: true.
+    # :categories::     If true, save the post's categories in its
+    #                   YAML front matter.
+    # :tags::           If true, save the post's tags in its
+    #                   YAML front matter.
+    # :more_excerpt::   If true, when a post has no excerpt but
+    #                   does have a <!-- more --> tag, use the
+    #                   preceding post content as the excerpt.
+    #                   Default: true.
+    # :more_anchor::    If true, convert a <!-- more --> tag into
+    #                   two HTML anchors with ids "more" and
+    #                   "more-NNN" (where NNN is the post number).
+    #                   Default: true.
+    # :status::         Array of allowed post statuses. Only
+    #                   posts with matching status will be migrated.
+    #                   Known statuses are :publish, :draft, :private,
+    #                   and :revision. If this is nil or an empty
+    #                   array, all posts are migrated regardless of
+    #                   status. Default: [:publish].
+    #
+    def self.process(dbname, user, pass, host='localhost', options={})
+      options = {
+        :table_prefix   => 'wp_',
+        :clean_entities => true,
+        :comments       => true,
+        :categories     => true,
+        :tags           => true,
+        :more_excerpt   => true,
+        :more_anchor    => true,
+        :status         => [:publish] # :draft, :private, :revision
+      }.merge(options)
+      if options[:clean_entities]
+        begin
+          require 'htmlentities'
+        rescue LoadError
+          STDERR.puts "Could not require 'htmlentities', so the " +
+                      ":clean_entities option is now disabled."
+          options[:clean_entities] = false
+        end
+      end
+      FileUtils.mkdir_p("_posts")
+      db = Sequel.mysql(dbname, :user => user, :password => pass,
+                        :host => host, :encoding => 'utf8')
+      px = options[:table_prefix]
+      posts_query = "
+         SELECT
+           posts.ID            AS `id`,
+           posts.guid          AS `guid`,
+           posts.post_type     AS `type`,
+           posts.post_status   AS `status`,
+           posts.post_title    AS `title`,
+           posts.post_name     AS `slug`,
+           posts.post_date     AS `date`,
+           posts.post_content  AS `content`,
+           posts.post_excerpt  AS `excerpt`,
+           posts.comment_count AS `comment_count`,
+           users.display_name  AS `author`,
+           users.user_login    AS `author_login`,
+           users.user_email    AS `author_email`,
+           users.user_url      AS `author_url`
+         FROM #{px}posts AS `posts`
+           LEFT JOIN #{px}users AS `users`
+             ON posts.post_author = users.ID"
+      if options[:status] and not options[:status].empty?
+        status = options[:status][0]
+        posts_query << "
+         WHERE posts.post_status = '#{status.to_s}'"
+        options[:status][1..-1].each do |status|
+          posts_query << " OR
+           posts.post_status = '#{status.to_s}'"
+        end
+      end
+      db[posts_query].each do |post|
+        process_post(post, db, options)
+      end
+    end
+    def self.process_post(post, db, options)
+      px = options[:table_prefix]
+      title = post[:title]
+      if options[:clean_entities]
+        title = clean_entities(title)
+      end
+      slug = post[:slug]
+      if !slug or slug.empty?
+        slug = sluggify(title)
+      end
+      date = post[:date] || Time.now
+      name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month,
+                                             date.day, slug]
+      content = post[:content].to_s
+      if options[:clean_entities]
+        content = clean_entities(content)
+      end
+      excerpt = post[:excerpt].to_s
+      more_index = content.index(/<!-- *more *-->/)
+      more_anchor = nil
+      if more_index
+        if options[:more_excerpt] and
+            (post[:excerpt].nil? or post[:excerpt].empty?)
+          excerpt = content[0...more_index]
+        end
+        if options[:more_anchor]
+          more_link = "more"
+          content.sub!(/<!-- *more *-->/,
+                       "<a id=\"more\"></a>" +
+                       "<a id=\"more-#{post[:id]}\"></a>")
+        end
+      end
+      categories = []
+      tags = []
+      if options[:categories] or options[:tags]
+        cquery =
+          "SELECT
+             terms.name AS `name`,
+             ttax.taxonomy AS `type`
+           FROM
+             #{px}terms AS `terms`,
+             #{px}term_relationships AS `trels`,
+             #{px}term_taxonomy AS `ttax`
+           WHERE
+             trels.object_id = '#{post[:id]}' AND
+             trels.term_taxonomy_id = ttax.term_taxonomy_id AND
+             terms.term_id = ttax.term_id"
+        db[cquery].each do |term|
+          if options[:categories] and term[:type] == "category"
+            if options[:clean_entities]
+              categories << clean_entities(term[:name])
+            else
+              categories << term[:name]
+            end
+          elsif options[:tags] and term[:type] == "post_tag"
+            if options[:clean_entities]
+              tags << clean_entities(term[:name])
+            else
+              tags << term[:name]
+            end
+          end
+        end
+      end
+      comments = []
+      if options[:comments] and post[:comment_count].to_i > 0
+        cquery =
+          "SELECT
+             comment_ID           AS `id`,
+             comment_author       AS `author`,
+             comment_author_email AS `author_email`,
+             comment_author_url   AS `author_url`,
+             comment_date         AS `date`,
+             comment_date_gmt     AS `date_gmt`,
+             comment_content      AS `content`
+           FROM #{px}comments
+           WHERE
+             comment_post_ID = '#{post[:id]}' AND
+             comment_approved != 'spam'"
+        db[cquery].each do |comment|
+          comcontent = comment[:content].to_s
+          if comcontent.respond_to?(:force_encoding)
+            comcontent.force_encoding("UTF-8")
+          end
+          if options[:clean_entities]
+            comcontent = clean_entities(comcontent)
+          end
+          comauthor = comment[:author].to_s
+          if options[:clean_entities]
+            comauthor = clean_entities(comauthor)
+          end
+          comments << {
+            'id'           => comment[:id].to_i,
+            'author'       => comauthor,
+            'author_email' => comment[:author_email].to_s,
+            'author_url'   => comment[:author_url].to_s,
+            'date'         => comment[:date].to_s,
+            'date_gmt'     => comment[:date_gmt].to_s,
+            'content'      => comcontent,
+          }
+        end
+        comments.sort!{ |a,b| a['id'] <=> b['id'] }
+      end
+      # Get the relevant fields as a hash, delete empty fields and
+      # convert to YAML for the header.
+      data = {
+        'layout'        => post[:type].to_s,
+        'status'        => post[:status].to_s,
+        'published'     => (post[:status].to_s == "publish"),
+        'title'         => title.to_s,
+        'author'        => post[:author].to_s,
+        'author_login'  => post[:author_login].to_s,
+        'author_email'  => post[:author_email].to_s,
+        'author_url'    => post[:author_url].to_s,
+        'excerpt'       => excerpt,
+        'more_anchor'   => more_anchor,
+        'wordpress_id'  => post[:id],
+        'wordpress_url' => post[:guid].to_s,
+        'date'          => date,
+        'categories'    => options[:categories] ? categories : nil,
+        'tags'          => options[:tags] ? tags : nil,
+        'comments'      => options[:comments] ? comments : nil,
+      }.delete_if { |k,v| v.nil? || v == '' }.to_yaml
+      # Write out the data and content to file
+      File.open("_posts/#{name}", "w") do |f|
+        f.puts data
+        f.puts "---"
+        f.puts content
+      end
+    end
+    def self.clean_entities( text )
+      if text.respond_to?(:force_encoding)
+        text.force_encoding("UTF-8")
+      end
+      text = HTMLEntities.new.encode(text, :named)
+      # We don't want to convert these, it would break all
+      # HTML tags in the post and comments.
+      text.gsub!("&amp;", "&")
+      text.gsub!("&lt;", "<")
+      text.gsub!("&gt;", ">")
+      text.gsub!("&quot;", '"')
+      text.gsub!("&apos;", "'")
+      text.gsub!("/", "&#47;")
+      text
+    end
+    def self.sluggify( title )
+      begin
+        require 'unidecode'
+        title = title.to_ascii
+      rescue LoadError
+        STDERR.puts "Could not require 'unidecode'. If your post titles have non-ASCII characters, you could get nicer permalinks by installing unidecode."
+      end
+      title.downcase.gsub(/[^0-9A-Za-z]+/, " ").strip.gsub(" ", "-")
+    end
+  end
+end

data/lib/jekyll/jekyll-import/wordpressdotcom.rb ADDED Viewed

@@ -0,0 +1,82 @@
+# coding: utf-8
+require 'rubygems'
+require 'hpricot'
+require 'fileutils'
+require 'safe_yaml'
+require 'time'
+module JekyllImport
+  # This importer takes a wordpress.xml file, which can be exported from your
+  # wordpress.com blog (/wp-admin/export.php).
+  module WordpressDotCom
+    # Imports every <item> of the export file into a "_<post_type>s"
+    # directory (e.g. _posts, _pages), one file per item, and prints a
+    # per-type summary when done.
+    #
+    # filename:: Path of the WordPress export file. Default: "wordpress.xml"
+    #
+    # An item that cannot be written is reported on stdout and skipped.
+    def self.process(filename = "wordpress.xml")
+      import_count = Hash.new(0)
+      doc = Hpricot::XML(File.read(filename))
+      (doc/:channel/:item).each do |item|
+        title = item.at(:title).inner_text.strip
+        permalink_title = item.at('wp:post_name').inner_text
+        # Fallback to "prettified" title if post_name is empty (can happen)
+        permalink_title = sluggify(title) if permalink_title.empty?
+        date = Time.parse(item.at('wp:post_date').inner_text)
+        status = item.at('wp:status').inner_text
+        published = (status == "publish")
+        type = item.at('wp:post_type').inner_text
+        tags = (item/:category).map { |c| c.inner_text }.reject { |c| c == 'Uncategorized' }.uniq
+        # Collect the custom fields as a plain key => value hash.
+        metas = {}
+        item.search("wp:postmeta").each do |meta|
+          metas[meta.at('wp:meta_key').inner_text] = meta.at('wp:meta_value').inner_text
+        end
+        name = "#{date.strftime('%Y-%m-%d')}-#{permalink_title}.html"
+        header = {
+          'layout' => type,
+          'title'  => title,
+          'tags'   => tags,
+          'status'   => status,
+          'type'   => type,
+          'published' => published,
+          'meta'   => metas
+        }
+        begin
+          FileUtils.mkdir_p "_#{type}s"
+          File.open("_#{type}s/#{name}", "w") do |f|
+            f.puts header.to_yaml
+            f.puts '---'
+            f.puts item.at('content:encoded').inner_text
+          end
+        rescue => e
+          puts "Couldn't import post!"
+          puts "Title: #{title}"
+          puts "Name/Slug: #{name}\n"
+          puts "Error: #{e.message}"
+          next
+        end
+        import_count[type] += 1
+      end
+      import_count.each do |key, value|
+        puts "Imported #{value} #{key}s"
+      end
+    end
+    # Turns a title into a lower-case slug: every run of non-alphanumeric
+    # characters becomes a single hyphen, and hyphens at either end are
+    # removed so file names do not end up as "...-hello-world-.html".
+    def self.sluggify(title)
+      title.gsub(/[^[:alnum:]]+/, '-').gsub(/\A-+|-+\z/, '').downcase
+    end
+  end
+end

data/test/helper.rb ADDED Viewed

@@ -0,0 +1,43 @@
+if RUBY_VERSION > '1.9' && ENV["COVERAGE"] == "true"
+  require 'simplecov'
+  require 'simplecov-gem-adapter'
+  SimpleCov.start('gem')
+end
+require 'test/unit'
+require 'redgreen' if RUBY_VERSION < '1.9'
+require 'shoulda'
+require 'rr'
+Dir.glob(File.expand_path('../../lib/jekyll/jekyll-import/*', __FILE__)).each do |f|
+  require f
+end
+# Send STDERR into the void to suppress program output messages
+STDERR.reopen(test(?e, '/dev/null') ? '/dev/null' : 'NUL:')
+class Test::Unit::TestCase
+  include RR::Adapters::TestUnit
+  def dest_dir(*subdirs)
+    File.join(File.dirname(__FILE__), 'dest', *subdirs)
+  end
+  def source_dir(*subdirs)
+    File.join(File.dirname(__FILE__), 'source', *subdirs)
+  end
+  def clear_dest
+    FileUtils.rm_rf(dest_dir)
+  end
+  def capture_stdout
+    $old_stdout = $stdout
+    $stdout = StringIO.new
+    yield
+    $stdout.rewind
+    return $stdout.string
+  ensure
+    $stdout = $old_stdout
+  end
+end