RubyGems - jekyll-import - Versions diffs - 0.1.0.beta3 → 0.1.0.beta4 - Mend

jekyll-import 0.1.0.beta3 → 0.1.0.beta4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

checksums.yaml +6 -14
data/History.markdown +18 -0
data/README.markdown +12 -1
data/jekyll-import.gemspec +31 -25
data/lib/jekyll-import.rb +50 -1
data/lib/jekyll-import/importer.rb +11 -0
data/lib/jekyll-import/importers.rb +10 -0
data/lib/jekyll-import/importers/csv.rb +50 -0
data/lib/jekyll-import/importers/drupal6.rb +139 -0
data/lib/jekyll-import/importers/drupal7.rb +102 -0
data/lib/jekyll-import/importers/enki.rb +76 -0
data/lib/jekyll-import/importers/google_reader.rb +68 -0
data/lib/jekyll-import/importers/joomla.rb +83 -0
data/lib/jekyll-import/importers/jrnl.rb +127 -0
data/lib/jekyll-import/importers/marley.rb +72 -0
data/lib/jekyll-import/importers/mephisto.rb +109 -0
data/lib/jekyll-import/importers/mt.rb +169 -0
data/lib/jekyll-import/importers/posterous.rb +139 -0
data/lib/jekyll-import/importers/rss.rb +71 -0
data/lib/jekyll-import/importers/s9y.rb +67 -0
data/lib/jekyll-import/importers/textpattern.rb +76 -0
data/lib/jekyll-import/importers/tumblr.rb +265 -0
data/lib/jekyll-import/importers/typo.rb +89 -0
data/lib/jekyll-import/importers/wordpress.rb +323 -0
data/lib/jekyll-import/importers/wordpressdotcom.rb +97 -0
data/lib/jekyll/commands/import.rb +1 -0
data/test/helper.rb +3 -1
data/test/test_jrnl_importer.rb +39 -0
data/test/test_mt_importer.rb +16 -16
data/test/test_tumblr_importer.rb +61 -0
data/test/test_wordpress_importer.rb +1 -1
data/test/test_wordpressdotcom_importer.rb +1 -1
metadata +53 -32
data/lib/jekyll/jekyll-import/csv.rb +0 -30
data/lib/jekyll/jekyll-import/drupal6.rb +0 -112
data/lib/jekyll/jekyll-import/drupal7.rb +0 -74
data/lib/jekyll/jekyll-import/enki.rb +0 -49
data/lib/jekyll/jekyll-import/google_reader.rb +0 -61
data/lib/jekyll/jekyll-import/joomla.rb +0 -53
data/lib/jekyll/jekyll-import/marley.rb +0 -52
data/lib/jekyll/jekyll-import/mephisto.rb +0 -84
data/lib/jekyll/jekyll-import/mt.rb +0 -142
data/lib/jekyll/jekyll-import/posterous.rb +0 -122
data/lib/jekyll/jekyll-import/rss.rb +0 -63
data/lib/jekyll/jekyll-import/s9y.rb +0 -59
data/lib/jekyll/jekyll-import/textpattern.rb +0 -58
data/lib/jekyll/jekyll-import/tumblr.rb +0 -242
data/lib/jekyll/jekyll-import/typo.rb +0 -69
data/lib/jekyll/jekyll-import/wordpress.rb +0 -299
data/lib/jekyll/jekyll-import/wordpressdotcom.rb +0 -84

data/lib/jekyll/jekyll-import/tumblr.rb DELETED Viewed

@@ -1,242 +0,0 @@
-require 'rubygems'
-require 'open-uri'
-require 'fileutils'
-require 'nokogiri'
-require 'date'
-require 'json'
-require 'uri'
-require 'jekyll'
-module JekyllImport
-  module Tumblr
-    def self.process(url, format = "html", grab_images = false,
-                     add_highlights = false, rewrite_urls = true)
-      @grab_images = grab_images
-      FileUtils.mkdir_p "_posts/tumblr"
-      url += "/api/read/json/"
-      per_page = 50
-      posts = []
-      # Two passes are required so that we can rewrite URLs.
-      # First pass builds up an array of each post as a hash.
-      begin
-        current_page = (current_page || -1) + 1
-        feed_url = url + "?num=#{per_page}&start=#{current_page * per_page}"
-        puts "Fetching #{feed_url}"
-        feed = open(feed_url)
-        json = feed.readlines.join("\n")[21...-2]  # Strip Tumblr's JSONP chars.
-        blog = JSON.parse(json)
-        puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
-        batch = blog["posts"].map { |post| post_to_hash(post, format) }
-        # If we're rewriting, save the posts for later.  Otherwise, go ahead and
-        # dump these to disk now
-        if rewrite_urls
-          posts += batch
-        else
-          batch.each {|post| write_post(post, format == "md", add_highlights)}
-        end
-      end until blog["posts"].size < per_page
-      # Rewrite URLs, create redirects and write out out posts if necessary
-      if rewrite_urls
-        posts = rewrite_urls_and_redirects posts
-        posts.each {|post| write_post(post, format == "md", add_highlights)}
-      end
-    end
-    private
-    # Writes a post out to disk
-    def self.write_post(post, use_markdown, add_highlights)
-      content = post[:content]
-      if use_markdown
-        content = html_to_markdown content
-        content = add_syntax_highlights content if add_highlights
-      end
-      File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
-        f.puts post[:header].to_yaml + "---\n" + content
-      end
-    end
-    # Converts each type of Tumblr post to a hash with all required
-    # data for Jekyll.
-    def self.post_to_hash(post, format)
-      case post['type']
-        when "regular"
-          title = post["regular-title"]
-          content = post["regular-body"]
-        when "link"
-          title = post["link-text"] || post["link-url"]
-          content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
-          unless post["link-description"].nil?
-            content << "<br/>" + post["link-description"]
-          end
-        when "photo"
-          title = post["photo-caption"]
-          content = if post["photo-link-url"].nil?
-            "<a href=\"#{post["photo-link-url"]}\">#{content}</a>"
-          else
-            fetch_photo post
-          end
-        when "audio"
-          if !post["id3-title"].nil?
-            title = post["id3-title"]
-            content = post["audio-player"] + "<br/>" + post["audio-caption"]
-          else
-            title = post["audio-caption"]
-            content = post["audio-player"]
-          end
-        when "quote"
-          title = post["quote-text"]
-          content = "<blockquote>#{post["quote-text"]}</blockquote>"
-          unless post["quote-source"].nil?
-            content << "&#8212;" + post["quote-source"]
-          end
-        when "conversation"
-          title = post["conversation-title"]
-          content = "<section><dialog>"
-          post["conversation"].each do |line|
-            content << "<dt>#{line['label']}</dt><dd>#{line['phrase']}</dd>"
-          end
-          content << "</section></dialog>"
-        when "video"
-          title = post["video-title"]
-          content = post["video-player"]
-          unless post["video-caption"].nil?
-            content << "<br/>" + post["video-caption"]
-          end
-      end
-      date = Date.parse(post['date']).to_s
-      title = Nokogiri::HTML(title).text
-      slug = if post["slug"] && post["slug"].strip != ""
-        post["slug"]
-      else
-        slug = title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
-        slug.length > 200 ? slug.slice(0..200) : slug
-      end
-      {
-        :name => "#{date}-#{slug}.#{format}",
-        :header => {
-          "layout" => "post",
-          "title" => title,
-          "tags" => post["tags"],
-        },
-        :content => content,
-        :url => post["url"],
-        :slug => post["url-with-slug"],
-      }
-    end
-    # Attempts to fetch the largest version of a photo available for a post.
-    # If that file fails, it tries the next smaller size until all available
-    # photo URLs are exhausted.  If they all fail, the import is aborted.
-    def self.fetch_photo(post)
-      sizes = post.keys.map {|k| k.gsub("photo-url-", "").to_i}
-      sizes.sort! {|a,b| b <=> a}
-      ext_key, ext_val = post.find do |k,v|
-        k =~ /^photo-url-/ && v.split("/").last =~ /\./
-      end
-      ext = "." + ext_val.split(".").last
-      sizes.each do |size|
-        url = post["photo-url"] || post["photo-url-#{size}"]
-        next if url.nil?
-        begin
-          return "<img src=\"#{save_photo(url, ext)}\"/>"
-        rescue OpenURI::HTTPError => err
-          puts "Failed to grab photo"
-        end
-      end
-      abort "Failed to fetch photo for post #{post['url']}"
-    end
-    # Create a Hash of old urls => new urls, for rewriting and
-    # redirects, and replace urls in each post. Instantiate Jekyll
-    # site/posts to get the correct permalink format.
-    def self.rewrite_urls_and_redirects(posts)
-      site = Jekyll::Site.new(Jekyll.configuration({}))
-      urls = Hash[posts.map { |post|
-        # Create an initial empty file for the post so that
-        # we can instantiate a post object.
-        File.open("_posts/tumblr/#{post[:name]}", "w")
-        tumblr_url = URI.parse(post[:slug]).path
-        jekyll_url = Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
-        redirect_dir = tumblr_url.sub(/\//, "") + "/"
-        FileUtils.mkdir_p redirect_dir
-        File.open(redirect_dir + "index.html", "w") do |f|
-          f.puts "<html><head><meta http-equiv='Refresh' content='0; " +
-                 "url=#{jekyll_url}'></head><body></body></html>"
-        end
-        [tumblr_url, jekyll_url]
-      }]
-      posts.map { |post|
-        urls.each do |tumblr_url, jekyll_url|
-          post[:content].gsub!(/#{tumblr_url}/i, jekyll_url)
-        end
-        post
-      }
-    end
-    # Convert preserving HTML tables as per the markdown docs.
-    def self.html_to_markdown(content)
-      preserve = ["table", "tr", "th", "td"]
-      preserve.each do |tag|
-        content.gsub!(/<#{tag}/i, "$$" + tag)
-        content.gsub!(/<\/#{tag}/i, "||" + tag)
-      end
-      content = Nokogiri::HTML(content.gsub("'", "''")).text
-      preserve.each do |tag|
-        content.gsub!("$$" + tag, "<" + tag)
-        content.gsub!("||" + tag, "</" + tag)
-      end
-      content
-    end
-    # Adds pygments highlight tags to code blocks in posts that use
-    # markdown format. This doesn't guess the language of the code
-    # block, so you should modify this to suit your own content.
-    # For example, my code block only contain Python and JavaScript,
-    # so I can assume the block is JavaScript if it contains a
-    # semi-colon.
-    def self.add_syntax_highlights(content)
-      lines = content.split("\n")
-      block, indent, lang, start = false, /^    /, nil, nil
-      lines.each_with_index do |line, i|
-        if !block && line =~ indent
-          block = true
-          lang = "python"
-          start = i
-        elsif block
-          lang = "javascript" if line =~ /;$/
-          block = line =~ indent && i < lines.size - 1 # Also handle EOF
-          if !block
-            lines[start] = "{% highlight #{lang} %}"
-            lines[i - 1] = "{% endhighlight %}"
-          end
-          lines[i] = lines[i].sub(indent, "")
-        end
-      end
-      lines.join("\n")
-    end
-    def self.save_photo(url, ext)
-      if @grab_images
-        path = "tumblr_files/#{url.split('/').last}"
-        path += ext unless path =~ /#{ext}$/
-        FileUtils.mkdir_p "tumblr_files"
-        # Don't fetch if we've already cached this file
-        unless File.size? path
-          puts "Fetching photo #{url}"
-          File.open(path, "w") { |f| f.write(open(url).read) }
-        end
-        url = "/" + path
-      end
-      url
-    end
-  end
-end

data/lib/jekyll/jekyll-import/typo.rb DELETED Viewed

@@ -1,69 +0,0 @@
-# Author: Toby DiPasquale <toby@cbcg.net>
-require 'fileutils'
-require 'rubygems'
-require 'sequel'
-require 'safe_yaml'
-module JekyllImport
-  module Typo
-    # This SQL *should* work for both MySQL and PostgreSQL.
-    SQL = <<-EOS
-    SELECT c.id id,
-           c.title title,
-           c.permalink slug,
-           c.body body,
-           c.extended extended,
-           c.published_at date,
-           c.state state,
-           c.keywords keywords,
-           COALESCE(tf.name, 'html') filter
-      FROM contents c
-           LEFT OUTER JOIN text_filters tf
-                        ON c.text_filter_id = tf.id
-    EOS
-    def self.process server, dbname, user, pass, host='localhost'
-      FileUtils.mkdir_p '_posts'
-      case server.intern
-      when :postgres
-        db = Sequel.postgres(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
-      when :mysql
-        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
-      else
-        raise "Unknown database server '#{server}'"
-      end
-      db[SQL].each do |post|
-        next unless post[:state] =~ /published/i
-        if post[:slug] == nil
-          post[:slug] = "no slug"
-        end
-        if post[:extended]
-          post[:body] << "\n<!-- more -->\n"
-          post[:body] << post[:extended]
-        end
-        name = [ sprintf("%.04d", post[:date].year),
-                 sprintf("%.02d", post[:date].month),
-                 sprintf("%.02d", post[:date].day),
-                 post[:slug].strip ].join('-')
-        # Can have more than one text filter in this field, but we just want
-        # the first one for this.
-        name += '.' + post[:filter].split(' ')[0]
-        File.open("_posts/#{name}", 'w') do |f|
-          f.puts({ 'layout'   => 'post',
-                   'title'    => (post[:title] and post[:title].to_s.force_encoding('UTF-8')),
-                   'tags'     => (post[:keywords] and post[:keywords].to_s.force_encoding('UTF-8')),
-                   'typo_id'  => post[:id]
-                 }.delete_if { |k, v| v.nil? || v == '' }.to_yaml)
-          f.puts '---'
-          f.puts post[:body].delete("\r")
-        end
-      end
-    end
-  end
-end

data/lib/jekyll/jekyll-import/wordpress.rb DELETED Viewed

@@ -1,299 +0,0 @@
-require 'rubygems'
-require 'sequel'
-require 'fileutils'
-require 'safe_yaml'
-# NOTE: This converter requires Sequel and the MySQL gems.
-# The MySQL gem can be difficult to install on OS X. Once you have MySQL
-# installed, running the following commands should work:
-# $ sudo gem install sequel
-# $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
-module JekyllImport
-  module WordPress
-    # Main migrator function. Call this to perform the migration.
-    #
-    # dbname::  The name of the database
-    # user::    The database user name
-    # pass::    The database user's password
-    # host::    The address of the MySQL database host. Default: 'localhost'
-    # options:: A hash table of configuration options.
-    #
-    # Supported options are:
-    #
-    # :table_prefix::   Prefix of database tables used by WordPress.
-    #                   Default: 'wp_'
-    # :clean_entities:: If true, convert non-ASCII characters to HTML
-    #                   entities in the posts, comments, titles, and
-    #                   names. Requires the 'htmlentities' gem to
-    #                   work. Default: true.
-    # :comments::       If true, migrate post comments too. Comments
-    #                   are saved in the post's YAML front matter.
-    #                   Default: true.
-    # :categories::     If true, save the post's categories in its
-    #                   YAML front matter.
-    # :tags::           If true, save the post's tags in its
-    #                   YAML front matter.
-    # :more_excerpt::   If true, when a post has no excerpt but
-    #                   does have a <!-- more --> tag, use the
-    #                   preceding post content as the excerpt.
-    #                   Default: true.
-    # :more_anchor::    If true, convert a <!-- more --> tag into
-    #                   two HTML anchors with ids "more" and
-    #                   "more-NNN" (where NNN is the post number).
-    #                   Default: true.
-    # :status::         Array of allowed post statuses. Only
-    #                   posts with matching status will be migrated.
-    #                   Known statuses are :publish, :draft, :private,
-    #                   and :revision. If this is nil or an empty
-    #                   array, all posts are migrated regardless of
-    #                   status. Default: [:publish].
-    #
-    def self.process(options={})
-      options = {
-        :user           => '',
-        :pass           => '',
-        :host           => 'localhost',
-        :dbname         => '',
-        :table_prefix   => 'wp_',
-        :clean_entities => true,
-        :comments       => true,
-        :categories     => true,
-        :tags           => true,
-        :more_excerpt   => true,
-        :more_anchor    => true,
-        :status         => [:publish] # :draft, :private, :revision
-      }.merge(options)
-      if options[:clean_entities]
-        begin
-          require 'htmlentities'
-        rescue LoadError
-          STDERR.puts "Could not require 'htmlentities', so the " +
-                      ":clean_entities option is now disabled."
-          options[:clean_entities] = false
-        end
-      end
-      FileUtils.mkdir_p("_posts")
-      db = Sequel.mysql2(options[:dbname], :user => options[:user], :password => options[:pass],
-                        :host => options[:host], :encoding => 'utf8')
-      px = options[:table_prefix]
-      posts_query = "
-         SELECT
-           posts.ID            AS `id`,
-           posts.guid          AS `guid`,
-           posts.post_type     AS `type`,
-           posts.post_status   AS `status`,
-           posts.post_title    AS `title`,
-           posts.post_name     AS `slug`,
-           posts.post_date     AS `date`,
-           posts.post_content  AS `content`,
-           posts.post_excerpt  AS `excerpt`,
-           posts.comment_count AS `comment_count`,
-           users.display_name  AS `author`,
-           users.user_login    AS `author_login`,
-           users.user_email    AS `author_email`,
-           users.user_url      AS `author_url`
-         FROM #{px}posts AS `posts`
-           LEFT JOIN #{px}users AS `users`
-             ON posts.post_author = users.ID"
-      if options[:status] and not options[:status].empty?
-        status = options[:status][0]
-        posts_query << "
-         WHERE posts.post_status = '#{status.to_s}'"
-        options[:status][1..-1].each do |status|
-          posts_query << " OR
-           posts.post_status = '#{status.to_s}'"
-        end
-      end
-      db[posts_query].each do |post|
-        process_post(post, db, options)
-      end
-    end
-    def self.process_post(post, db, options)
-      px = options[:table_prefix]
-      title = post[:title]
-      if options[:clean_entities]
-        title = clean_entities(title)
-      end
-      slug = post[:slug]
-      if !slug or slug.empty?
-        slug = sluggify(title)
-      end
-      date = post[:date] || Time.now
-      name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month,
-                                             date.day, slug]
-      content = post[:content].to_s
-      if options[:clean_entities]
-        content = clean_entities(content)
-      end
-      excerpt = post[:excerpt].to_s
-      more_index = content.index(/<!-- *more *-->/)
-      more_anchor = nil
-      if more_index
-        if options[:more_excerpt] and
-            (post[:excerpt].nil? or post[:excerpt].empty?)
-          excerpt = content[0...more_index]
-        end
-        if options[:more_anchor]
-          more_link = "more"
-          content.sub!(/<!-- *more *-->/,
-                       "<a id=\"more\"></a>" +
-                       "<a id=\"more-#{post[:id]}\"></a>")
-        end
-      end
-      categories = []
-      tags = []
-      if options[:categories] or options[:tags]
-        cquery =
-          "SELECT
-             terms.name AS `name`,
-             ttax.taxonomy AS `type`
-           FROM
-             #{px}terms AS `terms`,
-             #{px}term_relationships AS `trels`,
-             #{px}term_taxonomy AS `ttax`
-           WHERE
-             trels.object_id = '#{post[:id]}' AND
-             trels.term_taxonomy_id = ttax.term_taxonomy_id AND
-             terms.term_id = ttax.term_id"
-        db[cquery].each do |term|
-          if options[:categories] and term[:type] == "category"
-            if options[:clean_entities]
-              categories << clean_entities(term[:name])
-            else
-              categories << term[:name]
-            end
-          elsif options[:tags] and term[:type] == "post_tag"
-            if options[:clean_entities]
-              tags << clean_entities(term[:name])
-            else
-              tags << term[:name]
-            end
-          end
-        end
-      end
-      comments = []
-      if options[:comments] and post[:comment_count].to_i > 0
-        cquery =
-          "SELECT
-             comment_ID           AS `id`,
-             comment_author       AS `author`,
-             comment_author_email AS `author_email`,
-             comment_author_url   AS `author_url`,
-             comment_date         AS `date`,
-             comment_date_gmt     AS `date_gmt`,
-             comment_content      AS `content`
-           FROM #{px}comments
-           WHERE
-             comment_post_ID = '#{post[:id]}' AND
-             comment_approved != 'spam'"
-        db[cquery].each do |comment|
-          comcontent = comment[:content].to_s
-          if comcontent.respond_to?(:force_encoding)
-            comcontent.force_encoding("UTF-8")
-          end
-          if options[:clean_entities]
-            comcontent = clean_entities(comcontent)
-          end
-          comauthor = comment[:author].to_s
-          if options[:clean_entities]
-            comauthor = clean_entities(comauthor)
-          end
-          comments << {
-            'id'           => comment[:id].to_i,
-            'author'       => comauthor,
-            'author_email' => comment[:author_email].to_s,
-            'author_url'   => comment[:author_url].to_s,
-            'date'         => comment[:date].to_s,
-            'date_gmt'     => comment[:date_gmt].to_s,
-            'content'      => comcontent,
-          }
-        end
-        comments.sort!{ |a,b| a['id'] <=> b['id'] }
-      end
-      # Get the relevant fields as a hash, delete empty fields and
-      # convert to YAML for the header.
-      data = {
-        'layout'        => post[:type].to_s,
-        'status'        => post[:status].to_s,
-        'published'     => (post[:status].to_s == "publish"),
-        'title'         => title.to_s,
-        'author'        => post[:author].to_s,
-        'author_login'  => post[:author_login].to_s,
-        'author_email'  => post[:author_email].to_s,
-        'author_url'    => post[:author_url].to_s,
-        'excerpt'       => excerpt,
-        'more_anchor'   => more_anchor,
-        'wordpress_id'  => post[:id],
-        'wordpress_url' => post[:guid].to_s,
-        'date'          => date,
-        'categories'    => options[:categories] ? categories : nil,
-        'tags'          => options[:tags] ? tags : nil,
-        'comments'      => options[:comments] ? comments : nil,
-      }.delete_if { |k,v| v.nil? || v == '' }.to_yaml
-      # Write out the data and content to file
-      File.open("_posts/#{name}", "w") do |f|
-        f.puts data
-        f.puts "---"
-        f.puts content
-      end
-    end
-    def self.clean_entities( text )
-      if text.respond_to?(:force_encoding)
-        text.force_encoding("UTF-8")
-      end
-      text = HTMLEntities.new.encode(text, :named)
-      # We don't want to convert these, it would break all
-      # HTML tags in the post and comments.
-      text.gsub!("&amp;", "&")
-      text.gsub!("&lt;", "<")
-      text.gsub!("&gt;", ">")
-      text.gsub!("&quot;", '"')
-      text.gsub!("&apos;", "'")
-      text.gsub!("/", "&#47;")
-      text
-    end
-    def self.sluggify( title )
-      begin
-        require 'unidecode'
-        title = title.to_ascii
-      rescue LoadError
-        STDERR.puts "Could not require 'unidecode'. If your post titles have non-ASCII characters, you could get nicer permalinks by installing unidecode."
-      end
-      title.downcase.gsub(/[^0-9A-Za-z]+/, " ").strip.gsub(" ", "-")
-    end
-  end
-end