RubyGems - jekyll-import - Versions diffs - 0.11.0 → 0.12.0 - Mend

jekyll-import 0.11.0 → 0.12.0

Files changed (10) hide show

checksums.yaml +4 -4
data/lib/jekyll-import/importers/blogger.rb +67 -1
data/lib/jekyll-import/importers/drupal6.rb +39 -125
data/lib/jekyll-import/importers/drupal7.rb +40 -97
data/lib/jekyll-import/importers/drupal_common.rb +157 -0
data/lib/jekyll-import/importers/joomla.rb +3 -1
data/lib/jekyll-import/importers/s9y_database.rb +363 -0
data/lib/jekyll-import/importers/tumblr.rb +17 -9
data/lib/jekyll-import/version.rb +1 -1
metadata +19 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 411790c3c98c3ba0eee2dffdc56eeb1b9ccd30b2
-  data.tar.gz: 57d36c8a9489910f81bdacd6cb36a52f9c46e17c
+  metadata.gz: 27f5a0ab9d87425b92a6e1a4fbc01c5c87c64626
+  data.tar.gz: 4c3382d2e508f6fd6571fa45f1a01f1c804cf69d
 SHA512:
-  metadata.gz: e6b0b500144bc36702db64ee1344e91ce8b3f37fa929ae1f241957c076010a5b3a89e2e9cd5fe2639b3610f2b540fc0166c86ff1b70dccad68431dc287dec192
-  data.tar.gz: cbc3b694be174e53b55d5851672c3413d93d1f6eb2c7cbf9dd073192a80c8c2fa697ac4adbf61b7534ce49f7948c951a690d4f1a6a791e402625110178d3a501
+  metadata.gz: 5ec278c744142928a3db648710bae5313ad6a7c78dfc948a382c50ed30122e4d0c03324a31ddfc41dd9ad99512ea6879fc7ed9a093779c473ea36703a0743aef
+  data.tar.gz: b63caa7a81fc42055d663b94b0f9e9e798bdb2459e9782268bbd938288f8b15b633afba6b1719a58c0f349e21e27bec46ea9413a03cb3e8c00af5cdc4048a44f

data/lib/jekyll-import/importers/blogger.rb CHANGED Viewed

@@ -5,6 +5,7 @@ module JekyllImport
         c.option 'source', '--source NAME', 'The XML file (blog-MM-DD-YYYY.xml) path to import'
         c.option 'no-blogger-info', '--no-blogger-info', 'not to leave blogger-URL info (id and old URL) in the front matter (default: false)'
         c.option 'replace-internal-link', '--replace-internal-link', 'replace internal links using the post_url liquid tag. (default: false)'
+        c.option 'comments', '--comments', 'import comments to _comments collection'
       end
       def self.validate(options)
@@ -41,6 +42,7 @@ module JekyllImport
         listener = BloggerAtomStreamListener.new
         listener.leave_blogger_info = ! options.fetch('no-blogger-info', false),
+        listener.comments = options.fetch('comments', false),
         File.open(source, 'r') do |f|
           f.flock(File::LOCK_SH)
@@ -95,11 +97,12 @@ module JekyllImport
           extend BloggerAtomStreamListenerMethods
           @leave_blogger_info = true
+          @comments = false
         end
       end
       module BloggerAtomStreamListenerMethods
-        attr_accessor :leave_blogger_info
+        attr_accessor :leave_blogger_info, :comments
         attr_reader :original_url_base
         def tag_start(tag, attrs)
@@ -143,6 +146,10 @@ module JekyllImport
             if @in_entry_elem
               @in_entry_elem[:meta][:thumbnail] = attrs['url']
             end
+          when 'thr:in-reply-to'
+            if @in_entry_elem
+              @in_entry_elem[:meta][:post_id] = attrs['ref']
+            end
           end
         end
@@ -185,6 +192,23 @@ module JekyllImport
                 FileUtils.mkdir_p(target_dir)
+                file_name = URI::decode("#{post_data[:filename]}.html")
+                File.open(File.join(target_dir, file_name), 'w') do |f|
+                  f.flock(File::LOCK_EX)
+                  f << post_data[:header].to_yaml
+                  f << "---\n\n"
+                  f << post_data[:body]
+                end
+              end
+            elsif @in_entry_elem[:meta][:kind] == 'comment' and @comments
+              post_data = get_post_data_from_in_entry_elem_info
+              if post_data
+                target_dir = '_comments'
+                FileUtils.mkdir_p(target_dir)
                 file_name = URI::decode("#{post_data[:filename]}.html")
                 File.open(File.join(target_dir, file_name), 'w') do |f|
                   f.flock(File::LOCK_EX)
@@ -251,6 +275,48 @@ module JekyllImport
               body.gsub!(/{%/, '{{ "{%" }}')
             end
+            { :filename => filename, :header => header, :body => body }
+          elsif @in_entry_elem[:meta][:kind] == 'comment'
+            timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime('%Y-%m-%d')
+            if @in_entry_elem[:meta][:original_url]
+              if not @comment_seq
+                @comment_seq = 1
+              end
+              original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
+              original_path = original_uri.path.to_s
+              filename = "%s-%s-%s" %
+                [timestamp,
+                 File.basename(original_path, File.extname(original_path)),
+                 @comment_seq]
+              @comment_seq = @comment_seq + 1
+              @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
+            else
+              raise 'Original URL is missing'
+            end
+            header = {
+              'date' => @in_entry_elem[:meta][:published],
+              'author' => @in_entry_elem[:meta][:author],
+              'blogger_post_id' => @in_entry_elem[:meta][:post_id],
+            }
+            header['modified_time'] = @in_entry_elem[:meta][:updated] if @in_entry_elem[:meta][:updated] && @in_entry_elem[:meta][:updated] != @in_entry_elem[:meta][:published]
+            header['thumbnail'] = @in_entry_elem[:meta][:thumbnail] if @in_entry_elem[:meta][:thumbnail]
+            header['blogger_id'] = @in_entry_elem[:meta][:id] if @leave_blogger_info
+            header['blogger_orig_url'] = @in_entry_elem[:meta][:original_url] if @leave_blogger_info && @in_entry_elem[:meta][:original_url]
+            body = @in_entry_elem[:body]
+            # body escaping associated with liquid
+            if body =~ /{{/
+              body.gsub!(/{{/, '{{ "{{" }}')
+            end
+            if body =~ /{%/
+              body.gsub!(/{%/, '{{ "{%" }}')
+            end
             { :filename => filename, :header => header, :body => body }
           else
             nil

data/lib/jekyll-import/importers/drupal6.rb CHANGED Viewed

@@ -1,139 +1,53 @@
+require 'jekyll-import/importers/drupal_common'
 module JekyllImport
   module Importers
     class Drupal6 < Importer
-      # Reads a MySQL database via Sequel and creates a post file for each story
-      # and blog node.
-      QUERY = "SELECT n.nid, \
-                      n.title, \
-                      nr.body, \
-                      n.created, \
-                      n.status, \
-                      GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags' \
-                 FROM node_revisions AS nr, \
-                      node AS n \
-                 LEFT OUTER JOIN term_node AS tn ON tn.nid = n.nid \
-                 LEFT OUTER JOIN term_data AS td ON tn.tid = td.tid \
-                WHERE (%types%) \
-                  AND n.vid = nr.vid \
-             GROUP BY n.nid"
-      def self.validate(options)
-        %w[dbname user].each do |option|
-          if options[option].nil?
-            abort "Missing mandatory option --#{option}."
-          end
-        end
-      end
+      include DrupalCommon
+      extend DrupalCommon::ClassMethods
-      def self.specify_options(c)
-        c.option 'dbname', '--dbname DB', 'Database name'
-        c.option 'user', '--user USER', 'Database user name'
-        c.option 'password', '--password PW', "Database user's password (default: '')"
-        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
-        c.option 'prefix', '--prefix PREFIX', 'Table prefix name'
-        c.option 'types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array, 'The Drupal content types to be imported.'
+      def self.build_query(prefix, types)
+        types = types.join("' OR n.type = '")
+        types = "n.type = '#{types}'"
+        query = <<EOS
+                SELECT n.nid,
+                       n.title,
+                       nr.body,
+                       nr.teaser,
+                       n.created,
+                       n.status,
+                       n.type,
+                       GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags'
+                FROM #{prefix}node_revisions AS nr,
+                     #{prefix}node AS n
+                     LEFT OUTER JOIN #{prefix}term_node AS tn ON tn.nid = n.nid
+                     LEFT OUTER JOIN #{prefix}term_data AS td ON tn.tid = td.tid
+                WHERE (#{types})
+                  AND n.vid = nr.vid
+                GROUP BY n.nid
+EOS
+        return query
       end
-      def self.require_deps
-        JekyllImport.require_with_fallback(%w[
-          rubygems
-          sequel
-          fileutils
-          safe_yaml
-          mysql
-        ])
+      def self.aliases_query(prefix)
+        "SELECT src AS source, dst AS alias FROM #{prefix}url_alias WHERE src = ?"
       end
-      def self.process(options)
-        dbname = options.fetch('dbname')
-        user   = options.fetch('user')
-        pass   = options.fetch('password', "")
-        host   = options.fetch('host', "localhost")
-        prefix = options.fetch('prefix', "")
-        types  = options.fetch('types', ['blog', 'story', 'article'])
-        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
-        if prefix != ''
-          QUERY[" node "] = " " + prefix + "node "
-          QUERY[" node_revisions "] = " " + prefix + "node_revisions "
-          QUERY[" term_node "] = " " + prefix + "term_node "
-          QUERY[" term_data "] = " " + prefix + "term_data "
-        end
-        types = types.join("' OR n.type = '")
-        QUERY[" WHERE (%types%) "] = " WHERE (n.type = '#{types}') "
-        FileUtils.mkdir_p "_posts"
-        FileUtils.mkdir_p "_drafts"
-        FileUtils.mkdir_p "_layouts"
-        # Create the refresh layout
-        # Change the refresh url if you customized your permalink config
-        File.open("_layouts/refresh.html", "w") do |f|
-          f.puts <<EOF
-<!DOCTYPE html>
-<html>
-<head>
-<meta http-equiv="content-type" content="text/html; charset=utf-8" />
-<meta http-equiv="refresh" content="0;url={{ page.refresh_to_post_id }}.html" />
-</head>
-</html>
-EOF
-        end
-        db[QUERY].each do |post|
-          # Get required fields and construct Jekyll compatible name
-          node_id = post[:nid]
-          title = post[:title]
-          content = post[:body]
-          tags = (post[:tags] || '').downcase.strip
-          created = post[:created]
-          time = Time.at(created)
-          is_published = post[:status] == 1
-          dir = is_published ? "_posts" : "_drafts"
-          slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
-          name = time.strftime("%Y-%m-%d-") + slug + '.md'
-          # Get the relevant fields as a hash, delete empty fields and convert
-          # to YAML for the header
-          data = {
-             'layout' => 'post',
-             'title' => title.to_s,
-             'created' => created,
-             'categories' => tags.split('|')
-           }.delete_if { |k,v| v.nil? || v == ''}.each_pair {
-              |k,v| ((v.is_a? String) ? v.force_encoding("UTF-8") : v)
-           }.to_yaml
+      def self.post_data(sql_post_data)
+        content = sql_post_data[:body].to_s
+        summary = sql_post_data[:teaser].to_s
+        tags = (sql_post_data[:tags] || '').downcase.strip
-          # Write out the data and content to file
-          File.open("#{dir}/#{name}", "w") do |f|
-            f.puts data
-            f.puts "---"
-            f.puts content
-          end
+        data = {
+          'excerpt' => summary,
+          'categories' => tags.split('|')
+        }
-          # Make a file to redirect from the old Drupal URL
-          if is_published
-            aliases = db["SELECT dst FROM #{prefix}url_alias WHERE src = ?", "node/#{node_id}"].all
-            aliases.push(:dst => "node/#{node_id}")
-            aliases.each do |url_alias|
-              FileUtils.mkdir_p url_alias[:dst]
-              File.open("#{url_alias[:dst]}/index.md", "w") do |f|
-                f.puts "---"
-                f.puts "layout: refresh"
-                f.puts "refresh_to_post_id: /#{time.strftime("%Y/%m/%d/") + slug}"
-                f.puts "---"
-              end
-            end
-          end
-        end
-        # TODO: Make dirs & files for nodes of type 'page'
-        # Make refresh pages for these as well
+         return data, content
       end
     end
   end
 end

data/lib/jekyll-import/importers/drupal7.rb CHANGED Viewed

@@ -1,111 +1,54 @@
+require 'jekyll-import/importers/drupal_common'
 module JekyllImport
   module Importers
     class Drupal7 < Importer
-      # Reads a MySQL database via Sequel and creates a post file for each story
-      # and blog node.
-      QUERY = "SELECT n.title, \
-                      fdb.body_value, \
-                      fdb.body_summary, \
-                      n.created, \
-                      n.status, \
-                      n.nid, \
-                      u.name \
-               FROM node AS n, \
-                    field_data_body AS fdb, \
-                    users AS u \
-               WHERE (%types%) \
-               AND n.nid = fdb.entity_id \
-               AND n.vid = fdb.revision_id
-               AND n.uid = u.uid"
-      def self.validate(options)
-        %w[dbname user].each do |option|
-          if options[option].nil?
-            abort "Missing mandatory option --#{option}."
-          end
-        end
-      end
+      include DrupalCommon
+      extend DrupalCommon::ClassMethods
-      def self.specify_options(c)
-        c.option 'dbname', '--dbname DB', 'Database name'
-        c.option 'user', '--user USER', 'Database user name'
-        c.option 'password', '--password PW', 'Database user\'s password (default: "")'
-        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
-        c.option 'prefix', '--prefix PREFIX', 'Table prefix name'
-        c.option 'types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array, 'The Drupal content types to be imported.'
+      def self.build_query(prefix, types)
+        types = types.join("' OR n.type = '")
+        types = "n.type = '#{types}'"
+        query = <<EOS
+                SELECT n.nid,
+                       n.title,
+                       fdb.body_value,
+                       fdb.body_summary,
+                       n.created,
+                       n.status,
+                       n.type,
+                       GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags'
+                FROM #{prefix}field_data_body AS fdb,
+                     #{prefix}node AS n
+                     LEFT OUTER JOIN #{prefix}taxonomy_index AS ti ON ti.nid = n.nid
+                     LEFT OUTER JOIN #{prefix}taxonomy_term_data AS td ON ti.tid = td.tid
+                WHERE (#{types})
+                  AND n.nid = fdb.entity_id
+                  AND n.vid = fdb.revision_id
+                GROUP BY n.nid"
+EOS
+        return query
       end
-      def self.require_deps
-        JekyllImport.require_with_fallback(%w[
-          rubygems
-          sequel
-          fileutils
-          safe_yaml
-        ])
+      def self.aliases_query(prefix)
+        "SELECT source, alias FROM #{prefix}url_alias WHERE source = ?"
       end
-      def self.process(options)
-        dbname = options.fetch('dbname')
-        user   = options.fetch('user')
-        pass   = options.fetch('password', "")
-        host   = options.fetch('host', "localhost")
-        prefix = options.fetch('prefix', "")
-        types  = options.fetch('types', ['blog', 'story', 'article'])
-        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
-        unless prefix.empty?
-          QUERY[" node "] = " " + prefix + "node "
-          QUERY[" field_data_body "] = " " + prefix + "field_data_body "
-          QUERY[" users "] = " " + prefix + "users "
-        end
-        types = types.join("' OR n.type = '")
-        QUERY[" WHERE (%types%) "] = " WHERE (n.type = '#{types}') "
-        FileUtils.mkdir_p "_posts"
-        FileUtils.mkdir_p "_drafts"
-        FileUtils.mkdir_p "_layouts"
-        db[QUERY].each do |post|
-          # Get required fields and construct Jekyll compatible name
-          title = post[:title]
-          content = post[:body_value]
-          summary = post[:body_summary]
-          created = post[:created]
-          author = post[:name]
-          nid = post[:nid]
-          time = Time.at(created)
-          is_published = post[:status] == 1
-          dir = is_published ? "_posts" : "_drafts"
-          slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
-          name = time.strftime("%Y-%m-%d-") + slug + '.md'
-          # Get the relevant fields as a hash, delete empty fields and convert
-          # to YAML for the header
-          data = {
-            'layout' => 'post',
-            'title' => title.strip.force_encoding("UTF-8"),
-            'author' => author,
-            'nid' => nid,
-            'created' => created,
-            'excerpt' => summary
-          }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
+      def self.post_data(sql_post_data)
+        content = sql_post_data[:body_value].to_s
+        summary = sql_post_data[:body_summary].to_s
+        tags = (sql_post_data[:tags] || '').downcase.strip
-          # Write out the data and content to file
-          File.open("#{dir}/#{name}", "w") do |f|
-            f.puts data
-            f.puts "---"
-            f.puts content
-          end
+        data = {
+          'excerpt' => summary,
+          'categories' => tags.split('|')
+        }
-        end
-        # TODO: Make dirs & files for nodes of type 'page'
-          # Make refresh pages for these as well
-        # TODO: Make refresh dirs & files according to entries in url_alias table
+        return data, content
       end
     end
   end
 end

data/lib/jekyll-import/importers/drupal_common.rb ADDED Viewed

@@ -0,0 +1,157 @@
+require 'date'
+module JekyllImport
+  module Importers
+    module DrupalCommon
+      # This module provides a base for the Drupal importers (at least for 6
+      # and 7; since 8 will be a different beast). Version-specific importers
+      # will need to implement the missing methods from the Importer class.
+      #
+      # The general idea is that this importer reads a MySQL database via Sequel
+      # and creates a post file for each node it finds in the Drupal database.
+      module ClassMethods
+        DEFAULTS = {
+          "password" => "",
+          "host"     => "localhost",
+          "prefix"   => "",
+          "types"    => %w(blog story article)
+        }
+        def specify_options(c)
+          c.option 'dbname', '--dbname DB', 'Database name'
+          c.option 'user', '--user USER', 'Database user name'
+          c.option 'password', '--password PW', "Database user's password (default: #{DEFAULTS["password"].inspect})"
+          c.option 'host', '--host HOST', "Database host name (default: #{DEFAULTS["host"].inspect})"
+          c.option 'prefix', '--prefix PREFIX', "Table prefix name (default: #{DEFAULTS["prefix"].inspect})"
+          c.option 'types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array,
+            "The Drupal content types to be imported  (default: #{DEFAULTS["types"].join(",")})"
+        end
+        def require_deps
+          JekyllImport.require_with_fallback(%w[
+            rubygems
+            sequel
+            fileutils
+            safe_yaml
+          ])
+        end
+        def process(options)
+          dbname = options.fetch('dbname')
+          user   = options.fetch('user')
+          pass   = options.fetch('password', DEFAULTS["password"])
+          host   = options.fetch('host',     DEFAULTS["host"])
+          prefix = options.fetch('prefix',   DEFAULTS["prefix"])
+          types  = options.fetch('types',    DEFAULTS["types"])
+          db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
+          query = self.build_query(prefix, types)
+          conf = Jekyll.configuration({})
+          src_dir = conf['source']
+          dirs = {
+              :_posts   => File.join(src_dir, '_posts').to_s,
+              :_drafts  => File.join(src_dir, '_drafts').to_s,
+              :_layouts => Jekyll.sanitized_path(src_dir, conf['layouts_dir'].to_s)
+          }
+          dirs.each do |key, dir|
+            FileUtils.mkdir_p dir
+          end
+          # Create the refresh layout
+          # Change the refresh url if you customized your permalink config
+          File.open(File.join(dirs[:_layouts], 'refresh.html'), 'w') do |f|
+            f.puts <<-HTML
+<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+<meta http-equiv="refresh" content="0;url={{ page.refresh_to_post_id }}.html" />
+</head>
+</html>
+HTML
+          end
+          db[query].each do |post|
+            # Get required fields
+            data, content = self.post_data(post)
+            data['layout'] = post[:type]
+            title = data['title'] = post[:title].strip.force_encoding('UTF-8')
+            time = data['created'] = post[:created]
+            # Get the relevant fields as a hash and delete empty fields
+            data = data.delete_if { |k,v| v.nil? || v == ''}.each_pair {
+                |k,v| ((v.is_a? String) ? v.force_encoding('UTF-8') : v)
+            }
+            # Construct a Jekyll compatible file name
+            is_published = post[:status] == 1
+            node_id = post[:nid]
+            dir = is_published ? dirs[:_posts] : dirs[:_drafts]
+            slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
+            filename = Time.at(time).to_datetime.strftime('%Y-%m-%d-') + slug + '.md'
+            # Write out the data and content to file
+            File.open("#{dir}/#{filename}", 'w') do |f|
+              f.puts data.to_yaml
+              f.puts '---'
+              f.puts content
+            end
+            # Make a file to redirect from the old Drupal URL
+            if is_published
+              alias_query = self.aliases_query(prefix)
+              type = post[:type]
+              aliases = db[alias_query, "#{type}/#{node_id}"].all
+              aliases.push(:alias => "#{type}/#{node_id}")
+              aliases.each do |url_alias|
+                FileUtils.mkdir_p url_alias[:alias]
+                File.open("#{url_alias[:alias]}/index.md", "w") do |f|
+                  f.puts '---'
+                  f.puts 'layout: refresh'
+                  f.puts "refresh_to_post_id: /#{Time.at(time).to_datetime.strftime('%Y/%m/%d/') + slug}"
+                  f.puts '---'
+                end
+              end
+            end
+          end
+        end
+      end
+      def build_query(prefix, types)
+        raise 'The importer you are trying to use does not implement the get_query() method.'
+      end
+      def aliases_query(prefix)
+        # Make sure you implement the query returning "alias" as the column name
+        # for the URL aliases. See the Drupal 6 importer for an example. The
+        # alias field is called 'dst' but we alias it to 'alias', to follow
+        # Drupal 7's column names.
+        raise 'The importer you are trying to use does not implement the get_aliases_query() method.'
+      end
+      def post_data(sql_post_data)
+        raise 'The importer you are trying to use does not implement the get_query() method.'
+      end
+      def validate(options)
+        %w[dbname user].each do |option|
+          if options[option].nil?
+            abort "Missing mandatory option --#{option}."
+          end
+        end
+      end
+    end
+  end
+end

data/lib/jekyll-import/importers/joomla.rb CHANGED Viewed

@@ -22,8 +22,10 @@ module JekyllImport
         JekyllImport.require_with_fallback(%w[
           rubygems
           sequel
+          mysql2
           fileutils
           safe_yaml
+          mysql
         ])
       end
@@ -35,7 +37,7 @@ module JekyllImport
         section = options.fetch('section', '1')
         table_prefix = options.fetch('prefix', "jos_")
-        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
+        db = Sequel.mysql2(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
         FileUtils.mkdir_p("_posts")

data/lib/jekyll-import/importers/s9y_database.rb ADDED Viewed

@@ -0,0 +1,363 @@
+module JekyllImport
+  module Importers
+    class S9YDatabase < Importer
+      def self.require_deps
+        JekyllImport.require_with_fallback(
+          %w[
+          rubygems
+          sequel
+          fileutils
+          safe_yaml
+          unidecode
+          ])
+      end
+      def self.specify_options(c)
+        c.option 'dbname', '--dbname DB', 'Database name (default: "")'
+        c.option 'socket', '--socket SOCKET', 'Database socket (default: "")'
+        c.option 'user', '--user USER', 'Database user name (default: "")'
+        c.option 'password', '--password PW', "Database user's password (default: "")"
+        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
+        c.option 'table_prefix', '--table_prefix PREFIX', 'Table prefix name (default: "serendipity_")'
+        c.option 'clean_entities', '--clean_entities', 'Whether to clean entities (default: true)'
+        c.option 'comments', '--comments', 'Whether to import comments (default: true)'
+        c.option 'categories', '--categories', 'Whether to import categories (default: true)'
+        c.option 'tags', '--tags', 'Whether to import tags (default: true)'
+        c.option 'drafts', '--drafts', 'Whether to export drafts as well'
+        c.option 'markdown', '--markdown', 'convert into markdown format (default: false)'
+        c.option 'permalinks', '--permalinks', 'preserve S9Y permalinks (default: false)'
+      end
+      # Main migrator function. Call this to perform the migration.
+      #
+      # dbname::  The name of the database
+      # user::    The database user name
+      # pass::    The database user's password
+      # host::    The address of the MySQL database host. Default: 'localhost'
+      # socket::  The database socket's path
+      # options:: A hash table of configuration options.
+      #
+      # Supported options are:
+      #
+      # :table_prefix::   Prefix of database tables used by WordPress.
+      #                   Default: 'serendipity_'
+      # :clean_entities:: If true, convert non-ASCII characters to HTML
+      #                   entities in the posts, comments, titles, and
+      #                   names. Requires the 'htmlentities' gem to
+      #                   work. Default: true.
+      # :comments::       If true, migrate post comments too. Comments
+      #                   are saved in the post's YAML front matter.
+      #                   Default: true.
+      # :categories::     If true, save the post's categories in its
+      #                   YAML front matter. Default: true.
+      # :tags::           If true, save the post's tags in its
+      #                   YAML front matter. Default: true.
+      # :extension::      Set the post extension. Default: "html"
+      # :drafts::  If true, export drafts as well
+      #                   Default: true.
+      # :markdown::       If true, convert the content to markdown
+      #                   Default: false
+      # :permalinks::     If true, save the post's original permalink in its
+      #                   YAML front matter. Default: false.
+      #
+      def self.process(opts)
+        options = {
+          :user           => opts.fetch('user', ''),
+          :pass           => opts.fetch('password', ''),
+          :host           => opts.fetch('host', 'localhost'),
+          :socket         => opts.fetch('socket', nil),
+          :dbname         => opts.fetch('dbname', ''),
+          :table_prefix   => opts.fetch('table_prefix', 'serendipity_'),
+          :clean_entities => opts.fetch('clean_entities', true),
+          :comments       => opts.fetch('comments', true),
+          :categories     => opts.fetch('categories', true),
+          :tags           => opts.fetch('tags', true),
+          :extension      => opts.fetch('extension', 'html'),
+          :drafts         => opts.fetch('drafts', true),
+          :markdown       => opts.fetch('markdown', false),
+          :permalinks     => opts.fetch('permalinks', false),
+        }
+        if options[:clean_entities]
+          options[:clean_entities] = require_if_available('htmlentities', 'clean_entities')
+        end
+        if options[:markdown]
+          options[:markdown] = require_if_available('reverse_markdown', 'markdown')
+        end
+        FileUtils.mkdir_p("_posts")
+        FileUtils.mkdir_p("_drafts") if options[:drafts]
+        db = Sequel.mysql2(options[:dbname], :user => options[:user], :password => options[:pass],
+                           :socket => options[:socket], :host => options[:host], :encoding => 'utf8')
+        px = options[:table_prefix]
+        page_name_list = {}
+        page_name_query = %(
+           SELECT
+             entries.ID             AS `id`,
+             entries.title          AS `title`
+           FROM #{px}entries AS `entries`
+        )
+        db[page_name_query].each do |page|
+          page[:slug] = sluggify(page[:title])
+          page_name_list[ page[:id] ] = {
+            :slug   => page[:slug]
+          }
+        end
+        posts_query = "
+           SELECT
+             entries.ID             AS `id`,
+             entries.isdraft        AS `isdraft`,
+             entries.title          AS `title`,
+             entries.timestamp      AS `timestamp`,
+             entries.body           AS `body`,
+             authors.realname     AS `author`,
+             authors.username     AS `author_login`,
+             authors.email        AS `author_email`
+           FROM #{px}entries AS `entries`
+             LEFT JOIN #{px}authors AS `authors`
+               ON entries.authorid = authors.authorid"
+        unless options[:drafts]
+          posts_query << "WHERE posts.isdraft = 'false'"
+        end
+        db[posts_query].each do |post|
+          process_post(post, db, options, page_name_list)
+        end
+      end
+      def self.process_post(post, db, options, page_name_list)
+        extension = options[:extension]
+        title = post[:title]
+        if options[:clean_entities]
+          title = clean_entities(title)
+        end
+        slug = post[:slug]
+        if !slug || slug.empty?
+          slug = sluggify(title)
+        end
+        status = post[:isdraft] == 'true' ? 'draft' : 'published'
+        date = Time.at(post[:timestamp]).utc || Time.now.utc
+        name = "%02d-%02d-%02d-%s.%s" % [date.year, date.month, date.day, slug, extension]
+        content = post[:body].to_s
+        if options[:clean_entities]
+          content = clean_entities(content)
+        end
+        if options[:markdown]
+          content = ReverseMarkdown.convert(content)
+        end
+        categories = process_categories(db, options, post)
+        comments = process_comments(db, options, post)
+        tags = process_tags(db, options, post)
+        permalink = process_permalink(db, options, post)
+        # Get the relevant fields as a hash, delete empty fields and
+        # convert to YAML for the header.
+        data = {
+          'layout'        => post[:type].to_s,
+          'status'        => status.to_s,
+          'published'     => status.to_s == 'draft' ? nil : (status.to_s == 'published'),
+          'title'         => title.to_s,
+          'author'        => {
+            'display_name'=> post[:author].to_s,
+            'login'       => post[:author_login].to_s,
+            'email'       => post[:author_email].to_s
+          },
+          'author_login'  => post[:author_login].to_s,
+          'author_email'  => post[:author_email].to_s,
+          'date'          => date.to_s,
+          'permalink'     => options[:permalinks] ? permalink : nil,
+          'categories'    => options[:categories] ? categories : nil,
+          'tags'          => options[:tags] ? tags : nil,
+          'comments'      => options[:comments] ? comments : nil,
+        }.delete_if { |k,v| v.nil? || v == '' }.to_yaml
+        if post[:type] == 'page'
+          filename = page_path(post[:id], page_name_list) + "index.#{extension}"
+          FileUtils.mkdir_p(File.dirname(filename))
+        elsif status == 'draft'
+          filename = "_drafts/#{slug}.#{extension}"
+        else
+          filename = "_posts/#{name}"
+        end
+        # Write out the data and content to file
+        File.open(filename, "w") do |f|
+          f.puts data
+          f.puts "---"
+          f.puts Util.wpautop(content)
+        end
+      end
+      def self.require_if_available(gem_name, option_name)
+        begin
+          require gem_name
+          return true
+        rescue LoadError
+          STDERR.puts "Could not require '#{gem_name}', so the :#{option_name} option is now disabled."
+          return true
+        end
+      end
+      def self.process_categories(db, options, post)
+        return [] unless options[:categories]
+        px = options[:table_prefix]
+        cquery = %(
+            SELECT
+               categories.category_name AS `name`
+             FROM
+              #{px}entrycat AS `entrycat`,
+              #{px}category AS `categories`
+             WHERE
+               entrycat.entryid = '#{post[:id]}' AND
+               entrycat.categoryid = categories.categoryid
+        )
+        db[cquery].each_with_object([]) do |category, categories|
+          if options[:clean_entities]
+            categories << clean_entities(category[:name])
+          else
+            categories << category[:name]
+          end
+        end
+      end
+      def self.process_comments(db, options, post)
+        return [] unless options[:comments]
+        px = options[:table_prefix]
+        cquery = %(
+            SELECT
+               id           AS `id`,
+               author       AS `author`,
+               email        AS `author_email`,
+               url          AS `author_url`,
+               timestamp    AS `date`,
+               body         AS `content`
+             FROM #{px}comments
+             WHERE
+               entry_id = '#{post[:id]}' AND
+               status = 'approved'
+        )
+        db[cquery].each_with_object([]) do |comment, comments|
+          comcontent = comment[:content].to_s
+          comauthor = comment[:author].to_s
+          if comcontent.respond_to?(:force_encoding)
+            comcontent.force_encoding("UTF-8")
+          end
+          if options[:clean_entities]
+            comcontent = clean_entities(comcontent)
+            comauthor = clean_entities(comauthor)
+          end
+          comments << {
+            'id'           => comment[:id].to_i,
+            'author'       => comauthor,
+            'author_email' => comment[:author_email].to_s,
+            'author_url'   => comment[:author_url].to_s,
+            'date'         => comment[:date].to_s,
+            'content'      => comcontent,
+          }
+        end.sort!{ |a,b| a['id'] <=> b['id'] }
+      end
+      def self.process_tags(db, options, post)
+        return [] unless options[:categories]
+        px = options[:table_prefix]
+        cquery = %(
+            SELECT
+               entrytags.tag AS `name`
+             FROM
+              #{px}entrytags AS `entrytags`
+             WHERE
+               entrytags.entryid = '#{post[:id]}'
+        )
+        db[cquery].each_with_object([]) do |tag, tags|
+          if options[:clean_entities]
+            tags << clean_entities(tag[:name])
+          else
+            tags << tag[:name]
+          end
+        end
+      end
+      def self.process_permalink(db, options, post)
+        return unless options[:permalinks]
+        px = options[:table_prefix]
+        cquery = %(
+            SELECT
+               permalinks.permalink AS `permalink`
+             FROM
+        #{px}permalinks AS `permalinks`
+             WHERE
+               permalinks.entry_id = '#{post[:id]}' AND
+               permalinks.type = 'entry'
+        )
+        db[cquery].each do |link|
+          return "/#{link[:permalink]}"
+        end
+      end
+      def self.clean_entities( text )
+        if text.respond_to?(:force_encoding)
+          text.force_encoding("UTF-8")
+        end
+        text = HTMLEntities.new.encode(text, :named)
+        # We don't want to convert these, it would break all
+        # HTML tags in the post and comments.
+        text.gsub!("&amp;", "&")
+        text.gsub!("&lt;", "<")
+        text.gsub!("&gt;", ">")
+        text.gsub!("&quot;", '"')
+        text.gsub!("&apos;", "'")
+        text.gsub!("/", "&#47;")
+        text
+      end
+      def self.sluggify( title )
+        title.to_ascii.downcase.gsub(/[^0-9A-Za-z]+/, " ").strip.gsub(" ", "-")
+      end
+      def self.page_path( page_id, page_name_list )
+        if page_name_list.key?(page_id)
+          [
+            page_name_list[page_id][:slug],
+            '/'
+          ].join("")
+        else
+          ""
+        end
+      end
+    end
+  end
+end

data/lib/jekyll-import/importers/tumblr.rb CHANGED Viewed

@@ -42,10 +42,7 @@ module JekyllImport
           puts "Fetching #{feed_url}"
           feed = open(feed_url)
           contents = feed.readlines.join("\n")
-          beginning = contents.index("{")
-          ending = contents.rindex("}")
-          json = contents[beginning..ending]  # Strip Tumblr's JSONP chars.
-          blog = JSON.parse(json)
+          blog = extract_json(contents)
           puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
           batch = blog["posts"].map { |post| post_to_hash(post, format) }
@@ -68,6 +65,13 @@ module JekyllImport
       private
+      def self.extract_json(contents)
+        beginning = contents.index("{")
+        ending = contents.rindex("}")+1
+        json = contents[beginning...ending]  # Strip Tumblr's JSONP chars.
+        blog = JSON.parse(json)
+      end
       # Writes a post out to disk
       def self.write_post(post, use_markdown, add_highlights)
         content = post[:content]
@@ -135,12 +139,12 @@ module JekyllImport
             post["conversation"].each do |line|
               content << "<dt>#{line['label']}</dt><dd>#{line['phrase']}</dd>"
             end
-            content << "</section></dialog>"
+            content << "</dialog></section>"
           when "video"
             title = post["video-title"]
             content = post["video-player"]
             unless post["video-caption"].nil?
-              unless content.nil?
+              if content
                 content << "<br/>" + post["video-caption"]
               else
                 content = post["video-caption"]
@@ -209,9 +213,13 @@ module JekyllImport
         urls = Hash[posts.map { |post|
           # Create an initial empty file for the post so that
           # we can instantiate a post object.
-          File.open("_posts/tumblr/#{post[:name]}", "w")
+          File.write("_posts/tumblr/#{post[:name]}", "")
           tumblr_url = URI.parse(URI.encode(post[:slug])).path
-          jekyll_url = Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
+          jekyll_url = if Jekyll.const_defined? :Post
+                         Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
+                       else
+                         Jekyll::Document.new(File.expand_path("_posts/tumblr/#{post[:name]}"), site: site, collection: site.posts).url
+                       end
           redirect_dir = tumblr_url.sub(/\//, "") + "/"
           FileUtils.mkdir_p redirect_dir
           File.open(redirect_dir + "index.html", "w") do |f|
@@ -281,7 +289,7 @@ module JekyllImport
           # Don't fetch if we've already cached this file
           unless File.size? path
             puts "Fetching photo #{url}"
-            File.open(path, "w") { |f| f.write(open(url).read) }
+            File.open(path, "wb") { |f| f.write(open(url).read) }
           end
           url = "/" + path
         end

data/lib/jekyll-import/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module JekyllImport
-  VERSION = '0.11.0'
+  VERSION = '0.12.0'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: jekyll-import
 version: !ruby/object:Gem::Version
-  version: 0.11.0
+  version: 0.12.0
 platform: ruby
 authors:
 - Tom Preston-Werner
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-06-28 00:00:00.000000000 Z
+date: 2016-11-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: jekyll
@@ -304,6 +304,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: reverse_markdown
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: launchy
   requirement: !ruby/object:Gem::Requirement
@@ -336,6 +350,7 @@ files:
 - lib/jekyll-import/importers/csv.rb
 - lib/jekyll-import/importers/drupal6.rb
 - lib/jekyll-import/importers/drupal7.rb
+- lib/jekyll-import/importers/drupal_common.rb
 - lib/jekyll-import/importers/easyblog.rb
 - lib/jekyll-import/importers/enki.rb
 - lib/jekyll-import/importers/ghost.rb
@@ -349,6 +364,7 @@ files:
 - lib/jekyll-import/importers/posterous.rb
 - lib/jekyll-import/importers/rss.rb
 - lib/jekyll-import/importers/s9y.rb
+- lib/jekyll-import/importers/s9y_database.rb
 - lib/jekyll-import/importers/textpattern.rb
 - lib/jekyll-import/importers/tumblr.rb
 - lib/jekyll-import/importers/typo.rb
@@ -378,7 +394,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.5.1
+rubygems_version: 2.5.2
 signing_key:
 specification_version: 2
 summary: Import command for Jekyll (static site generator).