RubyGems - bunto-import - Versions diffs - 2.0.0 → 3.0.0 - Mend

bunto-import 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

checksums.yaml +4 -4
data/LICENSE +21 -21
data/README.markdown +33 -33
data/lib/bunto-import.rb +49 -49
data/lib/bunto-import/importer.rb +26 -26
data/lib/bunto-import/importers.rb +10 -10
data/lib/bunto-import/importers/behance.rb +80 -80
data/lib/bunto-import/importers/blogger.rb +330 -264
data/lib/bunto-import/importers/csv.rb +96 -96
data/lib/bunto-import/importers/drupal6.rb +53 -139
data/lib/bunto-import/importers/drupal7.rb +54 -111
data/lib/bunto-import/importers/drupal_common.rb +157 -0
data/lib/bunto-import/importers/easyblog.rb +96 -96
data/lib/bunto-import/importers/enki.rb +74 -74
data/lib/bunto-import/importers/ghost.rb +68 -68
data/lib/bunto-import/importers/google_reader.rb +64 -64
data/lib/bunto-import/importers/joomla.rb +92 -90
data/lib/bunto-import/importers/joomla3.rb +91 -91
data/lib/bunto-import/importers/jrnl.rb +125 -125
data/lib/bunto-import/importers/marley.rb +72 -72
data/lib/bunto-import/importers/mephisto.rb +99 -99
data/lib/bunto-import/importers/mt.rb +257 -257
data/lib/bunto-import/importers/posterous.rb +130 -130
data/lib/bunto-import/importers/rss.rb +62 -62
data/lib/bunto-import/importers/s9y.rb +60 -60
data/lib/bunto-import/importers/s9y_database.rb +363 -0
data/lib/bunto-import/importers/textpattern.rb +70 -70
data/lib/bunto-import/importers/tumblr.rb +300 -289
data/lib/bunto-import/importers/typo.rb +88 -88
data/lib/bunto-import/importers/wordpress.rb +372 -372
data/lib/bunto-import/importers/wordpressdotcom.rb +207 -207
data/lib/bunto-import/util.rb +76 -76
data/lib/bunto-import/version.rb +3 -3
data/lib/bunto/commands/import.rb +79 -79
metadata +84 -54

data/lib/bunto-import/importers/csv.rb CHANGED

@@ -1,96 +1,96 @@
-# encoding: UTF-8
-module BuntoImport
-  module Importers
-    class CSV < Importer
-      def self.require_deps
-        BuntoImport.require_with_fallback(%w[
-          csv
-          fileutils
-          yaml
-        ])
-      end
-      def self.specify_options(c)
-        c.option 'file', '--file NAME', 'The CSV file to import (default: "posts.csv")'
-        c.option 'no-front-matter', '--no-front-matter', 'Do not add the default front matter to the post body'
-      end
-      # Reads a csv with title, permalink, body, published_at, and filter.
-      # It creates a post file for each row in the csv
-      def self.process(options)
-        file = options.fetch('file', "posts.csv")
-        FileUtils.mkdir_p "_posts"
-        posts = 0
-        abort "Cannot find the file '#{file}'. Aborting." unless File.file?(file)
-        ::CSV.foreach(file) do |row|
-          next if row[0] == "title" # header
-          posts += 1
-          write_post(CSVPost.new(row), options)
-        end
-        Bunto.logger.info "Created #{posts} posts!"
-      end
-      class CSVPost
-        attr_reader :title, :permalink, :body, :markup
-        MissingDataError = Class.new(RuntimeError)
-        # Creates a CSVPost
-        #
-        # row - Array of data, length of 4 or 5 with the columns:
-        #
-        #   1. title
-        #   2. permalink
-        #   3. body
-        #   4. published_at
-        #   5. markup (markdown, textile)
-        def initialize(row)
-          @title = row[0]        || missing_data("Post title not present in first column.")
-          @permalink = row[1]    || missing_data("Post permalink not present in second column.")
-          @body = row[2]         || missing_data("Post body not present in third column.")
-          @published_at = row[3] || missing_data("Post publish date not present in fourth column.")
-          @markup = row[4]       || "markdown"
-        end
-        def published_at
-          if @published_at && !@published_at.is_a?(DateTime)
-            @published_at = DateTime.parse(@published_at)
-          else
-            @published_at
-          end
-        end
-        def filename
-          "#{published_at.strftime("%Y-%m-%d")}-#{File.basename(permalink, ".*")}.#{markup}"
-        end
-        def missing_data(message)
-          raise MissingDataError, message
-        end
-      end
-      def self.write_post(post, options = {})
-        File.open(File.join("_posts", post.filename), "w") do |f|
-          write_frontmatter(f, post, options)
-          f.puts post.body
-        end
-      end
-      def self.write_frontmatter(f, post, options)
-        no_frontmatter = options.fetch('no-front-matter', false)
-        unless no_frontmatter
-          f.puts YAML.dump({
-            "layout"    => "post",
-            "title"     => post.title,
-            "date"      => post.published_at.to_s,
-            "permalink" => post.permalink
-          })
-          f.puts "---"
-        end
-      end
-    end
-  end
-end
+# encoding: UTF-8
+module BuntoImport
+  module Importers
+    # Importer that turns each data row of a CSV file into a post file
+    # written under the _posts directory.
+    class CSV < Importer
+      # Hands the list of libraries this importer needs to
+      # BuntoImport.require_with_fallback.
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          csv
+          fileutils
+          yaml
+        ])
+      end
+      # Declares the command-line options understood by this importer.
+      #
+      # c - the command object the options are registered on.
+      def self.specify_options(c)
+        c.option 'file', '--file NAME', 'The CSV file to import (default: "posts.csv")'
+        c.option 'no-front-matter', '--no-front-matter', 'Do not add the default front matter to the post body'
+      end
+      # Reads a csv with title, permalink, body, published_at, and markup
+      # columns and creates a post file for each row in the csv.
+      #
+      # options - Hash of importer options. 'file' names the CSV to read
+      #           (default "posts.csv"); the Hash is also passed on to
+      #           write_post.
+      #
+      # Aborts the process when the CSV file does not exist.
+      def self.process(options)
+        file = options.fetch('file', "posts.csv")
+        # Validate the input before touching the filesystem so that a failed
+        # run does not leave an empty _posts directory behind.
+        abort "Cannot find the file '#{file}'. Aborting." unless File.file?(file)
+        FileUtils.mkdir_p "_posts"
+        posts = 0
+        ::CSV.foreach(file) do |row|
+          next if row[0] == "title" # header
+          posts += 1
+          write_post(CSVPost.new(row), options)
+        end
+        Bunto.logger.info "Created #{posts} posts!"
+      end
+      # Wraps a single CSV row and exposes its columns as post attributes.
+      class CSVPost
+        attr_reader :title, :permalink, :body, :markup
+        # Raised when one of the four mandatory columns is missing.
+        MissingDataError = Class.new(RuntimeError)
+        # Creates a CSVPost
+        #
+        # row - Array of data, length of 4 or 5 with the columns:
+        #
+        #   1. title
+        #   2. permalink
+        #   3. body
+        #   4. published_at
+        #   5. markup (markdown, textile)
+        #
+        # Raises MissingDataError when any of the first four columns is
+        # missing (nil). The markup column falls back to "markdown".
+        def initialize(row)
+          @title = row[0]        || missing_data("Post title not present in first column.")
+          @permalink = row[1]    || missing_data("Post permalink not present in second column.")
+          @body = row[2]         || missing_data("Post body not present in third column.")
+          @published_at = row[3] || missing_data("Post publish date not present in fourth column.")
+          @markup = row[4]       || "markdown"
+        end
+        # Returns the publish date as a DateTime. The raw column value is
+        # parsed with DateTime.parse on first access and the result is cached;
+        # DateTime.parse raises if the value cannot be parsed as a date.
+        def published_at
+          if @published_at && !@published_at.is_a?(DateTime)
+            @published_at = DateTime.parse(@published_at)
+          else
+            @published_at
+          end
+        end
+        # Builds the post file name: "<YYYY-MM-DD>-<permalink basename without
+        # extension>.<markup>".
+        def filename
+          "#{published_at.strftime("%Y-%m-%d")}-#{File.basename(permalink, ".*")}.#{markup}"
+        end
+        # Raises MissingDataError with the given message.
+        def missing_data(message)
+          raise MissingDataError, message
+        end
+      end
+      # Writes one post to _posts/<post.filename>: the optional front matter
+      # followed by the post body.
+      #
+      # post    - the CSVPost to write.
+      # options - Hash of importer options, forwarded to write_frontmatter.
+      def self.write_post(post, options = {})
+        File.open(File.join("_posts", post.filename), "w") do |f|
+          write_frontmatter(f, post, options)
+          f.puts post.body
+        end
+      end
+      # Writes the YAML front matter for a post unless the 'no-front-matter'
+      # option is set.
+      #
+      # f       - the open, writable post file.
+      # post    - the CSVPost supplying title, date and permalink.
+      # options - Hash of importer options.
+      def self.write_frontmatter(f, post, options)
+        no_frontmatter = options.fetch('no-front-matter', false)
+        unless no_frontmatter
+          # YAML.dump output already starts with the "---" document marker, so
+          # only the closing marker has to be written explicitly.
+          f.puts YAML.dump({
+            "layout"    => "post",
+            "title"     => post.title,
+            "date"      => post.published_at.to_s,
+            "permalink" => post.permalink
+          })
+          f.puts "---"
+        end
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/drupal6.rb CHANGED

@@ -1,139 +1,53 @@
-module BuntoImport
-  module Importers
-    class Drupal6 < Importer
-      # Reads a MySQL database via Sequel and creates a post file for each story
-      # and blog node.
-      QUERY = "SELECT n.nid, \
-                      n.title, \
-                      nr.body, \
-                      n.created, \
-                      n.status, \
-                      GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags' \
-                 FROM node_revisions AS nr, \
-                      node AS n \
-                 LEFT OUTER JOIN term_node AS tn ON tn.nid = n.nid \
-                 LEFT OUTER JOIN term_data AS td ON tn.tid = td.tid \
-                WHERE (%types%) \
-                  AND n.vid = nr.vid \
-             GROUP BY n.nid"
-      def self.validate(options)
-        %w[dbname user].each do |option|
-          if options[option].nil?
-            abort "Missing mandatory option --#{option}."
-          end
-        end
-      end
-      def self.specify_options(c)
-        c.option 'dbname', '--dbname DB', 'Database name'
-        c.option 'user', '--user USER', 'Database user name'
-        c.option 'password', '--password PW', "Database user's password (default: '')"
-        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
-        c.option 'prefix', '--prefix PREFIX', 'Table prefix name'
-        c.option 'types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array, 'The Drupal content types to be imported.'
-      end
-      def self.require_deps
-        BuntoImport.require_with_fallback(%w[
-          rubygems
-          sequel
-          fileutils
-          safe_yaml
-          mysql
-        ])
-      end
-      def self.process(options)
-        dbname = options.fetch('dbname')
-        user   = options.fetch('user')
-        pass   = options.fetch('password', "")
-        host   = options.fetch('host', "localhost")
-        prefix = options.fetch('prefix', "")
-        types  = options.fetch('types', ['blog', 'story', 'article'])
-        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
-        if prefix != ''
-          QUERY[" node "] = " " + prefix + "node "
-          QUERY[" node_revisions "] = " " + prefix + "node_revisions "
-          QUERY[" term_node "] = " " + prefix + "term_node "
-          QUERY[" term_data "] = " " + prefix + "term_data "
-        end
-        types = types.join("' OR n.type = '")
-        QUERY[" WHERE (%types%) "] = " WHERE (n.type = '#{types}') "
-        FileUtils.mkdir_p "_posts"
-        FileUtils.mkdir_p "_drafts"
-        FileUtils.mkdir_p "_layouts"
-        # Create the refresh layout
-        # Change the refresh url if you customized your permalink config
-        File.open("_layouts/refresh.html", "w") do |f|
-          f.puts <<EOF
-<!DOCTYPE html>
-<html>
-<head>
-<meta http-equiv="content-type" content="text/html; charset=utf-8" />
-<meta http-equiv="refresh" content="0;url={{ page.refresh_to_post_id }}.html" />
-</head>
-</html>
-EOF
-        end
-        db[QUERY].each do |post|
-          # Get required fields and construct Bunto compatible name
-          node_id = post[:nid]
-          title = post[:title]
-          content = post[:body]
-          tags = (post[:tags] || '').downcase.strip
-          created = post[:created]
-          time = Time.at(created)
-          is_published = post[:status] == 1
-          dir = is_published ? "_posts" : "_drafts"
-          slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
-          name = time.strftime("%Y-%m-%d-") + slug + '.md'
-          # Get the relevant fields as a hash, delete empty fields and convert
-          # to YAML for the header
-          data = {
-             'layout' => 'post',
-             'title' => title.to_s,
-             'created' => created,
-             'categories' => tags.split('|')
-           }.delete_if { |k,v| v.nil? || v == ''}.each_pair {
-              |k,v| ((v.is_a? String) ? v.force_encoding("UTF-8") : v)
-           }.to_yaml
-          # Write out the data and content to file
-          File.open("#{dir}/#{name}", "w") do |f|
-            f.puts data
-            f.puts "---"
-            f.puts content
-          end
-          # Make a file to redirect from the old Drupal URL
-          if is_published
-            aliases = db["SELECT dst FROM #{prefix}url_alias WHERE src = ?", "node/#{node_id}"].all
-            aliases.push(:dst => "node/#{node_id}")
-            aliases.each do |url_alias|
-              FileUtils.mkdir_p url_alias[:dst]
-              File.open("#{url_alias[:dst]}/index.md", "w") do |f|
-                f.puts "---"
-                f.puts "layout: refresh"
-                f.puts "refresh_to_post_id: /#{time.strftime("%Y/%m/%d/") + slug}"
-                f.puts "---"
-              end
-            end
-          end
-        end
-        # TODO: Make dirs & files for nodes of type 'page'
-        # Make refresh pages for these as well
-      end
-    end
-  end
-end
+require 'bunto-import/importers/drupal_common'
+module BuntoImport
+  module Importers
+    class Drupal6 < Importer
+      include DrupalCommon
+      extend DrupalCommon::ClassMethods
+      def self.build_query(prefix, types)
+        types = types.join("' OR n.type = '")
+        types = "n.type = '#{types}'"
+        query = <<EOS
+                SELECT n.nid,
+                       n.title,
+                       nr.body,
+                       nr.teaser,
+                       n.created,
+                       n.status,
+                       n.type,
+                       GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags'
+                FROM #{prefix}node_revisions AS nr,
+                     #{prefix}node AS n
+                     LEFT OUTER JOIN #{prefix}term_node AS tn ON tn.nid = n.nid
+                     LEFT OUTER JOIN #{prefix}term_data AS td ON tn.tid = td.tid
+                WHERE (#{types})
+                  AND n.vid = nr.vid
+                GROUP BY n.nid
+EOS
+        return query
+      end
+      def self.aliases_query(prefix)
+        "SELECT src AS source, dst AS alias FROM #{prefix}url_alias WHERE src = ?"
+      end
+      def self.post_data(sql_post_data)
+        content = sql_post_data[:body].to_s
+        summary = sql_post_data[:teaser].to_s
+        tags = (sql_post_data[:tags] || '').downcase.strip
+        data = {
+          'excerpt' => summary,
+          'categories' => tags.split('|')
+        }
+         return data, content
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/drupal7.rb CHANGED

@@ -1,111 +1,54 @@
-module BuntoImport
-  module Importers
-    class Drupal7 < Importer
-      # Reads a MySQL database via Sequel and creates a post file for each story
-      # and blog node.
-      QUERY = "SELECT n.title, \
-                      fdb.body_value, \
-                      fdb.body_summary, \
-                      n.created, \
-                      n.status, \
-                      n.nid, \
-                      u.name \
-               FROM node AS n, \
-                    field_data_body AS fdb, \
-                    users AS u \
-               WHERE (%types%) \
-               AND n.nid = fdb.entity_id \
-               AND n.vid = fdb.revision_id
-               AND n.uid = u.uid"
-      def self.validate(options)
-        %w[dbname user].each do |option|
-          if options[option].nil?
-            abort "Missing mandatory option --#{option}."
-          end
-        end
-      end
-      def self.specify_options(c)
-        c.option 'dbname', '--dbname DB', 'Database name'
-        c.option 'user', '--user USER', 'Database user name'
-        c.option 'password', '--password PW', 'Database user\'s password (default: "")'
-        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
-        c.option 'prefix', '--prefix PREFIX', 'Table prefix name'
-        c.option 'types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array, 'The Drupal content types to be imported.'
-      end
-      def self.require_deps
-        BuntoImport.require_with_fallback(%w[
-          rubygems
-          sequel
-          fileutils
-          safe_yaml
-        ])
-      end
-      def self.process(options)
-        dbname = options.fetch('dbname')
-        user   = options.fetch('user')
-        pass   = options.fetch('password', "")
-        host   = options.fetch('host', "localhost")
-        prefix = options.fetch('prefix', "")
-        types  = options.fetch('types', ['blog', 'story', 'article'])
-        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
-        unless prefix.empty?
-          QUERY[" node "] = " " + prefix + "node "
-          QUERY[" field_data_body "] = " " + prefix + "field_data_body "
-          QUERY[" users "] = " " + prefix + "users "
-        end
-        types = types.join("' OR n.type = '")
-        QUERY[" WHERE (%types%) "] = " WHERE (n.type = '#{types}') "
-        FileUtils.mkdir_p "_posts"
-        FileUtils.mkdir_p "_drafts"
-        FileUtils.mkdir_p "_layouts"
-        db[QUERY].each do |post|
-          # Get required fields and construct Bunto compatible name
-          title = post[:title]
-          content = post[:body_value]
-          summary = post[:body_summary]
-          created = post[:created]
-          author = post[:name]
-          nid = post[:nid]
-          time = Time.at(created)
-          is_published = post[:status] == 1
-          dir = is_published ? "_posts" : "_drafts"
-          slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
-          name = time.strftime("%Y-%m-%d-") + slug + '.md'
-          # Get the relevant fields as a hash, delete empty fields and convert
-          # to YAML for the header
-          data = {
-            'layout' => 'post',
-            'title' => title.strip.force_encoding("UTF-8"),
-            'author' => author,
-            'nid' => nid,
-            'created' => created,
-            'excerpt' => summary
-          }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
-          # Write out the data and content to file
-          File.open("#{dir}/#{name}", "w") do |f|
-            f.puts data
-            f.puts "---"
-            f.puts content
-          end
-        end
-        # TODO: Make dirs & files for nodes of type 'page'
-          # Make refresh pages for these as well
-        # TODO: Make refresh dirs & files according to entries in url_alias table
-      end
-    end
-  end
-end
+require 'bunto-import/importers/drupal_common'
+module BuntoImport
+  module Importers
+    class Drupal7 < Importer
+      include DrupalCommon
+      extend DrupalCommon::ClassMethods
+      def self.build_query(prefix, types)
+        types = types.join("' OR n.type = '")
+        types = "n.type = '#{types}'"
+        query = <<EOS
+                SELECT n.nid,
+                       n.title,
+                       fdb.body_value,
+                       fdb.body_summary,
+                       n.created,
+                       n.status,
+                       n.type,
+                       GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags'
+                FROM #{prefix}field_data_body AS fdb,
+                     #{prefix}node AS n
+                     LEFT OUTER JOIN #{prefix}taxonomy_index AS ti ON ti.nid = n.nid
+                     LEFT OUTER JOIN #{prefix}taxonomy_term_data AS td ON ti.tid = td.tid
+                WHERE (#{types})
+                  AND n.nid = fdb.entity_id
+                  AND n.vid = fdb.revision_id
+                GROUP BY n.nid"
+EOS
+        return query
+      end
+      def self.aliases_query(prefix)
+        "SELECT source, alias FROM #{prefix}url_alias WHERE source = ?"
+      end
+      def self.post_data(sql_post_data)
+        content = sql_post_data[:body_value].to_s
+        summary = sql_post_data[:body_summary].to_s
+        tags = (sql_post_data[:tags] || '').downcase.strip
+        data = {
+          'excerpt' => summary,
+          'categories' => tags.split('|')
+        }
+        return data, content
+      end
+    end
+  end
+end