RubyGems - bunto-import - Versions diffs - 1.0.0 - Mend

bunto-import 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

checksums.yaml +7 -0
data/LICENSE +21 -0
data/README.markdown +32 -0
data/lib/bunto-import.rb +49 -0
data/lib/bunto-import/importer.rb +26 -0
data/lib/bunto-import/importers.rb +10 -0
data/lib/bunto-import/importers/behance.rb +80 -0
data/lib/bunto-import/importers/blogger.rb +264 -0
data/lib/bunto-import/importers/csv.rb +96 -0
data/lib/bunto-import/importers/drupal6.rb +139 -0
data/lib/bunto-import/importers/drupal7.rb +111 -0
data/lib/bunto-import/importers/easyblog.rb +96 -0
data/lib/bunto-import/importers/enki.rb +74 -0
data/lib/bunto-import/importers/ghost.rb +68 -0
data/lib/bunto-import/importers/google_reader.rb +64 -0
data/lib/bunto-import/importers/joomla.rb +90 -0
data/lib/bunto-import/importers/joomla3.rb +91 -0
data/lib/bunto-import/importers/jrnl.rb +125 -0
data/lib/bunto-import/importers/marley.rb +72 -0
data/lib/bunto-import/importers/mephisto.rb +99 -0
data/lib/bunto-import/importers/mt.rb +257 -0
data/lib/bunto-import/importers/posterous.rb +130 -0
data/lib/bunto-import/importers/rss.rb +62 -0
data/lib/bunto-import/importers/s9y.rb +60 -0
data/lib/bunto-import/importers/textpattern.rb +70 -0
data/lib/bunto-import/importers/tumblr.rb +289 -0
data/lib/bunto-import/importers/typo.rb +88 -0
data/lib/bunto-import/importers/wordpress.rb +372 -0
data/lib/bunto-import/importers/wordpressdotcom.rb +207 -0
data/lib/bunto-import/util.rb +76 -0
data/lib/bunto-import/version.rb +3 -0
data/lib/bunto/commands/import.rb +79 -0
metadata +374 -0

data/lib/bunto-import/importers/csv.rb ADDED

@@ -0,0 +1,96 @@
+# encoding: UTF-8
+module BuntoImport
+  module Importers
+    class CSV < Importer
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          csv
+          fileutils
+          yaml
+        ])
+      end
+      def self.specify_options(c)
+        c.option 'file', '--file NAME', 'The CSV file to import (default: "posts.csv")'
+        c.option 'no-front-matter', '--no-front-matter', 'Do not add the default front matter to the post body'
+      end
+      # Reads a csv with title, permalink, body, published_at, and filter.
+      # It creates a post file for each row in the csv
+      def self.process(options)
+        file = options.fetch('file', "posts.csv")
+        FileUtils.mkdir_p "_posts"
+        posts = 0
+        abort "Cannot find the file '#{file}'. Aborting." unless File.file?(file)
+        ::CSV.foreach(file) do |row|
+          next if row[0] == "title" # header
+          posts += 1
+          write_post(CSVPost.new(row), options)
+        end
+        Bunto.logger.info "Created #{posts} posts!"
+      end
+      class CSVPost
+        attr_reader :title, :permalink, :body, :markup
+        MissingDataError = Class.new(RuntimeError)
+        # Creates a CSVPost
+        #
+        # row - Array of data, length of 4 or 5 with the columns:
+        #
+        #   1. title
+        #   2. permalink
+        #   3. body
+        #   4. published_at
+        #   5. markup (markdown, textile)
+        def initialize(row)
+          @title = row[0]        || missing_data("Post title not present in first column.")
+          @permalink = row[1]    || missing_data("Post permalink not present in second column.")
+          @body = row[2]         || missing_data("Post body not present in third column.")
+          @published_at = row[3] || missing_data("Post publish date not present in fourth column.")
+          @markup = row[4]       || "markdown"
+        end
+        def published_at
+          if @published_at && !@published_at.is_a?(DateTime)
+            @published_at = DateTime.parse(@published_at)
+          else
+            @published_at
+          end
+        end
+        def filename
+          "#{published_at.strftime("%Y-%m-%d")}-#{File.basename(permalink, ".*")}.#{markup}"
+        end
+        def missing_data(message)
+          raise MissingDataError, message
+        end
+      end
+      def self.write_post(post, options = {})
+        File.open(File.join("_posts", post.filename), "w") do |f|
+          write_frontmatter(f, post, options)
+          f.puts post.body
+        end
+      end
+      def self.write_frontmatter(f, post, options)
+        no_frontmatter = options.fetch('no-front-matter', false)
+        unless no_frontmatter
+          f.puts YAML.dump({
+            "layout"    => "post",
+            "title"     => post.title,
+            "date"      => post.published_at.to_s,
+            "permalink" => post.permalink
+          })
+          f.puts "---"
+        end
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/drupal6.rb ADDED

@@ -0,0 +1,139 @@
+module BuntoImport
+  module Importers
+    class Drupal6 < Importer
+      # Reads a MySQL database via Sequel and creates a post file for each story
+      # and blog node.
+      QUERY = "SELECT n.nid, \
+                      n.title, \
+                      nr.body, \
+                      n.created, \
+                      n.status, \
+                      GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags' \
+                 FROM node_revisions AS nr, \
+                      node AS n \
+                 LEFT OUTER JOIN term_node AS tn ON tn.nid = n.nid \
+                 LEFT OUTER JOIN term_data AS td ON tn.tid = td.tid \
+                WHERE (%types%) \
+                  AND n.vid = nr.vid \
+             GROUP BY n.nid"
+      def self.validate(options)
+        %w[dbname user].each do |option|
+          if options[option].nil?
+            abort "Missing mandatory option --#{option}."
+          end
+        end
+      end
+      def self.specify_options(c)
+        c.option 'dbname', '--dbname DB', 'Database name'
+        c.option 'user', '--user USER', 'Database user name'
+        c.option 'password', '--password PW', "Database user's password (default: '')"
+        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
+        c.option 'prefix', '--prefix PREFIX', 'Table prefix name'
+        c.option 'types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array, 'The Drupal content types to be imported.'
+      end
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          rubygems
+          sequel
+          fileutils
+          safe_yaml
+          mysql
+        ])
+      end
+      def self.process(options)
+        dbname = options.fetch('dbname')
+        user   = options.fetch('user')
+        pass   = options.fetch('password', "")
+        host   = options.fetch('host', "localhost")
+        prefix = options.fetch('prefix', "")
+        types  = options.fetch('types', ['blog', 'story', 'article'])
+        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
+        if prefix != ''
+          QUERY[" node "] = " " + prefix + "node "
+          QUERY[" node_revisions "] = " " + prefix + "node_revisions "
+          QUERY[" term_node "] = " " + prefix + "term_node "
+          QUERY[" term_data "] = " " + prefix + "term_data "
+        end
+        types = types.join("' OR n.type = '")
+        QUERY[" WHERE (%types%) "] = " WHERE (n.type = '#{types}') "
+        FileUtils.mkdir_p "_posts"
+        FileUtils.mkdir_p "_drafts"
+        FileUtils.mkdir_p "_layouts"
+        # Create the refresh layout
+        # Change the refresh url if you customized your permalink config
+        File.open("_layouts/refresh.html", "w") do |f|
+          f.puts <<EOF
+<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+<meta http-equiv="refresh" content="0;url={{ page.refresh_to_post_id }}.html" />
+</head>
+</html>
+EOF
+        end
+        db[QUERY].each do |post|
+          # Get required fields and construct Bunto compatible name
+          node_id = post[:nid]
+          title = post[:title]
+          content = post[:body]
+          tags = (post[:tags] || '').downcase.strip
+          created = post[:created]
+          time = Time.at(created)
+          is_published = post[:status] == 1
+          dir = is_published ? "_posts" : "_drafts"
+          slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
+          name = time.strftime("%Y-%m-%d-") + slug + '.md'
+          # Get the relevant fields as a hash, delete empty fields and convert
+          # to YAML for the header
+          data = {
+             'layout' => 'post',
+             'title' => title.to_s,
+             'created' => created,
+             'categories' => tags.split('|')
+           }.delete_if { |k,v| v.nil? || v == ''}.each_pair {
+              |k,v| ((v.is_a? String) ? v.force_encoding("UTF-8") : v)
+           }.to_yaml
+          # Write out the data and content to file
+          File.open("#{dir}/#{name}", "w") do |f|
+            f.puts data
+            f.puts "---"
+            f.puts content
+          end
+          # Make a file to redirect from the old Drupal URL
+          if is_published
+            aliases = db["SELECT dst FROM #{prefix}url_alias WHERE src = ?", "node/#{node_id}"].all
+            aliases.push(:dst => "node/#{node_id}")
+            aliases.each do |url_alias|
+              FileUtils.mkdir_p url_alias[:dst]
+              File.open("#{url_alias[:dst]}/index.md", "w") do |f|
+                f.puts "---"
+                f.puts "layout: refresh"
+                f.puts "refresh_to_post_id: /#{time.strftime("%Y/%m/%d/") + slug}"
+                f.puts "---"
+              end
+            end
+          end
+        end
+        # TODO: Make dirs & files for nodes of type 'page'
+        # Make refresh pages for these as well
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/drupal7.rb ADDED

@@ -0,0 +1,111 @@
+module BuntoImport
+  module Importers
+    class Drupal7 < Importer
+      # Reads a MySQL database via Sequel and creates a post file for each story
+      # and blog node.
+      QUERY = "SELECT n.title, \
+                      fdb.body_value, \
+                      fdb.body_summary, \
+                      n.created, \
+                      n.status, \
+                      n.nid, \
+                      u.name \
+               FROM node AS n, \
+                    field_data_body AS fdb, \
+                    users AS u \
+               WHERE (%types%) \
+               AND n.nid = fdb.entity_id \
+               AND n.vid = fdb.revision_id
+               AND n.uid = u.uid"
+      def self.validate(options)
+        %w[dbname user].each do |option|
+          if options[option].nil?
+            abort "Missing mandatory option --#{option}."
+          end
+        end
+      end
+      def self.specify_options(c)
+        c.option 'dbname', '--dbname DB', 'Database name'
+        c.option 'user', '--user USER', 'Database user name'
+        c.option 'password', '--password PW', 'Database user\'s password (default: "")'
+        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
+        c.option 'prefix', '--prefix PREFIX', 'Table prefix name'
+        c.option 'types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array, 'The Drupal content types to be imported.'
+      end
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          rubygems
+          sequel
+          fileutils
+          safe_yaml
+        ])
+      end
+      def self.process(options)
+        dbname = options.fetch('dbname')
+        user   = options.fetch('user')
+        pass   = options.fetch('password', "")
+        host   = options.fetch('host', "localhost")
+        prefix = options.fetch('prefix', "")
+        types  = options.fetch('types', ['blog', 'story', 'article'])
+        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
+        unless prefix.empty?
+          QUERY[" node "] = " " + prefix + "node "
+          QUERY[" field_data_body "] = " " + prefix + "field_data_body "
+          QUERY[" users "] = " " + prefix + "users "
+        end
+        types = types.join("' OR n.type = '")
+        QUERY[" WHERE (%types%) "] = " WHERE (n.type = '#{types}') "
+        FileUtils.mkdir_p "_posts"
+        FileUtils.mkdir_p "_drafts"
+        FileUtils.mkdir_p "_layouts"
+        db[QUERY].each do |post|
+          # Get required fields and construct Bunto compatible name
+          title = post[:title]
+          content = post[:body_value]
+          summary = post[:body_summary]
+          created = post[:created]
+          author = post[:name]
+          nid = post[:nid]
+          time = Time.at(created)
+          is_published = post[:status] == 1
+          dir = is_published ? "_posts" : "_drafts"
+          slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
+          name = time.strftime("%Y-%m-%d-") + slug + '.md'
+          # Get the relevant fields as a hash, delete empty fields and convert
+          # to YAML for the header
+          data = {
+            'layout' => 'post',
+            'title' => title.strip.force_encoding("UTF-8"),
+            'author' => author,
+            'nid' => nid,
+            'created' => created,
+            'excerpt' => summary
+          }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
+          # Write out the data and content to file
+          File.open("#{dir}/#{name}", "w") do |f|
+            f.puts data
+            f.puts "---"
+            f.puts content
+          end
+        end
+        # TODO: Make dirs & files for nodes of type 'page'
+          # Make refresh pages for these as well
+        # TODO: Make refresh dirs & files according to entries in url_alias table
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/easyblog.rb ADDED

@@ -0,0 +1,96 @@
+module BuntoImport
+  module Importers
+    class Easyblog < Importer
+      def self.validate(options)
+        %w[dbname user].each do |option|
+          if options[option].nil?
+            abort "Missing mandatory option --#{option}."
+          end
+        end
+      end
+      def self.specify_options(c)
+        c.option 'dbname', '--dbname', 'Database name'
+        c.option 'user', '--user', 'Database user name'
+        c.option 'password', '--password', "Database user's password (default: '')"
+        c.option 'host', '--host', 'Database host name'
+        c.option 'section', '--section', 'Table prefix name'
+        c.option 'prefix', '--prefix', 'Table prefix name'
+      end
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+                                           rubygems
+                                          sequel
+                                          fileutils
+                                          safe_yaml
+                                          ])
+      end
+      def self.process(options)
+        dbname  = options.fetch('dbname')
+        user    = options.fetch('user')
+        pass    = options.fetch('password', '')
+        host    = options.fetch('host', "localhost")
+        section = options.fetch('section', '1')
+        table_prefix = options.fetch('prefix', "jos_")
+        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
+        FileUtils.mkdir_p("_posts")
+        # Reads a MySQL database via Sequel and creates a post file for each
+        # post in wp_posts that has post_status = 'publish'. This restriction is
+        # made because 'draft' posts are not guaranteed to have valid dates.
+        query = "
+        select
+	  ep.`title`, `permalink` as alias, concat(`intro`, `content`) as content, ep.`created`, ep.`id`, ec.`title` as category, tags
+        from
+          #{table_prefix}easyblog_post ep
+          left join #{table_prefix}easyblog_category ec on (ep.category_id = ec.id)
+          left join (
+            select
+              ept.post_id,
+              group_concat(et.alias order by alias separator ' ') as tags
+            from
+              #{table_prefix}easyblog_post_tag ept
+              join #{table_prefix}easyblog_tag et on (ept.tag_id = et.id)
+            group by
+              ept.post_id) x on (ep.id = x.post_id);
+        "
+        db[query].each do |post|
+          # Get required fields and construct Bunto compatible name.
+          title = post[:title]
+          slug = post[:alias]
+          date = post[:created]
+          content = post[:content]
+          category = post[:category]
+          tags = post[:tags]
+          name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day,
+                                                 slug]
+          # Get the relevant fields as a hash, delete empty fields and convert
+          # to YAML for the header.
+          data = {
+            'layout' => 'post',
+            'title' => title.to_s,
+            'joomla_id' => post[:id],
+            'joomla_url' => post[:alias],
+            'category' => post[:category],
+            'tags' => post[:tags],
+            'date' => date
+          }.delete_if { |k,v| v.nil? || v == '' }.to_yaml
+          # Write out the data and content to file
+          File.open("_posts/#{name}", "w") do |f|
+            f.puts data
+            f.puts "---"
+            f.puts content
+          end
+        end
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/enki.rb ADDED

@@ -0,0 +1,74 @@
+module BuntoImport
+    module Importers
+    class Enki < Importer
+      SQL = <<-EOS
+        SELECT p.id,
+               p.title,
+               p.slug,
+               p.body,
+               p.published_at as date,
+               p.cached_tag_list as tags
+        FROM posts p
+EOS
+      def self.validate(options)
+        %w[dbname user].each do |option|
+          if options[option].nil?
+            abort "Missing mandatory option --#{option}."
+          end
+        end
+      end
+      def self.specify_options(c)
+        c.option 'dbname', '--dbname', 'Database name'
+        c.option 'user', '--user', 'Database name'
+        c.option 'password', '--password', 'Database name (default: "")'
+        c.option 'host', '--host', 'Database name'
+      end
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          rubygems
+          sequel
+          fileutils
+          pg
+          yaml
+        ])
+      end
+      # Just working with postgres, but can be easily adapted
+      # to work with both mysql and postgres.
+      def self.process(options)
+        dbname = options.fetch('dbname')
+        user   = options.fetch('user')
+        pass   = options.fetch('password', "")
+        host   = options.fetch('host', "localhost")
+        FileUtils.mkdir_p('_posts')
+        db = Sequel.postgres(:database => dbname,
+                             :user => user,
+                             :password => pass,
+                             :host => host,
+                             :encoding => 'utf8')
+        db[SQL].each do |post|
+          name = [ sprintf("%.04d", post[:date].year),
+                   sprintf("%.02d", post[:date].month),
+                   sprintf("%.02d", post[:date].day),
+                   post[:slug].strip ].join('-')
+          name += '.textile'
+          File.open("_posts/#{name}", 'w') do |f|
+            f.puts({ 'layout'   => 'post',
+                     'title'    => post[:title].to_s,
+                     'enki_id'  => post[:id],
+                     'categories'  => post[:tags]
+                   }.delete_if { |k, v| v.nil? || v == '' }.to_yaml)
+            f.puts '---'
+            f.puts post[:body].delete("\r")
+          end
+        end
+      end
+    end
+  end
+end