RubyGems - bunto-import - Versions diffs - 1.0.0 - Mend

bunto-import 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

checksums.yaml +7 -0
data/LICENSE +21 -0
data/README.markdown +32 -0
data/lib/bunto-import.rb +49 -0
data/lib/bunto-import/importer.rb +26 -0
data/lib/bunto-import/importers.rb +10 -0
data/lib/bunto-import/importers/behance.rb +80 -0
data/lib/bunto-import/importers/blogger.rb +264 -0
data/lib/bunto-import/importers/csv.rb +96 -0
data/lib/bunto-import/importers/drupal6.rb +139 -0
data/lib/bunto-import/importers/drupal7.rb +111 -0
data/lib/bunto-import/importers/easyblog.rb +96 -0
data/lib/bunto-import/importers/enki.rb +74 -0
data/lib/bunto-import/importers/ghost.rb +68 -0
data/lib/bunto-import/importers/google_reader.rb +64 -0
data/lib/bunto-import/importers/joomla.rb +90 -0
data/lib/bunto-import/importers/joomla3.rb +91 -0
data/lib/bunto-import/importers/jrnl.rb +125 -0
data/lib/bunto-import/importers/marley.rb +72 -0
data/lib/bunto-import/importers/mephisto.rb +99 -0
data/lib/bunto-import/importers/mt.rb +257 -0
data/lib/bunto-import/importers/posterous.rb +130 -0
data/lib/bunto-import/importers/rss.rb +62 -0
data/lib/bunto-import/importers/s9y.rb +60 -0
data/lib/bunto-import/importers/textpattern.rb +70 -0
data/lib/bunto-import/importers/tumblr.rb +289 -0
data/lib/bunto-import/importers/typo.rb +88 -0
data/lib/bunto-import/importers/wordpress.rb +372 -0
data/lib/bunto-import/importers/wordpressdotcom.rb +207 -0
data/lib/bunto-import/util.rb +76 -0
data/lib/bunto-import/version.rb +3 -0
data/lib/bunto/commands/import.rb +79 -0
metadata +374 -0

data/lib/bunto-import/importers/mephisto.rb ADDED

@@ -0,0 +1,99 @@
+module BuntoImport
+  module Importers
+    class Mephisto < Importer
+      #Accepts a hash with database config variables, exports mephisto posts into a csv
+      #export PGPASSWORD if you must
+      def self.postgres(c)
+        sql = <<-SQL
+        BEGIN;
+        CREATE TEMP TABLE bunto AS
+          SELECT title, permalink, body, published_at, filter FROM contents
+          WHERE user_id = 1 AND type = 'Article' ORDER BY published_at;
+        COPY bunto TO STDOUT WITH CSV HEADER;
+        ROLLBACK;
+        SQL
+        command = %Q(psql -h #{c[:host] || "localhost"} -c "#{sql.strip}" #{c[:database]} #{c[:username]} -o #{c[:filename] || "posts.csv"})
+        puts command
+        `#{command}`
+        CSV.process
+      end
+      def self.validate(options)
+        %w[dbname user].each do |option|
+          if options[option].nil?
+            abort "Missing mandatory option --#{option}."
+          end
+        end
+      end
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          rubygems
+          sequel
+          fastercsv
+          fileutils
+        ])
+      end
+      def self.specify_options(c)
+        c.option 'dbname', '--dbname DB', 'Database name'
+        c.option 'user', '--user USER', 'Database user name'
+        c.option 'password', '--password PW', "Database user's password (default: '')"
+        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
+      end
+      # This query will pull blog posts from all entries across all blogs. If
+      # you've got unpublished, deleted or otherwise hidden posts please sift
+      # through the created posts to make sure nothing is accidently published.
+      QUERY = "SELECT id, \
+                      permalink, \
+                      body, \
+                      published_at, \
+                      title \
+               FROM contents \
+               WHERE user_id = 1 AND \
+                     type = 'Article' AND \
+                     published_at IS NOT NULL \
+               ORDER BY published_at"
+      def self.process(options)
+        dbname = options.fetch('dbname')
+        user   = options.fetch('user')
+        pass   = options.fetch('password', '')
+        host   = options.fetch('host', "localhost")
+        db = Sequel.mysql(dbname, :user => user,
+                                  :password => pass,
+                                  :host => host,
+                                  :encoding => 'utf8')
+        FileUtils.mkdir_p "_posts"
+        db[QUERY].each do |post|
+          title = post[:title]
+          slug = post[:permalink]
+          date = post[:published_at]
+          content = post[:body]
+          # Ideally, this script would determine the post format (markdown,
+          # html, etc) and create files with proper extensions. At this point
+          # it just assumes that markdown will be acceptable.
+          name = [date.year, date.month, date.day, slug].join('-') + ".markdown"
+          data = {
+             'layout' => 'post',
+             'title' => title.to_s,
+             'mt_id' => post[:entry_id],
+           }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
+          File.open("_posts/#{name}", "w") do |f|
+            f.puts data
+            f.puts "---"
+            f.puts content
+          end
+        end
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/mt.rb ADDED

@@ -0,0 +1,257 @@
+module BuntoImport
+  module Importers
+    class MT < Importer
+      SUPPORTED_ENGINES = %{mysql postgres sqlite}
+      STATUS_DRAFT = 1
+      STATUS_PUBLISHED = 2
+      MORE_CONTENT_SEPARATOR = '<!--more-->'
+      def self.default_options
+        {
+          'blog_id' => nil,
+          'categories' => true,
+          'dest_encoding' => 'utf-8',
+          'src_encoding' => 'utf-8',
+          'comments' => false
+        }
+      end
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          rubygems
+          sequel
+          fileutils
+          safe_yaml
+        ])
+      end
+      def self.specify_options(c)
+        c.option 'engine', "--engine ENGINE", "Database engine, (default: 'mysql', postgres also supported)"
+        c.option 'dbname', '--dbname DB', 'Database name'
+        c.option 'user', '--user USER', 'Database user name'
+        c.option 'password', '--password PW', "Database user's password, (default: '')"
+        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
+        c.option 'port', '--port PORT', 'Custom database port connect to (optional)'
+        c.option 'blog_id', '--blog_id ID', 'Specify a single Movable Type blog ID to import (default: all blogs)'
+        c.option 'categories', '--categories', "If true, save post's categories in its YAML front matter. (default: true)"
+        c.option 'src_encoding', '--src_encoding ENCODING', "Encoding of strings from database. (default: UTF-8)"
+        c.option 'dest_encoding', '--dest_encoding ENCODING', "Encoding of output strings. (default: UTF-8)"
+        c.option 'comments','--comments', "If true, output comments in _comments directory (default: false)"
+      end
+      # By default this migrator will include posts for all your MovableType blogs.
+      # Specify a single blog by providing blog_id.
+      # Main migrator function. Call this to perform the migration.
+      #
+      # dbname::  The name of the database
+      # user::    The database user name
+      # pass::    The database user's password
+      # host::    The address of the MySQL database host. Default: 'localhost'
+      # options:: A hash of configuration options
+      #
+      # Supported options are:
+      #
+      # blog_id::         Specify a single MovableType blog to export by providing blog_id.
+      #                   Default: nil, importer will include posts for all blogs.
+      # categories::      If true, save the post's categories in its
+      #                   YAML front matter. Default: true
+      # src_encoding::    Encoding of strings from the database. Default: UTF-8
+      #                   If your output contains mangled characters, set src_encoding to
+      #                   something appropriate for your database charset.
+      # dest_encoding::   Encoding of output strings. Default: UTF-8
+      # comments::        If true, output comments in _comments directory, like the one
+      #                   mentioned at https://github.com/mpalmer/bunto-static-comments/
+      def self.process(options)
+        options  = default_options.merge(options)
+        comments = options.fetch('comments')
+        posts_name_by_id = {} if comments
+        db = database_from_opts(options)
+        post_categories = db[:mt_placement].join(:mt_category, :category_id => :placement_category_id)
+        FileUtils.mkdir_p "_posts"
+        posts = db[:mt_entry]
+        posts = posts.filter(:entry_blog_id => options['blog_id']) if options['blog_id']
+        posts.each do |post|
+          categories = post_categories.filter(
+            :mt_placement__placement_entry_id => post[:entry_id]
+          ).map {|ea| encode(ea[:category_basename], options) }
+          file_name = post_file_name(post, options)
+          data = post_metadata(post, options)
+          data['categories'] = categories if !categories.empty? && options['categories']
+          yaml_front_matter = data.delete_if { |_,v| v.nil? || v == '' }.to_yaml
+          # save post path for comment processing
+          posts_name_by_id[data['post_id']] = file_name if comments
+          content = post_content(post, options)
+          File.open("_posts/#{file_name}", "w") do |f|
+            f.puts yaml_front_matter
+            f.puts "---"
+            f.puts encode(content, options)
+          end
+        end
+        # process comment output, if enabled
+        if comments
+          FileUtils.mkdir_p "_comments"
+          comments = db[:mt_comment]
+          comments.each do |comment|
+            if posts_name_by_id.key?(comment[:comment_entry_id]) # if the entry exists
+              dir_name, base_name = comment_file_dir_and_base_name(posts_name_by_id, comment, options)
+              FileUtils.mkdir_p "_comments/#{dir_name}"
+              data = comment_metadata(comment, options)
+              content = comment_content(comment, options)
+              yaml_front_matter = data.delete_if { |_,v| v.nil? || v == '' }.to_yaml
+              File.open("_comments/#{dir_name}/#{base_name}", "w") do |f|
+                f.puts yaml_front_matter
+                f.puts "---"
+                f.puts encode(content, options)
+              end
+            end
+          end
+        end
+      end
+      # Extracts metadata for YAML front matter from post
+      def self.post_metadata(post, options = default_options)
+        metadata = {
+          'layout' => 'post',
+          'title' => encode(post[:entry_title], options),
+          'date' => post_date(post).strftime("%Y-%m-%d %H:%M:%S %z"),
+          'excerpt' => encode(post[:entry_excerpt].to_s, options),
+          'mt_id' => post[:entry_id],
+          'blog_id' => post[:entry_blog_id],
+          'post_id' => post[:entry_id], # for link with comments
+          'basename' => post[:entry_basename]
+        }
+        metadata['published'] = false if post[:entry_status] != STATUS_PUBLISHED
+        metadata
+      end
+      # Different versions of MT used different column names
+      def self.post_date(post)
+        post[:entry_authored_on] || post[:entry_created_on]
+      end
+      # Extracts text body from post
+      def self.extra_entry_text_empty?(post)
+        post[:entry_text_more].nil? || post[:entry_text_more].strip.empty?
+      end
+      def self.post_content(post, options = default_options)
+        if extra_entry_text_empty?(post)
+          post[:entry_text]
+        else
+          post[:entry_text] + "\n\n#{MORE_CONTENT_SEPARATOR}\n\n" + post[:entry_text_more]
+        end
+      end
+      def self.post_file_name(post, options = default_options)
+        date = post_date(post)
+        slug = post[:entry_basename]
+        file_ext = suffix(post[:entry_convert_breaks])
+        "#{date.strftime('%Y-%m-%d')}-#{slug}.#{file_ext}"
+      end
+      # Extracts metadata for YAML front matter from comment
+      def self.comment_metadata(comment, options = default_options)
+        metadata = {
+          'layout' => 'comment',
+          'comment_id' => comment[:comment_id],
+          'post_id' => comment[:comment_entry_id],
+          'author' => encode(comment[:comment_author], options),
+          'email' => comment[:comment_email],
+          'commenter_id' => comment[:comment_commenter_id],
+          'date' => comment_date(comment).strftime("%Y-%m-%d %H:%M:%S %z"),
+          'visible' => comment[:comment_visible] == 1,
+          'ip' => comment[:comment_ip],
+          'url' => comment[:comment_url]
+        }
+        metadata
+      end
+      # Different versions of MT used different column names
+      def self.comment_date(comment)
+        comment[:comment_modified_on] || comment[:comment_created_on]
+      end
+      def self.comment_content(comment, options = default_options)
+        comment[:comment_text]
+      end
+      def self.comment_file_dir_and_base_name(posts_name_by_id, comment, options = default_options)
+        post_basename = posts_name_by_id[comment[:comment_entry_id]].sub(/\.\w+$/, '')
+        comment_id = comment[:comment_id]
+        [post_basename, "#{comment_id}.markdown"]
+      end
+      def self.encode(str, options = default_options)
+        if str.respond_to?(:encoding)
+          str.encode(options['dest_encoding'], options['src_encoding'])
+        else
+          str
+        end
+      end
+      # Ideally, this script would determine the post format (markdown,
+      # html, etc) and create files with proper extensions. At this point
+      # it just assumes that markdown will be acceptable.
+      def self.suffix(entry_type)
+        if entry_type.nil? || entry_type.include?("markdown") || entry_type.include?("__default__")
+          # The markdown plugin I have saves this as
+          # "markdown_with_smarty_pants", so I just look for "markdown".
+          "markdown"
+        elsif entry_type.include?("textile")
+          # This is saved as "textile_2" on my installation of MT 5.1.
+          "textile"
+        elsif entry_type == "0" || entry_type.include?("richtext")
+          # Richtext looks to me like it's saved as HTML, so I include it here.
+          "html"
+        else
+          # Other values might need custom work.
+          entry_type
+        end
+      end
+      def self.database_from_opts(options)
+        engine   = options.fetch('engine', 'mysql')
+        dbname   = options.fetch('dbname')
+        case engine
+        when "sqlite"
+          Sequel.sqlite(dbname)
+        when "mysql", "postgres"
+          db_connect_opts = {
+            :host =>     options.fetch('host', 'localhost'),
+            :user =>     options.fetch('user'),
+            :password => options.fetch('password', '')
+          }
+          db_connect_opts = options['port'] if options['port']
+          Sequel.public_send(
+            engine,
+            dbname,
+            db_connect_opts
+          )
+        else
+          abort("Unsupported engine: '#{engine}'. Must be one of #{SUPPORTED_ENGINES.join(', ')}")
+        end
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/posterous.rb ADDED

@@ -0,0 +1,130 @@
+module BuntoImport
+  module Importers
+    # Importer that downloads posts (and optionally their images) from the
+    # Posterous API and writes them as HTML files under _posts/.
+    class Posterous < Importer
+      # Registers the command-line options understood by this importer.
+      def self.specify_options(c)
+        c.option 'email', '--email EMAIL', 'Posterous email address'
+        c.option 'password', '--password PW', 'Posterous password'
+        c.option 'api_token', '--token TOKEN', 'Posterous API Token'
+      end
+      # Loads the libraries this importer relies on.
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          rubygems
+          bunto
+          fileutils
+          uri
+          json
+          net/http
+        ])
+      end
+      # Performs an authenticated GET against posterous.com using the
+      # credentials stored by process, following up to +limit+ redirects.
+      # Returns the successful response; raises ArgumentError once the
+      # redirect budget is used up and calls response.error! for any other
+      # kind of response.
+      def self.fetch(uri_str, limit = 10)
+        # You should choose better exception.
+        raise ArgumentError, 'Stuck in a redirect loop. Please double check your email and password' if limit == 0
+        response = nil
+        Net::HTTP.start('posterous.com') do |http|
+          req = Net::HTTP::Get.new(uri_str)
+          req.basic_auth @email, @pass
+          response = http.request(req)
+        end
+        case response
+          when Net::HTTPSuccess     then response
+          when Net::HTTPRedirection then fetch(response['location'], limit - 1)
+          else response.error!
+        end
+      end
+      # Downloads +url+ without authentication, following up to +limit+
+      # redirects, and returns the response body.
+      # Defined at class level so it is not redefined on every
+      # fetch_images call.
+      def self.fetch_one(url, limit = 10)
+        raise ArgumentError, 'HTTP redirect too deep' if limit == 0
+        response = Net::HTTP.get_response(URI.parse(url))
+        case response
+        when Net::HTTPSuccess     then response.body
+        when Net::HTTPRedirection then fetch_one(response['location'], limit - 1)
+        else
+          response.error!
+        end
+      end
+      # Downloads the full-size version of every image in +imgs+ into
+      # +directory+ (created if missing) and returns the written paths.
+      #
+      # imgs:: Array of hashes; each is read as img["full"]["url"].
+      def self.fetch_images(directory, imgs)
+        FileUtils.mkdir_p directory
+        imgs.map do |img|
+          fullurl = img["full"]["url"]
+          imgname = URI.parse(fullurl).path.split("/")[-1]
+          target = directory + "/" + imgname
+          imgdata = fetch_one(fullurl)
+          # File.open always treats the argument as a file path, whereas
+          # Kernel#open can interpret some strings specially.
+          File.open(target, "wb") do |file|
+            file.write imgdata
+          end
+          target
+        end
+      end
+      # Fetches the site's posts page by page until an empty page is
+      # returned and writes each post to "_posts/<date>-<slug>.html".
+      #
+      # options:: Hash with required string keys 'email', 'password' and
+      #           'api_token'. The settings :include_imgs (default false),
+      #           :blog (default 'primary') and :base_path (default '/') may
+      #           be supplied under either symbol or string keys.
+      def self.process(options)
+        @email     = options.fetch('email')
+        @pass      = options.fetch('password')
+        @api_token = options.fetch('api_token')
+        defaults = { :include_imgs => false, :blog => 'primary', :base_path => '/' }
+        # Start from the defaults and overlay whatever the caller supplied.
+        opts = defaults.dup
+        defaults.each_key do |key|
+          supplied = options.key?(key) ? options[key] : options[key.to_s]
+          opts[key] = supplied unless supplied.nil?
+        end
+        FileUtils.mkdir_p "_posts"
+        page = 1
+        loop do
+          path = "/api/v2/users/me/sites/#{opts[:blog]}/posts?api_token=#{@api_token}"
+          # The first request carries no page parameter.
+          path += "&page=#{page}" if page > 1
+          posts = JSON.parse(fetch(path).body)
+          break unless posts.any?
+          posts.each do |post|
+            title = post["title"]
+            slug = title.to_s.gsub(/[^[:alnum:]]+/, '-').downcase
+            date = Date.parse(post["display_date"])
+            content = post["body_html"]
+            published = !post["is_private"]
+            basename = "%02d-%02d-%02d-%s" % [date.year, date.month, date.day, slug]
+            name = basename + '.html'
+            # Images:
+            if opts[:include_imgs]
+              # Tolerate posts that carry no media section at all.
+              post_imgs = (post["media"] || {})["images"] || []
+              if post_imgs.any?
+                img_dir = "imgs/%s" % basename
+                img_items = fetch_images(img_dir, post_imgs).map do |url|
+                  '<li><img src="' + opts[:base_path] + url + '"></li>'
+                end
+                imgcontent = "<ol>\n" + img_items.join("\n") + "</ol>\n"
+                # filter out "posterous-content", replacing with imgs:
+                content = content.sub(/\<p\>\[\[posterous-content:[^\]]+\]\]\<\/p\>/, imgcontent)
+              end
+            end
+            # Get the relevant fields as a hash, delete empty fields and convert
+            # to YAML for the header
+            data = {
+               'layout' => 'post',
+               'title' => title.to_s,
+               'published' => published
+             }.delete_if { |_, v| v.nil? || v == '' }.to_yaml
+            # Write out the data and content to file
+            File.open("_posts/#{name}", "w") do |f|
+              f.puts data
+              f.puts "---"
+              f.puts content
+            end
+          end
+          page += 1
+        end
+      end
+    end
+  end
+end