RubyGems - bunto-import - Versions diffs - 1.0.0 - Mend

bunto-import 1.0.0

Files changed (33) hide show

checksums.yaml +7 -0
data/LICENSE +21 -0
data/README.markdown +32 -0
data/lib/bunto-import.rb +49 -0
data/lib/bunto-import/importer.rb +26 -0
data/lib/bunto-import/importers.rb +10 -0
data/lib/bunto-import/importers/behance.rb +80 -0
data/lib/bunto-import/importers/blogger.rb +264 -0
data/lib/bunto-import/importers/csv.rb +96 -0
data/lib/bunto-import/importers/drupal6.rb +139 -0
data/lib/bunto-import/importers/drupal7.rb +111 -0
data/lib/bunto-import/importers/easyblog.rb +96 -0
data/lib/bunto-import/importers/enki.rb +74 -0
data/lib/bunto-import/importers/ghost.rb +68 -0
data/lib/bunto-import/importers/google_reader.rb +64 -0
data/lib/bunto-import/importers/joomla.rb +90 -0
data/lib/bunto-import/importers/joomla3.rb +91 -0
data/lib/bunto-import/importers/jrnl.rb +125 -0
data/lib/bunto-import/importers/marley.rb +72 -0
data/lib/bunto-import/importers/mephisto.rb +99 -0
data/lib/bunto-import/importers/mt.rb +257 -0
data/lib/bunto-import/importers/posterous.rb +130 -0
data/lib/bunto-import/importers/rss.rb +62 -0
data/lib/bunto-import/importers/s9y.rb +60 -0
data/lib/bunto-import/importers/textpattern.rb +70 -0
data/lib/bunto-import/importers/tumblr.rb +289 -0
data/lib/bunto-import/importers/typo.rb +88 -0
data/lib/bunto-import/importers/wordpress.rb +372 -0
data/lib/bunto-import/importers/wordpressdotcom.rb +207 -0
data/lib/bunto-import/util.rb +76 -0
data/lib/bunto-import/version.rb +3 -0
data/lib/bunto/commands/import.rb +79 -0
metadata +374 -0

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 05b2589a386dfe73ec5e8fa3f0988e95d2b982dc
+  data.tar.gz: fdfee7813f47b1e34da4b3b178e1587d4133bc55
+SHA512:
+  metadata.gz: 2e14ddd53cfb6e29b460c20b82e3a7ddc9f033202912793c540ae3fc79240b65f459cc5f08b65c6007b7b57b219ffeea71f2cea3d34e05c1a8ea570ec7befcfb
+  data.tar.gz: 288de4e071628f790bd049d04a21318673fcd810a159a13a5de739d3a022909b5e4d4229d7b866e00fae16a6fd057dbbb3b958205ae27502ecb26ec7f265e918

data/LICENSE ADDED

@@ -0,0 +1,21 @@
+The MIT License (MIT)
+Copyright (c) 2016-present Tom Preston-Werner
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

data/README.markdown ADDED

@@ -0,0 +1,32 @@
+# bunto-import
+[![Build Status](https://travis-ci.org/bunto/bunto-import.svg?branch=master)](https://travis-ci.org/bunto/bunto-import)
+The new __Bunto__ command for importing from various blogs to Bunto format.
+**Note: _migrators_ are now called _importers_ and are only available if one installs the `bunto-import` _gem_.**
+## How `bunto-import` works:
+### Bunto v2.x and higher
+1. Install the _rubygem_ with `gem install bunto-import`.
+2. Run `bunto import IMPORTER [options]`
+### Bunto v1.x
+Launch IRB:
+```ruby
+# 1. Require bunto-import
+irb> require 'bunto-import'
+# 2. Choose the importer you'd like to use.
+irb> importer_class = "Behance" # an example, there are many others!
+# 3. Run it!
+irb> BuntoImport::Importers.const_get(importer_class).run(options_hash)
+```
+## Documentation
+bunto-import has its own documentation site, found at https://bunto.github.io/.
+Dedicated [documentation for each importer](https://bunto.github.io/import/docs/home/) is available there.

data/lib/bunto-import.rb ADDED

@@ -0,0 +1,49 @@
+$:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
+require 'rubygems'
+require 'bunto'
+require 'bunto/commands/import'
+require 'colorator'
+require 'bunto-import/importer'
+require 'bunto-import/importers'
+require 'bunto-import/util'
+module BuntoImport
+  # Public: Add the subcommands for each importer
+  #
+  # cmd - the instance of Mercenary::Command from the
+  #
+  # Returns a list of valid subcommands
+  def self.add_importer_commands(cmd)
+    commands = []
+    BuntoImport::Importer.subclasses.each do |importer|
+      name = importer.to_s.split("::").last.downcase
+      commands << name
+      cmd.command(name.to_sym) do |c|
+        c.syntax "#{name} [options]"
+        importer.specify_options(c)
+        c.action do |_, options|
+          importer.run(options)
+        end
+      end
+    end
+    commands
+  end
+  def self.require_with_fallback(gems)
+    Array[gems].flatten.each do |gem|
+      begin
+        require gem
+      rescue LoadError
+        Bunto.logger.error "Whoops! Looks like you need to install '#{gem}' before you can use this importer."
+        Bunto.logger.error ""
+        Bunto.logger.error "If you're using bundler:"
+        Bunto.logger.error "  1. Add 'gem \"#{gem}\"' to your Gemfile"
+        Bunto.logger.error "  2. Run 'bundle install'"
+        Bunto.logger.error ""
+        Bunto.logger.error "If you're not using bundler:"
+        Bunto.logger.abort_with "  1. Run 'gem install #{gem}'."
+      end
+    end
+  end
+end

data/lib/bunto-import/importer.rb ADDED

@@ -0,0 +1,26 @@
+module BuntoImport
+  class Importer
+    def self.inherited(base)
+      subclasses << base
+    end
+    def self.subclasses
+      @subclasses ||= []
+    end
+    def self.stringify_keys(hash)
+      the_hash = hash.clone
+      the_hash.keys.each do |key|
+        the_hash[(key.to_s rescue key) || key] =  the_hash.delete(key)
+      end
+      the_hash
+    end
+    def self.run(options = {})
+      opts = stringify_keys(options)
+      self.require_deps
+      self.validate(opts) if self.respond_to?(:validate)
+      self.process(opts)
+    end
+  end
+end

data/lib/bunto-import/importers.rb ADDED

@@ -0,0 +1,10 @@
+module BuntoImport
+  module Importers
+    Dir.chdir(File.expand_path(File.join("importers"), File.dirname(__FILE__))) do
+      Dir.entries(".").each do |f|
+        next if f[0..0].eql?(".")
+        require "bunto-import/importers/#{f}"
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/behance.rb ADDED

@@ -0,0 +1,80 @@
+module BuntoImport
+  module Importers
+    class Behance < Importer
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          fileutils
+          safe_yaml
+          date
+          time
+          behance
+        ])
+      end
+      def self.specify_options(c)
+        c.option 'user', '--user NAME', 'The username of the account'
+        c.option 'api_token', '--api_token TOKEN', 'The API access token for the account'
+      end
+      def self.validate(options)
+        %w[user api_token].each do |option|
+          if options[option].nil?
+            abort "Missing mandatory option --#{option}."
+          end
+        end
+      end
+      # Process the import.
+      #
+      # user - the behance user to retrieve projects (ID or username)
+      # api_token - your developer API Token
+      #
+      # Returns nothing.
+      def self.process(options)
+        user  = options.fetch('user')
+        token = options.fetch('api_token')
+        client = fetch_behance(token)
+        user_projects = client.user_projects(user)
+        puts "#{user_projects.length} project(s) found. Importing now..."
+        user_projects.each do |project|
+          details = client.project(project['id'])
+          title   = project['name'].to_s
+          formatted_date = Time.at(project['published_on'].to_i).to_date.to_s
+          post_name = title.split(%r{ |!|/|:|&|-|$|,}).map do |character|
+            character.downcase unless character.empty?
+          end.compact.join('-')
+          name = "#{formatted_date}-#{post_name}"
+          header = {
+            "layout" => "post",
+            "title" => title,
+            "details" => details
+          }
+          FileUtils.mkdir_p("_posts")
+          File.open("_posts/#{name}.md", "w") do |f|
+            f.puts header.to_yaml
+            f.puts "---\n\n"
+            f.puts details['description'].to_s
+          end
+        end
+        puts "Finished importing."
+      end
+      private
+      def self.fetch_behance(token)
+        ::Behance::Client.new(access_token: token)
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/blogger.rb ADDED

@@ -0,0 +1,264 @@
+module BuntoImport
+  module Importers
+    class Blogger < Importer
+      def self.specify_options(c)
+        c.option 'source', '--source NAME', 'The XML file (blog-MM-DD-YYYY.xml) path to import'
+        c.option 'no-blogger-info', '--no-blogger-info', 'not to leave blogger-URL info (id and old URL) in the front matter (default: false)'
+        c.option 'replace-internal-link', '--replace-internal-link', 'replace internal links using the post_url liquid tag. (default: false)'
+      end
+      def self.validate(options)
+        if options['source'].nil?
+          raise 'Missing mandatory option: --source'
+        elsif not File.exist?(options['source'])
+          raise Errno::ENOENT, "File not found: #{options['source']}"
+        end
+      end
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          rexml/document
+          rexml/streamlistener
+          rexml/parsers/streamparser
+          uri
+          time
+          fileutils
+          safe_yaml
+          open-uri
+        ])
+      end
+      # Process the import.
+      #
+      # source::                a local file String (or IO object for internal use purpose)..
+      # no-blogger-info::       a boolean if not leave blogger info (id and original URL).
+      # replace-internal-link:: a boolean if replace internal link
+      #
+      # Returns nothing.
+      def self.process(options)
+        source = options.fetch('source')
+        listener = BloggerAtomStreamListener.new
+        listener.leave_blogger_info = ! options.fetch('no-blogger-info', false),
+        File.open(source, 'r') do |f|
+          f.flock(File::LOCK_SH)
+          REXML::Parsers::StreamParser.new(f, listener).parse()
+        end
+        options['original-url-base'] = listener.original_url_base
+        postprocess(options)
+      end
+      # Post-process after import.
+      #
+      # replace-internal-link:: a boolean if replace internal link
+      #
+      # Returns nothing.
+      def self.postprocess(options)
+        # Replace internal link URL
+        if options.fetch('replace-internal-link', false)
+          original_url_base = options.fetch('original-url-base', nil)
+          if original_url_base
+            orig_url_pattern = Regexp.new(" href=([\"\'])(?:#{Regexp.escape(original_url_base)})?/([0-9]{4})/([0-9]{2})/([^\"\']+\.html)\\1")
+            Dir.glob('_posts/*.*') do |filename|
+              body = nil
+              File.open(filename, 'r') do |f|
+                f.flock(File::LOCK_SH)
+                body = f.read
+              end
+              body.gsub!(orig_url_pattern) do
+                # for post_url
+                quote = $1
+                post_file = Dir.glob("_posts/#{$2}-#{$3}-*-#{$4.to_s.tr('/', '-')}").first
+                raise "Could not found: _posts/#{$2}-#{$3}-*-#{$4.to_s.tr('/', '-')}" if post_file.nil?
+                " href=#{quote}{{ site.baseurl }}{% post_url #{File.basename(post_file, '.html')} %}#{quote}"
+              end
+              File.open(filename, 'w') do |f|
+                f.flock(File::LOCK_EX)
+                f << body
+              end
+            end
+          end
+        end
+      end
+      class BloggerAtomStreamListener
+        def initialize
+          # use `extend` instead of `include` to use `require_deps` instead of `require`.
+          extend REXML::StreamListener
+          extend BloggerAtomStreamListenerMethods
+          @leave_blogger_info = true
+        end
+      end
+      module BloggerAtomStreamListenerMethods
+        attr_accessor :leave_blogger_info
+        attr_reader :original_url_base
+        def tag_start(tag, attrs)
+          @tag_bread = [] unless @tag_bread
+          @tag_bread.push(tag)
+          case tag
+          when 'entry'
+            raise 'nest entry element' if @in_entry_elem
+            @in_entry_elem = {:meta => {}, :body => nil}
+          when 'title'
+            if @in_entry_elem
+              raise 'only <title type="text"></title> is supported' if attrs['type'] != 'text'
+            end
+          when 'category'
+            if @in_entry_elem
+              if attrs['scheme'] == 'http://www.blogger.com/atom/ns#'
+                @in_entry_elem[:meta][:category] = [] unless @in_entry_elem[:meta][:category]
+                @in_entry_elem[:meta][:category] << attrs['term']
+              elsif attrs['scheme'] == 'http://schemas.google.com/g/2005#kind'
+                kind = attrs['term']
+                kind.sub!(Regexp.new("^http://schemas\\.google\\.com/blogger/2008/kind\\#"), '')
+                @in_entry_elem[:meta][:kind] = kind
+              end
+            end
+          when 'content'
+            if @in_entry_elem
+              @in_entry_elem[:meta][:content_type] = attrs['type']
+            end
+          when 'link'
+            if @in_entry_elem
+              if attrs['rel'] == 'alternate' && attrs['type'] == 'text/html'
+                @in_entry_elem[:meta][:original_url] = attrs['href']
+              elsif attrs['rel'] == 'replies' && attrs['type'] == 'text/html'
+                unless @in_entry_elem[:meta][:original_url]
+                  @in_entry_elem[:meta][:original_url] = attrs['href'].sub(/\#comment-form$/, '')
+                end
+              end
+            end
+          when 'media:thumbnail'
+            if @in_entry_elem
+              @in_entry_elem[:meta][:thumbnail] = attrs['url']
+            end
+          end
+        end
+        def text(text)
+          if @in_entry_elem
+            case @tag_bread.last
+            when 'id'
+              @in_entry_elem[:meta][:id] = text
+            when 'published'
+              @in_entry_elem[:meta][:published] = text
+            when 'updated'
+              @in_entry_elem[:meta][:updated] = text
+            when 'title'
+              @in_entry_elem[:meta][:title] = text
+            when 'content'
+              @in_entry_elem[:body] = text
+            when 'name'
+              if @tag_bread[-2..-1] == %w[author name]
+                @in_entry_elem[:meta][:author] = text
+              end
+            when 'app:draft'
+              if @tag_bread[-2..-1] == %w[app:control app:draft]
+                @in_entry_elem[:meta][:draft] = true if text == 'yes'
+              end
+            end
+          end
+        end
+        def tag_end(tag)
+          case tag
+          when 'entry'
+            raise 'nest entry element' unless @in_entry_elem
+            if @in_entry_elem[:meta][:kind] == 'post'
+              post_data = get_post_data_from_in_entry_elem_info
+              if post_data
+                target_dir = '_posts'
+                target_dir = '_drafts' if @in_entry_elem[:meta][:draft]
+                FileUtils.mkdir_p(target_dir)
+                file_name = URI::decode("#{post_data[:filename]}.html")
+                File.open(File.join(target_dir, file_name), 'w') do |f|
+                  f.flock(File::LOCK_EX)
+                  f << post_data[:header].to_yaml
+                  f << "---\n\n"
+                  f << post_data[:body]
+                end
+              end
+            end
+            @in_entry_elem = nil
+          end
+          @tag_bread.pop
+        end
+        def get_post_data_from_in_entry_elem_info
+          if (@in_entry_elem.nil? || ! @in_entry_elem.has_key?(:meta) || ! @in_entry_elem[:meta].has_key?(:kind))
+            nil
+          elsif @in_entry_elem[:meta][:kind] == 'post'
+            timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime('%Y-%m-%d')
+            if @in_entry_elem[:meta][:original_url]
+              original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
+              original_path = original_uri.path.to_s
+              filename = "%s-%s" %
+                [timestamp,
+                 File.basename(original_path, File.extname(original_path))]
+              @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
+            elsif @in_entry_elem[:meta][:draft]
+              # Drafts don't have published urls
+              name = @in_entry_elem[:meta][:title]
+              if name.nil?
+                filename = timestamp
+              else
+                filename = "%s-%s" %
+                  [timestamp,
+                   CGI.escape(name.downcase).tr('+','-')]
+              end
+            else
+              raise 'Original URL is missing'
+            end
+            header = {
+              'layout' => 'post',
+              'title' => @in_entry_elem[:meta][:title],
+              'date' => @in_entry_elem[:meta][:published],
+              'author' => @in_entry_elem[:meta][:author],
+              'tags' => @in_entry_elem[:meta][:category],
+            }
+            header['modified_time'] = @in_entry_elem[:meta][:updated] if @in_entry_elem[:meta][:updated] && @in_entry_elem[:meta][:updated] != @in_entry_elem[:meta][:published]
+            header['thumbnail'] = @in_entry_elem[:meta][:thumbnail] if @in_entry_elem[:meta][:thumbnail]
+            header['blogger_id'] = @in_entry_elem[:meta][:id] if @leave_blogger_info
+            header['blogger_orig_url'] = @in_entry_elem[:meta][:original_url] if @leave_blogger_info && @in_entry_elem[:meta][:original_url]
+            body = @in_entry_elem[:body]
+            # body escaping associated with liquid
+            if body =~ /{{/
+              body.gsub!(/{{/, '{{ "{{" }}')
+            end
+            if body =~ /{%/
+              body.gsub!(/{%/, '{{ "{%" }}')
+            end
+            { :filename => filename, :header => header, :body => body }
+          else
+            nil
+          end
+        end
+      end
+    end
+  end
+end