RubyGems - bunto-import - Versions diffs - 1.0.0 - Mend

bunto-import 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

checksums.yaml +7 -0
data/LICENSE +21 -0
data/README.markdown +32 -0
data/lib/bunto-import.rb +49 -0
data/lib/bunto-import/importer.rb +26 -0
data/lib/bunto-import/importers.rb +10 -0
data/lib/bunto-import/importers/behance.rb +80 -0
data/lib/bunto-import/importers/blogger.rb +264 -0
data/lib/bunto-import/importers/csv.rb +96 -0
data/lib/bunto-import/importers/drupal6.rb +139 -0
data/lib/bunto-import/importers/drupal7.rb +111 -0
data/lib/bunto-import/importers/easyblog.rb +96 -0
data/lib/bunto-import/importers/enki.rb +74 -0
data/lib/bunto-import/importers/ghost.rb +68 -0
data/lib/bunto-import/importers/google_reader.rb +64 -0
data/lib/bunto-import/importers/joomla.rb +90 -0
data/lib/bunto-import/importers/joomla3.rb +91 -0
data/lib/bunto-import/importers/jrnl.rb +125 -0
data/lib/bunto-import/importers/marley.rb +72 -0
data/lib/bunto-import/importers/mephisto.rb +99 -0
data/lib/bunto-import/importers/mt.rb +257 -0
data/lib/bunto-import/importers/posterous.rb +130 -0
data/lib/bunto-import/importers/rss.rb +62 -0
data/lib/bunto-import/importers/s9y.rb +60 -0
data/lib/bunto-import/importers/textpattern.rb +70 -0
data/lib/bunto-import/importers/tumblr.rb +289 -0
data/lib/bunto-import/importers/typo.rb +88 -0
data/lib/bunto-import/importers/wordpress.rb +372 -0
data/lib/bunto-import/importers/wordpressdotcom.rb +207 -0
data/lib/bunto-import/util.rb +76 -0
data/lib/bunto-import/version.rb +3 -0
data/lib/bunto/commands/import.rb +79 -0
metadata +374 -0

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 05b2589a386dfe73ec5e8fa3f0988e95d2b982dc
+  data.tar.gz: fdfee7813f47b1e34da4b3b178e1587d4133bc55
+SHA512:
+  metadata.gz: 2e14ddd53cfb6e29b460c20b82e3a7ddc9f033202912793c540ae3fc79240b65f459cc5f08b65c6007b7b57b219ffeea71f2cea3d34e05c1a8ea570ec7befcfb
+  data.tar.gz: 288de4e071628f790bd049d04a21318673fcd810a159a13a5de739d3a022909b5e4d4229d7b866e00fae16a6fd057dbbb3b958205ae27502ecb26ec7f265e918

data/LICENSE ADDED

@@ -0,0 +1,21 @@
+The MIT License (MIT)
+Copyright (c) 2016-present Tom Preston-Werner
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

data/README.markdown ADDED

@@ -0,0 +1,32 @@
+# bunto-import
+[![Build Status](https://travis-ci.org/bunto/bunto-import.svg?branch=master)](https://travis-ci.org/bunto/bunto-import)
+The new __Bunto__ command for importing from various blogs to Bunto format.
+**Note: _migrators_ are now called _importers_ and are only available if one installs the `bunto-import` _gem_.**
+## How `bunto-import` works:
+### Bunto v2.x and higher
+1. Install the _rubygem_ with `gem install bunto-import`.
+2. Run `bunto import IMPORTER [options]`
+### Bunto v1.x
+Launch IRB:
+```ruby
+# 1. Require bunto-import
+irb> require 'bunto-import'
+# 2. Choose the importer you'd like to use.
+irb> importer_class = "Behance" # an example, there are many others!
+# 3. Run it!
+irb> BuntoImport::Importers.const_get(importer_class).run(options_hash)
+```
+## Documentation
+bunto-import has its own documentation site, found at https://bunto.github.io/.
+Dedicated [documentation for each importer](https://bunto.github.io/import/docs/home/) is available there.

data/lib/bunto-import.rb ADDED

@@ -0,0 +1,49 @@
+$:.unshift File.dirname(__FILE__) # For use/testing when no gem is installed
+require 'rubygems'
+require 'bunto'
+require 'bunto/commands/import'
+require 'colorator'
+require 'bunto-import/importer'
+require 'bunto-import/importers'
+require 'bunto-import/util'
+module BuntoImport
+  # Public: Add the subcommands for each importer
+  #
+  # cmd - the instance of Mercenary::Command from the
+  #
+  # Returns a list of valid subcommands
+  def self.add_importer_commands(cmd)
+    commands = []
+    BuntoImport::Importer.subclasses.each do |importer|
+      name = importer.to_s.split("::").last.downcase
+      commands << name
+      cmd.command(name.to_sym) do |c|
+        c.syntax "#{name} [options]"
+        importer.specify_options(c)
+        c.action do |_, options|
+          importer.run(options)
+        end
+      end
+    end
+    commands
+  end
+  def self.require_with_fallback(gems)
+    Array[gems].flatten.each do |gem|
+      begin
+        require gem
+      rescue LoadError
+        Bunto.logger.error "Whoops! Looks like you need to install '#{gem}' before you can use this importer."
+        Bunto.logger.error ""
+        Bunto.logger.error "If you're using bundler:"
+        Bunto.logger.error "  1. Add 'gem \"#{gem}\"' to your Gemfile"
+        Bunto.logger.error "  2. Run 'bundle install'"
+        Bunto.logger.error ""
+        Bunto.logger.error "If you're not using bundler:"
+        Bunto.logger.abort_with "  1. Run 'gem install #{gem}'."
+      end
+    end
+  end
+end

data/lib/bunto-import/importer.rb ADDED

@@ -0,0 +1,26 @@
+module BuntoImport
+  class Importer
+    def self.inherited(base)
+      subclasses << base
+    end
+    def self.subclasses
+      @subclasses ||= []
+    end
+    def self.stringify_keys(hash)
+      the_hash = hash.clone
+      the_hash.keys.each do |key|
+        the_hash[(key.to_s rescue key) || key] =  the_hash.delete(key)
+      end
+      the_hash
+    end
+    def self.run(options = {})
+      opts = stringify_keys(options)
+      self.require_deps
+      self.validate(opts) if self.respond_to?(:validate)
+      self.process(opts)
+    end
+  end
+end

data/lib/bunto-import/importers.rb ADDED

@@ -0,0 +1,10 @@
+module BuntoImport
+  module Importers
+    Dir.chdir(File.expand_path(File.join("importers"), File.dirname(__FILE__))) do
+      Dir.entries(".").each do |f|
+        next if f[0..0].eql?(".")
+        require "bunto-import/importers/#{f}"
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/behance.rb ADDED

@@ -0,0 +1,80 @@
+module BuntoImport
+  module Importers
+    class Behance < Importer
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          fileutils
+          safe_yaml
+          date
+          time
+          behance
+        ])
+      end
+      def self.specify_options(c)
+        c.option 'user', '--user NAME', 'The username of the account'
+        c.option 'api_token', '--api_token TOKEN', 'The API access token for the account'
+      end
+      def self.validate(options)
+        %w[user api_token].each do |option|
+          if options[option].nil?
+            abort "Missing mandatory option --#{option}."
+          end
+        end
+      end
+      # Process the import.
+      #
+      # user - the behance user to retrieve projects (ID or username)
+      # api_token - your developer API Token
+      #
+      # Returns nothing.
+      def self.process(options)
+        user  = options.fetch('user')
+        token = options.fetch('api_token')
+        client = fetch_behance(token)
+        user_projects = client.user_projects(user)
+        puts "#{user_projects.length} project(s) found. Importing now..."
+        user_projects.each do |project|
+          details = client.project(project['id'])
+          title   = project['name'].to_s
+          formatted_date = Time.at(project['published_on'].to_i).to_date.to_s
+          post_name = title.split(%r{ |!|/|:|&|-|$|,}).map do |character|
+            character.downcase unless character.empty?
+          end.compact.join('-')
+          name = "#{formatted_date}-#{post_name}"
+          header = {
+            "layout" => "post",
+            "title" => title,
+            "details" => details
+          }
+          FileUtils.mkdir_p("_posts")
+          File.open("_posts/#{name}.md", "w") do |f|
+            f.puts header.to_yaml
+            f.puts "---\n\n"
+            f.puts details['description'].to_s
+          end
+        end
+        puts "Finished importing."
+      end
+      private
+      def self.fetch_behance(token)
+        ::Behance::Client.new(access_token: token)
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/blogger.rb ADDED

@@ -0,0 +1,264 @@
+module BuntoImport
+  module Importers
+    class Blogger < Importer
+      def self.specify_options(c)
+        c.option 'source', '--source NAME', 'The XML file (blog-MM-DD-YYYY.xml) path to import'
+        c.option 'no-blogger-info', '--no-blogger-info', 'not to leave blogger-URL info (id and old URL) in the front matter (default: false)'
+        c.option 'replace-internal-link', '--replace-internal-link', 'replace internal links using the post_url liquid tag. (default: false)'
+      end
+      def self.validate(options)
+        if options['source'].nil?
+          raise 'Missing mandatory option: --source'
+        elsif not File.exist?(options['source'])
+          raise Errno::ENOENT, "File not found: #{options['source']}"
+        end
+      end
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          rexml/document
+          rexml/streamlistener
+          rexml/parsers/streamparser
+          uri
+          time
+          fileutils
+          safe_yaml
+          open-uri
+        ])
+      end
+      # Process the import.
+      #
+      # source::                a local file String (or IO object for internal use purpose)..
+      # no-blogger-info::       a boolean if not leave blogger info (id and original URL).
+      # replace-internal-link:: a boolean if replace internal link
+      #
+      # Returns nothing.
+      def self.process(options)
+        source = options.fetch('source')
+        listener = BloggerAtomStreamListener.new
+        listener.leave_blogger_info = ! options.fetch('no-blogger-info', false),
+        File.open(source, 'r') do |f|
+          f.flock(File::LOCK_SH)
+          REXML::Parsers::StreamParser.new(f, listener).parse()
+        end
+        options['original-url-base'] = listener.original_url_base
+        postprocess(options)
+      end
+      # Post-process after import.
+      #
+      # replace-internal-link:: a boolean if replace internal link
+      #
+      # Returns nothing.
+      def self.postprocess(options)
+        # Replace internal link URL
+        if options.fetch('replace-internal-link', false)
+          original_url_base = options.fetch('original-url-base', nil)
+          if original_url_base
+            orig_url_pattern = Regexp.new(" href=([\"\'])(?:#{Regexp.escape(original_url_base)})?/([0-9]{4})/([0-9]{2})/([^\"\']+\.html)\\1")
+            Dir.glob('_posts/*.*') do |filename|
+              body = nil
+              File.open(filename, 'r') do |f|
+                f.flock(File::LOCK_SH)
+                body = f.read
+              end
+              body.gsub!(orig_url_pattern) do
+                # for post_url
+                quote = $1
+                post_file = Dir.glob("_posts/#{$2}-#{$3}-*-#{$4.to_s.tr('/', '-')}").first
+                raise "Could not found: _posts/#{$2}-#{$3}-*-#{$4.to_s.tr('/', '-')}" if post_file.nil?
+                " href=#{quote}{{ site.baseurl }}{% post_url #{File.basename(post_file, '.html')} %}#{quote}"
+              end
+              File.open(filename, 'w') do |f|
+                f.flock(File::LOCK_EX)
+                f << body
+              end
+            end
+          end
+        end
+      end
+      class BloggerAtomStreamListener
+        def initialize
+          # use `extend` instead of `include` to use `require_deps` instead of `require`.
+          extend REXML::StreamListener
+          extend BloggerAtomStreamListenerMethods
+          @leave_blogger_info = true
+        end
+      end
+      module BloggerAtomStreamListenerMethods
+        # When truthy, blogger_id and blogger_orig_url are added to the front matter.
+        attr_accessor :leave_blogger_info
+        # "scheme://host" taken from the original URL of the most recently
+        # processed post; set in get_post_data_from_in_entry_elem_info.
+        attr_reader :original_url_base
+        def tag_start(tag, attrs)
+          @tag_bread = [] unless @tag_bread
+          @tag_bread.push(tag)
+          case tag
+          when 'entry'
+            raise 'nest entry element' if @in_entry_elem
+            @in_entry_elem = {:meta => {}, :body => nil}
+          when 'title'
+            if @in_entry_elem
+              raise 'only <title type="text"></title> is supported' if attrs['type'] != 'text'
+            end
+          when 'category'
+            if @in_entry_elem
+              if attrs['scheme'] == 'http://www.blogger.com/atom/ns#'
+                @in_entry_elem[:meta][:category] = [] unless @in_entry_elem[:meta][:category]
+                @in_entry_elem[:meta][:category] << attrs['term']
+              elsif attrs['scheme'] == 'http://schemas.google.com/g/2005#kind'
+                kind = attrs['term']
+                kind.sub!(Regexp.new("^http://schemas\\.google\\.com/blogger/2008/kind\\#"), '')
+                @in_entry_elem[:meta][:kind] = kind
+              end
+            end
+          when 'content'
+            if @in_entry_elem
+              @in_entry_elem[:meta][:content_type] = attrs['type']
+            end
+          when 'link'
+            if @in_entry_elem
+              if attrs['rel'] == 'alternate' && attrs['type'] == 'text/html'
+                @in_entry_elem[:meta][:original_url] = attrs['href']
+              elsif attrs['rel'] == 'replies' && attrs['type'] == 'text/html'
+                unless @in_entry_elem[:meta][:original_url]
+                  @in_entry_elem[:meta][:original_url] = attrs['href'].sub(/\#comment-form$/, '')
+                end
+              end
+            end
+          when 'media:thumbnail'
+            if @in_entry_elem
+              @in_entry_elem[:meta][:thumbnail] = attrs['url']
+            end
+          end
+        end
+        def text(text)
+          if @in_entry_elem
+            case @tag_bread.last
+            when 'id'
+              @in_entry_elem[:meta][:id] = text
+            when 'published'
+              @in_entry_elem[:meta][:published] = text
+            when 'updated'
+              @in_entry_elem[:meta][:updated] = text
+            when 'title'
+              @in_entry_elem[:meta][:title] = text
+            when 'content'
+              @in_entry_elem[:body] = text
+            when 'name'
+              if @tag_bread[-2..-1] == %w[author name]
+                @in_entry_elem[:meta][:author] = text
+              end
+            when 'app:draft'
+              if @tag_bread[-2..-1] == %w[app:control app:draft]
+                @in_entry_elem[:meta][:draft] = true if text == 'yes'
+              end
+            end
+          end
+        end
+        def tag_end(tag)
+          case tag
+          when 'entry'
+            raise 'nest entry element' unless @in_entry_elem
+            if @in_entry_elem[:meta][:kind] == 'post'
+              post_data = get_post_data_from_in_entry_elem_info
+              if post_data
+                target_dir = '_posts'
+                target_dir = '_drafts' if @in_entry_elem[:meta][:draft]
+                FileUtils.mkdir_p(target_dir)
+                file_name = URI::decode("#{post_data[:filename]}.html")
+                File.open(File.join(target_dir, file_name), 'w') do |f|
+                  f.flock(File::LOCK_EX)
+                  f << post_data[:header].to_yaml
+                  f << "---\n\n"
+                  f << post_data[:body]
+                end
+              end
+            end
+            @in_entry_elem = nil
+          end
+          @tag_bread.pop
+        end
+        def get_post_data_from_in_entry_elem_info
+          if (@in_entry_elem.nil? || ! @in_entry_elem.has_key?(:meta) || ! @in_entry_elem[:meta].has_key?(:kind))
+            nil
+          elsif @in_entry_elem[:meta][:kind] == 'post'
+            timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime('%Y-%m-%d')
+            if @in_entry_elem[:meta][:original_url]
+              original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
+              original_path = original_uri.path.to_s
+              filename = "%s-%s" %
+                [timestamp,
+                 File.basename(original_path, File.extname(original_path))]
+              @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
+            elsif @in_entry_elem[:meta][:draft]
+              # Drafts don't have published urls
+              name = @in_entry_elem[:meta][:title]
+              if name.nil?
+                filename = timestamp
+              else
+                filename = "%s-%s" %
+                  [timestamp,
+                   CGI.escape(name.downcase).tr('+','-')]
+              end
+            else
+              raise 'Original URL is missing'
+            end
+            header = {
+              'layout' => 'post',
+              'title' => @in_entry_elem[:meta][:title],
+              'date' => @in_entry_elem[:meta][:published],
+              'author' => @in_entry_elem[:meta][:author],
+              'tags' => @in_entry_elem[:meta][:category],
+            }
+            header['modified_time'] = @in_entry_elem[:meta][:updated] if @in_entry_elem[:meta][:updated] && @in_entry_elem[:meta][:updated] != @in_entry_elem[:meta][:published]
+            header['thumbnail'] = @in_entry_elem[:meta][:thumbnail] if @in_entry_elem[:meta][:thumbnail]
+            header['blogger_id'] = @in_entry_elem[:meta][:id] if @leave_blogger_info
+            header['blogger_orig_url'] = @in_entry_elem[:meta][:original_url] if @leave_blogger_info && @in_entry_elem[:meta][:original_url]
+            body = @in_entry_elem[:body]
+            # body escaping associated with liquid
+            if body =~ /{{/
+              body.gsub!(/{{/, '{{ "{{" }}')
+            end
+            if body =~ /{%/
+              body.gsub!(/{%/, '{{ "{%" }}')
+            end
+            { :filename => filename, :header => header, :body => body }
+          else
+            nil
+          end
+        end
+      end
+    end
+  end
+end