RubyGems - bunto-import - Versions diffs - 2.0.0 → 3.0.0 - Mend

bunto-import 2.0.0 → 3.0.0

Files changed (35) hide show

checksums.yaml +4 -4
data/LICENSE +21 -21
data/README.markdown +33 -33
data/lib/bunto-import.rb +49 -49
data/lib/bunto-import/importer.rb +26 -26
data/lib/bunto-import/importers.rb +10 -10
data/lib/bunto-import/importers/behance.rb +80 -80
data/lib/bunto-import/importers/blogger.rb +330 -264
data/lib/bunto-import/importers/csv.rb +96 -96
data/lib/bunto-import/importers/drupal6.rb +53 -139
data/lib/bunto-import/importers/drupal7.rb +54 -111
data/lib/bunto-import/importers/drupal_common.rb +157 -0
data/lib/bunto-import/importers/easyblog.rb +96 -96
data/lib/bunto-import/importers/enki.rb +74 -74
data/lib/bunto-import/importers/ghost.rb +68 -68
data/lib/bunto-import/importers/google_reader.rb +64 -64
data/lib/bunto-import/importers/joomla.rb +92 -90
data/lib/bunto-import/importers/joomla3.rb +91 -91
data/lib/bunto-import/importers/jrnl.rb +125 -125
data/lib/bunto-import/importers/marley.rb +72 -72
data/lib/bunto-import/importers/mephisto.rb +99 -99
data/lib/bunto-import/importers/mt.rb +257 -257
data/lib/bunto-import/importers/posterous.rb +130 -130
data/lib/bunto-import/importers/rss.rb +62 -62
data/lib/bunto-import/importers/s9y.rb +60 -60
data/lib/bunto-import/importers/s9y_database.rb +363 -0
data/lib/bunto-import/importers/textpattern.rb +70 -70
data/lib/bunto-import/importers/tumblr.rb +300 -289
data/lib/bunto-import/importers/typo.rb +88 -88
data/lib/bunto-import/importers/wordpress.rb +372 -372
data/lib/bunto-import/importers/wordpressdotcom.rb +207 -207
data/lib/bunto-import/util.rb +76 -76
data/lib/bunto-import/version.rb +3 -3
data/lib/bunto/commands/import.rb +79 -79
metadata +84 -54

data/lib/bunto-import/importers/jrnl.rb CHANGED

@@ -1,125 +1,125 @@
-module BuntoImport
-  module Importers
-    class Jrnl < Importer
-      def self.require_deps
-        BuntoImport.require_with_fallback(%w[
-          time
-          rubygems
-          safe_yaml
-        ])
-      end
-      def self.specify_options(c)
-        c.option 'file', '--file FILENAME', 'Journal file (default: "~/journal.txt")'
-        c.option 'time_format', '--time_format FORMAT', 'Time format of your journal (default: "%Y-%m-%d %H:%M")'
-        c.option 'extension', '--extension EXT', 'Output extension (default: "md")'
-        c.option 'layout', '--layout NAME', 'Output post layout (default: "post")'
-      end
-      # Reads a jrnl file and creates a new post for each entry
-      # The following overrides are available:
-      # :file         path to input file
-      # :time_format  the format used by the jrnl configuration
-      # :extension    the extension format of the output files
-      # :layout       explicitly set the layout of the output
-      def self.process(options)
-        file        = options.fetch('file', "~/journal.txt")
-        time_format = options.fetch('time_format', "%Y-%m-%d %H:%M")
-        extension   = options.fetch('extension', "md")
-        layout      = options.fetch('layout', "post")
-        date_length = Time.now.strftime(time_format).length
-        # convert relative to absolute if needed
-        file = File.expand_path(file)
-        abort "The jrnl file was not found. Please make sure '#{file}' exists. You can specify a different file using the --file switch." unless File.file?(file)
-        input = File.read(file)
-        entries = input.split("\n\n");
-        entries.each do |entry|
-          # split dateline and body
-          # content[0] has the date and title
-          # content[1] has the post body
-          content = entry.split("\n")
-          body = get_post_content(content)
-          date = get_date(content[0], date_length)
-          title = get_title(content[0], date_length)
-          slug = create_slug(title)
-          filename = create_filename(date, slug, extension)
-          meta = create_meta(layout, title, date) # prepare YAML meta data
-          write_file(filename, meta, body) # write to file
-        end
-      end
-      # strip body from jrnl entry
-      def self.get_post_content(content)
-        return content[1]
-      end
-      # strip timestamp from the dateline
-      def self.get_date(content, offset)
-        return content[0, offset]
-      end
-      # strip title from the dateline
-      def self.get_title(content, offset)
-        return content[offset + 1, content.length]
-      end
-      # generate slug
-      def self.create_slug(title)
-        return title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
-      end
-      # generate filename
-      def self.create_filename(date, slug, extension)
-        return "#{Time.parse(date).strftime("%Y-%m-%d")}-#{slug}.#{extension}"
-      end
-      # Prepare YAML meta data
-      #
-      # layout  - name of the layout
-      # title   - title of the entry
-      # date    - date of entry creation
-      #
-      # Examples
-      #
-      #   create_meta("post", "Entry 1", "2013-01-01 13:00")
-      #   # => "---\nlayout: post\ntitle: Entry 1\ndate: 2013-01-01 13:00\n"
-      #
-      # Returns array converted to YAML
-      def self.create_meta(layout, title, date)
-        data = {
-          'layout'        => layout,
-          'title'         => title,
-          'date'          => Time.parse(date).strftime("%Y-%m-%d %H:%M %z")
-        }.to_yaml
-        return data;
-      end
-      # Writes given data to file
-      #
-      # filename    - name of the output file
-      # meta        - YAML header data
-      # body        - jrnl entry content
-      #
-      # Examples
-      #
-      #   write_file("2013-01-01-entry-1.md", "---\nlayout: post\ntitle: Entry 1\ndate: 2013-01-01 13:00\n", "This is the first entry for my new journal")
-      #
-      # Writes file to _posts/filename
-      def self.write_file(filename, meta, body)
-        File.open("_posts/#{filename}", "w") do |f|
-          f.puts meta
-          f.puts "---\n\n"
-          f.puts body
-        end
-      end
-    end
-  end
-end
+module BuntoImport
+  module Importers
+    class Jrnl < Importer
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          time
+          rubygems
+          safe_yaml
+        ])
+      end
+      def self.specify_options(c)
+        c.option 'file', '--file FILENAME', 'Journal file (default: "~/journal.txt")'
+        c.option 'time_format', '--time_format FORMAT', 'Time format of your journal (default: "%Y-%m-%d %H:%M")'
+        c.option 'extension', '--extension EXT', 'Output extension (default: "md")'
+        c.option 'layout', '--layout NAME', 'Output post layout (default: "post")'
+      end
+      # Reads a jrnl file and creates a new post for each entry
+      # The following overrides are available:
+      # :file         path to input file
+      # :time_format  the format used by the jrnl configuration
+      # :extension    the extension format of the output files
+      # :layout       explicitly set the layout of the output
+      def self.process(options)
+        file        = options.fetch('file', "~/journal.txt")
+        time_format = options.fetch('time_format', "%Y-%m-%d %H:%M")
+        extension   = options.fetch('extension', "md")
+        layout      = options.fetch('layout', "post")
+        date_length = Time.now.strftime(time_format).length
+        # convert relative to absolute if needed
+        file = File.expand_path(file)
+        abort "The jrnl file was not found. Please make sure '#{file}' exists. You can specify a different file using the --file switch." unless File.file?(file)
+        input = File.read(file)
+        entries = input.split("\n\n");
+        entries.each do |entry|
+          # split dateline and body
+          # content[0] has the date and title
+          # content[1] has the post body
+          content = entry.split("\n")
+          body = get_post_content(content)
+          date = get_date(content[0], date_length)
+          title = get_title(content[0], date_length)
+          slug = create_slug(title)
+          filename = create_filename(date, slug, extension)
+          meta = create_meta(layout, title, date) # prepare YAML meta data
+          write_file(filename, meta, body) # write to file
+        end
+      end
+      # strip body from jrnl entry
+      def self.get_post_content(content)
+        return content[1]
+      end
+      # strip timestamp from the dateline
+      def self.get_date(content, offset)
+        return content[0, offset]
+      end
+      # strip title from the dateline
+      def self.get_title(content, offset)
+        return content[offset + 1, content.length]
+      end
+      # generate slug
+      def self.create_slug(title)
+        return title.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
+      end
+      # generate filename
+      def self.create_filename(date, slug, extension)
+        return "#{Time.parse(date).strftime("%Y-%m-%d")}-#{slug}.#{extension}"
+      end
+      # Prepare YAML meta data
+      #
+      # layout  - name of the layout
+      # title   - title of the entry
+      # date    - date of entry creation
+      #
+      # Examples
+      #
+      #   create_meta("post", "Entry 1", "2013-01-01 13:00")
+      #   # => "---\nlayout: post\ntitle: Entry 1\ndate: 2013-01-01 13:00\n"
+      #
+      # Returns array converted to YAML
+      def self.create_meta(layout, title, date)
+        data = {
+          'layout'        => layout,
+          'title'         => title,
+          'date'          => Time.parse(date).strftime("%Y-%m-%d %H:%M %z")
+        }.to_yaml
+        return data;
+      end
+      # Writes given data to file
+      #
+      # filename    - name of the output file
+      # meta        - YAML header data
+      # body        - jrnl entry content
+      #
+      # Examples
+      #
+      #   write_file("2013-01-01-entry-1.md", "---\nlayout: post\ntitle: Entry 1\ndate: 2013-01-01 13:00\n", "This is the first entry for my new journal")
+      #
+      # Writes file to _posts/filename
+      def self.write_file(filename, meta, body)
+        File.open("_posts/#{filename}", "w") do |f|
+          f.puts meta
+          f.puts "---\n\n"
+          f.puts body
+        end
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/marley.rb CHANGED

@@ -1,72 +1,72 @@
-module BuntoImport
-  module Importers
-    class Marley < Importer
-      def self.validate(options)
-        if options['marley_data_dir'].nil?
-          Bunto.logger.abort_with "Missing mandatory option --marley_data_dir."
-        else
-          unless File.directory?(options['marley_data_dir'])
-            raise ArgumentError, "marley dir '#{options['marley_data_dir']}' not found"
-          end
-        end
-      end
-      def self.regexp
-        { :id    => /^\d{0,4}-{0,1}(.*)$/,
-          :title => /^#\s*(.*)\s+$/,
-          :title_with_date => /^#\s*(.*)\s+\(([0-9\/]+)\)$/,
-          :published_on => /.*\s+\(([0-9\/]+)\)$/,
-          :perex => /^([^\#\n]+\n)$/,
-          :meta  => /^\{\{\n(.*)\}\}\n$/mi # Multiline Regexp
-        }
-      end
-      def self.require_deps
-        BuntoImport.require_with_fallback(%w[
-          fileutils
-          safe_yaml
-        ])
-      end
-      def self.specify_options(c)
-        c.option 'marley_data_dir', '--marley_data_dir DIR', 'The dir containing your marley data'
-      end
-      def self.process(options)
-        marley_data_dir = options.fetch('marley_data_dir')
-        FileUtils.mkdir_p "_posts"
-        posts = 0
-        Dir["#{marley_data_dir}/**/*.txt"].each do |f|
-          next unless File.exists?(f)
-          #copied over from marley's app/lib/post.rb
-          file_content  = File.read(f)
-          meta_content  = file_content.slice!( self.regexp[:meta] )
-          body          = file_content.sub( self.regexp[:title], '').sub( self.regexp[:perex], '').strip
-          title = file_content.scan( self.regexp[:title] ).first.to_s.strip
-          prerex = file_content.scan( self.regexp[:perex] ).first.to_s.strip
-          published_on = DateTime.parse( post[:published_on] ) rescue File.mtime( File.dirname(f) )
-          meta          = ( meta_content ) ? YAML::load( meta_content.scan( self.regexp[:meta]).to_s ) : {}
-          meta['title'] = title
-          meta['layout'] = 'post'
-          formatted_date = published_on.strftime('%Y-%m-%d')
-          post_name =  File.dirname(f).split(%r{/}).last.gsub(/\A\d+-/, '')
-          name = "#{formatted_date}-#{post_name}"
-          File.open("_posts/#{name}.markdown", "w") do |f|
-            f.puts meta.to_yaml
-            f.puts "---\n"
-            f.puts "\n#{prerex}\n\n" if prerex
-            f.puts body
-          end
-          posts += 1
-        end
-        "Created #{posts} posts!"
-      end
-    end
-  end
-end
+module BuntoImport
+  module Importers
+    class Marley < Importer
+      def self.validate(options)
+        if options['marley_data_dir'].nil?
+          Bunto.logger.abort_with "Missing mandatory option --marley_data_dir."
+        else
+          unless File.directory?(options['marley_data_dir'])
+            raise ArgumentError, "marley dir '#{options['marley_data_dir']}' not found"
+          end
+        end
+      end
+      def self.regexp
+        { :id    => /^\d{0,4}-{0,1}(.*)$/,
+          :title => /^#\s*(.*)\s+$/,
+          :title_with_date => /^#\s*(.*)\s+\(([0-9\/]+)\)$/,
+          :published_on => /.*\s+\(([0-9\/]+)\)$/,
+          :perex => /^([^\#\n]+\n)$/,
+          :meta  => /^\{\{\n(.*)\}\}\n$/mi # Multiline Regexp
+        }
+      end
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          fileutils
+          safe_yaml
+        ])
+      end
+      def self.specify_options(c)
+        c.option 'marley_data_dir', '--marley_data_dir DIR', 'The dir containing your marley data'
+      end
+      def self.process(options)
+        marley_data_dir = options.fetch('marley_data_dir')
+        FileUtils.mkdir_p "_posts"
+        posts = 0
+        Dir["#{marley_data_dir}/**/*.txt"].each do |f|
+          next unless File.exists?(f)
+          #copied over from marley's app/lib/post.rb
+          file_content  = File.read(f)
+          meta_content  = file_content.slice!( self.regexp[:meta] )
+          body          = file_content.sub( self.regexp[:title], '').sub( self.regexp[:perex], '').strip
+          title = file_content.scan( self.regexp[:title] ).first.to_s.strip
+          prerex = file_content.scan( self.regexp[:perex] ).first.to_s.strip
+          published_on = DateTime.parse( post[:published_on] ) rescue File.mtime( File.dirname(f) )
+          meta          = ( meta_content ) ? YAML::load( meta_content.scan( self.regexp[:meta]).to_s ) : {}
+          meta['title'] = title
+          meta['layout'] = 'post'
+          formatted_date = published_on.strftime('%Y-%m-%d')
+          post_name =  File.dirname(f).split(%r{/}).last.gsub(/\A\d+-/, '')
+          name = "#{formatted_date}-#{post_name}"
+          File.open("_posts/#{name}.markdown", "w") do |f|
+            f.puts meta.to_yaml
+            f.puts "---\n"
+            f.puts "\n#{prerex}\n\n" if prerex
+            f.puts body
+          end
+          posts += 1
+        end
+        "Created #{posts} posts!"
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/mephisto.rb CHANGED

@@ -1,99 +1,99 @@
-module BuntoImport
-  module Importers
-    class Mephisto < Importer
-      #Accepts a hash with database config variables, exports mephisto posts into a csv
-      #export PGPASSWORD if you must
-      def self.postgres(c)
-        sql = <<-SQL
-        BEGIN;
-        CREATE TEMP TABLE bunto AS
-          SELECT title, permalink, body, published_at, filter FROM contents
-          WHERE user_id = 1 AND type = 'Article' ORDER BY published_at;
-        COPY bunto TO STDOUT WITH CSV HEADER;
-        ROLLBACK;
-        SQL
-        command = %Q(psql -h #{c[:host] || "localhost"} -c "#{sql.strip}" #{c[:database]} #{c[:username]} -o #{c[:filename] || "posts.csv"})
-        puts command
-        `#{command}`
-        CSV.process
-      end
-      def self.validate(options)
-        %w[dbname user].each do |option|
-          if options[option].nil?
-            abort "Missing mandatory option --#{option}."
-          end
-        end
-      end
-      def self.require_deps
-        BuntoImport.require_with_fallback(%w[
-          rubygems
-          sequel
-          fastercsv
-          fileutils
-        ])
-      end
-      def self.specify_options(c)
-        c.option 'dbname', '--dbname DB', 'Database name'
-        c.option 'user', '--user USER', 'Database user name'
-        c.option 'password', '--password PW', "Database user's password (default: '')"
-        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
-      end
-      # This query will pull blog posts from all entries across all blogs. If
-      # you've got unpublished, deleted or otherwise hidden posts please sift
-      # through the created posts to make sure nothing is accidently published.
-      QUERY = "SELECT id, \
-                      permalink, \
-                      body, \
-                      published_at, \
-                      title \
-               FROM contents \
-               WHERE user_id = 1 AND \
-                     type = 'Article' AND \
-                     published_at IS NOT NULL \
-               ORDER BY published_at"
-      def self.process(options)
-        dbname = options.fetch('dbname')
-        user   = options.fetch('user')
-        pass   = options.fetch('password', '')
-        host   = options.fetch('host', "localhost")
-        db = Sequel.mysql(dbname, :user => user,
-                                  :password => pass,
-                                  :host => host,
-                                  :encoding => 'utf8')
-        FileUtils.mkdir_p "_posts"
-        db[QUERY].each do |post|
-          title = post[:title]
-          slug = post[:permalink]
-          date = post[:published_at]
-          content = post[:body]
-          # Ideally, this script would determine the post format (markdown,
-          # html, etc) and create files with proper extensions. At this point
-          # it just assumes that markdown will be acceptable.
-          name = [date.year, date.month, date.day, slug].join('-') + ".markdown"
-          data = {
-             'layout' => 'post',
-             'title' => title.to_s,
-             'mt_id' => post[:entry_id],
-           }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
-          File.open("_posts/#{name}", "w") do |f|
-            f.puts data
-            f.puts "---"
-            f.puts content
-          end
-        end
-      end
-    end
-  end
-end
+module BuntoImport
+  module Importers
+    class Mephisto < Importer
+      #Accepts a hash with database config variables, exports mephisto posts into a csv
+      #export PGPASSWORD if you must
+      def self.postgres(c)
+        sql = <<-SQL
+        BEGIN;
+        CREATE TEMP TABLE bunto AS
+          SELECT title, permalink, body, published_at, filter FROM contents
+          WHERE user_id = 1 AND type = 'Article' ORDER BY published_at;
+        COPY bunto TO STDOUT WITH CSV HEADER;
+        ROLLBACK;
+        SQL
+        command = %Q(psql -h #{c[:host] || "localhost"} -c "#{sql.strip}" #{c[:database]} #{c[:username]} -o #{c[:filename] || "posts.csv"})
+        puts command
+        `#{command}`
+        CSV.process
+      end
+      def self.validate(options)
+        %w[dbname user].each do |option|
+          if options[option].nil?
+            abort "Missing mandatory option --#{option}."
+          end
+        end
+      end
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          rubygems
+          sequel
+          fastercsv
+          fileutils
+        ])
+      end
+      def self.specify_options(c)
+        c.option 'dbname', '--dbname DB', 'Database name'
+        c.option 'user', '--user USER', 'Database user name'
+        c.option 'password', '--password PW', "Database user's password (default: '')"
+        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
+      end
+      # This query will pull blog posts from all entries across all blogs. If
+      # you've got unpublished, deleted or otherwise hidden posts please sift
+      # through the created posts to make sure nothing is accidently published.
+      QUERY = "SELECT id, \
+                      permalink, \
+                      body, \
+                      published_at, \
+                      title \
+               FROM contents \
+               WHERE user_id = 1 AND \
+                     type = 'Article' AND \
+                     published_at IS NOT NULL \
+               ORDER BY published_at"
+      def self.process(options)
+        dbname = options.fetch('dbname')
+        user   = options.fetch('user')
+        pass   = options.fetch('password', '')
+        host   = options.fetch('host', "localhost")
+        db = Sequel.mysql(dbname, :user => user,
+                                  :password => pass,
+                                  :host => host,
+                                  :encoding => 'utf8')
+        FileUtils.mkdir_p "_posts"
+        db[QUERY].each do |post|
+          title = post[:title]
+          slug = post[:permalink]
+          date = post[:published_at]
+          content = post[:body]
+          # Ideally, this script would determine the post format (markdown,
+          # html, etc) and create files with proper extensions. At this point
+          # it just assumes that markdown will be acceptable.
+          name = [date.year, date.month, date.day, slug].join('-') + ".markdown"
+          data = {
+             'layout' => 'post',
+             'title' => title.to_s,
+             'mt_id' => post[:entry_id],
+           }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
+          File.open("_posts/#{name}", "w") do |f|
+            f.puts data
+            f.puts "---"
+            f.puts content
+          end
+        end
+      end
+    end
+  end
+end