RubyGems - bunto-import - Versions diffs - 1.0.0 - Mend

bunto-import 1.0.0

Files changed (33) hide show

checksums.yaml +7 -0
data/LICENSE +21 -0
data/README.markdown +32 -0
data/lib/bunto-import.rb +49 -0
data/lib/bunto-import/importer.rb +26 -0
data/lib/bunto-import/importers.rb +10 -0
data/lib/bunto-import/importers/behance.rb +80 -0
data/lib/bunto-import/importers/blogger.rb +264 -0
data/lib/bunto-import/importers/csv.rb +96 -0
data/lib/bunto-import/importers/drupal6.rb +139 -0
data/lib/bunto-import/importers/drupal7.rb +111 -0
data/lib/bunto-import/importers/easyblog.rb +96 -0
data/lib/bunto-import/importers/enki.rb +74 -0
data/lib/bunto-import/importers/ghost.rb +68 -0
data/lib/bunto-import/importers/google_reader.rb +64 -0
data/lib/bunto-import/importers/joomla.rb +90 -0
data/lib/bunto-import/importers/joomla3.rb +91 -0
data/lib/bunto-import/importers/jrnl.rb +125 -0
data/lib/bunto-import/importers/marley.rb +72 -0
data/lib/bunto-import/importers/mephisto.rb +99 -0
data/lib/bunto-import/importers/mt.rb +257 -0
data/lib/bunto-import/importers/posterous.rb +130 -0
data/lib/bunto-import/importers/rss.rb +62 -0
data/lib/bunto-import/importers/s9y.rb +60 -0
data/lib/bunto-import/importers/textpattern.rb +70 -0
data/lib/bunto-import/importers/tumblr.rb +289 -0
data/lib/bunto-import/importers/typo.rb +88 -0
data/lib/bunto-import/importers/wordpress.rb +372 -0
data/lib/bunto-import/importers/wordpressdotcom.rb +207 -0
data/lib/bunto-import/util.rb +76 -0
data/lib/bunto-import/version.rb +3 -0
data/lib/bunto/commands/import.rb +79 -0
metadata +374 -0

data/lib/bunto-import/importers/rss.rb ADDED

@@ -0,0 +1,62 @@
+module BuntoImport
+  module Importers
+    class RSS < Importer
+      def self.specify_options(c)
+        c.option 'source', '--source NAME', 'The RSS file or URL to import'
+      end
+      def self.validate(options)
+        if options['source'].nil?
+          abort "Missing mandatory option --source."
+        end
+      end
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          rss
+          rss/1.0
+          rss/2.0
+          open-uri
+          fileutils
+          safe_yaml
+        ])
+      end
+      # Process the import.
+      #
+      # source - a URL or a local file String.
+      #
+      # Returns nothing.
+      def self.process(options)
+        source = options.fetch('source')
+        content = ""
+        open(source) { |s| content = s.read }
+        rss = ::RSS::Parser.parse(content, false)
+        raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss
+        rss.items.each do |item|
+          formatted_date = item.date.strftime('%Y-%m-%d')
+          post_name = item.title.split(%r{ |!|/|:|&|-|$|,}).map do |i|
+            i.downcase if i != ''
+          end.compact.join('-')
+          name = "#{formatted_date}-#{post_name}"
+          header = {
+            'layout' => 'post',
+            'title' => item.title
+          }
+          FileUtils.mkdir_p("_posts")
+          File.open("_posts/#{name}.html", "w") do |f|
+            f.puts header.to_yaml
+            f.puts "---\n\n"
+            f.puts item.description
+          end
+        end
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/s9y.rb ADDED

@@ -0,0 +1,60 @@
+module BuntoImport
+  module Importers
+    class S9Y < Importer
+      def self.specify_options(c)
+        c.option 'source', '--source SOURCE', 'The URL of the S9Y RSS feed'
+      end
+      def self.validate(options)
+        if options['source'].nil?
+          abort "Missing mandatory option --source, e.g. --source \"http://blog.example.com/rss.php?version=2.0&all=1\""
+        end
+      end
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          open-uri
+          rss
+          fileutils
+          safe_yaml
+        ])
+      end
+      def self.process(options)
+        source = options.fetch('source')
+        FileUtils.mkdir_p("_posts")
+        text = ''
+        open(source) { |line| text = line.read }
+        rss = ::RSS::Parser.parse(text)
+        rss.items.each do |item|
+          post_url = item.link.match('.*(/archives/.*)')[1]
+          categories = item.categories.collect { |c| c.content }
+          content = item.content_encoded.strip
+          date = item.date
+          slug = item.link.match('.*/archives/[0-9]+-(.*)\.html')[1]
+          name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month, date.day,
+                                                 slug]
+          data = {
+            'layout' => 'post',
+            'title' => item.title,
+            'categories' => categories,
+            'permalink' => post_url,
+            's9y_link' => item.link,
+            'date' => item.date,
+          }.delete_if { |k,v| v.nil? || v == '' }.to_yaml
+          # Write out the data and content to file
+          File.open("_posts/#{name}", "w") do |f|
+            f.puts data
+            f.puts "---"
+            f.puts content
+          end
+        end
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/textpattern.rb ADDED

@@ -0,0 +1,70 @@
+module BuntoImport
+  module Importers
+    class TextPattern < Importer
+      # Reads a MySQL database via Sequel and creates a post file for each post.
+      # The only posts selected are those with a status of 4 or 5, which means
+      # "live" and "sticky" respectively.
+      # Other statuses are 1 => draft, 2 => hidden and 3 => pending.
+      QUERY = "SELECT Title, \
+                      url_title, \
+                      Posted, \
+                      Body, \
+                      Keywords \
+               FROM textpattern \
+               WHERE Status = '4' OR \
+                     Status = '5'"
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          rubygems
+          sequel
+          fileutils
+          safe_yaml
+        ])
+      end
+      def self.specify_options(c)
+        c.option 'dbname', '--dbname DB', 'Database name'
+        c.option 'user', '--user USER', 'Database user name'
+        c.option 'password', '--password PW', "Database user's password"
+        c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
+      end
+      def self.process(options)
+        dbname = options.fetch('dbname')
+        user   = options.fetch('user')
+        pass   = options.fetch('password', "")
+        host   = options.fetch('host', "localhost")
+        db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
+        FileUtils.mkdir_p "_posts"
+        db[QUERY].each do |post|
+          # Get required fields and construct Bunto compatible name.
+          title = post[:Title]
+          slug = post[:url_title]
+          date = post[:Posted]
+          content = post[:Body]
+          name = [date.strftime("%Y-%m-%d"), slug].join('-') + ".textile"
+          # Get the relevant fields as a hash, delete empty fields and convert
+          # to YAML for the header.
+          data = {
+             'layout' => 'post',
+             'title' => title.to_s,
+             'tags' => post[:Keywords].split(',')
+           }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
+          # Write out the data and content to file.
+          File.open("_posts/#{name}", "w") do |f|
+            f.puts data
+            f.puts "---"
+            f.puts content
+          end
+        end
+      end
+    end
+  end
+end

data/lib/bunto-import/importers/tumblr.rb ADDED

@@ -0,0 +1,289 @@
+module BuntoImport
+  module Importers
+    class Tumblr < Importer
+      def self.require_deps
+        BuntoImport.require_with_fallback(%w[
+          rubygems
+          fileutils
+          open-uri
+          nokogiri
+          json
+          uri
+          time
+          bunto
+        ])
+      end
+      def self.specify_options(c)
+        c.option 'url', '--url URL', 'Tumblr URL'
+        c.option 'format', '--format FORMAT', 'Output format (default: "html")'
+        c.option 'grab_images', '--grab_images', 'Whether to grab images (default: false)'
+        c.option 'add_highlights', '--add_highlights', 'Whether to add highlights (default: false)'
+        c.option 'rewrite_urls', '--rewrite_urls', 'Whether to rewrite URLs (default: false)'
+      end
+      def self.process(options)
+        url            = options.fetch('url')
+        format         = options.fetch('format', "html")
+        grab_images    = options.fetch('grab_images', false)
+        add_highlights = options.fetch('add_highlights', false)
+        rewrite_urls   = options.fetch('rewrite_urls', false)
+        @grab_images = grab_images
+        FileUtils.mkdir_p "_posts/tumblr"
+        url += "/api/read/json/"
+        per_page = 50
+        posts = []
+        # Two passes are required so that we can rewrite URLs.
+        # First pass builds up an array of each post as a hash.
+        begin
+          current_page = (current_page || -1) + 1
+          feed_url = url + "?num=#{per_page}&start=#{current_page * per_page}"
+          puts "Fetching #{feed_url}"
+          feed = open(feed_url)
+          json = feed.readlines.join("\n")[21...-2]  # Strip Tumblr's JSONP chars.
+          blog = JSON.parse(json)
+          puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
+          batch = blog["posts"].map { |post| post_to_hash(post, format) }
+          # If we're rewriting, save the posts for later.  Otherwise, go ahead and
+          # dump these to disk now
+          if rewrite_urls
+            posts += batch
+          else
+            batch.each {|post| write_post(post, format == "md", add_highlights)}
+          end
+        end until blog["posts"].size < per_page
+        # Rewrite URLs, create redirects and write out out posts if necessary
+        if rewrite_urls
+          posts = rewrite_urls_and_redirects posts
+          posts.each {|post| write_post(post, format == "md", add_highlights)}
+        end
+      end
+      private
+      # Writes a post out to disk
+      def self.write_post(post, use_markdown, add_highlights)
+        content = post[:content]
+        if content
+          if use_markdown
+            content = html_to_markdown content
+            if add_highlights
+              tumblr_url = URI.parse(post[:slug]).path
+              redirect_dir = tumblr_url.sub(/\//, "") + "/"
+              FileUtils.mkdir_p redirect_dir
+              content = add_syntax_highlights(content, redirect_dir)
+            end
+          end
+          File.open("_posts/tumblr/#{post[:name]}", "w") do |f|
+            f.puts post[:header].to_yaml + "---\n" + content
+          end
+        end
+      end
+      # Converts each type of Tumblr post to a hash with all required
+      # data for Bunto.
+      def self.post_to_hash(post, format)
+        case post['type']
+          when "regular"
+            title = post["regular-title"]
+            content = post["regular-body"]
+          when "link"
+            title = post["link-text"] || post["link-url"]
+            content = "<a href=\"#{post["link-url"]}\">#{title}</a>"
+            unless post["link-description"].nil?
+              content << "<br/>" + post["link-description"]
+            end
+          when "photo"
+            title = post["slug"].gsub("-"," ")
+            if post["photos"].size > 1
+              content = ""
+              post["photos"].each do |post_photo|
+                photo = fetch_photo post_photo
+                content << photo + "<br/>"
+                content << post_photo["caption"]
+              end
+            else
+              content = fetch_photo post
+            end
+            content << "<br/>" + post["photo-caption"]
+          when "audio"
+            if !post["id3-title"].nil?
+              title = post["id3-title"]
+              content = post["audio-player"] + "<br/>" + post["audio-caption"]
+            else
+              title = post["audio-caption"]
+              content = post["audio-player"]
+            end
+          when "quote"
+            title = post["quote-text"]
+            content = "<blockquote>#{post["quote-text"]}</blockquote>"
+            unless post["quote-source"].nil?
+              content << "&#8212;" + post["quote-source"]
+            end
+          when "conversation"
+            title = post["conversation-title"]
+            content = "<section><dialog>"
+            post["conversation"].each do |line|
+              content << "<dt>#{line['label']}</dt><dd>#{line['phrase']}</dd>"
+            end
+            content << "</section></dialog>"
+          when "video"
+            title = post["video-title"]
+            content = post["video-player"]
+            unless post["video-caption"].nil?
+              unless content.nil?
+                content << "<br/>" + post["video-caption"]
+              else
+                content = post["video-caption"]
+              end
+            end
+          when "answer"
+            title = post["question"]
+            content = post["answer"]
+        end
+        date = Date.parse(post['date']).to_s
+        title = Nokogiri::HTML(title).text
+        title = "no title" if title.empty?
+        slug = if post["slug"] && post["slug"].strip != ""
+          post["slug"]
+        elsif title && title.downcase.gsub(/[^a-z0-9\-]/, '') != '' && title != 'no title'
+          slug = title.downcase.strip.gsub(' ', '-').gsub(/[^a-z0-9\-]/, '')
+          slug.length > 200 ? slug.slice(0..200) : slug
+        else
+          slug = post['id']
+        end
+        {
+          :name => "#{date}-#{slug}.#{format}",
+          :header => {
+            "layout" => "post",
+            "title" => title,
+            "date" => Time.parse(post['date']).xmlschema,
+            "tags" => (post["tags"] or []),
+            "tumblr_url" => post["url-with-slug"]
+          },
+          :content => content,
+          :url => post["url"],
+          :slug => post["url-with-slug"],
+        }
+      end
+      # Attempts to fetch the largest version of a photo available for a post.
+      # If that file fails, it tries the next smaller size until all available
+      # photo URLs are exhausted.  If they all fail, the import is aborted.
+      def self.fetch_photo(post)
+        sizes = post.keys.map {|k| k.gsub("photo-url-", "").to_i}
+        sizes.sort! {|a,b| b <=> a}
+        ext_key, ext_val = post.find do |k,v|
+          k =~ /^photo-url-/ && v.split("/").last =~ /\./
+        end
+        ext = "." + ext_val.split(".").last
+        sizes.each do |size|
+          url = post["photo-url"] || post["photo-url-#{size}"]
+          next if url.nil?
+          begin
+            return "<img src=\"#{save_photo(url, ext)}\"/>"
+          rescue OpenURI::HTTPError => err
+            puts "Failed to grab photo"
+          end
+        end
+        abort "Failed to fetch photo for post #{post['url']}"
+      end
+      # Create a Hash of old urls => new urls, for rewriting and
+      # redirects, and replace urls in each post. Instantiate Bunto
+      # site/posts to get the correct permalink format.
+      def self.rewrite_urls_and_redirects(posts)
+        site = Bunto::Site.new(Bunto.configuration({}))
+        urls = Hash[posts.map { |post|
+          # Create an initial empty file for the post so that
+          # we can instantiate a post object.
+          File.open("_posts/tumblr/#{post[:name]}", "w")
+          tumblr_url = URI.parse(URI.encode(post[:slug])).path
+          bunto_url = Bunto::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
+          redirect_dir = tumblr_url.sub(/\//, "") + "/"
+          FileUtils.mkdir_p redirect_dir
+          File.open(redirect_dir + "index.html", "w") do |f|
+            f.puts "<html><head><link rel=\"canonical\" href=\"" +
+                   "#{bunto_url}\"><meta http-equiv=\"refresh\" content=\"0; " +
+                   "url=#{bunto_url}\"></head><body></body></html>"
+          end
+          [tumblr_url, bunto_url]
+        }]
+        posts.map { |post|
+          urls.each do |tumblr_url, bunto_url|
+            post[:content].gsub!(/#{tumblr_url}/i, bunto_url)
+          end
+          post
+        }
+      end
+      # Convert preserving HTML tables as per the markdown docs.
+      def self.html_to_markdown(content)
+        preserve = ["table", "tr", "th", "td"]
+        preserve.each do |tag|
+          content.gsub!(/<#{tag}/i, "$$" + tag)
+          content.gsub!(/<\/#{tag}/i, "||" + tag)
+        end
+        content = Nokogiri::HTML(content.gsub("'", "''")).text
+        preserve.each do |tag|
+          content.gsub!("$$" + tag, "<" + tag)
+          content.gsub!("||" + tag, "</" + tag)
+        end
+        content
+      end
+      # Adds pygments highlight tags to code blocks in posts that use
+      # markdown format. This doesn't guess the language of the code
+      # block, so you should modify this to suit your own content.
+      # For example, my code block only contain Python and JavaScript,
+      # so I can assume the block is JavaScript if it contains a
+      # semi-colon.
+      def self.add_syntax_highlights(content, redirect_dir)
+        lines = content.split("\n")
+        block, indent, lang, start = false, /^    /, nil, nil
+        lines.each_with_index do |line, i|
+          if !block && line =~ indent
+            block = true
+            lang = "python"
+            start = i
+          elsif block
+            lang = "javascript" if line =~ /;$/
+            block = line =~ indent && i < lines.size - 1 # Also handle EOF
+            if !block
+              lines[start] = "{% highlight #{lang} %}"
+              lines[i - 1] = "{% endhighlight %}"
+            end
+            FileUtils.cp(redirect_dir + "index.html", redirect_dir + "../" + "index.html")
+            lines[i] = lines[i].sub(indent, "")
+          end
+        end
+        lines.join("\n")
+      end
+      def self.save_photo(url, ext)
+        if @grab_images
+          path = "tumblr_files/#{url.split('/').last}"
+          path += ext unless path =~ /#{ext}$/
+          FileUtils.mkdir_p "tumblr_files"
+          # Don't fetch if we've already cached this file
+          unless File.size? path
+            puts "Fetching photo #{url}"
+            File.open(path, "w") { |f| f.write(open(url).read) }
+          end
+          url = "/" + path
+        end
+        url
+      end
+    end
+  end
+end