jekyll-import 0.1.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -0
- data/History.txt +7 -0
- data/LICENSE +21 -0
- data/README.md +4 -0
- data/Rakefile +151 -0
- data/jekyll-import.gemspec +80 -0
- data/lib/jekyll-import.rb +7 -0
- data/lib/jekyll/commands/import.rb +51 -0
- data/lib/jekyll/jekyll-import/csv.rb +26 -0
- data/lib/jekyll/jekyll-import/drupal6.rb +102 -0
- data/lib/jekyll/jekyll-import/drupal7.rb +73 -0
- data/lib/jekyll/jekyll-import/enki.rb +49 -0
- data/lib/jekyll/jekyll-import/joomla.rb +53 -0
- data/lib/jekyll/jekyll-import/marley.rb +52 -0
- data/lib/jekyll/jekyll-import/mephisto.rb +84 -0
- data/lib/jekyll/jekyll-import/mt.rb +142 -0
- data/lib/jekyll/jekyll-import/posterous.rb +111 -0
- data/lib/jekyll/jekyll-import/rss.rb +63 -0
- data/lib/jekyll/jekyll-import/s9y.rb +49 -0
- data/lib/jekyll/jekyll-import/textpattern.rb +58 -0
- data/lib/jekyll/jekyll-import/tumblr.rb +195 -0
- data/lib/jekyll/jekyll-import/typo.rb +67 -0
- data/lib/jekyll/jekyll-import/wordpress.rb +296 -0
- data/lib/jekyll/jekyll-import/wordpressdotcom.rb +82 -0
- data/test/helper.rb +43 -0
- data/test/test_mt_importer.rb +104 -0
- data/test/test_wordpress_importer.rb +9 -0
- data/test/test_wordpressdotcom_importer.rb +8 -0
- metadata +334 -0
| @@ -0,0 +1,67 @@ | |
| 1 | 
            +
            # Author: Toby DiPasquale <toby@cbcg.net>
         | 
| 2 | 
            +
            require 'fileutils'
         | 
| 3 | 
            +
            require 'rubygems'
         | 
| 4 | 
            +
            require 'sequel'
         | 
| 5 | 
            +
            require 'safe_yaml'
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            module JekyllImport
              # Imports the published rows of a Typo "contents" table as Jekyll posts.
              module Typo
                # This SQL *should* work for both MySQL and PostgreSQL.
                SQL = <<-EOS
                SELECT c.id id,
                       c.title title,
                       c.permalink slug,
                       c.body body,
                       c.extended extended,
                       c.published_at date,
                       c.state state,
                       COALESCE(tf.name, 'html') filter
                  FROM contents c
                       LEFT OUTER JOIN text_filters tf
                                    ON c.text_filter_id = tf.id
                EOS

                # Reads every row returned by SQL and writes one file per published
                # row into ./_posts (created if missing).
                #
                # server:: Database kind, 'postgres' or 'mysql' (String or Symbol).
                # dbname:: Name of the database.
                # user::   Database user name.
                # pass::   Database user's password.
                # host::   Database host. Default: 'localhost'
                #
                # Raises RuntimeError when server is neither postgres nor mysql.
                def self.process(server, dbname, user, pass, host='localhost')
                  FileUtils.mkdir_p '_posts'
                  db = connect(server, dbname, user, pass, host)

                  db[SQL].each do |post|
                    # Only rows whose state contains "published" are exported.
                    next unless post[:state] =~ /published/

                    File.open("_posts/#{post_filename(post)}", 'w') do |f|
                      f.puts post_front_matter(post)
                      f.puts '---'
                      f.puts post_body(post)
                    end
                  end
                end

                # Opens the Sequel connection matching the requested server kind.
                def self.connect(server, dbname, user, pass, host)
                  settings = { :user => user, :password => pass, :host => host, :encoding => 'utf8' }
                  case server.intern
                  when :postgres
                    Sequel.postgres(dbname, settings)
                  when :mysql
                    Sequel.mysql(dbname, settings)
                  else
                    raise "Unknown database server '#{server}'"
                  end
                end

                # Builds "YYYY-MM-DD-slug.filter" for a row.
                def self.post_filename(post)
                  date = post[:date]

                  # A missing, empty or whitespace-only permalink gets the placeholder.
                  slug = post[:slug].to_s.strip
                  slug = "no slug" if slug.empty?

                  # Can have more than one text filter in this field, but we just want
                  # the first one for this. A blank field falls back to 'html', the
                  # same default the query uses for a missing filter.
                  extension = post[:filter].to_s.split(' ').first || 'html'

                  format('%04d-%02d-%02d-%s.%s', date.year, date.month, date.day, slug, extension)
                end

                # Builds the YAML header, leaving out nil and empty values.
                def self.post_front_matter(post)
                  header = {
                    'layout'  => 'post',
                    'title'   => post[:title].to_s,
                    'typo_id' => post[:id]
                  }
                  header.delete_if { |_key, value| value.nil? || value == '' }.to_yaml
                end

                # Returns the body text with carriage returns removed. The extended
                # text is appended after a "more" marker only when there is some, and
                # the row itself is left unmodified.
                def self.post_body(post)
                  body = post[:body].to_s
                  extended = post[:extended].to_s
                  body = "#{body}\n<!-- more -->\n#{extended}" unless extended.strip.empty?
                  body.delete("\r")
                end

                private_class_method :connect, :post_filename, :post_front_matter, :post_body
              end
            end
         | 
| @@ -0,0 +1,296 @@ | |
| 1 | 
            +
            require 'rubygems'
         | 
| 2 | 
            +
            require 'sequel'
         | 
| 3 | 
            +
            require 'fileutils'
         | 
| 4 | 
            +
            require 'psych'
         | 
| 5 | 
            +
            require 'safe_yaml'
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            # NOTE: This converter requires Sequel and the MySQL gems.
         | 
| 8 | 
            +
            # The MySQL gem can be difficult to install on OS X. Once you have MySQL
         | 
| 9 | 
            +
            # installed, running the following commands should work:
         | 
| 10 | 
            +
            # $ sudo gem install sequel
         | 
| 11 | 
            +
            # $ sudo gem install mysql -- --with-mysql-config=/usr/local/mysql/bin/mysql_config
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            module JekyllImport
         | 
| 14 | 
            +
              module WordPress
         | 
| 15 | 
            +
             | 
| 16 | 
            +
                # Main migrator function. Call this to perform the migration.
                #
                # dbname::  The name of the database
                # user::    The database user name
                # pass::    The database user's password
                # host::    The address of the MySQL database host. Default: 'localhost'
                # options:: A hash table of configuration options.
                #
                # Supported options are:
                #
                # :table_prefix::   Prefix of database tables used by WordPress.
                #                   Default: 'wp_'
                # :clean_entities:: If true, convert non-ASCII characters to HTML
                #                   entities in the posts, comments, titles, and
                #                   names. Requires the 'htmlentities' gem to
                #                   work; if the gem cannot be loaded a warning is
                #                   printed and the option is switched off.
                #                   Default: true.
                # :comments::       If true, migrate post comments too. Comments
                #                   are saved in the post's YAML front matter.
                #                   Default: true.
                # :categories::     If true, save the post's categories in its
                #                   YAML front matter. Default: true.
                # :tags::           If true, save the post's tags in its
                #                   YAML front matter. Default: true.
                # :more_excerpt::   If true, when a post has no excerpt but
                #                   does have a <!-- more --> tag, use the
                #                   preceding post content as the excerpt.
                #                   Default: true.
                # :more_anchor::    If true, convert a <!-- more --> tag into
                #                   two HTML anchors with ids "more" and
                #                   "more-NNN" (where NNN is the post number).
                #                   Default: true.
                # :status::         Allowed post status, or an array of them. Only
                #                   posts with matching status will be migrated.
                #                   Known statuses are :publish, :draft, :private,
                #                   and :revision. If this is nil or an empty
                #                   array, all posts are migrated regardless of
                #                   status. Default: [:publish].
                #
                def self.process(dbname, user, pass, host='localhost', options={})
                  options = {
                    :table_prefix   => 'wp_',
                    :clean_entities => true,
                    :comments       => true,
                    :categories     => true,
                    :tags           => true,
                    :more_excerpt   => true,
                    :more_anchor    => true,
                    :status         => [:publish] # :draft, :private, :revision
                  }.merge(options)

                  if options[:clean_entities]
                    begin
                      require 'htmlentities'
                    rescue LoadError
                      STDERR.puts "Could not require 'htmlentities', so the " +
                                  ":clean_entities option is now disabled."
                      options[:clean_entities] = false
                    end
                  end

                  FileUtils.mkdir_p("_posts")

                  db = Sequel.mysql(dbname, :user => user, :password => pass,
                                    :host => host, :encoding => 'utf8')

                  px = options[:table_prefix]

                  posts_query = "
                     SELECT
                       posts.ID            AS `id`,
                       posts.guid          AS `guid`,
                       posts.post_type     AS `type`,
                       posts.post_status   AS `status`,
                       posts.post_title    AS `title`,
                       posts.post_name     AS `slug`,
                       posts.post_date     AS `date`,
                       posts.post_content  AS `content`,
                       posts.post_excerpt  AS `excerpt`,
                       posts.comment_count AS `comment_count`,
                       users.display_name  AS `author`,
                       users.user_login    AS `author_login`,
                       users.user_email    AS `author_email`,
                       users.user_url      AS `author_url`
                     FROM #{px}posts AS `posts`
                       LEFT JOIN #{px}users AS `users`
                         ON posts.post_author = users.ID"

                  # Array() accepts nil, a single status or a list, so a scalar such
                  # as :draft is no longer indexed character by character.
                  statuses = Array(options[:status]).map { |wanted| wanted.to_s }

                  posts =
                    if statuses.empty?
                      db[posts_query]
                    else
                      # The list is bound through a placeholder so Sequel quotes the
                      # values instead of them being spliced into the SQL text.
                      db[posts_query + "\n                     WHERE posts.post_status IN ?", statuses]
                    end

                  posts.each do |post|
                    process_post(post, db, options)
                  end
                end
         | 
| 117 | 
            +
             | 
| 118 | 
            +
             | 
| 119 | 
            +
                # Writes one post row to _posts/<date>-<slug>.markdown: a YAML header,
                # a "---" line, then the post content.
                #
                # post::    Row hash with symbol keys (:id, :title, :slug, :date,
                #           :content, :excerpt, :comment_count, :status, :type, :guid
                #           and the :author* fields).
                # db::      Handle used as db[sql] to read the post's terms and
                #           comments. It is not touched when :categories, :tags and
                #           :comments are all off.
                # options:: Option hash; the keys read here are :table_prefix,
                #           :clean_entities, :comments, :categories, :tags,
                #           :more_excerpt and :more_anchor.
                def self.process_post(post, db, options)
                  cleaning = options[:clean_entities]

                  title = post[:title]
                  title = clean_entities(title) if cleaning

                  slug = post[:slug]
                  slug = sluggify(title) if !slug || slug.empty?

                  date = post[:date] || Time.now
                  name = "%02d-%02d-%02d-%s.markdown" % [date.year, date.month,
                                                         date.day, slug]

                  content = post[:content].to_s
                  content = clean_entities(content) if cleaning

                  excerpt = post[:excerpt].to_s

                  more_tag = /<!-- *more *-->/
                  more_index = content.index(more_tag)
                  more_anchor = nil
                  if more_index
                    if options[:more_excerpt] && excerpt.empty?
                      excerpt = content[0...more_index]
                    end
                    if options[:more_anchor]
                      # Record the anchor id so it reaches the front matter; it used
                      # to be stored in an unused local and was always dropped.
                      more_anchor = "more"
                      # Non-mutating sub: content may still be the row's own string.
                      content = content.sub(more_tag,
                                            "<a id=\"more\"></a>" +
                                            "<a id=\"more-#{post[:id]}\"></a>")
                    end
                  end

                  categories, tags = post_terms(post, db, options)
                  comments = post_comments(post, db, options)

                  # Get the relevant fields as a hash, delete empty fields and
                  # convert to YAML for the header.
                  data = {
                    'layout'        => post[:type].to_s,
                    'status'        => post[:status].to_s,
                    'published'     => (post[:status].to_s == "publish"),
                    'title'         => title.to_s,
                    'author'        => post[:author].to_s,
                    'author_login'  => post[:author_login].to_s,
                    'author_email'  => post[:author_email].to_s,
                    'author_url'    => post[:author_url].to_s,
                    'excerpt'       => excerpt,
                    'more_anchor'   => more_anchor,
                    'wordpress_id'  => post[:id],
                    'wordpress_url' => post[:guid].to_s,
                    'date'          => date,
                    'categories'    => options[:categories] ? categories : nil,
                    'tags'          => options[:tags] ? tags : nil,
                    'comments'      => options[:comments] ? comments : nil,
                  }.delete_if { |k,v| v.nil? || v == '' }.to_yaml

                  # Write out the data and content to file
                  File.open("_posts/#{name}", "w") do |f|
                    f.puts data
                    f.puts "---"
                    f.puts content
                  end
                end

                # Internal helper for process_post: returns [categories, tags] for
                # the post, each an array of names. Runs no query when both the
                # :categories and :tags options are off.
                def self.post_terms(post, db, options)
                  categories = []
                  tags = []
                  return [categories, tags] unless options[:categories] || options[:tags]

                  px = options[:table_prefix]
                  cquery =
                    "SELECT
                       terms.name AS `name`,
                       ttax.taxonomy AS `type`
                     FROM
                       #{px}terms AS `terms`,
                       #{px}term_relationships AS `trels`,
                       #{px}term_taxonomy AS `ttax`
                     WHERE
                       trels.object_id = '#{post[:id]}' AND
                       trels.term_taxonomy_id = ttax.term_taxonomy_id AND
                       terms.term_id = ttax.term_id"

                  db[cquery].each do |term|
                    bucket =
                      if options[:categories] && term[:type] == "category"
                        categories
                      elsif options[:tags] && term[:type] == "post_tag"
                        tags
                      end
                    # Terms of any other taxonomy are ignored.
                    next unless bucket

                    bucket << (options[:clean_entities] ? clean_entities(term[:name]) : term[:name])
                  end

                  [categories, tags]
                end

                # Internal helper for process_post: returns the post's comments
                # (rows whose comment_approved is not 'spam') as an array of
                # string-keyed hashes ordered by comment id. Runs no query when
                # :comments is off or the post's comment_count is not positive.
                def self.post_comments(post, db, options)
                  return [] unless options[:comments] && post[:comment_count].to_i > 0

                  px = options[:table_prefix]
                  cquery =
                    "SELECT
                       comment_ID           AS `id`,
                       comment_author       AS `author`,
                       comment_author_email AS `author_email`,
                       comment_author_url   AS `author_url`,
                       comment_date         AS `date`,
                       comment_date_gmt     AS `date_gmt`,
                       comment_content      AS `content`
                     FROM #{px}comments
                     WHERE
                       comment_post_ID = '#{post[:id]}' AND
                       comment_approved != 'spam'"

                  comments = db[cquery].map do |comment|
                    # dup first: nil.to_s is a frozen string on current Rubies and
                    # force_encoding on it would raise.
                    comcontent = comment[:content].to_s.dup
                    comcontent.force_encoding("UTF-8") if comcontent.respond_to?(:force_encoding)
                    comauthor = comment[:author].to_s
                    if options[:clean_entities]
                      comcontent = clean_entities(comcontent)
                      comauthor = clean_entities(comauthor)
                    end

                    {
                      'id'           => comment[:id].to_i,
                      'author'       => comauthor,
                      'author_email' => comment[:author_email].to_s,
                      'author_url'   => comment[:author_url].to_s,
                      'date'         => comment[:date].to_s,
                      'date_gmt'     => comment[:date_gmt].to_s,
                      'content'      => comcontent,
                    }
                  end

                  comments.sort_by { |entry| entry['id'] }
                end
         | 
| 266 | 
            +
             | 
| 267 | 
            +
             | 
| 268 | 
            +
                # Encodes text with HTMLEntities (:named mode), then turns the markup
                # characters back into their literal form so that HTML already
                # present in the text keeps working.
                #
                # text:: The text to encode. It is not modified; a new string is
                #        returned.
                #
                # Requires the 'htmlentities' gem to be loaded already.
                def self.clean_entities( text )
                  # Work on a copy: the argument may be frozen, and the caller's
                  # string should keep its own encoding tag.
                  if text.respond_to?(:force_encoding)
                    text = text.dup.force_encoding("UTF-8")
                  end
                  text = HTMLEntities.new.encode(text, :named)
                  # We don't want to convert these, it would break all
                  # HTML tags in the post and comments.
                  # NOTE(review): the entity spellings were lost in the copy of the
                  # file this was restored from; "&#47;" in particular is inferred
                  # from the pattern of the other lines - confirm against upstream.
                  literals = {
                    "&amp;"  => "&",
                    "&lt;"   => "<",
                    "&gt;"   => ">",
                    "&quot;" => '"',
                    "&apos;" => "'",
                    "&#47;"  => "/",
                  }
                  # One pass only, so an entity written out in the source text
                  # ("&lt;" encoded to "&amp;lt;") comes back as "&lt;" and is not
                  # unescaped a second time into "<".
                  text.gsub(/&(?:amp|lt|gt|quot|apos|#47);/) { |entity| literals[entity] }
                end
         | 
| 283 | 
            +
             | 
| 284 | 
            +
             | 
| 285 | 
            +
                # Builds a URL slug from a title: lower-cased runs of ASCII letters
                # and digits joined by single hyphens.
                #
                # title:: The post title. nil is treated as an empty title and gives
                #         an empty slug.
                #
                # When the optional 'unidecode' gem can be loaded, the title is first
                # converted with its to_ascii; otherwise a hint is printed to STDERR
                # and non-ASCII characters are simply dropped.
                def self.sluggify( title )
                  # A post without a title must not abort the import.
                  text = title.to_s
                  begin
                    require 'unidecode'
                    text = text.to_ascii
                  rescue LoadError
                    STDERR.puts "Could not require 'unidecode'. If your post titles have non-ASCII characters, you could get nicer permalinks by installing unidecode."
                  end
                  text.downcase.scan(/[0-9A-Za-z]+/).join("-")
                end
         | 
| 294 | 
            +
             | 
| 295 | 
            +
              end
         | 
| 296 | 
            +
            end
         | 
| @@ -0,0 +1,82 @@ | |
| 1 | 
            +
            # coding: utf-8
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'rubygems'
         | 
| 4 | 
            +
            require 'hpricot'
         | 
| 5 | 
            +
            require 'fileutils'
         | 
| 6 | 
            +
            require 'safe_yaml'
         | 
| 7 | 
            +
            require 'time'
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            module JekyllImport
              # This importer takes a wordpress.xml file, which can be exported from your
              # wordpress.com blog (/wp-admin/export.php).
              module WordpressDotCom
                # Converts every <item> of the export into an HTML file with YAML front
                # matter, written to "_<post_type>s/<YYYY-MM-DD>-<slug>.html" relative to
                # the current working directory.
                #
                # filename - path of the export file to read (default: "wordpress.xml").
                #
                # An item that cannot be parsed or written is reported on stdout and
                # skipped; the remaining items are still imported. A per-type summary is
                # printed at the end. Errors reading the export file itself propagate.
                def self.process(filename = "wordpress.xml")
                  import_count = Hash.new(0)
                  doc = Hpricot::XML(File.read(filename))

                  (doc/:channel/:item).each do |item|
                    begin
                      # Everything that touches the item lives inside this begin block so
                      # that a missing element or an unparseable date only skips this item
                      # instead of aborting the whole import.
                      title = item.at(:title).inner_text.strip
                      permalink_title = item.at('wp:post_name').inner_text
                      # Fallback to "prettified" title if post_name is empty (can happen)
                      permalink_title = sluggify(title) if permalink_title.empty?

                      date = Time.parse(item.at('wp:post_date').inner_text)
                      status = item.at('wp:status').inner_text
                      published = (status == "publish")

                      type = item.at('wp:post_type').inner_text
                      tags = (item/:category).map { |c| c.inner_text }.reject { |c| c == 'Uncategorized' }.uniq

                      metas = {}
                      item.search("wp:postmeta").each do |meta|
                        key = meta.at('wp:meta_key').inner_text
                        metas[key] = meta.at('wp:meta_value').inner_text
                      end

                      name = "#{date.strftime('%Y-%m-%d')}-#{permalink_title}.html"
                      header = {
                        'layout'    => type,
                        'title'     => title,
                        'tags'      => tags,
                        'status'    => status,
                        'type'      => type,
                        'published' => published,
                        'meta'      => metas
                      }

                      FileUtils.mkdir_p "_#{type}s"
                      File.open("_#{type}s/#{name}", "w") do |f|
                        # to_yaml emits the opening "---"; the explicit one closes the
                        # front matter before the post body.
                        f.puts header.to_yaml
                        f.puts '---'
                        f.puts item.at('content:encoded').inner_text
                      end
                    rescue => e
                      # title/name are nil here when the failure happened before they
                      # were extracted.
                      puts "Couldn't import post!"
                      puts "Title: #{title}"
                      puts "Name/Slug: #{name}\n"
                      puts "Error: #{e.message}"
                      next
                    end

                    import_count[type] += 1
                  end

                  import_count.each do |key, value|
                    puts "Imported #{value} #{key}s"
                  end
                end

                # Builds a file-name-friendly slug from a post title: every run of
                # non-alphanumeric characters becomes a single hyphen, hyphens at either
                # end are dropped, and the result is lower-cased.
                #
                # title - the post title String.
                #
                # Returns the slug String (empty when the title has no alphanumerics).
                def self.sluggify(title)
                  title.gsub(/[^[:alnum:]]+/, '-').gsub(/\A-|-\z/, '').downcase
                end
              end
            end
         | 
    
        data/test/helper.rb
    ADDED
    
    | @@ -0,0 +1,43 @@ | |
| 1 | 
            +
            # Optional coverage run (COVERAGE=true on Ruby newer than 1.9). This is the
            # first thing in the file, ahead of every other require.
            if RUBY_VERSION > '1.9' && ENV["COVERAGE"] == "true"
              require 'simplecov'
              require 'simplecov-gem-adapter'
              SimpleCov.start('gem')
            end

            # Standard-library pieces used directly by the helper methods in this file
            # (FileUtils in clear_dest, StringIO in capture_stdout); required explicitly
            # rather than relying on another library having loaded them.
            require 'fileutils'
            require 'stringio'

            require 'test/unit'
            require 'redgreen' if RUBY_VERSION < '1.9'
            require 'shoulda'
            require 'rr'

            # Load every importer. The glob result is sorted so the load order does not
            # depend on the platform's directory ordering.
            Dir.glob(File.expand_path('../../lib/jekyll/jekyll-import/*', __FILE__)).sort.each do |f|
              require f
            end

            # Send STDERR into the void to suppress program output messages.
            # Uses /dev/null when it exists and the 'NUL:' device otherwise.
            null_device = File.exist?('/dev/null') ? '/dev/null' : 'NUL:'
            STDERR.reopen(null_device)
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            # Shared helpers mixed into every test case of the suite.
            class Test::Unit::TestCase
              include RR::Adapters::TestUnit

              # Path inside the "dest" directory that sits next to this helper file.
              #
              # subdirs - optional path segments appended below "dest".
              #
              # Returns the joined path String.
              def dest_dir(*subdirs)
                File.join(File.dirname(__FILE__), 'dest', *subdirs)
              end

              # Path inside the "source" directory that sits next to this helper file.
              #
              # subdirs - optional path segments appended below "source".
              #
              # Returns the joined path String.
              def source_dir(*subdirs)
                File.join(File.dirname(__FILE__), 'source', *subdirs)
              end

              # Recursively deletes the "dest" directory; a missing directory is ignored
              # (FileUtils.rm_rf semantics).
              def clear_dest
                FileUtils.rm_rf(dest_dir)
              end

              # Runs the given block with $stdout redirected to an in-memory buffer.
              #
              # The previous stream is kept in a local variable rather than a global, so
              # nested calls each restore the stream they replaced. The stream is
              # restored even if the block raises.
              #
              # Returns the String written to $stdout while the block ran.
              def capture_stdout
                previous_stdout = $stdout
                buffer = StringIO.new
                $stdout = buffer
                yield
                buffer.string
              ensure
                $stdout = previous_stdout
              end
            end
         |