jekyll-import 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/lib/jekyll-import.rb +2 -0
  3. data/lib/jekyll-import/importer.rb +5 -3
  4. data/lib/jekyll-import/importers.rb +3 -0
  5. data/lib/jekyll-import/importers/behance.rb +7 -6
  6. data/lib/jekyll-import/importers/blogger.rb +23 -38
  7. data/lib/jekyll-import/importers/csv.rb +6 -5
  8. data/lib/jekyll-import/importers/drupal6.rb +7 -5
  9. data/lib/jekyll-import/importers/drupal7.rb +15 -13
  10. data/lib/jekyll-import/importers/drupal_common.rb +55 -31
  11. data/lib/jekyll-import/importers/easyblog.rb +8 -8
  12. data/lib/jekyll-import/importers/enki.rb +14 -12
  13. data/lib/jekyll-import/importers/ghost.rb +4 -1
  14. data/lib/jekyll-import/importers/google_reader.rb +4 -4
  15. data/lib/jekyll-import/importers/joomla.rb +9 -9
  16. data/lib/jekyll-import/importers/joomla3.rb +15 -15
  17. data/lib/jekyll-import/importers/jrnl.rb +11 -9
  18. data/lib/jekyll-import/importers/marley.rb +12 -10
  19. data/lib/jekyll-import/importers/mephisto.rb +15 -15
  20. data/lib/jekyll-import/importers/mt.rb +16 -13
  21. data/lib/jekyll-import/importers/posterous.rb +12 -9
  22. data/lib/jekyll-import/importers/roller.rb +277 -0
  23. data/lib/jekyll-import/importers/rss.rb +18 -6
  24. data/lib/jekyll-import/importers/s9y.rb +3 -1
  25. data/lib/jekyll-import/importers/s9y_database.rb +38 -53
  26. data/lib/jekyll-import/importers/textpattern.rb +6 -4
  27. data/lib/jekyll-import/importers/tumblr.rb +101 -107
  28. data/lib/jekyll-import/importers/typo.rb +29 -27
  29. data/lib/jekyll-import/importers/wordpress.rb +47 -59
  30. data/lib/jekyll-import/importers/wordpressdotcom.rb +27 -32
  31. data/lib/jekyll-import/util.rb +2 -1
  32. data/lib/jekyll-import/version.rb +3 -1
  33. data/lib/jekyll/commands/import.rb +4 -7
  34. metadata +40 -40
  35. data/lib/jekyll-import/importers/tmp.rb +0 -0
data/lib/jekyll-import/importers/roller.rb (new file)
@@ -0,0 +1,277 @@
1
+ # frozen_string_literal: true
2
+
3
+ module JekyllImport
4
+ module Importers
5
+ class Roller < Importer
6
+ def self.require_deps
7
+ JekyllImport.require_with_fallback(%w(
8
+ rubygems
9
+ sequel
10
+ fileutils
11
+ safe_yaml
12
+ unidecode
13
+ ))
14
+ end
15
+
16
+ def self.specify_options(c)
17
+ c.option "dbname", "--dbname DB", "Database name (default: '')"
18
+ c.option "socket", "--socket SOCKET", "Database socket (default: '')"
19
+ c.option "user", "--user USER", "Database user name (default: '')"
20
+ c.option "password", "--password PW", "Database user's password (default: '')"
21
+ c.option "host", "--host HOST", "Database host name (default: 'localhost')"
22
+ c.option "port", "--port PORT", "Database port number (default: '3306')"
23
+ c.option "clean_entities", "--clean_entities", "Whether to clean entities (default: true)"
24
+ c.option "comments", "--comments", "Whether to import comments (default: true)"
25
+ c.option "categories", "--categories", "Whether to import categories (default: true)"
26
+ c.option "tags", "--tags", "Whether to import tags (default: true)"
27
+
28
+ c.option "status", "--status STATUS,STATUS2", Array,
29
+ "Array of allowed statuses (default: ['PUBLISHED'], other options: 'DRAFT')"
30
+ end
31
+
32
+ # Main migrator function. Call this to perform the migration.
33
+ #
34
+ # dbname:: The name of the database
35
+ # user:: The database user name
36
+ # pass:: The database user's password
37
+ # host:: The address of the MySQL database host. Default: 'localhost'
38
+ # port:: The port number of the MySQL database. Default: '3306'
39
+ # socket:: The database socket's path
40
+ # options:: A hash table of configuration options.
41
+ #
42
+ # Supported options are:
43
+ #
44
+ # :clean_entities:: If true, convert non-ASCII characters to HTML
45
+ # entities in the posts, comments, titles, and
46
+ # names. Requires the 'htmlentities' gem to
47
+ # work. Default: true.
48
+ # :comments:: If true, migrate post comments too. Comments
49
+ # are saved in the post's YAML front matter.
50
+ # Default: true.
51
+ # :categories:: If true, save the post's categories in its
52
+ # YAML front matter. Default: true.
53
+ # :tags:: If true, save the post's tags in its
54
+ # YAML front matter. Default: true.
55
+ # :extension:: Set the post extension. Default: "html"
56
+ # :status:: Array of allowed post statuses. Only
57
+ # posts with matching status will be migrated.
58
+ # Known statuses are :PUBLISHED and :DRAFT
59
+ # If this is nil or an empty
60
+ # array, all posts are migrated regardless of
61
+ # status. Default: [:PUBLISHED].
62
+ #
63
+ def self.process(opts)
64
+ options = {
65
+ :user => opts.fetch("user", ""),
66
+ :pass => opts.fetch("password", ""),
67
+ :host => opts.fetch("host", "localhost"),
68
+ :port => opts.fetch("port", "3306"),
69
+ :socket => opts.fetch("socket", nil),
70
+ :dbname => opts.fetch("dbname", ""),
71
+ :clean_entities => opts.fetch("clean_entities", true),
72
+ :comments => opts.fetch("comments", true),
73
+ :categories => opts.fetch("categories", true),
74
+ :tags => opts.fetch("tags", true),
75
+ :extension => opts.fetch("extension", "html"),
76
+ :status => opts.fetch("status", ["PUBLISHED"]).map(&:to_sym) # :DRAFT
77
+ }
78
+
79
+ if options[:clean_entities]
80
+ begin
81
+ require "htmlentities"
82
+ rescue LoadError
83
+ STDERR.puts "Could not require 'htmlentities', so the :clean_entities option is now disabled."
84
+ options[:clean_entities] = false
85
+ end
86
+ end
87
+
88
+ FileUtils.mkdir_p("_posts")
89
+ FileUtils.mkdir_p("_drafts") if options[:status].include? :DRAFT
90
+
91
+ db = Sequel.mysql2(options[:dbname],
92
+ :user => options[:user],
93
+ :password => options[:pass],
94
+ :socket => options[:socket],
95
+ :host => options[:host],
96
+ :port => options[:port],
97
+ :encoding => "utf8")
98
+
99
+ posts_query = "
100
+ SELECT
101
+ weblogentry.id AS `id`,
102
+ weblogentry.status AS `status`,
103
+ weblogentry.title AS `title`,
104
+ weblogentry.anchor AS `slug`,
105
+ weblogentry.updatetime AS `date`,
106
+ weblogentry.text AS `content`,
107
+ weblogentry.summary AS `excerpt`,
108
+ weblogentry.categoryid AS `categoryid`,
109
+ roller_user.fullname AS `author`,
110
+ roller_user.username AS `author_login`,
111
+ roller_user.emailaddress AS `author_email`,
112
+ weblog.handle AS `site`
113
+ FROM weblogentry AS `weblogentry`
114
+ LEFT JOIN roller_user AS `roller_user`
115
+ ON weblogentry.creator = roller_user.username
116
+ LEFT JOIN weblog AS `weblog`
117
+ ON weblogentry.websiteid = weblog.id"
118
+
119
+ if options[:status] && !options[:status].empty?
120
+ status = options[:status][0]
121
+ posts_query += "
122
+ WHERE weblogentry.status = '#{status}'"
123
+ options[:status][1..-1].each do |stat|
124
+ posts_query += " OR
125
+ weblogentry.status = '#{stat}'"
126
+ end
127
+ end
128
+
129
+ db[posts_query].each do |post|
130
+ process_post(post, db, options)
131
+ end
132
+ end
133
+
134
+ def self.process_post(post, db, options)
135
+ extension = options[:extension]
136
+
137
+ title = post[:title]
138
+ title = clean_entities(title) if options[:clean_entities]
139
+
140
+ slug = post[:slug]
141
+ slug = sluggify(title) if !slug || slug.empty?
142
+
143
+ date = post[:date] || Time.now
144
+ name = format("%02d-%02d-%02d-%s.%s", date.year, date.month, date.day, slug, extension)
145
+
146
+ content = post[:content].to_s
147
+ content = clean_entities(content) if options[:clean_entities]
148
+
149
+ excerpt = post[:excerpt].to_s
150
+
151
+ permalink = "#{post[:site]}/entry/#{post[:slug]}"
152
+
153
+ categories = []
154
+ tags = []
155
+
156
+ if options[:categories]
157
+ cquery =
158
+ "SELECT
159
+ weblogcategory.name AS `name`
160
+ FROM
161
+ weblogcategory AS `weblogcategory`
162
+ WHERE
163
+ weblogcategory.id = '#{post[:categoryid]}'"
164
+
165
+ db[cquery].each do |term|
166
+ categories << (options[:clean_entities] ? clean_entities(term[:name]) : term[:name])
167
+ end
168
+ end
169
+
170
+ if options[:tags]
171
+ cquery =
172
+ "SELECT
173
+ roller_weblogentrytag.name AS `name`
174
+ FROM
175
+ roller_weblogentrytag AS `roller_weblogentrytag`
176
+ WHERE
177
+ roller_weblogentrytag.entryid = '#{post[:id]}'"
178
+
179
+ db[cquery].each do |term|
180
+ tags << (options[:clean_entities] ? clean_entities(term[:name]) : term[:name])
181
+ end
182
+ end
183
+
184
+ comments = []
185
+
186
+ if options[:comments]
187
+ cquery =
188
+ "SELECT
189
+ id AS `id`,
190
+ name AS `author`,
191
+ email AS `author_email`,
192
+ posttime AS `date`,
193
+ content AS `content`
194
+ FROM roller_comment
195
+ WHERE
196
+ entryid = '#{post[:id]}' AND
197
+ status = 'APPROVED'"
198
+
199
+ db[cquery].each do |comment|
200
+ comcontent = comment[:content].to_s
201
+ comauthor = comment[:author].to_s
202
+ comcontent.force_encoding("UTF-8") if comcontent.respond_to?(:force_encoding)
203
+
204
+ if options[:clean_entities]
205
+ comcontent = clean_entities(comcontent)
206
+ comauthor = clean_entities(comauthor)
207
+ end
208
+
209
+ comments << {
210
+ "id" => comment[:id].to_i,
211
+ "author" => comauthor,
212
+ "author_email" => comment[:author_email].to_s,
213
+ "date" => comment[:date].to_s,
214
+ "content" => comcontent,
215
+ }
216
+ end
217
+
218
+ comments.sort! { |a, b| a["id"] <=> b["id"] }
219
+ end
220
+
221
+ # Get the relevant fields as a hash, delete empty fields and
222
+ # convert to YAML for the header.
223
+ data = {
224
+ "layout" => post[:type].to_s,
225
+ "status" => post[:status].to_s,
226
+ "published" => post[:status].to_s == "DRAFT" ? nil : (post[:status].to_s == "PUBLISHED"),
227
+ "title" => title.to_s,
228
+ "author" => {
229
+ "display_name" => post[:author].to_s,
230
+ "login" => post[:author_login].to_s,
231
+ "email" => post[:author_email].to_s,
232
+ },
233
+ "author_login" => post[:author_login].to_s,
234
+ "author_email" => post[:author_email].to_s,
235
+ "excerpt" => excerpt,
236
+ "id" => post[:id],
237
+ "date" => date.to_s,
238
+ "categories" => options[:categories] ? categories : nil,
239
+ "tags" => options[:tags] ? tags : nil,
240
+ "comments" => options[:comments] ? comments : nil,
241
+ "permalink" => permalink,
242
+ }.delete_if { |_k, v| v.nil? || v == "" }.to_yaml
243
+
244
+ filename = post[:status] == "DRAFT" ? "_drafts/#{slug}.md" : "_posts/#{name}"
245
+
246
+ # Write out the data and content to file
247
+ File.open(filename, "w") do |f|
248
+ f.puts data
249
+ f.puts "---"
250
+ f.puts Util.wpautop(content)
251
+ end
252
+ end
253
+
254
+ def self.clean_entities(text)
255
+ text.force_encoding("UTF-8") if text.respond_to?(:force_encoding)
256
+ text = HTMLEntities.new.encode(text, :named)
257
+ # We don't want to convert these, it would break all
258
+ # HTML tags in the post and comments.
259
+ text.gsub!("&amp;", "&")
260
+ text.gsub!("&lt;", "<")
261
+ text.gsub!("&gt;", ">")
262
+ text.gsub!("&quot;", '"')
263
+ text.gsub!("&apos;", "'")
264
+ text.gsub!("&#47;", "/")
265
+ text
266
+ end
267
+
268
+ def self.sluggify(title)
269
+ title.to_ascii.downcase.gsub(%r![^0-9a-z]+!, " ").strip.tr(" ", "-")
270
+ end
271
+
272
+ def self.page_path(_page_id)
273
+ ""
274
+ end
275
+ end
276
+ end
277
+ end
data/lib/jekyll-import/importers/rss.rb
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module JekyllImport
2
4
  module Importers
3
5
  class RSS < Importer
@@ -7,9 +9,7 @@ module JekyllImport
7
9
  end
8
10
 
9
11
  def self.validate(options)
10
- if options["source"].nil?
11
- abort "Missing mandatory option --source."
12
- end
12
+ abort "Missing mandatory option --source." if options["source"].nil?
13
13
  end
14
14
 
15
15
  def self.require_deps
@@ -30,9 +30,11 @@ module JekyllImport
30
30
  # Returns nothing.
31
31
  def self.process(options)
32
32
  source = options.fetch("source")
33
+ frontmatter = options.fetch("frontmatter", [])
34
+ body = options.fetch("body", ["description"])
33
35
 
34
36
  content = ""
35
- open(source) { |s| content = s.read }
37
+ URI.parse(source).open { |s| content = s.read }
36
38
  rss = ::RSS::Parser.parse(content, false)
37
39
 
38
40
  raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss
@@ -49,14 +51,24 @@ module JekyllImport
49
51
  "title" => item.title,
50
52
  }
51
53
 
52
- header["tag"] = options["tag"] if !options.to_s.empty?
54
+ header["tag"] = options["tag"] unless options.to_s.empty?
55
+
56
+ frontmatter.each do |value|
57
+ header[value] = item.send(value)
58
+ end
59
+
60
+ output = ""
61
+
62
+ body.each do |row|
63
+ output += item.send(row)
64
+ end
53
65
 
54
66
  FileUtils.mkdir_p("_posts")
55
67
 
56
68
  File.open("_posts/#{name}.html", "w") do |f|
57
69
  f.puts header.to_yaml
58
70
  f.puts "---\n\n"
59
- f.puts item.description
71
+ f.puts output
60
72
  end
61
73
  end
62
74
  end
data/lib/jekyll-import/importers/s9y.rb
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module JekyllImport
2
4
  module Importers
3
5
  class S9Y < Importer
@@ -26,7 +28,7 @@ module JekyllImport
26
28
  FileUtils.mkdir_p("_posts")
27
29
 
28
30
  text = ""
29
- open(source) { |line| text = line.read }
31
+ URI.parse(source).open { |line| text = line.read }
30
32
  rss = ::RSS::Parser.parse(text)
31
33
 
32
34
  rss.items.each do |item|
data/lib/jekyll-import/importers/s9y_database.rb
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module JekyllImport
2
4
  module Importers
3
5
  class S9YDatabase < Importer
@@ -5,29 +7,29 @@ module JekyllImport
5
7
  JekyllImport.require_with_fallback(
6
8
  %w(
7
9
  rubygems
8
- sequel
9
- fileutils
10
- safe_yaml
11
- unidecode
10
+ sequel
11
+ fileutils
12
+ safe_yaml
13
+ unidecode
12
14
  )
13
15
  )
14
16
  end
15
17
 
16
18
  def self.specify_options(c)
17
- c.option "dbname", "--dbname DB", "Database name (default: '')"
18
- c.option "socket", "--socket SOCKET", "Database socket (default: '')"
19
- c.option "user", "--user USER", "Database user name (default: '')"
20
- c.option "password", "--password PW", "Database user's password (default: '')"
21
- c.option "host", "--host HOST", "Database host name (default: 'localhost')"
22
- c.option "port", "--port PORT", "Custom database port connect to (default: 3306)"
23
- c.option "table_prefix", "--table_prefix PREFIX", "Table prefix name (default: 'serendipity_')"
24
- c.option "clean_entities", "--clean_entities", "Whether to clean entities (default: true)"
25
- c.option "comments", "--comments", "Whether to import comments (default: true)"
26
- c.option "categories", "--categories", "Whether to import categories (default: true)"
27
- c.option "tags", "--tags", "Whether to import tags (default: true)"
28
- c.option "drafts", "--drafts", "Whether to export drafts as well"
29
- c.option "markdown", "--markdown", "convert into markdown format (default: false)"
30
- c.option "permalinks", "--permalinks", "preserve S9Y permalinks (default: false)"
19
+ c.option "dbname", "--dbname DB", "Database name (default: '')"
20
+ c.option "socket", "--socket SOCKET", "Database socket (default: '')"
21
+ c.option "user", "--user USER", "Database user name (default: '')"
22
+ c.option "password", "--password PW", "Database user's password (default: '')"
23
+ c.option "host", "--host HOST", "Database host name (default: 'localhost')"
24
+ c.option "port", "--port PORT", "Custom database port connect to (default: 3306)"
25
+ c.option "table_prefix", "--table_prefix PREFIX", "Table prefix name (default: 'serendipity_')"
26
+ c.option "clean_entities", "--clean_entities", "Whether to clean entities (default: true)"
27
+ c.option "comments", "--comments", "Whether to import comments (default: true)"
28
+ c.option "categories", "--categories", "Whether to import categories (default: true)"
29
+ c.option "tags", "--tags", "Whether to import tags (default: true)"
30
+ c.option "drafts", "--drafts", "Whether to export drafts as well"
31
+ c.option "markdown", "--markdown", "convert into markdown format (default: false)"
32
+ c.option "permalinks", "--permalinks", "preserve S9Y permalinks (default: false)"
31
33
  end
32
34
 
33
35
  # Main migrator function. Call this to perform the migration.
@@ -68,7 +70,7 @@ module JekyllImport
68
70
  :user => opts.fetch("user", ""),
69
71
  :pass => opts.fetch("password", ""),
70
72
  :host => opts.fetch("host", "localhost"),
71
- :port => opts.fetch("port", 3306),
73
+ :port => opts.fetch("port", 3306),
72
74
  :socket => opts.fetch("socket", nil),
73
75
  :dbname => opts.fetch("dbname", ""),
74
76
  :table_prefix => opts.fetch("table_prefix", "serendipity_"),
@@ -93,14 +95,13 @@ module JekyllImport
93
95
  FileUtils.mkdir_p("_posts")
94
96
  FileUtils.mkdir_p("_drafts") if options[:drafts]
95
97
 
96
- db = Sequel.mysql2(options[:dbname],
97
- :user => options[:user],
98
- :password => options[:pass],
99
- :socket => options[:socket],
100
- :host => options[:host],
101
- :port => options[:port],
102
- :encoding => "utf8"
103
- )
98
+ db = Sequel.mysql2(options[:dbname],
99
+ :user => options[:user],
100
+ :password => options[:pass],
101
+ :socket => options[:socket],
102
+ :host => options[:host],
103
+ :port => options[:port],
104
+ :encoding => "utf8")
104
105
 
105
106
  px = options[:table_prefix]
106
107
 
@@ -136,9 +137,7 @@ module JekyllImport
136
137
  LEFT JOIN #{px}authors AS `authors`
137
138
  ON entries.authorid = authors.authorid"
138
139
 
139
- unless options[:drafts]
140
- posts_query << "WHERE posts.isdraft = 'false'"
141
- end
140
+ posts_query << "WHERE posts.isdraft = 'false'" unless options[:drafts]
142
141
 
143
142
  db[posts_query].each do |post|
144
143
  process_post(post, db, options, page_name_list)
@@ -149,31 +148,21 @@ module JekyllImport
149
148
  extension = options[:extension]
150
149
 
151
150
  title = post[:title]
152
- if options[:clean_entities]
153
- title = clean_entities(title)
154
- end
151
+ title = clean_entities(title) if options[:clean_entities]
155
152
 
156
153
  slug = post[:slug]
157
- if !slug || slug.empty?
158
- slug = sluggify(title)
159
- end
154
+ slug = sluggify(title) if !slug || slug.empty?
160
155
 
161
156
  status = post[:isdraft] == "true" ? "draft" : "published"
162
157
  date = Time.at(post[:timestamp]).utc || Time.now.utc
163
158
  name = format("%02d-%02d-%02d-%s.%s", date.year, date.month, date.day, slug, extension)
164
159
 
165
160
  content = post[:body].to_s
166
- unless post[:body_extended].to_s.empty?
167
- content += "\n\n" + post[:body_extended].to_s
168
- end
161
+ content += "\n\n" + post[:body_extended].to_s unless post[:body_extended].to_s.empty?
169
162
 
170
- if options[:clean_entities]
171
- content = clean_entities(content)
172
- end
163
+ content = clean_entities(content) if options[:clean_entities]
173
164
 
174
- if options[:markdown]
175
- content = ReverseMarkdown.convert(content)
176
- end
165
+ content = ReverseMarkdown.convert(content) if options[:markdown]
177
166
 
178
167
  categories = process_categories(db, options, post)
179
168
  comments = process_comments(db, options, post)
@@ -220,10 +209,10 @@ module JekyllImport
220
209
 
221
210
  def self.require_if_available(gem_name, option_name)
222
211
  require gem_name
223
- return true
212
+ true
224
213
  rescue LoadError
225
214
  STDERR.puts "Could not require '#{gem_name}', so the :#{option_name} option is now disabled."
226
- return true
215
+ true
227
216
  end
228
217
 
229
218
  def self.process_categories(db, options, post)
@@ -274,9 +263,7 @@ module JekyllImport
274
263
  comcontent = comment[:content].to_s
275
264
  comauthor = comment[:author].to_s
276
265
 
277
- if comcontent.respond_to?(:force_encoding)
278
- comcontent.force_encoding("UTF-8")
279
- end
266
+ comcontent.force_encoding("UTF-8") if comcontent.respond_to?(:force_encoding)
280
267
 
281
268
  if options[:clean_entities]
282
269
  comcontent = clean_entities(comcontent)
@@ -338,9 +325,7 @@ module JekyllImport
338
325
  end
339
326
 
340
327
  def self.clean_entities(text)
341
- if text.respond_to?(:force_encoding)
342
- text.force_encoding("UTF-8")
343
- end
328
+ text.force_encoding("UTF-8") if text.respond_to?(:force_encoding)
344
329
  text = HTMLEntities.new.encode(text, :named)
345
330
  # We don't want to convert these, it would break all
346
331
  # HTML tags in the post and comments.