jekyll-import 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 411790c3c98c3ba0eee2dffdc56eeb1b9ccd30b2
4
- data.tar.gz: 57d36c8a9489910f81bdacd6cb36a52f9c46e17c
3
+ metadata.gz: 27f5a0ab9d87425b92a6e1a4fbc01c5c87c64626
4
+ data.tar.gz: 4c3382d2e508f6fd6571fa45f1a01f1c804cf69d
5
5
  SHA512:
6
- metadata.gz: e6b0b500144bc36702db64ee1344e91ce8b3f37fa929ae1f241957c076010a5b3a89e2e9cd5fe2639b3610f2b540fc0166c86ff1b70dccad68431dc287dec192
7
- data.tar.gz: cbc3b694be174e53b55d5851672c3413d93d1f6eb2c7cbf9dd073192a80c8c2fa697ac4adbf61b7534ce49f7948c951a690d4f1a6a791e402625110178d3a501
6
+ metadata.gz: 5ec278c744142928a3db648710bae5313ad6a7c78dfc948a382c50ed30122e4d0c03324a31ddfc41dd9ad99512ea6879fc7ed9a093779c473ea36703a0743aef
7
+ data.tar.gz: b63caa7a81fc42055d663b94b0f9e9e798bdb2459e9782268bbd938288f8b15b633afba6b1719a58c0f349e21e27bec46ea9413a03cb3e8c00af5cdc4048a44f
@@ -5,6 +5,7 @@ module JekyllImport
5
5
  c.option 'source', '--source NAME', 'The XML file (blog-MM-DD-YYYY.xml) path to import'
6
6
  c.option 'no-blogger-info', '--no-blogger-info', 'not to leave blogger-URL info (id and old URL) in the front matter (default: false)'
7
7
  c.option 'replace-internal-link', '--replace-internal-link', 'replace internal links using the post_url liquid tag. (default: false)'
8
+ c.option 'comments', '--comments', 'import comments to _comments collection'
8
9
  end
9
10
 
10
11
  def self.validate(options)
@@ -41,6 +42,7 @@ module JekyllImport
41
42
  listener = BloggerAtomStreamListener.new
42
43
 
43
44
  listener.leave_blogger_info = ! options.fetch('no-blogger-info', false),
45
+ listener.comments = options.fetch('comments', false),
44
46
 
45
47
  File.open(source, 'r') do |f|
46
48
  f.flock(File::LOCK_SH)
@@ -95,11 +97,12 @@ module JekyllImport
95
97
  extend BloggerAtomStreamListenerMethods
96
98
 
97
99
  @leave_blogger_info = true
100
+ @comments = false
98
101
  end
99
102
  end
100
103
 
101
104
  module BloggerAtomStreamListenerMethods
102
- attr_accessor :leave_blogger_info
105
+ attr_accessor :leave_blogger_info, :comments
103
106
  attr_reader :original_url_base
104
107
 
105
108
  def tag_start(tag, attrs)
@@ -143,6 +146,10 @@ module JekyllImport
143
146
  if @in_entry_elem
144
147
  @in_entry_elem[:meta][:thumbnail] = attrs['url']
145
148
  end
149
+ when 'thr:in-reply-to'
150
+ if @in_entry_elem
151
+ @in_entry_elem[:meta][:post_id] = attrs['ref']
152
+ end
146
153
  end
147
154
  end
148
155
 
@@ -185,6 +192,23 @@ module JekyllImport
185
192
 
186
193
  FileUtils.mkdir_p(target_dir)
187
194
 
195
+ file_name = URI::decode("#{post_data[:filename]}.html")
196
+ File.open(File.join(target_dir, file_name), 'w') do |f|
197
+ f.flock(File::LOCK_EX)
198
+
199
+ f << post_data[:header].to_yaml
200
+ f << "---\n\n"
201
+ f << post_data[:body]
202
+ end
203
+ end
204
+ elsif @in_entry_elem[:meta][:kind] == 'comment' and @comments
205
+ post_data = get_post_data_from_in_entry_elem_info
206
+
207
+ if post_data
208
+ target_dir = '_comments'
209
+
210
+ FileUtils.mkdir_p(target_dir)
211
+
188
212
  file_name = URI::decode("#{post_data[:filename]}.html")
189
213
  File.open(File.join(target_dir, file_name), 'w') do |f|
190
214
  f.flock(File::LOCK_EX)
@@ -251,6 +275,48 @@ module JekyllImport
251
275
  body.gsub!(/{%/, '{{ "{%" }}')
252
276
  end
253
277
 
278
+ { :filename => filename, :header => header, :body => body }
279
+ elsif @in_entry_elem[:meta][:kind] == 'comment'
280
+ timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime('%Y-%m-%d')
281
+ if @in_entry_elem[:meta][:original_url]
282
+ if not @comment_seq
283
+ @comment_seq = 1
284
+ end
285
+
286
+ original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
287
+ original_path = original_uri.path.to_s
288
+ filename = "%s-%s-%s" %
289
+ [timestamp,
290
+ File.basename(original_path, File.extname(original_path)),
291
+ @comment_seq]
292
+
293
+ @comment_seq = @comment_seq + 1
294
+
295
+ @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
296
+ else
297
+ raise 'Original URL is missing'
298
+ end
299
+
300
+ header = {
301
+ 'date' => @in_entry_elem[:meta][:published],
302
+ 'author' => @in_entry_elem[:meta][:author],
303
+ 'blogger_post_id' => @in_entry_elem[:meta][:post_id],
304
+ }
305
+ header['modified_time'] = @in_entry_elem[:meta][:updated] if @in_entry_elem[:meta][:updated] && @in_entry_elem[:meta][:updated] != @in_entry_elem[:meta][:published]
306
+ header['thumbnail'] = @in_entry_elem[:meta][:thumbnail] if @in_entry_elem[:meta][:thumbnail]
307
+ header['blogger_id'] = @in_entry_elem[:meta][:id] if @leave_blogger_info
308
+ header['blogger_orig_url'] = @in_entry_elem[:meta][:original_url] if @leave_blogger_info && @in_entry_elem[:meta][:original_url]
309
+
310
+ body = @in_entry_elem[:body]
311
+
312
+ # body escaping associated with liquid
313
+ if body =~ /{{/
314
+ body.gsub!(/{{/, '{{ "{{" }}')
315
+ end
316
+ if body =~ /{%/
317
+ body.gsub!(/{%/, '{{ "{%" }}')
318
+ end
319
+
254
320
  { :filename => filename, :header => header, :body => body }
255
321
  else
256
322
  nil
@@ -1,139 +1,53 @@
1
+ require 'jekyll-import/importers/drupal_common'
2
+
1
3
  module JekyllImport
2
4
  module Importers
3
5
  class Drupal6 < Importer
4
- # Reads a MySQL database via Sequel and creates a post file for each story
5
- # and blog node.
6
- QUERY = "SELECT n.nid, \
7
- n.title, \
8
- nr.body, \
9
- n.created, \
10
- n.status, \
11
- GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags' \
12
- FROM node_revisions AS nr, \
13
- node AS n \
14
- LEFT OUTER JOIN term_node AS tn ON tn.nid = n.nid \
15
- LEFT OUTER JOIN term_data AS td ON tn.tid = td.tid \
16
- WHERE (%types%) \
17
- AND n.vid = nr.vid \
18
- GROUP BY n.nid"
19
-
20
- def self.validate(options)
21
- %w[dbname user].each do |option|
22
- if options[option].nil?
23
- abort "Missing mandatory option --#{option}."
24
- end
25
- end
26
- end
6
+ include DrupalCommon
7
+ extend DrupalCommon::ClassMethods
27
8
 
28
- def self.specify_options(c)
29
- c.option 'dbname', '--dbname DB', 'Database name'
30
- c.option 'user', '--user USER', 'Database user name'
31
- c.option 'password', '--password PW', "Database user's password (default: '')"
32
- c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
33
- c.option 'prefix', '--prefix PREFIX', 'Table prefix name'
34
- c.option 'types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array, 'The Drupal content types to be imported.'
9
# Builds the SQL that selects the Drupal 6 nodes to import.
#
# Each row carries the node's nid, title, created, status and type columns,
# the body and teaser of the revision whose vid matches the node's vid, and
# the names of the node's taxonomy terms joined with '|' as `tags`.
#
# prefix - table prefix prepended to every table name (may be empty).
# types  - Array of content type names; a node matches if it has any of them.
#
# Returns the query as a String. The type names are interpolated verbatim
# into the WHERE clause.
def self.build_query(prefix, types)
  type_filter = "n.type = '" + types.join("' OR n.type = '") + "'"

  <<-SQL
    SELECT n.nid,
           n.title,
           nr.body,
           nr.teaser,
           n.created,
           n.status,
           n.type,
           GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags'
    FROM #{prefix}node_revisions AS nr,
         #{prefix}node AS n
         LEFT OUTER JOIN #{prefix}term_node AS tn ON tn.nid = n.nid
         LEFT OUTER JOIN #{prefix}term_data AS td ON tn.tid = td.tid
    WHERE (#{type_filter})
      AND n.vid = nr.vid
    GROUP BY n.nid
  SQL
end
36
33
 
37
- def self.require_deps
38
- JekyllImport.require_with_fallback(%w[
39
- rubygems
40
- sequel
41
- fileutils
42
- safe_yaml
43
- mysql
44
- ])
34
# Returns the parameterised SQL that looks up the URL aliases of one Drupal 6
# path. Drupal 6 names the columns `src` and `dst`; they are exposed as
# `source` and `alias`. The single `?` placeholder is the source path.
#
# prefix - table prefix prepended to `url_alias` (may be empty).
def self.aliases_query(prefix)
  alias_table = "#{prefix}url_alias"
  "SELECT src AS source, dst AS alias FROM #{alias_table} WHERE src = ?"
end
46
37
 
47
- def self.process(options)
48
- dbname = options.fetch('dbname')
49
- user = options.fetch('user')
50
- pass = options.fetch('password', "")
51
- host = options.fetch('host', "localhost")
52
- prefix = options.fetch('prefix', "")
53
- types = options.fetch('types', ['blog', 'story', 'article'])
54
-
55
- db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
56
-
57
- if prefix != ''
58
- QUERY[" node "] = " " + prefix + "node "
59
- QUERY[" node_revisions "] = " " + prefix + "node_revisions "
60
- QUERY[" term_node "] = " " + prefix + "term_node "
61
- QUERY[" term_data "] = " " + prefix + "term_data "
62
- end
63
-
64
- types = types.join("' OR n.type = '")
65
- QUERY[" WHERE (%types%) "] = " WHERE (n.type = '#{types}') "
66
-
67
- FileUtils.mkdir_p "_posts"
68
- FileUtils.mkdir_p "_drafts"
69
- FileUtils.mkdir_p "_layouts"
70
-
71
- # Create the refresh layout
72
- # Change the refresh url if you customized your permalink config
73
- File.open("_layouts/refresh.html", "w") do |f|
74
- f.puts <<EOF
75
- <!DOCTYPE html>
76
- <html>
77
- <head>
78
- <meta http-equiv="content-type" content="text/html; charset=utf-8" />
79
- <meta http-equiv="refresh" content="0;url={{ page.refresh_to_post_id }}.html" />
80
- </head>
81
- </html>
82
- EOF
83
- end
84
-
85
- db[QUERY].each do |post|
86
- # Get required fields and construct Jekyll compatible name
87
- node_id = post[:nid]
88
- title = post[:title]
89
- content = post[:body]
90
- tags = (post[:tags] || '').downcase.strip
91
- created = post[:created]
92
- time = Time.at(created)
93
- is_published = post[:status] == 1
94
- dir = is_published ? "_posts" : "_drafts"
95
- slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
96
- name = time.strftime("%Y-%m-%d-") + slug + '.md'
97
-
98
- # Get the relevant fields as a hash, delete empty fields and convert
99
- # to YAML for the header
100
- data = {
101
- 'layout' => 'post',
102
- 'title' => title.to_s,
103
- 'created' => created,
104
- 'categories' => tags.split('|')
105
- }.delete_if { |k,v| v.nil? || v == ''}.each_pair {
106
- |k,v| ((v.is_a? String) ? v.force_encoding("UTF-8") : v)
107
- }.to_yaml
38
+ def self.post_data(sql_post_data)
39
+ content = sql_post_data[:body].to_s
40
+ summary = sql_post_data[:teaser].to_s
41
+ tags = (sql_post_data[:tags] || '').downcase.strip
108
42
 
109
- # Write out the data and content to file
110
- File.open("#{dir}/#{name}", "w") do |f|
111
- f.puts data
112
- f.puts "---"
113
- f.puts content
114
- end
43
+ data = {
44
+ 'excerpt' => summary,
45
+ 'categories' => tags.split('|')
46
+ }
115
47
 
116
- # Make a file to redirect from the old Drupal URL
117
- if is_published
118
- aliases = db["SELECT dst FROM #{prefix}url_alias WHERE src = ?", "node/#{node_id}"].all
119
-
120
- aliases.push(:dst => "node/#{node_id}")
121
-
122
- aliases.each do |url_alias|
123
- FileUtils.mkdir_p url_alias[:dst]
124
- File.open("#{url_alias[:dst]}/index.md", "w") do |f|
125
- f.puts "---"
126
- f.puts "layout: refresh"
127
- f.puts "refresh_to_post_id: /#{time.strftime("%Y/%m/%d/") + slug}"
128
- f.puts "---"
129
- end
130
- end
131
- end
132
- end
133
-
134
- # TODO: Make dirs & files for nodes of type 'page'
135
- # Make refresh pages for these as well
48
+ return data, content
136
49
  end
50
+
137
51
  end
138
52
  end
139
53
  end
@@ -1,111 +1,54 @@
1
+ require 'jekyll-import/importers/drupal_common'
2
+
1
3
  module JekyllImport
2
4
  module Importers
3
5
  class Drupal7 < Importer
4
- # Reads a MySQL database via Sequel and creates a post file for each story
5
- # and blog node.
6
- QUERY = "SELECT n.title, \
7
- fdb.body_value, \
8
- fdb.body_summary, \
9
- n.created, \
10
- n.status, \
11
- n.nid, \
12
- u.name \
13
- FROM node AS n, \
14
- field_data_body AS fdb, \
15
- users AS u \
16
- WHERE (%types%) \
17
- AND n.nid = fdb.entity_id \
18
- AND n.vid = fdb.revision_id
19
- AND n.uid = u.uid"
20
-
21
- def self.validate(options)
22
- %w[dbname user].each do |option|
23
- if options[option].nil?
24
- abort "Missing mandatory option --#{option}."
25
- end
26
- end
27
- end
6
+ include DrupalCommon
7
+ extend DrupalCommon::ClassMethods
28
8
 
29
- def self.specify_options(c)
30
- c.option 'dbname', '--dbname DB', 'Database name'
31
- c.option 'user', '--user USER', 'Database user name'
32
- c.option 'password', '--password PW', 'Database user\'s password (default: "")'
33
- c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
34
- c.option 'prefix', '--prefix PREFIX', 'Table prefix name'
35
- c.option 'types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array, 'The Drupal content types to be imported.'
9
# Builds the SQL that selects the Drupal 7 nodes to import.
#
# Each row carries the node's nid, title, created, status and type columns,
# the body_value and body_summary of the field_data_body row matching the
# node's nid and vid, and the names of the node's taxonomy terms joined with
# '|' as `tags`.
#
# prefix - table prefix prepended to every table name (may be empty).
# types  - Array of content type names; a node matches if it has any of them.
#
# Returns the query as a String. The type names are interpolated verbatim
# into the WHERE clause.
def self.build_query(prefix, types)
  types = types.join("' OR n.type = '")
  types = "n.type = '#{types}'"

  # The heredoc must not contain a closing double quote after GROUP BY: it
  # would become part of the SQL text and make the statement invalid.
  query = <<EOS
SELECT n.nid,
       n.title,
       fdb.body_value,
       fdb.body_summary,
       n.created,
       n.status,
       n.type,
       GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags'
FROM #{prefix}field_data_body AS fdb,
     #{prefix}node AS n
     LEFT OUTER JOIN #{prefix}taxonomy_index AS ti ON ti.nid = n.nid
     LEFT OUTER JOIN #{prefix}taxonomy_term_data AS td ON ti.tid = td.tid
WHERE (#{types})
  AND n.nid = fdb.entity_id
  AND n.vid = fdb.revision_id
GROUP BY n.nid
EOS

  query
end
37
34
 
38
- def self.require_deps
39
- JekyllImport.require_with_fallback(%w[
40
- rubygems
41
- sequel
42
- fileutils
43
- safe_yaml
44
- ])
35
# Returns the parameterised SQL that looks up the URL aliases of one Drupal 7
# path. The single `?` placeholder is the source path.
#
# prefix - table prefix prepended to `url_alias` (may be empty).
def self.aliases_query(prefix)
  alias_table = "#{prefix}url_alias"
  "SELECT source, alias FROM #{alias_table} WHERE source = ?"
end
46
38
 
47
- def self.process(options)
48
- dbname = options.fetch('dbname')
49
- user = options.fetch('user')
50
- pass = options.fetch('password', "")
51
- host = options.fetch('host', "localhost")
52
- prefix = options.fetch('prefix', "")
53
- types = options.fetch('types', ['blog', 'story', 'article'])
54
-
55
- db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
56
-
57
- unless prefix.empty?
58
- QUERY[" node "] = " " + prefix + "node "
59
- QUERY[" field_data_body "] = " " + prefix + "field_data_body "
60
- QUERY[" users "] = " " + prefix + "users "
61
- end
62
-
63
- types = types.join("' OR n.type = '")
64
- QUERY[" WHERE (%types%) "] = " WHERE (n.type = '#{types}') "
65
-
66
- FileUtils.mkdir_p "_posts"
67
- FileUtils.mkdir_p "_drafts"
68
- FileUtils.mkdir_p "_layouts"
69
-
70
- db[QUERY].each do |post|
71
- # Get required fields and construct Jekyll compatible name
72
- title = post[:title]
73
- content = post[:body_value]
74
- summary = post[:body_summary]
75
- created = post[:created]
76
- author = post[:name]
77
- nid = post[:nid]
78
- time = Time.at(created)
79
- is_published = post[:status] == 1
80
- dir = is_published ? "_posts" : "_drafts"
81
- slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
82
- name = time.strftime("%Y-%m-%d-") + slug + '.md'
83
-
84
- # Get the relevant fields as a hash, delete empty fields and convert
85
- # to YAML for the header
86
- data = {
87
- 'layout' => 'post',
88
- 'title' => title.strip.force_encoding("UTF-8"),
89
- 'author' => author,
90
- 'nid' => nid,
91
- 'created' => created,
92
- 'excerpt' => summary
93
- }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
39
+ def self.post_data(sql_post_data)
40
+ content = sql_post_data[:body_value].to_s
41
+ summary = sql_post_data[:body_summary].to_s
42
+ tags = (sql_post_data[:tags] || '').downcase.strip
94
43
 
95
- # Write out the data and content to file
96
- File.open("#{dir}/#{name}", "w") do |f|
97
- f.puts data
98
- f.puts "---"
99
- f.puts content
100
- end
44
+ data = {
45
+ 'excerpt' => summary,
46
+ 'categories' => tags.split('|')
47
+ }
101
48
 
102
- end
103
-
104
- # TODO: Make dirs & files for nodes of type 'page'
105
- # Make refresh pages for these as well
106
-
107
- # TODO: Make refresh dirs & files according to entries in url_alias table
49
+ return data, content
108
50
  end
51
+
109
52
  end
110
53
  end
111
54
  end
@@ -0,0 +1,157 @@
1
+ require 'date'
2
+
3
module JekyllImport
  module Importers
    # Shared base for the Drupal importers (at least for 6 and 7; 8 is
    # expected to be a different beast).
    #
    # A version-specific importer extends ClassMethods and overrides
    # build_query, aliases_query and post_data; option declaration, validation
    # and the import loop live here.
    #
    # The general idea is that the importer reads a MySQL database via Sequel
    # and creates a post file for each node it finds in the Drupal database.
    module DrupalCommon
      module ClassMethods
        # Fallback values for the optional command line options.
        DEFAULTS = {
          "password" => "",
          "host"     => "localhost",
          "prefix"   => "",
          "types"    => %w(blog story article)
        }

        # Declares the command line options on the command object `c`
        # (anything responding to #option).
        def specify_options(c)
          c.option 'dbname', '--dbname DB', 'Database name'
          c.option 'user', '--user USER', 'Database user name'
          c.option 'password', '--password PW', "Database user's password (default: #{DEFAULTS["password"].inspect})"
          c.option 'host', '--host HOST', "Database host name (default: #{DEFAULTS["host"].inspect})"
          c.option 'prefix', '--prefix PREFIX', "Table prefix name (default: #{DEFAULTS["prefix"].inspect})"
          c.option 'types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array,
            "The Drupal content types to be imported (default: #{DEFAULTS["types"].join(",")})"
        end

        # Loads the libraries the import loop relies on.
        def require_deps
          JekyllImport.require_with_fallback(%w[
            rubygems
            sequel
            fileutils
            safe_yaml
          ])
        end

        # Runs the import: queries the nodes, writes one file per node into
        # the _posts or _drafts directory of the Jekyll source, and creates
        # "refresh" redirect pages for published nodes.
        #
        # options - Hash with String keys; 'dbname' and 'user' are mandatory,
        #           'password', 'host', 'prefix' and 'types' fall back to
        #           DEFAULTS.
        def process(options)
          dbname = options.fetch('dbname')
          user   = options.fetch('user')
          pass   = options.fetch('password', DEFAULTS["password"])
          host   = options.fetch('host',     DEFAULTS["host"])
          prefix = options.fetch('prefix',   DEFAULTS["prefix"])
          types  = options.fetch('types',    DEFAULTS["types"])

          db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

          query = self.build_query(prefix, types)

          conf = Jekyll.configuration({})
          src_dir = conf['source']

          dirs = {
            :_posts   => File.join(src_dir, '_posts').to_s,
            :_drafts  => File.join(src_dir, '_drafts').to_s,
            :_layouts => Jekyll.sanitized_path(src_dir, conf['layouts_dir'].to_s)
          }

          dirs.each do |key, dir|
            FileUtils.mkdir_p dir
          end

          # Create the refresh layout
          # Change the refresh url if you customized your permalink config
          File.open(File.join(dirs[:_layouts], 'refresh.html'), 'w') do |f|
            f.puts <<-HTML
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<meta http-equiv="refresh" content="0;url={{ page.refresh_to_post_id }}.html" />
</head>
</html>
            HTML
          end

          db[query].each do |post|
            # Version-specific fields come from the concrete importer
            data, content = self.post_data(post)

            data['layout'] = post[:type]
            title = data['title'] = post[:title].strip.force_encoding('UTF-8')
            time = data['created'] = post[:created]

            # Drop empty fields and tag the remaining strings as UTF-8
            data = data.delete_if { |k,v| v.nil? || v == ''}.each_pair {
              |k,v| ((v.is_a? String) ? v.force_encoding('UTF-8') : v)
            }

            # Construct a Jekyll compatible file name
            is_published = post[:status] == 1
            node_id = post[:nid]
            dir = is_published ? dirs[:_posts] : dirs[:_drafts]
            slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
            filename = Time.at(time).to_datetime.strftime('%Y-%m-%d-') + slug + '.md'

            # Write out the data and content to file
            File.open("#{dir}/#{filename}", 'w') do |f|
              f.puts data.to_yaml
              f.puts '---'
              f.puts content
            end

            # Make a file to redirect from the old Drupal URL
            if is_published
              alias_query = self.aliases_query(prefix)
              type = post[:type]

              aliases = db[alias_query, "#{type}/#{node_id}"].all

              aliases.push(:alias => "#{type}/#{node_id}")

              aliases.each do |url_alias|
                FileUtils.mkdir_p url_alias[:alias]
                File.open("#{url_alias[:alias]}/index.md", "w") do |f|
                  f.puts '---'
                  f.puts 'layout: refresh'
                  f.puts "refresh_to_post_id: /#{Time.at(time).to_datetime.strftime('%Y/%m/%d/') + slug}"
                  f.puts '---'
                end
              end
            end
          end
        end

        # Must be overridden by the concrete importer: returns the SQL String
        # selecting the nodes of the given content types.
        def build_query(prefix, types)
          raise 'The importer you are trying to use does not implement the build_query() method.'
        end

        # Must be overridden by the concrete importer.
        #
        # Make sure you implement the query returning "alias" as the column
        # name for the URL aliases. See the Drupal 6 importer for an example.
        # The alias field is called 'dst' but we alias it to 'alias', to
        # follow Drupal 7's column names.
        def aliases_query(prefix)
          raise 'The importer you are trying to use does not implement the aliases_query() method.'
        end

        # Must be overridden by the concrete importer: returns
        # [front_matter_hash, content] for one row of the node query.
        def post_data(sql_post_data)
          raise 'The importer you are trying to use does not implement the post_data() method.'
        end

        # Aborts with a readable message when a mandatory option is missing.
        # Defined here, next to process, so that importers extending
        # ClassMethods actually respond to it at class level.
        def validate(options)
          %w[dbname user].each do |option|
            if options[option].nil?
              abort "Missing mandatory option --#{option}."
            end
          end
        end
      end

      # build_query, aliases_query, post_data and validate used to be plain
      # instance methods of DrupalCommon; including ClassMethods keeps them
      # reachable on instances of classes that `include DrupalCommon`.
      include ClassMethods
    end
  end
end
@@ -22,8 +22,10 @@ module JekyllImport
22
22
  JekyllImport.require_with_fallback(%w[
23
23
  rubygems
24
24
  sequel
25
+ mysql2
25
26
  fileutils
26
27
  safe_yaml
28
+ mysql
27
29
  ])
28
30
  end
29
31
 
@@ -35,7 +37,7 @@ module JekyllImport
35
37
  section = options.fetch('section', '1')
36
38
  table_prefix = options.fetch('prefix', "jos_")
37
39
 
38
- db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
40
+ db = Sequel.mysql2(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
39
41
 
40
42
  FileUtils.mkdir_p("_posts")
41
43
 
@@ -0,0 +1,363 @@
1
+ module JekyllImport
2
+ module Importers
3
+ class S9YDatabase < Importer
4
+
5
# Loads the libraries this importer needs through
# JekyllImport.require_with_fallback.
def self.require_deps
  needed = %w[rubygems sequel fileutils safe_yaml unidecode]
  JekyllImport.require_with_fallback(needed)
end
15
+
16
# Declares the importer's command line options on the command object `c`
# (anything responding to #option).
def self.specify_options(c)
  c.option 'dbname', '--dbname DB', 'Database name (default: "")'
  c.option 'socket', '--socket SOCKET', 'Database socket (default: "")'
  c.option 'user', '--user USER', 'Database user name (default: "")'
  # Single-quoted so the embedded "" stays in the help text; written as
  # "...(default: "")" Ruby concatenates three adjacent literals and the
  # quotes silently disappear.
  c.option 'password', '--password PW', 'Database user\'s password (default: "")'
  c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
  c.option 'table_prefix', '--table_prefix PREFIX', 'Table prefix name (default: "serendipity_")'
  c.option 'clean_entities', '--clean_entities', 'Whether to clean entities (default: true)'
  c.option 'comments', '--comments', 'Whether to import comments (default: true)'
  c.option 'categories', '--categories', 'Whether to import categories (default: true)'
  c.option 'tags', '--tags', 'Whether to import tags (default: true)'
  c.option 'drafts', '--drafts', 'Whether to export drafts as well'
  c.option 'markdown', '--markdown', 'convert into markdown format (default: false)'
  c.option 'permalinks', '--permalinks', 'preserve S9Y permalinks (default: false)'
end
31
+
32
+ # Main migrator function. Call this to perform the migration.
33
+ #
34
+ # dbname:: The name of the database
35
+ # user:: The database user name
36
+ # pass:: The database user's password
37
+ # host:: The address of the MySQL database host. Default: 'localhost'
38
+ # socket:: The database socket's path
39
+ # options:: A hash table of configuration options.
40
+ #
41
+ # Supported options are:
42
+ #
43
+ # :table_prefix:: Prefix of database tables used by Serendipity (S9Y).
44
+ # Default: 'serendipity_'
45
+ # :clean_entities:: If true, convert non-ASCII characters to HTML
46
+ # entities in the posts, comments, titles, and
47
+ # names. Requires the 'htmlentities' gem to
48
+ # work. Default: true.
49
+ # :comments:: If true, migrate post comments too. Comments
50
+ # are saved in the post's YAML front matter.
51
+ # Default: true.
52
+ # :categories:: If true, save the post's categories in its
53
+ # YAML front matter. Default: true.
54
+ # :tags:: If true, save the post's tags in its
55
+ # YAML front matter. Default: true.
56
+ # :extension:: Set the post extension. Default: "html"
57
+ # :drafts:: If true, export drafts as well
58
+ # Default: true.
59
+ # :markdown:: If true, convert the content to markdown
60
+ # Default: false
61
+ # :permalinks:: If true, save the post's original permalink in its
62
+ # YAML front matter. Default: false.
63
+ #
64
# Entry point of the Serendipity import.
#
# opts - Hash with String keys as produced by the command line parser;
#        missing keys fall back to the defaults listed below.
#
# Creates _posts (and _drafts when drafts are exported), connects through
# Sequel's mysql2 adapter and hands every selected entry to process_post.
def self.process(opts)
  options = {
    :user           => opts.fetch('user', ''),
    :pass           => opts.fetch('password', ''),
    :host           => opts.fetch('host', 'localhost'),
    :socket         => opts.fetch('socket', nil),
    :dbname         => opts.fetch('dbname', ''),
    :table_prefix   => opts.fetch('table_prefix', 'serendipity_'),
    :clean_entities => opts.fetch('clean_entities', true),
    :comments       => opts.fetch('comments', true),
    :categories     => opts.fetch('categories', true),
    :tags           => opts.fetch('tags', true),
    :extension      => opts.fetch('extension', 'html'),
    :drafts         => opts.fetch('drafts', true),
    :markdown       => opts.fetch('markdown', false),
    :permalinks     => opts.fetch('permalinks', false),
  }

  # Both features depend on optional gems; each flag is replaced by whatever
  # require_if_available reports.
  if options[:clean_entities]
    options[:clean_entities] = require_if_available('htmlentities', 'clean_entities')
  end

  if options[:markdown]
    options[:markdown] = require_if_available('reverse_markdown', 'markdown')
  end

  FileUtils.mkdir_p("_posts")
  FileUtils.mkdir_p("_drafts") if options[:drafts]

  db = Sequel.mysql2(options[:dbname], :user => options[:user], :password => options[:pass],
                     :socket => options[:socket], :host => options[:host], :encoding => 'utf8')

  px = options[:table_prefix]

  # Entry id => slug lookup, handed to process_post for page paths.
  page_name_list = {}

  page_name_query = %(
    SELECT
      entries.ID    AS `id`,
      entries.title AS `title`
    FROM #{px}entries AS `entries`
  )

  db[page_name_query].each do |page|
    page[:slug] = sluggify(page[:title])

    page_name_list[ page[:id] ] = {
      :slug => page[:slug]
    }
  end

  posts_query = "
    SELECT
      entries.ID        AS `id`,
      entries.isdraft   AS `isdraft`,
      entries.title     AS `title`,
      entries.timestamp AS `timestamp`,
      entries.body      AS `body`,
      authors.realname  AS `author`,
      authors.username  AS `author_login`,
      authors.email     AS `author_email`
    FROM #{px}entries AS `entries`
      LEFT JOIN #{px}authors AS `authors`
        ON entries.authorid = authors.authorid"

  unless options[:drafts]
    # The clause needs separating whitespace and must use the `entries`
    # alias declared in the FROM clause above.
    posts_query += "\n    WHERE entries.isdraft = 'false'"
  end

  db[posts_query].each do |post|
    process_post(post, db, options, page_name_list)
  end
end
137
+
138
# Writes one entry to disk as a Jekyll post, draft or page.
#
# post           - row Hash for the entry; reads :id, :isdraft, :title,
#                  :timestamp, :body, :author, :author_login, :author_email
#                  and, when present, :slug and :type.
# db             - database handle, passed through to the process_* helpers.
# options        - Hash with Symbol keys (:extension, :clean_entities,
#                  :markdown, :permalinks, :categories, :tags, :comments).
# page_name_list - Hash of entry id => { :slug => ... } used for page paths.
def self.process_post(post, db, options, page_name_list)
  extension = options[:extension]

  title = post[:title]
  if options[:clean_entities]
    title = clean_entities(title)
  end

  # NOTE(review): the posts query visible in this importer selects neither a
  # slug nor a type column, so the slug is presumably always derived from the
  # title and the 'page' branch below never taken - confirm.
  slug = post[:slug]
  if !slug || slug.empty?
    slug = sluggify(title)
  end

  status = post[:isdraft] == 'true' ? 'draft' : 'published'
  # Time.at(nil) raises, so the "now" fallback has to be chosen before the
  # conversion rather than with `||` after it.
  date = post[:timestamp] ? Time.at(post[:timestamp]).utc : Time.now.utc
  name = "%02d-%02d-%02d-%s.%s" % [date.year, date.month, date.day, slug, extension]

  content = post[:body].to_s

  if options[:clean_entities]
    content = clean_entities(content)
  end

  if options[:markdown]
    content = ReverseMarkdown.convert(content)
  end

  categories = process_categories(db, options, post)
  comments   = process_comments(db, options, post)
  tags       = process_tags(db, options, post)
  permalink  = process_permalink(db, options, post)

  # Get the relevant fields as a hash, delete empty fields and
  # convert to YAML for the header.
  data = {
    'layout'       => post[:type].to_s,
    'status'       => status.to_s,
    'published'    => status.to_s == 'draft' ? nil : (status.to_s == 'published'),
    'title'        => title.to_s,
    'author'       => {
      'display_name'=> post[:author].to_s,
      'login'       => post[:author_login].to_s,
      'email'       => post[:author_email].to_s
    },
    'author_login' => post[:author_login].to_s,
    'author_email' => post[:author_email].to_s,
    'date'         => date.to_s,
    'permalink'    => options[:permalinks] ? permalink : nil,
    'categories'   => options[:categories] ? categories : nil,
    'tags'         => options[:tags] ? tags : nil,
    'comments'     => options[:comments] ? comments : nil,
  }.delete_if { |k,v| v.nil? || v == '' }.to_yaml

  if post[:type] == 'page'
    filename = page_path(post[:id], page_name_list) + "index.#{extension}"
    FileUtils.mkdir_p(File.dirname(filename))
  elsif status == 'draft'
    filename = "_drafts/#{slug}.#{extension}"
  else
    filename = "_posts/#{name}"
  end

  # Write out the data and content to file
  File.open(filename, "w") do |f|
    f.puts data
    f.puts "---"
    f.puts Util.wpautop(content)
  end
end
207
+
208
# Tries to load an optional gem.
#
# gem_name    - name passed to require.
# option_name - option that depends on the gem; only used in the warning.
#
# Returns true when the gem was loaded. Returns false after printing a
# warning to STDERR when it is missing, so the caller can switch the
# dependent option off as the warning promises.
def self.require_if_available(gem_name, option_name)
  require gem_name
  true
rescue LoadError
  STDERR.puts "Could not require '#{gem_name}', so the :#{option_name} option is now disabled."
  false
end
217
+
218
# Collects the category names attached to one entry.
#
# db      - database handle; db[sql] must yield rows with a :name key.
# options - Hash with :categories, :table_prefix and :clean_entities.
# post    - row Hash of the entry; only :id is read.
#
# Returns an Array of names (entity-cleaned when :clean_entities is set), or
# an empty Array when category import is switched off.
def self.process_categories(db, options, post)
  return [] unless options[:categories]

  prefix = options[:table_prefix]

  category_sql = %(
    SELECT
      categories.category_name AS `name`
    FROM
      #{prefix}entrycat AS `entrycat`,
      #{prefix}category AS `categories`
    WHERE
      entrycat.entryid = '#{post[:id]}' AND
      entrycat.categoryid = categories.categoryid
  )

  db[category_sql].map do |row|
    options[:clean_entities] ? clean_entities(row[:name]) : row[:name]
  end
end
242
+
243
# Collects the approved comments of one entry.
#
# db      - database handle; db[sql] must yield rows with :id, :author,
#           :author_email, :author_url, :date and :content keys.
# options - Hash with :comments, :table_prefix and :clean_entities.
# post    - row Hash of the entry; only :id is read.
#
# Returns an Array of comment Hashes with String keys, ordered by id, or an
# empty Array when comment import is switched off.
def self.process_comments(db, options, post)
  return [] unless options[:comments]

  prefix = options[:table_prefix]

  comment_sql = %(
    SELECT
      id        AS `id`,
      author    AS `author`,
      email     AS `author_email`,
      url       AS `author_url`,
      timestamp AS `date`,
      body      AS `content`
    FROM #{prefix}comments
    WHERE
      entry_id = '#{post[:id]}' AND
      status = 'approved'
  )

  imported = db[comment_sql].map do |row|
    text   = row[:content].to_s
    author = row[:author].to_s

    text.force_encoding("UTF-8") if text.respond_to?(:force_encoding)

    if options[:clean_entities]
      text   = clean_entities(text)
      author = clean_entities(author)
    end

    {
      'id'           => row[:id].to_i,
      'author'       => author,
      'author_email' => row[:author_email].to_s,
      'author_url'   => row[:author_url].to_s,
      'date'         => row[:date].to_s,
      'content'      => text,
    }
  end

  imported.sort! { |left, right| left['id'] <=> right['id'] }
end
285
+
286
# Collects the tags attached to one entry.
#
# db      - database handle; db[sql] must yield rows with a :name key.
# options - Hash with :tags, :table_prefix and :clean_entities.
# post    - row Hash of the entry; only :id is read.
#
# Returns an Array of tag names (entity-cleaned when :clean_entities is set),
# or an empty Array when tag import is switched off.
def self.process_tags(db, options, post)
  # Gate on the tags option itself; checking :categories here would tie tag
  # import to an unrelated switch.
  return [] unless options[:tags]

  px = options[:table_prefix]

  cquery = %(
    SELECT
      entrytags.tag AS `name`
    FROM
      #{px}entrytags AS `entrytags`
    WHERE
      entrytags.entryid = '#{post[:id]}'
  )

  db[cquery].each_with_object([]) do |tag, tags|
    if options[:clean_entities]
      tags << clean_entities(tag[:name])
    else
      tags << tag[:name]
    end
  end
end
308
+
309
# Looks up the original Serendipity permalink of one entry.
#
# db      - database handle; db[sql] must yield rows with a :permalink key.
# options - Hash with :permalinks and :table_prefix.
# post    - row Hash of the entry; only :id is read.
#
# Returns the permalink prefixed with '/', or nil when permalinks are
# switched off or the entry has none.
def self.process_permalink(db, options, post)
  return unless options[:permalinks]

  px = options[:table_prefix]

  cquery = %(
    SELECT
      permalinks.permalink AS `permalink`
    FROM
      #{px}permalinks AS `permalinks`
    WHERE
      permalinks.entry_id = '#{post[:id]}' AND
      permalinks.type = 'entry'
  )

  # Take the first row explicitly: iterating with `each` and returning from
  # inside the block leaves the dataset itself as the return value when no
  # row matches, and that object would end up in the front matter.
  link = db[cquery].first
  link ? "/#{link[:permalink]}" : nil
end
328
+
329
+ def self.clean_entities( text )
330
+ if text.respond_to?(:force_encoding)
331
+ text.force_encoding("UTF-8")
332
+ end
333
+ text = HTMLEntities.new.encode(text, :named)
334
+ # We don't want to convert these, it would break all
335
+ # HTML tags in the post and comments.
336
+ text.gsub!("&amp;", "&")
337
+ text.gsub!("&lt;", "<")
338
+ text.gsub!("&gt;", ">")
339
+ text.gsub!("&quot;", '"')
340
+ text.gsub!("&apos;", "'")
341
+ text.gsub!("/", "&#47;")
342
+ text
343
+ end
344
+
345
# Turns a title into a URL slug: transliterated with String#to_ascii,
# lower-cased, with every run of non-alphanumeric characters collapsed into
# a single '-' and no leading or trailing separator.
def self.sluggify( title )
  ascii = title.to_ascii.downcase
  words = ascii.gsub(/[^0-9A-Za-z]+/, " ").strip
  words.tr(" ", "-")
end
348
+
349
# Returns the directory path ("<slug>/") for a page, or an empty String when
# the id is not present in page_name_list.
#
# page_id        - entry id used as the lookup key.
# page_name_list - Hash of entry id => { :slug => ... }.
def self.page_path( page_id, page_name_list )
  return "" unless page_name_list.key?(page_id)

  "#{page_name_list[page_id][:slug]}/"
end
359
+
360
+ end
361
+ end
362
+ end
363
+
@@ -42,10 +42,7 @@ module JekyllImport
42
42
  puts "Fetching #{feed_url}"
43
43
  feed = open(feed_url)
44
44
  contents = feed.readlines.join("\n")
45
- beginning = contents.index("{")
46
- ending = contents.rindex("}")
47
- json = contents[beginning..ending] # Strip Tumblr's JSONP chars.
48
- blog = JSON.parse(json)
45
+ blog = extract_json(contents)
49
46
  puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
50
47
  batch = blog["posts"].map { |post| post_to_hash(post, format) }
51
48
 
@@ -68,6 +65,13 @@ module JekyllImport
68
65
 
69
66
  private
70
67
 
68
+ def self.extract_json(contents)
69
+ beginning = contents.index("{")
70
+ ending = contents.rindex("}")+1
71
+ json = contents[beginning...ending] # Strip Tumblr's JSONP chars.
72
+ blog = JSON.parse(json)
73
+ end
74
+
71
75
  # Writes a post out to disk
72
76
  def self.write_post(post, use_markdown, add_highlights)
73
77
  content = post[:content]
@@ -135,12 +139,12 @@ module JekyllImport
135
139
  post["conversation"].each do |line|
136
140
  content << "<dt>#{line['label']}</dt><dd>#{line['phrase']}</dd>"
137
141
  end
138
- content << "</section></dialog>"
142
+ content << "</dialog></section>"
139
143
  when "video"
140
144
  title = post["video-title"]
141
145
  content = post["video-player"]
142
146
  unless post["video-caption"].nil?
143
- unless content.nil?
147
+ if content
144
148
  content << "<br/>" + post["video-caption"]
145
149
  else
146
150
  content = post["video-caption"]
@@ -209,9 +213,13 @@ module JekyllImport
209
213
  urls = Hash[posts.map { |post|
210
214
  # Create an initial empty file for the post so that
211
215
  # we can instantiate a post object.
212
- File.open("_posts/tumblr/#{post[:name]}", "w")
216
+ File.write("_posts/tumblr/#{post[:name]}", "")
213
217
  tumblr_url = URI.parse(URI.encode(post[:slug])).path
214
- jekyll_url = Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
218
+ jekyll_url = if Jekyll.const_defined? :Post
219
+ Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
220
+ else
221
+ Jekyll::Document.new(File.expand_path("_posts/tumblr/#{post[:name]}"), site: site, collection: site.posts).url
222
+ end
215
223
  redirect_dir = tumblr_url.sub(/\//, "") + "/"
216
224
  FileUtils.mkdir_p redirect_dir
217
225
  File.open(redirect_dir + "index.html", "w") do |f|
@@ -281,7 +289,7 @@ module JekyllImport
281
289
  # Don't fetch if we've already cached this file
282
290
  unless File.size? path
283
291
  puts "Fetching photo #{url}"
284
- File.open(path, "w") { |f| f.write(open(url).read) }
292
+ File.open(path, "wb") { |f| f.write(open(url).read) }
285
293
  end
286
294
  url = "/" + path
287
295
  end
@@ -1,3 +1,3 @@
1
1
  module JekyllImport
2
- VERSION = '0.11.0'
2
+ VERSION = '0.12.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-import
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tom Preston-Werner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-28 00:00:00.000000000 Z
11
+ date: 2016-11-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jekyll
@@ -304,6 +304,20 @@ dependencies:
304
304
  - - ">="
305
305
  - !ruby/object:Gem::Version
306
306
  version: '0'
307
+ - !ruby/object:Gem::Dependency
308
+ name: reverse_markdown
309
+ requirement: !ruby/object:Gem::Requirement
310
+ requirements:
311
+ - - ">="
312
+ - !ruby/object:Gem::Version
313
+ version: '0'
314
+ type: :development
315
+ prerelease: false
316
+ version_requirements: !ruby/object:Gem::Requirement
317
+ requirements:
318
+ - - ">="
319
+ - !ruby/object:Gem::Version
320
+ version: '0'
307
321
  - !ruby/object:Gem::Dependency
308
322
  name: launchy
309
323
  requirement: !ruby/object:Gem::Requirement
@@ -336,6 +350,7 @@ files:
336
350
  - lib/jekyll-import/importers/csv.rb
337
351
  - lib/jekyll-import/importers/drupal6.rb
338
352
  - lib/jekyll-import/importers/drupal7.rb
353
+ - lib/jekyll-import/importers/drupal_common.rb
339
354
  - lib/jekyll-import/importers/easyblog.rb
340
355
  - lib/jekyll-import/importers/enki.rb
341
356
  - lib/jekyll-import/importers/ghost.rb
@@ -349,6 +364,7 @@ files:
349
364
  - lib/jekyll-import/importers/posterous.rb
350
365
  - lib/jekyll-import/importers/rss.rb
351
366
  - lib/jekyll-import/importers/s9y.rb
367
+ - lib/jekyll-import/importers/s9y_database.rb
352
368
  - lib/jekyll-import/importers/textpattern.rb
353
369
  - lib/jekyll-import/importers/tumblr.rb
354
370
  - lib/jekyll-import/importers/typo.rb
@@ -378,7 +394,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
378
394
  version: '0'
379
395
  requirements: []
380
396
  rubyforge_project:
381
- rubygems_version: 2.5.1
397
+ rubygems_version: 2.5.2
382
398
  signing_key:
383
399
  specification_version: 2
384
400
  summary: Import command for Jekyll (static site generator).