jekyll-import 0.11.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 411790c3c98c3ba0eee2dffdc56eeb1b9ccd30b2
4
- data.tar.gz: 57d36c8a9489910f81bdacd6cb36a52f9c46e17c
3
+ metadata.gz: 27f5a0ab9d87425b92a6e1a4fbc01c5c87c64626
4
+ data.tar.gz: 4c3382d2e508f6fd6571fa45f1a01f1c804cf69d
5
5
  SHA512:
6
- metadata.gz: e6b0b500144bc36702db64ee1344e91ce8b3f37fa929ae1f241957c076010a5b3a89e2e9cd5fe2639b3610f2b540fc0166c86ff1b70dccad68431dc287dec192
7
- data.tar.gz: cbc3b694be174e53b55d5851672c3413d93d1f6eb2c7cbf9dd073192a80c8c2fa697ac4adbf61b7534ce49f7948c951a690d4f1a6a791e402625110178d3a501
6
+ metadata.gz: 5ec278c744142928a3db648710bae5313ad6a7c78dfc948a382c50ed30122e4d0c03324a31ddfc41dd9ad99512ea6879fc7ed9a093779c473ea36703a0743aef
7
+ data.tar.gz: b63caa7a81fc42055d663b94b0f9e9e798bdb2459e9782268bbd938288f8b15b633afba6b1719a58c0f349e21e27bec46ea9413a03cb3e8c00af5cdc4048a44f
@@ -5,6 +5,7 @@ module JekyllImport
5
5
  c.option 'source', '--source NAME', 'The XML file (blog-MM-DD-YYYY.xml) path to import'
6
6
  c.option 'no-blogger-info', '--no-blogger-info', 'not to leave blogger-URL info (id and old URL) in the front matter (default: false)'
7
7
  c.option 'replace-internal-link', '--replace-internal-link', 'replace internal links using the post_url liquid tag. (default: false)'
8
+ c.option 'comments', '--comments', 'import comments to _comments collection'
8
9
  end
9
10
 
10
11
  def self.validate(options)
@@ -41,6 +42,7 @@ module JekyllImport
41
42
  listener = BloggerAtomStreamListener.new
42
43
 
43
44
  listener.leave_blogger_info = ! options.fetch('no-blogger-info', false),
45
+ listener.comments = options.fetch('comments', false),
44
46
 
45
47
  File.open(source, 'r') do |f|
46
48
  f.flock(File::LOCK_SH)
@@ -95,11 +97,12 @@ module JekyllImport
95
97
  extend BloggerAtomStreamListenerMethods
96
98
 
97
99
  @leave_blogger_info = true
100
+ @comments = false
98
101
  end
99
102
  end
100
103
 
101
104
  module BloggerAtomStreamListenerMethods
102
- attr_accessor :leave_blogger_info
105
+ attr_accessor :leave_blogger_info, :comments
103
106
  attr_reader :original_url_base
104
107
 
105
108
  def tag_start(tag, attrs)
@@ -143,6 +146,10 @@ module JekyllImport
143
146
  if @in_entry_elem
144
147
  @in_entry_elem[:meta][:thumbnail] = attrs['url']
145
148
  end
149
+ when 'thr:in-reply-to'
150
+ if @in_entry_elem
151
+ @in_entry_elem[:meta][:post_id] = attrs['ref']
152
+ end
146
153
  end
147
154
  end
148
155
 
@@ -185,6 +192,23 @@ module JekyllImport
185
192
 
186
193
  FileUtils.mkdir_p(target_dir)
187
194
 
195
+ file_name = URI::decode("#{post_data[:filename]}.html")
196
+ File.open(File.join(target_dir, file_name), 'w') do |f|
197
+ f.flock(File::LOCK_EX)
198
+
199
+ f << post_data[:header].to_yaml
200
+ f << "---\n\n"
201
+ f << post_data[:body]
202
+ end
203
+ end
204
+ elsif @in_entry_elem[:meta][:kind] == 'comment' and @comments
205
+ post_data = get_post_data_from_in_entry_elem_info
206
+
207
+ if post_data
208
+ target_dir = '_comments'
209
+
210
+ FileUtils.mkdir_p(target_dir)
211
+
188
212
  file_name = URI::decode("#{post_data[:filename]}.html")
189
213
  File.open(File.join(target_dir, file_name), 'w') do |f|
190
214
  f.flock(File::LOCK_EX)
@@ -251,6 +275,48 @@ module JekyllImport
251
275
  body.gsub!(/{%/, '{{ "{%" }}')
252
276
  end
253
277
 
278
+ { :filename => filename, :header => header, :body => body }
279
+ elsif @in_entry_elem[:meta][:kind] == 'comment'
280
+ timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime('%Y-%m-%d')
281
+ if @in_entry_elem[:meta][:original_url]
282
+ if not @comment_seq
283
+ @comment_seq = 1
284
+ end
285
+
286
+ original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
287
+ original_path = original_uri.path.to_s
288
+ filename = "%s-%s-%s" %
289
+ [timestamp,
290
+ File.basename(original_path, File.extname(original_path)),
291
+ @comment_seq]
292
+
293
+ @comment_seq = @comment_seq + 1
294
+
295
+ @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
296
+ else
297
+ raise 'Original URL is missing'
298
+ end
299
+
300
+ header = {
301
+ 'date' => @in_entry_elem[:meta][:published],
302
+ 'author' => @in_entry_elem[:meta][:author],
303
+ 'blogger_post_id' => @in_entry_elem[:meta][:post_id],
304
+ }
305
+ header['modified_time'] = @in_entry_elem[:meta][:updated] if @in_entry_elem[:meta][:updated] && @in_entry_elem[:meta][:updated] != @in_entry_elem[:meta][:published]
306
+ header['thumbnail'] = @in_entry_elem[:meta][:thumbnail] if @in_entry_elem[:meta][:thumbnail]
307
+ header['blogger_id'] = @in_entry_elem[:meta][:id] if @leave_blogger_info
308
+ header['blogger_orig_url'] = @in_entry_elem[:meta][:original_url] if @leave_blogger_info && @in_entry_elem[:meta][:original_url]
309
+
310
+ body = @in_entry_elem[:body]
311
+
312
+ # body escaping associated with liquid
313
+ if body =~ /{{/
314
+ body.gsub!(/{{/, '{{ "{{" }}')
315
+ end
316
+ if body =~ /{%/
317
+ body.gsub!(/{%/, '{{ "{%" }}')
318
+ end
319
+
254
320
  { :filename => filename, :header => header, :body => body }
255
321
  else
256
322
  nil
@@ -1,139 +1,53 @@
1
+ require 'jekyll-import/importers/drupal_common'
2
+
1
3
  module JekyllImport
2
4
  module Importers
3
5
  class Drupal6 < Importer
4
- # Reads a MySQL database via Sequel and creates a post file for each story
5
- # and blog node.
6
- QUERY = "SELECT n.nid, \
7
- n.title, \
8
- nr.body, \
9
- n.created, \
10
- n.status, \
11
- GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags' \
12
- FROM node_revisions AS nr, \
13
- node AS n \
14
- LEFT OUTER JOIN term_node AS tn ON tn.nid = n.nid \
15
- LEFT OUTER JOIN term_data AS td ON tn.tid = td.tid \
16
- WHERE (%types%) \
17
- AND n.vid = nr.vid \
18
- GROUP BY n.nid"
19
-
20
- def self.validate(options)
21
- %w[dbname user].each do |option|
22
- if options[option].nil?
23
- abort "Missing mandatory option --#{option}."
24
- end
25
- end
26
- end
6
+ include DrupalCommon
7
+ extend DrupalCommon::ClassMethods
27
8
 
28
- def self.specify_options(c)
29
- c.option 'dbname', '--dbname DB', 'Database name'
30
- c.option 'user', '--user USER', 'Database user name'
31
- c.option 'password', '--password PW', "Database user's password (default: '')"
32
- c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
33
- c.option 'prefix', '--prefix PREFIX', 'Table prefix name'
34
- c.option 'types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array, 'The Drupal content types to be imported.'
9
# Builds the SQL statement that selects the Drupal 6 nodes to import.
#
# prefix - String prepended to every table name (may be empty).
# types  - Array of content type names; they are OR-ed together in the
#          WHERE clause.
#
# Returns the query String. Each row carries nid, title, body, teaser,
# created, status, type and a '|'-separated `tags` column.
def self.build_query(prefix, types)
  type_filter = "n.type = '" + types.join("' OR n.type = '") + "'"

  <<EOS
                SELECT n.nid,
                       n.title,
                       nr.body,
                       nr.teaser,
                       n.created,
                       n.status,
                       n.type,
                       GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags'
                FROM #{prefix}node_revisions AS nr,
                     #{prefix}node AS n
                     LEFT OUTER JOIN #{prefix}term_node AS tn ON tn.nid = n.nid
                     LEFT OUTER JOIN #{prefix}term_data AS td ON tn.tid = td.tid
                WHERE (#{type_filter})
                  AND n.vid = nr.vid
                GROUP BY n.nid
EOS
end
36
33
 
37
- def self.require_deps
38
- JekyllImport.require_with_fallback(%w[
39
- rubygems
40
- sequel
41
- fileutils
42
- safe_yaml
43
- mysql
44
- ])
34
# Returns the SQL used to look up the URL aliases of one system path.
# Drupal 6 names the columns src/dst; they are aliased to source/alias so
# the rows have the same shape as the Drupal 7 query's.
#
# prefix - String prepended to the table name.
def self.aliases_query(prefix)
  format('SELECT src AS source, dst AS alias FROM %surl_alias WHERE src = ?', prefix)
end
46
37
 
47
- def self.process(options)
48
- dbname = options.fetch('dbname')
49
- user = options.fetch('user')
50
- pass = options.fetch('password', "")
51
- host = options.fetch('host', "localhost")
52
- prefix = options.fetch('prefix', "")
53
- types = options.fetch('types', ['blog', 'story', 'article'])
54
-
55
- db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
56
-
57
- if prefix != ''
58
- QUERY[" node "] = " " + prefix + "node "
59
- QUERY[" node_revisions "] = " " + prefix + "node_revisions "
60
- QUERY[" term_node "] = " " + prefix + "term_node "
61
- QUERY[" term_data "] = " " + prefix + "term_data "
62
- end
63
-
64
- types = types.join("' OR n.type = '")
65
- QUERY[" WHERE (%types%) "] = " WHERE (n.type = '#{types}') "
66
-
67
- FileUtils.mkdir_p "_posts"
68
- FileUtils.mkdir_p "_drafts"
69
- FileUtils.mkdir_p "_layouts"
70
-
71
- # Create the refresh layout
72
- # Change the refresh url if you customized your permalink config
73
- File.open("_layouts/refresh.html", "w") do |f|
74
- f.puts <<EOF
75
- <!DOCTYPE html>
76
- <html>
77
- <head>
78
- <meta http-equiv="content-type" content="text/html; charset=utf-8" />
79
- <meta http-equiv="refresh" content="0;url={{ page.refresh_to_post_id }}.html" />
80
- </head>
81
- </html>
82
- EOF
83
- end
84
-
85
- db[QUERY].each do |post|
86
- # Get required fields and construct Jekyll compatible name
87
- node_id = post[:nid]
88
- title = post[:title]
89
- content = post[:body]
90
- tags = (post[:tags] || '').downcase.strip
91
- created = post[:created]
92
- time = Time.at(created)
93
- is_published = post[:status] == 1
94
- dir = is_published ? "_posts" : "_drafts"
95
- slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
96
- name = time.strftime("%Y-%m-%d-") + slug + '.md'
97
-
98
- # Get the relevant fields as a hash, delete empty fields and convert
99
- # to YAML for the header
100
- data = {
101
- 'layout' => 'post',
102
- 'title' => title.to_s,
103
- 'created' => created,
104
- 'categories' => tags.split('|')
105
- }.delete_if { |k,v| v.nil? || v == ''}.each_pair {
106
- |k,v| ((v.is_a? String) ? v.force_encoding("UTF-8") : v)
107
- }.to_yaml
38
+ def self.post_data(sql_post_data)
39
+ content = sql_post_data[:body].to_s
40
+ summary = sql_post_data[:teaser].to_s
41
+ tags = (sql_post_data[:tags] || '').downcase.strip
108
42
 
109
- # Write out the data and content to file
110
- File.open("#{dir}/#{name}", "w") do |f|
111
- f.puts data
112
- f.puts "---"
113
- f.puts content
114
- end
43
+ data = {
44
+ 'excerpt' => summary,
45
+ 'categories' => tags.split('|')
46
+ }
115
47
 
116
- # Make a file to redirect from the old Drupal URL
117
- if is_published
118
- aliases = db["SELECT dst FROM #{prefix}url_alias WHERE src = ?", "node/#{node_id}"].all
119
-
120
- aliases.push(:dst => "node/#{node_id}")
121
-
122
- aliases.each do |url_alias|
123
- FileUtils.mkdir_p url_alias[:dst]
124
- File.open("#{url_alias[:dst]}/index.md", "w") do |f|
125
- f.puts "---"
126
- f.puts "layout: refresh"
127
- f.puts "refresh_to_post_id: /#{time.strftime("%Y/%m/%d/") + slug}"
128
- f.puts "---"
129
- end
130
- end
131
- end
132
- end
133
-
134
- # TODO: Make dirs & files for nodes of type 'page'
135
- # Make refresh pages for these as well
48
+ return data, content
136
49
  end
50
+
137
51
  end
138
52
  end
139
53
  end
@@ -1,111 +1,54 @@
1
+ require 'jekyll-import/importers/drupal_common'
2
+
1
3
  module JekyllImport
2
4
  module Importers
3
5
  class Drupal7 < Importer
4
- # Reads a MySQL database via Sequel and creates a post file for each story
5
- # and blog node.
6
- QUERY = "SELECT n.title, \
7
- fdb.body_value, \
8
- fdb.body_summary, \
9
- n.created, \
10
- n.status, \
11
- n.nid, \
12
- u.name \
13
- FROM node AS n, \
14
- field_data_body AS fdb, \
15
- users AS u \
16
- WHERE (%types%) \
17
- AND n.nid = fdb.entity_id \
18
- AND n.vid = fdb.revision_id
19
- AND n.uid = u.uid"
20
-
21
- def self.validate(options)
22
- %w[dbname user].each do |option|
23
- if options[option].nil?
24
- abort "Missing mandatory option --#{option}."
25
- end
26
- end
27
- end
6
+ include DrupalCommon
7
+ extend DrupalCommon::ClassMethods
28
8
 
29
- def self.specify_options(c)
30
- c.option 'dbname', '--dbname DB', 'Database name'
31
- c.option 'user', '--user USER', 'Database user name'
32
- c.option 'password', '--password PW', 'Database user\'s password (default: "")'
33
- c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
34
- c.option 'prefix', '--prefix PREFIX', 'Table prefix name'
35
- c.option 'types', '--types TYPE1[,TYPE2[,TYPE3...]]', Array, 'The Drupal content types to be imported.'
9
# Builds the SQL statement that selects the Drupal 7 nodes to import.
#
# prefix - String prepended to every table name (may be empty).
# types  - Array of content type names; they are OR-ed together in the
#          WHERE clause.
#
# Returns the query String. Each row carries nid, title, body_value,
# body_summary, created, status, type and a '|'-separated `tags` column.
def self.build_query(prefix, types)
  types = types.join("' OR n.type = '")
  types = "n.type = '#{types}'"

  # The statement must end right after GROUP BY: a trailing double quote
  # inside the heredoc would be sent to MySQL as part of the query.
  query = <<EOS
                SELECT n.nid,
                       n.title,
                       fdb.body_value,
                       fdb.body_summary,
                       n.created,
                       n.status,
                       n.type,
                       GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags'
                FROM #{prefix}field_data_body AS fdb,
                     #{prefix}node AS n
                     LEFT OUTER JOIN #{prefix}taxonomy_index AS ti ON ti.nid = n.nid
                     LEFT OUTER JOIN #{prefix}taxonomy_term_data AS td ON ti.tid = td.tid
                WHERE (#{types})
                  AND n.nid = fdb.entity_id
                  AND n.vid = fdb.revision_id
                GROUP BY n.nid
EOS

  return query
end
37
34
 
38
- def self.require_deps
39
- JekyllImport.require_with_fallback(%w[
40
- rubygems
41
- sequel
42
- fileutils
43
- safe_yaml
44
- ])
35
# Returns the SQL used to look up the URL aliases of one system path in a
# Drupal 7 database.
#
# prefix - String prepended to the table name.
def self.aliases_query(prefix)
  format('SELECT source, alias FROM %surl_alias WHERE source = ?', prefix)
end
46
38
 
47
- def self.process(options)
48
- dbname = options.fetch('dbname')
49
- user = options.fetch('user')
50
- pass = options.fetch('password', "")
51
- host = options.fetch('host', "localhost")
52
- prefix = options.fetch('prefix', "")
53
- types = options.fetch('types', ['blog', 'story', 'article'])
54
-
55
- db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
56
-
57
- unless prefix.empty?
58
- QUERY[" node "] = " " + prefix + "node "
59
- QUERY[" field_data_body "] = " " + prefix + "field_data_body "
60
- QUERY[" users "] = " " + prefix + "users "
61
- end
62
-
63
- types = types.join("' OR n.type = '")
64
- QUERY[" WHERE (%types%) "] = " WHERE (n.type = '#{types}') "
65
-
66
- FileUtils.mkdir_p "_posts"
67
- FileUtils.mkdir_p "_drafts"
68
- FileUtils.mkdir_p "_layouts"
69
-
70
- db[QUERY].each do |post|
71
- # Get required fields and construct Jekyll compatible name
72
- title = post[:title]
73
- content = post[:body_value]
74
- summary = post[:body_summary]
75
- created = post[:created]
76
- author = post[:name]
77
- nid = post[:nid]
78
- time = Time.at(created)
79
- is_published = post[:status] == 1
80
- dir = is_published ? "_posts" : "_drafts"
81
- slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
82
- name = time.strftime("%Y-%m-%d-") + slug + '.md'
83
-
84
- # Get the relevant fields as a hash, delete empty fields and convert
85
- # to YAML for the header
86
- data = {
87
- 'layout' => 'post',
88
- 'title' => title.strip.force_encoding("UTF-8"),
89
- 'author' => author,
90
- 'nid' => nid,
91
- 'created' => created,
92
- 'excerpt' => summary
93
- }.delete_if { |k,v| v.nil? || v == ''}.to_yaml
39
+ def self.post_data(sql_post_data)
40
+ content = sql_post_data[:body_value].to_s
41
+ summary = sql_post_data[:body_summary].to_s
42
+ tags = (sql_post_data[:tags] || '').downcase.strip
94
43
 
95
- # Write out the data and content to file
96
- File.open("#{dir}/#{name}", "w") do |f|
97
- f.puts data
98
- f.puts "---"
99
- f.puts content
100
- end
44
+ data = {
45
+ 'excerpt' => summary,
46
+ 'categories' => tags.split('|')
47
+ }
101
48
 
102
- end
103
-
104
- # TODO: Make dirs & files for nodes of type 'page'
105
- # Make refresh pages for these as well
106
-
107
- # TODO: Make refresh dirs & files according to entries in url_alias table
49
+ return data, content
108
50
  end
51
+
109
52
  end
110
53
  end
111
54
  end
@@ -0,0 +1,157 @@
1
+ require 'date'
2
+
3
+ module JekyllImport
4
+ module Importers
5
+ module DrupalCommon
6
+ # This module provides a base for the Drupal importers (at least for 6
7
+ # and 7; since 8 will be a different beast). Version-specific importers
8
+ # will need to implement the missing methods from the Importer class.
9
+ #
10
+ # The general idea is that this importer reads a MySQL database via Sequel
11
+ # and creates a post file for each node it finds in the Drupal database.
12
+
13
+ module ClassMethods
14
# Fallback values for the optional command-line options. They are used
# both in the option help texts and when reading the options in `process`.
# Frozen so the shared defaults cannot be modified by accident.
DEFAULTS = {
  "password" => "",
  "host"     => "localhost",
  "prefix"   => "",
  "types"    => %w(blog story article)
}.freeze
20
+
21
# Registers the command-line options shared by the Drupal importers.
#
# c - the command object; only its `option` method is used here.
#
# The help texts of the optional switches show the values from DEFAULTS.
def specify_options(c)
  default_password = DEFAULTS["password"].inspect
  default_host     = DEFAULTS["host"].inspect
  default_prefix   = DEFAULTS["prefix"].inspect
  default_types    = DEFAULTS["types"].join(",")

  c.option 'dbname',   '--dbname DB',   'Database name'
  c.option 'user',     '--user USER',   'Database user name'
  c.option 'password', '--password PW', "Database user's password (default: #{default_password})"
  c.option 'host',     '--host HOST',   "Database host name (default: #{default_host})"
  c.option 'prefix',   '--prefix PREFIX', "Table prefix name (default: #{default_prefix})"
  c.option 'types',    '--types TYPE1[,TYPE2[,TYPE3...]]', Array,
           "The Drupal content types to be imported (default: #{default_types})"
end
30
+
31
# Loads the libraries the Drupal importers rely on by handing their names
# to JekyllImport.require_with_fallback.
def require_deps
  dependencies = %w[rubygems sequel fileutils safe_yaml]
  JekyllImport.require_with_fallback(dependencies)
end
39
+
40
# Runs the import: connects to the MySQL database, writes one file per node
# into the Jekyll source's _posts (published) or _drafts directory, creates
# the `refresh` layout, and writes a redirect page for every old URL of a
# published node.
#
# options - Hash with String keys. 'dbname' and 'user' are mandatory
#           (KeyError otherwise); 'password', 'host', 'prefix' and 'types'
#           fall back to DEFAULTS.
#
# Relies on the importer implementing build_query, aliases_query and
# post_data as class-level methods.
def process(options)
  dbname = options.fetch('dbname')
  user   = options.fetch('user')
  pass   = options.fetch('password', DEFAULTS["password"])
  host   = options.fetch('host',     DEFAULTS["host"])
  prefix = options.fetch('prefix',   DEFAULTS["prefix"])
  types  = options.fetch('types',    DEFAULTS["types"])

  db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')

  query = self.build_query(prefix, types)

  conf = Jekyll.configuration({})
  src_dir = conf['source']

  dirs = {
    :_posts   => File.join(src_dir, '_posts').to_s,
    :_drafts  => File.join(src_dir, '_drafts').to_s,
    :_layouts => Jekyll.sanitized_path(src_dir, conf['layouts_dir'].to_s)
  }

  dirs.each do |key, dir|
    FileUtils.mkdir_p dir
  end

  # Create the refresh layout
  # Change the refresh url if you customized your permalink config
  File.open(File.join(dirs[:_layouts], 'refresh.html'), 'w') do |f|
    f.puts <<-HTML
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<meta http-equiv="refresh" content="0;url={{ page.refresh_to_post_id }}.html" />
</head>
</html>
    HTML
  end

  db[query].each do |post|
    # Version-specific fields (excerpt, categories, body) come from the importer
    data, content = self.post_data(post)

    data['layout'] = post[:type]
    # to_s guards against a NULL title, which would otherwise raise on strip
    title = data['title'] = post[:title].to_s.strip.force_encoding('UTF-8')
    time = data['created'] = post[:created]

    # Drop empty fields and make sure every string value is tagged as UTF-8
    data = data.delete_if { |k,v| v.nil? || v == ''}.each_pair {
      |k,v| ((v.is_a? String) ? v.force_encoding('UTF-8') : v)
    }

    # Construct a Jekyll compatible file name
    is_published = post[:status] == 1
    node_id = post[:nid]
    dir = is_published ? dirs[:_posts] : dirs[:_drafts]
    slug = title.strip.downcase.gsub(/(&|&amp;)/, ' and ').gsub(/[\s\.\/\\]/, '-').gsub(/[^\w-]/, '').gsub(/[-_]{2,}/, '-').gsub(/^[-_]/, '').gsub(/[-_]$/, '')
    filename = Time.at(time).to_datetime.strftime('%Y-%m-%d-') + slug + '.md'

    # Write out the data and content to file
    File.open("#{dir}/#{filename}", 'w') do |f|
      f.puts data.to_yaml
      f.puts '---'
      f.puts content
    end

    # Make a file to redirect from the old Drupal URL
    if is_published
      alias_query = self.aliases_query(prefix)
      type = post[:type]

      # Look aliases up under "node/<nid>" as the previous release did, and
      # keep the "<type>/<nid>" lookup as well.
      # NOTE(review): presumably url_alias stores system paths as
      # "node/<nid>" -- confirm against the Drupal schema.
      system_paths = ["#{type}/#{node_id}", "node/#{node_id}"].uniq
      aliases = system_paths.flat_map { |path| db[alias_query, path].all }

      # The un-aliased paths themselves must redirect too
      system_paths.each { |path| aliases.push(:alias => path) }

      aliases.each do |url_alias|
        FileUtils.mkdir_p url_alias[:alias]
        File.open("#{url_alias[:alias]}/index.md", "w") do |f|
          f.puts '---'
          f.puts 'layout: refresh'
          f.puts "refresh_to_post_id: /#{Time.at(time).to_datetime.strftime('%Y/%m/%d/') + slug}"
          f.puts '---'
        end
      end
    end
  end
end
128
+ end
129
+
130
# Placeholder for the version-specific node query. A concrete importer must
# provide its own build_query(prefix, types) returning the SQL String; this
# fallback only reports that it is missing.
#
# Raises RuntimeError always.
def build_query(prefix, types)
  raise 'The importer you are trying to use does not implement the build_query() method.'
end
133
+
134
# Placeholder for the version-specific URL alias query. A concrete importer
# must provide its own aliases_query(prefix); this fallback only reports
# that it is missing.
#
# Raises RuntimeError always.
def aliases_query(prefix)
  # Make sure you implement the query returning "alias" as the column name
  # for the URL aliases. See the Drupal 6 importer for an example. The
  # alias field is called 'dst' but we alias it to 'alias', to follow
  # Drupal 7's column names.
  raise 'The importer you are trying to use does not implement the aliases_query() method.'
end
141
+
142
# Placeholder for the version-specific row conversion. A concrete importer
# must provide its own post_data(sql_post_data) returning the front-matter
# Hash and the body String; this fallback only reports that it is missing.
#
# Raises RuntimeError always.
def post_data(sql_post_data)
  raise 'The importer you are trying to use does not implement the post_data() method.'
end
145
+
146
# Aborts the program with an explanatory message when one of the mandatory
# options ('dbname', 'user') is missing from +options+.
#
# NOTE(review): this method sits after the `end` that closes ClassMethods,
# so it is not among the methods added by `extend DrupalCommon::ClassMethods`
# -- confirm that is intended.
def validate(options)
  %w[dbname user].each do |required|
    abort "Missing mandatory option --#{required}." if options[required].nil?
  end
end
153
+
154
+
155
+ end
156
+ end
157
+ end
@@ -22,8 +22,10 @@ module JekyllImport
22
22
  JekyllImport.require_with_fallback(%w[
23
23
  rubygems
24
24
  sequel
25
+ mysql2
25
26
  fileutils
26
27
  safe_yaml
28
+ mysql
27
29
  ])
28
30
  end
29
31
 
@@ -35,7 +37,7 @@ module JekyllImport
35
37
  section = options.fetch('section', '1')
36
38
  table_prefix = options.fetch('prefix', "jos_")
37
39
 
38
- db = Sequel.mysql(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
40
+ db = Sequel.mysql2(dbname, :user => user, :password => pass, :host => host, :encoding => 'utf8')
39
41
 
40
42
  FileUtils.mkdir_p("_posts")
41
43
 
@@ -0,0 +1,363 @@
1
+ module JekyllImport
2
+ module Importers
3
+ class S9YDatabase < Importer
4
+
5
# Loads the libraries this importer relies on by handing their names to
# JekyllImport.require_with_fallback.
def self.require_deps
  dependencies = %w[rubygems sequel fileutils safe_yaml unidecode]
  JekyllImport.require_with_fallback(dependencies)
end
15
+
16
# Registers the command-line options understood by this importer.
#
# c - the command object; only its `option` method is used here.
def self.specify_options(c)
  c.option 'dbname', '--dbname DB', 'Database name (default: "")'
  c.option 'socket', '--socket SOCKET', 'Database socket (default: "")'
  c.option 'user', '--user USER', 'Database user name (default: "")'
  # The inner quotes are escaped: unescaped they would end the literal and
  # the help text would read "(default: )".
  c.option 'password', '--password PW', "Database user's password (default: \"\")"
  c.option 'host', '--host HOST', 'Database host name (default: "localhost")'
  c.option 'table_prefix', '--table_prefix PREFIX', 'Table prefix name (default: "serendipity_")'
  c.option 'clean_entities', '--clean_entities', 'Whether to clean entities (default: true)'
  c.option 'comments', '--comments', 'Whether to import comments (default: true)'
  c.option 'categories', '--categories', 'Whether to import categories (default: true)'
  c.option 'tags', '--tags', 'Whether to import tags (default: true)'
  c.option 'drafts', '--drafts', 'Whether to export drafts as well'
  c.option 'markdown', '--markdown', 'convert into markdown format (default: false)'
  c.option 'permalinks', '--permalinks', 'preserve S9Y permalinks (default: false)'
end
31
+
32
# Main migrator function. Call this to perform the migration.
#
# opts:: A hash table of configuration options, keyed by String.
#
# Supported options are:
#
# 'dbname'::         The name of the database. Default: ''
# 'user'::           The database user name. Default: ''
# 'password'::       The database user's password. Default: ''
# 'host'::           The address of the MySQL database host.
#                    Default: 'localhost'
# 'socket'::         The database socket's path. Default: nil
# 'table_prefix'::   Prefix of the database tables used by Serendipity.
#                    Default: 'serendipity_'
# 'clean_entities':: If true, convert non-ASCII characters to HTML
#                    entities in the posts, comments, titles, and
#                    names. Requires the 'htmlentities' gem to
#                    work. Default: true.
# 'comments'::       If true, migrate post comments too. Comments
#                    are saved in the post's YAML front matter.
#                    Default: true.
# 'categories'::     If true, save the post's categories in its
#                    YAML front matter. Default: true.
# 'tags'::           If true, save the post's tags in its
#                    YAML front matter. Default: true.
# 'extension'::      Set the post extension. Default: "html"
# 'drafts'::         If true, export drafts as well.
#                    Default: true.
# 'markdown'::       If true, convert the content to markdown.
#                    Requires the 'reverse_markdown' gem.
#                    Default: false
# 'permalinks'::     If true, save the post's original permalink in its
#                    YAML front matter. Default: false.
#
def self.process(opts)
  options = {
    :user           => opts.fetch('user', ''),
    :pass           => opts.fetch('password', ''),
    :host           => opts.fetch('host', 'localhost'),
    :socket         => opts.fetch('socket', nil),
    :dbname         => opts.fetch('dbname', ''),
    :table_prefix   => opts.fetch('table_prefix', 'serendipity_'),
    :clean_entities => opts.fetch('clean_entities', true),
    :comments       => opts.fetch('comments', true),
    :categories     => opts.fetch('categories', true),
    :tags           => opts.fetch('tags', true),
    :extension      => opts.fetch('extension', 'html'),
    :drafts         => opts.fetch('drafts', true),
    :markdown       => opts.fetch('markdown', false),
    :permalinks     => opts.fetch('permalinks', false),
  }

  # Each of these options is replaced by the result of loading its gem
  if options[:clean_entities]
    options[:clean_entities] = require_if_available('htmlentities', 'clean_entities')
  end

  if options[:markdown]
    options[:markdown] = require_if_available('reverse_markdown', 'markdown')
  end

  FileUtils.mkdir_p("_posts")
  FileUtils.mkdir_p("_drafts") if options[:drafts]

  db = Sequel.mysql2(options[:dbname], :user => options[:user], :password => options[:pass],
                     :socket => options[:socket], :host => options[:host], :encoding => 'utf8')

  px = options[:table_prefix]

  # Map of entry id => { :slug => ... }, handed to process_post
  page_name_list = {}

  page_name_query = %(
    SELECT
      entries.ID    AS `id`,
      entries.title AS `title`
    FROM #{px}entries AS `entries`
  )

  db[page_name_query].each do |page|
    page[:slug] = sluggify(page[:title])

    page_name_list[ page[:id] ] = {
      :slug => page[:slug]
    }
  end

  posts_query = "
    SELECT
      entries.ID        AS `id`,
      entries.isdraft   AS `isdraft`,
      entries.title     AS `title`,
      entries.timestamp AS `timestamp`,
      entries.body      AS `body`,
      authors.realname  AS `author`,
      authors.username  AS `author_login`,
      authors.email     AS `author_email`
    FROM #{px}entries AS `entries`
      LEFT JOIN #{px}authors AS `authors`
        ON entries.authorid = authors.authorid"

  unless options[:drafts]
    # Leading whitespace keeps the clause separate from the ON condition;
    # the entries table is aliased `entries` in this query.
    posts_query << "\n    WHERE entries.isdraft = 'false'"
  end

  db[posts_query].each do |post|
    process_post(post, db, options, page_name_list)
  end
end
137
+
138
# Converts one entry row into a Jekyll file (YAML front matter + body) and
# writes it to disk.
#
# post           - Hash-like row of the posts query.
# db             - database handle, passed on to the process_* helpers.
# options        - Hash of importer options (Symbol keys).
# page_name_list - Hash of entry id => { :slug => ... }, used for rows of
#                  type 'page'.
#
# Target path: "<page slug>/index.<ext>" for rows whose :type is 'page',
# "_drafts/<slug>.<ext>" for drafts, "_posts/<date>-<slug>.<ext>" otherwise.
def self.process_post(post, db, options, page_name_list)
  extension = options[:extension]

  title = post[:title]
  title = clean_entities(title) if options[:clean_entities]

  slug = post[:slug]
  slug = sluggify(title) if !slug || slug.empty?

  status = post[:isdraft] == 'true' ? 'draft' : 'published'

  # Time.at(nil) raises, so the fallback to "now" has to be decided before
  # the conversion rather than with `||` after it.
  date = post[:timestamp] ? Time.at(post[:timestamp]).utc : Time.now.utc
  name = "%02d-%02d-%02d-%s.%s" % [date.year, date.month, date.day, slug, extension]

  content = post[:body].to_s
  content = clean_entities(content) if options[:clean_entities]
  content = ReverseMarkdown.convert(content) if options[:markdown]

  categories = process_categories(db, options, post)
  comments   = process_comments(db, options, post)
  tags       = process_tags(db, options, post)
  permalink  = process_permalink(db, options, post)

  # Get the relevant fields as a hash, delete empty fields and
  # convert to YAML for the header.
  data = {
    'layout'       => post[:type].to_s,
    'status'       => status.to_s,
    # nil (and therefore omitted) for drafts, true for published entries
    'published'    => status == 'draft' ? nil : true,
    'title'        => title.to_s,
    'author'       => {
      'display_name'=> post[:author].to_s,
      'login'       => post[:author_login].to_s,
      'email'       => post[:author_email].to_s
    },
    'author_login' => post[:author_login].to_s,
    'author_email' => post[:author_email].to_s,
    'date'         => date.to_s,
    'permalink'    => options[:permalinks] ? permalink : nil,
    'categories'   => options[:categories] ? categories : nil,
    'tags'         => options[:tags] ? tags : nil,
    'comments'     => options[:comments] ? comments : nil,
  }.delete_if { |k,v| v.nil? || v == '' }.to_yaml

  if post[:type] == 'page'
    filename = page_path(post[:id], page_name_list) + "index.#{extension}"
    FileUtils.mkdir_p(File.dirname(filename))
  elsif status == 'draft'
    filename = "_drafts/#{slug}.#{extension}"
  else
    filename = "_posts/#{name}"
  end

  # Write out the data and content to file
  File.open(filename, "w") do |f|
    f.puts data
    f.puts "---"
    f.puts Util.wpautop(content)
  end
end
207
+
208
# Tries to load an optional gem.
#
# gem_name    - name passed to `require`.
# option_name - name of the option that depends on the gem; only used in
#               the warning.
#
# Returns true when the gem was loaded. On LoadError a warning is written
# to STDERR and false is returned, so the caller can switch the option off.
def self.require_if_available(gem_name, option_name)
  begin
    require gem_name
    return true
  rescue LoadError
    STDERR.puts "Could not require '#{gem_name}', so the :#{option_name} option is now disabled."
    return false
  end
end
217
+
218
# Collects the category names attached to one entry.
#
# db      - database handle; `db[sql]` must yield rows with a :name field.
# options - importer options; reads :categories, :table_prefix and
#           :clean_entities.
# post    - entry row; only post[:id] is used.
#
# Returns an Array of names (entity-cleaned when :clean_entities is set),
# or an empty Array when :categories is disabled.
def self.process_categories(db, options, post)
  return [] unless options[:categories]

  px = options[:table_prefix]

  cquery = %(
    SELECT
      categories.category_name AS `name`
    FROM
      #{px}entrycat AS `entrycat`,
      #{px}category AS `categories`
    WHERE
      entrycat.entryid = '#{post[:id]}' AND
      entrycat.categoryid = categories.categoryid
  )

  names = db[cquery].map { |row| row[:name] }
  return names unless options[:clean_entities]

  names.map { |name| clean_entities(name) }
end
242
+
243
# Collects the approved comments of one entry, ordered by comment id.
#
# db      - database handle; `db[sql]` must yield the comment rows.
# options - importer options; reads :comments, :table_prefix and
#           :clean_entities.
# post    - entry row; only post[:id] is used.
#
# Returns an Array of Hashes with the String keys 'id', 'author',
# 'author_email', 'author_url', 'date' and 'content', or an empty Array
# when :comments is disabled.
def self.process_comments(db, options, post)
  return [] unless options[:comments]

  px = options[:table_prefix]

  cquery = %(
    SELECT
      id        AS `id`,
      author    AS `author`,
      email     AS `author_email`,
      url       AS `author_url`,
      timestamp AS `date`,
      body      AS `content`
    FROM #{px}comments
    WHERE
      entry_id = '#{post[:id]}' AND
      status = 'approved'
  )

  entity_cleanup = options[:clean_entities]

  collected = db[cquery].map do |row|
    text = row[:content].to_s
    name = row[:author].to_s

    text.force_encoding("UTF-8") if text.respond_to?(:force_encoding)

    if entity_cleanup
      text = clean_entities(text)
      name = clean_entities(name)
    end

    {
      'id'           => row[:id].to_i,
      'author'       => name,
      'author_email' => row[:author_email].to_s,
      'author_url'   => row[:author_url].to_s,
      'date'         => row[:date].to_s,
      'content'      => text,
    }
  end

  collected.sort { |a, b| a['id'] <=> b['id'] }
end
285
+
286
# Collects the tags attached to one entry.
#
# db      - database handle; `db[sql]` must yield rows with a :name field.
# options - importer options; reads :tags, :table_prefix and
#           :clean_entities.
# post    - entry row; only post[:id] is used.
#
# Returns an Array of tag names (entity-cleaned when :clean_entities is
# set), or an empty Array when :tags is disabled.
def self.process_tags(db, options, post)
  # Guarded by :tags, so tags and categories can be switched independently
  return [] unless options[:tags]

  px = options[:table_prefix]

  cquery = %(
    SELECT
      entrytags.tag AS `name`
    FROM
      #{px}entrytags AS `entrytags`
    WHERE
      entrytags.entryid = '#{post[:id]}'
  )

  db[cquery].each_with_object([]) do |tag, tags|
    if options[:clean_entities]
      tags << clean_entities(tag[:name])
    else
      tags << tag[:name]
    end
  end
end
308
+
309
# Looks up the stored permalink of one entry.
#
# db      - database handle; `db[sql]` must yield rows with a :permalink
#           field.
# options - importer options; reads :permalinks and :table_prefix.
# post    - entry row; only post[:id] is used.
#
# Returns the first permalink found, prefixed with "/", or nil when
# :permalinks is disabled or the entry has no permalink row.
def self.process_permalink(db, options, post)
  return unless options[:permalinks]

  px = options[:table_prefix]

  cquery = %(
    SELECT
      permalinks.permalink AS `permalink`
    FROM
      #{px}permalinks AS `permalinks`
    WHERE
      permalinks.entry_id = '#{post[:id]}' AND
      permalinks.type = 'entry'
  )

  db[cquery].each do |link|
    return "/#{link[:permalink]}"
  end

  # No row matched: return nil explicitly, otherwise the value of `each`
  # would be returned to the caller as the permalink.
  nil
end
328
+
329
# Encodes +text+ with HTMLEntities (named entities) and then restores the
# characters that HTML markup itself is made of, so tags stay intact.
#
# text - String to clean; its encoding is forced to UTF-8 in place when the
#        object supports it.
#
# Returns the encoded String.
def self.clean_entities( text )
  if text.respond_to?(:force_encoding)
    text.force_encoding("UTF-8")
  end
  text = HTMLEntities.new.encode(text, :named)
  # We don't want to convert these, it would break all
  # HTML tags in the post and comments.
  text.gsub!("&amp;", "&")
  text.gsub!("&lt;", "<")
  text.gsub!("&gt;", ">")
  text.gsub!("&quot;", '"')
  text.gsub!("&apos;", "'")
  # Same reasoning for the slash: restore it instead of encoding it,
  # otherwise closing tags and URLs get mangled.
  text.gsub!("&#47;", "/")
  text
end
344
+
345
# Turns a title into a URL slug: transliterated with String#to_ascii,
# lower-cased, every run of non-alphanumeric characters collapsed, and the
# remaining words joined with "-".
#
# title - String that responds to to_ascii.
def self.sluggify( title )
  words = title.to_ascii.downcase.gsub(/[^0-9A-Za-z]+/, " ").strip
  words.tr(" ", "-")
end
348
+
349
# Returns the directory part ("<slug>/") for a page, or an empty String
# when the page id is not present in +page_name_list+.
#
# page_id        - key to look up.
# page_name_list - Hash of id => { :slug => ... }.
def self.page_path( page_id, page_name_list )
  return "" unless page_name_list.key?(page_id)

  "#{page_name_list[page_id][:slug]}/"
end
359
+
360
+ end
361
+ end
362
+ end
363
+
@@ -42,10 +42,7 @@ module JekyllImport
42
42
  puts "Fetching #{feed_url}"
43
43
  feed = open(feed_url)
44
44
  contents = feed.readlines.join("\n")
45
- beginning = contents.index("{")
46
- ending = contents.rindex("}")
47
- json = contents[beginning..ending] # Strip Tumblr's JSONP chars.
48
- blog = JSON.parse(json)
45
+ blog = extract_json(contents)
49
46
  puts "Page: #{current_page + 1} - Posts: #{blog["posts"].size}"
50
47
  batch = blog["posts"].map { |post| post_to_hash(post, format) }
51
48
 
@@ -68,6 +65,13 @@ module JekyllImport
68
65
 
69
66
  private
70
67
 
68
# Parses the JSON object embedded in a Tumblr API response.
#
# contents - the raw response body String. Everything before the first "{"
#            and after the last "}" (Tumblr's JSONP wrapper) is discarded.
#
# Returns the parsed JSON object.
# Raises JSON::ParserError when the body holds no "{...}" span or the span
# is not valid JSON.
def self.extract_json(contents)
  beginning = contents.index("{")
  ending = contents.rindex("}")

  # A body without braces (e.g. an HTML error page) previously failed with
  # a NoMethodError on nil; report it as the parse failure it really is.
  unless beginning && ending && beginning < ending
    raise JSON::ParserError, "no JSON object found in Tumblr response"
  end

  JSON.parse(contents[beginning..ending]) # Strip Tumblr's JSONP chars.
end
74
+
71
75
  # Writes a post out to disk
72
76
  def self.write_post(post, use_markdown, add_highlights)
73
77
  content = post[:content]
@@ -135,12 +139,12 @@ module JekyllImport
135
139
  post["conversation"].each do |line|
136
140
  content << "<dt>#{line['label']}</dt><dd>#{line['phrase']}</dd>"
137
141
  end
138
- content << "</section></dialog>"
142
+ content << "</dialog></section>"
139
143
  when "video"
140
144
  title = post["video-title"]
141
145
  content = post["video-player"]
142
146
  unless post["video-caption"].nil?
143
- unless content.nil?
147
+ if content
144
148
  content << "<br/>" + post["video-caption"]
145
149
  else
146
150
  content = post["video-caption"]
@@ -209,9 +213,13 @@ module JekyllImport
209
213
  urls = Hash[posts.map { |post|
210
214
  # Create an initial empty file for the post so that
211
215
  # we can instantiate a post object.
212
- File.open("_posts/tumblr/#{post[:name]}", "w")
216
+ File.write("_posts/tumblr/#{post[:name]}", "")
213
217
  tumblr_url = URI.parse(URI.encode(post[:slug])).path
214
- jekyll_url = Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
218
+ jekyll_url = if Jekyll.const_defined? :Post
219
+ Jekyll::Post.new(site, Dir.pwd, "", "tumblr/" + post[:name]).url
220
+ else
221
+ Jekyll::Document.new(File.expand_path("_posts/tumblr/#{post[:name]}"), site: site, collection: site.posts).url
222
+ end
215
223
  redirect_dir = tumblr_url.sub(/\//, "") + "/"
216
224
  FileUtils.mkdir_p redirect_dir
217
225
  File.open(redirect_dir + "index.html", "w") do |f|
@@ -281,7 +289,7 @@ module JekyllImport
281
289
  # Don't fetch if we've already cached this file
282
290
  unless File.size? path
283
291
  puts "Fetching photo #{url}"
284
- File.open(path, "w") { |f| f.write(open(url).read) }
292
+ File.open(path, "wb") { |f| f.write(open(url).read) }
285
293
  end
286
294
  url = "/" + path
287
295
  end
@@ -1,3 +1,3 @@
1
1
  module JekyllImport
2
- VERSION = '0.11.0'
2
+ VERSION = '0.12.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-import
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tom Preston-Werner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-28 00:00:00.000000000 Z
11
+ date: 2016-11-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: jekyll
@@ -304,6 +304,20 @@ dependencies:
304
304
  - - ">="
305
305
  - !ruby/object:Gem::Version
306
306
  version: '0'
307
+ - !ruby/object:Gem::Dependency
308
+ name: reverse_markdown
309
+ requirement: !ruby/object:Gem::Requirement
310
+ requirements:
311
+ - - ">="
312
+ - !ruby/object:Gem::Version
313
+ version: '0'
314
+ type: :development
315
+ prerelease: false
316
+ version_requirements: !ruby/object:Gem::Requirement
317
+ requirements:
318
+ - - ">="
319
+ - !ruby/object:Gem::Version
320
+ version: '0'
307
321
  - !ruby/object:Gem::Dependency
308
322
  name: launchy
309
323
  requirement: !ruby/object:Gem::Requirement
@@ -336,6 +350,7 @@ files:
336
350
  - lib/jekyll-import/importers/csv.rb
337
351
  - lib/jekyll-import/importers/drupal6.rb
338
352
  - lib/jekyll-import/importers/drupal7.rb
353
+ - lib/jekyll-import/importers/drupal_common.rb
339
354
  - lib/jekyll-import/importers/easyblog.rb
340
355
  - lib/jekyll-import/importers/enki.rb
341
356
  - lib/jekyll-import/importers/ghost.rb
@@ -349,6 +364,7 @@ files:
349
364
  - lib/jekyll-import/importers/posterous.rb
350
365
  - lib/jekyll-import/importers/rss.rb
351
366
  - lib/jekyll-import/importers/s9y.rb
367
+ - lib/jekyll-import/importers/s9y_database.rb
352
368
  - lib/jekyll-import/importers/textpattern.rb
353
369
  - lib/jekyll-import/importers/tumblr.rb
354
370
  - lib/jekyll-import/importers/typo.rb
@@ -378,7 +394,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
378
394
  version: '0'
379
395
  requirements: []
380
396
  rubyforge_project:
381
- rubygems_version: 2.5.1
397
+ rubygems_version: 2.5.2
382
398
  signing_key:
383
399
  specification_version: 2
384
400
  summary: Import command for Jekyll (static site generator).