jekyll-import 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/jekyll-import/importers/blogger.rb +51 -63
- data/lib/jekyll-import/importers/drupal6.rb +7 -3
- data/lib/jekyll-import/importers/drupal8.rb +65 -0
- data/lib/jekyll-import/importers/drupal_common.rb +3 -2
- data/lib/jekyll-import/importers/rss.rb +42 -24
- data/lib/jekyll-import/importers/s9y_database.rb +270 -56
- data/lib/jekyll-import/version.rb +1 -1
- metadata +7 -20
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e0098ade7230382d4787c65ab3c17607680119727e1e9cdca587274d94e3e084
|
4
|
+
data.tar.gz: 8f1050dfa7170aa7b08462c0132ab959ffdcda31e8c469929ce123aecb64909a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f32b5ac48f88293a4703c7ece13d4a9886c598b4491fa9e8b750b02f4dfab405291c095355885f823a9b23551bc69e73015487371daa9fb6a9db7ec4783b88f0
|
7
|
+
data.tar.gz: 0ba737a7d8ff767eb1bcce6bffa8af357d6c66793b36e682edfcce05245e21dd0a12d5a3d250118ae9b00dc40857db50884d2365e2419a86efce7d97e0ac52af
|
@@ -11,11 +11,8 @@ module JekyllImport
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def self.validate(options)
|
14
|
-
if options["source"].nil?
|
15
|
-
|
16
|
-
elsif !File.exist?(options["source"])
|
17
|
-
raise Errno::ENOENT, "File not found: #{options["source"]}"
|
18
|
-
end
|
14
|
+
raise "Missing mandatory option: --source" if options["source"].nil?
|
15
|
+
raise Errno::ENOENT, "File not found: #{options["source"]}" unless File.exist?(options["source"])
|
19
16
|
end
|
20
17
|
|
21
18
|
def self.require_deps
|
@@ -42,7 +39,6 @@ module JekyllImport
|
|
42
39
|
source = options.fetch("source")
|
43
40
|
|
44
41
|
listener = BloggerAtomStreamListener.new
|
45
|
-
|
46
42
|
listener.leave_blogger_info = !options.fetch("no-blogger-info", false)
|
47
43
|
listener.comments = options.fetch("comments", false)
|
48
44
|
|
@@ -52,7 +48,6 @@ module JekyllImport
|
|
52
48
|
end
|
53
49
|
|
54
50
|
options["original-url-base"] = listener.original_url_base
|
55
|
-
|
56
51
|
postprocess(options)
|
57
52
|
end
|
58
53
|
|
@@ -63,32 +58,32 @@ module JekyllImport
|
|
63
58
|
# Returns nothing.
|
64
59
|
def self.postprocess(options)
|
65
60
|
# Replace internal link URL
|
66
|
-
|
67
|
-
original_url_base = options.fetch("original-url-base", nil)
|
68
|
-
if original_url_base
|
69
|
-
orig_url_pattern = Regexp.new(" href=([\"\'])(?:#{Regexp.escape(original_url_base)})?/([0-9]{4})/([0-9]{2})/([^\"\']+\.html)\\1")
|
70
|
-
|
71
|
-
Dir.glob("_posts/*.*") do |filename|
|
72
|
-
body = nil
|
73
|
-
File.open(filename, "r") do |f|
|
74
|
-
f.flock(File::LOCK_SH)
|
75
|
-
body = f.read
|
76
|
-
end
|
61
|
+
return unless options.fetch("replace-internal-link", false)
|
77
62
|
|
78
|
-
|
79
|
-
|
80
|
-
quote = Regexp.last_match(1)
|
81
|
-
post_file = Dir.glob("_posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}").first
|
82
|
-
raise "Could not found: _posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}" if post_file.nil?
|
63
|
+
original_url_base = options.fetch("original-url-base", nil)
|
64
|
+
return unless original_url_base
|
83
65
|
|
84
|
-
|
85
|
-
end
|
66
|
+
orig_url_pattern = Regexp.new(" href=([\"\'])(?:#{Regexp.escape(original_url_base)})?/([0-9]{4})/([0-9]{2})/([^\"\']+\.html)\\1")
|
86
67
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
68
|
+
Dir.glob("_posts/*.*") do |filename|
|
69
|
+
body = nil
|
70
|
+
File.open(filename, "r") do |f|
|
71
|
+
f.flock(File::LOCK_SH)
|
72
|
+
body = f.read
|
73
|
+
end
|
74
|
+
|
75
|
+
body.gsub!(orig_url_pattern) do
|
76
|
+
# for post_url
|
77
|
+
quote = Regexp.last_match(1)
|
78
|
+
post_file = Dir.glob("_posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}").first
|
79
|
+
raise "Could not found: _posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}" if post_file.nil?
|
80
|
+
|
81
|
+
" href=#{quote}{{ site.baseurl }}{% post_url #{File.basename(post_file, ".html")} %}#{quote}"
|
82
|
+
end
|
83
|
+
|
84
|
+
File.open(filename, "w") do |f|
|
85
|
+
f.flock(File::LOCK_EX)
|
86
|
+
f << body
|
92
87
|
end
|
93
88
|
end
|
94
89
|
end
|
@@ -118,9 +113,7 @@ module JekyllImport
|
|
118
113
|
|
119
114
|
@in_entry_elem = { :meta => {}, :body => nil }
|
120
115
|
when "title"
|
121
|
-
if @in_entry_elem
|
122
|
-
raise 'only <title type="text"></title> is supported' if attrs["type"] != "text"
|
123
|
-
end
|
116
|
+
raise 'only <title type="text"></title> is supported' if @in_entry_elem && attrs["type"] != "text"
|
124
117
|
when "category"
|
125
118
|
if @in_entry_elem
|
126
119
|
if attrs["scheme"] == "http://www.blogger.com/atom/ns#"
|
@@ -150,25 +143,23 @@ module JekyllImport
|
|
150
143
|
end
|
151
144
|
|
152
145
|
def text(text)
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
end
|
171
|
-
end
|
146
|
+
return unless @in_entry_elem
|
147
|
+
|
148
|
+
case @tag_bread.last
|
149
|
+
when "id"
|
150
|
+
@in_entry_elem[:meta][:id] = text
|
151
|
+
when "published"
|
152
|
+
@in_entry_elem[:meta][:published] = text
|
153
|
+
when "updated"
|
154
|
+
@in_entry_elem[:meta][:updated] = text
|
155
|
+
when "title"
|
156
|
+
@in_entry_elem[:meta][:title] = text
|
157
|
+
when "content"
|
158
|
+
@in_entry_elem[:body] = text
|
159
|
+
when "name"
|
160
|
+
@in_entry_elem[:meta][:author] = text if @tag_bread[-2..-1] == %w(author name)
|
161
|
+
when "app:draft"
|
162
|
+
@in_entry_elem[:meta][:draft] = true if @tag_bread[-2..-1] == %w(app:control app:draft) && text == "yes"
|
172
163
|
end
|
173
164
|
end
|
174
165
|
|
@@ -186,7 +177,7 @@ module JekyllImport
|
|
186
177
|
|
187
178
|
FileUtils.mkdir_p(target_dir)
|
188
179
|
|
189
|
-
file_name = URI.
|
180
|
+
file_name = URI.decode_www_form_component("#{post_data[:filename]}.html")
|
190
181
|
File.open(File.join(target_dir, file_name), "w") do |f|
|
191
182
|
f.flock(File::LOCK_EX)
|
192
183
|
|
@@ -264,19 +255,16 @@ module JekyllImport
|
|
264
255
|
{ :filename => filename, :header => header, :body => body }
|
265
256
|
elsif @in_entry_elem[:meta][:kind] == "comment"
|
266
257
|
timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime("%Y-%m-%d")
|
267
|
-
|
268
|
-
@comment_seq ||= 1
|
258
|
+
raise "Original URL is missing" unless @in_entry_elem[:meta][:original_url]
|
269
259
|
|
270
|
-
|
271
|
-
original_path = original_uri.path.to_s
|
272
|
-
filename = format("%s-%s-%s", timestamp, File.basename(original_path, File.extname(original_path)), @comment_seq)
|
260
|
+
@comment_seq ||= 1
|
273
261
|
|
274
|
-
|
262
|
+
original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
|
263
|
+
original_path = original_uri.path.to_s
|
264
|
+
filename = format("%s-%s-%s", timestamp, File.basename(original_path, File.extname(original_path)), @comment_seq)
|
275
265
|
|
276
|
-
|
277
|
-
|
278
|
-
raise "Original URL is missing"
|
279
|
-
end
|
266
|
+
@comment_seq += 1
|
267
|
+
@original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
|
280
268
|
|
281
269
|
header = {
|
282
270
|
"date" => @in_entry_elem[:meta][:published],
|
@@ -19,15 +19,17 @@ module JekyllImport
|
|
19
19
|
nr.teaser,
|
20
20
|
n.created,
|
21
21
|
n.status,
|
22
|
+
ua.dst AS alias,
|
22
23
|
n.type,
|
23
24
|
GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags'
|
24
|
-
FROM #{prefix}node_revisions AS nr,
|
25
|
+
FROM #{prefix}node_revisions AS nr, url_alias AS ua,
|
25
26
|
#{prefix}node AS n
|
26
27
|
LEFT OUTER JOIN #{prefix}term_node AS tn ON tn.nid = n.nid
|
27
28
|
LEFT OUTER JOIN #{prefix}term_data AS td ON tn.tid = td.tid
|
28
29
|
WHERE (#{types})
|
29
30
|
AND n.vid = nr.vid
|
30
|
-
|
31
|
+
AND ua.src = CONCAT( 'node/', n.nid)
|
32
|
+
GROUP BY n.nid, ua.dst
|
31
33
|
SQL
|
32
34
|
|
33
35
|
query
|
@@ -44,9 +46,11 @@ SQL
|
|
44
46
|
|
45
47
|
data = {
|
46
48
|
"excerpt" => summary,
|
47
|
-
"categories" => tags.split("|"),
|
49
|
+
"categories" => tags.split("|").uniq,
|
48
50
|
}
|
49
51
|
|
52
|
+
data["permalink"] = "/" + sql_post_data[:alias] if sql_post_data[:alias]
|
53
|
+
|
50
54
|
[data, content]
|
51
55
|
end
|
52
56
|
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "jekyll-import/importers/drupal_common"
|
4
|
+
|
5
|
+
module JekyllImport
|
6
|
+
module Importers
|
7
|
+
class Drupal8 < Importer
|
8
|
+
include DrupalCommon
|
9
|
+
extend DrupalCommon::ClassMethods
|
10
|
+
|
11
|
+
def self.build_query(prefix, types, engine)
|
12
|
+
types = types.join("' OR n.type = '")
|
13
|
+
types = "n.type = '#{types}'"
|
14
|
+
|
15
|
+
tag_group = if engine == "postgresql"
|
16
|
+
<<POSTGRESQL
|
17
|
+
(SELECT STRING_AGG(td.name, '|')
|
18
|
+
FROM #{prefix}taxonomy_term_field_data td, #{prefix}taxonomy_index ti
|
19
|
+
WHERE ti.tid = td.tid AND ti.nid = n.nid) AS tags
|
20
|
+
POSTGRESQL
|
21
|
+
else
|
22
|
+
<<SQL
|
23
|
+
(SELECT GROUP_CONCAT(td.name SEPARATOR '|')
|
24
|
+
FROM #{prefix}taxonomy_term_field_data td, #{prefix}taxonomy_index ti
|
25
|
+
WHERE ti.tid = td.tid AND ti.nid = n.nid) AS 'tags'
|
26
|
+
SQL
|
27
|
+
end
|
28
|
+
|
29
|
+
query = <<QUERY
|
30
|
+
SELECT n.nid,
|
31
|
+
n.title,
|
32
|
+
nb.body_value,
|
33
|
+
nb.body_summary,
|
34
|
+
n.created,
|
35
|
+
n.status,
|
36
|
+
n.type,
|
37
|
+
#{tag_group}
|
38
|
+
FROM #{prefix}node_field_data AS n
|
39
|
+
LEFT JOIN #{prefix}node__body AS nb
|
40
|
+
ON nb.entity_id = n.nid
|
41
|
+
WHERE (#{types})
|
42
|
+
QUERY
|
43
|
+
|
44
|
+
query
|
45
|
+
end
|
46
|
+
|
47
|
+
def self.aliases_query(prefix)
|
48
|
+
"SELECT source, alias FROM #{prefix}url_alias WHERE source = ?"
|
49
|
+
end
|
50
|
+
|
51
|
+
def self.post_data(sql_post_data)
|
52
|
+
content = sql_post_data[:body_value].to_s
|
53
|
+
summary = sql_post_data[:body_summary].to_s
|
54
|
+
tags = (sql_post_data[:tags] || "").downcase.strip
|
55
|
+
|
56
|
+
data = {
|
57
|
+
"excerpt" => summary,
|
58
|
+
"categories" => tags.split("|"),
|
59
|
+
}
|
60
|
+
|
61
|
+
[data, content]
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
@@ -68,6 +68,7 @@ module JekyllImport
|
|
68
68
|
src_dir = conf["source"]
|
69
69
|
|
70
70
|
dirs = {
|
71
|
+
:_aliases => src_dir,
|
71
72
|
:_posts => File.join(src_dir, "_posts").to_s,
|
72
73
|
:_drafts => File.join(src_dir, "_drafts").to_s,
|
73
74
|
:_layouts => Jekyll.sanitized_path(src_dir, conf["layouts_dir"].to_s),
|
@@ -146,10 +147,10 @@ module JekyllImport
|
|
146
147
|
|
147
148
|
if partition.first.length.positive?
|
148
149
|
dir = "#{partition.first}/"
|
149
|
-
FileUtils.mkdir_p
|
150
|
+
FileUtils.mkdir_p "#{dirs[:_aliases]}/#{dir}"
|
150
151
|
end
|
151
152
|
|
152
|
-
File.open("#{dir}#{file}.md", "w") do |f|
|
153
|
+
File.open("#{dirs[:_aliases]}/#{dir}#{file}.md", "w") do |f|
|
153
154
|
f.puts "---"
|
154
155
|
f.puts "layout: refresh"
|
155
156
|
f.puts "permalink: #{dir}#{file}/"
|
@@ -6,6 +6,7 @@ module JekyllImport
|
|
6
6
|
def self.specify_options(c)
|
7
7
|
c.option "source", "--source NAME", "The RSS file or URL to import"
|
8
8
|
c.option "tag", "--tag NAME", "Add a tag to posts"
|
9
|
+
c.option "render_audio", "--render_audio", "Render <audio> element as necessary"
|
9
10
|
end
|
10
11
|
|
11
12
|
def self.validate(options)
|
@@ -30,8 +31,6 @@ module JekyllImport
|
|
30
31
|
# Returns nothing.
|
31
32
|
def self.process(options)
|
32
33
|
source = options.fetch("source")
|
33
|
-
frontmatter = options.fetch("frontmatter", [])
|
34
|
-
body = options.fetch("body", ["description"])
|
35
34
|
|
36
35
|
content = ""
|
37
36
|
open(source) { |s| content = s.read }
|
@@ -40,37 +39,56 @@ module JekyllImport
|
|
40
39
|
raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss
|
41
40
|
|
42
41
|
rss.items.each do |item|
|
43
|
-
|
44
|
-
|
45
|
-
|
42
|
+
write_rss_item(item, options)
|
43
|
+
end
|
44
|
+
end
|
46
45
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
46
|
+
def self.write_rss_item(item, options)
|
47
|
+
frontmatter = options.fetch("frontmatter", [])
|
48
|
+
body = options.fetch("body", ["description"])
|
49
|
+
render_audio = options.fetch("render_audio", false)
|
51
50
|
|
52
|
-
|
51
|
+
formatted_date = item.date.strftime("%Y-%m-%d")
|
52
|
+
post_name = Jekyll::Utils.slugify(item.title, :mode => "latin")
|
53
|
+
name = "#{formatted_date}-#{post_name}"
|
54
|
+
audio = render_audio && item.enclosure.url
|
53
55
|
|
54
|
-
|
55
|
-
|
56
|
-
|
56
|
+
header = {
|
57
|
+
"layout" => "post",
|
58
|
+
"title" => item.title,
|
59
|
+
}
|
57
60
|
|
58
|
-
|
61
|
+
header["tag"] = options["tag"] unless options["tag"].nil? || options["tag"].empty?
|
59
62
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
+
frontmatter.each do |value|
|
64
|
+
header[value] = item.send(value)
|
65
|
+
end
|
63
66
|
|
64
|
-
|
65
|
-
|
67
|
+
output = +""
|
68
|
+
|
69
|
+
body.each do |row|
|
70
|
+
output << item.send(row).to_s
|
71
|
+
end
|
66
72
|
|
67
|
-
|
73
|
+
output.strip!
|
74
|
+
output = item.content_encoded if output.empty?
|
68
75
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
76
|
+
FileUtils.mkdir_p("_posts")
|
77
|
+
|
78
|
+
File.open("_posts/#{name}.html", "w") do |f|
|
79
|
+
f.puts header.to_yaml
|
80
|
+
f.puts "---\n\n"
|
81
|
+
|
82
|
+
if audio
|
83
|
+
f.puts <<~HTML
|
84
|
+
<audio controls="">
|
85
|
+
<source src="#{audio}" type="audio/mpeg">
|
86
|
+
Your browser does not support the audio element.
|
87
|
+
</audio>
|
88
|
+
HTML
|
73
89
|
end
|
90
|
+
|
91
|
+
f.puts output
|
74
92
|
end
|
75
93
|
end
|
76
94
|
end
|
@@ -11,25 +11,31 @@ module JekyllImport
|
|
11
11
|
fileutils
|
12
12
|
safe_yaml
|
13
13
|
unidecode
|
14
|
+
nokogiri
|
14
15
|
)
|
15
16
|
)
|
16
17
|
end
|
17
18
|
|
18
19
|
def self.specify_options(c)
|
19
|
-
c.option "dbname",
|
20
|
-
c.option "socket",
|
21
|
-
c.option "user",
|
22
|
-
c.option "password",
|
23
|
-
c.option "host",
|
24
|
-
c.option "port",
|
25
|
-
c.option "table_prefix",
|
26
|
-
c.option "clean_entities",
|
27
|
-
c.option "comments",
|
28
|
-
c.option "categories",
|
29
|
-
c.option "tags",
|
30
|
-
c.option "drafts",
|
31
|
-
c.option "markdown",
|
32
|
-
c.option "permalinks",
|
20
|
+
c.option "dbname", "--dbname DB", "Database name (default: '')"
|
21
|
+
c.option "socket", "--socket SOCKET", "Database socket (default: '')"
|
22
|
+
c.option "user", "--user USER", "Database user name (default: '')"
|
23
|
+
c.option "password", "--password PW", "Database user's password (default: '')"
|
24
|
+
c.option "host", "--host HOST", "Database host name (default: 'localhost')"
|
25
|
+
c.option "port", "--port PORT", "Custom database port connect to (default: 3306)"
|
26
|
+
c.option "table_prefix", "--table_prefix PREFIX", "Table prefix name (default: 'serendipity_')"
|
27
|
+
c.option "clean_entities", "--clean_entities", "Whether to clean entities (default: true)"
|
28
|
+
c.option "comments", "--comments", "Whether to import comments (default: true)"
|
29
|
+
c.option "categories", "--categories", "Whether to import categories (default: true)"
|
30
|
+
c.option "tags", "--tags", "Whether to import tags (default: true)"
|
31
|
+
c.option "drafts", "--drafts", "Whether to export drafts as well"
|
32
|
+
c.option "markdown", "--markdown", "convert into markdown format (default: false)"
|
33
|
+
c.option "permalinks", "--permalinks", "preserve S9Y permalinks (default: false)"
|
34
|
+
c.option "excerpt_separator", "--excerpt_separator", "Demarkation for excerpts (default: '<a id=\"extended\"></a>')"
|
35
|
+
c.option "includeentry", "--includeentry", "Replace macros from the includeentry plugin (default: false)"
|
36
|
+
c.option "imgfig", "--imgfig", "Replace nested img and youtube divs with HTML figure tags (default: true)"
|
37
|
+
c.option "linebreak", "--linebreak", "Line break processing: wp, nokogiri, ignore (default: wp)"
|
38
|
+
c.option "relative", "--relative", "Convert links with this prefix to relative (default:nil)"
|
33
39
|
end
|
34
40
|
|
35
41
|
# Main migrator function. Call this to perform the migration.
|
@@ -56,36 +62,64 @@ module JekyllImport
|
|
56
62
|
# :categories:: If true, save the post's categories in its
|
57
63
|
# YAML front matter. Default: true.
|
58
64
|
# :tags:: If true, save the post's tags in its
|
59
|
-
# YAML front matter.
|
65
|
+
# YAML front matter, in lowercase. Default: true.
|
60
66
|
# :extension:: Set the post extension. Default: "html"
|
61
67
|
# :drafts:: If true, export drafts as well
|
62
68
|
# Default: true.
|
63
69
|
# :markdown:: If true, convert the content to markdown
|
64
70
|
# Default: false
|
65
71
|
# :permalinks:: If true, save the post's original permalink in its
|
66
|
-
# YAML front matter.
|
72
|
+
# YAML front matter. If the 'entryproperties' plugin
|
73
|
+
# was used, its permalink will become the canonical
|
74
|
+
# permalink, and any other will become redirects.
|
75
|
+
# Default: false.
|
76
|
+
# :excerpt_separator:: A string to use to separate the excerpt (body
|
77
|
+
# in S9Y) from the rest of the article (extended
|
78
|
+
# body in S9Y). Default: "<a id=\"extended\"></a>".
|
79
|
+
# :includeentry:: Replace macros from the includeentry plugin - these are
|
80
|
+
# the [s9y-include-entry] and [s9y-include-block] macros.
|
81
|
+
# Default: false.
|
82
|
+
# :imgfig:: Replace S9Y image-comment divs with an HTML figure
|
83
|
+
# div and figcaption, if applicable. Works for img and
|
84
|
+
# iframe.
|
85
|
+
# Default: true.
|
67
86
|
#
|
87
|
+
# :linebreak:: When set to the default "wp", line breaks in entries
|
88
|
+
# will be processed WordPress style, by replacing double
|
89
|
+
# line breaks with HTML p tags, and remaining single
|
90
|
+
# line breaks with HTML br tags. When set to "nokogiri",
|
91
|
+
# entries will be loaded into Nokogiri and formatted as
|
92
|
+
# an XHTML fragment. When set to "ignore", line breaks
|
93
|
+
# will not be replaced at all.
|
94
|
+
# Default: wp
|
95
|
+
# :relative:: Convert absolute links (http://:relative:/foo)
|
96
|
+
# to relative links (/foo).
|
97
|
+
|
68
98
|
def self.process(opts)
|
69
99
|
options = {
|
70
|
-
:user
|
71
|
-
:pass
|
72
|
-
:host
|
73
|
-
:port
|
74
|
-
:socket
|
75
|
-
:dbname
|
76
|
-
:table_prefix
|
77
|
-
:clean_entities
|
78
|
-
:comments
|
79
|
-
:categories
|
80
|
-
:tags
|
81
|
-
:extension
|
82
|
-
:drafts
|
83
|
-
:markdown
|
84
|
-
:permalinks
|
100
|
+
:user => opts.fetch("user", ""),
|
101
|
+
:pass => opts.fetch("password", ""),
|
102
|
+
:host => opts.fetch("host", "localhost"),
|
103
|
+
:port => opts.fetch("port", 3306),
|
104
|
+
:socket => opts.fetch("socket", nil),
|
105
|
+
:dbname => opts.fetch("dbname", ""),
|
106
|
+
:table_prefix => opts.fetch("table_prefix", "serendipity_"),
|
107
|
+
:clean_entities => opts.fetch("clean_entities", true),
|
108
|
+
:comments => opts.fetch("comments", true),
|
109
|
+
:categories => opts.fetch("categories", true),
|
110
|
+
:tags => opts.fetch("tags", true),
|
111
|
+
:extension => opts.fetch("extension", "html"),
|
112
|
+
:drafts => opts.fetch("drafts", true),
|
113
|
+
:markdown => opts.fetch("markdown", false),
|
114
|
+
:permalinks => opts.fetch("permalinks", false),
|
115
|
+
:excerpt_separator => opts.fetch("excerpt_separator", "<a id=\"extended\"></a>"),
|
116
|
+
:includeentry => opts.fetch("includeentry", false),
|
117
|
+
:imgfig => opts.fetch("imgfig", true),
|
118
|
+
:linebreak => opts.fetch("linebreak", "wp"),
|
119
|
+
:relative => opts.fetch("relative", nil),
|
85
120
|
}
|
86
121
|
|
87
122
|
options[:clean_entities] = require_if_available("htmlentities", "clean_entities") if options[:clean_entities]
|
88
|
-
|
89
123
|
options[:markdown] = require_if_available("reverse_markdown", "markdown") if options[:markdown]
|
90
124
|
|
91
125
|
FileUtils.mkdir_p("_posts")
|
@@ -120,6 +154,7 @@ module JekyllImport
|
|
120
154
|
|
121
155
|
posts_query = "
|
122
156
|
SELECT
|
157
|
+
'post' AS `type`,
|
123
158
|
entries.ID AS `id`,
|
124
159
|
entries.isdraft AS `isdraft`,
|
125
160
|
entries.title AS `title`,
|
@@ -154,36 +189,41 @@ module JekyllImport
|
|
154
189
|
name = format("%02d-%02d-%02d-%s.%s", date.year, date.month, date.day, slug, extension)
|
155
190
|
|
156
191
|
content = post[:body].to_s
|
157
|
-
|
192
|
+
extended_content = post[:body_extended].to_s
|
193
|
+
|
194
|
+
content += options[:excerpt_separator] + extended_content unless extended_content.nil? || extended_content.strip.empty?
|
158
195
|
|
196
|
+
content = process_includeentry(content, db, options) if options[:includeentry]
|
197
|
+
content = process_img_div(content) if options[:imgfig]
|
159
198
|
content = clean_entities(content) if options[:clean_entities]
|
199
|
+
content = content.gsub(%r!href=(["'])http://#{options[:relative]}!, 'href=\1') if options[:relative]
|
160
200
|
|
161
201
|
content = ReverseMarkdown.convert(content) if options[:markdown]
|
162
202
|
|
163
203
|
categories = process_categories(db, options, post)
|
164
204
|
comments = process_comments(db, options, post)
|
165
205
|
tags = process_tags(db, options, post)
|
166
|
-
|
206
|
+
all_permalinks = process_permalink(db, options, post)
|
207
|
+
primary_permalink = all_permalinks.shift
|
208
|
+
supplemental_permalinks = all_permalinks unless all_permalinks.empty?
|
167
209
|
|
168
210
|
# Get the relevant fields as a hash, delete empty fields and
|
169
211
|
# convert to YAML for the header.
|
170
212
|
data = {
|
171
|
-
"layout"
|
172
|
-
"status"
|
173
|
-
"published"
|
174
|
-
"title"
|
175
|
-
"author"
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
"
|
181
|
-
"
|
182
|
-
"
|
183
|
-
"
|
184
|
-
"
|
185
|
-
"tags" => options[:tags] ? tags : nil,
|
186
|
-
"comments" => options[:comments] ? comments : nil,
|
213
|
+
"layout" => post[:type].to_s,
|
214
|
+
"status" => status.to_s,
|
215
|
+
"published" => status.to_s == "draft" ? nil : (status.to_s == "published"),
|
216
|
+
"title" => title.to_s,
|
217
|
+
"author" => post[:author].to_s,
|
218
|
+
"author_login" => post[:author_login].to_s,
|
219
|
+
"author_email" => post[:author_email].to_s,
|
220
|
+
"date" => date.to_s,
|
221
|
+
"permalink" => options[:permalinks] ? primary_permalink : nil,
|
222
|
+
"redirect_from" => options[:permalinks] ? supplemental_permalinks : nil,
|
223
|
+
"categories" => options[:categories] ? categories : nil,
|
224
|
+
"tags" => options[:tags] ? tags : nil,
|
225
|
+
"comments" => options[:comments] ? comments : nil,
|
226
|
+
"excerpt_separator" => extended_content.empty? ? nil : options[:excerpt_separator],
|
187
227
|
}.delete_if { |_k, v| v.nil? || v == "" }.to_yaml
|
188
228
|
|
189
229
|
if post[:type] == "page"
|
@@ -195,11 +235,21 @@ module JekyllImport
|
|
195
235
|
filename = "_posts/#{name}"
|
196
236
|
end
|
197
237
|
|
238
|
+
content = case options[:linebreak]
|
239
|
+
when "nokogiri"
|
240
|
+
Nokogiri::HTML.fragment(content).to_xhtml
|
241
|
+
when "ignore"
|
242
|
+
content
|
243
|
+
else
|
244
|
+
# "wp" is the only remaining option, and the default
|
245
|
+
Util.wpautop(content)
|
246
|
+
end
|
247
|
+
|
198
248
|
# Write out the data and content to file
|
199
249
|
File.open(filename, "w") do |f|
|
200
250
|
f.puts data
|
201
251
|
f.puts "---"
|
202
|
-
f.puts
|
252
|
+
f.puts content
|
203
253
|
end
|
204
254
|
end
|
205
255
|
|
@@ -207,10 +257,154 @@ module JekyllImport
|
|
207
257
|
require gem_name
|
208
258
|
true
|
209
259
|
rescue LoadError
|
210
|
-
warn "Could not require '#{gem_name}', so the :#{option_name} option is now disabled."
|
260
|
+
Jekyll.logger.warn "s9y database:", "Could not require '#{gem_name}', so the :#{option_name} option is now disabled."
|
211
261
|
true
|
212
262
|
end
|
213
263
|
|
264
|
+
def self.process_includeentry(text, db, options)
|
265
|
+
return text unless options[:includeentry]
|
266
|
+
|
267
|
+
result = text
|
268
|
+
|
269
|
+
px = options[:table_prefix]
|
270
|
+
|
271
|
+
props = text.scan(%r!(\[s9y-include-entry:([0-9]+):([^:]+)\])!)
|
272
|
+
blocks = text.scan(%r!(\[s9y-include-block:([0-9]+):?([^:]+)?\])!)
|
273
|
+
|
274
|
+
props.each do |match|
|
275
|
+
macro = match[0]
|
276
|
+
id = match[1]
|
277
|
+
replacement = ""
|
278
|
+
if match[2].start_with?("prop=")
|
279
|
+
prop = match[2].sub("prop=", "")
|
280
|
+
cquery = get_property_query(px, id, prop)
|
281
|
+
else
|
282
|
+
prop = match[2]
|
283
|
+
cquery = get_value_query(px, id, prop)
|
284
|
+
end
|
285
|
+
db[cquery].each do |row|
|
286
|
+
replacement << row[:txt]
|
287
|
+
end
|
288
|
+
result = result.sub(macro, replacement)
|
289
|
+
end
|
290
|
+
|
291
|
+
blocks.each do |match|
|
292
|
+
macro = match[0]
|
293
|
+
id = match[1]
|
294
|
+
replacement = ""
|
295
|
+
# match[2] *could* be 'template', but we can't run it through Smarty, so we ignore it
|
296
|
+
cquery = %(
|
297
|
+
SELECT
|
298
|
+
px.body AS `txt`
|
299
|
+
FROM
|
300
|
+
#{px}staticblocks AS px
|
301
|
+
WHERE
|
302
|
+
id = '#{id}'
|
303
|
+
)
|
304
|
+
db[cquery].each do |row|
|
305
|
+
replacement << row[:txt]
|
306
|
+
end
|
307
|
+
result = result.sub(macro, replacement)
|
308
|
+
end
|
309
|
+
|
310
|
+
result
|
311
|
+
end
|
312
|
+
|
313
|
+
def get_property_query(px, id, prop)
|
314
|
+
%(
|
315
|
+
SELECT
|
316
|
+
px.value AS `txt`
|
317
|
+
FROM
|
318
|
+
#{px}entryproperties AS px
|
319
|
+
WHERE
|
320
|
+
entryid = '#{id}' AND
|
321
|
+
property = '#{prop}'
|
322
|
+
)
|
323
|
+
end
|
324
|
+
|
325
|
+
def get_value_query(px, id, prop)
|
326
|
+
%(
|
327
|
+
SELECT
|
328
|
+
px.#{prop} AS `txt`
|
329
|
+
FROM
|
330
|
+
#{px}entries AS px
|
331
|
+
WHERE
|
332
|
+
entryid = '#{id}'
|
333
|
+
)
|
334
|
+
end
|
335
|
+
|
336
|
+
# Replace .serendipity_imageComment_* blocks
|
337
|
+
def self.process_img_div(text)
|
338
|
+
caption_classes = [
|
339
|
+
".serendipity_imageComment_left",
|
340
|
+
".serendipity_imageComment_right",
|
341
|
+
".serendipity_imageComment_center",
|
342
|
+
]
|
343
|
+
|
344
|
+
noko = Nokogiri::HTML.fragment(text)
|
345
|
+
noko.css(caption_classes.join(",")).each do |imgcaption|
|
346
|
+
block_attrs = get_block_attrs(imgcaption)
|
347
|
+
|
348
|
+
# Is this a thumbnail to a bigger/other image?
|
349
|
+
big_link = imgcaption.at_css(".serendipity_image_link")
|
350
|
+
big_link ||= imgcaption.at_xpath(".//a[.//img]")
|
351
|
+
|
352
|
+
# The caption (if any) may have raw HTML
|
353
|
+
caption_elem = imgcaption.at_css(".serendipity_imageComment_txt")
|
354
|
+
caption = ""
|
355
|
+
caption = "<figcaption>#{caption_elem.inner_html}</figcaption>" if caption_elem
|
356
|
+
|
357
|
+
image_node = imgcaption.at_css("img")
|
358
|
+
if image_node
|
359
|
+
attrs = get_media_attrs(image_node)
|
360
|
+
media = "<img #{attrs}/>"
|
361
|
+
else
|
362
|
+
iframe_node = imgcaption.at_css("iframe")
|
363
|
+
if iframe_node
|
364
|
+
attrs = get_media_attrs(iframe_node)
|
365
|
+
media = "<iframe #{attrs}'></iframe>"
|
366
|
+
else
|
367
|
+
Jekyll.logger.warn "s9y database:", "Unrecognized media block: #{imgcaption}"
|
368
|
+
return text
|
369
|
+
end
|
370
|
+
end
|
371
|
+
|
372
|
+
# Wrap media in link, if any
|
373
|
+
if big_link
|
374
|
+
big = big_link.attribute("href")
|
375
|
+
media = "<a href='#{big}'>#{media}</a>"
|
376
|
+
end
|
377
|
+
|
378
|
+
# Replace HTML with clean media source, wrapped in figure
|
379
|
+
imgcaption.replace("<figure #{block_attrs}#{media}#{caption}</figure>")
|
380
|
+
end
|
381
|
+
|
382
|
+
noko.to_s
|
383
|
+
end
|
384
|
+
|
385
|
+
def get_media_attrs(node)
|
386
|
+
width = node.attribute("width")
|
387
|
+
width = "width='#{width}'" if width
|
388
|
+
height = node.attribute("height")
|
389
|
+
height = "height='#{height}'" if height
|
390
|
+
alt = node.attribute("alt")
|
391
|
+
alt = "alt='#{alt}'" if alt
|
392
|
+
src = "src='" + node.attribute("src") + "'"
|
393
|
+
[src, width, height, alt].join(" ")
|
394
|
+
end
|
395
|
+
|
396
|
+
def get_block_attrs(imgcaption)
|
397
|
+
# Extract block-level attributes
|
398
|
+
float = imgcaption.attribute("class").value.sub("serendipity_imageComment_", "")
|
399
|
+
float = "class='figure-#{float}'"
|
400
|
+
style = imgcaption.attribute("style")
|
401
|
+
style = " style='#{style.value}'" if style
|
402
|
+
# Don't lose good data
|
403
|
+
mdbnum = imgcaption.search(".//comment()").text.strip.sub("s9ymdb:", "")
|
404
|
+
mdb = "<!-- mdb='#{mdbnum}' -->" if mdbnum
|
405
|
+
[float, style, mdb].join(" ")
|
406
|
+
end
|
407
|
+
|
214
408
|
def self.process_categories(db, options, post)
|
215
409
|
return [] unless options[:categories]
|
216
410
|
|
@@ -293,18 +487,36 @@ module JekyllImport
|
|
293
487
|
|
294
488
|
db[cquery].each_with_object([]) do |tag, tags|
|
295
489
|
tags << if options[:clean_entities]
|
296
|
-
clean_entities(tag[:name])
|
490
|
+
clean_entities(tag[:name]).downcase
|
297
491
|
else
|
298
|
-
tag[:name]
|
492
|
+
tag[:name].downcase
|
299
493
|
end
|
300
494
|
end
|
301
495
|
end
|
302
496
|
|
303
497
|
def self.process_permalink(db, options, post)
|
304
|
-
return unless options[:permalinks]
|
498
|
+
return [] unless options[:permalinks]
|
499
|
+
|
500
|
+
permalinks = []
|
305
501
|
|
306
502
|
px = options[:table_prefix]
|
307
503
|
|
504
|
+
if db.table_exists?("#{px}entryproperties")
|
505
|
+
pquery = %(
|
506
|
+
SELECT
|
507
|
+
props.value AS `permalink`
|
508
|
+
FROM
|
509
|
+
#{px}entryproperties AS props
|
510
|
+
WHERE
|
511
|
+
props.entryid = '#{post[:id]}' AND
|
512
|
+
props.property = 'permalink'
|
513
|
+
)
|
514
|
+
db[pquery].each do |link|
|
515
|
+
plink = link[:permalink].to_s
|
516
|
+
permalinks << plink unless plink.end_with? "/UNKNOWN.html"
|
517
|
+
end
|
518
|
+
end
|
519
|
+
|
308
520
|
cquery = %(
|
309
521
|
SELECT
|
310
522
|
permalinks.permalink AS `permalink`
|
@@ -316,8 +528,10 @@ module JekyllImport
|
|
316
528
|
)
|
317
529
|
|
318
530
|
db[cquery].each do |link|
|
319
|
-
|
531
|
+
permalinks << "/#{link[:permalink]}"
|
320
532
|
end
|
533
|
+
|
534
|
+
permalinks
|
321
535
|
end
|
322
536
|
|
323
537
|
def self.clean_entities(text)
|
metadata
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.21.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tom Preston-Werner
|
8
8
|
- Parker Moore
|
9
9
|
- Matt Rogers
|
10
|
-
autorequire:
|
10
|
+
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2021-11-01 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: jekyll
|
@@ -60,20 +60,6 @@ dependencies:
|
|
60
60
|
- - "~>"
|
61
61
|
- !ruby/object:Gem::Version
|
62
62
|
version: '1.0'
|
63
|
-
- !ruby/object:Gem::Dependency
|
64
|
-
name: activesupport
|
65
|
-
requirement: !ruby/object:Gem::Requirement
|
66
|
-
requirements:
|
67
|
-
- - "~>"
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '4.2'
|
70
|
-
type: :development
|
71
|
-
prerelease: false
|
72
|
-
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
requirements:
|
74
|
-
- - "~>"
|
75
|
-
- !ruby/object:Gem::Version
|
76
|
-
version: '4.2'
|
77
63
|
- !ruby/object:Gem::Dependency
|
78
64
|
name: bundler
|
79
65
|
requirement: !ruby/object:Gem::Requirement
|
@@ -359,6 +345,7 @@ files:
|
|
359
345
|
- lib/jekyll-import/importers/dotclear.rb
|
360
346
|
- lib/jekyll-import/importers/drupal6.rb
|
361
347
|
- lib/jekyll-import/importers/drupal7.rb
|
348
|
+
- lib/jekyll-import/importers/drupal8.rb
|
362
349
|
- lib/jekyll-import/importers/drupal_common.rb
|
363
350
|
- lib/jekyll-import/importers/easyblog.rb
|
364
351
|
- lib/jekyll-import/importers/enki.rb
|
@@ -388,7 +375,7 @@ homepage: http://github.com/jekyll/jekyll-import
|
|
388
375
|
licenses:
|
389
376
|
- MIT
|
390
377
|
metadata: {}
|
391
|
-
post_install_message:
|
378
|
+
post_install_message:
|
392
379
|
rdoc_options:
|
393
380
|
- "--charset=UTF-8"
|
394
381
|
require_paths:
|
@@ -404,8 +391,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
404
391
|
- !ruby/object:Gem::Version
|
405
392
|
version: '0'
|
406
393
|
requirements: []
|
407
|
-
rubygems_version: 3.
|
408
|
-
signing_key:
|
394
|
+
rubygems_version: 3.1.6
|
395
|
+
signing_key:
|
409
396
|
specification_version: 4
|
410
397
|
summary: Import command for Jekyll (static site generator).
|
411
398
|
test_files: []
|