jekyll-import 0.20.0 → 0.21.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/jekyll-import/importers/blogger.rb +51 -63
- data/lib/jekyll-import/importers/drupal6.rb +7 -3
- data/lib/jekyll-import/importers/drupal8.rb +65 -0
- data/lib/jekyll-import/importers/drupal_common.rb +3 -2
- data/lib/jekyll-import/importers/rss.rb +42 -24
- data/lib/jekyll-import/importers/s9y_database.rb +270 -56
- data/lib/jekyll-import/version.rb +1 -1
- metadata +7 -20
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e0098ade7230382d4787c65ab3c17607680119727e1e9cdca587274d94e3e084
|
4
|
+
data.tar.gz: 8f1050dfa7170aa7b08462c0132ab959ffdcda31e8c469929ce123aecb64909a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f32b5ac48f88293a4703c7ece13d4a9886c598b4491fa9e8b750b02f4dfab405291c095355885f823a9b23551bc69e73015487371daa9fb6a9db7ec4783b88f0
|
7
|
+
data.tar.gz: 0ba737a7d8ff767eb1bcce6bffa8af357d6c66793b36e682edfcce05245e21dd0a12d5a3d250118ae9b00dc40857db50884d2365e2419a86efce7d97e0ac52af
|
@@ -11,11 +11,8 @@ module JekyllImport
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def self.validate(options)
|
14
|
-
if options["source"].nil?
|
15
|
-
|
16
|
-
elsif !File.exist?(options["source"])
|
17
|
-
raise Errno::ENOENT, "File not found: #{options["source"]}"
|
18
|
-
end
|
14
|
+
raise "Missing mandatory option: --source" if options["source"].nil?
|
15
|
+
raise Errno::ENOENT, "File not found: #{options["source"]}" unless File.exist?(options["source"])
|
19
16
|
end
|
20
17
|
|
21
18
|
def self.require_deps
|
@@ -42,7 +39,6 @@ module JekyllImport
|
|
42
39
|
source = options.fetch("source")
|
43
40
|
|
44
41
|
listener = BloggerAtomStreamListener.new
|
45
|
-
|
46
42
|
listener.leave_blogger_info = !options.fetch("no-blogger-info", false)
|
47
43
|
listener.comments = options.fetch("comments", false)
|
48
44
|
|
@@ -52,7 +48,6 @@ module JekyllImport
|
|
52
48
|
end
|
53
49
|
|
54
50
|
options["original-url-base"] = listener.original_url_base
|
55
|
-
|
56
51
|
postprocess(options)
|
57
52
|
end
|
58
53
|
|
@@ -63,32 +58,32 @@ module JekyllImport
|
|
63
58
|
# Returns nothing.
|
64
59
|
def self.postprocess(options)
|
65
60
|
# Replace internal link URL
|
66
|
-
|
67
|
-
original_url_base = options.fetch("original-url-base", nil)
|
68
|
-
if original_url_base
|
69
|
-
orig_url_pattern = Regexp.new(" href=([\"\'])(?:#{Regexp.escape(original_url_base)})?/([0-9]{4})/([0-9]{2})/([^\"\']+\.html)\\1")
|
70
|
-
|
71
|
-
Dir.glob("_posts/*.*") do |filename|
|
72
|
-
body = nil
|
73
|
-
File.open(filename, "r") do |f|
|
74
|
-
f.flock(File::LOCK_SH)
|
75
|
-
body = f.read
|
76
|
-
end
|
61
|
+
return unless options.fetch("replace-internal-link", false)
|
77
62
|
|
78
|
-
|
79
|
-
|
80
|
-
quote = Regexp.last_match(1)
|
81
|
-
post_file = Dir.glob("_posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}").first
|
82
|
-
raise "Could not found: _posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}" if post_file.nil?
|
63
|
+
original_url_base = options.fetch("original-url-base", nil)
|
64
|
+
return unless original_url_base
|
83
65
|
|
84
|
-
|
85
|
-
end
|
66
|
+
orig_url_pattern = Regexp.new(" href=([\"\'])(?:#{Regexp.escape(original_url_base)})?/([0-9]{4})/([0-9]{2})/([^\"\']+\.html)\\1")
|
86
67
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
68
|
+
Dir.glob("_posts/*.*") do |filename|
|
69
|
+
body = nil
|
70
|
+
File.open(filename, "r") do |f|
|
71
|
+
f.flock(File::LOCK_SH)
|
72
|
+
body = f.read
|
73
|
+
end
|
74
|
+
|
75
|
+
body.gsub!(orig_url_pattern) do
|
76
|
+
# for post_url
|
77
|
+
quote = Regexp.last_match(1)
|
78
|
+
post_file = Dir.glob("_posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}").first
|
79
|
+
raise "Could not found: _posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}" if post_file.nil?
|
80
|
+
|
81
|
+
" href=#{quote}{{ site.baseurl }}{% post_url #{File.basename(post_file, ".html")} %}#{quote}"
|
82
|
+
end
|
83
|
+
|
84
|
+
File.open(filename, "w") do |f|
|
85
|
+
f.flock(File::LOCK_EX)
|
86
|
+
f << body
|
92
87
|
end
|
93
88
|
end
|
94
89
|
end
|
@@ -118,9 +113,7 @@ module JekyllImport
|
|
118
113
|
|
119
114
|
@in_entry_elem = { :meta => {}, :body => nil }
|
120
115
|
when "title"
|
121
|
-
if @in_entry_elem
|
122
|
-
raise 'only <title type="text"></title> is supported' if attrs["type"] != "text"
|
123
|
-
end
|
116
|
+
raise 'only <title type="text"></title> is supported' if @in_entry_elem && attrs["type"] != "text"
|
124
117
|
when "category"
|
125
118
|
if @in_entry_elem
|
126
119
|
if attrs["scheme"] == "http://www.blogger.com/atom/ns#"
|
@@ -150,25 +143,23 @@ module JekyllImport
|
|
150
143
|
end
|
151
144
|
|
152
145
|
def text(text)
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
end
|
171
|
-
end
|
146
|
+
return unless @in_entry_elem
|
147
|
+
|
148
|
+
case @tag_bread.last
|
149
|
+
when "id"
|
150
|
+
@in_entry_elem[:meta][:id] = text
|
151
|
+
when "published"
|
152
|
+
@in_entry_elem[:meta][:published] = text
|
153
|
+
when "updated"
|
154
|
+
@in_entry_elem[:meta][:updated] = text
|
155
|
+
when "title"
|
156
|
+
@in_entry_elem[:meta][:title] = text
|
157
|
+
when "content"
|
158
|
+
@in_entry_elem[:body] = text
|
159
|
+
when "name"
|
160
|
+
@in_entry_elem[:meta][:author] = text if @tag_bread[-2..-1] == %w(author name)
|
161
|
+
when "app:draft"
|
162
|
+
@in_entry_elem[:meta][:draft] = true if @tag_bread[-2..-1] == %w(app:control app:draft) && text == "yes"
|
172
163
|
end
|
173
164
|
end
|
174
165
|
|
@@ -186,7 +177,7 @@ module JekyllImport
|
|
186
177
|
|
187
178
|
FileUtils.mkdir_p(target_dir)
|
188
179
|
|
189
|
-
file_name = URI.
|
180
|
+
file_name = URI.decode_www_form_component("#{post_data[:filename]}.html")
|
190
181
|
File.open(File.join(target_dir, file_name), "w") do |f|
|
191
182
|
f.flock(File::LOCK_EX)
|
192
183
|
|
@@ -264,19 +255,16 @@ module JekyllImport
|
|
264
255
|
{ :filename => filename, :header => header, :body => body }
|
265
256
|
elsif @in_entry_elem[:meta][:kind] == "comment"
|
266
257
|
timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime("%Y-%m-%d")
|
267
|
-
|
268
|
-
@comment_seq ||= 1
|
258
|
+
raise "Original URL is missing" unless @in_entry_elem[:meta][:original_url]
|
269
259
|
|
270
|
-
|
271
|
-
original_path = original_uri.path.to_s
|
272
|
-
filename = format("%s-%s-%s", timestamp, File.basename(original_path, File.extname(original_path)), @comment_seq)
|
260
|
+
@comment_seq ||= 1
|
273
261
|
|
274
|
-
|
262
|
+
original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
|
263
|
+
original_path = original_uri.path.to_s
|
264
|
+
filename = format("%s-%s-%s", timestamp, File.basename(original_path, File.extname(original_path)), @comment_seq)
|
275
265
|
|
276
|
-
|
277
|
-
|
278
|
-
raise "Original URL is missing"
|
279
|
-
end
|
266
|
+
@comment_seq += 1
|
267
|
+
@original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
|
280
268
|
|
281
269
|
header = {
|
282
270
|
"date" => @in_entry_elem[:meta][:published],
|
@@ -19,15 +19,17 @@ module JekyllImport
|
|
19
19
|
nr.teaser,
|
20
20
|
n.created,
|
21
21
|
n.status,
|
22
|
+
ua.dst AS alias,
|
22
23
|
n.type,
|
23
24
|
GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags'
|
24
|
-
FROM #{prefix}node_revisions AS nr,
|
25
|
+
FROM #{prefix}node_revisions AS nr, url_alias AS ua,
|
25
26
|
#{prefix}node AS n
|
26
27
|
LEFT OUTER JOIN #{prefix}term_node AS tn ON tn.nid = n.nid
|
27
28
|
LEFT OUTER JOIN #{prefix}term_data AS td ON tn.tid = td.tid
|
28
29
|
WHERE (#{types})
|
29
30
|
AND n.vid = nr.vid
|
30
|
-
|
31
|
+
AND ua.src = CONCAT( 'node/', n.nid)
|
32
|
+
GROUP BY n.nid, ua.dst
|
31
33
|
SQL
|
32
34
|
|
33
35
|
query
|
@@ -44,9 +46,11 @@ SQL
|
|
44
46
|
|
45
47
|
data = {
|
46
48
|
"excerpt" => summary,
|
47
|
-
"categories" => tags.split("|"),
|
49
|
+
"categories" => tags.split("|").uniq,
|
48
50
|
}
|
49
51
|
|
52
|
+
data["permalink"] = "/" + sql_post_data[:alias] if sql_post_data[:alias]
|
53
|
+
|
50
54
|
[data, content]
|
51
55
|
end
|
52
56
|
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "jekyll-import/importers/drupal_common"
|
4
|
+
|
5
|
+
module JekyllImport
|
6
|
+
module Importers
|
7
|
+
class Drupal8 < Importer
|
8
|
+
include DrupalCommon
|
9
|
+
extend DrupalCommon::ClassMethods
|
10
|
+
|
11
|
+
def self.build_query(prefix, types, engine)
|
12
|
+
types = types.join("' OR n.type = '")
|
13
|
+
types = "n.type = '#{types}'"
|
14
|
+
|
15
|
+
tag_group = if engine == "postgresql"
|
16
|
+
<<POSTGRESQL
|
17
|
+
(SELECT STRING_AGG(td.name, '|')
|
18
|
+
FROM #{prefix}taxonomy_term_field_data td, #{prefix}taxonomy_index ti
|
19
|
+
WHERE ti.tid = td.tid AND ti.nid = n.nid) AS tags
|
20
|
+
POSTGRESQL
|
21
|
+
else
|
22
|
+
<<SQL
|
23
|
+
(SELECT GROUP_CONCAT(td.name SEPARATOR '|')
|
24
|
+
FROM #{prefix}taxonomy_term_field_data td, #{prefix}taxonomy_index ti
|
25
|
+
WHERE ti.tid = td.tid AND ti.nid = n.nid) AS 'tags'
|
26
|
+
SQL
|
27
|
+
end
|
28
|
+
|
29
|
+
query = <<QUERY
|
30
|
+
SELECT n.nid,
|
31
|
+
n.title,
|
32
|
+
nb.body_value,
|
33
|
+
nb.body_summary,
|
34
|
+
n.created,
|
35
|
+
n.status,
|
36
|
+
n.type,
|
37
|
+
#{tag_group}
|
38
|
+
FROM #{prefix}node_field_data AS n
|
39
|
+
LEFT JOIN #{prefix}node__body AS nb
|
40
|
+
ON nb.entity_id = n.nid
|
41
|
+
WHERE (#{types})
|
42
|
+
QUERY
|
43
|
+
|
44
|
+
query
|
45
|
+
end
|
46
|
+
|
47
|
+
def self.aliases_query(prefix)
|
48
|
+
"SELECT source, alias FROM #{prefix}url_alias WHERE source = ?"
|
49
|
+
end
|
50
|
+
|
51
|
+
def self.post_data(sql_post_data)
|
52
|
+
content = sql_post_data[:body_value].to_s
|
53
|
+
summary = sql_post_data[:body_summary].to_s
|
54
|
+
tags = (sql_post_data[:tags] || "").downcase.strip
|
55
|
+
|
56
|
+
data = {
|
57
|
+
"excerpt" => summary,
|
58
|
+
"categories" => tags.split("|"),
|
59
|
+
}
|
60
|
+
|
61
|
+
[data, content]
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
@@ -68,6 +68,7 @@ module JekyllImport
|
|
68
68
|
src_dir = conf["source"]
|
69
69
|
|
70
70
|
dirs = {
|
71
|
+
:_aliases => src_dir,
|
71
72
|
:_posts => File.join(src_dir, "_posts").to_s,
|
72
73
|
:_drafts => File.join(src_dir, "_drafts").to_s,
|
73
74
|
:_layouts => Jekyll.sanitized_path(src_dir, conf["layouts_dir"].to_s),
|
@@ -146,10 +147,10 @@ module JekyllImport
|
|
146
147
|
|
147
148
|
if partition.first.length.positive?
|
148
149
|
dir = "#{partition.first}/"
|
149
|
-
FileUtils.mkdir_p
|
150
|
+
FileUtils.mkdir_p "#{dirs[:_aliases]}/#{dir}"
|
150
151
|
end
|
151
152
|
|
152
|
-
File.open("#{dir}#{file}.md", "w") do |f|
|
153
|
+
File.open("#{dirs[:_aliases]}/#{dir}#{file}.md", "w") do |f|
|
153
154
|
f.puts "---"
|
154
155
|
f.puts "layout: refresh"
|
155
156
|
f.puts "permalink: #{dir}#{file}/"
|
@@ -6,6 +6,7 @@ module JekyllImport
|
|
6
6
|
def self.specify_options(c)
|
7
7
|
c.option "source", "--source NAME", "The RSS file or URL to import"
|
8
8
|
c.option "tag", "--tag NAME", "Add a tag to posts"
|
9
|
+
c.option "render_audio", "--render_audio", "Render <audio> element as necessary"
|
9
10
|
end
|
10
11
|
|
11
12
|
def self.validate(options)
|
@@ -30,8 +31,6 @@ module JekyllImport
|
|
30
31
|
# Returns nothing.
|
31
32
|
def self.process(options)
|
32
33
|
source = options.fetch("source")
|
33
|
-
frontmatter = options.fetch("frontmatter", [])
|
34
|
-
body = options.fetch("body", ["description"])
|
35
34
|
|
36
35
|
content = ""
|
37
36
|
open(source) { |s| content = s.read }
|
@@ -40,37 +39,56 @@ module JekyllImport
|
|
40
39
|
raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss
|
41
40
|
|
42
41
|
rss.items.each do |item|
|
43
|
-
|
44
|
-
|
45
|
-
|
42
|
+
write_rss_item(item, options)
|
43
|
+
end
|
44
|
+
end
|
46
45
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
46
|
+
def self.write_rss_item(item, options)
|
47
|
+
frontmatter = options.fetch("frontmatter", [])
|
48
|
+
body = options.fetch("body", ["description"])
|
49
|
+
render_audio = options.fetch("render_audio", false)
|
51
50
|
|
52
|
-
|
51
|
+
formatted_date = item.date.strftime("%Y-%m-%d")
|
52
|
+
post_name = Jekyll::Utils.slugify(item.title, :mode => "latin")
|
53
|
+
name = "#{formatted_date}-#{post_name}"
|
54
|
+
audio = render_audio && item.enclosure.url
|
53
55
|
|
54
|
-
|
55
|
-
|
56
|
-
|
56
|
+
header = {
|
57
|
+
"layout" => "post",
|
58
|
+
"title" => item.title,
|
59
|
+
}
|
57
60
|
|
58
|
-
|
61
|
+
header["tag"] = options["tag"] unless options["tag"].nil? || options["tag"].empty?
|
59
62
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
+
frontmatter.each do |value|
|
64
|
+
header[value] = item.send(value)
|
65
|
+
end
|
63
66
|
|
64
|
-
|
65
|
-
|
67
|
+
output = +""
|
68
|
+
|
69
|
+
body.each do |row|
|
70
|
+
output << item.send(row).to_s
|
71
|
+
end
|
66
72
|
|
67
|
-
|
73
|
+
output.strip!
|
74
|
+
output = item.content_encoded if output.empty?
|
68
75
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
76
|
+
FileUtils.mkdir_p("_posts")
|
77
|
+
|
78
|
+
File.open("_posts/#{name}.html", "w") do |f|
|
79
|
+
f.puts header.to_yaml
|
80
|
+
f.puts "---\n\n"
|
81
|
+
|
82
|
+
if audio
|
83
|
+
f.puts <<~HTML
|
84
|
+
<audio controls="">
|
85
|
+
<source src="#{audio}" type="audio/mpeg">
|
86
|
+
Your browser does not support the audio element.
|
87
|
+
</audio>
|
88
|
+
HTML
|
73
89
|
end
|
90
|
+
|
91
|
+
f.puts output
|
74
92
|
end
|
75
93
|
end
|
76
94
|
end
|
@@ -11,25 +11,31 @@ module JekyllImport
|
|
11
11
|
fileutils
|
12
12
|
safe_yaml
|
13
13
|
unidecode
|
14
|
+
nokogiri
|
14
15
|
)
|
15
16
|
)
|
16
17
|
end
|
17
18
|
|
18
19
|
def self.specify_options(c)
|
19
|
-
c.option "dbname",
|
20
|
-
c.option "socket",
|
21
|
-
c.option "user",
|
22
|
-
c.option "password",
|
23
|
-
c.option "host",
|
24
|
-
c.option "port",
|
25
|
-
c.option "table_prefix",
|
26
|
-
c.option "clean_entities",
|
27
|
-
c.option "comments",
|
28
|
-
c.option "categories",
|
29
|
-
c.option "tags",
|
30
|
-
c.option "drafts",
|
31
|
-
c.option "markdown",
|
32
|
-
c.option "permalinks",
|
20
|
+
c.option "dbname", "--dbname DB", "Database name (default: '')"
|
21
|
+
c.option "socket", "--socket SOCKET", "Database socket (default: '')"
|
22
|
+
c.option "user", "--user USER", "Database user name (default: '')"
|
23
|
+
c.option "password", "--password PW", "Database user's password (default: '')"
|
24
|
+
c.option "host", "--host HOST", "Database host name (default: 'localhost')"
|
25
|
+
c.option "port", "--port PORT", "Custom database port connect to (default: 3306)"
|
26
|
+
c.option "table_prefix", "--table_prefix PREFIX", "Table prefix name (default: 'serendipity_')"
|
27
|
+
c.option "clean_entities", "--clean_entities", "Whether to clean entities (default: true)"
|
28
|
+
c.option "comments", "--comments", "Whether to import comments (default: true)"
|
29
|
+
c.option "categories", "--categories", "Whether to import categories (default: true)"
|
30
|
+
c.option "tags", "--tags", "Whether to import tags (default: true)"
|
31
|
+
c.option "drafts", "--drafts", "Whether to export drafts as well"
|
32
|
+
c.option "markdown", "--markdown", "convert into markdown format (default: false)"
|
33
|
+
c.option "permalinks", "--permalinks", "preserve S9Y permalinks (default: false)"
|
34
|
+
c.option "excerpt_separator", "--excerpt_separator", "Demarkation for excerpts (default: '<a id=\"extended\"></a>')"
|
35
|
+
c.option "includeentry", "--includeentry", "Replace macros from the includeentry plugin (default: false)"
|
36
|
+
c.option "imgfig", "--imgfig", "Replace nested img and youtube divs with HTML figure tags (default: true)"
|
37
|
+
c.option "linebreak", "--linebreak", "Line break processing: wp, nokogiri, ignore (default: wp)"
|
38
|
+
c.option "relative", "--relative", "Convert links with this prefix to relative (default:nil)"
|
33
39
|
end
|
34
40
|
|
35
41
|
# Main migrator function. Call this to perform the migration.
|
@@ -56,36 +62,64 @@ module JekyllImport
|
|
56
62
|
# :categories:: If true, save the post's categories in its
|
57
63
|
# YAML front matter. Default: true.
|
58
64
|
# :tags:: If true, save the post's tags in its
|
59
|
-
# YAML front matter.
|
65
|
+
# YAML front matter, in lowercase. Default: true.
|
60
66
|
# :extension:: Set the post extension. Default: "html"
|
61
67
|
# :drafts:: If true, export drafts as well
|
62
68
|
# Default: true.
|
63
69
|
# :markdown:: If true, convert the content to markdown
|
64
70
|
# Default: false
|
65
71
|
# :permalinks:: If true, save the post's original permalink in its
|
66
|
-
# YAML front matter.
|
72
|
+
# YAML front matter. If the 'entryproperties' plugin
|
73
|
+
# was used, its permalink will become the canonical
|
74
|
+
# permalink, and any other will become redirects.
|
75
|
+
# Default: false.
|
76
|
+
# :excerpt_separator:: A string to use to separate the excerpt (body
|
77
|
+
# in S9Y) from the rest of the article (extended
|
78
|
+
# body in S9Y). Default: "<a id=\"extended\"></a>".
|
79
|
+
# :includentry:: Replace macros from the includentry plugin - these are
|
80
|
+
# the [s9y-include-entry] and [s9y-include-block] macros.
|
81
|
+
# Default: false.
|
82
|
+
# :imgfig:: Replace S9Y image-comment divs with an HTML figure
|
83
|
+
# div and figcaption, if applicable. Works for img and
|
84
|
+
# iframe.
|
85
|
+
# Default: true.
|
67
86
|
#
|
87
|
+
# :linebreak:: When set to the default "wp", line breaks in entries
|
88
|
+
# will be processed WordPress style, by replacing double
|
89
|
+
# line breaks with HTML p tags, and remaining single
|
90
|
+
# line breaks with HTML br tags. When set to "nokogiri",
|
91
|
+
# entries will be loaded into Nokogiri and formatted as
|
92
|
+
# an XHTML fragment. When set to "ignore", line breaks
|
93
|
+
# will not be replaced at all.
|
94
|
+
# Default: wp
|
95
|
+
# :relative:: Replace absolute links (http://:relative:/foo)
|
96
|
+
# to relative links (/foo).
|
97
|
+
|
68
98
|
def self.process(opts)
|
69
99
|
options = {
|
70
|
-
:user
|
71
|
-
:pass
|
72
|
-
:host
|
73
|
-
:port
|
74
|
-
:socket
|
75
|
-
:dbname
|
76
|
-
:table_prefix
|
77
|
-
:clean_entities
|
78
|
-
:comments
|
79
|
-
:categories
|
80
|
-
:tags
|
81
|
-
:extension
|
82
|
-
:drafts
|
83
|
-
:markdown
|
84
|
-
:permalinks
|
100
|
+
:user => opts.fetch("user", ""),
|
101
|
+
:pass => opts.fetch("password", ""),
|
102
|
+
:host => opts.fetch("host", "localhost"),
|
103
|
+
:port => opts.fetch("port", 3306),
|
104
|
+
:socket => opts.fetch("socket", nil),
|
105
|
+
:dbname => opts.fetch("dbname", ""),
|
106
|
+
:table_prefix => opts.fetch("table_prefix", "serendipity_"),
|
107
|
+
:clean_entities => opts.fetch("clean_entities", true),
|
108
|
+
:comments => opts.fetch("comments", true),
|
109
|
+
:categories => opts.fetch("categories", true),
|
110
|
+
:tags => opts.fetch("tags", true),
|
111
|
+
:extension => opts.fetch("extension", "html"),
|
112
|
+
:drafts => opts.fetch("drafts", true),
|
113
|
+
:markdown => opts.fetch("markdown", false),
|
114
|
+
:permalinks => opts.fetch("permalinks", false),
|
115
|
+
:excerpt_separator => opts.fetch("excerpt_separator", "<a id=\"extended\"></a>"),
|
116
|
+
:includeentry => opts.fetch("includeentry", false),
|
117
|
+
:imgfig => opts.fetch("imgfig", true),
|
118
|
+
:linebreak => opts.fetch("linebreak", "wp"),
|
119
|
+
:relative => opts.fetch("relative", nil),
|
85
120
|
}
|
86
121
|
|
87
122
|
options[:clean_entities] = require_if_available("htmlentities", "clean_entities") if options[:clean_entities]
|
88
|
-
|
89
123
|
options[:markdown] = require_if_available("reverse_markdown", "markdown") if options[:markdown]
|
90
124
|
|
91
125
|
FileUtils.mkdir_p("_posts")
|
@@ -120,6 +154,7 @@ module JekyllImport
|
|
120
154
|
|
121
155
|
posts_query = "
|
122
156
|
SELECT
|
157
|
+
'post' AS `type`,
|
123
158
|
entries.ID AS `id`,
|
124
159
|
entries.isdraft AS `isdraft`,
|
125
160
|
entries.title AS `title`,
|
@@ -154,36 +189,41 @@ module JekyllImport
|
|
154
189
|
name = format("%02d-%02d-%02d-%s.%s", date.year, date.month, date.day, slug, extension)
|
155
190
|
|
156
191
|
content = post[:body].to_s
|
157
|
-
|
192
|
+
extended_content = post[:body_extended].to_s
|
193
|
+
|
194
|
+
content += options[:excerpt_separator] + extended_content unless extended_content.nil? || extended_content.strip.empty?
|
158
195
|
|
196
|
+
content = process_includeentry(content, db, options) if options[:includeentry]
|
197
|
+
content = process_img_div(content) if options[:imgfig]
|
159
198
|
content = clean_entities(content) if options[:clean_entities]
|
199
|
+
content = content.gsub(%r!href=(["'])http://#{options[:relative]}!, 'href=\1') if options[:relative]
|
160
200
|
|
161
201
|
content = ReverseMarkdown.convert(content) if options[:markdown]
|
162
202
|
|
163
203
|
categories = process_categories(db, options, post)
|
164
204
|
comments = process_comments(db, options, post)
|
165
205
|
tags = process_tags(db, options, post)
|
166
|
-
|
206
|
+
all_permalinks = process_permalink(db, options, post)
|
207
|
+
primary_permalink = all_permalinks.shift
|
208
|
+
supplemental_permalinks = all_permalinks unless all_permalinks.empty?
|
167
209
|
|
168
210
|
# Get the relevant fields as a hash, delete empty fields and
|
169
211
|
# convert to YAML for the header.
|
170
212
|
data = {
|
171
|
-
"layout"
|
172
|
-
"status"
|
173
|
-
"published"
|
174
|
-
"title"
|
175
|
-
"author"
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
"
|
181
|
-
"
|
182
|
-
"
|
183
|
-
"
|
184
|
-
"
|
185
|
-
"tags" => options[:tags] ? tags : nil,
|
186
|
-
"comments" => options[:comments] ? comments : nil,
|
213
|
+
"layout" => post[:type].to_s,
|
214
|
+
"status" => status.to_s,
|
215
|
+
"published" => status.to_s == "draft" ? nil : (status.to_s == "published"),
|
216
|
+
"title" => title.to_s,
|
217
|
+
"author" => post[:author].to_s,
|
218
|
+
"author_login" => post[:author_login].to_s,
|
219
|
+
"author_email" => post[:author_email].to_s,
|
220
|
+
"date" => date.to_s,
|
221
|
+
"permalink" => options[:permalinks] ? primary_permalink : nil,
|
222
|
+
"redirect_from" => options[:permalinks] ? supplemental_permalinks : nil,
|
223
|
+
"categories" => options[:categories] ? categories : nil,
|
224
|
+
"tags" => options[:tags] ? tags : nil,
|
225
|
+
"comments" => options[:comments] ? comments : nil,
|
226
|
+
"excerpt_separator" => extended_content.empty? ? nil : options[:excerpt_separator],
|
187
227
|
}.delete_if { |_k, v| v.nil? || v == "" }.to_yaml
|
188
228
|
|
189
229
|
if post[:type] == "page"
|
@@ -195,11 +235,21 @@ module JekyllImport
|
|
195
235
|
filename = "_posts/#{name}"
|
196
236
|
end
|
197
237
|
|
238
|
+
content = case options[:linebreak]
|
239
|
+
when "nokogiri"
|
240
|
+
Nokogiri::HTML.fragment(content).to_xhtml
|
241
|
+
when "ignore"
|
242
|
+
content
|
243
|
+
else
|
244
|
+
# "wp" is the only remaining option, and the default
|
245
|
+
Util.wpautop(content)
|
246
|
+
end
|
247
|
+
|
198
248
|
# Write out the data and content to file
|
199
249
|
File.open(filename, "w") do |f|
|
200
250
|
f.puts data
|
201
251
|
f.puts "---"
|
202
|
-
f.puts
|
252
|
+
f.puts content
|
203
253
|
end
|
204
254
|
end
|
205
255
|
|
@@ -207,10 +257,154 @@ module JekyllImport
|
|
207
257
|
require gem_name
|
208
258
|
true
|
209
259
|
rescue LoadError
|
210
|
-
warn "Could not require '#{gem_name}', so the :#{option_name} option is now disabled."
|
260
|
+
Jekyll.logger.warn "s9y database:", "Could not require '#{gem_name}', so the :#{option_name} option is now disabled."
|
211
261
|
true
|
212
262
|
end
|
213
263
|
|
264
|
+
def self.process_includeentry(text, db, options)
|
265
|
+
return text unless options[:includeentry]
|
266
|
+
|
267
|
+
result = text
|
268
|
+
|
269
|
+
px = options[:table_prefix]
|
270
|
+
|
271
|
+
props = text.scan(%r!(\[s9y-include-entry:([0-9]+):([^:]+)\])!)
|
272
|
+
blocks = text.scan(%r!(\[s9y-include-block:([0-9]+):?([^:]+)?\])!)
|
273
|
+
|
274
|
+
props.each do |match|
|
275
|
+
macro = match[0]
|
276
|
+
id = match[1]
|
277
|
+
replacement = ""
|
278
|
+
if match[2].start_with?("prop=")
|
279
|
+
prop = match[2].sub("prop=", "")
|
280
|
+
cquery = get_property_query(px, id, prop)
|
281
|
+
else
|
282
|
+
prop = match[2]
|
283
|
+
cquery = get_value_query(px, id, prop)
|
284
|
+
end
|
285
|
+
db[cquery].each do |row|
|
286
|
+
replacement << row[:txt]
|
287
|
+
end
|
288
|
+
result = result.sub(macro, replacement)
|
289
|
+
end
|
290
|
+
|
291
|
+
blocks.each do |match|
|
292
|
+
macro = match[0]
|
293
|
+
id = match[1]
|
294
|
+
replacement = ""
|
295
|
+
# match[2] *could* be 'template', but we can't run it through Smarty, so we ignore it
|
296
|
+
cquery = %(
|
297
|
+
SELECT
|
298
|
+
px.body AS `txt`
|
299
|
+
FROM
|
300
|
+
#{px}staticblocks AS px
|
301
|
+
WHERE
|
302
|
+
id = '#{id}'
|
303
|
+
)
|
304
|
+
db[cquery].each do |row|
|
305
|
+
replacement << row[:txt]
|
306
|
+
end
|
307
|
+
result = result.sub(macro, replacement)
|
308
|
+
end
|
309
|
+
|
310
|
+
result
|
311
|
+
end
|
312
|
+
|
313
|
+
def get_property_query(px, id, prop)
|
314
|
+
%(
|
315
|
+
SELECT
|
316
|
+
px.value AS `txt`
|
317
|
+
FROM
|
318
|
+
#{px}entryproperties AS px
|
319
|
+
WHERE
|
320
|
+
entryid = '#{id}' AND
|
321
|
+
property = '#{prop}'
|
322
|
+
)
|
323
|
+
end
|
324
|
+
|
325
|
+
def get_value_query(px, id, prop)
|
326
|
+
%(
|
327
|
+
SELECT
|
328
|
+
px.#{prop} AS `txt`
|
329
|
+
FROM
|
330
|
+
#{px}entries AS px
|
331
|
+
WHERE
|
332
|
+
entryid = '#{id}'
|
333
|
+
)
|
334
|
+
end
|
335
|
+
|
336
|
+
# Replace .serendipity_imageComment_* blocks
|
337
|
+
def self.process_img_div(text)
|
338
|
+
caption_classes = [
|
339
|
+
".serendipity_imageComment_left",
|
340
|
+
".serendipity_imageComment_right",
|
341
|
+
".serendipity_imageComment_center",
|
342
|
+
]
|
343
|
+
|
344
|
+
noko = Nokogiri::HTML.fragment(text)
|
345
|
+
noko.css(caption_classes.join(",")).each do |imgcaption|
|
346
|
+
block_attrs = get_block_attrs(imgcaption)
|
347
|
+
|
348
|
+
# Is this a thumbnail to a bigger/other image?
|
349
|
+
big_link = imgcaption.at_css(".serendipity_image_link")
|
350
|
+
big_link ||= imgcaption.at_xpath(".//a[.//img]")
|
351
|
+
|
352
|
+
# The caption (if any) may have raw HTML
|
353
|
+
caption_elem = imgcaption.at_css(".serendipity_imageComment_txt")
|
354
|
+
caption = ""
|
355
|
+
caption = "<figcaption>#{caption_elem.inner_html}</figcaption>" if caption_elem
|
356
|
+
|
357
|
+
image_node = imgcaption.at_css("img")
|
358
|
+
if image_node
|
359
|
+
attrs = get_media_attrs(image_node)
|
360
|
+
media = "<img #{attrs}/>"
|
361
|
+
else
|
362
|
+
iframe_node = imgcaption.at_css("iframe")
|
363
|
+
if iframe_node
|
364
|
+
attrs = get_media_attrs(iframe_node)
|
365
|
+
media = "<iframe #{attrs}'></iframe>"
|
366
|
+
else
|
367
|
+
Jekyll.logger.warn "s9y database:", "Unrecognized media block: #{imgcaption}"
|
368
|
+
return text
|
369
|
+
end
|
370
|
+
end
|
371
|
+
|
372
|
+
# Wrap media in link, if any
|
373
|
+
if big_link
|
374
|
+
big = big_link.attribute("href")
|
375
|
+
media = "<a href='#{big}'>#{media}</a>"
|
376
|
+
end
|
377
|
+
|
378
|
+
# Replace HTML with clean media source, wrapped in figure
|
379
|
+
imgcaption.replace("<figure #{block_attrs}#{media}#{caption}</figure>")
|
380
|
+
end
|
381
|
+
|
382
|
+
noko.to_s
|
383
|
+
end
|
384
|
+
|
385
|
+
def get_media_attrs(node)
|
386
|
+
width = node.attribute("width")
|
387
|
+
width = "width='#{width}'" if width
|
388
|
+
height = node.attribute("height")
|
389
|
+
height = "height='#{height}'" if height
|
390
|
+
alt = node.attribute("alt")
|
391
|
+
alt = "alt='#{alt}'" if alt
|
392
|
+
src = "src='" + node.attribute("src") + "'"
|
393
|
+
[src, width, height, alt].join(" ")
|
394
|
+
end
|
395
|
+
|
396
|
+
def get_block_attrs(imgcaption)
|
397
|
+
# Extract block-level attributes
|
398
|
+
float = imgcaption.attribute("class").value.sub("serendipity_imageComment_", "")
|
399
|
+
float = "class='figure-#{float}'"
|
400
|
+
style = imgcaption.attribute("style")
|
401
|
+
style = " style='#{style.value}'" if style
|
402
|
+
# Don't lose good data
|
403
|
+
mdbnum = imgcaption.search(".//comment()").text.strip.sub("s9ymdb:", "")
|
404
|
+
mdb = "<!-- mdb='#{mdbnum}' -->" if mdbnum
|
405
|
+
[float, style, mdb].join(" ")
|
406
|
+
end
|
407
|
+
|
214
408
|
def self.process_categories(db, options, post)
|
215
409
|
return [] unless options[:categories]
|
216
410
|
|
@@ -293,18 +487,36 @@ module JekyllImport
|
|
293
487
|
|
294
488
|
db[cquery].each_with_object([]) do |tag, tags|
|
295
489
|
tags << if options[:clean_entities]
|
296
|
-
clean_entities(tag[:name])
|
490
|
+
clean_entities(tag[:name]).downcase
|
297
491
|
else
|
298
|
-
tag[:name]
|
492
|
+
tag[:name].downcase
|
299
493
|
end
|
300
494
|
end
|
301
495
|
end
|
302
496
|
|
303
497
|
def self.process_permalink(db, options, post)
|
304
|
-
return unless options[:permalinks]
|
498
|
+
return [] unless options[:permalinks]
|
499
|
+
|
500
|
+
permalinks = []
|
305
501
|
|
306
502
|
px = options[:table_prefix]
|
307
503
|
|
504
|
+
if db.table_exists?("#{px}entryproperties")
|
505
|
+
pquery = %(
|
506
|
+
SELECT
|
507
|
+
props.value AS `permalink`
|
508
|
+
FROM
|
509
|
+
#{px}entryproperties AS props
|
510
|
+
WHERE
|
511
|
+
props.entryid = '#{post[:id]}' AND
|
512
|
+
props.property = 'permalink'
|
513
|
+
)
|
514
|
+
db[pquery].each do |link|
|
515
|
+
plink = link[:permalink].to_s
|
516
|
+
permalinks << plink unless plink.end_with? "/UNKNOWN.html"
|
517
|
+
end
|
518
|
+
end
|
519
|
+
|
308
520
|
cquery = %(
|
309
521
|
SELECT
|
310
522
|
permalinks.permalink AS `permalink`
|
@@ -316,8 +528,10 @@ module JekyllImport
|
|
316
528
|
)
|
317
529
|
|
318
530
|
db[cquery].each do |link|
|
319
|
-
|
531
|
+
permalinks << "/#{link[:permalink]}"
|
320
532
|
end
|
533
|
+
|
534
|
+
permalinks
|
321
535
|
end
|
322
536
|
|
323
537
|
def self.clean_entities(text)
|
metadata
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jekyll-import
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.21.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tom Preston-Werner
|
8
8
|
- Parker Moore
|
9
9
|
- Matt Rogers
|
10
|
-
autorequire:
|
10
|
+
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2021-11-01 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: jekyll
|
@@ -60,20 +60,6 @@ dependencies:
|
|
60
60
|
- - "~>"
|
61
61
|
- !ruby/object:Gem::Version
|
62
62
|
version: '1.0'
|
63
|
-
- !ruby/object:Gem::Dependency
|
64
|
-
name: activesupport
|
65
|
-
requirement: !ruby/object:Gem::Requirement
|
66
|
-
requirements:
|
67
|
-
- - "~>"
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '4.2'
|
70
|
-
type: :development
|
71
|
-
prerelease: false
|
72
|
-
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
requirements:
|
74
|
-
- - "~>"
|
75
|
-
- !ruby/object:Gem::Version
|
76
|
-
version: '4.2'
|
77
63
|
- !ruby/object:Gem::Dependency
|
78
64
|
name: bundler
|
79
65
|
requirement: !ruby/object:Gem::Requirement
|
@@ -359,6 +345,7 @@ files:
|
|
359
345
|
- lib/jekyll-import/importers/dotclear.rb
|
360
346
|
- lib/jekyll-import/importers/drupal6.rb
|
361
347
|
- lib/jekyll-import/importers/drupal7.rb
|
348
|
+
- lib/jekyll-import/importers/drupal8.rb
|
362
349
|
- lib/jekyll-import/importers/drupal_common.rb
|
363
350
|
- lib/jekyll-import/importers/easyblog.rb
|
364
351
|
- lib/jekyll-import/importers/enki.rb
|
@@ -388,7 +375,7 @@ homepage: http://github.com/jekyll/jekyll-import
|
|
388
375
|
licenses:
|
389
376
|
- MIT
|
390
377
|
metadata: {}
|
391
|
-
post_install_message:
|
378
|
+
post_install_message:
|
392
379
|
rdoc_options:
|
393
380
|
- "--charset=UTF-8"
|
394
381
|
require_paths:
|
@@ -404,8 +391,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
404
391
|
- !ruby/object:Gem::Version
|
405
392
|
version: '0'
|
406
393
|
requirements: []
|
407
|
-
rubygems_version: 3.
|
408
|
-
signing_key:
|
394
|
+
rubygems_version: 3.1.6
|
395
|
+
signing_key:
|
409
396
|
specification_version: 4
|
410
397
|
summary: Import command for Jekyll (static site generator).
|
411
398
|
test_files: []
|