jekyll-import 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 334724ebd0dbdc9774dd2e4799dea04dc5adaacc911d05cfc26f2a5daf23bbfa
4
- data.tar.gz: 79a4f8c6de087aebca9f726275ebb3c234361b7341a01ce10c544ff65a0b6bac
3
+ metadata.gz: e0098ade7230382d4787c65ab3c17607680119727e1e9cdca587274d94e3e084
4
+ data.tar.gz: 8f1050dfa7170aa7b08462c0132ab959ffdcda31e8c469929ce123aecb64909a
5
5
  SHA512:
6
- metadata.gz: d716d81c1e596e1004a65eadaa6c4df3e130d134d0b3db0fef7e06151350a90525f59db50aefd6f771950b0698f174fe19311dca7134c3488be970b85e76737a
7
- data.tar.gz: f5ee7ba0c29bfad8267482789e2f571ee2d6ccafd343d173c9e9408b1c6c9c25941f28476386c3076310ec630fc5ff84c7ef12aaac2bc934aa1b8ee8e11010f5
6
+ metadata.gz: f32b5ac48f88293a4703c7ece13d4a9886c598b4491fa9e8b750b02f4dfab405291c095355885f823a9b23551bc69e73015487371daa9fb6a9db7ec4783b88f0
7
+ data.tar.gz: 0ba737a7d8ff767eb1bcce6bffa8af357d6c66793b36e682edfcce05245e21dd0a12d5a3d250118ae9b00dc40857db50884d2365e2419a86efce7d97e0ac52af
@@ -11,11 +11,8 @@ module JekyllImport
11
11
  end
12
12
 
13
13
  def self.validate(options)
14
- if options["source"].nil?
15
- raise "Missing mandatory option: --source"
16
- elsif !File.exist?(options["source"])
17
- raise Errno::ENOENT, "File not found: #{options["source"]}"
18
- end
14
+ raise "Missing mandatory option: --source" if options["source"].nil?
15
+ raise Errno::ENOENT, "File not found: #{options["source"]}" unless File.exist?(options["source"])
19
16
  end
20
17
 
21
18
  def self.require_deps
@@ -42,7 +39,6 @@ module JekyllImport
42
39
  source = options.fetch("source")
43
40
 
44
41
  listener = BloggerAtomStreamListener.new
45
-
46
42
  listener.leave_blogger_info = !options.fetch("no-blogger-info", false)
47
43
  listener.comments = options.fetch("comments", false)
48
44
 
@@ -52,7 +48,6 @@ module JekyllImport
52
48
  end
53
49
 
54
50
  options["original-url-base"] = listener.original_url_base
55
-
56
51
  postprocess(options)
57
52
  end
58
53
 
@@ -63,32 +58,32 @@ module JekyllImport
63
58
  # Returns nothing.
64
59
  def self.postprocess(options)
65
60
  # Replace internal link URL
66
- if options.fetch("replace-internal-link", false)
67
- original_url_base = options.fetch("original-url-base", nil)
68
- if original_url_base
69
- orig_url_pattern = Regexp.new(" href=([\"\'])(?:#{Regexp.escape(original_url_base)})?/([0-9]{4})/([0-9]{2})/([^\"\']+\.html)\\1")
70
-
71
- Dir.glob("_posts/*.*") do |filename|
72
- body = nil
73
- File.open(filename, "r") do |f|
74
- f.flock(File::LOCK_SH)
75
- body = f.read
76
- end
61
+ return unless options.fetch("replace-internal-link", false)
77
62
 
78
- body.gsub!(orig_url_pattern) do
79
- # for post_url
80
- quote = Regexp.last_match(1)
81
- post_file = Dir.glob("_posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}").first
82
- raise "Could not found: _posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}" if post_file.nil?
63
+ original_url_base = options.fetch("original-url-base", nil)
64
+ return unless original_url_base
83
65
 
84
- " href=#{quote}{{ site.baseurl }}{% post_url #{File.basename(post_file, ".html")} %}#{quote}"
85
- end
66
+ orig_url_pattern = Regexp.new(" href=([\"\'])(?:#{Regexp.escape(original_url_base)})?/([0-9]{4})/([0-9]{2})/([^\"\']+\.html)\\1")
86
67
 
87
- File.open(filename, "w") do |f|
88
- f.flock(File::LOCK_EX)
89
- f << body
90
- end
91
- end
68
+ Dir.glob("_posts/*.*") do |filename|
69
+ body = nil
70
+ File.open(filename, "r") do |f|
71
+ f.flock(File::LOCK_SH)
72
+ body = f.read
73
+ end
74
+
75
+ body.gsub!(orig_url_pattern) do
76
+ # for post_url
77
+ quote = Regexp.last_match(1)
78
+ post_file = Dir.glob("_posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}").first
79
+ raise "Could not found: _posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}" if post_file.nil?
80
+
81
+ " href=#{quote}{{ site.baseurl }}{% post_url #{File.basename(post_file, ".html")} %}#{quote}"
82
+ end
83
+
84
+ File.open(filename, "w") do |f|
85
+ f.flock(File::LOCK_EX)
86
+ f << body
92
87
  end
93
88
  end
94
89
  end
@@ -118,9 +113,7 @@ module JekyllImport
118
113
 
119
114
  @in_entry_elem = { :meta => {}, :body => nil }
120
115
  when "title"
121
- if @in_entry_elem
122
- raise 'only <title type="text"></title> is supported' if attrs["type"] != "text"
123
- end
116
+ raise 'only <title type="text"></title> is supported' if @in_entry_elem && attrs["type"] != "text"
124
117
  when "category"
125
118
  if @in_entry_elem
126
119
  if attrs["scheme"] == "http://www.blogger.com/atom/ns#"
@@ -150,25 +143,23 @@ module JekyllImport
150
143
  end
151
144
 
152
145
  def text(text)
153
- if @in_entry_elem
154
- case @tag_bread.last
155
- when "id"
156
- @in_entry_elem[:meta][:id] = text
157
- when "published"
158
- @in_entry_elem[:meta][:published] = text
159
- when "updated"
160
- @in_entry_elem[:meta][:updated] = text
161
- when "title"
162
- @in_entry_elem[:meta][:title] = text
163
- when "content"
164
- @in_entry_elem[:body] = text
165
- when "name"
166
- @in_entry_elem[:meta][:author] = text if @tag_bread[-2..-1] == %w(author name)
167
- when "app:draft"
168
- if @tag_bread[-2..-1] == %w(app:control app:draft)
169
- @in_entry_elem[:meta][:draft] = true if text == "yes"
170
- end
171
- end
146
+ return unless @in_entry_elem
147
+
148
+ case @tag_bread.last
149
+ when "id"
150
+ @in_entry_elem[:meta][:id] = text
151
+ when "published"
152
+ @in_entry_elem[:meta][:published] = text
153
+ when "updated"
154
+ @in_entry_elem[:meta][:updated] = text
155
+ when "title"
156
+ @in_entry_elem[:meta][:title] = text
157
+ when "content"
158
+ @in_entry_elem[:body] = text
159
+ when "name"
160
+ @in_entry_elem[:meta][:author] = text if @tag_bread[-2..-1] == %w(author name)
161
+ when "app:draft"
162
+ @in_entry_elem[:meta][:draft] = true if @tag_bread[-2..-1] == %w(app:control app:draft) && text == "yes"
172
163
  end
173
164
  end
174
165
 
@@ -186,7 +177,7 @@ module JekyllImport
186
177
 
187
178
  FileUtils.mkdir_p(target_dir)
188
179
 
189
- file_name = URI.decode("#{post_data[:filename]}.html")
180
+ file_name = URI.decode_www_form_component("#{post_data[:filename]}.html")
190
181
  File.open(File.join(target_dir, file_name), "w") do |f|
191
182
  f.flock(File::LOCK_EX)
192
183
 
@@ -264,19 +255,16 @@ module JekyllImport
264
255
  { :filename => filename, :header => header, :body => body }
265
256
  elsif @in_entry_elem[:meta][:kind] == "comment"
266
257
  timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime("%Y-%m-%d")
267
- if @in_entry_elem[:meta][:original_url]
268
- @comment_seq ||= 1
258
+ raise "Original URL is missing" unless @in_entry_elem[:meta][:original_url]
269
259
 
270
- original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
271
- original_path = original_uri.path.to_s
272
- filename = format("%s-%s-%s", timestamp, File.basename(original_path, File.extname(original_path)), @comment_seq)
260
+ @comment_seq ||= 1
273
261
 
274
- @comment_seq += 1
262
+ original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
263
+ original_path = original_uri.path.to_s
264
+ filename = format("%s-%s-%s", timestamp, File.basename(original_path, File.extname(original_path)), @comment_seq)
275
265
 
276
- @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
277
- else
278
- raise "Original URL is missing"
279
- end
266
+ @comment_seq += 1
267
+ @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
280
268
 
281
269
  header = {
282
270
  "date" => @in_entry_elem[:meta][:published],
@@ -19,15 +19,17 @@ module JekyllImport
19
19
  nr.teaser,
20
20
  n.created,
21
21
  n.status,
22
+ ua.dst AS alias,
22
23
  n.type,
23
24
  GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags'
24
- FROM #{prefix}node_revisions AS nr,
25
+ FROM #{prefix}node_revisions AS nr, url_alias AS ua,
25
26
  #{prefix}node AS n
26
27
  LEFT OUTER JOIN #{prefix}term_node AS tn ON tn.nid = n.nid
27
28
  LEFT OUTER JOIN #{prefix}term_data AS td ON tn.tid = td.tid
28
29
  WHERE (#{types})
29
30
  AND n.vid = nr.vid
30
- GROUP BY n.nid
31
+ AND ua.src = CONCAT( 'node/', n.nid)
32
+ GROUP BY n.nid, ua.dst
31
33
  SQL
32
34
 
33
35
  query
@@ -44,9 +46,11 @@ SQL
44
46
 
45
47
  data = {
46
48
  "excerpt" => summary,
47
- "categories" => tags.split("|"),
49
+ "categories" => tags.split("|").uniq,
48
50
  }
49
51
 
52
+ data["permalink"] = "/" + sql_post_data[:alias] if sql_post_data[:alias]
53
+
50
54
  [data, content]
51
55
  end
52
56
  end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "jekyll-import/importers/drupal_common"
4
+
5
+ module JekyllImport
6
+ module Importers
7
+ class Drupal8 < Importer
8
+ include DrupalCommon
9
+ extend DrupalCommon::ClassMethods
10
+
11
+ def self.build_query(prefix, types, engine)
12
+ types = types.join("' OR n.type = '")
13
+ types = "n.type = '#{types}'"
14
+
15
+ tag_group = if engine == "postgresql"
16
+ <<POSTGRESQL
17
+ (SELECT STRING_AGG(td.name, '|')
18
+ FROM #{prefix}taxonomy_term_field_data td, #{prefix}taxonomy_index ti
19
+ WHERE ti.tid = td.tid AND ti.nid = n.nid) AS tags
20
+ POSTGRESQL
21
+ else
22
+ <<SQL
23
+ (SELECT GROUP_CONCAT(td.name SEPARATOR '|')
24
+ FROM #{prefix}taxonomy_term_field_data td, #{prefix}taxonomy_index ti
25
+ WHERE ti.tid = td.tid AND ti.nid = n.nid) AS 'tags'
26
+ SQL
27
+ end
28
+
29
+ query = <<QUERY
30
+ SELECT n.nid,
31
+ n.title,
32
+ nb.body_value,
33
+ nb.body_summary,
34
+ n.created,
35
+ n.status,
36
+ n.type,
37
+ #{tag_group}
38
+ FROM #{prefix}node_field_data AS n
39
+ LEFT JOIN #{prefix}node__body AS nb
40
+ ON nb.entity_id = n.nid
41
+ WHERE (#{types})
42
+ QUERY
43
+
44
+ query
45
+ end
46
+
47
+ def self.aliases_query(prefix)
48
+ "SELECT source, alias FROM #{prefix}url_alias WHERE source = ?"
49
+ end
50
+
51
+ def self.post_data(sql_post_data)
52
+ content = sql_post_data[:body_value].to_s
53
+ summary = sql_post_data[:body_summary].to_s
54
+ tags = (sql_post_data[:tags] || "").downcase.strip
55
+
56
+ data = {
57
+ "excerpt" => summary,
58
+ "categories" => tags.split("|"),
59
+ }
60
+
61
+ [data, content]
62
+ end
63
+ end
64
+ end
65
+ end
@@ -68,6 +68,7 @@ module JekyllImport
68
68
  src_dir = conf["source"]
69
69
 
70
70
  dirs = {
71
+ :_aliases => src_dir,
71
72
  :_posts => File.join(src_dir, "_posts").to_s,
72
73
  :_drafts => File.join(src_dir, "_drafts").to_s,
73
74
  :_layouts => Jekyll.sanitized_path(src_dir, conf["layouts_dir"].to_s),
@@ -146,10 +147,10 @@ module JekyllImport
146
147
 
147
148
  if partition.first.length.positive?
148
149
  dir = "#{partition.first}/"
149
- FileUtils.mkdir_p partition.first
150
+ FileUtils.mkdir_p "#{dirs[:_aliases]}/#{dir}"
150
151
  end
151
152
 
152
- File.open("#{dir}#{file}.md", "w") do |f|
153
+ File.open("#{dirs[:_aliases]}/#{dir}#{file}.md", "w") do |f|
153
154
  f.puts "---"
154
155
  f.puts "layout: refresh"
155
156
  f.puts "permalink: #{dir}#{file}/"
@@ -6,6 +6,7 @@ module JekyllImport
6
6
  def self.specify_options(c)
7
7
  c.option "source", "--source NAME", "The RSS file or URL to import"
8
8
  c.option "tag", "--tag NAME", "Add a tag to posts"
9
+ c.option "render_audio", "--render_audio", "Render <audio> element as necessary"
9
10
  end
10
11
 
11
12
  def self.validate(options)
@@ -30,8 +31,6 @@ module JekyllImport
30
31
  # Returns nothing.
31
32
  def self.process(options)
32
33
  source = options.fetch("source")
33
- frontmatter = options.fetch("frontmatter", [])
34
- body = options.fetch("body", ["description"])
35
34
 
36
35
  content = ""
37
36
  open(source) { |s| content = s.read }
@@ -40,37 +39,56 @@ module JekyllImport
40
39
  raise "There doesn't appear to be any RSS items at the source (#{source}) provided." unless rss
41
40
 
42
41
  rss.items.each do |item|
43
- formatted_date = item.date.strftime("%Y-%m-%d")
44
- post_name = Jekyll::Utils.slugify(item.title, :mode => "latin")
45
- name = "#{formatted_date}-#{post_name}"
42
+ write_rss_item(item, options)
43
+ end
44
+ end
46
45
 
47
- header = {
48
- "layout" => "post",
49
- "title" => item.title,
50
- }
46
+ def self.write_rss_item(item, options)
47
+ frontmatter = options.fetch("frontmatter", [])
48
+ body = options.fetch("body", ["description"])
49
+ render_audio = options.fetch("render_audio", false)
51
50
 
52
- header["tag"] = options["tag"] unless options["tag"].nil? || options["tag"].empty?
51
+ formatted_date = item.date.strftime("%Y-%m-%d")
52
+ post_name = Jekyll::Utils.slugify(item.title, :mode => "latin")
53
+ name = "#{formatted_date}-#{post_name}"
54
+ audio = render_audio && item.enclosure.url
53
55
 
54
- frontmatter.each do |value|
55
- header[value] = item.send(value)
56
- end
56
+ header = {
57
+ "layout" => "post",
58
+ "title" => item.title,
59
+ }
57
60
 
58
- output = +""
61
+ header["tag"] = options["tag"] unless options["tag"].nil? || options["tag"].empty?
59
62
 
60
- body.each do |row|
61
- output << item.send(row).to_s
62
- end
63
+ frontmatter.each do |value|
64
+ header[value] = item.send(value)
65
+ end
63
66
 
64
- output.strip!
65
- output = item.content_encoded if output.empty?
67
+ output = +""
68
+
69
+ body.each do |row|
70
+ output << item.send(row).to_s
71
+ end
66
72
 
67
- FileUtils.mkdir_p("_posts")
73
+ output.strip!
74
+ output = item.content_encoded if output.empty?
68
75
 
69
- File.open("_posts/#{name}.html", "w") do |f|
70
- f.puts header.to_yaml
71
- f.puts "---\n\n"
72
- f.puts output
76
+ FileUtils.mkdir_p("_posts")
77
+
78
+ File.open("_posts/#{name}.html", "w") do |f|
79
+ f.puts header.to_yaml
80
+ f.puts "---\n\n"
81
+
82
+ if audio
83
+ f.puts <<~HTML
84
+ <audio controls="">
85
+ <source src="#{audio}" type="audio/mpeg">
86
+ Your browser does not support the audio element.
87
+ </audio>
88
+ HTML
73
89
  end
90
+
91
+ f.puts output
74
92
  end
75
93
  end
76
94
  end
@@ -11,25 +11,31 @@ module JekyllImport
11
11
  fileutils
12
12
  safe_yaml
13
13
  unidecode
14
+ nokogiri
14
15
  )
15
16
  )
16
17
  end
17
18
 
18
19
  def self.specify_options(c)
19
- c.option "dbname", "--dbname DB", "Database name (default: '')"
20
- c.option "socket", "--socket SOCKET", "Database socket (default: '')"
21
- c.option "user", "--user USER", "Database user name (default: '')"
22
- c.option "password", "--password PW", "Database user's password (default: '')"
23
- c.option "host", "--host HOST", "Database host name (default: 'localhost')"
24
- c.option "port", "--port PORT", "Custom database port connect to (default: 3306)"
25
- c.option "table_prefix", "--table_prefix PREFIX", "Table prefix name (default: 'serendipity_')"
26
- c.option "clean_entities", "--clean_entities", "Whether to clean entities (default: true)"
27
- c.option "comments", "--comments", "Whether to import comments (default: true)"
28
- c.option "categories", "--categories", "Whether to import categories (default: true)"
29
- c.option "tags", "--tags", "Whether to import tags (default: true)"
30
- c.option "drafts", "--drafts", "Whether to export drafts as well"
31
- c.option "markdown", "--markdown", "convert into markdown format (default: false)"
32
- c.option "permalinks", "--permalinks", "preserve S9Y permalinks (default: false)"
20
+ c.option "dbname", "--dbname DB", "Database name (default: '')"
21
+ c.option "socket", "--socket SOCKET", "Database socket (default: '')"
22
+ c.option "user", "--user USER", "Database user name (default: '')"
23
+ c.option "password", "--password PW", "Database user's password (default: '')"
24
+ c.option "host", "--host HOST", "Database host name (default: 'localhost')"
25
+ c.option "port", "--port PORT", "Custom database port connect to (default: 3306)"
26
+ c.option "table_prefix", "--table_prefix PREFIX", "Table prefix name (default: 'serendipity_')"
27
+ c.option "clean_entities", "--clean_entities", "Whether to clean entities (default: true)"
28
+ c.option "comments", "--comments", "Whether to import comments (default: true)"
29
+ c.option "categories", "--categories", "Whether to import categories (default: true)"
30
+ c.option "tags", "--tags", "Whether to import tags (default: true)"
31
+ c.option "drafts", "--drafts", "Whether to export drafts as well"
32
+ c.option "markdown", "--markdown", "convert into markdown format (default: false)"
33
+ c.option "permalinks", "--permalinks", "preserve S9Y permalinks (default: false)"
34
+ c.option "excerpt_separator", "--excerpt_separator", "Demarkation for excerpts (default: '<a id=\"extended\"></a>')"
35
+ c.option "includeentry", "--includeentry", "Replace macros from the includeentry plugin (default: false)"
36
+ c.option "imgfig", "--imgfig", "Replace nested img and youtube divs with HTML figure tags (default: true)"
37
+ c.option "linebreak", "--linebreak", "Line break processing: wp, nokogiri, ignore (default: wp)"
38
+ c.option "relative", "--relative", "Convert links with this prefix to relative (default:nil)"
33
39
  end
34
40
 
35
41
  # Main migrator function. Call this to perform the migration.
@@ -56,36 +62,64 @@ module JekyllImport
56
62
  # :categories:: If true, save the post's categories in its
57
63
  # YAML front matter. Default: true.
58
64
  # :tags:: If true, save the post's tags in its
59
- # YAML front matter. Default: true.
65
+ # YAML front matter, in lowercase. Default: true.
60
66
  # :extension:: Set the post extension. Default: "html"
61
67
  # :drafts:: If true, export drafts as well
62
68
  # Default: true.
63
69
  # :markdown:: If true, convert the content to markdown
64
70
  # Default: false
65
71
  # :permalinks:: If true, save the post's original permalink in its
66
- # YAML front matter. Default: false.
72
+ # YAML front matter. If the 'entryproperties' plugin
73
+ # was used, its permalink will become the canonical
74
+ # permalink, and any other will become redirects.
75
+ # Default: false.
76
+ # :excerpt_separator:: A string to use to separate the excerpt (body
77
+ # in S9Y) from the rest of the article (extended
78
+ # body in S9Y). Default: "<a id=\"extended\"></a>".
79
+ # :includentry:: Replace macros from the includentry plugin - these are
80
+ # the [s9y-include-entry] and [s9y-include-block] macros.
81
+ # Default: false.
82
+ # :imgfig:: Replace S9Y image-comment divs with an HTML figure
83
+ # div and figcaption, if applicable. Works for img and
84
+ # iframe.
85
+ # Default: true.
67
86
  #
87
+ # :linebreak:: When set to the default "wp", line breaks in entries
88
+ # will be processed WordPress style, by replacing double
89
+ # line breaks with HTML p tags, and remaining single
90
+ # line breaks with HTML br tags. When set to "nokogiri",
91
+ # entries will be loaded into Nokogiri and formatted as
92
+ # an XHTML fragment. When set to "ignore", line breaks
93
+ # will not be replaced at all.
94
+ # Default: wp
95
+ # :relative:: Replace absolute links (http://:relative:/foo)
96
+ # to relative links (/foo).
97
+
68
98
  def self.process(opts)
69
99
  options = {
70
- :user => opts.fetch("user", ""),
71
- :pass => opts.fetch("password", ""),
72
- :host => opts.fetch("host", "localhost"),
73
- :port => opts.fetch("port", 3306),
74
- :socket => opts.fetch("socket", nil),
75
- :dbname => opts.fetch("dbname", ""),
76
- :table_prefix => opts.fetch("table_prefix", "serendipity_"),
77
- :clean_entities => opts.fetch("clean_entities", true),
78
- :comments => opts.fetch("comments", true),
79
- :categories => opts.fetch("categories", true),
80
- :tags => opts.fetch("tags", true),
81
- :extension => opts.fetch("extension", "html"),
82
- :drafts => opts.fetch("drafts", true),
83
- :markdown => opts.fetch("markdown", false),
84
- :permalinks => opts.fetch("permalinks", false),
100
+ :user => opts.fetch("user", ""),
101
+ :pass => opts.fetch("password", ""),
102
+ :host => opts.fetch("host", "localhost"),
103
+ :port => opts.fetch("port", 3306),
104
+ :socket => opts.fetch("socket", nil),
105
+ :dbname => opts.fetch("dbname", ""),
106
+ :table_prefix => opts.fetch("table_prefix", "serendipity_"),
107
+ :clean_entities => opts.fetch("clean_entities", true),
108
+ :comments => opts.fetch("comments", true),
109
+ :categories => opts.fetch("categories", true),
110
+ :tags => opts.fetch("tags", true),
111
+ :extension => opts.fetch("extension", "html"),
112
+ :drafts => opts.fetch("drafts", true),
113
+ :markdown => opts.fetch("markdown", false),
114
+ :permalinks => opts.fetch("permalinks", false),
115
+ :excerpt_separator => opts.fetch("excerpt_separator", "<a id=\"extended\"></a>"),
116
+ :includeentry => opts.fetch("includeentry", false),
117
+ :imgfig => opts.fetch("imgfig", true),
118
+ :linebreak => opts.fetch("linebreak", "wp"),
119
+ :relative => opts.fetch("relative", nil),
85
120
  }
86
121
 
87
122
  options[:clean_entities] = require_if_available("htmlentities", "clean_entities") if options[:clean_entities]
88
-
89
123
  options[:markdown] = require_if_available("reverse_markdown", "markdown") if options[:markdown]
90
124
 
91
125
  FileUtils.mkdir_p("_posts")
@@ -120,6 +154,7 @@ module JekyllImport
120
154
 
121
155
  posts_query = "
122
156
  SELECT
157
+ 'post' AS `type`,
123
158
  entries.ID AS `id`,
124
159
  entries.isdraft AS `isdraft`,
125
160
  entries.title AS `title`,
@@ -154,36 +189,41 @@ module JekyllImport
154
189
  name = format("%02d-%02d-%02d-%s.%s", date.year, date.month, date.day, slug, extension)
155
190
 
156
191
  content = post[:body].to_s
157
- content += "\n\n" + post[:body_extended].to_s unless post[:body_extended].to_s.empty?
192
+ extended_content = post[:body_extended].to_s
193
+
194
+ content += options[:excerpt_separator] + extended_content unless extended_content.nil? || extended_content.strip.empty?
158
195
 
196
+ content = process_includeentry(content, db, options) if options[:includeentry]
197
+ content = process_img_div(content) if options[:imgfig]
159
198
  content = clean_entities(content) if options[:clean_entities]
199
+ content = content.gsub(%r!href=(["'])http://#{options[:relative]}!, 'href=\1') if options[:relative]
160
200
 
161
201
  content = ReverseMarkdown.convert(content) if options[:markdown]
162
202
 
163
203
  categories = process_categories(db, options, post)
164
204
  comments = process_comments(db, options, post)
165
205
  tags = process_tags(db, options, post)
166
- permalink = process_permalink(db, options, post)
206
+ all_permalinks = process_permalink(db, options, post)
207
+ primary_permalink = all_permalinks.shift
208
+ supplemental_permalinks = all_permalinks unless all_permalinks.empty?
167
209
 
168
210
  # Get the relevant fields as a hash, delete empty fields and
169
211
  # convert to YAML for the header.
170
212
  data = {
171
- "layout" => post[:type].to_s,
172
- "status" => status.to_s,
173
- "published" => status.to_s == "draft" ? nil : (status.to_s == "published"),
174
- "title" => title.to_s,
175
- "author" => {
176
- "display_name" => post[:author].to_s,
177
- "login" => post[:author_login].to_s,
178
- "email" => post[:author_email].to_s,
179
- },
180
- "author_login" => post[:author_login].to_s,
181
- "author_email" => post[:author_email].to_s,
182
- "date" => date.to_s,
183
- "permalink" => options[:permalinks] ? permalink : nil,
184
- "categories" => options[:categories] ? categories : nil,
185
- "tags" => options[:tags] ? tags : nil,
186
- "comments" => options[:comments] ? comments : nil,
213
+ "layout" => post[:type].to_s,
214
+ "status" => status.to_s,
215
+ "published" => status.to_s == "draft" ? nil : (status.to_s == "published"),
216
+ "title" => title.to_s,
217
+ "author" => post[:author].to_s,
218
+ "author_login" => post[:author_login].to_s,
219
+ "author_email" => post[:author_email].to_s,
220
+ "date" => date.to_s,
221
+ "permalink" => options[:permalinks] ? primary_permalink : nil,
222
+ "redirect_from" => options[:permalinks] ? supplemental_permalinks : nil,
223
+ "categories" => options[:categories] ? categories : nil,
224
+ "tags" => options[:tags] ? tags : nil,
225
+ "comments" => options[:comments] ? comments : nil,
226
+ "excerpt_separator" => extended_content.empty? ? nil : options[:excerpt_separator],
187
227
  }.delete_if { |_k, v| v.nil? || v == "" }.to_yaml
188
228
 
189
229
  if post[:type] == "page"
@@ -195,11 +235,21 @@ module JekyllImport
195
235
  filename = "_posts/#{name}"
196
236
  end
197
237
 
238
+ content = case options[:linebreak]
239
+ when "nokogiri"
240
+ Nokogiri::HTML.fragment(content).to_xhtml
241
+ when "ignore"
242
+ content
243
+ else
244
+ # "wp" is the only remaining option, and the default
245
+ Util.wpautop(content)
246
+ end
247
+
198
248
  # Write out the data and content to file
199
249
  File.open(filename, "w") do |f|
200
250
  f.puts data
201
251
  f.puts "---"
202
- f.puts Util.wpautop(content)
252
+ f.puts content
203
253
  end
204
254
  end
205
255
 
@@ -207,10 +257,154 @@ module JekyllImport
207
257
  require gem_name
208
258
  true
209
259
  rescue LoadError
210
- warn "Could not require '#{gem_name}', so the :#{option_name} option is now disabled."
260
+ Jekyll.logger.warn "s9y database:", "Could not require '#{gem_name}', so the :#{option_name} option is now disabled."
211
261
  true
212
262
  end
213
263
 
264
+ def self.process_includeentry(text, db, options)
265
+ return text unless options[:includeentry]
266
+
267
+ result = text
268
+
269
+ px = options[:table_prefix]
270
+
271
+ props = text.scan(%r!(\[s9y-include-entry:([0-9]+):([^:]+)\])!)
272
+ blocks = text.scan(%r!(\[s9y-include-block:([0-9]+):?([^:]+)?\])!)
273
+
274
+ props.each do |match|
275
+ macro = match[0]
276
+ id = match[1]
277
+ replacement = ""
278
+ if match[2].start_with?("prop=")
279
+ prop = match[2].sub("prop=", "")
280
+ cquery = get_property_query(px, id, prop)
281
+ else
282
+ prop = match[2]
283
+ cquery = get_value_query(px, id, prop)
284
+ end
285
+ db[cquery].each do |row|
286
+ replacement << row[:txt]
287
+ end
288
+ result = result.sub(macro, replacement)
289
+ end
290
+
291
+ blocks.each do |match|
292
+ macro = match[0]
293
+ id = match[1]
294
+ replacement = ""
295
+ # match[2] *could* be 'template', but we can't run it through Smarty, so we ignore it
296
+ cquery = %(
297
+ SELECT
298
+ px.body AS `txt`
299
+ FROM
300
+ #{px}staticblocks AS px
301
+ WHERE
302
+ id = '#{id}'
303
+ )
304
+ db[cquery].each do |row|
305
+ replacement << row[:txt]
306
+ end
307
+ result = result.sub(macro, replacement)
308
+ end
309
+
310
+ result
311
+ end
312
+
313
+ def get_property_query(px, id, prop)
314
+ %(
315
+ SELECT
316
+ px.value AS `txt`
317
+ FROM
318
+ #{px}entryproperties AS px
319
+ WHERE
320
+ entryid = '#{id}' AND
321
+ property = '#{prop}'
322
+ )
323
+ end
324
+
325
+ def get_value_query(px, id, prop)
326
+ %(
327
+ SELECT
328
+ px.#{prop} AS `txt`
329
+ FROM
330
+ #{px}entries AS px
331
+ WHERE
332
+ entryid = '#{id}'
333
+ )
334
+ end
335
+
336
+ # Replace .serendipity_imageComment_* blocks
337
+ def self.process_img_div(text)
338
+ caption_classes = [
339
+ ".serendipity_imageComment_left",
340
+ ".serendipity_imageComment_right",
341
+ ".serendipity_imageComment_center",
342
+ ]
343
+
344
+ noko = Nokogiri::HTML.fragment(text)
345
+ noko.css(caption_classes.join(",")).each do |imgcaption|
346
+ block_attrs = get_block_attrs(imgcaption)
347
+
348
+ # Is this a thumbnail to a bigger/other image?
349
+ big_link = imgcaption.at_css(".serendipity_image_link")
350
+ big_link ||= imgcaption.at_xpath(".//a[.//img]")
351
+
352
+ # The caption (if any) may have raw HTML
353
+ caption_elem = imgcaption.at_css(".serendipity_imageComment_txt")
354
+ caption = ""
355
+ caption = "<figcaption>#{caption_elem.inner_html}</figcaption>" if caption_elem
356
+
357
+ image_node = imgcaption.at_css("img")
358
+ if image_node
359
+ attrs = get_media_attrs(image_node)
360
+ media = "<img #{attrs}/>"
361
+ else
362
+ iframe_node = imgcaption.at_css("iframe")
363
+ if iframe_node
364
+ attrs = get_media_attrs(iframe_node)
365
+ media = "<iframe #{attrs}'></iframe>"
366
+ else
367
+ Jekyll.logger.warn "s9y database:", "Unrecognized media block: #{imgcaption}"
368
+ return text
369
+ end
370
+ end
371
+
372
+ # Wrap media in link, if any
373
+ if big_link
374
+ big = big_link.attribute("href")
375
+ media = "<a href='#{big}'>#{media}</a>"
376
+ end
377
+
378
+ # Replace HTML with clean media source, wrapped in figure
379
+ imgcaption.replace("<figure #{block_attrs}#{media}#{caption}</figure>")
380
+ end
381
+
382
+ noko.to_s
383
+ end
384
+
385
+ def get_media_attrs(node)
386
+ width = node.attribute("width")
387
+ width = "width='#{width}'" if width
388
+ height = node.attribute("height")
389
+ height = "height='#{height}'" if height
390
+ alt = node.attribute("alt")
391
+ alt = "alt='#{alt}'" if alt
392
+ src = "src='" + node.attribute("src") + "'"
393
+ [src, width, height, alt].join(" ")
394
+ end
395
+
396
+ def get_block_attrs(imgcaption)
397
+ # Extract block-level attributes
398
+ float = imgcaption.attribute("class").value.sub("serendipity_imageComment_", "")
399
+ float = "class='figure-#{float}'"
400
+ style = imgcaption.attribute("style")
401
+ style = " style='#{style.value}'" if style
402
+ # Don't lose good data
403
+ mdbnum = imgcaption.search(".//comment()").text.strip.sub("s9ymdb:", "")
404
+ mdb = "<!-- mdb='#{mdbnum}' -->" if mdbnum
405
+ [float, style, mdb].join(" ")
406
+ end
407
+
214
408
  def self.process_categories(db, options, post)
215
409
  return [] unless options[:categories]
216
410
 
@@ -293,18 +487,36 @@ module JekyllImport
293
487
 
294
488
  db[cquery].each_with_object([]) do |tag, tags|
295
489
  tags << if options[:clean_entities]
296
- clean_entities(tag[:name])
490
+ clean_entities(tag[:name]).downcase
297
491
  else
298
- tag[:name]
492
+ tag[:name].downcase
299
493
  end
300
494
  end
301
495
  end
302
496
 
303
497
  def self.process_permalink(db, options, post)
304
- return unless options[:permalinks]
498
+ return [] unless options[:permalinks]
499
+
500
+ permalinks = []
305
501
 
306
502
  px = options[:table_prefix]
307
503
 
504
+ if db.table_exists?("#{px}entryproperties")
505
+ pquery = %(
506
+ SELECT
507
+ props.value AS `permalink`
508
+ FROM
509
+ #{px}entryproperties AS props
510
+ WHERE
511
+ props.entryid = '#{post[:id]}' AND
512
+ props.property = 'permalink'
513
+ )
514
+ db[pquery].each do |link|
515
+ plink = link[:permalink].to_s
516
+ permalinks << plink unless plink.end_with? "/UNKNOWN.html"
517
+ end
518
+ end
519
+
308
520
  cquery = %(
309
521
  SELECT
310
522
  permalinks.permalink AS `permalink`
@@ -316,8 +528,10 @@ module JekyllImport
316
528
  )
317
529
 
318
530
  db[cquery].each do |link|
319
- return "/#{link[:permalink]}"
531
+ permalinks << "/#{link[:permalink]}"
320
532
  end
533
+
534
+ permalinks
321
535
  end
322
536
 
323
537
  def self.clean_entities(text)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JekyllImport
4
- VERSION = "0.20.0"
4
+ VERSION = "0.21.0"
5
5
  end
metadata CHANGED
@@ -1,16 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jekyll-import
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.20.0
4
+ version: 0.21.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tom Preston-Werner
8
8
  - Parker Moore
9
9
  - Matt Rogers
10
- autorequire:
10
+ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2020-09-08 00:00:00.000000000 Z
13
+ date: 2021-11-01 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: jekyll
@@ -60,20 +60,6 @@ dependencies:
60
60
  - - "~>"
61
61
  - !ruby/object:Gem::Version
62
62
  version: '1.0'
63
- - !ruby/object:Gem::Dependency
64
- name: activesupport
65
- requirement: !ruby/object:Gem::Requirement
66
- requirements:
67
- - - "~>"
68
- - !ruby/object:Gem::Version
69
- version: '4.2'
70
- type: :development
71
- prerelease: false
72
- version_requirements: !ruby/object:Gem::Requirement
73
- requirements:
74
- - - "~>"
75
- - !ruby/object:Gem::Version
76
- version: '4.2'
77
63
  - !ruby/object:Gem::Dependency
78
64
  name: bundler
79
65
  requirement: !ruby/object:Gem::Requirement
@@ -359,6 +345,7 @@ files:
359
345
  - lib/jekyll-import/importers/dotclear.rb
360
346
  - lib/jekyll-import/importers/drupal6.rb
361
347
  - lib/jekyll-import/importers/drupal7.rb
348
+ - lib/jekyll-import/importers/drupal8.rb
362
349
  - lib/jekyll-import/importers/drupal_common.rb
363
350
  - lib/jekyll-import/importers/easyblog.rb
364
351
  - lib/jekyll-import/importers/enki.rb
@@ -388,7 +375,7 @@ homepage: http://github.com/jekyll/jekyll-import
388
375
  licenses:
389
376
  - MIT
390
377
  metadata: {}
391
- post_install_message:
378
+ post_install_message:
392
379
  rdoc_options:
393
380
  - "--charset=UTF-8"
394
381
  require_paths:
@@ -404,8 +391,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
404
391
  - !ruby/object:Gem::Version
405
392
  version: '0'
406
393
  requirements: []
407
- rubygems_version: 3.0.3
408
- signing_key:
394
+ rubygems_version: 3.1.6
395
+ signing_key:
409
396
  specification_version: 4
410
397
  summary: Import command for Jekyll (static site generator).
411
398
  test_files: []