jekyll-import 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 334724ebd0dbdc9774dd2e4799dea04dc5adaacc911d05cfc26f2a5daf23bbfa
4
- data.tar.gz: 79a4f8c6de087aebca9f726275ebb3c234361b7341a01ce10c544ff65a0b6bac
3
+ metadata.gz: ba591570a71e1a96e2a064ba583010d114d8051d9cb4fa810ff45a7e382b621e
4
+ data.tar.gz: ac515c173bc2bb258d75da253cbdd83aa5f3a9074a97ba8c68a02193b4d15449
5
5
  SHA512:
6
- metadata.gz: d716d81c1e596e1004a65eadaa6c4df3e130d134d0b3db0fef7e06151350a90525f59db50aefd6f771950b0698f174fe19311dca7134c3488be970b85e76737a
7
- data.tar.gz: f5ee7ba0c29bfad8267482789e2f571ee2d6ccafd343d173c9e9408b1c6c9c25941f28476386c3076310ec630fc5ff84c7ef12aaac2bc934aa1b8ee8e11010f5
6
+ metadata.gz: 716d363903258758c63a266d81a865bee73e174816d9cdd854f8d8079511ec3b75fccb1576aa82700b3212c8fa4fe59144196f0e4010084489c7e7c0f8127268
7
+ data.tar.gz: 65a01c5fbf3b3d69d2a2808ea2e4233b6a7034e056e91950487cdef84d03d9048ea95fefd52eb8b48cd3e0608bfb269b6ef35d347ab59e94a698779088d57069
data/README.markdown CHANGED
@@ -1,6 +1,7 @@
1
1
  # jekyll-import
2
2
 
3
- [![Build Status](https://travis-ci.org/jekyll/jekyll-import.svg?branch=master)](https://travis-ci.org/jekyll/jekyll-import)
3
+ [![Gem Version](https://img.shields.io/gem/v/jekyll-import.svg)](https://rubygems.org/gems/jekyll-import)
4
+ [![Continuous Integration](https://github.com/jekyll/jekyll-import/actions/workflows/ci.yml/badge.svg)](https://github.com/jekyll/jekyll-import/actions/workflows/ci.yml)
4
5
 
5
6
  The new __Jekyll__ command for importing from various blogs to Jekyll format.
6
7
 
@@ -39,7 +39,7 @@ module Jekyll
39
39
  if args.empty?
40
40
  Jekyll.logger.warn "You must specify an importer."
41
41
  Jekyll.logger.info "Valid options are:"
42
- importers.each { |i| Jekyll.logger.info "*", i.to_s }
42
+ importers.sort.each { |i| Jekyll.logger.info "*", i.to_s }
43
43
  end
44
44
  end
45
45
  end
@@ -5,17 +5,14 @@ module JekyllImport
5
5
  class Blogger < Importer
6
6
  def self.specify_options(c)
7
7
  c.option "source", "--source NAME", "The XML file (blog-MM-DD-YYYY.xml) path to import"
8
- c.option "no-blogger-info", "--no-blogger-info", "not to leave blogger-URL info (id and old URL) in the front matter (default: false)"
8
+ c.option "no-blogger-info", "--no-blogger-info", "not to leave blogger-URL info (id and old URL) in the front matter. (default: false)"
9
9
  c.option "replace-internal-link", "--replace-internal-link", "replace internal links using the post_url liquid tag. (default: false)"
10
- c.option "comments", "--comments", "import comments to _comments collection"
10
+ c.option "comments", "--comments", "import comments to _comments collection. (default: false)"
11
11
  end
12
12
 
13
13
  def self.validate(options)
14
- if options["source"].nil?
15
- raise "Missing mandatory option: --source"
16
- elsif !File.exist?(options["source"])
17
- raise Errno::ENOENT, "File not found: #{options["source"]}"
18
- end
14
+ raise "Missing mandatory option: --source" if options["source"].nil?
15
+ raise Errno::ENOENT, "File not found: #{options["source"]}" unless File.exist?(options["source"])
19
16
  end
20
17
 
21
18
  def self.require_deps
@@ -42,7 +39,6 @@ module JekyllImport
42
39
  source = options.fetch("source")
43
40
 
44
41
  listener = BloggerAtomStreamListener.new
45
-
46
42
  listener.leave_blogger_info = !options.fetch("no-blogger-info", false)
47
43
  listener.comments = options.fetch("comments", false)
48
44
 
@@ -52,7 +48,6 @@ module JekyllImport
52
48
  end
53
49
 
54
50
  options["original-url-base"] = listener.original_url_base
55
-
56
51
  postprocess(options)
57
52
  end
58
53
 
@@ -63,32 +58,32 @@ module JekyllImport
63
58
  # Returns nothing.
64
59
  def self.postprocess(options)
65
60
  # Replace internal link URL
66
- if options.fetch("replace-internal-link", false)
67
- original_url_base = options.fetch("original-url-base", nil)
68
- if original_url_base
69
- orig_url_pattern = Regexp.new(" href=([\"\'])(?:#{Regexp.escape(original_url_base)})?/([0-9]{4})/([0-9]{2})/([^\"\']+\.html)\\1")
70
-
71
- Dir.glob("_posts/*.*") do |filename|
72
- body = nil
73
- File.open(filename, "r") do |f|
74
- f.flock(File::LOCK_SH)
75
- body = f.read
76
- end
61
+ return unless options.fetch("replace-internal-link", false)
77
62
 
78
- body.gsub!(orig_url_pattern) do
79
- # for post_url
80
- quote = Regexp.last_match(1)
81
- post_file = Dir.glob("_posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}").first
82
- raise "Could not found: _posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}" if post_file.nil?
63
+ original_url_base = options.fetch("original-url-base", nil)
64
+ return unless original_url_base
83
65
 
84
- " href=#{quote}{{ site.baseurl }}{% post_url #{File.basename(post_file, ".html")} %}#{quote}"
85
- end
66
+ orig_url_pattern = Regexp.new(" href=([\"\'])(?:#{Regexp.escape(original_url_base)})?/([0-9]{4})/([0-9]{2})/([^\"\']+\.html)\\1")
86
67
 
87
- File.open(filename, "w") do |f|
88
- f.flock(File::LOCK_EX)
89
- f << body
90
- end
91
- end
68
+ Dir.glob("_posts/*.*") do |filename|
69
+ body = nil
70
+ File.open(filename, "r") do |f|
71
+ f.flock(File::LOCK_SH)
72
+ body = f.read
73
+ end
74
+
75
+ body.gsub!(orig_url_pattern) do
76
+ # for post_url
77
+ quote = Regexp.last_match(1)
78
+ post_file = Dir.glob("_posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}").first
79
+ raise "Could not found: _posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}" if post_file.nil?
80
+
81
+ " href=#{quote}{{ site.baseurl }}{% post_url #{File.basename(post_file, ".html")} %}#{quote}"
82
+ end
83
+
84
+ File.open(filename, "w") do |f|
85
+ f.flock(File::LOCK_EX)
86
+ f << body
92
87
  end
93
88
  end
94
89
  end
@@ -118,9 +113,7 @@ module JekyllImport
118
113
 
119
114
  @in_entry_elem = { :meta => {}, :body => nil }
120
115
  when "title"
121
- if @in_entry_elem
122
- raise 'only <title type="text"></title> is supported' if attrs["type"] != "text"
123
- end
116
+ raise 'only <title type="text"></title> is supported' if @in_entry_elem && attrs["type"] != "text"
124
117
  when "category"
125
118
  if @in_entry_elem
126
119
  if attrs["scheme"] == "http://www.blogger.com/atom/ns#"
@@ -150,25 +143,23 @@ module JekyllImport
150
143
  end
151
144
 
152
145
  def text(text)
153
- if @in_entry_elem
154
- case @tag_bread.last
155
- when "id"
156
- @in_entry_elem[:meta][:id] = text
157
- when "published"
158
- @in_entry_elem[:meta][:published] = text
159
- when "updated"
160
- @in_entry_elem[:meta][:updated] = text
161
- when "title"
162
- @in_entry_elem[:meta][:title] = text
163
- when "content"
164
- @in_entry_elem[:body] = text
165
- when "name"
166
- @in_entry_elem[:meta][:author] = text if @tag_bread[-2..-1] == %w(author name)
167
- when "app:draft"
168
- if @tag_bread[-2..-1] == %w(app:control app:draft)
169
- @in_entry_elem[:meta][:draft] = true if text == "yes"
170
- end
171
- end
146
+ return unless @in_entry_elem
147
+
148
+ case @tag_bread.last
149
+ when "id"
150
+ @in_entry_elem[:meta][:id] = text
151
+ when "published"
152
+ @in_entry_elem[:meta][:published] = text
153
+ when "updated"
154
+ @in_entry_elem[:meta][:updated] = text
155
+ when "title"
156
+ @in_entry_elem[:meta][:title] = text
157
+ when "content"
158
+ @in_entry_elem[:body] = text
159
+ when "name"
160
+ @in_entry_elem[:meta][:author] = text if @tag_bread[-2..-1] == %w(author name)
161
+ when "app:draft"
162
+ @in_entry_elem[:meta][:draft] = true if @tag_bread[-2..-1] == %w(app:control app:draft) && text == "yes"
172
163
  end
173
164
  end
174
165
 
@@ -186,7 +177,7 @@ module JekyllImport
186
177
 
187
178
  FileUtils.mkdir_p(target_dir)
188
179
 
189
- file_name = URI.decode("#{post_data[:filename]}.html")
180
+ file_name = URI.decode_www_form_component("#{post_data[:filename]}.html")
190
181
  File.open(File.join(target_dir, file_name), "w") do |f|
191
182
  f.flock(File::LOCK_EX)
192
183
 
@@ -203,7 +194,7 @@ module JekyllImport
203
194
 
204
195
  FileUtils.mkdir_p(target_dir)
205
196
 
206
- file_name = URI.decode("#{post_data[:filename]}.html")
197
+ file_name = URI::DEFAULT_PARSER.unescape("#{post_data[:filename]}.html")
207
198
  File.open(File.join(target_dir, file_name), "w") do |f|
208
199
  f.flock(File::LOCK_EX)
209
200
 
@@ -264,19 +255,16 @@ module JekyllImport
264
255
  { :filename => filename, :header => header, :body => body }
265
256
  elsif @in_entry_elem[:meta][:kind] == "comment"
266
257
  timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime("%Y-%m-%d")
267
- if @in_entry_elem[:meta][:original_url]
268
- @comment_seq ||= 1
258
+ raise "Original URL is missing" unless @in_entry_elem[:meta][:original_url]
269
259
 
270
- original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
271
- original_path = original_uri.path.to_s
272
- filename = format("%s-%s-%s", timestamp, File.basename(original_path, File.extname(original_path)), @comment_seq)
260
+ @comment_seq ||= 1
273
261
 
274
- @comment_seq += 1
262
+ original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
263
+ original_path = original_uri.path.to_s
264
+ filename = format("%s-%s-%s", timestamp, File.basename(original_path, File.extname(original_path)), @comment_seq)
275
265
 
276
- @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
277
- else
278
- raise "Original URL is missing"
279
- end
266
+ @comment_seq += 1
267
+ @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
280
268
 
281
269
  header = {
282
270
  "date" => @in_entry_elem[:meta][:published],
@@ -12,8 +12,8 @@ module JekyllImport
12
12
  end
13
13
 
14
14
  def self.specify_options(c)
15
- c.option "file", "--file NAME", 'The CSV file to import (default: "posts.csv")'
16
- c.option "no-front-matter", "--no-front-matter", "Do not add the default front matter to the post body"
15
+ c.option "file", "--file NAME", "The CSV file to import. (default: 'posts.csv')"
16
+ c.option "no-front-matter", "--no-front-matter", "Do not add the default front matter to the post body. (default: false)"
17
17
  end
18
18
 
19
19
  # Reads a csv with title, permalink, body, published_at, and filter.
@@ -1,121 +1,180 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Tested with dotClear 2.1.5
4
3
  module JekyllImport
5
4
  module Importers
6
5
  class Dotclear < Importer
7
- def self.specify_options(c)
8
- c.option "datafile", "--datafile PATH", "dotClear export file"
9
- c.option "mediafolder", "--mediafolder PATH", "dotClear media export folder (media.zip inflated)"
10
- end
6
+ class << self
7
+ def specify_options(c)
8
+ c.option "datafile", "--datafile PATH", "Dotclear export file."
9
+ c.option "mediafolder", "--mediafolder DIR", "Dotclear media export folder (unpacked media.zip)."
10
+ end
11
11
 
12
- def self.require_deps
13
- JekyllImport.require_with_fallback(%w(
14
- rubygems
15
- fileutils
16
- safe_yaml
17
- date
18
- active_support
19
- active_support/core_ext/string/inflections
20
- csv
21
- pp
22
- ))
23
- end
12
+ def require_deps
13
+ JekyllImport.require_with_fallback(%w())
14
+ end
24
15
 
25
- def self.validate(opts)
26
- abort "Specify a data file !" if opts["datafile"].nil? || opts["datafile"].empty?
27
- abort "Specify a media folder !" if opts["mediafolder"].nil? || opts["mediafolder"].empty?
28
- end
16
+ def validate(opts)
17
+ file_path = opts["datafile"]
18
+ log_undefined_flag_error("datafile") if file_path.nil? || file_path.empty?
29
19
 
30
- def self.extract_headers_section(str)
31
- str[1..-2].split(" ")[1].split(",")
32
- end
33
-
34
- def self.extract_data_section(str)
35
- str.gsub(%r!^"!, "").gsub(%r!"$!, "").split('","')
36
- end
20
+ file_path = File.expand_path(file_path)
21
+ if File.open(file_path, "rb", &:readline).start_with?("///DOTCLEAR|")
22
+ @data = read_export(file_path)
23
+ Jekyll.logger.info "Export File:", file_path
24
+ else
25
+ Jekyll.logger.abort_with "Import Error:", "#{file_path.inspect} is not a valid Dotclear export file!"
26
+ end
37
27
 
38
- def self.process(opts)
39
- options = {
40
- :datafile => opts.fetch("datafile", ""),
41
- :mediafolder => opts.fetch("mediafolder", ""),
42
- }
28
+ assets = @data["media"]
29
+ return if !assets || assets.empty?
43
30
 
44
- FileUtils.mkdir_p("_posts")
45
- FileUtils.mkdir_p("_drafts")
31
+ Jekyll.logger.info "", "Media files detected in export data."
46
32
 
47
- type_data = ""
48
- headers = {}
49
- posts_and_drafts = {}
50
- keywords = {}
33
+ media_dir = opts["mediafolder"]
34
+ log_undefined_flag_error("mediafolder") if media_dir.nil? || media_dir.empty?
51
35
 
52
- File.readlines(options[:datafile]).each do |lineraw|
53
- line = lineraw.strip.gsub(%r!\n$!, "")
36
+ media_dir = File.expand_path(media_dir)
37
+ log_invalid_media_dir_error(media_dir) if !File.directory?(media_dir) || Dir.empty?(media_dir)
38
+ end
54
39
 
55
- next if line.empty?
40
+ def process(opts)
41
+ import_posts
42
+ import_assets(opts["mediafolder"])
43
+ Jekyll.logger.info "", "and, done!"
44
+ end
56
45
 
57
- if line.start_with?("[") # post | media \ meta | comment...
58
- type_data = line.split(" ").first[1..-1]
59
- headers[type_data] = extract_headers_section(line)
60
- next
46
+ private
47
+
48
+ # Parse backup sections into a Hash of arrays.
49
+ #
50
+ # Each section is of following shape:
51
+ #
52
+ # [key alpha,beta,gamma,...]
53
+ # lorem,ipsum,dolor,...
54
+ # red,blue,green,...
55
+ #
56
+ # Returns Hash of shape:
57
+ #
58
+ # {key => [{alpha => lorem,...}, {alpha => red,...}]}
59
+ #
60
+ def read_export(file)
61
+ ignored_sections = %w(category comment link setting)
62
+
63
+ File.read(file, :encoding => "utf-8").split("\n\n").each_with_object({}) do |section, data|
64
+ next unless %r!^\[(?<key>.*?) (?<header>.*)\]\n(?<rows>.*)!m =~ section
65
+ next if ignored_sections.include?(key)
66
+
67
+ headers = header.split(",")
68
+
69
+ data[key] = rows.each_line.with_object([]) do |line, bucket|
70
+ bucket << headers.zip(sanitize_line!(line)).to_h
71
+ end
72
+
73
+ data
61
74
  end
75
+ end
62
76
 
63
- elts = extract_data_section(line)
64
-
65
- if type_data == "post"
66
- draft = (elts[headers[type_data].index("post_status")] != "1")
77
+ def register_post_tags
78
+ @data["meta"].each_with_object({}) do |entry, tags|
79
+ next unless entry["meta_type"] == "tag"
67
80
 
68
- date_str = elts[headers[type_data].index("post_creadt")]
69
- date_blank = (date_str.nil? || date_str.empty?)
70
- date_str_formatted = date_blank ? Date.today : Date.parse(date_str).strftime("%Y-%m-%d")
71
- title_param = elts[headers[type_data].index("post_title")].to_s.parameterize
81
+ post_id = entry["post_id"]
82
+ tags[post_id] ||= []
83
+ tags[post_id] << entry["meta_id"]
84
+ end
85
+ end
72
86
 
73
- content = elts[headers[type_data].index("post_content_xhtml")].to_s
74
- content = content.gsub('\"', '"').gsub('\n', "\n").gsub("/public/", "/assets/images/")
87
+ def log_undefined_flag_error(label)
88
+ Jekyll.logger.abort_with "Import Error:", "--#{label} flag cannot be undefined, null or empty!"
89
+ end
75
90
 
76
- filepath = File.join(Dir.pwd, (draft ? "_drafts" : "_posts"), "#{date_str_formatted}-#{title_param}.html")
91
+ def log_invalid_media_dir_error(media_dir)
92
+ Jekyll.logger.error "Import Error:", "--mediafolder should be a non-empty directory."
93
+ Jekyll.logger.abort_with "", "Please check #{media_dir.inspect}."
94
+ end
77
95
 
78
- entire_content_file = <<~POST_FILE
79
- ---
80
- layout: post
81
- title: "#{elts[headers[type_data].index("post_title")]}"
82
- date: #{elts[headers[type_data].index("post_creadt")]} +0100
83
- tags: ABC
84
- ---
96
+ def sanitize_line!(line)
97
+ line.strip!
98
+ line.split('","').tap do |items|
99
+ items[0].delete_prefix!('"')
100
+ items[-1].delete_suffix!('"')
101
+ end
102
+ end
85
103
 
86
- #{content}
87
- POST_FILE
104
+ # -
88
105
 
89
- posts_and_drafts[elts[headers[type_data].index("post_id")]] = { :path => filepath, :content => entire_content_file }
90
- elsif type_data == "media"
91
- elts[headers[type_data].index("media_title")]
92
- mediafilepath = elts[headers[type_data].index("media_file")]
106
+ REPLACE_MAP = {
107
+ '\"' => '"',
108
+ '\r\n' => "\n",
109
+ '\n' => "\n",
110
+ "/dotclear/public/" => "/assets/dotclear/",
111
+ "/public/" => "/assets/dotclear/",
112
+ }.freeze
93
113
 
94
- src_path = File.join(options[:mediafolder], mediafilepath)
95
- dst_path = File.join(Dir.pwd, "assets", "images", mediafilepath.to_s)
114
+ REPLACE_RE = Regexp.union(REPLACE_MAP.keys)
96
115
 
97
- FileUtils.mkdir_p(File.dirname(dst_path))
98
- FileUtils.cp(src_path, dst_path)
99
- elsif type_data == "meta"
100
- keywords[elts[headers[type_data].index("post_id")]] ||= []
101
- keywords[elts[headers[type_data].index("post_id")]] << elts[headers[type_data].index("meta_id")]
102
- elsif type_data == "link"
116
+ private_constant :REPLACE_MAP, :REPLACE_RE
103
117
 
104
- elsif type_data == "setting"
118
+ # -
105
119
 
106
- elsif type_data == "comment"
120
+ def adjust_post_contents!(content)
121
+ content.strip!
122
+ content.gsub!(REPLACE_RE, REPLACE_MAP)
123
+ content
124
+ end
107
125
 
126
+ def import_posts
127
+ tags = register_post_tags
128
+ posts = @data["post"]
129
+
130
+ FileUtils.mkdir_p("_drafts") unless posts.empty?
131
+ Jekyll.logger.info "Importing posts.."
132
+
133
+ posts.each do |post|
134
+ date, title = post.values_at("post_creadt", "post_title")
135
+ path = File.join("_drafts", Date.parse(date).strftime("%Y-%m-%d-") + Jekyll::Utils.slugify(title) + ".html")
136
+
137
+ excerpt = adjust_post_contents!(post["post_excerpt_xhtml"].to_s)
138
+ excerpt = nil if excerpt.empty?
139
+
140
+ # Unlike the paradigm in Jekyll-generated HTML, `post_content_xhtml` in the export data
141
+ # doesn't begin with `post_excerpt_xhtml`.
142
+ # Instead of checking whether the excerpt content exists elsewhere in the exported content
143
+ # string, always prepend excerpt onto content with an empty line in between.
144
+ content = [excerpt, post["post_content_xhtml"]].tap(&:compact!).join("\n\n")
145
+
146
+ front_matter_data = {
147
+ "layout" => "post",
148
+ "title" => title,
149
+ "date" => date,
150
+ "lang" => post["post_lang"],
151
+ "tags" => tags[post["post_id"]],
152
+ "original_url" => post["post_url"], # URL as included in the export-file.
153
+ "excerpt" => excerpt,
154
+ }.tap(&:compact!)
155
+
156
+ Jekyll.logger.info "Creating:", path
157
+ File.write(path, "#{YAML.dump(front_matter_data)}---\n\n#{adjust_post_contents!(content)}\n")
108
158
  end
109
159
  end
110
160
 
111
- # POST-process : Change media path in posts and drafts
112
- posts_and_drafts.each do |post_id, hsh|
113
- keywords_str = keywords[post_id].to_a.join(", ")
114
- content_file = hsh[:content]
115
- content_file = content_file.gsub("tags: ABC", "tags: [#{keywords_str}]")
116
-
117
- File.open(hsh[:path], "wb") do |f|
118
- f.write(content_file)
161
+ def import_assets(src_dir)
162
+ assets = @data["media"]
163
+ FileUtils.mkdir_p("assets/dotclear") if assets && !assets.empty?
164
+ Jekyll.logger.info "Importing assets.."
165
+
166
+ assets.each do |asset|
167
+ file_path = File.join(src_dir, asset["media_file"])
168
+ if File.exist?(file_path)
169
+ dest_path = File.join("assets/dotclear", asset["media_file"])
170
+ FileUtils.mkdir_p(File.dirname(dest_path))
171
+
172
+ Jekyll.logger.info "Copying:", file_path
173
+ Jekyll.logger.info "To:", dest_path
174
+ FileUtils.cp_r file_path, dest_path
175
+ else
176
+ Jekyll.logger.info "Not found:", file_path
177
+ end
119
178
  end
120
179
  end
121
180
  end
@@ -19,15 +19,17 @@ module JekyllImport
19
19
  nr.teaser,
20
20
  n.created,
21
21
  n.status,
22
+ ua.dst AS alias,
22
23
  n.type,
23
24
  GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags'
24
- FROM #{prefix}node_revisions AS nr,
25
+ FROM #{prefix}node_revisions AS nr, url_alias AS ua,
25
26
  #{prefix}node AS n
26
27
  LEFT OUTER JOIN #{prefix}term_node AS tn ON tn.nid = n.nid
27
28
  LEFT OUTER JOIN #{prefix}term_data AS td ON tn.tid = td.tid
28
29
  WHERE (#{types})
29
30
  AND n.vid = nr.vid
30
- GROUP BY n.nid
31
+ AND ua.src = CONCAT( 'node/', n.nid)
32
+ GROUP BY n.nid, ua.dst
31
33
  SQL
32
34
 
33
35
  query
@@ -44,9 +46,11 @@ SQL
44
46
 
45
47
  data = {
46
48
  "excerpt" => summary,
47
- "categories" => tags.split("|"),
49
+ "categories" => tags.split("|").uniq,
48
50
  }
49
51
 
52
+ data["permalink"] = "/" + sql_post_data[:alias] if sql_post_data[:alias]
53
+
50
54
  [data, content]
51
55
  end
52
56
  end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "jekyll-import/importers/drupal_common"
4
+
5
+ module JekyllImport
6
+ module Importers
7
+ class Drupal8 < Importer
8
+ include DrupalCommon
9
+ extend DrupalCommon::ClassMethods
10
+
11
+ def self.build_query(prefix, types, engine)
12
+ types = types.join("' OR n.type = '")
13
+ types = "n.type = '#{types}'"
14
+
15
+ tag_group = if engine == "postgresql"
16
+ <<POSTGRESQL
17
+ (SELECT STRING_AGG(td.name, '|')
18
+ FROM #{prefix}taxonomy_term_field_data td, #{prefix}taxonomy_index ti
19
+ WHERE ti.tid = td.tid AND ti.nid = n.nid) AS tags
20
+ POSTGRESQL
21
+ else
22
+ <<SQL
23
+ (SELECT GROUP_CONCAT(td.name SEPARATOR '|')
24
+ FROM #{prefix}taxonomy_term_field_data td, #{prefix}taxonomy_index ti
25
+ WHERE ti.tid = td.tid AND ti.nid = n.nid) AS 'tags'
26
+ SQL
27
+ end
28
+
29
+ query = <<QUERY
30
+ SELECT n.nid,
31
+ n.title,
32
+ nb.body_value,
33
+ nb.body_summary,
34
+ n.created,
35
+ n.status,
36
+ n.type,
37
+ #{tag_group}
38
+ FROM #{prefix}node_field_data AS n
39
+ LEFT JOIN #{prefix}node__body AS nb
40
+ ON nb.entity_id = n.nid
41
+ WHERE (#{types})
42
+ QUERY
43
+
44
+ query
45
+ end
46
+
47
+ def self.aliases_query(prefix)
48
+ "SELECT source, alias FROM #{prefix}url_alias WHERE source = ?"
49
+ end
50
+
51
+ def self.post_data(sql_post_data)
52
+ content = sql_post_data[:body_value].to_s
53
+ summary = sql_post_data[:body_summary].to_s
54
+ tags = (sql_post_data[:tags] || "").downcase.strip
55
+
56
+ data = {
57
+ "excerpt" => summary,
58
+ "categories" => tags.split("|"),
59
+ }
60
+
61
+ [data, content]
62
+ end
63
+ end
64
+ end
65
+ end