RubyGems - jekyll-import - Versions diffs - 0.20.0 → 0.22.0 - Mend

jekyll-import 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

checksums.yaml +4 -4
data/README.markdown +2 -1
data/lib/jekyll/commands/import.rb +1 -1
data/lib/jekyll-import/importers/blogger.rb +54 -66
data/lib/jekyll-import/importers/csv.rb +2 -2
data/lib/jekyll-import/importers/dotclear.rb +147 -88
data/lib/jekyll-import/importers/drupal6.rb +7 -3
data/lib/jekyll-import/importers/drupal8.rb +65 -0
data/lib/jekyll-import/importers/drupal_common.rb +6 -5
data/lib/jekyll-import/importers/easyblog.rb +7 -7
data/lib/jekyll-import/importers/enki.rb +4 -4
data/lib/jekyll-import/importers/joomla.rb +8 -8
data/lib/jekyll-import/importers/joomla3.rb +8 -8
data/lib/jekyll-import/importers/jrnl.rb +4 -4
data/lib/jekyll-import/importers/marley.rb +1 -1
data/lib/jekyll-import/importers/medium.rb +36 -0
data/lib/jekyll-import/importers/mephisto.rb +3 -3
data/lib/jekyll-import/importers/mt.rb +10 -10
data/lib/jekyll-import/importers/pluxml.rb +3 -3
data/lib/jekyll-import/importers/roller.rb +12 -12
data/lib/jekyll-import/importers/rss.rb +64 -27
data/lib/jekyll-import/importers/s9y_database.rb +270 -56
data/lib/jekyll-import/importers/textpattern.rb +5 -5
data/lib/jekyll-import/importers/tumblr.rb +5 -5
data/lib/jekyll-import/importers/typo.rb +6 -6
data/lib/jekyll-import/importers/wordpress.rb +16 -16
data/lib/jekyll-import/importers/wordpressdotcom.rb +3 -3
data/lib/jekyll-import/version.rb +1 -1
metadata +34 -46

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 334724ebd0dbdc9774dd2e4799dea04dc5adaacc911d05cfc26f2a5daf23bbfa
-  data.tar.gz: 79a4f8c6de087aebca9f726275ebb3c234361b7341a01ce10c544ff65a0b6bac
+  metadata.gz: ba591570a71e1a96e2a064ba583010d114d8051d9cb4fa810ff45a7e382b621e
+  data.tar.gz: ac515c173bc2bb258d75da253cbdd83aa5f3a9074a97ba8c68a02193b4d15449
 SHA512:
-  metadata.gz: d716d81c1e596e1004a65eadaa6c4df3e130d134d0b3db0fef7e06151350a90525f59db50aefd6f771950b0698f174fe19311dca7134c3488be970b85e76737a
-  data.tar.gz: f5ee7ba0c29bfad8267482789e2f571ee2d6ccafd343d173c9e9408b1c6c9c25941f28476386c3076310ec630fc5ff84c7ef12aaac2bc934aa1b8ee8e11010f5
+  metadata.gz: 716d363903258758c63a266d81a865bee73e174816d9cdd854f8d8079511ec3b75fccb1576aa82700b3212c8fa4fe59144196f0e4010084489c7e7c0f8127268
+  data.tar.gz: 65a01c5fbf3b3d69d2a2808ea2e4233b6a7034e056e91950487cdef84d03d9048ea95fefd52eb8b48cd3e0608bfb269b6ef35d347ab59e94a698779088d57069

data/README.markdown CHANGED Viewed

@@ -1,6 +1,7 @@
 # jekyll-import
-[![Build Status](https://travis-ci.org/jekyll/jekyll-import.svg?branch=master)](https://travis-ci.org/jekyll/jekyll-import)
+[![Gem Version](https://img.shields.io/gem/v/jekyll-import.svg)](https://rubygems.org/gems/jekyll-import)
+[![Continuous Integration](https://github.com/jekyll/jekyll-import/actions/workflows/ci.yml/badge.svg)](https://github.com/jekyll/jekyll-import/actions/workflows/ci.yml)
 The new __Jekyll__ command for importing from various blogs to Jekyll format.

data/lib/jekyll/commands/import.rb CHANGED Viewed

@@ -39,7 +39,7 @@ module Jekyll
               if args.empty?
                 Jekyll.logger.warn "You must specify an importer."
                 Jekyll.logger.info "Valid options are:"
-                importers.each { |i| Jekyll.logger.info "*", i.to_s }
+                importers.sort.each { |i| Jekyll.logger.info "*", i.to_s }
               end
             end
           end

data/lib/jekyll-import/importers/blogger.rb CHANGED Viewed

@@ -5,17 +5,14 @@ module JekyllImport
     class Blogger < Importer
       def self.specify_options(c)
         c.option "source",                 "--source NAME",           "The XML file (blog-MM-DD-YYYY.xml) path to import"
-        c.option "no-blogger-info",        "--no-blogger-info",       "not to leave blogger-URL info (id and old URL) in the front matter (default: false)"
+        c.option "no-blogger-info",        "--no-blogger-info",       "not to leave blogger-URL info (id and old URL) in the front matter. (default: false)"
         c.option "replace-internal-link",  "--replace-internal-link", "replace internal links using the post_url liquid tag. (default: false)"
-        c.option "comments",               "--comments",              "import comments to _comments collection"
+        c.option "comments",               "--comments",              "import comments to _comments collection. (default: false)"
       end
       def self.validate(options)
-        if options["source"].nil?
-          raise "Missing mandatory option: --source"
-        elsif !File.exist?(options["source"])
-          raise Errno::ENOENT, "File not found: #{options["source"]}"
-        end
+        raise "Missing mandatory option: --source" if options["source"].nil?
+        raise Errno::ENOENT, "File not found: #{options["source"]}" unless File.exist?(options["source"])
       end
       def self.require_deps
@@ -42,7 +39,6 @@ module JekyllImport
         source = options.fetch("source")
         listener = BloggerAtomStreamListener.new
         listener.leave_blogger_info = !options.fetch("no-blogger-info", false)
         listener.comments = options.fetch("comments", false)
@@ -52,7 +48,6 @@ module JekyllImport
         end
         options["original-url-base"] = listener.original_url_base
         postprocess(options)
       end
@@ -63,32 +58,32 @@ module JekyllImport
       # Returns nothing.
       def self.postprocess(options)
         # Replace internal link URL
-        if options.fetch("replace-internal-link", false)
-          original_url_base = options.fetch("original-url-base", nil)
-          if original_url_base
-            orig_url_pattern = Regexp.new(" href=([\"\'])(?:#{Regexp.escape(original_url_base)})?/([0-9]{4})/([0-9]{2})/([^\"\']+\.html)\\1")
-            Dir.glob("_posts/*.*") do |filename|
-              body = nil
-              File.open(filename, "r") do |f|
-                f.flock(File::LOCK_SH)
-                body = f.read
-              end
+        return unless options.fetch("replace-internal-link", false)
-              body.gsub!(orig_url_pattern) do
-                # for post_url
-                quote = Regexp.last_match(1)
-                post_file = Dir.glob("_posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}").first
-                raise "Could not found: _posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}" if post_file.nil?
+        original_url_base = options.fetch("original-url-base", nil)
+        return unless original_url_base
-                " href=#{quote}{{ site.baseurl }}{% post_url #{File.basename(post_file, ".html")} %}#{quote}"
-              end
+        orig_url_pattern = Regexp.new(" href=([\"\'])(?:#{Regexp.escape(original_url_base)})?/([0-9]{4})/([0-9]{2})/([^\"\']+\.html)\\1")
-              File.open(filename, "w") do |f|
-                f.flock(File::LOCK_EX)
-                f << body
-              end
-            end
+        Dir.glob("_posts/*.*") do |filename|
+          body = nil
+          File.open(filename, "r") do |f|
+            f.flock(File::LOCK_SH)
+            body = f.read
+          end
+          body.gsub!(orig_url_pattern) do
+            # for post_url
+            quote = Regexp.last_match(1)
+            post_file = Dir.glob("_posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}").first
+            raise "Could not found: _posts/#{Regexp.last_match(2)}-#{Regexp.last_match(3)}-*-#{Regexp.last_match(4).to_s.tr("/", "-")}" if post_file.nil?
+            " href=#{quote}{{ site.baseurl }}{% post_url #{File.basename(post_file, ".html")} %}#{quote}"
+          end
+          File.open(filename, "w") do |f|
+            f.flock(File::LOCK_EX)
+            f << body
           end
         end
       end
@@ -118,9 +113,7 @@ module JekyllImport
             @in_entry_elem = { :meta => {}, :body => nil }
           when "title"
-            if @in_entry_elem
-              raise 'only <title type="text"></title> is supported' if attrs["type"] != "text"
-            end
+            raise 'only <title type="text"></title> is supported' if @in_entry_elem && attrs["type"] != "text"
           when "category"
             if @in_entry_elem
               if attrs["scheme"] == "http://www.blogger.com/atom/ns#"
@@ -150,25 +143,23 @@ module JekyllImport
         end
         def text(text)
-          if @in_entry_elem
-            case @tag_bread.last
-            when "id"
-              @in_entry_elem[:meta][:id] = text
-            when "published"
-              @in_entry_elem[:meta][:published] = text
-            when "updated"
-              @in_entry_elem[:meta][:updated] = text
-            when "title"
-              @in_entry_elem[:meta][:title] = text
-            when "content"
-              @in_entry_elem[:body] = text
-            when "name"
-              @in_entry_elem[:meta][:author] = text if @tag_bread[-2..-1] == %w(author name)
-            when "app:draft"
-              if @tag_bread[-2..-1] == %w(app:control app:draft)
-                @in_entry_elem[:meta][:draft] = true if text == "yes"
-              end
-            end
+          return unless @in_entry_elem
+          case @tag_bread.last
+          when "id"
+            @in_entry_elem[:meta][:id] = text
+          when "published"
+            @in_entry_elem[:meta][:published] = text
+          when "updated"
+            @in_entry_elem[:meta][:updated] = text
+          when "title"
+            @in_entry_elem[:meta][:title] = text
+          when "content"
+            @in_entry_elem[:body] = text
+          when "name"
+            @in_entry_elem[:meta][:author] = text if @tag_bread[-2..-1] == %w(author name)
+          when "app:draft"
+            @in_entry_elem[:meta][:draft] = true if @tag_bread[-2..-1] == %w(app:control app:draft) && text == "yes"
           end
         end
@@ -186,7 +177,7 @@ module JekyllImport
                 FileUtils.mkdir_p(target_dir)
-                file_name = URI.decode("#{post_data[:filename]}.html")
+                file_name = URI.decode_www_form_component("#{post_data[:filename]}.html")
                 File.open(File.join(target_dir, file_name), "w") do |f|
                   f.flock(File::LOCK_EX)
@@ -203,7 +194,7 @@ module JekyllImport
                 FileUtils.mkdir_p(target_dir)
-                file_name = URI.decode("#{post_data[:filename]}.html")
+                file_name = URI::DEFAULT_PARSER.unescape("#{post_data[:filename]}.html")
                 File.open(File.join(target_dir, file_name), "w") do |f|
                   f.flock(File::LOCK_EX)
@@ -264,19 +255,16 @@ module JekyllImport
             { :filename => filename, :header => header, :body => body }
           elsif @in_entry_elem[:meta][:kind] == "comment"
             timestamp = Time.parse(@in_entry_elem[:meta][:published]).strftime("%Y-%m-%d")
-            if @in_entry_elem[:meta][:original_url]
-              @comment_seq ||= 1
+            raise "Original URL is missing" unless @in_entry_elem[:meta][:original_url]
-              original_uri = URI.parse(@in_entry_elem[:meta][:original_url])
-              original_path = original_uri.path.to_s
-              filename = format("%s-%s-%s", timestamp, File.basename(original_path, File.extname(original_path)), @comment_seq)
+            @comment_seq ||= 1
-              @comment_seq += 1
+            original_uri  = URI.parse(@in_entry_elem[:meta][:original_url])
+            original_path = original_uri.path.to_s
+            filename = format("%s-%s-%s", timestamp, File.basename(original_path, File.extname(original_path)), @comment_seq)
-              @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
-            else
-              raise "Original URL is missing"
-            end
+            @comment_seq += 1
+            @original_url_base = "#{original_uri.scheme}://#{original_uri.host}"
             header = {
               "date"            => @in_entry_elem[:meta][:published],

data/lib/jekyll-import/importers/csv.rb CHANGED Viewed

@@ -12,8 +12,8 @@ module JekyllImport
       end
       def self.specify_options(c)
-        c.option "file",            "--file NAME",       'The CSV file to import (default: "posts.csv")'
-        c.option "no-front-matter", "--no-front-matter", "Do not add the default front matter to the post body"
+        c.option "file",            "--file NAME",       "The CSV file to import. (default: 'posts.csv')"
+        c.option "no-front-matter", "--no-front-matter", "Do not add the default front matter to the post body. (default: false)"
       end
       # Reads a csv with title, permalink, body, published_at, and filter.

data/lib/jekyll-import/importers/dotclear.rb CHANGED Viewed

@@ -1,121 +1,180 @@
 # frozen_string_literal: true
-# Tested with dotClear 2.1.5
 module JekyllImport
   module Importers
     class Dotclear < Importer
-      def self.specify_options(c)
-        c.option "datafile", "--datafile PATH", "dotClear export file"
-        c.option "mediafolder", "--mediafolder PATH", "dotClear media export folder (media.zip inflated)"
-      end
+      class << self
+        def specify_options(c)
+          c.option "datafile",    "--datafile PATH",   "Dotclear export file."
+          c.option "mediafolder", "--mediafolder DIR", "Dotclear media export folder (unpacked media.zip)."
+        end
-      def self.require_deps
-        JekyllImport.require_with_fallback(%w(
-          rubygems
-          fileutils
-          safe_yaml
-          date
-          active_support
-          active_support/core_ext/string/inflections
-          csv
-          pp
-        ))
-      end
+        def require_deps
+          JekyllImport.require_with_fallback(%w())
+        end
-      def self.validate(opts)
-        abort "Specify a data file !" if opts["datafile"].nil? || opts["datafile"].empty?
-        abort "Specify a media folder !" if opts["mediafolder"].nil? || opts["mediafolder"].empty?
-      end
+        def validate(opts)
+          file_path = opts["datafile"]
+          log_undefined_flag_error("datafile") if file_path.nil? || file_path.empty?
-      def self.extract_headers_section(str)
-        str[1..-2].split(" ")[1].split(",")
-      end
-      def self.extract_data_section(str)
-        str.gsub(%r!^"!, "").gsub(%r!"$!, "").split('","')
-      end
+          file_path = File.expand_path(file_path)
+          if File.open(file_path, "rb", &:readline).start_with?("///DOTCLEAR|")
+            @data = read_export(file_path)
+            Jekyll.logger.info "Export File:", file_path
+          else
+            Jekyll.logger.abort_with "Import Error:", "#{file_path.inspect} is not a valid Dotclear export file!"
+          end
-      def self.process(opts)
-        options = {
-          :datafile    => opts.fetch("datafile", ""),
-          :mediafolder => opts.fetch("mediafolder", ""),
-        }
+          assets = @data["media"]
+          return if !assets || assets.empty?
-        FileUtils.mkdir_p("_posts")
-        FileUtils.mkdir_p("_drafts")
+          Jekyll.logger.info "", "Media files detected in export data."
-        type_data = ""
-        headers = {}
-        posts_and_drafts = {}
-        keywords = {}
+          media_dir = opts["mediafolder"]
+          log_undefined_flag_error("mediafolder") if media_dir.nil? || media_dir.empty?
-        File.readlines(options[:datafile]).each do |lineraw|
-          line = lineraw.strip.gsub(%r!\n$!, "")
+          media_dir = File.expand_path(media_dir)
+          log_invalid_media_dir_error(media_dir) if !File.directory?(media_dir) || Dir.empty?(media_dir)
+        end
-          next if line.empty?
+        def process(opts)
+          import_posts
+          import_assets(opts["mediafolder"])
+          Jekyll.logger.info "", "and, done!"
+        end
-          if line.start_with?("[") # post | media \ meta | comment...
-            type_data = line.split(" ").first[1..-1]
-            headers[type_data] = extract_headers_section(line)
-            next
+        private
+        # Parse backup sections into a Hash of arrays.
+        #
+        # Each section is of following shape:
+        #
+        #   [key alpha,beta,gamma,...]
+        #   lorem,ipsum,dolor,...
+        #   red,blue,green,...
+        #
+        # Returns Hash of shape:
+        #
+        #   {key => [{alpha => lorem,...}, {alpha => red,...}]}
+        #
+        def read_export(file)
+          ignored_sections = %w(category comment link setting)
+          File.read(file, :encoding => "utf-8").split("\n\n").each_with_object({}) do |section, data|
+            next unless %r!^\[(?<key>.*?) (?<header>.*)\]\n(?<rows>.*)!m =~ section
+            next if ignored_sections.include?(key)
+            headers = header.split(",")
+            data[key] = rows.each_line.with_object([]) do |line, bucket|
+              bucket << headers.zip(sanitize_line!(line)).to_h
+            end
+            data
           end
+        end
-          elts = extract_data_section(line)
-          if type_data == "post"
-            draft = (elts[headers[type_data].index("post_status")] != "1")
+        def register_post_tags
+          @data["meta"].each_with_object({}) do |entry, tags|
+            next unless entry["meta_type"] == "tag"
-            date_str = elts[headers[type_data].index("post_creadt")]
-            date_blank = (date_str.nil? || date_str.empty?)
-            date_str_formatted = date_blank ? Date.today : Date.parse(date_str).strftime("%Y-%m-%d")
-            title_param = elts[headers[type_data].index("post_title")].to_s.parameterize
+            post_id = entry["post_id"]
+            tags[post_id] ||= []
+            tags[post_id] << entry["meta_id"]
+          end
+        end
-            content = elts[headers[type_data].index("post_content_xhtml")].to_s
-            content = content.gsub('\"', '"').gsub('\n', "\n").gsub("/public/", "/assets/images/")
+        def log_undefined_flag_error(label)
+          Jekyll.logger.abort_with "Import Error:", "--#{label} flag cannot be undefined, null or empty!"
+        end
-            filepath = File.join(Dir.pwd, (draft ? "_drafts" : "_posts"), "#{date_str_formatted}-#{title_param}.html")
+        def log_invalid_media_dir_error(media_dir)
+          Jekyll.logger.error "Import Error:", "--mediafolder should be a non-empty directory."
+          Jekyll.logger.abort_with "", "Please check #{media_dir.inspect}."
+        end
-            entire_content_file = <<~POST_FILE
-              ---
-              layout: post
-              title: "#{elts[headers[type_data].index("post_title")]}"
-              date: #{elts[headers[type_data].index("post_creadt")]} +0100
-              tags: ABC
-              ---
+        def sanitize_line!(line)
+          line.strip!
+          line.split('","').tap do |items|
+            items[0].delete_prefix!('"')
+            items[-1].delete_suffix!('"')
+          end
+        end
-              #{content}
-            POST_FILE
+        # -
-            posts_and_drafts[elts[headers[type_data].index("post_id")]] = { :path => filepath, :content => entire_content_file }
-          elsif type_data == "media"
-            elts[headers[type_data].index("media_title")]
-            mediafilepath = elts[headers[type_data].index("media_file")]
+        REPLACE_MAP = {
+          '\"'                => '"',
+          '\r\n'              => "\n",
+          '\n'                => "\n",
+          "/dotclear/public/" => "/assets/dotclear/",
+          "/public/"          => "/assets/dotclear/",
+        }.freeze
-            src_path = File.join(options[:mediafolder], mediafilepath)
-            dst_path = File.join(Dir.pwd, "assets", "images", mediafilepath.to_s)
+        REPLACE_RE = Regexp.union(REPLACE_MAP.keys)
-            FileUtils.mkdir_p(File.dirname(dst_path))
-            FileUtils.cp(src_path, dst_path)
-          elsif type_data == "meta"
-            keywords[elts[headers[type_data].index("post_id")]] ||= []
-            keywords[elts[headers[type_data].index("post_id")]] << elts[headers[type_data].index("meta_id")]
-          elsif type_data == "link"
+        private_constant :REPLACE_MAP, :REPLACE_RE
-          elsif type_data == "setting"
+        # -
-          elsif type_data == "comment"
+        def adjust_post_contents!(content)
+          content.strip!
+          content.gsub!(REPLACE_RE, REPLACE_MAP)
+          content
+        end
+        def import_posts
+          tags = register_post_tags
+          posts = @data["post"]
+          FileUtils.mkdir_p("_drafts") unless posts.empty?
+          Jekyll.logger.info "Importing posts.."
+          posts.each do |post|
+            date, title = post.values_at("post_creadt", "post_title")
+            path = File.join("_drafts", Date.parse(date).strftime("%Y-%m-%d-") + Jekyll::Utils.slugify(title) + ".html")
+            excerpt = adjust_post_contents!(post["post_excerpt_xhtml"].to_s)
+            excerpt = nil if excerpt.empty?
+            # Unlike the paradigm in Jekyll-generated HTML, `post_content_xhtml` in the export data
+            # doesn't begin with `post_excerpt_xhtml`.
+            # Instead of checking whether the excerpt content exists elsewhere in the exported content
+            # string, always prepend excerpt onto content with an empty line in between.
+            content = [excerpt, post["post_content_xhtml"]].tap(&:compact!).join("\n\n")
+            front_matter_data = {
+              "layout"       => "post",
+              "title"        => title,
+              "date"         => date,
+              "lang"         => post["post_lang"],
+              "tags"         => tags[post["post_id"]],
+              "original_url" => post["post_url"], # URL as included in the export-file.
+              "excerpt"      => excerpt,
+            }.tap(&:compact!)
+            Jekyll.logger.info "Creating:", path
+            File.write(path, "#{YAML.dump(front_matter_data)}---\n\n#{adjust_post_contents!(content)}\n")
           end
         end
-        # POST-process : Change media path in posts and drafts
-        posts_and_drafts.each do |post_id, hsh|
-          keywords_str = keywords[post_id].to_a.join(", ")
-          content_file = hsh[:content]
-          content_file = content_file.gsub("tags: ABC", "tags: [#{keywords_str}]")
-          File.open(hsh[:path], "wb") do |f|
-            f.write(content_file)
+        def import_assets(src_dir)
+          assets = @data["media"]
+          FileUtils.mkdir_p("assets/dotclear") if assets && !assets.empty?
+          Jekyll.logger.info "Importing assets.."
+          assets.each do |asset|
+            file_path = File.join(src_dir, asset["media_file"])
+            if File.exist?(file_path)
+              dest_path = File.join("assets/dotclear", asset["media_file"])
+              FileUtils.mkdir_p(File.dirname(dest_path))
+              Jekyll.logger.info "Copying:", file_path
+              Jekyll.logger.info "To:", dest_path
+              FileUtils.cp_r file_path, dest_path
+            else
+              Jekyll.logger.info "Not found:", file_path
+            end
           end
         end
       end

data/lib/jekyll-import/importers/drupal6.rb CHANGED Viewed

@@ -19,15 +19,17 @@ module JekyllImport
                        nr.teaser,
                        n.created,
                        n.status,
+                       ua.dst AS alias,
                        n.type,
                        GROUP_CONCAT( td.name SEPARATOR '|' ) AS 'tags'
-                FROM #{prefix}node_revisions AS nr,
+                FROM #{prefix}node_revisions AS nr, url_alias AS ua,
                      #{prefix}node AS n
                      LEFT OUTER JOIN #{prefix}term_node AS tn ON tn.nid = n.nid
                      LEFT OUTER JOIN #{prefix}term_data AS td ON tn.tid = td.tid
                 WHERE (#{types})
                   AND n.vid = nr.vid
-                GROUP BY n.nid
+                  AND  ua.src = CONCAT( 'node/', n.nid)
+                GROUP BY n.nid, ua.dst
 SQL
         query
@@ -44,9 +46,11 @@ SQL
         data = {
           "excerpt"    => summary,
-          "categories" => tags.split("|"),
+          "categories" => tags.split("|").uniq,
         }
+        data["permalink"] = "/" + sql_post_data[:alias] if sql_post_data[:alias]
         [data, content]
       end
     end

data/lib/jekyll-import/importers/drupal8.rb ADDED Viewed

@@ -0,0 +1,65 @@
+# frozen_string_literal: true
+require "jekyll-import/importers/drupal_common"
+module JekyllImport
+  module Importers
+    class Drupal8 < Importer
+      include DrupalCommon
+      extend DrupalCommon::ClassMethods
+      def self.build_query(prefix, types, engine)
+        types = types.join("' OR n.type = '")
+        types = "n.type = '#{types}'"
+        tag_group = if engine == "postgresql"
+                      <<POSTGRESQL
+            (SELECT STRING_AGG(td.name, '|')
+            FROM #{prefix}taxonomy_term_field_data td, #{prefix}taxonomy_index ti
+            WHERE ti.tid = td.tid AND ti.nid = n.nid) AS tags
+POSTGRESQL
+                    else
+                      <<SQL
+            (SELECT GROUP_CONCAT(td.name SEPARATOR '|')
+            FROM #{prefix}taxonomy_term_field_data td, #{prefix}taxonomy_index ti
+            WHERE ti.tid = td.tid AND ti.nid = n.nid) AS 'tags'
+SQL
+                    end
+        query = <<QUERY
+                SELECT n.nid,
+                       n.title,
+                       nb.body_value,
+                       nb.body_summary,
+                       n.created,
+                       n.status,
+                       n.type,
+                       #{tag_group}
+                FROM #{prefix}node_field_data AS n
+                LEFT JOIN #{prefix}node__body AS nb
+                  ON nb.entity_id = n.nid
+                WHERE (#{types})
+QUERY
+        query
+      end
+      def self.aliases_query(prefix)
+        "SELECT source, alias FROM #{prefix}url_alias WHERE source = ?"
+      end
+      def self.post_data(sql_post_data)
+        content = sql_post_data[:body_value].to_s
+        summary = sql_post_data[:body_summary].to_s
+        tags = (sql_post_data[:tags] || "").downcase.strip
+        data = {
+          "excerpt"    => summary,
+          "categories" => tags.split("|"),
+        }
+        [data, content]
+      end
+    end
+  end
+end