RubyGems - ZMediumToMarkdown - Versions diffs - 1.0.0 → 1.0.3 - Mend

ZMediumToMarkdown 1.0.0 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

checksums.yaml +4 -4
data/bin/ZMediumFetcher +63 -7
data/lib/Helper.rb +26 -0
data/lib/Parsers/BQParser.rb +5 -1
data/lib/Parsers/CodeBlockParser.rb +22 -0
data/lib/Parsers/FallbackParser.rb +2 -1
data/lib/Parsers/PQParser.rb +5 -1
data/lib/Parsers/PREParser.rb +16 -2
data/lib/Post.rb +24 -0
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 4a64eb0d39c1be48b0ddd24863c802f4f76234ebeee73161cf4b74fe8e5e4392
-  data.tar.gz: 88f480de193e4d89069bc3e5fd0bd6efb83c25cac4d8b65b9ec4e96efd459102
+  metadata.gz: 1f7805fe04110dc50c764983746065030b4cc87865810de1a50c3b5b89f646d0
+  data.tar.gz: 5eb355fcd4b28a1d0e704e80b5e3e564e770b681f4cd40302f556e647e3cda95
 SHA512:
-  metadata.gz: 4a85b5bf8e8b98a52d859cdf1b144a2d7ede7dc5701d2695f36296a34f8f5e9004aa83092effb7ab2ed21d9ae9cf3ca3d669cb915c7e73d5f3342835c35cdc12
-  data.tar.gz: '019224020c3d4071f0a8f2ac86c1beb22abaca096862dbf8d50dbe14ca0fdbf37ff8d78dc9b88b42a5af40107595e07a8d83868d0409d57486bbf894ad8c2e63'
+  metadata.gz: ff70606758386f70dc7fdc9ce1a8a45c6e4c6a136f27403794217be4f0acb59b94b00d2caca74f4ba6b464691fa1312ea2e9e94caf20785c5c758ced7de0aa6d
+  data.tar.gz: d50e747c1acdd6f1f3536711a5c1f15e2fcb729741efe584f8e878fb13e9a461b664e34b0143aef879e1504054ecb108204cb8521a1be200e9cfae52a1a1c8f0

data/bin/ZMediumFetcher CHANGED Viewed

@@ -24,6 +24,7 @@ require "Parsers/OLIParser"
 require "Parsers/MIXTAPEEMBEDParser"
 require "Parsers/PQParser"
 require "Parsers/LinkParser"
+require "Parsers/CodeBlockParser"
 require "PathPolicy"
 require "Request"
@@ -124,8 +125,10 @@ class ZMediumFetcher
             imgParser.setNext(bqParser)
         preParser = PREParser.new()
             bqParser.setNext(preParser)
+        codeBlockParser = CodeBlockParser.new()
+            preParser.setNext(codeBlockParser)
         fallbackParser = FallbackParser.new()
-            preParser.setNext(fallbackParser)
+            codeBlockParser.setNext(fallbackParser)
         h1Parser
@@ -145,6 +148,8 @@ class ZMediumFetcher
         if postContent.nil?
             raise "Error: Content is empty! PostURL: #{postURL}"
         end
+        postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
         sourceParagraphs = Post.parsePostParagraphsFromPostContent(postContent, postID)
         if sourceParagraphs.nil?
@@ -156,7 +161,8 @@ class ZMediumFetcher
         paragraphs = []
         oliIndex = 0
-        preParagraph = nil
+        previousParagraph = nil
+        preTypeParagraphs = []
         sourceParagraphs.each do |sourcParagraph|
             paragraph = Paragraph.new(sourcParagraph, postID, postContent)
             if OLIParser.isOLI(paragraph)
@@ -168,13 +174,55 @@ class ZMediumFetcher
             # if previous is OLI or ULI and current is not OLI or ULI
             # than insert a blank paragraph to keep markdown foramt correct
-            if (OLIParser.isOLI(preParagraph) && !OLIParser.isOLI(paragraph)) ||
-                (ULIParser.isULI(preParagraph) && !ULIParser.isULI(paragraph))
+            if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
+                (ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
                 paragraphs.append(Paragraph.makeBlankParagraph(postID))
             end
+            # group by PRE paragraph to code block
+            # because medium will give continue pre to present code block
+            # e.g.
+            # type=pre, text=<html>
+            # type=pre, text=text
+            # type=pre, text=</html>
+            if !previousParagraph.nil?
+                if PREParser.isPRE(paragraph)
+                    # if current is pre
+                    preTypeParagraphs.append(paragraph)
+                elsif PREParser.isPRE(previousParagraph) && !PREParser.isPRE(paragraph)
+                    # if current is note pre and previousParagraph is pre and preTypeParagraphs > 1
+                    if preTypeParagraphs.length > 1
+                        lastPreTypeParagraph = preTypeParagraphs.pop
+                        # group by preParagraphs text to last preParagraph
+                        groupByText = ""
+                        preTypeParagraphs.each do |preTypeParagraph|
+                            if groupByText != ""
+                                groupByText += "\n"
+                            end
+                            markupParser = MarkupParser.new(postHtml, preTypeParagraph)
+                            groupByText += markupParser.parse()
+                        end
+                        lastPreTypeParagraph.text = "#{groupByText}"
+                        lastPreTypeParagraph.type = CodeBlockParser.getTypeString()
+                        # remove all preParagraphs
+                        preTypeParagraphNames = preTypeParagraphs.map do |preTypeParagraph|
+                            preTypeParagraph.name
+                        end
+                        paragraphs = paragraphs.select do |paragraph|
+                            !preTypeParagraphNames.include? paragraph.name
+                        end
+                    end
+                    preTypeParagraphs = []
+                end
+            end
             paragraphs.append(paragraph)
-            preParagraph = paragraph
+            previousParagraph = paragraph
         end
         postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
@@ -191,6 +239,10 @@ class ZMediumFetcher
         Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
         index = 0
         File.open(absolutePath, "w+") do |file|
+            # write postInfo into top
+            file.puts(Helper.createPostInfo(postInfo))
             paragraphs.each do |paragraph|
                 markupParser = MarkupParser.new(postHtml, paragraph)
                 paragraph.text = markupParser.parse()
@@ -268,7 +320,11 @@ begin
     puts "You have read and agree with the Disclaimer."
     Main.new()
     puts "https://github.com/ZhgChgLi/ZMediumToMarkdown"
-    puts "If this repo is helpful, please help to star this repo or recommend it to your friends. Thanks."
+    puts "Thanks for using this tool."
+    puts "If this is helpful, please help to star the repo or recommend it to your friends."
 rescue => e
-    puts "Error: #{e.class} #{e.message}"
+    puts "#Error: #{e.class} #{e.message}\n"
+    puts e.backtrace
+    puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
+    puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
 end

data/lib/Helper.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 $lib = File.expand_path('../lib', File.dirname(__FILE__))
+require 'Post'
 class Helper
     def self.createDirIfNotExist(dirPath)
         dirs = dirPath.split("/")
@@ -11,12 +13,36 @@ class Helper
         end while dirs.length > 0
     end
+    def self.makeWarningText(message)
+        puts "####################################################\n"
+        puts "#WARNING:\n"
+        puts "##{message}\n"
+        puts "#--------------------------------------------------#\n"
+        puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
+        puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
+        puts "####################################################\n"
+    end
+    def self.createPostInfo(postInfo)
+        result = "---\n"
+        result += "title: #{postInfo.title}\n"
+        result += "author: #{postInfo.creator}\n"
+        result += "date: #{postInfo.firstPublishedAt}\n"
+        result += "tags: [#{postInfo.tags.join(",")}]\n"
+        result += "---\n"
+        result += "\r\n"
+        result
+    end
     def self.createWatermark(postURL)
         text = "\r\n\r\n\r\n"
         text += "+-----------------------------------------------------------------------------------+"
         text += "\r\n"
+        text += "\r\n"
         text += "| **[View original post on Medium](#{postURL}) - Converted by [ZhgChgLi](https://blog.zhgchg.li)/[ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)** |"
         text += "\r\n"
+        text += "\r\n"
         text += "+-----------------------------------------------------------------------------------+"
         text += "\r\n"

data/lib/Parsers/BQParser.rb CHANGED Viewed

@@ -7,7 +7,11 @@ class BQParser < Parser
     attr_accessor :nextParser
     def parse(paragraph)
         if paragraph.type == 'BQ'
-            "> #{paragraph.text}"
+            result = ""
+            paragraph.text.each_line do |p|
+                result += "> #{p}"
+            end
+            result
         else
             if !nextParser.nil?
                 nextParser.parse(paragraph)

data/lib/Parsers/CodeBlockParser.rb ADDED Viewed

@@ -0,0 +1,22 @@
+$lib = File.expand_path('../', File.dirname(__FILE__))
+require "Parsers/Parser"
+require 'Models/Paragraph'
+class CodeBlockParser < Parser
+    attr_accessor :nextParser
+    def self.getTypeString()
+        'CODE_BLOCK'
+    end
+    def parse(paragraph)
+        if paragraph.type == CodeBlockParser.getTypeString()
+            "```\n#{paragraph.text}\n```"
+        else
+            if !nextParser.nil?
+                nextParser.parse(paragraph)
+            end
+        end
+    end
+end

data/lib/Parsers/FallbackParser.rb CHANGED Viewed

@@ -1,12 +1,13 @@
 $lib = File.expand_path('../', File.dirname(__FILE__))
+require "Helper"
 require "Parsers/Parser"
 require 'Models/Paragraph'
 class FallbackParser < Parser
     attr_accessor :nextParser
     def parse(paragraph)
-        puts paragraph.type
+        Helper.makeWarningText("Undefined Paragraph Type: #{paragraph.type}, will treat as plain text temporarily.")
         "#{paragraph.text}"
     end
 end

data/lib/Parsers/PQParser.rb CHANGED Viewed

@@ -7,7 +7,11 @@ class PQParser < Parser
     attr_accessor :nextParser
     def parse(paragraph)
         if paragraph.type == 'PQ'
-            "> #{paragraph.text}"
+            result = ""
+            paragraph.text.each_line do |p|
+                result += "> #{p}"
+            end
+            result
         else
             if !nextParser.nil?
                 nextParser.parse(paragraph)

data/lib/Parsers/PREParser.rb CHANGED Viewed

@@ -5,9 +5,23 @@ require 'Models/Paragraph'
 class PREParser < Parser
     attr_accessor :nextParser
+    def self.isPRE(paragraph)
+        if paragraph.nil?
+            false
+        else
+            paragraph.type == "PRE"
+        end
+    end
     def parse(paragraph)
-        if paragraph.type == 'PRE'
-            "> #{paragraph.text}"
+        if PREParser.isPRE(paragraph)
+            result = "```\n"
+            paragraph.text.each_line do |p|
+                result += p
+            end
+            result += "\n```"
+            result
         else
             if !nextParser.nil?
                 nextParser.parse(paragraph)

data/lib/Post.rb CHANGED Viewed

@@ -4,8 +4,14 @@ require "Request"
 require 'uri'
 require 'nokogiri'
 require 'json'
+require 'date'
 class Post
+  class PostInfo
+    attr_accessor :title, :tags, :creator, :firstPublishedAt
+  end
   def self.getPostIDFromPostURLString(postURLString)
     uri = URI.parse(postURLString)
     postID = uri.path.split('/').last.split('-').last
@@ -40,4 +46,22 @@ class Post
       result.map { |paragraph| content[paragraph["__ref"]] }
     end
   end
+  def self.parsePostInfoFromPostContent(content, postID)
+    postInfo = PostInfo.new()
+    postInfo.title = content&.dig("Post:#{postID}", "title")
+    postInfo.tags = content&.dig("Post:#{postID}", "tags").map{ |tag| tag["__ref"].gsub! 'Tag:', '' }
+    creatorRef = content&.dig("Post:#{postID}", "creator", "__ref")
+    if !creatorRef.nil?
+      postInfo.creator = content&.dig(creatorRef, "name")
+    end
+    firstPublishedAt = content&.dig("Post:#{postID}", "firstPublishedAt")
+    if !firstPublishedAt.nil?
+      postInfo.firstPublishedAt = DateTime.strptime(firstPublishedAt.to_s,'%Q')
+    end
+    postInfo
+  end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ZMediumToMarkdown
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.0.3
 platform: ruby
 authors:
 - ZhgChgLi
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-05-28 00:00:00.000000000 Z
+date: 2022-05-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -66,6 +66,7 @@ files:
 - lib/ImageDownloader.rb
 - lib/Models/Paragraph.rb
 - lib/Parsers/BQParser.rb
+- lib/Parsers/CodeBlockParser.rb
 - lib/Parsers/FallbackParser.rb
 - lib/Parsers/H1Parser.rb
 - lib/Parsers/H2Parser.rb