RubyGems - ZMediumToMarkdown - Versions diffs - 1.0.1 → 1.0.2 - Mend

ZMediumToMarkdown 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

checksums.yaml +4 -4
data/bin/ZMediumFetcher +63 -7
data/lib/Helper.rb +24 -0
data/lib/Parsers/CodeBlockParser.rb +22 -0
data/lib/Parsers/FallbackParser.rb +2 -1
data/lib/Parsers/PQParser.rb +5 -1
data/lib/Parsers/PREParser.rb +15 -2
data/lib/Post.rb +24 -0
metadata +2 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 43d039e9c0ec69d1e765f14c7f23d9b171cd40030d5ea53a81934e074b4176e3
-  data.tar.gz: 26083e071edd627b747d0a67ca8e84921e2558d2d8b4416be99d1d685c54e232
+  metadata.gz: a2243519ed0bc3c844758752f194791e882eb88371e91c082cb6e342c5fb94c0
+  data.tar.gz: 32fd50f1f531288f8d0e0f13a01e0412bd7acd3341f2b4e9af059635a0478f14
 SHA512:
-  metadata.gz: 15a68b2b9c048f5dbcfea9a6bcf1285d2a2e55839aaa69766cdc0c437f2d97d29ed1d5b16c40a1e9d0cdf7c504a28ddc9a0e5526d08998d7d3b4e160a66f2f25
-  data.tar.gz: f27d305f8f2b2886d32b97afe3c967851fe7108bd76bb618010015f6952dc0e2f98e8a14eef8d211025baa1524eefbc8294366047e92570e3ac700889ce94002
+  metadata.gz: cce4567526e3db1c0d92ef7fa5b1f2953a4242c8dc505fd35f090deee5df8eabd81762712b72ebd4a854082c3a4eae2057622f7e4283335d1df98a191235f38a
+  data.tar.gz: c87a247402fdc62ab3634ffaa422729cfb5fdfe1f0f6220a70c5e5bb598a542f05a64b97a42c747cab7521f63d8942f179bfb74def31ccefa0d49002da60d673

data/bin/ZMediumFetcher CHANGED Viewed

@@ -24,6 +24,7 @@ require "Parsers/OLIParser"
 require "Parsers/MIXTAPEEMBEDParser"
 require "Parsers/PQParser"
 require "Parsers/LinkParser"
+require "Parsers/CodeBlockParser"
 require "PathPolicy"
 require "Request"
@@ -124,8 +125,10 @@ class ZMediumFetcher
             imgParser.setNext(bqParser)
         preParser = PREParser.new()
             bqParser.setNext(preParser)
+        codeBlockParser = CodeBlockParser.new()
+            preParser.setNext(codeBlockParser)
         fallbackParser = FallbackParser.new()
-            preParser.setNext(fallbackParser)
+            codeBlockParser.setNext(fallbackParser)
         h1Parser
@@ -145,6 +148,8 @@ class ZMediumFetcher
         if postContent.nil?
             raise "Error: Content is empty! PostURL: #{postURL}"
         end
+        postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
         sourceParagraphs = Post.parsePostParagraphsFromPostContent(postContent, postID)
         if sourceParagraphs.nil?
@@ -156,7 +161,8 @@ class ZMediumFetcher
         paragraphs = []
         oliIndex = 0
-        preParagraph = nil
+        previousParagraph = nil
+        preTypeParagraphs = []
         sourceParagraphs.each do |sourcParagraph|
             paragraph = Paragraph.new(sourcParagraph, postID, postContent)
             if OLIParser.isOLI(paragraph)
@@ -168,13 +174,55 @@ class ZMediumFetcher
             # if previous is OLI or ULI and current is not OLI or ULI
             # then insert a blank paragraph to keep the markdown format correct
-            if (OLIParser.isOLI(preParagraph) && !OLIParser.isOLI(paragraph)) ||
-                (ULIParser.isULI(preParagraph) && !ULIParser.isULI(paragraph))
+            if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
+                (ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
                 paragraphs.append(Paragraph.makeBlankParagraph(postID))
             end
+            # group consecutive PRE paragraphs into a single code block,
+            # because Medium presents a code block as a run of consecutive PRE paragraphs
+            # e.g.
+            # type=pre, text=<html>
+            # type=pre, text=text
+            # type=pre, text=</html>
+            if !previousParagraph.nil?
+                if PREParser.isPRE(paragraph)
+                    # if current is pre
+                    preTypeParagraphs.append(paragraph)
+                elsif PREParser.isPRE(previousParagraph) && !PREParser.isPRE(paragraph)
+                    # if current is not pre and previousParagraph is pre and preTypeParagraphs > 1
+                    if preTypeParagraphs.length > 1
+                        lastPreTypeParagraph = preTypeParagraphs.pop
+                        # join the text of the remaining PRE paragraphs and assign it to the last PRE paragraph
+                        groupByText = ""
+                        preTypeParagraphs.each do |preTypeParagraph|
+                            if groupByText != ""
+                                groupByText += "\n"
+                            end
+                            markupParser = MarkupParser.new(postHtml, preTypeParagraph)
+                            groupByText += markupParser.parse()
+                        end
+                        lastPreTypeParagraph.text = "#{groupByText}"
+                        lastPreTypeParagraph.type = CodeBlockParser.getTypeString()
+                        # remove all preParagraphs
+                        preTypeParagraphNames = preTypeParagraphs.map do |preTypeParagraph|
+                            preTypeParagraph.name
+                        end
+                        paragraphs = paragraphs.select do |paragraph|
+                            !preTypeParagraphNames.include? paragraph.name
+                        end
+                    end
+                    preTypeParagraphs = []
+                end
+            end
             paragraphs.append(paragraph)
-            preParagraph = paragraph
+            previousParagraph = paragraph
         end
         postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
@@ -191,6 +239,10 @@ class ZMediumFetcher
         Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
         index = 0
         File.open(absolutePath, "w+") do |file|
+            # write postInfo at the top of the file
+            file.puts(Helper.createPostInfo(postInfo))
             paragraphs.each do |paragraph|
                 markupParser = MarkupParser.new(postHtml, paragraph)
                 paragraph.text = markupParser.parse()
@@ -268,7 +320,11 @@ begin
     puts "You have read and agree with the Disclaimer."
     Main.new()
     puts "https://github.com/ZhgChgLi/ZMediumToMarkdown"
-    puts "If this repo is helpful, please help to star this repo or recommend it to your friends. Thanks."
+    puts "Thanks for using this tool."
+    puts "If this is helpful, please help to star the repo or recommend it to your friends."
 rescue => e
-    puts "Error: #{e.class} #{e.message}"
+    puts "#Error: #{e.class} #{e.message}\n"
+    puts e.backtrace
+    puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
+    puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
 end

data/lib/Helper.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 $lib = File.expand_path('../lib', File.dirname(__FILE__))
+require 'Post'
 class Helper
     def self.createDirIfNotExist(dirPath)
         dirs = dirPath.split("/")
@@ -11,6 +13,28 @@ class Helper
         end while dirs.length > 0
     end
+    def self.makeWarningText(message)
+        puts "####################################################\n"
+        puts "#WARNING:\n"
+        puts "##{message}\n"
+        puts "#--------------------------------------------------#\n"
+        puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
+        puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
+        puts "####################################################\n"
+    end
+    def self.createPostInfo(postInfo)
+        result = "---\n"
+        result += "title: #{postInfo.title}\n"
+        result += "author: #{postInfo.creator}\n"
+        result += "date: #{postInfo.firstPublishedAt}\n"
+        result += "tags: [#{postInfo.tags.join(",")}]\n"
+        result += "---\n"
+        result += "\r\n"
+        result
+    end
     def self.createWatermark(postURL)
         text = "\r\n\r\n\r\n"
         text += "+-----------------------------------------------------------------------------------+"

data/lib/Parsers/CodeBlockParser.rb ADDED Viewed

@@ -0,0 +1,22 @@
+$lib = File.expand_path('../', File.dirname(__FILE__))
+require "Parsers/Parser"
+require 'Models/Paragraph'
+class CodeBlockParser < Parser
+    attr_accessor :nextParser
+    def self.getTypeString()
+        'CODE_BLOCK'
+    end
+    def parse(paragraph)
+        if paragraph.type == CodeBlockParser.getTypeString()
+            "```\n#{paragraph.text}\n```"
+        else
+            if !nextParser.nil?
+                nextParser.parse(paragraph)
+            end
+        end
+    end
+end

data/lib/Parsers/FallbackParser.rb CHANGED Viewed

@@ -1,12 +1,13 @@
 $lib = File.expand_path('../', File.dirname(__FILE__))
+require "Helper"
 require "Parsers/Parser"
 require 'Models/Paragraph'
 class FallbackParser < Parser
     attr_accessor :nextParser
     def parse(paragraph)
-        puts paragraph.type
+        Helper.makeWarningText("Undefined Paragraph Type: #{paragraph.type}, will treat as plain text temporarily.")
         "#{paragraph.text}"
     end
 end

data/lib/Parsers/PQParser.rb CHANGED Viewed

@@ -7,7 +7,11 @@ class PQParser < Parser
     attr_accessor :nextParser
     def parse(paragraph)
         if paragraph.type == 'PQ'
-            "> #{paragraph.text}"
+            result = ""
+            paragraph.text.each_line do |p|
+                result += "> #{p}"
+            end
+            result
         else
             if !nextParser.nil?
                 nextParser.parse(paragraph)

data/lib/Parsers/PREParser.rb CHANGED Viewed

@@ -5,9 +5,22 @@ require 'Models/Paragraph'
 class PREParser < Parser
     attr_accessor :nextParser
+    def self.isPRE(paragraph)
+        if paragraph.nil?
+            false
+        else
+            paragraph.type == "PRE"
+        end
+    end
     def parse(paragraph)
-        if paragraph.type == 'PRE'
-            "> #{paragraph.text}"
+        if PREParser.isPRE(paragraph)
+            result = ""
+            paragraph.text.each_line do |p|
+                result += "> #{p}"
+            end
+            result
         else
             if !nextParser.nil?
                 nextParser.parse(paragraph)

data/lib/Post.rb CHANGED Viewed

@@ -4,8 +4,14 @@ require "Request"
 require 'uri'
 require 'nokogiri'
 require 'json'
+require 'date'
 class Post
+  class PostInfo
+    attr_accessor :title, :tags, :creator, :firstPublishedAt
+  end
   def self.getPostIDFromPostURLString(postURLString)
     uri = URI.parse(postURLString)
     postID = uri.path.split('/').last.split('-').last
@@ -40,4 +46,22 @@ class Post
       result.map { |paragraph| content[paragraph["__ref"]] }
     end
   end
+  def self.parsePostInfoFromPostContent(content, postID)
+    postInfo = PostInfo.new()
+    postInfo.title = content&.dig("Post:#{postID}", "title")
+    postInfo.tags = content&.dig("Post:#{postID}", "tags").map{ |tag| tag["__ref"].gsub! 'Tag:', '' }
+    creatorRef = content&.dig("Post:#{postID}", "creator", "__ref")
+    if !creatorRef.nil?
+      postInfo.creator = content&.dig(creatorRef, "name")
+    end
+    firstPublishedAt = content&.dig("Post:#{postID}", "firstPublishedAt")
+    if !firstPublishedAt.nil?
+      postInfo.firstPublishedAt = DateTime.strptime(firstPublishedAt.to_s,'%Q')
+    end
+    postInfo
+  end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ZMediumToMarkdown
 version: !ruby/object:Gem::Version
-  version: 1.0.1
+  version: 1.0.2
 platform: ruby
 authors:
 - ZhgChgLi
@@ -66,6 +66,7 @@ files:
 - lib/ImageDownloader.rb
 - lib/Models/Paragraph.rb
 - lib/Parsers/BQParser.rb
+- lib/Parsers/CodeBlockParser.rb
 - lib/Parsers/FallbackParser.rb
 - lib/Parsers/H1Parser.rb
 - lib/Parsers/H2Parser.rb