RubyGems - ZMediumToMarkdown - Versions diffs - 1.0.1 → 1.1.0 - Mend

ZMediumToMarkdown 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

checksums.yaml +4 -4
data/bin/ZMediumFetcher +98 -31
data/lib/Helper.rb +25 -0
data/lib/Parsers/BQParser.rb +5 -1
data/lib/Parsers/CodeBlockParser.rb +22 -0
data/lib/Parsers/FallbackParser.rb +2 -1
data/lib/Parsers/PQParser.rb +5 -1
data/lib/Parsers/PREParser.rb +16 -2
data/lib/Post.rb +29 -0
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 43d039e9c0ec69d1e765f14c7f23d9b171cd40030d5ea53a81934e074b4176e3
-  data.tar.gz: 26083e071edd627b747d0a67ca8e84921e2558d2d8b4416be99d1d685c54e232
+  metadata.gz: 29245a0299d0f492d7000a27c97f4cfdd305b5bd39b31d1dfbdfd126f938daf1
+  data.tar.gz: 7a81eca7da5c8a3d02b80936f2395ff1385ff37ef7092a5f6ae919e9dc817065
 SHA512:
-  metadata.gz: 15a68b2b9c048f5dbcfea9a6bcf1285d2a2e55839aaa69766cdc0c437f2d97d29ed1d5b16c40a1e9d0cdf7c504a28ddc9a0e5526d08998d7d3b4e160a66f2f25
-  data.tar.gz: f27d305f8f2b2886d32b97afe3c967851fe7108bd76bb618010015f6952dc0e2f98e8a14eef8d211025baa1524eefbc8294366047e92570e3ac700889ce94002
+  metadata.gz: 592b4a98e54ea032aee4560c23a827637fcfe38bc56b66af7cb1b5799e3a1b8b641f20de03566e96f04b9b8a75ddf97b97e339503f5b49c55afa599a8cdbf31b
+  data.tar.gz: e2003629feee6fe3230d4c72059a860e9be0458e28a5fee7640a13c4aa1ef5ec2047d27a21fa8a7a719d19bb601efa245dcec24461738bf0cdc113e6ed1e694c

data/bin/ZMediumFetcher CHANGED Viewed

@@ -7,6 +7,7 @@ $LOAD_PATH.unshift($lib)
 require "open-uri"
 require 'json'
 require 'optparse'
+require 'fileutils'
 require "Parsers/H1Parser"
 require "Parsers/H2Parser"
@@ -24,6 +25,7 @@ require "Parsers/OLIParser"
 require "Parsers/MIXTAPEEMBEDParser"
 require "Parsers/PQParser"
 require "Parsers/LinkParser"
+require "Parsers/CodeBlockParser"
 require "PathPolicy"
 require "Request"
@@ -124,8 +126,10 @@ class ZMediumFetcher
             imgParser.setNext(bqParser)
         preParser = PREParser.new()
             bqParser.setNext(preParser)
+        codeBlockParser = CodeBlockParser.new()
+            preParser.setNext(codeBlockParser)
         fallbackParser = FallbackParser.new()
-            preParser.setNext(fallbackParser)
+            codeBlockParser.setNext(fallbackParser)
         h1Parser
@@ -145,6 +149,8 @@ class ZMediumFetcher
         if postContent.nil?
             raise "Error: Content is empty! PostURL: #{postURL}"
         end
+        postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
         sourceParagraphs = Post.parsePostParagraphsFromPostContent(postContent, postID)
         if sourceParagraphs.nil?
@@ -156,7 +162,8 @@ class ZMediumFetcher
         paragraphs = []
         oliIndex = 0
-        preParagraph = nil
+        previousParagraph = nil
+        preTypeParagraphs = []
         sourceParagraphs.each do |sourcParagraph|
             paragraph = Paragraph.new(sourcParagraph, postID, postContent)
             if OLIParser.isOLI(paragraph)
@@ -168,13 +175,55 @@ class ZMediumFetcher
             # if previous is OLI or ULI and current is not OLI or ULI
             # than insert a blank paragraph to keep markdown foramt correct
-            if (OLIParser.isOLI(preParagraph) && !OLIParser.isOLI(paragraph)) ||
-                (ULIParser.isULI(preParagraph) && !ULIParser.isULI(paragraph))
+            if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
+                (ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
                 paragraphs.append(Paragraph.makeBlankParagraph(postID))
             end
+            # group by PRE paragraph to code block
+            # because medium will give continue pre to present code block
+            # e.g.
+            # type=pre, text=<html>
+            # type=pre, text=text
+            # type=pre, text=</html>
+            if !previousParagraph.nil?
+                if PREParser.isPRE(paragraph)
+                    # if current is pre
+                    preTypeParagraphs.append(paragraph)
+                elsif PREParser.isPRE(previousParagraph) && !PREParser.isPRE(paragraph)
+                    # if current is note pre and previousParagraph is pre and preTypeParagraphs > 1
+                    if preTypeParagraphs.length > 1
+                        lastPreTypeParagraph = preTypeParagraphs.pop
+                        # group by preParagraphs text to last preParagraph
+                        groupByText = ""
+                        preTypeParagraphs.each do |preTypeParagraph|
+                            if groupByText != ""
+                                groupByText += "\n"
+                            end
+                            markupParser = MarkupParser.new(postHtml, preTypeParagraph)
+                            groupByText += markupParser.parse()
+                        end
+                        lastPreTypeParagraph.text = "#{groupByText}"
+                        lastPreTypeParagraph.type = CodeBlockParser.getTypeString()
+                        # remove all preParagraphs
+                        preTypeParagraphNames = preTypeParagraphs.map do |preTypeParagraph|
+                            preTypeParagraph.name
+                        end
+                        paragraphs = paragraphs.select do |paragraph|
+                            !preTypeParagraphNames.include? paragraph.name
+                        end
+                    end
+                    preTypeParagraphs = []
+                end
+            end
             paragraphs.append(paragraph)
-            preParagraph = paragraph
+            previousParagraph = paragraph
         end
         postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
@@ -188,32 +237,45 @@ class ZMediumFetcher
         progress.printLog()
         absolutePath = postPathPolicy.getAbsolutePath("#{postPath}.md")
-        Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
-        index = 0
-        File.open(absolutePath, "w+") do |file|
-            paragraphs.each do |paragraph|
-                markupParser = MarkupParser.new(postHtml, paragraph)
-                paragraph.text = markupParser.parse()
-                result = startParser.parse(paragraph)
-                if !linkParser.nil?
-                    result = linkParser.parse(result, paragraph.markupLinks)
-                end
+        # if markdown file is exists and last modification time is >= latestPublishedAt(last update post time on medium)
+        if File.file?(absolutePath) && File.mtime(absolutePath) >= postInfo.latestPublishedAt
+            # Already downloaded and nothing has changed!, Skip!
+            progress.currentPostParagraphIndex = paragraphs.length
+            progress.message = "Skip, Post already downloaded and nothing has changed!"
+            progress.printLog()
+        else
+            Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
+            File.open(absolutePath, "w+") do |file|
+                # write postInfo into top
+                file.puts(Helper.createPostInfo(postInfo))
-                file.puts(result)
-                index += 1
-                progress.currentPostParagraphIndex = index
-                progress.message = "Converting Post..."
-                progress.printLog()
+                index = 0
+                paragraphs.each do |paragraph|
+                    markupParser = MarkupParser.new(postHtml, paragraph)
+                    paragraph.text = markupParser.parse()
+                    result = startParser.parse(paragraph)
+                    if !linkParser.nil?
+                        result = linkParser.parse(result, paragraph.markupLinks)
+                    end
+                    file.puts(result)
+                    index += 1
+                    progress.currentPostParagraphIndex = index
+                    progress.message = "Converting Post..."
+                    progress.printLog()
+                end
+                file.puts(Helper.createWatermark(postURL))
             end
+            FileUtils.touch absolutePath, :mtime => postInfo.latestPublishedAt
-            file.puts(Helper.createWatermark(postURL))
+            progress.message = "Post Successfully Downloaded!"
+            progress.printLog()
         end
-        progress.message = "Post Successfully Downloaded!"
-        progress.printLog()
         progress.postPath = nil
     end
@@ -264,11 +326,16 @@ class ZMediumFetcher
 end
 begin
-    puts "https://github.com/ZhgChgLi/ZMediumToMarkdown"
+    puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown"
     puts "You have read and agree with the Disclaimer."
     Main.new()
-    puts "https://github.com/ZhgChgLi/ZMediumToMarkdown"
-    puts "If this repo is helpful, please help to star this repo or recommend it to your friends. Thanks."
+    puts "Execute Successfully!!!"
+    puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown"
+    puts "#Thanks for using this tool."
+    puts "#If this is helpful, please help to star the repo or recommend it to your friends."
 rescue => e
-    puts "Error: #{e.class} #{e.message}"
+    puts "#Error: #{e.class} #{e.message}\n"
+    puts e.backtrace
+    puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
+    puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
 end

data/lib/Helper.rb CHANGED Viewed

@@ -1,5 +1,8 @@
 $lib = File.expand_path('../lib', File.dirname(__FILE__))
+require 'date'
+require 'Post'
 class Helper
     def self.createDirIfNotExist(dirPath)
         dirs = dirPath.split("/")
@@ -11,6 +14,28 @@ class Helper
         end while dirs.length > 0
     end
+    def self.makeWarningText(message)
+        puts "####################################################\n"
+        puts "#WARNING:\n"
+        puts "##{message}\n"
+        puts "#--------------------------------------------------#\n"
+        puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
+        puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
+        puts "####################################################\n"
+    end
+    def self.createPostInfo(postInfo)
+        result = "---\n"
+        result += "title: #{postInfo.title}\n"
+        result += "author: #{postInfo.creator}\n"
+        result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
+        result += "tags: [#{postInfo.tags.join(",")}]\n"
+        result += "---\n"
+        result += "\r\n"
+        result
+    end
     def self.createWatermark(postURL)
         text = "\r\n\r\n\r\n"
         text += "+-----------------------------------------------------------------------------------+"

data/lib/Parsers/BQParser.rb CHANGED Viewed

@@ -7,7 +7,11 @@ class BQParser < Parser
     attr_accessor :nextParser
     def parse(paragraph)
         if paragraph.type == 'BQ'
-            "> #{paragraph.text}"
+            result = ""
+            paragraph.text.each_line do |p|
+                result += "> #{p}"
+            end
+            result
         else
             if !nextParser.nil?
                 nextParser.parse(paragraph)

data/lib/Parsers/CodeBlockParser.rb ADDED Viewed

@@ -0,0 +1,22 @@
+$lib = File.expand_path('../', File.dirname(__FILE__))
+require "Parsers/Parser"
+require 'Models/Paragraph'
+class CodeBlockParser < Parser
+    attr_accessor :nextParser
+    def self.getTypeString()
+        'CODE_BLOCK'
+    end
+    def parse(paragraph)
+        if paragraph.type == CodeBlockParser.getTypeString()
+            "```\n#{paragraph.text}\n```"
+        else
+            if !nextParser.nil?
+                nextParser.parse(paragraph)
+            end
+        end
+    end
+end

data/lib/Parsers/FallbackParser.rb CHANGED Viewed

@@ -1,12 +1,13 @@
 $lib = File.expand_path('../', File.dirname(__FILE__))
+require "Helper"
 require "Parsers/Parser"
 require 'Models/Paragraph'
 class FallbackParser < Parser
     attr_accessor :nextParser
     def parse(paragraph)
-        puts paragraph.type
+        Helper.makeWarningText("Undefined Paragraph Type: #{paragraph.type}, will treat as plain text temporarily.")
         "#{paragraph.text}"
     end
 end

data/lib/Parsers/PQParser.rb CHANGED Viewed

@@ -7,7 +7,11 @@ class PQParser < Parser
     attr_accessor :nextParser
     def parse(paragraph)
         if paragraph.type == 'PQ'
-            "> #{paragraph.text}"
+            result = ""
+            paragraph.text.each_line do |p|
+                result += "> #{p}"
+            end
+            result
         else
             if !nextParser.nil?
                 nextParser.parse(paragraph)

data/lib/Parsers/PREParser.rb CHANGED Viewed

@@ -5,9 +5,23 @@ require 'Models/Paragraph'
 class PREParser < Parser
     attr_accessor :nextParser
+    def self.isPRE(paragraph)
+        if paragraph.nil?
+            false
+        else
+            paragraph.type == "PRE"
+        end
+    end
     def parse(paragraph)
-        if paragraph.type == 'PRE'
-            "> #{paragraph.text}"
+        if PREParser.isPRE(paragraph)
+            result = "```\n"
+            paragraph.text.each_line do |p|
+                result += p
+            end
+            result += "\n```"
+            result
         else
             if !nextParser.nil?
                 nextParser.parse(paragraph)

data/lib/Post.rb CHANGED Viewed

@@ -4,8 +4,14 @@ require "Request"
 require 'uri'
 require 'nokogiri'
 require 'json'
+require 'date'
 class Post
+  class PostInfo
+    attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt
+  end
   def self.getPostIDFromPostURLString(postURLString)
     uri = URI.parse(postURLString)
     postID = uri.path.split('/').last.split('-').last
@@ -40,4 +46,27 @@ class Post
       result.map { |paragraph| content[paragraph["__ref"]] }
     end
   end
+  def self.parsePostInfoFromPostContent(content, postID)
+    postInfo = PostInfo.new()
+    postInfo.title = content&.dig("Post:#{postID}", "title")
+    postInfo.tags = content&.dig("Post:#{postID}", "tags").map{ |tag| tag["__ref"].gsub! 'Tag:', '' }
+    creatorRef = content&.dig("Post:#{postID}", "creator", "__ref")
+    if !creatorRef.nil?
+      postInfo.creator = content&.dig(creatorRef, "name")
+    end
+    firstPublishedAt = content&.dig("Post:#{postID}", "firstPublishedAt")
+    if !firstPublishedAt.nil?
+      postInfo.firstPublishedAt = Time.at(0, firstPublishedAt, :millisecond)
+    end
+    latestPublishedAt = content&.dig("Post:#{postID}", "latestPublishedAt")
+    if !latestPublishedAt.nil?
+      postInfo.latestPublishedAt = Time.at(0, latestPublishedAt, :millisecond)
+    end
+    postInfo
+  end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ZMediumToMarkdown
 version: !ruby/object:Gem::Version
-  version: 1.0.1
+  version: 1.1.0
 platform: ruby
 authors:
 - ZhgChgLi
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-05-28 00:00:00.000000000 Z
+date: 2022-05-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -66,6 +66,7 @@ files:
 - lib/ImageDownloader.rb
 - lib/Models/Paragraph.rb
 - lib/Parsers/BQParser.rb
+- lib/Parsers/CodeBlockParser.rb
 - lib/Parsers/FallbackParser.rb
 - lib/Parsers/H1Parser.rb
 - lib/Parsers/H2Parser.rb