RubyGems - ZMediumToMarkdown - Versions diffs - 1.9.7 → 2.0.1 - Mend

ZMediumToMarkdown 1.9.7 → 2.0.1

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (12)

checksums.yaml +4 -4
data/bin/ZMediumToMarkdown +4 -4
data/lib/Helper.rb +11 -15
data/lib/Models/Paragraph.rb +15 -9
data/lib/Parsers/IMGParser.rb +6 -9
data/lib/Parsers/IframeParser.rb +46 -11
data/lib/Parsers/MIXTAPEEMBEDParser.rb +2 -2
data/lib/Parsers/MarkupStyleRender.rb +2 -0
data/lib/PathPolicy.rb +19 -8
data/lib/Post.rb +21 -3
data/lib/ZMediumFetcher.rb +31 -14
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 521c7a377c1e9b93996dc31355a8e7d6afb90708d154f69e31f4f20675bed060
-  data.tar.gz: 396d663eaf2f302168dde595842f092bd0051094baf2cbf4974018c0b2aaf270
+  metadata.gz: c410145d7924db7410198b57030d8a886b81162d2dc0f63c580d9140131aaae3
+  data.tar.gz: 91930b43c92725becb2d6f2e2246a4df6c2bed1d7616cd4f6dfb11cafa0d3af3
 SHA512:
-  metadata.gz: 57d8f51c5fe1fa5d193e0cdd1e7d11152a5010520867e54966681d064d0f30a7d81b57fb389c765ed8c522f896b4fad4e5ddb3bff9d58f6ae4687172cc381cfd
-  data.tar.gz: 9c5dfe1ff916f9f9d6c288c3537fd9b542a664959f34101f573f66dd0ce2655611b930fb78396a7392c1998b8ae2c52bee9276e1250c40861ad7a97f2dce0809
+  metadata.gz: b23e795f4af879ad6902dcf7a842cb0f1ad160e722196b1e8e4b17bb404686ac0f9fb3801b396124e4f248beb4743c1c56787bf5ef17fecb1b662f880983ce74
+  data.tar.gz: 9fa9954456a922437b624e2b4a762b509e5fac44ba453ca947b87ab838ad0110fd005cb628e9ae8042d2ddf60f0b850e92a08a8a9a148c0a00e8c30c77326670

data/bin/ZMediumToMarkdown CHANGED Viewed

@@ -19,21 +19,21 @@ class Main
             opts.banner = "Usage: ZMediumFetcher [options]"
             opts.on('-uUSERNAME', '--username=USERNAME', 'Downloading all posts from user') do |username|
-                outputFilePath = PathPolicy.new(filePath, "Output")
+                outputFilePath = PathPolicy.new("#{filePath}/Output", "Output")
                 fetcher.downloadPostsByUsername(username, outputFilePath)
                 Helper.printNewVersionMessageIfExists()
             end
             opts.on('-pPOST_URL', '--postURL=POST_URL', 'Downloading single post') do |postURL|
-                outputFilePath = PathPolicy.new(filePath, "Output")
+                outputFilePath = PathPolicy.new("#{filePath}/Output", "Output")
                 fetcher.downloadPost(postURL, outputFilePath)
                 Helper.printNewVersionMessageIfExists()
             end
             opts.on('-jUSERNAME', '--jekyllUsername=USERNAME', 'Downloading all posts from user with Jekyll friendly') do |username|
-                outputFilePath = PathPolicy.new(filePath, "/")
+                outputFilePath = PathPolicy.new(filePath, "")
                 fetcher.isForJekyll = true
                 fetcher.downloadPostsByUsername(username, outputFilePath)
@@ -41,7 +41,7 @@ class Main
             end
             opts.on('-kPOST_URL', '--jekyllPostURL=POST_URL', 'Downloading single post with Jekyll friendly') do |postURL|
-                outputFilePath = PathPolicy.new(filePath, "/")
+                outputFilePath = PathPolicy.new(filePath, "")
                 fetcher.isForJekyll = true
                 fetcher.downloadPost(postURL, outputFilePath)

data/lib/Helper.rb CHANGED Viewed

@@ -12,10 +12,6 @@ require 'nokogiri'
 class Helper
-    def self.escapeMarkdown(text)
-        text.gsub(/(\*|_|`|\||\\|\{|\}|\[|\]|\(|\)|#|\+|\-|\.|\!)/){ |x| "\\#{x}" }
-    end
     def self.fetchOGImage(url)
         html = Request.html(Request.URL(url))
         content = html.search("meta[property='og:image']").attribute('content')
@@ -98,8 +94,7 @@ class Helper
         end
     end
-    def self.createPostInfo(postInfo)
+    def self.createPostInfo(postInfo, isForJekyll)
         title = postInfo.title.gsub("[","")
         title = title.gsub("]","")
@@ -107,10 +102,17 @@ class Helper
         result += "title: #{title}\n"
         result += "author: #{postInfo.creator}\n"
         result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
+        result += "last_modified_at: #{postInfo.latestPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
         result += "categories: #{postInfo.collectionName}\n"
         result += "tags: [#{postInfo.tags.join(",")}]\n"
         result += "description: #{postInfo.description}\n"
-        result += "render_with_liquid: false\n"
+        if !postInfo.previewImage.nil?
+            result += "image:\r\n"
+            result += "  path: #{postInfo.previewImage}\r\n"
+        end
+        if isForJekyll
+            result += "render_with_liquid: false\n"
+        end
         result += "---\n"
         result += "\r\n"
@@ -185,15 +187,9 @@ class Helper
     def self.createWatermark(postURL)
         text = "\r\n\r\n\r\n"
-        text += "+-----------------------------------------------------------------------------------+"
-        text += "\r\n"
+        text += "_Converted [Medium Post](#{postURL}) by [ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)._"
         text += "\r\n"
-        text += "| **[View original post on Medium](#{postURL}) - Converted by [ZhgChgLi](https://zhgchg.li)/[ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)** |"
-        text += "\r\n"
-        text += "\r\n"
-        text += "+-----------------------------------------------------------------------------------+"
-        text += "\r\n"
         text
     end
 end

data/lib/Models/Paragraph.rb CHANGED Viewed

@@ -5,7 +5,7 @@ require 'Parsers/PParser'
 require 'securerandom'
 class Paragraph
-    attr_accessor :postID, :name, :orgText, :orgTextWithEscape, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :oliIndex, :markups, :markupLinks
+    attr_accessor :postID, :name, :orgText, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :oliIndex, :markups, :markupLinks
     class Iframe
         attr_accessor :id, :title, :type, :src
@@ -66,9 +66,6 @@ class Paragraph
         @href = json['href']
         @postID = postID
-        orgTextWithEscape = Helper.escapeMarkdown(json['text'])
-        @orgTextWithEscape = orgTextWithEscape
         if json['metadata'].nil?
             @metadata = nil
         else
@@ -87,19 +84,28 @@ class Paragraph
             @iframe = Iframe.new(json['iframe']['mediaResource'])
         end
+        markups = []
         if !json['markups'].nil? && json['markups'].length > 0
-            markups = []
             json['markups'].each do |markup|
                 markups.append(Markup.new(markup))
             end
-            @markups = markups
             links = json['markups'].select{ |markup| markup["type"] == "A" }
             if !links.nil? && links.length > 0
                 @markupLinks = links.map{ |link| link["href"] }
             end
-        else
-            @markups = nil
         end
+        i = 0
+        while i = orgText.index(/(\*|_|`|\||\\|\{|\}|\[|\]|\(|\)|#|\+|\-|\.|\!)/, i + 1)
+            escapeMarkup = {
+                "type" => 'ESCAPE',
+                "start" => i,
+                "end" => i + 1
+            }
+            markups.append(Markup.new(escapeMarkup))
+        end
+        @markups = markups
     end
 end

data/lib/Parsers/IMGParser.rb CHANGED Viewed

@@ -18,26 +18,23 @@ class IMGParser < Parser
             fileName = paragraph.metadata.id #d*fsafwfe.jpg
-            imageURL = "https://miro.medium.com/max/1400/#{paragraph.metadata.id}"
+            imageURL = "https://miro.medium.com/max/1400/#{fileName}"
-            imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
+            imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(paragraph.postID), pathPolicy.getRelativePath(paragraph.postID))
             absolutePath = imagePathPolicy.getAbsolutePath(fileName)
             result = ""
             alt = ""
-            if paragraph.orgTextWithEscape != ""
-                alt = " \"#{paragraph.orgTextWithEscape}\""
-            end
             if  ImageDownloader.download(absolutePath, imageURL)
-                relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
+                relativePath = imagePathPolicy.getRelativePath(fileName)
                 if isForJekyll
-                    result = "\r\n\r\n![#{paragraph.orgTextWithEscape}](/#{relativePath}#{alt})\r\n\r\n"
+                    result = "\r\n\r\n![#{paragraph.text}](/#{relativePath}#{alt})\r\n\r\n"
                 else
-                    result = "\r\n\r\n![#{paragraph.orgTextWithEscape}](#{relativePath}#{alt})\r\n\r\n"
+                    result = "\r\n\r\n![#{paragraph.text}](#{relativePath}#{alt})\r\n\r\n"
                 end
             else
-                result = "\r\n\r\n![#{paragraph.orgTextWithEscape}](#{imageURL}#{alt})\r\n\r\n"
+                result = "\r\n\r\n![#{paragraph.text}](#{imageURL}#{alt})\r\n\r\n"
             end
             if paragraph.text != ""

data/lib/Parsers/IframeParser.rb CHANGED Viewed

@@ -1,6 +1,7 @@
 $lib = File.expand_path('../', File.dirname(__FILE__))
 require 'uri'
+require 'net/http'
 require "Request"
 require "Parsers/Parser"
@@ -38,7 +39,7 @@ class IframeParser < Parser
                     fileName = "#{paragraph.name}_#{URI(params["image"]).path.split("/").last}" #21de_default.jpg
                     imageURL = params["image"]
-                    imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
+                    imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(paragraph.postID), pathPolicy.getRelativePath(paragraph.postID))
                     absolutePath = imagePathPolicy.getAbsolutePath(fileName)
                     title = paragraph.iframe.title
                     if title.nil? or title == ""
@@ -46,7 +47,7 @@ class IframeParser < Parser
                     end
                     if  ImageDownloader.download(absolutePath, imageURL)
-                        relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
+                        relativePath = imagePathPolicy.getRelativePath(fileName)
                         if isForJekyll
                             result = "\r\n\r\n[![#{title}](/#{relativePath} \"#{title}\")](#{params["url"]})\r\n\r\n"
                         else
@@ -90,17 +91,51 @@ class IframeParser < Parser
                             ogURL = params["url"]
                         end
                     end
-                    ogImageURL = Helper.fetchOGImage(ogURL)
-                    title = paragraph.iframe.title
-                    if title.nil? or title == ""
-                        title = Helper.escapeMarkdown(ogURL)
-                    end
-                    if !ogImageURL.nil?
-                        result = "\r\n\r\n[![#{title}](#{ogImageURL} \"#{title}\")](#{ogURL})\r\n\r\n"
+                    twitterID = ogURL[/^(https\:\/\/twitter\.com\/){1}.+(\/){1}(\d+)/, 3]
+                    if !twitterID.nil?
+                        uri = URI("https://api.twitter.com/1.1/statuses/show.json?simple_quoted_tweet=true&include_entities=true&tweet_mode=extended&include_cards=1&id=#{twitterID}")
+                        https = Net::HTTP.new(uri.host, uri.port)
+                        https.use_ssl = true
+                        request = Net::HTTP::Get.new(uri)
+                        request['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.17.375.766 Safari/537.36';
+                        request['Authorization'] = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'; # twitter private api
+                        response = https.request(request)
+                        if response.code.to_i == 200
+                            twitterObj = JSON.parse(response.read_body)
+                            fullText = twitterObj["full_text"]
+                            twitterObj["entities"]["user_mentions"].each do |user_mention|
+                                fullText = fullText.gsub(user_mention["screen_name"],"[#{user_mention["screen_name"]}](https://twitter.com/#{user_mention["screen_name"]})")
+                            end
+                            twitterObj["entities"]["urls"].each do |url|
+                                fullText = fullText.gsub(url["url"],"[#{url["display_url"]}](#{url["expanded_url"]})")
+                            end
+                            createdAt = Time.parse(twitterObj["created_at"]).strftime('%Y-%m-%d %H:%M:%S')
+                            result = "\n\n"
+                            result += "■■■■■■■■■■■■■■ \n"
+                            result += "> **[#{twitterObj["user"]["name"]}](https://twitter.com/#{twitterObj["user"]["screen_name"]}) @ Twitter Says:** \n\n"
+                            result += "> > #{fullText} \n\n"
+                            result += "> **Tweeted at [#{createdAt}](#{ogURL}).** \n\n"
+                            result += "■■■■■■■■■■■■■■ \n\n"
+                        end
                     else
-                        result = "[#{title}](#{ogURL})"
+                        ogImageURL = Helper.fetchOGImage(ogURL)
+                        title = paragraph.iframe.title
+                        if title.nil? or title == ""
+                            title = Helper.escapeMarkdown(ogURL)
+                        end
+                        if !ogImageURL.nil?
+                            result = "\r\n\r\n[![#{title}](#{ogImageURL} \"#{title}\")](#{ogURL})\r\n\r\n"
+                        else
+                            result = "[#{title}](#{ogURL})"
+                        end
                     end
                 end
             end

data/lib/Parsers/MIXTAPEEMBEDParser.rb CHANGED Viewed

@@ -11,9 +11,9 @@ class MIXTAPEEMBEDParser < Parser
             if !paragraph.mixtapeMetadata.nil? && !paragraph.mixtapeMetadata.href.nil?
                 ogImageURL = Helper.fetchOGImage(paragraph.mixtapeMetadata.href)
                 if !ogImageURL.nil?
-                    "\r\n\r\n[![#{paragraph.orgTextWithEscape}](#{ogImageURL} \"#{paragraph.orgTextWithEscape}\")](#{paragraph.mixtapeMetadata.href})\r\n\r\n"
+                    "\r\n\r\n[![#{paragraph.text}](#{ogImageURL} \"#{paragraph.text}\")](#{paragraph.mixtapeMetadata.href})\r\n\r\n"
                 else
-                    "\n[#{paragraph.orgTextWithEscape}](#{paragraph.mixtapeMetadata.href})"
+                    "\n[#{paragraph.text}](#{paragraph.mixtapeMetadata.href})"
                 end
             else
                 "\n#{paragraph.text}"

data/lib/Parsers/MarkupStyleRender.rb CHANGED Viewed

@@ -174,6 +174,8 @@ class MarkupStyleRender
                     tag = TagChar.new(3, markup.start, markup.end, "`", "`")
                 elsif markup.type == "STRONG"
                     tag = TagChar.new(2, markup.start, markup.end, "**", "**")
+                elsif markup.type == "ESCAPE"
+                    tag = TagChar.new(2, markup.start, markup.end, "\\", "**")
                 elsif markup.type == "A"
                     url = markup.href
                     if markup.anchorType == "LINK"

data/lib/PathPolicy.rb CHANGED Viewed

@@ -8,18 +8,29 @@ class PathPolicy
     end
     def getRelativePath(lastPath)
-        if lastPath.nil?
-            "#{path}"
-        else
-            "#{path}/#{lastPath}"
+        result = path
+        if result != ""
+            result += "/"
+        end
+        if !lastPath.nil?
+            result += lastPath
         end
+        result
     end
     def getAbsolutePath(lastPath)
-        if lastPath.nil?
-            "#{rootPath}/#{path}"
-        else
-            "#{rootPath}/#{path}/#{lastPath}"
+        result = rootPath
+        if !lastPath.nil?
+            if result != ""
+                result += "/"
+            end
+            result += "#{lastPath}"
         end
+        result
     end
 end

data/lib/Post.rb CHANGED Viewed

@@ -6,10 +6,12 @@ require 'nokogiri'
 require 'json'
 require 'date'
-class Post
+require 'ImageDownloader'
+require 'PathPolicy'
+class Post
   class PostInfo
-    attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt, :collectionName, :description
+    attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt, :collectionName, :description, :previewImage
   end
   def self.getPostIDFromPostURLString(postURLString)
@@ -58,12 +60,28 @@ class Post
     end
   end
-  def self.parsePostInfoFromPostContent(content, postID)
+  def self.parsePostInfoFromPostContent(content, postID, pathPolicy)
     postInfo = PostInfo.new()
     postInfo.description = content&.dig("Post:#{postID}", "previewContent", "subtitle")
     postInfo.title = content&.dig("Post:#{postID}", "title")
     postInfo.tags = content&.dig("Post:#{postID}", "tags").map{ |tag| tag["__ref"].gsub! 'Tag:', '' }
+    previewImage = content&.dig("Post:#{postID}", "previewImage", "__ref")
+    if !previewImage.nil?
+      previewImageFIleName = content&.dig(previewImage, "id")
+      imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(postID), pathPolicy.getRelativePath(postID))
+      absolutePath = imagePathPolicy.getAbsolutePath(previewImageFIleName)
+      imageURL = "https://miro.medium.com/max/1400/#{previewImageFIleName}"
+      if  ImageDownloader.download(absolutePath, imageURL)
+          relativePath = imagePathPolicy.getRelativePath(previewImageFIleName)
+          postInfo.previewImage = relativePath
+      end
+    end
     creatorRef = content&.dig("Post:#{postID}", "creator", "__ref")
     if !creatorRef.nil?
       postInfo.creator = content&.dig(creatorRef, "name")

data/lib/ZMediumFetcher.rb CHANGED Viewed

@@ -121,6 +121,14 @@ class ZMediumFetcher
             postPath = Post.getPostPathFromPostURLString(postURL)
         end
+        if isForJekyll
+            postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath("_posts/zmediumtomarkdown"), pathPolicy.getRelativePath("_posts/zmediumtomarkdown"))
+            imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath("assets"), "assets")
+        else
+            postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath("zmediumtomarkdown"), pathPolicy.getRelativePath("zmediumtomarkdown"))
+            imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath("assets"), "assets")
+        end
         progress.postPath = postPath
         progress.message = "Downloading Post..."
         progress.printLog()
@@ -132,7 +140,7 @@ class ZMediumFetcher
             raise "Error: Content is empty! PostURL: #{postURL}"
         end
-        postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
+        postInfo = Post.parsePostInfoFromPostContent(postContent, postID, imagePathPolicy)
         sourceParagraphs = Post.fetchPostParagraphs(postID)
         if sourceParagraphs.nil?
@@ -207,14 +215,6 @@ class ZMediumFetcher
             paragraphs.append(paragraph)
             previousParagraph = paragraph
         end
-        if isForJekyll
-            postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "_posts/zmediumtomarkdown")
-            imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "assets")
-        else
-            postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "zmediumtomarkdown")
-            imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "assets")
-        end
         startParser = buildParser(imagePathPolicy)
@@ -229,8 +229,19 @@ class ZMediumFetcher
         absolutePath = postPathPolicy.getAbsolutePath("#{postWithDatePath}.md")
-        # if markdown file is exists and last modification time is >= latestPublishedAt(last update post time on medium)
-        if File.file?(absolutePath) && File.mtime(absolutePath).to_time.to_i >= postInfo.latestPublishedAt.to_i
+        fileLatestPublishedAt = nil
+        if File.file?(absolutePath)
+            lines = File.foreach(absolutePath).first(15)
+            if lines.first.start_with?("---")
+                dateLine = lines.select { |line| line.start_with?("last_modified_at:") }.first
+                if !dateLine.nil?
+                    fileLatestPublishedAt = Time.parse(dateLine[/^(last_modified_at:)\s+(\S*)/, 2]).to_i
+                end
+            end
+        end
+        if !fileLatestPublishedAt.nil? && fileLatestPublishedAt >= postInfo.latestPublishedAt.to_i
             # Already downloaded and nothing has changed!, Skip!
             progress.currentPostParagraphIndex = paragraphs.length
             progress.message = "Skip, Post already downloaded and nothing has changed!"
@@ -239,7 +250,10 @@ class ZMediumFetcher
             Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
             File.open(absolutePath, "w+") do |file|
                 # write postInfo into top
-                file.puts(Helper.createPostInfo(postInfo))
+                postMetaInfo = Helper.createPostInfo(postInfo, isForJekyll)
+                if !postMetaInfo.nil?
+                    file.puts(postMetaInfo)
+                end
                 index = 0
                 paragraphs.each do |paragraph|
@@ -260,7 +274,10 @@ class ZMediumFetcher
                     progress.printLog()
                 end
-                file.puts(Helper.createWatermark(postURL))
+                postWatermark = Helper.createWatermark(postURL)
+                if !postWatermark.nil?
+                    file.puts(postWatermark)
+                end
             end
             FileUtils.touch absolutePath, :mtime => postInfo.latestPublishedAt
@@ -304,7 +321,7 @@ class ZMediumFetcher
         if isForJekyll
             downloadPathPolicy = pathPolicy
         else
-            downloadPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "users/#{username}")
+            downloadPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath("users/#{username}"), pathPolicy.getRelativePath("users/#{username}"))
         end
         index = 0

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ZMediumToMarkdown
 version: !ruby/object:Gem::Version
-  version: 1.9.7
+  version: 2.0.1
 platform: ruby
 authors:
 - ZhgChgLi
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-07-17 00:00:00.000000000 Z
+date: 2022-07-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri