RubyGems - ZMediumToMarkdown - Versions diffs - 1.9.8 → 2.0.2 - Mend

ZMediumToMarkdown 1.9.8 → 2.0.2

Files changed (12)

checksums.yaml +4 -4
data/bin/ZMediumToMarkdown +4 -4
data/lib/Helper.rb +9 -18
data/lib/Models/Paragraph.rb +15 -9
data/lib/Parsers/IMGParser.rb +6 -9
data/lib/Parsers/IframeParser.rb +46 -11
data/lib/Parsers/MIXTAPEEMBEDParser.rb +2 -2
data/lib/Parsers/MarkupStyleRender.rb +6 -0
data/lib/PathPolicy.rb +19 -8
data/lib/Post.rb +21 -3
data/lib/ZMediumFetcher.rb +31 -14
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: cc121cbe8d2f06e2e6421872ff2f6950addd4affd46fe928172b9848da8e1517
-  data.tar.gz: 62e35936912a730a37c2424d6028fd3a780fe41aa0048ed717e81df02910dff2
+  metadata.gz: 9d5d59f757813210f3d5d98025ab4bcbb79eaf99c7a8f75d9540854fc7aa7093
+  data.tar.gz: ab7fcd5f9d8588697bc59bd4f26b0a334d69b4e9f21325425e144f53c7a9f38b
 SHA512:
-  metadata.gz: 6e76c68cc1c4f1f56bf35bc6517a0814c8263367f6ebda4e33591dd29667ae597a7a0b6da1ca1485245522e45d1a7dfbe739e9e996c0addb8ffc6d1600b1940b
-  data.tar.gz: cfe0bce95d184b67fb1afb2c84407478384b617665162dae9908fda1cfd8ff68094bbde65016b2872eea9d91e5e5987890f04e41e572175712cf0cdfa6210064
+  metadata.gz: 9d8913a1d4b741a30e4496513fa5c7e6cd0b80b495eb2b62a22eeee4f2e66cdb5873f5b886de1200dff62d2bb1dc175f15312999e7bc28f56fd0cffd361be9cd
+  data.tar.gz: 42dca718b61f52b20424edf8de42a0c9936f39adfdb0392efcc4990cc53df1b4534650d6cb5d48f6945e4d513a39a39657132682dce242d44a897686c5c39503

data/bin/ZMediumToMarkdown CHANGED Viewed

@@ -19,21 +19,21 @@ class Main
             opts.banner = "Usage: ZMediumFetcher [options]"
             opts.on('-uUSERNAME', '--username=USERNAME', 'Downloading all posts from user') do |username|
-                outputFilePath = PathPolicy.new(filePath, "Output")
+                outputFilePath = PathPolicy.new("#{filePath}/Output", "Output")
                 fetcher.downloadPostsByUsername(username, outputFilePath)
                 Helper.printNewVersionMessageIfExists()
             end
             opts.on('-pPOST_URL', '--postURL=POST_URL', 'Downloading single post') do |postURL|
-                outputFilePath = PathPolicy.new(filePath, "Output")
+                outputFilePath = PathPolicy.new("#{filePath}/Output", "Output")
                 fetcher.downloadPost(postURL, outputFilePath)
                 Helper.printNewVersionMessageIfExists()
             end
             opts.on('-jUSERNAME', '--jekyllUsername=USERNAME', 'Downloading all posts from user with Jekyll friendly') do |username|
-                outputFilePath = PathPolicy.new(filePath, "/")
+                outputFilePath = PathPolicy.new(filePath, "")
                 fetcher.isForJekyll = true
                 fetcher.downloadPostsByUsername(username, outputFilePath)
@@ -41,7 +41,7 @@ class Main
             end
             opts.on('-kPOST_URL', '--jekyllPostURL=POST_URL', 'Downloading single post with Jekyll friendly') do |postURL|
-                outputFilePath = PathPolicy.new(filePath, "/")
+                outputFilePath = PathPolicy.new(filePath, "")
                 fetcher.isForJekyll = true
                 fetcher.downloadPost(postURL, outputFilePath)

data/lib/Helper.rb CHANGED Viewed

@@ -12,10 +12,6 @@ require 'nokogiri'
 class Helper
-    def self.escapeMarkdown(text)
-        text.gsub(/(\*|_|`|\||\\|\{|\}|\[|\]|\(|\)|#|\+|\-|\.|\!)/){ |x| "\\#{x}" }
-    end
     def self.fetchOGImage(url)
         html = Request.html(Request.URL(url))
         content = html.search("meta[property='og:image']").attribute('content')
@@ -99,7 +95,6 @@ class Helper
     end
     def self.createPostInfo(postInfo, isForJekyll)
         title = postInfo.title.gsub("[","")
         title = title.gsub("]","")
@@ -107,9 +102,14 @@ class Helper
         result += "title: #{title}\n"
         result += "author: #{postInfo.creator}\n"
         result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
+        result += "last_modified_at: #{postInfo.latestPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
         result += "categories: #{postInfo.collectionName}\n"
         result += "tags: [#{postInfo.tags.join(",")}]\n"
         result += "description: #{postInfo.description}\n"
+        if !postInfo.previewImage.nil?
+            result += "image:\r\n"
+            result += "  path: #{postInfo.previewImage}\r\n"
+        end
         if isForJekyll
             result += "render_with_liquid: false\n"
         end
@@ -186,19 +186,10 @@ class Helper
     def self.createWatermark(postURL)
-        text = ""
-        # text += "\r\n\r\n\r\n"
-        # text += "+-----------------------------------------------------------------------------------+"
-        # text += "\r\n"
-        # text += "\r\n"
-        # text += "| **[View original post on Medium](#{postURL}) - Converted by [ZhgChgLi](https://zhgchg.li)/[ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)** |"
-        # text += "\r\n"
-        # text += "\r\n"
-        # text += "+-----------------------------------------------------------------------------------+"
-        # text += "\r\n"
-        # no need to show any watermark :)
+        text = "\r\n\r\n\r\n"
+        text += "_Converted [Medium Post](#{postURL}) by [ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)._"
+        text += "\r\n"
         text
     end
 end

data/lib/Models/Paragraph.rb CHANGED Viewed

@@ -5,7 +5,7 @@ require 'Parsers/PParser'
 require 'securerandom'
 class Paragraph
-    attr_accessor :postID, :name, :orgText, :orgTextWithEscape, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :oliIndex, :markups, :markupLinks
+    attr_accessor :postID, :name, :orgText, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :oliIndex, :markups, :markupLinks
     class Iframe
         attr_accessor :id, :title, :type, :src
@@ -66,9 +66,6 @@ class Paragraph
         @href = json['href']
         @postID = postID
-        orgTextWithEscape = Helper.escapeMarkdown(json['text'])
-        @orgTextWithEscape = orgTextWithEscape
         if json['metadata'].nil?
             @metadata = nil
         else
@@ -87,19 +84,28 @@ class Paragraph
             @iframe = Iframe.new(json['iframe']['mediaResource'])
         end
+        markups = []
         if !json['markups'].nil? && json['markups'].length > 0
-            markups = []
             json['markups'].each do |markup|
                 markups.append(Markup.new(markup))
             end
-            @markups = markups
             links = json['markups'].select{ |markup| markup["type"] == "A" }
             if !links.nil? && links.length > 0
                 @markupLinks = links.map{ |link| link["href"] }
             end
-        else
-            @markups = nil
         end
+        i = 0
+        while i = orgText.index(/(\*|_|`|\||\\|\{|\}|\[|\]|\(|\)|#|\+|\-|\.|\!)/, i + 1)
+            escapeMarkup = {
+                "type" => 'ESCAPE',
+                "start" => i,
+                "end" => i + 1
+            }
+            markups.append(Markup.new(escapeMarkup))
+        end
+        @markups = markups
     end
 end

data/lib/Parsers/IMGParser.rb CHANGED Viewed

@@ -18,26 +18,23 @@ class IMGParser < Parser
             fileName = paragraph.metadata.id #d*fsafwfe.jpg
-            imageURL = "https://miro.medium.com/max/1400/#{paragraph.metadata.id}"
+            imageURL = "https://miro.medium.com/max/1400/#{fileName}"
-            imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
+            imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(paragraph.postID), pathPolicy.getRelativePath(paragraph.postID))
             absolutePath = imagePathPolicy.getAbsolutePath(fileName)
             result = ""
             alt = ""
-            if paragraph.orgTextWithEscape != ""
-                alt = " \"#{paragraph.orgTextWithEscape}\""
-            end
             if  ImageDownloader.download(absolutePath, imageURL)
-                relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
+                relativePath = imagePathPolicy.getRelativePath(fileName)
                 if isForJekyll
-                    result = "\r\n\r\n![#{paragraph.orgTextWithEscape}](/#{relativePath}#{alt})\r\n\r\n"
+                    result = "\r\n\r\n![#{paragraph.text}](/#{relativePath}#{alt})\r\n\r\n"
                 else
-                    result = "\r\n\r\n![#{paragraph.orgTextWithEscape}](#{relativePath}#{alt})\r\n\r\n"
+                    result = "\r\n\r\n![#{paragraph.text}](#{relativePath}#{alt})\r\n\r\n"
                 end
             else
-                result = "\r\n\r\n![#{paragraph.orgTextWithEscape}](#{imageURL}#{alt})\r\n\r\n"
+                result = "\r\n\r\n![#{paragraph.text}](#{imageURL}#{alt})\r\n\r\n"
             end
             if paragraph.text != ""

data/lib/Parsers/IframeParser.rb CHANGED Viewed

@@ -1,6 +1,7 @@
 $lib = File.expand_path('../', File.dirname(__FILE__))
 require 'uri'
+require 'net/http'
 require "Request"
 require "Parsers/Parser"
@@ -38,7 +39,7 @@ class IframeParser < Parser
                     fileName = "#{paragraph.name}_#{URI(params["image"]).path.split("/").last}" #21de_default.jpg
                     imageURL = params["image"]
-                    imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
+                    imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(paragraph.postID), pathPolicy.getRelativePath(paragraph.postID))
                     absolutePath = imagePathPolicy.getAbsolutePath(fileName)
                     title = paragraph.iframe.title
                     if title.nil? or title == ""
@@ -46,7 +47,7 @@ class IframeParser < Parser
                     end
                     if  ImageDownloader.download(absolutePath, imageURL)
-                        relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
+                        relativePath = imagePathPolicy.getRelativePath(fileName)
                         if isForJekyll
                             result = "\r\n\r\n[![#{title}](/#{relativePath} \"#{title}\")](#{params["url"]})\r\n\r\n"
                         else
@@ -90,17 +91,51 @@ class IframeParser < Parser
                             ogURL = params["url"]
                         end
                     end
-                    ogImageURL = Helper.fetchOGImage(ogURL)
-                    title = paragraph.iframe.title
-                    if title.nil? or title == ""
-                        title = Helper.escapeMarkdown(ogURL)
-                    end
-                    if !ogImageURL.nil?
-                        result = "\r\n\r\n[![#{title}](#{ogImageURL} \"#{title}\")](#{ogURL})\r\n\r\n"
+                    twitterID = ogURL[/^(https\:\/\/twitter\.com\/){1}.+(\/){1}(\d+)/, 3]
+                    if !twitterID.nil?
+                        uri = URI("https://api.twitter.com/1.1/statuses/show.json?simple_quoted_tweet=true&include_entities=true&tweet_mode=extended&include_cards=1&id=#{twitterID}")
+                        https = Net::HTTP.new(uri.host, uri.port)
+                        https.use_ssl = true
+                        request = Net::HTTP::Get.new(uri)
+                        request['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.17.375.766 Safari/537.36';
+                        request['Authorization'] = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'; # twitter private api
+                        response = https.request(request)
+                        if response.code.to_i == 200
+                            twitterObj = JSON.parse(response.read_body)
+                            fullText = twitterObj["full_text"]
+                            twitterObj["entities"]["user_mentions"].each do |user_mention|
+                                fullText = fullText.gsub(user_mention["screen_name"],"[#{user_mention["screen_name"]}](https://twitter.com/#{user_mention["screen_name"]})")
+                            end
+                            twitterObj["entities"]["urls"].each do |url|
+                                fullText = fullText.gsub(url["url"],"[#{url["display_url"]}](#{url["expanded_url"]})")
+                            end
+                            createdAt = Time.parse(twitterObj["created_at"]).strftime('%Y-%m-%d %H:%M:%S')
+                            result = "\n\n"
+                            result += "■■■■■■■■■■■■■■ \n"
+                            result += "> **[#{twitterObj["user"]["name"]}](https://twitter.com/#{twitterObj["user"]["screen_name"]}) @ Twitter Says:** \n\n"
+                            result += "> > #{fullText} \n\n"
+                            result += "> **Tweeted at [#{createdAt}](#{ogURL}).** \n\n"
+                            result += "■■■■■■■■■■■■■■ \n\n"
+                        end
                     else
-                        result = "[#{title}](#{ogURL})"
+                        ogImageURL = Helper.fetchOGImage(ogURL)
+                        title = paragraph.iframe.title
+                        if title.nil? or title == ""
+                            title = Helper.escapeMarkdown(ogURL)
+                        end
+                        if !ogImageURL.nil?
+                            result = "\r\n\r\n[![#{title}](#{ogImageURL} \"#{title}\")](#{ogURL})\r\n\r\n"
+                        else
+                            result = "[#{title}](#{ogURL})"
+                        end
                     end
                 end
             end

data/lib/Parsers/MIXTAPEEMBEDParser.rb CHANGED Viewed

@@ -11,9 +11,9 @@ class MIXTAPEEMBEDParser < Parser
             if !paragraph.mixtapeMetadata.nil? && !paragraph.mixtapeMetadata.href.nil?
                 ogImageURL = Helper.fetchOGImage(paragraph.mixtapeMetadata.href)
                 if !ogImageURL.nil?
-                    "\r\n\r\n[![#{paragraph.orgTextWithEscape}](#{ogImageURL} \"#{paragraph.orgTextWithEscape}\")](#{paragraph.mixtapeMetadata.href})\r\n\r\n"
+                    "\r\n\r\n[![#{paragraph.text}](#{ogImageURL} \"#{paragraph.text}\")](#{paragraph.mixtapeMetadata.href})\r\n\r\n"
                 else
-                    "\n[#{paragraph.orgTextWithEscape}](#{paragraph.mixtapeMetadata.href})"
+                    "\n[#{paragraph.text}](#{paragraph.mixtapeMetadata.href})"
                 end
             else
                 "\n#{paragraph.text}"

data/lib/Parsers/MarkupStyleRender.rb CHANGED Viewed

@@ -174,6 +174,12 @@ class MarkupStyleRender
                     tag = TagChar.new(3, markup.start, markup.end, "`", "`")
                 elsif markup.type == "STRONG"
                     tag = TagChar.new(2, markup.start, markup.end, "**", "**")
+                elsif markup.type == "ESCAPE"
+                    escapeTagChar = TagChar.new(0,markup.start, markup.end,'','')
+                    escapeTagChar.startChars = TextChar.new('\\'.chars,'Text')
+                    escapeTagChar.endChars = TextChar.new([],'Text')
+                    tag = escapeTagChar
                 elsif markup.type == "A"
                     url = markup.href
                     if markup.anchorType == "LINK"

data/lib/PathPolicy.rb CHANGED Viewed

@@ -8,18 +8,29 @@ class PathPolicy
     end
     def getRelativePath(lastPath)
-        if lastPath.nil?
-            "#{path}"
-        else
-            "#{path}/#{lastPath}"
+        result = path
+        if result != ""
+            result += "/"
+        end
+        if !lastPath.nil?
+            result += lastPath
         end
+        result
     end
     def getAbsolutePath(lastPath)
-        if lastPath.nil?
-            "#{rootPath}/#{path}"
-        else
-            "#{rootPath}/#{path}/#{lastPath}"
+        result = rootPath
+        if !lastPath.nil?
+            if result != ""
+                result += "/"
+            end
+            result += "#{lastPath}"
         end
+        result
     end
 end

data/lib/Post.rb CHANGED Viewed

@@ -6,10 +6,12 @@ require 'nokogiri'
 require 'json'
 require 'date'
-class Post
+require 'ImageDownloader'
+require 'PathPolicy'
+class Post
   class PostInfo
-    attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt, :collectionName, :description
+    attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt, :collectionName, :description, :previewImage
   end
   def self.getPostIDFromPostURLString(postURLString)
@@ -58,12 +60,28 @@ class Post
     end
   end
-  def self.parsePostInfoFromPostContent(content, postID)
+  def self.parsePostInfoFromPostContent(content, postID, pathPolicy)
     postInfo = PostInfo.new()
     postInfo.description = content&.dig("Post:#{postID}", "previewContent", "subtitle")
     postInfo.title = content&.dig("Post:#{postID}", "title")
     postInfo.tags = content&.dig("Post:#{postID}", "tags").map{ |tag| tag["__ref"].gsub! 'Tag:', '' }
+    previewImage = content&.dig("Post:#{postID}", "previewImage", "__ref")
+    if !previewImage.nil?
+      previewImageFIleName = content&.dig(previewImage, "id")
+      imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(postID), pathPolicy.getRelativePath(postID))
+      absolutePath = imagePathPolicy.getAbsolutePath(previewImageFIleName)
+      imageURL = "https://miro.medium.com/max/1400/#{previewImageFIleName}"
+      if  ImageDownloader.download(absolutePath, imageURL)
+          relativePath = imagePathPolicy.getRelativePath(previewImageFIleName)
+          postInfo.previewImage = relativePath
+      end
+    end
     creatorRef = content&.dig("Post:#{postID}", "creator", "__ref")
     if !creatorRef.nil?
       postInfo.creator = content&.dig(creatorRef, "name")

data/lib/ZMediumFetcher.rb CHANGED Viewed

@@ -121,6 +121,14 @@ class ZMediumFetcher
             postPath = Post.getPostPathFromPostURLString(postURL)
         end
+        if isForJekyll
+            postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath("_posts/zmediumtomarkdown"), pathPolicy.getRelativePath("_posts/zmediumtomarkdown"))
+            imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath("assets"), "assets")
+        else
+            postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath("zmediumtomarkdown"), pathPolicy.getRelativePath("zmediumtomarkdown"))
+            imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath("assets"), "assets")
+        end
         progress.postPath = postPath
         progress.message = "Downloading Post..."
         progress.printLog()
@@ -132,7 +140,7 @@ class ZMediumFetcher
             raise "Error: Content is empty! PostURL: #{postURL}"
         end
-        postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
+        postInfo = Post.parsePostInfoFromPostContent(postContent, postID, imagePathPolicy)
         sourceParagraphs = Post.fetchPostParagraphs(postID)
         if sourceParagraphs.nil?
@@ -207,14 +215,6 @@ class ZMediumFetcher
             paragraphs.append(paragraph)
             previousParagraph = paragraph
         end
-        if isForJekyll
-            postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "_posts/zmediumtomarkdown")
-            imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "assets")
-        else
-            postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "zmediumtomarkdown")
-            imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "assets")
-        end
         startParser = buildParser(imagePathPolicy)
@@ -229,8 +229,19 @@ class ZMediumFetcher
         absolutePath = postPathPolicy.getAbsolutePath("#{postWithDatePath}.md")
-        # if markdown file is exists and last modification time is >= latestPublishedAt(last update post time on medium)
-        if File.file?(absolutePath) && File.mtime(absolutePath).to_time.to_i >= postInfo.latestPublishedAt.to_i
+        fileLatestPublishedAt = nil
+        if File.file?(absolutePath)
+            lines = File.foreach(absolutePath).first(15)
+            if lines.first.start_with?("---")
+                dateLine = lines.select { |line| line.start_with?("last_modified_at:") }.first
+                if !dateLine.nil?
+                    #fileLatestPublishedAt = Time.parse(dateLine[/^(last_modified_at:)\s+(\S*)/, 2]).to_i
+                end
+            end
+        end
+        if !fileLatestPublishedAt.nil? && fileLatestPublishedAt >= postInfo.latestPublishedAt.to_i
             # Already downloaded and nothing has changed!, Skip!
             progress.currentPostParagraphIndex = paragraphs.length
             progress.message = "Skip, Post already downloaded and nothing has changed!"
@@ -239,7 +250,10 @@ class ZMediumFetcher
             Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
             File.open(absolutePath, "w+") do |file|
                 # write postInfo into top
-                file.puts(Helper.createPostInfo(postInfo, isForJekyll))
+                postMetaInfo = Helper.createPostInfo(postInfo, isForJekyll)
+                if !postMetaInfo.nil?
+                    file.puts(postMetaInfo)
+                end
                 index = 0
                 paragraphs.each do |paragraph|
@@ -260,7 +274,10 @@ class ZMediumFetcher
                     progress.printLog()
                 end
-                file.puts(Helper.createWatermark(postURL))
+                postWatermark = Helper.createWatermark(postURL)
+                if !postWatermark.nil?
+                    file.puts(postWatermark)
+                end
             end
             FileUtils.touch absolutePath, :mtime => postInfo.latestPublishedAt
@@ -304,7 +321,7 @@ class ZMediumFetcher
         if isForJekyll
             downloadPathPolicy = pathPolicy
         else
-            downloadPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "users/#{username}")
+            downloadPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath("users/#{username}"), pathPolicy.getRelativePath("users/#{username}"))
         end
         index = 0

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ZMediumToMarkdown
 version: !ruby/object:Gem::Version
-  version: 1.9.8
+  version: 2.0.2
 platform: ruby
 authors:
 - ZhgChgLi
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-07-18 00:00:00.000000000 Z
+date: 2022-07-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri