RubyGems - ZMediumToMarkdown - Versions diffs - 1.5.0 → 1.6.2 - Mend

ZMediumToMarkdown 1.5.0 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

checksums.yaml +4 -4
data/lib/Helper.rb +8 -3
data/lib/Models/Paragraph.rb +26 -7
data/lib/Parsers/BQParser.rb +11 -5
data/lib/Parsers/CodeBlockParser.rb +9 -1
data/lib/Parsers/IMGParser.rb +7 -2
data/lib/Parsers/IframeParser.rb +5 -4
data/lib/Parsers/LinkParser.rb +1 -13
data/lib/Parsers/MIXTAPEEMBEDParser.rb +2 -2
data/lib/Parsers/MarkupParser.rb +12 -7
data/lib/Parsers/MarkupStyleRender.rb +232 -0
data/lib/Post.rb +24 -6
data/lib/ZMediumFetcher.rb +16 -9
metadata +3 -16

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: b38eda08edc524ebdbb5c459b106a907d8c86db59a772f466781a1da297b8ebc
-  data.tar.gz: 1c9c62e9124bc30acef072ee1a1d63dac4dc61212c0ddc443377314bcfe410ec
+  metadata.gz: 57ebbd86d072c9c43a5baef02031561323e9e7f6857e639aecc754de5741c543
+  data.tar.gz: 03bde3f39434b21c7d96380d05dbc9ccc7096f30ab97aade0e6838165e28de3e
 SHA512:
-  metadata.gz: 7e9965a014bf975e82c5255228a5cef81ecf434be208a7bec2baab319943ba5fa92871fa02bb36c698733259504b3271ed6e1087a2bf02b7f11fabfc5964e4f1
-  data.tar.gz: c1825a86bc74c9ec61c4481bbfa603f167cbe7f8adac166b19e8c0b0563d212eaa541df62c45103f939172ecd06ff4abced2ed698dfff0aa227bce549b61c2c7
+  metadata.gz: d108f648afe9eb0f90231dc771e22fd6f3f15d820f6d19aa941d23b48b2416c672350b6353af85c58dd13ccc1c3faa194b55f622476d09ea8bc84cccff6ba6ac
+  data.tar.gz: e484a2d51bc9ec006dc5511e2586d29eaab102bf778a030240ee92941b4fdfd9a726fbc5542677af7dbd8bb192e6c5d162b30b8c8b5001d166d3561edabdb9ec

data/lib/Helper.rb CHANGED Viewed

@@ -77,10 +77,15 @@ class Helper
     end
     def self.createPostInfo(postInfo)
+        title = postInfo.title.gsub("[","")
+        title = title.gsub("]","")
         result = "---\n"
-        result += "title: #{postInfo.title}\n"
+        result += "title: #{title}\n"
         result += "author: #{postInfo.creator}\n"
         result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
+        result += "categories: #{postInfo.collectionName}\n"
         result += "tags: [#{postInfo.tags.join(",")}]\n"
         result += "---\n"
         result += "\r\n"
@@ -159,7 +164,7 @@ class Helper
         text += "+-----------------------------------------------------------------------------------+"
         text += "\r\n"
         text += "\r\n"
-        text += "| **[View original post on Medium](#{postURL}) - Converted by [ZhgChgLi](https://blog.zhgchg.li)/[ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)** |"
+        text += "| **[View original post on Medium](#{postURL}) - Converted by [ZhgChgLi](https://zhgchg.li)/[ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)** |"
         text += "\r\n"
         text += "\r\n"
         text += "+-----------------------------------------------------------------------------------+"
@@ -167,4 +172,4 @@ class Helper
         text
     end
-end
+end

data/lib/Models/Paragraph.rb CHANGED Viewed

@@ -4,7 +4,7 @@ require 'Parsers/PParser'
 require 'securerandom'
 class Paragraph
-    attr_accessor :postID, :name, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :hasMarkup, :oliIndex, :markupLinks
+    attr_accessor :postID, :name, :orgText, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :oliIndex, :markups, :markupLinks
     class Iframe
         attr_accessor :id, :title, :type, :src
@@ -20,6 +20,19 @@ class Paragraph
         end
     end
+    class Markup
+        attr_accessor :type, :start, :end, :href, :anchorType, :userId, :linkMetadata
+        def initialize(json)
+            @type = json['type']
+            @start = json['start']
+            @end = json['end']
+            @href = json['href']
+            @anchorType = json['anchorType']
+            @userId = json['userId']
+            @linkMetadata = json['linkMetadata']
+        end
+    end
     class MetaData
         attr_accessor :id, :type
         def initialize(json)
@@ -41,12 +54,13 @@ class Paragraph
             "text" => "",
             "type" => PParser.getTypeString()
         }
-        Paragraph.new(json, postID, nil)
+        Paragraph.new(json, postID)
     end
-    def initialize(json, postID, resource)
+    def initialize(json, postID)
         @name = json['name']
         @text = json['text']
+        @orgText = json['text']
         @type = json['type']
         @href = json['href']
         @postID = postID
@@ -54,7 +68,7 @@ class Paragraph
         if json['metadata'].nil?
             @metadata = nil
         else
-            @metadata = MetaData.new(resource[json['metadata']['__ref']])
+            @metadata = MetaData.new(json['metadata'])
         end
         if json['mixtapeMetadata'].nil?
@@ -66,17 +80,22 @@ class Paragraph
         if json['iframe'].nil?
             @iframe = nil
         else
-            @iframe = Iframe.new(resource[json['iframe']['mediaResource']['__ref']])
+            @iframe = Iframe.new(json['iframe']['mediaResource'])
         end
         if !json['markups'].nil? && json['markups'].length > 0
+            markups = []
+            json['markups'].each do |markup|
+                markups.append(Markup.new(markup))
+            end
+            @markups = markups
             links = json['markups'].select{ |markup| markup["type"] == "A" }
             if !links.nil? && links.length > 0
                 @markupLinks = links.map{ |link| link["href"] }
             end
-            @hasMarkup = true
         else
-            @hasMarkup = false
+            @markups = nil
         end
     end
 end

data/lib/Parsers/BQParser.rb CHANGED Viewed

@@ -5,12 +5,18 @@ require 'Models/Paragraph'
 class BQParser < Parser
     attr_accessor :nextParser
+    def self.isBQ(paragraph)
+        if paragraph.nil?
+            false
+        else
+            paragraph.type == "BQ"
+        end
+    end
     def parse(paragraph)
-        if paragraph.type == 'BQ'
-            result = ""
-            paragraph.text.each_line do |p|
-                result += "> #{p}"
-            end
+        if BQParser.isBQ(paragraph)
+            result = "> #{paragraph.text}"
             result
         else
             if !nextParser.nil?

data/lib/Parsers/CodeBlockParser.rb CHANGED Viewed

@@ -10,8 +10,16 @@ class CodeBlockParser < Parser
         'CODE_BLOCK'
     end
+    def self.isCodeBlock(paragraph)
+        if paragraph.nil?
+            false
+        else
+            paragraph.type == CodeBlockParser.getTypeString()
+        end
+    end
     def parse(paragraph)
-        if paragraph.type == CodeBlockParser.getTypeString()
+        if CodeBlockParser.isCodeBlock(paragraph)
             "```\n#{paragraph.text}\n```"
         else
             if !nextParser.nil?

data/lib/Parsers/IMGParser.rb CHANGED Viewed

@@ -18,11 +18,16 @@ class IMGParser < Parser
             imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
             absolutePath = imagePathPolicy.getAbsolutePath(fileName)
+            comment = ""
+            if paragraph.text != ""
+                comment = " \"#{paragraph.text}\""
+            end
             if  ImageDownloader.download(absolutePath, imageURL)
                 relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
-                "![#{paragraph.text}](#{relativePath} \"#{paragraph.text}\")"
+                "![#{paragraph.text}](/#{relativePath}#{comment})"
             else
-                "![#{paragraph.text}](#{imageURL} \"#{paragraph.text}\")"
+                "![#{paragraph.text}](#{imageURL}#{comment})"
             end
         else
             if !nextParser.nil?

data/lib/Parsers/IframeParser.rb CHANGED Viewed

@@ -24,6 +24,7 @@ class IframeParser < Parser
                 # is youtube
                 youtubeURL = URI(URI.decode(url)).query
                 params = URI::decode_www_form(youtubeURL).to_h
                 if !params["image"].nil? && !params["url"].nil?
                     fileName = "#{paragraph.name}_#{URI(params["image"]).path.split("/").last}" #21de_default.jpg
@@ -31,12 +32,12 @@ class IframeParser < Parser
                     imageURL = params["image"]
                     imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
                     absolutePath = imagePathPolicy.getAbsolutePath(fileName)
+                    title = paragraph.iframe.title
                     if  ImageDownloader.download(absolutePath, imageURL)
                         relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
-                        result = "\n[![YouTube](#{relativePath} \"YouTube\")](#{params["url"]})"
+                        result = "\n[![#{title}](#{relativePath} \"#{title}\")](#{params["url"]})"
                     else
-                        result = "\n[YouTube](#{params["url"]})"
+                        result = "\n[#{title}](#{params["url"]})"
                     end
                 end
             else
@@ -54,7 +55,7 @@ class IframeParser < Parser
                     gistHTML.search('a').each do |a|
                         if a.text == 'view raw'
                             gistRAW = Request.body(Request.URL(a['href']))
-                            result = "```#{lang}\n#{gistRAW}\n```"
+                            result = "```#{lang.downcase}\n#{gistRAW}\n```"
                         end
                     end
                 end

data/lib/Parsers/LinkParser.rb CHANGED Viewed

@@ -23,19 +23,7 @@ class LinkParser
                         postPath = link.split("/").last
                         if !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == postPath.split("-").last }.nil?
-                            markdownString = markdownString.sub! link, postPath
-                        end
-                    else
-                        if !(link =~ /\A#{URI::regexp(['http', 'https'])}\z/)
-                            # medium will give you an relative path if url is medium's post (due to we use html to markdown render)
-                            # e.g. /zrealm-ios-dev/visitor-pattern-in-ios-swift-ba5773a7bfea
-                            # it's not a vaild url
-                            # fullfill url from markup attribute
-                            match = markupLinks.find{ |markupLink| markupLink.include? link }
-                            if !match.nil?
-                                markdownString = markdownString.sub! link, match
-                            end
+                            markdownString = markdownString.sub! link, "../#{postPath}"
                         end
                     end
                 end

data/lib/Parsers/MIXTAPEEMBEDParser.rb CHANGED Viewed

@@ -8,9 +8,9 @@ class MIXTAPEEMBEDParser < Parser
     def parse(paragraph)
         if paragraph.type == 'MIXTAPE_EMBED'
             if !paragraph.mixtapeMetadata.nil? && !paragraph.mixtapeMetadata.href.nil?
-                "\n[#{paragraph.text}](#{paragraph.mixtapeMetadata.href})"
+                "\n[#{paragraph.orgText}](#{paragraph.mixtapeMetadata.href})"
             else
-                "\n#{paragraph.text}"
+                "\n#{paragraph.orgText}"
             end
         else
             if !nextParser.nil?

data/lib/Parsers/MarkupParser.rb CHANGED Viewed

@@ -1,23 +1,28 @@
 $lib = File.expand_path('../', File.dirname(__FILE__))
 require 'Models/Paragraph'
-require 'reverse_markdown'
+require 'Parsers/MarkupStyleRender'
 require 'nokogiri'
+require 'securerandom'
+require 'User'
 class MarkupParser
     attr_accessor :body, :paragraph
-    def initialize(html, paragraph)
-        @body = html.search("body").first
+    def initialize(paragraph)
         @paragraph = paragraph
     end
     def parse()
         result = paragraph.text
-        if paragraph.hasMarkup
-            p = body.at_css("##{paragraph.name}")
-            if !p.nil?
-                result = ReverseMarkdown.convert p.inner_html
+        if !paragraph.markups.nil? && paragraph.markups.length > 0
+            markupRender = MarkupStyleRender.new(paragraph)
+            begin
+                result = markupRender.parse()
+            rescue => e
+                puts e.backtrace
+                Helper.makeWarningText("Error occurred during render markup text, please help to open an issue on github.")
             end
         end

data/lib/Parsers/MarkupStyleRender.rb ADDED Viewed

@@ -0,0 +1,232 @@
+$lib = File.expand_path('../', File.dirname(__FILE__))
+require 'Models/Paragraph'
+class MarkupStyleRender
+    attr_accessor :paragraph, :chars, :encodeType
+    class TextChar
+        attr_accessor :chars, :type
+        def initialize(chars, type)
+            @chars = chars
+            @type = type
+        end
+    end
+    class TagChar < TextChar
+        attr_accessor :sort, :startIndex, :endIndex, :startChars, :endChars
+        def initialize(sort, startIndex, endIndex, startChars, endChars)
+            @sort = sort
+            @startIndex = startIndex
+            @endIndex = endIndex - 1
+            @startChars = TextChar.new(startChars.chars, 'TagStart')
+            @endChars = TextChar.new(endChars.chars, 'TagEnd')
+        end
+    end
+    def initialize(paragraph)
+        @paragraph = paragraph
+        chars = {}
+        index = 0
+        emojiRegex = /[\u{203C}\u{2049}\u{20E3}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{23E9}-\u{23EC}\u{23F0}\u{23F3}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2601}\u{260E}\u{2611}\u{2614}-\u{2615}\u{261D}\u{263A}\u{2648}-\u{2653}\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267F}\u{2693}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26CE}\u{26D4}\u{26EA}\u{26F2}-\u{26F3}\u{26F5}\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270C}\u{270F}\u{2712}\u{2714}\u{2716}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1E7}-\u{1F1EC}\u{1F1EE}-\u{1F1F0}\u{1F1F3}\u{1F1F5}\u{1F1F7}-\u{1F1FA}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F320}\u{1F330}-\u{1F335}\u{1F337}-\u{1F37C}\u{1F380}-\u{1F393}\u{1F3A0}-\u{1F3C4}\u{1F3C6}-\u{1F3CA}\u{1F3E0}-\u{1F3F0}\u{1F400}-\u{1F43E}\u{1F440}\u{1F442}-\u{1F4F7}\u{1F4F9}-\u{1F4FC}\u{1F500}-\u{1F507}\u{1F509}-\u{1F53D}\u{1F550}-\u{1F567}\u{1F5FB}-\u{1F640}\u{1F645}-\u{1F64F}\u{1F680}-\u{1F68A}]/
+        excludesEmojis = ["⚠"]
+        paragraph.text.each_char do |char|
+            chars[index] = TextChar.new([char], "Text")
+            index += 1
+            if char =~ emojiRegex && !excludesEmojis.include?(char)
+                # some emoji need more space (in Medium)
+                chars[index] = TextChar.new([], "Text")
+                index += 1
+            end
+        end
+        @chars = chars
+    end
+    def optimize(chars)
+        while true
+            hasExcute = false
+            index = 0
+            startTagIndex = nil
+            preTag = nil
+            preTagIndex = nil
+            preTextChar = nil
+            preTextIndex = nil
+            chars.each do |char|
+                if !preTag.nil?
+                    if preTag.type == "TagStart" && char.type == "TagEnd"
+                        chars.delete_at(index)
+                        chars.delete_at(preTagIndex)
+                        hasExcute = true
+                        break
+                    end
+                end
+                if char.type == "TagStart" && (preTag == nil || preTag.type == "TagEnd" || preTag.type == "Text")
+                    startTagIndex = index
+                elsif (char.type  == "TagEnd" || char.type  == "Text") && startTagIndex != nil
+                    if preTextChar != nil && preTextChar.chars.join() != "\n"
+                        # not first tag & insert blank between start tag and before text
+                        if preTextChar.chars.join() != " "
+                            chars.insert(startTagIndex, TextChar.new(" ".chars, "Text"))
+                            hasExcute = true
+                            break
+                        end
+                    end
+                    startTagIndex = nil
+                end
+                if !preTag.nil?
+                    if preTag.type == "TagStart" && char.type  == "Text"
+                        # delete blank between start tag and after text
+                        if char.chars.join().strip == ""
+                            chars.delete_at(index)
+                            hasExcute = true
+                            break
+                        end
+                    end
+                    if preTag.type == "Text" && char.type  == "TagEnd"
+                        if preTextChar.chars.join().strip == "" && preTextChar.chars.join() != "\n"
+                            chars.delete_at(preTextIndex)
+                            hasExcute = true
+                            break
+                        end
+                    end
+                    if preTag.type == "TagEnd" && char.type  == "Text"
+                        if char.chars.join() != " "
+                            chars.insert(index, TextChar.new(" ".chars, "Text"))
+                            hasExcute = true
+                            break
+                        end
+                    end
+                end
+                if char.type == "Text"
+                    preTextChar = char
+                    preTextIndex = index
+                end
+                preTag = char
+                preTagIndex = index
+                index += 1
+            end
+            if !hasExcute
+                break
+            end
+        end
+        chars
+    end
+    def parse()
+        result = paragraph.text
+        if !paragraph.markups.nil? && paragraph.markups.length > 0
+            tags = []
+            paragraph.markups.each do |markup|
+                tag = nil
+                if markup.type == "EM"
+                    tag = TagChar.new(2, markup.start, markup.end, "_", "_")
+                elsif markup.type == "CODE"
+                    tag = TagChar.new(3, markup.start, markup.end, "`", "`")
+                elsif markup.type == "STRONG"
+                    tag = TagChar.new(2, markup.start, markup.end, "**", "**")
+                elsif markup.type == "A"
+                    url = markup.href
+                    if markup.anchorType == "LINK"
+                        url = markup.href
+                    elsif markup.anchorType == "USER"
+                        url = "https://medium.com/u/#{markup.userId}"
+                    end
+                    tag = TagChar.new(1, markup.start, markup.end, "[", "](#{url})")
+                else
+                    Helper.makeWarningText("Undefined Markup Type: #{markup.type}.")
+                end
+                if !tag.nil?
+                    tags.append(tag)
+                end
+            end
+            tags.sort_by(&:startIndex)
+            response = []
+            stack = []
+            chars.each do |index, char|
+                if char.chars.join() == "\n"
+                    brStack = stack.dup
+                    while brStack.length > 0
+                        tag = brStack.pop
+                        response.push(tag.endChars)
+                    end
+                    response.append(TextChar.new(char.chars, 'Text'))
+                    brStack = stack.dup.reverse
+                    while brStack.length > 0
+                        tag = brStack.pop
+                        response.push(tag.startChars)
+                    end
+                end
+                startTags = tags.select { |tag| tag.startIndex == index }.sort_by(&:sort)
+                if !startTags.nil?
+                    startTags.each do |tag|
+                        response.append(tag.startChars)
+                        stack.append(tag)
+                    end
+                end
+                if char.chars.join() != "\n"
+                    response.append(TextChar.new(char.chars, 'Text'))
+                end
+                endTags = tags.select { |tag| tag.endIndex == index }
+                if !endTags.nil? && endTags.length > 0
+                    mismatchTags = []
+                    while endTags.length > 0
+                        stackTag = stack.pop
+                        stackTagInEndTagsIndex = endTags.find_index(stackTag)
+                        if !stackTagInEndTagsIndex.nil?
+                            # as expected
+                            endTags.delete_at(stackTagInEndTagsIndex)
+                        else
+                            mismatchTags.append(stackTag)
+                        end
+                        response.append(stackTag.endChars)
+                    end
+                    while mismatchTags.length > 0
+                        mismatchTag = mismatchTags.pop
+                        response.append(mismatchTag.startChars)
+                        stack.append(mismatchTag)
+                    end
+                end
+            end
+            while stack.length > 0
+                tag = stack.pop
+                response.push(tag.endChars)
+            end
+            response = optimize(response)
+            result = response.map{ |response| response.chars }.join()
+        end
+        result
+    end
+end

data/lib/Post.rb CHANGED Viewed

@@ -9,7 +9,7 @@ require 'date'
 class Post
   class PostInfo
-    attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt
+    attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt, :collectionName
   end
   def self.getPostIDFromPostURLString(postURLString)
@@ -38,12 +38,23 @@ class Post
     json
   end
-  def self.parsePostParagraphsFromPostContent(content, postID)
-    result = content&.dig("Post:#{postID}", "content({\"postMeteringOptions\":null})", "bodyModel", "paragraphs")
-    if result.nil?
-      nil
+  def self.fetchPostParagraphs(postID)
+    query = [
+      {
+        "operationName": "PostViewerEdgeContentQuery",
+        "variables": {
+          "postId": postID
+        },
+        "query": "query PostViewerEdgeContentQuery($postId: ID!, $postMeteringOptions: PostMeteringOptions) {\n  post(id: $postId) {\n    ... on Post {\n      id\n      viewerEdge {\n        id\n        fullContent(postMeteringOptions: $postMeteringOptions) {\n          isLockedPreviewOnly\n          validatedShareKey\n          bodyModel {\n            ...PostBody_bodyModel\n            __typename\n          }\n          __typename\n        }\n        __typename\n      }\n      __typename\n    }\n    __typename\n  }\n}\n\nfragment PostBody_bodyModel on RichText {\n  sections {\n    name\n    startIndex\n    textLayout\n    imageLayout\n    backgroundImage {\n      id\n      originalHeight\n      originalWidth\n      __typename\n    }\n    videoLayout\n    backgroundVideo {\n      videoId\n      originalHeight\n      originalWidth\n      previewImageId\n      __typename\n    }\n    __typename\n  }\n  paragraphs {\n    id\n    ...PostBodySection_paragraph\n    __typename\n  }\n  ...normalizedBodyModel_richText\n  __typename\n}\n\nfragment PostBodySection_paragraph on Paragraph {\n  name\n  ...PostBodyParagraph_paragraph\n  __typename\n  id\n}\n\nfragment PostBodyParagraph_paragraph on Paragraph {\n  name\n  type\n  ...ImageParagraph_paragraph\n  ...TextParagraph_paragraph\n  ...IframeParagraph_paragraph\n  ...MixtapeParagraph_paragraph\n  __typename\n  id\n}\n\nfragment ImageParagraph_paragraph on Paragraph {\n  href\n  layout\n  metadata {\n    id\n    originalHeight\n    originalWidth\n    focusPercentX\n    focusPercentY\n    alt\n    __typename\n  }\n  ...Markups_paragraph\n  ...ParagraphRefsMapContext_paragraph\n  ...PostAnnotationsMarker_paragraph\n  __typename\n  id\n}\n\nfragment Markups_paragraph on Paragraph {\n  name\n  text\n  hasDropCap\n  dropCapImage {\n    ...MarkupNode_data_dropCapImage\n    __typename\n    id\n  }\n  markups {\n    type\n    start\n    end\n    href\n    anchorType\n    userId\n    linkMetadata {\n      httpStatus\n      
__typename\n    }\n    __typename\n  }\n  __typename\n  id\n}\n\nfragment MarkupNode_data_dropCapImage on ImageMetadata {\n  ...DropCap_image\n  __typename\n  id\n}\n\nfragment DropCap_image on ImageMetadata {\n  id\n  originalHeight\n  originalWidth\n  __typename\n}\n\nfragment ParagraphRefsMapContext_paragraph on Paragraph {\n  id\n  name\n  text\n  __typename\n}\n\nfragment PostAnnotationsMarker_paragraph on Paragraph {\n  ...PostViewNoteCard_paragraph\n  __typename\n  id\n}\n\nfragment PostViewNoteCard_paragraph on Paragraph {\n  name\n  __typename\n  id\n}\n\nfragment TextParagraph_paragraph on Paragraph {\n  type\n  hasDropCap\n  ...Markups_paragraph\n  ...ParagraphRefsMapContext_paragraph\n  __typename\n  id\n}\n\nfragment IframeParagraph_paragraph on Paragraph {\n  iframe {\n    mediaResource {\n      id\n      iframeSrc\n      iframeHeight\n      iframeWidth\n      title\n      __typename\n    }\n    __typename\n  }\n  layout\n  ...getEmbedlyCardUrlParams_paragraph\n  ...Markups_paragraph\n  __typename\n  id\n}\n\nfragment getEmbedlyCardUrlParams_paragraph on Paragraph {\n  type\n  iframe {\n    mediaResource {\n      iframeSrc\n      __typename\n    }\n    __typename\n  }\n  __typename\n  id\n}\n\nfragment MixtapeParagraph_paragraph on Paragraph {\n  type\n  mixtapeMetadata {\n    href\n    mediaResource {\n      mediumCatalog {\n        id\n        __typename\n      }\n      __typename\n    }\n    __typename\n  }\n  ...GenericMixtapeParagraph_paragraph\n  __typename\n  id\n}\n\nfragment GenericMixtapeParagraph_paragraph on Paragraph {\n  text\n  mixtapeMetadata {\n    href\n    thumbnailImageId\n    __typename\n  }\n  markups {\n    start\n    end\n    type\n    href\n    __typename\n  }\n  __typename\n  id\n}\n\nfragment normalizedBodyModel_richText on RichText {\n  paragraphs {\n    markups {\n      type\n      __typename\n    }\n    ...getParagraphHighlights_paragraph\n    ...getParagraphPrivateNotes_paragraph\n    __typename\n  }\n  sections {\n    
startIndex\n    ...getSectionEndIndex_section\n    __typename\n  }\n  ...getParagraphStyles_richText\n  ...getParagraphSpaces_richText\n  __typename\n}\n\nfragment getParagraphHighlights_paragraph on Paragraph {\n  name\n  __typename\n  id\n}\n\nfragment getParagraphPrivateNotes_paragraph on Paragraph {\n  name\n  __typename\n  id\n}\n\nfragment getSectionEndIndex_section on Section {\n  startIndex\n  __typename\n}\n\nfragment getParagraphStyles_richText on RichText {\n  paragraphs {\n    text\n    type\n    __typename\n  }\n  sections {\n    ...getSectionEndIndex_section\n    __typename\n  }\n  __typename\n}\n\nfragment getParagraphSpaces_richText on RichText {\n  paragraphs {\n    layout\n    metadata {\n      originalHeight\n      originalWidth\n      __typename\n    }\n    type\n    ...paragraphExtendsImageGrid_paragraph\n    __typename\n  }\n  ...getSeriesParagraphTopSpacings_richText\n  ...getPostParagraphTopSpacings_richText\n  __typename\n}\n\nfragment paragraphExtendsImageGrid_paragraph on Paragraph {\n  layout\n  type\n  __typename\n  id\n}\n\nfragment getSeriesParagraphTopSpacings_richText on RichText {\n  paragraphs {\n    id\n    __typename\n  }\n  sections {\n    startIndex\n    __typename\n  }\n  __typename\n}\n\nfragment getPostParagraphTopSpacings_richText on RichText {\n  paragraphs {\n    layout\n    text\n    __typename\n  }\n  sections {\n    startIndex\n    __typename\n  }\n  __typename\n}\n"
+      }
+    ]
+    body = Request.body(Request.URL("https://medium.com/_/graphql", "POST", query))
+    if !body.nil?
+      json = JSON.parse(body)
+      json&.dig(0, "data", "post", "viewerEdge", "fullContent", "bodyModel", "paragraphs")
     else
-      result.map { |paragraph| content[paragraph["__ref"]] }
+      nil
     end
   end
@@ -57,6 +68,13 @@ class Post
       postInfo.creator = content&.dig(creatorRef, "name")
     end
+    colletionRef = content&.dig("Post:#{postID}", "collection", "__ref")
+    if !colletionRef.nil?
+      postInfo.collectionName = content&.dig(colletionRef, "name")
+    end
     firstPublishedAt = content&.dig("Post:#{postID}", "firstPublishedAt")
     if !firstPublishedAt.nil?
       postInfo.firstPublishedAt = Time.at(0, firstPublishedAt, :millisecond)

data/lib/ZMediumFetcher.rb CHANGED Viewed

@@ -26,6 +26,7 @@ require "PathPolicy"
 require "Request"
 require "Post"
 require "User"
+require 'date'
 class ZMediumFetcher
@@ -127,7 +128,7 @@ class ZMediumFetcher
         postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
-        sourceParagraphs = Post.parsePostParagraphsFromPostContent(postContent, postID)
+        sourceParagraphs = Post.fetchPostParagraphs(postID)
         if sourceParagraphs.nil?
             raise "Error: Paragraph not found! PostURL: #{postURL}"
         end
@@ -140,7 +141,7 @@ class ZMediumFetcher
         previousParagraph = nil
         preTypeParagraphs = []
         sourceParagraphs.each do |sourcParagraph|
-            paragraph = Paragraph.new(sourcParagraph, postID, postContent)
+            paragraph = Paragraph.new(sourcParagraph, postID)
             if OLIParser.isOLI(paragraph)
                 oliIndex += 1
                 paragraph.oliIndex = oliIndex
@@ -148,10 +149,11 @@ class ZMediumFetcher
                 oliIndex = 0
             end
-            # if previous is OLI or ULI and current is not OLI or ULI
+            # if previous is OLI or ULI or BQ and current is not OLI or ULI or BQ
             # than insert a blank paragraph to keep markdown foramt correct
             if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
-                (ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
+                (ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))||
+                (BQParser.isBQ(previousParagraph) && !BQParser.isBQ(paragraph))
                 paragraphs.append(Paragraph.makeBlankParagraph(postID))
             end
@@ -178,7 +180,7 @@ class ZMediumFetcher
                                 groupByText += "\n"
                             end
-                            markupParser = MarkupParser.new(postHtml, preTypeParagraph)
+                            markupParser = MarkupParser.new(preTypeParagraph)
                             groupByText += markupParser.parse()
                         end
@@ -203,7 +205,7 @@ class ZMediumFetcher
         postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
-        imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "images")
+        imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "assets")
         startParser = buildParser(imagePathPolicy)
         progress.totalPostParagraphsLength = paragraphs.length
@@ -211,7 +213,9 @@ class ZMediumFetcher
         progress.message = "Converting Post..."
         progress.printLog()
-        absolutePath = postPathPolicy.getAbsolutePath("#{postPath}.md")
+        postWithDatePath = "#{postInfo.firstPublishedAt.strftime("%Y-%m-%d")}-#{postPath}"
+        absolutePath = postPathPolicy.getAbsolutePath("#{postWithDatePath}.md")
         # if markdown file is exists and last modification time is >= latestPublishedAt(last update post time on medium)
         if File.file?(absolutePath) && File.mtime(absolutePath) >= postInfo.latestPublishedAt
@@ -227,8 +231,11 @@ class ZMediumFetcher
                 index = 0
                 paragraphs.each do |paragraph|
-                    markupParser = MarkupParser.new(postHtml, paragraph)
-                    paragraph.text = markupParser.parse()
+                    if !(CodeBlockParser.isCodeBlock(paragraph) || PREParser.isPRE(paragraph))
+                        markupParser = MarkupParser.new(paragraph)
+                        paragraph.text = markupParser.parse()
+                    end
                     result = startParser.parse(paragraph)
                     if !linkParser.nil?

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ZMediumToMarkdown
 version: !ruby/object:Gem::Version
-  version: 1.5.0
+  version: 1.6.2
 platform: ruby
 authors:
 - ZhgChgLi
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-06-01 00:00:00.000000000 Z
+date: 2022-06-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -24,20 +24,6 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 1.13.1
-- !ruby/object:Gem::Dependency
-  name: reverse_markdown
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: 2.1.1
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: 2.1.1
 - !ruby/object:Gem::Dependency
   name: net-http
   requirement: !ruby/object:Gem::Requirement
@@ -91,6 +77,7 @@ files:
 - lib/Parsers/LinkParser.rb
 - lib/Parsers/MIXTAPEEMBEDParser.rb
 - lib/Parsers/MarkupParser.rb
+- lib/Parsers/MarkupStyleRender.rb
 - lib/Parsers/OLIParser.rb
 - lib/Parsers/PParser.rb
 - lib/Parsers/PQParser.rb