ZMediumToMarkdown 1.5.0 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b38eda08edc524ebdbb5c459b106a907d8c86db59a772f466781a1da297b8ebc
4
- data.tar.gz: 1c9c62e9124bc30acef072ee1a1d63dac4dc61212c0ddc443377314bcfe410ec
3
+ metadata.gz: 57ebbd86d072c9c43a5baef02031561323e9e7f6857e639aecc754de5741c543
4
+ data.tar.gz: 03bde3f39434b21c7d96380d05dbc9ccc7096f30ab97aade0e6838165e28de3e
5
5
  SHA512:
6
- metadata.gz: 7e9965a014bf975e82c5255228a5cef81ecf434be208a7bec2baab319943ba5fa92871fa02bb36c698733259504b3271ed6e1087a2bf02b7f11fabfc5964e4f1
7
- data.tar.gz: c1825a86bc74c9ec61c4481bbfa603f167cbe7f8adac166b19e8c0b0563d212eaa541df62c45103f939172ecd06ff4abced2ed698dfff0aa227bce549b61c2c7
6
+ metadata.gz: d108f648afe9eb0f90231dc771e22fd6f3f15d820f6d19aa941d23b48b2416c672350b6353af85c58dd13ccc1c3faa194b55f622476d09ea8bc84cccff6ba6ac
7
+ data.tar.gz: e484a2d51bc9ec006dc5511e2586d29eaab102bf778a030240ee92941b4fdfd9a726fbc5542677af7dbd8bb192e6c5d162b30b8c8b5001d166d3561edabdb9ec
data/lib/Helper.rb CHANGED
@@ -77,10 +77,15 @@ class Helper
77
77
  end
78
78
 
79
79
  def self.createPostInfo(postInfo)
80
+
81
+ title = postInfo.title.gsub("[","")
82
+ title = title.gsub("]","")
83
+
80
84
  result = "---\n"
81
- result += "title: #{postInfo.title}\n"
85
+ result += "title: #{title}\n"
82
86
  result += "author: #{postInfo.creator}\n"
83
87
  result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
88
+ result += "categories: #{postInfo.collectionName}\n"
84
89
  result += "tags: [#{postInfo.tags.join(",")}]\n"
85
90
  result += "---\n"
86
91
  result += "\r\n"
@@ -159,7 +164,7 @@ class Helper
159
164
  text += "+-----------------------------------------------------------------------------------+"
160
165
  text += "\r\n"
161
166
  text += "\r\n"
162
- text += "| **[View original post on Medium](#{postURL}) - Converted by [ZhgChgLi](https://blog.zhgchg.li)/[ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)** |"
167
+ text += "| **[View original post on Medium](#{postURL}) - Converted by [ZhgChgLi](https://zhgchg.li)/[ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)** |"
163
168
  text += "\r\n"
164
169
  text += "\r\n"
165
170
  text += "+-----------------------------------------------------------------------------------+"
@@ -167,4 +172,4 @@ class Helper
167
172
 
168
173
  text
169
174
  end
170
- end
175
+ end
@@ -4,7 +4,7 @@ require 'Parsers/PParser'
4
4
  require 'securerandom'
5
5
 
6
6
  class Paragraph
7
- attr_accessor :postID, :name, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :hasMarkup, :oliIndex, :markupLinks
7
+ attr_accessor :postID, :name, :orgText, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :oliIndex, :markups, :markupLinks
8
8
 
9
9
  class Iframe
10
10
  attr_accessor :id, :title, :type, :src
@@ -20,6 +20,19 @@ class Paragraph
20
20
  end
21
21
  end
22
22
 
23
# One inline markup entry of a paragraph, populated from a JSON-decoded hash.
# Every attribute is copied verbatim from the key of the same name; keys that
# are absent from the hash simply leave the attribute nil.
class Markup
  attr_accessor :type, :start, :end, :href, :anchorType, :userId, :linkMetadata

  # json: hash-like object indexed by the string keys listed below.
  def initialize(json)
    %w[type start end href anchorType userId linkMetadata].each do |key|
      instance_variable_set("@#{key}", json[key])
    end
  end
end
35
+
23
36
  class MetaData
24
37
  attr_accessor :id, :type
25
38
  def initialize(json)
@@ -41,12 +54,13 @@ class Paragraph
41
54
  "text" => "",
42
55
  "type" => PParser.getTypeString()
43
56
  }
44
- Paragraph.new(json, postID, nil)
57
+ Paragraph.new(json, postID)
45
58
  end
46
59
 
47
- def initialize(json, postID, resource)
60
+ def initialize(json, postID)
48
61
  @name = json['name']
49
62
  @text = json['text']
63
+ @orgText = json['text']
50
64
  @type = json['type']
51
65
  @href = json['href']
52
66
  @postID = postID
@@ -54,7 +68,7 @@ class Paragraph
54
68
  if json['metadata'].nil?
55
69
  @metadata = nil
56
70
  else
57
- @metadata = MetaData.new(resource[json['metadata']['__ref']])
71
+ @metadata = MetaData.new(json['metadata'])
58
72
  end
59
73
 
60
74
  if json['mixtapeMetadata'].nil?
@@ -66,17 +80,22 @@ class Paragraph
66
80
  if json['iframe'].nil?
67
81
  @iframe = nil
68
82
  else
69
- @iframe = Iframe.new(resource[json['iframe']['mediaResource']['__ref']])
83
+ @iframe = Iframe.new(json['iframe']['mediaResource'])
70
84
  end
71
85
 
72
86
  if !json['markups'].nil? && json['markups'].length > 0
87
+ markups = []
88
+ json['markups'].each do |markup|
89
+ markups.append(Markup.new(markup))
90
+ end
91
+ @markups = markups
92
+
73
93
  links = json['markups'].select{ |markup| markup["type"] == "A" }
74
94
  if !links.nil? && links.length > 0
75
95
  @markupLinks = links.map{ |link| link["href"] }
76
96
  end
77
- @hasMarkup = true
78
97
  else
79
- @hasMarkup = false
98
+ @markups = nil
80
99
  end
81
100
  end
82
101
  end
@@ -5,12 +5,18 @@ require 'Models/Paragraph'
5
5
 
6
6
  class BQParser < Parser
7
7
  attr_accessor :nextParser
8
+
9
# True only when a paragraph is given and its type is "BQ".
# A nil paragraph yields false instead of raising.
def self.isBQ(paragraph)
  return false if paragraph.nil?

  paragraph.type == "BQ"
end
16
+
8
17
  def parse(paragraph)
9
- if paragraph.type == 'BQ'
10
- result = ""
11
- paragraph.text.each_line do |p|
12
- result += "> #{p}"
13
- end
18
+ if BQParser.isBQ(paragraph)
19
+ result = "> #{paragraph.text}"
14
20
  result
15
21
  else
16
22
  if !nextParser.nil?
@@ -10,8 +10,16 @@ class CodeBlockParser < Parser
10
10
  'CODE_BLOCK'
11
11
  end
12
12
 
13
# True only when a paragraph is given and its type equals the value returned
# by CodeBlockParser.getTypeString(). A nil paragraph yields false.
def self.isCodeBlock(paragraph)
  return false if paragraph.nil?

  paragraph.type == CodeBlockParser.getTypeString()
end
20
+
13
21
  def parse(paragraph)
14
- if paragraph.type == CodeBlockParser.getTypeString()
22
+ if CodeBlockParser.isCodeBlock(paragraph)
15
23
  "```\n#{paragraph.text}\n```"
16
24
  else
17
25
  if !nextParser.nil?
@@ -18,11 +18,16 @@ class IMGParser < Parser
18
18
  imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
19
19
  absolutePath = imagePathPolicy.getAbsolutePath(fileName)
20
20
 
21
+ comment = ""
22
+ if paragraph.text != ""
23
+ comment = " \"#{paragraph.text}\""
24
+ end
25
+
21
26
  if ImageDownloader.download(absolutePath, imageURL)
22
27
  relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
23
- "![#{paragraph.text}](#{relativePath} \"#{paragraph.text}\")"
28
+ "![#{paragraph.text}](/#{relativePath}#{comment})"
24
29
  else
25
- "![#{paragraph.text}](#{imageURL} \"#{paragraph.text}\")"
30
+ "![#{paragraph.text}](#{imageURL}#{comment})"
26
31
  end
27
32
  else
28
33
  if !nextParser.nil?
@@ -24,6 +24,7 @@ class IframeParser < Parser
24
24
  # is youtube
25
25
  youtubeURL = URI(URI.decode(url)).query
26
26
  params = URI::decode_www_form(youtubeURL).to_h
27
+
27
28
  if !params["image"].nil? && !params["url"].nil?
28
29
 
29
30
  fileName = "#{paragraph.name}_#{URI(params["image"]).path.split("/").last}" #21de_default.jpg
@@ -31,12 +32,12 @@ class IframeParser < Parser
31
32
  imageURL = params["image"]
32
33
  imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
33
34
  absolutePath = imagePathPolicy.getAbsolutePath(fileName)
34
-
35
+ title = paragraph.iframe.title
35
36
  if ImageDownloader.download(absolutePath, imageURL)
36
37
  relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
37
- result = "\n[![YouTube](#{relativePath} \"YouTube\")](#{params["url"]})"
38
+ result = "\n[![#{title}](#{relativePath} \"#{title}\")](#{params["url"]})"
38
39
  else
39
- result = "\n[YouTube](#{params["url"]})"
40
+ result = "\n[#{title}](#{params["url"]})"
40
41
  end
41
42
  end
42
43
  else
@@ -54,7 +55,7 @@ class IframeParser < Parser
54
55
  gistHTML.search('a').each do |a|
55
56
  if a.text == 'view raw'
56
57
  gistRAW = Request.body(Request.URL(a['href']))
57
- result = "```#{lang}\n#{gistRAW}\n```"
58
+ result = "```#{lang.downcase}\n#{gistRAW}\n```"
58
59
  end
59
60
  end
60
61
  end
@@ -23,19 +23,7 @@ class LinkParser
23
23
 
24
24
  postPath = link.split("/").last
25
25
  if !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == postPath.split("-").last }.nil?
26
- markdownString = markdownString.sub! link, postPath
27
- end
28
- else
29
- if !(link =~ /\A#{URI::regexp(['http', 'https'])}\z/)
30
- # medium will give you an relative path if url is medium's post (due to we use html to markdown render)
31
- # e.g. /zrealm-ios-dev/visitor-pattern-in-ios-swift-ba5773a7bfea
32
- # it's not a vaild url
33
-
34
- # fullfill url from markup attribute
35
- match = markupLinks.find{ |markupLink| markupLink.include? link }
36
- if !match.nil?
37
- markdownString = markdownString.sub! link, match
38
- end
26
+ markdownString = markdownString.sub! link, "../#{postPath}"
39
27
  end
40
28
  end
41
29
  end
@@ -8,9 +8,9 @@ class MIXTAPEEMBEDParser < Parser
8
8
  def parse(paragraph)
9
9
  if paragraph.type == 'MIXTAPE_EMBED'
10
10
  if !paragraph.mixtapeMetadata.nil? && !paragraph.mixtapeMetadata.href.nil?
11
- "\n[#{paragraph.text}](#{paragraph.mixtapeMetadata.href})"
11
+ "\n[#{paragraph.orgText}](#{paragraph.mixtapeMetadata.href})"
12
12
  else
13
- "\n#{paragraph.text}"
13
+ "\n#{paragraph.orgText}"
14
14
  end
15
15
  else
16
16
  if !nextParser.nil?
@@ -1,23 +1,28 @@
1
1
  $lib = File.expand_path('../', File.dirname(__FILE__))
2
2
 
3
3
  require 'Models/Paragraph'
4
- require 'reverse_markdown'
4
+ require 'Parsers/MarkupStyleRender'
5
5
  require 'nokogiri'
6
+ require 'securerandom'
7
+ require 'User'
6
8
 
7
9
  class MarkupParser
8
10
  attr_accessor :body, :paragraph
9
11
 
10
- def initialize(html, paragraph)
11
- @body = html.search("body").first
12
+ def initialize(paragraph)
12
13
  @paragraph = paragraph
13
14
  end
14
15
 
15
16
  def parse()
16
17
  result = paragraph.text
17
- if paragraph.hasMarkup
18
- p = body.at_css("##{paragraph.name}")
19
- if !p.nil?
20
- result = ReverseMarkdown.convert p.inner_html
18
+ if !paragraph.markups.nil? && paragraph.markups.length > 0
19
+ markupRender = MarkupStyleRender.new(paragraph)
20
+
21
+ begin
22
+ result = markupRender.parse()
23
+ rescue => e
24
+ puts e.backtrace
25
+ Helper.makeWarningText("Error occurred during render markup text, please help to open an issue on github.")
21
26
  end
22
27
  end
23
28
 
@@ -0,0 +1,232 @@
1
+
2
+ $lib = File.expand_path('../', File.dirname(__FILE__))
3
+
4
+ require 'Models/Paragraph'
5
+
6
# Renders the inline markups of a paragraph (EM, STRONG, CODE, A) as Markdown.
#
# The paragraph text is split into indexed character slots; each markup's
# start/end offsets are matched against those slot indexes while the text is
# walked once, emitting opening and closing markers around the covered slots.
# `paragraph` must respond to `text` and `markups`; each markup must respond to
# `type`, `start`, `end`, `href`, `anchorType` and `userId`.
class MarkupStyleRender
  attr_accessor :paragraph, :chars, :encodeType

  # Characters that occupy one extra index slot (see #initialize).
  EMOJI_REGEX = /[\u{203C}\u{2049}\u{20E3}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{23E9}-\u{23EC}\u{23F0}\u{23F3}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2601}\u{260E}\u{2611}\u{2614}-\u{2615}\u{261D}\u{263A}\u{2648}-\u{2653}\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267F}\u{2693}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26CE}\u{26D4}\u{26EA}\u{26F2}-\u{26F3}\u{26F5}\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270C}\u{270F}\u{2712}\u{2714}\u{2716}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1E7}-\u{1F1EC}\u{1F1EE}-\u{1F1F0}\u{1F1F3}\u{1F1F5}\u{1F1F7}-\u{1F1FA}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F320}\u{1F330}-\u{1F335}\u{1F337}-\u{1F37C}\u{1F380}-\u{1F393}\u{1F3A0}-\u{1F3C4}\u{1F3C6}-\u{1F3CA}\u{1F3E0}-\u{1F3F0}\u{1F400}-\u{1F43E}\u{1F440}\u{1F442}-\u{1F4F7}\u{1F4F9}-\u{1F4FC}\u{1F500}-\u{1F507}\u{1F509}-\u{1F53D}\u{1F550}-\u{1F567}\u{1F5FB}-\u{1F640}\u{1F645}-\u{1F64F}\u{1F680}-\u{1F68A}]/
  # Characters matched by EMOJI_REGEX that must NOT get the extra slot.
  EXCLUDED_EMOJIS = ["⚠"].freeze

  # A run of output characters plus its kind: "Text", "TagStart" or "TagEnd".
  class TextChar
    attr_accessor :chars, :type
    def initialize(chars, type)
      @chars = chars
      @type = type
    end
  end

  # One markup converted to its Markdown opening/closing markers.
  # `endIndex` is stored as (end - 1): the index of the last covered slot.
  # `sort` orders tags that open at the same index (lower value opens first).
  # The inherited `chars`/`type` are not set here; only `startChars` and
  # `endChars` are ever placed into the rendered output.
  class TagChar < TextChar
    attr_accessor :sort, :startIndex, :endIndex, :startChars, :endChars
    def initialize(sort, startIndex, endIndex, startChars, endChars)
      @sort = sort
      @startIndex = startIndex
      @endIndex = endIndex - 1
      @startChars = TextChar.new(startChars.chars, 'TagStart')
      @endChars = TextChar.new(endChars.chars, 'TagEnd')
    end
  end

  # Splits paragraph.text into a Hash of slot index => TextChar.
  # A nil text is treated as an empty string.
  def initialize(paragraph)
    @paragraph = paragraph

    slots = {}
    index = 0

    paragraph.text.to_s.each_char do |char|
      slots[index] = TextChar.new([char], "Text")
      index += 1
      if char =~ EMOJI_REGEX && !EXCLUDED_EMOJIS.include?(char)
        # some emoji need more space (in Medium): reserve an empty slot so
        # later markup offsets still line up
        slots[index] = TextChar.new([], "Text")
        index += 1
      end
    end

    @chars = slots
  end

  # Normalizes the whitespace around emitted markers, in place, and returns
  # the same array. Each pass applies at most one edit and then rescans from
  # the start, because an edit shifts every later index; the loop ends on the
  # first pass that changes nothing.
  def optimize(chars)
    loop do
      hasExcute = false

      index = 0
      startTagIndex = nil
      preTag = nil
      preTagIndex = nil
      preTextChar = nil
      preTextIndex = nil
      chars.each do |char|

        if !preTag.nil?
          if preTag.type == "TagStart" && char.type == "TagEnd"
            # empty tag pair: drop both markers
            chars.delete_at(index)
            chars.delete_at(preTagIndex)
            hasExcute = true
            break
          end
        end

        if char.type == "TagStart" && (preTag == nil || preTag.type == "TagEnd" || preTag.type == "Text")
          # remember where a run of opening markers begins
          startTagIndex = index
        elsif (char.type == "TagEnd" || char.type == "Text") && startTagIndex != nil
          if preTextChar != nil && preTextChar.chars.join() != "\n"
            # not first tag & insert blank between start tag and before text
            if preTextChar.chars.join() != " "
              chars.insert(startTagIndex, TextChar.new(" ".chars, "Text"))
              hasExcute = true
              break
            end
          end
          startTagIndex = nil
        end

        if !preTag.nil?
          if preTag.type == "TagStart" && char.type == "Text"
            # delete blank between start tag and after text
            if char.chars.join().strip == ""
              chars.delete_at(index)
              hasExcute = true
              break
            end
          end

          if preTag.type == "Text" && char.type == "TagEnd"
            # delete blank (but not a newline) right before a closing marker
            if preTextChar.chars.join().strip == "" && preTextChar.chars.join() != "\n"
              chars.delete_at(preTextIndex)
              hasExcute = true
              break
            end
          end

          if preTag.type == "TagEnd" && char.type == "Text"
            # make sure a closing marker is followed by a blank
            if char.chars.join() != " "
              chars.insert(index, TextChar.new(" ".chars, "Text"))
              hasExcute = true
              break
            end
          end

        end

        if char.type == "Text"
          preTextChar = char
          preTextIndex = index
        end

        preTag = char
        preTagIndex = index

        index += 1
      end

      break if !hasExcute
    end

    chars
  end

  # Returns the paragraph text with its markups rendered as Markdown, or the
  # untouched paragraph.text when there are no markups.
  def parse()
    result = paragraph.text

    if !paragraph.markups.nil? && paragraph.markups.length > 0

      tags = []
      paragraph.markups.each do |markup|
        sort = nil
        startChars = nil
        endChars = nil

        case markup.type
        when "EM"
          sort, startChars, endChars = 2, "_", "_"
        when "CODE"
          sort, startChars, endChars = 3, "`", "`"
        when "STRONG"
          sort, startChars, endChars = 2, "**", "**"
        when "A"
          url = markup.href
          if markup.anchorType == "USER"
            url = "https://medium.com/u/#{markup.userId}"
          end
          sort, startChars, endChars = 1, "[", "](#{url})"
        else
          Helper.makeWarningText("Undefined Markup Type: #{markup.type}.")
        end

        next if sort.nil?
        # A markup with a missing, empty or inverted range covers no text.
        # Its end index would be reached before it is ever opened, which used
        # to exhaust the stack below and raise NoMethodError on nil.
        next if markup.start.nil? || markup.end.nil? || markup.end <= markup.start

        tags.append(TagChar.new(sort, markup.start, markup.end, startChars, endChars))
      end

      response = []
      stack = []   # currently open tags, outermost first

      chars.each do |index, char|

        if char.chars.join() == "\n"
          # close every open tag before the line break (innermost first) and
          # reopen them after it (outermost first); the stack itself is kept
          stack.reverse_each { |openTag| response.push(openTag.endChars) }
          response.append(TextChar.new(char.chars, 'Text'))
          stack.each { |openTag| response.push(openTag.startChars) }
        end

        startTags = tags.select { |tag| tag.startIndex == index }.sort_by(&:sort)
        startTags.each do |tag|
          response.append(tag.startChars)
          stack.append(tag)
        end

        if char.chars.join() != "\n"
          response.append(TextChar.new(char.chars, 'Text'))
        end

        # Only tags that are actually open can be closed here; this guarantees
        # the pop loop below finds every end tag before the stack runs empty.
        endTags = tags.select { |tag| tag.endIndex == index && stack.include?(tag) }
        if endTags.length > 0
          mismatchTags = []
          while endTags.length > 0
            stackTag = stack.pop
            stackTagInEndTagsIndex = endTags.find_index(stackTag)
            if !stackTagInEndTagsIndex.nil?
              # as expected
              endTags.delete_at(stackTagInEndTagsIndex)
            else
              # still-open tag nested inside one that ends here: close it now
              # and reopen it afterwards so the markers stay properly nested
              mismatchTags.append(stackTag)
            end
            response.append(stackTag.endChars)
          end

          while mismatchTags.length > 0
            mismatchTag = mismatchTags.pop
            response.append(mismatchTag.startChars)
            stack.append(mismatchTag)
          end
        end
      end

      # close whatever is still open at the end of the text
      while stack.length > 0
        tag = stack.pop
        response.push(tag.endChars)
      end

      response = optimize(response)
      result = response.map { |piece| piece.chars }.join()
    end

    result
  end

end
data/lib/Post.rb CHANGED
@@ -9,7 +9,7 @@ require 'date'
9
9
  class Post
10
10
 
11
11
  class PostInfo
12
- attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt
12
+ attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt, :collectionName
13
13
  end
14
14
 
15
15
  def self.getPostIDFromPostURLString(postURLString)
@@ -38,12 +38,23 @@ class Post
38
38
  json
39
39
  end
40
40
 
41
- def self.parsePostParagraphsFromPostContent(content, postID)
42
- result = content&.dig("Post:#{postID}", "content({\"postMeteringOptions\":null})", "bodyModel", "paragraphs")
43
- if result.nil?
44
- nil
41
+ def self.fetchPostParagraphs(postID)
42
+ query = [
43
+ {
44
+ "operationName": "PostViewerEdgeContentQuery",
45
+ "variables": {
46
+ "postId": postID
47
+ },
48
+ "query": "query PostViewerEdgeContentQuery($postId: ID!, $postMeteringOptions: PostMeteringOptions) {\n post(id: $postId) {\n ... on Post {\n id\n viewerEdge {\n id\n fullContent(postMeteringOptions: $postMeteringOptions) {\n isLockedPreviewOnly\n validatedShareKey\n bodyModel {\n ...PostBody_bodyModel\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\nfragment PostBody_bodyModel on RichText {\n sections {\n name\n startIndex\n textLayout\n imageLayout\n backgroundImage {\n id\n originalHeight\n originalWidth\n __typename\n }\n videoLayout\n backgroundVideo {\n videoId\n originalHeight\n originalWidth\n previewImageId\n __typename\n }\n __typename\n }\n paragraphs {\n id\n ...PostBodySection_paragraph\n __typename\n }\n ...normalizedBodyModel_richText\n __typename\n}\n\nfragment PostBodySection_paragraph on Paragraph {\n name\n ...PostBodyParagraph_paragraph\n __typename\n id\n}\n\nfragment PostBodyParagraph_paragraph on Paragraph {\n name\n type\n ...ImageParagraph_paragraph\n ...TextParagraph_paragraph\n ...IframeParagraph_paragraph\n ...MixtapeParagraph_paragraph\n __typename\n id\n}\n\nfragment ImageParagraph_paragraph on Paragraph {\n href\n layout\n metadata {\n id\n originalHeight\n originalWidth\n focusPercentX\n focusPercentY\n alt\n __typename\n }\n ...Markups_paragraph\n ...ParagraphRefsMapContext_paragraph\n ...PostAnnotationsMarker_paragraph\n __typename\n id\n}\n\nfragment Markups_paragraph on Paragraph {\n name\n text\n hasDropCap\n dropCapImage {\n ...MarkupNode_data_dropCapImage\n __typename\n id\n }\n markups {\n type\n start\n end\n href\n anchorType\n userId\n linkMetadata {\n httpStatus\n __typename\n }\n __typename\n }\n __typename\n id\n}\n\nfragment MarkupNode_data_dropCapImage on ImageMetadata {\n ...DropCap_image\n __typename\n id\n}\n\nfragment DropCap_image on ImageMetadata {\n id\n originalHeight\n originalWidth\n __typename\n}\n\nfragment ParagraphRefsMapContext_paragraph on Paragraph {\n id\n 
name\n text\n __typename\n}\n\nfragment PostAnnotationsMarker_paragraph on Paragraph {\n ...PostViewNoteCard_paragraph\n __typename\n id\n}\n\nfragment PostViewNoteCard_paragraph on Paragraph {\n name\n __typename\n id\n}\n\nfragment TextParagraph_paragraph on Paragraph {\n type\n hasDropCap\n ...Markups_paragraph\n ...ParagraphRefsMapContext_paragraph\n __typename\n id\n}\n\nfragment IframeParagraph_paragraph on Paragraph {\n iframe {\n mediaResource {\n id\n iframeSrc\n iframeHeight\n iframeWidth\n title\n __typename\n }\n __typename\n }\n layout\n ...getEmbedlyCardUrlParams_paragraph\n ...Markups_paragraph\n __typename\n id\n}\n\nfragment getEmbedlyCardUrlParams_paragraph on Paragraph {\n type\n iframe {\n mediaResource {\n iframeSrc\n __typename\n }\n __typename\n }\n __typename\n id\n}\n\nfragment MixtapeParagraph_paragraph on Paragraph {\n type\n mixtapeMetadata {\n href\n mediaResource {\n mediumCatalog {\n id\n __typename\n }\n __typename\n }\n __typename\n }\n ...GenericMixtapeParagraph_paragraph\n __typename\n id\n}\n\nfragment GenericMixtapeParagraph_paragraph on Paragraph {\n text\n mixtapeMetadata {\n href\n thumbnailImageId\n __typename\n }\n markups {\n start\n end\n type\n href\n __typename\n }\n __typename\n id\n}\n\nfragment normalizedBodyModel_richText on RichText {\n paragraphs {\n markups {\n type\n __typename\n }\n ...getParagraphHighlights_paragraph\n ...getParagraphPrivateNotes_paragraph\n __typename\n }\n sections {\n startIndex\n ...getSectionEndIndex_section\n __typename\n }\n ...getParagraphStyles_richText\n ...getParagraphSpaces_richText\n __typename\n}\n\nfragment getParagraphHighlights_paragraph on Paragraph {\n name\n __typename\n id\n}\n\nfragment getParagraphPrivateNotes_paragraph on Paragraph {\n name\n __typename\n id\n}\n\nfragment getSectionEndIndex_section on Section {\n startIndex\n __typename\n}\n\nfragment getParagraphStyles_richText on RichText {\n paragraphs {\n text\n type\n __typename\n }\n sections {\n 
...getSectionEndIndex_section\n __typename\n }\n __typename\n}\n\nfragment getParagraphSpaces_richText on RichText {\n paragraphs {\n layout\n metadata {\n originalHeight\n originalWidth\n __typename\n }\n type\n ...paragraphExtendsImageGrid_paragraph\n __typename\n }\n ...getSeriesParagraphTopSpacings_richText\n ...getPostParagraphTopSpacings_richText\n __typename\n}\n\nfragment paragraphExtendsImageGrid_paragraph on Paragraph {\n layout\n type\n __typename\n id\n}\n\nfragment getSeriesParagraphTopSpacings_richText on RichText {\n paragraphs {\n id\n __typename\n }\n sections {\n startIndex\n __typename\n }\n __typename\n}\n\nfragment getPostParagraphTopSpacings_richText on RichText {\n paragraphs {\n layout\n text\n __typename\n }\n sections {\n startIndex\n __typename\n }\n __typename\n}\n"
49
+ }
50
+ ]
51
+
52
+ body = Request.body(Request.URL("https://medium.com/_/graphql", "POST", query))
53
+ if !body.nil?
54
+ json = JSON.parse(body)
55
+ json&.dig(0, "data", "post", "viewerEdge", "fullContent", "bodyModel", "paragraphs")
45
56
  else
46
- result.map { |paragraph| content[paragraph["__ref"]] }
57
+ nil
47
58
  end
48
59
  end
49
60
 
@@ -57,6 +68,13 @@ class Post
57
68
  postInfo.creator = content&.dig(creatorRef, "name")
58
69
  end
59
70
 
71
+ colletionRef = content&.dig("Post:#{postID}", "collection", "__ref")
72
+ if !colletionRef.nil?
73
+ postInfo.collectionName = content&.dig(colletionRef, "name")
74
+ end
75
+
76
+
77
+
60
78
  firstPublishedAt = content&.dig("Post:#{postID}", "firstPublishedAt")
61
79
  if !firstPublishedAt.nil?
62
80
  postInfo.firstPublishedAt = Time.at(0, firstPublishedAt, :millisecond)
@@ -26,6 +26,7 @@ require "PathPolicy"
26
26
  require "Request"
27
27
  require "Post"
28
28
  require "User"
29
+ require 'date'
29
30
 
30
31
  class ZMediumFetcher
31
32
 
@@ -127,7 +128,7 @@ class ZMediumFetcher
127
128
 
128
129
  postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
129
130
 
130
- sourceParagraphs = Post.parsePostParagraphsFromPostContent(postContent, postID)
131
+ sourceParagraphs = Post.fetchPostParagraphs(postID)
131
132
  if sourceParagraphs.nil?
132
133
  raise "Error: Paragraph not found! PostURL: #{postURL}"
133
134
  end
@@ -140,7 +141,7 @@ class ZMediumFetcher
140
141
  previousParagraph = nil
141
142
  preTypeParagraphs = []
142
143
  sourceParagraphs.each do |sourcParagraph|
143
- paragraph = Paragraph.new(sourcParagraph, postID, postContent)
144
+ paragraph = Paragraph.new(sourcParagraph, postID)
144
145
  if OLIParser.isOLI(paragraph)
145
146
  oliIndex += 1
146
147
  paragraph.oliIndex = oliIndex
@@ -148,10 +149,11 @@ class ZMediumFetcher
148
149
  oliIndex = 0
149
150
  end
150
151
 
151
- # if previous is OLI or ULI and current is not OLI or ULI
152
+ # if previous is OLI or ULI or BQ and current is not OLI or ULI or BQ
152
153
  # than insert a blank paragraph to keep markdown foramt correct
153
154
  if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
154
- (ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
155
+ (ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))||
156
+ (BQParser.isBQ(previousParagraph) && !BQParser.isBQ(paragraph))
155
157
  paragraphs.append(Paragraph.makeBlankParagraph(postID))
156
158
  end
157
159
 
@@ -178,7 +180,7 @@ class ZMediumFetcher
178
180
  groupByText += "\n"
179
181
  end
180
182
 
181
- markupParser = MarkupParser.new(postHtml, preTypeParagraph)
183
+ markupParser = MarkupParser.new(preTypeParagraph)
182
184
  groupByText += markupParser.parse()
183
185
  end
184
186
 
@@ -203,7 +205,7 @@ class ZMediumFetcher
203
205
 
204
206
  postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
205
207
 
206
- imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "images")
208
+ imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "assets")
207
209
  startParser = buildParser(imagePathPolicy)
208
210
 
209
211
  progress.totalPostParagraphsLength = paragraphs.length
@@ -211,7 +213,9 @@ class ZMediumFetcher
211
213
  progress.message = "Converting Post..."
212
214
  progress.printLog()
213
215
 
214
- absolutePath = postPathPolicy.getAbsolutePath("#{postPath}.md")
216
+ postWithDatePath = "#{postInfo.firstPublishedAt.strftime("%Y-%m-%d")}-#{postPath}"
217
+
218
+ absolutePath = postPathPolicy.getAbsolutePath("#{postWithDatePath}.md")
215
219
 
216
220
  # if markdown file is exists and last modification time is >= latestPublishedAt(last update post time on medium)
217
221
  if File.file?(absolutePath) && File.mtime(absolutePath) >= postInfo.latestPublishedAt
@@ -227,8 +231,11 @@ class ZMediumFetcher
227
231
 
228
232
  index = 0
229
233
  paragraphs.each do |paragraph|
230
- markupParser = MarkupParser.new(postHtml, paragraph)
231
- paragraph.text = markupParser.parse()
234
+ if !(CodeBlockParser.isCodeBlock(paragraph) || PREParser.isPRE(paragraph))
235
+ markupParser = MarkupParser.new(paragraph)
236
+ paragraph.text = markupParser.parse()
237
+ end
238
+
232
239
  result = startParser.parse(paragraph)
233
240
 
234
241
  if !linkParser.nil?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ZMediumToMarkdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 1.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - ZhgChgLi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-01 00:00:00.000000000 Z
11
+ date: 2022-06-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -24,20 +24,6 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: 1.13.1
27
- - !ruby/object:Gem::Dependency
28
- name: reverse_markdown
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: 2.1.1
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
39
- - !ruby/object:Gem::Version
40
- version: 2.1.1
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: net-http
43
29
  requirement: !ruby/object:Gem::Requirement
@@ -91,6 +77,7 @@ files:
91
77
  - lib/Parsers/LinkParser.rb
92
78
  - lib/Parsers/MIXTAPEEMBEDParser.rb
93
79
  - lib/Parsers/MarkupParser.rb
80
+ - lib/Parsers/MarkupStyleRender.rb
94
81
  - lib/Parsers/OLIParser.rb
95
82
  - lib/Parsers/PParser.rb
96
83
  - lib/Parsers/PQParser.rb