ZMediumToMarkdown 1.5.0 → 1.6.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b38eda08edc524ebdbb5c459b106a907d8c86db59a772f466781a1da297b8ebc
4
- data.tar.gz: 1c9c62e9124bc30acef072ee1a1d63dac4dc61212c0ddc443377314bcfe410ec
3
+ metadata.gz: 57ebbd86d072c9c43a5baef02031561323e9e7f6857e639aecc754de5741c543
4
+ data.tar.gz: 03bde3f39434b21c7d96380d05dbc9ccc7096f30ab97aade0e6838165e28de3e
5
5
  SHA512:
6
- metadata.gz: 7e9965a014bf975e82c5255228a5cef81ecf434be208a7bec2baab319943ba5fa92871fa02bb36c698733259504b3271ed6e1087a2bf02b7f11fabfc5964e4f1
7
- data.tar.gz: c1825a86bc74c9ec61c4481bbfa603f167cbe7f8adac166b19e8c0b0563d212eaa541df62c45103f939172ecd06ff4abced2ed698dfff0aa227bce549b61c2c7
6
+ metadata.gz: d108f648afe9eb0f90231dc771e22fd6f3f15d820f6d19aa941d23b48b2416c672350b6353af85c58dd13ccc1c3faa194b55f622476d09ea8bc84cccff6ba6ac
7
+ data.tar.gz: e484a2d51bc9ec006dc5511e2586d29eaab102bf778a030240ee92941b4fdfd9a726fbc5542677af7dbd8bb192e6c5d162b30b8c8b5001d166d3561edabdb9ec
data/lib/Helper.rb CHANGED
@@ -77,10 +77,15 @@ class Helper
77
77
  end
78
78
 
79
79
  def self.createPostInfo(postInfo)
80
+
81
+ title = postInfo.title.gsub("[","")
82
+ title = title.gsub("]","")
83
+
80
84
  result = "---\n"
81
- result += "title: #{postInfo.title}\n"
85
+ result += "title: #{title}\n"
82
86
  result += "author: #{postInfo.creator}\n"
83
87
  result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
88
+ result += "categories: #{postInfo.collectionName}\n"
84
89
  result += "tags: [#{postInfo.tags.join(",")}]\n"
85
90
  result += "---\n"
86
91
  result += "\r\n"
@@ -159,7 +164,7 @@ class Helper
159
164
  text += "+-----------------------------------------------------------------------------------+"
160
165
  text += "\r\n"
161
166
  text += "\r\n"
162
- text += "| **[View original post on Medium](#{postURL}) - Converted by [ZhgChgLi](https://blog.zhgchg.li)/[ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)** |"
167
+ text += "| **[View original post on Medium](#{postURL}) - Converted by [ZhgChgLi](https://zhgchg.li)/[ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)** |"
163
168
  text += "\r\n"
164
169
  text += "\r\n"
165
170
  text += "+-----------------------------------------------------------------------------------+"
@@ -167,4 +172,4 @@ class Helper
167
172
 
168
173
  text
169
174
  end
170
- end
175
+ end
@@ -4,7 +4,7 @@ require 'Parsers/PParser'
4
4
  require 'securerandom'
5
5
 
6
6
  class Paragraph
7
- attr_accessor :postID, :name, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :hasMarkup, :oliIndex, :markupLinks
7
+ attr_accessor :postID, :name, :orgText, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :oliIndex, :markups, :markupLinks
8
8
 
9
9
  class Iframe
10
10
  attr_accessor :id, :title, :type, :src
@@ -20,6 +20,19 @@ class Paragraph
20
20
  end
21
21
  end
22
22
 
23
# One inline markup entry of a paragraph (e.g. emphasis, strong, code, link),
# built from the JSON object found in the paragraph's "markups" list.
#
# Every attribute is copied verbatim from the JSON key of the same name; a
# key that is absent simply yields nil for that attribute.
class Markup
  attr_accessor :type, :start, :end, :href, :anchorType, :userId, :linkMetadata

  # @param json [Hash] a single markup object, keyed by string names
  def initialize(json)
    @type, @start, @end, @href, @anchorType, @userId, @linkMetadata =
      %w[type start end href anchorType userId linkMetadata].map { |key| json[key] }
  end
end
35
+
23
36
  class MetaData
24
37
  attr_accessor :id, :type
25
38
  def initialize(json)
@@ -41,12 +54,13 @@ class Paragraph
41
54
  "text" => "",
42
55
  "type" => PParser.getTypeString()
43
56
  }
44
- Paragraph.new(json, postID, nil)
57
+ Paragraph.new(json, postID)
45
58
  end
46
59
 
47
- def initialize(json, postID, resource)
60
+ def initialize(json, postID)
48
61
  @name = json['name']
49
62
  @text = json['text']
63
+ @orgText = json['text']
50
64
  @type = json['type']
51
65
  @href = json['href']
52
66
  @postID = postID
@@ -54,7 +68,7 @@ class Paragraph
54
68
  if json['metadata'].nil?
55
69
  @metadata = nil
56
70
  else
57
- @metadata = MetaData.new(resource[json['metadata']['__ref']])
71
+ @metadata = MetaData.new(json['metadata'])
58
72
  end
59
73
 
60
74
  if json['mixtapeMetadata'].nil?
@@ -66,17 +80,22 @@ class Paragraph
66
80
  if json['iframe'].nil?
67
81
  @iframe = nil
68
82
  else
69
- @iframe = Iframe.new(resource[json['iframe']['mediaResource']['__ref']])
83
+ @iframe = Iframe.new(json['iframe']['mediaResource'])
70
84
  end
71
85
 
72
86
  if !json['markups'].nil? && json['markups'].length > 0
87
+ markups = []
88
+ json['markups'].each do |markup|
89
+ markups.append(Markup.new(markup))
90
+ end
91
+ @markups = markups
92
+
73
93
  links = json['markups'].select{ |markup| markup["type"] == "A" }
74
94
  if !links.nil? && links.length > 0
75
95
  @markupLinks = links.map{ |link| link["href"] }
76
96
  end
77
- @hasMarkup = true
78
97
  else
79
- @hasMarkup = false
98
+ @markups = nil
80
99
  end
81
100
  end
82
101
  end
@@ -5,12 +5,18 @@ require 'Models/Paragraph'
5
5
 
6
6
  class BQParser < Parser
7
7
  attr_accessor :nextParser
8
+
9
# Tells whether +paragraph+ is a block-quote paragraph (type "BQ").
#
# Nil-safe: a nil paragraph (for example, when there is no previous
# paragraph yet) is reported as false instead of raising.
#
# @param paragraph [Paragraph, nil]
# @return [Boolean]
def self.isBQ(paragraph)
  paragraph&.type == "BQ"
end
16
+
8
17
  def parse(paragraph)
9
- if paragraph.type == 'BQ'
10
- result = ""
11
- paragraph.text.each_line do |p|
12
- result += "> #{p}"
13
- end
18
+ if BQParser.isBQ(paragraph)
19
+ result = "> #{paragraph.text}"
14
20
  result
15
21
  else
16
22
  if !nextParser.nil?
@@ -10,8 +10,16 @@ class CodeBlockParser < Parser
10
10
  'CODE_BLOCK'
11
11
  end
12
12
 
13
# Tells whether +paragraph+ has the type string this parser handles
# (CodeBlockParser.getTypeString).
#
# Nil-safe: a nil paragraph is reported as false instead of raising.
#
# @param paragraph [Paragraph, nil]
# @return [Boolean]
def self.isCodeBlock(paragraph)
  paragraph&.type == CodeBlockParser.getTypeString()
end
20
+
13
21
  def parse(paragraph)
14
- if paragraph.type == CodeBlockParser.getTypeString()
22
+ if CodeBlockParser.isCodeBlock(paragraph)
15
23
  "```\n#{paragraph.text}\n```"
16
24
  else
17
25
  if !nextParser.nil?
@@ -18,11 +18,16 @@ class IMGParser < Parser
18
18
  imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
19
19
  absolutePath = imagePathPolicy.getAbsolutePath(fileName)
20
20
 
21
+ comment = ""
22
+ if paragraph.text != ""
23
+ comment = " \"#{paragraph.text}\""
24
+ end
25
+
21
26
  if ImageDownloader.download(absolutePath, imageURL)
22
27
  relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
23
- "![#{paragraph.text}](#{relativePath} \"#{paragraph.text}\")"
28
+ "![#{paragraph.text}](/#{relativePath}#{comment})"
24
29
  else
25
- "![#{paragraph.text}](#{imageURL} \"#{paragraph.text}\")"
30
+ "![#{paragraph.text}](#{imageURL}#{comment})"
26
31
  end
27
32
  else
28
33
  if !nextParser.nil?
@@ -24,6 +24,7 @@ class IframeParser < Parser
24
24
  # is youtube
25
25
  youtubeURL = URI(URI.decode(url)).query
26
26
  params = URI::decode_www_form(youtubeURL).to_h
27
+
27
28
  if !params["image"].nil? && !params["url"].nil?
28
29
 
29
30
  fileName = "#{paragraph.name}_#{URI(params["image"]).path.split("/").last}" #21de_default.jpg
@@ -31,12 +32,12 @@ class IframeParser < Parser
31
32
  imageURL = params["image"]
32
33
  imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
33
34
  absolutePath = imagePathPolicy.getAbsolutePath(fileName)
34
-
35
+ title = paragraph.iframe.title
35
36
  if ImageDownloader.download(absolutePath, imageURL)
36
37
  relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
37
- result = "\n[![YouTube](#{relativePath} \"YouTube\")](#{params["url"]})"
38
+ result = "\n[![#{title}](#{relativePath} \"#{title}\")](#{params["url"]})"
38
39
  else
39
- result = "\n[YouTube](#{params["url"]})"
40
+ result = "\n[#{title}](#{params["url"]})"
40
41
  end
41
42
  end
42
43
  else
@@ -54,7 +55,7 @@ class IframeParser < Parser
54
55
  gistHTML.search('a').each do |a|
55
56
  if a.text == 'view raw'
56
57
  gistRAW = Request.body(Request.URL(a['href']))
57
- result = "```#{lang}\n#{gistRAW}\n```"
58
+ result = "```#{lang.downcase}\n#{gistRAW}\n```"
58
59
  end
59
60
  end
60
61
  end
@@ -23,19 +23,7 @@ class LinkParser
23
23
 
24
24
  postPath = link.split("/").last
25
25
  if !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == postPath.split("-").last }.nil?
26
- markdownString = markdownString.sub! link, postPath
27
- end
28
- else
29
- if !(link =~ /\A#{URI::regexp(['http', 'https'])}\z/)
30
- # medium will give you an relative path if url is medium's post (due to we use html to markdown render)
31
- # e.g. /zrealm-ios-dev/visitor-pattern-in-ios-swift-ba5773a7bfea
32
- # it's not a vaild url
33
-
34
- # fullfill url from markup attribute
35
- match = markupLinks.find{ |markupLink| markupLink.include? link }
36
- if !match.nil?
37
- markdownString = markdownString.sub! link, match
38
- end
26
+ markdownString = markdownString.sub! link, "../#{postPath}"
39
27
  end
40
28
  end
41
29
  end
@@ -8,9 +8,9 @@ class MIXTAPEEMBEDParser < Parser
8
8
  def parse(paragraph)
9
9
  if paragraph.type == 'MIXTAPE_EMBED'
10
10
  if !paragraph.mixtapeMetadata.nil? && !paragraph.mixtapeMetadata.href.nil?
11
- "\n[#{paragraph.text}](#{paragraph.mixtapeMetadata.href})"
11
+ "\n[#{paragraph.orgText}](#{paragraph.mixtapeMetadata.href})"
12
12
  else
13
- "\n#{paragraph.text}"
13
+ "\n#{paragraph.orgText}"
14
14
  end
15
15
  else
16
16
  if !nextParser.nil?
@@ -1,23 +1,28 @@
1
1
  $lib = File.expand_path('../', File.dirname(__FILE__))
2
2
 
3
3
  require 'Models/Paragraph'
4
- require 'reverse_markdown'
4
+ require 'Parsers/MarkupStyleRender'
5
5
  require 'nokogiri'
6
+ require 'securerandom'
7
+ require 'User'
6
8
 
7
9
  class MarkupParser
8
10
  attr_accessor :body, :paragraph
9
11
 
10
- def initialize(html, paragraph)
11
- @body = html.search("body").first
12
+ def initialize(paragraph)
12
13
  @paragraph = paragraph
13
14
  end
14
15
 
15
16
  def parse()
16
17
  result = paragraph.text
17
- if paragraph.hasMarkup
18
- p = body.at_css("##{paragraph.name}")
19
- if !p.nil?
20
- result = ReverseMarkdown.convert p.inner_html
18
+ if !paragraph.markups.nil? && paragraph.markups.length > 0
19
+ markupRender = MarkupStyleRender.new(paragraph)
20
+
21
+ begin
22
+ result = markupRender.parse()
23
+ rescue => e
24
+ puts e.backtrace
25
+ Helper.makeWarningText("Error occurred during render markup text, please help to open an issue on github.")
21
26
  end
22
27
  end
23
28
 
@@ -0,0 +1,232 @@
1
+
2
+ $lib = File.expand_path('../', File.dirname(__FILE__))
3
+
4
+ require 'Models/Paragraph'
5
+
6
# Renders the inline markups of a paragraph (EM, STRONG, CODE, A) as Markdown.
#
# The paragraph text is split into indexed character slots; for every slot the
# renderer emits the opening markers of markups starting there, the character
# itself, and the closing markers of markups ending there. The resulting
# stream is then normalised by #optimize (spacing around markers, removal of
# empty marker pairs) and joined into a string.
class MarkupStyleRender
  attr_accessor :paragraph, :chars, :encodeType

  # A piece of the output stream.
  # +chars+ is an array of single-character strings; +type+ is "Text" for
  # paragraph text, or "TagStart"/"TagEnd" for Markdown markers.
  class TextChar
    attr_accessor :chars, :type
    def initialize(chars, type)
      @chars = chars
      @type = type
    end
  end

  # A markup translated into a pair of Markdown markers.
  #
  # +sort+ decides the opening order of markups that start on the same slot
  # (lower values open first). +endIndex+ is stored as the given end minus
  # one, i.e. the index of the last slot covered by the markup.
  class TagChar < TextChar
    attr_accessor :sort, :startIndex, :endIndex, :startChars, :endChars
    def initialize(sort, startIndex, endIndex, startChars, endChars)
      @sort = sort
      @startIndex = startIndex
      @endIndex = endIndex - 1
      @startChars = TextChar.new(startChars.chars, 'TagStart')
      @endChars = TextChar.new(endChars.chars, 'TagEnd')
    end
  end

  # Splits paragraph.text into a Hash of slot index => TextChar.
  #
  # @param paragraph [Paragraph] must respond to #text and #markups
  def initialize(paragraph)
    @paragraph = paragraph

    chars = {}
    index = 0

    emojiRegex = /[\u{203C}\u{2049}\u{20E3}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{23E9}-\u{23EC}\u{23F0}\u{23F3}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2601}\u{260E}\u{2611}\u{2614}-\u{2615}\u{261D}\u{263A}\u{2648}-\u{2653}\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267F}\u{2693}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26CE}\u{26D4}\u{26EA}\u{26F2}-\u{26F3}\u{26F5}\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270C}\u{270F}\u{2712}\u{2714}\u{2716}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1E7}-\u{1F1EC}\u{1F1EE}-\u{1F1F0}\u{1F1F3}\u{1F1F5}\u{1F1F7}-\u{1F1FA}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F320}\u{1F330}-\u{1F335}\u{1F337}-\u{1F37C}\u{1F380}-\u{1F393}\u{1F3A0}-\u{1F3C4}\u{1F3C6}-\u{1F3CA}\u{1F3E0}-\u{1F3F0}\u{1F400}-\u{1F43E}\u{1F440}\u{1F442}-\u{1F4F7}\u{1F4F9}-\u{1F4FC}\u{1F500}-\u{1F507}\u{1F509}-\u{1F53D}\u{1F550}-\u{1F567}\u{1F5FB}-\u{1F640}\u{1F645}-\u{1F64F}\u{1F680}-\u{1F68A}]/
    excludesEmojis = ["⚠"]
    paragraph.text.each_char do |char|
      chars[index] = TextChar.new([char], "Text")
      index += 1
      next unless char =~ emojiRegex && !excludesEmojis.include?(char)

      # some emoji need more space (in Medium): reserve one extra, empty slot
      # so later markup offsets still line up.
      # NOTE(review): presumably Medium counts these characters as two
      # units in its start/end offsets -- confirm against the API.
      chars[index] = TextChar.new([], "Text")
      index += 1
    end

    @chars = chars
  end

  # Normalises a rendered stream in place and returns it.
  #
  # The stream is rescanned from the start after every single modification
  # until a full pass makes no change. Rules applied, in order of checking:
  # * a start marker directly followed by an end marker: both are removed;
  # * a start marker whose preceding text is neither a newline nor a single
  #   space: a space is inserted before the marker;
  # * blank text right after a start marker: removed;
  # * blank (non-newline) text right before an end marker: removed;
  # * text other than a single space right after an end marker: a space is
  #   inserted between them.
  #
  # @param chars [Array<TextChar>] stream produced by #parse
  # @return [Array<TextChar>] the same array
  def optimize(chars)
    loop do
      changed = false

      index = 0
      startTagIndex = nil
      preTag = nil
      preTagIndex = nil
      preTextChar = nil
      preTextIndex = nil

      # Every mutation below is immediately followed by `break`, so the
      # array is never iterated further after it has been modified.
      chars.each do |char|
        if !preTag.nil? && preTag.type == "TagStart" && char.type == "TagEnd"
          # empty marker pair
          chars.delete_at(index)
          chars.delete_at(preTagIndex)
          changed = true
          break
        end

        if char.type == "TagStart" && (preTag.nil? || preTag.type == "TagEnd" || preTag.type == "Text")
          # first marker of a run of start markers
          startTagIndex = index
        elsif (char.type == "TagEnd" || char.type == "Text") && !startTagIndex.nil?
          if !preTextChar.nil? && preTextChar.chars.join != "\n" && preTextChar.chars.join != " "
            # not first tag & insert blank between start tag and before text
            chars.insert(startTagIndex, TextChar.new(" ".chars, "Text"))
            changed = true
            break
          end
          startTagIndex = nil
        end

        unless preTag.nil?
          if preTag.type == "TagStart" && char.type == "Text" && char.chars.join.strip == ""
            # delete blank between start tag and after text
            chars.delete_at(index)
            changed = true
            break
          end

          if preTag.type == "Text" && char.type == "TagEnd" &&
             preTextChar.chars.join.strip == "" && preTextChar.chars.join != "\n"
            # delete blank between text and its end tag
            chars.delete_at(preTextIndex)
            changed = true
            break
          end

          if preTag.type == "TagEnd" && char.type == "Text" && char.chars.join != " "
            # insert blank between end tag and following text
            chars.insert(index, TextChar.new(" ".chars, "Text"))
            changed = true
            break
          end
        end

        if char.type == "Text"
          preTextChar = char
          preTextIndex = index
        end

        preTag = char
        preTagIndex = index

        index += 1
      end

      break unless changed
    end

    chars
  end

  # Renders the paragraph text with its markups as a Markdown string.
  #
  # Returns paragraph.text unchanged when the paragraph has no markups.
  # Markups whose range is empty or inverted (end <= start) are ignored;
  # previously such a markup raised NoMethodError while closing tags.
  #
  # @return [String]
  def parse()
    result = paragraph.text
    return result if paragraph.markups.nil? || paragraph.markups.empty?

    # Declaration order is kept on purpose: tags are looked up per slot
    # below, so a global ordering is not needed.
    tags = paragraph.markups.map { |markup| buildTag(markup) }.compact
    tags.reject! { |tag| tag.endIndex < tag.startIndex }

    response = []
    stack = [] # currently open tags, innermost last

    chars.each do |index, char|
      isNewline = char.chars.join == "\n"

      if isNewline
        # Markers must not span lines: close everything that is open,
        # emit the newline, then reopen in the original nesting order.
        stack.reverse_each { |tag| response.push(tag.endChars) }
        response.push(TextChar.new(char.chars, 'Text'))
        stack.each { |tag| response.push(tag.startChars) }
      end

      tags.select { |tag| tag.startIndex == index }.sort_by(&:sort).each do |tag|
        response.push(tag.startChars)
        stack.push(tag)
      end

      response.push(TextChar.new(char.chars, 'Text')) unless isNewline

      # Only tags that are actually open can be closed, which also
      # guarantees the loop below terminates.
      endTags = stack.select { |tag| tag.endIndex == index }
      next if endTags.empty?

      mismatchTags = []
      until endTags.empty?
        stackTag = stack.pop
        # A tag that is open but not ending here overlaps the ending one:
        # close it now and reopen it afterwards.
        mismatchTags.push(stackTag) if endTags.delete(stackTag).nil?
        response.push(stackTag.endChars)
      end

      mismatchTags.reverse_each do |mismatchTag|
        response.push(mismatchTag.startChars)
        stack.push(mismatchTag)
      end
    end

    # close whatever is still open at the end of the text
    response.push(stack.pop.endChars) until stack.empty?

    optimize(response).map(&:chars).join
  end

  private

  # Translates one markup into a TagChar, or nil for an unknown markup type
  # (in which case Helper.makeWarningText is called with a description).
  def buildTag(markup)
    case markup.type
    when "EM"
      TagChar.new(2, markup.start, markup.end, "_", "_")
    when "CODE"
      TagChar.new(3, markup.start, markup.end, "`", "`")
    when "STRONG"
      TagChar.new(2, markup.start, markup.end, "**", "**")
    when "A"
      # USER anchors point at a Medium profile; every other anchor uses href.
      url = markup.anchorType == "USER" ? "https://medium.com/u/#{markup.userId}" : markup.href
      TagChar.new(1, markup.start, markup.end, "[", "](#{url})")
    else
      Helper.makeWarningText("Undefined Markup Type: #{markup.type}.")
      nil
    end
  end
end
data/lib/Post.rb CHANGED
@@ -9,7 +9,7 @@ require 'date'
9
9
  class Post
10
10
 
11
11
  class PostInfo
12
- attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt
12
+ attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt, :collectionName
13
13
  end
14
14
 
15
15
  def self.getPostIDFromPostURLString(postURLString)
@@ -38,12 +38,23 @@ class Post
38
38
  json
39
39
  end
40
40
 
41
- def self.parsePostParagraphsFromPostContent(content, postID)
42
- result = content&.dig("Post:#{postID}", "content({\"postMeteringOptions\":null})", "bodyModel", "paragraphs")
43
- if result.nil?
44
- nil
41
+ def self.fetchPostParagraphs(postID)
42
+ query = [
43
+ {
44
+ "operationName": "PostViewerEdgeContentQuery",
45
+ "variables": {
46
+ "postId": postID
47
+ },
48
+ "query": "query PostViewerEdgeContentQuery($postId: ID!, $postMeteringOptions: PostMeteringOptions) {\n post(id: $postId) {\n ... on Post {\n id\n viewerEdge {\n id\n fullContent(postMeteringOptions: $postMeteringOptions) {\n isLockedPreviewOnly\n validatedShareKey\n bodyModel {\n ...PostBody_bodyModel\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\nfragment PostBody_bodyModel on RichText {\n sections {\n name\n startIndex\n textLayout\n imageLayout\n backgroundImage {\n id\n originalHeight\n originalWidth\n __typename\n }\n videoLayout\n backgroundVideo {\n videoId\n originalHeight\n originalWidth\n previewImageId\n __typename\n }\n __typename\n }\n paragraphs {\n id\n ...PostBodySection_paragraph\n __typename\n }\n ...normalizedBodyModel_richText\n __typename\n}\n\nfragment PostBodySection_paragraph on Paragraph {\n name\n ...PostBodyParagraph_paragraph\n __typename\n id\n}\n\nfragment PostBodyParagraph_paragraph on Paragraph {\n name\n type\n ...ImageParagraph_paragraph\n ...TextParagraph_paragraph\n ...IframeParagraph_paragraph\n ...MixtapeParagraph_paragraph\n __typename\n id\n}\n\nfragment ImageParagraph_paragraph on Paragraph {\n href\n layout\n metadata {\n id\n originalHeight\n originalWidth\n focusPercentX\n focusPercentY\n alt\n __typename\n }\n ...Markups_paragraph\n ...ParagraphRefsMapContext_paragraph\n ...PostAnnotationsMarker_paragraph\n __typename\n id\n}\n\nfragment Markups_paragraph on Paragraph {\n name\n text\n hasDropCap\n dropCapImage {\n ...MarkupNode_data_dropCapImage\n __typename\n id\n }\n markups {\n type\n start\n end\n href\n anchorType\n userId\n linkMetadata {\n httpStatus\n __typename\n }\n __typename\n }\n __typename\n id\n}\n\nfragment MarkupNode_data_dropCapImage on ImageMetadata {\n ...DropCap_image\n __typename\n id\n}\n\nfragment DropCap_image on ImageMetadata {\n id\n originalHeight\n originalWidth\n __typename\n}\n\nfragment ParagraphRefsMapContext_paragraph on Paragraph {\n id\n 
name\n text\n __typename\n}\n\nfragment PostAnnotationsMarker_paragraph on Paragraph {\n ...PostViewNoteCard_paragraph\n __typename\n id\n}\n\nfragment PostViewNoteCard_paragraph on Paragraph {\n name\n __typename\n id\n}\n\nfragment TextParagraph_paragraph on Paragraph {\n type\n hasDropCap\n ...Markups_paragraph\n ...ParagraphRefsMapContext_paragraph\n __typename\n id\n}\n\nfragment IframeParagraph_paragraph on Paragraph {\n iframe {\n mediaResource {\n id\n iframeSrc\n iframeHeight\n iframeWidth\n title\n __typename\n }\n __typename\n }\n layout\n ...getEmbedlyCardUrlParams_paragraph\n ...Markups_paragraph\n __typename\n id\n}\n\nfragment getEmbedlyCardUrlParams_paragraph on Paragraph {\n type\n iframe {\n mediaResource {\n iframeSrc\n __typename\n }\n __typename\n }\n __typename\n id\n}\n\nfragment MixtapeParagraph_paragraph on Paragraph {\n type\n mixtapeMetadata {\n href\n mediaResource {\n mediumCatalog {\n id\n __typename\n }\n __typename\n }\n __typename\n }\n ...GenericMixtapeParagraph_paragraph\n __typename\n id\n}\n\nfragment GenericMixtapeParagraph_paragraph on Paragraph {\n text\n mixtapeMetadata {\n href\n thumbnailImageId\n __typename\n }\n markups {\n start\n end\n type\n href\n __typename\n }\n __typename\n id\n}\n\nfragment normalizedBodyModel_richText on RichText {\n paragraphs {\n markups {\n type\n __typename\n }\n ...getParagraphHighlights_paragraph\n ...getParagraphPrivateNotes_paragraph\n __typename\n }\n sections {\n startIndex\n ...getSectionEndIndex_section\n __typename\n }\n ...getParagraphStyles_richText\n ...getParagraphSpaces_richText\n __typename\n}\n\nfragment getParagraphHighlights_paragraph on Paragraph {\n name\n __typename\n id\n}\n\nfragment getParagraphPrivateNotes_paragraph on Paragraph {\n name\n __typename\n id\n}\n\nfragment getSectionEndIndex_section on Section {\n startIndex\n __typename\n}\n\nfragment getParagraphStyles_richText on RichText {\n paragraphs {\n text\n type\n __typename\n }\n sections {\n 
...getSectionEndIndex_section\n __typename\n }\n __typename\n}\n\nfragment getParagraphSpaces_richText on RichText {\n paragraphs {\n layout\n metadata {\n originalHeight\n originalWidth\n __typename\n }\n type\n ...paragraphExtendsImageGrid_paragraph\n __typename\n }\n ...getSeriesParagraphTopSpacings_richText\n ...getPostParagraphTopSpacings_richText\n __typename\n}\n\nfragment paragraphExtendsImageGrid_paragraph on Paragraph {\n layout\n type\n __typename\n id\n}\n\nfragment getSeriesParagraphTopSpacings_richText on RichText {\n paragraphs {\n id\n __typename\n }\n sections {\n startIndex\n __typename\n }\n __typename\n}\n\nfragment getPostParagraphTopSpacings_richText on RichText {\n paragraphs {\n layout\n text\n __typename\n }\n sections {\n startIndex\n __typename\n }\n __typename\n}\n"
49
+ }
50
+ ]
51
+
52
+ body = Request.body(Request.URL("https://medium.com/_/graphql", "POST", query))
53
+ if !body.nil?
54
+ json = JSON.parse(body)
55
+ json&.dig(0, "data", "post", "viewerEdge", "fullContent", "bodyModel", "paragraphs")
45
56
  else
46
- result.map { |paragraph| content[paragraph["__ref"]] }
57
+ nil
47
58
  end
48
59
  end
49
60
 
@@ -57,6 +68,13 @@ class Post
57
68
  postInfo.creator = content&.dig(creatorRef, "name")
58
69
  end
59
70
 
71
+ colletionRef = content&.dig("Post:#{postID}", "collection", "__ref")
72
+ if !colletionRef.nil?
73
+ postInfo.collectionName = content&.dig(colletionRef, "name")
74
+ end
75
+
76
+
77
+
60
78
  firstPublishedAt = content&.dig("Post:#{postID}", "firstPublishedAt")
61
79
  if !firstPublishedAt.nil?
62
80
  postInfo.firstPublishedAt = Time.at(0, firstPublishedAt, :millisecond)
@@ -26,6 +26,7 @@ require "PathPolicy"
26
26
  require "Request"
27
27
  require "Post"
28
28
  require "User"
29
+ require 'date'
29
30
 
30
31
  class ZMediumFetcher
31
32
 
@@ -127,7 +128,7 @@ class ZMediumFetcher
127
128
 
128
129
  postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
129
130
 
130
- sourceParagraphs = Post.parsePostParagraphsFromPostContent(postContent, postID)
131
+ sourceParagraphs = Post.fetchPostParagraphs(postID)
131
132
  if sourceParagraphs.nil?
132
133
  raise "Error: Paragraph not found! PostURL: #{postURL}"
133
134
  end
@@ -140,7 +141,7 @@ class ZMediumFetcher
140
141
  previousParagraph = nil
141
142
  preTypeParagraphs = []
142
143
  sourceParagraphs.each do |sourcParagraph|
143
- paragraph = Paragraph.new(sourcParagraph, postID, postContent)
144
+ paragraph = Paragraph.new(sourcParagraph, postID)
144
145
  if OLIParser.isOLI(paragraph)
145
146
  oliIndex += 1
146
147
  paragraph.oliIndex = oliIndex
@@ -148,10 +149,11 @@ class ZMediumFetcher
148
149
  oliIndex = 0
149
150
  end
150
151
 
151
- # if previous is OLI or ULI and current is not OLI or ULI
152
+ # if previous is OLI or ULI or BQ and current is not OLI or ULI or BQ
152
153
  # than insert a blank paragraph to keep markdown foramt correct
153
154
  if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
154
- (ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
155
+ (ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))||
156
+ (BQParser.isBQ(previousParagraph) && !BQParser.isBQ(paragraph))
155
157
  paragraphs.append(Paragraph.makeBlankParagraph(postID))
156
158
  end
157
159
 
@@ -178,7 +180,7 @@ class ZMediumFetcher
178
180
  groupByText += "\n"
179
181
  end
180
182
 
181
- markupParser = MarkupParser.new(postHtml, preTypeParagraph)
183
+ markupParser = MarkupParser.new(preTypeParagraph)
182
184
  groupByText += markupParser.parse()
183
185
  end
184
186
 
@@ -203,7 +205,7 @@ class ZMediumFetcher
203
205
 
204
206
  postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
205
207
 
206
- imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "images")
208
+ imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "assets")
207
209
  startParser = buildParser(imagePathPolicy)
208
210
 
209
211
  progress.totalPostParagraphsLength = paragraphs.length
@@ -211,7 +213,9 @@ class ZMediumFetcher
211
213
  progress.message = "Converting Post..."
212
214
  progress.printLog()
213
215
 
214
- absolutePath = postPathPolicy.getAbsolutePath("#{postPath}.md")
216
+ postWithDatePath = "#{postInfo.firstPublishedAt.strftime("%Y-%m-%d")}-#{postPath}"
217
+
218
+ absolutePath = postPathPolicy.getAbsolutePath("#{postWithDatePath}.md")
215
219
 
216
220
  # if markdown file is exists and last modification time is >= latestPublishedAt(last update post time on medium)
217
221
  if File.file?(absolutePath) && File.mtime(absolutePath) >= postInfo.latestPublishedAt
@@ -227,8 +231,11 @@ class ZMediumFetcher
227
231
 
228
232
  index = 0
229
233
  paragraphs.each do |paragraph|
230
- markupParser = MarkupParser.new(postHtml, paragraph)
231
- paragraph.text = markupParser.parse()
234
+ if !(CodeBlockParser.isCodeBlock(paragraph) || PREParser.isPRE(paragraph))
235
+ markupParser = MarkupParser.new(paragraph)
236
+ paragraph.text = markupParser.parse()
237
+ end
238
+
232
239
  result = startParser.parse(paragraph)
233
240
 
234
241
  if !linkParser.nil?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ZMediumToMarkdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 1.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - ZhgChgLi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-01 00:00:00.000000000 Z
11
+ date: 2022-06-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -24,20 +24,6 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: 1.13.1
27
- - !ruby/object:Gem::Dependency
28
- name: reverse_markdown
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: 2.1.1
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
39
- - !ruby/object:Gem::Version
40
- version: 2.1.1
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: net-http
43
29
  requirement: !ruby/object:Gem::Requirement
@@ -91,6 +77,7 @@ files:
91
77
  - lib/Parsers/LinkParser.rb
92
78
  - lib/Parsers/MIXTAPEEMBEDParser.rb
93
79
  - lib/Parsers/MarkupParser.rb
80
+ - lib/Parsers/MarkupStyleRender.rb
94
81
  - lib/Parsers/OLIParser.rb
95
82
  - lib/Parsers/PParser.rb
96
83
  - lib/Parsers/PQParser.rb