ZMediumToMarkdown 1.4.7 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/Models/Paragraph.rb +26 -7
- data/lib/Parsers/BQParser.rb +11 -5
- data/lib/Parsers/IframeParser.rb +4 -3
- data/lib/Parsers/MIXTAPEEMBEDParser.rb +2 -2
- data/lib/Parsers/MarkupParser.rb +12 -7
- data/lib/Parsers/MarkupStyleRender.rb +232 -0
- data/lib/Post.rb +16 -5
- data/lib/ZMediumFetcher.rb +7 -6
- metadata +3 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6026b5ebd732b82696ca5a7de549e5167067336998152c8a5e7bed751d610b78
|
4
|
+
data.tar.gz: 76c24779eaceb1763225f6e5e274fb5177685b0be9f75390ce189ec047278c21
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: db56b44ece9656eb90f703a6416c54a163f54e0208981ebf37029af9491fb39e30252b564e5d63f6defc63c72c51f3c3f944f531dfbd77ed10ba06b59abbd511
|
7
|
+
data.tar.gz: b71cc148ded49d5e3bd1ab8e72d915a5b8716de041cba43d39158fd47f451ce4c52ad0d671462a3dca7571879c744fa029b0af7a7e45d2fea92788d5ee858f0b
|
data/lib/Models/Paragraph.rb
CHANGED
@@ -4,7 +4,7 @@ require 'Parsers/PParser'
|
|
4
4
|
require 'securerandom'
|
5
5
|
|
6
6
|
class Paragraph
|
7
|
-
attr_accessor :postID, :name, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :
|
7
|
+
attr_accessor :postID, :name, :orgText, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :oliIndex, :markups, :markupLinks
|
8
8
|
|
9
9
|
class Iframe
|
10
10
|
attr_accessor :id, :title, :type, :src
|
@@ -20,6 +20,19 @@ class Paragraph
|
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
+
# One inline markup entry of a paragraph, built from the corresponding
# entry of the paragraph JSON's 'markups' list.
class Markup
    attr_accessor :type, :start, :end, :href, :anchorType, :userId, :linkMetadata

    # JSON keys copied verbatim into same-named instance variables.
    FIELDS = %w[type start end href anchorType userId linkMetadata].freeze

    # json: hash-like object for a single markup entry; keys that are
    # absent simply leave the matching attribute nil.
    def initialize(json)
        FIELDS.each do |field|
            instance_variable_set("@#{field}", json[field])
        end
    end
end
|
35
|
+
|
23
36
|
class MetaData
|
24
37
|
attr_accessor :id, :type
|
25
38
|
def initialize(json)
|
@@ -41,12 +54,13 @@ class Paragraph
|
|
41
54
|
"text" => "",
|
42
55
|
"type" => PParser.getTypeString()
|
43
56
|
}
|
44
|
-
Paragraph.new(json, postID
|
57
|
+
Paragraph.new(json, postID)
|
45
58
|
end
|
46
59
|
|
47
|
-
def initialize(json, postID
|
60
|
+
def initialize(json, postID)
|
48
61
|
@name = json['name']
|
49
62
|
@text = json['text']
|
63
|
+
@orgText = json['text']
|
50
64
|
@type = json['type']
|
51
65
|
@href = json['href']
|
52
66
|
@postID = postID
|
@@ -54,7 +68,7 @@ class Paragraph
|
|
54
68
|
if json['metadata'].nil?
|
55
69
|
@metadata = nil
|
56
70
|
else
|
57
|
-
@metadata = MetaData.new(
|
71
|
+
@metadata = MetaData.new(json['metadata'])
|
58
72
|
end
|
59
73
|
|
60
74
|
if json['mixtapeMetadata'].nil?
|
@@ -66,17 +80,22 @@ class Paragraph
|
|
66
80
|
if json['iframe'].nil?
|
67
81
|
@iframe = nil
|
68
82
|
else
|
69
|
-
@iframe = Iframe.new(
|
83
|
+
@iframe = Iframe.new(json['iframe']['mediaResource'])
|
70
84
|
end
|
71
85
|
|
72
86
|
if !json['markups'].nil? && json['markups'].length > 0
|
87
|
+
markups = []
|
88
|
+
json['markups'].each do |markup|
|
89
|
+
markups.append(Markup.new(markup))
|
90
|
+
end
|
91
|
+
@markups = markups
|
92
|
+
|
73
93
|
links = json['markups'].select{ |markup| markup["type"] == "A" }
|
74
94
|
if !links.nil? && links.length > 0
|
75
95
|
@markupLinks = links.map{ |link| link["href"] }
|
76
96
|
end
|
77
|
-
@hasMarkup = true
|
78
97
|
else
|
79
|
-
@
|
98
|
+
@markups = nil
|
80
99
|
end
|
81
100
|
end
|
82
101
|
end
|
data/lib/Parsers/BQParser.rb
CHANGED
@@ -5,12 +5,18 @@ require 'Models/Paragraph'
|
|
5
5
|
|
6
6
|
class BQParser < Parser
|
7
7
|
attr_accessor :nextParser
|
8
|
+
|
9
|
+
# Tells whether the given paragraph is a block quote (type "BQ").
# A nil paragraph is never a block quote.
def self.isBQ(paragraph)
    return false if paragraph.nil?

    paragraph.type == "BQ"
end
|
16
|
+
|
8
17
|
def parse(paragraph)
|
9
|
-
if paragraph
|
10
|
-
result = ""
|
11
|
-
paragraph.text.each_line do |p|
|
12
|
-
result += "> #{p}"
|
13
|
-
end
|
18
|
+
if BQParser.isBQ(paragraph)
|
19
|
+
result = "> #{paragraph.text}"
|
14
20
|
result
|
15
21
|
else
|
16
22
|
if !nextParser.nil?
|
data/lib/Parsers/IframeParser.rb
CHANGED
@@ -24,6 +24,7 @@ class IframeParser < Parser
|
|
24
24
|
# is youtube
|
25
25
|
youtubeURL = URI(URI.decode(url)).query
|
26
26
|
params = URI::decode_www_form(youtubeURL).to_h
|
27
|
+
|
27
28
|
if !params["image"].nil? && !params["url"].nil?
|
28
29
|
|
29
30
|
fileName = "#{paragraph.name}_#{URI(params["image"]).path.split("/").last}" #21de_default.jpg
|
@@ -31,12 +32,12 @@ class IframeParser < Parser
|
|
31
32
|
imageURL = params["image"]
|
32
33
|
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
|
33
34
|
absolutePath = imagePathPolicy.getAbsolutePath(fileName)
|
34
|
-
|
35
|
+
title = paragraph.iframe.title
|
35
36
|
if ImageDownloader.download(absolutePath, imageURL)
|
36
37
|
relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
|
37
|
-
result = "\n[](#{params["url"]})"
|
38
39
|
else
|
39
|
-
result = "\n[
|
40
|
+
result = "\n[#{title}](#{params["url"]})"
|
40
41
|
end
|
41
42
|
end
|
42
43
|
else
|
@@ -8,9 +8,9 @@ class MIXTAPEEMBEDParser < Parser
|
|
8
8
|
def parse(paragraph)
|
9
9
|
if paragraph.type == 'MIXTAPE_EMBED'
|
10
10
|
if !paragraph.mixtapeMetadata.nil? && !paragraph.mixtapeMetadata.href.nil?
|
11
|
-
"\n[#{paragraph.
|
11
|
+
"\n[#{paragraph.orgText}](#{paragraph.mixtapeMetadata.href})"
|
12
12
|
else
|
13
|
-
"\n#{paragraph.
|
13
|
+
"\n#{paragraph.orgText}"
|
14
14
|
end
|
15
15
|
else
|
16
16
|
if !nextParser.nil?
|
data/lib/Parsers/MarkupParser.rb
CHANGED
@@ -1,23 +1,28 @@
|
|
1
1
|
$lib = File.expand_path('../', File.dirname(__FILE__))
|
2
2
|
|
3
3
|
require 'Models/Paragraph'
|
4
|
-
require '
|
4
|
+
require 'Parsers/MarkupStyleRender'
|
5
5
|
require 'nokogiri'
|
6
|
+
require 'securerandom'
|
7
|
+
require 'User'
|
6
8
|
|
7
9
|
class MarkupParser
|
8
10
|
attr_accessor :body, :paragraph
|
9
11
|
|
10
|
-
def initialize(
|
11
|
-
@body = html.search("body").first
|
12
|
+
def initialize(paragraph)
|
12
13
|
@paragraph = paragraph
|
13
14
|
end
|
14
15
|
|
15
16
|
def parse()
|
16
17
|
result = paragraph.text
|
17
|
-
if paragraph.
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
if !paragraph.markups.nil? && paragraph.markups.length > 0
|
19
|
+
markupRender = MarkupStyleRender.new(paragraph)
|
20
|
+
|
21
|
+
begin
|
22
|
+
result = markupRender.parse()
|
23
|
+
rescue => e
|
24
|
+
puts e.backtrace
|
25
|
+
Helper.makeWarningText("Error occurred during render markup text, please help to open an issue on github.")
|
21
26
|
end
|
22
27
|
end
|
23
28
|
|
@@ -0,0 +1,232 @@
|
|
1
|
+
|
2
|
+
$lib = File.expand_path('../', File.dirname(__FILE__))
|
3
|
+
|
4
|
+
require 'Models/Paragraph'
|
5
|
+
|
6
|
+
# Renders a paragraph's inline markups (EM, CODE, STRONG, A) as Markdown by
# walking the paragraph text character by character and emitting opening and
# closing tag tokens at the offsets given by each markup.
class MarkupStyleRender
    attr_accessor :paragraph, :chars, :encodeType

    # Characters that occupy one extra offset slot when mapping markup
    # offsets onto the text (see #initialize).
    EMOJI_REGEX = /[\u{203C}\u{2049}\u{20E3}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{23E9}-\u{23EC}\u{23F0}\u{23F3}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2601}\u{260E}\u{2611}\u{2614}-\u{2615}\u{261D}\u{263A}\u{2648}-\u{2653}\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267F}\u{2693}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26CE}\u{26D4}\u{26EA}\u{26F2}-\u{26F3}\u{26F5}\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270C}\u{270F}\u{2712}\u{2714}\u{2716}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1E7}-\u{1F1EC}\u{1F1EE}-\u{1F1F0}\u{1F1F3}\u{1F1F5}\u{1F1F7}-\u{1F1FA}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F320}\u{1F330}-\u{1F335}\u{1F337}-\u{1F37C}\u{1F380}-\u{1F393}\u{1F3A0}-\u{1F3C4}\u{1F3C6}-\u{1F3CA}\u{1F3E0}-\u{1F3F0}\u{1F400}-\u{1F43E}\u{1F440}\u{1F442}-\u{1F4F7}\u{1F4F9}-\u{1F4FC}\u{1F500}-\u{1F507}\u{1F509}-\u{1F53D}\u{1F550}-\u{1F567}\u{1F5FB}-\u{1F640}\u{1F645}-\u{1F64F}\u{1F680}-\u{1F68A}]/.freeze

    # Characters matched by EMOJI_REGEX that must NOT get the extra slot.
    EXCLUDED_EMOJIS = ["⚠"].freeze

    # A run of output characters plus a kind: "Text", "TagStart" or "TagEnd".
    class TextChar
        attr_accessor :chars, :type
        def initialize(chars, type)
            @chars = chars
            @type = type
        end
    end

    # One markup turned into a pair of Markdown tokens.
    # sort       - priority used to order tags that open at the same offset
    #              (lower opens first)
    # startIndex - offset of the first styled character
    # endIndex   - offset of the LAST styled character (markup end is
    #              exclusive, hence the -1)
    # NOTE: inherits from TextChar but does not call super, so its own
    # #chars / #type stay nil; only startChars / endChars carry tokens.
    class TagChar < TextChar
        attr_accessor :sort, :startIndex, :endIndex, :startChars, :endChars
        def initialize(sort, startIndex, endIndex, startChars, endChars)
            @sort = sort
            @startIndex = startIndex
            @endIndex = endIndex - 1
            @startChars = TextChar.new(startChars.chars, 'TagStart')
            @endChars = TextChar.new(endChars.chars, 'TagEnd')
        end
    end

    # Builds the offset => TextChar table for paragraph.text.
    # A nil text is treated as an empty string.
    def initialize(paragraph)
        @paragraph = paragraph

        chars = {}
        index = 0
        paragraph.text.to_s.each_char do |char|
            chars[index] = TextChar.new([char], "Text")
            index += 1
            if char.match?(EMOJI_REGEX) && !EXCLUDED_EMOJIS.include?(char)
                # some emoji need more space (in Medium): reserve an empty
                # slot so later markup offsets still line up
                chars[index] = TextChar.new([], "Text")
                index += 1
            end
        end

        @chars = chars
    end

    # Normalizes whitespace around tag tokens in a flat Array of TextChar.
    # Applies one fix at a time and rescans from the start until a full pass
    # makes no change. Mutates and returns the given array. Fixes applied:
    #   * drop an opening token immediately followed by a closing token
    #   * insert a blank before a run of opening tokens unless the previous
    #     text is a blank or a newline (or there is no previous text)
    #   * drop blank text right after an opening token
    #   * drop blank (non-newline) text right before a closing token
    #   * insert a blank after a closing token when the next text is not " "
    def optimize(chars)
        loop do
            changed = false

            startTagIndex = nil
            preTag = nil
            preTagIndex = nil
            preTextChar = nil
            preTextIndex = nil

            # Every mutation is followed by `break`, so the array is never
            # iterated further after it has been modified.
            chars.each_with_index do |char, index|
                if !preTag.nil? && preTag.type == "TagStart" && char.type == "TagEnd"
                    # empty tag pair: remove the later index first so the
                    # earlier index stays valid
                    chars.delete_at(index)
                    chars.delete_at(preTagIndex)
                    changed = true
                    break
                end

                if char.type == "TagStart" && (preTag.nil? || preTag.type == "TagEnd" || preTag.type == "Text")
                    # first opening token of a run
                    startTagIndex = index
                elsif (char.type == "TagEnd" || char.type == "Text") && !startTagIndex.nil?
                    if !preTextChar.nil? && preTextChar.chars.join != "\n"
                        # not first tag & insert blank between start tag and before text
                        if preTextChar.chars.join != " "
                            chars.insert(startTagIndex, TextChar.new(" ".chars, "Text"))
                            changed = true
                            break
                        end
                    end
                    startTagIndex = nil
                end

                if !preTag.nil?
                    if preTag.type == "TagStart" && char.type == "Text"
                        # delete blank between start tag and after text
                        if char.chars.join.strip == ""
                            chars.delete_at(index)
                            changed = true
                            break
                        end
                    end

                    if preTag.type == "Text" && char.type == "TagEnd"
                        # delete blank (but keep newline) right before end tag
                        if preTextChar.chars.join.strip == "" && preTextChar.chars.join != "\n"
                            chars.delete_at(preTextIndex)
                            changed = true
                            break
                        end
                    end

                    if preTag.type == "TagEnd" && char.type == "Text"
                        # make sure a blank follows the end tag
                        if char.chars.join != " "
                            chars.insert(index, TextChar.new(" ".chars, "Text"))
                            changed = true
                            break
                        end
                    end
                end

                if char.type == "Text"
                    preTextChar = char
                    preTextIndex = index
                end

                preTag = char
                preTagIndex = index
            end

            break unless changed
        end

        chars
    end

    # Returns paragraph.text with its markups rendered as Markdown.
    # When the paragraph has no markups the text is returned unchanged.
    def parse()
        result = paragraph.text

        if !paragraph.markups.nil? && paragraph.markups.length > 0
            tags = paragraph.markups.map { |markup| buildTag(markup) }.compact

            # Index the tags once instead of re-scanning them per character.
            startsAt = tags.group_by(&:startIndex)
            endsAt = tags.group_by(&:endIndex)

            response = []
            stack = []   # currently open tags, innermost last

            chars.each do |index, char|
                isLineBreak = char.chars.join == "\n"

                if isLineBreak
                    # close every open tag before the newline (innermost
                    # first) and reopen them after it (outermost first)
                    stack.reverse_each { |tag| response.push(tag.endChars) }
                    response.push(TextChar.new(char.chars, 'Text'))
                    stack.each { |tag| response.push(tag.startChars) }
                end

                (startsAt[index] || []).sort_by(&:sort).each do |tag|
                    response.push(tag.startChars)
                    stack.push(tag)
                end

                response.push(TextChar.new(char.chars, 'Text')) unless isLineBreak

                # copy: entries are removed as they get closed
                endTags = (endsAt[index] || []).dup
                next if endTags.empty?

                mismatchTags = []
                until endTags.empty?
                    stackTag = stack.pop
                    # nothing left to close (tag was never opened)
                    break if stackTag.nil?

                    position = endTags.find_index(stackTag)
                    if position.nil?
                        # an inner tag that does not end here: close it now
                        # and reopen it below to keep the output well nested
                        mismatchTags.push(stackTag)
                    else
                        # as expected
                        endTags.delete_at(position)
                    end
                    response.push(stackTag.endChars)
                end

                until mismatchTags.empty?
                    mismatchTag = mismatchTags.pop
                    response.push(mismatchTag.startChars)
                    stack.push(mismatchTag)
                end
            end

            # close whatever is still open at the end of the text
            response.push(stack.pop.endChars) until stack.empty?

            result = optimize(response).map(&:chars).join
        end

        result
    end

    private

    # Converts one markup into a TagChar, or returns nil when the markup
    # cannot be rendered: unknown type (a warning is emitted through
    # Helper.makeWarningText) or an empty / inverted / missing range, which
    # has no character to style and could never be closed correctly.
    def buildTag(markup)
        sort, open, close =
            case markup.type
            when "EM"
                [2, "_", "_"]
            when "CODE"
                [3, "`", "`"]
            when "STRONG"
                [2, "**", "**"]
            when "A"
                # USER anchors link to the user's profile; every other
                # anchor type uses the markup's own href
                url = markup.anchorType == "USER" ? "https://medium.com/u/#{markup.userId}" : markup.href
                [1, "[", "](#{url})"]
            else
                Helper.makeWarningText("Undefined Markup Type: #{markup.type}.")
                return nil
            end

        return nil if markup.start.nil? || markup.end.nil? || markup.end <= markup.start

        TagChar.new(sort, markup.start, markup.end, open, close)
    end
end
|
data/lib/Post.rb
CHANGED
@@ -38,12 +38,23 @@ class Post
|
|
38
38
|
json
|
39
39
|
end
|
40
40
|
|
41
|
-
def self.
|
42
|
-
|
43
|
-
|
44
|
-
|
41
|
+
def self.fetchPostParagraphs(postID)
|
42
|
+
query = [
|
43
|
+
{
|
44
|
+
"operationName": "PostViewerEdgeContentQuery",
|
45
|
+
"variables": {
|
46
|
+
"postId": postID
|
47
|
+
},
|
48
|
+
"query": "query PostViewerEdgeContentQuery($postId: ID!, $postMeteringOptions: PostMeteringOptions) {\n post(id: $postId) {\n ... on Post {\n id\n viewerEdge {\n id\n fullContent(postMeteringOptions: $postMeteringOptions) {\n isLockedPreviewOnly\n validatedShareKey\n bodyModel {\n ...PostBody_bodyModel\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\nfragment PostBody_bodyModel on RichText {\n sections {\n name\n startIndex\n textLayout\n imageLayout\n backgroundImage {\n id\n originalHeight\n originalWidth\n __typename\n }\n videoLayout\n backgroundVideo {\n videoId\n originalHeight\n originalWidth\n previewImageId\n __typename\n }\n __typename\n }\n paragraphs {\n id\n ...PostBodySection_paragraph\n __typename\n }\n ...normalizedBodyModel_richText\n __typename\n}\n\nfragment PostBodySection_paragraph on Paragraph {\n name\n ...PostBodyParagraph_paragraph\n __typename\n id\n}\n\nfragment PostBodyParagraph_paragraph on Paragraph {\n name\n type\n ...ImageParagraph_paragraph\n ...TextParagraph_paragraph\n ...IframeParagraph_paragraph\n ...MixtapeParagraph_paragraph\n __typename\n id\n}\n\nfragment ImageParagraph_paragraph on Paragraph {\n href\n layout\n metadata {\n id\n originalHeight\n originalWidth\n focusPercentX\n focusPercentY\n alt\n __typename\n }\n ...Markups_paragraph\n ...ParagraphRefsMapContext_paragraph\n ...PostAnnotationsMarker_paragraph\n __typename\n id\n}\n\nfragment Markups_paragraph on Paragraph {\n name\n text\n hasDropCap\n dropCapImage {\n ...MarkupNode_data_dropCapImage\n __typename\n id\n }\n markups {\n type\n start\n end\n href\n anchorType\n userId\n linkMetadata {\n httpStatus\n __typename\n }\n __typename\n }\n __typename\n id\n}\n\nfragment MarkupNode_data_dropCapImage on ImageMetadata {\n ...DropCap_image\n __typename\n id\n}\n\nfragment DropCap_image on ImageMetadata {\n id\n originalHeight\n originalWidth\n __typename\n}\n\nfragment ParagraphRefsMapContext_paragraph on Paragraph {\n id\n 
name\n text\n __typename\n}\n\nfragment PostAnnotationsMarker_paragraph on Paragraph {\n ...PostViewNoteCard_paragraph\n __typename\n id\n}\n\nfragment PostViewNoteCard_paragraph on Paragraph {\n name\n __typename\n id\n}\n\nfragment TextParagraph_paragraph on Paragraph {\n type\n hasDropCap\n ...Markups_paragraph\n ...ParagraphRefsMapContext_paragraph\n __typename\n id\n}\n\nfragment IframeParagraph_paragraph on Paragraph {\n iframe {\n mediaResource {\n id\n iframeSrc\n iframeHeight\n iframeWidth\n title\n __typename\n }\n __typename\n }\n layout\n ...getEmbedlyCardUrlParams_paragraph\n ...Markups_paragraph\n __typename\n id\n}\n\nfragment getEmbedlyCardUrlParams_paragraph on Paragraph {\n type\n iframe {\n mediaResource {\n iframeSrc\n __typename\n }\n __typename\n }\n __typename\n id\n}\n\nfragment MixtapeParagraph_paragraph on Paragraph {\n type\n mixtapeMetadata {\n href\n mediaResource {\n mediumCatalog {\n id\n __typename\n }\n __typename\n }\n __typename\n }\n ...GenericMixtapeParagraph_paragraph\n __typename\n id\n}\n\nfragment GenericMixtapeParagraph_paragraph on Paragraph {\n text\n mixtapeMetadata {\n href\n thumbnailImageId\n __typename\n }\n markups {\n start\n end\n type\n href\n __typename\n }\n __typename\n id\n}\n\nfragment normalizedBodyModel_richText on RichText {\n paragraphs {\n markups {\n type\n __typename\n }\n ...getParagraphHighlights_paragraph\n ...getParagraphPrivateNotes_paragraph\n __typename\n }\n sections {\n startIndex\n ...getSectionEndIndex_section\n __typename\n }\n ...getParagraphStyles_richText\n ...getParagraphSpaces_richText\n __typename\n}\n\nfragment getParagraphHighlights_paragraph on Paragraph {\n name\n __typename\n id\n}\n\nfragment getParagraphPrivateNotes_paragraph on Paragraph {\n name\n __typename\n id\n}\n\nfragment getSectionEndIndex_section on Section {\n startIndex\n __typename\n}\n\nfragment getParagraphStyles_richText on RichText {\n paragraphs {\n text\n type\n __typename\n }\n sections {\n 
...getSectionEndIndex_section\n __typename\n }\n __typename\n}\n\nfragment getParagraphSpaces_richText on RichText {\n paragraphs {\n layout\n metadata {\n originalHeight\n originalWidth\n __typename\n }\n type\n ...paragraphExtendsImageGrid_paragraph\n __typename\n }\n ...getSeriesParagraphTopSpacings_richText\n ...getPostParagraphTopSpacings_richText\n __typename\n}\n\nfragment paragraphExtendsImageGrid_paragraph on Paragraph {\n layout\n type\n __typename\n id\n}\n\nfragment getSeriesParagraphTopSpacings_richText on RichText {\n paragraphs {\n id\n __typename\n }\n sections {\n startIndex\n __typename\n }\n __typename\n}\n\nfragment getPostParagraphTopSpacings_richText on RichText {\n paragraphs {\n layout\n text\n __typename\n }\n sections {\n startIndex\n __typename\n }\n __typename\n}\n"
|
49
|
+
}
|
50
|
+
]
|
51
|
+
|
52
|
+
body = Request.body(Request.URL("https://medium.com/_/graphql", "POST", query))
|
53
|
+
if !body.nil?
|
54
|
+
json = JSON.parse(body)
|
55
|
+
json&.dig(0, "data", "post", "viewerEdge", "fullContent", "bodyModel", "paragraphs")
|
45
56
|
else
|
46
|
-
|
57
|
+
nil
|
47
58
|
end
|
48
59
|
end
|
49
60
|
|
data/lib/ZMediumFetcher.rb
CHANGED
@@ -127,7 +127,7 @@ class ZMediumFetcher
|
|
127
127
|
|
128
128
|
postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
|
129
129
|
|
130
|
-
sourceParagraphs = Post.
|
130
|
+
sourceParagraphs = Post.fetchPostParagraphs(postID)
|
131
131
|
if sourceParagraphs.nil?
|
132
132
|
raise "Error: Paragraph not found! PostURL: #{postURL}"
|
133
133
|
end
|
@@ -140,7 +140,7 @@ class ZMediumFetcher
|
|
140
140
|
previousParagraph = nil
|
141
141
|
preTypeParagraphs = []
|
142
142
|
sourceParagraphs.each do |sourcParagraph|
|
143
|
-
paragraph = Paragraph.new(sourcParagraph, postID
|
143
|
+
paragraph = Paragraph.new(sourcParagraph, postID)
|
144
144
|
if OLIParser.isOLI(paragraph)
|
145
145
|
oliIndex += 1
|
146
146
|
paragraph.oliIndex = oliIndex
|
@@ -148,10 +148,11 @@ class ZMediumFetcher
|
|
148
148
|
oliIndex = 0
|
149
149
|
end
|
150
150
|
|
151
|
-
# if previous is OLI or ULI and current is not OLI or ULI
|
151
|
+
# if previous is OLI or ULI or BQ and current is not OLI or ULI or BQ
|
152
152
|
# then insert a blank paragraph to keep the markdown format correct
|
153
153
|
if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
|
154
|
-
(ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
|
154
|
+
(ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))||
|
155
|
+
(BQParser.isBQ(previousParagraph) && !BQParser.isBQ(paragraph))
|
155
156
|
paragraphs.append(Paragraph.makeBlankParagraph(postID))
|
156
157
|
end
|
157
158
|
|
@@ -178,7 +179,7 @@ class ZMediumFetcher
|
|
178
179
|
groupByText += "\n"
|
179
180
|
end
|
180
181
|
|
181
|
-
markupParser = MarkupParser.new(
|
182
|
+
markupParser = MarkupParser.new(preTypeParagraph)
|
182
183
|
groupByText += markupParser.parse()
|
183
184
|
end
|
184
185
|
|
@@ -227,7 +228,7 @@ class ZMediumFetcher
|
|
227
228
|
|
228
229
|
index = 0
|
229
230
|
paragraphs.each do |paragraph|
|
230
|
-
markupParser = MarkupParser.new(
|
231
|
+
markupParser = MarkupParser.new(paragraph)
|
231
232
|
paragraph.text = markupParser.parse()
|
232
233
|
result = startParser.parse(paragraph)
|
233
234
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ZMediumToMarkdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ZhgChgLi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-06-
|
11
|
+
date: 2022-06-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -24,20 +24,6 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 1.13.1
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: reverse_markdown
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: 2.1.1
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: 2.1.1
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: net-http
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -91,6 +77,7 @@ files:
|
|
91
77
|
- lib/Parsers/LinkParser.rb
|
92
78
|
- lib/Parsers/MIXTAPEEMBEDParser.rb
|
93
79
|
- lib/Parsers/MarkupParser.rb
|
80
|
+
- lib/Parsers/MarkupStyleRender.rb
|
94
81
|
- lib/Parsers/OLIParser.rb
|
95
82
|
- lib/Parsers/PParser.rb
|
96
83
|
- lib/Parsers/PQParser.rb
|