ZMediumToMarkdown 1.4.7 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/Models/Paragraph.rb +26 -7
- data/lib/Parsers/BQParser.rb +11 -5
- data/lib/Parsers/IframeParser.rb +4 -3
- data/lib/Parsers/MIXTAPEEMBEDParser.rb +2 -2
- data/lib/Parsers/MarkupParser.rb +12 -7
- data/lib/Parsers/MarkupStyleRender.rb +232 -0
- data/lib/Post.rb +16 -5
- data/lib/ZMediumFetcher.rb +7 -6
- metadata +3 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6026b5ebd732b82696ca5a7de549e5167067336998152c8a5e7bed751d610b78
|
4
|
+
data.tar.gz: 76c24779eaceb1763225f6e5e274fb5177685b0be9f75390ce189ec047278c21
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: db56b44ece9656eb90f703a6416c54a163f54e0208981ebf37029af9491fb39e30252b564e5d63f6defc63c72c51f3c3f944f531dfbd77ed10ba06b59abbd511
|
7
|
+
data.tar.gz: b71cc148ded49d5e3bd1ab8e72d915a5b8716de041cba43d39158fd47f451ce4c52ad0d671462a3dca7571879c744fa029b0af7a7e45d2fea92788d5ee858f0b
|
data/lib/Models/Paragraph.rb
CHANGED
@@ -4,7 +4,7 @@ require 'Parsers/PParser'
|
|
4
4
|
require 'securerandom'
|
5
5
|
|
6
6
|
class Paragraph
|
7
|
-
attr_accessor :postID, :name, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :
|
7
|
+
attr_accessor :postID, :name, :orgText, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :oliIndex, :markups, :markupLinks
|
8
8
|
|
9
9
|
class Iframe
|
10
10
|
attr_accessor :id, :title, :type, :src
|
@@ -20,6 +20,19 @@ class Paragraph
|
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
+
class Markup
|
24
|
+
attr_accessor :type, :start, :end, :href, :anchorType, :userId, :linkMetadata
|
25
|
+
def initialize(json)
|
26
|
+
@type = json['type']
|
27
|
+
@start = json['start']
|
28
|
+
@end = json['end']
|
29
|
+
@href = json['href']
|
30
|
+
@anchorType = json['anchorType']
|
31
|
+
@userId = json['userId']
|
32
|
+
@linkMetadata = json['linkMetadata']
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
23
36
|
class MetaData
|
24
37
|
attr_accessor :id, :type
|
25
38
|
def initialize(json)
|
@@ -41,12 +54,13 @@ class Paragraph
|
|
41
54
|
"text" => "",
|
42
55
|
"type" => PParser.getTypeString()
|
43
56
|
}
|
44
|
-
Paragraph.new(json, postID
|
57
|
+
Paragraph.new(json, postID)
|
45
58
|
end
|
46
59
|
|
47
|
-
def initialize(json, postID
|
60
|
+
def initialize(json, postID)
|
48
61
|
@name = json['name']
|
49
62
|
@text = json['text']
|
63
|
+
@orgText = json['text']
|
50
64
|
@type = json['type']
|
51
65
|
@href = json['href']
|
52
66
|
@postID = postID
|
@@ -54,7 +68,7 @@ class Paragraph
|
|
54
68
|
if json['metadata'].nil?
|
55
69
|
@metadata = nil
|
56
70
|
else
|
57
|
-
@metadata = MetaData.new(
|
71
|
+
@metadata = MetaData.new(json['metadata'])
|
58
72
|
end
|
59
73
|
|
60
74
|
if json['mixtapeMetadata'].nil?
|
@@ -66,17 +80,22 @@ class Paragraph
|
|
66
80
|
if json['iframe'].nil?
|
67
81
|
@iframe = nil
|
68
82
|
else
|
69
|
-
@iframe = Iframe.new(
|
83
|
+
@iframe = Iframe.new(json['iframe']['mediaResource'])
|
70
84
|
end
|
71
85
|
|
72
86
|
if !json['markups'].nil? && json['markups'].length > 0
|
87
|
+
markups = []
|
88
|
+
json['markups'].each do |markup|
|
89
|
+
markups.append(Markup.new(markup))
|
90
|
+
end
|
91
|
+
@markups = markups
|
92
|
+
|
73
93
|
links = json['markups'].select{ |markup| markup["type"] == "A" }
|
74
94
|
if !links.nil? && links.length > 0
|
75
95
|
@markupLinks = links.map{ |link| link["href"] }
|
76
96
|
end
|
77
|
-
@hasMarkup = true
|
78
97
|
else
|
79
|
-
@
|
98
|
+
@markups = nil
|
80
99
|
end
|
81
100
|
end
|
82
101
|
end
|
data/lib/Parsers/BQParser.rb
CHANGED
@@ -5,12 +5,18 @@ require 'Models/Paragraph'
|
|
5
5
|
|
6
6
|
class BQParser < Parser
|
7
7
|
attr_accessor :nextParser
|
8
|
+
|
9
|
+
def self.isBQ(paragraph)
|
10
|
+
if paragraph.nil?
|
11
|
+
false
|
12
|
+
else
|
13
|
+
paragraph.type == "BQ"
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
8
17
|
def parse(paragraph)
|
9
|
-
if paragraph
|
10
|
-
result = ""
|
11
|
-
paragraph.text.each_line do |p|
|
12
|
-
result += "> #{p}"
|
13
|
-
end
|
18
|
+
if BQParser.isBQ(paragraph)
|
19
|
+
result = "> #{paragraph.text}"
|
14
20
|
result
|
15
21
|
else
|
16
22
|
if !nextParser.nil?
|
data/lib/Parsers/IframeParser.rb
CHANGED
@@ -24,6 +24,7 @@ class IframeParser < Parser
|
|
24
24
|
# is youtube
|
25
25
|
youtubeURL = URI(URI.decode(url)).query
|
26
26
|
params = URI::decode_www_form(youtubeURL).to_h
|
27
|
+
|
27
28
|
if !params["image"].nil? && !params["url"].nil?
|
28
29
|
|
29
30
|
fileName = "#{paragraph.name}_#{URI(params["image"]).path.split("/").last}" #21de_default.jpg
|
@@ -31,12 +32,12 @@ class IframeParser < Parser
|
|
31
32
|
imageURL = params["image"]
|
32
33
|
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
|
33
34
|
absolutePath = imagePathPolicy.getAbsolutePath(fileName)
|
34
|
-
|
35
|
+
title = paragraph.iframe.title
|
35
36
|
if ImageDownloader.download(absolutePath, imageURL)
|
36
37
|
relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
|
37
|
-
result = "\n[![
|
38
|
+
result = "\n[![#{title}](#{relativePath} \"#{title}\")](#{params["url"]})"
|
38
39
|
else
|
39
|
-
result = "\n[
|
40
|
+
result = "\n[#{title}](#{params["url"]})"
|
40
41
|
end
|
41
42
|
end
|
42
43
|
else
|
data/lib/Parsers/MIXTAPEEMBEDParser.rb
CHANGED
@@ -8,9 +8,9 @@ class MIXTAPEEMBEDParser < Parser
|
|
8
8
|
def parse(paragraph)
|
9
9
|
if paragraph.type == 'MIXTAPE_EMBED'
|
10
10
|
if !paragraph.mixtapeMetadata.nil? && !paragraph.mixtapeMetadata.href.nil?
|
11
|
-
"\n[#{paragraph.
|
11
|
+
"\n[#{paragraph.orgText}](#{paragraph.mixtapeMetadata.href})"
|
12
12
|
else
|
13
|
-
"\n#{paragraph.
|
13
|
+
"\n#{paragraph.orgText}"
|
14
14
|
end
|
15
15
|
else
|
16
16
|
if !nextParser.nil?
|
data/lib/Parsers/MarkupParser.rb
CHANGED
@@ -1,23 +1,28 @@
|
|
1
1
|
$lib = File.expand_path('../', File.dirname(__FILE__))
|
2
2
|
|
3
3
|
require 'Models/Paragraph'
|
4
|
-
require '
|
4
|
+
require 'Parsers/MarkupStyleRender'
|
5
5
|
require 'nokogiri'
|
6
|
+
require 'securerandom'
|
7
|
+
require 'User'
|
6
8
|
|
7
9
|
class MarkupParser
|
8
10
|
attr_accessor :body, :paragraph
|
9
11
|
|
10
|
-
def initialize(
|
11
|
-
@body = html.search("body").first
|
12
|
+
def initialize(paragraph)
|
12
13
|
@paragraph = paragraph
|
13
14
|
end
|
14
15
|
|
15
16
|
def parse()
|
16
17
|
result = paragraph.text
|
17
|
-
if paragraph.
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
if !paragraph.markups.nil? && paragraph.markups.length > 0
|
19
|
+
markupRender = MarkupStyleRender.new(paragraph)
|
20
|
+
|
21
|
+
begin
|
22
|
+
result = markupRender.parse()
|
23
|
+
rescue => e
|
24
|
+
puts e.backtrace
|
25
|
+
Helper.makeWarningText("Error occurred during render markup text, please help to open an issue on github.")
|
21
26
|
end
|
22
27
|
end
|
23
28
|
|
data/lib/Parsers/MarkupStyleRender.rb
ADDED
@@ -0,0 +1,232 @@
|
|
1
|
+
|
2
|
+
$lib = File.expand_path('../', File.dirname(__FILE__))
|
3
|
+
|
4
|
+
require 'Models/Paragraph'
|
5
|
+
|
6
|
+
class MarkupStyleRender
|
7
|
+
attr_accessor :paragraph, :chars, :encodeType
|
8
|
+
|
9
|
+
class TextChar
|
10
|
+
attr_accessor :chars, :type
|
11
|
+
def initialize(chars, type)
|
12
|
+
@chars = chars
|
13
|
+
@type = type
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
class TagChar < TextChar
|
18
|
+
attr_accessor :sort, :startIndex, :endIndex, :startChars, :endChars
|
19
|
+
def initialize(sort, startIndex, endIndex, startChars, endChars)
|
20
|
+
@sort = sort
|
21
|
+
@startIndex = startIndex
|
22
|
+
@endIndex = endIndex - 1
|
23
|
+
@startChars = TextChar.new(startChars.chars, 'TagStart')
|
24
|
+
@endChars = TextChar.new(endChars.chars, 'TagEnd')
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
|
29
|
+
def initialize(paragraph)
|
30
|
+
@paragraph = paragraph
|
31
|
+
|
32
|
+
chars = {}
|
33
|
+
index = 0
|
34
|
+
|
35
|
+
emojiRegex = /[\u{203C}\u{2049}\u{20E3}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{23E9}-\u{23EC}\u{23F0}\u{23F3}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2601}\u{260E}\u{2611}\u{2614}-\u{2615}\u{261D}\u{263A}\u{2648}-\u{2653}\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267F}\u{2693}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26CE}\u{26D4}\u{26EA}\u{26F2}-\u{26F3}\u{26F5}\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270C}\u{270F}\u{2712}\u{2714}\u{2716}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1E7}-\u{1F1EC}\u{1F1EE}-\u{1F1F0}\u{1F1F3}\u{1F1F5}\u{1F1F7}-\u{1F1FA}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F320}\u{1F330}-\u{1F335}\u{1F337}-\u{1F37C}\u{1F380}-\u{1F393}\u{1F3A0}-\u{1F3C4}\u{1F3C6}-\u{1F3CA}\u{1F3E0}-\u{1F3F0}\u{1F400}-\u{1F43E}\u{1F440}\u{1F442}-\u{1F4F7}\u{1F4F9}-\u{1F4FC}\u{1F500}-\u{1F507}\u{1F509}-\u{1F53D}\u{1F550}-\u{1F567}\u{1F5FB}-\u{1F640}\u{1F645}-\u{1F64F}\u{1F680}-\u{1F68A}]/
|
36
|
+
excludesEmojis = ["⚠"]
|
37
|
+
paragraph.text.each_char do |char|
|
38
|
+
chars[index] = TextChar.new([char], "Text")
|
39
|
+
index += 1
|
40
|
+
if char =~ emojiRegex && !excludesEmojis.include?(char)
|
41
|
+
# some emoji need more space (in Medium)
|
42
|
+
chars[index] = TextChar.new([], "Text")
|
43
|
+
index += 1
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
@chars = chars
|
48
|
+
end
|
49
|
+
|
50
|
+
def optimize(chars)
|
51
|
+
while true
|
52
|
+
hasExcute = false
|
53
|
+
|
54
|
+
index = 0
|
55
|
+
startTagIndex = nil
|
56
|
+
preTag = nil
|
57
|
+
preTagIndex = nil
|
58
|
+
preTextChar = nil
|
59
|
+
preTextIndex = nil
|
60
|
+
chars.each do |char|
|
61
|
+
|
62
|
+
if !preTag.nil?
|
63
|
+
if preTag.type == "TagStart" && char.type == "TagEnd"
|
64
|
+
chars.delete_at(index)
|
65
|
+
chars.delete_at(preTagIndex)
|
66
|
+
hasExcute = true
|
67
|
+
break
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
if char.type == "TagStart" && (preTag == nil || preTag.type == "TagEnd" || preTag.type == "Text")
|
72
|
+
startTagIndex = index
|
73
|
+
elsif (char.type == "TagEnd" || char.type == "Text") && startTagIndex != nil
|
74
|
+
if preTextChar != nil && preTextChar.chars.join() != "\n"
|
75
|
+
# not first tag & insert blank between start tag and before text
|
76
|
+
if preTextChar.chars.join() != " "
|
77
|
+
chars.insert(startTagIndex, TextChar.new(" ".chars, "Text"))
|
78
|
+
hasExcute = true
|
79
|
+
break
|
80
|
+
end
|
81
|
+
end
|
82
|
+
startTagIndex = nil
|
83
|
+
end
|
84
|
+
|
85
|
+
if !preTag.nil?
|
86
|
+
if preTag.type == "TagStart" && char.type == "Text"
|
87
|
+
# delete blank between start tag and after text
|
88
|
+
if char.chars.join().strip == ""
|
89
|
+
chars.delete_at(index)
|
90
|
+
hasExcute = true
|
91
|
+
break
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
if preTag.type == "Text" && char.type == "TagEnd"
|
96
|
+
if preTextChar.chars.join().strip == "" && preTextChar.chars.join() != "\n"
|
97
|
+
chars.delete_at(preTextIndex)
|
98
|
+
hasExcute = true
|
99
|
+
break
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
if preTag.type == "TagEnd" && char.type == "Text"
|
104
|
+
if char.chars.join() != " "
|
105
|
+
chars.insert(index, TextChar.new(" ".chars, "Text"))
|
106
|
+
hasExcute = true
|
107
|
+
break
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
end
|
112
|
+
|
113
|
+
if char.type == "Text"
|
114
|
+
preTextChar = char
|
115
|
+
preTextIndex = index
|
116
|
+
end
|
117
|
+
|
118
|
+
preTag = char
|
119
|
+
preTagIndex = index
|
120
|
+
|
121
|
+
index += 1
|
122
|
+
end
|
123
|
+
|
124
|
+
if !hasExcute
|
125
|
+
break
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
chars
|
130
|
+
end
|
131
|
+
|
132
|
+
def parse()
|
133
|
+
result = paragraph.text
|
134
|
+
|
135
|
+
if !paragraph.markups.nil? && paragraph.markups.length > 0
|
136
|
+
|
137
|
+
tags = []
|
138
|
+
paragraph.markups.each do |markup|
|
139
|
+
tag = nil
|
140
|
+
if markup.type == "EM"
|
141
|
+
tag = TagChar.new(2, markup.start, markup.end, "_", "_")
|
142
|
+
elsif markup.type == "CODE"
|
143
|
+
tag = TagChar.new(3, markup.start, markup.end, "`", "`")
|
144
|
+
elsif markup.type == "STRONG"
|
145
|
+
tag = TagChar.new(2, markup.start, markup.end, "**", "**")
|
146
|
+
elsif markup.type == "A"
|
147
|
+
url = markup.href
|
148
|
+
if markup.anchorType == "LINK"
|
149
|
+
url = markup.href
|
150
|
+
elsif markup.anchorType == "USER"
|
151
|
+
url = "https://medium.com/u/#{markup.userId}"
|
152
|
+
end
|
153
|
+
|
154
|
+
tag = TagChar.new(1, markup.start, markup.end, "[", "](#{url})")
|
155
|
+
else
|
156
|
+
Helper.makeWarningText("Undefined Markup Type: #{markup.type}.")
|
157
|
+
end
|
158
|
+
|
159
|
+
if !tag.nil?
|
160
|
+
tags.append(tag)
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
tags.sort_by(&:startIndex)
|
165
|
+
|
166
|
+
response = []
|
167
|
+
stack = []
|
168
|
+
|
169
|
+
chars.each do |index, char|
|
170
|
+
|
171
|
+
if char.chars.join() == "\n"
|
172
|
+
brStack = stack.dup
|
173
|
+
while brStack.length > 0
|
174
|
+
tag = brStack.pop
|
175
|
+
response.push(tag.endChars)
|
176
|
+
end
|
177
|
+
response.append(TextChar.new(char.chars, 'Text'))
|
178
|
+
brStack = stack.dup.reverse
|
179
|
+
while brStack.length > 0
|
180
|
+
tag = brStack.pop
|
181
|
+
response.push(tag.startChars)
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
185
|
+
startTags = tags.select { |tag| tag.startIndex == index }.sort_by(&:sort)
|
186
|
+
if !startTags.nil?
|
187
|
+
startTags.each do |tag|
|
188
|
+
response.append(tag.startChars)
|
189
|
+
stack.append(tag)
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
if char.chars.join() != "\n"
|
194
|
+
response.append(TextChar.new(char.chars, 'Text'))
|
195
|
+
end
|
196
|
+
|
197
|
+
endTags = tags.select { |tag| tag.endIndex == index }
|
198
|
+
if !endTags.nil? && endTags.length > 0
|
199
|
+
mismatchTags = []
|
200
|
+
while endTags.length > 0
|
201
|
+
stackTag = stack.pop
|
202
|
+
stackTagInEndTagsIndex = endTags.find_index(stackTag)
|
203
|
+
if !stackTagInEndTagsIndex.nil?
|
204
|
+
# as expected
|
205
|
+
endTags.delete_at(stackTagInEndTagsIndex)
|
206
|
+
else
|
207
|
+
mismatchTags.append(stackTag)
|
208
|
+
end
|
209
|
+
response.append(stackTag.endChars)
|
210
|
+
end
|
211
|
+
|
212
|
+
while mismatchTags.length > 0
|
213
|
+
mismatchTag = mismatchTags.pop
|
214
|
+
response.append(mismatchTag.startChars)
|
215
|
+
stack.append(mismatchTag)
|
216
|
+
end
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
220
|
+
while stack.length > 0
|
221
|
+
tag = stack.pop
|
222
|
+
response.push(tag.endChars)
|
223
|
+
end
|
224
|
+
|
225
|
+
response = optimize(response)
|
226
|
+
result = response.map{ |response| response.chars }.join()
|
227
|
+
end
|
228
|
+
|
229
|
+
result
|
230
|
+
end
|
231
|
+
|
232
|
+
end
|
data/lib/Post.rb
CHANGED
@@ -38,12 +38,23 @@ class Post
|
|
38
38
|
json
|
39
39
|
end
|
40
40
|
|
41
|
-
def self.
|
42
|
-
|
43
|
-
|
44
|
-
|
41
|
+
def self.fetchPostParagraphs(postID)
|
42
|
+
query = [
|
43
|
+
{
|
44
|
+
"operationName": "PostViewerEdgeContentQuery",
|
45
|
+
"variables": {
|
46
|
+
"postId": postID
|
47
|
+
},
|
48
|
+
"query": "query PostViewerEdgeContentQuery($postId: ID!, $postMeteringOptions: PostMeteringOptions) {\n post(id: $postId) {\n ... on Post {\n id\n viewerEdge {\n id\n fullContent(postMeteringOptions: $postMeteringOptions) {\n isLockedPreviewOnly\n validatedShareKey\n bodyModel {\n ...PostBody_bodyModel\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\nfragment PostBody_bodyModel on RichText {\n sections {\n name\n startIndex\n textLayout\n imageLayout\n backgroundImage {\n id\n originalHeight\n originalWidth\n __typename\n }\n videoLayout\n backgroundVideo {\n videoId\n originalHeight\n originalWidth\n previewImageId\n __typename\n }\n __typename\n }\n paragraphs {\n id\n ...PostBodySection_paragraph\n __typename\n }\n ...normalizedBodyModel_richText\n __typename\n}\n\nfragment PostBodySection_paragraph on Paragraph {\n name\n ...PostBodyParagraph_paragraph\n __typename\n id\n}\n\nfragment PostBodyParagraph_paragraph on Paragraph {\n name\n type\n ...ImageParagraph_paragraph\n ...TextParagraph_paragraph\n ...IframeParagraph_paragraph\n ...MixtapeParagraph_paragraph\n __typename\n id\n}\n\nfragment ImageParagraph_paragraph on Paragraph {\n href\n layout\n metadata {\n id\n originalHeight\n originalWidth\n focusPercentX\n focusPercentY\n alt\n __typename\n }\n ...Markups_paragraph\n ...ParagraphRefsMapContext_paragraph\n ...PostAnnotationsMarker_paragraph\n __typename\n id\n}\n\nfragment Markups_paragraph on Paragraph {\n name\n text\n hasDropCap\n dropCapImage {\n ...MarkupNode_data_dropCapImage\n __typename\n id\n }\n markups {\n type\n start\n end\n href\n anchorType\n userId\n linkMetadata {\n httpStatus\n __typename\n }\n __typename\n }\n __typename\n id\n}\n\nfragment MarkupNode_data_dropCapImage on ImageMetadata {\n ...DropCap_image\n __typename\n id\n}\n\nfragment DropCap_image on ImageMetadata {\n id\n originalHeight\n originalWidth\n __typename\n}\n\nfragment ParagraphRefsMapContext_paragraph on Paragraph {\n id\n 
name\n text\n __typename\n}\n\nfragment PostAnnotationsMarker_paragraph on Paragraph {\n ...PostViewNoteCard_paragraph\n __typename\n id\n}\n\nfragment PostViewNoteCard_paragraph on Paragraph {\n name\n __typename\n id\n}\n\nfragment TextParagraph_paragraph on Paragraph {\n type\n hasDropCap\n ...Markups_paragraph\n ...ParagraphRefsMapContext_paragraph\n __typename\n id\n}\n\nfragment IframeParagraph_paragraph on Paragraph {\n iframe {\n mediaResource {\n id\n iframeSrc\n iframeHeight\n iframeWidth\n title\n __typename\n }\n __typename\n }\n layout\n ...getEmbedlyCardUrlParams_paragraph\n ...Markups_paragraph\n __typename\n id\n}\n\nfragment getEmbedlyCardUrlParams_paragraph on Paragraph {\n type\n iframe {\n mediaResource {\n iframeSrc\n __typename\n }\n __typename\n }\n __typename\n id\n}\n\nfragment MixtapeParagraph_paragraph on Paragraph {\n type\n mixtapeMetadata {\n href\n mediaResource {\n mediumCatalog {\n id\n __typename\n }\n __typename\n }\n __typename\n }\n ...GenericMixtapeParagraph_paragraph\n __typename\n id\n}\n\nfragment GenericMixtapeParagraph_paragraph on Paragraph {\n text\n mixtapeMetadata {\n href\n thumbnailImageId\n __typename\n }\n markups {\n start\n end\n type\n href\n __typename\n }\n __typename\n id\n}\n\nfragment normalizedBodyModel_richText on RichText {\n paragraphs {\n markups {\n type\n __typename\n }\n ...getParagraphHighlights_paragraph\n ...getParagraphPrivateNotes_paragraph\n __typename\n }\n sections {\n startIndex\n ...getSectionEndIndex_section\n __typename\n }\n ...getParagraphStyles_richText\n ...getParagraphSpaces_richText\n __typename\n}\n\nfragment getParagraphHighlights_paragraph on Paragraph {\n name\n __typename\n id\n}\n\nfragment getParagraphPrivateNotes_paragraph on Paragraph {\n name\n __typename\n id\n}\n\nfragment getSectionEndIndex_section on Section {\n startIndex\n __typename\n}\n\nfragment getParagraphStyles_richText on RichText {\n paragraphs {\n text\n type\n __typename\n }\n sections {\n 
...getSectionEndIndex_section\n __typename\n }\n __typename\n}\n\nfragment getParagraphSpaces_richText on RichText {\n paragraphs {\n layout\n metadata {\n originalHeight\n originalWidth\n __typename\n }\n type\n ...paragraphExtendsImageGrid_paragraph\n __typename\n }\n ...getSeriesParagraphTopSpacings_richText\n ...getPostParagraphTopSpacings_richText\n __typename\n}\n\nfragment paragraphExtendsImageGrid_paragraph on Paragraph {\n layout\n type\n __typename\n id\n}\n\nfragment getSeriesParagraphTopSpacings_richText on RichText {\n paragraphs {\n id\n __typename\n }\n sections {\n startIndex\n __typename\n }\n __typename\n}\n\nfragment getPostParagraphTopSpacings_richText on RichText {\n paragraphs {\n layout\n text\n __typename\n }\n sections {\n startIndex\n __typename\n }\n __typename\n}\n"
|
49
|
+
}
|
50
|
+
]
|
51
|
+
|
52
|
+
body = Request.body(Request.URL("https://medium.com/_/graphql", "POST", query))
|
53
|
+
if !body.nil?
|
54
|
+
json = JSON.parse(body)
|
55
|
+
json&.dig(0, "data", "post", "viewerEdge", "fullContent", "bodyModel", "paragraphs")
|
45
56
|
else
|
46
|
-
|
57
|
+
nil
|
47
58
|
end
|
48
59
|
end
|
49
60
|
|
data/lib/ZMediumFetcher.rb
CHANGED
@@ -127,7 +127,7 @@ class ZMediumFetcher
|
|
127
127
|
|
128
128
|
postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
|
129
129
|
|
130
|
-
sourceParagraphs = Post.
|
130
|
+
sourceParagraphs = Post.fetchPostParagraphs(postID)
|
131
131
|
if sourceParagraphs.nil?
|
132
132
|
raise "Error: Paragraph not found! PostURL: #{postURL}"
|
133
133
|
end
|
@@ -140,7 +140,7 @@ class ZMediumFetcher
|
|
140
140
|
previousParagraph = nil
|
141
141
|
preTypeParagraphs = []
|
142
142
|
sourceParagraphs.each do |sourcParagraph|
|
143
|
-
paragraph = Paragraph.new(sourcParagraph, postID
|
143
|
+
paragraph = Paragraph.new(sourcParagraph, postID)
|
144
144
|
if OLIParser.isOLI(paragraph)
|
145
145
|
oliIndex += 1
|
146
146
|
paragraph.oliIndex = oliIndex
|
@@ -148,10 +148,11 @@ class ZMediumFetcher
|
|
148
148
|
oliIndex = 0
|
149
149
|
end
|
150
150
|
|
151
|
-
# if previous is OLI or ULI and current is not OLI or ULI
|
151
|
+
# if previous is OLI or ULI or BQ and current is not OLI or ULI or BQ
|
152
152
|
# then insert a blank paragraph to keep markdown format correct
|
153
153
|
if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
|
154
|
-
(ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
|
154
|
+
(ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))||
|
155
|
+
(BQParser.isBQ(previousParagraph) && !BQParser.isBQ(paragraph))
|
155
156
|
paragraphs.append(Paragraph.makeBlankParagraph(postID))
|
156
157
|
end
|
157
158
|
|
@@ -178,7 +179,7 @@ class ZMediumFetcher
|
|
178
179
|
groupByText += "\n"
|
179
180
|
end
|
180
181
|
|
181
|
-
markupParser = MarkupParser.new(
|
182
|
+
markupParser = MarkupParser.new(preTypeParagraph)
|
182
183
|
groupByText += markupParser.parse()
|
183
184
|
end
|
184
185
|
|
@@ -227,7 +228,7 @@ class ZMediumFetcher
|
|
227
228
|
|
228
229
|
index = 0
|
229
230
|
paragraphs.each do |paragraph|
|
230
|
-
markupParser = MarkupParser.new(
|
231
|
+
markupParser = MarkupParser.new(paragraph)
|
231
232
|
paragraph.text = markupParser.parse()
|
232
233
|
result = startParser.parse(paragraph)
|
233
234
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ZMediumToMarkdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.7
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ZhgChgLi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-06-
|
11
|
+
date: 2022-06-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -24,20 +24,6 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 1.13.1
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: reverse_markdown
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: 2.1.1
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: 2.1.1
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: net-http
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -91,6 +77,7 @@ files:
|
|
91
77
|
- lib/Parsers/LinkParser.rb
|
92
78
|
- lib/Parsers/MIXTAPEEMBEDParser.rb
|
93
79
|
- lib/Parsers/MarkupParser.rb
|
80
|
+
- lib/Parsers/MarkupStyleRender.rb
|
94
81
|
- lib/Parsers/OLIParser.rb
|
95
82
|
- lib/Parsers/PParser.rb
|
96
83
|
- lib/Parsers/PQParser.rb
|