ZMediumToMarkdown 1.5.0 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/Helper.rb +8 -3
- data/lib/Models/Paragraph.rb +26 -7
- data/lib/Parsers/BQParser.rb +11 -5
- data/lib/Parsers/CodeBlockParser.rb +9 -1
- data/lib/Parsers/IMGParser.rb +7 -2
- data/lib/Parsers/IframeParser.rb +5 -4
- data/lib/Parsers/LinkParser.rb +1 -13
- data/lib/Parsers/MIXTAPEEMBEDParser.rb +2 -2
- data/lib/Parsers/MarkupParser.rb +12 -7
- data/lib/Parsers/MarkupStyleRender.rb +232 -0
- data/lib/Post.rb +24 -6
- data/lib/ZMediumFetcher.rb +16 -9
- metadata +3 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 57ebbd86d072c9c43a5baef02031561323e9e7f6857e639aecc754de5741c543
|
4
|
+
data.tar.gz: 03bde3f39434b21c7d96380d05dbc9ccc7096f30ab97aade0e6838165e28de3e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d108f648afe9eb0f90231dc771e22fd6f3f15d820f6d19aa941d23b48b2416c672350b6353af85c58dd13ccc1c3faa194b55f622476d09ea8bc84cccff6ba6ac
|
7
|
+
data.tar.gz: e484a2d51bc9ec006dc5511e2586d29eaab102bf778a030240ee92941b4fdfd9a726fbc5542677af7dbd8bb192e6c5d162b30b8c8b5001d166d3561edabdb9ec
|
data/lib/Helper.rb
CHANGED
@@ -77,10 +77,15 @@ class Helper
|
|
77
77
|
end
|
78
78
|
|
79
79
|
def self.createPostInfo(postInfo)
|
80
|
+
|
81
|
+
title = postInfo.title.gsub("[","")
|
82
|
+
title = title.gsub("]","")
|
83
|
+
|
80
84
|
result = "---\n"
|
81
|
-
result += "title: #{
|
85
|
+
result += "title: #{title}\n"
|
82
86
|
result += "author: #{postInfo.creator}\n"
|
83
87
|
result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
|
88
|
+
result += "categories: #{postInfo.collectionName}\n"
|
84
89
|
result += "tags: [#{postInfo.tags.join(",")}]\n"
|
85
90
|
result += "---\n"
|
86
91
|
result += "\r\n"
|
@@ -159,7 +164,7 @@ class Helper
|
|
159
164
|
text += "+-----------------------------------------------------------------------------------+"
|
160
165
|
text += "\r\n"
|
161
166
|
text += "\r\n"
|
162
|
-
text += "| **[View original post on Medium](#{postURL}) - Converted by [ZhgChgLi](https://
|
167
|
+
text += "| **[View original post on Medium](#{postURL}) - Converted by [ZhgChgLi](https://zhgchg.li)/[ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)** |"
|
163
168
|
text += "\r\n"
|
164
169
|
text += "\r\n"
|
165
170
|
text += "+-----------------------------------------------------------------------------------+"
|
@@ -167,4 +172,4 @@ class Helper
|
|
167
172
|
|
168
173
|
text
|
169
174
|
end
|
170
|
-
end
|
175
|
+
end
|
data/lib/Models/Paragraph.rb
CHANGED
@@ -4,7 +4,7 @@ require 'Parsers/PParser'
|
|
4
4
|
require 'securerandom'
|
5
5
|
|
6
6
|
class Paragraph
|
7
|
-
attr_accessor :postID, :name, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :
|
7
|
+
attr_accessor :postID, :name, :orgText, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :oliIndex, :markups, :markupLinks
|
8
8
|
|
9
9
|
class Iframe
|
10
10
|
attr_accessor :id, :title, :type, :src
|
@@ -20,6 +20,19 @@ class Paragraph
|
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
+
class Markup
|
24
|
+
attr_accessor :type, :start, :end, :href, :anchorType, :userId, :linkMetadata
|
25
|
+
def initialize(json)
|
26
|
+
@type = json['type']
|
27
|
+
@start = json['start']
|
28
|
+
@end = json['end']
|
29
|
+
@href = json['href']
|
30
|
+
@anchorType = json['anchorType']
|
31
|
+
@userId = json['userId']
|
32
|
+
@linkMetadata = json['linkMetadata']
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
23
36
|
class MetaData
|
24
37
|
attr_accessor :id, :type
|
25
38
|
def initialize(json)
|
@@ -41,12 +54,13 @@ class Paragraph
|
|
41
54
|
"text" => "",
|
42
55
|
"type" => PParser.getTypeString()
|
43
56
|
}
|
44
|
-
Paragraph.new(json, postID
|
57
|
+
Paragraph.new(json, postID)
|
45
58
|
end
|
46
59
|
|
47
|
-
def initialize(json, postID
|
60
|
+
def initialize(json, postID)
|
48
61
|
@name = json['name']
|
49
62
|
@text = json['text']
|
63
|
+
@orgText = json['text']
|
50
64
|
@type = json['type']
|
51
65
|
@href = json['href']
|
52
66
|
@postID = postID
|
@@ -54,7 +68,7 @@ class Paragraph
|
|
54
68
|
if json['metadata'].nil?
|
55
69
|
@metadata = nil
|
56
70
|
else
|
57
|
-
@metadata = MetaData.new(
|
71
|
+
@metadata = MetaData.new(json['metadata'])
|
58
72
|
end
|
59
73
|
|
60
74
|
if json['mixtapeMetadata'].nil?
|
@@ -66,17 +80,22 @@ class Paragraph
|
|
66
80
|
if json['iframe'].nil?
|
67
81
|
@iframe = nil
|
68
82
|
else
|
69
|
-
@iframe = Iframe.new(
|
83
|
+
@iframe = Iframe.new(json['iframe']['mediaResource'])
|
70
84
|
end
|
71
85
|
|
72
86
|
if !json['markups'].nil? && json['markups'].length > 0
|
87
|
+
markups = []
|
88
|
+
json['markups'].each do |markup|
|
89
|
+
markups.append(Markup.new(markup))
|
90
|
+
end
|
91
|
+
@markups = markups
|
92
|
+
|
73
93
|
links = json['markups'].select{ |markup| markup["type"] == "A" }
|
74
94
|
if !links.nil? && links.length > 0
|
75
95
|
@markupLinks = links.map{ |link| link["href"] }
|
76
96
|
end
|
77
|
-
@hasMarkup = true
|
78
97
|
else
|
79
|
-
@
|
98
|
+
@markups = nil
|
80
99
|
end
|
81
100
|
end
|
82
101
|
end
|
data/lib/Parsers/BQParser.rb
CHANGED
@@ -5,12 +5,18 @@ require 'Models/Paragraph'
|
|
5
5
|
|
6
6
|
class BQParser < Parser
|
7
7
|
attr_accessor :nextParser
|
8
|
+
|
9
|
+
def self.isBQ(paragraph)
|
10
|
+
if paragraph.nil?
|
11
|
+
false
|
12
|
+
else
|
13
|
+
paragraph.type == "BQ"
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
8
17
|
def parse(paragraph)
|
9
|
-
if paragraph
|
10
|
-
result = ""
|
11
|
-
paragraph.text.each_line do |p|
|
12
|
-
result += "> #{p}"
|
13
|
-
end
|
18
|
+
if BQParser.isBQ(paragraph)
|
19
|
+
result = "> #{paragraph.text}"
|
14
20
|
result
|
15
21
|
else
|
16
22
|
if !nextParser.nil?
|
@@ -10,8 +10,16 @@ class CodeBlockParser < Parser
|
|
10
10
|
'CODE_BLOCK'
|
11
11
|
end
|
12
12
|
|
13
|
+
def self.isCodeBlock(paragraph)
|
14
|
+
if paragraph.nil?
|
15
|
+
false
|
16
|
+
else
|
17
|
+
paragraph.type == CodeBlockParser.getTypeString()
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
13
21
|
def parse(paragraph)
|
14
|
-
if
|
22
|
+
if CodeBlockParser.isCodeBlock(paragraph)
|
15
23
|
"```\n#{paragraph.text}\n```"
|
16
24
|
else
|
17
25
|
if !nextParser.nil?
|
data/lib/Parsers/IMGParser.rb
CHANGED
@@ -18,11 +18,16 @@ class IMGParser < Parser
|
|
18
18
|
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
|
19
19
|
absolutePath = imagePathPolicy.getAbsolutePath(fileName)
|
20
20
|
|
21
|
+
comment = ""
|
22
|
+
if paragraph.text != ""
|
23
|
+
comment = " \"#{paragraph.text}\""
|
24
|
+
end
|
25
|
+
|
21
26
|
if ImageDownloader.download(absolutePath, imageURL)
|
22
27
|
relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
|
23
|
-
""
|
24
29
|
else
|
25
|
-
""
|
26
31
|
end
|
27
32
|
else
|
28
33
|
if !nextParser.nil?
|
data/lib/Parsers/IframeParser.rb
CHANGED
@@ -24,6 +24,7 @@ class IframeParser < Parser
|
|
24
24
|
# is youtube
|
25
25
|
youtubeURL = URI(URI.decode(url)).query
|
26
26
|
params = URI::decode_www_form(youtubeURL).to_h
|
27
|
+
|
27
28
|
if !params["image"].nil? && !params["url"].nil?
|
28
29
|
|
29
30
|
fileName = "#{paragraph.name}_#{URI(params["image"]).path.split("/").last}" #21de_default.jpg
|
@@ -31,12 +32,12 @@ class IframeParser < Parser
|
|
31
32
|
imageURL = params["image"]
|
32
33
|
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
|
33
34
|
absolutePath = imagePathPolicy.getAbsolutePath(fileName)
|
34
|
-
|
35
|
+
title = paragraph.iframe.title
|
35
36
|
if ImageDownloader.download(absolutePath, imageURL)
|
36
37
|
relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
|
37
|
-
result = "\n[](#{params["url"]})"
|
38
39
|
else
|
39
|
-
result = "\n[
|
40
|
+
result = "\n[#{title}](#{params["url"]})"
|
40
41
|
end
|
41
42
|
end
|
42
43
|
else
|
@@ -54,7 +55,7 @@ class IframeParser < Parser
|
|
54
55
|
gistHTML.search('a').each do |a|
|
55
56
|
if a.text == 'view raw'
|
56
57
|
gistRAW = Request.body(Request.URL(a['href']))
|
57
|
-
result = "```#{lang}\n#{gistRAW}\n```"
|
58
|
+
result = "```#{lang.downcase}\n#{gistRAW}\n```"
|
58
59
|
end
|
59
60
|
end
|
60
61
|
end
|
data/lib/Parsers/LinkParser.rb
CHANGED
@@ -23,19 +23,7 @@ class LinkParser
|
|
23
23
|
|
24
24
|
postPath = link.split("/").last
|
25
25
|
if !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == postPath.split("-").last }.nil?
|
26
|
-
markdownString = markdownString.sub! link, postPath
|
27
|
-
end
|
28
|
-
else
|
29
|
-
if !(link =~ /\A#{URI::regexp(['http', 'https'])}\z/)
|
30
|
-
# medium will give you an relative path if url is medium's post (due to we use html to markdown render)
|
31
|
-
# e.g. /zrealm-ios-dev/visitor-pattern-in-ios-swift-ba5773a7bfea
|
32
|
-
# it's not a vaild url
|
33
|
-
|
34
|
-
# fullfill url from markup attribute
|
35
|
-
match = markupLinks.find{ |markupLink| markupLink.include? link }
|
36
|
-
if !match.nil?
|
37
|
-
markdownString = markdownString.sub! link, match
|
38
|
-
end
|
26
|
+
markdownString = markdownString.sub! link, "../#{postPath}"
|
39
27
|
end
|
40
28
|
end
|
41
29
|
end
|
@@ -8,9 +8,9 @@ class MIXTAPEEMBEDParser < Parser
|
|
8
8
|
def parse(paragraph)
|
9
9
|
if paragraph.type == 'MIXTAPE_EMBED'
|
10
10
|
if !paragraph.mixtapeMetadata.nil? && !paragraph.mixtapeMetadata.href.nil?
|
11
|
-
"\n[#{paragraph.
|
11
|
+
"\n[#{paragraph.orgText}](#{paragraph.mixtapeMetadata.href})"
|
12
12
|
else
|
13
|
-
"\n#{paragraph.
|
13
|
+
"\n#{paragraph.orgText}"
|
14
14
|
end
|
15
15
|
else
|
16
16
|
if !nextParser.nil?
|
data/lib/Parsers/MarkupParser.rb
CHANGED
@@ -1,23 +1,28 @@
|
|
1
1
|
$lib = File.expand_path('../', File.dirname(__FILE__))
|
2
2
|
|
3
3
|
require 'Models/Paragraph'
|
4
|
-
require '
|
4
|
+
require 'Parsers/MarkupStyleRender'
|
5
5
|
require 'nokogiri'
|
6
|
+
require 'securerandom'
|
7
|
+
require 'User'
|
6
8
|
|
7
9
|
class MarkupParser
|
8
10
|
attr_accessor :body, :paragraph
|
9
11
|
|
10
|
-
def initialize(
|
11
|
-
@body = html.search("body").first
|
12
|
+
def initialize(paragraph)
|
12
13
|
@paragraph = paragraph
|
13
14
|
end
|
14
15
|
|
15
16
|
def parse()
|
16
17
|
result = paragraph.text
|
17
|
-
if paragraph.
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
if !paragraph.markups.nil? && paragraph.markups.length > 0
|
19
|
+
markupRender = MarkupStyleRender.new(paragraph)
|
20
|
+
|
21
|
+
begin
|
22
|
+
result = markupRender.parse()
|
23
|
+
rescue => e
|
24
|
+
puts e.backtrace
|
25
|
+
Helper.makeWarningText("Error occurred during render markup text, please help to open an issue on github.")
|
21
26
|
end
|
22
27
|
end
|
23
28
|
|
@@ -0,0 +1,232 @@
|
|
1
|
+
|
2
|
+
$lib = File.expand_path('../', File.dirname(__FILE__))
|
3
|
+
|
4
|
+
require 'Models/Paragraph'
|
5
|
+
|
6
|
+
class MarkupStyleRender
|
7
|
+
attr_accessor :paragraph, :chars, :encodeType
|
8
|
+
|
9
|
+
class TextChar
|
10
|
+
attr_accessor :chars, :type
|
11
|
+
def initialize(chars, type)
|
12
|
+
@chars = chars
|
13
|
+
@type = type
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
class TagChar < TextChar
|
18
|
+
attr_accessor :sort, :startIndex, :endIndex, :startChars, :endChars
|
19
|
+
def initialize(sort, startIndex, endIndex, startChars, endChars)
|
20
|
+
@sort = sort
|
21
|
+
@startIndex = startIndex
|
22
|
+
@endIndex = endIndex - 1
|
23
|
+
@startChars = TextChar.new(startChars.chars, 'TagStart')
|
24
|
+
@endChars = TextChar.new(endChars.chars, 'TagEnd')
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
|
29
|
+
def initialize(paragraph)
|
30
|
+
@paragraph = paragraph
|
31
|
+
|
32
|
+
chars = {}
|
33
|
+
index = 0
|
34
|
+
|
35
|
+
emojiRegex = /[\u{203C}\u{2049}\u{20E3}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{23E9}-\u{23EC}\u{23F0}\u{23F3}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2601}\u{260E}\u{2611}\u{2614}-\u{2615}\u{261D}\u{263A}\u{2648}-\u{2653}\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267F}\u{2693}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26CE}\u{26D4}\u{26EA}\u{26F2}-\u{26F3}\u{26F5}\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270C}\u{270F}\u{2712}\u{2714}\u{2716}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1E7}-\u{1F1EC}\u{1F1EE}-\u{1F1F0}\u{1F1F3}\u{1F1F5}\u{1F1F7}-\u{1F1FA}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F320}\u{1F330}-\u{1F335}\u{1F337}-\u{1F37C}\u{1F380}-\u{1F393}\u{1F3A0}-\u{1F3C4}\u{1F3C6}-\u{1F3CA}\u{1F3E0}-\u{1F3F0}\u{1F400}-\u{1F43E}\u{1F440}\u{1F442}-\u{1F4F7}\u{1F4F9}-\u{1F4FC}\u{1F500}-\u{1F507}\u{1F509}-\u{1F53D}\u{1F550}-\u{1F567}\u{1F5FB}-\u{1F640}\u{1F645}-\u{1F64F}\u{1F680}-\u{1F68A}]/
|
36
|
+
excludesEmojis = ["⚠"]
|
37
|
+
paragraph.text.each_char do |char|
|
38
|
+
chars[index] = TextChar.new([char], "Text")
|
39
|
+
index += 1
|
40
|
+
if char =~ emojiRegex && !excludesEmojis.include?(char)
|
41
|
+
# some emoji need more space (in Medium)
|
42
|
+
chars[index] = TextChar.new([], "Text")
|
43
|
+
index += 1
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
@chars = chars
|
48
|
+
end
|
49
|
+
|
50
|
+
def optimize(chars)
|
51
|
+
while true
|
52
|
+
hasExcute = false
|
53
|
+
|
54
|
+
index = 0
|
55
|
+
startTagIndex = nil
|
56
|
+
preTag = nil
|
57
|
+
preTagIndex = nil
|
58
|
+
preTextChar = nil
|
59
|
+
preTextIndex = nil
|
60
|
+
chars.each do |char|
|
61
|
+
|
62
|
+
if !preTag.nil?
|
63
|
+
if preTag.type == "TagStart" && char.type == "TagEnd"
|
64
|
+
chars.delete_at(index)
|
65
|
+
chars.delete_at(preTagIndex)
|
66
|
+
hasExcute = true
|
67
|
+
break
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
if char.type == "TagStart" && (preTag == nil || preTag.type == "TagEnd" || preTag.type == "Text")
|
72
|
+
startTagIndex = index
|
73
|
+
elsif (char.type == "TagEnd" || char.type == "Text") && startTagIndex != nil
|
74
|
+
if preTextChar != nil && preTextChar.chars.join() != "\n"
|
75
|
+
# not first tag & insert blank between start tag and before text
|
76
|
+
if preTextChar.chars.join() != " "
|
77
|
+
chars.insert(startTagIndex, TextChar.new(" ".chars, "Text"))
|
78
|
+
hasExcute = true
|
79
|
+
break
|
80
|
+
end
|
81
|
+
end
|
82
|
+
startTagIndex = nil
|
83
|
+
end
|
84
|
+
|
85
|
+
if !preTag.nil?
|
86
|
+
if preTag.type == "TagStart" && char.type == "Text"
|
87
|
+
# delete blank between start tag and after text
|
88
|
+
if char.chars.join().strip == ""
|
89
|
+
chars.delete_at(index)
|
90
|
+
hasExcute = true
|
91
|
+
break
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
if preTag.type == "Text" && char.type == "TagEnd"
|
96
|
+
if preTextChar.chars.join().strip == "" && preTextChar.chars.join() != "\n"
|
97
|
+
chars.delete_at(preTextIndex)
|
98
|
+
hasExcute = true
|
99
|
+
break
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
if preTag.type == "TagEnd" && char.type == "Text"
|
104
|
+
if char.chars.join() != " "
|
105
|
+
chars.insert(index, TextChar.new(" ".chars, "Text"))
|
106
|
+
hasExcute = true
|
107
|
+
break
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
end
|
112
|
+
|
113
|
+
if char.type == "Text"
|
114
|
+
preTextChar = char
|
115
|
+
preTextIndex = index
|
116
|
+
end
|
117
|
+
|
118
|
+
preTag = char
|
119
|
+
preTagIndex = index
|
120
|
+
|
121
|
+
index += 1
|
122
|
+
end
|
123
|
+
|
124
|
+
if !hasExcute
|
125
|
+
break
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
chars
|
130
|
+
end
|
131
|
+
|
132
|
+
def parse()
|
133
|
+
result = paragraph.text
|
134
|
+
|
135
|
+
if !paragraph.markups.nil? && paragraph.markups.length > 0
|
136
|
+
|
137
|
+
tags = []
|
138
|
+
paragraph.markups.each do |markup|
|
139
|
+
tag = nil
|
140
|
+
if markup.type == "EM"
|
141
|
+
tag = TagChar.new(2, markup.start, markup.end, "_", "_")
|
142
|
+
elsif markup.type == "CODE"
|
143
|
+
tag = TagChar.new(3, markup.start, markup.end, "`", "`")
|
144
|
+
elsif markup.type == "STRONG"
|
145
|
+
tag = TagChar.new(2, markup.start, markup.end, "**", "**")
|
146
|
+
elsif markup.type == "A"
|
147
|
+
url = markup.href
|
148
|
+
if markup.anchorType == "LINK"
|
149
|
+
url = markup.href
|
150
|
+
elsif markup.anchorType == "USER"
|
151
|
+
url = "https://medium.com/u/#{markup.userId}"
|
152
|
+
end
|
153
|
+
|
154
|
+
tag = TagChar.new(1, markup.start, markup.end, "[", "](#{url})")
|
155
|
+
else
|
156
|
+
Helper.makeWarningText("Undefined Markup Type: #{markup.type}.")
|
157
|
+
end
|
158
|
+
|
159
|
+
if !tag.nil?
|
160
|
+
tags.append(tag)
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
tags.sort_by(&:startIndex)
|
165
|
+
|
166
|
+
response = []
|
167
|
+
stack = []
|
168
|
+
|
169
|
+
chars.each do |index, char|
|
170
|
+
|
171
|
+
if char.chars.join() == "\n"
|
172
|
+
brStack = stack.dup
|
173
|
+
while brStack.length > 0
|
174
|
+
tag = brStack.pop
|
175
|
+
response.push(tag.endChars)
|
176
|
+
end
|
177
|
+
response.append(TextChar.new(char.chars, 'Text'))
|
178
|
+
brStack = stack.dup.reverse
|
179
|
+
while brStack.length > 0
|
180
|
+
tag = brStack.pop
|
181
|
+
response.push(tag.startChars)
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
185
|
+
startTags = tags.select { |tag| tag.startIndex == index }.sort_by(&:sort)
|
186
|
+
if !startTags.nil?
|
187
|
+
startTags.each do |tag|
|
188
|
+
response.append(tag.startChars)
|
189
|
+
stack.append(tag)
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
if char.chars.join() != "\n"
|
194
|
+
response.append(TextChar.new(char.chars, 'Text'))
|
195
|
+
end
|
196
|
+
|
197
|
+
endTags = tags.select { |tag| tag.endIndex == index }
|
198
|
+
if !endTags.nil? && endTags.length > 0
|
199
|
+
mismatchTags = []
|
200
|
+
while endTags.length > 0
|
201
|
+
stackTag = stack.pop
|
202
|
+
stackTagInEndTagsIndex = endTags.find_index(stackTag)
|
203
|
+
if !stackTagInEndTagsIndex.nil?
|
204
|
+
# as expected
|
205
|
+
endTags.delete_at(stackTagInEndTagsIndex)
|
206
|
+
else
|
207
|
+
mismatchTags.append(stackTag)
|
208
|
+
end
|
209
|
+
response.append(stackTag.endChars)
|
210
|
+
end
|
211
|
+
|
212
|
+
while mismatchTags.length > 0
|
213
|
+
mismatchTag = mismatchTags.pop
|
214
|
+
response.append(mismatchTag.startChars)
|
215
|
+
stack.append(mismatchTag)
|
216
|
+
end
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
220
|
+
while stack.length > 0
|
221
|
+
tag = stack.pop
|
222
|
+
response.push(tag.endChars)
|
223
|
+
end
|
224
|
+
|
225
|
+
response = optimize(response)
|
226
|
+
result = response.map{ |response| response.chars }.join()
|
227
|
+
end
|
228
|
+
|
229
|
+
result
|
230
|
+
end
|
231
|
+
|
232
|
+
end
|
data/lib/Post.rb
CHANGED
@@ -9,7 +9,7 @@ require 'date'
|
|
9
9
|
class Post
|
10
10
|
|
11
11
|
class PostInfo
|
12
|
-
attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt
|
12
|
+
attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt, :collectionName
|
13
13
|
end
|
14
14
|
|
15
15
|
def self.getPostIDFromPostURLString(postURLString)
|
@@ -38,12 +38,23 @@ class Post
|
|
38
38
|
json
|
39
39
|
end
|
40
40
|
|
41
|
-
def self.
|
42
|
-
|
43
|
-
|
44
|
-
|
41
|
+
def self.fetchPostParagraphs(postID)
|
42
|
+
query = [
|
43
|
+
{
|
44
|
+
"operationName": "PostViewerEdgeContentQuery",
|
45
|
+
"variables": {
|
46
|
+
"postId": postID
|
47
|
+
},
|
48
|
+
"query": "query PostViewerEdgeContentQuery($postId: ID!, $postMeteringOptions: PostMeteringOptions) {\n post(id: $postId) {\n ... on Post {\n id\n viewerEdge {\n id\n fullContent(postMeteringOptions: $postMeteringOptions) {\n isLockedPreviewOnly\n validatedShareKey\n bodyModel {\n ...PostBody_bodyModel\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\nfragment PostBody_bodyModel on RichText {\n sections {\n name\n startIndex\n textLayout\n imageLayout\n backgroundImage {\n id\n originalHeight\n originalWidth\n __typename\n }\n videoLayout\n backgroundVideo {\n videoId\n originalHeight\n originalWidth\n previewImageId\n __typename\n }\n __typename\n }\n paragraphs {\n id\n ...PostBodySection_paragraph\n __typename\n }\n ...normalizedBodyModel_richText\n __typename\n}\n\nfragment PostBodySection_paragraph on Paragraph {\n name\n ...PostBodyParagraph_paragraph\n __typename\n id\n}\n\nfragment PostBodyParagraph_paragraph on Paragraph {\n name\n type\n ...ImageParagraph_paragraph\n ...TextParagraph_paragraph\n ...IframeParagraph_paragraph\n ...MixtapeParagraph_paragraph\n __typename\n id\n}\n\nfragment ImageParagraph_paragraph on Paragraph {\n href\n layout\n metadata {\n id\n originalHeight\n originalWidth\n focusPercentX\n focusPercentY\n alt\n __typename\n }\n ...Markups_paragraph\n ...ParagraphRefsMapContext_paragraph\n ...PostAnnotationsMarker_paragraph\n __typename\n id\n}\n\nfragment Markups_paragraph on Paragraph {\n name\n text\n hasDropCap\n dropCapImage {\n ...MarkupNode_data_dropCapImage\n __typename\n id\n }\n markups {\n type\n start\n end\n href\n anchorType\n userId\n linkMetadata {\n httpStatus\n __typename\n }\n __typename\n }\n __typename\n id\n}\n\nfragment MarkupNode_data_dropCapImage on ImageMetadata {\n ...DropCap_image\n __typename\n id\n}\n\nfragment DropCap_image on ImageMetadata {\n id\n originalHeight\n originalWidth\n __typename\n}\n\nfragment ParagraphRefsMapContext_paragraph on Paragraph {\n id\n name\n text\n __typename\n}\n\nfragment PostAnnotationsMarker_paragraph on Paragraph {\n ...PostViewNoteCard_paragraph\n __typename\n id\n}\n\nfragment PostViewNoteCard_paragraph on Paragraph {\n name\n __typename\n id\n}\n\nfragment TextParagraph_paragraph on Paragraph {\n type\n hasDropCap\n ...Markups_paragraph\n ...ParagraphRefsMapContext_paragraph\n __typename\n id\n}\n\nfragment IframeParagraph_paragraph on Paragraph {\n iframe {\n mediaResource {\n id\n iframeSrc\n iframeHeight\n iframeWidth\n title\n __typename\n }\n __typename\n }\n layout\n ...getEmbedlyCardUrlParams_paragraph\n ...Markups_paragraph\n __typename\n id\n}\n\nfragment getEmbedlyCardUrlParams_paragraph on Paragraph {\n type\n iframe {\n mediaResource {\n iframeSrc\n __typename\n }\n __typename\n }\n __typename\n id\n}\n\nfragment MixtapeParagraph_paragraph on Paragraph {\n type\n mixtapeMetadata {\n href\n mediaResource {\n mediumCatalog {\n id\n __typename\n }\n __typename\n }\n __typename\n }\n ...GenericMixtapeParagraph_paragraph\n __typename\n id\n}\n\nfragment GenericMixtapeParagraph_paragraph on Paragraph {\n text\n mixtapeMetadata {\n href\n thumbnailImageId\n __typename\n }\n markups {\n start\n end\n type\n href\n __typename\n }\n __typename\n id\n}\n\nfragment normalizedBodyModel_richText on RichText {\n paragraphs {\n markups {\n type\n __typename\n }\n ...getParagraphHighlights_paragraph\n ...getParagraphPrivateNotes_paragraph\n __typename\n }\n sections {\n startIndex\n ...getSectionEndIndex_section\n __typename\n }\n ...getParagraphStyles_richText\n ...getParagraphSpaces_richText\n __typename\n}\n\nfragment getParagraphHighlights_paragraph on Paragraph {\n name\n __typename\n id\n}\n\nfragment getParagraphPrivateNotes_paragraph on Paragraph {\n name\n __typename\n id\n}\n\nfragment getSectionEndIndex_section on Section {\n startIndex\n __typename\n}\n\nfragment getParagraphStyles_richText on RichText {\n paragraphs {\n text\n type\n __typename\n }\n sections {\n ...getSectionEndIndex_section\n __typename\n }\n __typename\n}\n\nfragment getParagraphSpaces_richText on RichText {\n paragraphs {\n layout\n metadata {\n originalHeight\n originalWidth\n __typename\n }\n type\n ...paragraphExtendsImageGrid_paragraph\n __typename\n }\n ...getSeriesParagraphTopSpacings_richText\n ...getPostParagraphTopSpacings_richText\n __typename\n}\n\nfragment paragraphExtendsImageGrid_paragraph on Paragraph {\n layout\n type\n __typename\n id\n}\n\nfragment getSeriesParagraphTopSpacings_richText on RichText {\n paragraphs {\n id\n __typename\n }\n sections {\n startIndex\n __typename\n }\n __typename\n}\n\nfragment getPostParagraphTopSpacings_richText on RichText {\n paragraphs {\n layout\n text\n __typename\n }\n sections {\n startIndex\n __typename\n }\n __typename\n}\n"
|
49
|
+
}
|
50
|
+
]
|
51
|
+
|
52
|
+
body = Request.body(Request.URL("https://medium.com/_/graphql", "POST", query))
|
53
|
+
if !body.nil?
|
54
|
+
json = JSON.parse(body)
|
55
|
+
json&.dig(0, "data", "post", "viewerEdge", "fullContent", "bodyModel", "paragraphs")
|
45
56
|
else
|
46
|
-
|
57
|
+
nil
|
47
58
|
end
|
48
59
|
end
|
49
60
|
|
@@ -57,6 +68,13 @@ class Post
|
|
57
68
|
postInfo.creator = content&.dig(creatorRef, "name")
|
58
69
|
end
|
59
70
|
|
71
|
+
colletionRef = content&.dig("Post:#{postID}", "collection", "__ref")
|
72
|
+
if !colletionRef.nil?
|
73
|
+
postInfo.collectionName = content&.dig(colletionRef, "name")
|
74
|
+
end
|
75
|
+
|
76
|
+
|
77
|
+
|
60
78
|
firstPublishedAt = content&.dig("Post:#{postID}", "firstPublishedAt")
|
61
79
|
if !firstPublishedAt.nil?
|
62
80
|
postInfo.firstPublishedAt = Time.at(0, firstPublishedAt, :millisecond)
|
data/lib/ZMediumFetcher.rb
CHANGED
@@ -26,6 +26,7 @@ require "PathPolicy"
|
|
26
26
|
require "Request"
|
27
27
|
require "Post"
|
28
28
|
require "User"
|
29
|
+
require 'date'
|
29
30
|
|
30
31
|
class ZMediumFetcher
|
31
32
|
|
@@ -127,7 +128,7 @@ class ZMediumFetcher
|
|
127
128
|
|
128
129
|
postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
|
129
130
|
|
130
|
-
sourceParagraphs = Post.
|
131
|
+
sourceParagraphs = Post.fetchPostParagraphs(postID)
|
131
132
|
if sourceParagraphs.nil?
|
132
133
|
raise "Error: Paragraph not found! PostURL: #{postURL}"
|
133
134
|
end
|
@@ -140,7 +141,7 @@ class ZMediumFetcher
|
|
140
141
|
previousParagraph = nil
|
141
142
|
preTypeParagraphs = []
|
142
143
|
sourceParagraphs.each do |sourcParagraph|
|
143
|
-
paragraph = Paragraph.new(sourcParagraph, postID
|
144
|
+
paragraph = Paragraph.new(sourcParagraph, postID)
|
144
145
|
if OLIParser.isOLI(paragraph)
|
145
146
|
oliIndex += 1
|
146
147
|
paragraph.oliIndex = oliIndex
|
@@ -148,10 +149,11 @@ class ZMediumFetcher
|
|
148
149
|
oliIndex = 0
|
149
150
|
end
|
150
151
|
|
151
|
-
# if previous is OLI or ULI and current is not OLI or ULI
|
152
|
+
# if previous is OLI or ULI or BQ and current is not OLI or ULI or BQ
|
152
153
|
# than insert a blank paragraph to keep markdown foramt correct
|
153
154
|
if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
|
154
|
-
(ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
|
155
|
+
(ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))||
|
156
|
+
(BQParser.isBQ(previousParagraph) && !BQParser.isBQ(paragraph))
|
155
157
|
paragraphs.append(Paragraph.makeBlankParagraph(postID))
|
156
158
|
end
|
157
159
|
|
@@ -178,7 +180,7 @@ class ZMediumFetcher
|
|
178
180
|
groupByText += "\n"
|
179
181
|
end
|
180
182
|
|
181
|
-
markupParser = MarkupParser.new(
|
183
|
+
markupParser = MarkupParser.new(preTypeParagraph)
|
182
184
|
groupByText += markupParser.parse()
|
183
185
|
end
|
184
186
|
|
@@ -203,7 +205,7 @@ class ZMediumFetcher
|
|
203
205
|
|
204
206
|
postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
|
205
207
|
|
206
|
-
imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "
|
208
|
+
imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "assets")
|
207
209
|
startParser = buildParser(imagePathPolicy)
|
208
210
|
|
209
211
|
progress.totalPostParagraphsLength = paragraphs.length
|
@@ -211,7 +213,9 @@ class ZMediumFetcher
|
|
211
213
|
progress.message = "Converting Post..."
|
212
214
|
progress.printLog()
|
213
215
|
|
214
|
-
|
216
|
+
postWithDatePath = "#{postInfo.firstPublishedAt.strftime("%Y-%m-%d")}-#{postPath}"
|
217
|
+
|
218
|
+
absolutePath = postPathPolicy.getAbsolutePath("#{postWithDatePath}.md")
|
215
219
|
|
216
220
|
# if markdown file is exists and last modification time is >= latestPublishedAt(last update post time on medium)
|
217
221
|
if File.file?(absolutePath) && File.mtime(absolutePath) >= postInfo.latestPublishedAt
|
@@ -227,8 +231,11 @@ class ZMediumFetcher
|
|
227
231
|
|
228
232
|
index = 0
|
229
233
|
paragraphs.each do |paragraph|
|
230
|
-
|
231
|
-
|
234
|
+
if !(CodeBlockParser.isCodeBlock(paragraph) || PREParser.isPRE(paragraph))
|
235
|
+
markupParser = MarkupParser.new(paragraph)
|
236
|
+
paragraph.text = markupParser.parse()
|
237
|
+
end
|
238
|
+
|
232
239
|
result = startParser.parse(paragraph)
|
233
240
|
|
234
241
|
if !linkParser.nil?
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ZMediumToMarkdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ZhgChgLi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-06-
|
11
|
+
date: 2022-06-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -24,20 +24,6 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 1.13.1
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: reverse_markdown
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: 2.1.1
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: 2.1.1
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: net-http
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -91,6 +77,7 @@ files:
|
|
91
77
|
- lib/Parsers/LinkParser.rb
|
92
78
|
- lib/Parsers/MIXTAPEEMBEDParser.rb
|
93
79
|
- lib/Parsers/MarkupParser.rb
|
80
|
+
- lib/Parsers/MarkupStyleRender.rb
|
94
81
|
- lib/Parsers/OLIParser.rb
|
95
82
|
- lib/Parsers/PParser.rb
|
96
83
|
- lib/Parsers/PQParser.rb
|