ZMediumToMarkdown 2.0.4 → 2.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/Helper.rb +13 -4
- data/lib/Models/Paragraph.rb +17 -8
- data/lib/Parsers/BQParser.rb +1 -1
- data/lib/Parsers/IframeParser.rb +14 -8
- data/lib/Parsers/MIXTAPEEMBEDParser.rb +12 -3
- data/lib/Parsers/MarkupParser.rb +2 -2
- data/lib/Parsers/MarkupStyleRender.rb +39 -15
- data/lib/Parsers/PQParser.rb +1 -1
- data/lib/ZMediumFetcher.rb +7 -10
- metadata +2 -3
- data/lib/Parsers/LinkParser.rb +0 -54
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 411d9f653e728ad8708bfdd0738e181de68085865a3860e5d5272125236e856c
|
4
|
+
data.tar.gz: 94c713a02a605b480bf63e1022a4ee53dcfca3fe4355daf401b2253354f6f194
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1b90384c75f6de2b1fcaaac3fcc1f3885ec6bf223698ce677c7cd621eec479eabead53794eb66d7ce2367867ef187468c733473e1e42a711eadf360a249a5803
|
7
|
+
data.tar.gz: 89c9ec47b56b047042a060716f923ad1924722d17c842ccce954252976d8112cac0c3d610f7626caadd1a4eaada5031dea17ca26dea4349e2b8e9e2ca1431b1c
|
data/lib/Helper.rb
CHANGED
@@ -19,6 +19,10 @@ class Helper
|
|
19
19
|
content
|
20
20
|
end
|
21
21
|
|
22
|
+
def self.escapeMarkdown(text)
|
23
|
+
text.gsub(/(\*|_|`|\||\\|\{|\}|\[|\]|\(|\)|#|\+|\-|\.|\!)/){ |x| "\\#{x}" }
|
24
|
+
end
|
25
|
+
|
22
26
|
def self.escapeHTML(text)
|
23
27
|
if text == "<"
|
24
28
|
"<"
|
@@ -101,8 +105,8 @@ class Helper
|
|
101
105
|
result = "---\n"
|
102
106
|
result += "title: #{title}\n"
|
103
107
|
result += "author: #{postInfo.creator}\n"
|
104
|
-
result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%
|
105
|
-
result += "last_modified_at: #{postInfo.latestPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%
|
108
|
+
result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%L%z')}\n"
|
109
|
+
result += "last_modified_at: #{postInfo.latestPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%L%z')}\n"
|
106
110
|
result += "categories: #{postInfo.collectionName}\n"
|
107
111
|
result += "tags: [#{postInfo.tags.join(",")}]\n"
|
108
112
|
result += "description: #{postInfo.description}\n"
|
@@ -185,9 +189,14 @@ class Helper
|
|
185
189
|
end
|
186
190
|
|
187
191
|
|
188
|
-
def self.createWatermark(postURL)
|
192
|
+
def self.createWatermark(postURL, isForJekyll)
|
193
|
+
jekyllOpen = ""
|
194
|
+
if isForJekyll
|
195
|
+
jekyllOpen = "{:target=\"_blank\"}"
|
196
|
+
end
|
197
|
+
|
189
198
|
text = "\r\n\r\n\r\n"
|
190
|
-
text += "_Converted [Medium Post](#{postURL}) by [ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)._"
|
199
|
+
text += "_Converted [Medium Post](#{postURL})#{jekyllOpen} by [ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)#{jekyllOpen}._"
|
191
200
|
text += "\r\n"
|
192
201
|
|
193
202
|
text
|
data/lib/Models/Paragraph.rb
CHANGED
@@ -96,14 +96,23 @@ class Paragraph
|
|
96
96
|
end
|
97
97
|
end
|
98
98
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
99
|
+
index = 0
|
100
|
+
orgText.each_char do |char|
|
101
|
+
|
102
|
+
if char.chars.join() =~ /(\*|_|`|\||\\|\{|\}|\[|\]|\(|\)|#|\+|\-|\.|\!)/
|
103
|
+
escapeMarkup = {
|
104
|
+
"type" => 'ESCAPE',
|
105
|
+
"start" => index,
|
106
|
+
"end" => index + 1
|
107
|
+
}
|
108
|
+
markups.append(Markup.new(escapeMarkup))
|
109
|
+
end
|
110
|
+
|
111
|
+
index += 1
|
112
|
+
if char.bytes.length >= 4
|
113
|
+
# some emoji need more space (in Medium)
|
114
|
+
index += 1
|
115
|
+
end
|
107
116
|
end
|
108
117
|
|
109
118
|
@markups = markups
|
data/lib/Parsers/BQParser.rb
CHANGED
data/lib/Parsers/IframeParser.rb
CHANGED
@@ -19,6 +19,12 @@ class IframeParser < Parser
|
|
19
19
|
end
|
20
20
|
|
21
21
|
def parse(paragraph)
|
22
|
+
|
23
|
+
jekyllOpen = ""
|
24
|
+
if isForJekyll
|
25
|
+
jekyllOpen = "{:target=\"_blank\"}"
|
26
|
+
end
|
27
|
+
|
22
28
|
if paragraph.type == 'IFRAME'
|
23
29
|
|
24
30
|
if !paragraph.iframe.src.nil? && paragraph.iframe.src != ""
|
@@ -27,7 +33,7 @@ class IframeParser < Parser
|
|
27
33
|
url = "https://medium.com/media/#{paragraph.iframe.id}"
|
28
34
|
end
|
29
35
|
|
30
|
-
result = "[#{paragraph.iframe.title}](#{url})"
|
36
|
+
result = "[#{paragraph.iframe.title}](#{url})#{jekyllOpen}"
|
31
37
|
|
32
38
|
if !url[/(www\.youtube\.com)/].nil?
|
33
39
|
# is youtube
|
@@ -49,12 +55,12 @@ class IframeParser < Parser
|
|
49
55
|
if ImageDownloader.download(absolutePath, imageURL)
|
50
56
|
relativePath = imagePathPolicy.getRelativePath(fileName)
|
51
57
|
if isForJekyll
|
52
|
-
result = "\r\n\r\n[![#{title}](/#{relativePath} \"#{title}\")](#{params["url"]})\r\n\r\n"
|
58
|
+
result = "\r\n\r\n[![#{title}](/#{relativePath} \"#{title}\")](#{params["url"]})#{jekyllOpen}\r\n\r\n"
|
53
59
|
else
|
54
|
-
result = "\r\n\r\n[![#{title}](#{relativePath} \"#{title}\")](#{params["url"]})\r\n\r\n"
|
60
|
+
result = "\r\n\r\n[![#{title}](#{relativePath} \"#{title}\")](#{params["url"]})#{jekyllOpen}\r\n\r\n"
|
55
61
|
end
|
56
62
|
else
|
57
|
-
result = "\r\n[#{title}](#{params["url"]})\r\n"
|
63
|
+
result = "\r\n[#{title}](#{params["url"]})#{jekyllOpen}\r\n"
|
58
64
|
end
|
59
65
|
end
|
60
66
|
else
|
@@ -118,9 +124,9 @@ class IframeParser < Parser
|
|
118
124
|
createdAt = Time.parse(twitterObj["created_at"]).strftime('%Y-%m-%d %H:%M:%S')
|
119
125
|
result = "\n\n"
|
120
126
|
result += "■■■■■■■■■■■■■■ \n"
|
121
|
-
result += "> **[#{twitterObj["user"]["name"]}](https://twitter.com/#{twitterObj["user"]["screen_name"]}) @ Twitter Says:** \n\n"
|
127
|
+
result += "> **[#{twitterObj["user"]["name"]}](https://twitter.com/#{twitterObj["user"]["screen_name"]})#{jekyllOpen} @ Twitter Says:** \n\n"
|
122
128
|
result += "> > #{fullText} \n\n"
|
123
|
-
result += "> **Tweeted at [#{createdAt}](#{ogURL}).** \n\n"
|
129
|
+
result += "> **Tweeted at [#{createdAt}](#{ogURL})#{jekyllOpen}.** \n\n"
|
124
130
|
result += "■■■■■■■■■■■■■■ \n\n"
|
125
131
|
end
|
126
132
|
else
|
@@ -132,9 +138,9 @@ class IframeParser < Parser
|
|
132
138
|
end
|
133
139
|
|
134
140
|
if !ogImageURL.nil?
|
135
|
-
result = "\r\n\r\n[![#{title}](#{ogImageURL} \"#{title}\")](#{ogURL})\r\n\r\n"
|
141
|
+
result = "\r\n\r\n[![#{title}](#{ogImageURL} \"#{title}\")](#{ogURL})#{jekyllOpen}\r\n\r\n"
|
136
142
|
else
|
137
|
-
result = "[#{title}](#{ogURL})"
|
143
|
+
result = "[#{title}](#{ogURL})#{jekyllOpen}"
|
138
144
|
end
|
139
145
|
end
|
140
146
|
end
|
data/lib/Parsers/MIXTAPEEMBEDParser.rb
CHANGED
@@ -5,15 +5,24 @@ require "Parsers/Parser"
|
|
5
5
|
require 'Models/Paragraph'
|
6
6
|
|
7
7
|
class MIXTAPEEMBEDParser < Parser
|
8
|
-
attr_accessor :nextParser
|
8
|
+
attr_accessor :nextParser, :isForJekyll
|
9
|
+
|
10
|
+
def initialize(isForJekyll)
|
11
|
+
@isForJekyll = isForJekyll
|
12
|
+
end
|
13
|
+
|
9
14
|
def parse(paragraph)
|
10
15
|
if paragraph.type == 'MIXTAPE_EMBED'
|
11
16
|
if !paragraph.mixtapeMetadata.nil? && !paragraph.mixtapeMetadata.href.nil?
|
12
17
|
ogImageURL = Helper.fetchOGImage(paragraph.mixtapeMetadata.href)
|
13
18
|
if !ogImageURL.nil?
|
14
|
-
|
19
|
+
jekyllOpen = ""
|
20
|
+
if isForJekyll
|
21
|
+
jekyllOpen = "{:target=\"_blank\"}"
|
22
|
+
end
|
23
|
+
"\r\n\r\n[![](#{ogImageURL})](#{paragraph.mixtapeMetadata.href})#{jekyllOpen}\r\n\r\n"
|
15
24
|
else
|
16
|
-
"\n
|
25
|
+
"\n#{paragraph.text}"
|
17
26
|
end
|
18
27
|
else
|
19
28
|
"\n#{paragraph.text}"
|
data/lib/Parsers/MarkupParser.rb
CHANGED
@@ -7,7 +7,7 @@ require 'securerandom'
|
|
7
7
|
require 'User'
|
8
8
|
|
9
9
|
class MarkupParser
|
10
|
-
attr_accessor :body, :paragraph, :isForJekyll
|
10
|
+
attr_accessor :body, :paragraph, :isForJekyll, :usersPostURLs
|
11
11
|
|
12
12
|
def initialize(paragraph, isForJekyll)
|
13
13
|
@paragraph = paragraph
|
@@ -18,7 +18,7 @@ class MarkupParser
|
|
18
18
|
result = paragraph.text
|
19
19
|
if !paragraph.markups.nil? && paragraph.markups.length > 0
|
20
20
|
markupRender = MarkupStyleRender.new(paragraph, isForJekyll)
|
21
|
-
|
21
|
+
markupRender.usersPostURLs = usersPostURLs
|
22
22
|
begin
|
23
23
|
result = markupRender.parse()
|
24
24
|
rescue => e
|
data/lib/Parsers/MarkupStyleRender.rb
CHANGED
@@ -5,7 +5,7 @@ require 'Models/Paragraph'
|
|
5
5
|
require 'Helper'
|
6
6
|
|
7
7
|
class MarkupStyleRender
|
8
|
-
attr_accessor :paragraph, :chars, :encodeType, :isForJekyll
|
8
|
+
attr_accessor :paragraph, :chars, :encodeType, :isForJekyll, :usersPostURLs
|
9
9
|
|
10
10
|
class TextChar
|
11
11
|
attr_accessor :chars, :type
|
@@ -188,7 +188,27 @@ class MarkupStyleRender
|
|
188
188
|
url = "https://medium.com/u/#{markup.userId}"
|
189
189
|
end
|
190
190
|
|
191
|
-
|
191
|
+
lastPath = url.split("/").last
|
192
|
+
lastQuery = nil
|
193
|
+
if !lastPath.nil?
|
194
|
+
lastQuery = lastPath.split("-").last
|
195
|
+
end
|
196
|
+
|
197
|
+
if !usersPostURLs.nil? && !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == lastQuery }.nil?
|
198
|
+
if isForJekyll
|
199
|
+
url = "(../#{lastQuery}/)"
|
200
|
+
else
|
201
|
+
url = "(#{lastPath})"
|
202
|
+
end
|
203
|
+
else
|
204
|
+
if isForJekyll
|
205
|
+
url = "(#{url}){:target=\"_blank\"}"
|
206
|
+
else
|
207
|
+
url = "(#{url})"
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
tag = TagChar.new(1, markup.start, markup.end, "[", "]#{url}")
|
192
212
|
else
|
193
213
|
Helper.makeWarningText("Undefined Markup Type: #{markup.type}.")
|
194
214
|
end
|
@@ -205,6 +225,15 @@ class MarkupStyleRender
|
|
205
225
|
|
206
226
|
chars.each do |index, char|
|
207
227
|
|
228
|
+
# is in code block
|
229
|
+
if !stack.last.nil? && stack.last.endChars.chars.join() == "`"
|
230
|
+
containEndTag = tags.select { |tag| tag.endIndex == index && tag.endChars.chars.join() == "`" }.length > 0
|
231
|
+
if !containEndTag
|
232
|
+
response.append(char)
|
233
|
+
next
|
234
|
+
end
|
235
|
+
end
|
236
|
+
|
208
237
|
if char.chars.join() == "\n"
|
209
238
|
brStack = stack.dup
|
210
239
|
while brStack.length > 0
|
@@ -228,21 +257,16 @@ class MarkupStyleRender
|
|
228
257
|
end
|
229
258
|
|
230
259
|
if char.chars.join() != "\n"
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
else
|
235
|
-
resultChar = Helper.escapeMarkdown(char.chars.join())
|
236
|
-
if isForJekyll
|
237
|
-
resultChar = Helper.escapeHTML(resultChar)
|
238
|
-
end
|
239
|
-
|
240
|
-
response.append(TextChar.new(resultChar.chars, "Text"))
|
260
|
+
resultChar = char.chars.join()
|
261
|
+
if isForJekyll
|
262
|
+
resultChar = Helper.escapeHTML(resultChar)
|
241
263
|
end
|
264
|
+
|
265
|
+
response.append(TextChar.new(resultChar.chars, "Text"))
|
242
266
|
end
|
243
267
|
|
244
268
|
endTags = tags.select { |tag| tag.endIndex == index }
|
245
|
-
if
|
269
|
+
if endTags.length > 0
|
246
270
|
mismatchTags = []
|
247
271
|
while endTags.length > 0
|
248
272
|
stackTag = stack.pop
|
@@ -268,14 +292,14 @@ class MarkupStyleRender
|
|
268
292
|
tag = stack.pop
|
269
293
|
response.push(tag.endChars)
|
270
294
|
end
|
271
|
-
|
295
|
+
|
272
296
|
response = optimize(response)
|
273
297
|
result = response.map{ |response| response.chars }.join()
|
274
298
|
|
275
299
|
else
|
276
300
|
response = []
|
277
301
|
chars.each do |index, char|
|
278
|
-
resultChar =
|
302
|
+
resultChar = char
|
279
303
|
if isForJekyll
|
280
304
|
resultChar = escapeHTML(char)
|
281
305
|
end
|
data/lib/Parsers/PQParser.rb
CHANGED
data/lib/ZMediumFetcher.rb
CHANGED
@@ -19,7 +19,6 @@ require "Parsers/MarkupParser"
|
|
19
19
|
require "Parsers/OLIParser"
|
20
20
|
require "Parsers/MIXTAPEEMBEDParser"
|
21
21
|
require "Parsers/PQParser"
|
22
|
-
require "Parsers/LinkParser"
|
23
22
|
require "Parsers/CodeBlockParser"
|
24
23
|
|
25
24
|
require "PathPolicy"
|
@@ -30,7 +29,7 @@ require 'date'
|
|
30
29
|
|
31
30
|
class ZMediumFetcher
|
32
31
|
|
33
|
-
attr_accessor :progress, :linkParser, :isForJekyll
|
32
|
+
attr_accessor :progress, :usersPostURLs, :isForJekyll
|
34
33
|
|
35
34
|
class Progress
|
36
35
|
attr_accessor :username, :postPath, :currentPostIndex, :totalPostsLength, :currentPostParagraphIndex, :totalPostParagraphsLength, :message
|
@@ -71,7 +70,7 @@ class ZMediumFetcher
|
|
71
70
|
|
72
71
|
def initialize
|
73
72
|
@progress = Progress.new()
|
74
|
-
@linkParser = LinkParser.new()
|
73
|
+
@usersPostURLs = nil
|
75
74
|
@isForJekyll = false
|
76
75
|
end
|
77
76
|
|
@@ -89,7 +88,7 @@ class ZMediumFetcher
|
|
89
88
|
ppParser.setNext(uliParser)
|
90
89
|
oliParser = OLIParser.new()
|
91
90
|
uliParser.setNext(oliParser)
|
92
|
-
mixtapeembedParser = MIXTAPEEMBEDParser.new()
|
91
|
+
mixtapeembedParser = MIXTAPEEMBEDParser.new(isForJekyll)
|
93
92
|
oliParser.setNext(mixtapeembedParser)
|
94
93
|
pqParser = PQParser.new()
|
95
94
|
mixtapeembedParser.setNext(pqParser)
|
@@ -223,8 +222,6 @@ class ZMediumFetcher
|
|
223
222
|
progress.message = "Converting Post..."
|
224
223
|
progress.printLog()
|
225
224
|
|
226
|
-
linkParser.isForJekyll = isForJekyll
|
227
|
-
|
228
225
|
postWithDatePath = "#{postInfo.firstPublishedAt.strftime("%Y-%m-%d")}-#{postPath}"
|
229
226
|
|
230
227
|
absolutePath = postPathPolicy.getAbsolutePath("#{postWithDatePath}.md")
|
@@ -260,12 +257,12 @@ class ZMediumFetcher
|
|
260
257
|
|
261
258
|
if !(CodeBlockParser.isCodeBlock(paragraph) || PREParser.isPRE(paragraph))
|
262
259
|
markupParser = MarkupParser.new(paragraph, isForJekyll)
|
260
|
+
markupParser.usersPostURLs = usersPostURLs
|
263
261
|
paragraph.text = markupParser.parse()
|
264
262
|
end
|
265
263
|
|
266
264
|
result = startParser.parse(paragraph)
|
267
|
-
|
268
|
-
|
265
|
+
|
269
266
|
file.puts(result)
|
270
267
|
|
271
268
|
index += 1
|
@@ -274,7 +271,7 @@ class ZMediumFetcher
|
|
274
271
|
progress.printLog()
|
275
272
|
end
|
276
273
|
|
277
|
-
postWatermark = Helper.createWatermark(postURL)
|
274
|
+
postWatermark = Helper.createWatermark(postURL, isForJekyll)
|
278
275
|
if !postWatermark.nil?
|
279
276
|
file.puts(postWatermark)
|
280
277
|
end
|
@@ -311,7 +308,7 @@ class ZMediumFetcher
|
|
311
308
|
nextID = postPageInfo["nextID"]
|
312
309
|
end while !nextID.nil?
|
313
310
|
|
314
|
-
|
311
|
+
@usersPostURLs = postURLS
|
315
312
|
|
316
313
|
progress.totalPostsLength = postURLS.length
|
317
314
|
progress.currentPostIndex = 0
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ZMediumToMarkdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.4
|
4
|
+
version: 2.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ZhgChgLi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-07-
|
11
|
+
date: 2022-07-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -74,7 +74,6 @@ files:
|
|
74
74
|
- lib/Parsers/H4Parser.rb
|
75
75
|
- lib/Parsers/IMGParser.rb
|
76
76
|
- lib/Parsers/IframeParser.rb
|
77
|
-
- lib/Parsers/LinkParser.rb
|
78
77
|
- lib/Parsers/MIXTAPEEMBEDParser.rb
|
79
78
|
- lib/Parsers/MarkupParser.rb
|
80
79
|
- lib/Parsers/MarkupStyleRender.rb
|
data/lib/Parsers/LinkParser.rb
DELETED
@@ -1,54 +0,0 @@
|
|
1
|
-
$lib = File.expand_path('../', File.dirname(__FILE__))
|
2
|
-
|
3
|
-
require 'Models/Paragraph'
|
4
|
-
|
5
|
-
class LinkParser
|
6
|
-
attr_accessor :usersPostURLs, :isForJekyll
|
7
|
-
|
8
|
-
def initialize()
|
9
|
-
@usersPostURLs = nil
|
10
|
-
@isForJekyll = false
|
11
|
-
end
|
12
|
-
|
13
|
-
def parse(markdownString)
|
14
|
-
matchLinks = markdownString.scan(/\[[^\]]*\]\(([^\)]*)\)/m)
|
15
|
-
if !matchLinks.nil?
|
16
|
-
|
17
|
-
matchLinks.each do |matchLink|
|
18
|
-
link = matchLink[0]
|
19
|
-
linkMarkdown = "(#{link})"
|
20
|
-
newLinkMarkdown = linkMarkdown
|
21
|
-
|
22
|
-
if isForJekyll
|
23
|
-
newLinkMarkdown = "(#{link}){:target=\"_blank\"}"
|
24
|
-
end
|
25
|
-
|
26
|
-
|
27
|
-
if !usersPostURLs.nil?
|
28
|
-
# if have provide user's post urls
|
29
|
-
# find & replace medium url to local post url if matched
|
30
|
-
|
31
|
-
if isForJekyll
|
32
|
-
postPath = link.split("/").last.split("-").last
|
33
|
-
else
|
34
|
-
postPath = link.split("/").last
|
35
|
-
end
|
36
|
-
|
37
|
-
if !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == postPath.split("-").last }.nil?
|
38
|
-
if isForJekyll
|
39
|
-
newLinkMarkdown = "(../#{postPath}/)"
|
40
|
-
else
|
41
|
-
newLinkMarkdown = "(#{postPath})"
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
if linkMarkdown != newLinkMarkdown
|
47
|
-
markdownString = markdownString.sub! linkMarkdown, newLinkMarkdown
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
markdownString
|
53
|
-
end
|
54
|
-
end
|