ZMediumToMarkdown 1.6.1 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ZMediumToMarkdown +18 -1
- data/lib/Helper.rb +3 -13
- data/lib/Parsers/CodeBlockParser.rb +9 -1
- data/lib/Parsers/IMGParser.rb +12 -3
- data/lib/Parsers/IframeParser.rb +12 -3
- data/lib/Parsers/LinkParser.rb +13 -15
- data/lib/Parsers/MarkupStyleRender.rb +1 -3
- data/lib/ZMediumFetcher.rb +30 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ff9b27bae7ac1365701ccb1b3174a8b7a2d3b7b2fa12771d10665368636c1733
|
4
|
+
data.tar.gz: 0f68f9aca2a902694898560c82b689bc10b0e45d6c2d181b279002fb263f74d4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a397b8860995b6fe0e7f4aea405c352d5ce05de20de0d9dc041001bc8af3c8509ac01387f5ea7ae5ecedc844ba41e70ade1ac6f6f58d4d9baca02cce0783c28d
|
7
|
+
data.tar.gz: 532bcf53a46474d897bb27dcdfe3153dfd3103cc43de01a9588608776284ddb0f23f89e836797bf453469168c38ed041a1c63f1a607c0b16616bafd002df75f1
|
data/bin/ZMediumToMarkdown
CHANGED
@@ -14,18 +14,35 @@ class Main
|
|
14
14
|
ARGV << '-h' if ARGV.empty?
|
15
15
|
|
16
16
|
filePath = ENV['PWD'] || ::Dir.pwd
|
17
|
-
outputFilePath = PathPolicy.new(filePath, "Output")
|
18
17
|
|
19
18
|
OptionParser.new do |opts|
|
20
19
|
opts.banner = "Usage: ZMediumFetcher [options]"
|
21
20
|
|
22
21
|
opts.on('-uUSERNAME', '--username=USERNAME', 'Downloading all posts from user') do |username|
|
22
|
+
outputFilePath = PathPolicy.new(filePath, "/")
|
23
23
|
fetcher.downloadPostsByUsername(username, outputFilePath)
|
24
24
|
|
25
25
|
Helper.printNewVersionMessageIfExists()
|
26
26
|
end
|
27
27
|
|
28
28
|
opts.on('-pPOST_URL', '--postURL=POST_URL', 'Downloading single post') do |postURL|
|
29
|
+
outputFilePath = PathPolicy.new(filePath, "/")
|
30
|
+
fetcher.downloadPost(postURL, outputFilePath)
|
31
|
+
|
32
|
+
Helper.printNewVersionMessageIfExists()
|
33
|
+
end
|
34
|
+
|
35
|
+
opts.on('-jUSERNAME', '--jekyllUsername=USERNAME', 'Downloading all posts from user with Jekyll friendly') do |username|
|
36
|
+
outputFilePath = PathPolicy.new(filePath, "Output")
|
37
|
+
fetcher.isForJekyll = true
|
38
|
+
fetcher.downloadPostsByUsername(username, outputFilePath)
|
39
|
+
|
40
|
+
Helper.printNewVersionMessageIfExists()
|
41
|
+
end
|
42
|
+
|
43
|
+
opts.on('-kpPOST_URL', '--jekyllPostURL=POST_URL', 'Downloading single post with Jekyll friendly') do |postURL|
|
44
|
+
outputFilePath = PathPolicy.new(filePath, "Output")
|
45
|
+
fetcher.isForJekyll = true
|
29
46
|
fetcher.downloadPost(postURL, outputFilePath)
|
30
47
|
|
31
48
|
Helper.printNewVersionMessageIfExists()
|
data/lib/Helper.rb
CHANGED
@@ -159,17 +159,7 @@ class Helper
|
|
159
159
|
end
|
160
160
|
|
161
161
|
|
162
|
-
def self.createWatermark(postURL)
|
163
|
-
text = "\r\n
|
164
|
-
text += "+-----------------------------------------------------------------------------------+"
|
165
|
-
text += "\r\n"
|
166
|
-
text += "\r\n"
|
167
|
-
text += "| **[View original post on Medium](#{postURL}) - Converted by [ZhgChgLi](https://blog.zhgchg.li)/[ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)** |"
|
168
|
-
text += "\r\n"
|
169
|
-
text += "\r\n"
|
170
|
-
text += "+-----------------------------------------------------------------------------------+"
|
171
|
-
text += "\r\n"
|
172
|
-
|
173
|
-
text
|
162
|
+
def self.createWatermark(postURL)
|
163
|
+
text = "\r\n[Medium 原文](#{postURL})"
|
174
164
|
end
|
175
|
-
end
|
165
|
+
end
|
data/lib/Parsers/CodeBlockParser.rb
CHANGED
@@ -10,8 +10,16 @@ class CodeBlockParser < Parser
|
|
10
10
|
'CODE_BLOCK'
|
11
11
|
end
|
12
12
|
|
13
|
+
def self.isCodeBlock(paragraph)
|
14
|
+
if paragraph.nil?
|
15
|
+
false
|
16
|
+
else
|
17
|
+
paragraph.type == CodeBlockParser.getTypeString()
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
13
21
|
def parse(paragraph)
|
14
|
-
if
|
22
|
+
if CodeBlockParser.isCodeBlock(paragraph)
|
15
23
|
"```\n#{paragraph.text}\n```"
|
16
24
|
else
|
17
25
|
if !nextParser.nil?
|
data/lib/Parsers/IMGParser.rb
CHANGED
@@ -7,7 +7,12 @@ require 'ImageDownloader'
|
|
7
7
|
require 'PathPolicy'
|
8
8
|
|
9
9
|
class IMGParser < Parser
|
10
|
-
attr_accessor :nextParser, :pathPolicy
|
10
|
+
attr_accessor :nextParser, :pathPolicy, :isForJekyll
|
11
|
+
|
12
|
+
def initialize(isForJekyll)
|
13
|
+
@isForJekyll = isForJekyll
|
14
|
+
end
|
15
|
+
|
11
16
|
def parse(paragraph)
|
12
17
|
if paragraph.type == 'IMG'
|
13
18
|
|
@@ -25,9 +30,13 @@ class IMGParser < Parser
|
|
25
30
|
|
26
31
|
if ImageDownloader.download(absolutePath, imageURL)
|
27
32
|
relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
|
28
|
-
|
33
|
+
if isForJekyll
|
34
|
+
"\r\n\r\n"
|
35
|
+
else
|
36
|
+
"\r\n\r\n"
|
37
|
+
end
|
29
38
|
else
|
30
|
-
""
|
39
|
+
"\r\n\r\n"
|
31
40
|
end
|
32
41
|
else
|
33
42
|
if !nextParser.nil?
|
data/lib/Parsers/IframeParser.rb
CHANGED
@@ -11,7 +11,12 @@ require 'ImageDownloader'
|
|
11
11
|
require 'PathPolicy'
|
12
12
|
|
13
13
|
class IframeParser < Parser
|
14
|
-
attr_accessor :nextParser, :pathPolicy
|
14
|
+
attr_accessor :nextParser, :pathPolicy, :isForJekyll
|
15
|
+
|
16
|
+
def initialize(isForJekyll)
|
17
|
+
@isForJekyll = isForJekyll
|
18
|
+
end
|
19
|
+
|
15
20
|
def parse(paragraph)
|
16
21
|
if paragraph.type == 'IFRAME'
|
17
22
|
if !paragraph.iframe.src.nil? && paragraph.iframe.src != ""
|
@@ -35,9 +40,13 @@ class IframeParser < Parser
|
|
35
40
|
title = paragraph.iframe.title
|
36
41
|
if ImageDownloader.download(absolutePath, imageURL)
|
37
42
|
relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
|
38
|
-
|
43
|
+
if isForJekyll
|
44
|
+
result = "\r\n[](#{params["url"]})\r\n"
|
45
|
+
else
|
46
|
+
result = "\r\n[](#{params["url"]})\r\n"
|
47
|
+
end
|
39
48
|
else
|
40
|
-
result = "\n[#{title}](#{params["url"]})"
|
49
|
+
result = "\r\n[#{title}](#{params["url"]})\r\n"
|
41
50
|
end
|
42
51
|
end
|
43
52
|
else
|
data/lib/Parsers/LinkParser.rb
CHANGED
@@ -3,10 +3,11 @@ $lib = File.expand_path('../', File.dirname(__FILE__))
|
|
3
3
|
require 'Models/Paragraph'
|
4
4
|
|
5
5
|
class LinkParser
|
6
|
-
attr_accessor :usersPostURLs
|
6
|
+
attr_accessor :usersPostURLs, :isForJekyll
|
7
7
|
|
8
|
-
def initialize(usersPostURLs)
|
8
|
+
def initialize(usersPostURLs, isForJekyll)
|
9
9
|
@usersPostURLs = usersPostURLs
|
10
|
+
@isForJekyll = isForJekyll
|
10
11
|
end
|
11
12
|
|
12
13
|
def parse(markdownString, markupLinks)
|
@@ -21,20 +22,17 @@ class LinkParser
|
|
21
22
|
# if have provide user's post urls
|
22
23
|
# find & replace medium url to local post url if matched
|
23
24
|
|
24
|
-
|
25
|
-
|
26
|
-
|
25
|
+
if isForJekyll
|
26
|
+
postPath = link.split("/").last.split("-").last
|
27
|
+
else
|
28
|
+
postPath = link.split("/").last
|
27
29
|
end
|
28
|
-
|
29
|
-
if !(
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
# fullfill url from markup attribute
|
35
|
-
match = markupLinks.find{ |markupLink| markupLink.include? link }
|
36
|
-
if !match.nil?
|
37
|
-
markdownString = markdownString.sub! link, match
|
30
|
+
|
31
|
+
if !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == postPath.split("-").last }.nil?
|
32
|
+
if isForJekyll
|
33
|
+
markdownString = markdownString.sub! link, "../#{postPath}"
|
34
|
+
else
|
35
|
+
markdownString = markdownString.sub! link, "#{postPath}"
|
38
36
|
end
|
39
37
|
end
|
40
38
|
end
|
data/lib/Parsers/MarkupStyleRender.rb
CHANGED
@@ -32,12 +32,10 @@ class MarkupStyleRender
|
|
32
32
|
chars = {}
|
33
33
|
index = 0
|
34
34
|
|
35
|
-
emojiRegex = /[\u{203C}\u{2049}\u{20E3}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{23E9}-\u{23EC}\u{23F0}\u{23F3}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2601}\u{260E}\u{2611}\u{2614}-\u{2615}\u{261D}\u{263A}\u{2648}-\u{2653}\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267F}\u{2693}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26CE}\u{26D4}\u{26EA}\u{26F2}-\u{26F3}\u{26F5}\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270C}\u{270F}\u{2712}\u{2714}\u{2716}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1E7}-\u{1F1EC}\u{1F1EE}-\u{1F1F0}\u{1F1F3}\u{1F1F5}\u{1F1F7}-\u{1F1FA}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F320}\u{1F330}-\u{1F335}\u{1F337}-\u{1F37C}\u{1F380}-\u{1F393}\u{1F3A0}-\u{1F3C4}\u{1F3C6}-\u{1F3CA}\u{1F3E0}-\u{1F3F0}\u{1F400}-\u{1F43E}\u{1F440}\u{1F442}-\u{1F4F7}\u{1F4F9}-\u{1F4FC}\u{1F500}-\u{1F507}\u{1F509}-\u{1F53D}\u{1F550}-\u{1F567}\u{1F5FB}-\u{1F640}\u{1F645}-\u{1F64F}\u{1F680}-\u{1F68A}]/
|
36
|
-
excludesEmojis = ["⚠"]
|
37
35
|
paragraph.text.each_char do |char|
|
38
36
|
chars[index] = TextChar.new([char], "Text")
|
39
37
|
index += 1
|
40
|
-
if char
|
38
|
+
if char.bytes.length >= 4
|
41
39
|
# some emoji need more space (in Medium)
|
42
40
|
chars[index] = TextChar.new([], "Text")
|
43
41
|
index += 1
|
data/lib/ZMediumFetcher.rb
CHANGED
@@ -30,7 +30,7 @@ require 'date'
|
|
30
30
|
|
31
31
|
class ZMediumFetcher
|
32
32
|
|
33
|
-
attr_accessor :progress, :linkParser
|
33
|
+
attr_accessor :progress, :linkParser, :isForJekyll
|
34
34
|
|
35
35
|
class Progress
|
36
36
|
attr_accessor :username, :postPath, :currentPostIndex, :totalPostsLength, :currentPostParagraphIndex, :totalPostParagraphsLength, :message
|
@@ -71,7 +71,8 @@ class ZMediumFetcher
|
|
71
71
|
|
72
72
|
def initialize
|
73
73
|
@progress = Progress.new()
|
74
|
-
@linkParser = LinkParser.new(nil)
|
74
|
+
@linkParser = LinkParser.new(nil, false)
|
75
|
+
@isForJekyll = false
|
75
76
|
end
|
76
77
|
|
77
78
|
def buildParser(imagePathPolicy)
|
@@ -92,10 +93,10 @@ class ZMediumFetcher
|
|
92
93
|
oliParser.setNext(mixtapeembedParser)
|
93
94
|
pqParser = PQParser.new()
|
94
95
|
mixtapeembedParser.setNext(pqParser)
|
95
|
-
iframeParser = IframeParser.new()
|
96
|
+
iframeParser = IframeParser.new(isForJekyll)
|
96
97
|
iframeParser.pathPolicy = imagePathPolicy
|
97
98
|
pqParser.setNext(iframeParser)
|
98
|
-
imgParser = IMGParser.new()
|
99
|
+
imgParser = IMGParser.new(isForJekyll)
|
99
100
|
imgParser.pathPolicy = imagePathPolicy
|
100
101
|
iframeParser.setNext(imgParser)
|
101
102
|
bqParser = BQParser.new()
|
@@ -113,7 +114,12 @@ class ZMediumFetcher
|
|
113
114
|
|
114
115
|
def downloadPost(postURL, pathPolicy)
|
115
116
|
postID = Post.getPostIDFromPostURLString(postURL)
|
116
|
-
|
117
|
+
|
118
|
+
if isForJekyll
|
119
|
+
postPath = postID # use only post id is more friendly for url seo
|
120
|
+
else
|
121
|
+
postPath = Post.getPostPathFromPostURLString(postURL)
|
122
|
+
end
|
117
123
|
|
118
124
|
progress.postPath = postPath
|
119
125
|
progress.message = "Downloading Post..."
|
@@ -203,9 +209,14 @@ class ZMediumFetcher
|
|
203
209
|
previousParagraph = paragraph
|
204
210
|
end
|
205
211
|
|
206
|
-
|
207
|
-
|
208
|
-
|
212
|
+
if isForJekyll
|
213
|
+
postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "_posts")
|
214
|
+
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "assets")
|
215
|
+
else
|
216
|
+
postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
|
217
|
+
imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "assets")
|
218
|
+
end
|
219
|
+
|
209
220
|
startParser = buildParser(imagePathPolicy)
|
210
221
|
|
211
222
|
progress.totalPostParagraphsLength = paragraphs.length
|
@@ -231,13 +242,21 @@ class ZMediumFetcher
|
|
231
242
|
|
232
243
|
index = 0
|
233
244
|
paragraphs.each do |paragraph|
|
234
|
-
|
235
|
-
paragraph
|
245
|
+
|
246
|
+
if !(CodeBlockParser.isCodeBlock(paragraph) || PREParser.isPRE(paragraph))
|
247
|
+
markupParser = MarkupParser.new(paragraph)
|
248
|
+
paragraph.text = markupParser.parse()
|
249
|
+
end
|
250
|
+
|
236
251
|
result = startParser.parse(paragraph)
|
237
252
|
|
238
253
|
if !linkParser.nil?
|
239
254
|
result = linkParser.parse(result, paragraph.markupLinks)
|
240
255
|
end
|
256
|
+
|
257
|
+
if paragraph.orgText == "延伸閱讀" or result.include? "Like Z Realm" or paragraph.orgText == "有任何問題及指教歡迎與我聯絡。"
|
258
|
+
break
|
259
|
+
end
|
241
260
|
|
242
261
|
file.puts(result)
|
243
262
|
|
@@ -281,7 +300,7 @@ class ZMediumFetcher
|
|
281
300
|
nextID = postPageInfo["nextID"]
|
282
301
|
end while !nextID.nil?
|
283
302
|
|
284
|
-
@linkParser = LinkParser.new(postURLS)
|
303
|
+
@linkParser = LinkParser.new(postURLS, isForJekyll)
|
285
304
|
|
286
305
|
progress.totalPostsLength = postURLS.length
|
287
306
|
progress.currentPostIndex = 0
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ZMediumToMarkdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.1
|
4
|
+
version: 1.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ZhgChgLi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-06-
|
11
|
+
date: 2022-06-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|