ZMediumToMarkdown 2.0.0 → 2.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ZMediumToMarkdown +4 -4
- data/lib/Helper.rb +19 -25
- data/lib/Models/Paragraph.rb +15 -9
- data/lib/Parsers/IMGParser.rb +5 -8
- data/lib/Parsers/IframeParser.rb +2 -2
- data/lib/Parsers/LinkParser.rb +25 -27
- data/lib/Parsers/MIXTAPEEMBEDParser.rb +2 -2
- data/lib/Parsers/MarkupStyleRender.rb +6 -0
- data/lib/PathPolicy.rb +19 -8
- data/lib/Post.rb +3 -2
- data/lib/ZMediumFetcher.rb +19 -8
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2c6540c31739d0b7673b180fa73887641933b08f431346a7aa77e89c5188acb6
|
4
|
+
data.tar.gz: 9cd571bc32f08011d136d6814fadf8afb3ab3bc1251ea1476bc0144b30fc4461
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 57162ffcec3607c5fdb654b984593eef97a6badbd1e3ebb8e11bb8be2ea1b7301f17aa51d93fa6d864a2db58bfbb45ceda6b3cade25a2ca633a5eb90d08c5240
|
7
|
+
data.tar.gz: 862072b9c0d384bf1a45f02590b5a0f9286d96ad480b8874670f067e56fc5c66b9c2c33e62f24d799871459504cb807e705d7876576c56f5e9317e80caa0a145
|
data/bin/ZMediumToMarkdown
CHANGED
@@ -19,21 +19,21 @@ class Main
|
|
19
19
|
opts.banner = "Usage: ZMediumFetcher [options]"
|
20
20
|
|
21
21
|
opts.on('-uUSERNAME', '--username=USERNAME', 'Downloading all posts from user') do |username|
|
22
|
-
outputFilePath = PathPolicy.new(filePath, "Output")
|
22
|
+
outputFilePath = PathPolicy.new("#{filePath}/Output", "Output")
|
23
23
|
fetcher.downloadPostsByUsername(username, outputFilePath)
|
24
24
|
|
25
25
|
Helper.printNewVersionMessageIfExists()
|
26
26
|
end
|
27
27
|
|
28
28
|
opts.on('-pPOST_URL', '--postURL=POST_URL', 'Downloading single post') do |postURL|
|
29
|
-
outputFilePath = PathPolicy.new(filePath, "Output")
|
29
|
+
outputFilePath = PathPolicy.new("#{filePath}/Output", "Output")
|
30
30
|
fetcher.downloadPost(postURL, outputFilePath)
|
31
31
|
|
32
32
|
Helper.printNewVersionMessageIfExists()
|
33
33
|
end
|
34
34
|
|
35
35
|
opts.on('-jUSERNAME', '--jekyllUsername=USERNAME', 'Downloading all posts from user with Jekyll friendly') do |username|
|
36
|
-
outputFilePath = PathPolicy.new(filePath, "
|
36
|
+
outputFilePath = PathPolicy.new(filePath, "")
|
37
37
|
fetcher.isForJekyll = true
|
38
38
|
fetcher.downloadPostsByUsername(username, outputFilePath)
|
39
39
|
|
@@ -41,7 +41,7 @@ class Main
|
|
41
41
|
end
|
42
42
|
|
43
43
|
opts.on('-kPOST_URL', '--jekyllPostURL=POST_URL', 'Downloading single post with Jekyll friendly') do |postURL|
|
44
|
-
outputFilePath = PathPolicy.new(filePath, "
|
44
|
+
outputFilePath = PathPolicy.new(filePath, "")
|
45
45
|
fetcher.isForJekyll = true
|
46
46
|
fetcher.downloadPost(postURL, outputFilePath)
|
47
47
|
|
data/lib/Helper.rb
CHANGED
@@ -12,10 +12,6 @@ require 'nokogiri'
|
|
12
12
|
|
13
13
|
class Helper
|
14
14
|
|
15
|
-
def self.escapeMarkdown(text)
|
16
|
-
text.gsub(/(\*|_|`|\||\\|\{|\}|\[|\]|\(|\)|#|\+|\-|\.|\!)/){ |x| "\\#{x}" }
|
17
|
-
end
|
18
|
-
|
19
15
|
def self.fetchOGImage(url)
|
20
16
|
html = Request.html(Request.URL(url))
|
21
17
|
content = html.search("meta[property='og:image']").attribute('content')
|
@@ -99,30 +95,28 @@ class Helper
|
|
99
95
|
end
|
100
96
|
|
101
97
|
def self.createPostInfo(postInfo, isForJekyll)
|
98
|
+
title = postInfo.title.gsub("[","")
|
99
|
+
title = title.gsub("]","")
|
100
|
+
|
101
|
+
result = "---\n"
|
102
|
+
result += "title: #{title}\n"
|
103
|
+
result += "author: #{postInfo.creator}\n"
|
104
|
+
result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
|
105
|
+
result += "last_modified_at: #{postInfo.latestPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
|
106
|
+
result += "categories: #{postInfo.collectionName}\n"
|
107
|
+
result += "tags: [#{postInfo.tags.join(",")}]\n"
|
108
|
+
result += "description: #{postInfo.description}\n"
|
109
|
+
if !postInfo.previewImage.nil?
|
110
|
+
result += "image:\r\n"
|
111
|
+
result += " path: #{postInfo.previewImage}\r\n"
|
112
|
+
end
|
102
113
|
if isForJekyll
|
103
|
-
title = postInfo.title.gsub("[","")
|
104
|
-
title = title.gsub("]","")
|
105
|
-
|
106
|
-
result = "---\n"
|
107
|
-
result += "title: #{title}\n"
|
108
|
-
result += "author: #{postInfo.creator}\n"
|
109
|
-
result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
|
110
|
-
result += "categories: #{postInfo.collectionName}\n"
|
111
|
-
result += "tags: [#{postInfo.tags.join(",")}]\n"
|
112
|
-
result += "description: #{postInfo.description}\n"
|
113
|
-
if !postInfo.previewImage.nil?
|
114
|
-
result += "image:\r\n"
|
115
|
-
result += " path: #{postInfo.previewImage}\r\n"
|
116
|
-
end
|
117
114
|
result += "render_with_liquid: false\n"
|
118
|
-
|
119
|
-
result += "---\n"
|
120
|
-
result += "\r\n"
|
121
|
-
|
122
|
-
result
|
123
|
-
else
|
124
|
-
nil
|
125
115
|
end
|
116
|
+
result += "---\n"
|
117
|
+
result += "\r\n"
|
118
|
+
|
119
|
+
result
|
126
120
|
end
|
127
121
|
|
128
122
|
def self.printNewVersionMessageIfExists()
|
data/lib/Models/Paragraph.rb
CHANGED
@@ -5,7 +5,7 @@ require 'Parsers/PParser'
|
|
5
5
|
require 'securerandom'
|
6
6
|
|
7
7
|
class Paragraph
|
8
|
-
attr_accessor :postID, :name, :orgText, :
|
8
|
+
attr_accessor :postID, :name, :orgText, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :oliIndex, :markups, :markupLinks
|
9
9
|
|
10
10
|
class Iframe
|
11
11
|
attr_accessor :id, :title, :type, :src
|
@@ -66,9 +66,6 @@ class Paragraph
|
|
66
66
|
@href = json['href']
|
67
67
|
@postID = postID
|
68
68
|
|
69
|
-
orgTextWithEscape = Helper.escapeMarkdown(json['text'])
|
70
|
-
@orgTextWithEscape = orgTextWithEscape
|
71
|
-
|
72
69
|
if json['metadata'].nil?
|
73
70
|
@metadata = nil
|
74
71
|
else
|
@@ -87,19 +84,28 @@ class Paragraph
|
|
87
84
|
@iframe = Iframe.new(json['iframe']['mediaResource'])
|
88
85
|
end
|
89
86
|
|
87
|
+
markups = []
|
90
88
|
if !json['markups'].nil? && json['markups'].length > 0
|
91
|
-
markups = []
|
92
89
|
json['markups'].each do |markup|
|
93
90
|
markups.append(Markup.new(markup))
|
94
91
|
end
|
95
|
-
|
96
|
-
|
92
|
+
|
97
93
|
links = json['markups'].select{ |markup| markup["type"] == "A" }
|
98
94
|
if !links.nil? && links.length > 0
|
99
95
|
@markupLinks = links.map{ |link| link["href"] }
|
100
96
|
end
|
101
|
-
else
|
102
|
-
@markups = nil
|
103
97
|
end
|
98
|
+
|
99
|
+
i = 0
|
100
|
+
while i = orgText.index(/(\*|_|`|\||\\|\{|\}|\[|\]|\(|\)|#|\+|\-|\.|\!)/, i + 1)
|
101
|
+
escapeMarkup = {
|
102
|
+
"type" => 'ESCAPE',
|
103
|
+
"start" => i,
|
104
|
+
"end" => i + 1
|
105
|
+
}
|
106
|
+
markups.append(Markup.new(escapeMarkup))
|
107
|
+
end
|
108
|
+
|
109
|
+
@markups = markups
|
104
110
|
end
|
105
111
|
end
|
data/lib/Parsers/IMGParser.rb
CHANGED
@@ -20,24 +20,21 @@ class IMGParser < Parser
|
|
20
20
|
|
21
21
|
imageURL = "https://miro.medium.com/max/1400/#{fileName}"
|
22
22
|
|
23
|
-
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(
|
23
|
+
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(paragraph.postID), pathPolicy.getRelativePath(paragraph.postID))
|
24
24
|
absolutePath = imagePathPolicy.getAbsolutePath(fileName)
|
25
25
|
|
26
26
|
result = ""
|
27
27
|
alt = ""
|
28
|
-
if paragraph.orgTextWithEscape != ""
|
29
|
-
alt = " \"#{paragraph.orgTextWithEscape}\""
|
30
|
-
end
|
31
28
|
|
32
29
|
if ImageDownloader.download(absolutePath, imageURL)
|
33
|
-
relativePath =
|
30
|
+
relativePath = imagePathPolicy.getRelativePath(fileName)
|
34
31
|
if isForJekyll
|
35
|
-
result = "\r\n\r\n![#{paragraph.
|
32
|
+
result = "\r\n\r\n![#{paragraph.text}](/#{relativePath}#{alt})\r\n\r\n"
|
36
33
|
else
|
37
|
-
result = "\r\n\r\n![#{paragraph.
|
34
|
+
result = "\r\n\r\n![#{paragraph.text}](#{relativePath}#{alt})\r\n\r\n"
|
38
35
|
end
|
39
36
|
else
|
40
|
-
result = "\r\n\r\n![#{paragraph.
|
37
|
+
result = "\r\n\r\n![#{paragraph.text}](#{imageURL}#{alt})\r\n\r\n"
|
41
38
|
end
|
42
39
|
|
43
40
|
if paragraph.text != ""
|
data/lib/Parsers/IframeParser.rb
CHANGED
@@ -39,7 +39,7 @@ class IframeParser < Parser
|
|
39
39
|
fileName = "#{paragraph.name}_#{URI(params["image"]).path.split("/").last}" #21de_default.jpg
|
40
40
|
|
41
41
|
imageURL = params["image"]
|
42
|
-
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(
|
42
|
+
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(paragraph.postID), pathPolicy.getRelativePath(paragraph.postID))
|
43
43
|
absolutePath = imagePathPolicy.getAbsolutePath(fileName)
|
44
44
|
title = paragraph.iframe.title
|
45
45
|
if title.nil? or title == ""
|
@@ -47,7 +47,7 @@ class IframeParser < Parser
|
|
47
47
|
end
|
48
48
|
|
49
49
|
if ImageDownloader.download(absolutePath, imageURL)
|
50
|
-
relativePath =
|
50
|
+
relativePath = imagePathPolicy.getRelativePath(fileName)
|
51
51
|
if isForJekyll
|
52
52
|
result = "\r\n\r\n[![#{title}](/#{relativePath} \"#{title}\")](#{params["url"]})\r\n\r\n"
|
53
53
|
else
|
data/lib/Parsers/LinkParser.rb
CHANGED
@@ -10,43 +10,41 @@ class LinkParser
|
|
10
10
|
@isForJekyll = false
|
11
11
|
end
|
12
12
|
|
13
|
-
def parse(markdownString
|
14
|
-
|
15
|
-
|
16
|
-
if !matchLinks.nil?
|
13
|
+
def parse(markdownString)
|
14
|
+
matchLinks = markdownString.scan(/\[[^\]]*\]\(([^\)]*)\)/m)
|
15
|
+
if !matchLinks.nil?
|
17
16
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
17
|
+
matchLinks.each do |matchLink|
|
18
|
+
link = matchLink[0]
|
19
|
+
linkMarkdown = "(#{link})"
|
20
|
+
newLinkMarkdown = linkMarkdown
|
21
|
+
|
22
|
+
if isForJekyll
|
23
|
+
newLinkMarkdown = "(#{link}){:target=\"_blank\"}"
|
24
|
+
end
|
25
|
+
|
26
|
+
|
27
|
+
if !usersPostURLs.nil?
|
28
|
+
# if have provide user's post urls
|
29
|
+
# find & replace medium url to local post url if matched
|
22
30
|
|
23
31
|
if isForJekyll
|
24
|
-
|
32
|
+
postPath = link.split("/").last.split("-").last
|
33
|
+
else
|
34
|
+
postPath = link.split("/").last
|
25
35
|
end
|
26
36
|
|
27
|
-
|
28
|
-
if !usersPostURLs.nil?
|
29
|
-
# if have provide user's post urls
|
30
|
-
# find & replace medium url to local post url if matched
|
31
|
-
|
37
|
+
if !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == postPath.split("-").last }.nil?
|
32
38
|
if isForJekyll
|
33
|
-
|
39
|
+
newLinkMarkdown = "(../#{postPath})"
|
34
40
|
else
|
35
|
-
|
36
|
-
end
|
37
|
-
|
38
|
-
if !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == postPath.split("-").last }.nil?
|
39
|
-
if isForJekyll
|
40
|
-
newLinkMarkdown = "(../#{postPath})"
|
41
|
-
else
|
42
|
-
newLinkMarkdown = "(#{postPath})"
|
43
|
-
end
|
41
|
+
newLinkMarkdown = "(#{postPath})"
|
44
42
|
end
|
45
43
|
end
|
44
|
+
end
|
46
45
|
|
47
|
-
|
48
|
-
|
49
|
-
end
|
46
|
+
if linkMarkdown != newLinkMarkdown
|
47
|
+
markdownString = markdownString.sub! linkMarkdown, newLinkMarkdown
|
50
48
|
end
|
51
49
|
end
|
52
50
|
end
|
data/lib/Parsers/MIXTAPEEMBEDParser.rb
CHANGED
@@ -11,9 +11,9 @@ class MIXTAPEEMBEDParser < Parser
|
|
11
11
|
if !paragraph.mixtapeMetadata.nil? && !paragraph.mixtapeMetadata.href.nil?
|
12
12
|
ogImageURL = Helper.fetchOGImage(paragraph.mixtapeMetadata.href)
|
13
13
|
if !ogImageURL.nil?
|
14
|
-
"\r\n\r\n[![#{paragraph.
|
14
|
+
"\r\n\r\n[![#{paragraph.text}](#{ogImageURL} \"#{paragraph.text}\")](#{paragraph.mixtapeMetadata.href})\r\n\r\n"
|
15
15
|
else
|
16
|
-
"\n[#{paragraph.
|
16
|
+
"\n[#{paragraph.text}](#{paragraph.mixtapeMetadata.href})"
|
17
17
|
end
|
18
18
|
else
|
19
19
|
"\n#{paragraph.text}"
|
data/lib/Parsers/MarkupStyleRender.rb
CHANGED
@@ -174,6 +174,12 @@ class MarkupStyleRender
|
|
174
174
|
tag = TagChar.new(3, markup.start, markup.end, "`", "`")
|
175
175
|
elsif markup.type == "STRONG"
|
176
176
|
tag = TagChar.new(2, markup.start, markup.end, "**", "**")
|
177
|
+
elsif markup.type == "ESCAPE"
|
178
|
+
escapeTagChar = TagChar.new(0,markup.start, markup.end,'','')
|
179
|
+
escapeTagChar.startChars = TextChar.new('\\'.chars,'Text')
|
180
|
+
escapeTagChar.endChars = TextChar.new([],'Text')
|
181
|
+
|
182
|
+
tag = escapeTagChar
|
177
183
|
elsif markup.type == "A"
|
178
184
|
url = markup.href
|
179
185
|
if markup.anchorType == "LINK"
|
data/lib/PathPolicy.rb
CHANGED
@@ -8,18 +8,29 @@ class PathPolicy
|
|
8
8
|
end
|
9
9
|
|
10
10
|
def getRelativePath(lastPath)
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
"
|
11
|
+
result = path
|
12
|
+
|
13
|
+
if result != ""
|
14
|
+
result += "/"
|
15
|
+
end
|
16
|
+
|
17
|
+
if !lastPath.nil?
|
18
|
+
result += lastPath
|
15
19
|
end
|
20
|
+
|
21
|
+
result
|
16
22
|
end
|
17
23
|
|
18
24
|
def getAbsolutePath(lastPath)
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
"
|
25
|
+
result = rootPath
|
26
|
+
|
27
|
+
if !lastPath.nil?
|
28
|
+
if result != ""
|
29
|
+
result += "/"
|
30
|
+
end
|
31
|
+
result += "#{lastPath}"
|
23
32
|
end
|
33
|
+
|
34
|
+
result
|
24
35
|
end
|
25
36
|
end
|
data/lib/Post.rb
CHANGED
@@ -70,13 +70,14 @@ class Post
|
|
70
70
|
if !previewImage.nil?
|
71
71
|
previewImageFIleName = content&.dig(previewImage, "id")
|
72
72
|
|
73
|
-
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(
|
73
|
+
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(postID), pathPolicy.getRelativePath(postID))
|
74
|
+
|
74
75
|
absolutePath = imagePathPolicy.getAbsolutePath(previewImageFIleName)
|
75
76
|
|
76
77
|
imageURL = "https://miro.medium.com/max/1400/#{previewImageFIleName}"
|
77
78
|
|
78
79
|
if ImageDownloader.download(absolutePath, imageURL)
|
79
|
-
relativePath =
|
80
|
+
relativePath = imagePathPolicy.getRelativePath(previewImageFIleName)
|
80
81
|
postInfo.previewImage = relativePath
|
81
82
|
end
|
82
83
|
end
|
data/lib/ZMediumFetcher.rb
CHANGED
@@ -122,11 +122,11 @@ class ZMediumFetcher
|
|
122
122
|
end
|
123
123
|
|
124
124
|
if isForJekyll
|
125
|
-
postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(
|
126
|
-
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(
|
125
|
+
postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath("_posts/zmediumtomarkdown"), pathPolicy.getRelativePath("_posts/zmediumtomarkdown"))
|
126
|
+
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath("assets"), "assets")
|
127
127
|
else
|
128
|
-
postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(
|
129
|
-
imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(
|
128
|
+
postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath("zmediumtomarkdown"), pathPolicy.getRelativePath("zmediumtomarkdown"))
|
129
|
+
imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath("assets"), "assets")
|
130
130
|
end
|
131
131
|
|
132
132
|
progress.postPath = postPath
|
@@ -229,8 +229,19 @@ class ZMediumFetcher
|
|
229
229
|
|
230
230
|
absolutePath = postPathPolicy.getAbsolutePath("#{postWithDatePath}.md")
|
231
231
|
|
232
|
-
|
233
|
-
|
232
|
+
fileLatestPublishedAt = nil
|
233
|
+
|
234
|
+
if File.file?(absolutePath)
|
235
|
+
lines = File.foreach(absolutePath).first(15)
|
236
|
+
if lines.first.start_with?("---")
|
237
|
+
dateLine = lines.select { |line| line.start_with?("last_modified_at:") }.first
|
238
|
+
if !dateLine.nil?
|
239
|
+
fileLatestPublishedAt = Time.parse(dateLine[/^(last_modified_at:)\s+(\S*)/, 2]).to_i
|
240
|
+
end
|
241
|
+
end
|
242
|
+
end
|
243
|
+
|
244
|
+
if !fileLatestPublishedAt.nil? && fileLatestPublishedAt >= postInfo.latestPublishedAt.to_i
|
234
245
|
# Already downloaded and nothing has changed!, Skip!
|
235
246
|
progress.currentPostParagraphIndex = paragraphs.length
|
236
247
|
progress.message = "Skip, Post already downloaded and nothing has changed!"
|
@@ -253,7 +264,7 @@ class ZMediumFetcher
|
|
253
264
|
end
|
254
265
|
|
255
266
|
result = startParser.parse(paragraph)
|
256
|
-
result = linkParser.parse(result
|
267
|
+
result = linkParser.parse(result)
|
257
268
|
|
258
269
|
file.puts(result)
|
259
270
|
|
@@ -310,7 +321,7 @@ class ZMediumFetcher
|
|
310
321
|
if isForJekyll
|
311
322
|
downloadPathPolicy = pathPolicy
|
312
323
|
else
|
313
|
-
downloadPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(
|
324
|
+
downloadPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath("users/#{username}"), pathPolicy.getRelativePath("users/#{username}"))
|
314
325
|
end
|
315
326
|
|
316
327
|
index = 0
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ZMediumToMarkdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ZhgChgLi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-07-
|
11
|
+
date: 2022-07-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|