ZMediumToMarkdown 1.9.6 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/Helper.rb +25 -21
- data/lib/Parsers/CodeBlockParser.rb +1 -7
- data/lib/Parsers/IMGParser.rb +1 -1
- data/lib/Parsers/IframeParser.rb +45 -16
- data/lib/Parsers/PREParser.rb +1 -6
- data/lib/Post.rb +21 -3
- data/lib/ZMediumFetcher.rb +20 -14
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 796d995f3d5f3f1edf3de599a28df4a8bea5ab9083d9bf8191d0d0535c924eb3
|
4
|
+
data.tar.gz: 7fa92c888507d4fea9293a3649c1656d3891a04b34d73d035d1a65676a5c8dc4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 434c7f737e281189a0feaa821d25e936632be3e72731a495ba62ea945bfef67063518637879b44ecf5f9121ce4ae8b302553bca55959a93e874904cc0fddc808
|
7
|
+
data.tar.gz: 298efd510208b800826cd39be456e47a7b1744291078fd7e61b954a59e9c9257e796c09ed1813b098ad0c8615c17b1b7e19217388dcbcd9f085d9b5151a35663
|
data/lib/Helper.rb
CHANGED
@@ -98,21 +98,31 @@ class Helper
|
|
98
98
|
end
|
99
99
|
end
|
100
100
|
|
101
|
-
def self.createPostInfo(postInfo)
|
102
|
-
|
103
|
-
|
104
|
-
|
101
|
+
def self.createPostInfo(postInfo, isForJekyll)
|
102
|
+
if isForJekyll
|
103
|
+
title = postInfo.title.gsub("[","")
|
104
|
+
title = title.gsub("]","")
|
105
|
+
|
106
|
+
result = "---\n"
|
107
|
+
result += "title: #{title}\n"
|
108
|
+
result += "author: #{postInfo.creator}\n"
|
109
|
+
result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
|
110
|
+
result += "categories: #{postInfo.collectionName}\n"
|
111
|
+
result += "tags: [#{postInfo.tags.join(",")}]\n"
|
112
|
+
result += "description: #{postInfo.description}\n"
|
113
|
+
if !postInfo.previewImage.nil?
|
114
|
+
result += "image:\r\n"
|
115
|
+
result += " path: #{postInfo.previewImage}\r\n"
|
116
|
+
end
|
117
|
+
result += "render_with_liquid: false\n"
|
105
118
|
|
106
|
-
|
107
|
-
|
108
|
-
result += "author: #{postInfo.creator}\n"
|
109
|
-
result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
|
110
|
-
result += "categories: #{postInfo.collectionName}\n"
|
111
|
-
result += "tags: [#{postInfo.tags.join(",")}]\n"
|
112
|
-
result += "---\n"
|
113
|
-
result += "\r\n"
|
119
|
+
result += "---\n"
|
120
|
+
result += "\r\n"
|
114
121
|
|
115
|
-
|
122
|
+
result
|
123
|
+
else
|
124
|
+
nil
|
125
|
+
end
|
116
126
|
end
|
117
127
|
|
118
128
|
def self.printNewVersionMessageIfExists()
|
@@ -183,15 +193,9 @@ class Helper
|
|
183
193
|
|
184
194
|
def self.createWatermark(postURL)
|
185
195
|
text = "\r\n\r\n\r\n"
|
186
|
-
text += "
|
187
|
-
text += "\r\n"
|
196
|
+
text += "_Converted [Medium Post](#{postURL}) by [ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)._"
|
188
197
|
text += "\r\n"
|
189
|
-
|
190
|
-
text += "\r\n"
|
191
|
-
text += "\r\n"
|
192
|
-
text += "+-----------------------------------------------------------------------------------+"
|
193
|
-
text += "\r\n"
|
194
|
-
|
198
|
+
|
195
199
|
text
|
196
200
|
end
|
197
201
|
end
|
data/lib/Parsers/CodeBlockParser.rb
CHANGED
@@ -25,15 +25,9 @@ class CodeBlockParser < Parser
|
|
25
25
|
def parse(paragraph)
|
26
26
|
if CodeBlockParser.isCodeBlock(paragraph)
|
27
27
|
result = "```\n"
|
28
|
-
if isForJekyll
|
29
|
-
result += "{% raw %}\n"
|
30
|
-
end
|
31
28
|
|
32
|
-
result += paragraph.text
|
29
|
+
result += paragraph.text.chomp
|
33
30
|
|
34
|
-
if isForJekyll
|
35
|
-
result += "\n{% endraw %}"
|
36
|
-
end
|
37
31
|
result += "\n```"
|
38
32
|
else
|
39
33
|
if !nextParser.nil?
|
data/lib/Parsers/IMGParser.rb
CHANGED
@@ -18,7 +18,7 @@ class IMGParser < Parser
|
|
18
18
|
|
19
19
|
fileName = paragraph.metadata.id #d*fsafwfe.jpg
|
20
20
|
|
21
|
-
imageURL = "https://miro.medium.com/max/1400/#{
|
21
|
+
imageURL = "https://miro.medium.com/max/1400/#{fileName}"
|
22
22
|
|
23
23
|
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
|
24
24
|
absolutePath = imagePathPolicy.getAbsolutePath(fileName)
|
data/lib/Parsers/IframeParser.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
$lib = File.expand_path('../', File.dirname(__FILE__))
|
2
2
|
|
3
3
|
require 'uri'
|
4
|
+
require 'net/http'
|
4
5
|
|
5
6
|
require "Request"
|
6
7
|
require "Parsers/Parser"
|
@@ -76,15 +77,9 @@ class IframeParser < Parser
|
|
76
77
|
gistRAW = Request.body(Request.URL(a['href']))
|
77
78
|
|
78
79
|
result = "```#{lang}\n"
|
79
|
-
if isForJekyll
|
80
|
-
result += "{% raw %}\n"
|
81
|
-
end
|
82
80
|
|
83
|
-
result += gistRAW
|
81
|
+
result += gistRAW.chomp
|
84
82
|
|
85
|
-
if isForJekyll
|
86
|
-
result += "\n{% endraw %}"
|
87
|
-
end
|
88
83
|
result += "\n```"
|
89
84
|
end
|
90
85
|
end
|
@@ -96,17 +91,51 @@ class IframeParser < Parser
|
|
96
91
|
ogURL = params["url"]
|
97
92
|
end
|
98
93
|
end
|
99
|
-
ogImageURL = Helper.fetchOGImage(ogURL)
|
100
94
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
95
|
+
twitterID = ogURL[/^(https\:\/\/twitter\.com\/){1}.+(\/){1}(\d+)/, 3]
|
96
|
+
|
97
|
+
if !twitterID.nil?
|
98
|
+
uri = URI("https://api.twitter.com/1.1/statuses/show.json?simple_quoted_tweet=true&include_entities=true&tweet_mode=extended&include_cards=1&id=#{twitterID}")
|
99
|
+
https = Net::HTTP.new(uri.host, uri.port)
|
100
|
+
https.use_ssl = true
|
101
|
+
|
102
|
+
request = Net::HTTP::Get.new(uri)
|
103
|
+
request['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.17.375.766 Safari/537.36';
|
104
|
+
request['Authorization'] = 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'; # twitter private api
|
105
|
+
|
106
|
+
response = https.request(request)
|
107
|
+
if response.code.to_i == 200
|
108
|
+
twitterObj = JSON.parse(response.read_body)
|
109
|
+
|
110
|
+
fullText = twitterObj["full_text"]
|
111
|
+
twitterObj["entities"]["user_mentions"].each do |user_mention|
|
112
|
+
fullText = fullText.gsub(user_mention["screen_name"],"[#{user_mention["screen_name"]}](https://twitter.com/#{user_mention["screen_name"]})")
|
113
|
+
end
|
114
|
+
twitterObj["entities"]["urls"].each do |url|
|
115
|
+
fullText = fullText.gsub(url["url"],"[#{url["display_url"]}](#{url["expanded_url"]})")
|
116
|
+
end
|
117
|
+
|
118
|
+
createdAt = Time.parse(twitterObj["created_at"]).strftime('%Y-%m-%d %H:%M:%S')
|
119
|
+
result = "\n\n"
|
120
|
+
result += "■■■■■■■■■■■■■■ \n"
|
121
|
+
result += "> **[#{twitterObj["user"]["name"]}](https://twitter.com/#{twitterObj["user"]["screen_name"]}) @ Twitter Says:** \n\n"
|
122
|
+
result += "> > #{fullText} \n\n"
|
123
|
+
result += "> **Tweeted at [#{createdAt}](#{ogURL}).** \n\n"
|
124
|
+
result += "■■■■■■■■■■■■■■ \n\n"
|
125
|
+
end
|
108
126
|
else
|
109
|
-
|
127
|
+
ogImageURL = Helper.fetchOGImage(ogURL)
|
128
|
+
|
129
|
+
title = paragraph.iframe.title
|
130
|
+
if title.nil? or title == ""
|
131
|
+
title = Helper.escapeMarkdown(ogURL)
|
132
|
+
end
|
133
|
+
|
134
|
+
if !ogImageURL.nil?
|
135
|
+
result = "\r\n\r\n[![#{title}](#{ogImageURL} \"#{title}\")](#{ogURL})\r\n\r\n"
|
136
|
+
else
|
137
|
+
result = "[#{title}](#{ogURL})"
|
138
|
+
end
|
110
139
|
end
|
111
140
|
end
|
112
141
|
end
|
data/lib/Parsers/PREParser.rb
CHANGED
@@ -21,17 +21,12 @@ class PREParser < Parser
|
|
21
21
|
def parse(paragraph)
|
22
22
|
if PREParser.isPRE(paragraph)
|
23
23
|
result = "```\n"
|
24
|
-
if isForJekyll
|
25
|
-
result += "{% raw %}\n"
|
26
|
-
end
|
27
24
|
|
28
25
|
paragraph.text.each_line do |p|
|
29
26
|
result += p
|
30
27
|
end
|
31
28
|
|
32
|
-
|
33
|
-
result += "\n{% endraw %}"
|
34
|
-
end
|
29
|
+
result = result.chomp
|
35
30
|
result += "\n```"
|
36
31
|
|
37
32
|
result
|
data/lib/Post.rb
CHANGED
@@ -6,10 +6,12 @@ require 'nokogiri'
|
|
6
6
|
require 'json'
|
7
7
|
require 'date'
|
8
8
|
|
9
|
-
|
9
|
+
require 'ImageDownloader'
|
10
|
+
require 'PathPolicy'
|
10
11
|
|
12
|
+
class Post
|
11
13
|
class PostInfo
|
12
|
-
attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt, :collectionName
|
14
|
+
attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt, :collectionName, :description, :previewImage
|
13
15
|
end
|
14
16
|
|
15
17
|
def self.getPostIDFromPostURLString(postURLString)
|
@@ -58,11 +60,27 @@ class Post
|
|
58
60
|
end
|
59
61
|
end
|
60
62
|
|
61
|
-
def self.parsePostInfoFromPostContent(content, postID)
|
63
|
+
def self.parsePostInfoFromPostContent(content, postID, pathPolicy)
|
62
64
|
postInfo = PostInfo.new()
|
65
|
+
postInfo.description = content&.dig("Post:#{postID}", "previewContent", "subtitle")
|
63
66
|
postInfo.title = content&.dig("Post:#{postID}", "title")
|
64
67
|
postInfo.tags = content&.dig("Post:#{postID}", "tags").map{ |tag| tag["__ref"].gsub! 'Tag:', '' }
|
65
68
|
|
69
|
+
previewImage = content&.dig("Post:#{postID}", "previewImage", "__ref")
|
70
|
+
if !previewImage.nil?
|
71
|
+
previewImageFIleName = content&.dig(previewImage, "id")
|
72
|
+
|
73
|
+
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), postID)
|
74
|
+
absolutePath = imagePathPolicy.getAbsolutePath(previewImageFIleName)
|
75
|
+
|
76
|
+
imageURL = "https://miro.medium.com/max/1400/#{previewImageFIleName}"
|
77
|
+
|
78
|
+
if ImageDownloader.download(absolutePath, imageURL)
|
79
|
+
relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(previewImageFIleName)}"
|
80
|
+
postInfo.previewImage = relativePath
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
66
84
|
creatorRef = content&.dig("Post:#{postID}", "creator", "__ref")
|
67
85
|
if !creatorRef.nil?
|
68
86
|
postInfo.creator = content&.dig(creatorRef, "name")
|
data/lib/ZMediumFetcher.rb
CHANGED
@@ -50,9 +50,9 @@ class ZMediumFetcher
|
|
50
50
|
info += "-"
|
51
51
|
end
|
52
52
|
if !currentPostParagraphIndex.nil? && !totalPostParagraphsLength.nil?
|
53
|
-
info += "[#{postPath[0..
|
53
|
+
info += "[#{postPath[0..15]}...(#{currentPostParagraphIndex}/#{totalPostParagraphsLength})]"
|
54
54
|
else
|
55
|
-
info += "[#{postPath[0..
|
55
|
+
info += "[#{postPath[0..15]}...]"
|
56
56
|
end
|
57
57
|
end
|
58
58
|
|
@@ -121,6 +121,14 @@ class ZMediumFetcher
|
|
121
121
|
postPath = Post.getPostPathFromPostURLString(postURL)
|
122
122
|
end
|
123
123
|
|
124
|
+
if isForJekyll
|
125
|
+
postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "_posts/zmediumtomarkdown")
|
126
|
+
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "assets")
|
127
|
+
else
|
128
|
+
postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "zmediumtomarkdown")
|
129
|
+
imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "assets")
|
130
|
+
end
|
131
|
+
|
124
132
|
progress.postPath = postPath
|
125
133
|
progress.message = "Downloading Post..."
|
126
134
|
progress.printLog()
|
@@ -132,7 +140,7 @@ class ZMediumFetcher
|
|
132
140
|
raise "Error: Content is empty! PostURL: #{postURL}"
|
133
141
|
end
|
134
142
|
|
135
|
-
postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
|
143
|
+
postInfo = Post.parsePostInfoFromPostContent(postContent, postID, imagePathPolicy)
|
136
144
|
|
137
145
|
sourceParagraphs = Post.fetchPostParagraphs(postID)
|
138
146
|
if sourceParagraphs.nil?
|
@@ -207,14 +215,6 @@ class ZMediumFetcher
|
|
207
215
|
paragraphs.append(paragraph)
|
208
216
|
previousParagraph = paragraph
|
209
217
|
end
|
210
|
-
|
211
|
-
if isForJekyll
|
212
|
-
postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "_posts/zmediumtomarkdown")
|
213
|
-
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "assets")
|
214
|
-
else
|
215
|
-
postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "zmediumtomarkdown")
|
216
|
-
imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "assets")
|
217
|
-
end
|
218
218
|
|
219
219
|
startParser = buildParser(imagePathPolicy)
|
220
220
|
|
@@ -230,7 +230,7 @@ class ZMediumFetcher
|
|
230
230
|
absolutePath = postPathPolicy.getAbsolutePath("#{postWithDatePath}.md")
|
231
231
|
|
232
232
|
# if markdown file is exists and last modification time is >= latestPublishedAt(last update post time on medium)
|
233
|
-
if File.file?(absolutePath) && File.mtime(absolutePath) >= postInfo.latestPublishedAt
|
233
|
+
if File.file?(absolutePath) && File.mtime(absolutePath).to_time.to_i >= postInfo.latestPublishedAt.to_i
|
234
234
|
# Already downloaded and nothing has changed!, Skip!
|
235
235
|
progress.currentPostParagraphIndex = paragraphs.length
|
236
236
|
progress.message = "Skip, Post already downloaded and nothing has changed!"
|
@@ -239,7 +239,10 @@ class ZMediumFetcher
|
|
239
239
|
Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
|
240
240
|
File.open(absolutePath, "w+") do |file|
|
241
241
|
# write postInfo into top
|
242
|
-
|
242
|
+
postMetaInfo = Helper.createPostInfo(postInfo, isForJekyll)
|
243
|
+
if !postMetaInfo.nil?
|
244
|
+
file.puts(postMetaInfo)
|
245
|
+
end
|
243
246
|
|
244
247
|
index = 0
|
245
248
|
paragraphs.each do |paragraph|
|
@@ -260,7 +263,10 @@ class ZMediumFetcher
|
|
260
263
|
progress.printLog()
|
261
264
|
end
|
262
265
|
|
263
|
-
|
266
|
+
postWatermark = Helper.createWatermark(postURL)
|
267
|
+
if !postWatermark.nil?
|
268
|
+
file.puts(postWatermark)
|
269
|
+
end
|
264
270
|
end
|
265
271
|
FileUtils.touch absolutePath, :mtime => postInfo.latestPublishedAt
|
266
272
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ZMediumToMarkdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.6
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ZhgChgLi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-07-
|
11
|
+
date: 2022-07-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|