ZMediumToMarkdown 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ZMediumFetcher +98 -31
- data/lib/Helper.rb +25 -0
- data/lib/Parsers/BQParser.rb +5 -1
- data/lib/Parsers/CodeBlockParser.rb +22 -0
- data/lib/Parsers/FallbackParser.rb +2 -1
- data/lib/Parsers/PQParser.rb +5 -1
- data/lib/Parsers/PREParser.rb +16 -2
- data/lib/Post.rb +29 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 29245a0299d0f492d7000a27c97f4cfdd305b5bd39b31d1dfbdfd126f938daf1
|
4
|
+
data.tar.gz: 7a81eca7da5c8a3d02b80936f2395ff1385ff37ef7092a5f6ae919e9dc817065
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 592b4a98e54ea032aee4560c23a827637fcfe38bc56b66af7cb1b5799e3a1b8b641f20de03566e96f04b9b8a75ddf97b97e339503f5b49c55afa599a8cdbf31b
|
7
|
+
data.tar.gz: e2003629feee6fe3230d4c72059a860e9be0458e28a5fee7640a13c4aa1ef5ec2047d27a21fa8a7a719d19bb601efa245dcec24461738bf0cdc113e6ed1e694c
|
data/bin/ZMediumFetcher
CHANGED
@@ -7,6 +7,7 @@ $LOAD_PATH.unshift($lib)
|
|
7
7
|
require "open-uri"
|
8
8
|
require 'json'
|
9
9
|
require 'optparse'
|
10
|
+
require 'fileutils'
|
10
11
|
|
11
12
|
require "Parsers/H1Parser"
|
12
13
|
require "Parsers/H2Parser"
|
@@ -24,6 +25,7 @@ require "Parsers/OLIParser"
|
|
24
25
|
require "Parsers/MIXTAPEEMBEDParser"
|
25
26
|
require "Parsers/PQParser"
|
26
27
|
require "Parsers/LinkParser"
|
28
|
+
require "Parsers/CodeBlockParser"
|
27
29
|
|
28
30
|
require "PathPolicy"
|
29
31
|
require "Request"
|
@@ -124,8 +126,10 @@ class ZMediumFetcher
|
|
124
126
|
imgParser.setNext(bqParser)
|
125
127
|
preParser = PREParser.new()
|
126
128
|
bqParser.setNext(preParser)
|
129
|
+
codeBlockParser = CodeBlockParser.new()
|
130
|
+
preParser.setNext(codeBlockParser)
|
127
131
|
fallbackParser = FallbackParser.new()
|
128
|
-
|
132
|
+
codeBlockParser.setNext(fallbackParser)
|
129
133
|
|
130
134
|
|
131
135
|
h1Parser
|
@@ -145,6 +149,8 @@ class ZMediumFetcher
|
|
145
149
|
if postContent.nil?
|
146
150
|
raise "Error: Content is empty! PostURL: #{postURL}"
|
147
151
|
end
|
152
|
+
|
153
|
+
postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
|
148
154
|
|
149
155
|
sourceParagraphs = Post.parsePostParagraphsFromPostContent(postContent, postID)
|
150
156
|
if sourceParagraphs.nil?
|
@@ -156,7 +162,8 @@ class ZMediumFetcher
|
|
156
162
|
|
157
163
|
paragraphs = []
|
158
164
|
oliIndex = 0
|
159
|
-
|
165
|
+
previousParagraph = nil
|
166
|
+
preTypeParagraphs = []
|
160
167
|
sourceParagraphs.each do |sourcParagraph|
|
161
168
|
paragraph = Paragraph.new(sourcParagraph, postID, postContent)
|
162
169
|
if OLIParser.isOLI(paragraph)
|
@@ -168,13 +175,55 @@ class ZMediumFetcher
|
|
168
175
|
|
169
176
|
# if previous is OLI or ULI and current is not OLI or ULI
|
170
177
|
# than insert a blank paragraph to keep markdown foramt correct
|
171
|
-
if (OLIParser.isOLI(
|
172
|
-
(ULIParser.isULI(
|
178
|
+
if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
|
179
|
+
(ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
|
173
180
|
paragraphs.append(Paragraph.makeBlankParagraph(postID))
|
174
181
|
end
|
175
182
|
|
183
|
+
# group by PRE paragraph to code block
|
184
|
+
# because medium will give continue pre to present code block
|
185
|
+
# e.g.
|
186
|
+
# type=pre, text=<html>
|
187
|
+
# type=pre, text=text
|
188
|
+
# type=pre, text=</html>
|
189
|
+
|
190
|
+
if !previousParagraph.nil?
|
191
|
+
if PREParser.isPRE(paragraph)
|
192
|
+
# if current is pre
|
193
|
+
preTypeParagraphs.append(paragraph)
|
194
|
+
elsif PREParser.isPRE(previousParagraph) && !PREParser.isPRE(paragraph)
|
195
|
+
# if current is note pre and previousParagraph is pre and preTypeParagraphs > 1
|
196
|
+
if preTypeParagraphs.length > 1
|
197
|
+
lastPreTypeParagraph = preTypeParagraphs.pop
|
198
|
+
|
199
|
+
# group by preParagraphs text to last preParagraph
|
200
|
+
groupByText = ""
|
201
|
+
preTypeParagraphs.each do |preTypeParagraph|
|
202
|
+
if groupByText != ""
|
203
|
+
groupByText += "\n"
|
204
|
+
end
|
205
|
+
|
206
|
+
markupParser = MarkupParser.new(postHtml, preTypeParagraph)
|
207
|
+
groupByText += markupParser.parse()
|
208
|
+
end
|
209
|
+
|
210
|
+
lastPreTypeParagraph.text = "#{groupByText}"
|
211
|
+
lastPreTypeParagraph.type = CodeBlockParser.getTypeString()
|
212
|
+
|
213
|
+
# remove all preParagraphs
|
214
|
+
preTypeParagraphNames = preTypeParagraphs.map do |preTypeParagraph|
|
215
|
+
preTypeParagraph.name
|
216
|
+
end
|
217
|
+
paragraphs = paragraphs.select do |paragraph|
|
218
|
+
!preTypeParagraphNames.include? paragraph.name
|
219
|
+
end
|
220
|
+
end
|
221
|
+
preTypeParagraphs = []
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
176
225
|
paragraphs.append(paragraph)
|
177
|
-
|
226
|
+
previousParagraph = paragraph
|
178
227
|
end
|
179
228
|
|
180
229
|
postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
|
@@ -188,32 +237,45 @@ class ZMediumFetcher
|
|
188
237
|
progress.printLog()
|
189
238
|
|
190
239
|
absolutePath = postPathPolicy.getAbsolutePath("#{postPath}.md")
|
191
|
-
|
192
|
-
|
193
|
-
File.
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
240
|
+
|
241
|
+
# if markdown file is exists and last modification time is >= latestPublishedAt(last update post time on medium)
|
242
|
+
if File.file?(absolutePath) && File.mtime(absolutePath) >= postInfo.latestPublishedAt
|
243
|
+
# Already downloaded and nothing has changed!, Skip!
|
244
|
+
progress.currentPostParagraphIndex = paragraphs.length
|
245
|
+
progress.message = "Skip, Post already downloaded and nothing has changed!"
|
246
|
+
progress.printLog()
|
247
|
+
else
|
248
|
+
Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
|
249
|
+
File.open(absolutePath, "w+") do |file|
|
250
|
+
# write postInfo into top
|
251
|
+
file.puts(Helper.createPostInfo(postInfo))
|
202
252
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
253
|
+
index = 0
|
254
|
+
paragraphs.each do |paragraph|
|
255
|
+
markupParser = MarkupParser.new(postHtml, paragraph)
|
256
|
+
paragraph.text = markupParser.parse()
|
257
|
+
result = startParser.parse(paragraph)
|
258
|
+
|
259
|
+
if !linkParser.nil?
|
260
|
+
result = linkParser.parse(result, paragraph.markupLinks)
|
261
|
+
end
|
262
|
+
|
263
|
+
file.puts(result)
|
264
|
+
|
265
|
+
index += 1
|
266
|
+
progress.currentPostParagraphIndex = index
|
267
|
+
progress.message = "Converting Post..."
|
268
|
+
progress.printLog()
|
269
|
+
end
|
270
|
+
|
271
|
+
file.puts(Helper.createWatermark(postURL))
|
209
272
|
end
|
273
|
+
FileUtils.touch absolutePath, :mtime => postInfo.latestPublishedAt
|
210
274
|
|
211
|
-
|
275
|
+
progress.message = "Post Successfully Downloaded!"
|
276
|
+
progress.printLog()
|
212
277
|
end
|
213
|
-
|
214
|
-
progress.message = "Post Successfully Downloaded!"
|
215
|
-
progress.printLog()
|
216
|
-
|
278
|
+
|
217
279
|
progress.postPath = nil
|
218
280
|
end
|
219
281
|
|
@@ -264,11 +326,16 @@ class ZMediumFetcher
|
|
264
326
|
end
|
265
327
|
|
266
328
|
begin
|
267
|
-
puts "https://github.com/ZhgChgLi/ZMediumToMarkdown"
|
329
|
+
puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown"
|
268
330
|
puts "You have read and agree with the Disclaimer."
|
269
331
|
Main.new()
|
270
|
-
puts "
|
271
|
-
puts "
|
332
|
+
puts "Execute Successfully!!!"
|
333
|
+
puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown"
|
334
|
+
puts "#Thanks for using this tool."
|
335
|
+
puts "#If this is helpful, please help to star the repo or recommend it to your friends."
|
272
336
|
rescue => e
|
273
|
-
puts "Error: #{e.class} #{e.message}"
|
337
|
+
puts "#Error: #{e.class} #{e.message}\n"
|
338
|
+
puts e.backtrace
|
339
|
+
puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
|
340
|
+
puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
|
274
341
|
end
|
data/lib/Helper.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
$lib = File.expand_path('../lib', File.dirname(__FILE__))
|
2
2
|
|
3
|
+
require 'date'
|
4
|
+
require 'Post'
|
5
|
+
|
3
6
|
class Helper
|
4
7
|
def self.createDirIfNotExist(dirPath)
|
5
8
|
dirs = dirPath.split("/")
|
@@ -11,6 +14,28 @@ class Helper
|
|
11
14
|
end while dirs.length > 0
|
12
15
|
end
|
13
16
|
|
17
|
+
def self.makeWarningText(message)
|
18
|
+
puts "####################################################\n"
|
19
|
+
puts "#WARNING:\n"
|
20
|
+
puts "##{message}\n"
|
21
|
+
puts "#--------------------------------------------------#\n"
|
22
|
+
puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
|
23
|
+
puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
|
24
|
+
puts "####################################################\n"
|
25
|
+
end
|
26
|
+
|
27
|
+
def self.createPostInfo(postInfo)
|
28
|
+
result = "---\n"
|
29
|
+
result += "title: #{postInfo.title}\n"
|
30
|
+
result += "author: #{postInfo.creator}\n"
|
31
|
+
result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
|
32
|
+
result += "tags: [#{postInfo.tags.join(",")}]\n"
|
33
|
+
result += "---\n"
|
34
|
+
result += "\r\n"
|
35
|
+
|
36
|
+
result
|
37
|
+
end
|
38
|
+
|
14
39
|
def self.createWatermark(postURL)
|
15
40
|
text = "\r\n\r\n\r\n"
|
16
41
|
text += "+-----------------------------------------------------------------------------------+"
|
data/lib/Parsers/BQParser.rb
CHANGED
@@ -7,7 +7,11 @@ class BQParser < Parser
|
|
7
7
|
attr_accessor :nextParser
|
8
8
|
def parse(paragraph)
|
9
9
|
if paragraph.type == 'BQ'
|
10
|
-
|
10
|
+
result = ""
|
11
|
+
paragraph.text.each_line do |p|
|
12
|
+
result += "> #{p}"
|
13
|
+
end
|
14
|
+
result
|
11
15
|
else
|
12
16
|
if !nextParser.nil?
|
13
17
|
nextParser.parse(paragraph)
|
@@ -0,0 +1,22 @@
|
|
1
|
+
$lib = File.expand_path('../', File.dirname(__FILE__))
|
2
|
+
|
3
|
+
require "Parsers/Parser"
|
4
|
+
require 'Models/Paragraph'
|
5
|
+
|
6
|
+
class CodeBlockParser < Parser
|
7
|
+
attr_accessor :nextParser
|
8
|
+
|
9
|
+
def self.getTypeString()
|
10
|
+
'CODE_BLOCK'
|
11
|
+
end
|
12
|
+
|
13
|
+
def parse(paragraph)
|
14
|
+
if paragraph.type == CodeBlockParser.getTypeString()
|
15
|
+
"```\n#{paragraph.text}\n```"
|
16
|
+
else
|
17
|
+
if !nextParser.nil?
|
18
|
+
nextParser.parse(paragraph)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -1,12 +1,13 @@
|
|
1
1
|
$lib = File.expand_path('../', File.dirname(__FILE__))
|
2
2
|
|
3
|
+
require "Helper"
|
3
4
|
require "Parsers/Parser"
|
4
5
|
require 'Models/Paragraph'
|
5
6
|
|
6
7
|
class FallbackParser < Parser
|
7
8
|
attr_accessor :nextParser
|
8
9
|
def parse(paragraph)
|
9
|
-
|
10
|
+
Helper.makeWarningText("Undefined Paragraph Type: #{paragraph.type}, will treat as plain text temporarily.")
|
10
11
|
"#{paragraph.text}"
|
11
12
|
end
|
12
13
|
end
|
data/lib/Parsers/PQParser.rb
CHANGED
@@ -7,7 +7,11 @@ class PQParser < Parser
|
|
7
7
|
attr_accessor :nextParser
|
8
8
|
def parse(paragraph)
|
9
9
|
if paragraph.type == 'PQ'
|
10
|
-
|
10
|
+
result = ""
|
11
|
+
paragraph.text.each_line do |p|
|
12
|
+
result += "> #{p}"
|
13
|
+
end
|
14
|
+
result
|
11
15
|
else
|
12
16
|
if !nextParser.nil?
|
13
17
|
nextParser.parse(paragraph)
|
data/lib/Parsers/PREParser.rb
CHANGED
@@ -5,9 +5,23 @@ require 'Models/Paragraph'
|
|
5
5
|
|
6
6
|
class PREParser < Parser
|
7
7
|
attr_accessor :nextParser
|
8
|
+
|
9
|
+
def self.isPRE(paragraph)
|
10
|
+
if paragraph.nil?
|
11
|
+
false
|
12
|
+
else
|
13
|
+
paragraph.type == "PRE"
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
8
17
|
def parse(paragraph)
|
9
|
-
if paragraph
|
10
|
-
|
18
|
+
if PREParser.isPRE(paragraph)
|
19
|
+
result = "```\n"
|
20
|
+
paragraph.text.each_line do |p|
|
21
|
+
result += p
|
22
|
+
end
|
23
|
+
result += "\n```"
|
24
|
+
result
|
11
25
|
else
|
12
26
|
if !nextParser.nil?
|
13
27
|
nextParser.parse(paragraph)
|
data/lib/Post.rb
CHANGED
@@ -4,8 +4,14 @@ require "Request"
|
|
4
4
|
require 'uri'
|
5
5
|
require 'nokogiri'
|
6
6
|
require 'json'
|
7
|
+
require 'date'
|
7
8
|
|
8
9
|
class Post
|
10
|
+
|
11
|
+
class PostInfo
|
12
|
+
attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt
|
13
|
+
end
|
14
|
+
|
9
15
|
def self.getPostIDFromPostURLString(postURLString)
|
10
16
|
uri = URI.parse(postURLString)
|
11
17
|
postID = uri.path.split('/').last.split('-').last
|
@@ -40,4 +46,27 @@ class Post
|
|
40
46
|
result.map { |paragraph| content[paragraph["__ref"]] }
|
41
47
|
end
|
42
48
|
end
|
49
|
+
|
50
|
+
def self.parsePostInfoFromPostContent(content, postID)
|
51
|
+
postInfo = PostInfo.new()
|
52
|
+
postInfo.title = content&.dig("Post:#{postID}", "title")
|
53
|
+
postInfo.tags = content&.dig("Post:#{postID}", "tags").map{ |tag| tag["__ref"].gsub! 'Tag:', '' }
|
54
|
+
|
55
|
+
creatorRef = content&.dig("Post:#{postID}", "creator", "__ref")
|
56
|
+
if !creatorRef.nil?
|
57
|
+
postInfo.creator = content&.dig(creatorRef, "name")
|
58
|
+
end
|
59
|
+
|
60
|
+
firstPublishedAt = content&.dig("Post:#{postID}", "firstPublishedAt")
|
61
|
+
if !firstPublishedAt.nil?
|
62
|
+
postInfo.firstPublishedAt = Time.at(0, firstPublishedAt, :millisecond)
|
63
|
+
end
|
64
|
+
|
65
|
+
latestPublishedAt = content&.dig("Post:#{postID}", "latestPublishedAt")
|
66
|
+
if !latestPublishedAt.nil?
|
67
|
+
postInfo.latestPublishedAt = Time.at(0, latestPublishedAt, :millisecond)
|
68
|
+
end
|
69
|
+
|
70
|
+
postInfo
|
71
|
+
end
|
43
72
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ZMediumToMarkdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ZhgChgLi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05-
|
11
|
+
date: 2022-05-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -66,6 +66,7 @@ files:
|
|
66
66
|
- lib/ImageDownloader.rb
|
67
67
|
- lib/Models/Paragraph.rb
|
68
68
|
- lib/Parsers/BQParser.rb
|
69
|
+
- lib/Parsers/CodeBlockParser.rb
|
69
70
|
- lib/Parsers/FallbackParser.rb
|
70
71
|
- lib/Parsers/H1Parser.rb
|
71
72
|
- lib/Parsers/H2Parser.rb
|