ZMediumToMarkdown 1.0.0 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ZMediumFetcher +63 -7
- data/lib/Helper.rb +26 -0
- data/lib/Parsers/BQParser.rb +5 -1
- data/lib/Parsers/CodeBlockParser.rb +22 -0
- data/lib/Parsers/FallbackParser.rb +2 -1
- data/lib/Parsers/PQParser.rb +5 -1
- data/lib/Parsers/PREParser.rb +16 -2
- data/lib/Post.rb +24 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f7805fe04110dc50c764983746065030b4cc87865810de1a50c3b5b89f646d0
|
4
|
+
data.tar.gz: 5eb355fcd4b28a1d0e704e80b5e3e564e770b681f4cd40302f556e647e3cda95
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff70606758386f70dc7fdc9ce1a8a45c6e4c6a136f27403794217be4f0acb59b94b00d2caca74f4ba6b464691fa1312ea2e9e94caf20785c5c758ced7de0aa6d
|
7
|
+
data.tar.gz: d50e747c1acdd6f1f3536711a5c1f15e2fcb729741efe584f8e878fb13e9a461b664e34b0143aef879e1504054ecb108204cb8521a1be200e9cfae52a1a1c8f0
|
data/bin/ZMediumFetcher
CHANGED
@@ -24,6 +24,7 @@ require "Parsers/OLIParser"
|
|
24
24
|
require "Parsers/MIXTAPEEMBEDParser"
|
25
25
|
require "Parsers/PQParser"
|
26
26
|
require "Parsers/LinkParser"
|
27
|
+
require "Parsers/CodeBlockParser"
|
27
28
|
|
28
29
|
require "PathPolicy"
|
29
30
|
require "Request"
|
@@ -124,8 +125,10 @@ class ZMediumFetcher
|
|
124
125
|
imgParser.setNext(bqParser)
|
125
126
|
preParser = PREParser.new()
|
126
127
|
bqParser.setNext(preParser)
|
128
|
+
codeBlockParser = CodeBlockParser.new()
|
129
|
+
preParser.setNext(codeBlockParser)
|
127
130
|
fallbackParser = FallbackParser.new()
|
128
|
-
|
131
|
+
codeBlockParser.setNext(fallbackParser)
|
129
132
|
|
130
133
|
|
131
134
|
h1Parser
|
@@ -145,6 +148,8 @@ class ZMediumFetcher
|
|
145
148
|
if postContent.nil?
|
146
149
|
raise "Error: Content is empty! PostURL: #{postURL}"
|
147
150
|
end
|
151
|
+
|
152
|
+
postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
|
148
153
|
|
149
154
|
sourceParagraphs = Post.parsePostParagraphsFromPostContent(postContent, postID)
|
150
155
|
if sourceParagraphs.nil?
|
@@ -156,7 +161,8 @@ class ZMediumFetcher
|
|
156
161
|
|
157
162
|
paragraphs = []
|
158
163
|
oliIndex = 0
|
159
|
-
|
164
|
+
previousParagraph = nil
|
165
|
+
preTypeParagraphs = []
|
160
166
|
sourceParagraphs.each do |sourcParagraph|
|
161
167
|
paragraph = Paragraph.new(sourcParagraph, postID, postContent)
|
162
168
|
if OLIParser.isOLI(paragraph)
|
@@ -168,13 +174,55 @@ class ZMediumFetcher
|
|
168
174
|
|
169
175
|
# if previous is OLI or ULI and current is not OLI or ULI
|
170
176
|
# then insert a blank paragraph to keep markdown format correct
|
171
|
-
if (OLIParser.isOLI(
|
172
|
-
(ULIParser.isULI(
|
177
|
+
if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
|
178
|
+
(ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
|
173
179
|
paragraphs.append(Paragraph.makeBlankParagraph(postID))
|
174
180
|
end
|
175
181
|
|
182
|
+
# group consecutive PRE paragraphs into a single code block
|
183
|
+
# because Medium emits consecutive PRE paragraphs to represent one code block
|
184
|
+
# e.g.
|
185
|
+
# type=pre, text=<html>
|
186
|
+
# type=pre, text=text
|
187
|
+
# type=pre, text=</html>
|
188
|
+
|
189
|
+
if !previousParagraph.nil?
|
190
|
+
if PREParser.isPRE(paragraph)
|
191
|
+
# if current is pre
|
192
|
+
preTypeParagraphs.append(paragraph)
|
193
|
+
elsif PREParser.isPRE(previousParagraph) && !PREParser.isPRE(paragraph)
|
194
|
+
# if current is not pre and previousParagraph is pre and preTypeParagraphs > 1
|
195
|
+
if preTypeParagraphs.length > 1
|
196
|
+
lastPreTypeParagraph = preTypeParagraphs.pop
|
197
|
+
|
198
|
+
# join the text of the remaining preTypeParagraphs and assign it to the last PRE paragraph
|
199
|
+
groupByText = ""
|
200
|
+
preTypeParagraphs.each do |preTypeParagraph|
|
201
|
+
if groupByText != ""
|
202
|
+
groupByText += "\n"
|
203
|
+
end
|
204
|
+
|
205
|
+
markupParser = MarkupParser.new(postHtml, preTypeParagraph)
|
206
|
+
groupByText += markupParser.parse()
|
207
|
+
end
|
208
|
+
|
209
|
+
lastPreTypeParagraph.text = "#{groupByText}"
|
210
|
+
lastPreTypeParagraph.type = CodeBlockParser.getTypeString()
|
211
|
+
|
212
|
+
# remove all preParagraphs
|
213
|
+
preTypeParagraphNames = preTypeParagraphs.map do |preTypeParagraph|
|
214
|
+
preTypeParagraph.name
|
215
|
+
end
|
216
|
+
paragraphs = paragraphs.select do |paragraph|
|
217
|
+
!preTypeParagraphNames.include? paragraph.name
|
218
|
+
end
|
219
|
+
end
|
220
|
+
preTypeParagraphs = []
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
176
224
|
paragraphs.append(paragraph)
|
177
|
-
|
225
|
+
previousParagraph = paragraph
|
178
226
|
end
|
179
227
|
|
180
228
|
postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
|
@@ -191,6 +239,10 @@ class ZMediumFetcher
|
|
191
239
|
Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
|
192
240
|
index = 0
|
193
241
|
File.open(absolutePath, "w+") do |file|
|
242
|
+
# write postInfo into top
|
243
|
+
|
244
|
+
file.puts(Helper.createPostInfo(postInfo))
|
245
|
+
|
194
246
|
paragraphs.each do |paragraph|
|
195
247
|
markupParser = MarkupParser.new(postHtml, paragraph)
|
196
248
|
paragraph.text = markupParser.parse()
|
@@ -268,7 +320,11 @@ begin
|
|
268
320
|
puts "You have read and agree with the Disclaimer."
|
269
321
|
Main.new()
|
270
322
|
puts "https://github.com/ZhgChgLi/ZMediumToMarkdown"
|
271
|
-
puts "
|
323
|
+
puts "Thanks for using this tool."
|
324
|
+
puts "If this is helpful, please help to star the repo or recommend it to your friends."
|
272
325
|
rescue => e
|
273
|
-
puts "Error: #{e.class} #{e.message}"
|
326
|
+
puts "#Error: #{e.class} #{e.message}\n"
|
327
|
+
puts e.backtrace
|
328
|
+
puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
|
329
|
+
puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
|
274
330
|
end
|
data/lib/Helper.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
$lib = File.expand_path('../lib', File.dirname(__FILE__))
|
2
2
|
|
3
|
+
require 'Post'
|
4
|
+
|
3
5
|
class Helper
|
4
6
|
def self.createDirIfNotExist(dirPath)
|
5
7
|
dirs = dirPath.split("/")
|
@@ -11,12 +13,36 @@ class Helper
|
|
11
13
|
end while dirs.length > 0
|
12
14
|
end
|
13
15
|
|
16
|
+
def self.makeWarningText(message)
|
17
|
+
puts "####################################################\n"
|
18
|
+
puts "#WARNING:\n"
|
19
|
+
puts "##{message}\n"
|
20
|
+
puts "#--------------------------------------------------#\n"
|
21
|
+
puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
|
22
|
+
puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
|
23
|
+
puts "####################################################\n"
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.createPostInfo(postInfo)
|
27
|
+
result = "---\n"
|
28
|
+
result += "title: #{postInfo.title}\n"
|
29
|
+
result += "author: #{postInfo.creator}\n"
|
30
|
+
result += "date: #{postInfo.firstPublishedAt}\n"
|
31
|
+
result += "tags: [#{postInfo.tags.join(",")}]\n"
|
32
|
+
result += "---\n"
|
33
|
+
result += "\r\n"
|
34
|
+
|
35
|
+
result
|
36
|
+
end
|
37
|
+
|
14
38
|
def self.createWatermark(postURL)
|
15
39
|
text = "\r\n\r\n\r\n"
|
16
40
|
text += "+-----------------------------------------------------------------------------------+"
|
17
41
|
text += "\r\n"
|
42
|
+
text += "\r\n"
|
18
43
|
text += "| **[View original post on Medium](#{postURL}) - Converted by [ZhgChgLi](https://blog.zhgchg.li)/[ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)** |"
|
19
44
|
text += "\r\n"
|
45
|
+
text += "\r\n"
|
20
46
|
text += "+-----------------------------------------------------------------------------------+"
|
21
47
|
text += "\r\n"
|
22
48
|
|
data/lib/Parsers/BQParser.rb
CHANGED
@@ -7,7 +7,11 @@ class BQParser < Parser
|
|
7
7
|
attr_accessor :nextParser
|
8
8
|
def parse(paragraph)
|
9
9
|
if paragraph.type == 'BQ'
|
10
|
-
|
10
|
+
result = ""
|
11
|
+
paragraph.text.each_line do |p|
|
12
|
+
result += "> #{p}"
|
13
|
+
end
|
14
|
+
result
|
11
15
|
else
|
12
16
|
if !nextParser.nil?
|
13
17
|
nextParser.parse(paragraph)
|
@@ -0,0 +1,22 @@
|
|
1
|
+
$lib = File.expand_path('../', File.dirname(__FILE__))
|
2
|
+
|
3
|
+
require "Parsers/Parser"
|
4
|
+
require 'Models/Paragraph'
|
5
|
+
|
6
|
+
class CodeBlockParser < Parser
|
7
|
+
attr_accessor :nextParser
|
8
|
+
|
9
|
+
def self.getTypeString()
|
10
|
+
'CODE_BLOCK'
|
11
|
+
end
|
12
|
+
|
13
|
+
def parse(paragraph)
|
14
|
+
if paragraph.type == CodeBlockParser.getTypeString()
|
15
|
+
"```\n#{paragraph.text}\n```"
|
16
|
+
else
|
17
|
+
if !nextParser.nil?
|
18
|
+
nextParser.parse(paragraph)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -1,12 +1,13 @@
|
|
1
1
|
$lib = File.expand_path('../', File.dirname(__FILE__))
|
2
2
|
|
3
|
+
require "Helper"
|
3
4
|
require "Parsers/Parser"
|
4
5
|
require 'Models/Paragraph'
|
5
6
|
|
6
7
|
class FallbackParser < Parser
|
7
8
|
attr_accessor :nextParser
|
8
9
|
def parse(paragraph)
|
9
|
-
|
10
|
+
Helper.makeWarningText("Undefined Paragraph Type: #{paragraph.type}, will treat as plain text temporarily.")
|
10
11
|
"#{paragraph.text}"
|
11
12
|
end
|
12
13
|
end
|
data/lib/Parsers/PQParser.rb
CHANGED
@@ -7,7 +7,11 @@ class PQParser < Parser
|
|
7
7
|
attr_accessor :nextParser
|
8
8
|
def parse(paragraph)
|
9
9
|
if paragraph.type == 'PQ'
|
10
|
-
|
10
|
+
result = ""
|
11
|
+
paragraph.text.each_line do |p|
|
12
|
+
result += "> #{p}"
|
13
|
+
end
|
14
|
+
result
|
11
15
|
else
|
12
16
|
if !nextParser.nil?
|
13
17
|
nextParser.parse(paragraph)
|
data/lib/Parsers/PREParser.rb
CHANGED
@@ -5,9 +5,23 @@ require 'Models/Paragraph'
|
|
5
5
|
|
6
6
|
class PREParser < Parser
|
7
7
|
attr_accessor :nextParser
|
8
|
+
|
9
|
+
def self.isPRE(paragraph)
|
10
|
+
if paragraph.nil?
|
11
|
+
false
|
12
|
+
else
|
13
|
+
paragraph.type == "PRE"
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
8
17
|
def parse(paragraph)
|
9
|
-
if paragraph
|
10
|
-
|
18
|
+
if PREParser.isPRE(paragraph)
|
19
|
+
result = "```\n"
|
20
|
+
paragraph.text.each_line do |p|
|
21
|
+
result += p
|
22
|
+
end
|
23
|
+
result += "\n```"
|
24
|
+
result
|
11
25
|
else
|
12
26
|
if !nextParser.nil?
|
13
27
|
nextParser.parse(paragraph)
|
data/lib/Post.rb
CHANGED
@@ -4,8 +4,14 @@ require "Request"
|
|
4
4
|
require 'uri'
|
5
5
|
require 'nokogiri'
|
6
6
|
require 'json'
|
7
|
+
require 'date'
|
7
8
|
|
8
9
|
class Post
|
10
|
+
|
11
|
+
class PostInfo
|
12
|
+
attr_accessor :title, :tags, :creator, :firstPublishedAt
|
13
|
+
end
|
14
|
+
|
9
15
|
def self.getPostIDFromPostURLString(postURLString)
|
10
16
|
uri = URI.parse(postURLString)
|
11
17
|
postID = uri.path.split('/').last.split('-').last
|
@@ -40,4 +46,22 @@ class Post
|
|
40
46
|
result.map { |paragraph| content[paragraph["__ref"]] }
|
41
47
|
end
|
42
48
|
end
|
49
|
+
|
50
|
+
def self.parsePostInfoFromPostContent(content, postID)
|
51
|
+
postInfo = PostInfo.new()
|
52
|
+
postInfo.title = content&.dig("Post:#{postID}", "title")
|
53
|
+
postInfo.tags = content&.dig("Post:#{postID}", "tags").map{ |tag| tag["__ref"].gsub! 'Tag:', '' }
|
54
|
+
|
55
|
+
creatorRef = content&.dig("Post:#{postID}", "creator", "__ref")
|
56
|
+
if !creatorRef.nil?
|
57
|
+
postInfo.creator = content&.dig(creatorRef, "name")
|
58
|
+
end
|
59
|
+
|
60
|
+
firstPublishedAt = content&.dig("Post:#{postID}", "firstPublishedAt")
|
61
|
+
if !firstPublishedAt.nil?
|
62
|
+
postInfo.firstPublishedAt = DateTime.strptime(firstPublishedAt.to_s,'%Q')
|
63
|
+
end
|
64
|
+
|
65
|
+
postInfo
|
66
|
+
end
|
43
67
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ZMediumToMarkdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ZhgChgLi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05-
|
11
|
+
date: 2022-05-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -66,6 +66,7 @@ files:
|
|
66
66
|
- lib/ImageDownloader.rb
|
67
67
|
- lib/Models/Paragraph.rb
|
68
68
|
- lib/Parsers/BQParser.rb
|
69
|
+
- lib/Parsers/CodeBlockParser.rb
|
69
70
|
- lib/Parsers/FallbackParser.rb
|
70
71
|
- lib/Parsers/H1Parser.rb
|
71
72
|
- lib/Parsers/H2Parser.rb
|