ZMediumToMarkdown 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ZMediumFetcher +63 -7
- data/lib/Helper.rb +24 -0
- data/lib/Parsers/CodeBlockParser.rb +22 -0
- data/lib/Parsers/FallbackParser.rb +2 -1
- data/lib/Parsers/PQParser.rb +5 -1
- data/lib/Parsers/PREParser.rb +15 -2
- data/lib/Post.rb +24 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a2243519ed0bc3c844758752f194791e882eb88371e91c082cb6e342c5fb94c0
|
4
|
+
data.tar.gz: 32fd50f1f531288f8d0e0f13a01e0412bd7acd3341f2b4e9af059635a0478f14
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cce4567526e3db1c0d92ef7fa5b1f2953a4242c8dc505fd35f090deee5df8eabd81762712b72ebd4a854082c3a4eae2057622f7e4283335d1df98a191235f38a
|
7
|
+
data.tar.gz: c87a247402fdc62ab3634ffaa422729cfb5fdfe1f0f6220a70c5e5bb598a542f05a64b97a42c747cab7521f63d8942f179bfb74def31ccefa0d49002da60d673
|
data/bin/ZMediumFetcher
CHANGED
@@ -24,6 +24,7 @@ require "Parsers/OLIParser"
|
|
24
24
|
require "Parsers/MIXTAPEEMBEDParser"
|
25
25
|
require "Parsers/PQParser"
|
26
26
|
require "Parsers/LinkParser"
|
27
|
+
require "Parsers/CodeBlockParser"
|
27
28
|
|
28
29
|
require "PathPolicy"
|
29
30
|
require "Request"
|
@@ -124,8 +125,10 @@ class ZMediumFetcher
|
|
124
125
|
imgParser.setNext(bqParser)
|
125
126
|
preParser = PREParser.new()
|
126
127
|
bqParser.setNext(preParser)
|
128
|
+
codeBlockParser = CodeBlockParser.new()
|
129
|
+
preParser.setNext(codeBlockParser)
|
127
130
|
fallbackParser = FallbackParser.new()
|
128
|
-
|
131
|
+
codeBlockParser.setNext(fallbackParser)
|
129
132
|
|
130
133
|
|
131
134
|
h1Parser
|
@@ -145,6 +148,8 @@ class ZMediumFetcher
|
|
145
148
|
if postContent.nil?
|
146
149
|
raise "Error: Content is empty! PostURL: #{postURL}"
|
147
150
|
end
|
151
|
+
|
152
|
+
postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
|
148
153
|
|
149
154
|
sourceParagraphs = Post.parsePostParagraphsFromPostContent(postContent, postID)
|
150
155
|
if sourceParagraphs.nil?
|
@@ -156,7 +161,8 @@ class ZMediumFetcher
|
|
156
161
|
|
157
162
|
paragraphs = []
|
158
163
|
oliIndex = 0
|
159
|
-
|
164
|
+
previousParagraph = nil
|
165
|
+
preTypeParagraphs = []
|
160
166
|
sourceParagraphs.each do |sourcParagraph|
|
161
167
|
paragraph = Paragraph.new(sourcParagraph, postID, postContent)
|
162
168
|
if OLIParser.isOLI(paragraph)
|
@@ -168,13 +174,55 @@ class ZMediumFetcher
|
|
168
174
|
|
169
175
|
# if previous is OLI or ULI and current is not OLI or ULI
|
170
176
|
# then insert a blank paragraph to keep the markdown format correct
|
171
|
-
if (OLIParser.isOLI(
|
172
|
-
(ULIParser.isULI(
|
177
|
+
if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
|
178
|
+
(ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
|
173
179
|
paragraphs.append(Paragraph.makeBlankParagraph(postID))
|
174
180
|
end
|
175
181
|
|
182
|
+
# group consecutive PRE paragraphs into a single code block
|
183
|
+
# because Medium emits consecutive PRE paragraphs to represent one code block
|
184
|
+
# e.g.
|
185
|
+
# type=pre, text=<html>
|
186
|
+
# type=pre, text=text
|
187
|
+
# type=pre, text=</html>
|
188
|
+
|
189
|
+
if !previousParagraph.nil?
|
190
|
+
if PREParser.isPRE(paragraph)
|
191
|
+
# if current is pre
|
192
|
+
preTypeParagraphs.append(paragraph)
|
193
|
+
elsif PREParser.isPRE(previousParagraph) && !PREParser.isPRE(paragraph)
|
194
|
+
# if current is not PRE, previousParagraph is PRE, and preTypeParagraphs has more than one entry
|
195
|
+
if preTypeParagraphs.length > 1
|
196
|
+
lastPreTypeParagraph = preTypeParagraphs.pop
|
197
|
+
|
198
|
+
# merge the text of the grouped PRE paragraphs into the last PRE paragraph
|
199
|
+
groupByText = ""
|
200
|
+
preTypeParagraphs.each do |preTypeParagraph|
|
201
|
+
if groupByText != ""
|
202
|
+
groupByText += "\n"
|
203
|
+
end
|
204
|
+
|
205
|
+
markupParser = MarkupParser.new(postHtml, preTypeParagraph)
|
206
|
+
groupByText += markupParser.parse()
|
207
|
+
end
|
208
|
+
|
209
|
+
lastPreTypeParagraph.text = "#{groupByText}"
|
210
|
+
lastPreTypeParagraph.type = CodeBlockParser.getTypeString()
|
211
|
+
|
212
|
+
# remove all preParagraphs
|
213
|
+
preTypeParagraphNames = preTypeParagraphs.map do |preTypeParagraph|
|
214
|
+
preTypeParagraph.name
|
215
|
+
end
|
216
|
+
paragraphs = paragraphs.select do |paragraph|
|
217
|
+
!preTypeParagraphNames.include? paragraph.name
|
218
|
+
end
|
219
|
+
end
|
220
|
+
preTypeParagraphs = []
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
176
224
|
paragraphs.append(paragraph)
|
177
|
-
|
225
|
+
previousParagraph = paragraph
|
178
226
|
end
|
179
227
|
|
180
228
|
postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
|
@@ -191,6 +239,10 @@ class ZMediumFetcher
|
|
191
239
|
Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
|
192
240
|
index = 0
|
193
241
|
File.open(absolutePath, "w+") do |file|
|
242
|
+
# write postInfo into top
|
243
|
+
|
244
|
+
file.puts(Helper.createPostInfo(postInfo))
|
245
|
+
|
194
246
|
paragraphs.each do |paragraph|
|
195
247
|
markupParser = MarkupParser.new(postHtml, paragraph)
|
196
248
|
paragraph.text = markupParser.parse()
|
@@ -268,7 +320,11 @@ begin
|
|
268
320
|
puts "You have read and agree with the Disclaimer."
|
269
321
|
Main.new()
|
270
322
|
puts "https://github.com/ZhgChgLi/ZMediumToMarkdown"
|
271
|
-
puts "
|
323
|
+
puts "Thanks for using this tool."
|
324
|
+
puts "If this is helpful, please help to star the repo or recommend it to your friends."
|
272
325
|
rescue => e
|
273
|
-
puts "Error: #{e.class} #{e.message}"
|
326
|
+
puts "#Error: #{e.class} #{e.message}\n"
|
327
|
+
puts e.backtrace
|
328
|
+
puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
|
329
|
+
puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
|
274
330
|
end
|
data/lib/Helper.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
$lib = File.expand_path('../lib', File.dirname(__FILE__))
|
2
2
|
|
3
|
+
require 'Post'
|
4
|
+
|
3
5
|
class Helper
|
4
6
|
def self.createDirIfNotExist(dirPath)
|
5
7
|
dirs = dirPath.split("/")
|
@@ -11,6 +13,28 @@ class Helper
|
|
11
13
|
end while dirs.length > 0
|
12
14
|
end
|
13
15
|
|
16
|
+
def self.makeWarningText(message)
|
17
|
+
puts "####################################################\n"
|
18
|
+
puts "#WARNING:\n"
|
19
|
+
puts "##{message}\n"
|
20
|
+
puts "#--------------------------------------------------#\n"
|
21
|
+
puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
|
22
|
+
puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
|
23
|
+
puts "####################################################\n"
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.createPostInfo(postInfo)
|
27
|
+
result = "---\n"
|
28
|
+
result += "title: #{postInfo.title}\n"
|
29
|
+
result += "author: #{postInfo.creator}\n"
|
30
|
+
result += "date: #{postInfo.firstPublishedAt}\n"
|
31
|
+
result += "tags: [#{postInfo.tags.join(",")}]\n"
|
32
|
+
result += "---\n"
|
33
|
+
result += "\r\n"
|
34
|
+
|
35
|
+
result
|
36
|
+
end
|
37
|
+
|
14
38
|
def self.createWatermark(postURL)
|
15
39
|
text = "\r\n\r\n\r\n"
|
16
40
|
text += "+-----------------------------------------------------------------------------------+"
|
data/lib/Parsers/CodeBlockParser.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
$lib = File.expand_path('../', File.dirname(__FILE__))
|
2
|
+
|
3
|
+
require "Parsers/Parser"
|
4
|
+
require 'Models/Paragraph'
|
5
|
+
|
6
|
+
class CodeBlockParser < Parser
|
7
|
+
attr_accessor :nextParser
|
8
|
+
|
9
|
+
def self.getTypeString()
|
10
|
+
'CODE_BLOCK'
|
11
|
+
end
|
12
|
+
|
13
|
+
def parse(paragraph)
|
14
|
+
if paragraph.type == CodeBlockParser.getTypeString()
|
15
|
+
"```\n#{paragraph.text}\n```"
|
16
|
+
else
|
17
|
+
if !nextParser.nil?
|
18
|
+
nextParser.parse(paragraph)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
data/lib/Parsers/FallbackParser.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
$lib = File.expand_path('../', File.dirname(__FILE__))
|
2
2
|
|
3
|
+
require "Helper"
|
3
4
|
require "Parsers/Parser"
|
4
5
|
require 'Models/Paragraph'
|
5
6
|
|
6
7
|
class FallbackParser < Parser
|
7
8
|
attr_accessor :nextParser
|
8
9
|
def parse(paragraph)
|
9
|
-
|
10
|
+
Helper.makeWarningText("Undefined Paragraph Type: #{paragraph.type}, will treat as plain text temporarily.")
|
10
11
|
"#{paragraph.text}"
|
11
12
|
end
|
12
13
|
end
|
data/lib/Parsers/PQParser.rb
CHANGED
@@ -7,7 +7,11 @@ class PQParser < Parser
|
|
7
7
|
attr_accessor :nextParser
|
8
8
|
def parse(paragraph)
|
9
9
|
if paragraph.type == 'PQ'
|
10
|
-
|
10
|
+
result = ""
|
11
|
+
paragraph.text.each_line do |p|
|
12
|
+
result += "> #{p}"
|
13
|
+
end
|
14
|
+
result
|
11
15
|
else
|
12
16
|
if !nextParser.nil?
|
13
17
|
nextParser.parse(paragraph)
|
data/lib/Parsers/PREParser.rb
CHANGED
@@ -5,9 +5,22 @@ require 'Models/Paragraph'
|
|
5
5
|
|
6
6
|
class PREParser < Parser
|
7
7
|
attr_accessor :nextParser
|
8
|
+
|
9
|
+
def self.isPRE(paragraph)
|
10
|
+
if paragraph.nil?
|
11
|
+
false
|
12
|
+
else
|
13
|
+
paragraph.type == "PRE"
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
8
17
|
def parse(paragraph)
|
9
|
-
if paragraph
|
10
|
-
|
18
|
+
if PREParser.isPRE(paragraph)
|
19
|
+
result = ""
|
20
|
+
paragraph.text.each_line do |p|
|
21
|
+
result += "> #{p}"
|
22
|
+
end
|
23
|
+
result
|
11
24
|
else
|
12
25
|
if !nextParser.nil?
|
13
26
|
nextParser.parse(paragraph)
|
data/lib/Post.rb
CHANGED
@@ -4,8 +4,14 @@ require "Request"
|
|
4
4
|
require 'uri'
|
5
5
|
require 'nokogiri'
|
6
6
|
require 'json'
|
7
|
+
require 'date'
|
7
8
|
|
8
9
|
class Post
|
10
|
+
|
11
|
+
class PostInfo
|
12
|
+
attr_accessor :title, :tags, :creator, :firstPublishedAt
|
13
|
+
end
|
14
|
+
|
9
15
|
def self.getPostIDFromPostURLString(postURLString)
|
10
16
|
uri = URI.parse(postURLString)
|
11
17
|
postID = uri.path.split('/').last.split('-').last
|
@@ -40,4 +46,22 @@ class Post
|
|
40
46
|
result.map { |paragraph| content[paragraph["__ref"]] }
|
41
47
|
end
|
42
48
|
end
|
49
|
+
|
50
|
+
def self.parsePostInfoFromPostContent(content, postID)
|
51
|
+
postInfo = PostInfo.new()
|
52
|
+
postInfo.title = content&.dig("Post:#{postID}", "title")
|
53
|
+
postInfo.tags = content&.dig("Post:#{postID}", "tags").map{ |tag| tag["__ref"].gsub! 'Tag:', '' }
|
54
|
+
|
55
|
+
creatorRef = content&.dig("Post:#{postID}", "creator", "__ref")
|
56
|
+
if !creatorRef.nil?
|
57
|
+
postInfo.creator = content&.dig(creatorRef, "name")
|
58
|
+
end
|
59
|
+
|
60
|
+
firstPublishedAt = content&.dig("Post:#{postID}", "firstPublishedAt")
|
61
|
+
if !firstPublishedAt.nil?
|
62
|
+
postInfo.firstPublishedAt = DateTime.strptime(firstPublishedAt.to_s,'%Q')
|
63
|
+
end
|
64
|
+
|
65
|
+
postInfo
|
66
|
+
end
|
43
67
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ZMediumToMarkdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ZhgChgLi
|
@@ -66,6 +66,7 @@ files:
|
|
66
66
|
- lib/ImageDownloader.rb
|
67
67
|
- lib/Models/Paragraph.rb
|
68
68
|
- lib/Parsers/BQParser.rb
|
69
|
+
- lib/Parsers/CodeBlockParser.rb
|
69
70
|
- lib/Parsers/FallbackParser.rb
|
70
71
|
- lib/Parsers/H1Parser.rb
|
71
72
|
- lib/Parsers/H2Parser.rb
|