ZMediumToMarkdown 1.0.1 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ZMediumFetcher +98 -31
- data/lib/Helper.rb +25 -0
- data/lib/Parsers/BQParser.rb +5 -1
- data/lib/Parsers/CodeBlockParser.rb +22 -0
- data/lib/Parsers/FallbackParser.rb +2 -1
- data/lib/Parsers/PQParser.rb +5 -1
- data/lib/Parsers/PREParser.rb +16 -2
- data/lib/Post.rb +29 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 29245a0299d0f492d7000a27c97f4cfdd305b5bd39b31d1dfbdfd126f938daf1
|
4
|
+
data.tar.gz: 7a81eca7da5c8a3d02b80936f2395ff1385ff37ef7092a5f6ae919e9dc817065
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 592b4a98e54ea032aee4560c23a827637fcfe38bc56b66af7cb1b5799e3a1b8b641f20de03566e96f04b9b8a75ddf97b97e339503f5b49c55afa599a8cdbf31b
|
7
|
+
data.tar.gz: e2003629feee6fe3230d4c72059a860e9be0458e28a5fee7640a13c4aa1ef5ec2047d27a21fa8a7a719d19bb601efa245dcec24461738bf0cdc113e6ed1e694c
|
data/bin/ZMediumFetcher
CHANGED
@@ -7,6 +7,7 @@ $LOAD_PATH.unshift($lib)
|
|
7
7
|
require "open-uri"
|
8
8
|
require 'json'
|
9
9
|
require 'optparse'
|
10
|
+
require 'fileutils'
|
10
11
|
|
11
12
|
require "Parsers/H1Parser"
|
12
13
|
require "Parsers/H2Parser"
|
@@ -24,6 +25,7 @@ require "Parsers/OLIParser"
|
|
24
25
|
require "Parsers/MIXTAPEEMBEDParser"
|
25
26
|
require "Parsers/PQParser"
|
26
27
|
require "Parsers/LinkParser"
|
28
|
+
require "Parsers/CodeBlockParser"
|
27
29
|
|
28
30
|
require "PathPolicy"
|
29
31
|
require "Request"
|
@@ -124,8 +126,10 @@ class ZMediumFetcher
|
|
124
126
|
imgParser.setNext(bqParser)
|
125
127
|
preParser = PREParser.new()
|
126
128
|
bqParser.setNext(preParser)
|
129
|
+
codeBlockParser = CodeBlockParser.new()
|
130
|
+
preParser.setNext(codeBlockParser)
|
127
131
|
fallbackParser = FallbackParser.new()
|
128
|
-
|
132
|
+
codeBlockParser.setNext(fallbackParser)
|
129
133
|
|
130
134
|
|
131
135
|
h1Parser
|
@@ -145,6 +149,8 @@ class ZMediumFetcher
|
|
145
149
|
if postContent.nil?
|
146
150
|
raise "Error: Content is empty! PostURL: #{postURL}"
|
147
151
|
end
|
152
|
+
|
153
|
+
postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
|
148
154
|
|
149
155
|
sourceParagraphs = Post.parsePostParagraphsFromPostContent(postContent, postID)
|
150
156
|
if sourceParagraphs.nil?
|
@@ -156,7 +162,8 @@ class ZMediumFetcher
|
|
156
162
|
|
157
163
|
paragraphs = []
|
158
164
|
oliIndex = 0
|
159
|
-
|
165
|
+
previousParagraph = nil
|
166
|
+
preTypeParagraphs = []
|
160
167
|
sourceParagraphs.each do |sourcParagraph|
|
161
168
|
paragraph = Paragraph.new(sourcParagraph, postID, postContent)
|
162
169
|
if OLIParser.isOLI(paragraph)
|
@@ -168,13 +175,55 @@ class ZMediumFetcher
|
|
168
175
|
|
169
176
|
# if previous is OLI or ULI and current is not OLI or ULI
|
170
177
|
# then insert a blank paragraph to keep the markdown format correct
|
171
|
-
if (OLIParser.isOLI(
|
172
|
-
(ULIParser.isULI(
|
178
|
+
if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
|
179
|
+
(ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
|
173
180
|
paragraphs.append(Paragraph.makeBlankParagraph(postID))
|
174
181
|
end
|
175
182
|
|
183
|
+
# group by PRE paragraph to code block
|
184
|
+
# because Medium emits consecutive pre paragraphs to represent one code block
|
185
|
+
# e.g.
|
186
|
+
# type=pre, text=<html>
|
187
|
+
# type=pre, text=text
|
188
|
+
# type=pre, text=</html>
|
189
|
+
|
190
|
+
if !previousParagraph.nil?
|
191
|
+
if PREParser.isPRE(paragraph)
|
192
|
+
# if current is pre
|
193
|
+
preTypeParagraphs.append(paragraph)
|
194
|
+
elsif PREParser.isPRE(previousParagraph) && !PREParser.isPRE(paragraph)
|
195
|
+
# if current is not pre and previousParagraph is pre and preTypeParagraphs > 1
|
196
|
+
if preTypeParagraphs.length > 1
|
197
|
+
lastPreTypeParagraph = preTypeParagraphs.pop
|
198
|
+
|
199
|
+
# group by preParagraphs text to last preParagraph
|
200
|
+
groupByText = ""
|
201
|
+
preTypeParagraphs.each do |preTypeParagraph|
|
202
|
+
if groupByText != ""
|
203
|
+
groupByText += "\n"
|
204
|
+
end
|
205
|
+
|
206
|
+
markupParser = MarkupParser.new(postHtml, preTypeParagraph)
|
207
|
+
groupByText += markupParser.parse()
|
208
|
+
end
|
209
|
+
|
210
|
+
lastPreTypeParagraph.text = "#{groupByText}"
|
211
|
+
lastPreTypeParagraph.type = CodeBlockParser.getTypeString()
|
212
|
+
|
213
|
+
# remove all preParagraphs
|
214
|
+
preTypeParagraphNames = preTypeParagraphs.map do |preTypeParagraph|
|
215
|
+
preTypeParagraph.name
|
216
|
+
end
|
217
|
+
paragraphs = paragraphs.select do |paragraph|
|
218
|
+
!preTypeParagraphNames.include? paragraph.name
|
219
|
+
end
|
220
|
+
end
|
221
|
+
preTypeParagraphs = []
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
176
225
|
paragraphs.append(paragraph)
|
177
|
-
|
226
|
+
previousParagraph = paragraph
|
178
227
|
end
|
179
228
|
|
180
229
|
postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
|
@@ -188,32 +237,45 @@ class ZMediumFetcher
|
|
188
237
|
progress.printLog()
|
189
238
|
|
190
239
|
absolutePath = postPathPolicy.getAbsolutePath("#{postPath}.md")
|
191
|
-
|
192
|
-
|
193
|
-
File.
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
240
|
+
|
241
|
+
# if the markdown file exists and its last modification time is >= latestPublishedAt (the post's last update time on Medium)
|
242
|
+
if File.file?(absolutePath) && File.mtime(absolutePath) >= postInfo.latestPublishedAt
|
243
|
+
# Already downloaded and nothing has changed!, Skip!
|
244
|
+
progress.currentPostParagraphIndex = paragraphs.length
|
245
|
+
progress.message = "Skip, Post already downloaded and nothing has changed!"
|
246
|
+
progress.printLog()
|
247
|
+
else
|
248
|
+
Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
|
249
|
+
File.open(absolutePath, "w+") do |file|
|
250
|
+
# write postInfo into top
|
251
|
+
file.puts(Helper.createPostInfo(postInfo))
|
202
252
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
253
|
+
index = 0
|
254
|
+
paragraphs.each do |paragraph|
|
255
|
+
markupParser = MarkupParser.new(postHtml, paragraph)
|
256
|
+
paragraph.text = markupParser.parse()
|
257
|
+
result = startParser.parse(paragraph)
|
258
|
+
|
259
|
+
if !linkParser.nil?
|
260
|
+
result = linkParser.parse(result, paragraph.markupLinks)
|
261
|
+
end
|
262
|
+
|
263
|
+
file.puts(result)
|
264
|
+
|
265
|
+
index += 1
|
266
|
+
progress.currentPostParagraphIndex = index
|
267
|
+
progress.message = "Converting Post..."
|
268
|
+
progress.printLog()
|
269
|
+
end
|
270
|
+
|
271
|
+
file.puts(Helper.createWatermark(postURL))
|
209
272
|
end
|
273
|
+
FileUtils.touch absolutePath, :mtime => postInfo.latestPublishedAt
|
210
274
|
|
211
|
-
|
275
|
+
progress.message = "Post Successfully Downloaded!"
|
276
|
+
progress.printLog()
|
212
277
|
end
|
213
|
-
|
214
|
-
progress.message = "Post Successfully Downloaded!"
|
215
|
-
progress.printLog()
|
216
|
-
|
278
|
+
|
217
279
|
progress.postPath = nil
|
218
280
|
end
|
219
281
|
|
@@ -264,11 +326,16 @@ class ZMediumFetcher
|
|
264
326
|
end
|
265
327
|
|
266
328
|
begin
|
267
|
-
puts "https://github.com/ZhgChgLi/ZMediumToMarkdown"
|
329
|
+
puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown"
|
268
330
|
puts "You have read and agree with the Disclaimer."
|
269
331
|
Main.new()
|
270
|
-
puts "
|
271
|
-
puts "
|
332
|
+
puts "Execute Successfully!!!"
|
333
|
+
puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown"
|
334
|
+
puts "#Thanks for using this tool."
|
335
|
+
puts "#If this is helpful, please help to star the repo or recommend it to your friends."
|
272
336
|
rescue => e
|
273
|
-
puts "Error: #{e.class} #{e.message}"
|
337
|
+
puts "#Error: #{e.class} #{e.message}\n"
|
338
|
+
puts e.backtrace
|
339
|
+
puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
|
340
|
+
puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
|
274
341
|
end
|
data/lib/Helper.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
$lib = File.expand_path('../lib', File.dirname(__FILE__))
|
2
2
|
|
3
|
+
require 'date'
|
4
|
+
require 'Post'
|
5
|
+
|
3
6
|
class Helper
|
4
7
|
def self.createDirIfNotExist(dirPath)
|
5
8
|
dirs = dirPath.split("/")
|
@@ -11,6 +14,28 @@ class Helper
|
|
11
14
|
end while dirs.length > 0
|
12
15
|
end
|
13
16
|
|
17
|
+
# Prints a boxed warning banner to standard output.
#
# The banner consists of a top border, a "#WARNING:" heading, the given
# message prefixed with "#", a separator, a pointer to the project's issue
# tracker, and a bottom border.
#
# @param message [#to_s] text shown on the line below the heading
# @return [nil] the return value of +puts+
def self.makeWarningText(message)
  border = "####################################################\n"

  # A single multi-argument puts writes each string in order, exactly like
  # one puts call per line would.
  puts(
    border,
    "#WARNING:\n",
    "##{message}\n",
    "#--------------------------------------------------#\n",
    "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n",
    "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n",
    border
  )
end
|
26
|
+
|
27
|
+
# Builds the front-matter header written at the top of a converted post.
#
# The header is delimited by "---" lines and followed by one CRLF blank line.
# It lists title, author, date and tags.
#
# @param postInfo [#title, #creator, #firstPublishedAt, #tags] post metadata;
#   +firstPublishedAt+ is a Time or nil, +tags+ is an Array or nil
# @return [String] the front-matter block, ending in "---\n\r\n"
#
# NOTE(review): values are interpolated unquoted, so a title containing ':'
# may not parse as YAML — confirm with consumers before changing the format.
def self.createPostInfo(postInfo)
  lines = ["---"]
  lines << "title: #{postInfo.title}"
  lines << "author: #{postInfo.creator}"

  # firstPublishedAt is left nil when the post content carries no such field;
  # omit the date line instead of raising NoMethodError on nil.
  publishedAt = postInfo.firstPublishedAt
  unless publishedAt.nil?
    # The format ends in a literal "Z" (UTC designator), so the time must be
    # rendered in UTC. getutc returns a copy and leaves the caller's Time in
    # its original zone.
    lines << "date: #{publishedAt.getutc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}"
  end

  # Treat missing tags as an empty list.
  lines << "tags: [#{(postInfo.tags || []).join(",")}]"
  lines << "---"

  lines.join("\n") + "\n\r\n"
end
|
38
|
+
|
14
39
|
def self.createWatermark(postURL)
|
15
40
|
text = "\r\n\r\n\r\n"
|
16
41
|
text += "+-----------------------------------------------------------------------------------+"
|
data/lib/Parsers/BQParser.rb
CHANGED
@@ -7,7 +7,11 @@ class BQParser < Parser
|
|
7
7
|
attr_accessor :nextParser
|
8
8
|
def parse(paragraph)
|
9
9
|
if paragraph.type == 'BQ'
|
10
|
-
|
10
|
+
result = ""
|
11
|
+
paragraph.text.each_line do |p|
|
12
|
+
result += "> #{p}"
|
13
|
+
end
|
14
|
+
result
|
11
15
|
else
|
12
16
|
if !nextParser.nil?
|
13
17
|
nextParser.parse(paragraph)
|
@@ -0,0 +1,22 @@
|
|
1
|
+
$lib = File.expand_path('../', File.dirname(__FILE__))
|
2
|
+
|
3
|
+
require "Parsers/Parser"
|
4
|
+
require 'Models/Paragraph'
|
5
|
+
|
6
|
+
# Parser-chain link that renders a paragraph of the synthetic "CODE_BLOCK"
# type as a fenced Markdown code block. Any other paragraph is handed to the
# next parser in the chain.
class CodeBlockParser < Parser
  attr_accessor :nextParser

  # @return [String] the paragraph type string this parser handles
  def self.getTypeString()
    'CODE_BLOCK'
  end

  # @param paragraph [#type, #text] paragraph to render
  # @return [String, nil] the fenced code block, the next parser's result, or
  #   nil when the paragraph is not a code block and no next parser is set
  def parse(paragraph)
    return "```\n#{paragraph.text}\n```" if paragraph.type == CodeBlockParser.getTypeString()

    nextParser&.parse(paragraph)
  end
end
|
@@ -1,12 +1,13 @@
|
|
1
1
|
$lib = File.expand_path('../', File.dirname(__FILE__))
|
2
2
|
|
3
|
+
require "Helper"
|
3
4
|
require "Parsers/Parser"
|
4
5
|
require 'Models/Paragraph'
|
5
6
|
|
6
7
|
class FallbackParser < Parser
|
7
8
|
attr_accessor :nextParser
|
8
9
|
def parse(paragraph)
|
9
|
-
|
10
|
+
Helper.makeWarningText("Undefined Paragraph Type: #{paragraph.type}, will treat as plain text temporarily.")
|
10
11
|
"#{paragraph.text}"
|
11
12
|
end
|
12
13
|
end
|
data/lib/Parsers/PQParser.rb
CHANGED
@@ -7,7 +7,11 @@ class PQParser < Parser
|
|
7
7
|
attr_accessor :nextParser
|
8
8
|
def parse(paragraph)
|
9
9
|
if paragraph.type == 'PQ'
|
10
|
-
|
10
|
+
result = ""
|
11
|
+
paragraph.text.each_line do |p|
|
12
|
+
result += "> #{p}"
|
13
|
+
end
|
14
|
+
result
|
11
15
|
else
|
12
16
|
if !nextParser.nil?
|
13
17
|
nextParser.parse(paragraph)
|
data/lib/Parsers/PREParser.rb
CHANGED
@@ -5,9 +5,23 @@ require 'Models/Paragraph'
|
|
5
5
|
|
6
6
|
class PREParser < Parser
|
7
7
|
attr_accessor :nextParser
|
8
|
+
|
9
|
+
# Tells whether the given paragraph is a "PRE" (preformatted) paragraph.
#
# @param paragraph [#type, nil] paragraph to inspect; nil is accepted
# @return [Boolean] false for nil, otherwise whether its type equals "PRE"
def self.isPRE(paragraph)
  !paragraph.nil? && paragraph.type == "PRE"
end
|
16
|
+
|
8
17
|
def parse(paragraph)
|
9
|
-
if paragraph
|
10
|
-
|
18
|
+
if PREParser.isPRE(paragraph)
|
19
|
+
result = "```\n"
|
20
|
+
paragraph.text.each_line do |p|
|
21
|
+
result += p
|
22
|
+
end
|
23
|
+
result += "\n```"
|
24
|
+
result
|
11
25
|
else
|
12
26
|
if !nextParser.nil?
|
13
27
|
nextParser.parse(paragraph)
|
data/lib/Post.rb
CHANGED
@@ -4,8 +4,14 @@ require "Request"
|
|
4
4
|
require 'uri'
|
5
5
|
require 'nokogiri'
|
6
6
|
require 'json'
|
7
|
+
require 'date'
|
7
8
|
|
8
9
|
class Post
|
10
|
+
|
11
|
+
# Plain holder for the post metadata that parsePostInfoFromPostContent fills in.
class PostInfo
  # Post title.
  attr_accessor :title
  # Tag identifiers with the "Tag:" marker removed.
  attr_accessor :tags
  # Name looked up through the post's creator reference.
  attr_accessor :creator
  # Time built from the post's firstPublishedAt millisecond value.
  attr_accessor :firstPublishedAt
  # Time built from the post's latestPublishedAt millisecond value.
  attr_accessor :latestPublishedAt
end
|
14
|
+
|
9
15
|
def self.getPostIDFromPostURLString(postURLString)
|
10
16
|
uri = URI.parse(postURLString)
|
11
17
|
postID = uri.path.split('/').last.split('-').last
|
@@ -40,4 +46,27 @@ class Post
|
|
40
46
|
result.map { |paragraph| content[paragraph["__ref"]] }
|
41
47
|
end
|
42
48
|
end
|
49
|
+
|
50
|
+
# Extracts title, tags, creator name and publish times for one post from the
# post content hash.
#
# @param content [Hash, nil] content keyed by reference strings such as
#   "Post:<id>"; nil is tolerated
# @param postID [String] identifier used to build the "Post:<id>" key
# @return [PostInfo] metadata object; fields missing from +content+ stay nil,
#   and +tags+ is an empty array when the post has none
def self.parsePostInfoFromPostContent(content, postID)
  # Look the post entry up once. Fall back to an empty hash so a missing
  # post (or nil content) gives an empty PostInfo instead of NoMethodError.
  post = content&.dig("Post:#{postID}") || {}

  # Timestamps are stored as milliseconds; absent values stay nil.
  toTime = lambda do |milliseconds|
    milliseconds.nil? ? nil : Time.at(0, milliseconds, :millisecond)
  end

  postInfo = PostInfo.new()
  postInfo.title = post["title"]

  # Non-bang gsub: gsub! returns nil when nothing was replaced and edits the
  # strings inside +content+ in place (raising on frozen strings).
  tagRefs = (post["tags"] || []).map { |tag| tag["__ref"] }.compact
  postInfo.tags = tagRefs.map { |ref| ref.gsub('Tag:', '') }

  creatorRef = post.dig("creator", "__ref")
  postInfo.creator = content.dig(creatorRef, "name") unless creatorRef.nil?

  postInfo.firstPublishedAt = toTime.call(post["firstPublishedAt"])
  postInfo.latestPublishedAt = toTime.call(post["latestPublishedAt"])

  postInfo
end
|
43
72
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ZMediumToMarkdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ZhgChgLi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05-
|
11
|
+
date: 2022-05-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -66,6 +66,7 @@ files:
|
|
66
66
|
- lib/ImageDownloader.rb
|
67
67
|
- lib/Models/Paragraph.rb
|
68
68
|
- lib/Parsers/BQParser.rb
|
69
|
+
- lib/Parsers/CodeBlockParser.rb
|
69
70
|
- lib/Parsers/FallbackParser.rb
|
70
71
|
- lib/Parsers/H1Parser.rb
|
71
72
|
- lib/Parsers/H2Parser.rb
|