ZMediumToMarkdown 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 43d039e9c0ec69d1e765f14c7f23d9b171cd40030d5ea53a81934e074b4176e3
4
- data.tar.gz: 26083e071edd627b747d0a67ca8e84921e2558d2d8b4416be99d1d685c54e232
3
+ metadata.gz: 29245a0299d0f492d7000a27c97f4cfdd305b5bd39b31d1dfbdfd126f938daf1
4
+ data.tar.gz: 7a81eca7da5c8a3d02b80936f2395ff1385ff37ef7092a5f6ae919e9dc817065
5
5
  SHA512:
6
- metadata.gz: 15a68b2b9c048f5dbcfea9a6bcf1285d2a2e55839aaa69766cdc0c437f2d97d29ed1d5b16c40a1e9d0cdf7c504a28ddc9a0e5526d08998d7d3b4e160a66f2f25
7
- data.tar.gz: f27d305f8f2b2886d32b97afe3c967851fe7108bd76bb618010015f6952dc0e2f98e8a14eef8d211025baa1524eefbc8294366047e92570e3ac700889ce94002
6
+ metadata.gz: 592b4a98e54ea032aee4560c23a827637fcfe38bc56b66af7cb1b5799e3a1b8b641f20de03566e96f04b9b8a75ddf97b97e339503f5b49c55afa599a8cdbf31b
7
+ data.tar.gz: e2003629feee6fe3230d4c72059a860e9be0458e28a5fee7640a13c4aa1ef5ec2047d27a21fa8a7a719d19bb601efa245dcec24461738bf0cdc113e6ed1e694c
data/bin/ZMediumFetcher CHANGED
@@ -7,6 +7,7 @@ $LOAD_PATH.unshift($lib)
7
7
  require "open-uri"
8
8
  require 'json'
9
9
  require 'optparse'
10
+ require 'fileutils'
10
11
 
11
12
  require "Parsers/H1Parser"
12
13
  require "Parsers/H2Parser"
@@ -24,6 +25,7 @@ require "Parsers/OLIParser"
24
25
  require "Parsers/MIXTAPEEMBEDParser"
25
26
  require "Parsers/PQParser"
26
27
  require "Parsers/LinkParser"
28
+ require "Parsers/CodeBlockParser"
27
29
 
28
30
  require "PathPolicy"
29
31
  require "Request"
@@ -124,8 +126,10 @@ class ZMediumFetcher
124
126
  imgParser.setNext(bqParser)
125
127
  preParser = PREParser.new()
126
128
  bqParser.setNext(preParser)
129
+ codeBlockParser = CodeBlockParser.new()
130
+ preParser.setNext(codeBlockParser)
127
131
  fallbackParser = FallbackParser.new()
128
- preParser.setNext(fallbackParser)
132
+ codeBlockParser.setNext(fallbackParser)
129
133
 
130
134
 
131
135
  h1Parser
@@ -145,6 +149,8 @@ class ZMediumFetcher
145
149
  if postContent.nil?
146
150
  raise "Error: Content is empty! PostURL: #{postURL}"
147
151
  end
152
+
153
+ postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
148
154
 
149
155
  sourceParagraphs = Post.parsePostParagraphsFromPostContent(postContent, postID)
150
156
  if sourceParagraphs.nil?
@@ -156,7 +162,8 @@ class ZMediumFetcher
156
162
 
157
163
  paragraphs = []
158
164
  oliIndex = 0
159
- preParagraph = nil
165
+ previousParagraph = nil
166
+ preTypeParagraphs = []
160
167
  sourceParagraphs.each do |sourcParagraph|
161
168
  paragraph = Paragraph.new(sourcParagraph, postID, postContent)
162
169
  if OLIParser.isOLI(paragraph)
@@ -168,13 +175,55 @@ class ZMediumFetcher
168
175
 
169
176
  # if previous is OLI or ULI and current is not OLI or ULI
170
177
  # than insert a blank paragraph to keep markdown foramt correct
171
- if (OLIParser.isOLI(preParagraph) && !OLIParser.isOLI(paragraph)) ||
172
- (ULIParser.isULI(preParagraph) && !ULIParser.isULI(paragraph))
178
+ if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
179
+ (ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
173
180
  paragraphs.append(Paragraph.makeBlankParagraph(postID))
174
181
  end
175
182
 
183
+ # group by PRE paragraph to code block
184
+ # because medium will give continue pre to present code block
185
+ # e.g.
186
+ # type=pre, text=<html>
187
+ # type=pre, text=text
188
+ # type=pre, text=</html>
189
+
190
+ if !previousParagraph.nil?
191
+ if PREParser.isPRE(paragraph)
192
+ # if current is pre
193
+ preTypeParagraphs.append(paragraph)
194
+ elsif PREParser.isPRE(previousParagraph) && !PREParser.isPRE(paragraph)
195
+ # if current is note pre and previousParagraph is pre and preTypeParagraphs > 1
196
+ if preTypeParagraphs.length > 1
197
+ lastPreTypeParagraph = preTypeParagraphs.pop
198
+
199
+ # group by preParagraphs text to last preParagraph
200
+ groupByText = ""
201
+ preTypeParagraphs.each do |preTypeParagraph|
202
+ if groupByText != ""
203
+ groupByText += "\n"
204
+ end
205
+
206
+ markupParser = MarkupParser.new(postHtml, preTypeParagraph)
207
+ groupByText += markupParser.parse()
208
+ end
209
+
210
+ lastPreTypeParagraph.text = "#{groupByText}"
211
+ lastPreTypeParagraph.type = CodeBlockParser.getTypeString()
212
+
213
+ # remove all preParagraphs
214
+ preTypeParagraphNames = preTypeParagraphs.map do |preTypeParagraph|
215
+ preTypeParagraph.name
216
+ end
217
+ paragraphs = paragraphs.select do |paragraph|
218
+ !preTypeParagraphNames.include? paragraph.name
219
+ end
220
+ end
221
+ preTypeParagraphs = []
222
+ end
223
+ end
224
+
176
225
  paragraphs.append(paragraph)
177
- preParagraph = paragraph
226
+ previousParagraph = paragraph
178
227
  end
179
228
 
180
229
  postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
@@ -188,32 +237,45 @@ class ZMediumFetcher
188
237
  progress.printLog()
189
238
 
190
239
  absolutePath = postPathPolicy.getAbsolutePath("#{postPath}.md")
191
- Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
192
- index = 0
193
- File.open(absolutePath, "w+") do |file|
194
- paragraphs.each do |paragraph|
195
- markupParser = MarkupParser.new(postHtml, paragraph)
196
- paragraph.text = markupParser.parse()
197
- result = startParser.parse(paragraph)
198
-
199
- if !linkParser.nil?
200
- result = linkParser.parse(result, paragraph.markupLinks)
201
- end
240
+
241
+ # if markdown file is exists and last modification time is >= latestPublishedAt(last update post time on medium)
242
+ if File.file?(absolutePath) && File.mtime(absolutePath) >= postInfo.latestPublishedAt
243
+ # Already downloaded and nothing has changed!, Skip!
244
+ progress.currentPostParagraphIndex = paragraphs.length
245
+ progress.message = "Skip, Post already downloaded and nothing has changed!"
246
+ progress.printLog()
247
+ else
248
+ Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
249
+ File.open(absolutePath, "w+") do |file|
250
+ # write postInfo into top
251
+ file.puts(Helper.createPostInfo(postInfo))
202
252
 
203
- file.puts(result)
204
-
205
- index += 1
206
- progress.currentPostParagraphIndex = index
207
- progress.message = "Converting Post..."
208
- progress.printLog()
253
+ index = 0
254
+ paragraphs.each do |paragraph|
255
+ markupParser = MarkupParser.new(postHtml, paragraph)
256
+ paragraph.text = markupParser.parse()
257
+ result = startParser.parse(paragraph)
258
+
259
+ if !linkParser.nil?
260
+ result = linkParser.parse(result, paragraph.markupLinks)
261
+ end
262
+
263
+ file.puts(result)
264
+
265
+ index += 1
266
+ progress.currentPostParagraphIndex = index
267
+ progress.message = "Converting Post..."
268
+ progress.printLog()
269
+ end
270
+
271
+ file.puts(Helper.createWatermark(postURL))
209
272
  end
273
+ FileUtils.touch absolutePath, :mtime => postInfo.latestPublishedAt
210
274
 
211
- file.puts(Helper.createWatermark(postURL))
275
+ progress.message = "Post Successfully Downloaded!"
276
+ progress.printLog()
212
277
  end
213
-
214
- progress.message = "Post Successfully Downloaded!"
215
- progress.printLog()
216
-
278
+
217
279
  progress.postPath = nil
218
280
  end
219
281
 
@@ -264,11 +326,16 @@ class ZMediumFetcher
264
326
  end
265
327
 
266
328
  begin
267
- puts "https://github.com/ZhgChgLi/ZMediumToMarkdown"
329
+ puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown"
268
330
  puts "You have read and agree with the Disclaimer."
269
331
  Main.new()
270
- puts "https://github.com/ZhgChgLi/ZMediumToMarkdown"
271
- puts "If this repo is helpful, please help to star this repo or recommend it to your friends. Thanks."
332
+ puts "Execute Successfully!!!"
333
+ puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown"
334
+ puts "#Thanks for using this tool."
335
+ puts "#If this is helpful, please help to star the repo or recommend it to your friends."
272
336
  rescue => e
273
- puts "Error: #{e.class} #{e.message}"
337
+ puts "#Error: #{e.class} #{e.message}\n"
338
+ puts e.backtrace
339
+ puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
340
+ puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
274
341
  end
data/lib/Helper.rb CHANGED
@@ -1,5 +1,8 @@
1
1
  $lib = File.expand_path('../lib', File.dirname(__FILE__))
2
2
 
3
+ require 'date'
4
+ require 'Post'
5
+
3
6
  class Helper
4
7
  def self.createDirIfNotExist(dirPath)
5
8
  dirs = dirPath.split("/")
@@ -11,6 +14,28 @@ class Helper
11
14
  end while dirs.length > 0
12
15
  end
13
16
 
17
+ def self.makeWarningText(message)
18
+ puts "####################################################\n"
19
+ puts "#WARNING:\n"
20
+ puts "##{message}\n"
21
+ puts "#--------------------------------------------------#\n"
22
+ puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
23
+ puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
24
+ puts "####################################################\n"
25
+ end
26
+
27
+ def self.createPostInfo(postInfo)
28
+ result = "---\n"
29
+ result += "title: #{postInfo.title}\n"
30
+ result += "author: #{postInfo.creator}\n"
31
+ result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
32
+ result += "tags: [#{postInfo.tags.join(",")}]\n"
33
+ result += "---\n"
34
+ result += "\r\n"
35
+
36
+ result
37
+ end
38
+
14
39
  def self.createWatermark(postURL)
15
40
  text = "\r\n\r\n\r\n"
16
41
  text += "+-----------------------------------------------------------------------------------+"
@@ -7,7 +7,11 @@ class BQParser < Parser
7
7
  attr_accessor :nextParser
8
8
  def parse(paragraph)
9
9
  if paragraph.type == 'BQ'
10
- "> #{paragraph.text}"
10
+ result = ""
11
+ paragraph.text.each_line do |p|
12
+ result += "> #{p}"
13
+ end
14
+ result
11
15
  else
12
16
  if !nextParser.nil?
13
17
  nextParser.parse(paragraph)
@@ -0,0 +1,22 @@
1
+ $lib = File.expand_path('../', File.dirname(__FILE__))
2
+
3
+ require "Parsers/Parser"
4
+ require 'Models/Paragraph'
5
+
6
+ class CodeBlockParser < Parser
7
+ attr_accessor :nextParser
8
+
9
+ def self.getTypeString()
10
+ 'CODE_BLOCK'
11
+ end
12
+
13
+ def parse(paragraph)
14
+ if paragraph.type == CodeBlockParser.getTypeString()
15
+ "```\n#{paragraph.text}\n```"
16
+ else
17
+ if !nextParser.nil?
18
+ nextParser.parse(paragraph)
19
+ end
20
+ end
21
+ end
22
+ end
@@ -1,12 +1,13 @@
1
1
  $lib = File.expand_path('../', File.dirname(__FILE__))
2
2
 
3
+ require "Helper"
3
4
  require "Parsers/Parser"
4
5
  require 'Models/Paragraph'
5
6
 
6
7
  class FallbackParser < Parser
7
8
  attr_accessor :nextParser
8
9
  def parse(paragraph)
9
- puts paragraph.type
10
+ Helper.makeWarningText("Undefined Paragraph Type: #{paragraph.type}, will treat as plain text temporarily.")
10
11
  "#{paragraph.text}"
11
12
  end
12
13
  end
@@ -7,7 +7,11 @@ class PQParser < Parser
7
7
  attr_accessor :nextParser
8
8
  def parse(paragraph)
9
9
  if paragraph.type == 'PQ'
10
- "> #{paragraph.text}"
10
+ result = ""
11
+ paragraph.text.each_line do |p|
12
+ result += "> #{p}"
13
+ end
14
+ result
11
15
  else
12
16
  if !nextParser.nil?
13
17
  nextParser.parse(paragraph)
@@ -5,9 +5,23 @@ require 'Models/Paragraph'
5
5
 
6
6
  class PREParser < Parser
7
7
  attr_accessor :nextParser
8
+
9
+ def self.isPRE(paragraph)
10
+ if paragraph.nil?
11
+ false
12
+ else
13
+ paragraph.type == "PRE"
14
+ end
15
+ end
16
+
8
17
  def parse(paragraph)
9
- if paragraph.type == 'PRE'
10
- "> #{paragraph.text}"
18
+ if PREParser.isPRE(paragraph)
19
+ result = "```\n"
20
+ paragraph.text.each_line do |p|
21
+ result += p
22
+ end
23
+ result += "\n```"
24
+ result
11
25
  else
12
26
  if !nextParser.nil?
13
27
  nextParser.parse(paragraph)
data/lib/Post.rb CHANGED
@@ -4,8 +4,14 @@ require "Request"
4
4
  require 'uri'
5
5
  require 'nokogiri'
6
6
  require 'json'
7
+ require 'date'
7
8
 
8
9
  class Post
10
+
11
+ class PostInfo
12
+ attr_accessor :title, :tags, :creator, :firstPublishedAt, :latestPublishedAt
13
+ end
14
+
9
15
  def self.getPostIDFromPostURLString(postURLString)
10
16
  uri = URI.parse(postURLString)
11
17
  postID = uri.path.split('/').last.split('-').last
@@ -40,4 +46,27 @@ class Post
40
46
  result.map { |paragraph| content[paragraph["__ref"]] }
41
47
  end
42
48
  end
49
+
50
+ def self.parsePostInfoFromPostContent(content, postID)
51
+ postInfo = PostInfo.new()
52
+ postInfo.title = content&.dig("Post:#{postID}", "title")
53
+ postInfo.tags = content&.dig("Post:#{postID}", "tags").map{ |tag| tag["__ref"].gsub! 'Tag:', '' }
54
+
55
+ creatorRef = content&.dig("Post:#{postID}", "creator", "__ref")
56
+ if !creatorRef.nil?
57
+ postInfo.creator = content&.dig(creatorRef, "name")
58
+ end
59
+
60
+ firstPublishedAt = content&.dig("Post:#{postID}", "firstPublishedAt")
61
+ if !firstPublishedAt.nil?
62
+ postInfo.firstPublishedAt = Time.at(0, firstPublishedAt, :millisecond)
63
+ end
64
+
65
+ latestPublishedAt = content&.dig("Post:#{postID}", "latestPublishedAt")
66
+ if !latestPublishedAt.nil?
67
+ postInfo.latestPublishedAt = Time.at(0, latestPublishedAt, :millisecond)
68
+ end
69
+
70
+ postInfo
71
+ end
43
72
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ZMediumToMarkdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ZhgChgLi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-28 00:00:00.000000000 Z
11
+ date: 2022-05-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -66,6 +66,7 @@ files:
66
66
  - lib/ImageDownloader.rb
67
67
  - lib/Models/Paragraph.rb
68
68
  - lib/Parsers/BQParser.rb
69
+ - lib/Parsers/CodeBlockParser.rb
69
70
  - lib/Parsers/FallbackParser.rb
70
71
  - lib/Parsers/H1Parser.rb
71
72
  - lib/Parsers/H2Parser.rb