ZMediumToMarkdown 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 43d039e9c0ec69d1e765f14c7f23d9b171cd40030d5ea53a81934e074b4176e3
4
- data.tar.gz: 26083e071edd627b747d0a67ca8e84921e2558d2d8b4416be99d1d685c54e232
3
+ metadata.gz: 29245a0299d0f492d7000a27c97f4cfdd305b5bd39b31d1dfbdfd126f938daf1
4
+ data.tar.gz: 7a81eca7da5c8a3d02b80936f2395ff1385ff37ef7092a5f6ae919e9dc817065
5
5
  SHA512:
6
- metadata.gz: 15a68b2b9c048f5dbcfea9a6bcf1285d2a2e55839aaa69766cdc0c437f2d97d29ed1d5b16c40a1e9d0cdf7c504a28ddc9a0e5526d08998d7d3b4e160a66f2f25
7
- data.tar.gz: f27d305f8f2b2886d32b97afe3c967851fe7108bd76bb618010015f6952dc0e2f98e8a14eef8d211025baa1524eefbc8294366047e92570e3ac700889ce94002
6
+ metadata.gz: 592b4a98e54ea032aee4560c23a827637fcfe38bc56b66af7cb1b5799e3a1b8b641f20de03566e96f04b9b8a75ddf97b97e339503f5b49c55afa599a8cdbf31b
7
+ data.tar.gz: e2003629feee6fe3230d4c72059a860e9be0458e28a5fee7640a13c4aa1ef5ec2047d27a21fa8a7a719d19bb601efa245dcec24461738bf0cdc113e6ed1e694c
data/bin/ZMediumFetcher CHANGED
@@ -7,6 +7,7 @@ $LOAD_PATH.unshift($lib)
7
7
  require "open-uri"
8
8
  require 'json'
9
9
  require 'optparse'
10
+ require 'fileutils'
10
11
 
11
12
  require "Parsers/H1Parser"
12
13
  require "Parsers/H2Parser"
@@ -24,6 +25,7 @@ require "Parsers/OLIParser"
24
25
  require "Parsers/MIXTAPEEMBEDParser"
25
26
  require "Parsers/PQParser"
26
27
  require "Parsers/LinkParser"
28
+ require "Parsers/CodeBlockParser"
27
29
 
28
30
  require "PathPolicy"
29
31
  require "Request"
@@ -124,8 +126,10 @@ class ZMediumFetcher
124
126
  imgParser.setNext(bqParser)
125
127
  preParser = PREParser.new()
126
128
  bqParser.setNext(preParser)
129
+ codeBlockParser = CodeBlockParser.new()
130
+ preParser.setNext(codeBlockParser)
127
131
  fallbackParser = FallbackParser.new()
128
- preParser.setNext(fallbackParser)
132
+ codeBlockParser.setNext(fallbackParser)
129
133
 
130
134
 
131
135
  h1Parser
@@ -145,6 +149,8 @@ class ZMediumFetcher
145
149
  if postContent.nil?
146
150
  raise "Error: Content is empty! PostURL: #{postURL}"
147
151
  end
152
+
153
+ postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
148
154
 
149
155
  sourceParagraphs = Post.parsePostParagraphsFromPostContent(postContent, postID)
150
156
  if sourceParagraphs.nil?
@@ -156,7 +162,8 @@ class ZMediumFetcher
156
162
 
157
163
  paragraphs = []
158
164
  oliIndex = 0
159
- preParagraph = nil
165
+ previousParagraph = nil
166
+ preTypeParagraphs = []
160
167
  sourceParagraphs.each do |sourcParagraph|
161
168
  paragraph = Paragraph.new(sourcParagraph, postID, postContent)
162
169
  if OLIParser.isOLI(paragraph)
@@ -168,13 +175,55 @@ class ZMediumFetcher
168
175
 
169
176
  # if previous is OLI or ULI and current is not OLI or ULI
170
177
  # then insert a blank paragraph to keep the markdown format correct
171
- if (OLIParser.isOLI(preParagraph) && !OLIParser.isOLI(paragraph)) ||
172
- (ULIParser.isULI(preParagraph) && !ULIParser.isULI(paragraph))
178
+ if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
179
+ (ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
173
180
  paragraphs.append(Paragraph.makeBlankParagraph(postID))
174
181
  end
175
182
 
183
+ # group by PRE paragraph to code block
184
+ # because Medium emits consecutive PRE paragraphs to represent a single code block
185
+ # e.g.
186
+ # type=pre, text=<html>
187
+ # type=pre, text=text
188
+ # type=pre, text=</html>
189
+
190
+ if !previousParagraph.nil?
191
+ if PREParser.isPRE(paragraph)
192
+ # if current is pre
193
+ preTypeParagraphs.append(paragraph)
194
+ elsif PREParser.isPRE(previousParagraph) && !PREParser.isPRE(paragraph)
195
+ # if current is not pre and previousParagraph is pre and preTypeParagraphs > 1
196
+ if preTypeParagraphs.length > 1
197
+ lastPreTypeParagraph = preTypeParagraphs.pop
198
+
199
+ # group by preParagraphs text to last preParagraph
200
+ groupByText = ""
201
+ preTypeParagraphs.each do |preTypeParagraph|
202
+ if groupByText != ""
203
+ groupByText += "\n"
204
+ end
205
+
206
+ markupParser = MarkupParser.new(postHtml, preTypeParagraph)
207
+ groupByText += markupParser.parse()
208
+ end
209
+
210
+ lastPreTypeParagraph.text = "#{groupByText}"
211
+ lastPreTypeParagraph.type = CodeBlockParser.getTypeString()
212
+
213
+ # remove all preParagraphs
214
+ preTypeParagraphNames = preTypeParagraphs.map do |preTypeParagraph|
215
+ preTypeParagraph.name
216
+ end
217
+ paragraphs = paragraphs.select do |paragraph|
218
+ !preTypeParagraphNames.include? paragraph.name
219
+ end
220
+ end
221
+ preTypeParagraphs = []
222
+ end
223
+ end
224
+
176
225
  paragraphs.append(paragraph)
177
- preParagraph = paragraph
226
+ previousParagraph = paragraph
178
227
  end
179
228
 
180
229
  postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
@@ -188,32 +237,45 @@ class ZMediumFetcher
188
237
  progress.printLog()
189
238
 
190
239
  absolutePath = postPathPolicy.getAbsolutePath("#{postPath}.md")
191
- Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
192
- index = 0
193
- File.open(absolutePath, "w+") do |file|
194
- paragraphs.each do |paragraph|
195
- markupParser = MarkupParser.new(postHtml, paragraph)
196
- paragraph.text = markupParser.parse()
197
- result = startParser.parse(paragraph)
198
-
199
- if !linkParser.nil?
200
- result = linkParser.parse(result, paragraph.markupLinks)
201
- end
240
+
241
+ # if the markdown file exists and its last modification time is >= latestPublishedAt (the post's last update time on Medium)
242
+ if File.file?(absolutePath) && File.mtime(absolutePath) >= postInfo.latestPublishedAt
243
+ # Already downloaded and nothing has changed!, Skip!
244
+ progress.currentPostParagraphIndex = paragraphs.length
245
+ progress.message = "Skip, Post already downloaded and nothing has changed!"
246
+ progress.printLog()
247
+ else
248
+ Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
249
+ File.open(absolutePath, "w+") do |file|
250
+ # write postInfo into top
251
+ file.puts(Helper.createPostInfo(postInfo))
202
252
 
203
- file.puts(result)
204
-
205
- index += 1
206
- progress.currentPostParagraphIndex = index
207
- progress.message = "Converting Post..."
208
- progress.printLog()
253
+ index = 0
254
+ paragraphs.each do |paragraph|
255
+ markupParser = MarkupParser.new(postHtml, paragraph)
256
+ paragraph.text = markupParser.parse()
257
+ result = startParser.parse(paragraph)
258
+
259
+ if !linkParser.nil?
260
+ result = linkParser.parse(result, paragraph.markupLinks)
261
+ end
262
+
263
+ file.puts(result)
264
+
265
+ index += 1
266
+ progress.currentPostParagraphIndex = index
267
+ progress.message = "Converting Post..."
268
+ progress.printLog()
269
+ end
270
+
271
+ file.puts(Helper.createWatermark(postURL))
209
272
  end
273
+ FileUtils.touch absolutePath, :mtime => postInfo.latestPublishedAt
210
274
 
211
- file.puts(Helper.createWatermark(postURL))
275
+ progress.message = "Post Successfully Downloaded!"
276
+ progress.printLog()
212
277
  end
213
-
214
- progress.message = "Post Successfully Downloaded!"
215
- progress.printLog()
216
-
278
+
217
279
  progress.postPath = nil
218
280
  end
219
281
 
@@ -264,11 +326,16 @@ class ZMediumFetcher
264
326
  end
265
327
 
266
328
  begin
267
- puts "https://github.com/ZhgChgLi/ZMediumToMarkdown"
329
+ puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown"
268
330
  puts "You have read and agree with the Disclaimer."
269
331
  Main.new()
270
- puts "https://github.com/ZhgChgLi/ZMediumToMarkdown"
271
- puts "If this repo is helpful, please help to star this repo or recommend it to your friends. Thanks."
332
+ puts "Execute Successfully!!!"
333
+ puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown"
334
+ puts "#Thanks for using this tool."
335
+ puts "#If this is helpful, please help to star the repo or recommend it to your friends."
272
336
  rescue => e
273
- puts "Error: #{e.class} #{e.message}"
337
+ puts "#Error: #{e.class} #{e.message}\n"
338
+ puts e.backtrace
339
+ puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
340
+ puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
274
341
  end
data/lib/Helper.rb CHANGED
@@ -1,5 +1,8 @@
1
1
  $lib = File.expand_path('../lib', File.dirname(__FILE__))
2
2
 
3
+ require 'date'
4
+ require 'Post'
5
+
3
6
  class Helper
4
7
  def self.createDirIfNotExist(dirPath)
5
8
  dirs = dirPath.split("/")
@@ -11,6 +14,28 @@ class Helper
11
14
  end while dirs.length > 0
12
15
  end
13
16
 
17
# Prints a framed warning banner to standard output.
#
# The banner consists of a top border, a "#WARNING:" heading, the given
# message prefixed with "#", a separator, a pointer to the project's issue
# tracker, and a bottom border.
#
# @param message [#to_s] text to show inside the banner
# @return [nil]
def self.makeWarningText(message)
  border = "####################################################\n"

  # `puts` with an array writes each element on its own line and does not add
  # a second newline to elements that already end with one.
  puts [
    border,
    "#WARNING:\n",
    "##{message}\n",
    "#--------------------------------------------------#\n",
    "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n",
    "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n",
    border
  ]
end
26
+
27
# Builds the `---`-delimited header block written at the top of a post file.
#
# The header lists the post's title, author, first publish date and tags,
# followed by a blank "\r\n" line.
#
# @param postInfo [#title, #creator, #firstPublishedAt, #tags] post metadata;
#   firstPublishedAt and tags may be nil
# @return [String] the header text
def self.createPostInfo(postInfo)
  publishedAt = postInfo.firstPublishedAt

  # The format string ends with a literal "Z" (UTC designator), so the time
  # must be expressed in UTC first; otherwise a local time is mislabelled as
  # UTC. getutc returns a new Time and leaves the caller's object untouched.
  publishedAt = publishedAt.getutc if publishedAt.respond_to?(:getutc)
  date = publishedAt.nil? ? "" : publishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')

  # A missing tag list is rendered as an empty list instead of raising.
  tags = (postInfo.tags || []).join(",")

  result = "---\n"
  result += "title: #{postInfo.title}\n"
  result += "author: #{postInfo.creator}\n"
  result += "date: #{date}\n"
  result += "tags: [#{tags}]\n"
  result += "---\n"
  result += "\r\n"

  result
end
38
+
14
39
  def self.createWatermark(postURL)
15
40
  text = "\r\n\r\n\r\n"
16
41
  text += "+-----------------------------------------------------------------------------------+"
@@ -7,7 +7,11 @@ class BQParser < Parser
7
7
  attr_accessor :nextParser
8
8
  def parse(paragraph)
9
9
  if paragraph.type == 'BQ'
10
- "> #{paragraph.text}"
10
+ result = ""
11
+ paragraph.text.each_line do |p|
12
+ result += "> #{p}"
13
+ end
14
+ result
11
15
  else
12
16
  if !nextParser.nil?
13
17
  nextParser.parse(paragraph)
@@ -0,0 +1,22 @@
1
+ $lib = File.expand_path('../', File.dirname(__FILE__))
2
+
3
+ require "Parsers/Parser"
4
+ require 'Models/Paragraph'
5
+
6
# Parser-chain link that renders grouped code paragraphs as a fenced
# markdown code block.
class CodeBlockParser < Parser
  # Next parser in the chain; consulted when this parser does not handle
  # the paragraph.
  attr_accessor :nextParser

  # @return [String] the paragraph type string this parser handles
  def self.getTypeString
    'CODE_BLOCK'
  end

  # Wraps the paragraph text in a ``` fence when its type is the code-block
  # type; otherwise hands the paragraph to the next parser.
  #
  # @param paragraph [#type, #text] paragraph to render
  # @return [String, nil] rendered text, the next parser's result, or nil
  #   when there is no next parser
  def parse(paragraph)
    handled = paragraph.type == CodeBlockParser.getTypeString
    return "```\n#{paragraph.text}\n```" if handled

    nextParser&.parse(paragraph)
  end
end
@@ -1,12 +1,13 @@
1
1
  $lib = File.expand_path('../', File.dirname(__FILE__))
2
2
 
3
+ require "Helper"
3
4
  require "Parsers/Parser"
4
5
  require 'Models/Paragraph'
5
6
 
6
7
  class FallbackParser < Parser
7
8
  attr_accessor :nextParser
8
9
  def parse(paragraph)
9
- puts paragraph.type
10
+ Helper.makeWarningText("Undefined Paragraph Type: #{paragraph.type}, will treat as plain text temporarily.")
10
11
  "#{paragraph.text}"
11
12
  end
12
13
  end
@@ -7,7 +7,11 @@ class PQParser < Parser
7
7
  attr_accessor :nextParser
8
8
  def parse(paragraph)
9
9
  if paragraph.type == 'PQ'
10
- "> #{paragraph.text}"
10
+ result = ""
11
+ paragraph.text.each_line do |p|
12
+ result += "> #{p}"
13
+ end
14
+ result
11
15
  else
12
16
  if !nextParser.nil?
13
17
  nextParser.parse(paragraph)
@@ -5,9 +5,23 @@ require 'Models/Paragraph'
5
5
 
6
6
  class PREParser < Parser
7
7
  attr_accessor :nextParser
8
+
9
# Tells whether the given paragraph is of type "PRE".
#
# @param paragraph [#type, nil] paragraph to inspect; nil is allowed
# @return [Boolean] false for nil, otherwise whether type equals "PRE"
def self.isPRE(paragraph)
  return false if paragraph.nil?

  paragraph.type == "PRE"
end
16
+
8
17
  def parse(paragraph)
9
- if paragraph.type == 'PRE'
10
- "> #{paragraph.text}"
18
+ if PREParser.isPRE(paragraph)
19
+ result = "```\n"
20
+ paragraph.text.each_line do |p|
21
+ result += p
22
+ end
23
+ result += "\n```"
24
+ result
11
25
  else
12
26
  if !nextParser.nil?
13
27
  nextParser.parse(paragraph)
data/lib/Post.rb CHANGED
@@ -4,8 +4,14 @@ require "Request"
4
4
  require 'uri'
5
5
  require 'nokogiri'
6
6
  require 'json'
7
+ require 'date'
7
8
 
8
9
  class Post
10
+
11
# Plain value holder for the metadata of a post.
class PostInfo
  attr_accessor :title
  attr_accessor :tags
  attr_accessor :creator
  attr_accessor :firstPublishedAt
  attr_accessor :latestPublishedAt
end
14
+
9
15
  def self.getPostIDFromPostURLString(postURLString)
10
16
  uri = URI.parse(postURLString)
11
17
  postID = uri.path.split('/').last.split('-').last
@@ -40,4 +46,27 @@ class Post
40
46
  result.map { |paragraph| content[paragraph["__ref"]] }
41
47
  end
42
48
  end
49
+
50
# Builds a PostInfo from a post's content structure.
#
# Reads the "Post:<postID>" entry of +content+ for the title, tag references,
# creator reference and the first/latest publish timestamps. The creator's
# name is looked up through the creator reference. Timestamps are converted
# with Time.at using the :millisecond unit.
#
# @param content [#dig, nil] nested structure keyed by strings
# @param postID [String] id used to build the "Post:<postID>" key
# @return [PostInfo] fields that are absent stay nil, except tags, which is
#   an empty array when the post has no tag list
def self.parsePostInfoFromPostContent(content, postID)
  postKey = "Post:#{postID}"

  postInfo = PostInfo.new()
  postInfo.title = content&.dig(postKey, "title")

  # `&.` only guards the dig call itself; a nil result must be handled before
  # iterating, otherwise a post without tags raises NoMethodError.
  tagEntries = content&.dig(postKey, "tags") || []
  tagRefs = tagEntries.map { |tag| tag["__ref"] }.compact
  # Non-destructive gsub: gsub! returns nil when nothing was replaced and
  # would also modify the strings stored inside +content+.
  postInfo.tags = tagRefs.map { |ref| ref.gsub('Tag:', '') }

  creatorRef = content&.dig(postKey, "creator", "__ref")
  postInfo.creator = content&.dig(creatorRef, "name") unless creatorRef.nil?

  firstPublishedAt = content&.dig(postKey, "firstPublishedAt")
  unless firstPublishedAt.nil?
    postInfo.firstPublishedAt = Time.at(0, firstPublishedAt, :millisecond)
  end

  latestPublishedAt = content&.dig(postKey, "latestPublishedAt")
  unless latestPublishedAt.nil?
    postInfo.latestPublishedAt = Time.at(0, latestPublishedAt, :millisecond)
  end

  postInfo
end
43
72
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ZMediumToMarkdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ZhgChgLi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-28 00:00:00.000000000 Z
11
+ date: 2022-05-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -66,6 +66,7 @@ files:
66
66
  - lib/ImageDownloader.rb
67
67
  - lib/Models/Paragraph.rb
68
68
  - lib/Parsers/BQParser.rb
69
+ - lib/Parsers/CodeBlockParser.rb
69
70
  - lib/Parsers/FallbackParser.rb
70
71
  - lib/Parsers/H1Parser.rb
71
72
  - lib/Parsers/H2Parser.rb