ZMediumToMarkdown 1.0.0 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4a64eb0d39c1be48b0ddd24863c802f4f76234ebeee73161cf4b74fe8e5e4392
4
- data.tar.gz: 88f480de193e4d89069bc3e5fd0bd6efb83c25cac4d8b65b9ec4e96efd459102
3
+ metadata.gz: 1f7805fe04110dc50c764983746065030b4cc87865810de1a50c3b5b89f646d0
4
+ data.tar.gz: 5eb355fcd4b28a1d0e704e80b5e3e564e770b681f4cd40302f556e647e3cda95
5
5
  SHA512:
6
- metadata.gz: 4a85b5bf8e8b98a52d859cdf1b144a2d7ede7dc5701d2695f36296a34f8f5e9004aa83092effb7ab2ed21d9ae9cf3ca3d669cb915c7e73d5f3342835c35cdc12
7
- data.tar.gz: '019224020c3d4071f0a8f2ac86c1beb22abaca096862dbf8d50dbe14ca0fdbf37ff8d78dc9b88b42a5af40107595e07a8d83868d0409d57486bbf894ad8c2e63'
6
+ metadata.gz: ff70606758386f70dc7fdc9ce1a8a45c6e4c6a136f27403794217be4f0acb59b94b00d2caca74f4ba6b464691fa1312ea2e9e94caf20785c5c758ced7de0aa6d
7
+ data.tar.gz: d50e747c1acdd6f1f3536711a5c1f15e2fcb729741efe584f8e878fb13e9a461b664e34b0143aef879e1504054ecb108204cb8521a1be200e9cfae52a1a1c8f0
data/bin/ZMediumFetcher CHANGED
@@ -24,6 +24,7 @@ require "Parsers/OLIParser"
24
24
  require "Parsers/MIXTAPEEMBEDParser"
25
25
  require "Parsers/PQParser"
26
26
  require "Parsers/LinkParser"
27
+ require "Parsers/CodeBlockParser"
27
28
 
28
29
  require "PathPolicy"
29
30
  require "Request"
@@ -124,8 +125,10 @@ class ZMediumFetcher
124
125
  imgParser.setNext(bqParser)
125
126
  preParser = PREParser.new()
126
127
  bqParser.setNext(preParser)
128
+ codeBlockParser = CodeBlockParser.new()
129
+ preParser.setNext(codeBlockParser)
127
130
  fallbackParser = FallbackParser.new()
128
- preParser.setNext(fallbackParser)
131
+ codeBlockParser.setNext(fallbackParser)
129
132
 
130
133
 
131
134
  h1Parser
@@ -145,6 +148,8 @@ class ZMediumFetcher
145
148
  if postContent.nil?
146
149
  raise "Error: Content is empty! PostURL: #{postURL}"
147
150
  end
151
+
152
+ postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
148
153
 
149
154
  sourceParagraphs = Post.parsePostParagraphsFromPostContent(postContent, postID)
150
155
  if sourceParagraphs.nil?
@@ -156,7 +161,8 @@ class ZMediumFetcher
156
161
 
157
162
  paragraphs = []
158
163
  oliIndex = 0
159
- preParagraph = nil
164
+ previousParagraph = nil
165
+ preTypeParagraphs = []
160
166
  sourceParagraphs.each do |sourcParagraph|
161
167
  paragraph = Paragraph.new(sourcParagraph, postID, postContent)
162
168
  if OLIParser.isOLI(paragraph)
@@ -168,13 +174,55 @@ class ZMediumFetcher
168
174
 
169
175
  # if previous is OLI or ULI and current is not OLI or ULI
170
176
  # then insert a blank paragraph to keep the markdown format correct
171
- if (OLIParser.isOLI(preParagraph) && !OLIParser.isOLI(paragraph)) ||
172
- (ULIParser.isULI(preParagraph) && !ULIParser.isULI(paragraph))
177
+ if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
178
+ (ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
173
179
  paragraphs.append(Paragraph.makeBlankParagraph(postID))
174
180
  end
175
181
 
182
+ # group consecutive PRE paragraphs into a single code block
183
+ # because Medium emits consecutive PRE paragraphs to represent one code block
184
+ # e.g.
185
+ # type=pre, text=<html>
186
+ # type=pre, text=text
187
+ # type=pre, text=</html>
188
+
189
+ if !previousParagraph.nil?
190
+ if PREParser.isPRE(paragraph)
191
+ # if current is pre
192
+ preTypeParagraphs.append(paragraph)
193
+ elsif PREParser.isPRE(previousParagraph) && !PREParser.isPRE(paragraph)
194
+ # if current is not PRE and previousParagraph is PRE and preTypeParagraphs.length > 1
195
+ if preTypeParagraphs.length > 1
196
+ lastPreTypeParagraph = preTypeParagraphs.pop
197
+
198
+ # group by preParagraphs text to last preParagraph
199
+ groupByText = ""
200
+ preTypeParagraphs.each do |preTypeParagraph|
201
+ if groupByText != ""
202
+ groupByText += "\n"
203
+ end
204
+
205
+ markupParser = MarkupParser.new(postHtml, preTypeParagraph)
206
+ groupByText += markupParser.parse()
207
+ end
208
+
209
+ lastPreTypeParagraph.text = "#{groupByText}"
210
+ lastPreTypeParagraph.type = CodeBlockParser.getTypeString()
211
+
212
+ # remove all preParagraphs
213
+ preTypeParagraphNames = preTypeParagraphs.map do |preTypeParagraph|
214
+ preTypeParagraph.name
215
+ end
216
+ paragraphs = paragraphs.select do |paragraph|
217
+ !preTypeParagraphNames.include? paragraph.name
218
+ end
219
+ end
220
+ preTypeParagraphs = []
221
+ end
222
+ end
223
+
176
224
  paragraphs.append(paragraph)
177
- preParagraph = paragraph
225
+ previousParagraph = paragraph
178
226
  end
179
227
 
180
228
  postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
@@ -191,6 +239,10 @@ class ZMediumFetcher
191
239
  Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
192
240
  index = 0
193
241
  File.open(absolutePath, "w+") do |file|
242
+ # write postInfo at the top of the file
243
+
244
+ file.puts(Helper.createPostInfo(postInfo))
245
+
194
246
  paragraphs.each do |paragraph|
195
247
  markupParser = MarkupParser.new(postHtml, paragraph)
196
248
  paragraph.text = markupParser.parse()
@@ -268,7 +320,11 @@ begin
268
320
  puts "You have read and agree with the Disclaimer."
269
321
  Main.new()
270
322
  puts "https://github.com/ZhgChgLi/ZMediumToMarkdown"
271
- puts "If this repo is helpful, please help to star this repo or recommend it to your friends. Thanks."
323
+ puts "Thanks for using this tool."
324
+ puts "If this is helpful, please help to star the repo or recommend it to your friends."
272
325
  rescue => e
273
- puts "Error: #{e.class} #{e.message}"
326
+ puts "#Error: #{e.class} #{e.message}\n"
327
+ puts e.backtrace
328
+ puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
329
+ puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
274
330
  end
data/lib/Helper.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  $lib = File.expand_path('../lib', File.dirname(__FILE__))
2
2
 
3
+ require 'Post'
4
+
3
5
  class Helper
4
6
  def self.createDirIfNotExist(dirPath)
5
7
  dirs = dirPath.split("/")
@@ -11,12 +13,36 @@ class Helper
11
13
  end while dirs.length > 0
12
14
  end
13
15
 
16
+ def self.makeWarningText(message)
17
+ puts "####################################################\n"
18
+ puts "#WARNING:\n"
19
+ puts "##{message}\n"
20
+ puts "#--------------------------------------------------#\n"
21
+ puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
22
+ puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
23
+ puts "####################################################\n"
24
+ end
25
+
26
+ def self.createPostInfo(postInfo)
27
+ result = "---\n"
28
+ result += "title: #{postInfo.title}\n"
29
+ result += "author: #{postInfo.creator}\n"
30
+ result += "date: #{postInfo.firstPublishedAt}\n"
31
+ result += "tags: [#{postInfo.tags.join(",")}]\n"
32
+ result += "---\n"
33
+ result += "\r\n"
34
+
35
+ result
36
+ end
37
+
14
38
  def self.createWatermark(postURL)
15
39
  text = "\r\n\r\n\r\n"
16
40
  text += "+-----------------------------------------------------------------------------------+"
17
41
  text += "\r\n"
42
+ text += "\r\n"
18
43
  text += "| **[View original post on Medium](#{postURL}) - Converted by [ZhgChgLi](https://blog.zhgchg.li)/[ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)** |"
19
44
  text += "\r\n"
45
+ text += "\r\n"
20
46
  text += "+-----------------------------------------------------------------------------------+"
21
47
  text += "\r\n"
22
48
 
@@ -7,7 +7,11 @@ class BQParser < Parser
7
7
  attr_accessor :nextParser
8
8
  def parse(paragraph)
9
9
  if paragraph.type == 'BQ'
10
- "> #{paragraph.text}"
10
+ result = ""
11
+ paragraph.text.each_line do |p|
12
+ result += "> #{p}"
13
+ end
14
+ result
11
15
  else
12
16
  if !nextParser.nil?
13
17
  nextParser.parse(paragraph)
@@ -0,0 +1,22 @@
1
+ $lib = File.expand_path('../', File.dirname(__FILE__))
2
+
3
+ require "Parsers/Parser"
4
+ require 'Models/Paragraph'
5
+
6
+ class CodeBlockParser < Parser
7
+ attr_accessor :nextParser
8
+
9
+ def self.getTypeString()
10
+ 'CODE_BLOCK'
11
+ end
12
+
13
+ def parse(paragraph)
14
+ if paragraph.type == CodeBlockParser.getTypeString()
15
+ "```\n#{paragraph.text}\n```"
16
+ else
17
+ if !nextParser.nil?
18
+ nextParser.parse(paragraph)
19
+ end
20
+ end
21
+ end
22
+ end
@@ -1,12 +1,13 @@
1
1
  $lib = File.expand_path('../', File.dirname(__FILE__))
2
2
 
3
+ require "Helper"
3
4
  require "Parsers/Parser"
4
5
  require 'Models/Paragraph'
5
6
 
6
7
  class FallbackParser < Parser
7
8
  attr_accessor :nextParser
8
9
  def parse(paragraph)
9
- puts paragraph.type
10
+ Helper.makeWarningText("Undefined Paragraph Type: #{paragraph.type}, will treat as plain text temporarily.")
10
11
  "#{paragraph.text}"
11
12
  end
12
13
  end
@@ -7,7 +7,11 @@ class PQParser < Parser
7
7
  attr_accessor :nextParser
8
8
  def parse(paragraph)
9
9
  if paragraph.type == 'PQ'
10
- "> #{paragraph.text}"
10
+ result = ""
11
+ paragraph.text.each_line do |p|
12
+ result += "> #{p}"
13
+ end
14
+ result
11
15
  else
12
16
  if !nextParser.nil?
13
17
  nextParser.parse(paragraph)
@@ -5,9 +5,23 @@ require 'Models/Paragraph'
5
5
 
6
6
  class PREParser < Parser
7
7
  attr_accessor :nextParser
8
+
9
+ def self.isPRE(paragraph)
10
+ if paragraph.nil?
11
+ false
12
+ else
13
+ paragraph.type == "PRE"
14
+ end
15
+ end
16
+
8
17
  def parse(paragraph)
9
- if paragraph.type == 'PRE'
10
- "> #{paragraph.text}"
18
+ if PREParser.isPRE(paragraph)
19
+ result = "```\n"
20
+ paragraph.text.each_line do |p|
21
+ result += p
22
+ end
23
+ result += "\n```"
24
+ result
11
25
  else
12
26
  if !nextParser.nil?
13
27
  nextParser.parse(paragraph)
data/lib/Post.rb CHANGED
@@ -4,8 +4,14 @@ require "Request"
4
4
  require 'uri'
5
5
  require 'nokogiri'
6
6
  require 'json'
7
+ require 'date'
7
8
 
8
9
  class Post
10
+
11
+ class PostInfo
12
+ attr_accessor :title, :tags, :creator, :firstPublishedAt
13
+ end
14
+
9
15
  def self.getPostIDFromPostURLString(postURLString)
10
16
  uri = URI.parse(postURLString)
11
17
  postID = uri.path.split('/').last.split('-').last
@@ -40,4 +46,22 @@ class Post
40
46
  result.map { |paragraph| content[paragraph["__ref"]] }
41
47
  end
42
48
  end
49
+
50
+ def self.parsePostInfoFromPostContent(content, postID)
51
+ postInfo = PostInfo.new()
52
+ postInfo.title = content&.dig("Post:#{postID}", "title")
53
+ postInfo.tags = content&.dig("Post:#{postID}", "tags").map{ |tag| tag["__ref"].gsub! 'Tag:', '' }
54
+
55
+ creatorRef = content&.dig("Post:#{postID}", "creator", "__ref")
56
+ if !creatorRef.nil?
57
+ postInfo.creator = content&.dig(creatorRef, "name")
58
+ end
59
+
60
+ firstPublishedAt = content&.dig("Post:#{postID}", "firstPublishedAt")
61
+ if !firstPublishedAt.nil?
62
+ postInfo.firstPublishedAt = DateTime.strptime(firstPublishedAt.to_s,'%Q')
63
+ end
64
+
65
+ postInfo
66
+ end
43
67
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ZMediumToMarkdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - ZhgChgLi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-28 00:00:00.000000000 Z
11
+ date: 2022-05-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -66,6 +66,7 @@ files:
66
66
  - lib/ImageDownloader.rb
67
67
  - lib/Models/Paragraph.rb
68
68
  - lib/Parsers/BQParser.rb
69
+ - lib/Parsers/CodeBlockParser.rb
69
70
  - lib/Parsers/FallbackParser.rb
70
71
  - lib/Parsers/H1Parser.rb
71
72
  - lib/Parsers/H2Parser.rb