ZMediumToMarkdown 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 43d039e9c0ec69d1e765f14c7f23d9b171cd40030d5ea53a81934e074b4176e3
4
- data.tar.gz: 26083e071edd627b747d0a67ca8e84921e2558d2d8b4416be99d1d685c54e232
3
+ metadata.gz: a2243519ed0bc3c844758752f194791e882eb88371e91c082cb6e342c5fb94c0
4
+ data.tar.gz: 32fd50f1f531288f8d0e0f13a01e0412bd7acd3341f2b4e9af059635a0478f14
5
5
  SHA512:
6
- metadata.gz: 15a68b2b9c048f5dbcfea9a6bcf1285d2a2e55839aaa69766cdc0c437f2d97d29ed1d5b16c40a1e9d0cdf7c504a28ddc9a0e5526d08998d7d3b4e160a66f2f25
7
- data.tar.gz: f27d305f8f2b2886d32b97afe3c967851fe7108bd76bb618010015f6952dc0e2f98e8a14eef8d211025baa1524eefbc8294366047e92570e3ac700889ce94002
6
+ metadata.gz: cce4567526e3db1c0d92ef7fa5b1f2953a4242c8dc505fd35f090deee5df8eabd81762712b72ebd4a854082c3a4eae2057622f7e4283335d1df98a191235f38a
7
+ data.tar.gz: c87a247402fdc62ab3634ffaa422729cfb5fdfe1f0f6220a70c5e5bb598a542f05a64b97a42c747cab7521f63d8942f179bfb74def31ccefa0d49002da60d673
data/bin/ZMediumFetcher CHANGED
@@ -24,6 +24,7 @@ require "Parsers/OLIParser"
24
24
  require "Parsers/MIXTAPEEMBEDParser"
25
25
  require "Parsers/PQParser"
26
26
  require "Parsers/LinkParser"
27
+ require "Parsers/CodeBlockParser"
27
28
 
28
29
  require "PathPolicy"
29
30
  require "Request"
@@ -124,8 +125,10 @@ class ZMediumFetcher
124
125
  imgParser.setNext(bqParser)
125
126
  preParser = PREParser.new()
126
127
  bqParser.setNext(preParser)
128
+ codeBlockParser = CodeBlockParser.new()
129
+ preParser.setNext(codeBlockParser)
127
130
  fallbackParser = FallbackParser.new()
128
- preParser.setNext(fallbackParser)
131
+ codeBlockParser.setNext(fallbackParser)
129
132
 
130
133
 
131
134
  h1Parser
@@ -145,6 +148,8 @@ class ZMediumFetcher
145
148
  if postContent.nil?
146
149
  raise "Error: Content is empty! PostURL: #{postURL}"
147
150
  end
151
+
152
+ postInfo = Post.parsePostInfoFromPostContent(postContent, postID)
148
153
 
149
154
  sourceParagraphs = Post.parsePostParagraphsFromPostContent(postContent, postID)
150
155
  if sourceParagraphs.nil?
@@ -156,7 +161,8 @@ class ZMediumFetcher
156
161
 
157
162
  paragraphs = []
158
163
  oliIndex = 0
159
- preParagraph = nil
164
+ previousParagraph = nil
165
+ preTypeParagraphs = []
160
166
  sourceParagraphs.each do |sourcParagraph|
161
167
  paragraph = Paragraph.new(sourcParagraph, postID, postContent)
162
168
  if OLIParser.isOLI(paragraph)
@@ -168,13 +174,55 @@ class ZMediumFetcher
168
174
 
169
175
  # if previous is OLI or ULI and current is not OLI or ULI
170
176
  # then insert a blank paragraph to keep the markdown format correct
171
- if (OLIParser.isOLI(preParagraph) && !OLIParser.isOLI(paragraph)) ||
172
- (ULIParser.isULI(preParagraph) && !ULIParser.isULI(paragraph))
177
+ if (OLIParser.isOLI(previousParagraph) && !OLIParser.isOLI(paragraph)) ||
178
+ (ULIParser.isULI(previousParagraph) && !ULIParser.isULI(paragraph))
173
179
  paragraphs.append(Paragraph.makeBlankParagraph(postID))
174
180
  end
175
181
 
182
+ # group by PRE paragraph to code block
183
+ # because Medium represents a code block as consecutive pre paragraphs
184
+ # e.g.
185
+ # type=pre, text=<html>
186
+ # type=pre, text=text
187
+ # type=pre, text=</html>
188
+
189
+ if !previousParagraph.nil?
190
+ if PREParser.isPRE(paragraph)
191
+ # if current is pre
192
+ preTypeParagraphs.append(paragraph)
193
+ elsif PREParser.isPRE(previousParagraph) && !PREParser.isPRE(paragraph)
194
+ # if current is not pre and previousParagraph is pre and preTypeParagraphs.length > 1
195
+ if preTypeParagraphs.length > 1
196
+ lastPreTypeParagraph = preTypeParagraphs.pop
197
+
198
+ # group by preParagraphs text to last preParagraph
199
+ groupByText = ""
200
+ preTypeParagraphs.each do |preTypeParagraph|
201
+ if groupByText != ""
202
+ groupByText += "\n"
203
+ end
204
+
205
+ markupParser = MarkupParser.new(postHtml, preTypeParagraph)
206
+ groupByText += markupParser.parse()
207
+ end
208
+
209
+ lastPreTypeParagraph.text = "#{groupByText}"
210
+ lastPreTypeParagraph.type = CodeBlockParser.getTypeString()
211
+
212
+ # remove all preParagraphs
213
+ preTypeParagraphNames = preTypeParagraphs.map do |preTypeParagraph|
214
+ preTypeParagraph.name
215
+ end
216
+ paragraphs = paragraphs.select do |paragraph|
217
+ !preTypeParagraphNames.include? paragraph.name
218
+ end
219
+ end
220
+ preTypeParagraphs = []
221
+ end
222
+ end
223
+
176
224
  paragraphs.append(paragraph)
177
- preParagraph = paragraph
225
+ previousParagraph = paragraph
178
226
  end
179
227
 
180
228
  postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
@@ -191,6 +239,10 @@ class ZMediumFetcher
191
239
  Helper.createDirIfNotExist(postPathPolicy.getAbsolutePath(nil))
192
240
  index = 0
193
241
  File.open(absolutePath, "w+") do |file|
242
+ # write postInfo into top
243
+
244
+ file.puts(Helper.createPostInfo(postInfo))
245
+
194
246
  paragraphs.each do |paragraph|
195
247
  markupParser = MarkupParser.new(postHtml, paragraph)
196
248
  paragraph.text = markupParser.parse()
@@ -268,7 +320,11 @@ begin
268
320
  puts "You have read and agree with the Disclaimer."
269
321
  Main.new()
270
322
  puts "https://github.com/ZhgChgLi/ZMediumToMarkdown"
271
- puts "If this repo is helpful, please help to star this repo or recommend it to your friends. Thanks."
323
+ puts "Thanks for using this tool."
324
+ puts "If this is helpful, please help to star the repo or recommend it to your friends."
272
325
  rescue => e
273
- puts "Error: #{e.class} #{e.message}"
326
+ puts "#Error: #{e.class} #{e.message}\n"
327
+ puts e.backtrace
328
+ puts "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n"
329
+ puts "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n"
274
330
  end
data/lib/Helper.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  $lib = File.expand_path('../lib', File.dirname(__FILE__))
2
2
 
3
+ require 'Post'
4
+
3
5
  class Helper
4
6
  def self.createDirIfNotExist(dirPath)
5
7
  dirs = dirPath.split("/")
@@ -11,6 +13,28 @@ class Helper
11
13
  end while dirs.length > 0
12
14
  end
13
15
 
16
# Prints a framed warning banner to standard output.
#
# The banner consists of a border line, a "#WARNING:" heading, the given
# message prefixed with "#", a separator, a pointer to the project's
# GitHub repository and a closing border line.
#
# @param message [#to_s] text interpolated into the banner's message line
# @return [nil] the result of Kernel#puts
def self.makeWarningText(message)
  # Every entry already ends with "\n", so puts writes each one verbatim
  # without appending an extra newline.
  bannerLines = [
    "####################################################\n",
    "#WARNING:\n",
    "##{message}\n",
    "#--------------------------------------------------#\n",
    "#Please feel free to open an Issue or submit a fix/contribution via Pull Request on:\n",
    "#https://github.com/ZhgChgLi/ZMediumToMarkdown\n",
    "####################################################\n"
  ]
  puts bannerLines
end
25
+
26
# Builds the front-matter header that is written at the top of a
# generated Markdown file.
#
# The result is a "---" delimited block with title, author, date and
# tags lines, followed by one "\r\n" blank line.
#
# @param postInfo [#title, #creator, #firstPublishedAt, #tags] post
#   metadata; any attribute may be nil, in which case its value is
#   rendered empty (tags render as "[]")
# @return [String] the front-matter block
def self.createPostInfo(postInfo)
  # Array() turns a missing (nil) tag list into [], so a post without
  # tags no longer raises NoMethodError on nil.join.
  tagList = Array(postInfo.tags).join(",")

  # NOTE(review): values are interpolated verbatim and are not
  # YAML-escaped; a title containing ": " may not parse as valid front
  # matter — confirm whether quoting is wanted.
  header = [
    "---",
    "title: #{postInfo.title}",
    "author: #{postInfo.creator}",
    "date: #{postInfo.firstPublishedAt}",
    "tags: [#{tagList}]",
    "---"
  ]

  header.join("\n") + "\n\r\n"
end
37
+
14
38
  def self.createWatermark(postURL)
15
39
  text = "\r\n\r\n\r\n"
16
40
  text += "+-----------------------------------------------------------------------------------+"
@@ -0,0 +1,22 @@
1
+ $lib = File.expand_path('../', File.dirname(__FILE__))
2
+
3
+ require "Parsers/Parser"
4
+ require 'Models/Paragraph'
5
+
6
# Parser chain link that renders paragraphs typed 'CODE_BLOCK' as a
# fenced Markdown code block and hands every other paragraph to the
# next parser in the chain.
class CodeBlockParser < Parser
  attr_accessor :nextParser

  # @return [String] the paragraph type string this parser handles
  def self.getTypeString()
    'CODE_BLOCK'
  end

  # @param paragraph [#type, #text] paragraph to render
  # @return [String, nil] the fenced code block for a CODE_BLOCK
  #   paragraph; otherwise whatever the next parser returns, or nil
  #   when no next parser is set
  def parse(paragraph)
    return "```\n#{paragraph.text}\n```" if paragraph.type == CodeBlockParser.getTypeString()

    # Not ours: delegate down the chain if there is one.
    nextParser.parse(paragraph) unless nextParser.nil?
  end
end
@@ -1,12 +1,13 @@
1
1
  $lib = File.expand_path('../', File.dirname(__FILE__))
2
2
 
3
+ require "Helper"
3
4
  require "Parsers/Parser"
4
5
  require 'Models/Paragraph'
5
6
 
6
7
# Last link of the parser chain: handles paragraphs that no earlier
# parser handled by emitting a warning and returning the paragraph
# text unchanged.
class FallbackParser < Parser
  attr_accessor :nextParser

  # @param paragraph [#type, #text] paragraph of an unhandled type
  # @return [String] the paragraph text as a new string
  def parse(paragraph)
    warning = "Undefined Paragraph Type: #{paragraph.type}, will treat as plain text temporarily."
    Helper.makeWarningText(warning)

    "#{paragraph.text}"
  end
end
@@ -7,7 +7,11 @@ class PQParser < Parser
7
7
  attr_accessor :nextParser
8
8
  def parse(paragraph)
9
9
  if paragraph.type == 'PQ'
10
- "> #{paragraph.text}"
10
+ result = ""
11
+ paragraph.text.each_line do |p|
12
+ result += "> #{p}"
13
+ end
14
+ result
11
15
  else
12
16
  if !nextParser.nil?
13
17
  nextParser.parse(paragraph)
@@ -5,9 +5,22 @@ require 'Models/Paragraph'
5
5
 
6
6
  class PREParser < Parser
7
7
  attr_accessor :nextParser
8
+
9
# Tells whether the given paragraph has the type "PRE".
#
# @param paragraph [#type, nil] paragraph to inspect; nil is allowed
# @return [Boolean] false for nil, otherwise whether type == "PRE"
def self.isPRE(paragraph)
  return false if paragraph.nil?

  paragraph.type == "PRE"
end
16
+
8
17
  def parse(paragraph)
9
- if paragraph.type == 'PRE'
10
- "> #{paragraph.text}"
18
+ if PREParser.isPRE(paragraph)
19
+ result = ""
20
+ paragraph.text.each_line do |p|
21
+ result += "> #{p}"
22
+ end
23
+ result
11
24
  else
12
25
  if !nextParser.nil?
13
26
  nextParser.parse(paragraph)
data/lib/Post.rb CHANGED
@@ -4,8 +4,14 @@ require "Request"
4
4
  require 'uri'
5
5
  require 'nokogiri'
6
6
  require 'json'
7
+ require 'date'
7
8
 
8
9
  class Post
10
+
11
# Plain holder for post metadata; each attribute is readable and
# writable and starts out as nil.
class PostInfo
  attr_accessor :title
  attr_accessor :tags
  attr_accessor :creator
  attr_accessor :firstPublishedAt
end
14
+
9
15
  def self.getPostIDFromPostURLString(postURLString)
10
16
  uri = URI.parse(postURLString)
11
17
  postID = uri.path.split('/').last.split('-').last
@@ -40,4 +46,22 @@ class Post
40
46
  result.map { |paragraph| content[paragraph["__ref"]] }
41
47
  end
42
48
  end
49
+
50
# Extracts post metadata (title, tags, creator name, first publish
# date) from the post content hash.
#
# @param content [Hash, nil] content hash; the post entry is looked up
#   under the key "Post:<postID>"
# @param postID [String] ID used to build the "Post:<postID>" key
# @return [PostInfo] metadata object; title, creator and
#   firstPublishedAt stay nil when absent, tags is always an Array
def self.parsePostInfoFromPostContent(content, postID)
  postKey = "Post:#{postID}"

  postInfo = PostInfo.new()
  postInfo.title = content&.dig(postKey, "title")

  # A post may have no "tags" entry; fall back to an empty list instead
  # of calling map on nil.
  tagRefs = content&.dig(postKey, "tags") || []
  # Use non-destructive gsub: gsub! returns nil when nothing was
  # replaced (which would put nil into the tag list) and it also
  # modifies the strings stored inside the caller's content hash.
  postInfo.tags = tagRefs
    .map { |tag| tag["__ref"] }
    .compact
    .map { |ref| ref.gsub('Tag:', '') }

  creatorRef = content&.dig(postKey, "creator", "__ref")
  postInfo.creator = content&.dig(creatorRef, "name") unless creatorRef.nil?

  firstPublishedAt = content&.dig(postKey, "firstPublishedAt")
  unless firstPublishedAt.nil?
    # '%Q' parses the value as milliseconds since the Unix epoch.
    postInfo.firstPublishedAt = DateTime.strptime(firstPublishedAt.to_s, '%Q')
  end

  postInfo
end
43
67
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ZMediumToMarkdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - ZhgChgLi
@@ -66,6 +66,7 @@ files:
66
66
  - lib/ImageDownloader.rb
67
67
  - lib/Models/Paragraph.rb
68
68
  - lib/Parsers/BQParser.rb
69
+ - lib/Parsers/CodeBlockParser.rb
69
70
  - lib/Parsers/FallbackParser.rb
70
71
  - lib/Parsers/H1Parser.rb
71
72
  - lib/Parsers/H2Parser.rb