ZMediumToMarkdown 1.6.1 → 1.7.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 92b412153da4daf0a7594c1558a847412863405eec14da02325984a22bc855a8
4
- data.tar.gz: ccdf1c0e166323323f55b91fc0991b3eae1da0ad070e2b884b3b30266661c03b
3
+ metadata.gz: ff9b27bae7ac1365701ccb1b3174a8b7a2d3b7b2fa12771d10665368636c1733
4
+ data.tar.gz: 0f68f9aca2a902694898560c82b689bc10b0e45d6c2d181b279002fb263f74d4
5
5
  SHA512:
6
- metadata.gz: 120c75b800992a2a815c058430326f6b279f52f08ba4484465fdf7eca31b8460f1170bdd30956c1bfd771a3fbb0d5763a21e9327530e80e76250d52551a4ec9f
7
- data.tar.gz: ffb13239eabe2a6f302a093dfea6309eaaaa3eee0ffb960ee9798f96969c922de6a287b3c8ac47aca1c8b4af2f51adcc52bdf2470e59f8cf12420338a03e019b
6
+ metadata.gz: a397b8860995b6fe0e7f4aea405c352d5ce05de20de0d9dc041001bc8af3c8509ac01387f5ea7ae5ecedc844ba41e70ade1ac6f6f58d4d9baca02cce0783c28d
7
+ data.tar.gz: 532bcf53a46474d897bb27dcdfe3153dfd3103cc43de01a9588608776284ddb0f23f89e836797bf453469168c38ed041a1c63f1a607c0b16616bafd002df75f1
@@ -14,18 +14,35 @@ class Main
14
14
  ARGV << '-h' if ARGV.empty?
15
15
 
16
16
  filePath = ENV['PWD'] || ::Dir.pwd
17
- outputFilePath = PathPolicy.new(filePath, "Output")
18
17
 
19
18
  OptionParser.new do |opts|
20
19
  opts.banner = "Usage: ZMediumFetcher [options]"
21
20
 
22
21
  opts.on('-uUSERNAME', '--username=USERNAME', 'Downloading all posts from user') do |username|
22
+ outputFilePath = PathPolicy.new(filePath, "/")
23
23
  fetcher.downloadPostsByUsername(username, outputFilePath)
24
24
 
25
25
  Helper.printNewVersionMessageIfExists()
26
26
  end
27
27
 
28
28
  opts.on('-pPOST_URL', '--postURL=POST_URL', 'Downloading single post') do |postURL|
29
+ outputFilePath = PathPolicy.new(filePath, "/")
30
+ fetcher.downloadPost(postURL, outputFilePath)
31
+
32
+ Helper.printNewVersionMessageIfExists()
33
+ end
34
+
35
+ opts.on('-jUSERNAME', '--jekyllUsername=USERNAME', 'Downloading all posts from user with Jekyll friendly') do |username|
36
+ outputFilePath = PathPolicy.new(filePath, "Output")
37
+ fetcher.isForJekyll = true
38
+ fetcher.downloadPostsByUsername(username, outputFilePath)
39
+
40
+ Helper.printNewVersionMessageIfExists()
41
+ end
42
+
43
+ opts.on('-kpPOST_URL', '--jekyllPostURL=POST_URL', 'Downloading single post with Jekyll friendly') do |postURL|
44
+ outputFilePath = PathPolicy.new(filePath, "Output")
45
+ fetcher.isForJekyll = true
29
46
  fetcher.downloadPost(postURL, outputFilePath)
30
47
 
31
48
  Helper.printNewVersionMessageIfExists()
data/lib/Helper.rb CHANGED
@@ -159,17 +159,7 @@ class Helper
159
159
  end
160
160
 
161
161
 
162
- def self.createWatermark(postURL)
163
- text = "\r\n\r\n\r\n"
164
- text += "+-----------------------------------------------------------------------------------+"
165
- text += "\r\n"
166
- text += "\r\n"
167
- text += "| **[View original post on Medium](#{postURL}) - Converted by [ZhgChgLi](https://blog.zhgchg.li)/[ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)** |"
168
- text += "\r\n"
169
- text += "\r\n"
170
- text += "+-----------------------------------------------------------------------------------+"
171
- text += "\r\n"
172
-
173
- text
162
+ def self.createWatermark(postURL)
163
+ text = "\r\n[Medium 原文](#{postURL})"
174
164
  end
175
- end
165
+ end
@@ -10,8 +10,16 @@ class CodeBlockParser < Parser
10
10
  'CODE_BLOCK'
11
11
  end
12
12
 
13
+ def self.isCodeBlock(paragraph)
14
+ if paragraph.nil?
15
+ false
16
+ else
17
+ paragraph.type == CodeBlockParser.getTypeString()
18
+ end
19
+ end
20
+
13
21
  def parse(paragraph)
14
- if paragraph.type == CodeBlockParser.getTypeString()
22
+ if CodeBlockParser.isCodeBlock(paragraph)
15
23
  "```\n#{paragraph.text}\n```"
16
24
  else
17
25
  if !nextParser.nil?
@@ -7,7 +7,12 @@ require 'ImageDownloader'
7
7
  require 'PathPolicy'
8
8
 
9
9
  class IMGParser < Parser
10
- attr_accessor :nextParser, :pathPolicy
10
+ attr_accessor :nextParser, :pathPolicy, :isForJekyll
11
+
12
+ def initialize(isForJekyll)
13
+ @isForJekyll = isForJekyll
14
+ end
15
+
11
16
  def parse(paragraph)
12
17
  if paragraph.type == 'IMG'
13
18
 
@@ -25,9 +30,13 @@ class IMGParser < Parser
25
30
 
26
31
  if ImageDownloader.download(absolutePath, imageURL)
27
32
  relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
28
- "![#{paragraph.text}](/#{relativePath}#{comment})"
33
+ if isForJekyll
34
+ "\r\n![#{paragraph.text}](/#{relativePath}#{comment})\r\n"
35
+ else
36
+ "\r\n![#{paragraph.text}](#{relativePath}#{comment})\r\n"
37
+ end
29
38
  else
30
- "![#{paragraph.text}](#{imageURL}#{comment})"
39
+ "\r\n![#{paragraph.text}](#{imageURL}#{comment})\r\n"
31
40
  end
32
41
  else
33
42
  if !nextParser.nil?
@@ -11,7 +11,12 @@ require 'ImageDownloader'
11
11
  require 'PathPolicy'
12
12
 
13
13
  class IframeParser < Parser
14
- attr_accessor :nextParser, :pathPolicy
14
+ attr_accessor :nextParser, :pathPolicy, :isForJekyll
15
+
16
+ def initialize(isForJekyll)
17
+ @isForJekyll = isForJekyll
18
+ end
19
+
15
20
  def parse(paragraph)
16
21
  if paragraph.type == 'IFRAME'
17
22
  if !paragraph.iframe.src.nil? && paragraph.iframe.src != ""
@@ -35,9 +40,13 @@ class IframeParser < Parser
35
40
  title = paragraph.iframe.title
36
41
  if ImageDownloader.download(absolutePath, imageURL)
37
42
  relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
38
- result = "\n[![#{title}](#{relativePath} \"#{title}\")](#{params["url"]})"
43
+ if isForJekyll
44
+ result = "\r\n[![#{title}](/#{relativePath} \"#{title}\")](#{params["url"]})\r\n"
45
+ else
46
+ result = "\r\n[![#{title}](#{relativePath} \"#{title}\")](#{params["url"]})\r\n"
47
+ end
39
48
  else
40
- result = "\n[#{title}](#{params["url"]})"
49
+ result = "\r\n[#{title}](#{params["url"]})\r\n"
41
50
  end
42
51
  end
43
52
  else
@@ -3,10 +3,11 @@ $lib = File.expand_path('../', File.dirname(__FILE__))
3
3
  require 'Models/Paragraph'
4
4
 
5
5
  class LinkParser
6
- attr_accessor :usersPostURLs
6
+ attr_accessor :usersPostURLs, :isForJekyll
7
7
 
8
- def initialize(usersPostURLs)
8
+ def initialize(usersPostURLs, isForJekyll)
9
9
  @usersPostURLs = usersPostURLs
10
+ @isForJekyll = isForJekyll
10
11
  end
11
12
 
12
13
  def parse(markdownString, markupLinks)
@@ -21,20 +22,17 @@ class LinkParser
21
22
  # if have provide user's post urls
22
23
  # find & replace medium url to local post url if matched
23
24
 
24
- postPath = link.split("/").last
25
- if !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == postPath.split("-").last }.nil?
26
- markdownString = markdownString.sub! link, postPath
25
+ if isForJekyll
26
+ postPath = link.split("/").last.split("-").last
27
+ else
28
+ postPath = link.split("/").last
27
29
  end
28
- else
29
- if !(link =~ /\A#{URI::regexp(['http', 'https'])}\z/)
30
- # medium will give you an relative path if url is medium's post (due to we use html to markdown render)
31
- # e.g. /zrealm-ios-dev/visitor-pattern-in-ios-swift-ba5773a7bfea
32
- # it's not a vaild url
33
-
34
- # fullfill url from markup attribute
35
- match = markupLinks.find{ |markupLink| markupLink.include? link }
36
- if !match.nil?
37
- markdownString = markdownString.sub! link, match
30
+
31
+ if !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == postPath.split("-").last }.nil?
32
+ if isForJekyll
33
+ markdownString = markdownString.sub! link, "../#{postPath}"
34
+ else
35
+ markdownString = markdownString.sub! link, "#{postPath}"
38
36
  end
39
37
  end
40
38
  end
@@ -32,12 +32,10 @@ class MarkupStyleRender
32
32
  chars = {}
33
33
  index = 0
34
34
 
35
- emojiRegex = /[\u{203C}\u{2049}\u{20E3}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{23E9}-\u{23EC}\u{23F0}\u{23F3}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2601}\u{260E}\u{2611}\u{2614}-\u{2615}\u{261D}\u{263A}\u{2648}-\u{2653}\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267F}\u{2693}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26CE}\u{26D4}\u{26EA}\u{26F2}-\u{26F3}\u{26F5}\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270C}\u{270F}\u{2712}\u{2714}\u{2716}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1E7}-\u{1F1EC}\u{1F1EE}-\u{1F1F0}\u{1F1F3}\u{1F1F5}\u{1F1F7}-\u{1F1FA}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F320}\u{1F330}-\u{1F335}\u{1F337}-\u{1F37C}\u{1F380}-\u{1F393}\u{1F3A0}-\u{1F3C4}\u{1F3C6}-\u{1F3CA}\u{1F3E0}-\u{1F3F0}\u{1F400}-\u{1F43E}\u{1F440}\u{1F442}-\u{1F4F7}\u{1F4F9}-\u{1F4FC}\u{1F500}-\u{1F507}\u{1F509}-\u{1F53D}\u{1F550}-\u{1F567}\u{1F5FB}-\u{1F640}\u{1F645}-\u{1F64F}\u{1F680}-\u{1F68A}]/
36
- excludesEmojis = ["⚠"]
37
35
  paragraph.text.each_char do |char|
38
36
  chars[index] = TextChar.new([char], "Text")
39
37
  index += 1
40
- if char =~ emojiRegex && !excludesEmojis.include?(char)
38
+ if char.bytes.length >= 4
41
39
  # some emoji need more space (in Medium)
42
40
  chars[index] = TextChar.new([], "Text")
43
41
  index += 1
@@ -30,7 +30,7 @@ require 'date'
30
30
 
31
31
  class ZMediumFetcher
32
32
 
33
- attr_accessor :progress, :linkParser
33
+ attr_accessor :progress, :linkParser, :isForJekyll
34
34
 
35
35
  class Progress
36
36
  attr_accessor :username, :postPath, :currentPostIndex, :totalPostsLength, :currentPostParagraphIndex, :totalPostParagraphsLength, :message
@@ -71,7 +71,8 @@ class ZMediumFetcher
71
71
 
72
72
  def initialize
73
73
  @progress = Progress.new()
74
- @linkParser = LinkParser.new(nil)
74
+ @linkParser = LinkParser.new(nil, false)
75
+ @isForJekyll = false
75
76
  end
76
77
 
77
78
  def buildParser(imagePathPolicy)
@@ -92,10 +93,10 @@ class ZMediumFetcher
92
93
  oliParser.setNext(mixtapeembedParser)
93
94
  pqParser = PQParser.new()
94
95
  mixtapeembedParser.setNext(pqParser)
95
- iframeParser = IframeParser.new()
96
+ iframeParser = IframeParser.new(isForJekyll)
96
97
  iframeParser.pathPolicy = imagePathPolicy
97
98
  pqParser.setNext(iframeParser)
98
- imgParser = IMGParser.new()
99
+ imgParser = IMGParser.new(isForJekyll)
99
100
  imgParser.pathPolicy = imagePathPolicy
100
101
  iframeParser.setNext(imgParser)
101
102
  bqParser = BQParser.new()
@@ -113,7 +114,12 @@ class ZMediumFetcher
113
114
 
114
115
  def downloadPost(postURL, pathPolicy)
115
116
  postID = Post.getPostIDFromPostURLString(postURL)
116
- postPath = Post.getPostPathFromPostURLString(postURL)
117
+
118
+ if isForJekyll
119
+ postPath = postID # use only post id is more friendly for url seo
120
+ else
121
+ postPath = Post.getPostPathFromPostURLString(postURL)
122
+ end
117
123
 
118
124
  progress.postPath = postPath
119
125
  progress.message = "Downloading Post..."
@@ -203,9 +209,14 @@ class ZMediumFetcher
203
209
  previousParagraph = paragraph
204
210
  end
205
211
 
206
- postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
207
-
208
- imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "assets")
212
+ if isForJekyll
213
+ postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "_posts")
214
+ imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "assets")
215
+ else
216
+ postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
217
+ imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "assets")
218
+ end
219
+
209
220
  startParser = buildParser(imagePathPolicy)
210
221
 
211
222
  progress.totalPostParagraphsLength = paragraphs.length
@@ -231,13 +242,21 @@ class ZMediumFetcher
231
242
 
232
243
  index = 0
233
244
  paragraphs.each do |paragraph|
234
- markupParser = MarkupParser.new(paragraph)
235
- paragraph.text = markupParser.parse()
245
+
246
+ if !(CodeBlockParser.isCodeBlock(paragraph) || PREParser.isPRE(paragraph))
247
+ markupParser = MarkupParser.new(paragraph)
248
+ paragraph.text = markupParser.parse()
249
+ end
250
+
236
251
  result = startParser.parse(paragraph)
237
252
 
238
253
  if !linkParser.nil?
239
254
  result = linkParser.parse(result, paragraph.markupLinks)
240
255
  end
256
+
257
+ if paragraph.orgText == "延伸閱讀" or result.include? "Like Z Realm" or paragraph.orgText == "有任何問題及指教歡迎與我聯絡。"
258
+ break
259
+ end
241
260
 
242
261
  file.puts(result)
243
262
 
@@ -281,7 +300,7 @@ class ZMediumFetcher
281
300
  nextID = postPageInfo["nextID"]
282
301
  end while !nextID.nil?
283
302
 
284
- @linkParser = LinkParser.new(postURLS)
303
+ @linkParser = LinkParser.new(postURLS, isForJekyll)
285
304
 
286
305
  progress.totalPostsLength = postURLS.length
287
306
  progress.currentPostIndex = 0
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ZMediumToMarkdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.1
4
+ version: 1.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ZhgChgLi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-07 00:00:00.000000000 Z
11
+ date: 2022-06-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri