ZMediumToMarkdown 1.6.1 → 1.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 92b412153da4daf0a7594c1558a847412863405eec14da02325984a22bc855a8
4
- data.tar.gz: ccdf1c0e166323323f55b91fc0991b3eae1da0ad070e2b884b3b30266661c03b
3
+ metadata.gz: ff9b27bae7ac1365701ccb1b3174a8b7a2d3b7b2fa12771d10665368636c1733
4
+ data.tar.gz: 0f68f9aca2a902694898560c82b689bc10b0e45d6c2d181b279002fb263f74d4
5
5
  SHA512:
6
- metadata.gz: 120c75b800992a2a815c058430326f6b279f52f08ba4484465fdf7eca31b8460f1170bdd30956c1bfd771a3fbb0d5763a21e9327530e80e76250d52551a4ec9f
7
- data.tar.gz: ffb13239eabe2a6f302a093dfea6309eaaaa3eee0ffb960ee9798f96969c922de6a287b3c8ac47aca1c8b4af2f51adcc52bdf2470e59f8cf12420338a03e019b
6
+ metadata.gz: a397b8860995b6fe0e7f4aea405c352d5ce05de20de0d9dc041001bc8af3c8509ac01387f5ea7ae5ecedc844ba41e70ade1ac6f6f58d4d9baca02cce0783c28d
7
+ data.tar.gz: 532bcf53a46474d897bb27dcdfe3153dfd3103cc43de01a9588608776284ddb0f23f89e836797bf453469168c38ed041a1c63f1a607c0b16616bafd002df75f1
@@ -14,18 +14,35 @@ class Main
14
14
  ARGV << '-h' if ARGV.empty?
15
15
 
16
16
  filePath = ENV['PWD'] || ::Dir.pwd
17
- outputFilePath = PathPolicy.new(filePath, "Output")
18
17
 
19
18
  OptionParser.new do |opts|
20
19
  opts.banner = "Usage: ZMediumFetcher [options]"
21
20
 
22
21
  opts.on('-uUSERNAME', '--username=USERNAME', 'Downloading all posts from user') do |username|
22
+ outputFilePath = PathPolicy.new(filePath, "/")
23
23
  fetcher.downloadPostsByUsername(username, outputFilePath)
24
24
 
25
25
  Helper.printNewVersionMessageIfExists()
26
26
  end
27
27
 
28
28
  opts.on('-pPOST_URL', '--postURL=POST_URL', 'Downloading single post') do |postURL|
29
+ outputFilePath = PathPolicy.new(filePath, "/")
30
+ fetcher.downloadPost(postURL, outputFilePath)
31
+
32
+ Helper.printNewVersionMessageIfExists()
33
+ end
34
+
35
+ opts.on('-jUSERNAME', '--jekyllUsername=USERNAME', 'Downloading all posts from user with Jekyll friendly') do |username|
36
+ outputFilePath = PathPolicy.new(filePath, "Output")
37
+ fetcher.isForJekyll = true
38
+ fetcher.downloadPostsByUsername(username, outputFilePath)
39
+
40
+ Helper.printNewVersionMessageIfExists()
41
+ end
42
+
43
+ opts.on('-kpPOST_URL', '--jekyllPostURL=POST_URL', 'Downloading single post with Jekyll friendly') do |postURL|
44
+ outputFilePath = PathPolicy.new(filePath, "Output")
45
+ fetcher.isForJekyll = true
29
46
  fetcher.downloadPost(postURL, outputFilePath)
30
47
 
31
48
  Helper.printNewVersionMessageIfExists()
data/lib/Helper.rb CHANGED
@@ -159,17 +159,7 @@ class Helper
159
159
  end
160
160
 
161
161
 
162
- def self.createWatermark(postURL)
163
- text = "\r\n\r\n\r\n"
164
- text += "+-----------------------------------------------------------------------------------+"
165
- text += "\r\n"
166
- text += "\r\n"
167
- text += "| **[View original post on Medium](#{postURL}) - Converted by [ZhgChgLi](https://blog.zhgchg.li)/[ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)** |"
168
- text += "\r\n"
169
- text += "\r\n"
170
- text += "+-----------------------------------------------------------------------------------+"
171
- text += "\r\n"
172
-
173
- text
162
+ def self.createWatermark(postURL)
163
+ text = "\r\n[Medium 原文](#{postURL})"
174
164
  end
175
- end
165
+ end
@@ -10,8 +10,16 @@ class CodeBlockParser < Parser
10
10
  'CODE_BLOCK'
11
11
  end
12
12
 
13
+ def self.isCodeBlock(paragraph)
14
+ if paragraph.nil?
15
+ false
16
+ else
17
+ paragraph.type == CodeBlockParser.getTypeString()
18
+ end
19
+ end
20
+
13
21
  def parse(paragraph)
14
- if paragraph.type == CodeBlockParser.getTypeString()
22
+ if CodeBlockParser.isCodeBlock(paragraph)
15
23
  "```\n#{paragraph.text}\n```"
16
24
  else
17
25
  if !nextParser.nil?
@@ -7,7 +7,12 @@ require 'ImageDownloader'
7
7
  require 'PathPolicy'
8
8
 
9
9
  class IMGParser < Parser
10
- attr_accessor :nextParser, :pathPolicy
10
+ attr_accessor :nextParser, :pathPolicy, :isForJekyll
11
+
12
+ def initialize(isForJekyll)
13
+ @isForJekyll = isForJekyll
14
+ end
15
+
11
16
  def parse(paragraph)
12
17
  if paragraph.type == 'IMG'
13
18
 
@@ -25,9 +30,13 @@ class IMGParser < Parser
25
30
 
26
31
  if ImageDownloader.download(absolutePath, imageURL)
27
32
  relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
28
- "![#{paragraph.text}](/#{relativePath}#{comment})"
33
+ if isForJekyll
34
+ "\r\n![#{paragraph.text}](/#{relativePath}#{comment})\r\n"
35
+ else
36
+ "\r\n![#{paragraph.text}](#{relativePath}#{comment})\r\n"
37
+ end
29
38
  else
30
- "![#{paragraph.text}](#{imageURL}#{comment})"
39
+ "\r\n![#{paragraph.text}](#{imageURL}#{comment})\r\n"
31
40
  end
32
41
  else
33
42
  if !nextParser.nil?
@@ -11,7 +11,12 @@ require 'ImageDownloader'
11
11
  require 'PathPolicy'
12
12
 
13
13
  class IframeParser < Parser
14
- attr_accessor :nextParser, :pathPolicy
14
+ attr_accessor :nextParser, :pathPolicy, :isForJekyll
15
+
16
+ def initialize(isForJekyll)
17
+ @isForJekyll = isForJekyll
18
+ end
19
+
15
20
  def parse(paragraph)
16
21
  if paragraph.type == 'IFRAME'
17
22
  if !paragraph.iframe.src.nil? && paragraph.iframe.src != ""
@@ -35,9 +40,13 @@ class IframeParser < Parser
35
40
  title = paragraph.iframe.title
36
41
  if ImageDownloader.download(absolutePath, imageURL)
37
42
  relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
38
- result = "\n[![#{title}](#{relativePath} \"#{title}\")](#{params["url"]})"
43
+ if isForJekyll
44
+ result = "\r\n[![#{title}](/#{relativePath} \"#{title}\")](#{params["url"]})\r\n"
45
+ else
46
+ result = "\r\n[![#{title}](#{relativePath} \"#{title}\")](#{params["url"]})\r\n"
47
+ end
39
48
  else
40
- result = "\n[#{title}](#{params["url"]})"
49
+ result = "\r\n[#{title}](#{params["url"]})\r\n"
41
50
  end
42
51
  end
43
52
  else
@@ -3,10 +3,11 @@ $lib = File.expand_path('../', File.dirname(__FILE__))
3
3
  require 'Models/Paragraph'
4
4
 
5
5
  class LinkParser
6
- attr_accessor :usersPostURLs
6
+ attr_accessor :usersPostURLs, :isForJekyll
7
7
 
8
- def initialize(usersPostURLs)
8
+ def initialize(usersPostURLs, isForJekyll)
9
9
  @usersPostURLs = usersPostURLs
10
+ @isForJekyll = isForJekyll
10
11
  end
11
12
 
12
13
  def parse(markdownString, markupLinks)
@@ -21,20 +22,17 @@ class LinkParser
21
22
  # if have provide user's post urls
22
23
  # find & replace medium url to local post url if matched
23
24
 
24
- postPath = link.split("/").last
25
- if !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == postPath.split("-").last }.nil?
26
- markdownString = markdownString.sub! link, postPath
25
+ if isForJekyll
26
+ postPath = link.split("/").last.split("-").last
27
+ else
28
+ postPath = link.split("/").last
27
29
  end
28
- else
29
- if !(link =~ /\A#{URI::regexp(['http', 'https'])}\z/)
30
- # medium will give you an relative path if url is medium's post (due to we use html to markdown render)
31
- # e.g. /zrealm-ios-dev/visitor-pattern-in-ios-swift-ba5773a7bfea
32
- # it's not a vaild url
33
-
34
- # fullfill url from markup attribute
35
- match = markupLinks.find{ |markupLink| markupLink.include? link }
36
- if !match.nil?
37
- markdownString = markdownString.sub! link, match
30
+
31
+ if !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == postPath.split("-").last }.nil?
32
+ if isForJekyll
33
+ markdownString = markdownString.sub! link, "../#{postPath}"
34
+ else
35
+ markdownString = markdownString.sub! link, "#{postPath}"
38
36
  end
39
37
  end
40
38
  end
@@ -32,12 +32,10 @@ class MarkupStyleRender
32
32
  chars = {}
33
33
  index = 0
34
34
 
35
- emojiRegex = /[\u{203C}\u{2049}\u{20E3}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{23E9}-\u{23EC}\u{23F0}\u{23F3}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2601}\u{260E}\u{2611}\u{2614}-\u{2615}\u{261D}\u{263A}\u{2648}-\u{2653}\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267F}\u{2693}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26CE}\u{26D4}\u{26EA}\u{26F2}-\u{26F3}\u{26F5}\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270C}\u{270F}\u{2712}\u{2714}\u{2716}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F1E7}-\u{1F1EC}\u{1F1EE}-\u{1F1F0}\u{1F1F3}\u{1F1F5}\u{1F1F7}-\u{1F1FA}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F320}\u{1F330}-\u{1F335}\u{1F337}-\u{1F37C}\u{1F380}-\u{1F393}\u{1F3A0}-\u{1F3C4}\u{1F3C6}-\u{1F3CA}\u{1F3E0}-\u{1F3F0}\u{1F400}-\u{1F43E}\u{1F440}\u{1F442}-\u{1F4F7}\u{1F4F9}-\u{1F4FC}\u{1F500}-\u{1F507}\u{1F509}-\u{1F53D}\u{1F550}-\u{1F567}\u{1F5FB}-\u{1F640}\u{1F645}-\u{1F64F}\u{1F680}-\u{1F68A}]/
36
- excludesEmojis = ["⚠"]
37
35
  paragraph.text.each_char do |char|
38
36
  chars[index] = TextChar.new([char], "Text")
39
37
  index += 1
40
- if char =~ emojiRegex && !excludesEmojis.include?(char)
38
+ if char.bytes.length >= 4
41
39
  # some emoji need more space (in Medium)
42
40
  chars[index] = TextChar.new([], "Text")
43
41
  index += 1
@@ -30,7 +30,7 @@ require 'date'
30
30
 
31
31
  class ZMediumFetcher
32
32
 
33
- attr_accessor :progress, :linkParser
33
+ attr_accessor :progress, :linkParser, :isForJekyll
34
34
 
35
35
  class Progress
36
36
  attr_accessor :username, :postPath, :currentPostIndex, :totalPostsLength, :currentPostParagraphIndex, :totalPostParagraphsLength, :message
@@ -71,7 +71,8 @@ class ZMediumFetcher
71
71
 
72
72
  def initialize
73
73
  @progress = Progress.new()
74
- @linkParser = LinkParser.new(nil)
74
+ @linkParser = LinkParser.new(nil, false)
75
+ @isForJekyll = false
75
76
  end
76
77
 
77
78
  def buildParser(imagePathPolicy)
@@ -92,10 +93,10 @@ class ZMediumFetcher
92
93
  oliParser.setNext(mixtapeembedParser)
93
94
  pqParser = PQParser.new()
94
95
  mixtapeembedParser.setNext(pqParser)
95
- iframeParser = IframeParser.new()
96
+ iframeParser = IframeParser.new(isForJekyll)
96
97
  iframeParser.pathPolicy = imagePathPolicy
97
98
  pqParser.setNext(iframeParser)
98
- imgParser = IMGParser.new()
99
+ imgParser = IMGParser.new(isForJekyll)
99
100
  imgParser.pathPolicy = imagePathPolicy
100
101
  iframeParser.setNext(imgParser)
101
102
  bqParser = BQParser.new()
@@ -113,7 +114,12 @@ class ZMediumFetcher
113
114
 
114
115
  def downloadPost(postURL, pathPolicy)
115
116
  postID = Post.getPostIDFromPostURLString(postURL)
116
- postPath = Post.getPostPathFromPostURLString(postURL)
117
+
118
+ if isForJekyll
119
+ postPath = postID # use only post id is more friendly for url seo
120
+ else
121
+ postPath = Post.getPostPathFromPostURLString(postURL)
122
+ end
117
123
 
118
124
  progress.postPath = postPath
119
125
  progress.message = "Downloading Post..."
@@ -203,9 +209,14 @@ class ZMediumFetcher
203
209
  previousParagraph = paragraph
204
210
  end
205
211
 
206
- postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
207
-
208
- imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "assets")
212
+ if isForJekyll
213
+ postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "_posts")
214
+ imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "assets")
215
+ else
216
+ postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "posts")
217
+ imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "assets")
218
+ end
219
+
209
220
  startParser = buildParser(imagePathPolicy)
210
221
 
211
222
  progress.totalPostParagraphsLength = paragraphs.length
@@ -231,13 +242,21 @@ class ZMediumFetcher
231
242
 
232
243
  index = 0
233
244
  paragraphs.each do |paragraph|
234
- markupParser = MarkupParser.new(paragraph)
235
- paragraph.text = markupParser.parse()
245
+
246
+ if !(CodeBlockParser.isCodeBlock(paragraph) || PREParser.isPRE(paragraph))
247
+ markupParser = MarkupParser.new(paragraph)
248
+ paragraph.text = markupParser.parse()
249
+ end
250
+
236
251
  result = startParser.parse(paragraph)
237
252
 
238
253
  if !linkParser.nil?
239
254
  result = linkParser.parse(result, paragraph.markupLinks)
240
255
  end
256
+
257
+ if paragraph.orgText == "延伸閱讀" or result.include? "Like Z Realm" or paragraph.orgText == "有任何問題及指教歡迎與我聯絡。"
258
+ break
259
+ end
241
260
 
242
261
  file.puts(result)
243
262
 
@@ -281,7 +300,7 @@ class ZMediumFetcher
281
300
  nextID = postPageInfo["nextID"]
282
301
  end while !nextID.nil?
283
302
 
284
- @linkParser = LinkParser.new(postURLS)
303
+ @linkParser = LinkParser.new(postURLS, isForJekyll)
285
304
 
286
305
  progress.totalPostsLength = postURLS.length
287
306
  progress.currentPostIndex = 0
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ZMediumToMarkdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.1
4
+ version: 1.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ZhgChgLi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-07 00:00:00.000000000 Z
11
+ date: 2022-06-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri