ZMediumToMarkdown 2.0.0 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 796d995f3d5f3f1edf3de599a28df4a8bea5ab9083d9bf8191d0d0535c924eb3
4
- data.tar.gz: 7fa92c888507d4fea9293a3649c1656d3891a04b34d73d035d1a65676a5c8dc4
3
+ metadata.gz: 2c6540c31739d0b7673b180fa73887641933b08f431346a7aa77e89c5188acb6
4
+ data.tar.gz: 9cd571bc32f08011d136d6814fadf8afb3ab3bc1251ea1476bc0144b30fc4461
5
5
  SHA512:
6
- metadata.gz: 434c7f737e281189a0feaa821d25e936632be3e72731a495ba62ea945bfef67063518637879b44ecf5f9121ce4ae8b302553bca55959a93e874904cc0fddc808
7
- data.tar.gz: 298efd510208b800826cd39be456e47a7b1744291078fd7e61b954a59e9c9257e796c09ed1813b098ad0c8615c17b1b7e19217388dcbcd9f085d9b5151a35663
6
+ metadata.gz: 57162ffcec3607c5fdb654b984593eef97a6badbd1e3ebb8e11bb8be2ea1b7301f17aa51d93fa6d864a2db58bfbb45ceda6b3cade25a2ca633a5eb90d08c5240
7
+ data.tar.gz: 862072b9c0d384bf1a45f02590b5a0f9286d96ad480b8874670f067e56fc5c66b9c2c33e62f24d799871459504cb807e705d7876576c56f5e9317e80caa0a145
@@ -19,21 +19,21 @@ class Main
19
19
  opts.banner = "Usage: ZMediumFetcher [options]"
20
20
 
21
21
  opts.on('-uUSERNAME', '--username=USERNAME', 'Downloading all posts from user') do |username|
22
- outputFilePath = PathPolicy.new(filePath, "Output")
22
+ outputFilePath = PathPolicy.new("#{filePath}/Output", "Output")
23
23
  fetcher.downloadPostsByUsername(username, outputFilePath)
24
24
 
25
25
  Helper.printNewVersionMessageIfExists()
26
26
  end
27
27
 
28
28
  opts.on('-pPOST_URL', '--postURL=POST_URL', 'Downloading single post') do |postURL|
29
- outputFilePath = PathPolicy.new(filePath, "Output")
29
+ outputFilePath = PathPolicy.new("#{filePath}/Output", "Output")
30
30
  fetcher.downloadPost(postURL, outputFilePath)
31
31
 
32
32
  Helper.printNewVersionMessageIfExists()
33
33
  end
34
34
 
35
35
  opts.on('-jUSERNAME', '--jekyllUsername=USERNAME', 'Downloading all posts from user with Jekyll friendly') do |username|
36
- outputFilePath = PathPolicy.new(filePath, "/")
36
+ outputFilePath = PathPolicy.new(filePath, "")
37
37
  fetcher.isForJekyll = true
38
38
  fetcher.downloadPostsByUsername(username, outputFilePath)
39
39
 
@@ -41,7 +41,7 @@ class Main
41
41
  end
42
42
 
43
43
  opts.on('-kPOST_URL', '--jekyllPostURL=POST_URL', 'Downloading single post with Jekyll friendly') do |postURL|
44
- outputFilePath = PathPolicy.new(filePath, "/")
44
+ outputFilePath = PathPolicy.new(filePath, "")
45
45
  fetcher.isForJekyll = true
46
46
  fetcher.downloadPost(postURL, outputFilePath)
47
47
 
data/lib/Helper.rb CHANGED
@@ -12,10 +12,6 @@ require 'nokogiri'
12
12
 
13
13
  class Helper
14
14
 
15
- def self.escapeMarkdown(text)
16
- text.gsub(/(\*|_|`|\||\\|\{|\}|\[|\]|\(|\)|#|\+|\-|\.|\!)/){ |x| "\\#{x}" }
17
- end
18
-
19
15
  def self.fetchOGImage(url)
20
16
  html = Request.html(Request.URL(url))
21
17
  content = html.search("meta[property='og:image']").attribute('content')
@@ -99,30 +95,28 @@ class Helper
99
95
  end
100
96
 
101
97
  def self.createPostInfo(postInfo, isForJekyll)
98
+ title = postInfo.title.gsub("[","")
99
+ title = title.gsub("]","")
100
+
101
+ result = "---\n"
102
+ result += "title: #{title}\n"
103
+ result += "author: #{postInfo.creator}\n"
104
+ result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
105
+ result += "last_modified_at: #{postInfo.latestPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
106
+ result += "categories: #{postInfo.collectionName}\n"
107
+ result += "tags: [#{postInfo.tags.join(",")}]\n"
108
+ result += "description: #{postInfo.description}\n"
109
+ if !postInfo.previewImage.nil?
110
+ result += "image:\r\n"
111
+ result += " path: #{postInfo.previewImage}\r\n"
112
+ end
102
113
  if isForJekyll
103
- title = postInfo.title.gsub("[","")
104
- title = title.gsub("]","")
105
-
106
- result = "---\n"
107
- result += "title: #{title}\n"
108
- result += "author: #{postInfo.creator}\n"
109
- result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
110
- result += "categories: #{postInfo.collectionName}\n"
111
- result += "tags: [#{postInfo.tags.join(",")}]\n"
112
- result += "description: #{postInfo.description}\n"
113
- if !postInfo.previewImage.nil?
114
- result += "image:\r\n"
115
- result += " path: #{postInfo.previewImage}\r\n"
116
- end
117
114
  result += "render_with_liquid: false\n"
118
-
119
- result += "---\n"
120
- result += "\r\n"
121
-
122
- result
123
- else
124
- nil
125
115
  end
116
+ result += "---\n"
117
+ result += "\r\n"
118
+
119
+ result
126
120
  end
127
121
 
128
122
  def self.printNewVersionMessageIfExists()
@@ -5,7 +5,7 @@ require 'Parsers/PParser'
5
5
  require 'securerandom'
6
6
 
7
7
  class Paragraph
8
- attr_accessor :postID, :name, :orgText, :orgTextWithEscape, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :oliIndex, :markups, :markupLinks
8
+ attr_accessor :postID, :name, :orgText, :text, :type, :href, :metadata, :mixtapeMetadata, :iframe, :oliIndex, :markups, :markupLinks
9
9
 
10
10
  class Iframe
11
11
  attr_accessor :id, :title, :type, :src
@@ -66,9 +66,6 @@ class Paragraph
66
66
  @href = json['href']
67
67
  @postID = postID
68
68
 
69
- orgTextWithEscape = Helper.escapeMarkdown(json['text'])
70
- @orgTextWithEscape = orgTextWithEscape
71
-
72
69
  if json['metadata'].nil?
73
70
  @metadata = nil
74
71
  else
@@ -87,19 +84,28 @@ class Paragraph
87
84
  @iframe = Iframe.new(json['iframe']['mediaResource'])
88
85
  end
89
86
 
87
+ markups = []
90
88
  if !json['markups'].nil? && json['markups'].length > 0
91
- markups = []
92
89
  json['markups'].each do |markup|
93
90
  markups.append(Markup.new(markup))
94
91
  end
95
- @markups = markups
96
-
92
+
97
93
  links = json['markups'].select{ |markup| markup["type"] == "A" }
98
94
  if !links.nil? && links.length > 0
99
95
  @markupLinks = links.map{ |link| link["href"] }
100
96
  end
101
- else
102
- @markups = nil
103
97
  end
98
+
99
+ i = 0
100
+ while i = orgText.index(/(\*|_|`|\||\\|\{|\}|\[|\]|\(|\)|#|\+|\-|\.|\!)/, i + 1)
101
+ escapeMarkup = {
102
+ "type" => 'ESCAPE',
103
+ "start" => i,
104
+ "end" => i + 1
105
+ }
106
+ markups.append(Markup.new(escapeMarkup))
107
+ end
108
+
109
+ @markups = markups
104
110
  end
105
111
  end
@@ -20,24 +20,21 @@ class IMGParser < Parser
20
20
 
21
21
  imageURL = "https://miro.medium.com/max/1400/#{fileName}"
22
22
 
23
- imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
23
+ imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(paragraph.postID), pathPolicy.getRelativePath(paragraph.postID))
24
24
  absolutePath = imagePathPolicy.getAbsolutePath(fileName)
25
25
 
26
26
  result = ""
27
27
  alt = ""
28
- if paragraph.orgTextWithEscape != ""
29
- alt = " \"#{paragraph.orgTextWithEscape}\""
30
- end
31
28
 
32
29
  if ImageDownloader.download(absolutePath, imageURL)
33
- relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
30
+ relativePath = imagePathPolicy.getRelativePath(fileName)
34
31
  if isForJekyll
35
- result = "\r\n\r\n![#{paragraph.orgTextWithEscape}](/#{relativePath}#{alt})\r\n\r\n"
32
+ result = "\r\n\r\n![#{paragraph.text}](/#{relativePath}#{alt})\r\n\r\n"
36
33
  else
37
- result = "\r\n\r\n![#{paragraph.orgTextWithEscape}](#{relativePath}#{alt})\r\n\r\n"
34
+ result = "\r\n\r\n![#{paragraph.text}](#{relativePath}#{alt})\r\n\r\n"
38
35
  end
39
36
  else
40
- result = "\r\n\r\n![#{paragraph.orgTextWithEscape}](#{imageURL}#{alt})\r\n\r\n"
37
+ result = "\r\n\r\n![#{paragraph.text}](#{imageURL}#{alt})\r\n\r\n"
41
38
  end
42
39
 
43
40
  if paragraph.text != ""
@@ -39,7 +39,7 @@ class IframeParser < Parser
39
39
  fileName = "#{paragraph.name}_#{URI(params["image"]).path.split("/").last}" #21de_default.jpg
40
40
 
41
41
  imageURL = params["image"]
42
- imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), paragraph.postID)
42
+ imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(paragraph.postID), pathPolicy.getRelativePath(paragraph.postID))
43
43
  absolutePath = imagePathPolicy.getAbsolutePath(fileName)
44
44
  title = paragraph.iframe.title
45
45
  if title.nil? or title == ""
@@ -47,7 +47,7 @@ class IframeParser < Parser
47
47
  end
48
48
 
49
49
  if ImageDownloader.download(absolutePath, imageURL)
50
- relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(fileName)}"
50
+ relativePath = imagePathPolicy.getRelativePath(fileName)
51
51
  if isForJekyll
52
52
  result = "\r\n\r\n[![#{title}](/#{relativePath} \"#{title}\")](#{params["url"]})\r\n\r\n"
53
53
  else
@@ -10,43 +10,41 @@ class LinkParser
10
10
  @isForJekyll = false
11
11
  end
12
12
 
13
- def parse(markdownString, markupLinks)
14
- if !markupLinks.nil?
15
- matchLinks = markdownString.scan(/\[[^\]]*\]\(([^\)]*)\)/)
16
- if !matchLinks.nil?
13
+ def parse(markdownString)
14
+ matchLinks = markdownString.scan(/\[[^\]]*\]\(([^\)]*)\)/m)
15
+ if !matchLinks.nil?
17
16
 
18
- matchLinks.each do |matchLink|
19
- link = matchLink[0]
20
- linkMarkdown = "(#{link})"
21
- newLinkMarkdown = linkMarkdown
17
+ matchLinks.each do |matchLink|
18
+ link = matchLink[0]
19
+ linkMarkdown = "(#{link})"
20
+ newLinkMarkdown = linkMarkdown
21
+
22
+ if isForJekyll
23
+ newLinkMarkdown = "(#{link}){:target=\"_blank\"}"
24
+ end
25
+
26
+
27
+ if !usersPostURLs.nil?
28
+ # if have provide user's post urls
29
+ # find & replace medium url to local post url if matched
22
30
 
23
31
  if isForJekyll
24
- newLinkMarkdown = "(#{link}){:target=\"_blank\"}"
32
+ postPath = link.split("/").last.split("-").last
33
+ else
34
+ postPath = link.split("/").last
25
35
  end
26
36
 
27
-
28
- if !usersPostURLs.nil?
29
- # if have provide user's post urls
30
- # find & replace medium url to local post url if matched
31
-
37
+ if !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == postPath.split("-").last }.nil?
32
38
  if isForJekyll
33
- postPath = link.split("/").last.split("-").last
39
+ newLinkMarkdown = "(../#{postPath})"
34
40
  else
35
- postPath = link.split("/").last
36
- end
37
-
38
- if !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == postPath.split("-").last }.nil?
39
- if isForJekyll
40
- newLinkMarkdown = "(../#{postPath})"
41
- else
42
- newLinkMarkdown = "(#{postPath})"
43
- end
41
+ newLinkMarkdown = "(#{postPath})"
44
42
  end
45
43
  end
44
+ end
46
45
 
47
- if linkMarkdown != newLinkMarkdown
48
- markdownString = markdownString.sub! linkMarkdown, newLinkMarkdown
49
- end
46
+ if linkMarkdown != newLinkMarkdown
47
+ markdownString = markdownString.sub! linkMarkdown, newLinkMarkdown
50
48
  end
51
49
  end
52
50
  end
@@ -11,9 +11,9 @@ class MIXTAPEEMBEDParser < Parser
11
11
  if !paragraph.mixtapeMetadata.nil? && !paragraph.mixtapeMetadata.href.nil?
12
12
  ogImageURL = Helper.fetchOGImage(paragraph.mixtapeMetadata.href)
13
13
  if !ogImageURL.nil?
14
- "\r\n\r\n[![#{paragraph.orgTextWithEscape}](#{ogImageURL} \"#{paragraph.orgTextWithEscape}\")](#{paragraph.mixtapeMetadata.href})\r\n\r\n"
14
+ "\r\n\r\n[![#{paragraph.text}](#{ogImageURL} \"#{paragraph.text}\")](#{paragraph.mixtapeMetadata.href})\r\n\r\n"
15
15
  else
16
- "\n[#{paragraph.orgTextWithEscape}](#{paragraph.mixtapeMetadata.href})"
16
+ "\n[#{paragraph.text}](#{paragraph.mixtapeMetadata.href})"
17
17
  end
18
18
  else
19
19
  "\n#{paragraph.text}"
@@ -174,6 +174,12 @@ class MarkupStyleRender
174
174
  tag = TagChar.new(3, markup.start, markup.end, "`", "`")
175
175
  elsif markup.type == "STRONG"
176
176
  tag = TagChar.new(2, markup.start, markup.end, "**", "**")
177
+ elsif markup.type == "ESCAPE"
178
+ escapeTagChar = TagChar.new(0,markup.start, markup.end,'','')
179
+ escapeTagChar.startChars = TextChar.new('\\'.chars,'Text')
180
+ escapeTagChar.endChars = TextChar.new([],'Text')
181
+
182
+ tag = escapeTagChar
177
183
  elsif markup.type == "A"
178
184
  url = markup.href
179
185
  if markup.anchorType == "LINK"
data/lib/PathPolicy.rb CHANGED
@@ -8,18 +8,29 @@ class PathPolicy
8
8
  end
9
9
 
10
10
  def getRelativePath(lastPath)
11
- if lastPath.nil?
12
- "#{path}"
13
- else
14
- "#{path}/#{lastPath}"
11
+ result = path
12
+
13
+ if result != ""
14
+ result += "/"
15
+ end
16
+
17
+ if !lastPath.nil?
18
+ result += lastPath
15
19
  end
20
+
21
+ result
16
22
  end
17
23
 
18
24
  def getAbsolutePath(lastPath)
19
- if lastPath.nil?
20
- "#{rootPath}/#{path}"
21
- else
22
- "#{rootPath}/#{path}/#{lastPath}"
25
+ result = rootPath
26
+
27
+ if !lastPath.nil?
28
+ if result != ""
29
+ result += "/"
30
+ end
31
+ result += "#{lastPath}"
23
32
  end
33
+
34
+ result
24
35
  end
25
36
  end
data/lib/Post.rb CHANGED
@@ -70,13 +70,14 @@ class Post
70
70
  if !previewImage.nil?
71
71
  previewImageFIleName = content&.dig(previewImage, "id")
72
72
 
73
- imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), postID)
73
+ imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(postID), pathPolicy.getRelativePath(postID))
74
+
74
75
  absolutePath = imagePathPolicy.getAbsolutePath(previewImageFIleName)
75
76
 
76
77
  imageURL = "https://miro.medium.com/max/1400/#{previewImageFIleName}"
77
78
 
78
79
  if ImageDownloader.download(absolutePath, imageURL)
79
- relativePath = "#{pathPolicy.getRelativePath(nil)}/#{imagePathPolicy.getRelativePath(previewImageFIleName)}"
80
+ relativePath = imagePathPolicy.getRelativePath(previewImageFIleName)
80
81
  postInfo.previewImage = relativePath
81
82
  end
82
83
  end
@@ -122,11 +122,11 @@ class ZMediumFetcher
122
122
  end
123
123
 
124
124
  if isForJekyll
125
- postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "_posts/zmediumtomarkdown")
126
- imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "assets")
125
+ postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath("_posts/zmediumtomarkdown"), pathPolicy.getRelativePath("_posts/zmediumtomarkdown"))
126
+ imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath("assets"), "assets")
127
127
  else
128
- postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "zmediumtomarkdown")
129
- imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath(nil), "assets")
128
+ postPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath("zmediumtomarkdown"), pathPolicy.getRelativePath("zmediumtomarkdown"))
129
+ imagePathPolicy = PathPolicy.new(postPathPolicy.getAbsolutePath("assets"), "assets")
130
130
  end
131
131
 
132
132
  progress.postPath = postPath
@@ -229,8 +229,19 @@ class ZMediumFetcher
229
229
 
230
230
  absolutePath = postPathPolicy.getAbsolutePath("#{postWithDatePath}.md")
231
231
 
232
- # if markdown file is exists and last modification time is >= latestPublishedAt(last update post time on medium)
233
- if File.file?(absolutePath) && File.mtime(absolutePath).to_time.to_i >= postInfo.latestPublishedAt.to_i
232
+ fileLatestPublishedAt = nil
233
+
234
+ if File.file?(absolutePath)
235
+ lines = File.foreach(absolutePath).first(15)
236
+ if lines.first.start_with?("---")
237
+ dateLine = lines.select { |line| line.start_with?("last_modified_at:") }.first
238
+ if !dateLine.nil?
239
+ fileLatestPublishedAt = Time.parse(dateLine[/^(last_modified_at:)\s+(\S*)/, 2]).to_i
240
+ end
241
+ end
242
+ end
243
+
244
+ if !fileLatestPublishedAt.nil? && fileLatestPublishedAt >= postInfo.latestPublishedAt.to_i
234
245
  # Already downloaded and nothing has changed!, Skip!
235
246
  progress.currentPostParagraphIndex = paragraphs.length
236
247
  progress.message = "Skip, Post already downloaded and nothing has changed!"
@@ -253,7 +264,7 @@ class ZMediumFetcher
253
264
  end
254
265
 
255
266
  result = startParser.parse(paragraph)
256
- result = linkParser.parse(result, paragraph.markupLinks)
267
+ result = linkParser.parse(result)
257
268
 
258
269
  file.puts(result)
259
270
 
@@ -310,7 +321,7 @@ class ZMediumFetcher
310
321
  if isForJekyll
311
322
  downloadPathPolicy = pathPolicy
312
323
  else
313
- downloadPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(nil), "users/#{username}")
324
+ downloadPathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath("users/#{username}"), pathPolicy.getRelativePath("users/#{username}"))
314
325
  end
315
326
 
316
327
  index = 0
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ZMediumToMarkdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - ZhgChgLi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-18 00:00:00.000000000 Z
11
+ date: 2022-07-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri