ZMediumToMarkdown 2.0.4 → 2.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e5ce4efab5975e6c870bacec7f2a480bbf0f938dfdaa9fe6f050e0c99254f646
4
- data.tar.gz: c6bd9f40a681d5f75213604bdbf4e1678d78401e2403d0808cbfe9cb748d554c
3
+ metadata.gz: 411d9f653e728ad8708bfdd0738e181de68085865a3860e5d5272125236e856c
4
+ data.tar.gz: 94c713a02a605b480bf63e1022a4ee53dcfca3fe4355daf401b2253354f6f194
5
5
  SHA512:
6
- metadata.gz: 2e221e9785da97d866daa114ca1dee36b9ad94f89434b2b88dc8b8ea34f3c2fbd50e85fcac1046a272f518da89ac6534d3b3975bb9513a8f05ee9195b2c623bb
7
- data.tar.gz: 9710e3fdb0076085770f516865dd7c30fdcd0ddcc6240521516025f53c66993cf49c9972637230dfd02b52a16d75cf7d5ddeeeb972fd86330ad028b8d210d4ec
6
+ metadata.gz: 1b90384c75f6de2b1fcaaac3fcc1f3885ec6bf223698ce677c7cd621eec479eabead53794eb66d7ce2367867ef187468c733473e1e42a711eadf360a249a5803
7
+ data.tar.gz: 89c9ec47b56b047042a060716f923ad1924722d17c842ccce954252976d8112cac0c3d610f7626caadd1a4eaada5031dea17ca26dea4349e2b8e9e2ca1431b1c
data/lib/Helper.rb CHANGED
@@ -19,6 +19,10 @@ class Helper
19
19
  content
20
20
  end
21
21
 
22
+ def self.escapeMarkdown(text)
23
+ text.gsub(/(\*|_|`|\||\\|\{|\}|\[|\]|\(|\)|#|\+|\-|\.|\!)/){ |x| "\\#{x}" }
24
+ end
25
+
22
26
  def self.escapeHTML(text)
23
27
  if text == "<"
24
28
  "&lt;"
@@ -101,8 +105,8 @@ class Helper
101
105
  result = "---\n"
102
106
  result += "title: #{title}\n"
103
107
  result += "author: #{postInfo.creator}\n"
104
- result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
105
- result += "last_modified_at: #{postInfo.latestPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%LZ')}\n"
108
+ result += "date: #{postInfo.firstPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%L%z')}\n"
109
+ result += "last_modified_at: #{postInfo.latestPublishedAt.strftime('%Y-%m-%dT%H:%M:%S.%L%z')}\n"
106
110
  result += "categories: #{postInfo.collectionName}\n"
107
111
  result += "tags: [#{postInfo.tags.join(",")}]\n"
108
112
  result += "description: #{postInfo.description}\n"
@@ -185,9 +189,14 @@ class Helper
185
189
  end
186
190
 
187
191
 
188
- def self.createWatermark(postURL)
192
+ def self.createWatermark(postURL, isForJekyll)
193
+ jekyllOpen = ""
194
+ if isForJekyll
195
+ jekyllOpen = "{:target=\"_blank\"}"
196
+ end
197
+
189
198
  text = "\r\n\r\n\r\n"
190
- text += "_Converted [Medium Post](#{postURL}) by [ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)._"
199
+ text += "_Converted [Medium Post](#{postURL})#{jekyllOpen} by [ZMediumToMarkdown](https://github.com/ZhgChgLi/ZMediumToMarkdown)#{jekyllOpen}._"
191
200
  text += "\r\n"
192
201
 
193
202
  text
@@ -96,14 +96,23 @@ class Paragraph
96
96
  end
97
97
  end
98
98
 
99
- i = 0
100
- while i = orgText.index(/(\*|_|`|\||\\|\{|\}|\[|\]|\(|\)|#|\+|\-|\.|\!)/, i + 1)
101
- escapeMarkup = {
102
- "type" => 'ESCAPE',
103
- "start" => i,
104
- "end" => i + 1
105
- }
106
- markups.append(Markup.new(escapeMarkup))
99
+ index = 0
100
+ orgText.each_char do |char|
101
+
102
+ if char.chars.join() =~ /(\*|_|`|\||\\|\{|\}|\[|\]|\(|\)|#|\+|\-|\.|\!)/
103
+ escapeMarkup = {
104
+ "type" => 'ESCAPE',
105
+ "start" => index,
106
+ "end" => index + 1
107
+ }
108
+ markups.append(Markup.new(escapeMarkup))
109
+ end
110
+
111
+ index += 1
112
+ if char.bytes.length >= 4
113
+ # some emoji need more space (in Medium)
114
+ index += 1
115
+ end
107
116
  end
108
117
 
109
118
  @markups = markups
@@ -16,7 +16,7 @@ class BQParser < Parser
16
16
 
17
17
  def parse(paragraph)
18
18
  if BQParser.isBQ(paragraph)
19
- result = "> #{paragraph.text}"
19
+ result = "> #{paragraph.text} \n\n"
20
20
  result
21
21
  else
22
22
  if !nextParser.nil?
@@ -19,6 +19,12 @@ class IframeParser < Parser
19
19
  end
20
20
 
21
21
  def parse(paragraph)
22
+
23
+ jekyllOpen = ""
24
+ if isForJekyll
25
+ jekyllOpen = "{:target=\"_blank\"}"
26
+ end
27
+
22
28
  if paragraph.type == 'IFRAME'
23
29
 
24
30
  if !paragraph.iframe.src.nil? && paragraph.iframe.src != ""
@@ -27,7 +33,7 @@ class IframeParser < Parser
27
33
  url = "https://medium.com/media/#{paragraph.iframe.id}"
28
34
  end
29
35
 
30
- result = "[#{paragraph.iframe.title}](#{url})"
36
+ result = "[#{paragraph.iframe.title}](#{url})#{jekyllOpen}"
31
37
 
32
38
  if !url[/(www\.youtube\.com)/].nil?
33
39
  # is youtube
@@ -49,12 +55,12 @@ class IframeParser < Parser
49
55
  if ImageDownloader.download(absolutePath, imageURL)
50
56
  relativePath = imagePathPolicy.getRelativePath(fileName)
51
57
  if isForJekyll
52
- result = "\r\n\r\n[![#{title}](/#{relativePath} \"#{title}\")](#{params["url"]})\r\n\r\n"
58
+ result = "\r\n\r\n[![#{title}](/#{relativePath} \"#{title}\")](#{params["url"]})#{jekyllOpen}\r\n\r\n"
53
59
  else
54
- result = "\r\n\r\n[![#{title}](#{relativePath} \"#{title}\")](#{params["url"]})\r\n\r\n"
60
+ result = "\r\n\r\n[![#{title}](#{relativePath} \"#{title}\")](#{params["url"]})#{jekyllOpen}\r\n\r\n"
55
61
  end
56
62
  else
57
- result = "\r\n[#{title}](#{params["url"]})\r\n"
63
+ result = "\r\n[#{title}](#{params["url"]})#{jekyllOpen}\r\n"
58
64
  end
59
65
  end
60
66
  else
@@ -118,9 +124,9 @@ class IframeParser < Parser
118
124
  createdAt = Time.parse(twitterObj["created_at"]).strftime('%Y-%m-%d %H:%M:%S')
119
125
  result = "\n\n"
120
126
  result += "■■■■■■■■■■■■■■ \n"
121
- result += "> **[#{twitterObj["user"]["name"]}](https://twitter.com/#{twitterObj["user"]["screen_name"]}) @ Twitter Says:** \n\n"
127
+ result += "> **[#{twitterObj["user"]["name"]}](https://twitter.com/#{twitterObj["user"]["screen_name"]})#{jekyllOpen} @ Twitter Says:** \n\n"
122
128
  result += "> > #{fullText} \n\n"
123
- result += "> **Tweeted at [#{createdAt}](#{ogURL}).** \n\n"
129
+ result += "> **Tweeted at [#{createdAt}](#{ogURL})#{jekyllOpen}.** \n\n"
124
130
  result += "■■■■■■■■■■■■■■ \n\n"
125
131
  end
126
132
  else
@@ -132,9 +138,9 @@ class IframeParser < Parser
132
138
  end
133
139
 
134
140
  if !ogImageURL.nil?
135
- result = "\r\n\r\n[![#{title}](#{ogImageURL} \"#{title}\")](#{ogURL})\r\n\r\n"
141
+ result = "\r\n\r\n[![#{title}](#{ogImageURL} \"#{title}\")](#{ogURL})#{jekyllOpen}\r\n\r\n"
136
142
  else
137
- result = "[#{title}](#{ogURL})"
143
+ result = "[#{title}](#{ogURL})#{jekyllOpen}"
138
144
  end
139
145
  end
140
146
  end
@@ -5,15 +5,24 @@ require "Parsers/Parser"
5
5
  require 'Models/Paragraph'
6
6
 
7
7
  class MIXTAPEEMBEDParser < Parser
8
- attr_accessor :nextParser
8
+ attr_accessor :nextParser, :isForJekyll
9
+
10
+ def initialize(isForJekyll)
11
+ @isForJekyll = isForJekyll
12
+ end
13
+
9
14
  def parse(paragraph)
10
15
  if paragraph.type == 'MIXTAPE_EMBED'
11
16
  if !paragraph.mixtapeMetadata.nil? && !paragraph.mixtapeMetadata.href.nil?
12
17
  ogImageURL = Helper.fetchOGImage(paragraph.mixtapeMetadata.href)
13
18
  if !ogImageURL.nil?
14
- "\r\n\r\n[![#{paragraph.text}](#{ogImageURL} \"#{paragraph.text}\")](#{paragraph.mixtapeMetadata.href})\r\n\r\n"
19
+ jekyllOpen = ""
20
+ if isForJekyll
21
+ jekyllOpen = "{:target=\"_blank\"}"
22
+ end
23
+ "\r\n\r\n[![](#{ogImageURL})](#{paragraph.mixtapeMetadata.href})#{jekyllOpen}\r\n\r\n"
15
24
  else
16
- "\n[#{paragraph.text}](#{paragraph.mixtapeMetadata.href})"
25
+ "\n#{paragraph.text}"
17
26
  end
18
27
  else
19
28
  "\n#{paragraph.text}"
@@ -7,7 +7,7 @@ require 'securerandom'
7
7
  require 'User'
8
8
 
9
9
  class MarkupParser
10
- attr_accessor :body, :paragraph, :isForJekyll
10
+ attr_accessor :body, :paragraph, :isForJekyll, :usersPostURLs
11
11
 
12
12
  def initialize(paragraph, isForJekyll)
13
13
  @paragraph = paragraph
@@ -18,7 +18,7 @@ class MarkupParser
18
18
  result = paragraph.text
19
19
  if !paragraph.markups.nil? && paragraph.markups.length > 0
20
20
  markupRender = MarkupStyleRender.new(paragraph, isForJekyll)
21
-
21
+ markupRender.usersPostURLs = usersPostURLs
22
22
  begin
23
23
  result = markupRender.parse()
24
24
  rescue => e
@@ -5,7 +5,7 @@ require 'Models/Paragraph'
5
5
  require 'Helper'
6
6
 
7
7
  class MarkupStyleRender
8
- attr_accessor :paragraph, :chars, :encodeType, :isForJekyll
8
+ attr_accessor :paragraph, :chars, :encodeType, :isForJekyll, :usersPostURLs
9
9
 
10
10
  class TextChar
11
11
  attr_accessor :chars, :type
@@ -188,7 +188,27 @@ class MarkupStyleRender
188
188
  url = "https://medium.com/u/#{markup.userId}"
189
189
  end
190
190
 
191
- tag = TagChar.new(1, markup.start, markup.end, "[", "](#{url})")
191
+ lastPath = url.split("/").last
192
+ lastQuery = nil
193
+ if !lastPath.nil?
194
+ lastQuery = lastPath.split("-").last
195
+ end
196
+
197
+ if !usersPostURLs.nil? && !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == lastQuery }.nil?
198
+ if isForJekyll
199
+ url = "(../#{lastQuery}/)"
200
+ else
201
+ url = "(#{lastPath})"
202
+ end
203
+ else
204
+ if isForJekyll
205
+ url = "(#{url}){:target=\"_blank\"}"
206
+ else
207
+ url = "(#{url})"
208
+ end
209
+ end
210
+
211
+ tag = TagChar.new(1, markup.start, markup.end, "[", "]#{url}")
192
212
  else
193
213
  Helper.makeWarningText("Undefined Markup Type: #{markup.type}.")
194
214
  end
@@ -205,6 +225,15 @@ class MarkupStyleRender
205
225
 
206
226
  chars.each do |index, char|
207
227
 
228
+ # is in code block
229
+ if !stack.last.nil? && stack.last.endChars.chars.join() == "`"
230
+ containEndTag = tags.select { |tag| tag.endIndex == index && tag.endChars.chars.join() == "`" }.length > 0
231
+ if !containEndTag
232
+ response.append(char)
233
+ next
234
+ end
235
+ end
236
+
208
237
  if char.chars.join() == "\n"
209
238
  brStack = stack.dup
210
239
  while brStack.length > 0
@@ -228,21 +257,16 @@ class MarkupStyleRender
228
257
  end
229
258
 
230
259
  if char.chars.join() != "\n"
231
- if !stack.select { |tag| tag.startChars.chars.join() == "`" }.nil?
232
- # is in code block
233
- response.append(char)
234
- else
235
- resultChar = Helper.escapeMarkdown(char.chars.join())
236
- if isForJekyll
237
- resultChar = Helper.escapeHTML(resultChar)
238
- end
239
-
240
- response.append(TextChar.new(resultChar.chars, "Text"))
260
+ resultChar = char.chars.join()
261
+ if isForJekyll
262
+ resultChar = Helper.escapeHTML(resultChar)
241
263
  end
264
+
265
+ response.append(TextChar.new(resultChar.chars, "Text"))
242
266
  end
243
267
 
244
268
  endTags = tags.select { |tag| tag.endIndex == index }
245
- if !endTags.nil? && endTags.length > 0
269
+ if endTags.length > 0
246
270
  mismatchTags = []
247
271
  while endTags.length > 0
248
272
  stackTag = stack.pop
@@ -268,14 +292,14 @@ class MarkupStyleRender
268
292
  tag = stack.pop
269
293
  response.push(tag.endChars)
270
294
  end
271
-
295
+
272
296
  response = optimize(response)
273
297
  result = response.map{ |response| response.chars }.join()
274
298
 
275
299
  else
276
300
  response = []
277
301
  chars.each do |index, char|
278
- resultChar = escapeMarkdown(char)
302
+ resultChar = char
279
303
  if isForJekyll
280
304
  resultChar = escapeHTML(char)
281
305
  end
@@ -9,7 +9,7 @@ class PQParser < Parser
9
9
  if paragraph.type == 'PQ'
10
10
  result = "\r\n\r\n"
11
11
  paragraph.text.each_line do |p|
12
- result += "> #{p}"
12
+ result += "> #{p} \n\n"
13
13
  end
14
14
  result += "\r\n\r\n"
15
15
 
@@ -19,7 +19,6 @@ require "Parsers/MarkupParser"
19
19
  require "Parsers/OLIParser"
20
20
  require "Parsers/MIXTAPEEMBEDParser"
21
21
  require "Parsers/PQParser"
22
- require "Parsers/LinkParser"
23
22
  require "Parsers/CodeBlockParser"
24
23
 
25
24
  require "PathPolicy"
@@ -30,7 +29,7 @@ require 'date'
30
29
 
31
30
  class ZMediumFetcher
32
31
 
33
- attr_accessor :progress, :linkParser, :isForJekyll
32
+ attr_accessor :progress, :usersPostURLs, :isForJekyll
34
33
 
35
34
  class Progress
36
35
  attr_accessor :username, :postPath, :currentPostIndex, :totalPostsLength, :currentPostParagraphIndex, :totalPostParagraphsLength, :message
@@ -71,7 +70,7 @@ class ZMediumFetcher
71
70
 
72
71
  def initialize
73
72
  @progress = Progress.new()
74
- @linkParser = LinkParser.new()
73
+ @usersPostURLs = nil
75
74
  @isForJekyll = false
76
75
  end
77
76
 
@@ -89,7 +88,7 @@ class ZMediumFetcher
89
88
  ppParser.setNext(uliParser)
90
89
  oliParser = OLIParser.new()
91
90
  uliParser.setNext(oliParser)
92
- mixtapeembedParser = MIXTAPEEMBEDParser.new()
91
+ mixtapeembedParser = MIXTAPEEMBEDParser.new(isForJekyll)
93
92
  oliParser.setNext(mixtapeembedParser)
94
93
  pqParser = PQParser.new()
95
94
  mixtapeembedParser.setNext(pqParser)
@@ -223,8 +222,6 @@ class ZMediumFetcher
223
222
  progress.message = "Converting Post..."
224
223
  progress.printLog()
225
224
 
226
- linkParser.isForJekyll = isForJekyll
227
-
228
225
  postWithDatePath = "#{postInfo.firstPublishedAt.strftime("%Y-%m-%d")}-#{postPath}"
229
226
 
230
227
  absolutePath = postPathPolicy.getAbsolutePath("#{postWithDatePath}.md")
@@ -260,12 +257,12 @@ class ZMediumFetcher
260
257
 
261
258
  if !(CodeBlockParser.isCodeBlock(paragraph) || PREParser.isPRE(paragraph))
262
259
  markupParser = MarkupParser.new(paragraph, isForJekyll)
260
+ markupParser.usersPostURLs = usersPostURLs
263
261
  paragraph.text = markupParser.parse()
264
262
  end
265
263
 
266
264
  result = startParser.parse(paragraph)
267
- result = linkParser.parse(result)
268
-
265
+
269
266
  file.puts(result)
270
267
 
271
268
  index += 1
@@ -274,7 +271,7 @@ class ZMediumFetcher
274
271
  progress.printLog()
275
272
  end
276
273
 
277
- postWatermark = Helper.createWatermark(postURL)
274
+ postWatermark = Helper.createWatermark(postURL, isForJekyll)
278
275
  if !postWatermark.nil?
279
276
  file.puts(postWatermark)
280
277
  end
@@ -311,7 +308,7 @@ class ZMediumFetcher
311
308
  nextID = postPageInfo["nextID"]
312
309
  end while !nextID.nil?
313
310
 
314
- linkParser.usersPostURLs = postURLS
311
+ @usersPostURLs = postURLS
315
312
 
316
313
  progress.totalPostsLength = postURLS.length
317
314
  progress.currentPostIndex = 0
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ZMediumToMarkdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.4
4
+ version: 2.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - ZhgChgLi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-20 00:00:00.000000000 Z
11
+ date: 2022-07-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -74,7 +74,6 @@ files:
74
74
  - lib/Parsers/H4Parser.rb
75
75
  - lib/Parsers/IMGParser.rb
76
76
  - lib/Parsers/IframeParser.rb
77
- - lib/Parsers/LinkParser.rb
78
77
  - lib/Parsers/MIXTAPEEMBEDParser.rb
79
78
  - lib/Parsers/MarkupParser.rb
80
79
  - lib/Parsers/MarkupStyleRender.rb
@@ -1,54 +0,0 @@
1
- $lib = File.expand_path('../', File.dirname(__FILE__))
2
-
3
- require 'Models/Paragraph'
4
-
5
- class LinkParser
6
- attr_accessor :usersPostURLs, :isForJekyll
7
-
8
- def initialize()
9
- @usersPostURLs = nil
10
- @isForJekyll = false
11
- end
12
-
13
- def parse(markdownString)
14
- matchLinks = markdownString.scan(/\[[^\]]*\]\(([^\)]*)\)/m)
15
- if !matchLinks.nil?
16
-
17
- matchLinks.each do |matchLink|
18
- link = matchLink[0]
19
- linkMarkdown = "(#{link})"
20
- newLinkMarkdown = linkMarkdown
21
-
22
- if isForJekyll
23
- newLinkMarkdown = "(#{link}){:target=\"_blank\"}"
24
- end
25
-
26
-
27
- if !usersPostURLs.nil?
28
- # if have provide user's post urls
29
- # find & replace medium url to local post url if matched
30
-
31
- if isForJekyll
32
- postPath = link.split("/").last.split("-").last
33
- else
34
- postPath = link.split("/").last
35
- end
36
-
37
- if !usersPostURLs.find { |usersPostURL| usersPostURL.split("/").last.split("-").last == postPath.split("-").last }.nil?
38
- if isForJekyll
39
- newLinkMarkdown = "(../#{postPath}/)"
40
- else
41
- newLinkMarkdown = "(#{postPath})"
42
- end
43
- end
44
- end
45
-
46
- if linkMarkdown != newLinkMarkdown
47
- markdownString = markdownString.sub! linkMarkdown, newLinkMarkdown
48
- end
49
- end
50
- end
51
-
52
- markdownString
53
- end
54
- end