ZMediumToMarkdown 3.5.1 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aad5256e92463eb91197f407050468d928098faf55b90bf97454bbb75544fa41
4
- data.tar.gz: 8e05c320fbb2dd468236e69e8c54a9d0b4a403ea10840b49c30d8b650db71b9b
3
+ metadata.gz: 7c9120282aaa35d8568b605ae3af2c24d874c60604198168e603d38457d1ae2b
4
+ data.tar.gz: fe051875aa9f1970da37bd858aa937b2f529d26af82baef3e1ccec71385a09f4
5
5
  SHA512:
6
- metadata.gz: 2a32d5a034f142eece10a2ad997473d9f00223435f5d6b28671d178545f3d17f925698ca3c032a86fa7aaffff23db6fa172cf3ac2ae4efd5f8945b2c3111d85f
7
- data.tar.gz: 57a66882f1447c6ddb58da6f68d74b79f3fc7f501eba7d09707ecef879ac791fd1ab80f6f0e69de754085b2f30f8f74f43bdfc4cd25d32eb273a8861516d853a
6
+ metadata.gz: 160d52fafbcbe3fdfe1c0653934fc4bffa99c76b173bec42108cc47df670cdd37a4c7e0444ccd84ee1185945ffa9ec50372004a812761edb7e23b40dec6a88b3
7
+ data.tar.gz: 9476fe987ff76ad6e9002dfc142241546654ca7a64771c9d69d296e8938de31bf5dac505ce737d3f7e6299d83c58fb1f5bfa62c49c0e043892476cc4255eea1c
data/lib/CLI.rb CHANGED
@@ -158,8 +158,7 @@ module CLI
158
158
  # other than the default upstream Medium URL — i.e. user pointed it
159
159
  # at their own Cloudflare Worker (or another proxy).
160
160
  def proxyConfigured?
161
- host = ENV['MEDIUM_HOST'].to_s
162
- !host.empty? && host != DEFAULT_MEDIUM_HOST
161
+ !Request.mediumProxyOrigin.nil?
163
162
  end
164
163
 
165
164
  # Only warn when the invocation will actually hit Medium — skip for
@@ -1,21 +1,74 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+
1
4
  require 'Helper'
5
+ require 'Request'
2
6
 
3
7
  class ImageDownloader
8
+ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36'.freeze
9
+ MAX_REDIRECTS = 5
10
+
11
+ # Downloads `url` to disk at `path`. Routes medium.com / miro.medium.com
12
+ # URLs through MEDIUM_HOST when configured (so requests inherit the
13
+ # Worker's IP reputation + auth) and attaches `X-Medium-Proxy-Secret`
14
+ # when the destination is the user's proxy. The global cookie jar is sent with every request, regardless of host (NOTE(review): confirm this is intended).
15
+ # Other hosts (i.ytimg.com, pbs.twimg.com, etc.) are fetched directly.
4
16
  def self.download(path, url)
5
- dir = path.split("/")
6
- dir.pop()
7
- Helper.createDirIfNotExist(dir.join("/"))
8
-
9
- if File.exist?(path)
10
- return true
17
+ dir = path.split('/')
18
+ dir.pop
19
+ Helper.createDirIfNotExist(dir.join('/'))
20
+
21
+ return true if File.exist?(path)
22
+
23
+ rewritten = Request.mediumProxiedURL(url)
24
+ uri = URI.parse(rewritten) rescue nil
25
+ return false if uri.nil? || uri.host.nil?
26
+
27
+ response = fetchWithRedirects(uri, MAX_REDIRECTS)
28
+ return false if response.nil? || response.code.to_i != 200
29
+
30
+ body = response.body
31
+ return false if body.nil? || body.empty?
32
+
33
+ File.binwrite(path, body)
34
+ true
35
+ rescue StandardError
36
+ false
37
+ end
38
+
39
+ def self.fetchWithRedirects(uri, limit)
40
+ return nil if limit <= 0
41
+
42
+ https = Net::HTTP.new(uri.host, uri.port)
43
+ https.use_ssl = (uri.scheme == 'https')
44
+ https.open_timeout = 10
45
+ https.read_timeout = 60
46
+
47
+ request = Net::HTTP::Get.new(uri)
48
+ request['User-Agent'] = USER_AGENT
49
+
50
+ if Request.proxyURI?(uri)
51
+ secret = ENV['MEDIUM_HOST_SECRET'].to_s
52
+ request['X-Medium-Proxy-Secret'] = secret unless secret.empty?
11
53
  end
12
54
 
13
- begin
14
- imageResponse = URI.open(url)
15
- File.write(path, imageResponse.read)
16
- true
17
- rescue
18
- false
55
+ cookies = $cookies || {}
56
+ cookieString = cookies.reject { |_, v| v.nil? }
57
+ .map { |k, v| "#{k}=#{v}" }
58
+ .join('; ')
59
+ request['Cookie'] = cookieString unless cookieString.empty?
60
+
61
+ response = https.request(request)
62
+
63
+ case response.code.to_i
64
+ when 301, 302, 303, 307, 308
65
+ location = response['location'].to_s
66
+ return nil if location.empty?
67
+ target = URI.parse(URI.join(uri.to_s, location).to_s)
68
+ target = URI.parse(Request.mediumProxiedURL(target.to_s))
69
+ fetchWithRedirects(target, limit - 1)
70
+ else
71
+ response
19
72
  end
20
73
  end
21
- end
74
+ end
@@ -18,6 +18,16 @@ class Paragraph
18
18
 
19
19
  class Markup
20
20
  attr_accessor :type, :start, :end, :href, :anchorType, :userId, :linkMetadata
21
+
22
+ # Semantic identity fields used for `==` / `eql?` / `hash`. `start` and
23
+ # `end` are interval coordinates (handled by Rangeable as the [lo, hi]
24
+ # pair) rather than identity. `linkMetadata` is currently unused
25
+ # downstream so it is excluded too. This identity is what lets
26
+ # Rangeable merge two Markups that describe the same logical span
27
+ # (e.g. two STRONG runs that overlap) into a single coalesced
28
+ # interval.
29
+ SEMANTIC_KEYS = [:type, :href, :anchorType, :userId].freeze
30
+
21
31
  def initialize(json)
22
32
  @type = json['type']
23
33
  @start = json['start']
@@ -27,6 +37,16 @@ class Paragraph
27
37
  @userId = json['userId']
28
38
  @linkMetadata = json['linkMetadata']
29
39
  end
40
+
41
+ def ==(other)
42
+ return false unless other.is_a?(Markup)
43
+ SEMANTIC_KEYS.all? { |k| public_send(k) == other.public_send(k) }
44
+ end
45
+ alias_method :eql?, :==
46
+
47
+ def hash
48
+ SEMANTIC_KEYS.map { |k| public_send(k) }.hash
49
+ end
30
50
  end
31
51
 
32
52
  class MetaData
@@ -1,5 +1,6 @@
1
1
  require 'Models/Paragraph'
2
2
  require 'Helper'
3
+ require 'rangeable'
3
4
 
4
5
  # Renders a Paragraph's text + Markup list into final markdown.
5
6
  #
@@ -111,7 +112,7 @@ class MarkupStyleRender
111
112
  end
112
113
 
113
114
  def buildTag(markup)
114
- case markup.type
115
+ tag = case markup.type
115
116
  when "EM" then TagChar.new(2, markup.start, markup.end, "_", "_")
116
117
  when "CODE" then TagChar.new(0, markup.start, markup.end, "`", "`")
117
118
  when "STRONG" then TagChar.new(2, markup.start, markup.end, "**", "**")
@@ -119,8 +120,12 @@ class MarkupStyleRender
119
120
  when "A" then buildAnchorTag(markup)
120
121
  else
121
122
  Helper.makeWarningText("Undefined Markup Type: #{markup.type}.")
122
- nil
123
+ return nil
123
124
  end
125
+ # Stash the originating Markup on the tag so walkCharsWithTags can
126
+ # use it as the Rangeable element key (see #walkCharsWithTags).
127
+ tag&.instance_variable_set(:@_markup, markup)
128
+ tag
124
129
  end
125
130
 
126
131
  def buildAnchorTag(markup)
@@ -148,18 +153,42 @@ class MarkupStyleRender
148
153
  end
149
154
  end
150
155
 
156
+ # Walks every char index and dispatches into the open/close hooks. We
157
+ # build two index-keyed Hashes (`opens_at`, `closes_at`) up front so the
158
+ # hot path is O(1) per char instead of the previous O(m) `tags.select`
159
+ # scan; combined with the linear walk over chars that turns total cost
160
+ # from O(L · m) into O(L + m). Same-position tags inside each bucket
161
+ # keep their pre-sorted order from the caller.
162
+ #
163
+ # ESCAPE tags bypass Rangeable entirely. ESCAPE ranges are single-char
164
+ # synthetic markups injected by Paragraph#initialize and they MUST stay
165
+ # disjoint — feeding them through Rangeable would coalesce two ESCAPEs
166
+ # at adjacent positions into a single span, double-emitting the
167
+ # backslash. Non-ESCAPE markups go through Rangeable so identical-type
168
+ # overlapping spans (e.g. two STRONGs that share a few chars) get
169
+ # merged into a single tag pair.
151
170
  def walkCharsWithTags(tags)
171
+ rangeable_tags, escape_tags = tags.partition { |t| !escape_tag?(t) }
172
+ merged_tags = mergeTagsViaRangeable(rangeable_tags)
173
+ final_tags = (merged_tags + escape_tags).sort_by(&:startIndex)
174
+
175
+ opens_at = Hash.new { |h, k| h[k] = [] }
176
+ closes_at = Hash.new { |h, k| h[k] = [] }
177
+ final_tags.each do |t|
178
+ opens_at[t.startIndex] << t
179
+ closes_at[t.endIndex] << t
180
+ end
181
+
152
182
  response = []
153
183
  stack = []
154
-
155
184
  chars.each do |index, char|
156
185
  if newline?(char)
157
186
  emitNewline(char, stack, response)
158
187
  end
159
188
 
160
- openStartingTags(tags, index, stack, response)
189
+ openStartingTags(opens_at[index], stack, response) if opens_at.key?(index)
161
190
  emitChar(char, stack, response) unless newline?(char)
162
- closeEndingTags(tags, index, stack, response)
191
+ closeEndingTags(closes_at[index], stack, response) if closes_at.key?(index)
163
192
  end
164
193
 
165
194
  # Flush any tags still open at end-of-paragraph.
@@ -167,6 +196,45 @@ class MarkupStyleRender
167
196
  response
168
197
  end
169
198
 
199
+ # Build a Rangeable from the non-ESCAPE TagChars, then read the merged
200
+ # ranges back out as fresh TagChar instances (one per coalesced span,
201
+ # rather than one per original markup). Each TagChar carries enough
202
+ # info (sort priority, start/end strings) to drive emission, so we
203
+ # reuse a representative original TagChar per Markup as the prototype.
204
+ def mergeTagsViaRangeable(rangeable_tags)
205
+ return [] if rangeable_tags.empty?
206
+
207
+ rangeable = Rangeable.new
208
+ proto_by_markup = {}
209
+
210
+ rangeable_tags.each do |tag|
211
+ markup = tag.instance_variable_get(:@_markup)
212
+ proto_by_markup[markup] ||= tag
213
+ # TagChar stored endIndex as `end - 1` (last covered slot); restore
214
+ # the half-open `end` for Rangeable's closed-interval insert.
215
+ rangeable.insert(markup, start: tag.startIndex, end: tag.endIndex)
216
+ end
217
+
218
+ merged = []
219
+ rangeable.each do |markup, ranges|
220
+ proto = proto_by_markup[markup]
221
+ startCharsStr = proto.startChars.chars.join
222
+ endCharsStr = proto.endChars.chars.join
223
+ ranges.each do |lo, hi|
224
+ # TagChar.new takes the half-open `end`; it stores `end - 1`.
225
+ merged << TagChar.new(proto.sort, lo, hi + 1, startCharsStr, endCharsStr)
226
+ end
227
+ end
228
+ merged
229
+ end
230
+
231
+ # ESCAPE markups are emitted as TagChar with startChars == "\\" and
232
+ # empty endChars; identifying them by start-string is simpler than
233
+ # threading a type tag through the TagChar struct.
234
+ def escape_tag?(tag)
235
+ tag.startChars.chars.join == "\\"
236
+ end
237
+
170
238
  def newline?(char)
171
239
  char.chars.join == "\n"
172
240
  end
@@ -180,8 +248,8 @@ class MarkupStyleRender
180
248
  stack.each { |tag| response.push(tag.startChars) }
181
249
  end
182
250
 
183
- def openStartingTags(tags, index, stack, response)
184
- startTags = tags.select { |t| t.startIndex == index }.sort_by(&:sort)
251
+ def openStartingTags(startTags, stack, response)
252
+ startTags = startTags.sort_by(&:sort)
185
253
  suppressEmit = false
186
254
  startTags.each do |tag|
187
255
  response.append(tag.startChars) unless suppressEmit
@@ -211,10 +279,13 @@ class MarkupStyleRender
211
279
  # supposed to end here (overlapping markups), close it anyway and
212
280
  # re-open it after the legitimate closes — keeping each individual
213
281
  # tag pair properly nested in the output.
214
- def closeEndingTags(tags, index, stack, response)
215
- endTags = tags.select { |t| t.endIndex == index }
282
+ def closeEndingTags(endTags, stack, response)
216
283
  return if endTags.empty?
217
284
 
285
+ # Caller passes the pre-built bucket; clone so we can mutate locally
286
+ # (find_index + delete_at) without trashing the cached array.
287
+ endTags = endTags.dup
288
+
218
289
  mismatchTags = []
219
290
  until endTags.empty?
220
291
  stackTag = stack.pop
data/lib/Post.rb CHANGED
@@ -93,8 +93,7 @@ class Post
93
93
  "query" => queryString
94
94
  }]
95
95
 
96
- host = ENV.fetch('MEDIUM_HOST', 'https://medium.com/_/graphql')
97
- response = Request.body(Request.URL(host, 'POST', body))
96
+ response = Request.body(Request.URL(Request.mediumGraphqlEndpoint, 'POST', body))
98
97
  return nil if response.nil?
99
98
 
100
99
  JSON.parse(response)
data/lib/Request.rb CHANGED
@@ -288,31 +288,47 @@ class Request
288
288
  end
289
289
 
290
290
  # If the user has configured a Cloudflare Worker proxy via MEDIUM_HOST,
291
- # rewrite *any* https://medium.com/<path> URL to <worker-origin>/<path>
292
- # so non-GraphQL hits (iframe metadata at /media/<id>, OG-image fallback
293
- # to /<user>/<post>, etc.) also benefit from the proxy. GraphQL callers
294
- # already hand us the proxy URL directly via ENV['MEDIUM_HOST'], so they
295
- # short-circuit the rewrite.
291
+ # rewrite any https://medium.com/<path> OR https://miro.medium.com/<path>
292
+ # URL to <worker-origin>/<path> so non-GraphQL hits (iframe metadata at
293
+ # /media/<id>, OG-image fallback to /<user>/<post>, miro image downloads,
294
+ # etc.) all benefit from the proxy. GraphQL callers already hand us the
295
+ # proxy URL directly via mediumGraphqlEndpoint, so they short-circuit.
296
296
  def self.mediumProxiedURL(url)
297
- return url unless url.is_a?(String) && url.start_with?('https://medium.com/')
297
+ return url unless url.is_a?(String)
298
298
  origin = mediumProxyOrigin
299
299
  return url if origin.nil?
300
- url.sub(%r{\Ahttps://medium\.com}, origin)
300
+ if url.start_with?('https://medium.com/')
301
+ url.sub(%r{\Ahttps://medium\.com}, origin)
302
+ elsif url.start_with?('https://miro.medium.com/')
303
+ url.sub(%r{\Ahttps://miro\.medium\.com}, origin)
304
+ else
305
+ url
306
+ end
301
307
  end
302
308
 
303
309
  # Extract the `<scheme>://<host>[:port]` of MEDIUM_HOST, or nil if no
304
- # proxy is configured (or it still points at medium.com itself).
310
+ # proxy is configured (or it still points at upstream medium.com).
311
+ # Accepts MEDIUM_HOST in any form — bare root, with /_/graphql suffix,
312
+ # or any other path — only the origin matters here.
305
313
  def self.mediumProxyOrigin
306
314
  host = ENV['MEDIUM_HOST'].to_s
307
315
  return nil if host.empty?
308
316
  uri = URI.parse(host)
309
- return nil if uri.host.nil? || uri.host == 'medium.com'
317
+ return nil if uri.host.nil? || uri.host == 'medium.com' || uri.host == 'miro.medium.com'
310
318
  port = (uri.port && uri.port != uri.default_port) ? ":#{uri.port}" : ''
311
319
  "#{uri.scheme}://#{uri.host}#{port}"
312
320
  rescue URI::InvalidURIError
313
321
  nil
314
322
  end
315
323
 
324
+ # GraphQL endpoint the gem should POST to. When MEDIUM_HOST configures a
325
+ # proxy, it's <proxy-origin>/_/graphql regardless of whether the user set
326
+ # MEDIUM_HOST to the bare root or already with the /_/graphql suffix.
327
+ def self.mediumGraphqlEndpoint
328
+ origin = mediumProxyOrigin
329
+ origin.nil? ? 'https://medium.com/_/graphql' : "#{origin}/_/graphql"
330
+ end
331
+
316
332
  # Resolve the host the gem should use for miro.medium.com image fetches.
317
333
  # Single-Worker setups: the same MEDIUM_HOST proxy handles both medium.com
318
334
  # and miro.medium.com via path dispatch, so we always derive miro from
@@ -322,17 +338,16 @@ class Request
322
338
  end
323
339
 
324
340
  # True iff `uri` is hosted by the configured Worker proxy — i.e. its
325
- # host matches MEDIUM_HOST and MEDIUM_HOST is set to something other
326
- # than upstream medium.com. Used to gate the MEDIUM_HOST_SECRET auth
327
- # header so the secret only leaves the process when heading to the
341
+ # host matches MEDIUM_HOST's origin. Used to gate the MEDIUM_HOST_SECRET
342
+ # auth header so the secret only leaves the process when heading to the
328
343
  # user's own proxy.
329
344
  def self.proxyURI?(uri)
330
345
  return false if uri.nil? || uri.host.nil?
331
- envValue = ENV['MEDIUM_HOST'].to_s
332
- return false if envValue.empty?
333
- parsed = URI.parse(envValue) rescue nil
346
+ origin = mediumProxyOrigin
347
+ return false if origin.nil?
348
+ parsed = URI.parse(origin) rescue nil
334
349
  return false if parsed.nil? || parsed.host.nil?
335
- parsed.host != 'medium.com' && parsed.host == uri.host
350
+ parsed.host == uri.host
336
351
  end
337
352
 
338
353
  # Cloudflare tags blocked responses via either the cf-mitigated header
data/lib/User.rb CHANGED
@@ -22,8 +22,7 @@ class User
22
22
  }
23
23
  ]
24
24
 
25
- host = ENV.fetch('MEDIUM_HOST', 'https://medium.com/_/graphql')
26
- body = Request.body(Request.URL(host, "POST", query))
25
+ body = Request.body(Request.URL(Request.mediumGraphqlEndpoint, "POST", query))
27
26
  return nil if body.nil?
28
27
 
29
28
  json = JSON.parse(body)
@@ -44,8 +43,7 @@ class User
44
43
  }
45
44
  ]
46
45
 
47
- host = ENV.fetch('MEDIUM_HOST', 'https://medium.com/_/graphql')
48
- body = Request.body(Request.URL(host, "POST", query))
46
+ body = Request.body(Request.URL(Request.mediumGraphqlEndpoint, "POST", query))
49
47
  return { "nextID" => nil, "postURLs" => [] } if body.nil?
50
48
 
51
49
  json = JSON.parse(body)
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ZMediumToMarkdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.5.1
4
+ version: 3.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ZhgChgLi
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2026-05-06 00:00:00.000000000 Z
10
+ date: 2026-05-09 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: nokogiri
@@ -91,6 +91,20 @@ dependencies:
91
91
  - - "~>"
92
92
  - !ruby/object:Gem::Version
93
93
  version: '0.15'
94
+ - !ruby/object:Gem::Dependency
95
+ name: rangeable
96
+ requirement: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - "~>"
99
+ - !ruby/object:Gem::Version
100
+ version: '1.0'
101
+ type: :runtime
102
+ prerelease: false
103
+ version_requirements: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - "~>"
106
+ - !ruby/object:Gem::Version
107
+ version: '1.0'
94
108
  description: ZMediumToMarkdown converts Medium posts into clean, portable Markdown.
95
109
  It can download a single post or every post from a Medium username, preserving headings,
96
110
  lists, blockquotes, code blocks, images, links, and common embeds such as GitHub