ZMediumToMarkdown 3.5.1 → 3.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aad5256e92463eb91197f407050468d928098faf55b90bf97454bbb75544fa41
4
- data.tar.gz: 8e05c320fbb2dd468236e69e8c54a9d0b4a403ea10840b49c30d8b650db71b9b
3
+ metadata.gz: 396aabb00395a5451c046ad36eca450186120dd1f836242c2bc0d40885126ee3
4
+ data.tar.gz: f10a39b453030fcf5286b1df32ed482b948c49c4152d6dd17e7a5871f9b5ed45
5
5
  SHA512:
6
- metadata.gz: 2a32d5a034f142eece10a2ad997473d9f00223435f5d6b28671d178545f3d17f925698ca3c032a86fa7aaffff23db6fa172cf3ac2ae4efd5f8945b2c3111d85f
7
- data.tar.gz: 57a66882f1447c6ddb58da6f68d74b79f3fc7f501eba7d09707ecef879ac791fd1ab80f6f0e69de754085b2f30f8f74f43bdfc4cd25d32eb273a8861516d853a
6
+ metadata.gz: 70a93c6be7b9c0e62966dc8cc1217ad95d458468833a26d2b61329eee2b0b9626d33e232490a02bab76f83f8923d5144906c41e394e4aa049b3634c37cc7382d
7
+ data.tar.gz: a67492a79d7857fff707c0e2aa313332db8a92a58c8e1c61941096194301910d60578051bf150f3ddbd7e04144eac9cd28c1c8a6ee2b0cdbf7e1069b967b24dd
data/lib/CLI.rb CHANGED
@@ -158,8 +158,7 @@ module CLI
158
158
  # other than the default upstream Medium URL — i.e. user pointed it
159
159
  # at their own Cloudflare Worker (or another proxy).
160
160
  def proxyConfigured?
161
- host = ENV['MEDIUM_HOST'].to_s
162
- !host.empty? && host != DEFAULT_MEDIUM_HOST
161
+ !Request.mediumProxyOrigin.nil?
163
162
  end
164
163
 
165
164
  # Only warn when the invocation will actually hit Medium — skip for
data/lib/ImageDownloader.rb CHANGED
@@ -1,21 +1,74 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+
1
4
  require 'Helper'
5
+ require 'Request'
2
6
 
3
7
  class ImageDownloader
8
+ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36'.freeze
9
+ MAX_REDIRECTS = 5
10
+
11
+ # Downloads `url` to disk at `path`. Routes medium.com / miro.medium.com
12
+ # URLs through MEDIUM_HOST when configured (so requests inherit the
13
+ # Worker's IP reputation + auth) and attaches `X-Medium-Proxy-Secret`
14
+ # and the global cookie jar when the destination is the user's proxy.
15
+ # Other hosts (i.ytimg.com, pbs.twimg.com, etc.) are fetched directly.
4
16
  def self.download(path, url)
5
- dir = path.split("/")
6
- dir.pop()
7
- Helper.createDirIfNotExist(dir.join("/"))
8
-
9
- if File.exist?(path)
10
- return true
17
+ dir = path.split('/')
18
+ dir.pop
19
+ Helper.createDirIfNotExist(dir.join('/'))
20
+
21
+ return true if File.exist?(path)
22
+
23
+ rewritten = Request.mediumProxiedURL(url)
24
+ uri = URI.parse(rewritten) rescue nil
25
+ return false if uri.nil? || uri.host.nil?
26
+
27
+ response = fetchWithRedirects(uri, MAX_REDIRECTS)
28
+ return false if response.nil? || response.code.to_i != 200
29
+
30
+ body = response.body
31
+ return false if body.nil? || body.empty?
32
+
33
+ File.binwrite(path, body)
34
+ true
35
+ rescue StandardError
36
+ false
37
+ end
38
+
39
+ def self.fetchWithRedirects(uri, limit)
40
+ return nil if limit <= 0
41
+
42
+ https = Net::HTTP.new(uri.host, uri.port)
43
+ https.use_ssl = (uri.scheme == 'https')
44
+ https.open_timeout = 10
45
+ https.read_timeout = 60
46
+
47
+ request = Net::HTTP::Get.new(uri)
48
+ request['User-Agent'] = USER_AGENT
49
+
50
+ if Request.proxyURI?(uri)
51
+ secret = ENV['MEDIUM_HOST_SECRET'].to_s
52
+ request['X-Medium-Proxy-Secret'] = secret unless secret.empty?
11
53
  end
12
54
 
13
- begin
14
- imageResponse = URI.open(url)
15
- File.write(path, imageResponse.read)
16
- true
17
- rescue
18
- false
55
+ cookies = $cookies || {}
56
+ cookieString = cookies.reject { |_, v| v.nil? }
57
+ .map { |k, v| "#{k}=#{v}" }
58
+ .join('; ')
59
+ request['Cookie'] = cookieString unless cookieString.empty?
60
+
61
+ response = https.request(request)
62
+
63
+ case response.code.to_i
64
+ when 301, 302, 303, 307, 308
65
+ location = response['location'].to_s
66
+ return nil if location.empty?
67
+ target = URI.parse(URI.join(uri.to_s, location).to_s)
68
+ target = URI.parse(Request.mediumProxiedURL(target.to_s))
69
+ fetchWithRedirects(target, limit - 1)
70
+ else
71
+ response
19
72
  end
20
73
  end
21
- end
74
+ end
data/lib/Post.rb CHANGED
@@ -93,8 +93,7 @@ class Post
93
93
  "query" => queryString
94
94
  }]
95
95
 
96
- host = ENV.fetch('MEDIUM_HOST', 'https://medium.com/_/graphql')
97
- response = Request.body(Request.URL(host, 'POST', body))
96
+ response = Request.body(Request.URL(Request.mediumGraphqlEndpoint, 'POST', body))
98
97
  return nil if response.nil?
99
98
 
100
99
  JSON.parse(response)
data/lib/Request.rb CHANGED
@@ -288,31 +288,47 @@ class Request
288
288
  end
289
289
 
290
290
  # If the user has configured a Cloudflare Worker proxy via MEDIUM_HOST,
291
- # rewrite *any* https://medium.com/<path> URL to <worker-origin>/<path>
292
- # so non-GraphQL hits (iframe metadata at /media/<id>, OG-image fallback
293
- # to /<user>/<post>, etc.) also benefit from the proxy. GraphQL callers
294
- # already hand us the proxy URL directly via ENV['MEDIUM_HOST'], so they
295
- # short-circuit the rewrite.
291
+ # rewrite any https://medium.com/<path> OR https://miro.medium.com/<path>
292
+ # URL to <worker-origin>/<path> so non-GraphQL hits (iframe metadata at
293
+ # /media/<id>, OG-image fallback to /<user>/<post>, miro image downloads,
294
+ # etc.) all benefit from the proxy. GraphQL callers already hand us the
295
+ # proxy URL directly via mediumGraphqlEndpoint, so they short-circuit.
296
296
  def self.mediumProxiedURL(url)
297
- return url unless url.is_a?(String) && url.start_with?('https://medium.com/')
297
+ return url unless url.is_a?(String)
298
298
  origin = mediumProxyOrigin
299
299
  return url if origin.nil?
300
- url.sub(%r{\Ahttps://medium\.com}, origin)
300
+ if url.start_with?('https://medium.com/')
301
+ url.sub(%r{\Ahttps://medium\.com}, origin)
302
+ elsif url.start_with?('https://miro.medium.com/')
303
+ url.sub(%r{\Ahttps://miro\.medium\.com}, origin)
304
+ else
305
+ url
306
+ end
301
307
  end
302
308
 
303
309
  # Extract the `<scheme>://<host>[:port]` of MEDIUM_HOST, or nil if no
304
- # proxy is configured (or it still points at medium.com itself).
310
+ # proxy is configured (or it still points at upstream medium.com).
311
+ # Accepts MEDIUM_HOST in any form — bare root, with /_/graphql suffix,
312
+ # or any other path — only the origin matters here.
305
313
  def self.mediumProxyOrigin
306
314
  host = ENV['MEDIUM_HOST'].to_s
307
315
  return nil if host.empty?
308
316
  uri = URI.parse(host)
309
- return nil if uri.host.nil? || uri.host == 'medium.com'
317
+ return nil if uri.host.nil? || uri.host == 'medium.com' || uri.host == 'miro.medium.com'
310
318
  port = (uri.port && uri.port != uri.default_port) ? ":#{uri.port}" : ''
311
319
  "#{uri.scheme}://#{uri.host}#{port}"
312
320
  rescue URI::InvalidURIError
313
321
  nil
314
322
  end
315
323
 
324
+ # GraphQL endpoint the gem should POST to. When MEDIUM_HOST configures a
325
+ # proxy, it's <proxy-origin>/_/graphql regardless of whether the user set
326
+ # MEDIUM_HOST to the bare root or already with the /_/graphql suffix.
327
+ def self.mediumGraphqlEndpoint
328
+ origin = mediumProxyOrigin
329
+ origin.nil? ? 'https://medium.com/_/graphql' : "#{origin}/_/graphql"
330
+ end
331
+
316
332
  # Resolve the host the gem should use for miro.medium.com image fetches.
317
333
  # Single-Worker setups: the same MEDIUM_HOST proxy handles both medium.com
318
334
  # and miro.medium.com via path dispatch, so we always derive miro from
@@ -322,17 +338,16 @@ class Request
322
338
  end
323
339
 
324
340
  # True iff `uri` is hosted by the configured Worker proxy — i.e. its
325
- # host matches MEDIUM_HOST and MEDIUM_HOST is set to something other
326
- # than upstream medium.com. Used to gate the MEDIUM_HOST_SECRET auth
327
- # header so the secret only leaves the process when heading to the
341
+ # host matches MEDIUM_HOST's origin. Used to gate the MEDIUM_HOST_SECRET
342
+ # auth header so the secret only leaves the process when heading to the
328
343
  # user's own proxy.
329
344
  def self.proxyURI?(uri)
330
345
  return false if uri.nil? || uri.host.nil?
331
- envValue = ENV['MEDIUM_HOST'].to_s
332
- return false if envValue.empty?
333
- parsed = URI.parse(envValue) rescue nil
346
+ origin = mediumProxyOrigin
347
+ return false if origin.nil?
348
+ parsed = URI.parse(origin) rescue nil
334
349
  return false if parsed.nil? || parsed.host.nil?
335
- parsed.host != 'medium.com' && parsed.host == uri.host
350
+ parsed.host == uri.host
336
351
  end
337
352
 
338
353
  # Cloudflare tags blocked responses via either the cf-mitigated header
data/lib/User.rb CHANGED
@@ -22,8 +22,7 @@ class User
22
22
  }
23
23
  ]
24
24
 
25
- host = ENV.fetch('MEDIUM_HOST', 'https://medium.com/_/graphql')
26
- body = Request.body(Request.URL(host, "POST", query))
25
+ body = Request.body(Request.URL(Request.mediumGraphqlEndpoint, "POST", query))
27
26
  return nil if body.nil?
28
27
 
29
28
  json = JSON.parse(body)
@@ -44,8 +43,7 @@ class User
44
43
  }
45
44
  ]
46
45
 
47
- host = ENV.fetch('MEDIUM_HOST', 'https://medium.com/_/graphql')
48
- body = Request.body(Request.URL(host, "POST", query))
46
+ body = Request.body(Request.URL(Request.mediumGraphqlEndpoint, "POST", query))
49
47
  return { "nextID" => nil, "postURLs" => [] } if body.nil?
50
48
 
51
49
  json = JSON.parse(body)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ZMediumToMarkdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.5.1
4
+ version: 3.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - ZhgChgLi