ZMediumToMarkdown 3.5.1 → 3.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/CLI.rb +1 -2
- data/lib/ImageDownloader.rb +66 -13
- data/lib/Post.rb +1 -2
- data/lib/Request.rb +31 -16
- data/lib/User.rb +2 -4
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 396aabb00395a5451c046ad36eca450186120dd1f836242c2bc0d40885126ee3
|
|
4
|
+
data.tar.gz: f10a39b453030fcf5286b1df32ed482b948c49c4152d6dd17e7a5871f9b5ed45
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 70a93c6be7b9c0e62966dc8cc1217ad95d458468833a26d2b61329eee2b0b9626d33e232490a02bab76f83f8923d5144906c41e394e4aa049b3634c37cc7382d
|
|
7
|
+
data.tar.gz: a67492a79d7857fff707c0e2aa313332db8a92a58c8e1c61941096194301910d60578051bf150f3ddbd7e04144eac9cd28c1c8a6ee2b0cdbf7e1069b967b24dd
|
data/lib/CLI.rb
CHANGED
|
@@ -158,8 +158,7 @@ module CLI
|
|
|
158
158
|
# other than the default upstream Medium URL — i.e. user pointed it
|
|
159
159
|
# at their own Cloudflare Worker (or another proxy).
|
|
160
160
|
def proxyConfigured?
|
|
161
|
-
|
|
162
|
-
!host.empty? && host != DEFAULT_MEDIUM_HOST
|
|
161
|
+
!Request.mediumProxyOrigin.nil?
|
|
163
162
|
end
|
|
164
163
|
|
|
165
164
|
# Only warn when the invocation will actually hit Medium — skip for
|
data/lib/ImageDownloader.rb
CHANGED
|
@@ -1,21 +1,74 @@
|
|
|
1
|
+
require 'net/http'
|
|
2
|
+
require 'uri'
|
|
3
|
+
|
|
1
4
|
require 'Helper'
|
|
5
|
+
require 'Request'
|
|
2
6
|
|
|
3
7
|
class ImageDownloader
|
|
8
|
+
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36'.freeze
|
|
9
|
+
MAX_REDIRECTS = 5
|
|
10
|
+
|
|
11
|
+
# Downloads `url` to disk at `path`. Routes medium.com / miro.medium.com
|
|
12
|
+
# URLs through MEDIUM_HOST when configured (so requests inherit the
|
|
13
|
+
# Worker's IP reputation + auth) and attaches `X-Medium-Proxy-Secret`
|
|
14
|
+
# and the global cookie jar when the destination is the user's proxy.
|
|
15
|
+
# Other hosts (i.ytimg.com, pbs.twimg.com, etc.) are fetched directly.
|
|
4
16
|
def self.download(path, url)
|
|
5
|
-
dir = path.split(
|
|
6
|
-
dir.pop
|
|
7
|
-
Helper.createDirIfNotExist(dir.join(
|
|
8
|
-
|
|
9
|
-
if File.exist?(path)
|
|
10
|
-
|
|
17
|
+
dir = path.split('/')
|
|
18
|
+
dir.pop
|
|
19
|
+
Helper.createDirIfNotExist(dir.join('/'))
|
|
20
|
+
|
|
21
|
+
return true if File.exist?(path)
|
|
22
|
+
|
|
23
|
+
rewritten = Request.mediumProxiedURL(url)
|
|
24
|
+
uri = URI.parse(rewritten) rescue nil
|
|
25
|
+
return false if uri.nil? || uri.host.nil?
|
|
26
|
+
|
|
27
|
+
response = fetchWithRedirects(uri, MAX_REDIRECTS)
|
|
28
|
+
return false if response.nil? || response.code.to_i != 200
|
|
29
|
+
|
|
30
|
+
body = response.body
|
|
31
|
+
return false if body.nil? || body.empty?
|
|
32
|
+
|
|
33
|
+
File.binwrite(path, body)
|
|
34
|
+
true
|
|
35
|
+
rescue StandardError
|
|
36
|
+
false
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def self.fetchWithRedirects(uri, limit)
|
|
40
|
+
return nil if limit <= 0
|
|
41
|
+
|
|
42
|
+
https = Net::HTTP.new(uri.host, uri.port)
|
|
43
|
+
https.use_ssl = (uri.scheme == 'https')
|
|
44
|
+
https.open_timeout = 10
|
|
45
|
+
https.read_timeout = 60
|
|
46
|
+
|
|
47
|
+
request = Net::HTTP::Get.new(uri)
|
|
48
|
+
request['User-Agent'] = USER_AGENT
|
|
49
|
+
|
|
50
|
+
if Request.proxyURI?(uri)
|
|
51
|
+
secret = ENV['MEDIUM_HOST_SECRET'].to_s
|
|
52
|
+
request['X-Medium-Proxy-Secret'] = secret unless secret.empty?
|
|
11
53
|
end
|
|
12
54
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
55
|
+
cookies = $cookies || {}
|
|
56
|
+
cookieString = cookies.reject { |_, v| v.nil? }
|
|
57
|
+
.map { |k, v| "#{k}=#{v}" }
|
|
58
|
+
.join('; ')
|
|
59
|
+
request['Cookie'] = cookieString unless cookieString.empty?
|
|
60
|
+
|
|
61
|
+
response = https.request(request)
|
|
62
|
+
|
|
63
|
+
case response.code.to_i
|
|
64
|
+
when 301, 302, 303, 307, 308
|
|
65
|
+
location = response['location'].to_s
|
|
66
|
+
return nil if location.empty?
|
|
67
|
+
target = URI.parse(URI.join(uri.to_s, location).to_s)
|
|
68
|
+
target = URI.parse(Request.mediumProxiedURL(target.to_s))
|
|
69
|
+
fetchWithRedirects(target, limit - 1)
|
|
70
|
+
else
|
|
71
|
+
response
|
|
19
72
|
end
|
|
20
73
|
end
|
|
21
|
-
end
|
|
74
|
+
end
|
data/lib/Post.rb
CHANGED
|
@@ -93,8 +93,7 @@ class Post
|
|
|
93
93
|
"query" => queryString
|
|
94
94
|
}]
|
|
95
95
|
|
|
96
|
-
|
|
97
|
-
response = Request.body(Request.URL(host, 'POST', body))
|
|
96
|
+
response = Request.body(Request.URL(Request.mediumGraphqlEndpoint, 'POST', body))
|
|
98
97
|
return nil if response.nil?
|
|
99
98
|
|
|
100
99
|
JSON.parse(response)
|
data/lib/Request.rb
CHANGED
|
@@ -288,31 +288,47 @@ class Request
|
|
|
288
288
|
end
|
|
289
289
|
|
|
290
290
|
# If the user has configured a Cloudflare Worker proxy via MEDIUM_HOST,
|
|
291
|
-
# rewrite
|
|
292
|
-
# so non-GraphQL hits (iframe metadata at
|
|
293
|
-
# to /<user>/<post>,
|
|
294
|
-
#
|
|
295
|
-
# short-circuit
|
|
291
|
+
# rewrite any https://medium.com/<path> OR https://miro.medium.com/<path>
|
|
292
|
+
# URL to <worker-origin>/<path> so non-GraphQL hits (iframe metadata at
|
|
293
|
+
# /media/<id>, OG-image fallback to /<user>/<post>, miro image downloads,
|
|
294
|
+
# etc.) all benefit from the proxy. GraphQL callers already hand us the
|
|
295
|
+
# proxy URL directly via mediumGraphqlEndpoint, so they short-circuit.
|
|
296
296
|
def self.mediumProxiedURL(url)
|
|
297
|
-
return url unless url.is_a?(String)
|
|
297
|
+
return url unless url.is_a?(String)
|
|
298
298
|
origin = mediumProxyOrigin
|
|
299
299
|
return url if origin.nil?
|
|
300
|
-
url.
|
|
300
|
+
if url.start_with?('https://medium.com/')
|
|
301
|
+
url.sub(%r{\Ahttps://medium\.com}, origin)
|
|
302
|
+
elsif url.start_with?('https://miro.medium.com/')
|
|
303
|
+
url.sub(%r{\Ahttps://miro\.medium\.com}, origin)
|
|
304
|
+
else
|
|
305
|
+
url
|
|
306
|
+
end
|
|
301
307
|
end
|
|
302
308
|
|
|
303
309
|
# Extract the `<scheme>://<host>[:port]` of MEDIUM_HOST, or nil if no
|
|
304
|
-
# proxy is configured (or it still points at medium.com
|
|
310
|
+
# proxy is configured (or it still points at upstream medium.com).
|
|
311
|
+
# Accepts MEDIUM_HOST in any form — bare root, with /_/graphql suffix,
|
|
312
|
+
# or any other path — only the origin matters here.
|
|
305
313
|
def self.mediumProxyOrigin
|
|
306
314
|
host = ENV['MEDIUM_HOST'].to_s
|
|
307
315
|
return nil if host.empty?
|
|
308
316
|
uri = URI.parse(host)
|
|
309
|
-
return nil if uri.host.nil? || uri.host == 'medium.com'
|
|
317
|
+
return nil if uri.host.nil? || uri.host == 'medium.com' || uri.host == 'miro.medium.com'
|
|
310
318
|
port = (uri.port && uri.port != uri.default_port) ? ":#{uri.port}" : ''
|
|
311
319
|
"#{uri.scheme}://#{uri.host}#{port}"
|
|
312
320
|
rescue URI::InvalidURIError
|
|
313
321
|
nil
|
|
314
322
|
end
|
|
315
323
|
|
|
324
|
+
# GraphQL endpoint the gem should POST to. When MEDIUM_HOST configures a
|
|
325
|
+
# proxy, it's <proxy-origin>/_/graphql regardless of whether the user set
|
|
326
|
+
# MEDIUM_HOST to the bare root or already with the /_/graphql suffix.
|
|
327
|
+
def self.mediumGraphqlEndpoint
|
|
328
|
+
origin = mediumProxyOrigin
|
|
329
|
+
origin.nil? ? 'https://medium.com/_/graphql' : "#{origin}/_/graphql"
|
|
330
|
+
end
|
|
331
|
+
|
|
316
332
|
# Resolve the host the gem should use for miro.medium.com image fetches.
|
|
317
333
|
# Single-Worker setups: the same MEDIUM_HOST proxy handles both medium.com
|
|
318
334
|
# and miro.medium.com via path dispatch, so we always derive miro from
|
|
@@ -322,17 +338,16 @@ class Request
|
|
|
322
338
|
end
|
|
323
339
|
|
|
324
340
|
# True iff `uri` is hosted by the configured Worker proxy — i.e. its
|
|
325
|
-
# host matches MEDIUM_HOST
|
|
326
|
-
#
|
|
327
|
-
# header so the secret only leaves the process when heading to the
|
|
341
|
+
# host matches MEDIUM_HOST's origin. Used to gate the MEDIUM_HOST_SECRET
|
|
342
|
+
# auth header so the secret only leaves the process when heading to the
|
|
328
343
|
# user's own proxy.
|
|
329
344
|
def self.proxyURI?(uri)
|
|
330
345
|
return false if uri.nil? || uri.host.nil?
|
|
331
|
-
|
|
332
|
-
return false if
|
|
333
|
-
parsed = URI.parse(
|
|
346
|
+
origin = mediumProxyOrigin
|
|
347
|
+
return false if origin.nil?
|
|
348
|
+
parsed = URI.parse(origin) rescue nil
|
|
334
349
|
return false if parsed.nil? || parsed.host.nil?
|
|
335
|
-
parsed.host
|
|
350
|
+
parsed.host == uri.host
|
|
336
351
|
end
|
|
337
352
|
|
|
338
353
|
# Cloudflare tags blocked responses via either the cf-mitigated header
|
data/lib/User.rb
CHANGED
|
@@ -22,8 +22,7 @@ class User
|
|
|
22
22
|
}
|
|
23
23
|
]
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
body = Request.body(Request.URL(host, "POST", query))
|
|
25
|
+
body = Request.body(Request.URL(Request.mediumGraphqlEndpoint, "POST", query))
|
|
27
26
|
return nil if body.nil?
|
|
28
27
|
|
|
29
28
|
json = JSON.parse(body)
|
|
@@ -44,8 +43,7 @@ class User
|
|
|
44
43
|
}
|
|
45
44
|
]
|
|
46
45
|
|
|
47
|
-
|
|
48
|
-
body = Request.body(Request.URL(host, "POST", query))
|
|
46
|
+
body = Request.body(Request.URL(Request.mediumGraphqlEndpoint, "POST", query))
|
|
49
47
|
return { "nextID" => nil, "postURLs" => [] } if body.nil?
|
|
50
48
|
|
|
51
49
|
json = JSON.parse(body)
|