ZMediumToMarkdown 3.5.0 → 3.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/CLI.rb +2 -3
- data/lib/ImageDownloader.rb +66 -13
- data/lib/Post.rb +1 -2
- data/lib/Request.rb +32 -17
- data/lib/User.rb +2 -4
- data/lib/ZMediumFetcher.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 396aabb00395a5451c046ad36eca450186120dd1f836242c2bc0d40885126ee3
|
|
4
|
+
data.tar.gz: f10a39b453030fcf5286b1df32ed482b948c49c4152d6dd17e7a5871f9b5ed45
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 70a93c6be7b9c0e62966dc8cc1217ad95d458468833a26d2b61329eee2b0b9626d33e232490a02bab76f83f8923d5144906c41e394e4aa049b3634c37cc7382d
|
|
7
|
+
data.tar.gz: a67492a79d7857fff707c0e2aa313332db8a92a58c8e1c61941096194301910d60578051bf150f3ddbd7e04144eac9cd28c1c8a6ee2b0cdbf7e1069b967b24dd
|
data/lib/CLI.rb
CHANGED
|
@@ -11,7 +11,7 @@ require 'ChromeAuth'
|
|
|
11
11
|
# All CLI-side concerns for the `ZMediumToMarkdown` executable. Pulled out
|
|
12
12
|
# of bin/ so it can be exercised by unit tests without spawning processes.
|
|
13
13
|
module CLI
|
|
14
|
-
COOKIE_SETUP_URL = 'https://github.com/ZhgChgLi/ZMediumToMarkdown/wiki/Setting-Up-Medium-Cookies-and-a-Cloudflare-Worker-Proxy'.freeze
|
|
14
|
+
COOKIE_SETUP_URL = 'https://github.com/ZhgChgLi/ZMediumToMarkdown/blob/main/wiki/Setting-Up-Medium-Cookies-and-a-Cloudflare-Worker-Proxy.md'.freeze
|
|
15
15
|
|
|
16
16
|
DEFAULT_MEDIUM_HOST = 'https://medium.com/_/graphql'.freeze
|
|
17
17
|
|
|
@@ -158,8 +158,7 @@ module CLI
|
|
|
158
158
|
# other than the default upstream Medium URL — i.e. user pointed it
|
|
159
159
|
# at their own Cloudflare Worker (or another proxy).
|
|
160
160
|
def proxyConfigured?
|
|
161
|
-
|
|
162
|
-
!host.empty? && host != DEFAULT_MEDIUM_HOST
|
|
161
|
+
!Request.mediumProxyOrigin.nil?
|
|
163
162
|
end
|
|
164
163
|
|
|
165
164
|
# Only warn when the invocation will actually hit Medium — skip for
|
data/lib/ImageDownloader.rb
CHANGED
|
@@ -1,21 +1,74 @@
|
|
|
1
|
+
require 'net/http'
|
|
2
|
+
require 'uri'
|
|
3
|
+
|
|
1
4
|
require 'Helper'
|
|
5
|
+
require 'Request'
|
|
2
6
|
|
|
3
7
|
class ImageDownloader
|
|
8
|
+
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36'.freeze
|
|
9
|
+
MAX_REDIRECTS = 5
|
|
10
|
+
|
|
11
|
+
# Downloads `url` to disk at `path`. Routes medium.com / miro.medium.com
|
|
12
|
+
# URLs through MEDIUM_HOST when configured (so requests inherit the
|
|
13
|
+
# Worker's IP reputation + auth) and attaches `X-Medium-Proxy-Secret`
|
|
14
|
+
# and the global cookie jar when the destination is the user's proxy.
|
|
15
|
+
# Other hosts (i.ytimg.com, pbs.twimg.com, etc.) are fetched directly.
|
|
4
16
|
def self.download(path, url)
|
|
5
|
-
dir = path.split(
|
|
6
|
-
dir.pop
|
|
7
|
-
Helper.createDirIfNotExist(dir.join(
|
|
8
|
-
|
|
9
|
-
if File.exist?(path)
|
|
10
|
-
|
|
17
|
+
dir = path.split('/')
|
|
18
|
+
dir.pop
|
|
19
|
+
Helper.createDirIfNotExist(dir.join('/'))
|
|
20
|
+
|
|
21
|
+
return true if File.exist?(path)
|
|
22
|
+
|
|
23
|
+
rewritten = Request.mediumProxiedURL(url)
|
|
24
|
+
uri = URI.parse(rewritten) rescue nil
|
|
25
|
+
return false if uri.nil? || uri.host.nil?
|
|
26
|
+
|
|
27
|
+
response = fetchWithRedirects(uri, MAX_REDIRECTS)
|
|
28
|
+
return false if response.nil? || response.code.to_i != 200
|
|
29
|
+
|
|
30
|
+
body = response.body
|
|
31
|
+
return false if body.nil? || body.empty?
|
|
32
|
+
|
|
33
|
+
File.binwrite(path, body)
|
|
34
|
+
true
|
|
35
|
+
rescue StandardError
|
|
36
|
+
false
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def self.fetchWithRedirects(uri, limit)
|
|
40
|
+
return nil if limit <= 0
|
|
41
|
+
|
|
42
|
+
https = Net::HTTP.new(uri.host, uri.port)
|
|
43
|
+
https.use_ssl = (uri.scheme == 'https')
|
|
44
|
+
https.open_timeout = 10
|
|
45
|
+
https.read_timeout = 60
|
|
46
|
+
|
|
47
|
+
request = Net::HTTP::Get.new(uri)
|
|
48
|
+
request['User-Agent'] = USER_AGENT
|
|
49
|
+
|
|
50
|
+
if Request.proxyURI?(uri)
|
|
51
|
+
secret = ENV['MEDIUM_HOST_SECRET'].to_s
|
|
52
|
+
request['X-Medium-Proxy-Secret'] = secret unless secret.empty?
|
|
11
53
|
end
|
|
12
54
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
55
|
+
cookies = $cookies || {}
|
|
56
|
+
cookieString = cookies.reject { |_, v| v.nil? }
|
|
57
|
+
.map { |k, v| "#{k}=#{v}" }
|
|
58
|
+
.join('; ')
|
|
59
|
+
request['Cookie'] = cookieString unless cookieString.empty?
|
|
60
|
+
|
|
61
|
+
response = https.request(request)
|
|
62
|
+
|
|
63
|
+
case response.code.to_i
|
|
64
|
+
when 301, 302, 303, 307, 308
|
|
65
|
+
location = response['location'].to_s
|
|
66
|
+
return nil if location.empty?
|
|
67
|
+
target = URI.parse(URI.join(uri.to_s, location).to_s)
|
|
68
|
+
target = URI.parse(Request.mediumProxiedURL(target.to_s))
|
|
69
|
+
fetchWithRedirects(target, limit - 1)
|
|
70
|
+
else
|
|
71
|
+
response
|
|
19
72
|
end
|
|
20
73
|
end
|
|
21
|
-
end
|
|
74
|
+
end
|
data/lib/Post.rb
CHANGED
|
@@ -93,8 +93,7 @@ class Post
|
|
|
93
93
|
"query" => queryString
|
|
94
94
|
}]
|
|
95
95
|
|
|
96
|
-
|
|
97
|
-
response = Request.body(Request.URL(host, 'POST', body))
|
|
96
|
+
response = Request.body(Request.URL(Request.mediumGraphqlEndpoint, 'POST', body))
|
|
98
97
|
return nil if response.nil?
|
|
99
98
|
|
|
100
99
|
JSON.parse(response)
|
data/lib/Request.rb
CHANGED
|
@@ -48,7 +48,7 @@ class Request
|
|
|
48
48
|
the MEDIUM_HOST env var. (Recommended.)
|
|
49
49
|
|
|
50
50
|
Full step-by-step setup guide:
|
|
51
|
-
https://github.com/ZhgChgLi/ZMediumToMarkdown/wiki/Setting-Up-Medium-Cookies-and-a-Cloudflare-Worker-Proxy
|
|
51
|
+
https://github.com/ZhgChgLi/ZMediumToMarkdown/blob/main/wiki/Setting-Up-Medium-Cookies-and-a-Cloudflare-Worker-Proxy.md
|
|
52
52
|
MSG
|
|
53
53
|
end
|
|
54
54
|
end
|
|
@@ -288,31 +288,47 @@ class Request
|
|
|
288
288
|
end
|
|
289
289
|
|
|
290
290
|
# If the user has configured a Cloudflare Worker proxy via MEDIUM_HOST,
|
|
291
|
-
# rewrite
|
|
292
|
-
# so non-GraphQL hits (iframe metadata at
|
|
293
|
-
# to /<user>/<post>,
|
|
294
|
-
#
|
|
295
|
-
# short-circuit
|
|
291
|
+
# rewrite any https://medium.com/<path> OR https://miro.medium.com/<path>
|
|
292
|
+
# URL to <worker-origin>/<path> so non-GraphQL hits (iframe metadata at
|
|
293
|
+
# /media/<id>, OG-image fallback to /<user>/<post>, miro image downloads,
|
|
294
|
+
# etc.) all benefit from the proxy. GraphQL callers already hand us the
|
|
295
|
+
# proxy URL directly via mediumGraphqlEndpoint, so they short-circuit.
|
|
296
296
|
def self.mediumProxiedURL(url)
|
|
297
|
-
return url unless url.is_a?(String)
|
|
297
|
+
return url unless url.is_a?(String)
|
|
298
298
|
origin = mediumProxyOrigin
|
|
299
299
|
return url if origin.nil?
|
|
300
|
-
url.
|
|
300
|
+
if url.start_with?('https://medium.com/')
|
|
301
|
+
url.sub(%r{\Ahttps://medium\.com}, origin)
|
|
302
|
+
elsif url.start_with?('https://miro.medium.com/')
|
|
303
|
+
url.sub(%r{\Ahttps://miro\.medium\.com}, origin)
|
|
304
|
+
else
|
|
305
|
+
url
|
|
306
|
+
end
|
|
301
307
|
end
|
|
302
308
|
|
|
303
309
|
# Extract the `<scheme>://<host>[:port]` of MEDIUM_HOST, or nil if no
|
|
304
|
-
# proxy is configured (or it still points at medium.com
|
|
310
|
+
# proxy is configured (or it still points at upstream medium.com).
|
|
311
|
+
# Accepts MEDIUM_HOST in any form — bare root, with /_/graphql suffix,
|
|
312
|
+
# or any other path — only the origin matters here.
|
|
305
313
|
def self.mediumProxyOrigin
|
|
306
314
|
host = ENV['MEDIUM_HOST'].to_s
|
|
307
315
|
return nil if host.empty?
|
|
308
316
|
uri = URI.parse(host)
|
|
309
|
-
return nil if uri.host.nil? || uri.host == 'medium.com'
|
|
317
|
+
return nil if uri.host.nil? || uri.host == 'medium.com' || uri.host == 'miro.medium.com'
|
|
310
318
|
port = (uri.port && uri.port != uri.default_port) ? ":#{uri.port}" : ''
|
|
311
319
|
"#{uri.scheme}://#{uri.host}#{port}"
|
|
312
320
|
rescue URI::InvalidURIError
|
|
313
321
|
nil
|
|
314
322
|
end
|
|
315
323
|
|
|
324
|
+
# GraphQL endpoint the gem should POST to. When MEDIUM_HOST configures a
|
|
325
|
+
# proxy, it's <proxy-origin>/_/graphql regardless of whether the user set
|
|
326
|
+
# MEDIUM_HOST to the bare root or already with the /_/graphql suffix.
|
|
327
|
+
def self.mediumGraphqlEndpoint
|
|
328
|
+
origin = mediumProxyOrigin
|
|
329
|
+
origin.nil? ? 'https://medium.com/_/graphql' : "#{origin}/_/graphql"
|
|
330
|
+
end
|
|
331
|
+
|
|
316
332
|
# Resolve the host the gem should use for miro.medium.com image fetches.
|
|
317
333
|
# Single-Worker setups: the same MEDIUM_HOST proxy handles both medium.com
|
|
318
334
|
# and miro.medium.com via path dispatch, so we always derive miro from
|
|
@@ -322,17 +338,16 @@ class Request
|
|
|
322
338
|
end
|
|
323
339
|
|
|
324
340
|
# True iff `uri` is hosted by the configured Worker proxy — i.e. its
|
|
325
|
-
# host matches MEDIUM_HOST
|
|
326
|
-
#
|
|
327
|
-
# header so the secret only leaves the process when heading to the
|
|
341
|
+
# host matches MEDIUM_HOST's origin. Used to gate the MEDIUM_HOST_SECRET
|
|
342
|
+
# auth header so the secret only leaves the process when heading to the
|
|
328
343
|
# user's own proxy.
|
|
329
344
|
def self.proxyURI?(uri)
|
|
330
345
|
return false if uri.nil? || uri.host.nil?
|
|
331
|
-
|
|
332
|
-
return false if
|
|
333
|
-
parsed = URI.parse(
|
|
346
|
+
origin = mediumProxyOrigin
|
|
347
|
+
return false if origin.nil?
|
|
348
|
+
parsed = URI.parse(origin) rescue nil
|
|
334
349
|
return false if parsed.nil? || parsed.host.nil?
|
|
335
|
-
parsed.host
|
|
350
|
+
parsed.host == uri.host
|
|
336
351
|
end
|
|
337
352
|
|
|
338
353
|
# Cloudflare tags blocked responses via either the cf-mitigated header
|
data/lib/User.rb
CHANGED
|
@@ -22,8 +22,7 @@ class User
|
|
|
22
22
|
}
|
|
23
23
|
]
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
body = Request.body(Request.URL(host, "POST", query))
|
|
25
|
+
body = Request.body(Request.URL(Request.mediumGraphqlEndpoint, "POST", query))
|
|
27
26
|
return nil if body.nil?
|
|
28
27
|
|
|
29
28
|
json = JSON.parse(body)
|
|
@@ -44,8 +43,7 @@ class User
|
|
|
44
43
|
}
|
|
45
44
|
]
|
|
46
45
|
|
|
47
|
-
|
|
48
|
-
body = Request.body(Request.URL(host, "POST", query))
|
|
46
|
+
body = Request.body(Request.URL(Request.mediumGraphqlEndpoint, "POST", query))
|
|
49
47
|
return { "nextID" => nil, "postURLs" => [] } if body.nil?
|
|
50
48
|
|
|
51
49
|
json = JSON.parse(body)
|
data/lib/ZMediumFetcher.rb
CHANGED
|
@@ -419,7 +419,7 @@ class ZMediumFetcher
|
|
|
419
419
|
# cookies belong to a Medium Member account that has access to the post.
|
|
420
420
|
def paywallMessage
|
|
421
421
|
if !defined?($cookies) || $cookies.nil? || ($cookies['sid'].to_s.empty? && $cookies['uid'].to_s.empty?)
|
|
422
|
-
"This post is behind Medium's paywall. Cookies (sid / uid) are REQUIRED to download the full content — without them you only get the public preview. Setup guide: https://github.com/ZhgChgLi/ZMediumToMarkdown/wiki/Setting-Up-Medium-Cookies-and-a-Cloudflare-Worker-Proxy"
|
|
422
|
+
"This post is behind Medium's paywall. Cookies (sid / uid) are REQUIRED to download the full content — without them you only get the public preview. Setup guide: https://github.com/ZhgChgLi/ZMediumToMarkdown/blob/main/wiki/Setting-Up-Medium-Cookies-and-a-Cloudflare-Worker-Proxy.md"
|
|
423
423
|
else
|
|
424
424
|
"This post is behind Medium's paywall and the provided cookies don't grant access. Verify your sid / uid belong to a Medium Member account that can read this post. Cookies stay valid as long as they're being used (each successful request resets a ~2-week sliding window); they only expire after ~2 weeks of inactivity."
|
|
425
425
|
end
|