ZMediumToMarkdown 3.1.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/CLI.rb +83 -51
- data/lib/ChromeAuth.rb +163 -0
- data/lib/CookieCache.rb +93 -0
- data/lib/Request.rb +56 -18
- metadata +18 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1e19605bba5d079fa1086c14dfb33c7d33f36da86b46c6c6c97affc960521d26
|
|
4
|
+
data.tar.gz: 96e13a36f2573d2dae057ee7a318ad66d9d09dc69990deff5bf5033a72032f8c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 81e76d2686c7f06976ea9fdcb09243b75e5f628669e3c405b27a636705d6a54862f264c569bc4e33bcf571d3ba6696ed578b20a45350ea81fb993b605d76e860
|
|
7
|
+
data.tar.gz: 34d756095ba13548c551da107d9e4903b51d094087ce8ac71ed140f52adf0e3a852afbe35bae15b27c09ed08cafb4df91fc121bdf7e262b123e989b3053af8bb
|
data/lib/CLI.rb
CHANGED
|
@@ -5,6 +5,8 @@ require 'ZMediumFetcher'
|
|
|
5
5
|
require 'Helper'
|
|
6
6
|
require 'PathPolicy'
|
|
7
7
|
require 'Request'
|
|
8
|
+
require 'CookieCache'
|
|
9
|
+
require 'ChromeAuth'
|
|
8
10
|
|
|
9
11
|
# All CLI-side concerns for the `ZMediumToMarkdown` executable. Pulled out
|
|
10
12
|
# of bin/ so it can be exercised by unit tests without spawning processes.
|
|
@@ -21,7 +23,7 @@ module CLI
|
|
|
21
23
|
argv << '-h' if argv.empty?
|
|
22
24
|
|
|
23
25
|
options = parseArgs(argv, errput: errput)
|
|
24
|
-
|
|
26
|
+
loadCookies!
|
|
25
27
|
warnAboutMissingSetup(options, errput: errput)
|
|
26
28
|
run(options, cwd, output: output, errput: errput)
|
|
27
29
|
end
|
|
@@ -39,6 +41,14 @@ module CLI
|
|
|
39
41
|
$cookies['uid'] = v
|
|
40
42
|
end
|
|
41
43
|
|
|
44
|
+
opts.on('--cookie_cf_clearance VALUE', 'Cloudflare cf_clearance cookie value (or set $MEDIUM_COOKIE_CF_CLEARANCE)') do |v|
|
|
45
|
+
$cookies['cf_clearance'] = v
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
opts.on('--cookie_cfuvid VALUE', 'Cloudflare _cfuvid cookie value (or set $MEDIUM_COOKIE_CFUVID)') do |v|
|
|
49
|
+
$cookies['_cfuvid'] = v
|
|
50
|
+
end
|
|
51
|
+
|
|
42
52
|
opts.on('-x', '--medium_host URL', 'Cloudflare Worker proxy URL for Medium GraphQL (or set $MEDIUM_HOST). Strongly recommended for CI / bulk runs — see the wiki setup guide.') do |v|
|
|
43
53
|
ENV['MEDIUM_HOST'] = v
|
|
44
54
|
end
|
|
@@ -95,6 +105,15 @@ module CLI
|
|
|
95
105
|
options[:version] = true
|
|
96
106
|
end
|
|
97
107
|
|
|
108
|
+
opts.on('--non-interactive', 'Never prompt or open a browser. CI runners auto-detect this; use the flag to force the same behavior on a TTY.') do
|
|
109
|
+
options[:nonInteractive] = true
|
|
110
|
+
ENV['MEDIUM_NO_AUTO_BROWSER'] = '1'
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
opts.on('--auth', 'Open Chrome to sign in, capture sid / uid / cf_clearance / _cfuvid into the encrypted cookie cache, and exit. Run once before bulk / scheduled jobs to seed the cache.') do
|
|
114
|
+
options[:auth] = true
|
|
115
|
+
end
|
|
116
|
+
|
|
98
117
|
opts.on('-h', '--help', 'Show this help message') do
|
|
99
118
|
options[:help] = opts.to_s
|
|
100
119
|
end
|
|
@@ -104,9 +123,31 @@ module CLI
|
|
|
104
123
|
options
|
|
105
124
|
end
|
|
106
125
|
|
|
126
|
+
# Cookie precedence (highest → lowest):
|
|
127
|
+
# 1. CLI flags (already written to $cookies in parseArgs)
|
|
128
|
+
# 2. Env vars (MEDIUM_COOKIE_*)
|
|
129
|
+
# 3. On-disk cache (~/.zmediumtomarkdown by default; override with $ZMEDIUM_COOKIE_CACHE_PATH)
|
|
130
|
+
# Each layer only fills slots the higher layer left empty.
|
|
131
|
+
def loadCookies!
|
|
132
|
+
loadCookiesFromEnv!
|
|
133
|
+
loadCookiesFromCache!
|
|
134
|
+
end
|
|
135
|
+
|
|
107
136
|
def loadCookiesFromEnv!
|
|
108
137
|
$cookies['sid'] = ENV['MEDIUM_COOKIE_SID'] if cookieMissing?('sid') && !ENV['MEDIUM_COOKIE_SID'].to_s.empty?
|
|
109
138
|
$cookies['uid'] = ENV['MEDIUM_COOKIE_UID'] if cookieMissing?('uid') && !ENV['MEDIUM_COOKIE_UID'].to_s.empty?
|
|
139
|
+
$cookies['cf_clearance'] = ENV['MEDIUM_COOKIE_CF_CLEARANCE'] if cookieMissing?('cf_clearance') && !ENV['MEDIUM_COOKIE_CF_CLEARANCE'].to_s.empty?
|
|
140
|
+
$cookies['_cfuvid'] = ENV['MEDIUM_COOKIE_CFUVID'] if cookieMissing?('_cfuvid') && !ENV['MEDIUM_COOKIE_CFUVID'].to_s.empty?
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def loadCookiesFromCache!
|
|
144
|
+
cached = CookieCache.load
|
|
145
|
+
return if cached.empty?
|
|
146
|
+
ChromeAuth::TARGET_COOKIES.each do |name|
|
|
147
|
+
value = cached[name]
|
|
148
|
+
next if value.to_s.empty?
|
|
149
|
+
$cookies[name] = value if cookieMissing?(name)
|
|
150
|
+
end
|
|
110
151
|
end
|
|
111
152
|
|
|
112
153
|
def cookieMissing?(name)
|
|
@@ -131,6 +172,7 @@ module CLI
|
|
|
131
172
|
!host.empty? && host != DEFAULT_MIRO_MEDIUM_HOST
|
|
132
173
|
end
|
|
133
174
|
|
|
175
|
+
|
|
134
176
|
# Only warn when the invocation will actually hit Medium — skip for
|
|
135
177
|
# --version, --clean, --help, --new.
|
|
136
178
|
def warnAboutMissingSetup(options, errput: $stderr)
|
|
@@ -150,57 +192,16 @@ module CLI
|
|
|
150
192
|
!options[:postURL].nil? || !options[:username].nil?
|
|
151
193
|
end
|
|
152
194
|
|
|
153
|
-
#
|
|
154
|
-
#
|
|
155
|
-
# act; body is static guidance covering empirical limits, scenarios,
|
|
156
|
-
# and how to pass each value via flag or env.
|
|
195
|
+
# One-line warning. The wiki has the actual setup steps; we just
|
|
196
|
+
# nudge the user toward it instead of dumping a wall of guidance.
|
|
157
197
|
def buildSetupBanner(missingCookies:, missingProxy:, missingImageProxy:)
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
lines << ' • Cloudflare Worker proxy for image CDN (MIRO_MEDIUM_HOST not set or still default; optional companion).' if missingImageProxy
|
|
166
|
-
lines << ''
|
|
167
|
-
lines << <<~BODY.chomp
|
|
168
|
-
Empirical limits without setup:
|
|
169
|
-
• Without cookies : Cloudflare blocks after ~10 posts.
|
|
170
|
-
• Without Worker proxy : Cloudflare blocks after ~25 posts
|
|
171
|
-
when running from CI / datacenter IPs.
|
|
172
|
-
• Paywalled posts : cookies are REQUIRED for full content;
|
|
173
|
-
without them you only get the preview.
|
|
174
|
-
|
|
175
|
-
Recommended setup:
|
|
176
|
-
• CI / CD (GitHub Actions, cloud runners):
|
|
177
|
-
STRONGLY recommend BOTH cookies AND a Cloudflare Worker proxy.
|
|
178
|
-
• Local machine:
|
|
179
|
-
Cookies recommended for paywalled posts. If a Cloudflare
|
|
180
|
-
challenge appears, the tool will automatically open
|
|
181
|
-
https://medium.com in your browser and prompt you to retry
|
|
182
|
-
once you've cleared it. Set MEDIUM_NO_AUTO_BROWSER=1 to
|
|
183
|
-
opt out and just fail fast.
|
|
184
|
-
|
|
185
|
-
Pass cookies via env (preferred — keeps secrets out of shell history):
|
|
186
|
-
MEDIUM_COOKIE_SID=... MEDIUM_COOKIE_UID=... ZMediumToMarkdown -p URL
|
|
187
|
-
|
|
188
|
-
Or via flags (fine for one-off local runs):
|
|
189
|
-
ZMediumToMarkdown -p URL -s YOUR_SID -d YOUR_UID
|
|
190
|
-
|
|
191
|
-
Pass Cloudflare Worker proxy URL(s):
|
|
192
|
-
ZMediumToMarkdown -p URL \\
|
|
193
|
-
-x https://YOUR-WORKER.workers.dev/_/graphql \\
|
|
194
|
-
--miro_medium_host https://YOUR-IMAGE-WORKER.workers.dev
|
|
195
|
-
# or via env:
|
|
196
|
-
# MEDIUM_HOST=https://YOUR-WORKER.workers.dev/_/graphql
|
|
197
|
-
# MIRO_MEDIUM_HOST=https://YOUR-IMAGE-WORKER.workers.dev
|
|
198
|
-
|
|
199
|
-
Full setup guide (cookies + Cloudflare Worker proxy):
|
|
200
|
-
#{COOKIE_SETUP_URL}
|
|
201
|
-
──────────────────────────────────────────────────────────────────────
|
|
202
|
-
BODY
|
|
203
|
-
lines.join("\n")
|
|
198
|
+
missing = []
|
|
199
|
+
missing << 'Medium cookies (sid / uid)' if missingCookies
|
|
200
|
+
missing << 'Cloudflare Worker proxy (MEDIUM_HOST)' if missingProxy
|
|
201
|
+
missing << 'Cloudflare image proxy (MIRO_MEDIUM_HOST)' if missingImageProxy
|
|
202
|
+
return '' if missing.empty?
|
|
203
|
+
|
|
204
|
+
"⚠ Missing #{missing.join(' / ')}. Medium / Cloudflare may block the run. Setup guide: #{COOKIE_SETUP_URL}"
|
|
204
205
|
end
|
|
205
206
|
|
|
206
207
|
def run(options, cwd, output: $stdout, errput: $stderr)
|
|
@@ -234,6 +235,11 @@ module CLI
|
|
|
234
235
|
return
|
|
235
236
|
end
|
|
236
237
|
|
|
238
|
+
if options[:auth]
|
|
239
|
+
runAuth(errput: errput)
|
|
240
|
+
return
|
|
241
|
+
end
|
|
242
|
+
|
|
237
243
|
# --stdout / --list path: render to the given output stream, skip
|
|
238
244
|
# all filesystem writes and asset downloads. Progress goes to errput
|
|
239
245
|
# so stdout stays pure markdown / NDJSON for embedding callers.
|
|
@@ -278,6 +284,32 @@ module CLI
|
|
|
278
284
|
Helper.printNewVersionMessageIfExists()
|
|
279
285
|
end
|
|
280
286
|
|
|
287
|
+
# `--auth` entry point: drive the Chrome login flow on demand so users
|
|
288
|
+
# can seed the cookie cache before kicking off a bulk / CI job. Errors
|
|
289
|
+
# are surfaced to errput; we never raise — `--auth` is best-effort
|
|
290
|
+
# setup, not a critical path.
|
|
291
|
+
def runAuth(errput: $stderr)
|
|
292
|
+
unless ChromeAuth.available?
|
|
293
|
+
errput.puts <<~MSG
|
|
294
|
+
⚠ Chrome was not detected, so --auth can't run the auto-login flow.
|
|
295
|
+
Install Google Chrome (or any Chromium-based browser ferrum can
|
|
296
|
+
detect), or extract sid / uid manually — see:
|
|
297
|
+
#{COOKIE_SETUP_URL}
|
|
298
|
+
MSG
|
|
299
|
+
return
|
|
300
|
+
end
|
|
301
|
+
|
|
302
|
+
cookies = ChromeAuth.login!(errput: errput)
|
|
303
|
+
if cookies.empty?
|
|
304
|
+
errput.puts '⚠ No cookies were captured. Make sure you finished signing in on a medium.com page before pressing Enter.'
|
|
305
|
+
return
|
|
306
|
+
end
|
|
307
|
+
cookies.each { |k, v| $cookies[k] = v unless v.to_s.empty? }
|
|
308
|
+
errput.puts "✅ Captured #{cookies.keys.join(' / ')} → #{CookieCache.path}"
|
|
309
|
+
rescue StandardError => e
|
|
310
|
+
errput.puts "(Auto-login failed: #{e.class}: #{e.message})"
|
|
311
|
+
end
|
|
312
|
+
|
|
281
313
|
# Jekyll mode writes into the cwd (so files land in `_posts/...` and
|
|
282
314
|
# `assets/...` of an existing Jekyll site). Plain mode nests under
|
|
283
315
|
# `Output/` to keep the user's cwd tidy.
|
data/lib/ChromeAuth.rb
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
require 'CookieCache'
|
|
2
|
+
|
|
3
|
+
# Drive a visible Chrome (via ferrum / CDP) to let the user sign into Medium
# in a real browser, then read sid/uid/cf_clearance/_cfuvid back out of the
# session. Used both for first-time setup (no cookies on disk) and as the
# Cloudflare-block recovery flow (cf_clearance refresh).
#
# Login is interactive, so the browser is launched with headless:false and
# the user is expected to complete the login in the visible window before
# pressing Enter.
module ChromeAuth
  # Cookie names harvested from the browser session.
  TARGET_COOKIES = %w[sid uid cf_clearance _cfuvid].freeze
  # Page opened for a first-time sign-in.
  LOGIN_URL = 'https://medium.com/m/signin'.freeze
  # Page opened when only a Cloudflare challenge needs clearing.
  REFRESH_URL = 'https://medium.com'.freeze

  # Currently open browser session (nil when none). Holds
  # { browser:, openURL:, startedAt: } between startSession! and
  # finishSession! / cancelSession!.
  @@session = nil

  module_function

  # True iff ferrum loads AND a Chrome binary is detectable. Anything
  # else (including ferrum not being installed) returns false so the
  # caller can fall back to another flow without aborting.
  def available?
    require 'ferrum'
    path = Ferrum::Browser::Options::Chrome.options.detect_path
    !path.nil? && !path.empty?
  rescue LoadError, StandardError
    # LoadError is not a StandardError, so it has to be listed explicitly.
    false
  end

  # ---- Single-shot CLI flow ---------------------------------------
  # Open Chrome at openURL, wait for the user to press Enter, then
  # collect the target cookies.
  #
  # errput  - IO the instructions / prompt are written to.
  # input   - IO the confirmation line is read from.
  # openURL - page the browser is pointed at.
  #
  # Returns a hash { 'sid' => '...', ... }. Cookies missing from the
  # browser (or present with an empty value) are omitted, so callers must
  # check what came back rather than assume completeness. Returns {}
  # without touching the cache when the user aborts with Ctrl-D (EOF on
  # input), as the prompt promises.
  #
  # Raises StandardError on browser launch / navigation failure; callers
  # are expected to rescue and degrade gracefully.
  def login!(errput: $stderr, input: $stdin, openURL: LOGIN_URL)
    startSession!(openURL: openURL)
    # promptUser returns nil on EOF; that is the documented abort signal,
    # so do not harvest or persist anything in that case.
    return {} if promptUser(errput, input, openURL).nil?

    finishSession!
  ensure
    # Idempotent. Runs on success (no-op, finishSession! already closed
    # the browser), on abort, on StandardError and on Interrupt, so the
    # browser is never left open by this flow.
    cancelSession!
  end

  # ---- Split flow for long-lived hosts ----------------------------
  # `startSession!` / `finishSession!` / `cancelSession!` let a caller
  # spawn the browser in one call and harvest cookies in another, with
  # the host process holding the still-open browser between calls.
  #
  # Lifecycle:
  #   startSession!  → opens browser, returns immediately. If a session
  #                    is already alive, that one is cancelled first.
  #   finishSession! → reads cookies from the live browser, writes the
  #                    cache, quits the browser, clears the session and
  #                    returns the cookies hash.
  #   cancelSession! → quit + clear; idempotent.
  #
  # Not thread-safe: the session lives in a single shared variable.
  #
  # Returns { ok: true, openURL: openURL }. Re-raises any StandardError
  # from browser construction / navigation after quitting the browser.
  def startSession!(openURL: LOGIN_URL)
    cancelSession! if sessionActive?
    browser = buildBrowser
    browser.go_to(openURL)
    @@session = { browser: browser, openURL: openURL, startedAt: Time.now }
    { ok: true, openURL: openURL }
  rescue StandardError
    # If go_to or anything else blew up, make sure we don't leave a
    # half-built browser around with no handle. `browser` is nil when
    # buildBrowser itself raised.
    begin
      browser&.quit
    rescue StandardError
      # ignore: best-effort shutdown
    end
    @@session = nil
    raise
  end

  # Harvest cookies from the active session and persist them.
  #
  # Captured cookies are merged over whatever CookieCache.load returns
  # and handed to CookieCache.save; nothing is saved when no cookie was
  # captured. The session is always cancelled afterwards.
  #
  # Returns the captured cookies hash (possibly empty).
  # Raises RuntimeError when no session is active.
  def finishSession!
    raise 'No active ChromeAuth session — call startSession! first.' unless sessionActive?

    cookies = collectMediumCookies(@@session[:browser])
    CookieCache.save(CookieCache.load.merge(cookies)) unless cookies.empty?
    cookies
  ensure
    cancelSession!
  end

  # Quit the browser of the active session and clear it.
  #
  # Returns false when there was no session, true otherwise. Errors from
  # the browser shutdown are ignored.
  def cancelSession!
    return false unless sessionActive?

    browser = @@session[:browser]
    # Clear the handle first so a failing quit cannot leave a stale session.
    @@session = nil
    begin
      browser&.quit
    rescue StandardError
      # ignore: best-effort shutdown
    end
    true
  end

  # True while a session started by startSession! has not been finished
  # or cancelled.
  def sessionActive?
    !@@session.nil?
  end

  # Factory split out so tests can stub it. Tweaking ferrum options
  # globally (window size, timeouts) belongs here too.
  def buildBrowser
    require 'ferrum'
    Ferrum::Browser.new(
      headless: false,
      window_size: [1280, 900],
      timeout: 60,
      process_timeout: 30
    )
  end

  # Filter the browser's cookie jar down to medium.com cookies whose
  # name is one of TARGET_COOKIES. Both `.medium.com` and `medium.com`
  # (and subdomains) are accepted — see mediumDomain?.
  #
  # Cookies with an empty value are skipped: they carry no usable
  # credential and would otherwise overwrite a good cached value when
  # finishSession! merges the result into the cache.
  #
  # Returns a hash of name => value; {} when reading the jar raises.
  def collectMediumCookies(browser)
    result = {}
    browser.cookies.each do |cookie|
      next unless TARGET_COOKIES.include?(cookie.name)
      next unless mediumDomain?(cookie.domain)
      next if cookie.value.to_s.empty?

      result[cookie.name] = cookie.value
    end
    result
  rescue StandardError
    {}
  end

  # True when `domain` is medium.com or one of its subdomains, with or
  # without a leading dot. nil is never a Medium domain.
  def mediumDomain?(domain)
    return false if domain.nil?

    normalized = domain.start_with?('.') ? domain[1..] : domain
    normalized == 'medium.com' || normalized.end_with?('.medium.com')
  end

  # Print the sign-in instructions to errput and block until a line is
  # read from input.
  #
  # Returns the line read, or nil on EOF (Ctrl-D).
  def promptUser(errput, input, url)
    errput.puts <<~MSG

      ──────────────────────────────────────────────────────────────────────
      🔐 Sign into Medium in the Chrome window that just opened.

      Steps:
      1. Complete login (and any Cloudflare challenge) at #{url}.
      2. Stay on a medium.com page once you're signed in.
      3. Come back here and press Enter — we'll read sid / uid /
      cf_clearance / _cfuvid out of the browser and cache them at
      #{CookieCache.path}.

      (Press Ctrl-D to abort and fall back to manual setup.)
      ──────────────────────────────────────────────────────────────────────
    MSG
    errput.print 'Press Enter when signed in… '
    line = input.gets
    errput.puts
    line
  end
end
|
data/lib/CookieCache.rb
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
require 'json'
|
|
2
|
+
require 'fileutils'
|
|
3
|
+
require 'openssl'
|
|
4
|
+
|
|
5
|
+
# On-disk cache for Medium / Cloudflare cookies captured by ChromeAuth.
# Stored at ~/.zmediumtomarkdown so subsequent runs can reuse sid/uid
# and a still-valid cf_clearance/_cfuvid without prompting again.
#
# Encrypted at rest with AES-256-GCM using a fixed key shipped with the
# gem. The key is constant on purpose — this is *obfuscation against
# casual file-system snoops*, not protection from an attacker who has
# the gem source. The file is also written 0600.
#
# On-disk layout (binary):
#   bytes 0..11  : 12-byte IV  (random per write)
#   bytes 12..27 : 16-byte tag (GCM auth tag)
#   bytes 28..   : ciphertext
#
# The path can be overridden with ZMEDIUM_COOKIE_CACHE_PATH (used by tests
# and power users who want the cache in a different location).
module CookieCache
  PATH_ENV = 'ZMEDIUM_COOKIE_CACHE_PATH'.freeze
  DEFAULT_BASENAME = '.zmediumtomarkdown'.freeze
  CIPHER = 'aes-256-gcm'.freeze
  SECRET = 'r3n2wJAX8o944MqFVZPwirjUGZ9A7mII'.freeze # 32 bytes → AES-256
  IV_LEN = 12
  TAG_LEN = 16

  module_function

  # Location of the cache file: $ZMEDIUM_COOKIE_CACHE_PATH when set and
  # non-empty, otherwise DEFAULT_BASENAME inside the user's home directory.
  def path
    override = ENV[PATH_ENV].to_s
    return override unless override.empty?

    File.join(Dir.home, DEFAULT_BASENAME)
  end

  # Returns the hash of cached cookies. A missing file or an unreadable /
  # corrupt blob (wrong key, truncated, tampered, not a JSON object)
  # returns {} — never raises, so the caller can treat the cache as
  # best-effort.
  def load
    target = path
    return {} unless File.exist?(target)

    # Cipher output is binary; tag it as UTF-8 (what JSON.generate wrote)
    # before handing it to the parser.
    plaintext = decrypt(File.binread(target)).force_encoding(Encoding::UTF_8)
    parsed = JSON.parse(plaintext)
    parsed.is_a?(Hash) ? parsed : {}
  rescue StandardError
    {}
  end

  # Atomic write: encrypt the JSON blob, write it to a sibling tmp file
  # created with mode 0600, then rename over the target.
  #
  # Best-effort: any IO/encryption error is swallowed (the cache is a
  # convenience, not the source of truth — losing a write should not
  # abort the run). That includes errors from removing the tmp file.
  #
  # Returns true when the cache was written, false when `hash` is not a
  # non-empty Hash or the write failed.
  def save(hash)
    return false unless hash.is_a?(Hash) && !hash.empty?

    target = path
    FileUtils.mkdir_p(File.dirname(target))
    tmp = "#{target}.tmp.#{Process.pid}"
    File.open(tmp, File::WRONLY | File::CREAT | File::TRUNC | File::BINARY, 0o600) do |f|
      f.write(encrypt(JSON.generate(hash)))
    end
    File.rename(tmp, target)
    true
  rescue StandardError
    # `tmp` is nil when the failure happened before it was assigned.
    begin
      File.unlink(tmp) if tmp && File.exist?(tmp)
    rescue StandardError
      # ignore: a leftover tmp file must not turn into a crash
    end
    false
  end

  # Delete the cache file if present. A file that vanishes between the
  # existence check and the unlink is treated as already cleared.
  def clear
    File.unlink(path) if File.exist?(path)
  rescue Errno::ENOENT
    # already gone
  end

  # Encrypt `plaintext` with AES-256-GCM under SECRET and a fresh random
  # IV. Returns the binary blob IV + auth tag + ciphertext.
  def encrypt(plaintext)
    cipher = OpenSSL::Cipher.new(CIPHER).encrypt
    cipher.key = SECRET
    iv = cipher.random_iv # 12 bytes
    cipher.auth_data = ''
    # Cipher#update can raise ArgumentError on an empty string; an empty
    # payload is still valid GCM input, so skip straight to `final`.
    ct = plaintext.empty? ? cipher.final : cipher.update(plaintext) + cipher.final
    iv + cipher.auth_tag + ct
  end

  # Inverse of encrypt. Raises when the blob is shorter than IV + tag or
  # when authentication fails (wrong key / tampered data).
  def decrypt(blob)
    raise 'cache blob too short' if blob.nil? || blob.bytesize < IV_LEN + TAG_LEN

    iv  = blob.byteslice(0, IV_LEN)
    tag = blob.byteslice(IV_LEN, TAG_LEN)
    ct  = blob.byteslice(IV_LEN + TAG_LEN, blob.bytesize - IV_LEN - TAG_LEN)
    cipher = OpenSSL::Cipher.new(CIPHER).decrypt
    cipher.key = SECRET
    cipher.iv = iv
    cipher.auth_tag = tag
    cipher.auth_data = ''
    # Same empty-input guard as in encrypt; `final` still verifies the tag.
    ct.empty? ? cipher.final : cipher.update(ct) + cipher.final
  end
end
|
data/lib/Request.rb
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
require 'net/http'
|
|
2
2
|
require 'nokogiri'
|
|
3
|
+
require 'ChromeAuth'
|
|
4
|
+
require 'CookieCache'
|
|
3
5
|
|
|
4
6
|
class Request
|
|
5
7
|
# Raised when Medium's Cloudflare layer blocks the request (typically
|
|
@@ -27,20 +29,22 @@ class Request
|
|
|
27
29
|
Pick the fix that matches where you're running:
|
|
28
30
|
|
|
29
31
|
• Local machine (your laptop / desktop):
|
|
30
|
-
|
|
31
|
-
the
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
Re-run on a TTY without --non-interactive to trigger
|
|
33
|
+
the Chrome auto-login flow (captures sid / uid /
|
|
34
|
+
cf_clearance / _cfuvid). Or open https://medium.com
|
|
35
|
+
in a normal browser and clear the challenge by hand.
|
|
34
36
|
|
|
35
37
|
• CI / CD (GitHub Actions, cloud runners):
|
|
36
38
|
A human can't clear the challenge. Set up BOTH:
|
|
37
39
|
1. Medium login cookies (sid / uid) — pass via env
|
|
38
40
|
MEDIUM_COOKIE_SID and MEDIUM_COOKIE_UID, or via
|
|
39
|
-
the -s / -d flags.
|
|
41
|
+
the -s / -d flags. Optionally add cf_clearance
|
|
42
|
+
/ _cfuvid via MEDIUM_COOKIE_CF_CLEARANCE /
|
|
43
|
+
MEDIUM_COOKIE_CFUVID for short-term unblocking.
|
|
40
44
|
2. A Cloudflare Worker proxy so requests originate
|
|
41
45
|
from inside Cloudflare's network instead of a
|
|
42
46
|
flagged datacenter IP. Point the tool at it via
|
|
43
|
-
the MEDIUM_HOST env var.
|
|
47
|
+
the MEDIUM_HOST env var. (Recommended.)
|
|
44
48
|
|
|
45
49
|
Full step-by-step setup guide:
|
|
46
50
|
https://github.com/ZhgChgLi/ZMediumToMarkdown/wiki/Setting-Up-Medium-Cookies-and-a-Cloudflare-Worker-Proxy
|
|
@@ -100,9 +104,42 @@ class Request
|
|
|
100
104
|
end
|
|
101
105
|
|
|
102
106
|
# Run the interactive recovery flow. Returns true if the user
|
|
103
|
-
#
|
|
107
|
+
# cleared the challenge (and, when Chrome is available, we
|
|
108
|
+
# successfully refreshed cookies); false if they pressed Ctrl-D
|
|
104
109
|
# (EOF) or otherwise gave up.
|
|
110
|
+
#
|
|
111
|
+
# Two paths:
|
|
112
|
+
# 1. ChromeAuth available → drive Chrome via ferrum; on success
|
|
113
|
+
# sid/uid/cf_clearance/_cfuvid land in $cookies and the cache.
|
|
114
|
+
# 2. Otherwise → legacy fallback: open default browser, ask the
|
|
115
|
+
# user to clear the challenge by hand, retry without new cookies.
|
|
105
116
|
def run(url, errput: $stderr, input: $stdin, autoOpen: true)
|
|
117
|
+
if ChromeAuth.available?
|
|
118
|
+
return runChromeFlow(url, errput: errput, input: input)
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
runDefaultBrowserFlow(url, errput: errput, input: input, autoOpen: autoOpen)
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
def runChromeFlow(url, errput:, input:)
|
|
125
|
+
errput.puts <<~MSG
|
|
126
|
+
|
|
127
|
+
──────────────────────────────────────────────────────────────────────
|
|
128
|
+
⚠ Cloudflare bot challenge detected at #{url}.
|
|
129
|
+
Opening Chrome so you can clear it (and refresh login if needed).
|
|
130
|
+
──────────────────────────────────────────────────────────────────────
|
|
131
|
+
|
|
132
|
+
MSG
|
|
133
|
+
cookies = ChromeAuth.login!(errput: errput, input: input,
|
|
134
|
+
openURL: ChromeAuth::REFRESH_URL)
|
|
135
|
+
cookies.each { |k, v| $cookies[k] = v unless v.to_s.empty? }
|
|
136
|
+
!cookies.empty?
|
|
137
|
+
rescue StandardError => e
|
|
138
|
+
errput.puts "(Chrome auto-recovery failed: #{e.class}: #{e.message}. Falling back to default browser.)"
|
|
139
|
+
runDefaultBrowserFlow(url, errput: errput, input: input, autoOpen: true)
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
def runDefaultBrowserFlow(url, errput:, input:, autoOpen:)
|
|
106
143
|
errput.puts <<~MSG
|
|
107
144
|
|
|
108
145
|
──────────────────────────────────────────────────────────────────────
|
|
@@ -114,6 +151,7 @@ class Request
|
|
|
114
151
|
2. Complete the "Just a moment…" / CAPTCHA challenge there.
|
|
115
152
|
3. Come back here and press Enter to retry.
|
|
116
153
|
|
|
154
|
+
(Install Google Chrome to enable auto-cookie capture next time.)
|
|
117
155
|
(To disable this prompt and just fail fast, set #{DISABLE_ENV_VAR}=1.)
|
|
118
156
|
──────────────────────────────────────────────────────────────────────
|
|
119
157
|
|
|
@@ -128,12 +166,10 @@ class Request
|
|
|
128
166
|
end
|
|
129
167
|
end
|
|
130
168
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
#
|
|
134
|
-
|
|
135
|
-
@@cloudflareInteractiveResolutionAttempted = false
|
|
136
|
-
end
|
|
169
|
+
# Cap how many times a single self.URL call chain can fall through
|
|
170
|
+
# the Cloudflare-recovery branch, so a user who keeps saying yes to
|
|
171
|
+
# the prompt while Medium keeps blocking can't loop forever.
|
|
172
|
+
CLOUDFLARE_RECOVERY_LIMIT = 5
|
|
137
173
|
|
|
138
174
|
def self.URL(url, method = 'GET', data = nil, retryCount = 0)
|
|
139
175
|
retryCount += 1
|
|
@@ -204,11 +240,13 @@ class Request
|
|
|
204
240
|
end
|
|
205
241
|
|
|
206
242
|
if cloudflareBlocked?(response)
|
|
207
|
-
#
|
|
208
|
-
#
|
|
209
|
-
#
|
|
210
|
-
|
|
211
|
-
|
|
243
|
+
# On every Cloudflare block — even when cookies are already
|
|
244
|
+
# set — re-run the recovery flow on a TTY. ChromeAuth refreshes
|
|
245
|
+
# sid/uid/cf_clearance/_cfuvid into $cookies + the cache, so
|
|
246
|
+
# the next attempt usually succeeds. Bounded by retryCount so
|
|
247
|
+
# a degenerate loop (user keeps clearing, Medium keeps blocking)
|
|
248
|
+
# eventually surfaces the error. CI / non-TTY just raises.
|
|
249
|
+
if retryCount <= CLOUDFLARE_RECOVERY_LIMIT && InteractiveCloudflareRecovery.available?
|
|
212
250
|
if InteractiveCloudflareRecovery.run(url)
|
|
213
251
|
return self.URL(url, method, data, retryCount)
|
|
214
252
|
end
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ZMediumToMarkdown
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.
|
|
4
|
+
version: 3.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- ZhgChgLi
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2026-05-
|
|
10
|
+
date: 2026-05-06 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: nokogiri
|
|
@@ -77,6 +77,20 @@ dependencies:
|
|
|
77
77
|
- - "<"
|
|
78
78
|
- !ruby/object:Gem::Version
|
|
79
79
|
version: '2.0'
|
|
80
|
+
- !ruby/object:Gem::Dependency
|
|
81
|
+
name: ferrum
|
|
82
|
+
requirement: !ruby/object:Gem::Requirement
|
|
83
|
+
requirements:
|
|
84
|
+
- - "~>"
|
|
85
|
+
- !ruby/object:Gem::Version
|
|
86
|
+
version: '0.15'
|
|
87
|
+
type: :runtime
|
|
88
|
+
prerelease: false
|
|
89
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
90
|
+
requirements:
|
|
91
|
+
- - "~>"
|
|
92
|
+
- !ruby/object:Gem::Version
|
|
93
|
+
version: '0.15'
|
|
80
94
|
description: ZMediumToMarkdown converts Medium posts into clean, portable Markdown.
|
|
81
95
|
It can download a single post or every post from a Medium username, preserving headings,
|
|
82
96
|
lists, blockquotes, code blocks, images, links, and common embeds such as GitHub
|
|
@@ -89,6 +103,8 @@ extra_rdoc_files: []
|
|
|
89
103
|
files:
|
|
90
104
|
- bin/ZMediumToMarkdown
|
|
91
105
|
- lib/CLI.rb
|
|
106
|
+
- lib/ChromeAuth.rb
|
|
107
|
+
- lib/CookieCache.rb
|
|
92
108
|
- lib/Helper.rb
|
|
93
109
|
- lib/ImageDownloader.rb
|
|
94
110
|
- lib/Models/Paragraph.rb
|