ZMediumToMarkdown 3.1.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ef6ecd9a875dbef00977b254ceb317ec946c67003573a607e3a81662b91a76db
4
- data.tar.gz: 9b4e3e21b0e3b08f6effdb4b2339f112dc53c1f5acae7655dea28ed23cbe78fa
3
+ metadata.gz: 1e19605bba5d079fa1086c14dfb33c7d33f36da86b46c6c6c97affc960521d26
4
+ data.tar.gz: 96e13a36f2573d2dae057ee7a318ad66d9d09dc69990deff5bf5033a72032f8c
5
5
  SHA512:
6
- metadata.gz: e746fef4d6506fbb3894d3b5afd6dbfeb2a2850bebf4839c562482b397566b01e97866bde41e734e8c3e8824612f263cea21984025c60a1761c26623dd0501e7
7
- data.tar.gz: bc4625d560ae67f278335eda453dcaf3ee511318af072a147a4c56ac0386c63f37ef049e77b37e8cdba672a386a7d915815f7dbbda5e1bbdc5488fdb091796d4
6
+ metadata.gz: 81e76d2686c7f06976ea9fdcb09243b75e5f628669e3c405b27a636705d6a54862f264c569bc4e33bcf571d3ba6696ed578b20a45350ea81fb993b605d76e860
7
+ data.tar.gz: 34d756095ba13548c551da107d9e4903b51d094087ce8ac71ed140f52adf0e3a852afbe35bae15b27c09ed08cafb4df91fc121bdf7e262b123e989b3053af8bb
data/lib/CLI.rb CHANGED
@@ -5,6 +5,8 @@ require 'ZMediumFetcher'
5
5
  require 'Helper'
6
6
  require 'PathPolicy'
7
7
  require 'Request'
8
+ require 'CookieCache'
9
+ require 'ChromeAuth'
8
10
 
9
11
  # All CLI-side concerns for the `ZMediumToMarkdown` executable. Pulled out
10
12
  # of bin/ so it can be exercised by unit tests without spawning processes.
@@ -21,7 +23,7 @@ module CLI
21
23
  argv << '-h' if argv.empty?
22
24
 
23
25
  options = parseArgs(argv, errput: errput)
24
- loadCookiesFromEnv!
26
+ loadCookies!
25
27
  warnAboutMissingSetup(options, errput: errput)
26
28
  run(options, cwd, output: output, errput: errput)
27
29
  end
@@ -39,6 +41,14 @@ module CLI
39
41
  $cookies['uid'] = v
40
42
  end
41
43
 
44
+ opts.on('--cookie_cf_clearance VALUE', 'Cloudflare cf_clearance cookie value (or set $MEDIUM_COOKIE_CF_CLEARANCE)') do |v|
45
+ $cookies['cf_clearance'] = v
46
+ end
47
+
48
+ opts.on('--cookie_cfuvid VALUE', 'Cloudflare _cfuvid cookie value (or set $MEDIUM_COOKIE_CFUVID)') do |v|
49
+ $cookies['_cfuvid'] = v
50
+ end
51
+
42
52
  opts.on('-x', '--medium_host URL', 'Cloudflare Worker proxy URL for Medium GraphQL (or set $MEDIUM_HOST). Strongly recommended for CI / bulk runs — see the wiki setup guide.') do |v|
43
53
  ENV['MEDIUM_HOST'] = v
44
54
  end
@@ -95,6 +105,15 @@ module CLI
95
105
  options[:version] = true
96
106
  end
97
107
 
108
+ opts.on('--non-interactive', 'Never prompt or open a browser. CI runners auto-detect this; use the flag to force the same behavior on a TTY.') do
109
+ options[:nonInteractive] = true
110
+ ENV['MEDIUM_NO_AUTO_BROWSER'] = '1'
111
+ end
112
+
113
+ opts.on('--auth', 'Open Chrome to sign in, capture sid / uid / cf_clearance / _cfuvid into the encrypted cookie cache, and exit. Run once before bulk / scheduled jobs to seed the cache.') do
114
+ options[:auth] = true
115
+ end
116
+
98
117
  opts.on('-h', '--help', 'Show this help message') do
99
118
  options[:help] = opts.to_s
100
119
  end
@@ -104,9 +123,31 @@ module CLI
104
123
  options
105
124
  end
106
125
 
126
+ # Cookie precedence (highest → lowest):
127
+ # 1. CLI flags (already written to $cookies in parseArgs)
128
+ # 2. Env vars (MEDIUM_COOKIE_*)
129
+ # 3. On-disk cache (~/.zmediumtomarkdown by default, or $ZMEDIUM_COOKIE_CACHE_PATH; see CookieCache.path)
130
+ # Each layer only fills slots the higher layer left empty.
131
# Fill $cookies from the lower-precedence sources. CLI flags have already
# been written to $cookies by the time this runs, so each loader below only
# fills slots that are still empty: env vars first, then the on-disk cache.
def loadCookies!
  loadCookiesFromEnv!
  loadCookiesFromCache!
end

# Copy MEDIUM_COOKIE_* env vars into $cookies for every cookie that is
# still missing. Unset or empty env vars are ignored.
def loadCookiesFromEnv!
  # cookie name => env var that can supply it
  envVarsByCookie = {
    'sid' => 'MEDIUM_COOKIE_SID',
    'uid' => 'MEDIUM_COOKIE_UID',
    'cf_clearance' => 'MEDIUM_COOKIE_CF_CLEARANCE',
    '_cfuvid' => 'MEDIUM_COOKIE_CFUVID'
  }

  envVarsByCookie.each do |name, envKey|
    next unless cookieMissing?(name)

    value = ENV[envKey].to_s
    $cookies[name] = value unless value.empty?
  end
end

# Copy cookies from the on-disk cache into $cookies for every target cookie
# that is still missing. Empty cached values are skipped.
def loadCookiesFromCache!
  cached = CookieCache.load
  return if cached.empty?

  ChromeAuth::TARGET_COOKIES.each do |name|
    value = cached[name]
    next if value.to_s.empty?

    $cookies[name] = value if cookieMissing?(name)
  end
end
111
152
 
112
153
  def cookieMissing?(name)
@@ -131,6 +172,7 @@ module CLI
131
172
  !host.empty? && host != DEFAULT_MIRO_MEDIUM_HOST
132
173
  end
133
174
 
175
+
134
176
  # Only warn when the invocation will actually hit Medium — skip for
135
177
  # --version, --clean, --help, --new.
136
178
  def warnAboutMissingSetup(options, errput: $stderr)
@@ -150,57 +192,16 @@ module CLI
150
192
  !options[:postURL].nil? || !options[:username].nil?
151
193
  end
152
194
 
153
- # Builds the dynamic setup-warning banner. Header lists exactly which
154
- # of (cookies, GraphQL proxy, image proxy) is missing so the user can
155
- # act; body is static guidance covering empirical limits, scenarios,
156
- # and how to pass each value via flag or env.
195
+ # One-line warning. The wiki has the actual setup steps; we just
196
+ # nudge the user toward it instead of dumping a wall of guidance.
157
197
  def buildSetupBanner(missingCookies:, missingProxy:, missingImageProxy:)
158
- lines = []
159
- lines << '──────────────────────────────────────────────────────────────────────'
160
- lines << '⚠ Setup notice your run will work, but reliability is limited.'
161
- lines << ''
162
- lines << "What's missing:"
163
- lines << ' • Medium login cookies (sid / uid).' if missingCookies
164
- lines << ' Cloudflare Worker proxy for Medium GraphQL (MEDIUM_HOST not set or still default).' if missingProxy
165
- lines << ' • Cloudflare Worker proxy for image CDN (MIRO_MEDIUM_HOST not set or still default; optional companion).' if missingImageProxy
166
- lines << ''
167
- lines << <<~BODY.chomp
168
- Empirical limits without setup:
169
- • Without cookies : Cloudflare blocks after ~10 posts.
170
- • Without Worker proxy : Cloudflare blocks after ~25 posts
171
- when running from CI / datacenter IPs.
172
- • Paywalled posts : cookies are REQUIRED for full content;
173
- without them you only get the preview.
174
-
175
- Recommended setup:
176
- • CI / CD (GitHub Actions, cloud runners):
177
- STRONGLY recommend BOTH cookies AND a Cloudflare Worker proxy.
178
- • Local machine:
179
- Cookies recommended for paywalled posts. If a Cloudflare
180
- challenge appears, the tool will automatically open
181
- https://medium.com in your browser and prompt you to retry
182
- once you've cleared it. Set MEDIUM_NO_AUTO_BROWSER=1 to
183
- opt out and just fail fast.
184
-
185
- Pass cookies via env (preferred — keeps secrets out of shell history):
186
- MEDIUM_COOKIE_SID=... MEDIUM_COOKIE_UID=... ZMediumToMarkdown -p URL
187
-
188
- Or via flags (fine for one-off local runs):
189
- ZMediumToMarkdown -p URL -s YOUR_SID -d YOUR_UID
190
-
191
- Pass Cloudflare Worker proxy URL(s):
192
- ZMediumToMarkdown -p URL \\
193
- -x https://YOUR-WORKER.workers.dev/_/graphql \\
194
- --miro_medium_host https://YOUR-IMAGE-WORKER.workers.dev
195
- # or via env:
196
- # MEDIUM_HOST=https://YOUR-WORKER.workers.dev/_/graphql
197
- # MIRO_MEDIUM_HOST=https://YOUR-IMAGE-WORKER.workers.dev
198
-
199
- Full setup guide (cookies + Cloudflare Worker proxy):
200
- #{COOKIE_SETUP_URL}
201
- ──────────────────────────────────────────────────────────────────────
202
- BODY
203
- lines.join("\n")
198
+ missing = []
199
+ missing << 'Medium cookies (sid / uid)' if missingCookies
200
+ missing << 'Cloudflare Worker proxy (MEDIUM_HOST)' if missingProxy
201
+ missing << 'Cloudflare image proxy (MIRO_MEDIUM_HOST)' if missingImageProxy
202
+ return '' if missing.empty?
203
+
204
+ "⚠ Missing #{missing.join(' / ')}. Medium / Cloudflare may block the run. Setup guide: #{COOKIE_SETUP_URL}"
204
205
  end
205
206
 
206
207
  def run(options, cwd, output: $stdout, errput: $stderr)
@@ -234,6 +235,11 @@ module CLI
234
235
  return
235
236
  end
236
237
 
238
+ if options[:auth]
239
+ runAuth(errput: errput)
240
+ return
241
+ end
242
+
237
243
  # --stdout / --list path: render to the given output stream, skip
238
244
  # all filesystem writes and asset downloads. Progress goes to errput
239
245
  # so stdout stays pure markdown / NDJSON for embedding callers.
@@ -278,6 +284,32 @@ module CLI
278
284
  Helper.printNewVersionMessageIfExists()
279
285
  end
280
286
 
287
+ # `--auth` entry point: drive the Chrome login flow on demand so users
288
+ # can seed the cookie cache before kicking off a bulk / CI job. Errors
289
+ # are surfaced to errput; we never raise — `--auth` is best-effort
290
+ # setup, not a critical path.
291
# `--auth` entry point: run the Chrome login flow on demand and copy the
# captured cookies into $cookies.
#
# Never raises: every StandardError is reported on errput instead, because
# `--auth` is best-effort setup rather than a critical path.
#
# @param errput [IO] stream that receives all user-facing messages
# @return [nil]
def runAuth(errput: $stderr)
  unless ChromeAuth.available?
    errput.puts <<~MSG
      ⚠ Chrome was not detected, so --auth can't run the auto-login flow.
      Install Google Chrome (or any Chromium-based browser ferrum can
      detect), or extract sid / uid manually — see:
      #{COOKIE_SETUP_URL}
    MSG
    return
  end

  # Only cookies with a real value count as captured; an empty value is
  # never stored, so it must not be reported as a success either.
  captured = ChromeAuth.login!(errput: errput).reject { |_name, value| value.to_s.empty? }
  if captured.empty?
    errput.puts '⚠ No cookies were captured. Make sure you finished signing in on a medium.com page before pressing Enter.'
    return
  end

  captured.each { |name, value| $cookies[name] = value }
  errput.puts "✅ Captured #{captured.keys.join(' / ')} → #{CookieCache.path}"
rescue StandardError => e
  errput.puts "(Auto-login failed: #{e.class}: #{e.message})"
end
312
+
281
313
  # Jekyll mode writes into the cwd (so files land in `_posts/...` and
282
314
  # `assets/...` of an existing Jekyll site). Plain mode nests under
283
315
  # `Output/` to keep the user's cwd tidy.
data/lib/ChromeAuth.rb ADDED
@@ -0,0 +1,163 @@
1
+ require 'CookieCache'
2
+
3
+ # Drive a visible Chrome (via ferrum / CDP) to let the user sign into Medium
4
+ # in a real browser, then read sid/uid/cf_clearance/_cfuvid back out of the
5
+ # session. Used both for first-time setup (no cookies on disk) and as the
6
+ # Cloudflare-block recovery flow (cf_clearance refresh).
7
+ #
8
+ # "Headless" in the user's spec is a misnomer — login is interactive, so we
9
+ # launch with headless:false and rely on the user to complete the login
10
+ # in the visible window before pressing Enter.
11
# Drives a visible Chrome window (via ferrum) so the user can sign into
# Medium, then reads the target cookies back out of the browser and stores
# them through CookieCache.
module ChromeAuth
  TARGET_COOKIES = %w[sid uid cf_clearance _cfuvid].freeze
  LOGIN_URL = 'https://medium.com/m/signin'.freeze
  REFRESH_URL = 'https://medium.com'.freeze

  # The single live session ({ browser:, openURL:, startedAt: }) or nil.
  @@session = nil

  module_function

  # True iff ferrum loads AND a Chrome binary is detectable. Anything
  # else returns false so the caller can fall back to another flow
  # without aborting.
  def available?
    require 'ferrum'
    path = Ferrum::Browser::Options::Chrome.options.detect_path
    !path.nil? && !path.empty?
  rescue LoadError, StandardError
    # LoadError is not a StandardError, so it has to be listed explicitly.
    false
  end

  # ---- Single-shot CLI flow ---------------------------------------
  # Open Chrome at openURL, wait for the user to press Enter, then collect
  # the target cookies.
  #
  # @param errput [IO] stream for the prompt text
  # @param input [IO] stream the confirmation line is read from
  # @param openURL [String] page the browser is sent to
  # @return [Hash{String=>String}] captured cookies; names missing from the
  #   browser are omitted, so callers must check what came back. Returns {}
  #   when the user aborts with Ctrl-D (EOF on input): the browser is closed
  #   and nothing is harvested or cached.
  # @raise [StandardError] on browser launch / navigation failure; the
  #   session is torn down before re-raising.
  def login!(errput: $stderr, input: $stdin, openURL: LOGIN_URL)
    startSession!(openURL: openURL)

    # `gets` returns nil on EOF. The prompt promises that Ctrl-D aborts, so
    # honor it instead of harvesting whatever the browser happens to hold.
    if promptUser(errput, input, openURL).nil?
      cancelSession!
      return {}
    end

    finishSession!
  rescue StandardError
    cancelSession!
    raise
  end

  # ---- Split flow for long-lived hosts ----------------------------
  # `startSession!` / `finishSession!` / `cancelSession!` let a caller
  # spawn the browser in one call and harvest cookies in another, with
  # the host process holding the still-open browser in between.
  #
  # Not thread-safe: the session lives in one shared variable.

  # Open the browser at openURL and remember it as the live session. A
  # session that is already alive is cancelled first.
  #
  # @return [Hash] { ok: true, openURL: openURL }
  # @raise [StandardError] if the browser cannot be built or navigated; the
  #   half-built browser is quit and the session cleared before re-raising.
  def startSession!(openURL: LOGIN_URL)
    cancelSession! if sessionActive?
    browser = buildBrowser
    browser.go_to(openURL)
    @@session = { browser: browser, openURL: openURL, startedAt: Time.now }
    { ok: true, openURL: openURL }
  rescue StandardError
    # Don't leave a half-built browser around with no handle.
    quitQuietly(browser)
    @@session = nil
    raise
  end

  # Read cookies from the live browser, merge them into the cache, then
  # quit the browser and clear the session (also on error).
  #
  # @return [Hash{String=>String}] the captured cookies
  # @raise [RuntimeError] when no session is active
  def finishSession!
    raise 'No active ChromeAuth session — call startSession! first.' unless sessionActive?

    cookies = collectMediumCookies(@@session[:browser])
    CookieCache.save(CookieCache.load.merge(cookies)) if cookies.any?
    cookies
  ensure
    cancelSession!
  end

  # Quit the browser and clear the session. Idempotent.
  #
  # @return [Boolean] true if a session was torn down, false if none existed
  def cancelSession!
    return false unless sessionActive?

    browser = @@session[:browser]
    @@session = nil
    quitQuietly(browser)
    true
  end

  def sessionActive?
    !@@session.nil?
  end

  # Factory split out so tests can stub it. Ferrum options (window size,
  # timeouts) belong here too.
  def buildBrowser
    require 'ferrum'
    Ferrum::Browser.new(
      headless: false,
      window_size: [1280, 900],
      timeout: 60,
      process_timeout: 30
    )
  end

  # Filter the browser's cookie jar down to medium.com cookies whose name
  # is one of TARGET_COOKIES. Cookies with an empty value are skipped so
  # they cannot overwrite a good cached value when the result is merged
  # into the cache. Any error while reading the jar yields {}.
  def collectMediumCookies(browser)
    result = {}
    browser.cookies.each do |cookie|
      next unless TARGET_COOKIES.include?(cookie.name)
      next unless mediumDomain?(cookie.domain)
      next if cookie.value.to_s.empty?

      result[cookie.name] = cookie.value
    end
    result
  rescue StandardError
    {}
  end

  # True for medium.com and its subdomains, with or without a leading dot.
  # Compared case-insensitively.
  def mediumDomain?(domain)
    return false if domain.nil?

    normalized = domain.to_s.downcase.delete_prefix('.')
    normalized == 'medium.com' || normalized.end_with?('.medium.com')
  end

  # Best-effort browser shutdown: errors from quit are ignored, nil is a no-op.
  def quitQuietly(browser)
    browser&.quit
  rescue StandardError
    nil
  end

  # Print the sign-in instructions and block until a line is read.
  #
  # @return [String, nil] the line read from input, or nil on EOF (Ctrl-D)
  def promptUser(errput, input, url)
    errput.puts <<~MSG

      ──────────────────────────────────────────────────────────────────────
      🔐 Sign into Medium in the Chrome window that just opened.

      Steps:
      1. Complete login (and any Cloudflare challenge) at #{url}.
      2. Stay on a medium.com page once you're signed in.
      3. Come back here and press Enter — we'll read sid / uid /
      cf_clearance / _cfuvid out of the browser and cache them at
      #{CookieCache.path}.

      (Press Ctrl-D to abort and fall back to manual setup.)
      ──────────────────────────────────────────────────────────────────────
    MSG
    errput.print 'Press Enter when signed in… '
    line = input.gets
    errput.puts
    line
  end
end
data/lib/CookieCache.rb ADDED
@@ -0,0 +1,93 @@
1
+ require 'json'
2
+ require 'fileutils'
3
+ require 'openssl'
4
+
5
+ # On-disk cache for Medium / Cloudflare cookies captured by ChromeAuth.
6
+ # Stored at ~/.zmediumtomarkdown so subsequent runs can reuse sid/uid
7
+ # (long-lived) and ride out a still-valid cf_clearance/_cfuvid without
8
+ # prompting again.
9
+ #
10
+ # Encrypted at rest with AES-256-GCM using a fixed key shipped with the
11
+ # gem. The key is constant on purpose — this is *obfuscation against
12
+ # casual file-system snoops*, not protection from an attacker who has
13
+ # the gem source. The file is also written 0600.
14
+ #
15
+ # On-disk layout (binary):
16
+ # bytes 0..11 : 12-byte IV (random per write)
17
+ # bytes 12..27 : 16-byte tag (GCM auth tag)
18
+ # bytes 28.. : ciphertext
19
+ #
20
+ # The path can be overridden with ZMEDIUM_COOKIE_CACHE_PATH (used by tests
21
+ # and power users who want the cache in a different location).
22
+ module CookieCache
23
+ PATH_ENV = 'ZMEDIUM_COOKIE_CACHE_PATH'.freeze
24
+ DEFAULT_BASENAME = '.zmediumtomarkdown'.freeze
25
+ CIPHER = 'aes-256-gcm'.freeze
26
+ SECRET = 'r3n2wJAX8o944MqFVZPwirjUGZ9A7mII'.freeze # 32 bytes → AES-256
27
+ IV_LEN = 12
28
+ TAG_LEN = 16
29
+
30
+ module_function
31
+
32
+ def path
33
+ override = ENV[PATH_ENV].to_s
34
+ return override unless override.empty?
35
+ File.join(Dir.home, DEFAULT_BASENAME)
36
+ end
37
+
38
+ # Returns hash of cached cookies. Missing file or unreadable / corrupt
39
+ # blob (wrong key, truncated, tampered) returns {} — never raises, so
40
+ # the caller can treat the cache as best-effort.
41
+ def load
42
+ return {} unless File.exist?(path)
43
+ plaintext = decrypt(File.binread(path))
44
+ parsed = JSON.parse(plaintext)
45
+ parsed.is_a?(Hash) ? parsed : {}
46
+ rescue StandardError
47
+ {}
48
+ end
49
+
50
+ # Atomic write: encrypt the JSON blob, write to a sibling tmp file at
51
+ # 0600, rename. Best-effort: any IO/encryption error is swallowed
52
+ # (cache is convenience, not source of truth — losing a write should
53
+ # not abort the run).
54
+ def save(hash)
55
+ return unless hash.is_a?(Hash) && !hash.empty?
56
+ FileUtils.mkdir_p(File.dirname(path))
57
+ tmp = "#{path}.tmp.#{Process.pid}"
58
+ File.open(tmp, File::WRONLY | File::CREAT | File::TRUNC | File::BINARY, 0o600) do |f|
59
+ f.write(encrypt(JSON.generate(hash)))
60
+ end
61
+ File.rename(tmp, path)
62
+ rescue StandardError
63
+ File.unlink(tmp) if defined?(tmp) && tmp && File.exist?(tmp)
64
+ end
65
+
66
+ def clear
67
+ File.unlink(path) if File.exist?(path)
68
+ rescue Errno::ENOENT
69
+ # already gone
70
+ end
71
+
72
+ def encrypt(plaintext)
73
+ cipher = OpenSSL::Cipher.new(CIPHER).encrypt
74
+ cipher.key = SECRET
75
+ iv = cipher.random_iv # 12 bytes
76
+ cipher.auth_data = ''
77
+ ct = cipher.update(plaintext) + cipher.final
78
+ iv + cipher.auth_tag + ct
79
+ end
80
+
81
+ def decrypt(blob)
82
+ raise 'cache blob too short' if blob.nil? || blob.bytesize < IV_LEN + TAG_LEN
83
+ iv = blob.byteslice(0, IV_LEN)
84
+ tag = blob.byteslice(IV_LEN, TAG_LEN)
85
+ ct = blob.byteslice(IV_LEN + TAG_LEN, blob.bytesize - IV_LEN - TAG_LEN)
86
+ cipher = OpenSSL::Cipher.new(CIPHER).decrypt
87
+ cipher.key = SECRET
88
+ cipher.iv = iv
89
+ cipher.auth_tag = tag
90
+ cipher.auth_data = ''
91
+ cipher.update(ct) + cipher.final
92
+ end
93
+ end
data/lib/Request.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  require 'net/http'
2
2
  require 'nokogiri'
3
+ require 'ChromeAuth'
4
+ require 'CookieCache'
3
5
 
4
6
  class Request
5
7
  # Raised when Medium's Cloudflare layer blocks the request (typically
@@ -27,20 +29,22 @@ class Request
27
29
  Pick the fix that matches where you're running:
28
30
 
29
31
  • Local machine (your laptop / desktop):
30
- Open https://medium.com in a normal browser and complete
31
- the Cloudflare challenge ("I'm not a robot" / "Just a
32
- moment…"). Then re-run the script — your residential IP
33
- will be cleared for a while.
32
+ Re-run on a TTY without --non-interactive to trigger
33
+ the Chrome auto-login flow (captures sid / uid /
34
+ cf_clearance / _cfuvid). Or open https://medium.com
35
+ in a normal browser and clear the challenge by hand.
34
36
 
35
37
  • CI / CD (GitHub Actions, cloud runners):
36
38
  A human can't clear the challenge. Set up BOTH:
37
39
  1. Medium login cookies (sid / uid) — pass via env
38
40
  MEDIUM_COOKIE_SID and MEDIUM_COOKIE_UID, or via
39
- the -s / -d flags.
41
+ the -s / -d flags. Optionally add cf_clearance
42
+ / _cfuvid via MEDIUM_COOKIE_CF_CLEARANCE /
43
+ MEDIUM_COOKIE_CFUVID for short-term unblocking.
40
44
  2. A Cloudflare Worker proxy so requests originate
41
45
  from inside Cloudflare's network instead of a
42
46
  flagged datacenter IP. Point the tool at it via
43
- the MEDIUM_HOST env var.
47
+ the MEDIUM_HOST env var. (Recommended.)
44
48
 
45
49
  Full step-by-step setup guide:
46
50
  https://github.com/ZhgChgLi/ZMediumToMarkdown/wiki/Setting-Up-Medium-Cookies-and-a-Cloudflare-Worker-Proxy
@@ -100,9 +104,42 @@ class Request
100
104
  end
101
105
 
102
106
  # Run the interactive recovery flow. Returns true if the user
103
- # confirmed they cleared the challenge, false if they pressed Ctrl-D
107
+ # cleared the challenge (and, when Chrome is available, we
108
+ # successfully refreshed cookies); false if they pressed Ctrl-D
104
109
  # (EOF) or otherwise gave up.
110
+ #
111
+ # Two paths:
112
+ # 1. ChromeAuth available → drive Chrome via ferrum; on success
113
+ # sid/uid/cf_clearance/_cfuvid land in $cookies and the cache.
114
+ # 2. Otherwise → legacy fallback: open default browser, ask the
115
+ # user to clear the challenge by hand, retry without new cookies.
105
116
# Run the interactive recovery flow for a Cloudflare block at `url`.
#
# Uses the Chrome flow when ChromeAuth reports it is available, otherwise
# the default-browser flow.
#
# @param url [String] the URL that was blocked (shown to the user)
# @param errput [IO] stream for prompts and messages
# @param input [IO] stream the user's confirmation is read from
# @param autoOpen [Boolean] forwarded to the default-browser flow, including
#   when the Chrome flow fails and falls back to it
# @return the result of whichever flow ran
def run(url, errput: $stderr, input: $stdin, autoOpen: true)
  if ChromeAuth.available?
    return runChromeFlow(url, errput: errput, input: input, autoOpen: autoOpen)
  end

  runDefaultBrowserFlow(url, errput: errput, input: input, autoOpen: autoOpen)
end

# Chrome path: let the user clear the challenge in Chrome, then copy the
# captured cookies into $cookies.
#
# @param autoOpen [Boolean] only used if Chrome fails and the flow falls
#   back to the default browser; defaults to true so existing callers keep
#   their behavior
# @return [Boolean] true if at least one non-empty cookie was captured;
#   on a Chrome error, the result of the default-browser fallback instead
def runChromeFlow(url, errput:, input:, autoOpen: true)
  errput.puts <<~MSG

    ──────────────────────────────────────────────────────────────────────
    ⚠ Cloudflare bot challenge detected at #{url}.
    Opening Chrome so you can clear it (and refresh login if needed).
    ──────────────────────────────────────────────────────────────────────

  MSG
  cookies = ChromeAuth.login!(errput: errput, input: input,
                              openURL: ChromeAuth::REFRESH_URL)
  # Empty values are never stored, so they must not count as a successful
  # refresh either: retrying with unchanged cookies would hit the same block.
  refreshed = cookies.reject { |_name, value| value.to_s.empty? }
  refreshed.each { |name, value| $cookies[name] = value }
  !refreshed.empty?
rescue StandardError => e
  errput.puts "(Chrome auto-recovery failed: #{e.class}: #{e.message}. Falling back to default browser.)"
  # Respect the caller's autoOpen choice instead of forcing the browser open.
  runDefaultBrowserFlow(url, errput: errput, input: input, autoOpen: autoOpen)
end
141
+
142
+ def runDefaultBrowserFlow(url, errput:, input:, autoOpen:)
106
143
  errput.puts <<~MSG
107
144
 
108
145
  ──────────────────────────────────────────────────────────────────────
@@ -114,6 +151,7 @@ class Request
114
151
  2. Complete the "Just a moment…" / CAPTCHA challenge there.
115
152
  3. Come back here and press Enter to retry.
116
153
 
154
+ (Install Google Chrome to enable auto-cookie capture next time.)
117
155
  (To disable this prompt and just fail fast, set #{DISABLE_ENV_VAR}=1.)
118
156
  ──────────────────────────────────────────────────────────────────────
119
157
 
@@ -128,12 +166,10 @@ class Request
128
166
  end
129
167
  end
130
168
 
131
- @@cloudflareInteractiveResolutionAttempted = false
132
-
133
- # Test helper: reset the once-per-process recovery flag.
134
- def self.resetCloudflareInteractiveResolution!
135
- @@cloudflareInteractiveResolutionAttempted = false
136
- end
169
+ # Cap how many times a single self.URL call chain can fall through
170
+ # the Cloudflare-recovery branch, so a user who keeps saying yes to
171
+ # the prompt while Medium keeps blocking can't loop forever.
172
+ CLOUDFLARE_RECOVERY_LIMIT = 5
137
173
 
138
174
  def self.URL(url, method = 'GET', data = nil, retryCount = 0)
139
175
  retryCount += 1
@@ -204,11 +240,13 @@ class Request
204
240
  end
205
241
 
206
242
  if cloudflareBlocked?(response)
207
- # Once-per-process: if we're on an interactive TTY, ask the user
208
- # to clear the challenge in a browser and retry. CI / non-TTY
209
- # environments fall straight through to the raise below.
210
- if !@@cloudflareInteractiveResolutionAttempted && InteractiveCloudflareRecovery.available?
211
- @@cloudflareInteractiveResolutionAttempted = true
243
+ # On every Cloudflare block — even when cookies are already
244
+ # set — re-run the recovery flow on a TTY. ChromeAuth refreshes
245
+ # sid/uid/cf_clearance/_cfuvid into $cookies + the cache, so
246
+ # the next attempt usually succeeds. Bounded by retryCount so
247
+ # a degenerate loop (user keeps clearing, Medium keeps blocking)
248
+ # eventually surfaces the error. CI / non-TTY just raises.
249
+ if retryCount <= CLOUDFLARE_RECOVERY_LIMIT && InteractiveCloudflareRecovery.available?
212
250
  if InteractiveCloudflareRecovery.run(url)
213
251
  return self.URL(url, method, data, retryCount)
214
252
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ZMediumToMarkdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.0
4
+ version: 3.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ZhgChgLi
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2026-05-05 00:00:00.000000000 Z
10
+ date: 2026-05-06 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: nokogiri
@@ -77,6 +77,20 @@ dependencies:
77
77
  - - "<"
78
78
  - !ruby/object:Gem::Version
79
79
  version: '2.0'
80
+ - !ruby/object:Gem::Dependency
81
+ name: ferrum
82
+ requirement: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - "~>"
85
+ - !ruby/object:Gem::Version
86
+ version: '0.15'
87
+ type: :runtime
88
+ prerelease: false
89
+ version_requirements: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - "~>"
92
+ - !ruby/object:Gem::Version
93
+ version: '0.15'
80
94
  description: ZMediumToMarkdown converts Medium posts into clean, portable Markdown.
81
95
  It can download a single post or every post from a Medium username, preserving headings,
82
96
  lists, blockquotes, code blocks, images, links, and common embeds such as GitHub
@@ -89,6 +103,8 @@ extra_rdoc_files: []
89
103
  files:
90
104
  - bin/ZMediumToMarkdown
91
105
  - lib/CLI.rb
106
+ - lib/ChromeAuth.rb
107
+ - lib/CookieCache.rb
92
108
  - lib/Helper.rb
93
109
  - lib/ImageDownloader.rb
94
110
  - lib/Models/Paragraph.rb