ZMediumToMarkdown 3.3.3 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/CLI.rb +5 -19
- data/lib/Parsers/IMGParser.rb +2 -1
- data/lib/Post.rb +1 -1
- data/lib/Request.rb +58 -0
- data/lib/ZMediumFetcher.rb +23 -5
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 047cd0234c7a856da3d912b93d419ddcd7ad1396079ba49d3c494807ca37a8de
|
|
4
|
+
data.tar.gz: c4a96904909f9885c48bff408200a886191858be9c9f8746a01db065e66aaa8f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 07f6681e620d463a849ccff8e8d497c10114073d8aad3792f0d2beb6cf730145dfb36ac6e304a3ad805de56aad5f24e3f028c5e2613da4f38228657bbc9f70f7
|
|
7
|
+
data.tar.gz: 6e8056232851f1f2656a5b6340b6972bdc603f8ff4aedd18e101c9ec8eb5fe10d7b4ca4d6376086e533daddd180d169a4db7b3c2ead4bd69ba1b175cf73661d9
|
data/lib/CLI.rb
CHANGED
|
@@ -14,7 +14,6 @@ module CLI
|
|
|
14
14
|
COOKIE_SETUP_URL = 'https://github.com/ZhgChgLi/ZMediumToMarkdown/wiki/Setting-Up-Medium-Cookies-and-a-Cloudflare-Worker-Proxy'.freeze
|
|
15
15
|
|
|
16
16
|
DEFAULT_MEDIUM_HOST = 'https://medium.com/_/graphql'.freeze
|
|
17
|
-
DEFAULT_MIRO_MEDIUM_HOST = 'https://miro.medium.com'.freeze
|
|
18
17
|
|
|
19
18
|
module_function
|
|
20
19
|
|
|
@@ -53,10 +52,6 @@ module CLI
|
|
|
53
52
|
ENV['MEDIUM_HOST'] = v
|
|
54
53
|
end
|
|
55
54
|
|
|
56
|
-
opts.on('--miro_medium_host URL', 'Cloudflare Worker proxy URL for Medium image CDN (or set $MIRO_MEDIUM_HOST). Optional companion to --medium_host.') do |v|
|
|
57
|
-
ENV['MIRO_MEDIUM_HOST'] = v
|
|
58
|
-
end
|
|
59
|
-
|
|
60
55
|
opts.on('-u', '--username USERNAME', 'Download all posts from a Medium username') do |v|
|
|
61
56
|
options[:username] = v
|
|
62
57
|
end
|
|
@@ -167,25 +162,17 @@ module CLI
|
|
|
167
162
|
!host.empty? && host != DEFAULT_MEDIUM_HOST
|
|
168
163
|
end
|
|
169
164
|
|
|
170
|
-
def imageProxyConfigured?
|
|
171
|
-
host = ENV['MIRO_MEDIUM_HOST'].to_s
|
|
172
|
-
!host.empty? && host != DEFAULT_MIRO_MEDIUM_HOST
|
|
173
|
-
end
|
|
174
|
-
|
|
175
|
-
|
|
176
165
|
# Only warn when the invocation will actually hit Medium — skip for
|
|
177
166
|
# --version, --clean, --help, --new.
|
|
178
167
|
def warnAboutMissingSetup(options, errput: $stderr)
|
|
179
168
|
return unless willHitMedium?(options)
|
|
180
169
|
|
|
181
|
-
missingCookies
|
|
182
|
-
missingProxy
|
|
183
|
-
|
|
184
|
-
return if !missingCookies && !missingProxy && !missingImageProxy
|
|
170
|
+
missingCookies = !cookiesPresent?
|
|
171
|
+
missingProxy = !proxyConfigured?
|
|
172
|
+
return if !missingCookies && !missingProxy
|
|
185
173
|
|
|
186
174
|
errput.puts buildSetupBanner(missingCookies: missingCookies,
|
|
187
|
-
missingProxy: missingProxy,
|
|
188
|
-
missingImageProxy: missingImageProxy)
|
|
175
|
+
missingProxy: missingProxy)
|
|
189
176
|
end
|
|
190
177
|
|
|
191
178
|
def willHitMedium?(options)
|
|
@@ -194,11 +181,10 @@ module CLI
|
|
|
194
181
|
|
|
195
182
|
# One-line warning. The wiki has the actual setup steps; we just
|
|
196
183
|
# nudge the user toward it instead of dumping a wall of guidance.
|
|
197
|
-
def buildSetupBanner(missingCookies:, missingProxy:, missingImageProxy:)
|
|
184
|
+
def buildSetupBanner(missingCookies:, missingProxy:)
|
|
198
185
|
missing = []
|
|
199
186
|
missing << 'Medium cookies (sid / uid)' if missingCookies
|
|
200
187
|
missing << 'Cloudflare Worker proxy (MEDIUM_HOST)' if missingProxy
|
|
201
|
-
missing << 'Cloudflare image proxy (MIRO_MEDIUM_HOST)' if missingImageProxy
|
|
202
188
|
return '' if missing.empty?
|
|
203
189
|
|
|
204
190
|
"⚠ Missing #{missing.join(' / ')}. Medium / Cloudflare may block the run. Setup guide: #{COOKIE_SETUP_URL}"
|
data/lib/Parsers/IMGParser.rb
CHANGED
|
@@ -3,6 +3,7 @@ require 'Models/Paragraph'
|
|
|
3
3
|
|
|
4
4
|
require 'ImageDownloader'
|
|
5
5
|
require 'PathPolicy'
|
|
6
|
+
require 'Request'
|
|
6
7
|
|
|
7
8
|
class IMGParser < Parser
|
|
8
9
|
attr_accessor :nextParser, :pathPolicy, :isForJekyll
|
|
@@ -20,7 +21,7 @@ class IMGParser < Parser
|
|
|
20
21
|
|
|
21
22
|
fileName = paragraph.metadata.id #d*fsafwfe.jpg
|
|
22
23
|
|
|
23
|
-
miro_host =
|
|
24
|
+
miro_host = Request.miroHost
|
|
24
25
|
imageURL = "#{miro_host}/#{fileName}"
|
|
25
26
|
|
|
26
27
|
result = ""
|
data/lib/Post.rb
CHANGED
|
@@ -67,7 +67,7 @@ class Post
|
|
|
67
67
|
imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(postID), pathPolicy.getRelativePath(postID))
|
|
68
68
|
absolutePath = imagePathPolicy.getAbsolutePath(previewImageFileName)
|
|
69
69
|
|
|
70
|
-
miro_host =
|
|
70
|
+
miro_host = Request.miroHost
|
|
71
71
|
imageURL = "#{miro_host}/#{previewImageFileName}"
|
|
72
72
|
|
|
73
73
|
if ImageDownloader.download(absolutePath, imageURL)
|
data/lib/Request.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
require 'net/http'
|
|
2
2
|
require 'nokogiri'
|
|
3
|
+
require 'uri'
|
|
3
4
|
require 'ChromeAuth'
|
|
4
5
|
require 'CookieCache'
|
|
5
6
|
|
|
@@ -173,6 +174,7 @@ class Request
|
|
|
173
174
|
|
|
174
175
|
def self.URL(url, method = 'GET', data = nil, retryCount = 0)
|
|
175
176
|
retryCount += 1
|
|
177
|
+
url = mediumProxiedURL(url)
|
|
176
178
|
|
|
177
179
|
uri = URI(url)
|
|
178
180
|
https = Net::HTTP.new(uri.host, uri.port)
|
|
@@ -225,6 +227,14 @@ class Request
|
|
|
225
227
|
request['Cookie'] = cookiesString;
|
|
226
228
|
end
|
|
227
229
|
|
|
230
|
+
# When the request is going to a configured Worker proxy (and only
|
|
231
|
+
# then), attach the user's MEDIUM_HOST_SECRET as a header so the
|
|
232
|
+
# Worker can authenticate the caller. Skipped for upstream
|
|
233
|
+
# medium.com / miro.medium.com so the secret never leaks to Medium.
|
|
234
|
+
if proxyURI?(uri) && (proxySecret = ENV['MEDIUM_HOST_SECRET'].to_s) && !proxySecret.empty?
|
|
235
|
+
request['X-Medium-Proxy-Secret'] = proxySecret
|
|
236
|
+
end
|
|
237
|
+
|
|
228
238
|
response = https.request(request);
|
|
229
239
|
|
|
230
240
|
setCookieString = response.get_fields('set-cookie');
|
|
@@ -277,6 +287,54 @@ class Request
|
|
|
277
287
|
response
|
|
278
288
|
end
|
|
279
289
|
|
|
290
|
+
# If the user has configured a Cloudflare Worker proxy via MEDIUM_HOST,
|
|
291
|
+
# rewrite *any* https://medium.com/<path> URL to <worker-origin>/<path>
|
|
292
|
+
# so non-GraphQL hits (iframe metadata at /media/<id>, OG-image fallback
|
|
293
|
+
# to /<user>/<post>, etc.) also benefit from the proxy. GraphQL callers
|
|
294
|
+
# already hand us the proxy URL directly via ENV['MEDIUM_HOST'], so they
|
|
295
|
+
# short-circuit the rewrite.
|
|
296
|
+
def self.mediumProxiedURL(url)
|
|
297
|
+
return url unless url.is_a?(String) && url.start_with?('https://medium.com/')
|
|
298
|
+
origin = mediumProxyOrigin
|
|
299
|
+
return url if origin.nil?
|
|
300
|
+
url.sub(%r{\Ahttps://medium\.com}, origin)
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
# Extract the `<scheme>://<host>[:port]` of MEDIUM_HOST, or nil if no
|
|
304
|
+
# proxy is configured (or it still points at medium.com itself).
|
|
305
|
+
def self.mediumProxyOrigin
|
|
306
|
+
host = ENV['MEDIUM_HOST'].to_s
|
|
307
|
+
return nil if host.empty?
|
|
308
|
+
uri = URI.parse(host)
|
|
309
|
+
return nil if uri.host.nil? || uri.host == 'medium.com'
|
|
310
|
+
port = (uri.port && uri.port != uri.default_port) ? ":#{uri.port}" : ''
|
|
311
|
+
"#{uri.scheme}://#{uri.host}#{port}"
|
|
312
|
+
rescue URI::InvalidURIError
|
|
313
|
+
nil
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
# Resolve the host the gem should use for miro.medium.com image fetches.
|
|
317
|
+
# Single-Worker setups: the same MEDIUM_HOST proxy handles both medium.com
|
|
318
|
+
# and miro.medium.com via path dispatch, so we always derive miro from
|
|
319
|
+
# MEDIUM_HOST's origin. No proxy → upstream miro.medium.com.
|
|
320
|
+
def self.miroHost
|
|
321
|
+
mediumProxyOrigin || 'https://miro.medium.com'
|
|
322
|
+
end
|
|
323
|
+
|
|
324
|
+
# True iff `uri` is hosted by the configured Worker proxy — i.e. its
|
|
325
|
+
# host matches MEDIUM_HOST and MEDIUM_HOST is set to something other
|
|
326
|
+
# than upstream medium.com. Used to gate the MEDIUM_HOST_SECRET auth
|
|
327
|
+
# header so the secret only leaves the process when heading to the
|
|
328
|
+
# user's own proxy.
|
|
329
|
+
def self.proxyURI?(uri)
|
|
330
|
+
return false if uri.nil? || uri.host.nil?
|
|
331
|
+
envValue = ENV['MEDIUM_HOST'].to_s
|
|
332
|
+
return false if envValue.empty?
|
|
333
|
+
parsed = URI.parse(envValue) rescue nil
|
|
334
|
+
return false if parsed.nil? || parsed.host.nil?
|
|
335
|
+
parsed.host != 'medium.com' && parsed.host == uri.host
|
|
336
|
+
end
|
|
337
|
+
|
|
280
338
|
# Cloudflare tags blocked responses via either the cf-mitigated header
|
|
281
339
|
# or the standard "Just a moment..." challenge HTML. We check both
|
|
282
340
|
# so we catch challenges even on Cloudflare deployments that don't
|
data/lib/ZMediumFetcher.rb
CHANGED
|
@@ -153,9 +153,7 @@ class ZMediumFetcher
|
|
|
153
153
|
|
|
154
154
|
existingMeta = readExistingFrontMatter(absolutePath)
|
|
155
155
|
|
|
156
|
-
if existingMeta
|
|
157
|
-
!isPin.nil? && isPin == existingMeta[:pin] &&
|
|
158
|
-
!isLockedPreviewOnly.nil? && isLockedPreviewOnly == existingMeta[:lockedPreviewOnly]
|
|
156
|
+
if shouldSkipExistingPost?(existingMeta, postInfo, isPin, isLockedPreviewOnly)
|
|
159
157
|
# Already downloaded and nothing has changed!, Skip!
|
|
160
158
|
progress.currentPostParagraphIndex = paragraphs.length
|
|
161
159
|
progress.message = "Skip, Post already downloaded and nothing has changed!"
|
|
@@ -180,8 +178,8 @@ class ZMediumFetcher
|
|
|
180
178
|
|
|
181
179
|
# Stdout fast path: render markdown directly to `stdoutIO` without
|
|
182
180
|
# touching the filesystem and without downloading any images. Image
|
|
183
|
-
# references stay as remote miro.medium.com
|
|
184
|
-
# proxy
|
|
181
|
+
# references stay as remote URLs on miro.medium.com (or the configured
|
|
182
|
+
# MEDIUM_HOST proxy origin when set).
|
|
185
183
|
def downloadPostToStdout(postURL, isPin)
|
|
186
184
|
postID = Post.getPostIDFromPostURLString(postURL)
|
|
187
185
|
postPath = Post.getPostPathFromPostURLString(postURL)
|
|
@@ -374,6 +372,26 @@ class ZMediumFetcher
|
|
|
374
372
|
|
|
375
373
|
# Reads YAML-ish front matter from a previously-generated post and
|
|
376
374
|
# returns the fields we care about for skip-already-downloaded logic.
|
|
375
|
+
# Does the existing on-disk post still match the freshly-fetched
|
|
376
|
+
# metadata? All three signals must agree:
|
|
377
|
+
# 1. last_modified_at on disk is >= Medium's latestPublishedAt
|
|
378
|
+
# 2. isPin matches the file's pin flag
|
|
379
|
+
# 3. isLockedPreviewOnly matches the file's lockedPreviewOnly flag
|
|
380
|
+
#
|
|
381
|
+
# Boolean signals are normalized to true/false before comparing so that
|
|
382
|
+
# `nil` (Medium's GraphQL response can omit the field for non-paywalled
|
|
383
|
+
# / non-pinned posts) and `false` (the default written when the front-
|
|
384
|
+
# matter line is omitted by Helper.createPostInfo) are treated as
|
|
385
|
+
# equivalent — otherwise free, never-pinned posts would re-download on
|
|
386
|
+
# every run.
|
|
387
|
+
def shouldSkipExistingPost?(existingMeta, postInfo, isPin, isLockedPreviewOnly)
|
|
388
|
+
return false unless existingMeta[:lastModifiedAt]
|
|
389
|
+
return false unless existingMeta[:lastModifiedAt] >= postInfo.latestPublishedAt.to_i
|
|
390
|
+
return false unless (isPin == true) == existingMeta[:pin]
|
|
391
|
+
return false unless (isLockedPreviewOnly == true) == existingMeta[:lockedPreviewOnly]
|
|
392
|
+
true
|
|
393
|
+
end
|
|
394
|
+
|
|
377
395
|
def readExistingFrontMatter(absolutePath)
|
|
378
396
|
meta = { lastModifiedAt: nil, pin: false, lockedPreviewOnly: false }
|
|
379
397
|
return meta unless File.file?(absolutePath)
|