ZMediumToMarkdown 3.3.3 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/Request.rb +28 -0
- data/lib/ZMediumFetcher.rb +21 -3
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 41871cb0a57ded38ba5b49cbe98b19ffa8f963b4425d0bf6a0e8597f305fcbc3
|
|
4
|
+
data.tar.gz: 555122d0f62b4434462200240910ed5fed97db9bebc7f54bc1b2b73a308b6702
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8f4b1e85672bbb3f24b2fcc7ab7f4e21455eb76dced1abefde45479fc5cce31a0ad928c73a08e34733ea8c0f1913073f547fa3bfaceeab10d3e17aed4d6e0711
|
|
7
|
+
data.tar.gz: 915db5d26bc3f78a67fd1d58cc858364f69f4cb5830527213ccefbc7d48911469c6a326367a476ea6d8ecb7da69978ac76fccd2f72f600e0fbcbcc6707615f09
|
data/lib/Request.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
require 'net/http'
|
|
2
2
|
require 'nokogiri'
|
|
3
|
+
require 'uri'
|
|
3
4
|
require 'ChromeAuth'
|
|
4
5
|
require 'CookieCache'
|
|
5
6
|
|
|
@@ -173,6 +174,7 @@ class Request
|
|
|
173
174
|
|
|
174
175
|
def self.URL(url, method = 'GET', data = nil, retryCount = 0)
|
|
175
176
|
retryCount += 1
|
|
177
|
+
url = mediumProxiedURL(url)
|
|
176
178
|
|
|
177
179
|
uri = URI(url)
|
|
178
180
|
https = Net::HTTP.new(uri.host, uri.port)
|
|
@@ -277,6 +279,32 @@ class Request
|
|
|
277
279
|
response
|
|
278
280
|
end
|
|
279
281
|
|
|
282
|
+
# If the user has configured a Cloudflare Worker proxy via MEDIUM_HOST,
|
|
283
|
+
# rewrite *any* https://medium.com/<path> URL to <worker-origin>/<path>
|
|
284
|
+
# so non-GraphQL hits (iframe metadata at /media/<id>, OG-image fallback
|
|
285
|
+
# to /<user>/<post>, etc.) also benefit from the proxy. GraphQL callers
|
|
286
|
+
# already hand us the proxy URL directly via ENV['MEDIUM_HOST'], so they
|
|
287
|
+
# short-circuit the rewrite.
|
|
288
|
+
# Maps a direct Medium URL onto the configured proxy origin.
#
# @param url [Object] request target; only Strings that begin with
#   "https://medium.com/" are candidates for rewriting
# @return [Object] the URL with its leading "https://medium.com" replaced by
#   the value of mediumProxyOrigin, or +url+ itself when it is not such a
#   String or when mediumProxyOrigin returns nil
def self.mediumProxiedURL(url)
  rewritable = url.is_a?(String) && url.start_with?('https://medium.com/')
  return url unless rewritable

  proxyOrigin = mediumProxyOrigin
  # nil origin: nothing to rewrite to, so hand the URL back untouched.
  proxyOrigin.nil? ? url : url.sub(%r{\Ahttps://medium\.com}, proxyOrigin)
end
|
|
294
|
+
|
|
295
|
+
# Extract the `<scheme>://<host>[:port]` of MEDIUM_HOST, or nil if no
|
|
296
|
+
# proxy is configured (or it still points at medium.com itself).
|
|
297
|
+
# Derives the proxy origin (`<scheme>://<host>[:port]`) from ENV['MEDIUM_HOST'].
#
# @return [String, nil] the origin string, or nil when MEDIUM_HOST is unset
#   or blank, is not an absolute http(s) URL, cannot be parsed, or names
#   medium.com itself
def self.mediumProxyOrigin
  # Strip stray whitespace so a padded env value is not rejected by URI.parse.
  configured = ENV['MEDIUM_HOST'].to_s.strip
  return nil if configured.empty?

  uri = URI.parse(configured)
  # Only absolute http(s) URLs yield a usable origin; a scheme-less value such
  # as "//worker.dev" parses with a host but would produce "://worker.dev".
  return nil unless uri.is_a?(URI::HTTP)

  host = uri.host.to_s
  # Host names are case-insensitive, so "Medium.com" still means "no proxy".
  return nil if host.empty? || host.casecmp?('medium.com')

  # Keep the port only when it differs from the scheme's default.
  port = (uri.port && uri.port != uri.default_port) ? ":#{uri.port}" : ''
  "#{uri.scheme}://#{host}#{port}"
rescue URI::InvalidURIError
  nil
end
|
|
307
|
+
|
|
280
308
|
# Cloudflare tags blocked responses via either the cf-mitigated header
|
|
281
309
|
# or the standard "Just a moment..." challenge HTML. We check both
|
|
282
310
|
# so we catch challenges even on Cloudflare deployments that don't
|
data/lib/ZMediumFetcher.rb
CHANGED
|
@@ -153,9 +153,7 @@ class ZMediumFetcher
|
|
|
153
153
|
|
|
154
154
|
existingMeta = readExistingFrontMatter(absolutePath)
|
|
155
155
|
|
|
156
|
-
if existingMeta
|
|
157
|
-
!isPin.nil? && isPin == existingMeta[:pin] &&
|
|
158
|
-
!isLockedPreviewOnly.nil? && isLockedPreviewOnly == existingMeta[:lockedPreviewOnly]
|
|
156
|
+
if shouldSkipExistingPost?(existingMeta, postInfo, isPin, isLockedPreviewOnly)
|
|
159
157
|
# Already downloaded and nothing has changed!, Skip!
|
|
160
158
|
progress.currentPostParagraphIndex = paragraphs.length
|
|
161
159
|
progress.message = "Skip, Post already downloaded and nothing has changed!"
|
|
@@ -374,6 +372,26 @@ class ZMediumFetcher
|
|
|
374
372
|
|
|
375
373
|
# readExistingFrontMatter: reads YAML-ish front matter from a previously-generated post and
|
|
376
374
|
# returns the fields we care about for skip-already-downloaded logic.
|
|
375
|
+
# Does the existing on-disk post still match the freshly-fetched
|
|
376
|
+
# metadata? All three signals must agree:
|
|
377
|
+
# 1. last_modified_at on disk is >= Medium's latestPublishedAt
|
|
378
|
+
# 2. isPin matches the file's pin flag
|
|
379
|
+
# 3. isLockedPreviewOnly matches the file's lockedPreviewOnly flag
|
|
380
|
+
#
|
|
381
|
+
# Boolean signals are normalized to true/false before comparing so that
|
|
382
|
+
# `nil` (Medium's GraphQL response can omit the field for non-paywalled
|
|
383
|
+
# / non-pinned posts) and `false` (the default written when the front-
|
|
384
|
+
# matter line is omitted by Helper.createPostInfo) are treated as
|
|
385
|
+
# equivalent — otherwise free, never-pinned posts would re-download on
|
|
386
|
+
# every run.
|
|
387
|
+
# Decides whether an already-generated post can be skipped.
#
# @param existingMeta [Hash, nil] front-matter fields read from disk, keyed by
#   :lastModifiedAt, :pin and :lockedPreviewOnly
# @param postInfo [#latestPublishedAt] freshly fetched post metadata
# @param isPin [Boolean, nil] fetched pin flag; nil is treated as false
# @param isLockedPreviewOnly [Boolean, nil] fetched paywall-preview flag; nil
#   is treated as false
# @return [Boolean] true only when the on-disk timestamp is not older than
#   postInfo.latestPublishedAt.to_i and both flags match the stored values
def shouldSkipExistingPost?(existingMeta, postInfo, isPin, isLockedPreviewOnly)
  # No metadata hash at all means there is nothing on disk to match against.
  return false if existingMeta.nil?

  lastModifiedAt = existingMeta[:lastModifiedAt]
  return false unless lastModifiedAt
  return false unless lastModifiedAt >= postInfo.latestPublishedAt.to_i

  # `== true` folds nil and false together before comparing with the stored flag.
  return false unless (isPin == true) == existingMeta[:pin]
  return false unless (isLockedPreviewOnly == true) == existingMeta[:lockedPreviewOnly]

  true
end
|
|
394
|
+
|
|
377
395
|
def readExistingFrontMatter(absolutePath)
|
|
378
396
|
meta = { lastModifiedAt: nil, pin: false, lockedPreviewOnly: false }
|
|
379
397
|
return meta unless File.file?(absolutePath)
|