ZMediumToMarkdown 3.3.3 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/lib/Request.rb +28 -0
  3. data/lib/ZMediumFetcher.rb +21 -3
  4. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 97751da25df535110c1771b77a009171f82bd761ff74a5948927a349d06358a0
4
- data.tar.gz: e8063778d0980fc394ff52fb9f0e1caeb59578915e812881da385b8a4f2b0fe3
3
+ metadata.gz: 41871cb0a57ded38ba5b49cbe98b19ffa8f963b4425d0bf6a0e8597f305fcbc3
4
+ data.tar.gz: 555122d0f62b4434462200240910ed5fed97db9bebc7f54bc1b2b73a308b6702
5
5
  SHA512:
6
- metadata.gz: f53c5d05e53b7ed984f65e908eef65b0fdab7606e740871c8639816eb32422d47d87c75b4f2f368f8aa6e89add6b531a818f977045a1af88e720757c1a386e24
7
- data.tar.gz: fc8dff70656090f4d5ce5790aadadd4fea82ac2da5989979c1c9c2fe1edb36514b5ea8027652d13dff695ad7ba080e281dab822d3b4078afd4347b457b889b25
6
+ metadata.gz: 8f4b1e85672bbb3f24b2fcc7ab7f4e21455eb76dced1abefde45479fc5cce31a0ad928c73a08e34733ea8c0f1913073f547fa3bfaceeab10d3e17aed4d6e0711
7
+ data.tar.gz: 915db5d26bc3f78a67fd1d58cc858364f69f4cb5830527213ccefbc7d48911469c6a326367a476ea6d8ecb7da69978ac76fccd2f72f600e0fbcbcc6707615f09
data/lib/Request.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'net/http'
2
2
  require 'nokogiri'
3
+ require 'uri'
3
4
  require 'ChromeAuth'
4
5
  require 'CookieCache'
5
6
 
@@ -173,6 +174,7 @@ class Request
173
174
 
174
175
  def self.URL(url, method = 'GET', data = nil, retryCount = 0)
175
176
  retryCount += 1
177
+ url = mediumProxiedURL(url)
176
178
 
177
179
  uri = URI(url)
178
180
  https = Net::HTTP.new(uri.host, uri.port)
@@ -277,6 +279,32 @@ class Request
277
279
  response
278
280
  end
279
281
 
282
# Routes a Medium URL through the user's proxy when one is configured.
#
# Only strings that begin with `https://medium.com/` are candidates; any
# other value (non-String input, other hosts) is handed back untouched.
# For a candidate, the leading `https://medium.com` is swapped for the
# origin reported by mediumProxyOrigin, keeping path and query as-is, so
# that non-GraphQL requests (e.g. /media/<id> iframe metadata or the
# /<user>/<post> OG-image fallback) go through the proxy as well.
#
# URLs that already point at the proxy do not start with
# `https://medium.com/` and therefore pass through unchanged.
#
# url - the request target (normally a String).
#
# Returns the rewritten URL, or `url` itself when no rewrite applies.
def self.mediumProxiedURL(url)
  targets_medium = url.is_a?(String) && url.start_with?('https://medium.com/')
  return url unless targets_medium

  proxy_origin = mediumProxyOrigin
  if proxy_origin.nil?
    # No proxy configured: keep talking to medium.com directly.
    url
  else
    url.sub(%r{\Ahttps://medium\.com}, proxy_origin)
  end
end
294
+
295
# Extracts the `<scheme>://<host>[:port]` origin from ENV['MEDIUM_HOST'].
#
# Returns nil (meaning "no proxy, do not rewrite") when:
#   * MEDIUM_HOST is unset or empty,
#   * the value cannot be parsed as a URI,
#   * the value has no scheme (e.g. `//worker.example.dev`) or no host,
#     because the resulting origin would be malformed,
#   * the host is medium.com itself (compared case-insensitively, since
#     hostnames are case-insensitive).
#
# The port is included only when it differs from the scheme's default.
def self.mediumProxyOrigin
  configured = ENV['MEDIUM_HOST'].to_s
  return nil if configured.empty?

  uri = URI.parse(configured)
  # A scheme-less or host-less value would otherwise produce an origin
  # such as "://host" or "https://", which breaks every rewritten URL.
  return nil if uri.scheme.nil? || uri.host.to_s.empty?
  return nil if uri.host.downcase == 'medium.com'

  port = (uri.port && uri.port != uri.default_port) ? ":#{uri.port}" : ''
  "#{uri.scheme}://#{uri.host}#{port}"
rescue URI::InvalidURIError
  nil
end
307
+
280
308
  # Cloudflare tags blocked responses via either the cf-mitigated header
281
309
  # or the standard "Just a moment..." challenge HTML. We check both
282
310
  # so we catch challenges even on Cloudflare deployments that don't
@@ -153,9 +153,7 @@ class ZMediumFetcher
153
153
 
154
154
  existingMeta = readExistingFrontMatter(absolutePath)
155
155
 
156
- if existingMeta[:lastModifiedAt] && existingMeta[:lastModifiedAt] >= postInfo.latestPublishedAt.to_i &&
157
- !isPin.nil? && isPin == existingMeta[:pin] &&
158
- !isLockedPreviewOnly.nil? && isLockedPreviewOnly == existingMeta[:lockedPreviewOnly]
156
+ if shouldSkipExistingPost?(existingMeta, postInfo, isPin, isLockedPreviewOnly)
159
157
  # Already downloaded and nothing has changed!, Skip!
160
158
  progress.currentPostParagraphIndex = paragraphs.length
161
159
  progress.message = "Skip, Post already downloaded and nothing has changed!"
@@ -374,6 +372,26 @@ class ZMediumFetcher
374
372
 
375
373
  # Reads YAML-ish front matter from a previously-generated post and
376
374
  # returns the fields we care about for skip-already-downloaded logic.
375
+ # Does the existing on-disk post still match the freshly-fetched
376
+ # metadata? All three signals must agree:
377
+ # 1. last_modified_at on disk is >= Medium's latestPublishedAt
378
+ # 2. isPin matches the file's pin flag
379
+ # 3. isLockedPreviewOnly matches the file's lockedPreviewOnly flag
380
+ #
381
+ # Boolean signals are normalized to true/false before comparing so that
382
+ # `nil` (Medium's GraphQL response can omit the field for non-paywalled
383
+ # / non-pinned posts) and `false` (the default written when the front-
384
+ # matter line is omitted by Helper.createPostInfo) are treated as
385
+ # equivalent — otherwise free, never-pinned posts would re-download on
386
+ # every run.
387
+ def shouldSkipExistingPost?(existingMeta, postInfo, isPin, isLockedPreviewOnly)
388
+ return false unless existingMeta[:lastModifiedAt]
389
+ return false unless existingMeta[:lastModifiedAt] >= postInfo.latestPublishedAt.to_i
390
+ return false unless (isPin == true) == existingMeta[:pin]
391
+ return false unless (isLockedPreviewOnly == true) == existingMeta[:lockedPreviewOnly]
392
+ true
393
+ end
394
+
377
395
  def readExistingFrontMatter(absolutePath)
378
396
  meta = { lastModifiedAt: nil, pin: false, lockedPreviewOnly: false }
379
397
  return meta unless File.file?(absolutePath)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ZMediumToMarkdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.3
4
+ version: 3.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ZhgChgLi