ZMediumToMarkdown 3.3.3 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 97751da25df535110c1771b77a009171f82bd761ff74a5948927a349d06358a0
4
- data.tar.gz: e8063778d0980fc394ff52fb9f0e1caeb59578915e812881da385b8a4f2b0fe3
3
+ metadata.gz: 047cd0234c7a856da3d912b93d419ddcd7ad1396079ba49d3c494807ca37a8de
4
+ data.tar.gz: c4a96904909f9885c48bff408200a886191858be9c9f8746a01db065e66aaa8f
5
5
  SHA512:
6
- metadata.gz: f53c5d05e53b7ed984f65e908eef65b0fdab7606e740871c8639816eb32422d47d87c75b4f2f368f8aa6e89add6b531a818f977045a1af88e720757c1a386e24
7
- data.tar.gz: fc8dff70656090f4d5ce5790aadadd4fea82ac2da5989979c1c9c2fe1edb36514b5ea8027652d13dff695ad7ba080e281dab822d3b4078afd4347b457b889b25
6
+ metadata.gz: 07f6681e620d463a849ccff8e8d497c10114073d8aad3792f0d2beb6cf730145dfb36ac6e304a3ad805de56aad5f24e3f028c5e2613da4f38228657bbc9f70f7
7
+ data.tar.gz: 6e8056232851f1f2656a5b6340b6972bdc603f8ff4aedd18e101c9ec8eb5fe10d7b4ca4d6376086e533daddd180d169a4db7b3c2ead4bd69ba1b175cf73661d9
data/lib/CLI.rb CHANGED
@@ -14,7 +14,6 @@ module CLI
14
14
  COOKIE_SETUP_URL = 'https://github.com/ZhgChgLi/ZMediumToMarkdown/wiki/Setting-Up-Medium-Cookies-and-a-Cloudflare-Worker-Proxy'.freeze
15
15
 
16
16
  DEFAULT_MEDIUM_HOST = 'https://medium.com/_/graphql'.freeze
17
- DEFAULT_MIRO_MEDIUM_HOST = 'https://miro.medium.com'.freeze
18
17
 
19
18
  module_function
20
19
 
@@ -53,10 +52,6 @@ module CLI
53
52
  ENV['MEDIUM_HOST'] = v
54
53
  end
55
54
 
56
- opts.on('--miro_medium_host URL', 'Cloudflare Worker proxy URL for Medium image CDN (or set $MIRO_MEDIUM_HOST). Optional companion to --medium_host.') do |v|
57
- ENV['MIRO_MEDIUM_HOST'] = v
58
- end
59
-
60
55
  opts.on('-u', '--username USERNAME', 'Download all posts from a Medium username') do |v|
61
56
  options[:username] = v
62
57
  end
@@ -167,25 +162,17 @@ module CLI
167
162
  !host.empty? && host != DEFAULT_MEDIUM_HOST
168
163
  end
169
164
 
170
- def imageProxyConfigured?
171
- host = ENV['MIRO_MEDIUM_HOST'].to_s
172
- !host.empty? && host != DEFAULT_MIRO_MEDIUM_HOST
173
- end
174
-
175
-
176
165
  # Only warn when the invocation will actually hit Medium — skip for
177
166
  # --version, --clean, --help, --new.
178
167
  def warnAboutMissingSetup(options, errput: $stderr)
179
168
  return unless willHitMedium?(options)
180
169
 
181
- missingCookies = !cookiesPresent?
182
- missingProxy = !proxyConfigured?
183
- missingImageProxy = !imageProxyConfigured?
184
- return if !missingCookies && !missingProxy && !missingImageProxy
170
+ missingCookies = !cookiesPresent?
171
+ missingProxy = !proxyConfigured?
172
+ return if !missingCookies && !missingProxy
185
173
 
186
174
  errput.puts buildSetupBanner(missingCookies: missingCookies,
187
- missingProxy: missingProxy,
188
- missingImageProxy: missingImageProxy)
175
+ missingProxy: missingProxy)
189
176
  end
190
177
 
191
178
  def willHitMedium?(options)
@@ -194,11 +181,10 @@ module CLI
194
181
 
195
182
  # One-line warning. The wiki has the actual setup steps; we just
196
183
  # nudge the user toward it instead of dumping a wall of guidance.
197
- def buildSetupBanner(missingCookies:, missingProxy:, missingImageProxy:)
184
+ def buildSetupBanner(missingCookies:, missingProxy:)
198
185
  missing = []
199
186
  missing << 'Medium cookies (sid / uid)' if missingCookies
200
187
  missing << 'Cloudflare Worker proxy (MEDIUM_HOST)' if missingProxy
201
- missing << 'Cloudflare image proxy (MIRO_MEDIUM_HOST)' if missingImageProxy
202
188
  return '' if missing.empty?
203
189
 
204
190
  "⚠ Missing #{missing.join(' / ')}. Medium / Cloudflare may block the run. Setup guide: #{COOKIE_SETUP_URL}"
@@ -3,6 +3,7 @@ require 'Models/Paragraph'
3
3
 
4
4
  require 'ImageDownloader'
5
5
  require 'PathPolicy'
6
+ require 'Request'
6
7
 
7
8
  class IMGParser < Parser
8
9
  attr_accessor :nextParser, :pathPolicy, :isForJekyll
@@ -20,7 +21,7 @@ class IMGParser < Parser
20
21
 
21
22
  fileName = paragraph.metadata.id #d*fsafwfe.jpg
22
23
 
23
- miro_host = ENV.fetch('MIRO_MEDIUM_HOST', 'https://miro.medium.com')
24
+ miro_host = Request.miroHost
24
25
  imageURL = "#{miro_host}/#{fileName}"
25
26
 
26
27
  result = ""
data/lib/Post.rb CHANGED
@@ -67,7 +67,7 @@ class Post
67
67
  imagePathPolicy = PathPolicy.new(pathPolicy.getAbsolutePath(postID), pathPolicy.getRelativePath(postID))
68
68
  absolutePath = imagePathPolicy.getAbsolutePath(previewImageFileName)
69
69
 
70
- miro_host = ENV.fetch('MIRO_MEDIUM_HOST', 'https://miro.medium.com')
70
+ miro_host = Request.miroHost
71
71
  imageURL = "#{miro_host}/#{previewImageFileName}"
72
72
 
73
73
  if ImageDownloader.download(absolutePath, imageURL)
data/lib/Request.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'net/http'
2
2
  require 'nokogiri'
3
+ require 'uri'
3
4
  require 'ChromeAuth'
4
5
  require 'CookieCache'
5
6
 
@@ -173,6 +174,7 @@ class Request
173
174
 
174
175
  def self.URL(url, method = 'GET', data = nil, retryCount = 0)
175
176
  retryCount += 1
177
+ url = mediumProxiedURL(url)
176
178
 
177
179
  uri = URI(url)
178
180
  https = Net::HTTP.new(uri.host, uri.port)
@@ -225,6 +227,14 @@ class Request
225
227
  request['Cookie'] = cookiesString;
226
228
  end
227
229
 
230
+ # When the request is going to a configured Worker proxy (and only
231
+ # then), attach the user's MEDIUM_HOST_SECRET as a header so the
232
+ # Worker can authenticate the caller. Skipped for upstream
233
+ # medium.com / miro.medium.com so the secret never leaks to Medium.
234
+ if proxyURI?(uri) && (proxySecret = ENV['MEDIUM_HOST_SECRET'].to_s) && !proxySecret.empty?
235
+ request['X-Medium-Proxy-Secret'] = proxySecret
236
+ end
237
+
228
238
  response = https.request(request);
229
239
 
230
240
  setCookieString = response.get_fields('set-cookie');
@@ -277,6 +287,54 @@ class Request
277
287
  response
278
288
  end
279
289
 
290
+ # If the user has configured a Cloudflare Worker proxy via MEDIUM_HOST,
291
+ # rewrite *any* https://medium.com/<path> URL to <worker-origin>/<path>
292
+ # so non-GraphQL hits (iframe metadata at /media/<id>, OG-image fallback
293
+ # to /<user>/<post>, etc.) also benefit from the proxy. GraphQL callers
294
+ # already hand us the proxy URL directly via ENV['MEDIUM_HOST'], so they
295
+ # short-circuit the rewrite.
296
+ def self.mediumProxiedURL(url)
297
+ return url unless url.is_a?(String) && url.start_with?('https://medium.com/')
298
+ origin = mediumProxyOrigin
299
+ return url if origin.nil?
300
+ url.sub(%r{\Ahttps://medium\.com}, origin)
301
+ end
302
+
303
+ # Extract the `<scheme>://<host>[:port]` of MEDIUM_HOST, or nil if no
304
+ # proxy is configured (or it still points at medium.com itself).
305
+ def self.mediumProxyOrigin
306
+ host = ENV['MEDIUM_HOST'].to_s
307
+ return nil if host.empty?
308
+ uri = URI.parse(host)
309
+ return nil if uri.host.nil? || uri.host == 'medium.com'
310
+ port = (uri.port && uri.port != uri.default_port) ? ":#{uri.port}" : ''
311
+ "#{uri.scheme}://#{uri.host}#{port}"
312
+ rescue URI::InvalidURIError
313
+ nil
314
+ end
315
+
316
+ # Resolve the host the gem should use for miro.medium.com image fetches.
317
+ # Single-Worker setups: the same MEDIUM_HOST proxy handles both medium.com
318
+ # and miro.medium.com via path dispatch, so we always derive miro from
319
+ # MEDIUM_HOST's origin. No proxy → upstream miro.medium.com.
320
+ def self.miroHost
321
+ mediumProxyOrigin || 'https://miro.medium.com'
322
+ end
323
+
324
+ # True iff `uri` is hosted by the configured Worker proxy — i.e. its
325
+ # host matches MEDIUM_HOST and MEDIUM_HOST is set to something other
326
+ # than upstream medium.com. Used to gate the MEDIUM_HOST_SECRET auth
327
+ # header so the secret only leaves the process when heading to the
328
+ # user's own proxy.
329
+ def self.proxyURI?(uri)
330
+ return false if uri.nil? || uri.host.nil?
331
+ envValue = ENV['MEDIUM_HOST'].to_s
332
+ return false if envValue.empty?
333
+ parsed = URI.parse(envValue) rescue nil
334
+ return false if parsed.nil? || parsed.host.nil?
335
+ parsed.host != 'medium.com' && parsed.host == uri.host
336
+ end
337
+
280
338
  # Cloudflare tags blocked responses via either the cf-mitigated header
281
339
  # or the standard "Just a moment..." challenge HTML. We check both
282
340
  # so we catch challenges even on Cloudflare deployments that don't
@@ -153,9 +153,7 @@ class ZMediumFetcher
153
153
 
154
154
  existingMeta = readExistingFrontMatter(absolutePath)
155
155
 
156
- if existingMeta[:lastModifiedAt] && existingMeta[:lastModifiedAt] >= postInfo.latestPublishedAt.to_i &&
157
- !isPin.nil? && isPin == existingMeta[:pin] &&
158
- !isLockedPreviewOnly.nil? && isLockedPreviewOnly == existingMeta[:lockedPreviewOnly]
156
+ if shouldSkipExistingPost?(existingMeta, postInfo, isPin, isLockedPreviewOnly)
159
157
  # Already downloaded and nothing has changed — skip.
160
158
  progress.currentPostParagraphIndex = paragraphs.length
161
159
  progress.message = "Skip, Post already downloaded and nothing has changed!"
@@ -180,8 +178,8 @@ class ZMediumFetcher
180
178
 
181
179
  # Stdout fast path: render markdown directly to `stdoutIO` without
182
180
  # touching the filesystem and without downloading any images. Image
183
- # references stay as remote miro.medium.com URLs (or MIRO_MEDIUM_HOST
184
- # proxy if set).
181
+ # references stay as remote URLs on miro.medium.com (or the configured
182
+ # MEDIUM_HOST proxy origin when set).
185
183
  def downloadPostToStdout(postURL, isPin)
186
184
  postID = Post.getPostIDFromPostURLString(postURL)
187
185
  postPath = Post.getPostPathFromPostURLString(postURL)
@@ -374,6 +372,26 @@ class ZMediumFetcher
374
372
 
375
373
  # Reads YAML-ish front matter from a previously-generated post and
376
374
  # returns the fields we care about for skip-already-downloaded logic.
375
+ # shouldSkipExistingPost?: does the existing on-disk post still match the freshly-fetched
376
+ # metadata? All three signals must agree:
377
+ # 1. last_modified_at on disk is >= Medium's latestPublishedAt
378
+ # 2. isPin matches the file's pin flag
379
+ # 3. isLockedPreviewOnly matches the file's lockedPreviewOnly flag
380
+ #
381
+ # Boolean signals are normalized to true/false before comparing so that
382
+ # `nil` (Medium's GraphQL response can omit the field for non-paywalled
383
+ # / non-pinned posts) and `false` (the default assumed when the front-
384
+ # matter line is omitted by Helper.createPostInfo) are treated as
385
+ # equivalent — otherwise free, never-pinned posts would re-download on
386
+ # every run.
387
+ def shouldSkipExistingPost?(existingMeta, postInfo, isPin, isLockedPreviewOnly)
388
+ return false unless existingMeta[:lastModifiedAt]
389
+ return false unless existingMeta[:lastModifiedAt] >= postInfo.latestPublishedAt.to_i
390
+ return false unless (isPin == true) == existingMeta[:pin]
391
+ return false unless (isLockedPreviewOnly == true) == existingMeta[:lockedPreviewOnly]
392
+ true
393
+ end
394
+
377
395
  def readExistingFrontMatter(absolutePath)
378
396
  meta = { lastModifiedAt: nil, pin: false, lockedPreviewOnly: false }
379
397
  return meta unless File.file?(absolutePath)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ZMediumToMarkdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.3.3
4
+ version: 3.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ZhgChgLi