parse-stack-next 5.0.1 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.yml +105 -0
  3. data/.github/ISSUE_TEMPLATE/feature_request.yml +67 -0
  4. data/.github/dependabot.yml +13 -0
  5. data/.github/workflows/codeql.yml +1 -1
  6. data/.github/workflows/docs.yml +3 -3
  7. data/.github/workflows/release.yml +14 -3
  8. data/.github/workflows/ruby.yml +1 -1
  9. data/.gitignore +1 -0
  10. data/.yardopts +19 -0
  11. data/CHANGELOG.md +792 -0
  12. data/Gemfile +3 -0
  13. data/Gemfile.lock +8 -5
  14. data/README.md +15 -0
  15. data/Rakefile +5 -1
  16. data/docs/acl_clp_guide.md +553 -0
  17. data/docs/atlas_vector_search_guide.md +123 -22
  18. data/docs/client_sdk_guide.md +201 -5
  19. data/docs/usage_guide.md +21 -0
  20. data/docs/yard-template/default/fulldoc/html/css/common.css +1222 -0
  21. data/docs/yard-template/default/fulldoc/html/css/full_list.css +387 -0
  22. data/lib/parse/agent/tools.rb +153 -1
  23. data/lib/parse/cache/redis.rb +53 -0
  24. data/lib/parse/client/caching.rb +18 -1
  25. data/lib/parse/client.rb +79 -12
  26. data/lib/parse/embeddings/cohere.rb +143 -6
  27. data/lib/parse/embeddings/provider.rb +20 -2
  28. data/lib/parse/embeddings/voyage.rb +102 -0
  29. data/lib/parse/embeddings.rb +332 -1
  30. data/lib/parse/live_query/client.rb +167 -4
  31. data/lib/parse/live_query/configuration.rb +12 -0
  32. data/lib/parse/live_query/subscription.rb +55 -2
  33. data/lib/parse/live_query.rb +123 -1
  34. data/lib/parse/lock.rb +342 -0
  35. data/lib/parse/lock_backend.rb +308 -0
  36. data/lib/parse/model/classes/audience.rb +5 -0
  37. data/lib/parse/model/classes/installation.rb +122 -0
  38. data/lib/parse/model/classes/job_schedule.rb +3 -1
  39. data/lib/parse/model/classes/job_status.rb +4 -1
  40. data/lib/parse/model/classes/push_status.rb +4 -1
  41. data/lib/parse/model/classes/session.rb +7 -0
  42. data/lib/parse/model/classes/user.rb +204 -0
  43. data/lib/parse/model/core/create_lock.rb +28 -146
  44. data/lib/parse/model/core/embed_managed.rb +162 -13
  45. data/lib/parse/model/core/parse_reference.rb +17 -1
  46. data/lib/parse/model/core/querying.rb +26 -2
  47. data/lib/parse/model/file.rb +523 -18
  48. data/lib/parse/query.rb +31 -1
  49. data/lib/parse/stack/version.rb +1 -1
  50. data/lib/parse/stack.rb +98 -1
  51. data/parse-stack-next.gemspec +2 -2
  52. metadata +17 -7
data/lib/parse/client.rb CHANGED
@@ -678,6 +678,12 @@ module Parse
678
678
  end
679
679
  private :validate_faraday_opts!
680
680
 
681
+ # Hosts considered "loopback" for the cleartext-ws:// guard in
682
+ # {#configure_live_query}. Mirrors
683
+ # {Parse::LiveQuery::Client::LOOPBACK_HOSTS} so the explicit-URL
684
+ # path and the derived-URL path agree on what counts as local.
685
+ LIVE_QUERY_LOOPBACK_HOSTS = %w[localhost 127.0.0.1 ::1 [::1] 0.0.0.0].freeze
686
+
681
687
  # Configure LiveQuery with the given options
682
688
  # @param opts [Hash] configuration options
683
689
  # @option opts [String] :live_query_url WebSocket URL for LiveQuery server (wss://...)
@@ -690,14 +696,74 @@ module Parse
690
696
  require_relative "live_query"
691
697
 
692
698
  live_query_opts = opts[:live_query].is_a?(Hash) ? opts[:live_query] : {}
699
+ resolved_url = live_query_url || live_query_opts[:url]
700
+
701
+ # Refuse explicit `ws://` against a non-loopback host unless
702
+ # `allow_insecure: true` is also passed in `live_query:`. The
703
+ # downstream `derive_websocket_url` path already enforces this for
704
+ # URLs derived from a Parse Server `http://` URL, but an explicit
705
+ # `live_query: { url: "ws://prod-host" }` or
706
+ # `live_query_url: "ws://prod-host"` bypassed it — the master key
707
+ # and any session token would ride the connect frame in cleartext.
708
+ validate_live_query_url!(resolved_url, allow_insecure: live_query_opts[:allow_insecure])
709
+
710
+ # Warn (don't raise) on `live_query: { ... }` keys that are not
711
+ # `Parse::LiveQuery::Configuration` setters. The block form would
712
+ # otherwise silently swallow typos like
713
+ # `live_query: { ssl_min_versoin: :TLSv1_3 }` and leave TLS at the
714
+ # default, losing the operator's intent. The pre-fix kwargs form
715
+ # raised `ArgumentError` here; this restores the surface without
716
+ # making it a hard failure for unknown-but-harmless keys.
717
+ warn_about_unknown_live_query_keys!(live_query_opts)
718
+
719
+ Parse::LiveQuery.configure do |config|
720
+ config.application_id = @application_id if @application_id
721
+ config.client_key = @api_key if @api_key
722
+ config.master_key = @master_key if @master_key
723
+
724
+ # Apply hash-form options first so the resolved URL (which honors
725
+ # top-level `live_query_url:` over `live_query: { url: }`) wins.
726
+ # Without this, the loop would re-write `config.url` from the
727
+ # hash and silently invert the documented precedence.
728
+ live_query_opts.each do |key, value|
729
+ next if key == :url
730
+ setter = "#{key}="
731
+ config.public_send(setter, value) if config.respond_to?(setter)
732
+ end
733
+
734
+ config.url = resolved_url if resolved_url
735
+ end
736
+ end
693
737
 
694
- Parse::LiveQuery.configure(
695
- url: live_query_url || live_query_opts[:url],
696
- application_id: @application_id,
697
- client_key: @api_key,
698
- master_key: @master_key,
699
- **live_query_opts,
700
- )
738
+ # @api private
739
+ def validate_live_query_url!(url, allow_insecure:)
740
+ return unless url.is_a?(String) && url.start_with?("ws://")
741
+
742
+ host = URI.parse(url).host.to_s rescue ""
743
+ return if LIVE_QUERY_LOOPBACK_HOSTS.include?(host)
744
+ return if allow_insecure
745
+
746
+ raise ArgumentError,
747
+ "[Parse::Client] Refusing explicit insecure LiveQuery URL #{url.inspect}. " \
748
+ "The connect frame carries the master key and any session token in " \
749
+ "plaintext on this socket. Use wss:// for routable hosts, or pass " \
750
+ "`live_query: { allow_insecure: true }` to opt into cleartext for " \
751
+ "local development on a non-loopback address."
752
+ end
753
+
754
+ # @api private
755
+ def warn_about_unknown_live_query_keys!(live_query_opts)
756
+ return unless live_query_opts.is_a?(Hash) && live_query_opts.any?
757
+
758
+ probe = Parse::LiveQuery::Configuration.new
759
+ unknown = live_query_opts.keys.reject { |k| probe.respond_to?("#{k}=") }
760
+ return if unknown.empty?
761
+
762
+ warn "[Parse::Client] Ignoring unknown live_query option(s): " \
763
+ "#{unknown.inspect}. Valid keys are Parse::LiveQuery::Configuration " \
764
+ "setters (url, application_id, client_key, master_key, ping_interval, " \
765
+ "pong_timeout, allow_insecure, ssl_min_version, ssl_max_version, " \
766
+ "logging_enabled, log_level, ...). Check for typos."
701
767
  end
702
768
 
703
769
  # If set, returns the current retry count for this instance. Otherwise,
@@ -1026,11 +1092,12 @@ module Parse
1026
1092
  # @return (see Parse::Client.setup)
1027
1093
  # @see Parse::Client.setup
1028
1094
  def self.setup(opts = {}, &block)
1029
- if block_given?
1030
- Parse::Client.new(opts, &block)
1031
- else
1032
- Parse::Client.new(opts)
1033
- end
1095
+ # Delegate to Parse::Client.setup so repeated Parse.setup calls overwrite
1096
+ # the registered :default client. Going through Parse::Client.new instead
1097
+ # would hit the `@clients[:default] ||= self` guard inside #initialize and
1098
+ # silently keep the first-registered client, while Parse::Client.setup
1099
+ # uses `=` and replaces it. Both entry points must behave identically.
1100
+ Parse::Client.setup(opts, &block)
1034
1101
  end
1035
1102
 
1036
1103
  # @!visibility private
data/lib/parse/embeddings/cohere.rb CHANGED
@@ -14,9 +14,13 @@ module Parse
14
14
  #
15
15
  # * **v4** — `embed-v4.0` (1536 native, Matryoshka {256, 512, 1024,
16
16
  # 1536}, 128k-token context). Unified text + image model at the
17
- # network boundary; this provider exposes the text-input path
18
- # only image inputs will land in v5.1 alongside the
19
- # {Provider#embed_image} hook.
17
+ # network boundary. The text path uses Cohere's `/v1/embed`
18
+ # endpoint; the image path ({#embed_image}, v5.1+) uses the
19
+ # `/v2/embed` multimodal endpoint with OpenAI-style
20
+ # `{ type: "image_url", image_url: { url: ... } }` content rows.
21
+ # Text vectors stored today share the vector space with the
22
+ # eventual image vectors (no re-embed required when adding
23
+ # image-side data).
20
24
  # * **v3** — `embed-english-v3.0`, `embed-multilingual-v3.0` (both
21
25
  # 1024-dim), `embed-english-light-v3.0`,
22
26
  # `embed-multilingual-light-v3.0` (both 384-dim). Text-only.
@@ -94,6 +98,10 @@ module Parse
94
98
  # models reject the field with a 400.
95
99
  MATRYOSHKA_MODELS = %w[embed-v4.0].freeze
96
100
 
101
+ # Models that accept image inputs via the `/v2/embed` multimodal
102
+ # endpoint. Currently only `embed-v4.0` — v3 is text-only.
103
+ MULTIMODAL_MODELS = %w[embed-v4.0].freeze
104
+
97
105
  # Allowed Matryoshka widths per model (Cohere quantizes the
98
106
  # available truncations rather than accepting any integer ≤
99
107
  # native). Empty allowlist = any integer ≤ native is fine, but
@@ -246,6 +254,105 @@ module Parse
246
254
  end
247
255
  end
248
256
 
257
+ # @return [Array<Symbol>] `[:text, :image]` for `embed-v4.0`,
258
+ # `[:text]` for v3 models.
259
+ def modalities
260
+ MULTIMODAL_MODELS.include?(@model) ? %i[text image] : [:text]
261
+ end
262
+
263
+ # Embed a batch of image URLs through Cohere's `/v2/embed`
264
+ # multimodal endpoint. v5.1 ships URL-only — the provider
265
+ # receives a public URL and issues its own fetch. The SDK does
266
+ # NOT download the image; it validates the URL through
267
+ # {Parse::Embeddings.validate_image_url!} (sentinel-gated egress
268
+ # opt-in, CIDR / port / host allowlist) and forwards the
269
+ # canonicalized URL string in the `{ type: "image_url",
270
+ # image_url: { url: ... } }` content row.
271
+ #
272
+ # **Multimodal model required.** Cohere's v3 models do not accept
273
+ # image inputs; calling `embed_image` on a v3-configured provider
274
+ # raises {BadRequestError} before any network call.
275
+ #
276
+ # **Wire shape differs from {Voyage#embed_image}.** Voyage uses
277
+ # `{ type: "image_url", image_url: "<url>" }` (flat String); Cohere
278
+ # v2 uses `{ type: "image_url", image_url: { url: "<url>" } }`
279
+ # (nested object), matching the OpenAI chat-completions content
280
+ # convention. The high-level SDK contract is identical — callers
281
+ # pass an `Array<String>` of URLs.
282
+ #
283
+ # @param sources [Array<String>] image URLs. Each must satisfy
284
+ # {Parse::Embeddings.validate_image_url!}; failing entries
285
+ # abort the whole batch (no partial forwarding).
286
+ # @param input_type [Symbol] one of {INPUT_TYPE_WIRE_VALUES}'s
287
+ # keys; mapped to Cohere's `input_type` field. Defaults to
288
+ # `:search_document`.
289
+ # @param allow_insecure [Boolean] forwarded to the URL validator;
290
+ # permit `http://` for local-dev CDN proxies.
291
+ # @return [Array<Array<Float>>] vectors aligned 1:1 with `sources`.
292
+ def embed_image(sources, input_type: :search_document, allow_insecure: false)
293
+ unless MULTIMODAL_MODELS.include?(@model)
294
+ raise BadRequestError,
295
+ "Parse::Embeddings::Cohere#embed_image: model #{@model.inspect} does not " \
296
+ "accept image inputs. Configure the provider with a multimodal model " \
297
+ "(supported: #{MULTIMODAL_MODELS.inspect})."
298
+ end
299
+ unless sources.is_a?(Array)
300
+ raise ArgumentError,
301
+ "Parse::Embeddings::Cohere#embed_image expects Array of image URLs " \
302
+ "(got #{sources.class})."
303
+ end
304
+ return [] if sources.empty?
305
+
306
+ wire_input_type = INPUT_TYPE_WIRE_VALUES[input_type]
307
+ unless wire_input_type
308
+ raise ArgumentError,
309
+ "Parse::Embeddings::Cohere#embed_image input_type #{input_type.inspect} not in " \
310
+ "#{INPUT_TYPE_WIRE_VALUES.keys.inspect}."
311
+ end
312
+ # Cohere caps `/v2/embed` at the same 96-input per-call limit
313
+ # as `/v1/embed`. Guard direct-API callers against a silent
314
+ # 400 — the DSL passes a single URL per directive.
315
+ if sources.length > @embed_batch_size
316
+ raise ArgumentError,
317
+ "Parse::Embeddings::Cohere#embed_image: batch size #{sources.length} exceeds " \
318
+ "the configured cap #{@embed_batch_size} (Cohere per-request max: 96). " \
319
+ "Split the input and call embed_image once per chunk."
320
+ end
321
+
322
+ # Validate every URL up-front so a malformed entry in slot N
323
+ # does not slip through after slots 0..N-1 are already in the
324
+ # wire body. Forward the canonicalized URL the validator
325
+ # returned — not the caller's raw input.
326
+ canonical_urls = sources.each_with_index.map do |url, i|
327
+ unless url.is_a?(String)
328
+ raise ArgumentError,
329
+ "Parse::Embeddings::Cohere#embed_image sources[#{i}] is not a String " \
330
+ "(#{url.class}). v5.1 ships URL-only — bytes/IO support is v5.3."
331
+ end
332
+ Parse::Embeddings.validate_image_url!(url, allow_insecure: allow_insecure)
333
+ end
334
+
335
+ body = {
336
+ model: @model,
337
+ input_type: wire_input_type,
338
+ embedding_types: ["float"],
339
+ inputs: canonical_urls.map { |u|
340
+ { content: [{ type: "image_url", image_url: { url: u } }] }
341
+ },
342
+ }
343
+
344
+ instrument_embed(sources.length, input_type, modality: :image) do |emit_payload|
345
+ payload = post_embeddings(body, path: v2_embed_path)
346
+ if payload.is_a?(Hash) && payload["meta"].is_a?(Hash) &&
347
+ payload["meta"]["billed_units"].is_a?(Hash)
348
+ tt = payload["meta"]["billed_units"]["input_tokens"]
349
+ emit_payload[:total_tokens] = tt if tt.is_a?(Integer) && tt >= 0
350
+ end
351
+ vectors = extract_vectors!(payload, sources.length)
352
+ validate_response!(sources.length, vectors)
353
+ end
354
+ end
355
+
249
356
  def inspect_attrs
250
357
  super.merge(base: safe_base_host, retries: @max_retries)
251
358
  end
@@ -272,12 +379,42 @@ module Parse
272
379
  conn
273
380
  end
274
381
 
275
- def post_embeddings(body)
382
+ # @api private
383
+ # Compute the v2/embed path relative to the configured base_url's
384
+ # path component. For the default base `https://api.cohere.com/v1`
385
+ # this produces `/v2/embed`; for a custom-proxy base like
386
+ # `https://corp-proxy.example.com/cohere/v1` it produces
387
+ # `/cohere/v2/embed` — so the operator's proxy / egress-logging
388
+ # / API-key custody layer is NOT silently bypassed by image
389
+ # embedding calls. The substitution targets the trailing `/v1`
390
+ # segment specifically; bases without that segment fall back to
391
+ # appending `/v2/embed` to the host root with a warning so the
392
+ # caller sees the asymmetry rather than discovering it via a
393
+ # 404 from a misrouted request.
394
+ def v2_embed_path
395
+ uri = URI.parse(@base_url)
396
+ path = uri.path.to_s
397
+ if path =~ %r{/v1/?\z}i
398
+ # Replace `/v1` (with optional trailing slash) with `/v2/embed`.
399
+ path.sub(%r{/v1/?\z}i, "/v2/embed")
400
+ else
401
+ warn "[Parse::Embeddings::Cohere] base_url path #{path.inspect} does not end " \
402
+ "in `/v1` — embed_image will POST to host-root `/v2/embed`, which may " \
403
+ "bypass a configured proxy path. Configure base_url to end with `/v1`."
404
+ "/v2/embed"
405
+ end
406
+ end
407
+
408
+ # `path:` accepts either a Faraday-relative segment (default
409
+ # `"embed"`, which resolves under the configured `/v1/` base) or
410
+ # an absolute path (`"/v2/embed"`) for endpoints outside the
411
+ # configured base — used by {#embed_image} to reach `/v2/embed`.
412
+ def post_embeddings(body, path: "embed")
276
413
  attempts = 0
277
414
  loop do
278
415
  attempts += 1
279
416
  begin
280
- response = @connection.post("embed") do |req|
417
+ response = @connection.post(path) do |req|
281
418
  req.body = body.to_json
282
419
  end
283
420
  rescue Faraday::TimeoutError, Faraday::ConnectionFailed => e
@@ -312,7 +449,7 @@ module Parse
312
449
  next
313
450
  end
314
451
  raise BadRequestError,
315
- "Parse::Embeddings::Cohere: #{status} from POST /embed."
452
+ "Parse::Embeddings::Cohere: #{status} from POST #{path.start_with?('/') ? path : "/#{path}"}."
316
453
  end
317
454
  end
318
455
 
data/lib/parse/embeddings/provider.rb CHANGED
@@ -41,14 +41,32 @@ module Parse
41
41
 
42
42
  # @param sources [Array<URI, IO, String>] image sources — URI for
43
43
  # remote, IO for streamed bytes, String for base64. Concrete
44
- # providers document which forms they accept.
44
+ # providers document which forms they accept. In v5.1 (URL-only
45
+ # path), every source is a raw `String` URL forwarded unchanged
46
+ # from the managed path: {Parse::Core::EmbedManaged} deliberately
47
+ # does NOT validate before calling the provider (validating there
48
+ # would double-resolve every URL). The concrete `embed_image`
49
+ # override is therefore responsible for calling
50
+ # {Parse::Embeddings.validate_image_url!} (passing `allow_insecure:`
51
+ # through) before egress — see the bundled Voyage/Cohere providers,
52
+ # which validate internally.
45
53
  # @param input_type [Symbol] `:search_query` or `:search_document`,
46
54
  # parallel to {#embed_text}.
55
+ # @param allow_insecure [Boolean] **contract kwarg** —
56
+ # {Parse::Core::EmbedManaged.recompute_embedding!} unconditionally
57
+ # forwards this from the directive declaration. Concrete
58
+ # `embed_image` overrides MUST either accept `allow_insecure:`
59
+ # explicitly (passing it through to
60
+ # {Parse::Embeddings.validate_image_url!}) or absorb it via
61
+ # `**opts`. Dropping `**opts` from the override signature
62
+ # without accepting `allow_insecure:` will raise
63
+ # `ArgumentError: unknown keyword: allow_insecure` from the
64
+ # managed-embedding save path. Default `false`.
47
65
  # @param opts [Hash] provider-specific options (e.g. `dim:` for
48
66
  # Matryoshka-style truncation). Forward-compatible escape hatch.
49
67
  # @return [Array<Array<Float>>] vectors aligned 1:1 with `sources`.
50
68
  # @raise [NotImplementedError] image embedding is a v5.1+ feature.
51
- def embed_image(sources, input_type: :search_document, **opts)
69
+ def embed_image(sources, input_type: :search_document, allow_insecure: false, **opts)
52
70
  raise NotImplementedError, "#{self.class} does not support image embedding"
53
71
  end
54
72
 
data/lib/parse/embeddings/voyage.rb CHANGED
@@ -272,6 +272,108 @@ module Parse
272
272
  end
273
273
  end
274
274
 
275
+ # @return [Array<Symbol>] Voyage's multimodal models support
276
+ # `[:text, :image]`; text-only models report `[:text]`.
277
+ def modalities
278
+ MULTIMODAL_MODELS.include?(@model) ? %i[text image] : [:text]
279
+ end
280
+
281
+ # Embed a batch of image URLs through Voyage's
282
+ # `/v1/multimodalembeddings` endpoint. v5.1 ships URL-only — the
283
+ # provider receives a public URL and issues its own fetch. The
284
+ # SDK does NOT download the image; it validates the URL through
285
+ # {Parse::Embeddings.validate_image_url!} (CIDR / port / host
286
+ # allowlist, sentinel-gated egress opt-in) and forwards the
287
+ # canonicalized URL string in the `{ type: "image_url",
288
+ # image_url: ... }` content row.
289
+ #
290
+ # **Multimodal model required.** Voyage's text-only models
291
+ # (`voyage-3`, `voyage-4`, etc.) do not accept image inputs;
292
+ # calling `embed_image` on a provider configured with one of
293
+ # those raises {BadRequestError} before any network call.
294
+ #
295
+ # **Bytes-fetch path is v5.3.** A future `bytes:` option will
296
+ # download via {Parse::File.safe_open_url}, MIME-sniff the
297
+ # leading bytes, optionally EXIF-strip, and forward as
298
+ # base64. URL-only ships first because it sidesteps EXIF /
299
+ # MIME-confusion class issues entirely.
300
+ #
301
+ # @param sources [Array<String>] image URLs. Each must satisfy
302
+ # {Parse::Embeddings.validate_image_url!} — failing entries
303
+ # raise the corresponding {Parse::Embeddings::InvalidImageURL}
304
+ # / {Parse::Embeddings::ConfirmationRequired} and ABORT the
305
+ # whole batch (no partial forwarding).
306
+ # @param input_type [Symbol] one of {INPUT_TYPE_WIRE_VALUES}'s
307
+ # keys; mapped to Voyage's `input_type` field. Defaults to
308
+ # `:search_document`.
309
+ # @param allow_insecure [Boolean] forwarded to the URL
310
+ # validator; permit `http://` for local-dev CDN proxies.
311
+ # @return [Array<Array<Float>>] vectors aligned 1:1 with `sources`.
312
+ def embed_image(sources, input_type: :search_document, allow_insecure: false)
313
+ unless MULTIMODAL_MODELS.include?(@model)
314
+ raise BadRequestError,
315
+ "Parse::Embeddings::Voyage#embed_image: model #{@model.inspect} does not " \
316
+ "accept image inputs. Configure the provider with a multimodal model " \
317
+ "(supported: #{MULTIMODAL_MODELS.inspect})."
318
+ end
319
+ unless sources.is_a?(Array)
320
+ raise ArgumentError,
321
+ "Parse::Embeddings::Voyage#embed_image expects Array of image URLs " \
322
+ "(got #{sources.class})."
323
+ end
324
+ return [] if sources.empty?
325
+
326
+ unless INPUT_TYPE_WIRE_VALUES.key?(input_type)
327
+ raise ArgumentError,
328
+ "Parse::Embeddings::Voyage#embed_image input_type #{input_type.inspect} not in " \
329
+ "#{INPUT_TYPE_WIRE_VALUES.keys.inspect}."
330
+ end
331
+ # Voyage caps multimodal requests at the same per-request size
332
+ # as the text endpoint. The text path goes through
333
+ # `embed_text_batched` which chunks automatically; the image
334
+ # path has no chunker yet (every directive is a single URL in
335
+ # v5.1), so guard the direct-API caller against a silent 400.
336
+ if sources.length > @embed_batch_size
337
+ raise ArgumentError,
338
+ "Parse::Embeddings::Voyage#embed_image: batch size #{sources.length} exceeds " \
339
+ "the configured cap #{@embed_batch_size} (Voyage per-request max: 128). " \
340
+ "Split the input and call embed_image once per chunk."
341
+ end
342
+
343
+ # Validate every URL up-front so a malformed entry in slot N
344
+ # does not get past validation while slots 0..N-1 are already
345
+ # in the wire body. The validator returns the canonicalized
346
+ # URL — we forward exactly that, not the caller's raw input.
347
+ canonical_urls = sources.each_with_index.map do |url, i|
348
+ unless url.is_a?(String)
349
+ raise ArgumentError,
350
+ "Parse::Embeddings::Voyage#embed_image sources[#{i}] is not a String " \
351
+ "(#{url.class}). v5.1 ships URL-only — bytes/IO support is v5.3."
352
+ end
353
+ Parse::Embeddings.validate_image_url!(url, allow_insecure: allow_insecure)
354
+ end
355
+
356
+ wire_input_type = INPUT_TYPE_WIRE_VALUES[input_type]
357
+ body = {
358
+ inputs: canonical_urls.map { |u|
359
+ { content: [{ type: "image_url", image_url: u }] }
360
+ },
361
+ model: @model,
362
+ truncation: @truncation,
363
+ }
364
+ body[:input_type] = wire_input_type if wire_input_type
365
+
366
+ instrument_embed(sources.length, input_type, modality: :image) do |emit_payload|
367
+ payload = post_embeddings(body, path: "multimodalembeddings")
368
+ if payload.is_a?(Hash) && payload["usage"].is_a?(Hash)
369
+ tt = payload["usage"]["total_tokens"]
370
+ emit_payload[:total_tokens] = tt if tt.is_a?(Integer) && tt >= 0
371
+ end
372
+ vectors = extract_vectors!(payload, sources.length)
373
+ validate_response!(sources.length, vectors)
374
+ end
375
+ end
376
+
275
377
  def inspect_attrs
276
378
  super.merge(base: safe_base_host, retries: @max_retries)
277
379
  end