RubyGems - parse-stack-next - Versions diffs - 5.0.0 → 5.1.0 - Mend

parse-stack-next 5.0.0 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

checksums.yaml +4 -4
data/.bundle/config +2 -0
data/.github/ISSUE_TEMPLATE/bug_report.yml +105 -0
data/.github/ISSUE_TEMPLATE/feature_request.yml +67 -0
data/.github/dependabot.yml +13 -0
data/.github/workflows/codeql.yml +1 -1
data/.github/workflows/docs.yml +3 -3
data/.github/workflows/release.yml +43 -0
data/.github/workflows/ruby.yml +1 -1
data/.gitignore +1 -0
data/.vscode/settings.json +3 -0
data/.yardopts +19 -0
data/CHANGELOG.md +802 -0
data/Gemfile +3 -0
data/Gemfile.lock +8 -5
data/README.md +16 -1
data/Rakefile +5 -1
data/docs/acl_clp_guide.md +553 -0
data/docs/atlas_vector_search_guide.md +123 -22
data/docs/client_sdk_guide.md +201 -5
data/docs/usage_guide.md +21 -0
data/docs/yard-template/default/fulldoc/html/css/common.css +1222 -0
data/docs/yard-template/default/fulldoc/html/css/full_list.css +387 -0
data/lib/parse/agent/tools.rb +153 -1
data/lib/parse/cache/pool.rb +15 -0
data/lib/parse/cache/redis.rb +114 -2
data/lib/parse/client/caching.rb +18 -1
data/lib/parse/client.rb +79 -12
data/lib/parse/embeddings/cohere.rb +143 -6
data/lib/parse/embeddings/provider.rb +20 -2
data/lib/parse/embeddings/voyage.rb +102 -0
data/lib/parse/embeddings.rb +332 -1
data/lib/parse/live_query/client.rb +167 -4
data/lib/parse/live_query/configuration.rb +12 -0
data/lib/parse/live_query/subscription.rb +55 -2
data/lib/parse/live_query.rb +123 -1
data/lib/parse/lock.rb +342 -0
data/lib/parse/lock_backend.rb +308 -0
data/lib/parse/model/classes/audience.rb +5 -0
data/lib/parse/model/classes/installation.rb +122 -0
data/lib/parse/model/classes/job_schedule.rb +3 -1
data/lib/parse/model/classes/job_status.rb +4 -1
data/lib/parse/model/classes/push_status.rb +4 -1
data/lib/parse/model/classes/session.rb +7 -0
data/lib/parse/model/classes/user.rb +204 -0
data/lib/parse/model/core/create_lock.rb +28 -134
data/lib/parse/model/core/embed_managed.rb +162 -13
data/lib/parse/model/core/parse_reference.rb +17 -1
data/lib/parse/model/core/querying.rb +26 -2
data/lib/parse/model/file.rb +523 -18
data/lib/parse/query.rb +31 -1
data/lib/parse/stack/version.rb +1 -1
data/lib/parse/stack.rb +98 -1
data/parse-stack-next.gemspec +2 -2
metadata +19 -7

data/lib/parse/cache/redis.rb CHANGED

@@ -105,6 +105,72 @@ module Parse
         @pool.store(key, value, options)
       end
+      # Atomic SETNX. Required so `Parse::CreateLock` can acquire
+      # cross-process locks when this wrapper is the configured cache /
+      # `synchronize_create_store`. Returns `true` only when the key did
+      # not already exist.
+      def create(key, value, options = {})
+        @pool.create(key, value, options)
+      end
+      # Atomic counter increment. Forwarded for Moneta surface parity.
+      def increment(key, amount = 1, options = {})
+        @pool.increment(key, amount, options)
+      end
+      # Lua compare-and-delete: delete `key` only if its current value
+      # equals `expected`. Atomic on the Redis server (the GET, the
+      # compare, and the DEL are one script invocation), which closes the
+      # check-then-delete race in a naive GET-then-DEL release where the
+      # lease can expire and be re-acquired by another holder between the
+      # two commands.
+      LOCK_RELEASE_SCRIPT = <<~LUA
+        if redis.call('get', KEYS[1]) == ARGV[1] then
+          return redis.call('del', KEYS[1])
+        else
+          return 0
+        end
+      LUA
+      # Atomically acquire a lock: SET key=owner only if absent, with a
+      # native expiry. Used by {Parse::LockBackend} for {Parse::Lock} and
+      # {Parse::CreateLock}. Deliberately bypasses Moneta's `create` —
+      # `Moneta.new(:Redis)` marshals BOTH keys and values, so a raw-Redis
+      # compare-and-delete on the marshaled blob would be fragile and
+      # coupled to Moneta's serializer config. Routing acquire AND release
+      # through plain-string raw Redis here keeps one consistent encoding
+      # across both ends of the lock and makes the keys human-inspectable
+      # in Redis (`parse-stack:lock:v1:<digest>`). Lock keys are
+      # short-lived (TTL ≤ 30s) so there is no migration concern when a
+      # deploy flips between the Moneta-encoded and raw-encoded paths.
+      #
+      # @param key [String] plain-string lock key.
+      # @param owner [String] unique-per-acquisition owner token.
+      # @param ttl [Integer] seconds until the key self-clears.
+      # @return [Boolean] true when the key was set (lock acquired).
+      def lock_acquire(key, owner, ttl)
+        @pool.pool.with do |store|
+          redis = backend_client(store)
+          # redis-rb returns "OK" on success, nil when NX fails.
+          !!redis.set(key, owner, nx: true, ex: ttl)
+        end
+      end
+      # Atomically release a lock via compare-and-delete. Only the holder
+      # whose `owner` token still matches the stored value deletes the
+      # key — a holder whose lease already expired and was re-acquired by
+      # someone else is a no-op, never a cross-holder delete.
+      #
+      # @param key [String] plain-string lock key.
+      # @param owner [String] the owner token from {#lock_acquire}.
+      # @return [Boolean] true when this owner's key was deleted.
+      def lock_release(key, owner)
+        @pool.pool.with do |store|
+          redis = backend_client(store)
+          redis.eval(LOCK_RELEASE_SCRIPT, keys: [key], argv: [owner]).to_i == 1
+        end
+      end
       # Clear cached entries belonging to this wrapper. Required for
       # `Parse::Client#clear_cache!` compatibility.
       #
@@ -115,8 +181,22 @@ module Parse
       # the backing DB — same blast radius as previous versions, but
       # only for unnamespaced deployments. To opt into the wide
       # FLUSHDB explicitly (e.g. ops tooling), call {#flush_db!}.
-      def clear
-        if @namespace
+      #
+      # @param scope [String, nil] explicit namespace prefix to scan-delete.
+      #   When provided, overrides the wrapper's configured `@namespace` and
+      #   SCAN-deletes `<scope>:*` regardless of how the wrapper was built.
+      #   This is the safe escape hatch for tenants that share a non-
+      #   namespaced wrapper but still want to evict only their own keys
+      #   without `FLUSHDB`-ing siblings (and without wiping
+      #   `parse-stack:foc:v1:*` create-lock keys that live on the same DB).
+      #   The scope must be a non-empty String; the trailing `:` is added
+      #   automatically and any trailing `:` in the input is stripped so
+      #   `"tenant_x"` and `"tenant_x:"` are equivalent.
+      def clear(scope: nil)
+        if scope
+          prefix = validate_scope!(scope)
+          delete_keys_matching!("#{prefix}:*")
+        elsif @namespace
           delete_keys_matching!("#{@namespace}:*")
         else
           @pool.clear
@@ -185,6 +265,38 @@ module Parse
         s = ns.to_s.chomp(":")
         s.empty? ? nil : s
       end
+      # Validate a caller-supplied `scope:` for `clear(scope:)`. Returns the
+      # normalized prefix or raises ArgumentError. We enforce:
+      #
+      # - must be a String (Symbol / Integer / nil would silently `.to_s`
+      #   under `normalize_namespace` and expand the deletion target —
+      #   `scope: 0` would clear `0:*`)
+      # - must be non-empty after trimming a trailing `:`
+      # - must not contain Redis SCAN glob metacharacters (`*`, `?`, `[`,
+      #   `]`, `\`) — otherwise `scope: "*"` would SCAN-delete the whole
+      #   DB, defeating the whole point of having `flush_db!` as the
+      #   explicit wide-blast-radius escape hatch
+      # - must not contain a null byte (defense-in-depth against keys
+      #   crafted to terminate early in some Redis client paths)
+      GLOB_METACHARS = /[\*\?\[\]\\\x00]/.freeze
+      private_constant :GLOB_METACHARS
+      def validate_scope!(scope)
+        unless scope.is_a?(String)
+          raise ArgumentError, "scope: must be a String (got #{scope.class})"
+        end
+        prefix = scope.chomp(":")
+        if prefix.empty?
+          raise ArgumentError, "scope: must be a non-empty namespace string"
+        end
+        if prefix.match?(GLOB_METACHARS)
+          raise ArgumentError,
+                "scope: must not contain Redis SCAN glob characters (*, ?, [, ], \\, or NUL); " \
+                "use flush_db! for a full-DB flush"
+        end
+        prefix
+      end
     end
   end
 end

data/lib/parse/client/caching.rb CHANGED

@@ -143,6 +143,19 @@ module Parse
           @cache_key = "mk:#{@cache_key}" # prefix for master key requests
         end
+        # Optional ambient cache-tenant scope from `Parse.with_cache_tenant`.
+        # When present, composes between the configured namespace and the
+        # token/mk prefix as `T:<tenant>:` so a SCAN-delete over
+        # `<namespace>:T:<tenant>:*` evicts exactly one tenant, and
+        # `<namespace>:*` still evicts the whole namespace cleanly. The
+        # `T:` discriminator makes tenant prefixes unambiguously
+        # distinguishable from session-token hex prefixes (32-char hex)
+        # and from `mk:`, so legacy cache entries written before the
+        # tenant feature don't accidentally re-hydrate into a tenanted
+        # request and vice versa.
+        @cache_tenant = Parse.respond_to?(:current_cache_tenant) ? Parse.current_cache_tenant : nil
+        @cache_key = "T:#{@cache_tenant}:#{@cache_key}" if @cache_tenant
         # Namespace outermost so a SCAN over `<namespace>:*` evicts a whole
         # tenant/app cleanly without touching another app's entries.
         @cache_key = "#{@namespace}:#{@cache_key}" if @namespace
@@ -277,7 +290,11 @@ module Parse
       # @!visibility private
       def instrument_cache(event, **extra)
         return unless defined?(ActiveSupport::Notifications)
-        payload = { event: event, namespace: @namespace }.merge!(extra)
+        payload = {
+          event: event,
+          namespace: @namespace,
+          cache_tenant: @cache_tenant,
+        }.merge!(extra)
         ActiveSupport::Notifications.instrument("parse.cache.#{event}", payload)
       end

data/lib/parse/client.rb CHANGED

@@ -678,6 +678,12 @@ module Parse
     end
     private :validate_faraday_opts!
+    # Hosts considered "loopback" for the cleartext-ws:// guard in
+    # {#configure_live_query}. Mirrors
+    # {Parse::LiveQuery::Client::LOOPBACK_HOSTS} so the explicit-URL
+    # path and the derived-URL path agree on what counts as local.
+    LIVE_QUERY_LOOPBACK_HOSTS = %w[localhost 127.0.0.1 ::1 [::1] 0.0.0.0].freeze
     # Configure LiveQuery with the given options
     # @param opts [Hash] configuration options
     # @option opts [String] :live_query_url WebSocket URL for LiveQuery server (wss://...)
@@ -690,14 +696,74 @@ module Parse
       require_relative "live_query"
       live_query_opts = opts[:live_query].is_a?(Hash) ? opts[:live_query] : {}
+      resolved_url = live_query_url || live_query_opts[:url]
+      # Refuse explicit `ws://` against a non-loopback host unless
+      # `allow_insecure: true` is also passed in `live_query:`. The
+      # downstream `derive_websocket_url` path already enforces this for
+      # URLs derived from a Parse Server `http://` URL, but an explicit
+      # `live_query: { url: "ws://prod-host" }` or
+      # `live_query_url: "ws://prod-host"` bypassed it — the master key
+      # and any session token would ride the connect frame in cleartext.
+      validate_live_query_url!(resolved_url, allow_insecure: live_query_opts[:allow_insecure])
+      # Warn (don't raise) on `live_query: { ... }` keys that are not
+      # `Parse::LiveQuery::Configuration` setters. The block form would
+      # otherwise silently swallow typos like
+      # `live_query: { ssl_min_versoin: :TLSv1_3 }` and leave TLS at the
+      # default, losing the operator's intent. The pre-fix kwargs form
+      # raised `ArgumentError` here; this restores the surface without
+      # making it a hard failure for unknown-but-harmless keys.
+      warn_about_unknown_live_query_keys!(live_query_opts)
+      Parse::LiveQuery.configure do |config|
+        config.application_id = @application_id if @application_id
+        config.client_key = @api_key if @api_key
+        config.master_key = @master_key if @master_key
+        # Apply hash-form options first so the resolved URL (which honors
+        # top-level `live_query_url:` over `live_query: { url: }`) wins.
+        # Without this, the loop would re-write `config.url` from the
+        # hash and silently invert the documented precedence.
+        live_query_opts.each do |key, value|
+          next if key == :url
+          setter = "#{key}="
+          config.public_send(setter, value) if config.respond_to?(setter)
+        end
+        config.url = resolved_url if resolved_url
+      end
+    end
-      Parse::LiveQuery.configure(
-        url: live_query_url || live_query_opts[:url],
-        application_id: @application_id,
-        client_key: @api_key,
-        master_key: @master_key,
-        **live_query_opts,
-      )
+    # @api private
+    def validate_live_query_url!(url, allow_insecure:)
+      return unless url.is_a?(String) && url.start_with?("ws://")
+      host = URI.parse(url).host.to_s rescue ""
+      return if LIVE_QUERY_LOOPBACK_HOSTS.include?(host)
+      return if allow_insecure
+      raise ArgumentError,
+        "[Parse::Client] Refusing explicit insecure LiveQuery URL #{url.inspect}. " \
+        "The connect frame carries the master key and any session token in " \
+        "plaintext on this socket. Use wss:// for routable hosts, or pass " \
+        "`live_query: { allow_insecure: true }` to opt into cleartext for " \
+        "local development on a non-loopback address."
+    end
+    # @api private
+    def warn_about_unknown_live_query_keys!(live_query_opts)
+      return unless live_query_opts.is_a?(Hash) && live_query_opts.any?
+      probe = Parse::LiveQuery::Configuration.new
+      unknown = live_query_opts.keys.reject { |k| probe.respond_to?("#{k}=") }
+      return if unknown.empty?
+      warn "[Parse::Client] Ignoring unknown live_query option(s): " \
+           "#{unknown.inspect}. Valid keys are Parse::LiveQuery::Configuration " \
+           "setters (url, application_id, client_key, master_key, ping_interval, " \
+           "pong_timeout, allow_insecure, ssl_min_version, ssl_max_version, " \
+           "logging_enabled, log_level, ...). Check for typos."
     end
     # If set, returns the current retry count for this instance. Otherwise,
@@ -1026,11 +1092,12 @@ module Parse
   # @return (see Parse::Client.setup)
   # @see Parse::Client.setup
   def self.setup(opts = {}, &block)
-    if block_given?
-      Parse::Client.new(opts, &block)
-    else
-      Parse::Client.new(opts)
-    end
+    # Delegate to Parse::Client.setup so repeated Parse.setup calls overwrite
+    # the registered :default client. Going through Parse::Client.new instead
+    # would hit the `@clients[:default] ||= self` guard inside #initialize and
+    # silently keep the first-registered client, while Parse::Client.setup
+    # uses `=` and replaces it. Both entry points must behave identically.
+    Parse::Client.setup(opts, &block)
   end
   # @!visibility private

data/lib/parse/embeddings/cohere.rb CHANGED

@@ -14,9 +14,13 @@ module Parse
     #
     # * **v4** — `embed-v4.0` (1536 native, Matryoshka {256, 512, 1024,
     #   1536}, 128k-token context). Unified text + image model at the
-    #   network boundary; this provider exposes the text-input path
-    #   only — image inputs will land in v5.1 alongside the
-    #   {Provider#embed_image} hook.
+    #   network boundary. The text path uses Cohere's `/v1/embed`
+    #   endpoint; the image path ({#embed_image}, v5.1+) uses the
+    #   `/v2/embed` multimodal endpoint with OpenAI-style
+    #   `{ type: "image_url", image_url: { url: ... } }` content rows.
+    #   Text vectors stored today share the vector space with the
+    #   eventual image vectors (no re-embed required when adding
+    #   image-side data).
     # * **v3** — `embed-english-v3.0`, `embed-multilingual-v3.0` (both
     #   1024-dim), `embed-english-light-v3.0`,
     #   `embed-multilingual-light-v3.0` (both 384-dim). Text-only.
@@ -94,6 +98,10 @@ module Parse
       # models reject the field with a 400.
       MATRYOSHKA_MODELS = %w[embed-v4.0].freeze
+      # Models that accept image inputs via the `/v2/embed` multimodal
+      # endpoint. Currently only `embed-v4.0` — v3 is text-only.
+      MULTIMODAL_MODELS = %w[embed-v4.0].freeze
       # Allowed Matryoshka widths per model (Cohere quantizes the
       # available truncations rather than accepting any integer ≤
       # native). Empty allowlist = any integer ≤ native is fine, but
@@ -246,6 +254,105 @@ module Parse
         end
       end
+      # @return [Array<Symbol>] `[:text, :image]` for `embed-v4.0`,
+      #   `[:text]` for v3 models.
+      def modalities
+        MULTIMODAL_MODELS.include?(@model) ? %i[text image] : [:text]
+      end
+      # Embed a batch of image URLs through Cohere's `/v2/embed`
+      # multimodal endpoint. v5.1 ships URL-only — the provider
+      # receives a public URL and issues its own fetch. The SDK does
+      # NOT download the image; it validates the URL through
+      # {Parse::Embeddings.validate_image_url!} (sentinel-gated egress
+      # opt-in, CIDR / port / host allowlist) and forwards the
+      # canonicalized URL string in the `{ type: "image_url",
+      # image_url: { url: ... } }` content row.
+      #
+      # **Multimodal model required.** Cohere's v3 models do not accept
+      # image inputs; calling `embed_image` on a v3-configured provider
+      # raises {BadRequestError} before any network call.
+      #
+      # **Wire shape differs from {Voyage#embed_image}.** Voyage uses
+      # `{ type: "image_url", image_url: "<url>" }` (flat String); Cohere
+      # v2 uses `{ type: "image_url", image_url: { url: "<url>" } }`
+      # (nested object), matching the OpenAI chat-completions content
+      # convention. The high-level SDK contract is identical — callers
+      # pass an `Array<String>` of URLs.
+      #
+      # @param sources [Array<String>] image URLs. Each must satisfy
+      #   {Parse::Embeddings.validate_image_url!}; failing entries
+      #   abort the whole batch (no partial forwarding).
+      # @param input_type [Symbol] one of {INPUT_TYPE_WIRE_VALUES}'s
+      #   keys; mapped to Cohere's `input_type` field. Defaults to
+      #   `:search_document`.
+      # @param allow_insecure [Boolean] forwarded to the URL validator;
+      #   permit `http://` for local-dev CDN proxies.
+      # @return [Array<Array<Float>>] vectors aligned 1:1 with `sources`.
+      def embed_image(sources, input_type: :search_document, allow_insecure: false)
+        unless MULTIMODAL_MODELS.include?(@model)
+          raise BadRequestError,
+                "Parse::Embeddings::Cohere#embed_image: model #{@model.inspect} does not " \
+                "accept image inputs. Configure the provider with a multimodal model " \
+                "(supported: #{MULTIMODAL_MODELS.inspect})."
+        end
+        unless sources.is_a?(Array)
+          raise ArgumentError,
+                "Parse::Embeddings::Cohere#embed_image expects Array of image URLs " \
+                "(got #{sources.class})."
+        end
+        return [] if sources.empty?
+        wire_input_type = INPUT_TYPE_WIRE_VALUES[input_type]
+        unless wire_input_type
+          raise ArgumentError,
+                "Parse::Embeddings::Cohere#embed_image input_type #{input_type.inspect} not in " \
+                "#{INPUT_TYPE_WIRE_VALUES.keys.inspect}."
+        end
+        # Cohere caps `/v2/embed` at the same 96-input per-call limit
+        # as `/v1/embed`. Guard direct-API callers against a silent
+        # 400 — the DSL passes a single URL per directive.
+        if sources.length > @embed_batch_size
+          raise ArgumentError,
+                "Parse::Embeddings::Cohere#embed_image: batch size #{sources.length} exceeds " \
+                "the configured cap #{@embed_batch_size} (Cohere per-request max: 96). " \
+                "Split the input and call embed_image once per chunk."
+        end
+        # Validate every URL up-front so a malformed entry in slot N
+        # does not slip through after slots 0..N-1 are already in the
+        # wire body. Forward the canonicalized URL the validator
+        # returned — not the caller's raw input.
+        canonical_urls = sources.each_with_index.map do |url, i|
+          unless url.is_a?(String)
+            raise ArgumentError,
+                  "Parse::Embeddings::Cohere#embed_image sources[#{i}] is not a String " \
+                  "(#{url.class}). v5.1 ships URL-only — bytes/IO support is v5.3."
+          end
+          Parse::Embeddings.validate_image_url!(url, allow_insecure: allow_insecure)
+        end
+        body = {
+          model: @model,
+          input_type: wire_input_type,
+          embedding_types: ["float"],
+          inputs: canonical_urls.map { |u|
+            { content: [{ type: "image_url", image_url: { url: u } }] }
+          },
+        }
+        instrument_embed(sources.length, input_type, modality: :image) do |emit_payload|
+          payload = post_embeddings(body, path: v2_embed_path)
+          if payload.is_a?(Hash) && payload["meta"].is_a?(Hash) &&
+             payload["meta"]["billed_units"].is_a?(Hash)
+            tt = payload["meta"]["billed_units"]["input_tokens"]
+            emit_payload[:total_tokens] = tt if tt.is_a?(Integer) && tt >= 0
+          end
+          vectors = extract_vectors!(payload, sources.length)
+          validate_response!(sources.length, vectors)
+        end
+      end
       def inspect_attrs
         super.merge(base: safe_base_host, retries: @max_retries)
       end
@@ -272,12 +379,42 @@ module Parse
         conn
       end
-      def post_embeddings(body)
+      # @api private
+      # Compute the v2/embed path relative to the configured base_url's
+      # path component. For the default base `https://api.cohere.com/v1`
+      # this produces `/v2/embed`; for a custom-proxy base like
+      # `https://corp-proxy.example.com/cohere/v1` it produces
+      # `/cohere/v2/embed` — so the operator's proxy / egress-logging
+      # / API-key custody layer is NOT silently bypassed by image
+      # embedding calls. The substitution targets the trailing `/v1`
+      # segment specifically; bases without that segment fall back to
+      # appending `/v2/embed` to the host root with a warning so the
+      # caller sees the asymmetry rather than discovering it via a
+      # 404 from a misrouted request.
+      def v2_embed_path
+        uri = URI.parse(@base_url)
+        path = uri.path.to_s
+        if path =~ %r{/v1/?\z}i
+          # Replace `/v1` (with optional trailing slash) with `/v2/embed`.
+          path.sub(%r{/v1/?\z}i, "/v2/embed")
+        else
+          warn "[Parse::Embeddings::Cohere] base_url path #{path.inspect} does not end " \
+               "in `/v1` — embed_image will POST to host-root `/v2/embed`, which may " \
+               "bypass a configured proxy path. Configure base_url to end with `/v1`."
+          "/v2/embed"
+        end
+      end
+      # `path:` accepts either a Faraday-relative segment (default
+      # `"embed"`, which resolves under the configured `/v1/` base) or
+      # an absolute path (`"/v2/embed"`) for endpoints outside the
+      # configured base — used by {#embed_image} to reach `/v2/embed`.
+      def post_embeddings(body, path: "embed")
         attempts = 0
         loop do
           attempts += 1
           begin
-            response = @connection.post("embed") do |req|
+            response = @connection.post(path) do |req|
               req.body = body.to_json
             end
           rescue Faraday::TimeoutError, Faraday::ConnectionFailed => e
@@ -312,7 +449,7 @@ module Parse
             next
           end
           raise BadRequestError,
-                "Parse::Embeddings::Cohere: #{status} from POST /embed."
+                "Parse::Embeddings::Cohere: #{status} from POST #{path.start_with?('/') ? path : "/#{path}"}."
         end
       end

data/lib/parse/embeddings/provider.rb CHANGED

@@ -41,14 +41,32 @@ module Parse
       # @param sources [Array<URI, IO, String>] image sources — URI for
       #   remote, IO for streamed bytes, String for base64. Concrete
-      #   providers document which forms they accept.
+      #   providers document which forms they accept. In v5.1 (URL-only
+      #   path), every source is a raw `String` URL forwarded unchanged
+      #   from the managed path: {Parse::Core::EmbedManaged} deliberately
+      #   does NOT validate before calling the provider (validating there
+      #   would double-resolve every URL). The concrete `embed_image`
+      #   override is therefore responsible for calling
+      #   {Parse::Embeddings.validate_image_url!} (passing `allow_insecure:`
+      #   through) before egress — see the bundled Voyage/Cohere providers,
+      #   which validate internally.
       # @param input_type [Symbol] `:search_query` or `:search_document`,
       #   parallel to {#embed_text}.
+      # @param allow_insecure [Boolean] **contract kwarg** —
+      #   {Parse::Core::EmbedManaged.recompute_embedding!} unconditionally
+      #   forwards this from the directive declaration. Concrete
+      #   `embed_image` overrides MUST either accept `allow_insecure:`
+      #   explicitly (passing it through to
+      #   {Parse::Embeddings.validate_image_url!}) or absorb it via
+      #   `**opts`. Dropping `**opts` from the override signature
+      #   without accepting `allow_insecure:` will raise
+      #   `ArgumentError: unknown keyword: allow_insecure` from the
+      #   managed-embedding save path. Default `false`.
       # @param opts [Hash] provider-specific options (e.g. `dim:` for
       #   Matryoshka-style truncation). Forward-compatible escape hatch.
       # @return [Array<Array<Float>>] vectors aligned 1:1 with `sources`.
       # @raise [NotImplementedError] image embedding is a v5.1+ feature.
-      def embed_image(sources, input_type: :search_document, **opts)
+      def embed_image(sources, input_type: :search_document, allow_insecure: false, **opts)
         raise NotImplementedError, "#{self.class} does not support image embedding"
       end

data/lib/parse/embeddings/voyage.rb CHANGED

@@ -272,6 +272,108 @@ module Parse
         end
       end
+      # @return [Array<Symbol>] Voyage's multimodal models support
+      #   `[:text, :image]`; text-only models report `[:text]`.
+      def modalities
+        MULTIMODAL_MODELS.include?(@model) ? %i[text image] : [:text]
+      end
+      # Embed a batch of image URLs through Voyage's
+      # `/v1/multimodalembeddings` endpoint. v5.1 ships URL-only — the
+      # provider receives a public URL and issues its own fetch. The
+      # SDK does NOT download the image; it validates the URL through
+      # {Parse::Embeddings.validate_image_url!} (CIDR / port / host
+      # allowlist, sentinel-gated egress opt-in) and forwards the
+      # canonicalized URL string in the `{ type: "image_url",
+      # image_url: ... }` content row.
+      #
+      # **Multimodal model required.** Voyage's text-only models
+      # (`voyage-3`, `voyage-4`, etc.) do not accept image inputs;
+      # calling `embed_image` on a provider configured with one of
+      # those raises {BadRequestError} before any network call.
+      #
+      # **Bytes-fetch path is v5.3.** A future `bytes:` option will
+      # download via {Parse::File.safe_open_url}, MIME-sniff the
+      # leading bytes, optionally EXIF-strip, and forward as
+      # base64. URL-only ships first because it sidesteps EXIF /
+      # MIME-confusion class issues entirely.
+      #
+      # @param sources [Array<String>] image URLs. Each must satisfy
+      #   {Parse::Embeddings.validate_image_url!} — failing entries
+      #   raise the corresponding {Parse::Embeddings::InvalidImageURL}
+      #   / {Parse::Embeddings::ConfirmationRequired} and ABORT the
+      #   whole batch (no partial forwarding).
+      # @param input_type [Symbol] one of {INPUT_TYPE_WIRE_VALUES}'s
+      #   keys; mapped to Voyage's `input_type` field. Defaults to
+      #   `:search_document`.
+      # @param allow_insecure [Boolean] forwarded to the URL
+      #   validator; permit `http://` for local-dev CDN proxies.
+      # @return [Array<Array<Float>>] vectors aligned 1:1 with `sources`.
+      def embed_image(sources, input_type: :search_document, allow_insecure: false)
+        unless MULTIMODAL_MODELS.include?(@model)
+          raise BadRequestError,
+                "Parse::Embeddings::Voyage#embed_image: model #{@model.inspect} does not " \
+                "accept image inputs. Configure the provider with a multimodal model " \
+                "(supported: #{MULTIMODAL_MODELS.inspect})."
+        end
+        unless sources.is_a?(Array)
+          raise ArgumentError,
+                "Parse::Embeddings::Voyage#embed_image expects Array of image URLs " \
+                "(got #{sources.class})."
+        end
+        return [] if sources.empty?
+        unless INPUT_TYPE_WIRE_VALUES.key?(input_type)
+          raise ArgumentError,
+                "Parse::Embeddings::Voyage#embed_image input_type #{input_type.inspect} not in " \
+                "#{INPUT_TYPE_WIRE_VALUES.keys.inspect}."
+        end
+        # Voyage caps multimodal requests at the same per-request size
+        # as the text endpoint. The text path goes through
+        # `embed_text_batched` which chunks automatically; the image
+        # path has no chunker yet (every directive is a single URL in
+        # v5.1), so guard the direct-API caller against a silent 400.
+        if sources.length > @embed_batch_size
+          raise ArgumentError,
+                "Parse::Embeddings::Voyage#embed_image: batch size #{sources.length} exceeds " \
+                "the configured cap #{@embed_batch_size} (Voyage per-request max: 128). " \
+                "Split the input and call embed_image once per chunk."
+        end
+        # Validate every URL up-front so a malformed entry in slot N
+        # does not get past validation while slots 0..N-1 are already
+        # in the wire body. The validator returns the canonicalized
+        # URL — we forward exactly that, not the caller's raw input.
+        canonical_urls = sources.each_with_index.map do |url, i|
+          unless url.is_a?(String)
+            raise ArgumentError,
+                  "Parse::Embeddings::Voyage#embed_image sources[#{i}] is not a String " \
+                  "(#{url.class}). v5.1 ships URL-only — bytes/IO support is v5.3."
+          end
+          Parse::Embeddings.validate_image_url!(url, allow_insecure: allow_insecure)
+        end
+        wire_input_type = INPUT_TYPE_WIRE_VALUES[input_type]
+        body = {
+          inputs: canonical_urls.map { |u|
+            { content: [{ type: "image_url", image_url: u }] }
+          },
+          model: @model,
+          truncation: @truncation,
+        }
+        body[:input_type] = wire_input_type if wire_input_type
+        instrument_embed(sources.length, input_type, modality: :image) do |emit_payload|
+          payload = post_embeddings(body, path: "multimodalembeddings")
+          if payload.is_a?(Hash) && payload["usage"].is_a?(Hash)
+            tt = payload["usage"]["total_tokens"]
+            emit_payload[:total_tokens] = tt if tt.is_a?(Integer) && tt >= 0
+          end
+          vectors = extract_vectors!(payload, sources.length)
+          validate_response!(sources.length, vectors)
+        end
+      end
       def inspect_attrs
         super.merge(base: safe_base_host, retries: @max_retries)
       end