RubyGems - parse-stack-next - Versions diffs - 5.4.1 → 5.5.0 - Mend

parse-stack-next 5.4.1 → 5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +344 -0
data/Gemfile.lock +1 -1
data/README.md +45 -6
data/docs/atlas_vector_search_guide.md +314 -19
data/lib/parse/api/users.rb +10 -0
data/lib/parse/client.rb +19 -1
data/lib/parse/embeddings/batch_embedder.rb +188 -0
data/lib/parse/embeddings/cache.rb +322 -0
data/lib/parse/embeddings/cohere.rb +31 -18
data/lib/parse/embeddings/image_fetch.rb +347 -0
data/lib/parse/embeddings/provider.rb +17 -11
data/lib/parse/embeddings/spend_cap.rb +117 -3
data/lib/parse/embeddings/voyage.rb +34 -25
data/lib/parse/embeddings.rb +40 -3
data/lib/parse/model/acl.rb +15 -11
data/lib/parse/model/core/embed_managed.rb +243 -14
data/lib/parse/model/core/vector_searchable.rb +157 -8
data/lib/parse/query/constraint.rb +22 -0
data/lib/parse/query/constraints.rb +271 -250
data/lib/parse/query.rb +233 -42
data/lib/parse/retrieval/agent_tool.rb +21 -14
data/lib/parse/retrieval/retriever.rb +84 -0
data/lib/parse/schema/search_index_migrator.rb +48 -1
data/lib/parse/stack/version.rb +1 -1
data/lib/parse/vector_search/hybrid.rb +39 -1
data/lib/parse/vector_search.rb +34 -0
data/lib/parse/webhooks/payload.rb +7 -1
data/lib/parse/webhooks.rb +107 -21
metadata +4 -1

data/lib/parse/vector_search/hybrid.rb CHANGED Viewed

@@ -372,6 +372,17 @@ module Parse
             if pointer_fields
               rows = Parse::CLPScope.filter_by_pointer_fields(rows, pointer_fields, resolution.user_id)
             end
+            # NEW-VEC-1: the `$rankFusion` meta score is materialized
+            # BEFORE the ACL `$match`, so a surviving row's raw
+            # `_hybrid_score` encodes its rank among rows the caller
+            # cannot read — a cross-ACL inference channel for scoped
+            # callers probing with crafted queries. Recompute the
+            # surfaced score from the POST-filter ordering (the rows are
+            # already sorted by the true fused score, so relative order
+            # is preserved); the new value is a function of visible rows
+            # only. The client-side RRF path is unaffected — it ranks
+            # from already-filtered branch results.
+            recompute_scores_from_visible_order!(rows, k_constant: k_constant, weights: weights)
           end
           rows.map! { |doc| Parse::PipelineSecurity.strip_internal_fields(doc) }
           rows
@@ -443,10 +454,20 @@ module Parse
         # recognized-but-misused `$rankFusion` (or an unrelated auth/parse
         # error) is treated as supported and surfaces its real error on the
         # actual query rather than silently disabling native fusion.
+        #
+        # Deliberately narrow (NEW-VEC-2): a broad phrase like
+        # "is not allowed" also appears in MongoDB authorization errors
+        # ("not allowed to execute command aggregate"), which combined
+        # with the stage name in the message would misclassify an
+        # auth-failing cluster and cache the wrong probe verdict for
+        # PROBE_CACHE_TTL. Only phrases that unambiguously mean
+        # "this stage name is unknown to the parser" belong here; any
+        # other failure falls through to "supported" and the real query
+        # surfaces the real error (with the client path as fallback).
         UNSUPPORTED_STAGE_FRAGMENTS = [
           "unrecognized pipeline stage name",
           "unknown aggregation stage",
-          "is not allowed",
+          "unknown stage",
         ].freeze
         private_constant :UNSUPPORTED_STAGE_FRAGMENTS
@@ -455,6 +476,23 @@ module Parse
           msg.include?("rankfusion") && UNSUPPORTED_STAGE_FRAGMENTS.any? { |f| msg.include?(f) }
         end
+        # @!visibility private
+        # Replace each visible row's `_hybrid_score` with an RRF-shaped
+        # score derived from its position AMONG VISIBLE ROWS:
+        # `Σ_b weight_b / (k_constant + visible_rank)`. Monotone with the
+        # original fused order (input is already score-sorted), but
+        # carries no information about how many hidden rows ranked above
+        # or between the visible ones. See NEW-VEC-1.
+        def recompute_scores_from_visible_order!(rows, k_constant:, weights:)
+          w = weights ? symbolize(weights) : nil # nil weights are handed to weight_for unchanged
+          total_weight = weight_for(w, :lexical).to_f + weight_for(w, :vector).to_f # shared numerator for every row
+          rows.each_with_index do |doc, i|
+            next unless doc.is_a?(Hash) # non-Hash rows are left untouched but still occupy index i
+            doc["_hybrid_score"] = total_weight / (k_constant + i + 1) # i is 0-based; i + 1 is the position within rows
+          end
+          rows # the same array, with its Hash rows mutated in place
+        end
         # -- probe cache -------------------------------------------------
         PROBE_MUTEX_INIT = Mutex.new

data/lib/parse/vector_search.rb CHANGED Viewed

@@ -95,7 +95,41 @@ module Parse
     # one. Atlas's guidance: numCandidates ≥ 10 × limit, ≤ 10_000.
     DEFAULT_NUM_CANDIDATES_MULTIPLIER = 20
+    # Accepted {.index_drift_policy} values.
+    INDEX_DRIFT_POLICIES = %i[warn raise ignore].freeze # validated by index_drift_policy=; :warn is the reader's default
     class << self
+      # Policy applied when first-query index verification (see
+      # {Parse::Core::VectorSearchable}) finds the deployed Atlas
+      # vectorSearch index disagreeing with the model declaration —
+      # wrong `numDimensions`, wrong `similarity`, or a tenant-scope
+      # field missing from the index's `filter` paths.
+      #
+      # * `:warn` (default) — emit a `[Parse::VectorSearch:DRIFT]`
+      #   warning once per (class, field, index) and continue. Drift
+      #   usually means the index predates a model change; queries
+      #   still run but return degraded or wrongly-scoped results.
+      # * `:raise` — fail the query with
+      #   {Parse::Core::VectorSearchable::IndexDriftError}. Strict mode
+      #   for deployments that treat drift as a release blocker.
+      # * `:ignore` — skip verification entirely.
+      #
+      # @param value [Symbol]
+      # @return [Symbol]
+      def index_drift_policy=(value)
+        v = value.respond_to?(:to_sym) ? value.to_sym : nil # Strings and Symbols coerce; anything else becomes nil
+        unless v && INDEX_DRIFT_POLICIES.include?(v) # rejects nil and any symbol outside the accepted list
+          raise ArgumentError,
+                "Parse::VectorSearch.index_drift_policy must be one of " \
+                "#{INDEX_DRIFT_POLICIES.inspect} (got #{value.inspect})."
+        end
+        @index_drift_policy = v # ivar on the singleton receiver (this method is defined inside `class << self`)
+      end
+      # @return [Symbol] current drift policy (default `:warn`).
+      def index_drift_policy
+        @index_drift_policy ||= :warn # the setter only stores (truthy) symbols, so `||=` never overwrites a configured value
+      end
       # Low-level `$vectorSearch` entry point.
       #
       # @param collection_name [String] Parse class name / Mongo

data/lib/parse/webhooks/payload.rb CHANGED Viewed

@@ -740,7 +740,13 @@ module Parse
       # callback handling based on the request origin.
       # @return [Boolean] true if the request originated from Ruby Parse Stack
       def ruby_initiated?
-        @ruby_initiated ||= begin
+        # Stable memoization: a plain `||=` re-derives whenever the stored value
+        # is `false`, so a previously-computed (or externally-stamped, e.g. by
+        # Parse::Webhooks.call_route) `false` would be recomputed on every call
+        # and could disagree with the stamping caller. Return any stored non-nil
+        # value so a `false` is memoized too; only an unset or `nil` value is derived.
+        return @ruby_initiated if defined?(@ruby_initiated) && !@ruby_initiated.nil?
+        @ruby_initiated = begin
             request_id = nil
             if @raw.respond_to?(:[])

data/lib/parse/webhooks.rb CHANGED Viewed

@@ -455,34 +455,114 @@ module Parse
         end
         if type == :after_save && payload&.parse_object.present? && payload.parse_object.is_a?(Parse::Object)
-          # Handle after_save callbacks intelligently based on request origin.
-          # For trusted-Ruby-initiated saves (both `_RB_` header AND master
-          # key), Parse Stack's local `run_callbacks :save` will fire
-          # after_create and after_save callbacks after the REST response
-          # returns; firing them again here would double-fire any side
-          # effect (e.g. an `after_save :send_email` would send two emails
-          # per save). For everything else -- client-initiated saves, or a
-          # spoofed `_RB_` from a non-master client -- Parse Stack never had
-          # a chance to run callbacks, so we fire them here.
+          # The chained ActiveModel after_save/after_create callbacks are NOT
+          # fired here. `call!` dispatches every trigger twice -- once for the
+          # specific class route and once for the generic `"*"` route -- so
+          # firing the model callbacks inside this per-route block double-fired
+          # them for any app that registered BOTH a class route and a `"*"`
+          # route (e.g. an `after_save :send_email` would send two emails per
+          # save). The dispatch now lives in `run_after_save_chain`, which
+          # `call!` invokes exactly once per delivery after both route calls.
           #
-          # The decision depends ONLY on request origin, never on what the
-          # handler returned. Parse Server discards the afterSave response
-          # body entirely (it resolves {success} even if the handler throws),
-          # so a handler that returns the parse_object -- the recommended
-          # before_save pattern, easy to copy by mistake -- must NOT silently
-          # suppress these callbacks. We normalize the result to `true` below
-          # so a returned object never leaks into the response or the log.
-          is_new = payload.original.nil?
-          unless trusted_ruby_initiated
-            payload.parse_object.run_after_create_callbacks if is_new
-            payload.parse_object.run_after_save_callbacks
-          end
+          # We still normalize the result to `true` so a handler that returned
+          # the parse_object (the recommended before_save pattern, easy to copy
+          # by mistake) never leaks an object into the response or the log.
           result = true
         end
         result
       end
+      # Fires the chained ActiveModel after_save (and after_create, for a new
+      # object) callbacks for an afterSave delivery -- exactly once per request.
+      #
+      # This lives in `call!` rather than `call_route` because `call!` dispatches
+      # every trigger twice (the specific class route AND the generic `"*"`
+      # route). Firing the model callbacks per-route would double-fire any side
+      # effect for an app that registered both routes. Calling this once, after
+      # both route calls, fires the chain exactly once regardless of how many
+      # routes matched.
+      #
+      # The decision to fire depends ONLY on request origin, never on what a
+      # handler returned: Parse Server discards the afterSave response body
+      # entirely, so a handler returning the parse_object must not suppress the
+      # callbacks. For trusted-Ruby-initiated saves (both the `_RB_` request-id
+      # header AND the master key) Parse Stack's local `run_callbacks :save`
+      # already fires these after the REST response returns, so we skip them
+      # here to avoid the double-fire. The route-present guard preserves the
+      # "an unregistered afterSave trigger never fires model callbacks" contract
+      # that `call_route`'s early return used to provide.
+      #
+      # @param payload [Parse::Webhooks::Payload] the afterSave payload.
+      # @return [void]
+      def run_after_save_chain(payload)
+        return unless payload&.after_save?
+        return unless payload.parse_object.is_a?(Parse::Object)
+        # Preserve the "no registered route => no model callbacks" behavior that
+        # call_route's `return unless routes[type][className].present?` enforced.
+        # Mirror that guard exactly: key on parse_class.to_s (as call_route does)
+        # and use `.present?` on the value -- registration stores an Array, and an
+        # empty/absent registration must NOT fire (matching the original).
+        after_save_routes = routes[:after_save]
+        return unless after_save_routes &&
+                      (after_save_routes[payload.parse_class.to_s].present? ||
+                       after_save_routes["*"].present?)
+        # Trusted-Ruby-initiated saves run their callbacks locally; firing again
+        # here would double them. This must match call_route's trusted_ruby_initiated
+        # EXACTLY. call_route runs (and stamps @ruby_initiated) before this for any
+        # matched route; `ruby_initiated?` returns a stored non-nil value as-is
+        # (it no longer uses `||=`, so a stamped `false` is not re-derived), which
+        # should keep this check in agreement with call_route's header lookup.
+        return if payload.ruby_initiated? && payload.master? == true
+        # By the time afterSave fires the object is ALREADY persisted in Parse
+        # Server, and Parse Server discards the afterSave response body entirely
+        # (it resolves success even if the handler throws). So a chained callback
+        # that raises must not (a) 500 the webhook endpoint -- `call!`'s rescue
+        # only catches ResponseError / ValidationError, so a bare StandardError
+        # would escape -- nor (b) take out the OTHER phase's unrelated side
+        # effects. Run the after_create and after_save phases independently, each
+        # guarded, logging and swallowing any StandardError. This mirrors Parse's
+        # own afterSave semantics (log-and-continue on a post-persist failure):
+        # a raising `after_create :send_welcome_email` no longer silently skips
+        # an unrelated `after_save :reindex`, and neither can crash the endpoint.
+        obj = payload.parse_object
+        run_after_save_phase(obj, :after_create) if payload.original.nil? # no original => treated as a create
+        run_after_save_phase(obj, :after_save)
+        nil
+      end
+      # Runs one phase (:after_create or :after_save) of an afterSave object's
+      # chained ActiveModel callbacks, swallowing and logging any StandardError
+      # so a post-persist callback failure can't crash the webhook endpoint or
+      # suppress the sibling phase. ActiveModel still halts the rest of *this*
+      # phase's chain on a raise -- only the cross-phase / endpoint blast radius
+      # is contained here. Note this also swallows a ResponseError/ValidationError
+      # raised from inside an after_save callback: afterSave is post-persist and
+      # Parse Server discards the response body, so an `error!` there cannot deny
+      # the (already-committed) write -- it is logged, not propagated.
+      # @param obj [Parse::Object] the persisted afterSave object.
+      # @param phase [Symbol] :after_create or :after_save.
+      # @return [void]
+      def run_after_save_phase(obj, phase)
+        case phase
+        when :after_create then obj.run_after_create_callbacks
+        when :after_save then obj.run_after_save_callbacks
+        end # no `else`: any other phase symbol runs nothing and the method returns nil
+        nil
+      rescue => e # bare rescue catches StandardError only; Exception-level errors still propagate
+        # Redact the exception message before logging: a callback error can echo
+        # record contents/tokens, and the rest of this file routes log output
+        # through the same redactor.
+        warn "[Parse::Webhooks] afterSave #{phase} callback raised for " \
+             "#{obj.class}##{obj.id} -- the object is already persisted; " \
+             "logging and continuing: #{e.class}: " \
+             "#{Parse::Middleware::BodyBuilder.redact(e.message)}"
+        nil
+      end
       # Generates a success response for Parse Server.
       # @param data [Object] the data to send back with the success.
       # @return [Hash] a success data payload
@@ -688,6 +768,12 @@ module Parse
             # call hooks subscribed to any class route
             generic_result = Parse::Webhooks.call_route(payload.trigger_name, "*", payload)
             result = generic_result if generic_result.present? && result.nil?
+            # Fire the chained ActiveModel after_save/after_create callbacks
+            # exactly once per delivery -- after BOTH route calls above -- so an
+            # app that registers both a class route and a `"*"` route doesn't
+            # double-fire them. No-op for every non-afterSave trigger.
+            Parse::Webhooks.run_after_save_chain(payload)
           else
             if self.logging.present?
               puts "[Webhooks] --> Could not find mapping route for " \

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: parse-stack-next
 version: !ruby/object:Gem::Version
-  version: 5.4.1
+  version: 5.5.0
 platform: ruby
 authors:
 - Adrian Curtin
@@ -319,8 +319,11 @@ files:
 - lib/parse/clp_scope.rb
 - lib/parse/console.rb
 - lib/parse/embeddings.rb
+- lib/parse/embeddings/batch_embedder.rb
+- lib/parse/embeddings/cache.rb
 - lib/parse/embeddings/cohere.rb
 - lib/parse/embeddings/fixture.rb
+- lib/parse/embeddings/image_fetch.rb
 - lib/parse/embeddings/jina.rb
 - lib/parse/embeddings/local_http.rb
 - lib/parse/embeddings/openai.rb