parse-stack-next 5.3.0 → 5.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/CHANGELOG.md +461 -0
  4. data/Gemfile +7 -0
  5. data/Gemfile.lock +12 -4
  6. data/README.md +160 -3
  7. data/Rakefile +52 -3
  8. data/docs/atlas_vector_search_guide.md +86 -2
  9. data/docs/client_sdk_guide.md +5 -0
  10. data/docs/mcp_guide.md +59 -4
  11. data/docs/mongodb_direct_guide.md +93 -1
  12. data/docs/usage_guide.md +11 -1
  13. data/docs/webhooks_guide.md +418 -0
  14. data/examples/README.md +46 -0
  15. data/examples/basic_client.rb +93 -0
  16. data/examples/basic_server.rb +109 -0
  17. data/examples/live_query_listener.rb +98 -0
  18. data/examples/rag_chatbot.rb +221 -0
  19. data/examples/webhook_server.rb +111 -0
  20. data/lib/parse/agent/mcp_rack_app.rb +285 -62
  21. data/lib/parse/agent/tools.rb +45 -5
  22. data/lib/parse/api/aggregate.rb +7 -1
  23. data/lib/parse/api/cloud_functions.rb +12 -4
  24. data/lib/parse/api/hooks.rb +46 -9
  25. data/lib/parse/api/objects.rb +16 -2
  26. data/lib/parse/api/path_segment.rb +33 -0
  27. data/lib/parse/api/server.rb +94 -0
  28. data/lib/parse/api/users.rb +58 -2
  29. data/lib/parse/atlas_search.rb +7 -7
  30. data/lib/parse/client/body_builder.rb +5 -0
  31. data/lib/parse/client/protocol.rb +4 -0
  32. data/lib/parse/client.rb +55 -2
  33. data/lib/parse/embeddings/spend_cap.rb +255 -0
  34. data/lib/parse/embeddings.rb +1 -0
  35. data/lib/parse/live_query/client.rb +3 -1
  36. data/lib/parse/live_query/subscription.rb +32 -5
  37. data/lib/parse/model/acl.rb +4 -2
  38. data/lib/parse/model/classes/audience.rb +52 -4
  39. data/lib/parse/model/classes/user.rb +180 -3
  40. data/lib/parse/model/core/embed_managed.rb +113 -0
  41. data/lib/parse/model/core/querying.rb +3 -1
  42. data/lib/parse/model/core/vector_searchable.rb +161 -0
  43. data/lib/parse/model/object.rb +28 -5
  44. data/lib/parse/mongodb.rb +7 -1
  45. data/lib/parse/pipeline_security.rb +5 -3
  46. data/lib/parse/query/constraints.rb +29 -0
  47. data/lib/parse/query.rb +265 -27
  48. data/lib/parse/retrieval/agent_tool.rb +49 -0
  49. data/lib/parse/retrieval/reranker/cohere.rb +218 -0
  50. data/lib/parse/retrieval/reranker.rb +157 -0
  51. data/lib/parse/retrieval/retriever.rb +110 -23
  52. data/lib/parse/stack/version.rb +1 -1
  53. data/lib/parse/stack.rb +17 -0
  54. data/lib/parse/two_factor_auth/user_extension.rb +123 -31
  55. data/lib/parse/vector_search/hybrid.rb +578 -0
  56. data/lib/parse/webhooks/payload.rb +252 -7
  57. data/lib/parse/webhooks/trigger_audit.rb +502 -0
  58. data/lib/parse/webhooks.rb +215 -3
  59. data/scripts/docker/Dockerfile.parse +5 -1
  60. data/scripts/docker/docker-compose.test.yml +31 -0
  61. data/scripts/docker/docker-compose.verifyemail.yml +4 -0
  62. data/scripts/docker/preflight.sh +76 -0
  63. data/scripts/start-parse.sh +52 -4
  64. metadata +15 -1
@@ -0,0 +1,218 @@
1
+ # encoding: UTF-8
2
+ # frozen_string_literal: true
3
+
4
+ require "json"
5
+ require "uri"
6
+ require_relative "../reranker"
7
+
8
+ module Parse
9
+ module Retrieval
10
+ module Reranker
11
+ # Cohere cross-encoder reranker. Wraps `POST /v2/rerank`.
12
+ #
13
+ # Cohere's rerank API takes a query plus a list of document strings
14
+ # and returns a relevance-ordered list of `{ index, relevance_score }`
15
+ # objects. It is a distinct endpoint from `/v1/embed` /
16
+ # `/v2/embed` — do NOT confuse it with
17
+ # {Parse::Embeddings::Cohere} (the embeddings provider).
18
+ #
19
+ # The HTTP stack mirrors the embeddings provider's hardening:
20
+ # explicit `proxy: nil` unless opted in, bounded timeouts, capped
21
+ # retries with backoff on 429/5xx, response-size cap, and a redacted
22
+ # `#inspect`.
23
+ #
24
+ # @example
25
+ # reranker = Parse::Retrieval::Reranker::Cohere.new(
26
+ # api_key: ENV.fetch("COHERE_API_KEY"),
27
+ # model: "rerank-v3.5",
28
+ # )
29
+ # reranker.rerank(query: "rain songs", documents: lyrics, top_n: 5)
30
class Cohere < Base
  # Raised on HTTP 401 (the API key was rejected).
  class AuthenticationError < Error; end
  # Raised when HTTP 429 persists after the retry budget is spent.
  class RateLimitError < Error; end
  # Raised when 5xx responses or connection/timeout failures persist
  # after the retry budget is spent.
  class TransientError < Error; end
  # Raised for any other non-2xx status (not retried).
  class BadRequestError < Error; end

  DEFAULT_BASE_URL = "https://api.cohere.com/v2"
  DEFAULT_MODEL = "rerank-v3.5"
  DEFAULT_TIMEOUT = 30
  DEFAULT_OPEN_TIMEOUT = 5
  DEFAULT_MAX_RETRIES = 2

  # Cohere documents a cap of 1000 documents per rerank call; the
  # {Base::MAX_DOCUMENTS} cap (1000) already enforces this, so no
  # separate document-count limit is defined here.
  #
  # Largest response body (bytes) that {#parse_json_body!} will parse.
  MAX_RESPONSE_BYTES = 5 * 1024 * 1024

  # @param api_key [String] Cohere API key.
  # @param model [String] rerank model (default {DEFAULT_MODEL}).
  # @param base_url [String] API base (default {DEFAULT_BASE_URL}).
  # @param timeout [Integer] read timeout (seconds).
  # @param open_timeout [Integer] connect timeout (seconds).
  # @param max_retries [Integer] retry budget for 429 / 5xx /
  #   transient connection errors.
  # @param allow_faraday_proxy [Boolean] permit Faraday to honor
  #   `*_proxy` env vars (default false — explicit `proxy: nil`).
  # @raise [ArgumentError] when the api_key, model, base_url or
  #   max_retries is invalid.
  def initialize(api_key:, model: DEFAULT_MODEL, base_url: DEFAULT_BASE_URL,
                 timeout: DEFAULT_TIMEOUT, open_timeout: DEFAULT_OPEN_TIMEOUT,
                 max_retries: DEFAULT_MAX_RETRIES, allow_faraday_proxy: false)
    validate_api_key!(api_key)
    @api_key = api_key
    @model = model.to_s
    raise ArgumentError, "Reranker::Cohere: model must be non-empty." if @model.empty?
    @base_url = base_url.to_s
    validate_base_url!(@base_url)
    @timeout = Integer(timeout)
    @open_timeout = Integer(open_timeout)
    @max_retries = Integer(max_retries)
    raise ArgumentError, "Reranker::Cohere: max_retries must be >= 0." if @max_retries.negative?
    @allow_faraday_proxy = allow_faraday_proxy ? true : false
    @connection = build_connection
  end

  # @return [String] the rerank model name.
  attr_reader :model

  # Redacted representation: never exposes the API key, and shows only
  # the host portion of the base URL.
  # @return [String]
  def inspect
    "#<#{self.class} model=#{@model.inspect} base=#{safe_base_host.inspect} " \
      "retries=#{@max_retries} api_key=[REDACTED]>"
  end

  protected

  # Adapter hook invoked by {Base#rerank}: POSTs the rerank request and
  # converts the response rows into {Result}s. Range checking, sorting
  # and top-N bounding are left to the base class.
  #
  # @param query [String]
  # @param documents [Array<String>]
  # @param top_n [Integer]
  # @return [Array<Result>]
  def rerank_scores(query, documents, top_n)
    require_faraday!
    body = {
      "model" => @model,
      "query" => query,
      "documents" => documents,
      "top_n" => top_n,
    }
    payload = post_rerank(body)
    extract_results!(payload, documents.length)
  end

  private

  # POST the request body to `rerank`, retrying 429 / 5xx responses and
  # timeout / connection failures until more than `@max_retries`
  # attempts have failed. A 401 and any other non-2xx status raise
  # immediately without retrying.
  #
  # @param body [Hash] JSON-serializable request payload.
  # @return [Object] the parsed JSON response body.
  # @raise [AuthenticationError, RateLimitError, TransientError, BadRequestError]
  def post_rerank(body)
    attempts = 0
    loop do
      attempts += 1
      begin
        response = @connection.post("rerank") { |req| req.body = body.to_json }
      rescue Faraday::TimeoutError, Faraday::ConnectionFailed => e
        raise TransientError, "Reranker::Cohere: #{e.class} after #{attempts} attempt(s)." if attempts > @max_retries
        sleep(backoff_seconds(attempts))
        next
      end

      status = response.status
      return parse_json_body!(response.body) if status >= 200 && status < 300

      case status
      when 401
        raise AuthenticationError, "Reranker::Cohere: 401 Unauthorized — check api_key."
      when 429
        raise RateLimitError, "Reranker::Cohere: 429 rate limited after #{attempts} attempt(s)." if attempts > @max_retries
        # Prefer the server-provided Retry-After hint when it is usable.
        sleep(retry_after_seconds(response) || backoff_seconds(attempts))
      when 500..599
        raise TransientError, "Reranker::Cohere: #{status} after #{attempts} attempt(s)." if attempts > @max_retries
        sleep(backoff_seconds(attempts))
      else
        raise BadRequestError, "Reranker::Cohere: #{status} from POST /rerank."
      end
    end
  end

  # Cohere v2 /rerank response shape:
  #   { "id": "...", "results": [ { "index": 0, "relevance_score": 0.98 }, ... ],
  #     "meta": { "billed_units": { "search_units": 1 } } }
  #
  # Only the container shapes are checked here; the individual `index`
  # and `relevance_score` values are passed through unvalidated.
  #
  # @param payload [Object] parsed JSON response.
  # @param _doc_count [Integer] accepted for call-site symmetry; unused.
  # @return [Array<Result>]
  # @raise [InvalidResponseError] when the payload is structurally wrong.
  def extract_results!(payload, _doc_count)
    unless payload.is_a?(Hash)
      raise InvalidResponseError, "Reranker::Cohere: response body is not a JSON object."
    end
    results = payload["results"]
    unless results.is_a?(Array)
      raise InvalidResponseError, "Reranker::Cohere: response.results is not an Array."
    end
    results.map do |r|
      unless r.is_a?(Hash)
        raise InvalidResponseError, "Reranker::Cohere: rerank result is not an object (#{r.inspect})."
      end
      Result.new(index: r["index"], relevance_score: r["relevance_score"])
    end
  end

  # Parse a response body as JSON after enforcing the size cap. The
  # check runs on the already-received body, so it bounds parser work
  # rather than the bytes read off the wire.
  #
  # @param body [#to_s]
  # @return [Object] parsed JSON.
  # @raise [InvalidResponseError] when oversized or not valid JSON.
  def parse_json_body!(body)
    s = body.to_s
    if s.bytesize > MAX_RESPONSE_BYTES
      raise InvalidResponseError,
            "Reranker::Cohere: response body exceeds #{MAX_RESPONSE_BYTES} bytes (#{s.bytesize})."
    end
    JSON.parse(s, max_nesting: 32)
  rescue JSON::ParserError => e
    raise InvalidResponseError, "Reranker::Cohere: response is not valid JSON (#{e.message})."
  end

  # Build the Faraday connection carrying the auth / content headers
  # and the configured timeouts.
  # @return [Faraday::Connection]
  def build_connection
    require_faraday!
    # `defined?` avoids the inline `rescue` modifier, which would hide
    # any StandardError raised while resolving the constant.
    version = defined?(Parse::Stack::VERSION) ? Parse::Stack::VERSION : "0"
    headers = {
      "Authorization" => "Bearer #{@api_key}",
      "Content-Type" => "application/json",
      "Accept" => "application/json",
      "User-Agent" => "parse-stack-reranker/#{version}",
    }
    # base_url must end with a trailing slash so Faraday resolves the
    # relative "rerank" path under /v2/ rather than replacing it.
    base = @base_url.end_with?("/") ? @base_url : "#{@base_url}/"
    faraday_opts = { url: base, headers: headers }
    faraday_opts[:proxy] = nil unless @allow_faraday_proxy
    conn = Faraday.new(**faraday_opts) do |f|
      f.options.timeout = @timeout
      f.options.open_timeout = @open_timeout
      f.adapter Faraday.default_adapter
    end
    # Also clear the proxy on the built connection, in addition to the
    # `proxy: nil` constructor option above.
    conn.proxy = nil if !@allow_faraday_proxy && conn.respond_to?(:proxy=)
    conn
  end

  # Exponential backoff: 0.5s, 1s, 2s, ... capped at 30s.
  # @param attempt [Integer] 1-based attempt number.
  # @return [Float] seconds to sleep.
  def backoff_seconds(attempt)
    [0.5 * (2**(attempt - 1)), 30.0].min
  end

  # Read a numeric Retry-After header, capped at 60s. A missing,
  # non-numeric or non-positive value yields nil so the caller falls
  # back to {#backoff_seconds}.
  # @return [Float, nil]
  def retry_after_seconds(response)
    ra = response.respond_to?(:headers) ? response.headers["retry-after"] || response.headers["Retry-After"] : nil
    return nil unless ra
    v = ra.to_f
    v.positive? ? [v, 60.0].min : nil
  end

  # @raise [ArgumentError] unless api_key is a non-empty String.
  def validate_api_key!(api_key)
    unless api_key.is_a?(String) && !api_key.empty?
      raise ArgumentError, "Reranker::Cohere: api_key must be a non-empty String."
    end
  end

  # Require an http(s) URL that names a host. URI::HTTPS subclasses
  # URI::HTTP, so a single `is_a?` covers both schemes.
  # @raise [ArgumentError] for a non-http(s), host-less or unparseable URL.
  def validate_base_url!(base_url)
    uri = URI.parse(base_url)
    unless uri.is_a?(URI::HTTP)
      raise ArgumentError, "Reranker::Cohere: base_url must be http(s) (got #{base_url.inspect})."
    end
    if uri.host.nil? || uri.host.empty?
      raise ArgumentError, "Reranker::Cohere: base_url must include a host (got #{base_url.inspect})."
    end
  rescue URI::InvalidURIError => e
    raise ArgumentError, "Reranker::Cohere: invalid base_url #{base_url.inspect} (#{e.message})."
  end

  # Host portion of the base URL for {#inspect}; "?" if it cannot be parsed.
  def safe_base_host
    URI.parse(@base_url).host
  rescue StandardError
    "?"
  end

  # Lazily load Faraday so the core retrieval path does not depend on it.
  # @raise [Error] when the gem is not installed.
  def require_faraday!
    require "faraday" unless defined?(Faraday)
  rescue LoadError
    raise Error, "Reranker::Cohere requires the `faraday` gem."
  end
end
216
+ end
217
+ end
218
+ end
@@ -0,0 +1,157 @@
1
+ # encoding: UTF-8
2
+ # frozen_string_literal: true
3
+
4
+ module Parse
5
+ module Retrieval
6
+ # Cross-encoder reranking for retrieved documents.
7
+ #
8
+ # A reranker takes a query and a list of candidate document texts and
9
+ # returns a relevance-ordered scoring. It runs AFTER the (vector,
10
+ # lexical, or hybrid) retrieval step and BEFORE chunking, reordering
11
+ # the retrieved documents by a more expensive cross-encoder relevance
12
+ # model than the first-stage similarity score.
13
+ #
14
+ # == Protocol
15
+ #
16
+ # A reranker is any object that responds to:
17
+ #
18
+ # #rerank(query:, documents:, top_n: nil) -> Array<Result>
19
+ #
20
+ # where `documents` is an Array<String> and the return is an Array of
21
+ # {Result} (`index` into `documents`, plus `relevance_score`),
22
+ # descending by relevance. Implementations MUST:
23
+ #
24
+ # * Return at most `documents.length` results (and at most `top_n`
25
+ # when given).
26
+ # * Use 0-based `index` values that are valid positions in the input.
27
+ # * Never raise for an empty `documents` list — return `[]`.
28
+ #
29
+ # {Base} provides input validation and result normalization so
30
+ # adapters only implement the network call ({Base#rerank_scores}).
31
+ #
32
+ # @example wiring into retrieve
33
+ # reranker = Parse::Retrieval::Reranker::Cohere.new(api_key: ENV.fetch("COHERE_API_KEY"))
34
+ # chunks = Parse::Retrieval.retrieve(query: q, klass: Article, k: 30,
35
+ # rerank: reranker, rerank_top_n: 5)
36
module Reranker
  # The Cohere `/v2/rerank` adapter is loaded lazily — it requires
  # Faraday, which the core retrieval path does not.
  autoload :Cohere, ::File.expand_path("reranker/cohere", __dir__)

  # Base error for the reranker layer. Adapters raise subclasses.
  class Error < StandardError; end

  # Raised when a reranker returns a response that doesn't satisfy the
  # protocol (bad index, non-numeric score, unexpected element type).
  class InvalidResponseError < Error; end

  # A single rerank result: the 0-based position of a document in the
  # input list, plus its cross-encoder relevance score (higher is more
  # relevant; range is provider-defined).
  Result = Struct.new(:index, :relevance_score, keyword_init: true)

  # Common superclass: validates inputs, bounds `top_n`, and
  # normalizes raw `(index, score)` pairs into sorted {Result}s.
  # Concrete adapters implement {#rerank_scores}.
  class Base
    # Hard cap on the number of documents a single rerank call may
    # carry, to bound provider cost / payload size.
    MAX_DOCUMENTS = 1000

    # Rerank `documents` against `query`.
    #
    # @param query [String] the natural-language query.
    # @param documents [Array<String>] candidate document texts
    #   (each element is coerced with `to_s`).
    # @param top_n [Integer, nil] return at most this many results;
    #   nil, zero or a negative value means "all documents".
    # @return [Array<Result>] descending by `relevance_score`; `[]`
    #   when `documents` is empty.
    # @raise [ArgumentError] for a blank query or more than
    #   {MAX_DOCUMENTS} documents.
    # @raise [InvalidResponseError] when the adapter returns a
    #   malformed result set.
    def rerank(query:, documents:, top_n: nil)
      unless query.is_a?(String) && !query.strip.empty?
        raise ArgumentError, "#{self.class}#rerank: query must be a non-empty String."
      end
      docs = Array(documents).map(&:to_s)
      return [] if docs.empty?
      if docs.length > MAX_DOCUMENTS
        raise ArgumentError,
              "#{self.class}#rerank: #{docs.length} documents exceeds MAX_DOCUMENTS=#{MAX_DOCUMENTS}."
      end
      n = top_n.nil? ? docs.length : [Integer(top_n), docs.length].min
      n = docs.length if n <= 0

      pairs = rerank_scores(query, docs, n)
      normalize_results(pairs, docs.length, n)
    end

    protected

    # Adapter hook: return an Array of `[index, score]` pairs (or
    # {Result}s / Hashes) for `documents`. `top_n` is a hint; the
    # base class re-bounds and re-sorts regardless.
    #
    # @param query [String]
    # @param documents [Array<String>]
    # @param top_n [Integer]
    # @return [Array<Array(Integer, Numeric)>, Array<Result>]
    def rerank_scores(query, documents, top_n)
      raise NotImplementedError, "#{self.class}#rerank_scores must be implemented."
    end

    private

    # Validate every raw element, then sort descending by score
    # (ties broken by input position), drop duplicate indices and
    # bound to `top_n`.
    #
    # Sorting happens BEFORE de-duplication so that, when an adapter
    # reports the same index twice, the highest-scoring occurrence is
    # the one that survives.
    #
    # @raise [InvalidResponseError] on any malformed element.
    def normalize_results(pairs, doc_count, top_n)
      validated = Array(pairs).map { |entry| build_result(entry, doc_count) }
      ranked = validated.sort_by { |r| [-r.relevance_score, r.index] }
      # Array#uniq with a block keeps the first occurrence per key.
      ranked.uniq(&:index).first(top_n)
    end

    # Turn one raw adapter element into a validated {Result}.
    def build_result(entry, doc_count)
      idx, score = unpack_entry(entry)
      i = coerce_index(idx)
      unless i >= 0 && i < doc_count
        raise InvalidResponseError,
              "#{self.class}: rerank index #{i} out of range 0...#{doc_count}."
      end
      unless score.is_a?(Numeric) && score.to_f.finite?
        raise InvalidResponseError,
              "#{self.class}: rerank relevance_score #{score.inspect} is not a finite number."
      end
      Result.new(index: i, relevance_score: score.to_f)
    end

    # Extract `[index, score]` from a Result, a pair Array, or a Hash
    # with symbol or string keys.
    def unpack_entry(entry)
      case entry
      when Result then [entry.index, entry.relevance_score]
      when Array then [entry[0], entry[1]]
      when Hash then [entry[:index] || entry["index"], entry[:relevance_score] || entry["relevance_score"]]
      else
        raise InvalidResponseError, "#{self.class}: unexpected rerank result element #{entry.inspect}."
      end
    end

    # Coerce an index to Integer. A missing, non-numeric or fractional
    # index is a protocol violation, so it surfaces as
    # InvalidResponseError rather than a raw TypeError / ArgumentError
    # (or, for a fractional Float, a silent truncation).
    def coerce_index(idx)
      if idx.is_a?(Float) && (!idx.finite? || idx != idx.floor)
        raise InvalidResponseError, "#{self.class}: rerank index #{idx.inspect} is not an integer."
      end
      Integer(idx)
    rescue ArgumentError, TypeError, RangeError
      raise InvalidResponseError, "#{self.class}: rerank index #{idx.inspect} is not an integer."
    end
  end

  # Deterministic, zero-network reranker for tests and offline use.
  # Scores each document by lexical token overlap with the query
  # (shared unique lowercased word count divided by the query's unique
  # token count; ties are broken by input order in {Base}). No
  # external dependency, fully reproducible.
  class Fixture < Base
    protected

    # @return [Array<Array(Integer, Float)>] one `[index, score]` pair
    #   per document, in input order.
    def rerank_scores(query, documents, _top_n)
      q_tokens = tokenize(query)
      # Guard against division by zero when the query has no tokens.
      denom = [q_tokens.length, 1].max
      documents.each_with_index.map do |doc, i|
        overlap = (q_tokens & tokenize(doc)).length
        [i, overlap.to_f / denom]
      end
    end

    private

    # Unique lowercased ASCII alphanumeric tokens of `text`.
    def tokenize(text)
      text.to_s.downcase.scan(/[a-z0-9]+/).uniq
    end
  end
end
156
+ end
157
+ end
@@ -3,6 +3,7 @@
3
3
 
4
4
  require_relative "chunker"
5
5
  require_relative "chunk"
6
+ require_relative "reranker"
6
7
 
7
8
  module Parse
8
9
  # Retrieval-augmented-generation (RAG) helpers. `Parse::RAG` is a
@@ -98,25 +99,33 @@ module Parse
98
99
  # propagates and aborts the whole call (fail-closed). Kept as an
99
100
  # injection point so this model-layer method stays free of any
100
101
  # agent-layer dependency.
101
- # @param hybrid [Object, nil] reserved raises {NotImplementedError}
102
- # if truthy. Hybrid (vector + lexical) retrieval lands in a later
103
- # release; the kwarg locks the API shape now.
104
- # @param rerank [Object, nil] reserved raises {NotImplementedError}
105
- # if non-nil. Cross-encoder rerank lands in a later release.
102
+ # @param hybrid [Boolean, Hash, nil] when truthy, fuse a lexical
103
+ # Atlas Search branch with the `$vectorSearch` branch via
104
+ # reciprocal-rank fusion (see {Parse::Core::VectorSearchable#hybrid_search}).
105
+ # `true` uses defaults (lexical query = `query`); a Hash may carry
106
+ # `:lexical`, `:vector`, and `:fusion` sub-configs.
107
+ # @param rerank [#rerank, nil] a {Parse::Retrieval::Reranker::Base}
108
+ # (or any object answering `#rerank(query:, documents:, top_n:)`).
109
+ # When present, retrieved documents are reordered by the
110
+ # cross-encoder relevance score BEFORE chunking, and the chunk score
111
+ # becomes the rerank relevance score.
112
+ # @param rerank_top_n [Integer, nil] keep only the top-N documents
113
+ # after reranking (defaults to all retrieved documents).
106
114
  # @param scope_opts [Hash] ACL/CLP scope kwargs forwarded verbatim to
107
- # `find_similar`: `session_token:` / `acl_user:` / `acl_role:` /
108
- # `master:`.
115
+ # `find_similar` / `hybrid_search`: `session_token:` / `acl_user:` /
116
+ # `acl_role:` / `master:`.
109
117
  # @return [Array<Parse::Retrieval::Chunk>] descending by score; chunk
110
118
  # order within a document is positional.
111
119
  def retrieve(query:, klass: nil, field: nil, text_field: nil, k: 10,
112
120
  filter: nil, vector_filter: nil, chunker: nil,
113
121
  tenant_scope: nil, score_quantize: false,
114
122
  source_transform: nil, hybrid: nil, rerank: nil,
115
- **scope_opts)
116
- raise NotImplementedError,
117
- "Parse::Retrieval.retrieve: `hybrid:` is reserved for a future release." if hybrid
118
- raise NotImplementedError,
119
- "Parse::Retrieval.retrieve: `rerank:` is reserved for a future release." if rerank
123
+ rerank_top_n: nil, **scope_opts)
124
+ if rerank && !rerank.respond_to?(:rerank)
125
+ raise ArgumentError,
126
+ "Parse::Retrieval.retrieve: `rerank:` must respond to #rerank " \
127
+ "(a Parse::Retrieval::Reranker::Base); got #{rerank.class}."
128
+ end
120
129
 
121
130
  # `class:` alias (reserved word — arrives via **scope_opts).
122
131
  klass ||= scope_opts.delete(:class)
@@ -129,25 +138,60 @@ module Parse
129
138
  resolved_text_field = (text_field || infer_text_field!(klass)).to_sym
130
139
  merged_vector_filter = fold_tenant_scope(klass, vector_filter, tenant_scope)
131
140
  chunker ||= default_chunker
141
+ text_wire = wire_name(klass, resolved_text_field)
132
142
 
133
- raw_hits = klass.find_similar(
134
- text: query,
135
- k: k,
136
- field: field,
137
- filter: filter,
138
- vector_filter: merged_vector_filter,
139
- raw: true,
140
- **scope_opts,
141
- )
143
+ raw_hits =
144
+ if hybrid
145
+ fetch_hybrid_hits(klass, query, k, field, filter, merged_vector_filter,
146
+ tenant_scope, hybrid, scope_opts)
147
+ else
148
+ klass.find_similar(
149
+ text: query, k: k, field: field, filter: filter,
150
+ vector_filter: merged_vector_filter, raw: true, **scope_opts,
151
+ )
152
+ end
142
153
  return [] if raw_hits.nil? || raw_hits.empty?
143
154
 
144
- text_wire = wire_name(klass, resolved_text_field)
155
+ raw_hits = apply_rerank(rerank, query, raw_hits, text_wire, rerank_top_n) if rerank
145
156
 
146
157
  raw_hits.flat_map do |doc|
147
158
  build_chunks_for(doc, klass, text_wire, score_quantize, source_transform, chunker)
148
159
  end
149
160
  end
150
161
 
162
+ # @!visibility private
163
+ # Run the hybrid (lexical + vector) branch and return fused raw rows.
164
+ # Tenant scope is folded into BOTH branches: the vector branch via the
165
+ # Atlas pre-filter (`merged_vector_filter`) and the lexical branch via
166
+ # a post-`$search` `$match` (so neither branch leaks cross-tenant
167
+ # document existence).
168
# Run the hybrid (lexical + vector) branch and return fused raw rows.
#
# @param klass [Class] model class answering `hybrid_search`.
# @param query [String] user query; also the default lexical query.
# @param k [Integer] number of rows requested.
# @param field [Symbol, String, nil] vector field; applied only when the
#   caller's `vector` sub-config does not set one.
# @param filter [Hash, nil] caller filter applied to both branches.
# @param merged_vector_filter [Hash, nil] vector pre-filter with the
#   tenant scope already folded in.
# @param tenant_scope [Hash, nil] `{ field:, value: }` tenant constraint.
# @param hybrid [Boolean, Hash] `true` for defaults, or a Hash carrying
#   `:lexical`, `:vector` and `:fusion` sub-configs (symbol or string keys).
# @param scope_opts [Hash] scope kwargs forwarded to `hybrid_search`.
# @return [Object] whatever `klass.hybrid_search(..., raw: true)` returns.
def fetch_hybrid_hits(klass, query, k, field, filter, merged_vector_filter,
                      tenant_scope, hybrid, scope_opts)
  cfg = hybrid.is_a?(Hash) ? hybrid : {}
  # Sub-configs may arrive with string keys just like the top-level
  # config. Normalize them to symbols (into a fresh Hash, so the caller's
  # config is never mutated) so a string-keyed "filter" / "vector_filter"
  # goes through the same tenant-authoritative merge below instead of
  # being forwarded unmerged next to it.
  symbolize = lambda do |section|
    (section || {}).transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
  end
  lexical = symbolize.call(cfg[:lexical] || cfg["lexical"])
  vector = symbolize.call(cfg[:vector] || cfg["vector"])
  fusion = cfg[:fusion] || cfg["fusion"]

  lexical[:query] ||= query
  # Tenant scope must be AUTHORITATIVE in BOTH branches. `merge_filters`
  # is last-wins, so placing the tenant constraint LAST guarantees its key
  # survives any caller collision: callers can narrow the result set but
  # never replace the tenant constraint.
  lexical[:filter] = merge_filters(filter, lexical[:filter], tenant_filter_hash(klass, tenant_scope))
  vector[:field] ||= field unless field.nil?
  vector[:filter] = merge_filters(vector[:filter], filter)
  vector[:vector_filter] = merge_filters(vector[:vector_filter], merged_vector_filter)

  klass.hybrid_search(
    text: query, lexical: lexical, vector: vector,
    k: k, fusion: fusion, raw: true, **scope_opts,
  )
end
194
+
151
195
  # @!visibility private
152
196
  def resolve_class!(klass)
153
197
  resolved =
@@ -227,10 +271,53 @@ module Parse
227
271
  doc[sym]
228
272
  end
229
273
 
274
+ # @!visibility private
275
+ # Reorder retrieved documents by a cross-encoder reranker and stamp
276
+ # each surviving hit with its `_rerank_score`. The reranker scores the
277
+ # document's presentation text (the same `text_field` used for
278
+ # chunking). Index alignment between `documents` and `raw_hits` is
279
+ # preserved so the returned `index` maps back to the right hit.
280
# Reorder `raw_hits` by the reranker's verdict. The text under
# `text_wire` of every hit is sent to the reranker in hit order, so a
# returned `index` addresses the matching entry of `raw_hits`. Each
# surviving hit is a shallow copy stamped with "_rerank_score"; results
# whose index resolves to no hit are dropped.
def apply_rerank(reranker, query, raw_hits, text_wire, top_n)
  texts = raw_hits.map { |hit| fetch_field(hit, text_wire, text_wire).to_s }
  ranking = reranker.rerank(query: query, documents: texts, top_n: top_n)

  ranking.each_with_object([]) do |entry, reordered|
    source = raw_hits[entry.index]
    next if source.nil?

    stamped = source.dup
    stamped["_rerank_score"] = entry.relevance_score
    reordered << stamped
  end
end
291
+
292
+ # @!visibility private
293
+ # Convert a `{ field:, value: }` tenant scope into a `{ wire => value }`
294
+ # filter hash (the lexical branch's post-`$search` `$match`), or nil.
295
# Translate a `{ field:, value: }` tenant scope (symbol or string keys)
# into a single-entry `{ wire_name => value }` filter. Yields nil when no
# scope, or no field, was supplied.
def tenant_filter_hash(klass, tenant_scope)
  return nil if tenant_scope.nil?

  scope_field = tenant_scope[:field] || tenant_scope["field"]
  return nil if scope_field.nil?

  # `key?` (not `||`) so an explicit false / nil :value is honored.
  scope_value =
    if tenant_scope.key?(:value)
      tenant_scope[:value]
    else
      tenant_scope["value"]
    end
  { wire_name(klass, scope_field) => scope_value }
end
302
+
303
+ # @!visibility private
304
+ # Shallow-merge non-empty filter hashes (left-to-right; later keys
305
+ # win). Returns nil when nothing is left to apply.
306
# Fold the given filter hashes into one, left to right, with later keys
# overriding earlier ones (shallow merge). nil and empty filters are
# ignored; nil is returned when nothing remains.
def merge_filters(*filters)
  combined = filters.each_with_object({}) do |candidate, acc|
    blank = candidate.nil? || (candidate.respond_to?(:empty?) && candidate.empty?)
    acc.merge!(candidate) unless blank
  end
  combined.empty? ? nil : combined
end
314
+
230
315
  # @!visibility private
231
316
  def build_chunks_for(doc, klass, text_wire, score_quantize, source_transform, chunker)
232
317
  object_id = (doc["_id"] || doc[:_id] || doc["objectId"] || doc[:objectId]).to_s
233
- raw_score = doc["_vscore"] || doc[:_vscore]
318
+ raw_score = doc["_rerank_score"] || doc[:_rerank_score] ||
319
+ doc["_hybrid_score"] || doc[:_hybrid_score] ||
320
+ doc["_vscore"] || doc[:_vscore]
234
321
  score = quantize_score(raw_score, score_quantize)
235
322
 
236
323
  text = fetch_field(doc, text_wire, text_wire)
@@ -6,6 +6,6 @@ module Parse
6
6
  # The Parse Server SDK for Ruby
7
7
  module Stack
8
8
  # The current version.
9
- VERSION = "5.3.0"
9
+ VERSION = "5.4.0"
10
10
  end
11
11
  end
data/lib/parse/stack.rb CHANGED
@@ -940,6 +940,23 @@ module Parse
940
940
  end
941
941
  Parse.client.send_analytics(event_name, dimensions, **opts)
942
942
  end
943
+
944
+ # Capability probe against the connected Parse Server, delegated to the
945
+ # default client. Builds on the memoized `serverInfo` fetch — see
946
+ # {Parse::API::Server#server_supports?} for the capability table and the
947
+ # fail-open-to-modern semantics.
948
+ # @param feature [Symbol] a capability key.
949
+ # @return [Boolean] whether the connected server supports the feature.
950
def server_supports?(feature)
  # Pure delegation: the default client answers the capability question.
  default_client = Parse.client
  default_client.server_supports?(feature)
end
953
+
954
+ # The coarse `features` block advertised by `GET /serverInfo`, delegated
955
+ # to the default client. @see Parse::API::Server#server_features
956
+ # @return [Hash] the advertised features block, or `{}` if unavailable.
957
def server_features
  # Pure delegation: returns whatever the default client reports.
  default_client = Parse.client
  default_client.server_features
end
943
960
  end
944
961
 
945
962
  # Error raised when {Parse::CreateLock#synchronize} cannot acquire the