parse-stack-next 5.4.1 → 5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +344 -0
- data/Gemfile.lock +1 -1
- data/README.md +45 -6
- data/docs/atlas_vector_search_guide.md +314 -19
- data/lib/parse/api/users.rb +10 -0
- data/lib/parse/client.rb +19 -1
- data/lib/parse/embeddings/batch_embedder.rb +188 -0
- data/lib/parse/embeddings/cache.rb +322 -0
- data/lib/parse/embeddings/cohere.rb +31 -18
- data/lib/parse/embeddings/image_fetch.rb +347 -0
- data/lib/parse/embeddings/provider.rb +17 -11
- data/lib/parse/embeddings/spend_cap.rb +117 -3
- data/lib/parse/embeddings/voyage.rb +34 -25
- data/lib/parse/embeddings.rb +40 -3
- data/lib/parse/model/acl.rb +15 -11
- data/lib/parse/model/core/embed_managed.rb +243 -14
- data/lib/parse/model/core/vector_searchable.rb +157 -8
- data/lib/parse/query/constraint.rb +22 -0
- data/lib/parse/query/constraints.rb +271 -250
- data/lib/parse/query.rb +233 -42
- data/lib/parse/retrieval/agent_tool.rb +21 -14
- data/lib/parse/retrieval/retriever.rb +84 -0
- data/lib/parse/schema/search_index_migrator.rb +48 -1
- data/lib/parse/stack/version.rb +1 -1
- data/lib/parse/vector_search/hybrid.rb +39 -1
- data/lib/parse/vector_search.rb +34 -0
- data/lib/parse/webhooks/payload.rb +7 -1
- data/lib/parse/webhooks.rb +107 -21
- metadata +4 -1
|
@@ -55,6 +55,22 @@ module Parse
|
|
|
55
55
|
# field and the caller didn't pass an explicit `index:` kwarg.
|
|
56
56
|
class IndexNotResolved < ArgumentError
end
|
|
57
57
|
|
|
58
|
+
# Raised (under `Parse::VectorSearch.index_drift_policy = :raise`)
# when first-query verification finds the deployed vectorSearch
# index disagreeing with the model declaration — wrong
# `numDimensions`, wrong `similarity`, or a registered
# tenant-scope field missing from the index's `filter` paths.
# Under the default `:warn` policy the same findings emit a
# single `[Parse::VectorSearch:DRIFT]` warning instead.
class IndexDriftError < StandardError
  # @return [Array<String>] human-readable drift findings; always a
  #   frozen Array (empty when none were supplied).
  attr_reader :findings

  # @param message [String, nil] error message; when omitted Ruby falls
  #   back to the class name, which also lets a bare
  #   `raise IndexDriftError` work.
  # @param findings [Array<String>, nil] the individual drift findings.
  #   `nil` is treated as "no findings".
  def initialize(message = nil, findings: [])
    # Snapshot the list so later mutation of the caller's array cannot
    # change what a rescuer reads from the exception.
    @findings = Array(findings).dup.freeze
    super(message)
  end
end
|
|
73
|
+
|
|
58
74
|
# Raised by the `find_similar(text:)` overload when the resolved
|
|
59
75
|
# `:vector` property has no `provider:` (and therefore no way to
|
|
60
76
|
# turn `text:` into a query vector). Distinct from
|
|
@@ -367,13 +383,23 @@ module Parse
|
|
|
367
383
|
"on the property, or pass an explicit `vector:`."
|
|
368
384
|
end
|
|
369
385
|
provider = Parse::Embeddings.provider(provider_name)
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
386
|
+
# Spend cap: every query-embed path (find_similar(text:),
|
|
387
|
+
# hybrid_search(text:), Retrieval.retrieve) funnels through this
|
|
388
|
+
# method, so charging here closes the "direct callers bypass the
|
|
389
|
+
# cap" gap. No-op when no limit is configured, or when an
|
|
390
|
+
# upstream caller (the semantic_search agent tool) has already
|
|
391
|
+
# charged with per-tenant identity (SpendCap.with_precharged).
|
|
392
|
+
#
|
|
393
|
+
# Deliberate: the charge runs BEFORE the cache lookup, so cache
|
|
394
|
+
# hits bill at full price. The cap bounds query *volume* (an
|
|
395
|
+
# abuse/probing control), not just provider spend — a caller
|
|
396
|
+
# replaying one cached query must not get unlimited throughput.
|
|
397
|
+
Parse::Embeddings::SpendCap.charge_query!(text)
|
|
398
|
+
# Query-embed cache: repeated identical queries skip the
|
|
399
|
+
# provider round-trip when Parse::Embeddings::Cache.enable! has
|
|
400
|
+
# been called; pass-through (with the provider's own response
|
|
401
|
+
# validation preserved) when disabled.
|
|
402
|
+
Parse::Embeddings::Cache.fetch_vector(provider, text, input_type: :search_query)
|
|
377
403
|
end
|
|
378
404
|
|
|
379
405
|
def coerce_query_vector(vector)
|
|
@@ -387,7 +413,10 @@ module Parse
|
|
|
387
413
|
end
|
|
388
414
|
|
|
389
415
|
def resolve_vector_index!(field, explicit_index)
|
|
390
|
-
|
|
416
|
+
if explicit_index && !explicit_index.to_s.empty?
|
|
417
|
+
verify_explicit_vector_index(field, explicit_index.to_s)
|
|
418
|
+
return explicit_index
|
|
419
|
+
end
|
|
391
420
|
begin
|
|
392
421
|
require_relative "../../atlas_search"
|
|
393
422
|
rescue LoadError
|
|
@@ -402,9 +431,129 @@ module Parse
|
|
|
402
431
|
"#{parse_class}.#{field}; pass index: explicitly or create one " \
|
|
403
432
|
"via Parse::AtlasSearch::IndexCatalog.create_index."
|
|
404
433
|
end
|
|
434
|
+
verify_vector_index!(field, idx)
|
|
405
435
|
(idx["name"] || idx[:name]).to_s
|
|
406
436
|
end
|
|
407
437
|
|
|
438
|
+
# Best-effort drift check for an index the caller named via `index:`.
#
# Auto-discovery verifies the index it resolves; without this hook an
# explicit kwarg would skip verification entirely. The field's covering
# index is looked up in the catalog and verified only when its name
# equals the explicit one. Any lookup failure (catalog unavailable,
# nothing discoverable, name pointing at a different index) skips
# verification instead of failing the query — the explicit kwarg is an
# override, not a discovery request.
#
# @param field [Symbol, String] the vector field being queried.
# @param index_name [String] the explicitly requested index name.
# @return [void]
# @raise [IndexDriftError] propagated from the verification step when
#   the policy is `:raise` and the matching index has drifted.
def verify_explicit_vector_index(field, index_name)
  return if Parse::VectorSearch.index_drift_policy == :ignore

  covering =
    begin
      require_relative "../../atlas_search"
      Parse::AtlasSearch::IndexCatalog.find_vector_index(parse_class, field: field)
    rescue StandardError, LoadError
      # LoadError is not a StandardError, hence the explicit listing.
      return
    end
  return if covering.nil?

  covering_name = (covering["name"] || covering[:name]).to_s
  verify_vector_index!(field, covering) if covering_name == index_name
end
|
|
458
|
+
|
|
459
|
+
# Drift verification of a deployed vectorSearch index against the
# model declaration, honoring {Parse::VectorSearch.index_drift_policy}:
#
# * `:ignore` — returns immediately; nothing is computed or cached.
# * `:raise`  — raises on EVERY call that has findings, so a drifted
#   index keeps failing rather than failing once and then silently
#   serving results.
# * anything else (`:warn` default) — emits one
#   `[Parse::VectorSearch:DRIFT]` warning, on the first check only, to
#   avoid log spam.
#
# Findings are computed once per (field, index name) pair and memoized
# on the receiver in `@_verified_vector_indexes`; later calls reuse the
# cached list and only re-evaluate the policy.
#
# What is compared (see the findings helper):
#   1. `numDimensions` on the covering `type: "vector"` entry vs the
#      property's declared `dimensions:`.
#   2. `similarity` vs the property's declared `similarity:` (only when
#      both sides declare one).
#   3. When the class registers an `agent_tenant_scope`, the scope field
#      must appear among the index's `type: "filter"` paths — otherwise
#      the tenant pre-filter that {Parse::Retrieval.retrieve} folds into
#      `$vectorSearch.filter` fails Atlas-side at query time.
#
# @param field [Symbol, String] the vector field the index covers.
# @param idx [Hash] index description; `name` is read by String or
#   Symbol key.
# @return [void]
# @raise [IndexDriftError] when findings exist and the policy is `:raise`.
def verify_vector_index!(field, idx)
  return if Parse::VectorSearch.index_drift_policy == :ignore

  index_name = (idx["name"] || idx[:name]).to_s
  cache = (@_verified_vector_indexes ||= {})
  cache_key = "#{field}|#{index_name}"

  first_check = cache[cache_key].nil?
  cache[cache_key] = vector_index_drift_findings(field, idx).freeze if first_check
  drift = cache[cache_key]
  return if drift.empty?

  message = "#{self} vectorSearch index #{index_name.inspect} drifts from the " \
            "model declaration for :#{field}: #{drift.join("; ")}"

  # Strict mode: raise on every query, not just the first one.
  raise IndexDriftError.new(message, findings: drift) if Parse::VectorSearch.index_drift_policy == :raise

  warn "[Parse::VectorSearch:DRIFT] #{message}" if first_check
end
|
|
501
|
+
|
|
502
|
+
# @!visibility private
# Compares one deployed index definition with the model declaration
# for `field` and lists every disagreement as a human-readable string.
#
# Index data is read with String keys first and Symbol keys as a
# fallback. Dimension values are coerced with
# `Integer(..., exception: false)` so a malformed value in the catalog
# becomes a finding instead of an ArgumentError/TypeError that would
# abort the query regardless of the drift policy.
#
# @param field [Symbol, String] the vector field the index covers.
# @param idx [Hash] index description carrying `latestDefinition.fields`.
# @return [Array<String>] drift findings (empty when in sync).
def vector_index_drift_findings(field, idx)
  read = ->(entry, key) { entry[key] || entry[key.to_sym] }

  defn = read.call(idx, "latestDefinition") || {}
  entries = read.call(defn, "fields") || []
  field_str = field.to_s
  vector_entry = entries.find do |f|
    read.call(f, "type").to_s == "vector" && read.call(f, "path").to_s == field_str
  end
  findings = []
  return findings if vector_entry.nil? # find_vector_index matched on it; defensive

  declared_dims = vector_properties.dig(field.to_sym, :dimensions)
  index_dims = read.call(vector_entry, "numDimensions")
  if declared_dims && index_dims
    declared_n = Integer(declared_dims, exception: false)
    index_n = Integer(index_dims, exception: false)
    if index_n.nil?
      # The catalog returned something that is not a number; report it
      # rather than letting the coercion error escape.
      findings << "index numDimensions=#{index_dims.inspect} is not an integer " \
                  "(property declares dimensions: #{declared_dims})"
    elsif declared_n && index_n != declared_n
      # declared_n is nil only when the declaration itself is not
      # integer-like; in that case there is nothing sound to compare.
      findings << "index numDimensions=#{index_dims} but property declares " \
                  "dimensions: #{declared_dims} (every query will mismatch — " \
                  "rebuild the index or run #{self}.reembed! after fixing the declaration)"
    end
  end

  declared_sim = vector_properties.dig(field.to_sym, :similarity)
  index_sim = read.call(vector_entry, "similarity")
  if declared_sim && index_sim && index_sim.to_s != declared_sim.to_s
    findings << "index similarity=#{index_sim.inspect} but property declares " \
                "similarity: #{declared_sim.inspect}"
  end

  scope_field = registered_tenant_scope_field
  if scope_field
    filter_paths = entries.select { |f| read.call(f, "type").to_s == "filter" }
                          .map { |f| read.call(f, "path").to_s }
    unless filter_paths.include?(scope_field)
      findings << "agent_tenant_scope field #{scope_field.inspect} is not declared " \
                  "as a type: \"filter\" path in the index — tenant-scoped " \
                  "$vectorSearch.filter will fail Atlas-side"
    end
  end
  findings
end
|
|
541
|
+
|
|
542
|
+
# @!visibility private
# Wire/storage name of the tenant-scope field registered for this
# class, or nil when there is none. Mirrors the resolution
# Parse::Retrieval#wire_name uses when folding the scope into
# $vectorSearch.filter.
#
# Returns nil when the agent metadata registry is not loaded, when no
# rule is registered for `parse_class`, or when any StandardError is
# raised while resolving the name (best-effort lookup).
#
# @return [String, nil]
def registered_tenant_scope_field
  return nil unless defined?(Parse::Agent::MetadataRegistry)

  rule = Parse::Agent::MetadataRegistry.tenant_scope_rule(parse_class)
  return nil unless rule

  scope_key = rule[:field].to_sym
  mapping = respond_to?(:field_map) ? field_map : {}
  # Prefer the explicit field mapping; otherwise derive the column name.
  wire_name = mapping[scope_key]
  wire_name ||= scope_key.to_s.columnize
  wire_name.to_s
rescue StandardError
  nil
end
|
|
556
|
+
|
|
408
557
|
def build_vector_hits(raw_hits)
|
|
409
558
|
return [] if raw_hits.nil? || raw_hits.empty?
|
|
410
559
|
converted = Parse::MongoDB.convert_documents_to_parse(raw_hits, parse_class)
|
|
@@ -191,6 +191,28 @@ module Parse
|
|
|
191
191
|
self.class.formatted_value(@value)
|
|
192
192
|
end
|
|
193
193
|
|
|
194
|
+
# Unwraps the opt-in `{ value:, unicode: true }` form accepted by the
# regex-based constraints ({RegularExpressionConstraint},
# {StartsWithConstraint}, {ContainsConstraint}, {EndsWithConstraint}).
# When the `unicode` flag is set, the constraint adds the `u` flag to
# the compiled `$options`, asking the backend to treat the pattern and
# subject as UTF-8 for correct multibyte (e.g. accented or CJK)
# case-insensitive matching.
#
# The `u` flag is only honored by Parse Server 8.3.0+ over REST (older
# servers reject it) and by MongoDB 6.1+ on the mongo-direct path; it
# is therefore strictly opt-in and never emitted for the bare-value
# form: any non-Hash input comes back untouched with the flag off.
#
# @param raw [Object] the raw constraint value (`@value`).
# @return [Array(Object, Boolean)] the unwrapped value and the unicode flag.
# @api private
def regex_unicode_option(raw)
  if raw.is_a?(Hash)
    # Accept String or Symbol keys alike.
    options = raw.symbolize_keys
    pattern = options[:value]
    [pattern, !!options[:unicode]]
  else
    [raw, false]
  end
end
|
|
215
|
+
|
|
194
216
|
# Registers the default constraint of equality
|
|
195
217
|
register :eq, Constraint
|
|
196
218
|
precedence 100
|