parse-stack-next 5.4.1 → 5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -55,6 +55,22 @@ module Parse
55
55
  # field and the caller didn't pass an explicit `index:` kwarg.
56
56
  class IndexNotResolved < ArgumentError; end
57
57
 
58
+ # Raised (under `Parse::VectorSearch.index_drift_policy = :raise`)
59
+ # when first-query verification finds the deployed vectorSearch
60
+ # index disagreeing with the model declaration — wrong
61
+ # `numDimensions`, wrong `similarity`, or a registered
62
+ # tenant-scope field missing from the index's `filter` paths.
63
+ # Under the default `:warn` policy the same findings emit a
64
+ # single `[Parse::VectorSearch:DRIFT]` warning instead.
65
class IndexDriftError < StandardError
  # @return [Array<String>] human-readable drift findings. This is a frozen
  #   copy owned by the error, so it cannot change after construction.
  attr_reader :findings

  # @param message [String] summary handed to StandardError.
  # @param findings [Array<String>, String, nil] the individual findings;
  #   nil becomes an empty list and a single value is wrapped in an array.
  def initialize(message, findings: [])
    # Array() normalizes nil and scalars so the reader always returns an
    # Array. dup + freeze snapshots the list: a caller that later mutates
    # its own array cannot rewrite this error's data, and the caller's
    # array is not frozen as a side effect.
    @findings = Array(findings).dup.freeze
    super(message)
  end
end
73
+
58
74
  # Raised by the `find_similar(text:)` overload when the resolved
59
75
  # `:vector` property has no `provider:` (and therefore no way to
60
76
  # turn `text:` into a query vector). Distinct from
@@ -367,13 +383,23 @@ module Parse
367
383
  "on the property, or pass an explicit `vector:`."
368
384
  end
369
385
  provider = Parse::Embeddings.provider(provider_name)
370
- vectors = provider.embed_text([text], input_type: :search_query)
371
- unless vectors.is_a?(Array) && vectors.length == 1 && vectors.first.is_a?(Array)
372
- raise Parse::Embeddings::InvalidResponseError,
373
- "#{self}.find_similar: provider #{provider_name.inspect} did not return " \
374
- "a single vector for `text:` (got #{vectors.inspect[0, 80]})."
375
- end
376
- vectors.first
386
+ # Spend cap: every query-embed path (find_similar(text:),
387
+ # hybrid_search(text:), Retrieval.retrieve) funnels through this
388
+ # method, so charging here closes the "direct callers bypass the
389
+ # cap" gap. No-op when no limit is configured, or when an
390
+ # upstream caller (the semantic_search agent tool) has already
391
+ # charged with per-tenant identity (SpendCap.with_precharged).
392
+ #
393
+ # Deliberate: the charge runs BEFORE the cache lookup, so cache
394
+ # hits bill at full price. The cap bounds query *volume* (an
395
+ # abuse/probing control), not just provider spend — a caller
396
+ # replaying one cached query must not get unlimited throughput.
397
+ Parse::Embeddings::SpendCap.charge_query!(text)
398
+ # Query-embed cache: repeated identical queries skip the
399
+ # provider round-trip when Parse::Embeddings::Cache.enable! has
400
+ # been called; pass-through (with the provider's own response
401
+ # validation preserved) when disabled.
402
+ Parse::Embeddings::Cache.fetch_vector(provider, text, input_type: :search_query)
377
403
  end
378
404
 
379
405
  def coerce_query_vector(vector)
@@ -387,7 +413,10 @@ module Parse
387
413
  end
388
414
 
389
415
  def resolve_vector_index!(field, explicit_index)
390
- return explicit_index if explicit_index && !explicit_index.to_s.empty?
416
+ if explicit_index && !explicit_index.to_s.empty?
417
+ verify_explicit_vector_index(field, explicit_index.to_s)
418
+ return explicit_index
419
+ end
391
420
  begin
392
421
  require_relative "../../atlas_search"
393
422
  rescue LoadError
@@ -402,9 +431,129 @@ module Parse
402
431
  "#{parse_class}.#{field}; pass index: explicitly or create one " \
403
432
  "via Parse::AtlasSearch::IndexCatalog.create_index."
404
433
  end
434
+ verify_vector_index!(field, idx)
405
435
  (idx["name"] || idx[:name]).to_s
406
436
  end
407
437
 
438
+ # Best-effort drift verification for an explicitly named `index:`.
439
+ # The auto-discovery path verifies the index it resolves; an
440
+ # explicit kwarg would otherwise skip verification entirely. Look
441
+ # the field's covering index up in the catalog and verify it when
442
+ # its name matches the explicit one. Lookup failures (catalog
443
+ # unavailable, index not discoverable, name targeting a different
444
+ # index) skip verification rather than failing the query — the
445
+ # explicit kwarg is an override, not a discovery request.
446
def verify_explicit_vector_index(field, index_name)
  return if Parse::VectorSearch.index_drift_policy == :ignore

  # Any failure while loading or querying the catalog yields nil here, which
  # skips verification instead of failing the caller's query.
  catalog_entry =
    begin
      require_relative "../../atlas_search"
      Parse::AtlasSearch::IndexCatalog.find_vector_index(parse_class, field: field)
    rescue StandardError, LoadError
      nil
    end
  return if catalog_entry.nil?

  # Verify only when the catalog's index for this field is the one the
  # caller named; a different name is left unchecked.
  discovered_name = (catalog_entry["name"] || catalog_entry[:name]).to_s
  verify_vector_index!(field, catalog_entry) if discovered_name == index_name
end
458
+
459
+ # First-query drift verification: compare the deployed index's
460
+ # `latestDefinition` against the model declaration. The drift
461
+ # findings are computed once per (field, index name) per class per
462
+ # process and cached; the policy check runs on EVERY query, so
463
+ # under `:raise` a drifted index keeps failing instead of failing
464
+ # once and then silently serving results. Under `:warn` the
465
+ # warning is emitted only on the first check to avoid log spam.
466
+ # Honors {Parse::VectorSearch.index_drift_policy} (`:warn` default
467
+ # / `:raise` / `:ignore`).
468
+ #
469
+ # Checks:
470
+ # 1. `numDimensions` on the covering `type: "vector"` entry vs the
471
+ # property's declared `dimensions:`.
472
+ # 2. `similarity` vs the property's declared `similarity:` (only
473
+ # when both sides declare one).
474
+ # 3. When the class registers an `agent_tenant_scope`, the scope
475
+ # field must appear among the index's `type: "filter"` paths —
476
+ # otherwise the tenant pre-filter that
477
+ # {Parse::Retrieval.retrieve} folds into `$vectorSearch.filter`
478
+ # fails Atlas-side at query time.
479
def verify_vector_index!(field, idx)
  return if Parse::VectorSearch.index_drift_policy == :ignore

  index_name = (idx["name"] || idx[:name]).to_s
  # Findings are memoized on the receiver, keyed by field and index name,
  # so the comparison against the declaration runs once per pair.
  memo = (@_verified_vector_indexes ||= {})
  memo_key = "#{field}|#{index_name}"
  first_check = memo[memo_key].nil?
  memo[memo_key] = vector_index_drift_findings(field, idx).freeze if first_check
  findings = memo[memo_key]
  return if findings.empty?

  details = findings.join("; ")
  message = "#{self} vectorSearch index #{index_name.inspect} drifts from the " \
            "model declaration for :#{field}: #{details}"
  # The policy is consulted on every call, so :raise keeps rejecting a
  # drifted index rather than failing only the first query.
  raise IndexDriftError.new(message, findings: findings) if Parse::VectorSearch.index_drift_policy == :raise

  # Otherwise warn once: later calls reuse the memoized findings silently.
  warn "[Parse::VectorSearch:DRIFT] #{message}" if first_check
end
501
+
502
+ # @!visibility private
503
+ # @return [Array<String>] drift findings (empty when in sync).
504
def vector_index_drift_findings(field, idx)
  # Index hashes are read with String keys first, then Symbol keys.
  read = ->(hash, key) { hash[key] || hash[key.to_sym] }

  definition = read.call(idx, "latestDefinition") || {}
  entries = read.call(definition, "fields") || []
  wanted_path = field.to_s
  vector_entry = entries.find do |entry|
    read.call(entry, "type").to_s == "vector" && read.call(entry, "path").to_s == wanted_path
  end
  drift = []
  # Nothing to compare against when no vector entry covers this field.
  return drift if vector_entry.nil?

  # 1. Dimensions: compared as Integers, only when both sides supply a value.
  declared_dims = vector_properties.dig(field.to_sym, :dimensions)
  index_dims = read.call(vector_entry, "numDimensions")
  dims_differ = declared_dims && index_dims && Integer(index_dims) != Integer(declared_dims)
  if dims_differ
    drift << "index numDimensions=#{index_dims} but property declares " \
             "dimensions: #{declared_dims} (every query will mismatch — " \
             "rebuild the index or run #{self}.reembed! after fixing the declaration)"
  end

  # 2. Similarity: compared as Strings, only when both sides supply a value.
  declared_sim = vector_properties.dig(field.to_sym, :similarity)
  index_sim = read.call(vector_entry, "similarity")
  sims_differ = declared_sim && index_sim && index_sim.to_s != declared_sim.to_s
  if sims_differ
    drift << "index similarity=#{index_sim.inspect} but property declares " \
             "similarity: #{declared_sim.inspect}"
  end

  # 3. Tenant scope: a registered scope field must be among the index's
  #    `type: "filter"` paths.
  scope_field = registered_tenant_scope_field
  if scope_field
    filter_paths = entries.each_with_object([]) do |entry, paths|
      paths << read.call(entry, "path").to_s if read.call(entry, "type").to_s == "filter"
    end
    unless filter_paths.include?(scope_field)
      drift << "agent_tenant_scope field #{scope_field.inspect} is not declared " \
               "as a type: \"filter\" path in the index — tenant-scoped " \
               "$vectorSearch.filter will fail Atlas-side"
    end
  end

  drift
end
541
+
542
+ # @!visibility private
543
+ # Wire/storage name of the class's registered tenant-scope field,
544
+ # or nil. Mirrors the resolution Parse::Retrieval#wire_name uses
545
+ # when folding the scope into $vectorSearch.filter.
546
def registered_tenant_scope_field
  # The agent registry is optional; without it there is no scope to report.
  return unless defined?(Parse::Agent::MetadataRegistry)

  scope_rule = Parse::Agent::MetadataRegistry.tenant_scope_rule(parse_class)
  return unless scope_rule

  scope_key = scope_rule[:field].to_sym
  # Prefer an explicit field_map entry when the receiver exposes one;
  # otherwise fall back to the columnized form of the field name.
  mapped_name = field_map[scope_key] if respond_to?(:field_map)
  (mapped_name || scope_key.to_s.columnize).to_s
rescue StandardError
  # Best-effort lookup: any error is reported as "no scope field".
  nil
end
556
+
408
557
  def build_vector_hits(raw_hits)
409
558
  return [] if raw_hits.nil? || raw_hits.empty?
410
559
  converted = Parse::MongoDB.convert_documents_to_parse(raw_hits, parse_class)
@@ -191,6 +191,28 @@ module Parse
191
191
  self.class.formatted_value(@value)
192
192
  end
193
193
 
194
+ # Supports the opt-in `{ value:, unicode: true }` form accepted by the
195
+ # regex-based constraints ({RegularExpressionConstraint},
196
+ # {StartsWithConstraint}, {ContainsConstraint}, {EndsWithConstraint}).
197
+ # When the `unicode` flag is set, the constraint adds the `u` flag to the
198
+ # compiled `$options`, asking the backend to treat the pattern and subject
199
+ # as UTF-8 for correct multibyte (e.g. accented or CJK) case-insensitive
200
+ # matching.
201
+ #
202
+ # The `u` flag is only honored by Parse Server 8.3.0+ over REST (older
203
+ # servers reject it) and by MongoDB 6.1+ on the mongo-direct path; it is
204
+ # therefore strictly opt-in and never emitted for the bare-value form.
205
+ #
206
+ # @param raw [Object] the raw constraint value (`@value`).
207
+ # @return [Array(Object, Boolean)] the unwrapped value and the unicode flag.
208
+ # @api private
209
def regex_unicode_option(raw)
  if raw.is_a?(Hash)
    # Option form: keys may be Strings or Symbols, so normalize first.
    # A missing or falsy :unicode entry yields false.
    pattern, unicode = raw.symbolize_keys.values_at(:value, :unicode)
    [pattern, !!unicode]
  else
    # Bare-value form: pass the value through with the flag off.
    [raw, false]
  end
end
215
+
194
216
  # Registers the default constraint of equality
195
217
  register :eq, Constraint
196
218
  precedence 100