parse-stack-next 5.4.1 → 5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +344 -0
- data/Gemfile.lock +1 -1
- data/README.md +45 -6
- data/docs/atlas_vector_search_guide.md +314 -19
- data/lib/parse/api/users.rb +10 -0
- data/lib/parse/client.rb +19 -1
- data/lib/parse/embeddings/batch_embedder.rb +188 -0
- data/lib/parse/embeddings/cache.rb +322 -0
- data/lib/parse/embeddings/cohere.rb +31 -18
- data/lib/parse/embeddings/image_fetch.rb +347 -0
- data/lib/parse/embeddings/provider.rb +17 -11
- data/lib/parse/embeddings/spend_cap.rb +117 -3
- data/lib/parse/embeddings/voyage.rb +34 -25
- data/lib/parse/embeddings.rb +40 -3
- data/lib/parse/model/acl.rb +15 -11
- data/lib/parse/model/core/embed_managed.rb +243 -14
- data/lib/parse/model/core/vector_searchable.rb +157 -8
- data/lib/parse/query/constraint.rb +22 -0
- data/lib/parse/query/constraints.rb +271 -250
- data/lib/parse/query.rb +233 -42
- data/lib/parse/retrieval/agent_tool.rb +21 -14
- data/lib/parse/retrieval/retriever.rb +84 -0
- data/lib/parse/schema/search_index_migrator.rb +48 -1
- data/lib/parse/stack/version.rb +1 -1
- data/lib/parse/vector_search/hybrid.rb +39 -1
- data/lib/parse/vector_search.rb +34 -0
- data/lib/parse/webhooks/payload.rb +7 -1
- data/lib/parse/webhooks.rb +107 -21
- metadata +4 -1
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "digest"
|
|
5
|
+
require "monitor"
|
|
6
|
+
|
|
7
|
+
module Parse
|
|
8
|
+
module Embeddings
|
|
9
|
+
# Process-local embedding cache keyed by
|
|
10
|
+
# `(provider, model, dimensions, input_type, input_hash)`.
|
|
11
|
+
#
|
|
12
|
+
# Query-side embedding is the hot repeat path: the same natural-
|
|
13
|
+
# language query (an agent retrying a tool call, a user paging
|
|
14
|
+
# through results, a dashboard refreshing) re-embeds identical text
|
|
15
|
+
# on every call, paying provider latency and per-token cost each
|
|
16
|
+
# time. The cache short-circuits those repeats. Write-side managed
|
|
17
|
+
# embeds (`embed` / `embed_image` save callbacks) already have their
|
|
18
|
+
# own digest-tracked elision and do not use this cache.
|
|
19
|
+
#
|
|
20
|
+
# == Disabled by default
|
|
21
|
+
#
|
|
22
|
+
# With the cache disabled {.fetch_vector} is a pass-through. Opt in:
|
|
23
|
+
#
|
|
24
|
+
# Parse::Embeddings::Cache.enable!(max_entries: 2048, ttl: 600)
|
|
25
|
+
#
|
|
26
|
+
# The default store is an in-process LRU with per-entry TTL. A
|
|
27
|
+
# custom store (e.g. Redis-backed) can be supplied via
|
|
28
|
+
# `enable!(store: my_store)` — it must respond to `get(key)`
|
|
29
|
+
# (returning `Array<Float>` or nil) and `set(key, vector)`; TTL
|
|
30
|
+
# management is then the store's responsibility.
|
|
31
|
+
#
|
|
32
|
+
# == Key derivation
|
|
33
|
+
#
|
|
34
|
+
# `provider.class.name | model_name | dimensions | input_type | SHA-256(input)`.
|
|
35
|
+
# The full input text never becomes part of the key, so a shared
|
|
36
|
+
# external store does not accumulate plaintext queries.
|
|
37
|
+
#
|
|
38
|
+
# == Observability
|
|
39
|
+
#
|
|
40
|
+
# A cache hit emits the same `parse.embeddings.embed` AS::N event a
|
|
41
|
+
# real provider call would, with `cached: true` — existing
|
|
42
|
+
# spend-tracking subscribers see hits and misses on one stream.
|
|
43
|
+
module Cache
|
|
44
|
+
# Internal LRU + TTL store. Access is synchronized by the module-
|
|
45
|
+
# level monitor in {Cache}; the store itself is not thread-safe.
|
|
46
|
+
# @!visibility private
|
|
47
|
+
class LRUStore
  # @param max_entries [Integer] capacity; the least-recently-used
  #   entries are dropped once the store grows past it.
  # @param ttl [Float, nil] per-entry lifetime in seconds; nil
  #   means entries never expire.
  def initialize(max_entries:, ttl:)
    @entries = {} # key => [vector, monotonic_deadline]
    @max_entries = max_entries
    @ttl = ttl
  end

  # Look up a vector. A live hit is moved to the most-recent end
  # of the Hash (Ruby Hashes iterate in insertion order); an
  # expired hit is discarded.
  #
  # @return [Array<Float>, nil]
  def get(key)
    # Pulling the slot out first serves both outcomes: an expired
    # slot simply stays removed, a live one is re-appended below.
    slot = @entries.delete(key)
    return nil if slot.nil?

    vector, deadline = slot
    return nil if @ttl && deadline && deadline < Cache.monotonic

    @entries[key] = slot
    vector
  end

  # Store (or overwrite) a vector as the most recent entry, then
  # evict from the oldest end until the capacity is respected.
  #
  # @return [Array<Float>] the vector, unchanged.
  def set(key, vector)
    deadline = @ttl ? Cache.monotonic + @ttl : nil
    @entries.delete(key)
    @entries[key] = [vector, deadline]
    @entries.shift until @entries.length <= @max_entries
    vector
  end

  # @return [Integer] number of stored entries (expired entries
  #   that have not been read yet are still counted).
  def size
    @entries.size
  end

  # Drop every entry.
  def clear
    @entries = {}
  end
end
|
|
83
|
+
|
|
84
|
+
# Adapter exposing any Moneta-compatible key/value store (`[]` /
|
|
85
|
+
# `[]=`, optionally `store(key, value, expires:)`) through the
|
|
86
|
+
# `get`/`set` duck {Cache.enable!} expects — the persistent-L2
|
|
87
|
+
# option. Point it at the same Redis your `Parse.cache` uses and
|
|
88
|
+
# query-embed cache entries survive process restarts and are
|
|
89
|
+
# shared across processes:
|
|
90
|
+
#
|
|
91
|
+
# require "moneta"
|
|
92
|
+
# moneta = Moneta.new(:Redis, url: ENV["REDIS_URL"])
|
|
93
|
+
# Parse::Embeddings::Cache.enable!(
|
|
94
|
+
# store: Parse::Embeddings::Cache::MonetaStore.new(moneta, ttl: 30 * 24 * 3600),
|
|
95
|
+
# )
|
|
96
|
+
#
|
|
97
|
+
# Keys are namespaced (`emb:` by default) so the entries are
|
|
98
|
+
# recognizable next to other application keys; values are the
|
|
99
|
+
# raw vector Arrays (Moneta's own serializer handles encoding).
|
|
100
|
+
# TTL is forwarded via Moneta's `expires:` option when the
|
|
101
|
+
# backend supports it, ignored otherwise.
|
|
102
|
+
#
|
|
103
|
+
# Fail-open by design: a backend error (Redis down, serialization
|
|
104
|
+
# hiccup) degrades to a cache miss / dropped write — the embed
|
|
105
|
+
# path must never fail because the CACHE is unhealthy.
|
|
106
|
+
#
|
|
107
|
+
# The cross-process race the in-process LRU doesn't have applies
|
|
108
|
+
# here: two processes missing the same key concurrently both call
|
|
109
|
+
# the provider and both write. That is correct (embeddings are
|
|
110
|
+
# deterministic per key) and bounded — no locking is attempted.
|
|
111
|
+
class MonetaStore
  # @param moneta [#[], #[]=] a Moneta store (or any object with
  #   the same indexing interface).
  # @param ttl [Numeric, nil] per-entry lifetime in seconds, passed
  #   as `expires:` when the backend responds to `store`. nil
  #   means no expiry.
  # @param namespace [String] prefix prepended to every key.
  # @raise [ArgumentError] when `moneta` lacks `[]` or `[]=`.
  def initialize(moneta, ttl: nil, namespace: "emb:")
    indexable = moneta.respond_to?(:[]) && moneta.respond_to?(:[]=)
    unless indexable
      raise ArgumentError,
            "Parse::Embeddings::Cache::MonetaStore expects a Moneta-compatible " \
            "store responding to #[] and #[]= (got #{moneta.class})."
    end
    @moneta = moneta
    @ttl = ttl ? Float(ttl) : ttl
    @namespace = namespace.to_s
  end

  # Read a vector. Anything that is not an Array, and any backend
  # error, is reported as a miss.
  #
  # @return [Array<Float>, nil]
  def get(key)
    found = @moneta[namespaced(key)]
    return found if found.is_a?(Array)

    nil
  rescue StandardError
    nil
  end

  # Write a vector. Fail-open: a backend error drops the write
  # instead of propagating into the embed path.
  #
  # @return [Array<Float>] the vector, unchanged.
  def set(key, vector)
    write(namespaced(key), vector)
    vector
  rescue StandardError
    vector
  end

  private

  # Prefix the cache key with the configured namespace.
  def namespaced(key)
    @namespace + key
  end

  # Write with `expires:` when a TTL is configured and the backend
  # exposes `store`; otherwise use plain index assignment.
  def write(full_key, vector)
    unless @ttl && @moneta.respond_to?(:store)
      @moneta[full_key] = vector
      return
    end

    begin
      @moneta.store(full_key, vector, expires: @ttl)
    rescue ArgumentError
      # Hash-like backends define #store(key, value) without an
      # options argument, so the expires: form raises
      # ArgumentError. Write without expiry rather than letting
      # the fail-open rescue in #set drop every vector.
      @moneta[full_key] = vector
    end
  end
end
|
|
158
|
+
|
|
159
|
+
# Reentrant lock shared by the module-level methods below; they read and
# write @store, @enabled, @hits and @misses only while holding it.
MONITOR = Monitor.new
|
|
160
|
+
private_constant :MONITOR
|
|
161
|
+
|
|
162
|
+
class << self
|
|
163
|
+
# Enable the cache.
|
|
164
|
+
#
|
|
165
|
+
# @param max_entries [Integer] LRU capacity (default store only).
|
|
166
|
+
# @param ttl [Numeric, nil] per-entry lifetime in seconds; nil
|
|
167
|
+
# disables expiry (default store only). Default 600.
|
|
168
|
+
# @param store [#get, #set, nil] custom backing store; overrides
|
|
169
|
+
# the built-in LRU when given.
|
|
170
|
+
# @return [void]
|
|
171
|
+
def enable!(max_entries: 2048, ttl: 600, store: nil)
  # A custom store is only accepted when it offers both halves of
  # the get/set interface.
  usable = !store || (store.respond_to?(:get) && store.respond_to?(:set))
  unless usable
    raise ArgumentError,
          "Parse::Embeddings::Cache.enable!: store must respond to #get and #set."
  end

  capacity = Integer(max_entries)
  raise ArgumentError, "max_entries must be positive" unless capacity.positive?

  MONITOR.synchronize do
    # The built-in LRU is only constructed (and ttl only coerced)
    # when no custom store was supplied.
    @store = store || LRUStore.new(max_entries: capacity, ttl: (ttl ? Float(ttl) : ttl))
    @enabled = true
    @hits = @misses = 0
  end
  nil
end
|
|
186
|
+
|
|
187
|
+
# Disable and drop the store.
|
|
188
|
+
# @return [void]
|
|
189
|
+
def disable!
  # Flip the flag and release the store under one lock acquisition
  # so readers never see a disabled cache that still has a store.
  MONITOR.synchronize { @enabled, @store = false, nil }
  nil
end
|
|
196
|
+
|
|
197
|
+
# @return [Boolean]
|
|
198
|
+
def enabled?
  # Coerce to a strict boolean: @enabled is nil until enable! has run.
  MONITOR.synchronize { @enabled ? true : false }
end
|
|
201
|
+
|
|
202
|
+
# Clear cached entries (any store that responds to `clear`, including the default LRU) and reset hit/miss counters.
|
|
203
|
+
# @return [void]
|
|
204
|
+
def clear!
  MONITOR.synchronize do
    # Stores without #clear (and a nil store while disabled) are
    # skipped; the counters are reset either way.
    backing = @store
    backing.clear if backing.respond_to?(:clear)
    @hits = @misses = 0
  end
  nil
end
|
|
212
|
+
|
|
213
|
+
# @return [Hash] `{ enabled:, hits:, misses:, size: }`. `size` is
|
|
214
|
+
# nil for custom stores that don't expose one.
|
|
215
|
+
def stats
  MONITOR.synchronize do
    # nil when there is no store or the store does not expose #size.
    entry_count = (@store.size if @store.respond_to?(:size))
    {
      enabled: @enabled ? true : false,
      hits: @hits.to_i,
      misses: @misses.to_i,
      size: entry_count,
    }
  end
end
|
|
225
|
+
|
|
226
|
+
# Embed a single input through `provider`, serving repeats from
|
|
227
|
+
# the cache. Pass-through (no caching, no instrumentation
|
|
228
|
+
# changes) when the cache is disabled.
|
|
229
|
+
#
|
|
230
|
+
# @param provider [Provider] the embedding provider.
|
|
231
|
+
# @param input [String] the text to embed.
|
|
232
|
+
# @param input_type [Symbol] forwarded to `embed_text`.
|
|
233
|
+
# @return [Array<Float>] the embedding vector.
|
|
234
|
+
def fetch_vector(provider, input, input_type: :search_query)
  # Disabled cache: straight pass-through to the provider.
  return embed_single!(provider, input, input_type) unless enabled?

  cache_key = key_for(provider, input, input_type)
  hit = MONITOR.synchronize { (backing = @store) && backing.get(cache_key) }

  unless hit
    # Miss: the provider call runs outside the lock. The store may
    # have been dropped by a concurrent disable! in the meantime,
    # hence the re-check before writing.
    fresh = embed_single!(provider, input, input_type)
    MONITOR.synchronize do
      @misses = @misses.to_i + 1
      @store.set(cache_key, fresh) if @store
    end
    return fresh
  end

  # Hit: count it and emit the instrumentation event.
  MONITOR.synchronize { @hits = @hits.to_i + 1 }
  instrument_hit(provider, input_type)
  hit
end
|
|
252
|
+
|
|
253
|
+
# @!visibility private
|
|
254
|
+
# Composite cache key. The input is hashed so plaintext never
|
|
255
|
+
# lands in a shared store; provider identity + model + dimensions
|
|
256
|
+
# + input_type namespace the hash (two models' vectors are never
|
|
257
|
+
# confused). Dimensions matter independently of the model name:
|
|
258
|
+
# Matryoshka-capable providers (OpenAI text-embedding-3-*, Cohere
|
|
259
|
+
# embed-v4, Voyage, Jina, Qwen) can register the same model at
|
|
260
|
+
# different output widths, and serving one width's cached vector
|
|
261
|
+
# to the other poisons the narrower/wider field.
|
|
262
|
+
def key_for(provider, input, input_type)
  # Providers may leave either reader unimplemented; those
  # segments of the key become the literal "unknown".
  read_or_unknown = lambda do |reader|
    begin
      provider.public_send(reader)
    rescue NotImplementedError
      "unknown"
    end
  end
  model_part = read_or_unknown.call(:model_name)
  width_part = read_or_unknown.call(:dimensions)

  # Only the digest of the input goes into the key, never the text.
  fingerprint = Digest::SHA256.hexdigest(input.to_s)
  "#{provider.class.name}|#{model_part}|#{width_part}|#{input_type}|#{fingerprint}"
end
|
|
275
|
+
|
|
276
|
+
# @!visibility private
|
|
277
|
+
def monotonic
  # Monotonic clock reading in fractional seconds, so TTL checks are
  # unaffected by wall-clock changes.
  clock_id = Process::CLOCK_MONOTONIC
  Process.clock_gettime(clock_id, :float_second)
end
|
|
280
|
+
|
|
281
|
+
private
|
|
282
|
+
|
|
283
|
+
def embed_single!(provider, input, input_type)
  # The provider API is batch-shaped: send a one-element batch and
  # require exactly one vector (an Array) back.
  response = provider.embed_text([input], input_type: input_type)
  lone_vector = response.is_a?(Array) && response.length == 1 && response.first.is_a?(Array)
  return response.first if lone_vector

  raise InvalidResponseError,
        "Parse::Embeddings::Cache: provider #{provider.class} did not return a " \
        "single vector (got #{response.inspect[0, 80]})."
end
|
|
292
|
+
|
|
293
|
+
# Emit the standard embed event so spend subscribers see cache
|
|
294
|
+
# hits on the same stream as real calls.
|
|
295
|
+
def instrument_hit(provider, input_type)
  # No-op when ActiveSupport::Notifications is not loaded.
  return unless defined?(ActiveSupport::Notifications)

  # Providers may leave either reader unimplemented; report nil
  # for those fields.
  optional = lambda do |reader|
    begin
      provider.public_send(reader)
    rescue NotImplementedError
      nil
    end
  end
  model_name = optional.call(:model_name)
  vector_width = optional.call(:dimensions)

  # `cached: true` marks the event as a cache hit; a hit has no
  # token count to report, so total_tokens is nil.
  event = {
    provider: provider.class.name,
    model: model_name,
    dimensions: vector_width,
    input_count: 1,
    input_type: input_type,
    total_tokens: nil,
    cached: true,
    error: nil,
  }
  ActiveSupport::Notifications.instrument(Provider::AS_NOTIFICATION_NAME, event) {}
end
|
|
319
|
+
end
|
|
320
|
+
end
|
|
321
|
+
end
|
|
322
|
+
end
|
|
@@ -260,14 +260,23 @@ module Parse
|
|
|
260
260
|
MULTIMODAL_MODELS.include?(@model) ? %i[text image] : [:text]
|
|
261
261
|
end
|
|
262
262
|
|
|
263
|
-
# Embed a batch of
|
|
264
|
-
# multimodal endpoint.
|
|
265
|
-
#
|
|
266
|
-
#
|
|
267
|
-
#
|
|
268
|
-
#
|
|
269
|
-
#
|
|
270
|
-
#
|
|
263
|
+
# Embed a batch of images through Cohere's `/v2/embed`
|
|
264
|
+
# multimodal endpoint. Two source forms:
|
|
265
|
+
#
|
|
266
|
+
# * **String URL** (v5.1 path) — the provider receives a public
|
|
267
|
+
# URL and issues its own fetch. The SDK does NOT download the
|
|
268
|
+
# image; it validates the URL through
|
|
269
|
+
# {Parse::Embeddings.validate_image_url!} (sentinel-gated
|
|
270
|
+
# egress opt-in, CIDR / port / host allowlist) and forwards
|
|
271
|
+
# the canonicalized URL string in the `{ type: "image_url",
|
|
272
|
+
# image_url: { url: ... } }` content row.
|
|
273
|
+
# * **{Parse::Embeddings::ImageFetch::FetchedImage}** (v5.5 bytes
|
|
274
|
+
# path) — bytes the SDK already downloaded through
|
|
275
|
+
# {Parse::File.safe_open_url}, magic-byte-verified, and
|
|
276
|
+
# EXIF-stripped. Forwarded as a base64 data URI in the same
|
|
277
|
+
# `image_url` content row (Cohere v2 accepts data URIs). No
|
|
278
|
+
# URL validation runs and the `trust_provider_url_fetch`
|
|
279
|
+
# sentinel is NOT required.
|
|
271
280
|
#
|
|
272
281
|
# **Multimodal model required.** Cohere's v3 models do not accept
|
|
273
282
|
# image inputs; calling `embed_image` on a v3-configured provider
|
|
@@ -321,24 +330,28 @@ module Parse
|
|
|
321
330
|
|
|
322
331
|
# Validate every URL up-front so a malformed entry in slot N
|
|
323
332
|
# does not slip through after slots 0..N-1 are already in the
|
|
324
|
-
# wire body.
|
|
325
|
-
#
|
|
326
|
-
|
|
327
|
-
|
|
333
|
+
# wire body. URL entries forward the validator's canonicalized
|
|
334
|
+
# URL — not the caller's raw input; fetched-bytes entries skip
|
|
335
|
+
# URL validation (already downloaded + verified by ImageFetch)
|
|
336
|
+
# and forward as a base64 data URI.
|
|
337
|
+
content_rows = sources.each_with_index.map do |src, i|
|
|
338
|
+
if src.is_a?(Parse::Embeddings::ImageFetch::FetchedImage)
|
|
339
|
+
{ content: [{ type: "image_url", image_url: { url: src.to_data_uri } }] }
|
|
340
|
+
elsif src.is_a?(String)
|
|
341
|
+
canonical = Parse::Embeddings.validate_image_url!(src, allow_insecure: allow_insecure)
|
|
342
|
+
{ content: [{ type: "image_url", image_url: { url: canonical } }] }
|
|
343
|
+
else
|
|
328
344
|
raise ArgumentError,
|
|
329
|
-
"Parse::Embeddings::Cohere#embed_image sources[#{i}]
|
|
330
|
-
"(#{
|
|
345
|
+
"Parse::Embeddings::Cohere#embed_image sources[#{i}] must be a URL String " \
|
|
346
|
+
"or Parse::Embeddings::ImageFetch::FetchedImage (got #{src.class})."
|
|
331
347
|
end
|
|
332
|
-
Parse::Embeddings.validate_image_url!(url, allow_insecure: allow_insecure)
|
|
333
348
|
end
|
|
334
349
|
|
|
335
350
|
body = {
|
|
336
351
|
model: @model,
|
|
337
352
|
input_type: wire_input_type,
|
|
338
353
|
embedding_types: ["float"],
|
|
339
|
-
inputs:
|
|
340
|
-
{ content: [{ type: "image_url", image_url: { url: u } }] }
|
|
341
|
-
},
|
|
354
|
+
inputs: content_rows,
|
|
342
355
|
}
|
|
343
356
|
|
|
344
357
|
instrument_embed(sources.length, input_type, modality: :image) do |emit_payload|
|