parse-stack-next 5.4.1 → 5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,322 @@
1
+ # encoding: UTF-8
2
+ # frozen_string_literal: true
3
+
4
+ require "digest"
5
+ require "monitor"
6
+
7
+ module Parse
8
+ module Embeddings
9
+ # Process-local embedding cache keyed by
10
+ # `(provider, model, dimensions, input_type, input_hash)`.
11
+ #
12
+ # Query-side embedding is the hot repeat path: the same natural-
13
+ # language query (an agent retrying a tool call, a user paging
14
+ # through results, a dashboard refreshing) re-embeds identical text
15
+ # on every call, paying provider latency and per-token cost each
16
+ # time. The cache short-circuits those repeats. Write-side managed
17
+ # embeds (`embed` / `embed_image` save callbacks) already have their
18
+ # own digest-tracked elision and do not use this cache.
19
+ #
20
+ # == Disabled by default
21
+ #
22
+ # With the cache disabled {.fetch_vector} is a pass-through. Opt in:
23
+ #
24
+ # Parse::Embeddings::Cache.enable!(max_entries: 2048, ttl: 600)
25
+ #
26
+ # The default store is an in-process LRU with per-entry TTL. A
27
+ # custom store (e.g. Redis-backed) can be supplied via
28
+ # `enable!(store: my_store)` — it must respond to `get(key)`
29
+ # (returning `Array<Float>` or nil) and `set(key, vector)`; TTL
30
+ # management is then the store's responsibility.
31
+ #
32
+ # == Key derivation
33
+ #
34
+ # `provider.class.name | model_name | dimensions | input_type | SHA-256(input)`.
35
+ # The full input text never becomes part of the key, so a shared
36
+ # external store does not accumulate plaintext queries.
37
+ #
38
+ # == Observability
39
+ #
40
+ # When ActiveSupport::Notifications is loaded, a cache hit emits the
41
+ # same `parse.embeddings.embed` event a real provider call would, with
42
+ # `cached: true` — spend-tracking subscribers see hits and misses on one stream.
43
+ module Cache
44
+ # Internal LRU + TTL store. Access is synchronized by the module-
45
+ # level monitor in {Cache}; the store itself is not thread-safe.
46
+ # @!visibility private
47
+ class LRUStore
48
+ def initialize(max_entries:, ttl:)
49
+ @max_entries = max_entries
50
+ @ttl = ttl
51
+ @entries = {} # key => [vector, monotonic_expiry]
52
+ end
53
+
54
+ def get(key)
55
+ entry = @entries[key]
56
+ return nil if entry.nil?
57
+ if @ttl && entry[1] && entry[1] < Cache.monotonic
58
+ @entries.delete(key)
59
+ return nil
60
+ end
61
+ # Refresh recency (Hash preserves insertion order).
62
+ @entries.delete(key)
63
+ @entries[key] = entry
64
+ entry[0]
65
+ end
66
+
67
+ def set(key, vector)
68
+ @entries.delete(key)
69
+ expiry = @ttl ? Cache.monotonic + @ttl : nil
70
+ @entries[key] = [vector, expiry]
71
+ @entries.shift while @entries.length > @max_entries
72
+ vector
73
+ end
74
+
75
+ def size
76
+ @entries.length
77
+ end
78
+
79
+ def clear
80
+ @entries = {}
81
+ end
82
+ end
83
+
84
+ # Adapter exposing any Moneta-compatible key/value store (`[]` /
85
+ # `[]=`, optionally `store(key, value, expires:)`) through the
86
+ # `get`/`set` duck {Cache.enable!} expects — the persistent-L2
87
+ # option. Point it at the same Redis your `Parse.cache` uses and
88
+ # query-embed cache entries survive process restarts and are
89
+ # shared across processes:
90
+ #
91
+ # require "moneta"
92
+ # moneta = Moneta.new(:Redis, url: ENV["REDIS_URL"])
93
+ # Parse::Embeddings::Cache.enable!(
94
+ # store: Parse::Embeddings::Cache::MonetaStore.new(moneta, ttl: 30 * 24 * 3600),
95
+ # )
96
+ #
97
+ # Keys are namespaced (`emb:` by default) so the entries are
98
+ # recognizable next to other application keys; values are the
99
+ # raw vector Arrays (Moneta's own serializer handles encoding).
100
+ # TTL is forwarded via Moneta's `expires:` option when the
101
+ # backend supports it, ignored otherwise.
102
+ #
103
+ # Fail-open by design: a backend error (Redis down, serialization
104
+ # hiccup) degrades to a cache miss / dropped write — the embed
105
+ # path must never fail because the CACHE is unhealthy.
106
+ #
107
+ # A shared backend adds a cross-process race the in-process LRU
108
+ # cannot have: two processes missing the same key concurrently both
109
+ # call the provider and both write. That is correct (embeddings are
110
+ # deterministic per key) and bounded — no locking is attempted.
111
+ class MonetaStore
112
+ # @param moneta [#[], #[]=] a Moneta store (or anything with the
113
+ # same indexing duck).
114
+ # @param ttl [Numeric, nil] per-entry lifetime in seconds,
115
+ # forwarded as `expires:` when the backend supports
116
+ # `store(key, value, expires:)`. nil = no expiry.
117
+ # @param namespace [String] key prefix.
118
+ def initialize(moneta, ttl: nil, namespace: "emb:")
119
+ unless moneta.respond_to?(:[]) && moneta.respond_to?(:[]=)
120
+ raise ArgumentError,
121
+ "Parse::Embeddings::Cache::MonetaStore expects a Moneta-compatible " \
122
+ "store responding to #[] and #[]= (got #{moneta.class})."
123
+ end
124
+ @moneta = moneta
125
+ @ttl = ttl && Float(ttl)
126
+ @namespace = namespace.to_s
127
+ end
128
+
129
+ # @return [Array<Float>, nil]
130
+ def get(key)
131
+ value = @moneta[@namespace + key]
132
+ value.is_a?(Array) ? value : nil
133
+ rescue StandardError
134
+ nil
135
+ end
136
+
137
+ # @return [Array<Float>] the vector, unchanged.
138
+ def set(key, vector)
139
+ k = @namespace + key
140
+ if @ttl && @moneta.respond_to?(:store)
141
+ begin
142
+ @moneta.store(k, vector, expires: @ttl)
143
+ rescue ArgumentError
144
+ # Hash-like backends define #store(key, value) with no
145
+ # options arg, so the expires: form raises ArgumentError.
146
+ # Fall back to a plain write (no expiry) rather than letting
147
+ # the fail-open rescue below silently drop every vector.
148
+ @moneta[k] = vector
149
+ end
150
+ else
151
+ @moneta[k] = vector
152
+ end
153
+ vector
154
+ rescue StandardError
155
+ vector
156
+ end
157
+ end
158
+
159
+ MONITOR = Monitor.new
160
+ private_constant :MONITOR
161
+
162
+ class << self
163
+ # Enable the cache.
164
+ #
165
+ # @param max_entries [Integer] LRU capacity (used by the default store only, but always validated as a positive Integer).
166
+ # @param ttl [Numeric, nil] per-entry lifetime in seconds; nil
167
+ # disables expiry (default store only). Default 600.
168
+ # @param store [#get, #set, nil] custom backing store; overrides
169
+ # the built-in LRU when given.
170
+ # @return [void]
171
+ def enable!(max_entries: 2048, ttl: 600, store: nil)
172
+ if store && !(store.respond_to?(:get) && store.respond_to?(:set))
173
+ raise ArgumentError,
174
+ "Parse::Embeddings::Cache.enable!: store must respond to #get and #set."
175
+ end
176
+ me = Integer(max_entries)
177
+ raise ArgumentError, "max_entries must be positive" if me <= 0
178
+ MONITOR.synchronize do
179
+ @store = store || LRUStore.new(max_entries: me, ttl: ttl && Float(ttl))
180
+ @enabled = true
181
+ @hits = 0
182
+ @misses = 0
183
+ end
184
+ nil
185
+ end
186
+
187
+ # Disable and drop the store.
188
+ # @return [void]
189
+ def disable!
190
+ MONITOR.synchronize do
191
+ @enabled = false
192
+ @store = nil
193
+ end
194
+ nil
195
+ end
196
+
197
+ # @return [Boolean]
198
+ def enabled?
199
+ MONITOR.synchronize { !!@enabled }
200
+ end
201
+
202
+ # Clear cached entries (only when the store responds to `clear`) and reset hit/miss counters.
203
+ # @return [void]
204
+ def clear!
205
+ MONITOR.synchronize do
206
+ @store.clear if @store.respond_to?(:clear)
207
+ @hits = 0
208
+ @misses = 0
209
+ end
210
+ nil
211
+ end
212
+
213
+ # @return [Hash] `{ enabled:, hits:, misses:, size: }`. `size` is
214
+ # nil when no store is set or the store does not expose `size`.
215
+ def stats
216
+ MONITOR.synchronize do
217
+ {
218
+ enabled: !!@enabled,
219
+ hits: @hits.to_i,
220
+ misses: @misses.to_i,
221
+ size: (@store.respond_to?(:size) ? @store.size : nil),
222
+ }
223
+ end
224
+ end
225
+
226
+ # Embed a single input through `provider`, serving repeats from
227
+ # the cache. Pass-through (no caching, no instrumentation
228
+ # changes) when the cache is disabled.
229
+ #
230
+ # @param provider [Provider] the embedding provider.
231
+ # @param input [String] the text to embed.
232
+ # @param input_type [Symbol] forwarded to `embed_text`.
233
+ # @return [Array<Float>] the embedding vector.
234
+ def fetch_vector(provider, input, input_type: :search_query)
235
+ unless enabled?
236
+ return embed_single!(provider, input, input_type)
237
+ end
238
+ key = key_for(provider, input, input_type)
239
+ cached = MONITOR.synchronize { @store && @store.get(key) }
240
+ if cached
241
+ MONITOR.synchronize { @hits = @hits.to_i + 1 }
242
+ instrument_hit(provider, input_type)
243
+ return cached
244
+ end
245
+ vector = embed_single!(provider, input, input_type)
246
+ MONITOR.synchronize do
247
+ @misses = @misses.to_i + 1
248
+ @store.set(key, vector) if @store
249
+ end
250
+ vector
251
+ end
252
+
253
+ # @!visibility private
254
+ # Composite cache key. The input is hashed so plaintext never
255
+ # lands in a shared store; provider identity + model + dimensions
256
+ # + input_type namespace the hash (two models' vectors are never
257
+ # confused). Dimensions matter independently of the model name:
258
+ # Matryoshka-capable providers (OpenAI text-embedding-3-*, Cohere
259
+ # embed-v4, Voyage, Jina, Qwen) can register the same model at
260
+ # different output widths, and serving one width's cached vector
261
+ # to the other poisons the narrower/wider field.
262
+ def key_for(provider, input, input_type)
263
+ model = begin
264
+ provider.model_name
265
+ rescue NotImplementedError
266
+ "unknown"
267
+ end
268
+ dims = begin
269
+ provider.dimensions
270
+ rescue NotImplementedError
271
+ "unknown"
272
+ end
273
+ "#{provider.class.name}|#{model}|#{dims}|#{input_type}|#{Digest::SHA256.hexdigest(input.to_s)}"
274
+ end
275
+
276
+ # @!visibility private
277
+ def monotonic
278
+ Process.clock_gettime(Process::CLOCK_MONOTONIC)
279
+ end
280
+
281
+ private
282
+
283
+ def embed_single!(provider, input, input_type)
284
+ vectors = provider.embed_text([input], input_type: input_type)
285
+ unless vectors.is_a?(Array) && vectors.length == 1 && vectors.first.is_a?(Array)
286
+ raise InvalidResponseError,
287
+ "Parse::Embeddings::Cache: provider #{provider.class} did not return a " \
288
+ "single vector (got #{vectors.inspect[0, 80]})."
289
+ end
290
+ vectors.first
291
+ end
292
+
293
+ # Emit the standard embed event so spend subscribers see cache
294
+ # hits on the same stream as real calls.
295
+ def instrument_hit(provider, input_type)
296
+ return unless defined?(ActiveSupport::Notifications)
297
+ model = begin
298
+ provider.model_name
299
+ rescue NotImplementedError
300
+ nil
301
+ end
302
+ dims = begin
303
+ provider.dimensions
304
+ rescue NotImplementedError
305
+ nil
306
+ end
307
+ payload = {
308
+ provider: provider.class.name,
309
+ model: model,
310
+ dimensions: dims,
311
+ input_count: 1,
312
+ input_type: input_type,
313
+ total_tokens: nil,
314
+ cached: true,
315
+ error: nil,
316
+ }
317
+ ActiveSupport::Notifications.instrument(Provider::AS_NOTIFICATION_NAME, payload) {}
318
+ end
319
+ end
320
+ end
321
+ end
322
+ end
@@ -260,14 +260,23 @@ module Parse
260
260
  MULTIMODAL_MODELS.include?(@model) ? %i[text image] : [:text]
261
261
  end
262
262
 
263
- # Embed a batch of image URLs through Cohere's `/v2/embed`
264
- # multimodal endpoint. v5.1 ships URL-only — the provider
265
- # receives a public URL and issues its own fetch. The SDK does
266
- # NOT download the image; it validates the URL through
267
- # {Parse::Embeddings.validate_image_url!} (sentinel-gated egress
268
- # opt-in, CIDR / port / host allowlist) and forwards the
269
- # canonicalized URL string in the `{ type: "image_url",
270
- # image_url: { url: ... } }` content row.
263
+ # Embed a batch of images through Cohere's `/v2/embed`
264
+ # multimodal endpoint. Two source forms:
265
+ #
266
+ # * **String URL** (v5.1 path) — the provider receives a public
267
+ # URL and issues its own fetch. The SDK does NOT download the
268
+ # image; it validates the URL through
269
+ # {Parse::Embeddings.validate_image_url!} (sentinel-gated
270
+ # egress opt-in, CIDR / port / host allowlist) and forwards
271
+ # the canonicalized URL string in the `{ type: "image_url",
272
+ # image_url: { url: ... } }` content row.
273
+ # * **{Parse::Embeddings::ImageFetch::FetchedImage}** (v5.5 bytes
274
+ # path) — bytes the SDK already downloaded through
275
+ # {Parse::File.safe_open_url}, magic-byte-verified, and
276
+ # EXIF-stripped. Forwarded as a base64 data URI in the same
277
+ # `image_url` content row (Cohere v2 accepts data URIs). No
278
+ # URL validation runs and the `trust_provider_url_fetch`
279
+ # sentinel is NOT required.
271
280
  #
272
281
  # **Multimodal model required.** Cohere's v3 models do not accept
273
282
  # image inputs; calling `embed_image` on a v3-configured provider
@@ -321,24 +330,28 @@ module Parse
321
330
 
322
331
  # Validate every URL up-front so a malformed entry in slot N
323
332
  # does not slip through after slots 0..N-1 are already in the
324
- # wire body. Forward the canonicalized URL the validator
325
- # returned — not the caller's raw input.
326
- canonical_urls = sources.each_with_index.map do |url, i|
327
- unless url.is_a?(String)
333
+ # wire body. URL entries forward the validator's canonicalized
334
+ # URL — not the caller's raw input; fetched-bytes entries skip
335
+ # URL validation (already downloaded + verified by ImageFetch)
336
+ # and forward as a base64 data URI.
337
+ content_rows = sources.each_with_index.map do |src, i|
338
+ if src.is_a?(Parse::Embeddings::ImageFetch::FetchedImage)
339
+ { content: [{ type: "image_url", image_url: { url: src.to_data_uri } }] }
340
+ elsif src.is_a?(String)
341
+ canonical = Parse::Embeddings.validate_image_url!(src, allow_insecure: allow_insecure)
342
+ { content: [{ type: "image_url", image_url: { url: canonical } }] }
343
+ else
328
344
  raise ArgumentError,
329
- "Parse::Embeddings::Cohere#embed_image sources[#{i}] is not a String " \
330
- "(#{url.class}). v5.1 ships URL-only — bytes/IO support is v5.3."
345
+ "Parse::Embeddings::Cohere#embed_image sources[#{i}] must be a URL String " \
346
+ "or Parse::Embeddings::ImageFetch::FetchedImage (got #{src.class})."
331
347
  end
332
- Parse::Embeddings.validate_image_url!(url, allow_insecure: allow_insecure)
333
348
  end
334
349
 
335
350
  body = {
336
351
  model: @model,
337
352
  input_type: wire_input_type,
338
353
  embedding_types: ["float"],
339
- inputs: canonical_urls.map { |u|
340
- { content: [{ type: "image_url", image_url: { url: u } }] }
341
- },
354
+ inputs: content_rows,
342
355
  }
343
356
 
344
357
  instrument_embed(sources.length, input_type, modality: :image) do |emit_payload|