ai-functions 2.1.3 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +90 -1
- package/README.md +38 -0
- package/dist/ai-promise.d.ts +3 -3
- package/dist/ai-promise.d.ts.map +1 -1
- package/dist/ai-promise.js +135 -64
- package/dist/ai-promise.js.map +1 -1
- package/dist/ai-schemas.d.ts +56 -0
- package/dist/ai-schemas.d.ts.map +1 -0
- package/dist/ai-schemas.js +53 -0
- package/dist/ai-schemas.js.map +1 -0
- package/dist/ai.d.ts +16 -242
- package/dist/ai.d.ts.map +1 -1
- package/dist/ai.js +51 -858
- package/dist/ai.js.map +1 -1
- package/dist/batch/anthropic.d.ts +6 -4
- package/dist/batch/anthropic.d.ts.map +1 -1
- package/dist/batch/anthropic.js +83 -145
- package/dist/batch/anthropic.js.map +1 -1
- package/dist/batch/bedrock.d.ts +8 -30
- package/dist/batch/bedrock.d.ts.map +1 -1
- package/dist/batch/bedrock.js +155 -338
- package/dist/batch/bedrock.js.map +1 -1
- package/dist/batch/cloudflare.d.ts +8 -20
- package/dist/batch/cloudflare.d.ts.map +1 -1
- package/dist/batch/cloudflare.js +68 -189
- package/dist/batch/cloudflare.js.map +1 -1
- package/dist/batch/google.d.ts +6 -20
- package/dist/batch/google.d.ts.map +1 -1
- package/dist/batch/google.js +70 -238
- package/dist/batch/google.js.map +1 -1
- package/dist/batch/index.d.ts +4 -1
- package/dist/batch/index.d.ts.map +1 -1
- package/dist/batch/index.js +4 -1
- package/dist/batch/index.js.map +1 -1
- package/dist/batch/memory.d.ts +1 -1
- package/dist/batch/memory.d.ts.map +1 -1
- package/dist/batch/memory.js +14 -10
- package/dist/batch/memory.js.map +1 -1
- package/dist/batch/openai.d.ts +11 -14
- package/dist/batch/openai.d.ts.map +1 -1
- package/dist/batch/openai.js +52 -156
- package/dist/batch/openai.js.map +1 -1
- package/dist/batch/provider.d.ts +111 -0
- package/dist/batch/provider.d.ts.map +1 -0
- package/dist/batch/provider.js +233 -0
- package/dist/batch/provider.js.map +1 -0
- package/dist/batch-map.d.ts.map +1 -1
- package/dist/batch-map.js +23 -17
- package/dist/batch-map.js.map +1 -1
- package/dist/batch-queue.d.ts +65 -0
- package/dist/batch-queue.d.ts.map +1 -1
- package/dist/batch-queue.js +169 -14
- package/dist/batch-queue.js.map +1 -1
- package/dist/budget.d.ts.map +1 -1
- package/dist/budget.js +27 -14
- package/dist/budget.js.map +1 -1
- package/dist/cache.d.ts +23 -0
- package/dist/cache.d.ts.map +1 -1
- package/dist/cache.js +36 -15
- package/dist/cache.js.map +1 -1
- package/dist/context.d.ts +26 -8
- package/dist/context.d.ts.map +1 -1
- package/dist/context.js +64 -62
- package/dist/context.js.map +1 -1
- package/dist/digital-objects-registry.d.ts +229 -0
- package/dist/digital-objects-registry.d.ts.map +1 -0
- package/dist/digital-objects-registry.js +617 -0
- package/dist/digital-objects-registry.js.map +1 -0
- package/dist/embeddings.d.ts +2 -2
- package/dist/embeddings.d.ts.map +1 -1
- package/dist/errors.d.ts +22 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +35 -0
- package/dist/errors.js.map +1 -0
- package/dist/eval/runner.d.ts +8 -0
- package/dist/eval/runner.d.ts.map +1 -1
- package/dist/eval/runner.js +41 -35
- package/dist/eval/runner.js.map +1 -1
- package/dist/eval-log/in-memory.d.ts +34 -0
- package/dist/eval-log/in-memory.d.ts.map +1 -0
- package/dist/eval-log/in-memory.js +84 -0
- package/dist/eval-log/in-memory.js.map +1 -0
- package/dist/eval-log/index.d.ts +29 -0
- package/dist/eval-log/index.d.ts.map +1 -0
- package/dist/eval-log/index.js +39 -0
- package/dist/eval-log/index.js.map +1 -0
- package/dist/eval-log/types.d.ts +101 -0
- package/dist/eval-log/types.d.ts.map +1 -0
- package/dist/eval-log/types.js +16 -0
- package/dist/eval-log/types.js.map +1 -0
- package/dist/function-registry.d.ts +176 -0
- package/dist/function-registry.d.ts.map +1 -0
- package/dist/function-registry.js +685 -0
- package/dist/function-registry.js.map +1 -0
- package/dist/generate.d.ts +9 -3
- package/dist/generate.d.ts.map +1 -1
- package/dist/generate.js +18 -18
- package/dist/generate.js.map +1 -1
- package/dist/index.d.ts +18 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +35 -18
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts +118 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +187 -0
- package/dist/logger.js.map +1 -0
- package/dist/middleware/budget.d.ts +84 -0
- package/dist/middleware/budget.d.ts.map +1 -0
- package/dist/middleware/budget.js +110 -0
- package/dist/middleware/budget.js.map +1 -0
- package/dist/middleware/cache.d.ts +103 -0
- package/dist/middleware/cache.d.ts.map +1 -0
- package/dist/middleware/cache.js +228 -0
- package/dist/middleware/cache.js.map +1 -0
- package/dist/middleware/embed-cache.d.ts +99 -0
- package/dist/middleware/embed-cache.d.ts.map +1 -0
- package/dist/middleware/embed-cache.js +128 -0
- package/dist/middleware/embed-cache.js.map +1 -0
- package/dist/middleware/index.d.ts +11 -0
- package/dist/middleware/index.d.ts.map +1 -0
- package/dist/middleware/index.js +11 -0
- package/dist/middleware/index.js.map +1 -0
- package/dist/middleware/trace.d.ts +103 -0
- package/dist/middleware/trace.d.ts.map +1 -0
- package/dist/middleware/trace.js +176 -0
- package/dist/middleware/trace.js.map +1 -0
- package/dist/primitives.d.ts +120 -1
- package/dist/primitives.d.ts.map +1 -1
- package/dist/primitives.js +398 -26
- package/dist/primitives.js.map +1 -1
- package/dist/retry.d.ts +66 -1
- package/dist/retry.d.ts.map +1 -1
- package/dist/retry.js +115 -8
- package/dist/retry.js.map +1 -1
- package/dist/sandbox.d.ts +36 -0
- package/dist/sandbox.d.ts.map +1 -0
- package/dist/sandbox.js +44 -0
- package/dist/sandbox.js.map +1 -0
- package/dist/schema.js +2 -2
- package/dist/schema.js.map +1 -1
- package/dist/telemetry.d.ts +128 -0
- package/dist/telemetry.d.ts.map +1 -0
- package/dist/telemetry.js +285 -0
- package/dist/telemetry.js.map +1 -0
- package/dist/template.d.ts.map +1 -1
- package/dist/template.js +6 -1
- package/dist/template.js.map +1 -1
- package/dist/tool-orchestration.d.ts +66 -4
- package/dist/tool-orchestration.d.ts.map +1 -1
- package/dist/tool-orchestration.js +123 -23
- package/dist/tool-orchestration.js.map +1 -1
- package/dist/type-guards.d.ts +28 -0
- package/dist/type-guards.d.ts.map +1 -0
- package/dist/type-guards.js +29 -0
- package/dist/type-guards.js.map +1 -0
- package/dist/types.d.ts +155 -19
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +36 -1
- package/dist/types.js.map +1 -1
- package/dist/wrap-for-v3.d.ts +80 -0
- package/dist/wrap-for-v3.d.ts.map +1 -0
- package/dist/wrap-for-v3.js +89 -0
- package/dist/wrap-for-v3.js.map +1 -0
- package/examples/00-quickstart.ts +232 -0
- package/examples/01-rag-chatbot.ts +212 -0
- package/examples/02-multi-agent-research.ts +290 -0
- package/examples/03-email-classification.ts +379 -0
- package/examples/04-content-moderation.ts +400 -0
- package/examples/05-document-extraction.ts +455 -0
- package/examples/06-streaming-chat-nextjs.ts +437 -0
- package/examples/07-cloudflare-worker.ts +483 -0
- package/examples/08-batch-processing.ts +491 -0
- package/examples/09-budget-constrained.ts +527 -0
- package/examples/10-tool-orchestration.ts +565 -0
- package/examples/11-retry-resilience.ts +403 -0
- package/examples/12-caching-strategies.ts +422 -0
- package/examples/README.md +145 -0
- package/package.json +29 -25
- package/src/ai-promise.ts +226 -140
- package/src/ai-schemas.ts +122 -0
- package/src/ai.ts +71 -1176
- package/src/batch/anthropic.ts +96 -161
- package/src/batch/bedrock.ts +203 -454
- package/src/batch/cloudflare.ts +99 -282
- package/src/batch/google.ts +91 -297
- package/src/batch/index.ts +4 -1
- package/src/batch/memory.ts +15 -10
- package/src/batch/openai.ts +65 -193
- package/src/batch/provider.ts +336 -0
- package/src/batch-map.ts +29 -24
- package/src/batch-queue.ts +200 -11
- package/src/budget.ts +31 -18
- package/src/cache.ts +45 -17
- package/src/context.ts +106 -77
- package/src/digital-objects-registry.ts +750 -0
- package/src/errors.ts +37 -0
- package/src/eval/runner.ts +60 -36
- package/src/eval-log/in-memory.ts +90 -0
- package/src/eval-log/index.ts +46 -0
- package/src/eval-log/types.ts +110 -0
- package/src/function-registry.ts +874 -0
- package/src/generate.ts +33 -28
- package/src/index.ts +122 -21
- package/src/logger.ts +232 -0
- package/src/middleware/budget.ts +171 -0
- package/src/middleware/cache.ts +299 -0
- package/src/middleware/embed-cache.ts +195 -0
- package/src/middleware/index.ts +23 -0
- package/src/middleware/trace.ts +248 -0
- package/src/primitives.ts +589 -62
- package/src/retry.ts +144 -18
- package/src/sandbox.ts +52 -0
- package/src/schema.ts +8 -8
- package/src/telemetry.ts +403 -0
- package/src/template.ts +8 -4
- package/src/tool-orchestration.ts +213 -48
- package/src/type-guards.ts +31 -0
- package/src/types.ts +186 -27
- package/src/wrap-for-v3.ts +105 -0
- package/test/ai-promise.test.ts +1080 -0
- package/test/ai-proxy.test.ts +1 -1
- package/test/batch-autosubmit-errors.test.ts +49 -37
- package/test/batch-blog-posts.test.ts +87 -129
- package/test/core-functions.test.ts +183 -579
- package/test/decide.test.ts +154 -322
- package/test/define.test.ts +211 -8
- package/test/digital-objects-registry.test.ts +760 -0
- package/test/embedding-cache-middleware.test.ts +140 -0
- package/test/fill-template.test.ts +89 -0
- package/test/generate-core.test.ts +140 -229
- package/test/implicit-batch.test.ts +22 -65
- package/test/retry-policy-integration.test.ts +117 -0
- package/test/sandbox-execution.test.ts +155 -0
- package/test/schema.test.ts +55 -19
- package/test/template.test.ts +1164 -0
- package/test/tool-orchestration.test.ts +270 -0
- package/test/wrap-for-v3.test.ts +612 -0
- package/vitest.config.js +6 -0
- package/vitest.config.ts +20 -0
- package/LICENSE +0 -21
- package/dist/rpc/auth.d.ts +0 -69
- package/dist/rpc/auth.d.ts.map +0 -1
- package/dist/rpc/auth.js +0 -136
- package/dist/rpc/auth.js.map +0 -1
- package/dist/rpc/client.d.ts +0 -62
- package/dist/rpc/client.d.ts.map +0 -1
- package/dist/rpc/client.js +0 -103
- package/dist/rpc/client.js.map +0 -1
- package/dist/rpc/deferred.d.ts +0 -60
- package/dist/rpc/deferred.d.ts.map +0 -1
- package/dist/rpc/deferred.js +0 -96
- package/dist/rpc/deferred.js.map +0 -1
- package/dist/rpc/index.d.ts +0 -22
- package/dist/rpc/index.d.ts.map +0 -1
- package/dist/rpc/index.js +0 -38
- package/dist/rpc/index.js.map +0 -1
- package/dist/rpc/local.d.ts +0 -42
- package/dist/rpc/local.d.ts.map +0 -1
- package/dist/rpc/local.js +0 -50
- package/dist/rpc/local.js.map +0 -1
- package/dist/rpc/server.d.ts +0 -165
- package/dist/rpc/server.d.ts.map +0 -1
- package/dist/rpc/server.js +0 -405
- package/dist/rpc/server.js.map +0 -1
- package/dist/rpc/session.d.ts +0 -32
- package/dist/rpc/session.d.ts.map +0 -1
- package/dist/rpc/session.js +0 -43
- package/dist/rpc/session.js.map +0 -1
- package/dist/rpc/transport.d.ts +0 -306
- package/dist/rpc/transport.d.ts.map +0 -1
- package/dist/rpc/transport.js +0 -731
- package/dist/rpc/transport.js.map +0 -1
- package/src/batch/anthropic.js +0 -256
- package/src/batch/bedrock.js +0 -584
- package/src/batch/cloudflare.js +0 -287
- package/src/batch/google.js +0 -359
- package/src/batch/index.js +0 -30
- package/src/batch/memory.js +0 -187
- package/src/batch/openai.js +0 -402
- package/src/eval/index.js +0 -7
- package/src/eval/models.js +0 -119
- package/src/eval/runner.js +0 -147
- package/test/schema.test.js +0 -96
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* cacheMiddleware — content-addressable cache for `wrapLanguageModel`
|
|
3
|
+
*
|
|
4
|
+
* Implements the AI SDK cookbook's local-caching-middleware pattern
|
|
5
|
+
* (https://ai-sdk.dev/cookbook/node/local-caching-middleware) on top of the
|
|
6
|
+
* AI SDK 6 `LanguageModelV3Middleware` shape:
|
|
7
|
+
*
|
|
8
|
+
* - **Hit derivation:** content-hash of `{ prompt, modelId, responseFormat }`
|
|
9
|
+
* so a schema change (responseFormat.type === 'json' carries a `schema`
|
|
10
|
+
* JSONSchema7) invalidates the entry. Generation parameters (temperature,
|
|
11
|
+
* topP, etc.) are deliberately *not* part of the key for the eval-fixture
|
|
12
|
+
* use case — flipping temperature shouldn't blow up a 5x verify-time win.
|
|
13
|
+
* Callers who want strict keying should pass a custom `keyHash`.
|
|
14
|
+
*
|
|
15
|
+
* - **Stream support:** cached entries store the `LanguageModelV3StreamPart[]`
|
|
16
|
+
* array; `wrapStream` replays them via `simulateReadableStream` so consumers
|
|
17
|
+
* see the same chunked event sequence on a hit. (`wrapGenerate` is the
|
|
18
|
+
* common path; both share the same cache map.)
|
|
19
|
+
*
|
|
20
|
+
* - **TTL:** 24h default, configurable via `ttlMs`. Entries past TTL are
|
|
21
|
+
* evicted on access (lazy expiry — no background timer).
|
|
22
|
+
*
|
|
23
|
+
* - **Pluggable store:** in-memory default (Map-backed); `'disk'` writes to
|
|
24
|
+
* a JSON file at `.cache/v3-eval-cache.json` for cross-process fixture
|
|
25
|
+
* sharing. Disk reads/writes are best-effort — IO failures fall through
|
|
26
|
+
* to the wrapped model.
|
|
27
|
+
*
|
|
28
|
+
* - **Env gate:** honors `process.env.V3_EVAL_CACHE`. When unset/empty, the
|
|
29
|
+
* middleware short-circuits to a passthrough — useful for production where
|
|
30
|
+
* cache hits would be incorrect but the operator wants the same wrap chain.
|
|
31
|
+
* Set to `'1'` (or any other non-empty string — the value itself is not interpreted, so `'0'` and `'false'` also enable it).
|
|
32
|
+
*
|
|
33
|
+
* @packageDocumentation
|
|
34
|
+
*/
|
|
35
|
+
import { simulateReadableStream } from 'ai';
|
|
36
|
+
import { hashKey } from '../cache.js';
|
|
37
|
+
// ============================================================================
|
|
38
|
+
// Stores
|
|
39
|
+
// ============================================================================
|
|
40
|
+
/**
 * Process-local cache store: a thin wrapper around a `Map` keyed by the
 * cache-key string. Nothing is persisted; entries live as long as the
 * instance does.
 */
class MemoryStore {
    map = new Map();
    /** Look up `key`; yields `undefined` when nothing is stored under it. */
    get(key) {
        const { map } = this;
        return map.get(key);
    }
    /** Store `value` under `key`, replacing any previous entry. */
    set(key, value) {
        const { map } = this;
        map.set(key, value);
    }
    /** Drop the entry for `key`; a no-op when the key is absent. */
    delete(key) {
        const { map } = this;
        map.delete(key);
    }
}
|
|
52
|
+
/**
 * Disk-backed store: the whole cache lives in one JSON file at `path`,
 * shaped as a plain object mapping cache key to entry.
 *
 * Everything here is best-effort. A missing, unreadable or corrupt file
 * yields an empty cache and a failed write is ignored, so cache IO never
 * blocks an LLM call. The file is read once (on first access) and rewritten
 * in full on every `set` / `delete`, which is cheap for the read-dominated
 * eval-fixture use case.
 *
 * Node's `fs` / `path` are resolved lazily at call time instead of through a
 * top-level import, so loading this module performs no file-system access.
 * A bare `require` is not defined when this file runs as a native ES module;
 * relying on it alone made every read and write throw inside the `try`, which
 * silently degraded the store to memory-only. `process.getBuiltinModule` is
 * therefore used as a fallback when `require` is unavailable.
 */
class DiskStore {
    path;
    cache = null;
    /**
     * @param path - Location of the JSON cache file. Parent directories are
     *   created on the first successful write.
     */
    constructor(path) {
        this.path = path;
    }
    /**
     * Resolve Node's `fs` and `path` built-ins without a static import.
     *
     * @returns `{ fs, path }`, or `undefined` when the runtime offers neither
     *   `require` nor `process.getBuiltinModule`.
     */
    static builtins() {
        if (typeof require === 'function') {
            // eslint-disable-next-line @typescript-eslint/no-require-imports
            return { fs: require('fs'), path: require('path') };
        }
        if (typeof process !== 'undefined' && typeof process?.getBuiltinModule === 'function') {
            return {
                fs: process.getBuiltinModule('fs'),
                path: process.getBuiltinModule('path'),
            };
        }
        return undefined;
    }
    /**
     * Return the in-memory view of the cache, reading the file on first use.
     * Any failure leaves the (possibly empty) map in place.
     */
    load() {
        if (this.cache !== null)
            return this.cache;
        this.cache = new Map();
        try {
            const node = DiskStore.builtins();
            if (node !== undefined && node.fs.existsSync(this.path)) {
                const parsed = JSON.parse(node.fs.readFileSync(this.path, 'utf-8'));
                for (const [k, v] of Object.entries(parsed)) {
                    this.cache.set(k, v);
                }
            }
        }
        catch {
            // best-effort: a corrupt or unreadable file behaves like an empty cache
        }
        return this.cache;
    }
    /**
     * Rewrite the whole cache file from the in-memory map. Does nothing when
     * the cache was never loaded; write failures are ignored.
     */
    flush() {
        if (this.cache === null)
            return;
        try {
            const node = DiskStore.builtins();
            if (node === undefined)
                return;
            // `recursive: true` makes this a no-op when the directory exists.
            node.fs.mkdirSync(node.path.dirname(this.path), { recursive: true });
            const obj = Object.fromEntries(this.cache);
            node.fs.writeFileSync(this.path, JSON.stringify(obj), 'utf-8');
        }
        catch {
            // best-effort: the in-memory map stays authoritative for this process
        }
    }
    /** Look up `key`; yields `undefined` when nothing is stored under it. */
    get(key) {
        return this.load().get(key);
    }
    /** Store `value` under `key` and persist the whole cache. */
    set(key, value) {
        this.load().set(key, value);
        this.flush();
    }
    /** Remove `key` and persist the whole cache. */
    delete(key) {
        this.load().delete(key);
        this.flush();
    }
}
|
|
115
|
+
// ============================================================================
|
|
116
|
+
// Helpers
|
|
117
|
+
// ============================================================================
|
|
118
|
+
// Default entry lifetime: 24 hours, expressed in milliseconds.
const DEFAULT_TTL_MS = 1000 * 60 * 60 * 24;
|
|
119
|
+
/**
 * Default cache-key derivation: hashes the prompt, the model id and the
 * response format (which carries the schema for object generation).
 *
 * Generation knobs are intentionally left out of the key so an eval-fixture
 * cache survives temperature tweaks.
 *
 * @param params - Call options; only `prompt` and `responseFormat` are read.
 * @param modelId - Identifier of the model being called.
 * @returns Whatever `hashKey` produces for the three-field key object.
 */
function defaultKeyHash(params, modelId) {
    const { prompt, responseFormat } = params;
    const keyMaterial = { prompt, modelId, responseFormat };
    return hashKey(keyMaterial);
}
|
|
129
|
+
/**
 * Read the `V3_EVAL_CACHE` environment gate.
 *
 * The value is not interpreted: any non-empty string enables the cache
 * (including `'0'` and `'false'`); unset or empty disables it.
 *
 * Runtimes without a `process` global report the gate as disabled instead of
 * throwing, so building the middleware there yields a passthrough.
 *
 * @returns `true` when `process.env.V3_EVAL_CACHE` is a non-empty string.
 */
function envGateEnabled() {
    // `typeof` is the only probe that is safe for a global that may not exist.
    if (typeof process === 'undefined' || process == null)
        return false;
    const flag = process.env?.['V3_EVAL_CACHE'];
    return typeof flag === 'string' && flag.length > 0;
}
|
|
133
|
+
/**
 * Report whether a cache entry has outlived its TTL.
 *
 * An entry that is not an object, or whose `createdAt` is not a finite
 * number, is reported as expired. Such entries can come out of a hand-edited
 * or corrupt cache file; treating them as expired gets them evicted instead
 * of throwing on `null` or living forever on a `NaN` comparison.
 *
 * @param entry - Cache entry as returned by a store's `get`.
 * @param ttlMs - Maximum entry age in milliseconds.
 * @returns `true` when the entry should be evicted rather than served.
 */
function isExpired(entry, ttlMs) {
    const createdAt = entry !== null && typeof entry === 'object' ? entry.createdAt : undefined;
    if (typeof createdAt !== 'number' || !Number.isFinite(createdAt)) {
        return true;
    }
    return Date.now() - createdAt > ttlMs;
}
|
|
136
|
+
// ============================================================================
|
|
137
|
+
// Middleware
|
|
138
|
+
// ============================================================================
|
|
139
|
+
/**
 * Build a cache middleware for `wrapLanguageModel`. Wraps `doGenerate` and
 * `doStream`: on a hit the cached payload is replayed, on a miss the
 * downstream model is invoked and its result stored.
 *
 * Behaviour worth knowing:
 *
 * - Generate results and stream chunks share one entry per key. Storing one
 *   kind keeps the other (as long as the entry is still live), so alternating
 *   `generate` / `stream` calls for the same prompt no longer evict each
 *   other. A merged entry keeps the older `createdAt`, so nothing is served
 *   past its TTL.
 * - Entries that went through a JSON store come back with timestamps as
 *   strings; `response.timestamp` and `response-metadata` chunk timestamps
 *   are turned back into `Date`s on a hit.
 * - A stream that contained an `error` part is not cached, so a transient
 *   provider failure is not replayed for the whole TTL.
 * - When disabled (`options.enabled === false`, or the option is omitted and
 *   the env gate is off) both hooks are plain passthroughs.
 *
 * Composition note (from the original author): install **before**
 * budget/trace so cache hits don't pay the downstream model cost.
 * NOTE(review): whether budget/trace middleware still observe cache hits
 * depends on how `wrapLanguageModel` orders the chain — confirm.
 *
 * @param options - Optional settings: `ttlMs` (entry lifetime, 24h default),
 *   `keyHash` (custom key derivation), `store` (`'memory'`, `'disk'` or a
 *   custom store object), `diskPath` (file used by the `'disk'` store,
 *   default `.cache/v3-eval-cache.json`) and `enabled` (overrides the env gate).
 * @returns A middleware object with `specificationVersion: 'v3'`.
 *
 * @example
 * ```ts
 * import { wrapLanguageModel } from 'ai'
 * import { cacheMiddleware } from 'ai-functions'
 *
 * const model = wrapLanguageModel({
 *   model: openai('gpt-4o'),
 *   middleware: cacheMiddleware({ store: 'disk', ttlMs: 86_400_000 }),
 * })
 * ```
 */
export function cacheMiddleware(options = {}) {
    const ttlMs = options.ttlMs ?? DEFAULT_TTL_MS;
    const keyHash = options.keyHash ?? defaultKeyHash;
    const store = options.store === undefined || options.store === 'memory'
        ? new MemoryStore()
        : options.store === 'disk'
            ? new DiskStore(options.diskPath ?? '.cache/v3-eval-cache.json')
            : options.store;
    const enabled = options.enabled ?? envGateEnabled();
    /** Fetch the live entry for `key`, evicting it when it is past its TTL. */
    const lookup = (key) => {
        const entry = store.get(key);
        if (entry === undefined)
            return undefined;
        if (isExpired(entry, ttlMs)) {
            store.delete(key);
            return undefined;
        }
        return entry;
    };
    /**
     * Store `patch` under `key` without discarding the other payload kind.
     * The store is re-read here because a stream finishes long after the
     * initial lookup and another call may have written in between.
     */
    const remember = (key, patch) => {
        const prior = store.get(key);
        const live = prior != null && !isExpired(prior, ttlMs) ? prior : undefined;
        store.set(key, { ...live, ...patch, createdAt: live?.createdAt ?? Date.now() });
    };
    /** Restore `response.timestamp` to a `Date` after a JSON round trip. */
    const reviveGenerateResult = (result) => {
        const stamp = result?.response?.timestamp;
        if (typeof stamp !== 'string')
            return result;
        return { ...result, response: { ...result.response, timestamp: new Date(stamp) } };
    };
    /** Restore a `response-metadata` chunk's timestamp to a `Date`. */
    const reviveChunk = (chunk) => chunk?.type === 'response-metadata' && typeof chunk.timestamp === 'string'
        ? { ...chunk, timestamp: new Date(chunk.timestamp) }
        : chunk;
    return {
        specificationVersion: 'v3',
        async wrapGenerate({ doGenerate, params, model }) {
            if (!enabled)
                return doGenerate();
            const key = keyHash(params, model.modelId);
            const hit = lookup(key);
            if (hit?.generateResult !== undefined) {
                return reviveGenerateResult(hit.generateResult);
            }
            const result = await doGenerate();
            remember(key, { generateResult: result });
            return result;
        },
        async wrapStream({ doStream, params, model }) {
            if (!enabled)
                return doStream();
            const key = keyHash(params, model.modelId);
            const hit = lookup(key);
            if (Array.isArray(hit?.streamChunks)) {
                // Replay cached chunks via simulateReadableStream so consumers
                // see the same async iteration shape as a fresh call.
                return {
                    stream: simulateReadableStream({
                        chunks: hit.streamChunks.map(reviveChunk),
                        initialDelayInMs: 0,
                        chunkDelayInMs: 0,
                    }),
                };
            }
            const result = await doStream();
            // Tee the stream: forward to caller, accumulate for cache.
            const chunks = [];
            const transformedStream = result.stream.pipeThrough(new TransformStream({
                transform(chunk, controller) {
                    chunks.push(chunk);
                    controller.enqueue(chunk);
                },
                flush() {
                    // Only a stream that closed normally reaches this point.
                    // Skip caching when the provider reported an error part.
                    if (chunks.some((chunk) => chunk?.type === 'error'))
                        return;
                    remember(key, { streamChunks: chunks });
                },
            }));
            return {
                ...result,
                stream: transformedStream,
            };
        },
    };
}
|
|
228
|
+
//# sourceMappingURL=cache.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../../src/middleware/cache.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAEH,OAAO,EAAE,sBAAsB,EAAE,MAAM,IAAI,CAAA;AAQ3C,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAA;AAsDrC,+EAA+E;AAC/E,SAAS;AACT,+EAA+E;AAE/E,MAAM,WAAW;IACE,GAAG,GAA4B,IAAI,GAAG,EAAE,CAAA;IACzD,GAAG,CAAC,GAAW;QACb,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IAC1B,CAAC;IACD,GAAG,CAAC,GAAW,EAAE,KAAiB;QAChC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;IAC1B,CAAC;IACD,MAAM,CAAC,GAAW;QAChB,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;IACtB,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,SAAS;IACI,IAAI,CAAQ;IACrB,KAAK,GAAmC,IAAI,CAAA;IAEpD,YAAY,IAAY;QACtB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAA;IAClB,CAAC;IAEO,IAAI;QACV,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI;YAAE,OAAO,IAAI,CAAC,KAAK,CAAA;QAC1C,IAAI,CAAC,KAAK,GAAG,IAAI,GAAG,EAAE,CAAA;QACtB,IAAI,CAAC;YACH,iEAAiE;YACjE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAwB,CAAA;YAC/C,IAAI,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC7B,MAAM,GAAG,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,CAAA;gBAC/C,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAA+B,CAAA;gBAC5D,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC5C,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;gBACtB,CAAC;YACH,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,cAAc;QAChB,CAAC;QACD,OAAO,IAAI,CAAC,KAAK,CAAA;IACnB,CAAC;IAEO,KAAK;QACX,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI;YAAE,OAAM;QAC/B,IAAI,CAAC;YACH,iEAAiE;YACjE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAwB,CAAA;YAC/C,iEAAiE;YACjE,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAA0B,CAAA;YACrD,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACnC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBACxB,EAAE,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;YACxC,CAAC;YACD,MAAM,GAAG,GAAG,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YAC1C,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,OAAO,CAAC,CAAA;QAC3D,CAAC;QAAC,MAAM,CAAC;YACP,cAAc;QAChB,CAAC;IACH,CA
AC;IAED,GAAG,CAAC,GAAW;QACb,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IAC7B,CAAC;IAED,GAAG,CAAC,GAAW,EAAE,KAAiB;QAChC,IAAI,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;QAC3B,IAAI,CAAC,KAAK,EAAE,CAAA;IACd,CAAC;IAED,MAAM,CAAC,GAAW;QAChB,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;QACvB,IAAI,CAAC,KAAK,EAAE,CAAA;IACd,CAAC;CACF;AAED,+EAA+E;AAC/E,UAAU;AACV,+EAA+E;AAE/E,MAAM,cAAc,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAA;AAE1C,SAAS,cAAc,CAAC,MAAkC,EAAE,OAAe;IACzE,oEAAoE;IACpE,mEAAmE;IACnE,kEAAkE;IAClE,OAAO,OAAO,CAAC;QACb,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,OAAO;QACP,cAAc,EAAE,MAAM,CAAC,cAAc;KACtC,CAAC,CAAA;AACJ,CAAC;AAED,SAAS,cAAc;IACrB,MAAM,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,CAAA;IACtC,OAAO,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAA;AAC9C,CAAC;AAED,SAAS,SAAS,CAAC,KAAiB,EAAE,KAAa;IACjD,OAAO,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,SAAS,GAAG,KAAK,CAAA;AAC7C,CAAC;AAED,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,UAAU,eAAe,CAAC,UAAkC,EAAE;IAClE,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,cAAc,CAAA;IAC7C,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,cAAc,CAAA;IACjD,MAAM,KAAK,GACT,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,CAAC,KAAK,KAAK,QAAQ;QACvD,CAAC,CAAC,IAAI,WAAW,EAAE;QACnB,CAAC,CAAC,OAAO,CAAC,KAAK,KAAK,MAAM;YAC1B,CAAC,CAAC,IAAI,SAAS,CAAC,OAAO,CAAC,QAAQ,IAAI,2BAA2B,CAAC;YAChE,CAAC,CAAC,OAAO,CAAC,KAAK,CAAA;IACnB,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,cAAc,EAAE,CAAA;IAEnD,OAAO;QACL,oBAAoB,EAAE,IAAI;QAC1B,KAAK,CAAC,YAAY,CAAC,EAAE,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE;YAC9C,IAAI,CAAC,OAAO;gBAAE,OAAO,UAAU,EAAE,CAAA;YACjC,MAAM,GAAG,GAAG,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,OAAO,CAAC,CAAA;YAC1C,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;YAC7B,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;gBACzB,IAAI,SAAS,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC7B,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;gBACnB,CAAC;qBAAM,IAAI,MAAM,CAAC,cAAc,KAAK,SAAS,EAAE,CAAC;oBAC/C,OAAO,MAAM,CAAC,cAAc,CAAA;gBAC9B,CAAC;YACH,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,UAAU,EAAE,CAAA;YACjC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,cAA
c,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAA;YACjE,OAAO,MAAM,CAAA;QACf,CAAC;QACD,KAAK,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE;YAC1C,IAAI,CAAC,OAAO;gBAAE,OAAO,QAAQ,EAAE,CAAA;YAC/B,MAAM,GAAG,GAAG,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,OAAO,CAAC,CAAA;YAC1C,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;YAC7B,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;gBACzB,IAAI,SAAS,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC7B,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;gBACnB,CAAC;qBAAM,IAAI,MAAM,CAAC,YAAY,KAAK,SAAS,EAAE,CAAC;oBAC7C,+DAA+D;oBAC/D,sDAAsD;oBACtD,MAAM,MAAM,GAAgC;wBAC1C,MAAM,EAAE,sBAAsB,CAA4B;4BACxD,MAAM,EAAE,MAAM,CAAC,YAAY;4BAC3B,gBAAgB,EAAE,CAAC;4BACnB,cAAc,EAAE,CAAC;yBAClB,CAAC;qBACH,CAAA;oBACD,OAAO,MAAM,CAAA;gBACf,CAAC;YACH,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,QAAQ,EAAE,CAAA;YAC/B,2DAA2D;YAC3D,MAAM,MAAM,GAAgC,EAAE,CAAA;YAC9C,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CAAC,WAAW,CACjD,IAAI,eAAe,CAAuD;gBACxE,SAAS,CAAC,KAAK,EAAE,UAAU;oBACzB,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;oBAClB,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,CAAA;gBAC3B,CAAC;gBACD,KAAK;oBACH,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAA;gBACjE,CAAC;aACF,CAAC,CACH,CAAA;YACD,OAAO;gBACL,GAAG,MAAM;gBACT,MAAM,EAAE,iBAAiB;aAC1B,CAAA;QACH,CAAC;KACF,CAAA;AACH,CAAC"}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* embeddingCacheMiddleware — content-addressable cache for `wrapEmbeddingModel`
|
|
3
|
+
*
|
|
4
|
+
* Embedding-side analogue of {@link cacheMiddleware}. Wraps `doEmbed` and
|
|
5
|
+
* caches the resulting embeddings keyed on
|
|
6
|
+
* `{ values, modelId, providerOptions }` so a re-embed of the same value
|
|
7
|
+
* batch with the same model returns the cached vectors without hitting the
|
|
8
|
+
* provider.
|
|
9
|
+
*
|
|
10
|
+
* **Why a separate middleware instead of reusing `cacheMiddleware`?**
|
|
11
|
+
* AI SDK 6 splits language-model and embedding-model surfaces:
|
|
12
|
+
* `LanguageModelV3Middleware` exposes `wrapGenerate` / `wrapStream` against
|
|
13
|
+
* `LanguageModelV3CallOptions`, while `EmbeddingModelV3Middleware` exposes
|
|
14
|
+
* `wrapEmbed` against `EmbeddingModelV3CallOptions`. The cache shape
|
|
15
|
+
* (per-value vector vs. per-prompt completion payload) is also different —
|
|
16
|
+
* embeddings cache batched arrays, generations cache single result objects.
|
|
17
|
+
*
|
|
18
|
+
* - **Hit derivation:** stable hash of `{ values, modelId, providerOptions }`.
|
|
19
|
+
* `values` is the array as-passed (caller can pre-normalise if they want
|
|
20
|
+
* case/whitespace insensitivity). Generation knobs don't apply.
|
|
21
|
+
*
|
|
22
|
+
* - **Batch semantics:** the cache key is the *whole* batch. A subset hit
|
|
23
|
+
* doesn't trigger a partial-fill — that's a more invasive shape change
|
|
24
|
+
* (the legacy `EmbeddingCache.getMany` did per-text caching, but it was
|
|
25
|
+
* only used in the example and added 100+ LOC of bookkeeping). Callers
|
|
26
|
+
* that want per-text caching should use stable per-text batches.
|
|
27
|
+
*
|
|
28
|
+
* - **TTL:** 24h default, configurable. Lazy expiry on access.
|
|
29
|
+
*
|
|
30
|
+
* - **Pluggable store:** in-memory default (Map-backed); custom store
|
|
31
|
+
* honored as-is. Disk persistence is intentionally not provided here —
|
|
32
|
+
* embedding payloads (large `number[][]`) make on-disk JSON a bad fit;
|
|
33
|
+
* callers who want it should pass a custom store.
|
|
34
|
+
*
|
|
35
|
+
* - **Env gate:** honors `process.env.V3_EVAL_CACHE` for parity with
|
|
36
|
+
* `cacheMiddleware`. Override via the `enabled` option.
|
|
37
|
+
*
|
|
38
|
+
* @packageDocumentation
|
|
39
|
+
*/
|
|
40
|
+
import type { EmbeddingModelV3CallOptions, EmbeddingModelV3Embedding, EmbeddingModelV3Middleware, SharedV3Warning } from '@ai-sdk/provider';
|
|
41
|
+
/**
 * One cached embedding batch, as held by an {@link EmbedCacheMiddlewareStore}.
 */
interface EmbedCacheEntry {
    /** Embedding vectors that were returned for the cached batch. */
    embeddings: EmbeddingModelV3Embedding[];
    /** Provider warnings stored together with the cached batch. */
    warnings: SharedV3Warning[];
    /** Insertion time in epoch milliseconds; TTL eviction is based on it. */
    createdAt: number;
}
|
|
50
|
+
/**
 * Storage contract for cached embedding results. Implement it to plug a
 * custom backend into {@link embeddingCacheMiddleware}.
 */
export interface EmbedCacheMiddlewareStore {
    /** Look up the entry stored under `key`; `undefined` when there is none. */
    get(key: string): undefined | EmbedCacheEntry;
    /** Store `value` under `key`. */
    set(key: string, value: EmbedCacheEntry): void;
    /** Remove the entry stored under `key`. */
    delete(key: string): void;
}
|
|
56
|
+
/** Settings accepted by {@link embeddingCacheMiddleware}; every field is optional. */
export interface EmbedCacheMiddlewareOptions {
    /**
     * Where cached batches are kept: `'memory'` for a process-local Map, or
     * a custom {@link EmbedCacheMiddlewareStore} implementation.
     *
     * @default 'memory'
     */
    store?: EmbedCacheMiddlewareStore | 'memory';
    /**
     * Entry lifetime in milliseconds. Entries older than this are evicted
     * when they are next accessed.
     *
     * @default 86_400_000 (24h)
     */
    ttlMs?: number;
    /**
     * Custom cache-key derivation. When omitted, a stable hash of
     * `{ values, modelId, providerOptions }` is used.
     */
    keyHash?: (params: EmbeddingModelV3CallOptions, modelId: string) => string;
    /**
     * Explicit override for the env gate. `false` makes the middleware a
     * passthrough regardless of `V3_EVAL_CACHE`; `true` always caches. When
     * omitted, the decision comes from a check of `process.env.V3_EVAL_CACHE`.
     */
    enabled?: boolean;
}
|
|
83
|
+
/**
 * Create the embedding-cache middleware to pass to `wrapEmbeddingModel`.
 *
 * @param options - Optional cache settings; see {@link EmbedCacheMiddlewareOptions}.
 * @returns The middleware object.
 *
 * @example
 * ```ts
 * import { wrapEmbeddingModel } from 'ai'
 * import { embeddingCacheMiddleware } from 'ai-functions'
 *
 * const model = wrapEmbeddingModel({
 *   model: openai.embedding('text-embedding-3-small'),
 *   middleware: embeddingCacheMiddleware({ ttlMs: 86_400_000 }),
 * })
 * ```
 */
export declare function embeddingCacheMiddleware(
    options?: EmbedCacheMiddlewareOptions,
): EmbeddingModelV3Middleware;
|
|
98
|
+
export {};
|
|
99
|
+
//# sourceMappingURL=embed-cache.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embed-cache.d.ts","sourceRoot":"","sources":["../../src/middleware/embed-cache.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AAEH,OAAO,KAAK,EACV,2BAA2B,EAC3B,yBAAyB,EACzB,0BAA0B,EAE1B,eAAe,EAChB,MAAM,kBAAkB,CAAA;AAOzB,gCAAgC;AAChC,UAAU,eAAe;IACvB,2DAA2D;IAC3D,UAAU,EAAE,KAAK,CAAC,yBAAyB,CAAC,CAAA;IAC5C,4DAA4D;IAC5D,QAAQ,EAAE,KAAK,CAAC,eAAe,CAAC,CAAA;IAChC,6CAA6C;IAC7C,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,mDAAmD;AACnD,MAAM,WAAW,yBAAyB;IACxC,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,eAAe,GAAG,SAAS,CAAA;IAC7C,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,eAAe,GAAG,IAAI,CAAA;IAC9C,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAAA;CAC1B;AAED,oDAAoD;AACpD,MAAM,WAAW,2BAA2B;IAC1C;;;;;OAKG;IACH,KAAK,CAAC,EAAE,QAAQ,GAAG,yBAAyB,CAAA;IAC5C;;;;OAIG;IACH,KAAK,CAAC,EAAE,MAAM,CAAA;IACd;;;OAGG;IACH,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,2BAA2B,EAAE,OAAO,EAAE,MAAM,KAAK,MAAM,CAAA;IAC1E;;;;OAIG;IACH,OAAO,CAAC,EAAE,OAAO,CAAA;CAClB;AA8CD;;;;;;;;;;;;;GAaG;AACH,wBAAgB,wBAAwB,CACtC,OAAO,GAAE,2BAAgC,GACxC,0BAA0B,CAoC5B"}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* embeddingCacheMiddleware — content-addressable cache for `wrapEmbeddingModel`
|
|
3
|
+
*
|
|
4
|
+
* Embedding-side analogue of {@link cacheMiddleware}. Wraps `doEmbed` and
|
|
5
|
+
* caches the resulting embeddings keyed on
|
|
6
|
+
* `{ values, modelId, providerOptions }` so a re-embed of the same value
|
|
7
|
+
* batch with the same model returns the cached vectors without hitting the
|
|
8
|
+
* provider.
|
|
9
|
+
*
|
|
10
|
+
* **Why a separate middleware instead of reusing `cacheMiddleware`?**
|
|
11
|
+
* AI SDK 6 splits language-model and embedding-model surfaces:
|
|
12
|
+
* `LanguageModelV3Middleware` exposes `wrapGenerate` / `wrapStream` against
|
|
13
|
+
* `LanguageModelV3CallOptions`, while `EmbeddingModelV3Middleware` exposes
|
|
14
|
+
* `wrapEmbed` against `EmbeddingModelV3CallOptions`. The cache shape
|
|
15
|
+
* (per-value vector vs. per-prompt completion payload) is also different —
|
|
16
|
+
* embeddings cache batched arrays, generations cache single result objects.
|
|
17
|
+
*
|
|
18
|
+
* - **Hit derivation:** stable hash of `{ values, modelId, providerOptions }`.
|
|
19
|
+
* `values` is the array as-passed (caller can pre-normalise if they want
|
|
20
|
+
* case/whitespace insensitivity). Generation knobs don't apply.
|
|
21
|
+
*
|
|
22
|
+
* - **Batch semantics:** the cache key is the *whole* batch. A subset hit
|
|
23
|
+
* doesn't trigger a partial-fill — that's a more invasive shape change
|
|
24
|
+
* (the legacy `EmbeddingCache.getMany` did per-text caching, but it was
|
|
25
|
+
* only used in the example and added 100+ LOC of bookkeeping). Callers
|
|
26
|
+
* that want per-text caching should use stable per-text batches.
|
|
27
|
+
*
|
|
28
|
+
* - **TTL:** 24h default, configurable. Lazy expiry on access.
|
|
29
|
+
*
|
|
30
|
+
* - **Pluggable store:** in-memory default (Map-backed); custom store
|
|
31
|
+
* honored as-is. Disk persistence is intentionally not provided here —
|
|
32
|
+
* embedding payloads (large `number[][]`) make on-disk JSON a bad fit;
|
|
33
|
+
* callers who want it should pass a custom store.
|
|
34
|
+
*
|
|
35
|
+
* - **Env gate:** honors `process.env.V3_EVAL_CACHE` for parity with
|
|
36
|
+
* `cacheMiddleware`. Override via the `enabled` option.
|
|
37
|
+
*
|
|
38
|
+
* @packageDocumentation
|
|
39
|
+
*/
|
|
40
|
+
import { hashKey } from '../cache.js';
|
|
41
|
+
// ============================================================================
|
|
42
|
+
// Stores
|
|
43
|
+
// ============================================================================
|
|
44
|
+
/**
 * Default in-process store: a thin wrapper around a `Map` that exposes the
 * `get` / `set` / `delete` trio the middleware calls on its store.
 */
class MemoryStore {
    map = new Map();
    /** Returns the entry stored under `cacheKey`, or `undefined` when absent. */
    get(cacheKey) {
        const entry = this.map.get(cacheKey);
        return entry;
    }
    /** Stores `entry` under `cacheKey`, replacing any previous entry. */
    set(cacheKey, entry) {
        this.map.set(cacheKey, entry);
    }
    /** Drops the entry stored under `cacheKey`; a missing key is a no-op. */
    delete(cacheKey) {
        this.map.delete(cacheKey);
    }
}
|
|
56
|
+
// ============================================================================
|
|
57
|
+
// Helpers
|
|
58
|
+
// ============================================================================
|
|
59
|
+
// Default cache-entry lifetime: 24 hours, expressed in milliseconds.
const DEFAULT_TTL_MS = 86_400_000;
|
|
60
|
+
/**
 * Default cache-key derivation: hashes the batch `values`, the `modelId` and
 * the call's `providerOptions` together via `hashKey`.
 *
 * @param params - Embed call options; only `values` and `providerOptions` are read.
 * @param modelId - Identifier of the model handling the call.
 * @returns Whatever `hashKey` produces for that triple.
 */
function defaultKeyHash(params, modelId) {
    const { values, providerOptions } = params;
    return hashKey({ values, modelId, providerOptions });
}
|
|
67
|
+
/**
 * Reports whether the `V3_EVAL_CACHE` environment gate is switched on.
 *
 * The gate counts as on when the variable holds any non-empty string, which
 * includes values such as `'0'` or `'false'`.
 *
 * Runtimes that expose no `process` global (or no `process.env`) are treated
 * as "gate off" instead of letting the lookup throw a `ReferenceError`, so
 * building the middleware without an explicit `enabled` option stays safe
 * outside Node.
 *
 * @returns `true` when `V3_EVAL_CACHE` is a non-empty string, else `false`.
 */
function envGateEnabled() {
    // `typeof` is the only safe probe for a global that may not be declared.
    if (typeof process === 'undefined' || process.env == null)
        return false;
    const v = process.env['V3_EVAL_CACHE'];
    return typeof v === 'string' && v.length > 0;
}
|
|
71
|
+
/**
 * Tells whether a cache entry has outlived its time-to-live.
 *
 * @param entry - Cache entry; only its `createdAt` timestamp is read.
 * @param ttlMs - Maximum allowed age in milliseconds.
 * @returns `true` once the entry's age strictly exceeds `ttlMs`.
 */
function isExpired(entry, ttlMs) {
    const ageMs = Date.now() - entry.createdAt;
    return ageMs > ttlMs;
}
|
|
74
|
+
// ============================================================================
|
|
75
|
+
// Middleware
|
|
76
|
+
// ============================================================================
|
|
77
|
+
/**
 * Creates the embedding-cache middleware to hand to `wrapEmbeddingModel`.
 *
 * Settings are resolved once, when this factory runs: `ttlMs` falls back to
 * `DEFAULT_TTL_MS`, `keyHash` to `defaultKeyHash`, `store` to a fresh
 * `MemoryStore` (when omitted or `'memory'`), and `enabled` to the
 * `V3_EVAL_CACHE` env gate.
 *
 * @example
 * ```ts
 * import { wrapEmbeddingModel } from 'ai'
 * import { embeddingCacheMiddleware } from 'ai-functions'
 *
 * const model = wrapEmbeddingModel({
 *   model: openai.embedding('text-embedding-3-small'),
 *   middleware: embeddingCacheMiddleware({ ttlMs: 86_400_000 }),
 * })
 * ```
 */
export function embeddingCacheMiddleware(options = {}) {
    const ttlMs = options.ttlMs ?? DEFAULT_TTL_MS;
    const keyHash = options.keyHash ?? defaultKeyHash;
    const wantsBuiltInStore = options.store === undefined || options.store === 'memory';
    const store = wantsBuiltInStore ? new MemoryStore() : options.store;
    const enabled = options.enabled ?? envGateEnabled();
    return {
        specificationVersion: 'v3',
        async wrapEmbed({ doEmbed, params, model }) {
            // Cache switched off: behave as a plain passthrough.
            if (!enabled) {
                return doEmbed();
            }
            const key = keyHash(params, model.modelId);
            const hit = store.get(key);
            if (hit !== undefined) {
                if (!isExpired(hit, ttlMs)) {
                    // Live hit: replay only the embeddings and warnings.
                    // Provider-side metadata (response headers, body, usage)
                    // is deliberately left out; callers that read those
                    // should disable the cache.
                    return {
                        embeddings: hit.embeddings,
                        warnings: hit.warnings,
                    };
                }
                // Stale entry: evict it lazily, then fall through to a real call.
                store.delete(key);
            }
            const fresh = await doEmbed();
            store.set(key, {
                embeddings: fresh.embeddings,
                warnings: fresh.warnings,
                createdAt: Date.now(),
            });
            return fresh;
        },
    };
}
|
|
128
|
+
//# sourceMappingURL=embed-cache.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embed-cache.js","sourceRoot":"","sources":["../../src/middleware/embed-cache.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AASH,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAA;AAmDrC,+EAA+E;AAC/E,SAAS;AACT,+EAA+E;AAE/E,MAAM,WAAW;IACE,GAAG,GAAiC,IAAI,GAAG,EAAE,CAAA;IAC9D,GAAG,CAAC,GAAW;QACb,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IAC1B,CAAC;IACD,GAAG,CAAC,GAAW,EAAE,KAAsB;QACrC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;IAC1B,CAAC;IACD,MAAM,CAAC,GAAW;QAChB,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;IACtB,CAAC;CACF;AAED,+EAA+E;AAC/E,UAAU;AACV,+EAA+E;AAE/E,MAAM,cAAc,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAA;AAE1C,SAAS,cAAc,CAAC,MAAmC,EAAE,OAAe;IAC1E,OAAO,OAAO,CAAC;QACb,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,OAAO;QACP,eAAe,EAAE,MAAM,CAAC,eAAe;KACxC,CAAC,CAAA;AACJ,CAAC;AAED,SAAS,cAAc;IACrB,MAAM,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,CAAA;IACtC,OAAO,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAA;AAC9C,CAAC;AAED,SAAS,SAAS,CAAC,KAAsB,EAAE,KAAa;IACtD,OAAO,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,SAAS,GAAG,KAAK,CAAA;AAC7C,CAAC;AAED,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,wBAAwB,CACtC,UAAuC,EAAE;IAEzC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,cAAc,CAAA;IAC7C,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,cAAc,CAAA;IACjD,MAAM,KAAK,GACT,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,WAAW,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAA;IAC/F,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,cAAc,EAAE,CAAA;IAEnD,OAAO;QACL,oBAAoB,EAAE,IAAI;QAC1B,KAAK,CAAC,SAAS,CAAC,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE;YACxC,IAAI,CAAC,OAAO;gBAAE,OAAO,OAAO,EAAE,CAAA;YAC9B,MAAM,GAAG,GAAG,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,OAAO,CAAC,CAAA;YAC1C,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;YAC7B,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;gBACzB,IAAI,SAAS,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC7B,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;gBACnB,CAAC;qBAAM,CAAC;oBACN,6DAA6D;oBAC7D,mEAAmE;oBACnE,6DAA6D;oBAC7D,MAAM,MAAM,GAA2B;wBACrC,UAAU,EAAE,MAAM,CAAC,UAAU;
wBAC7B,QAAQ,EAAE,MAAM,CAAC,QAAQ;qBAC1B,CAAA;oBACD,OAAO,MAAM,CAAA;gBACf,CAAC;YACH,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,OAAO,EAAE,CAAA;YAC9B,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE;gBACb,UAAU,EAAE,MAAM,CAAC,UAAU;gBAC7B,QAAQ,EAAE,MAAM,CAAC,QAAQ;gBACzB,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;aACtB,CAAC,CAAA;YACF,OAAO,MAAM,CAAA;QACf,CAAC;KACF,CAAA;AACH,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Middleware barrel — composable AI SDK 6 `LanguageModelV3Middleware`
|
|
3
|
+
* primitives for `wrapLanguageModel` (plus `embeddingCacheMiddleware`, an `EmbeddingModelV3Middleware` for `wrapEmbeddingModel`).
|
|
4
|
+
*
|
|
5
|
+
* @packageDocumentation
|
|
6
|
+
*/
|
|
7
|
+
export { cacheMiddleware, type CacheMiddlewareOptions, type CacheMiddlewareStore } from './cache.js';
|
|
8
|
+
export { embeddingCacheMiddleware, type EmbedCacheMiddlewareOptions, type EmbedCacheMiddlewareStore, } from './embed-cache.js';
|
|
9
|
+
export { budgetMiddleware, type BudgetMiddlewareOptions, type PricingOverlay } from './budget.js';
|
|
10
|
+
export { traceMiddleware, type TraceEvent, type TraceEventKind, type TraceMiddlewareOptions, } from './trace.js';
|
|
11
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/middleware/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,eAAe,EAAE,KAAK,sBAAsB,EAAE,KAAK,oBAAoB,EAAE,MAAM,YAAY,CAAA;AAEpG,OAAO,EACL,wBAAwB,EACxB,KAAK,2BAA2B,EAChC,KAAK,yBAAyB,GAC/B,MAAM,kBAAkB,CAAA;AAEzB,OAAO,EAAE,gBAAgB,EAAE,KAAK,uBAAuB,EAAE,KAAK,cAAc,EAAE,MAAM,aAAa,CAAA;AAEjG,OAAO,EACL,eAAe,EACf,KAAK,UAAU,EACf,KAAK,cAAc,EACnB,KAAK,sBAAsB,GAC5B,MAAM,YAAY,CAAA"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Middleware barrel — composable AI SDK 6 `LanguageModelV3Middleware`
|
|
3
|
+
* primitives for `wrapLanguageModel` (plus `embeddingCacheMiddleware`, an `EmbeddingModelV3Middleware` for `wrapEmbeddingModel`).
|
|
4
|
+
*
|
|
5
|
+
* @packageDocumentation
|
|
6
|
+
*/
|
|
7
|
+
export { cacheMiddleware } from './cache.js';
|
|
8
|
+
export { embeddingCacheMiddleware, } from './embed-cache.js';
|
|
9
|
+
export { budgetMiddleware } from './budget.js';
|
|
10
|
+
export { traceMiddleware, } from './trace.js';
|
|
11
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/middleware/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,eAAe,EAA0D,MAAM,YAAY,CAAA;AAEpG,OAAO,EACL,wBAAwB,GAGzB,MAAM,kBAAkB,CAAA;AAEzB,OAAO,EAAE,gBAAgB,EAAqD,MAAM,aAAa,CAAA;AAEjG,OAAO,EACL,eAAe,GAIhB,MAAM,YAAY,CAAA"}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* traceMiddleware — emit per-call trace events for `wrapLanguageModel`
|
|
3
|
+
*
|
|
4
|
+
* Wraps `doGenerate` / `doStream` and emits a {@link TraceEvent} on every
|
|
5
|
+
* completion. The sink is opaque (caller supplies `emit`) so this primitive
|
|
6
|
+
* works equally well piping into:
|
|
7
|
+
*
|
|
8
|
+
* - the v3 cascade-walker InvocationEvent stream (round 16+ work to add
|
|
9
|
+
* `'persona-trace'` / `'cascade-trace'` to the union),
|
|
10
|
+
* - an {@link import('../eval-log/index.js').EvalLogStore} for fixture
|
|
11
|
+
* replay,
|
|
12
|
+
* - OpenTelemetry / Datadog / Honeycomb adapters that map the event into
|
|
13
|
+
* a span.
|
|
14
|
+
*
|
|
15
|
+
* **Emit-error tolerance:** if the supplied `emit` throws, we *swallow* the
|
|
16
|
+
* error (with a one-time `console.warn`) so a flaky trace sink can never
|
|
17
|
+
* break the wrapped LLM call. This matches the Evalite v0.19 trace
|
|
18
|
+
* middleware behaviour.
|
|
19
|
+
*
|
|
20
|
+
* Composition note: install **last** so the event sees the final outcome
|
|
21
|
+
* (post-cache, post-budget). The event's `costUsd` field is best-effort —
|
|
22
|
+
* the trace middleware doesn't have direct access to the budget tracker, so
|
|
23
|
+
* the caller can pass a `getCostUsd` resolver if they want costs in the
|
|
24
|
+
* event payload.
|
|
25
|
+
*
|
|
26
|
+
* @packageDocumentation
|
|
27
|
+
*/
|
|
28
|
+
import type { LanguageModelV3Middleware, LanguageModelV3Usage } from '@ai-sdk/provider';
|
|
29
|
+
/**
 * Discriminator for the originating call site. Callers inject this via the
 * `kind` option so a single sink can fan events into different downstream
 * streams (persona panel vs. cascade walker vs. ad-hoc test).
 *
 * The trailing `string & {}` member keeps the type open to any
 * caller-defined kind. A bare `string` member would absorb the literal
 * members and collapse the whole union to `string`; the intersection avoids
 * that, so the well-known kinds still appear in editor completion. Every
 * `string` remains assignable to this type, and this type remains assignable
 * to `string`.
 */
export type TraceEventKind = 'persona-trace' | 'cascade-trace' | 'eval-trace' | (string & {});
|
|
35
|
+
/**
 * Payload handed to the trace sink each time a wrapped call completes.
 *
 * Design notes:
 * - `prompt` and `response` are plain strings: the structured
 *   `LanguageModelV3Prompt` / `LanguageModelV3Content[]` shapes are
 *   intentionally flattened so downstream storage stays cheap.
 * - `usage` carries the raw V3 usage shape (cache breakdown included); the
 *   EvalLogStore consumer flattens it into total counts.
 * - `costUsd` is optional because the trace middleware does not compute cost
 *   itself. Callers either supply a resolver or derive the cost downstream
 *   from `usage`.
 */
export interface TraceEvent {
    kind: TraceEventKind;
    model: string;
    prompt: string;
    response: string;
    usage: LanguageModelV3Usage | undefined;
    costUsd?: number;
    durationMs: number;
    /** Caller-supplied tags, available for downstream filtering. */
    tags?: Record<string, string>;
}
|
|
59
|
+
/** Configuration accepted by {@link traceMiddleware}. */
export interface TraceMiddlewareOptions {
    /**
     * Opaque sink that receives every event. Anything thrown from `emit` is
     * swallowed (with a one-time `console.warn`) so a flaky sink never breaks
     * the wrapped LLM call.
     */
    emit: (event: TraceEvent) => void | Promise<void>;
    /**
     * Discriminator copied into the event's `kind` field. Defaults to
     * `'eval-trace'`.
     */
    kind?: TraceEventKind;
    /**
     * Optional cost resolver. When supplied, it is called with the modelId
     * and the V3 usage shape (in that order) and its result is set on
     * `event.costUsd`. Useful when the caller has a side-channel pricing
     * table (the budgetMiddleware's tracker) and wants costs in the trace
     * event itself.
     */
    getCostUsd?: (modelId: string, usage: LanguageModelV3Usage | undefined) => number;
    /** Caller-supplied tags merged into every emitted event. */
    tags?: Record<string, string>;
}
|
|
81
|
+
/**
 * Creates a trace middleware for `wrapLanguageModel`. A {@link TraceEvent}
 * is emitted on every successful `doGenerate` / `doStream` completion.
 * Errors thrown by `emit` are swallowed (with a one-time warning) so a flaky
 * trace sink can never break the wrapped LLM call.
 *
 * @param options - Sink and event settings; `emit` is the only required field.
 * @returns A `LanguageModelV3Middleware`.
 *
 * @example
 * ```ts
 * import { wrapLanguageModel } from 'ai'
 * import { traceMiddleware, getEvalLogStore } from 'ai-functions'
 *
 * const store = getEvalLogStore()
 * const model = wrapLanguageModel({
 *   model: openai('gpt-4o'),
 *   middleware: traceMiddleware({
 *     kind: 'cascade-trace',
 *     emit: (event) => store.record({ ...event, costUsd: event.costUsd ?? 0 }),
 *   }),
 * })
 * ```
 */
export declare function traceMiddleware(options: TraceMiddlewareOptions): LanguageModelV3Middleware;
|
|
103
|
+
//# sourceMappingURL=trace.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"trace.d.ts","sourceRoot":"","sources":["../../src/middleware/trace.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,KAAK,EAGV,yBAAyB,EAGzB,oBAAoB,EACrB,MAAM,kBAAkB,CAAA;AAMzB;;;;GAIG;AACH,MAAM,MAAM,cAAc,GAAG,eAAe,GAAG,eAAe,GAAG,YAAY,GAAG,MAAM,CAAA;AAEtF;;;;;;;;;;;;GAYG;AACH,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,cAAc,CAAA;IACpB,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,KAAK,EAAE,oBAAoB,GAAG,SAAS,CAAA;IACvC,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,UAAU,EAAE,MAAM,CAAA;IAClB,8DAA8D;IAC9D,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAC9B;AAED,2CAA2C;AAC3C,MAAM,WAAW,sBAAsB;IACrC;;;OAGG;IACH,IAAI,EAAE,CAAC,KAAK,EAAE,UAAU,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAA;IACjD;;;OAGG;IACH,IAAI,CAAC,EAAE,cAAc,CAAA;IACrB;;;;;OAKG;IACH,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,oBAAoB,GAAG,SAAS,KAAK,MAAM,CAAA;IACjF,qEAAqE;IACrE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAC9B;AAwED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,sBAAsB,GAAG,yBAAyB,CA4D1F"}
|