ai-functions 2.1.3 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284) hide show
  1. package/.turbo/turbo-build.log +1 -1
  2. package/CHANGELOG.md +90 -1
  3. package/README.md +38 -0
  4. package/dist/ai-promise.d.ts +3 -3
  5. package/dist/ai-promise.d.ts.map +1 -1
  6. package/dist/ai-promise.js +135 -64
  7. package/dist/ai-promise.js.map +1 -1
  8. package/dist/ai-schemas.d.ts +56 -0
  9. package/dist/ai-schemas.d.ts.map +1 -0
  10. package/dist/ai-schemas.js +53 -0
  11. package/dist/ai-schemas.js.map +1 -0
  12. package/dist/ai.d.ts +16 -242
  13. package/dist/ai.d.ts.map +1 -1
  14. package/dist/ai.js +51 -858
  15. package/dist/ai.js.map +1 -1
  16. package/dist/batch/anthropic.d.ts +6 -4
  17. package/dist/batch/anthropic.d.ts.map +1 -1
  18. package/dist/batch/anthropic.js +83 -145
  19. package/dist/batch/anthropic.js.map +1 -1
  20. package/dist/batch/bedrock.d.ts +8 -30
  21. package/dist/batch/bedrock.d.ts.map +1 -1
  22. package/dist/batch/bedrock.js +155 -338
  23. package/dist/batch/bedrock.js.map +1 -1
  24. package/dist/batch/cloudflare.d.ts +8 -20
  25. package/dist/batch/cloudflare.d.ts.map +1 -1
  26. package/dist/batch/cloudflare.js +68 -189
  27. package/dist/batch/cloudflare.js.map +1 -1
  28. package/dist/batch/google.d.ts +6 -20
  29. package/dist/batch/google.d.ts.map +1 -1
  30. package/dist/batch/google.js +70 -238
  31. package/dist/batch/google.js.map +1 -1
  32. package/dist/batch/index.d.ts +4 -1
  33. package/dist/batch/index.d.ts.map +1 -1
  34. package/dist/batch/index.js +4 -1
  35. package/dist/batch/index.js.map +1 -1
  36. package/dist/batch/memory.d.ts +1 -1
  37. package/dist/batch/memory.d.ts.map +1 -1
  38. package/dist/batch/memory.js +14 -10
  39. package/dist/batch/memory.js.map +1 -1
  40. package/dist/batch/openai.d.ts +11 -14
  41. package/dist/batch/openai.d.ts.map +1 -1
  42. package/dist/batch/openai.js +52 -156
  43. package/dist/batch/openai.js.map +1 -1
  44. package/dist/batch/provider.d.ts +111 -0
  45. package/dist/batch/provider.d.ts.map +1 -0
  46. package/dist/batch/provider.js +233 -0
  47. package/dist/batch/provider.js.map +1 -0
  48. package/dist/batch-map.d.ts.map +1 -1
  49. package/dist/batch-map.js +23 -17
  50. package/dist/batch-map.js.map +1 -1
  51. package/dist/batch-queue.d.ts +65 -0
  52. package/dist/batch-queue.d.ts.map +1 -1
  53. package/dist/batch-queue.js +169 -14
  54. package/dist/batch-queue.js.map +1 -1
  55. package/dist/budget.d.ts.map +1 -1
  56. package/dist/budget.js +27 -14
  57. package/dist/budget.js.map +1 -1
  58. package/dist/cache.d.ts +23 -0
  59. package/dist/cache.d.ts.map +1 -1
  60. package/dist/cache.js +36 -15
  61. package/dist/cache.js.map +1 -1
  62. package/dist/context.d.ts +26 -8
  63. package/dist/context.d.ts.map +1 -1
  64. package/dist/context.js +64 -62
  65. package/dist/context.js.map +1 -1
  66. package/dist/digital-objects-registry.d.ts +229 -0
  67. package/dist/digital-objects-registry.d.ts.map +1 -0
  68. package/dist/digital-objects-registry.js +617 -0
  69. package/dist/digital-objects-registry.js.map +1 -0
  70. package/dist/embeddings.d.ts +2 -2
  71. package/dist/embeddings.d.ts.map +1 -1
  72. package/dist/errors.d.ts +22 -0
  73. package/dist/errors.d.ts.map +1 -0
  74. package/dist/errors.js +35 -0
  75. package/dist/errors.js.map +1 -0
  76. package/dist/eval/runner.d.ts +8 -0
  77. package/dist/eval/runner.d.ts.map +1 -1
  78. package/dist/eval/runner.js +41 -35
  79. package/dist/eval/runner.js.map +1 -1
  80. package/dist/eval-log/in-memory.d.ts +34 -0
  81. package/dist/eval-log/in-memory.d.ts.map +1 -0
  82. package/dist/eval-log/in-memory.js +84 -0
  83. package/dist/eval-log/in-memory.js.map +1 -0
  84. package/dist/eval-log/index.d.ts +29 -0
  85. package/dist/eval-log/index.d.ts.map +1 -0
  86. package/dist/eval-log/index.js +39 -0
  87. package/dist/eval-log/index.js.map +1 -0
  88. package/dist/eval-log/types.d.ts +101 -0
  89. package/dist/eval-log/types.d.ts.map +1 -0
  90. package/dist/eval-log/types.js +16 -0
  91. package/dist/eval-log/types.js.map +1 -0
  92. package/dist/function-registry.d.ts +176 -0
  93. package/dist/function-registry.d.ts.map +1 -0
  94. package/dist/function-registry.js +685 -0
  95. package/dist/function-registry.js.map +1 -0
  96. package/dist/generate.d.ts +9 -3
  97. package/dist/generate.d.ts.map +1 -1
  98. package/dist/generate.js +18 -18
  99. package/dist/generate.js.map +1 -1
  100. package/dist/index.d.ts +18 -11
  101. package/dist/index.d.ts.map +1 -1
  102. package/dist/index.js +35 -18
  103. package/dist/index.js.map +1 -1
  104. package/dist/logger.d.ts +118 -0
  105. package/dist/logger.d.ts.map +1 -0
  106. package/dist/logger.js +187 -0
  107. package/dist/logger.js.map +1 -0
  108. package/dist/middleware/budget.d.ts +84 -0
  109. package/dist/middleware/budget.d.ts.map +1 -0
  110. package/dist/middleware/budget.js +110 -0
  111. package/dist/middleware/budget.js.map +1 -0
  112. package/dist/middleware/cache.d.ts +103 -0
  113. package/dist/middleware/cache.d.ts.map +1 -0
  114. package/dist/middleware/cache.js +228 -0
  115. package/dist/middleware/cache.js.map +1 -0
  116. package/dist/middleware/embed-cache.d.ts +99 -0
  117. package/dist/middleware/embed-cache.d.ts.map +1 -0
  118. package/dist/middleware/embed-cache.js +128 -0
  119. package/dist/middleware/embed-cache.js.map +1 -0
  120. package/dist/middleware/index.d.ts +11 -0
  121. package/dist/middleware/index.d.ts.map +1 -0
  122. package/dist/middleware/index.js +11 -0
  123. package/dist/middleware/index.js.map +1 -0
  124. package/dist/middleware/trace.d.ts +103 -0
  125. package/dist/middleware/trace.d.ts.map +1 -0
  126. package/dist/middleware/trace.js +176 -0
  127. package/dist/middleware/trace.js.map +1 -0
  128. package/dist/primitives.d.ts +120 -1
  129. package/dist/primitives.d.ts.map +1 -1
  130. package/dist/primitives.js +398 -26
  131. package/dist/primitives.js.map +1 -1
  132. package/dist/retry.d.ts +66 -1
  133. package/dist/retry.d.ts.map +1 -1
  134. package/dist/retry.js +115 -8
  135. package/dist/retry.js.map +1 -1
  136. package/dist/sandbox.d.ts +36 -0
  137. package/dist/sandbox.d.ts.map +1 -0
  138. package/dist/sandbox.js +44 -0
  139. package/dist/sandbox.js.map +1 -0
  140. package/dist/schema.js +2 -2
  141. package/dist/schema.js.map +1 -1
  142. package/dist/telemetry.d.ts +128 -0
  143. package/dist/telemetry.d.ts.map +1 -0
  144. package/dist/telemetry.js +285 -0
  145. package/dist/telemetry.js.map +1 -0
  146. package/dist/template.d.ts.map +1 -1
  147. package/dist/template.js +6 -1
  148. package/dist/template.js.map +1 -1
  149. package/dist/tool-orchestration.d.ts +66 -4
  150. package/dist/tool-orchestration.d.ts.map +1 -1
  151. package/dist/tool-orchestration.js +123 -23
  152. package/dist/tool-orchestration.js.map +1 -1
  153. package/dist/type-guards.d.ts +28 -0
  154. package/dist/type-guards.d.ts.map +1 -0
  155. package/dist/type-guards.js +29 -0
  156. package/dist/type-guards.js.map +1 -0
  157. package/dist/types.d.ts +155 -19
  158. package/dist/types.d.ts.map +1 -1
  159. package/dist/types.js +36 -1
  160. package/dist/types.js.map +1 -1
  161. package/dist/wrap-for-v3.d.ts +80 -0
  162. package/dist/wrap-for-v3.d.ts.map +1 -0
  163. package/dist/wrap-for-v3.js +89 -0
  164. package/dist/wrap-for-v3.js.map +1 -0
  165. package/examples/00-quickstart.ts +232 -0
  166. package/examples/01-rag-chatbot.ts +212 -0
  167. package/examples/02-multi-agent-research.ts +290 -0
  168. package/examples/03-email-classification.ts +379 -0
  169. package/examples/04-content-moderation.ts +400 -0
  170. package/examples/05-document-extraction.ts +455 -0
  171. package/examples/06-streaming-chat-nextjs.ts +437 -0
  172. package/examples/07-cloudflare-worker.ts +483 -0
  173. package/examples/08-batch-processing.ts +491 -0
  174. package/examples/09-budget-constrained.ts +527 -0
  175. package/examples/10-tool-orchestration.ts +565 -0
  176. package/examples/11-retry-resilience.ts +403 -0
  177. package/examples/12-caching-strategies.ts +422 -0
  178. package/examples/README.md +145 -0
  179. package/package.json +29 -25
  180. package/src/ai-promise.ts +226 -140
  181. package/src/ai-schemas.ts +122 -0
  182. package/src/ai.ts +71 -1176
  183. package/src/batch/anthropic.ts +96 -161
  184. package/src/batch/bedrock.ts +203 -454
  185. package/src/batch/cloudflare.ts +99 -282
  186. package/src/batch/google.ts +91 -297
  187. package/src/batch/index.ts +4 -1
  188. package/src/batch/memory.ts +15 -10
  189. package/src/batch/openai.ts +65 -193
  190. package/src/batch/provider.ts +336 -0
  191. package/src/batch-map.ts +29 -24
  192. package/src/batch-queue.ts +200 -11
  193. package/src/budget.ts +31 -18
  194. package/src/cache.ts +45 -17
  195. package/src/context.ts +106 -77
  196. package/src/digital-objects-registry.ts +750 -0
  197. package/src/errors.ts +37 -0
  198. package/src/eval/runner.ts +60 -36
  199. package/src/eval-log/in-memory.ts +90 -0
  200. package/src/eval-log/index.ts +46 -0
  201. package/src/eval-log/types.ts +110 -0
  202. package/src/function-registry.ts +874 -0
  203. package/src/generate.ts +33 -28
  204. package/src/index.ts +122 -21
  205. package/src/logger.ts +232 -0
  206. package/src/middleware/budget.ts +171 -0
  207. package/src/middleware/cache.ts +299 -0
  208. package/src/middleware/embed-cache.ts +195 -0
  209. package/src/middleware/index.ts +23 -0
  210. package/src/middleware/trace.ts +248 -0
  211. package/src/primitives.ts +589 -62
  212. package/src/retry.ts +144 -18
  213. package/src/sandbox.ts +52 -0
  214. package/src/schema.ts +8 -8
  215. package/src/telemetry.ts +403 -0
  216. package/src/template.ts +8 -4
  217. package/src/tool-orchestration.ts +213 -48
  218. package/src/type-guards.ts +31 -0
  219. package/src/types.ts +186 -27
  220. package/src/wrap-for-v3.ts +105 -0
  221. package/test/ai-promise.test.ts +1080 -0
  222. package/test/ai-proxy.test.ts +1 -1
  223. package/test/batch-autosubmit-errors.test.ts +49 -37
  224. package/test/batch-blog-posts.test.ts +87 -129
  225. package/test/core-functions.test.ts +183 -579
  226. package/test/decide.test.ts +154 -322
  227. package/test/define.test.ts +211 -8
  228. package/test/digital-objects-registry.test.ts +760 -0
  229. package/test/embedding-cache-middleware.test.ts +140 -0
  230. package/test/fill-template.test.ts +89 -0
  231. package/test/generate-core.test.ts +140 -229
  232. package/test/implicit-batch.test.ts +22 -65
  233. package/test/retry-policy-integration.test.ts +117 -0
  234. package/test/sandbox-execution.test.ts +155 -0
  235. package/test/schema.test.ts +55 -19
  236. package/test/template.test.ts +1164 -0
  237. package/test/tool-orchestration.test.ts +270 -0
  238. package/test/wrap-for-v3.test.ts +612 -0
  239. package/vitest.config.js +6 -0
  240. package/vitest.config.ts +20 -0
  241. package/LICENSE +0 -21
  242. package/dist/rpc/auth.d.ts +0 -69
  243. package/dist/rpc/auth.d.ts.map +0 -1
  244. package/dist/rpc/auth.js +0 -136
  245. package/dist/rpc/auth.js.map +0 -1
  246. package/dist/rpc/client.d.ts +0 -62
  247. package/dist/rpc/client.d.ts.map +0 -1
  248. package/dist/rpc/client.js +0 -103
  249. package/dist/rpc/client.js.map +0 -1
  250. package/dist/rpc/deferred.d.ts +0 -60
  251. package/dist/rpc/deferred.d.ts.map +0 -1
  252. package/dist/rpc/deferred.js +0 -96
  253. package/dist/rpc/deferred.js.map +0 -1
  254. package/dist/rpc/index.d.ts +0 -22
  255. package/dist/rpc/index.d.ts.map +0 -1
  256. package/dist/rpc/index.js +0 -38
  257. package/dist/rpc/index.js.map +0 -1
  258. package/dist/rpc/local.d.ts +0 -42
  259. package/dist/rpc/local.d.ts.map +0 -1
  260. package/dist/rpc/local.js +0 -50
  261. package/dist/rpc/local.js.map +0 -1
  262. package/dist/rpc/server.d.ts +0 -165
  263. package/dist/rpc/server.d.ts.map +0 -1
  264. package/dist/rpc/server.js +0 -405
  265. package/dist/rpc/server.js.map +0 -1
  266. package/dist/rpc/session.d.ts +0 -32
  267. package/dist/rpc/session.d.ts.map +0 -1
  268. package/dist/rpc/session.js +0 -43
  269. package/dist/rpc/session.js.map +0 -1
  270. package/dist/rpc/transport.d.ts +0 -306
  271. package/dist/rpc/transport.d.ts.map +0 -1
  272. package/dist/rpc/transport.js +0 -731
  273. package/dist/rpc/transport.js.map +0 -1
  274. package/src/batch/anthropic.js +0 -256
  275. package/src/batch/bedrock.js +0 -584
  276. package/src/batch/cloudflare.js +0 -287
  277. package/src/batch/google.js +0 -359
  278. package/src/batch/index.js +0 -30
  279. package/src/batch/memory.js +0 -187
  280. package/src/batch/openai.js +0 -402
  281. package/src/eval/index.js +0 -7
  282. package/src/eval/models.js +0 -119
  283. package/src/eval/runner.js +0 -147
  284. package/test/schema.test.js +0 -96
@@ -0,0 +1,171 @@
1
+ /**
2
+ * budgetMiddleware — record token usage + cost into a {@link BudgetTracker}
3
+ *
4
+ * Replaces the post-hoc duck-typing in
5
+ * `services-as-software/src/v3/invoke/cost-estimate.ts` with a single
6
+ * AI-SDK-6 middleware: on `doGenerate` / `doStream` completion, read the
7
+ * `LanguageModelV3Usage` shape directly off the result and call
8
+ * `tracker.recordUsage(...)`. The pricing overlay is supplied via
9
+ * `customPricing` on the {@link BudgetTracker} (or we hand the tracker the
10
+ * pricing at construction time when the caller wants per-call isolation).
11
+ *
12
+ * Key V3 → BudgetTracker mapping detail: AI SDK 6 reports
13
+ * `usage.inputTokens.total` / `usage.outputTokens.total` as
14
+ * `number | undefined`. We coerce undefined → 0 so partial-streaming results
15
+ * (where the upstream provider didn't emit token counts) don't blow up the
16
+ * tracker. The `inputTokens.cacheRead` / `inputTokens.cacheWrite` breakdown
17
+ * is *not* propagated yet — round 13+ work to add prompt-cache awareness to
18
+ * BudgetTracker.
19
+ *
20
+ * Composition note: install **after** cache (so a cache hit still records
21
+ * the cost — the wrapped result is the same regardless of which layer
22
+ * served it) and **before** trace (so the trace event sees the final
23
+ * computed cost via the tracker).
24
+ *
25
+ * @packageDocumentation
26
+ */
27
+
28
+ import type {
29
+ LanguageModelV3GenerateResult,
30
+ LanguageModelV3Middleware,
31
+ LanguageModelV3StreamPart,
32
+ LanguageModelV3StreamResult,
33
+ LanguageModelV3Usage,
34
+ } from '@ai-sdk/provider'
35
+ import type { BudgetTracker, ModelPricing } from '../budget.js'
36
+
37
+ // ============================================================================
38
+ // Types
39
+ // ============================================================================
40
+
/**
 * Per-model pricing table accepted by the budget middleware.
 *
 * Keys are model ids; each value is a {@link ModelPricing} entry, i.e. the
 * same shape `BudgetConfig.customPricing` uses. Per the original note the
 * rates are per-million-token USD prices, sourced in services-as-software
 * from the `language-models/data/models.json` catalog so that Llama /
 * DeepSeek / Mistral / Qwen / Grok / Perplexity Sonar get their real rates
 * (NOTE(review): units are not visible here — confirm against `ModelPricing`).
 */
export type PricingOverlay = { [modelId: string]: ModelPricing }
49
+
/** Options for {@link budgetMiddleware}. */
export interface BudgetMiddlewareOptions {
  /**
   * The {@link BudgetTracker} that usage is recorded into. Required: the
   * middleware never builds a tracker itself, because the tracker carries the
   * budget limits and alert callbacks that belong to the caller.
   */
  tracker: BudgetTracker
  /**
   * Pricing overlay (per-model rates).
   *
   * Reserved for future per-call pricing: the middleware currently accepts
   * this value but does **not** apply it — usage is forwarded to the tracker
   * with token counts and the model id only, so cost is computed from the
   * tracker's own pricing. Until this hook is wired in, supply per-model
   * rates through the tracker's `customPricing` at construction time.
   */
  pricing?: PricingOverlay
  /**
   * Model id to report to the tracker instead of the wrapped model's
   * `model.modelId`. Pass an alias (`'sonnet'`, `'opus'`) to bridge to the
   * alias-based pricing tables.
   */
  modelIdOverride?: string
}
72
+
73
+ // ============================================================================
74
+ // Helpers
75
+ // ============================================================================
76
+
/**
 * Flatten a V3 usage report into plain token counts.
 *
 * Any missing piece — the usage object itself, either token bucket, or a
 * bucket's `total` — is treated as `0`, so callers always get two numbers.
 *
 * @param usage - usage reported by the model, if any
 * @returns input and output token totals, defaulting to `0`
 */
function coerceUsage(usage: LanguageModelV3Usage | undefined): {
  inputTokens: number
  outputTokens: number
} {
  const inputTokens = usage?.inputTokens?.total ?? 0
  const outputTokens = usage?.outputTokens?.total ?? 0
  return { inputTokens, outputTokens }
}
87
+
/**
 * Forward one call's token usage to the tracker.
 *
 * Nothing is recorded when both the input and output counts coerce to zero.
 *
 * @param tracker - tracker receiving the usage
 * @param pricing - currently unused; see the note in the body
 * @param modelId - model id reported alongside the token counts
 * @param usage - usage reported by the model, if any
 */
function record(
  tracker: BudgetTracker,
  pricing: PricingOverlay | undefined,
  modelId: string,
  usage: LanguageModelV3Usage | undefined
): void {
  // `pricing` is a forward-looking hook only. Pricing reaches the tracker
  // through its own `customPricing`, set by the caller when the tracker is
  // constructed; keeping the parameter lets per-call pricing be added later
  // without a breaking signature change.
  void pricing

  const counts = coerceUsage(usage)
  const hasTokens = counts.inputTokens !== 0 || counts.outputTokens !== 0
  if (!hasTokens) return

  tracker.recordUsage({
    inputTokens: counts.inputTokens,
    outputTokens: counts.outputTokens,
    model: modelId,
  })
}
105
+
106
+ // ============================================================================
107
+ // Middleware
108
+ // ============================================================================
109
+
/**
 * Create a `wrapLanguageModel` middleware that feeds token usage into a
 * caller-owned {@link BudgetTracker}.
 *
 * - `wrapGenerate` awaits the downstream call and then records
 *   `result.usage`.
 * - `wrapStream` forwards every stream part untouched, remembers the `usage`
 *   carried by the most recent `'finish'` part, and records it once when the
 *   stream is flushed.
 *
 * The tracker is only touched after the downstream call resolves, so a
 * rejected `doGenerate` / `doStream` propagates unchanged with nothing
 * recorded. Calls whose input and output counts are both zero are skipped.
 *
 * @param options - tracker, optional pricing hook and model-id override
 * @returns a V3 language-model middleware
 *
 * @example
 * ```ts
 * import { wrapLanguageModel } from 'ai'
 * import { BudgetTracker, budgetMiddleware } from 'ai-functions'
 *
 * const tracker = new BudgetTracker({ maxCost: 1.0 })
 * const model = wrapLanguageModel({
 *   model: openai('gpt-4o'),
 *   middleware: budgetMiddleware({ tracker }),
 * })
 * ```
 */
export function budgetMiddleware(options: BudgetMiddlewareOptions): LanguageModelV3Middleware {
  const { tracker, pricing, modelIdOverride } = options

  return {
    specificationVersion: 'v3',

    wrapGenerate: async ({ doGenerate, model }) => {
      const generated = await doGenerate()
      const reportedModelId = modelIdOverride ?? model.modelId
      record(tracker, pricing, reportedModelId, generated.usage)
      return generated
    },

    wrapStream: async ({ doStream, model }) => {
      const upstream = await doStream()
      const reportedModelId = modelIdOverride ?? model.modelId

      // Usage from the latest 'finish' part; stays undefined if none arrives.
      let lastFinishUsage: LanguageModelV3Usage | undefined

      const usageTap = new TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart>({
        transform(part, controller) {
          if (part.type === 'finish') {
            lastFinishUsage = part.usage
          }
          controller.enqueue(part)
        },
        flush() {
          // Runs once, after the last part has been forwarded.
          record(tracker, pricing, reportedModelId, lastFinishUsage)
        },
      })

      const tappedStream = upstream.stream.pipeThrough(usageTap)
      const tapped: LanguageModelV3StreamResult = { ...upstream, stream: tappedStream }
      return tapped
    },
  }
}
168
+
169
+ // Re-export to make the result type available to consumers writing custom
170
+ // middleware chains.
171
+ export type { LanguageModelV3GenerateResult }
@@ -0,0 +1,299 @@
1
+ /**
2
+ * cacheMiddleware — content-addressable cache for `wrapLanguageModel`
3
+ *
4
+ * Implements the AI SDK cookbook's local-caching-middleware pattern
5
+ * (https://ai-sdk.dev/cookbook/node/local-caching-middleware) on top of the
6
+ * AI SDK 6 `LanguageModelV3Middleware` shape:
7
+ *
8
+ * - **Hit derivation:** content-hash of `{ prompt, modelId, responseFormat }`
9
+ * so a schema change (responseFormat.type === 'json' carries a `schema`
10
+ * JSONSchema7) invalidates the entry. Generation parameters (temperature,
11
+ * topP, etc.) are deliberately *not* part of the key for the eval-fixture
12
+ * use case — flipping temperature shouldn't blow up a 5x verify-time win.
13
+ * Callers who want strict keying should pass a custom `keyHash`.
14
+ *
15
+ * - **Stream support:** cached entries store the `LanguageModelV3StreamPart[]`
16
+ * array; `wrapStream` replays them via `simulateReadableStream` so consumers
17
+ * see the same chunked event sequence on a hit. (`wrapGenerate` is the
18
+ * common path; both share the same cache map.)
19
+ *
20
+ * - **TTL:** 24h default, configurable via `ttlMs`. Entries past TTL are
21
+ * evicted on access (lazy expiry — no background timer).
22
+ *
23
+ * - **Pluggable store:** in-memory default (Map-backed); `'disk'` writes to
24
+ * a JSON file at `.cache/v3-eval-cache.json` for cross-process fixture
25
+ * sharing. Disk reads/writes are best-effort — IO failures fall through
26
+ * to the wrapped model.
27
+ *
28
+ * - **Env gate:** honors `process.env.V3_EVAL_CACHE`. When unset/empty, the
29
+ * middleware short-circuits to a passthrough — useful for production where
30
+ * cache hits would be incorrect but the operator wants the same wrap chain.
31
+ * Set to any non-empty string to enable (e.g. `'1'`); note that `'0'` and `'false'` are non-empty and therefore also enable it.
32
+ *
33
+ * @packageDocumentation
34
+ */
35
+
36
+ import { simulateReadableStream } from 'ai'
37
+ import type {
38
+ LanguageModelV3CallOptions,
39
+ LanguageModelV3GenerateResult,
40
+ LanguageModelV3Middleware,
41
+ LanguageModelV3StreamPart,
42
+ LanguageModelV3StreamResult,
43
+ } from '@ai-sdk/provider'
44
+ import { hashKey } from '../cache.js'
45
+
46
+ // ============================================================================
47
+ // Types
48
+ // ============================================================================
49
+
50
+ /** Cached payload — both generate result and stream chunks under one key. */
51
+ interface CacheEntry {
52
+ /** Result captured from `doGenerate`. Absent if the entry came from a stream call. */
53
+ generateResult?: LanguageModelV3GenerateResult
54
+ /** Stream chunks captured from `doStream` (replayed via simulateReadableStream). */
55
+ streamChunks?: LanguageModelV3StreamPart[]
56
+ /** Insert epoch ms — drives TTL eviction. */
57
+ createdAt: number
58
+ }
59
+
/**
 * Storage contract for cached LLM results. All three operations are
 * synchronous; implement this to plug in a custom backend.
 */
export interface CacheMiddlewareStore {
  /** Look up the entry stored under `key`, or `undefined` when there is none. */
  get(key: string): CacheEntry | undefined
  /** Store `value` under `key`, replacing any previous entry. */
  set(key: string, value: CacheEntry): void
  /** Remove the entry stored under `key`, if any. */
  delete(key: string): void
}
66
+
/** Options for {@link cacheMiddleware}. Every field is optional. */
export interface CacheMiddlewareOptions {
  /**
   * Where cached entries live:
   * - `'memory'` — a process-local Map;
   * - `'disk'` — a JSON file (see `diskPath`) for cross-process fixture sharing;
   * - a custom {@link CacheMiddlewareStore} instance, used as-is.
   *
   * @default 'memory'
   */
  store?: 'memory' | 'disk' | CacheMiddlewareStore
  /**
   * Time-to-live in milliseconds. An entry older than this is evicted the
   * next time it is looked up.
   *
   * @default 86_400_000 (24h)
   */
  ttlMs?: number
  /**
   * Custom cache-key function. When omitted, the key is a stable hash of
   * `{ prompt, modelId, responseFormat }`.
   */
  keyHash?: (params: LanguageModelV3CallOptions, modelId: string) => string
  /**
   * Explicit on/off switch that overrides the env gate: `false` makes the
   * middleware a passthrough regardless of `V3_EVAL_CACHE`, `true` always
   * caches. When omitted, caching is on iff `process.env.V3_EVAL_CACHE` is a
   * non-empty string.
   */
  enabled?: boolean
  /** File used by the `'disk'` store (defaults to `.cache/v3-eval-cache.json`). */
  diskPath?: string
}
97
+
98
+ // ============================================================================
99
+ // Stores
100
+ // ============================================================================
101
+
/** Process-local {@link CacheMiddlewareStore} backed by a plain `Map`. */
class MemoryStore implements CacheMiddlewareStore {
  private readonly entries = new Map<string, CacheEntry>()

  /** Return the entry stored under `key`, or `undefined` when absent. */
  get(key: string): CacheEntry | undefined {
    return this.entries.get(key)
  }

  /** Store `value` under `key`, replacing any previous entry. */
  set(key: string, value: CacheEntry): void {
    this.entries.set(key, value)
  }

  /** Drop the entry stored under `key`; a no-op when absent. */
  delete(key: string): void {
    this.entries.delete(key)
  }
}
114
+
/**
 * Disk-backed {@link CacheMiddlewareStore} that persists the whole cache as
 * a single JSON file.
 *
 * Best-effort by design: any failure while reading, parsing or writing the
 * file is swallowed so a corrupt or unwritable cache never blocks an LLM
 * call — the store then simply behaves like an in-memory map.
 *
 * Things to be aware of:
 * - The file is read once, on first access, and the complete map is
 *   rewritten on every `set` / `delete`. Entries written by another process
 *   after that first read are therefore not seen, and are overwritten by
 *   this instance's next write.
 * - Entries go through `JSON.stringify` / `JSON.parse`, so values JSON
 *   cannot represent (e.g. `Date` objects) come back in their serialized
 *   form when loaded by a later process.
 * - Node built-ins are resolved lazily inside the `try` blocks so that merely
 *   importing this module does not require `fs`.
 */
class DiskStore implements CacheMiddlewareStore {
  private readonly path: string
  /** In-memory mirror of the file; `null` until the first access loads it. */
  private cache: Map<string, CacheEntry> | null = null

  constructor(path: string) {
    this.path = path
  }

  /**
   * Resolve a Node built-in module at call time.
   *
   * `require` is used when it exists (CommonJS). When this module is loaded
   * as native ESM there is no `require`, and the resulting ReferenceError
   * used to be swallowed by the best-effort `catch`, silently disabling all
   * disk IO; in that case fall back to `process.getBuiltinModule`.
   *
   * @throws when neither mechanism is available (callers catch this)
   */
  private static builtin<T>(id: 'fs' | 'path'): T {
    if (typeof require === 'function') {
      // Literal specifiers keep bundlers' static `require` analysis working.
      // eslint-disable-next-line @typescript-eslint/no-require-imports
      return (id === 'fs' ? require('fs') : require('path')) as T
    }
    const getBuiltinModule: unknown = Reflect.get(process, 'getBuiltinModule')
    if (typeof getBuiltinModule === 'function') {
      const loaded: unknown = getBuiltinModule.call(process, id)
      if (loaded !== undefined) return loaded as T
    }
    throw new Error(`Cannot load built-in module '${id}' in this runtime`)
  }

  /** Return the in-memory map, reading the file into it on first use. */
  private load(): Map<string, CacheEntry> {
    if (this.cache !== null) return this.cache
    // Assign before reading so a failed read still leaves a usable empty map.
    const entries = new Map<string, CacheEntry>()
    this.cache = entries
    try {
      const fs = DiskStore.builtin<typeof import('fs')>('fs')
      if (fs.existsSync(this.path)) {
        const raw = fs.readFileSync(this.path, 'utf-8')
        const parsed = JSON.parse(raw) as Record<string, CacheEntry>
        for (const [k, v] of Object.entries(parsed)) {
          entries.set(k, v)
        }
      }
    } catch {
      // best-effort
    }
    return entries
  }

  /** Write the whole map to disk, creating the parent directory if needed. */
  private flush(): void {
    if (this.cache === null) return
    try {
      const fs = DiskStore.builtin<typeof import('fs')>('fs')
      const path = DiskStore.builtin<typeof import('path')>('path')
      const dir = path.dirname(this.path)
      if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir, { recursive: true })
      }
      const obj = Object.fromEntries(this.cache)
      fs.writeFileSync(this.path, JSON.stringify(obj), 'utf-8')
    } catch {
      // best-effort
    }
  }

  /** Return the entry stored under `key`, or `undefined` when absent. */
  get(key: string): CacheEntry | undefined {
    return this.load().get(key)
  }

  /** Store `value` under `key` and persist the map. */
  set(key: string, value: CacheEntry): void {
    this.load().set(key, value)
    this.flush()
  }

  /** Remove `key` and persist the map. */
  delete(key: string): void {
    this.load().delete(key)
    this.flush()
  }
}
180
+
181
+ // ============================================================================
182
+ // Helpers
183
+ // ============================================================================
184
+
/** Default entry lifetime: 24 hours, expressed in milliseconds. */
const DEFAULT_TTL_MS = 1000 * 60 * 60 * 24
186
+
/**
 * Default cache-key derivation: hash the prompt, the model id and the
 * response format (which carries the schema for object generation).
 *
 * Generation knobs such as temperature are intentionally left out so the
 * eval-fixture cache survives parameter tweaks.
 *
 * @param params - call options of the intercepted request
 * @param modelId - id of the wrapped model
 * @returns the key produced by `hashKey`
 */
function defaultKeyHash(params: LanguageModelV3CallOptions, modelId: string): string {
  const { prompt, responseFormat } = params
  return hashKey({ prompt, modelId, responseFormat })
}
197
+
/**
 * Report whether the `V3_EVAL_CACHE` environment gate is open.
 *
 * @returns `true` iff the variable is set to a non-empty string — any
 * non-empty value counts, including `'0'` and `'false'`.
 */
function envGateEnabled(): boolean {
  const raw = process.env['V3_EVAL_CACHE']
  if (typeof raw !== 'string') return false
  return raw.length > 0
}
202
+
/**
 * Check an entry against the TTL.
 *
 * @param entry - cached entry to test
 * @param ttlMs - maximum allowed age in milliseconds
 * @returns `true` when the entry's age is strictly greater than `ttlMs`
 */
function isExpired(entry: CacheEntry, ttlMs: number): boolean {
  const ageMs = Date.now() - entry.createdAt
  return ageMs > ttlMs
}
206
+
207
+ // ============================================================================
208
+ // Middleware
209
+ // ============================================================================
210
+
/**
 * Build a cache middleware for `wrapLanguageModel`. Wraps `doGenerate` and
 * `doStream`; on a hit it replays the cached payload, on a miss it invokes
 * the downstream model and stores the result.
 *
 * Behavior:
 * - When disabled (`options.enabled === false`, or `enabled` omitted and
 *   `V3_EVAL_CACHE` unset/empty) both hooks are plain passthroughs.
 * - Generate and stream payloads share one entry per key. Storing one never
 *   discards the other; a payload added to an existing live entry inherits
 *   that entry's `createdAt`, so nothing is ever served past its TTL.
 * - Entries past `ttlMs` are deleted when they are looked up.
 * - A stream that contains an `'error'` part is forwarded to the caller but
 *   not cached, so a failed call is not replayed for the whole TTL.
 * - Timestamps that were turned into strings/numbers by a JSON round trip
 *   (e.g. the `'disk'` store read by a later process) are converted back to
 *   `Date` before replay.
 *
 * Composition note: on a hit this middleware returns without calling
 * `doGenerate` / `doStream`, so only middleware wrapped *around* it observes
 * the replayed payload. NOTE(review): confirm the intended position relative
 * to budget/trace against `wrapLanguageModel`'s middleware ordering.
 *
 * @param options - store, TTL, key function and enable switch (all optional)
 * @returns a V3 language-model middleware
 *
 * @example
 * ```ts
 * import { wrapLanguageModel } from 'ai'
 * import { cacheMiddleware } from 'ai-functions'
 *
 * const model = wrapLanguageModel({
 *   model: openai('gpt-4o'),
 *   middleware: cacheMiddleware({ store: 'disk', ttlMs: 86_400_000 }),
 * })
 * ```
 */
export function cacheMiddleware(options: CacheMiddlewareOptions = {}): LanguageModelV3Middleware {
  const ttlMs = options.ttlMs ?? DEFAULT_TTL_MS
  const keyHash = options.keyHash ?? defaultKeyHash
  const store: CacheMiddlewareStore =
    options.store === undefined || options.store === 'memory'
      ? new MemoryStore()
      : options.store === 'disk'
      ? new DiskStore(options.diskPath ?? '.cache/v3-eval-cache.json')
      : options.store
  const enabled = options.enabled ?? envGateEnabled()

  /** Look up `key`, evicting (and ignoring) the entry when it is past TTL. */
  const readLive = (key: string): CacheEntry | undefined => {
    const entry = store.get(key)
    if (entry === undefined) return undefined
    if (isExpired(entry, ttlMs)) {
      store.delete(key)
      return undefined
    }
    return entry
  }

  /**
   * Store one payload while keeping the sibling payload of a live entry.
   * The entry is re-read at write time because the downstream call may have
   * taken long enough for another call to populate the same key.
   */
  const writeMerged = (key: string, payload: Omit<CacheEntry, 'createdAt'>): void => {
    const live = readLive(key)
    store.set(key, { ...live, ...payload, createdAt: live?.createdAt ?? Date.now() })
  }

  return {
    specificationVersion: 'v3',
    async wrapGenerate({ doGenerate, params, model }) {
      if (!enabled) return doGenerate()
      const key = keyHash(params, model.modelId)
      const cached = readLive(key)
      if (cached?.generateResult !== undefined) {
        return reviveGenerateResult(cached.generateResult)
      }
      const result = await doGenerate()
      writeMerged(key, { generateResult: result })
      return result
    },
    async wrapStream({ doStream, params, model }) {
      if (!enabled) return doStream()
      const key = keyHash(params, model.modelId)
      const cached = readLive(key)
      if (cached?.streamChunks !== undefined) {
        // Replay cached chunks via simulateReadableStream so consumers see
        // the same async iteration shape as a fresh call.
        const replay: LanguageModelV3StreamResult = {
          stream: simulateReadableStream<LanguageModelV3StreamPart>({
            chunks: reviveStreamChunks(cached.streamChunks),
            initialDelayInMs: 0,
            chunkDelayInMs: 0,
          }),
        }
        return replay
      }
      const result = await doStream()
      // Tee the stream: forward to caller, accumulate for cache.
      const chunks: LanguageModelV3StreamPart[] = []
      let sawError = false
      const transformedStream = result.stream.pipeThrough(
        new TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart>({
          transform(chunk, controller) {
            if (chunk.type === 'error') sawError = true
            chunks.push(chunk)
            controller.enqueue(chunk)
          },
          flush() {
            // Only a stream that finished without an error part is worth replaying.
            if (sawError) return
            writeMerged(key, { streamChunks: chunks })
          },
        })
      )
      return {
        ...result,
        stream: transformedStream,
      }
    },
  }
}

/**
 * Restore `response.timestamp` to a `Date` when a cached generate result
 * carries it as a string or number (as happens after a JSON round trip).
 * Results whose timestamp is already a `Date`, or absent, are returned as-is.
 */
function reviveGenerateResult(
  result: LanguageModelV3GenerateResult
): LanguageModelV3GenerateResult {
  const timestamp: unknown = result.response?.timestamp
  if (typeof timestamp !== 'string' && typeof timestamp !== 'number') return result
  return { ...result, response: { ...result.response, timestamp: new Date(timestamp) } }
}

/**
 * Restore the `timestamp` of cached `'response-metadata'` stream parts to a
 * `Date` when it was serialized to a string or number. Other parts are
 * passed through untouched.
 */
function reviveStreamChunks(chunks: LanguageModelV3StreamPart[]): LanguageModelV3StreamPart[] {
  return chunks.map((chunk) => {
    if (chunk.type !== 'response-metadata') return chunk
    const timestamp: unknown = chunk.timestamp
    if (typeof timestamp !== 'string' && typeof timestamp !== 'number') return chunk
    return { ...chunk, timestamp: new Date(timestamp) }
  })
}
@@ -0,0 +1,195 @@
1
+ /**
2
+ * embeddingCacheMiddleware — content-addressable cache for `wrapEmbeddingModel`
3
+ *
4
+ * Embedding-side analogue of {@link cacheMiddleware}. Wraps `doEmbed` and
5
+ * caches the resulting embeddings keyed on
6
+ * `{ values, modelId, providerOptions }` so a re-embed of the same value
7
+ * batch with the same model returns the cached vectors without hitting the
8
+ * provider.
9
+ *
10
+ * **Why a separate middleware instead of reusing `cacheMiddleware`?**
11
+ * AI SDK 6 splits language-model and embedding-model surfaces:
12
+ * `LanguageModelV3Middleware` exposes `wrapGenerate` / `wrapStream` against
13
+ * `LanguageModelV3CallOptions`, while `EmbeddingModelV3Middleware` exposes
14
+ * `wrapEmbed` against `EmbeddingModelV3CallOptions`. The cache shape
15
+ * (per-value vector vs. per-prompt completion payload) is also different —
16
+ * embeddings cache batched arrays, generations cache single result objects.
17
+ *
18
+ * - **Hit derivation:** stable hash of `{ values, modelId, providerOptions }`.
19
+ * `values` is the array as-passed (caller can pre-normalise if they want
20
+ * case/whitespace insensitivity). Generation knobs don't apply.
21
+ *
22
+ * - **Batch semantics:** the cache key is the *whole* batch. A subset hit
23
+ * doesn't trigger a partial-fill — that's a more invasive shape change
24
+ * (the legacy `EmbeddingCache.getMany` did per-text caching, but it was
25
+ * only used in the example and added 100+ LOC of bookkeeping). Callers
26
+ * that want per-text caching should use stable per-text batches.
27
+ *
28
+ * - **TTL:** 24h default, configurable. Lazy expiry on access.
29
+ *
30
+ * - **Pluggable store:** in-memory default (Map-backed); custom store
31
+ * honored as-is. Disk persistence is intentionally not provided here —
32
+ * embedding payloads (large `number[][]`) make on-disk JSON a bad fit;
33
+ * callers who want it should pass a custom store.
34
+ *
35
+ * - **Env gate:** honors `process.env.V3_EVAL_CACHE` for parity with
36
+ * `cacheMiddleware`. Override via the `enabled` option.
37
+ *
38
+ * @packageDocumentation
39
+ */
40
+
41
+ import type {
42
+ EmbeddingModelV3CallOptions,
43
+ EmbeddingModelV3Embedding,
44
+ EmbeddingModelV3Middleware,
45
+ EmbeddingModelV3Result,
46
+ SharedV3Warning,
47
+ } from '@ai-sdk/provider'
48
+ import { hashKey } from '../cache.js'
49
+
50
+ // ============================================================================
51
+ // Types
52
+ // ============================================================================
53
+
54
/** One stored cache record: the vectors and warnings of a whole embedded batch, plus its insert time. */
interface EmbedCacheEntry {
  /** Insert time in epoch milliseconds; compared against the TTL when the entry is read. */
  createdAt: number
  /** Embedding vectors the provider returned for the cached batch. */
  embeddings: EmbeddingModelV3Embedding[]
  /** Provider warnings stored with the batch and returned again on a hit. */
  warnings: SharedV3Warning[]
}
63
+
64
/**
 * Storage contract used by the embedding cache middleware.
 *
 * All three operations are synchronous. The middleware treats an
 * `undefined` return from {@link EmbedCacheMiddlewareStore.get} as a miss.
 */
export interface EmbedCacheMiddlewareStore {
  /** Look up the entry stored under `key`; `undefined` means no entry. */
  get(key: string): EmbedCacheEntry | undefined

  /** Store `value` under `key`. */
  set(key: string, value: EmbedCacheEntry): void

  /** Remove the entry stored under `key`; called by the middleware for expired entries. */
  delete(key: string): void
}
70
+
71
/** Configuration accepted by {@link embeddingCacheMiddleware}. Every field is optional. */
export interface EmbedCacheMiddlewareOptions {
  /**
   * Explicit on/off switch that takes precedence over the environment.
   * `false` makes the middleware a pure passthrough; `true` always caches.
   * When omitted, caching is on exactly when `process.env.V3_EVAL_CACHE`
   * is a non-empty string (note: values such as `0` or `false` are
   * non-empty and therefore enable the cache).
   */
  enabled?: boolean
  /**
   * Where entries are kept. `'memory'` selects a process-local `Map`;
   * alternatively supply any {@link EmbedCacheMiddlewareStore}.
   *
   * @default 'memory'
   */
  store?: 'memory' | EmbedCacheMiddlewareStore
  /**
   * Maximum entry age in milliseconds. Older entries are dropped the next
   * time they are looked up.
   *
   * @default 86_400_000 (24h)
   */
  ttlMs?: number
  /**
   * Replacement cache-key function. The built-in default hashes
   * `{ values, modelId, providerOptions }`.
   */
  keyHash?: (params: EmbeddingModelV3CallOptions, modelId: string) => string
}
98
+
99
+ // ============================================================================
100
+ // Stores
101
+ // ============================================================================
102
+
103
/** Default store: a thin synchronous wrapper over a process-local `Map`. */
class MemoryStore implements EmbedCacheMiddlewareStore {
  private readonly entries = new Map<string, EmbedCacheEntry>()

  /** Return the entry stored under `key`, or `undefined` when there is none. */
  get(key: string): EmbedCacheEntry | undefined {
    return this.entries.get(key)
  }

  /** Insert or overwrite the entry stored under `key`. */
  set(key: string, value: EmbedCacheEntry): void {
    this.entries.set(key, value)
  }

  /** Drop the entry stored under `key`; a no-op when the key is absent. */
  delete(key: string): void {
    this.entries.delete(key)
  }
}
115
+
116
+ // ============================================================================
117
+ // Helpers
118
+ // ============================================================================
119
+
120
/** Default entry lifetime: 24 hours, expressed in milliseconds. */
const DEFAULT_TTL_MS = 86_400_000
121
+
122
/**
 * Built-in cache-key derivation.
 *
 * Only `values`, `modelId` and `providerOptions` are passed to `hashKey`;
 * any other field on `params` has no influence on the key.
 *
 * @param params - Embedding call options for the current request.
 * @param modelId - Identifier of the model being called.
 * @returns The string produced by `hashKey` for those three fields.
 */
function defaultKeyHash(params: EmbeddingModelV3CallOptions, modelId: string): string {
  const { values, providerOptions } = params
  return hashKey({ values, modelId, providerOptions })
}
129
+
130
/**
 * Read the `V3_EVAL_CACHE` environment gate.
 *
 * The gate is on when the variable is set to any non-empty string; the
 * value itself is not interpreted, so `0` or `false` also enable it.
 *
 * In runtimes that do not expose a `process` global (browsers, some edge
 * workers) the gate reports `false` instead of throwing a
 * `ReferenceError`, so the middleware can still be constructed with
 * default options there.
 *
 * @returns `true` when caching is enabled through the environment.
 */
function envGateEnabled(): boolean {
  // `typeof` is the only safe probe for a global that may not be declared.
  const raw = typeof process === 'undefined' ? undefined : process.env?.['V3_EVAL_CACHE']
  return typeof raw === 'string' && raw.length > 0
}
134
+
135
/**
 * Decide whether a cache entry has outlived its TTL.
 *
 * @param entry - The stored entry; only `createdAt` is read.
 * @param ttlMs - Maximum allowed age in milliseconds.
 * @returns `true` when the entry's age is strictly greater than `ttlMs`.
 */
function isExpired(entry: EmbedCacheEntry, ttlMs: number): boolean {
  const ageMs = Date.now() - entry.createdAt
  return ageMs > ttlMs
}
138
+
139
+ // ============================================================================
140
+ // Middleware
141
+ // ============================================================================
142
+
143
/**
 * Create a caching middleware to pass to `wrapEmbeddingModel`.
 *
 * Options are resolved once, when the middleware is created: TTL, key
 * function, backing store, and whether caching is active at all (explicit
 * `enabled` option first, environment gate otherwise).
 *
 * Per call: when inactive, `doEmbed` is invoked directly. Otherwise the
 * batch key is looked up; a fresh entry is answered with only
 * `embeddings` and `warnings`, an expired entry is evicted, and on a miss
 * the provider is called and its `embeddings` / `warnings` are stored with
 * the current timestamp before the full provider result is returned.
 *
 * @param options - Optional cache configuration; defaults apply per field.
 * @returns An `EmbeddingModelV3Middleware` implementing `wrapEmbed`.
 *
 * @example
 * ```ts
 * import { wrapEmbeddingModel } from 'ai'
 * import { embeddingCacheMiddleware } from 'ai-functions'
 *
 * const model = wrapEmbeddingModel({
 *   model: openai.embedding('text-embedding-3-small'),
 *   middleware: embeddingCacheMiddleware({ ttlMs: 86_400_000 }),
 * })
 * ```
 */
export function embeddingCacheMiddleware(
  options: EmbedCacheMiddlewareOptions = {}
): EmbeddingModelV3Middleware {
  const maxAgeMs = options.ttlMs ?? DEFAULT_TTL_MS
  const computeKey = options.keyHash ?? defaultKeyHash

  let backing: EmbedCacheMiddlewareStore
  if (options.store === undefined || options.store === 'memory') {
    backing = new MemoryStore()
  } else {
    backing = options.store
  }

  const active = options.enabled ?? envGateEnabled()

  return {
    specificationVersion: 'v3',
    async wrapEmbed({ doEmbed, params, model }) {
      if (!active) return doEmbed()

      const cacheKey = computeKey(params, model.modelId)
      const hit = backing.get(cacheKey)

      if (hit !== undefined) {
        if (!isExpired(hit, maxAgeMs)) {
          // A hit carries only what was stored. Provider-side metadata
          // (response headers, body, usage) is intentionally absent, so
          // callers that need it should disable the cache.
          const replay: EmbeddingModelV3Result = {
            embeddings: hit.embeddings,
            warnings: hit.warnings,
          }
          return replay
        }
        // Lazy expiry: stale entries are only removed when they are looked up.
        backing.delete(cacheKey)
      }

      const fresh = await doEmbed()
      backing.set(cacheKey, {
        embeddings: fresh.embeddings,
        warnings: fresh.warnings,
        createdAt: Date.now(),
      })
      return fresh
    },
  }
}
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Middleware barrel — composable AI SDK 6 middleware primitives:
3
+ * `LanguageModelV3Middleware` for `wrapLanguageModel` and `EmbeddingModelV3Middleware` for `wrapEmbeddingModel`.
4
+ *
5
+ * @packageDocumentation
6
+ */
7
+
8
+ export { cacheMiddleware, type CacheMiddlewareOptions, type CacheMiddlewareStore } from './cache.js'
9
+
10
+ export {
11
+ embeddingCacheMiddleware,
12
+ type EmbedCacheMiddlewareOptions,
13
+ type EmbedCacheMiddlewareStore,
14
+ } from './embed-cache.js'
15
+
16
+ export { budgetMiddleware, type BudgetMiddlewareOptions, type PricingOverlay } from './budget.js'
17
+
18
+ export {
19
+ traceMiddleware,
20
+ type TraceEvent,
21
+ type TraceEventKind,
22
+ type TraceMiddlewareOptions,
23
+ } from './trace.js'