ai-functions 2.1.3 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (277) hide show
  1. package/.turbo/turbo-build.log +1 -1
  2. package/CHANGELOG.md +55 -1
  3. package/README.md +38 -0
  4. package/dist/ai-promise.d.ts +3 -3
  5. package/dist/ai-promise.d.ts.map +1 -1
  6. package/dist/ai-promise.js +135 -64
  7. package/dist/ai-promise.js.map +1 -1
  8. package/dist/ai-schemas.d.ts +56 -0
  9. package/dist/ai-schemas.d.ts.map +1 -0
  10. package/dist/ai-schemas.js +53 -0
  11. package/dist/ai-schemas.js.map +1 -0
  12. package/dist/ai.d.ts +16 -242
  13. package/dist/ai.d.ts.map +1 -1
  14. package/dist/ai.js +51 -858
  15. package/dist/ai.js.map +1 -1
  16. package/dist/batch/anthropic.d.ts +6 -4
  17. package/dist/batch/anthropic.d.ts.map +1 -1
  18. package/dist/batch/anthropic.js +83 -145
  19. package/dist/batch/anthropic.js.map +1 -1
  20. package/dist/batch/bedrock.d.ts +8 -30
  21. package/dist/batch/bedrock.d.ts.map +1 -1
  22. package/dist/batch/bedrock.js +155 -338
  23. package/dist/batch/bedrock.js.map +1 -1
  24. package/dist/batch/cloudflare.d.ts +8 -20
  25. package/dist/batch/cloudflare.d.ts.map +1 -1
  26. package/dist/batch/cloudflare.js +68 -189
  27. package/dist/batch/cloudflare.js.map +1 -1
  28. package/dist/batch/google.d.ts +6 -20
  29. package/dist/batch/google.d.ts.map +1 -1
  30. package/dist/batch/google.js +70 -238
  31. package/dist/batch/google.js.map +1 -1
  32. package/dist/batch/index.d.ts +4 -1
  33. package/dist/batch/index.d.ts.map +1 -1
  34. package/dist/batch/index.js +4 -1
  35. package/dist/batch/index.js.map +1 -1
  36. package/dist/batch/memory.d.ts +1 -1
  37. package/dist/batch/memory.d.ts.map +1 -1
  38. package/dist/batch/memory.js +14 -10
  39. package/dist/batch/memory.js.map +1 -1
  40. package/dist/batch/openai.d.ts +11 -14
  41. package/dist/batch/openai.d.ts.map +1 -1
  42. package/dist/batch/openai.js +52 -156
  43. package/dist/batch/openai.js.map +1 -1
  44. package/dist/batch/provider.d.ts +111 -0
  45. package/dist/batch/provider.d.ts.map +1 -0
  46. package/dist/batch/provider.js +233 -0
  47. package/dist/batch/provider.js.map +1 -0
  48. package/dist/batch-map.d.ts.map +1 -1
  49. package/dist/batch-map.js +23 -17
  50. package/dist/batch-map.js.map +1 -1
  51. package/dist/batch-queue.d.ts +65 -0
  52. package/dist/batch-queue.d.ts.map +1 -1
  53. package/dist/batch-queue.js +169 -14
  54. package/dist/batch-queue.js.map +1 -1
  55. package/dist/budget.d.ts.map +1 -1
  56. package/dist/budget.js +27 -14
  57. package/dist/budget.js.map +1 -1
  58. package/dist/cache.d.ts +23 -0
  59. package/dist/cache.d.ts.map +1 -1
  60. package/dist/cache.js +36 -15
  61. package/dist/cache.js.map +1 -1
  62. package/dist/context.d.ts +26 -8
  63. package/dist/context.d.ts.map +1 -1
  64. package/dist/context.js +64 -62
  65. package/dist/context.js.map +1 -1
  66. package/dist/digital-objects-registry.d.ts +229 -0
  67. package/dist/digital-objects-registry.d.ts.map +1 -0
  68. package/dist/digital-objects-registry.js +617 -0
  69. package/dist/digital-objects-registry.js.map +1 -0
  70. package/dist/embeddings.d.ts +2 -2
  71. package/dist/embeddings.d.ts.map +1 -1
  72. package/dist/errors.d.ts +22 -0
  73. package/dist/errors.d.ts.map +1 -0
  74. package/dist/errors.js +35 -0
  75. package/dist/errors.js.map +1 -0
  76. package/dist/eval/runner.d.ts +8 -0
  77. package/dist/eval/runner.d.ts.map +1 -1
  78. package/dist/eval/runner.js +41 -35
  79. package/dist/eval/runner.js.map +1 -1
  80. package/dist/eval-log/in-memory.d.ts +34 -0
  81. package/dist/eval-log/in-memory.d.ts.map +1 -0
  82. package/dist/eval-log/in-memory.js +84 -0
  83. package/dist/eval-log/in-memory.js.map +1 -0
  84. package/dist/eval-log/index.d.ts +29 -0
  85. package/dist/eval-log/index.d.ts.map +1 -0
  86. package/dist/eval-log/index.js +39 -0
  87. package/dist/eval-log/index.js.map +1 -0
  88. package/dist/eval-log/types.d.ts +101 -0
  89. package/dist/eval-log/types.d.ts.map +1 -0
  90. package/dist/eval-log/types.js +16 -0
  91. package/dist/eval-log/types.js.map +1 -0
  92. package/dist/function-registry.d.ts +116 -0
  93. package/dist/function-registry.d.ts.map +1 -0
  94. package/dist/function-registry.js +546 -0
  95. package/dist/function-registry.js.map +1 -0
  96. package/dist/generate.d.ts +9 -3
  97. package/dist/generate.d.ts.map +1 -1
  98. package/dist/generate.js +18 -18
  99. package/dist/generate.js.map +1 -1
  100. package/dist/index.d.ts +18 -11
  101. package/dist/index.d.ts.map +1 -1
  102. package/dist/index.js +35 -18
  103. package/dist/index.js.map +1 -1
  104. package/dist/logger.d.ts +118 -0
  105. package/dist/logger.d.ts.map +1 -0
  106. package/dist/logger.js +187 -0
  107. package/dist/logger.js.map +1 -0
  108. package/dist/middleware/budget.d.ts +84 -0
  109. package/dist/middleware/budget.d.ts.map +1 -0
  110. package/dist/middleware/budget.js +110 -0
  111. package/dist/middleware/budget.js.map +1 -0
  112. package/dist/middleware/cache.d.ts +103 -0
  113. package/dist/middleware/cache.d.ts.map +1 -0
  114. package/dist/middleware/cache.js +228 -0
  115. package/dist/middleware/cache.js.map +1 -0
  116. package/dist/middleware/embed-cache.d.ts +99 -0
  117. package/dist/middleware/embed-cache.d.ts.map +1 -0
  118. package/dist/middleware/embed-cache.js +128 -0
  119. package/dist/middleware/embed-cache.js.map +1 -0
  120. package/dist/middleware/index.d.ts +11 -0
  121. package/dist/middleware/index.d.ts.map +1 -0
  122. package/dist/middleware/index.js +11 -0
  123. package/dist/middleware/index.js.map +1 -0
  124. package/dist/middleware/trace.d.ts +103 -0
  125. package/dist/middleware/trace.d.ts.map +1 -0
  126. package/dist/middleware/trace.js +176 -0
  127. package/dist/middleware/trace.js.map +1 -0
  128. package/dist/primitives.d.ts +120 -1
  129. package/dist/primitives.d.ts.map +1 -1
  130. package/dist/primitives.js +398 -26
  131. package/dist/primitives.js.map +1 -1
  132. package/dist/retry.d.ts +66 -1
  133. package/dist/retry.d.ts.map +1 -1
  134. package/dist/retry.js +115 -8
  135. package/dist/retry.js.map +1 -1
  136. package/dist/schema.js +2 -2
  137. package/dist/schema.js.map +1 -1
  138. package/dist/telemetry.d.ts +128 -0
  139. package/dist/telemetry.d.ts.map +1 -0
  140. package/dist/telemetry.js +285 -0
  141. package/dist/telemetry.js.map +1 -0
  142. package/dist/template.d.ts.map +1 -1
  143. package/dist/template.js +6 -1
  144. package/dist/template.js.map +1 -1
  145. package/dist/tool-orchestration.d.ts +66 -4
  146. package/dist/tool-orchestration.d.ts.map +1 -1
  147. package/dist/tool-orchestration.js +123 -23
  148. package/dist/tool-orchestration.js.map +1 -1
  149. package/dist/type-guards.d.ts +28 -0
  150. package/dist/type-guards.d.ts.map +1 -0
  151. package/dist/type-guards.js +29 -0
  152. package/dist/type-guards.js.map +1 -0
  153. package/dist/types.d.ts +135 -17
  154. package/dist/types.d.ts.map +1 -1
  155. package/dist/types.js +36 -1
  156. package/dist/types.js.map +1 -1
  157. package/dist/wrap-for-v3.d.ts +80 -0
  158. package/dist/wrap-for-v3.d.ts.map +1 -0
  159. package/dist/wrap-for-v3.js +89 -0
  160. package/dist/wrap-for-v3.js.map +1 -0
  161. package/examples/00-quickstart.ts +232 -0
  162. package/examples/01-rag-chatbot.ts +212 -0
  163. package/examples/02-multi-agent-research.ts +290 -0
  164. package/examples/03-email-classification.ts +379 -0
  165. package/examples/04-content-moderation.ts +400 -0
  166. package/examples/05-document-extraction.ts +455 -0
  167. package/examples/06-streaming-chat-nextjs.ts +437 -0
  168. package/examples/07-cloudflare-worker.ts +483 -0
  169. package/examples/08-batch-processing.ts +491 -0
  170. package/examples/09-budget-constrained.ts +527 -0
  171. package/examples/10-tool-orchestration.ts +565 -0
  172. package/examples/11-retry-resilience.ts +403 -0
  173. package/examples/12-caching-strategies.ts +422 -0
  174. package/examples/README.md +145 -0
  175. package/package.json +28 -25
  176. package/src/ai-promise.ts +226 -140
  177. package/src/ai-schemas.ts +122 -0
  178. package/src/ai.ts +69 -1176
  179. package/src/batch/anthropic.ts +96 -161
  180. package/src/batch/bedrock.ts +203 -454
  181. package/src/batch/cloudflare.ts +99 -282
  182. package/src/batch/google.ts +91 -297
  183. package/src/batch/index.ts +4 -1
  184. package/src/batch/memory.ts +15 -10
  185. package/src/batch/openai.ts +65 -193
  186. package/src/batch/provider.ts +336 -0
  187. package/src/batch-map.ts +29 -24
  188. package/src/batch-queue.ts +200 -11
  189. package/src/budget.ts +31 -18
  190. package/src/cache.ts +45 -17
  191. package/src/context.ts +106 -77
  192. package/src/digital-objects-registry.ts +750 -0
  193. package/src/errors.ts +37 -0
  194. package/src/eval/runner.ts +60 -36
  195. package/src/eval-log/in-memory.ts +90 -0
  196. package/src/eval-log/index.ts +46 -0
  197. package/src/eval-log/types.ts +110 -0
  198. package/src/function-registry.ts +671 -0
  199. package/src/generate.ts +33 -28
  200. package/src/index.ts +119 -21
  201. package/src/logger.ts +232 -0
  202. package/src/middleware/budget.ts +171 -0
  203. package/src/middleware/cache.ts +299 -0
  204. package/src/middleware/embed-cache.ts +195 -0
  205. package/src/middleware/index.ts +23 -0
  206. package/src/middleware/trace.ts +248 -0
  207. package/src/primitives.ts +589 -62
  208. package/src/retry.ts +144 -18
  209. package/src/schema.ts +8 -8
  210. package/src/telemetry.ts +403 -0
  211. package/src/template.ts +8 -4
  212. package/src/tool-orchestration.ts +213 -48
  213. package/src/type-guards.ts +31 -0
  214. package/src/types.ts +164 -25
  215. package/src/wrap-for-v3.ts +105 -0
  216. package/test/ai-promise.test.ts +1080 -0
  217. package/test/ai-proxy.test.ts +1 -1
  218. package/test/batch-autosubmit-errors.test.ts +49 -37
  219. package/test/batch-blog-posts.test.ts +87 -129
  220. package/test/core-functions.test.ts +183 -579
  221. package/test/decide.test.ts +154 -322
  222. package/test/define.test.ts +211 -8
  223. package/test/digital-objects-registry.test.ts +760 -0
  224. package/test/embedding-cache-middleware.test.ts +140 -0
  225. package/test/generate-core.test.ts +140 -229
  226. package/test/implicit-batch.test.ts +22 -65
  227. package/test/retry-policy-integration.test.ts +117 -0
  228. package/test/schema.test.ts +55 -19
  229. package/test/template.test.ts +1164 -0
  230. package/test/tool-orchestration.test.ts +270 -0
  231. package/test/wrap-for-v3.test.ts +612 -0
  232. package/vitest.config.js +6 -0
  233. package/vitest.config.ts +20 -0
  234. package/LICENSE +0 -21
  235. package/dist/rpc/auth.d.ts +0 -69
  236. package/dist/rpc/auth.d.ts.map +0 -1
  237. package/dist/rpc/auth.js +0 -136
  238. package/dist/rpc/auth.js.map +0 -1
  239. package/dist/rpc/client.d.ts +0 -62
  240. package/dist/rpc/client.d.ts.map +0 -1
  241. package/dist/rpc/client.js +0 -103
  242. package/dist/rpc/client.js.map +0 -1
  243. package/dist/rpc/deferred.d.ts +0 -60
  244. package/dist/rpc/deferred.d.ts.map +0 -1
  245. package/dist/rpc/deferred.js +0 -96
  246. package/dist/rpc/deferred.js.map +0 -1
  247. package/dist/rpc/index.d.ts +0 -22
  248. package/dist/rpc/index.d.ts.map +0 -1
  249. package/dist/rpc/index.js +0 -38
  250. package/dist/rpc/index.js.map +0 -1
  251. package/dist/rpc/local.d.ts +0 -42
  252. package/dist/rpc/local.d.ts.map +0 -1
  253. package/dist/rpc/local.js +0 -50
  254. package/dist/rpc/local.js.map +0 -1
  255. package/dist/rpc/server.d.ts +0 -165
  256. package/dist/rpc/server.d.ts.map +0 -1
  257. package/dist/rpc/server.js +0 -405
  258. package/dist/rpc/server.js.map +0 -1
  259. package/dist/rpc/session.d.ts +0 -32
  260. package/dist/rpc/session.d.ts.map +0 -1
  261. package/dist/rpc/session.js +0 -43
  262. package/dist/rpc/session.js.map +0 -1
  263. package/dist/rpc/transport.d.ts +0 -306
  264. package/dist/rpc/transport.d.ts.map +0 -1
  265. package/dist/rpc/transport.js +0 -731
  266. package/dist/rpc/transport.js.map +0 -1
  267. package/src/batch/anthropic.js +0 -256
  268. package/src/batch/bedrock.js +0 -584
  269. package/src/batch/cloudflare.js +0 -287
  270. package/src/batch/google.js +0 -359
  271. package/src/batch/index.js +0 -30
  272. package/src/batch/memory.js +0 -187
  273. package/src/batch/openai.js +0 -402
  274. package/src/eval/index.js +0 -7
  275. package/src/eval/models.js +0 -119
  276. package/src/eval/runner.js +0 -147
  277. package/test/schema.test.js +0 -96
@@ -0,0 +1,84 @@
1
+ /**
2
+ * budgetMiddleware — record token usage + cost into a {@link BudgetTracker}
3
+ *
4
+ * Replaces the post-hoc duck-typing in
5
+ * `services-as-software/src/v3/invoke/cost-estimate.ts` with a single
6
+ * AI-SDK-6 middleware: on `doGenerate` / `doStream` completion, read the
7
+ * `LanguageModelV3Usage` shape directly off the result and call
8
+ * `tracker.recordUsage(...)`. The pricing overlay is supplied via
9
+ * `customPricing` on the {@link BudgetTracker} (or we hand the tracker the
10
+ * pricing at construction time when the caller wants per-call isolation).
11
+ *
12
+ * Key V3 → BudgetTracker mapping detail: AI SDK 6 reports
13
+ * `usage.inputTokens.total` / `usage.outputTokens.total` as
14
+ * `number | undefined`. We coerce undefined → 0 so partial-streaming results
15
+ * (where the upstream provider didn't emit token counts) don't blow up the
16
+ * tracker. The `inputTokens.cacheRead` / `inputTokens.cacheWrite` breakdown
17
+ * is *not* propagated yet — round 13+ work to add prompt-cache awareness to
18
+ * BudgetTracker.
19
+ *
20
+ * Composition note: install **after** cache (so a cache hit still records
21
+ * the cost — the wrapped result is the same regardless of which layer
22
+ * served it) and **before** trace (so the trace event sees the final
23
+ * computed cost via the tracker).
24
+ *
25
+ * @packageDocumentation
26
+ */
27
+ import type { LanguageModelV3GenerateResult, LanguageModelV3Middleware } from '@ai-sdk/provider';
28
+ import type { BudgetTracker, ModelPricing } from '../budget.js';
29
+ /**
30
+ * Pricing overlay supplied to the middleware. Mirrors the
31
+ * `BudgetConfig.customPricing` shape — keyed on model id, value is the
32
+ * per-million USD rate. Sourced (in services-as-software) from the
33
+ * `language-models/data/models.json` catalog so Llama / DeepSeek / Mistral /
34
+ * Qwen / Grok / Perplexity Sonar all get their real per-token rate.
35
+ */
36
+ export type PricingOverlay = Record<string, ModelPricing>;
37
+ /** Options for {@link budgetMiddleware}. */
38
+ export interface BudgetMiddlewareOptions {
39
+ /**
40
+ * The {@link BudgetTracker} to record usage into. Required — the
41
+ * middleware never constructs its own tracker (the tracker holds budget
42
+ * limits + alert callbacks, which the caller owns).
43
+ */
44
+ tracker: BudgetTracker;
45
+ /**
46
+ * Pricing overlay (per-model rates). Currently accepted but NOT applied:
47
+ * the implementation discards it (`void pricing`), so per-model rates are
48
+ * expected to be configured on the tracker itself (its `customPricing`).
49
+ * Reserved as a forward-looking hook for per-call pricing overrides.
50
+ */
51
+ pricing?: PricingOverlay;
52
+ /**
53
+ * Optional override for the model id reported to the tracker. Defaults to
54
+ * `model.modelId` (the wrapped model's underlying id). Pass an alias
55
+ * (`'sonnet'`, `'opus'`) to bridge to the alias-based pricing tables.
56
+ */
57
+ modelIdOverride?: string;
58
+ }
59
+ /**
60
+ * Build a budget middleware for `wrapLanguageModel`. Records
61
+ * {@link LanguageModelV3Usage} into the supplied {@link BudgetTracker} on
62
+ * every successful `doGenerate` / `doStream` completion. Errors from the
63
+ * downstream model propagate unchanged — the tracker is only updated on
64
+ * success.
65
+ *
66
+ * For streaming calls, we accumulate the final `usage` from the `'finish'`
67
+ * stream part (per the V3 spec, the final `'finish'` event carries the
68
+ * authoritative usage shape) and record once on stream end.
69
+ *
70
+ * @example
71
+ * ```ts
72
+ * import { wrapLanguageModel } from 'ai'
73
+ * import { BudgetTracker, budgetMiddleware } from 'ai-functions'
74
+ *
75
+ * const tracker = new BudgetTracker({ maxCost: 1.0 })
76
+ * const model = wrapLanguageModel({
77
+ * model: openai('gpt-4o'),
78
+ * middleware: budgetMiddleware({ tracker }),
79
+ * })
80
+ * ```
81
+ */
82
+ export declare function budgetMiddleware(options: BudgetMiddlewareOptions): LanguageModelV3Middleware;
83
+ export type { LanguageModelV3GenerateResult };
84
+ //# sourceMappingURL=budget.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"budget.d.ts","sourceRoot":"","sources":["../../src/middleware/budget.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAEH,OAAO,KAAK,EACV,6BAA6B,EAC7B,yBAAyB,EAI1B,MAAM,kBAAkB,CAAA;AACzB,OAAO,KAAK,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,cAAc,CAAA;AAM/D;;;;;;GAMG;AACH,MAAM,MAAM,cAAc,GAAG,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAA;AAEzD,4CAA4C;AAC5C,MAAM,WAAW,uBAAuB;IACtC;;;;OAIG;IACH,OAAO,EAAE,aAAa,CAAA;IACtB;;;;;OAKG;IACH,OAAO,CAAC,EAAE,cAAc,CAAA;IACxB;;;;OAIG;IACH,eAAe,CAAC,EAAE,MAAM,CAAA;CACzB;AAuCD;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,uBAAuB,GAAG,yBAAyB,CAkC5F;AAID,YAAY,EAAE,6BAA6B,EAAE,CAAA"}
@@ -0,0 +1,110 @@
1
+ /**
2
+ * budgetMiddleware — record token usage + cost into a {@link BudgetTracker}
3
+ *
4
+ * Replaces the post-hoc duck-typing in
5
+ * `services-as-software/src/v3/invoke/cost-estimate.ts` with a single
6
+ * AI-SDK-6 middleware: on `doGenerate` / `doStream` completion, read the
7
+ * `LanguageModelV3Usage` shape directly off the result and call
8
+ * `tracker.recordUsage(...)`. The pricing overlay is supplied via
9
+ * `customPricing` on the {@link BudgetTracker} (or we hand the tracker the
10
+ * pricing at construction time when the caller wants per-call isolation).
11
+ *
12
+ * Key V3 → BudgetTracker mapping detail: AI SDK 6 reports
13
+ * `usage.inputTokens.total` / `usage.outputTokens.total` as
14
+ * `number | undefined`. We coerce undefined → 0 so partial-streaming results
15
+ * (where the upstream provider didn't emit token counts) don't blow up the
16
+ * tracker. The `inputTokens.cacheRead` / `inputTokens.cacheWrite` breakdown
17
+ * is *not* propagated yet — round 13+ work to add prompt-cache awareness to
18
+ * BudgetTracker.
19
+ *
20
+ * Composition note: install **after** cache (so a cache hit still records
21
+ * the cost — the wrapped result is the same regardless of which layer
22
+ * served it) and **before** trace (so the trace event sees the final
23
+ * computed cost via the tracker).
24
+ *
25
+ * @packageDocumentation
26
+ */
27
+ // ============================================================================
28
+ // Helpers
29
+ // ============================================================================
30
/**
 * Flatten a V3-style usage object into plain numeric token counts.
 *
 * Reads `usage.inputTokens.total` and `usage.outputTokens.total`; any link
 * in that chain that is missing (or a `total` that is null/undefined) is
 * reported as 0. A falsy `usage` yields zero for both counts.
 *
 * @param usage - Usage payload taken from a generate result or a stream
 *   `'finish'` part; may be undefined.
 * @returns An object with numeric `inputTokens` and `outputTokens`.
 */
function coerceUsage(usage) {
  // Shared accessor: tolerate an absent bucket as well as an absent total.
  const totalOf = (bucket) => bucket?.total ?? 0;
  return usage
    ? {
        inputTokens: totalOf(usage.inputTokens),
        outputTokens: totalOf(usage.outputTokens),
      }
    : { inputTokens: 0, outputTokens: 0 };
}
38
/**
 * Report one call's token usage to the tracker.
 *
 * The usage payload is normalized through `coerceUsage`; when both the
 * input and output counts come back as 0 nothing is reported. Otherwise
 * `tracker.recordUsage` is called once with
 * `{ inputTokens, outputTokens, model }`.
 *
 * @param tracker - Object exposing `recordUsage(...)`.
 * @param pricing - Pricing overlay. Intentionally unused for now: rates are
 *   expected to be installed on the tracker (its `customPricing`) by the
 *   caller. The parameter is kept so per-call pricing can be added later
 *   without a breaking change.
 * @param modelId - Model identifier forwarded as `model`.
 * @param usage - Raw usage payload; may be undefined.
 */
function record(tracker, pricing, modelId, usage) {
  // Mark the forward-looking hook as deliberately unused.
  void pricing;
  const counts = coerceUsage(usage);
  const hasTokens = counts.inputTokens !== 0 || counts.outputTokens !== 0;
  if (hasTokens) {
    tracker.recordUsage({
      inputTokens: counts.inputTokens,
      outputTokens: counts.outputTokens,
      model: modelId,
    });
  }
}
51
+ // ============================================================================
52
+ // Middleware
53
+ // ============================================================================
54
/**
 * Create a `wrapLanguageModel` middleware (specification version `'v3'`)
 * that feeds token usage into the supplied tracker.
 *
 * - `wrapGenerate` awaits the downstream `doGenerate()`, hands
 *   `result.usage` to `record`, and returns the result untouched. If
 *   `doGenerate()` rejects, the rejection propagates and nothing is recorded.
 * - `wrapStream` awaits `doStream()` and pipes the stream through a
 *   `TransformStream` that forwards every chunk unchanged while remembering
 *   the `usage` of the most recent `'finish'` chunk. The remembered usage is
 *   handed to `record` once, from the transform's `flush` callback.
 *
 * The model id reported is `options.modelIdOverride` when set, otherwise
 * `model.modelId`.
 *
 * @param options - `{ tracker, pricing?, modelIdOverride? }`.
 * @returns The middleware object.
 *
 * @example
 * ```ts
 * import { wrapLanguageModel } from 'ai'
 * import { BudgetTracker, budgetMiddleware } from 'ai-functions'
 *
 * const tracker = new BudgetTracker({ maxCost: 1.0 })
 * const model = wrapLanguageModel({
 *   model: openai('gpt-4o'),
 *   middleware: budgetMiddleware({ tracker }),
 * })
 * ```
 */
export function budgetMiddleware(options) {
  const { tracker, pricing, modelIdOverride } = options;
  // Resolved lazily (after the downstream call) to mirror when the id is read.
  const resolveModelId = (model) => modelIdOverride ?? model.modelId;

  return {
    specificationVersion: 'v3',

    async wrapGenerate({ doGenerate, model }) {
      const generated = await doGenerate();
      record(tracker, pricing, resolveModelId(model), generated.usage);
      return generated;
    },

    async wrapStream({ doStream, model }) {
      const upstream = await doStream();
      const reportedModelId = resolveModelId(model);
      let lastUsage;

      // Pass-through tap: observes 'finish' chunks, never alters the stream.
      const observeChunk = (part, controller) => {
        if (part.type === 'finish') {
          lastUsage = part.usage;
        }
        controller.enqueue(part);
      };
      const tappedStream = upstream.stream.pipeThrough(
        new TransformStream({
          transform: observeChunk,
          flush() {
            record(tracker, pricing, reportedModelId, lastUsage);
          },
        }),
      );

      return { ...upstream, stream: tappedStream };
    },
  };
}
110
+ //# sourceMappingURL=budget.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"budget.js","sourceRoot":"","sources":["../../src/middleware/budget.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AA+CH,+EAA+E;AAC/E,UAAU;AACV,+EAA+E;AAE/E,SAAS,WAAW,CAAC,KAAuC;IAI1D,IAAI,CAAC,KAAK;QAAE,OAAO,EAAE,WAAW,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,CAAA;IACtD,OAAO;QACL,WAAW,EAAE,KAAK,CAAC,WAAW,EAAE,KAAK,IAAI,CAAC;QAC1C,YAAY,EAAE,KAAK,CAAC,YAAY,EAAE,KAAK,IAAI,CAAC;KAC7C,CAAA;AACH,CAAC;AAED,SAAS,MAAM,CACb,OAAsB,EACtB,OAAmC,EACnC,OAAe,EACf,KAAuC;IAEvC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,GAAG,WAAW,CAAC,KAAK,CAAC,CAAA;IACxD,IAAI,WAAW,KAAK,CAAC,IAAI,YAAY,KAAK,CAAC;QAAE,OAAM;IACnD,oEAAoE;IACpE,uEAAuE;IACvE,2DAA2D;IAC3D,kEAAkE;IAClE,sEAAsE;IACtE,mBAAmB;IACnB,KAAK,OAAO,CAAA;IACZ,OAAO,CAAC,WAAW,CAAC,EAAE,WAAW,EAAE,YAAY,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAA;AACpE,CAAC;AAED,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAAgC;IAC/D,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,eAAe,EAAE,GAAG,OAAO,CAAA;IACrD,OAAO;QACL,oBAAoB,EAAE,IAAI;QAC1B,KAAK,CAAC,YAAY,CAAC,EAAE,UAAU,EAAE,KAAK,EAAE;YACtC,MAAM,MAAM,GAAG,MAAM,UAAU,EAAE,CAAA;YACjC,MAAM,OAAO,GAAG,eAAe,IAAI,KAAK,CAAC,OAAO,CAAA;YAChD,MAAM,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,CAAC,KAAK,CAAC,CAAA;YAC/C,OAAO,MAAM,CAAA;QACf,CAAC;QACD,KAAK,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE;YAClC,MAAM,MAAM,GAAG,MAAM,QAAQ,EAAE,CAAA;YAC/B,MAAM,OAAO,GAAG,eAAe,IAAI,KAAK,CAAC,OAAO,CAAA;YAChD,IAAI,UAA4C,CAAA;YAChD,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CAAC,WAAW,CACjD,IAAI,eAAe,CAAuD;gBACxE,SAAS,CAAC,KAAK,EAAE,UAAU;oBACzB,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;wBAC5B,UAAU,GAAG,KAAK,CAAC,KAAK,CAAA;oBAC1B,CAAC;oBACD,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,CAAA;gBAC3B,CAAC;gBACD,KAAK;oBACH,MAAM,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,UAAU,CAAC,CAAA;gBAC/C,CAAC;aACF,CAAC,CACH,CAAA;YACD,MAAM,OAAO,GAAgC;gBAC3C,GAAG,MAAM;gBACT,MAAM,EAAE,iBAAiB;aAC1B,CAAA;YACD,OAAO,OAAO,CAAA;QAChB,CAAC;KACF,CAAA;AACH,CAAC"}
@@ -0,0 +1,103 @@
1
+ /**
2
+ * cacheMiddleware — content-addressable cache for `wrapLanguageModel`
3
+ *
4
+ * Implements the AI SDK cookbook's local-caching-middleware pattern
5
+ * (https://ai-sdk.dev/cookbook/node/local-caching-middleware) on top of the
6
+ * AI SDK 6 `LanguageModelV3Middleware` shape:
7
+ *
8
+ * - **Hit derivation:** content-hash of `{ prompt, modelId, responseFormat }`
9
+ * so a schema change (responseFormat.type === 'json' carries a `schema`
10
+ * JSONSchema7) invalidates the entry. Generation parameters (temperature,
11
+ * topP, etc.) are deliberately *not* part of the key for the eval-fixture
12
+ * use case — flipping temperature shouldn't blow up a 5x verify-time win.
13
+ * Callers who want strict keying should pass a custom `keyHash`.
14
+ *
15
+ * - **Stream support:** cached entries store the `LanguageModelV3StreamPart[]`
16
+ * array; `wrapStream` replays them via `simulateReadableStream` so consumers
17
+ * see the same chunked event sequence on a hit. (`wrapGenerate` is the
18
+ * common path; both share the same cache map.)
19
+ *
20
+ * - **TTL:** 24h default, configurable via `ttlMs`. Entries past TTL are
21
+ * evicted on access (lazy expiry — no background timer).
22
+ *
23
+ * - **Pluggable store:** in-memory default (Map-backed); `'disk'` writes to
24
+ * a JSON file at `.cache/v3-eval-cache.json` for cross-process fixture
25
+ * sharing. Disk reads/writes are best-effort — IO failures fall through
26
+ * to the wrapped model.
27
+ *
28
+ * - **Env gate:** honors `process.env.V3_EVAL_CACHE`. When unset/empty, the
29
+ * middleware short-circuits to a passthrough — useful for production where
30
+ * cache hits would be incorrect but the operator wants the same wrap chain.
31
+ * Set to `'1'` (or any truthy non-empty string) to enable.
32
+ *
33
+ * @packageDocumentation
34
+ */
35
+ import type { LanguageModelV3CallOptions, LanguageModelV3GenerateResult, LanguageModelV3Middleware, LanguageModelV3StreamPart } from '@ai-sdk/provider';
36
+ /** Cached payload — both generate result and stream chunks under one key. */
37
+ interface CacheEntry {
38
+ /** Result captured from `doGenerate`. Absent if the entry came from a stream call. */
39
+ generateResult?: LanguageModelV3GenerateResult;
40
+ /** Stream chunks captured from `doStream` (replayed via simulateReadableStream). */
41
+ streamChunks?: LanguageModelV3StreamPart[];
42
+ /** Insert epoch ms — drives TTL eviction. */
43
+ createdAt: number;
44
+ }
45
+ /** Pluggable cache store for cached LLM results. */
46
+ export interface CacheMiddlewareStore { // all three methods are synchronous (no Promise in any signature)
47
+ get(key: string): CacheEntry | undefined; // entry stored under `key`, or `undefined` when there is none
48
+ set(key: string, value: CacheEntry): void; // store `value` under `key`
49
+ delete(key: string): void; // drop the entry for `key` — presumably used for TTL eviction; confirm in cache.js
50
+ }
51
+ /** Options for {@link cacheMiddleware}. */
52
+ export interface CacheMiddlewareOptions {
53
+ /**
54
+ * Cache backend. `'memory'` uses a process-local Map; `'disk'` writes to
55
+ * `.cache/v3-eval-cache.json` for cross-process fixture sharing. A custom
56
+ * {@link CacheMiddlewareStore} can be passed instead.
57
+ *
58
+ * @default 'memory'
59
+ */
60
+ store?: 'memory' | 'disk' | CacheMiddlewareStore;
61
+ /**
62
+ * TTL in milliseconds. Entries older than `ttlMs` are evicted on access.
63
+ *
64
+ * @default 86_400_000 (24h)
65
+ */
66
+ ttlMs?: number;
67
+ /**
68
+ * Custom hash function for cache keys. Defaults to a stable hash of
69
+ * `{ prompt, modelId, responseFormat }`.
70
+ */
71
+ keyHash?: (params: LanguageModelV3CallOptions, modelId: string) => string;
72
+ /**
73
+ * Optional override for the env gate. When `false`, the middleware acts
74
+ * as a passthrough regardless of `V3_EVAL_CACHE`. When `true`, always
75
+ * caches. Defaults to `process.env.V3_EVAL_CACHE` truthy-check.
76
+ */
77
+ enabled?: boolean;
78
+ /** Optional custom path for the disk store (defaults to `.cache/v3-eval-cache.json`). */
79
+ diskPath?: string;
80
+ }
81
+ /**
82
+ * Build a cache middleware for `wrapLanguageModel`. Wraps `doGenerate` and
83
+ * `doStream`; on a hit replays the cached payload, on a miss invokes the
84
+ * downstream model and stores the result.
85
+ *
86
+ * Composition note: install **before** budget/trace so cache hits don't
87
+ * pay the downstream model cost (the trace/budget middleware still see the
88
+ * payload via the wrapped result they observe in their own `wrapGenerate`).
89
+ *
90
+ * @example
91
+ * ```ts
92
+ * import { wrapLanguageModel } from 'ai'
93
+ * import { cacheMiddleware } from 'ai-functions'
94
+ *
95
+ * const model = wrapLanguageModel({
96
+ * model: openai('gpt-4o'),
97
+ * middleware: cacheMiddleware({ store: 'disk', ttlMs: 86_400_000 }),
98
+ * })
99
+ * ```
100
+ */
101
+ export declare function cacheMiddleware(options?: CacheMiddlewareOptions): LanguageModelV3Middleware;
102
+ export {};
103
+ //# sourceMappingURL=cache.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../../src/middleware/cache.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAGH,OAAO,KAAK,EACV,0BAA0B,EAC1B,6BAA6B,EAC7B,yBAAyB,EACzB,yBAAyB,EAE1B,MAAM,kBAAkB,CAAA;AAOzB,6EAA6E;AAC7E,UAAU,UAAU;IAClB,sFAAsF;IACtF,cAAc,CAAC,EAAE,6BAA6B,CAAA;IAC9C,oFAAoF;IACpF,YAAY,CAAC,EAAE,yBAAyB,EAAE,CAAA;IAC1C,6CAA6C;IAC7C,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,oDAAoD;AACpD,MAAM,WAAW,oBAAoB;IACnC,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS,CAAA;IACxC,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,UAAU,GAAG,IAAI,CAAA;IACzC,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAAA;CAC1B;AAED,2CAA2C;AAC3C,MAAM,WAAW,sBAAsB;IACrC;;;;;;OAMG;IACH,KAAK,CAAC,EAAE,QAAQ,GAAG,MAAM,GAAG,oBAAoB,CAAA;IAChD;;;;OAIG;IACH,KAAK,CAAC,EAAE,MAAM,CAAA;IACd;;;OAGG;IACH,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,0BAA0B,EAAE,OAAO,EAAE,MAAM,KAAK,MAAM,CAAA;IACzE;;;;OAIG;IACH,OAAO,CAAC,EAAE,OAAO,CAAA;IACjB,yFAAyF;IACzF,QAAQ,CAAC,EAAE,MAAM,CAAA;CAClB;AAmHD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,wBAAgB,eAAe,CAAC,OAAO,GAAE,sBAA2B,GAAG,yBAAyB,CAoE/F"}
@@ -0,0 +1,228 @@
1
+ /**
2
+ * cacheMiddleware — content-addressable cache for `wrapLanguageModel`
3
+ *
4
+ * Implements the AI SDK cookbook's local-caching-middleware pattern
5
+ * (https://ai-sdk.dev/cookbook/node/local-caching-middleware) on top of the
6
+ * AI SDK 6 `LanguageModelV3Middleware` shape:
7
+ *
8
+ * - **Hit derivation:** content-hash of `{ prompt, modelId, responseFormat }`
9
+ * so a schema change (responseFormat.type === 'json' carries a `schema`
10
+ * JSONSchema7) invalidates the entry. Generation parameters (temperature,
11
+ * topP, etc.) are deliberately *not* part of the key for the eval-fixture
12
+ * use case — flipping temperature shouldn't blow up a 5x verify-time win.
13
+ * Callers who want strict keying should pass a custom `keyHash`.
14
+ *
15
+ * - **Stream support:** cached entries store the `LanguageModelV3StreamPart[]`
16
+ * array; `wrapStream` replays them via `simulateReadableStream` so consumers
17
+ * see the same chunked event sequence on a hit. (`wrapGenerate` is the
18
+ * common path; both share the same cache map.)
19
+ *
20
+ * - **TTL:** 24h default, configurable via `ttlMs`. Entries past TTL are
21
+ * evicted on access (lazy expiry — no background timer).
22
+ *
23
+ * - **Pluggable store:** in-memory default (Map-backed); `'disk'` writes to
24
+ * a JSON file at `.cache/v3-eval-cache.json` for cross-process fixture
25
+ * sharing. Disk reads/writes are best-effort — IO failures fall through
26
+ * to the wrapped model.
27
+ *
28
+ * - **Env gate:** honors `process.env.V3_EVAL_CACHE`. When unset/empty, the
29
+ * middleware short-circuits to a passthrough — useful for production where
30
+ * cache hits would be incorrect but the operator wants the same wrap chain.
31
+ * Set to any non-empty string to enable (`'1'` is conventional; note that `'0'` and `'false'` enable it too).
32
+ *
33
+ * @packageDocumentation
34
+ */
35
+ import { simulateReadableStream } from 'ai';
36
+ import { hashKey } from '../cache.js';
37
+ // ============================================================================
38
+ // Stores
39
+ // ============================================================================
40
/**
 * Process-local cache store backed by a `Map`.
 *
 * Nothing is persisted and this class performs no expiry of its own; it only
 * stores, returns and removes whatever entry it is handed.
 */
class MemoryStore {
    map = new Map();
    /** Return the entry stored under `key`, or `undefined` when there is none. */
    get(key) {
        const { map } = this;
        return map.get(key);
    }
    /** Store `value` under `key`, replacing any previous entry. */
    set(key, value) {
        const { map } = this;
        map.set(key, value);
    }
    /** Remove the entry stored under `key`; a no-op when the key is absent. */
    delete(key) {
        const { map } = this;
        map.delete(key);
    }
}
52
/**
 * Disk-backed store that mirrors one JSON file shaped `{ [key]: entry }`.
 *
 * Everything here is best-effort: a missing, unreadable or corrupt file, or a
 * failed write, is swallowed so cache IO can never block an LLM call. The file
 * is read once (on first access) and the whole map is rewritten on every
 * `set` and `delete`, which is cheap for the read-dominated eval-fixture use
 * case.
 *
 * NOTE: because each instance writes back its own in-memory snapshot, entries
 * another process wrote after this instance loaded are overwritten by the
 * next flush.
 */
class DiskStore {
    path;
    cache = null;
    /**
     * @param path Location of the JSON cache file. Parent directories are
     *   created on the first write.
     */
    constructor(path) {
        this.path = path;
    }
    /**
     * Resolve a Node built-in module lazily, at call time.
     *
     * This file is an ES module, where a bare `require` is not defined; the
     * original unconditional `require('fs')` therefore threw (and was silently
     * swallowed) under native ESM, so nothing was ever persisted. Prefer
     * `require` when a CommonJS/bundler environment provides it, otherwise fall
     * back to `process.getBuiltinModule`. Throws when neither exists; callers
     * wrap this in their best-effort `try`.
     */
    builtin(id) {
        if (typeof require === 'function') {
            // eslint-disable-next-line @typescript-eslint/no-require-imports
            return require(id);
        }
        if (typeof process !== 'undefined' && typeof process.getBuiltinModule === 'function') {
            return process.getBuiltinModule(id);
        }
        throw new Error(`DiskStore: cannot load built-in module "${id}" in this runtime`);
    }
    /**
     * Return the in-memory map, populating it from disk on first use.
     *
     * Only well-formed content is accepted: the file must hold a plain JSON
     * object, and each entry must be an object with a finite numeric
     * `createdAt`. Anything else is dropped instead of being served as a hit.
     */
    load() {
        if (this.cache !== null)
            return this.cache;
        const entries = new Map();
        // Assign before touching the disk so a failed read is not retried on
        // every access.
        this.cache = entries;
        try {
            const fs = this.builtin('fs');
            if (fs.existsSync(this.path)) {
                const parsed = JSON.parse(fs.readFileSync(this.path, 'utf-8'));
                const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
                if (isPlainObject) {
                    for (const [k, v] of Object.entries(parsed)) {
                        if (v !== null && typeof v === 'object' && Number.isFinite(v.createdAt)) {
                            entries.set(k, v);
                        }
                    }
                }
            }
        }
        catch {
            // best-effort: treat any read/parse failure as an empty cache
        }
        return entries;
    }
    /** Write the whole map back to disk; does nothing before the first load. */
    flush() {
        if (this.cache === null)
            return;
        try {
            const fs = this.builtin('fs');
            const path = this.builtin('path');
            // `recursive: true` is a no-op when the directory already exists.
            fs.mkdirSync(path.dirname(this.path), { recursive: true });
            fs.writeFileSync(this.path, JSON.stringify(Object.fromEntries(this.cache)), 'utf-8');
        }
        catch {
            // best-effort: a failed write leaves the in-memory map authoritative
        }
    }
    /** Return the entry stored under `key`, or `undefined` when there is none. */
    get(key) {
        return this.load().get(key);
    }
    /** Store `value` under `key` and rewrite the cache file. */
    set(key, value) {
        this.load().set(key, value);
        this.flush();
    }
    /** Remove the entry for `key` and rewrite the cache file. */
    delete(key) {
        this.load().delete(key);
        this.flush();
    }
}
115
+ // ============================================================================
116
+ // Helpers
117
+ // ============================================================================
118
/** Default entry lifetime when `ttlMs` is not supplied: 24 hours, in milliseconds. */
const DEFAULT_TTL_MS = 86_400_000;
119
/**
 * Default cache-key derivation.
 *
 * Hashes only the prompt, the model id and the response format (which carries
 * the schema for object generation). Sampling knobs such as temperature are
 * left out on purpose so an eval-fixture cache survives tweaking them.
 *
 * @param params Call options; only `prompt` and `responseFormat` are read.
 * @param modelId Identifier of the model the call is routed to.
 * @returns Whatever `hashKey` produces for that triple.
 */
function defaultKeyHash(params, modelId) {
    const { prompt, responseFormat } = params;
    // Property order is kept as prompt → modelId → responseFormat in case the
    // hash is order-sensitive.
    const keyMaterial = { prompt, modelId, responseFormat };
    return hashKey(keyMaterial);
}
129
+ function envGateEnabled() {
130
+ const v = process.env['V3_EVAL_CACHE'];
131
+ return typeof v === 'string' && v.length > 0;
132
+ }
133
/**
 * Decide whether a cache entry has outlived its TTL.
 *
 * @param entry Cache entry; only its `createdAt` (epoch ms) is read.
 * @param ttlMs Maximum allowed age in milliseconds.
 * @returns `true` when the entry's age is strictly greater than `ttlMs`.
 */
function isExpired(entry, ttlMs) {
    const ageMs = Date.now() - entry.createdAt;
    return ageMs > ttlMs;
}
136
+ // ============================================================================
137
+ // Middleware
138
+ // ============================================================================
139
/**
 * Build a cache middleware for `wrapLanguageModel`. Wraps `doGenerate` and
 * `doStream`; on a hit replays the cached payload, on a miss invokes the
 * downstream model and stores the result.
 *
 * Composition note: install **before** budget/trace so cache hits don't
 * pay the downstream model cost (the trace/budget middleware still see the
 * payload via the wrapped result they observe in their own `wrapGenerate`).
 *
 * Behavior worth knowing:
 * - When disabled (`options.enabled === false`, or the option is omitted and
 *   the env gate is off) both hooks are pure passthroughs.
 * - Expired entries are deleted on access and the call proceeds as a miss.
 * - Timestamps that came back from a JSON-serialising store as strings or
 *   numbers are revived to `Date` on a hit, so consumers see the same types
 *   as on a fresh call.
 * - A stream that emitted an `error` part is forwarded to the caller but not
 *   cached, so a transient failure is not replayed until the TTL runs out.
 * - Generate and stream calls use the same key; with the built-in stores the
 *   entry written by one replaces the entry written by the other.
 *
 * @param options Store selection, TTL, key hashing and enable override.
 * @returns A `specificationVersion: 'v3'` middleware object.
 *
 * @example
 * ```ts
 * import { wrapLanguageModel } from 'ai'
 * import { cacheMiddleware } from 'ai-functions'
 *
 * const model = wrapLanguageModel({
 *   model: openai('gpt-4o'),
 *   middleware: cacheMiddleware({ store: 'disk', ttlMs: 86_400_000 }),
 * })
 * ```
 */
export function cacheMiddleware(options = {}) {
    const ttlMs = options.ttlMs ?? DEFAULT_TTL_MS;
    const keyHash = options.keyHash ?? defaultKeyHash;
    const store = options.store === undefined || options.store === 'memory'
        ? new MemoryStore()
        : options.store === 'disk'
            ? new DiskStore(options.diskPath ?? '.cache/v3-eval-cache.json')
            : options.store;
    // Resolved once, at construction: later changes to the env var are not seen.
    const enabled = options.enabled ?? envGateEnabled();
    return {
        specificationVersion: 'v3',
        async wrapGenerate({ doGenerate, params, model }) {
            if (!enabled)
                return doGenerate();
            const key = keyHash(params, model.modelId);
            const cached = store.get(key);
            if (cached !== undefined) {
                if (isExpired(cached, ttlMs)) {
                    store.delete(key);
                }
                else if (cached.generateResult !== undefined) {
                    return reviveGenerateResult(cached.generateResult);
                }
            }
            const result = await doGenerate();
            store.set(key, { generateResult: result, createdAt: Date.now() });
            return result;
        },
        async wrapStream({ doStream, params, model }) {
            if (!enabled)
                return doStream();
            const key = keyHash(params, model.modelId);
            const cached = store.get(key);
            if (cached !== undefined) {
                if (isExpired(cached, ttlMs)) {
                    store.delete(key);
                }
                else if (cached.streamChunks !== undefined) {
                    // Replay cached chunks via simulateReadableStream so consumers
                    // see the same async iteration shape as a fresh call.
                    const replay = {
                        stream: simulateReadableStream({
                            chunks: reviveStreamChunks(cached.streamChunks),
                            initialDelayInMs: 0,
                            chunkDelayInMs: 0,
                        }),
                    };
                    return replay;
                }
            }
            const result = await doStream();
            // Tee the stream: forward to caller, accumulate for cache.
            const chunks = [];
            const transformedStream = result.stream.pipeThrough(new TransformStream({
                transform(chunk, controller) {
                    chunks.push(chunk);
                    controller.enqueue(chunk);
                },
                flush() {
                    // Only a stream that completed without an error part is
                    // worth replaying; caching a failure would pin it for the
                    // whole TTL.
                    if (chunks.some((chunk) => chunk?.type === 'error'))
                        return;
                    store.set(key, { streamChunks: chunks, createdAt: Date.now() });
                },
            }));
            return {
                ...result,
                stream: transformedStream,
            };
        },
    };
}
/**
 * Turn a JSON-serialised timestamp (ISO string or epoch number) back into a
 * `Date`. Values that are already a `Date`, are absent, or do not parse to a
 * valid date are returned unchanged.
 */
function reviveTimestamp(value) {
    if (typeof value !== 'string' && typeof value !== 'number')
        return value;
    const date = new Date(value);
    return Number.isNaN(date.getTime()) ? value : date;
}
/**
 * Return a cached generate result whose `response.timestamp` is a `Date`.
 * The same object is returned when nothing needs reviving; otherwise a
 * shallow copy is made so the stored entry is not mutated.
 */
function reviveGenerateResult(result) {
    const timestamp = result?.response?.timestamp;
    const revived = reviveTimestamp(timestamp);
    if (revived === timestamp)
        return result;
    return { ...result, response: { ...result.response, timestamp: revived } };
}
/**
 * Return cached stream parts with `response-metadata` timestamps revived to
 * `Date`. Parts that need no change are passed through by reference.
 */
function reviveStreamChunks(chunks) {
    return chunks.map((chunk) => {
        if (chunk?.type !== 'response-metadata')
            return chunk;
        const revived = reviveTimestamp(chunk.timestamp);
        return revived === chunk.timestamp ? chunk : { ...chunk, timestamp: revived };
    });
}
228
+ //# sourceMappingURL=cache.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cache.js","sourceRoot":"","sources":["../../src/middleware/cache.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAEH,OAAO,EAAE,sBAAsB,EAAE,MAAM,IAAI,CAAA;AAQ3C,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAA;AAsDrC,+EAA+E;AAC/E,SAAS;AACT,+EAA+E;AAE/E,MAAM,WAAW;IACE,GAAG,GAA4B,IAAI,GAAG,EAAE,CAAA;IACzD,GAAG,CAAC,GAAW;QACb,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IAC1B,CAAC;IACD,GAAG,CAAC,GAAW,EAAE,KAAiB;QAChC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;IAC1B,CAAC;IACD,MAAM,CAAC,GAAW;QAChB,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;IACtB,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,SAAS;IACI,IAAI,CAAQ;IACrB,KAAK,GAAmC,IAAI,CAAA;IAEpD,YAAY,IAAY;QACtB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAA;IAClB,CAAC;IAEO,IAAI;QACV,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI;YAAE,OAAO,IAAI,CAAC,KAAK,CAAA;QAC1C,IAAI,CAAC,KAAK,GAAG,IAAI,GAAG,EAAE,CAAA;QACtB,IAAI,CAAC;YACH,iEAAiE;YACjE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAwB,CAAA;YAC/C,IAAI,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC7B,MAAM,GAAG,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,CAAA;gBAC/C,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAA+B,CAAA;gBAC5D,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC5C,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;gBACtB,CAAC;YACH,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,cAAc;QAChB,CAAC;QACD,OAAO,IAAI,CAAC,KAAK,CAAA;IACnB,CAAC;IAEO,KAAK;QACX,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI;YAAE,OAAM;QAC/B,IAAI,CAAC;YACH,iEAAiE;YACjE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAwB,CAAA;YAC/C,iEAAiE;YACjE,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAA0B,CAAA;YACrD,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACnC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBACxB,EAAE,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;YACxC,CAAC;YACD,MAAM,GAAG,GAAG,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YAC1C,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,OAAO,CAAC,CAAA;QAC3D,CAAC;QAAC,MAAM,CAAC;YACP,cAAc;QAChB,CAAC;IACH,
CAAC;IAED,GAAG,CAAC,GAAW;QACb,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IAC7B,CAAC;IAED,GAAG,CAAC,GAAW,EAAE,KAAiB;QAChC,IAAI,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;QAC3B,IAAI,CAAC,KAAK,EAAE,CAAA;IACd,CAAC;IAED,MAAM,CAAC,GAAW;QAChB,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;QACvB,IAAI,CAAC,KAAK,EAAE,CAAA;IACd,CAAC;CACF;AAED,+EAA+E;AAC/E,UAAU;AACV,+EAA+E;AAE/E,MAAM,cAAc,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAA;AAE1C,SAAS,cAAc,CAAC,MAAkC,EAAE,OAAe;IACzE,oEAAoE;IACpE,mEAAmE;IACnE,kEAAkE;IAClE,OAAO,OAAO,CAAC;QACb,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,OAAO;QACP,cAAc,EAAE,MAAM,CAAC,cAAc;KACtC,CAAC,CAAA;AACJ,CAAC;AAED,SAAS,cAAc;IACrB,MAAM,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,CAAA;IACtC,OAAO,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAA;AAC9C,CAAC;AAED,SAAS,SAAS,CAAC,KAAiB,EAAE,KAAa;IACjD,OAAO,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,SAAS,GAAG,KAAK,CAAA;AAC7C,CAAC;AAED,+EAA+E;AAC/E,aAAa;AACb,+EAA+E;AAE/E;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,UAAU,eAAe,CAAC,UAAkC,EAAE;IAClE,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,cAAc,CAAA;IAC7C,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,cAAc,CAAA;IACjD,MAAM,KAAK,GACT,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,CAAC,KAAK,KAAK,QAAQ;QACvD,CAAC,CAAC,IAAI,WAAW,EAAE;QACnB,CAAC,CAAC,OAAO,CAAC,KAAK,KAAK,MAAM;YAC1B,CAAC,CAAC,IAAI,SAAS,CAAC,OAAO,CAAC,QAAQ,IAAI,2BAA2B,CAAC;YAChE,CAAC,CAAC,OAAO,CAAC,KAAK,CAAA;IACnB,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,cAAc,EAAE,CAAA;IAEnD,OAAO;QACL,oBAAoB,EAAE,IAAI;QAC1B,KAAK,CAAC,YAAY,CAAC,EAAE,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE;YAC9C,IAAI,CAAC,OAAO;gBAAE,OAAO,UAAU,EAAE,CAAA;YACjC,MAAM,GAAG,GAAG,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,OAAO,CAAC,CAAA;YAC1C,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;YAC7B,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;gBACzB,IAAI,SAAS,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC7B,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;gBACnB,CAAC;qBAAM,IAAI,MAAM,CAAC,cAAc,KAAK,SAAS,EAAE,CAAC;oBAC/C,OAAO,MAAM,CAAC,cAAc,CAAA;gBAC9B,CAAC;YACH,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,UAAU,EAAE,CAAA;YACjC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,c
AAc,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAA;YACjE,OAAO,MAAM,CAAA;QACf,CAAC;QACD,KAAK,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE;YAC1C,IAAI,CAAC,OAAO;gBAAE,OAAO,QAAQ,EAAE,CAAA;YAC/B,MAAM,GAAG,GAAG,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,OAAO,CAAC,CAAA;YAC1C,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;YAC7B,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;gBACzB,IAAI,SAAS,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC7B,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAA;gBACnB,CAAC;qBAAM,IAAI,MAAM,CAAC,YAAY,KAAK,SAAS,EAAE,CAAC;oBAC7C,+DAA+D;oBAC/D,sDAAsD;oBACtD,MAAM,MAAM,GAAgC;wBAC1C,MAAM,EAAE,sBAAsB,CAA4B;4BACxD,MAAM,EAAE,MAAM,CAAC,YAAY;4BAC3B,gBAAgB,EAAE,CAAC;4BACnB,cAAc,EAAE,CAAC;yBAClB,CAAC;qBACH,CAAA;oBACD,OAAO,MAAM,CAAA;gBACf,CAAC;YACH,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,QAAQ,EAAE,CAAA;YAC/B,2DAA2D;YAC3D,MAAM,MAAM,GAAgC,EAAE,CAAA;YAC9C,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CAAC,WAAW,CACjD,IAAI,eAAe,CAAuD;gBACxE,SAAS,CAAC,KAAK,EAAE,UAAU;oBACzB,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;oBAClB,UAAU,CAAC,OAAO,CAAC,KAAK,CAAC,CAAA;gBAC3B,CAAC;gBACD,KAAK;oBACH,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAA;gBACjE,CAAC;aACF,CAAC,CACH,CAAA;YACD,OAAO;gBACL,GAAG,MAAM;gBACT,MAAM,EAAE,iBAAiB;aAC1B,CAAA;QACH,CAAC;KACF,CAAA;AACH,CAAC"}
@@ -0,0 +1,99 @@
1
+ /**
2
+ * embeddingCacheMiddleware — content-addressable cache for `wrapEmbeddingModel`
3
+ *
4
+ * Embedding-side analogue of {@link cacheMiddleware}. Wraps `doEmbed` and
5
+ * caches the resulting embeddings keyed on
6
+ * `{ values, modelId, providerOptions }` so a re-embed of the same value
7
+ * batch with the same model returns the cached vectors without hitting the
8
+ * provider.
9
+ *
10
+ * **Why a separate middleware instead of reusing `cacheMiddleware`?**
11
+ * AI SDK 6 splits language-model and embedding-model surfaces:
12
+ * `LanguageModelV3Middleware` exposes `wrapGenerate` / `wrapStream` against
13
+ * `LanguageModelV3CallOptions`, while `EmbeddingModelV3Middleware` exposes
14
+ * `wrapEmbed` against `EmbeddingModelV3CallOptions`. The cache shape
15
+ * (per-value vector vs. per-prompt completion payload) is also different —
16
+ * embeddings cache batched arrays, generations cache single result objects.
17
+ *
18
+ * - **Hit derivation:** stable hash of `{ values, modelId, providerOptions }`.
19
+ * `values` is the array as-passed (caller can pre-normalise if they want
20
+ * case/whitespace insensitivity). Generation knobs don't apply.
21
+ *
22
+ * - **Batch semantics:** the cache key is the *whole* batch. A subset hit
23
+ * doesn't trigger a partial-fill — that's a more invasive shape change
24
+ * (the legacy `EmbeddingCache.getMany` did per-text caching, but it was
25
+ * only used in the example and added 100+ LOC of bookkeeping). Callers
26
+ * that want per-text caching should use stable per-text batches.
27
+ *
28
+ * - **TTL:** 24h default, configurable. Lazy expiry on access.
29
+ *
30
+ * - **Pluggable store:** in-memory default (Map-backed); custom store
31
+ * honored as-is. Disk persistence is intentionally not provided here —
32
+ * embedding payloads (large `number[][]`) make on-disk JSON a bad fit;
33
+ * callers who want it should pass a custom store.
34
+ *
35
+ * - **Env gate:** honors `process.env.V3_EVAL_CACHE` for parity with
36
+ * `cacheMiddleware`. Override via the `enabled` option.
37
+ *
38
+ * @packageDocumentation
39
+ */
40
+ import type { EmbeddingModelV3CallOptions, EmbeddingModelV3Embedding, EmbeddingModelV3Middleware, SharedV3Warning } from '@ai-sdk/provider';
41
+ /** Cached embedding payload. */
42
+ interface EmbedCacheEntry {
43
+ /** The embedding vectors returned for the cached batch. */
44
+ embeddings: Array<EmbeddingModelV3Embedding>;
45
+ /** Provider warnings carried alongside the cached batch. */
46
+ warnings: Array<SharedV3Warning>;
47
+ /** Insert epoch ms — drives TTL eviction. */
48
+ createdAt: number;
49
+ }
50
+ /** Pluggable cache store for embedding results. */
51
+ export interface EmbedCacheMiddlewareStore {
52
+ get(key: string): EmbedCacheEntry | undefined;
53
+ set(key: string, value: EmbedCacheEntry): void;
54
+ delete(key: string): void;
55
+ }
56
+ /** Options for {@link embeddingCacheMiddleware}. */
57
+ export interface EmbedCacheMiddlewareOptions {
58
+ /**
59
+ * Cache backend. `'memory'` uses a process-local Map. A custom
60
+ * {@link EmbedCacheMiddlewareStore} can be passed instead.
61
+ *
62
+ * @default 'memory'
63
+ */
64
+ store?: 'memory' | EmbedCacheMiddlewareStore;
65
+ /**
66
+ * TTL in milliseconds. Entries older than `ttlMs` are evicted on access.
67
+ *
68
+ * @default 86_400_000 (24h)
69
+ */
70
+ ttlMs?: number;
71
+ /**
72
+ * Custom hash function for cache keys. Defaults to a stable hash of
73
+ * `{ values, modelId, providerOptions }`.
74
+ */
75
+ keyHash?: (params: EmbeddingModelV3CallOptions, modelId: string) => string;
76
+ /**
77
+ * Optional override for the env gate. When `false`, the middleware acts
78
+ * as a passthrough regardless of `V3_EVAL_CACHE`. When `true`, always
79
+ * caches. Defaults to `process.env.V3_EVAL_CACHE` truthy-check.
80
+ */
81
+ enabled?: boolean;
82
+ }
83
+ /**
84
+ * Build an embedding-cache middleware for `wrapEmbeddingModel`.
85
+ *
86
+ * @example
87
+ * ```ts
88
+ * import { wrapEmbeddingModel } from 'ai'
89
+ * import { embeddingCacheMiddleware } from 'ai-functions'
90
+ *
91
+ * const model = wrapEmbeddingModel({
92
+ * model: openai.embedding('text-embedding-3-small'),
93
+ * middleware: embeddingCacheMiddleware({ ttlMs: 86_400_000 }),
94
+ * })
95
+ * ```
96
+ */
97
+ export declare function embeddingCacheMiddleware(options?: EmbedCacheMiddlewareOptions): EmbeddingModelV3Middleware;
98
+ export {};
99
+ //# sourceMappingURL=embed-cache.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embed-cache.d.ts","sourceRoot":"","sources":["../../src/middleware/embed-cache.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AAEH,OAAO,KAAK,EACV,2BAA2B,EAC3B,yBAAyB,EACzB,0BAA0B,EAE1B,eAAe,EAChB,MAAM,kBAAkB,CAAA;AAOzB,gCAAgC;AAChC,UAAU,eAAe;IACvB,2DAA2D;IAC3D,UAAU,EAAE,KAAK,CAAC,yBAAyB,CAAC,CAAA;IAC5C,4DAA4D;IAC5D,QAAQ,EAAE,KAAK,CAAC,eAAe,CAAC,CAAA;IAChC,6CAA6C;IAC7C,SAAS,EAAE,MAAM,CAAA;CAClB;AAED,mDAAmD;AACnD,MAAM,WAAW,yBAAyB;IACxC,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,eAAe,GAAG,SAAS,CAAA;IAC7C,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,eAAe,GAAG,IAAI,CAAA;IAC9C,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAAA;CAC1B;AAED,oDAAoD;AACpD,MAAM,WAAW,2BAA2B;IAC1C;;;;;OAKG;IACH,KAAK,CAAC,EAAE,QAAQ,GAAG,yBAAyB,CAAA;IAC5C;;;;OAIG;IACH,KAAK,CAAC,EAAE,MAAM,CAAA;IACd;;;OAGG;IACH,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,2BAA2B,EAAE,OAAO,EAAE,MAAM,KAAK,MAAM,CAAA;IAC1E;;;;OAIG;IACH,OAAO,CAAC,EAAE,OAAO,CAAA;CAClB;AA8CD;;;;;;;;;;;;;GAaG;AACH,wBAAgB,wBAAwB,CACtC,OAAO,GAAE,2BAAgC,GACxC,0BAA0B,CAoC5B"}