ai-functions 2.1.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286) hide show
  1. package/.turbo/turbo-build.log +1 -4
  2. package/CHANGELOG.md +68 -1
  3. package/README.md +397 -157
  4. package/dist/ai-promise.d.ts +50 -3
  5. package/dist/ai-promise.d.ts.map +1 -1
  6. package/dist/ai-promise.js +410 -51
  7. package/dist/ai-promise.js.map +1 -1
  8. package/dist/ai-schemas.d.ts +56 -0
  9. package/dist/ai-schemas.d.ts.map +1 -0
  10. package/dist/ai-schemas.js +53 -0
  11. package/dist/ai-schemas.js.map +1 -0
  12. package/dist/ai.d.ts +16 -242
  13. package/dist/ai.d.ts.map +1 -1
  14. package/dist/ai.js +54 -837
  15. package/dist/ai.js.map +1 -1
  16. package/dist/batch/anthropic.d.ts +6 -4
  17. package/dist/batch/anthropic.d.ts.map +1 -1
  18. package/dist/batch/anthropic.js +83 -145
  19. package/dist/batch/anthropic.js.map +1 -1
  20. package/dist/batch/bedrock.d.ts +8 -30
  21. package/dist/batch/bedrock.d.ts.map +1 -1
  22. package/dist/batch/bedrock.js +155 -338
  23. package/dist/batch/bedrock.js.map +1 -1
  24. package/dist/batch/cloudflare.d.ts +8 -20
  25. package/dist/batch/cloudflare.d.ts.map +1 -1
  26. package/dist/batch/cloudflare.js +68 -189
  27. package/dist/batch/cloudflare.js.map +1 -1
  28. package/dist/batch/google.d.ts +6 -20
  29. package/dist/batch/google.d.ts.map +1 -1
  30. package/dist/batch/google.js +70 -238
  31. package/dist/batch/google.js.map +1 -1
  32. package/dist/batch/index.d.ts +4 -1
  33. package/dist/batch/index.d.ts.map +1 -1
  34. package/dist/batch/index.js +4 -1
  35. package/dist/batch/index.js.map +1 -1
  36. package/dist/batch/memory.d.ts +1 -1
  37. package/dist/batch/memory.d.ts.map +1 -1
  38. package/dist/batch/memory.js +14 -10
  39. package/dist/batch/memory.js.map +1 -1
  40. package/dist/batch/openai.d.ts +11 -14
  41. package/dist/batch/openai.d.ts.map +1 -1
  42. package/dist/batch/openai.js +52 -156
  43. package/dist/batch/openai.js.map +1 -1
  44. package/dist/batch/provider.d.ts +111 -0
  45. package/dist/batch/provider.d.ts.map +1 -0
  46. package/dist/batch/provider.js +233 -0
  47. package/dist/batch/provider.js.map +1 -0
  48. package/dist/batch-map.d.ts.map +1 -1
  49. package/dist/batch-map.js +23 -17
  50. package/dist/batch-map.js.map +1 -1
  51. package/dist/batch-queue.d.ts +65 -0
  52. package/dist/batch-queue.d.ts.map +1 -1
  53. package/dist/batch-queue.js +169 -14
  54. package/dist/batch-queue.js.map +1 -1
  55. package/dist/budget.d.ts +272 -0
  56. package/dist/budget.d.ts.map +1 -0
  57. package/dist/budget.js +513 -0
  58. package/dist/budget.js.map +1 -0
  59. package/dist/cache.d.ts +295 -0
  60. package/dist/cache.d.ts.map +1 -0
  61. package/dist/cache.js +433 -0
  62. package/dist/cache.js.map +1 -0
  63. package/dist/context.d.ts +42 -8
  64. package/dist/context.d.ts.map +1 -1
  65. package/dist/context.js +64 -62
  66. package/dist/context.js.map +1 -1
  67. package/dist/digital-objects-registry.d.ts +229 -0
  68. package/dist/digital-objects-registry.d.ts.map +1 -0
  69. package/dist/digital-objects-registry.js +617 -0
  70. package/dist/digital-objects-registry.js.map +1 -0
  71. package/dist/embeddings.d.ts +2 -2
  72. package/dist/embeddings.d.ts.map +1 -1
  73. package/dist/errors.d.ts +22 -0
  74. package/dist/errors.d.ts.map +1 -0
  75. package/dist/errors.js +35 -0
  76. package/dist/errors.js.map +1 -0
  77. package/dist/eval/runner.d.ts +10 -1
  78. package/dist/eval/runner.d.ts.map +1 -1
  79. package/dist/eval/runner.js +41 -35
  80. package/dist/eval/runner.js.map +1 -1
  81. package/dist/eval-log/in-memory.d.ts +34 -0
  82. package/dist/eval-log/in-memory.d.ts.map +1 -0
  83. package/dist/eval-log/in-memory.js +84 -0
  84. package/dist/eval-log/in-memory.js.map +1 -0
  85. package/dist/eval-log/index.d.ts +29 -0
  86. package/dist/eval-log/index.d.ts.map +1 -0
  87. package/dist/eval-log/index.js +39 -0
  88. package/dist/eval-log/index.js.map +1 -0
  89. package/dist/eval-log/types.d.ts +101 -0
  90. package/dist/eval-log/types.d.ts.map +1 -0
  91. package/dist/eval-log/types.js +16 -0
  92. package/dist/eval-log/types.js.map +1 -0
  93. package/dist/function-registry.d.ts +116 -0
  94. package/dist/function-registry.d.ts.map +1 -0
  95. package/dist/function-registry.js +546 -0
  96. package/dist/function-registry.js.map +1 -0
  97. package/dist/generate.d.ts +9 -3
  98. package/dist/generate.d.ts.map +1 -1
  99. package/dist/generate.js +18 -22
  100. package/dist/generate.js.map +1 -1
  101. package/dist/index.d.ts +35 -20
  102. package/dist/index.d.ts.map +1 -1
  103. package/dist/index.js +89 -42
  104. package/dist/index.js.map +1 -1
  105. package/dist/logger.d.ts +118 -0
  106. package/dist/logger.d.ts.map +1 -0
  107. package/dist/logger.js +187 -0
  108. package/dist/logger.js.map +1 -0
  109. package/dist/middleware/budget.d.ts +84 -0
  110. package/dist/middleware/budget.d.ts.map +1 -0
  111. package/dist/middleware/budget.js +110 -0
  112. package/dist/middleware/budget.js.map +1 -0
  113. package/dist/middleware/cache.d.ts +103 -0
  114. package/dist/middleware/cache.d.ts.map +1 -0
  115. package/dist/middleware/cache.js +228 -0
  116. package/dist/middleware/cache.js.map +1 -0
  117. package/dist/middleware/embed-cache.d.ts +99 -0
  118. package/dist/middleware/embed-cache.d.ts.map +1 -0
  119. package/dist/middleware/embed-cache.js +128 -0
  120. package/dist/middleware/embed-cache.js.map +1 -0
  121. package/dist/middleware/index.d.ts +11 -0
  122. package/dist/middleware/index.d.ts.map +1 -0
  123. package/dist/middleware/index.js +11 -0
  124. package/dist/middleware/index.js.map +1 -0
  125. package/dist/middleware/trace.d.ts +103 -0
  126. package/dist/middleware/trace.d.ts.map +1 -0
  127. package/dist/middleware/trace.js +176 -0
  128. package/dist/middleware/trace.js.map +1 -0
  129. package/dist/primitives.d.ts +120 -1
  130. package/dist/primitives.d.ts.map +1 -1
  131. package/dist/primitives.js +398 -26
  132. package/dist/primitives.js.map +1 -1
  133. package/dist/retry.d.ts +368 -0
  134. package/dist/retry.d.ts.map +1 -0
  135. package/dist/retry.js +646 -0
  136. package/dist/retry.js.map +1 -0
  137. package/dist/schema.d.ts.map +1 -1
  138. package/dist/schema.js +2 -10
  139. package/dist/schema.js.map +1 -1
  140. package/dist/telemetry.d.ts +128 -0
  141. package/dist/telemetry.d.ts.map +1 -0
  142. package/dist/telemetry.js +285 -0
  143. package/dist/telemetry.js.map +1 -0
  144. package/dist/template.d.ts.map +1 -1
  145. package/dist/template.js +6 -1
  146. package/dist/template.js.map +1 -1
  147. package/dist/tool-orchestration.d.ts +453 -0
  148. package/dist/tool-orchestration.d.ts.map +1 -0
  149. package/dist/tool-orchestration.js +763 -0
  150. package/dist/tool-orchestration.js.map +1 -0
  151. package/dist/type-guards.d.ts +28 -0
  152. package/dist/type-guards.d.ts.map +1 -0
  153. package/dist/type-guards.js +29 -0
  154. package/dist/type-guards.js.map +1 -0
  155. package/dist/types.d.ts +135 -17
  156. package/dist/types.d.ts.map +1 -1
  157. package/dist/types.js +36 -1
  158. package/dist/types.js.map +1 -1
  159. package/dist/wrap-for-v3.d.ts +80 -0
  160. package/dist/wrap-for-v3.d.ts.map +1 -0
  161. package/dist/wrap-for-v3.js +89 -0
  162. package/dist/wrap-for-v3.js.map +1 -0
  163. package/examples/00-quickstart.ts +232 -0
  164. package/examples/01-rag-chatbot.ts +212 -0
  165. package/examples/02-multi-agent-research.ts +290 -0
  166. package/examples/03-email-classification.ts +379 -0
  167. package/examples/04-content-moderation.ts +400 -0
  168. package/examples/05-document-extraction.ts +455 -0
  169. package/examples/06-streaming-chat-nextjs.ts +437 -0
  170. package/examples/07-cloudflare-worker.ts +483 -0
  171. package/examples/08-batch-processing.ts +491 -0
  172. package/examples/09-budget-constrained.ts +527 -0
  173. package/examples/10-tool-orchestration.ts +565 -0
  174. package/examples/11-retry-resilience.ts +403 -0
  175. package/examples/12-caching-strategies.ts +422 -0
  176. package/examples/README.md +145 -0
  177. package/package.json +10 -6
  178. package/src/ai-promise.ts +528 -99
  179. package/src/ai-schemas.ts +122 -0
  180. package/src/ai.ts +69 -1153
  181. package/src/batch/anthropic.ts +96 -161
  182. package/src/batch/bedrock.ts +203 -454
  183. package/src/batch/cloudflare.ts +99 -282
  184. package/src/batch/google.ts +91 -297
  185. package/src/batch/index.ts +4 -1
  186. package/src/batch/memory.ts +15 -10
  187. package/src/batch/openai.ts +65 -193
  188. package/src/batch/provider.ts +336 -0
  189. package/src/batch-map.ts +29 -24
  190. package/src/batch-queue.ts +200 -11
  191. package/src/budget.ts +740 -0
  192. package/src/cache.ts +681 -0
  193. package/src/context.ts +122 -76
  194. package/src/digital-objects-registry.ts +750 -0
  195. package/src/errors.ts +37 -0
  196. package/src/eval/runner.ts +63 -38
  197. package/src/eval-log/in-memory.ts +90 -0
  198. package/src/eval-log/index.ts +46 -0
  199. package/src/eval-log/types.ts +110 -0
  200. package/src/function-registry.ts +671 -0
  201. package/src/generate.ts +33 -33
  202. package/src/index.ts +325 -49
  203. package/src/logger.ts +232 -0
  204. package/src/middleware/budget.ts +171 -0
  205. package/src/middleware/cache.ts +299 -0
  206. package/src/middleware/embed-cache.ts +195 -0
  207. package/src/middleware/index.ts +23 -0
  208. package/src/middleware/trace.ts +248 -0
  209. package/src/primitives.ts +589 -62
  210. package/src/retry.ts +902 -0
  211. package/src/schema.ts +8 -17
  212. package/src/telemetry.ts +403 -0
  213. package/src/template.ts +8 -4
  214. package/src/tool-orchestration.ts +1173 -0
  215. package/src/type-guards.ts +31 -0
  216. package/src/types.ts +164 -25
  217. package/src/wrap-for-v3.ts +105 -0
  218. package/test/ai-promise.test.ts +1080 -0
  219. package/test/ai-proxy.test.ts +1 -1
  220. package/test/backward-compat.test.ts +147 -0
  221. package/test/batch-autosubmit-errors.test.ts +610 -0
  222. package/test/batch-blog-posts.test.ts +87 -129
  223. package/test/budget-tracking.test.ts +800 -0
  224. package/test/cache.test.ts +712 -0
  225. package/test/context-isolation.test.ts +687 -0
  226. package/test/core-functions.test.ts +183 -579
  227. package/test/decide.test.ts +154 -322
  228. package/test/define.test.ts +211 -8
  229. package/test/digital-objects-registry.test.ts +760 -0
  230. package/test/embedding-cache-middleware.test.ts +140 -0
  231. package/test/evals/deterministic.eval.test.ts +376 -0
  232. package/test/generate-core.test.ts +140 -229
  233. package/test/implicit-batch.test.ts +22 -65
  234. package/test/json-parse-error-handling.test.ts +463 -0
  235. package/test/retry-policy-integration.test.ts +117 -0
  236. package/test/retry.test.ts +1016 -0
  237. package/test/schema.test.ts +55 -19
  238. package/test/streaming.test.ts +316 -0
  239. package/test/template.test.ts +1164 -0
  240. package/test/tool-orchestration.test.ts +1040 -0
  241. package/test/wrap-for-v3.test.ts +612 -0
  242. package/vitest.config.js +6 -0
  243. package/vitest.config.ts +20 -0
  244. package/dist/rpc/auth.d.ts +0 -69
  245. package/dist/rpc/auth.d.ts.map +0 -1
  246. package/dist/rpc/auth.js +0 -136
  247. package/dist/rpc/auth.js.map +0 -1
  248. package/dist/rpc/client.d.ts +0 -62
  249. package/dist/rpc/client.d.ts.map +0 -1
  250. package/dist/rpc/client.js +0 -103
  251. package/dist/rpc/client.js.map +0 -1
  252. package/dist/rpc/deferred.d.ts +0 -60
  253. package/dist/rpc/deferred.d.ts.map +0 -1
  254. package/dist/rpc/deferred.js +0 -96
  255. package/dist/rpc/deferred.js.map +0 -1
  256. package/dist/rpc/index.d.ts +0 -22
  257. package/dist/rpc/index.d.ts.map +0 -1
  258. package/dist/rpc/index.js +0 -38
  259. package/dist/rpc/index.js.map +0 -1
  260. package/dist/rpc/local.d.ts +0 -42
  261. package/dist/rpc/local.d.ts.map +0 -1
  262. package/dist/rpc/local.js +0 -50
  263. package/dist/rpc/local.js.map +0 -1
  264. package/dist/rpc/server.d.ts +0 -165
  265. package/dist/rpc/server.d.ts.map +0 -1
  266. package/dist/rpc/server.js +0 -405
  267. package/dist/rpc/server.js.map +0 -1
  268. package/dist/rpc/session.d.ts +0 -32
  269. package/dist/rpc/session.d.ts.map +0 -1
  270. package/dist/rpc/session.js +0 -43
  271. package/dist/rpc/session.js.map +0 -1
  272. package/dist/rpc/transport.d.ts +0 -306
  273. package/dist/rpc/transport.d.ts.map +0 -1
  274. package/dist/rpc/transport.js +0 -731
  275. package/dist/rpc/transport.js.map +0 -1
  276. package/src/batch/anthropic.js +0 -256
  277. package/src/batch/bedrock.js +0 -584
  278. package/src/batch/cloudflare.js +0 -287
  279. package/src/batch/google.js +0 -359
  280. package/src/batch/index.js +0 -30
  281. package/src/batch/memory.js +0 -187
  282. package/src/batch/openai.js +0 -402
  283. package/src/eval/index.js +0 -7
  284. package/src/eval/models.js +0 -119
  285. package/src/eval/runner.js +0 -147
  286. package/test/schema.test.js +0 -96
@@ -0,0 +1,195 @@
1
+ /**
2
+ * embeddingCacheMiddleware — content-addressable cache for `wrapEmbeddingModel`
3
+ *
4
+ * Embedding-side analogue of {@link cacheMiddleware}. Wraps `doEmbed` and
5
+ * caches the resulting embeddings keyed on
6
+ * `{ values, modelId, providerOptions }` so a re-embed of the same value
7
+ * batch with the same model returns the cached vectors without hitting the
8
+ * provider.
9
+ *
10
+ * **Why a separate middleware instead of reusing `cacheMiddleware`?**
11
+ * AI SDK 6 splits language-model and embedding-model surfaces:
12
+ * `LanguageModelV3Middleware` exposes `wrapGenerate` / `wrapStream` against
13
+ * `LanguageModelV3CallOptions`, while `EmbeddingModelV3Middleware` exposes
14
+ * `wrapEmbed` against `EmbeddingModelV3CallOptions`. The cache shape
15
+ * (per-value vector vs. per-prompt completion payload) is also different —
16
+ * embeddings cache batched arrays, generations cache single result objects.
17
+ *
18
+ * - **Hit derivation:** stable hash of `{ values, modelId, providerOptions }`.
19
+ * `values` is the array as-passed (caller can pre-normalise if they want
20
+ * case/whitespace insensitivity). Generation knobs don't apply.
21
+ *
22
+ * - **Batch semantics:** the cache key is the *whole* batch. A subset hit
23
+ * doesn't trigger a partial-fill — that's a more invasive shape change
24
+ * (the legacy `EmbeddingCache.getMany` did per-text caching, but it was
25
+ * only used in the example and added 100+ LOC of bookkeeping). Callers
26
+ * that want per-text caching should use stable per-text batches.
27
+ *
28
+ * - **TTL:** 24h default, configurable. Lazy expiry on access.
29
+ *
30
+ * - **Pluggable store:** in-memory default (Map-backed); custom store
31
+ * honored as-is. Disk persistence is intentionally not provided here —
32
+ * embedding payloads (large `number[][]`) make on-disk JSON a bad fit;
33
+ * callers who want it should pass a custom store.
34
+ *
35
+ * - **Env gate:** honors `process.env.V3_EVAL_CACHE` for parity with
36
+ * `cacheMiddleware`. Override via the `enabled` option.
37
+ *
38
+ * @packageDocumentation
39
+ */
40
+
41
+ import type {
42
+ EmbeddingModelV3CallOptions,
43
+ EmbeddingModelV3Embedding,
44
+ EmbeddingModelV3Middleware,
45
+ EmbeddingModelV3Result,
46
+ SharedV3Warning,
47
+ } from '@ai-sdk/provider'
48
+ import { hashKey } from '../cache.js'
49
+
50
+ // ============================================================================
51
+ // Types
52
+ // ============================================================================
53
+
54
/**
 * Cached embedding payload.
 *
 * One entry is stored per cache key. It carries the two fields that are
 * replayed on a hit (`embeddings`, `warnings`) plus the insertion timestamp
 * that the TTL check compares against `Date.now()`.
 */
interface EmbedCacheEntry {
  /** The embedding vectors returned for the cached batch. */
  embeddings: Array<EmbeddingModelV3Embedding>
  /** Provider warnings carried alongside the cached batch. */
  warnings: Array<SharedV3Warning>
  /** Insert epoch ms — drives TTL eviction. */
  createdAt: number
}
63
+
64
/**
 * Pluggable cache store for embedding results.
 *
 * All three methods are synchronous: the middleware uses their return values
 * directly and does not await them.
 */
export interface EmbedCacheMiddlewareStore {
  /** Look up the entry stored under `key`; `undefined` signals a miss. */
  get(key: string): EmbedCacheEntry | undefined
  /** Store `value` under `key`. Called after a provider call on a miss. */
  set(key: string, value: EmbedCacheEntry): void
  /** Remove the entry stored under `key`. Called when an entry is found expired. */
  delete(key: string): void
}
70
+
71
/**
 * Options for {@link embeddingCacheMiddleware}.
 *
 * Every option is resolved once, when the middleware is built — not on each
 * embed call.
 */
export interface EmbedCacheMiddlewareOptions {
  /**
   * Cache backend. `'memory'` uses a process-local Map. A custom
   * {@link EmbedCacheMiddlewareStore} can be passed instead.
   *
   * When omitted or `'memory'`, each middleware instance gets its own fresh
   * in-memory store.
   *
   * @default 'memory'
   */
  store?: 'memory' | EmbedCacheMiddlewareStore
  /**
   * TTL in milliseconds. Entries older than `ttlMs` are evicted on access.
   *
   * @default 86_400_000 (24h)
   */
  ttlMs?: number
  /**
   * Custom hash function for cache keys. Defaults to a stable hash of
   * `{ values, modelId, providerOptions }`.
   *
   * Receives the embed call options and the wrapped model's `modelId`.
   */
  keyHash?: (params: EmbeddingModelV3CallOptions, modelId: string) => string
  /**
   * Optional override for the env gate. When `false`, the middleware acts
   * as a passthrough regardless of `V3_EVAL_CACHE`. When `true`, always
   * caches. Defaults to `process.env.V3_EVAL_CACHE` truthy-check.
   *
   * The env variable is only read when this option is left `undefined`.
   */
  enabled?: boolean
}
98
+
99
+ // ============================================================================
100
+ // Stores
101
+ // ============================================================================
102
+
103
/**
 * Default process-local store: a thin wrapper over a `Map` keyed by the
 * cache key. Performs no eviction of its own.
 */
class MemoryStore implements EmbedCacheMiddlewareStore {
  private readonly entries = new Map<string, EmbedCacheEntry>()

  /** Return the entry stored under `key`, or `undefined` when absent. */
  get(key: string): EmbedCacheEntry | undefined {
    const found = this.entries.get(key)
    return found
  }

  /** Insert or overwrite the entry stored under `key`. */
  set(key: string, value: EmbedCacheEntry): void {
    this.entries.set(key, value)
  }

  /** Drop the entry stored under `key`; a missing key is a no-op. */
  delete(key: string): void {
    this.entries.delete(key)
  }
}
115
+
116
+ // ============================================================================
117
+ // Helpers
118
+ // ============================================================================
119
+
120
/** Default entry lifetime: 24 hours, expressed in milliseconds. */
const DEFAULT_TTL_MS = 86_400_000
121
+
122
/**
 * Default cache-key derivation: hashes the batch values, the model id and the
 * provider options together via `hashKey`.
 */
function defaultKeyHash(params: EmbeddingModelV3CallOptions, modelId: string): string {
  const { values, providerOptions } = params
  return hashKey({ values, modelId, providerOptions })
}
129
+
130
/**
 * Read the `V3_EVAL_CACHE` env gate.
 *
 * @returns `true` when the variable is set to any non-empty string; `false`
 * when it is unset or empty, or when the runtime exposes no `process` /
 * `process.env` at all (so building the middleware there does not throw a
 * `ReferenceError`).
 */
function envGateEnabled(): boolean {
  // Guard the global lookup: a bare `process` reference throws where it is not defined.
  if (typeof process === 'undefined' || process.env === undefined) return false
  const v = process.env['V3_EVAL_CACHE']
  return typeof v === 'string' && v.length > 0
}
134
+
135
/**
 * TTL check: an entry is expired once its age (now minus `createdAt`) is
 * strictly greater than `ttlMs`.
 */
function isExpired(entry: EmbedCacheEntry, ttlMs: number): boolean {
  const ageMs = Date.now() - entry.createdAt
  return ageMs > ttlMs
}
138
+
139
+ // ============================================================================
140
+ // Middleware
141
+ // ============================================================================
142
+
143
/**
 * Create an `EmbeddingModelV3Middleware` that caches whole embed batches.
 *
 * Options are resolved once here. On each `wrapEmbed` call the middleware:
 * 1. passes straight through to `doEmbed` when caching is disabled;
 * 2. otherwise derives a key from the call params and `model.modelId`;
 * 3. replays a stored, unexpired entry (embeddings + warnings only);
 * 4. deletes an expired entry, calls `doEmbed`, stores the fresh result and
 *    returns it unchanged.
 *
 * @param options - Store, TTL, key hash and enable override; all optional.
 * @returns A `v3` middleware implementing `wrapEmbed`.
 *
 * @example
 * ```ts
 * import { wrapEmbeddingModel } from 'ai'
 * import { embeddingCacheMiddleware } from 'ai-functions'
 *
 * const model = wrapEmbeddingModel({
 *   model: openai.embedding('text-embedding-3-small'),
 *   middleware: embeddingCacheMiddleware({ ttlMs: 86_400_000 }),
 * })
 * ```
 */
export function embeddingCacheMiddleware(
  options: EmbedCacheMiddlewareOptions = {}
): EmbeddingModelV3Middleware {
  const maxAgeMs = options.ttlMs ?? DEFAULT_TTL_MS
  const hashFn = options.keyHash ?? defaultKeyHash
  const storeOption = options.store
  const backend: EmbedCacheMiddlewareStore =
    storeOption !== undefined && storeOption !== 'memory' ? storeOption : new MemoryStore()
  const active = options.enabled ?? envGateEnabled()

  return {
    specificationVersion: 'v3',
    async wrapEmbed({ doEmbed, params, model }) {
      if (!active) return doEmbed()

      const cacheKey = hashFn(params, model.modelId)
      const hit = backend.get(cacheKey)

      if (hit !== undefined) {
        if (!isExpired(hit, maxAgeMs)) {
          // Only embeddings and warnings are replayed; provider-side
          // metadata (response headers, body, usage) is deliberately left
          // out on a hit — callers that need it should disable the cache.
          const replay: EmbeddingModelV3Result = {
            embeddings: hit.embeddings,
            warnings: hit.warnings,
          }
          return replay
        }
        // Stale entry: drop it and fall through to a fresh provider call.
        backend.delete(cacheKey)
      }

      const fresh = await doEmbed()
      const { embeddings, warnings } = fresh
      backend.set(cacheKey, { embeddings, warnings, createdAt: Date.now() })
      return fresh
    },
  }
}
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Middleware barrel — composable AI SDK 6 middleware primitives:
3
+ * `LanguageModelV3Middleware` for `wrapLanguageModel` and `EmbeddingModelV3Middleware` for `wrapEmbeddingModel`.
4
+ *
5
+ * @packageDocumentation
6
+ */
7
+
8
+ export { cacheMiddleware, type CacheMiddlewareOptions, type CacheMiddlewareStore } from './cache.js'
9
+
10
+ export {
11
+ embeddingCacheMiddleware,
12
+ type EmbedCacheMiddlewareOptions,
13
+ type EmbedCacheMiddlewareStore,
14
+ } from './embed-cache.js'
15
+
16
+ export { budgetMiddleware, type BudgetMiddlewareOptions, type PricingOverlay } from './budget.js'
17
+
18
+ export {
19
+ traceMiddleware,
20
+ type TraceEvent,
21
+ type TraceEventKind,
22
+ type TraceMiddlewareOptions,
23
+ } from './trace.js'
@@ -0,0 +1,248 @@
1
+ /**
2
+ * traceMiddleware — emit per-call trace events for `wrapLanguageModel`
3
+ *
4
+ * Wraps `doGenerate` / `doStream` and emits a {@link TraceEvent} on every
5
+ * completion. The sink is opaque (caller supplies `emit`) so this primitive
6
+ * works equally well piping into:
7
+ *
8
+ * - the v3 cascade-walker InvocationEvent stream (round 16+ work to add
9
+ * `'persona-trace'` / `'cascade-trace'` to the union),
10
+ * - an {@link import('../eval-log/index.js').EvalLogStore} for fixture
11
+ * replay,
12
+ * - OpenTelemetry / Datadog / Honeycomb adapters that map the event into
13
+ * a span.
14
+ *
15
+ * **Emit-error tolerance:** if the supplied `emit` throws, we *swallow* the
16
+ * error (with a one-time `console.warn`) so a flaky trace sink can never
17
+ * break the wrapped LLM call. This matches the Evalite v0.19 trace
18
+ * middleware behaviour.
19
+ *
20
+ * Composition note: install **last** so the event sees the final outcome
21
+ * (post-cache, post-budget). The event's `costUsd` field is best-effort —
22
+ * the trace middleware doesn't have direct access to the budget tracker, so
23
+ * the caller can pass a `getCostUsd` resolver if they want costs in the
24
+ * event payload.
25
+ *
26
+ * @packageDocumentation
27
+ */
28
+
29
+ import type {
30
+ LanguageModelV3CallOptions,
31
+ LanguageModelV3GenerateResult,
32
+ LanguageModelV3Middleware,
33
+ LanguageModelV3StreamPart,
34
+ LanguageModelV3StreamResult,
35
+ LanguageModelV3Usage,
36
+ } from '@ai-sdk/provider'
37
+
38
+ // ============================================================================
39
+ // Types
40
+ // ============================================================================
41
+
42
/**
 * Discriminator for the originating call site. Callers inject this via the
 * `kind` option so a single sink can fan events into different downstream
 * streams (persona panel vs. cascade walker vs. ad-hoc test).
 *
 * The union ends in `string`, so any string value is accepted; the three
 * literals document the known kinds rather than restrict the type.
 */
export type TraceEventKind = 'persona-trace' | 'cascade-trace' | 'eval-trace' | string
48
+
49
/**
 * Trace event payload emitted on every wrapped call completion.
 *
 * Field design notes:
 * - `prompt` / `response` are stringified for cheap downstream storage
 *   (the structured `LanguageModelV3Prompt` / `LanguageModelV3Content[]`
 *   shapes are intentionally flattened).
 * - `usage` is the raw V3 shape (with the cache breakdown) — the
 *   EvalLogStore consumer flattens it into total counts.
 * - `costUsd` is optional because the trace middleware doesn't compute
 *   cost itself; callers either pass a resolver or compute downstream
 *   from `usage`.
 */
export interface TraceEvent {
  /** Call-site discriminator, copied from the middleware's `kind` option. */
  kind: TraceEventKind
  /** The wrapped model's `modelId`. */
  model: string
  /** Flattened prompt: one `[role] text` line per message. */
  prompt: string
  /**
   * Flattened response. For generate calls this is the stringified result
   * content; for streams it is the concatenation of the `text-delta` chunks.
   */
  response: string
  /**
   * Usage reported by the call. For streams it is taken from the `finish`
   * chunk and stays `undefined` when no such chunk was seen.
   */
  usage: LanguageModelV3Usage | undefined
  /** Present only when a `getCostUsd` resolver was supplied. */
  costUsd?: number
  /**
   * Wall-clock milliseconds measured with `Date.now()`: from just before the
   * underlying call until it resolves (generate) or until the stream is
   * flushed (stream).
   */
  durationMs: number
  /** Optional caller-supplied tags for downstream filtering. */
  tags?: Record<string, string>
}
73
+
74
/** Options for {@link traceMiddleware}. */
export interface TraceMiddlewareOptions {
  /**
   * Opaque sink. Errors thrown from `emit` are swallowed (with a one-time
   * `console.warn`) so a flaky sink never breaks the wrapped LLM call.
   *
   * May return a promise: generate calls await it before returning the
   * result, while streams fire it without awaiting when the stream flushes.
   */
  emit: (event: TraceEvent) => void | Promise<void>
  /**
   * Discriminator threaded into the event's `kind` field. Defaults to
   * `'eval-trace'`.
   */
  kind?: TraceEventKind
  /**
   * Optional cost resolver. When supplied, called with the modelId first
   * and the V3 usage shape second; result is set on `event.costUsd`. Useful
   * when the caller has a side-channel pricing table (the budgetMiddleware's
   * tracker) and wants costs in the trace event itself.
   *
   * `usage` may be `undefined` (e.g. a stream that never reported usage).
   */
  getCostUsd?: (modelId: string, usage: LanguageModelV3Usage | undefined) => number
  /** Optional caller-supplied tags merged into every emitted event. */
  tags?: Record<string, string>
}
96
+
97
+ // ============================================================================
98
+ // Helpers
99
+ // ============================================================================
100
+
101
/**
 * Render a V3 prompt as plain text, one `[role] …` line per message.
 *
 * - System messages and any message whose content is a plain string are
 *   rendered verbatim after the role tag.
 * - Array content is rendered part by part: text parts contribute their
 *   text, every other part contributes a `[type]` marker; parts are joined
 *   with single spaces.
 * - Messages whose content is neither a string nor an array are skipped.
 *
 * @returns The rendered lines joined with `\n`.
 */
function stringifyPrompt(params: LanguageModelV3CallOptions): string {
  const lines: string[] = []
  for (const message of params.prompt) {
    if (message.role === 'system' || typeof message.content === 'string') {
      lines.push(`[${message.role}] ${message.content}`)
      continue
    }
    if (!Array.isArray(message.content)) continue

    const rendered: string[] = []
    for (const piece of message.content) {
      rendered.push(piece.type === 'text' ? piece.text : `[${piece.type}]`)
    }
    lines.push(`[${message.role}] ${rendered.join(' ')}`)
  }
  return lines.join('\n')
}
129
+
130
/**
 * Render V3 generate-result content as a single string.
 *
 * Text parts contribute their text, reasoning parts contribute
 * `[reasoning] ` followed by their text, and every other part contributes a
 * `[type]` marker. Pieces are concatenated with no separator.
 */
function stringifyContent(content: LanguageModelV3GenerateResult['content']): string {
  let rendered = ''
  for (const piece of content) {
    switch (piece.type) {
      case 'text':
        rendered += piece.text
        break
      case 'reasoning':
        rendered += `[reasoning] ${piece.text}`
        break
      default:
        rendered += `[${piece.type}]`
        break
    }
  }
  return rendered
}
144
+
145
/** Module-wide latch: set after the first emit failure has been reported. */
let _hasWarnedEmit = false

/**
 * Invoke the trace sink without ever letting it fail the caller.
 *
 * Awaits `emit(event)`; a synchronous throw or a rejected promise is caught.
 * The first failure (across all middleware instances in this module) is
 * reported with `console.warn`; later failures are dropped silently.
 */
async function safeEmit(emit: TraceMiddlewareOptions['emit'], event: TraceEvent): Promise<void> {
  try {
    await emit(event)
  } catch (err) {
    if (_hasWarnedEmit) return
    _hasWarnedEmit = true
    const detail = err instanceof Error ? err.message : String(err)
    // eslint-disable-next-line no-console
    console.warn(
      `[ai-functions/traceMiddleware] emit() threw — subsequent emit errors will be silenced. ${detail}`
    )
  }
}
162
+
163
+ // ============================================================================
164
+ // Middleware
165
+ // ============================================================================
166
+
167
/**
 * Build a trace middleware for `wrapLanguageModel`. Emits a
 * {@link TraceEvent} on every successful `doGenerate` / `doStream`
 * completion.
 *
 * Tracing never fails the wrapped call:
 * - errors from `emit` are swallowed by `safeEmit` (one-time warn);
 * - errors from the caller-supplied `getCostUsd` resolver are caught here
 *   (one-time warn per middleware instance) and the event is emitted
 *   without `costUsd`.
 *
 * Errors from the underlying `doGenerate` / `doStream` propagate unchanged
 * and produce no event. A stream that is cancelled or errors before it is
 * flushed produces no event either.
 *
 * @param options - Sink, event kind, optional cost resolver and tags.
 * @returns A `v3` middleware implementing `wrapGenerate` and `wrapStream`.
 *
 * @example
 * ```ts
 * import { wrapLanguageModel } from 'ai'
 * import { traceMiddleware, getEvalLogStore } from 'ai-functions'
 *
 * const store = getEvalLogStore()
 * const model = wrapLanguageModel({
 *   model: openai('gpt-4o'),
 *   middleware: traceMiddleware({
 *     kind: 'cascade-trace',
 *     emit: (event) => store.record({ ...event, costUsd: event.costUsd ?? 0 }),
 *   }),
 * })
 * ```
 */
export function traceMiddleware(options: TraceMiddlewareOptions): LanguageModelV3Middleware {
  const { emit, kind = 'eval-trace', getCostUsd, tags } = options
  let hasWarnedCost = false

  // Resolve the optional `costUsd` field. A throwing resolver must not turn a
  // successful LLM call (or an already-delivered stream) into a failure, so
  // the field is simply omitted when it throws.
  const costField = (
    modelId: string,
    usage: LanguageModelV3Usage | undefined
  ): { costUsd?: number } => {
    if (getCostUsd === undefined) return {}
    try {
      return { costUsd: getCostUsd(modelId, usage) }
    } catch (err) {
      if (!hasWarnedCost) {
        hasWarnedCost = true
        // eslint-disable-next-line no-console
        console.warn(
          `[ai-functions/traceMiddleware] getCostUsd() threw — costUsd is omitted when the resolver fails. ${
            err instanceof Error ? err.message : String(err)
          }`
        )
      }
      return {}
    }
  }

  // Single place where events are assembled, shared by both call paths.
  const buildEvent = (
    modelId: string,
    params: LanguageModelV3CallOptions,
    response: string,
    usage: LanguageModelV3Usage | undefined,
    durationMs: number
  ): TraceEvent => ({
    kind,
    model: modelId,
    prompt: stringifyPrompt(params),
    response,
    usage,
    durationMs,
    ...costField(modelId, usage),
    ...(tags !== undefined ? { tags } : {}),
  })

  return {
    specificationVersion: 'v3',
    async wrapGenerate({ doGenerate, params, model }) {
      const start = Date.now()
      const result = await doGenerate()
      const durationMs = Date.now() - start
      const event = buildEvent(
        model.modelId,
        params,
        stringifyContent(result.content),
        result.usage,
        durationMs
      )
      await safeEmit(emit, event)
      return result
    },
    async wrapStream({ doStream, params, model }) {
      const start = Date.now()
      const result = await doStream()
      const modelId = model.modelId
      let finalUsage: LanguageModelV3Usage | undefined
      const collected: string[] = []
      const transformedStream = result.stream.pipeThrough(
        new TransformStream<LanguageModelV3StreamPart, LanguageModelV3StreamPart>({
          transform(chunk, controller) {
            // Observe only; every chunk is forwarded untouched.
            if (chunk.type === 'text-delta') collected.push(chunk.delta)
            else if (chunk.type === 'finish') finalUsage = chunk.usage
            controller.enqueue(chunk)
          },
          flush() {
            const durationMs = Date.now() - start
            const event = buildEvent(modelId, params, collected.join(''), finalUsage, durationMs)
            // Fire-and-forget — flush could return the promise, but then a
            // slow sink would delay stream close, so it is deliberately not
            // awaited. safeEmit never rejects.
            void safeEmit(emit, event)
          },
        })
      )
      const wrapped: LanguageModelV3StreamResult = {
        ...result,
        stream: transformedStream,
      }
      return wrapped
    },
  }
}