ai-functions 2.1.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286)
  1. package/.turbo/turbo-build.log +1 -4
  2. package/CHANGELOG.md +68 -1
  3. package/README.md +397 -157
  4. package/dist/ai-promise.d.ts +50 -3
  5. package/dist/ai-promise.d.ts.map +1 -1
  6. package/dist/ai-promise.js +410 -51
  7. package/dist/ai-promise.js.map +1 -1
  8. package/dist/ai-schemas.d.ts +56 -0
  9. package/dist/ai-schemas.d.ts.map +1 -0
  10. package/dist/ai-schemas.js +53 -0
  11. package/dist/ai-schemas.js.map +1 -0
  12. package/dist/ai.d.ts +16 -242
  13. package/dist/ai.d.ts.map +1 -1
  14. package/dist/ai.js +54 -837
  15. package/dist/ai.js.map +1 -1
  16. package/dist/batch/anthropic.d.ts +6 -4
  17. package/dist/batch/anthropic.d.ts.map +1 -1
  18. package/dist/batch/anthropic.js +83 -145
  19. package/dist/batch/anthropic.js.map +1 -1
  20. package/dist/batch/bedrock.d.ts +8 -30
  21. package/dist/batch/bedrock.d.ts.map +1 -1
  22. package/dist/batch/bedrock.js +155 -338
  23. package/dist/batch/bedrock.js.map +1 -1
  24. package/dist/batch/cloudflare.d.ts +8 -20
  25. package/dist/batch/cloudflare.d.ts.map +1 -1
  26. package/dist/batch/cloudflare.js +68 -189
  27. package/dist/batch/cloudflare.js.map +1 -1
  28. package/dist/batch/google.d.ts +6 -20
  29. package/dist/batch/google.d.ts.map +1 -1
  30. package/dist/batch/google.js +70 -238
  31. package/dist/batch/google.js.map +1 -1
  32. package/dist/batch/index.d.ts +4 -1
  33. package/dist/batch/index.d.ts.map +1 -1
  34. package/dist/batch/index.js +4 -1
  35. package/dist/batch/index.js.map +1 -1
  36. package/dist/batch/memory.d.ts +1 -1
  37. package/dist/batch/memory.d.ts.map +1 -1
  38. package/dist/batch/memory.js +14 -10
  39. package/dist/batch/memory.js.map +1 -1
  40. package/dist/batch/openai.d.ts +11 -14
  41. package/dist/batch/openai.d.ts.map +1 -1
  42. package/dist/batch/openai.js +52 -156
  43. package/dist/batch/openai.js.map +1 -1
  44. package/dist/batch/provider.d.ts +111 -0
  45. package/dist/batch/provider.d.ts.map +1 -0
  46. package/dist/batch/provider.js +233 -0
  47. package/dist/batch/provider.js.map +1 -0
  48. package/dist/batch-map.d.ts.map +1 -1
  49. package/dist/batch-map.js +23 -17
  50. package/dist/batch-map.js.map +1 -1
  51. package/dist/batch-queue.d.ts +65 -0
  52. package/dist/batch-queue.d.ts.map +1 -1
  53. package/dist/batch-queue.js +169 -14
  54. package/dist/batch-queue.js.map +1 -1
  55. package/dist/budget.d.ts +272 -0
  56. package/dist/budget.d.ts.map +1 -0
  57. package/dist/budget.js +513 -0
  58. package/dist/budget.js.map +1 -0
  59. package/dist/cache.d.ts +295 -0
  60. package/dist/cache.d.ts.map +1 -0
  61. package/dist/cache.js +433 -0
  62. package/dist/cache.js.map +1 -0
  63. package/dist/context.d.ts +42 -8
  64. package/dist/context.d.ts.map +1 -1
  65. package/dist/context.js +64 -62
  66. package/dist/context.js.map +1 -1
  67. package/dist/digital-objects-registry.d.ts +229 -0
  68. package/dist/digital-objects-registry.d.ts.map +1 -0
  69. package/dist/digital-objects-registry.js +617 -0
  70. package/dist/digital-objects-registry.js.map +1 -0
  71. package/dist/embeddings.d.ts +2 -2
  72. package/dist/embeddings.d.ts.map +1 -1
  73. package/dist/errors.d.ts +22 -0
  74. package/dist/errors.d.ts.map +1 -0
  75. package/dist/errors.js +35 -0
  76. package/dist/errors.js.map +1 -0
  77. package/dist/eval/runner.d.ts +10 -1
  78. package/dist/eval/runner.d.ts.map +1 -1
  79. package/dist/eval/runner.js +41 -35
  80. package/dist/eval/runner.js.map +1 -1
  81. package/dist/eval-log/in-memory.d.ts +34 -0
  82. package/dist/eval-log/in-memory.d.ts.map +1 -0
  83. package/dist/eval-log/in-memory.js +84 -0
  84. package/dist/eval-log/in-memory.js.map +1 -0
  85. package/dist/eval-log/index.d.ts +29 -0
  86. package/dist/eval-log/index.d.ts.map +1 -0
  87. package/dist/eval-log/index.js +39 -0
  88. package/dist/eval-log/index.js.map +1 -0
  89. package/dist/eval-log/types.d.ts +101 -0
  90. package/dist/eval-log/types.d.ts.map +1 -0
  91. package/dist/eval-log/types.js +16 -0
  92. package/dist/eval-log/types.js.map +1 -0
  93. package/dist/function-registry.d.ts +116 -0
  94. package/dist/function-registry.d.ts.map +1 -0
  95. package/dist/function-registry.js +546 -0
  96. package/dist/function-registry.js.map +1 -0
  97. package/dist/generate.d.ts +9 -3
  98. package/dist/generate.d.ts.map +1 -1
  99. package/dist/generate.js +18 -22
  100. package/dist/generate.js.map +1 -1
  101. package/dist/index.d.ts +35 -20
  102. package/dist/index.d.ts.map +1 -1
  103. package/dist/index.js +89 -42
  104. package/dist/index.js.map +1 -1
  105. package/dist/logger.d.ts +118 -0
  106. package/dist/logger.d.ts.map +1 -0
  107. package/dist/logger.js +187 -0
  108. package/dist/logger.js.map +1 -0
  109. package/dist/middleware/budget.d.ts +84 -0
  110. package/dist/middleware/budget.d.ts.map +1 -0
  111. package/dist/middleware/budget.js +110 -0
  112. package/dist/middleware/budget.js.map +1 -0
  113. package/dist/middleware/cache.d.ts +103 -0
  114. package/dist/middleware/cache.d.ts.map +1 -0
  115. package/dist/middleware/cache.js +228 -0
  116. package/dist/middleware/cache.js.map +1 -0
  117. package/dist/middleware/embed-cache.d.ts +99 -0
  118. package/dist/middleware/embed-cache.d.ts.map +1 -0
  119. package/dist/middleware/embed-cache.js +128 -0
  120. package/dist/middleware/embed-cache.js.map +1 -0
  121. package/dist/middleware/index.d.ts +11 -0
  122. package/dist/middleware/index.d.ts.map +1 -0
  123. package/dist/middleware/index.js +11 -0
  124. package/dist/middleware/index.js.map +1 -0
  125. package/dist/middleware/trace.d.ts +103 -0
  126. package/dist/middleware/trace.d.ts.map +1 -0
  127. package/dist/middleware/trace.js +176 -0
  128. package/dist/middleware/trace.js.map +1 -0
  129. package/dist/primitives.d.ts +120 -1
  130. package/dist/primitives.d.ts.map +1 -1
  131. package/dist/primitives.js +398 -26
  132. package/dist/primitives.js.map +1 -1
  133. package/dist/retry.d.ts +368 -0
  134. package/dist/retry.d.ts.map +1 -0
  135. package/dist/retry.js +646 -0
  136. package/dist/retry.js.map +1 -0
  137. package/dist/schema.d.ts.map +1 -1
  138. package/dist/schema.js +2 -10
  139. package/dist/schema.js.map +1 -1
  140. package/dist/telemetry.d.ts +128 -0
  141. package/dist/telemetry.d.ts.map +1 -0
  142. package/dist/telemetry.js +285 -0
  143. package/dist/telemetry.js.map +1 -0
  144. package/dist/template.d.ts.map +1 -1
  145. package/dist/template.js +6 -1
  146. package/dist/template.js.map +1 -1
  147. package/dist/tool-orchestration.d.ts +453 -0
  148. package/dist/tool-orchestration.d.ts.map +1 -0
  149. package/dist/tool-orchestration.js +763 -0
  150. package/dist/tool-orchestration.js.map +1 -0
  151. package/dist/type-guards.d.ts +28 -0
  152. package/dist/type-guards.d.ts.map +1 -0
  153. package/dist/type-guards.js +29 -0
  154. package/dist/type-guards.js.map +1 -0
  155. package/dist/types.d.ts +135 -17
  156. package/dist/types.d.ts.map +1 -1
  157. package/dist/types.js +36 -1
  158. package/dist/types.js.map +1 -1
  159. package/dist/wrap-for-v3.d.ts +80 -0
  160. package/dist/wrap-for-v3.d.ts.map +1 -0
  161. package/dist/wrap-for-v3.js +89 -0
  162. package/dist/wrap-for-v3.js.map +1 -0
  163. package/examples/00-quickstart.ts +232 -0
  164. package/examples/01-rag-chatbot.ts +212 -0
  165. package/examples/02-multi-agent-research.ts +290 -0
  166. package/examples/03-email-classification.ts +379 -0
  167. package/examples/04-content-moderation.ts +400 -0
  168. package/examples/05-document-extraction.ts +455 -0
  169. package/examples/06-streaming-chat-nextjs.ts +437 -0
  170. package/examples/07-cloudflare-worker.ts +483 -0
  171. package/examples/08-batch-processing.ts +491 -0
  172. package/examples/09-budget-constrained.ts +527 -0
  173. package/examples/10-tool-orchestration.ts +565 -0
  174. package/examples/11-retry-resilience.ts +403 -0
  175. package/examples/12-caching-strategies.ts +422 -0
  176. package/examples/README.md +145 -0
  177. package/package.json +10 -6
  178. package/src/ai-promise.ts +528 -99
  179. package/src/ai-schemas.ts +122 -0
  180. package/src/ai.ts +69 -1153
  181. package/src/batch/anthropic.ts +96 -161
  182. package/src/batch/bedrock.ts +203 -454
  183. package/src/batch/cloudflare.ts +99 -282
  184. package/src/batch/google.ts +91 -297
  185. package/src/batch/index.ts +4 -1
  186. package/src/batch/memory.ts +15 -10
  187. package/src/batch/openai.ts +65 -193
  188. package/src/batch/provider.ts +336 -0
  189. package/src/batch-map.ts +29 -24
  190. package/src/batch-queue.ts +200 -11
  191. package/src/budget.ts +740 -0
  192. package/src/cache.ts +681 -0
  193. package/src/context.ts +122 -76
  194. package/src/digital-objects-registry.ts +750 -0
  195. package/src/errors.ts +37 -0
  196. package/src/eval/runner.ts +63 -38
  197. package/src/eval-log/in-memory.ts +90 -0
  198. package/src/eval-log/index.ts +46 -0
  199. package/src/eval-log/types.ts +110 -0
  200. package/src/function-registry.ts +671 -0
  201. package/src/generate.ts +33 -33
  202. package/src/index.ts +325 -49
  203. package/src/logger.ts +232 -0
  204. package/src/middleware/budget.ts +171 -0
  205. package/src/middleware/cache.ts +299 -0
  206. package/src/middleware/embed-cache.ts +195 -0
  207. package/src/middleware/index.ts +23 -0
  208. package/src/middleware/trace.ts +248 -0
  209. package/src/primitives.ts +589 -62
  210. package/src/retry.ts +902 -0
  211. package/src/schema.ts +8 -17
  212. package/src/telemetry.ts +403 -0
  213. package/src/template.ts +8 -4
  214. package/src/tool-orchestration.ts +1173 -0
  215. package/src/type-guards.ts +31 -0
  216. package/src/types.ts +164 -25
  217. package/src/wrap-for-v3.ts +105 -0
  218. package/test/ai-promise.test.ts +1080 -0
  219. package/test/ai-proxy.test.ts +1 -1
  220. package/test/backward-compat.test.ts +147 -0
  221. package/test/batch-autosubmit-errors.test.ts +610 -0
  222. package/test/batch-blog-posts.test.ts +87 -129
  223. package/test/budget-tracking.test.ts +800 -0
  224. package/test/cache.test.ts +712 -0
  225. package/test/context-isolation.test.ts +687 -0
  226. package/test/core-functions.test.ts +183 -579
  227. package/test/decide.test.ts +154 -322
  228. package/test/define.test.ts +211 -8
  229. package/test/digital-objects-registry.test.ts +760 -0
  230. package/test/embedding-cache-middleware.test.ts +140 -0
  231. package/test/evals/deterministic.eval.test.ts +376 -0
  232. package/test/generate-core.test.ts +140 -229
  233. package/test/implicit-batch.test.ts +22 -65
  234. package/test/json-parse-error-handling.test.ts +463 -0
  235. package/test/retry-policy-integration.test.ts +117 -0
  236. package/test/retry.test.ts +1016 -0
  237. package/test/schema.test.ts +55 -19
  238. package/test/streaming.test.ts +316 -0
  239. package/test/template.test.ts +1164 -0
  240. package/test/tool-orchestration.test.ts +1040 -0
  241. package/test/wrap-for-v3.test.ts +612 -0
  242. package/vitest.config.js +6 -0
  243. package/vitest.config.ts +20 -0
  244. package/dist/rpc/auth.d.ts +0 -69
  245. package/dist/rpc/auth.d.ts.map +0 -1
  246. package/dist/rpc/auth.js +0 -136
  247. package/dist/rpc/auth.js.map +0 -1
  248. package/dist/rpc/client.d.ts +0 -62
  249. package/dist/rpc/client.d.ts.map +0 -1
  250. package/dist/rpc/client.js +0 -103
  251. package/dist/rpc/client.js.map +0 -1
  252. package/dist/rpc/deferred.d.ts +0 -60
  253. package/dist/rpc/deferred.d.ts.map +0 -1
  254. package/dist/rpc/deferred.js +0 -96
  255. package/dist/rpc/deferred.js.map +0 -1
  256. package/dist/rpc/index.d.ts +0 -22
  257. package/dist/rpc/index.d.ts.map +0 -1
  258. package/dist/rpc/index.js +0 -38
  259. package/dist/rpc/index.js.map +0 -1
  260. package/dist/rpc/local.d.ts +0 -42
  261. package/dist/rpc/local.d.ts.map +0 -1
  262. package/dist/rpc/local.js +0 -50
  263. package/dist/rpc/local.js.map +0 -1
  264. package/dist/rpc/server.d.ts +0 -165
  265. package/dist/rpc/server.d.ts.map +0 -1
  266. package/dist/rpc/server.js +0 -405
  267. package/dist/rpc/server.js.map +0 -1
  268. package/dist/rpc/session.d.ts +0 -32
  269. package/dist/rpc/session.d.ts.map +0 -1
  270. package/dist/rpc/session.js +0 -43
  271. package/dist/rpc/session.js.map +0 -1
  272. package/dist/rpc/transport.d.ts +0 -306
  273. package/dist/rpc/transport.d.ts.map +0 -1
  274. package/dist/rpc/transport.js +0 -731
  275. package/dist/rpc/transport.js.map +0 -1
  276. package/src/batch/anthropic.js +0 -256
  277. package/src/batch/bedrock.js +0 -584
  278. package/src/batch/cloudflare.js +0 -287
  279. package/src/batch/google.js +0 -359
  280. package/src/batch/index.js +0 -30
  281. package/src/batch/memory.js +0 -187
  282. package/src/batch/openai.js +0 -402
  283. package/src/eval/index.js +0 -7
  284. package/src/eval/models.js +0 -119
  285. package/src/eval/runner.js +0 -147
  286. package/test/schema.test.js +0 -96
@@ -0,0 +1,612 @@
1
+ /**
2
+ * Tests for the v3 middleware stack — cacheMiddleware, budgetMiddleware,
3
+ * traceMiddleware, wrapForV3, and the EvalLogStore primitive.
4
+ *
5
+ * Uses the AI SDK 6 `MockLanguageModelV3` from `'ai/test'` to simulate
6
+ * doGenerate / doStream without hitting a real provider.
7
+ */
8
+
9
+ import { describe, it, expect, beforeEach } from 'vitest'
10
+ import { wrapLanguageModel } from 'ai'
11
+ import { MockLanguageModelV3 } from 'ai/test'
12
+ import type {
13
+ LanguageModelV3CallOptions,
14
+ LanguageModelV3GenerateResult,
15
+ LanguageModelV3StreamResult,
16
+ LanguageModelV3StreamPart,
17
+ } from '@ai-sdk/provider'
18
+ import {
19
+ BudgetTracker,
20
+ cacheMiddleware,
21
+ budgetMiddleware,
22
+ traceMiddleware,
23
+ wrapForV3,
24
+ InMemoryEvalLogStore,
25
+ configureEvalLogStore,
26
+ getEvalLogStore,
27
+ type TraceEvent,
28
+ } from '../src/index.js'
29
+
30
+ // ============================================================================
31
+ // Helpers
32
+ // ============================================================================
33
+
34
+ function makeGenerateResult(
35
+ text: string,
36
+ inputTokens = 100,
37
+ outputTokens = 50
38
+ ): LanguageModelV3GenerateResult {
39
+ return {
40
+ content: [{ type: 'text', text }],
41
+ finishReason: 'stop',
42
+ usage: {
43
+ inputTokens: { total: inputTokens, noCache: inputTokens, cacheRead: 0, cacheWrite: 0 },
44
+ outputTokens: { total: outputTokens, text: outputTokens, reasoning: 0 },
45
+ },
46
+ warnings: [],
47
+ }
48
+ }
49
+
50
+ function makeStreamResult(
51
+ text: string,
52
+ inputTokens = 100,
53
+ outputTokens = 50
54
+ ): LanguageModelV3StreamResult {
55
+ const chunks: LanguageModelV3StreamPart[] = [
56
+ { type: 'stream-start', warnings: [] },
57
+ { type: 'text-start', id: '1' },
58
+ { type: 'text-delta', id: '1', delta: text },
59
+ { type: 'text-end', id: '1' },
60
+ {
61
+ type: 'finish',
62
+ finishReason: 'stop',
63
+ usage: {
64
+ inputTokens: { total: inputTokens, noCache: inputTokens, cacheRead: 0, cacheWrite: 0 },
65
+ outputTokens: { total: outputTokens, text: outputTokens, reasoning: 0 },
66
+ },
67
+ },
68
+ ]
69
+ return {
70
+ stream: new ReadableStream<LanguageModelV3StreamPart>({
71
+ start(controller) {
72
+ for (const chunk of chunks) controller.enqueue(chunk)
73
+ controller.close()
74
+ },
75
+ }),
76
+ }
77
+ }
78
+
79
+ function makeCallOptions(promptText: string): LanguageModelV3CallOptions {
80
+ return {
81
+ prompt: [{ role: 'user', content: [{ type: 'text', text: promptText }] }],
82
+ }
83
+ }
84
+
85
+ async function consumeStream(
86
+ stream: ReadableStream<LanguageModelV3StreamPart>
87
+ ): Promise<LanguageModelV3StreamPart[]> {
88
+ const reader = stream.getReader()
89
+ const out: LanguageModelV3StreamPart[] = []
90
+ while (true) {
91
+ const { done, value } = await reader.read()
92
+ if (done) break
93
+ out.push(value)
94
+ }
95
+ return out
96
+ }
97
+
98
+ // ============================================================================
99
+ // cacheMiddleware
100
+ // ============================================================================
101
+
102
+ describe('cacheMiddleware', () => {
103
+ beforeEach(() => {
104
+ process.env['V3_EVAL_CACHE'] = '1'
105
+ })
106
+
107
+ it('hit: returns cached result; miss: invokes wrapped model', async () => {
108
+ let calls = 0
109
+ const base = new MockLanguageModelV3({
110
+ doGenerate: async () => {
111
+ calls++
112
+ return makeGenerateResult(`response-${calls}`)
113
+ },
114
+ })
115
+ const wrapped = wrapLanguageModel({ model: base, middleware: cacheMiddleware() })
116
+ const params = makeCallOptions('hello')
117
+ const r1 = await wrapped.doGenerate(params)
118
+ const r2 = await wrapped.doGenerate(params)
119
+ expect(calls).toBe(1)
120
+ expect(r1.content).toEqual(r2.content)
121
+ expect((r1.content[0] as { text: string }).text).toBe('response-1')
122
+ })
123
+
124
+ it('key derivation invalidates on prompt change', async () => {
125
+ let calls = 0
126
+ const base = new MockLanguageModelV3({
127
+ doGenerate: async () => {
128
+ calls++
129
+ return makeGenerateResult(`r${calls}`)
130
+ },
131
+ })
132
+ const wrapped = wrapLanguageModel({ model: base, middleware: cacheMiddleware() })
133
+ await wrapped.doGenerate(makeCallOptions('first'))
134
+ await wrapped.doGenerate(makeCallOptions('second'))
135
+ expect(calls).toBe(2)
136
+ })
137
+
138
+ it('key derivation invalidates on schema change', async () => {
139
+ let calls = 0
140
+ const base = new MockLanguageModelV3({
141
+ doGenerate: async () => {
142
+ calls++
143
+ return makeGenerateResult(`r${calls}`)
144
+ },
145
+ })
146
+ const wrapped = wrapLanguageModel({ model: base, middleware: cacheMiddleware() })
147
+ const baseParams = makeCallOptions('hello')
148
+ await wrapped.doGenerate({
149
+ ...baseParams,
150
+ responseFormat: {
151
+ type: 'json',
152
+ schema: { type: 'object', properties: { a: { type: 'string' } } },
153
+ },
154
+ })
155
+ await wrapped.doGenerate({
156
+ ...baseParams,
157
+ responseFormat: {
158
+ type: 'json',
159
+ schema: { type: 'object', properties: { b: { type: 'string' } } },
160
+ },
161
+ })
162
+ expect(calls).toBe(2)
163
+ })
164
+
165
+ it('TTL: expired entries are evicted on access', async () => {
166
+ let calls = 0
167
+ const base = new MockLanguageModelV3({
168
+ doGenerate: async () => {
169
+ calls++
170
+ return makeGenerateResult(`r${calls}`)
171
+ },
172
+ })
173
+ // 1ms TTL — second call after a short await is past expiry.
174
+ const wrapped = wrapLanguageModel({
175
+ model: base,
176
+ middleware: cacheMiddleware({ ttlMs: 1 }),
177
+ })
178
+ const params = makeCallOptions('hello')
179
+ await wrapped.doGenerate(params)
180
+ await new Promise((r) => setTimeout(r, 10))
181
+ await wrapped.doGenerate(params)
182
+ expect(calls).toBe(2)
183
+ })
184
+
185
+ it('respects 24h TTL by default (no eviction in-test)', async () => {
186
+ let calls = 0
187
+ const base = new MockLanguageModelV3({
188
+ doGenerate: async () => {
189
+ calls++
190
+ return makeGenerateResult('cached')
191
+ },
192
+ })
193
+ const wrapped = wrapLanguageModel({ model: base, middleware: cacheMiddleware() })
194
+ const params = makeCallOptions('hello')
195
+ await wrapped.doGenerate(params)
196
+ await wrapped.doGenerate(params)
197
+ await wrapped.doGenerate(params)
198
+ expect(calls).toBe(1)
199
+ })
200
+
201
+ it('passthrough when env gate is disabled', async () => {
202
+ let calls = 0
203
+ const base = new MockLanguageModelV3({
204
+ doGenerate: async () => {
205
+ calls++
206
+ return makeGenerateResult(`r${calls}`)
207
+ },
208
+ })
209
+ const wrapped = wrapLanguageModel({
210
+ model: base,
211
+ middleware: cacheMiddleware({ enabled: false }),
212
+ })
213
+ const params = makeCallOptions('hello')
214
+ await wrapped.doGenerate(params)
215
+ await wrapped.doGenerate(params)
216
+ expect(calls).toBe(2)
217
+ })
218
+
219
+ it('streams: caches and replays chunks via simulateReadableStream', async () => {
220
+ let calls = 0
221
+ const base = new MockLanguageModelV3({
222
+ doStream: async () => {
223
+ calls++
224
+ return makeStreamResult(`stream-${calls}`)
225
+ },
226
+ })
227
+ const wrapped = wrapLanguageModel({ model: base, middleware: cacheMiddleware() })
228
+ const params = makeCallOptions('streaming hello')
229
+ const r1 = await wrapped.doStream(params)
230
+ const chunks1 = await consumeStream(r1.stream)
231
+ const r2 = await wrapped.doStream(params)
232
+ const chunks2 = await consumeStream(r2.stream)
233
+ expect(calls).toBe(1)
234
+ // Same shape, same content
235
+ const text1 = chunks1.find((c) => c.type === 'text-delta') as { delta: string } | undefined
236
+ const text2 = chunks2.find((c) => c.type === 'text-delta') as { delta: string } | undefined
237
+ expect(text1?.delta).toBe('stream-1')
238
+ expect(text2?.delta).toBe('stream-1')
239
+ })
240
+ })
241
+
242
+ // ============================================================================
243
+ // budgetMiddleware
244
+ // ============================================================================
245
+
246
+ describe('budgetMiddleware', () => {
247
+ beforeEach(() => {
248
+ process.env['V3_EVAL_CACHE'] = '1'
249
+ })
250
+
251
+ it('records usage to tracker on completion', async () => {
252
+ const tracker = new BudgetTracker()
253
+ const base = new MockLanguageModelV3({
254
+ modelId: 'gpt-4o',
255
+ doGenerate: async () => makeGenerateResult('hi', 1000, 500),
256
+ })
257
+ const wrapped = wrapLanguageModel({ model: base, middleware: budgetMiddleware({ tracker }) })
258
+ await wrapped.doGenerate(makeCallOptions('hello'))
259
+ expect(tracker.getTotalInputTokens()).toBe(1000)
260
+ expect(tracker.getTotalOutputTokens()).toBe(500)
261
+ // gpt-4o pricing: $2.5/M input, $10/M output → 0.0025 + 0.005 = 0.0075
262
+ expect(tracker.getTotalCost()).toBeCloseTo(0.0075, 6)
263
+ })
264
+
265
+ it('works on cached path AND fresh path', async () => {
266
+ const tracker = new BudgetTracker()
267
+ let underlyingCalls = 0
268
+ const base = new MockLanguageModelV3({
269
+ modelId: 'gpt-4o',
270
+ doGenerate: async () => {
271
+ underlyingCalls++
272
+ return makeGenerateResult('cached', 100, 50)
273
+ },
274
+ })
275
+ // Order matters here. If cache came FIRST in the array (cache → budget)
276
+ // it would be outermost on the way in: on a cache hit, cache would short-
277
+ // circuit and budget would never see the call. So budget is listed first
278
+ // and wraps cache: budget always sees the (cached or fresh) result.
279
+ const wrapped = wrapLanguageModel({
280
+ model: base,
281
+ middleware: [budgetMiddleware({ tracker }), cacheMiddleware()],
282
+ })
283
+ const params = makeCallOptions('budget+cache')
284
+ await wrapped.doGenerate(params)
285
+ await wrapped.doGenerate(params)
286
+ expect(underlyingCalls).toBe(1)
287
+ // Budget recorded twice (once on miss, once on hit).
288
+ expect(tracker.getTotalInputTokens()).toBe(200)
289
+ expect(tracker.getTotalOutputTokens()).toBe(100)
290
+ })
291
+
292
+ it('pricing overlay applied via modelIdOverride', async () => {
293
+ const tracker = new BudgetTracker({
294
+ customPricing: {
295
+ sonnet: { inputPricePerMillion: 3, outputPricePerMillion: 15 },
296
+ },
297
+ })
298
+ const base = new MockLanguageModelV3({
299
+ modelId: 'unknown-id',
300
+ doGenerate: async () => makeGenerateResult('hi', 1_000_000, 1_000_000),
301
+ })
302
+ const wrapped = wrapLanguageModel({
303
+ model: base,
304
+ middleware: budgetMiddleware({ tracker, modelIdOverride: 'sonnet' }),
305
+ })
306
+ await wrapped.doGenerate(makeCallOptions('hello'))
307
+ // 1M in @ $3 + 1M out @ $15 = $18
308
+ expect(tracker.getTotalCost()).toBeCloseTo(18, 4)
309
+ })
310
+
311
+ it('streams: records usage from finish part', async () => {
312
+ const tracker = new BudgetTracker()
313
+ const base = new MockLanguageModelV3({
314
+ modelId: 'gpt-4o',
315
+ doStream: async () => makeStreamResult('streamed', 200, 100),
316
+ })
317
+ const wrapped = wrapLanguageModel({ model: base, middleware: budgetMiddleware({ tracker }) })
318
+ const r = await wrapped.doStream(makeCallOptions('hello'))
319
+ await consumeStream(r.stream)
320
+ expect(tracker.getTotalInputTokens()).toBe(200)
321
+ expect(tracker.getTotalOutputTokens()).toBe(100)
322
+ })
323
+ })
324
+
325
+ // ============================================================================
326
+ // traceMiddleware
327
+ // ============================================================================
328
+
329
+ describe('traceMiddleware', () => {
330
+ it('emits expected event shape', async () => {
331
+ const events: TraceEvent[] = []
332
+ const base = new MockLanguageModelV3({
333
+ modelId: 'gpt-4o',
334
+ doGenerate: async () => makeGenerateResult('the response', 10, 5),
335
+ })
336
+ const wrapped = wrapLanguageModel({
337
+ model: base,
338
+ middleware: traceMiddleware({ kind: 'eval-trace', emit: (e) => events.push(e) }),
339
+ })
340
+ await wrapped.doGenerate(makeCallOptions('the prompt'))
341
+ expect(events.length).toBe(1)
342
+ const ev = events[0]!
343
+ expect(ev.kind).toBe('eval-trace')
344
+ expect(ev.model).toBe('gpt-4o')
345
+ expect(ev.prompt).toContain('the prompt')
346
+ expect(ev.response).toBe('the response')
347
+ expect(ev.usage?.inputTokens.total).toBe(10)
348
+ expect(ev.usage?.outputTokens.total).toBe(5)
349
+ expect(typeof ev.durationMs).toBe('number')
350
+ expect(ev.durationMs).toBeGreaterThanOrEqual(0)
351
+ })
352
+
353
+ it("doesn't break the wrapped chain on emit error", async () => {
354
+ const base = new MockLanguageModelV3({
355
+ modelId: 'gpt-4o',
356
+ doGenerate: async () => makeGenerateResult('ok', 1, 1),
357
+ })
358
+ const wrapped = wrapLanguageModel({
359
+ model: base,
360
+ middleware: traceMiddleware({
361
+ emit: () => {
362
+ throw new Error('sink is broken')
363
+ },
364
+ }),
365
+ })
366
+ // Should NOT throw — emit error is swallowed.
367
+ const result = await wrapped.doGenerate(makeCallOptions('hi'))
368
+ expect((result.content[0] as { text: string }).text).toBe('ok')
369
+ })
370
+
371
+ it('supports getCostUsd resolver for costUsd field', async () => {
372
+ const events: TraceEvent[] = []
373
+ const base = new MockLanguageModelV3({
374
+ modelId: 'gpt-4o',
375
+ doGenerate: async () => makeGenerateResult('hi', 1000, 500),
376
+ })
377
+ const wrapped = wrapLanguageModel({
378
+ model: base,
379
+ middleware: traceMiddleware({
380
+ emit: (e) => events.push(e),
381
+ getCostUsd: (_modelId, usage) => {
382
+ const inT = usage?.inputTokens.total ?? 0
383
+ const outT = usage?.outputTokens.total ?? 0
384
+ return (inT / 1_000_000) * 2.5 + (outT / 1_000_000) * 10
385
+ },
386
+ }),
387
+ })
388
+ await wrapped.doGenerate(makeCallOptions('hi'))
389
+ expect(events[0]?.costUsd).toBeCloseTo(0.0075, 6)
390
+ })
391
+
392
+ it('streams: emits on stream end with collected text', async () => {
393
+ const events: TraceEvent[] = []
394
+ const base = new MockLanguageModelV3({
395
+ modelId: 'gpt-4o',
396
+ doStream: async () => makeStreamResult('streamed-text', 50, 25),
397
+ })
398
+ const wrapped = wrapLanguageModel({
399
+ model: base,
400
+ middleware: traceMiddleware({ emit: (e) => events.push(e) }),
401
+ })
402
+ const r = await wrapped.doStream(makeCallOptions('hi'))
403
+ await consumeStream(r.stream)
404
+ // Wait a tick for flush handler
405
+ await new Promise((r) => setTimeout(r, 10))
406
+ expect(events.length).toBe(1)
407
+ expect(events[0]?.response).toBe('streamed-text')
408
+ })
409
+ })
410
+
411
+ // ============================================================================
412
+ // wrapForV3
413
+ // ============================================================================
414
+
415
+ describe('wrapForV3', () => {
416
+ beforeEach(() => {
417
+ process.env['V3_EVAL_CACHE'] = '1'
418
+ })
419
+
420
+ it('composes in correct order (cache → budget → trace)', async () => {
421
+ const tracker = new BudgetTracker()
422
+ const events: TraceEvent[] = []
423
+ let underlyingCalls = 0
424
+ const base = new MockLanguageModelV3({
425
+ modelId: 'gpt-4o',
426
+ doGenerate: async () => {
427
+ underlyingCalls++
428
+ return makeGenerateResult('combined', 100, 50)
429
+ },
430
+ })
431
+ const wrapped = wrapForV3(base, {
432
+ cache: {},
433
+ budget: { tracker },
434
+ trace: { emit: (e) => events.push(e) },
435
+ })
436
+ const params = makeCallOptions('hello combined')
437
+ // First call: miss → underlying invoked, budget records, trace emits
438
+ await (
439
+ wrapped as unknown as {
440
+ doGenerate: (o: LanguageModelV3CallOptions) => Promise<LanguageModelV3GenerateResult>
441
+ }
442
+ ).doGenerate(params)
443
+ // Second call: cache hit → cache short-circuits; budget+trace do NOT
444
+ // run because they're installed AFTER cache. (See JSDoc on wrapForV3
445
+ // composition order — cache-first is the eval-fixture default.)
446
+ await (
447
+ wrapped as unknown as {
448
+ doGenerate: (o: LanguageModelV3CallOptions) => Promise<LanguageModelV3GenerateResult>
449
+ }
450
+ ).doGenerate(params)
451
+ expect(underlyingCalls).toBe(1)
452
+ expect(tracker.getTotalInputTokens()).toBe(100)
453
+ expect(events.length).toBe(1)
454
+ })
455
+
456
+ it('options can be omitted partially', async () => {
457
+ const tracker = new BudgetTracker()
458
+ const base = new MockLanguageModelV3({
459
+ modelId: 'gpt-4o',
460
+ doGenerate: async () => makeGenerateResult('partial', 10, 5),
461
+ })
462
+ // Only budget — no cache, no trace
463
+ const wrapped = wrapForV3(base, { budget: { tracker } })
464
+ await (
465
+ wrapped as unknown as {
466
+ doGenerate: (o: LanguageModelV3CallOptions) => Promise<LanguageModelV3GenerateResult>
467
+ }
468
+ ).doGenerate(makeCallOptions('hi'))
469
+ expect(tracker.getTotalInputTokens()).toBe(10)
470
+ })
471
+
472
+ it('returns the underlying model when all options are absent', async () => {
473
+ const base = new MockLanguageModelV3({
474
+ doGenerate: async () => makeGenerateResult('untouched', 1, 1),
475
+ })
476
+ const wrapped = wrapForV3(base, {})
477
+ expect(wrapped).toBe(base)
478
+ })
479
+ })
480
+
481
+ // ============================================================================
482
+ // EvalLogStore (in-memory)
483
+ // ============================================================================
484
+
485
+ describe('InMemoryEvalLogStore', () => {
486
+ let store: InMemoryEvalLogStore
487
+
488
+ beforeEach(() => {
489
+ store = new InMemoryEvalLogStore()
490
+ })
491
+
492
+ it('record + get round-trips', async () => {
493
+ const stored = await store.record({
494
+ model: 'gpt-4o',
495
+ prompt: 'hello',
496
+ response: 'hi',
497
+ usage: { inputTokens: 10, outputTokens: 5 },
498
+ costUsd: 0.001,
499
+ durationMs: 42,
500
+ })
501
+ expect(stored.$id).toBeTruthy()
502
+ expect(stored.createdAt).toBeGreaterThan(0)
503
+ const fetched = await store.get(stored.$id)
504
+ expect(fetched).toEqual(stored)
505
+ })
506
+
507
+ it('list returns most recent first', async () => {
508
+ await store.record({
509
+ model: 'a',
510
+ prompt: 'p1',
511
+ response: 'r1',
512
+ usage: { inputTokens: 1, outputTokens: 1 },
513
+ costUsd: 0,
514
+ durationMs: 1,
515
+ })
516
+ await store.record({
517
+ model: 'b',
518
+ prompt: 'p2',
519
+ response: 'r2',
520
+ usage: { inputTokens: 1, outputTokens: 1 },
521
+ costUsd: 0,
522
+ durationMs: 1,
523
+ })
524
+ const list = await store.list()
525
+ expect(list.length).toBe(2)
526
+ expect(list[0]?.model).toBe('b')
527
+ expect(list[1]?.model).toBe('a')
528
+ })
529
+
530
+ it('list filters by model and traceId', async () => {
531
+ await store.record({
532
+ model: 'gpt-4o',
533
+ traceId: 't1',
534
+ prompt: 'p',
535
+ response: 'r',
536
+ usage: { inputTokens: 1, outputTokens: 1 },
537
+ costUsd: 0,
538
+ durationMs: 1,
539
+ })
540
+ await store.record({
541
+ model: 'sonnet',
542
+ traceId: 't1',
543
+ prompt: 'p',
544
+ response: 'r',
545
+ usage: { inputTokens: 1, outputTokens: 1 },
546
+ costUsd: 0,
547
+ durationMs: 1,
548
+ })
549
+ await store.record({
550
+ model: 'gpt-4o',
551
+ traceId: 't2',
552
+ prompt: 'p',
553
+ response: 'r',
554
+ usage: { inputTokens: 1, outputTokens: 1 },
555
+ costUsd: 0,
556
+ durationMs: 1,
557
+ })
558
+ expect((await store.list({ model: 'gpt-4o' })).length).toBe(2)
559
+ expect((await store.list({ traceId: 't1' })).length).toBe(2)
560
+ expect((await store.list({ model: 'gpt-4o', traceId: 't1' })).length).toBe(1)
561
+ })
562
+
563
+ it('list filters by tags (superset match)', async () => {
564
+ await store.record({
565
+ model: 'a',
566
+ tags: { persona: 'cfo', step: '3' },
567
+ prompt: 'p',
568
+ response: 'r',
569
+ usage: { inputTokens: 1, outputTokens: 1 },
570
+ costUsd: 0,
571
+ durationMs: 1,
572
+ })
573
+ await store.record({
574
+ model: 'b',
575
+ tags: { persona: 'cto' },
576
+ prompt: 'p',
577
+ response: 'r',
578
+ usage: { inputTokens: 1, outputTokens: 1 },
579
+ costUsd: 0,
580
+ durationMs: 1,
581
+ })
582
+ expect((await store.list({ tags: { persona: 'cfo' } })).length).toBe(1)
583
+ expect((await store.list({ tags: { persona: 'cto' } })).length).toBe(1)
584
+ expect((await store.list({ tags: { persona: 'unknown' } })).length).toBe(0)
585
+ })
586
+
587
+ it('delete removes the entry', async () => {
588
+ const e = await store.record({
589
+ model: 'a',
590
+ prompt: 'p',
591
+ response: 'r',
592
+ usage: { inputTokens: 1, outputTokens: 1 },
593
+ costUsd: 0,
594
+ durationMs: 1,
595
+ })
596
+ expect(await store.delete(e.$id)).toBe(true)
597
+ expect(await store.get(e.$id)).toBeUndefined()
598
+ expect(await store.delete(e.$id)).toBe(false)
599
+ })
600
+
601
+ it('global accessor + override', async () => {
602
+ const custom = new InMemoryEvalLogStore()
603
+ configureEvalLogStore(custom)
604
+ expect(getEvalLogStore()).toBe(custom)
605
+ configureEvalLogStore(null)
606
+ const lazy = getEvalLogStore()
607
+ expect(lazy).toBeInstanceOf(InMemoryEvalLogStore)
608
+ expect(lazy).not.toBe(custom)
609
+ // Reset so subsequent test-runs see a clean default
610
+ configureEvalLogStore(null)
611
+ })
612
+ })
package/vitest.config.js CHANGED
@@ -17,6 +17,12 @@ for (const envPath of envPaths) {
17
17
  }
18
18
  export default defineConfig({
19
19
  test: {
20
+ // CRITICAL: Limit concurrency to prevent resource exhaustion
21
+ maxConcurrency: 1,
22
+ maxWorkers: 1,
23
+ minWorkers: 1,
24
+ fileParallelism: false,
25
+
20
26
  globals: false,
21
27
  environment: 'node',
22
28
  include: ['test/**/*.test.ts'],
package/vitest.config.ts CHANGED
@@ -20,6 +20,12 @@ for (const envPath of envPaths) {
20
20
 
21
21
  export default defineConfig({
22
22
  test: {
23
+ // CRITICAL: Limit concurrency to prevent resource exhaustion
24
+ maxConcurrency: 1,
25
+ maxWorkers: 1,
26
+ minWorkers: 1,
27
+ fileParallelism: false,
28
+
23
29
  globals: false,
24
30
  environment: 'node',
25
31
  include: ['test/**/*.test.ts'],
@@ -38,5 +44,19 @@ export default defineConfig({
38
44
  singleFork: true,
39
45
  },
40
46
  },
47
+
48
+ // Coverage configuration
49
+ coverage: {
50
+ provider: 'v8',
51
+ reporter: ['text', 'json', 'html'],
52
+ include: ['src/**/*.ts'],
53
+ exclude: ['**/*.test.ts', '**/__tests__/**', '**/node_modules/**'],
54
+ thresholds: {
55
+ statements: 65,
56
+ branches: 60,
57
+ functions: 60,
58
+ lines: 65,
59
+ },
60
+ },
41
61
  },
42
62
  })