ai-functions 2.1.3 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (284)
  1. package/.turbo/turbo-build.log +1 -1
  2. package/CHANGELOG.md +90 -1
  3. package/README.md +38 -0
  4. package/dist/ai-promise.d.ts +3 -3
  5. package/dist/ai-promise.d.ts.map +1 -1
  6. package/dist/ai-promise.js +135 -64
  7. package/dist/ai-promise.js.map +1 -1
  8. package/dist/ai-schemas.d.ts +56 -0
  9. package/dist/ai-schemas.d.ts.map +1 -0
  10. package/dist/ai-schemas.js +53 -0
  11. package/dist/ai-schemas.js.map +1 -0
  12. package/dist/ai.d.ts +16 -242
  13. package/dist/ai.d.ts.map +1 -1
  14. package/dist/ai.js +51 -858
  15. package/dist/ai.js.map +1 -1
  16. package/dist/batch/anthropic.d.ts +6 -4
  17. package/dist/batch/anthropic.d.ts.map +1 -1
  18. package/dist/batch/anthropic.js +83 -145
  19. package/dist/batch/anthropic.js.map +1 -1
  20. package/dist/batch/bedrock.d.ts +8 -30
  21. package/dist/batch/bedrock.d.ts.map +1 -1
  22. package/dist/batch/bedrock.js +155 -338
  23. package/dist/batch/bedrock.js.map +1 -1
  24. package/dist/batch/cloudflare.d.ts +8 -20
  25. package/dist/batch/cloudflare.d.ts.map +1 -1
  26. package/dist/batch/cloudflare.js +68 -189
  27. package/dist/batch/cloudflare.js.map +1 -1
  28. package/dist/batch/google.d.ts +6 -20
  29. package/dist/batch/google.d.ts.map +1 -1
  30. package/dist/batch/google.js +70 -238
  31. package/dist/batch/google.js.map +1 -1
  32. package/dist/batch/index.d.ts +4 -1
  33. package/dist/batch/index.d.ts.map +1 -1
  34. package/dist/batch/index.js +4 -1
  35. package/dist/batch/index.js.map +1 -1
  36. package/dist/batch/memory.d.ts +1 -1
  37. package/dist/batch/memory.d.ts.map +1 -1
  38. package/dist/batch/memory.js +14 -10
  39. package/dist/batch/memory.js.map +1 -1
  40. package/dist/batch/openai.d.ts +11 -14
  41. package/dist/batch/openai.d.ts.map +1 -1
  42. package/dist/batch/openai.js +52 -156
  43. package/dist/batch/openai.js.map +1 -1
  44. package/dist/batch/provider.d.ts +111 -0
  45. package/dist/batch/provider.d.ts.map +1 -0
  46. package/dist/batch/provider.js +233 -0
  47. package/dist/batch/provider.js.map +1 -0
  48. package/dist/batch-map.d.ts.map +1 -1
  49. package/dist/batch-map.js +23 -17
  50. package/dist/batch-map.js.map +1 -1
  51. package/dist/batch-queue.d.ts +65 -0
  52. package/dist/batch-queue.d.ts.map +1 -1
  53. package/dist/batch-queue.js +169 -14
  54. package/dist/batch-queue.js.map +1 -1
  55. package/dist/budget.d.ts.map +1 -1
  56. package/dist/budget.js +27 -14
  57. package/dist/budget.js.map +1 -1
  58. package/dist/cache.d.ts +23 -0
  59. package/dist/cache.d.ts.map +1 -1
  60. package/dist/cache.js +36 -15
  61. package/dist/cache.js.map +1 -1
  62. package/dist/context.d.ts +26 -8
  63. package/dist/context.d.ts.map +1 -1
  64. package/dist/context.js +64 -62
  65. package/dist/context.js.map +1 -1
  66. package/dist/digital-objects-registry.d.ts +229 -0
  67. package/dist/digital-objects-registry.d.ts.map +1 -0
  68. package/dist/digital-objects-registry.js +617 -0
  69. package/dist/digital-objects-registry.js.map +1 -0
  70. package/dist/embeddings.d.ts +2 -2
  71. package/dist/embeddings.d.ts.map +1 -1
  72. package/dist/errors.d.ts +22 -0
  73. package/dist/errors.d.ts.map +1 -0
  74. package/dist/errors.js +35 -0
  75. package/dist/errors.js.map +1 -0
  76. package/dist/eval/runner.d.ts +8 -0
  77. package/dist/eval/runner.d.ts.map +1 -1
  78. package/dist/eval/runner.js +41 -35
  79. package/dist/eval/runner.js.map +1 -1
  80. package/dist/eval-log/in-memory.d.ts +34 -0
  81. package/dist/eval-log/in-memory.d.ts.map +1 -0
  82. package/dist/eval-log/in-memory.js +84 -0
  83. package/dist/eval-log/in-memory.js.map +1 -0
  84. package/dist/eval-log/index.d.ts +29 -0
  85. package/dist/eval-log/index.d.ts.map +1 -0
  86. package/dist/eval-log/index.js +39 -0
  87. package/dist/eval-log/index.js.map +1 -0
  88. package/dist/eval-log/types.d.ts +101 -0
  89. package/dist/eval-log/types.d.ts.map +1 -0
  90. package/dist/eval-log/types.js +16 -0
  91. package/dist/eval-log/types.js.map +1 -0
  92. package/dist/function-registry.d.ts +176 -0
  93. package/dist/function-registry.d.ts.map +1 -0
  94. package/dist/function-registry.js +685 -0
  95. package/dist/function-registry.js.map +1 -0
  96. package/dist/generate.d.ts +9 -3
  97. package/dist/generate.d.ts.map +1 -1
  98. package/dist/generate.js +18 -18
  99. package/dist/generate.js.map +1 -1
  100. package/dist/index.d.ts +18 -11
  101. package/dist/index.d.ts.map +1 -1
  102. package/dist/index.js +35 -18
  103. package/dist/index.js.map +1 -1
  104. package/dist/logger.d.ts +118 -0
  105. package/dist/logger.d.ts.map +1 -0
  106. package/dist/logger.js +187 -0
  107. package/dist/logger.js.map +1 -0
  108. package/dist/middleware/budget.d.ts +84 -0
  109. package/dist/middleware/budget.d.ts.map +1 -0
  110. package/dist/middleware/budget.js +110 -0
  111. package/dist/middleware/budget.js.map +1 -0
  112. package/dist/middleware/cache.d.ts +103 -0
  113. package/dist/middleware/cache.d.ts.map +1 -0
  114. package/dist/middleware/cache.js +228 -0
  115. package/dist/middleware/cache.js.map +1 -0
  116. package/dist/middleware/embed-cache.d.ts +99 -0
  117. package/dist/middleware/embed-cache.d.ts.map +1 -0
  118. package/dist/middleware/embed-cache.js +128 -0
  119. package/dist/middleware/embed-cache.js.map +1 -0
  120. package/dist/middleware/index.d.ts +11 -0
  121. package/dist/middleware/index.d.ts.map +1 -0
  122. package/dist/middleware/index.js +11 -0
  123. package/dist/middleware/index.js.map +1 -0
  124. package/dist/middleware/trace.d.ts +103 -0
  125. package/dist/middleware/trace.d.ts.map +1 -0
  126. package/dist/middleware/trace.js +176 -0
  127. package/dist/middleware/trace.js.map +1 -0
  128. package/dist/primitives.d.ts +120 -1
  129. package/dist/primitives.d.ts.map +1 -1
  130. package/dist/primitives.js +398 -26
  131. package/dist/primitives.js.map +1 -1
  132. package/dist/retry.d.ts +66 -1
  133. package/dist/retry.d.ts.map +1 -1
  134. package/dist/retry.js +115 -8
  135. package/dist/retry.js.map +1 -1
  136. package/dist/sandbox.d.ts +36 -0
  137. package/dist/sandbox.d.ts.map +1 -0
  138. package/dist/sandbox.js +44 -0
  139. package/dist/sandbox.js.map +1 -0
  140. package/dist/schema.js +2 -2
  141. package/dist/schema.js.map +1 -1
  142. package/dist/telemetry.d.ts +128 -0
  143. package/dist/telemetry.d.ts.map +1 -0
  144. package/dist/telemetry.js +285 -0
  145. package/dist/telemetry.js.map +1 -0
  146. package/dist/template.d.ts.map +1 -1
  147. package/dist/template.js +6 -1
  148. package/dist/template.js.map +1 -1
  149. package/dist/tool-orchestration.d.ts +66 -4
  150. package/dist/tool-orchestration.d.ts.map +1 -1
  151. package/dist/tool-orchestration.js +123 -23
  152. package/dist/tool-orchestration.js.map +1 -1
  153. package/dist/type-guards.d.ts +28 -0
  154. package/dist/type-guards.d.ts.map +1 -0
  155. package/dist/type-guards.js +29 -0
  156. package/dist/type-guards.js.map +1 -0
  157. package/dist/types.d.ts +155 -19
  158. package/dist/types.d.ts.map +1 -1
  159. package/dist/types.js +36 -1
  160. package/dist/types.js.map +1 -1
  161. package/dist/wrap-for-v3.d.ts +80 -0
  162. package/dist/wrap-for-v3.d.ts.map +1 -0
  163. package/dist/wrap-for-v3.js +89 -0
  164. package/dist/wrap-for-v3.js.map +1 -0
  165. package/examples/00-quickstart.ts +232 -0
  166. package/examples/01-rag-chatbot.ts +212 -0
  167. package/examples/02-multi-agent-research.ts +290 -0
  168. package/examples/03-email-classification.ts +379 -0
  169. package/examples/04-content-moderation.ts +400 -0
  170. package/examples/05-document-extraction.ts +455 -0
  171. package/examples/06-streaming-chat-nextjs.ts +437 -0
  172. package/examples/07-cloudflare-worker.ts +483 -0
  173. package/examples/08-batch-processing.ts +491 -0
  174. package/examples/09-budget-constrained.ts +527 -0
  175. package/examples/10-tool-orchestration.ts +565 -0
  176. package/examples/11-retry-resilience.ts +403 -0
  177. package/examples/12-caching-strategies.ts +422 -0
  178. package/examples/README.md +145 -0
  179. package/package.json +29 -25
  180. package/src/ai-promise.ts +226 -140
  181. package/src/ai-schemas.ts +122 -0
  182. package/src/ai.ts +71 -1176
  183. package/src/batch/anthropic.ts +96 -161
  184. package/src/batch/bedrock.ts +203 -454
  185. package/src/batch/cloudflare.ts +99 -282
  186. package/src/batch/google.ts +91 -297
  187. package/src/batch/index.ts +4 -1
  188. package/src/batch/memory.ts +15 -10
  189. package/src/batch/openai.ts +65 -193
  190. package/src/batch/provider.ts +336 -0
  191. package/src/batch-map.ts +29 -24
  192. package/src/batch-queue.ts +200 -11
  193. package/src/budget.ts +31 -18
  194. package/src/cache.ts +45 -17
  195. package/src/context.ts +106 -77
  196. package/src/digital-objects-registry.ts +750 -0
  197. package/src/errors.ts +37 -0
  198. package/src/eval/runner.ts +60 -36
  199. package/src/eval-log/in-memory.ts +90 -0
  200. package/src/eval-log/index.ts +46 -0
  201. package/src/eval-log/types.ts +110 -0
  202. package/src/function-registry.ts +874 -0
  203. package/src/generate.ts +33 -28
  204. package/src/index.ts +122 -21
  205. package/src/logger.ts +232 -0
  206. package/src/middleware/budget.ts +171 -0
  207. package/src/middleware/cache.ts +299 -0
  208. package/src/middleware/embed-cache.ts +195 -0
  209. package/src/middleware/index.ts +23 -0
  210. package/src/middleware/trace.ts +248 -0
  211. package/src/primitives.ts +589 -62
  212. package/src/retry.ts +144 -18
  213. package/src/sandbox.ts +52 -0
  214. package/src/schema.ts +8 -8
  215. package/src/telemetry.ts +403 -0
  216. package/src/template.ts +8 -4
  217. package/src/tool-orchestration.ts +213 -48
  218. package/src/type-guards.ts +31 -0
  219. package/src/types.ts +186 -27
  220. package/src/wrap-for-v3.ts +105 -0
  221. package/test/ai-promise.test.ts +1080 -0
  222. package/test/ai-proxy.test.ts +1 -1
  223. package/test/batch-autosubmit-errors.test.ts +49 -37
  224. package/test/batch-blog-posts.test.ts +87 -129
  225. package/test/core-functions.test.ts +183 -579
  226. package/test/decide.test.ts +154 -322
  227. package/test/define.test.ts +211 -8
  228. package/test/digital-objects-registry.test.ts +760 -0
  229. package/test/embedding-cache-middleware.test.ts +140 -0
  230. package/test/fill-template.test.ts +89 -0
  231. package/test/generate-core.test.ts +140 -229
  232. package/test/implicit-batch.test.ts +22 -65
  233. package/test/retry-policy-integration.test.ts +117 -0
  234. package/test/sandbox-execution.test.ts +155 -0
  235. package/test/schema.test.ts +55 -19
  236. package/test/template.test.ts +1164 -0
  237. package/test/tool-orchestration.test.ts +270 -0
  238. package/test/wrap-for-v3.test.ts +612 -0
  239. package/vitest.config.js +6 -0
  240. package/vitest.config.ts +20 -0
  241. package/LICENSE +0 -21
  242. package/dist/rpc/auth.d.ts +0 -69
  243. package/dist/rpc/auth.d.ts.map +0 -1
  244. package/dist/rpc/auth.js +0 -136
  245. package/dist/rpc/auth.js.map +0 -1
  246. package/dist/rpc/client.d.ts +0 -62
  247. package/dist/rpc/client.d.ts.map +0 -1
  248. package/dist/rpc/client.js +0 -103
  249. package/dist/rpc/client.js.map +0 -1
  250. package/dist/rpc/deferred.d.ts +0 -60
  251. package/dist/rpc/deferred.d.ts.map +0 -1
  252. package/dist/rpc/deferred.js +0 -96
  253. package/dist/rpc/deferred.js.map +0 -1
  254. package/dist/rpc/index.d.ts +0 -22
  255. package/dist/rpc/index.d.ts.map +0 -1
  256. package/dist/rpc/index.js +0 -38
  257. package/dist/rpc/index.js.map +0 -1
  258. package/dist/rpc/local.d.ts +0 -42
  259. package/dist/rpc/local.d.ts.map +0 -1
  260. package/dist/rpc/local.js +0 -50
  261. package/dist/rpc/local.js.map +0 -1
  262. package/dist/rpc/server.d.ts +0 -165
  263. package/dist/rpc/server.d.ts.map +0 -1
  264. package/dist/rpc/server.js +0 -405
  265. package/dist/rpc/server.js.map +0 -1
  266. package/dist/rpc/session.d.ts +0 -32
  267. package/dist/rpc/session.d.ts.map +0 -1
  268. package/dist/rpc/session.js +0 -43
  269. package/dist/rpc/session.js.map +0 -1
  270. package/dist/rpc/transport.d.ts +0 -306
  271. package/dist/rpc/transport.d.ts.map +0 -1
  272. package/dist/rpc/transport.js +0 -731
  273. package/dist/rpc/transport.js.map +0 -1
  274. package/src/batch/anthropic.js +0 -256
  275. package/src/batch/bedrock.js +0 -584
  276. package/src/batch/cloudflare.js +0 -287
  277. package/src/batch/google.js +0 -359
  278. package/src/batch/index.js +0 -30
  279. package/src/batch/memory.js +0 -187
  280. package/src/batch/openai.js +0 -402
  281. package/src/eval/index.js +0 -7
  282. package/src/eval/models.js +0 -119
  283. package/src/eval/runner.js +0 -147
  284. package/test/schema.test.js +0 -96
@@ -0,0 +1,612 @@
1
+ /**
2
+ * Tests for the v3 middleware stack — cacheMiddleware, budgetMiddleware,
3
+ * traceMiddleware, wrapForV3, and the EvalLogStore primitive.
4
+ *
5
+ * Uses the AI SDK 6 `MockLanguageModelV3` from `'ai/test'` to simulate
6
+ * doGenerate / doStream without hitting a real provider.
7
+ */
8
+
9
+ import { describe, it, expect, beforeEach } from 'vitest'
10
+ import { wrapLanguageModel } from 'ai'
11
+ import { MockLanguageModelV3 } from 'ai/test'
12
+ import type {
13
+ LanguageModelV3CallOptions,
14
+ LanguageModelV3GenerateResult,
15
+ LanguageModelV3StreamResult,
16
+ LanguageModelV3StreamPart,
17
+ } from '@ai-sdk/provider'
18
+ import {
19
+ BudgetTracker,
20
+ cacheMiddleware,
21
+ budgetMiddleware,
22
+ traceMiddleware,
23
+ wrapForV3,
24
+ InMemoryEvalLogStore,
25
+ configureEvalLogStore,
26
+ getEvalLogStore,
27
+ type TraceEvent,
28
+ } from '../src/index.js'
29
+
30
+ // ============================================================================
31
+ // Helpers
32
+ // ============================================================================
33
+
34
+ function makeGenerateResult(
35
+ text: string,
36
+ inputTokens = 100,
37
+ outputTokens = 50
38
+ ): LanguageModelV3GenerateResult {
39
+ return {
40
+ content: [{ type: 'text', text }],
41
+ finishReason: 'stop',
42
+ usage: {
43
+ inputTokens: { total: inputTokens, noCache: inputTokens, cacheRead: 0, cacheWrite: 0 },
44
+ outputTokens: { total: outputTokens, text: outputTokens, reasoning: 0 },
45
+ },
46
+ warnings: [],
47
+ }
48
+ }
49
+
50
+ function makeStreamResult(
51
+ text: string,
52
+ inputTokens = 100,
53
+ outputTokens = 50
54
+ ): LanguageModelV3StreamResult {
55
+ const chunks: LanguageModelV3StreamPart[] = [
56
+ { type: 'stream-start', warnings: [] },
57
+ { type: 'text-start', id: '1' },
58
+ { type: 'text-delta', id: '1', delta: text },
59
+ { type: 'text-end', id: '1' },
60
+ {
61
+ type: 'finish',
62
+ finishReason: 'stop',
63
+ usage: {
64
+ inputTokens: { total: inputTokens, noCache: inputTokens, cacheRead: 0, cacheWrite: 0 },
65
+ outputTokens: { total: outputTokens, text: outputTokens, reasoning: 0 },
66
+ },
67
+ },
68
+ ]
69
+ return {
70
+ stream: new ReadableStream<LanguageModelV3StreamPart>({
71
+ start(controller) {
72
+ for (const chunk of chunks) controller.enqueue(chunk)
73
+ controller.close()
74
+ },
75
+ }),
76
+ }
77
+ }
78
+
79
+ function makeCallOptions(promptText: string): LanguageModelV3CallOptions {
80
+ return {
81
+ prompt: [{ role: 'user', content: [{ type: 'text', text: promptText }] }],
82
+ }
83
+ }
84
+
85
+ async function consumeStream(
86
+ stream: ReadableStream<LanguageModelV3StreamPart>
87
+ ): Promise<LanguageModelV3StreamPart[]> {
88
+ const reader = stream.getReader()
89
+ const out: LanguageModelV3StreamPart[] = []
90
+ while (true) {
91
+ const { done, value } = await reader.read()
92
+ if (done) break
93
+ out.push(value)
94
+ }
95
+ return out
96
+ }
97
+
98
+ // ============================================================================
99
+ // cacheMiddleware
100
+ // ============================================================================
101
+
102
+ describe('cacheMiddleware', () => {
103
+ beforeEach(() => {
104
+ process.env['V3_EVAL_CACHE'] = '1'
105
+ })
106
+
107
+ it('hit: returns cached result; miss: invokes wrapped model', async () => {
108
+ let calls = 0
109
+ const base = new MockLanguageModelV3({
110
+ doGenerate: async () => {
111
+ calls++
112
+ return makeGenerateResult(`response-${calls}`)
113
+ },
114
+ })
115
+ const wrapped = wrapLanguageModel({ model: base, middleware: cacheMiddleware() })
116
+ const params = makeCallOptions('hello')
117
+ const r1 = await wrapped.doGenerate(params)
118
+ const r2 = await wrapped.doGenerate(params)
119
+ expect(calls).toBe(1)
120
+ expect(r1.content).toEqual(r2.content)
121
+ expect((r1.content[0] as { text: string }).text).toBe('response-1')
122
+ })
123
+
124
+ it('key derivation invalidates on prompt change', async () => {
125
+ let calls = 0
126
+ const base = new MockLanguageModelV3({
127
+ doGenerate: async () => {
128
+ calls++
129
+ return makeGenerateResult(`r${calls}`)
130
+ },
131
+ })
132
+ const wrapped = wrapLanguageModel({ model: base, middleware: cacheMiddleware() })
133
+ await wrapped.doGenerate(makeCallOptions('first'))
134
+ await wrapped.doGenerate(makeCallOptions('second'))
135
+ expect(calls).toBe(2)
136
+ })
137
+
138
+ it('key derivation invalidates on schema change', async () => {
139
+ let calls = 0
140
+ const base = new MockLanguageModelV3({
141
+ doGenerate: async () => {
142
+ calls++
143
+ return makeGenerateResult(`r${calls}`)
144
+ },
145
+ })
146
+ const wrapped = wrapLanguageModel({ model: base, middleware: cacheMiddleware() })
147
+ const baseParams = makeCallOptions('hello')
148
+ await wrapped.doGenerate({
149
+ ...baseParams,
150
+ responseFormat: {
151
+ type: 'json',
152
+ schema: { type: 'object', properties: { a: { type: 'string' } } },
153
+ },
154
+ })
155
+ await wrapped.doGenerate({
156
+ ...baseParams,
157
+ responseFormat: {
158
+ type: 'json',
159
+ schema: { type: 'object', properties: { b: { type: 'string' } } },
160
+ },
161
+ })
162
+ expect(calls).toBe(2)
163
+ })
164
+
165
+ it('TTL: expired entries are evicted on access', async () => {
166
+ let calls = 0
167
+ const base = new MockLanguageModelV3({
168
+ doGenerate: async () => {
169
+ calls++
170
+ return makeGenerateResult(`r${calls}`)
171
+ },
172
+ })
173
+ // 1ms TTL — second call after a short await is past expiry.
174
+ const wrapped = wrapLanguageModel({
175
+ model: base,
176
+ middleware: cacheMiddleware({ ttlMs: 1 }),
177
+ })
178
+ const params = makeCallOptions('hello')
179
+ await wrapped.doGenerate(params)
180
+ await new Promise((r) => setTimeout(r, 10))
181
+ await wrapped.doGenerate(params)
182
+ expect(calls).toBe(2)
183
+ })
184
+
185
+ it('respects 24h TTL by default (no eviction in-test)', async () => {
186
+ let calls = 0
187
+ const base = new MockLanguageModelV3({
188
+ doGenerate: async () => {
189
+ calls++
190
+ return makeGenerateResult('cached')
191
+ },
192
+ })
193
+ const wrapped = wrapLanguageModel({ model: base, middleware: cacheMiddleware() })
194
+ const params = makeCallOptions('hello')
195
+ await wrapped.doGenerate(params)
196
+ await wrapped.doGenerate(params)
197
+ await wrapped.doGenerate(params)
198
+ expect(calls).toBe(1)
199
+ })
200
+
201
+ it('passthrough when env gate is disabled', async () => {
202
+ let calls = 0
203
+ const base = new MockLanguageModelV3({
204
+ doGenerate: async () => {
205
+ calls++
206
+ return makeGenerateResult(`r${calls}`)
207
+ },
208
+ })
209
+ const wrapped = wrapLanguageModel({
210
+ model: base,
211
+ middleware: cacheMiddleware({ enabled: false }),
212
+ })
213
+ const params = makeCallOptions('hello')
214
+ await wrapped.doGenerate(params)
215
+ await wrapped.doGenerate(params)
216
+ expect(calls).toBe(2)
217
+ })
218
+
219
+ it('streams: caches and replays chunks via simulateReadableStream', async () => {
220
+ let calls = 0
221
+ const base = new MockLanguageModelV3({
222
+ doStream: async () => {
223
+ calls++
224
+ return makeStreamResult(`stream-${calls}`)
225
+ },
226
+ })
227
+ const wrapped = wrapLanguageModel({ model: base, middleware: cacheMiddleware() })
228
+ const params = makeCallOptions('streaming hello')
229
+ const r1 = await wrapped.doStream(params)
230
+ const chunks1 = await consumeStream(r1.stream)
231
+ const r2 = await wrapped.doStream(params)
232
+ const chunks2 = await consumeStream(r2.stream)
233
+ expect(calls).toBe(1)
234
+ // Same shape, same content
235
+ const text1 = chunks1.find((c) => c.type === 'text-delta') as { delta: string } | undefined
236
+ const text2 = chunks2.find((c) => c.type === 'text-delta') as { delta: string } | undefined
237
+ expect(text1?.delta).toBe('stream-1')
238
+ expect(text2?.delta).toBe('stream-1')
239
+ })
240
+ })
241
+
242
+ // ============================================================================
243
+ // budgetMiddleware
244
+ // ============================================================================
245
+
246
+ describe('budgetMiddleware', () => {
247
+ beforeEach(() => {
248
+ process.env['V3_EVAL_CACHE'] = '1'
249
+ })
250
+
251
+ it('records usage to tracker on completion', async () => {
252
+ const tracker = new BudgetTracker()
253
+ const base = new MockLanguageModelV3({
254
+ modelId: 'gpt-4o',
255
+ doGenerate: async () => makeGenerateResult('hi', 1000, 500),
256
+ })
257
+ const wrapped = wrapLanguageModel({ model: base, middleware: budgetMiddleware({ tracker }) })
258
+ await wrapped.doGenerate(makeCallOptions('hello'))
259
+ expect(tracker.getTotalInputTokens()).toBe(1000)
260
+ expect(tracker.getTotalOutputTokens()).toBe(500)
261
+ // gpt-4o pricing: $2.5/M input, $10/M output → 1000 in = 0.0025, 500 out = 0.005, total 0.0075
262
+ expect(tracker.getTotalCost()).toBeCloseTo(0.0075, 6)
263
+ })
264
+
265
+ it('works on cached path AND fresh path', async () => {
266
+ const tracker = new BudgetTracker()
267
+ let underlyingCalls = 0
268
+ const base = new MockLanguageModelV3({
269
+ modelId: 'gpt-4o',
270
+ doGenerate: async () => {
271
+ underlyingCalls++
272
+ return makeGenerateResult('cached', 100, 50)
273
+ },
274
+ })
275
+ // Order matters here. With [cache, budget], cache would be FIRST in the
276
+ // array → outermost on the way in, so on a cache hit cache would short-
277
+ // circuit and budget would never see the call. We flip the order so budget
278
+ // wraps cache: budget always sees the (cached or fresh) result.
279
+ const wrapped = wrapLanguageModel({
280
+ model: base,
281
+ middleware: [budgetMiddleware({ tracker }), cacheMiddleware()],
282
+ })
283
+ const params = makeCallOptions('budget+cache')
284
+ await wrapped.doGenerate(params)
285
+ await wrapped.doGenerate(params)
286
+ expect(underlyingCalls).toBe(1)
287
+ // Budget recorded twice (once on miss, once on hit).
288
+ expect(tracker.getTotalInputTokens()).toBe(200)
289
+ expect(tracker.getTotalOutputTokens()).toBe(100)
290
+ })
291
+
292
+ it('pricing overlay applied via modelIdOverride', async () => {
293
+ const tracker = new BudgetTracker({
294
+ customPricing: {
295
+ sonnet: { inputPricePerMillion: 3, outputPricePerMillion: 15 },
296
+ },
297
+ })
298
+ const base = new MockLanguageModelV3({
299
+ modelId: 'unknown-id',
300
+ doGenerate: async () => makeGenerateResult('hi', 1_000_000, 1_000_000),
301
+ })
302
+ const wrapped = wrapLanguageModel({
303
+ model: base,
304
+ middleware: budgetMiddleware({ tracker, modelIdOverride: 'sonnet' }),
305
+ })
306
+ await wrapped.doGenerate(makeCallOptions('hello'))
307
+ // 1M in @ $3 + 1M out @ $15 = $18
308
+ expect(tracker.getTotalCost()).toBeCloseTo(18, 4)
309
+ })
310
+
311
+ it('streams: records usage from finish part', async () => {
312
+ const tracker = new BudgetTracker()
313
+ const base = new MockLanguageModelV3({
314
+ modelId: 'gpt-4o',
315
+ doStream: async () => makeStreamResult('streamed', 200, 100),
316
+ })
317
+ const wrapped = wrapLanguageModel({ model: base, middleware: budgetMiddleware({ tracker }) })
318
+ const r = await wrapped.doStream(makeCallOptions('hello'))
319
+ await consumeStream(r.stream)
320
+ expect(tracker.getTotalInputTokens()).toBe(200)
321
+ expect(tracker.getTotalOutputTokens()).toBe(100)
322
+ })
323
+ })
324
+
325
+ // ============================================================================
326
+ // traceMiddleware
327
+ // ============================================================================
328
+
329
+ describe('traceMiddleware', () => {
330
+ it('emits expected event shape', async () => {
331
+ const events: TraceEvent[] = []
332
+ const base = new MockLanguageModelV3({
333
+ modelId: 'gpt-4o',
334
+ doGenerate: async () => makeGenerateResult('the response', 10, 5),
335
+ })
336
+ const wrapped = wrapLanguageModel({
337
+ model: base,
338
+ middleware: traceMiddleware({ kind: 'eval-trace', emit: (e) => events.push(e) }),
339
+ })
340
+ await wrapped.doGenerate(makeCallOptions('the prompt'))
341
+ expect(events.length).toBe(1)
342
+ const ev = events[0]!
343
+ expect(ev.kind).toBe('eval-trace')
344
+ expect(ev.model).toBe('gpt-4o')
345
+ expect(ev.prompt).toContain('the prompt')
346
+ expect(ev.response).toBe('the response')
347
+ expect(ev.usage?.inputTokens.total).toBe(10)
348
+ expect(ev.usage?.outputTokens.total).toBe(5)
349
+ expect(typeof ev.durationMs).toBe('number')
350
+ expect(ev.durationMs).toBeGreaterThanOrEqual(0)
351
+ })
352
+
353
+ it("doesn't break the wrapped chain on emit error", async () => {
354
+ const base = new MockLanguageModelV3({
355
+ modelId: 'gpt-4o',
356
+ doGenerate: async () => makeGenerateResult('ok', 1, 1),
357
+ })
358
+ const wrapped = wrapLanguageModel({
359
+ model: base,
360
+ middleware: traceMiddleware({
361
+ emit: () => {
362
+ throw new Error('sink is broken')
363
+ },
364
+ }),
365
+ })
366
+ // Should NOT throw — emit error is swallowed.
367
+ const result = await wrapped.doGenerate(makeCallOptions('hi'))
368
+ expect((result.content[0] as { text: string }).text).toBe('ok')
369
+ })
370
+
371
+ it('supports getCostUsd resolver for costUsd field', async () => {
372
+ const events: TraceEvent[] = []
373
+ const base = new MockLanguageModelV3({
374
+ modelId: 'gpt-4o',
375
+ doGenerate: async () => makeGenerateResult('hi', 1000, 500),
376
+ })
377
+ const wrapped = wrapLanguageModel({
378
+ model: base,
379
+ middleware: traceMiddleware({
380
+ emit: (e) => events.push(e),
381
+ getCostUsd: (_modelId, usage) => {
382
+ const inT = usage?.inputTokens.total ?? 0
383
+ const outT = usage?.outputTokens.total ?? 0
384
+ return (inT / 1_000_000) * 2.5 + (outT / 1_000_000) * 10
385
+ },
386
+ }),
387
+ })
388
+ await wrapped.doGenerate(makeCallOptions('hi'))
389
+ expect(events[0]?.costUsd).toBeCloseTo(0.0075, 6)
390
+ })
391
+
392
+ it('streams: emits on stream end with collected text', async () => {
393
+ const events: TraceEvent[] = []
394
+ const base = new MockLanguageModelV3({
395
+ modelId: 'gpt-4o',
396
+ doStream: async () => makeStreamResult('streamed-text', 50, 25),
397
+ })
398
+ const wrapped = wrapLanguageModel({
399
+ model: base,
400
+ middleware: traceMiddleware({ emit: (e) => events.push(e) }),
401
+ })
402
+ const r = await wrapped.doStream(makeCallOptions('hi'))
403
+ await consumeStream(r.stream)
404
+ // Wait a tick for flush handler
405
+ await new Promise((r) => setTimeout(r, 10))
406
+ expect(events.length).toBe(1)
407
+ expect(events[0]?.response).toBe('streamed-text')
408
+ })
409
+ })
410
+
411
+ // ============================================================================
412
+ // wrapForV3
413
+ // ============================================================================
414
+
415
+ describe('wrapForV3', () => {
416
+ beforeEach(() => {
417
+ process.env['V3_EVAL_CACHE'] = '1'
418
+ })
419
+
420
+ it('composes in correct order (cache → budget → trace)', async () => {
421
+ const tracker = new BudgetTracker()
422
+ const events: TraceEvent[] = []
423
+ let underlyingCalls = 0
424
+ const base = new MockLanguageModelV3({
425
+ modelId: 'gpt-4o',
426
+ doGenerate: async () => {
427
+ underlyingCalls++
428
+ return makeGenerateResult('combined', 100, 50)
429
+ },
430
+ })
431
+ const wrapped = wrapForV3(base, {
432
+ cache: {},
433
+ budget: { tracker },
434
+ trace: { emit: (e) => events.push(e) },
435
+ })
436
+ const params = makeCallOptions('hello combined')
437
+ // First call: miss → underlying invoked, budget records, trace emits
438
+ await (
439
+ wrapped as unknown as {
440
+ doGenerate: (o: LanguageModelV3CallOptions) => Promise<LanguageModelV3GenerateResult>
441
+ }
442
+ ).doGenerate(params)
443
+ // Second call: cache hit → cache short-circuits; budget+trace do NOT
444
+ // run because they're installed AFTER cache. (See JSDoc on wrapForV3
445
+ // composition order — cache-first is the eval-fixture default.)
446
+ await (
447
+ wrapped as unknown as {
448
+ doGenerate: (o: LanguageModelV3CallOptions) => Promise<LanguageModelV3GenerateResult>
449
+ }
450
+ ).doGenerate(params)
451
+ expect(underlyingCalls).toBe(1)
452
+ expect(tracker.getTotalInputTokens()).toBe(100)
453
+ expect(events.length).toBe(1)
454
+ })
455
+
456
+ it('options can be omitted partially', async () => {
457
+ const tracker = new BudgetTracker()
458
+ const base = new MockLanguageModelV3({
459
+ modelId: 'gpt-4o',
460
+ doGenerate: async () => makeGenerateResult('partial', 10, 5),
461
+ })
462
+ // Only budget — no cache, no trace
463
+ const wrapped = wrapForV3(base, { budget: { tracker } })
464
+ await (
465
+ wrapped as unknown as {
466
+ doGenerate: (o: LanguageModelV3CallOptions) => Promise<LanguageModelV3GenerateResult>
467
+ }
468
+ ).doGenerate(makeCallOptions('hi'))
469
+ expect(tracker.getTotalInputTokens()).toBe(10)
470
+ })
471
+
472
+ it('returns the underlying model when all options are absent', async () => {
473
+ const base = new MockLanguageModelV3({
474
+ doGenerate: async () => makeGenerateResult('untouched', 1, 1),
475
+ })
476
+ const wrapped = wrapForV3(base, {})
477
+ expect(wrapped).toBe(base)
478
+ })
479
+ })
480
+
481
+ // ============================================================================
482
+ // EvalLogStore (in-memory)
483
+ // ============================================================================
484
+
485
+ describe('InMemoryEvalLogStore', () => {
486
+ let store: InMemoryEvalLogStore
487
+
488
+ beforeEach(() => {
489
+ store = new InMemoryEvalLogStore()
490
+ })
491
+
492
+ it('record + get round-trips', async () => {
493
+ const stored = await store.record({
494
+ model: 'gpt-4o',
495
+ prompt: 'hello',
496
+ response: 'hi',
497
+ usage: { inputTokens: 10, outputTokens: 5 },
498
+ costUsd: 0.001,
499
+ durationMs: 42,
500
+ })
501
+ expect(stored.$id).toBeTruthy()
502
+ expect(stored.createdAt).toBeGreaterThan(0)
503
+ const fetched = await store.get(stored.$id)
504
+ expect(fetched).toEqual(stored)
505
+ })
506
+
507
+ it('list returns most recent first', async () => {
508
+ await store.record({
509
+ model: 'a',
510
+ prompt: 'p1',
511
+ response: 'r1',
512
+ usage: { inputTokens: 1, outputTokens: 1 },
513
+ costUsd: 0,
514
+ durationMs: 1,
515
+ })
516
+ await store.record({
517
+ model: 'b',
518
+ prompt: 'p2',
519
+ response: 'r2',
520
+ usage: { inputTokens: 1, outputTokens: 1 },
521
+ costUsd: 0,
522
+ durationMs: 1,
523
+ })
524
+ const list = await store.list()
525
+ expect(list.length).toBe(2)
526
+ expect(list[0]?.model).toBe('b')
527
+ expect(list[1]?.model).toBe('a')
528
+ })
529
+
530
+ it('list filters by model and traceId', async () => {
531
+ await store.record({
532
+ model: 'gpt-4o',
533
+ traceId: 't1',
534
+ prompt: 'p',
535
+ response: 'r',
536
+ usage: { inputTokens: 1, outputTokens: 1 },
537
+ costUsd: 0,
538
+ durationMs: 1,
539
+ })
540
+ await store.record({
541
+ model: 'sonnet',
542
+ traceId: 't1',
543
+ prompt: 'p',
544
+ response: 'r',
545
+ usage: { inputTokens: 1, outputTokens: 1 },
546
+ costUsd: 0,
547
+ durationMs: 1,
548
+ })
549
+ await store.record({
550
+ model: 'gpt-4o',
551
+ traceId: 't2',
552
+ prompt: 'p',
553
+ response: 'r',
554
+ usage: { inputTokens: 1, outputTokens: 1 },
555
+ costUsd: 0,
556
+ durationMs: 1,
557
+ })
558
+ expect((await store.list({ model: 'gpt-4o' })).length).toBe(2)
559
+ expect((await store.list({ traceId: 't1' })).length).toBe(2)
560
+ expect((await store.list({ model: 'gpt-4o', traceId: 't1' })).length).toBe(1)
561
+ })
562
+
563
+ it('list filters by tags (superset match)', async () => {
564
+ await store.record({
565
+ model: 'a',
566
+ tags: { persona: 'cfo', step: '3' },
567
+ prompt: 'p',
568
+ response: 'r',
569
+ usage: { inputTokens: 1, outputTokens: 1 },
570
+ costUsd: 0,
571
+ durationMs: 1,
572
+ })
573
+ await store.record({
574
+ model: 'b',
575
+ tags: { persona: 'cto' },
576
+ prompt: 'p',
577
+ response: 'r',
578
+ usage: { inputTokens: 1, outputTokens: 1 },
579
+ costUsd: 0,
580
+ durationMs: 1,
581
+ })
582
+ expect((await store.list({ tags: { persona: 'cfo' } })).length).toBe(1)
583
+ expect((await store.list({ tags: { persona: 'cto' } })).length).toBe(1)
584
+ expect((await store.list({ tags: { persona: 'unknown' } })).length).toBe(0)
585
+ })
586
+
587
+ it('delete removes the entry', async () => {
588
+ const e = await store.record({
589
+ model: 'a',
590
+ prompt: 'p',
591
+ response: 'r',
592
+ usage: { inputTokens: 1, outputTokens: 1 },
593
+ costUsd: 0,
594
+ durationMs: 1,
595
+ })
596
+ expect(await store.delete(e.$id)).toBe(true)
597
+ expect(await store.get(e.$id)).toBeUndefined()
598
+ expect(await store.delete(e.$id)).toBe(false)
599
+ })
600
+
601
+ it('global accessor + override', async () => {
602
+ const custom = new InMemoryEvalLogStore()
603
+ configureEvalLogStore(custom)
604
+ expect(getEvalLogStore()).toBe(custom)
605
+ configureEvalLogStore(null)
606
+ const lazy = getEvalLogStore()
607
+ expect(lazy).toBeInstanceOf(InMemoryEvalLogStore)
608
+ expect(lazy).not.toBe(custom)
609
+ // Reset so subsequent test-runs see a clean default
610
+ configureEvalLogStore(null)
611
+ })
612
+ })
package/vitest.config.js CHANGED
@@ -17,6 +17,12 @@ for (const envPath of envPaths) {
17
17
  }
18
18
  export default defineConfig({
19
19
  test: {
20
+ // CRITICAL: Limit concurrency to prevent resource exhaustion
21
+ maxConcurrency: 1,
22
+ maxWorkers: 1,
23
+ minWorkers: 1,
24
+ fileParallelism: false,
25
+
20
26
  globals: false,
21
27
  environment: 'node',
22
28
  include: ['test/**/*.test.ts'],
package/vitest.config.ts CHANGED
@@ -20,6 +20,12 @@ for (const envPath of envPaths) {
20
20
 
21
21
  export default defineConfig({
22
22
  test: {
23
+ // CRITICAL: Limit concurrency to prevent resource exhaustion
24
+ maxConcurrency: 1,
25
+ maxWorkers: 1,
26
+ minWorkers: 1,
27
+ fileParallelism: false,
28
+
23
29
  globals: false,
24
30
  environment: 'node',
25
31
  include: ['test/**/*.test.ts'],
@@ -38,5 +44,19 @@ export default defineConfig({
38
44
  singleFork: true,
39
45
  },
40
46
  },
47
+
48
+ // Coverage configuration
49
+ coverage: {
50
+ provider: 'v8',
51
+ reporter: ['text', 'json', 'html'],
52
+ include: ['src/**/*.ts'],
53
+ exclude: ['**/*.test.ts', '**/__tests__/**', '**/node_modules/**'],
54
+ thresholds: {
55
+ statements: 65,
56
+ branches: 60,
57
+ functions: 60,
58
+ lines: 65,
59
+ },
60
+ },
41
61
  },
42
62
  })
package/LICENSE DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2025 .org.ai
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.