ai-functions 2.0.2 → 2.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. package/.turbo/turbo-build.log +4 -5
  2. package/CHANGELOG.md +38 -0
  3. package/LICENSE +21 -0
  4. package/README.md +361 -159
  5. package/dist/ai-promise.d.ts +47 -0
  6. package/dist/ai-promise.d.ts.map +1 -1
  7. package/dist/ai-promise.js +291 -3
  8. package/dist/ai-promise.js.map +1 -1
  9. package/dist/ai.d.ts +17 -18
  10. package/dist/ai.d.ts.map +1 -1
  11. package/dist/ai.js +93 -39
  12. package/dist/ai.js.map +1 -1
  13. package/dist/batch-map.d.ts +46 -4
  14. package/dist/batch-map.d.ts.map +1 -1
  15. package/dist/batch-map.js +35 -2
  16. package/dist/batch-map.js.map +1 -1
  17. package/dist/batch-queue.d.ts +116 -12
  18. package/dist/batch-queue.d.ts.map +1 -1
  19. package/dist/batch-queue.js +47 -2
  20. package/dist/batch-queue.js.map +1 -1
  21. package/dist/budget.d.ts +272 -0
  22. package/dist/budget.d.ts.map +1 -0
  23. package/dist/budget.js +500 -0
  24. package/dist/budget.js.map +1 -0
  25. package/dist/cache.d.ts +272 -0
  26. package/dist/cache.d.ts.map +1 -0
  27. package/dist/cache.js +412 -0
  28. package/dist/cache.js.map +1 -0
  29. package/dist/context.d.ts +32 -1
  30. package/dist/context.d.ts.map +1 -1
  31. package/dist/context.js +16 -1
  32. package/dist/context.js.map +1 -1
  33. package/dist/eval/runner.d.ts +2 -1
  34. package/dist/eval/runner.d.ts.map +1 -1
  35. package/dist/eval/runner.js.map +1 -1
  36. package/dist/generate.d.ts.map +1 -1
  37. package/dist/generate.js +6 -10
  38. package/dist/generate.js.map +1 -1
  39. package/dist/index.d.ts +27 -20
  40. package/dist/index.d.ts.map +1 -1
  41. package/dist/index.js +72 -42
  42. package/dist/index.js.map +1 -1
  43. package/dist/primitives.d.ts +17 -0
  44. package/dist/primitives.d.ts.map +1 -1
  45. package/dist/primitives.js +19 -1
  46. package/dist/primitives.js.map +1 -1
  47. package/dist/retry.d.ts +303 -0
  48. package/dist/retry.d.ts.map +1 -0
  49. package/dist/retry.js +539 -0
  50. package/dist/retry.js.map +1 -0
  51. package/dist/schema.d.ts.map +1 -1
  52. package/dist/schema.js +1 -9
  53. package/dist/schema.js.map +1 -1
  54. package/dist/tool-orchestration.d.ts +391 -0
  55. package/dist/tool-orchestration.d.ts.map +1 -0
  56. package/dist/tool-orchestration.js +663 -0
  57. package/dist/tool-orchestration.js.map +1 -0
  58. package/dist/types.d.ts +50 -33
  59. package/dist/types.d.ts.map +1 -1
  60. package/evalite.config.js +14 -0
  61. package/evals/classification.eval.js +97 -0
  62. package/evals/marketing.eval.js +289 -0
  63. package/evals/math.eval.js +83 -0
  64. package/evals/run-evals.js +151 -0
  65. package/evals/structured-output.eval.js +131 -0
  66. package/evals/writing.eval.js +105 -0
  67. package/examples/batch-blog-posts.js +128 -0
  68. package/package.json +26 -26
  69. package/src/ai-promise.ts +359 -3
  70. package/src/ai.ts +155 -110
  71. package/src/batch/anthropic.js +256 -0
  72. package/src/batch/bedrock.js +584 -0
  73. package/src/batch/cloudflare.js +287 -0
  74. package/src/batch/google.js +359 -0
  75. package/src/batch/index.js +30 -0
  76. package/src/batch/memory.js +187 -0
  77. package/src/batch/openai.js +402 -0
  78. package/src/batch-map.ts +46 -4
  79. package/src/batch-queue.ts +116 -12
  80. package/src/budget.ts +727 -0
  81. package/src/cache.ts +653 -0
  82. package/src/context.ts +33 -1
  83. package/src/eval/index.js +7 -0
  84. package/src/eval/models.js +119 -0
  85. package/src/eval/runner.js +147 -0
  86. package/src/eval/runner.ts +3 -2
  87. package/src/generate.ts +7 -12
  88. package/src/index.ts +231 -53
  89. package/src/primitives.ts +19 -1
  90. package/src/retry.ts +776 -0
  91. package/src/schema.ts +1 -10
  92. package/src/tool-orchestration.ts +1008 -0
  93. package/src/types.ts +59 -41
  94. package/test/ai-proxy.test.js +157 -0
  95. package/test/async-iterators.test.js +261 -0
  96. package/test/backward-compat.test.ts +147 -0
  97. package/test/batch-autosubmit-errors.test.ts +598 -0
  98. package/test/batch-background.test.js +352 -0
  99. package/test/batch-blog-posts.test.js +293 -0
  100. package/test/blog-generation.test.js +390 -0
  101. package/test/browse-read.test.js +480 -0
  102. package/test/budget-tracking.test.ts +800 -0
  103. package/test/cache.test.ts +712 -0
  104. package/test/context-isolation.test.ts +687 -0
  105. package/test/core-functions.test.js +490 -0
  106. package/test/decide.test.js +260 -0
  107. package/test/define.test.js +232 -0
  108. package/test/e2e-bedrock-manual.js +136 -0
  109. package/test/e2e-bedrock.test.js +164 -0
  110. package/test/e2e-flex-gateway.js +131 -0
  111. package/test/e2e-flex-manual.js +156 -0
  112. package/test/e2e-flex.test.js +174 -0
  113. package/test/e2e-google-manual.js +150 -0
  114. package/test/e2e-google.test.js +181 -0
  115. package/test/embeddings.test.js +220 -0
  116. package/test/evals/define-function.eval.test.js +309 -0
  117. package/test/evals/deterministic.eval.test.ts +376 -0
  118. package/test/evals/primitives.eval.test.js +360 -0
  119. package/test/function-types.test.js +407 -0
  120. package/test/generate-core.test.js +213 -0
  121. package/test/generate.test.js +143 -0
  122. package/test/generic-order.test.ts +342 -0
  123. package/test/implicit-batch.test.js +326 -0
  124. package/test/json-parse-error-handling.test.ts +463 -0
  125. package/test/retry.test.ts +1016 -0
  126. package/test/schema.test.js +96 -0
  127. package/test/streaming.test.ts +316 -0
  128. package/test/tagged-templates.test.js +240 -0
  129. package/test/tool-orchestration.test.ts +770 -0
  130. package/vitest.config.js +39 -0
@@ -0,0 +1,800 @@
1
+ /**
2
+ * Tests for Budget Tracking and Request Tracing
3
+ *
4
+ * TDD: RED phase - Write failing tests first
5
+ *
6
+ * Features tested:
7
+ * 1. Token counting per request
8
+ * 2. Budget limits (reject when exceeded)
9
+ * 3. Request ID generation and propagation
10
+ * 4. User/tenant context isolation
11
+ * 5. Cost tracking by model
12
+ */
13
+
14
+ import { describe, it, expect, beforeEach, vi } from 'vitest'
15
+ import {
16
+ BudgetTracker,
17
+ TokenCounter,
18
+ RequestContext,
19
+ withBudget,
20
+ createRequestContext,
21
+ BudgetExceededError,
22
+ type BudgetConfig,
23
+ type TokenUsage,
24
+ type RequestInfo,
25
+ type BudgetAlert,
26
+ } from '../src/budget.js'
27
+ import { configure, resetContext, withContext } from '../src/context.js'
28
+
29
+ // ============================================================================
30
+ // Token Counting Tests
31
+ // ============================================================================
32
+
33
+ describe('TokenCounter', () => {
34
+ describe('estimateTokens', () => {
35
+ it('estimates tokens for a simple string', () => {
36
+ const counter = new TokenCounter()
37
+ const tokens = counter.estimateTokens('Hello, world!')
38
+
39
+ // Rough estimate: ~4 chars per token for English
40
+ expect(tokens).toBeGreaterThan(0)
41
+ expect(tokens).toBeLessThan(10)
42
+ })
43
+
44
+ it('estimates tokens for longer text', () => {
45
+ const counter = new TokenCounter()
46
+ const text = 'The quick brown fox jumps over the lazy dog. '.repeat(10)
47
+ const tokens = counter.estimateTokens(text)
48
+
49
+ // Should be roughly proportional to length
50
+ expect(tokens).toBeGreaterThan(50)
51
+ expect(tokens).toBeLessThan(200)
52
+ })
53
+
54
+ it('handles empty string', () => {
55
+ const counter = new TokenCounter()
56
+ expect(counter.estimateTokens('')).toBe(0)
57
+ })
58
+
59
+ it('handles unicode and special characters', () => {
60
+ const counter = new TokenCounter()
61
+ const tokens = counter.estimateTokens('Hello! こんにちは 你好 🌍')
62
+
63
+ expect(tokens).toBeGreaterThan(0)
64
+ })
65
+ })
66
+
67
+ describe('model-specific estimation', () => {
68
+ it('adjusts estimation for different models', () => {
69
+ const counter = new TokenCounter()
70
+ const text = 'Hello, world!'
71
+
72
+ const gpt4Tokens = counter.estimateTokens(text, 'gpt-4o')
73
+ const claudeTokens = counter.estimateTokens(text, 'claude-sonnet-4-20250514')
74
+
75
+ // Both should give reasonable estimates
76
+ expect(gpt4Tokens).toBeGreaterThan(0)
77
+ expect(claudeTokens).toBeGreaterThan(0)
78
+ })
79
+ })
80
+
81
+ describe('countMessageTokens', () => {
82
+ it('counts tokens in a message array', () => {
83
+ const counter = new TokenCounter()
84
+ const messages = [
85
+ { role: 'system', content: 'You are a helpful assistant.' },
86
+ { role: 'user', content: 'Hello!' },
87
+ ]
88
+
89
+ const tokens = counter.countMessageTokens(messages)
90
+ expect(tokens).toBeGreaterThan(5)
91
+ })
92
+
93
+ it('includes overhead for message formatting', () => {
94
+ const counter = new TokenCounter()
95
+ const textOnly = counter.estimateTokens('You are a helpful assistant. Hello!')
96
+ const messages = [
97
+ { role: 'system', content: 'You are a helpful assistant.' },
98
+ { role: 'user', content: 'Hello!' },
99
+ ]
100
+ const messageTokens = counter.countMessageTokens(messages)
101
+
102
+ // Message tokens should include some overhead
103
+ expect(messageTokens).toBeGreaterThanOrEqual(textOnly)
104
+ })
105
+ })
106
+ })
107
+
108
+ // ============================================================================
109
+ // Budget Tracker Tests
110
+ // ============================================================================
111
+
112
+ describe('BudgetTracker', () => {
113
+ let tracker: BudgetTracker
114
+
115
+ beforeEach(() => {
116
+ tracker = new BudgetTracker()
117
+ })
118
+
119
+ describe('token tracking', () => {
120
+ it('tracks cumulative input tokens', () => {
121
+ tracker.recordUsage({ inputTokens: 100, outputTokens: 50 })
122
+ tracker.recordUsage({ inputTokens: 150, outputTokens: 75 })
123
+
124
+ expect(tracker.getTotalInputTokens()).toBe(250)
125
+ })
126
+
127
+ it('tracks cumulative output tokens', () => {
128
+ tracker.recordUsage({ inputTokens: 100, outputTokens: 50 })
129
+ tracker.recordUsage({ inputTokens: 150, outputTokens: 75 })
130
+
131
+ expect(tracker.getTotalOutputTokens()).toBe(125)
132
+ })
133
+
134
+ it('tracks total tokens (input + output)', () => {
135
+ tracker.recordUsage({ inputTokens: 100, outputTokens: 50 })
136
+ tracker.recordUsage({ inputTokens: 150, outputTokens: 75 })
137
+
138
+ expect(tracker.getTotalTokens()).toBe(375)
139
+ })
140
+
141
+ it('starts with zero tokens', () => {
142
+ expect(tracker.getTotalTokens()).toBe(0)
143
+ expect(tracker.getTotalInputTokens()).toBe(0)
144
+ expect(tracker.getTotalOutputTokens()).toBe(0)
145
+ })
146
+ })
147
+
148
+ describe('cost tracking', () => {
149
+ it('calculates cost based on model pricing', () => {
150
+ tracker.recordUsage({
151
+ inputTokens: 1000,
152
+ outputTokens: 500,
153
+ model: 'gpt-4o',
154
+ })
155
+
156
+ const cost = tracker.getTotalCost()
157
+ expect(cost).toBeGreaterThan(0)
158
+ })
159
+
160
+ it('tracks cost by model', () => {
161
+ tracker.recordUsage({ inputTokens: 1000, outputTokens: 500, model: 'gpt-4o' })
162
+ tracker.recordUsage({ inputTokens: 1000, outputTokens: 500, model: 'claude-sonnet-4-20250514' })
163
+
164
+ const costByModel = tracker.getCostByModel()
165
+ expect(costByModel['gpt-4o']).toBeGreaterThan(0)
166
+ expect(costByModel['claude-sonnet-4-20250514']).toBeGreaterThan(0)
167
+ })
168
+
169
+ it('uses default model pricing when not specified', () => {
170
+ tracker.recordUsage({ inputTokens: 1000, outputTokens: 500 })
171
+
172
+ // Should not throw, should use default pricing
173
+ const cost = tracker.getTotalCost()
174
+ expect(cost).toBeGreaterThan(0)
175
+ })
176
+ })
177
+
178
+ describe('budget limits', () => {
179
+ it('enforces token limits', () => {
180
+ const limitedTracker = new BudgetTracker({
181
+ maxTokens: 500,
182
+ })
183
+
184
+ limitedTracker.recordUsage({ inputTokens: 200, outputTokens: 100 })
185
+
186
+ // checkBudget should throw: 300 tokens already recorded + 300 estimated would exceed maxTokens (500)
187
+ expect(() => {
188
+ limitedTracker.checkBudget({ estimatedTokens: 300 })
189
+ }).toThrow(BudgetExceededError)
190
+ })
191
+
192
+ it('enforces cost limits', () => {
193
+ const limitedTracker = new BudgetTracker({
194
+ maxCost: 0.10, // $0.10
195
+ })
196
+
197
+ // Record some usage that approaches the limit
198
+ // GPT-4o: $2.5/1M input, $10/1M output
199
+ // 10k input = $0.025, 5k output = $0.05, total = $0.075
200
+ limitedTracker.recordUsage({
201
+ inputTokens: 10000,
202
+ outputTokens: 5000,
203
+ model: 'gpt-4o',
204
+ })
205
+
206
+ // Check should fail if estimated cost would exceed
207
+ // 100k tokens at ~$6/1M average = $0.60, which exceeds remaining ~$0.025
208
+ expect(() => {
209
+ limitedTracker.checkBudget({
210
+ estimatedTokens: 100000,
211
+ model: 'gpt-4o',
212
+ })
213
+ }).toThrow(BudgetExceededError)
214
+ })
215
+
216
+ it('allows usage within limits', () => {
217
+ const limitedTracker = new BudgetTracker({
218
+ maxTokens: 1000,
219
+ })
220
+
221
+ limitedTracker.recordUsage({ inputTokens: 200, outputTokens: 100 })
222
+
223
+ // Should not throw
224
+ expect(() => {
225
+ limitedTracker.checkBudget({ estimatedTokens: 100 })
226
+ }).not.toThrow()
227
+ })
228
+
229
+ it('provides remaining budget info', () => {
230
+ const limitedTracker = new BudgetTracker({
231
+ maxTokens: 1000,
232
+ maxCost: 1.0,
233
+ })
234
+
235
+ limitedTracker.recordUsage({ inputTokens: 200, outputTokens: 100 })
236
+
237
+ const remaining = limitedTracker.getRemainingBudget()
238
+ expect(remaining.tokens).toBe(700)
239
+ expect(remaining.cost).toBeLessThan(1.0)
240
+ })
241
+ })
242
+
243
+ describe('budget alerts', () => {
244
+ it('triggers alert at 50% threshold', () => {
245
+ const alertCallback = vi.fn()
246
+ const limitedTracker = new BudgetTracker({
247
+ maxTokens: 1000,
248
+ alertThresholds: [0.5],
249
+ onAlert: alertCallback,
250
+ })
251
+
252
+ limitedTracker.recordUsage({ inputTokens: 400, outputTokens: 100 })
253
+
254
+ expect(alertCallback).toHaveBeenCalledWith(
255
+ expect.objectContaining({
256
+ threshold: 0.5,
257
+ currentUsage: expect.any(Number),
258
+ limit: 1000,
259
+ })
260
+ )
261
+ })
262
+
263
+ it('triggers multiple alerts at different thresholds', () => {
264
+ const alertCallback = vi.fn()
265
+ const limitedTracker = new BudgetTracker({
266
+ maxTokens: 1000,
267
+ alertThresholds: [0.5, 0.8, 1.0],
268
+ onAlert: alertCallback,
269
+ })
270
+
271
+ // 50% threshold
272
+ limitedTracker.recordUsage({ inputTokens: 400, outputTokens: 100 })
273
+ expect(alertCallback).toHaveBeenCalledTimes(1)
274
+
275
+ // 80% threshold
276
+ limitedTracker.recordUsage({ inputTokens: 200, outputTokens: 100 })
277
+ expect(alertCallback).toHaveBeenCalledTimes(2)
278
+
279
+ // 100% threshold
280
+ limitedTracker.recordUsage({ inputTokens: 150, outputTokens: 50 })
281
+ expect(alertCallback).toHaveBeenCalledTimes(3)
282
+ })
283
+
284
+ it('does not re-trigger same threshold', () => {
285
+ const alertCallback = vi.fn()
286
+ const limitedTracker = new BudgetTracker({
287
+ maxTokens: 1000,
288
+ alertThresholds: [0.5],
289
+ onAlert: alertCallback,
290
+ })
291
+
292
+ limitedTracker.recordUsage({ inputTokens: 400, outputTokens: 100 })
293
+ limitedTracker.recordUsage({ inputTokens: 50, outputTokens: 25 })
294
+
295
+ // Should only be called once even though still above 50%
296
+ expect(alertCallback).toHaveBeenCalledTimes(1)
297
+ })
298
+ })
299
+
300
+ describe('reset and persistence', () => {
301
+ it('resets token counts', () => {
302
+ tracker.recordUsage({ inputTokens: 100, outputTokens: 50 })
303
+ tracker.reset()
304
+
305
+ expect(tracker.getTotalTokens()).toBe(0)
306
+ })
307
+
308
+ it('exports usage data for persistence', () => {
309
+ tracker.recordUsage({ inputTokens: 100, outputTokens: 50, model: 'gpt-4o' })
310
+ tracker.recordUsage({ inputTokens: 200, outputTokens: 100, model: 'claude-sonnet-4-20250514' })
311
+
312
+ const snapshot = tracker.export()
313
+
314
+ expect(snapshot.totalInputTokens).toBe(300)
315
+ expect(snapshot.totalOutputTokens).toBe(150)
316
+ expect(snapshot.usageByModel).toHaveProperty('gpt-4o')
317
+ expect(snapshot.usageByModel).toHaveProperty('claude-sonnet-4-20250514')
318
+ })
319
+
320
+ it('imports previously exported data', () => {
321
+ const snapshot = {
322
+ totalInputTokens: 500,
323
+ totalOutputTokens: 250,
324
+ totalCost: 0.05,
325
+ usageByModel: {
326
+ 'gpt-4o': { inputTokens: 500, outputTokens: 250, cost: 0.05 },
327
+ },
328
+ triggeredThresholds: [0.5],
329
+ }
330
+
331
+ tracker.import(snapshot)
332
+
333
+ expect(tracker.getTotalInputTokens()).toBe(500)
334
+ expect(tracker.getTotalOutputTokens()).toBe(250)
335
+ })
336
+ })
337
+ })
338
+
339
+ // ============================================================================
340
+ // Request Context Tests
341
+ // ============================================================================
342
+
343
+ describe('RequestContext', () => {
344
+ describe('request ID generation', () => {
345
+ it('generates unique request IDs', () => {
346
+ const ctx1 = createRequestContext()
347
+ const ctx2 = createRequestContext()
348
+
349
+ expect(ctx1.requestId).toBeDefined()
350
+ expect(ctx2.requestId).toBeDefined()
351
+ expect(ctx1.requestId).not.toBe(ctx2.requestId)
352
+ })
353
+
354
+ it('generates IDs with expected format', () => {
355
+ const ctx = createRequestContext()
356
+
357
+ // Should be a valid UUID or similar format
358
+ expect(ctx.requestId).toMatch(/^[a-z0-9-]+$/i)
359
+ expect(ctx.requestId.length).toBeGreaterThan(8)
360
+ })
361
+
362
+ it('accepts custom request ID', () => {
363
+ const ctx = createRequestContext({ requestId: 'custom-123' })
364
+
365
+ expect(ctx.requestId).toBe('custom-123')
366
+ })
367
+ })
368
+
369
+ describe('user context', () => {
370
+ it('stores user ID', () => {
371
+ const ctx = createRequestContext({ userId: 'user-456' })
372
+
373
+ expect(ctx.userId).toBe('user-456')
374
+ })
375
+
376
+ it('stores tenant ID', () => {
377
+ const ctx = createRequestContext({ tenantId: 'tenant-789' })
378
+
379
+ expect(ctx.tenantId).toBe('tenant-789')
380
+ })
381
+
382
+ it('stores both user and tenant', () => {
383
+ const ctx = createRequestContext({
384
+ userId: 'user-456',
385
+ tenantId: 'tenant-789',
386
+ })
387
+
388
+ expect(ctx.userId).toBe('user-456')
389
+ expect(ctx.tenantId).toBe('tenant-789')
390
+ })
391
+ })
392
+
393
+ describe('parent-child relationships', () => {
394
+ it('tracks parent request ID', () => {
395
+ const parentCtx = createRequestContext()
396
+ const childCtx = createRequestContext({ parentRequestId: parentCtx.requestId })
397
+
398
+ expect(childCtx.parentRequestId).toBe(parentCtx.requestId)
399
+ })
400
+
401
+ it('creates child context from parent', () => {
402
+ const parentCtx = createRequestContext({ userId: 'user-123' })
403
+ const childCtx = parentCtx.createChild()
404
+
405
+ expect(childCtx.parentRequestId).toBe(parentCtx.requestId)
406
+ expect(childCtx.userId).toBe('user-123') // Inherits user
407
+ expect(childCtx.requestId).not.toBe(parentCtx.requestId) // New ID
408
+ })
409
+
410
+ it('allows depth tracking', () => {
411
+ const root = createRequestContext()
412
+ const child = root.createChild()
413
+ const grandchild = child.createChild()
414
+
415
+ expect(root.depth).toBe(0)
416
+ expect(child.depth).toBe(1)
417
+ expect(grandchild.depth).toBe(2)
418
+ })
419
+ })
420
+
421
+ describe('trace context', () => {
422
+ it('serializes to trace headers', () => {
423
+ const ctx = createRequestContext({
424
+ userId: 'user-123',
425
+ tenantId: 'tenant-456',
426
+ })
427
+
428
+ const headers = ctx.toTraceHeaders()
429
+
430
+ expect(headers['x-request-id']).toBe(ctx.requestId)
431
+ expect(headers['x-user-id']).toBe('user-123')
432
+ expect(headers['x-tenant-id']).toBe('tenant-456')
433
+ })
434
+
435
+ it('deserializes from trace headers', () => {
436
+ const headers = {
437
+ 'x-request-id': 'req-789',
438
+ 'x-user-id': 'user-123',
439
+ 'x-tenant-id': 'tenant-456',
440
+ 'x-parent-request-id': 'parent-123',
441
+ }
442
+
443
+ const ctx = RequestContext.fromHeaders(headers)
444
+
445
+ expect(ctx.requestId).toBe('req-789')
446
+ expect(ctx.userId).toBe('user-123')
447
+ expect(ctx.tenantId).toBe('tenant-456')
448
+ expect(ctx.parentRequestId).toBe('parent-123')
449
+ })
450
+
451
+ it('generates W3C traceparent header', () => {
452
+ const ctx = createRequestContext()
453
+ const traceparent = ctx.toTraceparent()
454
+
455
+ // Format: version-trace_id-parent_id-flags
456
+ expect(traceparent).toMatch(/^00-[a-f0-9]{32}-[a-f0-9]{16}-[a-f0-9]{2}$/)
457
+ })
458
+ })
459
+
460
+ describe('metadata', () => {
461
+ it('stores custom metadata', () => {
462
+ const ctx = createRequestContext({
463
+ metadata: {
464
+ feature: 'chat',
465
+ environment: 'production',
466
+ },
467
+ })
468
+
469
+ expect(ctx.metadata?.feature).toBe('chat')
470
+ expect(ctx.metadata?.environment).toBe('production')
471
+ })
472
+
473
+ it('merges metadata in child contexts', () => {
474
+ const parentCtx = createRequestContext({
475
+ metadata: { feature: 'chat' },
476
+ })
477
+ const childCtx = parentCtx.createChild({
478
+ metadata: { action: 'summarize' },
479
+ })
480
+
481
+ expect(childCtx.metadata?.feature).toBe('chat')
482
+ expect(childCtx.metadata?.action).toBe('summarize')
483
+ })
484
+ })
485
+ })
486
+
487
+ // ============================================================================
488
+ // withBudget Tests
489
+ // ============================================================================
490
+
491
+ describe('withBudget', () => {
492
+ beforeEach(() => {
493
+ resetContext()
494
+ })
495
+
496
+ it('executes function within budget context', async () => {
497
+ const result = await withBudget({ maxTokens: 1000 }, async (tracker) => {
498
+ tracker.recordUsage({ inputTokens: 100, outputTokens: 50 })
499
+ return 'success'
500
+ })
501
+
502
+ expect(result).toBe('success')
503
+ })
504
+
505
+ it('provides budget tracker to callback', async () => {
506
+ await withBudget({ maxTokens: 1000 }, async (tracker) => {
507
+ expect(tracker).toBeInstanceOf(BudgetTracker)
508
+ expect(typeof tracker.recordUsage).toBe('function')
509
+ expect(typeof tracker.getTotalTokens).toBe('function')
510
+ })
511
+ })
512
+
513
+ it('throws when budget exceeded', async () => {
514
+ await expect(
515
+ withBudget({ maxTokens: 100 }, async (tracker) => {
516
+ tracker.recordUsage({ inputTokens: 150, outputTokens: 50 })
517
+ return 'should not reach'
518
+ })
519
+ ).rejects.toThrow(BudgetExceededError)
520
+ })
521
+
522
+ it('supports cost-based limits', async () => {
523
+ await expect(
524
+ withBudget({ maxCost: 0.001 }, async (tracker) => {
525
+ // Record usage that exceeds cost limit
526
+ tracker.recordUsage({
527
+ inputTokens: 100000,
528
+ outputTokens: 50000,
529
+ model: 'gpt-4o',
530
+ })
531
+ return 'should not reach'
532
+ })
533
+ ).rejects.toThrow(BudgetExceededError)
534
+ })
535
+
536
+ it('nests budget contexts correctly', async () => {
537
+ await withBudget({ maxTokens: 1000 }, async (outerTracker) => {
538
+ outerTracker.recordUsage({ inputTokens: 100, outputTokens: 50 })
539
+
540
+ await withBudget({ maxTokens: 500 }, async (innerTracker) => {
541
+ innerTracker.recordUsage({ inputTokens: 50, outputTokens: 25 })
542
+
543
+ // Inner tracker has its own limit
544
+ expect(innerTracker.getTotalTokens()).toBe(75)
545
+ })
546
+
547
+ // Outer tracker should also include the inner usage (150 recorded directly + 75 from the nested budget = 225)
548
+ expect(outerTracker.getTotalTokens()).toBe(225)
549
+ })
550
+ })
551
+
552
+ it('includes request context when provided', async () => {
553
+ await withBudget(
554
+ {
555
+ maxTokens: 1000,
556
+ userId: 'user-123',
557
+ tenantId: 'tenant-456',
558
+ },
559
+ async (tracker, ctx) => {
560
+ expect(ctx?.userId).toBe('user-123')
561
+ expect(ctx?.tenantId).toBe('tenant-456')
562
+ }
563
+ )
564
+ })
565
+ })
566
+
567
+ // ============================================================================
568
+ // User/Tenant Context Isolation Tests
569
+ // ============================================================================
570
+
571
+ describe('User/Tenant Budget Isolation', () => {
572
+ it('tracks budget per user', async () => {
573
+ const userBudgets = new Map<string, BudgetTracker>()
574
+
575
+ const getOrCreateTracker = (userId: string) => {
576
+ if (!userBudgets.has(userId)) {
577
+ userBudgets.set(userId, new BudgetTracker({ maxTokens: 1000 }))
578
+ }
579
+ return userBudgets.get(userId)!
580
+ }
581
+
582
+ // User 1 uses some budget
583
+ const user1Tracker = getOrCreateTracker('user-1')
584
+ user1Tracker.recordUsage({ inputTokens: 300, outputTokens: 150 })
585
+
586
+ // User 2 uses some budget
587
+ const user2Tracker = getOrCreateTracker('user-2')
588
+ user2Tracker.recordUsage({ inputTokens: 100, outputTokens: 50 })
589
+
590
+ // Each user has their own budget
591
+ expect(user1Tracker.getTotalTokens()).toBe(450)
592
+ expect(user2Tracker.getTotalTokens()).toBe(150)
593
+ })
594
+
595
+ it('enforces per-tenant limits', async () => {
596
+ const tenantTracker = new BudgetTracker({ maxTokens: 500 })
597
+
598
+ // First request uses some budget
599
+ tenantTracker.recordUsage({ inputTokens: 200, outputTokens: 100 })
600
+
601
+ // Second request from same tenant
602
+ tenantTracker.recordUsage({ inputTokens: 100, outputTokens: 50 })
603
+
604
+ // Third request should fail budget check
605
+ expect(() => {
606
+ tenantTracker.checkBudget({ estimatedTokens: 200 })
607
+ }).toThrow(BudgetExceededError)
608
+ })
609
+ })
610
+
611
+ // ============================================================================
612
+ // Cost Tracking by Model Tests
613
+ // ============================================================================
614
+
615
+ describe('Cost Tracking by Model', () => {
616
+ it('uses correct pricing for GPT-4o', () => {
617
+ const tracker = new BudgetTracker()
618
+
619
+ tracker.recordUsage({
620
+ inputTokens: 1000000, // 1M tokens
621
+ outputTokens: 500000,
622
+ model: 'gpt-4o',
623
+ })
624
+
625
+ const cost = tracker.getTotalCost()
626
+ // GPT-4o pricing: $2.50/1M input, $10/1M output (as of 2024)
627
+ // Expected: $2.50 + $5 = $7.50
628
+ expect(cost).toBeCloseTo(7.5, 1)
629
+ })
630
+
631
+ it('uses correct pricing for Claude Sonnet', () => {
632
+ const tracker = new BudgetTracker()
633
+
634
+ tracker.recordUsage({
635
+ inputTokens: 1000000,
636
+ outputTokens: 500000,
637
+ model: 'claude-sonnet-4-20250514',
638
+ })
639
+
640
+ const cost = tracker.getTotalCost()
641
+ // Claude Sonnet pricing: $3/1M input, $15/1M output
642
+ // Expected: $3 + $7.5 = $10.5
643
+ expect(cost).toBeCloseTo(10.5, 1)
644
+ })
645
+
646
+ it('uses correct pricing for Claude Haiku', () => {
647
+ const tracker = new BudgetTracker()
648
+
649
+ tracker.recordUsage({
650
+ inputTokens: 1000000,
651
+ outputTokens: 500000,
652
+ model: 'claude-3-5-haiku-latest',
653
+ })
654
+
655
+ const cost = tracker.getTotalCost()
656
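+ // Claude Haiku pricing assumed by this test: $0.25/1M input, $1.25/1M output (NOTE(review): these look like Claude 3 Haiku list prices; confirm they are intended for 'claude-3-5-haiku-latest')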
657
+ // Expected: $0.25 + $0.625 = $0.875
658
+ expect(cost).toBeCloseTo(0.875, 2)
659
+ })
660
+
661
+ it('aggregates costs across multiple models', () => {
662
+ const tracker = new BudgetTracker()
663
+ // Record identical token counts under two different models
664
+ tracker.recordUsage({ inputTokens: 500000, outputTokens: 250000, model: 'gpt-4o' })
665
+ tracker.recordUsage({ inputTokens: 500000, outputTokens: 250000, model: 'claude-sonnet-4-20250514' })
666
+
667
+ const costByModel = tracker.getCostByModel()
668
+ // Both models must be listed, and the total must equal their sum (floating-point dollars, so compare with a tolerance)
669
+ expect(Object.keys(costByModel)).toContain('gpt-4o')
670
+ expect(Object.keys(costByModel)).toContain('claude-sonnet-4-20250514')
671
+ expect(tracker.getTotalCost()).toBeCloseTo(
672
+ costByModel['gpt-4o'] + costByModel['claude-sonnet-4-20250514']
673
+ )
674
+ })
675
+
676
+ it('supports custom pricing tables', () => {
677
+ const tracker = new BudgetTracker({
678
+ customPricing: {
679
+ 'my-custom-model': {
680
+ inputPricePerMillion: 1.0,
681
+ outputPricePerMillion: 2.0,
682
+ },
683
+ },
684
+ })
685
+
686
+ tracker.recordUsage({
687
+ inputTokens: 1000000,
688
+ outputTokens: 500000,
689
+ model: 'my-custom-model',
690
+ })
691
+
692
+ const cost = tracker.getTotalCost()
693
+ // Custom pricing: $1/1M input, $2/1M output
694
+ // Expected: $1 + $1 = $2
695
+ expect(cost).toBeCloseTo(2.0, 2)
696
+ })
697
+ })
698
+
699
+ // ============================================================================
700
+ // Context Integration Tests
701
+ // ============================================================================
702
+
703
+ describe('Context Integration', () => {
704
+ beforeEach(() => {
705
+ resetContext()
706
+ })
707
+
708
+ it('integrates budget tracking with execution context', async () => {
709
+ configure({
710
+ budget: {
711
+ maxTokens: 10000,
712
+ maxCost: 1.0,
713
+ },
714
+ })
715
+
716
+ await withContext({}, async () => {
717
+ // Budget should be available in context
718
+ // NOTE(review): nothing is asserted yet; this only checks that configure() and withContext() run without throwing
719
+ })
720
+ })
721
+
722
+ it('propagates request context through withContext', async () => {
723
+ const ctx = createRequestContext({ userId: 'test-user' })
724
+
725
+ await withContext({ requestContext: ctx }, async () => {
726
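+ // Request context should be available (NOTE(review): not asserted yet; this only checks that withContext() accepts requestContext without throwing)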
727
+ })
728
+ })
729
+ })
730
+
731
+ // ============================================================================
732
+ // Request Info and Logging Tests
733
+ // ============================================================================
734
+
735
+ describe('Request Info', () => {
736
+ it('records request info with timing', () => {
737
+ const tracker = new BudgetTracker()
738
+
739
+ const requestInfo: RequestInfo = {
740
+ requestId: 'req-123',
741
+ model: 'gpt-4o',
742
+ startTime: Date.now() - 500,
743
+ endTime: Date.now(),
744
+ inputTokens: 100,
745
+ outputTokens: 50,
746
+ }
747
+
748
+ tracker.recordRequest(requestInfo)
749
+
750
+ const requests = tracker.getRequests()
751
+ expect(requests).toHaveLength(1)
752
+ expect(requests[0].requestId).toBe('req-123')
753
+ expect(requests[0].duration).toBeCloseTo(500, -2)
754
+ })
755
+
756
+ it('provides request history for debugging', () => {
757
+ const tracker = new BudgetTracker()
758
+
759
+ tracker.recordRequest({
760
+ requestId: 'req-1',
761
+ model: 'gpt-4o',
762
+ startTime: Date.now() - 1000,
763
+ endTime: Date.now() - 500,
764
+ inputTokens: 100,
765
+ outputTokens: 50,
766
+ })
767
+
768
+ tracker.recordRequest({
769
+ requestId: 'req-2',
770
+ model: 'claude-sonnet-4-20250514',
771
+ startTime: Date.now() - 500,
772
+ endTime: Date.now(),
773
+ inputTokens: 200,
774
+ outputTokens: 100,
775
+ })
776
+
777
+ const requests = tracker.getRequests()
778
+ expect(requests).toHaveLength(2)
779
+ })
780
+
781
+ it('limits request history size', () => {
782
+ const tracker = new BudgetTracker({ maxRequestHistory: 5 })
783
+
784
+ for (let i = 0; i < 10; i++) {
785
+ tracker.recordRequest({
786
+ requestId: `req-${i}`,
787
+ model: 'gpt-4o',
788
+ startTime: Date.now(),
789
+ endTime: Date.now(),
790
+ inputTokens: 100,
791
+ outputTokens: 50,
792
+ })
793
+ }
794
+
795
+ const requests = tracker.getRequests()
796
+ expect(requests.length).toBeLessThanOrEqual(5)
797
+ // Should keep most recent
798
+ expect(requests[requests.length - 1].requestId).toBe('req-9')
799
+ })
800
+ })