ai-functions 2.1.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286) hide show
  1. package/.turbo/turbo-build.log +1 -4
  2. package/CHANGELOG.md +68 -1
  3. package/README.md +397 -157
  4. package/dist/ai-promise.d.ts +50 -3
  5. package/dist/ai-promise.d.ts.map +1 -1
  6. package/dist/ai-promise.js +410 -51
  7. package/dist/ai-promise.js.map +1 -1
  8. package/dist/ai-schemas.d.ts +56 -0
  9. package/dist/ai-schemas.d.ts.map +1 -0
  10. package/dist/ai-schemas.js +53 -0
  11. package/dist/ai-schemas.js.map +1 -0
  12. package/dist/ai.d.ts +16 -242
  13. package/dist/ai.d.ts.map +1 -1
  14. package/dist/ai.js +54 -837
  15. package/dist/ai.js.map +1 -1
  16. package/dist/batch/anthropic.d.ts +6 -4
  17. package/dist/batch/anthropic.d.ts.map +1 -1
  18. package/dist/batch/anthropic.js +83 -145
  19. package/dist/batch/anthropic.js.map +1 -1
  20. package/dist/batch/bedrock.d.ts +8 -30
  21. package/dist/batch/bedrock.d.ts.map +1 -1
  22. package/dist/batch/bedrock.js +155 -338
  23. package/dist/batch/bedrock.js.map +1 -1
  24. package/dist/batch/cloudflare.d.ts +8 -20
  25. package/dist/batch/cloudflare.d.ts.map +1 -1
  26. package/dist/batch/cloudflare.js +68 -189
  27. package/dist/batch/cloudflare.js.map +1 -1
  28. package/dist/batch/google.d.ts +6 -20
  29. package/dist/batch/google.d.ts.map +1 -1
  30. package/dist/batch/google.js +70 -238
  31. package/dist/batch/google.js.map +1 -1
  32. package/dist/batch/index.d.ts +4 -1
  33. package/dist/batch/index.d.ts.map +1 -1
  34. package/dist/batch/index.js +4 -1
  35. package/dist/batch/index.js.map +1 -1
  36. package/dist/batch/memory.d.ts +1 -1
  37. package/dist/batch/memory.d.ts.map +1 -1
  38. package/dist/batch/memory.js +14 -10
  39. package/dist/batch/memory.js.map +1 -1
  40. package/dist/batch/openai.d.ts +11 -14
  41. package/dist/batch/openai.d.ts.map +1 -1
  42. package/dist/batch/openai.js +52 -156
  43. package/dist/batch/openai.js.map +1 -1
  44. package/dist/batch/provider.d.ts +111 -0
  45. package/dist/batch/provider.d.ts.map +1 -0
  46. package/dist/batch/provider.js +233 -0
  47. package/dist/batch/provider.js.map +1 -0
  48. package/dist/batch-map.d.ts.map +1 -1
  49. package/dist/batch-map.js +23 -17
  50. package/dist/batch-map.js.map +1 -1
  51. package/dist/batch-queue.d.ts +65 -0
  52. package/dist/batch-queue.d.ts.map +1 -1
  53. package/dist/batch-queue.js +169 -14
  54. package/dist/batch-queue.js.map +1 -1
  55. package/dist/budget.d.ts +272 -0
  56. package/dist/budget.d.ts.map +1 -0
  57. package/dist/budget.js +513 -0
  58. package/dist/budget.js.map +1 -0
  59. package/dist/cache.d.ts +295 -0
  60. package/dist/cache.d.ts.map +1 -0
  61. package/dist/cache.js +433 -0
  62. package/dist/cache.js.map +1 -0
  63. package/dist/context.d.ts +42 -8
  64. package/dist/context.d.ts.map +1 -1
  65. package/dist/context.js +64 -62
  66. package/dist/context.js.map +1 -1
  67. package/dist/digital-objects-registry.d.ts +229 -0
  68. package/dist/digital-objects-registry.d.ts.map +1 -0
  69. package/dist/digital-objects-registry.js +617 -0
  70. package/dist/digital-objects-registry.js.map +1 -0
  71. package/dist/embeddings.d.ts +2 -2
  72. package/dist/embeddings.d.ts.map +1 -1
  73. package/dist/errors.d.ts +22 -0
  74. package/dist/errors.d.ts.map +1 -0
  75. package/dist/errors.js +35 -0
  76. package/dist/errors.js.map +1 -0
  77. package/dist/eval/runner.d.ts +10 -1
  78. package/dist/eval/runner.d.ts.map +1 -1
  79. package/dist/eval/runner.js +41 -35
  80. package/dist/eval/runner.js.map +1 -1
  81. package/dist/eval-log/in-memory.d.ts +34 -0
  82. package/dist/eval-log/in-memory.d.ts.map +1 -0
  83. package/dist/eval-log/in-memory.js +84 -0
  84. package/dist/eval-log/in-memory.js.map +1 -0
  85. package/dist/eval-log/index.d.ts +29 -0
  86. package/dist/eval-log/index.d.ts.map +1 -0
  87. package/dist/eval-log/index.js +39 -0
  88. package/dist/eval-log/index.js.map +1 -0
  89. package/dist/eval-log/types.d.ts +101 -0
  90. package/dist/eval-log/types.d.ts.map +1 -0
  91. package/dist/eval-log/types.js +16 -0
  92. package/dist/eval-log/types.js.map +1 -0
  93. package/dist/function-registry.d.ts +116 -0
  94. package/dist/function-registry.d.ts.map +1 -0
  95. package/dist/function-registry.js +546 -0
  96. package/dist/function-registry.js.map +1 -0
  97. package/dist/generate.d.ts +9 -3
  98. package/dist/generate.d.ts.map +1 -1
  99. package/dist/generate.js +18 -22
  100. package/dist/generate.js.map +1 -1
  101. package/dist/index.d.ts +35 -20
  102. package/dist/index.d.ts.map +1 -1
  103. package/dist/index.js +89 -42
  104. package/dist/index.js.map +1 -1
  105. package/dist/logger.d.ts +118 -0
  106. package/dist/logger.d.ts.map +1 -0
  107. package/dist/logger.js +187 -0
  108. package/dist/logger.js.map +1 -0
  109. package/dist/middleware/budget.d.ts +84 -0
  110. package/dist/middleware/budget.d.ts.map +1 -0
  111. package/dist/middleware/budget.js +110 -0
  112. package/dist/middleware/budget.js.map +1 -0
  113. package/dist/middleware/cache.d.ts +103 -0
  114. package/dist/middleware/cache.d.ts.map +1 -0
  115. package/dist/middleware/cache.js +228 -0
  116. package/dist/middleware/cache.js.map +1 -0
  117. package/dist/middleware/embed-cache.d.ts +99 -0
  118. package/dist/middleware/embed-cache.d.ts.map +1 -0
  119. package/dist/middleware/embed-cache.js +128 -0
  120. package/dist/middleware/embed-cache.js.map +1 -0
  121. package/dist/middleware/index.d.ts +11 -0
  122. package/dist/middleware/index.d.ts.map +1 -0
  123. package/dist/middleware/index.js +11 -0
  124. package/dist/middleware/index.js.map +1 -0
  125. package/dist/middleware/trace.d.ts +103 -0
  126. package/dist/middleware/trace.d.ts.map +1 -0
  127. package/dist/middleware/trace.js +176 -0
  128. package/dist/middleware/trace.js.map +1 -0
  129. package/dist/primitives.d.ts +120 -1
  130. package/dist/primitives.d.ts.map +1 -1
  131. package/dist/primitives.js +398 -26
  132. package/dist/primitives.js.map +1 -1
  133. package/dist/retry.d.ts +368 -0
  134. package/dist/retry.d.ts.map +1 -0
  135. package/dist/retry.js +646 -0
  136. package/dist/retry.js.map +1 -0
  137. package/dist/schema.d.ts.map +1 -1
  138. package/dist/schema.js +2 -10
  139. package/dist/schema.js.map +1 -1
  140. package/dist/telemetry.d.ts +128 -0
  141. package/dist/telemetry.d.ts.map +1 -0
  142. package/dist/telemetry.js +285 -0
  143. package/dist/telemetry.js.map +1 -0
  144. package/dist/template.d.ts.map +1 -1
  145. package/dist/template.js +6 -1
  146. package/dist/template.js.map +1 -1
  147. package/dist/tool-orchestration.d.ts +453 -0
  148. package/dist/tool-orchestration.d.ts.map +1 -0
  149. package/dist/tool-orchestration.js +763 -0
  150. package/dist/tool-orchestration.js.map +1 -0
  151. package/dist/type-guards.d.ts +28 -0
  152. package/dist/type-guards.d.ts.map +1 -0
  153. package/dist/type-guards.js +29 -0
  154. package/dist/type-guards.js.map +1 -0
  155. package/dist/types.d.ts +135 -17
  156. package/dist/types.d.ts.map +1 -1
  157. package/dist/types.js +36 -1
  158. package/dist/types.js.map +1 -1
  159. package/dist/wrap-for-v3.d.ts +80 -0
  160. package/dist/wrap-for-v3.d.ts.map +1 -0
  161. package/dist/wrap-for-v3.js +89 -0
  162. package/dist/wrap-for-v3.js.map +1 -0
  163. package/examples/00-quickstart.ts +232 -0
  164. package/examples/01-rag-chatbot.ts +212 -0
  165. package/examples/02-multi-agent-research.ts +290 -0
  166. package/examples/03-email-classification.ts +379 -0
  167. package/examples/04-content-moderation.ts +400 -0
  168. package/examples/05-document-extraction.ts +455 -0
  169. package/examples/06-streaming-chat-nextjs.ts +437 -0
  170. package/examples/07-cloudflare-worker.ts +483 -0
  171. package/examples/08-batch-processing.ts +491 -0
  172. package/examples/09-budget-constrained.ts +527 -0
  173. package/examples/10-tool-orchestration.ts +565 -0
  174. package/examples/11-retry-resilience.ts +403 -0
  175. package/examples/12-caching-strategies.ts +422 -0
  176. package/examples/README.md +145 -0
  177. package/package.json +10 -6
  178. package/src/ai-promise.ts +528 -99
  179. package/src/ai-schemas.ts +122 -0
  180. package/src/ai.ts +69 -1153
  181. package/src/batch/anthropic.ts +96 -161
  182. package/src/batch/bedrock.ts +203 -454
  183. package/src/batch/cloudflare.ts +99 -282
  184. package/src/batch/google.ts +91 -297
  185. package/src/batch/index.ts +4 -1
  186. package/src/batch/memory.ts +15 -10
  187. package/src/batch/openai.ts +65 -193
  188. package/src/batch/provider.ts +336 -0
  189. package/src/batch-map.ts +29 -24
  190. package/src/batch-queue.ts +200 -11
  191. package/src/budget.ts +740 -0
  192. package/src/cache.ts +681 -0
  193. package/src/context.ts +122 -76
  194. package/src/digital-objects-registry.ts +750 -0
  195. package/src/errors.ts +37 -0
  196. package/src/eval/runner.ts +63 -38
  197. package/src/eval-log/in-memory.ts +90 -0
  198. package/src/eval-log/index.ts +46 -0
  199. package/src/eval-log/types.ts +110 -0
  200. package/src/function-registry.ts +671 -0
  201. package/src/generate.ts +33 -33
  202. package/src/index.ts +325 -49
  203. package/src/logger.ts +232 -0
  204. package/src/middleware/budget.ts +171 -0
  205. package/src/middleware/cache.ts +299 -0
  206. package/src/middleware/embed-cache.ts +195 -0
  207. package/src/middleware/index.ts +23 -0
  208. package/src/middleware/trace.ts +248 -0
  209. package/src/primitives.ts +589 -62
  210. package/src/retry.ts +902 -0
  211. package/src/schema.ts +8 -17
  212. package/src/telemetry.ts +403 -0
  213. package/src/template.ts +8 -4
  214. package/src/tool-orchestration.ts +1173 -0
  215. package/src/type-guards.ts +31 -0
  216. package/src/types.ts +164 -25
  217. package/src/wrap-for-v3.ts +105 -0
  218. package/test/ai-promise.test.ts +1080 -0
  219. package/test/ai-proxy.test.ts +1 -1
  220. package/test/backward-compat.test.ts +147 -0
  221. package/test/batch-autosubmit-errors.test.ts +610 -0
  222. package/test/batch-blog-posts.test.ts +87 -129
  223. package/test/budget-tracking.test.ts +800 -0
  224. package/test/cache.test.ts +712 -0
  225. package/test/context-isolation.test.ts +687 -0
  226. package/test/core-functions.test.ts +183 -579
  227. package/test/decide.test.ts +154 -322
  228. package/test/define.test.ts +211 -8
  229. package/test/digital-objects-registry.test.ts +760 -0
  230. package/test/embedding-cache-middleware.test.ts +140 -0
  231. package/test/evals/deterministic.eval.test.ts +376 -0
  232. package/test/generate-core.test.ts +140 -229
  233. package/test/implicit-batch.test.ts +22 -65
  234. package/test/json-parse-error-handling.test.ts +463 -0
  235. package/test/retry-policy-integration.test.ts +117 -0
  236. package/test/retry.test.ts +1016 -0
  237. package/test/schema.test.ts +55 -19
  238. package/test/streaming.test.ts +316 -0
  239. package/test/template.test.ts +1164 -0
  240. package/test/tool-orchestration.test.ts +1040 -0
  241. package/test/wrap-for-v3.test.ts +612 -0
  242. package/vitest.config.js +6 -0
  243. package/vitest.config.ts +20 -0
  244. package/dist/rpc/auth.d.ts +0 -69
  245. package/dist/rpc/auth.d.ts.map +0 -1
  246. package/dist/rpc/auth.js +0 -136
  247. package/dist/rpc/auth.js.map +0 -1
  248. package/dist/rpc/client.d.ts +0 -62
  249. package/dist/rpc/client.d.ts.map +0 -1
  250. package/dist/rpc/client.js +0 -103
  251. package/dist/rpc/client.js.map +0 -1
  252. package/dist/rpc/deferred.d.ts +0 -60
  253. package/dist/rpc/deferred.d.ts.map +0 -1
  254. package/dist/rpc/deferred.js +0 -96
  255. package/dist/rpc/deferred.js.map +0 -1
  256. package/dist/rpc/index.d.ts +0 -22
  257. package/dist/rpc/index.d.ts.map +0 -1
  258. package/dist/rpc/index.js +0 -38
  259. package/dist/rpc/index.js.map +0 -1
  260. package/dist/rpc/local.d.ts +0 -42
  261. package/dist/rpc/local.d.ts.map +0 -1
  262. package/dist/rpc/local.js +0 -50
  263. package/dist/rpc/local.js.map +0 -1
  264. package/dist/rpc/server.d.ts +0 -165
  265. package/dist/rpc/server.d.ts.map +0 -1
  266. package/dist/rpc/server.js +0 -405
  267. package/dist/rpc/server.js.map +0 -1
  268. package/dist/rpc/session.d.ts +0 -32
  269. package/dist/rpc/session.d.ts.map +0 -1
  270. package/dist/rpc/session.js +0 -43
  271. package/dist/rpc/session.js.map +0 -1
  272. package/dist/rpc/transport.d.ts +0 -306
  273. package/dist/rpc/transport.d.ts.map +0 -1
  274. package/dist/rpc/transport.js +0 -731
  275. package/dist/rpc/transport.js.map +0 -1
  276. package/src/batch/anthropic.js +0 -256
  277. package/src/batch/bedrock.js +0 -584
  278. package/src/batch/cloudflare.js +0 -287
  279. package/src/batch/google.js +0 -359
  280. package/src/batch/index.js +0 -30
  281. package/src/batch/memory.js +0 -187
  282. package/src/batch/openai.js +0 -402
  283. package/src/eval/index.js +0 -7
  284. package/src/eval/models.js +0 -119
  285. package/src/eval/runner.js +0 -147
  286. package/test/schema.test.js +0 -96
package/src/budget.ts ADDED
@@ -0,0 +1,740 @@
1
+ /**
2
+ * Budget Tracking and Request Tracing for AI Functions
3
+ *
4
+ * Provides:
5
+ * - Token counting and estimation
6
+ * - Cost tracking by model
7
+ * - Budget limits with alerts
8
+ * - Request ID generation and tracing
9
+ * - User/tenant context isolation
10
+ *
11
+ * @packageDocumentation
12
+ */
13
+
14
+ import { randomUUID } from 'crypto'
15
+
16
+ // ============================================================================
17
+ // Types
18
+ // ============================================================================
19
+
20
/**
 * Token counts reported for one request.
 */
export interface TokenUsage {
  /** Number of input tokens to add to the running totals */
  inputTokens: number
  /** Number of output tokens to add to the running totals */
  outputTokens: number
  /** Model the usage is attributed to; `BudgetTracker.recordUsage` uses `'default'` when omitted */
  model?: string
}
26
+
27
/**
 * Price of a model, quoted per one million tokens.
 */
export interface ModelPricing {
  /** Price charged for one million input tokens */
  inputPricePerMillion: number
  /** Price charged for one million output tokens */
  outputPricePerMillion: number
}
32
+
33
/**
 * Options accepted by the budget tracker. Every field is optional.
 */
export interface BudgetConfig {
  /** Upper bound on total tokens (input plus output) */
  maxTokens?: number

  /** Upper bound on total cost, in USD */
  maxCost?: number

  /** Fractions of the limit at which an alert fires, e.g. `[0.5, 0.8, 1.0]` */
  alertThresholds?: number[]

  /** Invoked with the alert payload when a threshold is reached */
  onAlert?: (alert: BudgetAlert) => void

  /** Prices for models that are missing from the default pricing table */
  customPricing?: Record<string, ModelPricing>

  /** How many recorded requests are retained in the history */
  maxRequestHistory?: number
}
48
+
49
/**
 * Payload handed to `BudgetConfig.onAlert` when a threshold is reached.
 */
export interface BudgetAlert {
  /** The configured threshold fraction that was reached */
  threshold: number
  /** Usage at the time of the alert: total tokens for `'tokens'`, total cost for `'cost'` */
  currentUsage: number
  /** The configured limit the usage is measured against */
  limit: number
  /** Which budget dimension the alert refers to */
  type: 'tokens' | 'cost'
}
56
+
57
/**
 * Input for a pre-flight budget check.
 */
export interface CheckBudgetOptions {
  /** Tokens the upcoming request is expected to use; `BudgetTracker.checkBudget` treats a missing value as 0 */
  estimatedTokens?: number
  /** Model used to price the estimate; `BudgetTracker.checkBudget` uses `'default'` when omitted */
  model?: string
}
62
+
63
/**
 * Budget still available. `BudgetTracker.getRemainingBudget` fills in a field
 * only when the corresponding limit is configured.
 */
export interface RemainingBudget {
  /** Tokens left before `maxTokens` is reached */
  tokens?: number
  /** Cost left before `maxCost` is reached */
  cost?: number
}
68
+
69
/**
 * Description of one completed request, as passed to `BudgetTracker.recordRequest`.
 */
export interface RequestInfo {
  /** Identifier of the request */
  requestId: string
  /** Model that served the request */
  model: string
  /** Start timestamp — presumably epoch milliseconds; confirm against callers */
  startTime: number
  /** End timestamp, in the same unit as `startTime` */
  endTime: number
  /** Input tokens used by the request */
  inputTokens: number
  /** Output tokens produced by the request */
  outputTokens: number
  /** Optional duration; `recordRequest` overwrites it with `endTime - startTime` when storing */
  duration?: number
}
79
+
80
/**
 * A request as kept in the tracker's history: same fields as `RequestInfo`,
 * but the duration is always present.
 */
interface StoredRequest extends RequestInfo {
  /** Computed as `endTime - startTime` when the request is recorded */
  duration: number
}
84
+
85
/**
 * Serializable tracker state produced by `BudgetTracker.export` and consumed
 * by `BudgetTracker.import`.
 */
export interface BudgetSnapshot {
  /** Sum of all recorded input tokens */
  totalInputTokens: number
  /** Sum of all recorded output tokens */
  totalOutputTokens: number
  /** Sum of the per-model costs at export time */
  totalCost: number
  /** Token and cost totals keyed by model identifier */
  usageByModel: Record<string, { inputTokens: number; outputTokens: number; cost: number }>
  /**
   * Threshold keys that have already fired. Token alerts are stored as the
   * threshold itself; cost alerts are stored as `threshold + 1000`.
   */
  triggeredThresholds: number[]
}
93
+
94
/**
 * Optional values used to build a request context.
 */
export interface RequestContextOptions {
  /** Identifier of the request */
  requestId?: string
  /** User the request belongs to */
  userId?: string
  /** Tenant the request belongs to */
  tenantId?: string
  /** Identifier of the request that spawned this one */
  parentRequestId?: string
  /** Free-form key/value data attached to the context */
  metadata?: Record<string, unknown>
}
102
+
103
/**
 * Contract of a request context that carries tracing and user/tenant data.
 */
export interface IRequestContext {
  /** Identifier of this request */
  requestId: string
  /** User the request belongs to, if any */
  userId?: string
  /** Tenant the request belongs to, if any */
  tenantId?: string
  /** Identifier of the parent request, if any */
  parentRequestId?: string
  /** Nesting level of this context */
  depth: number
  /** Free-form key/value data attached to the context */
  metadata?: Record<string, unknown>

  /** Creates a child context, optionally overriding some options */
  createChild(options?: Partial<RequestContextOptions>): IRequestContext

  /** Serializes the context into a header-name → value map */
  toTraceHeaders(): Record<string, string>

  /** Produces a traceparent header value */
  toTraceparent(): string
}
115
+
116
+ // ============================================================================
117
+ // Default Model Pricing (per million tokens, USD)
118
+ // ============================================================================
119
+
120
/**
 * Built-in price table in USD per million tokens, keyed by model identifier.
 *
 * Consulted after any `customPricing` supplied in the budget config. The
 * `default` entry is the fallback for models that appear in neither table.
 * Provider prices change over time; override via `customPricing` when these
 * figures are out of date for your account.
 */
const DEFAULT_MODEL_PRICING: Record<string, ModelPricing> = {
  // OpenAI models
  'gpt-4o': { inputPricePerMillion: 2.5, outputPricePerMillion: 10 },
  'gpt-4o-mini': { inputPricePerMillion: 0.15, outputPricePerMillion: 0.6 },
  'gpt-4-turbo': { inputPricePerMillion: 10, outputPricePerMillion: 30 },
  'gpt-4': { inputPricePerMillion: 30, outputPricePerMillion: 60 },
  'gpt-3.5-turbo': { inputPricePerMillion: 0.5, outputPricePerMillion: 1.5 },
  o1: { inputPricePerMillion: 15, outputPricePerMillion: 60 },
  'o1-mini': { inputPricePerMillion: 3, outputPricePerMillion: 12 },
  'o1-preview': { inputPricePerMillion: 15, outputPricePerMillion: 60 },
  'o3-mini': { inputPricePerMillion: 1.1, outputPricePerMillion: 4.4 },

  // Anthropic models
  'claude-opus-4-20250514': { inputPricePerMillion: 15, outputPricePerMillion: 75 },
  'claude-sonnet-4-20250514': { inputPricePerMillion: 3, outputPricePerMillion: 15 },
  'claude-3-5-sonnet-latest': { inputPricePerMillion: 3, outputPricePerMillion: 15 },
  // Claude 3.5 Haiku is priced higher than Claude 3 Haiku (0.25 / 1.25 below).
  'claude-3-5-haiku-latest': { inputPricePerMillion: 0.8, outputPricePerMillion: 4 },
  'claude-3-opus-20240229': { inputPricePerMillion: 15, outputPricePerMillion: 75 },
  'claude-3-sonnet-20240229': { inputPricePerMillion: 3, outputPricePerMillion: 15 },
  'claude-3-haiku-20240307': { inputPricePerMillion: 0.25, outputPricePerMillion: 1.25 },

  // Google models
  'gemini-2.0-flash': { inputPricePerMillion: 0.1, outputPricePerMillion: 0.4 },
  'gemini-1.5-pro': { inputPricePerMillion: 1.25, outputPricePerMillion: 5 },
  'gemini-1.5-flash': { inputPricePerMillion: 0.075, outputPricePerMillion: 0.3 },

  // Default fallback
  default: { inputPricePerMillion: 1, outputPricePerMillion: 3 },
}
149
+
150
+ // ============================================================================
151
+ // Token Counter
152
+ // ============================================================================
153
+
154
/**
 * Minimal chat-message shape consumed by the token counter.
 */
interface Message {
  /** Speaker role, e.g. user, assistant or system */
  role: string
  /** Text of the message */
  content: string
}
159
+
160
/**
 * Token counter for estimating token usage
 *
 * Uses a simple character-based estimation that works across models.
 * For production, consider integrating tiktoken for more accurate counts.
 */
export class TokenCounter {
  /** Average ASCII characters per token (rough estimate) */
  private readonly charsPerToken = 4

  /** Average non-ASCII characters per token (rough estimate) */
  private readonly unicodeCharsPerToken = 2

  /** Overhead tokens per message for formatting */
  private readonly messageOverhead = 4

  /**
   * Estimate tokens for a text string.
   *
   * Characters are counted per Unicode code point, so a character outside the
   * Basic Multilingual Plane (for example an emoji, which occupies two UTF-16
   * code units) counts as exactly one non-ASCII character rather than as one
   * non-ASCII character plus one phantom ASCII character.
   *
   * @param text - Text to measure; an empty string yields 0
   * @param _model - Accepted for API symmetry; the estimate does not depend on it
   * @returns Estimated token count (ASCII and non-ASCII parts are each rounded up)
   */
  estimateTokens(text: string, _model?: string): number {
    if (!text) return 0

    let asciiChars = 0
    let unicodeChars = 0
    // String iteration yields whole code points, keeping both tallies in the same unit.
    for (const char of text) {
      if ((char.codePointAt(0) ?? 0) > 127) {
        unicodeChars += 1
      } else {
        asciiChars += 1
      }
    }

    // ASCII chars: ~4 per token, Unicode: ~2 per token (rough)
    const asciiTokens = Math.ceil(asciiChars / this.charsPerToken)
    const unicodeTokens = Math.ceil(unicodeChars / this.unicodeCharsPerToken)

    return asciiTokens + unicodeTokens
  }

  /**
   * Count tokens in a message array including formatting overhead.
   *
   * @param messages - Messages to measure
   * @param model - Forwarded to `estimateTokens`
   * @returns Sum over all messages of content tokens, role tokens and a fixed
   *          per-message overhead; 0 for an empty array
   */
  countMessageTokens(messages: Message[], model?: string): number {
    let total = 0

    for (const message of messages) {
      // Content tokens
      total += this.estimateTokens(message.content, model)
      // Role tokens (user, assistant, system)
      total += this.estimateTokens(message.role, model)
      // Message formatting overhead
      total += this.messageOverhead
    }

    return total
  }
}
212
+
213
+ // ============================================================================
214
+ // Budget Exceeded Error
215
+ // ============================================================================
216
+
217
/**
 * Raised when recorded or projected usage goes beyond a configured budget.
 *
 * The constructor arguments after `message` are exposed as read-only
 * properties of the same name, and `name` is set to `'BudgetExceededError'`.
 */
export class BudgetExceededError extends Error {
  /**
   * @param message - Human-readable description of the violation
   * @param type - Budget dimension that was exceeded
   * @param limit - The configured limit
   * @param current - Usage at the time the error was raised
   * @param requested - Additional amount that was being requested, when applicable
   */
  constructor(
    message: string,
    public readonly type: 'tokens' | 'cost',
    public readonly limit: number,
    public readonly current: number,
    public readonly requested?: number
  ) {
    super(message)

    // Overrides the inherited "Error" so logs and name checks identify this class.
    this.name = 'BudgetExceededError'
  }
}
232
+
233
+ // ============================================================================
234
+ // Budget Tracker
235
+ // ============================================================================
236
+
237
/**
 * Tracks token usage and costs with budget limits.
 *
 * Usage is accumulated globally and per model. Optional `maxTokens` /
 * `maxCost` limits are enforced both before a request (`checkBudget`) and
 * after usage is recorded (`recordUsage`), and optional alert thresholds are
 * reported through `onAlert` at most once each per budget dimension.
 */
export class BudgetTracker {
  private totalInputTokens = 0
  private totalOutputTokens = 0
  private usageByModel: Record<
    string,
    { inputTokens: number; outputTokens: number; cost: number }
  > = {}
  /**
   * Alert keys that have already fired. Token alerts use the threshold itself;
   * cost alerts use `threshold + COST_KEY_OFFSET` so the two do not collide.
   */
  private triggeredThresholds: Set<number> = new Set()
  private requests: StoredRequest[] = []

  private readonly config: BudgetConfig

  /** Offset added to a threshold to form the key of a cost alert. */
  private static readonly COST_KEY_OFFSET = 1000

  /**
   * @param config - Limits, alerting and pricing options; `maxRequestHistory`
   *                 defaults to 100
   */
  constructor(config: BudgetConfig = {}) {
    this.config = {
      maxRequestHistory: 100,
      ...config,
    }
  }

  /**
   * Record token usage from a request.
   *
   * Totals are updated before the limits are checked, so usage that pushes
   * the tracker over a limit is still counted.
   *
   * @param usage - Token counts and optional model (`'default'` when omitted)
   * @throws BudgetExceededError when the new totals exceed `maxTokens` or `maxCost`
   */
  recordUsage(usage: TokenUsage): void {
    const { inputTokens, outputTokens, model = 'default' } = usage

    this.totalInputTokens += inputTokens
    this.totalOutputTokens += outputTokens

    // Own-property lookup: a model name such as "constructor" must not resolve
    // to a member inherited from Object.prototype.
    let entry = BudgetTracker.lookupOwn(this.usageByModel, model)
    if (!entry) {
      entry = { inputTokens: 0, outputTokens: 0, cost: 0 }
      this.usageByModel[model] = entry
    }
    entry.inputTokens += inputTokens
    entry.outputTokens += outputTokens
    entry.cost += this.calculateCost(inputTokens, outputTokens, this.getPricing(model))

    // NOTE(review): the limit check throws before alerts are evaluated, so a
    // call that exceeds a limit fires no alerts. Kept as-is to preserve
    // existing behavior — confirm this ordering is intended.
    this.checkLimitsAfterRecording()
    this.checkAlerts()
  }

  /**
   * Record a complete request with timing info.
   *
   * The request is appended to the history (oldest entries are dropped beyond
   * `maxRequestHistory`) and its token usage is then recorded.
   *
   * @param info - Request details; the stored duration is `endTime - startTime`
   * @throws BudgetExceededError when recording the usage exceeds a limit
   */
  recordRequest(info: RequestInfo): void {
    const storedRequest: StoredRequest = {
      ...info,
      duration: info.endTime - info.startTime,
    }
    this.requests.push(storedRequest)

    // Trim history if needed
    const maxHistory = this.config.maxRequestHistory ?? 100
    while (this.requests.length > maxHistory) {
      this.requests.shift()
    }

    // Also record the token usage
    this.recordUsage({
      inputTokens: info.inputTokens,
      outputTokens: info.outputTokens,
      model: info.model,
    })
  }

  /**
   * Get all recorded requests.
   *
   * @returns A new array holding the retained history, oldest first
   */
  getRequests(): StoredRequest[] {
    return [...this.requests]
  }

  /**
   * Check if a proposed request would exceed budget. Nothing is recorded.
   *
   * @param options - Estimated tokens (default 0) and model (default `'default'`)
   * @throws BudgetExceededError when the projected tokens or cost exceed a limit
   */
  checkBudget(options: CheckBudgetOptions): void {
    const { estimatedTokens = 0, model = 'default' } = options

    // Check token limit
    if (this.config.maxTokens !== undefined) {
      const projectedTotal = this.getTotalTokens() + estimatedTokens
      if (projectedTotal > this.config.maxTokens) {
        throw new BudgetExceededError(
          `Token budget exceeded: ${projectedTotal} tokens would exceed limit of ${this.config.maxTokens}`,
          'tokens',
          this.config.maxTokens,
          this.getTotalTokens(),
          estimatedTokens
        )
      }
    }

    // Check cost limit
    if (this.config.maxCost !== undefined) {
      const pricing = this.getPricing(model)
      // Estimate cost assuming half input, half output
      const estimatedCost = this.calculateCost(
        Math.floor(estimatedTokens / 2),
        Math.ceil(estimatedTokens / 2),
        pricing
      )
      const projectedCost = this.getTotalCost() + estimatedCost

      if (projectedCost > this.config.maxCost) {
        throw new BudgetExceededError(
          `Cost budget exceeded: $${projectedCost.toFixed(4)} would exceed limit of $${
            this.config.maxCost
          }`,
          'cost',
          this.config.maxCost,
          this.getTotalCost(),
          estimatedCost
        )
      }
    }
  }

  /**
   * Check limits after recording and throw if exceeded.
   *
   * @throws BudgetExceededError when current totals exceed `maxTokens` or `maxCost`
   */
  private checkLimitsAfterRecording(): void {
    // Check token limit
    if (this.config.maxTokens !== undefined) {
      if (this.getTotalTokens() > this.config.maxTokens) {
        throw new BudgetExceededError(
          `Token budget exceeded: ${this.getTotalTokens()} tokens exceeds limit of ${
            this.config.maxTokens
          }`,
          'tokens',
          this.config.maxTokens,
          this.getTotalTokens()
        )
      }
    }

    // Check cost limit
    if (this.config.maxCost !== undefined) {
      const currentCost = this.getTotalCost()
      if (currentCost > this.config.maxCost) {
        throw new BudgetExceededError(
          `Cost budget exceeded: $${currentCost.toFixed(4)} exceeds limit of $${
            this.config.maxCost
          }`,
          'cost',
          this.config.maxCost,
          currentCost
        )
      }
    }
  }

  /**
   * Check and trigger alerts for both budget dimensions (tokens first, then cost).
   */
  private checkAlerts(): void {
    if (!this.config.alertThresholds || !this.config.onAlert) return

    if (this.config.maxTokens !== undefined) {
      this.fireAlerts('tokens', this.getTotalTokens(), this.config.maxTokens, 0)
    }

    if (this.config.maxCost !== undefined) {
      this.fireAlerts(
        'cost',
        this.getTotalCost(),
        this.config.maxCost,
        BudgetTracker.COST_KEY_OFFSET
      )
    }
  }

  /**
   * Fire `onAlert` for every configured threshold that `current / limit` has
   * reached and that has not fired before for this dimension.
   */
  private fireAlerts(
    type: 'tokens' | 'cost',
    current: number,
    limit: number,
    keyOffset: number
  ): void {
    const thresholds = this.config.alertThresholds
    if (!thresholds || !this.config.onAlert) return

    const fraction = current / limit
    for (const threshold of thresholds) {
      const key = threshold + keyOffset
      if (fraction >= threshold && !this.triggeredThresholds.has(key)) {
        // Mark before invoking the callback so each threshold fires only once.
        this.triggeredThresholds.add(key)
        this.config.onAlert({ threshold, currentUsage: current, limit, type })
      }
    }
  }

  /**
   * Get total input tokens
   */
  getTotalInputTokens(): number {
    return this.totalInputTokens
  }

  /**
   * Get total output tokens
   */
  getTotalOutputTokens(): number {
    return this.totalOutputTokens
  }

  /**
   * Get total tokens (input + output)
   */
  getTotalTokens(): number {
    return this.totalInputTokens + this.totalOutputTokens
  }

  /**
   * Get total cost in USD (sum of the per-model costs)
   */
  getTotalCost(): number {
    let total = 0
    for (const usage of Object.values(this.usageByModel)) {
      total += usage.cost
    }
    return total
  }

  /**
   * Get cost breakdown by model
   *
   * @returns A new object mapping each recorded model to its accumulated cost
   */
  getCostByModel(): Record<string, number> {
    const result: Record<string, number> = {}
    for (const [model, usage] of Object.entries(this.usageByModel)) {
      result[model] = usage.cost
    }
    return result
  }

  /**
   * Get remaining budget
   *
   * @returns Remaining tokens and/or cost, each clamped at 0 and present only
   *          when the corresponding limit is configured
   */
  getRemainingBudget(): RemainingBudget {
    const result: RemainingBudget = {}

    if (this.config.maxTokens !== undefined) {
      result.tokens = Math.max(0, this.config.maxTokens - this.getTotalTokens())
    }

    if (this.config.maxCost !== undefined) {
      result.cost = Math.max(0, this.config.maxCost - this.getTotalCost())
    }

    return result
  }

  /**
   * Reset all tracking: totals, per-model usage, fired alerts and request history
   */
  reset(): void {
    this.totalInputTokens = 0
    this.totalOutputTokens = 0
    this.usageByModel = {}
    this.triggeredThresholds.clear()
    this.requests = []
  }

  /**
   * Export current state for persistence.
   *
   * The per-model entries are copied, so the returned snapshot is not affected
   * by usage recorded afterwards. Request history is not part of the snapshot.
   */
  export(): BudgetSnapshot {
    return {
      totalInputTokens: this.totalInputTokens,
      totalOutputTokens: this.totalOutputTokens,
      totalCost: this.getTotalCost(),
      usageByModel: BudgetTracker.cloneUsage(this.usageByModel),
      triggeredThresholds: Array.from(this.triggeredThresholds),
    }
  }

  /**
   * Import previously exported state.
   *
   * The per-model entries are copied, so the caller's snapshot object is not
   * mutated by usage recorded afterwards. `snapshot.totalCost` is not read;
   * total cost is always derived from the per-model costs.
   */
  import(snapshot: BudgetSnapshot): void {
    this.totalInputTokens = snapshot.totalInputTokens
    this.totalOutputTokens = snapshot.totalOutputTokens
    this.usageByModel = BudgetTracker.cloneUsage(snapshot.usageByModel)
    this.triggeredThresholds = new Set(snapshot.triggeredThresholds)
  }

  /**
   * Get pricing for a model: custom pricing first, then the default table,
   * then the default table's `default` entry.
   */
  private getPricing(model: string): ModelPricing {
    return (
      BudgetTracker.lookupOwn(this.config.customPricing, model) ??
      BudgetTracker.lookupOwn(DEFAULT_MODEL_PRICING, model) ??
      // Fallback to default (always defined)
      DEFAULT_MODEL_PRICING['default']!
    )
  }

  /**
   * Calculate cost for token usage given per-million-token prices
   */
  private calculateCost(inputTokens: number, outputTokens: number, pricing: ModelPricing): number {
    const inputCost = (inputTokens / 1_000_000) * pricing.inputPricePerMillion
    const outputCost = (outputTokens / 1_000_000) * pricing.outputPricePerMillion
    return inputCost + outputCost
  }

  /** Read `table[key]` only when `key` is an own property of `table`. */
  private static lookupOwn<T>(table: Record<string, T> | undefined, key: string): T | undefined {
    if (table === undefined) return undefined
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined
  }

  /** Copy a per-model usage table together with each of its entries. */
  private static cloneUsage(
    source: BudgetSnapshot['usageByModel']
  ): BudgetSnapshot['usageByModel'] {
    const copy: BudgetSnapshot['usageByModel'] = {}
    for (const [model, usage] of Object.entries(source)) {
      copy[model] = { ...usage }
    }
    return copy
  }
}
571
+
572
+ // ============================================================================
573
+ // Request Context
574
+ // ============================================================================
575
+
576
/**
 * Request context for tracing and user isolation.
 *
 * Holds the identifiers of one logical request (request, user, tenant and
 * parent request IDs), its nesting depth, optional free-form metadata, and the
 * trace/span IDs used to render a W3C `traceparent` header.
 */
export class RequestContext implements IRequestContext {
  /** A usable W3C trace-id: 32 lower-case hex characters, not all zeros. */
  private static readonly TRACE_ID_PATTERN = /^(?!0{32}$)[0-9a-f]{32}$/

  readonly requestId: string
  readonly userId?: string
  readonly tenantId?: string
  readonly parentRequestId?: string
  readonly depth: number
  readonly metadata?: Record<string, unknown>

  private readonly traceId: string
  private readonly spanId: string

  /**
   * @param options - Identifiers and metadata for the context. `requestId`
   *   defaults to a random UUID and `depth` to 0. `traceId`, when it is a valid
   *   W3C trace-id, is reused so related contexts share one trace; otherwise a
   *   fresh trace-id is generated. A fresh span-id is always generated.
   */
  constructor(options: RequestContextOptions & { depth?: number; traceId?: string } = {}) {
    this.requestId = options.requestId ?? randomUUID()
    // Optional fields are only assigned when provided, so they are never
    // explicitly set to `undefined`.
    if (options.userId !== undefined) this.userId = options.userId
    if (options.tenantId !== undefined) this.tenantId = options.tenantId
    if (options.parentRequestId !== undefined) this.parentRequestId = options.parentRequestId
    this.depth = options.depth ?? 0
    if (options.metadata !== undefined) this.metadata = options.metadata

    // Trace/span IDs for W3C traceparent: 32 and 16 hex characters.
    const inheritedTraceId = options.traceId
    this.traceId =
      inheritedTraceId !== undefined && RequestContext.TRACE_ID_PATTERN.test(inheritedTraceId)
        ? inheritedTraceId
        : randomUUID().replace(/-/g, '')
    this.spanId = randomUUID().replace(/-/g, '').slice(0, 16)
  }

  /**
   * Create a child context that inherits from this one.
   *
   * The child keeps this context's user, tenant and trace-id, records this
   * context's `requestId` as its `parentRequestId`, and sits one level deeper.
   * Metadata is merged, with the child's keys overriding the parent's.
   *
   * @param options - Overrides for the inherited fields (including
   *   `parentRequestId`) and extra metadata
   * @returns A new context with its own span-id
   */
  createChild(options: Partial<RequestContextOptions> = {}): RequestContext {
    // Metadata is merged rather than replaced, so split it from the rest.
    const { metadata: childMetadata, ...restOptions } = options

    return new RequestContext({
      userId: this.userId,
      tenantId: this.tenantId,
      parentRequestId: this.requestId,
      ...restOptions,
      metadata: {
        ...this.metadata,
        ...childMetadata,
      },
      depth: this.depth + 1,
      // Parent and child belong to the same trace; only the span-id differs.
      traceId: this.traceId,
    } as RequestContextOptions & { depth: number; traceId: string })
  }

  /**
   * Serialize to trace headers.
   *
   * @returns `x-request-id` always, plus `x-user-id`, `x-tenant-id` and
   *   `x-parent-request-id` for each of those fields that is non-empty
   */
  toTraceHeaders(): Record<string, string> {
    const headers: Record<string, string> = {
      'x-request-id': this.requestId,
    }

    if (this.userId) {
      headers['x-user-id'] = this.userId
    }

    if (this.tenantId) {
      headers['x-tenant-id'] = this.tenantId
    }

    if (this.parentRequestId) {
      headers['x-parent-request-id'] = this.parentRequestId
    }

    return headers
  }

  /**
   * Generate W3C traceparent header.
   * Format: version-trace_id-parent_id-flags
   *
   * @returns The header value, using version `00` and the sampled flag `01`
   */
  toTraceparent(): string {
    const version = '00'
    const flags = '01' // sampled
    return `${version}-${this.traceId}-${this.spanId}-${flags}`
  }

  /**
   * Create a RequestContext from trace headers.
   *
   * Header names are matched case-insensitively. When the same header appears
   * under several casings, the exact lower-case key wins.
   *
   * @param headers - Header map that may carry `x-request-id`, `x-user-id`,
   *   `x-tenant-id` and `x-parent-request-id`
   * @returns A new context populated from the headers that are present
   */
  static fromHeaders(headers: Record<string, string>): RequestContext {
    const byLowerName = new Map<string, string>()
    for (const [name, value] of Object.entries(headers)) {
      if (value === undefined) continue
      const lowerName = name.toLowerCase()
      if (name === lowerName || !byLowerName.has(lowerName)) {
        byLowerName.set(lowerName, value)
      }
    }

    const opts: RequestContextOptions = {}
    const requestId = byLowerName.get('x-request-id')
    if (requestId !== undefined) opts.requestId = requestId
    const userId = byLowerName.get('x-user-id')
    if (userId !== undefined) opts.userId = userId
    const tenantId = byLowerName.get('x-tenant-id')
    if (tenantId !== undefined) opts.tenantId = tenantId
    const parentRequestId = byLowerName.get('x-parent-request-id')
    if (parentRequestId !== undefined) opts.parentRequestId = parentRequestId
    return new RequestContext(opts)
  }
}
669
+
670
/**
 * Factory for a new request context.
 *
 * @param options - Identifiers and metadata forwarded to the `RequestContext`
 *   constructor; defaults to an empty options object
 * @returns The newly constructed context
 */
export function createRequestContext(options: RequestContextOptions = {}): RequestContext {
  const context = new RequestContext(options)
  return context
}
676
+
677
+ // ============================================================================
678
+ // withBudget Wrapper
679
+ // ============================================================================
680
+
681
/**
 * Options for withBudget.
 *
 * `withBudget` splits `userId` and `tenantId` off from the remaining fields,
 * which it passes to the `BudgetTracker` constructor as the budget config.
 */
export interface WithBudgetOptions extends BudgetConfig {
  /** Tenant to attach to the request context handed to the callback. */
  tenantId?: string
  /** User to attach to the request context handed to the callback. */
  userId?: string
}
686
+
687
// Track nested budget contexts: the tracker of the innermost active
// `withBudget` call, or null when none is running. It holds the propagating
// wrapper (not the raw tracker) so that usage bubbles through every level.
// NOTE(review): this is module-level state, so overlapping (non-nested)
// concurrent withBudget calls observe each other's tracker — confirm callers
// only nest calls, or move to an async-context mechanism.
let currentBudgetTracker: BudgetTracker | null = null

/**
 * Execute a function with budget tracking
 *
 * A fresh `BudgetTracker` is created from the budget fields of `options`. When
 * the call is nested inside another `withBudget`, every `recordUsage` on the
 * tracker passed to `fn` is also recorded on each enclosing tracker, all the
 * way up to the outermost one.
 *
 * @param options - Budget configuration plus optional `userId` / `tenantId`
 * @param fn - Callback that receives the tracker and, when `userId` or
 *   `tenantId` is non-empty, a request context carrying them
 * @returns Whatever `fn` resolves to; rejections from `fn` propagate unchanged
 *
 * @example
 * ```ts
 * const result = await withBudget({ maxTokens: 1000 }, async (tracker) => {
 *   tracker.recordUsage({ inputTokens: 100, outputTokens: 50 })
 *   return 'success'
 * })
 * ```
 */
export async function withBudget<T>(
  options: WithBudgetOptions,
  fn: (tracker: BudgetTracker, ctx?: RequestContext) => Promise<T>
): Promise<T> {
  const { userId, tenantId, ...budgetConfig } = options

  const tracker = new BudgetTracker(budgetConfig)
  const ctxOptions: RequestContextOptions = {}
  if (userId !== undefined) ctxOptions.userId = userId
  if (tenantId !== undefined) ctxOptions.tenantId = tenantId
  const ctx = userId || tenantId ? createRequestContext(ctxOptions) : undefined

  // Remember the enclosing tracker so it can be restored afterwards.
  const parentTracker = currentBudgetTracker

  // Wrapper that behaves like `tracker` but forwards usage to the parent.
  const wrappedTracker = new Proxy(tracker, {
    get(target, prop, receiver) {
      const value = Reflect.get(target, prop, receiver)

      // Wrap recordUsage to propagate to parent
      if (prop === 'recordUsage' && parentTracker) {
        return (usage: TokenUsage) => {
          target.recordUsage(usage)
          // The parent is itself a wrapper when it has its own parent, so this
          // call keeps bubbling up through every enclosing budget.
          parentTracker.recordUsage(usage)
        }
      }

      return value
    },
  })

  // Publish the wrapper, not the raw tracker: a nested call that captured the
  // raw tracker would stop propagating after a single level.
  currentBudgetTracker = wrappedTracker

  try {
    return await fn(wrappedTracker, ctx)
  } finally {
    // Restore the enclosing tracker even when `fn` rejects.
    currentBudgetTracker = parentTracker
  }
}