budget-agent 0.4.6 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +445 -123
- package/package.json +28 -18
package/README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# budget-agent
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Stop runaway AI agents from burning through your API credits. Track cost, tokens, runtime, and steps. Enforce hard budget limits for OpenAI, Anthropic, LangGraph, LangChain, OpenRouter, CrewAI, Mastra, AutoGen, and any LLM workflow.
|
|
4
4
|
|
|
5
|
-
**
|
|
5
|
+
budget-agent helps developers **track AI agent costs**, **enforce token limits**, **set spending caps**, **monitor LLM usage**, and **prevent runaway OpenAI, Anthropic, and OpenRouter agents** from exceeding budget. Works with every provider. Zero vendor lock-in.
|
|
6
6
|
|
|
7
7
|
## Install
|
|
8
8
|
|
|
@@ -13,113 +13,138 @@ npm install budget-agent
|
|
|
13
13
|
## Quick start
|
|
14
14
|
|
|
15
15
|
```ts
|
|
16
|
-
import { AgentBudget } from 'budget-agent';
|
|
16
|
+
import { AgentBudget, BudgetError } from 'budget-agent';
|
|
17
17
|
|
|
18
18
|
const agent = new AgentBudget({
|
|
19
19
|
apiKey: process.env.OPENROUTER_API_KEY,
|
|
20
|
-
limits: {
|
|
20
|
+
limits: {
|
|
21
|
+
maxCostUSD: 0.10,
|
|
22
|
+
maxSteps: 15,
|
|
23
|
+
maxTotalTokens: 50_000,
|
|
24
|
+
maxWallTimeMs: 30_000,
|
|
25
|
+
},
|
|
21
26
|
});
|
|
22
27
|
|
|
23
28
|
const response = await agent.step({
|
|
24
|
-
model: 'anthropic/claude-
|
|
29
|
+
model: 'anthropic/claude-sonnet-4-5',
|
|
25
30
|
messages: [{ role: 'user', content: 'Hello' }],
|
|
26
31
|
});
|
|
27
32
|
|
|
28
33
|
console.log(agent.getUsage());
|
|
29
|
-
// { steps: 1, totalCostUSD: 0.000015, totalInputTokens: 12, ... }
|
|
30
34
|
```
|
|
31
35
|
|
|
32
|
-
|
|
36
|
+
---
|
|
33
37
|
|
|
34
|
-
|
|
38
|
+
## Prevent runaway AI agents
|
|
35
39
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
## Budget limits
|
|
39
|
-
|
|
40
|
-
Set limits on cost, tokens, steps, and wall time. Every limit is optional.
|
|
40
|
+
Agent loops multiply LLM costs across every step. Without guardrails, a single loop can burn through your entire API budget in seconds. budget-agent checks your limits before each call reaches your provider and again after it returns -- so a runaway loop is stopped as soon as a limit is hit.
|
|
41
41
|
|
|
42
42
|
```ts
|
|
43
43
|
const agent = new AgentBudget({
|
|
44
44
|
apiKey: key,
|
|
45
|
-
limits: {
|
|
46
|
-
maxCostUSD: 0.05, // total USD before abort
|
|
47
|
-
maxSteps: 10, // total LLM calls before abort
|
|
48
|
-
maxInputTokens: 50000, // total input tokens
|
|
49
|
-
maxOutputTokens: 10000, // total output tokens
|
|
50
|
-
maxTotalTokens: 60000, // input + output combined
|
|
51
|
-
maxWallTimeMs: 60000, // 60 seconds wall clock
|
|
52
|
-
},
|
|
45
|
+
limits: { maxCostUSD: 0.05, maxSteps: 10 },
|
|
53
46
|
});
|
|
47
|
+
|
|
48
|
+
try {
|
|
49
|
+
while (true) {
|
|
50
|
+
const res = await agent.step({ model, messages });
|
|
51
|
+
messages.push(res.choices[0].message);
|
|
52
|
+
messages.push({ role: 'user', content: 'Continue.' });
|
|
53
|
+
}
|
|
54
|
+
} catch (err) {
|
|
55
|
+
if (err instanceof BudgetError) {
|
|
56
|
+
console.log('Agent stopped:', err.exceeded.reason);
|
|
57
|
+
}
|
|
58
|
+
}
|
|
54
59
|
```
|
|
55
60
|
|
|
56
|
-
|
|
61
|
+
---
|
|
57
62
|
|
|
58
|
-
|
|
63
|
+
## Track LLM costs in production
|
|
59
64
|
|
|
60
|
-
|
|
61
|
-
2. **Post-step** -- after recording real token/cost data. If a limit is exceeded, the step is rolled back from the tracker so you can retry without a stale balance.
|
|
65
|
+
Get real-time visibility into every API call. See cost per step, total spend, token breakdown, and wall time.
|
|
62
66
|
|
|
63
67
|
```ts
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
68
|
+
const usage = agent.getUsage();
|
|
69
|
+
// {
|
|
70
|
+
// steps: 12,
|
|
71
|
+
// totalCostUSD: 0.0847,
|
|
72
|
+
// totalInputTokens: 24300,
|
|
73
|
+
// totalOutputTokens: 8200,
|
|
74
|
+
// elapsedMs: 45200,
|
|
75
|
+
// stepHistory: [...]
|
|
76
|
+
// }
|
|
77
|
+
|
|
78
|
+
agent.summary(); // formatted table in console
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## Set hard budget caps
|
|
84
|
+
|
|
85
|
+
Every limit is optional. Set only what you need.
|
|
86
|
+
|
|
87
|
+
```ts
|
|
88
|
+
limits: {
|
|
89
|
+
maxCostUSD: 0.05, // total USD across all steps
|
|
90
|
+
maxSteps: 10, // total LLM calls
|
|
91
|
+
maxInputTokens: 50000, // input tokens only
|
|
92
|
+
maxOutputTokens: 10000, // output tokens only
|
|
93
|
+
maxTotalTokens: 60000, // input + output combined
|
|
94
|
+
maxWallTimeMs: 60000, // wall clock time in ms
|
|
70
95
|
}
|
|
71
96
|
```
|
|
72
97
|
|
|
73
|
-
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Runtime limits for AI agents
|
|
101
|
+
|
|
102
|
+
Kill agents that run too long. Set wall time limits to prevent infinite loops from consuming compute and money.
|
|
74
103
|
|
|
75
104
|
```ts
|
|
76
105
|
const agent = new AgentBudget({
|
|
77
106
|
apiKey: key,
|
|
78
|
-
limits: {
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
// Log, alert, switch models -- never throws
|
|
107
|
+
limits: {
|
|
108
|
+
maxWallTimeMs: 30_000, // 30 second hard stop
|
|
109
|
+
maxCostUSD: 1.00,
|
|
82
110
|
},
|
|
83
111
|
});
|
|
84
112
|
```
|
|
85
113
|
|
|
86
|
-
|
|
114
|
+
---
|
|
87
115
|
|
|
88
|
-
|
|
116
|
+
## Token usage tracking
|
|
89
117
|
|
|
90
|
-
|
|
91
|
-
const agent = new AgentBudget({
|
|
92
|
-
limits: { maxCostUSD: 0.10 },
|
|
93
|
-
warningThreshold: 0.5, // fire 'budget:warning' at 50% consumption
|
|
94
|
-
});
|
|
118
|
+
Track input tokens, output tokens, and total tokens across every step. Know exactly where your budget goes.
|
|
95
119
|
|
|
96
|
-
|
|
97
|
-
|
|
120
|
+
```ts
|
|
121
|
+
agent.on('step:end', (e) => {
|
|
122
|
+
console.log(`Step ${e.stepIndex}: ${e.inputTokens} in / ${e.outputTokens} out / $${e.costUSD}`);
|
|
98
123
|
});
|
|
99
124
|
```
|
|
100
125
|
|
|
101
|
-
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
## Agent guardrails
|
|
102
129
|
|
|
103
|
-
|
|
130
|
+
Pre-flight checks estimate output cost before the API call. Post-step checks record actual spend. If a post-step check finds that a limit was exceeded, the step is rolled back from the usage tracker so you can retry cleanly.
|
|
104
131
|
|
|
105
132
|
```ts
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
],
|
|
115
|
-
thresholds: [0.4, 0.75], // downgrade at 40% and 75% budget consumed
|
|
116
|
-
},
|
|
117
|
-
});
|
|
133
|
+
try {
|
|
134
|
+
await agent.step({ model, messages });
|
|
135
|
+
} catch (err) {
|
|
136
|
+
if (err instanceof BudgetError) {
|
|
137
|
+
err.exceeded.reason; // 'cost' | 'steps' | 'totalTokens' | 'wallTime'
|
|
138
|
+
err.exceeded.usage; // full snapshot at cutoff
|
|
139
|
+
}
|
|
140
|
+
}
|
|
118
141
|
```
|
|
119
142
|
|
|
120
|
-
|
|
143
|
+
---
|
|
121
144
|
|
|
122
|
-
|
|
145
|
+
## OpenAI cost tracking
|
|
146
|
+
|
|
147
|
+
Use budget-agent with the OpenAI SDK to track GPT-4, GPT-4o, and GPT-3.5 costs in real time.
|
|
123
148
|
|
|
124
149
|
```ts
|
|
125
150
|
import { AgentBudget } from 'budget-agent';
|
|
@@ -129,7 +154,7 @@ const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
|
|
|
129
154
|
|
|
130
155
|
const agent = new AgentBudget({
|
|
131
156
|
apiKey: process.env.OPENAI_API_KEY,
|
|
132
|
-
limits: { maxCostUSD: 0.
|
|
157
|
+
limits: { maxCostUSD: 0.50 },
|
|
133
158
|
executor: async (request) => {
|
|
134
159
|
const completion = await openai.chat.completions.create({
|
|
135
160
|
model: request.model,
|
|
@@ -151,105 +176,402 @@ const agent = new AgentBudget({
|
|
|
151
176
|
});
|
|
152
177
|
```
|
|
153
178
|
|
|
154
|
-
|
|
179
|
+
---
|
|
155
180
|
|
|
156
|
-
##
|
|
181
|
+
## Anthropic budget limits
|
|
157
182
|
|
|
158
|
-
|
|
159
|
-
- **Adaptive routing** -- automatic model downgrade as budget depletes
|
|
160
|
-
- **Circuit breaker** -- detect repetition or stagnation and halt the agent
|
|
161
|
-
- **Auto-compress** -- truncate message history with LLM summary when tokens exceed threshold
|
|
162
|
-
- **Checkpoints** -- save and resume agent state across restarts
|
|
163
|
-
- **Streaming** -- set `stream: true` and listen for `step:token` events
|
|
164
|
-
- **Events** -- subscribe to `step:start`, `step:end`, `step:token`, `budget:exceeded`, and more
|
|
165
|
-
- **Pricing cache** -- model pricing fetched from OpenRouter with configurable TTL
|
|
166
|
-
- **Rate-limit retry** -- automatic 429 retry with exponential backoff
|
|
167
|
-
- **OpenTelemetry** -- optional tracing spans via `telemetry: { enabled: true }`
|
|
183
|
+
Set spending caps on Claude Opus, Sonnet, and Haiku. Track token usage and enforce cost limits for Anthropic models.
|
|
168
184
|
|
|
169
|
-
|
|
185
|
+
```ts
|
|
186
|
+
const agent = new AgentBudget({
|
|
187
|
+
apiKey: process.env.ANTHROPIC_API_KEY,
|
|
188
|
+
limits: { maxCostUSD: 0.25, maxSteps: 20 },
|
|
189
|
+
executor: async (request) => {
|
|
190
|
+
const response = await fetch('https://api.anthropic.com/v1/messages', {
|
|
191
|
+
method: 'POST',
|
|
192
|
+
headers: {
|
|
193
|
+
'x-api-key': process.env.ANTHROPIC_API_KEY!,
|
|
194
|
+
'anthropic-version': '2023-06-01',
|
|
195
|
+
'content-type': 'application/json',
|
|
196
|
+
},
|
|
197
|
+
body: JSON.stringify({
|
|
198
|
+
model: request.model,
|
|
199
|
+
messages: request.messages,
|
|
200
|
+
max_tokens: 1024,
|
|
201
|
+
}),
|
|
202
|
+
});
|
|
203
|
+
const data = await response.json();
|
|
204
|
+
return {
|
|
205
|
+
model: data.model,
|
|
206
|
+
usage: {
|
|
207
|
+
prompt_tokens: data.usage?.input_tokens ?? 0,
|
|
208
|
+
completion_tokens: data.usage?.output_tokens ?? 0,
|
|
209
|
+
total_tokens: (data.usage?.input_tokens ?? 0) + (data.usage?.output_tokens ?? 0),
|
|
210
|
+
},
|
|
211
|
+
choices: [{
|
|
212
|
+
message: { role: 'assistant', content: data.content?.[0]?.text ?? '' },
|
|
213
|
+
finish_reason: 'stop',
|
|
214
|
+
}],
|
|
215
|
+
};
|
|
216
|
+
},
|
|
217
|
+
});
|
|
218
|
+
```
|
|
170
219
|
|
|
171
|
-
|
|
220
|
+
---
|
|
172
221
|
|
|
173
|
-
|
|
174
|
-
|--------|------|---------|-------------|
|
|
175
|
-
| `apiKey` | `string` | -- | Your provider API key |
|
|
176
|
-
| `limits` | `object` | -- | Budget limits (cost, tokens, steps, wall time) |
|
|
177
|
-
| `executor` | `function` | -- | Custom API executor (replaces built-in fetch) |
|
|
178
|
-
| `baseUrl` | `string` | `https://openrouter.ai/api/v1` | API base URL |
|
|
179
|
-
| `defaultHeaders` | `object` | -- | Extra HTTP headers |
|
|
180
|
-
| `autoCompress` | `object` | -- | Auto-compress messages at token threshold |
|
|
181
|
-
| `circuitBreaker` | `object` | -- | Detect repetition/stagnation loops |
|
|
182
|
-
| `adaptiveRouting` | `object` | -- | Downgrade model tiers as budget depletes |
|
|
183
|
-
| `checkpoint` | `object` | -- | Persist and resume agent state |
|
|
184
|
-
| `onExceeded` | `'abort' \| function` | `'abort'` | Strategy when budget exceeded |
|
|
185
|
-
| `onEvent` | `function` | -- | Global event listener |
|
|
186
|
-
| `warningThreshold` | `number` | `0.75` | Fraction of limit that triggers warning |
|
|
187
|
-
| `pricingCacheTTLMs` | `number` | `300000` | Pricing cache TTL in ms |
|
|
188
|
-
| `telemetry` | `object` | -- | Enable OpenTelemetry spans |
|
|
222
|
+
## LangGraph budget control
|
|
189
223
|
|
|
190
|
-
|
|
224
|
+
Add budget limits to LangGraph agent graphs. Prevent infinite loops and control cost per execution.
|
|
225
|
+
|
|
226
|
+
```ts
|
|
227
|
+
import { AgentBudget, BudgetError } from 'budget-agent';
|
|
228
|
+
|
|
229
|
+
const agent = new AgentBudget({
|
|
230
|
+
apiKey: process.env.OPENROUTER_API_KEY,
|
|
231
|
+
limits: { maxCostUSD: 0.20, maxSteps: 50 },
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
// Use inside a LangGraph node
|
|
235
|
+
async function agentNode(state) {
|
|
236
|
+
const response = await agent.step({
|
|
237
|
+
model: 'anthropic/claude-sonnet-4-5',
|
|
238
|
+
messages: state.messages,
|
|
239
|
+
});
|
|
240
|
+
return { messages: [...state.messages, response.choices[0].message] };
|
|
241
|
+
}
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
---
|
|
245
|
+
|
|
246
|
+
## LangChain cost monitoring
|
|
191
247
|
|
|
192
|
-
|
|
248
|
+
Track costs for LangChain chains and agents. Set token limits and spending caps.
|
|
193
249
|
|
|
194
250
|
```ts
|
|
251
|
+
import { AgentBudget } from 'budget-agent';
|
|
252
|
+
|
|
253
|
+
const agent = new AgentBudget({
|
|
254
|
+
apiKey: process.env.OPENROUTER_API_KEY,
|
|
255
|
+
limits: { maxCostUSD: 0.15, maxTotalTokens: 100_000 },
|
|
256
|
+
});
|
|
257
|
+
|
|
258
|
+
// Wrap any LangChain call
|
|
195
259
|
const response = await agent.step({
|
|
196
|
-
model: '
|
|
197
|
-
messages: [{ role: 'user', content:
|
|
198
|
-
stream: true, // optional -- emit step:token events
|
|
260
|
+
model: 'openai/gpt-4o',
|
|
261
|
+
messages: [{ role: 'user', content: prompt }],
|
|
199
262
|
});
|
|
200
263
|
```
|
|
201
264
|
|
|
202
|
-
|
|
265
|
+
---
|
|
266
|
+
|
|
267
|
+
## OpenRouter spend caps
|
|
203
268
|
|
|
204
|
-
|
|
269
|
+
budget-agent fetches live pricing from OpenRouter. No hardcoded price tables. If OpenRouter adds a model, it works automatically.
|
|
205
270
|
|
|
206
271
|
```ts
|
|
207
|
-
{
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
272
|
+
const agent = new AgentBudget({
|
|
273
|
+
apiKey: process.env.OPENROUTER_API_KEY,
|
|
274
|
+
limits: { maxCostUSD: 0.10 },
|
|
275
|
+
});
|
|
276
|
+
|
|
277
|
+
// Pricing is fetched and cached automatically
|
|
278
|
+
const response = await agent.step({
|
|
279
|
+
model: 'anthropic/claude-sonnet-4-5',
|
|
280
|
+
messages: [{ role: 'user', content: 'Hello' }],
|
|
281
|
+
});
|
|
215
282
|
```
|
|
216
283
|
|
|
217
|
-
|
|
284
|
+
---
|
|
218
285
|
|
|
219
|
-
|
|
286
|
+
## Ollama agent limits
|
|
220
287
|
|
|
221
|
-
|
|
288
|
+
Set budget limits for local Ollama models. Track token usage even for self-hosted inference.
|
|
289
|
+
|
|
290
|
+
```ts
|
|
291
|
+
const agent = new AgentBudget({
|
|
292
|
+
apiKey: 'ollama',
|
|
293
|
+
limits: { maxSteps: 100, maxWallTimeMs: 60_000 },
|
|
294
|
+
baseUrl: 'http://localhost:11434/v1',
|
|
295
|
+
executor: async (request) => {
|
|
296
|
+
const res = await fetch('http://localhost:11434/api/chat', {
|
|
297
|
+
method: 'POST',
|
|
298
|
+
body: JSON.stringify({ model: request.model, messages: request.messages }),
|
|
299
|
+
});
|
|
300
|
+
const data = await res.json();
|
|
301
|
+
return {
|
|
302
|
+
model: data.model,
|
|
303
|
+
usage: data.usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
|
|
304
|
+
choices: data.messages?.map((m) => ({
|
|
305
|
+
message: { role: m.role, content: m.content },
|
|
306
|
+
finish_reason: 'stop',
|
|
307
|
+
})) ?? [],
|
|
308
|
+
};
|
|
309
|
+
},
|
|
310
|
+
});
|
|
311
|
+
```
|
|
222
312
|
|
|
223
|
-
|
|
313
|
+
---
|
|
224
314
|
|
|
225
|
-
|
|
315
|
+
## CrewAI budget enforcement
|
|
226
316
|
|
|
227
|
-
|
|
317
|
+
Add cost limits to CrewAI agent crews. Prevent multi-agent systems from running up bills.
|
|
228
318
|
|
|
229
|
-
|
|
319
|
+
```ts
|
|
320
|
+
import { AgentBudget } from 'budget-agent';
|
|
230
321
|
|
|
231
|
-
|
|
322
|
+
const agent = new AgentBudget({
|
|
323
|
+
apiKey: process.env.OPENROUTER_API_KEY,
|
|
324
|
+
limits: { maxCostUSD: 1.00, maxSteps: 100 },
|
|
325
|
+
});
|
|
232
326
|
|
|
233
|
-
|
|
327
|
+
// Use in CrewAI task execution
|
|
328
|
+
const response = await agent.step({
|
|
329
|
+
model: 'anthropic/claude-sonnet-4-5',
|
|
330
|
+
messages: [{ role: 'user', content: taskDescription }],
|
|
331
|
+
});
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
---
|
|
234
335
|
|
|
235
|
-
|
|
336
|
+
## Mastra agent limits
|
|
236
337
|
|
|
237
|
-
|
|
338
|
+
Set budget limits for Mastra agents. Track cost and tokens across agent workflows.
|
|
339
|
+
|
|
340
|
+
```ts
|
|
341
|
+
import { AgentBudget } from 'budget-agent';
|
|
342
|
+
|
|
343
|
+
const agent = new AgentBudget({
|
|
344
|
+
apiKey: process.env.OPENROUTER_API_KEY,
|
|
345
|
+
limits: { maxCostUSD: 0.50, maxSteps: 30 },
|
|
346
|
+
});
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
---
|
|
350
|
+
|
|
351
|
+
## AutoGen cost control
|
|
352
|
+
|
|
353
|
+
Add budget limits to AutoGen multi-agent conversations. Prevent agent loops from exceeding budget.
|
|
354
|
+
|
|
355
|
+
```ts
|
|
356
|
+
import { AgentBudget } from 'budget-agent';
|
|
357
|
+
|
|
358
|
+
const agent = new AgentBudget({
|
|
359
|
+
apiKey: process.env.OPENROUTER_API_KEY,
|
|
360
|
+
limits: { maxCostUSD: 0.25, maxSteps: 20 },
|
|
361
|
+
});
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
---
|
|
365
|
+
|
|
366
|
+
## LLM observability
|
|
367
|
+
|
|
368
|
+
Subscribe to lifecycle events for full visibility into agent behavior.
|
|
238
369
|
|
|
239
370
|
```ts
|
|
240
371
|
agent.on('step:start', (e) => console.log('Step', e.stepIndex, 'started'));
|
|
241
372
|
agent.on('step:token', (e) => process.stdout.write(e.token));
|
|
242
|
-
agent.on('step:end', (e) => console.log(
|
|
373
|
+
agent.on('step:end', (e) => console.log(`Step cost: $${e.costUSD}`));
|
|
243
374
|
agent.on('budget:exceeded', (e) => console.log('Limit hit:', e.exceeded.reason));
|
|
244
|
-
agent.on('
|
|
375
|
+
agent.on('budget:warning', (e) => console.log(`Warning: ${e.pctConsumed * 100}% consumed`));
|
|
376
|
+
agent.on('model:downgraded', (e) => console.log(`Downgraded: ${e.from} → ${e.to}`));
|
|
245
377
|
```
|
|
246
378
|
|
|
247
|
-
|
|
379
|
+
---
|
|
248
380
|
|
|
249
|
-
|
|
250
|
-
|
|
381
|
+
## Adaptive model routing
|
|
382
|
+
|
|
383
|
+
Downgrade to cheaper models as budget depletes. Automatic fallback chains.
|
|
384
|
+
|
|
385
|
+
```ts
|
|
386
|
+
const agent = new AgentBudget({
|
|
387
|
+
apiKey: key,
|
|
388
|
+
limits: { maxCostUSD: 5.00 },
|
|
389
|
+
adaptiveRouting: {
|
|
390
|
+
fallbackChain: [
|
|
391
|
+
'anthropic/claude-opus-4.8-fast',
|
|
392
|
+
'openai/gpt-4o',
|
|
393
|
+
'openrouter/free',
|
|
394
|
+
],
|
|
395
|
+
thresholds: [0.4, 0.75],
|
|
396
|
+
},
|
|
397
|
+
});
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
---
|
|
401
|
+
|
|
402
|
+
## Circuit breaker
|
|
403
|
+
|
|
404
|
+
Detect repetition or stagnation and halt the agent before it burns through credits.
|
|
405
|
+
|
|
406
|
+
```ts
|
|
407
|
+
const agent = new AgentBudget({
|
|
408
|
+
apiKey: key,
|
|
409
|
+
limits: { maxCostUSD: 1.00 },
|
|
410
|
+
circuitBreaker: {
|
|
411
|
+
repetitionWindow: 3,
|
|
412
|
+
repetitionThreshold: 0.85,
|
|
413
|
+
stagnationWindow: 4,
|
|
414
|
+
stagnationMinLength: 50,
|
|
415
|
+
},
|
|
416
|
+
});
|
|
417
|
+
```
|
|
418
|
+
|
|
419
|
+
---
|
|
420
|
+
|
|
421
|
+
## Auto-compress messages
|
|
422
|
+
|
|
423
|
+
Truncate message history with an LLM summary when token count exceeds a threshold.
|
|
424
|
+
|
|
425
|
+
```ts
|
|
426
|
+
const agent = new AgentBudget({
|
|
427
|
+
apiKey: key,
|
|
428
|
+
limits: { maxTotalTokens: 100_000 },
|
|
429
|
+
autoCompress: {
|
|
430
|
+
thresholdTokens: 80_000,
|
|
431
|
+
keepLastN: 4,
|
|
432
|
+
},
|
|
433
|
+
});
|
|
251
434
|
```
|
|
252
435
|
|
|
436
|
+
---
|
|
437
|
+
|
|
438
|
+
## Checkpoints
|
|
439
|
+
|
|
440
|
+
Save and resume agent state across restarts.
|
|
441
|
+
|
|
442
|
+
```ts
|
|
443
|
+
const agent = new AgentBudget({
|
|
444
|
+
apiKey: key,
|
|
445
|
+
limits: { maxCostUSD: 0.50 },
|
|
446
|
+
checkpoint: { enabled: true, path: './agent-state.json' },
|
|
447
|
+
});
|
|
448
|
+
|
|
449
|
+
// Resume later
|
|
450
|
+
const resumed = await AgentBudget.resume(options);
|
|
451
|
+
```
|
|
452
|
+
|
|
453
|
+
---
|
|
454
|
+
|
|
455
|
+
## Warning thresholds
|
|
456
|
+
|
|
457
|
+
Get notified before hitting limits.
|
|
458
|
+
|
|
459
|
+
```ts
|
|
460
|
+
const agent = new AgentBudget({
|
|
461
|
+
limits: { maxCostUSD: 0.10 },
|
|
462
|
+
warningThreshold: 0.5,
|
|
463
|
+
});
|
|
464
|
+
|
|
465
|
+
agent.on('budget:warning', (e) => {
|
|
466
|
+
console.log(`${e.pctConsumed * 100}% of ${e.reason} budget consumed`);
|
|
467
|
+
});
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
---
|
|
471
|
+
|
|
472
|
+
## budget-agent vs LangSmith
|
|
473
|
+
|
|
474
|
+
LangSmith is an observability platform. budget-agent is a runtime enforcement layer. LangSmith shows you what happened. budget-agent stops it from happening.
|
|
475
|
+
|
|
476
|
+
| | budget-agent | LangSmith |
|
|
477
|
+
|---|---|---|
|
|
478
|
+
| Runtime enforcement | Yes | No |
|
|
479
|
+
| Pre-flight cost estimation | Yes | No |
|
|
480
|
+
| Budget limits | Hard stops | Soft alerts |
|
|
481
|
+
| Pricing | Free, self-hosted | Paid SaaS |
|
|
482
|
+
| Provider lock-in | None | LangChain ecosystem |
|
|
483
|
+
|
|
484
|
+
---
|
|
485
|
+
|
|
486
|
+
## budget-agent vs Helicone
|
|
487
|
+
|
|
488
|
+
Helicone is a proxy for LLM cost tracking. budget-agent is an SDK that enforces limits at runtime. Helicone tracks after the fact. budget-agent blocks before spend happens.
|
|
489
|
+
|
|
490
|
+
| | budget-agent | Helicone |
|
|
491
|
+
|---|---|---|
|
|
492
|
+
| Runtime enforcement | Yes | No |
|
|
493
|
+
| Pre-flight checks | Yes | No |
|
|
494
|
+
| Self-hosted | Yes | Cloud only |
|
|
495
|
+
| Free tier | Yes | Limited |
|
|
496
|
+
|
|
497
|
+
---
|
|
498
|
+
|
|
499
|
+
## budget-agent vs Langfuse
|
|
500
|
+
|
|
501
|
+
Langfuse is an LLM observability tool. budget-agent is a budget enforcement SDK. Langfuse gives you dashboards. budget-agent gives you hard limits.
|
|
502
|
+
|
|
503
|
+
| | budget-agent | Langfuse |
|
|
504
|
+
|---|---|---|
|
|
505
|
+
| Runtime enforcement | Yes | No |
|
|
506
|
+
| Pre-flight cost estimation | Yes | No |
|
|
507
|
+
| Budget limits | Hard stops | Observability only |
|
|
508
|
+
| Self-hosted | Yes | Yes |
|
|
509
|
+
| Free | Yes | Yes (self-hosted) |
|
|
510
|
+
|
|
511
|
+
---
|
|
512
|
+
|
|
513
|
+
## budget-agent vs OpenAI Usage Dashboard
|
|
514
|
+
|
|
515
|
+
OpenAI's dashboard shows usage after the fact. budget-agent prevents overspend in real time.
|
|
516
|
+
|
|
517
|
+
| | budget-agent | OpenAI Dashboard |
|
|
518
|
+
|---|---|---|
|
|
519
|
+
| Real-time enforcement | Yes | No |
|
|
520
|
+
| Pre-flight checks | Yes | No |
|
|
521
|
+
| Multi-provider | Yes | OpenAI only |
|
|
522
|
+
| Agent loop protection | Yes | No |
|
|
523
|
+
|
|
524
|
+
---
|
|
525
|
+
|
|
526
|
+
## API
|
|
527
|
+
|
|
528
|
+
### `new AgentBudget(options)`
|
|
529
|
+
|
|
530
|
+
| Option | Type | Default | Description |
|
|
531
|
+
|--------|------|---------|-------------|
|
|
532
|
+
| `apiKey` | `string` | required | Your provider API key |
|
|
533
|
+
| `limits` | `object` | required | Budget limits (cost, tokens, steps, wall time) |
|
|
534
|
+
| `executor` | `function` | -- | Custom API executor |
|
|
535
|
+
| `baseUrl` | `string` | `https://openrouter.ai/api/v1` | API base URL |
|
|
536
|
+
| `defaultHeaders` | `object` | -- | Extra HTTP headers |
|
|
537
|
+
| `autoCompress` | `object` | -- | Auto-compress messages at token threshold |
|
|
538
|
+
| `circuitBreaker` | `object` | -- | Detect repetition/stagnation |
|
|
539
|
+
| `adaptiveRouting` | `object` | -- | Downgrade models as budget depletes |
|
|
540
|
+
| `checkpoint` | `object` | -- | Persist and resume agent state |
|
|
541
|
+
| `onExceeded` | `'abort' \| function` | `'abort'` | Strategy when limit hit |
|
|
542
|
+
| `onEvent` | `function` | -- | Global event listener |
|
|
543
|
+
| `warningThreshold` | `number` | `0.75` | Fraction of any limit at which `budget:warning` fires |
|
|
544
|
+
| `pricingCacheTTLMs` | `number` | `300000` | Pricing cache TTL |
|
|
545
|
+
| `telemetry` | `object` | -- | Enable OpenTelemetry spans |
|
|
546
|
+
|
|
547
|
+
### `agent.step(request)`
|
|
548
|
+
|
|
549
|
+
Executes one LLM call. Checks limits before and after the call, and throws `BudgetError` when a limit is exceeded.
|
|
550
|
+
|
|
551
|
+
### `agent.getUsage()`
|
|
552
|
+
|
|
553
|
+
Returns a usage snapshot object with the fields `steps`, `totalInputTokens`, `totalOutputTokens`, `totalCostUSD`, `elapsedMs`, and `stepHistory`.
|
|
554
|
+
|
|
555
|
+
### `agent.reset()`
|
|
556
|
+
|
|
557
|
+
Reset all counters.
|
|
558
|
+
|
|
559
|
+
### `agent.refreshPricing()`
|
|
560
|
+
|
|
561
|
+
Force re-fetch model prices.
|
|
562
|
+
|
|
563
|
+
### `agent.summary()`
|
|
564
|
+
|
|
565
|
+
Prints a formatted usage table to the console.
|
|
566
|
+
|
|
567
|
+
### `agent.loadCheckpoint()` / `agent.clearCheckpoint()`
|
|
568
|
+
|
|
569
|
+
Load or clear persisted state.
|
|
570
|
+
|
|
571
|
+
### `AgentBudget.resume(options, checkpointPath?)`
|
|
572
|
+
|
|
573
|
+
Create a new agent pre-loaded with checkpoint state.
|
|
574
|
+
|
|
253
575
|
## License
|
|
254
576
|
|
|
255
577
|
MIT
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "budget-agent",
|
|
3
|
-
"version": "0.4.
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "0.4.8",
|
|
4
|
+
"description": "Track AI agent costs, tokens, runtime and spending. Prevent runaway OpenAI, Anthropic, LangGraph and OpenRouter agents from exceeding budget.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
7
7
|
"types": "./dist/index.d.ts",
|
|
@@ -29,31 +29,41 @@
|
|
|
29
29
|
"test:legacy": "tsx test-integration.ts"
|
|
30
30
|
},
|
|
31
31
|
"keywords": [
|
|
32
|
-
"
|
|
32
|
+
"ai",
|
|
33
33
|
"agent",
|
|
34
|
-
"
|
|
35
|
-
"cost-control",
|
|
36
|
-
"token-limit",
|
|
37
|
-
"openrouter",
|
|
34
|
+
"llm",
|
|
38
35
|
"openai",
|
|
39
36
|
"anthropic",
|
|
40
|
-
"
|
|
37
|
+
"langgraph",
|
|
38
|
+
"langchain",
|
|
39
|
+
"openrouter",
|
|
40
|
+
"ollama",
|
|
41
|
+
"crewai",
|
|
42
|
+
"mastra",
|
|
43
|
+
"autogen",
|
|
44
|
+
"cost-tracking",
|
|
45
|
+
"budget",
|
|
46
|
+
"token-tracking",
|
|
41
47
|
"agent-budget",
|
|
48
|
+
"ai-cost",
|
|
49
|
+
"ai-observability",
|
|
50
|
+
"agent-monitoring",
|
|
51
|
+
"guardrails",
|
|
52
|
+
"runtime-limits",
|
|
53
|
+
"token-limits",
|
|
42
54
|
"spending-limit",
|
|
43
|
-
"
|
|
55
|
+
"cost-control",
|
|
56
|
+
"llm-cost",
|
|
57
|
+
"agent-guardrails",
|
|
58
|
+
"runaway-agent",
|
|
59
|
+
"budget-enforcement",
|
|
44
60
|
"circuit-breaker",
|
|
45
61
|
"checkpoint",
|
|
46
|
-
"token-tracker",
|
|
47
|
-
"cost-tracker",
|
|
48
|
-
"llm-agent",
|
|
49
|
-
"ai-agent",
|
|
50
|
-
"prompt-cost",
|
|
51
|
-
"usage-tracking",
|
|
52
|
-
"budget-enforcement",
|
|
53
|
-
"ollama",
|
|
54
62
|
"gpt-4",
|
|
55
63
|
"claude",
|
|
56
|
-
"
|
|
64
|
+
"langsmith",
|
|
65
|
+
"langfuse",
|
|
66
|
+
"helicone"
|
|
57
67
|
],
|
|
58
68
|
"license": "MIT",
|
|
59
69
|
"repository": {
|