budget-agent 0.4.4 → 0.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +70 -136
- package/dist/index.js +12 -3
- package/package.json +38 -6
package/README.md
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
|
-
# agent
|
|
1
|
+
# budget-agent
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Control LLM agent costs with real-time token, cost, and step tracking. Set budget limits, enforce spend caps, and prevent runaway agents from burning through your API credits.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
**Works with OpenAI, Anthropic, OpenRouter, Ollama, Together AI, Fireworks, and any OpenAI-compatible endpoint.**
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
6
10
|
npm install budget-agent
|
|
7
11
|
```
|
|
8
12
|
|
|
9
13
|
## Quick start
|
|
10
14
|
|
|
11
|
-
You bring your own API key and model. The SDK calls your provider.
|
|
12
|
-
|
|
13
15
|
```ts
|
|
14
16
|
import { AgentBudget } from 'budget-agent';
|
|
15
17
|
|
|
@@ -27,48 +29,38 @@ console.log(agent.getUsage());
|
|
|
27
29
|
// { steps: 1, totalCostUSD: 0.000015, totalInputTokens: 12, ... }
|
|
28
30
|
```
|
|
29
31
|
|
|
30
|
-
##
|
|
32
|
+
## Why use this
|
|
31
33
|
|
|
32
|
-
|
|
34
|
+
LLM API calls cost money. Agent loops multiply that cost across every step. Without guardrails, a single runaway agent can burn through your credits in seconds.
|
|
33
35
|
|
|
34
|
-
|
|
35
|
-
2. Makes the API request to your provider
|
|
36
|
-
3. Tracks tokens, cost, and duration
|
|
37
|
-
4. Checks budget after the call (post-step)
|
|
38
|
-
5. Emits events for streaming, warnings, and overages
|
|
36
|
+
This SDK sits between your agent and the LLM provider. It tracks every call, checks your budget before each one, and stops the agent when it hits a limit. No provider is bundled. No model is defaulted. You bring everything.
|
|
39
37
|
|
|
40
|
-
|
|
38
|
+
## Budget limits
|
|
41
39
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
Budget guardrails that stop your agent before it spends too much:
|
|
40
|
+
Set limits on cost, tokens, steps, and wall time. Every limit is optional.
|
|
45
41
|
|
|
46
42
|
```ts
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
43
|
+
const agent = new AgentBudget({
|
|
44
|
+
apiKey: key,
|
|
45
|
+
limits: {
|
|
46
|
+
maxCostUSD: 0.05, // total USD before abort
|
|
47
|
+
maxSteps: 10, // total LLM calls before abort
|
|
48
|
+
maxInputTokens: 50000, // total input tokens
|
|
49
|
+
maxOutputTokens: 10000, // total output tokens
|
|
50
|
+
maxTotalTokens: 60000, // input + output combined
|
|
51
|
+
maxWallTimeMs: 60000, // 60 seconds wall clock
|
|
52
|
+
},
|
|
53
|
+
});
|
|
55
54
|
```
|
|
56
55
|
|
|
57
|
-
Every limit is optional. Omit what you don't want to enforce.
|
|
58
|
-
|
|
59
56
|
### How enforcement works
|
|
60
57
|
|
|
61
58
|
Each `step()` runs two checks:
|
|
62
59
|
|
|
63
|
-
1. **Pre-flight**
|
|
64
|
-
2. **Post-step**
|
|
60
|
+
1. **Pre-flight** -- before the API call. Estimates output cost and catches over-budget calls before spending money.
|
|
61
|
+
2. **Post-step** -- after recording real token/cost data. If a limit is exceeded, the step is rolled back from the tracker so you can retry without a stale balance.
|
|
65
62
|
|
|
66
63
|
```ts
|
|
67
|
-
const agent = new AgentBudget({
|
|
68
|
-
apiKey: key,
|
|
69
|
-
limits: { maxCostUSD: 0.01, maxSteps: 3 },
|
|
70
|
-
});
|
|
71
|
-
|
|
72
64
|
try {
|
|
73
65
|
await agent.step({ model, messages });
|
|
74
66
|
} catch (err) {
|
|
@@ -85,23 +77,15 @@ const agent = new AgentBudget({
|
|
|
85
77
|
apiKey: key,
|
|
86
78
|
limits: { maxCostUSD: 0.01 },
|
|
87
79
|
onExceeded: (usage) => {
|
|
88
|
-
// Log, alert, switch models — never throws
|
|
89
80
|
console.log(`Over budget: $${usage.totalCostUSD}`);
|
|
81
|
+
// Log, alert, switch models -- never throws
|
|
90
82
|
},
|
|
91
83
|
});
|
|
92
84
|
```
|
|
93
85
|
|
|
94
|
-
###
|
|
95
|
-
|
|
96
|
-
```ts
|
|
97
|
-
limits: {
|
|
98
|
-
maxCostUSD: 0.05,
|
|
99
|
-
preflightCheck: false, // skip pre-flight entirely
|
|
100
|
-
preflightOutputTokenEstimate: 2048, // safety buffer (default 512)
|
|
101
|
-
}
|
|
102
|
-
```
|
|
86
|
+
### Warning thresholds
|
|
103
87
|
|
|
104
|
-
|
|
88
|
+
Get notified before hitting limits:
|
|
105
89
|
|
|
106
90
|
```ts
|
|
107
91
|
const agent = new AgentBudget({
|
|
@@ -114,7 +98,9 @@ agent.on('budget:warning', (e) => {
|
|
|
114
98
|
});
|
|
115
99
|
```
|
|
116
100
|
|
|
117
|
-
|
|
101
|
+
## Adaptive model routing
|
|
102
|
+
|
|
103
|
+
Downgrade to cheaper models as budget depletes:
|
|
118
104
|
|
|
119
105
|
```ts
|
|
120
106
|
const agent = new AgentBudget({
|
|
@@ -122,20 +108,18 @@ const agent = new AgentBudget({
|
|
|
122
108
|
limits: { maxCostUSD: 5.00 },
|
|
123
109
|
adaptiveRouting: {
|
|
124
110
|
fallbackChain: [
|
|
125
|
-
'anthropic/claude-opus-4.8-fast', //
|
|
126
|
-
'openai/gpt-4o', //
|
|
127
|
-
'openrouter/free', //
|
|
111
|
+
'anthropic/claude-opus-4.8-fast', // best model
|
|
112
|
+
'openai/gpt-4o', // mid-tier
|
|
113
|
+
'openrouter/free', // emergency fallback
|
|
128
114
|
],
|
|
129
|
-
thresholds: [0.4, 0.75], // downgrade at 40% and 75%
|
|
115
|
+
thresholds: [0.4, 0.75], // downgrade at 40% and 75% of budget consumed
|
|
130
116
|
},
|
|
131
117
|
});
|
|
132
118
|
```
|
|
133
119
|
|
|
134
|
-
The router downgrades the model tier as the budget depletes. Each `step()` checks the current consumption against the thresholds and selects the appropriate model from the chain before the API call.
|
|
135
|
-
|
|
136
120
|
## Bring your own executor
|
|
137
121
|
|
|
138
|
-
Use any LLM provider
|
|
122
|
+
Use any LLM provider with a custom executor:
|
|
139
123
|
|
|
140
124
|
```ts
|
|
141
125
|
import { AgentBudget } from 'budget-agent';
|
|
@@ -165,66 +149,22 @@ const agent = new AgentBudget({
|
|
|
165
149
|
};
|
|
166
150
|
},
|
|
167
151
|
});
|
|
168
|
-
|
|
169
|
-
const response = await agent.step({
|
|
170
|
-
model: 'anthropic/claude-opus-4.8-fast',
|
|
171
|
-
messages: [{ role: 'user', content: 'Hello' }],
|
|
172
|
-
});
|
|
173
|
-
```
|
|
174
|
-
|
|
175
|
-
Or use raw fetch to any API:
|
|
176
|
-
|
|
177
|
-
```ts
|
|
178
|
-
const agent = new AgentBudget({
|
|
179
|
-
apiKey: 'none',
|
|
180
|
-
limits: { maxCostUSD: 0.05 },
|
|
181
|
-
executor: async (request) => {
|
|
182
|
-
const res = await fetch('http://localhost:11434/api/chat', {
|
|
183
|
-
method: 'POST',
|
|
184
|
-
body: JSON.stringify({ model: request.model, messages: request.messages }),
|
|
185
|
-
});
|
|
186
|
-
const data = await res.json();
|
|
187
|
-
return {
|
|
188
|
-
model: data.model,
|
|
189
|
-
usage: data.usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
|
|
190
|
-
choices: data.messages?.map((m: any) => ({
|
|
191
|
-
message: { role: m.role, content: m.content },
|
|
192
|
-
finish_reason: 'stop',
|
|
193
|
-
})) ?? [],
|
|
194
|
-
};
|
|
195
|
-
},
|
|
196
|
-
});
|
|
197
152
|
```
|
|
198
153
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
By default, the SDK calls OpenRouter's API. Configure the endpoint and headers:
|
|
202
|
-
|
|
203
|
-
```ts
|
|
204
|
-
const agent = new AgentBudget({
|
|
205
|
-
apiKey: process.env.OPENROUTER_API_KEY,
|
|
206
|
-
baseUrl: 'https://openrouter.ai/api/v1', // default — change for any OpenAI-compatible API
|
|
207
|
-
siteUrl: 'https://mysite.com', // OpenRouter attribution
|
|
208
|
-
appTitle: 'My App', // OpenRouter attribution
|
|
209
|
-
defaultHeaders: { 'X-Custom': 'value' }, // extra headers for every request
|
|
210
|
-
limits: { maxCostUSD: 0.10 },
|
|
211
|
-
});
|
|
212
|
-
```
|
|
213
|
-
|
|
214
|
-
Works with any OpenAI-compatible endpoint: OpenRouter, OpenAI, Together AI, Fireworks, LocalAI, Ollama (with compat layer), etc.
|
|
154
|
+
Works with OpenAI, Anthropic, Ollama, Together AI, Fireworks, LocalAI, or any OpenAI-compatible API.
|
|
215
155
|
|
|
216
156
|
## Features
|
|
217
157
|
|
|
218
|
-
- **Budget enforcement**
|
|
219
|
-
- **
|
|
220
|
-
- **Circuit breaker**
|
|
221
|
-
- **
|
|
222
|
-
- **Checkpoints**
|
|
223
|
-
- **
|
|
224
|
-
- **
|
|
225
|
-
- **
|
|
226
|
-
- **
|
|
227
|
-
- **OpenTelemetry**
|
|
158
|
+
- **Budget enforcement** -- cost, token, step, and wall-time limits checked before and after every LLM call
|
|
159
|
+
- **Adaptive routing** -- automatic model downgrade as budget depletes
|
|
160
|
+
- **Circuit breaker** -- detect repetition or stagnation and halt the agent
|
|
161
|
+
- **Auto-compress** -- truncate message history with an LLM-generated summary when tokens exceed the threshold
|
|
162
|
+
- **Checkpoints** -- save and resume agent state across restarts
|
|
163
|
+
- **Streaming** -- set `stream: true` and listen for `step:token` events
|
|
164
|
+
- **Events** -- subscribe to `step:start`, `step:end`, `step:token`, `budget:exceeded`, and more
|
|
165
|
+
- **Pricing cache** -- model pricing fetched from OpenRouter with configurable TTL
|
|
166
|
+
- **Rate-limit retry** -- automatic 429 retry with exponential backoff
|
|
167
|
+
- **OpenTelemetry** -- optional tracing spans via `telemetry: { enabled: true }`
|
|
228
168
|
|
|
229
169
|
## API
|
|
230
170
|
|
|
@@ -232,21 +172,20 @@ Works with any OpenAI-compatible endpoint: OpenRouter, OpenAI, Together AI, Fire
|
|
|
232
172
|
|
|
233
173
|
| Option | Type | Default | Description |
|
|
234
174
|
|--------|------|---------|-------------|
|
|
235
|
-
| `apiKey` | `string` |
|
|
236
|
-
| `limits
|
|
237
|
-
| `executor` | `
|
|
238
|
-
| `baseUrl` | `string` | `https://openrouter.ai/api/v1` | API base URL
|
|
239
|
-
| `defaultHeaders` | `object` |
|
|
240
|
-
| `autoCompress` | `object` |
|
|
241
|
-
| `circuitBreaker` | `object` |
|
|
242
|
-
| `adaptiveRouting` | `object` |
|
|
243
|
-
| `checkpoint` | `object` |
|
|
175
|
+
| `apiKey` | `string` | -- | Your provider API key |
|
|
176
|
+
| `limits` | `object` | -- | Budget limits (cost, tokens, steps, wall time) |
|
|
177
|
+
| `executor` | `function` | -- | Custom API executor (replaces built-in fetch) |
|
|
178
|
+
| `baseUrl` | `string` | `https://openrouter.ai/api/v1` | API base URL |
|
|
179
|
+
| `defaultHeaders` | `object` | -- | Extra HTTP headers |
|
|
180
|
+
| `autoCompress` | `object` | -- | Auto-compress messages at token threshold |
|
|
181
|
+
| `circuitBreaker` | `object` | -- | Detect repetition/stagnation loops |
|
|
182
|
+
| `adaptiveRouting` | `object` | -- | Downgrade model tiers as budget depletes |
|
|
183
|
+
| `checkpoint` | `object` | -- | Persist and resume agent state |
|
|
244
184
|
| `onExceeded` | `'abort' \| function` | `'abort'` | Strategy when budget exceeded |
|
|
245
|
-
| `onEvent` | `function` |
|
|
246
|
-
| `
|
|
247
|
-
| `
|
|
248
|
-
| `
|
|
249
|
-
| `telemetry` | `object` | — | Enable OpenTelemetry spans |
|
|
185
|
+
| `onEvent` | `function` | -- | Global event listener |
|
|
186
|
+
| `warningThreshold` | `number` | `0.75` | Fraction of limit that triggers warning |
|
|
187
|
+
| `pricingCacheTTLMs` | `number` | `300000` | Pricing cache TTL in ms |
|
|
188
|
+
| `telemetry` | `object` | -- | Enable OpenTelemetry spans |
|
|
250
189
|
|
|
251
190
|
### `agent.step(request)`
|
|
252
191
|
|
|
@@ -254,17 +193,15 @@ Make one LLM call. Checks limits before and after. Throws `BudgetError` if excee
|
|
|
254
193
|
|
|
255
194
|
```ts
|
|
256
195
|
const response = await agent.step({
|
|
257
|
-
model: 'anthropic/claude-opus-4.8-fast',
|
|
196
|
+
model: 'anthropic/claude-opus-4.8-fast',
|
|
258
197
|
messages: [{ role: 'user', content: 'Hi' }],
|
|
259
|
-
stream: true,
|
|
198
|
+
stream: true, // optional -- emit step:token events
|
|
260
199
|
});
|
|
261
200
|
```
|
|
262
201
|
|
|
263
|
-
**Budget enforcement with rollback.** When a step exceeds budget, the step is recorded for circuit-breaker analysis, then rolled back before throwing. The tracker stays clean for retry. The actual spend is available in the `BudgetError`.
|
|
264
|
-
|
|
265
202
|
### `agent.getUsage()`
|
|
266
203
|
|
|
267
|
-
Returns
|
|
204
|
+
Returns a snapshot of current usage:
|
|
268
205
|
|
|
269
206
|
```ts
|
|
270
207
|
{
|
|
@@ -279,7 +216,7 @@ Returns a snapshot of current usage:
|
|
|
279
216
|
|
|
280
217
|
### `agent.summary()`
|
|
281
218
|
|
|
282
|
-
Prints a formatted table to console and returns the
|
|
219
|
+
Prints a formatted table to console and returns the usage snapshot.
|
|
283
220
|
|
|
284
221
|
### `agent.reset()`
|
|
285
222
|
|
|
@@ -300,22 +237,19 @@ Static factory. Creates a new agent pre-loaded with checkpoint state.
|
|
|
300
237
|
## Events
|
|
301
238
|
|
|
302
239
|
```ts
|
|
303
|
-
agent.on('step:start', (
|
|
304
|
-
agent.on('step:token', (
|
|
305
|
-
agent.on('step:end', (
|
|
306
|
-
agent.on('budget:exceeded', (
|
|
307
|
-
agent.on('
|
|
308
|
-
agent.on('model:downgraded', (event) => console.log('Downgraded to', event.to));
|
|
240
|
+
agent.on('step:start', (e) => console.log('Step', e.stepIndex, 'started'));
|
|
241
|
+
agent.on('step:token', (e) => process.stdout.write(e.token));
|
|
242
|
+
agent.on('step:end', (e) => console.log('Step cost:', e.costUSD));
|
|
243
|
+
agent.on('budget:exceeded', (e) => console.log('Limit hit:', e.exceeded.reason));
|
|
244
|
+
agent.on('model:downgraded', (e) => console.log('Downgraded to', e.to));
|
|
309
245
|
```
|
|
310
246
|
|
|
311
247
|
## Testing
|
|
312
248
|
|
|
313
|
-
```
|
|
249
|
+
```bash
|
|
314
250
|
npm test
|
|
315
251
|
```
|
|
316
252
|
|
|
317
|
-
Runs 10 real-API tests against OpenRouter with simulated pricing.
|
|
318
|
-
|
|
319
253
|
## License
|
|
320
254
|
|
|
321
255
|
MIT
|
package/dist/index.js
CHANGED
|
@@ -519,9 +519,18 @@ export class AgentBudget {
|
|
|
519
519
|
const response = request.stream === true
|
|
520
520
|
? await this._readStream(res, request.model, stepIndex, Date.now(), pricing)
|
|
521
521
|
: (await res.json());
|
|
522
|
-
// OpenRouter may return HTTP 200 with an error
|
|
523
|
-
//
|
|
524
|
-
|
|
522
|
+
// OpenRouter may return HTTP 200 with an error body.
|
|
523
|
+
// Check top-level error first (rate limit, auth, etc.).
|
|
524
|
+
const bodyAny = response;
|
|
525
|
+
if (bodyAny.error) {
|
|
526
|
+
const code = bodyAny.error.code ?? 500;
|
|
527
|
+
const msg = bodyAny.error.message ?? 'Unknown error';
|
|
528
|
+
if (code === 429) {
|
|
529
|
+
throw new RateLimitError(429, 0, msg);
|
|
530
|
+
}
|
|
531
|
+
throw new UpstreamError(code, msg);
|
|
532
|
+
}
|
|
533
|
+
// Also check choices[0].error (provider-level rejection).
|
|
525
534
|
const choiceError = response.choices?.[0]?.error;
|
|
526
535
|
if (choiceError) {
|
|
527
536
|
throw new UpstreamError(choiceError.code, choiceError.message, choiceError.metadata);
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "budget-agent",
|
|
3
|
-
"version": "0.4.
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "0.4.6",
|
|
4
|
+
"description": "Control LLM agent costs with real-time token, cost, and step tracking. Set budget limits, enforce spend caps, and prevent runaway agents. Works with OpenAI, Anthropic, OpenRouter, Ollama, and any provider.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
7
7
|
"types": "./dist/index.d.ts",
|
|
@@ -18,19 +18,51 @@
|
|
|
18
18
|
"engines": {
|
|
19
19
|
"node": ">=18"
|
|
20
20
|
},
|
|
21
|
+
"scripts": {
|
|
22
|
+
"build": "tsc",
|
|
23
|
+
"prepublishOnly": "npm run build",
|
|
24
|
+
"typecheck": "tsc --noEmit",
|
|
25
|
+
"test": "tsx test/run.ts",
|
|
26
|
+
"test:unit": "tsx test/run.ts --unit",
|
|
27
|
+
"test:integration": "tsx test/run.ts --integration",
|
|
28
|
+
"test:gauntlet": "tsx test-gauntlet.ts",
|
|
29
|
+
"test:legacy": "tsx test-integration.ts"
|
|
30
|
+
},
|
|
21
31
|
"keywords": [
|
|
32
|
+
"llm",
|
|
22
33
|
"agent",
|
|
23
34
|
"budget",
|
|
24
|
-
"
|
|
25
|
-
"
|
|
26
|
-
"
|
|
35
|
+
"cost-control",
|
|
36
|
+
"token-limit",
|
|
37
|
+
"openrouter",
|
|
38
|
+
"openai",
|
|
39
|
+
"anthropic",
|
|
40
|
+
"llm-cost",
|
|
41
|
+
"agent-budget",
|
|
42
|
+
"spending-limit",
|
|
43
|
+
"rate-limit",
|
|
27
44
|
"circuit-breaker",
|
|
28
45
|
"checkpoint",
|
|
29
|
-
"
|
|
46
|
+
"token-tracker",
|
|
47
|
+
"cost-tracker",
|
|
48
|
+
"llm-agent",
|
|
49
|
+
"ai-agent",
|
|
50
|
+
"prompt-cost",
|
|
51
|
+
"usage-tracking",
|
|
52
|
+
"budget-enforcement",
|
|
53
|
+
"ollama",
|
|
54
|
+
"gpt-4",
|
|
55
|
+
"claude",
|
|
56
|
+
"llm-proxy"
|
|
30
57
|
],
|
|
31
58
|
"license": "MIT",
|
|
32
59
|
"repository": {
|
|
33
60
|
"type": "git",
|
|
34
61
|
"url": "https://github.com/duggal1/agent-budget.git"
|
|
62
|
+
},
|
|
63
|
+
"devDependencies": {
|
|
64
|
+
"dotenv": "^17.4.2",
|
|
65
|
+
"tsx": "^4.19.0",
|
|
66
|
+
"typescript": "^5.4.0"
|
|
35
67
|
}
|
|
36
68
|
}
|