budget-agent 0.4.5 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -213
- package/dist/index.js +12 -3
- package/package.json +38 -6
package/README.md
CHANGED
|
@@ -1,141 +1,74 @@
|
|
|
1
1
|
# budget-agent
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Stop runaway LLM agents from burning your API credits. Set hard limits on cost, tokens, steps, and wall time. The SDK checks your budget before and after every call to your provider, so an agent is stopped as soon as a limit is reached.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Works with **OpenAI**, **Anthropic**, **OpenRouter**, **Ollama**, **Together AI**, **Fireworks**, and any OpenAI-compatible API.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
6
10
|
npm install budget-agent
|
|
7
11
|
```
|
|
8
12
|
|
|
9
|
-
##
|
|
10
|
-
|
|
11
|
-
You bring your own API key and model. The SDK calls your provider.
|
|
13
|
+
## Usage
|
|
12
14
|
|
|
13
15
|
```ts
|
|
14
|
-
import { AgentBudget } from 'budget-agent';
|
|
16
|
+
import { AgentBudget, BudgetError } from 'budget-agent';
|
|
15
17
|
|
|
16
18
|
const agent = new AgentBudget({
|
|
17
19
|
apiKey: process.env.OPENROUTER_API_KEY,
|
|
18
|
-
limits: {
|
|
20
|
+
limits: {
|
|
21
|
+
maxCostUSD: 0.10,
|
|
22
|
+
maxSteps: 15,
|
|
23
|
+
maxTotalTokens: 50_000,
|
|
24
|
+
maxWallTimeMs: 30_000,
|
|
25
|
+
},
|
|
19
26
|
});
|
|
20
27
|
|
|
21
28
|
const response = await agent.step({
|
|
22
|
-
model: 'anthropic/claude-
|
|
29
|
+
model: 'anthropic/claude-sonnet-4-5',
|
|
23
30
|
messages: [{ role: 'user', content: 'Hello' }],
|
|
24
31
|
});
|
|
25
32
|
|
|
26
33
|
console.log(agent.getUsage());
|
|
27
|
-
// { steps: 1, totalCostUSD: 0.000015, totalInputTokens: 12, ... }
|
|
28
34
|
```
|
|
29
35
|
|
|
30
36
|
## How it works
|
|
31
37
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
1. Checks budget before the call (pre-flight)
|
|
35
|
-
2. Makes the API request to your provider
|
|
36
|
-
3. Tracks tokens, cost, and duration
|
|
37
|
-
4. Checks budget after the call (post-step)
|
|
38
|
-
5. Emits events for streaming, warnings, and overages
|
|
39
|
-
|
|
40
|
-
No provider is bundled. No model is defaulted. You bring everything.
|
|
41
|
-
|
|
42
|
-
## Limits
|
|
43
|
-
|
|
44
|
-
Budget guardrails that stop your agent before it spends too much:
|
|
38
|
+
Every `step()` call runs two budget checks:
|
|
45
39
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
maxCostUSD: 0.05, // total USD before the agent aborts
|
|
49
|
-
maxSteps: 10, // total LLM calls before abort
|
|
50
|
-
maxInputTokens: 50000, // total input tokens sent to models
|
|
51
|
-
maxOutputTokens: 10000, // total output tokens received
|
|
52
|
-
maxTotalTokens: 60000, // input + output combined
|
|
53
|
-
maxWallTimeMs: 60000, // 60 seconds wall clock
|
|
54
|
-
}
|
|
55
|
-
```
|
|
56
|
-
|
|
57
|
-
Every limit is optional. Omit what you don't want to enforce.
|
|
58
|
-
|
|
59
|
-
### How enforcement works
|
|
60
|
-
|
|
61
|
-
Each `step()` runs two checks:
|
|
62
|
-
|
|
63
|
-
1. **Pre-flight** — before the API call. Estimates output cost (default 512 tokens) and catches over-budget calls before burning money.
|
|
64
|
-
2. **Post-step** — after recording the real token/cost. If exceeded, the step is **rolled back** from the tracker so you can retry without a stale balance.
|
|
40
|
+
1. **Before the API call** -- estimates cost and blocks if you'd go over budget.
|
|
41
|
+
2. **After the API call** -- records the actual tokens and cost, and blocks if a limit was exceeded. The over-budget step is rolled back from the usage tracker so you can retry cleanly.
|
|
65
42
|
|
|
66
43
|
```ts
|
|
67
|
-
const agent = new AgentBudget({
|
|
68
|
-
apiKey: key,
|
|
69
|
-
limits: { maxCostUSD: 0.01, maxSteps: 3 },
|
|
70
|
-
});
|
|
71
|
-
|
|
72
44
|
try {
|
|
73
45
|
await agent.step({ model, messages });
|
|
74
46
|
} catch (err) {
|
|
75
47
|
if (err instanceof BudgetError) {
|
|
76
|
-
console.log(err.exceeded.reason);
|
|
48
|
+
console.log(err.exceeded.reason); // 'cost' | 'steps' | 'totalTokens' | 'wallTime'
|
|
49
|
+
console.log(err.exceeded.usage); // full usage snapshot at cutoff
|
|
77
50
|
}
|
|
78
51
|
}
|
|
79
52
|
```
|
|
80
53
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
```ts
|
|
84
|
-
const agent = new AgentBudget({
|
|
85
|
-
apiKey: key,
|
|
86
|
-
limits: { maxCostUSD: 0.01 },
|
|
87
|
-
onExceeded: (usage) => {
|
|
88
|
-
// Log, alert, switch models — never throws
|
|
89
|
-
console.log(`Over budget: $${usage.totalCostUSD}`);
|
|
90
|
-
},
|
|
91
|
-
});
|
|
92
|
-
```
|
|
54
|
+
## Limits
|
|
93
55
|
|
|
94
|
-
|
|
56
|
+
Every limit is optional. Set only what you need.
|
|
95
57
|
|
|
96
58
|
```ts
|
|
97
59
|
limits: {
|
|
98
|
-
maxCostUSD:
|
|
99
|
-
|
|
100
|
-
|
|
60
|
+
maxCostUSD: 0.05, // total USD across all steps
|
|
61
|
+
maxSteps: 10, // total LLM calls
|
|
62
|
+
maxInputTokens: 50000, // input tokens only
|
|
63
|
+
maxOutputTokens: 10000, // output tokens only
|
|
64
|
+
maxTotalTokens: 60000, // input + output combined
|
|
65
|
+
maxWallTimeMs: 60000, // wall clock time in ms
|
|
101
66
|
}
|
|
102
67
|
```
|
|
103
68
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
```ts
|
|
107
|
-
const agent = new AgentBudget({
|
|
108
|
-
limits: { maxCostUSD: 0.10 },
|
|
109
|
-
warningThreshold: 0.5, // fire 'budget:warning' at 50% consumption
|
|
110
|
-
});
|
|
111
|
-
|
|
112
|
-
agent.on('budget:warning', (e) => {
|
|
113
|
-
// { reason: 'cost', pctConsumed: 0.51, remaining: 0.049 }
|
|
114
|
-
});
|
|
115
|
-
```
|
|
116
|
-
|
|
117
|
-
### Combine with adaptive routing
|
|
118
|
-
|
|
119
|
-
```ts
|
|
120
|
-
const agent = new AgentBudget({
|
|
121
|
-
apiKey: key,
|
|
122
|
-
limits: { maxCostUSD: 5.00 },
|
|
123
|
-
adaptiveRouting: {
|
|
124
|
-
fallbackChain: [
|
|
125
|
-
'anthropic/claude-opus-4.8-fast', // $15/M tokens — best model
|
|
126
|
-
'openai/gpt-4o', // $5/M tokens
|
|
127
|
-
'openrouter/free', // $0 — emergency
|
|
128
|
-
],
|
|
129
|
-
thresholds: [0.4, 0.75], // downgrade at 40% and 75% of budget consumed
|
|
130
|
-
},
|
|
131
|
-
});
|
|
132
|
-
```
|
|
133
|
-
|
|
134
|
-
The router downgrades the model tier as the budget depletes. Each `step()` checks the current consumption against the thresholds and selects the appropriate model from the chain before the API call.
|
|
135
|
-
|
|
136
|
-
## Bring your own executor
|
|
69
|
+
## Custom executor (any provider)
|
|
137
70
|
|
|
138
|
-
Use any LLM provider
|
|
71
|
+
Use any LLM provider with a custom executor:
|
|
139
72
|
|
|
140
73
|
```ts
|
|
141
74
|
import { AgentBudget } from 'budget-agent';
|
|
@@ -165,66 +98,25 @@ const agent = new AgentBudget({
|
|
|
165
98
|
};
|
|
166
99
|
},
|
|
167
100
|
});
|
|
168
|
-
|
|
169
|
-
const response = await agent.step({
|
|
170
|
-
model: 'anthropic/claude-opus-4.8-fast',
|
|
171
|
-
messages: [{ role: 'user', content: 'Hello' }],
|
|
172
|
-
});
|
|
173
|
-
```
|
|
174
|
-
|
|
175
|
-
Or use raw fetch to any API:
|
|
176
|
-
|
|
177
|
-
```ts
|
|
178
|
-
const agent = new AgentBudget({
|
|
179
|
-
apiKey: 'none',
|
|
180
|
-
limits: { maxCostUSD: 0.05 },
|
|
181
|
-
executor: async (request) => {
|
|
182
|
-
const res = await fetch('http://localhost:11434/api/chat', {
|
|
183
|
-
method: 'POST',
|
|
184
|
-
body: JSON.stringify({ model: request.model, messages: request.messages }),
|
|
185
|
-
});
|
|
186
|
-
const data = await res.json();
|
|
187
|
-
return {
|
|
188
|
-
model: data.model,
|
|
189
|
-
usage: data.usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
|
|
190
|
-
choices: data.messages?.map((m: any) => ({
|
|
191
|
-
message: { role: m.role, content: m.content },
|
|
192
|
-
finish_reason: 'stop',
|
|
193
|
-
})) ?? [],
|
|
194
|
-
};
|
|
195
|
-
},
|
|
196
|
-
});
|
|
197
101
|
```
|
|
198
102
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
By default, the SDK calls OpenRouter's API. Configure the endpoint and headers:
|
|
202
|
-
|
|
203
|
-
```ts
|
|
204
|
-
const agent = new AgentBudget({
|
|
205
|
-
apiKey: process.env.OPENROUTER_API_KEY,
|
|
206
|
-
baseUrl: 'https://openrouter.ai/api/v1', // default — change for any OpenAI-compatible API
|
|
207
|
-
siteUrl: 'https://mysite.com', // OpenRouter attribution
|
|
208
|
-
appTitle: 'My App', // OpenRouter attribution
|
|
209
|
-
defaultHeaders: { 'X-Custom': 'value' }, // extra headers for every request
|
|
210
|
-
limits: { maxCostUSD: 0.10 },
|
|
211
|
-
});
|
|
212
|
-
```
|
|
213
|
-
|
|
214
|
-
Works with any OpenAI-compatible endpoint: OpenRouter, OpenAI, Together AI, Fireworks, LocalAI, Ollama (with compat layer), etc.
|
|
103
|
+
Works with OpenAI, Anthropic, Ollama, Together AI, Fireworks, LocalAI, or any OpenAI-compatible endpoint.
|
|
215
104
|
|
|
216
105
|
## Features
|
|
217
106
|
|
|
218
|
-
- **
|
|
219
|
-
- **
|
|
220
|
-
- **
|
|
221
|
-
- **
|
|
222
|
-
- **
|
|
223
|
-
- **
|
|
224
|
-
- **
|
|
225
|
-
- **
|
|
226
|
-
- **
|
|
227
|
-
- **
|
|
107
|
+
- **Cost limits** -- hard stop at a USD ceiling across all steps
|
|
108
|
+
- **Token limits** -- cap input, output, or total tokens
|
|
109
|
+
- **Step limits** -- max number of LLM calls
|
|
110
|
+
- **Wall time limits** -- kill agents that run too long
|
|
111
|
+
- **Pre-flight checks** -- estimate cost before spending money
|
|
112
|
+
- **Rollback on exceed** -- an over-budget step is rolled back so a retry starts from clean usage totals
|
|
113
|
+
- **Adaptive routing** -- auto-downgrade to cheaper models as budget depletes
|
|
114
|
+
- **Circuit breaker** -- detect repetition or stagnation, halt the agent
|
|
115
|
+
- **Auto-compress** -- truncate message history when tokens exceed threshold
|
|
116
|
+
- **Checkpoints** -- save and resume agent state across restarts
|
|
117
|
+
- **Streaming** -- set `stream: true`, listen for `step:token` events
|
|
118
|
+
- **Rate-limit retry** -- automatic 429 retry with exponential backoff
|
|
119
|
+
- **OpenTelemetry** -- optional tracing spans
|
|
228
120
|
|
|
229
121
|
## API
|
|
230
122
|
|
|
@@ -232,90 +124,56 @@ Works with any OpenAI-compatible endpoint: OpenRouter, OpenAI, Together AI, Fire
|
|
|
232
124
|
|
|
233
125
|
| Option | Type | Default | Description |
|
|
234
126
|
|--------|------|---------|-------------|
|
|
235
|
-
| `apiKey` | `string` |
|
|
236
|
-
| `limits
|
|
237
|
-
| `executor` | `
|
|
238
|
-
| `baseUrl` | `string` | `https://openrouter.ai/api/v1` | API base URL
|
|
239
|
-
| `defaultHeaders` | `object` |
|
|
240
|
-
| `autoCompress` | `object` |
|
|
241
|
-
| `circuitBreaker` | `object` |
|
|
242
|
-
| `adaptiveRouting` | `object` |
|
|
243
|
-
| `checkpoint` | `object` |
|
|
244
|
-
| `onExceeded` | `'abort' \| function` | `'abort'` | Strategy when
|
|
245
|
-
| `onEvent` | `function` |
|
|
246
|
-
| `
|
|
247
|
-
| `
|
|
248
|
-
| `
|
|
249
|
-
| `telemetry` | `object` | — | Enable OpenTelemetry spans |
|
|
127
|
+
| `apiKey` | `string` | required | Your provider API key |
|
|
128
|
+
| `limits` | `object` | required | Budget limits (cost, tokens, steps, wall time) |
|
|
129
|
+
| `executor` | `function` | -- | Custom API executor |
|
|
130
|
+
| `baseUrl` | `string` | `https://openrouter.ai/api/v1` | API base URL |
|
|
131
|
+
| `defaultHeaders` | `object` | -- | Extra HTTP headers |
|
|
132
|
+
| `autoCompress` | `object` | -- | Auto-compress messages at token threshold |
|
|
133
|
+
| `circuitBreaker` | `object` | -- | Detect repetition/stagnation |
|
|
134
|
+
| `adaptiveRouting` | `object` | -- | Downgrade models as budget depletes |
|
|
135
|
+
| `checkpoint` | `object` | -- | Persist and resume agent state |
|
|
136
|
+
| `onExceeded` | `'abort' \| function` | `'abort'` | Strategy when a limit is hit |
|
|
137
|
+
| `onEvent` | `function` | -- | Global event listener |
|
|
138
|
+
| `warningThreshold` | `number` | `0.75` | Warning at this fraction of any limit |
|
|
139
|
+
| `pricingCacheTTLMs` | `number` | `300000` | Pricing cache TTL in milliseconds |
|
|
140
|
+
| `telemetry` | `object` | -- | Enable OpenTelemetry spans |
|
|
250
141
|
|
|
251
142
|
### `agent.step(request)`
|
|
252
143
|
|
|
253
|
-
|
|
144
|
+
Makes one LLM call. Checks the limits before and after the call. Throws `BudgetError` when a limit is exceeded.
|
|
254
145
|
|
|
255
146
|
```ts
|
|
256
147
|
const response = await agent.step({
|
|
257
|
-
model: 'anthropic/claude-
|
|
148
|
+
model: 'anthropic/claude-sonnet-4-5',
|
|
258
149
|
messages: [{ role: 'user', content: 'Hi' }],
|
|
259
|
-
stream: true,
|
|
150
|
+
stream: true,
|
|
260
151
|
});
|
|
261
152
|
```
|
|
262
153
|
|
|
263
|
-
**Budget enforcement with rollback.** When a step exceeds budget, the step is recorded for circuit-breaker analysis, then rolled back before throwing. The tracker stays clean for retry. The actual spend is available in the `BudgetError`.
|
|
264
|
-
|
|
265
154
|
### `agent.getUsage()`
|
|
266
155
|
|
|
267
|
-
Returns
|
|
268
|
-
|
|
269
|
-
```ts
|
|
270
|
-
{
|
|
271
|
-
steps: number;
|
|
272
|
-
totalInputTokens: number;
|
|
273
|
-
totalOutputTokens: number;
|
|
274
|
-
totalCostUSD: number;
|
|
275
|
-
elapsedMs: number;
|
|
276
|
-
stepHistory: StepUsage[];
|
|
277
|
-
}
|
|
278
|
-
```
|
|
279
|
-
|
|
280
|
-
### `agent.summary()`
|
|
281
|
-
|
|
282
|
-
Prints a formatted table to console and returns the same usage snapshot.
|
|
156
|
+
Returns current usage: `steps`, `totalInputTokens`, `totalOutputTokens`, `totalCostUSD`, `elapsedMs`, `stepHistory`.
|
|
283
157
|
|
|
284
158
|
### `agent.reset()`
|
|
285
159
|
|
|
286
|
-
Reset all
|
|
287
|
-
|
|
288
|
-
### `agent.compressMessages(messages, keepLastN?)`
|
|
289
|
-
|
|
290
|
-
Manually compress a message array via LLM summary.
|
|
291
|
-
|
|
292
|
-
### `agent.loadCheckpoint()` / `agent.clearCheckpoint()`
|
|
293
|
-
|
|
294
|
-
Load or clear persisted checkpoint state.
|
|
160
|
+
Reset all counters.
|
|
295
161
|
|
|
296
|
-
### `
|
|
162
|
+
### `agent.refreshPricing()`
|
|
297
163
|
|
|
298
|
-
|
|
164
|
+
Force a re-fetch of model prices from OpenRouter.
|
|
299
165
|
|
|
300
166
|
## Events
|
|
301
167
|
|
|
302
168
|
```ts
|
|
303
|
-
agent.on('step:start', (
|
|
304
|
-
agent.on('step:token', (
|
|
305
|
-
agent.on('step:end', (
|
|
306
|
-
agent.on('budget:exceeded', (
|
|
307
|
-
agent.on('
|
|
308
|
-
agent.on('model:downgraded', (
|
|
169
|
+
agent.on('step:start', (e) => {});
|
|
170
|
+
agent.on('step:token', (e) => {});
|
|
171
|
+
agent.on('step:end', (e) => {});
|
|
172
|
+
agent.on('budget:exceeded', (e) => {});
|
|
173
|
+
agent.on('budget:warning', (e) => {});
|
|
174
|
+
agent.on('model:downgraded', (e) => {});
|
|
309
175
|
```
|
|
310
176
|
|
|
311
|
-
## Testing
|
|
312
|
-
|
|
313
|
-
```
|
|
314
|
-
npm test
|
|
315
|
-
```
|
|
316
|
-
|
|
317
|
-
Runs 10 real-API tests against OpenRouter with simulated pricing.
|
|
318
|
-
|
|
319
177
|
## License
|
|
320
178
|
|
|
321
179
|
MIT
|
package/dist/index.js
CHANGED
|
@@ -519,9 +519,18 @@ export class AgentBudget {
|
|
|
519
519
|
const response = request.stream === true
|
|
520
520
|
? await this._readStream(res, request.model, stepIndex, Date.now(), pricing)
|
|
521
521
|
: (await res.json());
|
|
522
|
-
// OpenRouter may return HTTP 200 with an error
|
|
523
|
-
//
|
|
524
|
-
|
|
522
|
+
// OpenRouter may return HTTP 200 with an error body.
|
|
523
|
+
// Check top-level error first (rate limit, auth, etc.).
|
|
524
|
+
const bodyAny = response;
|
|
525
|
+
if (bodyAny.error) {
|
|
526
|
+
const code = bodyAny.error.code ?? 500;
|
|
527
|
+
const msg = bodyAny.error.message ?? 'Unknown error';
|
|
528
|
+
if (code === 429) {
|
|
529
|
+
throw new RateLimitError(429, 0, msg);
|
|
530
|
+
}
|
|
531
|
+
throw new UpstreamError(code, msg);
|
|
532
|
+
}
|
|
533
|
+
// Also check choices[0].error (provider-level rejection).
|
|
525
534
|
const choiceError = response.choices?.[0]?.error;
|
|
526
535
|
if (choiceError) {
|
|
527
536
|
throw new UpstreamError(choiceError.code, choiceError.message, choiceError.metadata);
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "budget-agent",
|
|
3
|
-
"version": "0.4.
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "0.4.7",
|
|
4
|
+
"description": "Control LLM agent costs with real-time token, cost, and step tracking. Set budget limits, enforce spend caps, and prevent runaway agents. Works with OpenAI, Anthropic, OpenRouter, Ollama, and any provider.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
7
7
|
"types": "./dist/index.d.ts",
|
|
@@ -18,19 +18,51 @@
|
|
|
18
18
|
"engines": {
|
|
19
19
|
"node": ">=18"
|
|
20
20
|
},
|
|
21
|
+
"scripts": {
|
|
22
|
+
"build": "tsc",
|
|
23
|
+
"prepublishOnly": "npm run build",
|
|
24
|
+
"typecheck": "tsc --noEmit",
|
|
25
|
+
"test": "tsx test/run.ts",
|
|
26
|
+
"test:unit": "tsx test/run.ts --unit",
|
|
27
|
+
"test:integration": "tsx test/run.ts --integration",
|
|
28
|
+
"test:gauntlet": "tsx test-gauntlet.ts",
|
|
29
|
+
"test:legacy": "tsx test-integration.ts"
|
|
30
|
+
},
|
|
21
31
|
"keywords": [
|
|
32
|
+
"llm",
|
|
22
33
|
"agent",
|
|
23
34
|
"budget",
|
|
24
|
-
"
|
|
25
|
-
"
|
|
26
|
-
"
|
|
35
|
+
"cost-control",
|
|
36
|
+
"token-limit",
|
|
37
|
+
"openrouter",
|
|
38
|
+
"openai",
|
|
39
|
+
"anthropic",
|
|
40
|
+
"llm-cost",
|
|
41
|
+
"agent-budget",
|
|
42
|
+
"spending-limit",
|
|
43
|
+
"rate-limit",
|
|
27
44
|
"circuit-breaker",
|
|
28
45
|
"checkpoint",
|
|
29
|
-
"
|
|
46
|
+
"token-tracker",
|
|
47
|
+
"cost-tracker",
|
|
48
|
+
"llm-agent",
|
|
49
|
+
"ai-agent",
|
|
50
|
+
"prompt-cost",
|
|
51
|
+
"usage-tracking",
|
|
52
|
+
"budget-enforcement",
|
|
53
|
+
"ollama",
|
|
54
|
+
"gpt-4",
|
|
55
|
+
"claude",
|
|
56
|
+
"llm-proxy"
|
|
30
57
|
],
|
|
31
58
|
"license": "MIT",
|
|
32
59
|
"repository": {
|
|
33
60
|
"type": "git",
|
|
34
61
|
"url": "https://github.com/duggal1/agent-budget.git"
|
|
62
|
+
},
|
|
63
|
+
"devDependencies": {
|
|
64
|
+
"dotenv": "^17.4.2",
|
|
65
|
+
"tsx": "^4.19.0",
|
|
66
|
+
"typescript": "^5.4.0"
|
|
35
67
|
}
|
|
36
68
|
}
|