budget-agent 0.4.6 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -137
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# budget-agent
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Stop runaway LLM agents from burning your API credits. Set hard limits on cost, tokens, steps, and wall time. The SDK checks every call before and after it hits your provider and blocks it when a limit would be exceeded -- so you never overspend.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Works with **OpenAI**, **Anthropic**, **OpenRouter**, **Ollama**, **Together AI**, **Fireworks**, and any OpenAI-compatible API.
|
|
6
6
|
|
|
7
7
|
## Install
|
|
8
8
|
|
|
@@ -10,114 +10,63 @@ Control LLM agent costs with real-time token, cost, and step tracking. Set budge
|
|
|
10
10
|
npm install budget-agent
|
|
11
11
|
```
|
|
12
12
|
|
|
13
|
-
##
|
|
13
|
+
## Usage
|
|
14
14
|
|
|
15
15
|
```ts
|
|
16
|
-
import { AgentBudget } from 'budget-agent';
|
|
16
|
+
import { AgentBudget, BudgetError } from 'budget-agent';
|
|
17
17
|
|
|
18
18
|
const agent = new AgentBudget({
|
|
19
19
|
apiKey: process.env.OPENROUTER_API_KEY,
|
|
20
|
-
limits: {
|
|
20
|
+
limits: {
|
|
21
|
+
maxCostUSD: 0.10,
|
|
22
|
+
maxSteps: 15,
|
|
23
|
+
maxTotalTokens: 50_000,
|
|
24
|
+
maxWallTimeMs: 30_000,
|
|
25
|
+
},
|
|
21
26
|
});
|
|
22
27
|
|
|
23
28
|
const response = await agent.step({
|
|
24
|
-
model: 'anthropic/claude-
|
|
29
|
+
model: 'anthropic/claude-sonnet-4-5',
|
|
25
30
|
messages: [{ role: 'user', content: 'Hello' }],
|
|
26
31
|
});
|
|
27
32
|
|
|
28
33
|
console.log(agent.getUsage());
|
|
29
|
-
// { steps: 1, totalCostUSD: 0.000015, totalInputTokens: 12, ... }
|
|
30
34
|
```
|
|
31
35
|
|
|
32
|
-
##
|
|
33
|
-
|
|
34
|
-
LLM API calls cost money. Agent loops multiply that cost across every step. Without guardrails, a single runaway agent can burn through your credits in seconds.
|
|
36
|
+
## How it works
|
|
35
37
|
|
|
36
|
-
|
|
38
|
+
Every `step()` call runs two budget checks:
|
|
37
39
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
Set limits on cost, tokens, steps, and wall time. Every limit is optional.
|
|
41
|
-
|
|
42
|
-
```ts
|
|
43
|
-
const agent = new AgentBudget({
|
|
44
|
-
apiKey: key,
|
|
45
|
-
limits: {
|
|
46
|
-
maxCostUSD: 0.05, // total USD before abort
|
|
47
|
-
maxSteps: 10, // total LLM calls before abort
|
|
48
|
-
maxInputTokens: 50000, // total input tokens
|
|
49
|
-
maxOutputTokens: 10000, // total output tokens
|
|
50
|
-
maxTotalTokens: 60000, // input + output combined
|
|
51
|
-
maxWallTimeMs: 60000, // 60 seconds wall clock
|
|
52
|
-
},
|
|
53
|
-
});
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
### How enforcement works
|
|
57
|
-
|
|
58
|
-
Each `step()` runs two checks:
|
|
59
|
-
|
|
60
|
-
1. **Pre-flight** -- before the API call. Estimates output cost and catches over-budget calls before spending money.
|
|
61
|
-
2. **Post-step** -- after recording real token/cost data. If a limit is exceeded, the step is rolled back from the tracker so you can retry without a stale balance.
|
|
40
|
+
1. **Before the API call** -- estimates cost and blocks if you'd go over budget.
|
|
41
|
+
2. **After the API call** -- records actual tokens/cost and blocks if a limit was hit. The step rolls back so you can retry cleanly.
|
|
62
42
|
|
|
63
43
|
```ts
|
|
64
44
|
try {
|
|
65
45
|
await agent.step({ model, messages });
|
|
66
46
|
} catch (err) {
|
|
67
47
|
if (err instanceof BudgetError) {
|
|
68
|
-
console.log(err.exceeded.reason);
|
|
48
|
+
console.log(err.exceeded.reason); // 'cost' | 'steps' | 'totalTokens' | 'wallTime'
|
|
49
|
+
console.log(err.exceeded.usage); // full usage snapshot at cutoff
|
|
69
50
|
}
|
|
70
51
|
}
|
|
71
52
|
```
|
|
72
53
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
```ts
|
|
76
|
-
const agent = new AgentBudget({
|
|
77
|
-
apiKey: key,
|
|
78
|
-
limits: { maxCostUSD: 0.01 },
|
|
79
|
-
onExceeded: (usage) => {
|
|
80
|
-
console.log(`Over budget: $${usage.totalCostUSD}`);
|
|
81
|
-
// Log, alert, switch models -- never throws
|
|
82
|
-
},
|
|
83
|
-
});
|
|
84
|
-
```
|
|
85
|
-
|
|
86
|
-
### Warning thresholds
|
|
87
|
-
|
|
88
|
-
Get notified before hitting limits:
|
|
89
|
-
|
|
90
|
-
```ts
|
|
91
|
-
const agent = new AgentBudget({
|
|
92
|
-
limits: { maxCostUSD: 0.10 },
|
|
93
|
-
warningThreshold: 0.5, // fire 'budget:warning' at 50% consumption
|
|
94
|
-
});
|
|
95
|
-
|
|
96
|
-
agent.on('budget:warning', (e) => {
|
|
97
|
-
// { reason: 'cost', pctConsumed: 0.51, remaining: 0.049 }
|
|
98
|
-
});
|
|
99
|
-
```
|
|
100
|
-
|
|
101
|
-
## Adaptive model routing
|
|
54
|
+
## Limits
|
|
102
55
|
|
|
103
|
-
|
|
56
|
+
Every limit is optional. Set only what you need.
|
|
104
57
|
|
|
105
58
|
```ts
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
],
|
|
115
|
-
thresholds: [0.4, 0.75], // downgrade at 40% and 75% budget consumed
|
|
116
|
-
},
|
|
117
|
-
});
|
|
59
|
+
limits: {
|
|
60
|
+
maxCostUSD: 0.05, // total USD across all steps
|
|
61
|
+
maxSteps: 10, // total LLM calls
|
|
62
|
+
maxInputTokens: 50000, // input tokens only
|
|
63
|
+
maxOutputTokens: 10000, // output tokens only
|
|
64
|
+
maxTotalTokens: 60000, // input + output combined
|
|
65
|
+
maxWallTimeMs: 60000, // wall clock time in ms
|
|
66
|
+
}
|
|
118
67
|
```
|
|
119
68
|
|
|
120
|
-
##
|
|
69
|
+
## Custom executor (any provider)
|
|
121
70
|
|
|
122
71
|
Use any LLM provider with a custom executor:
|
|
123
72
|
|
|
@@ -151,20 +100,23 @@ const agent = new AgentBudget({
|
|
|
151
100
|
});
|
|
152
101
|
```
|
|
153
102
|
|
|
154
|
-
Works with OpenAI, Anthropic, Ollama, Together AI, Fireworks, LocalAI, or any OpenAI-compatible
|
|
103
|
+
Works with OpenAI, Anthropic, Ollama, Together AI, Fireworks, LocalAI, or any OpenAI-compatible endpoint.
|
|
155
104
|
|
|
156
105
|
## Features
|
|
157
106
|
|
|
158
|
-
- **
|
|
159
|
-
- **
|
|
160
|
-
- **
|
|
161
|
-
- **
|
|
107
|
+
- **Cost limits** -- hard stop at a USD ceiling across all steps
|
|
108
|
+
- **Token limits** -- cap input, output, or total tokens
|
|
109
|
+
- **Step limits** -- max number of LLM calls
|
|
110
|
+
- **Wall time limits** -- kill agents that run too long
|
|
111
|
+
- **Pre-flight checks** -- estimate cost before spending money
|
|
112
|
+
- **Rollback on exceed** -- the step is rolled back so a retry starts clean
|
|
113
|
+
- **Adaptive routing** -- auto-downgrade to cheaper models as budget depletes
|
|
114
|
+
- **Circuit breaker** -- detect repetition or stagnation, halt the agent
|
|
115
|
+
- **Auto-compress** -- truncate message history when tokens exceed threshold
|
|
162
116
|
- **Checkpoints** -- save and resume agent state across restarts
|
|
163
|
-
- **Streaming** -- set `stream: true
|
|
164
|
-
- **Events** -- subscribe to `step:start`, `step:end`, `step:token`, `budget:exceeded`, and more
|
|
165
|
-
- **Pricing cache** -- model pricing fetched from OpenRouter with configurable TTL
|
|
117
|
+
- **Streaming** -- set `stream: true`, listen for `step:token` events
|
|
166
118
|
- **Rate-limit retry** -- automatic 429 retry with exponential backoff
|
|
167
|
-
- **OpenTelemetry** -- optional tracing spans
|
|
119
|
+
- **OpenTelemetry** -- optional tracing spans
|
|
168
120
|
|
|
169
121
|
## API
|
|
170
122
|
|
|
@@ -172,82 +124,54 @@ Works with OpenAI, Anthropic, Ollama, Together AI, Fireworks, LocalAI, or any Op
|
|
|
172
124
|
|
|
173
125
|
| Option | Type | Default | Description |
|
|
174
126
|
|--------|------|---------|-------------|
|
|
175
|
-
| `apiKey` | `string` |
|
|
176
|
-
| `limits` | `object` |
|
|
177
|
-
| `executor` | `function` | -- | Custom API executor
|
|
127
|
+
| `apiKey` | `string` | required | Your provider API key |
|
|
128
|
+
| `limits` | `object` | required | Budget limits (cost, tokens, steps, wall time) |
|
|
129
|
+
| `executor` | `function` | -- | Custom API executor |
|
|
178
130
|
| `baseUrl` | `string` | `https://openrouter.ai/api/v1` | API base URL |
|
|
179
131
|
| `defaultHeaders` | `object` | -- | Extra HTTP headers |
|
|
180
132
|
| `autoCompress` | `object` | -- | Auto-compress messages at token threshold |
|
|
181
|
-
| `circuitBreaker` | `object` | -- | Detect repetition/stagnation
|
|
182
|
-
| `adaptiveRouting` | `object` | -- | Downgrade
|
|
133
|
+
| `circuitBreaker` | `object` | -- | Detect repetition/stagnation |
|
|
134
|
+
| `adaptiveRouting` | `object` | -- | Downgrade models as budget depletes |
|
|
183
135
|
| `checkpoint` | `object` | -- | Persist and resume agent state |
|
|
184
|
-
| `onExceeded` | `'abort' \| function` | `'abort'` | Strategy when
|
|
136
|
+
| `onExceeded` | `'abort' \| function` | `'abort'` | Strategy when a limit is hit |
|
|
185
137
|
| `onEvent` | `function` | -- | Global event listener |
|
|
186
|
-
| `warningThreshold` | `number` | `0.75` |
|
|
187
|
-
| `pricingCacheTTLMs` | `number` | `300000` | Pricing cache TTL
|
|
138
|
+
| `warningThreshold` | `number` | `0.75` | Emits a `budget:warning` event at this fraction of any limit |
|
|
139
|
+
| `pricingCacheTTLMs` | `number` | `300000` | Pricing cache TTL in milliseconds |
|
|
188
140
|
| `telemetry` | `object` | -- | Enable OpenTelemetry spans |
|
|
189
141
|
|
|
190
142
|
### `agent.step(request)`
|
|
191
143
|
|
|
192
|
-
|
|
144
|
+
Makes one LLM call, checking limits before and after. Throws `BudgetError` when a limit is exceeded.
|
|
193
145
|
|
|
194
146
|
```ts
|
|
195
147
|
const response = await agent.step({
|
|
196
|
-
model: 'anthropic/claude-
|
|
148
|
+
model: 'anthropic/claude-sonnet-4-5',
|
|
197
149
|
messages: [{ role: 'user', content: 'Hi' }],
|
|
198
|
-
stream: true,
|
|
150
|
+
stream: true,
|
|
199
151
|
});
|
|
200
152
|
```
|
|
201
153
|
|
|
202
154
|
### `agent.getUsage()`
|
|
203
155
|
|
|
204
|
-
Returns current usage
|
|
205
|
-
|
|
206
|
-
```ts
|
|
207
|
-
{
|
|
208
|
-
steps: number;
|
|
209
|
-
totalInputTokens: number;
|
|
210
|
-
totalOutputTokens: number;
|
|
211
|
-
totalCostUSD: number;
|
|
212
|
-
elapsedMs: number;
|
|
213
|
-
stepHistory: StepUsage[];
|
|
214
|
-
}
|
|
215
|
-
```
|
|
216
|
-
|
|
217
|
-
### `agent.summary()`
|
|
218
|
-
|
|
219
|
-
Prints a formatted table to console and returns the usage snapshot.
|
|
156
|
+
Returns current usage: `steps`, `totalInputTokens`, `totalOutputTokens`, `totalCostUSD`, `elapsedMs`, `stepHistory`.
|
|
220
157
|
|
|
221
158
|
### `agent.reset()`
|
|
222
159
|
|
|
223
|
-
Reset all
|
|
224
|
-
|
|
225
|
-
### `agent.compressMessages(messages, keepLastN?)`
|
|
226
|
-
|
|
227
|
-
Manually compress a message array via LLM summary.
|
|
160
|
+
Reset all counters.
|
|
228
161
|
|
|
229
|
-
### `agent.
|
|
162
|
+
### `agent.refreshPricing()`
|
|
230
163
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
### `AgentBudget.resume(options, checkpointPath?)`
|
|
234
|
-
|
|
235
|
-
Static factory. Creates a new agent pre-loaded with checkpoint state.
|
|
164
|
+
Force a re-fetch of model prices from OpenRouter.
|
|
236
165
|
|
|
237
166
|
## Events
|
|
238
167
|
|
|
239
168
|
```ts
|
|
240
|
-
agent.on('step:start', (e) =>
|
|
241
|
-
agent.on('step:token', (e) =>
|
|
242
|
-
agent.on('step:end', (e) =>
|
|
243
|
-
agent.on('budget:exceeded', (e) =>
|
|
244
|
-
agent.on('
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
## Testing
|
|
248
|
-
|
|
249
|
-
```bash
|
|
250
|
-
npm test
|
|
169
|
+
agent.on('step:start', (e) => {});
|
|
170
|
+
agent.on('step:token', (e) => {});
|
|
171
|
+
agent.on('step:end', (e) => {});
|
|
172
|
+
agent.on('budget:exceeded', (e) => {});
|
|
173
|
+
agent.on('budget:warning', (e) => {});
|
|
174
|
+
agent.on('model:downgraded', (e) => {});
|
|
251
175
|
```
|
|
252
176
|
|
|
253
177
|
## License
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "budget-agent",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.7",
|
|
4
4
|
"description": "Control LLM agent costs with real-time token, cost, and step tracking. Set budget limits, enforce spend caps, and prevent runaway agents. Works with OpenAI, Anthropic, OpenRouter, Ollama, and any provider.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|