budget-agent 0.4.4 → 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +70 -136
  2. package/dist/index.js +12 -3
  3. package/package.json +38 -6
package/README.md CHANGED
@@ -1,15 +1,17 @@
1
- # agent-budget
1
+ # budget-agent
2
2
 
3
- Budget-aware enforcement layer for LLM agents. Track token, cost, and step usage in real time. Enforce limits before and after every LLM call. Works with any provider.
3
+ Control LLM agent costs with real-time token, cost, and step tracking. Set budget limits, enforce spend caps, and prevent runaway agents from burning through your API credits.
4
4
 
5
- ```
5
+ **Works with OpenAI, Anthropic, OpenRouter, Ollama, Together AI, Fireworks, and any OpenAI-compatible endpoint.**
6
+
7
+ ## Install
8
+
9
+ ```bash
6
10
  npm install budget-agent
7
11
  ```
8
12
 
9
13
  ## Quick start
10
14
 
11
- You bring your own API key and model. The SDK calls your provider.
12
-
13
15
  ```ts
14
16
  import { AgentBudget } from 'budget-agent';
15
17
 
@@ -27,48 +29,38 @@ console.log(agent.getUsage());
27
29
  // { steps: 1, totalCostUSD: 0.000015, totalInputTokens: 12, ... }
28
30
  ```
29
31
 
30
- ## How it works
32
+ ## Why use this
31
33
 
32
- You provide the **model**, the **messages**, and your **API key**. The SDK:
34
+ LLM API calls cost money. Agent loops multiply that cost across every step. Without guardrails, a single runaway agent can burn through your credits in seconds.
33
35
 
34
- 1. Checks budget before the call (pre-flight)
35
- 2. Makes the API request to your provider
36
- 3. Tracks tokens, cost, and duration
37
- 4. Checks budget after the call (post-step)
38
- 5. Emits events for streaming, warnings, and overages
36
+ This SDK sits between your agent and the LLM provider. It tracks every call, checks your budget before each one, and stops the agent when it hits a limit. No provider is bundled. No model is defaulted. You bring everything.
39
37
 
40
- No provider is bundled. No model is defaulted. You bring everything.
38
+ ## Budget limits
41
39
 
42
- ## Limits
43
-
44
- Budget guardrails that stop your agent before it spends too much:
40
+ Set limits on cost, tokens, steps, and wall time. Every limit is optional.
45
41
 
46
42
  ```ts
47
- limits: {
48
- maxCostUSD: 0.05, // total USD before the agent aborts
49
- maxSteps: 10, // total LLM calls before abort
50
- maxInputTokens: 50000, // total input tokens sent to models
51
- maxOutputTokens: 10000, // total output tokens received
52
- maxTotalTokens: 60000, // input + output combined
53
- maxWallTimeMs: 60000, // 60 seconds wall clock
54
- }
43
+ const agent = new AgentBudget({
44
+ apiKey: key,
45
+ limits: {
46
+ maxCostUSD: 0.05, // total USD before abort
47
+ maxSteps: 10, // total LLM calls before abort
48
+ maxInputTokens: 50000, // total input tokens
49
+ maxOutputTokens: 10000, // total output tokens
50
+ maxTotalTokens: 60000, // input + output combined
51
+ maxWallTimeMs: 60000, // 60 seconds wall clock
52
+ },
53
+ });
55
54
  ```
56
55
 
57
- Every limit is optional. Omit what you don't want to enforce.
58
-
59
56
  ### How enforcement works
60
57
 
61
58
  Each `step()` runs two checks:
62
59
 
63
- 1. **Pre-flight** before the API call. Estimates output cost (default 512 tokens) and catches over-budget calls before burning money.
64
- 2. **Post-step** after recording the real token/cost. If exceeded, the step is **rolled back** from the tracker so you can retry without a stale balance.
60
+ 1. **Pre-flight** -- before the API call. Estimates output cost and catches over-budget calls before spending money.
61
+ 2. **Post-step** -- after recording real token/cost data. If a limit is exceeded, the step is rolled back from the tracker so you can retry without a stale balance.
65
62
 
66
63
  ```ts
67
- const agent = new AgentBudget({
68
- apiKey: key,
69
- limits: { maxCostUSD: 0.01, maxSteps: 3 },
70
- });
71
-
72
64
  try {
73
65
  await agent.step({ model, messages });
74
66
  } catch (err) {
@@ -85,23 +77,15 @@ const agent = new AgentBudget({
85
77
  apiKey: key,
86
78
  limits: { maxCostUSD: 0.01 },
87
79
  onExceeded: (usage) => {
88
- // Log, alert, switch models — never throws
89
80
  console.log(`Over budget: $${usage.totalCostUSD}`);
81
+ // Log, alert, switch models -- never throws
90
82
  },
91
83
  });
92
84
  ```
93
85
 
94
- ### Tune pre-flight estimation
95
-
96
- ```ts
97
- limits: {
98
- maxCostUSD: 0.05,
99
- preflightCheck: false, // skip pre-flight entirely
100
- preflightOutputTokenEstimate: 2048, // safety buffer (default 512)
101
- }
102
- ```
86
+ ### Warning thresholds
103
87
 
104
- ### Warning thresholds (non-blocking)
88
+ Get notified before hitting limits:
105
89
 
106
90
  ```ts
107
91
  const agent = new AgentBudget({
@@ -114,7 +98,9 @@ agent.on('budget:warning', (e) => {
114
98
  });
115
99
  ```
116
100
 
117
- ### Combine with adaptive routing
101
+ ## Adaptive model routing
102
+
103
+ Downgrade to cheaper models as budget depletes:
118
104
 
119
105
  ```ts
120
106
  const agent = new AgentBudget({
@@ -122,20 +108,18 @@ const agent = new AgentBudget({
122
108
  limits: { maxCostUSD: 5.00 },
123
109
  adaptiveRouting: {
124
110
  fallbackChain: [
125
- 'anthropic/claude-opus-4.8-fast', // $15/M tokens — best model
126
- 'openai/gpt-4o', // $5/M tokens
127
- 'openrouter/free', // $0 — emergency
111
+ 'anthropic/claude-opus-4.8-fast', // best model
112
+ 'openai/gpt-4o', // mid-tier
113
+ 'openrouter/free', // emergency fallback
128
114
  ],
129
- thresholds: [0.4, 0.75], // downgrade at 40% and 75% of budget consumed
115
+ thresholds: [0.4, 0.75], // downgrade at 40% and 75% budget consumed
130
116
  },
131
117
  });
132
118
  ```
133
119
 
134
- The router downgrades the model tier as the budget depletes. Each `step()` checks the current consumption against the thresholds and selects the appropriate model from the chain before the API call.
135
-
136
120
  ## Bring your own executor
137
121
 
138
- Use any LLM provider OpenAI, Anthropic, Ollama, local models, or the OpenRouter Agent SDK:
122
+ Use any LLM provider with a custom executor:
139
123
 
140
124
  ```ts
141
125
  import { AgentBudget } from 'budget-agent';
@@ -165,66 +149,22 @@ const agent = new AgentBudget({
165
149
  };
166
150
  },
167
151
  });
168
-
169
- const response = await agent.step({
170
- model: 'anthropic/claude-opus-4.8-fast',
171
- messages: [{ role: 'user', content: 'Hello' }],
172
- });
173
- ```
174
-
175
- Or use raw fetch to any API:
176
-
177
- ```ts
178
- const agent = new AgentBudget({
179
- apiKey: 'none',
180
- limits: { maxCostUSD: 0.05 },
181
- executor: async (request) => {
182
- const res = await fetch('http://localhost:11434/api/chat', {
183
- method: 'POST',
184
- body: JSON.stringify({ model: request.model, messages: request.messages }),
185
- });
186
- const data = await res.json();
187
- return {
188
- model: data.model,
189
- usage: data.usage ?? { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
190
- choices: data.messages?.map((m: any) => ({
191
- message: { role: m.role, content: m.content },
192
- finish_reason: 'stop',
193
- })) ?? [],
194
- };
195
- },
196
- });
197
152
  ```
198
153
 
199
- ## Built-in OpenRouter support
200
-
201
- By default, the SDK calls OpenRouter's API. Configure the endpoint and headers:
202
-
203
- ```ts
204
- const agent = new AgentBudget({
205
- apiKey: process.env.OPENROUTER_API_KEY,
206
- baseUrl: 'https://openrouter.ai/api/v1', // default — change for any OpenAI-compatible API
207
- siteUrl: 'https://mysite.com', // OpenRouter attribution
208
- appTitle: 'My App', // OpenRouter attribution
209
- defaultHeaders: { 'X-Custom': 'value' }, // extra headers for every request
210
- limits: { maxCostUSD: 0.10 },
211
- });
212
- ```
213
-
214
- Works with any OpenAI-compatible endpoint: OpenRouter, OpenAI, Together AI, Fireworks, LocalAI, Ollama (with compat layer), etc.
154
+ Works with OpenAI, Anthropic, Ollama, Together AI, Fireworks, LocalAI, or any OpenAI-compatible API.
215
155
 
216
156
  ## Features
217
157
 
218
- - **Budget enforcement** set limits on cost, tokens, steps, wall time. Checked pre-flight and post-step.
219
- - **Auto-compress** truncate message history with an LLM summary when token count exceeds a threshold.
220
- - **Circuit breaker** detect repetition or stagnation and halt the agent.
221
- - **Adaptive routing** downgrade to cheaper models as budget depletes.
222
- - **Checkpoints** save and resume agent state across restarts.
223
- - **Events** subscribe to lifecycle events (`step:start`, `step:end`, `step:token`, `budget:exceeded`, etc.).
224
- - **Pricing cache** model pricing fetched from OpenRouter with configurable TTL (or use `setModelPricing()` for any model).
225
- - **Rate-limit retry** automatic 429 retry with exponential backoff (3 attempts).
226
- - **Streaming** set `stream: true` and listen for `step:token` events.
227
- - **OpenTelemetry** optional spans via `telemetry: { enabled: true }` (requires `@opentelemetry/api`).
158
+ - **Budget enforcement** -- cost, tokens, steps, wall time limits checked before and after every LLM call
159
+ - **Adaptive routing** -- automatic model downgrade as budget depletes
160
+ - **Circuit breaker** -- detect repetition or stagnation and halt the agent
161
+ - **Auto-compress** -- truncate message history with LLM summary when tokens exceed threshold
162
+ - **Checkpoints** -- save and resume agent state across restarts
163
+ - **Streaming** -- set `stream: true` and listen for `step:token` events
164
+ - **Events** -- subscribe to `step:start`, `step:end`, `step:token`, `budget:exceeded`, and more
165
+ - **Pricing cache** -- model pricing fetched from OpenRouter with configurable TTL
166
+ - **Rate-limit retry** -- automatic 429 retry with exponential backoff
167
+ - **OpenTelemetry** -- optional tracing spans via `telemetry: { enabled: true }`
228
168
 
229
169
  ## API
230
170
 
@@ -232,21 +172,20 @@ Works with any OpenAI-compatible endpoint: OpenRouter, OpenAI, Together AI, Fire
232
172
 
233
173
  | Option | Type | Default | Description |
234
174
  |--------|------|---------|-------------|
235
- | `apiKey` | `string` | | Your provider API key |
236
- | `limits.*` | `object` | | Budget limits (cost, tokens, steps, wall time) |
237
- | `executor` | `AgentExecutor` | | Custom API executor (replaces built-in fetch) |
238
- | `baseUrl` | `string` | `https://openrouter.ai/api/v1` | API base URL for built-in fetch |
239
- | `defaultHeaders` | `object` | | Extra HTTP headers for built-in fetch |
240
- | `autoCompress` | `object` | | Auto-compress messages at token threshold |
241
- | `circuitBreaker` | `object` | | Detect repetition/stagnation loops |
242
- | `adaptiveRouting` | `object` | | Downgrade model tiers as budget depletes |
243
- | `checkpoint` | `object` | | Persist and resume agent state |
175
+ | `apiKey` | `string` | -- | Your provider API key |
176
+ | `limits` | `object` | -- | Budget limits (cost, tokens, steps, wall time) |
177
+ | `executor` | `function` | -- | Custom API executor (replaces built-in fetch) |
178
+ | `baseUrl` | `string` | `https://openrouter.ai/api/v1` | API base URL |
179
+ | `defaultHeaders` | `object` | -- | Extra HTTP headers |
180
+ | `autoCompress` | `object` | -- | Auto-compress messages at token threshold |
181
+ | `circuitBreaker` | `object` | -- | Detect repetition/stagnation loops |
182
+ | `adaptiveRouting` | `object` | -- | Downgrade model tiers as budget depletes |
183
+ | `checkpoint` | `object` | -- | Persist and resume agent state |
244
184
  | `onExceeded` | `'abort' \| function` | `'abort'` | Strategy when budget exceeded |
245
- | `onEvent` | `function` | | Global event listener |
246
- | `pricingCacheTTLMs` | `number` | `300_000` | Pricing cache TTL |
247
- | `siteUrl` | `string` | | OpenRouter HTTP-Referer |
248
- | `appTitle` | `string` | | OpenRouter X-OpenRouter-Title |
249
- | `telemetry` | `object` | — | Enable OpenTelemetry spans |
185
+ | `onEvent` | `function` | -- | Global event listener |
186
+ | `warningThreshold` | `number` | `0.75` | Fraction of limit that triggers warning |
187
+ | `pricingCacheTTLMs` | `number` | `300000` | Pricing cache TTL in ms |
188
+ | `telemetry` | `object` | -- | Enable OpenTelemetry spans |
250
189
 
251
190
  ### `agent.step(request)`
252
191
 
@@ -254,17 +193,15 @@ Make one LLM call. Checks limits before and after. Throws `BudgetError` if excee
254
193
 
255
194
  ```ts
256
195
  const response = await agent.step({
257
- model: 'anthropic/claude-opus-4.8-fast', // any model slug
196
+ model: 'anthropic/claude-opus-4.8-fast',
258
197
  messages: [{ role: 'user', content: 'Hi' }],
259
- stream: true, // optional emit step:token events
198
+ stream: true, // optional -- emit step:token events
260
199
  });
261
200
  ```
262
201
 
263
- **Budget enforcement with rollback.** When a step exceeds budget, the step is recorded for circuit-breaker analysis, then rolled back before throwing. The tracker stays clean for retry. The actual spend is available in the `BudgetError`.
264
-
265
202
  ### `agent.getUsage()`
266
203
 
267
- Returns a snapshot of current usage:
204
+ Returns current usage snapshot:
268
205
 
269
206
  ```ts
270
207
  {
@@ -279,7 +216,7 @@ Returns a snapshot of current usage:
279
216
 
280
217
  ### `agent.summary()`
281
218
 
282
- Prints a formatted table to console and returns the same usage snapshot.
219
+ Prints a formatted table to console and returns the usage snapshot.
283
220
 
284
221
  ### `agent.reset()`
285
222
 
@@ -300,22 +237,19 @@ Static factory. Creates a new agent pre-loaded with checkpoint state.
300
237
  ## Events
301
238
 
302
239
  ```ts
303
- agent.on('step:start', (event) => console.log('Step', event.stepIndex, 'started'));
304
- agent.on('step:token', (event) => process.stdout.write(event.token));
305
- agent.on('step:end', (event) => console.log('Step cost:', event.costUSD));
306
- agent.on('budget:exceeded', (event) => console.log('Limit hit:', event.exceeded.reason));
307
- agent.on('compress:triggered', (event) => console.log('Compressed:', event.messagesBefore, '→', event.messagesAfter));
308
- agent.on('model:downgraded', (event) => console.log('Downgraded to', event.to));
240
+ agent.on('step:start', (e) => console.log('Step', e.stepIndex, 'started'));
241
+ agent.on('step:token', (e) => process.stdout.write(e.token));
242
+ agent.on('step:end', (e) => console.log('Step cost:', e.costUSD));
243
+ agent.on('budget:exceeded', (e) => console.log('Limit hit:', e.exceeded.reason));
244
+ agent.on('model:downgraded', (e) => console.log('Downgraded to', e.to));
309
245
  ```
310
246
 
311
247
  ## Testing
312
248
 
313
- ```
249
+ ```bash
314
250
  npm test
315
251
  ```
316
252
 
317
- Runs 10 real-API tests against OpenRouter with simulated pricing.
318
-
319
253
  ## License
320
254
 
321
255
  MIT
package/dist/index.js CHANGED
@@ -519,9 +519,18 @@ export class AgentBudget {
519
519
  const response = request.stream === true
520
520
  ? await this._readStream(res, request.model, stepIndex, Date.now(), pricing)
521
521
  : (await res.json());
522
- // OpenRouter may return HTTP 200 with an error inside choices[0].
523
- // This happens when the provider rejects the request (insufficient
524
- // credits, guardrail, provider outage, etc.).
522
+ // OpenRouter may return HTTP 200 with an error body.
523
+ // Check top-level error first (rate limit, auth, etc.).
524
+ const bodyAny = response;
525
+ if (bodyAny.error) {
526
+ const code = bodyAny.error.code ?? 500;
527
+ const msg = bodyAny.error.message ?? 'Unknown error';
528
+ if (code === 429) {
529
+ throw new RateLimitError(429, 0, msg);
530
+ }
531
+ throw new UpstreamError(code, msg);
532
+ }
533
+ // Also check choices[0].error (provider-level rejection).
525
534
  const choiceError = response.choices?.[0]?.error;
526
535
  if (choiceError) {
527
536
  throw new UpstreamError(choiceError.code, choiceError.message, choiceError.metadata);
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "budget-agent",
3
- "version": "0.4.4",
4
- "description": "Provider-agnostic budget enforcement SDK for LLM agents. Track token/cost/step usage, enforce limits, auto-compress, circuit-breaker, checkpoints, adaptive routing, and more.",
3
+ "version": "0.4.6",
4
+ "description": "Control LLM agent costs with real-time token, cost, and step tracking. Set budget limits, enforce spend caps, and prevent runaway agents. Works with OpenAI, Anthropic, OpenRouter, Ollama, and any provider.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
7
7
  "types": "./dist/index.d.ts",
@@ -18,19 +18,51 @@
18
18
  "engines": {
19
19
  "node": ">=18"
20
20
  },
21
+ "scripts": {
22
+ "build": "tsc",
23
+ "prepublishOnly": "npm run build",
24
+ "typecheck": "tsc --noEmit",
25
+ "test": "tsx test/run.ts",
26
+ "test:unit": "tsx test/run.ts --unit",
27
+ "test:integration": "tsx test/run.ts --integration",
28
+ "test:gauntlet": "tsx test-gauntlet.ts",
29
+ "test:legacy": "tsx test-integration.ts"
30
+ },
21
31
  "keywords": [
32
+ "llm",
22
33
  "agent",
23
34
  "budget",
24
- "token",
25
- "cost",
26
- "llm",
35
+ "cost-control",
36
+ "token-limit",
37
+ "openrouter",
38
+ "openai",
39
+ "anthropic",
40
+ "llm-cost",
41
+ "agent-budget",
42
+ "spending-limit",
43
+ "rate-limit",
27
44
  "circuit-breaker",
28
45
  "checkpoint",
29
- "rate-limit"
46
+ "token-tracker",
47
+ "cost-tracker",
48
+ "llm-agent",
49
+ "ai-agent",
50
+ "prompt-cost",
51
+ "usage-tracking",
52
+ "budget-enforcement",
53
+ "ollama",
54
+ "gpt-4",
55
+ "claude",
56
+ "llm-proxy"
30
57
  ],
31
58
  "license": "MIT",
32
59
  "repository": {
33
60
  "type": "git",
34
61
  "url": "https://github.com/duggal1/agent-budget.git"
62
+ },
63
+ "devDependencies": {
64
+ "dotenv": "^17.4.2",
65
+ "tsx": "^4.19.0",
66
+ "typescript": "^5.4.0"
35
67
  }
36
68
  }