npm - budget-agent - Versions diffs - 0.4.3 - Mend

budget-agent 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/README.md +321 -0
package/dist/budget.d.ts +27 -0
package/dist/budget.js +63 -0
package/dist/checkpoint.d.ts +10 -0
package/dist/checkpoint.js +35 -0
package/dist/circuit-breaker.d.ts +27 -0
package/dist/circuit-breaker.js +93 -0
package/dist/compressor.d.ts +22 -0
package/dist/compressor.js +126 -0
package/dist/estimator.d.ts +12 -0
package/dist/estimator.js +38 -0
package/dist/events.d.ts +99 -0
package/dist/events.js +56 -0
package/dist/index.d.ts +108 -0
package/dist/index.js +557 -0
package/dist/pricing.d.ts +19 -0
package/dist/pricing.js +81 -0
package/dist/router.d.ts +14 -0
package/dist/router.js +29 -0
package/dist/tracker.d.ts +15 -0
package/dist/tracker.js +53 -0
package/dist/types.d.ts +149 -0
package/dist/types.js +2 -0
package/package.json +36 -0

package/README.md ADDED Viewed

@@ -0,0 +1,321 @@
+# @painitehq/agent-budget
+Budget-aware enforcement layer for LLM agents. Track token, cost, and step usage in real time — enforce limits before and after every LLM call. Works with any provider.
+```
+npm install @painitehq/agent-budget
+```
+## Quick start
+You bring your own API key and model. The SDK calls your provider.
+```ts
+import { AgentBudget } from '@painitehq/agent-budget';
+const agent = new AgentBudget({
+  apiKey: process.env.OPENROUTER_API_KEY,
+  limits: { maxCostUSD: 0.05, maxSteps: 10 },
+});
+const response = await agent.step({
+  model: 'anthropic/claude-opus-4.8-fast',
+  messages: [{ role: 'user', content: 'Hello' }],
+});
+console.log(agent.getUsage());
+// { steps: 1, totalCostUSD: 0.000015, totalInputTokens: 12, ... }
+```
+## How it works
+You provide the **model**, the **messages**, and your **API key**. The SDK:
+1. Checks budget before the call (pre-flight)
+2. Makes the API request to your provider
+3. Tracks tokens, cost, and duration
+4. Checks budget after the call (post-step)
+5. Emits events for streaming, warnings, and overages
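+No model is defaulted — you choose the model on every call. Requests go to OpenRouter's API by default; set `baseUrl` or pass a custom `executor` to use any other provider.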
+## Limits
+Budget guardrails that stop your agent before it spends too much:
+```ts
+limits: {
+  maxCostUSD:     0.05,   // total USD before the agent aborts
+  maxSteps:       10,     // total LLM calls before abort
+  maxInputTokens: 50000,  // total input tokens sent to models
+  maxOutputTokens: 10000, // total output tokens received
+  maxTotalTokens:  60000, // input + output combined
+  maxWallTimeMs:   60000, // 60 seconds wall clock
+}
+```
+Every limit is optional. Omit what you don't want to enforce.
+### How enforcement works
+Each `step()` runs two checks:
+1. **Pre-flight** — before the API call. Estimates output cost (default 512 tokens) and catches over-budget calls before burning money.
+2. **Post-step** — after recording the real token/cost. If exceeded, the step is **rolled back** from the tracker so you can retry without a stale balance.
+```ts
+const agent = new AgentBudget({
+  apiKey: key,
+  limits: { maxCostUSD: 0.01, maxSteps: 3 },
+});
+try {
+  await agent.step({ model, messages });
+} catch (err) {
+  if (err instanceof BudgetError) {
+    console.log(err.exceeded.reason); // 'cost' | 'steps' | 'wallTime' | ...
+  }
+}
+```
+### Custom callback instead of abort
+```ts
+const agent = new AgentBudget({
+  apiKey: key,
+  limits: { maxCostUSD: 0.01 },
+  onExceeded: (usage) => {
+    // Log, alert, switch models — never throws
+    console.log(`Over budget: $${usage.totalCostUSD}`);
+  },
+});
+```
+### Tune pre-flight estimation
+```ts
+limits: {
+  maxCostUSD: 0.05,
+  preflightCheck: false,              // skip pre-flight entirely
+  preflightOutputTokenEstimate: 2048, // safety buffer (default 512)
+}
+```
+### Warning thresholds (non-blocking)
+```ts
+const agent = new AgentBudget({
+  limits: { maxCostUSD: 0.10 },
+  warningThreshold: 0.5, // fire 'budget:warning' at 50% consumption
+});
+agent.on('budget:warning', (e) => {
+  // { reason: 'cost', pctConsumed: 0.51, remaining: 0.049 }
+});
+```
+### Combine with adaptive routing
+```ts
+const agent = new AgentBudget({
+  apiKey: key,
+  limits: { maxCostUSD: 5.00 },
+  adaptiveRouting: {
+    fallbackChain: [
+      'anthropic/claude-opus-4.8-fast', // $15/M tokens — best model
+      'openai/gpt-4o',                  // $5/M tokens
+      'openrouter/free',                // $0 — emergency
+    ],
+    thresholds: [0.4, 0.75], // downgrade at 40% and 75% of budget consumed
+  },
+});
+```
+The router downgrades the model tier as the budget depletes. Each `step()` checks the current consumption against the thresholds and selects the appropriate model from the chain before the API call.
+## Bring your own executor
+Use any LLM provider — OpenAI, Anthropic, Ollama, local models, or the OpenRouter Agent SDK:
+```ts
+import { AgentBudget } from '@painitehq/agent-budget';
+import OpenAI from 'openai';
+const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
+const agent = new AgentBudget({
+  apiKey: process.env.OPENAI_API_KEY,
+  limits: { maxCostUSD: 0.10 },
+  executor: async (request) => {
+    const completion = await openai.chat.completions.create({
+      model: request.model,
+      messages: request.messages,
+    });
+    return {
+      model: completion.model,
+      usage: {
+        prompt_tokens: completion.usage?.prompt_tokens ?? 0,
+        completion_tokens: completion.usage?.completion_tokens ?? 0,
+        total_tokens: completion.usage?.total_tokens ?? 0,
+      },
+      choices: completion.choices.map(c => ({
+        message: { role: c.message.role, content: c.message.content ?? '' },
+        finish_reason: c.finish_reason ?? 'stop',
+      })),
+    };
+  },
+});
+const response = await agent.step({
+  model: 'gpt-4o', // must be a model the OpenAI client above can serve
+  messages: [{ role: 'user', content: 'Hello' }],
+});
+```
+Or use raw fetch to any API:
+```ts
+const agent = new AgentBudget({
+  apiKey: 'none',
+  limits: { maxCostUSD: 0.05 },
+  executor: async (request) => {
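+    const res = await fetch('http://localhost:11434/api/chat', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      // Ollama streams NDJSON by default; `stream: false` returns a single JSON object.
+      body: JSON.stringify({ model: request.model, messages: request.messages, stream: false }),
+    });
+    const data = await res.json();
+    // Ollama reports token counts as prompt_eval_count / eval_count, not an OpenAI-style `usage` object.
+    const promptTokens = data.prompt_eval_count ?? 0;
+    const completionTokens = data.eval_count ?? 0;
+    return {
+      model: data.model,
+      usage: {
+        prompt_tokens: promptTokens,
+        completion_tokens: completionTokens,
+        total_tokens: promptTokens + completionTokens,
+      },
+      // Ollama returns one `message`, so wrap it in a single-element `choices` array.
+      choices: data.message
+        ? [{ message: { role: data.message.role, content: data.message.content }, finish_reason: 'stop' }]
+        : [],
+    };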
+  },
+});
+```
+## Built-in OpenRouter support
+By default, the SDK calls OpenRouter's API. Configure the endpoint and headers:
+```ts
+const agent = new AgentBudget({
+  apiKey: process.env.OPENROUTER_API_KEY,
+  baseUrl: 'https://openrouter.ai/api/v1',        // default — change for any OpenAI-compatible API
+  siteUrl: 'https://mysite.com',                   // OpenRouter attribution
+  appTitle: 'My App',                              // OpenRouter attribution
+  defaultHeaders: { 'X-Custom': 'value' },         // extra headers for every request
+  limits: { maxCostUSD: 0.10 },
+});
+```
+Works with any OpenAI-compatible endpoint: OpenRouter, OpenAI, Together AI, Fireworks, LocalAI, Ollama (with compat layer), etc.
+## Features
+- **Budget enforcement** — set limits on cost, tokens, steps, wall time. Checked pre-flight and post-step.
+- **Auto-compress** — replace older message history with an LLM-generated summary when the token count exceeds a threshold.
+- **Circuit breaker** — detect repetition or stagnation and halt the agent.
+- **Adaptive routing** — downgrade to cheaper models as budget depletes.
+- **Checkpoints** — save and resume agent state across restarts.
+- **Events** — subscribe to lifecycle events (`step:start`, `step:end`, `step:token`, `budget:exceeded`, etc.).
+- **Pricing cache** — model pricing fetched from OpenRouter with configurable TTL (or use `setModelPricing()` for any model).
+- **Rate-limit retry** — automatic 429 retry with exponential backoff (3 attempts).
+- **Streaming** — set `stream: true` and listen for `step:token` events.
+- **OpenTelemetry** — optional spans via `telemetry: { enabled: true }` (requires `@opentelemetry/api`).
+## API
+### `new AgentBudget(options)`
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `apiKey` | `string` | — | Your provider API key |
+| `limits.*` | `object` | — | Budget limits (cost, tokens, steps, wall time) |
+| `executor` | `AgentExecutor` | — | Custom API executor (replaces built-in fetch) |
+| `baseUrl` | `string` | `https://openrouter.ai/api/v1` | API base URL for built-in fetch |
+| `defaultHeaders` | `object` | — | Extra HTTP headers for built-in fetch |
+| `autoCompress` | `object` | — | Auto-compress messages at token threshold |
+| `circuitBreaker` | `object` | — | Detect repetition/stagnation loops |
+| `adaptiveRouting` | `object` | — | Downgrade model tiers as budget depletes |
+| `checkpoint` | `object` | — | Persist and resume agent state |
+| `onExceeded` | `'abort' \| function` | `'abort'` | Strategy when budget exceeded |
+| `onEvent` | `function` | — | Global event listener |
+| `pricingCacheTTLMs` | `number` | `300_000` | Pricing cache TTL |
+| `siteUrl` | `string` | — | OpenRouter HTTP-Referer |
+| `appTitle` | `string` | — | OpenRouter X-OpenRouter-Title |
+| `telemetry` | `object` | — | Enable OpenTelemetry spans |
+### `agent.step(request)`
+Make one LLM call. Checks limits before and after. Throws `BudgetError` if exceeded.
+```ts
+const response = await agent.step({
+  model: 'anthropic/claude-opus-4.8-fast',            // any model slug
+  messages: [{ role: 'user', content: 'Hi' }],
+  stream: true,                        // optional — emit step:token events
+});
+```
+**Budget enforcement with rollback.** When a step exceeds budget, the step is recorded for circuit-breaker analysis, then rolled back before throwing. The tracker stays clean for retry. The actual spend is available in the `BudgetError`.
+### `agent.getUsage()`
+Returns a snapshot of current usage:
+```ts
+{
+  steps: number;
+  totalInputTokens: number;
+  totalOutputTokens: number;
+  totalCostUSD: number;
+  elapsedMs: number;
+  stepHistory: StepUsage[];
+}
+```
+### `agent.summary()`
+Prints a formatted table to console and returns the same usage snapshot.
+### `agent.reset()`
+Reset all usage counters.
+### `agent.compressMessages(messages, keepLastN?)`
+Manually compress a message array via LLM summary.
+### `agent.loadCheckpoint()` / `agent.clearCheckpoint()`
+Load or clear persisted checkpoint state.
+### `AgentBudget.resume(options, checkpointPath?)`
+Static factory. Creates a new agent pre-loaded with checkpoint state.
+## Events
+```ts
+agent.on('step:start', (event) => console.log('Step', event.stepIndex, 'started'));
+agent.on('step:token', (event) => process.stdout.write(event.token));
+agent.on('step:end', (event) => console.log('Step cost:', event.costUSD));
+agent.on('budget:exceeded', (event) => console.log('Limit hit:', event.exceeded.reason));
+agent.on('compress:triggered', (event) => console.log('Compressed:', event.messagesBefore, '→', event.messagesAfter));
+agent.on('model:downgraded', (event) => console.log('Downgraded to', event.to));
+```
+## Testing
+```
+npm test
+```
+Runs 10 real-API tests against OpenRouter with simulated pricing.
+## License
+MIT

package/dist/budget.d.ts ADDED Viewed

@@ -0,0 +1,27 @@
+import type { BudgetLimits, BudgetUsage, BudgetExceededError } from './types.js';
+/**
+ * Error describing a budget limit that was exceeded.
+ * The structured details (reason, limit, actual value) are available on
+ * `exceeded` so callers do not have to parse the message.
+ */
+export declare class BudgetError extends Error {
+    /** Structured description of the limit that was exceeded. */
+    readonly exceeded: BudgetExceededError;
+    /**
+     * @param exceeded Details of the exceeded limit; also used to build the error message.
+     */
+    constructor(exceeded: BudgetExceededError);
+}
+/**
+ * Error carrying rate-limit details alongside a caller-supplied message.
+ */
+export declare class RateLimitError extends Error {
+    /** Suggested wait before retrying. NOTE(review): unit (seconds vs. ms) is not visible here — confirm against the caller. */
+    readonly retryAfter: number;
+    /** Status code associated with the failure — presumably the HTTP status (e.g. 429); verify against the caller. */
+    readonly statusCode: number;
+    /**
+     * @param statusCode Stored as `statusCode`.
+     * @param retryAfter Stored as `retryAfter`.
+     * @param message Used verbatim as the error message.
+     */
+    constructor(statusCode: number, retryAfter: number, message: string);
+}
+/**
+ * An error returned by the provider inside the chat completion response.
+ * This happens when OpenRouter returns HTTP 200 but `choices[0].error`
+ * contains a provider-level error (e.g., 402 insufficient credits,
+ * guardrail block, provider outage, etc.).
+ */
+export declare class UpstreamError extends Error {
+    /** Error code reported by the provider. */
+    readonly code: number;
+    /** Optional provider-supplied details, stored as given. */
+    readonly metadata?: Record<string, unknown>;
+    /** Status code for callers that branch on HTTP-like statuses; set to the same value as `code`. */
+    readonly statusCode: number;
+    /**
+     * @param code Provider error code.
+     * @param message Provider error text; appended to the error message when non-empty.
+     * @param metadata Optional provider-supplied details.
+     */
+    constructor(code: number, message: string, metadata?: Record<string, unknown>);
+}
+/**
+ * Returns the first exceeded limit, or null if within budget.
+ * Order of precedence: cost → steps → totalTokens → inputTokens → outputTokens → wallTime
+ *
+ * A limit is exceeded only when the measured value is strictly greater than
+ * it; limits left `undefined` are not enforced.
+ *
+ * @param usage Current usage snapshot; it is also attached to the returned error object.
+ * @param limits Configured limits.
+ * @returns Details of the first exceeded limit, or `null`.
+ */
+export declare function checkLimits(usage: BudgetUsage, limits: BudgetLimits): BudgetExceededError | null;

package/dist/budget.js ADDED Viewed

@@ -0,0 +1,63 @@
+// ─── Error ────────────────────────────────────────────────────────────────────
+/**
+ * Error describing a budget limit that was exceeded.
+ * The message lists the reason, the configured limit and the actual value
+ * (six decimals); pre-flight cost estimates additionally list the remaining
+ * budget and the estimated cost (eight decimals).
+ */
+export class BudgetError extends Error {
+    exceeded;
+    /**
+     * @param exceeded Structured description of the exceeded limit; kept on `this.exceeded`.
+     */
+    constructor(exceeded) {
+        const { reason, limit, actual } = exceeded;
+        let detail = '';
+        if (reason === 'preflightCostEstimate') {
+            const remaining = exceeded.remainingBudget?.toFixed(8);
+            const estimated = exceeded.estimatedCost?.toFixed(8);
+            detail = ` — remaining: $${remaining}, estimated: $${estimated}`;
+        }
+        const fields = [
+            `reason: ${reason}`,
+            `limit: ${limit}`,
+            `actual: ${actual.toFixed(6)}${detail}`,
+        ];
+        super(`[agent-budget] Limit exceeded — ${fields.join(', ')}`);
+        this.name = 'BudgetError';
+        this.exceeded = exceeded;
+    }
+}
+/**
+ * Error carrying a status code and a retry hint alongside the supplied message.
+ */
+export class RateLimitError extends Error {
+    retryAfter;
+    statusCode;
+    /**
+     * @param statusCode Stored on `this.statusCode`.
+     * @param retryAfter Stored on `this.retryAfter`.
+     * @param message Used verbatim as the error message.
+     */
+    constructor(statusCode, retryAfter, message) {
+        super(message);
+        Object.assign(this, { name: 'RateLimitError', statusCode, retryAfter });
+    }
+}
+/**
+ * Provider-level failure reported inside an otherwise successful chat
+ * completion response: OpenRouter answers HTTP 200 but `choices[0].error`
+ * holds the provider's error (e.g., 402 insufficient credits, guardrail
+ * block, provider outage, etc.).
+ */
+export class UpstreamError extends Error {
+    code;
+    metadata;
+    statusCode;
+    /**
+     * @param code Provider error code.
+     * @param message Provider error text; appended to the message only when non-empty.
+     * @param metadata Optional provider-supplied details, stored as given.
+     */
+    constructor(code, message, metadata) {
+        const detail = message ? `: ${message}` : '';
+        super(`[agent-budget] Provider error ${code}${detail}`);
+        this.name = 'UpstreamError';
+        this.code = code;
+        this.metadata = metadata;
+        // The provider code is exposed unchanged as the HTTP-like status;
+        // no translation between code spaces is applied.
+        this.statusCode = code;
+    }
+}
+// ─── Checker ─────────────────────────────────────────────────────────────────
+/**
+ * Returns the first exceeded limit, or null if within budget.
+ * Order of precedence: cost → steps → totalTokens → inputTokens → outputTokens → wallTime
+ */
+export function checkLimits(usage, limits) {
+    const checks = [
+        { reason: 'cost', limit: limits.maxCostUSD, actual: usage.totalCostUSD },
+        { reason: 'steps', limit: limits.maxSteps, actual: usage.steps },
+        { reason: 'totalTokens', limit: limits.maxTotalTokens, actual: usage.totalInputTokens + usage.totalOutputTokens },
+        { reason: 'inputTokens', limit: limits.maxInputTokens, actual: usage.totalInputTokens },
+        { reason: 'outputTokens', limit: limits.maxOutputTokens, actual: usage.totalOutputTokens },
+        { reason: 'wallTime', limit: limits.maxWallTimeMs, actual: usage.elapsedMs },
+    ];
+    for (const { reason, limit, actual } of checks) {
+        if (limit !== undefined && actual > limit) {
+            return { reason, limit, actual, usage };
+        }
+    }
+    return null;
+}

package/dist/checkpoint.d.ts ADDED Viewed

@@ -0,0 +1,10 @@
+import type { BudgetUsage, CheckpointData, OpenRouterMessage } from './types.js';
+/**
+ * Saves, loads and deletes a single JSON checkpoint file holding agent state.
+ */
+export declare class CheckpointManager {
+    /** Path of the checkpoint file. */
+    private readonly filePath;
+    /**
+     * @param options.path Checkpoint file location; defaults to `./.agent-checkpoint.json`.
+     */
+    constructor(options?: {
+        path?: string;
+    });
+    /**
+     * Writes the given state to the checkpoint file as JSON, together with a
+     * format version and a creation timestamp.
+     */
+    save(messages: OpenRouterMessage[], usage: BudgetUsage, model: string, resumeFromStep: number): Promise<void>;
+    /**
+     * Reads and parses the checkpoint file.
+     * @returns The parsed checkpoint, or `null` when the file cannot be read or parsed.
+     */
+    load(): Promise<CheckpointData | null>;
+    /** Deletes the checkpoint file; a failure to delete (e.g. missing file) is ignored. */
+    clear(): Promise<void>;
+}

package/dist/checkpoint.js ADDED Viewed

@@ -0,0 +1,35 @@
+import { promises as fs } from 'node:fs';
+export class CheckpointManager {
+    filePath;
+    constructor(options) {
+        this.filePath = options?.path ?? './.agent-checkpoint.json';
+    }
+    async save(messages, usage, model, resumeFromStep) {
+        const data = {
+            checkpointVersion: '1.0',
+            messages,
+            usage,
+            model,
+            resumeFromStep,
+            createdAt: new Date().toISOString(),
+        };
+        await fs.writeFile(this.filePath, JSON.stringify(data, null, 2), 'utf-8');
+    }
+    async load() {
+        try {
+            const raw = await fs.readFile(this.filePath, 'utf-8');
+            return JSON.parse(raw);
+        }
+        catch {
+            return null;
+        }
+    }
+    async clear() {
+        try {
+            await fs.unlink(this.filePath);
+        }
+        catch {
+            // File may not exist — ignore
+        }
+    }
+}

package/dist/circuit-breaker.d.ts ADDED Viewed

@@ -0,0 +1,27 @@
+import type { BudgetUsage } from './types.js';
+/** Tuning knobs for {@link CircuitBreaker}; every field is optional. */
+export interface CircuitBreakerConfig {
+    /** Number of most recent steps examined for repetition (default 3). */
+    repetitionWindow?: number;
+    /** Minimum Jaccard word similarity between consecutive outputs for them to count as repeated (default 0.85). */
+    repetitionThreshold?: number;
+    /** Number of most recent steps examined for stagnation (default 4). */
+    stagnationWindow?: number;
+    /** Outputs shorter than this many characters count as stagnant (default 50). */
+    stagnationMinLength?: number;
+}
+/** Result returned by {@link CircuitBreaker.check} when a loop is detected. */
+export interface CircuitBreakerTrip {
+    /** Which detector fired. */
+    triggerMode: 'repetition' | 'stagnation';
+    /** The configured window size of the detector that fired. */
+    windowSize: number;
+    /** Mean similarity of the compared output pairs; only set for `'repetition'` trips. */
+    similarity?: number;
+}
+/**
+ * Stateless circuit breaker. Pulls stepHistory from BudgetUsage each time
+ * it is checked. No internal data store across calls — only the merged
+ * configuration is kept on the instance.
+ */
+export declare class CircuitBreaker {
+    /** Effective configuration: defaults overlaid with the constructor's `config`. */
+    private readonly cfg;
+    /**
+     * @param config Optional overrides for the default windows and thresholds.
+     */
+    constructor(config?: CircuitBreakerConfig);
+    /**
+     * Analyze recent step history and return a trip if repetition or stagnation
+     * is detected. Returns null if all clear.
+     * Repetition is evaluated first; stagnation is only consulted when
+     * repetition does not trip.
+     */
+    check(usage: BudgetUsage): CircuitBreakerTrip | null;
+    /** Trips when every comparable consecutive output pair in the window meets the similarity threshold. */
+    private checkRepetition;
+    /** Trips when every step in the window has an output shorter than the configured minimum length. */
+    private checkStagnation;
+}

package/dist/circuit-breaker.js ADDED Viewed

@@ -0,0 +1,93 @@
+// ─── Defaults ────────────────────────────────────────────────────────────────
+// Fallback configuration used for any field the caller does not override.
+const DEFAULTS = {
+    // How many of the most recent steps are inspected for repetition.
+    repetitionWindow: 3,
+    // Consecutive outputs at or above this Jaccard similarity count as repeated.
+    repetitionThreshold: 0.85,
+    // How many of the most recent steps are inspected for stagnation.
+    stagnationWindow: 4,
+    // Outputs shorter than this many characters count as stagnant.
+    stagnationMinLength: 50,
+};
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+function normalizeText(s) {
+    return s
+        .toLowerCase()
+        .replace(/[^\w\s]/g, '')
+        .replace(/\s+/g, ' ')
+        .trim();
+}
+function jaccardSimilarity(a, b) {
+    const wordsA = new Set(normalizeText(a).split(' '));
+    const wordsB = new Set(normalizeText(b).split(' '));
+    if (wordsA.size === 0 && wordsB.size === 0)
+        return 1;
+    let intersection = 0;
+    for (const w of wordsA) {
+        if (wordsB.has(w))
+            intersection++;
+    }
+    const union = wordsA.size + wordsB.size - intersection;
+    return union === 0 ? 0 : intersection / union;
+}
+// ─── CircuitBreaker ──────────────────────────────────────────────────────────
+/**
+ * Stateless circuit breaker. Pulls stepHistory from BudgetUsage each time
+ * it is checked. No internal data store across calls.
+ */
+export class CircuitBreaker {
+    cfg;
+    constructor(config) {
+        this.cfg = { ...DEFAULTS, ...config };
+    }
+    /**
+     * Analyze recent step history and return a trip if repetition or stagnation
+     * is detected. Returns null if all clear.
+     */
+    check(usage) {
+        return this.checkRepetition(usage) ?? this.checkStagnation(usage);
+    }
+    checkRepetition(usage) {
+        const history = usage.stepHistory;
+        if (history.length < this.cfg.repetitionWindow)
+            return null;
+        // Look at the last N entries for consecutive similarity
+        const recent = history.slice(-this.cfg.repetitionWindow);
+        let checkedPairs = 0;
+        let totalSim = 0;
+        for (let i = 1; i < recent.length; i++) {
+            const prev = recent[i - 1].outputContent;
+            const curr = recent[i].outputContent;
+            if (!prev || !curr)
+                continue;
+            checkedPairs++;
+            const sim = jaccardSimilarity(prev, curr);
+            totalSim += sim;
+            if (sim < this.cfg.repetitionThreshold) {
+                return null;
+            }
+        }
+        // Need at least one valid pair to trip
+        if (checkedPairs === 0)
+            return null;
+        return {
+            triggerMode: 'repetition',
+            windowSize: this.cfg.repetitionWindow,
+            similarity: totalSim / checkedPairs,
+        };
+    }
+    checkStagnation(usage) {
+        const history = usage.stepHistory;
+        if (history.length < this.cfg.stagnationWindow)
+            return null;
+        // Only consider steps that have outputContent set — steps recorded via
+        // recordStep() or other non-LLM paths have no output content and skipping
+        // them prevents false-positive stagnation trips.
+        const recent = history.slice(-this.cfg.stagnationWindow).filter((s) => s.outputContent !== undefined);
+        if (recent.length < this.cfg.stagnationWindow)
+            return null;
+        const allStagnant = recent.every((s) => s.outputContent.length < this.cfg.stagnationMinLength);
+        if (!allStagnant)
+            return null;
+        return {
+            triggerMode: 'stagnation',
+            windowSize: this.cfg.stagnationWindow,
+        };
+    }
+}

package/dist/compressor.d.ts ADDED Viewed

@@ -0,0 +1,22 @@
+import type { OpenRouterMessage } from './types.js';
+/**
+ * Character-based token approximation. 4 chars ≈ 1 token.
+ * Good enough for threshold decisions — not for billing.
+ *
+ * @param text Text to measure.
+ * @returns Approximate token count for `text`.
+ */
+export declare function estimateTokens(text: string): number;
+/**
+ * Estimates total token count for a message array.
+ *
+ * @param messages Messages to measure.
+ * @returns Approximate token count across all messages.
+ *   NOTE(review): presumably built on `estimateTokens` — confirm in the implementation.
+ */
+export declare function estimateMessagesTokens(messages: OpenRouterMessage[]): number;
+/**
+ * Compresses a message array by summarizing the middle section via an LLM call.
+ *
+ * Strategy:
+ * 1. Preserve the system message (if present) — never touched.
+ * 2. Preserve the last `keepLastN` messages — never touched.
+ * 3. Everything in between is replaced with a single synthetic assistant
+ *    message containing an LLM-generated summary.
+ *
+ * The summary is clearly marked so downstream code can detect it.
+ *
+ * @param messages Conversation to compress.
+ * @param apiKey API key — presumably used for the summarizing LLM call; confirm in the implementation.
+ * @param keepLastN Number of trailing messages kept verbatim. NOTE(review): the default is not visible in this declaration.
+ * @returns The compressed message array.
+ */
+export declare function compressMessages(messages: OpenRouterMessage[], apiKey: string, keepLastN?: number): Promise<OpenRouterMessage[]>;