budget-agent 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,126 @@
1
/** Endpoint that summary requests are POSTed to. */
const OPENROUTER_CHAT = 'https://openrouter.ai/api/v1/chat/completions';

/** Model id sent in the body of every summary request. */
const COMPRESSION_MODEL = 'cohere/north-mini-code:free';

/** Divisor used by the character-based token approximation (4 chars ≈ 1 token). */
const CHARS_PER_TOKEN = 4;
4
+ // ─── Token estimation ─────────────────────────────────────────────────────────
5
/**
 * Approximates a token count from the length of a string.
 *
 * The count is `text.length / CHARS_PER_TOKEN`, rounded up. It is meant for
 * threshold decisions only and is not accurate enough for billing.
 *
 * @param text - String whose length is measured.
 * @returns The rounded-up token estimate.
 */
export function estimateTokens(text) {
    const charCount = text.length;
    const approxTokens = charCount / CHARS_PER_TOKEN;
    return Math.ceil(approxTokens);
}
12
/**
 * Sums the token estimates of every message whose `content` is a string.
 *
 * Each message is rounded up individually (via `estimateTokens`) before being
 * added, so the result can exceed the estimate of the concatenated text.
 * Messages with non-string content contribute nothing.
 *
 * @param messages - Iterable of message objects.
 * @returns The accumulated token estimate.
 */
export function estimateMessagesTokens(messages) {
    let runningTotal = 0;
    for (const { content } of messages) {
        if (typeof content !== 'string') {
            continue;
        }
        runningTotal += estimateTokens(content);
    }
    return runningTotal;
}
24
+ // ─── Compression ──────────────────────────────────────────────────────────────
25
/**
 * Renders one message as a `[role]: content` transcript entry for the summarizer.
 *
 * String content is used verbatim and missing content becomes an empty string.
 * Any other content (e.g. an array of parts) is JSON-encoded so it does not
 * degrade to "[object Object]". When the message carries a non-empty
 * `tool_calls` array it is appended on its own line, because the summarizer
 * prompt explicitly asks which tool calls were made.
 */
function renderMessageForSummary(msg) {
    const { content } = msg;
    let text;
    if (typeof content === 'string') {
        text = content;
    }
    else if (content == null) {
        text = '';
    }
    else {
        text = JSON.stringify(content);
    }
    let entry = `[${msg.role}]: ${text}`;
    if (Array.isArray(msg.tool_calls) && msg.tool_calls.length > 0) {
        entry += `\n[tool_calls]: ${JSON.stringify(msg.tool_calls)}`;
    }
    return entry;
}
/**
 * Compresses a message array by summarizing the middle section via an LLM call.
 *
 * Strategy:
 * 1. A leading system message (if present) is kept untouched.
 * 2. The last `keepLastN` messages are kept untouched. If that window would
 *    begin with a `tool` message, it is widened backwards until it begins with
 *    a non-tool message, so a tool result is never separated from the message
 *    that precedes it (OpenAI-style chat APIs expect a tool result to follow
 *    the assistant message that issued the call).
 * 3. Everything in between is replaced with a single synthetic assistant
 *    message whose content is the generated summary.
 *
 * @param messages - Conversation so far, oldest first.
 * @param apiKey - Key forwarded to the summary request.
 * @param keepLastN - Minimum number of trailing messages to keep verbatim (default 4).
 * @returns The original array (same reference) when there is nothing to
 *   compress, otherwise a new array: [system?, summary, ...kept tail].
 */
export async function compressMessages(messages, apiKey, keepLastN = 4) {
    // Nothing to compress if we have fewer messages than the keep window
    if (messages.length <= keepLastN + 1) {
        return messages;
    }
    // Split: system message (if any) + middle messages + last N messages
    const systemMessage = messages[0]?.role === 'system' ? messages[0] : null;
    const startIndex = systemMessage ? 1 : 0;
    let endIndex = messages.length - keepLastN;
    // Do not start the kept tail on a tool result: pull the boundary back so the
    // message that precedes the tool result(s) stays in the tail with them.
    while (endIndex > startIndex && messages[endIndex]?.role === 'tool') {
        endIndex -= 1;
    }
    const middleMessages = messages.slice(startIndex, endIndex);
    const lastNMessages = messages.slice(endIndex);
    if (middleMessages.length === 0) {
        return messages;
    }
    // Build the conversation text for summarization
    const conversationText = middleMessages.map(renderMessageForSummary).join('\n\n');
    // Call LLM to generate summary
    const summaryContent = await generateSummary(conversationText, apiKey, middleMessages.length);
    // Build compressed message array
    const compressed = [];
    if (systemMessage) {
        compressed.push(systemMessage);
    }
    compressed.push({
        role: 'assistant',
        content: summaryContent,
    });
    compressed.push(...lastNMessages);
    return compressed;
}
72
+ // ─── Summary generation ───────────────────────────────────────────────────────
73
/**
 * Asks the compression model for a summary of `conversationText`.
 *
 * The returned string always starts with a
 * `[COMPRESSED SUMMARY — N messages collapsed]` marker line. The text after
 * the marker is the model's summary, or the heuristic summary when the call
 * cannot produce one. The heuristic fallback is used when:
 * - the request itself rejects (e.g. a network failure),
 * - the response status is not OK,
 * - the response body is not valid JSON, or
 * - the response contains no non-blank summary text.
 *
 * @param conversationText - Transcript of the messages being collapsed.
 * @param apiKey - Sent as a Bearer token in the Authorization header.
 * @param collapsedCount - Number of messages being replaced (used in the marker).
 * @returns Marker line followed by the summary text.
 */
async function generateSummary(conversationText, apiKey, collapsedCount) {
    const header = `[COMPRESSED SUMMARY — ${collapsedCount} messages collapsed]`;
    // Single place that logs the failure reason and builds the heuristic result.
    const useFallback = (reason) => {
        console.warn(`[agent-budget] Compression summary LLM call failed (${reason}), using heuristic fallback`);
        const heuristic = makeHeuristicSummary(conversationText, collapsedCount);
        return `${header}\n${heuristic}`;
    };
    const prompt = 'You are a conversation summarizer for an AI agent loop. ' +
        'Summarize the following conversation between a user and an assistant. ' +
        'Focus on:\n' +
        '- What was discussed\n' +
        '- What decisions were made\n' +
        '- What tool calls were made\n' +
        '- What the current goal state is\n\n' +
        'Keep the summary concise but comprehensive. Output ONLY the summary text, no preamble.\n\n' +
        `Conversation:\n${conversationText}`;
    let res;
    try {
        res = await fetch(OPENROUTER_CHAT, {
            method: 'POST',
            headers: {
                Authorization: `Bearer ${apiKey}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                model: COMPRESSION_MODEL,
                messages: [{ role: 'user', content: prompt }],
                max_tokens: 256,
            }),
        });
    }
    catch (err) {
        // fetch rejects on network-level failures; treat that like any other failed call.
        return useFallback(err instanceof Error ? err.message : String(err));
    }
    if (!res.ok) {
        // The error body is not used; read it so the response is fully consumed,
        // and ignore a failure to read it.
        await res.text().catch(() => undefined);
        return useFallback(res.status);
    }
    let summary;
    try {
        const json = await res.json();
        summary = json?.choices?.[0]?.message?.content;
    }
    catch {
        return useFallback('invalid JSON response');
    }
    // A missing or blank summary would discard the collapsed context entirely.
    if (typeof summary !== 'string' || summary.trim() === '') {
        return useFallback('empty summary');
    }
    return `${header}\n${summary}`;
}
107
/**
 * Heuristic fallback when the LLM summary call fails.
 *
 * Works on the `[role]: content` transcript: lines starting with `[user]:`
 * supply the topics (first 8 whitespace-separated words of each of the first
 * 5 user lines) and lines starting with `[assistant]:` are counted.
 * User lines with no text after the prefix are skipped, and the "Key topics"
 * sentence is omitted when no topic could be extracted, so the output never
 * contains an empty topic list.
 *
 * @param conversationText - Transcript with entries separated by newlines.
 * @param collapsedCount - Number of collapsed messages, reported verbatim.
 * @returns A short plain-text summary.
 */
function makeHeuristicSummary(conversationText, collapsedCount) {
    const USER_PREFIX = '[user]:';
    const lines = conversationText.split('\n').filter(Boolean);
    const userLines = lines.filter((l) => l.startsWith(USER_PREFIX));
    const assistantLines = lines.filter((l) => l.startsWith('[assistant]:'));
    // Take the opening words of the first few user lines as stand-in topics.
    const keyPhrases = [];
    for (const line of userLines.slice(0, 5)) {
        const topic = line
            .slice(USER_PREFIX.length)
            .trim()
            .split(/\s+/)
            .slice(0, 8)
            .join(' ');
        if (topic !== '') {
            keyPhrases.push(topic);
        }
    }
    const topicsSentence = keyPhrases.length > 0
        ? `Key topics discussed include: ${keyPhrases.join('; ')}. `
        : '';
    return `The conversation covered ${collapsedCount} exchanges between user and assistant. ` +
        topicsSentence +
        `The assistant provided ${assistantLines.length} responses with explanations, code examples, and guidance.`;
}
@@ -0,0 +1,12 @@
1
+ import type { StepRequest, ModelPricing } from './types.js';
2
/**
 * Result shape of a pre-call cost estimate.
 */
export interface CostEstimate {
    /** Estimated number of input tokens for the request. */
    estimatedInputTokens: number;
    /** Estimated number of output tokens for the request. */
    estimatedOutputTokens: number;
    /** Estimated cost in US dollars for the token counts above. */
    estimatedCostUSD: number;
    /** Always the literal 'approximate'. */
    confidence: 'approximate';
}
8
/**
 * Estimates what a step will cost before the API call is made.
 *
 * Token counts come from a character-based approximation (4 chars ≈ 1 token).
 *
 * @param request - The step request to be estimated.
 * @param pricing - Pricing applied to the estimated token counts.
 * @param defaultOutputTokens - Optional output-token count to assume.
 * @returns The approximate token counts and cost.
 */
export declare function estimateStepCost(
    request: StepRequest,
    pricing: ModelPricing,
    defaultOutputTokens?: number
): CostEstimate;
@@ -0,0 +1,38 @@
1
+ import { calculateCost } from './pricing.js';
2
+ // ─── Character-based token estimation ─────────────────────────────────────────
3
/** Divisor used by the character-based token approximation (4 chars ≈ 1 token). */
const CHARS_PER_TOKEN = 4;
/**
 * Approximates the input tokens contributed by a list of chat messages.
 *
 * Characters are accumulated across all messages and divided by
 * `CHARS_PER_TOKEN` once at the end (rounded up). Per message the following
 * are counted:
 * - `content` when it is a string;
 * - the `text` of each part when `content` is an array (string parts count
 *   as-is; parts without a string `text`, such as images, are not counted);
 * - the JSON encoding of a non-empty `tool_calls` array, so assistant turns
 *   that only issue tool calls are not estimated as zero;
 * - `tool_call_id` and `name` when present.
 *
 * @param messages - Chat messages of the request.
 * @returns The rounded-up token estimate.
 */
function estimateMessageTokens(messages) {
    let chars = 0;
    for (const msg of messages) {
        const { content } = msg;
        if (typeof content === 'string') {
            chars += content.length;
        }
        else if (Array.isArray(content)) {
            // Multi-part content: count the text, not the number of parts.
            for (const part of content) {
                if (typeof part === 'string') {
                    chars += part.length;
                }
                else if (typeof part?.text === 'string') {
                    chars += part.text.length;
                }
            }
        }
        if (Array.isArray(msg.tool_calls) && msg.tool_calls.length > 0) {
            chars += JSON.stringify(msg.tool_calls).length;
        }
        if (msg.tool_call_id)
            chars += msg.tool_call_id.length;
        if (msg.name)
            chars += msg.name.length;
    }
    return Math.ceil(chars / CHARS_PER_TOKEN);
}
16
/**
 * Approximates the tokens taken up by the tool definitions of a request.
 *
 * The estimate is the length of the JSON-encoded `tools` value divided by
 * `CHARS_PER_TOKEN`, rounded up. A missing or empty list yields 0.
 *
 * @param tools - Tool definitions of the request, if any.
 * @returns The rounded-up token estimate.
 */
function estimateToolTokens(tools) {
    const hasTools = Boolean(tools) && tools.length !== 0;
    if (!hasTools) {
        return 0;
    }
    const serialized = JSON.stringify(tools);
    return Math.ceil(serialized.length / CHARS_PER_TOKEN);
}
21
+ // ─── Public API ───────────────────────────────────────────────────────────────
22
/**
 * Estimates what a step will cost before the API call is made.
 *
 * Input tokens are the sum of the message estimate and the tool-definition
 * estimate (both character-based, 4 chars ≈ 1 token). Output tokens are taken
 * from `request.max_tokens`, falling back to `defaultOutputTokens` when that
 * is null or undefined. The cost is computed by `calculateCost`.
 *
 * @param request - Step request; `messages`, `tools` and `max_tokens` are read.
 * @param pricing - Pricing passed through to `calculateCost`.
 * @param defaultOutputTokens - Output tokens assumed without `max_tokens` (default 512).
 * @returns Token estimates, cost estimate and the fixed 'approximate' confidence.
 */
export function estimateStepCost(request, pricing, defaultOutputTokens = 512) {
    const estimatedInputTokens = estimateMessageTokens(request.messages) + estimateToolTokens(request.tools);
    const estimatedOutputTokens = request.max_tokens ?? defaultOutputTokens;
    return {
        estimatedInputTokens,
        estimatedOutputTokens,
        estimatedCostUSD: calculateCost(pricing, estimatedInputTokens, estimatedOutputTokens),
        confidence: 'approximate',
    };
}
@@ -0,0 +1,99 @@
1
+ import type { BudgetExceededError, ExceededReason } from './types.js';
2
/**
 * Every event shape that can be emitted, discriminated by the `type` field.
 */
export type AgentBudgetEvent =
    /** Step start: step index, model and an optional estimated cost. */
    | {
        type: 'step:start';
        stepIndex: number;
        model: string;
        estimatedCostUSD?: number;
    }
    /** A single token string attributed to a step. */
    | {
        type: 'step:token';
        stepIndex: number;
        token: string;
    }
    /** Step end: token counts, cost and duration for the step. */
    | {
        type: 'step:end';
        stepIndex: number;
        model: string;
        inputTokens: number;
        outputTokens: number;
        costUSD: number;
        durationMs: number;
    }
    /** Warning for one budget metric, identified by `reason`. */
    | {
        type: 'budget:warning';
        reason: ExceededReason;
        pctConsumed: number;
        remaining: number;
    }
    /** A budget limit was exceeded; details are in `exceeded`. */
    | {
        type: 'budget:exceeded';
        exceeded: BudgetExceededError;
    }
    /** The model was switched from `from` to `to`. */
    | {
        type: 'model:downgraded';
        from: string;
        to: string;
        pctConsumed: number;
    }
    /** The circuit breaker tripped in the given mode at the given step. */
    | {
        type: 'circuit:tripped';
        triggerMode: 'repetition' | 'stagnation';
        stepIndex: number;
    }
    /** Message compression ran; message counts before/after and tokens freed. */
    | {
        type: 'compress:triggered';
        messagesBefore: number;
        messagesAfter: number;
        tokensFreed: number;
    }
    /** Pricing data was fetched for `modelCount` models. */
    | {
        type: 'pricing:fetched';
        modelCount: number;
        cachedUntil: number;
    };
46
/**
 * Maps each event `type` string to the `AgentBudgetEvent` member with that
 * type. Used to type the handler argument of `on` / `off`.
 */
export interface AgentBudgetEventMap {
    'step:start': AgentBudgetEvent & { type: 'step:start' };
    'step:token': AgentBudgetEvent & { type: 'step:token' };
    'step:end': AgentBudgetEvent & { type: 'step:end' };
    'budget:warning': AgentBudgetEvent & { type: 'budget:warning' };
    'budget:exceeded': AgentBudgetEvent & { type: 'budget:exceeded' };
    'model:downgraded': AgentBudgetEvent & { type: 'model:downgraded' };
    'circuit:tripped': AgentBudgetEvent & { type: 'circuit:tripped' };
    'compress:triggered': AgentBudgetEvent & { type: 'compress:triggered' };
    'pricing:fetched': AgentBudgetEvent & { type: 'pricing:fetched' };
}
75
/**
 * Typed event emitter for `AgentBudgetEvent`s with an optional catch-all callback.
 */
export declare class AgentEventEmitter {
    private readonly onEvent?;
    private readonly emitter;

    /**
     * @param onEvent - Optional callback that receives every emitted event.
     */
    constructor(onEvent?: ((event: AgentBudgetEvent) => void) | undefined);

    /** Emits `event` to the handlers registered for `event.type`. */
    emit(event: AgentBudgetEvent): void;

    /** Registers `handler` for events of the given type; returns `this`. */
    on<K extends keyof AgentBudgetEventMap>(
        type: K,
        handler: (event: AgentBudgetEventMap[K]) => void
    ): this;

    /** Unregisters `handler` for events of the given type; returns `this`. */
    off<K extends keyof AgentBudgetEventMap>(
        type: K,
        handler: (event: AgentBudgetEventMap[K]) => void
    ): this;

    /** Removes every handler registered through `on`. */
    removeAllListeners(): void;
}
84
/**
 * Emits `budget:warning` events when usage reaches a fraction of a limit.
 */
export declare class WarningChecker {
    private firedMetrics;

    /** Clears the record of metrics that have already produced a warning. */
    reset(): void;

    /**
     * Compares usage against the configured limits and reports through `emit`.
     *
     * @param usage - Accumulated totals.
     * @param limits - Optional caps; each metric may be left unset.
     * @param warnPct - Threshold the usage/limit ratio is compared against.
     * @param emit - Receives the warning events.
     */
    check(
        usage: {
            totalCostUSD: number;
            totalInputTokens: number;
            totalOutputTokens: number;
            steps: number;
        },
        limits: {
            maxCostUSD?: number;
            maxInputTokens?: number;
            maxOutputTokens?: number;
            maxTotalTokens?: number;
            maxSteps?: number;
        },
        warnPct: number,
        emit: (event: AgentBudgetEvent) => void
    ): void;
}
package/dist/events.js ADDED
@@ -0,0 +1,56 @@
1
+ import { EventEmitter } from 'node:events';
2
+ // ─── Typed emitter ───────────────────────────────────────────────────────────
3
+ export class AgentEventEmitter {
4
+ onEvent;
5
+ emitter = new EventEmitter();
6
+ constructor(onEvent) {
7
+ this.onEvent = onEvent;
8
+ this.emitter.setMaxListeners(50);
9
+ }
10
+ emit(event) {
11
+ this.emitter.emit(event.type, event);
12
+ this.onEvent?.(event);
13
+ }
14
+ on(type, handler) {
15
+ this.emitter.on(type, handler);
16
+ return this;
17
+ }
18
+ off(type, handler) {
19
+ this.emitter.off(type, handler);
20
+ return this;
21
+ }
22
+ removeAllListeners() {
23
+ this.emitter.removeAllListeners();
24
+ }
25
+ }
26
+ // ─── Warning threshold checker ───────────────────────────────────────────────
27
+ export class WarningChecker {
28
+ firedMetrics = new Set();
29
+ reset() {
30
+ this.firedMetrics.clear();
31
+ }
32
+ check(usage, limits, warnPct, emit) {
33
+ const totalTokens = usage.totalInputTokens + usage.totalOutputTokens;
34
+ const metrics = [
35
+ { key: 'cost', reason: 'cost', limit: limits.maxCostUSD, actual: usage.totalCostUSD },
36
+ { key: 'steps', reason: 'steps', limit: limits.maxSteps, actual: usage.steps },
37
+ { key: 'totalTokens', reason: 'totalTokens', limit: limits.maxTotalTokens, actual: totalTokens },
38
+ { key: 'inputTokens', reason: 'inputTokens', limit: limits.maxInputTokens, actual: usage.totalInputTokens },
39
+ { key: 'outputTokens', reason: 'outputTokens', limit: limits.maxOutputTokens, actual: usage.totalOutputTokens },
40
+ ];
41
+ for (const { key, reason, limit, actual } of metrics) {
42
+ if (limit === undefined)
43
+ continue;
44
+ const pctConsumed = actual / limit;
45
+ if (pctConsumed >= warnPct && !this.firedMetrics.has(key)) {
46
+ this.firedMetrics.add(key);
47
+ emit({
48
+ type: 'budget:warning',
49
+ reason,
50
+ pctConsumed,
51
+ remaining: limit - actual,
52
+ });
53
+ }
54
+ }
55
+ }
56
+ }
@@ -0,0 +1,108 @@
1
+ import type { BudgetOptions, BudgetUsage, StepRequest, OpenRouterResponse, OpenRouterMessage, CheckpointData } from './types.js';
2
/**
 * Budget-enforcing wrapper around agent steps (public surface only; the
 * implementation is not part of this declaration).
 */
export declare class AgentBudget {
    // Private members: their types are erased in the declaration output.
    private readonly apiKey;
    private readonly limits;
    private readonly onExceeded;
    private readonly cacheTTL;
    private readonly siteUrl?;
    private readonly appTitle?;
    private readonly autoCompress?;
    private readonly adaptiveRouting?;
    private currentModelIndex;
    private tracker;
    private readonly circuitBreaker;
    private readonly checkpointManager;
    private readonly emitter;
    private readonly warningChecker;
    private readonly warningThreshold;
    private readonly telemetry;
    private tracer;
    private readonly executor?;
    private readonly baseUrl;
    private readonly defaultHeaders;

    constructor(options: BudgetOptions);

    /**
     * Runs a single agent step through OpenRouter.
     * Budget limits are checked both before and after the API call.
     *
     * @throws BudgetError when any limit is exceeded.
     */
    step(request: StepRequest): Promise<OpenRouterResponse>;

    /**
     * Returns the usage accumulated so far. May be called at any time.
     */
    getUsage(): BudgetUsage;

    /**
     * Prints one summary table to the console and returns the same usage snapshot.
     */
    summary(): BudgetUsage;

    /**
     * Subscribes `handler` to one event type.
     */
    on<K extends keyof import('./events.js').AgentBudgetEventMap>(
        type: K,
        handler: (event: import('./events.js').AgentBudgetEventMap[K]) => void
    ): this;

    /**
     * Unsubscribes `handler` from one event type.
     */
    off<K extends keyof import('./events.js').AgentBudgetEventMap>(
        type: K,
        handler: (event: import('./events.js').AgentBudgetEventMap[K]) => void
    ): this;

    /**
     * Resets every usage counter. The pricing cache is NOT reset.
     */
    reset(): void;

    /**
     * Forces pricing to be refreshed on the next step. Useful for long-running agents.
     */
    refreshPricing(): void;

    /**
     * Returns the model the adaptive router would use right now,
     * or undefined when adaptive routing is not configured.
     */
    getCurrentModel(): string | undefined;

    /**
     * Records a step into the tracker by hand.
     * Useful for replaying checkpoints or simulating usage in tests.
     */
    recordStep(usage: {
        inputTokens: number;
        outputTokens: number;
        costUSD: number;
    }): void;

    /**
     * Compresses a message array on demand, outside of the step() flow.
     * The system message (if any) and the last `keepLastN` messages are
     * preserved; everything in between is summarized via an LLM call.
     */
    compressMessages(messages: OpenRouterMessage[], keepLastN?: number): Promise<OpenRouterMessage[]>;

    /**
     * Deletes the checkpoint file. Call after the agent loop completes successfully.
     */
    clearCheckpoint(): Promise<void>;

    /**
     * Loads an existing checkpoint; resolves to null when none exists.
     */
    loadCheckpoint(): Promise<CheckpointData | null>;

    /**
     * Resumes from a checkpoint by constructing a new AgentBudget whose tracker
     * state is pre-loaded, so budget accounting continues where it left off.
     *
     * @throws When no checkpoint file exists.
     */
    static resume(options: BudgetOptions, checkpointPath?: string): Promise<AgentBudget>;

    private _initTracer;
    private _startSpan;
    private _readStream;
    private _defaultFetch;
    private _checkOrThrow;
}
94
/**
 * Creates an `AgentBudget` from the given options.
 * NOTE(review): presumably equivalent to `new AgentBudget(options)`; the
 * implementation is not visible in this declaration — confirm.
 */
export declare function createAgentBudget(options: BudgetOptions): AgentBudget;
95
+ export { BudgetError, RateLimitError, UpstreamError } from './budget.js';
96
+ export { getModelPricing, calculateCost, invalidatePricingCache, setModelPricing } from './pricing.js';
97
+ export { estimateStepCost } from './estimator.js';
98
+ export { CircuitBreaker } from './circuit-breaker.js';
99
+ export { resolveModel } from './router.js';
100
+ export type { RoutingDecision } from './router.js';
101
+ export type { CostEstimate } from './estimator.js';
102
+ export type { CircuitBreakerConfig, CircuitBreakerTrip } from './circuit-breaker.js';
103
+ export { CheckpointManager } from './checkpoint.js';
104
+ export { compressMessages, estimateMessagesTokens } from './compressor.js';
105
+ export { AgentEventEmitter } from './events.js';
106
+ export type { AgentBudgetEvent, AgentBudgetEventMap } from './events.js';
107
+ export type { CheckpointData } from './types.js';
108
+ export type { BudgetOptions, BudgetLimits, BudgetUsage, StepUsage, StepRequest, OpenRouterResponse, OpenRouterMessage, BudgetExceededError, ExceededReason, ExceededStrategy, ModelPricing, StreamChunk, TokenCallback, AgentExecutor, ExecutorResult, } from './types.js';