budget-agent 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,321 @@
1
+ # @painitehq/agent-budget
2
+
3
+ Budget-aware enforcement layer for LLM agents. Track token, cost, and step usage in real time — enforce limits before and after every LLM call. Works with any provider.
4
+
5
+ ```
6
+ npm install @painitehq/agent-budget
7
+ ```
8
+
9
+ ## Quick start
10
+
11
+ You bring your own API key and model. The SDK calls your provider.
12
+
13
+ ```ts
14
+ import { AgentBudget } from '@painitehq/agent-budget';
15
+
16
+ const agent = new AgentBudget({
17
+ apiKey: process.env.OPENROUTER_API_KEY,
18
+ limits: { maxCostUSD: 0.05, maxSteps: 10 },
19
+ });
20
+
21
+ const response = await agent.step({
22
+ model: 'anthropic/claude-opus-4.8-fast',
23
+ messages: [{ role: 'user', content: 'Hello' }],
24
+ });
25
+
26
+ console.log(agent.getUsage());
27
+ // { steps: 1, totalCostUSD: 0.000015, totalInputTokens: 12, ... }
28
+ ```
29
+
30
+ ## How it works
31
+
32
+ You provide the **model**, the **messages**, and your **API key**. The SDK:
33
+
34
+ 1. Checks budget before the call (pre-flight)
35
+ 2. Makes the API request to your provider
36
+ 3. Tracks tokens, cost, and duration
37
+ 4. Checks budget after the call (post-step)
38
+ 5. Emits events for streaming, warnings, and overages
39
+
40
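+ No model is defaulted — you always choose it. The built-in transport targets OpenRouter (or any OpenAI-compatible endpoint via `baseUrl`); pass a custom `executor` to use any other provider.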
41
+
42
+ ## Limits
43
+
44
+ Budget guardrails that stop your agent before it spends too much:
45
+
46
+ ```ts
47
+ limits: {
48
+ maxCostUSD: 0.05, // total USD before the agent aborts
49
+ maxSteps: 10, // total LLM calls before abort
50
+ maxInputTokens: 50000, // total input tokens sent to models
51
+ maxOutputTokens: 10000, // total output tokens received
52
+ maxTotalTokens: 60000, // input + output combined
53
+ maxWallTimeMs: 60000, // 60 seconds wall clock
54
+ }
55
+ ```
56
+
57
+ Every limit is optional. Omit what you don't want to enforce.
58
+
59
+ ### How enforcement works
60
+
61
+ Each `step()` runs two checks:
62
+
63
+ 1. **Pre-flight** — before the API call. Estimates output cost (default 512 tokens) and catches over-budget calls before burning money.
64
+ 2. **Post-step** — after recording the real token/cost. If exceeded, the step is **rolled back** from the tracker so you can retry without a stale balance.
65
+
66
+ ```ts
67
+ const agent = new AgentBudget({
68
+ apiKey: key,
69
+ limits: { maxCostUSD: 0.01, maxSteps: 3 },
70
+ });
71
+
72
+ try {
73
+ await agent.step({ model, messages });
74
+ } catch (err) {
75
+ if (err instanceof BudgetError) {
76
+ console.log(err.exceeded.reason); // 'cost' | 'steps' | 'wallTime' | ...
77
+ }
78
+ }
79
+ ```
80
+
81
+ ### Custom callback instead of abort
82
+
83
+ ```ts
84
+ const agent = new AgentBudget({
85
+ apiKey: key,
86
+ limits: { maxCostUSD: 0.01 },
87
+ onExceeded: (usage) => {
88
+ // Log, alert, switch models — never throws
89
+ console.log(`Over budget: $${usage.totalCostUSD}`);
90
+ },
91
+ });
92
+ ```
93
+
94
+ ### Tune pre-flight estimation
95
+
96
+ ```ts
97
+ limits: {
98
+ maxCostUSD: 0.05,
99
+ preflightCheck: false, // skip pre-flight entirely
100
+ preflightOutputTokenEstimate: 2048, // safety buffer (default 512)
101
+ }
102
+ ```
103
+
104
+ ### Warning thresholds (non-blocking)
105
+
106
+ ```ts
107
+ const agent = new AgentBudget({
108
+ limits: { maxCostUSD: 0.10 },
109
+ warningThreshold: 0.5, // fire 'budget:warning' at 50% consumption
110
+ });
111
+
112
+ agent.on('budget:warning', (e) => {
113
+ // { reason: 'cost', pctConsumed: 0.51, remaining: 0.049 }
114
+ });
115
+ ```
116
+
117
+ ### Combine with adaptive routing
118
+
119
+ ```ts
120
+ const agent = new AgentBudget({
121
+ apiKey: key,
122
+ limits: { maxCostUSD: 5.00 },
123
+ adaptiveRouting: {
124
+ fallbackChain: [
125
+ 'anthropic/claude-opus-4.8-fast', // $15/M tokens — best model
126
+ 'openai/gpt-4o', // $5/M tokens
127
+ 'openrouter/free', // $0 — emergency
128
+ ],
129
+ thresholds: [0.4, 0.75], // downgrade at 40% and 75% of budget consumed
130
+ },
131
+ });
132
+ ```
133
+
134
+ The router downgrades the model tier as the budget depletes. Each `step()` checks the current consumption against the thresholds and selects the appropriate model from the chain before the API call.
135
+
136
+ ## Bring your own executor
137
+
138
+ Use any LLM provider — OpenAI, Anthropic, Ollama, local models, or the OpenRouter Agent SDK:
139
+
140
+ ```ts
141
+ import { AgentBudget } from '@painitehq/agent-budget';
142
+ import OpenAI from 'openai';
143
+
144
+ const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
145
+
146
+ const agent = new AgentBudget({
147
+ apiKey: process.env.OPENAI_API_KEY,
148
+ limits: { maxCostUSD: 0.10 },
149
+ executor: async (request) => {
150
+ const completion = await openai.chat.completions.create({
151
+ model: request.model,
152
+ messages: request.messages,
153
+ });
154
+ return {
155
+ model: completion.model,
156
+ usage: {
157
+ prompt_tokens: completion.usage?.prompt_tokens ?? 0,
158
+ completion_tokens: completion.usage?.completion_tokens ?? 0,
159
+ total_tokens: completion.usage?.total_tokens ?? 0,
160
+ },
161
+ choices: completion.choices.map(c => ({
162
+ message: { role: c.message.role, content: c.message.content ?? '' },
163
+ finish_reason: c.finish_reason ?? 'stop',
164
+ })),
165
+ };
166
+ },
167
+ });
168
+
169
+ const response = await agent.step({
170
+ model: 'gpt-4o', // must be a model served by the provider your executor calls (OpenAI here)
171
+ messages: [{ role: 'user', content: 'Hello' }],
172
+ });
173
+ ```
174
+
175
+ Or use raw fetch to any API:
176
+
177
+ ```ts
178
+ const agent = new AgentBudget({
179
+ apiKey: 'none',
180
+ limits: { maxCostUSD: 0.05 },
181
+ executor: async (request) => {
182
+ const res = await fetch('http://localhost:11434/api/chat', {
183
+ method: 'POST',
184
+ body: JSON.stringify({ model: request.model, messages: request.messages, stream: false }),
185
+ });
186
+ const data = await res.json();
187
+ return {
188
+ model: data.model,
189
+ usage: { prompt_tokens: data.prompt_eval_count ?? 0, completion_tokens: data.eval_count ?? 0, total_tokens: (data.prompt_eval_count ?? 0) + (data.eval_count ?? 0) },
190
+ choices: data.message ? [{
191
+ message: { role: data.message.role, content: data.message.content },
192
+ finish_reason: data.done_reason ?? 'stop',
193
+ }] : [],
194
+ };
195
+ },
196
+ });
197
+ ```
198
+
199
+ ## Built-in OpenRouter support
200
+
201
+ By default, the SDK calls OpenRouter's API. Configure the endpoint and headers:
202
+
203
+ ```ts
204
+ const agent = new AgentBudget({
205
+ apiKey: process.env.OPENROUTER_API_KEY,
206
+ baseUrl: 'https://openrouter.ai/api/v1', // default — change for any OpenAI-compatible API
207
+ siteUrl: 'https://mysite.com', // OpenRouter attribution
208
+ appTitle: 'My App', // OpenRouter attribution
209
+ defaultHeaders: { 'X-Custom': 'value' }, // extra headers for every request
210
+ limits: { maxCostUSD: 0.10 },
211
+ });
212
+ ```
213
+
214
+ Works with any OpenAI-compatible endpoint: OpenRouter, OpenAI, Together AI, Fireworks, LocalAI, Ollama (with compat layer), etc.
215
+
216
+ ## Features
217
+
218
+ - **Budget enforcement** — set limits on cost, tokens, steps, wall time. Checked pre-flight and post-step.
219
+ - **Auto-compress** — replace older message history with an LLM-generated summary when the token count exceeds a threshold.
220
+ - **Circuit breaker** — detect repetition or stagnation and halt the agent.
221
+ - **Adaptive routing** — downgrade to cheaper models as budget depletes.
222
+ - **Checkpoints** — save and resume agent state across restarts.
223
+ - **Events** — subscribe to lifecycle events (`step:start`, `step:end`, `step:token`, `budget:exceeded`, etc.).
224
+ - **Pricing cache** — model pricing fetched from OpenRouter with configurable TTL (or use `setModelPricing()` for any model).
225
+ - **Rate-limit retry** — automatic 429 retry with exponential backoff (3 attempts).
226
+ - **Streaming** — set `stream: true` and listen for `step:token` events.
227
+ - **OpenTelemetry** — optional spans via `telemetry: { enabled: true }` (requires `@opentelemetry/api`).
228
+
229
+ ## API
230
+
231
+ ### `new AgentBudget(options)`
232
+
233
+ | Option | Type | Default | Description |
234
+ |--------|------|---------|-------------|
235
+ | `apiKey` | `string` | — | Your provider API key |
236
+ | `limits.*` | `object` | — | Budget limits (cost, tokens, steps, wall time) |
237
+ | `executor` | `AgentExecutor` | — | Custom API executor (replaces built-in fetch) |
238
+ | `baseUrl` | `string` | `https://openrouter.ai/api/v1` | API base URL for built-in fetch |
239
+ | `defaultHeaders` | `object` | — | Extra HTTP headers for built-in fetch |
240
+ | `autoCompress` | `object` | — | Auto-compress messages at token threshold |
241
+ | `circuitBreaker` | `object` | — | Detect repetition/stagnation loops |
242
+ | `adaptiveRouting` | `object` | — | Downgrade model tiers as budget depletes |
243
+ | `checkpoint` | `object` | — | Persist and resume agent state |
244
+ | `onExceeded` | `'abort' \| function` | `'abort'` | Strategy when budget exceeded |
245
+ | `onEvent` | `function` | — | Global event listener |
246
+ | `pricingCacheTTLMs` | `number` | `300_000` | Pricing cache TTL |
247
+ | `siteUrl` | `string` | — | OpenRouter HTTP-Referer |
248
+ | `appTitle` | `string` | — | OpenRouter X-OpenRouter-Title |
249
+ | `telemetry` | `object` | — | Enable OpenTelemetry spans |
250
+
251
+ ### `agent.step(request)`
252
+
253
+ Make one LLM call. Checks limits before and after. Throws `BudgetError` if exceeded.
254
+
255
+ ```ts
256
+ const response = await agent.step({
257
+ model: 'anthropic/claude-opus-4.8-fast', // any model slug
258
+ messages: [{ role: 'user', content: 'Hi' }],
259
+ stream: true, // optional — emit step:token events
260
+ });
261
+ ```
262
+
263
+ **Budget enforcement with rollback.** When a step exceeds budget, the step is recorded for circuit-breaker analysis, then rolled back before throwing. The tracker stays clean for retry. The actual spend is available in the `BudgetError`.
264
+
265
+ ### `agent.getUsage()`
266
+
267
+ Returns a snapshot of current usage:
268
+
269
+ ```ts
270
+ {
271
+ steps: number;
272
+ totalInputTokens: number;
273
+ totalOutputTokens: number;
274
+ totalCostUSD: number;
275
+ elapsedMs: number;
276
+ stepHistory: StepUsage[];
277
+ }
278
+ ```
279
+
280
+ ### `agent.summary()`
281
+
282
+ Prints a formatted table to console and returns the same usage snapshot.
283
+
284
+ ### `agent.reset()`
285
+
286
+ Reset all usage counters.
287
+
288
+ ### `agent.compressMessages(messages, keepLastN?)`
289
+
290
+ Manually compress a message array via LLM summary.
291
+
292
+ ### `agent.loadCheckpoint()` / `agent.clearCheckpoint()`
293
+
294
+ Load or clear persisted checkpoint state.
295
+
296
+ ### `AgentBudget.resume(options, checkpointPath?)`
297
+
298
+ Static factory. Creates a new agent pre-loaded with checkpoint state.
299
+
300
+ ## Events
301
+
302
+ ```ts
303
+ agent.on('step:start', (event) => console.log('Step', event.stepIndex, 'started'));
304
+ agent.on('step:token', (event) => process.stdout.write(event.token));
305
+ agent.on('step:end', (event) => console.log('Step cost:', event.costUSD));
306
+ agent.on('budget:exceeded', (event) => console.log('Limit hit:', event.exceeded.reason));
307
+ agent.on('compress:triggered', (event) => console.log('Compressed:', event.messagesBefore, '→', event.messagesAfter));
308
+ agent.on('model:downgraded', (event) => console.log('Downgraded to', event.to));
309
+ ```
310
+
311
+ ## Testing
312
+
313
+ ```
314
+ npm test
315
+ ```
316
+
317
+ Runs 10 real-API tests against OpenRouter with simulated pricing.
318
+
319
+ ## License
320
+
321
+ MIT
@@ -0,0 +1,27 @@
1
+ import type { BudgetLimits, BudgetUsage, BudgetExceededError } from './types.js';
2
+ export declare class BudgetError extends Error { // Error raised when a budget limit is exceeded; `name` is set to 'BudgetError'
3
+ readonly exceeded: BudgetExceededError; // the violation details exactly as passed to the constructor
4
+ constructor(exceeded: BudgetExceededError); // builds the message from exceeded.reason, exceeded.limit and exceeded.actual
5
+ }
6
+ export declare class RateLimitError extends Error { // Error carrying rate-limit details; `name` is set to 'RateLimitError'
7
+ readonly retryAfter: number; // stored verbatim from the constructor — NOTE(review): unit (seconds vs ms) is not visible here, confirm with the caller
8
+ readonly statusCode: number; // stored verbatim from the constructor (presumably the HTTP status, e.g. 429 — confirm)
9
+ constructor(statusCode: number, retryAfter: number, message: string); // `message` becomes the Error message unchanged
10
+ }
11
+ /**
12
+ * An error returned by the provider inside the chat completion response.
13
+ * This happens when OpenRouter returns HTTP 200 but `choices[0].error`
14
+ * contains a provider-level error (e.g., 402 insufficient credits,
15
+ * guardrail block, provider outage, etc.).
16
+ */
17
+ export declare class UpstreamError extends Error {
18
+ readonly code: number; // provider error code as passed to the constructor
19
+ readonly metadata?: Record<string, unknown>; // optional provider payload, stored as given
20
+ readonly statusCode: number; // always equal to `code`; no remapping is applied
21
+ constructor(code: number, message: string, metadata?: Record<string, unknown>); // message becomes "[agent-budget] Provider error <code>: <message>" (suffix omitted when message is empty)
22
+ }
23
+ /**
24
+ * Returns the first exceeded limit, or null if within budget. A limit counts as exceeded only when the measured value is strictly greater than it; limits left undefined are skipped.
25
+ * Order of precedence: cost → steps → totalTokens → inputTokens → outputTokens → wallTime
26
+ */
27
+ export declare function checkLimits(usage: BudgetUsage, limits: BudgetLimits): BudgetExceededError | null;
package/dist/budget.js ADDED
@@ -0,0 +1,63 @@
1
+ // ─── Error ────────────────────────────────────────────────────────────────────
2
/**
 * Error raised when a budget limit has been exceeded.
 *
 * The message always reports the reason, the configured limit and the actual
 * value (six decimal places). For the `preflightCostEstimate` reason it also
 * appends the remaining budget and the estimated cost (eight decimal places).
 * The full violation record stays available on `exceeded`.
 */
export class BudgetError extends Error {
    exceeded;
    constructor(exceeded) {
        const { reason, limit, actual } = exceeded;
        let text = `[agent-budget] Limit exceeded — reason: ${reason}, limit: ${limit}, actual: ${actual.toFixed(6)}`;
        if (reason === 'preflightCostEstimate') {
            // Pre-flight failures carry two extra figures worth surfacing.
            const remaining = exceeded.remainingBudget?.toFixed(8);
            const estimated = exceeded.estimatedCost?.toFixed(8);
            text += ` — remaining: $${remaining}, estimated: $${estimated}`;
        }
        super(text);
        this.name = 'BudgetError';
        this.exceeded = exceeded;
    }
}
14
/**
 * Error describing a rate-limited request.
 *
 * `message` is used as the Error message unchanged; `statusCode` and
 * `retryAfter` are stored exactly as supplied.
 */
export class RateLimitError extends Error {
    retryAfter;
    statusCode;
    constructor(statusCode, retryAfter, message) {
        super(message);
        // Assigned in one step, keeping the original name → statusCode → retryAfter order.
        Object.assign(this, { name: 'RateLimitError', statusCode, retryAfter });
    }
}
24
/**
 * Provider-level failure reported inside an otherwise successful chat
 * completion response: OpenRouter answers HTTP 200 but `choices[0].error`
 * holds the real error (e.g. 402 insufficient credits, a guardrail block,
 * a provider outage).
 *
 * The message is "[agent-budget] Provider error <code>", followed by
 * ": <message>" when a non-empty message is supplied.
 */
export class UpstreamError extends Error {
    code;
    metadata;
    statusCode;
    constructor(code, message, metadata) {
        const headline = `[agent-budget] Provider error ${code}`;
        super(message ? `${headline}: ${message}` : headline);
        // statusCode mirrors the provider code as-is; no translation table is applied.
        Object.assign(this, { name: 'UpstreamError', code, metadata, statusCode: code });
    }
}
43
+ // ─── Checker ─────────────────────────────────────────────────────────────────
44
/**
 * Returns the first exceeded limit, or null if within budget.
 * Order of precedence: cost → steps → totalTokens → inputTokens → outputTokens → wallTime
 *
 * A limit is exceeded only when the measured value is strictly greater than
 * it. Limits that are `undefined` or `null` are treated as "not enforced";
 * a missing `limits` object enforces nothing.
 *
 * @param usage  Current usage snapshot (steps, token totals, cost, elapsed time).
 * @param limits Configured limits; every field is optional.
 * @returns `{ reason, limit, actual, usage }` for the first violated limit, else `null`.
 */
export function checkLimits(usage, limits) {
    const active = limits ?? {};
    const checks = [
        { reason: 'cost', limit: active.maxCostUSD, actual: usage.totalCostUSD },
        { reason: 'steps', limit: active.maxSteps, actual: usage.steps },
        { reason: 'totalTokens', limit: active.maxTotalTokens, actual: usage.totalInputTokens + usage.totalOutputTokens },
        { reason: 'inputTokens', limit: active.maxInputTokens, actual: usage.totalInputTokens },
        { reason: 'outputTokens', limit: active.maxOutputTokens, actual: usage.totalOutputTokens },
        { reason: 'wallTime', limit: active.maxWallTimeMs, actual: usage.elapsedMs },
    ];
    for (const { reason, limit, actual } of checks) {
        // `== null` skips both undefined and null. Without this, a null limit
        // (e.g. from JSON config) is coerced to 0 and reported as exceeded.
        if (limit == null)
            continue;
        if (actual > limit) {
            return { reason, limit, actual, usage };
        }
    }
    return null;
}
@@ -0,0 +1,10 @@
1
+ import type { BudgetUsage, CheckpointData, OpenRouterMessage } from './types.js';
2
+ export declare class CheckpointManager { // Persists agent state as a JSON file on disk
3
+ private readonly filePath; // target file; defaults to './.agent-checkpoint.json' when no path option is given
4
+ constructor(options?: {
5
+ path?: string; // where the checkpoint file is written and read
6
+ });
7
+ save(messages: OpenRouterMessage[], usage: BudgetUsage, model: string, resumeFromStep: number): Promise<void>; // writes these fields plus a version tag and ISO creation timestamp as pretty-printed JSON
8
+ load(): Promise<CheckpointData | null>; // resolves to null when the file cannot be read or parsed
9
+ clear(): Promise<void>; // deletes the file; errors (e.g. file missing) are ignored
10
+ }
@@ -0,0 +1,35 @@
1
+ import { promises as fs } from 'node:fs';
2
/**
 * Persists agent state (messages, usage, model, resume position) as a JSON
 * file so a run can be resumed later.
 */
export class CheckpointManager {
    filePath;
    /**
     * @param options Optional settings; `path` overrides the default
     *                checkpoint location './.agent-checkpoint.json'.
     */
    constructor(options) {
        this.filePath = options?.path ?? './.agent-checkpoint.json';
    }
    /**
     * Writes a checkpoint, replacing any previous one.
     *
     * The payload goes to a sibling temp file that is then renamed over the
     * target, so a crash mid-write cannot leave a truncated checkpoint.
     * Rejects if the write or the rename fails.
     */
    async save(messages, usage, model, resumeFromStep) {
        const data = {
            checkpointVersion: '1.0',
            messages,
            usage,
            model,
            resumeFromStep,
            createdAt: new Date().toISOString(),
        };
        // Unique suffix so overlapping save() calls never share a temp file.
        const suffix = `${Date.now().toString(36)}${Math.random().toString(36).slice(2, 8)}`;
        const tmpPath = `${this.filePath}.${suffix}.tmp`;
        try {
            await fs.writeFile(tmpPath, JSON.stringify(data, null, 2), 'utf-8');
            await fs.rename(tmpPath, this.filePath);
        }
        catch (err) {
            // Best-effort cleanup of the temp file; the original failure is what matters.
            await fs.unlink(tmpPath).catch(() => undefined);
            throw err;
        }
    }
    /**
     * Reads the checkpoint back.
     *
     * @returns The parsed checkpoint object, or `null` when the file is
     *          missing, unreadable, not valid JSON, or not a JSON object.
     */
    async load() {
        let parsed;
        try {
            const raw = await fs.readFile(this.filePath, 'utf-8');
            parsed = JSON.parse(raw);
        }
        catch {
            return null;
        }
        // Valid JSON that is not an object (number, string, array) cannot be a checkpoint.
        const isRecord = typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed);
        return isRecord ? parsed : null;
    }
    /** Removes the checkpoint file. Deliberately best-effort: failures are ignored. */
    async clear() {
        try {
            await fs.unlink(this.filePath);
        }
        catch {
            // File may not exist — ignore
        }
    }
}
@@ -0,0 +1,27 @@
1
+ import type { BudgetUsage } from './types.js';
2
+ export interface CircuitBreakerConfig { // Optional overrides; omitted fields fall back to the built-in defaults (3, 0.85, 4, 50)
3
+ repetitionWindow?: number; // how many of the most recent steps are compared for repetition
4
+ repetitionThreshold?: number; // minimum Jaccard word similarity every consecutive pair must reach to count as repetition
5
+ stagnationWindow?: number; // how many of the most recent steps are inspected for stagnation
6
+ stagnationMinLength?: number; // outputs shorter than this many characters count as stagnant
7
+ }
8
+ export interface CircuitBreakerTrip { // Result returned when the circuit breaker detects a problem
9
+ triggerMode: 'repetition' | 'stagnation'; // which check fired
10
+ windowSize: number; // the configured window of the check that fired
11
+ similarity?: number; // mean similarity over the compared pairs; only set for 'repetition' trips
12
+ }
13
+ /**
14
+ * Stateless circuit breaker. Pulls stepHistory from BudgetUsage each time
15
+ * it is checked. No internal data store across calls.
16
+ */
17
+ export declare class CircuitBreaker {
18
+ private readonly cfg; // defaults merged with the caller's config at construction time
19
+ constructor(config?: CircuitBreakerConfig);
20
+ /**
21
+ * Analyze recent step history and return a trip if repetition or stagnation
22
+ * is detected. Returns null if all clear. Repetition is evaluated first; stagnation is only evaluated when repetition did not trip.
23
+ */
24
+ check(usage: BudgetUsage): CircuitBreakerTrip | null;
25
+ private checkRepetition; // trips when every comparable consecutive pair in the window meets the similarity threshold
26
+ private checkStagnation; // trips when every step in the window has output shorter than the minimum length
27
+ }
@@ -0,0 +1,93 @@
1
+ // ─── Defaults ────────────────────────────────────────────────────────────────
2
+ const DEFAULTS = { // Fallback settings; CircuitBreaker spreads caller config over these
3
+ repetitionWindow: 3, // number of most recent steps compared for repetition
4
+ repetitionThreshold: 0.85, // a consecutive pair below this Jaccard similarity clears the repetition check
5
+ stagnationWindow: 4, // number of most recent steps inspected for stagnation
6
+ stagnationMinLength: 50, // outputs shorter than this many characters count as stagnant
7
+ };
8
+ // ─── Helpers ─────────────────────────────────────────────────────────────────
9
/**
 * Canonicalises text for word-level comparison: lower-cases it, strips
 * everything that is not a letter, combining mark, digit, underscore or
 * whitespace, collapses whitespace runs to one space and trims.
 *
 * Unicode property escapes are used instead of `\w` so that non-Latin
 * scripts (Cyrillic, CJK, Arabic, …) survive. With ASCII-only `\w` such
 * text collapsed to the empty string and all of it looked identical.
 *
 * @param s Raw text.
 * @returns The normalised text; may be the empty string.
 */
function normalizeText(s) {
    return s
        .toLowerCase()
        .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '')
        .replace(/\s+/g, ' ')
        .trim();
}
/**
 * Jaccard similarity of the word sets of two texts: |A ∩ B| / |A ∪ B|.
 *
 * @param a First text.
 * @param b Second text.
 * @returns A value in [0, 1]. Two texts with no words at all count as
 *          identical (1); one empty and one non-empty text score 0.
 */
function jaccardSimilarity(a, b) {
    // ''.split(' ') yields [''], so map empty text to an empty set explicitly.
    // Otherwise the "both empty" guard below can never fire.
    const toWordSet = (text) => {
        const normalized = normalizeText(text);
        return new Set(normalized === '' ? [] : normalized.split(' '));
    };
    const wordsA = toWordSet(a);
    const wordsB = toWordSet(b);
    if (wordsA.size === 0 && wordsB.size === 0)
        return 1;
    let intersection = 0;
    for (const w of wordsA) {
        if (wordsB.has(w))
            intersection++;
    }
    // At least one set is non-empty here, so the union is never zero.
    const union = wordsA.size + wordsB.size - intersection;
    return intersection / union;
}
29
+ // ─── CircuitBreaker ──────────────────────────────────────────────────────────
30
/**
 * Stateless circuit breaker. Pulls stepHistory from BudgetUsage each time
 * it is checked. No internal data store across calls.
 */
export class CircuitBreaker {
    cfg;
    /**
     * @param config Optional overrides for the default thresholds. Fields that
     *               are absent or explicitly `undefined` keep their default.
     */
    constructor(config) {
        // A plain spread lets `{ repetitionThreshold: undefined }` overwrite the
        // default, and every later comparison against `undefined` is false, so the
        // breaker would trip on any comparable pair. Drop undefined entries first.
        const overrides = Object.fromEntries(Object.entries(config ?? {}).filter(([, value]) => value !== undefined));
        this.cfg = { ...DEFAULTS, ...overrides };
    }
    /**
     * Analyze recent step history and return a trip if repetition or stagnation
     * is detected. Returns null if all clear. Repetition is evaluated first.
     */
    check(usage) {
        return this.checkRepetition(usage) ?? this.checkStagnation(usage);
    }
    /**
     * Trips when every comparable consecutive pair among the last
     * `repetitionWindow` steps reaches `repetitionThreshold` similarity.
     * Pairs where either step has no output are skipped; at least one
     * comparable pair is required.
     */
    checkRepetition(usage) {
        const history = usage.stepHistory;
        if (history.length < this.cfg.repetitionWindow)
            return null;
        // Look at the last N entries for consecutive similarity
        const recent = history.slice(-this.cfg.repetitionWindow);
        let checkedPairs = 0;
        let totalSim = 0;
        for (let i = 1; i < recent.length; i++) {
            const prev = recent[i - 1].outputContent;
            const curr = recent[i].outputContent;
            if (!prev || !curr)
                continue;
            checkedPairs++;
            const sim = jaccardSimilarity(prev, curr);
            totalSim += sim;
            // One dissimilar pair is enough to rule out a repetition loop.
            if (sim < this.cfg.repetitionThreshold) {
                return null;
            }
        }
        // Need at least one valid pair to trip
        if (checkedPairs === 0)
            return null;
        return {
            triggerMode: 'repetition',
            windowSize: this.cfg.repetitionWindow,
            similarity: totalSim / checkedPairs,
        };
    }
    /**
     * Trips when each of the last `stagnationWindow` steps produced text
     * output shorter than `stagnationMinLength` characters.
     */
    checkStagnation(usage) {
        const history = usage.stepHistory;
        if (history.length < this.cfg.stagnationWindow)
            return null;
        // Only consider steps that carry text output. Steps recorded via
        // recordStep() or other non-LLM paths have none, and skipping them
        // prevents false-positive stagnation trips. The string check also keeps
        // a null outputContent from crashing the length test below.
        const recent = history
            .slice(-this.cfg.stagnationWindow)
            .filter((s) => typeof s.outputContent === 'string');
        if (recent.length < this.cfg.stagnationWindow)
            return null;
        const allStagnant = recent.every((s) => s.outputContent.length < this.cfg.stagnationMinLength);
        if (!allStagnant)
            return null;
        return {
            triggerMode: 'stagnation',
            windowSize: this.cfg.stagnationWindow,
        };
    }
}
@@ -0,0 +1,22 @@
1
+ import type { OpenRouterMessage } from './types.js';
2
+ /**
3
+ * Character-based token approximation. 4 chars ≈ 1 token.
4
+ * Good enough for threshold decisions — not for billing. NOTE(review): how fractional results are rounded is not visible in this declaration — confirm against the implementation.
5
+ */
6
+ export declare function estimateTokens(text: string): number;
7
+ /**
8
+ * Estimates total token count for a message array. Presumably built on the same 4-chars-per-token approximation as estimateTokens — confirm against the implementation.
9
+ */
10
+ export declare function estimateMessagesTokens(messages: OpenRouterMessage[]): number;
11
+ /**
12
+ * Compresses a message array by summarizing the middle section via an LLM call.
13
+ *
14
+ * Strategy:
15
+ * 1. Preserve the system message (if present) — never touched.
16
+ * 2. Preserve the last `keepLastN` messages — never touched.
17
+ * 3. Everything in between is replaced with a single synthetic assistant
18
+ * message containing an LLM-generated summary.
19
+ *
20
+ * The summary is clearly marked so downstream code can detect it. NOTE(review): the default for `keepLastN`, the model used for the summary call, and the endpoint `apiKey` is sent to are not visible in this declaration — confirm against the implementation.
21
+ */
22
+ export declare function compressMessages(messages: OpenRouterMessage[], apiKey: string, keepLastN?: number): Promise<OpenRouterMessage[]>;