tokenfirewall 1.0.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +466 -158
- package/dist/core/pricingRegistry.js +56 -5
- package/dist/index.d.ts +32 -0
- package/dist/index.js +66 -12
- package/dist/interceptors/fetchInterceptor.d.ts +5 -0
- package/dist/interceptors/fetchInterceptor.js +278 -27
- package/dist/introspection/contextRegistry.d.ts +5 -0
- package/dist/introspection/contextRegistry.js +58 -6
- package/dist/logger.d.ts +5 -0
- package/dist/logger.js +10 -0
- package/dist/router/errorDetector.d.ts +45 -0
- package/dist/router/errorDetector.js +170 -0
- package/dist/router/modelRouter.d.ts +33 -0
- package/dist/router/modelRouter.js +111 -0
- package/dist/router/routingStrategies.d.ts +16 -0
- package/dist/router/routingStrategies.js +243 -0
- package/dist/router/types.d.ts +65 -0
- package/dist/router/types.js +5 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,16 +1,52 @@
|
|
|
1
|
-
#
|
|
1
|
+
# TokenFirewall
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Enterprise-grade LLM cost enforcement middleware for Node.js with automatic budget protection, intelligent model routing, and comprehensive multi-provider support.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
[npm version](https://www.npmjs.com/package/tokenfirewall)
|
|
6
|
+
[npm downloads](https://www.npmjs.com/package/tokenfirewall)
|
|
7
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
8
|
+
[TypeScript](https://www.typescriptlang.org/)
|
|
6
9
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
- **
|
|
10
|
+
## Overview
|
|
11
|
+
|
|
12
|
+
TokenFirewall is a production-ready middleware that automatically tracks and enforces budget limits for Large Language Model (LLM) API calls. It provides transparent cost monitoring, prevents budget overruns, offers intelligent model routing with automatic failover, and supports multiple providers through a unified interface.
|
|
13
|
+
|
|
14
|
+
### Key Features
|
|
15
|
+
|
|
16
|
+
- **Never Exceed Your Budget** - Automatically blocks API calls when spending limits are reached, preventing surprise bills
|
|
17
|
+
- **Zero Code Changes Required** - Drop-in middleware that works with any `fetch`-based LLM API call; after a one-time setup, your existing request code stays unmodified
|
|
18
|
+
- **Automatic Failover** - Intelligent router switches to backup models when primary fails, keeping your app running
|
|
19
|
+
- **Real-time Cost Tracking** - See exactly how much each API call costs based on actual token usage
|
|
20
|
+
- **Multi-Provider Support** - Works with OpenAI, Anthropic, Gemini, Grok, Kimi, and any custom LLM provider
|
|
21
|
+
- **Custom Model Support** - Register your own models with custom pricing and context limits at runtime
|
|
22
|
+
- **Production Ready** - Battle-tested with comprehensive error handling and edge case coverage
|
|
23
|
+
- **TypeScript Native** - Full type safety with included definitions
|
|
24
|
+
|
|
25
|
+
### What's New in v2.0.0
|
|
26
|
+
|
|
27
|
+
- **Intelligent Router** - Automatic failover to backup models when API calls fail
|
|
28
|
+
- **40+ Latest Models** - GPT-5, Claude 4.5, Gemini 3, with accurate 2026 pricing
|
|
29
|
+
- **Dynamic Registration** - Add custom models and pricing at runtime
|
|
30
|
+
- **Production Hardened** - Comprehensive validation, error handling, and edge case coverage
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## Table of Contents
|
|
35
|
+
|
|
36
|
+
- [Installation](#installation)
|
|
37
|
+
- [Quick Start](#quick-start)
|
|
38
|
+
- [Core Concepts](#core-concepts)
|
|
39
|
+
- [API Reference](#api-reference)
|
|
40
|
+
- [Intelligent Model Router](#intelligent-model-router)
|
|
41
|
+
- [Dynamic Model Registration](#dynamic-model-registration)
|
|
42
|
+
- [Supported Providers](#supported-providers)
|
|
43
|
+
- [Examples](#examples)
|
|
44
|
+
- [TypeScript Support](#typescript-support)
|
|
45
|
+
- [Error Handling](#error-handling)
|
|
46
|
+
- [Best Practices](#best-practices)
|
|
47
|
+
- [License](#license)
|
|
48
|
+
|
|
49
|
+
---
|
|
14
50
|
|
|
15
51
|
## Installation
|
|
16
52
|
|
|
@@ -18,21 +54,27 @@ Production-grade LLM cost enforcement middleware for Node.js with automatic trac
|
|
|
18
54
|
npm install tokenfirewall
|
|
19
55
|
```
|
|
20
56
|
|
|
57
|
+
**Requirements:**
|
|
58
|
+
- Node.js >= 16.0.0
|
|
59
|
+
- TypeScript >= 5.0.0 (for TypeScript projects)
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
21
63
|
## Quick Start
|
|
22
64
|
|
|
23
65
|
```javascript
|
|
24
66
|
const { createBudgetGuard, patchGlobalFetch } = require("tokenfirewall");
|
|
25
67
|
|
|
26
|
-
// Set budget
|
|
68
|
+
// Step 1: Set up budget protection
|
|
27
69
|
createBudgetGuard({
|
|
28
70
|
monthlyLimit: 100, // $100 USD
|
|
29
|
-
mode: "block" //
|
|
71
|
+
mode: "block" // Throw error when exceeded
|
|
30
72
|
});
|
|
31
73
|
|
|
32
|
-
//
|
|
74
|
+
// Step 2: Patch global fetch
|
|
33
75
|
patchGlobalFetch();
|
|
34
76
|
|
|
35
|
-
// Use any LLM API
|
|
77
|
+
// Step 3: Use any LLM API normally
|
|
36
78
|
const response = await fetch("https://api.openai.com/v1/chat/completions", {
|
|
37
79
|
method: "POST",
|
|
38
80
|
headers: {
|
|
@@ -40,273 +82,539 @@ const response = await fetch("https://api.openai.com/v1/chat/completions", {
|
|
|
40
82
|
"Content-Type": "application/json"
|
|
41
83
|
},
|
|
42
84
|
body: JSON.stringify({
|
|
43
|
-
model: "gpt-4o",
|
|
85
|
+
model: "gpt-4o-mini",
|
|
44
86
|
messages: [{ role: "user", content: "Hello!" }]
|
|
45
87
|
})
|
|
46
88
|
});
|
|
89
|
+
|
|
90
|
+
// Costs are automatically tracked and logged
|
|
47
91
|
```
|
|
48
92
|
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## Core Concepts
|
|
96
|
+
|
|
97
|
+
### Budget Guard
|
|
98
|
+
|
|
99
|
+
The Budget Guard tracks spending and enforces limits in two modes:
|
|
100
|
+
|
|
101
|
+
- **Block Mode** (`mode: "block"`): Throws an error when budget is exceeded, preventing the API call
|
|
102
|
+
- **Warn Mode** (`mode: "warn"`): Logs a warning but allows the API call to proceed
|
|
103
|
+
|
|
104
|
+
### Automatic Interception
|
|
105
|
+
|
|
106
|
+
TokenFirewall intercepts HTTP requests at the `fetch` level, automatically:
|
|
107
|
+
1. Detecting LLM API responses
|
|
108
|
+
2. Extracting token usage information
|
|
109
|
+
3. Calculating costs based on provider pricing
|
|
110
|
+
4. Tracking against your budget
|
|
111
|
+
5. Logging usage details
|
|
112
|
+
|
|
113
|
+
### Provider Adapters
|
|
114
|
+
|
|
115
|
+
Each LLM provider has a dedicated adapter that:
|
|
116
|
+
- Detects provider-specific response formats
|
|
117
|
+
- Normalizes token usage data
|
|
118
|
+
- Applies correct pricing models
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
49
122
|
## API Reference
|
|
50
123
|
|
|
51
124
|
### Budget Management
|
|
52
125
|
|
|
53
126
|
#### `createBudgetGuard(options)`
|
|
54
127
|
|
|
55
|
-
|
|
128
|
+
Creates and configures a budget guard instance.
|
|
129
|
+
|
|
130
|
+
**Parameters:**
|
|
131
|
+
|
|
132
|
+
```typescript
|
|
133
|
+
interface BudgetGuardOptions {
|
|
134
|
+
monthlyLimit: number; // Maximum spending limit in USD
|
|
135
|
+
mode?: "block" | "warn"; // Enforcement mode (default: "block")
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
**Example:**
|
|
56
140
|
|
|
57
141
|
```javascript
|
|
142
|
+
const { createBudgetGuard } = require("tokenfirewall");
|
|
143
|
+
|
|
144
|
+
// Block mode - strict enforcement
|
|
58
145
|
createBudgetGuard({
|
|
59
|
-
monthlyLimit: 100,
|
|
60
|
-
mode: "block"
|
|
146
|
+
monthlyLimit: 100,
|
|
147
|
+
mode: "block"
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
// Warn mode - soft limits
|
|
151
|
+
createBudgetGuard({
|
|
152
|
+
monthlyLimit: 500,
|
|
153
|
+
mode: "warn"
|
|
61
154
|
});
|
|
62
155
|
```
|
|
63
156
|
|
|
157
|
+
---
|
|
158
|
+
|
|
64
159
|
#### `getBudgetStatus()`
|
|
65
160
|
|
|
66
|
-
|
|
161
|
+
Retrieves the current budget status and usage statistics.
|
|
162
|
+
|
|
163
|
+
**Returns:**
|
|
164
|
+
|
|
165
|
+
```typescript
|
|
166
|
+
interface BudgetStatus {
|
|
167
|
+
totalSpent: number; // Total amount spent in USD
|
|
168
|
+
limit: number; // Monthly limit in USD
|
|
169
|
+
remaining: number; // Remaining budget in USD
|
|
170
|
+
percentageUsed: number; // Percentage of budget used (0-100)
|
|
171
|
+
}
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
**Example:**
|
|
67
175
|
|
|
68
176
|
```javascript
|
|
177
|
+
const { getBudgetStatus } = require("tokenfirewall");
|
|
178
|
+
|
|
69
179
|
const status = getBudgetStatus();
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
// }
|
|
180
|
+
if (status) {
|
|
181
|
+
console.log(`Spent: $${status.totalSpent.toFixed(2)}`);
|
|
182
|
+
console.log(`Remaining: $${status.remaining.toFixed(2)}`);
|
|
183
|
+
console.log(`Usage: ${status.percentageUsed.toFixed(1)}%`);
|
|
184
|
+
}
|
|
76
185
|
```
|
|
77
186
|
|
|
187
|
+
---
|
|
188
|
+
|
|
78
189
|
#### `resetBudget()`
|
|
79
190
|
|
|
80
|
-
|
|
191
|
+
Resets the budget tracking to zero.
|
|
81
192
|
|
|
82
193
|
```javascript
|
|
194
|
+
const { resetBudget } = require("tokenfirewall");
|
|
195
|
+
|
|
196
|
+
// Reset at the start of each month
|
|
83
197
|
resetBudget();
|
|
84
198
|
```
|
|
85
199
|
|
|
86
|
-
|
|
200
|
+
---
|
|
87
201
|
|
|
88
|
-
#### `
|
|
202
|
+
#### `exportBudgetState()` / `importBudgetState(state)`
|
|
89
203
|
|
|
90
|
-
|
|
204
|
+
Save and restore budget state for persistence.
|
|
91
205
|
|
|
92
206
|
```javascript
|
|
93
|
-
|
|
207
|
+
const { exportBudgetState, importBudgetState } = require("tokenfirewall");
|
|
208
|
+
const fs = require("fs");
|
|
209
|
+
|
|
210
|
+
// Export state
|
|
211
|
+
const state = exportBudgetState();
|
|
212
|
+
fs.writeFileSync("budget.json", JSON.stringify(state));
|
|
213
|
+
|
|
214
|
+
// Import state
|
|
215
|
+
const savedState = JSON.parse(fs.readFileSync("budget.json"));
|
|
216
|
+
importBudgetState(savedState);
|
|
94
217
|
```
|
|
95
218
|
|
|
96
|
-
|
|
219
|
+
---
|
|
220
|
+
|
|
221
|
+
### Interception
|
|
222
|
+
|
|
223
|
+
#### `patchGlobalFetch()`
|
|
97
224
|
|
|
98
|
-
|
|
225
|
+
Patches the global `fetch` function to intercept and track LLM API calls.
|
|
99
226
|
|
|
100
227
|
```javascript
|
|
101
|
-
|
|
228
|
+
const { patchGlobalFetch } = require("tokenfirewall");
|
|
229
|
+
|
|
230
|
+
patchGlobalFetch();
|
|
231
|
+
|
|
232
|
+
// All subsequent fetch calls are intercepted
|
|
102
233
|
```
|
|
103
234
|
|
|
235
|
+
---
|
|
236
|
+
|
|
104
237
|
### Model Discovery
|
|
105
238
|
|
|
106
|
-
#### `
|
|
239
|
+
#### `listModels(options)`
|
|
240
|
+
|
|
241
|
+
Lists available models from a provider with context limits and budget information.
|
|
107
242
|
|
|
108
|
-
|
|
243
|
+
**Parameters:**
|
|
244
|
+
|
|
245
|
+
```typescript
|
|
246
|
+
interface ListModelsOptions {
|
|
247
|
+
provider: string; // Provider name
|
|
248
|
+
apiKey: string; // Provider API key
|
|
249
|
+
baseURL?: string; // Custom API endpoint
|
|
250
|
+
includeBudgetUsage?: boolean; // Include budget usage %
|
|
251
|
+
}
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
**Example:**
|
|
109
255
|
|
|
110
256
|
```javascript
|
|
111
|
-
const
|
|
257
|
+
const { listModels } = require("tokenfirewall");
|
|
258
|
+
|
|
259
|
+
const models = await listModels({
|
|
112
260
|
provider: "openai",
|
|
113
261
|
apiKey: process.env.OPENAI_API_KEY,
|
|
114
|
-
includeBudgetUsage: true
|
|
262
|
+
includeBudgetUsage: true
|
|
115
263
|
});
|
|
116
264
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
// model: "gpt-4o",
|
|
121
|
-
// contextLimit: 128000,
|
|
122
|
-
// budgetUsagePercentage: 32.4
|
|
123
|
-
// }
|
|
124
|
-
// ]
|
|
265
|
+
models.forEach(model => {
|
|
266
|
+
console.log(`${model.model}: ${model.contextLimit} tokens`);
|
|
267
|
+
});
|
|
125
268
|
```
|
|
126
269
|
|
|
127
|
-
|
|
270
|
+
---
|
|
128
271
|
|
|
129
|
-
|
|
272
|
+
## Intelligent Model Router
|
|
130
273
|
|
|
131
|
-
|
|
274
|
+
The Model Router provides automatic retry and model switching on failures.
|
|
132
275
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
detect: (response) => /* detection logic */,
|
|
137
|
-
normalize: (response) => /* normalization logic */
|
|
138
|
-
});
|
|
139
|
-
```
|
|
276
|
+
### `createModelRouter(options)`
|
|
277
|
+
|
|
278
|
+
Creates and configures an intelligent model router.
|
|
140
279
|
|
|
141
|
-
|
|
280
|
+
**Parameters:**
|
|
142
281
|
|
|
143
|
-
|
|
282
|
+
```typescript
|
|
283
|
+
interface ModelRouterOptions {
|
|
284
|
+
strategy: "fallback" | "context" | "cost"; // Routing strategy
|
|
285
|
+
fallbackMap?: Record<string, string[]>; // Fallback model map
|
|
286
|
+
maxRetries?: number; // Max retry attempts (default: 1)
|
|
287
|
+
}
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
**Example:**
|
|
144
291
|
|
|
145
292
|
```javascript
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
293
|
+
const { createModelRouter, patchGlobalFetch } = require("tokenfirewall");
|
|
294
|
+
|
|
295
|
+
// Fallback strategy - use predefined fallback models
|
|
296
|
+
createModelRouter({
|
|
297
|
+
strategy: "fallback",
|
|
298
|
+
fallbackMap: {
|
|
299
|
+
"gpt-4o": ["gpt-4o-mini", "gpt-3.5-turbo"],
|
|
300
|
+
"claude-3-5-sonnet-20241022": ["claude-3-5-haiku-20241022"]
|
|
301
|
+
},
|
|
302
|
+
maxRetries: 2
|
|
149
303
|
});
|
|
304
|
+
|
|
305
|
+
patchGlobalFetch();
|
|
306
|
+
|
|
307
|
+
// API calls will automatically retry with fallback models on failure
|
|
150
308
|
```
|
|
151
309
|
|
|
152
|
-
|
|
310
|
+
### Routing Strategies
|
|
311
|
+
|
|
312
|
+
**1. Fallback Strategy** - Uses predefined fallback map
|
|
313
|
+
- Tries models in order from fallbackMap
|
|
314
|
+
- Best for: Known model preferences, production resilience
|
|
315
|
+
|
|
316
|
+
**2. Context Strategy** - Upgrades to larger context window
|
|
317
|
+
- Only triggers on context overflow errors
|
|
318
|
+
- Selects model with larger context from same provider
|
|
319
|
+
- Best for: Handling variable input sizes
|
|
320
|
+
|
|
321
|
+
**3. Cost Strategy** - Switches to cheaper model
|
|
322
|
+
- Selects cheaper model from same provider
|
|
323
|
+
- Best for: Cost optimization, rate limit handling
|
|
324
|
+
|
|
325
|
+
### Error Detection
|
|
153
326
|
|
|
154
|
-
|
|
327
|
+
The router automatically detects and classifies failures:
|
|
328
|
+
- `rate_limit` - HTTP 429 or rate limit errors
|
|
329
|
+
- `context_overflow` - Context length exceeded errors
|
|
330
|
+
- `model_unavailable` - HTTP 404 or model not found
|
|
331
|
+
- `access_denied` - HTTP 403 or unauthorized
|
|
332
|
+
- `unknown` - Other errors
|
|
333
|
+
|
|
334
|
+
### `disableModelRouter()`
|
|
335
|
+
|
|
336
|
+
Disables the model router.
|
|
155
337
|
|
|
156
338
|
```javascript
|
|
157
|
-
|
|
339
|
+
const { disableModelRouter } = require("tokenfirewall");
|
|
340
|
+
|
|
341
|
+
disableModelRouter();
|
|
158
342
|
```
|
|
159
343
|
|
|
160
|
-
|
|
344
|
+
---
|
|
161
345
|
|
|
162
|
-
|
|
163
|
-
|----------|--------|----------------|
|
|
164
|
-
| OpenAI | gpt-4o, gpt-4o-mini, gpt-3.5-turbo | 16K - 128K |
|
|
165
|
-
| Anthropic | claude-3-5-sonnet, claude-3-5-haiku | 200K |
|
|
166
|
-
| Gemini | gemini-2.5-pro, gemini-2.5-flash | 1M - 2M |
|
|
167
|
-
| Grok | grok-beta, llama-3.3-70b | 131K |
|
|
168
|
-
| Kimi | moonshot-v1-8k/32k/128k | 8K - 128K |
|
|
346
|
+
## Dynamic Model Registration
|
|
169
347
|
|
|
170
|
-
|
|
348
|
+
Register models with pricing and context limits at runtime.
|
|
171
349
|
|
|
172
|
-
###
|
|
350
|
+
### `registerModels(provider, models)`
|
|
173
351
|
|
|
174
|
-
|
|
175
|
-
const { createBudgetGuard, patchGlobalFetch } = require("tokenfirewall");
|
|
352
|
+
Bulk register models for a provider.
|
|
176
353
|
|
|
177
|
-
|
|
178
|
-
patchGlobalFetch();
|
|
354
|
+
**Parameters:**
|
|
179
355
|
|
|
180
|
-
|
|
356
|
+
```typescript
|
|
357
|
+
interface ModelConfig {
|
|
358
|
+
name: string; // Model identifier
|
|
359
|
+
contextLimit?: number; // Context window size in tokens
|
|
360
|
+
pricing?: { // Pricing per 1M tokens (USD)
|
|
361
|
+
input: number;
|
|
362
|
+
output: number;
|
|
363
|
+
};
|
|
364
|
+
}
|
|
181
365
|
```
|
|
182
366
|
|
|
183
|
-
|
|
367
|
+
**Example:**
|
|
184
368
|
|
|
185
369
|
```javascript
|
|
186
|
-
const {
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
370
|
+
const { registerModels, createModelRouter } = require("tokenfirewall");
|
|
371
|
+
|
|
372
|
+
// Register custom models
|
|
373
|
+
registerModels("my-provider", [
|
|
374
|
+
{
|
|
375
|
+
name: "my-large-model",
|
|
376
|
+
contextLimit: 200000,
|
|
377
|
+
pricing: { input: 5.0, output: 15.0 }
|
|
378
|
+
},
|
|
379
|
+
{
|
|
380
|
+
name: "my-small-model",
|
|
381
|
+
contextLimit: 50000,
|
|
382
|
+
pricing: { input: 1.0, output: 3.0 }
|
|
383
|
+
}
|
|
384
|
+
]);
|
|
385
|
+
|
|
386
|
+
// Router will use dynamically registered models
|
|
387
|
+
createModelRouter({
|
|
388
|
+
strategy: "cost",
|
|
389
|
+
maxRetries: 2
|
|
192
390
|
});
|
|
193
|
-
|
|
194
|
-
const status = getBudgetStatus();
|
|
195
|
-
if (status.remaining < 10) {
|
|
196
|
-
console.log("Low budget - use cheaper models");
|
|
197
|
-
const cheapModels = models.filter(m => m.model.includes("mini"));
|
|
198
|
-
}
|
|
199
391
|
```
|
|
200
392
|
|
|
201
|
-
###
|
|
393
|
+
### `registerPricing(provider, model, pricing)`
|
|
394
|
+
|
|
395
|
+
Register custom pricing for a specific model.
|
|
202
396
|
|
|
203
397
|
```javascript
|
|
204
|
-
const {
|
|
398
|
+
const { registerPricing } = require("tokenfirewall");
|
|
205
399
|
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
400
|
+
registerPricing("openai", "gpt-5", {
|
|
401
|
+
input: 5.0, // $5 per 1M input tokens
|
|
402
|
+
output: 15.0 // $15 per 1M output tokens
|
|
209
403
|
});
|
|
210
|
-
|
|
211
|
-
// Find model with sufficient context
|
|
212
|
-
const suitable = models.find(m =>
|
|
213
|
-
m.contextLimit && m.contextLimit >= promptTokens * 1.5
|
|
214
|
-
);
|
|
215
404
|
```
|
|
216
405
|
|
|
217
|
-
###
|
|
406
|
+
### `registerContextLimit(provider, model, contextLimit)`
|
|
407
|
+
|
|
408
|
+
Register custom context window limit.
|
|
218
409
|
|
|
219
410
|
```javascript
|
|
220
|
-
const {
|
|
221
|
-
|
|
222
|
-
// Add Ollama support
|
|
223
|
-
registerAdapter({
|
|
224
|
-
name: "ollama",
|
|
225
|
-
detect: (response) => response?.model && response?.prompt_eval_count !== undefined,
|
|
226
|
-
normalize: (response) => ({
|
|
227
|
-
provider: "ollama",
|
|
228
|
-
model: response.model,
|
|
229
|
-
inputTokens: response.prompt_eval_count || 0,
|
|
230
|
-
outputTokens: response.eval_count || 0,
|
|
231
|
-
totalTokens: (response.prompt_eval_count || 0) + (response.eval_count || 0)
|
|
232
|
-
})
|
|
233
|
-
});
|
|
411
|
+
const { registerContextLimit } = require("tokenfirewall");
|
|
234
412
|
|
|
235
|
-
|
|
413
|
+
registerContextLimit("openai", "gpt-5", 256000);
|
|
236
414
|
```
|
|
237
415
|
|
|
416
|
+
---
|
|
417
|
+
|
|
418
|
+
## Supported Providers
|
|
419
|
+
|
|
420
|
+
TokenFirewall includes built-in support for:
|
|
421
|
+
|
|
422
|
+
| Provider | Models | Pricing | Discovery |
|
|
423
|
+
|----------|--------|---------|-----------|
|
|
424
|
+
| **OpenAI** | GPT-5, GPT-5-mini, GPT-4.1, GPT-4o, o1, gpt-image-1 | Included | API |
|
|
425
|
+
| **Anthropic** | Claude 4.5 (Opus, Sonnet, Haiku), Claude 4, Claude 3.5 | Included | Static |
|
|
426
|
+
| **Google Gemini** | Gemini 3, Gemini 3.1, Gemini 2.5, Nano Banana | Included | API |
|
|
427
|
+
| **Grok (X.AI)** | Grok 3, Grok 2, Grok Vision | Included | API |
|
|
428
|
+
| **Kimi (Moonshot)** | Moonshot v1 (8k, 32k, 128k) | Included | API |
|
|
429
|
+
| **Meta** | Llama 3.3, Llama 3.1 | Included | Static |
|
|
430
|
+
| **Mistral** | Mistral Large, Mixtral | Included | Static |
|
|
431
|
+
| **Cohere** | Command R+, Command R | Included | Static |
|
|
432
|
+
| **Custom** | Any LLM API | Register | Custom |
|
|
433
|
+
|
|
434
|
+
### Pricing (Per 1M Tokens, Input / Output)
|
|
435
|
+
|
|
436
|
+
**OpenAI:**
|
|
437
|
+
- GPT-5: $5.00 / $15.00
|
|
438
|
+
- GPT-5-mini: $1.50 / $5.00
|
|
439
|
+
- GPT-4.1: $3.00 / $12.00
|
|
440
|
+
- GPT-4o: $2.50 / $10.00
|
|
441
|
+
- o1: $6.00 / $18.00
|
|
442
|
+
|
|
443
|
+
**Anthropic:**
|
|
444
|
+
- Claude Opus 4.5: $17.00 / $85.00
|
|
445
|
+
- Claude Sonnet 4.5: $4.00 / $20.00
|
|
446
|
+
- Claude Haiku 4.5: $1.20 / $6.00
|
|
447
|
+
|
|
448
|
+
**Gemini:**
|
|
449
|
+
- Gemini 3 Pro: $3.50 / $14.00
|
|
450
|
+
- Gemini 3 Flash: $0.35 / $1.50
|
|
451
|
+
- Gemini 2.5 Pro: $2.50 / $10.00
|
|
452
|
+
- Nano Banana: $0.05 / $0.20
|
|
453
|
+
|
|
454
|
+
### Context Limits
|
|
455
|
+
|
|
456
|
+
- GPT-5: 256K tokens
|
|
457
|
+
- GPT-4.1: 200K tokens
|
|
458
|
+
- Claude 4.5: 200K tokens
|
|
459
|
+
- Gemini 3 Pro: 2M tokens
|
|
460
|
+
- o1: 200K tokens
|
|
461
|
+
|
|
462
|
+
---
|
|
463
|
+
|
|
464
|
+
## Examples
|
|
465
|
+
|
|
466
|
+
See the [`examples/`](./examples) directory for complete, runnable examples:
|
|
467
|
+
|
|
468
|
+
1. **[Basic Usage](./examples/1-basic-usage.js)** - Core functionality and budget protection
|
|
469
|
+
2. **[Multiple Providers](./examples/2-multiple-providers.js)** - Unified tracking across providers
|
|
470
|
+
3. **[Budget Persistence](./examples/3-budget-persistence.js)** - Save and restore state
|
|
471
|
+
4. **[Custom Provider](./examples/4-custom-provider.js)** - Add your own LLM provider
|
|
472
|
+
5. **[Model Discovery](./examples/5-model-discovery.js)** - Find and compare models
|
|
473
|
+
6. **[Intelligent Routing](./examples/6-intelligent-routing.js)** - Automatic retry and fallback
|
|
474
|
+
7. **[Dynamic Models](./examples/7-dynamic-models.js)** - Register models at runtime
|
|
475
|
+
|
|
476
|
+
---
|
|
477
|
+
|
|
238
478
|
## TypeScript Support
|
|
239
479
|
|
|
240
|
-
|
|
480
|
+
TokenFirewall is written in TypeScript and includes full type definitions.
|
|
241
481
|
|
|
242
482
|
```typescript
|
|
243
|
-
import {
|
|
483
|
+
import {
|
|
244
484
|
createBudgetGuard,
|
|
245
|
-
|
|
485
|
+
patchGlobalFetch,
|
|
486
|
+
getBudgetStatus,
|
|
487
|
+
createModelRouter,
|
|
488
|
+
registerModels,
|
|
246
489
|
BudgetGuardOptions,
|
|
490
|
+
BudgetStatus,
|
|
247
491
|
ModelInfo,
|
|
248
|
-
|
|
492
|
+
ModelRouterOptions,
|
|
493
|
+
ModelConfig
|
|
249
494
|
} from "tokenfirewall";
|
|
250
495
|
|
|
496
|
+
// Full type safety
|
|
251
497
|
const options: BudgetGuardOptions = {
|
|
252
498
|
monthlyLimit: 100,
|
|
253
499
|
mode: "block"
|
|
254
500
|
};
|
|
255
501
|
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
502
|
+
createBudgetGuard(options);
|
|
503
|
+
patchGlobalFetch();
|
|
504
|
+
|
|
505
|
+
const status: BudgetStatus | null = getBudgetStatus();
|
|
260
506
|
```
|
|
261
507
|
|
|
262
|
-
|
|
508
|
+
---
|
|
509
|
+
|
|
510
|
+
## Error Handling
|
|
511
|
+
|
|
512
|
+
TokenFirewall provides clear, actionable error messages:
|
|
263
513
|
|
|
514
|
+
```javascript
|
|
515
|
+
try {
|
|
516
|
+
const response = await fetch(/* ... */);
|
|
517
|
+
} catch (error) {
|
|
518
|
+
if (error.message.includes("TokenFirewall: Budget exceeded")) {
|
|
519
|
+
console.error("Monthly budget exhausted");
|
|
520
|
+
// Handle budget limit
|
|
521
|
+
} else if (error.message.includes("TokenFirewall Router: Max routing retries exceeded")) {
|
|
522
|
+
console.error("All fallback models failed");
|
|
523
|
+
// Handle routing failure
|
|
524
|
+
} else {
|
|
525
|
+
console.error("API error:", error.message);
|
|
526
|
+
}
|
|
527
|
+
}
|
|
264
528
|
```
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
529
|
+
|
|
530
|
+
**Common Errors:**
|
|
531
|
+
|
|
532
|
+
| Error Message | Cause | Solution |
|
|
533
|
+
|---------------|-------|----------|
|
|
534
|
+
| `Budget exceeded! Would spend $X of $Y limit` | Budget limit reached | Increase limit or wait for reset |
|
|
535
|
+
| `monthlyLimit must be a valid number` | Invalid budget configuration | Provide a positive number |
|
|
536
|
+
| `Max routing retries exceeded` | All fallback models failed | Check API status or fallback map |
|
|
537
|
+
| `No pricing found for model "X"` | Unknown model | Register custom pricing |
|
|
538
|
+
|
|
539
|
+
---
|
|
540
|
+
|
|
541
|
+
## Best Practices
|
|
542
|
+
|
|
543
|
+
### 1. Initialize Early
|
|
544
|
+
|
|
545
|
+
```javascript
|
|
546
|
+
// At application startup
|
|
547
|
+
createBudgetGuard({ monthlyLimit: 100, mode: "block" });
|
|
548
|
+
patchGlobalFetch();
|
|
271
549
|
```
|
|
272
550
|
|
|
273
|
-
|
|
551
|
+
### 2. Use Warn Mode in Development
|
|
274
552
|
|
|
275
|
-
|
|
553
|
+
```javascript
|
|
554
|
+
const mode = process.env.NODE_ENV === "production" ? "block" : "warn";
|
|
555
|
+
createBudgetGuard({ monthlyLimit: 100, mode });
|
|
556
|
+
```
|
|
276
557
|
|
|
277
|
-
|
|
558
|
+
### 3. Persist Budget State
|
|
278
559
|
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
560
|
+
```javascript
|
|
561
|
+
// Save on exit
|
|
562
|
+
process.on("beforeExit", () => {
|
|
563
|
+
const state = exportBudgetState();
|
|
564
|
+
if (state) saveToDatabase(state);
|
|
565
|
+
});
|
|
566
|
+
```
|
|
286
567
|
|
|
287
|
-
|
|
568
|
+
### 4. Monitor Usage
|
|
288
569
|
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
570
|
+
```javascript
|
|
571
|
+
// Alert at 80% usage
|
|
572
|
+
const status = getBudgetStatus();
|
|
573
|
+
if (status && status.percentageUsed > 80) {
|
|
574
|
+
await sendAlert("Budget usage high");
|
|
575
|
+
}
|
|
576
|
+
```
|
|
294
577
|
|
|
295
|
-
|
|
578
|
+
### 5. Use Router for Resilience
|
|
296
579
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
580
|
+
```javascript
|
|
581
|
+
// Automatic fallback on failures
|
|
582
|
+
createModelRouter({
|
|
583
|
+
strategy: "fallback",
|
|
584
|
+
fallbackMap: {
|
|
585
|
+
"gpt-4o": ["gpt-4o-mini", "gpt-3.5-turbo"]
|
|
586
|
+
},
|
|
587
|
+
maxRetries: 2
|
|
588
|
+
});
|
|
589
|
+
```
|
|
590
|
+
|
|
591
|
+
### 6. Register Models Dynamically
|
|
592
|
+
|
|
593
|
+
```javascript
|
|
594
|
+
// Discover and register models from API
|
|
595
|
+
const models = await discoverModels(apiKey);
|
|
596
|
+
registerModels("provider", models.map(m => ({
|
|
597
|
+
name: m.id,
|
|
598
|
+
contextLimit: m.context_window,
|
|
599
|
+
pricing: { input: m.input_price, output: m.output_price }
|
|
600
|
+
})));
|
|
601
|
+
```
|
|
602
|
+
|
|
603
|
+
---
|
|
301
604
|
|
|
302
605
|
## License
|
|
303
606
|
|
|
304
|
-
MIT
|
|
607
|
+
MIT © [Ruthwik](https://github.com/Ruthwik000)
|
|
608
|
+
|
|
609
|
+
---
|
|
305
610
|
|
|
306
|
-
##
|
|
611
|
+
## Links
|
|
307
612
|
|
|
308
|
-
|
|
613
|
+
- **GitHub:** https://github.com/Ruthwik000/tokenfirewall
|
|
614
|
+
- **npm:** https://www.npmjs.com/package/tokenfirewall
|
|
615
|
+
- **Issues:** https://github.com/Ruthwik000/tokenfirewall/issues
|
|
616
|
+
- **Changelog:** [CHANGELOG.md](./CHANGELOG.md)
|
|
309
617
|
|
|
310
|
-
|
|
618
|
+
---
|
|
311
619
|
|
|
312
|
-
|
|
620
|
+
Built with ❤️ for the AI developer community.
|