tokenfirewall 1.0.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +219 -474
- package/dist/core/pricingRegistry.js +56 -5
- package/dist/index.d.ts +32 -0
- package/dist/index.js +66 -12
- package/dist/interceptors/fetchInterceptor.d.ts +5 -0
- package/dist/interceptors/fetchInterceptor.js +278 -27
- package/dist/introspection/contextRegistry.d.ts +5 -0
- package/dist/introspection/contextRegistry.js +58 -6
- package/dist/logger.d.ts +5 -0
- package/dist/logger.js +10 -0
- package/dist/router/errorDetector.d.ts +45 -0
- package/dist/router/errorDetector.js +170 -0
- package/dist/router/modelRouter.d.ts +33 -0
- package/dist/router/modelRouter.js +111 -0
- package/dist/router/routingStrategies.d.ts +16 -0
- package/dist/router/routingStrategies.js +243 -0
- package/dist/router/types.d.ts +65 -0
- package/dist/router/types.js +5 -0
- package/package.json +1 -1
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.fallbackStrategy = fallbackStrategy;
|
|
4
|
+
exports.contextStrategy = contextStrategy;
|
|
5
|
+
exports.costStrategy = costStrategy;
|
|
6
|
+
const contextRegistry_1 = require("../introspection/contextRegistry");
|
|
7
|
+
const pricingRegistry_1 = require("../core/pricingRegistry");
|
|
8
|
+
/**
 * Fallback routing strategy.
 *
 * Looks up the fallback list configured for the model that failed and
 * proposes the first entry that has not been attempted yet.
 *
 * @param {{ originalModel: string, attemptedModels: string[] }} context
 *   Failure context; only `originalModel` and `attemptedModels` are read here.
 * @param {string} failureType - Failure classification; only interpolated into the reason text.
 * @param {Record<string, string[]>} [fallbackMap] - Ordered fallback models keyed by model name.
 * @returns {{ retry: boolean, nextModel?: string, reason: string }}
 *   `retry: true` with `nextModel` when an untried fallback exists, otherwise
 *   `retry: false` with an explanatory reason.
 */
function fallbackStrategy(context, failureType, fallbackMap) {
    const { originalModel, attemptedModels } = context;
    // Own-property lookup only: a missing map, or a model name that collides
    // with an inherited key such as "constructor", must read as "not configured"
    // instead of throwing further down.
    const isConfigured = fallbackMap != null &&
        Object.prototype.hasOwnProperty.call(fallbackMap, originalModel);
    const fallbacks = isConfigured ? fallbackMap[originalModel] : undefined;
    if (!Array.isArray(fallbacks) || fallbacks.length === 0) {
        return {
            retry: false,
            reason: `No fallback models configured for ${originalModel}`
        };
    }
    // Membership set built once instead of scanning attemptedModels per candidate.
    const alreadyTried = new Set(Array.isArray(attemptedModels) ? attemptedModels : []);
    // Fallbacks are tried in the order they were configured.
    const nextModel = fallbacks.find((model) => !alreadyTried.has(model));
    if (!nextModel) {
        return {
            retry: false,
            reason: "All fallback models have been attempted"
        };
    }
    return {
        retry: true,
        nextModel,
        reason: `Fallback from ${originalModel} due to ${failureType}`
    };
}
|
|
36
|
+
/**
 * Context-based routing strategy.
 *
 * Applies only to "context_overflow" failures. Among the models the context
 * registry lists for the same provider, it proposes the untried model with the
 * smallest context limit that is still larger than the original model's limit.
 *
 * @param {{ originalModel: string, provider: string, attemptedModels: string[] }} context
 *   Failure context; only these three fields are read here.
 * @param {string} failureType - Failure classification of the failed request.
 * @returns {{ retry: boolean, nextModel?: string, reason: string }}
 *   `retry: true` with `nextModel` when a larger-context model exists, otherwise
 *   `retry: false` with an explanatory reason.
 */
function contextStrategy(context, failureType) {
    const { originalModel, provider, attemptedModels } = context;
    // Only applicable for context overflow
    if (failureType !== "context_overflow") {
        return {
            retry: false,
            reason: `Context strategy only applies to context_overflow, got ${failureType}`
        };
    }
    const registry = contextRegistry_1.contextRegistry;
    const currentLimit = registry.getContextLimit(provider, originalModel);
    if (currentLimit === undefined) {
        return {
            retry: false,
            reason: `No context limit information for ${originalModel}`
        };
    }
    const availableModels = registry.getModelsForProvider(provider);
    if (!availableModels || availableModels.length === 0) {
        return {
            retry: false,
            reason: `No alternative models found for provider ${provider}`
        };
    }
    // Never suggest the failing model or anything already tried.
    const excluded = new Set(attemptedModels);
    excluded.add(originalModel);
    // Single pass with one registry lookup per candidate. The strict `<` keeps
    // the first model on ties, matching a stable ascending sort.
    let smallestUpgrade = null;
    for (const model of availableModels) {
        if (excluded.has(model)) {
            continue;
        }
        const limit = registry.getContextLimit(provider, model);
        if (limit === undefined || !(limit > currentLimit)) {
            continue;
        }
        if (smallestUpgrade === null || limit < smallestUpgrade.limit) {
            smallestUpgrade = { model, limit };
        }
    }
    if (smallestUpgrade === null) {
        return {
            retry: false,
            reason: "No models with larger context window available"
        };
    }
    return {
        retry: true,
        nextModel: smallestUpgrade.model,
        reason: `Upgrading from ${currentLimit} to ${smallestUpgrade.limit} tokens context`
    };
}
|
|
94
|
+
/**
 * Cost-based routing strategy.
 *
 * Proposes the cheapest untried model of the same provider whose average
 * price, (input + output) / 2, is strictly lower than the original model's.
 * Models whose pricing lookup throws are skipped.
 *
 * @param {{ originalModel: string, provider: string, attemptedModels: string[] }} context
 *   Failure context; only these three fields are read here.
 * @param {string} failureType - Failure classification; only interpolated into the reason text.
 * @returns {{ retry: boolean, nextModel?: string, reason: string }}
 *   `retry: true` with `nextModel` when a cheaper model exists, otherwise
 *   `retry: false` with an explanatory reason.
 */
function costStrategy(context, failureType) {
    const { originalModel, provider, attemptedModels } = context;
    const registry = pricingRegistry_1.pricingRegistry;
    // Average of input and output price; throws if the registry lookup throws
    // or returns no pricing record.
    const averageCost = (model) => {
        const pricing = registry.getPricing(provider, model);
        return (pricing.input + pricing.output) / 2;
    };
    let currentAvgCost;
    try {
        currentAvgCost = averageCost(originalModel);
    }
    catch {
        return {
            retry: false,
            reason: `No pricing information for ${originalModel}`
        };
    }
    const providerModels = getProviderModels(provider);
    if (providerModels.length === 0) {
        return {
            retry: false,
            reason: `No alternative models found for provider ${provider}`
        };
    }
    // Never suggest the failing model or anything already tried.
    const excluded = new Set(attemptedModels);
    excluded.add(originalModel);
    // Single pass: each candidate is priced exactly once, inside the guard.
    // The strict `<` keeps the first model on ties, matching a stable
    // ascending sort.
    let cheapest = null;
    for (const model of providerModels) {
        if (excluded.has(model)) {
            continue;
        }
        let avgCost;
        try {
            avgCost = averageCost(model);
        }
        catch {
            // No usable pricing for this candidate; it cannot be compared.
            continue;
        }
        if (avgCost < currentAvgCost && (cheapest === null || avgCost < cheapest.avgCost)) {
            cheapest = { model, avgCost };
        }
    }
    if (cheapest === null) {
        return {
            retry: false,
            reason: "No cheaper models available"
        };
    }
    return {
        retry: true,
        nextModel: cheapest.model,
        reason: `Switching to cheaper model due to ${failureType}`
    };
}
|
|
156
|
+
/**
 * Helper to get the known models for a provider.
 *
 * Prefers the models the context registry reports for the provider. When the
 * registry reports none, it falls back to a built-in static list keyed by the
 * lower-cased provider name.
 *
 * @param {string} provider - Provider name; matched case-insensitively against the static list.
 * @returns {string[]} Model names, or an empty array when the provider is unknown.
 */
function getProviderModels(provider) {
    // First, try to get models from context registry (dynamic)
    const registeredModels = contextRegistry_1.contextRegistry.getModelsForProvider(provider);
    if (registeredModels && registeredModels.length > 0) {
        return registeredModels;
    }
    // Fallback to static list if no models registered
    // This ensures the router works even without model discovery
    const knownModels = {
        openai: [
            // ===== Flagship / Chat =====
            "gpt-5",
            "gpt-5-mini",
            "gpt-4.1",
            "gpt-4.1-mini",
            "gpt-4o",
            "gpt-4o-mini",
            // ===== Reasoning =====
            "o1",
            "o1-mini",
            // ===== Image Generation =====
            "gpt-image-1"
        ],
        anthropic: [
            // ===== Claude 4.5 (Newer Improved) =====
            "claude-opus-4.5",
            "claude-sonnet-4.5",
            "claude-haiku-4.5",
            // ===== Classic Claude 4 =====
            "claude-4-opus",
            "claude-sonnet-4",
            "claude-haiku-4",
            // ===== Stable Claude 3.5 Fallback =====
            "claude-3-5-sonnet-latest",
            "claude-3-5-haiku-latest"
        ],
        gemini: [
            // ===== Gemini 3 (Latest Generation) =====
            "gemini-3-pro", // Flagship reasoning - Most capable
            "gemini-3.1-pro", // Enhanced reasoning - Latest improved 3.x
            "gemini-3-flash", // Fast multimodal - Optimized for latency
            "gemini-3-flash-lite", // Cost-efficient flash variant
            "gemini-3-pro-image", // High-quality image - Nano Banana Pro
            "gemini-3.1-flash-image", // Latest image model - Nano Banana 2
            // ===== Gemini 2.5 (Stable Production Tier) =====
            "gemini-2.5-pro", // Stable reasoning - 2.5 generation flagship
            "gemini-2.5-flash", // Fast multimodal - Default in many workflows
            "gemini-2.5-flash-lite", // Cost-efficient - Lighter, cheaper variant
            "gemini-2.5-flash-image", // Image generation - Nano Banana (Cloud)
            // ===== Ultra-light / Experimental =====
            "gemini-nano-banana" // Ultra-light multimodal
        ],
        grok: [
            "grok-3",
            "grok-2",
            "grok-2-mini",
            "grok-vision"
        ],
        kimi: [
            "moonshot-v1-8k",
            "moonshot-v1-32k",
            "moonshot-v1-128k"
        ],
        meta: [
            "llama-3.3-70b",
            "llama-3.1-405b",
            "llama-3.1-70b",
            "llama-3.1-8b"
        ],
        mistral: [
            "mistral-large-latest",
            "mistral-medium-latest",
            "mistral-small-latest",
            "mixtral-8x7b",
            "mixtral-8x22b"
        ],
        cohere: [
            "command-r-plus",
            "command-r",
            "command-light"
        ]
    };
    // A non-string provider cannot match any static entry.
    if (typeof provider !== "string") {
        return [];
    }
    const key = provider.toLowerCase();
    // Own-property check: names such as "constructor", "toString" or "__proto__"
    // would otherwise resolve to inherited non-array values and break callers
    // that expect an array.
    return Object.prototype.hasOwnProperty.call(knownModels, key) ? knownModels[key] : [];
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Type definitions for the Intelligent Model Router
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Names of the routing strategies a router can be configured with.
 */
export type RoutingStrategy =
    | "fallback"
    | "context"
    | "cost";
|
|
8
|
+
/**
 * Failure classifications produced by the error detector.
 */
export type FailureType =
    | "rate_limit"
    | "context_overflow"
    | "model_unavailable"
    | "access_denied"
    | "unknown";
|
|
12
|
+
/**
 * Options accepted when configuring the model router.
 */
export interface ModelRouterOptions {
    /**
     * Which routing strategy the router applies.
     */
    strategy: RoutingStrategy;
    /**
     * Fallback models, keyed by the primary model they stand in for.
     */
    fallbackMap?: Record<string, string[]>;
    /**
     * Upper bound on the number of retry attempts (default: 1).
     */
    maxRetries?: number;
}
|
|
23
|
+
/**
 * Describes a failed request, as handed to a routing strategy.
 */
export interface FailureContext {
    /**
     * Error raised by the failed request.
     */
    error: unknown;
    /**
     * Model the failed request was originally sent to.
     */
    originalModel: string;
    /**
     * Body of the request that was sent to the API.
     */
    requestBody: any;
    /**
     * Name of the provider.
     */
    provider: string;
    /**
     * Number of retry attempts so far.
     */
    retryCount: number;
    /**
     * Models that have already been attempted.
     */
    attemptedModels: string[];
}
|
|
40
|
+
/**
 * Outcome returned by a routing strategy.
 */
export interface RoutingDecision {
    /**
     * True when the request should be retried.
     */
    retry: boolean;
    /**
     * Model to use for the retry; set when `retry` is true.
     */
    nextModel?: string;
    /**
     * Explanation of why this decision was made.
     */
    reason: string;
}
|
|
51
|
+
/**
 * Event describing a model switch, intended for logging.
 */
export interface RouterEvent {
    /**
     * Model that failed.
     */
    originalModel: string;
    /**
     * Model that will be tried next.
     */
    nextModel: string;
    /**
     * Why the router is switching models.
     */
    reason: string;
    /**
     * Number of the current attempt.
     */
    attempt: number;
    /**
     * Maximum number of retries allowed.
     */
    maxRetries: number;
}
|
package/package.json
CHANGED