tokenfirewall 1.0.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,243 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.fallbackStrategy = fallbackStrategy;
4
+ exports.contextStrategy = contextStrategy;
5
+ exports.costStrategy = costStrategy;
6
+ const contextRegistry_1 = require("../introspection/contextRegistry");
7
+ const pricingRegistry_1 = require("../core/pricingRegistry");
8
/**
 * Fallback routing strategy.
 *
 * Looks up the caller-configured fallback list for the model that originally
 * failed and proposes the first entry that has not been tried yet.
 *
 * @param context - Failure context; `originalModel` and `attemptedModels` are read.
 * @param failureType - Failure classification; only echoed in the decision's reason.
 * @param fallbackMap - Map from a primary model name to its ordered list of
 *   fallback model names. May be omitted, in which case no retry is proposed.
 * @returns `{ retry: true, nextModel, reason }` when an untried fallback exists,
 *   otherwise `{ retry: false, reason }`.
 */
function fallbackStrategy(context, failureType, fallbackMap) {
    const { originalModel, attemptedModels } = context;
    // The map is optional in the router options, and a plain-object lookup can
    // hit inherited members (e.g. a model literally named "constructor").
    // Only a real array counts as a configured fallback list.
    const fallbacks = fallbackMap ? fallbackMap[originalModel] : undefined;
    if (!Array.isArray(fallbacks) || fallbacks.length === 0) {
        return {
            retry: false,
            reason: `No fallback models configured for ${originalModel}`
        };
    }
    const alreadyTried = Array.isArray(attemptedModels) ? attemptedModels : [];
    // Never propose the model that just failed, even if the list names it;
    // this matches the guard used by the context and cost strategies.
    const nextModel = fallbacks.find((model) => model !== originalModel && !alreadyTried.includes(model));
    if (!nextModel) {
        return {
            retry: false,
            reason: "All fallback models have been attempted"
        };
    }
    return {
        retry: true,
        nextModel,
        reason: `Fallback from ${originalModel} due to ${failureType}`
    };
}
36
/**
 * Context-window routing strategy.
 *
 * When a request failed with "context_overflow", proposes the model from the
 * same provider whose registered context limit is the smallest one that is
 * still strictly larger than the original model's limit, skipping the original
 * model and any model already attempted. Every other failure type is declined.
 *
 * @param context - Failure context; `originalModel`, `provider` and
 *   `attemptedModels` are read.
 * @param failureType - Failure classification; must be "context_overflow" for a retry.
 * @returns `{ retry: true, nextModel, reason }` or `{ retry: false, reason }`.
 */
function contextStrategy(context, failureType) {
    const { originalModel, provider, attemptedModels } = context;
    const decline = (reason) => ({ retry: false, reason });
    if (failureType !== "context_overflow") {
        return decline(`Context strategy only applies to context_overflow, got ${failureType}`);
    }
    const registry = contextRegistry_1.contextRegistry;
    // Baseline: the window of the model that overflowed.
    const baseline = registry.getContextLimit(provider, originalModel);
    if (baseline === undefined) {
        return decline(`No context limit information for ${originalModel}`);
    }
    const siblings = registry.getModelsForProvider(provider);
    if (!siblings || siblings.length === 0) {
        return decline(`No alternative models found for provider ${provider}`);
    }
    // Keep only untried models with a strictly larger known window.
    const upgrades = siblings.filter((candidate) => {
        const window = registry.getContextLimit(provider, candidate);
        if (window === undefined || !(window > baseline)) {
            return false;
        }
        if (attemptedModels.includes(candidate)) {
            return false;
        }
        return candidate !== originalModel;
    });
    // Ascending by window size so the smallest sufficient upgrade comes first.
    upgrades.sort((left, right) => {
        const leftWindow = registry.getContextLimit(provider, left) || 0;
        const rightWindow = registry.getContextLimit(provider, right) || 0;
        return leftWindow - rightWindow;
    });
    if (upgrades.length === 0) {
        return decline("No models with larger context window available");
    }
    const nextModel = upgrades[0];
    const upgradedLimit = registry.getContextLimit(provider, nextModel);
    return {
        retry: true,
        nextModel,
        reason: `Upgrading from ${baseline} to ${upgradedLimit} tokens context`
    };
}
94
/**
 * Cost-based routing strategy.
 *
 * Proposes the cheapest model from the same provider whose average price
 * (mean of input and output price) is strictly lower than the original
 * model's, skipping the original model and any model already attempted.
 *
 * Each model's pricing is looked up once; a model whose pricing lookup throws,
 * returns nothing, or does not yield a finite number is treated as unpriced.
 *
 * @param context - Failure context; `originalModel`, `provider` and
 *   `attemptedModels` are read.
 * @param failureType - Failure classification; only echoed in the decision's reason.
 * @returns `{ retry: true, nextModel, reason }` or `{ retry: false, reason }`.
 */
function costStrategy(context, failureType) {
    const { originalModel, provider, attemptedModels } = context;
    const currentAvgCost = averageModelCost(provider, originalModel);
    if (currentAvgCost === undefined) {
        return {
            retry: false,
            reason: `No pricing information for ${originalModel}`
        };
    }
    const providerModels = getProviderModels(provider);
    if (providerModels.length === 0) {
        return {
            retry: false,
            reason: `No alternative models found for provider ${provider}`
        };
    }
    // Pair every eligible model with its cost up front so the comparator
    // below never has to query (or guard) the pricing registry again.
    const cheaperModels = [];
    for (const model of providerModels) {
        if (model === originalModel || attemptedModels.includes(model)) {
            continue;
        }
        const avgCost = averageModelCost(provider, model);
        if (avgCost !== undefined && avgCost < currentAvgCost) {
            cheaperModels.push({ model, avgCost });
        }
    }
    if (cheaperModels.length === 0) {
        return {
            retry: false,
            reason: "No cheaper models available"
        };
    }
    // Ascending by cost (cheapest first); ties keep provider order.
    cheaperModels.sort((a, b) => a.avgCost - b.avgCost);
    return {
        retry: true,
        nextModel: cheaperModels[0].model,
        reason: `Switching to cheaper model due to ${failureType}`
    };
}
/**
 * Returns the mean of a model's input and output price, or `undefined` when
 * the pricing registry throws, returns nothing, or the result is not finite.
 */
function averageModelCost(provider, model) {
    let pricing;
    try {
        pricing = pricingRegistry_1.pricingRegistry.getPricing(provider, model);
    }
    catch {
        return undefined;
    }
    if (!pricing) {
        return undefined;
    }
    const average = (pricing.input + pricing.output) / 2;
    return Number.isFinite(average) ? average : undefined;
}
156
/**
 * Helper to get the known model names for a provider.
 *
 * Prefers whatever the context registry reports for the provider (queried with
 * the name exactly as given). If the registry has nothing, falls back to a
 * built-in static list keyed by the lower-cased provider name.
 *
 * @param provider - Provider name.
 * @returns Array of model names; empty when the provider is unknown or is not
 *   a string.
 */
function getProviderModels(provider) {
    // First, try to get models from context registry (dynamic)
    const registeredModels = contextRegistry_1.contextRegistry.getModelsForProvider(provider);
    if (registeredModels && registeredModels.length > 0) {
        return registeredModels;
    }
    // Fallback to static list if no models registered
    // This ensures the router works even without model discovery
    const knownModels = {
        openai: [
            // ===== Flagship / Chat =====
            "gpt-5",
            "gpt-5-mini",
            "gpt-4.1",
            "gpt-4.1-mini",
            "gpt-4o",
            "gpt-4o-mini",
            // ===== Reasoning =====
            "o1",
            "o1-mini",
            // ===== Image Generation =====
            "gpt-image-1"
        ],
        anthropic: [
            // ===== Claude 4.5 (Newer Improved) =====
            "claude-opus-4.5",
            "claude-sonnet-4.5",
            "claude-haiku-4.5",
            // ===== Classic Claude 4 =====
            "claude-4-opus",
            "claude-sonnet-4",
            "claude-haiku-4",
            // ===== Stable Claude 3.5 Fallback =====
            "claude-3-5-sonnet-latest",
            "claude-3-5-haiku-latest"
        ],
        gemini: [
            // ===== Gemini 3 (Latest Generation) =====
            "gemini-3-pro", // Flagship reasoning - Most capable
            "gemini-3.1-pro", // Enhanced reasoning - Latest improved 3.x
            "gemini-3-flash", // Fast multimodal - Optimized for latency
            "gemini-3-flash-lite", // Cost-efficient flash variant
            "gemini-3-pro-image", // High-quality image - Nano Banana Pro
            "gemini-3.1-flash-image", // Latest image model - Nano Banana 2
            // ===== Gemini 2.5 (Stable Production Tier) =====
            "gemini-2.5-pro", // Stable reasoning - 2.5 generation flagship
            "gemini-2.5-flash", // Fast multimodal - Default in many workflows
            "gemini-2.5-flash-lite", // Cost-efficient - Lighter, cheaper variant
            "gemini-2.5-flash-image", // Image generation - Nano Banana (Cloud)
            // ===== Ultra-light / Experimental =====
            "gemini-nano-banana" // Ultra-light multimodal
        ],
        grok: [
            "grok-3",
            "grok-2",
            "grok-2-mini",
            "grok-vision"
        ],
        kimi: [
            "moonshot-v1-8k",
            "moonshot-v1-32k",
            "moonshot-v1-128k"
        ],
        meta: [
            "llama-3.3-70b",
            "llama-3.1-405b",
            "llama-3.1-70b",
            "llama-3.1-8b"
        ],
        mistral: [
            "mistral-large-latest",
            "mistral-medium-latest",
            "mistral-small-latest",
            "mixtral-8x7b",
            "mixtral-8x22b"
        ],
        cohere: [
            "command-r-plus",
            "command-r",
            "command-light"
        ]
    };
    // A non-string provider cannot match any key; return "unknown" instead of
    // throwing on .toLowerCase().
    const key = typeof provider === "string" ? provider.toLowerCase() : "";
    // Own-property check: a bare lookup would follow the prototype chain, so a
    // provider named "constructor" or "toString" would return a function
    // rather than a model list.
    return Object.prototype.hasOwnProperty.call(knownModels, key) ? knownModels[key] : [];
}
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Type definitions for the Intelligent Model Router
3
+ */
4
+ /**
5
+ * Routing strategy types.
+ * The names line up with the exported strategy functions fallbackStrategy,
+ * contextStrategy and costStrategy (presumably selected by this value; the
+ * dispatch code is not part of this file, so confirm against the router).
6
+ */
7
+ export type RoutingStrategy = "fallback" | "context" | "cost";
8
+ /**
9
+ * Failure types detected by error detector.
+ * contextStrategy only proposes a retry for "context_overflow";
+ * fallbackStrategy and costStrategy just echo the value in the decision reason.
10
+ */
11
+ export type FailureType = "rate_limit" | "context_overflow" | "model_unavailable" | "access_denied" | "unknown";
12
+ /**
13
+ * Configuration options for model router.
+ * Only `strategy` is required; the other fields are optional.
14
+ */
15
+ export interface ModelRouterOptions {
16
+ /** Routing strategy to use */
17
+ strategy: RoutingStrategy;
18
+ /** Map of primary models to fallback models; fallbackStrategy picks the first listed model not yet attempted, so list order is priority order */
19
+ fallbackMap?: Record<string, string[]>;
20
+ /** Maximum number of retry attempts (default: 1) */
21
+ maxRetries?: number;
22
+ }
23
+ /**
24
+ * Context information about a failed request.
+ * This is the first argument handed to each routing strategy function.
25
+ */
26
+ export interface FailureContext {
27
+ /** The error that occurred (typed unknown, so it must be narrowed before use) */
28
+ error: unknown;
29
+ /** Original model that failed; strategies use it as the baseline and never propose it as the next model in the context/cost strategies */
30
+ originalModel: string;
31
+ /** Request body sent to API (untyped; shape presumably depends on the provider - confirm against the caller) */
32
+ requestBody: any;
33
+ /** Provider name; context and cost strategies only consider models of this provider */
34
+ provider: string;
35
+ /** Current retry attempt count (NOTE(review): whether this starts at 0 or 1 is not visible here) */
36
+ retryCount: number;
37
+ /** Models already attempted; every strategy skips models listed here */
38
+ attemptedModels: string[];
39
+ }
40
+ /**
41
+ * Decision made by routing strategy.
+ * The strategy functions return this shape: a reason is always present, and
+ * nextModel is set only on decisions where retry is true.
42
+ */
43
+ export interface RoutingDecision {
44
+ /** Whether to retry the request */
45
+ retry: boolean;
46
+ /** Next model to try (if retry is true); left unset on non-retry decisions */
47
+ nextModel?: string;
48
+ /** Human-readable reason for the decision, given for both retry and non-retry outcomes */
49
+ reason: string;
50
+ }
51
+ /**
52
+ * Router event for logging.
+ * All fields are required; unlike RoutingDecision, nextModel is always present,
+ * so this presumably describes an actual model switch (confirm against the emitter).
53
+ */
54
+ export interface RouterEvent {
55
+ /** Original model that failed */
56
+ originalModel: string;
57
+ /** Next model to try */
58
+ nextModel: string;
59
+ /** Reason for switching */
60
+ reason: string;
61
+ /** Current attempt number (NOTE(review): 0- or 1-based is not visible here) */
62
+ attempt: number;
63
+ /** Maximum retries allowed */
64
+ maxRetries: number;
65
+ }
@@ -0,0 +1,5 @@
1
+ "use strict";
2
+ /**
3
+ * Type definitions for the Intelligent Model Router
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tokenfirewall",
3
- "version": "1.0.2",
3
+ "version": "2.0.0",
4
4
  "description": "Scalable, adapter-driven LLM cost enforcement middleware for Node.js with model discovery and context intelligence",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",