tokenfirewall 1.0.2 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +222 -474
- package/dist/core/pricingRegistry.js +54 -5
- package/dist/index.d.ts +32 -0
- package/dist/index.js +66 -12
- package/dist/interceptors/fetchInterceptor.d.ts +5 -0
- package/dist/interceptors/fetchInterceptor.js +278 -27
- package/dist/introspection/contextRegistry.d.ts +5 -0
- package/dist/introspection/contextRegistry.js +58 -6
- package/dist/logger.d.ts +5 -0
- package/dist/logger.js +10 -0
- package/dist/router/errorDetector.d.ts +45 -0
- package/dist/router/errorDetector.js +170 -0
- package/dist/router/modelRouter.d.ts +33 -0
- package/dist/router/modelRouter.js +111 -0
- package/dist/router/routingStrategies.d.ts +16 -0
- package/dist/router/routingStrategies.js +243 -0
- package/dist/router/types.d.ts +65 -0
- package/dist/router/types.js +5 -0
- package/package.json +1 -1
|
@@ -15,21 +15,59 @@ class ContextRegistry {
|
|
|
15
15
|
*/
|
|
16
16
|
initializeContextLimits() {
|
|
17
17
|
// OpenAI context limits
|
|
18
|
+
// ===== GPT-5 (Latest Flagship) =====
|
|
19
|
+
this.register("openai", "gpt-5", { tokens: 256000 });
|
|
20
|
+
this.register("openai", "gpt-5-mini", { tokens: 256000 });
|
|
21
|
+
// ===== GPT-4.1 Series =====
|
|
22
|
+
this.register("openai", "gpt-4.1", { tokens: 200000 });
|
|
23
|
+
this.register("openai", "gpt-4.1-mini", { tokens: 200000 });
|
|
24
|
+
// ===== GPT-4o (Balanced Multimodal) =====
|
|
18
25
|
this.register("openai", "gpt-4o", { tokens: 128000 });
|
|
19
26
|
this.register("openai", "gpt-4o-mini", { tokens: 128000 });
|
|
27
|
+
// ===== Reasoning Models =====
|
|
28
|
+
this.register("openai", "o1", { tokens: 200000 });
|
|
29
|
+
this.register("openai", "o1-mini", { tokens: 128000 });
|
|
30
|
+
// ===== Image Generation =====
|
|
31
|
+
this.register("openai", "gpt-image-1", { tokens: 128000 });
|
|
32
|
+
// ===== Legacy Models =====
|
|
20
33
|
this.register("openai", "gpt-4-turbo", { tokens: 128000 });
|
|
21
34
|
this.register("openai", "gpt-4", { tokens: 8192 });
|
|
22
35
|
this.register("openai", "gpt-3.5-turbo", { tokens: 16385 });
|
|
23
36
|
this.register("openai", "gpt-3.5-turbo-16k", { tokens: 16385 });
|
|
24
37
|
// Anthropic context limits
|
|
38
|
+
// ===== Claude 4.5 (Newer Improved) =====
|
|
39
|
+
this.register("anthropic", "claude-opus-4.5", { tokens: 200000 });
|
|
40
|
+
this.register("anthropic", "claude-sonnet-4.5", { tokens: 200000 });
|
|
41
|
+
this.register("anthropic", "claude-haiku-4.5", { tokens: 200000 });
|
|
42
|
+
// ===== Classic Claude 4 =====
|
|
43
|
+
this.register("anthropic", "claude-4-opus", { tokens: 200000 });
|
|
44
|
+
this.register("anthropic", "claude-sonnet-4", { tokens: 200000 });
|
|
45
|
+
this.register("anthropic", "claude-haiku-4", { tokens: 200000 });
|
|
46
|
+
// ===== Stable Claude 3.5 Fallback =====
|
|
47
|
+
this.register("anthropic", "claude-3-5-sonnet-latest", { tokens: 200000 });
|
|
48
|
+
this.register("anthropic", "claude-3-5-haiku-latest", { tokens: 200000 });
|
|
49
|
+
// ===== Legacy Models =====
|
|
25
50
|
this.register("anthropic", "claude-3-5-sonnet-20241022", { tokens: 200000 });
|
|
26
51
|
this.register("anthropic", "claude-3-5-haiku-20241022", { tokens: 200000 });
|
|
27
52
|
this.register("anthropic", "claude-3-opus-20240229", { tokens: 200000 });
|
|
28
53
|
this.register("anthropic", "claude-3-sonnet-20240229", { tokens: 200000 });
|
|
29
54
|
this.register("anthropic", "claude-3-haiku-20240307", { tokens: 200000 });
|
|
30
55
|
// Gemini context limits (updated with latest models)
|
|
31
|
-
|
|
56
|
+
// ===== Gemini 3 (Latest Generation) =====
|
|
57
|
+
this.register("gemini", "gemini-3-pro", { tokens: 2097152 });
|
|
58
|
+
this.register("gemini", "gemini-3.1-pro", { tokens: 2097152 });
|
|
59
|
+
this.register("gemini", "gemini-3-flash", { tokens: 1048576 });
|
|
60
|
+
this.register("gemini", "gemini-3-flash-lite", { tokens: 1048576 });
|
|
61
|
+
this.register("gemini", "gemini-3-pro-image", { tokens: 2097152 });
|
|
62
|
+
this.register("gemini", "gemini-3.1-flash-image", { tokens: 1048576 });
|
|
63
|
+
// ===== Gemini 2.5 (Stable Production Tier) =====
|
|
32
64
|
this.register("gemini", "gemini-2.5-pro", { tokens: 2097152 });
|
|
65
|
+
this.register("gemini", "gemini-2.5-flash", { tokens: 1048576 });
|
|
66
|
+
this.register("gemini", "gemini-2.5-flash-lite", { tokens: 1048576 });
|
|
67
|
+
this.register("gemini", "gemini-2.5-flash-image", { tokens: 1048576 });
|
|
68
|
+
// ===== Ultra-light / Experimental =====
|
|
69
|
+
this.register("gemini", "gemini-nano-banana", { tokens: 524288 });
|
|
70
|
+
// ===== Legacy Models =====
|
|
33
71
|
this.register("gemini", "gemini-2.0-flash", { tokens: 1048576 });
|
|
34
72
|
this.register("gemini", "gemini-2.0-flash-exp", { tokens: 1048576 });
|
|
35
73
|
this.register("gemini", "gemini-1.5-pro", { tokens: 2097152 });
|
|
@@ -54,17 +92,19 @@ class ContextRegistry {
|
|
|
54
92
|
* Register context limit for a model
|
|
55
93
|
*/
|
|
56
94
|
register(provider, model, limit) {
|
|
57
|
-
|
|
58
|
-
|
|
95
|
+
const normalizedProvider = provider.toLowerCase();
|
|
96
|
+
if (!this.limits.has(normalizedProvider)) {
|
|
97
|
+
this.limits.set(normalizedProvider, new Map());
|
|
59
98
|
}
|
|
60
|
-
this.limits.get(
|
|
99
|
+
this.limits.get(normalizedProvider).set(model, limit);
|
|
61
100
|
}
|
|
62
101
|
/**
|
|
63
102
|
* Get context limit for a model
|
|
64
103
|
* Returns undefined if not found (does not throw)
|
|
65
104
|
*/
|
|
66
105
|
getContextLimit(provider, model) {
|
|
67
|
-
const
|
|
106
|
+
const normalizedProvider = provider.toLowerCase();
|
|
107
|
+
const providerLimits = this.limits.get(normalizedProvider);
|
|
68
108
|
if (!providerLimits) {
|
|
69
109
|
return undefined;
|
|
70
110
|
}
|
|
@@ -75,7 +115,19 @@ class ContextRegistry {
|
|
|
75
115
|
* Check if provider is supported
|
|
76
116
|
*/
|
|
77
117
|
isProviderSupported(provider) {
|
|
78
|
-
return this.limits.has(provider);
|
|
118
|
+
return this.limits.has(provider.toLowerCase());
|
|
119
|
+
}
|
|
120
|
+
/**
|
|
121
|
+
* Get all models for a provider
|
|
122
|
+
* Returns array of model names
|
|
123
|
+
*/
|
|
124
|
+
getModelsForProvider(provider) {
|
|
125
|
+
const normalizedProvider = provider.toLowerCase();
|
|
126
|
+
const providerLimits = this.limits.get(normalizedProvider);
|
|
127
|
+
if (!providerLimits) {
|
|
128
|
+
return [];
|
|
129
|
+
}
|
|
130
|
+
return Array.from(providerLimits.keys());
|
|
79
131
|
}
|
|
80
132
|
}
|
|
81
133
|
exports.contextRegistry = new ContextRegistry();
|
package/dist/logger.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { NormalizedUsage, CostBreakdown } from "./core/types";
|
|
2
|
+
import { RouterEvent } from "./router/types";
|
|
2
3
|
/**
|
|
3
4
|
* Structured logger for LLM usage and costs
|
|
4
5
|
*/
|
|
@@ -7,5 +8,9 @@ export declare class Logger {
|
|
|
7
8
|
* Log usage and cost information
|
|
8
9
|
*/
|
|
9
10
|
logUsage(usage: NormalizedUsage, cost: CostBreakdown): void;
|
|
11
|
+
/**
|
|
12
|
+
* Log router event (model switching)
|
|
13
|
+
*/
|
|
14
|
+
logRouterEvent(event: RouterEvent): void;
|
|
10
15
|
}
|
|
11
16
|
export declare const logger: Logger;
|
package/dist/logger.js
CHANGED
|
@@ -26,6 +26,16 @@ class Logger {
|
|
|
26
26
|
},
|
|
27
27
|
}));
|
|
28
28
|
}
|
|
29
|
+
/**
|
|
30
|
+
* Log router event (model switching)
|
|
31
|
+
*/
|
|
32
|
+
logRouterEvent(event) {
|
|
33
|
+
console.log(`[TOKENFIREWALL ROUTER]\n` +
|
|
34
|
+
`Original: ${event.originalModel}\n` +
|
|
35
|
+
`Switched: ${event.nextModel}\n` +
|
|
36
|
+
`Reason: ${event.reason}\n` +
|
|
37
|
+
`Attempt: ${event.attempt}/${event.maxRetries}`);
|
|
38
|
+
}
|
|
29
39
|
}
|
|
30
40
|
exports.Logger = Logger;
|
|
31
41
|
exports.logger = new Logger();
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { FailureType } from "./types";
|
|
2
|
+
/**
 * Classifies failed API calls into a FailureType.
 */
export declare class ErrorDetector {
    /**
     * Classify an arbitrary thrown value.
     * @param error - The value that was thrown or rejected
     * @returns The failure category assigned to the error
     */
    detectFailureType(error: unknown): FailureType;
    /** Tests whether the error carries a numeric HTTP status code. */
    private isHttpError;
    /** Maps an HTTP status code to a failure category. */
    private classifyHttpError;
    /** Tests whether the error carries a `response` property. */
    private isApiError;
    /** Derives a failure category from the response body. */
    private classifyApiError;
    /** Derives a failure category from the text of an error message. */
    private classifyErrorMessage;
    /** Tests whether response data describes a context overflow. */
    private isContextOverflow;
    /** Tests whether response data describes a rate limit. */
    private isRateLimit;
    /** Tests whether response data describes an unavailable model. */
    private isModelUnavailable;
}
/** Shared ErrorDetector instance. */
export declare const errorDetector: ErrorDetector;
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.errorDetector = exports.ErrorDetector = void 0;
|
|
4
|
+
/**
 * Detects and classifies API failures.
 *
 * Three sources are consulted in order: the numeric HTTP status, the
 * provider's response body, and finally the error message. A later source
 * is only consulted while the result is still "unknown", so an ambiguous
 * status such as 400 no longer hides a more specific classification that
 * the body or message can provide.
 */
class ErrorDetector {
    /**
     * Detect failure type from error
     * @param error - The error object (any thrown value is accepted)
     * @returns Classified failure type: "rate_limit", "model_unavailable",
     *          "access_denied", "context_overflow" or "unknown"
     */
    detectFailureType(error) {
        // A recognised status code (429 / 404 / 403) is authoritative.
        if (this.isHttpError(error)) {
            const byStatus = this.classifyHttpError(error);
            if (byStatus !== "unknown") {
                return byStatus;
            }
        }
        // Otherwise look at the response body, if there is one.
        if (this.isApiError(error)) {
            const byBody = this.classifyApiError(error);
            if (byBody !== "unknown") {
                return byBody;
            }
        }
        // Last resort: pattern-match the message of a real Error object.
        if (error instanceof Error) {
            return this.classifyErrorMessage(error.message);
        }
        return "unknown";
    }
    /**
     * Check if error is an HTTP error with status code
     * (a non-null object whose `status` property is a number).
     */
    isHttpError(error) {
        return (typeof error === "object" &&
            error !== null &&
            "status" in error &&
            typeof error.status === "number");
    }
    /**
     * Classify HTTP error by status code.
     * Only 429, 404 and 403 map to a specific type; every other status
     * (including 400, which can be context overflow or other issues)
     * yields "unknown" so the caller can inspect the body or message.
     */
    classifyHttpError(error) {
        switch (error.status) {
            case 429:
                return "rate_limit";
            case 404:
                return "model_unavailable";
            case 403:
                return "access_denied";
            default:
                return "unknown";
        }
    }
    /**
     * Check if error is an API error with response body
     * (a non-null object that has a `response` property).
     */
    isApiError(error) {
        return (typeof error === "object" &&
            error !== null &&
            "response" in error);
    }
    /**
     * Classify API error by response body (`error.response.data`).
     * Checks context overflow, then rate limit, then model unavailable.
     */
    classifyApiError(error) {
        const data = error.response?.data;
        if (!data) {
            return "unknown";
        }
        if (this.isContextOverflow(data)) {
            return "context_overflow";
        }
        if (this.isRateLimit(data)) {
            return "rate_limit";
        }
        if (this.isModelUnavailable(data)) {
            return "model_unavailable";
        }
        return "unknown";
    }
    /**
     * Classify error by message content (case-insensitive substring match).
     * A non-string message is classified as "unknown" instead of throwing.
     */
    classifyErrorMessage(message) {
        if (typeof message !== "string") {
            return "unknown";
        }
        const lowerMessage = message.toLowerCase();
        // Context overflow patterns
        if (lowerMessage.includes("context") &&
            (lowerMessage.includes("length") ||
                lowerMessage.includes("limit") ||
                lowerMessage.includes("exceeded") ||
                lowerMessage.includes("too long"))) {
            return "context_overflow";
        }
        // Rate limit patterns
        if (lowerMessage.includes("rate limit") ||
            lowerMessage.includes("too many requests") ||
            lowerMessage.includes("quota exceeded")) {
            return "rate_limit";
        }
        // Model unavailable patterns
        if (lowerMessage.includes("model") &&
            (lowerMessage.includes("not found") ||
                lowerMessage.includes("unavailable") ||
                lowerMessage.includes("does not exist"))) {
            return "model_unavailable";
        }
        // Access denied patterns
        if (lowerMessage.includes("access denied") ||
            lowerMessage.includes("unauthorized") ||
            lowerMessage.includes("forbidden")) {
            return "access_denied";
        }
        return "unknown";
    }
    /**
     * Check if error indicates context overflow.
     * String bodies are matched by message pattern; object bodies must be
     * of type "invalid_request_error" and either carry the code
     * "context_length_exceeded" or mention "context" in the message.
     */
    isContextOverflow(data) {
        if (typeof data === "string") {
            return this.classifyErrorMessage(data) === "context_overflow";
        }
        if (typeof data === "object" && data !== null) {
            const rawMessage = data.error?.message || data.message || "";
            // Guard: a non-string message must not reach toLowerCase().
            const errorMessage = typeof rawMessage === "string" ? rawMessage : "";
            const errorType = data.error?.type || data.type || "";
            const errorCode = data.error?.code || data.code || "";
            return (errorType === "invalid_request_error" &&
                (errorCode === "context_length_exceeded" ||
                    errorMessage.toLowerCase().includes("context")));
        }
        return false;
    }
    /**
     * Check if error indicates rate limit.
     * The code is compared as a string so that both "429" and the numeric
     * 429 found in some response bodies are recognised.
     */
    isRateLimit(data) {
        if (typeof data === "string") {
            return this.classifyErrorMessage(data) === "rate_limit";
        }
        if (typeof data === "object" && data !== null) {
            const errorType = data.error?.type || data.type || "";
            const errorCode = data.error?.code || data.code || "";
            return (errorType === "rate_limit_error" ||
                errorCode === "rate_limit_exceeded" ||
                String(errorCode) === "429");
        }
        return false;
    }
    /**
     * Check if error indicates model unavailable.
     * Matches the code "model_not_found", or a message that mentions both
     * "model" and "not found".
     */
    isModelUnavailable(data) {
        if (typeof data === "string") {
            return this.classifyErrorMessage(data) === "model_unavailable";
        }
        if (typeof data === "object" && data !== null) {
            const rawMessage = data.error?.message || data.message || "";
            // Guard: a non-string message must not reach toLowerCase().
            const lowerMessage = typeof rawMessage === "string" ? rawMessage.toLowerCase() : "";
            const errorCode = data.error?.code || data.code || "";
            return (errorCode === "model_not_found" ||
                (lowerMessage.includes("model") &&
                    lowerMessage.includes("not found")));
        }
        return false;
    }
}
|
|
169
|
+
exports.ErrorDetector = ErrorDetector;
|
|
170
|
+
exports.errorDetector = new ErrorDetector();
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { ModelRouterOptions, FailureContext, RoutingDecision, RoutingStrategy } from "./types";
|
|
2
|
+
/**
 * Intelligent Model Router.
 * Decides, after a failed request, whether to retry and with which model.
 */
export declare class ModelRouter {
    private strategy;
    private fallbackMap;
    private maxRetries;
    constructor(options: ModelRouterOptions);
    /** Checks the router configuration. */
    private validateOptions;
    /**
     * Decide how to react to a failed request.
     * @param context - Details of the request that failed
     * @returns The routing decision for that failure
     */
    handleFailure(context: FailureContext): RoutingDecision;
    /** Runs the configured routing strategy. */
    private selectStrategy;
    /** Returns the configured maximum number of retries. */
    getMaxRetries(): number;
    /** Returns the configured routing strategy. */
    getStrategy(): RoutingStrategy;
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ModelRouter = void 0;
|
|
4
|
+
const errorDetector_1 = require("./errorDetector");
|
|
5
|
+
const routingStrategies_1 = require("./routingStrategies");
|
|
6
|
+
/**
 * Intelligent Model Router
 * Handles automatic retries and model switching on failures
 */
class ModelRouter {
    /**
     * @param options - Router configuration: `strategy` ("fallback",
     *        "context" or "cost"), optional `fallbackMap` (defaults to {})
     *        and optional `maxRetries` (defaults to 1).
     * @throws Error when the configuration is invalid (see validateOptions).
     */
    constructor(options) {
        this.strategy = options.strategy;
        this.fallbackMap = options.fallbackMap || {};
        this.maxRetries = options.maxRetries ?? 1;
        this.validateOptions();
    }
    /**
     * Validate router configuration
     * @throws Error when maxRetries is not a valid number or is negative,
     *         or when the "fallback" strategy has an empty or malformed
     *         fallbackMap.
     */
    validateOptions() {
        // NaN passes a plain "< 0" check, and "retryCount >= NaN" is always
        // false, which would silently disable the max-retry guard in
        // handleFailure. Reject it up front.
        if (Number.isNaN(Number(this.maxRetries))) {
            throw new Error("TokenFirewall Router: maxRetries must be a valid number");
        }
        if (this.maxRetries < 0) {
            throw new Error("TokenFirewall Router: maxRetries must be non-negative");
        }
        if (this.maxRetries > 5) {
            console.warn("TokenFirewall Router: maxRetries > 5 may cause excessive API calls");
        }
        if (this.strategy === "fallback") {
            if (Object.keys(this.fallbackMap).length === 0) {
                throw new Error("TokenFirewall Router: fallback strategy requires fallbackMap configuration. " +
                    "Provide at least one fallback mapping or use a different strategy.");
            }
            // Validate fallback map structure
            for (const [model, fallbacks] of Object.entries(this.fallbackMap)) {
                if (!Array.isArray(fallbacks) || fallbacks.length === 0) {
                    throw new Error(`TokenFirewall Router: fallbackMap for "${model}" must be a non-empty array`);
                }
                // Validate each fallback model name
                for (const fallbackModel of fallbacks) {
                    if (typeof fallbackModel !== 'string' || fallbackModel.trim() === '') {
                        throw new Error(`TokenFirewall Router: fallbackMap for "${model}" contains invalid model name (empty or whitespace)`);
                    }
                }
            }
        }
    }
    /**
     * Handle a failed request and decide on retry strategy
     * @param context - Context about the failed request (reads retryCount,
     *        error, attemptedModels and originalModel)
     * @returns Routing decision; `retry: false` carries a `reason`
     * @throws Error when the strategy asks for a retry without naming a
     *         next model, or when the configured strategy is unknown
     */
    handleFailure(context) {
        // Check if max retries exceeded
        if (context.retryCount >= this.maxRetries) {
            return {
                retry: false,
                reason: `Max retries (${this.maxRetries}) exceeded`
            };
        }
        // Detect failure type
        const failureType = errorDetector_1.errorDetector.detectFailureType(context.error);
        // Select routing strategy
        const decision = this.selectStrategy(context, failureType);
        // Validate decision
        if (decision.retry && !decision.nextModel) {
            throw new Error("TokenFirewall Router: Invalid decision - retry=true but no nextModel specified");
        }
        // Prevent retrying same model
        if (decision.retry && context.attemptedModels.includes(decision.nextModel)) {
            return {
                retry: false,
                reason: `Model ${decision.nextModel} has already been attempted`
            };
        }
        // Prevent switching back to original model (circular retry)
        if (decision.retry && decision.nextModel === context.originalModel && context.retryCount > 0) {
            return {
                retry: false,
                reason: `Cannot switch back to original model ${context.originalModel}`
            };
        }
        return decision;
    }
    /**
     * Select and execute routing strategy
     * @throws Error when `this.strategy` is not "fallback", "context" or "cost"
     */
    selectStrategy(context, failureType) {
        switch (this.strategy) {
            case "fallback":
                return (0, routingStrategies_1.fallbackStrategy)(context, failureType, this.fallbackMap);
            case "context":
                return (0, routingStrategies_1.contextStrategy)(context, failureType);
            case "cost":
                return (0, routingStrategies_1.costStrategy)(context, failureType);
            default:
                throw new Error(`TokenFirewall Router: Unknown strategy "${this.strategy}"`);
        }
    }
    /**
     * Get maximum retries configured
     */
    getMaxRetries() {
        return this.maxRetries;
    }
    /**
     * Get current strategy
     */
    getStrategy() {
        return this.strategy;
    }
}
|
|
111
|
+
exports.ModelRouter = ModelRouter;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { FailureContext, RoutingDecision, FailureType } from "./types";
|
|
2
|
+
/**
 * Fallback routing strategy.
 * Picks the next model from the caller-supplied fallback map.
 */
export declare function fallbackStrategy(context: FailureContext, failureType: FailureType, fallbackMap: Record<string, string[]>): RoutingDecision;
|
|
7
|
+
/**
 * Context-based routing strategy.
 * On a context overflow, picks a model with a larger context window.
 */
export declare function contextStrategy(context: FailureContext, failureType: FailureType): RoutingDecision;
|
|
12
|
+
/**
 * Cost-based routing strategy.
 * Picks a cheaper model from the same provider.
 */
export declare function costStrategy(context: FailureContext, failureType: FailureType): RoutingDecision;
|