@juspay/neurolink 7.33.2 → 7.33.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2 -0
- package/dist/constants/index.d.ts +192 -0
- package/dist/constants/index.js +195 -0
- package/dist/constants/performance.d.ts +366 -0
- package/dist/constants/performance.js +389 -0
- package/dist/constants/retry.d.ts +224 -0
- package/dist/constants/retry.js +266 -0
- package/dist/constants/timeouts.d.ts +225 -0
- package/dist/constants/timeouts.js +182 -0
- package/dist/constants/tokens.d.ts +234 -0
- package/dist/constants/tokens.js +314 -0
- package/dist/core/types.d.ts +268 -0
- package/dist/core/types.js +153 -0
- package/dist/lib/constants/index.d.ts +192 -0
- package/dist/lib/constants/index.js +195 -0
- package/dist/lib/constants/performance.d.ts +366 -0
- package/dist/lib/constants/performance.js +389 -0
- package/dist/lib/constants/retry.d.ts +224 -0
- package/dist/lib/constants/retry.js +266 -0
- package/dist/lib/constants/timeouts.d.ts +225 -0
- package/dist/lib/constants/timeouts.js +182 -0
- package/dist/lib/constants/tokens.d.ts +234 -0
- package/dist/lib/constants/tokens.js +314 -0
- package/dist/lib/core/types.d.ts +268 -0
- package/dist/lib/core/types.js +153 -0
- package/dist/lib/models/modelRegistry.d.ts +1 -1
- package/dist/lib/models/modelRegistry.js +63 -37
- package/dist/lib/neurolink.js +35 -34
- package/dist/lib/providers/azureOpenai.d.ts +1 -1
- package/dist/lib/providers/azureOpenai.js +2 -1
- package/dist/lib/utils/providerConfig.d.ts +25 -0
- package/dist/lib/utils/providerConfig.js +24 -3
- package/dist/lib/utils/providerHealth.d.ts +1 -1
- package/dist/lib/utils/providerHealth.js +40 -33
- package/dist/lib/utils/providerSetupMessages.js +7 -6
- package/dist/lib/utils/providerUtils.js +16 -24
- package/dist/models/modelRegistry.d.ts +1 -1
- package/dist/models/modelRegistry.js +63 -37
- package/dist/neurolink.js +35 -34
- package/dist/providers/azureOpenai.d.ts +1 -1
- package/dist/providers/azureOpenai.js +2 -1
- package/dist/utils/providerConfig.d.ts +25 -0
- package/dist/utils/providerConfig.js +24 -3
- package/dist/utils/providerHealth.d.ts +1 -1
- package/dist/utils/providerHealth.js +40 -33
- package/dist/utils/providerSetupMessages.js +7 -6
- package/dist/utils/providerUtils.js +16 -24
- package/package.json +1 -1
package/dist/constants/tokens.d.ts
@@ -0,0 +1,234 @@
/**
 * Token Limit Constants for NeuroLink
 *
 * Centralized token configuration to replace magic numbers throughout the codebase.
 * Includes model-specific token limits, use-case optimized limits, and provider constraints.
 *
 * @fileoverview Token constants for AI model interactions
 * @author NeuroLink Team
 * @version 1.0.0
 */
/**
 * Standard token limit categories
 * General-purpose token limits for different use cases
 */
export declare const TOKEN_LIMITS: {
    /** Conservative limits (reliable across all models) */
    readonly CONSERVATIVE: 4096;
    /** Standard limits (most modern models) */
    readonly STANDARD: 8192;
    /** High-capacity limits (premium models) */
    readonly HIGH_CAPACITY: 16384;
    /** Large context windows (specialized models) */
    readonly LARGE_CONTEXT: 100000;
    /** Ultra-large context windows (latest models) */
    readonly ULTRA_LARGE_CONTEXT: 200000;
    /** Maximum context for any model */
    readonly MAXIMUM_CONTEXT: 2097152;
};
/**
 * Use-case specific token limits
 * Optimized token limits for different application scenarios
 */
export declare const USE_CASE_TOKENS: {
    /** Quick evaluation tasks */
    readonly EVALUATION: 500;
    /** Analysis operations */
    readonly ANALYSIS: 800;
    /** Summary generation */
    readonly SUMMARY: 1000;
    /** Documentation generation */
    readonly DOCUMENTATION: 12000;
    /** Code generation */
    readonly CODE_GENERATION: 4000;
    /** Creative writing */
    readonly CREATIVE_WRITING: 6000;
    /** Translation tasks */
    readonly TRANSLATION: 2000;
    /** Conversation responses */
    readonly CONVERSATION: 2048;
    /** Technical explanations */
    readonly TECHNICAL_EXPLANATION: 3000;
    /** Research tasks */
    readonly RESEARCH: 8000;
};
/**
 * Provider-specific token limits
 * Safe token limits for each AI provider based on testing
 */
export declare const PROVIDER_TOKEN_LIMITS: {
    /** Anthropic model limits */
    readonly ANTHROPIC: {
        readonly "claude-3-haiku-20240307": 4096;
        readonly "claude-3-5-sonnet-20241022": 4096;
        readonly "claude-3-opus-20240229": 4096;
        readonly "claude-3-5-haiku-20241022": 4096;
        readonly "claude-3-sonnet-20240229": 4096;
        readonly default: 4096;
    };
    /** OpenAI model limits */
    readonly OPENAI: {
        readonly "gpt-4o": 16384;
        readonly "gpt-4o-mini": 16384;
        readonly "gpt-3.5-turbo": 4096;
        readonly "gpt-4": 8192;
        readonly "gpt-4-turbo": 4096;
        readonly "o1-preview": 8192;
        readonly "o1-mini": 8192;
        readonly default: 8192;
    };
    /** Google AI model limits */
    readonly GOOGLE_AI: {
        readonly "gemini-1.5-pro": 8192;
        readonly "gemini-1.5-flash": 8192;
        readonly "gemini-1.5-flash-lite": 8192;
        readonly "gemini-2.5-pro": 8192;
        readonly "gemini-2.5-flash": 8192;
        readonly "gemini-2.5-flash-lite": 8192;
        readonly "gemini-2.0-flash-001": 8192;
        readonly default: 4096;
    };
    /** Google Vertex AI model limits */
    readonly VERTEX: {
        readonly "gemini-1.5-pro": 8192;
        readonly "gemini-1.5-flash": 8192;
        readonly "gemini-2.5-pro": 8192;
        readonly "gemini-2.5-flash": 8192;
        readonly "gemini-2.5-flash-lite": 8192;
        readonly "gemini-2.0-flash-001": 8192;
        readonly "claude-sonnet-4@20250514": 4096;
        readonly "claude-opus-4@20250514": 4096;
        readonly "claude-3-5-sonnet-20241022": 4096;
        readonly "claude-3-5-haiku-20241022": 4096;
        readonly "claude-3-sonnet-20240229": 4096;
        readonly "claude-3-haiku-20240307": 4096;
        readonly "claude-3-opus-20240229": 4096;
        readonly default: 4096;
    };
    /** AWS Bedrock model limits */
    readonly BEDROCK: {
        readonly "anthropic.claude-3-sonnet-20240229-v1:0": 4096;
        readonly "anthropic.claude-3-haiku-20240307-v1:0": 4096;
        readonly "anthropic.claude-3-5-sonnet-20240620-v1:0": 4096;
        readonly "anthropic.claude-3-opus-20240229-v1:0": 4096;
        readonly "arn:aws:bedrock:us-east-2:225681119357:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0": 4096;
        readonly default: 4096;
    };
    /** Azure OpenAI model limits */
    readonly AZURE: {
        readonly "gpt-4o": 16384;
        readonly "gpt-4o-mini": 16384;
        readonly "gpt-4": 8192;
        readonly "gpt-35-turbo": 4096;
        readonly default: 8192;
    };
    /** Ollama model limits (local) */
    readonly OLLAMA: {
        readonly "llama3.2:latest": 8192;
        readonly llama2: 4096;
        readonly codellama: 8192;
        readonly mistral: 4096;
        readonly default: 8192;
    };
    /** Hugging Face model limits */
    readonly HUGGINGFACE: {
        readonly default: 2048;
    };
    /** Mistral model limits */
    readonly MISTRAL: {
        readonly "mistral-small-latest": 4096;
        readonly "mistral-medium-latest": 4096;
        readonly "mistral-large-latest": 8192;
        readonly default: 4096;
    };
    /** LiteLLM proxy limits */
    readonly LITELLM: {
        readonly default: 4096;
    };
    /** Safe default across all providers */
    readonly DEFAULT: 4096;
};
/**
 * Context window sizes for different models
 * Maximum input token limits (separate from output limits)
 */
export declare const CONTEXT_WINDOWS: {
    /** Small context models */
    readonly SMALL: 4096;
    /** Medium context models */
    readonly MEDIUM: 32768;
    /** Large context models */
    readonly LARGE: 128000;
    /** Ultra-large context models */
    readonly ULTRA_LARGE: 1048576;
    /** Maximum theoretical context */
    readonly MAXIMUM: 2097152;
};
/**
 * Token estimation utilities
 * Rough estimates for token counting without full tokenization
 */
export declare const TOKEN_ESTIMATION: {
    /** Average characters per token (English) */
    readonly CHARS_PER_TOKEN: 4;
    /** Average words per token */
    readonly WORDS_PER_TOKEN: 0.75;
    /** Code characters per token (more compact) */
    readonly CODE_CHARS_PER_TOKEN: 3;
    /** Safety margin for token estimates */
    readonly SAFETY_MARGIN: 0.8;
};
/**
 * Token utility functions
 */
export declare const TokenUtils: {
    /**
     * Get safe token limit for a specific provider and model
     * @param provider - Provider name
     * @param model - Model name (optional)
     * @returns Safe token limit for the provider/model
     */
    readonly getProviderTokenLimit: (provider: string, model?: string) => number;
    /**
     * Get token limit for specific use case
     * @param useCase - Use case type
     * @returns Appropriate token limit
     */
    readonly getUseCaseTokenLimit: (useCase: keyof typeof USE_CASE_TOKENS) => number;
    /**
     * Estimate token count from text
     * @param text - Input text
     * @param isCode - Whether the text is code (more compact tokenization)
     * @returns Estimated token count
     */
    readonly estimateTokenCount: (text: string, isCode?: boolean) => number;
    /**
     * Check if text exceeds token limit
     * @param text - Input text
     * @param limit - Token limit to check against
     * @param isCode - Whether the text is code
     * @returns True if text exceeds limit
     */
    readonly exceedsTokenLimit: (text: string, limit: number, isCode?: boolean) => boolean;
    /**
     * Get appropriate token limit category
     * @param estimatedTokens - Estimated token count
     * @returns Token limit category
     */
    readonly getTokenLimitCategory: (estimatedTokens: number) => keyof typeof TOKEN_LIMITS;
    /**
     * Truncate text to fit within token limit
     * @param text - Input text
     * @param tokenLimit - Maximum tokens allowed
     * @param isCode - Whether the text is code
     * @returns Truncated text
     */
    readonly truncateToTokenLimit: (text: string, tokenLimit: number, isCode?: boolean) => string;
};
export declare const DEFAULT_MAX_TOKENS: 8192;
export declare const DEFAULT_EVALUATION_MAX_TOKENS: 500;
export declare const DEFAULT_ANALYSIS_MAX_TOKENS: 800;
export declare const DEFAULT_DOCUMENTATION_MAX_TOKENS: 12000;
export declare const ANTHROPIC_SAFE: 4096;
export declare const OPENAI_STANDARD: 8192;
export declare const GOOGLE_STANDARD: 4096;
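The declarations above expose every limit as a readonly literal, so a safe maxTokens can be resolved at the call site. A minimal consumption sketch in TypeScript; the deep-import specifier below is an illustration, not a documented entry point, and the package may re-export these constants elsewhere:

import { TokenUtils, PROVIDER_TOKEN_LIMITS } from "@juspay/neurolink/dist/constants/tokens"; // hypothetical deep import

// A known model on a known provider resolves to its per-model limit.
const gpt4oLimit = TokenUtils.getProviderTokenLimit("openai", "gpt-4o"); // 16384

// An unknown model falls back to the provider default; an unknown
// provider falls back to the global DEFAULT of 4096.
const fallback = TokenUtils.getProviderTokenLimit("openai", "gpt-99"); // 8192
const unknown = TokenUtils.getProviderTokenLimit("acme"); // 4096

// Provider names are normalized, so "google-ai" matches GOOGLE_AI.
const google = TokenUtils.getProviderTokenLimit("google-ai"); // 4096

// Use-case lookups are keyed by USE_CASE_TOKENS keys.
const evalBudget = TokenUtils.getUseCaseTokenLimit("EVALUATION"); // 500
console.log(gpt4oLimit, fallback, unknown, google, evalBudget, PROVIDER_TOKEN_LIMITS.DEFAULT);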
package/dist/constants/tokens.js
@@ -0,0 +1,314 @@
/**
 * Token Limit Constants for NeuroLink
 *
 * Centralized token configuration to replace magic numbers throughout the codebase.
 * Includes model-specific token limits, use-case optimized limits, and provider constraints.
 *
 * @fileoverview Token constants for AI model interactions
 * @author NeuroLink Team
 * @version 1.0.0
 */
/**
 * Standard token limit categories
 * General-purpose token limits for different use cases
 */
export const TOKEN_LIMITS = {
    /** Conservative limits (reliable across all models) */
    CONSERVATIVE: 4096, // 4K - Safe for all providers
    /** Standard limits (most modern models) */
    STANDARD: 8192, // 8K - Modern model standard
    /** High-capacity limits (premium models) */
    HIGH_CAPACITY: 16384, // 16K - High-capacity models
    /** Large context windows (specialized models) */
    LARGE_CONTEXT: 100000, // 100K - Large context models
    /** Ultra-large context windows (latest models) */
    ULTRA_LARGE_CONTEXT: 200000, // 200K - Ultra-large context
    /** Maximum context for any model */
    MAXIMUM_CONTEXT: 2097152, // 2M - Theoretical maximum
};
/**
 * Use-case specific token limits
 * Optimized token limits for different application scenarios
 */
export const USE_CASE_TOKENS = {
    /** Quick evaluation tasks */
    EVALUATION: 500, // 500 - Keep evaluation fast
    /** Analysis operations */
    ANALYSIS: 800, // 800 - Analysis operations
    /** Summary generation */
    SUMMARY: 1000, // 1K - Summary generation
    /** Documentation generation */
    DOCUMENTATION: 12000, // 12K - Documentation generation
    /** Code generation */
    CODE_GENERATION: 4000, // 4K - Code generation tasks
    /** Creative writing */
    CREATIVE_WRITING: 6000, // 6K - Creative writing tasks
    /** Translation tasks */
    TRANSLATION: 2000, // 2K - Translation tasks
    /** Conversation responses */
    CONVERSATION: 2048, // 2K - Conversation responses
    /** Technical explanations */
    TECHNICAL_EXPLANATION: 3000, // 3K - Technical explanations
    /** Research tasks */
    RESEARCH: 8000, // 8K - Research and analysis
};
/**
 * Provider-specific token limits
 * Safe token limits for each AI provider based on testing
 */
export const PROVIDER_TOKEN_LIMITS = {
    /** Anthropic model limits */
    ANTHROPIC: {
        "claude-3-haiku-20240307": 4096,
        "claude-3-5-sonnet-20241022": 4096,
        "claude-3-opus-20240229": 4096,
        "claude-3-5-haiku-20241022": 4096,
        "claude-3-sonnet-20240229": 4096,
        default: 4096, // Conservative default for Anthropic
    },
    /** OpenAI model limits */
    OPENAI: {
        "gpt-4o": 16384,
        "gpt-4o-mini": 16384,
        "gpt-3.5-turbo": 4096,
        "gpt-4": 8192,
        "gpt-4-turbo": 4096,
        "o1-preview": 8192,
        "o1-mini": 8192,
        default: 8192, // OpenAI generally supports higher limits
    },
    /** Google AI model limits */
    GOOGLE_AI: {
        "gemini-1.5-pro": 8192,
        "gemini-1.5-flash": 8192,
        "gemini-1.5-flash-lite": 8192,
        "gemini-2.5-pro": 8192,
        "gemini-2.5-flash": 8192,
        "gemini-2.5-flash-lite": 8192,
        "gemini-2.0-flash-001": 8192,
        default: 4096, // Conservative default due to 500 errors at high limits
    },
    /** Google Vertex AI model limits */
    VERTEX: {
        "gemini-1.5-pro": 8192,
        "gemini-1.5-flash": 8192,
        "gemini-2.5-pro": 8192,
        "gemini-2.5-flash": 8192,
        "gemini-2.5-flash-lite": 8192,
        "gemini-2.0-flash-001": 8192,
        "claude-sonnet-4@20250514": 4096,
        "claude-opus-4@20250514": 4096,
        "claude-3-5-sonnet-20241022": 4096,
        "claude-3-5-haiku-20241022": 4096,
        "claude-3-sonnet-20240229": 4096,
        "claude-3-haiku-20240307": 4096,
        "claude-3-opus-20240229": 4096,
        default: 4096,
    },
    /** AWS Bedrock model limits */
    BEDROCK: {
        "anthropic.claude-3-sonnet-20240229-v1:0": 4096,
        "anthropic.claude-3-haiku-20240307-v1:0": 4096,
        "anthropic.claude-3-5-sonnet-20240620-v1:0": 4096,
        "anthropic.claude-3-opus-20240229-v1:0": 4096,
        "arn:aws:bedrock:us-east-2:225681119357:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0": 4096,
        default: 4096,
    },
    /** Azure OpenAI model limits */
    AZURE: {
        "gpt-4o": 16384,
        "gpt-4o-mini": 16384,
        "gpt-4": 8192,
        "gpt-35-turbo": 4096,
        default: 8192,
    },
    /** Ollama model limits (local) */
    OLLAMA: {
        "llama3.2:latest": 8192,
        llama2: 4096,
        codellama: 8192,
        mistral: 4096,
        default: 8192, // Ollama typically supports higher limits
    },
    /** Hugging Face model limits */
    HUGGINGFACE: {
        default: 2048, // Conservative default for HuggingFace
    },
    /** Mistral model limits */
    MISTRAL: {
        "mistral-small-latest": 4096,
        "mistral-medium-latest": 4096,
        "mistral-large-latest": 8192,
        default: 4096,
    },
    /** LiteLLM proxy limits */
    LITELLM: {
        default: 4096, // Conservative default
    },
    /** Safe default across all providers */
    DEFAULT: 4096,
};
/**
 * Context window sizes for different models
 * Maximum input token limits (separate from output limits)
 */
export const CONTEXT_WINDOWS = {
    /** Small context models */
    SMALL: 4096, // 4K - Small context
    /** Medium context models */
    MEDIUM: 32768, // 32K - Medium context
    /** Large context models */
    LARGE: 128000, // 128K - Large context
    /** Ultra-large context models */
    ULTRA_LARGE: 1048576, // 1M - Ultra-large context
    /** Maximum theoretical context */
    MAXIMUM: 2097152, // 2M - Maximum context
};
/**
 * Token estimation utilities
 * Rough estimates for token counting without full tokenization
 */
export const TOKEN_ESTIMATION = {
    /** Average characters per token (English) */
    CHARS_PER_TOKEN: 4, // 4 chars - English average
    /** Average words per token */
    WORDS_PER_TOKEN: 0.75, // 0.75 words - English average
    /** Code characters per token (more compact) */
    CODE_CHARS_PER_TOKEN: 3, // 3 chars - Code is more compact
    /** Safety margin for token estimates */
    SAFETY_MARGIN: 0.8, // 80% - Safety margin for estimates
};
/**
 * Token utility functions
 */
export const TokenUtils = {
    /**
     * Get safe token limit for a specific provider and model
     * @param provider - Provider name
     * @param model - Model name (optional)
     * @returns Safe token limit for the provider/model
     */
    getProviderTokenLimit: (provider, model) => {
        const normalizedProvider = provider.toLowerCase().replace(/[-_]/g, "");
        let providerLimits;
        switch (normalizedProvider) {
            case "anthropic":
                providerLimits = PROVIDER_TOKEN_LIMITS.ANTHROPIC;
                break;
            case "openai":
                providerLimits = PROVIDER_TOKEN_LIMITS.OPENAI;
                break;
            case "googleai":
                providerLimits = PROVIDER_TOKEN_LIMITS.GOOGLE_AI;
                break;
            case "vertex":
                providerLimits = PROVIDER_TOKEN_LIMITS.VERTEX;
                break;
            case "bedrock":
                providerLimits = PROVIDER_TOKEN_LIMITS.BEDROCK;
                break;
            case "azure":
                providerLimits = PROVIDER_TOKEN_LIMITS.AZURE;
                break;
            case "ollama":
                providerLimits = PROVIDER_TOKEN_LIMITS.OLLAMA;
                break;
            case "huggingface":
                providerLimits = PROVIDER_TOKEN_LIMITS.HUGGINGFACE;
                break;
            case "mistral":
                providerLimits = PROVIDER_TOKEN_LIMITS.MISTRAL;
                break;
            case "litellm":
                providerLimits = PROVIDER_TOKEN_LIMITS.LITELLM;
                break;
            default:
                return PROVIDER_TOKEN_LIMITS.DEFAULT;
        }
        if (model && providerLimits[model]) {
            return providerLimits[model];
        }
        return providerLimits.default || PROVIDER_TOKEN_LIMITS.DEFAULT;
    },
    /**
     * Get token limit for specific use case
     * @param useCase - Use case type
     * @returns Appropriate token limit
     */
    getUseCaseTokenLimit: (useCase) => {
        return USE_CASE_TOKENS[useCase] || TOKEN_LIMITS.STANDARD;
    },
    /**
     * Estimate token count from text
     * @param text - Input text
     * @param isCode - Whether the text is code (more compact tokenization)
     * @returns Estimated token count
     */
    estimateTokenCount: (text, isCode = false) => {
        const charsPerToken = isCode
            ? TOKEN_ESTIMATION.CODE_CHARS_PER_TOKEN
            : TOKEN_ESTIMATION.CHARS_PER_TOKEN;
        const estimatedTokens = Math.ceil(text.length / charsPerToken);
        // Apply safety margin
        return Math.ceil(estimatedTokens / TOKEN_ESTIMATION.SAFETY_MARGIN);
    },
    /**
     * Check if text exceeds token limit
     * @param text - Input text
     * @param limit - Token limit to check against
     * @param isCode - Whether the text is code
     * @returns True if text exceeds limit
     */
    exceedsTokenLimit: (text, limit, isCode = false) => {
        const estimatedTokens = TokenUtils.estimateTokenCount(text, isCode);
        return estimatedTokens > limit;
    },
    /**
     * Get appropriate token limit category
     * @param estimatedTokens - Estimated token count
     * @returns Token limit category
     */
    getTokenLimitCategory: (estimatedTokens) => {
        if (estimatedTokens <= TOKEN_LIMITS.CONSERVATIVE) {
            return "CONSERVATIVE";
        }
        if (estimatedTokens <= TOKEN_LIMITS.STANDARD) {
            return "STANDARD";
        }
        if (estimatedTokens <= TOKEN_LIMITS.HIGH_CAPACITY) {
            return "HIGH_CAPACITY";
        }
        if (estimatedTokens <= TOKEN_LIMITS.LARGE_CONTEXT) {
            return "LARGE_CONTEXT";
        }
        return "ULTRA_LARGE_CONTEXT";
    },
    /**
     * Truncate text to fit within token limit
     * @param text - Input text
     * @param tokenLimit - Maximum tokens allowed
     * @param isCode - Whether the text is code
     * @returns Truncated text
     */
    truncateToTokenLimit: (text, tokenLimit, isCode = false) => {
        const charsPerToken = isCode
            ? TOKEN_ESTIMATION.CODE_CHARS_PER_TOKEN
            : TOKEN_ESTIMATION.CHARS_PER_TOKEN;
        // Apply safety margin
        const maxChars = Math.floor(tokenLimit * charsPerToken * TOKEN_ESTIMATION.SAFETY_MARGIN);
        if (text.length <= maxChars) {
            return text;
        }
        // Truncate and add ellipsis
        return text.substring(0, maxChars - 3) + "...";
    },
};
// Legacy compatibility exports from existing constants
export const DEFAULT_MAX_TOKENS = TOKEN_LIMITS.STANDARD;
export const DEFAULT_EVALUATION_MAX_TOKENS = USE_CASE_TOKENS.EVALUATION;
export const DEFAULT_ANALYSIS_MAX_TOKENS = USE_CASE_TOKENS.ANALYSIS;
export const DEFAULT_DOCUMENTATION_MAX_TOKENS = USE_CASE_TOKENS.DOCUMENTATION;
// Provider-specific safe defaults (from existing PROVIDER_MAX_TOKENS)
export const ANTHROPIC_SAFE = PROVIDER_TOKEN_LIMITS.ANTHROPIC.default;
export const OPENAI_STANDARD = PROVIDER_TOKEN_LIMITS.OPENAI.default;
export const GOOGLE_STANDARD = PROVIDER_TOKEN_LIMITS.GOOGLE_AI.default;
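The implementation applies SAFETY_MARGIN in opposite directions: estimateTokenCount divides by it, inflating estimates by 25%, while truncateToTokenLimit multiplies the character budget by it, shrinking budgets by 20%. A worked check against the constants above, using the file's own exports on a 1,000-character prose string:

// Estimation inflates: Math.ceil(1000 / 4) = 250 raw tokens,
// then Math.ceil(250 / 0.8) = 313 estimated tokens.
const estimate = TokenUtils.estimateTokenCount("x".repeat(1000)); // 313

// Truncation deflates: a 100-token budget allows
// Math.floor(100 * 4 * 0.8) = 320 characters (317 chars + "...").
const cut = TokenUtils.truncateToTokenLimit("x".repeat(1000), 100);
console.log(estimate, cut.length); // 313 320

// The two margins compose consistently: text truncated to an N-token
// budget estimates back to at most N tokens, so the exceed check passes.
console.log(TokenUtils.exceedsTokenLimit(cut, 100)); // false
console.log(TokenUtils.getTokenLimitCategory(estimate)); // "CONSERVATIVE"

One consequence of the double margin worth noting: round-tripping a string through truncateToTokenLimit and estimateTokenCount lands at roughly 64% of the nominal token budget, so these heuristics stay well clear of real tokenizer limits at the cost of some usable context.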