@juspay/neurolink 8.26.0 → 8.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +47 -25
- package/dist/adapters/providerImageAdapter.js +11 -0
- package/dist/cli/commands/config.js +16 -23
- package/dist/cli/commands/setup-anthropic.js +3 -26
- package/dist/cli/commands/setup-azure.js +3 -22
- package/dist/cli/commands/setup-bedrock.js +3 -26
- package/dist/cli/commands/setup-google-ai.js +3 -22
- package/dist/cli/commands/setup-mistral.js +3 -31
- package/dist/cli/commands/setup-openai.js +3 -22
- package/dist/cli/factories/commandFactory.js +32 -0
- package/dist/cli/factories/ollamaCommandFactory.js +5 -17
- package/dist/cli/loop/optionsSchema.d.ts +1 -1
- package/dist/cli/loop/optionsSchema.js +13 -0
- package/dist/config/modelSpecificPrompts.d.ts +9 -0
- package/dist/config/modelSpecificPrompts.js +38 -0
- package/dist/constants/enums.d.ts +8 -0
- package/dist/constants/enums.js +8 -0
- package/dist/constants/tokens.d.ts +25 -0
- package/dist/constants/tokens.js +18 -0
- package/dist/core/analytics.js +7 -28
- package/dist/core/baseProvider.js +1 -0
- package/dist/core/constants.d.ts +1 -0
- package/dist/core/constants.js +1 -0
- package/dist/core/modules/GenerationHandler.js +43 -5
- package/dist/core/streamAnalytics.d.ts +1 -0
- package/dist/core/streamAnalytics.js +8 -16
- package/dist/lib/adapters/providerImageAdapter.js +11 -0
- package/dist/lib/config/modelSpecificPrompts.d.ts +9 -0
- package/dist/lib/config/modelSpecificPrompts.js +39 -0
- package/dist/lib/constants/enums.d.ts +8 -0
- package/dist/lib/constants/enums.js +8 -0
- package/dist/lib/constants/tokens.d.ts +25 -0
- package/dist/lib/constants/tokens.js +18 -0
- package/dist/lib/core/analytics.js +7 -28
- package/dist/lib/core/baseProvider.js +1 -0
- package/dist/lib/core/constants.d.ts +1 -0
- package/dist/lib/core/constants.js +1 -0
- package/dist/lib/core/modules/GenerationHandler.js +43 -5
- package/dist/lib/core/streamAnalytics.d.ts +1 -0
- package/dist/lib/core/streamAnalytics.js +8 -16
- package/dist/lib/providers/googleAiStudio.d.ts +15 -0
- package/dist/lib/providers/googleAiStudio.js +659 -3
- package/dist/lib/providers/googleVertex.d.ts +25 -0
- package/dist/lib/providers/googleVertex.js +978 -3
- package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
- package/dist/lib/types/analytics.d.ts +4 -0
- package/dist/lib/types/cli.d.ts +16 -0
- package/dist/lib/types/conversation.d.ts +72 -4
- package/dist/lib/types/conversation.js +30 -0
- package/dist/lib/types/generateTypes.d.ts +135 -0
- package/dist/lib/types/groundingTypes.d.ts +231 -0
- package/dist/lib/types/groundingTypes.js +12 -0
- package/dist/lib/types/providers.d.ts +29 -0
- package/dist/lib/types/streamTypes.d.ts +54 -0
- package/dist/lib/utils/analyticsUtils.js +22 -2
- package/dist/lib/utils/errorHandling.d.ts +65 -0
- package/dist/lib/utils/errorHandling.js +268 -0
- package/dist/lib/utils/modelChoices.d.ts +82 -0
- package/dist/lib/utils/modelChoices.js +402 -0
- package/dist/lib/utils/modelDetection.d.ts +9 -0
- package/dist/lib/utils/modelDetection.js +81 -0
- package/dist/lib/utils/parameterValidation.d.ts +59 -1
- package/dist/lib/utils/parameterValidation.js +196 -0
- package/dist/lib/utils/schemaConversion.d.ts +12 -0
- package/dist/lib/utils/schemaConversion.js +90 -0
- package/dist/lib/utils/thinkingConfig.d.ts +108 -0
- package/dist/lib/utils/thinkingConfig.js +105 -0
- package/dist/lib/utils/tokenUtils.d.ts +124 -0
- package/dist/lib/utils/tokenUtils.js +240 -0
- package/dist/lib/utils/transformationUtils.js +15 -26
- package/dist/providers/googleAiStudio.d.ts +15 -0
- package/dist/providers/googleAiStudio.js +659 -3
- package/dist/providers/googleVertex.d.ts +25 -0
- package/dist/providers/googleVertex.js +978 -3
- package/dist/types/analytics.d.ts +4 -0
- package/dist/types/cli.d.ts +16 -0
- package/dist/types/conversation.d.ts +72 -4
- package/dist/types/conversation.js +30 -0
- package/dist/types/generateTypes.d.ts +135 -0
- package/dist/types/groundingTypes.d.ts +231 -0
- package/dist/types/groundingTypes.js +11 -0
- package/dist/types/providers.d.ts +29 -0
- package/dist/types/streamTypes.d.ts +54 -0
- package/dist/utils/analyticsUtils.js +22 -2
- package/dist/utils/errorHandling.d.ts +65 -0
- package/dist/utils/errorHandling.js +268 -0
- package/dist/utils/modelChoices.d.ts +82 -0
- package/dist/utils/modelChoices.js +401 -0
- package/dist/utils/modelDetection.d.ts +9 -0
- package/dist/utils/modelDetection.js +80 -0
- package/dist/utils/parameterValidation.d.ts +59 -1
- package/dist/utils/parameterValidation.js +196 -0
- package/dist/utils/schemaConversion.d.ts +12 -0
- package/dist/utils/schemaConversion.js +90 -0
- package/dist/utils/thinkingConfig.d.ts +108 -0
- package/dist/utils/thinkingConfig.js +104 -0
- package/dist/utils/tokenUtils.d.ts +124 -0
- package/dist/utils/tokenUtils.js +239 -0
- package/dist/utils/transformationUtils.js +15 -26
- package/package.json +4 -3
|
@@ -65,5 +65,18 @@ export const textGenerationOptionsSchema = {
|
|
|
65
65
|
type: "boolean",
|
|
66
66
|
description: "Enable or disable automatic conversation summarization for this request.",
|
|
67
67
|
},
|
|
68
|
+
thinking: {
|
|
69
|
+
type: "boolean",
|
|
70
|
+
description: "Enable extended thinking/reasoning capability.",
|
|
71
|
+
},
|
|
72
|
+
thinkingBudget: {
|
|
73
|
+
type: "number",
|
|
74
|
+
description: "Token budget for thinking (Anthropic models: 5000-100000).",
|
|
75
|
+
},
|
|
76
|
+
thinkingLevel: {
|
|
77
|
+
type: "string",
|
|
78
|
+
description: "Thinking level for Gemini 3 models: minimal, low, medium, high.",
|
|
79
|
+
allowedValues: ["minimal", "low", "medium", "high"],
|
|
80
|
+
},
|
|
68
81
|
};
|
|
69
82
|
//# sourceMappingURL=optionsSchema.js.map
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model-specific prompt configurations and enhancement utilities
|
|
3
|
+
*/
|
|
4
|
+
import { isGemini3Model, isGemini25Model } from "../utils/modelDetection.js";
|
|
5
|
+
export { isGemini3Model, isGemini25Model };
|
|
6
|
+
export declare const MODEL_SPECIFIC_INSTRUCTIONS: Record<string, string>;
|
|
7
|
+
export declare function getModelSpecificInstructions(model: string): string;
|
|
8
|
+
export declare function enhancePromptForModel(basePrompt: string, model: string, _provider?: string): string;
|
|
9
|
+
export declare function shouldEnhancePrompt(model: string): boolean;
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model-specific prompt configurations and enhancement utilities
|
|
3
|
+
*/
|
|
4
|
+
import { isGemini3Model, isGemini25Model } from "../utils/modelDetection.js";
|
|
5
|
+
// Re-export from modelDetection for backwards compatibility
|
|
6
|
+
export { isGemini3Model, isGemini25Model };
|
|
7
|
+
export const MODEL_SPECIFIC_INSTRUCTIONS = {
|
|
8
|
+
"gemini-3": `You have access to extended thinking capabilities. Use them for complex reasoning tasks that require deep analysis.`,
|
|
9
|
+
"gemini-2.5": `You support function calling and structured outputs. Format responses according to the requested schema when provided.`,
|
|
10
|
+
"gpt-4": `You are a helpful assistant with strong reasoning capabilities.`,
|
|
11
|
+
"claude-3": `You have extended thinking capabilities available when enabled. Use systematic reasoning for complex problems.`,
|
|
12
|
+
default: "",
|
|
13
|
+
};
|
|
14
|
+
export function getModelSpecificInstructions(model) {
|
|
15
|
+
if (isGemini3Model(model)) {
|
|
16
|
+
return MODEL_SPECIFIC_INSTRUCTIONS["gemini-3"];
|
|
17
|
+
}
|
|
18
|
+
if (isGemini25Model(model)) {
|
|
19
|
+
return MODEL_SPECIFIC_INSTRUCTIONS["gemini-2.5"];
|
|
20
|
+
}
|
|
21
|
+
if (/^gpt-4/i.test(model)) {
|
|
22
|
+
return MODEL_SPECIFIC_INSTRUCTIONS["gpt-4"];
|
|
23
|
+
}
|
|
24
|
+
if (/^claude-3/i.test(model)) {
|
|
25
|
+
return MODEL_SPECIFIC_INSTRUCTIONS["claude-3"];
|
|
26
|
+
}
|
|
27
|
+
return MODEL_SPECIFIC_INSTRUCTIONS["default"];
|
|
28
|
+
}
|
|
29
|
+
export function enhancePromptForModel(basePrompt, model, _provider) {
|
|
30
|
+
const modelInstructions = getModelSpecificInstructions(model);
|
|
31
|
+
if (!modelInstructions) {
|
|
32
|
+
return basePrompt;
|
|
33
|
+
}
|
|
34
|
+
return `${modelInstructions}\n\n${basePrompt}`;
|
|
35
|
+
}
|
|
36
|
+
export function shouldEnhancePrompt(model) {
|
|
37
|
+
return isGemini3Model(model) || isGemini25Model(model);
|
|
38
|
+
}
|
|
@@ -191,6 +191,12 @@ export declare enum VertexModels {
|
|
|
191
191
|
GEMINI_3_PRO_LATEST = "gemini-3-pro-latest",
|
|
192
192
|
/** Gemini 3 Pro Preview - Generic preview (legacy) */
|
|
193
193
|
GEMINI_3_PRO_PREVIEW = "gemini-3-pro-preview",
|
|
194
|
+
/** Gemini 3 Flash - Base model with adaptive thinking */
|
|
195
|
+
GEMINI_3_FLASH = "gemini-3-flash",
|
|
196
|
+
/** Gemini 3 Flash Preview - Versioned preview */
|
|
197
|
+
GEMINI_3_FLASH_PREVIEW = "gemini-3-flash-preview",
|
|
198
|
+
/** Gemini 3 Flash Latest - Auto-updated alias (always points to latest preview) */
|
|
199
|
+
GEMINI_3_FLASH_LATEST = "gemini-3-flash-latest",
|
|
194
200
|
GEMINI_2_5_PRO = "gemini-2.5-pro",
|
|
195
201
|
GEMINI_2_5_FLASH = "gemini-2.5-flash",
|
|
196
202
|
GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite",
|
|
@@ -208,6 +214,8 @@ export declare enum VertexModels {
|
|
|
208
214
|
export declare enum GoogleAIModels {
|
|
209
215
|
GEMINI_3_PRO_PREVIEW = "gemini-3-pro-preview",
|
|
210
216
|
GEMINI_3_PRO_IMAGE_PREVIEW = "gemini-3-pro-image-preview",
|
|
217
|
+
GEMINI_3_FLASH = "gemini-3-flash",
|
|
218
|
+
GEMINI_3_FLASH_PREVIEW = "gemini-3-flash-preview",
|
|
211
219
|
GEMINI_2_5_PRO = "gemini-2.5-pro",
|
|
212
220
|
GEMINI_2_5_FLASH = "gemini-2.5-flash",
|
|
213
221
|
GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite",
|
package/dist/constants/enums.js
CHANGED
|
@@ -269,6 +269,12 @@ export var VertexModels;
|
|
|
269
269
|
VertexModels["GEMINI_3_PRO_LATEST"] = "gemini-3-pro-latest";
|
|
270
270
|
/** Gemini 3 Pro Preview - Generic preview (legacy) */
|
|
271
271
|
VertexModels["GEMINI_3_PRO_PREVIEW"] = "gemini-3-pro-preview";
|
|
272
|
+
/** Gemini 3 Flash - Base model with adaptive thinking */
|
|
273
|
+
VertexModels["GEMINI_3_FLASH"] = "gemini-3-flash";
|
|
274
|
+
/** Gemini 3 Flash Preview - Versioned preview */
|
|
275
|
+
VertexModels["GEMINI_3_FLASH_PREVIEW"] = "gemini-3-flash-preview";
|
|
276
|
+
/** Gemini 3 Flash Latest - Auto-updated alias (always points to latest preview) */
|
|
277
|
+
VertexModels["GEMINI_3_FLASH_LATEST"] = "gemini-3-flash-latest";
|
|
272
278
|
// Gemini 2.5 Series (Latest - 2025)
|
|
273
279
|
VertexModels["GEMINI_2_5_PRO"] = "gemini-2.5-pro";
|
|
274
280
|
VertexModels["GEMINI_2_5_FLASH"] = "gemini-2.5-flash";
|
|
@@ -291,6 +297,8 @@ export var GoogleAIModels;
|
|
|
291
297
|
// Gemini 3 Series
|
|
292
298
|
GoogleAIModels["GEMINI_3_PRO_PREVIEW"] = "gemini-3-pro-preview";
|
|
293
299
|
GoogleAIModels["GEMINI_3_PRO_IMAGE_PREVIEW"] = "gemini-3-pro-image-preview";
|
|
300
|
+
GoogleAIModels["GEMINI_3_FLASH"] = "gemini-3-flash";
|
|
301
|
+
GoogleAIModels["GEMINI_3_FLASH_PREVIEW"] = "gemini-3-flash-preview";
|
|
294
302
|
// Gemini 2.5 Series
|
|
295
303
|
GoogleAIModels["GEMINI_2_5_PRO"] = "gemini-2.5-pro";
|
|
296
304
|
GoogleAIModels["GEMINI_2_5_FLASH"] = "gemini-2.5-flash";
|
|
@@ -85,6 +85,9 @@ export declare const PROVIDER_TOKEN_LIMITS: {
|
|
|
85
85
|
readonly "gemini-3-pro-preview": 8192;
|
|
86
86
|
readonly "gemini-3-pro-preview-11-2025": 8192;
|
|
87
87
|
readonly "gemini-3-pro-latest": 8192;
|
|
88
|
+
readonly "gemini-3-flash": 65536;
|
|
89
|
+
readonly "gemini-3-flash-preview": 65536;
|
|
90
|
+
readonly "gemini-3-flash-latest": 65536;
|
|
88
91
|
readonly "gemini-2.5-pro": 8192;
|
|
89
92
|
readonly "gemini-2.5-flash": 8192;
|
|
90
93
|
readonly "gemini-2.5-flash-lite": 8192;
|
|
@@ -100,6 +103,9 @@ export declare const PROVIDER_TOKEN_LIMITS: {
|
|
|
100
103
|
readonly "gemini-3-pro-preview": 8192;
|
|
101
104
|
readonly "gemini-3-pro-preview-11-2025": 8192;
|
|
102
105
|
readonly "gemini-3-pro-latest": 8192;
|
|
106
|
+
readonly "gemini-3-flash": 65536;
|
|
107
|
+
readonly "gemini-3-flash-preview": 65536;
|
|
108
|
+
readonly "gemini-3-flash-latest": 65536;
|
|
103
109
|
readonly "gemini-2.5-pro": 8192;
|
|
104
110
|
readonly "gemini-2.5-flash": 8192;
|
|
105
111
|
readonly "gemini-2.5-flash-lite": 8192;
|
|
@@ -182,6 +188,25 @@ export declare const CONTEXT_WINDOWS: {
|
|
|
182
188
|
/** Maximum theoretical context */
|
|
183
189
|
readonly MAXIMUM: 2097152;
|
|
184
190
|
};
|
|
191
|
+
/**
|
|
192
|
+
* Model-specific token limits with input/output breakdown
|
|
193
|
+
* For models that require explicit input and output token limits
|
|
194
|
+
*/
|
|
195
|
+
export declare const MODEL_TOKEN_LIMITS: {
|
|
196
|
+
/** Gemini 3 Flash Series */
|
|
197
|
+
readonly "gemini-3-flash": {
|
|
198
|
+
readonly input: 1000000;
|
|
199
|
+
readonly output: 65536;
|
|
200
|
+
};
|
|
201
|
+
readonly "gemini-3-flash-preview": {
|
|
202
|
+
readonly input: 1000000;
|
|
203
|
+
readonly output: 65536;
|
|
204
|
+
};
|
|
205
|
+
readonly "gemini-3-flash-latest": {
|
|
206
|
+
readonly input: 1000000;
|
|
207
|
+
readonly output: 65536;
|
|
208
|
+
};
|
|
209
|
+
};
|
|
185
210
|
/**
|
|
186
211
|
* Token estimation utilities
|
|
187
212
|
* Rough estimates for token counting without full tokenization
|
package/dist/constants/tokens.js
CHANGED
|
@@ -89,6 +89,10 @@ export const PROVIDER_TOKEN_LIMITS = {
|
|
|
89
89
|
"gemini-3-pro-preview": 8192,
|
|
90
90
|
"gemini-3-pro-preview-11-2025": 8192,
|
|
91
91
|
"gemini-3-pro-latest": 8192,
|
|
92
|
+
// Gemini 3 Flash Series
|
|
93
|
+
"gemini-3-flash": 65536,
|
|
94
|
+
"gemini-3-flash-preview": 65536,
|
|
95
|
+
"gemini-3-flash-latest": 65536,
|
|
92
96
|
// Gemini 2.5 Series
|
|
93
97
|
"gemini-2.5-pro": 8192,
|
|
94
98
|
"gemini-2.5-flash": 8192,
|
|
@@ -108,6 +112,10 @@ export const PROVIDER_TOKEN_LIMITS = {
|
|
|
108
112
|
"gemini-3-pro-preview": 8192,
|
|
109
113
|
"gemini-3-pro-preview-11-2025": 8192,
|
|
110
114
|
"gemini-3-pro-latest": 8192,
|
|
115
|
+
// Gemini 3 Flash Series
|
|
116
|
+
"gemini-3-flash": 65536,
|
|
117
|
+
"gemini-3-flash-preview": 65536,
|
|
118
|
+
"gemini-3-flash-latest": 65536,
|
|
111
119
|
// Gemini 2.5 Series
|
|
112
120
|
"gemini-2.5-pro": 8192,
|
|
113
121
|
"gemini-2.5-flash": 8192,
|
|
@@ -201,6 +209,16 @@ export const CONTEXT_WINDOWS = {
|
|
|
201
209
|
/** Maximum theoretical context */
|
|
202
210
|
MAXIMUM: 2097152, // 2M - Maximum context
|
|
203
211
|
};
|
|
212
|
+
/**
|
|
213
|
+
* Model-specific token limits with input/output breakdown
|
|
214
|
+
* For models that require explicit input and output token limits
|
|
215
|
+
*/
|
|
216
|
+
export const MODEL_TOKEN_LIMITS = {
|
|
217
|
+
/** Gemini 3 Flash Series */
|
|
218
|
+
"gemini-3-flash": { input: 1000000, output: 65536 },
|
|
219
|
+
"gemini-3-flash-preview": { input: 1000000, output: 65536 },
|
|
220
|
+
"gemini-3-flash-latest": { input: 1000000, output: 65536 },
|
|
221
|
+
};
|
|
204
222
|
/**
|
|
205
223
|
* Token estimation utilities
|
|
206
224
|
* Rough estimates for token counting without full tokenization
|
package/dist/core/analytics.js
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import { logger } from "../utils/logger.js";
|
|
8
8
|
import { modelConfig } from "./modelConfiguration.js";
|
|
9
|
+
import { extractTokenUsage as extractTokenUsageUtil } from "../utils/tokenUtils.js";
|
|
9
10
|
/**
|
|
10
11
|
* Create analytics data structure from AI response
|
|
11
12
|
*/
|
|
@@ -49,36 +50,14 @@ export function createAnalytics(provider, model, result, responseTime, context)
|
|
|
49
50
|
}
|
|
50
51
|
/**
|
|
51
52
|
* Extract token usage from various AI result formats
|
|
53
|
+
* Delegates to centralized tokenUtils for consistent extraction across providers
|
|
52
54
|
*/
|
|
53
55
|
function extractTokenUsage(result) {
|
|
54
|
-
// Use
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
// Try BaseProvider normalized format first (input/output/total)
|
|
60
|
-
if (typeof usage.input === "number" || typeof usage.output === "number") {
|
|
61
|
-
const input = typeof usage.input === "number" ? usage.input : 0;
|
|
62
|
-
const output = typeof usage.output === "number" ? usage.output : 0;
|
|
63
|
-
const total = typeof usage.total === "number" ? usage.total : input + output;
|
|
64
|
-
return { input, output, total };
|
|
65
|
-
}
|
|
66
|
-
// Try OpenAI/Mistral format (promptTokens/completionTokens)
|
|
67
|
-
if (typeof usage.promptTokens === "number" ||
|
|
68
|
-
typeof usage.completionTokens === "number") {
|
|
69
|
-
const input = typeof usage.promptTokens === "number" ? usage.promptTokens : 0;
|
|
70
|
-
const output = typeof usage.completionTokens === "number" ? usage.completionTokens : 0;
|
|
71
|
-
const total = typeof usage.total === "number" ? usage.total : input + output;
|
|
72
|
-
return { input, output, total };
|
|
73
|
-
}
|
|
74
|
-
// Handle total-only case
|
|
75
|
-
if (typeof usage.total === "number") {
|
|
76
|
-
return { input: 0, output: 0, total: usage.total };
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
|
-
// Fallback for edge cases
|
|
80
|
-
logger.debug("Token extraction failed: unknown usage format", { result });
|
|
81
|
-
return { input: 0, output: 0, total: 0 };
|
|
56
|
+
// Use centralized token extraction utility
|
|
57
|
+
// The utility handles nested usage objects, multiple provider formats,
|
|
58
|
+
// cache tokens, reasoning tokens, and cache savings calculation
|
|
59
|
+
// Cast result to allow extractTokenUsageUtil to handle type normalization
|
|
60
|
+
return extractTokenUsageUtil(result.usage);
|
|
82
61
|
}
|
|
83
62
|
/**
|
|
84
63
|
* Estimate cost based on provider, model, and token usage
|
|
@@ -126,6 +126,7 @@ export class BaseProvider {
|
|
|
126
126
|
maxSteps: options.maxSteps || 5,
|
|
127
127
|
provider: options.provider,
|
|
128
128
|
model: options.model,
|
|
129
|
+
region: options.region, // Pass region for Vertex AI
|
|
129
130
|
// 🔧 FIX: Include analytics and evaluation options from stream options
|
|
130
131
|
enableAnalytics: options.enableAnalytics,
|
|
131
132
|
enableEvaluation: options.enableEvaluation,
|
package/dist/core/constants.d.ts
CHANGED
|
@@ -6,6 +6,7 @@ export declare const DEFAULT_MAX_TOKENS: undefined;
|
|
|
6
6
|
export declare const DEFAULT_TEMPERATURE = 0.7;
|
|
7
7
|
export declare const DEFAULT_TIMEOUT = 60000;
|
|
8
8
|
export declare const DEFAULT_MAX_STEPS = 200;
|
|
9
|
+
export declare const DEFAULT_TOOL_MAX_RETRIES = 2;
|
|
9
10
|
export declare const STEP_LIMITS: {
|
|
10
11
|
min: number;
|
|
11
12
|
max: number;
|
package/dist/core/constants.js
CHANGED
|
@@ -7,6 +7,7 @@ export const DEFAULT_MAX_TOKENS = undefined; // Unlimited by default - let provi
|
|
|
7
7
|
export const DEFAULT_TEMPERATURE = 0.7;
|
|
8
8
|
export const DEFAULT_TIMEOUT = 60000;
|
|
9
9
|
export const DEFAULT_MAX_STEPS = 200;
|
|
10
|
+
export const DEFAULT_TOOL_MAX_RETRIES = 2; // Maximum retries per tool before permanently failing
|
|
10
11
|
// Step execution limits
|
|
11
12
|
export const STEP_LIMITS = {
|
|
12
13
|
min: 1,
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
*/
|
|
15
15
|
import { generateText, Output, NoObjectGeneratedError } from "ai";
|
|
16
16
|
import { logger } from "../../utils/logger.js";
|
|
17
|
+
import { extractTokenUsage } from "../../utils/tokenUtils.js";
|
|
17
18
|
import { DEFAULT_MAX_STEPS } from "../constants.js";
|
|
18
19
|
/**
|
|
19
20
|
* GenerationHandler class - Handles text generation operations for AI providers
|
|
@@ -36,6 +37,10 @@ export class GenerationHandler {
|
|
|
36
37
|
* @private
|
|
37
38
|
*/
|
|
38
39
|
async callGenerateText(model, messages, tools, options, shouldUseTools, includeStructuredOutput) {
|
|
40
|
+
// Check if this is a Google provider (for provider-specific options)
|
|
41
|
+
const isGoogleProvider = this.providerName === "google-ai" || this.providerName === "vertex";
|
|
42
|
+
// Check if this is an Anthropic provider
|
|
43
|
+
const isAnthropicProvider = this.providerName === "anthropic" || this.providerName === "bedrock";
|
|
39
44
|
const useStructuredOutput = includeStructuredOutput &&
|
|
40
45
|
!!options.schema &&
|
|
41
46
|
(options.output?.format === "json" ||
|
|
@@ -52,6 +57,39 @@ export class GenerationHandler {
|
|
|
52
57
|
options.schema && {
|
|
53
58
|
experimental_output: Output.object({ schema: options.schema }),
|
|
54
59
|
}),
|
|
60
|
+
// Add thinking configuration for extended reasoning
|
|
61
|
+
// Gemini 3 models use providerOptions.google.thinkingConfig with thinkingLevel
|
|
62
|
+
// Gemini 2.5 models use thinkingBudget
|
|
63
|
+
// Anthropic models use experimental_thinking with budgetTokens
|
|
64
|
+
...(options.thinkingConfig?.enabled && {
|
|
65
|
+
// For Anthropic: experimental_thinking with budgetTokens
|
|
66
|
+
...(isAnthropicProvider &&
|
|
67
|
+
options.thinkingConfig.budgetTokens &&
|
|
68
|
+
!options.thinkingConfig.thinkingLevel && {
|
|
69
|
+
experimental_thinking: {
|
|
70
|
+
type: "enabled",
|
|
71
|
+
budgetTokens: options.thinkingConfig.budgetTokens,
|
|
72
|
+
},
|
|
73
|
+
}),
|
|
74
|
+
// For Google Gemini 3: providerOptions with thinkingLevel
|
|
75
|
+
// For Gemini 2.5: providerOptions with thinkingBudget
|
|
76
|
+
...(isGoogleProvider && {
|
|
77
|
+
providerOptions: {
|
|
78
|
+
google: {
|
|
79
|
+
thinkingConfig: {
|
|
80
|
+
...(options.thinkingConfig.thinkingLevel && {
|
|
81
|
+
thinkingLevel: options.thinkingConfig.thinkingLevel,
|
|
82
|
+
}),
|
|
83
|
+
...(options.thinkingConfig.budgetTokens &&
|
|
84
|
+
!options.thinkingConfig.thinkingLevel && {
|
|
85
|
+
thinkingBudget: options.thinkingConfig.budgetTokens,
|
|
86
|
+
}),
|
|
87
|
+
includeThoughts: true,
|
|
88
|
+
},
|
|
89
|
+
},
|
|
90
|
+
},
|
|
91
|
+
}),
|
|
92
|
+
}),
|
|
55
93
|
experimental_telemetry: this.getTelemetryConfigFn(options, "generate"),
|
|
56
94
|
onStepFinish: ({ toolCalls, toolResults }) => {
|
|
57
95
|
logger.info("Tool execution completed", { toolResults, toolCalls });
|
|
@@ -227,13 +265,13 @@ export class GenerationHandler {
|
|
|
227
265
|
else {
|
|
228
266
|
content = generateResult.text;
|
|
229
267
|
}
|
|
268
|
+
// Extract usage with support for different formats and reasoning tokens
|
|
269
|
+
// Note: The AI SDK bundles thinking tokens into promptTokens for Google models.
|
|
270
|
+
// Separate reasoningTokens tracking will work when/if the AI SDK adds support.
|
|
271
|
+
const usage = extractTokenUsage(generateResult.usage);
|
|
230
272
|
return {
|
|
231
273
|
content,
|
|
232
|
-
usage
|
|
233
|
-
input: generateResult.usage?.promptTokens || 0,
|
|
234
|
-
output: generateResult.usage?.completionTokens || 0,
|
|
235
|
-
total: generateResult.usage?.totalTokens || 0,
|
|
236
|
-
},
|
|
274
|
+
usage,
|
|
237
275
|
provider: this.providerName,
|
|
238
276
|
model: this.modelName,
|
|
239
277
|
toolCalls: generateResult.toolCalls
|
|
@@ -6,6 +6,7 @@ import type { StreamTextResult, StreamAnalyticsCollector, ResponseMetadata } fro
|
|
|
6
6
|
export declare class BaseStreamAnalyticsCollector implements StreamAnalyticsCollector {
|
|
7
7
|
/**
|
|
8
8
|
* Collect token usage from stream result
|
|
9
|
+
* Uses centralized tokenUtils for consistent extraction across providers
|
|
9
10
|
*/
|
|
10
11
|
collectUsage(result: StreamTextResult): Promise<TokenUsage>;
|
|
11
12
|
/**
|
|
@@ -1,37 +1,29 @@
|
|
|
1
1
|
import { createAnalytics } from "./analytics.js";
|
|
2
2
|
import { logger } from "../utils/logger.js";
|
|
3
|
+
import { extractTokenUsage, createEmptyTokenUsage, } from "../utils/tokenUtils.js";
|
|
3
4
|
/**
|
|
4
5
|
* Base implementation for collecting analytics from Vercel AI SDK stream results
|
|
5
6
|
*/
|
|
6
7
|
export class BaseStreamAnalyticsCollector {
|
|
7
8
|
/**
|
|
8
9
|
* Collect token usage from stream result
|
|
10
|
+
* Uses centralized tokenUtils for consistent extraction across providers
|
|
9
11
|
*/
|
|
10
12
|
async collectUsage(result) {
|
|
11
13
|
try {
|
|
12
14
|
const usage = await result.usage;
|
|
13
15
|
if (!usage) {
|
|
14
16
|
logger.debug("No usage data available from stream result");
|
|
15
|
-
return
|
|
16
|
-
input: 0,
|
|
17
|
-
output: 0,
|
|
18
|
-
total: 0,
|
|
19
|
-
};
|
|
17
|
+
return createEmptyTokenUsage();
|
|
20
18
|
}
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
(usage.promptTokens || 0) + (usage.completionTokens || 0),
|
|
26
|
-
};
|
|
19
|
+
// Use centralized token extraction utility
|
|
20
|
+
// Handles multiple provider formats, cache tokens, reasoning tokens,
|
|
21
|
+
// and cache savings calculation
|
|
22
|
+
return extractTokenUsage(usage);
|
|
27
23
|
}
|
|
28
24
|
catch (error) {
|
|
29
25
|
logger.warn("Failed to collect usage from stream result", { error });
|
|
30
|
-
return
|
|
31
|
-
input: 0,
|
|
32
|
-
output: 0,
|
|
33
|
-
total: 0,
|
|
34
|
-
};
|
|
26
|
+
return createEmptyTokenUsage();
|
|
35
27
|
}
|
|
36
28
|
}
|
|
37
29
|
/**
|
|
@@ -77,6 +77,10 @@ const VISION_CAPABILITIES = {
|
|
|
77
77
|
"gemini-3-pro-preview-11-2025",
|
|
78
78
|
"gemini-3-pro-latest",
|
|
79
79
|
"gemini-3-pro-image-preview",
|
|
80
|
+
// Gemini 3 Flash Series
|
|
81
|
+
"gemini-3-flash",
|
|
82
|
+
"gemini-3-flash-preview",
|
|
83
|
+
"gemini-3-flash-latest",
|
|
80
84
|
// Gemini 2.5 Series
|
|
81
85
|
"gemini-2.5-pro",
|
|
82
86
|
"gemini-2.5-flash",
|
|
@@ -151,6 +155,10 @@ const VISION_CAPABILITIES = {
|
|
|
151
155
|
"gemini-3-pro-latest",
|
|
152
156
|
"gemini-3-pro-preview",
|
|
153
157
|
"gemini-3-pro",
|
|
158
|
+
// Gemini 3 Flash Series on Vertex AI
|
|
159
|
+
"gemini-3-flash",
|
|
160
|
+
"gemini-3-flash-preview",
|
|
161
|
+
"gemini-3-flash-latest",
|
|
154
162
|
// Gemini 2.5 models on Vertex AI
|
|
155
163
|
"gemini-2.5-pro",
|
|
156
164
|
"gemini-2.5-flash",
|
|
@@ -226,6 +234,9 @@ const VISION_CAPABILITIES = {
|
|
|
226
234
|
"gemini/gemini-2.0-flash",
|
|
227
235
|
"gemini-3-pro-preview",
|
|
228
236
|
"gemini-3-pro-latest",
|
|
237
|
+
"gemini-3-flash",
|
|
238
|
+
"gemini-3-flash-preview",
|
|
239
|
+
"gemini-3-flash-latest",
|
|
229
240
|
"gemini-2.5-pro",
|
|
230
241
|
"gemini-2.5-flash",
|
|
231
242
|
"gemini-2.0-flash-lite",
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model-specific prompt configurations and enhancement utilities
|
|
3
|
+
*/
|
|
4
|
+
import { isGemini3Model, isGemini25Model } from "../utils/modelDetection.js";
|
|
5
|
+
export { isGemini3Model, isGemini25Model };
|
|
6
|
+
export declare const MODEL_SPECIFIC_INSTRUCTIONS: Record<string, string>;
|
|
7
|
+
export declare function getModelSpecificInstructions(model: string): string;
|
|
8
|
+
export declare function enhancePromptForModel(basePrompt: string, model: string, _provider?: string): string;
|
|
9
|
+
export declare function shouldEnhancePrompt(model: string): boolean;
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model-specific prompt configurations and enhancement utilities
|
|
3
|
+
*/
|
|
4
|
+
import { isGemini3Model, isGemini25Model } from "../utils/modelDetection.js";
|
|
5
|
+
// Re-export from modelDetection for backwards compatibility
|
|
6
|
+
export { isGemini3Model, isGemini25Model };
|
|
7
|
+
export const MODEL_SPECIFIC_INSTRUCTIONS = {
|
|
8
|
+
"gemini-3": `You have access to extended thinking capabilities. Use them for complex reasoning tasks that require deep analysis.`,
|
|
9
|
+
"gemini-2.5": `You support function calling and structured outputs. Format responses according to the requested schema when provided.`,
|
|
10
|
+
"gpt-4": `You are a helpful assistant with strong reasoning capabilities.`,
|
|
11
|
+
"claude-3": `You have extended thinking capabilities available when enabled. Use systematic reasoning for complex problems.`,
|
|
12
|
+
default: "",
|
|
13
|
+
};
|
|
14
|
+
export function getModelSpecificInstructions(model) {
|
|
15
|
+
if (isGemini3Model(model)) {
|
|
16
|
+
return MODEL_SPECIFIC_INSTRUCTIONS["gemini-3"];
|
|
17
|
+
}
|
|
18
|
+
if (isGemini25Model(model)) {
|
|
19
|
+
return MODEL_SPECIFIC_INSTRUCTIONS["gemini-2.5"];
|
|
20
|
+
}
|
|
21
|
+
if (/^gpt-4/i.test(model)) {
|
|
22
|
+
return MODEL_SPECIFIC_INSTRUCTIONS["gpt-4"];
|
|
23
|
+
}
|
|
24
|
+
if (/^claude-3/i.test(model)) {
|
|
25
|
+
return MODEL_SPECIFIC_INSTRUCTIONS["claude-3"];
|
|
26
|
+
}
|
|
27
|
+
return MODEL_SPECIFIC_INSTRUCTIONS["default"];
|
|
28
|
+
}
|
|
29
|
+
export function enhancePromptForModel(basePrompt, model, _provider) {
|
|
30
|
+
const modelInstructions = getModelSpecificInstructions(model);
|
|
31
|
+
if (!modelInstructions) {
|
|
32
|
+
return basePrompt;
|
|
33
|
+
}
|
|
34
|
+
return `${modelInstructions}\n\n${basePrompt}`;
|
|
35
|
+
}
|
|
36
|
+
export function shouldEnhancePrompt(model) {
|
|
37
|
+
return isGemini3Model(model) || isGemini25Model(model);
|
|
38
|
+
}
|
|
39
|
+
//# sourceMappingURL=modelSpecificPrompts.js.map
|
|
@@ -191,6 +191,12 @@ export declare enum VertexModels {
|
|
|
191
191
|
GEMINI_3_PRO_LATEST = "gemini-3-pro-latest",
|
|
192
192
|
/** Gemini 3 Pro Preview - Generic preview (legacy) */
|
|
193
193
|
GEMINI_3_PRO_PREVIEW = "gemini-3-pro-preview",
|
|
194
|
+
/** Gemini 3 Flash - Base model with adaptive thinking */
|
|
195
|
+
GEMINI_3_FLASH = "gemini-3-flash",
|
|
196
|
+
/** Gemini 3 Flash Preview - Versioned preview */
|
|
197
|
+
GEMINI_3_FLASH_PREVIEW = "gemini-3-flash-preview",
|
|
198
|
+
/** Gemini 3 Flash Latest - Auto-updated alias (always points to latest preview) */
|
|
199
|
+
GEMINI_3_FLASH_LATEST = "gemini-3-flash-latest",
|
|
194
200
|
GEMINI_2_5_PRO = "gemini-2.5-pro",
|
|
195
201
|
GEMINI_2_5_FLASH = "gemini-2.5-flash",
|
|
196
202
|
GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite",
|
|
@@ -208,6 +214,8 @@ export declare enum VertexModels {
|
|
|
208
214
|
export declare enum GoogleAIModels {
|
|
209
215
|
GEMINI_3_PRO_PREVIEW = "gemini-3-pro-preview",
|
|
210
216
|
GEMINI_3_PRO_IMAGE_PREVIEW = "gemini-3-pro-image-preview",
|
|
217
|
+
GEMINI_3_FLASH = "gemini-3-flash",
|
|
218
|
+
GEMINI_3_FLASH_PREVIEW = "gemini-3-flash-preview",
|
|
211
219
|
GEMINI_2_5_PRO = "gemini-2.5-pro",
|
|
212
220
|
GEMINI_2_5_FLASH = "gemini-2.5-flash",
|
|
213
221
|
GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite",
|
|
@@ -269,6 +269,12 @@ export var VertexModels;
|
|
|
269
269
|
VertexModels["GEMINI_3_PRO_LATEST"] = "gemini-3-pro-latest";
|
|
270
270
|
/** Gemini 3 Pro Preview - Generic preview (legacy) */
|
|
271
271
|
VertexModels["GEMINI_3_PRO_PREVIEW"] = "gemini-3-pro-preview";
|
|
272
|
+
/** Gemini 3 Flash - Base model with adaptive thinking */
|
|
273
|
+
VertexModels["GEMINI_3_FLASH"] = "gemini-3-flash";
|
|
274
|
+
/** Gemini 3 Flash Preview - Versioned preview */
|
|
275
|
+
VertexModels["GEMINI_3_FLASH_PREVIEW"] = "gemini-3-flash-preview";
|
|
276
|
+
/** Gemini 3 Flash Latest - Auto-updated alias (always points to latest preview) */
|
|
277
|
+
VertexModels["GEMINI_3_FLASH_LATEST"] = "gemini-3-flash-latest";
|
|
272
278
|
// Gemini 2.5 Series (Latest - 2025)
|
|
273
279
|
VertexModels["GEMINI_2_5_PRO"] = "gemini-2.5-pro";
|
|
274
280
|
VertexModels["GEMINI_2_5_FLASH"] = "gemini-2.5-flash";
|
|
@@ -291,6 +297,8 @@ export var GoogleAIModels;
|
|
|
291
297
|
// Gemini 3 Series
|
|
292
298
|
GoogleAIModels["GEMINI_3_PRO_PREVIEW"] = "gemini-3-pro-preview";
|
|
293
299
|
GoogleAIModels["GEMINI_3_PRO_IMAGE_PREVIEW"] = "gemini-3-pro-image-preview";
|
|
300
|
+
GoogleAIModels["GEMINI_3_FLASH"] = "gemini-3-flash";
|
|
301
|
+
GoogleAIModels["GEMINI_3_FLASH_PREVIEW"] = "gemini-3-flash-preview";
|
|
294
302
|
// Gemini 2.5 Series
|
|
295
303
|
GoogleAIModels["GEMINI_2_5_PRO"] = "gemini-2.5-pro";
|
|
296
304
|
GoogleAIModels["GEMINI_2_5_FLASH"] = "gemini-2.5-flash";
|
|
@@ -85,6 +85,9 @@ export declare const PROVIDER_TOKEN_LIMITS: {
|
|
|
85
85
|
readonly "gemini-3-pro-preview": 8192;
|
|
86
86
|
readonly "gemini-3-pro-preview-11-2025": 8192;
|
|
87
87
|
readonly "gemini-3-pro-latest": 8192;
|
|
88
|
+
readonly "gemini-3-flash": 65536;
|
|
89
|
+
readonly "gemini-3-flash-preview": 65536;
|
|
90
|
+
readonly "gemini-3-flash-latest": 65536;
|
|
88
91
|
readonly "gemini-2.5-pro": 8192;
|
|
89
92
|
readonly "gemini-2.5-flash": 8192;
|
|
90
93
|
readonly "gemini-2.5-flash-lite": 8192;
|
|
@@ -100,6 +103,9 @@ export declare const PROVIDER_TOKEN_LIMITS: {
|
|
|
100
103
|
readonly "gemini-3-pro-preview": 8192;
|
|
101
104
|
readonly "gemini-3-pro-preview-11-2025": 8192;
|
|
102
105
|
readonly "gemini-3-pro-latest": 8192;
|
|
106
|
+
readonly "gemini-3-flash": 65536;
|
|
107
|
+
readonly "gemini-3-flash-preview": 65536;
|
|
108
|
+
readonly "gemini-3-flash-latest": 65536;
|
|
103
109
|
readonly "gemini-2.5-pro": 8192;
|
|
104
110
|
readonly "gemini-2.5-flash": 8192;
|
|
105
111
|
readonly "gemini-2.5-flash-lite": 8192;
|
|
@@ -182,6 +188,25 @@ export declare const CONTEXT_WINDOWS: {
|
|
|
182
188
|
/** Maximum theoretical context */
|
|
183
189
|
readonly MAXIMUM: 2097152;
|
|
184
190
|
};
|
|
191
|
+
/**
|
|
192
|
+
* Model-specific token limits with input/output breakdown
|
|
193
|
+
* For models that require explicit input and output token limits
|
|
194
|
+
*/
|
|
195
|
+
export declare const MODEL_TOKEN_LIMITS: {
|
|
196
|
+
/** Gemini 3 Flash Series */
|
|
197
|
+
readonly "gemini-3-flash": {
|
|
198
|
+
readonly input: 1000000;
|
|
199
|
+
readonly output: 65536;
|
|
200
|
+
};
|
|
201
|
+
readonly "gemini-3-flash-preview": {
|
|
202
|
+
readonly input: 1000000;
|
|
203
|
+
readonly output: 65536;
|
|
204
|
+
};
|
|
205
|
+
readonly "gemini-3-flash-latest": {
|
|
206
|
+
readonly input: 1000000;
|
|
207
|
+
readonly output: 65536;
|
|
208
|
+
};
|
|
209
|
+
};
|
|
185
210
|
/**
|
|
186
211
|
* Token estimation utilities
|
|
187
212
|
* Rough estimates for token counting without full tokenization
|
|
@@ -89,6 +89,10 @@ export const PROVIDER_TOKEN_LIMITS = {
|
|
|
89
89
|
"gemini-3-pro-preview": 8192,
|
|
90
90
|
"gemini-3-pro-preview-11-2025": 8192,
|
|
91
91
|
"gemini-3-pro-latest": 8192,
|
|
92
|
+
// Gemini 3 Flash Series
|
|
93
|
+
"gemini-3-flash": 65536,
|
|
94
|
+
"gemini-3-flash-preview": 65536,
|
|
95
|
+
"gemini-3-flash-latest": 65536,
|
|
92
96
|
// Gemini 2.5 Series
|
|
93
97
|
"gemini-2.5-pro": 8192,
|
|
94
98
|
"gemini-2.5-flash": 8192,
|
|
@@ -108,6 +112,10 @@ export const PROVIDER_TOKEN_LIMITS = {
|
|
|
108
112
|
"gemini-3-pro-preview": 8192,
|
|
109
113
|
"gemini-3-pro-preview-11-2025": 8192,
|
|
110
114
|
"gemini-3-pro-latest": 8192,
|
|
115
|
+
// Gemini 3 Flash Series
|
|
116
|
+
"gemini-3-flash": 65536,
|
|
117
|
+
"gemini-3-flash-preview": 65536,
|
|
118
|
+
"gemini-3-flash-latest": 65536,
|
|
111
119
|
// Gemini 2.5 Series
|
|
112
120
|
"gemini-2.5-pro": 8192,
|
|
113
121
|
"gemini-2.5-flash": 8192,
|
|
@@ -201,6 +209,16 @@ export const CONTEXT_WINDOWS = {
|
|
|
201
209
|
/** Maximum theoretical context */
|
|
202
210
|
MAXIMUM: 2097152, // 2M - Maximum context
|
|
203
211
|
};
|
|
212
|
+
/**
|
|
213
|
+
* Model-specific token limits with input/output breakdown
|
|
214
|
+
* For models that require explicit input and output token limits
|
|
215
|
+
*/
|
|
216
|
+
export const MODEL_TOKEN_LIMITS = {
|
|
217
|
+
/** Gemini 3 Flash Series */
|
|
218
|
+
"gemini-3-flash": { input: 1000000, output: 65536 },
|
|
219
|
+
"gemini-3-flash-preview": { input: 1000000, output: 65536 },
|
|
220
|
+
"gemini-3-flash-latest": { input: 1000000, output: 65536 },
|
|
221
|
+
};
|
|
204
222
|
/**
|
|
205
223
|
* Token estimation utilities
|
|
206
224
|
* Rough estimates for token counting without full tokenization
|