@juspay/neurolink 8.26.0 → 8.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +47 -25
  3. package/dist/adapters/providerImageAdapter.js +11 -0
  4. package/dist/cli/commands/config.js +16 -23
  5. package/dist/cli/commands/setup-anthropic.js +3 -26
  6. package/dist/cli/commands/setup-azure.js +3 -22
  7. package/dist/cli/commands/setup-bedrock.js +3 -26
  8. package/dist/cli/commands/setup-google-ai.js +3 -22
  9. package/dist/cli/commands/setup-mistral.js +3 -31
  10. package/dist/cli/commands/setup-openai.js +3 -22
  11. package/dist/cli/factories/commandFactory.js +32 -0
  12. package/dist/cli/factories/ollamaCommandFactory.js +5 -17
  13. package/dist/cli/loop/optionsSchema.d.ts +1 -1
  14. package/dist/cli/loop/optionsSchema.js +13 -0
  15. package/dist/config/modelSpecificPrompts.d.ts +9 -0
  16. package/dist/config/modelSpecificPrompts.js +38 -0
  17. package/dist/constants/enums.d.ts +8 -0
  18. package/dist/constants/enums.js +8 -0
  19. package/dist/constants/tokens.d.ts +25 -0
  20. package/dist/constants/tokens.js +18 -0
  21. package/dist/core/analytics.js +7 -28
  22. package/dist/core/baseProvider.js +1 -0
  23. package/dist/core/constants.d.ts +1 -0
  24. package/dist/core/constants.js +1 -0
  25. package/dist/core/modules/GenerationHandler.js +43 -5
  26. package/dist/core/streamAnalytics.d.ts +1 -0
  27. package/dist/core/streamAnalytics.js +8 -16
  28. package/dist/lib/adapters/providerImageAdapter.js +11 -0
  29. package/dist/lib/config/modelSpecificPrompts.d.ts +9 -0
  30. package/dist/lib/config/modelSpecificPrompts.js +39 -0
  31. package/dist/lib/constants/enums.d.ts +8 -0
  32. package/dist/lib/constants/enums.js +8 -0
  33. package/dist/lib/constants/tokens.d.ts +25 -0
  34. package/dist/lib/constants/tokens.js +18 -0
  35. package/dist/lib/core/analytics.js +7 -28
  36. package/dist/lib/core/baseProvider.js +1 -0
  37. package/dist/lib/core/constants.d.ts +1 -0
  38. package/dist/lib/core/constants.js +1 -0
  39. package/dist/lib/core/modules/GenerationHandler.js +43 -5
  40. package/dist/lib/core/streamAnalytics.d.ts +1 -0
  41. package/dist/lib/core/streamAnalytics.js +8 -16
  42. package/dist/lib/providers/googleAiStudio.d.ts +15 -0
  43. package/dist/lib/providers/googleAiStudio.js +659 -3
  44. package/dist/lib/providers/googleVertex.d.ts +25 -0
  45. package/dist/lib/providers/googleVertex.js +978 -3
  46. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  47. package/dist/lib/types/analytics.d.ts +4 -0
  48. package/dist/lib/types/cli.d.ts +16 -0
  49. package/dist/lib/types/conversation.d.ts +72 -4
  50. package/dist/lib/types/conversation.js +30 -0
  51. package/dist/lib/types/generateTypes.d.ts +135 -0
  52. package/dist/lib/types/groundingTypes.d.ts +231 -0
  53. package/dist/lib/types/groundingTypes.js +12 -0
  54. package/dist/lib/types/providers.d.ts +29 -0
  55. package/dist/lib/types/streamTypes.d.ts +54 -0
  56. package/dist/lib/utils/analyticsUtils.js +22 -2
  57. package/dist/lib/utils/errorHandling.d.ts +65 -0
  58. package/dist/lib/utils/errorHandling.js +268 -0
  59. package/dist/lib/utils/modelChoices.d.ts +82 -0
  60. package/dist/lib/utils/modelChoices.js +402 -0
  61. package/dist/lib/utils/modelDetection.d.ts +9 -0
  62. package/dist/lib/utils/modelDetection.js +81 -0
  63. package/dist/lib/utils/parameterValidation.d.ts +59 -1
  64. package/dist/lib/utils/parameterValidation.js +196 -0
  65. package/dist/lib/utils/schemaConversion.d.ts +12 -0
  66. package/dist/lib/utils/schemaConversion.js +90 -0
  67. package/dist/lib/utils/thinkingConfig.d.ts +108 -0
  68. package/dist/lib/utils/thinkingConfig.js +105 -0
  69. package/dist/lib/utils/tokenUtils.d.ts +124 -0
  70. package/dist/lib/utils/tokenUtils.js +240 -0
  71. package/dist/lib/utils/transformationUtils.js +15 -26
  72. package/dist/providers/googleAiStudio.d.ts +15 -0
  73. package/dist/providers/googleAiStudio.js +659 -3
  74. package/dist/providers/googleVertex.d.ts +25 -0
  75. package/dist/providers/googleVertex.js +978 -3
  76. package/dist/types/analytics.d.ts +4 -0
  77. package/dist/types/cli.d.ts +16 -0
  78. package/dist/types/conversation.d.ts +72 -4
  79. package/dist/types/conversation.js +30 -0
  80. package/dist/types/generateTypes.d.ts +135 -0
  81. package/dist/types/groundingTypes.d.ts +231 -0
  82. package/dist/types/groundingTypes.js +11 -0
  83. package/dist/types/providers.d.ts +29 -0
  84. package/dist/types/streamTypes.d.ts +54 -0
  85. package/dist/utils/analyticsUtils.js +22 -2
  86. package/dist/utils/errorHandling.d.ts +65 -0
  87. package/dist/utils/errorHandling.js +268 -0
  88. package/dist/utils/modelChoices.d.ts +82 -0
  89. package/dist/utils/modelChoices.js +401 -0
  90. package/dist/utils/modelDetection.d.ts +9 -0
  91. package/dist/utils/modelDetection.js +80 -0
  92. package/dist/utils/parameterValidation.d.ts +59 -1
  93. package/dist/utils/parameterValidation.js +196 -0
  94. package/dist/utils/schemaConversion.d.ts +12 -0
  95. package/dist/utils/schemaConversion.js +90 -0
  96. package/dist/utils/thinkingConfig.d.ts +108 -0
  97. package/dist/utils/thinkingConfig.js +104 -0
  98. package/dist/utils/tokenUtils.d.ts +124 -0
  99. package/dist/utils/tokenUtils.js +239 -0
  100. package/dist/utils/transformationUtils.js +15 -26
  101. package/package.json +4 -3
@@ -65,5 +65,18 @@ export const textGenerationOptionsSchema = {
65
65
  type: "boolean",
66
66
  description: "Enable or disable automatic conversation summarization for this request.",
67
67
  },
68
+ thinking: {
69
+ type: "boolean",
70
+ description: "Enable extended thinking/reasoning capability.",
71
+ },
72
+ thinkingBudget: {
73
+ type: "number",
74
+ description: "Token budget for thinking (Anthropic models: 5000-100000).",
75
+ },
76
+ thinkingLevel: {
77
+ type: "string",
78
+ description: "Thinking level for Gemini 3 models: minimal, low, medium, high.",
79
+ allowedValues: ["minimal", "low", "medium", "high"],
80
+ },
68
81
  };
69
82
  //# sourceMappingURL=optionsSchema.js.map
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Model-specific prompt configurations and enhancement utilities
3
+ */
4
+ import { isGemini3Model, isGemini25Model } from "../utils/modelDetection.js";
5
+ export { isGemini3Model, isGemini25Model };
6
+ export declare const MODEL_SPECIFIC_INSTRUCTIONS: Record<string, string>;
7
+ export declare function getModelSpecificInstructions(model: string): string;
8
+ export declare function enhancePromptForModel(basePrompt: string, model: string, _provider?: string): string;
9
+ export declare function shouldEnhancePrompt(model: string): boolean;
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Model-specific prompt configurations and enhancement utilities
3
+ */
4
+ import { isGemini3Model, isGemini25Model } from "../utils/modelDetection.js";
5
+ // Re-export from modelDetection for backwards compatibility
6
+ export { isGemini3Model, isGemini25Model };
7
+ export const MODEL_SPECIFIC_INSTRUCTIONS = {
8
+ "gemini-3": `You have access to extended thinking capabilities. Use them for complex reasoning tasks that require deep analysis.`,
9
+ "gemini-2.5": `You support function calling and structured outputs. Format responses according to the requested schema when provided.`,
10
+ "gpt-4": `You are a helpful assistant with strong reasoning capabilities.`,
11
+ "claude-3": `You have extended thinking capabilities available when enabled. Use systematic reasoning for complex problems.`,
12
+ default: "",
13
+ };
14
+ export function getModelSpecificInstructions(model) {
15
+ if (isGemini3Model(model)) {
16
+ return MODEL_SPECIFIC_INSTRUCTIONS["gemini-3"];
17
+ }
18
+ if (isGemini25Model(model)) {
19
+ return MODEL_SPECIFIC_INSTRUCTIONS["gemini-2.5"];
20
+ }
21
+ if (/^gpt-4/i.test(model)) {
22
+ return MODEL_SPECIFIC_INSTRUCTIONS["gpt-4"];
23
+ }
24
+ if (/^claude-3/i.test(model)) {
25
+ return MODEL_SPECIFIC_INSTRUCTIONS["claude-3"];
26
+ }
27
+ return MODEL_SPECIFIC_INSTRUCTIONS["default"];
28
+ }
29
+ export function enhancePromptForModel(basePrompt, model, _provider) {
30
+ const modelInstructions = getModelSpecificInstructions(model);
31
+ if (!modelInstructions) {
32
+ return basePrompt;
33
+ }
34
+ return `${modelInstructions}\n\n${basePrompt}`;
35
+ }
36
+ export function shouldEnhancePrompt(model) {
37
+ return isGemini3Model(model) || isGemini25Model(model);
38
+ }
@@ -191,6 +191,12 @@ export declare enum VertexModels {
191
191
  GEMINI_3_PRO_LATEST = "gemini-3-pro-latest",
192
192
  /** Gemini 3 Pro Preview - Generic preview (legacy) */
193
193
  GEMINI_3_PRO_PREVIEW = "gemini-3-pro-preview",
194
+ /** Gemini 3 Flash - Base model with adaptive thinking */
195
+ GEMINI_3_FLASH = "gemini-3-flash",
196
+ /** Gemini 3 Flash Preview - Versioned preview */
197
+ GEMINI_3_FLASH_PREVIEW = "gemini-3-flash-preview",
198
+ /** Gemini 3 Flash Latest - Auto-updated alias (always points to latest preview) */
199
+ GEMINI_3_FLASH_LATEST = "gemini-3-flash-latest",
194
200
  GEMINI_2_5_PRO = "gemini-2.5-pro",
195
201
  GEMINI_2_5_FLASH = "gemini-2.5-flash",
196
202
  GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite",
@@ -208,6 +214,8 @@ export declare enum VertexModels {
208
214
  export declare enum GoogleAIModels {
209
215
  GEMINI_3_PRO_PREVIEW = "gemini-3-pro-preview",
210
216
  GEMINI_3_PRO_IMAGE_PREVIEW = "gemini-3-pro-image-preview",
217
+ GEMINI_3_FLASH = "gemini-3-flash",
218
+ GEMINI_3_FLASH_PREVIEW = "gemini-3-flash-preview",
211
219
  GEMINI_2_5_PRO = "gemini-2.5-pro",
212
220
  GEMINI_2_5_FLASH = "gemini-2.5-flash",
213
221
  GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite",
@@ -269,6 +269,12 @@ export var VertexModels;
269
269
  VertexModels["GEMINI_3_PRO_LATEST"] = "gemini-3-pro-latest";
270
270
  /** Gemini 3 Pro Preview - Generic preview (legacy) */
271
271
  VertexModels["GEMINI_3_PRO_PREVIEW"] = "gemini-3-pro-preview";
272
+ /** Gemini 3 Flash - Base model with adaptive thinking */
273
+ VertexModels["GEMINI_3_FLASH"] = "gemini-3-flash";
274
+ /** Gemini 3 Flash Preview - Versioned preview */
275
+ VertexModels["GEMINI_3_FLASH_PREVIEW"] = "gemini-3-flash-preview";
276
+ /** Gemini 3 Flash Latest - Auto-updated alias (always points to latest preview) */
277
+ VertexModels["GEMINI_3_FLASH_LATEST"] = "gemini-3-flash-latest";
272
278
  // Gemini 2.5 Series (Latest - 2025)
273
279
  VertexModels["GEMINI_2_5_PRO"] = "gemini-2.5-pro";
274
280
  VertexModels["GEMINI_2_5_FLASH"] = "gemini-2.5-flash";
@@ -291,6 +297,8 @@ export var GoogleAIModels;
291
297
  // Gemini 3 Series
292
298
  GoogleAIModels["GEMINI_3_PRO_PREVIEW"] = "gemini-3-pro-preview";
293
299
  GoogleAIModels["GEMINI_3_PRO_IMAGE_PREVIEW"] = "gemini-3-pro-image-preview";
300
+ GoogleAIModels["GEMINI_3_FLASH"] = "gemini-3-flash";
301
+ GoogleAIModels["GEMINI_3_FLASH_PREVIEW"] = "gemini-3-flash-preview";
294
302
  // Gemini 2.5 Series
295
303
  GoogleAIModels["GEMINI_2_5_PRO"] = "gemini-2.5-pro";
296
304
  GoogleAIModels["GEMINI_2_5_FLASH"] = "gemini-2.5-flash";
@@ -85,6 +85,9 @@ export declare const PROVIDER_TOKEN_LIMITS: {
85
85
  readonly "gemini-3-pro-preview": 8192;
86
86
  readonly "gemini-3-pro-preview-11-2025": 8192;
87
87
  readonly "gemini-3-pro-latest": 8192;
88
+ readonly "gemini-3-flash": 65536;
89
+ readonly "gemini-3-flash-preview": 65536;
90
+ readonly "gemini-3-flash-latest": 65536;
88
91
  readonly "gemini-2.5-pro": 8192;
89
92
  readonly "gemini-2.5-flash": 8192;
90
93
  readonly "gemini-2.5-flash-lite": 8192;
@@ -100,6 +103,9 @@ export declare const PROVIDER_TOKEN_LIMITS: {
100
103
  readonly "gemini-3-pro-preview": 8192;
101
104
  readonly "gemini-3-pro-preview-11-2025": 8192;
102
105
  readonly "gemini-3-pro-latest": 8192;
106
+ readonly "gemini-3-flash": 65536;
107
+ readonly "gemini-3-flash-preview": 65536;
108
+ readonly "gemini-3-flash-latest": 65536;
103
109
  readonly "gemini-2.5-pro": 8192;
104
110
  readonly "gemini-2.5-flash": 8192;
105
111
  readonly "gemini-2.5-flash-lite": 8192;
@@ -182,6 +188,25 @@ export declare const CONTEXT_WINDOWS: {
182
188
  /** Maximum theoretical context */
183
189
  readonly MAXIMUM: 2097152;
184
190
  };
191
+ /**
192
+ * Model-specific token limits with input/output breakdown
193
+ * For models that require explicit input and output token limits
194
+ */
195
+ export declare const MODEL_TOKEN_LIMITS: {
196
+ /** Gemini 3 Flash Series */
197
+ readonly "gemini-3-flash": {
198
+ readonly input: 1000000;
199
+ readonly output: 65536;
200
+ };
201
+ readonly "gemini-3-flash-preview": {
202
+ readonly input: 1000000;
203
+ readonly output: 65536;
204
+ };
205
+ readonly "gemini-3-flash-latest": {
206
+ readonly input: 1000000;
207
+ readonly output: 65536;
208
+ };
209
+ };
185
210
  /**
186
211
  * Token estimation utilities
187
212
  * Rough estimates for token counting without full tokenization
@@ -89,6 +89,10 @@ export const PROVIDER_TOKEN_LIMITS = {
89
89
  "gemini-3-pro-preview": 8192,
90
90
  "gemini-3-pro-preview-11-2025": 8192,
91
91
  "gemini-3-pro-latest": 8192,
92
+ // Gemini 3 Flash Series
93
+ "gemini-3-flash": 65536,
94
+ "gemini-3-flash-preview": 65536,
95
+ "gemini-3-flash-latest": 65536,
92
96
  // Gemini 2.5 Series
93
97
  "gemini-2.5-pro": 8192,
94
98
  "gemini-2.5-flash": 8192,
@@ -108,6 +112,10 @@ export const PROVIDER_TOKEN_LIMITS = {
108
112
  "gemini-3-pro-preview": 8192,
109
113
  "gemini-3-pro-preview-11-2025": 8192,
110
114
  "gemini-3-pro-latest": 8192,
115
+ // Gemini 3 Flash Series
116
+ "gemini-3-flash": 65536,
117
+ "gemini-3-flash-preview": 65536,
118
+ "gemini-3-flash-latest": 65536,
111
119
  // Gemini 2.5 Series
112
120
  "gemini-2.5-pro": 8192,
113
121
  "gemini-2.5-flash": 8192,
@@ -201,6 +209,16 @@ export const CONTEXT_WINDOWS = {
201
209
  /** Maximum theoretical context */
202
210
  MAXIMUM: 2097152, // 2M - Maximum context
203
211
  };
212
+ /**
213
+ * Model-specific token limits with input/output breakdown
214
+ * For models that require explicit input and output token limits
215
+ */
216
+ export const MODEL_TOKEN_LIMITS = {
217
+ /** Gemini 3 Flash Series */
218
+ "gemini-3-flash": { input: 1000000, output: 65536 },
219
+ "gemini-3-flash-preview": { input: 1000000, output: 65536 },
220
+ "gemini-3-flash-latest": { input: 1000000, output: 65536 },
221
+ };
204
222
  /**
205
223
  * Token estimation utilities
206
224
  * Rough estimates for token counting without full tokenization
@@ -6,6 +6,7 @@
6
6
  */
7
7
  import { logger } from "../utils/logger.js";
8
8
  import { modelConfig } from "./modelConfiguration.js";
9
+ import { extractTokenUsage as extractTokenUsageUtil } from "../utils/tokenUtils.js";
9
10
  /**
10
11
  * Create analytics data structure from AI response
11
12
  */
@@ -49,36 +50,14 @@ export function createAnalytics(provider, model, result, responseTime, context)
49
50
  }
50
51
  /**
51
52
  * Extract token usage from various AI result formats
53
+ * Delegates to centralized tokenUtils for consistent extraction across providers
52
54
  */
53
55
  function extractTokenUsage(result) {
54
- // Use properly typed usage object from BaseProvider or direct AI SDK
55
- if (result.usage &&
56
- typeof result.usage === "object" &&
57
- result.usage !== null) {
58
- const usage = result.usage;
59
- // Try BaseProvider normalized format first (input/output/total)
60
- if (typeof usage.input === "number" || typeof usage.output === "number") {
61
- const input = typeof usage.input === "number" ? usage.input : 0;
62
- const output = typeof usage.output === "number" ? usage.output : 0;
63
- const total = typeof usage.total === "number" ? usage.total : input + output;
64
- return { input, output, total };
65
- }
66
- // Try OpenAI/Mistral format (promptTokens/completionTokens)
67
- if (typeof usage.promptTokens === "number" ||
68
- typeof usage.completionTokens === "number") {
69
- const input = typeof usage.promptTokens === "number" ? usage.promptTokens : 0;
70
- const output = typeof usage.completionTokens === "number" ? usage.completionTokens : 0;
71
- const total = typeof usage.total === "number" ? usage.total : input + output;
72
- return { input, output, total };
73
- }
74
- // Handle total-only case
75
- if (typeof usage.total === "number") {
76
- return { input: 0, output: 0, total: usage.total };
77
- }
78
- }
79
- // Fallback for edge cases
80
- logger.debug("Token extraction failed: unknown usage format", { result });
81
- return { input: 0, output: 0, total: 0 };
56
+ // Use centralized token extraction utility
57
+ // The utility handles nested usage objects, multiple provider formats,
58
+ // cache tokens, reasoning tokens, and cache savings calculation
59
+ // Cast result to allow extractTokenUsageUtil to handle type normalization
60
+ return extractTokenUsageUtil(result.usage);
82
61
  }
83
62
  /**
84
63
  * Estimate cost based on provider, model, and token usage
@@ -126,6 +126,7 @@ export class BaseProvider {
126
126
  maxSteps: options.maxSteps || 5,
127
127
  provider: options.provider,
128
128
  model: options.model,
129
+ region: options.region, // Pass region for Vertex AI
129
130
  // 🔧 FIX: Include analytics and evaluation options from stream options
130
131
  enableAnalytics: options.enableAnalytics,
131
132
  enableEvaluation: options.enableEvaluation,
@@ -6,6 +6,7 @@ export declare const DEFAULT_MAX_TOKENS: undefined;
6
6
  export declare const DEFAULT_TEMPERATURE = 0.7;
7
7
  export declare const DEFAULT_TIMEOUT = 60000;
8
8
  export declare const DEFAULT_MAX_STEPS = 200;
9
+ export declare const DEFAULT_TOOL_MAX_RETRIES = 2;
9
10
  export declare const STEP_LIMITS: {
10
11
  min: number;
11
12
  max: number;
@@ -7,6 +7,7 @@ export const DEFAULT_MAX_TOKENS = undefined; // Unlimited by default - let provi
7
7
  export const DEFAULT_TEMPERATURE = 0.7;
8
8
  export const DEFAULT_TIMEOUT = 60000;
9
9
  export const DEFAULT_MAX_STEPS = 200;
10
+ export const DEFAULT_TOOL_MAX_RETRIES = 2; // Maximum retries per tool before permanently failing
10
11
  // Step execution limits
11
12
  export const STEP_LIMITS = {
12
13
  min: 1,
@@ -14,6 +14,7 @@
14
14
  */
15
15
  import { generateText, Output, NoObjectGeneratedError } from "ai";
16
16
  import { logger } from "../../utils/logger.js";
17
+ import { extractTokenUsage } from "../../utils/tokenUtils.js";
17
18
  import { DEFAULT_MAX_STEPS } from "../constants.js";
18
19
  /**
19
20
  * GenerationHandler class - Handles text generation operations for AI providers
@@ -36,6 +37,10 @@ export class GenerationHandler {
36
37
  * @private
37
38
  */
38
39
  async callGenerateText(model, messages, tools, options, shouldUseTools, includeStructuredOutput) {
40
+ // Check if this is a Google provider (for provider-specific options)
41
+ const isGoogleProvider = this.providerName === "google-ai" || this.providerName === "vertex";
42
+ // Check if this is an Anthropic provider
43
+ const isAnthropicProvider = this.providerName === "anthropic" || this.providerName === "bedrock";
39
44
  const useStructuredOutput = includeStructuredOutput &&
40
45
  !!options.schema &&
41
46
  (options.output?.format === "json" ||
@@ -52,6 +57,39 @@ export class GenerationHandler {
52
57
  options.schema && {
53
58
  experimental_output: Output.object({ schema: options.schema }),
54
59
  }),
60
+ // Add thinking configuration for extended reasoning
61
+ // Gemini 3 models use providerOptions.google.thinkingConfig with thinkingLevel
62
+ // Gemini 2.5 models use thinkingBudget
63
+ // Anthropic models use experimental_thinking with budgetTokens
64
+ ...(options.thinkingConfig?.enabled && {
65
+ // For Anthropic: experimental_thinking with budgetTokens
66
+ ...(isAnthropicProvider &&
67
+ options.thinkingConfig.budgetTokens &&
68
+ !options.thinkingConfig.thinkingLevel && {
69
+ experimental_thinking: {
70
+ type: "enabled",
71
+ budgetTokens: options.thinkingConfig.budgetTokens,
72
+ },
73
+ }),
74
+ // For Google Gemini 3: providerOptions with thinkingLevel
75
+ // For Gemini 2.5: providerOptions with thinkingBudget
76
+ ...(isGoogleProvider && {
77
+ providerOptions: {
78
+ google: {
79
+ thinkingConfig: {
80
+ ...(options.thinkingConfig.thinkingLevel && {
81
+ thinkingLevel: options.thinkingConfig.thinkingLevel,
82
+ }),
83
+ ...(options.thinkingConfig.budgetTokens &&
84
+ !options.thinkingConfig.thinkingLevel && {
85
+ thinkingBudget: options.thinkingConfig.budgetTokens,
86
+ }),
87
+ includeThoughts: true,
88
+ },
89
+ },
90
+ },
91
+ }),
92
+ }),
55
93
  experimental_telemetry: this.getTelemetryConfigFn(options, "generate"),
56
94
  onStepFinish: ({ toolCalls, toolResults }) => {
57
95
  logger.info("Tool execution completed", { toolResults, toolCalls });
@@ -227,13 +265,13 @@ export class GenerationHandler {
227
265
  else {
228
266
  content = generateResult.text;
229
267
  }
268
+ // Extract usage with support for different formats and reasoning tokens
269
+ // Note: The AI SDK bundles thinking tokens into promptTokens for Google models.
270
+ // Separate reasoningTokens tracking will work when/if the AI SDK adds support.
271
+ const usage = extractTokenUsage(generateResult.usage);
230
272
  return {
231
273
  content,
232
- usage: {
233
- input: generateResult.usage?.promptTokens || 0,
234
- output: generateResult.usage?.completionTokens || 0,
235
- total: generateResult.usage?.totalTokens || 0,
236
- },
274
+ usage,
237
275
  provider: this.providerName,
238
276
  model: this.modelName,
239
277
  toolCalls: generateResult.toolCalls
@@ -6,6 +6,7 @@ import type { StreamTextResult, StreamAnalyticsCollector, ResponseMetadata } fro
6
6
  export declare class BaseStreamAnalyticsCollector implements StreamAnalyticsCollector {
7
7
  /**
8
8
  * Collect token usage from stream result
9
+ * Uses centralized tokenUtils for consistent extraction across providers
9
10
  */
10
11
  collectUsage(result: StreamTextResult): Promise<TokenUsage>;
11
12
  /**
@@ -1,37 +1,29 @@
1
1
  import { createAnalytics } from "./analytics.js";
2
2
  import { logger } from "../utils/logger.js";
3
+ import { extractTokenUsage, createEmptyTokenUsage, } from "../utils/tokenUtils.js";
3
4
  /**
4
5
  * Base implementation for collecting analytics from Vercel AI SDK stream results
5
6
  */
6
7
  export class BaseStreamAnalyticsCollector {
7
8
  /**
8
9
  * Collect token usage from stream result
10
+ * Uses centralized tokenUtils for consistent extraction across providers
9
11
  */
10
12
  async collectUsage(result) {
11
13
  try {
12
14
  const usage = await result.usage;
13
15
  if (!usage) {
14
16
  logger.debug("No usage data available from stream result");
15
- return {
16
- input: 0,
17
- output: 0,
18
- total: 0,
19
- };
17
+ return createEmptyTokenUsage();
20
18
  }
21
- return {
22
- input: usage.promptTokens || 0,
23
- output: usage.completionTokens || 0,
24
- total: usage.totalTokens ||
25
- (usage.promptTokens || 0) + (usage.completionTokens || 0),
26
- };
19
+ // Use centralized token extraction utility
20
+ // Handles multiple provider formats, cache tokens, reasoning tokens,
21
+ // and cache savings calculation
22
+ return extractTokenUsage(usage);
27
23
  }
28
24
  catch (error) {
29
25
  logger.warn("Failed to collect usage from stream result", { error });
30
- return {
31
- input: 0,
32
- output: 0,
33
- total: 0,
34
- };
26
+ return createEmptyTokenUsage();
35
27
  }
36
28
  }
37
29
  /**
@@ -77,6 +77,10 @@ const VISION_CAPABILITIES = {
77
77
  "gemini-3-pro-preview-11-2025",
78
78
  "gemini-3-pro-latest",
79
79
  "gemini-3-pro-image-preview",
80
+ // Gemini 3 Flash Series
81
+ "gemini-3-flash",
82
+ "gemini-3-flash-preview",
83
+ "gemini-3-flash-latest",
80
84
  // Gemini 2.5 Series
81
85
  "gemini-2.5-pro",
82
86
  "gemini-2.5-flash",
@@ -151,6 +155,10 @@ const VISION_CAPABILITIES = {
151
155
  "gemini-3-pro-latest",
152
156
  "gemini-3-pro-preview",
153
157
  "gemini-3-pro",
158
+ // Gemini 3 Flash Series on Vertex AI
159
+ "gemini-3-flash",
160
+ "gemini-3-flash-preview",
161
+ "gemini-3-flash-latest",
154
162
  // Gemini 2.5 models on Vertex AI
155
163
  "gemini-2.5-pro",
156
164
  "gemini-2.5-flash",
@@ -226,6 +234,9 @@ const VISION_CAPABILITIES = {
226
234
  "gemini/gemini-2.0-flash",
227
235
  "gemini-3-pro-preview",
228
236
  "gemini-3-pro-latest",
237
+ "gemini-3-flash",
238
+ "gemini-3-flash-preview",
239
+ "gemini-3-flash-latest",
229
240
  "gemini-2.5-pro",
230
241
  "gemini-2.5-flash",
231
242
  "gemini-2.0-flash-lite",
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Model-specific prompt configurations and enhancement utilities
3
+ */
4
+ import { isGemini3Model, isGemini25Model } from "../utils/modelDetection.js";
5
+ export { isGemini3Model, isGemini25Model };
6
+ export declare const MODEL_SPECIFIC_INSTRUCTIONS: Record<string, string>;
7
+ export declare function getModelSpecificInstructions(model: string): string;
8
+ export declare function enhancePromptForModel(basePrompt: string, model: string, _provider?: string): string;
9
+ export declare function shouldEnhancePrompt(model: string): boolean;
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Model-specific prompt configurations and enhancement utilities
3
+ */
4
+ import { isGemini3Model, isGemini25Model } from "../utils/modelDetection.js";
5
+ // Re-export from modelDetection for backwards compatibility
6
+ export { isGemini3Model, isGemini25Model };
7
+ export const MODEL_SPECIFIC_INSTRUCTIONS = {
8
+ "gemini-3": `You have access to extended thinking capabilities. Use them for complex reasoning tasks that require deep analysis.`,
9
+ "gemini-2.5": `You support function calling and structured outputs. Format responses according to the requested schema when provided.`,
10
+ "gpt-4": `You are a helpful assistant with strong reasoning capabilities.`,
11
+ "claude-3": `You have extended thinking capabilities available when enabled. Use systematic reasoning for complex problems.`,
12
+ default: "",
13
+ };
14
+ export function getModelSpecificInstructions(model) {
15
+ if (isGemini3Model(model)) {
16
+ return MODEL_SPECIFIC_INSTRUCTIONS["gemini-3"];
17
+ }
18
+ if (isGemini25Model(model)) {
19
+ return MODEL_SPECIFIC_INSTRUCTIONS["gemini-2.5"];
20
+ }
21
+ if (/^gpt-4/i.test(model)) {
22
+ return MODEL_SPECIFIC_INSTRUCTIONS["gpt-4"];
23
+ }
24
+ if (/^claude-3/i.test(model)) {
25
+ return MODEL_SPECIFIC_INSTRUCTIONS["claude-3"];
26
+ }
27
+ return MODEL_SPECIFIC_INSTRUCTIONS["default"];
28
+ }
29
+ export function enhancePromptForModel(basePrompt, model, _provider) {
30
+ const modelInstructions = getModelSpecificInstructions(model);
31
+ if (!modelInstructions) {
32
+ return basePrompt;
33
+ }
34
+ return `${modelInstructions}\n\n${basePrompt}`;
35
+ }
36
+ export function shouldEnhancePrompt(model) {
37
+ return isGemini3Model(model) || isGemini25Model(model);
38
+ }
39
+ //# sourceMappingURL=modelSpecificPrompts.js.map
@@ -191,6 +191,12 @@ export declare enum VertexModels {
191
191
  GEMINI_3_PRO_LATEST = "gemini-3-pro-latest",
192
192
  /** Gemini 3 Pro Preview - Generic preview (legacy) */
193
193
  GEMINI_3_PRO_PREVIEW = "gemini-3-pro-preview",
194
+ /** Gemini 3 Flash - Base model with adaptive thinking */
195
+ GEMINI_3_FLASH = "gemini-3-flash",
196
+ /** Gemini 3 Flash Preview - Versioned preview */
197
+ GEMINI_3_FLASH_PREVIEW = "gemini-3-flash-preview",
198
+ /** Gemini 3 Flash Latest - Auto-updated alias (always points to latest preview) */
199
+ GEMINI_3_FLASH_LATEST = "gemini-3-flash-latest",
194
200
  GEMINI_2_5_PRO = "gemini-2.5-pro",
195
201
  GEMINI_2_5_FLASH = "gemini-2.5-flash",
196
202
  GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite",
@@ -208,6 +214,8 @@ export declare enum VertexModels {
208
214
  export declare enum GoogleAIModels {
209
215
  GEMINI_3_PRO_PREVIEW = "gemini-3-pro-preview",
210
216
  GEMINI_3_PRO_IMAGE_PREVIEW = "gemini-3-pro-image-preview",
217
+ GEMINI_3_FLASH = "gemini-3-flash",
218
+ GEMINI_3_FLASH_PREVIEW = "gemini-3-flash-preview",
211
219
  GEMINI_2_5_PRO = "gemini-2.5-pro",
212
220
  GEMINI_2_5_FLASH = "gemini-2.5-flash",
213
221
  GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite",
@@ -269,6 +269,12 @@ export var VertexModels;
269
269
  VertexModels["GEMINI_3_PRO_LATEST"] = "gemini-3-pro-latest";
270
270
  /** Gemini 3 Pro Preview - Generic preview (legacy) */
271
271
  VertexModels["GEMINI_3_PRO_PREVIEW"] = "gemini-3-pro-preview";
272
+ /** Gemini 3 Flash - Base model with adaptive thinking */
273
+ VertexModels["GEMINI_3_FLASH"] = "gemini-3-flash";
274
+ /** Gemini 3 Flash Preview - Versioned preview */
275
+ VertexModels["GEMINI_3_FLASH_PREVIEW"] = "gemini-3-flash-preview";
276
+ /** Gemini 3 Flash Latest - Auto-updated alias (always points to latest preview) */
277
+ VertexModels["GEMINI_3_FLASH_LATEST"] = "gemini-3-flash-latest";
272
278
  // Gemini 2.5 Series (Latest - 2025)
273
279
  VertexModels["GEMINI_2_5_PRO"] = "gemini-2.5-pro";
274
280
  VertexModels["GEMINI_2_5_FLASH"] = "gemini-2.5-flash";
@@ -291,6 +297,8 @@ export var GoogleAIModels;
291
297
  // Gemini 3 Series
292
298
  GoogleAIModels["GEMINI_3_PRO_PREVIEW"] = "gemini-3-pro-preview";
293
299
  GoogleAIModels["GEMINI_3_PRO_IMAGE_PREVIEW"] = "gemini-3-pro-image-preview";
300
+ GoogleAIModels["GEMINI_3_FLASH"] = "gemini-3-flash";
301
+ GoogleAIModels["GEMINI_3_FLASH_PREVIEW"] = "gemini-3-flash-preview";
294
302
  // Gemini 2.5 Series
295
303
  GoogleAIModels["GEMINI_2_5_PRO"] = "gemini-2.5-pro";
296
304
  GoogleAIModels["GEMINI_2_5_FLASH"] = "gemini-2.5-flash";
@@ -85,6 +85,9 @@ export declare const PROVIDER_TOKEN_LIMITS: {
85
85
  readonly "gemini-3-pro-preview": 8192;
86
86
  readonly "gemini-3-pro-preview-11-2025": 8192;
87
87
  readonly "gemini-3-pro-latest": 8192;
88
+ readonly "gemini-3-flash": 65536;
89
+ readonly "gemini-3-flash-preview": 65536;
90
+ readonly "gemini-3-flash-latest": 65536;
88
91
  readonly "gemini-2.5-pro": 8192;
89
92
  readonly "gemini-2.5-flash": 8192;
90
93
  readonly "gemini-2.5-flash-lite": 8192;
@@ -100,6 +103,9 @@ export declare const PROVIDER_TOKEN_LIMITS: {
100
103
  readonly "gemini-3-pro-preview": 8192;
101
104
  readonly "gemini-3-pro-preview-11-2025": 8192;
102
105
  readonly "gemini-3-pro-latest": 8192;
106
+ readonly "gemini-3-flash": 65536;
107
+ readonly "gemini-3-flash-preview": 65536;
108
+ readonly "gemini-3-flash-latest": 65536;
103
109
  readonly "gemini-2.5-pro": 8192;
104
110
  readonly "gemini-2.5-flash": 8192;
105
111
  readonly "gemini-2.5-flash-lite": 8192;
@@ -182,6 +188,25 @@ export declare const CONTEXT_WINDOWS: {
182
188
  /** Maximum theoretical context */
183
189
  readonly MAXIMUM: 2097152;
184
190
  };
191
+ /**
192
+ * Model-specific token limits with input/output breakdown
193
+ * For models that require explicit input and output token limits
194
+ */
195
+ export declare const MODEL_TOKEN_LIMITS: {
196
+ /** Gemini 3 Flash Series */
197
+ readonly "gemini-3-flash": {
198
+ readonly input: 1000000;
199
+ readonly output: 65536;
200
+ };
201
+ readonly "gemini-3-flash-preview": {
202
+ readonly input: 1000000;
203
+ readonly output: 65536;
204
+ };
205
+ readonly "gemini-3-flash-latest": {
206
+ readonly input: 1000000;
207
+ readonly output: 65536;
208
+ };
209
+ };
185
210
  /**
186
211
  * Token estimation utilities
187
212
  * Rough estimates for token counting without full tokenization
@@ -89,6 +89,10 @@ export const PROVIDER_TOKEN_LIMITS = {
89
89
  "gemini-3-pro-preview": 8192,
90
90
  "gemini-3-pro-preview-11-2025": 8192,
91
91
  "gemini-3-pro-latest": 8192,
92
+ // Gemini 3 Flash Series
93
+ "gemini-3-flash": 65536,
94
+ "gemini-3-flash-preview": 65536,
95
+ "gemini-3-flash-latest": 65536,
92
96
  // Gemini 2.5 Series
93
97
  "gemini-2.5-pro": 8192,
94
98
  "gemini-2.5-flash": 8192,
@@ -108,6 +112,10 @@ export const PROVIDER_TOKEN_LIMITS = {
108
112
  "gemini-3-pro-preview": 8192,
109
113
  "gemini-3-pro-preview-11-2025": 8192,
110
114
  "gemini-3-pro-latest": 8192,
115
+ // Gemini 3 Flash Series
116
+ "gemini-3-flash": 65536,
117
+ "gemini-3-flash-preview": 65536,
118
+ "gemini-3-flash-latest": 65536,
111
119
  // Gemini 2.5 Series
112
120
  "gemini-2.5-pro": 8192,
113
121
  "gemini-2.5-flash": 8192,
@@ -201,6 +209,16 @@ export const CONTEXT_WINDOWS = {
201
209
  /** Maximum theoretical context */
202
210
  MAXIMUM: 2097152, // 2M - Maximum context
203
211
  };
212
+ /**
213
+ * Model-specific token limits with input/output breakdown
214
+ * For models that require explicit input and output token limits
215
+ */
216
+ export const MODEL_TOKEN_LIMITS = {
217
+ /** Gemini 3 Flash Series */
218
+ "gemini-3-flash": { input: 1000000, output: 65536 },
219
+ "gemini-3-flash-preview": { input: 1000000, output: 65536 },
220
+ "gemini-3-flash-latest": { input: 1000000, output: 65536 },
221
+ };
204
222
  /**
205
223
  * Token estimation utilities
206
224
  * Rough estimates for token counting without full tokenization