@juspay/neurolink 9.9.0 → 9.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ ## [9.10.1](https://github.com/juspay/neurolink/compare/v9.10.0...v9.10.1) (2026-02-21)
2
+
3
+ ### Bug Fixes
4
+
5
+ - **(video-analysis):** add stream support for video analysis ([938aeef](https://github.com/juspay/neurolink/commit/938aeef876277360700d2a7192155af1f1316f28))
6
+
7
+ ## [9.10.0](https://github.com/juspay/neurolink/compare/v9.9.0...v9.10.0) (2026-02-20)
8
+
9
+ ### Features
10
+
11
+ - **(generateText):** add prepareStep and toolChoice passthrough support for multi-step agentic generation ([4cd340a](https://github.com/juspay/neurolink/commit/4cd340af7d39f72006d09fe86569232d751dcd8d))
12
+
1
13
  ## [9.9.0](https://github.com/juspay/neurolink/compare/v9.8.0...v9.9.0) (2026-02-17)
2
14
 
3
15
  ### Features
package/README.md CHANGED
@@ -35,17 +35,18 @@ Extracted from production systems at Juspay and battle-tested at enterprise scal
35
35
 
36
36
  ## What's New (Q1 2026)
37
37
 
38
- | Feature | Version | Description | Guide |
39
- | ----------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- |
40
- | **Context Window Management** | v9.2.0 | 4-stage compaction pipeline with auto-detection, budget gate at 80% usage, per-provider token estimation | [Context Compaction Guide](docs/features/context-compaction.md) |
41
- | **File Processor System** | v9.1.0 | 17+ file type processors with ProcessorRegistry, security sanitization, SVG text injection | [File Processors Guide](docs/features/file-processors.md) |
42
- | **RAG with generate()/stream()** | v9.2.0 | Pass `rag: { files }` to generate/stream for automatic document chunking, embedding, and AI-powered search. 10 chunking strategies, hybrid search, reranking. | [RAG Guide](docs/features/rag.md) |
43
- | **External TracerProvider Support** | v8.43.0 | Integrate NeuroLink with existing OpenTelemetry instrumentation. Prevents duplicate registration conflicts. | [Observability Guide](docs/features/observability.md) |
44
- | **Server Adapters** | v8.43.0 | Multi-framework HTTP server with Hono, Express, Fastify, Koa support. Full CLI for server management with foreground/background modes. | [Server Adapters Guide](docs/guides/server-adapters/index.md) |
45
- | **Title Generation Events** | v8.38.0 | Emit `conversation:titleGenerated` event when conversation title is generated. Supports custom title prompts via `NEUROLINK_TITLE_PROMPT`. | [Conversation Memory Guide](docs/conversation-memory.md) |
46
- | **Video Generation with Veo** | v8.32.0 | Video generation using Veo 3.1 (`veo-3.1`). Realistic video generation with many parameter options | [Video Generation Guide](docs/features/video-generation.md) |
47
- | **Image Generation with Gemini** | v8.31.0 | Native image generation using Gemini 2.0 Flash Experimental (`imagen-3.0-generate-002`). High-quality image synthesis directly from Google AI. | [Image Generation Guide](docs/image-generation-streaming.md) |
48
- | **HTTP/Streamable HTTP Transport** | v8.29.0 | Connect to remote MCP servers via HTTP with authentication headers, automatic retry with exponential backoff, and configurable rate limiting. | [HTTP Transport Guide](docs/mcp-http-transport.md) |
38
+ | Feature | Version | Description | Guide |
39
+ | ----------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------- |
40
+ | **Context Window Management** | v9.2.0 | 4-stage compaction pipeline with auto-detection, budget gate at 80% usage, per-provider token estimation | [Context Compaction Guide](docs/features/context-compaction.md) |
41
+ | **Tool Execution Control**          | v9.10.0 | `prepareStep` and `toolChoice` support for per-step tool enforcement in multi-step agentic loops. API-level control over tool calls.                           | [API Reference](docs/api/type-aliases/GenerateOptions.md#preparestep) |
42
+ | **File Processor System** | v9.1.0 | 17+ file type processors with ProcessorRegistry, security sanitization, SVG text injection | [File Processors Guide](docs/features/file-processors.md) |
43
+ | **RAG with generate()/stream()** | v9.2.0 | Pass `rag: { files }` to generate/stream for automatic document chunking, embedding, and AI-powered search. 10 chunking strategies, hybrid search, reranking. | [RAG Guide](docs/features/rag.md) |
44
+ | **External TracerProvider Support** | v8.43.0 | Integrate NeuroLink with existing OpenTelemetry instrumentation. Prevents duplicate registration conflicts. | [Observability Guide](docs/features/observability.md) |
45
+ | **Server Adapters** | v8.43.0 | Multi-framework HTTP server with Hono, Express, Fastify, Koa support. Full CLI for server management with foreground/background modes. | [Server Adapters Guide](docs/guides/server-adapters/index.md) |
46
+ | **Title Generation Events** | v8.38.0 | Emit `conversation:titleGenerated` event when conversation title is generated. Supports custom title prompts via `NEUROLINK_TITLE_PROMPT`. | [Conversation Memory Guide](docs/conversation-memory.md) |
47
+ | **Video Generation with Veo** | v8.32.0 | Video generation using Veo 3.1 (`veo-3.1`). Realistic video generation with many parameter options | [Video Generation Guide](docs/features/video-generation.md) |
48
+ | **Image Generation with Gemini** | v8.31.0 | Native image generation using Gemini 2.0 Flash Experimental (`imagen-3.0-generate-002`). High-quality image synthesis directly from Google AI. | [Image Generation Guide](docs/image-generation-streaming.md) |
49
+ | **HTTP/Streamable HTTP Transport** | v8.29.0 | Connect to remote MCP servers via HTTP with authentication headers, automatic retry with exponential backoff, and configurable rate limiting. | [HTTP Transport Guide](docs/mcp-http-transport.md) |
49
50
 
50
51
  - **External TracerProvider Support** – Integrate NeuroLink with applications that already have OpenTelemetry instrumentation. Supports auto-detection and manual configuration. → [Observability Guide](docs/features/observability.md)
51
52
  - **Server Adapters** – Deploy NeuroLink as an HTTP API server with your framework of choice (Hono, Express, Fastify, Koa). Full CLI support with `serve` and `server` commands for foreground/background modes, route management, and OpenAPI generation. → [Server Adapters Guide](docs/guides/server-adapters/index.md)
@@ -56,6 +57,7 @@ Extracted from production systems at Juspay and battle-tested at enterprise scal
56
57
  - **RAG with generate()/stream()** – Just pass `rag: { files: ["./docs/guide.md"] }` to `generate()` or `stream()`. NeuroLink auto-chunks, embeds, and creates a search tool the AI can invoke. 10 chunking strategies, hybrid search, 5 reranker types. → [RAG Guide](docs/features/rag.md)
57
58
  - **HTTP/Streamable HTTP Transport for MCP** – Connect to remote MCP servers via HTTP with authentication headers, retry logic, and rate limiting. → [HTTP Transport Guide](docs/mcp-http-transport.md)
58
59
  - 🧠 **Gemini 3 Preview Support** - Full support for gemini-3-flash-preview and gemini-3-pro-preview with extended thinking capabilities
60
+ - 🎯 **Tool Execution Control** – Use `prepareStep` to enforce specific tool calls or change the LLM model per step in multi-step agentic executions. Prevents LLMs from skipping required tools. Use `toolChoice` for static control, or `prepareStep` for dynamic per-step logic. → [GenerateOptions Reference](docs/api/type-aliases/GenerateOptions.md#preparestep)
59
61
  - **Structured Output with Zod Schemas** – Type-safe JSON generation with automatic validation using `schema` + `output.format: "json"` in `generate()`. → [Structured Output Guide](docs/features/structured-output.md)
60
62
  - **CSV File Support** – Attach CSV files to prompts for AI-powered data analysis with auto-detection. → [CSV Guide](docs/features/multimodal-chat.md#csv-file-support)
61
63
  - **PDF File Support** – Process PDF documents with native visual analysis for Vertex AI, Anthropic, Bedrock, AI Studio. → [PDF Guide](docs/features/pdf-support.md)
@@ -8,16 +8,16 @@
8
8
  */
9
9
  import { AIProviderName } from "../../constants/enums.js";
10
10
  import type { CoreMessage } from "ai";
11
- export declare function analyzeVideoWithVertexAI(frames: CoreMessage, options?: {
11
+ export declare function analyzeVideoWithVertexAI(messages: CoreMessage[], options?: {
12
12
  project?: string;
13
13
  location?: string;
14
14
  model?: string;
15
15
  }): Promise<string>;
16
- export declare function analyzeVideoWithGeminiAPI(frames: CoreMessage, options?: {
16
+ export declare function analyzeVideoWithGeminiAPI(messages: CoreMessage[], options?: {
17
17
  apiKey?: string;
18
18
  model?: string;
19
19
  }): Promise<string>;
20
- export declare function analyzeVideo(frames: CoreMessage, options?: {
20
+ export declare function analyzeVideo(messages: CoreMessage[], options?: {
21
21
  provider?: AIProviderName;
22
22
  project?: string;
23
23
  location?: string;
@@ -9,23 +9,35 @@
9
9
  import { AIProviderName, ErrorSeverity, ErrorCategory, } from "../../constants/enums.js";
10
10
  import { logger } from "../../utils/logger.js";
11
11
  import { readFile } from "node:fs/promises";
12
- import { NeuroLinkError } from "../../utils/errorHandling.js";
12
+ import { NeuroLinkError, ErrorFactory } from "../../utils/errorHandling.js";
13
13
  // ---------------------------------------------------------------------------
14
14
  // Shared config
15
15
  // ---------------------------------------------------------------------------
16
16
  const DEFAULT_MODEL = "gemini-2.0-flash";
17
17
  const DEFAULT_LOCATION = "us-central1";
18
+ /**
19
+ * Extract content items from user messages
20
+ *
21
+ * @param messages - Array of CoreMessage objects
22
+ * @returns Flattened array of content items from user messages
23
+ */
24
+ function extractUserContent(messages) {
25
+ const userMessages = messages.filter((msg) => msg.role === "user");
26
+ return userMessages.flatMap((msg) => Array.isArray(msg.content) ? msg.content : []);
27
+ }
18
28
  /**
19
29
  * Convert CoreMessage content array to Gemini parts format
20
30
  *
21
- * @param contentArray - Array of content items from CoreMessage
31
+ * @param messages - Array of CoreMessage objects
22
32
  * @returns Array of parts in Gemini API format
23
33
  */
24
- function buildContentParts(frames) {
25
- const contentArray = Array.isArray(frames.content) ? frames.content : [];
26
- return contentArray.map((item) => {
27
- if (item.type === "text" && item.text) {
28
- return { text: item.text };
34
+ function buildContentParts(messages) {
35
+ const allContent = extractUserContent(messages);
36
+ return allContent
37
+ .map((item) => {
38
+ if (item.type === "text") {
39
+ // Accept text parts regardless of whether text is empty
40
+ return { text: item.text || "" };
29
41
  }
30
42
  else if (item.type === "image" && item.image) {
31
43
  let base64Data;
@@ -38,7 +50,7 @@ function buildContentParts(frames) {
38
50
  base64Data = item.image.replace(/^data:image\/[a-z]+;base64,/, "");
39
51
  }
40
52
  else {
41
- throw new Error(`Invalid image data type: expected string, Buffer, or Uint8Array, got ${typeof item.image}`);
53
+ throw ErrorFactory.invalidConfiguration("image data type", `expected string, Buffer, or Uint8Array, got ${typeof item.image}`, { itemType: item.type, dataType: typeof item.image });
42
54
  }
43
55
  return {
44
56
  inlineData: {
@@ -47,8 +59,14 @@ function buildContentParts(frames) {
47
59
  },
48
60
  };
49
61
  }
50
- throw new Error(`Invalid content type: ${item.type}`);
51
- });
62
+ else if (item.type === "file") {
63
+ // Skip file parts - not supported in Gemini parts format
64
+ return null;
65
+ }
66
+ // Return null for unsupported types
67
+ return null;
68
+ })
69
+ .filter((part) => part !== null);
52
70
  }
53
71
  /**
54
72
  * Configuration for video frame analysis.
@@ -88,7 +106,7 @@ Ensure the final response is fully self-sufficient and does not reference extern
88
106
  // ---------------------------------------------------------------------------
89
107
  // Vertex AI
90
108
  // ---------------------------------------------------------------------------
91
- export async function analyzeVideoWithVertexAI(frames, options = {}) {
109
+ export async function analyzeVideoWithVertexAI(messages, options = {}) {
92
110
  const startTime = Date.now();
93
111
  const { GoogleGenAI } = await import("@google/genai");
94
112
  // Get default config and merge with provided options
@@ -96,9 +114,9 @@ export async function analyzeVideoWithVertexAI(frames, options = {}) {
96
114
  const project = options.project ?? config.project;
97
115
  const location = options.location ?? config.location;
98
116
  const model = options.model || DEFAULT_MODEL;
99
- // Extract content array from CoreMessage
100
- const contentArray = Array.isArray(frames.content) ? frames.content : [];
101
- const frameCount = contentArray.filter((item) => item.type === "image").length;
117
+ // Convert frames content to parts array for Gemini
118
+ const parts = buildContentParts(messages);
119
+ const frameCount = parts.filter((part) => "inlineData" in part && part.inlineData).length;
102
120
  logger.debug("[GeminiVideoAnalyzer] Analyzing video with Vertex AI", {
103
121
  project,
104
122
  location,
@@ -106,8 +124,6 @@ export async function analyzeVideoWithVertexAI(frames, options = {}) {
106
124
  frameCount,
107
125
  });
108
126
  const ai = new GoogleGenAI({ vertexai: true, project, location });
109
- // Convert frames content to parts array for Gemini
110
- const parts = buildContentParts(frames);
111
127
  const response = await ai.models.generateContent({
112
128
  model,
113
129
  config: buildConfig(),
@@ -129,7 +145,7 @@ export async function analyzeVideoWithVertexAI(frames, options = {}) {
129
145
  // ---------------------------------------------------------------------------
130
146
  // Gemini API (Google AI)
131
147
  // ---------------------------------------------------------------------------
132
- export async function analyzeVideoWithGeminiAPI(frames, options = {}) {
148
+ export async function analyzeVideoWithGeminiAPI(messages, options = {}) {
133
149
  const startTime = Date.now();
134
150
  const { GoogleGenAI } = await import("@google/genai");
135
151
  const apiKey = options.apiKey || process.env.GOOGLE_AI_API_KEY;
@@ -137,16 +153,14 @@ export async function analyzeVideoWithGeminiAPI(frames, options = {}) {
137
153
  if (!apiKey) {
138
154
  throw new Error("GOOGLE_AI_API_KEY environment variable is required for Gemini API video analysis");
139
155
  }
140
- // Extract content array from CoreMessage
141
- const contentArray = Array.isArray(frames.content) ? frames.content : [];
142
- const frameCount = contentArray.filter((item) => item.type === "image").length;
156
+ // Convert frames content to parts array for Gemini
157
+ const parts = buildContentParts(messages);
158
+ const frameCount = parts.filter((part) => "inlineData" in part && part.inlineData).length;
143
159
  logger.debug("[GeminiVideoAnalyzer] Analyzing video with Gemini API", {
144
160
  model,
145
161
  frameCount,
146
162
  });
147
163
  const ai = new GoogleGenAI({ apiKey });
148
- // Convert frames content to parts array for Gemini
149
- const parts = buildContentParts(frames);
150
164
  logger.debug("[GeminiVideoAnalyzer] Generating analysis with frames");
151
165
  const response = await ai.models.generateContent({
152
166
  model,
@@ -207,15 +221,15 @@ async function getVertexConfig() {
207
221
  }
208
222
  return { project, location };
209
223
  }
210
- export async function analyzeVideo(frames, options = {}) {
224
+ export async function analyzeVideo(messages, options = {}) {
211
225
  const provider = options.provider || AIProviderName.AUTO;
212
226
  // Vertex — only when GOOGLE_VERTEX_PROJECT is explicitly set
213
227
  if (provider === AIProviderName.VERTEX || provider === AIProviderName.AUTO) {
214
- return analyzeVideoWithVertexAI(frames, options);
228
+ return analyzeVideoWithVertexAI(messages, options);
215
229
  }
216
230
  // Gemini API — when GOOGLE_AI_API_KEY is set
217
231
  if (provider === AIProviderName.GOOGLE_AI && process.env.GOOGLE_AI_API_KEY) {
218
- return analyzeVideoWithGeminiAPI(frames, options);
232
+ return analyzeVideoWithGeminiAPI(messages, options);
219
233
  }
220
234
  throw new Error("No valid provider configuration found. " +
221
235
  "Set GOOGLE_VERTEX_PROJECT for Vertex AI or GOOGLE_AI_API_KEY for Gemini API.");
@@ -5,4 +5,4 @@ import type { TextGenerationOptions } from "../../lib/types/generateTypes.js";
5
5
  * This object provides metadata for validation and help text in the CLI loop.
6
6
  * It is derived from the main TextGenerationOptions interface to ensure consistency.
7
7
  */
8
- export declare const textGenerationOptionsSchema: Record<keyof Omit<TextGenerationOptions, "prompt" | "input" | "schema" | "tools" | "context" | "conversationHistory" | "conversationMessages" | "conversationMemoryConfig" | "originalPrompt" | "middleware" | "expectedOutcome" | "evaluationCriteria" | "region" | "csvOptions" | "tts" | "thinkingConfig" | "fileRegistry" | "abortSignal" | "toolFilter" | "excludeTools">, OptionSchema>;
8
+ export declare const textGenerationOptionsSchema: Record<keyof Omit<TextGenerationOptions, "prompt" | "input" | "schema" | "tools" | "context" | "conversationHistory" | "conversationMessages" | "conversationMemoryConfig" | "originalPrompt" | "middleware" | "expectedOutcome" | "evaluationCriteria" | "region" | "csvOptions" | "tts" | "thinkingConfig" | "fileRegistry" | "abortSignal" | "toolFilter" | "excludeTools" | "toolChoice" | "prepareStep">, OptionSchema>;
@@ -86,6 +86,20 @@ export class BaseProvider {
86
86
  temperature: options.temperature,
87
87
  timestamp: Date.now(),
88
88
  });
89
+ // ===== EARLY MULTIMODAL DETECTION =====
90
+ const hasFileInput = !!options.input?.files?.length || !!options.input?.videoFiles?.length;
91
+ if (hasFileInput) {
92
+ // ===== VIDEO ANALYSIS DETECTION =====
93
+ // Check if video frames are present and handle with fake streaming
94
+ const messages = await this.buildMessagesForStream(options);
95
+ if (hasVideoFrames(messages)) {
96
+ logger.info(`Video frames detected in stream, using fake streaming for video analysis`, {
97
+ provider: this.providerName,
98
+ model: this.modelName,
99
+ });
100
+ return await this.executeFakeStreaming(options, analysisSchema);
101
+ }
102
+ }
89
103
  // 🔧 CRITICAL: Image generation models don't support real streaming
90
104
  // Force fake streaming for image models to ensure image output is yielded
91
105
  const isImageModel = IMAGE_GENERATION_MODELS.some((m) => this.modelName.includes(m));
@@ -49,8 +49,12 @@ export class GenerationHandler {
49
49
  model,
50
50
  messages,
51
51
  ...(shouldUseTools && Object.keys(tools).length > 0 && { tools }),
52
- maxSteps: options.maxSteps || DEFAULT_MAX_STEPS,
53
- ...(shouldUseTools && { toolChoice: "auto" }),
52
+ maxSteps: options.maxSteps ?? DEFAULT_MAX_STEPS,
53
+ ...(shouldUseTools &&
54
+ options.toolChoice && { toolChoice: options.toolChoice }),
55
+ ...(options.prepareStep && {
56
+ experimental_prepareStep: options.prepareStep,
57
+ }),
54
58
  temperature: options.temperature,
55
59
  maxTokens: options.maxTokens,
56
60
  abortSignal: options.abortSignal,
@@ -8,16 +8,16 @@
8
8
  */
9
9
  import { AIProviderName } from "../../constants/enums.js";
10
10
  import type { CoreMessage } from "ai";
11
- export declare function analyzeVideoWithVertexAI(frames: CoreMessage, options?: {
11
+ export declare function analyzeVideoWithVertexAI(messages: CoreMessage[], options?: {
12
12
  project?: string;
13
13
  location?: string;
14
14
  model?: string;
15
15
  }): Promise<string>;
16
- export declare function analyzeVideoWithGeminiAPI(frames: CoreMessage, options?: {
16
+ export declare function analyzeVideoWithGeminiAPI(messages: CoreMessage[], options?: {
17
17
  apiKey?: string;
18
18
  model?: string;
19
19
  }): Promise<string>;
20
- export declare function analyzeVideo(frames: CoreMessage, options?: {
20
+ export declare function analyzeVideo(messages: CoreMessage[], options?: {
21
21
  provider?: AIProviderName;
22
22
  project?: string;
23
23
  location?: string;
@@ -9,23 +9,35 @@
9
9
  import { AIProviderName, ErrorSeverity, ErrorCategory, } from "../../constants/enums.js";
10
10
  import { logger } from "../../utils/logger.js";
11
11
  import { readFile } from "node:fs/promises";
12
- import { NeuroLinkError } from "../../utils/errorHandling.js";
12
+ import { NeuroLinkError, ErrorFactory } from "../../utils/errorHandling.js";
13
13
  // ---------------------------------------------------------------------------
14
14
  // Shared config
15
15
  // ---------------------------------------------------------------------------
16
16
  const DEFAULT_MODEL = "gemini-2.0-flash";
17
17
  const DEFAULT_LOCATION = "us-central1";
18
+ /**
19
+ * Extract content items from user messages
20
+ *
21
+ * @param messages - Array of CoreMessage objects
22
+ * @returns Flattened array of content items from user messages
23
+ */
24
+ function extractUserContent(messages) {
25
+ const userMessages = messages.filter((msg) => msg.role === "user");
26
+ return userMessages.flatMap((msg) => Array.isArray(msg.content) ? msg.content : []);
27
+ }
18
28
  /**
19
29
  * Convert CoreMessage content array to Gemini parts format
20
30
  *
21
- * @param contentArray - Array of content items from CoreMessage
31
+ * @param messages - Array of CoreMessage objects
22
32
  * @returns Array of parts in Gemini API format
23
33
  */
24
- function buildContentParts(frames) {
25
- const contentArray = Array.isArray(frames.content) ? frames.content : [];
26
- return contentArray.map((item) => {
27
- if (item.type === "text" && item.text) {
28
- return { text: item.text };
34
+ function buildContentParts(messages) {
35
+ const allContent = extractUserContent(messages);
36
+ return allContent
37
+ .map((item) => {
38
+ if (item.type === "text") {
39
+ // Accept text parts regardless of whether text is empty
40
+ return { text: item.text || "" };
29
41
  }
30
42
  else if (item.type === "image" && item.image) {
31
43
  let base64Data;
@@ -38,7 +50,7 @@ function buildContentParts(frames) {
38
50
  base64Data = item.image.replace(/^data:image\/[a-z]+;base64,/, "");
39
51
  }
40
52
  else {
41
- throw new Error(`Invalid image data type: expected string, Buffer, or Uint8Array, got ${typeof item.image}`);
53
+ throw ErrorFactory.invalidConfiguration("image data type", `expected string, Buffer, or Uint8Array, got ${typeof item.image}`, { itemType: item.type, dataType: typeof item.image });
42
54
  }
43
55
  return {
44
56
  inlineData: {
@@ -47,8 +59,14 @@ function buildContentParts(frames) {
47
59
  },
48
60
  };
49
61
  }
50
- throw new Error(`Invalid content type: ${item.type}`);
51
- });
62
+ else if (item.type === "file") {
63
+ // Skip file parts - not supported in Gemini parts format
64
+ return null;
65
+ }
66
+ // Return null for unsupported types
67
+ return null;
68
+ })
69
+ .filter((part) => part !== null);
52
70
  }
53
71
  /**
54
72
  * Configuration for video frame analysis.
@@ -88,7 +106,7 @@ Ensure the final response is fully self-sufficient and does not reference extern
88
106
  // ---------------------------------------------------------------------------
89
107
  // Vertex AI
90
108
  // ---------------------------------------------------------------------------
91
- export async function analyzeVideoWithVertexAI(frames, options = {}) {
109
+ export async function analyzeVideoWithVertexAI(messages, options = {}) {
92
110
  const startTime = Date.now();
93
111
  const { GoogleGenAI } = await import("@google/genai");
94
112
  // Get default config and merge with provided options
@@ -96,9 +114,9 @@ export async function analyzeVideoWithVertexAI(frames, options = {}) {
96
114
  const project = options.project ?? config.project;
97
115
  const location = options.location ?? config.location;
98
116
  const model = options.model || DEFAULT_MODEL;
99
- // Extract content array from CoreMessage
100
- const contentArray = Array.isArray(frames.content) ? frames.content : [];
101
- const frameCount = contentArray.filter((item) => item.type === "image").length;
117
+ // Convert frames content to parts array for Gemini
118
+ const parts = buildContentParts(messages);
119
+ const frameCount = parts.filter((part) => "inlineData" in part && part.inlineData).length;
102
120
  logger.debug("[GeminiVideoAnalyzer] Analyzing video with Vertex AI", {
103
121
  project,
104
122
  location,
@@ -106,8 +124,6 @@ export async function analyzeVideoWithVertexAI(frames, options = {}) {
106
124
  frameCount,
107
125
  });
108
126
  const ai = new GoogleGenAI({ vertexai: true, project, location });
109
- // Convert frames content to parts array for Gemini
110
- const parts = buildContentParts(frames);
111
127
  const response = await ai.models.generateContent({
112
128
  model,
113
129
  config: buildConfig(),
@@ -129,7 +145,7 @@ export async function analyzeVideoWithVertexAI(frames, options = {}) {
129
145
  // ---------------------------------------------------------------------------
130
146
  // Gemini API (Google AI)
131
147
  // ---------------------------------------------------------------------------
132
- export async function analyzeVideoWithGeminiAPI(frames, options = {}) {
148
+ export async function analyzeVideoWithGeminiAPI(messages, options = {}) {
133
149
  const startTime = Date.now();
134
150
  const { GoogleGenAI } = await import("@google/genai");
135
151
  const apiKey = options.apiKey || process.env.GOOGLE_AI_API_KEY;
@@ -137,16 +153,14 @@ export async function analyzeVideoWithGeminiAPI(frames, options = {}) {
137
153
  if (!apiKey) {
138
154
  throw new Error("GOOGLE_AI_API_KEY environment variable is required for Gemini API video analysis");
139
155
  }
140
- // Extract content array from CoreMessage
141
- const contentArray = Array.isArray(frames.content) ? frames.content : [];
142
- const frameCount = contentArray.filter((item) => item.type === "image").length;
156
+ // Convert frames content to parts array for Gemini
157
+ const parts = buildContentParts(messages);
158
+ const frameCount = parts.filter((part) => "inlineData" in part && part.inlineData).length;
143
159
  logger.debug("[GeminiVideoAnalyzer] Analyzing video with Gemini API", {
144
160
  model,
145
161
  frameCount,
146
162
  });
147
163
  const ai = new GoogleGenAI({ apiKey });
148
- // Convert frames content to parts array for Gemini
149
- const parts = buildContentParts(frames);
150
164
  logger.debug("[GeminiVideoAnalyzer] Generating analysis with frames");
151
165
  const response = await ai.models.generateContent({
152
166
  model,
@@ -207,15 +221,15 @@ async function getVertexConfig() {
207
221
  }
208
222
  return { project, location };
209
223
  }
210
- export async function analyzeVideo(frames, options = {}) {
224
+ export async function analyzeVideo(messages, options = {}) {
211
225
  const provider = options.provider || AIProviderName.AUTO;
212
226
  // Vertex — only when GOOGLE_VERTEX_PROJECT is explicitly set
213
227
  if (provider === AIProviderName.VERTEX || provider === AIProviderName.AUTO) {
214
- return analyzeVideoWithVertexAI(frames, options);
228
+ return analyzeVideoWithVertexAI(messages, options);
215
229
  }
216
230
  // Gemini API — when GOOGLE_AI_API_KEY is set
217
231
  if (provider === AIProviderName.GOOGLE_AI && process.env.GOOGLE_AI_API_KEY) {
218
- return analyzeVideoWithGeminiAPI(frames, options);
232
+ return analyzeVideoWithGeminiAPI(messages, options);
219
233
  }
220
234
  throw new Error("No valid provider configuration found. " +
221
235
  "Set GOOGLE_VERTEX_PROJECT for Vertex AI or GOOGLE_AI_API_KEY for Gemini API.");
@@ -86,6 +86,20 @@ export class BaseProvider {
86
86
  temperature: options.temperature,
87
87
  timestamp: Date.now(),
88
88
  });
89
+ // ===== EARLY MULTIMODAL DETECTION =====
90
+ const hasFileInput = !!options.input?.files?.length || !!options.input?.videoFiles?.length;
91
+ if (hasFileInput) {
92
+ // ===== VIDEO ANALYSIS DETECTION =====
93
+ // Check if video frames are present and handle with fake streaming
94
+ const messages = await this.buildMessagesForStream(options);
95
+ if (hasVideoFrames(messages)) {
96
+ logger.info(`Video frames detected in stream, using fake streaming for video analysis`, {
97
+ provider: this.providerName,
98
+ model: this.modelName,
99
+ });
100
+ return await this.executeFakeStreaming(options, analysisSchema);
101
+ }
102
+ }
89
103
  // 🔧 CRITICAL: Image generation models don't support real streaming
90
104
  // Force fake streaming for image models to ensure image output is yielded
91
105
  const isImageModel = IMAGE_GENERATION_MODELS.some((m) => this.modelName.includes(m));
@@ -49,8 +49,12 @@ export class GenerationHandler {
49
49
  model,
50
50
  messages,
51
51
  ...(shouldUseTools && Object.keys(tools).length > 0 && { tools }),
52
- maxSteps: options.maxSteps || DEFAULT_MAX_STEPS,
53
- ...(shouldUseTools && { toolChoice: "auto" }),
52
+ maxSteps: options.maxSteps ?? DEFAULT_MAX_STEPS,
53
+ ...(shouldUseTools &&
54
+ options.toolChoice && { toolChoice: options.toolChoice }),
55
+ ...(options.prepareStep && {
56
+ experimental_prepareStep: options.prepareStep,
57
+ }),
54
58
  temperature: options.temperature,
55
59
  maxTokens: options.maxTokens,
56
60
  abortSignal: options.abortSignal,
@@ -1570,6 +1570,9 @@ Current user's request: ${currentInput}`;
1570
1570
  disableTools: options.disableTools,
1571
1571
  toolFilter: options.toolFilter,
1572
1572
  excludeTools: options.excludeTools,
1573
+ maxSteps: options.maxSteps,
1574
+ toolChoice: options.toolChoice,
1575
+ prepareStep: options.prepareStep,
1573
1576
  enableAnalytics: options.enableAnalytics,
1574
1577
  enableEvaluation: options.enableEvaluation,
1575
1578
  context: options.context,
@@ -1,4 +1,4 @@
1
- import type { Schema, Tool } from "ai";
1
+ import type { Schema, Tool, ToolChoice, StepResult, LanguageModel } from "ai";
2
2
  import type { AIProviderName } from "../constants/enums.js";
3
3
  import type { RAGConfig } from "../rag/types.js";
4
4
  import type { AnalyticsData, TokenUsage } from "./analytics.js";
@@ -248,6 +248,55 @@ export type GenerateOptions = {
248
248
  * Default: false (backward compatible — tool schemas are injected into system prompt).
249
249
  */
250
250
  skipToolPromptInjection?: boolean;
251
+ /** Maximum number of tool execution steps (default: 200) */
252
+ maxSteps?: number;
253
+ /**
254
+ * Tool choice configuration for the generation.
255
+ * Controls whether and which tools the model must call.
256
+ *
257
+ * - `"auto"` (default): the model can choose whether and which tools to call
258
+ * - `"none"`: no tool calls allowed
259
+ * - `"required"`: the model must call at least one tool
260
+ * - `{ type: "tool", toolName: string }`: the model must call the specified tool
261
+ *
262
+ * Note: When used without `prepareStep`, this applies to **every step** in the
263
+ * `maxSteps` loop. Using `"required"` or `{ type: "tool" }` without `prepareStep`
264
+ * will force a tool call on every step until `maxSteps` is exhausted.
265
+ */
266
+ toolChoice?: ToolChoice<Record<string, Tool>>;
267
+ /**
268
+ * Optional callback that runs before each step in a multi-step generation.
269
+ * Allows dynamically changing `toolChoice` and available tools per step.
270
+ *
271
+ * This is the recommended way to enforce specific tool calls on certain steps
272
+ * while allowing the model freedom on others.
273
+ *
274
+ * Maps to Vercel AI SDK's `experimental_prepareStep`.
275
+ *
276
+ * @example Force a specific tool on step 0, then switch to auto:
277
+ * ```typescript
278
+ * prepareStep: ({ stepNumber, steps }) => {
279
+ * if (stepNumber === 0) {
280
+ * return {
281
+ * toolChoice: { type: 'tool', toolName: 'myTool' }
282
+ * };
283
+ * }
284
+ * return { toolChoice: 'auto' };
285
+ * }
286
+ * ```
287
+ *
288
+ * @see https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#parameters
289
+ */
290
+ prepareStep?: (options: {
291
+ steps: StepResult<Record<string, Tool>>[];
292
+ stepNumber: number;
293
+ maxSteps: number;
294
+ model: LanguageModel;
295
+ }) => PromiseLike<{
296
+ model?: LanguageModel;
297
+ toolChoice?: ToolChoice<Record<string, Tool>>;
298
+ experimental_activeTools?: string[];
299
+ } | undefined>;
251
300
  enableEvaluation?: boolean;
252
301
  enableAnalytics?: boolean;
253
302
  context?: StandardRecord;
@@ -569,6 +618,53 @@ export type TextGenerationOptions = {
569
618
  toolFilter?: string[];
570
619
  /** Exclude these tools by name (blacklist). Applied after toolFilter. */
571
620
  excludeTools?: string[];
621
+ /**
622
+ * Tool choice configuration for the generation.
623
+ * Controls whether and which tools the model must call.
624
+ *
625
+ * - `"auto"` (default): the model can choose whether and which tools to call
626
+ * - `"none"`: no tool calls allowed
627
+ * - `"required"`: the model must call at least one tool
628
+ * - `{ type: "tool", toolName: string }`: the model must call the specified tool
629
+ *
630
+ * Note: When used without `prepareStep`, this applies to **every step** in the
631
+ * `maxSteps` loop. Using `"required"` or `{ type: "tool" }` without `prepareStep`
632
+ * will cause infinite tool calls until `maxSteps` is exhausted.
633
+ */
634
+ toolChoice?: ToolChoice<Record<string, Tool>>;
635
+ /**
636
+ * Optional callback that runs before each step in a multi-step generation.
637
+ * Allows dynamically changing `toolChoice` and available tools per step.
638
+ *
639
+ * This is the recommended way to enforce specific tool calls on certain steps
640
+ * while allowing the model freedom on others.
641
+ *
642
+ * Maps to Vercel AI SDK's `experimental_prepareStep`.
643
+ *
644
+ * @example Force a specific tool on step 0, then switch to auto:
645
+ * ```typescript
646
+ * prepareStep: ({ stepNumber, steps }) => {
647
+ * if (stepNumber === 0) {
648
+ * return {
649
+ * toolChoice: { type: 'tool', toolName: 'myTool' }
650
+ * };
651
+ * }
652
+ * return { toolChoice: 'auto' };
653
+ * }
654
+ * ```
655
+ *
656
+ * @see https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#parameters
657
+ */
658
+ prepareStep?: (options: {
659
+ steps: StepResult<Record<string, Tool>>[];
660
+ stepNumber: number;
661
+ maxSteps: number;
662
+ model: LanguageModel;
663
+ }) => PromiseLike<{
664
+ model?: LanguageModel;
665
+ toolChoice?: ToolChoice<Record<string, Tool>>;
666
+ experimental_activeTools?: string[];
667
+ } | undefined>;
572
668
  /**
573
669
  * Text-to-Speech (TTS) configuration
574
670
  *
@@ -9,9 +9,10 @@ import type { CoreMessage } from "ai";
9
9
  import { AIProviderName } from "../constants/enums.js";
10
10
  /**
11
11
  * Check if messages contain video frames (images)
12
+ * Only checks user messages to match buildContentParts behavior
12
13
  *
13
14
  * @param messages - Array of CoreMessage objects
14
- * @returns true if video frames are present
15
+ * @returns true if video frames are present in user messages
15
16
  */
16
17
  export declare function hasVideoFrames(messages: CoreMessage[]): boolean;
17
18
  /**
@@ -9,12 +9,17 @@ import { AIProviderName } from "../constants/enums.js";
9
9
  import { logger } from "./logger.js";
10
10
  /**
11
11
  * Check if messages contain video frames (images)
12
+ * Only checks user messages to match buildContentParts behavior
12
13
  *
13
14
  * @param messages - Array of CoreMessage objects
14
- * @returns true if video frames are present
15
+ * @returns true if video frames are present in user messages
15
16
  */
16
17
  export function hasVideoFrames(messages) {
17
18
  return messages.some((msg) => {
19
+ // Only check user messages to match buildContentParts behavior
20
+ if (msg.role !== "user") {
21
+ return false;
22
+ }
18
23
  if (Array.isArray(msg.content)) {
19
24
  return msg.content.some((part) => typeof part === "object" &&
20
25
  part !== null &&
@@ -42,7 +47,7 @@ export async function executeVideoAnalysis(messages, options) {
42
47
  options.providerName === AIProviderName.VERTEX
43
48
  ? AIProviderName.VERTEX
44
49
  : AIProviderName.AUTO;
45
- const videoAnalysisText = await analyzeVideo(messages[0], {
50
+ const videoAnalysisText = await analyzeVideo(messages, {
46
51
  provider: provider,
47
52
  project: options.region
48
53
  ? undefined
package/dist/neurolink.js CHANGED
@@ -1570,6 +1570,9 @@ Current user's request: ${currentInput}`;
1570
1570
  disableTools: options.disableTools,
1571
1571
  toolFilter: options.toolFilter,
1572
1572
  excludeTools: options.excludeTools,
1573
+ maxSteps: options.maxSteps,
1574
+ toolChoice: options.toolChoice,
1575
+ prepareStep: options.prepareStep,
1573
1576
  enableAnalytics: options.enableAnalytics,
1574
1577
  enableEvaluation: options.enableEvaluation,
1575
1578
  context: options.context,
@@ -1,4 +1,4 @@
1
- import type { Schema, Tool } from "ai";
1
+ import type { Schema, Tool, ToolChoice, StepResult, LanguageModel } from "ai";
2
2
  import type { AIProviderName } from "../constants/enums.js";
3
3
  import type { RAGConfig } from "../rag/types.js";
4
4
  import type { AnalyticsData, TokenUsage } from "./analytics.js";
@@ -248,6 +248,55 @@ export type GenerateOptions = {
248
248
  * Default: false (backward compatible — tool schemas are injected into system prompt).
249
249
  */
250
250
  skipToolPromptInjection?: boolean;
251
+ /** Maximum number of tool execution steps (default: 200) */
252
+ maxSteps?: number;
253
+ /**
254
+ * Tool choice configuration for the generation.
255
+ * Controls whether and which tools the model must call.
256
+ *
257
+ * - `"auto"` (default): the model can choose whether and which tools to call
258
+ * - `"none"`: no tool calls allowed
259
+ * - `"required"`: the model must call at least one tool
260
+ * - `{ type: "tool", toolName: string }`: the model must call the specified tool
261
+ *
262
+ * Note: When used without `prepareStep`, this applies to **every step** in the
263
+ * `maxSteps` loop. Using `"required"` or `{ type: "tool" }` without `prepareStep`
264
+ * will force a tool call on every step until `maxSteps` is exhausted.
265
+ */
266
+ toolChoice?: ToolChoice<Record<string, Tool>>;
267
+ /**
268
+ * Optional callback that runs before each step in a multi-step generation.
269
+ * Allows dynamically changing `toolChoice` and available tools per step.
270
+ *
271
+ * This is the recommended way to enforce specific tool calls on certain steps
272
+ * while allowing the model freedom on others.
273
+ *
274
+ * Maps to Vercel AI SDK's `experimental_prepareStep`.
275
+ *
276
+ * @example Force a specific tool on step 0, then switch to auto:
277
+ * ```typescript
278
+ * prepareStep: ({ stepNumber, steps }) => {
279
+ * if (stepNumber === 0) {
280
+ * return {
281
+ * toolChoice: { type: 'tool', toolName: 'myTool' }
282
+ * };
283
+ * }
284
+ * return { toolChoice: 'auto' };
285
+ * }
286
+ * ```
287
+ *
288
+ * @see https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#parameters
289
+ */
290
+ prepareStep?: (options: {
291
+ steps: StepResult<Record<string, Tool>>[];
292
+ stepNumber: number;
293
+ maxSteps: number;
294
+ model: LanguageModel;
295
+ }) => PromiseLike<{
296
+ model?: LanguageModel;
297
+ toolChoice?: ToolChoice<Record<string, Tool>>;
298
+ experimental_activeTools?: string[];
299
+ } | undefined>;
251
300
  enableEvaluation?: boolean;
252
301
  enableAnalytics?: boolean;
253
302
  context?: StandardRecord;
@@ -569,6 +618,53 @@ export type TextGenerationOptions = {
569
618
  toolFilter?: string[];
570
619
  /** Exclude these tools by name (blacklist). Applied after toolFilter. */
571
620
  excludeTools?: string[];
621
+ /**
622
+ * Tool choice configuration for the generation.
623
+ * Controls whether and which tools the model must call.
624
+ *
625
+ * - `"auto"` (default): the model can choose whether and which tools to call
626
+ * - `"none"`: no tool calls allowed
627
+ * - `"required"`: the model must call at least one tool
628
+ * - `{ type: "tool", toolName: string }`: the model must call the specified tool
629
+ *
630
+ * Note: When used without `prepareStep`, this applies to **every step** in the
631
+ * `maxSteps` loop. Using `"required"` or `{ type: "tool" }` without `prepareStep`
632
+ * will cause infinite tool calls until `maxSteps` is exhausted.
633
+ */
634
+ toolChoice?: ToolChoice<Record<string, Tool>>;
635
+ /**
636
+ * Optional callback that runs before each step in a multi-step generation.
637
+ * Allows dynamically changing `toolChoice` and available tools per step.
638
+ *
639
+ * This is the recommended way to enforce specific tool calls on certain steps
640
+ * while allowing the model freedom on others.
641
+ *
642
+ * Maps to Vercel AI SDK's `experimental_prepareStep`.
643
+ *
644
+ * @example Force a specific tool on step 0, then switch to auto:
645
+ * ```typescript
646
+ * prepareStep: ({ stepNumber, steps }) => {
647
+ * if (stepNumber === 0) {
648
+ * return {
649
+ * toolChoice: { type: 'tool', toolName: 'myTool' }
650
+ * };
651
+ * }
652
+ * return { toolChoice: 'auto' };
653
+ * }
654
+ * ```
655
+ *
656
+ * @see https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#parameters
657
+ */
658
+ prepareStep?: (options: {
659
+ steps: StepResult<Record<string, Tool>>[];
660
+ stepNumber: number;
661
+ maxSteps: number;
662
+ model: LanguageModel;
663
+ }) => PromiseLike<{
664
+ model?: LanguageModel;
665
+ toolChoice?: ToolChoice<Record<string, Tool>>;
666
+ experimental_activeTools?: string[];
667
+ } | undefined>;
572
668
  /**
573
669
  * Text-to-Speech (TTS) configuration
574
670
  *
@@ -9,9 +9,10 @@ import type { CoreMessage } from "ai";
9
9
  import { AIProviderName } from "../constants/enums.js";
10
10
  /**
11
11
  * Check if messages contain video frames (images)
12
+ * Only checks user messages to match buildContentParts behavior
12
13
  *
13
14
  * @param messages - Array of CoreMessage objects
14
- * @returns true if video frames are present
15
+ * @returns true if video frames are present in user messages
15
16
  */
16
17
  export declare function hasVideoFrames(messages: CoreMessage[]): boolean;
17
18
  /**
@@ -9,12 +9,17 @@ import { AIProviderName } from "../constants/enums.js";
9
9
  import { logger } from "./logger.js";
10
10
  /**
11
11
  * Check if messages contain video frames (images)
12
+ * Only checks user messages to match buildContentParts behavior
12
13
  *
13
14
  * @param messages - Array of CoreMessage objects
14
- * @returns true if video frames are present
15
+ * @returns true if video frames are present in user messages
15
16
  */
16
17
  export function hasVideoFrames(messages) {
17
18
  return messages.some((msg) => {
19
+ // Only check user messages to match buildContentParts behavior
20
+ if (msg.role !== "user") {
21
+ return false;
22
+ }
18
23
  if (Array.isArray(msg.content)) {
19
24
  return msg.content.some((part) => typeof part === "object" &&
20
25
  part !== null &&
@@ -42,7 +47,7 @@ export async function executeVideoAnalysis(messages, options) {
42
47
  options.providerName === AIProviderName.VERTEX
43
48
  ? AIProviderName.VERTEX
44
49
  : AIProviderName.AUTO;
45
- const videoAnalysisText = await analyzeVideo(messages[0], {
50
+ const videoAnalysisText = await analyzeVideo(messages, {
46
51
  provider: provider,
47
52
  project: options.region
48
53
  ? undefined
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@juspay/neurolink",
3
- "version": "9.9.0",
3
+ "version": "9.10.1",
4
4
  "description": "Universal AI Development Platform with working MCP integration, multi-provider support, and professional CLI. Built-in tools operational, 58+ external MCP servers discoverable. Connect to filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 13 providers: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure, Hugging Face, Ollama, and Mistral AI.",
5
5
  "author": {
6
6
  "name": "Juspay Technologies",