@juspay/neurolink 9.8.0 → 9.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +13 -11
- package/dist/adapters/video/videoAnalyzer.d.ts +26 -0
- package/dist/adapters/video/videoAnalyzer.js +222 -0
- package/dist/cli/loop/optionsSchema.d.ts +1 -1
- package/dist/core/baseProvider.js +20 -0
- package/dist/core/modules/GenerationHandler.js +6 -2
- package/dist/lib/adapters/video/videoAnalyzer.d.ts +26 -0
- package/dist/lib/adapters/video/videoAnalyzer.js +223 -0
- package/dist/lib/core/baseProvider.js +20 -0
- package/dist/lib/core/modules/GenerationHandler.js +6 -2
- package/dist/lib/neurolink.js +3 -0
- package/dist/lib/processors/media/VideoProcessor.js +9 -12
- package/dist/lib/types/generateTypes.d.ts +98 -1
- package/dist/lib/utils/videoAnalysisProcessor.d.ts +30 -0
- package/dist/lib/utils/videoAnalysisProcessor.js +59 -0
- package/dist/neurolink.js +3 -0
- package/dist/processors/media/VideoProcessor.js +9 -12
- package/dist/types/generateTypes.d.ts +98 -1
- package/dist/utils/videoAnalysisProcessor.d.ts +30 -0
- package/dist/utils/videoAnalysisProcessor.js +58 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,15 @@
|
|
|
1
|
+
## [9.10.0](https://github.com/juspay/neurolink/compare/v9.9.0...v9.10.0) (2026-02-20)
|
|
2
|
+
|
|
3
|
+
### Features
|
|
4
|
+
|
|
5
|
+
- **(generateText):** add prepareStep and toolChoice passthrough support for multi-step agentic generation ([4cd340a](https://github.com/juspay/neurolink/commit/4cd340af7d39f72006d09fe86569232d751dcd8d))
|
|
6
|
+
|
|
7
|
+
## [9.9.0](https://github.com/juspay/neurolink/compare/v9.8.0...v9.9.0) (2026-02-17)
|
|
8
|
+
|
|
9
|
+
### Features
|
|
10
|
+
|
|
11
|
+
- **(video-analysis):** add video-analysis support in neurolink ([c35f8a8](https://github.com/juspay/neurolink/commit/c35f8a8d52cc1366e10b8701285e1bec52e27d98))
|
|
12
|
+
|
|
1
13
|
## [9.8.0](https://github.com/juspay/neurolink/compare/v9.7.0...v9.8.0) (2026-02-17)
|
|
2
14
|
|
|
3
15
|
### Features
|
package/README.md
CHANGED
|
@@ -35,17 +35,18 @@ Extracted from production systems at Juspay and battle-tested at enterprise scal
|
|
|
35
35
|
|
|
36
36
|
## What's New (Q1 2026)
|
|
37
37
|
|
|
38
|
-
| Feature | Version | Description | Guide
|
|
39
|
-
| ----------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
40
|
-
| **Context Window Management** | v9.2.0 | 4-stage compaction pipeline with auto-detection, budget gate at 80% usage, per-provider token estimation | [Context Compaction Guide](docs/features/context-compaction.md)
|
|
41
|
-
| **
|
|
42
|
-
| **
|
|
43
|
-
| **
|
|
44
|
-
| **
|
|
45
|
-
| **
|
|
46
|
-
| **
|
|
47
|
-
| **
|
|
48
|
-
| **
|
|
38
|
+
| Feature | Version | Description | Guide |
|
|
39
|
+
| ----------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------- |
|
|
40
|
+
| **Context Window Management** | v9.2.0 | 4-stage compaction pipeline with auto-detection, budget gate at 80% usage, per-provider token estimation | [Context Compaction Guide](docs/features/context-compaction.md) |
|
|
41
|
+
| **Tool Execution Control** | v9.3.0 | `prepareStep` and `toolChoice` support for per-step tool enforcement in multi-step agentic loops. API-level control over tool calls. | [API Reference](docs/api/type-aliases/GenerateOptions.md#preparestep) |
|
|
42
|
+
| **File Processor System** | v9.1.0 | 17+ file type processors with ProcessorRegistry, security sanitization, SVG text injection | [File Processors Guide](docs/features/file-processors.md) |
|
|
43
|
+
| **RAG with generate()/stream()** | v9.2.0 | Pass `rag: { files }` to generate/stream for automatic document chunking, embedding, and AI-powered search. 10 chunking strategies, hybrid search, reranking. | [RAG Guide](docs/features/rag.md) |
|
|
44
|
+
| **External TracerProvider Support** | v8.43.0 | Integrate NeuroLink with existing OpenTelemetry instrumentation. Prevents duplicate registration conflicts. | [Observability Guide](docs/features/observability.md) |
|
|
45
|
+
| **Server Adapters** | v8.43.0 | Multi-framework HTTP server with Hono, Express, Fastify, Koa support. Full CLI for server management with foreground/background modes. | [Server Adapters Guide](docs/guides/server-adapters/index.md) |
|
|
46
|
+
| **Title Generation Events** | v8.38.0 | Emit `conversation:titleGenerated` event when conversation title is generated. Supports custom title prompts via `NEUROLINK_TITLE_PROMPT`. | [Conversation Memory Guide](docs/conversation-memory.md) |
|
|
47
|
+
| **Video Generation with Veo** | v8.32.0 | Video generation using Veo 3.1 (`veo-3.1`). Realistic video generation with many parameter options | [Video Generation Guide](docs/features/video-generation.md) |
|
|
48
|
+
| **Image Generation with Gemini** | v8.31.0 | Native image generation using Gemini 2.0 Flash Experimental (`imagen-3.0-generate-002`). High-quality image synthesis directly from Google AI. | [Image Generation Guide](docs/image-generation-streaming.md) |
|
|
49
|
+
| **HTTP/Streamable HTTP Transport** | v8.29.0 | Connect to remote MCP servers via HTTP with authentication headers, automatic retry with exponential backoff, and configurable rate limiting. | [HTTP Transport Guide](docs/mcp-http-transport.md) |
|
|
49
50
|
|
|
50
51
|
- **External TracerProvider Support** – Integrate NeuroLink with applications that already have OpenTelemetry instrumentation. Supports auto-detection and manual configuration. → [Observability Guide](docs/features/observability.md)
|
|
51
52
|
- **Server Adapters** – Deploy NeuroLink as an HTTP API server with your framework of choice (Hono, Express, Fastify, Koa). Full CLI support with `serve` and `server` commands for foreground/background modes, route management, and OpenAPI generation. → [Server Adapters Guide](docs/guides/server-adapters/index.md)
|
|
@@ -56,6 +57,7 @@ Extracted from production systems at Juspay and battle-tested at enterprise scal
|
|
|
56
57
|
- **RAG with generate()/stream()** – Just pass `rag: { files: ["./docs/guide.md"] }` to `generate()` or `stream()`. NeuroLink auto-chunks, embeds, and creates a search tool the AI can invoke. 10 chunking strategies, hybrid search, 5 reranker types. → [RAG Guide](docs/features/rag.md)
|
|
57
58
|
- **HTTP/Streamable HTTP Transport for MCP** – Connect to remote MCP servers via HTTP with authentication headers, retry logic, and rate limiting. → [HTTP Transport Guide](docs/mcp-http-transport.md)
|
|
58
59
|
- 🧠 **Gemini 3 Preview Support** - Full support for gemini-3-flash-preview and gemini-3-pro-preview with extended thinking capabilities
|
|
60
|
+
- 🎯 **Tool Execution Control** – Use `prepareStep` to enforce specific tool calls, change the LLM models per step in multi-step agentic executions. Prevents LLMs from skipping required tools. Use `toolChoice` for static control, or `prepareStep` for dynamic per-step logic. → [GenerateOptions Reference](docs/api/type-aliases/GenerateOptions.md#preparestep)
|
|
59
61
|
- **Structured Output with Zod Schemas** – Type-safe JSON generation with automatic validation using `schema` + `output.format: "json"` in `generate()`. → [Structured Output Guide](docs/features/structured-output.md)
|
|
60
62
|
- **CSV File Support** – Attach CSV files to prompts for AI-powered data analysis with auto-detection. → [CSV Guide](docs/features/multimodal-chat.md#csv-file-support)
|
|
61
63
|
- **PDF File Support** – Process PDF documents with native visual analysis for Vertex AI, Anthropic, Bedrock, AI Studio. → [PDF Guide](docs/features/pdf-support.md)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Video Analysis Handler
|
|
3
|
+
*
|
|
4
|
+
* Provides video analysis using Google's Gemini 2.0 Flash model.
|
|
5
|
+
* Supports both Vertex AI and Gemini API providers.
|
|
6
|
+
*
|
|
7
|
+
* @module adapters/video/geminiVideoAnalyzer
|
|
8
|
+
*/
|
|
9
|
+
import { AIProviderName } from "../../constants/enums.js";
|
|
10
|
+
import type { CoreMessage } from "ai";
|
|
11
|
+
export declare function analyzeVideoWithVertexAI(frames: CoreMessage, options?: {
|
|
12
|
+
project?: string;
|
|
13
|
+
location?: string;
|
|
14
|
+
model?: string;
|
|
15
|
+
}): Promise<string>;
|
|
16
|
+
export declare function analyzeVideoWithGeminiAPI(frames: CoreMessage, options?: {
|
|
17
|
+
apiKey?: string;
|
|
18
|
+
model?: string;
|
|
19
|
+
}): Promise<string>;
|
|
20
|
+
export declare function analyzeVideo(frames: CoreMessage, options?: {
|
|
21
|
+
provider?: AIProviderName;
|
|
22
|
+
project?: string;
|
|
23
|
+
location?: string;
|
|
24
|
+
apiKey?: string;
|
|
25
|
+
model?: string;
|
|
26
|
+
}): Promise<string>;
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Video Analysis Handler
|
|
3
|
+
*
|
|
4
|
+
* Provides video analysis using Google's Gemini 2.0 Flash model.
|
|
5
|
+
* Supports both Vertex AI and Gemini API providers.
|
|
6
|
+
*
|
|
7
|
+
* @module adapters/video/geminiVideoAnalyzer
|
|
8
|
+
*/
|
|
9
|
+
import { AIProviderName, ErrorSeverity, ErrorCategory, } from "../../constants/enums.js";
|
|
10
|
+
import { logger } from "../../utils/logger.js";
|
|
11
|
+
import { readFile } from "node:fs/promises";
|
|
12
|
+
import { NeuroLinkError } from "../../utils/errorHandling.js";
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Shared config
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
const DEFAULT_MODEL = "gemini-2.0-flash";
|
|
17
|
+
const DEFAULT_LOCATION = "us-central1";
|
|
18
|
+
/**
|
|
19
|
+
* Convert CoreMessage content array to Gemini parts format
|
|
20
|
+
*
|
|
21
|
+
* @param contentArray - Array of content items from CoreMessage
|
|
22
|
+
* @returns Array of parts in Gemini API format
|
|
23
|
+
*/
|
|
24
|
+
function buildContentParts(frames) {
|
|
25
|
+
const contentArray = Array.isArray(frames.content) ? frames.content : [];
|
|
26
|
+
return contentArray.map((item) => {
|
|
27
|
+
if (item.type === "text" && item.text) {
|
|
28
|
+
return { text: item.text };
|
|
29
|
+
}
|
|
30
|
+
else if (item.type === "image" && item.image) {
|
|
31
|
+
let base64Data;
|
|
32
|
+
// Handle Buffer or Uint8Array
|
|
33
|
+
if (Buffer.isBuffer(item.image) || item.image instanceof Uint8Array) {
|
|
34
|
+
base64Data = Buffer.from(item.image).toString("base64");
|
|
35
|
+
}
|
|
36
|
+
else if (typeof item.image === "string") {
|
|
37
|
+
// Strip data URI prefix if present (e.g., "data:image/jpeg;base64,")
|
|
38
|
+
base64Data = item.image.replace(/^data:image\/[a-z]+;base64,/, "");
|
|
39
|
+
}
|
|
40
|
+
else {
|
|
41
|
+
throw new Error(`Invalid image data type: expected string, Buffer, or Uint8Array, got ${typeof item.image}`);
|
|
42
|
+
}
|
|
43
|
+
return {
|
|
44
|
+
inlineData: {
|
|
45
|
+
mimeType: "image/jpeg",
|
|
46
|
+
data: base64Data,
|
|
47
|
+
},
|
|
48
|
+
};
|
|
49
|
+
}
|
|
50
|
+
throw new Error(`Invalid content type: ${item.type}`);
|
|
51
|
+
});
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Configuration for video frame analysis.
|
|
55
|
+
* Generic prompt that handles both general content and technical bug reporting.
|
|
56
|
+
*/
|
|
57
|
+
function buildConfig() {
|
|
58
|
+
return {
|
|
59
|
+
systemInstruction: `You are a Visual Analysis Assistant.
|
|
60
|
+
Your task is to analyze images or video frames provided by the user and extract structured visual features. The user may or may not provide an issue description. Your role is to understand the visual content, optionally correlate it with the provided issue, and produce a structured output that can be directly consumed by another LLM for analysis, debugging, or decision-making.
|
|
61
|
+
|
|
62
|
+
Follow these rules strictly:
|
|
63
|
+
- The analysis must be generic and applicable to any domain (UI, dashboards, video frames, animations, charts, documents, etc.).
|
|
64
|
+
- Support both images and videos (single frame or multiple frames).
|
|
65
|
+
- Extract only what is visually observable; do not assume backend behavior unless supported by visuals.
|
|
66
|
+
- The JSON must be structured, consistent, and machine-readable.
|
|
67
|
+
- Logs are optional and should only be included if explicitly provided.
|
|
68
|
+
- The final output must be clear, concise, and actionable for an LLM.
|
|
69
|
+
|
|
70
|
+
Always produce the output in the following format:
|
|
71
|
+
|
|
72
|
+
Issue:
|
|
73
|
+
<Refined issue description if provided, otherwise a clear description of the observed visual situation>
|
|
74
|
+
|
|
75
|
+
Image/Video Patterns:
|
|
76
|
+
<Structured JSON describing extracted visual features and anomalies>
|
|
77
|
+
|
|
78
|
+
Steps to Reproduce:
|
|
79
|
+
<Ordered steps that reliably reproduce the issue based on the visual context>
|
|
80
|
+
|
|
81
|
+
[Logs: Include ONLY if provided by the user]
|
|
82
|
+
|
|
83
|
+
Proof:
|
|
84
|
+
<Visual evidence explaining how the image/video confirms the issue>
|
|
85
|
+
Ensure the final response is fully self-sufficient and does not reference external context.`,
|
|
86
|
+
};
|
|
87
|
+
}
|
|
88
|
+
// ---------------------------------------------------------------------------
|
|
89
|
+
// Vertex AI
|
|
90
|
+
// ---------------------------------------------------------------------------
|
|
91
|
+
export async function analyzeVideoWithVertexAI(frames, options = {}) {
|
|
92
|
+
const startTime = Date.now();
|
|
93
|
+
const { GoogleGenAI } = await import("@google/genai");
|
|
94
|
+
// Get default config and merge with provided options
|
|
95
|
+
const config = await getVertexConfig();
|
|
96
|
+
const project = options.project ?? config.project;
|
|
97
|
+
const location = options.location ?? config.location;
|
|
98
|
+
const model = options.model || DEFAULT_MODEL;
|
|
99
|
+
// Extract content array from CoreMessage
|
|
100
|
+
const contentArray = Array.isArray(frames.content) ? frames.content : [];
|
|
101
|
+
const frameCount = contentArray.filter((item) => item.type === "image").length;
|
|
102
|
+
logger.debug("[GeminiVideoAnalyzer] Analyzing video with Vertex AI", {
|
|
103
|
+
project,
|
|
104
|
+
location,
|
|
105
|
+
model,
|
|
106
|
+
frameCount,
|
|
107
|
+
});
|
|
108
|
+
const ai = new GoogleGenAI({ vertexai: true, project, location });
|
|
109
|
+
// Convert frames content to parts array for Gemini
|
|
110
|
+
const parts = buildContentParts(frames);
|
|
111
|
+
const response = await ai.models.generateContent({
|
|
112
|
+
model,
|
|
113
|
+
config: buildConfig(),
|
|
114
|
+
contents: [
|
|
115
|
+
{
|
|
116
|
+
role: "user",
|
|
117
|
+
parts,
|
|
118
|
+
},
|
|
119
|
+
],
|
|
120
|
+
});
|
|
121
|
+
const responseText = response.text || "";
|
|
122
|
+
const processingTime = Date.now() - startTime;
|
|
123
|
+
logger.debug("[GeminiVideoAnalyzer] Vertex response received", {
|
|
124
|
+
responseLength: responseText.length,
|
|
125
|
+
processingTime,
|
|
126
|
+
});
|
|
127
|
+
return responseText;
|
|
128
|
+
}
|
|
129
|
+
// ---------------------------------------------------------------------------
|
|
130
|
+
// Gemini API (Google AI)
|
|
131
|
+
// ---------------------------------------------------------------------------
|
|
132
|
+
export async function analyzeVideoWithGeminiAPI(frames, options = {}) {
|
|
133
|
+
const startTime = Date.now();
|
|
134
|
+
const { GoogleGenAI } = await import("@google/genai");
|
|
135
|
+
const apiKey = options.apiKey || process.env.GOOGLE_AI_API_KEY;
|
|
136
|
+
const model = options.model || DEFAULT_MODEL;
|
|
137
|
+
if (!apiKey) {
|
|
138
|
+
throw new Error("GOOGLE_AI_API_KEY environment variable is required for Gemini API video analysis");
|
|
139
|
+
}
|
|
140
|
+
// Extract content array from CoreMessage
|
|
141
|
+
const contentArray = Array.isArray(frames.content) ? frames.content : [];
|
|
142
|
+
const frameCount = contentArray.filter((item) => item.type === "image").length;
|
|
143
|
+
logger.debug("[GeminiVideoAnalyzer] Analyzing video with Gemini API", {
|
|
144
|
+
model,
|
|
145
|
+
frameCount,
|
|
146
|
+
});
|
|
147
|
+
const ai = new GoogleGenAI({ apiKey });
|
|
148
|
+
// Convert frames content to parts array for Gemini
|
|
149
|
+
const parts = buildContentParts(frames);
|
|
150
|
+
logger.debug("[GeminiVideoAnalyzer] Generating analysis with frames");
|
|
151
|
+
const response = await ai.models.generateContent({
|
|
152
|
+
model,
|
|
153
|
+
config: buildConfig(),
|
|
154
|
+
contents: [
|
|
155
|
+
{
|
|
156
|
+
role: "user",
|
|
157
|
+
parts,
|
|
158
|
+
},
|
|
159
|
+
],
|
|
160
|
+
});
|
|
161
|
+
const responseText = response.text || "";
|
|
162
|
+
const processingTime = Date.now() - startTime;
|
|
163
|
+
logger.debug("[GeminiVideoAnalyzer] Gemini API response received", {
|
|
164
|
+
responseLength: responseText.length,
|
|
165
|
+
processingTime,
|
|
166
|
+
});
|
|
167
|
+
return responseText;
|
|
168
|
+
}
|
|
169
|
+
async function getVertexConfig() {
|
|
170
|
+
const location = process.env.GOOGLE_VERTEX_LOCATION || DEFAULT_LOCATION;
|
|
171
|
+
// Try environment variables first
|
|
172
|
+
let project = process.env.GOOGLE_VERTEX_PROJECT ||
|
|
173
|
+
process.env.GOOGLE_CLOUD_PROJECT ||
|
|
174
|
+
process.env.GOOGLE_CLOUD_PROJECT_ID ||
|
|
175
|
+
process.env.VERTEX_PROJECT_ID;
|
|
176
|
+
// Fallback: read from ADC credentials file
|
|
177
|
+
if (!project && process.env.GOOGLE_APPLICATION_CREDENTIALS) {
|
|
178
|
+
try {
|
|
179
|
+
const credData = JSON.parse(await readFile(process.env.GOOGLE_APPLICATION_CREDENTIALS, "utf-8"));
|
|
180
|
+
project = credData.quota_project_id || credData.project_id;
|
|
181
|
+
}
|
|
182
|
+
catch (e) {
|
|
183
|
+
// Ignore read errors, will throw below if project still not found
|
|
184
|
+
logger.debug("Failed to read project from credentials file", {
|
|
185
|
+
error: e instanceof Error ? e.message : String(e),
|
|
186
|
+
});
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
if (!project) {
|
|
190
|
+
throw new NeuroLinkError({
|
|
191
|
+
code: "PROVIDER_NOT_CONFIGURED",
|
|
192
|
+
message: "Google Cloud project not found. Set GOOGLE_VERTEX_PROJECT or GOOGLE_CLOUD_PROJECT environment variable, or ensure ADC credentials contain project_id",
|
|
193
|
+
category: ErrorCategory.CONFIGURATION,
|
|
194
|
+
severity: ErrorSeverity.HIGH,
|
|
195
|
+
retriable: false,
|
|
196
|
+
context: {
|
|
197
|
+
missingVar: "GOOGLE_VERTEX_PROJECT",
|
|
198
|
+
feature: "video-generation",
|
|
199
|
+
checkedEnvVars: [
|
|
200
|
+
"GOOGLE_VERTEX_PROJECT",
|
|
201
|
+
"GOOGLE_CLOUD_PROJECT",
|
|
202
|
+
"GOOGLE_CLOUD_PROJECT_ID",
|
|
203
|
+
"VERTEX_PROJECT_ID",
|
|
204
|
+
],
|
|
205
|
+
},
|
|
206
|
+
});
|
|
207
|
+
}
|
|
208
|
+
return { project, location };
|
|
209
|
+
}
|
|
210
|
+
export async function analyzeVideo(frames, options = {}) {
|
|
211
|
+
const provider = options.provider || AIProviderName.AUTO;
|
|
212
|
+
// Vertex — only when GOOGLE_VERTEX_PROJECT is explicitly set
|
|
213
|
+
if (provider === AIProviderName.VERTEX || provider === AIProviderName.AUTO) {
|
|
214
|
+
return analyzeVideoWithVertexAI(frames, options);
|
|
215
|
+
}
|
|
216
|
+
// Gemini API — when GOOGLE_AI_API_KEY is set
|
|
217
|
+
if (provider === AIProviderName.GOOGLE_AI && process.env.GOOGLE_AI_API_KEY) {
|
|
218
|
+
return analyzeVideoWithGeminiAPI(frames, options);
|
|
219
|
+
}
|
|
220
|
+
throw new Error("No valid provider configuration found. " +
|
|
221
|
+
"Set GOOGLE_VERTEX_PROJECT for Vertex AI or GOOGLE_AI_API_KEY for Gemini API.");
|
|
222
|
+
}
|
|
@@ -5,4 +5,4 @@ import type { TextGenerationOptions } from "../../lib/types/generateTypes.js";
|
|
|
5
5
|
* This object provides metadata for validation and help text in the CLI loop.
|
|
6
6
|
* It is derived from the main TextGenerationOptions interface to ensure consistency.
|
|
7
7
|
*/
|
|
8
|
-
export declare const textGenerationOptionsSchema: Record<keyof Omit<TextGenerationOptions, "prompt" | "input" | "schema" | "tools" | "context" | "conversationHistory" | "conversationMessages" | "conversationMemoryConfig" | "originalPrompt" | "middleware" | "expectedOutcome" | "evaluationCriteria" | "region" | "csvOptions" | "tts" | "thinkingConfig" | "fileRegistry" | "abortSignal" | "toolFilter" | "excludeTools">, OptionSchema>;
|
|
8
|
+
export declare const textGenerationOptionsSchema: Record<keyof Omit<TextGenerationOptions, "prompt" | "input" | "schema" | "tools" | "context" | "conversationHistory" | "conversationMessages" | "conversationMemoryConfig" | "originalPrompt" | "middleware" | "expectedOutcome" | "evaluationCriteria" | "region" | "csvOptions" | "tts" | "thinkingConfig" | "fileRegistry" | "abortSignal" | "toolFilter" | "excludeTools" | "toolChoice" | "prepareStep">, OptionSchema>;
|
|
@@ -7,6 +7,7 @@ import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../
|
|
|
7
7
|
import { shouldDisableBuiltinTools } from "../utils/toolUtils.js";
|
|
8
8
|
import { getKeyCount, getKeysAsString } from "../utils/transformationUtils.js";
|
|
9
9
|
import { TTSProcessor } from "../utils/ttsProcessor.js";
|
|
10
|
+
import { hasVideoFrames, executeVideoAnalysis, } from "../utils/videoAnalysisProcessor.js";
|
|
10
11
|
import { GenerationHandler } from "./modules/GenerationHandler.js";
|
|
11
12
|
// Import modules for composition
|
|
12
13
|
import { MessageBuilder } from "./modules/MessageBuilder.js";
|
|
@@ -473,6 +474,25 @@ export class BaseProvider {
|
|
|
473
474
|
// ===== Normal AI Generation Flow =====
|
|
474
475
|
const { tools, model } = await this.prepareGenerationContext(options);
|
|
475
476
|
const messages = await this.buildMessages(options);
|
|
477
|
+
// ===== VIDEO ANALYSIS FROM MESSAGES CONTENT =====
|
|
478
|
+
// Check if video files are present in messages content array
|
|
479
|
+
// If video analysis is needed, perform it and return early to avoid running generation
|
|
480
|
+
if (hasVideoFrames(messages)) {
|
|
481
|
+
const videoAnalysisResult = await executeVideoAnalysis(messages, {
|
|
482
|
+
provider: options.provider,
|
|
483
|
+
providerName: this.providerName,
|
|
484
|
+
region: options.region,
|
|
485
|
+
model: options.model,
|
|
486
|
+
});
|
|
487
|
+
// Return video analysis result directly without running generation
|
|
488
|
+
const videoResult = {
|
|
489
|
+
content: videoAnalysisResult,
|
|
490
|
+
provider: options.provider ?? this.providerName,
|
|
491
|
+
model: this.modelName,
|
|
492
|
+
usage: { input: 0, output: 0, total: 0 }, // Video analysis doesn't use standard token counting
|
|
493
|
+
};
|
|
494
|
+
return await this.enhanceResult(videoResult, options, startTime);
|
|
495
|
+
}
|
|
476
496
|
// Compose timeout signal with user-provided abort signal (mirrors stream path)
|
|
477
497
|
const timeoutController = createTimeoutController(options.timeout, this.providerName, "generate");
|
|
478
498
|
const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
|
|
@@ -49,8 +49,12 @@ export class GenerationHandler {
|
|
|
49
49
|
model,
|
|
50
50
|
messages,
|
|
51
51
|
...(shouldUseTools && Object.keys(tools).length > 0 && { tools }),
|
|
52
|
-
maxSteps: options.maxSteps
|
|
53
|
-
...(shouldUseTools &&
|
|
52
|
+
maxSteps: options.maxSteps ?? DEFAULT_MAX_STEPS,
|
|
53
|
+
...(shouldUseTools &&
|
|
54
|
+
options.toolChoice && { toolChoice: options.toolChoice }),
|
|
55
|
+
...(options.prepareStep && {
|
|
56
|
+
experimental_prepareStep: options.prepareStep,
|
|
57
|
+
}),
|
|
54
58
|
temperature: options.temperature,
|
|
55
59
|
maxTokens: options.maxTokens,
|
|
56
60
|
abortSignal: options.abortSignal,
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Video Analysis Handler
|
|
3
|
+
*
|
|
4
|
+
* Provides video analysis using Google's Gemini 2.0 Flash model.
|
|
5
|
+
* Supports both Vertex AI and Gemini API providers.
|
|
6
|
+
*
|
|
7
|
+
* @module adapters/video/geminiVideoAnalyzer
|
|
8
|
+
*/
|
|
9
|
+
import { AIProviderName } from "../../constants/enums.js";
|
|
10
|
+
import type { CoreMessage } from "ai";
|
|
11
|
+
export declare function analyzeVideoWithVertexAI(frames: CoreMessage, options?: {
|
|
12
|
+
project?: string;
|
|
13
|
+
location?: string;
|
|
14
|
+
model?: string;
|
|
15
|
+
}): Promise<string>;
|
|
16
|
+
export declare function analyzeVideoWithGeminiAPI(frames: CoreMessage, options?: {
|
|
17
|
+
apiKey?: string;
|
|
18
|
+
model?: string;
|
|
19
|
+
}): Promise<string>;
|
|
20
|
+
export declare function analyzeVideo(frames: CoreMessage, options?: {
|
|
21
|
+
provider?: AIProviderName;
|
|
22
|
+
project?: string;
|
|
23
|
+
location?: string;
|
|
24
|
+
apiKey?: string;
|
|
25
|
+
model?: string;
|
|
26
|
+
}): Promise<string>;
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Video Analysis Handler
|
|
3
|
+
*
|
|
4
|
+
* Provides video analysis using Google's Gemini 2.0 Flash model.
|
|
5
|
+
* Supports both Vertex AI and Gemini API providers.
|
|
6
|
+
*
|
|
7
|
+
* @module adapters/video/geminiVideoAnalyzer
|
|
8
|
+
*/
|
|
9
|
+
import { AIProviderName, ErrorSeverity, ErrorCategory, } from "../../constants/enums.js";
|
|
10
|
+
import { logger } from "../../utils/logger.js";
|
|
11
|
+
import { readFile } from "node:fs/promises";
|
|
12
|
+
import { NeuroLinkError } from "../../utils/errorHandling.js";
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Shared config
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
const DEFAULT_MODEL = "gemini-2.0-flash";
|
|
17
|
+
const DEFAULT_LOCATION = "us-central1";
|
|
18
|
+
/**
|
|
19
|
+
* Convert CoreMessage content array to Gemini parts format
|
|
20
|
+
*
|
|
21
|
+
* @param contentArray - Array of content items from CoreMessage
|
|
22
|
+
* @returns Array of parts in Gemini API format
|
|
23
|
+
*/
|
|
24
|
+
function buildContentParts(frames) {
|
|
25
|
+
const contentArray = Array.isArray(frames.content) ? frames.content : [];
|
|
26
|
+
return contentArray.map((item) => {
|
|
27
|
+
if (item.type === "text" && item.text) {
|
|
28
|
+
return { text: item.text };
|
|
29
|
+
}
|
|
30
|
+
else if (item.type === "image" && item.image) {
|
|
31
|
+
let base64Data;
|
|
32
|
+
// Handle Buffer or Uint8Array
|
|
33
|
+
if (Buffer.isBuffer(item.image) || item.image instanceof Uint8Array) {
|
|
34
|
+
base64Data = Buffer.from(item.image).toString("base64");
|
|
35
|
+
}
|
|
36
|
+
else if (typeof item.image === "string") {
|
|
37
|
+
// Strip data URI prefix if present (e.g., "data:image/jpeg;base64,")
|
|
38
|
+
base64Data = item.image.replace(/^data:image\/[a-z]+;base64,/, "");
|
|
39
|
+
}
|
|
40
|
+
else {
|
|
41
|
+
throw new Error(`Invalid image data type: expected string, Buffer, or Uint8Array, got ${typeof item.image}`);
|
|
42
|
+
}
|
|
43
|
+
return {
|
|
44
|
+
inlineData: {
|
|
45
|
+
mimeType: "image/jpeg",
|
|
46
|
+
data: base64Data,
|
|
47
|
+
},
|
|
48
|
+
};
|
|
49
|
+
}
|
|
50
|
+
throw new Error(`Invalid content type: ${item.type}`);
|
|
51
|
+
});
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Configuration for video frame analysis.
|
|
55
|
+
* Generic prompt that handles both general content and technical bug reporting.
|
|
56
|
+
*/
|
|
57
|
+
function buildConfig() {
|
|
58
|
+
return {
|
|
59
|
+
systemInstruction: `You are a Visual Analysis Assistant.
|
|
60
|
+
Your task is to analyze images or video frames provided by the user and extract structured visual features. The user may or may not provide an issue description. Your role is to understand the visual content, optionally correlate it with the provided issue, and produce a structured output that can be directly consumed by another LLM for analysis, debugging, or decision-making.
|
|
61
|
+
|
|
62
|
+
Follow these rules strictly:
|
|
63
|
+
- The analysis must be generic and applicable to any domain (UI, dashboards, video frames, animations, charts, documents, etc.).
|
|
64
|
+
- Support both images and videos (single frame or multiple frames).
|
|
65
|
+
- Extract only what is visually observable; do not assume backend behavior unless supported by visuals.
|
|
66
|
+
- The JSON must be structured, consistent, and machine-readable.
|
|
67
|
+
- Logs are optional and should only be included if explicitly provided.
|
|
68
|
+
- The final output must be clear, concise, and actionable for an LLM.
|
|
69
|
+
|
|
70
|
+
Always produce the output in the following format:
|
|
71
|
+
|
|
72
|
+
Issue:
|
|
73
|
+
<Refined issue description if provided, otherwise a clear description of the observed visual situation>
|
|
74
|
+
|
|
75
|
+
Image/Video Patterns:
|
|
76
|
+
<Structured JSON describing extracted visual features and anomalies>
|
|
77
|
+
|
|
78
|
+
Steps to Reproduce:
|
|
79
|
+
<Ordered steps that reliably reproduce the issue based on the visual context>
|
|
80
|
+
|
|
81
|
+
[Logs: Include ONLY if provided by the user]
|
|
82
|
+
|
|
83
|
+
Proof:
|
|
84
|
+
<Visual evidence explaining how the image/video confirms the issue>
|
|
85
|
+
Ensure the final response is fully self-sufficient and does not reference external context.`,
|
|
86
|
+
};
|
|
87
|
+
}
|
|
88
|
+
// ---------------------------------------------------------------------------
|
|
89
|
+
// Vertex AI
|
|
90
|
+
// ---------------------------------------------------------------------------
|
|
91
|
+
export async function analyzeVideoWithVertexAI(frames, options = {}) {
|
|
92
|
+
const startTime = Date.now();
|
|
93
|
+
const { GoogleGenAI } = await import("@google/genai");
|
|
94
|
+
// Get default config and merge with provided options
|
|
95
|
+
const config = await getVertexConfig();
|
|
96
|
+
const project = options.project ?? config.project;
|
|
97
|
+
const location = options.location ?? config.location;
|
|
98
|
+
const model = options.model || DEFAULT_MODEL;
|
|
99
|
+
// Extract content array from CoreMessage
|
|
100
|
+
const contentArray = Array.isArray(frames.content) ? frames.content : [];
|
|
101
|
+
const frameCount = contentArray.filter((item) => item.type === "image").length;
|
|
102
|
+
logger.debug("[GeminiVideoAnalyzer] Analyzing video with Vertex AI", {
|
|
103
|
+
project,
|
|
104
|
+
location,
|
|
105
|
+
model,
|
|
106
|
+
frameCount,
|
|
107
|
+
});
|
|
108
|
+
const ai = new GoogleGenAI({ vertexai: true, project, location });
|
|
109
|
+
// Convert frames content to parts array for Gemini
|
|
110
|
+
const parts = buildContentParts(frames);
|
|
111
|
+
const response = await ai.models.generateContent({
|
|
112
|
+
model,
|
|
113
|
+
config: buildConfig(),
|
|
114
|
+
contents: [
|
|
115
|
+
{
|
|
116
|
+
role: "user",
|
|
117
|
+
parts,
|
|
118
|
+
},
|
|
119
|
+
],
|
|
120
|
+
});
|
|
121
|
+
const responseText = response.text || "";
|
|
122
|
+
const processingTime = Date.now() - startTime;
|
|
123
|
+
logger.debug("[GeminiVideoAnalyzer] Vertex response received", {
|
|
124
|
+
responseLength: responseText.length,
|
|
125
|
+
processingTime,
|
|
126
|
+
});
|
|
127
|
+
return responseText;
|
|
128
|
+
}
|
|
129
|
+
// ---------------------------------------------------------------------------
|
|
130
|
+
// Gemini API (Google AI)
|
|
131
|
+
// ---------------------------------------------------------------------------
|
|
132
|
+
/**
 * Analyze extracted video frames with the public Gemini API (Google AI).
 *
 * @param {object} frames - CoreMessage-like object whose `content` array holds
 *   the extracted frame images (and any accompanying text prompt).
 * @param {object} [options] - Optional overrides: `apiKey`, `model`.
 * @returns {Promise<string>} Text analysis produced by the model ("" if empty).
 * @throws {Error} When no API key is supplied and GOOGLE_AI_API_KEY is unset.
 */
export async function analyzeVideoWithGeminiAPI(frames, options = {}) {
    const startedAt = Date.now();
    const { GoogleGenAI } = await import("@google/genai");
    const apiKey = options.apiKey || process.env.GOOGLE_AI_API_KEY;
    const model = options.model || DEFAULT_MODEL;
    if (!apiKey) {
        throw new Error("GOOGLE_AI_API_KEY environment variable is required for Gemini API video analysis");
    }
    // Count image parts purely for diagnostics.
    const content = Array.isArray(frames.content) ? frames.content : [];
    const frameCount = content.filter((part) => part.type === "image").length;
    logger.debug("[GeminiVideoAnalyzer] Analyzing video with Gemini API", {
        model,
        frameCount,
    });
    const ai = new GoogleGenAI({ apiKey });
    // Translate the CoreMessage content into Gemini request parts.
    const parts = buildContentParts(frames);
    logger.debug("[GeminiVideoAnalyzer] Generating analysis with frames");
    const response = await ai.models.generateContent({
        model,
        config: buildConfig(),
        contents: [{ role: "user", parts }],
    });
    const responseText = response.text || "";
    const processingTime = Date.now() - startedAt;
    logger.debug("[GeminiVideoAnalyzer] Gemini API response received", {
        responseLength: responseText.length,
        processingTime,
    });
    return responseText;
}
|
|
169
|
+
/**
 * Resolve the Google Cloud project and location for Vertex AI calls.
 *
 * Resolution order for the project: well-known environment variables first,
 * then the ADC credentials file pointed to by GOOGLE_APPLICATION_CREDENTIALS.
 *
 * @returns {Promise<{project: string, location: string}>}
 * @throws {NeuroLinkError} PROVIDER_NOT_CONFIGURED when no project is found.
 */
async function getVertexConfig() {
    const location = process.env.GOOGLE_VERTEX_LOCATION || DEFAULT_LOCATION;
    // Environment variables take precedence over the credentials file.
    let project = process.env.GOOGLE_VERTEX_PROJECT ||
        process.env.GOOGLE_CLOUD_PROJECT ||
        process.env.GOOGLE_CLOUD_PROJECT_ID ||
        process.env.VERTEX_PROJECT_ID;
    // Fallback: pull the project id out of the ADC credentials JSON.
    if (!project && process.env.GOOGLE_APPLICATION_CREDENTIALS) {
        try {
            const raw = await readFile(process.env.GOOGLE_APPLICATION_CREDENTIALS, "utf-8");
            const credData = JSON.parse(raw);
            project = credData.quota_project_id || credData.project_id;
        }
        catch (e) {
            // Best effort only — the configuration error below covers the
            // case where the project is still unresolved.
            logger.debug("Failed to read project from credentials file", {
                error: e instanceof Error ? e.message : String(e),
            });
        }
    }
    if (!project) {
        throw new NeuroLinkError({
            code: "PROVIDER_NOT_CONFIGURED",
            message: "Google Cloud project not found. Set GOOGLE_VERTEX_PROJECT or GOOGLE_CLOUD_PROJECT environment variable, or ensure ADC credentials contain project_id",
            category: ErrorCategory.CONFIGURATION,
            severity: ErrorSeverity.HIGH,
            retriable: false,
            context: {
                missingVar: "GOOGLE_VERTEX_PROJECT",
                // NOTE(review): label says "video-generation" but this module is
                // the video *analyzer* — confirm whether this was copy-pasted.
                feature: "video-generation",
                checkedEnvVars: [
                    "GOOGLE_VERTEX_PROJECT",
                    "GOOGLE_CLOUD_PROJECT",
                    "GOOGLE_CLOUD_PROJECT_ID",
                    "VERTEX_PROJECT_ID",
                ],
            },
        });
    }
    return { project, location };
}
|
|
210
|
+
/**
 * Route video-frame analysis to the appropriate Gemini backend.
 *
 * Routing rules:
 * - explicit VERTEX    → Vertex AI
 * - explicit GOOGLE_AI → Gemini API (requires GOOGLE_AI_API_KEY)
 * - AUTO               → Vertex when a project is configured (option or env),
 *                        else Gemini API when GOOGLE_AI_API_KEY is set,
 *                        else Vertex via ADC credentials if available.
 *
 * Fix: previously `AUTO` was unconditionally routed to Vertex, which made the
 * Gemini API branch unreachable for AUTO and threw a confusing Vertex
 * configuration error when only GOOGLE_AI_API_KEY was set — contradicting the
 * routing comments in the original code.
 *
 * @param {object} frames - CoreMessage-like object carrying the frame images.
 * @param {object} [options] - provider/project/location/model/apiKey options.
 * @returns {Promise<string>} Text analysis from the selected backend.
 * @throws {Error} When no usable provider configuration can be found.
 */
export async function analyzeVideo(frames, options = {}) {
    const provider = options.provider || AIProviderName.AUTO;
    // Explicit provider requests are honored directly.
    if (provider === AIProviderName.VERTEX) {
        return analyzeVideoWithVertexAI(frames, options);
    }
    if (provider === AIProviderName.GOOGLE_AI && process.env.GOOGLE_AI_API_KEY) {
        return analyzeVideoWithGeminiAPI(frames, options);
    }
    if (provider === AIProviderName.AUTO) {
        // Vertex — only when a project is explicitly configured.
        const hasVertexProject = Boolean(options.project ||
            process.env.GOOGLE_VERTEX_PROJECT ||
            process.env.GOOGLE_CLOUD_PROJECT ||
            process.env.GOOGLE_CLOUD_PROJECT_ID ||
            process.env.VERTEX_PROJECT_ID);
        if (hasVertexProject) {
            return analyzeVideoWithVertexAI(frames, options);
        }
        // Gemini API — when GOOGLE_AI_API_KEY is set.
        if (process.env.GOOGLE_AI_API_KEY) {
            return analyzeVideoWithGeminiAPI(frames, options);
        }
        // Last resort: Vertex can still resolve a project from the ADC
        // credentials file (see getVertexConfig), preserving prior behavior.
        if (process.env.GOOGLE_APPLICATION_CREDENTIALS) {
            return analyzeVideoWithVertexAI(frames, options);
        }
    }
    throw new Error("No valid provider configuration found. " +
        "Set GOOGLE_VERTEX_PROJECT for Vertex AI or GOOGLE_AI_API_KEY for Gemini API.");
}
|
|
223
|
+
//# sourceMappingURL=videoAnalyzer.js.map
|
|
@@ -7,6 +7,7 @@ import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../
|
|
|
7
7
|
import { shouldDisableBuiltinTools } from "../utils/toolUtils.js";
|
|
8
8
|
import { getKeyCount, getKeysAsString } from "../utils/transformationUtils.js";
|
|
9
9
|
import { TTSProcessor } from "../utils/ttsProcessor.js";
|
|
10
|
+
import { hasVideoFrames, executeVideoAnalysis, } from "../utils/videoAnalysisProcessor.js";
|
|
10
11
|
import { GenerationHandler } from "./modules/GenerationHandler.js";
|
|
11
12
|
// Import modules for composition
|
|
12
13
|
import { MessageBuilder } from "./modules/MessageBuilder.js";
|
|
@@ -473,6 +474,25 @@ export class BaseProvider {
|
|
|
473
474
|
// ===== Normal AI Generation Flow =====
|
|
474
475
|
const { tools, model } = await this.prepareGenerationContext(options);
|
|
475
476
|
const messages = await this.buildMessages(options);
|
|
477
|
+
// ===== VIDEO ANALYSIS FROM MESSAGES CONTENT =====
|
|
478
|
+
// Check if video files are present in messages content array
|
|
479
|
+
// If video analysis is needed, perform it and return early to avoid running generation
|
|
480
|
+
if (hasVideoFrames(messages)) {
|
|
481
|
+
const videoAnalysisResult = await executeVideoAnalysis(messages, {
|
|
482
|
+
provider: options.provider,
|
|
483
|
+
providerName: this.providerName,
|
|
484
|
+
region: options.region,
|
|
485
|
+
model: options.model,
|
|
486
|
+
});
|
|
487
|
+
// Return video analysis result directly without running generation
|
|
488
|
+
const videoResult = {
|
|
489
|
+
content: videoAnalysisResult,
|
|
490
|
+
provider: options.provider ?? this.providerName,
|
|
491
|
+
model: this.modelName,
|
|
492
|
+
usage: { input: 0, output: 0, total: 0 }, // Video analysis doesn't use standard token counting
|
|
493
|
+
};
|
|
494
|
+
return await this.enhanceResult(videoResult, options, startTime);
|
|
495
|
+
}
|
|
476
496
|
// Compose timeout signal with user-provided abort signal (mirrors stream path)
|
|
477
497
|
const timeoutController = createTimeoutController(options.timeout, this.providerName, "generate");
|
|
478
498
|
const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
|
|
@@ -49,8 +49,12 @@ export class GenerationHandler {
|
|
|
49
49
|
model,
|
|
50
50
|
messages,
|
|
51
51
|
...(shouldUseTools && Object.keys(tools).length > 0 && { tools }),
|
|
52
|
-
maxSteps: options.maxSteps
|
|
53
|
-
...(shouldUseTools &&
|
|
52
|
+
maxSteps: options.maxSteps ?? DEFAULT_MAX_STEPS,
|
|
53
|
+
...(shouldUseTools &&
|
|
54
|
+
options.toolChoice && { toolChoice: options.toolChoice }),
|
|
55
|
+
...(options.prepareStep && {
|
|
56
|
+
experimental_prepareStep: options.prepareStep,
|
|
57
|
+
}),
|
|
54
58
|
temperature: options.temperature,
|
|
55
59
|
maxTokens: options.maxTokens,
|
|
56
60
|
abortSignal: options.abortSignal,
|
package/dist/lib/neurolink.js
CHANGED
|
@@ -1570,6 +1570,9 @@ Current user's request: ${currentInput}`;
|
|
|
1570
1570
|
disableTools: options.disableTools,
|
|
1571
1571
|
toolFilter: options.toolFilter,
|
|
1572
1572
|
excludeTools: options.excludeTools,
|
|
1573
|
+
maxSteps: options.maxSteps,
|
|
1574
|
+
toolChoice: options.toolChoice,
|
|
1575
|
+
prepareStep: options.prepareStep,
|
|
1573
1576
|
enableAnalytics: options.enableAnalytics,
|
|
1574
1577
|
enableEvaluation: options.enableEvaluation,
|
|
1575
1578
|
context: options.context,
|
|
@@ -529,7 +529,7 @@ export class VideoProcessor extends BaseFileProcessor {
|
|
|
529
529
|
// Extract frames using ffmpeg
|
|
530
530
|
const framesDir = join(tempDir, "frames");
|
|
531
531
|
await fs.mkdir(framesDir, { recursive: true });
|
|
532
|
-
await this.runFfmpegFrameExtraction(videoPath, framesDir, timestamps);
|
|
532
|
+
await this.runFfmpegFrameExtraction(videoPath, framesDir, timestamps, intervalSec);
|
|
533
533
|
// Read extracted frames and resize with sharp
|
|
534
534
|
const keyframes = [];
|
|
535
535
|
for (let i = 0; i < timestamps.length; i++) {
|
|
@@ -563,15 +563,11 @@ export class VideoProcessor extends BaseFileProcessor {
|
|
|
563
563
|
* @param outputDir - Directory to write frame files
|
|
564
564
|
* @param timestamps - Array of timestamps in seconds
|
|
565
565
|
*/
|
|
566
|
-
runFfmpegFrameExtraction(videoPath, outputDir, timestamps) {
|
|
566
|
+
runFfmpegFrameExtraction(videoPath, outputDir, timestamps, intervalSec) {
|
|
567
567
|
return new Promise((resolve, reject) => {
|
|
568
|
-
//
|
|
569
|
-
//
|
|
570
|
-
|
|
571
|
-
// Build timestamp-based filter
|
|
572
|
-
const selectExpr = timestamps
|
|
573
|
-
.map((t) => `gte(t\\,${t})*lt(t\\,${t + 0.5})`)
|
|
574
|
-
.join("+");
|
|
568
|
+
// Improved select expression to pick exactly one frame per interval
|
|
569
|
+
// instead of multiple frames within a 0.5s window.
|
|
570
|
+
const selectExpr = `isnan(prev_selected_t)+gte(t-prev_selected_t,${intervalSec}-0.001)`;
|
|
575
571
|
const timeoutId = setTimeout(() => {
|
|
576
572
|
reject(new Error(`ffmpeg frame extraction timed out after ${VIDEO_CONFIG.FFMPEG_TIMEOUT_MS}ms`));
|
|
577
573
|
}, VIDEO_CONFIG.FFMPEG_TIMEOUT_MS);
|
|
@@ -861,19 +857,20 @@ export class VideoProcessor extends BaseFileProcessor {
|
|
|
861
857
|
}
|
|
862
858
|
const clampedCount = Math.min(frameCount, VIDEO_CONFIG.MAX_FRAMES);
|
|
863
859
|
const timestamps = [];
|
|
860
|
+
let interval = duration;
|
|
864
861
|
if (clampedCount === 1) {
|
|
865
862
|
timestamps.push(startSec);
|
|
866
863
|
}
|
|
867
864
|
else {
|
|
868
|
-
|
|
865
|
+
interval = duration / (clampedCount - 1);
|
|
869
866
|
for (let i = 0; i < clampedCount; i++) {
|
|
870
|
-
timestamps.push(startSec +
|
|
867
|
+
timestamps.push(startSec + interval * i);
|
|
871
868
|
}
|
|
872
869
|
}
|
|
873
870
|
// Extract frames
|
|
874
871
|
const framesDir = join(tempDir, "frames");
|
|
875
872
|
await fs.mkdir(framesDir, { recursive: true });
|
|
876
|
-
await this.runFfmpegFrameExtraction(tempVideoPath, framesDir, timestamps);
|
|
873
|
+
await this.runFfmpegFrameExtraction(tempVideoPath, framesDir, timestamps, interval);
|
|
877
874
|
// Read and resize frames
|
|
878
875
|
const keyframes = [];
|
|
879
876
|
for (let i = 0; i < timestamps.length; i++) {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { Schema, Tool } from "ai";
|
|
1
|
+
import type { Schema, Tool, ToolChoice, StepResult, LanguageModel } from "ai";
|
|
2
2
|
import type { AIProviderName } from "../constants/enums.js";
|
|
3
3
|
import type { RAGConfig } from "../rag/types.js";
|
|
4
4
|
import type { AnalyticsData, TokenUsage } from "./analytics.js";
|
|
@@ -248,6 +248,55 @@ export type GenerateOptions = {
|
|
|
248
248
|
* Default: false (backward compatible — tool schemas are injected into system prompt).
|
|
249
249
|
*/
|
|
250
250
|
skipToolPromptInjection?: boolean;
|
|
251
|
+
/** Maximum number of tool execution steps (default: 200) */
|
|
252
|
+
maxSteps?: number;
|
|
253
|
+
/**
|
|
254
|
+
* Tool choice configuration for the generation.
|
|
255
|
+
* Controls whether and which tools the model must call.
|
|
256
|
+
*
|
|
257
|
+
* - `"auto"` (default): the model can choose whether and which tools to call
|
|
258
|
+
* - `"none"`: no tool calls allowed
|
|
259
|
+
* - `"required"`: the model must call at least one tool
|
|
260
|
+
* - `{ type: "tool", toolName: string }`: the model must call the specified tool
|
|
261
|
+
*
|
|
262
|
+
* Note: When used without `prepareStep`, this applies to **every step** in the
|
|
263
|
+
* `maxSteps` loop. Using `"required"` or `{ type: "tool" }` without `prepareStep`
|
|
264
|
+
* will cause infinite tool calls until `maxSteps` is exhausted.
|
|
265
|
+
*/
|
|
266
|
+
toolChoice?: ToolChoice<Record<string, Tool>>;
|
|
267
|
+
/**
|
|
268
|
+
* Optional callback that runs before each step in a multi-step generation.
|
|
269
|
+
* Allows dynamically changing `toolChoice` and available tools per step.
|
|
270
|
+
*
|
|
271
|
+
* This is the recommended way to enforce specific tool calls on certain steps
|
|
272
|
+
* while allowing the model freedom on others.
|
|
273
|
+
*
|
|
274
|
+
* Maps to Vercel AI SDK's `experimental_prepareStep`.
|
|
275
|
+
*
|
|
276
|
+
* @example Force a specific tool on step 0, then switch to auto:
|
|
277
|
+
* ```typescript
|
|
278
|
+
* prepareStep: ({ stepNumber, steps }) => {
|
|
279
|
+
* if (stepNumber === 0) {
|
|
280
|
+
* return {
|
|
281
|
+
* toolChoice: { type: 'tool', toolName: 'myTool' }
|
|
282
|
+
* };
|
|
283
|
+
* }
|
|
284
|
+
* return { toolChoice: 'auto' };
|
|
285
|
+
* }
|
|
286
|
+
* ```
|
|
287
|
+
*
|
|
288
|
+
* @see https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#parameters
|
|
289
|
+
*/
|
|
290
|
+
prepareStep?: (options: {
|
|
291
|
+
steps: StepResult<Record<string, Tool>>[];
|
|
292
|
+
stepNumber: number;
|
|
293
|
+
maxSteps: number;
|
|
294
|
+
model: LanguageModel;
|
|
295
|
+
}) => PromiseLike<{
|
|
296
|
+
model?: LanguageModel;
|
|
297
|
+
toolChoice?: ToolChoice<Record<string, Tool>>;
|
|
298
|
+
experimental_activeTools?: string[];
|
|
299
|
+
} | undefined>;
|
|
251
300
|
enableEvaluation?: boolean;
|
|
252
301
|
enableAnalytics?: boolean;
|
|
253
302
|
context?: StandardRecord;
|
|
@@ -521,6 +570,7 @@ export type TextGenerationOptions = {
|
|
|
521
570
|
*/
|
|
522
571
|
images?: Array<Buffer | string | import("./content.js").ImageWithAltText>;
|
|
523
572
|
pdfFiles?: Array<Buffer | string>;
|
|
573
|
+
files?: Array<Buffer | string | import("./fileTypes.js").FileWithMetadata>;
|
|
524
574
|
};
|
|
525
575
|
provider?: AIProviderName;
|
|
526
576
|
model?: string;
|
|
@@ -568,6 +618,53 @@ export type TextGenerationOptions = {
|
|
|
568
618
|
toolFilter?: string[];
|
|
569
619
|
/** Exclude these tools by name (blacklist). Applied after toolFilter. */
|
|
570
620
|
excludeTools?: string[];
|
|
621
|
+
/**
|
|
622
|
+
* Tool choice configuration for the generation.
|
|
623
|
+
* Controls whether and which tools the model must call.
|
|
624
|
+
*
|
|
625
|
+
* - `"auto"` (default): the model can choose whether and which tools to call
|
|
626
|
+
* - `"none"`: no tool calls allowed
|
|
627
|
+
* - `"required"`: the model must call at least one tool
|
|
628
|
+
* - `{ type: "tool", toolName: string }`: the model must call the specified tool
|
|
629
|
+
*
|
|
630
|
+
* Note: When used without `prepareStep`, this applies to **every step** in the
|
|
631
|
+
* `maxSteps` loop. Using `"required"` or `{ type: "tool" }` without `prepareStep`
|
|
632
|
+
* will cause infinite tool calls until `maxSteps` is exhausted.
|
|
633
|
+
*/
|
|
634
|
+
toolChoice?: ToolChoice<Record<string, Tool>>;
|
|
635
|
+
/**
|
|
636
|
+
* Optional callback that runs before each step in a multi-step generation.
|
|
637
|
+
* Allows dynamically changing `toolChoice` and available tools per step.
|
|
638
|
+
*
|
|
639
|
+
* This is the recommended way to enforce specific tool calls on certain steps
|
|
640
|
+
* while allowing the model freedom on others.
|
|
641
|
+
*
|
|
642
|
+
* Maps to Vercel AI SDK's `experimental_prepareStep`.
|
|
643
|
+
*
|
|
644
|
+
* @example Force a specific tool on step 0, then switch to auto:
|
|
645
|
+
* ```typescript
|
|
646
|
+
* prepareStep: ({ stepNumber, steps }) => {
|
|
647
|
+
* if (stepNumber === 0) {
|
|
648
|
+
* return {
|
|
649
|
+
* toolChoice: { type: 'tool', toolName: 'myTool' }
|
|
650
|
+
* };
|
|
651
|
+
* }
|
|
652
|
+
* return { toolChoice: 'auto' };
|
|
653
|
+
* }
|
|
654
|
+
* ```
|
|
655
|
+
*
|
|
656
|
+
* @see https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#parameters
|
|
657
|
+
*/
|
|
658
|
+
prepareStep?: (options: {
|
|
659
|
+
steps: StepResult<Record<string, Tool>>[];
|
|
660
|
+
stepNumber: number;
|
|
661
|
+
maxSteps: number;
|
|
662
|
+
model: LanguageModel;
|
|
663
|
+
}) => PromiseLike<{
|
|
664
|
+
model?: LanguageModel;
|
|
665
|
+
toolChoice?: ToolChoice<Record<string, Tool>>;
|
|
666
|
+
experimental_activeTools?: string[];
|
|
667
|
+
} | undefined>;
|
|
571
668
|
/**
|
|
572
669
|
* Text-to-Speech (TTS) configuration
|
|
573
670
|
*
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Video Analysis Processor
|
|
3
|
+
*
|
|
4
|
+
* Formats video analysis results into human-readable text
|
|
5
|
+
*
|
|
6
|
+
* @module utils/videoAnalysisProcessor
|
|
7
|
+
*/
|
|
8
|
+
import type { CoreMessage } from "ai";
|
|
9
|
+
import { AIProviderName } from "../constants/enums.js";
|
|
10
|
+
/**
|
|
11
|
+
* Check if messages contain video frames (images)
|
|
12
|
+
*
|
|
13
|
+
* @param messages - Array of CoreMessage objects
|
|
14
|
+
* @returns true if video frames are present
|
|
15
|
+
*/
|
|
16
|
+
export declare function hasVideoFrames(messages: CoreMessage[]): boolean;
|
|
17
|
+
/**
|
|
18
|
+
* Execute video analysis on messages containing video frames
|
|
19
|
+
*
|
|
20
|
+
* @param messages - Array of CoreMessage objects with video frames
|
|
21
|
+
* @param options - Video analysis options
|
|
22
|
+
* @returns Video analysis text result
|
|
23
|
+
* @throws Error if analysis fails
|
|
24
|
+
*/
|
|
25
|
+
export declare function executeVideoAnalysis(messages: CoreMessage[], options: {
|
|
26
|
+
provider?: AIProviderName | string;
|
|
27
|
+
providerName?: AIProviderName;
|
|
28
|
+
region?: string;
|
|
29
|
+
model?: string;
|
|
30
|
+
}): Promise<string>;
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Video Analysis Processor
|
|
3
|
+
*
|
|
4
|
+
* Formats video analysis results into human-readable text
|
|
5
|
+
*
|
|
6
|
+
* @module utils/videoAnalysisProcessor
|
|
7
|
+
*/
|
|
8
|
+
import { AIProviderName } from "../constants/enums.js";
|
|
9
|
+
import { logger } from "./logger.js";
|
|
10
|
+
/**
|
|
11
|
+
* Check if messages contain video frames (images)
|
|
12
|
+
*
|
|
13
|
+
* @param messages - Array of CoreMessage objects
|
|
14
|
+
* @returns true if video frames are present
|
|
15
|
+
*/
|
|
16
|
+
/**
 * Check whether any message carries video frames (image content parts).
 *
 * @param {Array<object>} messages - CoreMessage array to inspect.
 * @returns {boolean} true when at least one message contains an image part.
 */
export function hasVideoFrames(messages) {
    const isImagePart = (part) => typeof part === "object" &&
        part !== null &&
        "type" in part &&
        part.type === "image";
    for (const message of messages) {
        // String content never carries frames; only array content can.
        if (Array.isArray(message.content) && message.content.some(isImagePart)) {
            return true;
        }
    }
    return false;
}
|
|
27
|
+
/**
|
|
28
|
+
* Execute video analysis on messages containing video frames
|
|
29
|
+
*
|
|
30
|
+
* @param messages - Array of CoreMessage objects with video frames
|
|
31
|
+
* @param options - Video analysis options
|
|
32
|
+
* @returns Video analysis text result
|
|
33
|
+
* @throws Error if analysis fails
|
|
34
|
+
*/
|
|
35
|
+
/**
 * Run video analysis for messages that contain video frames.
 *
 * @param {Array<object>} messages - CoreMessage array with video frames.
 * @param {object} options - provider/providerName/region/model hints.
 * @returns {Promise<string>} Text result of the video analysis.
 * @throws {Error} Propagated when the underlying analyzer fails.
 */
export async function executeVideoAnalysis(messages, options) {
    logger.debug("[VideoAnalysisProcessor] Video frames detected, triggering analysis");
    const { analyzeVideo } = await import("../adapters/video/videoAnalyzer.js");
    // Backend selection: Gemini API when explicitly requested (or AUTO with an
    // API key available), then Vertex when either hint names it, else AUTO.
    let provider = AIProviderName.AUTO;
    if (options.provider === AIProviderName.GOOGLE_AI ||
        (options.provider === AIProviderName.AUTO && process.env.GOOGLE_AI_API_KEY)) {
        provider = AIProviderName.GOOGLE_AI;
    }
    else if (options.provider === AIProviderName.VERTEX ||
        options.providerName === AIProviderName.VERTEX) {
        provider = AIProviderName.VERTEX;
    }
    // When a region override is given, let the analyzer resolve the project
    // itself; otherwise pass the env-configured project through.
    const project = options.region
        ? undefined
        : process.env.GOOGLE_VERTEX_PROJECT || process.env.GOOGLE_CLOUD_PROJECT;
    // NOTE(review): only messages[0] is forwarded even though hasVideoFrames
    // scans every message — confirm callers always place frames in the first
    // message.
    const videoAnalysisText = await analyzeVideo(messages[0], {
        provider: provider,
        project,
        location: options.region || process.env.GOOGLE_VERTEX_LOCATION,
        model: options.model || "gemini-2.0-flash",
    });
    logger.debug("[VideoAnalysisProcessor] Video analysis completed", {
        hasResult: !!videoAnalysisText,
        resultLength: videoAnalysisText?.length,
    });
    return videoAnalysisText;
}
|
|
59
|
+
//# sourceMappingURL=videoAnalysisProcessor.js.map
|
package/dist/neurolink.js
CHANGED
|
@@ -1570,6 +1570,9 @@ Current user's request: ${currentInput}`;
|
|
|
1570
1570
|
disableTools: options.disableTools,
|
|
1571
1571
|
toolFilter: options.toolFilter,
|
|
1572
1572
|
excludeTools: options.excludeTools,
|
|
1573
|
+
maxSteps: options.maxSteps,
|
|
1574
|
+
toolChoice: options.toolChoice,
|
|
1575
|
+
prepareStep: options.prepareStep,
|
|
1573
1576
|
enableAnalytics: options.enableAnalytics,
|
|
1574
1577
|
enableEvaluation: options.enableEvaluation,
|
|
1575
1578
|
context: options.context,
|
|
@@ -529,7 +529,7 @@ export class VideoProcessor extends BaseFileProcessor {
|
|
|
529
529
|
// Extract frames using ffmpeg
|
|
530
530
|
const framesDir = join(tempDir, "frames");
|
|
531
531
|
await fs.mkdir(framesDir, { recursive: true });
|
|
532
|
-
await this.runFfmpegFrameExtraction(videoPath, framesDir, timestamps);
|
|
532
|
+
await this.runFfmpegFrameExtraction(videoPath, framesDir, timestamps, intervalSec);
|
|
533
533
|
// Read extracted frames and resize with sharp
|
|
534
534
|
const keyframes = [];
|
|
535
535
|
for (let i = 0; i < timestamps.length; i++) {
|
|
@@ -563,15 +563,11 @@ export class VideoProcessor extends BaseFileProcessor {
|
|
|
563
563
|
* @param outputDir - Directory to write frame files
|
|
564
564
|
* @param timestamps - Array of timestamps in seconds
|
|
565
565
|
*/
|
|
566
|
-
runFfmpegFrameExtraction(videoPath, outputDir, timestamps) {
|
|
566
|
+
runFfmpegFrameExtraction(videoPath, outputDir, timestamps, intervalSec) {
|
|
567
567
|
return new Promise((resolve, reject) => {
|
|
568
|
-
//
|
|
569
|
-
//
|
|
570
|
-
|
|
571
|
-
// Build timestamp-based filter
|
|
572
|
-
const selectExpr = timestamps
|
|
573
|
-
.map((t) => `gte(t\\,${t})*lt(t\\,${t + 0.5})`)
|
|
574
|
-
.join("+");
|
|
568
|
+
// Improved select expression to pick exactly one frame per interval
|
|
569
|
+
// instead of multiple frames within a 0.5s window.
|
|
570
|
+
const selectExpr = `isnan(prev_selected_t)+gte(t-prev_selected_t,${intervalSec}-0.001)`;
|
|
575
571
|
const timeoutId = setTimeout(() => {
|
|
576
572
|
reject(new Error(`ffmpeg frame extraction timed out after ${VIDEO_CONFIG.FFMPEG_TIMEOUT_MS}ms`));
|
|
577
573
|
}, VIDEO_CONFIG.FFMPEG_TIMEOUT_MS);
|
|
@@ -861,19 +857,20 @@ export class VideoProcessor extends BaseFileProcessor {
|
|
|
861
857
|
}
|
|
862
858
|
const clampedCount = Math.min(frameCount, VIDEO_CONFIG.MAX_FRAMES);
|
|
863
859
|
const timestamps = [];
|
|
860
|
+
let interval = duration;
|
|
864
861
|
if (clampedCount === 1) {
|
|
865
862
|
timestamps.push(startSec);
|
|
866
863
|
}
|
|
867
864
|
else {
|
|
868
|
-
|
|
865
|
+
interval = duration / (clampedCount - 1);
|
|
869
866
|
for (let i = 0; i < clampedCount; i++) {
|
|
870
|
-
timestamps.push(startSec +
|
|
867
|
+
timestamps.push(startSec + interval * i);
|
|
871
868
|
}
|
|
872
869
|
}
|
|
873
870
|
// Extract frames
|
|
874
871
|
const framesDir = join(tempDir, "frames");
|
|
875
872
|
await fs.mkdir(framesDir, { recursive: true });
|
|
876
|
-
await this.runFfmpegFrameExtraction(tempVideoPath, framesDir, timestamps);
|
|
873
|
+
await this.runFfmpegFrameExtraction(tempVideoPath, framesDir, timestamps, interval);
|
|
877
874
|
// Read and resize frames
|
|
878
875
|
const keyframes = [];
|
|
879
876
|
for (let i = 0; i < timestamps.length; i++) {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { Schema, Tool } from "ai";
|
|
1
|
+
import type { Schema, Tool, ToolChoice, StepResult, LanguageModel } from "ai";
|
|
2
2
|
import type { AIProviderName } from "../constants/enums.js";
|
|
3
3
|
import type { RAGConfig } from "../rag/types.js";
|
|
4
4
|
import type { AnalyticsData, TokenUsage } from "./analytics.js";
|
|
@@ -248,6 +248,55 @@ export type GenerateOptions = {
|
|
|
248
248
|
* Default: false (backward compatible — tool schemas are injected into system prompt).
|
|
249
249
|
*/
|
|
250
250
|
skipToolPromptInjection?: boolean;
|
|
251
|
+
/** Maximum number of tool execution steps (default: 200) */
|
|
252
|
+
maxSteps?: number;
|
|
253
|
+
/**
|
|
254
|
+
* Tool choice configuration for the generation.
|
|
255
|
+
* Controls whether and which tools the model must call.
|
|
256
|
+
*
|
|
257
|
+
* - `"auto"` (default): the model can choose whether and which tools to call
|
|
258
|
+
* - `"none"`: no tool calls allowed
|
|
259
|
+
* - `"required"`: the model must call at least one tool
|
|
260
|
+
* - `{ type: "tool", toolName: string }`: the model must call the specified tool
|
|
261
|
+
*
|
|
262
|
+
* Note: When used without `prepareStep`, this applies to **every step** in the
|
|
263
|
+
* `maxSteps` loop. Using `"required"` or `{ type: "tool" }` without `prepareStep`
|
|
264
|
+
* will cause infinite tool calls until `maxSteps` is exhausted.
|
|
265
|
+
*/
|
|
266
|
+
toolChoice?: ToolChoice<Record<string, Tool>>;
|
|
267
|
+
/**
|
|
268
|
+
* Optional callback that runs before each step in a multi-step generation.
|
|
269
|
+
* Allows dynamically changing `toolChoice` and available tools per step.
|
|
270
|
+
*
|
|
271
|
+
* This is the recommended way to enforce specific tool calls on certain steps
|
|
272
|
+
* while allowing the model freedom on others.
|
|
273
|
+
*
|
|
274
|
+
* Maps to Vercel AI SDK's `experimental_prepareStep`.
|
|
275
|
+
*
|
|
276
|
+
* @example Force a specific tool on step 0, then switch to auto:
|
|
277
|
+
* ```typescript
|
|
278
|
+
* prepareStep: ({ stepNumber, steps }) => {
|
|
279
|
+
* if (stepNumber === 0) {
|
|
280
|
+
* return {
|
|
281
|
+
* toolChoice: { type: 'tool', toolName: 'myTool' }
|
|
282
|
+
* };
|
|
283
|
+
* }
|
|
284
|
+
* return { toolChoice: 'auto' };
|
|
285
|
+
* }
|
|
286
|
+
* ```
|
|
287
|
+
*
|
|
288
|
+
* @see https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#parameters
|
|
289
|
+
*/
|
|
290
|
+
prepareStep?: (options: {
|
|
291
|
+
steps: StepResult<Record<string, Tool>>[];
|
|
292
|
+
stepNumber: number;
|
|
293
|
+
maxSteps: number;
|
|
294
|
+
model: LanguageModel;
|
|
295
|
+
}) => PromiseLike<{
|
|
296
|
+
model?: LanguageModel;
|
|
297
|
+
toolChoice?: ToolChoice<Record<string, Tool>>;
|
|
298
|
+
experimental_activeTools?: string[];
|
|
299
|
+
} | undefined>;
|
|
251
300
|
enableEvaluation?: boolean;
|
|
252
301
|
enableAnalytics?: boolean;
|
|
253
302
|
context?: StandardRecord;
|
|
@@ -521,6 +570,7 @@ export type TextGenerationOptions = {
|
|
|
521
570
|
*/
|
|
522
571
|
images?: Array<Buffer | string | import("./content.js").ImageWithAltText>;
|
|
523
572
|
pdfFiles?: Array<Buffer | string>;
|
|
573
|
+
files?: Array<Buffer | string | import("./fileTypes.js").FileWithMetadata>;
|
|
524
574
|
};
|
|
525
575
|
provider?: AIProviderName;
|
|
526
576
|
model?: string;
|
|
@@ -568,6 +618,53 @@ export type TextGenerationOptions = {
|
|
|
568
618
|
toolFilter?: string[];
|
|
569
619
|
/** Exclude these tools by name (blacklist). Applied after toolFilter. */
|
|
570
620
|
excludeTools?: string[];
|
|
621
|
+
/**
|
|
622
|
+
* Tool choice configuration for the generation.
|
|
623
|
+
* Controls whether and which tools the model must call.
|
|
624
|
+
*
|
|
625
|
+
* - `"auto"` (default): the model can choose whether and which tools to call
|
|
626
|
+
* - `"none"`: no tool calls allowed
|
|
627
|
+
* - `"required"`: the model must call at least one tool
|
|
628
|
+
* - `{ type: "tool", toolName: string }`: the model must call the specified tool
|
|
629
|
+
*
|
|
630
|
+
* Note: When used without `prepareStep`, this applies to **every step** in the
|
|
631
|
+
* `maxSteps` loop. Using `"required"` or `{ type: "tool" }` without `prepareStep`
|
|
632
|
+
* will cause infinite tool calls until `maxSteps` is exhausted.
|
|
633
|
+
*/
|
|
634
|
+
toolChoice?: ToolChoice<Record<string, Tool>>;
|
|
635
|
+
/**
|
|
636
|
+
* Optional callback that runs before each step in a multi-step generation.
|
|
637
|
+
* Allows dynamically changing `toolChoice` and available tools per step.
|
|
638
|
+
*
|
|
639
|
+
* This is the recommended way to enforce specific tool calls on certain steps
|
|
640
|
+
* while allowing the model freedom on others.
|
|
641
|
+
*
|
|
642
|
+
* Maps to Vercel AI SDK's `experimental_prepareStep`.
|
|
643
|
+
*
|
|
644
|
+
* @example Force a specific tool on step 0, then switch to auto:
|
|
645
|
+
* ```typescript
|
|
646
|
+
* prepareStep: ({ stepNumber, steps }) => {
|
|
647
|
+
* if (stepNumber === 0) {
|
|
648
|
+
* return {
|
|
649
|
+
* toolChoice: { type: 'tool', toolName: 'myTool' }
|
|
650
|
+
* };
|
|
651
|
+
* }
|
|
652
|
+
* return { toolChoice: 'auto' };
|
|
653
|
+
* }
|
|
654
|
+
* ```
|
|
655
|
+
*
|
|
656
|
+
* @see https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#parameters
|
|
657
|
+
*/
|
|
658
|
+
prepareStep?: (options: {
|
|
659
|
+
steps: StepResult<Record<string, Tool>>[];
|
|
660
|
+
stepNumber: number;
|
|
661
|
+
maxSteps: number;
|
|
662
|
+
model: LanguageModel;
|
|
663
|
+
}) => PromiseLike<{
|
|
664
|
+
model?: LanguageModel;
|
|
665
|
+
toolChoice?: ToolChoice<Record<string, Tool>>;
|
|
666
|
+
experimental_activeTools?: string[];
|
|
667
|
+
} | undefined>;
|
|
571
668
|
/**
|
|
572
669
|
* Text-to-Speech (TTS) configuration
|
|
573
670
|
*
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Video Analysis Processor
|
|
3
|
+
*
|
|
4
|
+
* Formats video analysis results into human-readable text
|
|
5
|
+
*
|
|
6
|
+
* @module utils/videoAnalysisProcessor
|
|
7
|
+
*/
|
|
8
|
+
import type { CoreMessage } from "ai";
|
|
9
|
+
import { AIProviderName } from "../constants/enums.js";
|
|
10
|
+
/**
|
|
11
|
+
* Check if messages contain video frames (images)
|
|
12
|
+
*
|
|
13
|
+
* @param messages - Array of CoreMessage objects
|
|
14
|
+
* @returns true if video frames are present
|
|
15
|
+
*/
|
|
16
|
+
export declare function hasVideoFrames(messages: CoreMessage[]): boolean;
|
|
17
|
+
/**
|
|
18
|
+
* Execute video analysis on messages containing video frames
|
|
19
|
+
*
|
|
20
|
+
* @param messages - Array of CoreMessage objects with video frames
|
|
21
|
+
* @param options - Video analysis options
|
|
22
|
+
* @returns Video analysis text result
|
|
23
|
+
* @throws Error if analysis fails
|
|
24
|
+
*/
|
|
25
|
+
export declare function executeVideoAnalysis(messages: CoreMessage[], options: {
|
|
26
|
+
provider?: AIProviderName | string;
|
|
27
|
+
providerName?: AIProviderName;
|
|
28
|
+
region?: string;
|
|
29
|
+
model?: string;
|
|
30
|
+
}): Promise<string>;
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Video Analysis Processor
|
|
3
|
+
*
|
|
4
|
+
* Formats video analysis results into human-readable text
|
|
5
|
+
*
|
|
6
|
+
* @module utils/videoAnalysisProcessor
|
|
7
|
+
*/
|
|
8
|
+
import { AIProviderName } from "../constants/enums.js";
|
|
9
|
+
import { logger } from "./logger.js";
|
|
10
|
+
/**
|
|
11
|
+
* Check if messages contain video frames (images)
|
|
12
|
+
*
|
|
13
|
+
* @param messages - Array of CoreMessage objects
|
|
14
|
+
* @returns true if video frames are present
|
|
15
|
+
*/
|
|
16
|
+
/**
 * Check whether any message carries video frames (image content parts).
 *
 * @param {Array<object>} messages - CoreMessage array to inspect.
 * @returns {boolean} true when at least one message contains an image part.
 */
export function hasVideoFrames(messages) {
    const isImagePart = (part) => typeof part === "object" &&
        part !== null &&
        "type" in part &&
        part.type === "image";
    for (const message of messages) {
        // String content never carries frames; only array content can.
        if (Array.isArray(message.content) && message.content.some(isImagePart)) {
            return true;
        }
    }
    return false;
}
|
|
27
|
+
/**
|
|
28
|
+
* Execute video analysis on messages containing video frames
|
|
29
|
+
*
|
|
30
|
+
* @param messages - Array of CoreMessage objects with video frames
|
|
31
|
+
* @param options - Video analysis options
|
|
32
|
+
* @returns Video analysis text result
|
|
33
|
+
* @throws Error if analysis fails
|
|
34
|
+
*/
|
|
35
|
+
/**
 * Run video analysis for messages that contain video frames.
 *
 * @param {Array<object>} messages - CoreMessage array with video frames.
 * @param {object} options - provider/providerName/region/model hints.
 * @returns {Promise<string>} Text result of the video analysis.
 * @throws {Error} Propagated when the underlying analyzer fails.
 */
export async function executeVideoAnalysis(messages, options) {
    logger.debug("[VideoAnalysisProcessor] Video frames detected, triggering analysis");
    const { analyzeVideo } = await import("../adapters/video/videoAnalyzer.js");
    // Backend selection: Gemini API when explicitly requested (or AUTO with an
    // API key available), then Vertex when either hint names it, else AUTO.
    let provider = AIProviderName.AUTO;
    if (options.provider === AIProviderName.GOOGLE_AI ||
        (options.provider === AIProviderName.AUTO && process.env.GOOGLE_AI_API_KEY)) {
        provider = AIProviderName.GOOGLE_AI;
    }
    else if (options.provider === AIProviderName.VERTEX ||
        options.providerName === AIProviderName.VERTEX) {
        provider = AIProviderName.VERTEX;
    }
    // When a region override is given, let the analyzer resolve the project
    // itself; otherwise pass the env-configured project through.
    const project = options.region
        ? undefined
        : process.env.GOOGLE_VERTEX_PROJECT || process.env.GOOGLE_CLOUD_PROJECT;
    // NOTE(review): only messages[0] is forwarded even though hasVideoFrames
    // scans every message — confirm callers always place frames in the first
    // message.
    const videoAnalysisText = await analyzeVideo(messages[0], {
        provider: provider,
        project,
        location: options.region || process.env.GOOGLE_VERTEX_LOCATION,
        model: options.model || "gemini-2.0-flash",
    });
    logger.debug("[VideoAnalysisProcessor] Video analysis completed", {
        hasResult: !!videoAnalysisText,
        resultLength: videoAnalysisText?.length,
    });
    return videoAnalysisText;
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@juspay/neurolink",
|
|
3
|
-
"version": "9.
|
|
3
|
+
"version": "9.10.0",
|
|
4
4
|
"description": "Universal AI Development Platform with working MCP integration, multi-provider support, and professional CLI. Built-in tools operational, 58+ external MCP servers discoverable. Connect to filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 13 providers: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure, Hugging Face, Ollama, and Mistral AI.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Juspay Technologies",
|