@juspay/neurolink 7.31.0 → 7.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
+ ## [7.33.0](https://github.com/juspay/neurolink/compare/v7.32.0...v7.33.0) (2025-09-03)
+
+ ### Features
+
+ - **(provider):** refactor generate method to use streamText for improved performance and consistency ([a118300](https://github.com/juspay/neurolink/commit/a11830088376b899725bcb1dc2467cb73f44f5b9))
+
+ ## [7.32.0](https://github.com/juspay/neurolink/compare/v7.31.0...v7.32.0) (2025-09-03)
+
+ ### Features
+
+ - **(sdk):** Add Speech to Speech agents implementation ([a8bf953](https://github.com/juspay/neurolink/commit/a8bf953993a16303d3c4a5b3a94d5ea5b6bd83d7))
+
  ## [7.31.0](https://github.com/juspay/neurolink/compare/v7.30.1...v7.31.0) (2025-09-01)

  ### Features
@@ -1054,8 +1054,22 @@ export class CLICommandFactory {
  // Demo mode - add delay between chunks
  await new Promise((resolve) => setTimeout(resolve, options.delay));
  }
- process.stdout.write(nextResult.value.content);
- fullContent += nextResult.value.content;
+ const evt = nextResult.value;
+ const isText = (o) => !!o &&
+ typeof o === "object" &&
+ typeof o.content === "string";
+ const isAudio = (o) => !!o &&
+ typeof o === "object" &&
+ o.type === "audio";
+ if (isText(evt)) {
+ process.stdout.write(evt.content);
+ fullContent += evt.content;
+ }
+ else if (isAudio(evt)) {
+ if (options.debug && !options.quiet) {
+ process.stdout.write("[audio-chunk]");
+ }
+ }
  }
  }
  catch (error) {
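The change above routes text events to stdout and only marks audio events. A standalone sketch of the same discrimination, assuming the { content: string } | { type: "audio"; audio: AudioChunk } stream-event union declared in the type changes further below (illustrative only, not part of the package):

// Illustrative sketch: mirrors the isText/isAudio guards used by the CLI above.
type CliStreamEvent =
  | { content: string }
  | { type: "audio"; audio: { data: Buffer; sampleRateHz: number } };

const isText = (e: unknown): e is { content: string } =>
  !!e && typeof e === "object" && typeof (e as { content?: unknown }).content === "string";

const isAudio = (e: unknown): e is { type: "audio"; audio: { data: Buffer; sampleRateHz: number } } =>
  !!e && typeof e === "object" && (e as { type?: unknown }).type === "audio";

async function printEvents(events: AsyncIterable<CliStreamEvent>): Promise<void> {
  for await (const evt of events) {
    if (isText(evt)) {
      process.stdout.write(evt.content); // text chunks are echoed verbatim
    } else if (isAudio(evt)) {
      process.stdout.write("[audio-chunk]"); // audio is only marked, never printed as text
    }
  }
}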
@@ -37,6 +37,8 @@ export declare abstract class BaseProvider implements AIProvider {
  /**
  * Text generation method - implements AIProvider interface
  * Tools are always available unless explicitly disabled
+ * IMPLEMENTATION NOTE: Uses streamText() under the hood and accumulates results
+ * for consistency and better performance
  */
  generate(optionsOrPrompt: TextGenerationOptions | string, _analysisSchema?: ValidationSchema): Promise<EnhancedGenerateResult | null>;
  /**
@@ -110,6 +112,10 @@ export declare abstract class BaseProvider implements AIProvider {
  * MCP tools are added when available (without blocking)
  */
  protected getAllTools(): Promise<Record<string, Tool>>;
+ /**
+ * Calculate actual cost based on token usage and provider configuration
+ */
+ private calculateActualCost;
  /**
  * Convert MCP JSON Schema to Zod schema for AI SDK tools
  * Handles common MCP schema patterns safely
@@ -146,6 +146,8 @@ export class BaseProvider {
  /**
  * Text generation method - implements AIProvider interface
  * Tools are always available unless explicitly disabled
+ * IMPLEMENTATION NOTE: Uses streamText() under the hood and accumulates results
+ * for consistency and better performance
  */
  async generate(optionsOrPrompt, _analysisSchema) {
  const options = this.normalizeTextOptions(optionsOrPrompt);
@@ -153,8 +155,9 @@ export class BaseProvider {
  this.validateOptions(options);
  const startTime = Date.now();
  try {
- // Import generateText dynamically to avoid circular dependencies
- const { generateText } = await import("ai");
+ // Import streamText dynamically to avoid circular dependencies
+ // Using streamText instead of generateText for unified implementation
+ const { streamText } = await import("ai");
  // Get ALL available tools (direct + MCP + external from options)
  const shouldUseTools = !options.disableTools && this.supportsTools();
  const baseTools = shouldUseTools ? await this.getAllTools() : {};
@@ -175,7 +178,8 @@ export class BaseProvider {
  const model = await this.getAISDKModelWithMiddleware(options);
  // Build proper message array with conversation history
  const messages = buildMessagesArray(options);
- const result = await generateText({
+ // Use streamText and accumulate results instead of generateText
+ const streamResult = await streamText({
  model,
  messages: messages,
  tools,
@@ -184,31 +188,30 @@ export class BaseProvider {
  temperature: options.temperature,
  maxTokens: options.maxTokens || 8192,
  });
+ // Accumulate the streamed content
+ let accumulatedContent = "";
+ // Wait for the stream to complete and accumulate content
+ for await (const chunk of streamResult.textStream) {
+ accumulatedContent += chunk;
+ }
+ // Get the final result - this should include usage, toolCalls, etc.
+ const usage = await streamResult.usage;
+ const toolCalls = await streamResult.toolCalls;
+ const toolResults = await streamResult.toolResults;
  const responseTime = Date.now() - startTime;
+ // Create a result object compatible with generateText format
+ const result = {
+ text: accumulatedContent,
+ usage: usage,
+ toolCalls: toolCalls,
+ toolResults: toolResults,
+ steps: streamResult.steps, // Include steps for tool execution tracking
+ };
  try {
- // Calculate actual cost based on token usage and provider configuration
- const calculateActualCost = () => {
- try {
- const costInfo = modelConfig.getCostInfo(this.providerName, this.modelName);
- if (!costInfo) {
- return 0; // No cost info available
- }
- const promptTokens = result.usage?.promptTokens || 0;
- const completionTokens = result.usage?.completionTokens || 0;
- // Calculate cost per 1K tokens
- const inputCost = (promptTokens / 1000) * costInfo.input;
- const outputCost = (completionTokens / 1000) * costInfo.output;
- return inputCost + outputCost;
- }
- catch (error) {
- logger.debug(`Cost calculation failed for ${this.providerName}:`, error);
- return 0; // Fallback to 0 on any error
- }
- };
- const actualCost = calculateActualCost();
+ const actualCost = await this.calculateActualCost(usage || { promptTokens: 0, completionTokens: 0, totalTokens: 0 });
  recordProviderPerformanceFromMetrics(this.providerName, {
  responseTime,
- tokensGenerated: result.usage?.totalTokens || 0,
+ tokensGenerated: usage?.totalTokens || 0,
  cost: actualCost,
  success: true,
  });
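For context, a minimal sketch of the accumulate-over-streamText pattern used above, assuming the Vercel AI SDK API as it is called in this diff (textStream is an async iterable; usage, toolCalls, and toolResults resolve once the stream finishes). The helper name is hypothetical:

// Hypothetical helper, not the package's API: drains streamText and returns a
// generateText-shaped object, as the refactored generate() does above.
import { streamText } from "ai";

async function generateViaStream(
  model: Parameters<typeof streamText>[0]["model"],
  prompt: string,
) {
  const streamResult = await streamText({ model, prompt });

  // Accumulate streamed text chunks into a single string.
  let text = "";
  for await (const chunk of streamResult.textStream) {
    text += chunk;
  }

  // These settle after the stream has completed.
  const usage = await streamResult.usage;
  const toolCalls = await streamResult.toolCalls;
  const toolResults = await streamResult.toolResults;

  // Shape the result like a generateText() response for downstream consumers.
  return { text, usage, toolCalls, toolResults, steps: streamResult.steps };
}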
@@ -216,7 +219,7 @@ export class BaseProvider {
  const optimizedProvider = getPerformanceOptimizedProvider("speed");
  logger.debug(`🚀 Performance recorded for ${this.providerName}:`, {
  responseTime: `${responseTime}ms`,
- tokens: result.usage?.totalTokens || 0,
+ tokens: usage?.totalTokens || 0,
  estimatedCost: `$${actualCost.toFixed(6)}`,
  recommendedSpeedProvider: optimizedProvider?.provider || "none",
  });
@@ -228,11 +231,9 @@ export class BaseProvider {
  // AI SDK puts tool calls in steps array for multi-step generation
  const toolsUsed = [];
  // First check direct tool calls (fallback)
- if (result.toolCalls && result.toolCalls.length > 0) {
- toolsUsed.push(...result.toolCalls.map((tc) => {
- return (tc.toolName ||
- tc.name ||
- "unknown");
+ if (toolCalls && toolCalls.length > 0) {
+ toolsUsed.push(...toolCalls.map((tc) => {
+ return tc.toolName || "unknown";
  }));
  }
  // Then check steps for tool calls (primary source for multi-step)
@@ -326,20 +327,14 @@ export class BaseProvider {
  },
  provider: this.providerName,
  model: this.modelName,
- toolCalls: result.toolCalls
- ? result.toolCalls.map((tc) => ({
- toolCallId: tc.toolCallId ||
- tc.id ||
- "unknown",
- toolName: tc.toolName ||
- tc.name ||
- "unknown",
- args: tc.args ||
- tc.parameters ||
- {},
+ toolCalls: toolCalls
+ ? toolCalls.map((tc) => ({
+ toolCallId: tc.toolCallId || "unknown",
+ toolName: tc.toolName || "unknown",
+ args: tc.args || {},
  }))
  : [],
- toolResults: result.toolResults,
+ toolResults: toolResults || [],
  toolsUsed: uniqueToolsUsed,
  toolExecutions, // ✅ Add extracted tool executions
  availableTools: Object.keys(tools).map((name) => {
@@ -655,6 +650,27 @@ export class BaseProvider {
  logger.debug(`[BaseProvider] getAllTools returning tools: ${getKeysAsString(tools)}`);
  return tools;
  }
+ /**
+ * Calculate actual cost based on token usage and provider configuration
+ */
+ async calculateActualCost(usage) {
+ try {
+ const costInfo = modelConfig.getCostInfo(this.providerName, this.modelName);
+ if (!costInfo) {
+ return 0; // No cost info available
+ }
+ const promptTokens = usage?.promptTokens || 0;
+ const completionTokens = usage?.completionTokens || 0;
+ // Calculate cost per 1K tokens
+ const inputCost = (promptTokens / 1000) * costInfo.input;
+ const outputCost = (completionTokens / 1000) * costInfo.output;
+ return inputCost + outputCost;
+ }
+ catch (error) {
+ logger.debug(`Cost calculation failed for ${this.providerName}:`, error);
+ return 0; // Fallback to 0 on any error
+ }
+ }
  /**
  * Convert MCP JSON Schema to Zod schema for AI SDK tools
  * Handles common MCP schema patterns safely
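The extracted calculateActualCost helper prices usage per 1K tokens (cost = promptTokens/1000 × input price + completionTokens/1000 × output price). A small illustrative sketch of that arithmetic with hypothetical prices:

// Illustrative only; prices and token counts are hypothetical.
interface CostInfo { input: number; output: number }            // $ per 1K tokens
interface TokenUsage { promptTokens: number; completionTokens: number }

function estimateCost(usage: TokenUsage, costInfo?: CostInfo): number {
  if (!costInfo) return 0;                                      // no pricing data -> 0, as above
  const inputCost = (usage.promptTokens / 1000) * costInfo.input;
  const outputCost = (usage.completionTokens / 1000) * costInfo.output;
  return inputCost + outputCost;
}

// e.g. 1,200 prompt + 300 completion tokens at $0.0005 / $0.0015 per 1K tokens
// -> 0.0006 + 0.00045 = $0.00105
estimateCost({ promptTokens: 1200, completionTokens: 300 }, { input: 0.0005, output: 0.0015 });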
package/dist/neurolink.js CHANGED
@@ -1312,8 +1312,10 @@ export class NeuroLink {
  needsInitialization: !this.mcpInitialized,
  message: "Checking MCP initialization status before generation",
  });
- // Initialize MCP if needed
- await this.initializeMCP();
+ // Initialize MCP only when tools are enabled
+ if (!options.disableTools) {
+ await this.initializeMCP();
+ }
  const mcpInitCheckEndTime = process.hrtime.bigint();
  const mcpInitCheckDurationNs = mcpInitCheckEndTime - mcpInitCheckStartTime;
  logger.debug(`[NeuroLink] ✅ LOG_POINT_T003_MCP_INIT_CHECK_COMPLETE`, {
@@ -1573,10 +1575,16 @@ export class NeuroLink {
  };
  // Call the new stream method
  const result = await this.stream(streamOptions);
- // Convert StreamResult to simple string async iterable
+ // Convert StreamResult to simple string async iterable (filter text events only)
  async function* stringStream() {
- for await (const chunk of result.stream) {
- yield chunk.content;
+ for await (const evt of result.stream) {
+ const anyEvt = evt;
+ if (anyEvt && typeof anyEvt === "object" && "content" in anyEvt) {
+ const content = anyEvt.content;
+ if (typeof content === "string") {
+ yield content;
+ }
+ }
  }
  }
  return stringStream();
@@ -1646,12 +1654,13 @@ export class NeuroLink {
  let factoryResult;
  try {
  await this.initializeMCP();
- const _originalPrompt = options.input.text;
  factoryResult = processStreamingFactoryOptions(options);
  enhancedOptions = createCleanStreamOptions(options);
- const { toolResults: _toolResults, enhancedPrompt } = await this.detectAndExecuteTools(options.input.text, undefined);
- if (enhancedPrompt !== options.input.text) {
- enhancedOptions.input.text = enhancedPrompt;
+ if (options.input?.text) {
+ const { toolResults: _toolResults, enhancedPrompt } = await this.detectAndExecuteTools(options.input.text, undefined);
+ if (enhancedPrompt !== options.input.text) {
+ enhancedOptions.input.text = enhancedPrompt;
+ }
  }
  const { stream: mcpStream, provider: providerName } = await this.createMCPStream(enhancedOptions);
  const streamResult = await this.processStreamResult(mcpStream, enhancedOptions, factoryResult);
@@ -1756,9 +1765,13 @@ export class NeuroLink {
  validationStartTimeNs: validationStartTime.toString(),
  message: "Starting comprehensive input validation process",
  });
- if (!options?.input?.text ||
- typeof options.input.text !== "string" ||
- options.input.text.trim() === "") {
+ const hasText = typeof options?.input?.text === "string" &&
+ options.input.text.trim().length > 0;
+ // Accept audio when frames are present; sampleRateHz is optional (defaults applied later)
+ const hasAudio = !!(options?.input?.audio &&
+ options.input.audio.frames &&
+ typeof options.input.audio.frames[Symbol.asyncIterator] !== "undefined");
+ if (!hasText && !hasAudio) {
  const validationFailTime = process.hrtime.bigint();
  const validationDurationNs = validationFailTime - validationStartTime;
  logger.debug(`[NeuroLink] 💥 LOG_POINT_005_VALIDATION_FAILED`, {
@@ -1769,10 +1782,10 @@ export class NeuroLink {
  elapsedNs: (process.hrtime.bigint() - hrTimeStart).toString(),
  validationDurationNs: validationDurationNs.toString(),
  validationDurationMs: Number(validationDurationNs) / 1000000,
- validationError: "Stream options must include input.text as a non-empty string",
+ validationError: "Stream options must include either input.text or input.audio",
  message: "EXHAUSTIVE validation failure analysis with character-level debugging",
  });
- throw new Error("Stream options must include input.text as a non-empty string");
+ throw new Error("Stream options must include either input.text or input.audio");
  }
  const validationSuccessTime = process.hrtime.bigint();
  const validationDurationNs = validationSuccessTime - validationStartTime;
@@ -1784,10 +1797,11 @@ export class NeuroLink {
  elapsedNs: (process.hrtime.bigint() - hrTimeStart).toString(),
  validationDurationNs: validationDurationNs.toString(),
  validationDurationMs: Number(validationDurationNs) / 1000000,
- inputTextValid: true,
- inputTextLength: options.input.text.length,
- inputTextTrimmedLength: options.input.text.trim().length,
- inputTextPreview: options.input.text.substring(0, 100),
+ inputTextValid: hasText,
+ inputAudioPresent: hasAudio,
+ inputTextLength: hasText ? options.input.text.length : 0,
+ inputTextTrimmedLength: hasText ? options.input.text.trim().length : 0,
+ inputTextPreview: hasText ? options.input.text.substring(0, 100) : "",
  message: "EXHAUSTIVE validation success - proceeding with stream processing",
  });
  }
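With this relaxed validation, stream() accepts input.audio as an alternative to input.text. A hedged caller-side sketch, assuming an existing NeuroLink instance and the AudioInputSpec shape declared in the type changes further below; the file name, frame size, and helper names are illustrative, not taken from the package docs:

// Illustrative usage sketch only.
import { createReadStream } from "node:fs";
import { NeuroLink } from "@juspay/neurolink";

// Yield raw PCM16LE frames; a zero-length Buffer can act as a flush signal.
async function* pcmFrames(path: string): AsyncIterable<Buffer> {
  for await (const chunk of createReadStream(path, { highWaterMark: 3200 })) {
    yield chunk as Buffer;
  }
}

async function speechToSpeech(neurolink: NeuroLink): Promise<void> {
  const result = await neurolink.stream({
    input: {
      audio: {
        frames: pcmFrames("./utterance.pcm"), // hypothetical input file
        sampleRateHz: 16000,
        encoding: "PCM16LE",
        channels: 1,
      },
    },
  });

  for await (const evt of result.stream) {
    if ("type" in evt && evt.type === "audio") {
      // evt.audio.data is a PCM16LE Buffer at evt.audio.sampleRateHz
    }
  }
}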
@@ -17,6 +17,7 @@ export declare class GoogleAIStudioProvider extends BaseProvider {
  protected getAISDKModel(): LanguageModelV1;
  protected handleProviderError(error: unknown): Error;
  protected executeStream(options: StreamOptions, _analysisSchema?: ZodUnknownSchema | Schema<unknown>): Promise<StreamResult>;
+ private executeAudioStreamViaGeminiLive;
  private getApiKey;
  }
  export default GoogleAIStudioProvider;
@@ -8,6 +8,16 @@ import { AuthenticationError, NetworkError, ProviderError, RateLimitError, } fro
  import { DEFAULT_MAX_TOKENS, DEFAULT_MAX_STEPS } from "../core/constants.js";
  import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
  import { buildMessagesArray } from "../utils/messageBuilder.js";
+ // Create Google GenAI client
+ async function createGoogleGenAIClient(apiKey) {
+ const mod = await import("@google/genai");
+ const ctor = mod.GoogleGenAI;
+ if (!ctor) {
+ throw new Error("@google/genai does not export GoogleGenAI");
+ }
+ const Ctor = ctor;
+ return new Ctor({ apiKey });
+ }
  // Environment variable setup
  if (!process.env.GOOGLE_GENERATIVE_AI_API_KEY &&
  process.env.GOOGLE_AI_API_KEY) {
@@ -61,6 +71,10 @@ export class GoogleAIStudioProvider extends BaseProvider {
  }
  // executeGenerate removed - BaseProvider handles all generation with tools
  async executeStream(options, _analysisSchema) {
+ // Phase 1: if audio input present, bridge to Gemini Live (Studio) using @google/genai
+ if (options.input?.audio) {
+ return await this.executeAudioStreamViaGeminiLive(options);
+ }
  this.validateStreamOptions(options);
  const startTime = Date.now();
  const apiKey = this.getApiKey();
@@ -115,6 +129,188 @@ export class GoogleAIStudioProvider extends BaseProvider {
  // ===================
  // HELPER METHODS
  // ===================
+ async executeAudioStreamViaGeminiLive(options) {
+ const startTime = Date.now();
+ const apiKey = this.getApiKey();
+ // Dynamic import to avoid hard dependency unless audio streaming is used
+ let client;
+ try {
+ client = await createGoogleGenAIClient(apiKey);
+ }
+ catch {
+ throw new AuthenticationError("Missing '@google/genai'. Install with: pnpm add @google/genai", this.providerName);
+ }
+ const model = this.modelName ||
+ process.env.GOOGLE_VOICE_AI_MODEL ||
+ "gemini-2.5-flash-preview-native-audio-dialog";
+ const queue = [];
+ let resolveNext = null;
+ let done = false;
+ const push = (item) => {
+ if (done) {
+ return;
+ }
+ if (item.type === "audio") {
+ if (resolveNext) {
+ const fn = resolveNext;
+ resolveNext = null;
+ fn({ value: { type: "audio", audio: item.audio }, done: false });
+ return;
+ }
+ }
+ queue.push(item);
+ };
+ const session = await client.live.connect({
+ model,
+ callbacks: {
+ onopen: () => {
+ // no-op
+ },
+ onmessage: async (message) => {
+ try {
+ const audio = message?.serverContent?.modelTurn?.parts?.[0]?.inlineData;
+ if (audio?.data) {
+ const buf = Buffer.from(String(audio.data), "base64");
+ const chunk = {
+ data: buf,
+ sampleRateHz: 24000,
+ channels: 1,
+ encoding: "PCM16LE",
+ };
+ push({ type: "audio", audio: chunk });
+ }
+ if (message?.serverContent?.interrupted) {
+ // allow consumer to handle; no special action required here
+ }
+ }
+ catch (e) {
+ push({ type: "error", error: e });
+ }
+ },
+ onerror: (e) => {
+ push({ type: "error", error: e });
+ },
+ onclose: (_e) => {
+ push({ type: "end" });
+ },
+ },
+ config: {
+ responseModalities: ["AUDIO"],
+ speechConfig: {
+ voiceConfig: { prebuiltVoiceConfig: { voiceName: "Orus" } },
+ },
+ },
+ });
+ // Feed upstream audio frames concurrently
+ (async () => {
+ try {
+ const spec = options.input?.audio;
+ if (!spec) {
+ logger.debug("[GeminiLive] No audio spec found on input; skipping upstream send");
+ return;
+ }
+ for await (const frame of spec.frames) {
+ // Zero-length frame acts as a 'flush' control signal
+ if (!frame || frame.byteLength === 0) {
+ try {
+ if (session.sendInput) {
+ await session.sendInput({ event: "flush" });
+ }
+ else if (session.sendRealtimeInput) {
+ await session.sendRealtimeInput({ event: "flush" });
+ }
+ }
+ catch (err) {
+ logger.debug("[GeminiLive] flush control failed (non-fatal)", {
+ error: err instanceof Error ? err.message : String(err),
+ });
+ }
+ continue;
+ }
+ // Convert PCM16LE buffer to base64 and wrap in genai Blob-like object
+ const base64 = frame.toString("base64");
+ const mimeType = `audio/pcm;rate=${spec.sampleRateHz || 16000}`;
+ await session.sendRealtimeInput?.({
+ media: { data: base64, mimeType },
+ });
+ }
+ // Best-effort flush signal if supported
+ try {
+ if (session.sendInput) {
+ await session.sendInput({ event: "flush" });
+ }
+ else if (session.sendRealtimeInput) {
+ await session.sendRealtimeInput({ event: "flush" });
+ }
+ }
+ catch (err) {
+ logger.debug("[GeminiLive] final flush failed (non-fatal)", {
+ error: err instanceof Error ? err.message : String(err),
+ });
+ }
+ }
+ catch (e) {
+ push({ type: "error", error: e });
+ }
+ })().catch(() => {
+ // ignore
+ });
+ // AsyncIterable for stream events
+ const asyncIterable = {
+ [Symbol.asyncIterator]() {
+ return {
+ async next() {
+ if (queue.length > 0) {
+ const item = queue.shift();
+ if (!item) {
+ return {
+ value: undefined,
+ done: true,
+ };
+ }
+ if (item.type === "audio") {
+ return {
+ value: { type: "audio", audio: item.audio },
+ done: false,
+ };
+ }
+ if (item.type === "end") {
+ done = true;
+ return {
+ value: undefined,
+ done: true,
+ };
+ }
+ if (item.type === "error") {
+ done = true;
+ throw item.error instanceof Error
+ ? item.error
+ : new Error(String(item.error));
+ }
+ }
+ if (done) {
+ return {
+ value: undefined,
+ done: true,
+ };
+ }
+ return await new Promise((resolve) => {
+ resolveNext = resolve;
+ });
+ },
+ };
+ },
+ };
+ return {
+ stream: asyncIterable,
+ provider: this.providerName,
+ model: model,
+ metadata: {
+ startTime,
+ streamId: `google-ai-audio-${Date.now()}`,
+ },
+ };
+ }
  getApiKey() {
  const apiKey = process.env.GOOGLE_AI_API_KEY || process.env.GOOGLE_GENERATIVE_AI_API_KEY;
  if (!apiKey) {
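The bridge above adapts the Gemini Live callback API to the AsyncIterable stream contract with a small queue plus a parked resolver. A simplified generic sketch of that pattern (illustrative; the real code also carries 'end' and 'error' items through the queue as shown):

// Generic callback-to-AsyncIterable bridge, simplified from the queue + resolveNext
// pattern used by executeAudioStreamViaGeminiLive above.
function makeEventStream<T>() {
  const queue: T[] = [];
  let resolveNext: ((r: IteratorResult<T>) => void) | null = null;
  let done = false;

  const push = (item: T): void => {
    if (done) return;
    if (resolveNext) {
      const fn = resolveNext;            // a consumer is already waiting on next()
      resolveNext = null;
      fn({ value: item, done: false });
    } else {
      queue.push(item);                  // buffer until the consumer catches up
    }
  };

  const end = (): void => {
    done = true;
    resolveNext?.({ value: undefined, done: true });
    resolveNext = null;
  };

  const iterable: AsyncIterable<T> = {
    [Symbol.asyncIterator]() {
      return {
        async next(): Promise<IteratorResult<T>> {
          if (queue.length > 0) {
            return { value: queue.shift() as T, done: false };
          }
          if (done) {
            return { value: undefined, done: true };
          }
          // Park until the producer calls push() or end().
          return new Promise<IteratorResult<T>>((resolve) => {
            resolveNext = resolve;
          });
        },
      };
    },
  };

  return { push, end, iterable };
}

A producer calls push() from event callbacks and end() on close; the consumer simply iterates the returned iterable with for await...of.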
@@ -834,6 +834,7 @@ export class GoogleVertexProvider extends BaseProvider {
  message: "Message array built successfully",
  });
  }
+ /* eslint-disable-next-line max-lines-per-function */
  async executeStream(options, analysisSchema) {
  // Initialize stream execution tracking
  const streamExecutionId = `vertex-stream-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
@@ -863,7 +864,9 @@ export class GoogleVertexProvider extends BaseProvider {
  streamExecutionId,
  streamRequestDetails: {
  modelName: this.modelName,
- promptLength: options.input.text.length,
+ promptLength: typeof options.input?.text === "string"
+ ? options.input.text.length
+ : 0,
  hasSchema: !!analysisSchema,
  messagesCount: Array.isArray(messages) ? messages.length : 0,
  temperature: options?.temperature,
@@ -66,9 +66,23 @@ export interface StreamAnalyticsData {
  * Stream function options interface - Primary method for streaming content
  * Future-ready for multi-modal capabilities while maintaining text focus
  */
+ export type PCMEncoding = "PCM16LE";
+ export interface AudioInputSpec {
+ frames: AsyncIterable<Buffer>;
+ sampleRateHz?: number;
+ encoding?: PCMEncoding;
+ channels?: 1;
+ }
+ export interface AudioChunk {
+ data: Buffer;
+ sampleRateHz: number;
+ channels: number;
+ encoding: PCMEncoding;
+ }
  export interface StreamOptions {
  input: {
- text: string;
+ text?: string;
+ audio?: AudioInputSpec;
  };
  output?: {
  format?: "text" | "structured" | "json";
@@ -121,6 +135,9 @@ export interface StreamOptions {
  export interface StreamResult {
  stream: AsyncIterable<{
  content: string;
+ } | {
+ type: "audio";
+ audio: AudioChunk;
  }>;
  provider?: string;
  model?: string;
@@ -44,7 +44,7 @@ export function convertGenerateToStreamOptions(generateOptions) {
  export function convertStreamToGenerateOptions(streamOptions) {
  const generateOptions = {
  // Core input mapping
- input: streamOptions.input,
+ input: { text: (streamOptions.input && streamOptions.input.text) || "" },
  // Provider and model settings
  provider: streamOptions.provider,
  model: streamOptions.model,
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@juspay/neurolink",
- "version": "7.31.0",
+ "version": "7.33.0",
  "description": "Universal AI Development Platform with working MCP integration, multi-provider support, and professional CLI. Built-in tools operational, 58+ external MCP servers discoverable. Connect to filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 9 major providers: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure, Hugging Face, Ollama, and Mistral AI.",
  "author": {
  "name": "Juspay Technologies",
@@ -78,6 +78,7 @@
  "dev:full": "node tools/development/dev-server.js",
  "dev:health": "node tools/development/healthMonitor.js",
  "dev:demo": "concurrently \"pnpm run dev\" \"node neurolink-demo/complete-enhanced-server.js\"",
+ "demo:voice": "pnpm build && node examples/voice-demo/server.mjs",
  "// Build & Deploy (Complete Pipeline)": "",
  "build:complete": "node tools/automation/buildSystem.js",
  "build:analyze": "node tools/development/dependency-analyzer.js",
@@ -151,6 +152,7 @@
  "@aws-sdk/credential-provider-node": "^3.876.0",
  "@aws-sdk/types": "^3.862.0",
  "@google-cloud/vertexai": "^1.10.0",
+ "@google/genai": "^1.16.0",
  "@google/generative-ai": "^0.24.1",
  "@huggingface/inference": "^2.8.0",
  "@modelcontextprotocol/sdk": "^1.13.0",