@juspay/neurolink 7.31.0 → 7.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
+ ## [7.32.0](https://github.com/juspay/neurolink/compare/v7.31.0...v7.32.0) (2025-09-03)
+
+ ### Features
+
+ - **(sdk):** Add Speech to Speech agents implementation ([a8bf953](https://github.com/juspay/neurolink/commit/a8bf953993a16303d3c4a5b3a94d5ea5b6bd83d7))
+
  ## [7.31.0](https://github.com/juspay/neurolink/compare/v7.30.1...v7.31.0) (2025-09-01)

  ### Features
@@ -1054,8 +1054,22 @@ export class CLICommandFactory {
  // Demo mode - add delay between chunks
  await new Promise((resolve) => setTimeout(resolve, options.delay));
  }
- process.stdout.write(nextResult.value.content);
- fullContent += nextResult.value.content;
+ const evt = nextResult.value;
+ const isText = (o) => !!o &&
+ typeof o === "object" &&
+ typeof o.content === "string";
+ const isAudio = (o) => !!o &&
+ typeof o === "object" &&
+ o.type === "audio";
+ if (isText(evt)) {
+ process.stdout.write(evt.content);
+ fullContent += evt.content;
+ }
+ else if (isAudio(evt)) {
+ if (options.debug && !options.quiet) {
+ process.stdout.write("[audio-chunk]");
+ }
+ }
  }
  }
  catch (error) {
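
With this change, each event on the CLI's stream is either a text chunk (`{ content }`) or an audio chunk (`{ type: "audio", audio }`). A minimal TypeScript sketch of the same discrimination for SDK consumers of `result.stream`; the local `AudioChunk`/`StreamEvent` declarations simply mirror the updated type definitions further down in this diff, and the function name is illustrative:

```ts
// Shapes mirror the updated StreamResult union in the package's type definitions.
interface AudioChunk {
  data: Buffer;
  sampleRateHz: number;
  channels: number;
  encoding: "PCM16LE";
}

type StreamEvent = { content: string } | { type: "audio"; audio: AudioChunk };

// Consume a mixed text/audio stream: print text, collect raw PCM for playback or saving.
async function consume(stream: AsyncIterable<StreamEvent>): Promise<Buffer[]> {
  const audio: Buffer[] = [];
  for await (const evt of stream) {
    if ("content" in evt) {
      process.stdout.write(evt.content);
    } else if (evt.type === "audio") {
      audio.push(evt.audio.data); // 24 kHz mono PCM16LE per the Gemini Live bridge below
    }
  }
  return audio;
}
```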
package/dist/neurolink.js CHANGED
@@ -1312,8 +1312,10 @@ export class NeuroLink {
  needsInitialization: !this.mcpInitialized,
  message: "Checking MCP initialization status before generation",
  });
- // Initialize MCP if needed
- await this.initializeMCP();
+ // Initialize MCP only when tools are enabled
+ if (!options.disableTools) {
+ await this.initializeMCP();
+ }
  const mcpInitCheckEndTime = process.hrtime.bigint();
  const mcpInitCheckDurationNs = mcpInitCheckEndTime - mcpInitCheckStartTime;
  logger.debug(`[NeuroLink] ✅ LOG_POINT_T003_MCP_INIT_CHECK_COMPLETE`, {
@@ -1573,10 +1575,16 @@ export class NeuroLink {
  };
  // Call the new stream method
  const result = await this.stream(streamOptions);
- // Convert StreamResult to simple string async iterable
+ // Convert StreamResult to simple string async iterable (filter text events only)
  async function* stringStream() {
- for await (const chunk of result.stream) {
- yield chunk.content;
+ for await (const evt of result.stream) {
+ const anyEvt = evt;
+ if (anyEvt && typeof anyEvt === "object" && "content" in anyEvt) {
+ const content = anyEvt.content;
+ if (typeof content === "string") {
+ yield content;
+ }
+ }
  }
  }
  return stringStream();
@@ -1646,12 +1654,13 @@ export class NeuroLink {
  let factoryResult;
  try {
  await this.initializeMCP();
- const _originalPrompt = options.input.text;
  factoryResult = processStreamingFactoryOptions(options);
  enhancedOptions = createCleanStreamOptions(options);
- const { toolResults: _toolResults, enhancedPrompt } = await this.detectAndExecuteTools(options.input.text, undefined);
- if (enhancedPrompt !== options.input.text) {
- enhancedOptions.input.text = enhancedPrompt;
+ if (options.input?.text) {
+ const { toolResults: _toolResults, enhancedPrompt } = await this.detectAndExecuteTools(options.input.text, undefined);
+ if (enhancedPrompt !== options.input.text) {
+ enhancedOptions.input.text = enhancedPrompt;
+ }
  }
  const { stream: mcpStream, provider: providerName } = await this.createMCPStream(enhancedOptions);
  const streamResult = await this.processStreamResult(mcpStream, enhancedOptions, factoryResult);
@@ -1756,9 +1765,13 @@ export class NeuroLink {
  validationStartTimeNs: validationStartTime.toString(),
  message: "Starting comprehensive input validation process",
  });
- if (!options?.input?.text ||
- typeof options.input.text !== "string" ||
- options.input.text.trim() === "") {
+ const hasText = typeof options?.input?.text === "string" &&
+ options.input.text.trim().length > 0;
+ // Accept audio when frames are present; sampleRateHz is optional (defaults applied later)
+ const hasAudio = !!(options?.input?.audio &&
+ options.input.audio.frames &&
+ typeof options.input.audio.frames[Symbol.asyncIterator] !== "undefined");
+ if (!hasText && !hasAudio) {
  const validationFailTime = process.hrtime.bigint();
  const validationDurationNs = validationFailTime - validationStartTime;
  logger.debug(`[NeuroLink] 💥 LOG_POINT_005_VALIDATION_FAILED`, {
@@ -1769,10 +1782,10 @@ export class NeuroLink {
  elapsedNs: (process.hrtime.bigint() - hrTimeStart).toString(),
  validationDurationNs: validationDurationNs.toString(),
  validationDurationMs: Number(validationDurationNs) / 1000000,
- validationError: "Stream options must include input.text as a non-empty string",
+ validationError: "Stream options must include either input.text or input.audio",
  message: "EXHAUSTIVE validation failure analysis with character-level debugging",
  });
- throw new Error("Stream options must include input.text as a non-empty string");
+ throw new Error("Stream options must include either input.text or input.audio");
  }
  const validationSuccessTime = process.hrtime.bigint();
  const validationDurationNs = validationSuccessTime - validationStartTime;
@@ -1784,10 +1797,11 @@ export class NeuroLink {
  elapsedNs: (process.hrtime.bigint() - hrTimeStart).toString(),
  validationDurationNs: validationDurationNs.toString(),
  validationDurationMs: Number(validationDurationNs) / 1000000,
- inputTextValid: true,
- inputTextLength: options.input.text.length,
- inputTextTrimmedLength: options.input.text.trim().length,
- inputTextPreview: options.input.text.substring(0, 100),
+ inputTextValid: hasText,
+ inputAudioPresent: hasAudio,
+ inputTextLength: hasText ? options.input.text.length : 0,
+ inputTextTrimmedLength: hasText ? options.input.text.trim().length : 0,
+ inputTextPreview: hasText ? options.input.text.substring(0, 100) : "",
  message: "EXHAUSTIVE validation success - proceeding with stream processing",
  });
  }
@@ -17,6 +17,7 @@ export declare class GoogleAIStudioProvider extends BaseProvider {
  protected getAISDKModel(): LanguageModelV1;
  protected handleProviderError(error: unknown): Error;
  protected executeStream(options: StreamOptions, _analysisSchema?: ZodUnknownSchema | Schema<unknown>): Promise<StreamResult>;
+ private executeAudioStreamViaGeminiLive;
  private getApiKey;
  }
  export default GoogleAIStudioProvider;
@@ -8,6 +8,16 @@ import { AuthenticationError, NetworkError, ProviderError, RateLimitError, } fro
  import { DEFAULT_MAX_TOKENS, DEFAULT_MAX_STEPS } from "../core/constants.js";
  import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
  import { buildMessagesArray } from "../utils/messageBuilder.js";
+ // Create Google GenAI client
+ async function createGoogleGenAIClient(apiKey) {
+ const mod = await import("@google/genai");
+ const ctor = mod.GoogleGenAI;
+ if (!ctor) {
+ throw new Error("@google/genai does not export GoogleGenAI");
+ }
+ const Ctor = ctor;
+ return new Ctor({ apiKey });
+ }
  // Environment variable setup
  if (!process.env.GOOGLE_GENERATIVE_AI_API_KEY &&
  process.env.GOOGLE_AI_API_KEY) {
@@ -61,6 +71,10 @@ export class GoogleAIStudioProvider extends BaseProvider {
  }
  // executeGenerate removed - BaseProvider handles all generation with tools
  async executeStream(options, _analysisSchema) {
+ // Phase 1: if audio input present, bridge to Gemini Live (Studio) using @google/genai
+ if (options.input?.audio) {
+ return await this.executeAudioStreamViaGeminiLive(options);
+ }
  this.validateStreamOptions(options);
  const startTime = Date.now();
  const apiKey = this.getApiKey();
@@ -115,6 +129,188 @@ export class GoogleAIStudioProvider extends BaseProvider {
  // ===================
  // HELPER METHODS
  // ===================
+ async executeAudioStreamViaGeminiLive(options) {
+ const startTime = Date.now();
+ const apiKey = this.getApiKey();
+ // Dynamic import to avoid hard dependency unless audio streaming is used
+ let client;
+ try {
+ client = await createGoogleGenAIClient(apiKey);
+ }
+ catch {
+ throw new AuthenticationError("Missing '@google/genai'. Install with: pnpm add @google/genai", this.providerName);
+ }
+ const model = this.modelName ||
+ process.env.GOOGLE_VOICE_AI_MODEL ||
+ "gemini-2.5-flash-preview-native-audio-dialog";
+ const queue = [];
+ let resolveNext = null;
+ let done = false;
+ const push = (item) => {
+ if (done) {
+ return;
+ }
+ if (item.type === "audio") {
+ if (resolveNext) {
+ const fn = resolveNext;
+ resolveNext = null;
+ fn({ value: { type: "audio", audio: item.audio }, done: false });
+ return;
+ }
+ }
+ queue.push(item);
+ };
+ const session = await client.live.connect({
+ model,
+ callbacks: {
+ onopen: () => {
+ // no-op
+ },
+ onmessage: async (message) => {
+ try {
+ const audio = message?.serverContent?.modelTurn?.parts?.[0]?.inlineData;
+ if (audio?.data) {
+ const buf = Buffer.from(String(audio.data), "base64");
+ const chunk = {
+ data: buf,
+ sampleRateHz: 24000,
+ channels: 1,
+ encoding: "PCM16LE",
+ };
+ push({ type: "audio", audio: chunk });
+ }
+ if (message?.serverContent?.interrupted) {
+ // allow consumer to handle; no special action required here
+ }
+ }
+ catch (e) {
+ push({ type: "error", error: e });
+ }
+ },
+ onerror: (e) => {
+ push({ type: "error", error: e });
+ },
+ onclose: (_e) => {
+ push({ type: "end" });
+ },
+ },
+ config: {
+ responseModalities: ["AUDIO"],
+ speechConfig: {
+ voiceConfig: { prebuiltVoiceConfig: { voiceName: "Orus" } },
+ },
+ },
+ });
+ // Feed upstream audio frames concurrently
+ (async () => {
+ try {
+ const spec = options.input?.audio;
+ if (!spec) {
+ logger.debug("[GeminiLive] No audio spec found on input; skipping upstream send");
+ return;
+ }
+ for await (const frame of spec.frames) {
+ // Zero-length frame acts as a 'flush' control signal
+ if (!frame || frame.byteLength === 0) {
+ try {
+ if (session.sendInput) {
+ await session.sendInput({ event: "flush" });
+ }
+ else if (session.sendRealtimeInput) {
+ await session.sendRealtimeInput({ event: "flush" });
+ }
+ }
+ catch (err) {
+ logger.debug("[GeminiLive] flush control failed (non-fatal)", {
+ error: err instanceof Error ? err.message : String(err),
+ });
+ }
+ continue;
+ }
+ // Convert PCM16LE buffer to base64 and wrap in genai Blob-like object
+ const base64 = frame.toString("base64");
+ const mimeType = `audio/pcm;rate=${spec.sampleRateHz || 16000}`;
+ await session.sendRealtimeInput?.({
+ media: { data: base64, mimeType },
+ });
+ }
+ // Best-effort flush signal if supported
+ try {
+ if (session.sendInput) {
+ await session.sendInput({ event: "flush" });
+ }
+ else if (session.sendRealtimeInput) {
+ await session.sendRealtimeInput({ event: "flush" });
+ }
+ }
+ catch (err) {
+ logger.debug("[GeminiLive] final flush failed (non-fatal)", {
+ error: err instanceof Error ? err.message : String(err),
+ });
+ }
+ }
+ catch (e) {
+ push({ type: "error", error: e });
+ }
+ })().catch(() => {
+ // ignore
+ });
+ // AsyncIterable for stream events
+ const asyncIterable = {
+ [Symbol.asyncIterator]() {
+ return {
+ async next() {
+ if (queue.length > 0) {
+ const item = queue.shift();
+ if (!item) {
+ return {
+ value: undefined,
+ done: true,
+ };
+ }
+ if (item.type === "audio") {
+ return {
+ value: { type: "audio", audio: item.audio },
+ done: false,
+ };
+ }
+ if (item.type === "end") {
+ done = true;
+ return {
+ value: undefined,
+ done: true,
+ };
+ }
+ if (item.type === "error") {
+ done = true;
+ throw item.error instanceof Error
+ ? item.error
+ : new Error(String(item.error));
+ }
+ }
+ if (done) {
+ return {
+ value: undefined,
+ done: true,
+ };
+ }
+ return await new Promise((resolve) => {
+ resolveNext = resolve;
+ });
+ },
+ };
+ },
+ };
+ return {
+ stream: asyncIterable,
+ provider: this.providerName,
+ model: model,
+ metadata: {
+ startTime,
+ streamId: `google-ai-audio-${Date.now()}`,
+ },
+ };
+ }
  getApiKey() {
  const apiKey = process.env.GOOGLE_AI_API_KEY || process.env.GOOGLE_GENERATIVE_AI_API_KEY;
  if (!apiKey) {
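
On the upstream side, the bridge forwards each PCM16LE Buffer as base64 at the declared sample rate (defaulting to 16 kHz) and treats a zero-length frame as a flush control signal. A minimal sketch of a frame producer that follows that convention, assuming a raw 16 kHz mono PCM16LE file; the path and the 100 ms frame size are illustrative:

```ts
import { createReadStream } from "node:fs";

// Yield raw PCM16LE frames from a file, then an empty Buffer so the
// provider issues its flush control signal. Assumes 16 kHz mono audio;
// 3200 bytes = 100 ms at 16 kHz * 2 bytes per sample.
async function* pcmFrames(path: string, frameBytes = 3200): AsyncIterable<Buffer> {
  let pending = Buffer.alloc(0);
  for await (const chunk of createReadStream(path)) {
    pending = Buffer.concat([pending, chunk as Buffer]);
    while (pending.length >= frameBytes) {
      yield pending.subarray(0, frameBytes);
      pending = pending.subarray(frameBytes);
    }
  }
  if (pending.length > 0) {
    yield pending;
  }
  yield Buffer.alloc(0); // zero-length frame = flush
}
```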
@@ -834,6 +834,7 @@ export class GoogleVertexProvider extends BaseProvider {
  message: "Message array built successfully",
  });
  }
+ /* eslint-disable-next-line max-lines-per-function */
  async executeStream(options, analysisSchema) {
  // Initialize stream execution tracking
  const streamExecutionId = `vertex-stream-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
@@ -863,7 +864,9 @@ export class GoogleVertexProvider extends BaseProvider {
  streamExecutionId,
  streamRequestDetails: {
  modelName: this.modelName,
- promptLength: options.input.text.length,
+ promptLength: typeof options.input?.text === "string"
+ ? options.input.text.length
+ : 0,
  hasSchema: !!analysisSchema,
  messagesCount: Array.isArray(messages) ? messages.length : 0,
  temperature: options?.temperature,
@@ -66,9 +66,23 @@ export interface StreamAnalyticsData {
  * Stream function options interface - Primary method for streaming content
  * Future-ready for multi-modal capabilities while maintaining text focus
  */
+ export type PCMEncoding = "PCM16LE";
+ export interface AudioInputSpec {
+ frames: AsyncIterable<Buffer>;
+ sampleRateHz?: number;
+ encoding?: PCMEncoding;
+ channels?: 1;
+ }
+ export interface AudioChunk {
+ data: Buffer;
+ sampleRateHz: number;
+ channels: number;
+ encoding: PCMEncoding;
+ }
  export interface StreamOptions {
  input: {
- text: string;
+ text?: string;
+ audio?: AudioInputSpec;
  };
  output?: {
  format?: "text" | "structured" | "json";
@@ -121,6 +135,9 @@ export interface StreamOptions {
  export interface StreamResult {
  stream: AsyncIterable<{
  content: string;
+ } | {
+ type: "audio";
+ audio: AudioChunk;
  }>;
  provider?: string;
  model?: string;
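
Together these types let a speech-to-speech call be expressed directly through NeuroLink's stream method. A minimal sketch under stated assumptions: the NeuroLink class is importable from the package root, "google-ai" is the provider id for the Google AI Studio provider shown above, and question.pcm is an illustrative 16 kHz mono PCM16LE recording:

```ts
import { NeuroLink } from "@juspay/neurolink"; // assumed root export of the NeuroLink class
import { readFile } from "node:fs/promises";

// Illustrative producer: yield one 16 kHz mono PCM16LE file as a single frame,
// then a zero-length Buffer so the provider flushes.
async function* frames(path: string): AsyncIterable<Buffer> {
  yield await readFile(path);
  yield Buffer.alloc(0);
}

async function speechToSpeech(): Promise<Buffer[]> {
  const neurolink = new NeuroLink();
  const result = await neurolink.stream({
    provider: "google-ai", // assumed provider id for the Google AI Studio provider shown above
    input: {
      audio: { frames: frames("question.pcm"), sampleRateHz: 16000, encoding: "PCM16LE", channels: 1 },
    },
  });
  const reply: Buffer[] = [];
  for await (const evt of result.stream) {
    if ("type" in evt && evt.type === "audio") {
      reply.push(evt.audio.data); // 24 kHz mono PCM16LE from the Gemini Live bridge
    } else if ("content" in evt) {
      process.stdout.write(evt.content);
    }
  }
  return reply;
}
```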
@@ -44,7 +44,7 @@ export function convertGenerateToStreamOptions(generateOptions) {
  export function convertStreamToGenerateOptions(streamOptions) {
  const generateOptions = {
  // Core input mapping
- input: streamOptions.input,
+ input: { text: (streamOptions.input && streamOptions.input.text) || "" },
  // Provider and model settings
  provider: streamOptions.provider,
  model: streamOptions.model,
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@juspay/neurolink",
- "version": "7.31.0",
+ "version": "7.32.0",
  "description": "Universal AI Development Platform with working MCP integration, multi-provider support, and professional CLI. Built-in tools operational, 58+ external MCP servers discoverable. Connect to filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 9 major providers: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure, Hugging Face, Ollama, and Mistral AI.",
  "author": {
  "name": "Juspay Technologies",
@@ -78,6 +78,7 @@
  "dev:full": "node tools/development/dev-server.js",
  "dev:health": "node tools/development/healthMonitor.js",
  "dev:demo": "concurrently \"pnpm run dev\" \"node neurolink-demo/complete-enhanced-server.js\"",
+ "demo:voice": "pnpm build && node examples/voice-demo/server.mjs",
  "// Build & Deploy (Complete Pipeline)": "",
  "build:complete": "node tools/automation/buildSystem.js",
  "build:analyze": "node tools/development/dependency-analyzer.js",
@@ -151,6 +152,7 @@
  "@aws-sdk/credential-provider-node": "^3.876.0",
  "@aws-sdk/types": "^3.862.0",
  "@google-cloud/vertexai": "^1.10.0",
+ "@google/genai": "^1.16.0",
  "@google/generative-ai": "^0.24.1",
  "@huggingface/inference": "^2.8.0",
  "@modelcontextprotocol/sdk": "^1.13.0",