@juspay/neurolink 7.30.1 → 7.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/CHANGELOG.md +12 -0
  2. package/dist/cli/factories/commandFactory.js +16 -2
  3. package/dist/core/baseProvider.d.ts +6 -6
  4. package/dist/core/baseProvider.js +30 -34
  5. package/dist/core/types.d.ts +2 -0
  6. package/dist/index.d.ts +1 -1
  7. package/dist/lib/core/baseProvider.d.ts +6 -6
  8. package/dist/lib/core/baseProvider.js +30 -34
  9. package/dist/lib/core/types.d.ts +2 -0
  10. package/dist/lib/index.d.ts +1 -1
  11. package/dist/lib/middleware/builtin/analytics.d.ts +1 -1
  12. package/dist/lib/middleware/builtin/guardrails.d.ts +1 -1
  13. package/dist/lib/middleware/factory.d.ts +1 -1
  14. package/dist/lib/middleware/index.d.ts +1 -1
  15. package/dist/lib/middleware/registry.d.ts +1 -1
  16. package/dist/lib/neurolink.js +32 -18
  17. package/dist/lib/providers/googleAiStudio.d.ts +1 -0
  18. package/dist/lib/providers/googleAiStudio.js +196 -0
  19. package/dist/lib/providers/googleVertex.js +4 -1
  20. package/dist/lib/types/streamTypes.d.ts +20 -1
  21. package/dist/lib/utils/optionsConversion.js +1 -1
  22. package/dist/middleware/builtin/analytics.d.ts +1 -1
  23. package/dist/middleware/builtin/guardrails.d.ts +1 -1
  24. package/dist/middleware/factory.d.ts +1 -1
  25. package/dist/middleware/index.d.ts +1 -1
  26. package/dist/middleware/registry.d.ts +1 -1
  27. package/dist/neurolink.js +32 -18
  28. package/dist/providers/googleAiStudio.d.ts +1 -0
  29. package/dist/providers/googleAiStudio.js +196 -0
  30. package/dist/providers/googleVertex.js +4 -1
  31. package/dist/types/streamTypes.d.ts +20 -1
  32. package/dist/utils/optionsConversion.js +1 -1
  33. package/package.json +3 -1
  34. /package/dist/lib/{middleware/types.d.ts → types/middlewareTypes.d.ts} +0 -0
  35. /package/dist/lib/{middleware/types.js → types/middlewareTypes.js} +0 -0
  36. /package/dist/{middleware/types.d.ts → types/middlewareTypes.d.ts} +0 -0
  37. /package/dist/{middleware/types.js → types/middlewareTypes.js} +0 -0
package/dist/lib/providers/googleAiStudio.js CHANGED
@@ -8,6 +8,16 @@ import { AuthenticationError, NetworkError, ProviderError, RateLimitError, } fro
 import { DEFAULT_MAX_TOKENS, DEFAULT_MAX_STEPS } from "../core/constants.js";
 import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
 import { buildMessagesArray } from "../utils/messageBuilder.js";
+// Create Google GenAI client
+async function createGoogleGenAIClient(apiKey) {
+    const mod = await import("@google/genai");
+    const ctor = mod.GoogleGenAI;
+    if (!ctor) {
+        throw new Error("@google/genai does not export GoogleGenAI");
+    }
+    const Ctor = ctor;
+    return new Ctor({ apiKey });
+}
 // Environment variable setup
 if (!process.env.GOOGLE_GENERATIVE_AI_API_KEY &&
     process.env.GOOGLE_AI_API_KEY) {
@@ -61,6 +71,10 @@ export class GoogleAIStudioProvider extends BaseProvider {
     }
     // executeGenerate removed - BaseProvider handles all generation with tools
     async executeStream(options, _analysisSchema) {
+        // Phase 1: if audio input present, bridge to Gemini Live (Studio) using @google/genai
+        if (options.input?.audio) {
+            return await this.executeAudioStreamViaGeminiLive(options);
+        }
         this.validateStreamOptions(options);
         const startTime = Date.now();
         const apiKey = this.getApiKey();
@@ -115,6 +129,188 @@ export class GoogleAIStudioProvider extends BaseProvider {
     // ===================
     // HELPER METHODS
     // ===================
+    async executeAudioStreamViaGeminiLive(options) {
+        const startTime = Date.now();
+        const apiKey = this.getApiKey();
+        // Dynamic import to avoid hard dependency unless audio streaming is used
+        let client;
+        try {
+            client = await createGoogleGenAIClient(apiKey);
+        }
+        catch {
+            throw new AuthenticationError("Missing '@google/genai'. Install with: pnpm add @google/genai", this.providerName);
+        }
+        const model = this.modelName ||
+            process.env.GOOGLE_VOICE_AI_MODEL ||
+            "gemini-2.5-flash-preview-native-audio-dialog";
+        const queue = [];
+        let resolveNext = null;
+        let done = false;
+        const push = (item) => {
+            if (done) {
+                return;
+            }
+            if (item.type === "audio") {
+                if (resolveNext) {
+                    const fn = resolveNext;
+                    resolveNext = null;
+                    fn({ value: { type: "audio", audio: item.audio }, done: false });
+                    return;
+                }
+            }
+            queue.push(item);
+        };
+        const session = await client.live.connect({
+            model,
+            callbacks: {
+                onopen: () => {
+                    // no-op
+                },
+                onmessage: async (message) => {
+                    try {
+                        const audio = message?.serverContent?.modelTurn?.parts?.[0]?.inlineData;
+                        if (audio?.data) {
+                            const buf = Buffer.from(String(audio.data), "base64");
+                            const chunk = {
+                                data: buf,
+                                sampleRateHz: 24000,
+                                channels: 1,
+                                encoding: "PCM16LE",
+                            };
+                            push({ type: "audio", audio: chunk });
+                        }
+                        if (message?.serverContent?.interrupted) {
+                            // allow consumer to handle; no special action required here
+                        }
+                    }
+                    catch (e) {
+                        push({ type: "error", error: e });
+                    }
+                },
+                onerror: (e) => {
+                    push({ type: "error", error: e });
+                },
+                onclose: (_e) => {
+                    push({ type: "end" });
+                },
+            },
+            config: {
+                responseModalities: ["AUDIO"],
+                speechConfig: {
+                    voiceConfig: { prebuiltVoiceConfig: { voiceName: "Orus" } },
+                },
+            },
+        });
+        // Feed upstream audio frames concurrently
+        (async () => {
+            try {
+                const spec = options.input?.audio;
+                if (!spec) {
+                    logger.debug("[GeminiLive] No audio spec found on input; skipping upstream send");
+                    return;
+                }
+                for await (const frame of spec.frames) {
+                    // Zero-length frame acts as a 'flush' control signal
+                    if (!frame || frame.byteLength === 0) {
+                        try {
+                            if (session.sendInput) {
+                                await session.sendInput({ event: "flush" });
+                            }
+                            else if (session.sendRealtimeInput) {
+                                await session.sendRealtimeInput({ event: "flush" });
+                            }
+                        }
+                        catch (err) {
+                            logger.debug("[GeminiLive] flush control failed (non-fatal)", {
+                                error: err instanceof Error ? err.message : String(err),
+                            });
+                        }
+                        continue;
+                    }
+                    // Convert PCM16LE buffer to base64 and wrap in genai Blob-like object
+                    const base64 = frame.toString("base64");
+                    const mimeType = `audio/pcm;rate=${spec.sampleRateHz || 16000}`;
+                    await session.sendRealtimeInput?.({
+                        media: { data: base64, mimeType },
+                    });
+                }
+                // Best-effort flush signal if supported
+                try {
+                    if (session.sendInput) {
+                        await session.sendInput({ event: "flush" });
+                    }
+                    else if (session.sendRealtimeInput) {
+                        await session.sendRealtimeInput({ event: "flush" });
+                    }
+                }
+                catch (err) {
+                    logger.debug("[GeminiLive] final flush failed (non-fatal)", {
+                        error: err instanceof Error ? err.message : String(err),
+                    });
+                }
+            }
+            catch (e) {
+                push({ type: "error", error: e });
+            }
+        })().catch(() => {
+            // ignore
+        });
+        // AsyncIterable for stream events
+        const asyncIterable = {
+            [Symbol.asyncIterator]() {
+                return {
+                    async next() {
+                        if (queue.length > 0) {
+                            const item = queue.shift();
+                            if (!item) {
+                                return {
+                                    value: undefined,
+                                    done: true,
+                                };
+                            }
+                            if (item.type === "audio") {
+                                return {
+                                    value: { type: "audio", audio: item.audio },
+                                    done: false,
+                                };
+                            }
+                            if (item.type === "end") {
+                                done = true;
+                                return {
+                                    value: undefined,
+                                    done: true,
+                                };
+                            }
+                            if (item.type === "error") {
+                                done = true;
+                                throw item.error instanceof Error
+                                    ? item.error
+                                    : new Error(String(item.error));
+                            }
+                        }
+                        if (done) {
+                            return {
+                                value: undefined,
+                                done: true,
+                            };
+                        }
+                        return await new Promise((resolve) => {
+                            resolveNext = resolve;
+                        });
+                    },
+                };
+            },
+        };
+        return {
+            stream: asyncIterable,
+            provider: this.providerName,
+            model: model,
+            metadata: {
+                startTime,
+                streamId: `google-ai-audio-${Date.now()}`,
+            },
+        };
+    }
     getApiKey() {
         const apiKey = process.env.GOOGLE_AI_API_KEY || process.env.GOOGLE_GENERATIVE_AI_API_KEY;
         if (!apiKey) {
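
The core of the new audio path is the bridge from the Live API's push-style callbacks to the pull-style async iterable that the stream result exposes: a buffer queue plus a parked-consumer resolver. Below is a minimal standalone TypeScript sketch of that pattern; the names (`makeAsyncQueue`, `QueueEvent`) are illustrative and not exported by the package.

```ts
type QueueEvent<T> =
  | { type: "data"; value: T }
  | { type: "end" }
  | { type: "error"; error: unknown };

function makeAsyncQueue<T>() {
  const queue: QueueEvent<T>[] = [];
  let resolveNext: ((r: IteratorResult<T>) => void) | null = null;
  let done = false;
  // Producer side: called from connection callbacks (onmessage/onerror/onclose).
  const push = (e: QueueEvent<T>) => {
    if (done) {
      return;
    }
    // Fast path: a consumer is already awaiting next(); hand the data over directly.
    if (e.type === "data" && resolveNext) {
      const fn = resolveNext;
      resolveNext = null;
      fn({ value: e.value, done: false });
      return;
    }
    queue.push(e);
  };
  // Consumer side: a pull-based AsyncIterable over whatever push() delivered.
  const iterable: AsyncIterable<T> = {
    [Symbol.asyncIterator]() {
      return {
        async next(): Promise<IteratorResult<T>> {
          const item = queue.shift();
          if (item) {
            if (item.type === "data") {
              return { value: item.value, done: false };
            }
            done = true;
            if (item.type === "error") {
              throw item.error instanceof Error
                ? item.error
                : new Error(String(item.error));
            }
            return { value: undefined, done: true };
          }
          if (done) {
            return { value: undefined, done: true };
          }
          // Nothing buffered yet: park until the next push() arrives.
          return new Promise<IteratorResult<T>>((resolve) => {
            resolveNext = resolve;
          });
        },
      };
    },
  };
  return { push, iterable };
}
```

Note that, as in the provider code above, only data events take the fast path to a parked consumer; end and error events are observed when the queue is next polled.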
package/dist/lib/providers/googleVertex.js CHANGED
@@ -834,6 +834,7 @@ export class GoogleVertexProvider extends BaseProvider {
             message: "Message array built successfully",
         });
     }
+    /* eslint-disable-next-line max-lines-per-function */
     async executeStream(options, analysisSchema) {
         // Initialize stream execution tracking
         const streamExecutionId = `vertex-stream-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
@@ -863,7 +864,9 @@ export class GoogleVertexProvider extends BaseProvider {
             streamExecutionId,
             streamRequestDetails: {
                 modelName: this.modelName,
-                promptLength: options.input.text.length,
+                promptLength: typeof options.input?.text === "string"
+                    ? options.input.text.length
+                    : 0,
                 hasSchema: !!analysisSchema,
                 messagesCount: Array.isArray(messages) ? messages.length : 0,
                 temperature: options?.temperature,
package/dist/lib/types/streamTypes.d.ts CHANGED
@@ -5,6 +5,7 @@ import type { EvaluationData } from "../index.js";
 import type { TokenUsage } from "./providers.js";
 import type { UnknownRecord, JsonValue } from "./common.js";
 import type { ChatMessage } from "./conversationTypes.js";
+import type { MiddlewareFactoryOptions } from "../types/middlewareTypes.js";
 /**
  * Interface for tool execution calls (AI SDK compatible)
  */
@@ -65,9 +66,23 @@ export interface StreamAnalyticsData {
  * Stream function options interface - Primary method for streaming content
  * Future-ready for multi-modal capabilities while maintaining text focus
  */
+export type PCMEncoding = "PCM16LE";
+export interface AudioInputSpec {
+    frames: AsyncIterable<Buffer>;
+    sampleRateHz?: number;
+    encoding?: PCMEncoding;
+    channels?: 1;
+}
+export interface AudioChunk {
+    data: Buffer;
+    sampleRateHz: number;
+    channels: number;
+    encoding: PCMEncoding;
+}
 export interface StreamOptions {
     input: {
-        text: string;
+        text?: string;
+        audio?: AudioInputSpec;
     };
     output?: {
         format?: "text" | "structured" | "json";
@@ -111,6 +126,7 @@ export interface StreamOptions {
         fallbackToGenerate?: boolean;
     };
     conversationMessages?: ChatMessage[];
+    middleware?: MiddlewareFactoryOptions;
 }
 /**
  * Stream function result interface - Primary output format for streaming
@@ -119,6 +135,9 @@ export interface StreamOptions {
 export interface StreamResult {
     stream: AsyncIterable<{
         content: string;
+    } | {
+        type: "audio";
+        audio: AudioChunk;
     }>;
     provider?: string;
     model?: string;
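
With `text` now optional and `audio` accepted, a caller describes a live audio session as an `AudioInputSpec` whose `frames` is any `AsyncIterable<Buffer>` of PCM16LE data; a zero-length `Buffer` is treated upstream as a flush signal. A hypothetical consumer sketch follows; the file name, frame size, and the assumption that these types are re-exported from the package root are all illustrative.

```ts
import { createReadStream } from "node:fs";
import type { AudioInputSpec, StreamOptions } from "@juspay/neurolink"; // assumed re-export

// Illustrative source: raw 16 kHz mono PCM16LE read in ~100 ms frames,
// followed by a zero-length Buffer that the provider treats as 'flush'.
async function* pcmFrames(path: string): AsyncIterable<Buffer> {
  const stream = createReadStream(path, { highWaterMark: 3200 });
  for await (const chunk of stream) {
    yield chunk as Buffer;
  }
  yield Buffer.alloc(0); // flush control signal
}

const audio: AudioInputSpec = {
  frames: pcmFrames("prompt.pcm"),
  sampleRateHz: 16000, // optional; the provider defaults the mimeType rate to 16000
  encoding: "PCM16LE",
  channels: 1,
};

const options: StreamOptions = { input: { audio } };
```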
package/dist/lib/utils/optionsConversion.js CHANGED
@@ -44,7 +44,7 @@ export function convertGenerateToStreamOptions(generateOptions) {
 export function convertStreamToGenerateOptions(streamOptions) {
     const generateOptions = {
         // Core input mapping
-        input: streamOptions.input,
+        input: { text: (streamOptions.input && streamOptions.input.text) || "" },
         // Provider and model settings
         provider: streamOptions.provider,
         model: streamOptions.model,
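
One consequence worth noting: converting stream options to generate options now always produces a plain text input, so an audio-only `StreamOptions` degrades to an empty prompt instead of leaking the audio spec into the generate path. A reduced sketch of the new mapping, with loose illustrative types:

```ts
// Audio-only stream input converts to an empty-text generate input.
const input: { text?: string; audio?: unknown } = { audio: {} };
const generateInput = { text: (input && input.text) || "" };
// => { text: "" }; the audio spec is intentionally not carried into generate()
```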
package/dist/middleware/builtin/analytics.d.ts CHANGED
@@ -1,4 +1,4 @@
-import type { NeuroLinkMiddleware } from "../types.js";
+import type { NeuroLinkMiddleware } from "../../types/middlewareTypes.js";
 /**
  * Create analytics middleware for tracking AI model usage
  * Collects metrics on token usage, response times, and model performance
package/dist/middleware/builtin/guardrails.d.ts CHANGED
@@ -1,5 +1,5 @@
 import type { LanguageModelV1 } from "ai";
-import type { NeuroLinkMiddleware } from "../types.js";
+import type { NeuroLinkMiddleware } from "../../types/middlewareTypes.js";
 /**
  * Configuration for the Guardrails middleware.
  */
package/dist/middleware/factory.d.ts CHANGED
@@ -1,5 +1,5 @@
 import type { LanguageModelV1 } from "ai";
-import type { MiddlewareContext, MiddlewareConfig, MiddlewareFactoryOptions, MiddlewareChainStats, MiddlewarePreset, NeuroLinkMiddleware, MiddlewareRegistrationOptions } from "./types.js";
+import type { MiddlewareContext, MiddlewareConfig, MiddlewareFactoryOptions, MiddlewareChainStats, MiddlewarePreset, NeuroLinkMiddleware, MiddlewareRegistrationOptions } from "../types/middlewareTypes.js";
 import { MiddlewareRegistry } from "./registry.js";
 /**
  * Middleware factory for creating and applying middleware chains.
package/dist/middleware/index.d.ts CHANGED
@@ -6,7 +6,7 @@
  * of language models with features like analytics, guardrails, caching, and more.
  */
 import { MiddlewareFactory } from "./factory.js";
-export type { NeuroLinkMiddleware, MiddlewareConfig, MiddlewareContext, MiddlewareConditions, MiddlewareRegistrationOptions, MiddlewareExecutionResult, MiddlewareChainStats, MiddlewarePreset, MiddlewareFactoryOptions, BuiltInMiddlewareType, } from "./types.js";
+export type { NeuroLinkMiddleware, MiddlewareConfig, MiddlewareContext, MiddlewareConditions, MiddlewareRegistrationOptions, MiddlewareExecutionResult, MiddlewareChainStats, MiddlewarePreset, MiddlewareFactoryOptions, BuiltInMiddlewareType, } from "../types/middlewareTypes.js";
 export type { LanguageModelV1Middleware } from "ai";
 export { MiddlewareFactory };
 export default MiddlewareFactory;
package/dist/middleware/registry.d.ts CHANGED
@@ -1,5 +1,5 @@
 import type { LanguageModelV1Middleware } from "ai";
-import type { NeuroLinkMiddleware, MiddlewareConfig, MiddlewareContext, MiddlewareRegistrationOptions, MiddlewareExecutionResult } from "./types.js";
+import type { NeuroLinkMiddleware, MiddlewareConfig, MiddlewareContext, MiddlewareRegistrationOptions, MiddlewareExecutionResult } from "../types/middlewareTypes.js";
 /**
  * Manages the registration, configuration, and execution of middleware for a single factory instance.
  */
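
Behind these one-line import changes, the middleware type definitions moved from `dist/middleware/types.*` to `dist/types/middlewareTypes.*` (renames 34-37 in the file list above). Code that deep-imports these types would need the new path; a hypothetical sketch, assuming the package permits deep imports into `dist`:

```ts
// Before (7.30.x):
// import type { NeuroLinkMiddleware } from "@juspay/neurolink/dist/middleware/types.js";

// After (7.32.0):
import type { NeuroLinkMiddleware } from "@juspay/neurolink/dist/types/middlewareTypes.js";
```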
package/dist/neurolink.js CHANGED
@@ -1312,8 +1312,10 @@ export class NeuroLink {
             needsInitialization: !this.mcpInitialized,
             message: "Checking MCP initialization status before generation",
         });
-        // Initialize MCP if needed
-        await this.initializeMCP();
+        // Initialize MCP only when tools are enabled
+        if (!options.disableTools) {
+            await this.initializeMCP();
+        }
         const mcpInitCheckEndTime = process.hrtime.bigint();
         const mcpInitCheckDurationNs = mcpInitCheckEndTime - mcpInitCheckStartTime;
         logger.debug(`[NeuroLink] ✅ LOG_POINT_T003_MCP_INIT_CHECK_COMPLETE`, {
@@ -1573,10 +1575,16 @@ export class NeuroLink {
         };
         // Call the new stream method
         const result = await this.stream(streamOptions);
-        // Convert StreamResult to simple string async iterable
+        // Convert StreamResult to simple string async iterable (filter text events only)
         async function* stringStream() {
-            for await (const chunk of result.stream) {
-                yield chunk.content;
+            for await (const evt of result.stream) {
+                const anyEvt = evt;
+                if (anyEvt && typeof anyEvt === "object" && "content" in anyEvt) {
+                    const content = anyEvt.content;
+                    if (typeof content === "string") {
+                        yield content;
+                    }
+                }
             }
         }
         return stringStream();
@@ -1646,12 +1654,13 @@ export class NeuroLink {
         let factoryResult;
         try {
             await this.initializeMCP();
-            const _originalPrompt = options.input.text;
             factoryResult = processStreamingFactoryOptions(options);
             enhancedOptions = createCleanStreamOptions(options);
-            const { toolResults: _toolResults, enhancedPrompt } = await this.detectAndExecuteTools(options.input.text, undefined);
-            if (enhancedPrompt !== options.input.text) {
-                enhancedOptions.input.text = enhancedPrompt;
+            if (options.input?.text) {
+                const { toolResults: _toolResults, enhancedPrompt } = await this.detectAndExecuteTools(options.input.text, undefined);
+                if (enhancedPrompt !== options.input.text) {
+                    enhancedOptions.input.text = enhancedPrompt;
+                }
             }
             const { stream: mcpStream, provider: providerName } = await this.createMCPStream(enhancedOptions);
             const streamResult = await this.processStreamResult(mcpStream, enhancedOptions, factoryResult);
@@ -1756,9 +1765,13 @@ export class NeuroLink {
             validationStartTimeNs: validationStartTime.toString(),
             message: "Starting comprehensive input validation process",
         });
-        if (!options?.input?.text ||
-            typeof options.input.text !== "string" ||
-            options.input.text.trim() === "") {
+        const hasText = typeof options?.input?.text === "string" &&
+            options.input.text.trim().length > 0;
+        // Accept audio when frames are present; sampleRateHz is optional (defaults applied later)
+        const hasAudio = !!(options?.input?.audio &&
+            options.input.audio.frames &&
+            typeof options.input.audio.frames[Symbol.asyncIterator] !== "undefined");
+        if (!hasText && !hasAudio) {
             const validationFailTime = process.hrtime.bigint();
             const validationDurationNs = validationFailTime - validationStartTime;
             logger.debug(`[NeuroLink] 💥 LOG_POINT_005_VALIDATION_FAILED`, {
@@ -1769,10 +1782,10 @@ export class NeuroLink {
                 elapsedNs: (process.hrtime.bigint() - hrTimeStart).toString(),
                 validationDurationNs: validationDurationNs.toString(),
                 validationDurationMs: Number(validationDurationNs) / 1000000,
-                validationError: "Stream options must include input.text as a non-empty string",
+                validationError: "Stream options must include either input.text or input.audio",
                 message: "EXHAUSTIVE validation failure analysis with character-level debugging",
             });
-            throw new Error("Stream options must include input.text as a non-empty string");
+            throw new Error("Stream options must include either input.text or input.audio");
         }
         const validationSuccessTime = process.hrtime.bigint();
         const validationDurationNs = validationSuccessTime - validationStartTime;
@@ -1784,10 +1797,11 @@ export class NeuroLink {
             elapsedNs: (process.hrtime.bigint() - hrTimeStart).toString(),
             validationDurationNs: validationDurationNs.toString(),
             validationDurationMs: Number(validationDurationNs) / 1000000,
-            inputTextValid: true,
-            inputTextLength: options.input.text.length,
-            inputTextTrimmedLength: options.input.text.trim().length,
-            inputTextPreview: options.input.text.substring(0, 100),
+            inputTextValid: hasText,
+            inputAudioPresent: hasAudio,
+            inputTextLength: hasText ? options.input.text.length : 0,
+            inputTextTrimmedLength: hasText ? options.input.text.trim().length : 0,
+            inputTextPreview: hasText ? options.input.text.substring(0, 100) : "",
             message: "EXHAUSTIVE validation success - proceeding with stream processing",
         });
     }
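
Since the stream result can now yield either `{ content }` text events or `{ type: "audio", audio }` events, consumers that want both modalities need the same discrimination that `stringStream()` applies internally. A hypothetical consumer sketch; the import path and the `playAudio` sink are illustrative.

```ts
import type { AudioChunk } from "@juspay/neurolink"; // assumed re-export

type StreamEvent = { content: string } | { type: "audio"; audio: AudioChunk };

// Route text deltas to stdout and audio chunks to a playback sink.
async function consume(
  stream: AsyncIterable<StreamEvent>,
  playAudio: (chunk: AudioChunk) => void,
): Promise<void> {
  for await (const evt of stream) {
    if ("content" in evt) {
      process.stdout.write(evt.content);
    } else if (evt.type === "audio") {
      playAudio(evt.audio); // 24 kHz mono PCM16LE per the provider's defaults
    }
  }
}
```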
package/dist/providers/googleAiStudio.d.ts CHANGED
@@ -17,6 +17,7 @@ export declare class GoogleAIStudioProvider extends BaseProvider {
     protected getAISDKModel(): LanguageModelV1;
     protected handleProviderError(error: unknown): Error;
     protected executeStream(options: StreamOptions, _analysisSchema?: ZodUnknownSchema | Schema<unknown>): Promise<StreamResult>;
+    private executeAudioStreamViaGeminiLive;
     private getApiKey;
 }
 export default GoogleAIStudioProvider;
package/dist/providers/googleAiStudio.js CHANGED (same hunks as package/dist/lib/providers/googleAiStudio.js above)