@juspay/neurolink 7.30.1 → 7.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/cli/factories/commandFactory.js +16 -2
- package/dist/core/baseProvider.d.ts +6 -6
- package/dist/core/baseProvider.js +30 -34
- package/dist/core/types.d.ts +2 -0
- package/dist/index.d.ts +1 -1
- package/dist/lib/core/baseProvider.d.ts +6 -6
- package/dist/lib/core/baseProvider.js +30 -34
- package/dist/lib/core/types.d.ts +2 -0
- package/dist/lib/index.d.ts +1 -1
- package/dist/lib/middleware/builtin/analytics.d.ts +1 -1
- package/dist/lib/middleware/builtin/guardrails.d.ts +1 -1
- package/dist/lib/middleware/factory.d.ts +1 -1
- package/dist/lib/middleware/index.d.ts +1 -1
- package/dist/lib/middleware/registry.d.ts +1 -1
- package/dist/lib/neurolink.js +32 -18
- package/dist/lib/providers/googleAiStudio.d.ts +1 -0
- package/dist/lib/providers/googleAiStudio.js +196 -0
- package/dist/lib/providers/googleVertex.js +4 -1
- package/dist/lib/types/streamTypes.d.ts +20 -1
- package/dist/lib/utils/optionsConversion.js +1 -1
- package/dist/middleware/builtin/analytics.d.ts +1 -1
- package/dist/middleware/builtin/guardrails.d.ts +1 -1
- package/dist/middleware/factory.d.ts +1 -1
- package/dist/middleware/index.d.ts +1 -1
- package/dist/middleware/registry.d.ts +1 -1
- package/dist/neurolink.js +32 -18
- package/dist/providers/googleAiStudio.d.ts +1 -0
- package/dist/providers/googleAiStudio.js +196 -0
- package/dist/providers/googleVertex.js +4 -1
- package/dist/types/streamTypes.d.ts +20 -1
- package/dist/utils/optionsConversion.js +1 -1
- package/package.json +3 -1
- /package/dist/lib/{middleware/types.d.ts → types/middlewareTypes.d.ts} +0 -0
- /package/dist/lib/{middleware/types.js → types/middlewareTypes.js} +0 -0
- /package/dist/{middleware/types.d.ts → types/middlewareTypes.d.ts} +0 -0
- /package/dist/{middleware/types.js → types/middlewareTypes.js} +0 -0

package/dist/lib/providers/googleAiStudio.js
CHANGED

@@ -8,6 +8,16 @@ import { AuthenticationError, NetworkError, ProviderError, RateLimitError, } fro
 import { DEFAULT_MAX_TOKENS, DEFAULT_MAX_STEPS } from "../core/constants.js";
 import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
 import { buildMessagesArray } from "../utils/messageBuilder.js";
+// Create Google GenAI client
+async function createGoogleGenAIClient(apiKey) {
+    const mod = await import("@google/genai");
+    const ctor = mod.GoogleGenAI;
+    if (!ctor) {
+        throw new Error("@google/genai does not export GoogleGenAI");
+    }
+    const Ctor = ctor;
+    return new Ctor({ apiKey });
+}
 // Environment variable setup
 if (!process.env.GOOGLE_GENERATIVE_AI_API_KEY &&
     process.env.GOOGLE_AI_API_KEY) {
@@ -61,6 +71,10 @@ export class GoogleAIStudioProvider extends BaseProvider {
     }
     // executeGenerate removed - BaseProvider handles all generation with tools
     async executeStream(options, _analysisSchema) {
+        // Phase 1: if audio input present, bridge to Gemini Live (Studio) using @google/genai
+        if (options.input?.audio) {
+            return await this.executeAudioStreamViaGeminiLive(options);
+        }
         this.validateStreamOptions(options);
         const startTime = Date.now();
         const apiKey = this.getApiKey();
@@ -115,6 +129,188 @@ export class GoogleAIStudioProvider extends BaseProvider {
     // ===================
     // HELPER METHODS
     // ===================
+    async executeAudioStreamViaGeminiLive(options) {
+        const startTime = Date.now();
+        const apiKey = this.getApiKey();
+        // Dynamic import to avoid hard dependency unless audio streaming is used
+        let client;
+        try {
+            client = await createGoogleGenAIClient(apiKey);
+        }
+        catch {
+            throw new AuthenticationError("Missing '@google/genai'. Install with: pnpm add @google/genai", this.providerName);
+        }
+        const model = this.modelName ||
+            process.env.GOOGLE_VOICE_AI_MODEL ||
+            "gemini-2.5-flash-preview-native-audio-dialog";
+        const queue = [];
+        let resolveNext = null;
+        let done = false;
+        const push = (item) => {
+            if (done) {
+                return;
+            }
+            if (item.type === "audio") {
+                if (resolveNext) {
+                    const fn = resolveNext;
+                    resolveNext = null;
+                    fn({ value: { type: "audio", audio: item.audio }, done: false });
+                    return;
+                }
+            }
+            queue.push(item);
+        };
+        const session = await client.live.connect({
+            model,
+            callbacks: {
+                onopen: () => {
+                    // no-op
+                },
+                onmessage: async (message) => {
+                    try {
+                        const audio = message?.serverContent?.modelTurn?.parts?.[0]?.inlineData;
+                        if (audio?.data) {
+                            const buf = Buffer.from(String(audio.data), "base64");
+                            const chunk = {
+                                data: buf,
+                                sampleRateHz: 24000,
+                                channels: 1,
+                                encoding: "PCM16LE",
+                            };
+                            push({ type: "audio", audio: chunk });
+                        }
+                        if (message?.serverContent?.interrupted) {
+                            // allow consumer to handle; no special action required here
+                        }
+                    }
+                    catch (e) {
+                        push({ type: "error", error: e });
+                    }
+                },
+                onerror: (e) => {
+                    push({ type: "error", error: e });
+                },
+                onclose: (_e) => {
+                    push({ type: "end" });
+                },
+            },
+            config: {
+                responseModalities: ["AUDIO"],
+                speechConfig: {
+                    voiceConfig: { prebuiltVoiceConfig: { voiceName: "Orus" } },
+                },
+            },
+        });
+        // Feed upstream audio frames concurrently
+        (async () => {
+            try {
+                const spec = options.input?.audio;
+                if (!spec) {
+                    logger.debug("[GeminiLive] No audio spec found on input; skipping upstream send");
+                    return;
+                }
+                for await (const frame of spec.frames) {
+                    // Zero-length frame acts as a 'flush' control signal
+                    if (!frame || frame.byteLength === 0) {
+                        try {
+                            if (session.sendInput) {
+                                await session.sendInput({ event: "flush" });
+                            }
+                            else if (session.sendRealtimeInput) {
+                                await session.sendRealtimeInput({ event: "flush" });
+                            }
+                        }
+                        catch (err) {
+                            logger.debug("[GeminiLive] flush control failed (non-fatal)", {
+                                error: err instanceof Error ? err.message : String(err),
+                            });
+                        }
+                        continue;
+                    }
+                    // Convert PCM16LE buffer to base64 and wrap in genai Blob-like object
+                    const base64 = frame.toString("base64");
+                    const mimeType = `audio/pcm;rate=${spec.sampleRateHz || 16000}`;
+                    await session.sendRealtimeInput?.({
+                        media: { data: base64, mimeType },
+                    });
+                }
+                // Best-effort flush signal if supported
+                try {
+                    if (session.sendInput) {
+                        await session.sendInput({ event: "flush" });
+                    }
+                    else if (session.sendRealtimeInput) {
+                        await session.sendRealtimeInput({ event: "flush" });
+                    }
+                }
+                catch (err) {
+                    logger.debug("[GeminiLive] final flush failed (non-fatal)", {
+                        error: err instanceof Error ? err.message : String(err),
+                    });
+                }
+            }
+            catch (e) {
+                push({ type: "error", error: e });
+            }
+        })().catch(() => {
+            // ignore
+        });
+        // AsyncIterable for stream events
+        const asyncIterable = {
+            [Symbol.asyncIterator]() {
+                return {
+                    async next() {
+                        if (queue.length > 0) {
+                            const item = queue.shift();
+                            if (!item) {
+                                return {
+                                    value: undefined,
+                                    done: true,
+                                };
+                            }
+                            if (item.type === "audio") {
+                                return {
+                                    value: { type: "audio", audio: item.audio },
+                                    done: false,
+                                };
+                            }
+                            if (item.type === "end") {
+                                done = true;
+                                return {
+                                    value: undefined,
+                                    done: true,
+                                };
+                            }
+                            if (item.type === "error") {
+                                done = true;
+                                throw item.error instanceof Error
+                                    ? item.error
+                                    : new Error(String(item.error));
+                            }
+                        }
+                        if (done) {
+                            return {
+                                value: undefined,
+                                done: true,
+                            };
+                        }
+                        return await new Promise((resolve) => {
+                            resolveNext = resolve;
+                        });
+                    },
+                };
+            },
+        };
+        return {
+            stream: asyncIterable,
+            provider: this.providerName,
+            model: model,
+            metadata: {
+                startTime,
+                streamId: `google-ai-audio-${Date.now()}`,
+            },
+        };
+    }
     getApiKey() {
         const apiKey = process.env.GOOGLE_AI_API_KEY || process.env.GOOGLE_GENERATIVE_AI_API_KEY;
         if (!apiKey) {
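
For orientation, here is a minimal consumer sketch of this new audio path. It is not from the diff: it assumes `NeuroLink` is the package's public entry class whose `stream()` method routes to the provider patched above, and that `"google-ai"` is the provider id for Google AI Studio; the frame source is hypothetical. Only the `AsyncIterable<Buffer>` frame contract, the zero-length-frame flush convention, and the `AudioChunk` event shape come from this diff.

import { NeuroLink } from "@juspay/neurolink";

// Illustrative PCM16LE frame source; a real caller would feed microphone capture here.
async function* micFrames(): AsyncIterable<Buffer> {
    yield Buffer.alloc(3200); // 100 ms of 16 kHz mono PCM16LE silence
    yield Buffer.alloc(0); // zero-length frame = 'flush' control signal (see above)
}

async function main(): Promise<void> {
    const neurolink = new NeuroLink();
    const result = await neurolink.stream({
        provider: "google-ai", // assumed provider id for Google AI Studio
        input: {
            audio: {
                frames: micFrames(),
                sampleRateHz: 16000,
                encoding: "PCM16LE",
                channels: 1,
            },
        },
    });
    for await (const evt of result.stream) {
        if ("type" in evt && evt.type === "audio") {
            // evt.audio is an AudioChunk: 24 kHz mono PCM16LE per the provider code above
            process.stdout.write(evt.audio.data);
        }
    }
}

main().catch(console.error);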

package/dist/lib/providers/googleVertex.js
CHANGED

@@ -834,6 +834,7 @@ export class GoogleVertexProvider extends BaseProvider {
             message: "Message array built successfully",
         });
     }
+    /* eslint-disable-next-line max-lines-per-function */
     async executeStream(options, analysisSchema) {
         // Initialize stream execution tracking
         const streamExecutionId = `vertex-stream-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
@@ -863,7 +864,9 @@ export class GoogleVertexProvider extends BaseProvider {
             streamExecutionId,
             streamRequestDetails: {
                 modelName: this.modelName,
-                promptLength: options.input
+                promptLength: typeof options.input?.text === "string"
+                    ? options.input.text.length
+                    : 0,
                 hasSchema: !!analysisSchema,
                 messagesCount: Array.isArray(messages) ? messages.length : 0,
                 temperature: options?.temperature,

package/dist/lib/types/streamTypes.d.ts
CHANGED

@@ -5,6 +5,7 @@ import type { EvaluationData } from "../index.js";
 import type { TokenUsage } from "./providers.js";
 import type { UnknownRecord, JsonValue } from "./common.js";
 import type { ChatMessage } from "./conversationTypes.js";
+import type { MiddlewareFactoryOptions } from "../types/middlewareTypes.js";
 /**
  * Interface for tool execution calls (AI SDK compatible)
  */
@@ -65,9 +66,23 @@ export interface StreamAnalyticsData {
  * Stream function options interface - Primary method for streaming content
  * Future-ready for multi-modal capabilities while maintaining text focus
  */
+export type PCMEncoding = "PCM16LE";
+export interface AudioInputSpec {
+    frames: AsyncIterable<Buffer>;
+    sampleRateHz?: number;
+    encoding?: PCMEncoding;
+    channels?: 1;
+}
+export interface AudioChunk {
+    data: Buffer;
+    sampleRateHz: number;
+    channels: number;
+    encoding: PCMEncoding;
+}
 export interface StreamOptions {
     input: {
-        text
+        text?: string;
+        audio?: AudioInputSpec;
     };
     output?: {
         format?: "text" | "structured" | "json";
@@ -111,6 +126,7 @@ export interface StreamOptions {
         fallbackToGenerate?: boolean;
     };
     conversationMessages?: ChatMessage[];
+    middleware?: MiddlewareFactoryOptions;
 }
 /**
  * Stream function result interface - Primary output format for streaming
@@ -119,6 +135,9 @@ export interface StreamOptions {
 export interface StreamResult {
     stream: AsyncIterable<{
         content: string;
+    } | {
+        type: "audio";
+        audio: AudioChunk;
     }>;
     provider?: string;
     model?: string;
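
Because `StreamResult.stream` is now a union of text and audio events, existing text-only consumers need a narrowing step. A sketch using only the types declared above (the type import path is assumed, not confirmed by this diff):

import type { StreamResult } from "@juspay/neurolink"; // assumed type re-export

async function drain(result: StreamResult): Promise<{ text: string; audioBytes: number }> {
    let text = "";
    let audioBytes = 0;
    for await (const evt of result.stream) {
        if ("content" in evt) {
            text += evt.content; // text event
        } else if (evt.type === "audio") {
            audioBytes += evt.audio.data.byteLength; // AudioChunk event
        }
    }
    return { text, audioBytes };
}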

package/dist/lib/utils/optionsConversion.js
CHANGED

@@ -44,7 +44,7 @@ export function convertGenerateToStreamOptions(generateOptions) {
 export function convertStreamToGenerateOptions(streamOptions) {
     const generateOptions = {
         // Core input mapping
-        input: streamOptions.input,
+        input: { text: (streamOptions.input && streamOptions.input.text) || "" },
         // Provider and model settings
         provider: streamOptions.provider,
         model: streamOptions.model,
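
One consequence worth noting: with this mapping, audio-only stream options convert to generate options with an empty prompt, since only the text field is carried over. A small sketch; the deep import path matches the file's location in the published package, but whether the exports map exposes it is an assumption:

// Assumed deep import; adjust to however the package exposes this helper.
import { convertStreamToGenerateOptions } from "@juspay/neurolink/dist/utils/optionsConversion.js";

async function* silence(): AsyncIterable<Buffer> {
    yield Buffer.alloc(0);
}

const generateOptions = convertStreamToGenerateOptions({
    input: { audio: { frames: silence() } }, // audio-only stream options
});
console.log(generateOptions.input); // { text: "" } - the audio spec does not survive the conversion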

package/dist/middleware/builtin/analytics.d.ts
CHANGED

@@ -1,4 +1,4 @@
-import type { NeuroLinkMiddleware } from "
+import type { NeuroLinkMiddleware } from "../../types/middlewareTypes.js";
 /**
  * Create analytics middleware for tracking AI model usage
  * Collects metrics on token usage, response times, and model performance

package/dist/middleware/factory.d.ts
CHANGED

@@ -1,5 +1,5 @@
 import type { LanguageModelV1 } from "ai";
-import type { MiddlewareContext, MiddlewareConfig, MiddlewareFactoryOptions, MiddlewareChainStats, MiddlewarePreset, NeuroLinkMiddleware, MiddlewareRegistrationOptions } from "
+import type { MiddlewareContext, MiddlewareConfig, MiddlewareFactoryOptions, MiddlewareChainStats, MiddlewarePreset, NeuroLinkMiddleware, MiddlewareRegistrationOptions } from "../types/middlewareTypes.js";
 import { MiddlewareRegistry } from "./registry.js";
 /**
  * Middleware factory for creating and applying middleware chains.

package/dist/middleware/index.d.ts
CHANGED

@@ -6,7 +6,7 @@
  * of language models with features like analytics, guardrails, caching, and more.
  */
 import { MiddlewareFactory } from "./factory.js";
-export type { NeuroLinkMiddleware, MiddlewareConfig, MiddlewareContext, MiddlewareConditions, MiddlewareRegistrationOptions, MiddlewareExecutionResult, MiddlewareChainStats, MiddlewarePreset, MiddlewareFactoryOptions, BuiltInMiddlewareType, } from "
+export type { NeuroLinkMiddleware, MiddlewareConfig, MiddlewareContext, MiddlewareConditions, MiddlewareRegistrationOptions, MiddlewareExecutionResult, MiddlewareChainStats, MiddlewarePreset, MiddlewareFactoryOptions, BuiltInMiddlewareType, } from "../types/middlewareTypes.js";
 export type { LanguageModelV1Middleware } from "ai";
 export { MiddlewareFactory };
 export default MiddlewareFactory;

package/dist/middleware/registry.d.ts
CHANGED

@@ -1,5 +1,5 @@
 import type { LanguageModelV1Middleware } from "ai";
-import type { NeuroLinkMiddleware, MiddlewareConfig, MiddlewareContext, MiddlewareRegistrationOptions, MiddlewareExecutionResult } from "
+import type { NeuroLinkMiddleware, MiddlewareConfig, MiddlewareContext, MiddlewareRegistrationOptions, MiddlewareExecutionResult } from "../types/middlewareTypes.js";
 /**
  * Manages the registration, configuration, and execution of middleware for a single factory instance.
 */
package/dist/neurolink.js
CHANGED
@@ -1312,8 +1312,10 @@ export class NeuroLink {
             needsInitialization: !this.mcpInitialized,
             message: "Checking MCP initialization status before generation",
         });
-        // Initialize MCP
-
+        // Initialize MCP only when tools are enabled
+        if (!options.disableTools) {
+            await this.initializeMCP();
+        }
         const mcpInitCheckEndTime = process.hrtime.bigint();
         const mcpInitCheckDurationNs = mcpInitCheckEndTime - mcpInitCheckStartTime;
         logger.debug(`[NeuroLink] ✅ LOG_POINT_T003_MCP_INIT_CHECK_COMPLETE`, {
@@ -1573,10 +1575,16 @@ export class NeuroLink {
         };
         // Call the new stream method
         const result = await this.stream(streamOptions);
-        // Convert StreamResult to simple string async iterable
+        // Convert StreamResult to simple string async iterable (filter text events only)
         async function* stringStream() {
-            for await (const
-
+            for await (const evt of result.stream) {
+                const anyEvt = evt;
+                if (anyEvt && typeof anyEvt === "object" && "content" in anyEvt) {
+                    const content = anyEvt.content;
+                    if (typeof content === "string") {
+                        yield content;
+                    }
+                }
             }
         }
         return stringStream();
@@ -1646,12 +1654,13 @@ export class NeuroLink {
         let factoryResult;
         try {
             await this.initializeMCP();
-            const _originalPrompt = options.input.text;
             factoryResult = processStreamingFactoryOptions(options);
             enhancedOptions = createCleanStreamOptions(options);
-
-
-
+            if (options.input?.text) {
+                const { toolResults: _toolResults, enhancedPrompt } = await this.detectAndExecuteTools(options.input.text, undefined);
+                if (enhancedPrompt !== options.input.text) {
+                    enhancedOptions.input.text = enhancedPrompt;
+                }
             }
             const { stream: mcpStream, provider: providerName } = await this.createMCPStream(enhancedOptions);
             const streamResult = await this.processStreamResult(mcpStream, enhancedOptions, factoryResult);
@@ -1756,9 +1765,13 @@ export class NeuroLink {
             validationStartTimeNs: validationStartTime.toString(),
             message: "Starting comprehensive input validation process",
         });
-
-
-
+        const hasText = typeof options?.input?.text === "string" &&
+            options.input.text.trim().length > 0;
+        // Accept audio when frames are present; sampleRateHz is optional (defaults applied later)
+        const hasAudio = !!(options?.input?.audio &&
+            options.input.audio.frames &&
+            typeof options.input.audio.frames[Symbol.asyncIterator] !== "undefined");
+        if (!hasText && !hasAudio) {
             const validationFailTime = process.hrtime.bigint();
             const validationDurationNs = validationFailTime - validationStartTime;
             logger.debug(`[NeuroLink] 💥 LOG_POINT_005_VALIDATION_FAILED`, {
@@ -1769,10 +1782,10 @@ export class NeuroLink {
                 elapsedNs: (process.hrtime.bigint() - hrTimeStart).toString(),
                 validationDurationNs: validationDurationNs.toString(),
                 validationDurationMs: Number(validationDurationNs) / 1000000,
-                validationError: "Stream options must include input.text
+                validationError: "Stream options must include either input.text or input.audio",
                 message: "EXHAUSTIVE validation failure analysis with character-level debugging",
             });
-            throw new Error("Stream options must include input.text
+            throw new Error("Stream options must include either input.text or input.audio");
         }
         const validationSuccessTime = process.hrtime.bigint();
         const validationDurationNs = validationSuccessTime - validationStartTime;
@@ -1784,10 +1797,11 @@ export class NeuroLink {
             elapsedNs: (process.hrtime.bigint() - hrTimeStart).toString(),
             validationDurationNs: validationDurationNs.toString(),
             validationDurationMs: Number(validationDurationNs) / 1000000,
-            inputTextValid:
-
-
-
+            inputTextValid: hasText,
+            inputAudioPresent: hasAudio,
+            inputTextLength: hasText ? options.input.text.length : 0,
+            inputTextTrimmedLength: hasText ? options.input.text.trim().length : 0,
+            inputTextPreview: hasText ? options.input.text.substring(0, 100) : "",
             message: "EXHAUSTIVE validation success - proceeding with stream processing",
         });
     }
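
Two behavioral changes in this file are easy to miss: MCP initialization is now gated on tools being enabled, and stream validation accepts audio-only input. A sketch of both; it assumes `disableTools` and the `generate()`/`stream()` methods are part of the public surface, as the diff suggests but does not show:

import { NeuroLink } from "@juspay/neurolink"; // assumed export

async function demo(): Promise<void> {
    const neurolink = new NeuroLink();

    // 1. With tools disabled, the MCP bootstrap is skipped entirely.
    await neurolink.generate({ input: { text: "Summarize this." }, disableTools: true });

    // 2. Audio-only input now passes validation (previously input.text was required).
    async function* frames(): AsyncIterable<Buffer> {
        yield Buffer.alloc(3200); // hypothetical PCM16LE frame
    }
    await neurolink.stream({ input: { audio: { frames: frames(), sampleRateHz: 16000 } } });
}

demo().catch(console.error);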

package/dist/providers/googleAiStudio.d.ts
CHANGED

@@ -17,6 +17,7 @@ export declare class GoogleAIStudioProvider extends BaseProvider {
     protected getAISDKModel(): LanguageModelV1;
     protected handleProviderError(error: unknown): Error;
     protected executeStream(options: StreamOptions, _analysisSchema?: ZodUnknownSchema | Schema<unknown>): Promise<StreamResult>;
+    private executeAudioStreamViaGeminiLive;
     private getApiKey;
 }
 export default GoogleAIStudioProvider;

package/dist/providers/googleAiStudio.js
CHANGED

(Identical to the package/dist/lib/providers/googleAiStudio.js diff above; the dist/ and dist/lib/ copies of this file ship the same compiled output.)