@juspay/neurolink 9.61.1 → 9.62.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +23 -17
  3. package/dist/adapters/tts/googleTTSHandler.js +1 -1
  4. package/dist/browser/neurolink.min.js +382 -364
  5. package/dist/cli/commands/serve.js +9 -0
  6. package/dist/cli/commands/voiceServer.d.ts +7 -0
  7. package/dist/cli/commands/voiceServer.js +9 -1
  8. package/dist/cli/factories/commandFactory.js +136 -11
  9. package/dist/cli/loop/optionsSchema.d.ts +1 -1
  10. package/dist/cli/utils/audioFileUtils.d.ts +3 -3
  11. package/dist/cli/utils/audioFileUtils.js +5 -1
  12. package/dist/core/baseProvider.js +29 -6
  13. package/dist/factories/providerRegistry.d.ts +14 -0
  14. package/dist/factories/providerRegistry.js +141 -2
  15. package/dist/lib/adapters/tts/googleTTSHandler.js +1 -1
  16. package/dist/lib/core/baseProvider.js +29 -6
  17. package/dist/lib/factories/providerRegistry.d.ts +14 -0
  18. package/dist/lib/factories/providerRegistry.js +141 -2
  19. package/dist/lib/mcp/toolRegistry.js +7 -1
  20. package/dist/lib/neurolink.d.ts +19 -0
  21. package/dist/lib/neurolink.js +252 -14
  22. package/dist/lib/observability/exporters/laminarExporter.js +1 -0
  23. package/dist/lib/observability/exporters/posthogExporter.js +1 -0
  24. package/dist/lib/observability/utils/spanSerializer.js +1 -0
  25. package/dist/lib/server/voice/tokenCompare.d.ts +14 -0
  26. package/dist/lib/server/voice/tokenCompare.js +23 -0
  27. package/dist/lib/server/voice/voiceServerApp.js +62 -3
  28. package/dist/lib/server/voice/voiceWebSocketHandler.d.ts +20 -3
  29. package/dist/lib/server/voice/voiceWebSocketHandler.js +555 -435
  30. package/dist/lib/types/generate.d.ts +47 -0
  31. package/dist/lib/types/hitl.d.ts +3 -0
  32. package/dist/lib/types/index.d.ts +1 -1
  33. package/dist/lib/types/index.js +1 -1
  34. package/dist/lib/types/realtime.d.ts +243 -0
  35. package/dist/lib/types/realtime.js +70 -0
  36. package/dist/lib/types/server.d.ts +68 -0
  37. package/dist/lib/types/span.d.ts +2 -0
  38. package/dist/lib/types/span.js +2 -0
  39. package/dist/lib/types/stream.d.ts +36 -14
  40. package/dist/lib/types/stt.d.ts +585 -0
  41. package/dist/lib/types/stt.js +90 -0
  42. package/dist/lib/types/tools.d.ts +2 -0
  43. package/dist/lib/types/tts.d.ts +23 -11
  44. package/dist/lib/types/tts.js +7 -0
  45. package/dist/lib/types/voice.d.ts +272 -0
  46. package/dist/lib/types/voice.js +137 -0
  47. package/dist/lib/utils/audioFormatDetector.d.ts +15 -0
  48. package/dist/lib/utils/audioFormatDetector.js +34 -0
  49. package/dist/lib/utils/errorHandling.js +4 -0
  50. package/dist/lib/utils/sttProcessor.d.ts +115 -0
  51. package/dist/lib/utils/sttProcessor.js +295 -0
  52. package/dist/lib/voice/RealtimeVoiceAPI.d.ts +183 -0
  53. package/dist/lib/voice/RealtimeVoiceAPI.js +439 -0
  54. package/dist/lib/voice/audio-utils.d.ts +135 -0
  55. package/dist/lib/voice/audio-utils.js +435 -0
  56. package/dist/lib/voice/errors.d.ts +123 -0
  57. package/dist/lib/voice/errors.js +386 -0
  58. package/dist/lib/voice/index.d.ts +26 -0
  59. package/dist/lib/voice/index.js +55 -0
  60. package/dist/lib/voice/providers/AzureSTT.d.ts +47 -0
  61. package/dist/lib/voice/providers/AzureSTT.js +345 -0
  62. package/dist/lib/voice/providers/AzureTTS.d.ts +59 -0
  63. package/dist/lib/voice/providers/AzureTTS.js +349 -0
  64. package/dist/lib/voice/providers/DeepgramSTT.d.ts +40 -0
  65. package/dist/lib/voice/providers/DeepgramSTT.js +550 -0
  66. package/dist/lib/voice/providers/ElevenLabsTTS.d.ts +53 -0
  67. package/dist/lib/voice/providers/ElevenLabsTTS.js +311 -0
  68. package/dist/lib/voice/providers/GeminiLive.d.ts +52 -0
  69. package/dist/lib/voice/providers/GeminiLive.js +372 -0
  70. package/dist/lib/voice/providers/GoogleSTT.d.ts +60 -0
  71. package/dist/lib/voice/providers/GoogleSTT.js +454 -0
  72. package/dist/lib/voice/providers/OpenAIRealtime.d.ts +47 -0
  73. package/dist/lib/voice/providers/OpenAIRealtime.js +412 -0
  74. package/dist/lib/voice/providers/OpenAISTT.d.ts +41 -0
  75. package/dist/lib/voice/providers/OpenAISTT.js +286 -0
  76. package/dist/lib/voice/providers/OpenAITTS.d.ts +49 -0
  77. package/dist/lib/voice/providers/OpenAITTS.js +271 -0
  78. package/dist/lib/voice/stream-handler.d.ts +166 -0
  79. package/dist/lib/voice/stream-handler.js +514 -0
  80. package/dist/mcp/toolRegistry.js +7 -1
  81. package/dist/neurolink.d.ts +19 -0
  82. package/dist/neurolink.js +252 -14
  83. package/dist/observability/exporters/laminarExporter.js +1 -0
  84. package/dist/observability/exporters/posthogExporter.js +1 -0
  85. package/dist/observability/utils/spanSerializer.js +1 -0
  86. package/dist/server/voice/tokenCompare.d.ts +14 -0
  87. package/dist/server/voice/tokenCompare.js +22 -0
  88. package/dist/server/voice/voiceServerApp.js +62 -3
  89. package/dist/server/voice/voiceWebSocketHandler.d.ts +20 -3
  90. package/dist/server/voice/voiceWebSocketHandler.js +555 -435
  91. package/dist/types/generate.d.ts +47 -0
  92. package/dist/types/hitl.d.ts +3 -0
  93. package/dist/types/index.d.ts +1 -1
  94. package/dist/types/index.js +1 -1
  95. package/dist/types/realtime.d.ts +243 -0
  96. package/dist/types/realtime.js +69 -0
  97. package/dist/types/server.d.ts +68 -0
  98. package/dist/types/span.d.ts +2 -0
  99. package/dist/types/span.js +2 -0
  100. package/dist/types/stream.d.ts +36 -14
  101. package/dist/types/stt.d.ts +585 -0
  102. package/dist/types/stt.js +89 -0
  103. package/dist/types/tools.d.ts +2 -0
  104. package/dist/types/tts.d.ts +23 -11
  105. package/dist/types/tts.js +7 -0
  106. package/dist/types/voice.d.ts +272 -0
  107. package/dist/types/voice.js +136 -0
  108. package/dist/utils/audioFormatDetector.d.ts +15 -0
  109. package/dist/utils/audioFormatDetector.js +33 -0
  110. package/dist/utils/errorHandling.js +4 -0
  111. package/dist/utils/sttProcessor.d.ts +115 -0
  112. package/dist/utils/sttProcessor.js +294 -0
  113. package/dist/voice/RealtimeVoiceAPI.d.ts +183 -0
  114. package/dist/voice/RealtimeVoiceAPI.js +438 -0
  115. package/dist/voice/audio-utils.d.ts +135 -0
  116. package/dist/voice/audio-utils.js +434 -0
  117. package/dist/voice/errors.d.ts +123 -0
  118. package/dist/voice/errors.js +385 -0
  119. package/dist/voice/index.d.ts +26 -0
  120. package/dist/voice/index.js +54 -0
  121. package/dist/voice/providers/AzureSTT.d.ts +47 -0
  122. package/dist/voice/providers/AzureSTT.js +344 -0
  123. package/dist/voice/providers/AzureTTS.d.ts +59 -0
  124. package/dist/voice/providers/AzureTTS.js +348 -0
  125. package/dist/voice/providers/DeepgramSTT.d.ts +40 -0
  126. package/dist/voice/providers/DeepgramSTT.js +549 -0
  127. package/dist/voice/providers/ElevenLabsTTS.d.ts +53 -0
  128. package/dist/voice/providers/ElevenLabsTTS.js +310 -0
  129. package/dist/voice/providers/GeminiLive.d.ts +52 -0
  130. package/dist/voice/providers/GeminiLive.js +371 -0
  131. package/dist/voice/providers/GoogleSTT.d.ts +60 -0
  132. package/dist/voice/providers/GoogleSTT.js +453 -0
  133. package/dist/voice/providers/OpenAIRealtime.d.ts +47 -0
  134. package/dist/voice/providers/OpenAIRealtime.js +411 -0
  135. package/dist/voice/providers/OpenAISTT.d.ts +41 -0
  136. package/dist/voice/providers/OpenAISTT.js +285 -0
  137. package/dist/voice/providers/OpenAITTS.d.ts +49 -0
  138. package/dist/voice/providers/OpenAITTS.js +270 -0
  139. package/dist/voice/stream-handler.d.ts +166 -0
  140. package/dist/voice/stream-handler.js +513 -0
  141. package/package.json +5 -2
@@ -0,0 +1,371 @@
1
+ /**
2
+ * Google Gemini Live Voice API Handler
3
+ *
4
+ * Implementation of bidirectional voice communication using Gemini's Live API.
5
+ *
6
+ * @module voice/providers/GeminiLive
7
+ */
8
+ import { logger } from "../../utils/logger.js";
9
+ import { RealtimeError } from "../errors.js";
10
+ import { BaseRealtimeHandler } from "../RealtimeVoiceAPI.js";
/**
 * Forcefully dispose of a WebSocket that is being abandoned.
 *
 * All listeners are dropped so no stale handler can touch handler state, and
 * a no-op "error" listener is installed before terminating: `terminate()` on
 * a socket that is still CONNECTING makes `ws` emit "error" on a later tick,
 * and an EventEmitter with no "error" listener throws (uncaught exception).
 *
 * @param ws - The `ws` WebSocket instance to discard.
 */
function discardGeminiSocket(ws) {
    ws.removeAllListeners();
    ws.on("error", () => {
        // Intentionally ignored: the socket is already being thrown away.
    });
    try {
        ws.terminate();
    }
    catch {
        // Best-effort teardown; nothing useful can be done if this fails.
    }
}
/**
 * Google Gemini Live Voice API Handler
 *
 * Implements bidirectional voice communication with Gemini's Live API over a
 * single WebSocket. State changes, text, audio, turn ends, errors and function
 * calls are reported through the `emit*` helpers inherited from
 * `BaseRealtimeHandler`.
 *
 * @see https://ai.google.dev/gemini-api/docs/live
 */
export class GeminiLive extends BaseRealtimeHandler {
    name = "gemini-live";
    /** Trimmed API key, or `null` when no non-empty key could be resolved. */
    apiKey;
    /** The active WebSocket, or `null` when there is no live connection. */
    ws = null;
    /** Index of the next audio chunk within the current model turn. */
    audioChunkIndex = 0;
    /** Function calls awaiting a result, keyed by call id (value: function name). */
    pendingFunctionCalls = new Map();
    /**
     * @param apiKey - Optional explicit API key. When omitted, falls back to
     *   GOOGLE_API_KEY, then GOOGLE_AI_API_KEY, then GEMINI_API_KEY.
     */
    constructor(apiKey) {
        super();
        // Accept GOOGLE_AI_API_KEY / GEMINI_API_KEY as aliases — `.env.example`
        // documents those as the canonical Google credentials, so insisting on
        // GOOGLE_API_KEY alone was a setup footgun.
        const resolvedKey = (apiKey ??
            process.env.GOOGLE_API_KEY ??
            process.env.GOOGLE_AI_API_KEY ??
            process.env.GEMINI_API_KEY ??
            "").trim();
        this.apiKey = resolvedKey.length > 0 ? resolvedKey : null;
    }
    /** @returns `true` when a non-empty API key was resolved at construction. */
    isConfigured() {
        return this.apiKey !== null;
    }
    /** @returns The audio format labels this handler advertises. */
    getSupportedFormats() {
        return ["opus", "wav"];
    }
    /**
     * Open a Live API session: connect the socket, send the setup message,
     * wait for `setupComplete`, then start dispatching server messages.
     *
     * On ANY failure after the socket was created, the socket is torn down and
     * `this.ws` is cleared so a failed attempt cannot leak a connection or leave
     * stale state behind for the next `connect()`.
     *
     * @param config - Session configuration; this method reads `model` and
     *   `timeout` (ms, default 30000) and forwards the object to `sendSetup`
     *   and `createSession`.
     * @returns The session object produced by `createSession`.
     * @throws RealtimeError when not configured, already connected, or when the
     *   connection/setup fails.
     */
    async connect(config) {
        if (!this.apiKey) {
            throw RealtimeError.providerNotConfigured("gemini-live");
        }
        if (this.isConnected()) {
            throw RealtimeError.sessionAlreadyActive("gemini-live");
        }
        this.emitStateChange("connecting");
        let socket = null;
        try {
            const { default: WebSocket } = await import("ws");
            const model = config.model ?? "gemini-2.5-flash-native-audio-preview-09-2025";
            const wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent?key=${encodeURIComponent(this.apiKey)}`;
            const ws = new WebSocket(wsUrl);
            socket = ws;
            this.ws = ws;
            // Wait for the socket to open (or fail / time out). Listener cleanup
            // for the failure paths happens in the catch block below, which
            // discards the socket together with every listener on it.
            await new Promise((resolve, reject) => {
                const openHandler = () => {
                    clearTimeout(timeout);
                    ws.off("error", errorHandler);
                    resolve();
                };
                const errorHandler = (err) => {
                    clearTimeout(timeout);
                    ws.off("open", openHandler);
                    reject(err);
                };
                const timeout = setTimeout(() => {
                    ws.off("open", openHandler);
                    reject(new Error("Connection timeout"));
                }, config.timeout ?? 30000);
                ws.on("open", openHandler);
                ws.on("error", errorHandler);
            });
            ws.on("close", () => {
                // Ignore close events from a socket that is no longer the active
                // one (e.g. the tail of a previous disconnect()); otherwise a late
                // close would wipe the session of a newer connection.
                if (this.ws !== ws) {
                    return;
                }
                this.ws = null;
                this.audioChunkIndex = 0;
                this.pendingFunctionCalls.clear();
                this.emitStateChange("disconnected");
                this.session = null;
            });
            ws.on("error", (err) => {
                this.emitError(err);
            });
            await this.sendSetup(config, model);
            // Wait for setup complete BEFORE attaching the permanent message
            // handler, so data arriving during the setup window is not dispatched
            // to handleMessage before consumers register their handlers.
            await this.waitForSetupComplete();
            ws.on("message", (data) => {
                this.handleMessage(data);
            });
            const sessionId = `gemini-${Date.now()}`;
            this.session = this.createSession(sessionId, config);
            this.emitStateChange("connected");
            logger.info(`[GeminiLiveHandler] Connected to session: ${sessionId}`);
            return this.session;
        }
        catch (err) {
            if (socket) {
                discardGeminiSocket(socket);
                if (this.ws === socket) {
                    this.ws = null;
                }
            }
            this.emitStateChange("error");
            const errorMessage = err instanceof Error ? err.message : String(err || "Unknown error");
            throw RealtimeError.connectionFailed(errorMessage, "gemini-live", err instanceof Error ? err : undefined);
        }
    }
    /**
     * Close the active socket and reset per-session state. No-op when there is
     * no socket.
     *
     * State is cleared BEFORE `close()` is called so the handler is reset even
     * if `close()` throws, and so the socket's own "close" listener (which is
     * guarded by socket identity) does not emit a second "disconnected".
     *
     * @throws RealtimeError (protocol error) when closing the socket throws.
     */
    async disconnect() {
        const socket = this.ws;
        if (!socket) {
            return;
        }
        this.emitStateChange("disconnecting");
        this.ws = null;
        this.session = null;
        this.audioChunkIndex = 0;
        this.pendingFunctionCalls.clear();
        // Stop dispatching server messages that arrive during the close handshake.
        socket.removeAllListeners("message");
        try {
            socket.close();
        }
        catch (err) {
            const errorMessage = err instanceof Error ? err.message : String(err || "Unknown error");
            throw RealtimeError.protocolError(`Disconnect failed: ${errorMessage}`, "gemini-live", err instanceof Error ? err : undefined);
        }
        this.emitStateChange("disconnected");
        logger.info("[GeminiLiveHandler] Disconnected");
    }
    /**
     * Send one chunk of input audio as a realtime input message.
     *
     * @param audio - Either a Buffer, or an object whose `data` property holds
     *   the audio Buffer. The chunk is labelled `audio/pcm;rate=16000`.
     * @throws RealtimeError when there is no active session.
     */
    async sendAudio(audio) {
        if (!this.ws || !this.isConnected()) {
            throw RealtimeError.sessionNotActive("gemini-live");
        }
        const audioBuffer = Buffer.isBuffer(audio) ? audio : audio.data;
        // NOTE(review): the Live API reference marks `mediaChunks` as deprecated
        // in favour of `realtimeInput.audio` — confirm before migrating.
        const message = {
            realtimeInput: {
                mediaChunks: [
                    {
                        mimeType: "audio/pcm;rate=16000",
                        data: audioBuffer.toString("base64"),
                    },
                ],
            },
        };
        this.ws.send(JSON.stringify(message));
    }
    /**
     * Send a complete user text turn.
     *
     * @param text - The user message.
     * @throws RealtimeError when there is no active session.
     */
    async sendText(text) {
        if (!this.ws || !this.isConnected()) {
            throw RealtimeError.sessionNotActive("gemini-live");
        }
        const message = {
            clientContent: {
                turns: [
                    {
                        role: "user",
                        parts: [{ text }],
                    },
                ],
                turnComplete: true,
            },
        };
        this.ws.send(JSON.stringify(message));
    }
    /**
     * No-op: Gemini generates responses automatically based on voice activity
     * detection, so there is nothing to trigger explicitly.
     */
    async triggerResponse() {
        // Intentionally empty.
    }
    /**
     * Attempt to interrupt the current response. Gemini has no explicit cancel
     * message, so an empty, completed client turn is sent instead. Silently
     * does nothing when there is no active session.
     */
    async cancelResponse() {
        if (this.ws && this.isConnected()) {
            const message = {
                clientContent: {
                    turns: [],
                    turnComplete: true,
                },
            };
            this.ws.send(JSON.stringify(message));
        }
    }
    /**
     * Send the session setup message. Silently returns when there is no socket.
     *
     * @param config - Reads `voice` (default "Puck"), `systemPrompt` and `tools`
     *   (each tool contributes `name`, `description`, `parameters`).
     * @param model - Model id, with or without the `models/` prefix.
     */
    async sendSetup(config, model) {
        if (!this.ws) {
            return;
        }
        // Avoid producing "models/models/…" when the caller already passed a
        // fully-qualified model name.
        const qualifiedModel = model.startsWith("models/") ? model : `models/${model}`;
        const setup = {
            model: qualifiedModel,
            generationConfig: {
                // The Live API accepts exactly ONE response modality per session;
                // requesting both AUDIO and TEXT is rejected as a config error.
                // Text is obtained via output audio transcription instead.
                responseModalities: ["AUDIO"],
                speechConfig: {
                    voiceConfig: {
                        prebuiltVoiceConfig: {
                            voiceName: config.voice ?? "Puck",
                        },
                    },
                },
            },
            // Ask the server to transcribe the model's audio so text events can
            // still be emitted alongside audio (see handleMessage).
            outputAudioTranscription: {},
        };
        if (config.systemPrompt) {
            setup.systemInstruction = {
                parts: [{ text: config.systemPrompt }],
            };
        }
        if (config.tools && config.tools.length > 0) {
            setup.tools = [
                {
                    functionDeclarations: config.tools.map((tool) => ({
                        name: tool.name,
                        description: tool.description,
                        parameters: tool.parameters,
                    })),
                },
            ];
        }
        this.ws.send(JSON.stringify({ setup }));
    }
    /**
     * Wait for the server's `setupComplete` message on the current socket.
     *
     * The socket is captured up front so listeners are always detached from the
     * same socket they were attached to, even if `this.ws` is cleared or
     * replaced in the meantime. The promise also rejects as soon as the socket
     * closes, instead of making the caller sit out the full timeout.
     *
     * @param timeoutMs - Maximum wait in milliseconds (default 10000).
     * @returns A promise that resolves on `setupComplete` and rejects on
     *   timeout, on socket close, or when there is no socket.
     */
    waitForSetupComplete(timeoutMs = 10000) {
        const socket = this.ws;
        return new Promise((resolve, reject) => {
            if (!socket) {
                reject(new Error("Connection closed before setup complete"));
                return;
            }
            const detach = () => {
                clearTimeout(timer);
                socket.off("message", onMessage);
                socket.off("close", onClose);
            };
            const onMessage = (data) => {
                let response;
                try {
                    response = JSON.parse(data.toString());
                }
                catch {
                    // Not JSON — keep waiting for the setup acknowledgement.
                    return;
                }
                if (response?.setupComplete) {
                    detach();
                    resolve();
                }
            };
            const onClose = (code, reason) => {
                detach();
                const detail = reason && reason.length > 0 ? `: ${reason.toString()}` : "";
                reject(new Error(`Connection closed before setup complete (code ${code})${detail}`));
            };
            const timer = setTimeout(() => {
                // Detach before rejecting so later messages don't invoke a
                // dangling handler for the rest of the connection lifetime.
                detach();
                reject(new Error("Timeout waiting for setup complete"));
            }, timeoutMs);
            socket.on("message", onMessage);
            socket.on("close", onClose);
        });
    }
    /**
     * Parse one server message and dispatch its contents: model text/audio
     * parts, output transcription text, turn end / interruption, tool calls and
     * tool call cancellations. Any exception is logged and swallowed so a bad
     * message cannot propagate into the WebSocket event emitter.
     *
     * @param data - Raw message payload; converted with `toString()` and parsed
     *   as JSON.
     */
    handleMessage(data) {
        try {
            const response = JSON.parse(data.toString());
            const content = response.serverContent;
            if (content) {
                const isFinal = content.turnComplete ?? false;
                for (const part of content.modelTurn?.parts ?? []) {
                    if (part.text) {
                        this.emitText(part.text, isFinal);
                    }
                    if (part.inlineData) {
                        this.emitAudio({
                            data: Buffer.from(part.inlineData.data, "base64"),
                            index: this.audioChunkIndex++,
                            isFinal,
                            format: this.parseAudioFormat(part.inlineData.mimeType),
                            sampleRate: 24000,
                        });
                    }
                }
                // Transcript of the model's spoken output (enabled in sendSetup).
                if (content.outputTranscription?.text) {
                    this.emitText(content.outputTranscription.text, isFinal);
                }
                // A completed or interrupted turn ends the turn exactly once,
                // even when both flags arrive in the same message.
                if (content.turnComplete || content.interrupted) {
                    this.emitTurnEnd();
                    this.audioChunkIndex = 0;
                }
            }
            for (const call of response.toolCall?.functionCalls ?? []) {
                this.pendingFunctionCalls.set(call.id, call.name);
                // Fire-and-forget: handleFunctionCall reports its own failures.
                void this.handleFunctionCall(call.id, call.name, call.args);
            }
            for (const id of response.toolCallCancellation?.ids ?? []) {
                this.pendingFunctionCalls.delete(id);
            }
        }
        catch (err) {
            logger.warn(`[GeminiLiveHandler] Failed to parse message: ${err instanceof Error ? err.message : String(err)}`);
        }
    }
    /**
     * Map a MIME type to one of the handler's audio format labels.
     *
     * Matching is case-insensitive. A missing MIME type is treated as PCM,
     * which is what the Live API documents for its audio output; an
     * unrecognised MIME type falls back to "opus".
     *
     * @param mimeType - MIME type string from the server (may be absent).
     * @returns "opus", "wav" (also used for raw PCM) or "mp3".
     */
    parseAudioFormat(mimeType) {
        const mime = (typeof mimeType === "string" ? mimeType : "audio/pcm").toLowerCase();
        if (mime.includes("opus")) {
            return "opus";
        }
        if (mime.includes("wav") || mime.includes("pcm")) {
            return "wav";
        }
        if (mime.includes("mp3") || mime.includes("mpeg")) {
            return "mp3";
        }
        return "opus";
    }
    /**
     * Run a model-requested function call through `emitFunctionCall` and send
     * the result back as a tool response.
     *
     * The result is NOT sent when the call is no longer pending (the server
     * cancelled it, or the session was reset while the tool was running) or
     * when the session is no longer connected. The pending entry is removed on
     * every path so the map cannot grow over a long session.
     *
     * @param callId - Server-assigned id of the function call.
     * @param name - Function name.
     * @param args - Function arguments as supplied by the model.
     */
    async handleFunctionCall(callId, name, args) {
        try {
            const result = await this.emitFunctionCall(name, args);
            if (!this.pendingFunctionCalls.has(callId)) {
                // Cancelled or superseded while the tool was running.
                return;
            }
            if (this.ws && this.isConnected()) {
                const responseMessage = {
                    toolResponse: {
                        functionResponses: [
                            {
                                id: callId,
                                name,
                                response: { result },
                            },
                        ],
                    },
                };
                this.ws.send(JSON.stringify(responseMessage));
            }
        }
        catch (err) {
            const error = err instanceof Error
                ? err
                : new Error(String(err || "Function call failed"));
            logger.error(`[GeminiLiveHandler] Function call failed: ${error.message}`);
            this.emitError(error);
        }
        finally {
            this.pendingFunctionCalls.delete(callId);
        }
    }
}
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Google Cloud Speech-to-Text Handler
3
+ *
4
+ * Implementation of STT using Google Cloud Speech-to-Text API.
5
+ *
6
+ * @module voice/providers/GoogleSTT
7
+ */
8
+ import type { TTSAudioFormat, STTHandler, STTLanguage, STTOptions, STTResult, TranscriptionSegment } from "../../types/index.js";
9
+ /**
10
+ * Google Cloud Speech-to-Text Handler
11
+ *
12
+ * Supports transcription with speaker diarization, word timestamps, and punctuation.
+ *
+ * Declared public surface (this file only declares types; behaviour lives in
+ * the accompanying implementation):
+ * - `constructor(apiKey?, credentialsPath?)`: both arguments are optional strings.
+ * - `isConfigured()`: returns a boolean; presumably whether usable credentials
+ *   were supplied (confirm against the implementation).
+ * - `getSupportedFormats()`: returns audio formats, typed with the shared
+ *   `TTSAudioFormat` type.
+ * - `getSupportedLanguages()`: resolves to a list of `STTLanguage`.
+ * - `transcribe(audio, options?)`: accepts a `Buffer` or `ArrayBuffer` and
+ *   resolves to an `STTResult`; `options` is optional here.
+ * - `transcribeStream(audioStream, options)`: consumes an async iterable of
+ *   `Buffer` chunks and yields `TranscriptionSegment`s; `options` is required here.
13
+ *
14
+ * @see https://cloud.google.com/speech-to-text/docs
15
+ */
16
+ export declare class GoogleSTT implements STTHandler {
17
+ private readonly apiKey;
18
+ private readonly credentialsPath;
19
+ private readonly baseUrl;
20
+ /**
21
+ * Maximum audio duration in seconds for the synchronous recognize endpoint.
22
+ * For longer audio, use the async longrunningrecognize endpoint (not yet implemented).
23
+ */
24
+ readonly maxAudioDuration = 60;
25
+ /**
26
+ * True streaming requires gRPC (not yet implemented).
27
+ * transcribeStream() uses a chunk-and-batch workaround.
28
+ */
29
+ readonly supportsStreaming = false;
30
+ constructor(apiKey?: string, credentialsPath?: string);
31
+ isConfigured(): boolean;
32
+ getSupportedFormats(): TTSAudioFormat[];
33
+ getSupportedLanguages(): Promise<STTLanguage[]>;
34
+ transcribe(audio: Buffer | ArrayBuffer, options?: STTOptions): Promise<STTResult>;
35
+ /**
36
+ * Streaming-style transcription. This is not true streaming: as noted on
+ * `supportsStreaming`, it uses a chunk-and-batch workaround because real
+ * streaming requires gRPC (not yet implemented).
37
+ */
38
+ transcribeStream(audioStream: AsyncIterable<Buffer>, options: STTOptions): AsyncIterable<TranscriptionSegment>;
39
+ /**
40
+ * Get encoding string for audio format
41
+ */
42
+ private getEncoding;
43
+ /**
44
+ * Parse duration string (e.g., "1.5s") to seconds
45
+ */
46
+ private parseDuration;
47
+ /**
48
+ * Calculate average confidence from results
49
+ */
50
+ private calculateAverageConfidence;
51
+ /**
52
+ * Get access token from service account credentials.
53
+ *
54
+ * M3: previously caught all errors and returned `""`, which then caused
55
+ * a silent 401 from the Google API and a confusing downstream HTTP error
56
+ * with no trace of the original auth failure. Now rethrows as STTError so
57
+ * the caller sees the auth root cause.
58
+ */
59
+ private getAccessToken;
60
+ }
+ }