@juspay/neurolink 9.61.1 → 9.62.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +23 -17
  3. package/dist/adapters/tts/googleTTSHandler.js +1 -1
  4. package/dist/browser/neurolink.min.js +382 -364
  5. package/dist/cli/commands/serve.js +9 -0
  6. package/dist/cli/commands/voiceServer.d.ts +7 -0
  7. package/dist/cli/commands/voiceServer.js +9 -1
  8. package/dist/cli/factories/commandFactory.js +136 -11
  9. package/dist/cli/loop/optionsSchema.d.ts +1 -1
  10. package/dist/cli/utils/audioFileUtils.d.ts +3 -3
  11. package/dist/cli/utils/audioFileUtils.js +5 -1
  12. package/dist/core/baseProvider.js +29 -6
  13. package/dist/factories/providerRegistry.d.ts +14 -0
  14. package/dist/factories/providerRegistry.js +141 -2
  15. package/dist/lib/adapters/tts/googleTTSHandler.js +1 -1
  16. package/dist/lib/core/baseProvider.js +29 -6
  17. package/dist/lib/factories/providerRegistry.d.ts +14 -0
  18. package/dist/lib/factories/providerRegistry.js +141 -2
  19. package/dist/lib/mcp/toolRegistry.js +7 -1
  20. package/dist/lib/neurolink.d.ts +19 -0
  21. package/dist/lib/neurolink.js +252 -14
  22. package/dist/lib/observability/exporters/laminarExporter.js +1 -0
  23. package/dist/lib/observability/exporters/posthogExporter.js +1 -0
  24. package/dist/lib/observability/utils/spanSerializer.js +1 -0
  25. package/dist/lib/server/voice/tokenCompare.d.ts +14 -0
  26. package/dist/lib/server/voice/tokenCompare.js +23 -0
  27. package/dist/lib/server/voice/voiceServerApp.js +62 -3
  28. package/dist/lib/server/voice/voiceWebSocketHandler.d.ts +20 -3
  29. package/dist/lib/server/voice/voiceWebSocketHandler.js +555 -435
  30. package/dist/lib/types/generate.d.ts +47 -0
  31. package/dist/lib/types/hitl.d.ts +3 -0
  32. package/dist/lib/types/index.d.ts +1 -1
  33. package/dist/lib/types/index.js +1 -1
  34. package/dist/lib/types/realtime.d.ts +243 -0
  35. package/dist/lib/types/realtime.js +70 -0
  36. package/dist/lib/types/server.d.ts +68 -0
  37. package/dist/lib/types/span.d.ts +2 -0
  38. package/dist/lib/types/span.js +2 -0
  39. package/dist/lib/types/stream.d.ts +36 -14
  40. package/dist/lib/types/stt.d.ts +585 -0
  41. package/dist/lib/types/stt.js +90 -0
  42. package/dist/lib/types/tools.d.ts +2 -0
  43. package/dist/lib/types/tts.d.ts +23 -11
  44. package/dist/lib/types/tts.js +7 -0
  45. package/dist/lib/types/voice.d.ts +272 -0
  46. package/dist/lib/types/voice.js +137 -0
  47. package/dist/lib/utils/audioFormatDetector.d.ts +15 -0
  48. package/dist/lib/utils/audioFormatDetector.js +34 -0
  49. package/dist/lib/utils/errorHandling.js +4 -0
  50. package/dist/lib/utils/sttProcessor.d.ts +115 -0
  51. package/dist/lib/utils/sttProcessor.js +295 -0
  52. package/dist/lib/voice/RealtimeVoiceAPI.d.ts +183 -0
  53. package/dist/lib/voice/RealtimeVoiceAPI.js +439 -0
  54. package/dist/lib/voice/audio-utils.d.ts +135 -0
  55. package/dist/lib/voice/audio-utils.js +435 -0
  56. package/dist/lib/voice/errors.d.ts +123 -0
  57. package/dist/lib/voice/errors.js +386 -0
  58. package/dist/lib/voice/index.d.ts +26 -0
  59. package/dist/lib/voice/index.js +55 -0
  60. package/dist/lib/voice/providers/AzureSTT.d.ts +47 -0
  61. package/dist/lib/voice/providers/AzureSTT.js +345 -0
  62. package/dist/lib/voice/providers/AzureTTS.d.ts +59 -0
  63. package/dist/lib/voice/providers/AzureTTS.js +349 -0
  64. package/dist/lib/voice/providers/DeepgramSTT.d.ts +40 -0
  65. package/dist/lib/voice/providers/DeepgramSTT.js +550 -0
  66. package/dist/lib/voice/providers/ElevenLabsTTS.d.ts +53 -0
  67. package/dist/lib/voice/providers/ElevenLabsTTS.js +311 -0
  68. package/dist/lib/voice/providers/GeminiLive.d.ts +52 -0
  69. package/dist/lib/voice/providers/GeminiLive.js +372 -0
  70. package/dist/lib/voice/providers/GoogleSTT.d.ts +60 -0
  71. package/dist/lib/voice/providers/GoogleSTT.js +454 -0
  72. package/dist/lib/voice/providers/OpenAIRealtime.d.ts +47 -0
  73. package/dist/lib/voice/providers/OpenAIRealtime.js +412 -0
  74. package/dist/lib/voice/providers/OpenAISTT.d.ts +41 -0
  75. package/dist/lib/voice/providers/OpenAISTT.js +286 -0
  76. package/dist/lib/voice/providers/OpenAITTS.d.ts +49 -0
  77. package/dist/lib/voice/providers/OpenAITTS.js +271 -0
  78. package/dist/lib/voice/stream-handler.d.ts +166 -0
  79. package/dist/lib/voice/stream-handler.js +514 -0
  80. package/dist/mcp/toolRegistry.js +7 -1
  81. package/dist/neurolink.d.ts +19 -0
  82. package/dist/neurolink.js +252 -14
  83. package/dist/observability/exporters/laminarExporter.js +1 -0
  84. package/dist/observability/exporters/posthogExporter.js +1 -0
  85. package/dist/observability/utils/spanSerializer.js +1 -0
  86. package/dist/server/voice/tokenCompare.d.ts +14 -0
  87. package/dist/server/voice/tokenCompare.js +22 -0
  88. package/dist/server/voice/voiceServerApp.js +62 -3
  89. package/dist/server/voice/voiceWebSocketHandler.d.ts +20 -3
  90. package/dist/server/voice/voiceWebSocketHandler.js +555 -435
  91. package/dist/types/generate.d.ts +47 -0
  92. package/dist/types/hitl.d.ts +3 -0
  93. package/dist/types/index.d.ts +1 -1
  94. package/dist/types/index.js +1 -1
  95. package/dist/types/realtime.d.ts +243 -0
  96. package/dist/types/realtime.js +69 -0
  97. package/dist/types/server.d.ts +68 -0
  98. package/dist/types/span.d.ts +2 -0
  99. package/dist/types/span.js +2 -0
  100. package/dist/types/stream.d.ts +36 -14
  101. package/dist/types/stt.d.ts +585 -0
  102. package/dist/types/stt.js +89 -0
  103. package/dist/types/tools.d.ts +2 -0
  104. package/dist/types/tts.d.ts +23 -11
  105. package/dist/types/tts.js +7 -0
  106. package/dist/types/voice.d.ts +272 -0
  107. package/dist/types/voice.js +136 -0
  108. package/dist/utils/audioFormatDetector.d.ts +15 -0
  109. package/dist/utils/audioFormatDetector.js +33 -0
  110. package/dist/utils/errorHandling.js +4 -0
  111. package/dist/utils/sttProcessor.d.ts +115 -0
  112. package/dist/utils/sttProcessor.js +294 -0
  113. package/dist/voice/RealtimeVoiceAPI.d.ts +183 -0
  114. package/dist/voice/RealtimeVoiceAPI.js +438 -0
  115. package/dist/voice/audio-utils.d.ts +135 -0
  116. package/dist/voice/audio-utils.js +434 -0
  117. package/dist/voice/errors.d.ts +123 -0
  118. package/dist/voice/errors.js +385 -0
  119. package/dist/voice/index.d.ts +26 -0
  120. package/dist/voice/index.js +54 -0
  121. package/dist/voice/providers/AzureSTT.d.ts +47 -0
  122. package/dist/voice/providers/AzureSTT.js +344 -0
  123. package/dist/voice/providers/AzureTTS.d.ts +59 -0
  124. package/dist/voice/providers/AzureTTS.js +348 -0
  125. package/dist/voice/providers/DeepgramSTT.d.ts +40 -0
  126. package/dist/voice/providers/DeepgramSTT.js +549 -0
  127. package/dist/voice/providers/ElevenLabsTTS.d.ts +53 -0
  128. package/dist/voice/providers/ElevenLabsTTS.js +310 -0
  129. package/dist/voice/providers/GeminiLive.d.ts +52 -0
  130. package/dist/voice/providers/GeminiLive.js +371 -0
  131. package/dist/voice/providers/GoogleSTT.d.ts +60 -0
  132. package/dist/voice/providers/GoogleSTT.js +453 -0
  133. package/dist/voice/providers/OpenAIRealtime.d.ts +47 -0
  134. package/dist/voice/providers/OpenAIRealtime.js +411 -0
  135. package/dist/voice/providers/OpenAISTT.d.ts +41 -0
  136. package/dist/voice/providers/OpenAISTT.js +285 -0
  137. package/dist/voice/providers/OpenAITTS.d.ts +49 -0
  138. package/dist/voice/providers/OpenAITTS.js +270 -0
  139. package/dist/voice/stream-handler.d.ts +166 -0
  140. package/dist/voice/stream-handler.js +513 -0
  141. package/package.json +5 -2
@@ -0,0 +1,349 @@
1
+ /**
2
+ * Azure Cognitive Services Text-to-Speech Handler
3
+ *
4
+ * Implementation of TTS using Azure Speech Services.
5
+ *
6
+ * @module voice/providers/AzureTTS
7
+ */
8
+ import { ErrorCategory, ErrorSeverity } from "../../constants/enums.js";
9
+ import { logger } from "../../utils/logger.js";
10
+ import { TTS_ERROR_CODES, TTSError } from "../../utils/ttsProcessor.js";
11
+ /**
12
+ * Azure Cognitive Services Text-to-Speech Handler
13
+ *
14
+ * Supports neural voices with SSML and custom voice styles.
15
+ *
16
+ * @see https://docs.microsoft.com/azure/cognitive-services/speech-service/
17
+ */
18
+ export class AzureTTS {
19
+ apiKey;
20
+ region;
21
+ voicesCache = null;
22
+ static CACHE_TTL_MS = 30 * 60 * 1000; // 30 minutes
23
+ /**
24
+ * Maximum text length (10000 characters for Azure)
25
+ */
26
+ maxTextLength = 10000;
27
+ constructor(apiKey, region) {
28
+ const resolvedKey = (apiKey ?? process.env.AZURE_SPEECH_KEY ?? "").trim();
29
+ this.apiKey = resolvedKey.length > 0 ? resolvedKey : null;
30
+ const resolvedRegion = (region ??
31
+ process.env.AZURE_SPEECH_REGION ??
32
+ "").trim();
33
+ this.region = resolvedRegion.length > 0 ? resolvedRegion : "eastus";
34
+ }
35
+ isConfigured() {
36
+ return this.apiKey !== null && this.region.length > 0;
37
+ }
38
+ async getVoices(languageCode) {
39
+ if (!this.apiKey) {
40
+ throw new TTSError({
41
+ code: TTS_ERROR_CODES.PROVIDER_NOT_CONFIGURED,
42
+ message: "Azure Speech key not configured",
43
+ category: ErrorCategory.CONFIGURATION,
44
+ severity: ErrorSeverity.HIGH,
45
+ retriable: false,
46
+ });
47
+ }
48
+ // Return cached voices if valid
49
+ if (this.voicesCache &&
50
+ Date.now() - this.voicesCache.timestamp < AzureTTS.CACHE_TTL_MS &&
51
+ !languageCode) {
52
+ return this.voicesCache.voices;
53
+ }
54
+ try {
55
+ const voicesController = new AbortController();
56
+ const voicesTimeoutId = setTimeout(() => voicesController.abort(), 30000);
57
+ let response;
58
+ try {
59
+ response = await fetch(`https://${this.region}.tts.speech.microsoft.com/cognitiveservices/voices/list`, {
60
+ method: "GET",
61
+ headers: {
62
+ "Ocp-Apim-Subscription-Key": this.apiKey,
63
+ },
64
+ signal: voicesController.signal,
65
+ });
66
+ }
67
+ catch (fetchErr) {
68
+ if (fetchErr instanceof Error && fetchErr.name === "AbortError") {
69
+ throw new TTSError({
70
+ code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
71
+ message: "Azure TTS voices request timed out after 30 seconds",
72
+ category: ErrorCategory.NETWORK,
73
+ severity: ErrorSeverity.MEDIUM,
74
+ retriable: true,
75
+ originalError: fetchErr,
76
+ });
77
+ }
78
+ throw fetchErr;
79
+ }
80
+ finally {
81
+ clearTimeout(voicesTimeoutId);
82
+ }
83
+ if (!response.ok) {
84
+ throw new Error(`HTTP ${response.status}`);
85
+ }
86
+ const data = (await response.json());
87
+ let voices = data.map((voice) => ({
88
+ id: voice.ShortName,
89
+ name: voice.DisplayName,
90
+ languageCode: voice.Locale,
91
+ languageCodes: [voice.Locale],
92
+ gender: this.mapGender(voice.Gender),
93
+ type: voice.VoiceType.toLowerCase().includes("neural")
94
+ ? "neural"
95
+ : "standard",
96
+ description: voice.LocaleName,
97
+ }));
98
+ // Filter by language if specified
99
+ if (languageCode) {
100
+ voices = voices.filter((v) => v.languageCode
101
+ .toLowerCase()
102
+ .startsWith(languageCode.toLowerCase()) ||
103
+ v.languageCode.toLowerCase() === languageCode.toLowerCase());
104
+ }
105
+ // Cache full list
106
+ if (!languageCode) {
107
+ this.voicesCache = { voices, timestamp: Date.now() };
108
+ }
109
+ return voices;
110
+ }
111
+ catch (err) {
112
+ // Don't double-wrap an already-typed TTSError (the inner try-block
113
+ // throws TTSError on AbortError timeouts) — preserves the clean error
114
+ // chain. synthesize() at line ~249 already uses this pattern.
115
+ if (err instanceof TTSError) {
116
+ throw err;
117
+ }
118
+ const errorMessage = err instanceof Error ? err.message : String(err || "Unknown error");
119
+ logger.error(`[AzureTTSHandler] Failed to get voices: ${errorMessage}`);
120
+ throw new TTSError({
121
+ code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
122
+ message: `Failed to get voices: ${errorMessage}`,
123
+ category: ErrorCategory.NETWORK,
124
+ severity: ErrorSeverity.MEDIUM,
125
+ retriable: true,
126
+ originalError: err instanceof Error ? err : undefined,
127
+ });
128
+ }
129
+ }
130
+ async synthesize(text, options = {}) {
131
+ if (!this.apiKey) {
132
+ throw new TTSError({
133
+ code: TTS_ERROR_CODES.PROVIDER_NOT_CONFIGURED,
134
+ message: "Azure Speech key not configured",
135
+ category: ErrorCategory.CONFIGURATION,
136
+ severity: ErrorSeverity.HIGH,
137
+ retriable: false,
138
+ });
139
+ }
140
+ const startTime = Date.now();
141
+ const azureOptions = options;
142
+ try {
143
+ // Get voice (default to a common neural voice)
144
+ const voice = options.voice ?? "en-US-JennyNeural";
145
+ // Determine output format
146
+ const outputFormat = azureOptions.outputFormat ?? this.mapFormat(options.format ?? "mp3");
147
+ // Build SSML
148
+ const ssml = this.buildSSML(text, voice, options);
149
+ const controller = new AbortController();
150
+ const timeoutId = setTimeout(() => controller.abort(), 30000);
151
+ let response;
152
+ try {
153
+ response = await fetch(`https://${this.region}.tts.speech.microsoft.com/cognitiveservices/v1`, {
154
+ method: "POST",
155
+ headers: {
156
+ "Ocp-Apim-Subscription-Key": this.apiKey,
157
+ "Content-Type": "application/ssml+xml",
158
+ "X-Microsoft-OutputFormat": outputFormat,
159
+ },
160
+ body: ssml,
161
+ signal: controller.signal,
162
+ });
163
+ }
164
+ catch (fetchErr) {
165
+ if (fetchErr instanceof Error && fetchErr.name === "AbortError") {
166
+ throw new TTSError({
167
+ code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
168
+ message: "Azure TTS request timed out after 30 seconds",
169
+ category: ErrorCategory.NETWORK,
170
+ severity: ErrorSeverity.HIGH,
171
+ retriable: true,
172
+ originalError: fetchErr,
173
+ });
174
+ }
175
+ throw fetchErr;
176
+ }
177
+ finally {
178
+ clearTimeout(timeoutId);
179
+ }
180
+ if (!response.ok) {
181
+ const errorText = await response.text();
182
+ throw new Error(`HTTP ${response.status}: ${errorText}`);
183
+ }
184
+ const latency = Date.now() - startTime;
185
+ // Get audio buffer
186
+ const arrayBuffer = await response.arrayBuffer();
187
+ const audioBuffer = Buffer.from(arrayBuffer);
188
+ const result = {
189
+ buffer: audioBuffer,
190
+ // Use the *effective* output format derived from outputFormat, not the
191
+ // requested format — otherwise unsupported requests that fell back to
192
+ // mp3 would mislabel the buffer (Copilot review).
193
+ format: this.effectiveFormat(outputFormat),
194
+ size: audioBuffer.length,
195
+ voice,
196
+ sampleRate: this.getSampleRate(outputFormat),
197
+ metadata: {
198
+ latency,
199
+ provider: "azure-tts",
200
+ requestedFormat: options.format,
201
+ outputFormat,
202
+ region: this.region,
203
+ },
204
+ };
205
+ logger.info(`[AzureTTSHandler] Synthesized ${audioBuffer.length} bytes in ${latency}ms`);
206
+ return result;
207
+ }
208
+ catch (err) {
209
+ if (err instanceof TTSError) {
210
+ throw err;
211
+ }
212
+ const errorMessage = err instanceof Error ? err.message : String(err || "Unknown error");
213
+ logger.error(`[AzureTTSHandler] Synthesis failed: ${errorMessage}`);
214
+ throw new TTSError({
215
+ code: TTS_ERROR_CODES.SYNTHESIS_FAILED,
216
+ message: `Synthesis failed: ${errorMessage}`,
217
+ category: ErrorCategory.EXECUTION,
218
+ severity: ErrorSeverity.HIGH,
219
+ retriable: true,
220
+ context: { textLength: text.length },
221
+ originalError: err instanceof Error ? err : undefined,
222
+ });
223
+ }
224
+ }
225
+ /**
226
+ * Build SSML from text and options
227
+ */
228
+ buildSSML(text, voice, options) {
229
+ const azureOptions = options;
230
+ // If custom SSML template provided, use it
231
+ if (azureOptions.ssmlTemplate) {
232
+ return azureOptions.ssmlTemplate
233
+ .replace("{text}", this.escapeXml(text))
234
+ .replace("{voice}", this.escapeXml(voice));
235
+ }
236
+ // m1: Only pass raw SSML through when the caller explicitly opted in via
237
+ // `allowRawSSML`. Otherwise escape — `text` from untrusted sources that
238
+ // happens (or is crafted) to begin with `<speak` would otherwise enable
239
+ // SSML injection (arbitrary voice changes, external content references).
240
+ if (azureOptions.allowRawSSML && text.trim().startsWith("<speak")) {
241
+ return text;
242
+ }
243
+ // Build rate string
244
+ const rate = options.speed
245
+ ? `${Math.round((options.speed - 1) * 100)}%`
246
+ : "0%";
247
+ // Build pitch string. `TTSOptions.pitch` is documented as semitones, and
248
+ // Azure SSML supports semitone units directly via `<n>st` (e.g. "+2st",
249
+ // "-3st"). Previously this emitted `<n>%`, which Azure interprets as a
250
+ // relative percentage — wrong magnitude (Copilot review).
251
+ const pitchValue = options.pitch ?? 0;
252
+ const pitch = pitchValue >= 0
253
+ ? `+${Math.round(pitchValue)}st`
254
+ : `${Math.round(pitchValue)}st`;
255
+ // Build SSML
256
+ return `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="${this.escapeXml(this.extractLanguage(voice))}">
257
+ <voice name="${this.escapeXml(voice)}">
258
+ <prosody rate="${rate}" pitch="${pitch}">
259
+ ${this.escapeXml(text)}
260
+ </prosody>
261
+ </voice>
262
+ </speak>`;
263
+ }
264
+ /**
265
+ * Extract language from voice name
266
+ */
267
+ extractLanguage(voice) {
268
+ // Voice names are like "en-US-JennyNeural"
269
+ const match = voice.match(/^([a-z]{2}-[A-Z]{2})/);
270
+ return match ? match[1] : "en-US";
271
+ }
272
+ /**
273
+ * Escape XML special characters
274
+ */
275
+ escapeXml(text) {
276
+ return text
277
+ .replace(/&/g, "&amp;")
278
+ .replace(/</g, "&lt;")
279
+ .replace(/>/g, "&gt;")
280
+ .replace(/"/g, "&quot;")
281
+ .replace(/'/g, "&apos;");
282
+ }
283
+ /**
284
+ * Map gender string to standard type
285
+ */
286
+ mapGender(gender) {
287
+ switch (gender?.toLowerCase()) {
288
+ case "male":
289
+ return "male";
290
+ case "female":
291
+ return "female";
292
+ default:
293
+ return "neutral";
294
+ }
295
+ }
296
+ /**
297
+ * Map TTSAudioFormat to Azure output format
298
+ */
299
+ mapFormat(format) {
300
+ const formats = {
301
+ mp3: "audio-24khz-96kbitrate-mono-mp3",
302
+ wav: "riff-24khz-16bit-mono-pcm",
303
+ ogg: "ogg-24khz-16bit-mono-opus",
304
+ opus: "ogg-24khz-16bit-mono-opus",
305
+ };
306
+ return formats[format] ?? "audio-24khz-96kbitrate-mono-mp3";
307
+ }
308
+ /**
309
+ * Get sample rate from format string
310
+ */
311
+ getSampleRate(format) {
312
+ if (format.includes("24khz")) {
313
+ return 24000;
314
+ }
315
+ if (format.includes("16khz")) {
316
+ return 16000;
317
+ }
318
+ if (format.includes("48khz")) {
319
+ return 48000;
320
+ }
321
+ return 24000;
322
+ }
323
+ /**
324
+ * Map the Azure outputFormat string back to a canonical TTSAudioFormat so
325
+ * TTSResult.format matches what the API actually returned (mapFormat() can
326
+ * coerce unsupported requests to mp3).
327
+ */
328
+ effectiveFormat(outputFormat) {
329
+ if (outputFormat.includes("mp3")) {
330
+ return "mp3";
331
+ }
332
+ if (outputFormat.includes("opus")) {
333
+ return "opus";
334
+ }
335
+ // Raw PCM (no RIFF/WAV header) must not be labeled as "wav" — downstream
336
+ // WAV parsers would misread the buffer. Azure uses the `raw-*` prefix
337
+ // for headerless PCM (e.g. `raw-16khz-16bit-mono-pcm`).
338
+ if (outputFormat.startsWith("raw") && outputFormat.includes("pcm")) {
339
+ return "pcm16";
340
+ }
341
+ if (outputFormat.includes("riff") ||
342
+ outputFormat.includes("pcm") ||
343
+ outputFormat.includes("wav")) {
344
+ return "wav";
345
+ }
346
+ return "mp3";
347
+ }
348
+ }
349
+ //# sourceMappingURL=AzureTTS.js.map
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Deepgram Speech-to-Text Handler
3
+ *
4
+ * Implementation of STT using Deepgram's Speech Recognition API.
5
+ *
6
+ * @module voice/providers/DeepgramSTT
7
+ */
8
+ import type { TTSAudioFormat, STTHandler, STTLanguage, STTOptions, STTResult, TranscriptionSegment } from "../../types/index.js";
9
+ /**
10
+ * Deepgram Speech-to-Text Handler
11
+ *
12
+ * Supports real-time streaming, speaker diarization, and smart formatting.
13
+ *
14
+ * @see https://developers.deepgram.com/docs
15
+ */
16
+ export declare class DeepgramSTT implements STTHandler {
17
+ private readonly apiKey;
18
+ private readonly baseUrl;
19
+ /**
20
+ * Maximum audio duration in seconds (2 hours)
21
+ */
22
+ readonly maxAudioDuration = 7200;
23
+ /**
24
+ * Deepgram supports streaming
25
+ */
26
+ readonly supportsStreaming = true;
27
+ constructor(apiKey?: string);
28
+ isConfigured(): boolean;
29
+ getSupportedFormats(): TTSAudioFormat[];
30
+ getSupportedLanguages(): Promise<STTLanguage[]>;
31
+ transcribe(audio: Buffer | ArrayBuffer, options?: STTOptions): Promise<STTResult>;
32
+ /**
33
+ * Streaming transcription using WebSocket
34
+ */
35
+ transcribeStream(audioStream: AsyncIterable<Buffer>, options: STTOptions): AsyncIterable<TranscriptionSegment>;
36
+ /**
37
+ * Get MIME type for audio format
38
+ */
39
+ private getMimeType;
40
+ }