@juspay/neurolink 9.61.2 → 9.62.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +23 -17
  3. package/dist/adapters/tts/googleTTSHandler.js +1 -1
  4. package/dist/browser/neurolink.min.js +373 -355
  5. package/dist/cli/commands/serve.js +9 -0
  6. package/dist/cli/commands/voiceServer.d.ts +7 -0
  7. package/dist/cli/commands/voiceServer.js +9 -1
  8. package/dist/cli/factories/commandFactory.js +136 -11
  9. package/dist/cli/loop/optionsSchema.d.ts +1 -1
  10. package/dist/cli/utils/audioFileUtils.d.ts +3 -3
  11. package/dist/cli/utils/audioFileUtils.js +5 -1
  12. package/dist/core/baseProvider.js +29 -6
  13. package/dist/factories/providerRegistry.d.ts +14 -0
  14. package/dist/factories/providerRegistry.js +141 -2
  15. package/dist/lib/adapters/tts/googleTTSHandler.js +1 -1
  16. package/dist/lib/core/baseProvider.js +29 -6
  17. package/dist/lib/factories/providerRegistry.d.ts +14 -0
  18. package/dist/lib/factories/providerRegistry.js +141 -2
  19. package/dist/lib/neurolink.d.ts +19 -0
  20. package/dist/lib/neurolink.js +248 -12
  21. package/dist/lib/observability/exporters/laminarExporter.js +1 -0
  22. package/dist/lib/observability/exporters/posthogExporter.js +1 -0
  23. package/dist/lib/observability/utils/spanSerializer.js +1 -0
  24. package/dist/lib/server/voice/tokenCompare.d.ts +14 -0
  25. package/dist/lib/server/voice/tokenCompare.js +23 -0
  26. package/dist/lib/server/voice/voiceServerApp.js +62 -3
  27. package/dist/lib/server/voice/voiceWebSocketHandler.d.ts +20 -3
  28. package/dist/lib/server/voice/voiceWebSocketHandler.js +555 -435
  29. package/dist/lib/types/generate.d.ts +47 -0
  30. package/dist/lib/types/index.d.ts +1 -1
  31. package/dist/lib/types/index.js +1 -1
  32. package/dist/lib/types/realtime.d.ts +243 -0
  33. package/dist/lib/types/realtime.js +70 -0
  34. package/dist/lib/types/server.d.ts +68 -0
  35. package/dist/lib/types/span.d.ts +2 -0
  36. package/dist/lib/types/span.js +2 -0
  37. package/dist/lib/types/stream.d.ts +36 -14
  38. package/dist/lib/types/stt.d.ts +585 -0
  39. package/dist/lib/types/stt.js +90 -0
  40. package/dist/lib/types/tts.d.ts +23 -11
  41. package/dist/lib/types/tts.js +7 -0
  42. package/dist/lib/types/voice.d.ts +272 -0
  43. package/dist/lib/types/voice.js +137 -0
  44. package/dist/lib/utils/audioFormatDetector.d.ts +15 -0
  45. package/dist/lib/utils/audioFormatDetector.js +34 -0
  46. package/dist/lib/utils/sttProcessor.d.ts +115 -0
  47. package/dist/lib/utils/sttProcessor.js +295 -0
  48. package/dist/lib/voice/RealtimeVoiceAPI.d.ts +183 -0
  49. package/dist/lib/voice/RealtimeVoiceAPI.js +439 -0
  50. package/dist/lib/voice/audio-utils.d.ts +135 -0
  51. package/dist/lib/voice/audio-utils.js +435 -0
  52. package/dist/lib/voice/errors.d.ts +123 -0
  53. package/dist/lib/voice/errors.js +386 -0
  54. package/dist/lib/voice/index.d.ts +26 -0
  55. package/dist/lib/voice/index.js +55 -0
  56. package/dist/lib/voice/providers/AzureSTT.d.ts +47 -0
  57. package/dist/lib/voice/providers/AzureSTT.js +345 -0
  58. package/dist/lib/voice/providers/AzureTTS.d.ts +59 -0
  59. package/dist/lib/voice/providers/AzureTTS.js +349 -0
  60. package/dist/lib/voice/providers/DeepgramSTT.d.ts +40 -0
  61. package/dist/lib/voice/providers/DeepgramSTT.js +550 -0
  62. package/dist/lib/voice/providers/ElevenLabsTTS.d.ts +53 -0
  63. package/dist/lib/voice/providers/ElevenLabsTTS.js +311 -0
  64. package/dist/lib/voice/providers/GeminiLive.d.ts +52 -0
  65. package/dist/lib/voice/providers/GeminiLive.js +372 -0
  66. package/dist/lib/voice/providers/GoogleSTT.d.ts +60 -0
  67. package/dist/lib/voice/providers/GoogleSTT.js +454 -0
  68. package/dist/lib/voice/providers/OpenAIRealtime.d.ts +47 -0
  69. package/dist/lib/voice/providers/OpenAIRealtime.js +412 -0
  70. package/dist/lib/voice/providers/OpenAISTT.d.ts +41 -0
  71. package/dist/lib/voice/providers/OpenAISTT.js +286 -0
  72. package/dist/lib/voice/providers/OpenAITTS.d.ts +49 -0
  73. package/dist/lib/voice/providers/OpenAITTS.js +271 -0
  74. package/dist/lib/voice/stream-handler.d.ts +166 -0
  75. package/dist/lib/voice/stream-handler.js +514 -0
  76. package/dist/neurolink.d.ts +19 -0
  77. package/dist/neurolink.js +248 -12
  78. package/dist/observability/exporters/laminarExporter.js +1 -0
  79. package/dist/observability/exporters/posthogExporter.js +1 -0
  80. package/dist/observability/utils/spanSerializer.js +1 -0
  81. package/dist/server/voice/tokenCompare.d.ts +14 -0
  82. package/dist/server/voice/tokenCompare.js +22 -0
  83. package/dist/server/voice/voiceServerApp.js +62 -3
  84. package/dist/server/voice/voiceWebSocketHandler.d.ts +20 -3
  85. package/dist/server/voice/voiceWebSocketHandler.js +555 -435
  86. package/dist/types/generate.d.ts +47 -0
  87. package/dist/types/index.d.ts +1 -1
  88. package/dist/types/index.js +1 -1
  89. package/dist/types/realtime.d.ts +243 -0
  90. package/dist/types/realtime.js +69 -0
  91. package/dist/types/server.d.ts +68 -0
  92. package/dist/types/span.d.ts +2 -0
  93. package/dist/types/span.js +2 -0
  94. package/dist/types/stream.d.ts +36 -14
  95. package/dist/types/stt.d.ts +585 -0
  96. package/dist/types/stt.js +89 -0
  97. package/dist/types/tts.d.ts +23 -11
  98. package/dist/types/tts.js +7 -0
  99. package/dist/types/voice.d.ts +272 -0
  100. package/dist/types/voice.js +136 -0
  101. package/dist/utils/audioFormatDetector.d.ts +15 -0
  102. package/dist/utils/audioFormatDetector.js +33 -0
  103. package/dist/utils/sttProcessor.d.ts +115 -0
  104. package/dist/utils/sttProcessor.js +294 -0
  105. package/dist/voice/RealtimeVoiceAPI.d.ts +183 -0
  106. package/dist/voice/RealtimeVoiceAPI.js +438 -0
  107. package/dist/voice/audio-utils.d.ts +135 -0
  108. package/dist/voice/audio-utils.js +434 -0
  109. package/dist/voice/errors.d.ts +123 -0
  110. package/dist/voice/errors.js +385 -0
  111. package/dist/voice/index.d.ts +26 -0
  112. package/dist/voice/index.js +54 -0
  113. package/dist/voice/providers/AzureSTT.d.ts +47 -0
  114. package/dist/voice/providers/AzureSTT.js +344 -0
  115. package/dist/voice/providers/AzureTTS.d.ts +59 -0
  116. package/dist/voice/providers/AzureTTS.js +348 -0
  117. package/dist/voice/providers/DeepgramSTT.d.ts +40 -0
  118. package/dist/voice/providers/DeepgramSTT.js +549 -0
  119. package/dist/voice/providers/ElevenLabsTTS.d.ts +53 -0
  120. package/dist/voice/providers/ElevenLabsTTS.js +310 -0
  121. package/dist/voice/providers/GeminiLive.d.ts +52 -0
  122. package/dist/voice/providers/GeminiLive.js +371 -0
  123. package/dist/voice/providers/GoogleSTT.d.ts +60 -0
  124. package/dist/voice/providers/GoogleSTT.js +453 -0
  125. package/dist/voice/providers/OpenAIRealtime.d.ts +47 -0
  126. package/dist/voice/providers/OpenAIRealtime.js +411 -0
  127. package/dist/voice/providers/OpenAISTT.d.ts +41 -0
  128. package/dist/voice/providers/OpenAISTT.js +285 -0
  129. package/dist/voice/providers/OpenAITTS.d.ts +49 -0
  130. package/dist/voice/providers/OpenAITTS.js +270 -0
  131. package/dist/voice/stream-handler.d.ts +166 -0
  132. package/dist/voice/stream-handler.js +513 -0
  133. package/package.json +3 -1
@@ -0,0 +1,438 @@
1
+ /**
2
+ * Realtime Voice API Infrastructure
3
+ *
4
+ * Base handler and processor for realtime voice communication.
5
+ * Supports bidirectional audio streaming with providers like OpenAI and Gemini.
6
+ *
7
+ * @module voice/RealtimeVoiceAPI
8
+ */
9
+ import { logger } from "../utils/logger.js";
10
+ import { RealtimeError } from "./errors.js";
11
+ import { DEFAULT_REALTIME_CONFIG, REALTIME_ERROR_CODES, } from "../types/index.js";
12
+ import { ErrorCategory, ErrorSeverity } from "../constants/enums.js";
13
+ /**
14
+ * Realtime Processor class for orchestrating realtime voice operations
15
+ *
16
+ * Provides a unified interface for realtime voice across multiple providers.
17
+ *
18
+ * @example
19
+ * ```typescript
20
+ * // Register a handler (typically done in providerRegistry.ts on startup)
21
+ * RealtimeProcessor.registerHandler('openai-realtime', openaiHandler);
22
+ *
23
+ * // Connect to a session — the first arg is the registered handler key,
24
+ * // and `config.provider` must match the same key.
25
+ * const session = await RealtimeProcessor.connect('openai-realtime', {
26
+ * provider: 'openai-realtime',
27
+ * voice: 'alloy',
28
+ * systemPrompt: 'You are a helpful assistant.'
29
+ * });
30
+ *
31
+ * // Send audio
32
+ * await RealtimeProcessor.sendAudio('openai-realtime', audioBuffer);
33
+ *
34
+ * // Disconnect
35
+ * await RealtimeProcessor.disconnect('openai-realtime');
36
+ * ```
37
+ */
38
+ export class RealtimeProcessor {
39
+ /**
40
+ * Handler registry mapping provider names to Realtime handlers
41
+ */
42
+ static handlers = new Map();
43
+ /**
44
+ * Active sessions by provider
45
+ */
46
+ static sessions = new Map();
47
+ /**
48
+ * Register a Realtime handler for a specific provider
49
+ *
50
+ * @param providerName - Provider identifier (e.g., 'openai', 'gemini')
51
+ * @param handler - Realtime handler implementation
52
+ */
53
+ static registerHandler(providerName, handler) {
54
+ if (!providerName) {
55
+ throw new Error("Provider name is required");
56
+ }
57
+ if (!handler) {
58
+ throw new Error("Handler is required");
59
+ }
60
+ const normalizedName = providerName.toLowerCase();
61
+ if (this.handlers.has(normalizedName)) {
62
+ logger.warn(`[RealtimeProcessor] Overwriting existing handler for provider: ${normalizedName}`);
63
+ }
64
+ this.handlers.set(normalizedName, handler);
65
+ logger.debug(`[RealtimeProcessor] Registered Realtime handler for provider: ${normalizedName}`);
66
+ }
67
+ /**
68
+ * Get a registered Realtime handler by provider name
69
+ */
70
+ static getHandler(providerName) {
71
+ const normalizedName = providerName.toLowerCase();
72
+ return this.handlers.get(normalizedName);
73
+ }
74
+ /**
75
+ * Check if a provider is supported
76
+ */
77
+ static supports(providerName) {
78
+ if (!providerName) {
79
+ return false;
80
+ }
81
+ const normalizedName = providerName.toLowerCase();
82
+ return this.handlers.has(normalizedName);
83
+ }
84
+ /**
85
+ * Get list of all registered providers
86
+ */
87
+ static getProviders() {
88
+ return Array.from(this.handlers.keys());
89
+ }
90
+ /**
91
+ * Connect to a realtime session
92
+ *
93
+ * @param provider - Provider identifier
94
+ * @param config - Session configuration
95
+ * @param handlers - Event handlers
96
+ * @returns Session information
97
+ */
98
+ static async connect(provider, config, handlers) {
99
+ const handler = this.getHandler(provider);
100
+ if (!handler) {
101
+ throw RealtimeError.providerNotSupported(provider, Array.from(this.handlers.keys()));
102
+ }
103
+ if (!handler.isConfigured()) {
104
+ throw RealtimeError.providerNotConfigured(provider);
105
+ }
106
+ // Check for existing session
107
+ if (handler.isConnected()) {
108
+ throw RealtimeError.sessionAlreadyActive(provider);
109
+ }
110
+ // Merge with defaults
111
+ const mergedConfig = {
112
+ ...DEFAULT_REALTIME_CONFIG,
113
+ ...config,
114
+ };
115
+ // Register event handlers if provided
116
+ if (handlers) {
117
+ handler.on(handlers);
118
+ }
119
+ try {
120
+ logger.debug(`[RealtimeProcessor] Connecting to provider: ${provider}`);
121
+ const session = await handler.connect(mergedConfig);
122
+ this.sessions.set(provider.toLowerCase(), session);
123
+ logger.info(`[RealtimeProcessor] Connected to ${provider} session: ${session.id}`);
124
+ return session;
125
+ }
126
+ catch (err) {
127
+ if (handlers) {
128
+ handler.off();
129
+ }
130
+ if (err instanceof RealtimeError) {
131
+ throw err;
132
+ }
133
+ const errorMessage = err instanceof Error ? err.message : String(err || "Unknown error");
134
+ throw RealtimeError.connectionFailed(errorMessage, provider, err instanceof Error ? err : undefined);
135
+ }
136
+ }
137
+ /**
138
+ * Disconnect from a realtime session
139
+ *
140
+ * @param provider - Provider identifier
141
+ */
142
+ static async disconnect(provider) {
143
+ const handler = this.getHandler(provider);
144
+ if (!handler) {
145
+ throw RealtimeError.providerNotSupported(provider, Array.from(this.handlers.keys()));
146
+ }
147
+ if (!handler.isConnected()) {
148
+ logger.warn(`[RealtimeProcessor] No active session for provider: ${provider}`);
149
+ return;
150
+ }
151
+ try {
152
+ await handler.disconnect();
153
+ this.sessions.delete(provider.toLowerCase());
154
+ handler.off();
155
+ logger.info(`[RealtimeProcessor] Disconnected from ${provider}`);
156
+ }
157
+ catch (err) {
158
+ if (err instanceof RealtimeError) {
159
+ throw err;
160
+ }
161
+ const errorMessage = err instanceof Error ? err.message : String(err || "Unknown error");
162
+ throw RealtimeError.protocolError(`Disconnect failed: ${errorMessage}`, provider, err instanceof Error ? err : undefined);
163
+ }
164
+ }
165
+ /**
166
+ * Send audio to a realtime session
167
+ *
168
+ * @param provider - Provider identifier
169
+ * @param audio - Audio data
170
+ */
171
+ static async sendAudio(provider, audio) {
172
+ const handler = this.getHandler(provider);
173
+ if (!handler) {
174
+ throw RealtimeError.providerNotSupported(provider, Array.from(this.handlers.keys()));
175
+ }
176
+ if (!handler.isConnected()) {
177
+ throw RealtimeError.sessionNotActive(provider);
178
+ }
179
+ try {
180
+ await handler.sendAudio(audio);
181
+ }
182
+ catch (err) {
183
+ if (err instanceof RealtimeError) {
184
+ throw err;
185
+ }
186
+ const errorMessage = err instanceof Error ? err.message : String(err || "Unknown error");
187
+ throw RealtimeError.audioStreamError(errorMessage, provider);
188
+ }
189
+ }
190
+ /**
191
+ * Send text to a realtime session
192
+ *
193
+ * @param provider - Provider identifier
194
+ * @param text - Text to send
195
+ */
196
+ static async sendText(provider, text) {
197
+ const handler = this.getHandler(provider);
198
+ if (!handler) {
199
+ throw RealtimeError.providerNotSupported(provider, Array.from(this.handlers.keys()));
200
+ }
201
+ if (!handler.isConnected()) {
202
+ throw RealtimeError.sessionNotActive(provider);
203
+ }
204
+ if (!handler.sendText) {
205
+ throw new RealtimeError({
206
+ code: REALTIME_ERROR_CODES.PROTOCOL_ERROR,
207
+ message: `Provider "${provider}" does not support text input`,
208
+ category: ErrorCategory.VALIDATION,
209
+ severity: ErrorSeverity.MEDIUM,
210
+ context: { provider },
211
+ });
212
+ }
213
+ // Normalize provider exceptions into RealtimeError so callers see a
214
+ // consistent error taxonomy across sendAudio/sendText/triggerResponse/
215
+ // cancelResponse — previously raw provider errors leaked from the
216
+ // text/control paths while sendAudio wrapped them (CodeRabbit review).
217
+ try {
218
+ await handler.sendText(text);
219
+ }
220
+ catch (err) {
221
+ if (err instanceof RealtimeError) {
222
+ throw err;
223
+ }
224
+ throw new RealtimeError({
225
+ code: REALTIME_ERROR_CODES.PROTOCOL_ERROR,
226
+ message: `sendText failed: ${err instanceof Error ? err.message : String(err)}`,
227
+ category: ErrorCategory.NETWORK,
228
+ severity: ErrorSeverity.MEDIUM,
229
+ retriable: true,
230
+ context: { provider },
231
+ originalError: err instanceof Error ? err : undefined,
232
+ });
233
+ }
234
+ }
235
+ /**
236
+ * Trigger a response from the model (manual turn detection)
237
+ *
238
+ * @param provider - Provider identifier
239
+ */
240
+ static async triggerResponse(provider) {
241
+ const handler = this.getHandler(provider);
242
+ if (!handler) {
243
+ throw RealtimeError.providerNotSupported(provider, Array.from(this.handlers.keys()));
244
+ }
245
+ if (!handler.isConnected()) {
246
+ throw RealtimeError.sessionNotActive(provider);
247
+ }
248
+ if (handler.triggerResponse) {
249
+ try {
250
+ await handler.triggerResponse();
251
+ }
252
+ catch (err) {
253
+ if (err instanceof RealtimeError) {
254
+ throw err;
255
+ }
256
+ throw new RealtimeError({
257
+ code: REALTIME_ERROR_CODES.PROTOCOL_ERROR,
258
+ message: `triggerResponse failed: ${err instanceof Error ? err.message : String(err)}`,
259
+ category: ErrorCategory.NETWORK,
260
+ severity: ErrorSeverity.MEDIUM,
261
+ retriable: true,
262
+ context: { provider },
263
+ originalError: err instanceof Error ? err : undefined,
264
+ });
265
+ }
266
+ }
267
+ }
268
+ /**
269
+ * Cancel the current response
270
+ *
271
+ * @param provider - Provider identifier
272
+ */
273
+ static async cancelResponse(provider) {
274
+ const handler = this.getHandler(provider);
275
+ if (!handler) {
276
+ throw RealtimeError.providerNotSupported(provider, Array.from(this.handlers.keys()));
277
+ }
278
+ if (!handler.isConnected()) {
279
+ return; // Nothing to cancel
280
+ }
281
+ if (handler.cancelResponse) {
282
+ try {
283
+ await handler.cancelResponse();
284
+ }
285
+ catch (err) {
286
+ if (err instanceof RealtimeError) {
287
+ throw err;
288
+ }
289
+ throw new RealtimeError({
290
+ code: REALTIME_ERROR_CODES.PROTOCOL_ERROR,
291
+ message: `cancelResponse failed: ${err instanceof Error ? err.message : String(err)}`,
292
+ category: ErrorCategory.NETWORK,
293
+ severity: ErrorSeverity.MEDIUM,
294
+ retriable: true,
295
+ context: { provider },
296
+ originalError: err instanceof Error ? err : undefined,
297
+ });
298
+ }
299
+ }
300
+ }
301
+ /**
302
+ * Get current session for a provider
303
+ *
304
+ * @param provider - Provider identifier
305
+ * @returns Session or null
306
+ */
307
+ static getSession(provider) {
308
+ const handler = this.getHandler(provider);
309
+ return handler?.getSession() ?? null;
310
+ }
311
+ /**
312
+ * Check if a provider has an active session
313
+ *
314
+ * @param provider - Provider identifier
315
+ */
316
+ static isConnected(provider) {
317
+ const handler = this.getHandler(provider);
318
+ return handler?.isConnected() ?? false;
319
+ }
320
+ /**
321
+ * Get supported formats for a provider
322
+ *
323
+ * @param provider - Provider identifier
324
+ */
325
+ static getSupportedFormats(provider) {
326
+ const handler = this.getHandler(provider);
327
+ return handler?.getSupportedFormats() ?? [];
328
+ }
329
+ /**
330
+ * Clear all handlers and sessions (for testing)
331
+ */
332
+ static clearHandlers() {
333
+ // Disconnect all active sessions
334
+ for (const [provider] of this.sessions) {
335
+ const handler = this.handlers.get(provider);
336
+ if (handler?.isConnected()) {
337
+ handler.disconnect().catch(() => {
338
+ // Ignore errors during cleanup
339
+ });
340
+ }
341
+ }
342
+ this.handlers.clear();
343
+ this.sessions.clear();
344
+ logger.debug("[RealtimeProcessor] Cleared all handlers and sessions");
345
+ }
346
+ }
347
+ /**
348
+ * Base Realtime Handler with common functionality
349
+ *
350
+ * Providers can extend this class for common behavior.
351
+ */
352
+ export class BaseRealtimeHandler {
353
+ session = null;
354
+ eventHandlers = null;
355
+ state = "disconnected";
356
+ isConnected() {
357
+ return this.state === "connected";
358
+ }
359
+ getSession() {
360
+ return this.session;
361
+ }
362
+ on(handlers) {
363
+ this.eventHandlers = handlers;
364
+ }
365
+ off() {
366
+ this.eventHandlers = null;
367
+ }
368
+ /**
369
+ * Emit state change event
370
+ */
371
+ emitStateChange(newState) {
372
+ this.state = newState;
373
+ if (this.session) {
374
+ this.session.state = newState;
375
+ this.session.lastActivityAt = new Date();
376
+ }
377
+ this.eventHandlers?.onStateChange?.(newState);
378
+ }
379
+ /**
380
+ * Emit audio event
381
+ */
382
+ emitAudio(chunk) {
383
+ this.eventHandlers?.onAudio?.(chunk);
384
+ }
385
+ /**
386
+ * Emit transcript event
387
+ */
388
+ emitTranscript(text, isFinal) {
389
+ this.eventHandlers?.onTranscript?.(text, isFinal);
390
+ }
391
+ /**
392
+ * Emit text event
393
+ */
394
+ emitText(text, isFinal) {
395
+ this.eventHandlers?.onText?.(text, isFinal);
396
+ }
397
+ /**
398
+ * Emit function call event
399
+ */
400
+ async emitFunctionCall(name, args) {
401
+ if (this.eventHandlers?.onFunctionCall) {
402
+ return this.eventHandlers.onFunctionCall(name, args);
403
+ }
404
+ return undefined;
405
+ }
406
+ /**
407
+ * Emit error event
408
+ */
409
+ emitError(error) {
410
+ this.eventHandlers?.onError?.(error);
411
+ }
412
+ /**
413
+ * Emit turn start event
414
+ */
415
+ emitTurnStart() {
416
+ this.eventHandlers?.onTurnStart?.();
417
+ }
418
+ /**
419
+ * Emit turn end event
420
+ */
421
+ emitTurnEnd() {
422
+ this.eventHandlers?.onTurnEnd?.();
423
+ }
424
+ /**
425
+ * Create a session object
426
+ */
427
+ createSession(id, config) {
428
+ return {
429
+ id,
430
+ state: "connected",
431
+ provider: this.name,
432
+ model: config.model,
433
+ createdAt: new Date(),
434
+ lastActivityAt: new Date(),
435
+ config,
436
+ };
437
+ }
438
+ }
@@ -0,0 +1,135 @@
1
+ /**
2
+ * Audio Utilities for Voice Module
3
+ *
4
+ * Provides audio format conversion, duration calculation, and buffer utilities.
5
+ *
6
+ * @module voice/audio-utils
7
+ */
8
+ import type { TTSAudioFormat } from "../types/index.js";
9
+ /**
10
+ * Detect the audio format of a buffer.
11
+ *
12
+ * @param buffer - Audio data buffer to inspect
13
+ * @returns The detected `TTSAudioFormat`, or `null` (presumably when no known format is recognised; confirm against the implementation)
14
+ */
15
+ export declare function detectAudioFormat(buffer: Buffer): TTSAudioFormat | null;
16
+ /**
17
+ * Get the MIME type string for an audio format.
18
+ *
19
+ * @param format - Audio format to look up
20
+ * @returns MIME type string (presumably one of the `MIME_TYPES` values declared in this module; confirm against the implementation)
21
+ */
22
+ export declare function getMimeType(format: TTSAudioFormat): string;
23
+ /**
24
+ * Get the file extension for an audio format.
25
+ *
26
+ * @param format - Audio format to look up
27
+ * @returns File extension string, including the leading dot
28
+ */
29
+ export declare function getFileExtension(format: TTSAudioFormat): string;
30
+ /**
31
+ * Calculate the duration of the audio held in a buffer.
32
+ *
33
+ * @param buffer - Audio data buffer
34
+ * @param format - Audio format (optional; detected from the buffer when omitted)
35
+ * @param sampleRate - Sample rate in Hz (optional; extracted from the audio data when possible)
36
+ * @returns Duration in seconds, or `undefined` if it cannot be calculated
37
+ */
38
+ export declare function calculateDuration(buffer: Buffer, format?: TTSAudioFormat, sampleRate?: number): number | undefined;
39
+ /**
40
+ * Convert audio from one format to another (basic conversion only).
41
+ *
42
+ * Note: For full format conversion, external tools like ffmpeg would be needed.
43
+ * This provides basic PCM resampling only.
44
+ *
45
+ * @param buffer - Input audio buffer
46
+ * @param fromFormat - Source format of `buffer`
47
+ * @param toFormat - Target format to convert to
48
+ * @param _options - Optional conversion options (the underscore prefix suggests they are currently unused; confirm against the implementation)
49
+ * @returns Promise resolving to the converted audio buffer
50
+ */
51
+ export declare function convertAudioFormat(buffer: Buffer, fromFormat: TTSAudioFormat, toFormat: TTSAudioFormat, _options?: Record<string, unknown>): Promise<Buffer>;
52
+ /**
53
+ * Create a PCM audio buffer from raw samples.
54
+ *
55
+ * @param samples - Array of sample values (-1 to 1)
56
+ * @param _sampleRate - Optional sample rate in Hz (the underscore prefix suggests it is currently unused; confirm against the implementation)
57
+ * @param bitDepth - Optional bit depth (8, 16, 24, or 32)
58
+ * @returns PCM audio buffer
59
+ */
60
+ export declare function createPcmBuffer(samples: number[], _sampleRate?: number, bitDepth?: 8 | 16 | 24 | 32): Buffer;
61
+ /**
62
+ * Extract PCM samples from a buffer.
63
+ *
64
+ * @param buffer - PCM audio buffer
65
+ * @param bitDepth - Optional bit depth of the data in `buffer` (8, 16, 24, or 32)
66
+ * @returns Array of sample values (-1 to 1)
67
+ */
68
+ export declare function extractPcmSamples(buffer: Buffer, bitDepth?: 8 | 16 | 24 | 32): number[];
69
+ /**
70
+ * Resample PCM audio from one sample rate to another.
71
+ *
72
+ * @param samples - Input samples
73
+ * @param fromSampleRate - Sample rate of the input samples
74
+ * @param toSampleRate - Sample rate of the returned samples
75
+ * @returns Resampled samples as a number array
76
+ */
77
+ export declare function resamplePcm(samples: number[], fromSampleRate: number, toSampleRate: number): number[];
78
+ /**
79
+ * Normalize audio levels to a target peak.
80
+ *
81
+ * @param samples - Input samples
82
+ * @param targetPeak - Optional target peak level (0 to 1)
83
+ * @returns Normalized samples as a number array
84
+ */
85
+ export declare function normalizeAudio(samples: number[], targetPeak?: number): number[];
86
+ /**
87
+ * Create a WAV header for audio data of a given size.
88
+ *
89
+ * @param dataSize - Size of audio data in bytes
90
+ * @param sampleRate - Optional sample rate in Hz
91
+ * @param channels - Optional number of channels
92
+ * @param bitDepth - Optional bit depth
93
+ * @returns WAV header buffer (header only)
94
+ */
95
+ export declare function createWavHeader(dataSize: number, sampleRate?: number, channels?: number, bitDepth?: number): Buffer;
96
+ /**
97
+ * Create a complete WAV file buffer from PCM data.
98
+ *
99
+ * @param pcmData - PCM audio data
100
+ * @param sampleRate - Optional sample rate in Hz
101
+ * @param channels - Optional number of channels
102
+ * @param bitDepth - Optional bit depth
103
+ * @returns Complete WAV file buffer (presumably a WAV header followed by `pcmData`; confirm against the implementation)
104
+ */
105
+ export declare function createWavFile(pcmData: Buffer, sampleRate?: number, channels?: number, bitDepth?: number): Buffer;
106
+ /**
107
+ * Split an audio buffer into chunks of a given duration.
108
+ *
109
+ * @param buffer - Audio buffer to split
110
+ * @param chunkDurationMs - Duration of each chunk in milliseconds
111
+ * @param sampleRate - Optional sample rate in Hz
112
+ * @param bytesPerSample - Optional bytes per sample (channels * bitDepth / 8)
113
+ * @returns Array of audio chunk buffers
114
+ */
115
+ export declare function splitIntoChunks(buffer: Buffer, chunkDurationMs: number, sampleRate?: number, bytesPerSample?: number): Buffer[];
116
+ /**
117
+ * Audio format signatures for detection: one buffer each for `wav` and `ogg`, and separate `id3` and `frameSync` buffers for `mp3`.
118
+ */
119
+ export declare const AUDIO_SIGNATURES: {
120
+ readonly wav: Buffer<ArrayBuffer>;
121
+ readonly mp3: {
122
+ readonly id3: Buffer<ArrayBuffer>;
123
+ readonly frameSync: Buffer<ArrayBuffer>;
124
+ };
125
+ readonly ogg: Buffer<ArrayBuffer>;
126
+ };
127
+ /**
128
+ * MIME type strings for the `wav`, `mp3`, `ogg` and `opus` audio formats.
129
+ */
130
+ export declare const MIME_TYPES: {
131
+ readonly wav: "audio/wav";
132
+ readonly mp3: "audio/mpeg";
133
+ readonly ogg: "audio/ogg";
134
+ readonly opus: "audio/opus";
135
+ };