@juspay/neurolink 9.61.1 → 9.62.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +23 -17
- package/dist/adapters/tts/googleTTSHandler.js +1 -1
- package/dist/browser/neurolink.min.js +382 -364
- package/dist/cli/commands/serve.js +9 -0
- package/dist/cli/commands/voiceServer.d.ts +7 -0
- package/dist/cli/commands/voiceServer.js +9 -1
- package/dist/cli/factories/commandFactory.js +136 -11
- package/dist/cli/loop/optionsSchema.d.ts +1 -1
- package/dist/cli/utils/audioFileUtils.d.ts +3 -3
- package/dist/cli/utils/audioFileUtils.js +5 -1
- package/dist/core/baseProvider.js +29 -6
- package/dist/factories/providerRegistry.d.ts +14 -0
- package/dist/factories/providerRegistry.js +141 -2
- package/dist/lib/adapters/tts/googleTTSHandler.js +1 -1
- package/dist/lib/core/baseProvider.js +29 -6
- package/dist/lib/factories/providerRegistry.d.ts +14 -0
- package/dist/lib/factories/providerRegistry.js +141 -2
- package/dist/lib/mcp/toolRegistry.js +7 -1
- package/dist/lib/neurolink.d.ts +19 -0
- package/dist/lib/neurolink.js +252 -14
- package/dist/lib/observability/exporters/laminarExporter.js +1 -0
- package/dist/lib/observability/exporters/posthogExporter.js +1 -0
- package/dist/lib/observability/utils/spanSerializer.js +1 -0
- package/dist/lib/server/voice/tokenCompare.d.ts +14 -0
- package/dist/lib/server/voice/tokenCompare.js +23 -0
- package/dist/lib/server/voice/voiceServerApp.js +62 -3
- package/dist/lib/server/voice/voiceWebSocketHandler.d.ts +20 -3
- package/dist/lib/server/voice/voiceWebSocketHandler.js +555 -435
- package/dist/lib/types/generate.d.ts +47 -0
- package/dist/lib/types/hitl.d.ts +3 -0
- package/dist/lib/types/index.d.ts +1 -1
- package/dist/lib/types/index.js +1 -1
- package/dist/lib/types/realtime.d.ts +243 -0
- package/dist/lib/types/realtime.js +70 -0
- package/dist/lib/types/server.d.ts +68 -0
- package/dist/lib/types/span.d.ts +2 -0
- package/dist/lib/types/span.js +2 -0
- package/dist/lib/types/stream.d.ts +36 -14
- package/dist/lib/types/stt.d.ts +585 -0
- package/dist/lib/types/stt.js +90 -0
- package/dist/lib/types/tools.d.ts +2 -0
- package/dist/lib/types/tts.d.ts +23 -11
- package/dist/lib/types/tts.js +7 -0
- package/dist/lib/types/voice.d.ts +272 -0
- package/dist/lib/types/voice.js +137 -0
- package/dist/lib/utils/audioFormatDetector.d.ts +15 -0
- package/dist/lib/utils/audioFormatDetector.js +34 -0
- package/dist/lib/utils/errorHandling.js +4 -0
- package/dist/lib/utils/sttProcessor.d.ts +115 -0
- package/dist/lib/utils/sttProcessor.js +295 -0
- package/dist/lib/voice/RealtimeVoiceAPI.d.ts +183 -0
- package/dist/lib/voice/RealtimeVoiceAPI.js +439 -0
- package/dist/lib/voice/audio-utils.d.ts +135 -0
- package/dist/lib/voice/audio-utils.js +435 -0
- package/dist/lib/voice/errors.d.ts +123 -0
- package/dist/lib/voice/errors.js +386 -0
- package/dist/lib/voice/index.d.ts +26 -0
- package/dist/lib/voice/index.js +55 -0
- package/dist/lib/voice/providers/AzureSTT.d.ts +47 -0
- package/dist/lib/voice/providers/AzureSTT.js +345 -0
- package/dist/lib/voice/providers/AzureTTS.d.ts +59 -0
- package/dist/lib/voice/providers/AzureTTS.js +349 -0
- package/dist/lib/voice/providers/DeepgramSTT.d.ts +40 -0
- package/dist/lib/voice/providers/DeepgramSTT.js +550 -0
- package/dist/lib/voice/providers/ElevenLabsTTS.d.ts +53 -0
- package/dist/lib/voice/providers/ElevenLabsTTS.js +311 -0
- package/dist/lib/voice/providers/GeminiLive.d.ts +52 -0
- package/dist/lib/voice/providers/GeminiLive.js +372 -0
- package/dist/lib/voice/providers/GoogleSTT.d.ts +60 -0
- package/dist/lib/voice/providers/GoogleSTT.js +454 -0
- package/dist/lib/voice/providers/OpenAIRealtime.d.ts +47 -0
- package/dist/lib/voice/providers/OpenAIRealtime.js +412 -0
- package/dist/lib/voice/providers/OpenAISTT.d.ts +41 -0
- package/dist/lib/voice/providers/OpenAISTT.js +286 -0
- package/dist/lib/voice/providers/OpenAITTS.d.ts +49 -0
- package/dist/lib/voice/providers/OpenAITTS.js +271 -0
- package/dist/lib/voice/stream-handler.d.ts +166 -0
- package/dist/lib/voice/stream-handler.js +514 -0
- package/dist/mcp/toolRegistry.js +7 -1
- package/dist/neurolink.d.ts +19 -0
- package/dist/neurolink.js +252 -14
- package/dist/observability/exporters/laminarExporter.js +1 -0
- package/dist/observability/exporters/posthogExporter.js +1 -0
- package/dist/observability/utils/spanSerializer.js +1 -0
- package/dist/server/voice/tokenCompare.d.ts +14 -0
- package/dist/server/voice/tokenCompare.js +22 -0
- package/dist/server/voice/voiceServerApp.js +62 -3
- package/dist/server/voice/voiceWebSocketHandler.d.ts +20 -3
- package/dist/server/voice/voiceWebSocketHandler.js +555 -435
- package/dist/types/generate.d.ts +47 -0
- package/dist/types/hitl.d.ts +3 -0
- package/dist/types/index.d.ts +1 -1
- package/dist/types/index.js +1 -1
- package/dist/types/realtime.d.ts +243 -0
- package/dist/types/realtime.js +69 -0
- package/dist/types/server.d.ts +68 -0
- package/dist/types/span.d.ts +2 -0
- package/dist/types/span.js +2 -0
- package/dist/types/stream.d.ts +36 -14
- package/dist/types/stt.d.ts +585 -0
- package/dist/types/stt.js +89 -0
- package/dist/types/tools.d.ts +2 -0
- package/dist/types/tts.d.ts +23 -11
- package/dist/types/tts.js +7 -0
- package/dist/types/voice.d.ts +272 -0
- package/dist/types/voice.js +136 -0
- package/dist/utils/audioFormatDetector.d.ts +15 -0
- package/dist/utils/audioFormatDetector.js +33 -0
- package/dist/utils/errorHandling.js +4 -0
- package/dist/utils/sttProcessor.d.ts +115 -0
- package/dist/utils/sttProcessor.js +294 -0
- package/dist/voice/RealtimeVoiceAPI.d.ts +183 -0
- package/dist/voice/RealtimeVoiceAPI.js +438 -0
- package/dist/voice/audio-utils.d.ts +135 -0
- package/dist/voice/audio-utils.js +434 -0
- package/dist/voice/errors.d.ts +123 -0
- package/dist/voice/errors.js +385 -0
- package/dist/voice/index.d.ts +26 -0
- package/dist/voice/index.js +54 -0
- package/dist/voice/providers/AzureSTT.d.ts +47 -0
- package/dist/voice/providers/AzureSTT.js +344 -0
- package/dist/voice/providers/AzureTTS.d.ts +59 -0
- package/dist/voice/providers/AzureTTS.js +348 -0
- package/dist/voice/providers/DeepgramSTT.d.ts +40 -0
- package/dist/voice/providers/DeepgramSTT.js +549 -0
- package/dist/voice/providers/ElevenLabsTTS.d.ts +53 -0
- package/dist/voice/providers/ElevenLabsTTS.js +310 -0
- package/dist/voice/providers/GeminiLive.d.ts +52 -0
- package/dist/voice/providers/GeminiLive.js +371 -0
- package/dist/voice/providers/GoogleSTT.d.ts +60 -0
- package/dist/voice/providers/GoogleSTT.js +453 -0
- package/dist/voice/providers/OpenAIRealtime.d.ts +47 -0
- package/dist/voice/providers/OpenAIRealtime.js +411 -0
- package/dist/voice/providers/OpenAISTT.d.ts +41 -0
- package/dist/voice/providers/OpenAISTT.js +285 -0
- package/dist/voice/providers/OpenAITTS.d.ts +49 -0
- package/dist/voice/providers/OpenAITTS.js +270 -0
- package/dist/voice/stream-handler.d.ts +166 -0
- package/dist/voice/stream-handler.js +513 -0
- package/package.json +5 -2
|
@@ -0,0 +1,435 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Audio Utilities for Voice Module
|
|
3
|
+
*
|
|
4
|
+
* Provides audio format conversion, duration calculation, and buffer utilities.
|
|
5
|
+
*
|
|
6
|
+
* @module voice/audio-utils
|
|
7
|
+
*/
|
|
8
|
+
import { AUDIO_FORMAT_DETAILS } from "../types/index.js";
|
|
9
|
+
import { logger } from "../utils/logger.js";
|
|
10
|
+
/**
 * Detect the audio container/codec of a buffer from its leading magic bytes.
 *
 * Recognizes RIFF/WAVE, MP3 (ID3v2 tag or MPEG frame sync) and Ogg. An Ogg
 * stream is reported as "opus" when an "OpusHead" marker starts within the
 * first 200 bytes, otherwise as "ogg".
 *
 * @param buffer - Audio data buffer
 * @returns "wav", "mp3", "opus", "ogg", or null when fewer than 12 bytes are
 *   supplied or no known signature matches
 */
export function detectAudioFormat(buffer) {
    if (buffer.length < 12) {
        return null;
    }
    // True when `bytes` appears in the buffer starting at index `at`.
    const hasBytesAt = (bytes, at) => bytes.every((byte, i) => buffer[at + i] === byte);
    // WAV: "RIFF" at offset 0 and "WAVE" at offset 8.
    if (hasBytesAt([0x52, 0x49, 0x46, 0x46], 0) &&
        hasBytesAt([0x57, 0x41, 0x56, 0x45], 8)) {
        return "wav";
    }
    // MP3: "ID3" tag, or an MPEG frame sync (0xFF followed by three set bits).
    const startsWithId3 = hasBytesAt([0x49, 0x44, 0x33], 0);
    const startsWithFrameSync = buffer[0] === 0xff && (buffer[1] & 0xe0) === 0xe0;
    if (startsWithId3 || startsWithFrameSync) {
        return "mp3";
    }
    // Ogg: "OggS" capture pattern; the payload may be Opus or something else.
    if (hasBytesAt([0x4f, 0x67, 0x67, 0x53], 0)) {
        // Only a marker starting before offset 200 counts, so search just the
        // bytes that can contain one instead of scanning the whole buffer.
        const marker = "OpusHead";
        const searchWindow = buffer.subarray(0, 200 + marker.length - 1);
        return searchWindow.indexOf(marker) !== -1 ? "opus" : "ogg";
    }
    return null;
}
|
|
54
|
+
/**
 * Look up the MIME type registered for an audio format.
 *
 * @param format - Audio format, used as a key into AUDIO_FORMAT_DETAILS
 * @returns The registered MIME type, or "application/octet-stream" when the
 *   format is unknown or has no MIME type
 */
export function getMimeType(format) {
    const details = AUDIO_FORMAT_DETAILS[format];
    const mimeType = details?.mimeType;
    return mimeType ?? "application/octet-stream";
}
|
|
63
|
+
/**
 * Look up the file extension registered for an audio format.
 *
 * @param format - Audio format, used as a key into AUDIO_FORMAT_DETAILS
 * @returns The registered extension, or ".bin" when the format is unknown or
 *   has no extension
 */
export function getFileExtension(format) {
    const details = AUDIO_FORMAT_DETAILS[format];
    const extension = details?.extension;
    return extension ?? ".bin";
}
|
|
72
|
+
/**
 * Work out how long an audio buffer plays for.
 *
 * WAV durations come from the header; MP3 and Ogg/Opus durations are
 * estimates. Any other format is treated as raw 16-bit mono PCM, which needs
 * a sample rate.
 *
 * @param buffer - Audio data buffer
 * @param format - Audio format (optional, detected from the buffer when omitted)
 * @param sampleRate - Sample rate in Hz, only used for formats without a
 *   dedicated calculation
 * @returns Duration in seconds, or undefined when it cannot be determined
 */
export function calculateDuration(buffer, format, sampleRate) {
    const resolvedFormat = format ?? detectAudioFormat(buffer);
    if (!resolvedFormat) {
        return undefined;
    }
    try {
        if (resolvedFormat === "wav") {
            return calculateWavDuration(buffer);
        }
        if (resolvedFormat === "mp3") {
            return estimateMp3Duration(buffer);
        }
        if (resolvedFormat === "ogg" || resolvedFormat === "opus") {
            return estimateOpusDuration(buffer);
        }
        // Unknown format: 2 bytes per sample (16-bit mono) when a rate is known.
        return sampleRate ? buffer.length / (sampleRate * 2) : undefined;
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        logger.debug(`[audio-utils] Failed to calculate duration: ${reason}`);
        return undefined;
    }
}
|
|
108
|
+
/**
|
|
109
|
+
* Calculate WAV duration from header
|
|
110
|
+
*/
|
|
111
|
+
function calculateWavDuration(buffer) {
|
|
112
|
+
if (buffer.length < 44) {
|
|
113
|
+
return undefined;
|
|
114
|
+
}
|
|
115
|
+
// Find data chunk
|
|
116
|
+
let offset = 12;
|
|
117
|
+
while (offset < buffer.length - 8) {
|
|
118
|
+
const chunkId = buffer.toString("ascii", offset, offset + 4);
|
|
119
|
+
const chunkSize = buffer.readUInt32LE(offset + 4);
|
|
120
|
+
if (chunkId === "fmt ") {
|
|
121
|
+
const channels = buffer.readUInt16LE(offset + 10);
|
|
122
|
+
const sampleRate = buffer.readUInt32LE(offset + 12);
|
|
123
|
+
const bitsPerSample = buffer.readUInt16LE(offset + 22);
|
|
124
|
+
// RIFF chunks are word-aligned: odd-sized chunks carry a trailing pad
|
|
125
|
+
// byte that must be skipped, otherwise we land on the wrong header.
|
|
126
|
+
let dataOffset = offset + 8 + chunkSize + (chunkSize % 2);
|
|
127
|
+
while (dataOffset < buffer.length - 8) {
|
|
128
|
+
const dataChunkId = buffer.toString("ascii", dataOffset, dataOffset + 4);
|
|
129
|
+
const dataChunkSize = buffer.readUInt32LE(dataOffset + 4);
|
|
130
|
+
if (dataChunkId === "data") {
|
|
131
|
+
const bytesPerSample = (bitsPerSample / 8) * channels;
|
|
132
|
+
const numSamples = dataChunkSize / bytesPerSample;
|
|
133
|
+
return numSamples / sampleRate;
|
|
134
|
+
}
|
|
135
|
+
dataOffset += 8 + dataChunkSize + (dataChunkSize % 2);
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
offset += 8 + chunkSize + (chunkSize % 2);
|
|
139
|
+
}
|
|
140
|
+
return undefined;
|
|
141
|
+
}
|
|
142
|
+
/**
 * Estimate MP3 duration (approximate).
 *
 * Skips a leading ID3v2 tag, locates the first valid MPEG audio frame header
 * and divides the remaining byte count by that frame's bitrate, i.e. assumes
 * a constant bitrate. The bitrate table is chosen from the frame's MPEG
 * version and layer. A sync pattern whose header fields are invalid is
 * treated as a false positive and scanning continues.
 *
 * @param buffer - Buffer holding MP3 data
 * @returns Estimated duration in seconds; falls back to assuming 128 kbps
 *   over the whole buffer when no usable frame header is found
 */
function estimateMp3Duration(buffer) {
    let offset = 0;
    if (buffer[0] === 0x49 && buffer[1] === 0x44 && buffer[2] === 0x33) {
        // ID3v2 tag present: its size is a 28-bit "syncsafe" integer
        // (7 useful bits per byte), excluding the 10-byte tag header.
        const tagSize = ((buffer[6] & 0x7f) << 21) |
            ((buffer[7] & 0x7f) << 14) |
            ((buffer[8] & 0x7f) << 7) |
            (buffer[9] & 0x7f);
        offset = 10 + tagSize;
    }
    // Sample rates in Hz, keyed by the header's 2-bit version field.
    const sampleRates = {
        3: [44100, 48000, 32000], // MPEG1
        2: [22050, 24000, 16000], // MPEG2
        0: [11025, 12000, 8000], // MPEG2.5
    };
    // Bitrates in kbps, keyed by the header's 2-bit layer field
    // (3 = Layer I, 2 = Layer II, 1 = Layer III). Index 0 is "free format"
    // and index 15 is invalid; neither yields a usable bitrate.
    const lowRateBitrates = [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160];
    const mpeg1Bitrates = {
        3: [0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448],
        2: [0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384],
        1: [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320],
    };
    const mpeg2Bitrates = {
        3: [0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256],
        2: lowRateBitrates,
        1: lowRateBitrates,
    };
    // Find the first valid MP3 frame header
    for (; offset < buffer.length - 4; offset++) {
        if (buffer[offset] !== 0xff || (buffer[offset + 1] & 0xe0) !== 0xe0) {
            continue;
        }
        const version = (buffer[offset + 1] >> 3) & 0x03;
        const layer = (buffer[offset + 1] >> 1) & 0x03;
        const bitrateIndex = (buffer[offset + 2] >> 4) & 0x0f;
        const sampleRateIndex = (buffer[offset + 2] >> 2) & 0x03;
        const sampleRate = sampleRates[version]?.[sampleRateIndex];
        const bitrateTable = (version === 3 ? mpeg1Bitrates : mpeg2Bitrates)[layer];
        const bitrate = bitrateTable?.[bitrateIndex];
        if (sampleRate && bitrate) {
            // Estimate duration: (audio size in bits) / (bits per second)
            const audioBytes = buffer.length - offset;
            return (audioBytes * 8) / (bitrate * 1000);
        }
        // Reserved version/layer or unusable bitrate: not a real frame header.
    }
    // Fallback: assume 128kbps
    return (buffer.length * 8) / 128000;
}
|
|
190
|
+
/**
 * Rough duration estimate for Ogg/Opus data.
 *
 * No pages are parsed: the byte count is simply divided by an assumed average
 * voice bitrate of 64 kbps, so the result is only an approximation.
 *
 * @param buffer - Buffer holding Ogg/Opus data
 * @returns Estimated duration in seconds
 */
function estimateOpusDuration(buffer) {
    const assumedBitsPerSecond = 64000;
    const totalBits = buffer.length * 8;
    return totalBits / assumedBitsPerSecond;
}
|
|
199
|
+
/**
 * Convert audio between formats (pass-through only).
 *
 * No transcoding is implemented here. When the source and target formats are
 * equal the input buffer is returned untouched; any other request logs a
 * warning and throws, so callers never receive bytes mislabeled as the target
 * format. Use an external tool such as ffmpeg for real conversion.
 *
 * @param buffer - Input audio buffer
 * @param fromFormat - Source format
 * @param toFormat - Target format
 * @param _options - Conversion options (currently unused)
 * @returns The input buffer when both formats are identical
 * @throws Error when fromFormat differs from toFormat
 */
export async function convertAudioFormat(buffer, fromFormat, toFormat, _options = {}) {
    if (fromFormat !== toFormat) {
        // Failing loudly is deliberate: returning the original bytes under a
        // new label would only surface as a harder-to-debug provider error.
        const notImplemented = `Audio format conversion from ${fromFormat} to ${toFormat} is not implemented.`;
        logger.warn(`[audio-utils] ${notImplemented}`);
        throw new Error(`${notImplemented} Convert with ffmpeg before passing to NeuroLink.`);
    }
    return buffer;
}
|
|
223
|
+
/**
 * Encode floating-point samples as little-endian PCM.
 *
 * Each sample is clamped to [-1, 1] before quantization. 8-bit output is
 * unsigned; 16-, 24- and 32-bit output is signed. For any other bit depth the
 * buffer is allocated but nothing is written to it.
 *
 * @param samples - Array of sample values (-1 to 1)
 * @param _sampleRate - Sample rate in Hz (unused; it does not affect the bytes)
 * @param bitDepth - Bit depth (8, 16, 24, or 32)
 * @returns PCM audio buffer
 */
export function createPcmBuffer(samples, _sampleRate = 16000, bitDepth = 16) {
    const width = bitDepth / 8;
    const pcm = Buffer.alloc(samples.length * width);
    // One writer per supported depth; a Map keeps the lookup strict on type.
    const writers = new Map([
        [8, (value, at) => {
                pcm.writeUInt8(Math.round((value + 1) * 127.5), at);
            }],
        [16, (value, at) => {
                pcm.writeInt16LE(Math.round(value * 32767), at);
            }],
        [24, (value, at) => {
                // No native 24-bit write: emit the three low bytes by hand.
                const quantized = Math.round(value * 8388607);
                pcm.writeUInt8(quantized & 0xff, at);
                pcm.writeUInt8((quantized >> 8) & 0xff, at + 1);
                pcm.writeUInt8((quantized >> 16) & 0xff, at + 2);
            }],
        [32, (value, at) => {
                pcm.writeInt32LE(Math.round(value * 2147483647), at);
            }],
    ]);
    const write = writers.get(bitDepth);
    if (write === undefined) {
        return pcm;
    }
    for (let index = 0; index < samples.length; index++) {
        const clamped = Math.max(-1, Math.min(1, samples[index]));
        write(clamped, index * width);
    }
    return pcm;
}
|
|
258
|
+
/**
 * Decode little-endian PCM bytes into floating-point samples.
 *
 * 8-bit data is read as unsigned; 16-, 24- and 32-bit data as signed. Trailing
 * bytes that do not form a whole sample are ignored.
 *
 * @param buffer - PCM audio buffer
 * @param bitDepth - Bit depth (8, 16, 24, or 32)
 * @returns Array of sample values (approximately -1 to 1); empty when the bit
 *   depth is not one of the supported values
 */
export function extractPcmSamples(buffer, bitDepth = 16) {
    const readers = new Map([
        [8, (at) => buffer.readUInt8(at) / 127.5 - 1],
        [16, (at) => buffer.readInt16LE(at) / 32767],
        [24, (at) => {
                // No native 24-bit read: assemble the value, then sign-extend.
                const raw = buffer.readUInt8(at) |
                    (buffer.readUInt8(at + 1) << 8) |
                    (buffer.readUInt8(at + 2) << 16);
                return (raw > 8388607 ? raw - 16777216 : raw) / 8388607;
            }],
        [32, (at) => buffer.readInt32LE(at) / 2147483647],
    ]);
    const read = readers.get(bitDepth);
    if (read === undefined) {
        // Unsupported depths never produced samples; returning early also
        // avoids an unbounded loop when bitDepth is 0 (sample count Infinity).
        return [];
    }
    const bytesPerSample = bitDepth / 8;
    const numSamples = Math.floor(buffer.length / bytesPerSample);
    return Array.from({ length: numSamples }, (_, i) => read(i * bytesPerSample));
}
|
|
292
|
+
/**
 * Change the sample rate of PCM samples using linear interpolation.
 *
 * The input array itself is returned when either rate is not positive or when
 * both rates are equal. No low-pass filtering is applied.
 *
 * @param samples - Input samples
 * @param fromSampleRate - Source sample rate
 * @param toSampleRate - Target sample rate
 * @returns Resampled samples
 */
export function resamplePcm(samples, fromSampleRate, toSampleRate) {
    const nothingToDo = fromSampleRate <= 0 ||
        toSampleRate <= 0 ||
        fromSampleRate === toSampleRate;
    if (nothingToDo) {
        return samples;
    }
    const step = fromSampleRate / toSampleRate;
    const outputLength = Math.round(samples.length / step);
    const lastIndex = samples.length - 1;
    return Array.from({ length: outputLength }, (_, outIndex) => {
        // Fractional position in the source, blended between its neighbours.
        const position = outIndex * step;
        const lower = Math.floor(position);
        const upper = Math.min(lower + 1, lastIndex);
        const weight = position - lower;
        return samples[lower] * (1 - weight) + samples[upper] * weight;
    });
}
|
|
322
|
+
/**
|
|
323
|
+
* Normalize audio levels
|
|
324
|
+
*
|
|
325
|
+
* @param samples - Input samples
|
|
326
|
+
* @param targetPeak - Target peak level (0 to 1)
|
|
327
|
+
* @returns Normalized samples
|
|
328
|
+
*/
|
|
329
|
+
export function normalizeAudio(samples, targetPeak = 0.95) {
|
|
330
|
+
if (samples.length === 0) {
|
|
331
|
+
return samples;
|
|
332
|
+
}
|
|
333
|
+
// Find current peak
|
|
334
|
+
let peak = 0;
|
|
335
|
+
for (const sample of samples) {
|
|
336
|
+
peak = Math.max(peak, Math.abs(sample));
|
|
337
|
+
}
|
|
338
|
+
if (peak === 0) {
|
|
339
|
+
return samples;
|
|
340
|
+
}
|
|
341
|
+
// Calculate gain
|
|
342
|
+
const gain = targetPeak / peak;
|
|
343
|
+
// Apply gain
|
|
344
|
+
return samples.map((s) => s * gain);
|
|
345
|
+
}
|
|
346
|
+
/**
 * Build the 44-byte RIFF/WAVE header for uncompressed PCM audio.
 *
 * @param dataSize - Size of audio data in bytes
 * @param sampleRate - Sample rate in Hz
 * @param channels - Number of channels
 * @param bitDepth - Bit depth
 * @returns WAV header buffer
 */
export function createWavHeader(dataSize, sampleRate = 16000, channels = 1, bitDepth = 16) {
    const byteRate = sampleRate * channels * (bitDepth / 8);
    const blockAlign = channels * (bitDepth / 8);
    const header = Buffer.alloc(44);
    // Fields are laid out back to back, so track a running write position.
    let cursor = 0;
    const putTag = (text) => {
        header.write(text, cursor);
        cursor += 4;
    };
    const putUInt32 = (value) => {
        header.writeUInt32LE(value, cursor);
        cursor += 4;
    };
    const putUInt16 = (value) => {
        header.writeUInt16LE(value, cursor);
        cursor += 2;
    };
    // RIFF descriptor
    putTag("RIFF");
    putUInt32(36 + dataSize); // bytes following this field
    putTag("WAVE");
    // fmt chunk
    putTag("fmt ");
    putUInt32(16); // fmt chunk body size for PCM
    putUInt16(1); // audio format tag: 1 = PCM
    putUInt16(channels);
    putUInt32(sampleRate);
    putUInt32(byteRate);
    putUInt16(blockAlign);
    putUInt16(bitDepth);
    // data chunk header
    putTag("data");
    putUInt32(dataSize);
    return header;
}
|
|
377
|
+
/**
 * Wrap raw PCM data in a WAV container.
 *
 * Prepends the header produced by createWavHeader (sized from the PCM data's
 * length) to the data.
 *
 * @param pcmData - PCM audio data
 * @param sampleRate - Sample rate in Hz
 * @param channels - Number of channels
 * @param bitDepth - Bit depth
 * @returns Complete WAV file buffer
 */
export function createWavFile(pcmData, sampleRate = 16000, channels = 1, bitDepth = 16) {
    const parts = [
        createWavHeader(pcmData.length, sampleRate, channels, bitDepth),
        pcmData,
    ];
    return Buffer.concat(parts);
}
|
|
390
|
+
/**
|
|
391
|
+
* Split audio buffer into chunks
|
|
392
|
+
*
|
|
393
|
+
* @param buffer - Audio buffer to split
|
|
394
|
+
* @param chunkDurationMs - Duration of each chunk in milliseconds
|
|
395
|
+
* @param sampleRate - Sample rate in Hz
|
|
396
|
+
* @param bytesPerSample - Bytes per sample (channels * bitDepth / 8)
|
|
397
|
+
* @returns Array of audio chunks
|
|
398
|
+
*/
|
|
399
|
+
export function splitIntoChunks(buffer, chunkDurationMs, sampleRate = 16000, bytesPerSample = 2) {
|
|
400
|
+
if (chunkDurationMs <= 0 || sampleRate <= 0 || bytesPerSample <= 0) {
|
|
401
|
+
return [buffer];
|
|
402
|
+
}
|
|
403
|
+
const bytesPerMs = (sampleRate * bytesPerSample) / 1000;
|
|
404
|
+
const chunkSize = Math.round(chunkDurationMs * bytesPerMs);
|
|
405
|
+
if (chunkSize <= 0) {
|
|
406
|
+
return [buffer];
|
|
407
|
+
}
|
|
408
|
+
const chunks = [];
|
|
409
|
+
for (let offset = 0; offset < buffer.length; offset += chunkSize) {
|
|
410
|
+
const end = Math.min(offset + chunkSize, buffer.length);
|
|
411
|
+
chunks.push(buffer.subarray(offset, end));
|
|
412
|
+
}
|
|
413
|
+
return chunks;
|
|
414
|
+
}
|
|
415
|
+
/**
 * Magic-byte signatures used to recognize audio formats.
 *
 * `mp3.frameSync` holds the sync pattern as a mask (0xFF followed by the top
 * three bits set), not a literal byte sequence.
 */
export const AUDIO_SIGNATURES = {
    wav: Buffer.from("RIFF", "ascii"),
    mp3: {
        id3: Buffer.from("ID3", "ascii"),
        frameSync: Buffer.from([0xff, 0xe0]),
    },
    ogg: Buffer.from("OggS", "ascii"),
};
|
|
426
|
+
/**
 * MIME type for each audio format name.
 */
export const MIME_TYPES = {
    "wav": "audio/wav",
    "mp3": "audio/mpeg",
    "ogg": "audio/ogg",
    "opus": "audio/opus",
};
|
|
435
|
+
//# sourceMappingURL=audio-utils.js.map
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Voice Module Error Classes
|
|
3
|
+
*
|
|
4
|
+
* Comprehensive error handling for TTS, STT, and Realtime Voice operations.
|
|
5
|
+
*
|
|
6
|
+
* @module voice/errors
|
|
7
|
+
*/
|
|
8
|
+
import { NeuroLinkError } from "../utils/errorHandling.js";
|
|
9
|
+
import type { VoiceErrorOptions } from "../types/index.js";
|
|
10
|
+
import { REALTIME_ERROR_CODES, STT_ERROR_CODES, VOICE_ERROR_CODES } from "../types/index.js";
|
|
11
|
+
export { STT_ERROR_CODES, REALTIME_ERROR_CODES, VOICE_ERROR_CODES };
|
|
12
|
+
/**
 * Base Voice Error class for all voice-related errors.
 *
 * Extends NeuroLinkError and is itself the parent of STTError and
 * RealtimeError declared in this module.
 */
export declare class VoiceError extends NeuroLinkError {
    /**
     * @param options - Error details, shaped by VoiceErrorOptions
     */
    constructor(options: VoiceErrorOptions);
}
|
|
18
|
+
/**
 * STT Error class for speech-to-text specific errors.
 *
 * Besides the constructor, the class exposes static factory methods, one per
 * failure category, each returning a ready-made STTError.
 */
export declare class STTError extends VoiceError {
    /**
     * @param options - Error details, shaped by VoiceErrorOptions
     */
    constructor(options: VoiceErrorOptions);
    /**
     * Create an error for empty audio input
     *
     * @param provider - Optional provider name to attach to the error
     */
    static audioEmpty(provider?: string): STTError;
    /**
     * Create an error for audio that exceeds maximum duration
     *
     * @param durationSeconds - Duration of the supplied audio, in seconds
     * @param maxDurationSeconds - Maximum allowed duration, in seconds
     * @param provider - Optional provider name to attach to the error
     */
    static audioTooLong(durationSeconds: number, maxDurationSeconds: number, provider?: string): STTError;
    /**
     * Create an error for invalid audio format
     *
     * @param format - The format that was rejected
     * @param supportedFormatsOrProvider - Either a list of supported formats
     *   or a single string; judging by the parameter name the string form is
     *   a provider name (confirm against the implementation)
     * @param provider - Optional provider name to attach to the error
     */
    static invalidFormat(format: string, supportedFormatsOrProvider?: string[] | string, provider?: string): STTError;
    /**
     * Create an error for unsupported language
     *
     * @param language - The language that was rejected
     * @param supportedLanguages - Optional list of supported languages
     * @param provider - Optional provider name to attach to the error
     */
    static languageNotSupported(language: string, supportedLanguages?: string[], provider?: string): STTError;
    /**
     * Create an error for transcription failure
     * Supports two signatures:
     * - transcriptionFailed(reason, provider?, originalError?)
     * - transcriptionFailed(reason, originalError?, provider?)
     *
     * @param reason - Description of why transcription failed
     * @param providerOrError - Provider name, or the underlying Error
     * @param originalErrorOrProvider - The underlying Error, or provider name
     */
    static transcriptionFailed(reason: string, providerOrError?: string | Error, originalErrorOrProvider?: Error | string): STTError;
    /**
     * Create an error for unconfigured provider
     *
     * @param provider - Name of the provider that is not configured
     */
    static providerNotConfigured(provider: string): STTError;
    /**
     * Create an error for unsupported provider
     *
     * @param provider - Name of the provider that is not supported
     * @param availableProviders - Optional list of providers that are available
     */
    static providerNotSupported(provider: string, availableProviders?: string[]): STTError;
    /**
     * Create an error for stream processing failure
     *
     * @param reason - Description of the stream failure
     * @param provider - Optional provider name to attach to the error
     */
    static streamError(reason: string, provider?: string): STTError;
    /**
     * Alias for providerNotConfigured
     *
     * @param provider - Name of the provider that is not configured
     */
    static notConfigured(provider: string): STTError;
    /**
     * Alias for audioEmpty
     *
     * @param provider - Optional provider name to attach to the error
     */
    static emptyAudio(provider?: string): STTError;
}
|
|
67
|
+
/**
 * Realtime Voice Error class for realtime-specific errors.
 *
 * Besides the constructor, the class exposes static factory methods, one per
 * failure category, each returning a ready-made RealtimeError.
 */
export declare class RealtimeError extends VoiceError {
    /**
     * @param options - Error details, shaped by VoiceErrorOptions
     */
    constructor(options: VoiceErrorOptions);
    /**
     * Create an error for connection failure
     * Supports two signatures:
     * - connectionFailed(reason, provider?, originalError?)
     * - connectionFailed(reason, originalError?, provider?)
     *
     * @param reason - Description of why the connection failed
     * @param providerOrError - Provider name, or the underlying Error
     * @param originalErrorOrProvider - The underlying Error, or provider name
     */
    static connectionFailed(reason: string, providerOrError?: string | Error, originalErrorOrProvider?: Error | string): RealtimeError;
    /**
     * Create an error for session timeout
     *
     * @param timeoutMs - The timeout, in milliseconds
     * @param provider - Optional provider name to attach to the error
     */
    static sessionTimeout(timeoutMs: number, provider?: string): RealtimeError;
    /**
     * Create an error for protocol errors
     *
     * @param reason - Description of the protocol problem
     * @param provider - Optional provider name to attach to the error
     * @param originalError - Optional underlying Error
     */
    static protocolError(reason: string, provider?: string, originalError?: Error): RealtimeError;
    /**
     * Create an error for audio stream failures
     *
     * @param reason - Description of the stream failure
     * @param provider - Optional provider name to attach to the error
     */
    static audioStreamError(reason: string, provider?: string): RealtimeError;
    /**
     * Create an error for unconfigured provider
     *
     * @param provider - Name of the provider that is not configured
     */
    static providerNotConfigured(provider: string): RealtimeError;
    /**
     * Create an error for unsupported provider
     *
     * @param provider - Name of the provider that is not supported
     * @param availableProviders - Optional list of providers that are available
     */
    static providerNotSupported(provider: string, availableProviders?: string[]): RealtimeError;
    /**
     * Create an error for duplicate session
     *
     * @param provider - Optional provider name to attach to the error
     */
    static sessionAlreadyActive(provider?: string): RealtimeError;
    /**
     * Create an error for no active session
     *
     * @param provider - Optional provider name to attach to the error
     */
    static sessionNotActive(provider?: string): RealtimeError;
    /**
     * Create an error for invalid messages
     *
     * @param reason - Description of what made the message invalid
     * @param provider - Optional provider name to attach to the error
     */
    static invalidMessage(reason: string, provider?: string): RealtimeError;
    /**
     * Create an error for connection closed unexpectedly
     *
     * @param reason - Description of why the connection closed
     * @param sessionId - Optional identifier of the affected session
     * @param provider - Optional provider name to attach to the error
     */
    static connectionClosed(reason: string, sessionId?: string, provider?: string): RealtimeError;
    /**
     * Create an error for unconfigured provider (alias)
     *
     * @param provider - Name of the provider that is not configured
     */
    static notConfigured(provider: string): RealtimeError;
    /**
     * Create an error for operation timeout
     *
     * @param operation - Name of the operation that timed out
     * @param timeoutMs - The timeout, in milliseconds
     * @param provider - Optional provider name to attach to the error
     */
    static timeout(operation: string, timeoutMs: number, provider?: string): RealtimeError;
}
|