voicecc 1.1.36 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/voicecc.js +94 -1
- package/dashboard/dist/assets/index-DCeOdulF.js +28 -0
- package/dashboard/dist/index.html +1 -1
- package/dashboard/routes/agents.ts +28 -8
- package/dashboard/routes/browser-call.ts +3 -2
- package/dashboard/routes/chat.ts +75 -55
- package/dashboard/routes/providers.ts +5 -74
- package/dashboard/routes/twilio.ts +104 -5
- package/dashboard/routes/voice.ts +98 -0
- package/dashboard/server.ts +48 -1
- package/package.json +2 -3
- package/server/index.ts +96 -8
- package/server/services/twilio-manager.ts +29 -10
- package/dashboard/dist/assets/index-C62C9Gp0.js +0 -28
- package/dashboard/dist/audio-processor.js +0 -126
- package/server/services/heartbeat.ts +0 -403
- package/server/voice/assets/chime.wav +0 -0
- package/server/voice/assets/startup.pcm +0 -0
- package/server/voice/audio-adapter.ts +0 -60
- package/server/voice/audio-inactivity.test.ts +0 -108
- package/server/voice/audio-inactivity.ts +0 -91
- package/server/voice/browser-audio-playback.test.ts +0 -149
- package/server/voice/browser-audio.ts +0 -147
- package/server/voice/browser-server.ts +0 -311
- package/server/voice/chat-server.ts +0 -236
- package/server/voice/chime.test.ts +0 -69
- package/server/voice/chime.ts +0 -36
- package/server/voice/claude-session.ts +0 -293
- package/server/voice/endpointing.ts +0 -163
- package/server/voice/mic-vpio +0 -0
- package/server/voice/narration.ts +0 -204
- package/server/voice/prompt-builder.ts +0 -108
- package/server/voice/session-lock.ts +0 -123
- package/server/voice/stt-elevenlabs.ts +0 -210
- package/server/voice/stt-provider.ts +0 -106
- package/server/voice/tts-elevenlabs-hiss.test.ts +0 -183
- package/server/voice/tts-elevenlabs.ts +0 -397
- package/server/voice/tts-provider.ts +0 -155
- package/server/voice/twilio-audio.ts +0 -338
- package/server/voice/twilio-server.ts +0 -540
- package/server/voice/types.ts +0 -282
- package/server/voice/vad.ts +0 -101
- package/server/voice/voice-loop-bugs.test.ts +0 -348
- package/server/voice/voice-server.ts +0 -129
- package/server/voice/voice-session.ts +0 -539
|
@@ -1,210 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* ElevenLabs STT provider via batch transcription API (Scribe v2).
|
|
3
|
-
*
|
|
4
|
-
* Accumulates audio samples locally during speech, then sends the full buffer
|
|
5
|
-
* to the ElevenLabs speech-to-text API on transcribe(). Audio is encoded as a
|
|
6
|
-
* WAV file (16kHz mono 16-bit PCM) before upload.
|
|
7
|
-
*
|
|
8
|
-
* Responsibilities:
|
|
9
|
-
* - Accumulate Float32Array audio chunks during speech
|
|
10
|
-
* - Encode accumulated audio as a WAV file for upload
|
|
11
|
-
* - POST the WAV to the ElevenLabs batch STT API via multipart/form-data
|
|
12
|
-
* - Parse the JSON response and return a TranscriptionResult
|
|
13
|
-
* - Clear the buffer after transcription or on demand
|
|
14
|
-
*/
|
|
15
|
-
|
|
16
|
-
import type { SttProcessor, TranscriptionResult } from "./types.js";
|
|
17
|
-
|
|
18
|
-
// ============================================================================
|
|
19
|
-
// CONSTANTS
|
|
20
|
-
// ============================================================================
|
|
21
|
-
|
|
22
|
-
/** Endpoint URL of the ElevenLabs speech-to-text API. */
const ELEVENLABS_STT_URL = "https://api.elevenlabs.io/v1/speech-to-text";

/** WAV sample rate in Hz; has to equal the rate of the incoming microphone audio. */
const WAV_SAMPLE_RATE = 16_000;

/** Channel count written to the WAV header (1 = mono). */
const WAV_CHANNELS = 1;

/** Bit depth of every PCM sample stored in the WAV file. */
const WAV_BIT_DEPTH = 16;

/** Byte length of the WAV header that precedes the sample data. */
const WAV_HEADER_SIZE = 44;
|
|
36
|
-
|
|
37
|
-
// ============================================================================
|
|
38
|
-
// INTERFACES
|
|
39
|
-
// ============================================================================
|
|
40
|
-
|
|
41
|
-
/**
 * Settings needed to construct the ElevenLabs STT provider.
 */
export interface ElevenlabsSttConfig {
  /** API key used to authenticate requests against ElevenLabs */
  apiKey: string;
  /** Identifier of the ElevenLabs STT model to use (e.g. "scribe_v1") */
  modelId: string;
}
|
|
50
|
-
|
|
51
|
-
// ============================================================================
|
|
52
|
-
// MAIN HANDLERS
|
|
53
|
-
// ============================================================================
|
|
54
|
-
|
|
55
|
-
/**
 * Create an SttProcessor backed by the ElevenLabs batch transcription API.
 *
 * Audio chunks are buffered in memory by accumulate(). transcribe() joins the
 * buffered chunks, encodes them as a WAV file and uploads that file in a single
 * multipart/form-data POST.
 *
 * @param config - ElevenLabs STT configuration (API key and model ID)
 * @returns An SttProcessor exposing accumulate, transcribe, clearBuffer and destroy
 */
export async function createElevenlabsStt(config: ElevenlabsSttConfig): Promise<SttProcessor> {
  const { apiKey, modelId } = config;

  let audioChunks: Float32Array[] = [];

  /**
   * Append audio samples to the internal buffer.
   *
   * The array is stored by reference, not copied.
   * NOTE(review): if callers reuse the same Float32Array between calls, the
   * buffered audio would be overwritten -- confirm against the audio pipeline.
   *
   * @param samples - Float32Array of audio samples (16kHz, normalized -1.0 to 1.0)
   */
  function accumulate(samples: Float32Array): void {
    audioChunks.push(samples);
  }

  /**
   * Transcribe the buffered audio by uploading it to the ElevenLabs API.
   *
   * The buffer is emptied before the request is made, so it is empty afterwards
   * regardless of whether the request succeeds.
   *
   * @returns Transcription result with text, isFinal flag, and timestamp.
   *          An empty buffer yields an empty text without contacting the API.
   * @throws Error on a non-2xx response, on a response body that is not JSON
   *         with a string `text` field, or on network failure
   */
  async function transcribe(): Promise<TranscriptionResult> {
    const combinedSamples = concatenateChunks(audioChunks);
    audioChunks = [];

    // Nothing was recorded: skip the upload entirely.
    if (combinedSamples.length === 0) {
      return { text: "", isFinal: true, timestamp: Date.now() };
    }

    // Encode audio as WAV and upload to ElevenLabs
    const wavBuffer = encodeWav(combinedSamples);
    const wavBlob = new Blob([new Uint8Array(wavBuffer)], { type: "audio/wav" });

    const formData = new FormData();
    formData.append("file", wavBlob, "audio.wav");
    formData.append("model_id", modelId);

    const response = await fetch(ELEVENLABS_STT_URL, {
      method: "POST",
      headers: {
        "xi-api-key": apiKey,
      },
      body: formData,
    });

    if (!response.ok) {
      // Reading the error body is best-effort; fall back to a placeholder.
      const errorText = await response.text().catch(() => "unknown error");
      throw new Error(`ElevenLabs STT API error ${response.status}: ${errorText}`);
    }

    // Validate the payload instead of trusting a cast: a 2xx body without a
    // string `text` would otherwise fail later with an opaque TypeError.
    const payload: unknown = await response.json();
    const text =
      typeof payload === "object" && payload !== null
        ? (payload as { text?: unknown }).text
        : undefined;

    if (typeof text !== "string") {
      throw new Error("ElevenLabs STT API returned a response without a text field");
    }

    return { text: text.trim(), isFinal: true, timestamp: Date.now() };
  }

  /**
   * Clear the accumulated audio buffer without transcribing.
   */
  function clearBuffer(): void {
    audioChunks = [];
  }

  /**
   * Free resources. Only the in-memory buffer is cleared; this function holds
   * no other resources.
   */
  function destroy(): void {
    audioChunks = [];
  }

  return {
    accumulate,
    transcribe,
    clearBuffer,
    destroy,
  };
}
|
|
140
|
-
|
|
141
|
-
// ============================================================================
|
|
142
|
-
// HELPER FUNCTIONS
|
|
143
|
-
// ============================================================================
|
|
144
|
-
|
|
145
|
-
/**
 * Build a WAV file from normalized float audio samples.
 *
 * The output is a WAV_HEADER_SIZE-byte header (RIFF, "fmt " and "data" chunks)
 * followed by the samples as little-endian 16-bit signed PCM.
 *
 * @param samples - Float32Array of audio samples (normalized -1.0 to 1.0)
 * @returns Buffer holding the complete WAV file
 */
function encodeWav(samples: Float32Array): Buffer {
  const bytesPerSample = WAV_BIT_DEPTH / 8;
  const dataSize = samples.length * bytesPerSample;
  const wav = Buffer.alloc(WAV_HEADER_SIZE + dataSize);

  // Buffer.write returns the number of bytes written, while the numeric
  // writers return the offset just past the written value.
  let pos = 0;

  // RIFF header
  pos += wav.write("RIFF", pos);
  pos = wav.writeUInt32LE(wav.length - 8, pos); // File size minus the 8-byte RIFF preamble
  pos += wav.write("WAVE", pos);

  // fmt sub-chunk
  pos += wav.write("fmt ", pos);
  pos = wav.writeUInt32LE(16, pos); // Sub-chunk size (16 for PCM)
  pos = wav.writeUInt16LE(1, pos); // Audio format (1 = PCM)
  pos = wav.writeUInt16LE(WAV_CHANNELS, pos); // Number of channels
  pos = wav.writeUInt32LE(WAV_SAMPLE_RATE, pos); // Sample rate
  pos = wav.writeUInt32LE(WAV_SAMPLE_RATE * WAV_CHANNELS * bytesPerSample, pos); // Byte rate
  pos = wav.writeUInt16LE(WAV_CHANNELS * bytesPerSample, pos); // Block align
  pos = wav.writeUInt16LE(WAV_BIT_DEPTH, pos); // Bits per sample

  // data sub-chunk
  pos += wav.write("data", pos);
  pos = wav.writeUInt32LE(dataSize, pos);

  // Float -> int16: clamp to [-1, 1], then scale negatives by 0x8000 and
  // non-negatives by 0x7FFF so -1 maps to -32768 and 1 maps to 32767.
  for (const sample of samples) {
    const clamped = Math.max(-1, Math.min(1, sample));
    const scaled = clamped * (clamped < 0 ? 0x8000 : 0x7fff);
    pos = wav.writeInt16LE(Math.round(scaled), pos);
  }

  return wav;
}
|
|
189
|
-
|
|
190
|
-
/**
 * Concatenate an array of Float32Array chunks into a single Float32Array.
 *
 * The result is always a newly allocated array, including when zero or one
 * chunk is passed, so the caller may mutate it without affecting the inputs.
 *
 * @param chunks - Array of Float32Array audio chunks (not modified)
 * @returns Single concatenated Float32Array, in chunk order
 */
function concatenateChunks(chunks: readonly Float32Array[]): Float32Array {
  let totalLength = 0;
  for (const chunk of chunks) {
    totalLength += chunk.length;
  }

  const result = new Float32Array(totalLength);

  let offset = 0;
  for (const chunk of chunks) {
    result.set(chunk, offset);
    offset += chunk.length;
  }

  return result;
}
|
|
@@ -1,106 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* STT provider factory and readiness checks.
|
|
3
|
-
*
|
|
4
|
-
* Routes STT creation to the ElevenLabs provider implementation.
|
|
5
|
-
* Checks provider readiness (API keys) for dashboard status.
|
|
6
|
-
*
|
|
7
|
-
* Responsibilities:
|
|
8
|
-
* - Create an SttProcessor for the configured provider
|
|
9
|
-
* - Check provider readiness (API keys set)
|
|
10
|
-
* - Provide static metadata about available STT providers
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import { createElevenlabsStt } from "./stt-elevenlabs.js";
|
|
14
|
-
import { readEnv } from "../services/env.js";
|
|
15
|
-
|
|
16
|
-
import type { SttProcessor, SttProviderType, SttProviderConfig, ProviderStatus } from "./types.js";
|
|
17
|
-
|
|
18
|
-
// ============================================================================
|
|
19
|
-
// INTERFACES
|
|
20
|
-
// ============================================================================
|
|
21
|
-
|
|
22
|
-
/**
 * Static, display-oriented description of an STT provider for the dashboard.
 */
export interface SttProviderInfo {
  /** Identifier of the provider type */
  type: SttProviderType;
  /** Provider name as shown to users */
  name: string;
  /** Brief summary of what the provider is */
  description: string;
  /** Name of the environment variable holding the API key (undefined = no key needed) */
  requiresApiKey?: string;
}
|
|
35
|
-
|
|
36
|
-
/**
 * Arguments accepted by the provider factory when building an STT processor.
 */
export interface CreateSttOptions {
  /** Selected provider together with its per-provider settings */
  providerConfig: SttProviderConfig;
}
|
|
43
|
-
|
|
44
|
-
// ============================================================================
|
|
45
|
-
// MAIN HANDLERS
|
|
46
|
-
// ============================================================================
|
|
47
|
-
|
|
48
|
-
/**
 * Build the SttProcessor that matches the configured provider.
 *
 * Only the "elevenlabs" provider is handled; its API key and model ID are
 * forwarded to createElevenlabsStt().
 *
 * @param options - Provider config with per-provider settings
 * @returns An SttProcessor instance for the configured provider
 * @throws Error if the provider is not one this factory handles
 */
export async function createSttForProvider(options: CreateSttOptions): Promise<SttProcessor> {
  const config = options.providerConfig;

  if (config.provider === "elevenlabs") {
    const { apiKey, modelId } = config.elevenlabs;
    return createElevenlabsStt({ apiKey, modelId });
  }

  throw new Error(`Unknown STT provider: ${config.provider}`);
}
|
|
69
|
-
|
|
70
|
-
/**
 * Report whether the given STT provider can be used right now.
 *
 * For "elevenlabs" this reads the environment via readEnv() and requires a
 * non-empty ELEVENLABS_API_KEY.
 *
 * @param providerType - The provider to check
 * @returns `{ ready: true }`, or a not-ready status carrying a reason and detail
 * @throws Error if the provider type is not one this function handles
 */
export async function getSttProviderStatus(providerType: SttProviderType): Promise<ProviderStatus> {
  if (providerType !== "elevenlabs") {
    throw new Error(`Unknown STT provider: ${providerType}`);
  }

  const env = await readEnv();
  const hasApiKey = Boolean(env.ELEVENLABS_API_KEY);

  return hasApiKey
    ? { ready: true }
    : { ready: false, reason: "missing_api_key", detail: "ELEVENLABS_API_KEY is not set in .env" };
}
|
|
91
|
-
|
|
92
|
-
/**
 * List every STT provider this module knows about, with display metadata.
 *
 * A new array is built on each call.
 *
 * @returns Array of STT provider info (currently only ElevenLabs Scribe)
 */
export function getAvailableSttProviders(): SttProviderInfo[] {
  const elevenlabs: SttProviderInfo = {
    type: "elevenlabs",
    name: "ElevenLabs Scribe",
    description: "Cloud STT via ElevenLabs batch transcription API",
    requiresApiKey: "ELEVENLABS_API_KEY",
  };

  return [elevenlabs];
}
|
|
@@ -1,183 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Tests that ElevenLabs TTS writes sample-aligned PCM to the speaker stream.
|
|
3
|
-
*
|
|
4
|
-
* ElevenLabs streams raw PCM (16-bit, 24kHz mono) over HTTP. The fetch
|
|
5
|
-
* response body yields chunks at arbitrary byte boundaries (TCP packets).
|
|
6
|
-
* Each chunk is written to the speaker stream, which for the browser path
|
|
7
|
-
* becomes a separate WebSocket message. The browser interprets each message
|
|
8
|
-
* as Int16Array -- if a chunk has an odd byte count, a sample is split and
|
|
9
|
-
* all subsequent audio is corrupted (hiss/static).
|
|
10
|
-
*
|
|
11
|
-
* Run: npx tsx --test server/voice/tts-elevenlabs-hiss.test.ts
|
|
12
|
-
*/
|
|
13
|
-
|
|
14
|
-
import { test } from "node:test";
|
|
15
|
-
import { strict as assert } from "node:assert";
|
|
16
|
-
import { PassThrough } from "stream";
|
|
17
|
-
|
|
18
|
-
import { createElevenlabsTts } from "./tts-elevenlabs.js";
|
|
19
|
-
|
|
20
|
-
// ============================================================================
|
|
21
|
-
// CONSTANTS
|
|
22
|
-
// ============================================================================
|
|
23
|
-
|
|
24
|
-
/** Sample rate, in Hz, of the PCM audio that ElevenLabs produces */
const SAMPLE_RATE = 24_000;

/** Size of one 16-bit sample in bytes */
const BYTES_PER_SAMPLE = 2;
|
|
29
|
-
|
|
30
|
-
// ============================================================================
|
|
31
|
-
// HELPERS
|
|
32
|
-
// ============================================================================
|
|
33
|
-
|
|
34
|
-
/**
 * Produce a 440Hz sine wave as raw 16-bit signed little-endian PCM.
 *
 * @param sampleCount - Number of samples to generate
 * @returns Buffer of int16 LE PCM, BYTES_PER_SAMPLE bytes per sample
 */
function generateSineWavePcm(sampleCount: number): Buffer {
  const pcm = Buffer.alloc(sampleCount * BYTES_PER_SAMPLE);

  // writeInt16LE returns the offset just past the written sample.
  let byteOffset = 0;
  for (let n = 0; n < sampleCount; n++) {
    const seconds = n / SAMPLE_RATE;
    const scaled = Math.sin(2 * Math.PI * 440 * seconds) * 32767;
    byteOffset = pcm.writeInt16LE(Math.round(scaled), byteOffset);
  }

  return pcm;
}
|
|
51
|
-
|
|
52
|
-
/**
 * Create a mock fetch Response whose body streams the given PCM buffer
 * split into chunks at the specified byte offsets (simulating arbitrary
 * HTTP chunked transfer boundaries).
 *
 * Offsets are applied in the order given. An offset that does not move past
 * the current position (out of order, repeated, zero, negative or NaN) is
 * ignored, so the streamed body always contains every byte of `pcm` exactly
 * once and never contains an empty chunk. Offsets beyond the end of the buffer
 * are clamped to its length. Any bytes after the last usable offset are sent
 * as a final chunk.
 *
 * @param pcm - Full PCM buffer to stream
 * @param splitOffsets - Byte offsets at which to split (e.g. [1001, 2000])
 * @returns A Response object (status 200) with a streaming body
 */
function createMockResponse(pcm: Buffer, splitOffsets: number[]): Response {
  const chunks: Uint8Array[] = [];
  let offset = 0;

  for (const splitAt of splitOffsets) {
    if (offset >= pcm.byteLength) break;

    const end = Math.min(Math.trunc(splitAt), pcm.byteLength);

    // Written as a negated ">" so that NaN is rejected as well.
    if (!(end > offset)) continue;

    // Copy the slice so each chunk owns its bytes independently of `pcm`.
    chunks.push(new Uint8Array(pcm.subarray(offset, end)));
    offset = end;
  }

  if (offset < pcm.byteLength) {
    chunks.push(new Uint8Array(pcm.subarray(offset)));
  }

  const stream = new ReadableStream<Uint8Array>({
    start(controller) {
      for (const chunk of chunks) {
        controller.enqueue(chunk);
      }
      controller.close();
    },
  });

  return new Response(stream, { status: 200 });
}
|
|
86
|
-
|
|
87
|
-
/**
 * Temporarily replace the global fetch with a stub that resolves to the given
 * response, run the callback, and put the previous fetch back afterwards.
 *
 * The previous fetch is restored even when the callback rejects or throws.
 *
 * @param mockResponse - The Response every stubbed fetch call resolves to
 * @param fn - Async function to run while fetch is stubbed
 */
async function withMockFetch(mockResponse: Response, fn: () => Promise<void>): Promise<void> {
  const previousFetch = globalThis.fetch;
  const stubbedFetch: typeof fetch = async () => mockResponse;

  globalThis.fetch = stubbedFetch;
  try {
    await fn();
  } finally {
    globalThis.fetch = previousFetch;
  }
}
|
|
103
|
-
|
|
104
|
-
// ============================================================================
|
|
105
|
-
// TESTS
|
|
106
|
-
// ============================================================================
|
|
107
|
-
|
|
108
|
-
/**
 * Each write reaching the speaker stream has to contain a whole number of
 * 16-bit samples (an even byte count); otherwise the browser cannot view the
 * message as an Int16Array without truncating or misaligning samples.
 */
test("ElevenLabs chunks written to speaker must be sample-aligned (even byte count)", async () => {
  // 2400 samples at 24kHz = 0.1s of audio = 4800 bytes
  const sourcePcm = generateSineWavePcm(2400);
  const speaker = new PassThrough();

  // Odd split offsets stand in for arbitrary HTTP chunk boundaries
  const response = createMockResponse(sourcePcm, [1001, 2000, 3333, 4001]);

  const tts = await createElevenlabsTts({
    apiKey: "test-key",
    voiceId: "test-voice",
    modelId: "test-model",
    speakerInput: speaker,
    interruptPlayback: () => {},
    resumePlayback: () => {},
  });

  // Record a copy of everything that arrives on the speaker stream
  const received: Buffer[] = [];
  speaker.on("data", (chunk: Buffer) => {
    received.push(Buffer.from(chunk));
  });

  await withMockFetch(response, () => tts.speak("Hello world"));

  // Any chunk whose length is not a multiple of the sample size is a failure
  const misaligned = received.filter((chunk) => chunk.byteLength % BYTES_PER_SAMPLE !== 0);
  const misalignedSizes = misaligned.map((chunk) => chunk.byteLength).join(", ");

  assert.equal(
    misaligned.length,
    0,
    `${misaligned.length} of ${received.length} chunks written to speaker had odd byte length ` +
      `(${misalignedSizes} bytes). ` +
      `Odd-length chunks split 16-bit PCM samples, causing hiss in browser playback.`
  );
});
|
|
147
|
-
|
|
148
|
-
/**
 * Aligning chunks to sample boundaries must not drop audio: the byte total
 * written to the speaker has to match the size of the source PCM.
 */
test("total bytes written to speaker must equal source PCM size", async () => {
  // 2400 samples at 24kHz = 0.1s = 4800 bytes
  const sourcePcm = generateSineWavePcm(2400);
  const speaker = new PassThrough();

  // Odd split offsets: an implementation that merely truncates would lose bytes
  const response = createMockResponse(sourcePcm, [1001, 2000, 3333, 4001]);

  const tts = await createElevenlabsTts({
    apiKey: "test-key",
    voiceId: "test-voice",
    modelId: "test-model",
    speakerInput: speaker,
    interruptPlayback: () => {},
    resumePlayback: () => {},
  });

  const received: Buffer[] = [];
  speaker.on("data", (chunk: Buffer) => {
    received.push(Buffer.from(chunk));
  });

  await withMockFetch(response, () => tts.speak("Hello world"));

  let totalWritten = 0;
  for (const chunk of received) {
    totalWritten += chunk.byteLength;
  }

  assert.equal(
    totalWritten,
    sourcePcm.byteLength,
    `Expected ${sourcePcm.byteLength} bytes written to speaker, got ${totalWritten}. ` +
      `Sample alignment must carry over leftover bytes, not drop them.`
  );
});
|