voicecc 1.1.36 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/bin/voicecc.js +94 -1
  2. package/dashboard/dist/assets/index-DCeOdulF.js +28 -0
  3. package/dashboard/dist/index.html +1 -1
  4. package/dashboard/routes/agents.ts +28 -8
  5. package/dashboard/routes/browser-call.ts +3 -2
  6. package/dashboard/routes/chat.ts +75 -55
  7. package/dashboard/routes/providers.ts +5 -74
  8. package/dashboard/routes/twilio.ts +104 -5
  9. package/dashboard/routes/voice.ts +98 -0
  10. package/dashboard/server.ts +48 -1
  11. package/package.json +2 -3
  12. package/server/index.ts +96 -8
  13. package/server/services/twilio-manager.ts +29 -10
  14. package/dashboard/dist/assets/index-C62C9Gp0.js +0 -28
  15. package/dashboard/dist/audio-processor.js +0 -126
  16. package/server/services/heartbeat.ts +0 -403
  17. package/server/voice/assets/chime.wav +0 -0
  18. package/server/voice/assets/startup.pcm +0 -0
  19. package/server/voice/audio-adapter.ts +0 -60
  20. package/server/voice/audio-inactivity.test.ts +0 -108
  21. package/server/voice/audio-inactivity.ts +0 -91
  22. package/server/voice/browser-audio-playback.test.ts +0 -149
  23. package/server/voice/browser-audio.ts +0 -147
  24. package/server/voice/browser-server.ts +0 -311
  25. package/server/voice/chat-server.ts +0 -236
  26. package/server/voice/chime.test.ts +0 -69
  27. package/server/voice/chime.ts +0 -36
  28. package/server/voice/claude-session.ts +0 -293
  29. package/server/voice/endpointing.ts +0 -163
  30. package/server/voice/mic-vpio +0 -0
  31. package/server/voice/narration.ts +0 -204
  32. package/server/voice/prompt-builder.ts +0 -108
  33. package/server/voice/session-lock.ts +0 -123
  34. package/server/voice/stt-elevenlabs.ts +0 -210
  35. package/server/voice/stt-provider.ts +0 -106
  36. package/server/voice/tts-elevenlabs-hiss.test.ts +0 -183
  37. package/server/voice/tts-elevenlabs.ts +0 -397
  38. package/server/voice/tts-provider.ts +0 -155
  39. package/server/voice/twilio-audio.ts +0 -338
  40. package/server/voice/twilio-server.ts +0 -540
  41. package/server/voice/types.ts +0 -282
  42. package/server/voice/vad.ts +0 -101
  43. package/server/voice/voice-loop-bugs.test.ts +0 -348
  44. package/server/voice/voice-server.ts +0 -129
  45. package/server/voice/voice-session.ts +0 -539
@@ -1,210 +0,0 @@
1
- /**
2
- * ElevenLabs STT provider via batch transcription API (Scribe v2).
3
- *
4
- * Accumulates audio samples locally during speech, then sends the full buffer
5
- * to the ElevenLabs speech-to-text API on transcribe(). Audio is encoded as a
6
- * WAV file (16kHz mono 16-bit PCM) before upload.
7
- *
8
- * Responsibilities:
9
- * - Accumulate Float32Array audio chunks during speech
10
- * - Encode accumulated audio as a WAV file for upload
11
- * - POST the WAV to the ElevenLabs batch STT API via multipart/form-data
12
- * - Parse the JSON response and return a TranscriptionResult
13
- * - Clear the buffer after transcription or on demand
14
- */
15
-
16
- import type { SttProcessor, TranscriptionResult } from "./types.js";
17
-
18
- // ============================================================================
19
- // CONSTANTS
20
- // ============================================================================
21
-
22
- /** ElevenLabs STT API endpoint */
23
- const ELEVENLABS_STT_URL = "https://api.elevenlabs.io/v1/speech-to-text";
24
-
25
- /** Sample rate for the WAV file (must match input audio from microphone) */
26
- const WAV_SAMPLE_RATE = 16000;
27
-
28
- /** Number of audio channels */
29
- const WAV_CHANNELS = 1;
30
-
31
- /** Bits per sample in the WAV file */
32
- const WAV_BIT_DEPTH = 16;
33
-
34
- /** Size of the WAV file header in bytes */
35
- const WAV_HEADER_SIZE = 44;
36
-
37
- // ============================================================================
38
- // INTERFACES
39
- // ============================================================================
40
-
41
- /**
42
- * Configuration for the ElevenLabs STT provider.
43
- */
44
- export interface ElevenlabsSttConfig {
45
- /** ElevenLabs API key for authentication */
46
- apiKey: string;
47
- /** ElevenLabs STT model ID (e.g. "scribe_v1") */
48
- modelId: string;
49
- }
50
-
51
- // ============================================================================
52
- // MAIN HANDLERS
53
- // ============================================================================
54
-
55
- /**
56
- * Create an SttProcessor that uses the ElevenLabs batch transcription API.
57
- *
58
- * Audio is accumulated locally via accumulate(), then the full buffer is
59
- * encoded as WAV and sent to ElevenLabs on transcribe(). This fits the
60
- * existing SttProcessor interface without changes.
61
- *
62
- * @param config - ElevenLabs STT configuration (API key and model ID)
63
- * @returns An SttProcessor instance ready for transcription
64
- */
65
- export async function createElevenlabsStt(config: ElevenlabsSttConfig): Promise<SttProcessor> {
66
- const { apiKey, modelId } = config;
67
-
68
- let audioChunks: Float32Array[] = [];
69
-
70
- /**
71
- * Append audio samples to the internal buffer.
72
- * @param samples - Float32Array of audio samples (16kHz, normalized -1.0 to 1.0)
73
- */
74
- function accumulate(samples: Float32Array): void {
75
- audioChunks.push(samples);
76
- }
77
-
78
- /**
79
- * Transcribe the accumulated audio buffer by sending it to the ElevenLabs API.
80
- * Encodes the audio as WAV, uploads via multipart/form-data, and parses the result.
81
- *
82
- * @returns Transcription result with text, isFinal flag, and timestamp
83
- * @throws Error on a non-2xx response or network failure (an empty buffer returns an empty transcript instead of throwing)
84
- */
85
- async function transcribe(): Promise<TranscriptionResult> {
86
- const combinedSamples = concatenateChunks(audioChunks);
87
- audioChunks = [];
88
-
89
- if (combinedSamples.length === 0) {
90
- return { text: "", isFinal: true, timestamp: Date.now() };
91
- }
92
-
93
- // Encode audio as WAV and upload to ElevenLabs
94
- const wavBuffer = encodeWav(combinedSamples);
95
- const wavBlob = new Blob([new Uint8Array(wavBuffer)], { type: "audio/wav" });
96
-
97
- const formData = new FormData();
98
- formData.append("file", wavBlob, "audio.wav");
99
- formData.append("model_id", modelId);
100
-
101
- const response = await fetch(ELEVENLABS_STT_URL, {
102
- method: "POST",
103
- headers: {
104
- "xi-api-key": apiKey,
105
- },
106
- body: formData,
107
- });
108
-
109
- if (!response.ok) {
110
- const errorText = await response.text().catch(() => "unknown error");
111
- throw new Error(`ElevenLabs STT API error ${response.status}: ${errorText}`);
112
- }
113
-
114
- const result = await response.json() as { text: string };
115
-
116
- return { text: result.text.trim(), isFinal: true, timestamp: Date.now() };
117
- }
118
-
119
- /**
120
- * Clear the accumulated audio buffer without transcribing.
121
- */
122
- function clearBuffer(): void {
123
- audioChunks = [];
124
- }
125
-
126
- /**
127
- * Free resources. Clears the buffer (no external resources to release).
128
- */
129
- function destroy(): void {
130
- audioChunks = [];
131
- }
132
-
133
- return {
134
- accumulate,
135
- transcribe,
136
- clearBuffer,
137
- destroy,
138
- };
139
- }
140
-
141
- // ============================================================================
142
- // HELPER FUNCTIONS
143
- // ============================================================================
144
-
145
- /**
146
- * Encode Float32Array audio samples as a WAV file buffer.
147
- * Writes a 44-byte WAV header followed by 16-bit signed PCM data.
148
- *
149
- * @param samples - Float32Array of audio samples (normalized -1.0 to 1.0)
150
- * @returns Buffer containing a valid WAV file
151
- */
152
- function encodeWav(samples: Float32Array): Buffer {
153
- const bytesPerSample = WAV_BIT_DEPTH / 8;
154
- const dataSize = samples.length * bytesPerSample;
155
- const fileSize = WAV_HEADER_SIZE + dataSize;
156
-
157
- const buffer = Buffer.alloc(fileSize);
158
- let offset = 0;
159
-
160
- // RIFF header
161
- buffer.write("RIFF", offset); offset += 4;
162
- buffer.writeUInt32LE(fileSize - 8, offset); offset += 4;
163
- buffer.write("WAVE", offset); offset += 4;
164
-
165
- // fmt sub-chunk
166
- buffer.write("fmt ", offset); offset += 4;
167
- buffer.writeUInt32LE(16, offset); offset += 4; // Sub-chunk size (16 for PCM)
168
- buffer.writeUInt16LE(1, offset); offset += 2; // Audio format (1 = PCM)
169
- buffer.writeUInt16LE(WAV_CHANNELS, offset); offset += 2; // Number of channels
170
- buffer.writeUInt32LE(WAV_SAMPLE_RATE, offset); offset += 4; // Sample rate
171
- buffer.writeUInt32LE(WAV_SAMPLE_RATE * WAV_CHANNELS * bytesPerSample, offset); offset += 4; // Byte rate
172
- buffer.writeUInt16LE(WAV_CHANNELS * bytesPerSample, offset); offset += 2; // Block align
173
- buffer.writeUInt16LE(WAV_BIT_DEPTH, offset); offset += 2; // Bits per sample
174
-
175
- // data sub-chunk
176
- buffer.write("data", offset); offset += 4;
177
- buffer.writeUInt32LE(dataSize, offset); offset += 4;
178
-
179
- // Convert float samples to 16-bit signed PCM
180
- for (let i = 0; i < samples.length; i++) {
181
- const clamped = Math.max(-1, Math.min(1, samples[i]));
182
- const int16 = clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF;
183
- buffer.writeInt16LE(Math.round(int16), offset);
184
- offset += 2;
185
- }
186
-
187
- return buffer;
188
- }
189
-
190
- /**
191
- * Concatenate an array of Float32Array chunks into a single Float32Array.
192
- *
193
- * @param chunks - Array of Float32Array audio chunks
194
- * @returns Single concatenated Float32Array
195
- */
196
- function concatenateChunks(chunks: Float32Array[]): Float32Array {
197
- if (chunks.length === 0) return new Float32Array(0);
198
- if (chunks.length === 1) return chunks[0];
199
-
200
- const totalLength = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
201
- const result = new Float32Array(totalLength);
202
-
203
- let offset = 0;
204
- for (const chunk of chunks) {
205
- result.set(chunk, offset);
206
- offset += chunk.length;
207
- }
208
-
209
- return result;
210
- }
@@ -1,106 +0,0 @@
1
- /**
2
- * STT provider factory and readiness checks.
3
- *
4
- * Routes STT creation to the ElevenLabs provider implementation.
5
- * Checks provider readiness (API keys) for dashboard status.
6
- *
7
- * Responsibilities:
8
- * - Create an SttProcessor for the configured provider
9
- * - Check provider readiness (API keys set)
10
- * - Provide static metadata about available STT providers
11
- */
12
-
13
- import { createElevenlabsStt } from "./stt-elevenlabs.js";
14
- import { readEnv } from "../services/env.js";
15
-
16
- import type { SttProcessor, SttProviderType, SttProviderConfig, ProviderStatus } from "./types.js";
17
-
18
- // ============================================================================
19
- // INTERFACES
20
- // ============================================================================
21
-
22
- /**
23
- * Metadata about an STT provider for display in the dashboard.
24
- */
25
- export interface SttProviderInfo {
26
- /** Provider type identifier */
27
- type: SttProviderType;
28
- /** Human-readable provider name */
29
- name: string;
30
- /** Short description of the provider */
31
- description: string;
32
- /** Environment variable name for the API key (undefined = no key needed) */
33
- requiresApiKey?: string;
34
- }
35
-
36
- /**
37
- * Options for creating an STT processor via the provider factory.
38
- */
39
- export interface CreateSttOptions {
40
- /** Provider configuration (which provider + per-provider settings) */
41
- providerConfig: SttProviderConfig;
42
- }
43
-
44
- // ============================================================================
45
- // MAIN HANDLERS
46
- // ============================================================================
47
-
48
- /**
49
- * Create an SttProcessor for the configured provider.
50
- *
51
- * @param options - Provider config with per-provider settings
52
- * @returns An SttProcessor instance ready for transcription
53
- * @throws Error if providerConfig.provider is not a recognized STT provider
54
- */
55
- export async function createSttForProvider(options: CreateSttOptions): Promise<SttProcessor> {
56
- const { providerConfig } = options;
57
-
58
- switch (providerConfig.provider) {
59
- case "elevenlabs":
60
- return createElevenlabsStt({
61
- apiKey: providerConfig.elevenlabs.apiKey,
62
- modelId: providerConfig.elevenlabs.modelId,
63
- });
64
-
65
- default:
66
- throw new Error(`Unknown STT provider: ${providerConfig.provider}`);
67
- }
68
- }
69
-
70
- /**
71
- * Check whether an STT provider is ready to use.
72
- * Checks ELEVENLABS_API_KEY is set in .env.
73
- *
74
- * @param providerType - The provider to check
75
- * @returns Readiness status with reason if not ready
76
- */
77
- export async function getSttProviderStatus(providerType: SttProviderType): Promise<ProviderStatus> {
78
- switch (providerType) {
79
- case "elevenlabs": {
80
- const env = await readEnv();
81
- if (!env.ELEVENLABS_API_KEY) {
82
- return { ready: false, reason: "missing_api_key", detail: "ELEVENLABS_API_KEY is not set in .env" };
83
- }
84
- return { ready: true };
85
- }
86
-
87
- default:
88
- throw new Error(`Unknown STT provider: ${providerType}`);
89
- }
90
- }
91
-
92
- /**
93
- * Get the list of all known STT providers with metadata.
94
- *
95
- * @returns Static array of STT provider info
96
- */
97
- export function getAvailableSttProviders(): SttProviderInfo[] {
98
- return [
99
- {
100
- type: "elevenlabs",
101
- name: "ElevenLabs Scribe",
102
- description: "Cloud STT via ElevenLabs batch transcription API",
103
- requiresApiKey: "ELEVENLABS_API_KEY",
104
- },
105
- ];
106
- }
@@ -1,183 +0,0 @@
1
- /**
2
- * Tests that ElevenLabs TTS writes sample-aligned PCM to the speaker stream.
3
- *
4
- * ElevenLabs streams raw PCM (16-bit, 24kHz mono) over HTTP. The fetch
5
- * response body yields chunks at arbitrary byte boundaries (TCP packets).
6
- * Each chunk is written to the speaker stream, which for the browser path
7
- * becomes a separate WebSocket message. The browser interprets each message
8
- * as Int16Array -- if a chunk has an odd byte count, a sample is split and
9
- * all subsequent audio is corrupted (hiss/static).
10
- *
11
- * Run: npx tsx --test server/voice/tts-elevenlabs-hiss.test.ts
12
- */
13
-
14
- import { test } from "node:test";
15
- import { strict as assert } from "node:assert";
16
- import { PassThrough } from "stream";
17
-
18
- import { createElevenlabsTts } from "./tts-elevenlabs.js";
19
-
20
- // ============================================================================
21
- // CONSTANTS
22
- // ============================================================================
23
-
24
- /** Sample rate of ElevenLabs PCM output */
25
- const SAMPLE_RATE = 24000;
26
-
27
- /** Bytes per sample (16-bit) */
28
- const BYTES_PER_SAMPLE = 2;
29
-
30
- // ============================================================================
31
- // HELPERS
32
- // ============================================================================
33
-
34
- /**
35
- * Generate a buffer of raw 16-bit signed LE PCM (440Hz sine wave).
36
- *
37
- * @param sampleCount - Number of samples to generate
38
- * @returns Buffer of int16 LE PCM
39
- */
40
- function generateSineWavePcm(sampleCount: number): Buffer {
41
- const buf = Buffer.alloc(sampleCount * BYTES_PER_SAMPLE);
42
-
43
- for (let i = 0; i < sampleCount; i++) {
44
- const t = i / SAMPLE_RATE;
45
- const int16 = Math.round(Math.sin(2 * Math.PI * 440 * t) * 32767);
46
- buf.writeInt16LE(int16, i * BYTES_PER_SAMPLE);
47
- }
48
-
49
- return buf;
50
- }
51
-
52
- /**
53
- * Create a mock fetch Response whose body streams the given PCM buffer
54
- * split into chunks at the specified byte offsets (simulating arbitrary
55
- * HTTP chunked transfer boundaries).
56
- *
57
- * @param pcm - Full PCM buffer to stream
58
- * @param splitOffsets - Byte offsets at which to split (e.g. [1001, 2000])
59
- * @returns A Response object with a streaming body
60
- */
61
- function createMockResponse(pcm: Buffer, splitOffsets: number[]): Response {
62
- const chunks: Uint8Array[] = [];
63
- let offset = 0;
64
-
65
- for (const splitAt of splitOffsets) {
66
- if (offset >= pcm.byteLength) break;
67
- chunks.push(new Uint8Array(pcm.subarray(offset, Math.min(splitAt, pcm.byteLength))));
68
- offset = splitAt;
69
- }
70
-
71
- if (offset < pcm.byteLength) {
72
- chunks.push(new Uint8Array(pcm.subarray(offset)));
73
- }
74
-
75
- const stream = new ReadableStream<Uint8Array>({
76
- start(controller) {
77
- for (const chunk of chunks) {
78
- controller.enqueue(chunk);
79
- }
80
- controller.close();
81
- },
82
- });
83
-
84
- return new Response(stream, { status: 200 });
85
- }
86
-
87
- /**
88
- * Stub global fetch to return a mock response, run the callback, then restore.
89
- *
90
- * @param mockResponse - The Response to return from fetch
91
- * @param fn - Async function to run while fetch is stubbed
92
- */
93
- async function withMockFetch(mockResponse: Response, fn: () => Promise<void>): Promise<void> {
94
- const originalFetch = globalThis.fetch;
95
- globalThis.fetch = async () => mockResponse;
96
-
97
- try {
98
- await fn();
99
- } finally {
100
- globalThis.fetch = originalFetch;
101
- }
102
- }
103
-
104
- // ============================================================================
105
- // TESTS
106
- // ============================================================================
107
-
108
- /**
109
- * Every write to the speaker stream must have an even byte count so the
110
- * browser can interpret it as Int16Array without truncating or misaligning.
111
- */
112
- test("ElevenLabs chunks written to speaker must be sample-aligned (even byte count)", async () => {
113
- const pcm = generateSineWavePcm(2400); // 0.1s of audio = 4800 bytes
114
- const speakerOutput = new PassThrough();
115
-
116
- // Split at odd byte offsets to simulate arbitrary HTTP chunk boundaries
117
- const mockResponse = createMockResponse(pcm, [1001, 2000, 3333, 4001]);
118
-
119
- const player = await createElevenlabsTts({
120
- apiKey: "test-key",
121
- voiceId: "test-voice",
122
- modelId: "test-model",
123
- speakerInput: speakerOutput,
124
- interruptPlayback: () => {},
125
- resumePlayback: () => {},
126
- });
127
-
128
- // Collect all chunks written to the speaker stream
129
- const writtenChunks: Buffer[] = [];
130
- speakerOutput.on("data", (chunk: Buffer) => {
131
- writtenChunks.push(Buffer.from(chunk));
132
- });
133
-
134
- await withMockFetch(mockResponse, () => player.speak("Hello world"));
135
-
136
- // Every chunk written to the speaker must be sample-aligned
137
- const oddChunks = writtenChunks.filter((c) => c.byteLength % BYTES_PER_SAMPLE !== 0);
138
-
139
- assert.equal(
140
- oddChunks.length,
141
- 0,
142
- `${oddChunks.length} of ${writtenChunks.length} chunks written to speaker had odd byte length ` +
143
- `(${oddChunks.map((c) => c.byteLength).join(", ")} bytes). ` +
144
- `Odd-length chunks split 16-bit PCM samples, causing hiss in browser playback.`
145
- );
146
- });
147
-
148
- /**
149
- * Sample alignment must not lose audio data. The total bytes written to the
150
- * speaker must equal the original PCM size.
151
- */
152
- test("total bytes written to speaker must equal source PCM size", async () => {
153
- const pcm = generateSineWavePcm(2400); // 0.1s = 4800 bytes
154
- const speakerOutput = new PassThrough();
155
-
156
- // Odd splits that would cause byte loss if alignment just truncates
157
- const mockResponse = createMockResponse(pcm, [1001, 2000, 3333, 4001]);
158
-
159
- const player = await createElevenlabsTts({
160
- apiKey: "test-key",
161
- voiceId: "test-voice",
162
- modelId: "test-model",
163
- speakerInput: speakerOutput,
164
- interruptPlayback: () => {},
165
- resumePlayback: () => {},
166
- });
167
-
168
- const writtenChunks: Buffer[] = [];
169
- speakerOutput.on("data", (chunk: Buffer) => {
170
- writtenChunks.push(Buffer.from(chunk));
171
- });
172
-
173
- await withMockFetch(mockResponse, () => player.speak("Hello world"));
174
-
175
- const totalWritten = writtenChunks.reduce((sum, c) => sum + c.byteLength, 0);
176
-
177
- assert.equal(
178
- totalWritten,
179
- pcm.byteLength,
180
- `Expected ${pcm.byteLength} bytes written to speaker, got ${totalWritten}. ` +
181
- `Sample alignment must carry over leftover bytes, not drop them.`
182
- );
183
- });