voicecc 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/.claude-plugin/plugin.json +6 -0
  2. package/README.md +48 -0
  3. package/bin/voicecc.js +39 -0
  4. package/dashboard/dist/assets/index-BXemFrMp.css +1 -0
  5. package/dashboard/dist/assets/index-dAYfRls7.js +11 -0
  6. package/dashboard/dist/audio-processor.js +126 -0
  7. package/dashboard/dist/index.html +13 -0
  8. package/dashboard/routes/auth.ts +119 -0
  9. package/dashboard/routes/browser-call.ts +87 -0
  10. package/dashboard/routes/claude-md.ts +50 -0
  11. package/dashboard/routes/conversations.ts +203 -0
  12. package/dashboard/routes/integrations.ts +154 -0
  13. package/dashboard/routes/mcp-servers.ts +198 -0
  14. package/dashboard/routes/settings.ts +64 -0
  15. package/dashboard/routes/tunnel.ts +66 -0
  16. package/dashboard/routes/twilio.ts +120 -0
  17. package/dashboard/routes/voice.ts +48 -0
  18. package/dashboard/routes/webrtc.ts +85 -0
  19. package/dashboard/server.ts +130 -0
  20. package/dashboard/tsconfig.json +13 -0
  21. package/init/CLAUDE.md +18 -0
  22. package/package.json +59 -0
  23. package/run.ts +68 -0
  24. package/scripts/postinstall.js +228 -0
  25. package/services/browser-call-manager.ts +106 -0
  26. package/services/device-pairing.ts +176 -0
  27. package/services/env.ts +88 -0
  28. package/services/tunnel.ts +204 -0
  29. package/services/twilio-manager.ts +126 -0
  30. package/sidecar/assets/startup.pcm +0 -0
  31. package/sidecar/audio-adapter.ts +60 -0
  32. package/sidecar/audio-capture.ts +220 -0
  33. package/sidecar/browser-audio-playback.test.ts +149 -0
  34. package/sidecar/browser-audio.ts +147 -0
  35. package/sidecar/browser-server.ts +331 -0
  36. package/sidecar/chime.test.ts +69 -0
  37. package/sidecar/chime.ts +54 -0
  38. package/sidecar/claude-session.ts +295 -0
  39. package/sidecar/endpointing.ts +163 -0
  40. package/sidecar/index.ts +83 -0
  41. package/sidecar/local-audio.ts +126 -0
  42. package/sidecar/mic-vpio +0 -0
  43. package/sidecar/mic-vpio.swift +484 -0
  44. package/sidecar/mock-tts-server-tagged.mjs +132 -0
  45. package/sidecar/narration.ts +204 -0
  46. package/sidecar/scripts/generate-startup-audio.py +79 -0
  47. package/sidecar/session-lock.ts +123 -0
  48. package/sidecar/sherpa-onnx-node.d.ts +4 -0
  49. package/sidecar/stt.ts +199 -0
  50. package/sidecar/tts-server.py +193 -0
  51. package/sidecar/tts.ts +481 -0
  52. package/sidecar/twilio-audio.ts +338 -0
  53. package/sidecar/twilio-server.ts +436 -0
  54. package/sidecar/types.ts +210 -0
  55. package/sidecar/vad.ts +101 -0
  56. package/sidecar/voice-loop-bugs.test.ts +522 -0
  57. package/sidecar/voice-session.ts +523 -0
  58. package/skills/voice/SKILL.md +26 -0
  59. package/tsconfig.json +22 -0
@@ -0,0 +1,204 @@
1
+ /**
2
+ * Processes Claude's streaming output into TTS-friendly text.
3
+ *
4
+ * Two modes of operation:
5
+ * - Response mode: passes text_delta content through immediately for streaming
6
+ * TTS. Text is buffered into sentences downstream in the TTS module.
7
+ * - Long-task mode: emits periodic template-based summaries during tool use
8
+ * (e.g. "Running Bash...", "Still working on Bash...").
9
+ *
10
+ * Responsibilities:
11
+ * - Pass through streaming text deltas immediately for low-latency TTS
12
+ * - Track tool execution and emit periodic spoken summaries
13
+ * - Flush remaining text on result/error events
14
+ */
15
+
16
+ import type { ClaudeStreamEvent, NarrationConfig } from "./types.js";
17
+
18
+ /** Strip markdown syntax so text reads naturally when spoken. */
19
+ function stripMarkdown(text: string): string {
20
+ return text
21
+ .replace(/\*+/g, "") // bold/italic asterisks
22
+ .replace(/#+\s*/g, "") // heading markers
23
+ .replace(/`+/g, "") // inline code / code fences
24
+ .replace(/\[([^\]]*)\]\([^)]*\)/g, "$1") // [text](url) → text
25
+ .replace(/^-\s+/gm, "") // unordered list markers
26
+ .replace(/^\d+\.\s+/gm, ""); // ordered list markers
27
+ }
28
+
29
+ // ============================================================================
30
+ // INTERFACES
31
+ // ============================================================================
32
+
33
/**
 * Narrator instance that processes Claude stream events into speakable text.
 * Created by createNarrator(); stateful across events within a single turn,
 * so call reset() between conversation turns.
 */
export interface Narrator {
  /**
   * Process a single Claude stream event and return any text ready to be spoken.
   * @param event - The Claude stream event to process
   * @returns Array of strings to speak (often empty, sometimes 1-2 sentences)
   */
  processEvent(event: ClaudeStreamEvent): string[];

  /**
   * Flush any remaining buffered text that hasn't been emitted yet.
   * @returns Array of remaining text strings to speak
   */
  flush(): string[];

  /**
   * Reset all internal state (current tool, long-task mode, summary timer)
   * for a new conversation turn.
   */
  reset(): void;
}
55
+
56
+ // ============================================================================
57
+ // MAIN HANDLERS
58
+ // ============================================================================
59
+
60
+ /**
61
+ * Create a new Narrator instance that converts Claude stream events into
62
+ * TTS-friendly sentence chunks.
63
+ * @param config - Narration configuration (summaryIntervalMs controls long-task summary frequency)
64
+ * @returns A Narrator instance
65
+ */
66
+ export function createNarrator(config: NarrationConfig, onEmit?: (text: string) => void): Narrator {
67
+ // -- internal state --
68
+ let currentToolName: string | null = null;
69
+ let summaryTimer: NodeJS.Timeout | null = null;
70
+ let inLongTask = false;
71
+
72
+ /**
73
+ * Process a single Claude stream event.
74
+ * @param event - The streaming event from Claude
75
+ * @returns Array of strings to speak
76
+ */
77
+ function processEvent(event: ClaudeStreamEvent): string[] {
78
+ switch (event.type) {
79
+ case "text_delta":
80
+ return handleTextDelta(event);
81
+ case "tool_start":
82
+ return handleToolStart(event);
83
+ case "tool_end":
84
+ return handleToolEnd();
85
+ case "result":
86
+ case "error":
87
+ return handleTerminal();
88
+ default:
89
+ return [];
90
+ }
91
+ }
92
+
93
+ /**
94
+ * Flush any remaining text in the buffer.
95
+ * @returns Array of remaining text strings
96
+ */
97
+ function flush(): string[] {
98
+ return [];
99
+ }
100
+
101
+ /**
102
+ * Reset all state for a new conversation turn.
103
+ */
104
+ function reset(): void {
105
+ currentToolName = null;
106
+ clearSummaryTimer();
107
+ inLongTask = false;
108
+ }
109
+
110
+ return { processEvent, flush, reset };
111
+
112
+ // ============================================================================
113
+ // HELPER FUNCTIONS
114
+ // ============================================================================
115
+
116
+ /**
117
+ * Handle a text_delta event: pass through immediately, exit long-task mode.
118
+ * Text chunking for TTS is handled downstream by TextSplitterStream.
119
+ * @param event - The text_delta event
120
+ * @returns Array containing the delta text
121
+ */
122
+ function handleTextDelta(event: ClaudeStreamEvent): string[] {
123
+ // Text arriving means Claude is responding directly -- leave long-task mode
124
+ if (inLongTask) {
125
+ clearSummaryTimer();
126
+ inLongTask = false;
127
+ currentToolName = null;
128
+ }
129
+
130
+ const results: string[] = [];
131
+ if (event.content) {
132
+ const clean = stripMarkdown(event.content);
133
+ if (clean) results.push(clean);
134
+ }
135
+ return results;
136
+ }
137
+
138
+ /**
139
+ * Handle a tool_start event: enter long-task mode, start the summary timer,
140
+ * and emit an initial "Running {toolName}..." message.
141
+ * @param event - The tool_start event (must have toolName)
142
+ * @returns Array containing the initial tool message
143
+ */
144
+ function handleToolStart(event: ClaudeStreamEvent): string[] {
145
+ const toolName = event.toolName ?? "unknown tool";
146
+ currentToolName = toolName;
147
+ inLongTask = true;
148
+
149
+ // Clear any existing timer before starting a new one
150
+ clearSummaryTimer();
151
+ startSummaryTimer();
152
+
153
+ return [`Running ${toolName}...`];
154
+ }
155
+
156
+ /**
157
+ * Handle a tool_end event: clear current tool context but stay in long-task
158
+ * mode since more tools might follow.
159
+ * @returns Empty array
160
+ */
161
+ function handleToolEnd(): string[] {
162
+ currentToolName = null;
163
+ return [];
164
+ }
165
+
166
+ /**
167
+ * Handle result or error events: flush all remaining text and reset state.
168
+ * @returns Array of any remaining text
169
+ */
170
+ function handleTerminal(): string[] {
171
+ const remaining = flush();
172
+
173
+ // Full reset for next turn
174
+ clearSummaryTimer();
175
+ currentToolName = null;
176
+ inLongTask = false;
177
+
178
+ return remaining;
179
+ }
180
+
181
+ /**
182
+ * Start the periodic summary timer for long-task mode.
183
+ * Emits "Still working on {toolName}..." at the configured interval.
184
+ */
185
+ function startSummaryTimer(): void {
186
+ summaryTimer = setInterval(() => {
187
+ const name = currentToolName ?? "the task";
188
+ const summary = `Still working on ${name}...`;
189
+ if (onEmit) {
190
+ onEmit(summary);
191
+ }
192
+ }, config.summaryIntervalMs);
193
+ }
194
+
195
+ /**
196
+ * Clear the summary timer if one is active.
197
+ */
198
+ function clearSummaryTimer(): void {
199
+ if (summaryTimer !== null) {
200
+ clearInterval(summaryTimer);
201
+ summaryTimer = null;
202
+ }
203
+ }
204
+ }
@@ -0,0 +1,79 @@
1
+ """
2
+ One-time script to generate the startup audio greeting.
3
+
4
+ Uses mlx_audio's Kokoro model (same API as tts-server.py) to synthesize a short
5
+ spoken greeting and writes it as raw 24kHz 16-bit signed mono PCM to
6
+ sidecar/assets/startup.pcm.
7
+
8
+ Usage:
9
+ cd sidecar
10
+ .venv/bin/python3 scripts/generate-startup-audio.py
11
+ """
12
+
13
+ import os
14
+ import sys
15
+ import numpy as np
16
+
17
+ # ============================================================================
18
+ # CONSTANTS
19
+ # ============================================================================
20
+
21
# Hugging Face repo id of the Kokoro TTS model (same model tts-server.py loads).
MODEL_ID = "prince-canuma/Kokoro-82M"
# Kokoro voice preset used for the greeting.
VOICE = "af_heart"
# Greeting text synthesized once and shipped as a static asset.
STARTUP_TEXT = "Hi there! I'm Voice CC. How can I help you today?"
# Output location: sidecar/assets/startup.pcm, resolved relative to this script.
OUTPUT_DIR = os.path.join(os.path.dirname(__file__), "..", "assets")
OUTPUT_FILE = os.path.join(OUTPUT_DIR, "startup.pcm")
26
+
27
+ # ============================================================================
28
+ # MAIN ENTRYPOINT
29
+ # ============================================================================
30
+
31
def main():
    """Load the Kokoro model, generate startup audio, and save as raw PCM.

    Exits with status 1 if generation raises or yields no audio chunks.
    """
    # Imported lazily so the heavy mlx_audio dependency is only required
    # when the script is actually run.
    from mlx_audio.tts.utils import load_model

    print(f"Loading model: {MODEL_ID}")
    model = load_model(MODEL_ID)
    print(f"Model loaded (sample_rate={model.sample_rate})")

    print(f"Generating: \"{STARTUP_TEXT}\"")
    chunks = []
    try:
        # Stream synthesis chunk-by-chunk; each result.audio is a float array.
        for result in model.generate(text=STARTUP_TEXT, voice=VOICE, stream=True):
            audio = np.array(result.audio, copy=False)
            chunks.append(audio)
            print(f" chunk {len(chunks)}: {audio.shape}")
    except Exception as e:
        print(f"ERROR during generation: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc()
        sys.exit(1)

    # Fix: np.concatenate([]) raises a confusing ValueError; fail with an
    # explicit message if the model produced no audio at all.
    if not chunks:
        print("ERROR: model produced no audio chunks", file=sys.stderr)
        sys.exit(1)

    combined = np.concatenate(chunks)
    pcm = float32_to_int16_pcm(combined)

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    with open(OUTPUT_FILE, "wb") as f:
        f.write(pcm)

    duration_s = len(combined) / model.sample_rate
    print(f"Wrote {len(pcm)} bytes ({duration_s:.1f}s) to {OUTPUT_FILE}")
61
+
62
+ # ============================================================================
63
+ # HELPER FUNCTIONS
64
+ # ============================================================================
65
+
66
def float32_to_int16_pcm(audio: np.ndarray) -> bytes:
    """
    Convert float32 audio samples (-1.0..1.0) to 16-bit signed PCM bytes.

    Samples outside [-1.0, 1.0] are clamped before scaling; the scale factor
    is 32767, and the float->int16 cast truncates toward zero (numpy astype).

    @param audio - numpy array of float32 samples
    @returns Raw bytes of int16 little-endian PCM
    """
    scaled = np.clip(audio, -1.0, 1.0) * 32767
    return scaled.astype(np.int16).tobytes()
76
+
77
+
78
# Allow running directly: `.venv/bin/python3 scripts/generate-startup-audio.py`.
if __name__ == "__main__":
    main()
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Cross-process session limiter using PID-based lock files.
3
+ *
4
+ * Ensures the total number of active voice sessions (local mic + Twilio combined)
5
+ * does not exceed MAX_CONCURRENT_SESSIONS. Stale lock files from crashed processes
6
+ * are automatically cleaned up on every acquire.
7
+ *
8
+ * Responsibilities:
9
+ * - Acquire a session slot by creating a PID lock file in ~/.claude-voice-sessions/
10
+ * - Validate existing lock files by checking if their PIDs are still alive
11
+ * - Clean up stale lock files from dead processes
12
+ * - Release the lock file on session stop or process exit
13
+ */
14
+
15
+ import { mkdirSync, readdirSync, readFileSync, writeFileSync, unlinkSync } from "fs";
16
+ import { join } from "path";
17
+ import { homedir } from "os";
18
+ import { randomUUID } from "crypto";
19
+
20
+ // ============================================================================
21
+ // CONSTANTS
22
+ // ============================================================================
23
+
24
/** Directory where PID lock files are stored (one `<uuid>.lock` file per active session) */
const LOCK_DIR = join(homedir(), ".claude-voice-sessions");
26
+
27
+ // ============================================================================
28
+ // INTERFACES
29
+ // ============================================================================
30
+
31
/**
 * Handle returned by acquireSessionLock. Call release() to free the session slot.
 */
export interface SessionLock {
  /** Release the session lock (deletes this session's lock file; safe to call more than once) */
  release: () => void;
}
38
+
39
+ // ============================================================================
40
+ // MAIN HANDLERS
41
+ // ============================================================================
42
+
43
+ /**
44
+ * Acquire a session lock slot. Throws if the maximum number of concurrent
45
+ * sessions has been reached.
46
+ *
47
+ * Cleans up stale lock files (dead PIDs) on every call. Creates a new lock
48
+ * file containing the current PID. Registers a process.on('exit') handler
49
+ * as a safety net to release on shutdown.
50
+ *
51
+ * @param maxSessions - Maximum number of concurrent sessions allowed
52
+ * @returns A SessionLock handle with a release() method
53
+ * @throws Error if maxSessions has been reached
54
+ */
55
+ export function acquireSessionLock(maxSessions: number): SessionLock {
56
+ // Ensure lock directory exists
57
+ mkdirSync(LOCK_DIR, { recursive: true });
58
+
59
+ // List existing lock files and validate their PIDs
60
+ const files = readdirSync(LOCK_DIR).filter((f) => f.endsWith(".lock"));
61
+ let activeCount = 0;
62
+
63
+ for (const file of files) {
64
+ const filePath = join(LOCK_DIR, file);
65
+ try {
66
+ const pid = parseInt(readFileSync(filePath, "utf-8").trim(), 10);
67
+ if (isNaN(pid) || !isProcessAlive(pid)) {
68
+ // Stale lock file -- process is dead, clean it up
69
+ unlinkSync(filePath);
70
+ } else {
71
+ activeCount++;
72
+ }
73
+ } catch {
74
+ // File disappeared between readdir and read, or parse error -- skip
75
+ try { unlinkSync(filePath); } catch { /* already gone */ }
76
+ }
77
+ }
78
+
79
+ if (activeCount >= maxSessions) {
80
+ throw new Error(
81
+ `Session limit reached (${activeCount}/${maxSessions}). ` +
82
+ `Cannot start another voice session.`
83
+ );
84
+ }
85
+
86
+ // Create a new lock file with the current PID
87
+ const lockFile = join(LOCK_DIR, `${randomUUID()}.lock`);
88
+ writeFileSync(lockFile, String(process.pid), "utf-8");
89
+
90
+ let released = false;
91
+
92
+ /** Delete the lock file if it hasn't been released yet */
93
+ function release(): void {
94
+ if (released) return;
95
+ released = true;
96
+ try { unlinkSync(lockFile); } catch { /* already gone */ }
97
+ }
98
+
99
+ // Safety net: release on process exit
100
+ process.on("exit", release);
101
+
102
+ return { release };
103
+ }
104
+
105
+ // ============================================================================
106
+ // HELPER FUNCTIONS
107
+ // ============================================================================
108
+
109
+ /**
110
+ * Check if a process with the given PID is still alive.
111
+ * Uses signal 0 which does not kill the process -- it only checks existence.
112
+ *
113
+ * @param pid - The process ID to check
114
+ * @returns true if the process is alive, false otherwise
115
+ */
116
+ export function isProcessAlive(pid: number): boolean {
117
+ try {
118
+ process.kill(pid, 0);
119
+ return true;
120
+ } catch {
121
+ return false;
122
+ }
123
+ }
@@ -0,0 +1,4 @@
1
/**
 * Minimal ambient declaration for the untyped "sherpa-onnx-node" native addon.
 * The package ships no TypeScript types, so the default export is `any`;
 * callers (stt.ts) access members like OfflineRecognizer without
 * compile-time checking.
 */
declare module "sherpa-onnx-node" {
  const sherpa: any;
  export default sherpa;
}
package/sidecar/stt.ts ADDED
@@ -0,0 +1,199 @@
1
+ /**
2
+ * Local speech-to-text via sherpa-onnx with Whisper ONNX model (offline/batch).
3
+ *
4
+ * Whisper models in sherpa-onnx are offline-only (not streaming). Audio is
5
+ * accumulated during speech (SPEECH_START to SPEECH_END), then batch-transcribed
6
+ * on SPEECH_END using `createOfflineRecognizer`.
7
+ *
8
+ * Responsibilities:
9
+ * - Load the sherpa-onnx offline recognizer with a Whisper ONNX model
10
+ * - Accumulate audio samples during speech into an internal buffer
11
+ * - Batch-transcribe the accumulated buffer on demand
12
+ * - Manage buffer and recognizer lifecycle
13
+ */
14
+
15
+ import { existsSync } from "fs";
16
+ import type { TranscriptionResult } from "./types.js";
17
+
18
+ // ============================================================================
19
+ // INTERFACES
20
+ // ============================================================================
21
+
22
/** Internal interface for the STT processor returned by createStt. */
interface SttProcessor {
  /**
   * Appends audio samples to the internal buffer.
   * Call continuously during speech (between SPEECH_START and SPEECH_END).
   *
   * @param samples - Float32Array of audio samples (16kHz, normalized -1.0 to 1.0)
   */
  accumulate(samples: Float32Array): void;

  /**
   * Batch-transcribes the accumulated audio buffer using the offline recognizer.
   * Creates an offline stream, feeds the accumulated audio, decodes, and returns
   * the result. Clears the buffer afterward. Returns an empty-text result when
   * no audio has been accumulated.
   *
   * @returns Transcription result with text, isFinal flag, and timestamp
   */
  transcribe(): Promise<TranscriptionResult>;

  /**
   * Clears the accumulated audio buffer without transcribing.
   * Use on interruption or when discarding a speech segment.
   */
  clearBuffer(): void;

  /**
   * Frees buffered audio; recognizer teardown itself is left to the addon.
   * Call on shutdown to prevent resource leaks.
   */
  destroy(): void;
}
53
+
54
+ // ============================================================================
55
+ // CONSTANTS
56
+ // ============================================================================
57
+
58
/** Sample rate (Hz) expected by the Whisper model; callers must feed 16 kHz audio */
const SAMPLE_RATE = 16000;

/** Default model file prefix (sherpa-onnx naming convention: "small.en", "tiny.en", etc.) */
const DEFAULT_MODEL_PREFIX = "small.en";

/** Required model file suffixes within the model directory (int8-quantized encoder/decoder plus token table) */
const REQUIRED_SUFFIXES = ["-encoder.int8.onnx", "-decoder.int8.onnx", "-tokens.txt"];
66
+
67
+ // ============================================================================
68
+ // MAIN HANDLERS
69
+ // ============================================================================
70
+
71
+ /**
72
+ * Loads the sherpa-onnx offline recognizer with the Whisper model at the given
73
+ * path and returns an SttProcessor.
74
+ *
75
+ * @param modelPath - Path to directory containing encoder.onnx, decoder.onnx, and tokens.txt
76
+ * @returns Promise resolving to an SttProcessor instance
77
+ * @throws Error if any required model files are missing
78
+ */
79
+ async function createStt(modelPath: string): Promise<SttProcessor> {
80
+ validateModelFiles(modelPath);
81
+
82
+ // Dynamic import to avoid ONNX runtime conflict with kokoro-js.
83
+ // Both sherpa-onnx-node and kokoro-js bundle native ONNX runtimes that
84
+ // crash if loaded simultaneously via static imports.
85
+ const sherpa = (await import("sherpa-onnx-node")).default;
86
+
87
+ const prefix = DEFAULT_MODEL_PREFIX;
88
+ const config = {
89
+ modelConfig: {
90
+ whisper: {
91
+ encoder: `${modelPath}/${prefix}-encoder.int8.onnx`,
92
+ decoder: `${modelPath}/${prefix}-decoder.int8.onnx`,
93
+ },
94
+ tokens: `${modelPath}/${prefix}-tokens.txt`,
95
+ },
96
+ };
97
+
98
+ const recognizer = new sherpa.OfflineRecognizer(config);
99
+
100
+ // Buffer stored as array of chunks to avoid repeated copying during accumulation
101
+ let audioChunks: Float32Array[] = [];
102
+
103
+ return {
104
+ accumulate(samples: Float32Array): void {
105
+ audioChunks.push(samples);
106
+ },
107
+
108
+ async transcribe(): Promise<TranscriptionResult> {
109
+ const combinedSamples = concatenateChunks(audioChunks);
110
+ audioChunks = [];
111
+
112
+ if (combinedSamples.length === 0) {
113
+ return { text: "", isFinal: true, timestamp: Date.now() };
114
+ }
115
+
116
+ // Create a fresh stream, feed audio, decode
117
+ const stream = recognizer.createStream();
118
+ stream.acceptWaveform({ sampleRate: SAMPLE_RATE, samples: combinedSamples });
119
+ recognizer.decode(stream);
120
+
121
+ const result = recognizer.getResult(stream);
122
+ const text = result.text.trim();
123
+
124
+ return { text, isFinal: true, timestamp: Date.now() };
125
+ },
126
+
127
+ clearBuffer(): void {
128
+ audioChunks = [];
129
+ },
130
+
131
+ destroy(): void {
132
+ audioChunks = [];
133
+ // recognizer cleanup is handled by sherpa-onnx-node garbage collection
134
+ },
135
+ };
136
+ }
137
+
138
+ // ============================================================================
139
+ // HELPER FUNCTIONS
140
+ // ============================================================================
141
+
142
+ /**
143
+ * Validates that all required model files exist in the given directory.
144
+ *
145
+ * @param modelPath - Path to the model directory
146
+ * @throws Error with details about which files are missing
147
+ */
148
+ function validateModelFiles(modelPath: string): void {
149
+ if (!existsSync(modelPath)) {
150
+ throw new Error(
151
+ `STT model directory not found: ${modelPath}. ` +
152
+ `Download a Whisper ONNX model and place encoder.onnx, decoder.onnx, and tokens.txt in this directory.`
153
+ );
154
+ }
155
+
156
+ const expectedFiles = REQUIRED_SUFFIXES.map((suffix) => `${DEFAULT_MODEL_PREFIX}${suffix}`);
157
+ const missingFiles = expectedFiles.filter(
158
+ (file) => !existsSync(`${modelPath}/${file}`)
159
+ );
160
+
161
+ if (missingFiles.length > 0) {
162
+ throw new Error(
163
+ `Missing STT model files in ${modelPath}: ${missingFiles.join(", ")}. ` +
164
+ `Required files: ${expectedFiles.join(", ")}.`
165
+ );
166
+ }
167
+ }
168
+
169
+ /**
170
+ * Concatenates an array of Float32Array chunks into a single Float32Array.
171
+ * Avoids repeated copying during accumulation by deferring concatenation
172
+ * until transcription time.
173
+ *
174
+ * @param chunks - Array of Float32Array audio chunks
175
+ * @returns Single concatenated Float32Array
176
+ */
177
+ function concatenateChunks(chunks: Float32Array[]): Float32Array {
178
+ if (chunks.length === 0) {
179
+ return new Float32Array(0);
180
+ }
181
+
182
+ if (chunks.length === 1) {
183
+ return chunks[0];
184
+ }
185
+
186
+ const totalLength = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
187
+ const result = new Float32Array(totalLength);
188
+
189
+ let offset = 0;
190
+ for (const chunk of chunks) {
191
+ result.set(chunk, offset);
192
+ offset += chunk.length;
193
+ }
194
+
195
+ return result;
196
+ }
197
+
198
+ export { createStt };
199
+ export type { SttProcessor };