voicecc 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/.claude-plugin/plugin.json +6 -0
  2. package/README.md +48 -0
  3. package/bin/voicecc.js +39 -0
  4. package/dashboard/dist/assets/index-BXemFrMp.css +1 -0
  5. package/dashboard/dist/assets/index-dAYfRls7.js +11 -0
  6. package/dashboard/dist/audio-processor.js +126 -0
  7. package/dashboard/dist/index.html +13 -0
  8. package/dashboard/routes/auth.ts +119 -0
  9. package/dashboard/routes/browser-call.ts +87 -0
  10. package/dashboard/routes/claude-md.ts +50 -0
  11. package/dashboard/routes/conversations.ts +203 -0
  12. package/dashboard/routes/integrations.ts +154 -0
  13. package/dashboard/routes/mcp-servers.ts +198 -0
  14. package/dashboard/routes/settings.ts +64 -0
  15. package/dashboard/routes/tunnel.ts +66 -0
  16. package/dashboard/routes/twilio.ts +120 -0
  17. package/dashboard/routes/voice.ts +48 -0
  18. package/dashboard/routes/webrtc.ts +85 -0
  19. package/dashboard/server.ts +130 -0
  20. package/dashboard/tsconfig.json +13 -0
  21. package/init/CLAUDE.md +18 -0
  22. package/package.json +59 -0
  23. package/run.ts +68 -0
  24. package/scripts/postinstall.js +228 -0
  25. package/services/browser-call-manager.ts +106 -0
  26. package/services/device-pairing.ts +176 -0
  27. package/services/env.ts +88 -0
  28. package/services/tunnel.ts +204 -0
  29. package/services/twilio-manager.ts +126 -0
  30. package/sidecar/assets/startup.pcm +0 -0
  31. package/sidecar/audio-adapter.ts +60 -0
  32. package/sidecar/audio-capture.ts +220 -0
  33. package/sidecar/browser-audio-playback.test.ts +149 -0
  34. package/sidecar/browser-audio.ts +147 -0
  35. package/sidecar/browser-server.ts +331 -0
  36. package/sidecar/chime.test.ts +69 -0
  37. package/sidecar/chime.ts +54 -0
  38. package/sidecar/claude-session.ts +295 -0
  39. package/sidecar/endpointing.ts +163 -0
  40. package/sidecar/index.ts +83 -0
  41. package/sidecar/local-audio.ts +126 -0
  42. package/sidecar/mic-vpio +0 -0
  43. package/sidecar/mic-vpio.swift +484 -0
  44. package/sidecar/mock-tts-server-tagged.mjs +132 -0
  45. package/sidecar/narration.ts +204 -0
  46. package/sidecar/scripts/generate-startup-audio.py +79 -0
  47. package/sidecar/session-lock.ts +123 -0
  48. package/sidecar/sherpa-onnx-node.d.ts +4 -0
  49. package/sidecar/stt.ts +199 -0
  50. package/sidecar/tts-server.py +193 -0
  51. package/sidecar/tts.ts +481 -0
  52. package/sidecar/twilio-audio.ts +338 -0
  53. package/sidecar/twilio-server.ts +436 -0
  54. package/sidecar/types.ts +210 -0
  55. package/sidecar/vad.ts +101 -0
  56. package/sidecar/voice-loop-bugs.test.ts +522 -0
  57. package/sidecar/voice-session.ts +523 -0
  58. package/skills/voice/SKILL.md +26 -0
  59. package/tsconfig.json +22 -0
@@ -0,0 +1,484 @@
1
+ /**
2
+ * macOS Voice Processing IO (VPIO) binary for echo-cancelled audio I/O.
3
+ *
4
+ * Uses macOS's built-in acoustic echo cancellation via the VoiceProcessingIO
5
+ * AudioUnit. Routes TTS audio through the VPIO output element so the AEC has
6
+ * a reference signal to subtract from the mic input.
7
+ *
8
+ * VPIO requires the same sample rate on both elements. Internally uses the
9
+ * speaker rate for the AudioUnit, then resamples the mic output to the
10
+ * requested mic rate using AudioConverter before writing to stdout.
11
+ *
12
+ * - stdin: Raw 16-bit signed mono PCM at speakerRate (TTS audio for playback)
13
+ * - stdout: Raw 16-bit signed mono PCM at micRate (echo-cancelled mic audio)
14
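+ * - SIGUSR1: Clear playback ring buffer and discard incoming stdin audio (for interrupting TTS)
+ * - SIGUSR2: Resume writing stdin audio to the ring buffer after a SIGUSR1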
15
+ * - SIGTERM: Clean shutdown
16
+ *
17
+ * Usage: mic-vpio <micRate> <speakerRate>
18
+ * micRate: Sample rate for mic output in Hz (e.g. 16000)
19
+ * speakerRate: Sample rate for speaker input in Hz (e.g. 24000)
20
+ */
21
+
22
+ import AudioToolbox
23
+ import Foundation
24
+
25
+ // ============================================================================
26
+ // CONSTANTS
27
+ // ============================================================================
28
+
29
/// All streams handled by this tool are mono.
let CHANNELS: UInt32 = 1
/// Sample width in bits for the 16-bit signed PCM format used throughout.
let BITS_PER_CHANNEL: UInt32 = 16
/// Size in bytes of one mono 16-bit frame.
let BYTES_PER_FRAME: Int = 2

/// Playback ring buffer size in bytes (about 5 seconds of 48kHz mono 16-bit audio).
let RING_BUFFER_CAPACITY: Int = 5 * 2 * 48000

// ============================================================================
// GLOBALS
// ============================================================================

/// VoiceProcessingIO unit; kept global so the C-convention callbacks can reach it.
var gAudioUnit: AudioComponentInstance!

/// Playback ring buffer (stdin audio waiting to be rendered to the speaker),
/// its capacity, read/write cursors, and the lock guarding them.
var gRingBuffer: UnsafeMutablePointer<UInt8>!
var gRingCapacity: Int = RING_BUFFER_CAPACITY
var gRingWritePos: Int = 0
var gRingReadPos: Int = 0
var gRingLock: os_unfair_lock = os_unfair_lock()

/// Raised by the SIGUSR1 handler; the render callback clears the ring buffer
/// and lowers the flag the next time it runs.
var gClearRequested: Bool = false

/// Raised by SIGUSR1 and lowered by SIGUSR2. While raised, the stdin reader
/// throws away what it reads instead of queueing it, so audio still sitting in
/// the pipe cannot refill the ring buffer after an interrupt.
var gDiscardStdin: Bool = false

/// Converter that resamples mic audio from the VPIO rate to the requested mic
/// rate; stays nil when both rates are equal.
var gMicConverter: AudioConverterRef? = nil

/// Sample rate requested for mic audio written to stdout.
var gMicRate: Double = 16000

/// Sample rate the VPIO unit runs at (the speaker rate).
var gVpioRate: Double = 24000

/// Scratch buffer receiving resampled mic samples, and its capacity in samples.
var gResampleBuffer: UnsafeMutablePointer<Int16>? = nil
var gResampleBufferCapacity: Int = 0

/// Input samples offered to the mic converter's data callback, and how many
/// frames of them are still pending.
var gConverterInputBuffer: UnsafeMutablePointer<Int16>? = nil
var gConverterInputFrames: UInt32 = 0
74
+
75
+ // ============================================================================
76
+ // RING BUFFER
77
+ // ============================================================================
78
+
79
/// Number of unread bytes currently stored in the playback ring buffer.
func ringAvailable() -> Int {
    let distance = gRingWritePos - gRingReadPos
    // A negative distance means the write cursor has wrapped around the end.
    return distance >= 0 ? distance : distance + gRingCapacity
}
82
+
83
/// Number of bytes that can still be written. One slot is always left unused,
/// so equal read and write cursors unambiguously mean "empty".
func ringFreeSpace() -> Int {
    let used = ringAvailable()
    return (gRingCapacity - used) - 1
}
86
+
87
/// Copies up to `count` bytes from `src` into the playback ring buffer.
///
/// - Parameters:
///   - src: Source bytes to enqueue.
///   - count: Number of bytes available at `src`.
/// - Returns: The number of bytes actually stored; fewer than `count` when the
///   buffer does not have enough free space, 0 when it is full or `count <= 0`.
///
/// Does no locking of its own; callers in this file hold `gRingLock`.
func ringWrite(_ src: UnsafePointer<UInt8>, count: Int) -> Int {
    let toWrite = min(count, ringFreeSpace())
    // Nothing to do for a full buffer or a non-positive count (a negative
    // count would otherwise form an invalid range / copy length).
    guard toWrite > 0 else { return 0 }

    // Bulk-copy in at most two contiguous runs instead of one byte at a time:
    // first up to the end of the storage, then the wrapped remainder.
    let firstRun = min(toWrite, gRingCapacity - gRingWritePos)
    memcpy(gRingBuffer.advanced(by: gRingWritePos), src, firstRun)
    if firstRun < toWrite {
        memcpy(gRingBuffer, src.advanced(by: firstRun), toWrite - firstRun)
    }

    gRingWritePos = (gRingWritePos + toWrite) % gRingCapacity
    return toWrite
}
96
+
97
/// Copies up to `count` bytes out of the playback ring buffer into `dst`.
///
/// - Parameters:
///   - dst: Destination with room for at least `count` bytes.
///   - count: Maximum number of bytes to dequeue.
/// - Returns: The number of bytes actually copied; fewer than `count` when the
///   buffer holds less data, 0 when it is empty or `count <= 0`.
///
/// Does no locking of its own; callers in this file hold `gRingLock`.
func ringRead(_ dst: UnsafeMutablePointer<UInt8>, count: Int) -> Int {
    let toRead = min(count, ringAvailable())
    // Nothing to do for an empty buffer or a non-positive count (a negative
    // count would otherwise form an invalid range / copy length).
    guard toRead > 0 else { return 0 }

    // Bulk-copy in at most two contiguous runs instead of one byte at a time:
    // first up to the end of the storage, then the wrapped remainder.
    let firstRun = min(toRead, gRingCapacity - gRingReadPos)
    memcpy(dst, gRingBuffer.advanced(by: gRingReadPos), firstRun)
    if firstRun < toRead {
        memcpy(dst.advanced(by: firstRun), gRingBuffer, toRead - firstRun)
    }

    gRingReadPos = (gRingReadPos + toRead) % gRingCapacity
    return toRead
}
106
+
107
/// Drops all queued playback audio by rewinding both cursors to the start.
/// The stored bytes are left in place; they simply become unreachable.
func ringClear() {
    (gRingReadPos, gRingWritePos) = (0, 0)
}
111
+
112
+ // ============================================================================
113
+ // ENTRY POINT
114
+ // ============================================================================
115
+
116
// Turn off stdio buffering on stdout so every fwrite of mic audio is handed to
// the pipe immediately.
setbuf(stdout, nil)

// Expect exactly: mic-vpio <micRate> <speakerRate>.
// Both rates must be finite and strictly positive: the rates are later used as
// a ratio to size the resampler output, and a zero, negative, NaN or infinite
// value would make that integer conversion trap inside the audio callback.
let args = CommandLine.arguments
guard args.count == 3,
      let micRate = Double(args[1]),
      let speakerRate = Double(args[2]),
      micRate.isFinite, micRate > 0,
      speakerRate.isFinite, speakerRate > 0 else {
    fputs("Usage: mic-vpio <micRate> <speakerRate>\n", stderr)
    exit(1)
}

// Publish the rates to the globals read by the audio callbacks.
gMicRate = micRate
gVpioRate = speakerRate

// Allocate and zero the playback ring buffer.
gRingBuffer = .allocate(capacity: gRingCapacity)
gRingBuffer.initialize(repeating: 0, count: gRingCapacity)
132
+
133
+ // ============================================================================
134
+ // MIC RESAMPLER (vpioRate -> micRate)
135
+ // ============================================================================
136
+
137
/// True when mic audio must be converted from the VPIO (speaker) rate to the
/// requested mic rate before it is written to stdout.
let needsResampling: Bool = speakerRate != micRate

if needsResampling {
    let frameBytes = UInt32(BYTES_PER_FRAME)

    // Converter input: packed signed 16-bit mono PCM at the VPIO rate.
    var srcFormat = AudioStreamBasicDescription(
        mSampleRate: speakerRate,
        mFormatID: kAudioFormatLinearPCM,
        mFormatFlags: kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked,
        mBytesPerPacket: frameBytes,
        mFramesPerPacket: 1,
        mBytesPerFrame: frameBytes,
        mChannelsPerFrame: CHANNELS,
        mBitsPerChannel: BITS_PER_CHANNEL,
        mReserved: 0
    )

    // Converter output: the same layout, only the sample rate differs.
    var dstFormat = srcFormat
    dstFormat.mSampleRate = micRate

    let converterStatus = AudioConverterNew(&srcFormat, &dstFormat, &gMicConverter)
    if converterStatus != noErr {
        fputs("ERROR: Failed to create mic resampler \(speakerRate)Hz -> \(micRate)Hz (status \(converterStatus))\n", stderr)
        exit(1)
    }

    // Start with room for 4096 output frames; the input callback grows the
    // buffer if a larger block ever arrives.
    gResampleBufferCapacity = 4096
    gResampleBuffer = UnsafeMutablePointer<Int16>.allocate(capacity: gResampleBufferCapacity)
}
173
+
174
+ // ============================================================================
175
+ // VPIO SETUP -- both elements use speakerRate
176
+ // ============================================================================
177
+
178
// Locate Apple's VoiceProcessingIO output unit.
var desc = AudioComponentDescription(
    componentType: kAudioUnitType_Output,
    componentSubType: kAudioUnitSubType_VoiceProcessingIO,
    componentManufacturer: kAudioUnitManufacturer_Apple,
    componentFlags: 0,
    componentFlagsMask: 0
)

guard let component = AudioComponentFindNext(nil, &desc) else {
    fputs("ERROR: Voice Processing IO audio unit not found\n", stderr)
    exit(1)
}

// Instantiate the unit into the global used by the callbacks.
var status = AudioComponentInstanceNew(component, &gAudioUnit)
if status != noErr {
    fputs("ERROR: Failed to create VPIO instance (status \(status))\n", stderr)
    exit(1)
}

// Turn on capture: input scope of element 1 is the microphone side.
var enableIO: UInt32 = 1
let enableIOSize = UInt32(MemoryLayout<UInt32>.size)
status = AudioUnitSetProperty(
    gAudioUnit,
    kAudioOutputUnitProperty_EnableIO,
    kAudioUnitScope_Input, 1,
    &enableIO,
    enableIOSize
)
if status != noErr {
    fputs("ERROR: Failed to enable mic input (status \(status))\n", stderr)
    exit(1)
}

// One stream format shared by both elements: packed signed 16-bit mono PCM at
// the speaker rate.
let vpioFrameBytes = UInt32(BYTES_PER_FRAME)
var vpioFormat = AudioStreamBasicDescription()
vpioFormat.mSampleRate = speakerRate
vpioFormat.mFormatID = kAudioFormatLinearPCM
vpioFormat.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked
vpioFormat.mBytesPerPacket = vpioFrameBytes
vpioFormat.mFramesPerPacket = 1
vpioFormat.mBytesPerFrame = vpioFrameBytes
vpioFormat.mChannelsPerFrame = CHANNELS
vpioFormat.mBitsPerChannel = BITS_PER_CHANNEL
vpioFormat.mReserved = 0
let vpioFormatSize = UInt32(MemoryLayout<AudioStreamBasicDescription>.size)

// Mic side: output scope of element 1 is the audio this process receives.
status = AudioUnitSetProperty(
    gAudioUnit,
    kAudioUnitProperty_StreamFormat,
    kAudioUnitScope_Output, 1,
    &vpioFormat,
    vpioFormatSize
)
if status != noErr {
    fputs("ERROR: Failed to set mic format (status \(status))\n", stderr)
    exit(1)
}

// Speaker side: input scope of element 0 is the audio this process supplies.
status = AudioUnitSetProperty(
    gAudioUnit,
    kAudioUnitProperty_StreamFormat,
    kAudioUnitScope_Input, 0,
    &vpioFormat,
    vpioFormatSize
)
if status != noErr {
    fputs("ERROR: Failed to set speaker format (status \(status))\n", stderr)
    exit(1)
}
249
+
250
+ // ============================================================================
251
+ // INPUT CALLBACK (echo-cancelled mic -> resample -> stdout)
252
+ // ============================================================================
253
+
254
/// AudioConverter data supplier callback for mic resampling.
///
/// Hands the converter frames from `gConverterInputBuffer` (the block captured
/// by the input callback). The converter may call this several times during a
/// single `AudioConverterFillComplexBuffer`, each time asking for a number of
/// packets in `ioNumberDataPackets`.
///
/// - Returns: `noErr` after supplying at least one frame, or the private
///   sentinel `100` once no input is pending, which the caller treats as
///   "end of data for this block" rather than an error.
let converterInputProc: AudioConverterComplexInputDataProc = {
    (_, ioNumberDataPackets, ioData, _, _) -> OSStatus in

    let requestedFrames = ioNumberDataPackets.pointee
    let available = min(requestedFrames, gConverterInputFrames)

    guard available > 0, let pending = gConverterInputBuffer else {
        ioNumberDataPackets.pointee = 0
        ioData.pointee.mNumberBuffers = 0
        return 100 // End of data sentinel
    }

    ioData.pointee.mNumberBuffers = 1
    ioData.pointee.mBuffers.mNumberChannels = CHANNELS
    ioData.pointee.mBuffers.mDataByteSize = available * UInt32(BYTES_PER_FRAME)
    ioData.pointee.mBuffers.mData = UnsafeMutableRawPointer(pending)

    ioNumberDataPackets.pointee = available

    // Only `available` frames were handed over. Keep the remainder pending so
    // a follow-up request within the same conversion receives it, instead of
    // silently dropping it when the converter asked for less than we hold.
    gConverterInputBuffer = pending.advanced(by: Int(available) * Int(CHANNELS))
    gConverterInputFrames -= available

    return noErr
}
278
+
279
/// Input callback: pulls a block of echo-cancelled mic audio out of the VPIO
/// unit (element 1), resamples it to `gMicRate` when required, and writes the
/// resulting 16-bit PCM to stdout.
///
/// Returns the failing status if rendering or conversion fails, `noErr`
/// otherwise.
var inputCallback = AURenderCallbackStruct(
    inputProc: { (_, ioActionFlags, inTimeStamp, _, inNumberFrames, _) -> OSStatus in
        let byteCount = Int(inNumberFrames) * BYTES_PER_FRAME
        // NOTE(review): this allocates on every callback; consider a reusable
        // capture buffer if allocation latency ever shows up here.
        let buffer = UnsafeMutablePointer<UInt8>.allocate(capacity: byteCount)
        defer { buffer.deallocate() }

        var bufferList = AudioBufferList(
            mNumberBuffers: 1,
            mBuffers: AudioBuffer(
                mNumberChannels: CHANNELS,
                mDataByteSize: UInt32(byteCount),
                mData: UnsafeMutableRawPointer(buffer)
            )
        )

        // Fetch the captured frames from bus 1 (the mic element).
        let renderStatus = AudioUnitRender(
            gAudioUnit, ioActionFlags, inTimeStamp, 1, inNumberFrames, &bufferList
        )
        if renderStatus != noErr { return renderStatus }

        // If no resampling needed, write directly to stdout
        if !needsResampling || gMicConverter == nil {
            fwrite(buffer, 1, byteCount, stdout)
            return noErr
        }

        // Resample from vpioRate to micRate. The +1 leaves room for rounding.
        let inputFrames = inNumberFrames
        let outputFrames = UInt32(Double(inputFrames) * gMicRate / gVpioRate) + 1

        // Ensure resample buffer is large enough
        if Int(outputFrames) > gResampleBufferCapacity {
            gResampleBuffer?.deallocate()
            gResampleBufferCapacity = Int(outputFrames) * 2
            gResampleBuffer = .allocate(capacity: gResampleBufferCapacity)
        }

        // Expose the captured block to converterInputProc.
        gConverterInputBuffer = UnsafeMutableRawPointer(buffer).assumingMemoryBound(to: Int16.self)
        gConverterInputFrames = inputFrames

        var outFrameCount = outputFrames
        var outBufferList = AudioBufferList(
            mNumberBuffers: 1,
            mBuffers: AudioBuffer(
                mNumberChannels: CHANNELS,
                mDataByteSize: outFrameCount * UInt32(BYTES_PER_FRAME),
                mData: UnsafeMutableRawPointer(gResampleBuffer!)
            )
        )

        let convertStatus = AudioConverterFillComplexBuffer(
            gMicConverter!,
            converterInputProc,
            nil,
            &outFrameCount,
            &outBufferList,
            nil
        )

        // The capture buffer is freed when this callback returns, so do not
        // leave the globals pointing at it.
        gConverterInputBuffer = nil
        gConverterInputFrames = 0

        // 100 = our "end of data" sentinel, not an error
        if convertStatus != noErr && convertStatus != 100 {
            return convertStatus
        }

        // outFrameCount now holds the number of frames actually produced.
        let outBytes = Int(outFrameCount) * BYTES_PER_FRAME
        if outBytes > 0 {
            fwrite(gResampleBuffer!, 1, outBytes, stdout)
        }

        return noErr
    },
    inputProcRefCon: nil
)

// Register the callback so the unit notifies us when mic frames are ready.
status = AudioUnitSetProperty(
    gAudioUnit,
    kAudioOutputUnitProperty_SetInputCallback,
    kAudioUnitScope_Global, 0,
    &inputCallback,
    UInt32(MemoryLayout<AURenderCallbackStruct>.size)
)
guard status == noErr else {
    fputs("ERROR: Failed to set input callback (status \(status))\n", stderr)
    exit(1)
}
365
+
366
+ // ============================================================================
367
+ // RENDER CALLBACK (ring buffer -> speakers)
368
+ // ============================================================================
369
+
370
/// Render callback: fills each output buffer with queued playback audio from
/// the ring buffer, padding with silence when the queue runs short. A pending
/// clear request (raised by SIGUSR1) is honoured before reading.
var renderCallback = AURenderCallbackStruct(
    inputProc: { (_, _, _, _, inNumberFrames, ioData) -> OSStatus in
        guard let rawList = ioData else { return noErr }
        let outputs = UnsafeMutableAudioBufferListPointer(rawList)
        let wanted = Int(inNumberFrames) * BYTES_PER_FRAME

        for index in outputs.indices {
            let dest = outputs[index].mData!.assumingMemoryBound(to: UInt8.self)

            // Hold the lock only while touching the ring buffer state.
            os_unfair_lock_lock(&gRingLock)
            if gClearRequested {
                ringClear()
                gClearRequested = false
            }
            let got = ringRead(dest, count: wanted)
            os_unfair_lock_unlock(&gRingLock)

            // Whatever the ring buffer could not supply is played as silence.
            let shortfall = wanted - got
            if shortfall > 0 {
                memset(dest + got, 0, shortfall)
            }
            outputs[index].mDataByteSize = UInt32(wanted)
        }

        return noErr
    },
    inputProcRefCon: nil
)

// Attach the callback to the input scope of element 0 (the speaker side).
status = AudioUnitSetProperty(
    gAudioUnit,
    kAudioUnitProperty_SetRenderCallback,
    kAudioUnitScope_Input, 0,
    &renderCallback,
    UInt32(MemoryLayout<AURenderCallbackStruct>.size)
)
if status != noErr {
    fputs("ERROR: Failed to set render callback (status \(status))\n", stderr)
    exit(1)
}
412
+
413
+ // ============================================================================
414
+ // START
415
+ // ============================================================================
416
+
417
// Install the signal handlers BEFORE announcing READY. The default action for
// SIGUSR1/SIGUSR2 is to terminate the process, so a controller that reacts to
// READY by sending an interrupt right away must already find them in place.
//
// SIGUSR1: request a ring buffer clear and start discarding stdin audio.
// SIGUSR2: resume queueing stdin audio.
signal(SIGUSR1) { _ in
    gClearRequested = true
    gDiscardStdin = true
}

signal(SIGUSR2) { _ in
    gDiscardStdin = false
}

// Use _exit rather than exit: exit() runs atexit handlers and stdio teardown,
// which is not async-signal-safe. stdout is unbuffered, so nothing is lost.
signal(SIGINT) { _ in _exit(0) }
signal(SIGTERM) { _ in _exit(0) }

status = AudioUnitInitialize(gAudioUnit)
guard status == noErr else {
    fputs("ERROR: Failed to initialize VPIO (status \(status))\n", stderr)
    fputs("  This may mean: no microphone is available, mic access was denied,\n", stderr)
    fputs("  or the audio device doesn't support \(speakerRate)Hz.\n", stderr)
    exit(1)
}

status = AudioOutputUnitStart(gAudioUnit)
guard status == noErr else {
    fputs("ERROR: Failed to start VPIO (status \(status))\n", stderr)
    exit(1)
}

// Tell the parent process that audio I/O is running.
fputs("READY\n", stderr)

// ============================================================================
// STDIN READER THREAD (TTS audio -> ring buffer)
// ============================================================================

let stdinThread = Thread {
    let chunkSize = 4096
    let buf = UnsafeMutablePointer<UInt8>.allocate(capacity: chunkSize)
    defer { buf.deallocate() }

    while true {
        // fread returning 0 means EOF or a read error; either way stop reading.
        let bytesRead = fread(buf, 1, chunkSize, stdin)
        if bytesRead == 0 { break }

        // After SIGUSR1 (interrupt), discard stale pipe data until SIGUSR2 (resume)
        if gDiscardStdin { continue }

        var offset = 0
        while offset < bytesRead {
            // Re-check discard flag inside the write loop in case SIGUSR1 arrives
            // while we're draining a large read into the ring buffer
            if gDiscardStdin { break }

            os_unfair_lock_lock(&gRingLock)
            let written = ringWrite(buf.advanced(by: offset), count: bytesRead - offset)
            os_unfair_lock_unlock(&gRingLock)

            offset += written
            if written == 0 {
                // Ring buffer is full: wait briefly for playback to drain it.
                Thread.sleep(forTimeInterval: 0.001)
            }
        }
    }
}
stdinThread.start()

// Park the main thread; all work happens in the audio callbacks, the stdin
// thread and the signal handlers.
dispatchMain()
@@ -0,0 +1,132 @@
1
+ /**
2
+ * Tagged mock TTS server for reproducing stale-audio bugs.
3
+ *
4
+ * Like mock-tts-server.mjs but with three differences:
5
+ * - Tags each PCM chunk with a generation counter byte (0x01, 0x02, ...)
6
+ * so tests can identify which generation produced a given chunk.
7
+ * - Sets an interrupt flag as soon as the interrupt command arrives (outside
8
+ * the serial queue); the generate loop checks it between chunks. The real
9
+ * tts-server.py reportedly cannot do this while blocked writing to stdout.
10
+ * - Writes chunks with small delays to simulate real TTS generation latency.
11
+ *
12
+ * Protocol: same as tts-server.py (JSON stdin, length-prefixed PCM stdout).
13
+ *
14
+ * Run: node sidecar/mock-tts-server-tagged.mjs
15
+ */
16
+
17
+ import { createInterface } from "readline";
18
+
19
+ // ============================================================================
20
+ // CONSTANTS
21
+ // ============================================================================
22
+
23
/** Payload bytes per chunk: 10ms of 24kHz mono 16-bit audio. */
const CHUNK_SIZE = 480;

/** How many chunks one generate command emits; argv[2] overrides the default of 15. */
const CHUNKS_PER_GENERATE = Number.parseInt(process.argv[2] || "15");

/** Pause before each chunk, in milliseconds; argv[3] overrides the default of 10. */
const CHUNK_DELAY_MS = Number.parseInt(process.argv[3] || "10");

// ============================================================================
// STATE
// ============================================================================

/** Generation counter; incremented per generate command, so the first one is 1. */
let genCounter = 0;

/** Commands waiting to be handled one at a time, plus the "drain in progress" flag. */
const pendingCommands = [];
let processing = false;

/** Raised the moment an interrupt command is received. */
let interrupted = false;
45
+
46
+ // ============================================================================
47
+ // HELPERS
48
+ // ============================================================================
49
+
50
/** Returns a promise that resolves (with no value) after roughly `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
53
+
54
/**
 * Emits one length-prefixed chunk on stdout: a 4-byte big-endian length
 * (always CHUNK_SIZE) followed by CHUNK_SIZE bytes all set to `tag`.
 */
function writeChunk(tag) {
  const lengthPrefix = Buffer.alloc(4);
  lengthPrefix.writeUInt32BE(CHUNK_SIZE, 0);
  const payload = Buffer.alloc(CHUNK_SIZE, tag);

  process.stdout.write(lengthPrefix);
  process.stdout.write(payload);
}
60
+
61
/** Emits the end-of-generation marker: four zero bytes (a zero length prefix). */
function writeEndMarker() {
  const terminator = Buffer.alloc(4, 0);
  process.stdout.write(terminator);
}
64
+
65
+ // ============================================================================
66
+ // COMMAND PROCESSING
67
+ // ============================================================================
68
+
69
/**
 * Process queued commands serially, like the real Python server.
 * A new generate cannot start until the previous one finishes.
 *
 * Re-entrant calls return immediately while a drain is already running; the
 * running drain picks up anything pushed in the meantime.
 */
async function drainQueue() {
  if (processing) return;
  processing = true;

  try {
    while (pendingCommands.length > 0) {
      const cmd = pendingCommands.shift();
      await handleCommand(cmd);
    }
  } finally {
    // Always release the flag: if a command handler rejects, leaving it set
    // would make every later drainQueue() call return without doing anything.
    processing = false;
  }
}
84
+
85
/**
 * Handles one queued command. Only "generate" does anything: it clears the
 * interrupt flag, bumps the generation counter, emits up to
 * CHUNKS_PER_GENERATE chunks tagged with the low byte of the counter (pausing
 * CHUNK_DELAY_MS before each), stops early if an interrupt arrives, and
 * always finishes with the end marker.
 */
async function handleCommand(cmd) {
  if (cmd.cmd !== "generate") return;

  interrupted = false;
  genCounter += 1;
  const tag = genCounter & 0xff;

  for (let sent = 0; sent < CHUNKS_PER_GENERATE && !interrupted; sent += 1) {
    await sleep(CHUNK_DELAY_MS);
    // The interrupt may have landed while we were sleeping.
    if (!interrupted) {
      writeChunk(tag);
    }
  }

  writeEndMarker();
}
101
+
102
+ // ============================================================================
103
+ // ENTRY POINT
104
+ // ============================================================================
105
+
106
// Announce readiness on stderr (stdout carries only PCM data).
process.stderr.write("READY\n");

const rl = createInterface({ input: process.stdin });

// One JSON command per stdin line.
rl.on("line", (line) => {
  let cmd;
  try {
    cmd = JSON.parse(line);
  } catch {
    // Not JSON: ignore the line.
    return;
  }

  // JSON.parse also accepts `null` and bare primitives; reading `.cmd` on
  // null would throw and take the server down, so ignore anything that is
  // not an object.
  if (cmd === null || typeof cmd !== "object") {
    return;
  }

  if (cmd.cmd === "quit") {
    process.exit(0);
  }

  // Handle interrupt immediately -- not through the serial queue.
  // The event loop processes this between awaits in handleCommand,
  // so the interrupted flag is visible to the generate loop.
  if (cmd.cmd === "interrupt") {
    interrupted = true;
    return;
  }

  pendingCommands.push(cmd);
  drainQueue();
});