voicecc 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +6 -0
- package/README.md +48 -0
- package/bin/voicecc.js +39 -0
- package/dashboard/dist/assets/index-BXemFrMp.css +1 -0
- package/dashboard/dist/assets/index-dAYfRls7.js +11 -0
- package/dashboard/dist/audio-processor.js +126 -0
- package/dashboard/dist/index.html +13 -0
- package/dashboard/routes/auth.ts +119 -0
- package/dashboard/routes/browser-call.ts +87 -0
- package/dashboard/routes/claude-md.ts +50 -0
- package/dashboard/routes/conversations.ts +203 -0
- package/dashboard/routes/integrations.ts +154 -0
- package/dashboard/routes/mcp-servers.ts +198 -0
- package/dashboard/routes/settings.ts +64 -0
- package/dashboard/routes/tunnel.ts +66 -0
- package/dashboard/routes/twilio.ts +120 -0
- package/dashboard/routes/voice.ts +48 -0
- package/dashboard/routes/webrtc.ts +85 -0
- package/dashboard/server.ts +130 -0
- package/dashboard/tsconfig.json +13 -0
- package/init/CLAUDE.md +18 -0
- package/package.json +59 -0
- package/run.ts +68 -0
- package/scripts/postinstall.js +228 -0
- package/services/browser-call-manager.ts +106 -0
- package/services/device-pairing.ts +176 -0
- package/services/env.ts +88 -0
- package/services/tunnel.ts +204 -0
- package/services/twilio-manager.ts +126 -0
- package/sidecar/assets/startup.pcm +0 -0
- package/sidecar/audio-adapter.ts +60 -0
- package/sidecar/audio-capture.ts +220 -0
- package/sidecar/browser-audio-playback.test.ts +149 -0
- package/sidecar/browser-audio.ts +147 -0
- package/sidecar/browser-server.ts +331 -0
- package/sidecar/chime.test.ts +69 -0
- package/sidecar/chime.ts +54 -0
- package/sidecar/claude-session.ts +295 -0
- package/sidecar/endpointing.ts +163 -0
- package/sidecar/index.ts +83 -0
- package/sidecar/local-audio.ts +126 -0
- package/sidecar/mic-vpio +0 -0
- package/sidecar/mic-vpio.swift +484 -0
- package/sidecar/mock-tts-server-tagged.mjs +132 -0
- package/sidecar/narration.ts +204 -0
- package/sidecar/scripts/generate-startup-audio.py +79 -0
- package/sidecar/session-lock.ts +123 -0
- package/sidecar/sherpa-onnx-node.d.ts +4 -0
- package/sidecar/stt.ts +199 -0
- package/sidecar/tts-server.py +193 -0
- package/sidecar/tts.ts +481 -0
- package/sidecar/twilio-audio.ts +338 -0
- package/sidecar/twilio-server.ts +436 -0
- package/sidecar/types.ts +210 -0
- package/sidecar/vad.ts +101 -0
- package/sidecar/voice-loop-bugs.test.ts +522 -0
- package/sidecar/voice-session.ts +523 -0
- package/skills/voice/SKILL.md +26 -0
- package/tsconfig.json +22 -0
|
@@ -0,0 +1,484 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* macOS Voice Processing IO (VPIO) binary for echo-cancelled audio I/O.
|
|
3
|
+
*
|
|
4
|
+
* Uses macOS's built-in acoustic echo cancellation via the VoiceProcessingIO
|
|
5
|
+
* AudioUnit. Routes TTS audio through the VPIO output element so the AEC has
|
|
6
|
+
* a reference signal to subtract from the mic input.
|
|
7
|
+
*
|
|
8
|
+
* VPIO requires the same sample rate on both elements. Internally uses the
|
|
9
|
+
* speaker rate for the AudioUnit, then resamples the mic output to the
|
|
10
|
+
* requested mic rate using AudioConverter before writing to stdout.
|
|
11
|
+
*
|
|
12
|
+
* - stdin: Raw 16-bit signed mono PCM at speakerRate (TTS audio for playback)
|
|
13
|
+
* - stdout: Raw 16-bit signed mono PCM at micRate (echo-cancelled mic audio)
|
|
14
|
+
* - SIGUSR1: Clear playback ring buffer and discard stdin audio until SIGUSR2 (for interrupting TTS)
|
|
15
|
+
* - SIGTERM: Clean shutdown
|
|
16
|
+
*
|
|
17
|
+
* Usage: mic-vpio <micRate> <speakerRate>
|
|
18
|
+
* micRate: Sample rate for mic output in Hz (e.g. 16000)
|
|
19
|
+
* speakerRate: Sample rate for speaker input in Hz (e.g. 24000)
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import AudioToolbox
|
|
23
|
+
import Foundation
|
|
24
|
+
|
|
25
|
+
// ============================================================================
|
|
26
|
+
// CONSTANTS
|
|
27
|
+
// ============================================================================
|
|
28
|
+
|
|
29
|
+
/// Channel count used in every stream format in this file (mono)
let CHANNELS: UInt32 = 1

/// Bit depth of one PCM sample
let BITS_PER_CHANNEL: UInt32 = 16

/// Size in bytes of one frame (one 16-bit mono sample)
let BYTES_PER_FRAME: Int = 2

/// Ring buffer capacity in bytes: 5 seconds of 48 kHz mono 16-bit audio
let RING_BUFFER_CAPACITY = 5 * 48_000 * 2
|
|
35
|
+
|
|
36
|
+
// ============================================================================
|
|
37
|
+
// GLOBALS
|
|
38
|
+
// ============================================================================
|
|
39
|
+
|
|
40
|
+
/// VPIO AudioUnit instance; global so the C-convention callbacks can reach it
var gAudioUnit: AudioComponentInstance!

/// Byte ring buffer carrying stdin audio to the speaker render callback
var gRingBuffer: UnsafeMutablePointer<UInt8>!
var gRingCapacity = RING_BUFFER_CAPACITY
var gRingReadPos = 0
var gRingWritePos = 0
/// Guards the ring buffer positions and contents
var gRingLock = os_unfair_lock()

/// Raised by the SIGUSR1 handler; the render callback clears the ring buffer
/// and lowers the flag again
var gClearRequested = false

/// Raised by SIGUSR1 and lowered by SIGUSR2. While raised, the stdin reader
/// drops what it reads instead of queueing it, so stale pipe data cannot
/// refill the ring buffer after an interrupt has cleared it.
var gDiscardStdin = false

/// Resampler from vpioRate to micRate (stays nil when the rates match)
var gMicConverter: AudioConverterRef? = nil

/// Sample rate requested for the mic stream written to stdout
var gMicRate: Double = 16_000

/// Sample rate the VPIO unit runs at internally (the speaker rate)
var gVpioRate: Double = 24_000

/// Destination buffer for resampled mic audio, with its capacity in samples
var gResampleBuffer: UnsafeMutablePointer<Int16>? = nil
var gResampleBufferCapacity = 0

/// Captured mic samples waiting to be handed to the converter, and how many
/// frames of them remain
var gConverterInputBuffer: UnsafeMutablePointer<Int16>? = nil
var gConverterInputFrames: UInt32 = 0
|
|
74
|
+
|
|
75
|
+
// ============================================================================
|
|
76
|
+
// RING BUFFER
|
|
77
|
+
// ============================================================================
|
|
78
|
+
|
|
79
|
+
/// Number of bytes currently stored in the ring buffer and ready to be read.
/// Does no locking of its own.
func ringAvailable() -> Int {
    let distance = gRingWritePos - gRingReadPos
    // A negative distance means the write position has wrapped past the end
    return distance >= 0 ? distance : distance + gRingCapacity
}
|
|
82
|
+
|
|
83
|
+
/// Number of bytes that can still be written. One slot is always kept empty
/// so that equal read and write positions mean "empty".
func ringFreeSpace() -> Int {
    let used = ringAvailable()
    return gRingCapacity - used - 1
}
|
|
86
|
+
|
|
87
|
+
/// Copies up to `count` bytes from `src` into the ring buffer.
///
/// Only as many bytes as currently fit are written; the caller is expected to
/// retry with the remainder. Does no locking of its own.
///
/// - Parameters:
///   - src: Source bytes to enqueue.
///   - count: Number of bytes available at `src`. Zero or negative writes nothing.
/// - Returns: Number of bytes actually written (0 when the buffer is full).
func ringWrite(_ src: UnsafePointer<UInt8>, count: Int) -> Int {
    let toWrite = min(count, ringFreeSpace())
    guard toWrite > 0 else { return 0 }

    // Copy in at most two contiguous runs (up to the end of storage, then the
    // wrapped remainder) rather than one modulo-indexed byte at a time, so a
    // caller holding the ring lock holds it for as short a time as possible.
    let firstRun = min(toWrite, gRingCapacity - gRingWritePos)
    memcpy(gRingBuffer.advanced(by: gRingWritePos), src, firstRun)
    if toWrite > firstRun {
        memcpy(gRingBuffer, src.advanced(by: firstRun), toWrite - firstRun)
    }

    gRingWritePos = (gRingWritePos + toWrite) % gRingCapacity
    return toWrite
}
|
|
96
|
+
|
|
97
|
+
/// Copies up to `count` bytes out of the ring buffer into `dst`.
///
/// Does no locking of its own.
///
/// - Parameters:
///   - dst: Destination with room for at least `count` bytes.
///   - count: Maximum number of bytes to dequeue. Zero or negative reads nothing.
/// - Returns: Number of bytes actually copied (0 when the buffer is empty).
func ringRead(_ dst: UnsafeMutablePointer<UInt8>, count: Int) -> Int {
    let toRead = min(count, ringAvailable())
    guard toRead > 0 else { return 0 }

    // Copy in at most two contiguous runs (up to the end of storage, then the
    // wrapped remainder) rather than one modulo-indexed byte at a time, so a
    // caller holding the ring lock holds it for as short a time as possible.
    let firstRun = min(toRead, gRingCapacity - gRingReadPos)
    memcpy(dst, gRingBuffer.advanced(by: gRingReadPos), firstRun)
    if toRead > firstRun {
        memcpy(dst.advanced(by: firstRun), gRingBuffer, toRead - firstRun)
    }

    gRingReadPos = (gRingReadPos + toRead) % gRingCapacity
    return toRead
}
|
|
106
|
+
|
|
107
|
+
/// Empties the ring buffer by resetting both positions to the start.
/// The stored bytes themselves are left untouched.
func ringClear() {
    (gRingWritePos, gRingReadPos) = (0, 0)
}
|
|
111
|
+
|
|
112
|
+
// ============================================================================
|
|
113
|
+
// ENTRY POINT
|
|
114
|
+
// ============================================================================
|
|
115
|
+
|
|
116
|
+
// stdout carries raw PCM; disable stdio buffering so every write goes out immediately
setbuf(stdout, nil)

// Expect exactly two arguments: <micRate> <speakerRate>, both in Hz.
// Rates must be finite and strictly positive: the speaker rate is later used
// as a divisor and both are handed to CoreAudio as sample rates.
let args = CommandLine.arguments
guard args.count == 3,
      let micRate = Double(args[1]),
      let speakerRate = Double(args[2]),
      micRate.isFinite, micRate > 0,
      speakerRate.isFinite, speakerRate > 0 else {
    fputs("Usage: mic-vpio <micRate> <speakerRate>\n", stderr)
    exit(1)
}

gMicRate = micRate
gVpioRate = speakerRate

// Allocate the zero-filled playback ring buffer
gRingBuffer = .allocate(capacity: gRingCapacity)
gRingBuffer.initialize(repeating: 0, count: gRingCapacity)
|
|
132
|
+
|
|
133
|
+
// ============================================================================
|
|
134
|
+
// MIC RESAMPLER (vpioRate -> micRate)
|
|
135
|
+
// ============================================================================
|
|
136
|
+
|
|
137
|
+
/// True when the mic stream has to be converted from the VPIO rate to micRate
let needsResampling = (micRate != speakerRate)

if needsResampling {
    // Source: packed signed 16-bit mono PCM at the VPIO (speaker) rate
    var srcFormat = AudioStreamBasicDescription(
        mSampleRate: speakerRate,
        mFormatID: kAudioFormatLinearPCM,
        mFormatFlags: kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked,
        mBytesPerPacket: UInt32(BYTES_PER_FRAME),
        mFramesPerPacket: 1,
        mBytesPerFrame: UInt32(BYTES_PER_FRAME),
        mChannelsPerFrame: CHANNELS,
        mBitsPerChannel: BITS_PER_CHANNEL,
        mReserved: 0
    )

    // Destination: the same layout, differing only in sample rate
    var dstFormat = srcFormat
    dstFormat.mSampleRate = micRate

    let converterStatus = AudioConverterNew(&srcFormat, &dstFormat, &gMicConverter)
    if converterStatus != noErr {
        fputs("ERROR: Failed to create mic resampler \(speakerRate)Hz -> \(micRate)Hz (status \(converterStatus))\n", stderr)
        exit(1)
    }

    // Start with room for 4096 output frames; the input callback grows this on demand
    gResampleBufferCapacity = 4096
    gResampleBuffer = UnsafeMutablePointer<Int16>.allocate(capacity: gResampleBufferCapacity)
}
|
|
173
|
+
|
|
174
|
+
// ============================================================================
|
|
175
|
+
// VPIO SETUP -- both elements use speakerRate
|
|
176
|
+
// ============================================================================
|
|
177
|
+
|
|
178
|
+
// Describe Apple's Voice Processing IO output unit
var desc = AudioComponentDescription()
desc.componentType = kAudioUnitType_Output
desc.componentSubType = kAudioUnitSubType_VoiceProcessingIO
desc.componentManufacturer = kAudioUnitManufacturer_Apple
desc.componentFlags = 0
desc.componentFlagsMask = 0

// Locate the component; bail out if the system does not provide one
guard let component = AudioComponentFindNext(nil, &desc) else {
    fputs("ERROR: Voice Processing IO audio unit not found\n", stderr)
    exit(1)
}

// Instantiate it into the global used by the callbacks
var status = AudioComponentInstanceNew(component, &gAudioUnit)
if status != noErr {
    fputs("ERROR: Failed to create VPIO instance (status \(status))\n", stderr)
    exit(1)
}
|
|
196
|
+
|
|
197
|
+
// Turn on capture: element 1 is the mic side of the IO unit
var enableIO: UInt32 = 1
let enableIOSize = UInt32(MemoryLayout<UInt32>.size)
status = AudioUnitSetProperty(
    gAudioUnit,
    kAudioOutputUnitProperty_EnableIO,
    kAudioUnitScope_Input, 1,
    &enableIO,
    enableIOSize
)
if status != noErr {
    fputs("ERROR: Failed to enable mic input (status \(status))\n", stderr)
    exit(1)
}
|
|
210
|
+
|
|
211
|
+
// One format shared by both elements (VPIO requires the same rate on each)
var vpioFormat = AudioStreamBasicDescription(
    mSampleRate: speakerRate,
    mFormatID: kAudioFormatLinearPCM,
    mFormatFlags: kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked,
    mBytesPerPacket: UInt32(BYTES_PER_FRAME),
    mFramesPerPacket: 1,
    mBytesPerFrame: UInt32(BYTES_PER_FRAME),
    mChannelsPerFrame: CHANNELS,
    mBitsPerChannel: BITS_PER_CHANNEL,
    mReserved: 0
)
let vpioFormatSize = UInt32(MemoryLayout<AudioStreamBasicDescription>.size)

// Apply it first to the mic side (output scope of element 1 = what we receive),
// then to the speaker side (input scope of element 0 = what we feed)
let vpioFormatTargets: [(scope: AudioUnitScope, element: AudioUnitElement, label: String)] = [
    (kAudioUnitScope_Output, 1, "mic"),
    (kAudioUnitScope_Input, 0, "speaker"),
]
for target in vpioFormatTargets {
    status = AudioUnitSetProperty(
        gAudioUnit,
        kAudioUnitProperty_StreamFormat,
        target.scope, target.element,
        &vpioFormat,
        vpioFormatSize
    )
    if status != noErr {
        fputs("ERROR: Failed to set \(target.label) format (status \(status))\n", stderr)
        exit(1)
    }
}
|
|
249
|
+
|
|
250
|
+
// ============================================================================
|
|
251
|
+
// INPUT CALLBACK (echo-cancelled mic -> resample -> stdout)
|
|
252
|
+
// ============================================================================
|
|
253
|
+
|
|
254
|
+
/// AudioConverter data supplier callback for mic resampling.
///
/// Hands the converter samples from the capture buffer published in
/// `gConverterInputBuffer` / `gConverterInputFrames`. At most the requested
/// number of frames is supplied per call; anything left over stays available
/// for the converter's next request instead of being dropped.
///
/// Returns `noErr` when frames were supplied, or the private sentinel 100
/// ("end of data for now") when nothing is left.
let converterInputProc: AudioConverterComplexInputDataProc = {
    (_, ioNumberDataPackets, ioData, _, _) -> OSStatus in

    let requestedFrames = ioNumberDataPackets.pointee
    let available = min(requestedFrames, gConverterInputFrames)

    guard available > 0, let input = gConverterInputBuffer else {
        ioNumberDataPackets.pointee = 0
        ioData.pointee.mNumberBuffers = 0
        return 100  // End of data sentinel
    }

    ioData.pointee.mNumberBuffers = 1
    ioData.pointee.mBuffers.mNumberChannels = CHANNELS
    ioData.pointee.mBuffers.mDataByteSize = available * UInt32(BYTES_PER_FRAME)
    ioData.pointee.mBuffers.mData = UnsafeMutableRawPointer(input)

    ioNumberDataPackets.pointee = available

    // Consume only what was handed over. The stream is mono, so one frame is
    // one Int16 sample and the pointer advances by `available` elements.
    gConverterInputBuffer = input.advanced(by: Int(available) * Int(CHANNELS))
    gConverterInputFrames -= available

    return noErr
}
|
|
278
|
+
|
|
279
|
+
/// Scratch buffer receiving each AudioUnitRender result. Reused across
/// callbacks and grown on demand so the audio callback does not allocate and
/// free memory on every cycle.
var gCaptureBuffer: UnsafeMutablePointer<UInt8>?
var gCaptureBufferCapacity: Int = 0

/// Input callback for the VPIO unit: pulls `inNumberFrames` of mic audio from
/// element 1, resamples it to `gMicRate` when a converter is configured, and
/// writes the resulting PCM to stdout.
///
/// Returns the failing status if rendering or conversion fails, `noErr` otherwise.
var inputCallback = AURenderCallbackStruct(
    inputProc: { (_, ioActionFlags, inTimeStamp, _, inNumberFrames, _) -> OSStatus in
        let byteCount = Int(inNumberFrames) * BYTES_PER_FRAME
        guard byteCount > 0 else { return noErr }

        // Make sure the reusable capture buffer can hold this cycle
        if byteCount > gCaptureBufferCapacity {
            gCaptureBuffer?.deallocate()
            gCaptureBufferCapacity = byteCount * 2
            gCaptureBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: gCaptureBufferCapacity)
        }
        guard let buffer = gCaptureBuffer else { return noErr }

        var bufferList = AudioBufferList(
            mNumberBuffers: 1,
            mBuffers: AudioBuffer(
                mNumberChannels: CHANNELS,
                mDataByteSize: UInt32(byteCount),
                mData: UnsafeMutableRawPointer(buffer)
            )
        )

        // Pull the captured mic audio from element 1
        let renderStatus = AudioUnitRender(
            gAudioUnit, ioActionFlags, inTimeStamp, 1, inNumberFrames, &bufferList
        )
        if renderStatus != noErr { return renderStatus }

        // If no resampling needed, write directly to stdout
        guard needsResampling, let converter = gMicConverter else {
            fwrite(buffer, 1, byteCount, stdout)
            return noErr
        }

        // Resample from vpioRate to micRate; +1 leaves room for rounding
        let inputFrames = inNumberFrames
        let outputFrames = UInt32(Double(inputFrames) * gMicRate / gVpioRate) + 1

        // Ensure resample buffer is large enough
        if Int(outputFrames) > gResampleBufferCapacity {
            gResampleBuffer?.deallocate()
            gResampleBufferCapacity = Int(outputFrames) * 2
            gResampleBuffer = UnsafeMutablePointer<Int16>.allocate(capacity: gResampleBufferCapacity)
        }
        guard let resampled = gResampleBuffer else { return noErr }

        // Publish this cycle's samples to the converter's input callback, and
        // retract them on every exit path so no stale pointer or frame count
        // survives past this callback.
        gConverterInputBuffer = UnsafeMutableRawPointer(buffer).assumingMemoryBound(to: Int16.self)
        gConverterInputFrames = inputFrames
        defer {
            gConverterInputBuffer = nil
            gConverterInputFrames = 0
        }

        var outFrameCount = outputFrames
        var outBufferList = AudioBufferList(
            mNumberBuffers: 1,
            mBuffers: AudioBuffer(
                mNumberChannels: CHANNELS,
                mDataByteSize: outFrameCount * UInt32(BYTES_PER_FRAME),
                mData: UnsafeMutableRawPointer(resampled)
            )
        )

        let convertStatus = AudioConverterFillComplexBuffer(
            converter,
            converterInputProc,
            nil,
            &outFrameCount,
            &outBufferList,
            nil
        )

        // 100 = our "end of data" sentinel, not an error
        if convertStatus != noErr && convertStatus != 100 {
            return convertStatus
        }

        // outFrameCount now holds the number of frames actually produced
        let outBytes = Int(outFrameCount) * BYTES_PER_FRAME
        if outBytes > 0 {
            fwrite(resampled, 1, outBytes, stdout)
        }

        return noErr
    },
    inputProcRefCon: nil
)
|
|
353
|
+
|
|
354
|
+
// Register the mic capture callback on the VPIO unit
let inputCallbackSize = UInt32(MemoryLayout<AURenderCallbackStruct>.size)
status = AudioUnitSetProperty(
    gAudioUnit,
    kAudioOutputUnitProperty_SetInputCallback,
    kAudioUnitScope_Global, 0,
    &inputCallback,
    inputCallbackSize
)
if status != noErr {
    fputs("ERROR: Failed to set input callback (status \(status))\n", stderr)
    exit(1)
}
|
|
365
|
+
|
|
366
|
+
// ============================================================================
|
|
367
|
+
// RENDER CALLBACK (ring buffer -> speakers)
|
|
368
|
+
// ============================================================================
|
|
369
|
+
|
|
370
|
+
/// Render callback feeding the speaker element: fills every output buffer with
/// queued ring-buffer audio and pads any shortfall with silence. A pending
/// clear request is honoured under the ring lock before reading.
var renderCallback = AURenderCallbackStruct(
    inputProc: { (_, _, _, _, inNumberFrames, ioData) -> OSStatus in
        guard let bufferList = ioData else { return noErr }
        let buffers = UnsafeMutableAudioBufferListPointer(bufferList)
        let wanted = Int(inNumberFrames) * BYTES_PER_FRAME

        for index in buffers.indices {
            let out = buffers[index].mData!.assumingMemoryBound(to: UInt8.self)

            os_unfair_lock_lock(&gRingLock)
            if gClearRequested {
                ringClear()
                gClearRequested = false
            }
            let got = ringRead(out, count: wanted)
            os_unfair_lock_unlock(&gRingLock)

            // Whatever the ring buffer could not supply is played as silence
            let shortfall = wanted - got
            if shortfall > 0 {
                memset(out + got, 0, shortfall)
            }
            buffers[index].mDataByteSize = UInt32(wanted)
        }

        return noErr
    },
    inputProcRefCon: nil
)
|
|
400
|
+
|
|
401
|
+
// Register the speaker render callback on the input scope of element 0
let renderCallbackSize = UInt32(MemoryLayout<AURenderCallbackStruct>.size)
status = AudioUnitSetProperty(
    gAudioUnit,
    kAudioUnitProperty_SetRenderCallback,
    kAudioUnitScope_Input, 0,
    &renderCallback,
    renderCallbackSize
)
if status != noErr {
    fputs("ERROR: Failed to set render callback (status \(status))\n", stderr)
    exit(1)
}
|
|
412
|
+
|
|
413
|
+
// ============================================================================
|
|
414
|
+
// START
|
|
415
|
+
// ============================================================================
|
|
416
|
+
|
|
417
|
+
status = AudioUnitInitialize(gAudioUnit)
guard status == noErr else {
    fputs("ERROR: Failed to initialize VPIO (status \(status))\n", stderr)
    fputs("  This may mean: no microphone is available, mic access was denied,\n", stderr)
    fputs("  or the audio device doesn't support \(speakerRate)Hz.\n", stderr)
    exit(1)
}

// ============================================================================
// SIGNAL HANDLERS
// ============================================================================

// Installed before READY is announced, so a controlling process that signals
// as soon as it sees READY never hits the default action (which terminates
// the process for SIGUSR1/SIGUSR2).

// SIGUSR1: interrupt playback -- request a ring buffer clear and start
// discarding stdin data
signal(SIGUSR1) { _ in
    gClearRequested = true
    gDiscardStdin = true
}

// SIGUSR2: resume -- stdin data is queued for playback again
signal(SIGUSR2) { _ in
    gDiscardStdin = false
}

// Terminate with _exit: exit() is not async-signal-safe because it flushes
// stdio and can deadlock if the signal interrupts a thread that is inside
// fwrite. stdout was made unbuffered at startup, so no output is lost.
signal(SIGINT) { _ in _exit(0) }
signal(SIGTERM) { _ in _exit(0) }

// ============================================================================
// START
// ============================================================================

status = AudioOutputUnitStart(gAudioUnit)
guard status == noErr else {
    fputs("ERROR: Failed to start VPIO (status \(status))\n", stderr)
    exit(1)
}

fputs("READY\n", stderr)

// ============================================================================
// STDIN READER THREAD (TTS audio -> ring buffer)
// ============================================================================

let stdinThread = Thread {
    let chunkSize = 4096
    let buf = UnsafeMutablePointer<UInt8>.allocate(capacity: chunkSize)
    defer { buf.deallocate() }

    while true {
        let bytesRead = fread(buf, 1, chunkSize, stdin)
        // A zero-byte read (EOF or error) ends the reader thread
        if bytesRead == 0 { break }

        // After SIGUSR1 (interrupt), discard stale pipe data until SIGUSR2 (resume)
        if gDiscardStdin { continue }

        var offset = 0
        while offset < bytesRead {
            // Re-check discard flag inside the write loop in case SIGUSR1 arrives
            // while we're draining a large read into the ring buffer
            if gDiscardStdin { break }

            os_unfair_lock_lock(&gRingLock)
            let written = ringWrite(buf.advanced(by: offset), count: bytesRead - offset)
            os_unfair_lock_unlock(&gRingLock)

            offset += written
            if written == 0 {
                // Ring buffer is full: back off briefly so playback can drain it
                Thread.sleep(forTimeInterval: 0.001)
            }
        }
    }
}
stdinThread.start()

// Park the main thread; the audio callbacks, reader thread and signal
// handlers do all the work from here on
dispatchMain()
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tagged mock TTS server for reproducing stale-audio bugs.
|
|
3
|
+
*
|
|
4
|
+
* Like mock-tts-server.mjs but with the following differences:
|
|
5
|
+
* - Tags each PCM chunk with a generation counter byte (0x01, 0x02, ...)
|
|
6
|
+
* so tests can identify which generation produced a given chunk.
|
|
7
|
+
* - Honors the interrupt command between chunks: the interrupted flag is set
|
|
8
|
+
* as soon as the command is read, and the generate loop checks it before and
|
|
9
|
+
* after each inter-chunk delay.
|
|
10
|
+
* - Writes chunks with small delays to simulate real TTS generation latency.
|
|
11
|
+
*
|
|
12
|
+
* Protocol: same as tts-server.py (JSON stdin, length-prefixed PCM stdout).
|
|
13
|
+
*
|
|
14
|
+
* Run: node sidecar/mock-tts-server-tagged.mjs
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { createInterface } from "readline";
|
|
18
|
+
|
|
19
|
+
// ============================================================================
|
|
20
|
+
// CONSTANTS
|
|
21
|
+
// ============================================================================
|
|
22
|
+
|
|
23
|
+
/** Payload size of one PCM chunk in bytes: 10 ms of 24 kHz mono 16-bit audio */
const CHUNK_SIZE = 480;

// Optional positional arguments: [chunks per generate] [delay between chunks in ms]
const [, , chunksArg, delayArg] = process.argv;

/** Number of chunks per generate command (overridable via argv[2]) */
const CHUNKS_PER_GENERATE = Number.parseInt(chunksArg || "15");

/** Delay between chunks in ms (overridable via argv[3]) */
const CHUNK_DELAY_MS = Number.parseInt(delayArg || "10");
|
|
31
|
+
|
|
32
|
+
// ============================================================================
|
|
33
|
+
// STATE
|
|
34
|
+
// ============================================================================
|
|
35
|
+
|
|
36
|
+
/** Count of generate commands handled so far; the first generate makes it 1. */
let genCounter = 0;

/** True while drainQueue is working through the queue */
let processing = false;

/** Interrupt flag -- raised as soon as an interrupt command is read */
let interrupted = false;

/** FIFO of commands waiting to be handled one at a time */
const pendingCommands = [];
|
|
45
|
+
|
|
46
|
+
// ============================================================================
|
|
47
|
+
// HELPERS
|
|
48
|
+
// ============================================================================
|
|
49
|
+
|
|
50
|
+
/** Returns a promise that resolves after roughly `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
53
|
+
|
|
54
|
+
/**
 * Writes one length-prefixed chunk to stdout: a 4-byte big-endian length
 * (CHUNK_SIZE) followed by CHUNK_SIZE bytes, each set to `tag`.
 */
function writeChunk(tag) {
  const lengthPrefix = Buffer.alloc(4);
  lengthPrefix.writeUInt32BE(CHUNK_SIZE, 0);

  const payload = Buffer.alloc(CHUNK_SIZE).fill(tag);

  process.stdout.write(lengthPrefix);
  process.stdout.write(payload);
}
|
|
60
|
+
|
|
61
|
+
/** Writes four zero bytes to stdout, i.e. a length prefix of 0. */
function writeEndMarker() {
  const endMarker = Buffer.from([0, 0, 0, 0]);
  process.stdout.write(endMarker);
}
|
|
64
|
+
|
|
65
|
+
// ============================================================================
|
|
66
|
+
// COMMAND PROCESSING
|
|
67
|
+
// ============================================================================
|
|
68
|
+
|
|
69
|
+
/**
 * Process queued commands serially, like the real Python server.
 * A new generate cannot start until the previous one finishes.
 *
 * Re-entrant calls return immediately while a drain is already running; the
 * running drain picks up anything queued in the meantime. The `processing`
 * flag is released even if a command handler throws, so a single failure
 * cannot wedge the queue for every later command.
 */
async function drainQueue() {
  if (processing) return;
  processing = true;

  try {
    while (pendingCommands.length > 0) {
      const cmd = pendingCommands.shift();
      await handleCommand(cmd);
    }
  } finally {
    processing = false;
  }
}
|
|
84
|
+
|
|
85
|
+
/**
 * Handles one queued command. Only "generate" does anything: it clears the
 * interrupt flag, bumps the generation counter, emits up to
 * CHUNKS_PER_GENERATE chunks tagged with the low byte of that counter
 * (waiting CHUNK_DELAY_MS before each one), and finishes with the end marker.
 * An interrupt seen before or after a delay stops the chunks early; the end
 * marker is still written.
 */
async function handleCommand(cmd) {
  if (cmd.cmd !== "generate") return;

  interrupted = false;
  genCounter += 1;
  const tag = genCounter & 0xff;

  let remaining = CHUNKS_PER_GENERATE;
  while (remaining > 0 && !interrupted) {
    await sleep(CHUNK_DELAY_MS);
    // The interrupt may have arrived while we were waiting
    if (interrupted) break;
    writeChunk(tag);
    remaining -= 1;
  }

  writeEndMarker();
}
|
|
101
|
+
|
|
102
|
+
// ============================================================================
|
|
103
|
+
// ENTRY POINT
|
|
104
|
+
// ============================================================================
|
|
105
|
+
|
|
106
|
+
// Announce readiness on stderr; stdout is reserved for the PCM stream
process.stderr.write("READY\n");

const rl = createInterface({ input: process.stdin });

// One JSON command per stdin line
rl.on("line", (line) => {
  let cmd;
  try {
    cmd = JSON.parse(line);
  } catch {
    // Not JSON: ignore the line
    return;
  }

  // Valid JSON that is not an object (null, numbers, strings, ...) carries no
  // command; ignore it rather than crashing on `cmd.cmd` or queueing junk.
  if (cmd === null || typeof cmd !== "object") {
    return;
  }

  if (cmd.cmd === "quit") {
    process.exit(0);
  }

  // Handle interrupt immediately -- not through the serial queue.
  // The event loop processes this between awaits in handleCommand,
  // so the interrupted flag is visible to the generate loop.
  if (cmd.cmd === "interrupt") {
    interrupted = true;
    return;
  }

  pendingCommands.push(cmd);
  drainQueue();
});
|