@livx.cc/agentx 0.96.15 → 0.96.17

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -34,9 +34,16 @@ FileHandle.standardError.write("mic access granted: \(granted)\n".data(using: .u
34
34
 
35
35
  let engine = AVAudioEngine()
36
36
  let input = engine.inputNode
37
- do { try input.setVoiceProcessingEnabled(true) } catch {
38
- FileHandle.standardError.write("AEC unavailable: \(error)\n".data(using: .utf8)!)
39
- exit(1)
37
+ // VPIO gives hardware echo cancellation, but on some Macs (observed: macOS 26.5.x + a built-in mic that
38
+ // VPIO exposes as a multichannel format) the VP input tap NEVER delivers buffers — capture is dead. The
39
+ // host detects that (no audio) and respawns us with MIC_NO_VPIO=1: plain capture, no AEC (heuristic-echo
40
+ // tier, gated below). Skipping VPIO trades echo cancellation for a mic that actually works.
41
+ let useVPIO = ProcessInfo.processInfo.environment["MIC_NO_VPIO"] != "1"
42
+ if useVPIO {
43
+ do { try input.setVoiceProcessingEnabled(true) } catch {
44
+ FileHandle.standardError.write("AEC unavailable: \(error)\n".data(using: .utf8)!)
45
+ exit(1)
46
+ }
40
47
  }
41
48
  // the VP I/O unit is full-duplex: input stays muted unless the output side renders too. The mixer→
42
49
  // output format must match the VP output's hardware format (nil/default fails kAUInitialize -10875).
@@ -44,9 +51,10 @@ let outHw = engine.outputNode.outputFormat(forBus: 0)
44
51
  engine.connect(engine.mainMixerNode, to: engine.outputNode, format: outHw)
45
52
  engine.mainMixerNode.outputVolume = 1 // playback is audible now (was 0 when this path was only a keep-alive)
46
53
  // VP ducks all other system audio by default — disable (we ARE the audio).
47
- if #available(macOS 14.0, *) {
54
+ if useVPIO, #available(macOS 14.0, *) {
48
55
  input.voiceProcessingOtherAudioDuckingConfiguration = .init(enableAdvancedDucking: false, duckingLevel: .min)
49
56
  }
57
+ FileHandle.standardError.write("mode: \(useVPIO ? "vpio" : "no-vpio")\n".data(using: .utf8)!)
50
58
 
51
59
  // ---- playback: framed s16le 44.1k mono on stdin → player node (the VPIO reference signal) ----
52
60
  let playFmt = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 44100, channels: 1, interleaved: false)!
@@ -92,6 +100,14 @@ func playbackEnvNow() -> Float {
92
100
  let inFmt = input.outputFormat(forBus: 0)
93
101
  let decim = max(1, Int(inFmt.sampleRate / 16000))
94
102
  let out = FileHandle.standardOutput
103
+ FileHandle.standardError.write("input format: \(inFmt.sampleRate)Hz \(inFmt.channelCount)ch decim=\(decim)\n".data(using: .utf8)!)
104
+
105
+ // Diagnostic (stderr → surfaced only under DEBUG=VoiceIO): log from INSIDE the tap, throttled to ~once
106
+ // every 2s of audio. A GCD/runloop timer can't be used — AVAudioEngine+VPIO blocks background-queue
107
+ // timers in this process. If these lines appear → the tap fires (rms tells silence vs signal); if NONE
108
+ // appear → the CoreAudio input tap never fires (device/format dead) = no audio can reach STT.
109
+ var diagTaps = 0
110
+ var diagAcc = 0 // frames since last log (×decim ≈ input samples) → throttle by input sample rate
95
111
 
96
112
  input.installTap(onBus: 0, bufferSize: 2048, format: inFmt) { buf, _ in
97
113
  guard let ch = buf.floatChannelData else { return }
@@ -141,6 +157,11 @@ input.installTap(onBus: 0, bufferSize: 2048, format: inFmt) { buf, _ in
141
157
  let gate = gateEnabled && audible && rms < threshold // user speech must clearly dominate the expected residue
142
158
  gateLock.unlock()
143
159
  if gate { for k in 0..<pcm.count { pcm[k] = 0 } } // keep the stream's timing — send silence, not nothing
160
+ diagTaps += 1; diagAcc += n
161
+ if diagAcc >= Int(inFmt.sampleRate) * 2 { // ~every 2s of input audio
162
+ FileHandle.standardError.write("tap: \(diagTaps) firings/2s, rms=\(Int(rms)), gate=\(gate)\n".data(using: .utf8)!)
163
+ diagTaps = 0; diagAcc = 0
164
+ }
144
165
  pcm.withUnsafeBufferPointer { p in
145
166
  out.write(Data(buffer: p))
146
167
  }
@@ -152,6 +173,17 @@ do { try engine.start() } catch {
152
173
  }
153
174
  player.play()
154
175
 
176
+ // Non-VPIO mode only: the input tap won't fire unless the output graph is actively rendering. Keep the
177
+ // MAIN player warm with a continuous silent loop WHEN IDLE — when real TTS arrives the stdin handler
178
+ // stops this loop, plays the TTS, then resumes the loop on FLUSH. (A separate node does NOT wake the
179
+ // tap — empirically only the main player does.) Pure zeros → inaudible; gate envelope untouched.
180
+ let keepSilence = AVAudioPCMBuffer(pcmFormat: playFmt, frameCapacity: 4410)!; keepSilence.frameLength = 4410
181
+ var loopingSilence = false
182
+ func armIdleKeepAlive() { if !useVPIO { player.scheduleBuffer(keepSilence, at: nil, options: .loops, completionHandler: nil); loopingSilence = true } }
183
+ // Clear the idle loop so a real TTS buffer can play (a .loops buffer never yields the queue otherwise).
184
+ func stopIdleKeepAlive() { if loopingSilence { player.stop(); player.play(); loopingSilence = false } }
185
+ armIdleKeepAlive()
186
+
155
187
  // stdin reader (background): framed playback + in-band FLUSH. EOF = parent gone → clean exit.
156
188
  DispatchQueue.global(qos: .userInteractive).async {
157
189
  let stdin = FileHandle.standardInput
@@ -171,6 +203,7 @@ DispatchQueue.global(qos: .userInteractive).async {
171
203
  if len == 0 { // FLUSH: stop drops every scheduled buffer; restart for the next turn
172
204
  player.stop()
173
205
  player.play()
206
+ armIdleKeepAlive() // non-VPIO: resume the idle silence loop so the input tap stays awake between turns
174
207
  pausedAt = 0
175
208
  gateLock.lock(); pausedSince = 0; audibleUntil = CACurrentMediaTime(); schedEnv.removeAll(); gateLock.unlock() // gate tail only; envelope timeline resets with playerTime
176
209
  continue
@@ -224,6 +257,7 @@ DispatchQueue.global(qos: .userInteractive).async {
224
257
  }
225
258
  gateLock.unlock()
226
259
  }
260
+ stopIdleKeepAlive() // non-VPIO: drop the idle silence loop before the first real TTS buffer
227
261
  player.scheduleBuffer(buf, completionHandler: nil)
228
262
  // extend the audible window by this buffer's duration (+0.8s tail covers device latency)
229
263
  gateLock.lock()
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livx.cc/agentx",
3
- "version": "0.96.15",
3
+ "version": "0.96.17",
4
4
  "description": "Edge-native AI agent runtime — drives a virtual filesystem via any LLM (ai.libx.js). Same bytes run in node, browser, or edge.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",