@livx.cc/agentx 0.96.15 → 0.96.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{Agent-DRe91tAy.d.ts → Agent-DdhD1pGw.d.ts} +15 -0
- package/dist/cli.d.ts +1 -1
- package/dist/cli.js +109 -23
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +13 -2
- package/dist/index.js +52 -2
- package/dist/index.js.map +1 -1
- package/dist/native/mic-aec.swift +38 -4
- package/package.json +1 -1
|
@@ -34,9 +34,16 @@ FileHandle.standardError.write("mic access granted: \(granted)\n".data(using: .u
|
|
|
34
34
|
|
|
35
35
|
let engine = AVAudioEngine()
|
|
36
36
|
let input = engine.inputNode
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
37
|
+
// VPIO gives hardware echo cancellation, but on some Macs (observed: macOS 26.5.x + a built-in mic that
|
|
38
|
+
// VPIO exposes as a multichannel format) the VP input tap NEVER delivers buffers — capture is dead. The
|
|
39
|
+
// host detects that (no audio) and respawns us with MIC_NO_VPIO=1: plain capture, no AEC (heuristic-echo
|
|
40
|
+
// tier, gated below). Skipping VPIO trades echo cancellation for a mic that actually works.
|
|
41
|
+
let useVPIO = ProcessInfo.processInfo.environment["MIC_NO_VPIO"] != "1"
|
|
42
|
+
if useVPIO {
|
|
43
|
+
do { try input.setVoiceProcessingEnabled(true) } catch {
|
|
44
|
+
FileHandle.standardError.write("AEC unavailable: \(error)\n".data(using: .utf8)!)
|
|
45
|
+
exit(1)
|
|
46
|
+
}
|
|
40
47
|
}
|
|
41
48
|
// the VP I/O unit is full-duplex: input stays muted unless the output side renders too. The mixer→
|
|
42
49
|
// output format must match the VP output's hardware format (nil/default fails kAUInitialize -10875).
|
|
@@ -44,9 +51,10 @@ let outHw = engine.outputNode.outputFormat(forBus: 0)
|
|
|
44
51
|
engine.connect(engine.mainMixerNode, to: engine.outputNode, format: outHw)
|
|
45
52
|
engine.mainMixerNode.outputVolume = 1 // playback is audible now (was 0 when this path was only a keep-alive)
|
|
46
53
|
// VP ducks all other system audio by default — disable (we ARE the audio).
|
|
47
|
-
if #available(macOS 14.0, *) {
|
|
54
|
+
if useVPIO, #available(macOS 14.0, *) {
|
|
48
55
|
input.voiceProcessingOtherAudioDuckingConfiguration = .init(enableAdvancedDucking: false, duckingLevel: .min)
|
|
49
56
|
}
|
|
57
|
+
FileHandle.standardError.write("mode: \(useVPIO ? "vpio" : "no-vpio")\n".data(using: .utf8)!)
|
|
50
58
|
|
|
51
59
|
// ---- playback: framed s16le 44.1k mono on stdin → player node (the VPIO reference signal) ----
|
|
52
60
|
let playFmt = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: 44100, channels: 1, interleaved: false)!
|
|
@@ -92,6 +100,14 @@ func playbackEnvNow() -> Float {
|
|
|
92
100
|
let inFmt = input.outputFormat(forBus: 0)
|
|
93
101
|
let decim = max(1, Int(inFmt.sampleRate / 16000))
|
|
94
102
|
let out = FileHandle.standardOutput
|
|
103
|
+
FileHandle.standardError.write("input format: \(inFmt.sampleRate)Hz \(inFmt.channelCount)ch decim=\(decim)\n".data(using: .utf8)!)
|
|
104
|
+
|
|
105
|
+
// Diagnostic (stderr → surfaced only under DEBUG=VoiceIO): log from INSIDE the tap, throttled to ~once
|
|
106
|
+
// every 2s of audio. A GCD/runloop timer can't be used — AVAudioEngine+VPIO blocks background-queue
|
|
107
|
+
// timers in this process. If these lines appear → the tap fires (rms tells silence vs signal); if NONE
|
|
108
|
+
// appear → the CoreAudio input tap never fires (device/format dead) = no audio can reach STT.
|
|
109
|
+
var diagTaps = 0
|
|
110
|
+
var diagAcc = 0 // frames since last log (×decim ≈ input samples) → throttle by input sample rate
|
|
95
111
|
|
|
96
112
|
input.installTap(onBus: 0, bufferSize: 2048, format: inFmt) { buf, _ in
|
|
97
113
|
guard let ch = buf.floatChannelData else { return }
|
|
@@ -141,6 +157,11 @@ input.installTap(onBus: 0, bufferSize: 2048, format: inFmt) { buf, _ in
|
|
|
141
157
|
let gate = gateEnabled && audible && rms < threshold // user speech must clearly dominate the expected residue
|
|
142
158
|
gateLock.unlock()
|
|
143
159
|
if gate { for k in 0..<pcm.count { pcm[k] = 0 } } // keep the stream's timing — send silence, not nothing
|
|
160
|
+
diagTaps += 1; diagAcc += n
|
|
161
|
+
if diagAcc >= Int(inFmt.sampleRate) * 2 { // ~every 2s of input audio
|
|
162
|
+
FileHandle.standardError.write("tap: \(diagTaps) firings/2s, rms=\(Int(rms)), gate=\(gate)\n".data(using: .utf8)!)
|
|
163
|
+
diagTaps = 0; diagAcc = 0
|
|
164
|
+
}
|
|
144
165
|
pcm.withUnsafeBufferPointer { p in
|
|
145
166
|
out.write(Data(buffer: p))
|
|
146
167
|
}
|
|
@@ -152,6 +173,17 @@ do { try engine.start() } catch {
|
|
|
152
173
|
}
|
|
153
174
|
player.play()
|
|
154
175
|
|
|
176
|
+
// Non-VPIO mode only: the input tap won't fire unless the output graph is actively rendering. Keep the
|
|
177
|
+
// MAIN player warm with a continuous silent loop WHEN IDLE — when real TTS arrives the stdin handler
|
|
178
|
+
// stops this loop, plays the TTS, then resumes the loop on FLUSH. (A separate node does NOT wake the
|
|
179
|
+
// tap — empirically only the main player does.) Pure zeros → inaudible; gate envelope untouched.
|
|
180
|
+
let keepSilence = AVAudioPCMBuffer(pcmFormat: playFmt, frameCapacity: 4410)!; keepSilence.frameLength = 4410
|
|
181
|
+
var loopingSilence = false
|
|
182
|
+
func armIdleKeepAlive() { if !useVPIO { player.scheduleBuffer(keepSilence, at: nil, options: .loops, completionHandler: nil); loopingSilence = true } }
|
|
183
|
+
// Clear the idle loop so a real TTS buffer can play (a .loops buffer never yields the queue otherwise).
|
|
184
|
+
func stopIdleKeepAlive() { if loopingSilence { player.stop(); player.play(); loopingSilence = false } }
|
|
185
|
+
armIdleKeepAlive()
|
|
186
|
+
|
|
155
187
|
// stdin reader (background): framed playback + in-band FLUSH. EOF = parent gone → clean exit.
|
|
156
188
|
DispatchQueue.global(qos: .userInteractive).async {
|
|
157
189
|
let stdin = FileHandle.standardInput
|
|
@@ -171,6 +203,7 @@ DispatchQueue.global(qos: .userInteractive).async {
|
|
|
171
203
|
if len == 0 { // FLUSH: stop drops every scheduled buffer; restart for the next turn
|
|
172
204
|
player.stop()
|
|
173
205
|
player.play()
|
|
206
|
+
armIdleKeepAlive() // non-VPIO: resume the idle silence loop so the input tap stays awake between turns
|
|
174
207
|
pausedAt = 0
|
|
175
208
|
gateLock.lock(); pausedSince = 0; audibleUntil = CACurrentMediaTime(); schedEnv.removeAll(); gateLock.unlock() // gate tail only; envelope timeline resets with playerTime
|
|
176
209
|
continue
|
|
@@ -224,6 +257,7 @@ DispatchQueue.global(qos: .userInteractive).async {
|
|
|
224
257
|
}
|
|
225
258
|
gateLock.unlock()
|
|
226
259
|
}
|
|
260
|
+
stopIdleKeepAlive() // non-VPIO: drop the idle silence loop before the first real TTS buffer
|
|
227
261
|
player.scheduleBuffer(buf, completionHandler: nil)
|
|
228
262
|
// extend the audible window by this buffer's duration (+0.8s tail covers device latency)
|
|
229
263
|
gateLock.lock()
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livx.cc/agentx",
|
|
3
|
-
"version": "0.96.15",
|
|
3
|
+
"version": "0.96.17",
|
|
4
4
|
"description": "Edge-native AI agent runtime — drives a virtual filesystem via any LLM (ai.libx.js). Same bytes run in node, browser, or edge.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|