agent.libx.js 0.92.6 → 0.92.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +188 -6
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +28 -0
- package/dist/index.js +88 -4
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -745,6 +745,11 @@ interface AudioSink {
|
|
|
745
745
|
playedMs(): number;
|
|
746
746
|
/** stop playback NOW (barge-in primitive) */
|
|
747
747
|
kill(): void;
|
|
748
|
+
/** optional exact-sample pause/resume — enables the overlap trail-off tier (web: AudioContext
|
|
749
|
+
* suspend/resume; CLI AEC helper: control frames). Sinks without it degrade to interrupt-only
|
|
750
|
+
* turn-taking. Nothing is lost across a pause; playedMs/drainMs must exclude paused time. */
|
|
751
|
+
pause?(): void;
|
|
752
|
+
resume?(): void;
|
|
748
753
|
}
|
|
749
754
|
/** Static key (server/CLI) or an async getter (browser: fetch a short-lived token from YOUR
|
|
750
755
|
* backend). Getters are invoked on EVERY (re)connect — temp tokens expire, so a reconnect
|
|
@@ -794,6 +799,18 @@ declare class VoiceEngineOptions {
|
|
|
794
799
|
/** heuristic (non-AEC) energy barge-in tuning */
|
|
795
800
|
bargeRmsMult: number;
|
|
796
801
|
bargeRmsFloor: number;
|
|
802
|
+
/** Overlap turn-taking (AEC tier, needs player.pause/resume) — human phone-call model:
|
|
803
|
+
* onset → PAUSE (exact-sample hold, nothing lost); sustained overlap → cede (interrupt; the LLM
|
|
804
|
+
* re-enters). Brief overlaps that die out (backchannels — "mm-hm", decided by DURATION, not
|
|
805
|
+
* vocabulary) resume from the precise sample and are dropped. false disables. */
|
|
806
|
+
overlapPause: boolean;
|
|
807
|
+
/** sustained overlap ≥ this → cede the turn */
|
|
808
|
+
overlapSustainMs: number;
|
|
809
|
+
/** quiet for this long while paused → resume, drop the interjection */
|
|
810
|
+
overlapResumeMs: number;
|
|
811
|
+
/** energy floor for "overlap candidate" — must sit ABOVE typical room ambient (~110 rms measured;
|
|
812
|
+
* ungated ambient re-arming the resume timer forever was a live wedge). User speech ≫ 300. */
|
|
813
|
+
overlapRms: number;
|
|
797
814
|
}
|
|
798
815
|
declare class VoiceEngine {
|
|
799
816
|
options: VoiceEngineOptions;
|
|
@@ -817,6 +834,10 @@ declare class VoiceEngine {
|
|
|
817
834
|
private pendingUtt;
|
|
818
835
|
private pendingTimer;
|
|
819
836
|
private lastInterrupted;
|
|
837
|
+
private pausedAt;
|
|
838
|
+
private overlapLoud;
|
|
839
|
+
private overlapLastLoudAt;
|
|
840
|
+
private resumeTimer;
|
|
820
841
|
constructor(options?: Partial<VoiceEngineOptions>);
|
|
821
842
|
start(): Promise<void>;
|
|
822
843
|
get usingAec(): boolean;
|
|
@@ -852,6 +873,13 @@ declare class VoiceEngine {
|
|
|
852
873
|
private handlePartial;
|
|
853
874
|
private handleUtterance;
|
|
854
875
|
private flushUtterance;
|
|
876
|
+
private get overlapCapable();
|
|
877
|
+
/** Overlap turn-taking (AEC tier): onset → pause (exact-sample hold); sustained → cede; died out
|
|
878
|
+
* → resume. No vocabulary anywhere — duration and persistence decide (backchannels are short
|
|
879
|
+
* and stop). Nothing is lost across a pause, so a false positive costs only a brief hold. */
|
|
880
|
+
private handleOverlap;
|
|
881
|
+
private armResume;
|
|
882
|
+
private resetOverlap;
|
|
855
883
|
/** energy two-stage barge-in (heuristic tier only): spike over echo baseline → pause + confirm via STT */
|
|
856
884
|
private handleLevel;
|
|
857
885
|
}
|
package/dist/index.js
CHANGED
|
@@ -3652,12 +3652,14 @@ ${recent}` : brief;
|
|
|
3652
3652
|
report.output(chunk);
|
|
3653
3653
|
}
|
|
3654
3654
|
} : base;
|
|
3655
|
+
const workerHost = o.host?.ask ? { ask: (q) => o.host.ask(q) } : void 0;
|
|
3655
3656
|
const worker = new Agent({
|
|
3656
3657
|
ai: o.ai,
|
|
3657
3658
|
fs: o.fs,
|
|
3658
3659
|
model: o.workerModel,
|
|
3659
3660
|
...o.workerOptions,
|
|
3660
3661
|
// may override ai/fs/model/tools/… —
|
|
3662
|
+
...workerHost ? { host: workerHost } : {},
|
|
3661
3663
|
...hooks ? { hooks } : {},
|
|
3662
3664
|
signal: controller.signal
|
|
3663
3665
|
// …but never the per-task cancellation signal
|
|
@@ -4000,6 +4002,18 @@ var VoiceEngineOptions = class {
|
|
|
4000
4002
|
/** heuristic (non-AEC) energy barge-in tuning */
|
|
4001
4003
|
bargeRmsMult = 2;
|
|
4002
4004
|
bargeRmsFloor = 500;
|
|
4005
|
+
/** Overlap turn-taking (AEC tier, needs player.pause/resume) — human phone-call model:
|
|
4006
|
+
* onset → PAUSE (exact-sample hold, nothing lost); sustained overlap → cede (interrupt; the LLM
|
|
4007
|
+
* re-enters). Brief overlaps that die out (backchannels — "mm-hm", decided by DURATION, not
|
|
4008
|
+
* vocabulary) resume from the precise sample and are dropped. false disables. */
|
|
4009
|
+
overlapPause = true;
|
|
4010
|
+
/** sustained overlap ≥ this → cede the turn */
|
|
4011
|
+
overlapSustainMs = 350;
|
|
4012
|
+
/** quiet for this long while paused → resume, drop the interjection */
|
|
4013
|
+
overlapResumeMs = 700;
|
|
4014
|
+
/** energy floor for "overlap candidate" — must sit ABOVE typical room ambient (~110 rms measured;
|
|
4015
|
+
* ungated ambient re-arming the resume timer forever was a live wedge). User speech ≫ 300. */
|
|
4016
|
+
overlapRms = 300;
|
|
4003
4017
|
};
|
|
4004
4018
|
var VoiceEngine = class {
|
|
4005
4019
|
options;
|
|
@@ -4030,6 +4044,13 @@ var VoiceEngine = class {
|
|
|
4030
4044
|
// endpointed text held for the merge window
|
|
4031
4045
|
pendingTimer = null;
|
|
4032
4046
|
lastInterrupted = null;
|
|
4047
|
+
// overlap (pause) tier state — AEC + pause-capable sinks only
|
|
4048
|
+
pausedAt = 0;
|
|
4049
|
+
overlapLoud = 0;
|
|
4050
|
+
// loud chunks since pause (sustain must be real sound, not two clicks)
|
|
4051
|
+
overlapLastLoudAt = 0;
|
|
4052
|
+
// continuity guard: a gap re-arms the onset (sparse noise ≠ sustained speech)
|
|
4053
|
+
resumeTimer = null;
|
|
4033
4054
|
constructor(options) {
|
|
4034
4055
|
this.options = { ...new VoiceEngineOptions(), ...options };
|
|
4035
4056
|
const o = this.options;
|
|
@@ -4077,6 +4098,7 @@ var VoiceEngine = class {
|
|
|
4077
4098
|
this.drainTimer = null;
|
|
4078
4099
|
}
|
|
4079
4100
|
this.interrupted = false;
|
|
4101
|
+
this.resetOverlap(true);
|
|
4080
4102
|
if (!this.speaking) this.player.markTurn();
|
|
4081
4103
|
this.speaking = true;
|
|
4082
4104
|
this.ctxOpen = true;
|
|
@@ -4111,6 +4133,10 @@ var VoiceEngine = class {
|
|
|
4111
4133
|
this.drainTimer = null;
|
|
4112
4134
|
return;
|
|
4113
4135
|
}
|
|
4136
|
+
if (this.pausedAt) {
|
|
4137
|
+
this.drainTimer = setTimeout(settle, 250);
|
|
4138
|
+
return;
|
|
4139
|
+
}
|
|
4114
4140
|
this.drainTimer = null;
|
|
4115
4141
|
this.speaking = false;
|
|
4116
4142
|
this.echoUntil = now() + 2500;
|
|
@@ -4142,6 +4168,7 @@ var VoiceEngine = class {
|
|
|
4142
4168
|
clearTimeout(this.drainTimer);
|
|
4143
4169
|
this.drainTimer = null;
|
|
4144
4170
|
}
|
|
4171
|
+
this.resetOverlap(false);
|
|
4145
4172
|
const heardChars = Math.round(Math.max(0, this.player.playedMs()) / 1e3 * 15);
|
|
4146
4173
|
if (this.reply) this.lastInterrupted = { full: this.reply, heard: this.reply.slice(0, heardChars) };
|
|
4147
4174
|
this.speaking = false;
|
|
@@ -4156,6 +4183,7 @@ var VoiceEngine = class {
|
|
|
4156
4183
|
this.setState("listening");
|
|
4157
4184
|
}
|
|
4158
4185
|
stop() {
|
|
4186
|
+
if (this.resumeTimer) clearTimeout(this.resumeTimer);
|
|
4159
4187
|
if (this.pendingTimer) clearTimeout(this.pendingTimer);
|
|
4160
4188
|
if (this.drainTimer) clearTimeout(this.drainTimer);
|
|
4161
4189
|
this.stt.stop();
|
|
@@ -4181,12 +4209,11 @@ var VoiceEngine = class {
|
|
|
4181
4209
|
genuine(text) {
|
|
4182
4210
|
const total = this.words(text).length;
|
|
4183
4211
|
const novel = this.novelWords(text).length;
|
|
4184
|
-
|
|
4185
|
-
return novel >= 2 || novel / Math.max(1, total) > 0.5;
|
|
4212
|
+
return novel > 0 && novel / Math.max(1, total) > 0.5;
|
|
4186
4213
|
}
|
|
4187
4214
|
handlePartial(text) {
|
|
4188
4215
|
if (this.speaking) {
|
|
4189
|
-
const barge = this.usingAec ? this.genuine(text) : this.novelWords(text).length >= (this.suspectUntil ? 1 : 2);
|
|
4216
|
+
const barge = this.overlapCapable ? false : this.usingAec ? this.genuine(text) : this.novelWords(text).length >= (this.suspectUntil ? 1 : 2);
|
|
4190
4217
|
if (barge) {
|
|
4191
4218
|
const phase = this.ctxOpen ? "speaking" : "drain";
|
|
4192
4219
|
this.interrupt();
|
|
@@ -4203,6 +4230,10 @@ var VoiceEngine = class {
|
|
|
4203
4230
|
if (!this.echoActive() || (this.usingAec ? this.genuine(text) : this.novelWords(text).length >= 1)) this.options.onPartial(text);
|
|
4204
4231
|
}
|
|
4205
4232
|
handleUtterance(text) {
|
|
4233
|
+
if (this.speaking && this.ctxOpen && this.overlapCapable) {
|
|
4234
|
+
this.stt.reset();
|
|
4235
|
+
return;
|
|
4236
|
+
}
|
|
4206
4237
|
if (this.echoActive() && (this.usingAec ? !this.genuine(text) : this.novelWords(text).length < 2)) {
|
|
4207
4238
|
this.stt.reset();
|
|
4208
4239
|
return;
|
|
@@ -4226,9 +4257,62 @@ var VoiceEngine = class {
|
|
|
4226
4257
|
this.pendingUtt = "";
|
|
4227
4258
|
if (text) this.options.onUtterance(text);
|
|
4228
4259
|
}
|
|
4260
|
+
get overlapCapable() {
|
|
4261
|
+
return this.usingAec && this.options.overlapPause && !!this.player.pause && !!this.player.resume;
|
|
4262
|
+
}
|
|
4263
|
+
/** Overlap turn-taking (AEC tier): onset → pause (exact-sample hold); sustained → cede; died out
|
|
4264
|
+
* → resume. No vocabulary anywhere — duration and persistence decide (backchannels are short
|
|
4265
|
+
* and stop). Nothing is lost across a pause, so a false positive costs only a brief hold. */
|
|
4266
|
+
handleOverlap(rms) {
|
|
4267
|
+
const o = this.options;
|
|
4268
|
+
if (!this.speaking || !this.overlapCapable) return;
|
|
4269
|
+
if (rms < o.overlapRms) return;
|
|
4270
|
+
const t = now();
|
|
4271
|
+
if (!this.pausedAt) {
|
|
4272
|
+
this.pausedAt = t;
|
|
4273
|
+
this.overlapLoud = 1;
|
|
4274
|
+
this.overlapLastLoudAt = t;
|
|
4275
|
+
this.player.pause();
|
|
4276
|
+
this.armResume();
|
|
4277
|
+
return;
|
|
4278
|
+
}
|
|
4279
|
+
if (t - this.overlapLastLoudAt > 300) {
|
|
4280
|
+
this.pausedAt = t;
|
|
4281
|
+
this.overlapLoud = 1;
|
|
4282
|
+
this.overlapLastLoudAt = t;
|
|
4283
|
+
this.armResume();
|
|
4284
|
+
return;
|
|
4285
|
+
}
|
|
4286
|
+
this.overlapLastLoudAt = t;
|
|
4287
|
+
this.overlapLoud++;
|
|
4288
|
+
if (t - this.pausedAt >= o.overlapSustainMs && this.overlapLoud >= 4) {
|
|
4289
|
+
const phase = this.ctxOpen ? "speaking" : "drain";
|
|
4290
|
+
this.interrupt();
|
|
4291
|
+
this.options.onBargeIn(phase);
|
|
4292
|
+
return;
|
|
4293
|
+
}
|
|
4294
|
+
this.armResume();
|
|
4295
|
+
}
|
|
4296
|
+
armResume() {
|
|
4297
|
+
if (this.resumeTimer) clearTimeout(this.resumeTimer);
|
|
4298
|
+
this.resumeTimer = setTimeout(() => {
|
|
4299
|
+
this.resumeTimer = null;
|
|
4300
|
+
if (!this.pausedAt) return;
|
|
4301
|
+
this.resetOverlap(true);
|
|
4302
|
+
}, this.options.overlapResumeMs);
|
|
4303
|
+
}
|
|
4304
|
+
resetOverlap(resume) {
|
|
4305
|
+
if (this.resumeTimer) {
|
|
4306
|
+
clearTimeout(this.resumeTimer);
|
|
4307
|
+
this.resumeTimer = null;
|
|
4308
|
+
}
|
|
4309
|
+
if (this.pausedAt && resume) this.player.resume?.();
|
|
4310
|
+
this.pausedAt = 0;
|
|
4311
|
+
this.overlapLoud = 0;
|
|
4312
|
+
}
|
|
4229
4313
|
/** energy two-stage barge-in (heuristic tier only): spike over echo baseline → pause + confirm via STT */
|
|
4230
4314
|
handleLevel(rms) {
|
|
4231
|
-
if (this.usingAec) return;
|
|
4315
|
+
if (this.usingAec) return this.handleOverlap(rms);
|
|
4232
4316
|
if (!this.speaking) {
|
|
4233
4317
|
this.baseline = 0;
|
|
4234
4318
|
this.hot = 0;
|