agent.libx.js 0.92.6 → 0.92.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -745,6 +745,11 @@ interface AudioSink {
745
745
  playedMs(): number;
746
746
  /** stop playback NOW (barge-in primitive) */
747
747
  kill(): void;
748
+ /** optional exact-sample pause/resume — enables the overlap trail-off tier (web: AudioContext
749
+ * suspend/resume; CLI AEC helper: control frames). Sinks without it degrade to interrupt-only
750
+ * turn-taking. Nothing is lost across a pause; playedMs/drainMs must exclude paused time. */
751
+ pause?(): void;
752
+ resume?(): void;
748
753
  }
749
754
  /** Static key (server/CLI) or an async getter (browser: fetch a short-lived token from YOUR
750
755
  * backend). Getters are invoked on EVERY (re)connect — temp tokens expire, so a reconnect
@@ -794,6 +799,18 @@ declare class VoiceEngineOptions {
794
799
  /** heuristic (non-AEC) energy barge-in tuning */
795
800
  bargeRmsMult: number;
796
801
  bargeRmsFloor: number;
802
+ /** Overlap turn-taking (AEC tier, needs player.pause/resume) — human phone-call model:
803
+ * onset → PAUSE (exact-sample hold, nothing lost); sustained overlap → cede (interrupt; the LLM
804
+ * re-enters). Brief overlaps that die out (backchannels — "mm-hm", decided by DURATION, not
805
+ * vocabulary) resume from the precise sample and are dropped. false disables. */
806
+ overlapPause: boolean;
807
+ /** sustained overlap ≥ this → cede the turn */
808
+ overlapSustainMs: number;
809
+ /** quiet for this long while paused → resume, drop the interjection */
810
+ overlapResumeMs: number;
811
+ /** energy floor for "overlap candidate" — must sit ABOVE typical room ambient (~110 rms measured;
812
+ * ungated ambient re-arming the resume timer forever was a live wedge). User speech ≫ 300. */
813
+ overlapRms: number;
797
814
  }
798
815
  declare class VoiceEngine {
799
816
  options: VoiceEngineOptions;
@@ -817,6 +834,10 @@ declare class VoiceEngine {
817
834
  private pendingUtt;
818
835
  private pendingTimer;
819
836
  private lastInterrupted;
837
+ private pausedAt;
838
+ private overlapLoud;
839
+ private overlapLastLoudAt;
840
+ private resumeTimer;
820
841
  constructor(options?: Partial<VoiceEngineOptions>);
821
842
  start(): Promise<void>;
822
843
  get usingAec(): boolean;
@@ -852,6 +873,13 @@ declare class VoiceEngine {
852
873
  private handlePartial;
853
874
  private handleUtterance;
854
875
  private flushUtterance;
876
+ private get overlapCapable();
877
+ /** Overlap turn-taking (AEC tier): onset → pause (exact-sample hold); sustained → cede; died out
878
+ * → resume. No vocabulary anywhere — duration and persistence decide (backchannels are short
879
+ * and stop). Nothing is lost across a pause, so a false positive costs only a brief hold. */
880
+ private handleOverlap;
881
+ private armResume;
882
+ private resetOverlap;
855
883
  /** energy two-stage barge-in (heuristic tier only): spike over echo baseline → pause + confirm via STT */
856
884
  private handleLevel;
857
885
  }
package/dist/index.js CHANGED
@@ -3652,12 +3652,14 @@ ${recent}` : brief;
3652
3652
  report.output(chunk);
3653
3653
  }
3654
3654
  } : base;
3655
+ const workerHost = o.host?.ask ? { ask: (q) => o.host.ask(q) } : void 0;
3655
3656
  const worker = new Agent({
3656
3657
  ai: o.ai,
3657
3658
  fs: o.fs,
3658
3659
  model: o.workerModel,
3659
3660
  ...o.workerOptions,
3660
3661
  // may override ai/fs/model/tools/… —
3662
+ ...workerHost ? { host: workerHost } : {},
3661
3663
  ...hooks ? { hooks } : {},
3662
3664
  signal: controller.signal
3663
3665
  // …but never the per-task cancellation signal
@@ -4000,6 +4002,18 @@ var VoiceEngineOptions = class {
4000
4002
  /** heuristic (non-AEC) energy barge-in tuning */
4001
4003
  bargeRmsMult = 2;
4002
4004
  bargeRmsFloor = 500;
4005
+ /** Overlap turn-taking (AEC tier, needs player.pause/resume) — human phone-call model:
4006
+ * onset → PAUSE (exact-sample hold, nothing lost); sustained overlap → cede (interrupt; the LLM
4007
+ * re-enters). Brief overlaps that die out (backchannels — "mm-hm", decided by DURATION, not
4008
+ * vocabulary) resume from the precise sample and are dropped. false disables. */
4009
+ overlapPause = true;
4010
+ /** sustained overlap ≥ this → cede the turn */
4011
+ overlapSustainMs = 350;
4012
+ /** quiet for this long while paused → resume, drop the interjection */
4013
+ overlapResumeMs = 700;
4014
+ /** energy floor for "overlap candidate" — must sit ABOVE typical room ambient (~110 rms measured;
4015
+ * ungated ambient re-arming the resume timer forever was a live wedge). User speech ≫ 300. */
4016
+ overlapRms = 300;
4003
4017
  };
4004
4018
  var VoiceEngine = class {
4005
4019
  options;
@@ -4030,6 +4044,13 @@ var VoiceEngine = class {
4030
4044
  // endpointed text held for the merge window
4031
4045
  pendingTimer = null;
4032
4046
  lastInterrupted = null;
4047
+ // overlap (pause) tier state — AEC + pause-capable sinks only
4048
+ pausedAt = 0;
4049
+ overlapLoud = 0;
4050
+ // loud chunks since pause (sustain must be real sound, not two clicks)
4051
+ overlapLastLoudAt = 0;
4052
+ // continuity guard: a gap re-arms the onset (sparse noise ≠ sustained speech)
4053
+ resumeTimer = null;
4033
4054
  constructor(options) {
4034
4055
  this.options = { ...new VoiceEngineOptions(), ...options };
4035
4056
  const o = this.options;
@@ -4077,6 +4098,7 @@ var VoiceEngine = class {
4077
4098
  this.drainTimer = null;
4078
4099
  }
4079
4100
  this.interrupted = false;
4101
+ this.resetOverlap(true);
4080
4102
  if (!this.speaking) this.player.markTurn();
4081
4103
  this.speaking = true;
4082
4104
  this.ctxOpen = true;
@@ -4111,6 +4133,10 @@ var VoiceEngine = class {
4111
4133
  this.drainTimer = null;
4112
4134
  return;
4113
4135
  }
4136
+ if (this.pausedAt) {
4137
+ this.drainTimer = setTimeout(settle, 250);
4138
+ return;
4139
+ }
4114
4140
  this.drainTimer = null;
4115
4141
  this.speaking = false;
4116
4142
  this.echoUntil = now() + 2500;
@@ -4142,6 +4168,7 @@ var VoiceEngine = class {
4142
4168
  clearTimeout(this.drainTimer);
4143
4169
  this.drainTimer = null;
4144
4170
  }
4171
+ this.resetOverlap(false);
4145
4172
  const heardChars = Math.round(Math.max(0, this.player.playedMs()) / 1e3 * 15);
4146
4173
  if (this.reply) this.lastInterrupted = { full: this.reply, heard: this.reply.slice(0, heardChars) };
4147
4174
  this.speaking = false;
@@ -4156,6 +4183,7 @@ var VoiceEngine = class {
4156
4183
  this.setState("listening");
4157
4184
  }
4158
4185
  stop() {
4186
+ if (this.resumeTimer) clearTimeout(this.resumeTimer);
4159
4187
  if (this.pendingTimer) clearTimeout(this.pendingTimer);
4160
4188
  if (this.drainTimer) clearTimeout(this.drainTimer);
4161
4189
  this.stt.stop();
@@ -4181,12 +4209,11 @@ var VoiceEngine = class {
4181
4209
  genuine(text) {
4182
4210
  const total = this.words(text).length;
4183
4211
  const novel = this.novelWords(text).length;
4184
- if (!novel) return false;
4185
- return novel >= 2 || novel / Math.max(1, total) > 0.5;
4212
+ return novel > 0 && novel / Math.max(1, total) > 0.5;
4186
4213
  }
4187
4214
  handlePartial(text) {
4188
4215
  if (this.speaking) {
4189
- const barge = this.usingAec ? this.genuine(text) : this.novelWords(text).length >= (this.suspectUntil ? 1 : 2);
4216
+ const barge = this.overlapCapable ? false : this.usingAec ? this.genuine(text) : this.novelWords(text).length >= (this.suspectUntil ? 1 : 2);
4190
4217
  if (barge) {
4191
4218
  const phase = this.ctxOpen ? "speaking" : "drain";
4192
4219
  this.interrupt();
@@ -4203,6 +4230,10 @@ var VoiceEngine = class {
4203
4230
  if (!this.echoActive() || (this.usingAec ? this.genuine(text) : this.novelWords(text).length >= 1)) this.options.onPartial(text);
4204
4231
  }
4205
4232
  handleUtterance(text) {
4233
+ if (this.speaking && this.ctxOpen && this.overlapCapable) {
4234
+ this.stt.reset();
4235
+ return;
4236
+ }
4206
4237
  if (this.echoActive() && (this.usingAec ? !this.genuine(text) : this.novelWords(text).length < 2)) {
4207
4238
  this.stt.reset();
4208
4239
  return;
@@ -4226,9 +4257,62 @@ var VoiceEngine = class {
4226
4257
  this.pendingUtt = "";
4227
4258
  if (text) this.options.onUtterance(text);
4228
4259
  }
4260
+ get overlapCapable() {
4261
+ return this.usingAec && this.options.overlapPause && !!this.player.pause && !!this.player.resume;
4262
+ }
4263
+ /** Overlap turn-taking (AEC tier): onset → pause (exact-sample hold); sustained → cede; died out
4264
+ * → resume. No vocabulary anywhere — duration and persistence decide (backchannels are short
4265
+ * and stop). Nothing is lost across a pause, so a false positive costs only a brief hold. */
4266
+ handleOverlap(rms) {
4267
+ const o = this.options;
4268
+ if (!this.speaking || !this.overlapCapable) return;
4269
+ if (rms < o.overlapRms) return;
4270
+ const t = now();
4271
+ if (!this.pausedAt) {
4272
+ this.pausedAt = t;
4273
+ this.overlapLoud = 1;
4274
+ this.overlapLastLoudAt = t;
4275
+ this.player.pause();
4276
+ this.armResume();
4277
+ return;
4278
+ }
4279
+ if (t - this.overlapLastLoudAt > 300) {
4280
+ this.pausedAt = t;
4281
+ this.overlapLoud = 1;
4282
+ this.overlapLastLoudAt = t;
4283
+ this.armResume();
4284
+ return;
4285
+ }
4286
+ this.overlapLastLoudAt = t;
4287
+ this.overlapLoud++;
4288
+ if (t - this.pausedAt >= o.overlapSustainMs && this.overlapLoud >= 4) {
4289
+ const phase = this.ctxOpen ? "speaking" : "drain";
4290
+ this.interrupt();
4291
+ this.options.onBargeIn(phase);
4292
+ return;
4293
+ }
4294
+ this.armResume();
4295
+ }
4296
+ armResume() {
4297
+ if (this.resumeTimer) clearTimeout(this.resumeTimer);
4298
+ this.resumeTimer = setTimeout(() => {
4299
+ this.resumeTimer = null;
4300
+ if (!this.pausedAt) return;
4301
+ this.resetOverlap(true);
4302
+ }, this.options.overlapResumeMs);
4303
+ }
4304
+ resetOverlap(resume) {
4305
+ if (this.resumeTimer) {
4306
+ clearTimeout(this.resumeTimer);
4307
+ this.resumeTimer = null;
4308
+ }
4309
+ if (this.pausedAt && resume) this.player.resume?.();
4310
+ this.pausedAt = 0;
4311
+ this.overlapLoud = 0;
4312
+ }
4229
4313
  /** energy two-stage barge-in (heuristic tier only): spike over echo baseline → pause + confirm via STT */
4230
4314
  handleLevel(rms) {
4231
- if (this.usingAec) return;
4315
+ if (this.usingAec) return this.handleOverlap(rms);
4232
4316
  if (!this.speaking) {
4233
4317
  this.baseline = 0;
4234
4318
  this.hot = 0;