agent.libx.js 0.93.2 → 0.93.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -876,6 +876,7 @@ declare class VoiceEngine {
876
876
  private pausedAt;
877
877
  private lastOverlapPartial;
878
878
  private resumeTimer;
879
+ private turnStartAt;
879
880
  constructor(options?: Partial<VoiceEngineOptions>);
880
881
  start(): Promise<void>;
881
882
  get usingAec(): boolean;
@@ -942,6 +943,7 @@ declare class SonioxSTT {
942
943
  private lastChangeAt;
943
944
  private lastCombined;
944
945
  private endpointTimer;
946
+ private firstTokenAt;
945
947
  constructor(options?: Partial<SonioxSTTOptions>);
946
948
  get usingAec(): boolean;
947
949
  private connectWs;
package/dist/index.js CHANGED
@@ -3601,7 +3601,7 @@ var DuplexAgentOptions = class {
3601
3601
  ai;
3602
3602
  /** The WORKER's filesystem. If omitted the worker keeps Agent's jailed-disk-at-cwd default. */
3603
3603
  fs;
3604
- voiceModel = "anthropic/claude-haiku-4-5";
3604
+ voiceModel = "groq/openai/gpt-oss-20b";
3605
3605
  workerModel = "anthropic/claude-sonnet-4-6";
3606
3606
  /** Escape hatches merged over the derived per-agent options. */
3607
3607
  voiceOptions;
@@ -4198,6 +4198,8 @@ var VoiceEngine = class {
4198
4198
  lastOverlapPartial = "";
4199
4199
  // change-detection: only NEW partial text counts as activity
4200
4200
  resumeTimer = null;
4201
+ turnStartAt = 0;
4202
+ // timestamp when the current turn began (for TTFT logging)
4201
4203
  constructor(options) {
4202
4204
  this.options = { ...new VoiceEngineOptions(), ...options };
4203
4205
  const o = this.options;
@@ -4258,6 +4260,7 @@ var VoiceEngine = class {
4258
4260
  this.spokeDeltas = true;
4259
4261
  this.ackAt = now();
4260
4262
  }
4263
+ this.turnStartAt = now();
4261
4264
  this.setState("thinking");
4262
4265
  }
4263
4266
  speakDelta(text) {
@@ -4266,6 +4269,7 @@ var VoiceEngine = class {
4266
4269
  this.reply += text;
4267
4270
  for (const w of this.words(this.reply)) this.echoWords.add(w);
4268
4271
  this.tts.speak(text, true);
4272
+ if (!this.spokeDeltas && this.turnStartAt) log8.info(`ttft: ${Math.round(now() - this.turnStartAt)}ms`);
4269
4273
  this.spokeDeltas = true;
4270
4274
  this.setState("speaking");
4271
4275
  }
@@ -4286,6 +4290,7 @@ var VoiceEngine = class {
4286
4290
  }
4287
4291
  this.drainTimer = null;
4288
4292
  this.speaking = false;
4293
+ if (this.turnStartAt) log8.info(`turn: ${Math.round(now() - this.turnStartAt)}ms (incl. playback)`);
4289
4294
  this.echoUntil = now() + 2500;
4290
4295
  if (!this.usingAec) this.stt.reset();
4291
4296
  this.setState("listening");
@@ -4322,7 +4327,7 @@ var VoiceEngine = class {
4322
4327
  this.ctxOpen = false;
4323
4328
  this.interrupted = true;
4324
4329
  this.suspectUntil = 0;
4325
- this.echoUntil = now() + 2500;
4330
+ this.echoUntil = now() + Math.max(2500, this.player.drainMs() + 3e3);
4326
4331
  this.tts.cancel();
4327
4332
  this.player.kill();
4328
4333
  if (!this.usingAec) this.stt.reset();
@@ -4517,6 +4522,8 @@ var SonioxSTT = class {
4517
4522
  lastChangeAt = 0;
4518
4523
  lastCombined = "";
4519
4524
  endpointTimer = null;
4525
+ firstTokenAt = 0;
4526
+ // first speech token in current utterance
4520
4527
  constructor(options) {
4521
4528
  this.options = { ...new SonioxSTTOptions(), ...options };
4522
4529
  }
@@ -4556,6 +4563,7 @@ var SonioxSTT = class {
4556
4563
  this.endpointTimer = setInterval(() => {
4557
4564
  const combined = (this.finalText + this.partialText).trim();
4558
4565
  if (!combined || now2() - this.lastChangeAt < this.options.silenceEndpointMs) return;
4566
+ if (this.firstTokenAt) log9.info(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192silence-endpoint, "${combined.slice(0, 60)}"`);
4559
4567
  this.reset();
4560
4568
  this.onUtterance(combined, now2());
4561
4569
  }, 120);
@@ -4583,10 +4591,12 @@ var SonioxSTT = class {
4583
4591
  if (combined !== this.lastCombined) {
4584
4592
  this.lastCombined = combined;
4585
4593
  this.lastChangeAt = now2();
4594
+ if (!this.firstTokenAt && combined.trim()) this.firstTokenAt = now2();
4586
4595
  }
4587
4596
  this.onPartial(combined);
4588
4597
  if (endpoint && this.finalText.trim()) {
4589
4598
  const utterance = this.finalText.trim();
4599
+ if (this.firstTokenAt) log9.info(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192endpoint, "${utterance.slice(0, 60)}"`);
4590
4600
  this.reset();
4591
4601
  this.onUtterance(utterance, now2());
4592
4602
  }
@@ -4595,6 +4605,7 @@ var SonioxSTT = class {
4595
4605
  this.finalText = "";
4596
4606
  this.partialText = "";
4597
4607
  this.lastCombined = "";
4608
+ this.firstTokenAt = 0;
4598
4609
  }
4599
4610
  stop() {
4600
4611
  this.stopped = true;