agent.libx.js 0.93.2 → 0.93.4

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/cli/cli.ts CHANGED
@@ -208,7 +208,7 @@ Flags:
208
208
  impulsive reactions, human pacing (implies --duplex; aliases: --convo, --voice)
209
209
  with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O — mic in,
210
210
  spoken replies out (echo-cancelled; speak over it to interrupt)
211
- --voice-model <id> with --duplex: the fast voice model (default anthropic/claude-haiku-4-5)
211
+ --voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
212
212
  --add-dir <path> mount another directory into the workspace (repeatable; disk mode only)
213
213
  --subagents allow the Task tool (spawn child agents)
214
214
  --reasoning <e> extended thinking: off|low|medium|high or a token budget (anthropic/openai)
package/dist/cli.js CHANGED
@@ -3501,7 +3501,7 @@ var DuplexAgentOptions = class {
3501
3501
  ai;
3502
3502
  /** The WORKER's filesystem. If omitted the worker keeps Agent's jailed-disk-at-cwd default. */
3503
3503
  fs;
3504
- voiceModel = "anthropic/claude-haiku-4-5";
3504
+ voiceModel = "groq/openai/gpt-oss-20b";
3505
3505
  workerModel = "anthropic/claude-sonnet-4-6";
3506
3506
  /** Escape hatches merged over the derived per-agent options. */
3507
3507
  voiceOptions;
@@ -4003,6 +4003,8 @@ var VoiceEngine = class {
4003
4003
  lastOverlapPartial = "";
4004
4004
  // change-detection: only NEW partial text counts as activity
4005
4005
  resumeTimer = null;
4006
+ turnStartAt = 0;
4007
+ // timestamp when the current turn began (used for the TTFT and whole-turn duration logs)
4006
4008
  constructor(options) {
4007
4009
  this.options = { ...new VoiceEngineOptions(), ...options };
4008
4010
  const o = this.options;
@@ -4063,6 +4065,7 @@ var VoiceEngine = class {
4063
4065
  this.spokeDeltas = true;
4064
4066
  this.ackAt = now();
4065
4067
  }
4068
+ this.turnStartAt = now();
4066
4069
  this.setState("thinking");
4067
4070
  }
4068
4071
  speakDelta(text) {
@@ -4071,6 +4074,7 @@ var VoiceEngine = class {
4071
4074
  this.reply += text;
4072
4075
  for (const w of this.words(this.reply)) this.echoWords.add(w);
4073
4076
  this.tts.speak(text, true);
4077
+ if (!this.spokeDeltas && this.turnStartAt) log7.info(`ttft: ${Math.round(now() - this.turnStartAt)}ms`);
4074
4078
  this.spokeDeltas = true;
4075
4079
  this.setState("speaking");
4076
4080
  }
@@ -4091,6 +4095,7 @@ var VoiceEngine = class {
4091
4095
  }
4092
4096
  this.drainTimer = null;
4093
4097
  this.speaking = false;
4098
+ if (this.turnStartAt) log7.info(`turn: ${Math.round(now() - this.turnStartAt)}ms (incl. playback)`);
4094
4099
  this.echoUntil = now() + 2500;
4095
4100
  if (!this.usingAec) this.stt.reset();
4096
4101
  this.setState("listening");
@@ -4127,7 +4132,7 @@ var VoiceEngine = class {
4127
4132
  this.ctxOpen = false;
4128
4133
  this.interrupted = true;
4129
4134
  this.suspectUntil = 0;
4130
- this.echoUntil = now() + 2500;
4135
+ this.echoUntil = now() + Math.max(2500, this.player.drainMs() + 3e3);
4131
4136
  this.tts.cancel();
4132
4137
  this.player.kill();
4133
4138
  if (!this.usingAec) this.stt.reset();
@@ -4322,6 +4327,8 @@ var SonioxSTT = class {
4322
4327
  lastChangeAt = 0;
4323
4328
  lastCombined = "";
4324
4329
  endpointTimer = null;
4330
+ firstTokenAt = 0;
4331
+ // timestamp of the first speech token in the current utterance (0 = none seen yet)
4325
4332
  constructor(options) {
4326
4333
  this.options = { ...new SonioxSTTOptions(), ...options };
4327
4334
  }
@@ -4361,6 +4368,7 @@ var SonioxSTT = class {
4361
4368
  this.endpointTimer = setInterval(() => {
4362
4369
  const combined = (this.finalText + this.partialText).trim();
4363
4370
  if (!combined || now2() - this.lastChangeAt < this.options.silenceEndpointMs) return;
4371
+ if (this.firstTokenAt) log8.info(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192silence-endpoint, "${combined.slice(0, 60)}"`);
4364
4372
  this.reset();
4365
4373
  this.onUtterance(combined, now2());
4366
4374
  }, 120);
@@ -4388,10 +4396,12 @@ var SonioxSTT = class {
4388
4396
  if (combined !== this.lastCombined) {
4389
4397
  this.lastCombined = combined;
4390
4398
  this.lastChangeAt = now2();
4399
+ if (!this.firstTokenAt && combined.trim()) this.firstTokenAt = now2();
4391
4400
  }
4392
4401
  this.onPartial(combined);
4393
4402
  if (endpoint && this.finalText.trim()) {
4394
4403
  const utterance = this.finalText.trim();
4404
+ if (this.firstTokenAt) log8.info(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192endpoint, "${utterance.slice(0, 60)}"`);
4395
4405
  this.reset();
4396
4406
  this.onUtterance(utterance, now2());
4397
4407
  }
@@ -4400,6 +4410,7 @@ var SonioxSTT = class {
4400
4410
  this.finalText = "";
4401
4411
  this.partialText = "";
4402
4412
  this.lastCombined = "";
4413
+ this.firstTokenAt = 0;
4403
4414
  }
4404
4415
  stop() {
4405
4416
  this.stopped = true;
@@ -7405,7 +7416,7 @@ Flags:
7405
7416
  impulsive reactions, human pacing (implies --duplex; aliases: --convo, --voice)
7406
7417
  with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O \u2014 mic in,
7407
7418
  spoken replies out (echo-cancelled; speak over it to interrupt)
7408
- --voice-model <id> with --duplex: the fast voice model (default anthropic/claude-haiku-4-5)
7419
+ --voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
7409
7420
  --add-dir <path> mount another directory into the workspace (repeatable; disk mode only)
7410
7421
  --subagents allow the Task tool (spawn child agents)
7411
7422
  --reasoning <e> extended thinking: off|low|medium|high or a token budget (anthropic/openai)