agent.libx.js 0.93.2 → 0.93.6

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/cli/cli.ts CHANGED
@@ -208,7 +208,7 @@ Flags:
208
208
  impulsive reactions, human pacing (implies --duplex; aliases: --convo, --voice)
209
209
  with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O — mic in,
210
210
  spoken replies out (echo-cancelled; speak over it to interrupt)
211
- --voice-model <id> with --duplex: the fast voice model (default anthropic/claude-haiku-4-5)
211
+ --voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
212
212
  --add-dir <path> mount another directory into the workspace (repeatable; disk mode only)
213
213
  --subagents allow the Task tool (spawn child agents)
214
214
  --reasoning <e> extended thinking: off|low|medium|high or a token budget (anthropic/openai)
package/dist/cli.js CHANGED
@@ -2900,9 +2900,17 @@ var Agent = class _Agent {
2900
2900
  let res;
2901
2901
  const sent = this.trimContext();
2902
2902
  const frag = reasoningToChatFragment(o.model, o.reasoning);
2903
+ const isCursorWithTools = o.model.startsWith("cursor/") && wireTools.length > 0;
2904
+ const cursorPo = isCursorWithTools ? {
2905
+ toolExecutor: async (name, args) => {
2906
+ const tc = { id: `cursor-${Date.now()}`, type: "function", function: { name, arguments: JSON.stringify(args) } };
2907
+ const raw = await this.dispatch(tc);
2908
+ return typeof raw === "string" ? raw : raw.text;
2909
+ }
2910
+ } : void 0;
2903
2911
  const reasonOpts = {
2904
2912
  ...frag,
2905
- ...o.providerOptions ? { providerOptions: { ...frag.providerOptions, ...o.providerOptions } } : {}
2913
+ ...o.providerOptions || cursorPo ? { providerOptions: { ...frag.providerOptions, ...o.providerOptions, ...cursorPo } } : {}
2906
2914
  };
2907
2915
  try {
2908
2916
  if (useStream) {
@@ -3501,7 +3509,7 @@ var DuplexAgentOptions = class {
3501
3509
  ai;
3502
3510
  /** The WORKER's filesystem. If omitted the worker keeps Agent's jailed-disk-at-cwd default. */
3503
3511
  fs;
3504
- voiceModel = "anthropic/claude-haiku-4-5";
3512
+ voiceModel = "groq/openai/gpt-oss-20b";
3505
3513
  workerModel = "anthropic/claude-sonnet-4-6";
3506
3514
  /** Escape hatches merged over the derived per-agent options. */
3507
3515
  voiceOptions;
@@ -4003,6 +4011,8 @@ var VoiceEngine = class {
4003
4011
  lastOverlapPartial = "";
4004
4012
  // change-detection: only NEW partial text counts as activity
4005
4013
  resumeTimer = null;
4014
+ turnStartAt = 0;
4015
+ // timestamp when the current turn began (for TTFT logging)
4006
4016
  constructor(options) {
4007
4017
  this.options = { ...new VoiceEngineOptions(), ...options };
4008
4018
  const o = this.options;
@@ -4063,6 +4073,7 @@ var VoiceEngine = class {
4063
4073
  this.spokeDeltas = true;
4064
4074
  this.ackAt = now();
4065
4075
  }
4076
+ this.turnStartAt = now();
4066
4077
  this.setState("thinking");
4067
4078
  }
4068
4079
  speakDelta(text) {
@@ -4071,6 +4082,7 @@ var VoiceEngine = class {
4071
4082
  this.reply += text;
4072
4083
  for (const w of this.words(this.reply)) this.echoWords.add(w);
4073
4084
  this.tts.speak(text, true);
4085
+ if (!this.spokeDeltas && this.turnStartAt) log7.info(`ttft: ${Math.round(now() - this.turnStartAt)}ms`);
4074
4086
  this.spokeDeltas = true;
4075
4087
  this.setState("speaking");
4076
4088
  }
@@ -4091,6 +4103,7 @@ var VoiceEngine = class {
4091
4103
  }
4092
4104
  this.drainTimer = null;
4093
4105
  this.speaking = false;
4106
+ if (this.turnStartAt) log7.info(`turn: ${Math.round(now() - this.turnStartAt)}ms (incl. playback)`);
4094
4107
  this.echoUntil = now() + 2500;
4095
4108
  if (!this.usingAec) this.stt.reset();
4096
4109
  this.setState("listening");
@@ -4127,7 +4140,7 @@ var VoiceEngine = class {
4127
4140
  this.ctxOpen = false;
4128
4141
  this.interrupted = true;
4129
4142
  this.suspectUntil = 0;
4130
- this.echoUntil = now() + 2500;
4143
+ this.echoUntil = now() + Math.max(2500, this.player.drainMs() + 3e3);
4131
4144
  this.tts.cancel();
4132
4145
  this.player.kill();
4133
4146
  if (!this.usingAec) this.stt.reset();
@@ -4322,6 +4335,8 @@ var SonioxSTT = class {
4322
4335
  lastChangeAt = 0;
4323
4336
  lastCombined = "";
4324
4337
  endpointTimer = null;
4338
+ firstTokenAt = 0;
4339
+ // first speech token in current utterance
4325
4340
  constructor(options) {
4326
4341
  this.options = { ...new SonioxSTTOptions(), ...options };
4327
4342
  }
@@ -4361,6 +4376,7 @@ var SonioxSTT = class {
4361
4376
  this.endpointTimer = setInterval(() => {
4362
4377
  const combined = (this.finalText + this.partialText).trim();
4363
4378
  if (!combined || now2() - this.lastChangeAt < this.options.silenceEndpointMs) return;
4379
+ if (this.firstTokenAt) log8.info(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192silence-endpoint, "${combined.slice(0, 60)}"`);
4364
4380
  this.reset();
4365
4381
  this.onUtterance(combined, now2());
4366
4382
  }, 120);
@@ -4388,10 +4404,12 @@ var SonioxSTT = class {
4388
4404
  if (combined !== this.lastCombined) {
4389
4405
  this.lastCombined = combined;
4390
4406
  this.lastChangeAt = now2();
4407
+ if (!this.firstTokenAt && combined.trim()) this.firstTokenAt = now2();
4391
4408
  }
4392
4409
  this.onPartial(combined);
4393
4410
  if (endpoint && this.finalText.trim()) {
4394
4411
  const utterance = this.finalText.trim();
4412
+ if (this.firstTokenAt) log8.info(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192endpoint, "${utterance.slice(0, 60)}"`);
4395
4413
  this.reset();
4396
4414
  this.onUtterance(utterance, now2());
4397
4415
  }
@@ -4400,6 +4418,7 @@ var SonioxSTT = class {
4400
4418
  this.finalText = "";
4401
4419
  this.partialText = "";
4402
4420
  this.lastCombined = "";
4421
+ this.firstTokenAt = 0;
4403
4422
  }
4404
4423
  stop() {
4405
4424
  this.stopped = true;
@@ -7405,7 +7424,7 @@ Flags:
7405
7424
  impulsive reactions, human pacing (implies --duplex; aliases: --convo, --voice)
7406
7425
  with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O \u2014 mic in,
7407
7426
  spoken replies out (echo-cancelled; speak over it to interrupt)
7408
- --voice-model <id> with --duplex: the fast voice model (default anthropic/claude-haiku-4-5)
7427
+ --voice-model <id> with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
7409
7428
  --add-dir <path> mount another directory into the workspace (repeatable; disk mode only)
7410
7429
  --subagents allow the Task tool (spawn child agents)
7411
7430
  --reasoning <e> extended thinking: off|low|medium|high or a token budget (anthropic/openai)