npm - agent.libx.js - Versions diffs - 0.93.2 → 0.93.4 - Mend

agent.libx.js 0.93.2 → 0.93.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (7)

package/cli/cli.ts CHANGED Viewed

@@ -208,7 +208,7 @@ Flags:
                        impulsive reactions, human pacing (implies --duplex; aliases: --convo, --voice)
                        with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O — mic in,
                        spoken replies out (echo-cancelled; speak over it to interrupt)
-  --voice-model <id>   with --duplex: the fast voice model (default anthropic/claude-haiku-4-5)
+  --voice-model <id>   with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
   --add-dir <path>     mount another directory into the workspace (repeatable; disk mode only)
   --subagents          allow the Task tool (spawn child agents)
   --reasoning <e>      extended thinking: off|low|medium|high or a token budget (anthropic/openai)

package/dist/cli.js CHANGED Viewed

@@ -3501,7 +3501,7 @@ var DuplexAgentOptions = class {
   ai;
   /** The WORKER's filesystem. If omitted the worker keeps Agent's jailed-disk-at-cwd default. */
   fs;
-  voiceModel = "anthropic/claude-haiku-4-5";
+  voiceModel = "groq/openai/gpt-oss-20b";
   workerModel = "anthropic/claude-sonnet-4-6";
   /** Escape hatches merged over the derived per-agent options. */
   voiceOptions;
@@ -4003,6 +4003,8 @@ var VoiceEngine = class {
   lastOverlapPartial = "";
   // change-detection: only NEW partial text counts as activity
   resumeTimer = null;
+  turnStartAt = 0;
+  // timestamp when the current turn began (for TTFT logging)
   constructor(options) {
     this.options = { ...new VoiceEngineOptions(), ...options };
     const o = this.options;
@@ -4063,6 +4065,7 @@ var VoiceEngine = class {
       this.spokeDeltas = true;
       this.ackAt = now();
     }
+    this.turnStartAt = now();
     this.setState("thinking");
   }
   speakDelta(text) {
@@ -4071,6 +4074,7 @@ var VoiceEngine = class {
     this.reply += text;
     for (const w of this.words(this.reply)) this.echoWords.add(w);
     this.tts.speak(text, true);
+    if (!this.spokeDeltas && this.turnStartAt) log7.info(`ttft: ${Math.round(now() - this.turnStartAt)}ms`);
     this.spokeDeltas = true;
     this.setState("speaking");
   }
@@ -4091,6 +4095,7 @@ var VoiceEngine = class {
       }
       this.drainTimer = null;
       this.speaking = false;
+      if (this.turnStartAt) log7.info(`turn: ${Math.round(now() - this.turnStartAt)}ms (incl. playback)`);
       this.echoUntil = now() + 2500;
       if (!this.usingAec) this.stt.reset();
       this.setState("listening");
@@ -4127,7 +4132,7 @@ var VoiceEngine = class {
     this.ctxOpen = false;
     this.interrupted = true;
     this.suspectUntil = 0;
-    this.echoUntil = now() + 2500;
+    this.echoUntil = now() + Math.max(2500, this.player.drainMs() + 3e3);
     this.tts.cancel();
     this.player.kill();
     if (!this.usingAec) this.stt.reset();
@@ -4322,6 +4327,8 @@ var SonioxSTT = class {
   lastChangeAt = 0;
   lastCombined = "";
   endpointTimer = null;
+  firstTokenAt = 0;
+  // first speech token in current utterance
   constructor(options) {
     this.options = { ...new SonioxSTTOptions(), ...options };
   }
@@ -4361,6 +4368,7 @@ var SonioxSTT = class {
     this.endpointTimer = setInterval(() => {
       const combined = (this.finalText + this.partialText).trim();
       if (!combined || now2() - this.lastChangeAt < this.options.silenceEndpointMs) return;
+      if (this.firstTokenAt) log8.info(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192silence-endpoint, "${combined.slice(0, 60)}"`);
       this.reset();
       this.onUtterance(combined, now2());
     }, 120);
@@ -4388,10 +4396,12 @@ var SonioxSTT = class {
     if (combined !== this.lastCombined) {
       this.lastCombined = combined;
       this.lastChangeAt = now2();
+      if (!this.firstTokenAt && combined.trim()) this.firstTokenAt = now2();
     }
     this.onPartial(combined);
     if (endpoint && this.finalText.trim()) {
       const utterance = this.finalText.trim();
+      if (this.firstTokenAt) log8.info(`stt: ${Math.round(now2() - this.firstTokenAt)}ms first-token\u2192endpoint, "${utterance.slice(0, 60)}"`);
       this.reset();
       this.onUtterance(utterance, now2());
     }
@@ -4400,6 +4410,7 @@ var SonioxSTT = class {
     this.finalText = "";
     this.partialText = "";
     this.lastCombined = "";
+    this.firstTokenAt = 0;
   }
   stop() {
     this.stopped = true;
@@ -7405,7 +7416,7 @@ Flags:
                        impulsive reactions, human pacing (implies --duplex; aliases: --convo, --voice)
                        with SONIOX_API_KEY + CARTESIA_API_KEY(+VOICE_ID) set: real voice I/O \u2014 mic in,
                        spoken replies out (echo-cancelled; speak over it to interrupt)
-  --voice-model <id>   with --duplex: the fast voice model (default anthropic/claude-haiku-4-5)
+  --voice-model <id>   with --duplex: the fast voice model (default groq/openai/gpt-oss-20b)
   --add-dir <path>     mount another directory into the workspace (repeatable; disk mode only)
   --subagents          allow the Task tool (spawn child agents)
   --reasoning <e>      extended thinking: off|low|medium|high or a token budget (anthropic/openai)