npm - @livx.cc/agentx - Versions diffs - 0.96.15 → 0.96.16 - Mend

@livx.cc/agentx 0.96.15 → 0.96.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/dist/{Agent-DRe91tAy.d.ts → Agent-DdhD1pGw.d.ts} +15 -0
package/dist/cli.d.ts +1 -1
package/dist/cli.js +62 -17
package/dist/cli.js.map +1 -1
package/dist/index.d.ts +13 -2
package/dist/index.js +52 -2
package/dist/index.js.map +1 -1
package/dist/native/mic-aec.swift +13 -0
package/package.json +1 -1

package/dist/index.d.ts CHANGED

@@ -1,5 +1,5 @@
-import { a as AgentOptions, H as Hooks, h as RunResult, A as Agent } from './Agent-DRe91tAy.js';
-export { C as ChatFragment, D as DEFAULT_MUTATING, b as Decision, P as PermissionOptions, c as PermissionPolicy, d as PermissionRule, e as PreToolUseDecision, R as ReasoningEffort, f as RecordingHooks, g as RecordingLifecycle, T as ToolUse, i as ToolUseMeta, j as composeHooks, p as planMode, r as reasoningToChatFragment } from './Agent-DRe91tAy.js';
+import { a as AgentOptions, H as Hooks, h as RunResult, A as Agent } from './Agent-DdhD1pGw.js';
+export { C as ChatFragment, D as DEFAULT_MUTATING, b as Decision, P as PermissionOptions, c as PermissionPolicy, d as PermissionRule, e as PreToolUseDecision, R as ReasoningEffort, f as RecordingHooks, g as RecordingLifecycle, T as ToolUse, i as ToolUseMeta, j as composeHooks, p as planMode, r as reasoningToChatFragment } from './Agent-DdhD1pGw.js';
 import { IFilesystem, FileMetadata } from '@livx.cc/wcli/core';
 export { CommandExecutor, FileMetadata, IFilesystem, IndexedDbFilesystem, MemFilesystem, registerHeadlessCommands } from '@livx.cc/wcli/core';
 import { BodDB } from '@bod.ee/db';
@@ -1166,6 +1166,8 @@ interface SttLike {
     onPartial: (text: string) => void;
     onUtterance: (text: string, endpointAt: number) => void;
     onLevel: (rms: number) => void;
+    /** Optional: unrecoverable capture failure (e.g. mic produced no audio) — host tears voice down. */
+    onFatal?: (message: string) => void;
     start(): Promise<void> | void;
     reset(): void;
     stop(): void;
@@ -1326,6 +1328,9 @@ declare class SonioxSTTOptions {
     /** Client-side endpoint: finalized text + no new tokens for this long = utterance (don't wait for
      *  Soniox's semantic <end>, which adds 0.5-1.5s — the difference between ping-pong and lag). */
     silenceEndpointMs: number;
+    /** No-audio watchdog: if the mic source stops delivering chunks for this long, capture is dead →
+     *  fire onFatal + stop (else Soniox idle-timeouts and reconnect-loops forever). 0 = disable. */
+    noAudioTimeoutMs: number;
 }
 declare class SonioxSTT {
     options: SonioxSTTOptions;
@@ -1336,6 +1341,12 @@ declare class SonioxSTT {
     onUtterance: (text: string, endpointAt: number) => void;
     /** mic energy (RMS) per chunk — drives the energy-based heuristic barge-in tier */
     onLevel: (rms: number) => void;
+    /** Unrecoverable: the mic source stopped delivering audio (Soniox starves → idle-timeout reconnect
+     *  loop). The host tears voice down instead of spinning forever. */
+    onFatal: (message: string) => void;
+    private lastChunkAt;
+    private startedChunksAt;
+    private noAudioTimer;
     private finalText;
     private partialText;
     private lastChangeAt;

package/dist/index.js CHANGED

@@ -3103,6 +3103,9 @@ function reasoningToChatFragment(model, effort) {
 // src/Agent.ts
 var log4 = forComponent("Agent");
+function isBrowserTool(name) {
+  return name.startsWith("mcp__") && /browser/i.test(name.slice(5).split("__")[0]);
+}
 function isAbortError(err) {
   const e = err;
   const blob = `${e?.message ?? ""} ${e?.name ?? ""} ${e?.code ?? ""} ${e?.cause?.name ?? ""}`;
@@ -3150,6 +3153,17 @@ var AgentOptions = class {
    *  and returns the (cropped) string to put in context — e.g. spill to scratch and return a recoverable,
    *  paginated stub. Called only when a result exceeds `maxToolResultBytes`. */
   capToolResult;
+  /** Browser-run adaptive trimming. Browser MCP tools (`mcp__*browser*`) return huge per-call results
+   *  (full DOM text + screenshots) that accumulate per step and blow the cumulative `maxTokens` budget
+   *  kill-switch mid-browse. The fix attacks PER-STEP TOKEN GROWTH, and engages ONLY once a browser tool
+   *  is ACTUALLY invoked (not on mere config presence — so normal coding is untouched):
+   *   - `resultBytes`: tighter `maxToolResultBytes` applied to BROWSER tool results (each oversized DOM is
+   *     cropped/spilled to a small stub → small fresh-token tail per step).
+   *   - `keepOutputs`: keep only the most-recent N tool-result bodies verbatim ONCE a browser tool has run
+   *     (older huge DOMs collapse to one-line stubs → small cached prefix → small 0.1×cacheRead/step).
+   *  Explicit `keepToolOutputs`/`maxToolResultBytes` are NOT overridden below their browser values — the
+   *  tighter of (user, browser) wins, never loosening a user's cap. Unset = off (no adaptive behavior). */
+  browserTrim;
   /** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
   skillsDir;
   /** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */
@@ -3234,6 +3248,8 @@ var Agent = class _Agent {
   // session-start lifecycle hook fires once per conversation
   parkedMs = 0;
   // cumulative time blocked on the HUMAN (permission/plan prompts) — excluded from the timeout
+  browserActive = false;
+  // flips true once a browser MCP tool is actually invoked → adaptive trimming engages (browserTrim)
   /** Time a human-blocking await (a permission/plan prompt) and bank it in `parkedMs` so idle prompt
    *  time never trips the wall-clock kill-switch. The agent did no work while parked on the user. */
   async park(p) {
@@ -3642,7 +3658,11 @@ var Agent = class _Agent {
     }
     if (!threw) result = await this.maybeAutoTest(tc.function.name, result);
     if (images?.length && !result) result = `[${images.length} image${images.length > 1 ? "s" : ""} attached]`;
-    const cap = this.options.maxToolResultBytes ?? 0;
+    const browser = isBrowserTool(tc.function.name);
+    if (browser && this.options.browserTrim) this.browserActive = true;
+    const browserBytes = browser ? this.options.browserTrim?.resultBytes : void 0;
+    const baseCap = this.options.maxToolResultBytes ?? 0;
+    const cap = browserBytes ? baseCap > 0 ? Math.min(baseCap, browserBytes) : browserBytes : baseCap;
     if (!threw && cap > 0 && result.length > cap) {
       const info = { tool: tc.function.name, args };
       result = this.options.capToolResult ? await this.options.capToolResult(result, info) : cropResult(result, cap);
@@ -3686,7 +3706,9 @@ ${out}`;
     let out = null;
     if (o.compaction?.maxMessages && m.length > o.compaction.maxMessages) out = compact(m, o.compaction.maxMessages);
     else if (o.maxContextMessages && m.length > o.maxContextMessages) out = dropOldest(m, o.maxContextMessages);
-    if (o.keepToolOutputs) out = stubOldToolResults(out ?? m, o.keepToolOutputs);
+    const browserKeep = this.browserActive ? o.browserTrim?.keepOutputs : void 0;
+    const keep = browserKeep != null ? o.keepToolOutputs ? Math.min(o.keepToolOutputs, browserKeep) : browserKeep : o.keepToolOutputs;
+    if (keep) out = stubOldToolResults(out ?? m, keep);
     if (o.maxContextTokens) {
       const pre = (out ?? m).length;
       out = fitTokenBudget(out ?? m, o.maxContextTokens);
@@ -6042,6 +6064,9 @@ var SonioxSTTOptions = class {
   /** Client-side endpoint: finalized text + no new tokens for this long = utterance (don't wait for
    *  Soniox's semantic <end>, which adds 0.5-1.5s — the difference between ping-pong and lag). */
   silenceEndpointMs = 500;
+  /** No-audio watchdog: if the mic source stops delivering chunks for this long, capture is dead →
+   *  fire onFatal + stop (else Soniox idle-timeouts and reconnect-loops forever). 0 = disable. */
+  noAudioTimeoutMs = 1e4;
 };
 var SonioxSTT = class {
   options;
@@ -6055,6 +6080,15 @@ var SonioxSTT = class {
   /** mic energy (RMS) per chunk — drives the energy-based heuristic barge-in tier */
   onLevel = () => {
   };
+  /** Unrecoverable: the mic source stopped delivering audio (Soniox starves → idle-timeout reconnect
+   *  loop). The host tears voice down instead of spinning forever. */
+  onFatal = () => {
+  };
+  lastChunkAt = 0;
+  // timestamp of the most recent mic chunk (0 = none yet)
+  startedChunksAt = 0;
+  // when capture started (grace before the first chunk)
+  noAudioTimer = null;
   finalText = "";
   partialText = "";
   lastChangeAt = 0;
@@ -6106,7 +6140,22 @@ var SonioxSTT = class {
       this.onUtterance(combined, now2());
     }, 120);
     this.endpointTimer.unref?.();
+    this.startedChunksAt = now2();
+    const noAudioMs = this.options.noAudioTimeoutMs;
+    if (noAudioMs > 0) {
+      this.noAudioTimer = setInterval(() => {
+        if (this.stopped) return;
+        const ref = this.lastChunkAt || this.startedChunksAt;
+        if (now2() - ref > noAudioMs) {
+          log11.error(`stt: no mic audio for >${Math.round(noAudioMs / 1e3)}s \u2014 capture device stopped delivering`);
+          this.onFatal("microphone stopped delivering audio (try a different input device, e.g. AirPods, or check System Settings \u2192 Sound \u2192 Input)");
+          this.stop();
+        }
+      }, Math.max(250, Math.min(2e3, noAudioMs / 4)));
+      this.noAudioTimer.unref?.();
+    }
     await this.options.source.start((chunk) => {
+      this.lastChunkAt = now2();
       let sum = 0;
       const view = new DataView(chunk.buffer, chunk.byteOffset, chunk.byteLength);
       for (let i = 0; i + 1 < chunk.byteLength; i += 2) {
@@ -6148,6 +6197,7 @@ var SonioxSTT = class {
   stop() {
     this.stopped = true;
     if (this.endpointTimer) clearInterval(this.endpointTimer);
+    if (this.noAudioTimer) clearInterval(this.noAudioTimer);
     this.options.source?.stop();
     if (this.ws) this.ws.onclose = null;
     this.ws?.close();