npm - @livx.cc/agentx - Versions diffs - 0.96.15 → 0.96.16 - Mend

@livx.cc/agentx 0.96.15 → 0.96.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/dist/{Agent-DRe91tAy.d.ts → Agent-DdhD1pGw.d.ts} +15 -0
package/dist/cli.d.ts +1 -1
package/dist/cli.js +62 -17
package/dist/cli.js.map +1 -1
package/dist/index.d.ts +13 -2
package/dist/index.js +52 -2
package/dist/index.js.map +1 -1
package/dist/native/mic-aec.swift +13 -0
package/package.json +1 -1

package/dist/{Agent-DRe91tAy.d.ts → Agent-DdhD1pGw.d.ts} RENAMED Viewed

@@ -240,6 +240,20 @@ declare class AgentOptions {
         tool: string;
         args: any;
     }) => string | Promise<string>;
+    /** Browser-run adaptive trimming. Browser MCP tools (`mcp__*browser*`) return huge per-call results
+     *  (full DOM text + screenshots) that accumulate per step and blow the cumulative `maxTokens` budget
+     *  kill-switch mid-browse. The fix attacks PER-STEP TOKEN GROWTH, and engages ONLY once a browser tool
+     *  is ACTUALLY invoked (not on mere config presence — so normal coding is untouched):
+     *   - `resultBytes`: tighter `maxToolResultBytes` applied to BROWSER tool results (each oversized DOM is
+     *     cropped/spilled to a small stub → small fresh-token tail per step).
+     *   - `keepOutputs`: keep only the most-recent N tool-result bodies verbatim ONCE a browser tool has run
+     *     (older huge DOMs collapse to one-line stubs → small cached prefix → small 0.1×cacheRead/step).
+     *  Explicit `keepToolOutputs`/`maxToolResultBytes` are NOT overridden below their browser values — the
+     *  tighter of (user, browser) wins, never loosening a user's cap. Unset = off (no adaptive behavior). */
+    browserTrim?: {
+        resultBytes?: number;
+        keepOutputs?: number;
+    };
     /** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
     skillsDir?: string | string[];
     /** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */
@@ -326,6 +340,7 @@ declare class Agent {
     private systemPromptCache;
     private started;
     private parkedMs;
+    private browserActive;
     /** Time a human-blocking await (a permission/plan prompt) and bank it in `parkedMs` so idle prompt
      *  time never trips the wall-clock kill-switch. The agent did no work while parked on the user. */
     private park;

package/dist/cli.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 #!/usr/bin/env bun
-import { H as Hooks, h as RunResult, R as ReasoningEffort, A as Agent } from './Agent-DRe91tAy.js';
+import { H as Hooks, h as RunResult, R as ReasoningEffort, A as Agent } from './Agent-DdhD1pGw.js';
 import { IFilesystem } from '@livx.cc/wcli/core';
 import { M as Message, c as ContentPart, e as MessageContent } from './tools-DtpN8Agv.js';

package/dist/cli.js CHANGED Viewed

@@ -3141,6 +3141,9 @@ function reasoningToChatFragment(model, effort) {
 // src/Agent.ts
 var log4 = forComponent("Agent");
+function isBrowserTool(name) {
+  return name.startsWith("mcp__") && /browser/i.test(name.slice(5).split("__")[0]);
+}
 function isAbortError(err2) {
   const e = err2;
   const blob = `${e?.message ?? ""} ${e?.name ?? ""} ${e?.code ?? ""} ${e?.cause?.name ?? ""}`;
@@ -3188,6 +3191,17 @@ var AgentOptions = class {
    *  and returns the (cropped) string to put in context — e.g. spill to scratch and return a recoverable,
    *  paginated stub. Called only when a result exceeds `maxToolResultBytes`. */
   capToolResult;
+  /** Browser-run adaptive trimming. Browser MCP tools (`mcp__*browser*`) return huge per-call results
+   *  (full DOM text + screenshots) that accumulate per step and blow the cumulative `maxTokens` budget
+   *  kill-switch mid-browse. The fix attacks PER-STEP TOKEN GROWTH, and engages ONLY once a browser tool
+   *  is ACTUALLY invoked (not on mere config presence — so normal coding is untouched):
+   *   - `resultBytes`: tighter `maxToolResultBytes` applied to BROWSER tool results (each oversized DOM is
+   *     cropped/spilled to a small stub → small fresh-token tail per step).
+   *   - `keepOutputs`: keep only the most-recent N tool-result bodies verbatim ONCE a browser tool has run
+   *     (older huge DOMs collapse to one-line stubs → small cached prefix → small 0.1×cacheRead/step).
+   *  Explicit `keepToolOutputs`/`maxToolResultBytes` are NOT overridden below their browser values — the
+   *  tighter of (user, browser) wins, never loosening a user's cap. Unset = off (no adaptive behavior). */
+  browserTrim;
   /** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
   skillsDir;
   /** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */
@@ -3272,6 +3286,8 @@ var Agent = class _Agent {
   // session-start lifecycle hook fires once per conversation
   parkedMs = 0;
   // cumulative time blocked on the HUMAN (permission/plan prompts) — excluded from the timeout
+  browserActive = false;
+  // flips true once a browser MCP tool is actually invoked → adaptive trimming engages (browserTrim)
   /** Time a human-blocking await (a permission/plan prompt) and bank it in `parkedMs` so idle prompt
    *  time never trips the wall-clock kill-switch. The agent did no work while parked on the user. */
   async park(p) {
@@ -3680,7 +3696,11 @@ var Agent = class _Agent {
     }
     if (!threw) result = await this.maybeAutoTest(tc.function.name, result);
     if (images?.length && !result) result = `[${images.length} image${images.length > 1 ? "s" : ""} attached]`;
-    const cap = this.options.maxToolResultBytes ?? 0;
+    const browser = isBrowserTool(tc.function.name);
+    if (browser && this.options.browserTrim) this.browserActive = true;
+    const browserBytes = browser ? this.options.browserTrim?.resultBytes : void 0;
+    const baseCap = this.options.maxToolResultBytes ?? 0;
+    const cap = browserBytes ? baseCap > 0 ? Math.min(baseCap, browserBytes) : browserBytes : baseCap;
     if (!threw && cap > 0 && result.length > cap) {
       const info = { tool: tc.function.name, args };
       result = this.options.capToolResult ? await this.options.capToolResult(result, info) : cropResult(result, cap);
@@ -3724,7 +3744,9 @@ ${out}`;
     let out = null;
     if (o.compaction?.maxMessages && m.length > o.compaction.maxMessages) out = compact(m, o.compaction.maxMessages);
     else if (o.maxContextMessages && m.length > o.maxContextMessages) out = dropOldest(m, o.maxContextMessages);
-    if (o.keepToolOutputs) out = stubOldToolResults(out ?? m, o.keepToolOutputs);
+    const browserKeep = this.browserActive ? o.browserTrim?.keepOutputs : void 0;
+    const keep = browserKeep != null ? o.keepToolOutputs ? Math.min(o.keepToolOutputs, browserKeep) : browserKeep : o.keepToolOutputs;
+    if (keep) out = stubOldToolResults(out ?? m, keep);
     if (o.maxContextTokens) {
       const pre = (out ?? m).length;
       out = fitTokenBudget(out ?? m, o.maxContextTokens);
@@ -5918,6 +5940,9 @@ var SonioxSTTOptions = class {
   /** Client-side endpoint: finalized text + no new tokens for this long = utterance (don't wait for
    *  Soniox's semantic <end>, which adds 0.5-1.5s — the difference between ping-pong and lag). */
   silenceEndpointMs = 500;
+  /** No-audio watchdog: if the mic source stops delivering chunks for this long, capture is dead →
+   *  fire onFatal + stop (else Soniox idle-timeouts and reconnect-loops forever). 0 = disable. */
+  noAudioTimeoutMs = 1e4;
 };
 var SonioxSTT = class {
   options;
@@ -5931,6 +5956,15 @@ var SonioxSTT = class {
   /** mic energy (RMS) per chunk — drives the energy-based heuristic barge-in tier */
   onLevel = () => {
   };
+  /** Unrecoverable: the mic source stopped delivering audio (Soniox starves → idle-timeout reconnect
+   *  loop). The host tears voice down instead of spinning forever. */
+  onFatal = () => {
+  };
+  lastChunkAt = 0;
+  // timestamp of the most recent mic chunk (0 = none yet)
+  startedChunksAt = 0;
+  // when capture started (grace before the first chunk)
+  noAudioTimer = null;
   finalText = "";
   partialText = "";
   lastChangeAt = 0;
@@ -5982,7 +6016,22 @@ var SonioxSTT = class {
       this.onUtterance(combined, now2());
     }, 120);
     this.endpointTimer.unref?.();
+    this.startedChunksAt = now2();
+    const noAudioMs = this.options.noAudioTimeoutMs;
+    if (noAudioMs > 0) {
+      this.noAudioTimer = setInterval(() => {
+        if (this.stopped) return;
+        const ref = this.lastChunkAt || this.startedChunksAt;
+        if (now2() - ref > noAudioMs) {
+          log10.error(`stt: no mic audio for >${Math.round(noAudioMs / 1e3)}s \u2014 capture device stopped delivering`);
+          this.onFatal("microphone stopped delivering audio (try a different input device, e.g. AirPods, or check System Settings \u2192 Sound \u2192 Input)");
+          this.stop();
+        }
+      }, Math.max(250, Math.min(2e3, noAudioMs / 4)));
+      this.noAudioTimer.unref?.();
+    }
     await this.options.source.start((chunk) => {
+      this.lastChunkAt = now2();
       let sum = 0;
       const view = new DataView(chunk.buffer, chunk.byteOffset, chunk.byteLength);
       for (let i = 0; i + 1 < chunk.byteLength; i += 2) {
@@ -6024,6 +6073,7 @@ var SonioxSTT = class {
   stop() {
     this.stopped = true;
     if (this.endpointTimer) clearInterval(this.endpointTimer);
+    if (this.noAudioTimer) clearInterval(this.noAudioTimer);
     this.options.source?.stop();
     if (this.ws) this.ws.onclose = null;
     this.ws?.close();
@@ -7022,7 +7072,8 @@ The filesystem root '/' is the real machine root \u2014 you have full filesystem
     ...o.maxToolCalls != null ? { maxToolCalls: o.maxToolCalls } : {},
     // Context-v2: by default, note-take over a long session (stub tool outputs older than the last 8).
     keepToolOutputs: o.keepToolOutputs ?? 8,
-    ...o.maxContextTokens != null ? { maxContextTokens: o.maxContextTokens } : {}
+    ...o.maxContextTokens != null ? { maxContextTokens: o.maxContextTokens } : {},
+    ...o.browserTrim ? { browserTrim: o.browserTrim } : {}
   });
 }
 function summarizeCall(name, args) {
@@ -7360,10 +7411,11 @@ var VoiceIO = class extends VoiceEngine {
     });
     this.duplexSource = duplex;
   }
-  /** Host hook for an unrecoverable audio-source failure (e.g. mic permission denied). Only the duplex
-   *  AEC source can hit it; a no-op otherwise. */
+  /** Host hook for an unrecoverable audio failure — mic permission denied (duplex source) or no mic
+   *  audio at all (STT watchdog). Routed to whichever can detect it. */
   set onFatal(fn) {
     if (this.duplexSource) this.duplexSource.onFatal = fn;
+    if (this.stt) this.stt.onFatal = fn;
   }
   /** ready = keys present (AEC vs heuristic is decided at start()) */
   static available(env = process.env) {
@@ -10878,12 +10930,7 @@ function makeAskResolver(cwd) {
     }
   };
 }
-function hasBrowserMcp(cfg, extraTools) {
-  const isBrowser = (s) => /browser/i.test(s);
-  if (Object.entries(cfg.mcpServers ?? {}).some(([n, c]) => isBrowser(n) && !c?.disabled)) return true;
-  return extraTools.some((t) => t.name.startsWith("mcp__") && isBrowser(t.name.slice(5).split("__")[0]));
-}
-var BROWSER_MAX_CONTEXT_TOKENS = 12e4;
+var BROWSER_TRIM = { resultBytes: 8e3, keepOutputs: 2 };
 function optsFor(args, ai, cfg = {}, extraTools = []) {
   const perm = resolvePermMode(args, canPrompt);
   if (perm.notice) err(dim(`  \u26A0 ${perm.notice}
@@ -10922,9 +10969,10 @@ function optsFor(args, ai, cfg = {}, extraTools = []) {
     maxRepeats: cfg.maxRepeats,
     maxToolCalls: cfg.maxToolCalls,
     keepToolOutputs: cfg.keepToolOutputs,
-    // Token-aware backstop: explicit config wins; else auto-enable for browser MCP runs (huge DOM/screenshot
-    // results otherwise accumulate past the maxTokens budget kill-switch mid-browse). Off for normal sessions.
-    maxContextTokens: cfg.maxContextTokens ?? (hasBrowserMcp(cfg, extraTools) ? BROWSER_MAX_CONTEXT_TOKENS : void 0),
+    maxContextTokens: cfg.maxContextTokens,
+    // Adaptive browser trimming (engages on actual browser tool USE, inside the Agent — see BROWSER_TRIM).
+    // Config can tune the values; null-coalesced so a config 0/value still wins over the default.
+    browserTrim: cfg.browserTrim ?? BROWSER_TRIM,
     learnFromMistakes: cfg.learnFromMistakes,
     // Forwarded to cursor/* delegations for environment parity (chat-model providers ignore it).
     // Raw config (pre-OAuth): unresolved-oauth http servers are skipped by the cursor mapper.
@@ -10946,9 +10994,6 @@ async function makeAgent(args, ai, cfg, extraTools = []) {
   if (args.harden && !virtual) err(dim(`  \u26E8 hardened shell: writes confined to cwd+tmp${args.hardenNet ? "" : ", network blocked"} (sandbox-exec/bwrap)
 `));
   const opts = optsFor(args, ai, cfg, extraTools);
-  if (opts.maxContextTokens === BROWSER_MAX_CONTEXT_TOKENS && cfg.maxContextTokens == null)
-    err(dim(`  \u229E browser MCP detected \u2014 auto-capping sent context at ~${BROWSER_MAX_CONTEXT_TOKENS / 1e3}k tok (set maxContextTokens in config to override)
-`));
   const agent = await buildAgent(opts);
   const display = displayHooks(agent.options.fs, { flush: agent.options.host?.flushText });
   agent.options.hooks = cfg.hooks ? composeHooks(display, hooksFromConfig(cfg.hooks)) : display;