@livx.cc/agentx 0.96.15 → 0.96.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -240,6 +240,20 @@ declare class AgentOptions {
240
240
  tool: string;
241
241
  args: any;
242
242
  }) => string | Promise<string>;
243
+ /** Browser-run adaptive trimming. Browser MCP tools (`mcp__*browser*`) return huge per-call results
244
+ * (full DOM text + screenshots) that accumulate per step and blow the cumulative `maxTokens` budget
245
+ * kill-switch mid-browse. The fix attacks PER-STEP TOKEN GROWTH, and engages ONLY once a browser tool
246
+ * is ACTUALLY invoked (not on mere config presence — so normal coding is untouched):
247
+ * - `resultBytes`: tighter `maxToolResultBytes` applied to BROWSER tool results (each oversized DOM is
248
+ * cropped/spilled to a small stub → small fresh-token tail per step).
249
+ * - `keepOutputs`: keep only the most-recent N tool-result bodies verbatim ONCE a browser tool has run
250
+ * (older huge DOMs collapse to one-line stubs → small cached prefix → small 0.1×cacheRead/step).
251
+ * Explicit `keepToolOutputs`/`maxToolResultBytes` are NOT overridden below their browser values — the
252
+ * tighter of (user, browser) wins, never loosening a user's cap. Unset = off (no adaptive behavior). */
253
+ browserTrim?: {
254
+ resultBytes?: number;
255
+ keepOutputs?: number;
256
+ };
243
257
  /** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
244
258
  skillsDir?: string | string[];
245
259
  /** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */
@@ -326,6 +340,7 @@ declare class Agent {
326
340
  private systemPromptCache;
327
341
  private started;
328
342
  private parkedMs;
343
+ private browserActive;
329
344
  /** Time a human-blocking await (a permission/plan prompt) and bank it in `parkedMs` so idle prompt
330
345
  * time never trips the wall-clock kill-switch. The agent did no work while parked on the user. */
331
346
  private park;
package/dist/cli.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env bun
2
- import { H as Hooks, h as RunResult, R as ReasoningEffort, A as Agent } from './Agent-DRe91tAy.js';
2
+ import { H as Hooks, h as RunResult, R as ReasoningEffort, A as Agent } from './Agent-DdhD1pGw.js';
3
3
  import { IFilesystem } from '@livx.cc/wcli/core';
4
4
  import { M as Message, c as ContentPart, e as MessageContent } from './tools-DtpN8Agv.js';
5
5
 
package/dist/cli.js CHANGED
@@ -3141,6 +3141,9 @@ function reasoningToChatFragment(model, effort) {
3141
3141
 
3142
3142
  // src/Agent.ts
3143
3143
  var log4 = forComponent("Agent");
3144
+ function isBrowserTool(name) {
3145
+ return name.startsWith("mcp__") && /browser/i.test(name.slice(5).split("__")[0]);
3146
+ }
3144
3147
  function isAbortError(err2) {
3145
3148
  const e = err2;
3146
3149
  const blob = `${e?.message ?? ""} ${e?.name ?? ""} ${e?.code ?? ""} ${e?.cause?.name ?? ""}`;
@@ -3188,6 +3191,17 @@ var AgentOptions = class {
3188
3191
  * and returns the (cropped) string to put in context — e.g. spill to scratch and return a recoverable,
3189
3192
  * paginated stub. Called only when a result exceeds `maxToolResultBytes`. */
3190
3193
  capToolResult;
3194
+ /** Browser-run adaptive trimming. Browser MCP tools (`mcp__*browser*`) return huge per-call results
3195
+ * (full DOM text + screenshots) that accumulate per step and blow the cumulative `maxTokens` budget
3196
+ * kill-switch mid-browse. The fix attacks PER-STEP TOKEN GROWTH, and engages ONLY once a browser tool
3197
+ * is ACTUALLY invoked (not on mere config presence — so normal coding is untouched):
3198
+ * - `resultBytes`: tighter `maxToolResultBytes` applied to BROWSER tool results (each oversized DOM is
3199
+ * cropped/spilled to a small stub → small fresh-token tail per step).
3200
+ * - `keepOutputs`: keep only the most-recent N tool-result bodies verbatim ONCE a browser tool has run
3201
+ * (older huge DOMs collapse to one-line stubs → small cached prefix → small 0.1×cacheRead/step).
3202
+ * Explicit `keepToolOutputs`/`maxToolResultBytes` are NOT overridden below their browser values — the
3203
+ * tighter of (user, browser) wins, never loosening a user's cap. Unset = off (no adaptive behavior). */
3204
+ browserTrim;
3191
3205
  /** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
3192
3206
  skillsDir;
3193
3207
  /** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */
@@ -3272,6 +3286,8 @@ var Agent = class _Agent {
3272
3286
  // session-start lifecycle hook fires once per conversation
3273
3287
  parkedMs = 0;
3274
3288
  // cumulative time blocked on the HUMAN (permission/plan prompts) — excluded from the timeout
3289
+ browserActive = false;
3290
+ // flips true once a browser MCP tool is actually invoked → adaptive trimming engages (browserTrim)
3275
3291
  /** Time a human-blocking await (a permission/plan prompt) and bank it in `parkedMs` so idle prompt
3276
3292
  * time never trips the wall-clock kill-switch. The agent did no work while parked on the user. */
3277
3293
  async park(p) {
@@ -3680,7 +3696,11 @@ var Agent = class _Agent {
3680
3696
  }
3681
3697
  if (!threw) result = await this.maybeAutoTest(tc.function.name, result);
3682
3698
  if (images?.length && !result) result = `[${images.length} image${images.length > 1 ? "s" : ""} attached]`;
3683
- const cap = this.options.maxToolResultBytes ?? 0;
3699
+ const browser = isBrowserTool(tc.function.name);
3700
+ if (browser && this.options.browserTrim) this.browserActive = true;
3701
+ const browserBytes = browser ? this.options.browserTrim?.resultBytes : void 0;
3702
+ const baseCap = this.options.maxToolResultBytes ?? 0;
3703
+ const cap = browserBytes ? baseCap > 0 ? Math.min(baseCap, browserBytes) : browserBytes : baseCap;
3684
3704
  if (!threw && cap > 0 && result.length > cap) {
3685
3705
  const info = { tool: tc.function.name, args };
3686
3706
  result = this.options.capToolResult ? await this.options.capToolResult(result, info) : cropResult(result, cap);
@@ -3724,7 +3744,9 @@ ${out}`;
3724
3744
  let out = null;
3725
3745
  if (o.compaction?.maxMessages && m.length > o.compaction.maxMessages) out = compact(m, o.compaction.maxMessages);
3726
3746
  else if (o.maxContextMessages && m.length > o.maxContextMessages) out = dropOldest(m, o.maxContextMessages);
3727
- if (o.keepToolOutputs) out = stubOldToolResults(out ?? m, o.keepToolOutputs);
3747
+ const browserKeep = this.browserActive ? o.browserTrim?.keepOutputs : void 0;
3748
+ const keep = browserKeep != null ? o.keepToolOutputs ? Math.min(o.keepToolOutputs, browserKeep) : browserKeep : o.keepToolOutputs;
3749
+ if (keep) out = stubOldToolResults(out ?? m, keep);
3728
3750
  if (o.maxContextTokens) {
3729
3751
  const pre = (out ?? m).length;
3730
3752
  out = fitTokenBudget(out ?? m, o.maxContextTokens);
@@ -5918,6 +5940,9 @@ var SonioxSTTOptions = class {
5918
5940
  /** Client-side endpoint: finalized text + no new tokens for this long = utterance (don't wait for
5919
5941
  * Soniox's semantic <end>, which adds 0.5-1.5s — the difference between ping-pong and lag). */
5920
5942
  silenceEndpointMs = 500;
5943
+ /** No-audio watchdog: if the mic source stops delivering chunks for this long, capture is dead →
5944
+ * fire onFatal + stop (else Soniox idle-timeouts and reconnect-loops forever). 0 = disable. */
5945
+ noAudioTimeoutMs = 1e4;
5921
5946
  };
5922
5947
  var SonioxSTT = class {
5923
5948
  options;
@@ -5931,6 +5956,15 @@ var SonioxSTT = class {
5931
5956
  /** mic energy (RMS) per chunk — drives the energy-based heuristic barge-in tier */
5932
5957
  onLevel = () => {
5933
5958
  };
5959
+ /** Unrecoverable: the mic source stopped delivering audio (Soniox starves → idle-timeout reconnect
5960
+ * loop). The host tears voice down instead of spinning forever. */
5961
+ onFatal = () => {
5962
+ };
5963
+ lastChunkAt = 0;
5964
+ // timestamp of the most recent mic chunk (0 = none yet)
5965
+ startedChunksAt = 0;
5966
+ // when capture started (grace before the first chunk)
5967
+ noAudioTimer = null;
5934
5968
  finalText = "";
5935
5969
  partialText = "";
5936
5970
  lastChangeAt = 0;
@@ -5982,7 +6016,22 @@ var SonioxSTT = class {
5982
6016
  this.onUtterance(combined, now2());
5983
6017
  }, 120);
5984
6018
  this.endpointTimer.unref?.();
6019
+ this.startedChunksAt = now2();
6020
+ const noAudioMs = this.options.noAudioTimeoutMs;
6021
+ if (noAudioMs > 0) {
6022
+ this.noAudioTimer = setInterval(() => {
6023
+ if (this.stopped) return;
6024
+ const ref = this.lastChunkAt || this.startedChunksAt;
6025
+ if (now2() - ref > noAudioMs) {
6026
+ log10.error(`stt: no mic audio for >${Math.round(noAudioMs / 1e3)}s \u2014 capture device stopped delivering`);
6027
+ this.onFatal("microphone stopped delivering audio (try a different input device, e.g. AirPods, or check System Settings \u2192 Sound \u2192 Input)");
6028
+ this.stop();
6029
+ }
6030
+ }, Math.max(250, Math.min(2e3, noAudioMs / 4)));
6031
+ this.noAudioTimer.unref?.();
6032
+ }
5985
6033
  await this.options.source.start((chunk) => {
6034
+ this.lastChunkAt = now2();
5986
6035
  let sum = 0;
5987
6036
  const view = new DataView(chunk.buffer, chunk.byteOffset, chunk.byteLength);
5988
6037
  for (let i = 0; i + 1 < chunk.byteLength; i += 2) {
@@ -6024,6 +6073,7 @@ var SonioxSTT = class {
6024
6073
  stop() {
6025
6074
  this.stopped = true;
6026
6075
  if (this.endpointTimer) clearInterval(this.endpointTimer);
6076
+ if (this.noAudioTimer) clearInterval(this.noAudioTimer);
6027
6077
  this.options.source?.stop();
6028
6078
  if (this.ws) this.ws.onclose = null;
6029
6079
  this.ws?.close();
@@ -7022,7 +7072,8 @@ The filesystem root '/' is the real machine root \u2014 you have full filesystem
7022
7072
  ...o.maxToolCalls != null ? { maxToolCalls: o.maxToolCalls } : {},
7023
7073
  // Context-v2: by default, note-take over a long session (stub tool outputs older than the last 8).
7024
7074
  keepToolOutputs: o.keepToolOutputs ?? 8,
7025
- ...o.maxContextTokens != null ? { maxContextTokens: o.maxContextTokens } : {}
7075
+ ...o.maxContextTokens != null ? { maxContextTokens: o.maxContextTokens } : {},
7076
+ ...o.browserTrim ? { browserTrim: o.browserTrim } : {}
7026
7077
  });
7027
7078
  }
7028
7079
  function summarizeCall(name, args) {
@@ -7216,22 +7267,46 @@ var AecDuplexAudio = class {
7216
7267
  this.bin = bin;
7217
7268
  }
7218
7269
  bin;
7219
- aec = true;
7270
+ /** Mutable: starts true (VPIO/AEC). Flips false if we fall back to non-VPIO capture (heuristic tier). */
7271
+ _aec = true;
7272
+ get aec() {
7273
+ return this._aec;
7274
+ }
7220
7275
  onFatal;
7221
7276
  proc = null;
7222
7277
  stopped = false;
7223
7278
  micDenied = false;
7279
+ noVpio = false;
7280
+ // currently running the non-VPIO fallback
7281
+ triedFallback = false;
7282
+ // one-shot guard
7283
+ gotChunk = false;
7284
+ // any mic audio since (re)spawn?
7285
+ onChunk = () => {
7286
+ };
7287
+ fallbackTimer = null;
7224
7288
  bytesWritten = 0;
7225
7289
  startedAt = 0;
7226
7290
  // --- AudioSource ---
7227
7291
  start(onChunk) {
7228
- this.proc = spawn2(this.bin, [], { stdio: ["pipe", "pipe", "pipe"] });
7292
+ this.onChunk = onChunk;
7293
+ this.spawnHelper();
7294
+ }
7295
+ /** (Re)spawn the helper. On the first spawn, arm a fast watchdog: if VPIO delivers NO audio within
7296
+ * ~2.5s, the VP input tap is dead on this machine (seen on macOS 26.5.x) — respawn once with
7297
+ * MIC_NO_VPIO=1 (plain capture, heuristic echo) so the mic actually works instead of starving STT. */
7298
+ spawnHelper() {
7299
+ const env = this.noVpio ? { ...process.env, MIC_NO_VPIO: "1" } : process.env;
7300
+ this.proc = spawn2(this.bin, [], { stdio: ["pipe", "pipe", "pipe"], env });
7229
7301
  this.proc.stdin.on("error", () => {
7230
7302
  });
7231
7303
  this.proc.on("exit", (c) => {
7232
7304
  if (c && !this.stopped) log16.error(`aec duplex audio exited (${c}) \u2014 check mic permission / MIC_AEC=0`);
7233
7305
  });
7234
- this.proc.stdout.on("data", (chunk) => onChunk(chunk));
7306
+ this.proc.stdout.on("data", (chunk) => {
7307
+ this.gotChunk = true;
7308
+ this.onChunk(chunk);
7309
+ });
7235
7310
  this.proc.stderr.on("data", (d) => {
7236
7311
  for (const ln of String(d).split("\n")) {
7237
7312
  const s = ln.trim();
@@ -7245,9 +7320,21 @@ var AecDuplexAudio = class {
7245
7320
  } else log16.debug(`mic-aec: ${s}`);
7246
7321
  }
7247
7322
  });
7248
- }
7249
- stop() {
7250
- this.stopped = true;
7323
+ if (!this.noVpio && !this.triedFallback) {
7324
+ this.fallbackTimer = setTimeout(() => {
7325
+ if (this.stopped || this.gotChunk) return;
7326
+ this.triedFallback = true;
7327
+ this.noVpio = true;
7328
+ this._aec = false;
7329
+ log16.warn("mic-aec: VPIO delivered no audio in 2.5s \u2014 falling back to non-VPIO capture (heuristic echo; headphones recommended)");
7330
+ this.killProc();
7331
+ this.spawnHelper();
7332
+ }, 2500);
7333
+ this.fallbackTimer.unref?.();
7334
+ }
7335
+ }
7336
+ /** Kill the current child WITHOUT marking the whole source stopped (used for the fallback respawn). */
7337
+ killProc() {
7251
7338
  const p = this.proc;
7252
7339
  this.proc = null;
7253
7340
  if (!p) return;
@@ -7259,6 +7346,11 @@ var AecDuplexAudio = class {
7259
7346
  }
7260
7347
  }, 500).unref?.();
7261
7348
  }
7349
+ stop() {
7350
+ this.stopped = true;
7351
+ if (this.fallbackTimer) clearTimeout(this.fallbackTimer);
7352
+ this.killProc();
7353
+ }
7262
7354
  // --- AudioSink (frame writer; same played/drain byte-math as the ffplay Player) ---
7263
7355
  frame(payload) {
7264
7356
  const stdin = this.proc?.stdin;
@@ -7360,10 +7452,11 @@ var VoiceIO = class extends VoiceEngine {
7360
7452
  });
7361
7453
  this.duplexSource = duplex;
7362
7454
  }
7363
- /** Host hook for an unrecoverable audio-source failure (e.g. mic permission denied). Only the duplex
7364
- * AEC source can hit it; a no-op otherwise. */
7455
+ /** Host hook for an unrecoverable audio failure: mic permission denied (duplex source) or no mic
7456
+ * audio at all (STT watchdog). Routed to whichever can detect it. */
7365
7457
  set onFatal(fn) {
7366
7458
  if (this.duplexSource) this.duplexSource.onFatal = fn;
7459
+ if (this.stt) this.stt.onFatal = fn;
7367
7460
  }
7368
7461
  /** ready = keys present (AEC vs heuristic is decided at start()) */
7369
7462
  static available(env = process.env) {
@@ -10878,12 +10971,7 @@ function makeAskResolver(cwd) {
10878
10971
  }
10879
10972
  };
10880
10973
  }
10881
- function hasBrowserMcp(cfg, extraTools) {
10882
- const isBrowser = (s) => /browser/i.test(s);
10883
- if (Object.entries(cfg.mcpServers ?? {}).some(([n, c]) => isBrowser(n) && !c?.disabled)) return true;
10884
- return extraTools.some((t) => t.name.startsWith("mcp__") && isBrowser(t.name.slice(5).split("__")[0]));
10885
- }
10886
- var BROWSER_MAX_CONTEXT_TOKENS = 12e4;
10974
+ var BROWSER_TRIM = { resultBytes: 8e3, keepOutputs: 2 };
10887
10975
  function optsFor(args, ai, cfg = {}, extraTools = []) {
10888
10976
  const perm = resolvePermMode(args, canPrompt);
10889
10977
  if (perm.notice) err(dim(` \u26A0 ${perm.notice}
@@ -10922,9 +11010,10 @@ function optsFor(args, ai, cfg = {}, extraTools = []) {
10922
11010
  maxRepeats: cfg.maxRepeats,
10923
11011
  maxToolCalls: cfg.maxToolCalls,
10924
11012
  keepToolOutputs: cfg.keepToolOutputs,
10925
- // Token-aware backstop: explicit config wins; else auto-enable for browser MCP runs (huge DOM/screenshot
10926
- // results otherwise accumulate past the maxTokens budget kill-switch mid-browse). Off for normal sessions.
10927
- maxContextTokens: cfg.maxContextTokens ?? (hasBrowserMcp(cfg, extraTools) ? BROWSER_MAX_CONTEXT_TOKENS : void 0),
11013
+ maxContextTokens: cfg.maxContextTokens,
11014
+ // Adaptive browser trimming (engages on actual browser tool USE, inside the Agent — see BROWSER_TRIM).
11015
+ // Config can tune the values; null-coalesced so a config 0/value still wins over the default.
11016
+ browserTrim: cfg.browserTrim ?? BROWSER_TRIM,
10928
11017
  learnFromMistakes: cfg.learnFromMistakes,
10929
11018
  // Forwarded to cursor/* delegations for environment parity (chat-model providers ignore it).
10930
11019
  // Raw config (pre-OAuth): unresolved-oauth http servers are skipped by the cursor mapper.
@@ -10946,9 +11035,6 @@ async function makeAgent(args, ai, cfg, extraTools = []) {
10946
11035
  if (args.harden && !virtual) err(dim(` \u26E8 hardened shell: writes confined to cwd+tmp${args.hardenNet ? "" : ", network blocked"} (sandbox-exec/bwrap)
10947
11036
  `));
10948
11037
  const opts = optsFor(args, ai, cfg, extraTools);
10949
- if (opts.maxContextTokens === BROWSER_MAX_CONTEXT_TOKENS && cfg.maxContextTokens == null)
10950
- err(dim(` \u229E browser MCP detected \u2014 auto-capping sent context at ~${BROWSER_MAX_CONTEXT_TOKENS / 1e3}k tok (set maxContextTokens in config to override)
10951
- `));
10952
11038
  const agent = await buildAgent(opts);
10953
11039
  const display = displayHooks(agent.options.fs, { flush: agent.options.host?.flushText });
10954
11040
  agent.options.hooks = cfg.hooks ? composeHooks(display, hooksFromConfig(cfg.hooks)) : display;