@livx.cc/agentx 0.96.15 → 0.96.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -240,6 +240,20 @@ declare class AgentOptions {
240
240
  tool: string;
241
241
  args: any;
242
242
  }) => string | Promise<string>;
243
+ /** Browser-run adaptive trimming. Browser MCP tools (`mcp__*browser*`) return huge per-call results
244
+ * (full DOM text + screenshots) that accumulate per step and blow the cumulative `maxTokens` budget
245
+ * kill-switch mid-browse. The fix attacks PER-STEP TOKEN GROWTH, and engages ONLY once a browser tool
246
+ * is ACTUALLY invoked (not on mere config presence — so normal coding is untouched):
247
+ * - `resultBytes`: tighter `maxToolResultBytes` applied to BROWSER tool results (each oversized DOM is
248
+ * cropped/spilled to a small stub → small fresh-token tail per step).
249
+ * - `keepOutputs`: keep only the most-recent N tool-result bodies verbatim ONCE a browser tool has run
250
+ * (older huge DOMs collapse to one-line stubs → small cached prefix → small 0.1×cacheRead/step).
251
+ * Explicit `keepToolOutputs`/`maxToolResultBytes` are NOT overridden below their browser values — the
252
+ * tighter of (user, browser) wins, never loosening a user's cap. Unset = off (no adaptive behavior). */
253
+ browserTrim?: {
254
+ resultBytes?: number;
255
+ keepOutputs?: number;
256
+ };
243
257
  /** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
244
258
  skillsDir?: string | string[];
245
259
  /** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */
@@ -326,6 +340,7 @@ declare class Agent {
326
340
  private systemPromptCache;
327
341
  private started;
328
342
  private parkedMs;
343
+ private browserActive;
329
344
  /** Time a human-blocking await (a permission/plan prompt) and bank it in `parkedMs` so idle prompt
330
345
  * time never trips the wall-clock kill-switch. The agent did no work while parked on the user. */
331
346
  private park;
package/dist/cli.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env bun
2
- import { H as Hooks, h as RunResult, R as ReasoningEffort, A as Agent } from './Agent-DRe91tAy.js';
2
+ import { H as Hooks, h as RunResult, R as ReasoningEffort, A as Agent } from './Agent-DdhD1pGw.js';
3
3
  import { IFilesystem } from '@livx.cc/wcli/core';
4
4
  import { M as Message, c as ContentPart, e as MessageContent } from './tools-DtpN8Agv.js';
5
5
 
package/dist/cli.js CHANGED
@@ -3141,6 +3141,9 @@ function reasoningToChatFragment(model, effort) {
3141
3141
 
3142
3142
  // src/Agent.ts
3143
3143
  var log4 = forComponent("Agent");
3144
+ function isBrowserTool(name) {
3145
+ return name.startsWith("mcp__") && /browser/i.test(name.slice(5).split("__")[0]);
3146
+ }
3144
3147
  function isAbortError(err2) {
3145
3148
  const e = err2;
3146
3149
  const blob = `${e?.message ?? ""} ${e?.name ?? ""} ${e?.code ?? ""} ${e?.cause?.name ?? ""}`;
@@ -3188,6 +3191,17 @@ var AgentOptions = class {
3188
3191
  * and returns the (cropped) string to put in context — e.g. spill to scratch and return a recoverable,
3189
3192
  * paginated stub. Called only when a result exceeds `maxToolResultBytes`. */
3190
3193
  capToolResult;
3194
+ /** Browser-run adaptive trimming. Browser MCP tools (`mcp__*browser*`) return huge per-call results
3195
+ * (full DOM text + screenshots) that accumulate per step and blow the cumulative `maxTokens` budget
3196
+ * kill-switch mid-browse. The fix attacks PER-STEP TOKEN GROWTH, and engages ONLY once a browser tool
3197
+ * is ACTUALLY invoked (not on mere config presence — so normal coding is untouched):
3198
+ * - `resultBytes`: tighter `maxToolResultBytes` applied to BROWSER tool results (each oversized DOM is
3199
+ * cropped/spilled to a small stub → small fresh-token tail per step).
3200
+ * - `keepOutputs`: keep only the most-recent N tool-result bodies verbatim ONCE a browser tool has run
3201
+ * (older huge DOMs collapse to one-line stubs → small cached prefix → small 0.1×cacheRead/step).
3202
+ * Explicit `keepToolOutputs`/`maxToolResultBytes` are NOT overridden below their browser values — the
3203
+ * tighter of (user, browser) wins, never loosening a user's cap. Unset = off (no adaptive behavior). */
3204
+ browserTrim;
3191
3205
  /** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
3192
3206
  skillsDir;
3193
3207
  /** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */
@@ -3272,6 +3286,8 @@ var Agent = class _Agent {
3272
3286
  // session-start lifecycle hook fires once per conversation
3273
3287
  parkedMs = 0;
3274
3288
  // cumulative time blocked on the HUMAN (permission/plan prompts) — excluded from the timeout
3289
+ browserActive = false;
3290
+ // flips true once a browser MCP tool is actually invoked → adaptive trimming engages (browserTrim)
3275
3291
  /** Time a human-blocking await (a permission/plan prompt) and bank it in `parkedMs` so idle prompt
3276
3292
  * time never trips the wall-clock kill-switch. The agent did no work while parked on the user. */
3277
3293
  async park(p) {
@@ -3680,7 +3696,11 @@ var Agent = class _Agent {
3680
3696
  }
3681
3697
  if (!threw) result = await this.maybeAutoTest(tc.function.name, result);
3682
3698
  if (images?.length && !result) result = `[${images.length} image${images.length > 1 ? "s" : ""} attached]`;
3683
- const cap = this.options.maxToolResultBytes ?? 0;
3699
+ const browser = isBrowserTool(tc.function.name);
3700
+ if (browser && this.options.browserTrim) this.browserActive = true;
3701
+ const browserBytes = browser ? this.options.browserTrim?.resultBytes : void 0;
3702
+ const baseCap = this.options.maxToolResultBytes ?? 0;
3703
+ const cap = browserBytes ? baseCap > 0 ? Math.min(baseCap, browserBytes) : browserBytes : baseCap;
3684
3704
  if (!threw && cap > 0 && result.length > cap) {
3685
3705
  const info = { tool: tc.function.name, args };
3686
3706
  result = this.options.capToolResult ? await this.options.capToolResult(result, info) : cropResult(result, cap);
@@ -3724,7 +3744,9 @@ ${out}`;
3724
3744
  let out = null;
3725
3745
  if (o.compaction?.maxMessages && m.length > o.compaction.maxMessages) out = compact(m, o.compaction.maxMessages);
3726
3746
  else if (o.maxContextMessages && m.length > o.maxContextMessages) out = dropOldest(m, o.maxContextMessages);
3727
- if (o.keepToolOutputs) out = stubOldToolResults(out ?? m, o.keepToolOutputs);
3747
+ const browserKeep = this.browserActive ? o.browserTrim?.keepOutputs : void 0;
3748
+ const keep = browserKeep != null ? o.keepToolOutputs ? Math.min(o.keepToolOutputs, browserKeep) : browserKeep : o.keepToolOutputs;
3749
+ if (keep) out = stubOldToolResults(out ?? m, keep);
3728
3750
  if (o.maxContextTokens) {
3729
3751
  const pre = (out ?? m).length;
3730
3752
  out = fitTokenBudget(out ?? m, o.maxContextTokens);
@@ -5918,6 +5940,9 @@ var SonioxSTTOptions = class {
5918
5940
  /** Client-side endpoint: finalized text + no new tokens for this long = utterance (don't wait for
5919
5941
  * Soniox's semantic <end>, which adds 0.5-1.5s — the difference between ping-pong and lag). */
5920
5942
  silenceEndpointMs = 500;
5943
+ /** No-audio watchdog: if the mic source stops delivering chunks for this long, capture is dead →
5944
+ * fire onFatal + stop (else Soniox idle-timeouts and reconnect-loops forever). 0 = disable. */
5945
+ noAudioTimeoutMs = 1e4;
5921
5946
  };
5922
5947
  var SonioxSTT = class {
5923
5948
  options;
@@ -5931,6 +5956,15 @@ var SonioxSTT = class {
5931
5956
  /** mic energy (RMS) per chunk — drives the energy-based heuristic barge-in tier */
5932
5957
  onLevel = () => {
5933
5958
  };
5959
+ /** Unrecoverable: the mic source stopped delivering audio (Soniox starves → idle-timeout reconnect
5960
+ * loop). The host tears voice down instead of spinning forever. */
5961
+ onFatal = () => {
5962
+ };
5963
+ lastChunkAt = 0;
5964
+ // timestamp of the most recent mic chunk (0 = none yet)
5965
+ startedChunksAt = 0;
5966
+ // when capture started (grace before the first chunk)
5967
+ noAudioTimer = null;
5934
5968
  finalText = "";
5935
5969
  partialText = "";
5936
5970
  lastChangeAt = 0;
@@ -5982,7 +6016,22 @@ var SonioxSTT = class {
5982
6016
  this.onUtterance(combined, now2());
5983
6017
  }, 120);
5984
6018
  this.endpointTimer.unref?.();
6019
+ this.startedChunksAt = now2();
6020
+ const noAudioMs = this.options.noAudioTimeoutMs;
6021
+ if (noAudioMs > 0) {
6022
+ this.noAudioTimer = setInterval(() => {
6023
+ if (this.stopped) return;
6024
+ const ref = this.lastChunkAt || this.startedChunksAt;
6025
+ if (now2() - ref > noAudioMs) {
6026
+ log10.error(`stt: no mic audio for >${Math.round(noAudioMs / 1e3)}s \u2014 capture device stopped delivering`);
6027
+ this.onFatal("microphone stopped delivering audio (try a different input device, e.g. AirPods, or check System Settings \u2192 Sound \u2192 Input)");
6028
+ this.stop();
6029
+ }
6030
+ }, Math.max(250, Math.min(2e3, noAudioMs / 4)));
6031
+ this.noAudioTimer.unref?.();
6032
+ }
5985
6033
  await this.options.source.start((chunk) => {
6034
+ this.lastChunkAt = now2();
5986
6035
  let sum = 0;
5987
6036
  const view = new DataView(chunk.buffer, chunk.byteOffset, chunk.byteLength);
5988
6037
  for (let i = 0; i + 1 < chunk.byteLength; i += 2) {
@@ -6024,6 +6073,7 @@ var SonioxSTT = class {
6024
6073
  stop() {
6025
6074
  this.stopped = true;
6026
6075
  if (this.endpointTimer) clearInterval(this.endpointTimer);
6076
+ if (this.noAudioTimer) clearInterval(this.noAudioTimer);
6027
6077
  this.options.source?.stop();
6028
6078
  if (this.ws) this.ws.onclose = null;
6029
6079
  this.ws?.close();
@@ -7022,7 +7072,8 @@ The filesystem root '/' is the real machine root \u2014 you have full filesystem
7022
7072
  ...o.maxToolCalls != null ? { maxToolCalls: o.maxToolCalls } : {},
7023
7073
  // Context-v2: by default, note-take over a long session (stub tool outputs older than the last 8).
7024
7074
  keepToolOutputs: o.keepToolOutputs ?? 8,
7025
- ...o.maxContextTokens != null ? { maxContextTokens: o.maxContextTokens } : {}
7075
+ ...o.maxContextTokens != null ? { maxContextTokens: o.maxContextTokens } : {},
7076
+ ...o.browserTrim ? { browserTrim: o.browserTrim } : {}
7026
7077
  });
7027
7078
  }
7028
7079
  function summarizeCall(name, args) {
@@ -7360,10 +7411,11 @@ var VoiceIO = class extends VoiceEngine {
7360
7411
  });
7361
7412
  this.duplexSource = duplex;
7362
7413
  }
7363
- /** Host hook for an unrecoverable audio-source failure (e.g. mic permission denied). Only the duplex
7364
- * AEC source can hit it; a no-op otherwise. */
7414
+ /** Host hook for an unrecoverable audio failure — mic permission denied (duplex source) or no mic
7415
+ * audio at all (STT watchdog). Routed to whichever can detect it. */
7365
7416
  set onFatal(fn) {
7366
7417
  if (this.duplexSource) this.duplexSource.onFatal = fn;
7418
+ if (this.stt) this.stt.onFatal = fn;
7367
7419
  }
7368
7420
  /** ready = keys present (AEC vs heuristic is decided at start()) */
7369
7421
  static available(env = process.env) {
@@ -10878,12 +10930,7 @@ function makeAskResolver(cwd) {
10878
10930
  }
10879
10931
  };
10880
10932
  }
10881
- function hasBrowserMcp(cfg, extraTools) {
10882
- const isBrowser = (s) => /browser/i.test(s);
10883
- if (Object.entries(cfg.mcpServers ?? {}).some(([n, c]) => isBrowser(n) && !c?.disabled)) return true;
10884
- return extraTools.some((t) => t.name.startsWith("mcp__") && isBrowser(t.name.slice(5).split("__")[0]));
10885
- }
10886
- var BROWSER_MAX_CONTEXT_TOKENS = 12e4;
10933
+ var BROWSER_TRIM = { resultBytes: 8e3, keepOutputs: 2 };
10887
10934
  function optsFor(args, ai, cfg = {}, extraTools = []) {
10888
10935
  const perm = resolvePermMode(args, canPrompt);
10889
10936
  if (perm.notice) err(dim(` \u26A0 ${perm.notice}
@@ -10922,9 +10969,10 @@ function optsFor(args, ai, cfg = {}, extraTools = []) {
10922
10969
  maxRepeats: cfg.maxRepeats,
10923
10970
  maxToolCalls: cfg.maxToolCalls,
10924
10971
  keepToolOutputs: cfg.keepToolOutputs,
10925
- // Token-aware backstop: explicit config wins; else auto-enable for browser MCP runs (huge DOM/screenshot
10926
- // results otherwise accumulate past the maxTokens budget kill-switch mid-browse). Off for normal sessions.
10927
- maxContextTokens: cfg.maxContextTokens ?? (hasBrowserMcp(cfg, extraTools) ? BROWSER_MAX_CONTEXT_TOKENS : void 0),
10972
+ maxContextTokens: cfg.maxContextTokens,
10973
+ // Adaptive browser trimming (engages on actual browser tool USE, inside the Agent — see BROWSER_TRIM).
10974
+ // Config can tune the values; `??` falls back to the default only when config leaves browserTrim null/undefined.
10975
+ browserTrim: cfg.browserTrim ?? BROWSER_TRIM,
10928
10976
  learnFromMistakes: cfg.learnFromMistakes,
10929
10977
  // Forwarded to cursor/* delegations for environment parity (chat-model providers ignore it).
10930
10978
  // Raw config (pre-OAuth): unresolved-oauth http servers are skipped by the cursor mapper.
@@ -10946,9 +10994,6 @@ async function makeAgent(args, ai, cfg, extraTools = []) {
10946
10994
  if (args.harden && !virtual) err(dim(` \u26E8 hardened shell: writes confined to cwd+tmp${args.hardenNet ? "" : ", network blocked"} (sandbox-exec/bwrap)
10947
10995
  `));
10948
10996
  const opts = optsFor(args, ai, cfg, extraTools);
10949
- if (opts.maxContextTokens === BROWSER_MAX_CONTEXT_TOKENS && cfg.maxContextTokens == null)
10950
- err(dim(` \u229E browser MCP detected \u2014 auto-capping sent context at ~${BROWSER_MAX_CONTEXT_TOKENS / 1e3}k tok (set maxContextTokens in config to override)
10951
- `));
10952
10997
  const agent = await buildAgent(opts);
10953
10998
  const display = displayHooks(agent.options.fs, { flush: agent.options.host?.flushText });
10954
10999
  agent.options.hooks = cfg.hooks ? composeHooks(display, hooksFromConfig(cfg.hooks)) : display;