@livx.cc/agentx 0.96.14 → 0.96.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -240,6 +240,20 @@ declare class AgentOptions {
240
240
  tool: string;
241
241
  args: any;
242
242
  }) => string | Promise<string>;
243
+ /** Browser-run adaptive trimming. Browser MCP tools (`mcp__*browser*`) return huge per-call results
244
+ * (full DOM text + screenshots) that accumulate per step and blow the cumulative `maxTokens` budget
245
+ * kill-switch mid-browse. The fix attacks PER-STEP TOKEN GROWTH, and engages ONLY once a browser tool
246
+ * is ACTUALLY invoked (not on mere config presence — so normal coding is untouched):
247
+ * - `resultBytes`: tighter `maxToolResultBytes` applied to BROWSER tool results (each oversized DOM is
248
+ * cropped/spilled to a small stub → small fresh-token tail per step).
249
+ * - `keepOutputs`: keep only the most-recent N tool-result bodies verbatim ONCE a browser tool has run
250
+ * (older huge DOMs collapse to one-line stubs → small cached prefix → small 0.1×cacheRead/step).
251
+ * Explicit `keepToolOutputs`/`maxToolResultBytes` are NOT overridden below their browser values — the
252
+ * tighter of (user, browser) wins, never loosening a user's cap. Unset = off (no adaptive behavior). */
253
+ browserTrim?: {
254
+ resultBytes?: number;
255
+ keepOutputs?: number;
256
+ };
243
257
  /** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
244
258
  skillsDir?: string | string[];
245
259
  /** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */
@@ -326,6 +340,7 @@ declare class Agent {
326
340
  private systemPromptCache;
327
341
  private started;
328
342
  private parkedMs;
343
+ private browserActive;
329
344
  /** Time a human-blocking await (a permission/plan prompt) and bank it in `parkedMs` so idle prompt
330
345
  * time never trips the wall-clock kill-switch. The agent did no work while parked on the user. */
331
346
  private park;
package/dist/cli.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env bun
2
- import { H as Hooks, h as RunResult, R as ReasoningEffort, A as Agent } from './Agent-DRe91tAy.js';
2
+ import { H as Hooks, h as RunResult, R as ReasoningEffort, A as Agent } from './Agent-DdhD1pGw.js';
3
3
  import { IFilesystem } from '@livx.cc/wcli/core';
4
4
  import { M as Message, c as ContentPart, e as MessageContent } from './tools-DtpN8Agv.js';
5
5
 
package/dist/cli.js CHANGED
@@ -3141,6 +3141,9 @@ function reasoningToChatFragment(model, effort) {
3141
3141
 
3142
3142
  // src/Agent.ts
3143
3143
  var log4 = forComponent("Agent");
3144
/**
 * Reports whether `name` identifies a browser MCP tool.
 *
 * The name must start with `mcp__`; the segment between that prefix and the next `__`
 * (presumably the MCP server name, as in `mcp__<server>__<tool>`) is then tested
 * case-insensitively for "browser". Only that first segment is inspected, so a tool such as
 * `mcp__fs__open_browser` does not count.
 *
 * @param name Tool name taken from a tool call. A missing or non-string value yields `false`
 *             instead of throwing, so a malformed tool call cannot crash result handling.
 * @returns `true` only for `mcp__`-prefixed names whose first segment mentions "browser".
 */
function isBrowserTool(name) {
  if (typeof name !== "string" || !name.startsWith("mcp__")) return false;
  // "mcp__" is 5 characters; split() always yields at least one element.
  const firstSegment = name.slice(5).split("__")[0];
  return /browser/i.test(firstSegment);
}
3144
3147
  function isAbortError(err2) {
3145
3148
  const e = err2;
3146
3149
  const blob = `${e?.message ?? ""} ${e?.name ?? ""} ${e?.code ?? ""} ${e?.cause?.name ?? ""}`;
@@ -3188,6 +3191,17 @@ var AgentOptions = class {
3188
3191
  * and returns the (cropped) string to put in context — e.g. spill to scratch and return a recoverable,
3189
3192
  * paginated stub. Called only when a result exceeds `maxToolResultBytes`. */
3190
3193
  capToolResult;
3194
+ /** Browser-run adaptive trimming. Browser MCP tools (`mcp__*browser*`) return huge per-call results
3195
+ * (full DOM text + screenshots) that accumulate per step and blow the cumulative `maxTokens` budget
3196
+ * kill-switch mid-browse. The fix attacks PER-STEP TOKEN GROWTH, and engages ONLY once a browser tool
3197
+ * is ACTUALLY invoked (not on mere config presence — so normal coding is untouched):
3198
+ * - `resultBytes`: tighter `maxToolResultBytes` applied to BROWSER tool results (each oversized DOM is
3199
+ * cropped/spilled to a small stub → small fresh-token tail per step).
3200
+ * - `keepOutputs`: keep only the most-recent N tool-result bodies verbatim ONCE a browser tool has run
3201
+ * (older huge DOMs collapse to one-line stubs → small cached prefix → small 0.1×cacheRead/step).
3202
+ * Explicit `keepToolOutputs`/`maxToolResultBytes` are NOT overridden below their browser values — the
3203
+ * tighter of (user, browser) wins, never loosening a user's cap. Unset = off (no adaptive behavior). */
3204
+ browserTrim;
3191
3205
  /** VFS dir(s) of skills (`<dir>/<id>/SKILL.md`). If set: inject a catalog + add the `Skill` tool. Multiple dirs are merged (first wins on name collisions). */
3192
3206
  skillsDir;
3193
3207
  /** VFS dir(s) of slash-command templates (`<dir>/<name>.md`). If set: inject a catalog + add the `SlashCommand` tool. Multiple dirs are merged (first wins). */
@@ -3272,6 +3286,8 @@ var Agent = class _Agent {
3272
3286
  // session-start lifecycle hook fires once per conversation
3273
3287
  parkedMs = 0;
3274
3288
  // cumulative time blocked on the HUMAN (permission/plan prompts) — excluded from the timeout
3289
+ browserActive = false;
3290
+ // flips true once a browser MCP tool is actually invoked → adaptive trimming engages (browserTrim)
3275
3291
  /** Time a human-blocking await (a permission/plan prompt) and bank it in `parkedMs` so idle prompt
3276
3292
  * time never trips the wall-clock kill-switch. The agent did no work while parked on the user. */
3277
3293
  async park(p) {
@@ -3680,7 +3696,11 @@ var Agent = class _Agent {
3680
3696
  }
3681
3697
  if (!threw) result = await this.maybeAutoTest(tc.function.name, result);
3682
3698
  if (images?.length && !result) result = `[${images.length} image${images.length > 1 ? "s" : ""} attached]`;
3683
- const cap = this.options.maxToolResultBytes ?? 0;
3699
+ const browser = isBrowserTool(tc.function.name);
3700
+ if (browser && this.options.browserTrim) this.browserActive = true;
3701
+ const browserBytes = browser ? this.options.browserTrim?.resultBytes : void 0;
3702
+ const baseCap = this.options.maxToolResultBytes ?? 0;
3703
+ const cap = browserBytes ? baseCap > 0 ? Math.min(baseCap, browserBytes) : browserBytes : baseCap;
3684
3704
  if (!threw && cap > 0 && result.length > cap) {
3685
3705
  const info = { tool: tc.function.name, args };
3686
3706
  result = this.options.capToolResult ? await this.options.capToolResult(result, info) : cropResult(result, cap);
@@ -3724,7 +3744,9 @@ ${out}`;
3724
3744
  let out = null;
3725
3745
  if (o.compaction?.maxMessages && m.length > o.compaction.maxMessages) out = compact(m, o.compaction.maxMessages);
3726
3746
  else if (o.maxContextMessages && m.length > o.maxContextMessages) out = dropOldest(m, o.maxContextMessages);
3727
- if (o.keepToolOutputs) out = stubOldToolResults(out ?? m, o.keepToolOutputs);
3747
+ const browserKeep = this.browserActive ? o.browserTrim?.keepOutputs : void 0;
3748
+ const keep = browserKeep != null ? o.keepToolOutputs ? Math.min(o.keepToolOutputs, browserKeep) : browserKeep : o.keepToolOutputs;
3749
+ if (keep) out = stubOldToolResults(out ?? m, keep);
3728
3750
  if (o.maxContextTokens) {
3729
3751
  const pre = (out ?? m).length;
3730
3752
  out = fitTokenBudget(out ?? m, o.maxContextTokens);
@@ -5918,6 +5940,9 @@ var SonioxSTTOptions = class {
5918
5940
  /** Client-side endpoint: finalized text + no new tokens for this long = utterance (don't wait for
5919
5941
  * Soniox's semantic <end>, which adds 0.5-1.5s — the difference between ping-pong and lag). */
5920
5942
  silenceEndpointMs = 500;
5943
+ /** No-audio watchdog: if the mic source stops delivering chunks for this long, capture is dead →
5944
+ * fire onFatal + stop (else Soniox idle-timeouts and reconnect-loops forever). 0 = disable. */
5945
+ noAudioTimeoutMs = 1e4;
5921
5946
  };
5922
5947
  var SonioxSTT = class {
5923
5948
  options;
@@ -5931,6 +5956,15 @@ var SonioxSTT = class {
5931
5956
  /** mic energy (RMS) per chunk — drives the energy-based heuristic barge-in tier */
5932
5957
  onLevel = () => {
5933
5958
  };
5959
+ /** Unrecoverable: the mic source stopped delivering audio (Soniox starves → idle-timeout reconnect
5960
+ * loop). The host tears voice down instead of spinning forever. */
5961
+ onFatal = () => {
5962
+ };
5963
+ lastChunkAt = 0;
5964
+ // timestamp of the most recent mic chunk (0 = none yet)
5965
+ startedChunksAt = 0;
5966
+ // when capture started (grace before the first chunk)
5967
+ noAudioTimer = null;
5934
5968
  finalText = "";
5935
5969
  partialText = "";
5936
5970
  lastChangeAt = 0;
@@ -5982,7 +6016,22 @@ var SonioxSTT = class {
5982
6016
  this.onUtterance(combined, now2());
5983
6017
  }, 120);
5984
6018
  this.endpointTimer.unref?.();
6019
+ this.startedChunksAt = now2();
6020
+ const noAudioMs = this.options.noAudioTimeoutMs;
6021
+ if (noAudioMs > 0) {
6022
+ this.noAudioTimer = setInterval(() => {
6023
+ if (this.stopped) return;
6024
+ const ref = this.lastChunkAt || this.startedChunksAt;
6025
+ if (now2() - ref > noAudioMs) {
6026
+ log10.error(`stt: no mic audio for >${Math.round(noAudioMs / 1e3)}s \u2014 capture device stopped delivering`);
6027
+ this.onFatal("microphone stopped delivering audio (try a different input device, e.g. AirPods, or check System Settings \u2192 Sound \u2192 Input)");
6028
+ this.stop();
6029
+ }
6030
+ }, Math.max(250, Math.min(2e3, noAudioMs / 4)));
6031
+ this.noAudioTimer.unref?.();
6032
+ }
5985
6033
  await this.options.source.start((chunk) => {
6034
+ this.lastChunkAt = now2();
5986
6035
  let sum = 0;
5987
6036
  const view = new DataView(chunk.buffer, chunk.byteOffset, chunk.byteLength);
5988
6037
  for (let i = 0; i + 1 < chunk.byteLength; i += 2) {
@@ -6024,6 +6073,7 @@ var SonioxSTT = class {
6024
6073
  stop() {
6025
6074
  this.stopped = true;
6026
6075
  if (this.endpointTimer) clearInterval(this.endpointTimer);
6076
+ if (this.noAudioTimer) clearInterval(this.noAudioTimer);
6027
6077
  this.options.source?.stop();
6028
6078
  if (this.ws) this.ws.onclose = null;
6029
6079
  this.ws?.close();
@@ -7022,7 +7072,8 @@ The filesystem root '/' is the real machine root \u2014 you have full filesystem
7022
7072
  ...o.maxToolCalls != null ? { maxToolCalls: o.maxToolCalls } : {},
7023
7073
  // Context-v2: by default, note-take over a long session (stub tool outputs older than the last 8).
7024
7074
  keepToolOutputs: o.keepToolOutputs ?? 8,
7025
- ...o.maxContextTokens != null ? { maxContextTokens: o.maxContextTokens } : {}
7075
+ ...o.maxContextTokens != null ? { maxContextTokens: o.maxContextTokens } : {},
7076
+ ...o.browserTrim ? { browserTrim: o.browserTrim } : {}
7026
7077
  });
7027
7078
  }
7028
7079
  function summarizeCall(name, args) {
@@ -7147,6 +7198,23 @@ function resolveAecBinary() {
7147
7198
  }
7148
7199
  return bin;
7149
7200
  }
7201
/**
 * Best-effort helper: on macOS, asks `open` to show the `Privacy_Microphone` section of the
 * system preferences security pane. Does nothing on other platforms, and never throws.
 */
function openMicSettings() {
  const onMac = process.platform === "darwin";
  if (onMac) {
    const paneUrl = "x-apple.systempreferences:com.apple.preference.security?Privacy_Microphone";
    try {
      spawnSync2("open", [paneUrl]);
    } catch {
      // Deliberately ignored: failing to open the settings pane must not disturb the caller.
    }
  }
}
7208
/**
 * Queries the binary returned by `resolveAecBinary()` (invoked with `--check-mic`) for the
 * microphone permission state and normalises its trimmed stdout.
 *
 * @returns `"authorized"`, `"denied"` (the binary printed `denied` or `restricted`),
 *          `"notDetermined"`, or `null` when no binary resolves or the output is anything else
 *          (including no output at all).
 */
function micPermissionStatus() {
  const helper = resolveAecBinary();
  if (!helper) return null;
  const { stdout } = spawnSync2(helper, ["--check-mic"], { encoding: "utf8" });
  switch ((stdout ?? "").trim()) {
    case "authorized":
      return "authorized";
    case "denied":
    case "restricted":
      return "denied";
    case "notDetermined":
      return "notDetermined";
    default:
      return null;
  }
}
7150
7218
  function aecUnavailableHint() {
7151
7219
  if (process.env.MIC_AEC === "0" || process.platform !== "darwin") return null;
7152
7220
  if (resolveAecBinary()) return null;
@@ -7200,8 +7268,10 @@ var AecDuplexAudio = class {
7200
7268
  }
7201
7269
  bin;
7202
7270
  aec = true;
7271
+ onFatal;
7203
7272
  proc = null;
7204
7273
  stopped = false;
7274
+ micDenied = false;
7205
7275
  bytesWritten = 0;
7206
7276
  startedAt = 0;
7207
7277
  // --- AudioSource ---
@@ -7217,8 +7287,13 @@ var AecDuplexAudio = class {
7217
7287
  for (const ln of String(d).split("\n")) {
7218
7288
  const s = ln.trim();
7219
7289
  if (!s) continue;
7220
- if (/mic access granted:\s*false/i.test(s)) log16.warn("mic-aec: microphone permission DENIED \u2014 grant it in System Settings \u2192 Privacy & Security \u2192 Microphone for your terminal, then restart it");
7221
- else log16.debug(`mic-aec: ${s}`);
7290
+ if (/mic access granted:\s*false/i.test(s)) {
7291
+ if (!this.micDenied) {
7292
+ this.micDenied = true;
7293
+ openMicSettings();
7294
+ this.onFatal?.("microphone permission denied \u2014 enable it in System Settings \u2192 Privacy & Security \u2192 Microphone for your terminal, then restart it");
7295
+ }
7296
+ } else log16.debug(`mic-aec: ${s}`);
7222
7297
  }
7223
7298
  });
7224
7299
  }
@@ -7319,6 +7394,7 @@ var VoiceIOOptions = class extends VoiceEngineOptions {
7319
7394
  cartesiaVoiceId = process.env.CARTESIA_VOICE_ID ?? "";
7320
7395
  };
7321
7396
  var VoiceIO = class extends VoiceEngine {
7397
+ duplexSource;
7322
7398
  constructor(options) {
7323
7399
  const o = { ...new VoiceIOOptions(), ...options };
7324
7400
  const bin = !o.stt || !o.player ? resolveAecBinary() : null;
@@ -7333,6 +7409,13 @@ var VoiceIO = class extends VoiceEngine {
7333
7409
  overlapEnergyHold: process.env.OVERLAP_ENERGY_HOLD === "1" || o.overlapEnergyHold
7334
7410
  // textless residue pre-pause: opt-in (hiccup source)
7335
7411
  });
7412
+ this.duplexSource = duplex;
7413
+ }
7414
+ /** Host hook for an unrecoverable audio failure — mic permission denied (duplex source) or no mic
7415
+ * audio at all (STT watchdog). Routed to whichever can detect it. */
7416
+ set onFatal(fn) {
7417
+ if (this.duplexSource) this.duplexSource.onFatal = fn;
7418
+ if (this.stt) this.stt.onFatal = fn;
7336
7419
  }
7337
7420
  /** ready = keys present (AEC vs heuristic is decided at start()) */
7338
7421
  static available(env = process.env) {
@@ -10847,12 +10930,7 @@ function makeAskResolver(cwd) {
10847
10930
  }
10848
10931
  };
10849
10932
  }
10850
- function hasBrowserMcp(cfg, extraTools) {
10851
- const isBrowser = (s) => /browser/i.test(s);
10852
- if (Object.entries(cfg.mcpServers ?? {}).some(([n, c]) => isBrowser(n) && !c?.disabled)) return true;
10853
- return extraTools.some((t) => t.name.startsWith("mcp__") && isBrowser(t.name.slice(5).split("__")[0]));
10854
- }
10855
- var BROWSER_MAX_CONTEXT_TOKENS = 12e4;
10933
+ var BROWSER_TRIM = { resultBytes: 8e3, keepOutputs: 2 };
10856
10934
  function optsFor(args, ai, cfg = {}, extraTools = []) {
10857
10935
  const perm = resolvePermMode(args, canPrompt);
10858
10936
  if (perm.notice) err(dim(` \u26A0 ${perm.notice}
@@ -10891,9 +10969,10 @@ function optsFor(args, ai, cfg = {}, extraTools = []) {
10891
10969
  maxRepeats: cfg.maxRepeats,
10892
10970
  maxToolCalls: cfg.maxToolCalls,
10893
10971
  keepToolOutputs: cfg.keepToolOutputs,
10894
- // Token-aware backstop: explicit config wins; else auto-enable for browser MCP runs (huge DOM/screenshot
10895
- // results otherwise accumulate past the maxTokens budget kill-switch mid-browse). Off for normal sessions.
10896
- maxContextTokens: cfg.maxContextTokens ?? (hasBrowserMcp(cfg, extraTools) ? BROWSER_MAX_CONTEXT_TOKENS : void 0),
10972
+ maxContextTokens: cfg.maxContextTokens,
10973
+ // Adaptive browser trimming (engages on actual browser tool USE, inside the Agent — see BROWSER_TRIM).
10974
+ // Config can tune the values; null-coalesced so a config 0/value still wins over the default.
10975
+ browserTrim: cfg.browserTrim ?? BROWSER_TRIM,
10897
10976
  learnFromMistakes: cfg.learnFromMistakes,
10898
10977
  // Forwarded to cursor/* delegations for environment parity (chat-model providers ignore it).
10899
10978
  // Raw config (pre-OAuth): unresolved-oauth http servers are skipped by the cursor mapper.
@@ -10915,9 +10994,6 @@ async function makeAgent(args, ai, cfg, extraTools = []) {
10915
10994
  if (args.harden && !virtual) err(dim(` \u26E8 hardened shell: writes confined to cwd+tmp${args.hardenNet ? "" : ", network blocked"} (sandbox-exec/bwrap)
10916
10995
  `));
10917
10996
  const opts = optsFor(args, ai, cfg, extraTools);
10918
- if (opts.maxContextTokens === BROWSER_MAX_CONTEXT_TOKENS && cfg.maxContextTokens == null)
10919
- err(dim(` \u229E browser MCP detected \u2014 auto-capping sent context at ~${BROWSER_MAX_CONTEXT_TOKENS / 1e3}k tok (set maxContextTokens in config to override)
10920
- `));
10921
10997
  const agent = await buildAgent(opts);
10922
10998
  const display = displayHooks(agent.options.fs, { flush: agent.options.host?.flushText });
10923
10999
  agent.options.hooks = cfg.hooks ? composeHooks(display, hooksFromConfig(cfg.hooks)) : display;
@@ -12020,6 +12096,22 @@ ${task}`;
12020
12096
  const keys = ["ANTHROPIC_API_KEY", "OPENAI_API_KEY", "GOOGLE_API_KEY", "GROQ_API_KEY"].filter((k) => process.env[k]);
12021
12097
  keys.length ? ok(`provider keys: ${keys.join(", ")}`) : bad("no provider keys set (ANTHROPIC_API_KEY / OPENAI_API_KEY / GOOGLE_API_KEY / GROQ_API_KEY)");
12022
12098
  process.env.BODIFY_API_KEY && process.env.BODIFY_APP_ID ? ok(`bodify secrets: ${process.env.BODIFY_APP_ID}`) : warn("bodify secrets: not configured (set BODIFY_API_KEY + BODIFY_APP_ID)");
12099
+ {
12100
+ const { spawnSync: spawnSync7 } = await import("child_process");
12101
+ const has = (cmd) => spawnSync7("which", [cmd]).status === 0;
12102
+ if (!VoiceIO.available()) warn("voice: keys missing (SONIOX_API_KEY / CARTESIA_API_KEY / CARTESIA_VOICE_ID) \u2014 voice disabled");
12103
+ else {
12104
+ ok("voice: keys present");
12105
+ has("ffmpeg") ? ok("voice: ffmpeg installed") : bad("voice: ffmpeg missing \u2014 `brew install ffmpeg`");
12106
+ if (process.platform === "darwin") {
12107
+ has("swiftc") ? ok("voice: swiftc present (AEC echo cancellation)") : warn("voice: no swiftc \u2014 `xcode-select --install` for echo cancellation (else heuristic tier)");
12108
+ const mic = micPermissionStatus();
12109
+ if (mic === "authorized") ok("voice: microphone permission granted");
12110
+ else if (mic === "denied") bad("voice: microphone permission DENIED \u2014 System Settings \u2192 Privacy & Security \u2192 Microphone, then restart your terminal");
12111
+ else if (mic === "notDetermined") warn("voice: microphone permission not yet granted \u2014 you'll be prompted on first --voice");
12112
+ }
12113
+ }
12114
+ }
12023
12115
  const info = getModelInfo(work.model);
12024
12116
  info?.pricing ? ok(`model ${work.model} \u2014 priced (${info.pricing.inputCostPer1K}/${info.pricing.outputCostPer1K} per 1k in/out)`) : warn(`model ${work.model} \u2014 no pricing in the catalog (costs will show ~$0; verify the id)`);
12025
12117
  const cfgFiles = ["ts", "js", "json"].flatMap((e) => [`${cwd}/.agent/config.${e}`, `${homedir9()}/.agent/config.${e}`]).filter((p) => existsSync9(p));
@@ -13073,6 +13165,17 @@ ${out}
13073
13165
  }).finally(() => editorRef?.redrawNow());
13074
13166
  }
13075
13167
  });
13168
+ voiceIO.onFatal = (msg) => {
13169
+ err(yellow(`
13170
+ \u26A0 voice off \u2014 ${msg}
13171
+ `));
13172
+ if (voiceIO) {
13173
+ voiceIO.stop();
13174
+ voiceIO = void 0;
13175
+ voicePartial = "";
13176
+ editorRef?.redrawNow();
13177
+ }
13178
+ };
13076
13179
  try {
13077
13180
  await voiceIO.start();
13078
13181
  const inDev = voiceIO.usingAec ? detectedInputDevice() : null;