omnius 1.0.212 → 1.0.214

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -4896,6 +4896,29 @@ var init_shell = __esm({
4896
4896
  hasSudoPassword() {
4897
4897
  return this._sudoPassword !== null;
4898
4898
  }
4899
+ isConcurrencySafe(args) {
4900
+ return this.isReadOnly(args);
4901
+ }
4902
+ isReadOnly(args) {
4903
+ const command = String(args["command"] ?? "").trim();
4904
+ if (!command || args["stdin"] !== void 0)
4905
+ return false;
4906
+ const normalized = command.replace(/\s+/g, " ");
4907
+ if (/\bcd\b/.test(normalized))
4908
+ return false;
4909
+ if (/[<>]|\|\s*(?:tee|xargs)\b|\btee\b/.test(normalized))
4910
+ return false;
4911
+ if (/\b(?:sudo|su|rm|mv|cp|touch|mkdir|rmdir|chmod|chown|ln|truncate|dd|patch|apply_patch)\b/.test(normalized)) {
4912
+ return false;
4913
+ }
4914
+ if (/\bgit\s+(?:add|commit|checkout|switch|reset|clean|push|pull|merge|rebase|apply|am|stash|tag)\b/.test(normalized)) {
4915
+ return false;
4916
+ }
4917
+ if (/\b(?:npm|pnpm|yarn|bun)\s+(?:install|i|ci|add|remove|run|test|build)\b/.test(normalized)) {
4918
+ return false;
4919
+ }
4920
+ return /^(?:pwd|ls\b|find\b|rg\b|grep\b|cat\b|head\b|tail\b|wc\b|sed\s+-n\b|awk\b|git\s+(?:status|diff|log|show|branch|rev-parse|ls-files|grep)\b|which\b|command\s+-v\b|node\s+--version\b|node\s+-v\b|python(?:3)?\s+--version\b)/.test(normalized);
4921
+ }
4899
4922
  async execute(args) {
4900
4923
  const start2 = performance.now();
4901
4924
  const command = args["command"];
@@ -6112,6 +6135,7 @@ var init_file_read = __esm({
6112
6135
  ];
6113
6136
  FileReadTool = class {
6114
6137
  name = "file_read";
6138
+ aliases = ["read_file", "read", "cat"];
6115
6139
  description = "Read the contents of a file. For large files (200+ lines), returns a structural preview with signatures — use offset/limit to read specific sections.";
6116
6140
  parameters = {
6117
6141
  type: "object",
@@ -6127,6 +6151,12 @@ var init_file_read = __esm({
6127
6151
  constructor(workingDir) {
6128
6152
  this.workingDir = workingDir;
6129
6153
  }
6154
+ isConcurrencySafe() {
6155
+ return true;
6156
+ }
6157
+ isReadOnly() {
6158
+ return true;
6159
+ }
6130
6160
  /** Set actual context window size for proportional auto-windowing */
6131
6161
  setContextWindowSize(size) {
6132
6162
  this._contextWindowSize = size;
@@ -6662,6 +6692,7 @@ var init_grep_search = __esm({
6662
6692
  MAX_OUTPUT_LINES = 100;
6663
6693
  GrepSearchTool = class {
6664
6694
  name = "grep_search";
6695
+ aliases = ["grep", "ripgrep", "search_text"];
6665
6696
  description = "Search file contents using regex patterns. Returns matching lines with file paths and line numbers.";
6666
6697
  parameters = {
6667
6698
  type: "object",
@@ -6685,6 +6716,12 @@ var init_grep_search = __esm({
6685
6716
  constructor(workingDir) {
6686
6717
  this.workingDir = workingDir;
6687
6718
  }
6719
+ isConcurrencySafe() {
6720
+ return true;
6721
+ }
6722
+ isReadOnly() {
6723
+ return true;
6724
+ }
6688
6725
  async execute(args) {
6689
6726
  const pattern = args["pattern"];
6690
6727
  const searchPath = resolve4(this.workingDir, args["path"] ?? ".");
@@ -6767,6 +6804,7 @@ var init_glob_find = __esm({
6767
6804
  MAX_RESULTS = 50;
6768
6805
  GlobFindTool = class {
6769
6806
  name = "find_files";
6807
+ aliases = ["glob", "find"];
6770
6808
  description = "Find files matching a glob pattern. Returns list of matching file paths.";
6771
6809
  parameters = {
6772
6810
  type: "object",
@@ -6786,6 +6824,12 @@ var init_glob_find = __esm({
6786
6824
  constructor(workingDir) {
6787
6825
  this.workingDir = workingDir;
6788
6826
  }
6827
+ isConcurrencySafe() {
6828
+ return true;
6829
+ }
6830
+ isReadOnly() {
6831
+ return true;
6832
+ }
6789
6833
  async execute(args) {
6790
6834
  const pattern = args["pattern"];
6791
6835
  const searchPath = resolve5(this.workingDir, args["path"] ?? ".");
@@ -24373,6 +24417,7 @@ var init_list_directory = __esm({
24373
24417
  MAX_ENTRIES = 100;
24374
24418
  ListDirectoryTool = class {
24375
24419
  name = "list_directory";
24420
+ aliases = ["ls", "dir"];
24376
24421
  description = "List files and directories at a given path. Shows file sizes and types. Output includes full relative paths you can use directly in subsequent tool calls.";
24377
24422
  parameters = {
24378
24423
  type: "object",
@@ -24388,6 +24433,12 @@ var init_list_directory = __esm({
24388
24433
  constructor(workingDir) {
24389
24434
  this.workingDir = workingDir;
24390
24435
  }
24436
+ isConcurrencySafe() {
24437
+ return true;
24438
+ }
24439
+ isReadOnly() {
24440
+ return true;
24441
+ }
24391
24442
  async execute(args) {
24392
24443
  const rawPath = args["path"];
24393
24444
  const dirPath = typeof rawPath === "string" && rawPath.trim() ? rawPath : ".";
@@ -551582,28 +551633,38 @@ var init_personality = __esm({
551582
551633
  });
551583
551634
 
551584
551635
  // packages/orchestrator/dist/critic.js
551585
- function buildForceProgressBlockMessage(call, hits) {
551636
+ function buildCriticGuidanceMessage(call, hits, opts = {}) {
551586
551637
  const argPreview = JSON.stringify(call.args ?? {}).slice(0, 200);
551587
- return `[FORCED PROGRESS BLOCK — duplicate ${call.tool} call skipped; this is not a tool failure. You have called ${call.tool}(${argPreview}) ${hits} times with identical arguments. The runtime did not re-run the tool; it is returning the prior result below so you can proceed without retrying.
551588
-
551589
- Progress is REQUIRED before this tool will run again with the same arguments. To proceed, do one of these:
551590
- file_write or file_edit to make progress, OR
551591
- todo_write that advances the plan, OR
551592
- task_complete (if all phases are done), OR
551593
- Call a different tool or use different arguments.]`;
551638
+ const cached = opts.cachedResult ? `
551639
+ Prior evidence preview:
551640
+ ${opts.cachedResult.slice(0, 700)}` : "";
551641
+ const source = opts.adversaryFlag ? "The adversary recognized this exact tool call as already observed earlier." : `This is exact repeat #${hits} for the same ${call.tool} arguments.`;
551642
+ return `[ADVERSARY GUIDANCE non-blocking]
551643
+ Observation: ${source}
551644
+ Call: ${call.tool}(${argPreview})
551645
+ Root cause hypothesis: the run is losing track of already-observed evidence, usually after path confusion, compaction, or an over-broad discovery loop.
551646
+ Corrective action: let this call's result inform the next step once, then pivot to a concrete action.
551647
+ Suggested next actions: edit/write the implicated file, run verification, read a different specific file, or complete with evidence. Prefer not to repeat this exact call again unless the filesystem, browser, or page state changed.${cached}`;
551594
551648
  }
551595
551649
  function buildCachedResultEnvelope(result) {
551596
- return `[CACHED RESULT — you already have this information from a prior identical call. Do NOT call this tool again with the same arguments.]
551650
+ return `[PRIOR RESULT — already observed by a prior identical call]
551597
551651
  ${result}`;
551598
551652
  }
551599
551653
  function evaluate2(inputs) {
551600
- const { proposedCall, fingerprint, isReadLike, recentToolResults, dedupHitCount, observerRedundantBlock } = inputs;
551601
- if (observerRedundantBlock) {
551654
+ const { proposedCall, fingerprint, isReadLike, recentToolResults, dedupHitCount, adversaryRedundantSignal } = inputs;
551655
+ if (adversaryRedundantSignal) {
551602
551656
  const cached = recentToolResults.get(fingerprint);
551657
+ const cachedResult = cached ? buildCachedResultEnvelope(cached.result) : void 0;
551603
551658
  return {
551604
- decision: "observer_block",
551605
- reason: "Littleman observer flagged this fingerprint as redundant",
551606
- cachedResult: cached ? buildCachedResultEnvelope(cached.result) : null
551659
+ decision: "guidance",
551660
+ reason: "Adversary flagged this fingerprint as redundant",
551661
+ hitNumber: (dedupHitCount.get(fingerprint) ?? 0) + 1,
551662
+ guidanceMessage: buildCriticGuidanceMessage(proposedCall, (dedupHitCount.get(fingerprint) ?? 0) + 1, {
551663
+ cachedResult,
551664
+ adversaryFlag: true
551665
+ }),
551666
+ cachedResult,
551667
+ compacted: cached?.compacted
551607
551668
  };
551608
551669
  }
551609
551670
  const cacheEligible = isReadLike || proposedCall.tool === "shell";
@@ -551611,24 +551672,16 @@ function evaluate2(inputs) {
551611
551672
  const cached = recentToolResults.get(fingerprint);
551612
551673
  if (cached !== void 0) {
551613
551674
  const hits = (dedupHitCount.get(fingerprint) ?? 0) + 1;
551614
- const threshold = proposedCall.tool === "shell" ? SHELL_THRESHOLD : FS_THRESHOLD;
551615
- if (hits >= threshold) {
551616
- return {
551617
- decision: "force_progress_block",
551618
- reason: `${proposedCall.tool} fingerprint hit count ${hits} >= ${threshold}`,
551619
- hitNumber: hits,
551620
- blockMessage: buildForceProgressBlockMessage(proposedCall, hits),
551621
- cachedResult: buildCachedResultEnvelope(cached.result),
551622
- compacted: cached.compacted
551623
- };
551624
- }
551625
551675
  const cachedEnvelope = buildCachedResultEnvelope(cached.result);
551626
551676
  return {
551627
- decision: "serve_cached",
551628
- reason: cached.compacted ? "post-compaction cache re-serve" : `duplicate call #${hits} (still under ${threshold}-hit gate)`,
551677
+ decision: "guidance",
551678
+ reason: cached.compacted ? "post-compaction duplicate evidence" : `duplicate call #${hits}`,
551629
551679
  cachedResult: cachedEnvelope,
551630
551680
  compacted: cached.compacted,
551631
- hitNumber: hits
551681
+ hitNumber: hits,
551682
+ guidanceMessage: buildCriticGuidanceMessage(proposedCall, hits, {
551683
+ cachedResult: cachedEnvelope
551684
+ })
551632
551685
  };
551633
551686
  }
551634
551687
  }
@@ -551680,12 +551733,9 @@ function isStagnant(signals, opts) {
551680
551733
  return false;
551681
551734
  return signals.completedDelta <= 0 && signals.filesDelta < filesDeltaMin && signals.failureSum >= failureThreshold && signals.variantCount >= variantThreshold;
551682
551735
  }
551683
- var SHELL_THRESHOLD, FS_THRESHOLD;
551684
551736
  var init_critic = __esm({
551685
551737
  "packages/orchestrator/dist/critic.js"() {
551686
551738
  "use strict";
551687
- SHELL_THRESHOLD = 2;
551688
- FS_THRESHOLD = 3;
551689
551739
  }
551690
551740
  });
551691
551741
 
@@ -555234,7 +555284,8 @@ function partitionToolCalls(calls, readOnlyHints) {
555234
555284
  const batches = [];
555235
555285
  let currentConcurrent = [];
555236
555286
  for (const call of calls) {
555237
- if (isConcurrencySafe(call.name, readOnlyHints)) {
555287
+ const safe = typeof call.concurrencySafe === "boolean" ? call.concurrencySafe : isConcurrencySafe(call.name, readOnlyHints);
555288
+ if (safe) {
555238
555289
  currentConcurrent.push(call);
555239
555290
  } else {
555240
555291
  if (currentConcurrent.length > 0) {
@@ -555892,13 +555943,18 @@ var init_streaming_executor = __esm({
555892
555943
  executeFn = null;
555893
555944
  constructor(config) {
555894
555945
  this.config = {
555895
- maxConcurrent: config?.maxConcurrent ?? 5
555946
+ maxConcurrent: config?.maxConcurrent ?? 5,
555947
+ concurrencyResolver: config?.concurrencyResolver
555896
555948
  };
555897
555949
  }
555898
555950
  /** Set the tool execution function */
555899
555951
  setExecutor(fn) {
555900
555952
  this.executeFn = fn;
555901
555953
  }
555954
+ /** Update the parsed-input concurrency classifier. */
555955
+ setConcurrencyResolver(fn) {
555956
+ this.config.concurrencyResolver = fn;
555957
+ }
555902
555958
  /** Number of tools tracked */
555903
555959
  get size() {
555904
555960
  return this.tools.size;
@@ -555919,7 +555975,7 @@ var init_streaming_executor = __esm({
555919
555975
  name: name10,
555920
555976
  args: partialArgs ?? {},
555921
555977
  state: "queued",
555922
- concurrencySafe: isConcurrencySafe(name10),
555978
+ concurrencySafe: this.resolveConcurrencySafe(name10, partialArgs ?? {}),
555923
555979
  finalized: false,
555924
555980
  queuedAt: Date.now()
555925
555981
  });
@@ -555934,6 +555990,7 @@ var init_streaming_executor = __esm({
555934
555990
  if (!entry)
555935
555991
  return;
555936
555992
  entry.args = args;
555993
+ entry.concurrencySafe = this.resolveConcurrencySafe(entry.name, args);
555937
555994
  entry.finalized = true;
555938
555995
  if (entry.state === "queued") {
555939
555996
  this.processQueue();
@@ -556031,6 +556088,15 @@ var init_streaming_executor = __esm({
556031
556088
  return true;
556032
556089
  return false;
556033
556090
  }
556091
+ resolveConcurrencySafe(name10, args) {
556092
+ try {
556093
+ const resolved = this.config.concurrencyResolver?.(name10, args);
556094
+ if (typeof resolved === "boolean")
556095
+ return resolved;
556096
+ } catch {
556097
+ }
556098
+ return isConcurrencySafe(name10);
556099
+ }
556034
556100
  entryFingerprint(entry) {
556035
556101
  return `${entry.name}:${stableValueKey(entry.args)}`;
556036
556102
  }
@@ -558656,8 +558722,8 @@ var init_agenticRunner = __esm({
558656
558722
  // WO-KG-15
558657
558723
  _retrievalContextCache = null;
558658
558724
  // WO-KG-15: cache per-run
558659
- // Observer world-model and cohort stats
558660
- _observerMode = "both";
558725
+ // Adversary world-model and cohort stats
558726
+ _adversaryMode = "both";
558661
558727
  _worldFacts = { files: /* @__PURE__ */ new Map(), lastTest: {}, lastLists: /* @__PURE__ */ new Map() };
558662
558728
  // REG-7-root: Track file writes since last todo_write call. When this
558663
558729
  // counter climbs without a todo update, the agent has likely batched
@@ -559006,6 +559072,8 @@ var init_agenticRunner = __esm({
559006
559072
  _sessionId = `session-${Date.now()}`;
559007
559073
  _workingDirectory = "";
559008
559074
  constructor(backend, options2) {
559075
+ const adversaryMode = options2?.adversaryMode ?? options2?.observerMode ?? "both";
559076
+ const disableAdversaryCritic = options2?.disableAdversaryCritic ?? options2?.disableStepCritic ?? false;
559009
559077
  this.backend = backend;
559010
559078
  this.options = {
559011
559079
  maxTurns: options2?.maxTurns ?? 60,
@@ -559030,19 +559098,23 @@ var init_agenticRunner = __esm({
559030
559098
  bruteForceMaxCycles: options2?.bruteForceMaxCycles ?? 100,
559031
559099
  allowTurnExtension: options2?.allowTurnExtension ?? true,
559032
559100
  completionProvenanceGuard: options2?.completionProvenanceGuard ?? true,
559101
+ disableAdversaryCritic,
559102
+ disableStepCritic: disableAdversaryCritic,
559033
559103
  modelTier: options2?.modelTier ?? "large",
559034
559104
  contextWindowSize: options2?.contextWindowSize ?? 0,
559035
559105
  personality: options2?.personality ?? PERSONALITY_PRESETS.balanced,
559036
559106
  personalityName: options2?.personalityName ?? "",
559037
559107
  finalVarResolver: options2?.finalVarResolver ?? void 0,
559038
- observerMode: options2?.observerMode ?? "both",
559108
+ adversaryMode,
559109
+ observerMode: adversaryMode,
559039
559110
  // Phase 4 — sub-agent isolation flag (defaults false). When true, this
559040
559111
  // runner skips cross-task handoff inheritance from the parent's
559041
559112
  // session.
559042
559113
  subAgent: options2?.subAgent ?? false,
559043
559114
  skipCrossTaskHandoff: options2?.skipCrossTaskHandoff ?? false
559044
559115
  };
559045
- this._observerMode = this.options.observerMode;
559116
+ this._adversaryMode = this.options.adversaryMode;
559117
+ this._streamingExecutor.setConcurrencyResolver((name10, args) => this.resolveToolConcurrencySafe(name10, args));
559046
559118
  }
559047
559119
  /** Update context window size (e.g. after querying Ollama /api/show) */
559048
559120
  setContextWindowSize(size) {
@@ -559050,7 +559122,10 @@ var init_agenticRunner = __esm({
559050
559122
  }
559051
559123
  /** Set the working directory for session checkpointing */
559052
559124
  setWorkingDirectory(dir) {
559053
- this._workingDirectory = dir;
559125
+ this._workingDirectory = _pathResolve(dir);
559126
+ }
559127
+ authoritativeWorkingDirectory() {
559128
+ return _pathResolve(this._workingDirectory || process.cwd());
559054
559129
  }
559055
559130
  /** State root for runner-owned memory/artifacts. Defaults to cwd/.omnius. */
559056
559131
  omniusStateDir() {
@@ -559823,7 +559898,7 @@ ${result.output ?? ""}`;
559823
559898
  * checklist via todo_write, and only then call task_complete.
559824
559899
  */
559825
559900
  /**
559826
- * REG-39c: tag a SYNTHETIC failure (FORCED PROGRESS BLOCK / observer
559901
+ * REG-39c: tag a SYNTHETIC failure (FORCED PROGRESS BLOCK / adversary
559827
559902
  * block / budget exhausted). These paths return early from
559828
559903
  * executeSingle BEFORE the main result-handling code, so the normal
559829
559904
  * MAST tagging miss them. This helper lets each return-early site
@@ -561367,7 +561442,7 @@ ${latest.output || ""}`.trim();
561367
561442
  }
561368
561443
  }
561369
561444
  const sections = [
561370
- "[KNOWLEDGE — cached tool results already known to the runtime. Do NOT re-call these tools with the same arguments:]"
561445
+ "[KNOWLEDGE — cached tool results already known to the runtime. Repeating an exact read/list/search/shell call is a wasted action and will be blocked or served from cache:]"
561371
561446
  ];
561372
561447
  if (compactedCount > 0) {
561373
561448
  sections.push(`Compacted cached entries still count as already-known results (${compactedCount}); an exact repeat will be served from cache or skipped, not produce new information.`);
@@ -561379,6 +561454,7 @@ ${latest.output || ""}`.trim();
561379
561454
  if (dirsListed.length > 0) {
561380
561455
  const unique2 = [...new Set(dirsListed)].slice(0, 15);
561381
561456
  sections.push(`Directories already listed (${unique2.length}): ${unique2.join(", ")}`);
561457
+ sections.push(`Do not call list_directory again on these exact directories unless you changed their contents. Use the listed child paths directly with file_read/edit/delegation.`);
561382
561458
  }
561383
561459
  if (searches.length > 0) {
561384
561460
  const unique2 = [...new Set(searches)].slice(0, 15);
@@ -561392,6 +561468,23 @@ ${latest.output || ""}`.trim();
561392
561468
  return null;
561393
561469
  return sections.join("\n");
561394
561470
  }
561471
+ _renderRuntimeRootBlock() {
561472
+ const authoritative = this.authoritativeWorkingDirectory();
561473
+ const proc = _pathResolve(process.cwd());
561474
+ const lines = [
561475
+ `[RUNTIME ROOT — authoritative]`,
561476
+ `Current working directory for this run: ${authoritative}`,
561477
+ `All relative file/tool paths resolve under this directory unless the tool call uses an absolute path.`,
561478
+ `Do not infer cwd from old tasks, shell transcripts, memory, or prior browser sessions.`
561479
+ ];
561480
+ if (proc !== authoritative) {
561481
+ lines.push(`Process cwd differs (${proc}); treat the run cwd above as authoritative for repo/project work.`);
561482
+ }
561483
+ if (this._worldFacts.lastCwd && this._worldFacts.lastCwd !== authoritative) {
561484
+ lines.push(`Last shell cd target was command-local only: ${this._worldFacts.lastCwd}. It does not change the run cwd.`);
561485
+ }
561486
+ return lines.join("\n");
561487
+ }
561395
561488
  _insertContextFrame(messages2, frame) {
561396
561489
  if (!frame)
561397
561490
  return;
@@ -561429,7 +561522,7 @@ ${latest.output || ""}`.trim();
561429
561522
  add2(this._activeContextItem("task_state", "todo-state", "turn.todos", "Todo state", input.todoBlock, 80));
561430
561523
  add2(this._activeContextItem("recent_failure", "recent-failures", "turn.failures", "Recent failures", input.failureBlock, 95));
561431
561524
  add2(this._activeContextItem("recent_failure", "write-churn", "turn.churn", "Write churn", input.churnBlock, 75));
561432
- add2(this._activeContextItem("tool_cache", "tool-cache", "turn.tool-cache", "Tool cache", input.toolCacheBlock, 65));
561525
+ add2(this._activeContextItem("tool_cache", "tool-cache", "turn.tool-cache", "Tool cache", input.toolCacheBlock, 92));
561433
561526
  add2(this._activeContextItem("anchor", "anchors", "turn.anchors", "Relevant anchors", input.anchorsBlock, 50));
561434
561527
  add2(this._activeContextItem("environment", "environment", "turn.environment", "Environment", input.environmentBlock, 35));
561435
561528
  if (this._lastPprMemoryLines.length > 0) {
@@ -561684,7 +561777,10 @@ ${chunk.content}`, {
561684
561777
  async _buildTurnContextFrame(turn, messages2, recentToolResults, environmentBlock) {
561685
561778
  this._contextLedger.clearSources("turn.");
561686
561779
  this._contextLedger.prune(turn);
561687
- const goalBlock = this._taskState.goal ? `Active task: ${this._taskState.goal}` : null;
561780
+ const goalBlock = [
561781
+ this._renderRuntimeRootBlock(),
561782
+ this._taskState.goal ? `Active task: ${this._taskState.goal}` : null
561783
+ ].filter(Boolean).join("\n\n");
561688
561784
  const filesystemBlock = this._renderFilesystemStateBlock(turn);
561689
561785
  const todoBlock = this._renderTodoStateBlock(turn);
561690
561786
  const failureBlock = this._renderRecentFailuresBlock(turn);
@@ -561750,7 +561846,7 @@ ${this._lastPprMemoryLines.slice(0, 5).join("\n")}` : null;
561750
561846
  signalFromBlock("tool_cache", "turn.tool-cache", toolCacheBlock, {
561751
561847
  id: "tool-cache",
561752
561848
  dedupeKey: "turn.tool-cache",
561753
- priority: 65,
561849
+ priority: 92,
561754
561850
  createdTurn: turn,
561755
561851
  ttlTurns: 1
561756
561852
  }),
@@ -562020,7 +562116,8 @@ ${blob}
562020
562116
  return Object.entries(args ?? {}).sort(([a2], [b]) => a2.localeCompare(b)).map(([k, v]) => `${k}=${this._formatExactArgValue(v)}`).join(",");
562021
562117
  }
562022
562118
  _buildToolFingerprint(name10, args) {
562023
- return `${name10}:${this._buildExactArgsKey(args)}`;
562119
+ const canonical = this.lookupRegisteredTool(name10)?.name ?? name10;
562120
+ return `${canonical}:${this._buildExactArgsKey(args)}`;
562024
562121
  }
562025
562122
  _dedupeToolCallsForResponse(toolCalls, turn) {
562026
562123
  if (toolCalls.length <= 1)
@@ -562226,32 +562323,45 @@ ${blob}
562226
562323
  }
562227
562324
  /** Register a tool for the agent to use */
562228
562325
  registerTool(tool) {
562229
- if (!this.isToolAllowedByProfile(tool.name))
562326
+ if (!this.isToolAllowedByProfile(tool.name, tool.aliases))
562230
562327
  return;
562231
562328
  this.tools.set(tool.name, tool);
562232
562329
  if (tool.name === "generate_image") {
562233
562330
  this.maybeInstallImagePromptExpander(tool);
562234
562331
  }
562235
562332
  }
562236
- isToolAllowedByProfile(name10) {
562237
- const profile = this.options.toolProfile;
562238
- if (!profile)
562239
- return true;
562333
+ toolNameVariants(name10) {
562240
562334
  const raw = String(name10 ?? "").trim();
562335
+ if (!raw)
562336
+ return [];
562241
562337
  const lastSegment = raw.split(/[.:/]/).filter(Boolean).pop() ?? raw;
562242
- const candidates = /* @__PURE__ */ new Set([
562338
+ return Array.from(new Set([
562243
562339
  raw,
562244
562340
  raw.toLowerCase(),
562341
+ raw.replace(/[-\s]+/g, "_"),
562245
562342
  raw.replace(/^functions[._:-]/i, ""),
562246
562343
  raw.replace(/^tools[._:-]/i, ""),
562247
562344
  lastSegment,
562248
- lastSegment.toLowerCase()
562249
- ]);
562345
+ lastSegment.toLowerCase(),
562346
+ lastSegment.replace(/[-\s]+/g, "_")
562347
+ ].filter(Boolean)));
562348
+ }
562349
+ isToolAllowedByProfile(name10, aliases) {
562350
+ const profile = this.options.toolProfile;
562351
+ if (!profile)
562352
+ return true;
562353
+ const candidates = /* @__PURE__ */ new Set();
562354
+ for (const value2 of [name10, ...aliases ?? []]) {
562355
+ for (const variant of this.toolNameVariants(value2)) {
562356
+ candidates.add(variant);
562357
+ }
562358
+ }
562250
562359
  const allow = Array.isArray(profile.allow) ? profile.allow.filter(Boolean) : [];
562251
- if (allow.length > 0)
562252
- return allow.some((toolName) => candidates.has(toolName));
562360
+ if (allow.length > 0) {
562361
+ return allow.some((toolName) => this.toolNameVariants(toolName).some((candidate) => candidates.has(candidate)));
562362
+ }
562253
562363
  const deny = Array.isArray(profile.deny) ? profile.deny.filter(Boolean) : [];
562254
- return !deny.some((toolName) => candidates.has(toolName));
562364
+ return !deny.some((toolName) => this.toolNameVariants(toolName).some((candidate) => candidates.has(candidate)));
562255
562365
  }
562256
562366
  toolProfileDenial(name10) {
562257
562367
  const profileName = this.options.toolProfile?.name || "active tool profile";
@@ -562329,20 +562439,14 @@ Rewrite it now for ${ctx3.model}.`;
562329
562439
  const direct = this.tools.get(raw);
562330
562440
  if (direct)
562331
562441
  return { name: raw, tool: direct };
562332
- const lastSegment = raw.split(/[.:/]/).filter(Boolean).pop() ?? raw;
562333
- const candidates = /* @__PURE__ */ new Set([
562334
- raw,
562335
- raw.toLowerCase(),
562336
- raw.replace(/[-\s]+/g, "_"),
562337
- raw.replace(/^functions[._:-]/i, ""),
562338
- raw.replace(/^tools[._:-]/i, ""),
562339
- lastSegment,
562340
- lastSegment.toLowerCase(),
562341
- lastSegment.replace(/[-\s]+/g, "_")
562342
- ]);
562442
+ const candidates = new Set(this.toolNameVariants(raw));
562343
562443
  const lowerIndex = /* @__PURE__ */ new Map();
562344
- for (const registeredName of this.tools.keys()) {
562345
- lowerIndex.set(registeredName.toLowerCase(), registeredName);
562444
+ for (const [registeredName, tool] of this.tools) {
562445
+ for (const value2 of [registeredName, ...tool.aliases ?? []]) {
562446
+ for (const variant of this.toolNameVariants(value2)) {
562447
+ lowerIndex.set(variant.toLowerCase(), registeredName);
562448
+ }
562449
+ }
562346
562450
  }
562347
562451
  for (const candidate of candidates) {
562348
562452
  const exact = this.tools.get(candidate);
@@ -562354,8 +562458,47 @@ Rewrite it now for ${ctx3.model}.`;
562354
562458
  }
562355
562459
  return null;
562356
562460
  }
562461
+ resolveToolConcurrencySafe(name10, args) {
562462
+ const resolved = this.lookupRegisteredTool(name10);
562463
+ const tool = resolved?.tool;
562464
+ try {
562465
+ if (typeof tool?.isConcurrencySafe === "function") {
562466
+ return !!tool.isConcurrencySafe(args);
562467
+ }
562468
+ if (typeof tool?.isReadOnly === "function") {
562469
+ return !!tool.isReadOnly(args);
562470
+ }
562471
+ } catch {
562472
+ }
562473
+ return isConcurrencySafe(resolved?.name ?? name10);
562474
+ }
562475
+ toolResultMaxSize(tool) {
562476
+ const max = tool?.maxResultSizeChars;
562477
+ return typeof max === "number" && Number.isFinite(max) && max > 0 ? max : void 0;
562478
+ }
562479
+ applyRegisteredToolResultTriage(result, toolName, tool) {
562480
+ return applyToolResultTriage(result, {
562481
+ workingDir: this._workingDirectory || process.cwd(),
562482
+ toolName,
562483
+ maxOutputSize: this.toolResultMaxSize(tool)
562484
+ });
562485
+ }
562486
+ async validateToolInput(tool, args, toolName) {
562487
+ if (typeof tool.validateInput !== "function")
562488
+ return null;
562489
+ const validation = await tool.validateInput(args, {
562490
+ toolName,
562491
+ workingDir: this._workingDirectory || process.cwd()
562492
+ });
562493
+ if (validation?.result === true)
562494
+ return null;
562495
+ if (validation?.result === false) {
562496
+ return validation.message || "custom validation failed";
562497
+ }
562498
+ return null;
562499
+ }
562357
562500
  unknownToolError(name10) {
562358
- const names = Array.from(this.tools.keys()).sort();
562501
+ const names = Array.from(this.tools.values()).map((tool) => tool.aliases?.length ? `${tool.name} (aliases: ${tool.aliases.join("|")})` : tool.name).sort();
562359
562502
  const preview = names.slice(0, 80).join(", ");
562360
562503
  const suffix = names.length > 80 ? `, ... ${names.length - 80} more` : "";
562361
562504
  return `Unknown tool: ${name10}. Registered tools (${names.length}): ${preview}${suffix}`;
@@ -562369,6 +562512,7 @@ Rewrite it now for ${ctx3.model}.`;
562369
562512
  for (const t2 of this.tools.values()) {
562370
562513
  list.push({
562371
562514
  name: t2.name,
562515
+ ...t2.aliases?.length ? { aliases: t2.aliases } : {},
562372
562516
  description: t2.description,
562373
562517
  parameters: t2.parameters
562374
562518
  });
@@ -562380,10 +562524,10 @@ Rewrite it now for ${ctx3.model}.`;
562380
562524
  * Validates against inputSchema if present and returns the tool result.
562381
562525
  */
562382
562526
  async runToolByName(name10, args) {
562383
- if (!this.isToolAllowedByProfile(name10)) {
562527
+ const resolved = this.lookupRegisteredTool(name10);
562528
+ if (!this.isToolAllowedByProfile(resolved?.name ?? name10, resolved?.tool.aliases)) {
562384
562529
  return this.toolProfileDenial(name10);
562385
562530
  }
562386
- const resolved = this.lookupRegisteredTool(name10);
562387
562531
  if (!resolved) {
562388
562532
  return { success: false, output: "", error: this.unknownToolError(name10) };
562389
562533
  }
@@ -562399,12 +562543,17 @@ Rewrite it now for ${ctx3.model}.`;
562399
562543
  error: `Invalid args for ${resolved.name}: ${e2?.message || String(e2)}`
562400
562544
  };
562401
562545
  }
562546
+ const validationError = await this.validateToolInput(tool, args, resolved.name);
562547
+ if (validationError) {
562548
+ return {
562549
+ success: false,
562550
+ output: "",
562551
+ error: `Invalid args for ${resolved.name}: ${validationError}. Check the parameter values and try again.`
562552
+ };
562553
+ }
562402
562554
  try {
562403
562555
  const result = await tool.execute(args);
562404
- return applyToolResultTriage(result, {
562405
- workingDir: this._workingDirectory || process.cwd(),
562406
- toolName: tool.name ?? "tool"
562407
- });
562556
+ return this.applyRegisteredToolResultTriage(result, resolved.name, tool);
562408
562557
  } catch (e2) {
562409
562558
  return { success: false, output: "", error: e2?.message || String(e2) };
562410
562559
  }
@@ -562602,8 +562751,8 @@ ${notice}`;
562602
562751
  const window2 = recentToolCalls.slice(-repetitionWindow);
562603
562752
  const uniqueKeys = new Set(window2.map((tc) => `${tc.name}:${tc.argsKey}`));
562604
562753
  const ratio = 1 - uniqueKeys.size / window2.length;
562605
- if (ratio > 0.4 && this._littlemanToolOutcomes.length >= 3) {
562606
- const recentOutcomes = this._littlemanToolOutcomes.slice(-6);
562754
+ if (ratio > 0.4 && this._adversaryToolOutcomes.length >= 3) {
562755
+ const recentOutcomes = this._adversaryToolOutcomes.slice(-6);
562607
562756
  const uniquePreviews = new Set(recentOutcomes.map((o2) => o2.preview.slice(0, 40)));
562608
562757
  if (uniquePreviews.size >= 3) {
562609
562758
  return Math.max(0, ratio - 0.4);
@@ -562701,6 +562850,9 @@ Respond with your assessment, then take action.`;
562701
562850
  this._lastActiveForgettingReport = null;
562702
562851
  this._lastContextConsolidationTurn = -1e3;
562703
562852
  this._contextFrameBuilder = new ContextFrameBuilder();
562853
+ if (!this._workingDirectory) {
562854
+ this._workingDirectory = _pathResolve(process.cwd());
562855
+ }
562704
562856
  if (!this.options.disablePersistentMemory && !this._memoryInitialized) {
562705
562857
  try {
562706
562858
  const path12 = await import("node:path");
@@ -563134,10 +563286,10 @@ TASK: ${scrubbedTask}` : scrubbedTask;
563134
563286
  this._hookDenyHintCount = 0;
563135
563287
  this._selfConsistencyVotes = 0;
563136
563288
  this._retrievalContextCache = null;
563137
- this._observerMode = this.options.observerMode ?? "both";
563289
+ this._adversaryMode = this.options.adversaryMode ?? "both";
563138
563290
  this._worldFacts = { files: /* @__PURE__ */ new Map(), lastTest: {}, lastLists: /* @__PURE__ */ new Map() };
563139
563291
  this._argCohorts.clear();
563140
- this._littlemanRedundantBlocks.clear();
563292
+ this._adversaryRedundantSignals.clear();
563141
563293
  this._lastTodoWriteTurn = -1;
563142
563294
  this._lastTodoReminderTurn = -1;
563143
563295
  let pendingConstraintWarnings = [];
@@ -563237,14 +563389,44 @@ TASK: ${scrubbedTask}` : scrubbedTask;
563237
563389
  });
563238
563390
  if (gate.proceed)
563239
563391
  return false;
563240
- messages2.push({ role: "system", content: gate.feedback });
563392
+ messages2.push({
563393
+ role: "system",
563394
+ content: `${gate.feedback}
563395
+
563396
+ [ADVISORY ONLY] This critique does not block task_complete; use it to improve the next run or visible evidence if the task continues.`
563397
+ });
563241
563398
  this.emit({
563242
563399
  type: "status",
563243
- content: `task_complete held by completion provenance guard: ${gate.reason}`,
563400
+ content: `completion provenance critique emitted without blocking: ${gate.reason}`,
563401
+ turn,
563402
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
563403
+ });
563404
+ this.emit({
563405
+ type: "adversary_reaction",
563406
+ adversary: {
563407
+ class: "guidance",
563408
+ shortText: "Completion provenance critique emitted",
563409
+ confidence: 0.9,
563410
+ details: gate.reason
563411
+ },
563412
+ turn,
563413
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
563414
+ });
563415
+ return false;
563416
+ };
563417
+ const emitBackwardPassAdvisory = (feedback, turn) => {
563418
+ messages2.push({
563419
+ role: "system",
563420
+ content: `${feedback}
563421
+
563422
+ [ADVISORY ONLY] Backward-pass critique is non-blocking; do not treat this as a tool failure or completion refusal.`
563423
+ });
563424
+ this.emit({
563425
+ type: "status",
563426
+ content: "backward-pass critique emitted without blocking completion",
563244
563427
  turn,
563245
563428
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
563246
563429
  });
563247
- return true;
563248
563430
  };
563249
563431
  const turnCap = this.options.maxTurns && this.options.maxTurns > 0 ? this.options.maxTurns : Number.MAX_SAFE_INTEGER;
563250
563432
  for (let turn = 0; turn < turnCap; turn++) {
@@ -564230,8 +564412,8 @@ ${_staleSamples.join("\n")}` : ``,
564230
564412
  nextSelfEval = now + selfEvalInterval;
564231
564413
  }
564232
564414
  const turnsRemaining = this.options.maxTurns - turn;
564233
- if (this.options.allowTurnExtension && turnsRemaining <= 3 && turnsRemaining > 0 && this._littlemanToolOutcomes.length >= 2) {
564234
- const recentOutcomes = this._littlemanToolOutcomes.slice(-6);
564415
+ if (this.options.allowTurnExtension && turnsRemaining <= 3 && turnsRemaining > 0 && this._adversaryToolOutcomes.length >= 2) {
564416
+ const recentOutcomes = this._adversaryToolOutcomes.slice(-6);
564235
564417
  const recentSuccesses = recentOutcomes.filter((o2) => o2.succeeded).length;
564236
564418
  const uniqueResults = new Set(recentOutcomes.map((o2) => o2.preview.slice(0, 40))).size;
564237
564419
  const isActive = recentSuccesses >= 2 && uniqueResults >= 2;
@@ -564240,16 +564422,16 @@ ${_staleSamples.join("\n")}` : ``,
564240
564422
  this.options.maxTurns += extension3;
564241
564423
  this.emit({
564242
564424
  type: "status",
564243
- content: `Littleman triage: activity detected (${recentSuccesses} recent successes, ${uniqueResults} unique results) — extending turn limit by ${extension3} (now ${this.options.maxTurns})`,
564425
+ content: `Adversary triage: activity detected (${recentSuccesses} recent successes, ${uniqueResults} unique results) — extending turn limit by ${extension3} (now ${this.options.maxTurns})`,
564244
564426
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
564245
564427
  });
564246
564428
  const detailsLines = recentOutcomes.map((o2) => `- ${o2.tool}: ${o2.succeeded ? "OK" : "ERR"} — ${o2.preview}`);
564247
564429
  this.emit({
564248
- type: "debug_littleman",
564430
+ type: "debug_adversary",
564249
564431
  turn,
564250
564432
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
564251
564433
  content: `Timeout triage: EXTENDED by ${extension3} turns (active session detected)`,
564252
- littlemanAction: {
564434
+ adversaryAction: {
564253
564435
  detection: "none",
564254
564436
  recentSuccesses,
564255
564437
  recentFailures: recentOutcomes.length - recentSuccesses,
@@ -564582,6 +564764,9 @@ ${memoryLines.join("\n")}`
564582
564764
  maxTokens: effectiveMaxTokens,
564583
564765
  timeoutMs: this.options.requestTimeoutMs
564584
564766
  };
564767
+ if ((this.options.contextWindowSize ?? 0) > 0) {
564768
+ chatRequest.numCtx = this.options.contextWindowSize;
564769
+ }
564585
564770
  if (this.options.memoryPrefix)
564586
564771
  chatRequest.memoryPrefix = this.options.memoryPrefix;
564587
564772
  if (this.options.memoryPrefixHash)
@@ -564623,7 +564808,7 @@ ${memoryLines.join("\n")}`
564623
564808
  compactionThreshold: limits.compactionThreshold,
564624
564809
  toolCallCount,
564625
564810
  keepRecent: limits.keepRecent,
564626
- littlemanOutcomes: this._littlemanToolOutcomes.length,
564811
+ adversaryOutcomes: this._adversaryToolOutcomes.length,
564627
564812
  headroom: limits.compactionThreshold - estTokens
564628
564813
  }
564629
564814
  });
@@ -564740,19 +564925,17 @@ ${memoryLines.join("\n")}`
564740
564925
  if (jsonMatch) {
564741
564926
  try {
564742
564927
  const parsed = JSON.parse(jsonMatch[1]);
564743
- if (parsed.tool && this.tools.has(parsed.tool)) {
564744
- const tool = this.tools.get(parsed.tool);
564928
+ const resolvedParsedTool = parsed.tool ? this.lookupRegisteredTool(parsed.tool) : null;
564929
+ if (parsed.tool && resolvedParsedTool) {
564930
+ const tool = resolvedParsedTool.tool;
564745
564931
  const rawResult = await tool.execute(parsed.args ?? {});
564746
- const result = applyToolResultTriage(rawResult, {
564747
- workingDir: this._workingDirectory || process.cwd(),
564748
- toolName: parsed.tool
564749
- });
564932
+ const result = this.applyRegisteredToolResultTriage(rawResult, resolvedParsedTool.name, tool);
564750
564933
  messages2.push({ role: "assistant", content });
564751
564934
  messages2.push({
564752
564935
  role: "user",
564753
564936
  content: `Tool result (${parsed.tool}): ${result.output.slice(0, 2e3)}`
564754
564937
  });
564755
- if (parsed.tool === "task_complete") {
564938
+ if (resolvedParsedTool.name === "task_complete") {
564756
564939
  completed = true;
564757
564940
  summary = String(parsed.args?.summary ?? content);
564758
564941
  }
@@ -564986,16 +565169,19 @@ ${memoryLines.join("\n")}`
564986
565169
  const cohort = this._argCohorts.get(cohortKey);
564987
565170
  if (cohort && cohort.failure >= 3 && cohort.success === 0) {
564988
565171
  this.emit({
564989
- type: "observer_reaction",
565172
+ type: "adversary_reaction",
564990
565173
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
564991
- observer: {
565174
+ adversary: {
564992
565175
  class: "arg_cohort_risk",
564993
565176
  shortText: `${tc.name} with similar args has failed ${cohort.failure}× recently`,
564994
565177
  confidence: 0.85
564995
565178
  }
564996
565179
  });
564997
- if (this._observerMode === "skillcoach" || this._observerMode === "both") {
564998
- this.pendingUserMessages.push(`⚠ ${tc.name} with similar arguments has failed ${cohort.failure}× recently. Try a different approach first: read relevant files, adjust arguments, or verify prerequisites.`);
565180
+ if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
565181
+ this.pendingUserMessages.push(`[ADVERSARY CRITIQUE non-blocking]
565182
+ Evidence: ${tc.name} with similar arguments has failed ${cohort.failure}× recently.
565183
+ Root cause hypothesis: the argument family may be wrong, a prerequisite may be missing, or the tool is being used before enough state is known.
565184
+ Corrective action: try a different approach first: read relevant files, adjust arguments, or verify prerequisites.`);
564999
565185
  }
565000
565186
  }
565001
565187
  if (this._errorPatterns.size > 0) {
@@ -565277,19 +565463,11 @@ ${memoryLines.join("\n")}`
565277
565463
  ].includes(tc.name);
565278
565464
  const isStatefulBrowserTool = this._isStatefulBrowserTool(tc.name);
565279
565465
  const isReadLike = !isStatefulBrowserTool && (baseIsReadLike || tc.name === "shell" && this._isShellCommandReadOnly(tc.arguments?.["command"] ?? tc.arguments?.["cmd"] ?? ""));
565280
- const observerRedundantBlock = this._littlemanRedundantBlocks.has(toolFingerprint);
565281
- if (observerRedundantBlock) {
565282
- this._littlemanRedundantBlocks.delete(toolFingerprint);
565466
+ const adversaryRedundantSignal = this._adversaryRedundantSignals.has(toolFingerprint);
565467
+ if (adversaryRedundantSignal) {
565468
+ this._adversaryRedundantSignals.delete(toolFingerprint);
565283
565469
  }
565284
- const markSyntheticToolLog = (outputPreview) => {
565285
- const lastLog = toolCallLog[_toolLogTailIdx];
565286
- if (!lastLog)
565287
- return;
565288
- lastLog.success = true;
565289
- lastLog.mutated = false;
565290
- lastLog.mutatedFiles = [];
565291
- lastLog.outputPreview = outputPreview.slice(0, 100);
565292
- };
565470
+ let criticGuidance = null;
565293
565471
  {
565294
565472
  const _reflStem = buildStem(tc.name, tc.arguments ?? {});
565295
565473
  if (!this._reflectionsInjectedThisTurn.has(_reflStem)) {
@@ -565331,7 +565509,10 @@ ${memoryLines.join("\n")}`
565331
565509
  }
565332
565510
  }
565333
565511
  }
565334
- const criticDecision = evaluate2({
565512
+ const criticDecision = this.options.disableAdversaryCritic === true ? {
565513
+ decision: "pass",
565514
+ reason: "adversary critic disabled for isolated evaluation"
565515
+ } : evaluate2({
565335
565516
  proposedCall: { tool: tc.name, args: tc.arguments ?? {} },
565336
565517
  fingerprint: toolFingerprint,
565337
565518
  isReadLike,
@@ -565345,116 +565526,33 @@ ${memoryLines.join("\n")}`
565345
565526
  stagnationSignals: null,
565346
565527
  // stagnation gate handled at top-of-turn
565347
565528
  stagnationGateActive: false,
565348
- observerRedundantBlock
565529
+ adversaryRedundantSignal
565349
565530
  });
565350
- if (criticDecision.decision === "observer_block") {
565351
- this.emit({
565352
- type: "tool_call",
565353
- toolName: tc.name,
565354
- toolArgs: tc.arguments,
565355
- turn,
565356
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
565357
- });
565358
- const blockMsg = criticDecision.cachedResult ? `[BLOCKED — this tool+args already succeeded. Re-served from cache:]
565359
-
565360
- ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confirmed this tool already succeeded with these arguments on a prior turn. Do NOT re-run. Use your prior findings to proceed.]`;
565361
- markSyntheticToolLog(blockMsg);
565362
- this.emit({
565363
- type: "tool_result",
565364
- toolName: tc.name,
565365
- success: true,
565366
- content: blockMsg.slice(0, 100),
565367
- turn,
565368
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
565369
- });
565370
- this._tagSyntheticFailure({
565371
- mode: "step_repetition",
565372
- rationale: `observer-block on ${tc.name} fingerprint flagged redundant`
565373
- });
565374
- return { tc, output: blockMsg, success: true };
565375
- }
565376
- if (criticDecision.decision === "force_progress_block") {
565531
+ if (criticDecision.decision === "guidance") {
565377
565532
  dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
565378
565533
  const _existingFp = recentToolResults.get(toolFingerprint);
565379
565534
  if (_existingFp !== void 0) {
565380
565535
  recentToolResults.delete(toolFingerprint);
565381
565536
  recentToolResults.set(toolFingerprint, _existingFp);
565382
565537
  }
565538
+ criticGuidance = criticDecision.guidanceMessage;
565383
565539
  this.emit({
565384
- type: "tool_call",
565385
- toolName: tc.name,
565386
- toolArgs: tc.arguments,
565387
- turn,
565540
+ type: "adversary_reaction",
565541
+ adversary: {
565542
+ class: "guidance",
565543
+ shortText: `Adversary guidance for repeated ${tc.name} call`,
565544
+ confidence: 0.9,
565545
+ details: criticDecision.reason
565546
+ },
565388
565547
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
565389
565548
  });
565390
565549
  this.emit({
565391
- type: "tool_result",
565392
- toolName: tc.name,
565393
- success: true,
565394
- content: `[SKIPPED DUPLICATE — exact ${tc.name} call not re-run; cached result returned.]`.slice(0, 120),
565395
- turn,
565396
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
565397
- });
565398
- this._tagSyntheticFailure({
565399
- mode: "step_repetition",
565400
- rationale: `force_progress_block on ${tc.name} after ${criticDecision.hitNumber} identical calls`
565401
- });
565402
- const generationCompletionHint = isGenerationArtifactSuccess(tc.name, criticDecision.cachedResult) ? `
565403
-
565404
- [GENERATION ALREADY COMPLETE] This exact ${tc.name} call already succeeded. Do not call it again. Use the cached artifact/path above; if delivery is needed, send it, otherwise call task_complete.` : "";
565405
- const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. Do not retry this exact call.]
565406
-
565407
- ` : `[SKIPPED DUPLICATE — exact ${tc.name} call not re-run. The cached result below is from the prior successful call. Do not retry this exact call.]
565408
-
565409
- `;
565410
- const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
565411
- ... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
565412
- markSyntheticToolLog(`${criticDecision.blockMessage}
565413
-
565414
- ${truncatedCache}`);
565415
- return {
565416
- tc,
565417
- output: `${criticDecision.blockMessage}
565418
-
565419
- ${header}${truncatedCache}${generationCompletionHint}`,
565420
- success: true
565421
- };
565422
- }
565423
- if (criticDecision.decision === "serve_cached") {
565424
- dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
565425
- const _existingFp = recentToolResults.get(toolFingerprint);
565426
- if (_existingFp !== void 0) {
565427
- recentToolResults.delete(toolFingerprint);
565428
- recentToolResults.set(toolFingerprint, _existingFp);
565429
- }
565430
- this.emit({
565431
- type: "tool_call",
565432
- toolName: tc.name,
565433
- toolArgs: tc.arguments,
565434
- turn,
565435
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
565436
- });
565437
- const generationCompletionHint = isGenerationArtifactSuccess(tc.name, criticDecision.cachedResult) ? `
565438
-
565439
- [GENERATION ALREADY COMPLETE] This exact ${tc.name} call already succeeded. Do not call it again. Use the cached artifact/path above; if delivery is needed, send it, otherwise call task_complete.` : "";
565440
- const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. No need to call this tool again.]
565441
-
565442
- ` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result is identical. Do NOT call this again. Use the data you already have to make progress. One more identical call will trigger a hard progress block.]
565443
-
565444
- `;
565445
- const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
565446
- ... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
565447
- const dedupOutput = header + truncatedCache + generationCompletionHint;
565448
- markSyntheticToolLog(dedupOutput);
565449
- this.emit({
565450
- type: "tool_result",
565550
+ type: "status",
565451
565551
  toolName: tc.name,
565452
- success: true,
565453
- content: header.slice(0, 100),
565552
+ content: `Adversary guidance emitted for ${tc.name}; tool call will still execute`,
565454
565553
  turn,
565455
565554
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
565456
565555
  });
565457
- return { tc, output: dedupOutput, success: true };
565458
565556
  }
565459
565557
  this.emit({
565460
565558
  type: "tool_call",
@@ -565494,6 +565592,9 @@ ${header}${truncatedCache}${generationCompletionHint}`,
565494
565592
  }
565495
565593
  }
565496
565594
  }
565595
+ if (!validationError) {
565596
+ validationError = await this.validateToolInput(tool, tc.arguments, resolvedTool?.name ?? tc.name);
565597
+ }
565497
565598
  if (validationError) {
565498
565599
  result = {
565499
565600
  success: false,
@@ -565582,10 +565683,7 @@ ${header}${truncatedCache}${generationCompletionHint}`,
565582
565683
  } else {
565583
565684
  result = await tool.execute(finalArgs);
565584
565685
  }
565585
- result = applyToolResultTriage(result, {
565586
- workingDir: this._workingDirectory || process.cwd(),
565587
- toolName: tc.name
565588
- });
565686
+ result = this.applyRegisteredToolResultTriage(result, resolvedTool?.name ?? tc.name, tool);
565589
565687
  if (tc.name === "shell" && result.success === true) {
565590
565688
  const semanticErr = this._detectSemanticShellFailure(result.output ?? "");
565591
565689
  if (semanticErr) {
@@ -566455,6 +566553,11 @@ Respond with EXACTLY this structure before your next tool call:
566455
566553
  result = await this.offloadEmbeddedImageResult(result, tc.name, turn);
566456
566554
  }
566457
566555
  let output = this.normalizeToolOutput(result, tc.name, tc.arguments, turn);
566556
+ if (criticGuidance) {
566557
+ output += `
566558
+
566559
+ ${criticGuidance}`;
566560
+ }
566458
566561
  if (!result.success && (this.options.modelTier === "small" || this.options.modelTier === "medium")) {
566459
566562
  const recovery = this.buildRecoveryGuidance(tc.name, result.error ?? "", tc.arguments);
566460
566563
  if (recovery)
@@ -566824,10 +566927,8 @@ Then use file_read on individual FILES inside it.`);
566824
566927
  await this._streamingExecutor.waitAll();
566825
566928
  const streamResults = this._streamingExecutor.drainCompleted();
566826
566929
  for (const sr of streamResults) {
566827
- sr.result = applyToolResultTriage(sr.result, {
566828
- workingDir: this._workingDirectory || process.cwd(),
566829
- toolName: sr.name
566830
- });
566930
+ const resolvedStreamTool = this.lookupRegisteredTool(sr.name);
566931
+ sr.result = this.applyRegisteredToolResultTriage(sr.result, resolvedStreamTool?.name ?? sr.name, resolvedStreamTool?.tool);
566831
566932
  }
566832
566933
  const handledIds = /* @__PURE__ */ new Set();
566833
566934
  for (const sr of streamResults) {
@@ -566865,22 +566966,21 @@ ${sr.result.output}`;
566865
566966
  }
566866
566967
  const _bp1 = await this._runBackwardPassReview(turn);
566867
566968
  if (_bp1 && !_bp1.proceed && _bp1.feedback) {
566868
- messages2.push({ role: "system", content: _bp1.feedback });
566869
- } else {
566870
- completed = true;
566871
- summary = extractTaskCompleteSummary(matchTc.arguments);
566872
- if (summary && !this._assistantTextEmitted) {
566873
- this.emit({
566874
- type: "assistant_text",
566875
- content: summary,
566876
- source: "task_complete_summary",
566877
- turn,
566878
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
566879
- });
566880
- this._assistantTextEmitted = true;
566881
- }
566882
- break;
566969
+ emitBackwardPassAdvisory(_bp1.feedback, turn);
566970
+ }
566971
+ completed = true;
566972
+ summary = extractTaskCompleteSummary(matchTc.arguments);
566973
+ if (summary && !this._assistantTextEmitted) {
566974
+ this.emit({
566975
+ type: "assistant_text",
566976
+ content: summary,
566977
+ source: "task_complete_summary",
566978
+ turn,
566979
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
566980
+ });
566981
+ this._assistantTextEmitted = true;
566883
566982
  }
566983
+ break;
566884
566984
  }
566885
566985
  }
566886
566986
  }
@@ -566921,22 +567021,21 @@ ${sr.result.output}`;
566921
567021
  }
566922
567022
  const _bp2 = await this._runBackwardPassReview(turn);
566923
567023
  if (_bp2 && !_bp2.proceed && _bp2.feedback) {
566924
- messages2.push({ role: "system", content: _bp2.feedback });
566925
- } else {
566926
- completed = true;
566927
- summary = extractTaskCompleteSummary(r2.tc.arguments);
566928
- if (summary && !this._assistantTextEmitted) {
566929
- this.emit({
566930
- type: "assistant_text",
566931
- content: summary,
566932
- source: "task_complete_summary",
566933
- turn,
566934
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
566935
- });
566936
- this._assistantTextEmitted = true;
566937
- }
566938
- break;
567024
+ emitBackwardPassAdvisory(_bp2.feedback, turn);
566939
567025
  }
567026
+ completed = true;
567027
+ summary = extractTaskCompleteSummary(r2.tc.arguments);
567028
+ if (summary && !this._assistantTextEmitted) {
567029
+ this.emit({
567030
+ type: "assistant_text",
567031
+ content: summary,
567032
+ source: "task_complete_summary",
567033
+ turn,
567034
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
567035
+ });
567036
+ this._assistantTextEmitted = true;
567037
+ }
567038
+ break;
566940
567039
  }
566941
567040
  }
566942
567041
  }
@@ -566946,7 +567045,8 @@ ${sr.result.output}`;
566946
567045
  const batchToolCalls = rawToolCalls.map((tc) => ({
566947
567046
  name: tc.name,
566948
567047
  args: tc.arguments,
566949
- id: tc.id
567048
+ id: tc.id,
567049
+ concurrencySafe: this.resolveToolConcurrencySafe(tc.name, tc.arguments)
566950
567050
  }));
566951
567051
  const batches = partitionToolCalls(batchToolCalls);
566952
567052
  for (const batch2 of batches) {
@@ -567013,22 +567113,21 @@ ${sr.result.output}`;
567013
567113
  }
567014
567114
  const _bp3 = await this._runBackwardPassReview(turn);
567015
567115
  if (_bp3 && !_bp3.proceed && _bp3.feedback) {
567016
- messages2.push({ role: "system", content: _bp3.feedback });
567017
- } else {
567018
- completed = true;
567019
- summary = extractTaskCompleteSummary(r2.tc.arguments);
567020
- if (summary && !this._assistantTextEmitted) {
567021
- this.emit({
567022
- type: "assistant_text",
567023
- content: summary,
567024
- source: "task_complete_summary",
567025
- turn,
567026
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
567027
- });
567028
- this._assistantTextEmitted = true;
567029
- }
567030
- break;
567116
+ emitBackwardPassAdvisory(_bp3.feedback, turn);
567031
567117
  }
567118
+ completed = true;
567119
+ summary = extractTaskCompleteSummary(r2.tc.arguments);
567120
+ if (summary && !this._assistantTextEmitted) {
567121
+ this.emit({
567122
+ type: "assistant_text",
567123
+ content: summary,
567124
+ source: "task_complete_summary",
567125
+ turn,
567126
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
567127
+ });
567128
+ this._assistantTextEmitted = true;
567129
+ }
567130
+ break;
567032
567131
  }
567033
567132
  }
567034
567133
  }
@@ -567039,7 +567138,7 @@ ${sr.result.output}`;
567039
567138
  }
567040
567139
  if (completed)
567041
567140
  break;
567042
- this.littlemanObserve(messages2, turn);
567141
+ this.adversaryObserve(messages2, turn);
567043
567142
  const currentRepScore = this.detectRepetition(toolCallLog);
567044
567143
  if (currentRepScore > 0.4 && toolCallLog.length >= 4) {
567045
567144
  const { repetitionWindow } = this.contextLimits();
@@ -567225,6 +567324,9 @@ Call task_complete(summary="...") NOW with whatever you have.`
567225
567324
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
567226
567325
  });
567227
567326
  }
567327
+ const pendingBeforeAdversary = this.pendingUserMessages.length;
567328
+ this.adversaryObserve(messages2, turn);
567329
+ const adversaryAddedGuidance = this.pendingUserMessages.length > pendingBeforeAdversary;
567228
567330
  if (/task.?complete|all tests pass/i.test(content)) {
567229
567331
  const completionArgs = { summary: content };
567230
567332
  if (holdNoProgressTaskComplete(completionArgs, turn) || holdProvenanceTaskComplete(completionArgs, turn)) {
@@ -567236,7 +567338,7 @@ Call task_complete(summary="...") NOW with whatever you have.`
567236
567338
  }
567237
567339
  if (isThinkOnly) {
567238
567340
  if (consecutiveThinkOnly >= MAX_CONSECUTIVE_THINK_ONLY) {
567239
- const recentSuccesses = this._littlemanToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
567341
+ const recentSuccesses = this._adversaryToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
567240
567342
  const hasRecentSuccess = recentSuccesses.length > 0;
567241
567343
  const successHint = hasRecentSuccess ? `
567242
567344
 
@@ -567340,6 +567442,12 @@ Your most recent tool calls SUCCEEDED. If the task is complete, call task_comple
567340
567442
  content: "Continue working. Use tools to read files, make changes, and run validation. Call task_complete when done."
567341
567443
  });
567342
567444
  }
567445
+ if (adversaryAddedGuidance) {
567446
+ while (this.pendingUserMessages.length > 0) {
567447
+ const userMsg = this.pendingUserMessages.shift();
567448
+ await this.appendInjectedUserMessage(userMsg, messages2, turn);
567449
+ }
567450
+ }
567343
567451
  }
567344
567452
  try {
567345
567453
  const turnLogTail = toolCallLog.filter((t2) => t2.turn === turn || t2.turn === void 0);
@@ -567487,7 +567595,8 @@ ${this.options.maxTurns && this.options.maxTurns > 0 ? `You have ${this.options.
567487
567595
  tools: toolDefs,
567488
567596
  temperature: this.options.temperature,
567489
567597
  maxTokens: this.options.maxTokens,
567490
- timeoutMs: this.options.requestTimeoutMs
567598
+ timeoutMs: this.options.requestTimeoutMs,
567599
+ numCtx: this.options.contextWindowSize || void 0
567491
567600
  };
567492
567601
  let response;
567493
567602
  try {
@@ -567677,10 +567786,7 @@ ${this.options.maxTurns && this.options.maxTurns > 0 ? `You have ${this.options.
567677
567786
  } else {
567678
567787
  try {
567679
567788
  result = await tool.execute(tc.arguments);
567680
- result = applyToolResultTriage(result, {
567681
- workingDir: this._workingDirectory || process.cwd(),
567682
- toolName: tc.name
567683
- });
567789
+ result = this.applyRegisteredToolResultTriage(result, resolvedTool?.name ?? tc.name, tool);
567684
567790
  } catch (err) {
567685
567791
  result = {
567686
567792
  success: false,
@@ -567699,10 +567805,7 @@ ${this.options.maxTurns && this.options.maxTurns > 0 ? `You have ${this.options.
567699
567805
  if (pw2 && tool) {
567700
567806
  try {
567701
567807
  result = await tool.execute(tc.arguments);
567702
- result = applyToolResultTriage(result, {
567703
- workingDir: this._workingDirectory || process.cwd(),
567704
- toolName: tc.name
567705
- });
567808
+ result = this.applyRegisteredToolResultTriage(result, resolvedTool?.name ?? tc.name, tool);
567706
567809
  } catch (err) {
567707
567810
  result = {
567708
567811
  success: false,
@@ -567797,8 +567900,7 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
567797
567900
  }
567798
567901
  const _bp4 = await this._runBackwardPassReview(turn);
567799
567902
  if (_bp4 && !_bp4.proceed && _bp4.feedback) {
567800
- messages2.push({ role: "system", content: _bp4.feedback });
567801
- continue;
567903
+ emitBackwardPassAdvisory(_bp4.feedback, turn);
567802
567904
  }
567803
567905
  completed = true;
567804
567906
  summary = extractTaskCompleteSummary(tc.arguments);
@@ -567871,7 +567973,7 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
567871
567973
  }
567872
567974
  if (isThinkOnlyBF) {
567873
567975
  if (consecutiveThinkOnly >= MAX_CONSECUTIVE_THINK_ONLY) {
567874
- const recentSucc = this._littlemanToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
567976
+ const recentSucc = this._adversaryToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
567875
567977
  const succHint = recentSucc.length > 0 ? "\n\nYour most recent tool calls SUCCEEDED. If the task is complete, call task_complete now with a summary." : "";
567876
567978
  messages2.push({
567877
567979
  role: "user",
@@ -569959,38 +570061,35 @@ ${newerSummary}`;
569959
570061
  ${trimmedNew}`;
569960
570062
  }
569961
570063
  // -------------------------------------------------------------------------
569962
- // Littleman Observer — parallel meta-analysis of the main loop
570064
+ // Adversary — parallel meta-analysis of the main loop
569963
570065
  // -------------------------------------------------------------------------
569964
- // Inspired by Hannover's fireCompanionObserver (src/buddy/observer.ts).
569965
570066
  // Runs after each tool turn to detect when the model has lost track of
569966
570067
  // what happened and inject corrections before the next inference.
569967
570068
  //
569968
570069
  // This is the architectural fix for the "I see both tools have been failing"
569969
570070
  // regression: instead of only fixing the data the model sees (mask/summary),
569970
570071
  // we add a second analysis path that catches mismatches in real-time.
569971
- /** Track recent tool outcomes for the littleman observer */
569972
- _littlemanToolOutcomes = [];
569973
- /** WO-FIX-C: Tool fingerprints the littleman has flagged as redundant.
569974
- * Checked in executeSingle to block re-execution and return cached data. */
569975
- _littlemanRedundantBlocks = /* @__PURE__ */ new Set();
570072
+ /** Track recent tool outcomes for the adversary */
570073
+ _adversaryToolOutcomes = [];
570074
+ /** WO-FIX-C: Tool fingerprints the adversary has flagged as redundant.
570075
+ * Checked in executeSingle to attach advisory guidance before dispatch. */
570076
+ _adversaryRedundantSignals = /* @__PURE__ */ new Set();
569976
570077
  /** Reflexion pattern: task-local failure-indexed reflection buffer.
569977
570078
  * Generates typed self-reflections on task failure and injects them
569978
570079
  * into the next attempt's context for active learning. */
569979
570080
  _reflectionBuffer = null;
569980
570081
  /**
569981
- * Littleman observer: post-turn meta-analysis.
570082
+ * Adversary: post-turn meta-analysis.
569982
570083
  *
569983
570084
  * Examines the last few messages looking for contradictions between
569984
570085
  * actual tool outcomes and the model's stated understanding. When it
569985
570086
  * detects the model claiming failure after success (or vice versa),
569986
- * it injects a corrective message.
570087
+ * it injects a corrective non-blocking critique.
569987
570088
  *
569988
570089
  * Also detects repeated actions — when the model re-does something
569989
- * that already succeeded, the littleman nudges it to move on.
570090
+ * that already succeeded, the adversary nudges it to move on.
569990
570091
  */
569991
- littlemanObserve(messages2, turn) {
569992
- if (this.options.modelTier === "large")
569993
- return;
570092
+ adversaryObserve(messages2, turn) {
569994
570093
  const recent = messages2.slice(-6);
569995
570094
  for (const msg of recent) {
569996
570095
  if (msg.role === "tool" && typeof msg.content === "string") {
@@ -570017,10 +570116,16 @@ ${trimmedNew}`;
570017
570116
  }
570018
570117
  const argsKey = toolArgs ? this._buildExactArgsKey(toolArgs) : void 0;
570019
570118
  const fingerprint = toolArgs ? this._buildToolFingerprint(toolName, toolArgs) : void 0;
570020
- if (!this._littlemanToolOutcomes.some((o2) => o2.turn === turn && o2.tool === toolName && o2.fingerprint === fingerprint)) {
570021
- this._littlemanToolOutcomes.push({
570119
+ const alreadySeen = this._adversaryToolOutcomes.some((o2) => {
570120
+ if (msg.tool_call_id && o2.toolCallId === msg.tool_call_id)
570121
+ return true;
570122
+ return o2.turn === turn && o2.tool === toolName && o2.fingerprint === fingerprint;
570123
+ });
570124
+ if (!alreadySeen) {
570125
+ this._adversaryToolOutcomes.push({
570022
570126
  turn,
570023
570127
  tool: toolName,
570128
+ toolCallId: msg.tool_call_id,
570024
570129
  argsKey,
570025
570130
  fingerprint,
570026
570131
  succeeded,
@@ -570029,27 +570134,47 @@ ${trimmedNew}`;
570029
570134
  }
570030
570135
  }
570031
570136
  }
570032
- while (this._littlemanToolOutcomes.length > 20)
570033
- this._littlemanToolOutcomes.shift();
570137
+ while (this._adversaryToolOutcomes.length > 20)
570138
+ this._adversaryToolOutcomes.shift();
570034
570139
  const emitReaction = (cls, shortText, confidence2, details2) => {
570035
570140
  this.emit({
570036
- type: "observer_reaction",
570141
+ type: "adversary_reaction",
570037
570142
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
570038
- observer: { class: cls, shortText, confidence: confidence2, details: details2 }
570143
+ adversary: { class: cls, shortText, confidence: confidence2, details: details2 }
570039
570144
  });
570040
570145
  };
570146
+ const buildAdversaryCritique = (input) => {
570147
+ const alternatives = input.alternatives && input.alternatives.length > 0 ? `
570148
+ Alternatives:
570149
+ ${input.alternatives.map((item) => `- ${item}`).join("\n")}` : "";
570150
+ return [
570151
+ `[ADVERSARY CRITIQUE — non-blocking]`,
570152
+ `Evidence: ${input.evidence}`,
570153
+ `Root cause hypothesis: ${input.hypothesis}`,
570154
+ `Corrective action: ${input.correctiveAction}${alternatives}`
570155
+ ].join("\n");
570156
+ };
570041
570157
  const lastAssistant = [...recent].reverse().find((m2) => m2.role === "assistant" && typeof m2.content === "string");
570042
570158
  if (lastAssistant && typeof lastAssistant.content === "string") {
570043
570159
  const text = lastAssistant.content.toLowerCase();
570044
570160
  const claimsFailure = /(?:fail|error|didn't work|not working|unable to|cannot|couldn't|both .* fail|tools? (?:have |has )?been fail)/i.test(text);
570045
570161
  if (claimsFailure) {
570046
- const recentOutcomes = this._littlemanToolOutcomes.slice(-4);
570162
+ const recentOutcomes = this._adversaryToolOutcomes.slice(-4);
570047
570163
  const successes = recentOutcomes.filter((o2) => o2.succeeded);
570048
570164
  if (successes.length >= 1) {
570049
570165
  const successList = successes.map((o2) => `${o2.tool}: ${o2.preview.slice(0, 60)}`).join("; ");
570050
570166
  emitReaction("false_failure", `Claimed failure, but recent tools succeeded (${successes.length})`, 0.9, successList);
570051
- if (this._observerMode === "skillcoach" || this._observerMode === "both") {
570052
- this.pendingUserMessages.push(`⚠ Correction: recent tools DID succeed. Do not retry them. Successful results: ${successList}. Use them to advance the task.`);
570167
+ if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
570168
+ this.pendingUserMessages.push(buildAdversaryCritique({
570169
+ evidence: `Recent tools succeeded: ${successList}.`,
570170
+ hypothesis: "The main loop is interpreting uncertainty or partial progress as failure and may be about to discard usable evidence.",
570171
+ correctiveAction: "Use the successful results to advance the task, then verify the next concrete step.",
570172
+ alternatives: [
570173
+ "Edit or run the next verification step that follows from the successful output.",
570174
+ "Read a different targeted file if the successful result exposed a new path or symbol.",
570175
+ "Complete only if the successful output is sufficient evidence for the user's request."
570176
+ ]
570177
+ }));
570053
570178
  }
570054
570179
  this.emit({
570055
570180
  type: "status",
@@ -570063,47 +570188,68 @@ ${trimmedNew}`;
570063
570188
  const text = lastAssistant.content.toLowerCase();
570064
570189
  const claimsSuccess = /(done|fixed|success|passed|complete)/i.test(text);
570065
570190
  if (claimsSuccess) {
570066
- const recentOutcomes = this._littlemanToolOutcomes.slice(-4);
570191
+ const recentOutcomes = this._adversaryToolOutcomes.slice(-4);
570067
570192
  const failures = recentOutcomes.filter((o2) => !o2.succeeded);
570068
570193
  const successes = recentOutcomes.filter((o2) => o2.succeeded);
570069
570194
  if (failures.length > 0 && successes.length === 0) {
570070
570195
  const failList = failures.map((o2) => `${o2.tool}: ${o2.preview.slice(0, 60)}`).join("; ");
570071
570196
  emitReaction("false_success", `Claimed success, but recent tools failed (${failures.length})`, 0.9, failList);
570072
- if (this._observerMode === "skillcoach" || this._observerMode === "both") {
570073
- this.pendingUserMessages.push(`⚠ Your recent tools show errors (${failures.length}). Verify the last tool output and correct the issue before claiming success.`);
570197
+ if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
570198
+ this.pendingUserMessages.push(buildAdversaryCritique({
570199
+ evidence: `Recent tools show errors (${failures.length}): ${failList}.`,
570200
+ hypothesis: "The main loop is prematurely compressing intent into success language before the verifier produced evidence.",
570201
+ correctiveAction: "Inspect the failed output, identify the implicated path/symbol/command, and run one focused corrective step before claiming success.",
570202
+ alternatives: [
570203
+ "Read the smallest relevant source region around the failed symbol.",
570204
+ "Patch the implicated code or configuration.",
570205
+ "Run the same verifier only after a state-changing fix."
570206
+ ]
570207
+ }));
570074
570208
  }
570075
570209
  }
570076
570210
  }
570077
570211
  }
570078
- const lastToolCalls = recent.filter((m2) => m2.role === "assistant" && m2.tool_calls?.length).flatMap((m2) => m2.tool_calls ?? []);
570079
- for (const tc of lastToolCalls) {
570080
- const name10 = tc.function.name;
570081
- if (this._isStatefulBrowserTool(name10))
570082
- continue;
570083
- let args = {};
570084
- try {
570085
- args = JSON.parse(tc.function.arguments);
570086
- } catch {
570087
- }
570088
- const argsKey = this._buildExactArgsKey(args);
570089
- const fingerprint = this._buildToolFingerprint(name10, args);
570090
- const prior = this._littlemanToolOutcomes.find((o2) => o2.succeeded && o2.tool === name10 && o2.fingerprint === fingerprint && o2.turn < turn);
570091
- if (prior) {
570092
- this._littlemanRedundantBlocks.add(fingerprint);
570093
- emitReaction("redundant_action", `Already ran ${name10} successfully on turn ${prior.turn}`, 0.8, prior.preview);
570094
- if (this._observerMode === "skillcoach" || this._observerMode === "both") {
570095
- this.pendingUserMessages.push(`⚠ You already ran ${name10} successfully on turn ${prior.turn} with exact arguments (${argsKey.slice(0, 120)}). Do NOT re-run it. Use the existing result and proceed.`);
570212
+ if (this.options.disableAdversaryCritic !== true) {
570213
+ const newestAssistant = [...recent].reverse().find((m2) => m2.role === "assistant");
570214
+ const lastToolCalls = newestAssistant?.tool_calls ?? [];
570215
+ for (const tc of lastToolCalls) {
570216
+ const name10 = tc.function.name;
570217
+ if (this._isStatefulBrowserTool(name10))
570218
+ continue;
570219
+ let args = {};
570220
+ try {
570221
+ args = JSON.parse(tc.function.arguments);
570222
+ } catch {
570223
+ }
570224
+ const argsKey = this._buildExactArgsKey(args);
570225
+ const fingerprint = this._buildToolFingerprint(name10, args);
570226
+ const prior = this._adversaryToolOutcomes.find((o2) => o2.succeeded && o2.tool === name10 && o2.fingerprint === fingerprint && o2.turn < turn);
570227
+ if (prior) {
570228
+ this._adversaryRedundantSignals.add(fingerprint);
570229
+ emitReaction("redundant_action", `Already ran ${name10} successfully on turn ${prior.turn}`, 0.8, prior.preview);
570230
+ if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
570231
+ this.pendingUserMessages.push(buildAdversaryCritique({
570232
+ evidence: `${name10} already succeeded on turn ${prior.turn} with exact arguments (${argsKey.slice(0, 120)}). Prior preview: ${prior.preview}`,
570233
+ hypothesis: "The main loop may have lost track of previously observed evidence because of context pressure, path confusion, or repeated discovery.",
570234
+ correctiveAction: "Let this duplicate run execute if needed, but treat the prior result as evidence and pivot afterward unless state has changed.",
570235
+ alternatives: [
570236
+ "Use the prior result to edit/write, verify, or finish with evidence.",
570237
+ "Read a different specific file or selector if the current evidence is insufficient.",
570238
+ "Repeat exact arguments only when filesystem, browser, or page state changed."
570239
+ ]
570240
+ }));
570241
+ }
570242
+ this.emit({
570243
+ type: "status",
570244
+ content: `\x1B[38;5;178m⚠ Adversary noted redundant ${name10} call (succeeded on turn ${prior.turn}); action remains allowed\x1B[0m`,
570245
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
570246
+ });
570247
+ break;
570096
570248
  }
570097
- this.emit({
570098
- type: "status",
570099
- content: `\x1B[38;5;178m⚠ Prevented redundant ${name10} call (succeeded on turn ${prior.turn})\x1B[0m`,
570100
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
570101
- });
570102
- break;
570103
570249
  }
570104
570250
  }
570105
570251
  {
570106
- const recentCalls = this._littlemanToolOutcomes.slice(-5);
570252
+ const recentCalls = this._adversaryToolOutcomes.slice(-5);
570107
570253
  if (recentCalls.length >= 3) {
570108
570254
  let consecutiveShortResults = 0;
570109
570255
  for (let i2 = recentCalls.length - 1; i2 >= 0; i2--) {
@@ -570116,30 +570262,39 @@ ${trimmedNew}`;
570116
570262
  }
570117
570263
  if (consecutiveShortResults >= 3) {
570118
570264
  emitReaction("idle_think", `Consecutive output without input: ${consecutiveShortResults}`, 0.7);
570119
- if (this._observerMode === "skillcoach" || this._observerMode === "both") {
570120
- this.pendingUserMessages.push(`⚠ You have sent ${consecutiveShortResults} consecutive outputs without reading any input. Alternate: receive input, then respond. Call your input tool now.`);
570265
+ if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
570266
+ this.pendingUserMessages.push(buildAdversaryCritique({
570267
+ evidence: `${consecutiveShortResults} consecutive output-like calls occurred without an input-like observation.`,
570268
+ hypothesis: "The loop may be acting from stale state instead of re-observing the environment.",
570269
+ correctiveAction: "Take one input/observation step before another output step.",
570270
+ alternatives: [
570271
+ "Call the input/listen/poll tool for the current environment.",
570272
+ "Read the current UI/page state before clicking or typing again.",
570273
+ "If the task is already complete, finish with the concrete evidence already observed."
570274
+ ]
570275
+ }));
570121
570276
  }
570122
570277
  this.emit({
570123
570278
  type: "status",
570124
- content: `\x1B[38;5;178m⚠ Blocked runaway output (${consecutiveShortResults} consecutive sends without receive)\x1B[0m`,
570279
+ content: `\x1B[38;5;178m⚠ Adversary flagged runaway-output risk (${consecutiveShortResults} consecutive sends without receive); action remains allowed\x1B[0m`,
570125
570280
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
570126
570281
  });
570127
570282
  }
570128
570283
  }
570129
570284
  }
570130
- const succCount = this._littlemanToolOutcomes.filter((o2) => o2.succeeded).length;
570131
- const failCount = this._littlemanToolOutcomes.filter((o2) => !o2.succeeded).length;
570132
- const lastFour = this._littlemanToolOutcomes.slice(-4);
570285
+ const succCount = this._adversaryToolOutcomes.filter((o2) => o2.succeeded).length;
570286
+ const failCount = this._adversaryToolOutcomes.filter((o2) => !o2.succeeded).length;
570287
+ const lastFour = this._adversaryToolOutcomes.slice(-4);
570133
570288
  const details = [
570134
570289
  `Recent tool outcomes:`,
570135
570290
  ...lastFour.map((o2) => `- ${o2.tool}: ${o2.succeeded ? "OK" : "ERR"} — ${o2.preview}`)
570136
570291
  ].join("\n");
570137
570292
  this.emit({
570138
- type: "debug_littleman",
570293
+ type: "debug_adversary",
570139
570294
  turn,
570140
570295
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
570141
- content: `Littleman: ${this._littlemanToolOutcomes.length} tracked outcomes (${succCount} ok, ${failCount} err)`,
570142
- littlemanAction: {
570296
+ content: `Adversary: ${this._adversaryToolOutcomes.length} tracked outcomes (${succCount} ok, ${failCount} err)`,
570297
+ adversaryAction: {
570143
570298
  detection: "none",
570144
570299
  recentSuccesses: succCount,
570145
570300
  recentFailures: failCount,
@@ -571164,6 +571319,7 @@ ${transcript}`
571164
571319
  }
571165
571320
  }
571166
571321
  const getDesc = (tool) => dynamicDescs.get(tool.name) ?? tool.description;
571322
+ const aliasText = (tool) => Array.isArray(tool.aliases) && tool.aliases.length > 0 ? ` aliases:${tool.aliases.join(",")}` : "";
571167
571323
  const getCustomToolMetadata = (tool) => {
571168
571324
  const meta = tool.customToolMetadata;
571169
571325
  return meta?.isCustomTool === true ? meta : void 0;
@@ -571200,7 +571356,7 @@ Example: ${tool.name}(${JSON.stringify(meta.examples[0].args ?? {})})` : "";
571200
571356
  };
571201
571357
  const getIndexLabel = (tool) => {
571202
571358
  const meta = getCustomToolMetadata(tool);
571203
- const desc = `${getDesc(tool)} ${customToolSearchText(tool)}`.toLowerCase().replace(/[`"'()[\]{}:;,.!?/\\|-]+/g, " ");
571359
+ const desc = `${getDesc(tool)} ${aliasText(tool)} ${customToolSearchText(tool)}`.toLowerCase().replace(/[`"'()[\]{}:;,.!?/\\|-]+/g, " ");
571204
571360
  const keywords2 = Array.from(new Set(desc.split(/\s+/).filter((word2) => word2.length > 2 && !STOPWORDS3.has(word2) && !tool.name.toLowerCase().includes(word2)))).slice(0, 4);
571205
571361
  const base3 = keywords2.length > 0 ? `${tool.name}(${keywords2.join(",")})` : tool.name;
571206
571362
  if (!meta)
@@ -571234,7 +571390,7 @@ Example: ${tool.name}(${JSON.stringify(meta.examples[0].args ?? {})})` : "";
571234
571390
  if (CORE_TOOLS3.has(tool.name))
571235
571391
  continue;
571236
571392
  const customMeta = getCustomToolMetadata(tool);
571237
- const toolText = `${tool.name} ${getDesc(tool)} ${customToolSearchText(tool)}`.toLowerCase();
571393
+ const toolText = `${tool.name} ${aliasText(tool)} ${getDesc(tool)} ${customToolSearchText(tool)}`.toLowerCase();
571238
571394
  const toolWords = toolText.split(/\s+/).filter((w) => w.length > 2);
571239
571395
  let score = 0;
571240
571396
  for (const tw of toolWords) {
@@ -571245,7 +571401,7 @@ Example: ${tool.name}(${JSON.stringify(meta.examples[0].args ?? {})})` : "";
571245
571401
  score += 1;
571246
571402
  }
571247
571403
  }
571248
- if (taskText.includes(tool.name.replace(/_/g, " ")) || taskText.includes(tool.name)) {
571404
+ if (taskText.includes(tool.name.replace(/_/g, " ")) || taskText.includes(tool.name) || (tool.aliases ?? []).some((alias) => taskText.includes(alias.toLowerCase()))) {
571249
571405
  score += customMeta ? 16 : 10;
571250
571406
  }
571251
571407
  if (wants3dModelGeneration) {
@@ -571398,6 +571554,9 @@ ${catalog}`,
571398
571554
  continue;
571399
571555
  lines.push("");
571400
571556
  lines.push(`## ${tool.name}`);
571557
+ if (tool.aliases?.length) {
571558
+ lines.push(`Aliases: ${tool.aliases.join(", ")}`);
571559
+ }
571401
571560
  lines.push(`${getDesc(tool)}${customToolDetails(tool)}`);
571402
571561
  lines.push(`Parameters: ${JSON.stringify(tool.parameters)}`);
571403
571562
  }
@@ -571410,7 +571569,7 @@ ${catalog}`,
571410
571569
  }
571411
571570
  return { success: true, output: lines.join("\n") };
571412
571571
  }
571413
- const matches = deferred.filter((t2) => t2.name.toLowerCase().includes(query) || getDesc(t2).toLowerCase().includes(query) || customToolSearchText(t2).toLowerCase().includes(query)).sort((a2, b) => {
571572
+ const matches = deferred.filter((t2) => t2.name.toLowerCase().includes(query) || (t2.aliases ?? []).some((alias) => alias.toLowerCase().includes(query)) || getDesc(t2).toLowerCase().includes(query) || customToolSearchText(t2).toLowerCase().includes(query)).sort((a2, b) => {
571414
571573
  const scoreTool = (tool) => {
571415
571574
  const meta = getCustomToolMetadata(tool);
571416
571575
  let score = 0;
@@ -571418,6 +571577,10 @@ ${catalog}`,
571418
571577
  score += 30;
571419
571578
  if (tool.name.toLowerCase().includes(query))
571420
571579
  score += 10;
571580
+ if ((tool.aliases ?? []).some((alias) => alias.toLowerCase() === query))
571581
+ score += 24;
571582
+ if ((tool.aliases ?? []).some((alias) => alias.toLowerCase().includes(query)))
571583
+ score += 8;
571421
571584
  if (getDesc(tool).toLowerCase().includes(query))
571422
571585
  score += 4;
571423
571586
  if (customToolSearchText(tool).toLowerCase().includes(query))
@@ -571448,7 +571611,9 @@ ${catalog}`,
571448
571611
  activatedToolsRef.add(t2.name);
571449
571612
  const result = matches.map((t2) => {
571450
571613
  const paramsStr = JSON.stringify(t2.parameters, null, 2);
571451
- return `## ${t2.name}
571614
+ const aliases = t2.aliases?.length ? `
571615
+ Aliases: ${t2.aliases.join(", ")}` : "";
571616
+ return `## ${t2.name}${aliases}
571452
571617
  ${getDesc(t2)}${customToolDetails(t2)}
571453
571618
 
571454
571619
  Parameters:
@@ -651081,7 +651246,7 @@ ${conversationStream}`
651081
651246
  // off default rather than the global config's value.
651082
651247
  thinking: false,
651083
651248
  // Telegram sub-agent runs must be bounded. Brute-force re-engagement and
651084
- // the Littleman near-cap turn extension are appropriate for the full TUI
651249
+ // the Adversary near-cap turn extension are appropriate for the full TUI
651085
651250
  // session but cause Telegram to silently outgrow its nominal maxTurns,
651086
651251
  // which is how the Snow Crash PDF loop reached 60+ turns of self-talk.
651087
651252
  ...TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS
@@ -681052,15 +681217,31 @@ function adaptTool6(tool) {
681052
681217
  }
681053
681218
  return {
681054
681219
  name: tool.name,
681220
+ aliases: tool.aliases,
681055
681221
  description: tool.description,
681056
681222
  parameters: tool.parameters,
681223
+ inputSchema: tool.inputSchema,
681224
+ prompt: tool.prompt,
681225
+ executeStream: tool.executeStream,
681226
+ validateInput: tool.validateInput,
681227
+ isConcurrencySafe: tool.isConcurrencySafe,
681228
+ isReadOnly: tool.isReadOnly,
681229
+ maxResultSizeChars: tool.maxResultSizeChars,
681057
681230
  async execute(args) {
681058
681231
  const result = await tool.execute(args);
681059
681232
  return {
681060
681233
  success: result.success,
681061
681234
  output: result.output,
681062
681235
  error: result.error,
681063
- llmContent: result.llmContent
681236
+ llmContent: result.llmContent,
681237
+ mutated: result.mutated,
681238
+ mutatedFiles: result.mutatedFiles,
681239
+ diff: result.diff,
681240
+ dryRun: result.dryRun,
681241
+ noop: result.noop,
681242
+ partial: result.partial,
681243
+ beforeHash: result.beforeHash,
681244
+ afterHash: result.afterHash
681064
681245
  };
681065
681246
  }
681066
681247
  };
@@ -683444,8 +683625,8 @@ ${entry.fullContent}`
683444
683625
  let streamTextBuffer = "";
683445
683626
  let lastAssistantText = "";
683446
683627
  let lastProvenancePath = null;
683447
- let showLittleman = false;
683448
- const littlemanBuffer = [];
683628
+ let showAdversary = false;
683629
+ const adversaryBuffer = [];
683449
683630
  const contentWrite = (fn) => {
683450
683631
  if (isNeovimActive()) {
683451
683632
  const origWrite = process.stdout.write;
@@ -683929,24 +684110,24 @@ ${entry.fullContent}`
683929
684110
  if (snap) {
683930
684111
  contentWrite(
683931
684112
  () => renderInfo(
683932
- `\x1B[38;5;243m[ctx] ${snap.messageCount} msgs | ~${snap.estimatedTokens} tok | headroom: ${snap.headroom} | tools: ${snap.toolCallCount} | littleman: ${snap.littlemanOutcomes} tracked\x1B[0m`
684113
+ `\x1B[38;5;243m[ctx] ${snap.messageCount} msgs | ~${snap.estimatedTokens} tok | headroom: ${snap.headroom} | tools: ${snap.toolCallCount} | adversary: ${snap.adversaryOutcomes} tracked\x1B[0m`
683933
684114
  )
683934
684115
  );
683935
684116
  }
683936
684117
  }
683937
684118
  break;
683938
- case "debug_littleman":
683939
- if (event.littlemanAction) {
683940
- const lm = event.littlemanAction;
684119
+ case "debug_adversary":
684120
+ if (event.adversaryAction) {
684121
+ const lm = event.adversaryAction;
683941
684122
  if (lm.intervention) {
683942
684123
  const simple = `⚠ ${lm.intervention}`;
683943
684124
  contentWrite(() => renderInfo(simple));
683944
684125
  }
683945
684126
  if (lm.details) {
683946
- littlemanBuffer.push(lm.details);
683947
- if (littlemanBuffer.length > 50)
683948
- littlemanBuffer.splice(0, littlemanBuffer.length - 50);
683949
- if (showLittleman) {
684127
+ adversaryBuffer.push(lm.details);
684128
+ if (adversaryBuffer.length > 50)
684129
+ adversaryBuffer.splice(0, adversaryBuffer.length - 50);
684130
+ if (showAdversary) {
683950
684131
  const det = String(lm.details);
683951
684132
  contentWrite(() => {
683952
684133
  process.stdout.write(c3.dim(det) + "\n");
@@ -685688,8 +685869,8 @@ This is an independent background session started from /background.`
685688
685869
  origTtyWriteRef = null;
685689
685870
  statusBar.setNeovimFocusChecker(() => isNeovimFocused());
685690
685871
  let _escapeHandler = null;
685691
- let showLittleman = false;
685692
- const littlemanBuffer = [];
685872
+ let showAdversary = false;
685873
+ const adversaryBuffer = [];
685693
685874
  statusBar.hookDirectInput(
685694
685875
  rl,
685695
685876
  () => {
@@ -685722,26 +685903,26 @@ This is an independent background session started from /background.`
685722
685903
  }
685723
685904
  },
685724
685905
  () => {
685725
- showLittleman = !showLittleman;
685906
+ showAdversary = !showAdversary;
685726
685907
  if (statusBar.isActive) {
685727
685908
  try {
685728
685909
  statusBar.jumpToLive();
685729
685910
  } catch {
685730
685911
  }
685731
685912
  statusBar.beginContentWrite();
685732
- if (showLittleman) {
685733
- renderInfo("Littleman details: shown");
685734
- const dump = littlemanBuffer.slice(-10).join("\n");
685913
+ if (showAdversary) {
685914
+ renderInfo("Adversary details: shown");
685915
+ const dump = adversaryBuffer.slice(-10).join("\n");
685735
685916
  if (dump.trim()) {
685736
685917
  process.stdout.write(`
685737
- ${c3.dim("[littleman recap]")}
685918
+ ${c3.dim("[adversary recap]")}
685738
685919
  `);
685739
685920
  for (const line of dump.split("\n")) {
685740
685921
  process.stdout.write(" " + c3.dim(line) + "\n");
685741
685922
  }
685742
685923
  }
685743
685924
  } else {
685744
- renderInfo("Littleman details: hidden");
685925
+ renderInfo("Adversary details: hidden");
685745
685926
  }
685746
685927
  statusBar.endContentWrite();
685747
685928
  }