omnius 1.0.212 → 1.0.213

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -551582,28 +551582,38 @@ var init_personality = __esm({
551582
551582
  });
551583
551583
 
551584
551584
  // packages/orchestrator/dist/critic.js
/**
 * Builds the non-blocking "[ADVERSARY GUIDANCE]" note describing a repeated
 * tool call: what was observed, the call itself, a root-cause hypothesis and
 * suggested next actions, optionally followed by a preview of prior evidence.
 *
 * The note must never make the surrounding tool path fail, so argument
 * serialization and option handling are defensive.
 *
 * @param {{tool: string, args?: unknown}} call - The proposed tool call.
 * @param {number} hits - Ordinal of this exact repeat; only rendered when
 *   `opts.adversaryFlag` is falsy.
 * @param {{cachedResult?: string, adversaryFlag?: boolean}|null} [opts]
 *   `cachedResult` (if truthy) is previewed up to 700 characters;
 *   `adversaryFlag` switches the observation sentence.
 * @returns {string} The multi-line guidance message.
 */
function buildCriticGuidanceMessage(call, hits, opts = {}) {
  // The parameter default only covers `undefined`; treat an explicit null the same way.
  const { cachedResult, adversaryFlag } = opts ?? {};

  // JSON.stringify throws on circular structures / BigInt and yields undefined
  // for functions; fall back to a placeholder instead of crashing.
  let serializedArgs;
  try {
    serializedArgs = JSON.stringify(call.args ?? {});
  } catch {
    serializedArgs = void 0;
  }
  const argPreview = (serializedArgs ?? "[unserializable arguments]").slice(0, 200);

  // String() keeps the preview working even if a non-string result slips through.
  const cached = cachedResult ? `
Prior evidence preview:
${String(cachedResult).slice(0, 700)}` : "";
  const source = adversaryFlag ? "The adversary recognized this exact tool call as already observed earlier." : `This is exact repeat #${hits} for the same ${call.tool} arguments.`;
  return `[ADVERSARY GUIDANCE non-blocking]
Observation: ${source}
Call: ${call.tool}(${argPreview})
Root cause hypothesis: the run is losing track of already-observed evidence, usually after path confusion, compaction, or an over-broad discovery loop.
Corrective action: let this call's result inform the next step once, then pivot to a concrete action.
Suggested next actions: edit/write the implicated file, run verification, read a different specific file, or complete with evidence. Prefer not to repeat this exact call again unless the filesystem, browser, or page state changed.${cached}`;
}
551595
551598
  /**
   * Wraps a previously observed tool result in a one-line banner followed by
   * the result itself on the next line.
   * In this release the banner changes from the imperative
   * "[CACHED RESULT ... Do NOT call this tool again ...]" wording to the
   * neutral "[PRIOR RESULT ...]" label.
   * @param {*} result - prior tool output; interpolated into the template as-is.
   * @returns {string} the banner line, a newline, then the interpolated result.
   */
  function buildCachedResultEnvelope(result) {
551596
- return `[CACHED RESULT — you already have this information from a prior identical call. Do NOT call this tool again with the same arguments.]
551599
+ return `[PRIOR RESULT — already observed by a prior identical call]
551597
551600
  ${result}`;
551598
551601
  }
551599
551602
  function evaluate2(inputs) {
551600
- const { proposedCall, fingerprint, isReadLike, recentToolResults, dedupHitCount, observerRedundantBlock } = inputs;
551601
- if (observerRedundantBlock) {
551603
+ const { proposedCall, fingerprint, isReadLike, recentToolResults, dedupHitCount, adversaryRedundantSignal } = inputs;
551604
+ if (adversaryRedundantSignal) {
551602
551605
  const cached = recentToolResults.get(fingerprint);
551606
+ const cachedResult = cached ? buildCachedResultEnvelope(cached.result) : void 0;
551603
551607
  return {
551604
- decision: "observer_block",
551605
- reason: "Littleman observer flagged this fingerprint as redundant",
551606
- cachedResult: cached ? buildCachedResultEnvelope(cached.result) : null
551608
+ decision: "guidance",
551609
+ reason: "Adversary flagged this fingerprint as redundant",
551610
+ hitNumber: (dedupHitCount.get(fingerprint) ?? 0) + 1,
551611
+ guidanceMessage: buildCriticGuidanceMessage(proposedCall, (dedupHitCount.get(fingerprint) ?? 0) + 1, {
551612
+ cachedResult,
551613
+ adversaryFlag: true
551614
+ }),
551615
+ cachedResult,
551616
+ compacted: cached?.compacted
551607
551617
  };
551608
551618
  }
551609
551619
  const cacheEligible = isReadLike || proposedCall.tool === "shell";
@@ -551611,24 +551621,16 @@ function evaluate2(inputs) {
551611
551621
  const cached = recentToolResults.get(fingerprint);
551612
551622
  if (cached !== void 0) {
551613
551623
  const hits = (dedupHitCount.get(fingerprint) ?? 0) + 1;
551614
- const threshold = proposedCall.tool === "shell" ? SHELL_THRESHOLD : FS_THRESHOLD;
551615
- if (hits >= threshold) {
551616
- return {
551617
- decision: "force_progress_block",
551618
- reason: `${proposedCall.tool} fingerprint hit count ${hits} >= ${threshold}`,
551619
- hitNumber: hits,
551620
- blockMessage: buildForceProgressBlockMessage(proposedCall, hits),
551621
- cachedResult: buildCachedResultEnvelope(cached.result),
551622
- compacted: cached.compacted
551623
- };
551624
- }
551625
551624
  const cachedEnvelope = buildCachedResultEnvelope(cached.result);
551626
551625
  return {
551627
- decision: "serve_cached",
551628
- reason: cached.compacted ? "post-compaction cache re-serve" : `duplicate call #${hits} (still under ${threshold}-hit gate)`,
551626
+ decision: "guidance",
551627
+ reason: cached.compacted ? "post-compaction duplicate evidence" : `duplicate call #${hits}`,
551629
551628
  cachedResult: cachedEnvelope,
551630
551629
  compacted: cached.compacted,
551631
- hitNumber: hits
551630
+ hitNumber: hits,
551631
+ guidanceMessage: buildCriticGuidanceMessage(proposedCall, hits, {
551632
+ cachedResult: cachedEnvelope
551633
+ })
551632
551634
  };
551633
551635
  }
551634
551636
  }
@@ -551680,12 +551682,9 @@ function isStagnant(signals, opts) {
551680
551682
  return false;
551681
551683
  return signals.completedDelta <= 0 && signals.filesDelta < filesDeltaMin && signals.failureSum >= failureThreshold && signals.variantCount >= variantThreshold;
551682
551684
  }
551683
- var SHELL_THRESHOLD, FS_THRESHOLD;
551684
551685
  // Initializer for packages/orchestrator/dist/critic.js, registered through
  // __esm (helper defined outside this view). This release removes the
  // SHELL_THRESHOLD / FS_THRESHOLD declarations and assignments, so the
  // initializer body is left with only the strict-mode directive.
  var init_critic = __esm({
551685
551686
  "packages/orchestrator/dist/critic.js"() {
551686
551687
  "use strict";
551687
- SHELL_THRESHOLD = 2;
551688
- FS_THRESHOLD = 3;
551689
551688
  }
551690
551689
  });
551691
551690
 
@@ -558656,8 +558655,8 @@ var init_agenticRunner = __esm({
558656
558655
  // WO-KG-15
558657
558656
  _retrievalContextCache = null;
558658
558657
  // WO-KG-15: cache per-run
558659
- // Observer world-model and cohort stats
558660
- _observerMode = "both";
558658
+ // Adversary world-model and cohort stats
558659
+ _adversaryMode = "both";
558661
558660
  _worldFacts = { files: /* @__PURE__ */ new Map(), lastTest: {}, lastLists: /* @__PURE__ */ new Map() };
558662
558661
  // REG-7-root: Track file writes since last todo_write call. When this
558663
558662
  // counter climbs without a todo update, the agent has likely batched
@@ -559006,6 +559005,8 @@ var init_agenticRunner = __esm({
559006
559005
  _sessionId = `session-${Date.now()}`;
559007
559006
  _workingDirectory = "";
559008
559007
  constructor(backend, options2) {
559008
+ const adversaryMode = options2?.adversaryMode ?? options2?.observerMode ?? "both";
559009
+ const disableAdversaryCritic = options2?.disableAdversaryCritic ?? options2?.disableStepCritic ?? false;
559009
559010
  this.backend = backend;
559010
559011
  this.options = {
559011
559012
  maxTurns: options2?.maxTurns ?? 60,
@@ -559030,19 +559031,22 @@ var init_agenticRunner = __esm({
559030
559031
  bruteForceMaxCycles: options2?.bruteForceMaxCycles ?? 100,
559031
559032
  allowTurnExtension: options2?.allowTurnExtension ?? true,
559032
559033
  completionProvenanceGuard: options2?.completionProvenanceGuard ?? true,
559034
+ disableAdversaryCritic,
559035
+ disableStepCritic: disableAdversaryCritic,
559033
559036
  modelTier: options2?.modelTier ?? "large",
559034
559037
  contextWindowSize: options2?.contextWindowSize ?? 0,
559035
559038
  personality: options2?.personality ?? PERSONALITY_PRESETS.balanced,
559036
559039
  personalityName: options2?.personalityName ?? "",
559037
559040
  finalVarResolver: options2?.finalVarResolver ?? void 0,
559038
- observerMode: options2?.observerMode ?? "both",
559041
+ adversaryMode,
559042
+ observerMode: adversaryMode,
559039
559043
  // Phase 4 — sub-agent isolation flag (defaults false). When true, this
559040
559044
  // runner skips cross-task handoff inheritance from the parent's
559041
559045
  // session.
559042
559046
  subAgent: options2?.subAgent ?? false,
559043
559047
  skipCrossTaskHandoff: options2?.skipCrossTaskHandoff ?? false
559044
559048
  };
559045
- this._observerMode = this.options.observerMode;
559049
+ this._adversaryMode = this.options.adversaryMode;
559046
559050
  }
559047
559051
  /** Update context window size (e.g. after querying Ollama /api/show) */
559048
559052
  setContextWindowSize(size) {
@@ -559050,7 +559054,10 @@ var init_agenticRunner = __esm({
559050
559054
  }
559051
559055
  /** Set the working directory for session checkpointing */
559052
559056
  // NOTE(review): the directory is now passed through _pathResolve before it
  // is stored (the previous version stored dir verbatim). _pathResolve is
  // defined outside this hunk - presumably node:path resolve; confirm how it
  // treats a missing or non-string dir, since the old code accepted any value.
  setWorkingDirectory(dir) {
559053
- this._workingDirectory = dir;
559057
+ this._workingDirectory = _pathResolve(dir);
559058
+ }
559059
  /** Returns _pathResolve applied to the stored working directory, or to process.cwd() when the stored value is falsy. */
+ authoritativeWorkingDirectory() {
559060
+ return _pathResolve(this._workingDirectory || process.cwd());
559054
559061
  }
559055
559062
  /** State root for runner-owned memory/artifacts. Defaults to cwd/.omnius. */
559056
559063
  omniusStateDir() {
@@ -559823,7 +559830,7 @@ ${result.output ?? ""}`;
559823
559830
  * checklist via todo_write, and only then call task_complete.
559824
559831
  */
559825
559832
  /**
559826
- * REG-39c: tag a SYNTHETIC failure (FORCED PROGRESS BLOCK / observer
559833
+ * REG-39c: tag a SYNTHETIC failure (FORCED PROGRESS BLOCK / adversary
559827
559834
  * block / budget exhausted). These paths return early from
559828
559835
  * executeSingle BEFORE the main result-handling code, so the normal
559829
559836
  * MAST tagging miss them. This helper lets each return-early site
@@ -561367,7 +561374,7 @@ ${latest.output || ""}`.trim();
561367
561374
  }
561368
561375
  }
561369
561376
  const sections = [
561370
- "[KNOWLEDGE — cached tool results already known to the runtime. Do NOT re-call these tools with the same arguments:]"
561377
+ "[KNOWLEDGE — cached tool results already known to the runtime. Repeating an exact read/list/search/shell call is a wasted action and will be blocked or served from cache:]"
561371
561378
  ];
561372
561379
  if (compactedCount > 0) {
561373
561380
  sections.push(`Compacted cached entries still count as already-known results (${compactedCount}); an exact repeat will be served from cache or skipped, not produce new information.`);
@@ -561379,6 +561386,7 @@ ${latest.output || ""}`.trim();
561379
561386
  if (dirsListed.length > 0) {
561380
561387
  const unique2 = [...new Set(dirsListed)].slice(0, 15);
561381
561388
  sections.push(`Directories already listed (${unique2.length}): ${unique2.join(", ")}`);
561389
+ sections.push(`Do not call list_directory again on these exact directories unless you changed their contents. Use the listed child paths directly with file_read/edit/delegation.`);
561382
561390
  }
561383
561391
  if (searches.length > 0) {
561384
561392
  const unique2 = [...new Set(searches)].slice(0, 15);
@@ -561392,6 +561400,23 @@ ${latest.output || ""}`.trim();
561392
561400
  return null;
561393
561401
  return sections.join("\n");
561394
561402
  }
561403
  /**
   * Renders the "[RUNTIME ROOT]" text block as newline-joined lines.
   * Always includes the directory returned by authoritativeWorkingDirectory()
   * plus two fixed instructions about path resolution and not inferring cwd.
   * Adds a line when the resolved process.cwd() differs from that directory,
   * and another when this._worldFacts.lastCwd is set and differs from it.
   * NOTE(review): lastCwd is not part of the _worldFacts initial shape visible
   * in this diff - presumably populated elsewhere; confirm.
   * @returns {string} the joined block text.
   */
+ _renderRuntimeRootBlock() {
561404
+ const authoritative = this.authoritativeWorkingDirectory();
561405
+ const proc = _pathResolve(process.cwd());
561406
+ const lines = [
561407
+ `[RUNTIME ROOT — authoritative]`,
561408
+ `Current working directory for this run: ${authoritative}`,
561409
+ `All relative file/tool paths resolve under this directory unless the tool call uses an absolute path.`,
561410
+ `Do not infer cwd from old tasks, shell transcripts, memory, or prior browser sessions.`
561411
+ ];
561412
+ if (proc !== authoritative) {
561413
+ lines.push(`Process cwd differs (${proc}); treat the run cwd above as authoritative for repo/project work.`);
561414
+ }
561415
+ if (this._worldFacts.lastCwd && this._worldFacts.lastCwd !== authoritative) {
561416
+ lines.push(`Last shell cd target was command-local only: ${this._worldFacts.lastCwd}. It does not change the run cwd.`);
561417
+ }
561418
+ return lines.join("\n");
561419
+ }
561395
561420
  _insertContextFrame(messages2, frame) {
561396
561421
  if (!frame)
561397
561422
  return;
@@ -561429,7 +561454,7 @@ ${latest.output || ""}`.trim();
561429
561454
  add2(this._activeContextItem("task_state", "todo-state", "turn.todos", "Todo state", input.todoBlock, 80));
561430
561455
  add2(this._activeContextItem("recent_failure", "recent-failures", "turn.failures", "Recent failures", input.failureBlock, 95));
561431
561456
  add2(this._activeContextItem("recent_failure", "write-churn", "turn.churn", "Write churn", input.churnBlock, 75));
561432
- add2(this._activeContextItem("tool_cache", "tool-cache", "turn.tool-cache", "Tool cache", input.toolCacheBlock, 65));
561457
+ add2(this._activeContextItem("tool_cache", "tool-cache", "turn.tool-cache", "Tool cache", input.toolCacheBlock, 92));
561433
561458
  add2(this._activeContextItem("anchor", "anchors", "turn.anchors", "Relevant anchors", input.anchorsBlock, 50));
561434
561459
  add2(this._activeContextItem("environment", "environment", "turn.environment", "Environment", input.environmentBlock, 35));
561435
561460
  if (this._lastPprMemoryLines.length > 0) {
@@ -561684,7 +561709,10 @@ ${chunk.content}`, {
561684
561709
  async _buildTurnContextFrame(turn, messages2, recentToolResults, environmentBlock) {
561685
561710
  this._contextLedger.clearSources("turn.");
561686
561711
  this._contextLedger.prune(turn);
561687
- const goalBlock = this._taskState.goal ? `Active task: ${this._taskState.goal}` : null;
561712
+ const goalBlock = [
561713
+ this._renderRuntimeRootBlock(),
561714
+ this._taskState.goal ? `Active task: ${this._taskState.goal}` : null
561715
+ ].filter(Boolean).join("\n\n");
561688
561716
  const filesystemBlock = this._renderFilesystemStateBlock(turn);
561689
561717
  const todoBlock = this._renderTodoStateBlock(turn);
561690
561718
  const failureBlock = this._renderRecentFailuresBlock(turn);
@@ -561750,7 +561778,7 @@ ${this._lastPprMemoryLines.slice(0, 5).join("\n")}` : null;
561750
561778
  signalFromBlock("tool_cache", "turn.tool-cache", toolCacheBlock, {
561751
561779
  id: "tool-cache",
561752
561780
  dedupeKey: "turn.tool-cache",
561753
- priority: 65,
561781
+ priority: 92,
561754
561782
  createdTurn: turn,
561755
561783
  ttlTurns: 1
561756
561784
  }),
@@ -562602,8 +562630,8 @@ ${notice}`;
562602
562630
  const window2 = recentToolCalls.slice(-repetitionWindow);
562603
562631
  const uniqueKeys = new Set(window2.map((tc) => `${tc.name}:${tc.argsKey}`));
562604
562632
  const ratio = 1 - uniqueKeys.size / window2.length;
562605
- if (ratio > 0.4 && this._littlemanToolOutcomes.length >= 3) {
562606
- const recentOutcomes = this._littlemanToolOutcomes.slice(-6);
562633
+ if (ratio > 0.4 && this._adversaryToolOutcomes.length >= 3) {
562634
+ const recentOutcomes = this._adversaryToolOutcomes.slice(-6);
562607
562635
  const uniquePreviews = new Set(recentOutcomes.map((o2) => o2.preview.slice(0, 40)));
562608
562636
  if (uniquePreviews.size >= 3) {
562609
562637
  return Math.max(0, ratio - 0.4);
@@ -562701,6 +562729,9 @@ Respond with your assessment, then take action.`;
562701
562729
  this._lastActiveForgettingReport = null;
562702
562730
  this._lastContextConsolidationTurn = -1e3;
562703
562731
  this._contextFrameBuilder = new ContextFrameBuilder();
562732
+ if (!this._workingDirectory) {
562733
+ this._workingDirectory = _pathResolve(process.cwd());
562734
+ }
562704
562735
  if (!this.options.disablePersistentMemory && !this._memoryInitialized) {
562705
562736
  try {
562706
562737
  const path12 = await import("node:path");
@@ -563134,10 +563165,10 @@ TASK: ${scrubbedTask}` : scrubbedTask;
563134
563165
  this._hookDenyHintCount = 0;
563135
563166
  this._selfConsistencyVotes = 0;
563136
563167
  this._retrievalContextCache = null;
563137
- this._observerMode = this.options.observerMode ?? "both";
563168
+ this._adversaryMode = this.options.adversaryMode ?? "both";
563138
563169
  this._worldFacts = { files: /* @__PURE__ */ new Map(), lastTest: {}, lastLists: /* @__PURE__ */ new Map() };
563139
563170
  this._argCohorts.clear();
563140
- this._littlemanRedundantBlocks.clear();
563171
+ this._adversaryRedundantSignals.clear();
563141
563172
  this._lastTodoWriteTurn = -1;
563142
563173
  this._lastTodoReminderTurn = -1;
563143
563174
  let pendingConstraintWarnings = [];
@@ -563237,14 +563268,44 @@ TASK: ${scrubbedTask}` : scrubbedTask;
563237
563268
  });
563238
563269
  if (gate.proceed)
563239
563270
  return false;
563240
- messages2.push({ role: "system", content: gate.feedback });
563271
+ messages2.push({
563272
+ role: "system",
563273
+ content: `${gate.feedback}
563274
+
563275
+ [ADVISORY ONLY] This critique does not block task_complete; use it to improve the next run or visible evidence if the task continues.`
563276
+ });
563241
563277
  this.emit({
563242
563278
  type: "status",
563243
- content: `task_complete held by completion provenance guard: ${gate.reason}`,
563279
+ content: `completion provenance critique emitted without blocking: ${gate.reason}`,
563280
+ turn,
563281
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
563282
+ });
563283
+ this.emit({
563284
+ type: "adversary_reaction",
563285
+ adversary: {
563286
+ class: "guidance",
563287
+ shortText: "Completion provenance critique emitted",
563288
+ confidence: 0.9,
563289
+ details: gate.reason
563290
+ },
563291
+ turn,
563292
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
563293
+ });
563294
+ return false;
563295
+ };
563296
  // Pushes the backward-pass feedback into messages2 as a system message with
  // an "[ADVISORY ONLY]" suffix, then emits a status event for the given turn.
  // It returns nothing, so callers cannot use it to hold completion.
  // NOTE(review): at the call sites visible in this diff, completed = true and
  // break follow immediately - confirm something still reads messages2 after
  // that point, otherwise the pushed advisory is never shown to the model.
+ const emitBackwardPassAdvisory = (feedback, turn) => {
563297
+ messages2.push({
563298
+ role: "system",
563299
+ content: `${feedback}
563300
+
563301
+ [ADVISORY ONLY] Backward-pass critique is non-blocking; do not treat this as a tool failure or completion refusal.`
563302
+ });
563303
+ this.emit({
563304
+ type: "status",
563305
+ content: "backward-pass critique emitted without blocking completion",
563244
563306
  turn,
563245
563307
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
563246
563308
  });
563247
- return true;
563248
563309
  };
563249
563310
  const turnCap = this.options.maxTurns && this.options.maxTurns > 0 ? this.options.maxTurns : Number.MAX_SAFE_INTEGER;
563250
563311
  for (let turn = 0; turn < turnCap; turn++) {
@@ -564230,8 +564291,8 @@ ${_staleSamples.join("\n")}` : ``,
564230
564291
  nextSelfEval = now + selfEvalInterval;
564231
564292
  }
564232
564293
  const turnsRemaining = this.options.maxTurns - turn;
564233
- if (this.options.allowTurnExtension && turnsRemaining <= 3 && turnsRemaining > 0 && this._littlemanToolOutcomes.length >= 2) {
564234
- const recentOutcomes = this._littlemanToolOutcomes.slice(-6);
564294
+ if (this.options.allowTurnExtension && turnsRemaining <= 3 && turnsRemaining > 0 && this._adversaryToolOutcomes.length >= 2) {
564295
+ const recentOutcomes = this._adversaryToolOutcomes.slice(-6);
564235
564296
  const recentSuccesses = recentOutcomes.filter((o2) => o2.succeeded).length;
564236
564297
  const uniqueResults = new Set(recentOutcomes.map((o2) => o2.preview.slice(0, 40))).size;
564237
564298
  const isActive = recentSuccesses >= 2 && uniqueResults >= 2;
@@ -564240,16 +564301,16 @@ ${_staleSamples.join("\n")}` : ``,
564240
564301
  this.options.maxTurns += extension3;
564241
564302
  this.emit({
564242
564303
  type: "status",
564243
- content: `Littleman triage: activity detected (${recentSuccesses} recent successes, ${uniqueResults} unique results) — extending turn limit by ${extension3} (now ${this.options.maxTurns})`,
564304
+ content: `Adversary triage: activity detected (${recentSuccesses} recent successes, ${uniqueResults} unique results) — extending turn limit by ${extension3} (now ${this.options.maxTurns})`,
564244
564305
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
564245
564306
  });
564246
564307
  const detailsLines = recentOutcomes.map((o2) => `- ${o2.tool}: ${o2.succeeded ? "OK" : "ERR"} — ${o2.preview}`);
564247
564308
  this.emit({
564248
- type: "debug_littleman",
564309
+ type: "debug_adversary",
564249
564310
  turn,
564250
564311
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
564251
564312
  content: `Timeout triage: EXTENDED by ${extension3} turns (active session detected)`,
564252
- littlemanAction: {
564313
+ adversaryAction: {
564253
564314
  detection: "none",
564254
564315
  recentSuccesses,
564255
564316
  recentFailures: recentOutcomes.length - recentSuccesses,
@@ -564582,6 +564643,9 @@ ${memoryLines.join("\n")}`
564582
564643
  maxTokens: effectiveMaxTokens,
564583
564644
  timeoutMs: this.options.requestTimeoutMs
564584
564645
  };
564646
+ if ((this.options.contextWindowSize ?? 0) > 0) {
564647
+ chatRequest.numCtx = this.options.contextWindowSize;
564648
+ }
564585
564649
  if (this.options.memoryPrefix)
564586
564650
  chatRequest.memoryPrefix = this.options.memoryPrefix;
564587
564651
  if (this.options.memoryPrefixHash)
@@ -564623,7 +564687,7 @@ ${memoryLines.join("\n")}`
564623
564687
  compactionThreshold: limits.compactionThreshold,
564624
564688
  toolCallCount,
564625
564689
  keepRecent: limits.keepRecent,
564626
- littlemanOutcomes: this._littlemanToolOutcomes.length,
564690
+ adversaryOutcomes: this._adversaryToolOutcomes.length,
564627
564691
  headroom: limits.compactionThreshold - estTokens
564628
564692
  }
564629
564693
  });
@@ -564986,16 +565050,19 @@ ${memoryLines.join("\n")}`
564986
565050
  const cohort = this._argCohorts.get(cohortKey);
564987
565051
  if (cohort && cohort.failure >= 3 && cohort.success === 0) {
564988
565052
  this.emit({
564989
- type: "observer_reaction",
565053
+ type: "adversary_reaction",
564990
565054
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
564991
- observer: {
565055
+ adversary: {
564992
565056
  class: "arg_cohort_risk",
564993
565057
  shortText: `${tc.name} with similar args has failed ${cohort.failure}× recently`,
564994
565058
  confidence: 0.85
564995
565059
  }
564996
565060
  });
564997
- if (this._observerMode === "skillcoach" || this._observerMode === "both") {
564998
- this.pendingUserMessages.push(`⚠ ${tc.name} with similar arguments has failed ${cohort.failure}× recently. Try a different approach first: read relevant files, adjust arguments, or verify prerequisites.`);
565061
+ if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
565062
+ this.pendingUserMessages.push(`[ADVERSARY CRITIQUE non-blocking]
565063
+ Evidence: ${tc.name} with similar arguments has failed ${cohort.failure}× recently.
565064
+ Root cause hypothesis: the argument family may be wrong, a prerequisite may be missing, or the tool is being used before enough state is known.
565065
+ Corrective action: try a different approach first: read relevant files, adjust arguments, or verify prerequisites.`);
564999
565066
  }
565000
565067
  }
565001
565068
  if (this._errorPatterns.size > 0) {
@@ -565277,19 +565344,11 @@ ${memoryLines.join("\n")}`
565277
565344
  ].includes(tc.name);
565278
565345
  const isStatefulBrowserTool = this._isStatefulBrowserTool(tc.name);
565279
565346
  const isReadLike = !isStatefulBrowserTool && (baseIsReadLike || tc.name === "shell" && this._isShellCommandReadOnly(tc.arguments?.["command"] ?? tc.arguments?.["cmd"] ?? ""));
565280
- const observerRedundantBlock = this._littlemanRedundantBlocks.has(toolFingerprint);
565281
- if (observerRedundantBlock) {
565282
- this._littlemanRedundantBlocks.delete(toolFingerprint);
565347
+ const adversaryRedundantSignal = this._adversaryRedundantSignals.has(toolFingerprint);
565348
+ if (adversaryRedundantSignal) {
565349
+ this._adversaryRedundantSignals.delete(toolFingerprint);
565283
565350
  }
565284
- const markSyntheticToolLog = (outputPreview) => {
565285
- const lastLog = toolCallLog[_toolLogTailIdx];
565286
- if (!lastLog)
565287
- return;
565288
- lastLog.success = true;
565289
- lastLog.mutated = false;
565290
- lastLog.mutatedFiles = [];
565291
- lastLog.outputPreview = outputPreview.slice(0, 100);
565292
- };
565351
+ let criticGuidance = null;
565293
565352
  {
565294
565353
  const _reflStem = buildStem(tc.name, tc.arguments ?? {});
565295
565354
  if (!this._reflectionsInjectedThisTurn.has(_reflStem)) {
@@ -565331,7 +565390,10 @@ ${memoryLines.join("\n")}`
565331
565390
  }
565332
565391
  }
565333
565392
  }
565334
- const criticDecision = evaluate2({
565393
+ const criticDecision = this.options.disableAdversaryCritic === true ? {
565394
+ decision: "pass",
565395
+ reason: "adversary critic disabled for isolated evaluation"
565396
+ } : evaluate2({
565335
565397
  proposedCall: { tool: tc.name, args: tc.arguments ?? {} },
565336
565398
  fingerprint: toolFingerprint,
565337
565399
  isReadLike,
@@ -565345,116 +565407,33 @@ ${memoryLines.join("\n")}`
565345
565407
  stagnationSignals: null,
565346
565408
  // stagnation gate handled at top-of-turn
565347
565409
  stagnationGateActive: false,
565348
- observerRedundantBlock
565410
+ adversaryRedundantSignal
565349
565411
  });
565350
- if (criticDecision.decision === "observer_block") {
565351
- this.emit({
565352
- type: "tool_call",
565353
- toolName: tc.name,
565354
- toolArgs: tc.arguments,
565355
- turn,
565356
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
565357
- });
565358
- const blockMsg = criticDecision.cachedResult ? `[BLOCKED — this tool+args already succeeded. Re-served from cache:]
565359
-
565360
- ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confirmed this tool already succeeded with these arguments on a prior turn. Do NOT re-run. Use your prior findings to proceed.]`;
565361
- markSyntheticToolLog(blockMsg);
565362
- this.emit({
565363
- type: "tool_result",
565364
- toolName: tc.name,
565365
- success: true,
565366
- content: blockMsg.slice(0, 100),
565367
- turn,
565368
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
565369
- });
565370
- this._tagSyntheticFailure({
565371
- mode: "step_repetition",
565372
- rationale: `observer-block on ${tc.name} fingerprint flagged redundant`
565373
- });
565374
- return { tc, output: blockMsg, success: true };
565375
- }
565376
- if (criticDecision.decision === "force_progress_block") {
565412
+ if (criticDecision.decision === "guidance") {
565377
565413
  dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
565378
565414
  const _existingFp = recentToolResults.get(toolFingerprint);
565379
565415
  if (_existingFp !== void 0) {
565380
565416
  recentToolResults.delete(toolFingerprint);
565381
565417
  recentToolResults.set(toolFingerprint, _existingFp);
565382
565418
  }
565419
+ criticGuidance = criticDecision.guidanceMessage;
565383
565420
  this.emit({
565384
- type: "tool_call",
565385
- toolName: tc.name,
565386
- toolArgs: tc.arguments,
565387
- turn,
565421
+ type: "adversary_reaction",
565422
+ adversary: {
565423
+ class: "guidance",
565424
+ shortText: `Adversary guidance for repeated ${tc.name} call`,
565425
+ confidence: 0.9,
565426
+ details: criticDecision.reason
565427
+ },
565388
565428
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
565389
565429
  });
565390
565430
  this.emit({
565391
- type: "tool_result",
565392
- toolName: tc.name,
565393
- success: true,
565394
- content: `[SKIPPED DUPLICATE — exact ${tc.name} call not re-run; cached result returned.]`.slice(0, 120),
565395
- turn,
565396
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
565397
- });
565398
- this._tagSyntheticFailure({
565399
- mode: "step_repetition",
565400
- rationale: `force_progress_block on ${tc.name} after ${criticDecision.hitNumber} identical calls`
565401
- });
565402
- const generationCompletionHint = isGenerationArtifactSuccess(tc.name, criticDecision.cachedResult) ? `
565403
-
565404
- [GENERATION ALREADY COMPLETE] This exact ${tc.name} call already succeeded. Do not call it again. Use the cached artifact/path above; if delivery is needed, send it, otherwise call task_complete.` : "";
565405
- const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. Do not retry this exact call.]
565406
-
565407
- ` : `[SKIPPED DUPLICATE — exact ${tc.name} call not re-run. The cached result below is from the prior successful call. Do not retry this exact call.]
565408
-
565409
- `;
565410
- const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
565411
- ... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
565412
- markSyntheticToolLog(`${criticDecision.blockMessage}
565413
-
565414
- ${truncatedCache}`);
565415
- return {
565416
- tc,
565417
- output: `${criticDecision.blockMessage}
565418
-
565419
- ${header}${truncatedCache}${generationCompletionHint}`,
565420
- success: true
565421
- };
565422
- }
565423
- if (criticDecision.decision === "serve_cached") {
565424
- dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
565425
- const _existingFp = recentToolResults.get(toolFingerprint);
565426
- if (_existingFp !== void 0) {
565427
- recentToolResults.delete(toolFingerprint);
565428
- recentToolResults.set(toolFingerprint, _existingFp);
565429
- }
565430
- this.emit({
565431
- type: "tool_call",
565432
- toolName: tc.name,
565433
- toolArgs: tc.arguments,
565434
- turn,
565435
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
565436
- });
565437
- const generationCompletionHint = isGenerationArtifactSuccess(tc.name, criticDecision.cachedResult) ? `
565438
-
565439
- [GENERATION ALREADY COMPLETE] This exact ${tc.name} call already succeeded. Do not call it again. Use the cached artifact/path above; if delivery is needed, send it, otherwise call task_complete.` : "";
565440
- const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. No need to call this tool again.]
565441
-
565442
- ` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result is identical. Do NOT call this again. Use the data you already have to make progress. One more identical call will trigger a hard progress block.]
565443
-
565444
- `;
565445
- const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
565446
- ... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
565447
- const dedupOutput = header + truncatedCache + generationCompletionHint;
565448
- markSyntheticToolLog(dedupOutput);
565449
- this.emit({
565450
- type: "tool_result",
565431
+ type: "status",
565451
565432
  toolName: tc.name,
565452
- success: true,
565453
- content: header.slice(0, 100),
565433
+ content: `Adversary guidance emitted for ${tc.name}; tool call will still execute`,
565454
565434
  turn,
565455
565435
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
565456
565436
  });
565457
- return { tc, output: dedupOutput, success: true };
565458
565437
  }
565459
565438
  this.emit({
565460
565439
  type: "tool_call",
@@ -566455,6 +566434,11 @@ Respond with EXACTLY this structure before your next tool call:
566455
566434
  result = await this.offloadEmbeddedImageResult(result, tc.name, turn);
566456
566435
  }
566457
566436
  let output = this.normalizeToolOutput(result, tc.name, tc.arguments, turn);
566437
+ if (criticGuidance) {
566438
+ output += `
566439
+
566440
+ ${criticGuidance}`;
566441
+ }
566458
566442
  if (!result.success && (this.options.modelTier === "small" || this.options.modelTier === "medium")) {
566459
566443
  const recovery = this.buildRecoveryGuidance(tc.name, result.error ?? "", tc.arguments);
566460
566444
  if (recovery)
@@ -566865,22 +566849,21 @@ ${sr.result.output}`;
566865
566849
  }
566866
566850
  const _bp1 = await this._runBackwardPassReview(turn);
566867
566851
  if (_bp1 && !_bp1.proceed && _bp1.feedback) {
566868
- messages2.push({ role: "system", content: _bp1.feedback });
566869
- } else {
566870
- completed = true;
566871
- summary = extractTaskCompleteSummary(matchTc.arguments);
566872
- if (summary && !this._assistantTextEmitted) {
566873
- this.emit({
566874
- type: "assistant_text",
566875
- content: summary,
566876
- source: "task_complete_summary",
566877
- turn,
566878
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
566879
- });
566880
- this._assistantTextEmitted = true;
566881
- }
566882
- break;
566852
+ emitBackwardPassAdvisory(_bp1.feedback, turn);
566853
+ }
566854
+ completed = true;
566855
+ summary = extractTaskCompleteSummary(matchTc.arguments);
566856
+ if (summary && !this._assistantTextEmitted) {
566857
+ this.emit({
566858
+ type: "assistant_text",
566859
+ content: summary,
566860
+ source: "task_complete_summary",
566861
+ turn,
566862
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
566863
+ });
566864
+ this._assistantTextEmitted = true;
566883
566865
  }
566866
+ break;
566884
566867
  }
566885
566868
  }
566886
566869
  }
@@ -566921,22 +566904,21 @@ ${sr.result.output}`;
566921
566904
  }
566922
566905
  const _bp2 = await this._runBackwardPassReview(turn);
566923
566906
  if (_bp2 && !_bp2.proceed && _bp2.feedback) {
566924
- messages2.push({ role: "system", content: _bp2.feedback });
566925
- } else {
566926
- completed = true;
566927
- summary = extractTaskCompleteSummary(r2.tc.arguments);
566928
- if (summary && !this._assistantTextEmitted) {
566929
- this.emit({
566930
- type: "assistant_text",
566931
- content: summary,
566932
- source: "task_complete_summary",
566933
- turn,
566934
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
566935
- });
566936
- this._assistantTextEmitted = true;
566937
- }
566938
- break;
566907
+ emitBackwardPassAdvisory(_bp2.feedback, turn);
566939
566908
  }
566909
+ completed = true;
566910
+ summary = extractTaskCompleteSummary(r2.tc.arguments);
566911
+ if (summary && !this._assistantTextEmitted) {
566912
+ this.emit({
566913
+ type: "assistant_text",
566914
+ content: summary,
566915
+ source: "task_complete_summary",
566916
+ turn,
566917
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
566918
+ });
566919
+ this._assistantTextEmitted = true;
566920
+ }
566921
+ break;
566940
566922
  }
566941
566923
  }
566942
566924
  }
@@ -567013,22 +566995,21 @@ ${sr.result.output}`;
567013
566995
  }
567014
566996
  const _bp3 = await this._runBackwardPassReview(turn);
567015
566997
  if (_bp3 && !_bp3.proceed && _bp3.feedback) {
567016
- messages2.push({ role: "system", content: _bp3.feedback });
567017
- } else {
567018
- completed = true;
567019
- summary = extractTaskCompleteSummary(r2.tc.arguments);
567020
- if (summary && !this._assistantTextEmitted) {
567021
- this.emit({
567022
- type: "assistant_text",
567023
- content: summary,
567024
- source: "task_complete_summary",
567025
- turn,
567026
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
567027
- });
567028
- this._assistantTextEmitted = true;
567029
- }
567030
- break;
566998
+ emitBackwardPassAdvisory(_bp3.feedback, turn);
567031
566999
  }
567000
+ completed = true;
567001
+ summary = extractTaskCompleteSummary(r2.tc.arguments);
567002
+ if (summary && !this._assistantTextEmitted) {
567003
+ this.emit({
567004
+ type: "assistant_text",
567005
+ content: summary,
567006
+ source: "task_complete_summary",
567007
+ turn,
567008
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
567009
+ });
567010
+ this._assistantTextEmitted = true;
567011
+ }
567012
+ break;
567032
567013
  }
567033
567014
  }
567034
567015
  }
@@ -567039,7 +567020,7 @@ ${sr.result.output}`;
567039
567020
  }
567040
567021
  if (completed)
567041
567022
  break;
567042
- this.littlemanObserve(messages2, turn);
567023
+ this.adversaryObserve(messages2, turn);
567043
567024
  const currentRepScore = this.detectRepetition(toolCallLog);
567044
567025
  if (currentRepScore > 0.4 && toolCallLog.length >= 4) {
567045
567026
  const { repetitionWindow } = this.contextLimits();
@@ -567236,7 +567217,7 @@ Call task_complete(summary="...") NOW with whatever you have.`
567236
567217
  }
567237
567218
  if (isThinkOnly) {
567238
567219
  if (consecutiveThinkOnly >= MAX_CONSECUTIVE_THINK_ONLY) {
567239
- const recentSuccesses = this._littlemanToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
567220
+ const recentSuccesses = this._adversaryToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
567240
567221
  const hasRecentSuccess = recentSuccesses.length > 0;
567241
567222
  const successHint = hasRecentSuccess ? `
567242
567223
 
@@ -567487,7 +567468,8 @@ ${this.options.maxTurns && this.options.maxTurns > 0 ? `You have ${this.options.
567487
567468
  tools: toolDefs,
567488
567469
  temperature: this.options.temperature,
567489
567470
  maxTokens: this.options.maxTokens,
567490
- timeoutMs: this.options.requestTimeoutMs
567471
+ timeoutMs: this.options.requestTimeoutMs,
567472
+ numCtx: this.options.contextWindowSize || void 0
567491
567473
  };
567492
567474
  let response;
567493
567475
  try {
@@ -567797,8 +567779,7 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
567797
567779
  }
567798
567780
  const _bp4 = await this._runBackwardPassReview(turn);
567799
567781
  if (_bp4 && !_bp4.proceed && _bp4.feedback) {
567800
- messages2.push({ role: "system", content: _bp4.feedback });
567801
- continue;
567782
+ emitBackwardPassAdvisory(_bp4.feedback, turn);
567802
567783
  }
567803
567784
  completed = true;
567804
567785
  summary = extractTaskCompleteSummary(tc.arguments);
@@ -567871,7 +567852,7 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
567871
567852
  }
567872
567853
  if (isThinkOnlyBF) {
567873
567854
  if (consecutiveThinkOnly >= MAX_CONSECUTIVE_THINK_ONLY) {
567874
- const recentSucc = this._littlemanToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
567855
+ const recentSucc = this._adversaryToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
567875
567856
  const succHint = recentSucc.length > 0 ? "\n\nYour most recent tool calls SUCCEEDED. If the task is complete, call task_complete now with a summary." : "";
567876
567857
  messages2.push({
567877
567858
  role: "user",
@@ -569959,36 +569940,35 @@ ${newerSummary}`;
569959
569940
  ${trimmedNew}`;
569960
569941
  }
569961
569942
  // -------------------------------------------------------------------------
569962
- // Littleman Observer — parallel meta-analysis of the main loop
569943
+ // Adversary — parallel meta-analysis of the main loop
569963
569944
  // -------------------------------------------------------------------------
569964
- // Inspired by Hannover's fireCompanionObserver (src/buddy/observer.ts).
569965
569945
  // Runs after each tool turn to detect when the model has lost track of
569966
569946
  // what happened and inject corrections before the next inference.
569967
569947
  //
569968
569948
  // This is the architectural fix for the "I see both tools have been failing"
569969
569949
  // regression: instead of only fixing the data the model sees (mask/summary),
569970
569950
  // we add a second analysis path that catches mismatches in real time.
569971
- /** Track recent tool outcomes for the littleman observer */
569972
- _littlemanToolOutcomes = [];
569973
- /** WO-FIX-C: Tool fingerprints the littleman has flagged as redundant.
569974
- * Checked in executeSingle to block re-execution and return cached data. */
569975
- _littlemanRedundantBlocks = /* @__PURE__ */ new Set();
569951
+ /** Track recent tool outcomes for the adversary */
569952
+ _adversaryToolOutcomes = [];
569953
+ /** WO-FIX-C: Tool fingerprints the adversary has flagged as redundant.
569954
+ * Checked in executeSingle to attach advisory guidance before dispatch. */
569955
+ _adversaryRedundantSignals = /* @__PURE__ */ new Set();
569976
569956
  /** Reflexion pattern: task-local failure-indexed reflection buffer.
569977
569957
  * Generates typed self-reflections on task failure and injects them
569978
569958
  * into the next attempt's context for active learning. */
569979
569959
  _reflectionBuffer = null;
569980
569960
  /**
569981
- * Littleman observer: post-turn meta-analysis.
569961
+ * Adversary: post-turn meta-analysis.
569982
569962
  *
569983
569963
  * Examines the last few messages looking for contradictions between
569984
569964
  * actual tool outcomes and the model's stated understanding. When it
569985
569965
  * detects the model claiming failure after success (or vice versa),
569986
- * it injects a corrective message.
569966
+ * it injects a corrective non-blocking critique.
569987
569967
  *
569988
569968
  * Also detects repeated actions — when the model re-does something
569989
- * that already succeeded, the littleman nudges it to move on.
569969
+ * that already succeeded, the adversary nudges it to move on.
569990
569970
  */
569991
- littlemanObserve(messages2, turn) {
569971
+ adversaryObserve(messages2, turn) {
569992
569972
  if (this.options.modelTier === "large")
569993
569973
  return;
569994
569974
  const recent = messages2.slice(-6);
@@ -570017,8 +569997,8 @@ ${trimmedNew}`;
570017
569997
  }
570018
569998
  const argsKey = toolArgs ? this._buildExactArgsKey(toolArgs) : void 0;
570019
569999
  const fingerprint = toolArgs ? this._buildToolFingerprint(toolName, toolArgs) : void 0;
570020
- if (!this._littlemanToolOutcomes.some((o2) => o2.turn === turn && o2.tool === toolName && o2.fingerprint === fingerprint)) {
570021
- this._littlemanToolOutcomes.push({
570000
+ if (!this._adversaryToolOutcomes.some((o2) => o2.turn === turn && o2.tool === toolName && o2.fingerprint === fingerprint)) {
570001
+ this._adversaryToolOutcomes.push({
570022
570002
  turn,
570023
570003
  tool: toolName,
570024
570004
  argsKey,
@@ -570029,27 +570009,47 @@ ${trimmedNew}`;
570029
570009
  }
570030
570010
  }
570031
570011
  }
570032
- while (this._littlemanToolOutcomes.length > 20)
570033
- this._littlemanToolOutcomes.shift();
570012
+ while (this._adversaryToolOutcomes.length > 20)
570013
+ this._adversaryToolOutcomes.shift();
570034
570014
  const emitReaction = (cls, shortText, confidence2, details2) => { // Emits one timestamped reaction event carrying class, short text, confidence and details; callers may omit details2.
570035
570015
  this.emit({
570036
- type: "observer_reaction",
570016
+ type: "adversary_reaction",
570037
570017
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
570038
- observer: { class: cls, shortText, confidence: confidence2, details: details2 }
570018
+ adversary: { class: cls, shortText, confidence: confidence2, details: details2 }
570039
570019
  });
570040
570020
  };
570021
+ const buildAdversaryCritique = (input) => { // Formats a critique string from input.evidence, input.hypothesis, input.correctiveAction and optional input.alternatives.
570022
+ const alternatives = input.alternatives && input.alternatives.length > 0 ? `
570023
+ Alternatives:
570024
+ ${input.alternatives.map((item) => `- ${item}`).join("\n")}` : ""; // "- item" list under an "Alternatives:" heading, or "" when none were supplied
570025
+ return [ // one labelled line per field, joined with newlines; the alternatives text is appended to the last line
570026
+ `[ADVERSARY CRITIQUE — non-blocking]`,
570027
+ `Evidence: ${input.evidence}`,
570028
+ `Root cause hypothesis: ${input.hypothesis}`,
570029
+ `Corrective action: ${input.correctiveAction}${alternatives}`
570030
+ ].join("\n");
570031
+ };
570041
570032
  const lastAssistant = [...recent].reverse().find((m2) => m2.role === "assistant" && typeof m2.content === "string");
570042
570033
  if (lastAssistant && typeof lastAssistant.content === "string") {
570043
570034
  const text = lastAssistant.content.toLowerCase();
570044
570035
  const claimsFailure = /(?:fail|error|didn't work|not working|unable to|cannot|couldn't|both .* fail|tools? (?:have |has )?been fail)/i.test(text);
570045
570036
  if (claimsFailure) {
570046
- const recentOutcomes = this._littlemanToolOutcomes.slice(-4);
570037
+ const recentOutcomes = this._adversaryToolOutcomes.slice(-4);
570047
570038
  const successes = recentOutcomes.filter((o2) => o2.succeeded);
570048
570039
  if (successes.length >= 1) {
570049
570040
  const successList = successes.map((o2) => `${o2.tool}: ${o2.preview.slice(0, 60)}`).join("; ");
570050
570041
  emitReaction("false_failure", `Claimed failure, but recent tools succeeded (${successes.length})`, 0.9, successList);
570051
- if (this._observerMode === "skillcoach" || this._observerMode === "both") {
570052
- this.pendingUserMessages.push(`⚠ Correction: recent tools DID succeed. Do not retry them. Successful results: ${successList}. Use them to advance the task.`);
570042
+ if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
570043
+ this.pendingUserMessages.push(buildAdversaryCritique({
570044
+ evidence: `Recent tools succeeded: ${successList}.`,
570045
+ hypothesis: "The main loop is interpreting uncertainty or partial progress as failure and may be about to discard usable evidence.",
570046
+ correctiveAction: "Use the successful results to advance the task, then verify the next concrete step.",
570047
+ alternatives: [
570048
+ "Edit or run the next verification step that follows from the successful output.",
570049
+ "Read a different targeted file if the successful result exposed a new path or symbol.",
570050
+ "Complete only if the successful output is sufficient evidence for the user's request."
570051
+ ]
570052
+ }));
570053
570053
  }
570054
570054
  this.emit({
570055
570055
  type: "status",
@@ -570063,47 +570063,67 @@ ${trimmedNew}`;
570063
570063
  const text = lastAssistant.content.toLowerCase();
570064
570064
  const claimsSuccess = /(done|fixed|success|passed|complete)/i.test(text);
570065
570065
  if (claimsSuccess) {
570066
- const recentOutcomes = this._littlemanToolOutcomes.slice(-4);
570066
+ const recentOutcomes = this._adversaryToolOutcomes.slice(-4);
570067
570067
  const failures = recentOutcomes.filter((o2) => !o2.succeeded);
570068
570068
  const successes = recentOutcomes.filter((o2) => o2.succeeded);
570069
570069
  if (failures.length > 0 && successes.length === 0) {
570070
570070
  const failList = failures.map((o2) => `${o2.tool}: ${o2.preview.slice(0, 60)}`).join("; ");
570071
570071
  emitReaction("false_success", `Claimed success, but recent tools failed (${failures.length})`, 0.9, failList);
570072
- if (this._observerMode === "skillcoach" || this._observerMode === "both") {
570073
- this.pendingUserMessages.push(`⚠ Your recent tools show errors (${failures.length}). Verify the last tool output and correct the issue before claiming success.`);
570072
+ if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
570073
+ this.pendingUserMessages.push(buildAdversaryCritique({
570074
+ evidence: `Recent tools show errors (${failures.length}): ${failList}.`,
570075
+ hypothesis: "The main loop is prematurely compressing intent into success language before the verifier produced evidence.",
570076
+ correctiveAction: "Inspect the failed output, identify the implicated path/symbol/command, and run one focused corrective step before claiming success.",
570077
+ alternatives: [
570078
+ "Read the smallest relevant source region around the failed symbol.",
570079
+ "Patch the implicated code or configuration.",
570080
+ "Run the same verifier only after a state-changing fix."
570081
+ ]
570082
+ }));
570074
570083
  }
570075
570084
  }
570076
570085
  }
570077
570086
  }
570078
- const lastToolCalls = recent.filter((m2) => m2.role === "assistant" && m2.tool_calls?.length).flatMap((m2) => m2.tool_calls ?? []);
570079
- for (const tc of lastToolCalls) {
570080
- const name10 = tc.function.name;
570081
- if (this._isStatefulBrowserTool(name10))
570082
- continue;
570083
- let args = {};
570084
- try {
570085
- args = JSON.parse(tc.function.arguments);
570086
- } catch {
570087
- }
570088
- const argsKey = this._buildExactArgsKey(args);
570089
- const fingerprint = this._buildToolFingerprint(name10, args);
570090
- const prior = this._littlemanToolOutcomes.find((o2) => o2.succeeded && o2.tool === name10 && o2.fingerprint === fingerprint && o2.turn < turn);
570091
- if (prior) {
570092
- this._littlemanRedundantBlocks.add(fingerprint);
570093
- emitReaction("redundant_action", `Already ran ${name10} successfully on turn ${prior.turn}`, 0.8, prior.preview);
570094
- if (this._observerMode === "skillcoach" || this._observerMode === "both") {
570095
- this.pendingUserMessages.push(`⚠ You already ran ${name10} successfully on turn ${prior.turn} with exact arguments (${argsKey.slice(0, 120)}). Do NOT re-run it. Use the existing result and proceed.`);
570087
+ if (this.options.disableAdversaryCritic !== true) {
570088
+ const lastToolCalls = recent.filter((m2) => m2.role === "assistant" && m2.tool_calls?.length).flatMap((m2) => m2.tool_calls ?? []);
570089
+ for (const tc of lastToolCalls) {
570090
+ const name10 = tc.function.name;
570091
+ if (this._isStatefulBrowserTool(name10))
570092
+ continue;
570093
+ let args = {};
570094
+ try {
570095
+ args = JSON.parse(tc.function.arguments);
570096
+ } catch {
570097
+ }
570098
+ const argsKey = this._buildExactArgsKey(args);
570099
+ const fingerprint = this._buildToolFingerprint(name10, args);
570100
+ const prior = this._adversaryToolOutcomes.find((o2) => o2.succeeded && o2.tool === name10 && o2.fingerprint === fingerprint && o2.turn < turn);
570101
+ if (prior) {
570102
+ this._adversaryRedundantSignals.add(fingerprint);
570103
+ emitReaction("redundant_action", `Already ran ${name10} successfully on turn ${prior.turn}`, 0.8, prior.preview);
570104
+ if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
570105
+ this.pendingUserMessages.push(buildAdversaryCritique({
570106
+ evidence: `${name10} already succeeded on turn ${prior.turn} with exact arguments (${argsKey.slice(0, 120)}). Prior preview: ${prior.preview}`,
570107
+ hypothesis: "The main loop may have lost track of previously observed evidence because of context pressure, path confusion, or repeated discovery.",
570108
+ correctiveAction: "Let this duplicate run execute if needed, but treat the prior result as evidence and pivot afterward unless state has changed.",
570109
+ alternatives: [
570110
+ "Use the prior result to edit/write, verify, or finish with evidence.",
570111
+ "Read a different specific file or selector if the current evidence is insufficient.",
570112
+ "Repeat exact arguments only when filesystem, browser, or page state changed."
570113
+ ]
570114
+ }));
570115
+ }
570116
+ this.emit({
570117
+ type: "status",
570118
+ content: `\x1B[38;5;178m⚠ Adversary noted redundant ${name10} call (succeeded on turn ${prior.turn}); action remains allowed\x1B[0m`,
570119
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
570120
+ });
570121
+ break;
570096
570122
  }
570097
- this.emit({
570098
- type: "status",
570099
- content: `\x1B[38;5;178m⚠ Prevented redundant ${name10} call (succeeded on turn ${prior.turn})\x1B[0m`,
570100
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
570101
- });
570102
- break;
570103
570123
  }
570104
570124
  }
570105
570125
  {
570106
- const recentCalls = this._littlemanToolOutcomes.slice(-5);
570126
+ const recentCalls = this._adversaryToolOutcomes.slice(-5);
570107
570127
  if (recentCalls.length >= 3) {
570108
570128
  let consecutiveShortResults = 0;
570109
570129
  for (let i2 = recentCalls.length - 1; i2 >= 0; i2--) {
@@ -570116,30 +570136,39 @@ ${trimmedNew}`;
570116
570136
  }
570117
570137
  if (consecutiveShortResults >= 3) {
570118
570138
  emitReaction("idle_think", `Consecutive output without input: ${consecutiveShortResults}`, 0.7);
570119
- if (this._observerMode === "skillcoach" || this._observerMode === "both") {
570120
- this.pendingUserMessages.push(`⚠ You have sent ${consecutiveShortResults} consecutive outputs without reading any input. Alternate: receive input, then respond. Call your input tool now.`);
570139
+ if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
570140
+ this.pendingUserMessages.push(buildAdversaryCritique({
570141
+ evidence: `${consecutiveShortResults} consecutive output-like calls occurred without an input-like observation.`,
570142
+ hypothesis: "The loop may be acting from stale state instead of re-observing the environment.",
570143
+ correctiveAction: "Take one input/observation step before another output step.",
570144
+ alternatives: [
570145
+ "Call the input/listen/poll tool for the current environment.",
570146
+ "Read the current UI/page state before clicking or typing again.",
570147
+ "If the task is already complete, finish with the concrete evidence already observed."
570148
+ ]
570149
+ }));
570121
570150
  }
570122
570151
  this.emit({
570123
570152
  type: "status",
570124
- content: `\x1B[38;5;178m⚠ Blocked runaway output (${consecutiveShortResults} consecutive sends without receive)\x1B[0m`,
570153
+ content: `\x1B[38;5;178m⚠ Adversary flagged runaway-output risk (${consecutiveShortResults} consecutive sends without receive); action remains allowed\x1B[0m`,
570125
570154
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
570126
570155
  });
570127
570156
  }
570128
570157
  }
570129
570158
  }
570130
- const succCount = this._littlemanToolOutcomes.filter((o2) => o2.succeeded).length;
570131
- const failCount = this._littlemanToolOutcomes.filter((o2) => !o2.succeeded).length;
570132
- const lastFour = this._littlemanToolOutcomes.slice(-4);
570159
+ const succCount = this._adversaryToolOutcomes.filter((o2) => o2.succeeded).length;
570160
+ const failCount = this._adversaryToolOutcomes.filter((o2) => !o2.succeeded).length;
570161
+ const lastFour = this._adversaryToolOutcomes.slice(-4);
570133
570162
  const details = [
570134
570163
  `Recent tool outcomes:`,
570135
570164
  ...lastFour.map((o2) => `- ${o2.tool}: ${o2.succeeded ? "OK" : "ERR"} — ${o2.preview}`)
570136
570165
  ].join("\n");
570137
570166
  this.emit({
570138
- type: "debug_littleman",
570167
+ type: "debug_adversary",
570139
570168
  turn,
570140
570169
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
570141
- content: `Littleman: ${this._littlemanToolOutcomes.length} tracked outcomes (${succCount} ok, ${failCount} err)`,
570142
- littlemanAction: {
570170
+ content: `Adversary: ${this._adversaryToolOutcomes.length} tracked outcomes (${succCount} ok, ${failCount} err)`,
570171
+ adversaryAction: {
570143
570172
  detection: "none",
570144
570173
  recentSuccesses: succCount,
570145
570174
  recentFailures: failCount,
@@ -651081,7 +651110,7 @@ ${conversationStream}`
651081
651110
  // off default rather than the global config's value.
651082
651111
  thinking: false,
651083
651112
  // Telegram sub-agent runs must be bounded. Brute-force re-engagement and
651084
- // the Littleman near-cap turn extension are appropriate for the full TUI
651113
+ // the Adversary near-cap turn extension are appropriate for the full TUI
651085
651114
  // session but cause Telegram to silently outgrow its nominal maxTurns,
651086
651115
  // which is how the Snow Crash PDF loop reached 60+ turns of self-talk.
651087
651116
  ...TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS
@@ -683444,8 +683473,8 @@ ${entry.fullContent}`
683444
683473
  let streamTextBuffer = "";
683445
683474
  let lastAssistantText = "";
683446
683475
  let lastProvenancePath = null;
683447
- let showLittleman = false;
683448
- const littlemanBuffer = [];
683476
+ let showAdversary = false;
683477
+ const adversaryBuffer = [];
683449
683478
  const contentWrite = (fn) => {
683450
683479
  if (isNeovimActive()) {
683451
683480
  const origWrite = process.stdout.write;
@@ -683929,24 +683958,24 @@ ${entry.fullContent}`
683929
683958
  if (snap) {
683930
683959
  contentWrite(
683931
683960
  () => renderInfo(
683932
- `\x1B[38;5;243m[ctx] ${snap.messageCount} msgs | ~${snap.estimatedTokens} tok | headroom: ${snap.headroom} | tools: ${snap.toolCallCount} | littleman: ${snap.littlemanOutcomes} tracked\x1B[0m`
683961
+ `\x1B[38;5;243m[ctx] ${snap.messageCount} msgs | ~${snap.estimatedTokens} tok | headroom: ${snap.headroom} | tools: ${snap.toolCallCount} | adversary: ${snap.adversaryOutcomes} tracked\x1B[0m`
683933
683962
  )
683934
683963
  );
683935
683964
  }
683936
683965
  }
683937
683966
  break;
683938
- case "debug_littleman":
683939
- if (event.littlemanAction) {
683940
- const lm = event.littlemanAction;
683967
+ case "debug_adversary":
683968
+ if (event.adversaryAction) {
683969
+ const lm = event.adversaryAction;
683941
683970
  if (lm.intervention) {
683942
683971
  const simple = `⚠ ${lm.intervention}`;
683943
683972
  contentWrite(() => renderInfo(simple));
683944
683973
  }
683945
683974
  if (lm.details) {
683946
- littlemanBuffer.push(lm.details);
683947
- if (littlemanBuffer.length > 50)
683948
- littlemanBuffer.splice(0, littlemanBuffer.length - 50);
683949
- if (showLittleman) {
683975
+ adversaryBuffer.push(lm.details);
683976
+ if (adversaryBuffer.length > 50)
683977
+ adversaryBuffer.splice(0, adversaryBuffer.length - 50);
683978
+ if (showAdversary) {
683950
683979
  const det = String(lm.details);
683951
683980
  contentWrite(() => {
683952
683981
  process.stdout.write(c3.dim(det) + "\n");
@@ -685688,8 +685717,8 @@ This is an independent background session started from /background.`
685688
685717
  origTtyWriteRef = null;
685689
685718
  statusBar.setNeovimFocusChecker(() => isNeovimFocused());
685690
685719
  let _escapeHandler = null;
685691
- let showLittleman = false;
685692
- const littlemanBuffer = [];
685720
+ let showAdversary = false;
685721
+ const adversaryBuffer = [];
685693
685722
  statusBar.hookDirectInput(
685694
685723
  rl,
685695
685724
  () => {
@@ -685722,26 +685751,26 @@ This is an independent background session started from /background.`
685722
685751
  }
685723
685752
  },
685724
685753
  () => {
685725
- showLittleman = !showLittleman;
685754
+ showAdversary = !showAdversary;
685726
685755
  if (statusBar.isActive) {
685727
685756
  try {
685728
685757
  statusBar.jumpToLive();
685729
685758
  } catch {
685730
685759
  }
685731
685760
  statusBar.beginContentWrite();
685732
- if (showLittleman) {
685733
- renderInfo("Littleman details: shown");
685734
- const dump = littlemanBuffer.slice(-10).join("\n");
685761
+ if (showAdversary) {
685762
+ renderInfo("Adversary details: shown");
685763
+ const dump = adversaryBuffer.slice(-10).join("\n");
685735
685764
  if (dump.trim()) {
685736
685765
  process.stdout.write(`
685737
- ${c3.dim("[littleman recap]")}
685766
+ ${c3.dim("[adversary recap]")}
685738
685767
  `);
685739
685768
  for (const line of dump.split("\n")) {
685740
685769
  process.stdout.write(" " + c3.dim(line) + "\n");
685741
685770
  }
685742
685771
  }
685743
685772
  } else {
685744
- renderInfo("Littleman details: hidden");
685773
+ renderInfo("Adversary details: hidden");
685745
685774
  }
685746
685775
  statusBar.endContentWrite();
685747
685776
  }
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.212",
3
+ "version": "1.0.213",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "omnius",
9
- "version": "1.0.212",
9
+ "version": "1.0.213",
10
10
  "bundleDependencies": [
11
11
  "image-to-ascii"
12
12
  ],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "omnius",
3
- "version": "1.0.212",
3
+ "version": "1.0.213",
4
4
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",