npm - omnius - Versions diffs - 1.0.211 → 1.0.213 - Mend

omnius 1.0.211 → 1.0.213

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/index.js — CHANGED

@@ -551582,28 +551582,38 @@ var init_personality = __esm({
 });
 // packages/orchestrator/dist/critic.js
-function buildForceProgressBlockMessage(call, hits) {
+function buildCriticGuidanceMessage(call, hits, opts = {}) {
   const argPreview = JSON.stringify(call.args ?? {}).slice(0, 200);
-  return `[FORCED PROGRESS BLOCK — duplicate ${call.tool} call skipped; this is not a tool failure. You have called ${call.tool}(${argPreview}) ${hits} times with identical arguments. The runtime did not re-run the tool; it is returning the prior result below so you can proceed without retrying.
-Progress is REQUIRED before this tool will run again with the same arguments. To proceed, do one of these:
-  • file_write or file_edit to make progress, OR
-  • todo_write that advances the plan, OR
-  • task_complete (if all phases are done), OR
-  • Call a different tool or use different arguments.]`;
+  const cached = opts.cachedResult ? `
+Prior evidence preview:
+${opts.cachedResult.slice(0, 700)}` : "";
+  const source = opts.adversaryFlag ? "The adversary recognized this exact tool call as already observed earlier." : `This is exact repeat #${hits} for the same ${call.tool} arguments.`;
+  return `[ADVERSARY GUIDANCE — non-blocking]
+Observation: ${source}
+Call: ${call.tool}(${argPreview})
+Root cause hypothesis: the run is losing track of already-observed evidence, usually after path confusion, compaction, or an over-broad discovery loop.
+Corrective action: let this call's result inform the next step once, then pivot to a concrete action.
+Suggested next actions: edit/write the implicated file, run verification, read a different specific file, or complete with evidence. Prefer not to repeat this exact call again unless the filesystem, browser, or page state changed.${cached}`;
 }
 function buildCachedResultEnvelope(result) {
-  return `[CACHED RESULT — you already have this information from a prior identical call. Do NOT call this tool again with the same arguments.]
+  return `[PRIOR RESULT — already observed by a prior identical call]
 ${result}`;
 }
 function evaluate2(inputs) {
-  const { proposedCall, fingerprint, isReadLike, recentToolResults, dedupHitCount, observerRedundantBlock } = inputs;
-  if (observerRedundantBlock) {
+  const { proposedCall, fingerprint, isReadLike, recentToolResults, dedupHitCount, adversaryRedundantSignal } = inputs;
+  if (adversaryRedundantSignal) {
     const cached = recentToolResults.get(fingerprint);
+    const cachedResult = cached ? buildCachedResultEnvelope(cached.result) : void 0;
     return {
-      decision: "observer_block",
-      reason: "Littleman observer flagged this fingerprint as redundant",
-      cachedResult: cached ? buildCachedResultEnvelope(cached.result) : null
+      decision: "guidance",
+      reason: "Adversary flagged this fingerprint as redundant",
+      hitNumber: (dedupHitCount.get(fingerprint) ?? 0) + 1,
+      guidanceMessage: buildCriticGuidanceMessage(proposedCall, (dedupHitCount.get(fingerprint) ?? 0) + 1, {
+        cachedResult,
+        adversaryFlag: true
+      }),
+      cachedResult,
+      compacted: cached?.compacted
     };
   }
   const cacheEligible = isReadLike || proposedCall.tool === "shell";
@@ -551611,24 +551621,16 @@ function evaluate2(inputs) {
     const cached = recentToolResults.get(fingerprint);
     if (cached !== void 0) {
       const hits = (dedupHitCount.get(fingerprint) ?? 0) + 1;
-      const threshold = proposedCall.tool === "shell" ? SHELL_THRESHOLD : FS_THRESHOLD;
-      if (hits >= threshold) {
-        return {
-          decision: "force_progress_block",
-          reason: `${proposedCall.tool} fingerprint hit count ${hits} >= ${threshold}`,
-          hitNumber: hits,
-          blockMessage: buildForceProgressBlockMessage(proposedCall, hits),
-          cachedResult: buildCachedResultEnvelope(cached.result),
-          compacted: cached.compacted
-        };
-      }
       const cachedEnvelope = buildCachedResultEnvelope(cached.result);
       return {
-        decision: "serve_cached",
-        reason: cached.compacted ? "post-compaction cache re-serve" : `duplicate call #${hits} (still under ${threshold}-hit gate)`,
+        decision: "guidance",
+        reason: cached.compacted ? "post-compaction duplicate evidence" : `duplicate call #${hits}`,
         cachedResult: cachedEnvelope,
         compacted: cached.compacted,
-        hitNumber: hits
+        hitNumber: hits,
+        guidanceMessage: buildCriticGuidanceMessage(proposedCall, hits, {
+          cachedResult: cachedEnvelope
+        })
       };
     }
   }
@@ -551680,12 +551682,9 @@ function isStagnant(signals, opts) {
     return false;
   return signals.completedDelta <= 0 && signals.filesDelta < filesDeltaMin && signals.failureSum >= failureThreshold && signals.variantCount >= variantThreshold;
 }
-var SHELL_THRESHOLD, FS_THRESHOLD;
 var init_critic = __esm({
   "packages/orchestrator/dist/critic.js"() {
     "use strict";
-    SHELL_THRESHOLD = 2;
-    FS_THRESHOLD = 3;
   }
 });
@@ -558656,8 +558655,8 @@ var init_agenticRunner = __esm({
       // WO-KG-15
       _retrievalContextCache = null;
       // WO-KG-15: cache per-run
-      // Observer world-model and cohort stats
-      _observerMode = "both";
+      // Adversary world-model and cohort stats
+      _adversaryMode = "both";
       _worldFacts = { files: /* @__PURE__ */ new Map(), lastTest: {}, lastLists: /* @__PURE__ */ new Map() };
       // REG-7-root: Track file writes since last todo_write call. When this
       // counter climbs without a todo update, the agent has likely batched
@@ -559006,6 +559005,8 @@ var init_agenticRunner = __esm({
       _sessionId = `session-${Date.now()}`;
       _workingDirectory = "";
       constructor(backend, options2) {
+        const adversaryMode = options2?.adversaryMode ?? options2?.observerMode ?? "both";
+        const disableAdversaryCritic = options2?.disableAdversaryCritic ?? options2?.disableStepCritic ?? false;
         this.backend = backend;
         this.options = {
           maxTurns: options2?.maxTurns ?? 60,
@@ -559029,19 +559030,23 @@ var init_agenticRunner = __esm({
           bruteForce: options2?.bruteForce ?? true,
           bruteForceMaxCycles: options2?.bruteForceMaxCycles ?? 100,
           allowTurnExtension: options2?.allowTurnExtension ?? true,
+          completionProvenanceGuard: options2?.completionProvenanceGuard ?? true,
+          disableAdversaryCritic,
+          disableStepCritic: disableAdversaryCritic,
           modelTier: options2?.modelTier ?? "large",
           contextWindowSize: options2?.contextWindowSize ?? 0,
           personality: options2?.personality ?? PERSONALITY_PRESETS.balanced,
           personalityName: options2?.personalityName ?? "",
           finalVarResolver: options2?.finalVarResolver ?? void 0,
-          observerMode: options2?.observerMode ?? "both",
+          adversaryMode,
+          observerMode: adversaryMode,
           // Phase 4 — sub-agent isolation flag (defaults false). When true, this
           // runner skips cross-task handoff inheritance from the parent's
           // session.
           subAgent: options2?.subAgent ?? false,
           skipCrossTaskHandoff: options2?.skipCrossTaskHandoff ?? false
         };
-        this._observerMode = this.options.observerMode;
+        this._adversaryMode = this.options.adversaryMode;
       }
       /** Update context window size (e.g. after querying Ollama /api/show) */
       setContextWindowSize(size) {
@@ -559049,7 +559054,10 @@ var init_agenticRunner = __esm({
       }
       /** Set the working directory for session checkpointing */
       setWorkingDirectory(dir) {
-        this._workingDirectory = dir;
+        this._workingDirectory = _pathResolve(dir);
+      }
+      authoritativeWorkingDirectory() {
+        return _pathResolve(this._workingDirectory || process.cwd());
       }
       /** State root for runner-owned memory/artifacts. Defaults to cwd/.omnius. */
       omniusStateDir() {
@@ -559822,7 +559830,7 @@ ${result.output ?? ""}`;
        * checklist via todo_write, and only then call task_complete.
        */
       /**
-       * REG-39c: tag a SYNTHETIC failure (FORCED PROGRESS BLOCK / observer
+       * REG-39c: tag a SYNTHETIC failure (FORCED PROGRESS BLOCK / adversary
        * block / budget exhausted). These paths return early from
        * executeSingle BEFORE the main result-handling code, so the normal
        * MAST tagging miss them. This helper lets each return-early site
@@ -559898,6 +559906,198 @@ Do NOT call task_complete until all items are marked completed via todo_write.`;
           `Continue the work loop: inspect the failed evidence, make the smallest targeted fix, then rerun the relevant verification. Use the full available verification spectrum for the artifact you changed: static syntax, build/typecheck, tests, service startup, runtime logs, browser/page errors, console output, network failures, screenshots, accessibility/DOM state, and end-to-end user flow checks where applicable. The exact tools are stack-dependent; the standard is objective runtime evidence, not self-report.`
         ].join("\n");
       }
+      buildMissionCompletionContract(task, context2) {
+        if (process.env["OMNIUS_DISABLE_MISSION_COMPLETION_CONTRACT"] === "1")
+          return "";
+        const profile = this._inferCompletionProfile(`${task}
+${context2 ?? ""}`);
+        const requirements = [];
+        if (profile.browser)
+          requirements.push("browser/UI state must be proven by a post-action screenshot/DOM/observe_bundle pass");
+        if (profile.desktop)
+          requirements.push("desktop state must be proven by vision_action_loop observe or desktop_describe after the final action");
+        if (profile.code)
+          requirements.push("code/file changes must be proven by a relevant build/test/typecheck/runtime command after the last edit");
+        if (profile.research)
+          requirements.push("research/root-cause claims must cite concrete inspected files, commands, logs, or source artifacts");
+        if (requirements.length === 0)
+          requirements.push("final claims must name the concrete evidence used or state that the task required no external action");
+        return [
+          `[MISSION COMPLETION CONTRACT]`,
+          `Current ask: ${task.slice(0, 500)}`,
+          ``,
+          `Before claiming success or calling task_complete, satisfy the mission-specific evidence requirements:`,
+          ...requirements.map((line) => `- ${line}.`),
+          ``,
+          `The final task_complete summary for any action-heavy task must include a compact Provenance/Evidence note naming the validating tool output, command, screenshot, DOM state, file path, or blocker. Self-confidence is not evidence.`,
+          `For browser/form/account/send flows: after the last click/type/navigate/submit action, capture a fresh browser observation and verify the visible final state before completion.`,
+          `If completion is impossible, use a summary beginning BLOCKED: and name the exact blocker plus the evidence already collected.`
+        ].join("\n");
+      }
+      _inferCompletionProfile(text) {
+        const t2 = text.toLowerCase();
+        const browser3 = /\b(browser|web\s*page|website|page|playwright|selenium|chromedriver|chrome|headless|gui|proton|login|captcha|form|account|compose|mail|submit|click|type|fill)\b/.test(t2);
+        const desktop = /\b(desktop|screen|application|app\b|window|file manager|open a file|laptop|screenshot|vision_action_loop|desktop_describe|desktop_click)\b/.test(t2);
+        const code8 = /\b(implement|fix|patch|refactor|rewrite|build|compile|typecheck|test suite|unit test|integration test|source file|codebase|package|typescript|javascript|python|rust|golang)\b/.test(t2);
+        const research = /\b(discover|root cause|triage|deep dive|review|audit|investigate|prove|validate|forensics|diagnostic|failure mode)\b/.test(t2);
+        const formLike = /\b(form|fill|submit|signup|sign up|login|log in|account|compose|send|sent|mail|captcha|checkout|payment|upload)\b/.test(t2);
+        return { browser: browser3, desktop, code: code8, research, formLike };
+      }
+      _completionSummaryHasProvenance(summary) {
+        return /\b(provenance|evidence|verified|validated|confirmed|observed|screenshot|dom|console|network|log|test|typecheck|build|passed|opened|sent|created|submitted|blocked)\b/i.test(summary);
+      }
+      _isBlockedCompletionSummary(summary) {
+        return /^\s*(?:BLOCKED|PARTIAL|NO FILE CHANGES REQUIRED)\b/i.test(summary);
+      }
+      _browserActionKind(entry) {
+        if (!/^(browser_action|playwright_browser|carbonyl_browser)$/.test(entry.name))
+          return "other";
+        const args = this._parseExactArgsKey(entry.argsKey);
+        const action = String(args.get("action") ?? args.get("tool") ?? args.get("command") ?? "").toLowerCase();
+        if (/^(screenshot|dom|dom_summary|observe|observe_bundle|page_errors|console_logs|network_log|accessibility|snapshot|state|url|title)$/.test(action)) {
+          return "observe";
+        }
+        if (/^(navigate|click|click_xy|vision_click|visual_click|type|fill|press|select|submit|evaluate|scroll|scroll_up|scroll_down|back|forward)$/.test(action)) {
+          return "state";
+        }
+        return entry.name === "carbonyl_browser" ? "state" : "other";
+      }
+      _desktopActionKind(entry) {
+        if (/^(desktop_describe|screenshot)$/.test(entry.name))
+          return "observe";
+        if (entry.name === "desktop_click")
+          return "state";
+        if (entry.name !== "vision_action_loop")
+          return "other";
+        const args = this._parseExactArgsKey(entry.argsKey);
+        const action = String(args.get("action") ?? "").toLowerCase();
+        if (/^(observe|screenshot|describe|ocr|state)$/.test(action))
+          return "observe";
+        if (/^(click|type|key|press|open|focus|drag|scroll)$/.test(action))
+          return "state";
+        return "other";
+      }
+      _isVerificationShell(entry) {
+        if (entry.name !== "shell" && entry.name !== "background_run")
+          return false;
+        if (entry.success !== true)
+          return false;
+        const args = this._parseExactArgsKey(entry.argsKey);
+        const command = String(args.get("command") ?? args.get("cmd") ?? "").toLowerCase();
+        return /\b(test|typecheck|check|build|compile|verify|lint|pytest|vitest|jest|playwright|cypress|tsc|cargo\s+test|go\s+test|npm\s+run|pnpm\s+run|yarn\s+run)\b/.test(command);
+      }
+      _evaluateCompletionProvenanceGate(input) {
+        if (this.options.completionProvenanceGuard === false)
+          return { proceed: true };
+        if (process.env["OMNIUS_DISABLE_COMPLETION_PROVENANCE_GUARD"] === "1")
+          return { proceed: true };
+        const summary = input.summary || "";
+        const blockedSummary = this._isBlockedCompletionSummary(summary);
+        const profile = this._inferCompletionProfile(input.taskGoal);
+        const log22 = input.toolCallLog.filter((entry) => entry.name !== "task_complete");
+        const browserUsed = log22.some((entry) => /^(browser_action|playwright_browser|carbonyl_browser)$/.test(entry.name));
+        const desktopUsed = log22.some((entry) => /^(desktop_describe|desktop_click|vision_action_loop|screenshot)$/.test(entry.name));
+        const mutated = log22.some((entry) => entry.mutated === true);
+        const issues = [];
+        const actionHeavy = profile.browser || profile.desktop || profile.code || profile.research || browserUsed || desktopUsed || mutated || this._fileWritesThisRun > 0;
+        if (!actionHeavy)
+          return { proceed: true };
+        if (blockedSummary)
+          return { proceed: true };
+        const successfulNonCompletion = log22.filter((entry) => entry.success === true);
+        if (successfulNonCompletion.length === 0) {
+          issues.push("No successful objective tool result is recorded for this action-oriented task.");
+        }
+        const requiresBrowser = profile.browser || browserUsed;
+        const requiresDesktop = profile.desktop || desktopUsed;
+        if (requiresBrowser) {
+          let lastStateIdx = -1;
+          let lastObserveIdx = -1;
+          log22.forEach((entry, idx) => {
+            if (entry.success !== true)
+              return;
+            const kind = this._browserActionKind(entry);
+            if (kind === "state")
+              lastStateIdx = idx;
+            if (kind === "observe")
+              lastObserveIdx = idx;
+          });
+          if (lastStateIdx >= 0 && lastObserveIdx <= lastStateIdx) {
+            issues.push("Browser state changed after the last browser observation. Capture a fresh screenshot/DOM/observe_bundle after the final click/type/navigate/submit before completion.");
+          } else if (profile.formLike && lastObserveIdx < 0) {
+            issues.push("This looks like a form/account/send flow, but no successful post-action browser observation is recorded.");
+          }
+        }
+        if (requiresDesktop) {
+          let lastStateIdx = -1;
+          let lastObserveIdx = -1;
+          log22.forEach((entry, idx) => {
+            if (entry.success !== true)
+              return;
+            const kind = this._desktopActionKind(entry);
+            if (kind === "state")
+              lastStateIdx = idx;
+            if (kind === "observe")
+              lastObserveIdx = idx;
+          });
+          if (lastStateIdx >= 0 && lastObserveIdx <= lastStateIdx) {
+            issues.push("Desktop state changed after the last visual observation. Run vision_action_loop observe or desktop_describe after the final desktop action before completion.");
+          }
+        }
+        const mutatedEntries = log22.map((entry, idx) => ({ entry, idx })).filter(({ entry }) => entry.mutated === true);
+        if (profile.code || mutatedEntries.length > 0 || this._fileWritesThisRun > 0) {
+          const lastMutationIdx = mutatedEntries.length > 0 ? Math.max(...mutatedEntries.map(({ idx }) => idx)) : -1;
+          const verifiedAfterMutation = log22.some((entry, idx) => {
+            if (lastMutationIdx >= 0 && idx <= lastMutationIdx)
+              return false;
+            if (this._isVerificationShell(entry))
+              return true;
+            if (entry.success === true && this._browserActionKind(entry) === "observe")
+              return true;
+            if (entry.success === true && this._desktopActionKind(entry) === "observe")
+              return true;
+            return false;
+          });
+          if (lastMutationIdx >= 0 && !verifiedAfterMutation) {
+            issues.push("Files were changed but no successful verification command or runtime observation appears after the last mutation.");
+          }
+        }
+        const lastNonCompletion = [...log22].reverse().find(Boolean);
+        if (lastNonCompletion?.success === false) {
+          issues.push(`The most recent non-completion tool result failed (${lastNonCompletion.name}); resolve or explicitly report BLOCKED before completing.`);
+        }
+        if (!this._completionSummaryHasProvenance(summary)) {
+          issues.push("The completion summary does not include an explicit Evidence/Provenance note.");
+        }
+        if (issues.length === 0)
+          return { proceed: true };
+        const recentEvidence = successfulNonCompletion.slice(-6).map((entry) => {
+          const preview = (entry.outputPreview ?? "").replace(/\s+/g, " ").slice(0, 160);
+          return `- ${entry.name}(${entry.argsKey.slice(0, 120)})${preview ? ` -> ${preview}` : ""}`;
+        }).join("\n");
+        return {
+          proceed: false,
+          reason: issues[0] ?? "missing provenance",
+          feedback: [
+            `[COMPLETION PROVENANCE REQUIRED]`,
+            ``,
+            `You attempted to finish, but the completion claim is not yet proven against the current mission.`,
+            ``,
+            `Blocking issues:`,
+            ...issues.map((issue, index) => `${index + 1}. ${issue}`),
+            ``,
+            recentEvidence ? `Recent successful evidence already available:
+${recentEvidence}` : `Recent successful evidence already available: none recorded.`,
+            ``,
+            `Do the smallest missing verification step now. For browser/UI work, take a fresh screenshot/DOM/observe_bundle after the final action. For desktop work, run vision_action_loop observe or desktop_describe after the final action. For code/file changes, run the relevant build/test/typecheck/runtime check after the last edit.`,
+            ``,
+            `Only then call task_complete with this shape:`,
+            `Summary: <what changed or what final state was reached>`,
+            `Provenance: <tool/command/screenshot/DOM/log/file evidence proving it>`,
+            `If impossible, call task_complete with summary starting BLOCKED: and name the blocker plus evidence.`
+          ].join("\n")
+        };
+      }
       /**
        * REG-47: post-implementation backward-pass review.
        *
@@ -561174,7 +561374,7 @@ ${latest.output || ""}`.trim();
           }
         }
         const sections = [
-          "[KNOWLEDGE — cached tool results already known to the runtime. Do NOT re-call these tools with the same arguments:]"
+          "[KNOWLEDGE — cached tool results already known to the runtime. Repeating an exact read/list/search/shell call is a wasted action and will be blocked or served from cache:]"
         ];
         if (compactedCount > 0) {
           sections.push(`Compacted cached entries still count as already-known results (${compactedCount}); an exact repeat will be served from cache or skipped, not produce new information.`);
@@ -561186,6 +561386,7 @@ ${latest.output || ""}`.trim();
         if (dirsListed.length > 0) {
           const unique2 = [...new Set(dirsListed)].slice(0, 15);
           sections.push(`Directories already listed (${unique2.length}): ${unique2.join(", ")}`);
+          sections.push(`Do not call list_directory again on these exact directories unless you changed their contents. Use the listed child paths directly with file_read/edit/delegation.`);
         }
         if (searches.length > 0) {
           const unique2 = [...new Set(searches)].slice(0, 15);
@@ -561199,6 +561400,23 @@ ${latest.output || ""}`.trim();
           return null;
         return sections.join("\n");
       }
+      _renderRuntimeRootBlock() {
+        const authoritative = this.authoritativeWorkingDirectory();
+        const proc = _pathResolve(process.cwd());
+        const lines = [
+          `[RUNTIME ROOT — authoritative]`,
+          `Current working directory for this run: ${authoritative}`,
+          `All relative file/tool paths resolve under this directory unless the tool call uses an absolute path.`,
+          `Do not infer cwd from old tasks, shell transcripts, memory, or prior browser sessions.`
+        ];
+        if (proc !== authoritative) {
+          lines.push(`Process cwd differs (${proc}); treat the run cwd above as authoritative for repo/project work.`);
+        }
+        if (this._worldFacts.lastCwd && this._worldFacts.lastCwd !== authoritative) {
+          lines.push(`Last shell cd target was command-local only: ${this._worldFacts.lastCwd}. It does not change the run cwd.`);
+        }
+        return lines.join("\n");
+      }
       _insertContextFrame(messages2, frame) {
         if (!frame)
           return;
@@ -561236,7 +561454,7 @@ ${latest.output || ""}`.trim();
         add2(this._activeContextItem("task_state", "todo-state", "turn.todos", "Todo state", input.todoBlock, 80));
         add2(this._activeContextItem("recent_failure", "recent-failures", "turn.failures", "Recent failures", input.failureBlock, 95));
         add2(this._activeContextItem("recent_failure", "write-churn", "turn.churn", "Write churn", input.churnBlock, 75));
-        add2(this._activeContextItem("tool_cache", "tool-cache", "turn.tool-cache", "Tool cache", input.toolCacheBlock, 65));
+        add2(this._activeContextItem("tool_cache", "tool-cache", "turn.tool-cache", "Tool cache", input.toolCacheBlock, 92));
         add2(this._activeContextItem("anchor", "anchors", "turn.anchors", "Relevant anchors", input.anchorsBlock, 50));
         add2(this._activeContextItem("environment", "environment", "turn.environment", "Environment", input.environmentBlock, 35));
         if (this._lastPprMemoryLines.length > 0) {
@@ -561491,7 +561709,10 @@ ${chunk.content}`, {
       async _buildTurnContextFrame(turn, messages2, recentToolResults, environmentBlock) {
         this._contextLedger.clearSources("turn.");
         this._contextLedger.prune(turn);
-        const goalBlock = this._taskState.goal ? `Active task: ${this._taskState.goal}` : null;
+        const goalBlock = [
+          this._renderRuntimeRootBlock(),
+          this._taskState.goal ? `Active task: ${this._taskState.goal}` : null
+        ].filter(Boolean).join("\n\n");
         const filesystemBlock = this._renderFilesystemStateBlock(turn);
         const todoBlock = this._renderTodoStateBlock(turn);
         const failureBlock = this._renderRecentFailuresBlock(turn);
@@ -561557,7 +561778,7 @@ ${this._lastPprMemoryLines.slice(0, 5).join("\n")}` : null;
           signalFromBlock("tool_cache", "turn.tool-cache", toolCacheBlock, {
             id: "tool-cache",
             dedupeKey: "turn.tool-cache",
-            priority: 65,
+            priority: 92,
             createdTurn: turn,
             ttlTurns: 1
           }),
@@ -562409,8 +562630,8 @@ ${notice}`;
         const window2 = recentToolCalls.slice(-repetitionWindow);
         const uniqueKeys = new Set(window2.map((tc) => `${tc.name}:${tc.argsKey}`));
         const ratio = 1 - uniqueKeys.size / window2.length;
-        if (ratio > 0.4 && this._littlemanToolOutcomes.length >= 3) {
-          const recentOutcomes = this._littlemanToolOutcomes.slice(-6);
+        if (ratio > 0.4 && this._adversaryToolOutcomes.length >= 3) {
+          const recentOutcomes = this._adversaryToolOutcomes.slice(-6);
           const uniquePreviews = new Set(recentOutcomes.map((o2) => o2.preview.slice(0, 40)));
           if (uniquePreviews.size >= 3) {
             return Math.max(0, ratio - 0.4);
@@ -562508,6 +562729,9 @@ Respond with your assessment, then take action.`;
         this._lastActiveForgettingReport = null;
         this._lastContextConsolidationTurn = -1e3;
         this._contextFrameBuilder = new ContextFrameBuilder();
+        if (!this._workingDirectory) {
+          this._workingDirectory = _pathResolve(process.cwd());
+        }
         if (!this.options.disablePersistentMemory && !this._memoryInitialized) {
           try {
             const path12 = await import("node:path");
@@ -562728,7 +562952,6 @@ Respond with your assessment, then take action.`;
         const contextComposition = await this.assembleContext(task, context2);
         const systemPrompt = contextComposition.assembled;
         this._contextTree = new ContextTree(`sys-${systemPrompt.length}`, cleanedTask.slice(0, 200));
-        this._phaseMessageStartIdx = 2;
         this.emit({
           type: "status",
           content: `Context assembled: ${contextComposition.sections.map((s2) => `${s2.label}(${s2.tokenEstimate}t)`).join(" + ")} = ~${contextComposition.totalTokenEstimate}t`,
@@ -562772,10 +562995,13 @@ TASK: ${scrubbedTask}` : scrubbedTask;
             }
           });
         }
+        const missionCompletionContract = this.buildMissionCompletionContract(cleanedTask, context2);
         const messages2 = [
           { role: "system", content: systemPrompt },
+          ...missionCompletionContract ? [{ role: "system", content: missionCompletionContract }] : [],
           { role: "user", content: userContent }
         ];
+        this._phaseMessageStartIdx = messages2.length;
         if (process.env["OMNIUS_DISABLE_DECOMP1"] !== "1") {
           try {
             const _taskBodyForDecomp = typeof userContent === "string" ? userContent : "";
@@ -562939,10 +563165,10 @@ TASK: ${scrubbedTask}` : scrubbedTask;
         this._hookDenyHintCount = 0;
         this._selfConsistencyVotes = 0;
         this._retrievalContextCache = null;
-        this._observerMode = this.options.observerMode ?? "both";
+        this._adversaryMode = this.options.adversaryMode ?? "both";
         this._worldFacts = { files: /* @__PURE__ */ new Map(), lastTest: {}, lastLists: /* @__PURE__ */ new Map() };
         this._argCohorts.clear();
-        this._littlemanRedundantBlocks.clear();
+        this._adversaryRedundantSignals.clear();
         this._lastTodoWriteTurn = -1;
         this._lastTodoReminderTurn = -1;
         let pendingConstraintWarnings = [];
@@ -563033,6 +563259,54 @@ TASK: ${scrubbedTask}` : scrubbedTask;
           });
           return true;
         };
+        const holdProvenanceTaskComplete = (args, turn) => {
+          const proposedSummary = extractTaskCompleteSummary(args);
+          const gate = this._evaluateCompletionProvenanceGate({
+            summary: proposedSummary,
+            taskGoal: cleanedTask,
+            toolCallLog
+          });
+          if (gate.proceed)
+            return false;
+          messages2.push({
+            role: "system",
+            content: `${gate.feedback}
+[ADVISORY ONLY] This critique does not block task_complete; use it to improve the next run or visible evidence if the task continues.`
+          });
+          this.emit({
+            type: "status",
+            content: `completion provenance critique emitted without blocking: ${gate.reason}`,
+            turn,
+            timestamp: (/* @__PURE__ */ new Date()).toISOString()
+          });
+          this.emit({
+            type: "adversary_reaction",
+            adversary: {
+              class: "guidance",
+              shortText: "Completion provenance critique emitted",
+              confidence: 0.9,
+              details: gate.reason
+            },
+            turn,
+            timestamp: (/* @__PURE__ */ new Date()).toISOString()
+          });
+          return false;
+        };
+        const emitBackwardPassAdvisory = (feedback, turn) => {
+          messages2.push({
+            role: "system",
+            content: `${feedback}
+[ADVISORY ONLY] Backward-pass critique is non-blocking; do not treat this as a tool failure or completion refusal.`
+          });
+          this.emit({
+            type: "status",
+            content: "backward-pass critique emitted without blocking completion",
+            turn,
+            timestamp: (/* @__PURE__ */ new Date()).toISOString()
+          });
+        };
         const turnCap = this.options.maxTurns && this.options.maxTurns > 0 ? this.options.maxTurns : Number.MAX_SAFE_INTEGER;
         for (let turn = 0; turn < turnCap; turn++) {
           clearTurnState(this._appState);
@@ -564017,8 +564291,8 @@ ${_staleSamples.join("\n")}` : ``,
             nextSelfEval = now + selfEvalInterval;
           }
           const turnsRemaining = this.options.maxTurns - turn;
-          if (this.options.allowTurnExtension && turnsRemaining <= 3 && turnsRemaining > 0 && this._littlemanToolOutcomes.length >= 2) {
-            const recentOutcomes = this._littlemanToolOutcomes.slice(-6);
+          if (this.options.allowTurnExtension && turnsRemaining <= 3 && turnsRemaining > 0 && this._adversaryToolOutcomes.length >= 2) {
+            const recentOutcomes = this._adversaryToolOutcomes.slice(-6);
             const recentSuccesses = recentOutcomes.filter((o2) => o2.succeeded).length;
             const uniqueResults = new Set(recentOutcomes.map((o2) => o2.preview.slice(0, 40))).size;
             const isActive = recentSuccesses >= 2 && uniqueResults >= 2;
@@ -564027,16 +564301,16 @@ ${_staleSamples.join("\n")}` : ``,
               this.options.maxTurns += extension3;
               this.emit({
                 type: "status",
-                content: `Littleman triage: activity detected (${recentSuccesses} recent successes, ${uniqueResults} unique results) — extending turn limit by ${extension3} (now ${this.options.maxTurns})`,
+                content: `Adversary triage: activity detected (${recentSuccesses} recent successes, ${uniqueResults} unique results) — extending turn limit by ${extension3} (now ${this.options.maxTurns})`,
                 timestamp: (/* @__PURE__ */ new Date()).toISOString()
               });
               const detailsLines = recentOutcomes.map((o2) => `- ${o2.tool}: ${o2.succeeded ? "OK" : "ERR"} — ${o2.preview}`);
               this.emit({
-                type: "debug_littleman",
+                type: "debug_adversary",
                 turn,
                 timestamp: (/* @__PURE__ */ new Date()).toISOString(),
                 content: `Timeout triage: EXTENDED by ${extension3} turns (active session detected)`,
-                littlemanAction: {
+                adversaryAction: {
                   detection: "none",
                   recentSuccesses,
                   recentFailures: recentOutcomes.length - recentSuccesses,
@@ -564369,6 +564643,9 @@ ${memoryLines.join("\n")}`
             maxTokens: effectiveMaxTokens,
             timeoutMs: this.options.requestTimeoutMs
           };
+          if ((this.options.contextWindowSize ?? 0) > 0) {
+            chatRequest.numCtx = this.options.contextWindowSize;
+          }
           if (this.options.memoryPrefix)
             chatRequest.memoryPrefix = this.options.memoryPrefix;
           if (this.options.memoryPrefixHash)
@@ -564410,7 +564687,7 @@ ${memoryLines.join("\n")}`
                 compactionThreshold: limits.compactionThreshold,
                 toolCallCount,
                 keepRecent: limits.keepRecent,
-                littlemanOutcomes: this._littlemanToolOutcomes.length,
+                adversaryOutcomes: this._adversaryToolOutcomes.length,
                 headroom: limits.compactionThreshold - estTokens
               }
             });
@@ -564773,16 +565050,19 @@ ${memoryLines.join("\n")}`
               const cohort = this._argCohorts.get(cohortKey);
               if (cohort && cohort.failure >= 3 && cohort.success === 0) {
                 this.emit({
-                  type: "observer_reaction",
+                  type: "adversary_reaction",
                   timestamp: (/* @__PURE__ */ new Date()).toISOString(),
-                  observer: {
+                  adversary: {
                     class: "arg_cohort_risk",
                     shortText: `${tc.name} with similar args has failed ${cohort.failure}× recently`,
                     confidence: 0.85
                   }
                 });
-                if (this._observerMode === "skillcoach" || this._observerMode === "both") {
-                  this.pendingUserMessages.push(`⚠ ${tc.name} with similar arguments has failed ${cohort.failure}× recently. Try a different approach first: read relevant files, adjust arguments, or verify prerequisites.`);
+                if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
+                  this.pendingUserMessages.push(`[ADVERSARY CRITIQUE — non-blocking]
+Evidence: ${tc.name} with similar arguments has failed ${cohort.failure}× recently.
+Root cause hypothesis: the argument family may be wrong, a prerequisite may be missing, or the tool is being used before enough state is known.
+Corrective action: try a different approach first: read relevant files, adjust arguments, or verify prerequisites.`);
                 }
               }
               if (this._errorPatterns.size > 0) {
@@ -565064,19 +565344,11 @@ ${memoryLines.join("\n")}`
               ].includes(tc.name);
               const isStatefulBrowserTool = this._isStatefulBrowserTool(tc.name);
               const isReadLike = !isStatefulBrowserTool && (baseIsReadLike || tc.name === "shell" && this._isShellCommandReadOnly(tc.arguments?.["command"] ?? tc.arguments?.["cmd"] ?? ""));
-              const observerRedundantBlock = this._littlemanRedundantBlocks.has(toolFingerprint);
-              if (observerRedundantBlock) {
-                this._littlemanRedundantBlocks.delete(toolFingerprint);
+              const adversaryRedundantSignal = this._adversaryRedundantSignals.has(toolFingerprint);
+              if (adversaryRedundantSignal) {
+                this._adversaryRedundantSignals.delete(toolFingerprint);
               }
-              const markSyntheticToolLog = (outputPreview) => {
-                const lastLog = toolCallLog[_toolLogTailIdx];
-                if (!lastLog)
-                  return;
-                lastLog.success = true;
-                lastLog.mutated = false;
-                lastLog.mutatedFiles = [];
-                lastLog.outputPreview = outputPreview.slice(0, 100);
-              };
+              let criticGuidance = null;
               {
                 const _reflStem = buildStem(tc.name, tc.arguments ?? {});
                 if (!this._reflectionsInjectedThisTurn.has(_reflStem)) {
@@ -565118,7 +565390,10 @@ ${memoryLines.join("\n")}`
                   }
                 }
               }
-              const criticDecision = evaluate2({
+              const criticDecision = this.options.disableAdversaryCritic === true ? {
+                decision: "pass",
+                reason: "adversary critic disabled for isolated evaluation"
+              } : evaluate2({
                 proposedCall: { tool: tc.name, args: tc.arguments ?? {} },
                 fingerprint: toolFingerprint,
                 isReadLike,
@@ -565132,116 +565407,33 @@ ${memoryLines.join("\n")}`
                 stagnationSignals: null,
                 // stagnation gate handled at top-of-turn
                 stagnationGateActive: false,
-                observerRedundantBlock
+                adversaryRedundantSignal
               });
-              if (criticDecision.decision === "observer_block") {
-                this.emit({
-                  type: "tool_call",
-                  toolName: tc.name,
-                  toolArgs: tc.arguments,
-                  turn,
-                  timestamp: (/* @__PURE__ */ new Date()).toISOString()
-                });
-                const blockMsg = criticDecision.cachedResult ? `[BLOCKED — this tool+args already succeeded. Re-served from cache:]
-${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confirmed this tool already succeeded with these arguments on a prior turn. Do NOT re-run. Use your prior findings to proceed.]`;
-                markSyntheticToolLog(blockMsg);
-                this.emit({
-                  type: "tool_result",
-                  toolName: tc.name,
-                  success: true,
-                  content: blockMsg.slice(0, 100),
-                  turn,
-                  timestamp: (/* @__PURE__ */ new Date()).toISOString()
-                });
-                this._tagSyntheticFailure({
-                  mode: "step_repetition",
-                  rationale: `observer-block on ${tc.name} fingerprint flagged redundant`
-                });
-                return { tc, output: blockMsg, success: true };
-              }
-              if (criticDecision.decision === "force_progress_block") {
-                dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
-                const _existingFp = recentToolResults.get(toolFingerprint);
-                if (_existingFp !== void 0) {
-                  recentToolResults.delete(toolFingerprint);
-                  recentToolResults.set(toolFingerprint, _existingFp);
-                }
-                this.emit({
-                  type: "tool_call",
-                  toolName: tc.name,
-                  toolArgs: tc.arguments,
-                  turn,
-                  timestamp: (/* @__PURE__ */ new Date()).toISOString()
-                });
-                this.emit({
-                  type: "tool_result",
-                  toolName: tc.name,
-                  success: true,
-                  content: `[SKIPPED DUPLICATE — exact ${tc.name} call not re-run; cached result returned.]`.slice(0, 120),
-                  turn,
-                  timestamp: (/* @__PURE__ */ new Date()).toISOString()
-                });
-                this._tagSyntheticFailure({
-                  mode: "step_repetition",
-                  rationale: `force_progress_block on ${tc.name} after ${criticDecision.hitNumber} identical calls`
-                });
-                const generationCompletionHint = isGenerationArtifactSuccess(tc.name, criticDecision.cachedResult) ? `
-[GENERATION ALREADY COMPLETE] This exact ${tc.name} call already succeeded. Do not call it again. Use the cached artifact/path above; if delivery is needed, send it, otherwise call task_complete.` : "";
-                const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. Do not retry this exact call.]
-` : `[SKIPPED DUPLICATE — exact ${tc.name} call not re-run. The cached result below is from the prior successful call. Do not retry this exact call.]
-`;
-                const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
-... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
-                markSyntheticToolLog(`${criticDecision.blockMessage}
-${truncatedCache}`);
-                return {
-                  tc,
-                  output: `${criticDecision.blockMessage}
-${header}${truncatedCache}${generationCompletionHint}`,
-                  success: true
-                };
-              }
-              if (criticDecision.decision === "serve_cached") {
+              if (criticDecision.decision === "guidance") {
                 dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
                 const _existingFp = recentToolResults.get(toolFingerprint);
                 if (_existingFp !== void 0) {
                   recentToolResults.delete(toolFingerprint);
                   recentToolResults.set(toolFingerprint, _existingFp);
                 }
+                criticGuidance = criticDecision.guidanceMessage;
                 this.emit({
-                  type: "tool_call",
-                  toolName: tc.name,
-                  toolArgs: tc.arguments,
-                  turn,
+                  type: "adversary_reaction",
+                  adversary: {
+                    class: "guidance",
+                    shortText: `Adversary guidance for repeated ${tc.name} call`,
+                    confidence: 0.9,
+                    details: criticDecision.reason
+                  },
                   timestamp: (/* @__PURE__ */ new Date()).toISOString()
                 });
-                const generationCompletionHint = isGenerationArtifactSuccess(tc.name, criticDecision.cachedResult) ? `
-[GENERATION ALREADY COMPLETE] This exact ${tc.name} call already succeeded. Do not call it again. Use the cached artifact/path above; if delivery is needed, send it, otherwise call task_complete.` : "";
-                const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. No need to call this tool again.]
-` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result is identical. Do NOT call this again. Use the data you already have to make progress. One more identical call will trigger a hard progress block.]
-`;
-                const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
-... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
-                const dedupOutput = header + truncatedCache + generationCompletionHint;
-                markSyntheticToolLog(dedupOutput);
                 this.emit({
-                  type: "tool_result",
+                  type: "status",
                   toolName: tc.name,
-                  success: true,
-                  content: header.slice(0, 100),
+                  content: `Adversary guidance emitted for ${tc.name}; tool call will still execute`,
                   turn,
                   timestamp: (/* @__PURE__ */ new Date()).toISOString()
                 });
-                return { tc, output: dedupOutput, success: true };
               }
               this.emit({
                 type: "tool_call",
@@ -566242,6 +566434,11 @@ Respond with EXACTLY this structure before your next tool call:
                 result = await this.offloadEmbeddedImageResult(result, tc.name, turn);
               }
               let output = this.normalizeToolOutput(result, tc.name, tc.arguments, turn);
+              if (criticGuidance) {
+                output += `
+${criticGuidance}`;
+              }
               if (!result.success && (this.options.modelTier === "small" || this.options.modelTier === "medium")) {
                 const recovery = this.buildRecoveryGuidance(tc.name, result.error ?? "", tc.arguments);
                 if (recovery)
@@ -566251,6 +566448,13 @@ Respond with EXACTLY this structure before your next tool call:
                 editFeedbackRequiredBeforeMoreEdits = this._buildBatchEditAtomicAbortGuidance(tc.arguments);
                 this.pendingUserMessages.push(editFeedbackRequiredBeforeMoreEdits);
               }
+              const currentLogEntry = toolCallLog[_toolLogTailIdx];
+              if (currentLogEntry) {
+                currentLogEntry.success = result.success;
+                currentLogEntry.mutated = realFileMutation;
+                currentLogEntry.mutatedFiles = realMutationPaths;
+                currentLogEntry.outputPreview = (result.success ? result.llmContent ?? result.output ?? output : result.error ?? result.output ?? output).toString().slice(0, 500);
+              }
               this.emit({
                 type: "tool_result",
                 toolName: tc.name,
@@ -566640,27 +566844,26 @@ ${sr.result.output}`;
                         timestamp: (/* @__PURE__ */ new Date()).toISOString()
                       });
                     } else {
-                      if (holdNoProgressTaskComplete(matchTc.arguments, turn)) {
+                      if (holdNoProgressTaskComplete(matchTc.arguments, turn) || holdProvenanceTaskComplete(matchTc.arguments, turn)) {
                         continue;
                       }
                       const _bp1 = await this._runBackwardPassReview(turn);
                       if (_bp1 && !_bp1.proceed && _bp1.feedback) {
-                        messages2.push({ role: "system", content: _bp1.feedback });
-                      } else {
-                        completed = true;
-                        summary = extractTaskCompleteSummary(matchTc.arguments);
-                        if (summary && !this._assistantTextEmitted) {
-                          this.emit({
-                            type: "assistant_text",
-                            content: summary,
-                            source: "task_complete_summary",
-                            turn,
-                            timestamp: (/* @__PURE__ */ new Date()).toISOString()
-                          });
-                          this._assistantTextEmitted = true;
-                        }
-                        break;
+                        emitBackwardPassAdvisory(_bp1.feedback, turn);
+                      }
+                      completed = true;
+                      summary = extractTaskCompleteSummary(matchTc.arguments);
+                      if (summary && !this._assistantTextEmitted) {
+                        this.emit({
+                          type: "assistant_text",
+                          content: summary,
+                          source: "task_complete_summary",
+                          turn,
+                          timestamp: (/* @__PURE__ */ new Date()).toISOString()
+                        });
+                        this._assistantTextEmitted = true;
                       }
+                      break;
                     }
                   }
                 }
@@ -566696,27 +566899,26 @@ ${sr.result.output}`;
                           timestamp: (/* @__PURE__ */ new Date()).toISOString()
                         });
                       } else {
-                        if (holdNoProgressTaskComplete(r2.tc.arguments, turn)) {
+                        if (holdNoProgressTaskComplete(r2.tc.arguments, turn) || holdProvenanceTaskComplete(r2.tc.arguments, turn)) {
                           continue;
                         }
                         const _bp2 = await this._runBackwardPassReview(turn);
                         if (_bp2 && !_bp2.proceed && _bp2.feedback) {
-                          messages2.push({ role: "system", content: _bp2.feedback });
-                        } else {
-                          completed = true;
-                          summary = extractTaskCompleteSummary(r2.tc.arguments);
-                          if (summary && !this._assistantTextEmitted) {
-                            this.emit({
-                              type: "assistant_text",
-                              content: summary,
-                              source: "task_complete_summary",
-                              turn,
-                              timestamp: (/* @__PURE__ */ new Date()).toISOString()
-                            });
-                            this._assistantTextEmitted = true;
-                          }
-                          break;
+                          emitBackwardPassAdvisory(_bp2.feedback, turn);
+                        }
+                        completed = true;
+                        summary = extractTaskCompleteSummary(r2.tc.arguments);
+                        if (summary && !this._assistantTextEmitted) {
+                          this.emit({
+                            type: "assistant_text",
+                            content: summary,
+                            source: "task_complete_summary",
+                            turn,
+                            timestamp: (/* @__PURE__ */ new Date()).toISOString()
+                          });
+                          this._assistantTextEmitted = true;
                         }
+                        break;
                       }
                     }
                   }
@@ -566788,27 +566990,26 @@ ${sr.result.output}`;
                           timestamp: (/* @__PURE__ */ new Date()).toISOString()
                         });
                       } else {
-                        if (holdNoProgressTaskComplete(r2.tc.arguments, turn)) {
+                        if (holdNoProgressTaskComplete(r2.tc.arguments, turn) || holdProvenanceTaskComplete(r2.tc.arguments, turn)) {
                           continue;
                         }
                         const _bp3 = await this._runBackwardPassReview(turn);
                         if (_bp3 && !_bp3.proceed && _bp3.feedback) {
-                          messages2.push({ role: "system", content: _bp3.feedback });
-                        } else {
-                          completed = true;
-                          summary = extractTaskCompleteSummary(r2.tc.arguments);
-                          if (summary && !this._assistantTextEmitted) {
-                            this.emit({
-                              type: "assistant_text",
-                              content: summary,
-                              source: "task_complete_summary",
-                              turn,
-                              timestamp: (/* @__PURE__ */ new Date()).toISOString()
-                            });
-                            this._assistantTextEmitted = true;
-                          }
-                          break;
+                          emitBackwardPassAdvisory(_bp3.feedback, turn);
                         }
+                        completed = true;
+                        summary = extractTaskCompleteSummary(r2.tc.arguments);
+                        if (summary && !this._assistantTextEmitted) {
+                          this.emit({
+                            type: "assistant_text",
+                            content: summary,
+                            source: "task_complete_summary",
+                            turn,
+                            timestamp: (/* @__PURE__ */ new Date()).toISOString()
+                          });
+                          this._assistantTextEmitted = true;
+                        }
+                        break;
                       }
                     }
                   }
@@ -566819,7 +567020,7 @@ ${sr.result.output}`;
             }
             if (completed)
               break;
-            this.littlemanObserve(messages2, turn);
+            this.adversaryObserve(messages2, turn);
             const currentRepScore = this.detectRepetition(toolCallLog);
             if (currentRepScore > 0.4 && toolCallLog.length >= 4) {
               const { repetitionWindow } = this.contextLimits();
@@ -567006,13 +567207,17 @@ Call task_complete(summary="...") NOW with whatever you have.`
               });
             }
             if (/task.?complete|all tests pass/i.test(content)) {
+              const completionArgs = { summary: content };
+              if (holdNoProgressTaskComplete(completionArgs, turn) || holdProvenanceTaskComplete(completionArgs, turn)) {
+                continue;
+              }
               completed = true;
               summary = content;
               break;
             }
             if (isThinkOnly) {
               if (consecutiveThinkOnly >= MAX_CONSECUTIVE_THINK_ONLY) {
-                const recentSuccesses = this._littlemanToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
+                const recentSuccesses = this._adversaryToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
                 const hasRecentSuccess = recentSuccesses.length > 0;
                 const successHint = hasRecentSuccess ? `
@@ -567263,7 +567468,8 @@ ${this.options.maxTurns && this.options.maxTurns > 0 ? `You have ${this.options.
               tools: toolDefs,
               temperature: this.options.temperature,
               maxTokens: this.options.maxTokens,
-              timeoutMs: this.options.requestTimeoutMs
+              timeoutMs: this.options.requestTimeoutMs,
+              numCtx: this.options.contextWindowSize || void 0
             };
             let response;
             try {
@@ -567568,13 +567774,12 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')")  or
                       timestamp: (/* @__PURE__ */ new Date()).toISOString()
                     });
                   } else {
-                    if (holdNoProgressTaskComplete(tc.arguments, turn)) {
+                    if (holdNoProgressTaskComplete(tc.arguments, turn) || holdProvenanceTaskComplete(tc.arguments, turn)) {
                       continue;
                     }
                     const _bp4 = await this._runBackwardPassReview(turn);
                     if (_bp4 && !_bp4.proceed && _bp4.feedback) {
-                      messages2.push({ role: "system", content: _bp4.feedback });
-                      continue;
+                      emitBackwardPassAdvisory(_bp4.feedback, turn);
                     }
                     completed = true;
                     summary = extractTaskCompleteSummary(tc.arguments);
@@ -567621,22 +567826,9 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')")  or
                     timestamp: (/* @__PURE__ */ new Date()).toISOString()
                   });
                 } else {
-                  if (process.env["OMNIUS_DISABLE_PROGRESS_GATES"] !== "1") {
-                    const gate = computeNoProgressCompletionGate({
-                      summary: content,
-                      toolCallLog,
-                      taskState: this._taskState
-                    });
-                    if (gate.shouldInject && gate.content) {
-                      messages2.push({ role: "system", content: gate.content });
-                      this.emit({
-                        type: "status",
-                        content: "text completion held: discovery happened but no deliverable or explicit blocker is recorded",
-                        turn,
-                        timestamp: (/* @__PURE__ */ new Date()).toISOString()
-                      });
-                      continue;
-                    }
+                  const completionArgs = { summary: content };
+                  if (holdNoProgressTaskComplete(completionArgs, turn) || holdProvenanceTaskComplete(completionArgs, turn)) {
+                    continue;
                   }
                   completed = true;
                   summary = content;
@@ -567660,7 +567852,7 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')")  or
               }
               if (isThinkOnlyBF) {
                 if (consecutiveThinkOnly >= MAX_CONSECUTIVE_THINK_ONLY) {
-                  const recentSucc = this._littlemanToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
+                  const recentSucc = this._adversaryToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
                   const succHint = recentSucc.length > 0 ? "\n\nYour most recent tool calls SUCCEEDED. If the task is complete, call task_complete now with a summary." : "";
                   messages2.push({
                     role: "user",
@@ -569748,36 +569940,35 @@ ${newerSummary}`;
 ${trimmedNew}`;
       }
       // -------------------------------------------------------------------------
-      // Littleman Observer — parallel meta-analysis of the main loop
+      // Adversary — parallel meta-analysis of the main loop
       // -------------------------------------------------------------------------
-      // Inspired by Hannover's fireCompanionObserver (src/buddy/observer.ts).
       // Runs after each tool turn to detect when the model has lost track of
       // what happened and inject corrections before the next inference.
       //
       // This is the architectural fix for the "I see both tools have been failing"
       // regression: instead of only fixing the data the model sees (mask/summary),
       // we add a second analysis path that catches mismatches in real-time.
-      /** Track recent tool outcomes for the littleman observer */
-      _littlemanToolOutcomes = [];
-      /** WO-FIX-C: Tool fingerprints the littleman has flagged as redundant.
-       *  Checked in executeSingle to block re-execution and return cached data. */
-      _littlemanRedundantBlocks = /* @__PURE__ */ new Set();
+      /** Track recent tool outcomes for the adversary */
+      _adversaryToolOutcomes = [];
+      /** WO-FIX-C: Tool fingerprints the adversary has flagged as redundant.
+       *  Checked in executeSingle to attach advisory guidance before dispatch. */
+      _adversaryRedundantSignals = /* @__PURE__ */ new Set();
       /** Reflexion pattern: task-local failure-indexed reflection buffer.
        *  Generates typed self-reflections on task failure and injects them
        *  into the next attempt's context for active learning. */
       _reflectionBuffer = null;
       /**
-       * Littleman observer: post-turn meta-analysis.
+       * Adversary: post-turn meta-analysis.
        *
        * Examines the last few messages looking for contradictions between
        * actual tool outcomes and the model's stated understanding. When it
        * detects the model claiming failure after success (or vice versa),
-       * it injects a corrective message.
+       * it injects a corrective non-blocking critique.
        *
        * Also detects repeated actions — when the model re-does something
-       * that already succeeded, the littleman nudges it to move on.
+       * that already succeeded, the adversary nudges it to move on.
        */
-      littlemanObserve(messages2, turn) {
+      adversaryObserve(messages2, turn) {
         if (this.options.modelTier === "large")
           return;
         const recent = messages2.slice(-6);
@@ -569806,8 +569997,8 @@ ${trimmedNew}`;
             }
             const argsKey = toolArgs ? this._buildExactArgsKey(toolArgs) : void 0;
             const fingerprint = toolArgs ? this._buildToolFingerprint(toolName, toolArgs) : void 0;
-            if (!this._littlemanToolOutcomes.some((o2) => o2.turn === turn && o2.tool === toolName && o2.fingerprint === fingerprint)) {
-              this._littlemanToolOutcomes.push({
+            if (!this._adversaryToolOutcomes.some((o2) => o2.turn === turn && o2.tool === toolName && o2.fingerprint === fingerprint)) {
+              this._adversaryToolOutcomes.push({
                 turn,
                 tool: toolName,
                 argsKey,
@@ -569818,27 +570009,47 @@ ${trimmedNew}`;
             }
           }
         }
-        while (this._littlemanToolOutcomes.length > 20)
-          this._littlemanToolOutcomes.shift();
+        while (this._adversaryToolOutcomes.length > 20)
+          this._adversaryToolOutcomes.shift();
         const emitReaction = (cls, shortText, confidence2, details2) => {
           this.emit({
-            type: "observer_reaction",
+            type: "adversary_reaction",
             timestamp: (/* @__PURE__ */ new Date()).toISOString(),
-            observer: { class: cls, shortText, confidence: confidence2, details: details2 }
+            adversary: { class: cls, shortText, confidence: confidence2, details: details2 }
           });
         };
+        const buildAdversaryCritique = (input) => {
+          const alternatives = input.alternatives && input.alternatives.length > 0 ? `
+Alternatives:
+${input.alternatives.map((item) => `- ${item}`).join("\n")}` : "";
+          return [
+            `[ADVERSARY CRITIQUE — non-blocking]`,
+            `Evidence: ${input.evidence}`,
+            `Root cause hypothesis: ${input.hypothesis}`,
+            `Corrective action: ${input.correctiveAction}${alternatives}`
+          ].join("\n");
+        };
         const lastAssistant = [...recent].reverse().find((m2) => m2.role === "assistant" && typeof m2.content === "string");
         if (lastAssistant && typeof lastAssistant.content === "string") {
           const text = lastAssistant.content.toLowerCase();
           const claimsFailure = /(?:fail|error|didn't work|not working|unable to|cannot|couldn't|both .* fail|tools? (?:have |has )?been fail)/i.test(text);
           if (claimsFailure) {
-            const recentOutcomes = this._littlemanToolOutcomes.slice(-4);
+            const recentOutcomes = this._adversaryToolOutcomes.slice(-4);
             const successes = recentOutcomes.filter((o2) => o2.succeeded);
             if (successes.length >= 1) {
               const successList = successes.map((o2) => `${o2.tool}: ${o2.preview.slice(0, 60)}`).join("; ");
               emitReaction("false_failure", `Claimed failure, but recent tools succeeded (${successes.length})`, 0.9, successList);
-              if (this._observerMode === "skillcoach" || this._observerMode === "both") {
-                this.pendingUserMessages.push(`⚠ Correction: recent tools DID succeed. Do not retry them. Successful results: ${successList}. Use them to advance the task.`);
+              if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
+                this.pendingUserMessages.push(buildAdversaryCritique({
+                  evidence: `Recent tools succeeded: ${successList}.`,
+                  hypothesis: "The main loop is interpreting uncertainty or partial progress as failure and may be about to discard usable evidence.",
+                  correctiveAction: "Use the successful results to advance the task, then verify the next concrete step.",
+                  alternatives: [
+                    "Edit or run the next verification step that follows from the successful output.",
+                    "Read a different targeted file if the successful result exposed a new path or symbol.",
+                    "Complete only if the successful output is sufficient evidence for the user's request."
+                  ]
+                }));
               }
               this.emit({
                 type: "status",
@@ -569852,47 +570063,67 @@ ${trimmedNew}`;
           const text = lastAssistant.content.toLowerCase();
           const claimsSuccess = /(done|fixed|success|passed|complete)/i.test(text);
           if (claimsSuccess) {
-            const recentOutcomes = this._littlemanToolOutcomes.slice(-4);
+            const recentOutcomes = this._adversaryToolOutcomes.slice(-4);
             const failures = recentOutcomes.filter((o2) => !o2.succeeded);
             const successes = recentOutcomes.filter((o2) => o2.succeeded);
             if (failures.length > 0 && successes.length === 0) {
               const failList = failures.map((o2) => `${o2.tool}: ${o2.preview.slice(0, 60)}`).join("; ");
               emitReaction("false_success", `Claimed success, but recent tools failed (${failures.length})`, 0.9, failList);
-              if (this._observerMode === "skillcoach" || this._observerMode === "both") {
-                this.pendingUserMessages.push(`⚠ Your recent tools show errors (${failures.length}). Verify the last tool output and correct the issue before claiming success.`);
+              if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
+                this.pendingUserMessages.push(buildAdversaryCritique({
+                  evidence: `Recent tools show errors (${failures.length}): ${failList}.`,
+                  hypothesis: "The main loop is prematurely compressing intent into success language before the verifier produced evidence.",
+                  correctiveAction: "Inspect the failed output, identify the implicated path/symbol/command, and run one focused corrective step before claiming success.",
+                  alternatives: [
+                    "Read the smallest relevant source region around the failed symbol.",
+                    "Patch the implicated code or configuration.",
+                    "Run the same verifier only after a state-changing fix."
+                  ]
+                }));
               }
             }
           }
         }
-        const lastToolCalls = recent.filter((m2) => m2.role === "assistant" && m2.tool_calls?.length).flatMap((m2) => m2.tool_calls ?? []);
-        for (const tc of lastToolCalls) {
-          const name10 = tc.function.name;
-          if (this._isStatefulBrowserTool(name10))
-            continue;
-          let args = {};
-          try {
-            args = JSON.parse(tc.function.arguments);
-          } catch {
-          }
-          const argsKey = this._buildExactArgsKey(args);
-          const fingerprint = this._buildToolFingerprint(name10, args);
-          const prior = this._littlemanToolOutcomes.find((o2) => o2.succeeded && o2.tool === name10 && o2.fingerprint === fingerprint && o2.turn < turn);
-          if (prior) {
-            this._littlemanRedundantBlocks.add(fingerprint);
-            emitReaction("redundant_action", `Already ran ${name10} successfully on turn ${prior.turn}`, 0.8, prior.preview);
-            if (this._observerMode === "skillcoach" || this._observerMode === "both") {
-              this.pendingUserMessages.push(`⚠ You already ran ${name10} successfully on turn ${prior.turn} with exact arguments (${argsKey.slice(0, 120)}). Do NOT re-run it. Use the existing result and proceed.`);
+        if (this.options.disableAdversaryCritic !== true) {
+          const lastToolCalls = recent.filter((m2) => m2.role === "assistant" && m2.tool_calls?.length).flatMap((m2) => m2.tool_calls ?? []);
+          for (const tc of lastToolCalls) {
+            const name10 = tc.function.name;
+            if (this._isStatefulBrowserTool(name10))
+              continue;
+            let args = {};
+            try {
+              args = JSON.parse(tc.function.arguments);
+            } catch {
+            }
+            const argsKey = this._buildExactArgsKey(args);
+            const fingerprint = this._buildToolFingerprint(name10, args);
+            const prior = this._adversaryToolOutcomes.find((o2) => o2.succeeded && o2.tool === name10 && o2.fingerprint === fingerprint && o2.turn < turn);
+            if (prior) {
+              this._adversaryRedundantSignals.add(fingerprint);
+              emitReaction("redundant_action", `Already ran ${name10} successfully on turn ${prior.turn}`, 0.8, prior.preview);
+              if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
+                this.pendingUserMessages.push(buildAdversaryCritique({
+                  evidence: `${name10} already succeeded on turn ${prior.turn} with exact arguments (${argsKey.slice(0, 120)}). Prior preview: ${prior.preview}`,
+                  hypothesis: "The main loop may have lost track of previously observed evidence because of context pressure, path confusion, or repeated discovery.",
+                  correctiveAction: "Let this duplicate run execute if needed, but treat the prior result as evidence and pivot afterward unless state has changed.",
+                  alternatives: [
+                    "Use the prior result to edit/write, verify, or finish with evidence.",
+                    "Read a different specific file or selector if the current evidence is insufficient.",
+                    "Repeat exact arguments only when filesystem, browser, or page state changed."
+                  ]
+                }));
+              }
+              this.emit({
+                type: "status",
+                content: `\x1B[38;5;178m⚠ Adversary noted redundant ${name10} call (succeeded on turn ${prior.turn}); action remains allowed\x1B[0m`,
+                timestamp: (/* @__PURE__ */ new Date()).toISOString()
+              });
+              break;
             }
-            this.emit({
-              type: "status",
-              content: `\x1B[38;5;178m⚠ Prevented redundant ${name10} call (succeeded on turn ${prior.turn})\x1B[0m`,
-              timestamp: (/* @__PURE__ */ new Date()).toISOString()
-            });
-            break;
           }
         }
         {
-          const recentCalls = this._littlemanToolOutcomes.slice(-5);
+          const recentCalls = this._adversaryToolOutcomes.slice(-5);
           if (recentCalls.length >= 3) {
             let consecutiveShortResults = 0;
             for (let i2 = recentCalls.length - 1; i2 >= 0; i2--) {
@@ -569905,30 +570136,39 @@ ${trimmedNew}`;
             }
             if (consecutiveShortResults >= 3) {
               emitReaction("idle_think", `Consecutive output without input: ${consecutiveShortResults}`, 0.7);
-              if (this._observerMode === "skillcoach" || this._observerMode === "both") {
-                this.pendingUserMessages.push(`⚠ You have sent ${consecutiveShortResults} consecutive outputs without reading any input. Alternate: receive input, then respond. Call your input tool now.`);
+              if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
+                this.pendingUserMessages.push(buildAdversaryCritique({
+                  evidence: `${consecutiveShortResults} consecutive output-like calls occurred without an input-like observation.`,
+                  hypothesis: "The loop may be acting from stale state instead of re-observing the environment.",
+                  correctiveAction: "Take one input/observation step before another output step.",
+                  alternatives: [
+                    "Call the input/listen/poll tool for the current environment.",
+                    "Read the current UI/page state before clicking or typing again.",
+                    "If the task is already complete, finish with the concrete evidence already observed."
+                  ]
+                }));
               }
               this.emit({
                 type: "status",
-                content: `\x1B[38;5;178m⚠ Blocked runaway output (${consecutiveShortResults} consecutive sends without receive)\x1B[0m`,
+                content: `\x1B[38;5;178m⚠ Adversary flagged runaway-output risk (${consecutiveShortResults} consecutive sends without receive); action remains allowed\x1B[0m`,
                 timestamp: (/* @__PURE__ */ new Date()).toISOString()
               });
             }
           }
         }
-        const succCount = this._littlemanToolOutcomes.filter((o2) => o2.succeeded).length;
-        const failCount = this._littlemanToolOutcomes.filter((o2) => !o2.succeeded).length;
-        const lastFour = this._littlemanToolOutcomes.slice(-4);
+        const succCount = this._adversaryToolOutcomes.filter((o2) => o2.succeeded).length;
+        const failCount = this._adversaryToolOutcomes.filter((o2) => !o2.succeeded).length;
+        const lastFour = this._adversaryToolOutcomes.slice(-4);
         const details = [
           `Recent tool outcomes:`,
           ...lastFour.map((o2) => `- ${o2.tool}: ${o2.succeeded ? "OK" : "ERR"} — ${o2.preview}`)
         ].join("\n");
         this.emit({
-          type: "debug_littleman",
+          type: "debug_adversary",
           turn,
           timestamp: (/* @__PURE__ */ new Date()).toISOString(),
-          content: `Littleman: ${this._littlemanToolOutcomes.length} tracked outcomes (${succCount} ok, ${failCount} err)`,
-          littlemanAction: {
+          content: `Adversary: ${this._adversaryToolOutcomes.length} tracked outcomes (${succCount} ok, ${failCount} err)`,
+          adversaryAction: {
             detection: "none",
             recentSuccesses: succCount,
             recentFailures: failCount,
@@ -650870,7 +651110,7 @@ ${conversationStream}`
           // off default rather than the global config's value.
           thinking: false,
           // Telegram sub-agent runs must be bounded. Brute-force re-engagement and
-          // the Littleman near-cap turn extension are appropriate for the full TUI
+          // the Adversary near-cap turn extension are appropriate for the full TUI
           // session but cause Telegram to silently outgrow its nominal maxTurns,
           // which is how the Snow Crash PDF loop reached 60+ turns of self-talk.
           ...TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS
@@ -683233,8 +683473,8 @@ ${entry.fullContent}`
   let streamTextBuffer = "";
   let lastAssistantText = "";
   let lastProvenancePath = null;
-  let showLittleman = false;
-  const littlemanBuffer = [];
+  let showAdversary = false;
+  const adversaryBuffer = [];
   const contentWrite = (fn) => {
     if (isNeovimActive()) {
       const origWrite = process.stdout.write;
@@ -683718,24 +683958,24 @@ ${entry.fullContent}`
           if (snap) {
             contentWrite(
               () => renderInfo(
-                `\x1B[38;5;243m[ctx] ${snap.messageCount} msgs | ~${snap.estimatedTokens} tok | headroom: ${snap.headroom} | tools: ${snap.toolCallCount} | littleman: ${snap.littlemanOutcomes} tracked\x1B[0m`
+                `\x1B[38;5;243m[ctx] ${snap.messageCount} msgs | ~${snap.estimatedTokens} tok | headroom: ${snap.headroom} | tools: ${snap.toolCallCount} | adversary: ${snap.adversaryOutcomes} tracked\x1B[0m`
               )
             );
           }
         }
         break;
-      case "debug_littleman":
-        if (event.littlemanAction) {
-          const lm = event.littlemanAction;
+      case "debug_adversary":
+        if (event.adversaryAction) {
+          const lm = event.adversaryAction;
           if (lm.intervention) {
             const simple = `⚠ ${lm.intervention}`;
             contentWrite(() => renderInfo(simple));
           }
           if (lm.details) {
-            littlemanBuffer.push(lm.details);
-            if (littlemanBuffer.length > 50)
-              littlemanBuffer.splice(0, littlemanBuffer.length - 50);
-            if (showLittleman) {
+            adversaryBuffer.push(lm.details);
+            if (adversaryBuffer.length > 50)
+              adversaryBuffer.splice(0, adversaryBuffer.length - 50);
+            if (showAdversary) {
               const det = String(lm.details);
               contentWrite(() => {
                 process.stdout.write(c3.dim(det) + "\n");
@@ -685477,8 +685717,8 @@ This is an independent background session started from /background.`
   origTtyWriteRef = null;
   statusBar.setNeovimFocusChecker(() => isNeovimFocused());
   let _escapeHandler = null;
-  let showLittleman = false;
-  const littlemanBuffer = [];
+  let showAdversary = false;
+  const adversaryBuffer = [];
   statusBar.hookDirectInput(
     rl,
     () => {
@@ -685511,26 +685751,26 @@ This is an independent background session started from /background.`
       }
     },
     () => {
-      showLittleman = !showLittleman;
+      showAdversary = !showAdversary;
       if (statusBar.isActive) {
         try {
           statusBar.jumpToLive();
         } catch {
         }
         statusBar.beginContentWrite();
-        if (showLittleman) {
-          renderInfo("Littleman details: shown");
-          const dump = littlemanBuffer.slice(-10).join("\n");
+        if (showAdversary) {
+          renderInfo("Adversary details: shown");
+          const dump = adversaryBuffer.slice(-10).join("\n");
           if (dump.trim()) {
             process.stdout.write(`
-  ${c3.dim("[littleman recap]")}
+  ${c3.dim("[adversary recap]")}
 `);
             for (const line of dump.split("\n")) {
               process.stdout.write("  " + c3.dim(line) + "\n");
             }
           }
         } else {
-          renderInfo("Littleman details: hidden");
+          renderInfo("Adversary details: hidden");
         }
         statusBar.endContentWrite();
       }