omnius 1.0.211 → 1.0.213

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -551582,28 +551582,38 @@ var init_personality = __esm({
551582
551582
  });
551583
551583
 
551584
551584
  // packages/orchestrator/dist/critic.js
551585
- function buildForceProgressBlockMessage(call, hits) {
551585
+ function buildCriticGuidanceMessage(call, hits, opts = {}) {
551586
551586
  const argPreview = JSON.stringify(call.args ?? {}).slice(0, 200);
551587
- return `[FORCED PROGRESS BLOCK — duplicate ${call.tool} call skipped; this is not a tool failure. You have called ${call.tool}(${argPreview}) ${hits} times with identical arguments. The runtime did not re-run the tool; it is returning the prior result below so you can proceed without retrying.
551588
-
551589
- Progress is REQUIRED before this tool will run again with the same arguments. To proceed, do one of these:
551590
- file_write or file_edit to make progress, OR
551591
- todo_write that advances the plan, OR
551592
- task_complete (if all phases are done), OR
551593
- Call a different tool or use different arguments.]`;
551587
+ const cached = opts.cachedResult ? `
551588
+ Prior evidence preview:
551589
+ ${opts.cachedResult.slice(0, 700)}` : "";
551590
+ const source = opts.adversaryFlag ? "The adversary recognized this exact tool call as already observed earlier." : `This is exact repeat #${hits} for the same ${call.tool} arguments.`;
551591
+ return `[ADVERSARY GUIDANCE non-blocking]
551592
+ Observation: ${source}
551593
+ Call: ${call.tool}(${argPreview})
551594
+ Root cause hypothesis: the run is losing track of already-observed evidence, usually after path confusion, compaction, or an over-broad discovery loop.
551595
+ Corrective action: let this call's result inform the next step once, then pivot to a concrete action.
551596
+ Suggested next actions: edit/write the implicated file, run verification, read a different specific file, or complete with evidence. Prefer not to repeat this exact call again unless the filesystem, browser, or page state changed.${cached}`;
551594
551597
  }
551595
551598
  function buildCachedResultEnvelope(result) {
551596
- return `[CACHED RESULT — you already have this information from a prior identical call. Do NOT call this tool again with the same arguments.]
551599
+ return `[PRIOR RESULT — already observed by a prior identical call]
551597
551600
  ${result}`;
551598
551601
  }
551599
551602
  function evaluate2(inputs) {
551600
- const { proposedCall, fingerprint, isReadLike, recentToolResults, dedupHitCount, observerRedundantBlock } = inputs;
551601
- if (observerRedundantBlock) {
551603
+ const { proposedCall, fingerprint, isReadLike, recentToolResults, dedupHitCount, adversaryRedundantSignal } = inputs;
551604
+ if (adversaryRedundantSignal) {
551602
551605
  const cached = recentToolResults.get(fingerprint);
551606
+ const cachedResult = cached ? buildCachedResultEnvelope(cached.result) : void 0;
551603
551607
  return {
551604
- decision: "observer_block",
551605
- reason: "Littleman observer flagged this fingerprint as redundant",
551606
- cachedResult: cached ? buildCachedResultEnvelope(cached.result) : null
551608
+ decision: "guidance",
551609
+ reason: "Adversary flagged this fingerprint as redundant",
551610
+ hitNumber: (dedupHitCount.get(fingerprint) ?? 0) + 1,
551611
+ guidanceMessage: buildCriticGuidanceMessage(proposedCall, (dedupHitCount.get(fingerprint) ?? 0) + 1, {
551612
+ cachedResult,
551613
+ adversaryFlag: true
551614
+ }),
551615
+ cachedResult,
551616
+ compacted: cached?.compacted
551607
551617
  };
551608
551618
  }
551609
551619
  const cacheEligible = isReadLike || proposedCall.tool === "shell";
@@ -551611,24 +551621,16 @@ function evaluate2(inputs) {
551611
551621
  const cached = recentToolResults.get(fingerprint);
551612
551622
  if (cached !== void 0) {
551613
551623
  const hits = (dedupHitCount.get(fingerprint) ?? 0) + 1;
551614
- const threshold = proposedCall.tool === "shell" ? SHELL_THRESHOLD : FS_THRESHOLD;
551615
- if (hits >= threshold) {
551616
- return {
551617
- decision: "force_progress_block",
551618
- reason: `${proposedCall.tool} fingerprint hit count ${hits} >= ${threshold}`,
551619
- hitNumber: hits,
551620
- blockMessage: buildForceProgressBlockMessage(proposedCall, hits),
551621
- cachedResult: buildCachedResultEnvelope(cached.result),
551622
- compacted: cached.compacted
551623
- };
551624
- }
551625
551624
  const cachedEnvelope = buildCachedResultEnvelope(cached.result);
551626
551625
  return {
551627
- decision: "serve_cached",
551628
- reason: cached.compacted ? "post-compaction cache re-serve" : `duplicate call #${hits} (still under ${threshold}-hit gate)`,
551626
+ decision: "guidance",
551627
+ reason: cached.compacted ? "post-compaction duplicate evidence" : `duplicate call #${hits}`,
551629
551628
  cachedResult: cachedEnvelope,
551630
551629
  compacted: cached.compacted,
551631
- hitNumber: hits
551630
+ hitNumber: hits,
551631
+ guidanceMessage: buildCriticGuidanceMessage(proposedCall, hits, {
551632
+ cachedResult: cachedEnvelope
551633
+ })
551632
551634
  };
551633
551635
  }
551634
551636
  }
@@ -551680,12 +551682,9 @@ function isStagnant(signals, opts) {
551680
551682
  return false;
551681
551683
  return signals.completedDelta <= 0 && signals.filesDelta < filesDeltaMin && signals.failureSum >= failureThreshold && signals.variantCount >= variantThreshold;
551682
551684
  }
551683
- var SHELL_THRESHOLD, FS_THRESHOLD;
551684
551685
  var init_critic = __esm({
551685
551686
  "packages/orchestrator/dist/critic.js"() {
551686
551687
  "use strict";
551687
- SHELL_THRESHOLD = 2;
551688
- FS_THRESHOLD = 3;
551689
551688
  }
551690
551689
  });
551691
551690
 
@@ -558656,8 +558655,8 @@ var init_agenticRunner = __esm({
558656
558655
  // WO-KG-15
558657
558656
  _retrievalContextCache = null;
558658
558657
  // WO-KG-15: cache per-run
558659
- // Observer world-model and cohort stats
558660
- _observerMode = "both";
558658
+ // Adversary world-model and cohort stats
558659
+ _adversaryMode = "both";
558661
558660
  _worldFacts = { files: /* @__PURE__ */ new Map(), lastTest: {}, lastLists: /* @__PURE__ */ new Map() };
558662
558661
  // REG-7-root: Track file writes since last todo_write call. When this
558663
558662
  // counter climbs without a todo update, the agent has likely batched
@@ -559006,6 +559005,8 @@ var init_agenticRunner = __esm({
559006
559005
  _sessionId = `session-${Date.now()}`;
559007
559006
  _workingDirectory = "";
559008
559007
  constructor(backend, options2) {
559008
+ const adversaryMode = options2?.adversaryMode ?? options2?.observerMode ?? "both";
559009
+ const disableAdversaryCritic = options2?.disableAdversaryCritic ?? options2?.disableStepCritic ?? false;
559009
559010
  this.backend = backend;
559010
559011
  this.options = {
559011
559012
  maxTurns: options2?.maxTurns ?? 60,
@@ -559029,19 +559030,23 @@ var init_agenticRunner = __esm({
559029
559030
  bruteForce: options2?.bruteForce ?? true,
559030
559031
  bruteForceMaxCycles: options2?.bruteForceMaxCycles ?? 100,
559031
559032
  allowTurnExtension: options2?.allowTurnExtension ?? true,
559033
+ completionProvenanceGuard: options2?.completionProvenanceGuard ?? true,
559034
+ disableAdversaryCritic,
559035
+ disableStepCritic: disableAdversaryCritic,
559032
559036
  modelTier: options2?.modelTier ?? "large",
559033
559037
  contextWindowSize: options2?.contextWindowSize ?? 0,
559034
559038
  personality: options2?.personality ?? PERSONALITY_PRESETS.balanced,
559035
559039
  personalityName: options2?.personalityName ?? "",
559036
559040
  finalVarResolver: options2?.finalVarResolver ?? void 0,
559037
- observerMode: options2?.observerMode ?? "both",
559041
+ adversaryMode,
559042
+ observerMode: adversaryMode,
559038
559043
  // Phase 4 — sub-agent isolation flag (defaults false). When true, this
559039
559044
  // runner skips cross-task handoff inheritance from the parent's
559040
559045
  // session.
559041
559046
  subAgent: options2?.subAgent ?? false,
559042
559047
  skipCrossTaskHandoff: options2?.skipCrossTaskHandoff ?? false
559043
559048
  };
559044
- this._observerMode = this.options.observerMode;
559049
+ this._adversaryMode = this.options.adversaryMode;
559045
559050
  }
559046
559051
  /** Update context window size (e.g. after querying Ollama /api/show) */
559047
559052
  setContextWindowSize(size) {
@@ -559049,7 +559054,10 @@ var init_agenticRunner = __esm({
559049
559054
  }
559050
559055
  /** Set the working directory for session checkpointing */
559051
559056
  setWorkingDirectory(dir) {
559052
- this._workingDirectory = dir;
559057
+ this._workingDirectory = _pathResolve(dir);
559058
+ }
559059
+ authoritativeWorkingDirectory() {
559060
+ return _pathResolve(this._workingDirectory || process.cwd());
559053
559061
  }
559054
559062
  /** State root for runner-owned memory/artifacts. Defaults to cwd/.omnius. */
559055
559063
  omniusStateDir() {
@@ -559822,7 +559830,7 @@ ${result.output ?? ""}`;
559822
559830
  * checklist via todo_write, and only then call task_complete.
559823
559831
  */
559824
559832
  /**
559825
- * REG-39c: tag a SYNTHETIC failure (FORCED PROGRESS BLOCK / observer
559833
+ * REG-39c: tag a SYNTHETIC failure (FORCED PROGRESS BLOCK / adversary
559826
559834
  * block / budget exhausted). These paths return early from
559827
559835
  * executeSingle BEFORE the main result-handling code, so the normal
559828
559836
  * MAST tagging miss them. This helper lets each return-early site
@@ -559898,6 +559906,198 @@ Do NOT call task_complete until all items are marked completed via todo_write.`;
559898
559906
  `Continue the work loop: inspect the failed evidence, make the smallest targeted fix, then rerun the relevant verification. Use the full available verification spectrum for the artifact you changed: static syntax, build/typecheck, tests, service startup, runtime logs, browser/page errors, console output, network failures, screenshots, accessibility/DOM state, and end-to-end user flow checks where applicable. The exact tools are stack-dependent; the standard is objective runtime evidence, not self-report.`
559899
559907
  ].join("\n");
559900
559908
  }
559909
+ buildMissionCompletionContract(task, context2) {
559910
+ if (process.env["OMNIUS_DISABLE_MISSION_COMPLETION_CONTRACT"] === "1")
559911
+ return "";
559912
+ const profile = this._inferCompletionProfile(`${task}
559913
+ ${context2 ?? ""}`);
559914
+ const requirements = [];
559915
+ if (profile.browser)
559916
+ requirements.push("browser/UI state must be proven by a post-action screenshot/DOM/observe_bundle pass");
559917
+ if (profile.desktop)
559918
+ requirements.push("desktop state must be proven by vision_action_loop observe or desktop_describe after the final action");
559919
+ if (profile.code)
559920
+ requirements.push("code/file changes must be proven by a relevant build/test/typecheck/runtime command after the last edit");
559921
+ if (profile.research)
559922
+ requirements.push("research/root-cause claims must cite concrete inspected files, commands, logs, or source artifacts");
559923
+ if (requirements.length === 0)
559924
+ requirements.push("final claims must name the concrete evidence used or state that the task required no external action");
559925
+ return [
559926
+ `[MISSION COMPLETION CONTRACT]`,
559927
+ `Current ask: ${task.slice(0, 500)}`,
559928
+ ``,
559929
+ `Before claiming success or calling task_complete, satisfy the mission-specific evidence requirements:`,
559930
+ ...requirements.map((line) => `- ${line}.`),
559931
+ ``,
559932
+ `The final task_complete summary for any action-heavy task must include a compact Provenance/Evidence note naming the validating tool output, command, screenshot, DOM state, file path, or blocker. Self-confidence is not evidence.`,
559933
+ `For browser/form/account/send flows: after the last click/type/navigate/submit action, capture a fresh browser observation and verify the visible final state before completion.`,
559934
+ `If completion is impossible, use a summary beginning BLOCKED: and name the exact blocker plus the evidence already collected.`
559935
+ ].join("\n");
559936
+ }
559937
+ _inferCompletionProfile(text) {
559938
+ const t2 = text.toLowerCase();
559939
+ const browser3 = /\b(browser|web\s*page|website|page|playwright|selenium|chromedriver|chrome|headless|gui|proton|login|captcha|form|account|compose|mail|submit|click|type|fill)\b/.test(t2);
559940
+ const desktop = /\b(desktop|screen|application|app\b|window|file manager|open a file|laptop|screenshot|vision_action_loop|desktop_describe|desktop_click)\b/.test(t2);
559941
+ const code8 = /\b(implement|fix|patch|refactor|rewrite|build|compile|typecheck|test suite|unit test|integration test|source file|codebase|package|typescript|javascript|python|rust|golang)\b/.test(t2);
559942
+ const research = /\b(discover|root cause|triage|deep dive|review|audit|investigate|prove|validate|forensics|diagnostic|failure mode)\b/.test(t2);
559943
+ const formLike = /\b(form|fill|submit|signup|sign up|login|log in|account|compose|send|sent|mail|captcha|checkout|payment|upload)\b/.test(t2);
559944
+ return { browser: browser3, desktop, code: code8, research, formLike };
559945
+ }
559946
+ _completionSummaryHasProvenance(summary) {
559947
+ return /\b(provenance|evidence|verified|validated|confirmed|observed|screenshot|dom|console|network|log|test|typecheck|build|passed|opened|sent|created|submitted|blocked)\b/i.test(summary);
559948
+ }
559949
+ _isBlockedCompletionSummary(summary) {
559950
+ return /^\s*(?:BLOCKED|PARTIAL|NO FILE CHANGES REQUIRED)\b/i.test(summary);
559951
+ }
559952
+ _browserActionKind(entry) {
559953
+ if (!/^(browser_action|playwright_browser|carbonyl_browser)$/.test(entry.name))
559954
+ return "other";
559955
+ const args = this._parseExactArgsKey(entry.argsKey);
559956
+ const action = String(args.get("action") ?? args.get("tool") ?? args.get("command") ?? "").toLowerCase();
559957
+ if (/^(screenshot|dom|dom_summary|observe|observe_bundle|page_errors|console_logs|network_log|accessibility|snapshot|state|url|title)$/.test(action)) {
559958
+ return "observe";
559959
+ }
559960
+ if (/^(navigate|click|click_xy|vision_click|visual_click|type|fill|press|select|submit|evaluate|scroll|scroll_up|scroll_down|back|forward)$/.test(action)) {
559961
+ return "state";
559962
+ }
559963
+ return entry.name === "carbonyl_browser" ? "state" : "other";
559964
+ }
559965
+ _desktopActionKind(entry) {
559966
+ if (/^(desktop_describe|screenshot)$/.test(entry.name))
559967
+ return "observe";
559968
+ if (entry.name === "desktop_click")
559969
+ return "state";
559970
+ if (entry.name !== "vision_action_loop")
559971
+ return "other";
559972
+ const args = this._parseExactArgsKey(entry.argsKey);
559973
+ const action = String(args.get("action") ?? "").toLowerCase();
559974
+ if (/^(observe|screenshot|describe|ocr|state)$/.test(action))
559975
+ return "observe";
559976
+ if (/^(click|type|key|press|open|focus|drag|scroll)$/.test(action))
559977
+ return "state";
559978
+ return "other";
559979
+ }
559980
+ _isVerificationShell(entry) {
559981
+ if (entry.name !== "shell" && entry.name !== "background_run")
559982
+ return false;
559983
+ if (entry.success !== true)
559984
+ return false;
559985
+ const args = this._parseExactArgsKey(entry.argsKey);
559986
+ const command = String(args.get("command") ?? args.get("cmd") ?? "").toLowerCase();
559987
+ return /\b(test|typecheck|check|build|compile|verify|lint|pytest|vitest|jest|playwright|cypress|tsc|cargo\s+test|go\s+test|npm\s+run|pnpm\s+run|yarn\s+run)\b/.test(command);
559988
+ }
559989
+ _evaluateCompletionProvenanceGate(input) {
559990
+ if (this.options.completionProvenanceGuard === false)
559991
+ return { proceed: true };
559992
+ if (process.env["OMNIUS_DISABLE_COMPLETION_PROVENANCE_GUARD"] === "1")
559993
+ return { proceed: true };
559994
+ const summary = input.summary || "";
559995
+ const blockedSummary = this._isBlockedCompletionSummary(summary);
559996
+ const profile = this._inferCompletionProfile(input.taskGoal);
559997
+ const log22 = input.toolCallLog.filter((entry) => entry.name !== "task_complete");
559998
+ const browserUsed = log22.some((entry) => /^(browser_action|playwright_browser|carbonyl_browser)$/.test(entry.name));
559999
+ const desktopUsed = log22.some((entry) => /^(desktop_describe|desktop_click|vision_action_loop|screenshot)$/.test(entry.name));
560000
+ const mutated = log22.some((entry) => entry.mutated === true);
560001
+ const issues = [];
560002
+ const actionHeavy = profile.browser || profile.desktop || profile.code || profile.research || browserUsed || desktopUsed || mutated || this._fileWritesThisRun > 0;
560003
+ if (!actionHeavy)
560004
+ return { proceed: true };
560005
+ if (blockedSummary)
560006
+ return { proceed: true };
560007
+ const successfulNonCompletion = log22.filter((entry) => entry.success === true);
560008
+ if (successfulNonCompletion.length === 0) {
560009
+ issues.push("No successful objective tool result is recorded for this action-oriented task.");
560010
+ }
560011
+ const requiresBrowser = profile.browser || browserUsed;
560012
+ const requiresDesktop = profile.desktop || desktopUsed;
560013
+ if (requiresBrowser) {
560014
+ let lastStateIdx = -1;
560015
+ let lastObserveIdx = -1;
560016
+ log22.forEach((entry, idx) => {
560017
+ if (entry.success !== true)
560018
+ return;
560019
+ const kind = this._browserActionKind(entry);
560020
+ if (kind === "state")
560021
+ lastStateIdx = idx;
560022
+ if (kind === "observe")
560023
+ lastObserveIdx = idx;
560024
+ });
560025
+ if (lastStateIdx >= 0 && lastObserveIdx <= lastStateIdx) {
560026
+ issues.push("Browser state changed after the last browser observation. Capture a fresh screenshot/DOM/observe_bundle after the final click/type/navigate/submit before completion.");
560027
+ } else if (profile.formLike && lastObserveIdx < 0) {
560028
+ issues.push("This looks like a form/account/send flow, but no successful post-action browser observation is recorded.");
560029
+ }
560030
+ }
560031
+ if (requiresDesktop) {
560032
+ let lastStateIdx = -1;
560033
+ let lastObserveIdx = -1;
560034
+ log22.forEach((entry, idx) => {
560035
+ if (entry.success !== true)
560036
+ return;
560037
+ const kind = this._desktopActionKind(entry);
560038
+ if (kind === "state")
560039
+ lastStateIdx = idx;
560040
+ if (kind === "observe")
560041
+ lastObserveIdx = idx;
560042
+ });
560043
+ if (lastStateIdx >= 0 && lastObserveIdx <= lastStateIdx) {
560044
+ issues.push("Desktop state changed after the last visual observation. Run vision_action_loop observe or desktop_describe after the final desktop action before completion.");
560045
+ }
560046
+ }
560047
+ const mutatedEntries = log22.map((entry, idx) => ({ entry, idx })).filter(({ entry }) => entry.mutated === true);
560048
+ if (profile.code || mutatedEntries.length > 0 || this._fileWritesThisRun > 0) {
560049
+ const lastMutationIdx = mutatedEntries.length > 0 ? Math.max(...mutatedEntries.map(({ idx }) => idx)) : -1;
560050
+ const verifiedAfterMutation = log22.some((entry, idx) => {
560051
+ if (lastMutationIdx >= 0 && idx <= lastMutationIdx)
560052
+ return false;
560053
+ if (this._isVerificationShell(entry))
560054
+ return true;
560055
+ if (entry.success === true && this._browserActionKind(entry) === "observe")
560056
+ return true;
560057
+ if (entry.success === true && this._desktopActionKind(entry) === "observe")
560058
+ return true;
560059
+ return false;
560060
+ });
560061
+ if (lastMutationIdx >= 0 && !verifiedAfterMutation) {
560062
+ issues.push("Files were changed but no successful verification command or runtime observation appears after the last mutation.");
560063
+ }
560064
+ }
560065
+ const lastNonCompletion = [...log22].reverse().find(Boolean);
560066
+ if (lastNonCompletion?.success === false) {
560067
+ issues.push(`The most recent non-completion tool result failed (${lastNonCompletion.name}); resolve or explicitly report BLOCKED before completing.`);
560068
+ }
560069
+ if (!this._completionSummaryHasProvenance(summary)) {
560070
+ issues.push("The completion summary does not include an explicit Evidence/Provenance note.");
560071
+ }
560072
+ if (issues.length === 0)
560073
+ return { proceed: true };
560074
+ const recentEvidence = successfulNonCompletion.slice(-6).map((entry) => {
560075
+ const preview = (entry.outputPreview ?? "").replace(/\s+/g, " ").slice(0, 160);
560076
+ return `- ${entry.name}(${entry.argsKey.slice(0, 120)})${preview ? ` -> ${preview}` : ""}`;
560077
+ }).join("\n");
560078
+ return {
560079
+ proceed: false,
560080
+ reason: issues[0] ?? "missing provenance",
560081
+ feedback: [
560082
+ `[COMPLETION PROVENANCE REQUIRED]`,
560083
+ ``,
560084
+ `You attempted to finish, but the completion claim is not yet proven against the current mission.`,
560085
+ ``,
560086
+ `Blocking issues:`,
560087
+ ...issues.map((issue, index) => `${index + 1}. ${issue}`),
560088
+ ``,
560089
+ recentEvidence ? `Recent successful evidence already available:
560090
+ ${recentEvidence}` : `Recent successful evidence already available: none recorded.`,
560091
+ ``,
560092
+ `Do the smallest missing verification step now. For browser/UI work, take a fresh screenshot/DOM/observe_bundle after the final action. For desktop work, run vision_action_loop observe or desktop_describe after the final action. For code/file changes, run the relevant build/test/typecheck/runtime check after the last edit.`,
560093
+ ``,
560094
+ `Only then call task_complete with this shape:`,
560095
+ `Summary: <what changed or what final state was reached>`,
560096
+ `Provenance: <tool/command/screenshot/DOM/log/file evidence proving it>`,
560097
+ `If impossible, call task_complete with summary starting BLOCKED: and name the blocker plus evidence.`
560098
+ ].join("\n")
560099
+ };
560100
+ }
559901
560101
  /**
559902
560102
  * REG-47: post-implementation backward-pass review.
559903
560103
  *
@@ -561174,7 +561374,7 @@ ${latest.output || ""}`.trim();
561174
561374
  }
561175
561375
  }
561176
561376
  const sections = [
561177
- "[KNOWLEDGE — cached tool results already known to the runtime. Do NOT re-call these tools with the same arguments:]"
561377
+ "[KNOWLEDGE — cached tool results already known to the runtime. Repeating an exact read/list/search/shell call is a wasted action and will be blocked or served from cache:]"
561178
561378
  ];
561179
561379
  if (compactedCount > 0) {
561180
561380
  sections.push(`Compacted cached entries still count as already-known results (${compactedCount}); an exact repeat will be served from cache or skipped, not produce new information.`);
@@ -561186,6 +561386,7 @@ ${latest.output || ""}`.trim();
561186
561386
  if (dirsListed.length > 0) {
561187
561387
  const unique2 = [...new Set(dirsListed)].slice(0, 15);
561188
561388
  sections.push(`Directories already listed (${unique2.length}): ${unique2.join(", ")}`);
561389
+ sections.push(`Do not call list_directory again on these exact directories unless you changed their contents. Use the listed child paths directly with file_read/edit/delegation.`);
561189
561390
  }
561190
561391
  if (searches.length > 0) {
561191
561392
  const unique2 = [...new Set(searches)].slice(0, 15);
@@ -561199,6 +561400,23 @@ ${latest.output || ""}`.trim();
561199
561400
  return null;
561200
561401
  return sections.join("\n");
561201
561402
  }
561403
+ _renderRuntimeRootBlock() {
561404
+ const authoritative = this.authoritativeWorkingDirectory();
561405
+ const proc = _pathResolve(process.cwd());
561406
+ const lines = [
561407
+ `[RUNTIME ROOT — authoritative]`,
561408
+ `Current working directory for this run: ${authoritative}`,
561409
+ `All relative file/tool paths resolve under this directory unless the tool call uses an absolute path.`,
561410
+ `Do not infer cwd from old tasks, shell transcripts, memory, or prior browser sessions.`
561411
+ ];
561412
+ if (proc !== authoritative) {
561413
+ lines.push(`Process cwd differs (${proc}); treat the run cwd above as authoritative for repo/project work.`);
561414
+ }
561415
+ if (this._worldFacts.lastCwd && this._worldFacts.lastCwd !== authoritative) {
561416
+ lines.push(`Last shell cd target was command-local only: ${this._worldFacts.lastCwd}. It does not change the run cwd.`);
561417
+ }
561418
+ return lines.join("\n");
561419
+ }
561202
561420
  _insertContextFrame(messages2, frame) {
561203
561421
  if (!frame)
561204
561422
  return;
@@ -561236,7 +561454,7 @@ ${latest.output || ""}`.trim();
561236
561454
  add2(this._activeContextItem("task_state", "todo-state", "turn.todos", "Todo state", input.todoBlock, 80));
561237
561455
  add2(this._activeContextItem("recent_failure", "recent-failures", "turn.failures", "Recent failures", input.failureBlock, 95));
561238
561456
  add2(this._activeContextItem("recent_failure", "write-churn", "turn.churn", "Write churn", input.churnBlock, 75));
561239
- add2(this._activeContextItem("tool_cache", "tool-cache", "turn.tool-cache", "Tool cache", input.toolCacheBlock, 65));
561457
+ add2(this._activeContextItem("tool_cache", "tool-cache", "turn.tool-cache", "Tool cache", input.toolCacheBlock, 92));
561240
561458
  add2(this._activeContextItem("anchor", "anchors", "turn.anchors", "Relevant anchors", input.anchorsBlock, 50));
561241
561459
  add2(this._activeContextItem("environment", "environment", "turn.environment", "Environment", input.environmentBlock, 35));
561242
561460
  if (this._lastPprMemoryLines.length > 0) {
@@ -561491,7 +561709,10 @@ ${chunk.content}`, {
561491
561709
  async _buildTurnContextFrame(turn, messages2, recentToolResults, environmentBlock) {
561492
561710
  this._contextLedger.clearSources("turn.");
561493
561711
  this._contextLedger.prune(turn);
561494
- const goalBlock = this._taskState.goal ? `Active task: ${this._taskState.goal}` : null;
561712
+ const goalBlock = [
561713
+ this._renderRuntimeRootBlock(),
561714
+ this._taskState.goal ? `Active task: ${this._taskState.goal}` : null
561715
+ ].filter(Boolean).join("\n\n");
561495
561716
  const filesystemBlock = this._renderFilesystemStateBlock(turn);
561496
561717
  const todoBlock = this._renderTodoStateBlock(turn);
561497
561718
  const failureBlock = this._renderRecentFailuresBlock(turn);
@@ -561557,7 +561778,7 @@ ${this._lastPprMemoryLines.slice(0, 5).join("\n")}` : null;
561557
561778
  signalFromBlock("tool_cache", "turn.tool-cache", toolCacheBlock, {
561558
561779
  id: "tool-cache",
561559
561780
  dedupeKey: "turn.tool-cache",
561560
- priority: 65,
561781
+ priority: 92,
561561
561782
  createdTurn: turn,
561562
561783
  ttlTurns: 1
561563
561784
  }),
@@ -562409,8 +562630,8 @@ ${notice}`;
562409
562630
  const window2 = recentToolCalls.slice(-repetitionWindow);
562410
562631
  const uniqueKeys = new Set(window2.map((tc) => `${tc.name}:${tc.argsKey}`));
562411
562632
  const ratio = 1 - uniqueKeys.size / window2.length;
562412
- if (ratio > 0.4 && this._littlemanToolOutcomes.length >= 3) {
562413
- const recentOutcomes = this._littlemanToolOutcomes.slice(-6);
562633
+ if (ratio > 0.4 && this._adversaryToolOutcomes.length >= 3) {
562634
+ const recentOutcomes = this._adversaryToolOutcomes.slice(-6);
562414
562635
  const uniquePreviews = new Set(recentOutcomes.map((o2) => o2.preview.slice(0, 40)));
562415
562636
  if (uniquePreviews.size >= 3) {
562416
562637
  return Math.max(0, ratio - 0.4);
@@ -562508,6 +562729,9 @@ Respond with your assessment, then take action.`;
562508
562729
  this._lastActiveForgettingReport = null;
562509
562730
  this._lastContextConsolidationTurn = -1e3;
562510
562731
  this._contextFrameBuilder = new ContextFrameBuilder();
562732
+ if (!this._workingDirectory) {
562733
+ this._workingDirectory = _pathResolve(process.cwd());
562734
+ }
562511
562735
  if (!this.options.disablePersistentMemory && !this._memoryInitialized) {
562512
562736
  try {
562513
562737
  const path12 = await import("node:path");
@@ -562728,7 +562952,6 @@ Respond with your assessment, then take action.`;
562728
562952
  const contextComposition = await this.assembleContext(task, context2);
562729
562953
  const systemPrompt = contextComposition.assembled;
562730
562954
  this._contextTree = new ContextTree(`sys-${systemPrompt.length}`, cleanedTask.slice(0, 200));
562731
- this._phaseMessageStartIdx = 2;
562732
562955
  this.emit({
562733
562956
  type: "status",
562734
562957
  content: `Context assembled: ${contextComposition.sections.map((s2) => `${s2.label}(${s2.tokenEstimate}t)`).join(" + ")} = ~${contextComposition.totalTokenEstimate}t`,
@@ -562772,10 +562995,13 @@ TASK: ${scrubbedTask}` : scrubbedTask;
562772
562995
  }
562773
562996
  });
562774
562997
  }
562998
+ const missionCompletionContract = this.buildMissionCompletionContract(cleanedTask, context2);
562775
562999
  const messages2 = [
562776
563000
  { role: "system", content: systemPrompt },
563001
+ ...missionCompletionContract ? [{ role: "system", content: missionCompletionContract }] : [],
562777
563002
  { role: "user", content: userContent }
562778
563003
  ];
563004
+ this._phaseMessageStartIdx = messages2.length;
562779
563005
  if (process.env["OMNIUS_DISABLE_DECOMP1"] !== "1") {
562780
563006
  try {
562781
563007
  const _taskBodyForDecomp = typeof userContent === "string" ? userContent : "";
@@ -562939,10 +563165,10 @@ TASK: ${scrubbedTask}` : scrubbedTask;
562939
563165
  this._hookDenyHintCount = 0;
562940
563166
  this._selfConsistencyVotes = 0;
562941
563167
  this._retrievalContextCache = null;
562942
- this._observerMode = this.options.observerMode ?? "both";
563168
+ this._adversaryMode = this.options.adversaryMode ?? "both";
562943
563169
  this._worldFacts = { files: /* @__PURE__ */ new Map(), lastTest: {}, lastLists: /* @__PURE__ */ new Map() };
562944
563170
  this._argCohorts.clear();
562945
- this._littlemanRedundantBlocks.clear();
563171
+ this._adversaryRedundantSignals.clear();
562946
563172
  this._lastTodoWriteTurn = -1;
562947
563173
  this._lastTodoReminderTurn = -1;
562948
563174
  let pendingConstraintWarnings = [];
@@ -563033,6 +563259,54 @@ TASK: ${scrubbedTask}` : scrubbedTask;
563033
563259
  });
563034
563260
  return true;
563035
563261
  };
563262
+ const holdProvenanceTaskComplete = (args, turn) => {
563263
+ const proposedSummary = extractTaskCompleteSummary(args);
563264
+ const gate = this._evaluateCompletionProvenanceGate({
563265
+ summary: proposedSummary,
563266
+ taskGoal: cleanedTask,
563267
+ toolCallLog
563268
+ });
563269
+ if (gate.proceed)
563270
+ return false;
563271
+ messages2.push({
563272
+ role: "system",
563273
+ content: `${gate.feedback}
563274
+
563275
+ [ADVISORY ONLY] This critique does not block task_complete; use it to improve the next run or visible evidence if the task continues.`
563276
+ });
563277
+ this.emit({
563278
+ type: "status",
563279
+ content: `completion provenance critique emitted without blocking: ${gate.reason}`,
563280
+ turn,
563281
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
563282
+ });
563283
+ this.emit({
563284
+ type: "adversary_reaction",
563285
+ adversary: {
563286
+ class: "guidance",
563287
+ shortText: "Completion provenance critique emitted",
563288
+ confidence: 0.9,
563289
+ details: gate.reason
563290
+ },
563291
+ turn,
563292
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
563293
+ });
563294
+ return false;
563295
+ };
563296
+ const emitBackwardPassAdvisory = (feedback, turn) => {
563297
+ messages2.push({
563298
+ role: "system",
563299
+ content: `${feedback}
563300
+
563301
+ [ADVISORY ONLY] Backward-pass critique is non-blocking; do not treat this as a tool failure or completion refusal.`
563302
+ });
563303
+ this.emit({
563304
+ type: "status",
563305
+ content: "backward-pass critique emitted without blocking completion",
563306
+ turn,
563307
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
563308
+ });
563309
+ };
563036
563310
  const turnCap = this.options.maxTurns && this.options.maxTurns > 0 ? this.options.maxTurns : Number.MAX_SAFE_INTEGER;
563037
563311
  for (let turn = 0; turn < turnCap; turn++) {
563038
563312
  clearTurnState(this._appState);
@@ -564017,8 +564291,8 @@ ${_staleSamples.join("\n")}` : ``,
564017
564291
  nextSelfEval = now + selfEvalInterval;
564018
564292
  }
564019
564293
  const turnsRemaining = this.options.maxTurns - turn;
564020
- if (this.options.allowTurnExtension && turnsRemaining <= 3 && turnsRemaining > 0 && this._littlemanToolOutcomes.length >= 2) {
564021
- const recentOutcomes = this._littlemanToolOutcomes.slice(-6);
564294
+ if (this.options.allowTurnExtension && turnsRemaining <= 3 && turnsRemaining > 0 && this._adversaryToolOutcomes.length >= 2) {
564295
+ const recentOutcomes = this._adversaryToolOutcomes.slice(-6);
564022
564296
  const recentSuccesses = recentOutcomes.filter((o2) => o2.succeeded).length;
564023
564297
  const uniqueResults = new Set(recentOutcomes.map((o2) => o2.preview.slice(0, 40))).size;
564024
564298
  const isActive = recentSuccesses >= 2 && uniqueResults >= 2;
@@ -564027,16 +564301,16 @@ ${_staleSamples.join("\n")}` : ``,
564027
564301
  this.options.maxTurns += extension3;
564028
564302
  this.emit({
564029
564303
  type: "status",
564030
- content: `Littleman triage: activity detected (${recentSuccesses} recent successes, ${uniqueResults} unique results) — extending turn limit by ${extension3} (now ${this.options.maxTurns})`,
564304
+ content: `Adversary triage: activity detected (${recentSuccesses} recent successes, ${uniqueResults} unique results) — extending turn limit by ${extension3} (now ${this.options.maxTurns})`,
564031
564305
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
564032
564306
  });
564033
564307
  const detailsLines = recentOutcomes.map((o2) => `- ${o2.tool}: ${o2.succeeded ? "OK" : "ERR"} — ${o2.preview}`);
564034
564308
  this.emit({
564035
- type: "debug_littleman",
564309
+ type: "debug_adversary",
564036
564310
  turn,
564037
564311
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
564038
564312
  content: `Timeout triage: EXTENDED by ${extension3} turns (active session detected)`,
564039
- littlemanAction: {
564313
+ adversaryAction: {
564040
564314
  detection: "none",
564041
564315
  recentSuccesses,
564042
564316
  recentFailures: recentOutcomes.length - recentSuccesses,
@@ -564369,6 +564643,9 @@ ${memoryLines.join("\n")}`
564369
564643
  maxTokens: effectiveMaxTokens,
564370
564644
  timeoutMs: this.options.requestTimeoutMs
564371
564645
  };
564646
+ if ((this.options.contextWindowSize ?? 0) > 0) {
564647
+ chatRequest.numCtx = this.options.contextWindowSize;
564648
+ }
564372
564649
  if (this.options.memoryPrefix)
564373
564650
  chatRequest.memoryPrefix = this.options.memoryPrefix;
564374
564651
  if (this.options.memoryPrefixHash)
@@ -564410,7 +564687,7 @@ ${memoryLines.join("\n")}`
564410
564687
  compactionThreshold: limits.compactionThreshold,
564411
564688
  toolCallCount,
564412
564689
  keepRecent: limits.keepRecent,
564413
- littlemanOutcomes: this._littlemanToolOutcomes.length,
564690
+ adversaryOutcomes: this._adversaryToolOutcomes.length,
564414
564691
  headroom: limits.compactionThreshold - estTokens
564415
564692
  }
564416
564693
  });
@@ -564773,16 +565050,19 @@ ${memoryLines.join("\n")}`
564773
565050
  const cohort = this._argCohorts.get(cohortKey);
564774
565051
  if (cohort && cohort.failure >= 3 && cohort.success === 0) {
564775
565052
  this.emit({
564776
- type: "observer_reaction",
565053
+ type: "adversary_reaction",
564777
565054
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
564778
- observer: {
565055
+ adversary: {
564779
565056
  class: "arg_cohort_risk",
564780
565057
  shortText: `${tc.name} with similar args has failed ${cohort.failure}× recently`,
564781
565058
  confidence: 0.85
564782
565059
  }
564783
565060
  });
564784
- if (this._observerMode === "skillcoach" || this._observerMode === "both") {
564785
- this.pendingUserMessages.push(`⚠ ${tc.name} with similar arguments has failed ${cohort.failure}× recently. Try a different approach first: read relevant files, adjust arguments, or verify prerequisites.`);
565061
+ if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
565062
+ this.pendingUserMessages.push(`[ADVERSARY CRITIQUE non-blocking]
565063
+ Evidence: ${tc.name} with similar arguments has failed ${cohort.failure}× recently.
565064
+ Root cause hypothesis: the argument family may be wrong, a prerequisite may be missing, or the tool is being used before enough state is known.
565065
+ Corrective action: try a different approach first: read relevant files, adjust arguments, or verify prerequisites.`);
564786
565066
  }
564787
565067
  }
564788
565068
  if (this._errorPatterns.size > 0) {
@@ -565064,19 +565344,11 @@ ${memoryLines.join("\n")}`
565064
565344
  ].includes(tc.name);
565065
565345
  const isStatefulBrowserTool = this._isStatefulBrowserTool(tc.name);
565066
565346
  const isReadLike = !isStatefulBrowserTool && (baseIsReadLike || tc.name === "shell" && this._isShellCommandReadOnly(tc.arguments?.["command"] ?? tc.arguments?.["cmd"] ?? ""));
565067
- const observerRedundantBlock = this._littlemanRedundantBlocks.has(toolFingerprint);
565068
- if (observerRedundantBlock) {
565069
- this._littlemanRedundantBlocks.delete(toolFingerprint);
565347
+ const adversaryRedundantSignal = this._adversaryRedundantSignals.has(toolFingerprint);
565348
+ if (adversaryRedundantSignal) {
565349
+ this._adversaryRedundantSignals.delete(toolFingerprint);
565070
565350
  }
565071
- const markSyntheticToolLog = (outputPreview) => {
565072
- const lastLog = toolCallLog[_toolLogTailIdx];
565073
- if (!lastLog)
565074
- return;
565075
- lastLog.success = true;
565076
- lastLog.mutated = false;
565077
- lastLog.mutatedFiles = [];
565078
- lastLog.outputPreview = outputPreview.slice(0, 100);
565079
- };
565351
+ let criticGuidance = null;
565080
565352
  {
565081
565353
  const _reflStem = buildStem(tc.name, tc.arguments ?? {});
565082
565354
  if (!this._reflectionsInjectedThisTurn.has(_reflStem)) {
@@ -565118,7 +565390,10 @@ ${memoryLines.join("\n")}`
565118
565390
  }
565119
565391
  }
565120
565392
  }
565121
- const criticDecision = evaluate2({
565393
+ const criticDecision = this.options.disableAdversaryCritic === true ? {
565394
+ decision: "pass",
565395
+ reason: "adversary critic disabled for isolated evaluation"
565396
+ } : evaluate2({
565122
565397
  proposedCall: { tool: tc.name, args: tc.arguments ?? {} },
565123
565398
  fingerprint: toolFingerprint,
565124
565399
  isReadLike,
@@ -565132,116 +565407,33 @@ ${memoryLines.join("\n")}`
565132
565407
  stagnationSignals: null,
565133
565408
  // stagnation gate handled at top-of-turn
565134
565409
  stagnationGateActive: false,
565135
- observerRedundantBlock
565410
+ adversaryRedundantSignal
565136
565411
  });
565137
- if (criticDecision.decision === "observer_block") {
565138
- this.emit({
565139
- type: "tool_call",
565140
- toolName: tc.name,
565141
- toolArgs: tc.arguments,
565142
- turn,
565143
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
565144
- });
565145
- const blockMsg = criticDecision.cachedResult ? `[BLOCKED — this tool+args already succeeded. Re-served from cache:]
565146
-
565147
- ${criticDecision.cachedResult.slice(0, 500)}` : `[BLOCKED — the observer confirmed this tool already succeeded with these arguments on a prior turn. Do NOT re-run. Use your prior findings to proceed.]`;
565148
- markSyntheticToolLog(blockMsg);
565149
- this.emit({
565150
- type: "tool_result",
565151
- toolName: tc.name,
565152
- success: true,
565153
- content: blockMsg.slice(0, 100),
565154
- turn,
565155
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
565156
- });
565157
- this._tagSyntheticFailure({
565158
- mode: "step_repetition",
565159
- rationale: `observer-block on ${tc.name} fingerprint flagged redundant`
565160
- });
565161
- return { tc, output: blockMsg, success: true };
565162
- }
565163
- if (criticDecision.decision === "force_progress_block") {
565164
- dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
565165
- const _existingFp = recentToolResults.get(toolFingerprint);
565166
- if (_existingFp !== void 0) {
565167
- recentToolResults.delete(toolFingerprint);
565168
- recentToolResults.set(toolFingerprint, _existingFp);
565169
- }
565170
- this.emit({
565171
- type: "tool_call",
565172
- toolName: tc.name,
565173
- toolArgs: tc.arguments,
565174
- turn,
565175
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
565176
- });
565177
- this.emit({
565178
- type: "tool_result",
565179
- toolName: tc.name,
565180
- success: true,
565181
- content: `[SKIPPED DUPLICATE — exact ${tc.name} call not re-run; cached result returned.]`.slice(0, 120),
565182
- turn,
565183
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
565184
- });
565185
- this._tagSyntheticFailure({
565186
- mode: "step_repetition",
565187
- rationale: `force_progress_block on ${tc.name} after ${criticDecision.hitNumber} identical calls`
565188
- });
565189
- const generationCompletionHint = isGenerationArtifactSuccess(tc.name, criticDecision.cachedResult) ? `
565190
-
565191
- [GENERATION ALREADY COMPLETE] This exact ${tc.name} call already succeeded. Do not call it again. Use the cached artifact/path above; if delivery is needed, send it, otherwise call task_complete.` : "";
565192
- const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. Do not retry this exact call.]
565193
-
565194
- ` : `[SKIPPED DUPLICATE — exact ${tc.name} call not re-run. The cached result below is from the prior successful call. Do not retry this exact call.]
565195
-
565196
- `;
565197
- const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
565198
- ... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
565199
- markSyntheticToolLog(`${criticDecision.blockMessage}
565200
-
565201
- ${truncatedCache}`);
565202
- return {
565203
- tc,
565204
- output: `${criticDecision.blockMessage}
565205
-
565206
- ${header}${truncatedCache}${generationCompletionHint}`,
565207
- success: true
565208
- };
565209
- }
565210
- if (criticDecision.decision === "serve_cached") {
565412
+ if (criticDecision.decision === "guidance") {
565211
565413
  dedupHitCount.set(toolFingerprint, criticDecision.hitNumber);
565212
565414
  const _existingFp = recentToolResults.get(toolFingerprint);
565213
565415
  if (_existingFp !== void 0) {
565214
565416
  recentToolResults.delete(toolFingerprint);
565215
565417
  recentToolResults.set(toolFingerprint, _existingFp);
565216
565418
  }
565419
+ criticGuidance = criticDecision.guidanceMessage;
565217
565420
  this.emit({
565218
- type: "tool_call",
565219
- toolName: tc.name,
565220
- toolArgs: tc.arguments,
565221
- turn,
565421
+ type: "adversary_reaction",
565422
+ adversary: {
565423
+ class: "guidance",
565424
+ shortText: `Adversary guidance for repeated ${tc.name} call`,
565425
+ confidence: 0.9,
565426
+ details: criticDecision.reason
565427
+ },
565222
565428
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
565223
565429
  });
565224
- const generationCompletionHint = isGenerationArtifactSuccess(tc.name, criticDecision.cachedResult) ? `
565225
-
565226
- [GENERATION ALREADY COMPLETE] This exact ${tc.name} call already succeeded. Do not call it again. Use the cached artifact/path above; if delivery is needed, send it, otherwise call task_complete.` : "";
565227
- const header = criticDecision.compacted ? `[RE-SERVED FROM CACHE — the original result was compacted from context. Here is the data again. No need to call this tool again.]
565228
-
565229
- ` : `[DUPLICATE CALL #${criticDecision.hitNumber} — you already called ${tc.name} with these exact arguments. The result is identical. Do NOT call this again. Use the data you already have to make progress. One more identical call will trigger a hard progress block.]
565230
-
565231
- `;
565232
- const truncatedCache = criticDecision.cachedResult.length > 500 ? criticDecision.cachedResult.slice(0, 500) + `
565233
- ... [${criticDecision.cachedResult.length - 500} chars omitted — same as before]` : criticDecision.cachedResult;
565234
- const dedupOutput = header + truncatedCache + generationCompletionHint;
565235
- markSyntheticToolLog(dedupOutput);
565236
565430
  this.emit({
565237
- type: "tool_result",
565431
+ type: "status",
565238
565432
  toolName: tc.name,
565239
- success: true,
565240
- content: header.slice(0, 100),
565433
+ content: `Adversary guidance emitted for ${tc.name}; tool call will still execute`,
565241
565434
  turn,
565242
565435
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
565243
565436
  });
565244
- return { tc, output: dedupOutput, success: true };
565245
565437
  }
565246
565438
  this.emit({
565247
565439
  type: "tool_call",
@@ -566242,6 +566434,11 @@ Respond with EXACTLY this structure before your next tool call:
566242
566434
  result = await this.offloadEmbeddedImageResult(result, tc.name, turn);
566243
566435
  }
566244
566436
  let output = this.normalizeToolOutput(result, tc.name, tc.arguments, turn);
566437
+ if (criticGuidance) {
566438
+ output += `
566439
+
566440
+ ${criticGuidance}`;
566441
+ }
566245
566442
  if (!result.success && (this.options.modelTier === "small" || this.options.modelTier === "medium")) {
566246
566443
  const recovery = this.buildRecoveryGuidance(tc.name, result.error ?? "", tc.arguments);
566247
566444
  if (recovery)
@@ -566251,6 +566448,13 @@ Respond with EXACTLY this structure before your next tool call:
566251
566448
  editFeedbackRequiredBeforeMoreEdits = this._buildBatchEditAtomicAbortGuidance(tc.arguments);
566252
566449
  this.pendingUserMessages.push(editFeedbackRequiredBeforeMoreEdits);
566253
566450
  }
566451
+ const currentLogEntry = toolCallLog[_toolLogTailIdx];
566452
+ if (currentLogEntry) {
566453
+ currentLogEntry.success = result.success;
566454
+ currentLogEntry.mutated = realFileMutation;
566455
+ currentLogEntry.mutatedFiles = realMutationPaths;
566456
+ currentLogEntry.outputPreview = (result.success ? result.llmContent ?? result.output ?? output : result.error ?? result.output ?? output).toString().slice(0, 500);
566457
+ }
566254
566458
  this.emit({
566255
566459
  type: "tool_result",
566256
566460
  toolName: tc.name,
@@ -566640,27 +566844,26 @@ ${sr.result.output}`;
566640
566844
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
566641
566845
  });
566642
566846
  } else {
566643
- if (holdNoProgressTaskComplete(matchTc.arguments, turn)) {
566847
+ if (holdNoProgressTaskComplete(matchTc.arguments, turn) || holdProvenanceTaskComplete(matchTc.arguments, turn)) {
566644
566848
  continue;
566645
566849
  }
566646
566850
  const _bp1 = await this._runBackwardPassReview(turn);
566647
566851
  if (_bp1 && !_bp1.proceed && _bp1.feedback) {
566648
- messages2.push({ role: "system", content: _bp1.feedback });
566649
- } else {
566650
- completed = true;
566651
- summary = extractTaskCompleteSummary(matchTc.arguments);
566652
- if (summary && !this._assistantTextEmitted) {
566653
- this.emit({
566654
- type: "assistant_text",
566655
- content: summary,
566656
- source: "task_complete_summary",
566657
- turn,
566658
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
566659
- });
566660
- this._assistantTextEmitted = true;
566661
- }
566662
- break;
566852
+ emitBackwardPassAdvisory(_bp1.feedback, turn);
566853
+ }
566854
+ completed = true;
566855
+ summary = extractTaskCompleteSummary(matchTc.arguments);
566856
+ if (summary && !this._assistantTextEmitted) {
566857
+ this.emit({
566858
+ type: "assistant_text",
566859
+ content: summary,
566860
+ source: "task_complete_summary",
566861
+ turn,
566862
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
566863
+ });
566864
+ this._assistantTextEmitted = true;
566663
566865
  }
566866
+ break;
566664
566867
  }
566665
566868
  }
566666
566869
  }
@@ -566696,27 +566899,26 @@ ${sr.result.output}`;
566696
566899
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
566697
566900
  });
566698
566901
  } else {
566699
- if (holdNoProgressTaskComplete(r2.tc.arguments, turn)) {
566902
+ if (holdNoProgressTaskComplete(r2.tc.arguments, turn) || holdProvenanceTaskComplete(r2.tc.arguments, turn)) {
566700
566903
  continue;
566701
566904
  }
566702
566905
  const _bp2 = await this._runBackwardPassReview(turn);
566703
566906
  if (_bp2 && !_bp2.proceed && _bp2.feedback) {
566704
- messages2.push({ role: "system", content: _bp2.feedback });
566705
- } else {
566706
- completed = true;
566707
- summary = extractTaskCompleteSummary(r2.tc.arguments);
566708
- if (summary && !this._assistantTextEmitted) {
566709
- this.emit({
566710
- type: "assistant_text",
566711
- content: summary,
566712
- source: "task_complete_summary",
566713
- turn,
566714
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
566715
- });
566716
- this._assistantTextEmitted = true;
566717
- }
566718
- break;
566907
+ emitBackwardPassAdvisory(_bp2.feedback, turn);
566908
+ }
566909
+ completed = true;
566910
+ summary = extractTaskCompleteSummary(r2.tc.arguments);
566911
+ if (summary && !this._assistantTextEmitted) {
566912
+ this.emit({
566913
+ type: "assistant_text",
566914
+ content: summary,
566915
+ source: "task_complete_summary",
566916
+ turn,
566917
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
566918
+ });
566919
+ this._assistantTextEmitted = true;
566719
566920
  }
566921
+ break;
566720
566922
  }
566721
566923
  }
566722
566924
  }
@@ -566788,27 +566990,26 @@ ${sr.result.output}`;
566788
566990
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
566789
566991
  });
566790
566992
  } else {
566791
- if (holdNoProgressTaskComplete(r2.tc.arguments, turn)) {
566993
+ if (holdNoProgressTaskComplete(r2.tc.arguments, turn) || holdProvenanceTaskComplete(r2.tc.arguments, turn)) {
566792
566994
  continue;
566793
566995
  }
566794
566996
  const _bp3 = await this._runBackwardPassReview(turn);
566795
566997
  if (_bp3 && !_bp3.proceed && _bp3.feedback) {
566796
- messages2.push({ role: "system", content: _bp3.feedback });
566797
- } else {
566798
- completed = true;
566799
- summary = extractTaskCompleteSummary(r2.tc.arguments);
566800
- if (summary && !this._assistantTextEmitted) {
566801
- this.emit({
566802
- type: "assistant_text",
566803
- content: summary,
566804
- source: "task_complete_summary",
566805
- turn,
566806
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
566807
- });
566808
- this._assistantTextEmitted = true;
566809
- }
566810
- break;
566998
+ emitBackwardPassAdvisory(_bp3.feedback, turn);
566811
566999
  }
567000
+ completed = true;
567001
+ summary = extractTaskCompleteSummary(r2.tc.arguments);
567002
+ if (summary && !this._assistantTextEmitted) {
567003
+ this.emit({
567004
+ type: "assistant_text",
567005
+ content: summary,
567006
+ source: "task_complete_summary",
567007
+ turn,
567008
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
567009
+ });
567010
+ this._assistantTextEmitted = true;
567011
+ }
567012
+ break;
566812
567013
  }
566813
567014
  }
566814
567015
  }
@@ -566819,7 +567020,7 @@ ${sr.result.output}`;
566819
567020
  }
566820
567021
  if (completed)
566821
567022
  break;
566822
- this.littlemanObserve(messages2, turn);
567023
+ this.adversaryObserve(messages2, turn);
566823
567024
  const currentRepScore = this.detectRepetition(toolCallLog);
566824
567025
  if (currentRepScore > 0.4 && toolCallLog.length >= 4) {
566825
567026
  const { repetitionWindow } = this.contextLimits();
@@ -567006,13 +567207,17 @@ Call task_complete(summary="...") NOW with whatever you have.`
567006
567207
  });
567007
567208
  }
567008
567209
  if (/task.?complete|all tests pass/i.test(content)) {
567210
+ const completionArgs = { summary: content };
567211
+ if (holdNoProgressTaskComplete(completionArgs, turn) || holdProvenanceTaskComplete(completionArgs, turn)) {
567212
+ continue;
567213
+ }
567009
567214
  completed = true;
567010
567215
  summary = content;
567011
567216
  break;
567012
567217
  }
567013
567218
  if (isThinkOnly) {
567014
567219
  if (consecutiveThinkOnly >= MAX_CONSECUTIVE_THINK_ONLY) {
567015
- const recentSuccesses = this._littlemanToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
567220
+ const recentSuccesses = this._adversaryToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
567016
567221
  const hasRecentSuccess = recentSuccesses.length > 0;
567017
567222
  const successHint = hasRecentSuccess ? `
567018
567223
 
@@ -567263,7 +567468,8 @@ ${this.options.maxTurns && this.options.maxTurns > 0 ? `You have ${this.options.
567263
567468
  tools: toolDefs,
567264
567469
  temperature: this.options.temperature,
567265
567470
  maxTokens: this.options.maxTokens,
567266
- timeoutMs: this.options.requestTimeoutMs
567471
+ timeoutMs: this.options.requestTimeoutMs,
567472
+ numCtx: this.options.contextWindowSize || void 0
567267
567473
  };
567268
567474
  let response;
567269
567475
  try {
@@ -567568,13 +567774,12 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
567568
567774
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
567569
567775
  });
567570
567776
  } else {
567571
- if (holdNoProgressTaskComplete(tc.arguments, turn)) {
567777
+ if (holdNoProgressTaskComplete(tc.arguments, turn) || holdProvenanceTaskComplete(tc.arguments, turn)) {
567572
567778
  continue;
567573
567779
  }
567574
567780
  const _bp4 = await this._runBackwardPassReview(turn);
567575
567781
  if (_bp4 && !_bp4.proceed && _bp4.feedback) {
567576
- messages2.push({ role: "system", content: _bp4.feedback });
567577
- continue;
567782
+ emitBackwardPassAdvisory(_bp4.feedback, turn);
567578
567783
  }
567579
567784
  completed = true;
567580
567785
  summary = extractTaskCompleteSummary(tc.arguments);
@@ -567621,22 +567826,9 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
567621
567826
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
567622
567827
  });
567623
567828
  } else {
567624
- if (process.env["OMNIUS_DISABLE_PROGRESS_GATES"] !== "1") {
567625
- const gate = computeNoProgressCompletionGate({
567626
- summary: content,
567627
- toolCallLog,
567628
- taskState: this._taskState
567629
- });
567630
- if (gate.shouldInject && gate.content) {
567631
- messages2.push({ role: "system", content: gate.content });
567632
- this.emit({
567633
- type: "status",
567634
- content: "text completion held: discovery happened but no deliverable or explicit blocker is recorded",
567635
- turn,
567636
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
567637
- });
567638
- continue;
567639
- }
567829
+ const completionArgs = { summary: content };
567830
+ if (holdNoProgressTaskComplete(completionArgs, turn) || holdProvenanceTaskComplete(completionArgs, turn)) {
567831
+ continue;
567640
567832
  }
567641
567833
  completed = true;
567642
567834
  summary = content;
@@ -567660,7 +567852,7 @@ Full content available via: repl_exec(code="data = retrieve('${handleId}')") or
567660
567852
  }
567661
567853
  if (isThinkOnlyBF) {
567662
567854
  if (consecutiveThinkOnly >= MAX_CONSECUTIVE_THINK_ONLY) {
567663
- const recentSucc = this._littlemanToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
567855
+ const recentSucc = this._adversaryToolOutcomes.slice(-3).filter((o2) => o2.succeeded);
567664
567856
  const succHint = recentSucc.length > 0 ? "\n\nYour most recent tool calls SUCCEEDED. If the task is complete, call task_complete now with a summary." : "";
567665
567857
  messages2.push({
567666
567858
  role: "user",
@@ -569748,36 +569940,35 @@ ${newerSummary}`;
569748
569940
  ${trimmedNew}`;
569749
569941
  }
569750
569942
  // -------------------------------------------------------------------------
569751
- // Littleman Observer — parallel meta-analysis of the main loop
569943
+ // Adversary — parallel meta-analysis of the main loop
569752
569944
  // -------------------------------------------------------------------------
569753
- // Inspired by Hannover's fireCompanionObserver (src/buddy/observer.ts).
569754
569945
  // Runs after each tool turn to detect when the model has lost track of
569755
569946
  // what happened and inject corrections before the next inference.
569756
569947
  //
569757
569948
  // This is the architectural fix for the "I see both tools have been failing"
569758
569949
  // regression: instead of only fixing the data the model sees (mask/summary),
569759
569950
  // we add a second analysis path that catches mismatches in real-time.
569760
- /** Track recent tool outcomes for the littleman observer */
569761
- _littlemanToolOutcomes = [];
569762
- /** WO-FIX-C: Tool fingerprints the littleman has flagged as redundant.
569763
- * Checked in executeSingle to block re-execution and return cached data. */
569764
- _littlemanRedundantBlocks = /* @__PURE__ */ new Set();
569951
+ /** Track recent tool outcomes for the adversary */
569952
+ _adversaryToolOutcomes = [];
569953
+ /** WO-FIX-C: Tool fingerprints the adversary has flagged as redundant.
569954
+ * Checked in executeSingle to attach advisory guidance before dispatch. */
569955
+ _adversaryRedundantSignals = /* @__PURE__ */ new Set();
569765
569956
  /** Reflexion pattern: task-local failure-indexed reflection buffer.
569766
569957
  * Generates typed self-reflections on task failure and injects them
569767
569958
  * into the next attempt's context for active learning. */
569768
569959
  _reflectionBuffer = null;
569769
569960
  /**
569770
- * Littleman observer: post-turn meta-analysis.
569961
+ * Adversary: post-turn meta-analysis.
569771
569962
  *
569772
569963
  * Examines the last few messages looking for contradictions between
569773
569964
  * actual tool outcomes and the model's stated understanding. When it
569774
569965
  * detects the model claiming failure after success (or vice versa),
569775
- * it injects a corrective message.
569966
+ * it injects a corrective non-blocking critique.
569776
569967
  *
569777
569968
  * Also detects repeated actions — when the model re-does something
569778
- * that already succeeded, the littleman nudges it to move on.
569969
+ * that already succeeded, the adversary nudges it to move on.
569779
569970
  */
569780
- littlemanObserve(messages2, turn) {
569971
+ adversaryObserve(messages2, turn) {
569781
569972
  if (this.options.modelTier === "large")
569782
569973
  return;
569783
569974
  const recent = messages2.slice(-6);
@@ -569806,8 +569997,8 @@ ${trimmedNew}`;
569806
569997
  }
569807
569998
  const argsKey = toolArgs ? this._buildExactArgsKey(toolArgs) : void 0;
569808
569999
  const fingerprint = toolArgs ? this._buildToolFingerprint(toolName, toolArgs) : void 0;
569809
- if (!this._littlemanToolOutcomes.some((o2) => o2.turn === turn && o2.tool === toolName && o2.fingerprint === fingerprint)) {
569810
- this._littlemanToolOutcomes.push({
570000
+ if (!this._adversaryToolOutcomes.some((o2) => o2.turn === turn && o2.tool === toolName && o2.fingerprint === fingerprint)) {
570001
+ this._adversaryToolOutcomes.push({
569811
570002
  turn,
569812
570003
  tool: toolName,
569813
570004
  argsKey,
@@ -569818,27 +570009,47 @@ ${trimmedNew}`;
569818
570009
  }
569819
570010
  }
569820
570011
  }
569821
- while (this._littlemanToolOutcomes.length > 20)
569822
- this._littlemanToolOutcomes.shift();
570012
+ while (this._adversaryToolOutcomes.length > 20)
570013
+ this._adversaryToolOutcomes.shift();
569823
570014
  const emitReaction = (cls, shortText, confidence2, details2) => {
569824
570015
  this.emit({
569825
- type: "observer_reaction",
570016
+ type: "adversary_reaction",
569826
570017
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
569827
- observer: { class: cls, shortText, confidence: confidence2, details: details2 }
570018
+ adversary: { class: cls, shortText, confidence: confidence2, details: details2 }
569828
570019
  });
569829
570020
  };
570021
+ const buildAdversaryCritique = (input) => {
570022
+ const alternatives = input.alternatives && input.alternatives.length > 0 ? `
570023
+ Alternatives:
570024
+ ${input.alternatives.map((item) => `- ${item}`).join("\n")}` : "";
570025
+ return [
570026
+ `[ADVERSARY CRITIQUE — non-blocking]`,
570027
+ `Evidence: ${input.evidence}`,
570028
+ `Root cause hypothesis: ${input.hypothesis}`,
570029
+ `Corrective action: ${input.correctiveAction}${alternatives}`
570030
+ ].join("\n");
570031
+ };
569830
570032
  const lastAssistant = [...recent].reverse().find((m2) => m2.role === "assistant" && typeof m2.content === "string");
569831
570033
  if (lastAssistant && typeof lastAssistant.content === "string") {
569832
570034
  const text = lastAssistant.content.toLowerCase();
569833
570035
  const claimsFailure = /(?:fail|error|didn't work|not working|unable to|cannot|couldn't|both .* fail|tools? (?:have |has )?been fail)/i.test(text);
569834
570036
  if (claimsFailure) {
569835
- const recentOutcomes = this._littlemanToolOutcomes.slice(-4);
570037
+ const recentOutcomes = this._adversaryToolOutcomes.slice(-4);
569836
570038
  const successes = recentOutcomes.filter((o2) => o2.succeeded);
569837
570039
  if (successes.length >= 1) {
569838
570040
  const successList = successes.map((o2) => `${o2.tool}: ${o2.preview.slice(0, 60)}`).join("; ");
569839
570041
  emitReaction("false_failure", `Claimed failure, but recent tools succeeded (${successes.length})`, 0.9, successList);
569840
- if (this._observerMode === "skillcoach" || this._observerMode === "both") {
569841
- this.pendingUserMessages.push(`⚠ Correction: recent tools DID succeed. Do not retry them. Successful results: ${successList}. Use them to advance the task.`);
570042
+ if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
570043
+ this.pendingUserMessages.push(buildAdversaryCritique({
570044
+ evidence: `Recent tools succeeded: ${successList}.`,
570045
+ hypothesis: "The main loop is interpreting uncertainty or partial progress as failure and may be about to discard usable evidence.",
570046
+ correctiveAction: "Use the successful results to advance the task, then verify the next concrete step.",
570047
+ alternatives: [
570048
+ "Edit or run the next verification step that follows from the successful output.",
570049
+ "Read a different targeted file if the successful result exposed a new path or symbol.",
570050
+ "Complete only if the successful output is sufficient evidence for the user's request."
570051
+ ]
570052
+ }));
569842
570053
  }
569843
570054
  this.emit({
569844
570055
  type: "status",
@@ -569852,47 +570063,67 @@ ${trimmedNew}`;
569852
570063
  const text = lastAssistant.content.toLowerCase();
569853
570064
  const claimsSuccess = /(done|fixed|success|passed|complete)/i.test(text);
569854
570065
  if (claimsSuccess) {
569855
- const recentOutcomes = this._littlemanToolOutcomes.slice(-4);
570066
+ const recentOutcomes = this._adversaryToolOutcomes.slice(-4);
569856
570067
  const failures = recentOutcomes.filter((o2) => !o2.succeeded);
569857
570068
  const successes = recentOutcomes.filter((o2) => o2.succeeded);
569858
570069
  if (failures.length > 0 && successes.length === 0) {
569859
570070
  const failList = failures.map((o2) => `${o2.tool}: ${o2.preview.slice(0, 60)}`).join("; ");
569860
570071
  emitReaction("false_success", `Claimed success, but recent tools failed (${failures.length})`, 0.9, failList);
569861
- if (this._observerMode === "skillcoach" || this._observerMode === "both") {
569862
- this.pendingUserMessages.push(`⚠ Your recent tools show errors (${failures.length}). Verify the last tool output and correct the issue before claiming success.`);
570072
+ if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
570073
+ this.pendingUserMessages.push(buildAdversaryCritique({
570074
+ evidence: `Recent tools show errors (${failures.length}): ${failList}.`,
570075
+ hypothesis: "The main loop is prematurely compressing intent into success language before the verifier produced evidence.",
570076
+ correctiveAction: "Inspect the failed output, identify the implicated path/symbol/command, and run one focused corrective step before claiming success.",
570077
+ alternatives: [
570078
+ "Read the smallest relevant source region around the failed symbol.",
570079
+ "Patch the implicated code or configuration.",
570080
+ "Run the same verifier only after a state-changing fix."
570081
+ ]
570082
+ }));
569863
570083
  }
569864
570084
  }
569865
570085
  }
569866
570086
  }
569867
- const lastToolCalls = recent.filter((m2) => m2.role === "assistant" && m2.tool_calls?.length).flatMap((m2) => m2.tool_calls ?? []);
569868
- for (const tc of lastToolCalls) {
569869
- const name10 = tc.function.name;
569870
- if (this._isStatefulBrowserTool(name10))
569871
- continue;
569872
- let args = {};
569873
- try {
569874
- args = JSON.parse(tc.function.arguments);
569875
- } catch {
569876
- }
569877
- const argsKey = this._buildExactArgsKey(args);
569878
- const fingerprint = this._buildToolFingerprint(name10, args);
569879
- const prior = this._littlemanToolOutcomes.find((o2) => o2.succeeded && o2.tool === name10 && o2.fingerprint === fingerprint && o2.turn < turn);
569880
- if (prior) {
569881
- this._littlemanRedundantBlocks.add(fingerprint);
569882
- emitReaction("redundant_action", `Already ran ${name10} successfully on turn ${prior.turn}`, 0.8, prior.preview);
569883
- if (this._observerMode === "skillcoach" || this._observerMode === "both") {
569884
- this.pendingUserMessages.push(`⚠ You already ran ${name10} successfully on turn ${prior.turn} with exact arguments (${argsKey.slice(0, 120)}). Do NOT re-run it. Use the existing result and proceed.`);
570087
+ if (this.options.disableAdversaryCritic !== true) {
570088
+ const lastToolCalls = recent.filter((m2) => m2.role === "assistant" && m2.tool_calls?.length).flatMap((m2) => m2.tool_calls ?? []);
570089
+ for (const tc of lastToolCalls) {
570090
+ const name10 = tc.function.name;
570091
+ if (this._isStatefulBrowserTool(name10))
570092
+ continue;
570093
+ let args = {};
570094
+ try {
570095
+ args = JSON.parse(tc.function.arguments);
570096
+ } catch {
570097
+ }
570098
+ const argsKey = this._buildExactArgsKey(args);
570099
+ const fingerprint = this._buildToolFingerprint(name10, args);
570100
+ const prior = this._adversaryToolOutcomes.find((o2) => o2.succeeded && o2.tool === name10 && o2.fingerprint === fingerprint && o2.turn < turn);
570101
+ if (prior) {
570102
+ this._adversaryRedundantSignals.add(fingerprint);
570103
+ emitReaction("redundant_action", `Already ran ${name10} successfully on turn ${prior.turn}`, 0.8, prior.preview);
570104
+ if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
570105
+ this.pendingUserMessages.push(buildAdversaryCritique({
570106
+ evidence: `${name10} already succeeded on turn ${prior.turn} with exact arguments (${argsKey.slice(0, 120)}). Prior preview: ${prior.preview}`,
570107
+ hypothesis: "The main loop may have lost track of previously observed evidence because of context pressure, path confusion, or repeated discovery.",
570108
+ correctiveAction: "Let this duplicate run execute if needed, but treat the prior result as evidence and pivot afterward unless state has changed.",
570109
+ alternatives: [
570110
+ "Use the prior result to edit/write, verify, or finish with evidence.",
570111
+ "Read a different specific file or selector if the current evidence is insufficient.",
570112
+ "Repeat exact arguments only when filesystem, browser, or page state changed."
570113
+ ]
570114
+ }));
570115
+ }
570116
+ this.emit({
570117
+ type: "status",
570118
+ content: `\x1B[38;5;178m⚠ Adversary noted redundant ${name10} call (succeeded on turn ${prior.turn}); action remains allowed\x1B[0m`,
570119
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
570120
+ });
570121
+ break;
569885
570122
  }
569886
- this.emit({
569887
- type: "status",
569888
- content: `\x1B[38;5;178m⚠ Prevented redundant ${name10} call (succeeded on turn ${prior.turn})\x1B[0m`,
569889
- timestamp: (/* @__PURE__ */ new Date()).toISOString()
569890
- });
569891
- break;
569892
570123
  }
569893
570124
  }
569894
570125
  {
569895
- const recentCalls = this._littlemanToolOutcomes.slice(-5);
570126
+ const recentCalls = this._adversaryToolOutcomes.slice(-5);
569896
570127
  if (recentCalls.length >= 3) {
569897
570128
  let consecutiveShortResults = 0;
569898
570129
  for (let i2 = recentCalls.length - 1; i2 >= 0; i2--) {
@@ -569905,30 +570136,39 @@ ${trimmedNew}`;
569905
570136
  }
569906
570137
  if (consecutiveShortResults >= 3) {
569907
570138
  emitReaction("idle_think", `Consecutive output without input: ${consecutiveShortResults}`, 0.7);
569908
- if (this._observerMode === "skillcoach" || this._observerMode === "both") {
569909
- this.pendingUserMessages.push(`⚠ You have sent ${consecutiveShortResults} consecutive outputs without reading any input. Alternate: receive input, then respond. Call your input tool now.`);
570139
+ if (this._adversaryMode === "skillcoach" || this._adversaryMode === "both") {
570140
+ this.pendingUserMessages.push(buildAdversaryCritique({
570141
+ evidence: `${consecutiveShortResults} consecutive output-like calls occurred without an input-like observation.`,
570142
+ hypothesis: "The loop may be acting from stale state instead of re-observing the environment.",
570143
+ correctiveAction: "Take one input/observation step before another output step.",
570144
+ alternatives: [
570145
+ "Call the input/listen/poll tool for the current environment.",
570146
+ "Read the current UI/page state before clicking or typing again.",
570147
+ "If the task is already complete, finish with the concrete evidence already observed."
570148
+ ]
570149
+ }));
569910
570150
  }
569911
570151
  this.emit({
569912
570152
  type: "status",
569913
- content: `\x1B[38;5;178m⚠ Blocked runaway output (${consecutiveShortResults} consecutive sends without receive)\x1B[0m`,
570153
+ content: `\x1B[38;5;178m⚠ Adversary flagged runaway-output risk (${consecutiveShortResults} consecutive sends without receive); action remains allowed\x1B[0m`,
569914
570154
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
569915
570155
  });
569916
570156
  }
569917
570157
  }
569918
570158
  }
569919
- const succCount = this._littlemanToolOutcomes.filter((o2) => o2.succeeded).length;
569920
- const failCount = this._littlemanToolOutcomes.filter((o2) => !o2.succeeded).length;
569921
- const lastFour = this._littlemanToolOutcomes.slice(-4);
570159
+ const succCount = this._adversaryToolOutcomes.filter((o2) => o2.succeeded).length;
570160
+ const failCount = this._adversaryToolOutcomes.filter((o2) => !o2.succeeded).length;
570161
+ const lastFour = this._adversaryToolOutcomes.slice(-4);
569922
570162
  const details = [
569923
570163
  `Recent tool outcomes:`,
569924
570164
  ...lastFour.map((o2) => `- ${o2.tool}: ${o2.succeeded ? "OK" : "ERR"} — ${o2.preview}`)
569925
570165
  ].join("\n");
569926
570166
  this.emit({
569927
- type: "debug_littleman",
570167
+ type: "debug_adversary",
569928
570168
  turn,
569929
570169
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
569930
- content: `Littleman: ${this._littlemanToolOutcomes.length} tracked outcomes (${succCount} ok, ${failCount} err)`,
569931
- littlemanAction: {
570170
+ content: `Adversary: ${this._adversaryToolOutcomes.length} tracked outcomes (${succCount} ok, ${failCount} err)`,
570171
+ adversaryAction: {
569932
570172
  detection: "none",
569933
570173
  recentSuccesses: succCount,
569934
570174
  recentFailures: failCount,
@@ -650870,7 +651110,7 @@ ${conversationStream}`
650870
651110
  // off default rather than the global config's value.
650871
651111
  thinking: false,
650872
651112
  // Telegram sub-agent runs must be bounded. Brute-force re-engagement and
650873
- // the Littleman near-cap turn extension are appropriate for the full TUI
651113
+ // the Adversary near-cap turn extension are appropriate for the full TUI
650874
651114
  // session but cause Telegram to silently outgrow its nominal maxTurns,
650875
651115
  // which is how the Snow Crash PDF loop reached 60+ turns of self-talk.
650876
651116
  ...TELEGRAM_SUB_AGENT_BOUNDED_OPTIONS
@@ -683233,8 +683473,8 @@ ${entry.fullContent}`
683233
683473
  let streamTextBuffer = "";
683234
683474
  let lastAssistantText = "";
683235
683475
  let lastProvenancePath = null;
683236
- let showLittleman = false;
683237
- const littlemanBuffer = [];
683476
+ let showAdversary = false;
683477
+ const adversaryBuffer = [];
683238
683478
  const contentWrite = (fn) => {
683239
683479
  if (isNeovimActive()) {
683240
683480
  const origWrite = process.stdout.write;
@@ -683718,24 +683958,24 @@ ${entry.fullContent}`
683718
683958
  if (snap) {
683719
683959
  contentWrite(
683720
683960
  () => renderInfo(
683721
- `\x1B[38;5;243m[ctx] ${snap.messageCount} msgs | ~${snap.estimatedTokens} tok | headroom: ${snap.headroom} | tools: ${snap.toolCallCount} | littleman: ${snap.littlemanOutcomes} tracked\x1B[0m`
683961
+ `\x1B[38;5;243m[ctx] ${snap.messageCount} msgs | ~${snap.estimatedTokens} tok | headroom: ${snap.headroom} | tools: ${snap.toolCallCount} | adversary: ${snap.adversaryOutcomes} tracked\x1B[0m`
683722
683962
  )
683723
683963
  );
683724
683964
  }
683725
683965
  }
683726
683966
  break;
683727
- case "debug_littleman":
683728
- if (event.littlemanAction) {
683729
- const lm = event.littlemanAction;
683967
+ case "debug_adversary":
683968
+ if (event.adversaryAction) {
683969
+ const lm = event.adversaryAction;
683730
683970
  if (lm.intervention) {
683731
683971
  const simple = `⚠ ${lm.intervention}`;
683732
683972
  contentWrite(() => renderInfo(simple));
683733
683973
  }
683734
683974
  if (lm.details) {
683735
- littlemanBuffer.push(lm.details);
683736
- if (littlemanBuffer.length > 50)
683737
- littlemanBuffer.splice(0, littlemanBuffer.length - 50);
683738
- if (showLittleman) {
683975
+ adversaryBuffer.push(lm.details);
683976
+ if (adversaryBuffer.length > 50)
683977
+ adversaryBuffer.splice(0, adversaryBuffer.length - 50);
683978
+ if (showAdversary) {
683739
683979
  const det = String(lm.details);
683740
683980
  contentWrite(() => {
683741
683981
  process.stdout.write(c3.dim(det) + "\n");
@@ -685477,8 +685717,8 @@ This is an independent background session started from /background.`
685477
685717
  origTtyWriteRef = null;
685478
685718
  statusBar.setNeovimFocusChecker(() => isNeovimFocused());
685479
685719
  let _escapeHandler = null;
685480
- let showLittleman = false;
685481
- const littlemanBuffer = [];
685720
+ let showAdversary = false;
685721
+ const adversaryBuffer = [];
685482
685722
  statusBar.hookDirectInput(
685483
685723
  rl,
685484
685724
  () => {
@@ -685511,26 +685751,26 @@ This is an independent background session started from /background.`
685511
685751
  }
685512
685752
  },
685513
685753
  () => {
685514
- showLittleman = !showLittleman;
685754
+ showAdversary = !showAdversary;
685515
685755
  if (statusBar.isActive) {
685516
685756
  try {
685517
685757
  statusBar.jumpToLive();
685518
685758
  } catch {
685519
685759
  }
685520
685760
  statusBar.beginContentWrite();
685521
- if (showLittleman) {
685522
- renderInfo("Littleman details: shown");
685523
- const dump = littlemanBuffer.slice(-10).join("\n");
685761
+ if (showAdversary) {
685762
+ renderInfo("Adversary details: shown");
685763
+ const dump = adversaryBuffer.slice(-10).join("\n");
685524
685764
  if (dump.trim()) {
685525
685765
  process.stdout.write(`
685526
- ${c3.dim("[littleman recap]")}
685766
+ ${c3.dim("[adversary recap]")}
685527
685767
  `);
685528
685768
  for (const line of dump.split("\n")) {
685529
685769
  process.stdout.write(" " + c3.dim(line) + "\n");
685530
685770
  }
685531
685771
  }
685532
685772
  } else {
685533
- renderInfo("Littleman details: hidden");
685773
+ renderInfo("Adversary details: hidden");
685534
685774
  }
685535
685775
  statusBar.endContentWrite();
685536
685776
  }