npm - @kimbho/kimbho-cli - Versions diffs - 0.1.28 → 0.1.30 - Mend

@kimbho/kimbho-cli 0.1.28 → 0.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.cjs CHANGED Viewed

@@ -12718,7 +12718,7 @@ function createCompletionRuntimeCommand(program2) {
 // package.json
 var package_default = {
   name: "@kimbho/kimbho-cli",
-  version: "0.1.28",
+  version: "0.1.30",
   description: "Kimbho CLI is a terminal-native coding agent for planning, execution, and verification.",
   type: "module",
   engines: {
@@ -17627,6 +17627,44 @@ var RepoStrategySchema = external_exports.object({
   ]),
   reasoning: external_exports.string().min(1)
 });
+// Closed set of phase labels accepted for a task world-model's `phase` field.
+// TaskWorldModelSchema uses "survey" as the default when no phase is given.
+var TaskExecutionPhaseSchema = external_exports.enum([
+  "survey",
+  "plan-edit",
+  "implement",
+  "verify",
+  "repair",
+  "finalize",
+  "escalate"
+]);
+// Verifier bookkeeping stored under a world-model's `verifier` key.
+// Every command list defaults to an empty array; the single-command and summary
+// fields are optional but must be non-empty strings when present.
+var TaskVerifierStateSchema = external_exports.object({
+  availableCommands: external_exports.array(external_exports.string().min(1)).default([]),
+  preferredCommands: external_exports.array(external_exports.string().min(1)).default([]),
+  attemptedCommands: external_exports.array(external_exports.string().min(1)).default([]),
+  disabledCommands: external_exports.array(external_exports.string().min(1)).default([]),
+  successfulCommands: external_exports.array(external_exports.string().min(1)).default([]),
+  currentCommand: external_exports.string().min(1).optional(),
+  latestFailureSummary: external_exports.string().min(1).optional(),
+  latestFailureCommand: external_exports.string().min(1).optional(),
+  latestSuccessfulCommand: external_exports.string().min(1).optional(),
+  // Defaults to false when the field is omitted.
+  requiresInteractiveSetup: external_exports.boolean().default(false)
+});
+// Per-task execution state ("world model"). PlanTaskSchema exposes it as the
+// optional `executionState` field. All list fields default to empty arrays and
+// both counters default to 0, so an empty object parses to a usable model.
+var TaskWorldModelSchema = external_exports.object({
+  phase: TaskExecutionPhaseSchema.default("survey"),
+  targetFiles: external_exports.array(external_exports.string().min(1)).default([]),
+  inspectedFiles: external_exports.array(external_exports.string().min(1)).default([]),
+  changedFiles: external_exports.array(external_exports.string().min(1)).default([]),
+  hypotheses: external_exports.array(external_exports.string().min(1)).default([]),
+  blockers: external_exports.array(external_exports.string().min(1)).default([]),
+  proofPending: external_exports.array(external_exports.string().min(1)).default([]),
+  proofSatisfied: external_exports.array(external_exports.string().min(1)).default([]),
+  recentActions: external_exports.array(external_exports.string().min(1)).default([]),
+  recentCommands: external_exports.array(external_exports.string().min(1)).default([]),
+  nextFocus: external_exports.string().min(1).optional(),
+  // Counters must be non-negative integers.
+  sourceEditCount: external_exports.number().int().nonnegative().default(0),
+  validationLoopCount: external_exports.number().int().nonnegative().default(0),
+  // An omitted verifier is filled in from TaskVerifierStateSchema's own defaults.
+  verifier: TaskVerifierStateSchema.default({}),
+  lastUpdatedAt: external_exports.string().datetime().optional()
+});
 var PlanTaskSchema = external_exports.object({
   id: external_exports.string().min(1),
   title: external_exports.string().min(1),
@@ -17662,7 +17700,8 @@ var PlanTaskSchema = external_exports.object({
   teamId: external_exports.string().min(1).optional(),
   teamMemberIds: external_exports.array(external_exports.string()).optional(),
   subagentLabel: external_exports.string().min(1).optional(),
-  subagentInstructions: external_exports.string().min(1).optional()
+  subagentInstructions: external_exports.string().min(1).optional(),
+  executionState: TaskWorldModelSchema.optional()
 });
 var PlanMilestoneSchema = external_exports.object({
   id: external_exports.string().min(1),
@@ -18150,6 +18189,103 @@ var LegacyKimbhoConfigSchema = external_exports.object({
     "next-prisma-postgres"
   ])
 });
+/**
+ * Trims every model id, discards missing or empty ones, and removes duplicates
+ * while keeping first-seen order.
+ * @param {Array<string | null | undefined>} models - Raw model ids.
+ * @returns {string[]} Unique, trimmed, non-empty model ids.
+ */
+function uniqueModelIds(models) {
+  const seen = new Set();
+  for (const candidate of models) {
+    const trimmed = candidate?.trim();
+    if (trimmed) {
+      seen.add(trimmed);
+    }
+  }
+  return [...seen];
+}
+/**
+ * Lists the model ids a provider can be asked for, default model first.
+ * @param {{ defaultModel?: string, models?: string[] } | null | undefined} provider
+ *   Provider definition; a missing provider yields no candidates.
+ * @returns {string[]} De-duplicated model ids (via uniqueModelIds).
+ */
+function providerCandidateModels(provider) {
+  if (!provider) {
+    return [];
+  }
+  return uniqueModelIds([
+    provider.defaultModel,
+    // Spreading an absent list would throw, so a provider that only declares
+    // a default model is treated as having no extra models.
+    ...(provider.models ?? [])
+  ]);
+}
+/**
+ * Reads parameter-count hints such as "70b" or "1.5B" out of a model id.
+ * @param {string} model - Model id to scan.
+ * @returns {number} The largest "<number>b" value found, or 0 when there is none.
+ */
+function estimateModelScale(model) {
+  let sawHint = false;
+  let largest = -Infinity;
+  for (const hint of model.matchAll(/(\d+(?:\.\d+)?)b/gi)) {
+    sawHint = true;
+    const size = Number.parseFloat(hint[1] ?? "0");
+    if (Number.isFinite(size) && size > largest) {
+      largest = size;
+    }
+  }
+  return sawHint ? largest : 0;
+}
+/**
+ * Heuristically ranks a model id for a brain role; a higher score is preferred.
+ * The score combines model-family bonuses, the size hint from
+ * estimateModelScale, small-tier penalties, and role-specific adjustments.
+ * For the "fast" role the result is a dedicated speed bias plus 20% of the
+ * general score.
+ * @param {string} model - Model id (matched case-insensitively).
+ * @param {string} role - Brain role; "fast", "reviewer", "planner" and "coder"
+ *   receive extra adjustments, any other value gets the base score.
+ * @returns {number} Relative score, only meaningful when compared with other models.
+ */
+function scoreModelForRole(model, role) {
+  const normalized = model.toLowerCase();
+  const scale = estimateModelScale(normalized);
+  // "mini" is guarded with a lookbehind so the "gemini" family name is not
+  // mistaken for a small-tier marker.
+  const isSmallTier = /(?<!ge)mini|nano|flash|haiku|small|fast|instant|lite/i.test(normalized);
+  let score = 0;
+  if (/gpt-5(?!.*mini)(?!.*nano)/i.test(normalized) || /\bgpt5\b/i.test(normalized)) {
+    score += 160;
+  }
+  if (/\bo3\b|\bo4\b|o4-mini-high/i.test(normalized)) {
+    score += 145;
+  }
+  if (/opus|sonnet|claude-4|claude-3\.7/i.test(normalized)) {
+    score += /opus|claude-4/i.test(normalized) ? 150 : 132;
+  }
+  if (/gpt-4\.1|gpt-4o|deepseek-r1|deepseek-v3|qwq/i.test(normalized)) {
+    score += 122;
+  }
+  if (/qwen.*(?:32b|35b|72b|110b|235b)|llama.*(?:70b|90b|405b)|mixtral/i.test(normalized)) {
+    score += 110;
+  }
+  if (/reason|thinking|r1|o[34]/i.test(normalized)) {
+    score += 20;
+  }
+  if (scale > 0) {
+    // Size contributes at most 96 points (capped at 120 before the 0.8 factor).
+    score += Math.min(scale, 120) * 0.8;
+  }
+  if (isSmallTier) {
+    score -= 55;
+  }
+  if (/\b(?:3|7|8|9|14)b\b/i.test(normalized)) {
+    score -= 35;
+  }
+  if (/preview|experimental|beta/i.test(normalized)) {
+    score -= 6;
+  }
+  if (role === "fast") {
+    let fastBias = 0;
+    if (isSmallTier) {
+      fastBias += 95;
+    }
+    if (/sonnet|gpt-4o-mini|gpt-5-mini|claude.*haiku/i.test(normalized)) {
+      fastBias += 60;
+    }
+    if (scale >= 32) {
+      fastBias -= 40;
+    } else if (scale > 0 && scale <= 16) {
+      fastBias += 24;
+    }
+    return fastBias + score * 0.2;
+  }
+  if (role === "reviewer") {
+    if (/reason|thinking|r1|\bo3\b|\bo4\b|opus/i.test(normalized)) {
+      score += 24;
+    }
+    if (/(?<!ge)mini|flash|haiku/i.test(normalized)) {
+      score -= 10;
+    }
+  }
+  if (role === "planner") {
+    if (/reason|thinking|r1|\bo3\b|\bo4\b|sonnet|opus/i.test(normalized)) {
+      score += 18;
+    }
+  }
+  if (role === "coder") {
+    if (/sonnet|gpt-5|gpt-4\.1|deepseek|qwen|llama/i.test(normalized)) {
+      score += 16;
+    }
+  }
+  return score;
+}
+/**
+ * Chooses the provider's highest-scoring model for a role.
+ * Ties go to the candidate listed first (the default model leads the list).
+ * @param {object | null | undefined} provider - Provider definition.
+ * @param {string} role - Brain role passed through to scoreModelForRole.
+ * @returns {string | null} Winning model id, or null when there are no candidates.
+ */
+function pickPreferredProviderModel(provider, role) {
+  let winner = null;
+  let winningScore = -Infinity;
+  for (const candidate of providerCandidateModels(provider)) {
+    const candidateScore = scoreModelForRole(candidate, role);
+    // Strict comparison keeps the earliest candidate on equal scores.
+    if (winner === null || candidateScore > winningScore) {
+      winner = candidate;
+      winningScore = candidateScore;
+    }
+  }
+  return winner;
+}
 function createBrainCatalog(providerId, defaultModel, fastModel) {
   return {
     planner: {
@@ -18229,12 +18365,13 @@ function normalizeConfigInput(raw) {
   const legacy = LegacyKimbhoConfigSchema.safeParse(raw);
   if (legacy.success) {
     const provider = mapLegacyProviderToDefinition(legacy.data.provider);
-    const defaultModel = provider.defaultModel ?? "gpt-5";
+    const defaultModel = pickPreferredProviderModel(provider, "planner") ?? provider.defaultModel ?? "gpt-5";
+    const fastModel = pickPreferredProviderModel(provider, "fast") ?? defaultModel;
     return {
       providers: [
         provider
       ],
-      brains: createBrainCatalog(provider.id, defaultModel, defaultModel),
+      brains: createBrainCatalog(provider.id, defaultModel, fastModel),
       approvalMode: legacy.data.approvalMode,
       sandboxMode: legacy.data.sandboxMode,
       stackPresets: legacy.data.stackPresets,
@@ -18285,8 +18422,8 @@ function createDefaultConfig(options = {}) {
     baseUrl: "https://api.openai.com/v1",
     defaultModel: "gpt-5"
   });
-  const defaultModel = options.defaultModel ?? provider.defaultModel;
-  const fastModel = options.fastModel ?? defaultModel;
+  const defaultModel = options.defaultModel ?? pickPreferredProviderModel(provider, "planner") ?? provider.defaultModel;
+  const fastModel = options.fastModel ?? pickPreferredProviderModel(provider, "fast") ?? defaultModel;
   return KimbhoConfigSchema.parse({
     providers: [
       provider
@@ -18765,7 +18902,10 @@ function resolveBrainSettings(config2, role) {
 function resolveBrainModel(config2, role) {
   const settings = resolveBrainSettings(config2, role);
   const provider = findProviderById(config2, settings.providerId);
-  return settings.model ?? provider?.defaultModel ?? null;
+  if (settings.model) {
+    return settings.model;
+  }
+  return pickPreferredProviderModel(provider, role) ?? provider?.defaultModel ?? null;
 }
 // ../core/dist/session/store.js
@@ -32988,6 +33128,9 @@ function combinePositiveLimit(...values) {
   }
   return Math.min(...filtered);
 }
+/**
+ * Trims each string, drops blanks, and removes duplicates in first-seen order.
+ * Entries that are not strings (for example an optional field that was left
+ * undefined) are skipped instead of throwing.
+ * @param {Iterable<unknown>} values - Candidate strings.
+ * @returns {string[]} Unique, trimmed, non-empty strings.
+ */
+function uniqueStrings2(values) {
+  const unique = new Set();
+  for (const value of values) {
+    if (typeof value !== "string") {
+      continue;
+    }
+    const trimmed = value.trim();
+    if (trimmed.length > 0) {
+      unique.add(trimmed);
+    }
+  }
+  return Array.from(unique);
+}
 function truncateForModel(value) {
   if (!value) {
     return value;
@@ -33077,6 +33220,18 @@ function isReadOnlyShellCommand2(command) {
     "git diff"
   ].some((prefix) => normalized === prefix || normalized.startsWith(prefix));
 }
+/**
+ * Reports whether a shell command line starts with a file-viewing utility
+ * (cat, head, tail, wc, sed, more, less) followed by at least one argument.
+ * Any whitespace may separate the utility from its arguments; a bare utility
+ * name with no arguments does not count.
+ * NOTE(review): `sed -i` edits files in place but is still classified as
+ * inspection here, as it was before — confirm that is intended.
+ * @param {string} command - Raw shell command line.
+ * @returns {boolean} True when the command looks like file inspection.
+ */
+function isShellFileInspectionCommand(command) {
+  const normalized = command.trim().toLowerCase();
+  return /^(?:cat|head|tail|wc|sed|more|less)\s/.test(normalized);
+}
 function isVerificationCommand(command) {
   const normalized = command.trim().toLowerCase();
   return [
@@ -33121,6 +33276,310 @@ function isVerificationAction(action) {
   const command = typeof action.input.command === "string" ? action.input.command : "";
   return command.length > 0 && isVerificationCommand(command);
 }
+/**
+ * Reports whether an agent action exercises the running system: either a
+ * verification action, or one of the process/browser/HTTP tools listed below.
+ * @param {{ type: string, tool?: string }} action - Parsed agent action.
+ * @returns {boolean} True for runtime-validation tool actions.
+ */
+function isRuntimeValidationAction(action) {
+  const runtimeTools = [
+    "process.start",
+    "process.logs",
+    "process.stop",
+    "browser.open",
+    "browser.inspect",
+    "browser.click",
+    "browser.fill",
+    "browser.close",
+    "http.fetch"
+  ];
+  const isToolAction = action.type === "tool";
+  return isToolAction && (isVerificationAction(action) || runtimeTools.includes(action.tool));
+}
+/**
+ * Reports whether an action is a shell.exec call whose command line is a
+ * file-inspection command according to isShellFileInspectionCommand.
+ * @param {{ type: string, tool?: string, input?: { command?: unknown } }} action
+ * @returns {boolean} False for anything that is not a shell.exec tool action.
+ */
+function isShellFileInspectionAction(action) {
+  const isShellExec = action.type === "tool" && action.tool === "shell.exec";
+  if (!isShellExec) {
+    return false;
+  }
+  const { command } = action.input;
+  // A missing or non-string command is checked as an empty command line.
+  return isShellFileInspectionCommand(typeof command === "string" ? command : "");
+}
+/**
+ * Reports whether an action uses one of the read-only repository tools
+ * (file.read, file.search, file.list, repo.index, repo.query, git.diff).
+ * @param {{ type: string, tool?: string }} action - Parsed agent action.
+ * @returns {boolean} True only for tool actions naming one of those tools.
+ */
+function isRepoInspectionAction(action) {
+  if (action.type !== "tool") {
+    return false;
+  }
+  switch (action.tool) {
+    case "file.read":
+    case "file.search":
+    case "file.list":
+    case "repo.index":
+    case "repo.query":
+    case "git.diff":
+      return true;
+    default:
+      return false;
+  }
+}
+/**
+ * Normalizes a path for world-model bookkeeping.
+ * Backslashes become forward slashes and surrounding whitespace is removed.
+ * Relative paths lose a leading "./". Absolute paths inside `cwd` are rewritten
+ * relative to it; absolute paths outside `cwd` (or equal to it) are returned
+ * unchanged apart from the slash normalization.
+ * @param {string} cwd - Workspace root used as the base for relativizing.
+ * @param {string} value - Path as reported by a task or tool.
+ * @returns {string} Normalized path; empty when `value` is blank.
+ */
+function normalizeWorkspacePath(cwd, value) {
+  const normalized = value.replace(/\\/g, "/").trim();
+  if (normalized.length === 0) {
+    return normalized;
+  }
+  if (!import_node_path14.default.isAbsolute(normalized)) {
+    return normalized.replace(/^\.\//, "");
+  }
+  const relative = import_node_path14.default.relative(cwd, normalized).replace(/\\/g, "/");
+  // Only a leading ".." path segment means the target is outside cwd; a name
+  // that merely begins with two dots (e.g. "..cache/x") is still inside it.
+  const escapesWorkspace = relative === ".." || relative.startsWith("../");
+  return relative.length > 0 && !escapesWorkspace ? relative : normalized;
+}
+/**
+ * Discovers non-interactive verification commands for a workspace.
+ * Reads `package.json` in `cwd` and turns its typecheck/build/test/lint scripts
+ * into run commands for the declared package manager (npm when none of
+ * pnpm/yarn/bun is declared). If that yields nothing and a `tsconfig.json`
+ * exists, falls back to `npx tsc --noEmit`. Any read or parse failure is
+ * treated as "nothing found" (best effort).
+ * @param {string} cwd - Workspace directory to inspect.
+ * @returns {Promise<{ availableCommands: string[], preferredCommands: string[] }>}
+ *   All detected commands, plus the preferred ordering (typecheck/build/test
+ *   commands first, then lint-only commands).
+ */
+async function detectVerificationCommands(cwd) {
+  const detected = [];
+  const manifestPath = import_node_path14.default.join(cwd, "package.json");
+  try {
+    await (0, import_promises14.access)(manifestPath);
+    const manifest = JSON.parse(await (0, import_promises14.readFile)(manifestPath, "utf8"));
+    const scripts = manifest.scripts ?? {};
+    const declaredManager = manifest.packageManager;
+    let packageManager = "npm";
+    for (const known of ["pnpm", "yarn", "bun"]) {
+      if (declaredManager?.startsWith(known)) {
+        packageManager = known;
+        break;
+      }
+    }
+    // yarn runs scripts directly; the other managers need the "run" subcommand.
+    const runPrefix = packageManager === "yarn" ? "yarn" : `${packageManager} run`;
+    for (const script of ["typecheck", "build", "test", "lint"]) {
+      if (scripts[script]) {
+        detected.push(`${runPrefix} ${script}`);
+      }
+    }
+  } catch {
+  }
+  if (detected.length === 0) {
+    try {
+      await (0, import_promises14.access)(import_node_path14.default.join(cwd, "tsconfig.json"));
+      detected.push("npx tsc --noEmit");
+    } catch {
+    }
+  }
+  const availableCommands = uniqueStrings2(detected);
+  const strongVerifier = /typecheck|build|test/i;
+  const lintVerifier = /lint/i;
+  const strongFirst = availableCommands.filter((command) => strongVerifier.test(command));
+  const lintOnly = availableCommands.filter((command) => lintVerifier.test(command) && !strongVerifier.test(command));
+  return {
+    availableCommands,
+    preferredCommands: uniqueStrings2([
+      ...strongFirst,
+      ...lintOnly
+    ])
+  };
+}
+/**
+ * Builds the starting world model for a task.
+ * The initial phase is "verify" for verification tasks, "finalize" for
+ * integration and documentation tasks, "survey" for other tasks in an existing
+ * workspace, and "implement" otherwise.
+ * @param {object} task - Plan task (id, type, description, filesLikelyTouched, acceptanceCriteria).
+ * @param {object} request - Run request (cwd, workspaceState).
+ * @param {{ availableCommands: string[], preferredCommands: string[] }} verifier
+ *   Verification commands detected for the workspace.
+ * @returns {object} Fresh world model stamped with the current time.
+ */
+function createInitialWorldModel(task, request, verifier) {
+  let phase;
+  if (task.type === "verification") {
+    phase = "verify";
+  } else if (task.type === "integration" || task.type === "documentation") {
+    phase = "finalize";
+  } else if (request.workspaceState === "existing") {
+    phase = "survey";
+  } else {
+    phase = "implement";
+  }
+  const targetFiles = uniqueStrings2(task.filesLikelyTouched.map((filePath) => normalizeWorkspacePath(request.cwd, filePath)));
+  const hypotheses = uniqueStrings2([
+    `Satisfy task acceptance criteria for ${task.id}.`,
+    task.description
+  ]);
+  // Every acceptance criterion starts out as proof that is still owed.
+  const proofPending = uniqueStrings2(task.acceptanceCriteria);
+  let nextFocus = "Inspect the likely source files and determine the minimal safe change.";
+  if (phase === "verify") {
+    nextFocus = "Run the preferred verification path and capture proof.";
+  }
+  return {
+    phase,
+    targetFiles,
+    inspectedFiles: [],
+    changedFiles: [],
+    hypotheses,
+    blockers: [],
+    proofPending,
+    proofSatisfied: [],
+    recentActions: [],
+    recentCommands: [],
+    nextFocus,
+    sourceEditCount: 0,
+    validationLoopCount: 0,
+    verifier: {
+      availableCommands: verifier.availableCommands,
+      preferredCommands: verifier.preferredCommands,
+      attemptedCommands: [],
+      disabledCommands: [],
+      successfulCommands: [],
+      requiresInteractiveSetup: false
+    },
+    lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
+  };
+}
+/**
+ * Renders a world model as "label: value" lines for inclusion in a prompt.
+ * Empty lists are shown as "(none)"; a missing next focus as "(unset)".
+ * @param {object} worldModel - World model to describe.
+ * @returns {string} Newline-separated summary.
+ */
+function summarizeWorldModel(worldModel) {
+  const { verifier } = worldModel;
+  const listOrNone = (items, separator) => items.join(separator) || "(none)";
+  const rows = [
+    ["phase", worldModel.phase],
+    ["next focus", worldModel.nextFocus ?? "(unset)"],
+    ["target files", listOrNone(worldModel.targetFiles, ", ")],
+    ["inspected files", listOrNone(worldModel.inspectedFiles, ", ")],
+    ["changed files", listOrNone(worldModel.changedFiles, ", ")],
+    ["hypotheses", listOrNone(worldModel.hypotheses, " | ")],
+    ["blockers", listOrNone(worldModel.blockers, " | ")],
+    ["proof pending", listOrNone(worldModel.proofPending, " | ")],
+    ["proof satisfied", listOrNone(worldModel.proofSatisfied, " | ")],
+    ["verifier preferred commands", listOrNone(verifier.preferredCommands, ", ")],
+    ["verifier disabled commands", listOrNone(verifier.disabledCommands, ", ")],
+    ["latest verifier failure", verifier.latestFailureSummary ?? "(none)"],
+    ["recent actions", listOrNone(worldModel.recentActions, " | ")]
+  ];
+  return rows.map(([label, value]) => `${label}: ${value}`).join("\n");
+}
+/**
+ * Returns a copy of the world model with an action label (and optionally a
+ * shell command) appended to its short rolling histories.
+ * Each history is de-duplicated and capped at six entries.
+ * @param {object} worldModel - Current world model (not mutated).
+ * @param {string} label - Description of the action just taken.
+ * @param {string} [command] - Shell command, when the action ran one.
+ * @returns {object} Updated world model with a fresh `lastUpdatedAt`.
+ */
+function recordWorldModelAction(worldModel, label, command) {
+  // Keep the five most recent entries, add the new one, then de-duplicate.
+  const remember = (history, entry) => uniqueStrings2([
+    ...history.slice(-5),
+    entry
+  ]).slice(-6);
+  const recentActions = remember(worldModel.recentActions, label);
+  const recentCommands = command ? remember(worldModel.recentCommands, command) : worldModel.recentCommands;
+  return {
+    ...worldModel,
+    recentActions,
+    recentCommands,
+    lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
+  };
+}
+/**
+ * Scores how useful a verification command is for the current world model.
+ * Each verifier family (typecheck, build, test, lint) earns a larger bonus when
+ * a pending proof item mentions a related keyword and a smaller one otherwise.
+ * Re-running the last failing command during repair is favoured, dev/serve/start
+ * style commands and already-successful commands are demoted, and disabled
+ * commands receive a large penalty.
+ * @param {string} command - Candidate command.
+ * @param {object} worldModel - World model providing phase, proofPending and verifier state.
+ * @returns {number} Relative score; higher means run it sooner.
+ */
+function scoreVerificationCommand(command, worldModel) {
+  const normalized = command.trim().toLowerCase();
+  const { verifier, proofPending } = worldModel;
+  const familyRules = [
+    { matcher: /typecheck|tsc --noemit|tsc\b/i, proof: /typescript|compile|type/i, relevant: 70, generic: 32 },
+    { matcher: /build/i, proof: /build|compile|render|page|layout|responsive/i, relevant: 68, generic: 34 },
+    { matcher: /test|vitest|jest/i, proof: /test|behavior|logic|regression/i, relevant: 74, generic: 38 },
+    { matcher: /lint|eslint/i, proof: /lint|style|quality/i, relevant: 54, generic: 18 }
+  ];
+  let total = 0;
+  const lastFailure = verifier.latestFailureCommand;
+  if (worldModel.phase === "repair" && lastFailure && normalized === lastFailure.trim().toLowerCase()) {
+    total += 80;
+  }
+  for (const rule of familyRules) {
+    if (!rule.matcher.test(normalized)) {
+      continue;
+    }
+    const proofIsPending = proofPending.some((item) => rule.proof.test(item));
+    total += proofIsPending ? rule.relevant : rule.generic;
+  }
+  if (/dev|serve|start/i.test(normalized)) {
+    total -= 18;
+  }
+  if (verifier.successfulCommands.includes(command)) {
+    total -= 12;
+  }
+  if (verifier.disabledCommands.includes(command)) {
+    total -= 1e3;
+  }
+  return total;
+}
+/**
+ * Picks the verification command to run next.
+ * Candidates are the last failing command (only while repairing), then the
+ * preferred commands, then all available commands; disabled commands are
+ * excluded and the rest are ranked with scoreVerificationCommand.
+ * @param {object} worldModel - World model providing phase and verifier state.
+ * @returns {string | null} Best-scoring command, or null when none is usable.
+ */
+function determineNextVerificationCommand(worldModel) {
+  const { verifier } = worldModel;
+  const pool = [];
+  if (worldModel.phase === "repair" && verifier.latestFailureCommand) {
+    pool.push(verifier.latestFailureCommand);
+  }
+  pool.push(...verifier.preferredCommands, ...verifier.availableCommands);
+  const usable = uniqueStrings2(pool).filter((command) => !verifier.disabledCommands.includes(command));
+  if (usable.length === 0) {
+    return null;
+  }
+  // Sort a copy, best score first.
+  const ranked = [
+    ...usable
+  ].sort((left, right) => scoreVerificationCommand(right, worldModel) - scoreVerificationCommand(left, worldModel));
+  return ranked[0] ?? null;
+}
+// Computes the next proofPending/proofSatisfied lists after a verification run.
+// When there is no command or the run failed, both lists are returned unchanged.
+// Otherwise each verifier family the command belongs to (typecheck, build, test,
+// lint) moves its keyword-matching pending items to the satisfied list, or adds
+// a generic "verified via" label when nothing pending matches.
+function updateProofStateForVerification(worldModel, command, success2) {
+  if (!command || !success2) {
+    return {
+      proofPending: worldModel.proofPending,
+      proofSatisfied: worldModel.proofSatisfied
+    };
+  }
+  const normalized = command.toLowerCase();
+  const satisfiedLabels = [];
+  // Working copy; it shrinks as successive families claim matching items.
+  let proofPending = [
+    ...worldModel.proofPending
+  ];
+  // Moves pending items matching `pattern` into satisfiedLabels; if none match,
+  // records `fallback` instead so the successful run still leaves a trace.
+  const satisfyMatching = (pattern, fallback) => {
+    const matching = proofPending.filter((item) => pattern.test(item));
+    if (matching.length > 0) {
+      satisfiedLabels.push(...matching);
+      proofPending = proofPending.filter((item) => !pattern.test(item));
+      return;
+    }
+    satisfiedLabels.push(fallback);
+  };
+  // Order matters: an item matching several families (e.g. "compile") is
+  // claimed by the first matching family below and is gone for later ones.
+  if (/typecheck|tsc --noemit|tsc\b/.test(normalized)) {
+    satisfyMatching(/typescript|compile|type/i, `Type safety verified via ${command}`);
+  }
+  if (/build/.test(normalized)) {
+    satisfyMatching(/build|compile|render|page|layout|responsive/i, `Build/render verification passed via ${command}`);
+  }
+  if (/test|vitest|jest/.test(normalized)) {
+    satisfyMatching(/test|behavior|logic|regression/i, `Behavior verified via ${command}`);
+  }
+  if (/lint|eslint/.test(normalized)) {
+    satisfyMatching(/lint|style|quality/i, `Code quality verified via ${command}`);
+  }
+  // Command did not belong to any known family: still note that it passed.
+  if (satisfiedLabels.length === 0) {
+    satisfiedLabels.push(`Verified via ${command}`);
+  }
+  return {
+    proofPending: uniqueStrings2(proofPending),
+    proofSatisfied: uniqueStrings2([
+      ...worldModel.proofSatisfied,
+      ...satisfiedLabels
+    ])
+  };
+}
+/**
+ * Returns the prompt rules that apply to the world model's current phase.
+ * @param {object} worldModel - World model providing `phase` and verifier state.
+ * @returns {string[]} Guidance sentences; empty for an unrecognized phase so
+ *   callers can always iterate the result.
+ */
+function derivePhaseGuidance(worldModel) {
+  switch (worldModel.phase) {
+    case "survey":
+      return [
+        "Survey likely source files and constraints before making a change.",
+        "Do not spend more than one baseline validation action before a real source edit lands."
+      ];
+    case "plan-edit":
+    case "implement":
+      return [
+        "Make the concrete source change now.",
+        "Use file.patch or file.write against the likely target files before further runtime validation."
+      ];
+    case "verify":
+      return [
+        `Use the strategic verifier next: ${determineNextVerificationCommand(worldModel) ?? "(choose the best non-interactive verifier)"}.`,
+        "Capture proof for the changed behavior before finishing."
+      ];
+    case "repair":
+      return [
+        `Repair the latest failing verifier before rerunning it: ${worldModel.verifier.latestFailureSummary ?? "(missing failure summary)"}.`,
+        "Inspect failure output, edit the relevant source, then rerun the strategic verifier."
+      ];
+    case "finalize":
+      return [
+        "Only finish once the key proof is captured and no blocker remains.",
+        "Use git.diff or one final verifier if you still need confirmation."
+      ];
+    case "escalate":
+      return [
+        "Summarize blockers precisely so the supervisor can reassign or replan."
+      ];
+    default:
+      // A phase outside the known set contributes no extra rules rather than
+      // returning undefined, which the prompt builder would fail to map over.
+      return [];
+  }
+}
+/**
+ * Derives optional hints for the supervisor from a task outcome.
+ * A task counts as delegated when it has a `parentTaskId`.
+ * @param {string} status - Outcome status (e.g. "completed", "blocked", "handoff").
+ * @param {object} task - Plan task (id, parentTaskId).
+ * @param {object} worldModel - World model providing phase and blockers.
+ * @returns {object | undefined} Hint flags with a reason, or undefined when
+ *   nothing needs the supervisor's attention.
+ */
+function deriveSupervisorHints(status, task, worldModel) {
+  const isDelegated = Boolean(task.parentTaskId);
+  // Finished delegated work: the parent may be able to merge it.
+  if (isDelegated && status === "completed") {
+    return {
+      shouldMergeDelegatedWork: true,
+      shouldReprioritize: true,
+      reason: `Delegated task ${task.id} completed; parent flow may be ready to merge or reprioritize.`
+    };
+  }
+  // Stuck or handed-off work: ask for a replan, citing the first blocker if any.
+  if (status === "blocked" || status === "handoff" || worldModel.phase === "escalate") {
+    const reason = worldModel.blockers[0] ?? `Task ${task.id} needs supervisor intervention.`;
+    return {
+      shouldReplan: true,
+      shouldReprioritize: isDelegated,
+      reason
+    };
+  }
+  // Delegated work that is still repairing may need reassignment.
+  if (isDelegated && worldModel.phase === "repair") {
+    return {
+      shouldReplan: true,
+      shouldReprioritize: true,
+      reason: `Delegated task ${task.id} is in repair mode and may need reassignment or dependency changes.`
+    };
+  }
+  return undefined;
+}
 function extractShellCommand(action) {
   if (action.type !== "tool" || action.tool !== "shell.exec") {
     return "";
@@ -33139,7 +33598,7 @@ function isInteractiveVerificationSetupFailure(action, result) {
   ].join("\n").toLowerCase();
   return combined.includes("interactive eslint setup required") || combined.includes("command requires interactive input before it can continue") || command.includes("lint") && combined.includes("how would you like to configure eslint");
 }
-function buildSystemPrompt(agent, task, request, allowedTools, plan, extraInstructions) {
+function buildSystemPrompt(agent, task, request, allowedTools, worldModel, plan, extraInstructions) {
   const toolShape = allowedTools.join("|");
   const dependencyTasks = plan ? flattenPlanTasks(plan).filter((candidate) => task.dependsOn.includes(candidate.id)) : [];
   const completedTasks = plan ? flattenPlanTasks(plan).filter((candidate) => candidate.status === "completed" && candidate.id !== task.id) : [];
@@ -33153,6 +33612,7 @@ function buildSystemPrompt(agent, task, request, allowedTools, plan, extraInstru
     `Goal: ${request.goal}`,
     `Current task: ${task.id} - ${task.title}`,
     `Task description: ${task.description}`,
+    `Execution phase: ${worldModel.phase}`,
     `Acceptance criteria:`,
     ...task.acceptanceCriteria.map((item) => `- ${item}`),
     `Likely files: ${task.filesLikelyTouched.join(", ") || "(not specified)"}`,
@@ -33162,6 +33622,8 @@ function buildSystemPrompt(agent, task, request, allowedTools, plan, extraInstru
     `Completed tasks in this run: ${completedTasks.length > 0 ? completedTasks.slice(-4).map((candidate) => `${candidate.id}:${candidate.title}`).join(", ") : "(none yet)"}`,
     `Downstream tasks depending on this task: ${downstreamTasks.length > 0 ? downstreamTasks.slice(0, 4).map((candidate) => `${candidate.id}:${candidate.title}`).join(", ") : "(none)"}`,
     `Allowed tools: ${allowedTools.join(", ")}`,
+    `Task world-model:`,
+    summarizeWorldModel(worldModel),
     `Respond with exactly one JSON object and no markdown.`,
     `Tool action shape: {"type":"tool","tool":"${toolShape}","input":{...},"reason":"why this step matters"}`,
     `Finish shape: {"type":"finish","summary":"what was completed and verified"}`,
@@ -33170,6 +33632,7 @@ function buildSystemPrompt(agent, task, request, allowedTools, plan, extraInstru
     `- Use one action per response.`,
     `- Use file.list, file.search, repo.index, and repo.query to explore the workspace before editing.`,
     `- Prefer file.read before editing existing files.`,
+    `- For repo file inspection, prefer file.read, file.search, file.list, repo.query, and git.diff instead of shell.exec cat/head/tail/wc/sed.`,
     `- Use scaffold.generate when the task is clearly greenfield and a known preset fits better than improvising every file by hand.`,
     `- Use file.patch for existing files when possible; use file.write for new files or full replacements.`,
     `- Use git.diff to inspect the current patch after changes when helpful.`,
@@ -33184,6 +33647,7 @@ function buildSystemPrompt(agent, task, request, allowedTools, plan, extraInstru
     `- If a verification command asks for interactive setup or operator input, do not rerun it unchanged. Choose a different non-interactive verifier, or configure that verifier only if the task explicitly requires it.`,
     `- Do not claim success unless the task acceptance criteria are satisfied.`,
     `- If the task is underspecified, make a pragmatic implementation choice and continue.`,
+    ...derivePhaseGuidance(worldModel).map((rule) => `- ${rule}`),
     ...task.subagentInstructions ? [
       `Delegation instructions:`,
       task.subagentInstructions
@@ -33473,6 +33937,8 @@ var AutonomousTaskExecutor = class {
     const brain = await this.resolver.resolve(effectiveBrainRole);
     const allowedTools = resolvedExecutionPolicy.allowedTools;
     const actionSchema = createAgentActionSchema(allowedTools);
+    const verificationCommands = await detectVerificationCommands(request.cwd);
+    let worldModel = task.executionState ?? createInitialWorldModel(task, request, verificationCommands);
     const messages = [
       {
         role: "user",
@@ -33499,11 +33965,13 @@ var AutonomousTaskExecutor = class {
       }
     };
     let changedWorkspace = false;
+    let appliedSourceEdit = false;
     let verifiedAfterLatestChange = false;
     let repairRequiredBeforeVerification = false;
     let repairAppliedSinceFailure = false;
     let verificationFailures = 0;
     let lastVerificationFailure = null;
+    let preEditValidationActions = 0;
     let preservedMessageCount = messages.length;
     let compactedTranscriptEntries = 0;
     let compactionSummary = null;
@@ -33514,6 +33982,26 @@ var AutonomousTaskExecutor = class {
       inputTokens: 0,
       outputTokens: 0
     };
+    const snapshotWorldModel = () => ({
+      ...worldModel,
+      lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
+    });
+    const makeOutcome = (status, summary, extra = {}) => {
+      const currentWorldModel = snapshotWorldModel();
+      const supervisorHints = deriveSupervisorHints(status, task, currentWorldModel);
+      return {
+        status,
+        summary,
+        toolResults,
+        artifacts: Array.from(artifacts),
+        usage: usageTotals,
+        worldModel: currentWorldModel,
+        ...supervisorHints ? {
+          supervisorHints
+        } : {},
+        ...extra
+      };
+    };
     const createToolExecutionContext = (step, approvalReason, operatorApproved = false) => ({
       cwd: request.cwd,
       ...options.signal ? {
@@ -33587,13 +34075,17 @@ var AutonomousTaskExecutor = class {
       });
       const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
       artifacts.add(transcriptPath2);
-      return {
-        status: "blocked",
-        summary,
-        toolResults,
-        artifacts: Array.from(artifacts),
-        usage: usageTotals
+      worldModel = {
+        ...worldModel,
+        phase: "escalate",
+        blockers: uniqueStrings2([
+          ...worldModel.blockers,
+          summary
+        ]),
+        nextFocus: "Supervisor should replan or reassign this task because a budget was exhausted.",
+        lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
       };
+      return makeOutcome("blocked", summary);
     };
     await emitProgress({
       type: "task-note",
@@ -33622,24 +34114,32 @@ var AutonomousTaskExecutor = class {
       });
       const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
       artifacts.add(transcriptPath2);
-      return {
-        status: "blocked",
-        summary,
-        toolResults,
-        artifacts: Array.from(artifacts),
-        usage: usageTotals
+      worldModel = {
+        ...worldModel,
+        phase: "escalate",
+        blockers: uniqueStrings2([
+          ...worldModel.blockers,
+          summary
+        ]),
+        nextFocus: "Supervisor should inspect why this task has no executable tools.",
+        lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
       };
+      return makeOutcome("blocked", summary);
     }
     if (resolvedApproval?.decision === "deny") {
       const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
       artifacts.add(transcriptPath2);
-      return {
-        status: "blocked",
-        summary: `Operator denied approval for ${resolvedApproval.approval.toolId} in ${task.id}.`,
-        toolResults,
-        artifacts: Array.from(artifacts),
-        usage: usageTotals
+      worldModel = {
+        ...worldModel,
+        phase: "escalate",
+        blockers: uniqueStrings2([
+          ...worldModel.blockers,
+          `Operator denied ${resolvedApproval.approval.toolId}.`
+        ]),
+        nextFocus: "Choose a safer path or wait for supervisor replanning after the denied action.",
+        lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
       };
+      return makeOutcome("blocked", `Operator denied approval for ${resolvedApproval.approval.toolId} in ${task.id}.`);
     }
     if (request.workspaceState === "existing") {
       const preflightResults = [];
@@ -33681,6 +34181,18 @@ var AutonomousTaskExecutor = class {
         for (const artifact of result.artifacts) {
           artifacts.add(artifact);
         }
+        if (toolId === "file.read" && result.success && typeof input.path === "string") {
+          worldModel = {
+            ...worldModel,
+            inspectedFiles: uniqueStrings2([
+              ...worldModel.inspectedFiles,
+              normalizeWorkspacePath(request.cwd, input.path)
+            ]),
+            phase: worldModel.phase === "survey" ? "plan-edit" : worldModel.phase,
+            nextFocus: worldModel.phase === "survey" ? "Make the concrete source change in the inspected target files." : worldModel.nextFocus,
+            lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
+          };
+        }
         transcript.push({
           step: 0,
           response: JSON.stringify({
@@ -33780,6 +34292,14 @@ ${truncateForModel(customAgentMemory)}`);
           ].join("\n\n")
         });
       }
+      if (request.workspaceState === "existing" && (task.type === "scaffold" || task.type === "implementation") && worldModel.phase === "survey") {
+        worldModel = {
+          ...worldModel,
+          phase: "plan-edit",
+          nextFocus: "Edit the likely source files before using more verification or browser/runtime checks.",
+          lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
+        };
+      }
     }
     preservedMessageCount = messages.length;
     const runToolAction = async (parsedAction, step, operatorApproved = false) => {
@@ -33841,7 +34361,10 @@ ${truncateForModel(customAgentMemory)}`);
     const applyToolResult = async (parsedAction, result, step, transcriptEntry) => {
       const mutatingAction = isMutatingAction(parsedAction);
       const verificationAction = isVerificationAction(parsedAction);
+      const runtimeValidationAction = isRuntimeValidationAction(parsedAction);
+      const shellCommand = extractShellCommand(parsedAction);
       const interactiveVerificationSetupFailure = verificationAction && !result.success && isInteractiveVerificationSetupFailure(parsedAction, result);
+      worldModel = recordWorldModelAction(worldModel, parsedAction.type === "tool" ? `${parsedAction.tool}${shellCommand ? ` ${shellCommand}` : ""}` : parsedAction.type === "finish" ? "finish" : "block", shellCommand || void 0);
       if (mutatingAction && result.success) {
         changedWorkspace = true;
         verifiedAfterLatestChange = false;
@@ -33849,22 +34372,121 @@ ${truncateForModel(customAgentMemory)}`);
           repairAppliedSinceFailure = true;
         }
       }
+      if (parsedAction.tool === "file.write" || parsedAction.tool === "file.patch") {
+        if (result.success) {
+          appliedSourceEdit = true;
+          preEditValidationActions = 0;
+          worldModel = {
+            ...worldModel,
+            phase: "verify",
+            changedFiles: uniqueStrings2([
+              ...worldModel.changedFiles,
+              ...result.artifacts.map((artifact) => normalizeWorkspacePath(request.cwd, artifact)),
+              ...typeof parsedAction.input.path === "string" ? [
+                normalizeWorkspacePath(request.cwd, parsedAction.input.path)
+              ] : []
+            ]),
+            blockers: [],
+            sourceEditCount: worldModel.sourceEditCount + 1,
+            nextFocus: `Run the strategic verifier next: ${determineNextVerificationCommand(worldModel) ?? "choose the best non-interactive verifier"}.`,
+            lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
+          };
+        }
+      }
+      if (!appliedSourceEdit && runtimeValidationAction) {
+        preEditValidationActions += 1;
+        worldModel = {
+          ...worldModel,
+          validationLoopCount: worldModel.validationLoopCount + 1,
+          nextFocus: "Make a source edit before spending more time on runtime validation.",
+          lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
+        };
+      }
+      if (parsedAction.tool === "file.read" && result.success && typeof parsedAction.input.path === "string") {
+        worldModel = {
+          ...worldModel,
+          inspectedFiles: uniqueStrings2([
+            ...worldModel.inspectedFiles,
+            normalizeWorkspacePath(request.cwd, parsedAction.input.path)
+          ]),
+          phase: worldModel.phase === "survey" ? "plan-edit" : worldModel.phase,
+          nextFocus: worldModel.phase === "survey" ? "Use the inspected files to make the concrete implementation change." : worldModel.nextFocus,
+          lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
+        };
+      }
       if (verificationAction) {
         verifiedAfterLatestChange = result.success;
+        const attemptedCommand = shellCommand || (parsedAction.tool === "tests.run" ? "tests.run" : "");
+        const proofUpdate = updateProofStateForVerification(worldModel, attemptedCommand || void 0, result.success);
+        const verifier = {
+          ...worldModel.verifier,
+          attemptedCommands: attemptedCommand ? uniqueStrings2([
+            ...worldModel.verifier.attemptedCommands,
+            attemptedCommand
+          ]) : worldModel.verifier.attemptedCommands,
+          currentCommand: attemptedCommand || worldModel.verifier.currentCommand,
+          latestFailureSummary: result.success ? void 0 : result.summary,
+          latestFailureCommand: result.success ? void 0 : attemptedCommand || void 0,
+          latestSuccessfulCommand: result.success ? attemptedCommand || worldModel.verifier.latestSuccessfulCommand : worldModel.verifier.latestSuccessfulCommand,
+          successfulCommands: result.success && attemptedCommand ? uniqueStrings2([
+            ...worldModel.verifier.successfulCommands,
+            attemptedCommand
+          ]) : worldModel.verifier.successfulCommands,
+          disabledCommands: interactiveVerificationSetupFailure && attemptedCommand ? uniqueStrings2([
+            ...worldModel.verifier.disabledCommands,
+            attemptedCommand
+          ]) : worldModel.verifier.disabledCommands,
+          requiresInteractiveSetup: interactiveVerificationSetupFailure
+        };
+        worldModel = {
+          ...worldModel,
+          verifier,
+          lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
+        };
         if (result.success) {
           verificationFailures = 0;
           repairRequiredBeforeVerification = false;
           repairAppliedSinceFailure = false;
           lastVerificationFailure = null;
+          worldModel = {
+            ...worldModel,
+            phase: "finalize",
+            validationLoopCount: 0,
+            proofPending: proofUpdate.proofPending,
+            proofSatisfied: proofUpdate.proofSatisfied,
+            nextFocus: "Review the diff and finish if the task acceptance criteria are satisfied.",
+            blockers: [],
+            lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
+          };
         } else if (interactiveVerificationSetupFailure) {
           repairRequiredBeforeVerification = false;
           repairAppliedSinceFailure = false;
           lastVerificationFailure = result;
+          worldModel = {
+            ...worldModel,
+            phase: appliedSourceEdit ? "verify" : "implement",
+            blockers: uniqueStrings2([
+              ...worldModel.blockers.filter((blocker) => !blocker.includes("interactive")),
+              result.summary
+            ]),
+            nextFocus: "Choose a different non-interactive verifier or configure the verifier only if the task truly requires it.",
+            lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
+          };
         } else {
           verificationFailures += 1;
           repairRequiredBeforeVerification = true;
           repairAppliedSinceFailure = false;
           lastVerificationFailure = result;
+          worldModel = {
+            ...worldModel,
+            phase: "repair",
+            blockers: uniqueStrings2([
+              ...worldModel.blockers,
+              result.summary
+            ]),
+            nextFocus: "Repair the latest failing verifier output before running verification again.",
+            lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
+          };
         }
       }
       transcriptEntry.toolResult = result;
@@ -33925,21 +34547,21 @@ ${truncateForModel(customAgentMemory)}`);
           const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
           artifacts.add(transcriptPath2);
           if (task.agentRole !== "test-debugger") {
-            return {
-              status: "handoff",
-              summary: `Verification failed ${verificationFailures} time${verificationFailures === 1 ? "" : "s"} for ${task.id}; handing off to test-debugger.`,
-              toolResults,
-              artifacts: Array.from(artifacts),
-              usage: usageTotals
+            worldModel = {
+              ...worldModel,
+              phase: "escalate",
+              nextFocus: "Escalate to test-debugger with the latest failing verifier context.",
+              lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
             };
+            return makeOutcome("handoff", `Verification failed ${verificationFailures} time${verificationFailures === 1 ? "" : "s"} for ${task.id}; handing off to test-debugger.`);
           }
-          return {
-            status: "blocked",
-            summary: `Verification failed ${verificationFailures} time${verificationFailures === 1 ? "" : "s"} for ${task.id}; repair budget exhausted.`,
-            toolResults,
-            artifacts: Array.from(artifacts),
-            usage: usageTotals
+          worldModel = {
+            ...worldModel,
+            phase: "escalate",
+            nextFocus: "Debugger repair budget is exhausted; supervisor must replan or accept the blocker.",
+            lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
           };
+          return makeOutcome("blocked", `Verification failed ${verificationFailures} time${verificationFailures === 1 ? "" : "s"} for ${task.id}; repair budget exhausted.`);
         }
       }
       messages.push({
@@ -33983,14 +34605,9 @@ ${truncateForModel(customAgentMemory)}`);
       if (execution.approvalRequest) {
         const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript.concat(transcriptEntry));
         artifacts.add(transcriptPath2);
-        return {
-          status: "awaiting-approval",
-          summary: execution.approvalRequest.reason,
-          toolResults,
-          artifacts: Array.from(artifacts),
-          approvalRequest: execution.approvalRequest,
-          usage: usageTotals
-        };
+        return makeOutcome("awaiting-approval", execution.approvalRequest.reason, {
+          approvalRequest: execution.approvalRequest
+        });
       }
       if (!execution.result) {
         throw new Error("Approved action did not produce a tool result.");
@@ -34007,13 +34624,12 @@ ${truncateForModel(customAgentMemory)}`);
       } catch (error2) {
         const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
         artifacts.add(transcriptPath2);
-        return {
-          status: "paused",
-          summary: `Execution interrupted by operator during ${task.id}.`,
-          toolResults,
-          artifacts: Array.from(artifacts),
-          usage: usageTotals
+        worldModel = {
+          ...worldModel,
+          nextFocus: "Resume from the current execution state without redoing completed inspection or verification.",
+          lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
         };
+        return makeOutcome("paused", `Execution interrupted by operator during ${task.id}.`);
       }
       await compactExecutionContextIfNeeded(step);
       if (typeof maxModelCalls === "number" && usageTotals.modelCalls >= maxModelCalls) {
@@ -34036,7 +34652,7 @@ ${truncateForModel(customAgentMemory)}`);
         try {
           response = await brain.client.generateText({
             model: brain.model,
-            systemPrompt: buildSystemPrompt(effectiveAgent, task, request, allowedTools, options.plan, task.agentPromptPreamble ?? customOverlay?.promptPreamble),
+            systemPrompt: buildSystemPrompt(effectiveAgent, task, request, allowedTools, snapshotWorldModel(), options.plan, task.agentPromptPreamble ?? customOverlay?.promptPreamble),
             messages,
             responseFormat: "json_object",
             ...typeof brain.settings.temperature === "number" ? {
@@ -34054,24 +34670,27 @@ ${truncateForModel(customAgentMemory)}`);
           });
           const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
           artifacts.add(transcriptPath2);
-          return {
-            status: "blocked",
-            summary: `Model request failed before ${task.id} could choose a safe action: ${error2 instanceof Error ? error2.message : String(error2)}`,
-            toolResults,
-            artifacts: Array.from(artifacts),
-            usage: usageTotals
+          worldModel = {
+            ...worldModel,
+            phase: "escalate",
+            blockers: uniqueStrings2([
+              ...worldModel.blockers,
+              error2 instanceof Error ? error2.message : String(error2)
+            ]),
+            nextFocus: "Supervisor should inspect the model failure or switch to a healthier provider/model.",
+            lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
           };
+          return makeOutcome("blocked", `Model request failed before ${task.id} could choose a safe action: ${error2 instanceof Error ? error2.message : String(error2)}`);
         }
         if (options.signal?.aborted) {
           const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
           artifacts.add(transcriptPath2);
-          return {
-            status: "paused",
-            summary: `Execution interrupted by operator during ${task.id}.`,
-            toolResults,
-            artifacts: Array.from(artifacts),
-            usage: usageTotals
+          worldModel = {
+            ...worldModel,
+            nextFocus: "Resume from the current execution state without redoing completed inspection or verification.",
+            lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
           };
+          return makeOutcome("paused", `Execution interrupted by operator during ${task.id}.`);
         }
         responseText = response.text;
         usageTotals.modelCalls += 1;
@@ -34134,13 +34753,17 @@ ${truncateForModel(customAgentMemory)}`);
               step,
               message: "Model stayed out of structured mode after multiple retries."
             });
-            return {
-              status: "blocked",
-              summary: `${parseSummary} The task stopped before a safe tool action could be chosen.`,
-              toolResults,
-              artifacts: Array.from(artifacts),
-              usage: usageTotals
+            worldModel = {
+              ...worldModel,
+              phase: "escalate",
+              blockers: uniqueStrings2([
+                ...worldModel.blockers,
+                parseSummary
+              ]),
+              nextFocus: "Supervisor should inspect the unstructured model output and replan or switch models.",
+              lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
             };
+            return makeOutcome("blocked", `${parseSummary} The task stopped before a safe tool action could be chosen.`);
           }
           await emitProgress({
             type: "task-note",
@@ -34174,10 +34797,53 @@ ${truncateForModel(customAgentMemory)}`);
         response: responseText,
         parsedAction
       };
+      const strategicVerifier = determineNextVerificationCommand(worldModel);
       messages.push({
         role: "assistant",
         content: JSON.stringify(parsedAction)
       });
+      if (worldModel.phase === "survey" && parsedAction.type === "tool" && !isRepoInspectionAction(parsedAction)) {
+        transcriptEntry.runtimeNote = "Rejected phase-inconsistent action because survey phase still requires repo inspection.";
+        transcript.push(transcriptEntry);
+        await emitProgress({
+          type: "task-note",
+          sessionId,
+          taskId: task.id,
+          agentRole: task.agentRole,
+          step,
+          message: "Survey phase requires file/repo inspection before implementation or verification."
+        });
+        messages.push({
+          role: "user",
+          content: [
+            "The current execution phase is survey.",
+            "Inspect likely source files and repository context first with file.read, file.search, file.list, repo.index, repo.query, or git.diff.",
+            "Do not jump to runtime validation or edits before you understand the target files."
+          ].join("\n")
+        });
+        continue;
+      }
+      if (worldModel.phase === "verify" && parsedAction.type === "tool" && !isVerificationAction(parsedAction) && !isRepoInspectionAction(parsedAction)) {
+        transcriptEntry.runtimeNote = "Rejected phase-inconsistent action because verify phase requires proof or diff review.";
+        transcript.push(transcriptEntry);
+        await emitProgress({
+          type: "task-note",
+          sessionId,
+          taskId: task.id,
+          agentRole: task.agentRole,
+          step,
+          message: strategicVerifier ? `Verify phase requires proof. Run ${strategicVerifier} or inspect the diff.` : "Verify phase requires proof. Use a non-interactive verifier or inspect the diff."
+        });
+        messages.push({
+          role: "user",
+          content: [
+            "The current execution phase is verify.",
+            strategicVerifier ? `Run the strategic verifier next: ${strategicVerifier}.` : "Run the best non-interactive verifier next.",
+            "Only use diff/inspection actions here if you still need proof context before finishing."
+          ].join("\n")
+        });
+        continue;
+      }
       if (parsedAction.type === "finish") {
         if (changedWorkspace && !verifiedAfterLatestChange) {
           transcriptEntry.runtimeNote = "Finish rejected because code changed without a successful verification step.";
@@ -34194,31 +34860,77 @@ ${truncateForModel(customAgentMemory)}`);
             role: "user",
             content: [
               "You tried to finish after making code changes without a successful verification step.",
-              "Run tests.run or a build/lint/test shell command, inspect failures, and only finish after verification passes."
+              strategicVerifier ? `Run the strategic verifier next: ${strategicVerifier}.` : "Run tests.run or a build/lint/test shell command, inspect failures, and only finish after verification passes."
+            ].join("\n")
+          });
+          continue;
+        }
+        if (worldModel.phase !== "finalize") {
+          transcriptEntry.runtimeNote = `Finish rejected because the task is still in ${worldModel.phase} phase.`;
+          transcript.push(transcriptEntry);
+          await emitProgress({
+            type: "task-note",
+            sessionId,
+            taskId: task.id,
+            agentRole: task.agentRole,
+            step,
+            message: `Finish rejected because the task is still in ${worldModel.phase} phase.`
+          });
+          messages.push({
+            role: "user",
+            content: [
+              `The task is still in ${worldModel.phase} phase.`,
+              ...derivePhaseGuidance(worldModel),
+              "Do the next strategic action instead of finishing early."
+            ].join("\n")
+          });
+          continue;
+        }
+        if (worldModel.proofPending.length > 0 && worldModel.proofSatisfied.length === 0) {
+          transcriptEntry.runtimeNote = "Finish rejected because the task still has pending proof and no satisfied verifier evidence.";
+          transcript.push(transcriptEntry);
+          await emitProgress({
+            type: "task-note",
+            sessionId,
+            taskId: task.id,
+            agentRole: task.agentRole,
+            step,
+            message: "Finish rejected because the task still has pending proof requirements."
+          });
+          messages.push({
+            role: "user",
+            content: [
+              "You still owe proof for this task before finishing.",
+              `Pending proof: ${worldModel.proofPending.join(" | ")}`,
+              strategicVerifier ? `Run the strategic verifier next: ${strategicVerifier}.` : "Run the best non-interactive verifier, then finish only after the proof is captured."
             ].join("\n")
           });
           continue;
         }
         const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript.concat(transcriptEntry));
         artifacts.add(transcriptPath2);
-        return {
-          status: "completed",
-          summary: parsedAction.summary,
-          toolResults,
-          artifacts: Array.from(artifacts),
-          usage: usageTotals
+        worldModel = {
+          ...worldModel,
+          phase: "finalize",
+          nextFocus: "Task completed.",
+          lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
         };
+        return makeOutcome("completed", parsedAction.summary);
       }
       if (parsedAction.type === "block") {
         const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript.concat(transcriptEntry));
         artifacts.add(transcriptPath2);
-        return {
-          status: "blocked",
-          summary: parsedAction.reason,
-          toolResults,
-          artifacts: Array.from(artifacts),
-          usage: usageTotals
+        worldModel = {
+          ...worldModel,
+          phase: "escalate",
+          blockers: uniqueStrings2([
+            ...worldModel.blockers,
+            parsedAction.reason
+          ]),
+          nextFocus: "Supervisor should inspect the reported blocker and adjust the task graph.",
+          lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
         };
+        return makeOutcome("blocked", parsedAction.reason);
       }
       if (!allowedTools.includes(parsedAction.tool)) {
         transcriptEntry.runtimeNote = `Rejected disallowed tool "${parsedAction.tool}" for ${agent.role}.`;
@@ -34263,6 +34975,67 @@ ${truncateForModel(customAgentMemory)}`);
         });
         continue;
       }
+      if (isVerificationAction(parsedAction) && strategicVerifier && extractShellCommand(parsedAction) && extractShellCommand(parsedAction) !== strategicVerifier) {
+        transcriptEntry.runtimeNote = "Verification command rejected because it does not match the current strategic verifier.";
+        transcript.push(transcriptEntry);
+        await emitProgress({
+          type: "task-note",
+          sessionId,
+          taskId: task.id,
+          agentRole: task.agentRole,
+          step,
+          message: `Verification should follow the strategic order. Prefer ${strategicVerifier} next.`
+        });
+        messages.push({
+          role: "user",
+          content: [
+            "Use the strategic verification path instead of picking a random verifier.",
+            `Preferred next verifier: ${strategicVerifier}`
+          ].join("\n")
+        });
+        continue;
+      }
+      if (allowedTools.includes("file.read") && isShellFileInspectionAction(parsedAction)) {
+        transcriptEntry.runtimeNote = "Rejected shell-based file inspection because file.read is available.";
+        transcript.push(transcriptEntry);
+        await emitProgress({
+          type: "task-note",
+          sessionId,
+          taskId: task.id,
+          agentRole: task.agentRole,
+          step,
+          message: "Use file.read or repo/file tools instead of cat/head/wc/sed shell commands for source inspection."
+        });
+        messages.push({
+          role: "user",
+          content: [
+            "Do not use shell.exec for simple repo file inspection when file.read is available.",
+            "Use file.read, file.search, file.list, repo.query, or git.diff instead."
+          ].join("\n")
+        });
+        continue;
+      }
+      if (request.workspaceState === "existing" && (task.type === "scaffold" || task.type === "implementation") && !appliedSourceEdit && isRuntimeValidationAction(parsedAction) && preEditValidationActions >= 1) {
+        transcriptEntry.runtimeNote = "Rejected repeated validation/runtime loop before any source edit.";
+        transcript.push(transcriptEntry);
+        await emitProgress({
+          type: "task-note",
+          sessionId,
+          taskId: task.id,
+          agentRole: task.agentRole,
+          step,
+          message: "Repeated build/dev/browser verification was stopped because no source edit has landed yet."
+        });
+        messages.push({
+          role: "user",
+          content: [
+            "You already used one baseline verification/runtime check before making a source edit.",
+            "Do not keep rerunning build, lint, dev server, browser, or HTTP checks unchanged.",
+            "Inspect likely source files, make a concrete edit with file.patch or file.write, and then validate again."
+          ].join("\n")
+        });
+        continue;
+      }
       const execution = await runToolAction(parsedAction, step);
       if (execution.budgetExceeded) {
         transcriptEntry.runtimeNote = execution.budgetExceeded;
@@ -34273,14 +35046,9 @@ ${truncateForModel(customAgentMemory)}`);
         transcriptEntry.runtimeNote = execution.approvalRequest.reason;
         const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript.concat(transcriptEntry));
         artifacts.add(transcriptPath2);
-        return {
-          status: "awaiting-approval",
-          summary: execution.approvalRequest.reason,
-          toolResults,
-          artifacts: Array.from(artifacts),
-          approvalRequest: execution.approvalRequest,
-          usage: usageTotals
-        };
+        return makeOutcome("awaiting-approval", execution.approvalRequest.reason, {
+          approvalRequest: execution.approvalRequest
+        });
       }
       if (!execution.result) {
         throw new Error(`Tool ${parsedAction.tool} did not return a result.`);
@@ -34300,21 +35068,21 @@ ${truncateForModel(customAgentMemory)}`);
       message: task.agentRole !== "test-debugger" ? `Step budget exhausted after ${maxSteps} steps; handing task to test-debugger.` : `Step budget exhausted after ${maxSteps} steps; debugger escalation exhausted.`
     });
     if (task.agentRole !== "test-debugger") {
-      return {
-        status: "handoff",
-        summary: `Autonomous executor reached the step limit (${maxSteps}) for ${task.id}; handing off to test-debugger.`,
-        toolResults,
-        artifacts: Array.from(artifacts),
-        usage: usageTotals
+      worldModel = {
+        ...worldModel,
+        phase: "escalate",
+        nextFocus: "Hand off the task to test-debugger with the current world-model and transcript.",
+        lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
       };
+      return makeOutcome("handoff", `Autonomous executor reached the step limit (${maxSteps}) for ${task.id}; handing off to test-debugger.`);
     }
-    return {
-      status: "blocked",
-      summary: `Autonomous executor reached the step limit (${maxSteps}) for ${task.id}; debugger escalation exhausted.`,
-      toolResults,
-      artifacts: Array.from(artifacts),
-      usage: usageTotals
+    worldModel = {
+      ...worldModel,
+      phase: "escalate",
+      nextFocus: "Debugger escalation is exhausted; supervisor must replan or accept the blocker.",
+      lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
     };
+    return makeOutcome("blocked", `Autonomous executor reached the step limit (${maxSteps}) for ${task.id}; debugger escalation exhausted.`);
   }
 };
@@ -36678,6 +37446,15 @@ function updateTaskStatus(plan, taskId, status) {
     milestones
   };
 }
+function updateTaskExecutionState(plan, taskId, executionState) { // Records `executionState` on the task identified by `taskId` and returns the resulting plan.
+  if (!executionState) { // Nothing to record (falsy state): hand back the very same plan object.
+    return plan;
+  }
+  return replaceTask(plan, taskId, (task) => ({ // Delegates to replaceTask with a mapper; presumably only the task matching taskId is mapped — confirm against replaceTask.
+    ...task, // Shallow-copy the existing task fields...
+    executionState // ...then set (or overwrite) executionState with the supplied value.
+  }));
+}
 function replaceTask(plan, taskId, mapper) {
   const milestones = plan.milestones.map((milestone) => ({
     ...milestone,
@@ -37112,6 +37889,44 @@ ${result.stderr.slice(0, 1e3)}`);
     return lines.join("\n");
   }).join("\n\n");
 }
+function renderTaskWorldModelContext(task, outcome) { // Renders a task outcome as newline-separated "label: value" text; returns a single string.
+  const worldModel = outcome.worldModel;
+  if (!worldModel) { // Outcome carries no world model: emit only the three basic lines.
+    return [
+      `task: ${task.id}`,
+      `status: ${outcome.status}`,
+      `summary: ${outcome.summary}`
+    ].join("\n");
+  }
+  return [ // Same three basic lines first, followed by the world-model details.
+    `task: ${task.id}`,
+    `status: ${outcome.status}`,
+    `summary: ${outcome.summary}`,
+    `phase: ${worldModel.phase}`,
+    `next focus: ${worldModel.nextFocus ?? "(unset)"}`, // "(unset)" is used only when nextFocus is null/undefined.
+    `target files: ${worldModel.targetFiles.join(", ") || "(none)"}`, // An empty list joins to "" and therefore prints "(none)"; same pattern on the lines below.
+    `inspected files: ${worldModel.inspectedFiles.join(", ") || "(none)"}`,
+    `changed files: ${worldModel.changedFiles.join(", ") || "(none)"}`,
+    `blockers: ${worldModel.blockers.join(" | ") || "(none)"}`, // These entries are separated with " | " instead of ", ".
+    `proof pending: ${worldModel.proofPending.join(" | ") || "(none)"}`,
+    `proof satisfied: ${worldModel.proofSatisfied.join(" | ") || "(none)"}`,
+    `recent actions: ${worldModel.recentActions.join(" | ") || "(none)"}`,
+    `preferred verifiers: ${worldModel.verifier.preferredCommands.join(", ") || "(none)"}`, // Reads worldModel.verifier unguarded, so verifier must be present whenever worldModel is.
+    `latest verifier failure: ${worldModel.verifier.latestFailureSummary ?? "(none)"}`
+  ].join("\n");
+}
+function shouldRevisePlanAfterTaskOutcome(task, outcome) { // Returns a boolean: true when this task outcome should trigger a plan revision.
+  if (task.agentRole === "repo-analyst" || task.agentRole === "planner") { // These two roles never trigger a revision here, whatever the outcome.
+    return false;
+  }
+  if (outcome.supervisorHints?.shouldReplan || outcome.supervisorHints?.shouldReprioritize || outcome.supervisorHints?.shouldMergeDelegatedWork) { // Any truthy supervisor hint is enough; missing hints are tolerated via ?.
+    return true;
+  }
+  if (outcome.status === "blocked" || outcome.status === "handoff") { // Blocked and handoff outcomes always qualify.
+    return true;
+  }
+  return Boolean(task.parentTaskId && outcome.worldModel && (outcome.worldModel.phase === "repair" || outcome.worldModel.phase === "finalize" || outcome.worldModel.blockers.length > 0)); // Otherwise: only tasks with a parentTaskId (presumably delegated sub-tasks — confirm) whose world model is in repair/finalize phase or lists blockers.
+}
 function latestRepoAnalysisContext(events) {
   const event = [
     ...events
@@ -37549,7 +38364,13 @@ var ExecutionOrchestrator = class {
       let sawApprovalRequest = false;
       for (const batchResult of batchResults) {
         const { task: autoTask, outcome } = batchResult;
-        workingPlan = outcome.status === "handoff" ? replaceTask(workingPlan, autoTask.id, () => outcome.handoffTask ?? createDebuggerHandoffTask(autoTask, outcome, this.toolsForAgent("test-debugger").map((tool) => tool.id))) : updateTaskStatus(workingPlan, autoTask.id, outcome.status === "completed" ? "completed" : outcome.status === "blocked" ? "blocked" : "pending");
+        workingPlan = updateTaskExecutionState(workingPlan, autoTask.id, outcome.worldModel);
+        workingPlan = outcome.status === "handoff" ? replaceTask(workingPlan, autoTask.id, () => ({
+          ...outcome.handoffTask ?? createDebuggerHandoffTask(autoTask, outcome, this.toolsForAgent("test-debugger").map((tool) => tool.id)),
+          ...outcome.worldModel ? {
+            executionState: outcome.worldModel
+          } : {}
+        })) : updateTaskStatus(workingPlan, autoTask.id, outcome.status === "completed" ? "completed" : outcome.status === "blocked" ? "blocked" : "pending");
         if (autoTask.agentRole === "repo-analyst" && outcome.status === "completed") {
           const replanned = await this.maybeRevisePlanAfterRepoAnalysis(session.id, session.request, workingPlan, outcome, emitProgress);
           workingPlan = replanned.plan;
@@ -37562,6 +38383,16 @@ var ExecutionOrchestrator = class {
             });
           }
         }
+        const adapted = await this.maybeRevisePlanAfterTaskOutcome(session.id, session.request, workingPlan, autoTask, outcome, emitProgress);
+        workingPlan = adapted.plan;
+        if (adapted.note) {
+          notes = maybeAppendNote(notes, adapted.note);
+          await emitProgress({
+            type: "task-note",
+            sessionId: session.id,
+            message: adapted.note
+          });
+        }
         notes = maybeAppendNote(notes, outcome.summary);
         if (outcome.status === "awaiting-approval" && outcome.approvalRequest) {
           pendingApprovals.push(outcome.approvalRequest);
@@ -38141,6 +38972,12 @@ var ExecutionOrchestrator = class {
       ...outcome.usage ? {
         usage: outcome.usage
       } : {},
+      ...outcome.worldModel ? {
+        worldModel: outcome.worldModel
+      } : {},
+      ...outcome.supervisorHints ? {
+        supervisorHints: outcome.supervisorHints
+      } : {},
       ...integrated.integrationFailed && integrated.conflict && task.agentRole !== "integrator" ? {
         handoffTask: createIntegratorHandoffTask(task, outcome, this.toolsForAgent("integrator").map((tool) => tool.id), integrated.conflict)
       } : {}
@@ -38321,6 +39158,89 @@ var ExecutionOrchestrator = class {
       };
     }
   }
+  async maybeRevisePlanAfterTaskOutcome(sessionId, request, plan, task, outcome, emitProgress) { // Asks the "planner" brain to revise `plan` after a task outcome; resolves to { plan } or { plan, note }.
+    if (!shouldRevisePlanAfterTaskOutcome(task, outcome)) { // Guard: nothing to adapt for this task/outcome pair.
+      return {
+        plan // Hand back the incoming plan untouched.
+      };
+    }
+    const config2 = await loadConfig(request.cwd); // Config is looked up from the request's working directory.
+    if (!config2) { // Without a config no brain can be resolved, so keep the plan as-is.
+      return {
+        plan
+      };
+    }
+    const revisionContext = [ // Prompt context: one entry per line, joined with "\n" below.
+      "Task outcome context:",
+      renderTaskWorldModelContext(task, outcome),
+      "",
+      "Supervisor hints:",
+      outcome.supervisorHints ? [ // Render each hint flag as yes/no; falls back to "- none" when hints are absent.
+        `- should replan: ${outcome.supervisorHints.shouldReplan ? "yes" : "no"}`,
+        `- should reprioritize: ${outcome.supervisorHints.shouldReprioritize ? "yes" : "no"}`,
+        `- should merge delegated work: ${outcome.supervisorHints.shouldMergeDelegatedWork ? "yes" : "no"}`,
+        `- reason: ${outcome.supervisorHints.reason ?? "(none)"}`
+      ].join("\n") : "- none",
+      "",
+      "Tool evidence:",
+      renderPlannerReplanContext(outcome.toolResults),
+      "",
+      "Planner instructions:",
+      "- Reassign, merge, reorder, or split follow-up tasks if the task outcome suggests the current graph is suboptimal.",
+      "- Preserve completed work, but feel free to adapt pending task order, dependencies, agent roles, or milestone shape.",
+      "- Prefer making parallel work possible when blockers or delegated-worker outcomes reveal an opportunity."
+    ].join("\n");
+    try { // Errors thrown inside this block are reported as a task-note in the catch below instead of propagating.
+      const brain = await new BrainResolver(config2, createDefaultBrainProviderRegistry(request.cwd)).resolve("planner"); // Resolve the brain registered under the "planner" role.
+      const result = await revisePlanWithModel(request, plan, revisionContext, {
+        modelLabel: `${brain.provider.id}/${brain.model}`,
+        ...typeof brain.settings.temperature === "number" ? { // Forward temperature only when it is a number.
+          temperature: brain.settings.temperature
+        } : {},
+        ...typeof brain.settings.maxTokens === "number" ? { // Forward maxTokens only when it is a number.
+          maxTokens: brain.settings.maxTokens
+        } : {},
+        generate: async (input) => brain.client.generateText({ // Callback that delegates text generation to the resolved brain client.
+          model: brain.model,
+          systemPrompt: [
+            brain.settings.promptPreamble,
+            input.systemPrompt
+          ].filter(Boolean).join("\n\n"), // filter(Boolean) drops a missing/empty preamble or system prompt before joining.
+          userPrompt: input.userPrompt,
+          responseFormat: "json_object", // Request a JSON object response from the client.
+          ...typeof input.temperature === "number" ? {
+            temperature: input.temperature
+          } : {},
+          ...typeof input.maxTokens === "number" ? {
+            maxTokens: input.maxTokens
+          } : {}
+        })
+      });
+      if (result.source === "model") { // Adopt the revised plan only when the result is marked as coming from the model.
+        return {
+          plan: result.plan,
+          note: `Planner brain adapted the active task graph after ${task.id} via ${brain.provider.id}/${brain.model}.`
+        };
+      }
+      return { // Any other source: keep the current plan, attaching the warning (if any) as a note.
+        plan,
+        ...result.warning ? {
+          note: `${result.warning} Keeping the current task graph.`
+        } : {}
+      };
+    } catch (error2) {
+      if (emitProgress) { // emitProgress is treated as optional here.
+        await emitProgress({
+          type: "task-note",
+          sessionId,
+          message: `Planner task adaptation skipped: ${error2 instanceof Error ? error2.message : String(error2)}` // Non-Error throwables are stringified.
+        });
+      }
+      return { // After a failure the original plan is returned without a note.
+        plan
+      };
+    }
+  }
   async maybeExpandReadyTaskGraph(sessionId, request, plan, events, emitProgress) {
     const envelope = this.buildEnvelope(request, plan, sessionId);
     const candidate = envelope.readyTasks.find((task) => shouldDelegateTask(task, request));