@kimbho/kimbho-cli 0.1.28 → 0.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -12718,7 +12718,7 @@ function createCompletionRuntimeCommand(program2) {
12718
12718
  // package.json
12719
12719
  var package_default = {
12720
12720
  name: "@kimbho/kimbho-cli",
12721
- version: "0.1.28",
12721
+ version: "0.1.30",
12722
12722
  description: "Kimbho CLI is a terminal-native coding agent for planning, execution, and verification.",
12723
12723
  type: "module",
12724
12724
  engines: {
@@ -17627,6 +17627,44 @@ var RepoStrategySchema = external_exports.object({
17627
17627
  ]),
17628
17628
  reasoning: external_exports.string().min(1)
17629
17629
  });
// Enum schema listing the phase names that a task's execution state may hold.
var TaskExecutionPhaseSchema = (() => {
  const phaseNames = [
    "survey",
    "plan-edit",
    "implement",
    "verify",
    "repair",
    "finalize",
    "escalate"
  ];
  return external_exports.enum(phaseNames);
})();
// Object schema for the verifier bookkeeping kept alongside a task: several
// command lists (each defaulting to an empty array), a handful of optional
// non-empty strings, and a boolean flag that defaults to false.
var TaskVerifierStateSchema = (() => {
  // Array of non-empty strings that defaults to [].
  const commandList = () => external_exports.array(external_exports.string().min(1)).default([]);
  // Optional non-empty string.
  const optionalText = () => external_exports.string().min(1).optional();
  return external_exports.object({
    availableCommands: commandList(),
    preferredCommands: commandList(),
    attemptedCommands: commandList(),
    disabledCommands: commandList(),
    successfulCommands: commandList(),
    currentCommand: optionalText(),
    latestFailureSummary: optionalText(),
    latestFailureCommand: optionalText(),
    latestSuccessfulCommand: optionalText(),
    requiresInteractiveSetup: external_exports.boolean().default(false)
  });
})();
// Object schema for the per-task world model: the current phase (defaulting to
// "survey"), string lists that default to [], two non-negative integer counters
// that default to 0, the nested verifier state, and an optional ISO datetime.
var TaskWorldModelSchema = (() => {
  // Array of non-empty strings that defaults to [].
  const textList = () => external_exports.array(external_exports.string().min(1)).default([]);
  // Non-negative integer that defaults to 0.
  const counter = () => external_exports.number().int().nonnegative().default(0);
  return external_exports.object({
    phase: TaskExecutionPhaseSchema.default("survey"),
    targetFiles: textList(),
    inspectedFiles: textList(),
    changedFiles: textList(),
    hypotheses: textList(),
    blockers: textList(),
    proofPending: textList(),
    proofSatisfied: textList(),
    recentActions: textList(),
    recentCommands: textList(),
    nextFocus: external_exports.string().min(1).optional(),
    sourceEditCount: counter(),
    validationLoopCount: counter(),
    verifier: TaskVerifierStateSchema.default({}),
    lastUpdatedAt: external_exports.string().datetime().optional()
  });
})();
17630
17668
  var PlanTaskSchema = external_exports.object({
17631
17669
  id: external_exports.string().min(1),
17632
17670
  title: external_exports.string().min(1),
@@ -17662,7 +17700,8 @@ var PlanTaskSchema = external_exports.object({
17662
17700
  teamId: external_exports.string().min(1).optional(),
17663
17701
  teamMemberIds: external_exports.array(external_exports.string()).optional(),
17664
17702
  subagentLabel: external_exports.string().min(1).optional(),
17665
- subagentInstructions: external_exports.string().min(1).optional()
17703
+ subagentInstructions: external_exports.string().min(1).optional(),
17704
+ executionState: TaskWorldModelSchema.optional()
17666
17705
  });
17667
17706
  var PlanMilestoneSchema = external_exports.object({
17668
17707
  id: external_exports.string().min(1),
@@ -18150,6 +18189,103 @@ var LegacyKimbhoConfigSchema = external_exports.object({
18150
18189
  "next-prisma-postgres"
18151
18190
  ])
18152
18191
  });
/**
 * Trim each model id, drop entries that are nullish or blank, and remove
 * duplicates while keeping first-seen order.
 *
 * @param {Array<string|null|undefined>} models - Candidate model ids.
 * @returns {string[]} Unique, trimmed, non-empty model ids.
 */
function uniqueModelIds(models) {
  const seen = new Set();
  for (const entry of models) {
    const id = entry?.trim();
    if (id) {
      seen.add(id);
    }
  }
  return [...seen];
}
/**
 * List the model ids a provider offers, with its default model first.
 *
 * @param {{defaultModel?: string, models?: string[]}|null|undefined} provider
 *   Provider definition; may be missing entirely.
 * @returns {string[]} Unique, trimmed model ids; empty when there is no provider.
 */
function providerCandidateModels(provider) {
  if (!provider) {
    return [];
  }
  // A provider definition may omit `models`; spreading undefined would throw,
  // so fall back to an empty list and rely on the default model alone.
  const listedModels = Array.isArray(provider.models) ? provider.models : [];
  return uniqueModelIds([
    provider.defaultModel,
    ...listedModels
  ]);
}
/**
 * Estimate a model's size from "<number>b" markers in its name
 * (for example "70b" or "1.5B").
 *
 * @param {string} model - Model id.
 * @returns {number} The largest size marker found, or 0 when there is none.
 */
function estimateModelScale(model) {
  // The "b" must end the marker: without the lookahead, names such as "v1beta"
  // or "4bit" would be read as 1 and 4.
  const sizePattern = /(\d+(?:\.\d+)?)b(?![a-z0-9])/gi;
  const sizes = Array.from(model.matchAll(sizePattern), (match) => Number.parseFloat(match[1])).filter((value) => Number.isFinite(value));
  // Math.max() of an empty list is -Infinity, so handle "no sizes" explicitly.
  return sizes.length === 0 ? 0 : Math.max(...sizes);
}
/**
 * Heuristically score a model id for a brain role; a higher score means a
 * better fit. Used to rank a provider's candidate models.
 *
 * @param {string} model - Model id.
 * @param {string} role - Role being filled; "fast", "reviewer", "planner" and
 *   "coder" receive role-specific adjustments, any other role gets the base score.
 * @returns {number} Score for the model in that role.
 */
function scoreModelForRole(model, role) {
  const normalized = model.toLowerCase();
  const scale = estimateModelScale(normalized);
  // Tier markers must stand alone as a name segment. A bare substring test
  // finds "mini" inside "gemini" and "minimax", which would mark every Gemini
  // model as lightweight.
  const hasTierMarker = (alternation) => new RegExp(`(?<![a-z])(?:${alternation})(?![a-z])`).test(normalized);
  const isLightweight = hasTierMarker("mini|nano|flash|haiku|small|fast|instant|lite");
  let score = 0;
  // Base score by model family.
  if (/gpt-5(?!.*mini)(?!.*nano)/i.test(normalized) || /\bgpt5\b/i.test(normalized)) {
    score += 160;
  }
  if (/\bo3\b|\bo4\b|o4-mini-high/i.test(normalized)) {
    score += 145;
  }
  if (/opus|sonnet|claude-4|claude-3\.7/i.test(normalized)) {
    score += /opus|claude-4/i.test(normalized) ? 150 : 132;
  }
  if (/gpt-4\.1|gpt-4o|deepseek-r1|deepseek-v3|qwq/i.test(normalized)) {
    score += 122;
  }
  if (/qwen.*(?:32b|35b|72b|110b|235b)|llama.*(?:70b|90b|405b)|mixtral/i.test(normalized)) {
    score += 110;
  }
  if (/reason|thinking|r1|o[34]/i.test(normalized)) {
    score += 20;
  }
  // Size bonus, capped so very large size markers do not dominate.
  if (scale > 0) {
    score += Math.min(scale, 120) * 0.8;
  }
  // Penalties for lightweight tiers, small sizes and pre-release builds.
  if (isLightweight) {
    score -= 55;
  }
  if (/\b(?:3|7|8|9|14)b\b/i.test(normalized)) {
    score -= 35;
  }
  if (/preview|experimental|beta/i.test(normalized)) {
    score -= 6;
  }
  if (role === "fast") {
    // The fast role inverts the preference: lightweight tiers and small sizes
    // win, and the capability score only contributes 20%.
    let fastBias = 0;
    if (isLightweight) {
      fastBias += 95;
    }
    if (/sonnet|gpt-4o-mini|gpt-5-mini|claude.*haiku/i.test(normalized)) {
      fastBias += 60;
    }
    if (scale >= 32) {
      fastBias -= 40;
    } else if (scale > 0 && scale <= 16) {
      fastBias += 24;
    }
    return fastBias + score * 0.2;
  }
  if (role === "reviewer") {
    if (/reason|thinking|r1|\bo3\b|\bo4\b|opus/i.test(normalized)) {
      score += 24;
    }
    if (hasTierMarker("mini|flash|haiku")) {
      score -= 10;
    }
  }
  if (role === "planner") {
    if (/reason|thinking|r1|\bo3\b|\bo4\b|sonnet|opus/i.test(normalized)) {
      score += 18;
    }
  }
  if (role === "coder") {
    if (/sonnet|gpt-5|gpt-4\.1|deepseek|qwen|llama/i.test(normalized)) {
      score += 16;
    }
  }
  return score;
}
/**
 * Choose the provider's highest-scoring model for a role.
 *
 * @param {object|null|undefined} provider - Provider definition.
 * @param {string} role - Role to score the candidates for.
 * @returns {string|null} The winning model id, or null when the provider has
 *   no candidate models. On equal scores the earlier candidate wins.
 */
function pickPreferredProviderModel(provider, role) {
  const candidates = providerCandidateModels(provider);
  if (candidates.length === 0) {
    return null;
  }
  const ranked = candidates.slice();
  ranked.sort((left, right) => {
    const rightScore = scoreModelForRole(right, role);
    const leftScore = scoreModelForRole(left, role);
    return rightScore - leftScore;
  });
  const [winner] = ranked;
  return winner ?? null;
}
18153
18289
  function createBrainCatalog(providerId, defaultModel, fastModel) {
18154
18290
  return {
18155
18291
  planner: {
@@ -18229,12 +18365,13 @@ function normalizeConfigInput(raw) {
18229
18365
  const legacy = LegacyKimbhoConfigSchema.safeParse(raw);
18230
18366
  if (legacy.success) {
18231
18367
  const provider = mapLegacyProviderToDefinition(legacy.data.provider);
18232
- const defaultModel = provider.defaultModel ?? "gpt-5";
18368
+ const defaultModel = pickPreferredProviderModel(provider, "planner") ?? provider.defaultModel ?? "gpt-5";
18369
+ const fastModel = pickPreferredProviderModel(provider, "fast") ?? defaultModel;
18233
18370
  return {
18234
18371
  providers: [
18235
18372
  provider
18236
18373
  ],
18237
- brains: createBrainCatalog(provider.id, defaultModel, defaultModel),
18374
+ brains: createBrainCatalog(provider.id, defaultModel, fastModel),
18238
18375
  approvalMode: legacy.data.approvalMode,
18239
18376
  sandboxMode: legacy.data.sandboxMode,
18240
18377
  stackPresets: legacy.data.stackPresets,
@@ -18285,8 +18422,8 @@ function createDefaultConfig(options = {}) {
18285
18422
  baseUrl: "https://api.openai.com/v1",
18286
18423
  defaultModel: "gpt-5"
18287
18424
  });
18288
- const defaultModel = options.defaultModel ?? provider.defaultModel;
18289
- const fastModel = options.fastModel ?? defaultModel;
18425
+ const defaultModel = options.defaultModel ?? pickPreferredProviderModel(provider, "planner") ?? provider.defaultModel;
18426
+ const fastModel = options.fastModel ?? pickPreferredProviderModel(provider, "fast") ?? defaultModel;
18290
18427
  return KimbhoConfigSchema.parse({
18291
18428
  providers: [
18292
18429
  provider
@@ -18765,7 +18902,10 @@ function resolveBrainSettings(config2, role) {
/**
 * Resolve the model id to use for a brain role.
 *
 * An explicitly configured (truthy) model wins. Otherwise the provider's
 * best-scoring model for the role is used, then the provider's default model,
 * then null.
 *
 * @param {object} config2 - Loaded configuration.
 * @param {string} role - Brain role to resolve.
 * @returns {string|null} Model id, or null when nothing can be resolved.
 */
function resolveBrainModel(config2, role) {
  const brainSettings = resolveBrainSettings(config2, role);
  const provider = findProviderById(config2, brainSettings.providerId);
  const configuredModel = brainSettings.model;
  return configuredModel || (pickPreferredProviderModel(provider, role) ?? provider?.defaultModel ?? null);
}
18770
18910
 
18771
18911
  // ../core/dist/session/store.js
@@ -32988,6 +33128,9 @@ function combinePositiveLimit(...values) {
32988
33128
  }
32989
33129
  return Math.min(...filtered);
32990
33130
  }
/**
 * Trim each string, drop blanks, and remove duplicates while keeping
 * first-seen order.
 *
 * Entries that are not strings (null, undefined, numbers, ...) are skipped
 * rather than throwing, as the sibling `uniqueModelIds` already does for
 * nullish entries.
 *
 * @param {Array<unknown>} values - Candidate strings.
 * @returns {string[]} Unique, trimmed, non-empty strings.
 */
function uniqueStrings2(values) {
  const seen = new Set();
  for (const value of values) {
    if (typeof value !== "string") {
      continue;
    }
    const trimmed = value.trim();
    if (trimmed.length > 0) {
      seen.add(trimmed);
    }
  }
  return Array.from(seen);
}
32991
33134
  function truncateForModel(value) {
32992
33135
  if (!value) {
32993
33136
  return value;
@@ -33077,6 +33220,18 @@ function isReadOnlyShellCommand2(command) {
33077
33220
  "git diff"
33078
33221
  ].some((prefix) => normalized === prefix || normalized.startsWith(prefix));
33079
33222
  }
/**
 * Report whether a shell command only reads file contents through one of
 * cat / head / tail / wc / sed / more / less.
 *
 * @param {string} command - Raw shell command line.
 * @returns {boolean} True when the command starts with one of those tools
 *   followed by arguments and is not an in-place `sed` edit.
 */
function isShellFileInspectionCommand(command) {
  const normalized = command.trim().toLowerCase();
  // The tool name must be followed by whitespace and at least one argument, so
  // "catalog ..." and a bare "cat" do not qualify. Tabs count as separators.
  const parsed = /^(cat|head|tail|wc|sed|more|less)[ \t]+([\s\S]*)$/.exec(normalized);
  if (parsed === null) {
    return false;
  }
  const [, tool, args] = parsed;
  // `sed -i` / `--in-place` rewrites the file, which is an edit, not inspection.
  if (tool === "sed" && /(?:^|\s)(?:-[a-z]*i|--in-place)/.test(args)) {
    return false;
  }
  return true;
}
33080
33235
  function isVerificationCommand(command) {
33081
33236
  const normalized = command.trim().toLowerCase();
33082
33237
  return [
@@ -33121,6 +33276,310 @@ function isVerificationAction(action) {
33121
33276
  const command = typeof action.input.command === "string" ? action.input.command : "";
33122
33277
  return command.length > 0 && isVerificationCommand(command);
33123
33278
  }
/**
 * Report whether an action validates behaviour at runtime: either a
 * verification action, or one of the process / browser / HTTP tools.
 *
 * @param {object} action - Parsed agent action.
 * @returns {boolean} True for tool actions that perform runtime validation.
 */
function isRuntimeValidationAction(action) {
  if (action.type !== "tool") {
    return false;
  }
  if (isVerificationAction(action)) {
    return true;
  }
  switch (action.tool) {
    case "process.start":
    case "process.logs":
    case "process.stop":
    case "browser.open":
    case "browser.inspect":
    case "browser.click":
    case "browser.fill":
    case "browser.close":
    case "http.fetch":
      return true;
    default:
      return false;
  }
}
/**
 * Report whether an action is a `shell.exec` call whose command is a shell
 * file-inspection command.
 *
 * @param {object} action - Parsed agent action.
 * @returns {boolean} True only for matching `shell.exec` tool actions.
 */
function isShellFileInspectionAction(action) {
  const isShellExec = action.type === "tool" && action.tool === "shell.exec";
  if (!isShellExec) {
    return false;
  }
  // A missing or non-string command is treated as the empty command.
  const rawCommand = action.input.command;
  return isShellFileInspectionCommand(typeof rawCommand === "string" ? rawCommand : "");
}
/**
 * Report whether an action uses one of the repository inspection tools
 * (file.read, file.search, file.list, repo.index, repo.query, git.diff).
 *
 * @param {object} action - Parsed agent action.
 * @returns {boolean} True for tool actions using one of those tools.
 */
function isRepoInspectionAction(action) {
  if (action.type !== "tool") {
    return false;
  }
  switch (action.tool) {
    case "file.read":
    case "file.search":
    case "file.list":
    case "repo.index":
    case "repo.query":
    case "git.diff":
      return true;
    default:
      return false;
  }
}
/**
 * Normalise a file path for storage in the task world model.
 *
 * Backslashes become forward slashes and surrounding whitespace is trimmed.
 * Relative paths lose one leading "./". Absolute paths inside `cwd` become
 * workspace-relative; absolute paths outside `cwd` (or equal to it) are
 * returned unchanged apart from the slash normalisation.
 *
 * @param {string} cwd - Workspace root.
 * @param {string} value - Path to normalise.
 * @returns {string} Normalised path; empty when `value` is blank.
 */
function normalizeWorkspacePath(cwd, value) {
  const normalized = value.replace(/\\/g, "/").trim();
  if (normalized.length === 0) {
    return normalized;
  }
  if (!import_node_path14.default.isAbsolute(normalized)) {
    return normalized.replace(/^\.\//, "");
  }
  const relative = import_node_path14.default.relative(cwd, normalized).replace(/\\/g, "/");
  // Only a leading ".." path segment means the target is outside the workspace.
  // A plain startsWith("..") would also reject in-workspace names such as
  // "..cache/file.js". path.relative() can return an absolute path when no
  // relative route exists (e.g. another Windows drive).
  const escapesWorkspace = relative === ".." || relative.startsWith("../") || import_node_path14.default.isAbsolute(relative);
  return relative.length > 0 && !escapesWorkspace ? relative : normalized;
}
/**
 * Discover non-interactive verification commands for a workspace.
 *
 * Reads `package.json` in `cwd` and offers the `typecheck`, `build`, `test`
 * and `lint` scripts (in that order), rendered for the package manager named
 * in the `packageManager` field (npm when absent or unrecognised). When no
 * script qualifies and a `tsconfig.json` exists, falls back to
 * `npx tsc --noEmit`.
 *
 * Detection is best effort: a missing or unparsable manifest yields no script
 * commands rather than an error.
 *
 * @param {string} cwd - Workspace root directory.
 * @returns {Promise<{availableCommands: string[], preferredCommands: string[]}>}
 *   All detected commands, and the subset ordered with typecheck/build/test
 *   ahead of lint.
 */
async function detectVerificationCommands(cwd) {
  const commands = [];
  const packagePath = import_node_path14.default.join(cwd, "package.json");
  try {
    // readFile already rejects when the file is missing, so no separate
    // access() probe is needed.
    const raw = await (0, import_promises14.readFile)(packagePath, "utf8");
    const parsed = JSON.parse(raw);
    // Tolerate odd manifests (null, non-object scripts, non-string
    // packageManager) instead of throwing and discarding every script.
    const manifest = parsed !== null && typeof parsed === "object" ? parsed : {};
    const scripts = manifest.scripts !== null && typeof manifest.scripts === "object" ? manifest.scripts : {};
    const declaredManager = typeof manifest.packageManager === "string" ? manifest.packageManager : "";
    const packageManager = ["pnpm", "yarn", "bun"].find((name) => declaredManager.startsWith(name)) ?? "npm";
    const renderRun = (script) => packageManager === "yarn" ? `yarn ${script}` : `${packageManager} run ${script}`;
    const isUsableScript = (name) => {
      const body = scripts[name];
      // The npm-init placeholder (`echo "Error: no test specified" && exit 1`)
      // always fails, so it is useless as a verifier.
      return typeof body === "string" && body.trim().length > 0 && !/no test specified/i.test(body);
    };
    for (const name of ["typecheck", "build", "test", "lint"]) {
      if (isUsableScript(name)) {
        commands.push(renderRun(name));
      }
    }
  } catch {
    // Deliberately ignored: no readable package.json means no script commands.
  }
  if (commands.length === 0) {
    try {
      await (0, import_promises14.access)(import_node_path14.default.join(cwd, "tsconfig.json"));
      commands.push("npx tsc --noEmit");
    } catch {
      // Deliberately ignored: no tsconfig.json means no TypeScript fallback.
    }
  }
  const unique = uniqueStrings2(commands);
  const preferred = [
    ...unique.filter((command) => /typecheck|build|test/i.test(command)),
    ...unique.filter((command) => /lint/i.test(command) && !/typecheck|build|test/i.test(command))
  ];
  return {
    availableCommands: unique,
    preferredCommands: uniqueStrings2(preferred)
  };
}
/**
 * Build the starting world model for a task that has no saved execution state.
 *
 * The starting phase depends on the task type and workspace state:
 * verification tasks start in "verify"; integration and documentation tasks in
 * "finalize"; other tasks in "survey" for an existing workspace, otherwise in
 * "implement".
 *
 * @param {object} task - Plan task (id, type, description, filesLikelyTouched,
 *   acceptanceCriteria are read).
 * @param {object} request - Execution request (cwd and workspaceState are read).
 * @param {{availableCommands: string[], preferredCommands: string[]}} verifier
 *   Detected verification commands.
 * @returns {object} Fresh world model stamped with the current time.
 */
function createInitialWorldModel(task, request, verifier) {
  let phase;
  if (task.type === "verification") {
    phase = "verify";
  } else if (task.type === "integration" || task.type === "documentation") {
    phase = "finalize";
  } else if (request.workspaceState === "existing") {
    phase = "survey";
  } else {
    phase = "implement";
  }
  const targetFiles = uniqueStrings2(task.filesLikelyTouched.map((filePath) => normalizeWorkspacePath(request.cwd, filePath)));
  const hypotheses = uniqueStrings2([
    `Satisfy task acceptance criteria for ${task.id}.`,
    task.description
  ]);
  const proofPending = uniqueStrings2(task.acceptanceCriteria);
  let nextFocus = "Inspect the likely source files and determine the minimal safe change.";
  if (phase === "verify") {
    nextFocus = "Run the preferred verification path and capture proof.";
  }
  const verifierState = {
    availableCommands: verifier.availableCommands,
    preferredCommands: verifier.preferredCommands,
    attemptedCommands: [],
    disabledCommands: [],
    successfulCommands: [],
    requiresInteractiveSetup: false
  };
  return {
    phase,
    targetFiles,
    inspectedFiles: [],
    changedFiles: [],
    hypotheses,
    blockers: [],
    proofPending,
    proofSatisfied: [],
    recentActions: [],
    recentCommands: [],
    nextFocus,
    sourceEditCount: 0,
    validationLoopCount: 0,
    verifier: verifierState,
    lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
  };
}
/**
 * Render the world model as a newline-separated "label: value" summary.
 * Empty lists and unset values are shown as placeholders.
 *
 * @param {object} worldModel - Task world model.
 * @returns {string} Multi-line summary text.
 */
function summarizeWorldModel(worldModel) {
  // Join a list with the given separator, showing "(none)" when the result is empty.
  const listOrNone = (items, separator) => {
    const joined = items.join(separator);
    return joined || "(none)";
  };
  const { verifier } = worldModel;
  const lines = [];
  lines.push(`phase: ${worldModel.phase}`);
  lines.push(`next focus: ${worldModel.nextFocus ?? "(unset)"}`);
  lines.push(`target files: ${listOrNone(worldModel.targetFiles, ", ")}`);
  lines.push(`inspected files: ${listOrNone(worldModel.inspectedFiles, ", ")}`);
  lines.push(`changed files: ${listOrNone(worldModel.changedFiles, ", ")}`);
  lines.push(`hypotheses: ${listOrNone(worldModel.hypotheses, " | ")}`);
  lines.push(`blockers: ${listOrNone(worldModel.blockers, " | ")}`);
  lines.push(`proof pending: ${listOrNone(worldModel.proofPending, " | ")}`);
  lines.push(`proof satisfied: ${listOrNone(worldModel.proofSatisfied, " | ")}`);
  lines.push(`verifier preferred commands: ${listOrNone(verifier.preferredCommands, ", ")}`);
  lines.push(`verifier disabled commands: ${listOrNone(verifier.disabledCommands, ", ")}`);
  lines.push(`latest verifier failure: ${verifier.latestFailureSummary ?? "(none)"}`);
  lines.push(`recent actions: ${listOrNone(worldModel.recentActions, " | ")}`);
  return lines.join("\n");
}
/**
 * Return a copy of the world model with an action (and optionally a command)
 * appended to its recent-history lists.
 *
 * Each list holds at most six unique entries ordered oldest to newest. When
 * the new entry is already present it moves to the end, so the last element
 * is always the most recent action. The input world model is not mutated.
 *
 * @param {object} worldModel - Current world model.
 * @param {string} label - Short description of the action just taken.
 * @param {string} [command] - Command that was run, if any; when falsy the
 *   recent command list is carried over unchanged.
 * @returns {object} Updated world model with a fresh `lastUpdatedAt`.
 */
function recordWorldModelAction(worldModel, label, command) {
  const historyLimit = 6;
  const appendRecent = (history, entry) => {
    const latest = entry.trim();
    // Drop any earlier occurrence of the new entry so it can be re-added at the
    // end; keeping the first occurrence would leave a repeated action looking
    // like the oldest one.
    const earlier = history.slice(-(historyLimit - 1)).map((item) => item.trim()).filter((item) => item.length > 0 && item !== latest);
    const ordered = latest.length > 0 ? [...earlier, latest] : earlier;
    return Array.from(new Set(ordered));
  };
  return {
    ...worldModel,
    recentActions: appendRecent(worldModel.recentActions, label),
    recentCommands: command ? appendRecent(worldModel.recentCommands, command) : worldModel.recentCommands,
    lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
  };
}
/**
 * Score a verification command against the current world model; a higher
 * score means the command is a better next verifier.
 *
 * Each command kind (typecheck, build, test, lint) earns a larger bonus when a
 * pending proof item mentions a related keyword. Re-running the last failing
 * command is favoured during repair. Dev-server style commands, commands that
 * already succeeded, and disabled commands are penalised.
 *
 * @param {string} command - Candidate command.
 * @param {object} worldModel - Task world model.
 * @returns {number} Score for the command.
 */
function scoreVerificationCommand(command, worldModel) {
  const normalized = command.trim().toLowerCase();
  const { verifier, proofPending } = worldModel;
  // [command pattern, pending-proof pattern, bonus when a proof item matches, bonus otherwise]
  const kindRules = [
    [/typecheck|tsc --noemit|tsc\b/i, /typescript|compile|type/i, 70, 32],
    [/build/i, /build|compile|render|page|layout|responsive/i, 68, 34],
    [/test|vitest|jest/i, /test|behavior|logic|regression/i, 74, 38],
    [/lint|eslint/i, /lint|style|quality/i, 54, 18]
  ];
  let score = 0;
  const lastFailure = verifier.latestFailureCommand;
  if (worldModel.phase === "repair" && lastFailure && normalized === lastFailure.trim().toLowerCase()) {
    score += 80;
  }
  for (const [commandPattern, proofPattern, matchedBonus, baseBonus] of kindRules) {
    if (!commandPattern.test(normalized)) {
      continue;
    }
    const proofWanted = proofPending.some((item) => proofPattern.test(item));
    score += proofWanted ? matchedBonus : baseBonus;
  }
  if (/dev|serve|start/i.test(normalized)) {
    score -= 18;
  }
  if (verifier.successfulCommands.includes(command)) {
    score -= 12;
  }
  if (verifier.disabledCommands.includes(command)) {
    score -= 1e3;
  }
  return score;
}
/**
 * Pick the verification command to run next.
 *
 * Candidates are the last failing command (only in the repair phase), then the
 * preferred commands, then all available commands, with duplicates and
 * disabled commands removed. The highest-scoring candidate wins; on equal
 * scores the earlier candidate wins.
 *
 * @param {object} worldModel - Task world model.
 * @returns {string|null} Command to run, or null when no candidate remains.
 */
function determineNextVerificationCommand(worldModel) {
  const { verifier } = worldModel;
  const pool = [];
  if (worldModel.phase === "repair" && verifier.latestFailureCommand) {
    pool.push(verifier.latestFailureCommand);
  }
  pool.push(...verifier.preferredCommands, ...verifier.availableCommands);
  const candidates = uniqueStrings2(pool).filter((command) => !verifier.disabledCommands.includes(command));
  if (candidates.length === 0) {
    return null;
  }
  const ranked = candidates.slice();
  ranked.sort((left, right) => {
    const rightScore = scoreVerificationCommand(right, worldModel);
    const leftScore = scoreVerificationCommand(left, worldModel);
    return rightScore - leftScore;
  });
  const [best] = ranked;
  return best ?? null;
}
/**
 * Compute the proof lists after a verification command has run.
 *
 * A failed run, or a missing command, leaves both lists as they are. On
 * success, each command kind (typecheck, build, test, lint) moves the pending
 * proof items that mention a related keyword to the satisfied list, or adds a
 * generic label when none match. A command of no known kind adds
 * "Verified via <command>".
 *
 * @param {object} worldModel - Task world model (proofPending and
 *   proofSatisfied are read).
 * @param {string} command - Command that was run.
 * @param {boolean} success2 - Whether the command succeeded.
 * @returns {{proofPending: string[], proofSatisfied: string[]}} Updated lists.
 */
function updateProofStateForVerification(worldModel, command, success2) {
  if (!(command && success2)) {
    return {
      proofPending: worldModel.proofPending,
      proofSatisfied: worldModel.proofSatisfied
    };
  }
  const normalized = command.toLowerCase();
  // [command pattern, pending-proof pattern, label used when no pending item matches]
  const kindRules = [
    [/typecheck|tsc --noemit|tsc\b/, /typescript|compile|type/i, `Type safety verified via ${command}`],
    [/build/, /build|compile|render|page|layout|responsive/i, `Build/render verification passed via ${command}`],
    [/test|vitest|jest/, /test|behavior|logic|regression/i, `Behavior verified via ${command}`],
    [/lint|eslint/, /lint|style|quality/i, `Code quality verified via ${command}`]
  ];
  const newlySatisfied = [];
  let stillPending = worldModel.proofPending.slice();
  for (const [commandPattern, proofPattern, fallbackLabel] of kindRules) {
    if (!commandPattern.test(normalized)) {
      continue;
    }
    const covered = stillPending.filter((item) => proofPattern.test(item));
    if (covered.length === 0) {
      newlySatisfied.push(fallbackLabel);
      continue;
    }
    newlySatisfied.push(...covered);
    stillPending = stillPending.filter((item) => !proofPattern.test(item));
  }
  if (newlySatisfied.length === 0) {
    newlySatisfied.push(`Verified via ${command}`);
  }
  return {
    proofPending: uniqueStrings2(stillPending),
    proofSatisfied: uniqueStrings2(worldModel.proofSatisfied.concat(newlySatisfied))
  };
}
/**
 * Produce the phase-specific rules that are added to the system prompt.
 *
 * @param {object} worldModel - Task world model (phase and verifier are read).
 * @returns {string[]} Guidance lines for the current phase; an empty array for
 *   an unrecognised phase, so callers can always iterate the result.
 */
function derivePhaseGuidance(worldModel) {
  switch (worldModel.phase) {
    case "survey":
      return [
        "Survey likely source files and constraints before making a change.",
        "Do not spend more than one baseline validation action before a real source edit lands."
      ];
    case "plan-edit":
    case "implement":
      return [
        "Make the concrete source change now.",
        "Use file.patch or file.write against the likely target files before further runtime validation."
      ];
    case "verify":
      return [
        `Use the strategic verifier next: ${determineNextVerificationCommand(worldModel) ?? "(choose the best non-interactive verifier)"}.`,
        "Capture proof for the changed behavior before finishing."
      ];
    case "repair":
      return [
        `Repair the latest failing verifier before rerunning it: ${worldModel.verifier.latestFailureSummary ?? "(missing failure summary)"}.`,
        "Inspect failure output, edit the relevant source, then rerun the strategic verifier."
      ];
    case "finalize":
      return [
        "Only finish once the key proof is captured and no blocker remains.",
        "Use git.diff or one final verifier if you still need confirmation."
      ];
    case "escalate":
      return [
        "Summarize blockers precisely so the supervisor can reassign or replan."
      ];
    default:
      // Without a default the function would return undefined for an unknown
      // phase, and the prompt builder's `.map` over the result would throw.
      return [];
  }
}
/**
 * Derive hints for the supervisor from a task outcome.
 *
 * Rules are checked in order: a completed delegated task suggests merging and
 * reprioritising; a blocked or handed-off task, or one in the "escalate"
 * phase, suggests replanning; a delegated task in the "repair" phase suggests
 * replanning and reprioritising. A task counts as delegated when it has a
 * `parentTaskId`.
 *
 * @param {string} status - Outcome status of the task.
 * @param {object} task - Plan task (id and parentTaskId are read).
 * @param {object} worldModel - Task world model (phase and blockers are read).
 * @returns {object|undefined} Hint object, or undefined when no rule applies.
 */
function deriveSupervisorHints(status, task, worldModel) {
  const isDelegated = Boolean(task.parentTaskId);
  if (isDelegated && status === "completed") {
    return {
      shouldMergeDelegatedWork: true,
      shouldReprioritize: true,
      reason: `Delegated task ${task.id} completed; parent flow may be ready to merge or reprioritize.`
    };
  }
  const needsIntervention = ["blocked", "handoff"].includes(status) || worldModel.phase === "escalate";
  if (needsIntervention) {
    const [firstBlocker] = worldModel.blockers;
    return {
      shouldReplan: true,
      shouldReprioritize: isDelegated,
      reason: firstBlocker ?? `Task ${task.id} needs supervisor intervention.`
    };
  }
  if (isDelegated && worldModel.phase === "repair") {
    return {
      shouldReplan: true,
      shouldReprioritize: true,
      reason: `Delegated task ${task.id} is in repair mode and may need reassignment or dependency changes.`
    };
  }
  return undefined;
}
33124
33583
  function extractShellCommand(action) {
33125
33584
  if (action.type !== "tool" || action.tool !== "shell.exec") {
33126
33585
  return "";
@@ -33139,7 +33598,7 @@ function isInteractiveVerificationSetupFailure(action, result) {
33139
33598
  ].join("\n").toLowerCase();
33140
33599
  return combined.includes("interactive eslint setup required") || combined.includes("command requires interactive input before it can continue") || command.includes("lint") && combined.includes("how would you like to configure eslint");
33141
33600
  }
33142
- function buildSystemPrompt(agent, task, request, allowedTools, plan, extraInstructions) {
33601
+ function buildSystemPrompt(agent, task, request, allowedTools, worldModel, plan, extraInstructions) {
33143
33602
  const toolShape = allowedTools.join("|");
33144
33603
  const dependencyTasks = plan ? flattenPlanTasks(plan).filter((candidate) => task.dependsOn.includes(candidate.id)) : [];
33145
33604
  const completedTasks = plan ? flattenPlanTasks(plan).filter((candidate) => candidate.status === "completed" && candidate.id !== task.id) : [];
@@ -33153,6 +33612,7 @@ function buildSystemPrompt(agent, task, request, allowedTools, plan, extraInstru
33153
33612
  `Goal: ${request.goal}`,
33154
33613
  `Current task: ${task.id} - ${task.title}`,
33155
33614
  `Task description: ${task.description}`,
33615
+ `Execution phase: ${worldModel.phase}`,
33156
33616
  `Acceptance criteria:`,
33157
33617
  ...task.acceptanceCriteria.map((item) => `- ${item}`),
33158
33618
  `Likely files: ${task.filesLikelyTouched.join(", ") || "(not specified)"}`,
@@ -33162,6 +33622,8 @@ function buildSystemPrompt(agent, task, request, allowedTools, plan, extraInstru
33162
33622
  `Completed tasks in this run: ${completedTasks.length > 0 ? completedTasks.slice(-4).map((candidate) => `${candidate.id}:${candidate.title}`).join(", ") : "(none yet)"}`,
33163
33623
  `Downstream tasks depending on this task: ${downstreamTasks.length > 0 ? downstreamTasks.slice(0, 4).map((candidate) => `${candidate.id}:${candidate.title}`).join(", ") : "(none)"}`,
33164
33624
  `Allowed tools: ${allowedTools.join(", ")}`,
33625
+ `Task world-model:`,
33626
+ summarizeWorldModel(worldModel),
33165
33627
  `Respond with exactly one JSON object and no markdown.`,
33166
33628
  `Tool action shape: {"type":"tool","tool":"${toolShape}","input":{...},"reason":"why this step matters"}`,
33167
33629
  `Finish shape: {"type":"finish","summary":"what was completed and verified"}`,
@@ -33170,6 +33632,7 @@ function buildSystemPrompt(agent, task, request, allowedTools, plan, extraInstru
33170
33632
  `- Use one action per response.`,
33171
33633
  `- Use file.list, file.search, repo.index, and repo.query to explore the workspace before editing.`,
33172
33634
  `- Prefer file.read before editing existing files.`,
33635
+ `- For repo file inspection, prefer file.read, file.search, file.list, repo.query, and git.diff instead of shell.exec cat/head/tail/wc/sed.`,
33173
33636
  `- Use scaffold.generate when the task is clearly greenfield and a known preset fits better than improvising every file by hand.`,
33174
33637
  `- Use file.patch for existing files when possible; use file.write for new files or full replacements.`,
33175
33638
  `- Use git.diff to inspect the current patch after changes when helpful.`,
@@ -33184,6 +33647,7 @@ function buildSystemPrompt(agent, task, request, allowedTools, plan, extraInstru
33184
33647
  `- If a verification command asks for interactive setup or operator input, do not rerun it unchanged. Choose a different non-interactive verifier, or configure that verifier only if the task explicitly requires it.`,
33185
33648
  `- Do not claim success unless the task acceptance criteria are satisfied.`,
33186
33649
  `- If the task is underspecified, make a pragmatic implementation choice and continue.`,
33650
+ ...derivePhaseGuidance(worldModel).map((rule) => `- ${rule}`),
33187
33651
  ...task.subagentInstructions ? [
33188
33652
  `Delegation instructions:`,
33189
33653
  task.subagentInstructions
@@ -33473,6 +33937,8 @@ var AutonomousTaskExecutor = class {
33473
33937
  const brain = await this.resolver.resolve(effectiveBrainRole);
33474
33938
  const allowedTools = resolvedExecutionPolicy.allowedTools;
33475
33939
  const actionSchema = createAgentActionSchema(allowedTools);
33940
+ const verificationCommands = await detectVerificationCommands(request.cwd);
33941
+ let worldModel = task.executionState ?? createInitialWorldModel(task, request, verificationCommands);
33476
33942
  const messages = [
33477
33943
  {
33478
33944
  role: "user",
@@ -33499,11 +33965,13 @@ var AutonomousTaskExecutor = class {
33499
33965
  }
33500
33966
  };
33501
33967
  let changedWorkspace = false;
33968
+ let appliedSourceEdit = false;
33502
33969
  let verifiedAfterLatestChange = false;
33503
33970
  let repairRequiredBeforeVerification = false;
33504
33971
  let repairAppliedSinceFailure = false;
33505
33972
  let verificationFailures = 0;
33506
33973
  let lastVerificationFailure = null;
33974
+ let preEditValidationActions = 0;
33507
33975
  let preservedMessageCount = messages.length;
33508
33976
  let compactedTranscriptEntries = 0;
33509
33977
  let compactionSummary = null;
@@ -33514,6 +33982,26 @@ var AutonomousTaskExecutor = class {
33514
33982
  inputTokens: 0,
33515
33983
  outputTokens: 0
33516
33984
  };
33985
+ const snapshotWorldModel = () => ({
33986
+ ...worldModel,
33987
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
33988
+ });
33989
+ const makeOutcome = (status, summary, extra = {}) => {
33990
+ const currentWorldModel = snapshotWorldModel();
33991
+ const supervisorHints = deriveSupervisorHints(status, task, currentWorldModel);
33992
+ return {
33993
+ status,
33994
+ summary,
33995
+ toolResults,
33996
+ artifacts: Array.from(artifacts),
33997
+ usage: usageTotals,
33998
+ worldModel: currentWorldModel,
33999
+ ...supervisorHints ? {
34000
+ supervisorHints
34001
+ } : {},
34002
+ ...extra
34003
+ };
34004
+ };
33517
34005
  const createToolExecutionContext = (step, approvalReason, operatorApproved = false) => ({
33518
34006
  cwd: request.cwd,
33519
34007
  ...options.signal ? {
@@ -33587,13 +34075,17 @@ var AutonomousTaskExecutor = class {
33587
34075
  });
33588
34076
  const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
33589
34077
  artifacts.add(transcriptPath2);
33590
- return {
33591
- status: "blocked",
33592
- summary,
33593
- toolResults,
33594
- artifacts: Array.from(artifacts),
33595
- usage: usageTotals
34078
+ worldModel = {
34079
+ ...worldModel,
34080
+ phase: "escalate",
34081
+ blockers: uniqueStrings2([
34082
+ ...worldModel.blockers,
34083
+ summary
34084
+ ]),
34085
+ nextFocus: "Supervisor should replan or reassign this task because a budget was exhausted.",
34086
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
33596
34087
  };
34088
+ return makeOutcome("blocked", summary);
33597
34089
  };
33598
34090
  await emitProgress({
33599
34091
  type: "task-note",
@@ -33622,24 +34114,32 @@ var AutonomousTaskExecutor = class {
33622
34114
  });
33623
34115
  const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
33624
34116
  artifacts.add(transcriptPath2);
33625
- return {
33626
- status: "blocked",
33627
- summary,
33628
- toolResults,
33629
- artifacts: Array.from(artifacts),
33630
- usage: usageTotals
34117
+ worldModel = {
34118
+ ...worldModel,
34119
+ phase: "escalate",
34120
+ blockers: uniqueStrings2([
34121
+ ...worldModel.blockers,
34122
+ summary
34123
+ ]),
34124
+ nextFocus: "Supervisor should inspect why this task has no executable tools.",
34125
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
33631
34126
  };
34127
+ return makeOutcome("blocked", summary);
33632
34128
  }
33633
34129
  if (resolvedApproval?.decision === "deny") {
33634
34130
  const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
33635
34131
  artifacts.add(transcriptPath2);
33636
- return {
33637
- status: "blocked",
33638
- summary: `Operator denied approval for ${resolvedApproval.approval.toolId} in ${task.id}.`,
33639
- toolResults,
33640
- artifacts: Array.from(artifacts),
33641
- usage: usageTotals
34132
+ worldModel = {
34133
+ ...worldModel,
34134
+ phase: "escalate",
34135
+ blockers: uniqueStrings2([
34136
+ ...worldModel.blockers,
34137
+ `Operator denied ${resolvedApproval.approval.toolId}.`
34138
+ ]),
34139
+ nextFocus: "Choose a safer path or wait for supervisor replanning after the denied action.",
34140
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
33642
34141
  };
34142
+ return makeOutcome("blocked", `Operator denied approval for ${resolvedApproval.approval.toolId} in ${task.id}.`);
33643
34143
  }
33644
34144
  if (request.workspaceState === "existing") {
33645
34145
  const preflightResults = [];
@@ -33681,6 +34181,18 @@ var AutonomousTaskExecutor = class {
33681
34181
  for (const artifact of result.artifacts) {
33682
34182
  artifacts.add(artifact);
33683
34183
  }
34184
+ if (toolId === "file.read" && result.success && typeof input.path === "string") {
34185
+ worldModel = {
34186
+ ...worldModel,
34187
+ inspectedFiles: uniqueStrings2([
34188
+ ...worldModel.inspectedFiles,
34189
+ normalizeWorkspacePath(request.cwd, input.path)
34190
+ ]),
34191
+ phase: worldModel.phase === "survey" ? "plan-edit" : worldModel.phase,
34192
+ nextFocus: worldModel.phase === "survey" ? "Make the concrete source change in the inspected target files." : worldModel.nextFocus,
34193
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34194
+ };
34195
+ }
33684
34196
  transcript.push({
33685
34197
  step: 0,
33686
34198
  response: JSON.stringify({
@@ -33780,6 +34292,14 @@ ${truncateForModel(customAgentMemory)}`);
33780
34292
  ].join("\n\n")
33781
34293
  });
33782
34294
  }
34295
+ if (request.workspaceState === "existing" && (task.type === "scaffold" || task.type === "implementation") && worldModel.phase === "survey") {
34296
+ worldModel = {
34297
+ ...worldModel,
34298
+ phase: "plan-edit",
34299
+ nextFocus: "Edit the likely source files before using more verification or browser/runtime checks.",
34300
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34301
+ };
34302
+ }
33783
34303
  }
33784
34304
  preservedMessageCount = messages.length;
33785
34305
  const runToolAction = async (parsedAction, step, operatorApproved = false) => {
@@ -33841,7 +34361,10 @@ ${truncateForModel(customAgentMemory)}`);
33841
34361
  const applyToolResult = async (parsedAction, result, step, transcriptEntry) => {
33842
34362
  const mutatingAction = isMutatingAction(parsedAction);
33843
34363
  const verificationAction = isVerificationAction(parsedAction);
34364
+ const runtimeValidationAction = isRuntimeValidationAction(parsedAction);
34365
+ const shellCommand = extractShellCommand(parsedAction);
33844
34366
  const interactiveVerificationSetupFailure = verificationAction && !result.success && isInteractiveVerificationSetupFailure(parsedAction, result);
34367
+ worldModel = recordWorldModelAction(worldModel, parsedAction.type === "tool" ? `${parsedAction.tool}${shellCommand ? ` ${shellCommand}` : ""}` : parsedAction.type === "finish" ? "finish" : "block", shellCommand || void 0);
33845
34368
  if (mutatingAction && result.success) {
33846
34369
  changedWorkspace = true;
33847
34370
  verifiedAfterLatestChange = false;
@@ -33849,22 +34372,121 @@ ${truncateForModel(customAgentMemory)}`);
33849
34372
  repairAppliedSinceFailure = true;
33850
34373
  }
33851
34374
  }
34375
+ if (parsedAction.tool === "file.write" || parsedAction.tool === "file.patch") {
34376
+ if (result.success) {
34377
+ appliedSourceEdit = true;
34378
+ preEditValidationActions = 0;
34379
+ worldModel = {
34380
+ ...worldModel,
34381
+ phase: "verify",
34382
+ changedFiles: uniqueStrings2([
34383
+ ...worldModel.changedFiles,
34384
+ ...result.artifacts.map((artifact) => normalizeWorkspacePath(request.cwd, artifact)),
34385
+ ...typeof parsedAction.input.path === "string" ? [
34386
+ normalizeWorkspacePath(request.cwd, parsedAction.input.path)
34387
+ ] : []
34388
+ ]),
34389
+ blockers: [],
34390
+ sourceEditCount: worldModel.sourceEditCount + 1,
34391
+ nextFocus: `Run the strategic verifier next: ${determineNextVerificationCommand(worldModel) ?? "choose the best non-interactive verifier"}.`,
34392
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34393
+ };
34394
+ }
34395
+ }
34396
+ if (!appliedSourceEdit && runtimeValidationAction) {
34397
+ preEditValidationActions += 1;
34398
+ worldModel = {
34399
+ ...worldModel,
34400
+ validationLoopCount: worldModel.validationLoopCount + 1,
34401
+ nextFocus: "Make a source edit before spending more time on runtime validation.",
34402
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34403
+ };
34404
+ }
34405
+ if (parsedAction.tool === "file.read" && result.success && typeof parsedAction.input.path === "string") {
34406
+ worldModel = {
34407
+ ...worldModel,
34408
+ inspectedFiles: uniqueStrings2([
34409
+ ...worldModel.inspectedFiles,
34410
+ normalizeWorkspacePath(request.cwd, parsedAction.input.path)
34411
+ ]),
34412
+ phase: worldModel.phase === "survey" ? "plan-edit" : worldModel.phase,
34413
+ nextFocus: worldModel.phase === "survey" ? "Use the inspected files to make the concrete implementation change." : worldModel.nextFocus,
34414
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34415
+ };
34416
+ }
33852
34417
  if (verificationAction) {
33853
34418
  verifiedAfterLatestChange = result.success;
34419
+ const attemptedCommand = shellCommand || (parsedAction.tool === "tests.run" ? "tests.run" : "");
34420
+ const proofUpdate = updateProofStateForVerification(worldModel, attemptedCommand || void 0, result.success);
34421
+ const verifier = {
34422
+ ...worldModel.verifier,
34423
+ attemptedCommands: attemptedCommand ? uniqueStrings2([
34424
+ ...worldModel.verifier.attemptedCommands,
34425
+ attemptedCommand
34426
+ ]) : worldModel.verifier.attemptedCommands,
34427
+ currentCommand: attemptedCommand || worldModel.verifier.currentCommand,
34428
+ latestFailureSummary: result.success ? void 0 : result.summary,
34429
+ latestFailureCommand: result.success ? void 0 : attemptedCommand || void 0,
34430
+ latestSuccessfulCommand: result.success ? attemptedCommand || worldModel.verifier.latestSuccessfulCommand : worldModel.verifier.latestSuccessfulCommand,
34431
+ successfulCommands: result.success && attemptedCommand ? uniqueStrings2([
34432
+ ...worldModel.verifier.successfulCommands,
34433
+ attemptedCommand
34434
+ ]) : worldModel.verifier.successfulCommands,
34435
+ disabledCommands: interactiveVerificationSetupFailure && attemptedCommand ? uniqueStrings2([
34436
+ ...worldModel.verifier.disabledCommands,
34437
+ attemptedCommand
34438
+ ]) : worldModel.verifier.disabledCommands,
34439
+ requiresInteractiveSetup: interactiveVerificationSetupFailure
34440
+ };
34441
+ worldModel = {
34442
+ ...worldModel,
34443
+ verifier,
34444
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34445
+ };
33854
34446
  if (result.success) {
33855
34447
  verificationFailures = 0;
33856
34448
  repairRequiredBeforeVerification = false;
33857
34449
  repairAppliedSinceFailure = false;
33858
34450
  lastVerificationFailure = null;
34451
+ worldModel = {
34452
+ ...worldModel,
34453
+ phase: "finalize",
34454
+ validationLoopCount: 0,
34455
+ proofPending: proofUpdate.proofPending,
34456
+ proofSatisfied: proofUpdate.proofSatisfied,
34457
+ nextFocus: "Review the diff and finish if the task acceptance criteria are satisfied.",
34458
+ blockers: [],
34459
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34460
+ };
33859
34461
  } else if (interactiveVerificationSetupFailure) {
33860
34462
  repairRequiredBeforeVerification = false;
33861
34463
  repairAppliedSinceFailure = false;
33862
34464
  lastVerificationFailure = result;
34465
+ worldModel = {
34466
+ ...worldModel,
34467
+ phase: appliedSourceEdit ? "verify" : "implement",
34468
+ blockers: uniqueStrings2([
34469
+ ...worldModel.blockers.filter((blocker) => !blocker.includes("interactive")),
34470
+ result.summary
34471
+ ]),
34472
+ nextFocus: "Choose a different non-interactive verifier or configure the verifier only if the task truly requires it.",
34473
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34474
+ };
33863
34475
  } else {
33864
34476
  verificationFailures += 1;
33865
34477
  repairRequiredBeforeVerification = true;
33866
34478
  repairAppliedSinceFailure = false;
33867
34479
  lastVerificationFailure = result;
34480
+ worldModel = {
34481
+ ...worldModel,
34482
+ phase: "repair",
34483
+ blockers: uniqueStrings2([
34484
+ ...worldModel.blockers,
34485
+ result.summary
34486
+ ]),
34487
+ nextFocus: "Repair the latest failing verifier output before running verification again.",
34488
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34489
+ };
33868
34490
  }
33869
34491
  }
33870
34492
  transcriptEntry.toolResult = result;
@@ -33925,21 +34547,21 @@ ${truncateForModel(customAgentMemory)}`);
33925
34547
  const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
33926
34548
  artifacts.add(transcriptPath2);
33927
34549
  if (task.agentRole !== "test-debugger") {
33928
- return {
33929
- status: "handoff",
33930
- summary: `Verification failed ${verificationFailures} time${verificationFailures === 1 ? "" : "s"} for ${task.id}; handing off to test-debugger.`,
33931
- toolResults,
33932
- artifacts: Array.from(artifacts),
33933
- usage: usageTotals
34550
+ worldModel = {
34551
+ ...worldModel,
34552
+ phase: "escalate",
34553
+ nextFocus: "Escalate to test-debugger with the latest failing verifier context.",
34554
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
33934
34555
  };
34556
+ return makeOutcome("handoff", `Verification failed ${verificationFailures} time${verificationFailures === 1 ? "" : "s"} for ${task.id}; handing off to test-debugger.`);
33935
34557
  }
33936
- return {
33937
- status: "blocked",
33938
- summary: `Verification failed ${verificationFailures} time${verificationFailures === 1 ? "" : "s"} for ${task.id}; repair budget exhausted.`,
33939
- toolResults,
33940
- artifacts: Array.from(artifacts),
33941
- usage: usageTotals
34558
+ worldModel = {
34559
+ ...worldModel,
34560
+ phase: "escalate",
34561
+ nextFocus: "Debugger repair budget is exhausted; supervisor must replan or accept the blocker.",
34562
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
33942
34563
  };
34564
+ return makeOutcome("blocked", `Verification failed ${verificationFailures} time${verificationFailures === 1 ? "" : "s"} for ${task.id}; repair budget exhausted.`);
33943
34565
  }
33944
34566
  }
33945
34567
  messages.push({
@@ -33983,14 +34605,9 @@ ${truncateForModel(customAgentMemory)}`);
33983
34605
  if (execution.approvalRequest) {
33984
34606
  const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript.concat(transcriptEntry));
33985
34607
  artifacts.add(transcriptPath2);
33986
- return {
33987
- status: "awaiting-approval",
33988
- summary: execution.approvalRequest.reason,
33989
- toolResults,
33990
- artifacts: Array.from(artifacts),
33991
- approvalRequest: execution.approvalRequest,
33992
- usage: usageTotals
33993
- };
34608
+ return makeOutcome("awaiting-approval", execution.approvalRequest.reason, {
34609
+ approvalRequest: execution.approvalRequest
34610
+ });
33994
34611
  }
33995
34612
  if (!execution.result) {
33996
34613
  throw new Error("Approved action did not produce a tool result.");
@@ -34007,13 +34624,12 @@ ${truncateForModel(customAgentMemory)}`);
34007
34624
  } catch (error2) {
34008
34625
  const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
34009
34626
  artifacts.add(transcriptPath2);
34010
- return {
34011
- status: "paused",
34012
- summary: `Execution interrupted by operator during ${task.id}.`,
34013
- toolResults,
34014
- artifacts: Array.from(artifacts),
34015
- usage: usageTotals
34627
+ worldModel = {
34628
+ ...worldModel,
34629
+ nextFocus: "Resume from the current execution state without redoing completed inspection or verification.",
34630
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34016
34631
  };
34632
+ return makeOutcome("paused", `Execution interrupted by operator during ${task.id}.`);
34017
34633
  }
34018
34634
  await compactExecutionContextIfNeeded(step);
34019
34635
  if (typeof maxModelCalls === "number" && usageTotals.modelCalls >= maxModelCalls) {
@@ -34036,7 +34652,7 @@ ${truncateForModel(customAgentMemory)}`);
34036
34652
  try {
34037
34653
  response = await brain.client.generateText({
34038
34654
  model: brain.model,
34039
- systemPrompt: buildSystemPrompt(effectiveAgent, task, request, allowedTools, options.plan, task.agentPromptPreamble ?? customOverlay?.promptPreamble),
34655
+ systemPrompt: buildSystemPrompt(effectiveAgent, task, request, allowedTools, snapshotWorldModel(), options.plan, task.agentPromptPreamble ?? customOverlay?.promptPreamble),
34040
34656
  messages,
34041
34657
  responseFormat: "json_object",
34042
34658
  ...typeof brain.settings.temperature === "number" ? {
@@ -34054,24 +34670,27 @@ ${truncateForModel(customAgentMemory)}`);
34054
34670
  });
34055
34671
  const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
34056
34672
  artifacts.add(transcriptPath2);
34057
- return {
34058
- status: "blocked",
34059
- summary: `Model request failed before ${task.id} could choose a safe action: ${error2 instanceof Error ? error2.message : String(error2)}`,
34060
- toolResults,
34061
- artifacts: Array.from(artifacts),
34062
- usage: usageTotals
34673
+ worldModel = {
34674
+ ...worldModel,
34675
+ phase: "escalate",
34676
+ blockers: uniqueStrings2([
34677
+ ...worldModel.blockers,
34678
+ error2 instanceof Error ? error2.message : String(error2)
34679
+ ]),
34680
+ nextFocus: "Supervisor should inspect the model failure or switch to a healthier provider/model.",
34681
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34063
34682
  };
34683
+ return makeOutcome("blocked", `Model request failed before ${task.id} could choose a safe action: ${error2 instanceof Error ? error2.message : String(error2)}`);
34064
34684
  }
34065
34685
  if (options.signal?.aborted) {
34066
34686
  const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
34067
34687
  artifacts.add(transcriptPath2);
34068
- return {
34069
- status: "paused",
34070
- summary: `Execution interrupted by operator during ${task.id}.`,
34071
- toolResults,
34072
- artifacts: Array.from(artifacts),
34073
- usage: usageTotals
34688
+ worldModel = {
34689
+ ...worldModel,
34690
+ nextFocus: "Resume from the current execution state without redoing completed inspection or verification.",
34691
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34074
34692
  };
34693
+ return makeOutcome("paused", `Execution interrupted by operator during ${task.id}.`);
34075
34694
  }
34076
34695
  responseText = response.text;
34077
34696
  usageTotals.modelCalls += 1;
@@ -34134,13 +34753,17 @@ ${truncateForModel(customAgentMemory)}`);
34134
34753
  step,
34135
34754
  message: "Model stayed out of structured mode after multiple retries."
34136
34755
  });
34137
- return {
34138
- status: "blocked",
34139
- summary: `${parseSummary} The task stopped before a safe tool action could be chosen.`,
34140
- toolResults,
34141
- artifacts: Array.from(artifacts),
34142
- usage: usageTotals
34756
+ worldModel = {
34757
+ ...worldModel,
34758
+ phase: "escalate",
34759
+ blockers: uniqueStrings2([
34760
+ ...worldModel.blockers,
34761
+ parseSummary
34762
+ ]),
34763
+ nextFocus: "Supervisor should inspect the unstructured model output and replan or switch models.",
34764
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34143
34765
  };
34766
+ return makeOutcome("blocked", `${parseSummary} The task stopped before a safe tool action could be chosen.`);
34144
34767
  }
34145
34768
  await emitProgress({
34146
34769
  type: "task-note",
@@ -34174,10 +34797,53 @@ ${truncateForModel(customAgentMemory)}`);
34174
34797
  response: responseText,
34175
34798
  parsedAction
34176
34799
  };
34800
+ const strategicVerifier = determineNextVerificationCommand(worldModel);
34177
34801
  messages.push({
34178
34802
  role: "assistant",
34179
34803
  content: JSON.stringify(parsedAction)
34180
34804
  });
34805
+ if (worldModel.phase === "survey" && parsedAction.type === "tool" && !isRepoInspectionAction(parsedAction)) {
34806
+ transcriptEntry.runtimeNote = "Rejected phase-inconsistent action because survey phase still requires repo inspection.";
34807
+ transcript.push(transcriptEntry);
34808
+ await emitProgress({
34809
+ type: "task-note",
34810
+ sessionId,
34811
+ taskId: task.id,
34812
+ agentRole: task.agentRole,
34813
+ step,
34814
+ message: "Survey phase requires file/repo inspection before implementation or verification."
34815
+ });
34816
+ messages.push({
34817
+ role: "user",
34818
+ content: [
34819
+ "The current execution phase is survey.",
34820
+ "Inspect likely source files and repository context first with file.read, file.search, file.list, repo.index, repo.query, or git.diff.",
34821
+ "Do not jump to runtime validation or edits before you understand the target files."
34822
+ ].join("\n")
34823
+ });
34824
+ continue;
34825
+ }
34826
+ if (worldModel.phase === "verify" && parsedAction.type === "tool" && !isVerificationAction(parsedAction) && !isRepoInspectionAction(parsedAction)) {
34827
+ transcriptEntry.runtimeNote = "Rejected phase-inconsistent action because verify phase requires proof or diff review.";
34828
+ transcript.push(transcriptEntry);
34829
+ await emitProgress({
34830
+ type: "task-note",
34831
+ sessionId,
34832
+ taskId: task.id,
34833
+ agentRole: task.agentRole,
34834
+ step,
34835
+ message: strategicVerifier ? `Verify phase requires proof. Run ${strategicVerifier} or inspect the diff.` : "Verify phase requires proof. Use a non-interactive verifier or inspect the diff."
34836
+ });
34837
+ messages.push({
34838
+ role: "user",
34839
+ content: [
34840
+ "The current execution phase is verify.",
34841
+ strategicVerifier ? `Run the strategic verifier next: ${strategicVerifier}.` : "Run the best non-interactive verifier next.",
34842
+ "Only use diff/inspection actions here if you still need proof context before finishing."
34843
+ ].join("\n")
34844
+ });
34845
+ continue;
34846
+ }
34181
34847
  if (parsedAction.type === "finish") {
34182
34848
  if (changedWorkspace && !verifiedAfterLatestChange) {
34183
34849
  transcriptEntry.runtimeNote = "Finish rejected because code changed without a successful verification step.";
@@ -34194,31 +34860,77 @@ ${truncateForModel(customAgentMemory)}`);
34194
34860
  role: "user",
34195
34861
  content: [
34196
34862
  "You tried to finish after making code changes without a successful verification step.",
34197
- "Run tests.run or a build/lint/test shell command, inspect failures, and only finish after verification passes."
34863
+ strategicVerifier ? `Run the strategic verifier next: ${strategicVerifier}.` : "Run tests.run or a build/lint/test shell command, inspect failures, and only finish after verification passes."
34864
+ ].join("\n")
34865
+ });
34866
+ continue;
34867
+ }
34868
+ if (worldModel.phase !== "finalize") {
34869
+ transcriptEntry.runtimeNote = `Finish rejected because the task is still in ${worldModel.phase} phase.`;
34870
+ transcript.push(transcriptEntry);
34871
+ await emitProgress({
34872
+ type: "task-note",
34873
+ sessionId,
34874
+ taskId: task.id,
34875
+ agentRole: task.agentRole,
34876
+ step,
34877
+ message: `Finish rejected because the task is still in ${worldModel.phase} phase.`
34878
+ });
34879
+ messages.push({
34880
+ role: "user",
34881
+ content: [
34882
+ `The task is still in ${worldModel.phase} phase.`,
34883
+ ...derivePhaseGuidance(worldModel),
34884
+ "Do the next strategic action instead of finishing early."
34885
+ ].join("\n")
34886
+ });
34887
+ continue;
34888
+ }
34889
+ if (worldModel.proofPending.length > 0 && worldModel.proofSatisfied.length === 0) {
34890
+ transcriptEntry.runtimeNote = "Finish rejected because the task still has pending proof and no satisfied verifier evidence.";
34891
+ transcript.push(transcriptEntry);
34892
+ await emitProgress({
34893
+ type: "task-note",
34894
+ sessionId,
34895
+ taskId: task.id,
34896
+ agentRole: task.agentRole,
34897
+ step,
34898
+ message: "Finish rejected because the task still has pending proof requirements."
34899
+ });
34900
+ messages.push({
34901
+ role: "user",
34902
+ content: [
34903
+ "You still owe proof for this task before finishing.",
34904
+ `Pending proof: ${worldModel.proofPending.join(" | ")}`,
34905
+ strategicVerifier ? `Run the strategic verifier next: ${strategicVerifier}.` : "Run the best non-interactive verifier, then finish only after the proof is captured."
34198
34906
  ].join("\n")
34199
34907
  });
34200
34908
  continue;
34201
34909
  }
34202
34910
  const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript.concat(transcriptEntry));
34203
34911
  artifacts.add(transcriptPath2);
34204
- return {
34205
- status: "completed",
34206
- summary: parsedAction.summary,
34207
- toolResults,
34208
- artifacts: Array.from(artifacts),
34209
- usage: usageTotals
34912
+ worldModel = {
34913
+ ...worldModel,
34914
+ phase: "finalize",
34915
+ nextFocus: "Task completed.",
34916
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34210
34917
  };
34918
+ return makeOutcome("completed", parsedAction.summary);
34211
34919
  }
34212
34920
  if (parsedAction.type === "block") {
34213
34921
  const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript.concat(transcriptEntry));
34214
34922
  artifacts.add(transcriptPath2);
34215
- return {
34216
- status: "blocked",
34217
- summary: parsedAction.reason,
34218
- toolResults,
34219
- artifacts: Array.from(artifacts),
34220
- usage: usageTotals
34923
+ worldModel = {
34924
+ ...worldModel,
34925
+ phase: "escalate",
34926
+ blockers: uniqueStrings2([
34927
+ ...worldModel.blockers,
34928
+ parsedAction.reason
34929
+ ]),
34930
+ nextFocus: "Supervisor should inspect the reported blocker and adjust the task graph.",
34931
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34221
34932
  };
34933
+ return makeOutcome("blocked", parsedAction.reason);
34222
34934
  }
34223
34935
  if (!allowedTools.includes(parsedAction.tool)) {
34224
34936
  transcriptEntry.runtimeNote = `Rejected disallowed tool "${parsedAction.tool}" for ${agent.role}.`;
@@ -34263,6 +34975,67 @@ ${truncateForModel(customAgentMemory)}`);
34263
34975
  });
34264
34976
  continue;
34265
34977
  }
34978
+ if (isVerificationAction(parsedAction) && strategicVerifier && extractShellCommand(parsedAction) && extractShellCommand(parsedAction) !== strategicVerifier) {
34979
+ transcriptEntry.runtimeNote = "Verification command rejected because it does not match the current strategic verifier.";
34980
+ transcript.push(transcriptEntry);
34981
+ await emitProgress({
34982
+ type: "task-note",
34983
+ sessionId,
34984
+ taskId: task.id,
34985
+ agentRole: task.agentRole,
34986
+ step,
34987
+ message: `Verification should follow the strategic order. Prefer ${strategicVerifier} next.`
34988
+ });
34989
+ messages.push({
34990
+ role: "user",
34991
+ content: [
34992
+ "Use the strategic verification path instead of picking a random verifier.",
34993
+ `Preferred next verifier: ${strategicVerifier}`
34994
+ ].join("\n")
34995
+ });
34996
+ continue;
34997
+ }
34998
+ if (allowedTools.includes("file.read") && isShellFileInspectionAction(parsedAction)) {
34999
+ transcriptEntry.runtimeNote = "Rejected shell-based file inspection because file.read is available.";
35000
+ transcript.push(transcriptEntry);
35001
+ await emitProgress({
35002
+ type: "task-note",
35003
+ sessionId,
35004
+ taskId: task.id,
35005
+ agentRole: task.agentRole,
35006
+ step,
35007
+ message: "Use file.read or repo/file tools instead of cat/head/wc/sed shell commands for source inspection."
35008
+ });
35009
+ messages.push({
35010
+ role: "user",
35011
+ content: [
35012
+ "Do not use shell.exec for simple repo file inspection when file.read is available.",
35013
+ "Use file.read, file.search, file.list, repo.query, or git.diff instead."
35014
+ ].join("\n")
35015
+ });
35016
+ continue;
35017
+ }
35018
+ if (request.workspaceState === "existing" && (task.type === "scaffold" || task.type === "implementation") && !appliedSourceEdit && isRuntimeValidationAction(parsedAction) && preEditValidationActions >= 1) {
35019
+ transcriptEntry.runtimeNote = "Rejected repeated validation/runtime loop before any source edit.";
35020
+ transcript.push(transcriptEntry);
35021
+ await emitProgress({
35022
+ type: "task-note",
35023
+ sessionId,
35024
+ taskId: task.id,
35025
+ agentRole: task.agentRole,
35026
+ step,
35027
+ message: "Repeated build/dev/browser verification was stopped because no source edit has landed yet."
35028
+ });
35029
+ messages.push({
35030
+ role: "user",
35031
+ content: [
35032
+ "You already used one baseline verification/runtime check before making a source edit.",
35033
+ "Do not keep rerunning build, lint, dev server, browser, or HTTP checks unchanged.",
35034
+ "Inspect likely source files, make a concrete edit with file.patch or file.write, and then validate again."
35035
+ ].join("\n")
35036
+ });
35037
+ continue;
35038
+ }
34266
35039
  const execution = await runToolAction(parsedAction, step);
34267
35040
  if (execution.budgetExceeded) {
34268
35041
  transcriptEntry.runtimeNote = execution.budgetExceeded;
@@ -34273,14 +35046,9 @@ ${truncateForModel(customAgentMemory)}`);
34273
35046
  transcriptEntry.runtimeNote = execution.approvalRequest.reason;
34274
35047
  const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript.concat(transcriptEntry));
34275
35048
  artifacts.add(transcriptPath2);
34276
- return {
34277
- status: "awaiting-approval",
34278
- summary: execution.approvalRequest.reason,
34279
- toolResults,
34280
- artifacts: Array.from(artifacts),
34281
- approvalRequest: execution.approvalRequest,
34282
- usage: usageTotals
34283
- };
35049
+ return makeOutcome("awaiting-approval", execution.approvalRequest.reason, {
35050
+ approvalRequest: execution.approvalRequest
35051
+ });
34284
35052
  }
34285
35053
  if (!execution.result) {
34286
35054
  throw new Error(`Tool ${parsedAction.tool} did not return a result.`);
@@ -34300,21 +35068,21 @@ ${truncateForModel(customAgentMemory)}`);
34300
35068
  message: task.agentRole !== "test-debugger" ? `Step budget exhausted after ${maxSteps} steps; handing task to test-debugger.` : `Step budget exhausted after ${maxSteps} steps; debugger escalation exhausted.`
34301
35069
  });
34302
35070
  if (task.agentRole !== "test-debugger") {
34303
- return {
34304
- status: "handoff",
34305
- summary: `Autonomous executor reached the step limit (${maxSteps}) for ${task.id}; handing off to test-debugger.`,
34306
- toolResults,
34307
- artifacts: Array.from(artifacts),
34308
- usage: usageTotals
35071
+ worldModel = {
35072
+ ...worldModel,
35073
+ phase: "escalate",
35074
+ nextFocus: "Hand off the task to test-debugger with the current world-model and transcript.",
35075
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34309
35076
  };
35077
+ return makeOutcome("handoff", `Autonomous executor reached the step limit (${maxSteps}) for ${task.id}; handing off to test-debugger.`);
34310
35078
  }
34311
- return {
34312
- status: "blocked",
34313
- summary: `Autonomous executor reached the step limit (${maxSteps}) for ${task.id}; debugger escalation exhausted.`,
34314
- toolResults,
34315
- artifacts: Array.from(artifacts),
34316
- usage: usageTotals
35079
+ worldModel = {
35080
+ ...worldModel,
35081
+ phase: "escalate",
35082
+ nextFocus: "Debugger escalation is exhausted; supervisor must replan or accept the blocker.",
35083
+ lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
34317
35084
  };
35085
+ return makeOutcome("blocked", `Autonomous executor reached the step limit (${maxSteps}) for ${task.id}; debugger escalation exhausted.`);
34318
35086
  }
34319
35087
  };
34320
35088
 
@@ -36678,6 +37446,15 @@ function updateTaskStatus(plan, taskId, status) {
36678
37446
  milestones
36679
37447
  };
36680
37448
  }
37449
// Attach an execution state object to a single task inside a plan.
//
// plan           - plan object, passed through to replaceTask.
// taskId         - id of the task to update, passed through to replaceTask.
// executionState - value to store on the task as `executionState`; the caller
//                  in this file passes the task outcome's `worldModel`.
//
// Returns `plan` itself when executionState is falsy; otherwise returns
// whatever replaceTask produces for the mapped task.
+ function updateTaskExecutionState(plan, taskId, executionState) {
37450
// Nothing reported for this task: leave the plan untouched.
+ if (!executionState) {
37451
+ return plan;
37452
+ }
37453
// The mapper builds a new task object (spread copy) instead of mutating `task`.
+ return replaceTask(plan, taskId, (task) => ({
37454
+ ...task,
37455
+ executionState
37456
+ }));
37457
+ }
36681
37458
  function replaceTask(plan, taskId, mapper) {
36682
37459
  const milestones = plan.milestones.map((milestone) => ({
36683
37460
  ...milestone,
@@ -37112,6 +37889,44 @@ ${result.stderr.slice(0, 1e3)}`);
37112
37889
  return lines.join("\n");
37113
37890
  }).join("\n\n");
37114
37891
  }
37892
// Render a task outcome as newline-separated `label: value` text lines.
//
// task    - only `task.id` is read.
// outcome - `status` and `summary` are always read; `worldModel` is optional.
//
// Returns a string. Without a world model it has three lines (task, status,
// summary). With one it also lists phase, next focus, the file lists,
// blockers, proof state, recent actions and verifier details.
//
// NOTE(review): when a world model is present, every list field read below
// (targetFiles, inspectedFiles, changedFiles, blockers, proofPending,
// proofSatisfied, recentActions, verifier.preferredCommands) is assumed to be
// an array and `verifier` an object; a missing one would throw here. Confirm
// the producer always fills them in.
+ function renderTaskWorldModelContext(task, outcome) {
37893
+ const worldModel = outcome.worldModel;
37894
// Short form: the outcome carries no world model.
+ if (!worldModel) {
37895
+ return [
37896
+ `task: ${task.id}`,
37897
+ `status: ${outcome.status}`,
37898
+ `summary: ${outcome.summary}`
37899
+ ].join("\n");
37900
+ }
37901
// Long form: an empty list joins to "" and is shown as "(none)" via `||`;
// nullish scalars fall back via `??` to "(unset)" or "(none)".
+ return [
37902
+ `task: ${task.id}`,
37903
+ `status: ${outcome.status}`,
37904
+ `summary: ${outcome.summary}`,
37905
+ `phase: ${worldModel.phase}`,
37906
+ `next focus: ${worldModel.nextFocus ?? "(unset)"}`,
37907
+ `target files: ${worldModel.targetFiles.join(", ") || "(none)"}`,
37908
+ `inspected files: ${worldModel.inspectedFiles.join(", ") || "(none)"}`,
37909
+ `changed files: ${worldModel.changedFiles.join(", ") || "(none)"}`,
37910
+ `blockers: ${worldModel.blockers.join(" | ") || "(none)"}`,
37911
+ `proof pending: ${worldModel.proofPending.join(" | ") || "(none)"}`,
37912
+ `proof satisfied: ${worldModel.proofSatisfied.join(" | ") || "(none)"}`,
37913
+ `recent actions: ${worldModel.recentActions.join(" | ") || "(none)"}`,
37914
+ `preferred verifiers: ${worldModel.verifier.preferredCommands.join(", ") || "(none)"}`,
37915
+ `latest verifier failure: ${worldModel.verifier.latestFailureSummary ?? "(none)"}`
37916
+ ].join("\n");
37917
+ }
37918
// Decide whether a finished task's outcome should trigger a plan revision.
//
// task    - reads `agentRole` and `parentTaskId`.
// outcome - reads `status`, optional `supervisorHints`, optional `worldModel`.
//
// Returns a boolean. Checks run in order and the first match wins.
+ function shouldRevisePlanAfterTaskOutcome(task, outcome) {
37919
// Never revise after repo-analyst or planner tasks. Completed repo-analyst
// outcomes go through maybeRevisePlanAfterRepoAnalysis in the orchestrator;
// NOTE(review): the reason for excluding planner is not visible here.
+ if (task.agentRole === "repo-analyst" || task.agentRole === "planner") {
37920
+ return false;
37921
+ }
37922
// Any one of the three supervisor hint flags requests a revision.
+ if (outcome.supervisorHints?.shouldReplan || outcome.supervisorHints?.shouldReprioritize || outcome.supervisorHints?.shouldMergeDelegatedWork) {
37923
+ return true;
37924
+ }
37925
// Blocked and handoff outcomes always request a revision.
+ if (outcome.status === "blocked" || outcome.status === "handoff") {
37926
+ return true;
37927
+ }
37928
// Otherwise revise only for tasks that have a parentTaskId (presumably
// delegated subtasks — confirm) and a world model that is in the "repair"
// or "finalize" phase or lists at least one blocker.
+ return Boolean(task.parentTaskId && outcome.worldModel && (outcome.worldModel.phase === "repair" || outcome.worldModel.phase === "finalize" || outcome.worldModel.blockers.length > 0));
37929
+ }
37115
37930
  function latestRepoAnalysisContext(events) {
37116
37931
  const event = [
37117
37932
  ...events
@@ -37549,7 +38364,13 @@ var ExecutionOrchestrator = class {
37549
38364
  let sawApprovalRequest = false;
37550
38365
  for (const batchResult of batchResults) {
37551
38366
  const { task: autoTask, outcome } = batchResult;
37552
- workingPlan = outcome.status === "handoff" ? replaceTask(workingPlan, autoTask.id, () => outcome.handoffTask ?? createDebuggerHandoffTask(autoTask, outcome, this.toolsForAgent("test-debugger").map((tool) => tool.id))) : updateTaskStatus(workingPlan, autoTask.id, outcome.status === "completed" ? "completed" : outcome.status === "blocked" ? "blocked" : "pending");
38367
+ workingPlan = updateTaskExecutionState(workingPlan, autoTask.id, outcome.worldModel);
38368
+ workingPlan = outcome.status === "handoff" ? replaceTask(workingPlan, autoTask.id, () => ({
38369
+ ...outcome.handoffTask ?? createDebuggerHandoffTask(autoTask, outcome, this.toolsForAgent("test-debugger").map((tool) => tool.id)),
38370
+ ...outcome.worldModel ? {
38371
+ executionState: outcome.worldModel
38372
+ } : {}
38373
+ })) : updateTaskStatus(workingPlan, autoTask.id, outcome.status === "completed" ? "completed" : outcome.status === "blocked" ? "blocked" : "pending");
37553
38374
  if (autoTask.agentRole === "repo-analyst" && outcome.status === "completed") {
37554
38375
  const replanned = await this.maybeRevisePlanAfterRepoAnalysis(session.id, session.request, workingPlan, outcome, emitProgress);
37555
38376
  workingPlan = replanned.plan;
@@ -37562,6 +38383,16 @@ var ExecutionOrchestrator = class {
37562
38383
  });
37563
38384
  }
37564
38385
  }
38386
+ const adapted = await this.maybeRevisePlanAfterTaskOutcome(session.id, session.request, workingPlan, autoTask, outcome, emitProgress);
38387
+ workingPlan = adapted.plan;
38388
+ if (adapted.note) {
38389
+ notes = maybeAppendNote(notes, adapted.note);
38390
+ await emitProgress({
38391
+ type: "task-note",
38392
+ sessionId: session.id,
38393
+ message: adapted.note
38394
+ });
38395
+ }
37565
38396
  notes = maybeAppendNote(notes, outcome.summary);
37566
38397
  if (outcome.status === "awaiting-approval" && outcome.approvalRequest) {
37567
38398
  pendingApprovals.push(outcome.approvalRequest);
@@ -38141,6 +38972,12 @@ var ExecutionOrchestrator = class {
38141
38972
  ...outcome.usage ? {
38142
38973
  usage: outcome.usage
38143
38974
  } : {},
38975
+ ...outcome.worldModel ? {
38976
+ worldModel: outcome.worldModel
38977
+ } : {},
38978
+ ...outcome.supervisorHints ? {
38979
+ supervisorHints: outcome.supervisorHints
38980
+ } : {},
38144
38981
  ...integrated.integrationFailed && integrated.conflict && task.agentRole !== "integrator" ? {
38145
38982
  handoffTask: createIntegratorHandoffTask(task, outcome, this.toolsForAgent("integrator").map((tool) => tool.id), integrated.conflict)
38146
38983
  } : {}
@@ -38321,6 +39158,89 @@ var ExecutionOrchestrator = class {
38321
39158
  };
38322
39159
  }
38323
39160
  }
39161
+ async maybeRevisePlanAfterTaskOutcome(sessionId, request, plan, task, outcome, emitProgress) { // Optionally asks the planner model to revise `plan` after a task outcome; every path resolves to { plan } with an optional `note`.
39162
+ if (!shouldRevisePlanAfterTaskOutcome(task, outcome)) { // Gate: nothing is loaded or called unless the predicate says this outcome warrants a revision.
39163
+ return {
39164
+ plan
39165
+ };
39166
+ }
39167
+ const config2 = await loadConfig(request.cwd); // Config is looked up relative to the request's working directory.
39168
+ if (!config2) { // No config available: return the plan unchanged.
39169
+ return {
39170
+ plan
39171
+ };
39172
+ }
39173
+ const revisionContext = [ // Newline-joined text context handed to revisePlanWithModel below.
39174
+ "Task outcome context:",
39175
+ renderTaskWorldModelContext(task, outcome),
39176
+ "",
39177
+ "Supervisor hints:",
39178
+ outcome.supervisorHints ? [ // Hints are rendered as yes/no lines; "- none" is used when supervisorHints is absent.
39179
+ `- should replan: ${outcome.supervisorHints.shouldReplan ? "yes" : "no"}`,
39180
+ `- should reprioritize: ${outcome.supervisorHints.shouldReprioritize ? "yes" : "no"}`,
39181
+ `- should merge delegated work: ${outcome.supervisorHints.shouldMergeDelegatedWork ? "yes" : "no"}`,
39182
+ `- reason: ${outcome.supervisorHints.reason ?? "(none)"}`
39183
+ ].join("\n") : "- none",
39184
+ "",
39185
+ "Tool evidence:",
39186
+ renderPlannerReplanContext(outcome.toolResults),
39187
+ "",
39188
+ "Planner instructions:",
39189
+ "- Reassign, merge, reorder, or split follow-up tasks if the task outcome suggests the current graph is suboptimal.",
39190
+ "- Preserve completed work, but feel free to adapt pending task order, dependencies, agent roles, or milestone shape.",
39191
+ "- Prefer making parallel work possible when blockers or delegated-worker outcomes reveal an opportunity."
39192
+ ].join("\n");
39193
+ try { // Any error thrown inside this block is handled by the catch below, which returns the plan unchanged.
39194
+ const brain = await new BrainResolver(config2, createDefaultBrainProviderRegistry(request.cwd)).resolve("planner"); // Resolve the brain for the "planner" role.
39195
+ const result = await revisePlanWithModel(request, plan, revisionContext, {
39196
+ modelLabel: `${brain.provider.id}/${brain.model}`,
39197
+ ...typeof brain.settings.temperature === "number" ? { // temperature and maxTokens are forwarded only when they are numbers.
39198
+ temperature: brain.settings.temperature
39199
+ } : {},
39200
+ ...typeof brain.settings.maxTokens === "number" ? {
39201
+ maxTokens: brain.settings.maxTokens
39202
+ } : {},
39203
+ generate: async (input) => brain.client.generateText({ // Adapter: maps the reviser's input onto brain.client.generateText.
39204
+ model: brain.model,
39205
+ systemPrompt: [
39206
+ brain.settings.promptPreamble,
39207
+ input.systemPrompt
39208
+ ].filter(Boolean).join("\n\n"), // Falsy parts (e.g. a missing promptPreamble) are dropped before joining.
39209
+ userPrompt: input.userPrompt,
39210
+ responseFormat: "json_object",
39211
+ ...typeof input.temperature === "number" ? {
39212
+ temperature: input.temperature
39213
+ } : {},
39214
+ ...typeof input.maxTokens === "number" ? {
39215
+ maxTokens: input.maxTokens
39216
+ } : {}
39217
+ })
39218
+ });
39219
+ if (result.source === "model") { // Only a result whose source is "model" replaces the plan.
39220
+ return {
39221
+ plan: result.plan,
39222
+ note: `Planner brain adapted the active task graph after ${task.id} via ${brain.provider.id}/${brain.model}.`
39223
+ };
39224
+ }
39225
+ return { // Otherwise keep the incoming plan; result.warning, when present, is surfaced as the note.
39226
+ plan,
39227
+ ...result.warning ? {
39228
+ note: `${result.warning} Keeping the current task graph.`
39229
+ } : {}
39230
+ };
39231
+ } catch (error2) { // Best-effort: the error is not rethrown; it is reported through emitProgress only when that callback was supplied.
39232
+ if (emitProgress) {
39233
+ await emitProgress({
39234
+ type: "task-note",
39235
+ sessionId,
39236
+ message: `Planner task adaptation skipped: ${error2 instanceof Error ? error2.message : String(error2)}`
39237
+ });
39238
+ }
39239
+ return {
39240
+ plan
39241
+ };
39242
+ }
39243
+ }
38324
39244
  async maybeExpandReadyTaskGraph(sessionId, request, plan, events, emitProgress) {
38325
39245
  const envelope = this.buildEnvelope(request, plan, sessionId);
38326
39246
  const candidate = envelope.readyTasks.find((task) => shouldDelegateTask(task, request));