@kimbho/kimbho-cli 0.1.28 → 0.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1025 -105
- package/dist/index.cjs.map +3 -3
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -12718,7 +12718,7 @@ function createCompletionRuntimeCommand(program2) {
|
|
|
12718
12718
|
// package.json
|
|
12719
12719
|
var package_default = {
|
|
12720
12720
|
name: "@kimbho/kimbho-cli",
|
|
12721
|
-
version: "0.1.
|
|
12721
|
+
version: "0.1.30",
|
|
12722
12722
|
description: "Kimbho CLI is a terminal-native coding agent for planning, execution, and verification.",
|
|
12723
12723
|
type: "module",
|
|
12724
12724
|
engines: {
|
|
@@ -17627,6 +17627,44 @@ var RepoStrategySchema = external_exports.object({
|
|
|
17627
17627
|
]),
|
|
17628
17628
|
reasoning: external_exports.string().min(1)
|
|
17629
17629
|
});
|
|
17630
|
+
// Enum schema listing the execution phases a task world-model can be in:
// survey, plan-edit, implement, verify, repair, finalize, escalate.
// NOTE(review): external_exports looks like the bundled schema-validation namespace — confirm.
var TaskExecutionPhaseSchema = external_exports.enum([
|
|
17631
|
+
"survey",
|
|
17632
|
+
"plan-edit",
|
|
17633
|
+
"implement",
|
|
17634
|
+
"verify",
|
|
17635
|
+
"repair",
|
|
17636
|
+
"finalize",
|
|
17637
|
+
"escalate"
|
|
17638
|
+
]);
|
|
17639
|
+
// Object schema for the verifier portion of a task world-model.
// The five *Commands fields are arrays of non-empty strings that default to [];
// the current/latest* fields are optional non-empty strings; requiresInteractiveSetup
// is a boolean that defaults to false.
var TaskVerifierStateSchema = external_exports.object({
|
|
17640
|
+
availableCommands: external_exports.array(external_exports.string().min(1)).default([]),
|
|
17641
|
+
preferredCommands: external_exports.array(external_exports.string().min(1)).default([]),
|
|
17642
|
+
attemptedCommands: external_exports.array(external_exports.string().min(1)).default([]),
|
|
17643
|
+
disabledCommands: external_exports.array(external_exports.string().min(1)).default([]),
|
|
17644
|
+
successfulCommands: external_exports.array(external_exports.string().min(1)).default([]),
|
|
17645
|
+
currentCommand: external_exports.string().min(1).optional(),
|
|
17646
|
+
latestFailureSummary: external_exports.string().min(1).optional(),
|
|
17647
|
+
latestFailureCommand: external_exports.string().min(1).optional(),
|
|
17648
|
+
latestSuccessfulCommand: external_exports.string().min(1).optional(),
|
|
17649
|
+
requiresInteractiveSetup: external_exports.boolean().default(false)
|
|
17650
|
+
});
|
|
17651
|
+
// Object schema for the per-task world-model.
// phase defaults to "survey"; every list field is an array of non-empty strings
// defaulting to []; the two counters are non-negative integers defaulting to 0;
// verifier defaults to an empty object parsed by TaskVerifierStateSchema;
// nextFocus and lastUpdatedAt (a datetime string) are optional.
var TaskWorldModelSchema = external_exports.object({
|
|
17652
|
+
phase: TaskExecutionPhaseSchema.default("survey"),
|
|
17653
|
+
targetFiles: external_exports.array(external_exports.string().min(1)).default([]),
|
|
17654
|
+
inspectedFiles: external_exports.array(external_exports.string().min(1)).default([]),
|
|
17655
|
+
changedFiles: external_exports.array(external_exports.string().min(1)).default([]),
|
|
17656
|
+
hypotheses: external_exports.array(external_exports.string().min(1)).default([]),
|
|
17657
|
+
blockers: external_exports.array(external_exports.string().min(1)).default([]),
|
|
17658
|
+
proofPending: external_exports.array(external_exports.string().min(1)).default([]),
|
|
17659
|
+
proofSatisfied: external_exports.array(external_exports.string().min(1)).default([]),
|
|
17660
|
+
recentActions: external_exports.array(external_exports.string().min(1)).default([]),
|
|
17661
|
+
recentCommands: external_exports.array(external_exports.string().min(1)).default([]),
|
|
17662
|
+
nextFocus: external_exports.string().min(1).optional(),
|
|
17663
|
+
sourceEditCount: external_exports.number().int().nonnegative().default(0),
|
|
17664
|
+
validationLoopCount: external_exports.number().int().nonnegative().default(0),
|
|
17665
|
+
verifier: TaskVerifierStateSchema.default({}),
|
|
17666
|
+
lastUpdatedAt: external_exports.string().datetime().optional()
|
|
17667
|
+
});
|
|
17630
17668
|
var PlanTaskSchema = external_exports.object({
|
|
17631
17669
|
id: external_exports.string().min(1),
|
|
17632
17670
|
title: external_exports.string().min(1),
|
|
@@ -17662,7 +17700,8 @@ var PlanTaskSchema = external_exports.object({
|
|
|
17662
17700
|
teamId: external_exports.string().min(1).optional(),
|
|
17663
17701
|
teamMemberIds: external_exports.array(external_exports.string()).optional(),
|
|
17664
17702
|
subagentLabel: external_exports.string().min(1).optional(),
|
|
17665
|
-
subagentInstructions: external_exports.string().min(1).optional()
|
|
17703
|
+
subagentInstructions: external_exports.string().min(1).optional(),
|
|
17704
|
+
executionState: TaskWorldModelSchema.optional()
|
|
17666
17705
|
});
|
|
17667
17706
|
var PlanMilestoneSchema = external_exports.object({
|
|
17668
17707
|
id: external_exports.string().min(1),
|
|
@@ -18150,6 +18189,103 @@ var LegacyKimbhoConfigSchema = external_exports.object({
|
|
|
18150
18189
|
"next-prisma-postgres"
|
|
18151
18190
|
])
|
|
18152
18191
|
});
|
|
18192
|
+
/**
 * Normalizes a list of model ids: trims each entry, drops missing or blank
 * entries, and removes duplicates while keeping first-seen order.
 *
 * @param {Array<string | null | undefined>} models - Raw model ids.
 * @returns {string[]} Unique, trimmed, non-empty model ids.
 */
function uniqueModelIds(models) {
  const trimmed = models.map((model) => model?.trim());
  return Array.from(new Set(trimmed.filter((model) => Boolean(model))));
}

/**
 * Lists the models a provider can serve, default model first.
 *
 * @param {{ defaultModel?: string, models?: string[] } | null | undefined} provider
 *   Provider definition; may be missing.
 * @returns {string[]} De-duplicated candidate model ids (empty when there is no provider).
 */
function providerCandidateModels(provider) {
  if (!provider) {
    return [];
  }
  // A provider that only declares a default model has no `models` array;
  // spreading `undefined` into an array literal would throw a TypeError.
  return uniqueModelIds([
    provider.defaultModel,
    ...(provider.models ?? [])
  ]);
}
|
|
18204
|
+
/**
 * Estimates a model's parameter count (in billions) from size tokens such as
 * "70b" or "1.5B" embedded in its id.
 *
 * @param {string} model - Model id to inspect.
 * @returns {number} The largest size token found, or 0 when the id has none.
 */
function estimateModelScale(model) {
  // The "b" must end the token: without the lookahead, quantization suffixes
  // such as "4bit"/"8bit" would be misread as a 4B/8B parameter count.
  const sizePattern = /(\d+(?:\.\d+)?)b(?![a-z])/gi;
  const sizes = Array.from(model.matchAll(sizePattern), (match) => Number.parseFloat(match[1]))
    .filter((value) => Number.isFinite(value));
  // Math.max() with no arguments is -Infinity, so handle "no sizes" explicitly.
  return sizes.length === 0 ? 0 : Math.max(...sizes);
}
|
|
18211
|
+
/**
 * Heuristically rates a model id for a brain role using name patterns and the
 * size estimate from estimateModelScale. Higher is better.
 *
 * For the "fast" role the result is a speed-oriented bias plus 20% of the base
 * score; "reviewer", "planner" and "coder" add role-specific adjustments to the
 * base score; any other role gets the base score unchanged.
 *
 * @param {string} model - Model id.
 * @param {string} role - Brain role ("fast", "reviewer", "planner", "coder", ...).
 * @returns {number} Relative suitability score.
 */
function scoreModelForRole(model, role) {
  const id = model.toLowerCase();
  const scale = estimateModelScale(id);
  const mentions = (pattern) => pattern.test(id);
  const isLightweight = mentions(/mini|nano|flash|haiku|small|fast|instant|lite/i);

  // Base score contributions, summed strictly in this order so the
  // floating-point total does not depend on evaluation order.
  const contributions = [
    mentions(/gpt-5(?!.*mini)(?!.*nano)/i) || mentions(/\bgpt5\b/i) ? 160 : 0,
    mentions(/\bo3\b|\bo4\b|o4-mini-high/i) ? 145 : 0,
    mentions(/opus|sonnet|claude-4|claude-3\.7/i) ? (mentions(/opus|claude-4/i) ? 150 : 132) : 0,
    mentions(/gpt-4\.1|gpt-4o|deepseek-r1|deepseek-v3|qwq/i) ? 122 : 0,
    mentions(/qwen.*(?:32b|35b|72b|110b|235b)|llama.*(?:70b|90b|405b)|mixtral/i) ? 110 : 0,
    mentions(/reason|thinking|r1|o[34]/i) ? 20 : 0,
    scale > 0 ? Math.min(scale, 120) * 0.8 : 0,
    isLightweight ? -55 : 0,
    mentions(/\b(?:3|7|8|9|14)b\b/i) ? -35 : 0,
    mentions(/preview|experimental|beta/i) ? -6 : 0
  ];
  let score = contributions.reduce((total, delta) => total + delta, 0);

  switch (role) {
    case "fast": {
      // Small/cheap models are rewarded here; the base score only contributes 20%.
      let fastBias = 0;
      if (isLightweight) {
        fastBias += 95;
      }
      if (mentions(/sonnet|gpt-4o-mini|gpt-5-mini|claude.*haiku/i)) {
        fastBias += 60;
      }
      if (scale >= 32) {
        fastBias -= 40;
      } else if (scale > 0 && scale <= 16) {
        fastBias += 24;
      }
      return fastBias + score * 0.2;
    }
    case "reviewer":
      if (mentions(/reason|thinking|r1|\bo3\b|\bo4\b|opus/i)) {
        score += 24;
      }
      if (mentions(/mini|flash|haiku/i)) {
        score -= 10;
      }
      break;
    case "planner":
      if (mentions(/reason|thinking|r1|\bo3\b|\bo4\b|sonnet|opus/i)) {
        score += 18;
      }
      break;
    case "coder":
      if (mentions(/sonnet|gpt-5|gpt-4\.1|deepseek|qwen|llama/i)) {
        score += 16;
      }
      break;
    default:
      break;
  }
  return score;
}
|
|
18280
|
+
/**
 * Picks the provider model that scores highest for the given role.
 *
 * Each candidate is scored exactly once; the previous sort comparator re-ran
 * the regex-heavy scorer on every comparison. On ties the earliest candidate
 * wins, which matches the result of a stable descending sort.
 *
 * @param {object | null | undefined} provider - Provider definition.
 * @param {string} role - Brain role to score for.
 * @returns {string | null} Best model id, or null when the provider has no candidates.
 */
function pickPreferredProviderModel(provider, role) {
  let bestModel = null;
  let bestScore = Number.NEGATIVE_INFINITY;
  for (const candidate of providerCandidateModels(provider)) {
    const candidateScore = scoreModelForRole(candidate, role);
    // Strict ">" keeps the first of equally scored candidates.
    if (bestModel === null || candidateScore > bestScore) {
      bestModel = candidate;
      bestScore = candidateScore;
    }
  }
  return bestModel;
}
|
|
18153
18289
|
function createBrainCatalog(providerId, defaultModel, fastModel) {
|
|
18154
18290
|
return {
|
|
18155
18291
|
planner: {
|
|
@@ -18229,12 +18365,13 @@ function normalizeConfigInput(raw) {
|
|
|
18229
18365
|
const legacy = LegacyKimbhoConfigSchema.safeParse(raw);
|
|
18230
18366
|
if (legacy.success) {
|
|
18231
18367
|
const provider = mapLegacyProviderToDefinition(legacy.data.provider);
|
|
18232
|
-
const defaultModel = provider.defaultModel ?? "gpt-5";
|
|
18368
|
+
const defaultModel = pickPreferredProviderModel(provider, "planner") ?? provider.defaultModel ?? "gpt-5";
|
|
18369
|
+
const fastModel = pickPreferredProviderModel(provider, "fast") ?? defaultModel;
|
|
18233
18370
|
return {
|
|
18234
18371
|
providers: [
|
|
18235
18372
|
provider
|
|
18236
18373
|
],
|
|
18237
|
-
brains: createBrainCatalog(provider.id, defaultModel,
|
|
18374
|
+
brains: createBrainCatalog(provider.id, defaultModel, fastModel),
|
|
18238
18375
|
approvalMode: legacy.data.approvalMode,
|
|
18239
18376
|
sandboxMode: legacy.data.sandboxMode,
|
|
18240
18377
|
stackPresets: legacy.data.stackPresets,
|
|
@@ -18285,8 +18422,8 @@ function createDefaultConfig(options = {}) {
|
|
|
18285
18422
|
baseUrl: "https://api.openai.com/v1",
|
|
18286
18423
|
defaultModel: "gpt-5"
|
|
18287
18424
|
});
|
|
18288
|
-
const defaultModel = options.defaultModel ?? provider.defaultModel;
|
|
18289
|
-
const fastModel = options.fastModel ?? defaultModel;
|
|
18425
|
+
const defaultModel = options.defaultModel ?? pickPreferredProviderModel(provider, "planner") ?? provider.defaultModel;
|
|
18426
|
+
const fastModel = options.fastModel ?? pickPreferredProviderModel(provider, "fast") ?? defaultModel;
|
|
18290
18427
|
return KimbhoConfigSchema.parse({
|
|
18291
18428
|
providers: [
|
|
18292
18429
|
provider
|
|
@@ -18765,7 +18902,10 @@ function resolveBrainSettings(config2, role) {
|
|
|
18765
18902
|
/**
 * Resolves the model id to use for a brain role.
 *
 * An explicitly configured model wins. Otherwise the provider's best-scoring
 * model for the role is used, then the provider's default model, then null.
 *
 * @param {object} config2 - Loaded configuration.
 * @param {string} role - Brain role.
 * @returns {string | null} Model id, or null when nothing can be resolved.
 */
function resolveBrainModel(config2, role) {
  const { providerId, model: configuredModel } = resolveBrainSettings(config2, role);
  const provider = findProviderById(config2, providerId);
  return configuredModel
    ? configuredModel
    : (pickPreferredProviderModel(provider, role) ?? provider?.defaultModel ?? null);
}
|
|
18770
18910
|
|
|
18771
18911
|
// ../core/dist/session/store.js
|
|
@@ -32988,6 +33128,9 @@ function combinePositiveLimit(...values) {
|
|
|
32988
33128
|
}
|
|
32989
33129
|
return Math.min(...filtered);
|
|
32990
33130
|
}
|
|
33131
|
+
/**
 * Trims every string in `values`, drops blank results, and removes duplicates
 * while keeping first-seen order.
 *
 * Non-string entries (for example an optional field that is undefined) are
 * skipped instead of throwing on `.trim()`.
 *
 * @param {Array<unknown>} values - Candidate strings.
 * @returns {string[]} Unique, trimmed, non-empty strings.
 */
function uniqueStrings2(values) {
  const seen = new Set();
  for (const value of values) {
    if (typeof value !== "string") {
      continue;
    }
    const trimmed = value.trim();
    if (trimmed.length > 0) {
      seen.add(trimmed);
    }
  }
  return Array.from(seen);
}
|
|
32991
33134
|
function truncateForModel(value) {
|
|
32992
33135
|
if (!value) {
|
|
32993
33136
|
return value;
|
|
@@ -33077,6 +33220,18 @@ function isReadOnlyShellCommand2(command) {
|
|
|
33077
33220
|
"git diff"
|
|
33078
33221
|
].some((prefix) => normalized === prefix || normalized.startsWith(prefix));
|
|
33079
33222
|
}
|
|
33223
|
+
/**
 * Reports whether a shell command line starts with one of the file-viewing
 * utilities (cat, head, tail, wc, sed, more, less) followed by an argument.
 *
 * The input is trimmed and lower-cased first. The utility name must be followed
 * by whitespace, so a bare "cat" or a longer word such as "catalog" does not
 * match. Any whitespace counts as the separator, not just a single space; the
 * previous exact-equality branch could never match because trimmed input never
 * ends in a space.
 *
 * @param {string} command - Raw shell command.
 * @returns {boolean} True when the command invokes one of the listed utilities.
 */
function isShellFileInspectionCommand(command) {
  const normalized = command.trim().toLowerCase();
  return /^(?:cat|head|tail|wc|sed|more|less)\s/.test(normalized);
}
|
|
33080
33235
|
function isVerificationCommand(command) {
|
|
33081
33236
|
const normalized = command.trim().toLowerCase();
|
|
33082
33237
|
return [
|
|
@@ -33121,6 +33276,310 @@ function isVerificationAction(action) {
|
|
|
33121
33276
|
const command = typeof action.input.command === "string" ? action.input.command : "";
|
|
33122
33277
|
return command.length > 0 && isVerificationCommand(command);
|
|
33123
33278
|
}
|
|
33279
|
+
/**
 * Reports whether an agent action is a runtime validation step: either a
 * verification action (per isVerificationAction) or a call to one of the
 * process.*, browser.* or http.fetch tools listed below.
 *
 * @param {{ type: string, tool?: string }} action - Agent action.
 * @returns {boolean} True for runtime validation tool calls.
 */
function isRuntimeValidationAction(action) {
  if (action.type !== "tool") {
    return false;
  }
  if (isVerificationAction(action)) {
    return true;
  }
  switch (action.tool) {
    case "process.start":
    case "process.logs":
    case "process.stop":
    case "browser.open":
    case "browser.inspect":
    case "browser.click":
    case "browser.fill":
    case "browser.close":
    case "http.fetch":
      return true;
    default:
      return false;
  }
}
|
|
33298
|
+
/**
 * Reports whether an action is a shell.exec tool call whose command is a
 * file-viewing command according to isShellFileInspectionCommand.
 * A non-string command is treated as the empty string.
 *
 * @param {{ type: string, tool?: string, input?: { command?: unknown } }} action
 * @returns {boolean} True for shell-based file inspection.
 */
function isShellFileInspectionAction(action) {
  const isShellExec = action.type === "tool" && action.tool === "shell.exec";
  if (!isShellExec) {
    return false;
  }
  const { command } = action.input;
  return isShellFileInspectionCommand(typeof command === "string" ? command : "");
}
|
|
33305
|
+
/**
 * Reports whether an action is a tool call to one of the repository
 * inspection tools: file.read, file.search, file.list, repo.index,
 * repo.query or git.diff.
 *
 * @param {{ type: string, tool?: string }} action - Agent action.
 * @returns {boolean} True for repository inspection tool calls.
 */
function isRepoInspectionAction(action) {
  const inspectionTools = new Set([
    "file.read",
    "file.search",
    "file.list",
    "repo.index",
    "repo.query",
    "git.diff"
  ]);
  return action.type === "tool" && inspectionTools.has(action.tool);
}
|
|
33318
|
+
/**
 * Normalizes a file path for storage in the task world-model.
 *
 * Backslashes become forward slashes and surrounding whitespace is removed.
 * Relative paths lose one leading "./". Absolute paths inside `cwd` are
 * rewritten relative to it; absolute paths outside `cwd` (or equal to it) are
 * returned unchanged.
 *
 * @param {string} cwd - Workspace root.
 * @param {string} value - Path to normalize.
 * @returns {string} Normalized path ("" for blank input).
 */
function normalizeWorkspacePath(cwd, value) {
  const normalized = value.replace(/\\/g, "/").trim();
  if (normalized.length === 0) {
    return normalized;
  }
  if (!import_node_path14.default.isAbsolute(normalized)) {
    return normalized.replace(/^\.\//, "");
  }
  const relative = import_node_path14.default.relative(cwd, normalized).replace(/\\/g, "/");
  // Only a ".." path segment leaves the workspace. A plain startsWith("..")
  // would also reject in-workspace names such as "..cache/file".
  const leavesWorkspace = relative === ".." || relative.startsWith("../");
  return relative.length > 0 && !leavesWorkspace ? relative : normalized;
}
|
|
33329
|
+
/**
 * Detects non-interactive verification commands for the workspace at `cwd`.
 *
 * Reads `package.json` and, for each of the `typecheck`, `build`, `test` and
 * `lint` scripts that exists, emits a run command for the declared package
 * manager (pnpm, yarn or bun; npm otherwise). When no script is found and a
 * `tsconfig.json` exists, falls back to `npx tsc --noEmit`.
 *
 * @param {string} cwd - Workspace root directory.
 * @returns {Promise<{ availableCommands: string[], preferredCommands: string[] }>}
 *   All detected commands, plus the preferred ordering (typecheck/build/test
 *   commands first, then lint-only commands).
 */
async function detectVerificationCommands(cwd) {
  const commands = [];
  try {
    // readFile already fails when the file is missing, so no separate access() probe is needed.
    const raw = await (0, import_promises14.readFile)(import_node_path14.default.join(cwd, "package.json"), "utf8");
    const parsed = JSON.parse(raw);
    const scripts = parsed?.scripts ?? {};
    // A non-string packageManager falls back to npm; calling startsWith on it
    // would throw and discard every script found in this file.
    const declaredManager = typeof parsed?.packageManager === "string" ? parsed.packageManager : "";
    const packageManager = ["pnpm", "yarn", "bun"].find((name) => declaredManager.startsWith(name)) ?? "npm";
    const renderRun = (script) => packageManager === "yarn" ? `yarn ${script}` : `${packageManager} run ${script}`;
    for (const script of ["typecheck", "build", "test", "lint"]) {
      if (scripts[script]) {
        commands.push(renderRun(script));
      }
    }
  } catch {
    // Best effort: a missing, unreadable or malformed package.json simply
    // means there are no script-based verifiers to offer.
  }
  if (commands.length === 0) {
    try {
      await (0, import_promises14.access)(import_node_path14.default.join(cwd, "tsconfig.json"));
      commands.push("npx tsc --noEmit");
    } catch {
      // No tsconfig.json: leave the command list empty.
    }
  }
  const unique = uniqueStrings2(commands);
  const isPrimary = (command) => /typecheck|build|test/i.test(command);
  const preferred = [
    ...unique.filter(isPrimary),
    ...unique.filter((command) => /lint/i.test(command) && !isPrimary(command))
  ];
  return {
    availableCommands: unique,
    preferredCommands: uniqueStrings2(preferred)
  };
}
|
|
33375
|
+
/**
 * Builds the starting world-model for a task.
 *
 * The initial phase is "verify" for verification tasks, "finalize" for
 * integration and documentation tasks, "survey" for other tasks in an existing
 * workspace, and "implement" otherwise. Target files come from the task's
 * likely-touched files, pending proof from its acceptance criteria, and the
 * verifier state is seeded from the detected commands.
 *
 * @param {object} task - Plan task (type, id, description, filesLikelyTouched, acceptanceCriteria).
 * @param {object} request - Execution request (cwd, workspaceState).
 * @param {{ availableCommands: string[], preferredCommands: string[] }} verifier - Detected verifier commands.
 * @returns {object} Fresh world-model stamped with the current time.
 */
function createInitialWorldModel(task, request, verifier) {
  const pickInitialPhase = () => {
    switch (task.type) {
      case "verification":
        return "verify";
      case "integration":
      case "documentation":
        return "finalize";
      default:
        return request.workspaceState === "existing" ? "survey" : "implement";
    }
  };
  const phase = pickInitialPhase();
  const targetFiles = uniqueStrings2(
    task.filesLikelyTouched.map((filePath) => normalizeWorkspacePath(request.cwd, filePath))
  );
  const hypotheses = uniqueStrings2([
    `Satisfy task acceptance criteria for ${task.id}.`,
    task.description
  ]);
  const proofPending = uniqueStrings2(task.acceptanceCriteria);
  const nextFocus = phase === "verify"
    ? "Run the preferred verification path and capture proof."
    : "Inspect the likely source files and determine the minimal safe change.";
  return {
    phase,
    targetFiles,
    inspectedFiles: [],
    changedFiles: [],
    hypotheses,
    blockers: [],
    proofPending,
    proofSatisfied: [],
    recentActions: [],
    recentCommands: [],
    nextFocus,
    sourceEditCount: 0,
    validationLoopCount: 0,
    verifier: {
      availableCommands: verifier.availableCommands,
      preferredCommands: verifier.preferredCommands,
      attemptedCommands: [],
      disabledCommands: [],
      successfulCommands: [],
      requiresInteractiveSetup: false
    },
    lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
  };
}
|
|
33405
|
+
/**
 * Renders a world-model as a newline-separated "label: value" summary.
 * Empty lists render as "(none)", a missing next focus as "(unset)", and a
 * missing verifier failure summary as "(none)".
 *
 * @param {object} worldModel - Task world-model.
 * @returns {string} Multi-line summary text.
 */
function summarizeWorldModel(worldModel) {
  const listOrNone = (values, separator) => values.join(separator) || "(none)";
  const rows = [
    ["phase", worldModel.phase],
    ["next focus", worldModel.nextFocus ?? "(unset)"],
    ["target files", listOrNone(worldModel.targetFiles, ", ")],
    ["inspected files", listOrNone(worldModel.inspectedFiles, ", ")],
    ["changed files", listOrNone(worldModel.changedFiles, ", ")],
    ["hypotheses", listOrNone(worldModel.hypotheses, " | ")],
    ["blockers", listOrNone(worldModel.blockers, " | ")],
    ["proof pending", listOrNone(worldModel.proofPending, " | ")],
    ["proof satisfied", listOrNone(worldModel.proofSatisfied, " | ")],
    ["verifier preferred commands", listOrNone(worldModel.verifier.preferredCommands, ", ")],
    ["verifier disabled commands", listOrNone(worldModel.verifier.disabledCommands, ", ")],
    ["latest verifier failure", worldModel.verifier.latestFailureSummary ?? "(none)"],
    ["recent actions", listOrNone(worldModel.recentActions, " | ")]
  ];
  return rows.map(([label, text]) => `${label}: ${text}`).join("\n");
}
|
|
33422
|
+
/**
 * Returns a copy of the world-model with `label` recorded as the most recent
 * action and, when given, `command` as the most recent command.
 *
 * Each list keeps at most six unique, trimmed entries ordered oldest to newest.
 * A repeated entry moves to the end of the list; previously de-duplication
 * kept its first position, so the list was not in recency order.
 * The input world-model is not mutated.
 *
 * @param {object} worldModel - Current world-model.
 * @param {string} label - Description of the action just taken.
 * @param {string} [command] - Shell command that was run, if any.
 * @returns {object} Updated world-model with a fresh `lastUpdatedAt`.
 */
function recordWorldModelAction(worldModel, label, command) {
  // Keeps up to five earlier unique entries and appends `entry` as the newest.
  const appendRecent = (entries, entry) => {
    const latest = entry.trim();
    const earlier = [];
    for (const item of entries.slice(-5)) {
      const trimmed = item.trim();
      if (trimmed.length > 0 && trimmed !== latest && !earlier.includes(trimmed)) {
        earlier.push(trimmed);
      }
    }
    return latest.length > 0 ? [...earlier, latest] : earlier;
  };
  return {
    ...worldModel,
    recentActions: appendRecent(worldModel.recentActions, label),
    recentCommands: command ? appendRecent(worldModel.recentCommands, command) : worldModel.recentCommands,
    lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
  };
}
|
|
33436
|
+
/**
 * Scores a verification command against the current world-model; higher means
 * a better next verifier.
 *
 * Adds a bonus for re-running the last failed command during the repair phase,
 * adds a per-family score (typecheck, build, test, lint) that is larger when a
 * pending proof item mentions that family, and applies penalties for dev-server
 * style commands, already-successful commands and disabled commands.
 *
 * @param {string} command - Candidate command.
 * @param {object} worldModel - Task world-model.
 * @returns {number} Relative score.
 */
function scoreVerificationCommand(command, worldModel) {
  const normalized = command.trim().toLowerCase();
  const pendingMentions = (pattern) => worldModel.proofPending.some((item) => pattern.test(item));
  // [command pattern, pending-proof pattern, score when proof is pending, baseline score]
  const families = [
    [/typecheck|tsc --noemit|tsc\b/i, /typescript|compile|type/i, 70, 32],
    [/build/i, /build|compile|render|page|layout|responsive/i, 68, 34],
    [/test|vitest|jest/i, /test|behavior|logic|regression/i, 74, 38],
    [/lint|eslint/i, /lint|style|quality/i, 54, 18]
  ];

  let score = 0;
  const isRetryOfFailure = worldModel.phase === "repair"
    && worldModel.verifier.latestFailureCommand
    && normalized === worldModel.verifier.latestFailureCommand.trim().toLowerCase();
  if (isRetryOfFailure) {
    score += 80;
  }
  for (const [commandPattern, proofPattern, whenPending, baseline] of families) {
    if (commandPattern.test(normalized)) {
      score += pendingMentions(proofPattern) ? whenPending : baseline;
    }
  }
  if (/dev|serve|start/i.test(normalized)) {
    score -= 18;
  }
  const { successfulCommands, disabledCommands } = worldModel.verifier;
  if (successfulCommands.includes(command)) {
    score -= 12;
  }
  if (disabledCommands.includes(command)) {
    score -= 1e3;
  }
  return score;
}
|
|
33465
|
+
/**
 * Chooses the verification command to run next.
 *
 * Candidates are the last failed command (repair phase only), then the
 * preferred commands, then the available commands, de-duplicated and with
 * disabled commands removed. The highest-scoring candidate wins; on a tie the
 * earliest candidate is kept.
 *
 * @param {object} worldModel - Task world-model.
 * @returns {string | null} Command to run, or null when no candidate remains.
 */
function determineNextVerificationCommand(worldModel) {
  const retryFirst = worldModel.phase === "repair" && worldModel.verifier.latestFailureCommand
    ? [worldModel.verifier.latestFailureCommand]
    : [];
  const pool = uniqueStrings2([
    ...retryFirst,
    ...worldModel.verifier.preferredCommands,
    ...worldModel.verifier.availableCommands
  ]);
  const candidates = pool.filter((command) => !worldModel.verifier.disabledCommands.includes(command));
  let chosen = null;
  let chosenScore = Number.NEGATIVE_INFINITY;
  for (const candidate of candidates) {
    const candidateScore = scoreVerificationCommand(candidate, worldModel);
    if (chosen === null || candidateScore > chosenScore) {
      chosen = candidate;
      chosenScore = candidateScore;
    }
  }
  return chosen;
}
|
|
33480
|
+
/**
 * Computes the proof lists after a verification command finished.
 *
 * On a failure, or when no command is given, both lists are returned untouched.
 * On success, each command family the command belongs to (typecheck, build,
 * test, lint) moves the pending items that mention that family to the satisfied
 * list, or records a generic "verified via" label when none match. A command
 * matching no family records "Verified via <command>".
 *
 * @param {object} worldModel - Task world-model (proofPending, proofSatisfied).
 * @param {string} command - Verification command that ran.
 * @param {boolean} success2 - Whether the command succeeded.
 * @returns {{ proofPending: string[], proofSatisfied: string[] }} Updated proof lists.
 */
function updateProofStateForVerification(worldModel, command, success2) {
  if (!command || !success2) {
    return {
      proofPending: worldModel.proofPending,
      proofSatisfied: worldModel.proofSatisfied
    };
  }
  const normalized = command.toLowerCase();
  const families = [
    {
      commandPattern: /typecheck|tsc --noemit|tsc\b/,
      proofPattern: /typescript|compile|type/i,
      fallback: `Type safety verified via ${command}`
    },
    {
      commandPattern: /build/,
      proofPattern: /build|compile|render|page|layout|responsive/i,
      fallback: `Build/render verification passed via ${command}`
    },
    {
      commandPattern: /test|vitest|jest/,
      proofPattern: /test|behavior|logic|regression/i,
      fallback: `Behavior verified via ${command}`
    },
    {
      commandPattern: /lint|eslint/,
      proofPattern: /lint|style|quality/i,
      fallback: `Code quality verified via ${command}`
    }
  ];
  let remaining = [...worldModel.proofPending];
  const satisfied = [];
  for (const { commandPattern, proofPattern, fallback } of families) {
    if (!commandPattern.test(normalized)) {
      continue;
    }
    const hits = remaining.filter((item) => proofPattern.test(item));
    if (hits.length === 0) {
      satisfied.push(fallback);
      continue;
    }
    satisfied.push(...hits);
    remaining = remaining.filter((item) => !proofPattern.test(item));
  }
  if (satisfied.length === 0) {
    satisfied.push(`Verified via ${command}`);
  }
  return {
    proofPending: uniqueStrings2(remaining),
    proofSatisfied: uniqueStrings2([
      ...worldModel.proofSatisfied,
      ...satisfied
    ])
  };
}
|
|
33524
|
+
/**
 * Returns the prompt rules that apply to the world-model's current phase.
 *
 * "plan-edit" and "implement" share the same rules. The "verify" rules name the
 * command chosen by determineNextVerificationCommand, and the "repair" rules
 * quote the latest verifier failure summary.
 *
 * @param {object} worldModel - Task world-model.
 * @returns {string[]} Guidance lines; empty for an unrecognized phase, so
 *   callers can always map over the result.
 */
function derivePhaseGuidance(worldModel) {
  switch (worldModel.phase) {
    case "survey":
      return [
        "Survey likely source files and constraints before making a change.",
        "Do not spend more than one baseline validation action before a real source edit lands."
      ];
    case "plan-edit":
    case "implement":
      return [
        "Make the concrete source change now.",
        "Use file.patch or file.write against the likely target files before further runtime validation."
      ];
    case "verify":
      return [
        `Use the strategic verifier next: ${determineNextVerificationCommand(worldModel) ?? "(choose the best non-interactive verifier)"}.`,
        "Capture proof for the changed behavior before finishing."
      ];
    case "repair":
      return [
        `Repair the latest failing verifier before rerunning it: ${worldModel.verifier.latestFailureSummary ?? "(missing failure summary)"}.`,
        "Inspect failure output, edit the relevant source, then rerun the strategic verifier."
      ];
    case "finalize":
      return [
        "Only finish once the key proof is captured and no blocker remains.",
        "Use git.diff or one final verifier if you still need confirmation."
      ];
    case "escalate":
      return [
        "Summarize blockers precisely so the supervisor can reassign or replan."
      ];
    default:
      // Without this branch an unknown phase returned undefined and the
      // `.map(...)` in the prompt builder threw a TypeError.
      return [];
  }
}
|
|
33558
|
+
/**
 * Derives hints for the supervisor from a task outcome.
 *
 * - A completed delegated task (one with a parentTaskId) suggests merging and
 *   reprioritizing.
 * - A blocked or handed-off task, or one in the "escalate" phase, suggests
 *   replanning, with the first blocker as the reason when there is one.
 * - A delegated task in the "repair" phase suggests replanning and
 *   reprioritizing.
 *
 * @param {string} status - Outcome status.
 * @param {{ id: string, parentTaskId?: string }} task - The task that ran.
 * @param {{ phase: string, blockers: string[] }} worldModel - Task world-model.
 * @returns {object | undefined} Hint object, or undefined when no hint applies.
 */
function deriveSupervisorHints(status, task, worldModel) {
  const isDelegated = Boolean(task.parentTaskId);
  if (isDelegated && status === "completed") {
    return {
      shouldMergeDelegatedWork: true,
      shouldReprioritize: true,
      reason: `Delegated task ${task.id} completed; parent flow may be ready to merge or reprioritize.`
    };
  }
  const needsIntervention = ["blocked", "handoff"].includes(status) || worldModel.phase === "escalate";
  if (needsIntervention) {
    const [firstBlocker] = worldModel.blockers;
    return {
      shouldReplan: true,
      shouldReprioritize: isDelegated,
      reason: firstBlocker ?? `Task ${task.id} needs supervisor intervention.`
    };
  }
  if (!isDelegated || worldModel.phase !== "repair") {
    return void 0;
  }
  return {
    shouldReplan: true,
    shouldReprioritize: true,
    reason: `Delegated task ${task.id} is in repair mode and may need reassignment or dependency changes.`
  };
}
|
|
33124
33583
|
function extractShellCommand(action) {
|
|
33125
33584
|
if (action.type !== "tool" || action.tool !== "shell.exec") {
|
|
33126
33585
|
return "";
|
|
@@ -33139,7 +33598,7 @@ function isInteractiveVerificationSetupFailure(action, result) {
|
|
|
33139
33598
|
].join("\n").toLowerCase();
|
|
33140
33599
|
return combined.includes("interactive eslint setup required") || combined.includes("command requires interactive input before it can continue") || command.includes("lint") && combined.includes("how would you like to configure eslint");
|
|
33141
33600
|
}
|
|
33142
|
-
function buildSystemPrompt(agent, task, request, allowedTools, plan, extraInstructions) {
|
|
33601
|
+
function buildSystemPrompt(agent, task, request, allowedTools, worldModel, plan, extraInstructions) {
|
|
33143
33602
|
const toolShape = allowedTools.join("|");
|
|
33144
33603
|
const dependencyTasks = plan ? flattenPlanTasks(plan).filter((candidate) => task.dependsOn.includes(candidate.id)) : [];
|
|
33145
33604
|
const completedTasks = plan ? flattenPlanTasks(plan).filter((candidate) => candidate.status === "completed" && candidate.id !== task.id) : [];
|
|
@@ -33153,6 +33612,7 @@ function buildSystemPrompt(agent, task, request, allowedTools, plan, extraInstru
|
|
|
33153
33612
|
`Goal: ${request.goal}`,
|
|
33154
33613
|
`Current task: ${task.id} - ${task.title}`,
|
|
33155
33614
|
`Task description: ${task.description}`,
|
|
33615
|
+
`Execution phase: ${worldModel.phase}`,
|
|
33156
33616
|
`Acceptance criteria:`,
|
|
33157
33617
|
...task.acceptanceCriteria.map((item) => `- ${item}`),
|
|
33158
33618
|
`Likely files: ${task.filesLikelyTouched.join(", ") || "(not specified)"}`,
|
|
@@ -33162,6 +33622,8 @@ function buildSystemPrompt(agent, task, request, allowedTools, plan, extraInstru
|
|
|
33162
33622
|
`Completed tasks in this run: ${completedTasks.length > 0 ? completedTasks.slice(-4).map((candidate) => `${candidate.id}:${candidate.title}`).join(", ") : "(none yet)"}`,
|
|
33163
33623
|
`Downstream tasks depending on this task: ${downstreamTasks.length > 0 ? downstreamTasks.slice(0, 4).map((candidate) => `${candidate.id}:${candidate.title}`).join(", ") : "(none)"}`,
|
|
33164
33624
|
`Allowed tools: ${allowedTools.join(", ")}`,
|
|
33625
|
+
`Task world-model:`,
|
|
33626
|
+
summarizeWorldModel(worldModel),
|
|
33165
33627
|
`Respond with exactly one JSON object and no markdown.`,
|
|
33166
33628
|
`Tool action shape: {"type":"tool","tool":"${toolShape}","input":{...},"reason":"why this step matters"}`,
|
|
33167
33629
|
`Finish shape: {"type":"finish","summary":"what was completed and verified"}`,
|
|
@@ -33170,6 +33632,7 @@ function buildSystemPrompt(agent, task, request, allowedTools, plan, extraInstru
|
|
|
33170
33632
|
`- Use one action per response.`,
|
|
33171
33633
|
`- Use file.list, file.search, repo.index, and repo.query to explore the workspace before editing.`,
|
|
33172
33634
|
`- Prefer file.read before editing existing files.`,
|
|
33635
|
+
`- For repo file inspection, prefer file.read, file.search, file.list, repo.query, and git.diff instead of shell.exec cat/head/tail/wc/sed.`,
|
|
33173
33636
|
`- Use scaffold.generate when the task is clearly greenfield and a known preset fits better than improvising every file by hand.`,
|
|
33174
33637
|
`- Use file.patch for existing files when possible; use file.write for new files or full replacements.`,
|
|
33175
33638
|
`- Use git.diff to inspect the current patch after changes when helpful.`,
|
|
@@ -33184,6 +33647,7 @@ function buildSystemPrompt(agent, task, request, allowedTools, plan, extraInstru
|
|
|
33184
33647
|
`- If a verification command asks for interactive setup or operator input, do not rerun it unchanged. Choose a different non-interactive verifier, or configure that verifier only if the task explicitly requires it.`,
|
|
33185
33648
|
`- Do not claim success unless the task acceptance criteria are satisfied.`,
|
|
33186
33649
|
`- If the task is underspecified, make a pragmatic implementation choice and continue.`,
|
|
33650
|
+
...derivePhaseGuidance(worldModel).map((rule) => `- ${rule}`),
|
|
33187
33651
|
...task.subagentInstructions ? [
|
|
33188
33652
|
`Delegation instructions:`,
|
|
33189
33653
|
task.subagentInstructions
|
|
@@ -33473,6 +33937,8 @@ var AutonomousTaskExecutor = class {
|
|
|
33473
33937
|
const brain = await this.resolver.resolve(effectiveBrainRole);
|
|
33474
33938
|
const allowedTools = resolvedExecutionPolicy.allowedTools;
|
|
33475
33939
|
const actionSchema = createAgentActionSchema(allowedTools);
|
|
33940
|
+
const verificationCommands = await detectVerificationCommands(request.cwd);
|
|
33941
|
+
let worldModel = task.executionState ?? createInitialWorldModel(task, request, verificationCommands);
|
|
33476
33942
|
const messages = [
|
|
33477
33943
|
{
|
|
33478
33944
|
role: "user",
|
|
@@ -33499,11 +33965,13 @@ var AutonomousTaskExecutor = class {
|
|
|
33499
33965
|
}
|
|
33500
33966
|
};
|
|
33501
33967
|
let changedWorkspace = false;
|
|
33968
|
+
let appliedSourceEdit = false;
|
|
33502
33969
|
let verifiedAfterLatestChange = false;
|
|
33503
33970
|
let repairRequiredBeforeVerification = false;
|
|
33504
33971
|
let repairAppliedSinceFailure = false;
|
|
33505
33972
|
let verificationFailures = 0;
|
|
33506
33973
|
let lastVerificationFailure = null;
|
|
33974
|
+
let preEditValidationActions = 0;
|
|
33507
33975
|
let preservedMessageCount = messages.length;
|
|
33508
33976
|
let compactedTranscriptEntries = 0;
|
|
33509
33977
|
let compactionSummary = null;
|
|
@@ -33514,6 +33982,26 @@ var AutonomousTaskExecutor = class {
|
|
|
33514
33982
|
inputTokens: 0,
|
|
33515
33983
|
outputTokens: 0
|
|
33516
33984
|
};
|
|
33985
|
+
const snapshotWorldModel = () => ({
|
|
33986
|
+
...worldModel,
|
|
33987
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
33988
|
+
});
|
|
33989
|
+
const makeOutcome = (status, summary, extra = {}) => {
|
|
33990
|
+
const currentWorldModel = snapshotWorldModel();
|
|
33991
|
+
const supervisorHints = deriveSupervisorHints(status, task, currentWorldModel);
|
|
33992
|
+
return {
|
|
33993
|
+
status,
|
|
33994
|
+
summary,
|
|
33995
|
+
toolResults,
|
|
33996
|
+
artifacts: Array.from(artifacts),
|
|
33997
|
+
usage: usageTotals,
|
|
33998
|
+
worldModel: currentWorldModel,
|
|
33999
|
+
...supervisorHints ? {
|
|
34000
|
+
supervisorHints
|
|
34001
|
+
} : {},
|
|
34002
|
+
...extra
|
|
34003
|
+
};
|
|
34004
|
+
};
|
|
33517
34005
|
const createToolExecutionContext = (step, approvalReason, operatorApproved = false) => ({
|
|
33518
34006
|
cwd: request.cwd,
|
|
33519
34007
|
...options.signal ? {
|
|
@@ -33587,13 +34075,17 @@ var AutonomousTaskExecutor = class {
|
|
|
33587
34075
|
});
|
|
33588
34076
|
const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
|
|
33589
34077
|
artifacts.add(transcriptPath2);
|
|
33590
|
-
|
|
33591
|
-
|
|
33592
|
-
|
|
33593
|
-
|
|
33594
|
-
|
|
33595
|
-
|
|
34078
|
+
worldModel = {
|
|
34079
|
+
...worldModel,
|
|
34080
|
+
phase: "escalate",
|
|
34081
|
+
blockers: uniqueStrings2([
|
|
34082
|
+
...worldModel.blockers,
|
|
34083
|
+
summary
|
|
34084
|
+
]),
|
|
34085
|
+
nextFocus: "Supervisor should replan or reassign this task because a budget was exhausted.",
|
|
34086
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
33596
34087
|
};
|
|
34088
|
+
return makeOutcome("blocked", summary);
|
|
33597
34089
|
};
|
|
33598
34090
|
await emitProgress({
|
|
33599
34091
|
type: "task-note",
|
|
@@ -33622,24 +34114,32 @@ var AutonomousTaskExecutor = class {
|
|
|
33622
34114
|
});
|
|
33623
34115
|
const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
|
|
33624
34116
|
artifacts.add(transcriptPath2);
|
|
33625
|
-
|
|
33626
|
-
|
|
33627
|
-
|
|
33628
|
-
|
|
33629
|
-
|
|
33630
|
-
|
|
34117
|
+
worldModel = {
|
|
34118
|
+
...worldModel,
|
|
34119
|
+
phase: "escalate",
|
|
34120
|
+
blockers: uniqueStrings2([
|
|
34121
|
+
...worldModel.blockers,
|
|
34122
|
+
summary
|
|
34123
|
+
]),
|
|
34124
|
+
nextFocus: "Supervisor should inspect why this task has no executable tools.",
|
|
34125
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
33631
34126
|
};
|
|
34127
|
+
return makeOutcome("blocked", summary);
|
|
33632
34128
|
}
|
|
33633
34129
|
if (resolvedApproval?.decision === "deny") {
|
|
33634
34130
|
const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
|
|
33635
34131
|
artifacts.add(transcriptPath2);
|
|
33636
|
-
|
|
33637
|
-
|
|
33638
|
-
|
|
33639
|
-
|
|
33640
|
-
|
|
33641
|
-
|
|
34132
|
+
worldModel = {
|
|
34133
|
+
...worldModel,
|
|
34134
|
+
phase: "escalate",
|
|
34135
|
+
blockers: uniqueStrings2([
|
|
34136
|
+
...worldModel.blockers,
|
|
34137
|
+
`Operator denied ${resolvedApproval.approval.toolId}.`
|
|
34138
|
+
]),
|
|
34139
|
+
nextFocus: "Choose a safer path or wait for supervisor replanning after the denied action.",
|
|
34140
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
33642
34141
|
};
|
|
34142
|
+
return makeOutcome("blocked", `Operator denied approval for ${resolvedApproval.approval.toolId} in ${task.id}.`);
|
|
33643
34143
|
}
|
|
33644
34144
|
if (request.workspaceState === "existing") {
|
|
33645
34145
|
const preflightResults = [];
|
|
@@ -33681,6 +34181,18 @@ var AutonomousTaskExecutor = class {
|
|
|
33681
34181
|
for (const artifact of result.artifacts) {
|
|
33682
34182
|
artifacts.add(artifact);
|
|
33683
34183
|
}
|
|
34184
|
+
if (toolId === "file.read" && result.success && typeof input.path === "string") {
|
|
34185
|
+
worldModel = {
|
|
34186
|
+
...worldModel,
|
|
34187
|
+
inspectedFiles: uniqueStrings2([
|
|
34188
|
+
...worldModel.inspectedFiles,
|
|
34189
|
+
normalizeWorkspacePath(request.cwd, input.path)
|
|
34190
|
+
]),
|
|
34191
|
+
phase: worldModel.phase === "survey" ? "plan-edit" : worldModel.phase,
|
|
34192
|
+
nextFocus: worldModel.phase === "survey" ? "Make the concrete source change in the inspected target files." : worldModel.nextFocus,
|
|
34193
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34194
|
+
};
|
|
34195
|
+
}
|
|
33684
34196
|
transcript.push({
|
|
33685
34197
|
step: 0,
|
|
33686
34198
|
response: JSON.stringify({
|
|
@@ -33780,6 +34292,14 @@ ${truncateForModel(customAgentMemory)}`);
|
|
|
33780
34292
|
].join("\n\n")
|
|
33781
34293
|
});
|
|
33782
34294
|
}
|
|
34295
|
+
if (request.workspaceState === "existing" && (task.type === "scaffold" || task.type === "implementation") && worldModel.phase === "survey") {
|
|
34296
|
+
worldModel = {
|
|
34297
|
+
...worldModel,
|
|
34298
|
+
phase: "plan-edit",
|
|
34299
|
+
nextFocus: "Edit the likely source files before using more verification or browser/runtime checks.",
|
|
34300
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34301
|
+
};
|
|
34302
|
+
}
|
|
33783
34303
|
}
|
|
33784
34304
|
preservedMessageCount = messages.length;
|
|
33785
34305
|
const runToolAction = async (parsedAction, step, operatorApproved = false) => {
|
|
@@ -33841,7 +34361,10 @@ ${truncateForModel(customAgentMemory)}`);
|
|
|
33841
34361
|
const applyToolResult = async (parsedAction, result, step, transcriptEntry) => {
|
|
33842
34362
|
const mutatingAction = isMutatingAction(parsedAction);
|
|
33843
34363
|
const verificationAction = isVerificationAction(parsedAction);
|
|
34364
|
+
const runtimeValidationAction = isRuntimeValidationAction(parsedAction);
|
|
34365
|
+
const shellCommand = extractShellCommand(parsedAction);
|
|
33844
34366
|
const interactiveVerificationSetupFailure = verificationAction && !result.success && isInteractiveVerificationSetupFailure(parsedAction, result);
|
|
34367
|
+
worldModel = recordWorldModelAction(worldModel, parsedAction.type === "tool" ? `${parsedAction.tool}${shellCommand ? ` ${shellCommand}` : ""}` : parsedAction.type === "finish" ? "finish" : "block", shellCommand || void 0);
|
|
33845
34368
|
if (mutatingAction && result.success) {
|
|
33846
34369
|
changedWorkspace = true;
|
|
33847
34370
|
verifiedAfterLatestChange = false;
|
|
@@ -33849,22 +34372,121 @@ ${truncateForModel(customAgentMemory)}`);
|
|
|
33849
34372
|
repairAppliedSinceFailure = true;
|
|
33850
34373
|
}
|
|
33851
34374
|
}
|
|
34375
|
+
if (parsedAction.tool === "file.write" || parsedAction.tool === "file.patch") {
|
|
34376
|
+
if (result.success) {
|
|
34377
|
+
appliedSourceEdit = true;
|
|
34378
|
+
preEditValidationActions = 0;
|
|
34379
|
+
worldModel = {
|
|
34380
|
+
...worldModel,
|
|
34381
|
+
phase: "verify",
|
|
34382
|
+
changedFiles: uniqueStrings2([
|
|
34383
|
+
...worldModel.changedFiles,
|
|
34384
|
+
...result.artifacts.map((artifact) => normalizeWorkspacePath(request.cwd, artifact)),
|
|
34385
|
+
...typeof parsedAction.input.path === "string" ? [
|
|
34386
|
+
normalizeWorkspacePath(request.cwd, parsedAction.input.path)
|
|
34387
|
+
] : []
|
|
34388
|
+
]),
|
|
34389
|
+
blockers: [],
|
|
34390
|
+
sourceEditCount: worldModel.sourceEditCount + 1,
|
|
34391
|
+
nextFocus: `Run the strategic verifier next: ${determineNextVerificationCommand(worldModel) ?? "choose the best non-interactive verifier"}.`,
|
|
34392
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34393
|
+
};
|
|
34394
|
+
}
|
|
34395
|
+
}
|
|
34396
|
+
if (!appliedSourceEdit && runtimeValidationAction) {
|
|
34397
|
+
preEditValidationActions += 1;
|
|
34398
|
+
worldModel = {
|
|
34399
|
+
...worldModel,
|
|
34400
|
+
validationLoopCount: worldModel.validationLoopCount + 1,
|
|
34401
|
+
nextFocus: "Make a source edit before spending more time on runtime validation.",
|
|
34402
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34403
|
+
};
|
|
34404
|
+
}
|
|
34405
|
+
if (parsedAction.tool === "file.read" && result.success && typeof parsedAction.input.path === "string") {
|
|
34406
|
+
worldModel = {
|
|
34407
|
+
...worldModel,
|
|
34408
|
+
inspectedFiles: uniqueStrings2([
|
|
34409
|
+
...worldModel.inspectedFiles,
|
|
34410
|
+
normalizeWorkspacePath(request.cwd, parsedAction.input.path)
|
|
34411
|
+
]),
|
|
34412
|
+
phase: worldModel.phase === "survey" ? "plan-edit" : worldModel.phase,
|
|
34413
|
+
nextFocus: worldModel.phase === "survey" ? "Use the inspected files to make the concrete implementation change." : worldModel.nextFocus,
|
|
34414
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34415
|
+
};
|
|
34416
|
+
}
|
|
33852
34417
|
if (verificationAction) {
|
|
33853
34418
|
verifiedAfterLatestChange = result.success;
|
|
34419
|
+
const attemptedCommand = shellCommand || (parsedAction.tool === "tests.run" ? "tests.run" : "");
|
|
34420
|
+
const proofUpdate = updateProofStateForVerification(worldModel, attemptedCommand || void 0, result.success);
|
|
34421
|
+
const verifier = {
|
|
34422
|
+
...worldModel.verifier,
|
|
34423
|
+
attemptedCommands: attemptedCommand ? uniqueStrings2([
|
|
34424
|
+
...worldModel.verifier.attemptedCommands,
|
|
34425
|
+
attemptedCommand
|
|
34426
|
+
]) : worldModel.verifier.attemptedCommands,
|
|
34427
|
+
currentCommand: attemptedCommand || worldModel.verifier.currentCommand,
|
|
34428
|
+
latestFailureSummary: result.success ? void 0 : result.summary,
|
|
34429
|
+
latestFailureCommand: result.success ? void 0 : attemptedCommand || void 0,
|
|
34430
|
+
latestSuccessfulCommand: result.success ? attemptedCommand || worldModel.verifier.latestSuccessfulCommand : worldModel.verifier.latestSuccessfulCommand,
|
|
34431
|
+
successfulCommands: result.success && attemptedCommand ? uniqueStrings2([
|
|
34432
|
+
...worldModel.verifier.successfulCommands,
|
|
34433
|
+
attemptedCommand
|
|
34434
|
+
]) : worldModel.verifier.successfulCommands,
|
|
34435
|
+
disabledCommands: interactiveVerificationSetupFailure && attemptedCommand ? uniqueStrings2([
|
|
34436
|
+
...worldModel.verifier.disabledCommands,
|
|
34437
|
+
attemptedCommand
|
|
34438
|
+
]) : worldModel.verifier.disabledCommands,
|
|
34439
|
+
requiresInteractiveSetup: interactiveVerificationSetupFailure
|
|
34440
|
+
};
|
|
34441
|
+
worldModel = {
|
|
34442
|
+
...worldModel,
|
|
34443
|
+
verifier,
|
|
34444
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34445
|
+
};
|
|
33854
34446
|
if (result.success) {
|
|
33855
34447
|
verificationFailures = 0;
|
|
33856
34448
|
repairRequiredBeforeVerification = false;
|
|
33857
34449
|
repairAppliedSinceFailure = false;
|
|
33858
34450
|
lastVerificationFailure = null;
|
|
34451
|
+
worldModel = {
|
|
34452
|
+
...worldModel,
|
|
34453
|
+
phase: "finalize",
|
|
34454
|
+
validationLoopCount: 0,
|
|
34455
|
+
proofPending: proofUpdate.proofPending,
|
|
34456
|
+
proofSatisfied: proofUpdate.proofSatisfied,
|
|
34457
|
+
nextFocus: "Review the diff and finish if the task acceptance criteria are satisfied.",
|
|
34458
|
+
blockers: [],
|
|
34459
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34460
|
+
};
|
|
33859
34461
|
} else if (interactiveVerificationSetupFailure) {
|
|
33860
34462
|
repairRequiredBeforeVerification = false;
|
|
33861
34463
|
repairAppliedSinceFailure = false;
|
|
33862
34464
|
lastVerificationFailure = result;
|
|
34465
|
+
worldModel = {
|
|
34466
|
+
...worldModel,
|
|
34467
|
+
phase: appliedSourceEdit ? "verify" : "implement",
|
|
34468
|
+
blockers: uniqueStrings2([
|
|
34469
|
+
...worldModel.blockers.filter((blocker) => !blocker.includes("interactive")),
|
|
34470
|
+
result.summary
|
|
34471
|
+
]),
|
|
34472
|
+
nextFocus: "Choose a different non-interactive verifier or configure the verifier only if the task truly requires it.",
|
|
34473
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34474
|
+
};
|
|
33863
34475
|
} else {
|
|
33864
34476
|
verificationFailures += 1;
|
|
33865
34477
|
repairRequiredBeforeVerification = true;
|
|
33866
34478
|
repairAppliedSinceFailure = false;
|
|
33867
34479
|
lastVerificationFailure = result;
|
|
34480
|
+
worldModel = {
|
|
34481
|
+
...worldModel,
|
|
34482
|
+
phase: "repair",
|
|
34483
|
+
blockers: uniqueStrings2([
|
|
34484
|
+
...worldModel.blockers,
|
|
34485
|
+
result.summary
|
|
34486
|
+
]),
|
|
34487
|
+
nextFocus: "Repair the latest failing verifier output before running verification again.",
|
|
34488
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34489
|
+
};
|
|
33868
34490
|
}
|
|
33869
34491
|
}
|
|
33870
34492
|
transcriptEntry.toolResult = result;
|
|
@@ -33925,21 +34547,21 @@ ${truncateForModel(customAgentMemory)}`);
|
|
|
33925
34547
|
const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
|
|
33926
34548
|
artifacts.add(transcriptPath2);
|
|
33927
34549
|
if (task.agentRole !== "test-debugger") {
|
|
33928
|
-
|
|
33929
|
-
|
|
33930
|
-
|
|
33931
|
-
|
|
33932
|
-
|
|
33933
|
-
usage: usageTotals
|
|
34550
|
+
worldModel = {
|
|
34551
|
+
...worldModel,
|
|
34552
|
+
phase: "escalate",
|
|
34553
|
+
nextFocus: "Escalate to test-debugger with the latest failing verifier context.",
|
|
34554
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
33934
34555
|
};
|
|
34556
|
+
return makeOutcome("handoff", `Verification failed ${verificationFailures} time${verificationFailures === 1 ? "" : "s"} for ${task.id}; handing off to test-debugger.`);
|
|
33935
34557
|
}
|
|
33936
|
-
|
|
33937
|
-
|
|
33938
|
-
|
|
33939
|
-
|
|
33940
|
-
|
|
33941
|
-
usage: usageTotals
|
|
34558
|
+
worldModel = {
|
|
34559
|
+
...worldModel,
|
|
34560
|
+
phase: "escalate",
|
|
34561
|
+
nextFocus: "Debugger repair budget is exhausted; supervisor must replan or accept the blocker.",
|
|
34562
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
33942
34563
|
};
|
|
34564
|
+
return makeOutcome("blocked", `Verification failed ${verificationFailures} time${verificationFailures === 1 ? "" : "s"} for ${task.id}; repair budget exhausted.`);
|
|
33943
34565
|
}
|
|
33944
34566
|
}
|
|
33945
34567
|
messages.push({
|
|
@@ -33983,14 +34605,9 @@ ${truncateForModel(customAgentMemory)}`);
|
|
|
33983
34605
|
if (execution.approvalRequest) {
|
|
33984
34606
|
const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript.concat(transcriptEntry));
|
|
33985
34607
|
artifacts.add(transcriptPath2);
|
|
33986
|
-
return {
|
|
33987
|
-
|
|
33988
|
-
|
|
33989
|
-
toolResults,
|
|
33990
|
-
artifacts: Array.from(artifacts),
|
|
33991
|
-
approvalRequest: execution.approvalRequest,
|
|
33992
|
-
usage: usageTotals
|
|
33993
|
-
};
|
|
34608
|
+
return makeOutcome("awaiting-approval", execution.approvalRequest.reason, {
|
|
34609
|
+
approvalRequest: execution.approvalRequest
|
|
34610
|
+
});
|
|
33994
34611
|
}
|
|
33995
34612
|
if (!execution.result) {
|
|
33996
34613
|
throw new Error("Approved action did not produce a tool result.");
|
|
@@ -34007,13 +34624,12 @@ ${truncateForModel(customAgentMemory)}`);
|
|
|
34007
34624
|
} catch (error2) {
|
|
34008
34625
|
const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
|
|
34009
34626
|
artifacts.add(transcriptPath2);
|
|
34010
|
-
|
|
34011
|
-
|
|
34012
|
-
|
|
34013
|
-
|
|
34014
|
-
artifacts: Array.from(artifacts),
|
|
34015
|
-
usage: usageTotals
|
|
34627
|
+
worldModel = {
|
|
34628
|
+
...worldModel,
|
|
34629
|
+
nextFocus: "Resume from the current execution state without redoing completed inspection or verification.",
|
|
34630
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34016
34631
|
};
|
|
34632
|
+
return makeOutcome("paused", `Execution interrupted by operator during ${task.id}.`);
|
|
34017
34633
|
}
|
|
34018
34634
|
await compactExecutionContextIfNeeded(step);
|
|
34019
34635
|
if (typeof maxModelCalls === "number" && usageTotals.modelCalls >= maxModelCalls) {
|
|
@@ -34036,7 +34652,7 @@ ${truncateForModel(customAgentMemory)}`);
|
|
|
34036
34652
|
try {
|
|
34037
34653
|
response = await brain.client.generateText({
|
|
34038
34654
|
model: brain.model,
|
|
34039
|
-
systemPrompt: buildSystemPrompt(effectiveAgent, task, request, allowedTools, options.plan, task.agentPromptPreamble ?? customOverlay?.promptPreamble),
|
|
34655
|
+
systemPrompt: buildSystemPrompt(effectiveAgent, task, request, allowedTools, snapshotWorldModel(), options.plan, task.agentPromptPreamble ?? customOverlay?.promptPreamble),
|
|
34040
34656
|
messages,
|
|
34041
34657
|
responseFormat: "json_object",
|
|
34042
34658
|
...typeof brain.settings.temperature === "number" ? {
|
|
@@ -34054,24 +34670,27 @@ ${truncateForModel(customAgentMemory)}`);
|
|
|
34054
34670
|
});
|
|
34055
34671
|
const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
|
|
34056
34672
|
artifacts.add(transcriptPath2);
|
|
34057
|
-
|
|
34058
|
-
|
|
34059
|
-
|
|
34060
|
-
|
|
34061
|
-
|
|
34062
|
-
|
|
34673
|
+
worldModel = {
|
|
34674
|
+
...worldModel,
|
|
34675
|
+
phase: "escalate",
|
|
34676
|
+
blockers: uniqueStrings2([
|
|
34677
|
+
...worldModel.blockers,
|
|
34678
|
+
error2 instanceof Error ? error2.message : String(error2)
|
|
34679
|
+
]),
|
|
34680
|
+
nextFocus: "Supervisor should inspect the model failure or switch to a healthier provider/model.",
|
|
34681
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34063
34682
|
};
|
|
34683
|
+
return makeOutcome("blocked", `Model request failed before ${task.id} could choose a safe action: ${error2 instanceof Error ? error2.message : String(error2)}`);
|
|
34064
34684
|
}
|
|
34065
34685
|
if (options.signal?.aborted) {
|
|
34066
34686
|
const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript);
|
|
34067
34687
|
artifacts.add(transcriptPath2);
|
|
34068
|
-
|
|
34069
|
-
|
|
34070
|
-
|
|
34071
|
-
|
|
34072
|
-
artifacts: Array.from(artifacts),
|
|
34073
|
-
usage: usageTotals
|
|
34688
|
+
worldModel = {
|
|
34689
|
+
...worldModel,
|
|
34690
|
+
nextFocus: "Resume from the current execution state without redoing completed inspection or verification.",
|
|
34691
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34074
34692
|
};
|
|
34693
|
+
return makeOutcome("paused", `Execution interrupted by operator during ${task.id}.`);
|
|
34075
34694
|
}
|
|
34076
34695
|
responseText = response.text;
|
|
34077
34696
|
usageTotals.modelCalls += 1;
|
|
@@ -34134,13 +34753,17 @@ ${truncateForModel(customAgentMemory)}`);
|
|
|
34134
34753
|
step,
|
|
34135
34754
|
message: "Model stayed out of structured mode after multiple retries."
|
|
34136
34755
|
});
|
|
34137
|
-
|
|
34138
|
-
|
|
34139
|
-
|
|
34140
|
-
|
|
34141
|
-
|
|
34142
|
-
|
|
34756
|
+
worldModel = {
|
|
34757
|
+
...worldModel,
|
|
34758
|
+
phase: "escalate",
|
|
34759
|
+
blockers: uniqueStrings2([
|
|
34760
|
+
...worldModel.blockers,
|
|
34761
|
+
parseSummary
|
|
34762
|
+
]),
|
|
34763
|
+
nextFocus: "Supervisor should inspect the unstructured model output and replan or switch models.",
|
|
34764
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34143
34765
|
};
|
|
34766
|
+
return makeOutcome("blocked", `${parseSummary} The task stopped before a safe tool action could be chosen.`);
|
|
34144
34767
|
}
|
|
34145
34768
|
await emitProgress({
|
|
34146
34769
|
type: "task-note",
|
|
@@ -34174,10 +34797,53 @@ ${truncateForModel(customAgentMemory)}`);
|
|
|
34174
34797
|
response: responseText,
|
|
34175
34798
|
parsedAction
|
|
34176
34799
|
};
|
|
34800
|
+
const strategicVerifier = determineNextVerificationCommand(worldModel);
|
|
34177
34801
|
messages.push({
|
|
34178
34802
|
role: "assistant",
|
|
34179
34803
|
content: JSON.stringify(parsedAction)
|
|
34180
34804
|
});
|
|
34805
|
+
if (worldModel.phase === "survey" && parsedAction.type === "tool" && !isRepoInspectionAction(parsedAction)) {
|
|
34806
|
+
transcriptEntry.runtimeNote = "Rejected phase-inconsistent action because survey phase still requires repo inspection.";
|
|
34807
|
+
transcript.push(transcriptEntry);
|
|
34808
|
+
await emitProgress({
|
|
34809
|
+
type: "task-note",
|
|
34810
|
+
sessionId,
|
|
34811
|
+
taskId: task.id,
|
|
34812
|
+
agentRole: task.agentRole,
|
|
34813
|
+
step,
|
|
34814
|
+
message: "Survey phase requires file/repo inspection before implementation or verification."
|
|
34815
|
+
});
|
|
34816
|
+
messages.push({
|
|
34817
|
+
role: "user",
|
|
34818
|
+
content: [
|
|
34819
|
+
"The current execution phase is survey.",
|
|
34820
|
+
"Inspect likely source files and repository context first with file.read, file.search, file.list, repo.index, repo.query, or git.diff.",
|
|
34821
|
+
"Do not jump to runtime validation or edits before you understand the target files."
|
|
34822
|
+
].join("\n")
|
|
34823
|
+
});
|
|
34824
|
+
continue;
|
|
34825
|
+
}
|
|
34826
|
+
if (worldModel.phase === "verify" && parsedAction.type === "tool" && !isVerificationAction(parsedAction) && !isRepoInspectionAction(parsedAction)) {
|
|
34827
|
+
transcriptEntry.runtimeNote = "Rejected phase-inconsistent action because verify phase requires proof or diff review.";
|
|
34828
|
+
transcript.push(transcriptEntry);
|
|
34829
|
+
await emitProgress({
|
|
34830
|
+
type: "task-note",
|
|
34831
|
+
sessionId,
|
|
34832
|
+
taskId: task.id,
|
|
34833
|
+
agentRole: task.agentRole,
|
|
34834
|
+
step,
|
|
34835
|
+
message: strategicVerifier ? `Verify phase requires proof. Run ${strategicVerifier} or inspect the diff.` : "Verify phase requires proof. Use a non-interactive verifier or inspect the diff."
|
|
34836
|
+
});
|
|
34837
|
+
messages.push({
|
|
34838
|
+
role: "user",
|
|
34839
|
+
content: [
|
|
34840
|
+
"The current execution phase is verify.",
|
|
34841
|
+
strategicVerifier ? `Run the strategic verifier next: ${strategicVerifier}.` : "Run the best non-interactive verifier next.",
|
|
34842
|
+
"Only use diff/inspection actions here if you still need proof context before finishing."
|
|
34843
|
+
].join("\n")
|
|
34844
|
+
});
|
|
34845
|
+
continue;
|
|
34846
|
+
}
|
|
34181
34847
|
if (parsedAction.type === "finish") {
|
|
34182
34848
|
if (changedWorkspace && !verifiedAfterLatestChange) {
|
|
34183
34849
|
transcriptEntry.runtimeNote = "Finish rejected because code changed without a successful verification step.";
|
|
@@ -34194,31 +34860,77 @@ ${truncateForModel(customAgentMemory)}`);
|
|
|
34194
34860
|
role: "user",
|
|
34195
34861
|
content: [
|
|
34196
34862
|
"You tried to finish after making code changes without a successful verification step.",
|
|
34197
|
-
"Run tests.run or a build/lint/test shell command, inspect failures, and only finish after verification passes."
|
|
34863
|
+
strategicVerifier ? `Run the strategic verifier next: ${strategicVerifier}.` : "Run tests.run or a build/lint/test shell command, inspect failures, and only finish after verification passes."
|
|
34864
|
+
].join("\n")
|
|
34865
|
+
});
|
|
34866
|
+
continue;
|
|
34867
|
+
}
|
|
34868
|
+
if (worldModel.phase !== "finalize") {
|
|
34869
|
+
transcriptEntry.runtimeNote = `Finish rejected because the task is still in ${worldModel.phase} phase.`;
|
|
34870
|
+
transcript.push(transcriptEntry);
|
|
34871
|
+
await emitProgress({
|
|
34872
|
+
type: "task-note",
|
|
34873
|
+
sessionId,
|
|
34874
|
+
taskId: task.id,
|
|
34875
|
+
agentRole: task.agentRole,
|
|
34876
|
+
step,
|
|
34877
|
+
message: `Finish rejected because the task is still in ${worldModel.phase} phase.`
|
|
34878
|
+
});
|
|
34879
|
+
messages.push({
|
|
34880
|
+
role: "user",
|
|
34881
|
+
content: [
|
|
34882
|
+
`The task is still in ${worldModel.phase} phase.`,
|
|
34883
|
+
...derivePhaseGuidance(worldModel),
|
|
34884
|
+
"Do the next strategic action instead of finishing early."
|
|
34885
|
+
].join("\n")
|
|
34886
|
+
});
|
|
34887
|
+
continue;
|
|
34888
|
+
}
|
|
34889
|
+
if (worldModel.proofPending.length > 0 && worldModel.proofSatisfied.length === 0) {
|
|
34890
|
+
transcriptEntry.runtimeNote = "Finish rejected because the task still has pending proof and no satisfied verifier evidence.";
|
|
34891
|
+
transcript.push(transcriptEntry);
|
|
34892
|
+
await emitProgress({
|
|
34893
|
+
type: "task-note",
|
|
34894
|
+
sessionId,
|
|
34895
|
+
taskId: task.id,
|
|
34896
|
+
agentRole: task.agentRole,
|
|
34897
|
+
step,
|
|
34898
|
+
message: "Finish rejected because the task still has pending proof requirements."
|
|
34899
|
+
});
|
|
34900
|
+
messages.push({
|
|
34901
|
+
role: "user",
|
|
34902
|
+
content: [
|
|
34903
|
+
"You still owe proof for this task before finishing.",
|
|
34904
|
+
`Pending proof: ${worldModel.proofPending.join(" | ")}`,
|
|
34905
|
+
strategicVerifier ? `Run the strategic verifier next: ${strategicVerifier}.` : "Run the best non-interactive verifier, then finish only after the proof is captured."
|
|
34198
34906
|
].join("\n")
|
|
34199
34907
|
});
|
|
34200
34908
|
continue;
|
|
34201
34909
|
}
|
|
34202
34910
|
const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript.concat(transcriptEntry));
|
|
34203
34911
|
artifacts.add(transcriptPath2);
|
|
34204
|
-
|
|
34205
|
-
|
|
34206
|
-
|
|
34207
|
-
|
|
34208
|
-
|
|
34209
|
-
usage: usageTotals
|
|
34912
|
+
worldModel = {
|
|
34913
|
+
...worldModel,
|
|
34914
|
+
phase: "finalize",
|
|
34915
|
+
nextFocus: "Task completed.",
|
|
34916
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34210
34917
|
};
|
|
34918
|
+
return makeOutcome("completed", parsedAction.summary);
|
|
34211
34919
|
}
|
|
34212
34920
|
if (parsedAction.type === "block") {
|
|
34213
34921
|
const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript.concat(transcriptEntry));
|
|
34214
34922
|
artifacts.add(transcriptPath2);
|
|
34215
|
-
|
|
34216
|
-
|
|
34217
|
-
|
|
34218
|
-
|
|
34219
|
-
|
|
34220
|
-
|
|
34923
|
+
worldModel = {
|
|
34924
|
+
...worldModel,
|
|
34925
|
+
phase: "escalate",
|
|
34926
|
+
blockers: uniqueStrings2([
|
|
34927
|
+
...worldModel.blockers,
|
|
34928
|
+
parsedAction.reason
|
|
34929
|
+
]),
|
|
34930
|
+
nextFocus: "Supervisor should inspect the reported blocker and adjust the task graph.",
|
|
34931
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34221
34932
|
};
|
|
34933
|
+
return makeOutcome("blocked", parsedAction.reason);
|
|
34222
34934
|
}
|
|
34223
34935
|
if (!allowedTools.includes(parsedAction.tool)) {
|
|
34224
34936
|
transcriptEntry.runtimeNote = `Rejected disallowed tool "${parsedAction.tool}" for ${agent.role}.`;
|
|
@@ -34263,6 +34975,67 @@ ${truncateForModel(customAgentMemory)}`);
|
|
|
34263
34975
|
});
|
|
34264
34976
|
continue;
|
|
34265
34977
|
}
|
|
34978
|
+
if (isVerificationAction(parsedAction) && strategicVerifier && extractShellCommand(parsedAction) && extractShellCommand(parsedAction) !== strategicVerifier) {
|
|
34979
|
+
transcriptEntry.runtimeNote = "Verification command rejected because it does not match the current strategic verifier.";
|
|
34980
|
+
transcript.push(transcriptEntry);
|
|
34981
|
+
await emitProgress({
|
|
34982
|
+
type: "task-note",
|
|
34983
|
+
sessionId,
|
|
34984
|
+
taskId: task.id,
|
|
34985
|
+
agentRole: task.agentRole,
|
|
34986
|
+
step,
|
|
34987
|
+
message: `Verification should follow the strategic order. Prefer ${strategicVerifier} next.`
|
|
34988
|
+
});
|
|
34989
|
+
messages.push({
|
|
34990
|
+
role: "user",
|
|
34991
|
+
content: [
|
|
34992
|
+
"Use the strategic verification path instead of picking a random verifier.",
|
|
34993
|
+
`Preferred next verifier: ${strategicVerifier}`
|
|
34994
|
+
].join("\n")
|
|
34995
|
+
});
|
|
34996
|
+
continue;
|
|
34997
|
+
}
|
|
34998
|
+
if (allowedTools.includes("file.read") && isShellFileInspectionAction(parsedAction)) {
|
|
34999
|
+
transcriptEntry.runtimeNote = "Rejected shell-based file inspection because file.read is available.";
|
|
35000
|
+
transcript.push(transcriptEntry);
|
|
35001
|
+
await emitProgress({
|
|
35002
|
+
type: "task-note",
|
|
35003
|
+
sessionId,
|
|
35004
|
+
taskId: task.id,
|
|
35005
|
+
agentRole: task.agentRole,
|
|
35006
|
+
step,
|
|
35007
|
+
message: "Use file.read or repo/file tools instead of cat/head/wc/sed shell commands for source inspection."
|
|
35008
|
+
});
|
|
35009
|
+
messages.push({
|
|
35010
|
+
role: "user",
|
|
35011
|
+
content: [
|
|
35012
|
+
"Do not use shell.exec for simple repo file inspection when file.read is available.",
|
|
35013
|
+
"Use file.read, file.search, file.list, repo.query, or git.diff instead."
|
|
35014
|
+
].join("\n")
|
|
35015
|
+
});
|
|
35016
|
+
continue;
|
|
35017
|
+
}
|
|
35018
|
+
if (request.workspaceState === "existing" && (task.type === "scaffold" || task.type === "implementation") && !appliedSourceEdit && isRuntimeValidationAction(parsedAction) && preEditValidationActions >= 1) {
|
|
35019
|
+
transcriptEntry.runtimeNote = "Rejected repeated validation/runtime loop before any source edit.";
|
|
35020
|
+
transcript.push(transcriptEntry);
|
|
35021
|
+
await emitProgress({
|
|
35022
|
+
type: "task-note",
|
|
35023
|
+
sessionId,
|
|
35024
|
+
taskId: task.id,
|
|
35025
|
+
agentRole: task.agentRole,
|
|
35026
|
+
step,
|
|
35027
|
+
message: "Repeated build/dev/browser verification was stopped because no source edit has landed yet."
|
|
35028
|
+
});
|
|
35029
|
+
messages.push({
|
|
35030
|
+
role: "user",
|
|
35031
|
+
content: [
|
|
35032
|
+
"You already used one baseline verification/runtime check before making a source edit.",
|
|
35033
|
+
"Do not keep rerunning build, lint, dev server, browser, or HTTP checks unchanged.",
|
|
35034
|
+
"Inspect likely source files, make a concrete edit with file.patch or file.write, and then validate again."
|
|
35035
|
+
].join("\n")
|
|
35036
|
+
});
|
|
35037
|
+
continue;
|
|
35038
|
+
}
|
|
34266
35039
|
const execution = await runToolAction(parsedAction, step);
|
|
34267
35040
|
if (execution.budgetExceeded) {
|
|
34268
35041
|
transcriptEntry.runtimeNote = execution.budgetExceeded;
|
|
@@ -34273,14 +35046,9 @@ ${truncateForModel(customAgentMemory)}`);
|
|
|
34273
35046
|
transcriptEntry.runtimeNote = execution.approvalRequest.reason;
|
|
34274
35047
|
const transcriptPath2 = await writeTranscriptArtifact(request.cwd, sessionId, task.id, transcript.concat(transcriptEntry));
|
|
34275
35048
|
artifacts.add(transcriptPath2);
|
|
34276
|
-
return {
|
|
34277
|
-
|
|
34278
|
-
|
|
34279
|
-
toolResults,
|
|
34280
|
-
artifacts: Array.from(artifacts),
|
|
34281
|
-
approvalRequest: execution.approvalRequest,
|
|
34282
|
-
usage: usageTotals
|
|
34283
|
-
};
|
|
35049
|
+
return makeOutcome("awaiting-approval", execution.approvalRequest.reason, {
|
|
35050
|
+
approvalRequest: execution.approvalRequest
|
|
35051
|
+
});
|
|
34284
35052
|
}
|
|
34285
35053
|
if (!execution.result) {
|
|
34286
35054
|
throw new Error(`Tool ${parsedAction.tool} did not return a result.`);
|
|
@@ -34300,21 +35068,21 @@ ${truncateForModel(customAgentMemory)}`);
|
|
|
34300
35068
|
message: task.agentRole !== "test-debugger" ? `Step budget exhausted after ${maxSteps} steps; handing task to test-debugger.` : `Step budget exhausted after ${maxSteps} steps; debugger escalation exhausted.`
|
|
34301
35069
|
});
|
|
34302
35070
|
if (task.agentRole !== "test-debugger") {
|
|
34303
|
-
|
|
34304
|
-
|
|
34305
|
-
|
|
34306
|
-
|
|
34307
|
-
|
|
34308
|
-
usage: usageTotals
|
|
35071
|
+
worldModel = {
|
|
35072
|
+
...worldModel,
|
|
35073
|
+
phase: "escalate",
|
|
35074
|
+
nextFocus: "Hand off the task to test-debugger with the current world-model and transcript.",
|
|
35075
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34309
35076
|
};
|
|
35077
|
+
return makeOutcome("handoff", `Autonomous executor reached the step limit (${maxSteps}) for ${task.id}; handing off to test-debugger.`);
|
|
34310
35078
|
}
|
|
34311
|
-
|
|
34312
|
-
|
|
34313
|
-
|
|
34314
|
-
|
|
34315
|
-
|
|
34316
|
-
usage: usageTotals
|
|
35079
|
+
worldModel = {
|
|
35080
|
+
...worldModel,
|
|
35081
|
+
phase: "escalate",
|
|
35082
|
+
nextFocus: "Debugger escalation is exhausted; supervisor must replan or accept the blocker.",
|
|
35083
|
+
lastUpdatedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
34317
35084
|
};
|
|
35085
|
+
return makeOutcome("blocked", `Autonomous executor reached the step limit (${maxSteps}) for ${task.id}; debugger escalation exhausted.`);
|
|
34318
35086
|
}
|
|
34319
35087
|
};
|
|
34320
35088
|
|
|
@@ -36678,6 +37446,15 @@ function updateTaskStatus(plan, taskId, status) {
|
|
|
36678
37446
|
milestones
|
|
36679
37447
|
};
|
|
36680
37448
|
}
|
|
37449
|
+
/**
 * Store an execution-state snapshot on one task of the plan.
 *
 * @param {object} plan - Plan to update; handed to `replaceTask` unchanged.
 * @param {string} taskId - Identifier forwarded to `replaceTask`.
 * @param {object} [executionState] - Snapshot to store under the task's
 *   `executionState` key.
 * @returns {object} The same `plan` reference when `executionState` is falsy,
 *   otherwise whatever `replaceTask` returns.
 */
function updateTaskExecutionState(plan, taskId, executionState) {
  // Nothing to record: hand the plan back untouched rather than rebuilding it.
  return executionState
    ? replaceTask(plan, taskId, (current) => ({ ...current, executionState }))
    : plan;
}
|
|
36681
37458
|
function replaceTask(plan, taskId, mapper) {
|
|
36682
37459
|
const milestones = plan.milestones.map((milestone) => ({
|
|
36683
37460
|
...milestone,
|
|
@@ -37112,6 +37889,44 @@ ${result.stderr.slice(0, 1e3)}`);
|
|
|
37112
37889
|
return lines.join("\n");
|
|
37113
37890
|
}).join("\n\n");
|
|
37114
37891
|
}
|
|
37892
|
+
/**
 * Render a task outcome as newline-separated `label: value` lines.
 *
 * The first three lines (task id, status, summary) are always present. When
 * `outcome.worldModel` is set, the world-model fields are appended after them.
 *
 * @param {{ id: string }} task - Task whose id is printed.
 * @param {object} outcome - Outcome carrying `status`, `summary` and an
 *   optional `worldModel`.
 * @returns {string} The rendered lines joined with "\n".
 */
function renderTaskWorldModelContext(task, outcome) {
  const { worldModel } = outcome;
  const rendered = [
    `task: ${task.id}`,
    `status: ${outcome.status}`,
    `summary: ${outcome.summary}`
  ];
  if (!worldModel) {
    return rendered.join("\n");
  }

  // Empty lists join to "", which falls through to the "(none)" placeholder.
  const commaList = (items) => items.join(", ") || "(none)";
  const pipeList = (items) => items.join(" | ") || "(none)";

  rendered.push(
    `phase: ${worldModel.phase}`,
    `next focus: ${worldModel.nextFocus ?? "(unset)"}`,
    `target files: ${commaList(worldModel.targetFiles)}`,
    `inspected files: ${commaList(worldModel.inspectedFiles)}`,
    `changed files: ${commaList(worldModel.changedFiles)}`,
    `blockers: ${pipeList(worldModel.blockers)}`,
    `proof pending: ${pipeList(worldModel.proofPending)}`,
    `proof satisfied: ${pipeList(worldModel.proofSatisfied)}`,
    `recent actions: ${pipeList(worldModel.recentActions)}`,
    `preferred verifiers: ${commaList(worldModel.verifier.preferredCommands)}`,
    `latest verifier failure: ${worldModel.verifier.latestFailureSummary ?? "(none)"}`
  );
  return rendered.join("\n");
}
|
|
37918
|
+
/**
 * Decide whether a task outcome should trigger a plan revision.
 *
 * Checks run in this order:
 *  1. Tasks with agent role "repo-analyst" or "planner" never trigger one.
 *  2. Any truthy supervisor hint (replan, reprioritize, merge delegated work)
 *     triggers one.
 *  3. A "blocked" or "handoff" status triggers one.
 *  4. Otherwise only a task with a `parentTaskId` whose world model is in the
 *     "repair" or "finalize" phase, or lists at least one blocker, triggers one.
 *
 * @param {object} task - Task that produced the outcome.
 * @param {object} outcome - Outcome with `status` and optional
 *   `supervisorHints` / `worldModel`.
 * @returns {boolean} True when a plan revision should be attempted.
 */
function shouldRevisePlanAfterTaskOutcome(task, outcome) {
  const exemptRoles = ["repo-analyst", "planner"];
  if (exemptRoles.includes(task.agentRole)) {
    return false;
  }

  const hints = outcome.supervisorHints;
  const hinted = hints?.shouldReplan || hints?.shouldReprioritize || hints?.shouldMergeDelegatedWork;
  if (hinted) {
    return true;
  }

  switch (outcome.status) {
    case "blocked":
    case "handoff":
      return true;
    default:
      break;
  }

  const model = outcome.worldModel;
  if (!task.parentTaskId || !model) {
    return false;
  }
  return model.phase === "repair" || model.phase === "finalize" || model.blockers.length > 0;
}
|
|
37115
37930
|
function latestRepoAnalysisContext(events) {
|
|
37116
37931
|
const event = [
|
|
37117
37932
|
...events
|
|
@@ -37549,7 +38364,13 @@ var ExecutionOrchestrator = class {
|
|
|
37549
38364
|
let sawApprovalRequest = false;
|
|
37550
38365
|
for (const batchResult of batchResults) {
|
|
37551
38366
|
const { task: autoTask, outcome } = batchResult;
|
|
37552
|
-
workingPlan =
|
|
38367
|
+
workingPlan = updateTaskExecutionState(workingPlan, autoTask.id, outcome.worldModel);
|
|
38368
|
+
workingPlan = outcome.status === "handoff" ? replaceTask(workingPlan, autoTask.id, () => ({
|
|
38369
|
+
...outcome.handoffTask ?? createDebuggerHandoffTask(autoTask, outcome, this.toolsForAgent("test-debugger").map((tool) => tool.id)),
|
|
38370
|
+
...outcome.worldModel ? {
|
|
38371
|
+
executionState: outcome.worldModel
|
|
38372
|
+
} : {}
|
|
38373
|
+
})) : updateTaskStatus(workingPlan, autoTask.id, outcome.status === "completed" ? "completed" : outcome.status === "blocked" ? "blocked" : "pending");
|
|
37553
38374
|
if (autoTask.agentRole === "repo-analyst" && outcome.status === "completed") {
|
|
37554
38375
|
const replanned = await this.maybeRevisePlanAfterRepoAnalysis(session.id, session.request, workingPlan, outcome, emitProgress);
|
|
37555
38376
|
workingPlan = replanned.plan;
|
|
@@ -37562,6 +38383,16 @@ var ExecutionOrchestrator = class {
|
|
|
37562
38383
|
});
|
|
37563
38384
|
}
|
|
37564
38385
|
}
|
|
38386
|
+
const adapted = await this.maybeRevisePlanAfterTaskOutcome(session.id, session.request, workingPlan, autoTask, outcome, emitProgress);
|
|
38387
|
+
workingPlan = adapted.plan;
|
|
38388
|
+
if (adapted.note) {
|
|
38389
|
+
notes = maybeAppendNote(notes, adapted.note);
|
|
38390
|
+
await emitProgress({
|
|
38391
|
+
type: "task-note",
|
|
38392
|
+
sessionId: session.id,
|
|
38393
|
+
message: adapted.note
|
|
38394
|
+
});
|
|
38395
|
+
}
|
|
37565
38396
|
notes = maybeAppendNote(notes, outcome.summary);
|
|
37566
38397
|
if (outcome.status === "awaiting-approval" && outcome.approvalRequest) {
|
|
37567
38398
|
pendingApprovals.push(outcome.approvalRequest);
|
|
@@ -38141,6 +38972,12 @@ var ExecutionOrchestrator = class {
|
|
|
38141
38972
|
...outcome.usage ? {
|
|
38142
38973
|
usage: outcome.usage
|
|
38143
38974
|
} : {},
|
|
38975
|
+
...outcome.worldModel ? {
|
|
38976
|
+
worldModel: outcome.worldModel
|
|
38977
|
+
} : {},
|
|
38978
|
+
...outcome.supervisorHints ? {
|
|
38979
|
+
supervisorHints: outcome.supervisorHints
|
|
38980
|
+
} : {},
|
|
38144
38981
|
...integrated.integrationFailed && integrated.conflict && task.agentRole !== "integrator" ? {
|
|
38145
38982
|
handoffTask: createIntegratorHandoffTask(task, outcome, this.toolsForAgent("integrator").map((tool) => tool.id), integrated.conflict)
|
|
38146
38983
|
} : {}
|
|
@@ -38321,6 +39158,89 @@ var ExecutionOrchestrator = class {
|
|
|
38321
39158
|
};
|
|
38322
39159
|
}
|
|
38323
39160
|
}
|
|
39161
|
+
async maybeRevisePlanAfterTaskOutcome(sessionId, request, plan, task, outcome, emitProgress) {
|
|
39162
|
+
if (!shouldRevisePlanAfterTaskOutcome(task, outcome)) {
|
|
39163
|
+
return {
|
|
39164
|
+
plan
|
|
39165
|
+
};
|
|
39166
|
+
}
|
|
39167
|
+
const config2 = await loadConfig(request.cwd);
|
|
39168
|
+
if (!config2) {
|
|
39169
|
+
return {
|
|
39170
|
+
plan
|
|
39171
|
+
};
|
|
39172
|
+
}
|
|
39173
|
+
const revisionContext = [
|
|
39174
|
+
"Task outcome context:",
|
|
39175
|
+
renderTaskWorldModelContext(task, outcome),
|
|
39176
|
+
"",
|
|
39177
|
+
"Supervisor hints:",
|
|
39178
|
+
outcome.supervisorHints ? [
|
|
39179
|
+
`- should replan: ${outcome.supervisorHints.shouldReplan ? "yes" : "no"}`,
|
|
39180
|
+
`- should reprioritize: ${outcome.supervisorHints.shouldReprioritize ? "yes" : "no"}`,
|
|
39181
|
+
`- should merge delegated work: ${outcome.supervisorHints.shouldMergeDelegatedWork ? "yes" : "no"}`,
|
|
39182
|
+
`- reason: ${outcome.supervisorHints.reason ?? "(none)"}`
|
|
39183
|
+
].join("\n") : "- none",
|
|
39184
|
+
"",
|
|
39185
|
+
"Tool evidence:",
|
|
39186
|
+
renderPlannerReplanContext(outcome.toolResults),
|
|
39187
|
+
"",
|
|
39188
|
+
"Planner instructions:",
|
|
39189
|
+
"- Reassign, merge, reorder, or split follow-up tasks if the task outcome suggests the current graph is suboptimal.",
|
|
39190
|
+
"- Preserve completed work, but feel free to adapt pending task order, dependencies, agent roles, or milestone shape.",
|
|
39191
|
+
"- Prefer making parallel work possible when blockers or delegated-worker outcomes reveal an opportunity."
|
|
39192
|
+
].join("\n");
|
|
39193
|
+
try {
|
|
39194
|
+
const brain = await new BrainResolver(config2, createDefaultBrainProviderRegistry(request.cwd)).resolve("planner");
|
|
39195
|
+
const result = await revisePlanWithModel(request, plan, revisionContext, {
|
|
39196
|
+
modelLabel: `${brain.provider.id}/${brain.model}`,
|
|
39197
|
+
...typeof brain.settings.temperature === "number" ? {
|
|
39198
|
+
temperature: brain.settings.temperature
|
|
39199
|
+
} : {},
|
|
39200
|
+
...typeof brain.settings.maxTokens === "number" ? {
|
|
39201
|
+
maxTokens: brain.settings.maxTokens
|
|
39202
|
+
} : {},
|
|
39203
|
+
generate: async (input) => brain.client.generateText({
|
|
39204
|
+
model: brain.model,
|
|
39205
|
+
systemPrompt: [
|
|
39206
|
+
brain.settings.promptPreamble,
|
|
39207
|
+
input.systemPrompt
|
|
39208
|
+
].filter(Boolean).join("\n\n"),
|
|
39209
|
+
userPrompt: input.userPrompt,
|
|
39210
|
+
responseFormat: "json_object",
|
|
39211
|
+
...typeof input.temperature === "number" ? {
|
|
39212
|
+
temperature: input.temperature
|
|
39213
|
+
} : {},
|
|
39214
|
+
...typeof input.maxTokens === "number" ? {
|
|
39215
|
+
maxTokens: input.maxTokens
|
|
39216
|
+
} : {}
|
|
39217
|
+
})
|
|
39218
|
+
});
|
|
39219
|
+
if (result.source === "model") {
|
|
39220
|
+
return {
|
|
39221
|
+
plan: result.plan,
|
|
39222
|
+
note: `Planner brain adapted the active task graph after ${task.id} via ${brain.provider.id}/${brain.model}.`
|
|
39223
|
+
};
|
|
39224
|
+
}
|
|
39225
|
+
return {
|
|
39226
|
+
plan,
|
|
39227
|
+
...result.warning ? {
|
|
39228
|
+
note: `${result.warning} Keeping the current task graph.`
|
|
39229
|
+
} : {}
|
|
39230
|
+
};
|
|
39231
|
+
} catch (error2) {
|
|
39232
|
+
if (emitProgress) {
|
|
39233
|
+
await emitProgress({
|
|
39234
|
+
type: "task-note",
|
|
39235
|
+
sessionId,
|
|
39236
|
+
message: `Planner task adaptation skipped: ${error2 instanceof Error ? error2.message : String(error2)}`
|
|
39237
|
+
});
|
|
39238
|
+
}
|
|
39239
|
+
return {
|
|
39240
|
+
plan
|
|
39241
|
+
};
|
|
39242
|
+
}
|
|
39243
|
+
}
|
|
38324
39244
|
async maybeExpandReadyTaskGraph(sessionId, request, plan, events, emitProgress) {
|
|
38325
39245
|
const envelope = this.buildEnvelope(request, plan, sessionId);
|
|
38326
39246
|
const candidate = envelope.readyTasks.find((task) => shouldDelegateTask(task, request));
|