@tritard/waterbrother 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli.js +492 -10
- package/src/frontend.js +404 -150
- package/src/tools.js +6 -3
- package/src/workflow.js +15 -27
package/package.json
CHANGED
package/src/cli.js
CHANGED
|
@@ -14,7 +14,19 @@ import { expandHomePath } from "./path-utils.js";
|
|
|
14
14
|
import { AUTONOMY_MODES, buildOperatorIdentity, EXPERIENCE_MODES, modeDefaults, normalizeAutonomyMode, normalizeExperienceMode } from "./modes.js";
|
|
15
15
|
import { computeImpactMap } from "./impact.js";
|
|
16
16
|
import { reviewTurn } from "./reviewer.js";
|
|
17
|
-
import {
|
|
17
|
+
import {
|
|
18
|
+
buildFrontendExecutionContext,
|
|
19
|
+
getFrontendAcceptanceFailure,
|
|
20
|
+
buildFrontendRebuildPrompt,
|
|
21
|
+
buildFrontendRevisionPrompt,
|
|
22
|
+
detectFrontendSlop,
|
|
23
|
+
inspectFrontendArtifacts,
|
|
24
|
+
mergeFrontendSlop,
|
|
25
|
+
reviewFrontendTurn,
|
|
26
|
+
shouldAutoReviseFrontend,
|
|
27
|
+
shouldForceFrontendRebuild,
|
|
28
|
+
shouldRunFrontendReview
|
|
29
|
+
} from "./frontend.js";
|
|
18
30
|
import { loadTask, saveTask, listTasks, setActiveTask, getActiveTask, closeTask } from "./task-store.js";
|
|
19
31
|
import { runDecisionPass, runInventPass, formatDecisionForDisplay, formatDecisionCompact, formatDecisionDetail } from "./decider.js";
|
|
20
32
|
import { runBuildWorkflow, startFeatureTask, runChallengeWorkflow } from "./workflow.js";
|
|
@@ -608,6 +620,63 @@ function buildSyntheticAssistantOutput(receipt) {
|
|
|
608
620
|
return null;
|
|
609
621
|
}
|
|
610
622
|
|
|
623
|
+
/**
 * Produce a de-duplicated, trimmed list of the paths recorded on a receipt.
 *
 * @param {{ changedFiles?: unknown[] } | null | undefined} receipt - Turn receipt; anything
 *   whose `changedFiles` is not an array yields an empty list.
 * @param {number} [limit=6] - Passed as the end index to `slice`, i.e. the maximum number
 *   of paths returned.
 * @returns {string[]} Unique, non-empty paths in first-seen order.
 */
function formatReceiptFileList(receipt, limit = 6) {
  // A Set keeps first-seen order while dropping repeated paths.
  const uniquePaths = new Set();
  if (Array.isArray(receipt?.changedFiles)) {
    for (const entry of receipt.changedFiles) {
      const cleaned = String(entry || "").trim();
      if (cleaned) {
        uniquePaths.add(cleaned);
      }
    }
  }
  return Array.from(uniquePaths).slice(0, limit);
}
|
|
629
|
+
|
|
630
|
+
/**
 * In benchmark mode, replace the assistant's text with a status report when the
 * frontend result did not clear review; otherwise return the text untouched.
 *
 * The text is replaced when any of these hold: `getFrontendAcceptanceFailure`
 * reports a failure, the design verdict is "weak", the render verdict is "weak",
 * or the design verdict is "caution" while the render verdict is not "strong".
 *
 * @param {object} [options]
 * @param {string} [options.assistantText=""] - Text returned as-is when no downgrade applies.
 * @param {object|null} [options.receipt=null] - Fallback source for review data and the file list.
 * @param {object|null} [options.artifacts=null] - Preferred source for review data.
 * @param {object|null} [options.frontend=null] - Only consulted here for `benchmarkMode`,
 *   and forwarded to `getFrontendAcceptanceFailure`.
 * @returns {string} The original text, or paragraphs joined by blank lines.
 */
function buildBenchmarkFrontendStatusOutput({
  assistantText = "",
  receipt = null,
  artifacts = null,
  frontend = null
} = {}) {
  if (!frontend?.benchmarkMode) {
    return assistantText;
  }

  // Artifacts from the current analysis win over whatever is stored on the receipt.
  const lookup = (key) => artifacts?.[key] || receipt?.[key] || null;
  const normaliseVerdict = (reviewResult) => String(reviewResult?.verdict || "").trim().toLowerCase();

  const designReview = lookup("designReview");
  const screenshotReview = lookup("screenshotReview");
  const designSlop = lookup("designSlop");
  const acceptanceFailure = getFrontendAcceptanceFailure({
    frontend,
    slop: designSlop,
    designReview,
    screenshotReview
  });
  const designVerdict = normaliseVerdict(designReview);
  const renderVerdict = normaliseVerdict(screenshotReview);

  const downgradeReasons = [
    Boolean(acceptanceFailure),
    designVerdict === "weak",
    renderVerdict === "weak",
    designVerdict === "caution" && renderVerdict !== "strong"
  ];
  if (!downgradeReasons.some(Boolean)) {
    return assistantText;
  }

  const paragraphs = [];
  const touchedFiles = formatReceiptFileList(receipt);
  if (touchedFiles.length > 0) {
    paragraphs.push(
      "Updated benchmark frontend files:",
      ...touchedFiles.map((filePath) => `- ${filePath}`)
    );
  }

  if (acceptanceFailure?.reason) {
    paragraphs.push(`Current status: ${acceptanceFailure.reason}.`);
  } else {
    const statusParts = [
      designVerdict && `design ${designVerdict}`,
      renderVerdict && `render ${renderVerdict}`,
      artifacts?.designRevision?.triggered && `auto-revised ${artifacts.designRevision.passes}x`
    ].filter(Boolean);
    if (statusParts.length > 0) {
      paragraphs.push(`Current status: ${statusParts.join(", ")}.`);
    }
  }

  if (designReview?.summary) {
    paragraphs.push(designReview.summary);
  }
  // The render summary is only surfaced when the render itself was judged weak.
  if (renderVerdict === "weak" && screenshotReview?.summary) {
    paragraphs.push(screenshotReview.summary);
  }
  paragraphs.push("Result needs another pass before it should be treated as finished.");

  return paragraphs.filter(Boolean).join("\n\n");
}
|
|
679
|
+
|
|
611
680
|
function hasFrontendCodeEcho(text) {
|
|
612
681
|
const body = String(text || "");
|
|
613
682
|
return /```(?:html|css|js|javascript|jsx|tsx)?[\s\S]{120,}```/i.test(body) || /<!DOCTYPE html>/i.test(body);
|
|
@@ -638,6 +707,36 @@ function shouldRecoverFrontendCodeEcho({ frontendExecutionContext, receipt, assi
|
|
|
638
707
|
return tools.some((tool) => tool?.name === "declare_contract") || tools.some((tool) => tool?.name === "make_directory");
|
|
639
708
|
}
|
|
640
709
|
|
|
710
|
+
/**
 * Decide whether a frontend turn should be retried because a `run_shell` call
 * was blocked before anything was mutated.
 *
 * Returns true only when the execution context has a `frontend` entry, the
 * receipt exists and is not marked `mutated`, and at least one tool record is a
 * blocked `run_shell` whose message mentions `declare_contract` or
 * "mutating shell command not allowed", or whose command contains one of
 * mkdir/touch/cp/mv/rm as a whole word.
 *
 * @param {object} args
 * @param {object|null} args.frontendExecutionContext
 * @param {object|null} args.receipt
 * @returns {boolean}
 */
function shouldRecoverFrontendBlockedShell({ frontendExecutionContext, receipt }) {
  const isFrontendTurn = Boolean(frontendExecutionContext?.frontend);
  if (!isFrontendTurn || !receipt || receipt.mutated) {
    return false;
  }

  const toolCalls = Array.isArray(receipt?.tools) ? receipt.tools : [];
  for (const call of toolCalls) {
    if (call?.name !== "run_shell" || call?.status !== "blocked") {
      continue;
    }
    const message = String(call?.result_preview || call?.error || "").toLowerCase();
    const parsedArgs = parseToolArgsPreview(call);
    const command = String(parsedArgs?.command || "").trim().toLowerCase();

    if (/declare_contract/.test(message)) return true;
    if (/mutating shell command not allowed/.test(message)) return true;
    if (/\bmkdir\b|\btouch\b|\bcp\b|\bmv\b|\brm\b/.test(command)) return true;
  }
  return false;
}
|
|
722
|
+
|
|
723
|
+
/**
 * Report whether a path looks like a frontend source file, judged purely by its
 * extension (case-insensitive, surrounding whitespace ignored).
 *
 * Recognised extensions: html, htm, css, scss, sass, less, js, jsx, tsx, vue,
 * svelte. `.htm` is accepted alongside `.html` so that a page written as
 * `index.htm` counts as a real site file.
 *
 * @param {string} [filePath=""] - Path to classify; non-strings are coerced with `String`.
 * @returns {boolean} True when the path ends in one of the recognised extensions.
 */
function isFrontendArtifactPath(filePath = "") {
  const value = String(filePath || "").trim().toLowerCase();
  if (!value) return false;
  return /\.(html?|css|scss|sass|less|js|jsx|tsx|vue|svelte)$/.test(value);
}
|
|
728
|
+
|
|
729
|
+
/**
 * Decide whether a frontend turn changed something on disk without producing
 * any frontend file, and therefore needs a follow-up write pass.
 *
 * True only when all of these hold: the execution context has a `frontend`
 * entry, the receipt is marked `mutated`, at least one path was changed, none of
 * the changed paths satisfies `isFrontendArtifactPath`, and the tool list
 * contains a write-style call (`write_file`, `replace_in_file`, `apply_patch`)
 * or a `make_directory` call.
 *
 * @param {object} args
 * @param {object|null} args.frontendExecutionContext
 * @param {object|null} args.receipt
 * @returns {boolean}
 */
function shouldRecoverFrontendMissingFiles({ frontendExecutionContext, receipt }) {
  if (!frontendExecutionContext?.frontend || !receipt?.mutated) {
    return false;
  }

  const touchedPaths = Array.isArray(receipt?.changedFiles) ? receipt.changedFiles : [];
  for (const touchedPath of touchedPaths) {
    if (isFrontendArtifactPath(touchedPath)) {
      return false;
    }
  }
  if (touchedPaths.length === 0) {
    return false;
  }

  const scaffoldingTools = new Set(["write_file", "replace_in_file", "apply_patch", "make_directory"]);
  const toolCalls = Array.isArray(receipt?.tools) ? receipt.tools : [];
  return toolCalls.some((call) => scaffoldingTools.has(call?.name));
}
|
|
739
|
+
|
|
641
740
|
function buildFrontendWriteRecoveryPrompt({ originalPrompt, contract }) {
|
|
642
741
|
const target = deriveContractWriteTarget(contract);
|
|
643
742
|
const lines = [
|
|
@@ -652,6 +751,32 @@ function buildFrontendWriteRecoveryPrompt({ originalPrompt, contract }) {
|
|
|
652
751
|
return lines.join("\n\n");
|
|
653
752
|
}
|
|
654
753
|
|
|
754
|
+
/**
 * Compose the retry prompt used after a shell command was blocked during a
 * frontend turn. The prompt restates the original task and instructs the model
 * to declare a contract and use the file tools instead of `run_shell`.
 *
 * @param {object} args
 * @param {string} args.originalPrompt - The user's task; coerced to a trimmed string.
 * @returns {string} Prompt paragraphs separated by blank lines.
 */
function buildFrontendBlockedShellRecoveryPrompt({ originalPrompt }) {
  const task = String(originalPrompt || "").trim();
  const paragraphs = [];
  paragraphs.push("You tried to use a mutating shell command before declaring contract scope.");
  paragraphs.push(`Original task: ${task}`);
  paragraphs.push("Do not use run_shell for this frontend task.");
  paragraphs.push("First call declare_contract with the target Desktop folder/file scope.");
  paragraphs.push("Then use make_directory and write_file to create the site files.");
  paragraphs.push("If this is a new site in a folder, write index.html there unless multiple files are clearly justified.");
  paragraphs.push("Reply briefly with only the files created or updated after the tool calls succeed.");
  return paragraphs.join("\n\n");
}
|
|
765
|
+
|
|
766
|
+
/**
 * Compose the retry prompt used when a frontend turn created its scope (for
 * example a folder) but never wrote a site file.
 *
 * @param {object} args
 * @param {string} args.originalPrompt - The user's task; coerced to a trimmed string.
 * @param {object} [args.contract] - Passed to `deriveContractWriteTarget` to name the write target.
 * @param {string[]} [args.changedFiles=[]] - Paths touched so far. Any non-array value
 *   (including `null`, which a default parameter does not cover) is treated as empty
 *   instead of throwing.
 * @returns {string} Prompt paragraphs separated by blank lines.
 */
function buildFrontendMissingFilesRecoveryPrompt({ originalPrompt, contract, changedFiles = [] }) {
  const target = deriveContractWriteTarget(contract);
  const touchedPaths = Array.isArray(changedFiles) ? changedFiles : [];
  const lines = [
    "You created the frontend scope but did not write the actual site file.",
    `Original task: ${String(originalPrompt || "").trim()}`,
    touchedPaths.length > 0 ? `Current touched paths: ${touchedPaths.join(", ")}` : "",
    target ? `Write the actual site into the declared scope now: ${target}` : "Write the actual site into the declared contract scope now.",
    "Do not stop after creating the folder.",
    "Use write_file to create the real frontend file now. If this is a new site in a folder, default to index.html unless multiple files are clearly justified.",
    "Reply briefly with only the files created or updated."
  ].filter(Boolean); // drops the touched-paths line when nothing was touched
  return lines.join("\n\n");
}
|
|
779
|
+
|
|
655
780
|
/**
 * Wrap text in an ANSI 256-colour foreground escape sequence followed by a reset.
 *
 * @param {number|string} fg - Colour index interpolated into the `38;5;<fg>` sequence.
 * @param {string} text - Text to colour.
 * @returns {string} The text surrounded by the colour and reset sequences.
 */
function color256(fg, text) {
  const open = `\x1b[38;5;${fg}m`;
  const reset = "\x1b[0m";
  return `${open}${text}${reset}`;
}
|
|
@@ -1039,6 +1164,264 @@ async function enrichTurnArtifacts({ agent, context, promptText, assistantText,
|
|
|
1039
1164
|
return receipt;
|
|
1040
1165
|
}
|
|
1041
1166
|
|
|
1167
|
+
/**
 * Compute the review artifacts for a completed turn without persisting them.
 *
 * Steps, in order:
 *  1. Impact map — only when the receipt is `mutated` and impact is not disabled.
 *  2. General review via `reviewTurn` — only when the receipt is `mutated` and the
 *     reviewer is not disabled. A thrown error becomes a "caution" review.
 *  3. Frontend design review — when `shouldRunFrontendReview` says so. A thrown
 *     error becomes a "caution" design review.
 *  4. Slop detection and artifact inspection — only when a design review exists;
 *     the two results are combined with `mergeFrontendSlop`.
 *
 * Values already present on the receipt are used as the starting point for
 * `impact`, `review` and `designReview`. No screenshot review is performed here:
 * `screenshotReview` and `screenshotPath` are always returned as null.
 *
 * @param {object} args
 * @param {object} args.agent - Provides `getModel()` and `getProfile()`.
 * @param {object} args.context - Provides `cwd` and `runtime` settings.
 * @param {string} args.promptText
 * @param {string} args.assistantText
 * @param {object} args.receipt - Must be non-null; its fields are read directly.
 * @param {object|null} [args.frontend=null] - Forwarded to `inspectFrontendArtifacts`.
 * @param {AbortSignal} [args.signal] - Forwarded to both reviewers.
 * @returns {Promise<{impact: any, review: any, designReview: any, designSlop: any, screenshotReview: null, screenshotPath: null}>}
 */
async function analyzeTurnArtifacts({
  agent,
  context,
  promptText,
  assistantText,
  receipt,
  frontend = null,
  signal
}) {
  const describeError = (error) => (error instanceof Error ? error.message : String(error));
  // Reviewers always receive a string diff, even when the receipt has none.
  const receiptWithDiff = () => ({ ...receipt, diff: receipt.diff || "" });
  const reviewerConnection = () => ({
    apiKey: context.runtime.apiKey,
    baseUrl: context.runtime.baseUrl,
    model: context.runtime.reviewer?.model || agent.getModel()
  });

  let impact = receipt.impact || null;
  const wantsImpact = receipt.mutated && context.runtime.impact?.enabled !== false;
  if (wantsImpact) {
    const impactSettings = context.runtime.impact;
    impact = await computeImpactMap({
      cwd: context.cwd,
      changedFiles: receipt.changedFiles || [],
      maxRelated: impactSettings?.maxRelated,
      maxTests: impactSettings?.maxTests
    });
  }

  let review = receipt.review || null;
  const wantsReview = receipt.mutated && context.runtime.reviewer?.enabled !== false;
  if (wantsReview) {
    try {
      review = await reviewTurn({
        ...reviewerConnection(),
        promptText,
        assistantText,
        receipt: receiptWithDiff(),
        impact,
        maxDiffChars: context.runtime.reviewer?.maxDiffChars,
        signal
      });
    } catch (error) {
      // Reviewer failure is reported as a cautionary review rather than failing the turn.
      review = {
        verdict: "caution",
        summary: `review failed: ${describeError(error)}`,
        concerns: ["Sentinel reviewer could not complete."],
        followups: []
      };
    }
  }

  let designReview = receipt.designReview || null;
  const wantsDesignReview = shouldRunFrontendReview({ promptText, receipt, profile: agent.getProfile() });
  if (wantsDesignReview) {
    try {
      designReview = await reviewFrontendTurn({
        ...reviewerConnection(),
        promptText,
        assistantText,
        receipt: receiptWithDiff(),
        signal
      });
    } catch (error) {
      designReview = {
        verdict: "caution",
        summary: `design review failed: ${describeError(error)}`,
        strengths: [],
        issues: ["Frontend design reviewer could not complete."],
        nextPass: []
      };
    }
  }

  // Slop checks only make sense once there is a design review to attach them to.
  let deterministicSlop = null;
  let artifactSlop = null;
  if (designReview) {
    deterministicSlop = detectFrontendSlop({ promptText, assistantText, receipt, designReview });
    artifactSlop = await inspectFrontendArtifacts({ cwd: context.cwd, promptText, receipt, frontend });
  }

  return {
    impact,
    review,
    designReview,
    designSlop: mergeFrontendSlop(deterministicSlop, artifactSlop),
    screenshotReview: null,
    screenshotPath: null
  };
}
|
|
1253
|
+
|
|
1254
|
+
/**
 * Persist analysed artifacts onto a receipt and record the result on the runtime.
 *
 * Only truthy artifact fields are written. When there is nothing to write, the
 * receipt is used as-is and `updateReceipt` is not called. If `updateReceipt`
 * resolves to a falsy value, the original receipt is kept.
 *
 * Side effects: sets `context.runtime.lastReceipt` and `context.runtime.lastImpact`.
 *
 * @param {object} args
 * @param {object} args.agent - Provides `toolRuntime.updateReceipt(id, updates)`.
 * @param {object} args.context - Provides the mutable `runtime` object.
 * @param {object|null} args.receipt - Returns null immediately when falsy.
 * @param {object|null} args.artifacts - Analysis output to persist.
 * @param {AbortSignal} [args.signal] - Accepted but not used by this function.
 * @returns {Promise<object|null>} The updated receipt, the original receipt, or null.
 */
async function finalizeReceiptArtifacts({
  agent,
  context,
  receipt,
  artifacts,
  signal
}) {
  if (!receipt) {
    return null;
  }

  const persistedKeys = [
    "impact",
    "review",
    "designReview",
    "designSlop",
    "screenshotReview",
    "screenshotPath",
    "designRevision"
  ];
  const updates = {};
  for (const key of persistedKeys) {
    if (artifacts?.[key]) {
      updates[key] = artifacts[key];
    }
  }

  let finalReceipt = receipt;
  if (Object.keys(updates).length > 0) {
    const stored = await agent.toolRuntime.updateReceipt(receipt.id, updates);
    finalReceipt = stored || receipt;
  }

  const runtime = context.runtime;
  runtime.lastReceipt = finalReceipt;
  runtime.lastImpact = artifacts?.impact || finalReceipt.impact || null;
  return finalReceipt;
}
|
|
1277
|
+
|
|
1278
|
+
/**
 * Analyse a frontend turn and, while `shouldAutoReviseFrontend` asks for it,
 * run automatic revision or rebuild passes against the same contract.
 *
 * Each pass: records the verdict that triggered it, builds a revision or rebuild
 * prompt, temporarily installs a pass-specific execution context, runs the turn,
 * completes it into a new receipt and re-analyses the result. The previous
 * execution context is restored and the spinner stopped after every pass,
 * including when setup or the turn throws.
 *
 * If a pass produces no receipt the loop ends. In that case the last non-null
 * receipt is returned together with the artifacts analysed from it, so the two
 * always describe the same turn; the returned `response` is still the text of
 * the pass that produced no receipt.
 *
 * @param {object} args
 * @param {object} args.agent - Provides `runTurn`, `setExecutionContext` and `toolRuntime`.
 * @param {object} args.context - Provides `runtime.traceMode` for trace output.
 * @param {string} args.promptText - The original user prompt.
 * @param {object} args.response - Response of the turn being analysed.
 * @param {object|null} args.receipt - Receipt of the turn being analysed.
 * @param {object|null} [args.frontendExecutionContext=null]
 * @param {object|null} [args.previousExecutionContext=null] - Restored after each pass.
 * @param {AbortSignal} [args.signal]
 * @returns {Promise<{response: object, receipt: object|null, artifacts: object|null}>}
 *   `artifacts` is null when there is no frontend context or no receipt.
 * @throws {Error} When `getFrontendAcceptanceFailure` reports a failure after the
 *   final pass; the message is the failure's `reason`.
 */
async function maybeReviseInteractiveFrontend({
  agent,
  context,
  promptText,
  response,
  receipt,
  frontendExecutionContext = null,
  previousExecutionContext = null,
  signal
}) {
  if (!frontendExecutionContext || !receipt) {
    return { response, receipt, artifacts: null };
  }

  const frontend = frontendExecutionContext.frontend || null;

  // Frontend settings override the previous context, but reminders from both are kept.
  const baseExecutionContext = { ...(previousExecutionContext || {}), ...frontendExecutionContext };
  if (previousExecutionContext?.reminders && frontendExecutionContext.reminders) {
    baseExecutionContext.reminders = `${previousExecutionContext.reminders}\n${frontendExecutionContext.reminders}`;
  }

  let activeResponse = response;
  let activeReceipt = receipt;
  let artifacts = await analyzeTurnArtifacts({
    agent,
    context,
    promptText,
    assistantText: activeResponse.content || "",
    receipt: activeReceipt,
    frontend,
    signal
  });

  let revisionCount = 0;
  const revisionHistory = [];

  while (shouldAutoReviseFrontend({
    designReview: artifacts.designReview,
    slop: artifacts.designSlop,
    revisionCount,
    frontend
  })) {
    const passNumber = revisionCount + 1;
    const forceRebuild = shouldForceFrontendRebuild({
      frontend,
      slop: artifacts.designSlop,
      revisionCount
    });
    // History captures the state that triggered this pass, not its outcome.
    revisionHistory.push({
      passNumber,
      verdict: artifacts.designReview?.verdict || null,
      summary: String(artifacts.designReview?.summary || "").trim(),
      slopFlags: Array.isArray(artifacts.designSlop?.flags) ? [...artifacts.designSlop.flags] : [],
      mode: forceRebuild ? "rebuild" : "revise"
    });
    const revisionPrompt = forceRebuild
      ? buildFrontendRebuildPrompt({
          originalPrompt: promptText,
          frontend,
          designReview: artifacts.designReview,
          slop: artifacts.designSlop,
          screenshotReview: artifacts.screenshotReview
        })
      : buildFrontendRevisionPrompt({
          originalPrompt: promptText,
          designReview: artifacts.designReview,
          slop: artifacts.designSlop,
          screenshotReview: artifacts.screenshotReview
        });

    const revisionSpinner = createProgressSpinner(
      forceRebuild ? `rebuilding frontend (${passNumber})...` : `revising frontend (${passNumber})...`
    );
    // Everything after the spinner exists runs under try/finally so a failure in
    // setup cannot leave the spinner running or the execution context replaced.
    try {
      printLiveTrace(
        forceRebuild ? `frontend rebuild pass ${passNumber}` : `frontend revision pass ${passNumber}`,
        context.runtime.traceMode
      );
      if (activeReceipt.contract) {
        agent.toolRuntime.setCurrentContract(activeReceipt.contract);
      }
      agent.setExecutionContext({
        ...baseExecutionContext,
        phase: forceRebuild ? `design-rebuild-${passNumber}` : `design-revision-${passNumber}`,
        reminders: [
          baseExecutionContext.reminders || "",
          forceRebuild
            ? `Automatic rebuild pass ${passNumber}: discard the previous frontend direction and rebuild within the same contract using the benchmark starter skeleton.`
            : passNumber === 1
              ? "Automatic second pass: fix the flagged frontend design issues without widening scope."
              : `Automatic follow-up pass ${passNumber}: remove any remaining benchmark hard-fail patterns.`
        ].filter(Boolean).join("\n")
      });

      activeResponse = await agent.runTurn(revisionPrompt, {
        signal,
        onStateChange(state) {
          printLiveTrace(`state=${state}`, context.runtime.traceMode, { verboseOnly: true });
        },
        onToolStart(toolCall) {
          const toolName = toolCall?.function?.name || "tool";
          printLiveTrace(`using ${toolName}`, context.runtime.traceMode);
        },
        onToolEnd(toolCall, result) {
          const toolName = toolCall?.function?.name || "tool";
          const status = parseToolResultShape(result);
          printLiveTrace(`${toolName} ${status}`, context.runtime.traceMode);
        }
      });

      const revisedReceipt = await agent.toolRuntime.completeTurn({ signal });
      // Keep the last real receipt so it stays paired with the artifacts analysed from it.
      if (!revisedReceipt) break;
      activeReceipt = revisedReceipt;

      artifacts = await analyzeTurnArtifacts({
        agent,
        context,
        promptText,
        assistantText: activeResponse.content || "",
        receipt: activeReceipt,
        frontend,
        signal
      });
      revisionCount += 1;
    } finally {
      revisionSpinner.stop();
      agent.setExecutionContext(previousExecutionContext);
    }
  }

  if (revisionCount > 0) {
    artifacts.designRevision = {
      triggered: true,
      passes: revisionCount,
      history: revisionHistory
    };
  }

  const acceptanceFailure = getFrontendAcceptanceFailure({
    frontend,
    slop: artifacts.designSlop,
    designReview: artifacts.designReview,
    screenshotReview: artifacts.screenshotReview
  });
  if (acceptanceFailure) {
    throw new Error(acceptanceFailure.reason);
  }

  return { response: activeResponse, receipt: activeReceipt, artifacts };
}
|
|
1424
|
+
|
|
1042
1425
|
function describeOperator(runtime, agent) {
|
|
1043
1426
|
const identity = buildOperatorIdentity({
|
|
1044
1427
|
mode: agent.getExperienceMode(),
|
|
@@ -3624,7 +4007,35 @@ async function runTextTurnInteractive({
|
|
|
3624
4007
|
}
|
|
3625
4008
|
if (!precomputedReceipt && frontendExecutionContext) {
|
|
3626
4009
|
const candidateReceipt = await agent.toolRuntime.completeTurn({ signal: abortController?.signal });
|
|
3627
|
-
if (
|
|
4010
|
+
if (shouldRecoverFrontendBlockedShell({ frontendExecutionContext, receipt: candidateReceipt })) {
|
|
4011
|
+
const recoverySpinner = createProgressSpinner("retrying frontend tools...");
|
|
4012
|
+
printLiveTrace("frontend recovery: blocked mutating shell, retrying with declare_contract and file tools", context.runtime.traceMode);
|
|
4013
|
+
agent.toolRuntime.setReadOnlyRoots(readOnlyRoots);
|
|
4014
|
+
agent.toolRuntime.setWriteRoots(writeRoots);
|
|
4015
|
+
if (frontendExecutionContext) {
|
|
4016
|
+
const merged = { ...(previousExecutionContext || {}), ...frontendExecutionContext };
|
|
4017
|
+
if (previousExecutionContext?.reminders && frontendExecutionContext.reminders) {
|
|
4018
|
+
merged.reminders = `${previousExecutionContext.reminders}\n${frontendExecutionContext.reminders}`;
|
|
4019
|
+
}
|
|
4020
|
+
agent.setExecutionContext(merged);
|
|
4021
|
+
}
|
|
4022
|
+
try {
|
|
4023
|
+
response = await agent.runTurn(buildFrontendBlockedShellRecoveryPrompt({ originalPrompt: effectivePromptText }), {
|
|
4024
|
+
signal: abortController?.signal,
|
|
4025
|
+
onStateChange(state) {
|
|
4026
|
+
printLiveTrace(`state=${state}`, context.runtime.traceMode, { verboseOnly: true });
|
|
4027
|
+
}
|
|
4028
|
+
});
|
|
4029
|
+
renderedAssistantText = response.content || "";
|
|
4030
|
+
} finally {
|
|
4031
|
+
recoverySpinner.stop();
|
|
4032
|
+
agent.toolRuntime.setReadOnlyRoots([]);
|
|
4033
|
+
agent.toolRuntime.setWriteRoots([]);
|
|
4034
|
+
if (frontendExecutionContext) {
|
|
4035
|
+
agent.setExecutionContext(previousExecutionContext);
|
|
4036
|
+
}
|
|
4037
|
+
}
|
|
4038
|
+
} else if (shouldRecoverFrontendCodeEcho({ frontendExecutionContext, receipt: candidateReceipt, assistantText: response.content || "" })) {
|
|
3628
4039
|
const recoverySpinner = createProgressSpinner("writing files...");
|
|
3629
4040
|
printLiveTrace("frontend recovery: assistant echoed code, retrying with write_file", context.runtime.traceMode);
|
|
3630
4041
|
agent.toolRuntime.setReadOnlyRoots(readOnlyRoots);
|
|
@@ -3656,18 +4067,89 @@ async function runTextTurnInteractive({
|
|
|
3656
4067
|
precomputedReceipt = candidateReceipt;
|
|
3657
4068
|
}
|
|
3658
4069
|
}
|
|
4070
|
+
let finalizedArtifacts = null;
|
|
4071
|
+
let finalizedReceipt = precomputedReceipt || await agent.toolRuntime.completeTurn({ signal: abortController?.signal });
|
|
4072
|
+
if (frontendExecutionContext && finalizedReceipt && shouldRecoverFrontendMissingFiles({ frontendExecutionContext, receipt: finalizedReceipt })) {
|
|
4073
|
+
const recoverySpinner = createProgressSpinner("writing site files...");
|
|
4074
|
+
printLiveTrace("frontend recovery: scope created but no site files written, retrying with write_file", context.runtime.traceMode);
|
|
4075
|
+
agent.toolRuntime.setReadOnlyRoots(readOnlyRoots);
|
|
4076
|
+
agent.toolRuntime.setWriteRoots(writeRoots);
|
|
4077
|
+
if (frontendExecutionContext) {
|
|
4078
|
+
const merged = { ...(previousExecutionContext || {}), ...frontendExecutionContext };
|
|
4079
|
+
if (previousExecutionContext?.reminders && frontendExecutionContext.reminders) {
|
|
4080
|
+
merged.reminders = `${previousExecutionContext.reminders}\n${frontendExecutionContext.reminders}`;
|
|
4081
|
+
}
|
|
4082
|
+
agent.setExecutionContext(merged);
|
|
4083
|
+
}
|
|
4084
|
+
try {
|
|
4085
|
+
response = await agent.runTurn(buildFrontendMissingFilesRecoveryPrompt({
|
|
4086
|
+
originalPrompt: effectivePromptText,
|
|
4087
|
+
contract: finalizedReceipt?.contract,
|
|
4088
|
+
changedFiles: Array.isArray(finalizedReceipt?.changedFiles) ? finalizedReceipt.changedFiles : []
|
|
4089
|
+
}), {
|
|
4090
|
+
signal: abortController?.signal,
|
|
4091
|
+
onStateChange(state) {
|
|
4092
|
+
printLiveTrace(`state=${state}`, context.runtime.traceMode, { verboseOnly: true });
|
|
4093
|
+
}
|
|
4094
|
+
});
|
|
4095
|
+
renderedAssistantText = response.content || renderedAssistantText;
|
|
4096
|
+
finalizedReceipt = await agent.toolRuntime.completeTurn({ signal: abortController?.signal });
|
|
4097
|
+
} finally {
|
|
4098
|
+
recoverySpinner.stop();
|
|
4099
|
+
agent.toolRuntime.setReadOnlyRoots([]);
|
|
4100
|
+
agent.toolRuntime.setWriteRoots([]);
|
|
4101
|
+
if (frontendExecutionContext) {
|
|
4102
|
+
agent.setExecutionContext(previousExecutionContext);
|
|
4103
|
+
}
|
|
4104
|
+
}
|
|
4105
|
+
}
|
|
4106
|
+
if (frontendExecutionContext && finalizedReceipt) {
|
|
4107
|
+
const revisedFrontendTurn = await maybeReviseInteractiveFrontend({
|
|
4108
|
+
agent,
|
|
4109
|
+
context,
|
|
4110
|
+
promptText: effectivePromptText,
|
|
4111
|
+
response,
|
|
4112
|
+
receipt: finalizedReceipt,
|
|
4113
|
+
frontendExecutionContext,
|
|
4114
|
+
previousExecutionContext,
|
|
4115
|
+
signal: abortController?.signal
|
|
4116
|
+
});
|
|
4117
|
+
response = revisedFrontendTurn.response || response;
|
|
4118
|
+
renderedAssistantText = response.content || renderedAssistantText;
|
|
4119
|
+
finalizedReceipt = revisedFrontendTurn.receipt || finalizedReceipt;
|
|
4120
|
+
finalizedArtifacts = revisedFrontendTurn.artifacts || null;
|
|
4121
|
+
}
|
|
4122
|
+
|
|
4123
|
+
renderedAssistantText = buildBenchmarkFrontendStatusOutput({
|
|
4124
|
+
assistantText: renderedAssistantText,
|
|
4125
|
+
receipt: finalizedReceipt,
|
|
4126
|
+
artifacts: finalizedArtifacts,
|
|
4127
|
+
frontend: frontendExecutionContext?.frontend || null
|
|
4128
|
+
});
|
|
4129
|
+
|
|
3659
4130
|
printAssistantOutput(renderedAssistantText);
|
|
3660
4131
|
await setSessionRunState(currentSession, agent, "done");
|
|
3661
4132
|
printTurnSummary(turnSummary, response, { modelId: agent.getModel(), costTracker: context.costTracker, traceMode: context.runtime.traceMode });
|
|
3662
4133
|
printTraceTimeline(turnSummary, context.runtime.traceMode);
|
|
3663
|
-
|
|
3664
|
-
|
|
3665
|
-
|
|
3666
|
-
|
|
3667
|
-
|
|
3668
|
-
|
|
3669
|
-
|
|
3670
|
-
|
|
4134
|
+
let finalReceipt = null;
|
|
4135
|
+
if (finalizedReceipt && finalizedArtifacts) {
|
|
4136
|
+
finalReceipt = await finalizeReceiptArtifacts({
|
|
4137
|
+
agent,
|
|
4138
|
+
context,
|
|
4139
|
+
receipt: finalizedReceipt,
|
|
4140
|
+
artifacts: finalizedArtifacts,
|
|
4141
|
+
signal: abortController?.signal
|
|
4142
|
+
});
|
|
4143
|
+
} else {
|
|
4144
|
+
const receipt = await enrichTurnArtifacts({
|
|
4145
|
+
agent,
|
|
4146
|
+
context,
|
|
4147
|
+
promptText: effectivePromptText,
|
|
4148
|
+
assistantText: response.content || "",
|
|
4149
|
+
signal: abortController?.signal
|
|
4150
|
+
});
|
|
4151
|
+
finalReceipt = receipt || finalizedReceipt;
|
|
4152
|
+
}
|
|
3671
4153
|
if (finalReceipt) {
|
|
3672
4154
|
currentSession.lastReceiptId = finalReceipt.id;
|
|
3673
4155
|
if (shouldPrintReceiptSummary(finalReceipt, context.runtime.receiptMode)) {
|