@tritard/waterbrother 0.8.33 → 0.8.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli.js +320 -9
- package/src/frontend.js +103 -7
- package/src/prompt.js +24 -40
- package/src/workflow.js +7 -1
package/package.json
CHANGED
package/src/cli.js
CHANGED
|
@@ -14,7 +14,21 @@ import { expandHomePath } from "./path-utils.js";
|
|
|
14
14
|
import { AUTONOMY_MODES, buildOperatorIdentity, EXPERIENCE_MODES, modeDefaults, normalizeAutonomyMode, normalizeExperienceMode } from "./modes.js";
|
|
15
15
|
import { computeImpactMap } from "./impact.js";
|
|
16
16
|
import { reviewTurn } from "./reviewer.js";
|
|
17
|
-
import {
|
|
17
|
+
import {
|
|
18
|
+
buildFrontendExecutionContext,
|
|
19
|
+
buildFrontendRebuildPrompt,
|
|
20
|
+
buildFrontendRevisionPrompt,
|
|
21
|
+
captureFrontendScreenshot,
|
|
22
|
+
detectFrontendSlop,
|
|
23
|
+
findFrontendPreviewEntry,
|
|
24
|
+
inspectFrontendArtifacts,
|
|
25
|
+
mergeFrontendSlop,
|
|
26
|
+
reviewFrontendTurn,
|
|
27
|
+
reviewFrontendScreenshot,
|
|
28
|
+
shouldAutoReviseFrontend,
|
|
29
|
+
shouldForceFrontendRebuild,
|
|
30
|
+
shouldRunFrontendReview
|
|
31
|
+
} from "./frontend.js";
|
|
18
32
|
import { loadTask, saveTask, listTasks, setActiveTask, getActiveTask, closeTask } from "./task-store.js";
|
|
19
33
|
import { runDecisionPass, runInventPass, formatDecisionForDisplay, formatDecisionCompact, formatDecisionDetail } from "./decider.js";
|
|
20
34
|
import { runBuildWorkflow, startFeatureTask, runChallengeWorkflow } from "./workflow.js";
|
|
@@ -1037,6 +1051,273 @@ async function enrichTurnArtifacts({ agent, context, promptText, assistantText,
|
|
|
1037
1051
|
return receipt;
|
|
1038
1052
|
}
|
|
1039
1053
|
|
|
1054
|
+
/**
 * Run the post-turn analysis passes for a completed receipt and collect
 * their results without persisting anything.
 *
 * Passes, in order:
 *  1. impact map (only when the receipt mutated files and impact is not disabled),
 *  2. reviewer pass (only when the receipt mutated files and the reviewer is not disabled),
 *  3. frontend design review (when `shouldRunFrontendReview` says so),
 *  4. screenshot capture + screenshot review and slop detection
 *     (only when a design review exists).
 *
 * Reviewer and design-review failures are converted into "caution" results;
 * screenshot failures are swallowed and leave `screenshotReview` as null.
 *
 * @param {object} options
 * @param {object} options.agent - Provides `getModel()` and `getProfile()`.
 * @param {object} options.context - Provides `cwd` and `runtime` settings.
 * @param {string} options.promptText - Prompt forwarded to the reviewers.
 * @param {string} options.assistantText - Assistant reply forwarded to the reviewers.
 * @param {object} options.receipt - Turn receipt being analyzed.
 * @param {object|null} [options.frontend=null] - Frontend descriptor passed to `inspectFrontendArtifacts`.
 * @param {AbortSignal} [options.signal] - Forwarded to the reviewer calls.
 * @returns {Promise<{impact, review, designReview, designSlop, screenshotReview, screenshotPath}>}
 */
async function analyzeTurnArtifacts({
  agent,
  context,
  promptText,
  assistantText,
  receipt,
  frontend = null,
  signal
}) {
  // Evaluated lazily so runtime settings are only touched on the paths that need them.
  const reviewerModel = () => context.runtime.reviewer?.model || agent.getModel();
  const receiptWithDiff = () => ({ ...receipt, diff: receipt.diff || "" });
  const describeError = (error) => (error instanceof Error ? error.message : String(error));

  // 1. Impact map.
  let impact = receipt.impact || null;
  const impactDisabled = () => context.runtime.impact?.enabled === false;
  if (receipt.mutated && !impactDisabled()) {
    impact = await computeImpactMap({
      cwd: context.cwd,
      changedFiles: receipt.changedFiles || [],
      maxRelated: context.runtime.impact?.maxRelated,
      maxTests: context.runtime.impact?.maxTests
    });
  }

  // 2. Reviewer pass; a failure becomes a "caution" verdict instead of throwing.
  let review = receipt.review || null;
  const reviewerDisabled = () => context.runtime.reviewer?.enabled === false;
  if (receipt.mutated && !reviewerDisabled()) {
    try {
      review = await reviewTurn({
        apiKey: context.runtime.apiKey,
        baseUrl: context.runtime.baseUrl,
        model: reviewerModel(),
        promptText,
        assistantText,
        receipt: receiptWithDiff(),
        impact,
        maxDiffChars: context.runtime.reviewer?.maxDiffChars,
        signal
      });
    } catch (error) {
      review = {
        verdict: "caution",
        summary: `review failed: ${describeError(error)}`,
        concerns: ["Sentinel reviewer could not complete."],
        followups: []
      };
    }
  }

  // 3. Frontend design review; same failure-to-caution policy.
  let designReview = receipt.designReview || null;
  const wantsDesignReview = shouldRunFrontendReview({ promptText, receipt, profile: agent.getProfile() });
  if (wantsDesignReview) {
    try {
      designReview = await reviewFrontendTurn({
        apiKey: context.runtime.apiKey,
        baseUrl: context.runtime.baseUrl,
        model: reviewerModel(),
        promptText,
        assistantText,
        receipt: receiptWithDiff(),
        signal
      });
    } catch (error) {
      designReview = {
        verdict: "caution",
        summary: `design review failed: ${describeError(error)}`,
        strengths: [],
        issues: ["Frontend design reviewer could not complete."],
        nextPass: []
      };
    }
  }

  // 4. Everything below only runs when a design review is available.
  let screenshotReview = null;
  let screenshotPath = null;
  let deterministicSlop = null;
  let artifactSlop = null;
  if (designReview) {
    try {
      const previewEntry = await findFrontendPreviewEntry({ cwd: context.cwd, receipt });
      if (previewEntry) {
        screenshotPath = await captureFrontendScreenshot({ entryPath: previewEntry });
      }
      if (screenshotPath) {
        screenshotReview = await reviewFrontendScreenshot({
          apiKey: context.runtime.apiKey,
          baseUrl: context.runtime.baseUrl,
          model: reviewerModel(),
          screenshotPath,
          promptText,
          signal
        });
      }
    } catch {
      // Screenshot review is best-effort; any captured path is kept.
      screenshotReview = null;
    }

    deterministicSlop = detectFrontendSlop({ promptText, assistantText, receipt, designReview });
    artifactSlop = await inspectFrontendArtifacts({ cwd: context.cwd, receipt, frontend });
  }

  const designSlop = mergeFrontendSlop(deterministicSlop, artifactSlop);

  return {
    impact,
    review,
    designReview,
    designSlop,
    screenshotReview,
    screenshotPath
  };
}
|
|
1160
|
+
|
|
1161
|
+
/**
 * Persist analysis artifacts onto a receipt and remember the outcome on the
 * runtime (`lastReceipt`, `lastImpact`).
 *
 * Only truthy artifact fields are written. When there is nothing to write the
 * receipt is returned untouched and `updateReceipt` is not called.
 *
 * @param {object} options
 * @param {object} options.agent - Provides `toolRuntime.updateReceipt(id, updates)`.
 * @param {object} options.context - `context.runtime` receives `lastReceipt` / `lastImpact`.
 * @param {object|null} options.receipt - Receipt to finalize; a falsy value yields `null`.
 * @param {object|null} options.artifacts - Analysis results to attach.
 * @param {AbortSignal} [options.signal] - Accepted for call-site symmetry; not used here.
 * @returns {Promise<object|null>} The updated receipt, or the original one when the
 *   update returns nothing or no update was needed.
 */
async function finalizeReceiptArtifacts({
  agent,
  context,
  receipt,
  artifacts,
  signal
}) {
  if (!receipt) return null;

  // Field order matches the order in which updates are recorded.
  const artifactKeys = [
    "impact",
    "review",
    "designReview",
    "designSlop",
    "screenshotReview",
    "screenshotPath",
    "designRevision"
  ];
  const updates = {};
  for (const key of artifactKeys) {
    const value = artifacts?.[key];
    if (value) updates[key] = value;
  }

  let finalReceipt = receipt;
  if (Object.keys(updates).length > 0) {
    const stored = await agent.toolRuntime.updateReceipt(receipt.id, updates);
    finalReceipt = stored || receipt;
  }

  context.runtime.lastReceipt = finalReceipt;
  context.runtime.lastImpact = artifacts?.impact || finalReceipt.impact || null;
  return finalReceipt;
}
|
|
1184
|
+
|
|
1185
|
+
/**
 * Analyze a finished interactive frontend turn and, while
 * `shouldAutoReviseFrontend` asks for it, run automatic revision or rebuild
 * passes against the same contract.
 *
 * Each pass re-runs the agent with a revision/rebuild prompt, completes the
 * turn to obtain a new receipt, and re-analyzes it. The agent's execution
 * context is restored to `previousExecutionContext` after every pass.
 *
 * @param {object} options
 * @param {object} options.agent - Provides `runTurn`, `setExecutionContext` and `toolRuntime`.
 * @param {object} options.context - Provides `runtime.traceMode` plus whatever `analyzeTurnArtifacts` reads.
 * @param {string} options.promptText - Original user prompt.
 * @param {object} options.response - Response of the turn that just finished.
 * @param {object|null} options.receipt - Receipt of the turn that just finished.
 * @param {object|null} [options.frontendExecutionContext=null] - Frontend execution context; when
 *   missing, nothing is analyzed and the inputs are returned unchanged.
 * @param {object|null} [options.previousExecutionContext=null] - Context to restore after each pass.
 * @param {AbortSignal} [options.signal] - Forwarded to the agent and analysis calls.
 * @returns {Promise<{response: object, receipt: object|null, artifacts: object|null}>}
 *   The latest response, the latest receipt that was actually produced, and the
 *   analysis artifacts belonging to that receipt (with `designRevision` when at
 *   least one pass completed).
 */
async function maybeReviseInteractiveFrontend({
  agent,
  context,
  promptText,
  response,
  receipt,
  frontendExecutionContext = null,
  previousExecutionContext = null,
  signal
}) {
  if (!frontendExecutionContext || !receipt) {
    return { response, receipt, artifacts: null };
  }

  const baseExecutionContext = { ...(previousExecutionContext || {}), ...frontendExecutionContext };
  if (previousExecutionContext?.reminders && frontendExecutionContext.reminders) {
    // Both contexts carry reminders: keep both instead of letting the spread overwrite one.
    baseExecutionContext.reminders = `${previousExecutionContext.reminders}\n${frontendExecutionContext.reminders}`;
  }
  const frontend = frontendExecutionContext.frontend || null;

  let activeResponse = response;
  let activeReceipt = receipt;
  let artifacts = await analyzeTurnArtifacts({
    agent,
    context,
    promptText,
    assistantText: activeResponse.content || "",
    receipt: activeReceipt,
    frontend,
    signal
  });

  let revisionCount = 0;
  const revisionHistory = [];

  while (shouldAutoReviseFrontend({
    designReview: artifacts.designReview,
    slop: artifacts.designSlop,
    revisionCount
  })) {
    const passNumber = revisionCount + 1;
    const forceRebuild = shouldForceFrontendRebuild({
      frontend,
      slop: artifacts.designSlop,
      revisionCount
    });

    // Record the findings that triggered this pass.
    revisionHistory.push({
      passNumber,
      verdict: artifacts.designReview?.verdict || null,
      summary: String(artifacts.designReview?.summary || "").trim(),
      slopFlags: Array.isArray(artifacts.designSlop?.flags) ? [...artifacts.designSlop.flags] : [],
      mode: forceRebuild ? "rebuild" : "revise"
    });

    const revisionPrompt = forceRebuild
      ? buildFrontendRebuildPrompt({
        originalPrompt: promptText,
        frontend,
        designReview: artifacts.designReview,
        slop: artifacts.designSlop,
        screenshotReview: artifacts.screenshotReview
      })
      : buildFrontendRevisionPrompt({
        originalPrompt: promptText,
        designReview: artifacts.designReview,
        slop: artifacts.designSlop,
        screenshotReview: artifacts.screenshotReview
      });

    let passReminder;
    if (forceRebuild) {
      passReminder = `Automatic rebuild pass ${passNumber}: discard the previous frontend direction and rebuild within the same contract using the benchmark starter skeleton.`;
    } else if (passNumber === 1) {
      passReminder = "Automatic second pass: fix the flagged frontend design issues without widening scope.";
    } else {
      passReminder = `Automatic follow-up pass ${passNumber}: remove any remaining benchmark hard-fail patterns.`;
    }

    const revisionSpinner = createProgressSpinner(
      forceRebuild ? `rebuilding frontend (${passNumber})...` : `revising frontend (${passNumber})...`
    );
    // Everything after the spinner exists runs inside the try so that a failure while
    // applying the contract or execution context still stops the spinner and restores
    // the previous execution context.
    try {
      printLiveTrace(
        forceRebuild ? `frontend rebuild pass ${passNumber}` : `frontend revision pass ${passNumber}`,
        context.runtime.traceMode
      );
      if (activeReceipt.contract) {
        agent.toolRuntime.setCurrentContract(activeReceipt.contract);
      }
      agent.setExecutionContext({
        ...baseExecutionContext,
        phase: forceRebuild ? `design-rebuild-${passNumber}` : `design-revision-${passNumber}`,
        reminders: [baseExecutionContext.reminders || "", passReminder].filter(Boolean).join("\n")
      });

      activeResponse = await agent.runTurn(revisionPrompt, {
        signal,
        onStateChange(state) {
          printLiveTrace(`state=${state}`, context.runtime.traceMode, { verboseOnly: true });
        },
        onToolStart(toolCall) {
          const toolName = toolCall?.function?.name || "tool";
          printLiveTrace(`using ${toolName}`, context.runtime.traceMode);
        },
        onToolEnd(toolCall, result) {
          const toolName = toolCall?.function?.name || "tool";
          const status = parseToolResultShape(result);
          printLiveTrace(`${toolName} ${status}`, context.runtime.traceMode);
        }
      });

      const revisedReceipt = await agent.toolRuntime.completeTurn({ signal });
      // No receipt for this pass: stop revising but keep the last receipt that was
      // produced, so the returned receipt still matches the returned artifacts.
      if (!revisedReceipt) break;
      activeReceipt = revisedReceipt;

      artifacts = await analyzeTurnArtifacts({
        agent,
        context,
        promptText,
        assistantText: activeResponse.content || "",
        receipt: activeReceipt,
        frontend,
        signal
      });
      revisionCount += 1;
    } finally {
      revisionSpinner.stop();
      agent.setExecutionContext(previousExecutionContext);
    }
  }

  if (revisionCount > 0) {
    artifacts.designRevision = {
      triggered: true,
      passes: revisionCount,
      history: revisionHistory
    };
  }

  return { response: activeResponse, receipt: activeReceipt, artifacts };
}
|
|
1320
|
+
|
|
1040
1321
|
function describeOperator(runtime, agent) {
|
|
1041
1322
|
const identity = buildOperatorIdentity({
|
|
1042
1323
|
mode: agent.getExperienceMode(),
|
|
@@ -3654,18 +3935,48 @@ async function runTextTurnInteractive({
|
|
|
3654
3935
|
precomputedReceipt = candidateReceipt;
|
|
3655
3936
|
}
|
|
3656
3937
|
}
|
|
3938
|
+
let finalizedArtifacts = null;
|
|
3939
|
+
let finalizedReceipt = precomputedReceipt || await agent.toolRuntime.completeTurn({ signal: abortController?.signal });
|
|
3940
|
+
if (frontendExecutionContext && finalizedReceipt) {
|
|
3941
|
+
const revisedFrontendTurn = await maybeReviseInteractiveFrontend({
|
|
3942
|
+
agent,
|
|
3943
|
+
context,
|
|
3944
|
+
promptText: effectivePromptText,
|
|
3945
|
+
response,
|
|
3946
|
+
receipt: finalizedReceipt,
|
|
3947
|
+
frontendExecutionContext,
|
|
3948
|
+
previousExecutionContext,
|
|
3949
|
+
signal: abortController?.signal
|
|
3950
|
+
});
|
|
3951
|
+
response = revisedFrontendTurn.response || response;
|
|
3952
|
+
renderedAssistantText = response.content || renderedAssistantText;
|
|
3953
|
+
finalizedReceipt = revisedFrontendTurn.receipt || finalizedReceipt;
|
|
3954
|
+
finalizedArtifacts = revisedFrontendTurn.artifacts || null;
|
|
3955
|
+
}
|
|
3956
|
+
|
|
3657
3957
|
printAssistantOutput(renderedAssistantText);
|
|
3658
3958
|
await setSessionRunState(currentSession, agent, "done");
|
|
3659
3959
|
printTurnSummary(turnSummary, response, { modelId: agent.getModel(), costTracker: context.costTracker, traceMode: context.runtime.traceMode });
|
|
3660
3960
|
printTraceTimeline(turnSummary, context.runtime.traceMode);
|
|
3661
|
-
|
|
3662
|
-
|
|
3663
|
-
|
|
3664
|
-
|
|
3665
|
-
|
|
3666
|
-
|
|
3667
|
-
|
|
3668
|
-
|
|
3961
|
+
let finalReceipt = null;
|
|
3962
|
+
if (finalizedReceipt && finalizedArtifacts) {
|
|
3963
|
+
finalReceipt = await finalizeReceiptArtifacts({
|
|
3964
|
+
agent,
|
|
3965
|
+
context,
|
|
3966
|
+
receipt: finalizedReceipt,
|
|
3967
|
+
artifacts: finalizedArtifacts,
|
|
3968
|
+
signal: abortController?.signal
|
|
3969
|
+
});
|
|
3970
|
+
} else {
|
|
3971
|
+
const receipt = await enrichTurnArtifacts({
|
|
3972
|
+
agent,
|
|
3973
|
+
context,
|
|
3974
|
+
promptText: effectivePromptText,
|
|
3975
|
+
assistantText: response.content || "",
|
|
3976
|
+
signal: abortController?.signal
|
|
3977
|
+
});
|
|
3978
|
+
finalReceipt = receipt || finalizedReceipt;
|
|
3979
|
+
}
|
|
3669
3980
|
if (finalReceipt) {
|
|
3670
3981
|
currentSession.lastReceiptId = finalReceipt.id;
|
|
3671
3982
|
if (shouldPrintReceiptSummary(finalReceipt, context.runtime.receiptMode)) {
|
package/src/frontend.js
CHANGED
|
@@ -47,7 +47,7 @@ Rules:
|
|
|
47
47
|
- Be concrete about visible layout, spacing, typography, contrast, composition, and interaction cues.
|
|
48
48
|
- Do not wrap JSON in markdown.`;
|
|
49
49
|
|
|
50
|
-
const BENCHMARK_FRONTEND_PROMPT = /\b(?:benchmark|squarespace quality|ultimate design|first class|on par|codex|claude code|cc)\b/i;
|
|
50
|
+
const BENCHMARK_FRONTEND_PROMPT = /\b(?:benchmark|squarespace quality|ultimate design|first class|on par|codex|claude code|cc|winning shopify|winning ecommerce|high-converting|flagship pdp)\b/i;
|
|
51
51
|
|
|
52
52
|
const UNIVERSAL_FRONTEND_REMINDERS = [
|
|
53
53
|
"Choose one visual direction and stay consistent across typography, spacing, color, and motion.",
|
|
@@ -63,7 +63,8 @@ const BENCHMARK_FRONTEND_REMINDERS = [
|
|
|
63
63
|
"For benchmark frontend tasks, cut generic reflective-editorial copy. Use sharper, more concrete language or neutral structural placeholders instead of atmosphere-writing.",
|
|
64
64
|
"For benchmark frontend tasks, force at least one asymmetrical or compositionally distinctive move instead of a fully balanced template layout.",
|
|
65
65
|
"Benchmark mode: treat fake issue framing, fake publication history, fake keyboard shortcuts, invented named contributors, and demo-page behavior as disallowed outputs. Use neutral structural placeholders if needed.",
|
|
66
|
-
"Benchmark mode: the page should feel publishable, not like a static demo or wireframe explanation."
|
|
66
|
+
"Benchmark mode: the page should feel publishable, not like a static demo or wireframe explanation.",
|
|
67
|
+
"Benchmark mode: do not label the page as a demo, benchmark, placeholder, or structural exercise inside the UI."
|
|
67
68
|
];
|
|
68
69
|
|
|
69
70
|
const BENCHMARK_SITE_TYPE_RULES = {
|
|
@@ -71,7 +72,9 @@ const BENCHMARK_SITE_TYPE_RULES = {
|
|
|
71
72
|
"Benchmark blog mode: use neutral structural placeholders or concrete subject matter instead of publication worldbuilding or reflective-editorial atmosphere prose.",
|
|
72
73
|
"Benchmark blog mode: do not wrap the page in a generic publication shell like Journal, Featured Essay, Latest Dispatches, Notes, Archive, Print Edition, Submit Work, or similar magazine-site framing.",
|
|
73
74
|
"Benchmark blog mode: avoid the default editorial scaffold of hero, story list, archive rail, topics grid, and publication footer unless the user explicitly asked for a magazine-style site.",
|
|
74
|
-
"Benchmark blog mode: do not use Tailwind CDN starter theming, Font Awesome chrome, picsum/placehold imagery, fake keyboard/search chrome, or demo-only modal article previews."
|
|
75
|
+
"Benchmark blog mode: do not use Tailwind CDN starter theming, Font Awesome chrome, picsum/placehold imagery, fake keyboard/search chrome, or demo-only modal article previews.",
|
|
76
|
+
"Benchmark blog mode: include one real editorial image surface or one authored abstract visual composition. Do not replace visuals with text placeholders.",
|
|
77
|
+
"Benchmark blog mode: do not invent named authors, editors, contributors, or interview credits unless the user explicitly asked for fictional worldbuilding."
|
|
75
78
|
],
|
|
76
79
|
store: [
|
|
77
80
|
"Benchmark ecommerce mode: prioritize conversion architecture over editorial styling. The page should sell, not just look clean.",
|
|
@@ -79,7 +82,8 @@ const BENCHMARK_SITE_TYPE_RULES = {
|
|
|
79
82
|
"Benchmark ecommerce mode: do not leave the main merchandising surface as a literal placeholder, dashed demo box, emoji stand-in, or visual wireframe.",
|
|
80
83
|
"Benchmark ecommerce mode: do not use Tailwind CDN starter theming, Font Awesome chrome, or placeholder product images such as picsum/placehold on the live merchandising surface.",
|
|
81
84
|
"Benchmark ecommerce mode: fake command palettes, fake app shortcuts, demo-only cart gimmicks, and unrelated theme chrome are disallowed.",
|
|
82
|
-
"Benchmark ecommerce mode: fake review counts, fake bestseller labels, fake sales counters, and fake as-featured-in proof are disallowed."
|
|
85
|
+
"Benchmark ecommerce mode: fake review counts, fake bestseller labels, fake sales counters, and fake as-featured-in proof are disallowed.",
|
|
86
|
+
"Benchmark ecommerce mode: include a real product-photo surface or an authored product-style render/silhouette. Do not replace the hero with text placeholders."
|
|
83
87
|
]
|
|
84
88
|
};
|
|
85
89
|
|
|
@@ -234,7 +238,8 @@ const BENCHMARK_STARTERS = {
|
|
|
234
238
|
"Section 2: one dominant lead piece with a strong headline and one supporting block offset beside or below it.",
|
|
235
239
|
"Section 3: one compact secondary list or pair of entries, not an archive rail plus topics plus notes stack.",
|
|
236
240
|
"Section 4: minimal closing footer note only if needed.",
|
|
237
|
-
"No publication shell, no archive/topic chrome, no subscribe CTA, no demo interactions, no explanation that the page is a demo."
|
|
241
|
+
"No publication shell, no archive/topic chrome, no subscribe CTA, no author bylines, no contributor credits, no demo interactions, no explanation that the page is a demo.",
|
|
242
|
+
"Use one strong visual surface: a real editorial image URL or an authored abstract composition made with CSS/SVG."
|
|
238
243
|
],
|
|
239
244
|
store: [
|
|
240
245
|
"Benchmark store starter: build a flagship single-product PDP with 4-5 sections max.",
|
|
@@ -243,7 +248,9 @@ const BENCHMARK_STARTERS = {
|
|
|
243
248
|
"Section 3: three focused benefit/features blocks.",
|
|
244
249
|
"Section 4: material/specification or craftsmanship band with objection handling.",
|
|
245
250
|
"Section 5: compact FAQ or guarantee block if still needed.",
|
|
246
|
-
"No Tailwind starter shell, no fake reviews, no fake badges, no placeholder gallery, no emoji or dashed placeholder hero."
|
|
251
|
+
"No Tailwind starter shell, no fake reviews, no fake badges, no placeholder gallery, no emoji or dashed placeholder hero, no alert-based checkout, no keyboard shortcut chrome, no demo footer copy.",
|
|
252
|
+
"The hero must merchandise the product with a believable product surface and product-specific benefit language, not generic luxury-commerce filler.",
|
|
253
|
+
"Use a real product image URL or an authored SVG/CSS product silhouette with material/shadow treatment. Never use text-only image placeholders."
|
|
247
254
|
]
|
|
248
255
|
};
|
|
249
256
|
|
|
@@ -275,6 +282,7 @@ const SLOP_PATTERNS = [
|
|
|
275
282
|
{ key: "premium_blog_trope", label: "generic premium-blog editorial trope", pattern: /\b(?:thoughtful living|slow living|curated reflections|crafted with intention|made with intention|a quiet publication|made with restraint|journal of attention|discipline of seeing|private notes made public)\b/i, weight: 2 },
|
|
276
283
|
{ key: "reflective_editorial_copy", label: "generic reflective-editorial copy trope", pattern: /\b(?:quiet architecture of attention|great thinkers understood|value of slowness|the examined life|deliberate cultivation of focus|most radical act|what we have forgotten|how everyday items carry|still matters)\b/i, weight: 2 },
|
|
277
284
|
{ key: "fictional_publication_brand", label: "invented publication/author scaffolding", pattern: /\b(?:by [A-Z][a-z]+ [A-Z][a-z]+|photography by [A-Z][a-z]+ [A-Z][a-z]+|journal of attention|vesper|lumen|dispatches|vol\.\s*\d+|winter 20\d{2}|autumn 20\d{2}|spring 20\d{2}|summer 20\d{2}|the quarterly|editorial\.)\b/i, weight: 3 },
|
|
285
|
+
{ key: "author_bylines", label: "named author or contributor byline", pattern: /\b(?:by [A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+)+|contributor|essay by|interview by)\b/i, weight: 3 },
|
|
278
286
|
{ key: "fake_social_proof", label: "fake reader count or social proof", pattern: /\b(?:\d+\s+readers joined this month|\d+\s+subscribers?|trusted by)\b/i, weight: 2 },
|
|
279
287
|
{ key: "placeholder_surface", label: "placeholder surface or stand-in hero", pattern: /\b(?:placeholder|visual placeholder|product representation|hero placeholder|dashed|emoji stand-in|🧥|🎧)\b/i, weight: 3 }
|
|
280
288
|
];
|
|
@@ -308,6 +316,20 @@ function normalizeContent(content) {
|
|
|
308
316
|
return "";
|
|
309
317
|
}
|
|
310
318
|
|
|
319
|
+
/**
 * Tell whether an `href`/`src` value points outside the local file tree.
 *
 * A reference counts as external when it starts with a URI scheme
 * (`https:`, `data:`, `mailto:`, `tel:`, `web+app:` ...), is protocol-relative
 * (`//host/...`), or is a same-document fragment (`#id`).
 *
 * @param {unknown} value - Raw attribute value; falsy values are treated as "".
 * @returns {boolean} `true` when the reference should not be checked on disk.
 */
function isExternalAssetReference(value) {
  // Leading whitespace is ignored when reading the attribute value.
  const ref = String(value || "").trim();
  // Scheme grammar per RFC 3986: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) ":".
  // This one alternative also covers data:, mailto: and tel:.
  return /^(?:[a-z][a-z0-9+.-]*:|\/\/|#)/i.test(ref);
}
|
|
322
|
+
|
|
323
|
+
/**
 * Collect the distinct local file paths referenced by `href` / `src`
 * attributes in an HTML string.
 *
 * External references (as decided by `isExternalAssetReference`) are skipped.
 * Query strings and fragments are stripped, so `sprite.svg#icon` and
 * `site.css?v=3` yield `sprite.svg` and `site.css`. Percent-escapes are
 * decoded so the result can be compared against file names on disk; a value
 * with a malformed escape is returned as written.
 *
 * @param {string} [htmlText=""] - HTML source to scan.
 * @returns {string[]} Unique local references in first-seen order.
 */
function extractLocalAssetReferences(htmlText = "") {
  const refs = new Set();
  // The backreference ties the closing quote to the opening one; whitespace around
  // "=" is tolerated.
  const attributePattern = /\b(?:href|src)\s*=\s*(["'])(.*?)\1/gi;
  for (const match of String(htmlText).matchAll(attributePattern)) {
    const raw = String(match[2] || "").trim();
    if (!raw || isExternalAssetReference(raw)) continue;

    // Keep only the path portion; "?query" and "#fragment" do not name a file.
    const pathPart = raw.split(/[?#]/, 1)[0];
    if (!pathPart) continue;

    let decoded = pathPart;
    try {
      decoded = decodeURIComponent(pathPart);
    } catch {
      // Malformed percent-escape: fall back to the literal text.
    }
    refs.add(decoded);
  }
  return [...refs];
}
|
|
332
|
+
|
|
311
333
|
function inferMimeType(filePath) {
|
|
312
334
|
const lower = String(filePath || "").toLowerCase();
|
|
313
335
|
if (lower.endsWith(".png")) return "image/png";
|
|
@@ -489,6 +511,10 @@ export function detectFrontendSlop({ promptText = "", assistantText = "", receip
|
|
|
489
511
|
flags.push("benchmark store used fake proof or badge chrome");
|
|
490
512
|
score += 4;
|
|
491
513
|
}
|
|
514
|
+
if (siteType === "store" && benchmarkMode && /\b(?:Objects for a considered life|considered life|timeless essentials|designed for modern living)\b/i.test(haystack)) {
|
|
515
|
+
flags.push("benchmark store fell back to generic luxury-commerce copy");
|
|
516
|
+
score += 2;
|
|
517
|
+
}
|
|
492
518
|
if (siteType === "store" && benchmarkMode && /\b(?:metaKey && e\.key === ['"]k['"]|command palette|keyboard shortcuts?)\b/i.test(haystack)) {
|
|
493
519
|
flags.push("benchmark store used fake keyboard or shortcut chrome");
|
|
494
520
|
score += 4;
|
|
@@ -517,6 +543,10 @@ export function detectFrontendSlop({ promptText = "", assistantText = "", receip
|
|
|
517
543
|
flags.push("benchmark blog fell back to essay-magazine placeholder language");
|
|
518
544
|
score += 3;
|
|
519
545
|
}
|
|
546
|
+
if (siteType === "blog" && benchmarkMode && /\b(?:by [A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+)+|contributor|essay by|interview by)\b/i.test(haystack)) {
|
|
547
|
+
flags.push("benchmark blog used invented author or contributor bylines");
|
|
548
|
+
score += 4;
|
|
549
|
+
}
|
|
520
550
|
if (siteType === "blog" && benchmarkMode && /cdn\.tailwindcss\.com/i.test(haystack)) {
|
|
521
551
|
flags.push("benchmark blog relied on Tailwind CDN starter theming");
|
|
522
552
|
score += 4;
|
|
@@ -551,12 +581,78 @@ export function detectFrontendSlop({ promptText = "", assistantText = "", receip
|
|
|
551
581
|
return {
|
|
552
582
|
score,
|
|
553
583
|
flags,
|
|
554
|
-
hardBlock: flags.some((flag) => /fictional publication identity|generic publication-shell framing|essay-magazine placeholder language|fake keyboard|reflective-editorial atmosphere|primary merchandising surface as a placeholder|placeholder hero surface|Tailwind CDN starter theming|Font Awesome chrome|placeholder product imagery|placeholder imagery|fake proof or badge chrome|demo interaction chrome|demo page explanation copy/.test(flag)),
|
|
584
|
+
hardBlock: flags.some((flag) => /fictional publication identity|generic publication-shell framing|essay-magazine placeholder language|invented author or contributor bylines|fake keyboard|reflective-editorial atmosphere|primary merchandising surface as a placeholder|placeholder hero surface|Tailwind CDN starter theming|Font Awesome chrome|placeholder product imagery|placeholder imagery|fake proof or badge chrome|demo interaction chrome|demo page explanation copy/.test(flag)),
|
|
555
585
|
severe: score >= 5,
|
|
556
586
|
summary: flags.length > 0 ? `frontend slop flags: ${flags.join(", ")}` : "no deterministic frontend slop flags"
|
|
557
587
|
};
|
|
558
588
|
}
|
|
559
589
|
|
|
590
|
+
/**
 * Inspect the HTML files changed in a turn for problems that can be detected
 * from the files on disk.
 *
 * Checks performed per changed `.html` file (unreadable files are skipped):
 *  - every local `href`/`src` reference must exist on disk (+4 per miss);
 *  - in benchmark mode, the page must contain a visual surface and no
 *    placeholder-surface markers (+4);
 *  - in benchmark mode, the page must not contain demo behavior such as
 *    `console.log(...)`, `alert(...)`, `prompt(...)` or demo explanation copy (+3).
 *
 * Root-absolute references (`/assets/x.css`) are looked up under the project
 * root and under the HTML file's own directory, not at the filesystem root.
 *
 * @param {object} [options]
 * @param {string} [options.cwd] - Project root; defaults to `process.cwd()`.
 * @param {object|null} [options.receipt=null] - Receipt whose `changedFiles` are inspected.
 * @param {object|null} [options.frontend=null] - Frontend descriptor (`benchmarkMode`, `siteType`).
 * @returns {Promise<{score: number, flags: string[], hardBlock: boolean, severe: boolean, summary: string}>}
 */
export async function inspectFrontendArtifacts({ cwd, receipt = null, frontend = null } = {}) {
  const projectRoot = cwd || process.cwd();
  const changedFiles = Array.isArray(receipt?.changedFiles) ? receipt.changedFiles : [];
  const htmlFiles = changedFiles
    .map((filePath) => String(filePath || "").trim())
    .filter((filePath) => filePath && filePath.toLowerCase().endsWith(".html"));
  const flags = [];
  let score = 0;

  const existsOnDisk = async (candidate) => {
    try {
      await fs.access(candidate);
      return true;
    } catch {
      return false;
    }
  };

  for (const filePath of htmlFiles) {
    const absolute = path.isAbsolute(filePath) ? filePath : path.resolve(projectRoot, filePath);
    let htmlText = "";
    try {
      htmlText = await fs.readFile(absolute, "utf8");
    } catch {
      // Deleted or unreadable file: nothing to inspect.
      continue;
    }

    const htmlDir = path.dirname(absolute);
    for (const ref of extractLocalAssetReferences(htmlText)) {
      // A leading "/" means "site root", which is not the filesystem root.
      const candidates = ref.startsWith("/")
        ? [path.join(projectRoot, ref), path.join(htmlDir, ref)]
        : [path.resolve(htmlDir, ref)];
      let found = false;
      for (const candidate of candidates) {
        if (await existsOnDisk(candidate)) {
          found = true;
          break;
        }
      }
      if (!found) {
        flags.push(`missing local asset reference: ${ref}`);
        score += 4;
      }
    }

    if (frontend?.benchmarkMode) {
      const hasVisualSurface = /<img\b|background-image\s*:|url\((?!['"]?(?:https?:|data:))/i.test(htmlText) || /<svg\b/i.test(htmlText);
      const hasPlaceholderSurface = /\b(?:visual-placeholder|image-placeholder|replace with high[- ]resolution|replace with high quality photography|text-only image placeholder|product representation)\b/i.test(htmlText);
      if (!hasVisualSurface || hasPlaceholderSurface) {
        flags.push(`benchmark ${frontend.siteType} lacks a real visual surface`);
        score += 4;
      }
      // The call patterns end at "(" rather than at a word boundary, so that
      // alert("...") and alert('...') are detected as well as alert(name).
      const hasDemoBehavior = /\b(?:console\.log|alert|prompt)\s*\(|\b(?:static demo|demo product page|all content is placeholder)\b/i.test(htmlText);
      if (hasDemoBehavior) {
        flags.push(`benchmark ${frontend.siteType} still contains demo behavior or explanation copy`);
        score += 3;
      }
    }
  }

  const uniqueFlags = [...new Set(flags)];
  return {
    score,
    flags: uniqueFlags,
    hardBlock: flags.length > 0,
    severe: score >= 5,
    summary: flags.length ? `frontend artifact issues: ${uniqueFlags.join(", ")}` : "no frontend artifact issues"
  };
}
|
|
640
|
+
|
|
641
|
+
/**
 * Combine two slop reports into one.
 *
 * When only one report is present it is returned as-is; when neither is
 * present the result is `null`. Otherwise scores are added, flags are
 * de-duplicated in first-seen order, and `hardBlock` / `severe` are true when
 * either input sets them (`severe` also when the combined score reaches 5).
 *
 * @param {object|null} [base=null] - First report.
 * @param {object|null} [extra=null] - Second report.
 * @returns {object|null} Merged report, one of the inputs, or `null`.
 */
export function mergeFrontendSlop(base = null, extra = null) {
  if (!base) return extra || null;
  if (!extra) return base;

  const reports = [base, extra];

  const seen = new Set();
  for (const report of reports) {
    for (const flag of report.flags || []) seen.add(flag);
  }
  const flags = Array.from(seen);

  const score = reports.reduce((total, report) => total + Number(report.score || 0), 0);
  const eitherSevere = Boolean(base.severe || extra.severe);
  const eitherBlocks = Boolean(base.hardBlock || extra.hardBlock);

  let summary = "no deterministic frontend slop flags";
  if (flags.length) {
    summary = `frontend slop flags: ${flags.join(", ")}`;
  }

  return {
    score,
    flags,
    hardBlock: eitherBlocks,
    severe: score >= 5 || eitherSevere,
    summary
  };
}
|
|
655
|
+
|
|
560
656
|
export function shouldAutoReviseFrontend({ designReview = null, slop = null, revisionCount = 0 } = {}) {
|
|
561
657
|
if (revisionCount >= 3) return false;
|
|
562
658
|
if (!designReview) return false;
|
package/src/prompt.js
CHANGED
|
@@ -6,67 +6,51 @@ export async function promptLine(label, { input = process.stdin, output = proces
|
|
|
6
6
|
return;
|
|
7
7
|
}
|
|
8
8
|
let buf = "";
|
|
9
|
-
let
|
|
10
|
-
const shouldPauseOnCleanup = typeof input.pause === "function";
|
|
9
|
+
let settled = false;
|
|
11
10
|
const isTTY = input.isTTY && typeof input.setRawMode === "function";
|
|
12
11
|
|
|
13
|
-
|
|
14
|
-
|
|
12
|
+
// Steal stdin from existing data listeners (readline keypress emitter, etc.)
|
|
13
|
+
const stolenListeners = input.rawListeners("data").slice();
|
|
14
|
+
for (const fn of stolenListeners) input.removeListener("data", fn);
|
|
15
|
+
|
|
16
|
+
function finish(value, err) {
|
|
17
|
+
if (settled) return;
|
|
18
|
+
settled = true;
|
|
15
19
|
input.removeListener("data", onData);
|
|
16
20
|
if (signal) signal.removeEventListener("abort", onAbort);
|
|
17
|
-
if (isTTY) {
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
} catch {}
|
|
24
|
-
}
|
|
21
|
+
if (isTTY) { try { input.setRawMode(false); } catch {} }
|
|
22
|
+
try { input.pause(); } catch {}
|
|
23
|
+
// Restore stolen listeners
|
|
24
|
+
for (const fn of stolenListeners) input.on("data", fn);
|
|
25
|
+
if (err) reject(err);
|
|
26
|
+
else resolve(value);
|
|
25
27
|
}
|
|
28
|
+
|
|
26
29
|
function onData(chunk) {
|
|
27
|
-
if (
|
|
30
|
+
if (settled) return;
|
|
28
31
|
const str = chunk.toString();
|
|
29
32
|
for (const ch of str) {
|
|
30
33
|
if (ch === "\r") continue;
|
|
31
|
-
if (ch === "\n") {
|
|
32
|
-
|
|
33
|
-
output.write("\n");
|
|
34
|
-
resolve(buf);
|
|
35
|
-
return;
|
|
36
|
-
}
|
|
37
|
-
if (ch === "\u0003") {
|
|
38
|
-
cleanup();
|
|
39
|
-
reject(new DOMException("The operation was aborted.", "AbortError"));
|
|
40
|
-
return;
|
|
41
|
-
}
|
|
42
|
-
// Backspace
|
|
34
|
+
if (ch === "\n") { output.write("\n"); finish(buf); return; }
|
|
35
|
+
if (ch === "\u0003") { finish("", new DOMException("The operation was aborted.", "AbortError")); return; }
|
|
43
36
|
if (ch === "\u007f" || ch === "\b") {
|
|
44
|
-
if (buf.length > 0) {
|
|
45
|
-
buf = buf.slice(0, -1);
|
|
46
|
-
output.write("\b \b");
|
|
47
|
-
}
|
|
37
|
+
if (buf.length > 0) { buf = buf.slice(0, -1); output.write("\b \b"); }
|
|
48
38
|
continue;
|
|
49
39
|
}
|
|
50
|
-
// Skip non-printable control chars
|
|
51
40
|
if (ch.charCodeAt(0) < 32) continue;
|
|
52
41
|
buf += ch;
|
|
53
42
|
output.write(ch);
|
|
54
43
|
}
|
|
55
44
|
}
|
|
45
|
+
|
|
56
46
|
function onAbort() {
|
|
57
|
-
|
|
58
|
-
cleanup();
|
|
59
|
-
reject(signal.reason || new DOMException("The operation was aborted.", "AbortError"));
|
|
47
|
+
finish("", signal.reason || new DOMException("The operation was aborted.", "AbortError"));
|
|
60
48
|
}
|
|
61
49
|
|
|
62
|
-
//
|
|
63
|
-
if (isTTY) {
|
|
64
|
-
try { input.setRawMode(false); } catch {}
|
|
65
|
-
}
|
|
50
|
+
// Clean stdin state
|
|
51
|
+
if (isTTY) { try { input.setRawMode(false); } catch {} }
|
|
66
52
|
input.pause();
|
|
67
|
-
if (isTTY)
|
|
68
|
-
input.setRawMode(true);
|
|
69
|
-
}
|
|
53
|
+
if (isTTY) input.setRawMode(true);
|
|
70
54
|
input.on("data", onData);
|
|
71
55
|
input.resume();
|
|
72
56
|
if (signal) signal.addEventListener("abort", onAbort, { once: true });
|
package/src/workflow.js
CHANGED
|
@@ -8,6 +8,8 @@ import {
|
|
|
8
8
|
captureFrontendScreenshot,
|
|
9
9
|
detectFrontendSlop,
|
|
10
10
|
findFrontendPreviewEntry,
|
|
11
|
+
inspectFrontendArtifacts,
|
|
12
|
+
mergeFrontendSlop,
|
|
11
13
|
reviewFrontendTurn,
|
|
12
14
|
reviewFrontendScreenshot,
|
|
13
15
|
shouldForceFrontendRebuild,
|
|
@@ -180,9 +182,13 @@ export async function runBuildWorkflow({
|
|
|
180
182
|
}
|
|
181
183
|
}
|
|
182
184
|
|
|
183
|
-
const
|
|
185
|
+
const deterministicSlop = designReview
|
|
184
186
|
? detectFrontendSlop({ promptText, assistantText: activeResponse.content || "", receipt: activeReceipt, designReview })
|
|
185
187
|
: null;
|
|
188
|
+
const artifactSlop = designReview
|
|
189
|
+
? await inspectFrontendArtifacts({ cwd: context.cwd, receipt: activeReceipt, frontend: frontendCtx?.frontend || null })
|
|
190
|
+
: null;
|
|
191
|
+
const designSlop = mergeFrontendSlop(deterministicSlop, artifactSlop);
|
|
186
192
|
|
|
187
193
|
return { impact, review, designReview, designSlop, screenshotReview, screenshotPath };
|
|
188
194
|
}
|