@tritard/waterbrother 0.8.33 → 0.8.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tritard/waterbrother",
3
- "version": "0.8.33",
3
+ "version": "0.8.35",
4
4
  "description": "Waterbrother: Grok-powered coding CLI with local tools, sessions, operator modes, and approval controls",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli.js CHANGED
@@ -14,7 +14,21 @@ import { expandHomePath } from "./path-utils.js";
14
14
  import { AUTONOMY_MODES, buildOperatorIdentity, EXPERIENCE_MODES, modeDefaults, normalizeAutonomyMode, normalizeExperienceMode } from "./modes.js";
15
15
  import { computeImpactMap } from "./impact.js";
16
16
  import { reviewTurn } from "./reviewer.js";
17
- import { buildFrontendExecutionContext, reviewFrontendTurn, shouldRunFrontendReview } from "./frontend.js";
17
+ import {
18
+ buildFrontendExecutionContext,
19
+ buildFrontendRebuildPrompt,
20
+ buildFrontendRevisionPrompt,
21
+ captureFrontendScreenshot,
22
+ detectFrontendSlop,
23
+ findFrontendPreviewEntry,
24
+ inspectFrontendArtifacts,
25
+ mergeFrontendSlop,
26
+ reviewFrontendTurn,
27
+ reviewFrontendScreenshot,
28
+ shouldAutoReviseFrontend,
29
+ shouldForceFrontendRebuild,
30
+ shouldRunFrontendReview
31
+ } from "./frontend.js";
18
32
  import { loadTask, saveTask, listTasks, setActiveTask, getActiveTask, closeTask } from "./task-store.js";
19
33
  import { runDecisionPass, runInventPass, formatDecisionForDisplay, formatDecisionCompact, formatDecisionDetail } from "./decider.js";
20
34
  import { runBuildWorkflow, startFeatureTask, runChallengeWorkflow } from "./workflow.js";
@@ -1037,6 +1051,273 @@ async function enrichTurnArtifacts({ agent, context, promptText, assistantText,
1037
1051
  return receipt;
1038
1052
  }
1039
1053
 
1054
/**
 * Runs the post-turn analysis passes for a receipt and returns their results.
 * Nothing is persisted here; the caller decides what to store on the receipt.
 *
 * Passes run in this order:
 *   1. impact map (only for mutated receipts, unless impact is disabled)
 *   2. sentinel review (only for mutated receipts, unless the reviewer is disabled)
 *   3. frontend design review (when shouldRunFrontendReview says so)
 *   4. screenshot capture and screenshot review (only when a design review exists)
 *   5. frontend slop detection: text heuristics merged with artifact inspection
 *
 * Reviewer failures are converted into a "caution" verdict instead of being
 * thrown; screenshot failures are best-effort and simply yield no review.
 *
 * @param {object} options
 * @param {object} options.agent - Provides getModel() and getProfile().
 * @param {object} options.context - Provides cwd and the runtime settings.
 * @param {string} options.promptText - Prompt that produced the turn.
 * @param {string} options.assistantText - Assistant reply for the turn.
 * @param {object} options.receipt - Receipt of the completed turn.
 * @param {object|null} [options.frontend=null] - Frontend descriptor forwarded to the artifact inspection.
 * @param {AbortSignal} [options.signal] - Forwarded to the reviewer calls.
 * @returns {Promise<{impact: any, review: any, designReview: any, designSlop: any, screenshotReview: any, screenshotPath: any}>}
 */
async function analyzeTurnArtifacts({
  agent,
  context,
  promptText,
  assistantText,
  receipt,
  frontend = null,
  signal
}) {
  // Small closures keep the three reviewer calls readable while still
  // evaluating everything lazily, at the point of use.
  const reviewerModel = () => context.runtime.reviewer?.model || agent.getModel();
  const receiptWithDiff = () => ({ ...receipt, diff: receipt.diff || "" });
  const describeError = (error) => (error instanceof Error ? error.message : String(error));

  // 1. Impact map.
  let impact = receipt.impact || null;
  const wantsImpact = receipt.mutated && context.runtime.impact?.enabled !== false;
  if (wantsImpact) {
    impact = await computeImpactMap({
      cwd: context.cwd,
      changedFiles: receipt.changedFiles || [],
      maxRelated: context.runtime.impact?.maxRelated,
      maxTests: context.runtime.impact?.maxTests
    });
  }

  // 2. Sentinel review.
  let review = receipt.review || null;
  const wantsReview = receipt.mutated && context.runtime.reviewer?.enabled !== false;
  if (wantsReview) {
    try {
      review = await reviewTurn({
        apiKey: context.runtime.apiKey,
        baseUrl: context.runtime.baseUrl,
        model: reviewerModel(),
        promptText,
        assistantText,
        receipt: receiptWithDiff(),
        impact,
        maxDiffChars: context.runtime.reviewer?.maxDiffChars,
        signal
      });
    } catch (error) {
      review = {
        verdict: "caution",
        summary: `review failed: ${describeError(error)}`,
        concerns: ["Sentinel reviewer could not complete."],
        followups: []
      };
    }
  }

  // 3. Frontend design review.
  let designReview = receipt.designReview || null;
  const wantsDesignReview = shouldRunFrontendReview({ promptText, receipt, profile: agent.getProfile() });
  if (wantsDesignReview) {
    try {
      designReview = await reviewFrontendTurn({
        apiKey: context.runtime.apiKey,
        baseUrl: context.runtime.baseUrl,
        model: reviewerModel(),
        promptText,
        assistantText,
        receipt: receiptWithDiff(),
        signal
      });
    } catch (error) {
      designReview = {
        verdict: "caution",
        summary: `design review failed: ${describeError(error)}`,
        strengths: [],
        issues: ["Frontend design reviewer could not complete."],
        nextPass: []
      };
    }
  }

  // 4. Screenshot capture + review (best effort).
  let screenshotReview = null;
  let screenshotPath = null;
  if (designReview) {
    try {
      const previewEntry = await findFrontendPreviewEntry({ cwd: context.cwd, receipt });
      if (previewEntry) {
        screenshotPath = await captureFrontendScreenshot({ entryPath: previewEntry });
      }
      if (screenshotPath) {
        screenshotReview = await reviewFrontendScreenshot({
          apiKey: context.runtime.apiKey,
          baseUrl: context.runtime.baseUrl,
          model: reviewerModel(),
          screenshotPath,
          promptText,
          signal
        });
      }
    } catch {
      // A failed capture or review must not fail the turn; report no review.
      screenshotReview = null;
    }
  }

  // 5. Slop detection only makes sense when a design review exists.
  let deterministicSlop = null;
  let artifactSlop = null;
  if (designReview) {
    deterministicSlop = detectFrontendSlop({ promptText, assistantText, receipt, designReview });
    artifactSlop = await inspectFrontendArtifacts({ cwd: context.cwd, receipt, frontend });
  }
  const designSlop = mergeFrontendSlop(deterministicSlop, artifactSlop);

  return { impact, review, designReview, designSlop, screenshotReview, screenshotPath };
}
1160
+
1161
/**
 * Persists the analysis artifacts of a turn onto its receipt and records the
 * result on the runtime (`lastReceipt`, `lastImpact`).
 *
 * Only truthy artifact fields are written. When there is nothing to write the
 * receipt is returned untouched and no update call is made.
 *
 * @param {object} options
 * @param {object} options.agent - Provides toolRuntime.updateReceipt(id, updates).
 * @param {object} options.context - Its runtime receives lastReceipt / lastImpact.
 * @param {object|null} options.receipt - Receipt to update; a falsy value yields null.
 * @param {object|null} [options.artifacts] - Output of the turn analysis.
 * @param {AbortSignal} [options.signal] - Accepted for call-site symmetry; not used here.
 * @returns {Promise<object|null>} The updated receipt, or the original one when
 *   nothing changed or the update returned nothing.
 */
async function finalizeReceiptArtifacts({
  agent,
  context,
  receipt,
  artifacts,
  signal
}) {
  if (!receipt) {
    return null;
  }

  // Artifact fields that are copied onto the receipt when present.
  const persistedKeys = [
    "impact",
    "review",
    "designReview",
    "designSlop",
    "screenshotReview",
    "screenshotPath",
    "designRevision"
  ];

  const updates = {};
  for (const key of persistedKeys) {
    if (artifacts?.[key]) {
      updates[key] = artifacts[key];
    }
  }

  let finalReceipt = receipt;
  if (Object.keys(updates).length > 0) {
    const updated = await agent.toolRuntime.updateReceipt(receipt.id, updates);
    finalReceipt = updated || receipt;
  }

  context.runtime.lastReceipt = finalReceipt;
  context.runtime.lastImpact = artifacts?.impact || finalReceipt.impact || null;
  return finalReceipt;
}
1184
+
1185
/**
 * Analyzes a finished interactive frontend turn and, while the design review
 * or slop detection asks for it, runs automatic revision/rebuild passes.
 *
 * Each pass builds a revision (or rebuild) prompt from the latest artifacts,
 * runs another agent turn under a pass-specific execution context, completes
 * the turn to obtain a new receipt and re-analyzes it. The loop is bounded by
 * shouldAutoReviseFrontend.
 *
 * @param {object} options
 * @param {object} options.agent - Agent used to run the revision turns.
 * @param {object} options.context - CLI context (cwd, runtime settings).
 * @param {string} options.promptText - The original user prompt.
 * @param {object} options.response - Response of the turn being analyzed.
 * @param {object|null} options.receipt - Receipt of the turn being analyzed.
 * @param {object|null} [options.frontendExecutionContext=null] - Frontend
 *   execution context; without it no analysis or revision happens.
 * @param {object|null} [options.previousExecutionContext=null] - Context that
 *   is restored on the agent after every pass.
 * @param {AbortSignal} [options.signal] - Forwarded to turns and reviewers.
 * @returns {Promise<{response: object, receipt: object|null, artifacts: object|null}>}
 *   The last response, the last receipt that was actually produced, and the
 *   artifacts computed for that receipt (null when nothing was analyzed).
 */
async function maybeReviseInteractiveFrontend({
  agent,
  context,
  promptText,
  response,
  receipt,
  frontendExecutionContext = null,
  previousExecutionContext = null,
  signal
}) {
  if (!frontendExecutionContext || !receipt) {
    return { response, receipt, artifacts: null };
  }

  // Frontend context wins on key conflicts, but reminders from both are kept.
  const baseExecutionContext = { ...(previousExecutionContext || {}), ...frontendExecutionContext };
  if (previousExecutionContext?.reminders && frontendExecutionContext.reminders) {
    baseExecutionContext.reminders = `${previousExecutionContext.reminders}\n${frontendExecutionContext.reminders}`;
  }
  const frontend = frontendExecutionContext.frontend || null;

  let activeResponse = response;
  let activeReceipt = receipt;
  let artifacts = await analyzeTurnArtifacts({
    agent,
    context,
    promptText,
    assistantText: activeResponse.content || "",
    receipt: activeReceipt,
    frontend,
    signal
  });

  let revisionCount = 0;
  const revisionHistory = [];

  while (shouldAutoReviseFrontend({
    designReview: artifacts.designReview,
    slop: artifacts.designSlop,
    revisionCount
  })) {
    const passNumber = revisionCount + 1;
    const forceRebuild = shouldForceFrontendRebuild({
      frontend,
      slop: artifacts.designSlop,
      revisionCount
    });

    // Record why this pass was triggered before the artifacts are replaced.
    revisionHistory.push({
      passNumber,
      verdict: artifacts.designReview?.verdict || null,
      summary: String(artifacts.designReview?.summary || "").trim(),
      slopFlags: Array.isArray(artifacts.designSlop?.flags) ? [...artifacts.designSlop.flags] : [],
      mode: forceRebuild ? "rebuild" : "revise"
    });

    const revisionPrompt = forceRebuild
      ? buildFrontendRebuildPrompt({
          originalPrompt: promptText,
          frontend,
          designReview: artifacts.designReview,
          slop: artifacts.designSlop,
          screenshotReview: artifacts.screenshotReview
        })
      : buildFrontendRevisionPrompt({
          originalPrompt: promptText,
          designReview: artifacts.designReview,
          slop: artifacts.designSlop,
          screenshotReview: artifacts.screenshotReview
        });

    const revisionSpinner = createProgressSpinner(
      forceRebuild ? `rebuilding frontend (${passNumber})...` : `revising frontend (${passNumber})...`
    );
    // Everything after the spinner starts lives inside the try so that the
    // finally block always stops it, even if setting up the pass throws.
    try {
      printLiveTrace(
        forceRebuild ? `frontend rebuild pass ${passNumber}` : `frontend revision pass ${passNumber}`,
        context.runtime.traceMode
      );
      if (activeReceipt.contract) {
        agent.toolRuntime.setCurrentContract(activeReceipt.contract);
      }
      agent.setExecutionContext({
        ...baseExecutionContext,
        phase: forceRebuild ? `design-rebuild-${passNumber}` : `design-revision-${passNumber}`,
        reminders: [
          baseExecutionContext.reminders || "",
          forceRebuild
            ? `Automatic rebuild pass ${passNumber}: discard the previous frontend direction and rebuild within the same contract using the benchmark starter skeleton.`
            : passNumber === 1
              ? "Automatic second pass: fix the flagged frontend design issues without widening scope."
              : `Automatic follow-up pass ${passNumber}: remove any remaining benchmark hard-fail patterns.`
        ].filter(Boolean).join("\n")
      });

      activeResponse = await agent.runTurn(revisionPrompt, {
        signal,
        onStateChange(state) {
          printLiveTrace(`state=${state}`, context.runtime.traceMode, { verboseOnly: true });
        },
        onToolStart(toolCall) {
          const toolName = toolCall?.function?.name || "tool";
          printLiveTrace(`using ${toolName}`, context.runtime.traceMode);
        },
        onToolEnd(toolCall, result) {
          const toolName = toolCall?.function?.name || "tool";
          const status = parseToolResultShape(result);
          printLiveTrace(`${toolName} ${status}`, context.runtime.traceMode);
        }
      });

      // Keep the last receipt we actually have: if this pass produced none,
      // the artifacts computed so far still describe `activeReceipt`, so the
      // two must be returned together rather than handing back a null receipt.
      const revisedReceipt = await agent.toolRuntime.completeTurn({ signal });
      if (!revisedReceipt) break;
      activeReceipt = revisedReceipt;

      artifacts = await analyzeTurnArtifacts({
        agent,
        context,
        promptText,
        assistantText: activeResponse.content || "",
        receipt: activeReceipt,
        frontend,
        signal
      });
      revisionCount += 1;
    } finally {
      revisionSpinner.stop();
      agent.setExecutionContext(previousExecutionContext);
    }
  }

  if (revisionCount > 0) {
    artifacts.designRevision = {
      triggered: true,
      passes: revisionCount,
      history: revisionHistory
    };
  }

  return { response: activeResponse, receipt: activeReceipt, artifacts };
}
1320
+
1040
1321
  function describeOperator(runtime, agent) {
1041
1322
  const identity = buildOperatorIdentity({
1042
1323
  mode: agent.getExperienceMode(),
@@ -3654,18 +3935,48 @@ async function runTextTurnInteractive({
3654
3935
  precomputedReceipt = candidateReceipt;
3655
3936
  }
3656
3937
  }
3938
+ let finalizedArtifacts = null;
3939
+ let finalizedReceipt = precomputedReceipt || await agent.toolRuntime.completeTurn({ signal: abortController?.signal });
3940
+ if (frontendExecutionContext && finalizedReceipt) {
3941
+ const revisedFrontendTurn = await maybeReviseInteractiveFrontend({
3942
+ agent,
3943
+ context,
3944
+ promptText: effectivePromptText,
3945
+ response,
3946
+ receipt: finalizedReceipt,
3947
+ frontendExecutionContext,
3948
+ previousExecutionContext,
3949
+ signal: abortController?.signal
3950
+ });
3951
+ response = revisedFrontendTurn.response || response;
3952
+ renderedAssistantText = response.content || renderedAssistantText;
3953
+ finalizedReceipt = revisedFrontendTurn.receipt || finalizedReceipt;
3954
+ finalizedArtifacts = revisedFrontendTurn.artifacts || null;
3955
+ }
3956
+
3657
3957
  printAssistantOutput(renderedAssistantText);
3658
3958
  await setSessionRunState(currentSession, agent, "done");
3659
3959
  printTurnSummary(turnSummary, response, { modelId: agent.getModel(), costTracker: context.costTracker, traceMode: context.runtime.traceMode });
3660
3960
  printTraceTimeline(turnSummary, context.runtime.traceMode);
3661
- const receipt = await enrichTurnArtifacts({
3662
- agent,
3663
- context,
3664
- promptText: effectivePromptText,
3665
- assistantText: response.content || '',
3666
- signal: abortController?.signal
3667
- });
3668
- const finalReceipt = receipt || precomputedReceipt;
3961
+ let finalReceipt = null;
3962
+ if (finalizedReceipt && finalizedArtifacts) {
3963
+ finalReceipt = await finalizeReceiptArtifacts({
3964
+ agent,
3965
+ context,
3966
+ receipt: finalizedReceipt,
3967
+ artifacts: finalizedArtifacts,
3968
+ signal: abortController?.signal
3969
+ });
3970
+ } else {
3971
+ const receipt = await enrichTurnArtifacts({
3972
+ agent,
3973
+ context,
3974
+ promptText: effectivePromptText,
3975
+ assistantText: response.content || "",
3976
+ signal: abortController?.signal
3977
+ });
3978
+ finalReceipt = receipt || finalizedReceipt;
3979
+ }
3669
3980
  if (finalReceipt) {
3670
3981
  currentSession.lastReceiptId = finalReceipt.id;
3671
3982
  if (shouldPrintReceiptSummary(finalReceipt, context.runtime.receiptMode)) {
package/src/frontend.js CHANGED
@@ -47,7 +47,7 @@ Rules:
47
47
  - Be concrete about visible layout, spacing, typography, contrast, composition, and interaction cues.
48
48
  - Do not wrap JSON in markdown.`;
49
49
 
50
- const BENCHMARK_FRONTEND_PROMPT = /\b(?:benchmark|squarespace quality|ultimate design|first class|on par|codex|claude code|cc)\b/i;
50
+ const BENCHMARK_FRONTEND_PROMPT = /\b(?:benchmark|squarespace quality|ultimate design|first class|on par|codex|claude code|cc|winning shopify|winning ecommerce|high-converting|flagship pdp)\b/i;
51
51
 
52
52
  const UNIVERSAL_FRONTEND_REMINDERS = [
53
53
  "Choose one visual direction and stay consistent across typography, spacing, color, and motion.",
@@ -63,7 +63,8 @@ const BENCHMARK_FRONTEND_REMINDERS = [
63
63
  "For benchmark frontend tasks, cut generic reflective-editorial copy. Use sharper, more concrete language or neutral structural placeholders instead of atmosphere-writing.",
64
64
  "For benchmark frontend tasks, force at least one asymmetrical or compositionally distinctive move instead of a fully balanced template layout.",
65
65
  "Benchmark mode: treat fake issue framing, fake publication history, fake keyboard shortcuts, invented named contributors, and demo-page behavior as disallowed outputs. Use neutral structural placeholders if needed.",
66
- "Benchmark mode: the page should feel publishable, not like a static demo or wireframe explanation."
66
+ "Benchmark mode: the page should feel publishable, not like a static demo or wireframe explanation.",
67
+ "Benchmark mode: do not label the page as a demo, benchmark, placeholder, or structural exercise inside the UI."
67
68
  ];
68
69
 
69
70
  const BENCHMARK_SITE_TYPE_RULES = {
@@ -71,7 +72,9 @@ const BENCHMARK_SITE_TYPE_RULES = {
71
72
  "Benchmark blog mode: use neutral structural placeholders or concrete subject matter instead of publication worldbuilding or reflective-editorial atmosphere prose.",
72
73
  "Benchmark blog mode: do not wrap the page in a generic publication shell like Journal, Featured Essay, Latest Dispatches, Notes, Archive, Print Edition, Submit Work, or similar magazine-site framing.",
73
74
  "Benchmark blog mode: avoid the default editorial scaffold of hero, story list, archive rail, topics grid, and publication footer unless the user explicitly asked for a magazine-style site.",
74
- "Benchmark blog mode: do not use Tailwind CDN starter theming, Font Awesome chrome, picsum/placehold imagery, fake keyboard/search chrome, or demo-only modal article previews."
75
+ "Benchmark blog mode: do not use Tailwind CDN starter theming, Font Awesome chrome, picsum/placehold imagery, fake keyboard/search chrome, or demo-only modal article previews.",
76
+ "Benchmark blog mode: include one real editorial image surface or one authored abstract visual composition. Do not replace visuals with text placeholders.",
77
+ "Benchmark blog mode: do not invent named authors, editors, contributors, or interview credits unless the user explicitly asked for fictional worldbuilding."
75
78
  ],
76
79
  store: [
77
80
  "Benchmark ecommerce mode: prioritize conversion architecture over editorial styling. The page should sell, not just look clean.",
@@ -79,7 +82,8 @@ const BENCHMARK_SITE_TYPE_RULES = {
79
82
  "Benchmark ecommerce mode: do not leave the main merchandising surface as a literal placeholder, dashed demo box, emoji stand-in, or visual wireframe.",
80
83
  "Benchmark ecommerce mode: do not use Tailwind CDN starter theming, Font Awesome chrome, or placeholder product images such as picsum/placehold on the live merchandising surface.",
81
84
  "Benchmark ecommerce mode: fake command palettes, fake app shortcuts, demo-only cart gimmicks, and unrelated theme chrome are disallowed.",
82
- "Benchmark ecommerce mode: fake review counts, fake bestseller labels, fake sales counters, and fake as-featured-in proof are disallowed."
85
+ "Benchmark ecommerce mode: fake review counts, fake bestseller labels, fake sales counters, and fake as-featured-in proof are disallowed.",
86
+ "Benchmark ecommerce mode: include a real product-photo surface or an authored product-style render/silhouette. Do not replace the hero with text placeholders."
83
87
  ]
84
88
  };
85
89
 
@@ -234,7 +238,8 @@ const BENCHMARK_STARTERS = {
234
238
  "Section 2: one dominant lead piece with a strong headline and one supporting block offset beside or below it.",
235
239
  "Section 3: one compact secondary list or pair of entries, not an archive rail plus topics plus notes stack.",
236
240
  "Section 4: minimal closing footer note only if needed.",
237
- "No publication shell, no archive/topic chrome, no subscribe CTA, no demo interactions, no explanation that the page is a demo."
241
+ "No publication shell, no archive/topic chrome, no subscribe CTA, no author bylines, no contributor credits, no demo interactions, no explanation that the page is a demo.",
242
+ "Use one strong visual surface: a real editorial image URL or an authored abstract composition made with CSS/SVG."
238
243
  ],
239
244
  store: [
240
245
  "Benchmark store starter: build a flagship single-product PDP with 4-5 sections max.",
@@ -243,7 +248,9 @@ const BENCHMARK_STARTERS = {
243
248
  "Section 3: three focused benefit/features blocks.",
244
249
  "Section 4: material/specification or craftsmanship band with objection handling.",
245
250
  "Section 5: compact FAQ or guarantee block if still needed.",
246
- "No Tailwind starter shell, no fake reviews, no fake badges, no placeholder gallery, no emoji or dashed placeholder hero."
251
+ "No Tailwind starter shell, no fake reviews, no fake badges, no placeholder gallery, no emoji or dashed placeholder hero, no alert-based checkout, no keyboard shortcut chrome, no demo footer copy.",
252
+ "The hero must merchandise the product with a believable product surface and product-specific benefit language, not generic luxury-commerce filler.",
253
+ "Use a real product image URL or an authored SVG/CSS product silhouette with material/shadow treatment. Never use text-only image placeholders."
247
254
  ]
248
255
  };
249
256
 
@@ -275,6 +282,7 @@ const SLOP_PATTERNS = [
275
282
  { key: "premium_blog_trope", label: "generic premium-blog editorial trope", pattern: /\b(?:thoughtful living|slow living|curated reflections|crafted with intention|made with intention|a quiet publication|made with restraint|journal of attention|discipline of seeing|private notes made public)\b/i, weight: 2 },
276
283
  { key: "reflective_editorial_copy", label: "generic reflective-editorial copy trope", pattern: /\b(?:quiet architecture of attention|great thinkers understood|value of slowness|the examined life|deliberate cultivation of focus|most radical act|what we have forgotten|how everyday items carry|still matters)\b/i, weight: 2 },
277
284
  { key: "fictional_publication_brand", label: "invented publication/author scaffolding", pattern: /\b(?:by [A-Z][a-z]+ [A-Z][a-z]+|photography by [A-Z][a-z]+ [A-Z][a-z]+|journal of attention|vesper|lumen|dispatches|vol\.\s*\d+|winter 20\d{2}|autumn 20\d{2}|spring 20\d{2}|summer 20\d{2}|the quarterly|editorial\.)\b/i, weight: 3 },
285
+ { key: "author_bylines", label: "named author or contributor byline", pattern: /\b(?:by [A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+)+|contributor|essay by|interview by)\b/i, weight: 3 },
278
286
  { key: "fake_social_proof", label: "fake reader count or social proof", pattern: /\b(?:\d+\s+readers joined this month|\d+\s+subscribers?|trusted by)\b/i, weight: 2 },
279
287
  { key: "placeholder_surface", label: "placeholder surface or stand-in hero", pattern: /\b(?:placeholder|visual placeholder|product representation|hero placeholder|dashed|emoji stand-in|🧥|🎧)\b/i, weight: 3 }
280
288
  ];
@@ -308,6 +316,20 @@ function normalizeContent(content) {
308
316
  return "";
309
317
  }
310
318
 
319
/**
 * Tells whether an href/src value points somewhere other than a file that
 * lives next to the HTML document.
 *
 * A value is considered external when it starts with a URI scheme
 * (`https:`, `data:`, `mailto:`, `android-app:` ...), is protocol-relative
 * (`//host/...`), or is a pure fragment (`#section`).
 *
 * The scheme is matched with the RFC 3986 grammar (a letter followed by
 * letters, digits, "+", "-" or "."), so schemes containing digits or
 * punctuation are not mistaken for relative file paths.
 *
 * @param {unknown} value - Raw attribute value; nullish values count as local.
 * @returns {boolean} True when the value must not be resolved on disk.
 */
function isExternalAssetReference(value) {
  return /^(?:[a-z][a-z0-9+.-]*:|\/\/|#)/i.test(String(value || ""));
}
322
+
323
/**
 * Collects the distinct local file references found in `href="..."` and
 * `src="..."` attributes of an HTML string.
 *
 * External values (as decided by isExternalAssetReference) are skipped. The
 * query string and the fragment are stripped, so `sprite.svg#logo` and
 * `site.css?v=3` are reported as `sprite.svg` and `site.css`. Values that
 * consist only of a query or fragment yield nothing.
 *
 * @param {string} [htmlText=""] - HTML source to scan.
 * @returns {string[]} Unique references in order of first appearance.
 */
function extractLocalAssetReferences(htmlText = "") {
  const refs = new Set();
  for (const match of String(htmlText).matchAll(/\b(?:href|src)=["']([^"']+)["']/gi)) {
    const raw = String(match[1] || "").trim();
    if (!raw || isExternalAssetReference(raw)) continue;
    // Only the path part identifies the file on disk.
    const filePart = raw.split(/[?#]/, 1)[0].trim();
    if (filePart) refs.add(filePart);
  }
  return [...refs];
}
332
+
311
333
  function inferMimeType(filePath) {
312
334
  const lower = String(filePath || "").toLowerCase();
313
335
  if (lower.endsWith(".png")) return "image/png";
@@ -489,6 +511,10 @@ export function detectFrontendSlop({ promptText = "", assistantText = "", receip
489
511
  flags.push("benchmark store used fake proof or badge chrome");
490
512
  score += 4;
491
513
  }
514
+ if (siteType === "store" && benchmarkMode && /\b(?:Objects for a considered life|considered life|timeless essentials|designed for modern living)\b/i.test(haystack)) {
515
+ flags.push("benchmark store fell back to generic luxury-commerce copy");
516
+ score += 2;
517
+ }
492
518
  if (siteType === "store" && benchmarkMode && /\b(?:metaKey && e\.key === ['"]k['"]|command palette|keyboard shortcuts?)\b/i.test(haystack)) {
493
519
  flags.push("benchmark store used fake keyboard or shortcut chrome");
494
520
  score += 4;
@@ -517,6 +543,10 @@ export function detectFrontendSlop({ promptText = "", assistantText = "", receip
517
543
  flags.push("benchmark blog fell back to essay-magazine placeholder language");
518
544
  score += 3;
519
545
  }
546
+ if (siteType === "blog" && benchmarkMode && /\b(?:by [A-Z][A-Za-z]+(?:\s+[A-Z][A-Za-z]+)+|contributor|essay by|interview by)\b/i.test(haystack)) {
547
+ flags.push("benchmark blog used invented author or contributor bylines");
548
+ score += 4;
549
+ }
520
550
  if (siteType === "blog" && benchmarkMode && /cdn\.tailwindcss\.com/i.test(haystack)) {
521
551
  flags.push("benchmark blog relied on Tailwind CDN starter theming");
522
552
  score += 4;
@@ -551,12 +581,78 @@ export function detectFrontendSlop({ promptText = "", assistantText = "", receip
551
581
  return {
552
582
  score,
553
583
  flags,
554
- hardBlock: flags.some((flag) => /fictional publication identity|generic publication-shell framing|essay-magazine placeholder language|fake keyboard|reflective-editorial atmosphere|primary merchandising surface as a placeholder|placeholder hero surface|Tailwind CDN starter theming|Font Awesome chrome|placeholder product imagery|placeholder imagery|fake proof or badge chrome|demo interaction chrome|demo page explanation copy/.test(flag)),
584
+ hardBlock: flags.some((flag) => /fictional publication identity|generic publication-shell framing|essay-magazine placeholder language|invented author or contributor bylines|fake keyboard|reflective-editorial atmosphere|primary merchandising surface as a placeholder|placeholder hero surface|Tailwind CDN starter theming|Font Awesome chrome|placeholder product imagery|placeholder imagery|fake proof or badge chrome|demo interaction chrome|demo page explanation copy/.test(flag)),
555
585
  severe: score >= 5,
556
586
  summary: flags.length > 0 ? `frontend slop flags: ${flags.join(", ")}` : "no deterministic frontend slop flags"
557
587
  };
558
588
  }
559
589
 
590
/**
 * Inspects the HTML files changed by a turn for problems that can be detected
 * from the files themselves.
 *
 * For every changed `.html` file that can be read:
 *   - each local href/src reference must exist on disk relative to the HTML
 *     file (+4 per missing reference); percent-encoded references such as
 *     `hero%20image.png` are also checked in their decoded form;
 *   - in benchmark mode the page must contain a visual surface and no
 *     placeholder markers (+4), and must not contain demo behavior or demo
 *     explanation copy such as `alert(...)`, `prompt(...)`, `console.log`
 *     (+3).
 *
 * Unreadable files are skipped. Any flag makes the result a hard block.
 *
 * @param {object} [options]
 * @param {string} [options.cwd] - Base for relative paths; defaults to process.cwd().
 * @param {object|null} [options.receipt=null] - Receipt whose changedFiles are inspected.
 * @param {object|null} [options.frontend=null] - Frontend descriptor; benchmarkMode and siteType are read.
 * @returns {Promise<{score: number, flags: string[], hardBlock: boolean, severe: boolean, summary: string}>}
 */
export async function inspectFrontendArtifacts({ cwd, receipt = null, frontend = null } = {}) {
  const changedFiles = Array.isArray(receipt?.changedFiles) ? receipt.changedFiles : [];
  const htmlFiles = changedFiles
    .map((filePath) => String(filePath || "").trim())
    .filter((filePath) => filePath && filePath.toLowerCase().endsWith(".html"));
  const flags = [];
  let score = 0;

  // A reference exists when either its literal spelling or its
  // percent-decoded spelling resolves to an accessible path.
  const referenceExists = async (baseDir, ref) => {
    const spellings = [ref];
    try {
      const decoded = decodeURIComponent(ref);
      if (decoded !== ref) spellings.push(decoded);
    } catch {
      // Malformed percent-escape: only the literal spelling can be checked.
    }
    for (const spelling of spellings) {
      try {
        await fs.access(path.resolve(baseDir, spelling));
        return true;
      } catch {
        // Not accessible under this spelling; try the next one.
      }
    }
    return false;
  };

  for (const filePath of htmlFiles) {
    const absolute = path.isAbsolute(filePath) ? filePath : path.resolve(cwd || process.cwd(), filePath);
    let htmlText = "";
    try {
      htmlText = await fs.readFile(absolute, "utf8");
    } catch {
      // Deleted or unreadable file: nothing to inspect.
      continue;
    }

    const htmlDir = path.dirname(absolute);
    for (const ref of extractLocalAssetReferences(htmlText)) {
      if (!(await referenceExists(htmlDir, ref))) {
        flags.push(`missing local asset reference: ${ref}`);
        score += 4;
      }
    }

    if (frontend?.benchmarkMode) {
      const hasVisualSurface = /<img\b|background-image\s*:|url\((?!['"]?(?:https?:|data:))/i.test(htmlText) || /<svg\b/i.test(htmlText);
      const hasPlaceholderSurface = /\b(?:visual-placeholder|image-placeholder|replace with high[- ]resolution|replace with high quality photography|text-only image placeholder|product representation)\b/i.test(htmlText);
      if (!hasVisualSurface || hasPlaceholderSurface) {
        flags.push(`benchmark ${frontend.siteType} lacks a real visual surface`);
        score += 4;
      }
      // No trailing \b after "alert(" / "prompt(": "(" is not a word
      // character, so a trailing boundary would only match when the first
      // argument starts with a letter or digit and would miss alert("...").
      if (/\b(?:console\.log\b|alert\(|prompt\(|static demo\b|demo product page\b|all content is placeholder\b)/i.test(htmlText)) {
        flags.push(`benchmark ${frontend.siteType} still contains demo behavior or explanation copy`);
        score += 3;
      }
    }
  }

  const uniqueFlags = [...new Set(flags)];
  return {
    score,
    flags: uniqueFlags,
    hardBlock: flags.length > 0,
    severe: score >= 5,
    summary: flags.length ? `frontend artifact issues: ${uniqueFlags.join(", ")}` : "no frontend artifact issues"
  };
}
640
+
641
/**
 * Combines two slop reports into one.
 *
 * When only one report is present it is returned as-is (same object); when
 * neither is present the result is null. Otherwise scores are added, flags
 * are concatenated without duplicates, and `hardBlock` / `severe` are true
 * when either input says so (or, for `severe`, when the combined score
 * reaches 5).
 *
 * Malformed inputs are tolerated: a `flags` value that is not an array is
 * ignored and a score that is not a finite number counts as 0, so a single
 * bad report cannot turn the merged score into NaN or split a string into
 * one-character flags.
 *
 * @param {object|null} [base=null] - First report.
 * @param {object|null} [extra=null] - Second report.
 * @returns {{score: number, flags: string[], hardBlock: boolean, severe: boolean, summary: string}|object|null}
 */
export function mergeFrontendSlop(base = null, extra = null) {
  if (!base && !extra) return null;
  if (!base) return extra;
  if (!extra) return base;

  const flagsOf = (report) => (Array.isArray(report.flags) ? report.flags : []);
  const scoreOf = (report) => {
    const value = Number(report.score || 0);
    return Number.isFinite(value) ? value : 0;
  };

  const flags = [...new Set([...flagsOf(base), ...flagsOf(extra)])];
  const score = scoreOf(base) + scoreOf(extra);
  return {
    score,
    flags,
    hardBlock: Boolean(base.hardBlock || extra.hardBlock),
    severe: score >= 5 || Boolean(base.severe || extra.severe),
    summary: flags.length ? `frontend slop flags: ${flags.join(", ")}` : "no deterministic frontend slop flags"
  };
}
655
+
560
656
  export function shouldAutoReviseFrontend({ designReview = null, slop = null, revisionCount = 0 } = {}) {
561
657
  if (revisionCount >= 3) return false;
562
658
  if (!designReview) return false;
package/src/prompt.js CHANGED
@@ -6,67 +6,51 @@ export async function promptLine(label, { input = process.stdin, output = proces
6
6
  return;
7
7
  }
8
8
  let buf = "";
9
- let resolved = false;
10
- const shouldPauseOnCleanup = typeof input.pause === "function";
9
+ let settled = false;
11
10
  const isTTY = input.isTTY && typeof input.setRawMode === "function";
12
11
 
13
- function cleanup() {
14
- resolved = true;
12
+ // Steal stdin from existing data listeners (readline keypress emitter, etc.)
13
+ const stolenListeners = input.rawListeners("data").slice();
14
+ for (const fn of stolenListeners) input.removeListener("data", fn);
15
+
16
+ function finish(value, err) {
17
+ if (settled) return;
18
+ settled = true;
15
19
  input.removeListener("data", onData);
16
20
  if (signal) signal.removeEventListener("abort", onAbort);
17
- if (isTTY) {
18
- try { input.setRawMode(false); } catch {}
19
- }
20
- if (shouldPauseOnCleanup) {
21
- try {
22
- input.pause();
23
- } catch {}
24
- }
21
+ if (isTTY) { try { input.setRawMode(false); } catch {} }
22
+ try { input.pause(); } catch {}
23
+ // Restore stolen listeners
24
+ for (const fn of stolenListeners) input.on("data", fn);
25
+ if (err) reject(err);
26
+ else resolve(value);
25
27
  }
28
+
26
29
  function onData(chunk) {
27
- if (resolved) return;
30
+ if (settled) return;
28
31
  const str = chunk.toString();
29
32
  for (const ch of str) {
30
33
  if (ch === "\r") continue;
31
- if (ch === "\n") {
32
- cleanup();
33
- output.write("\n");
34
- resolve(buf);
35
- return;
36
- }
37
- if (ch === "\u0003") {
38
- cleanup();
39
- reject(new DOMException("The operation was aborted.", "AbortError"));
40
- return;
41
- }
42
- // Backspace
34
+ if (ch === "\n") { output.write("\n"); finish(buf); return; }
35
+ if (ch === "\u0003") { finish("", new DOMException("The operation was aborted.", "AbortError")); return; }
43
36
  if (ch === "\u007f" || ch === "\b") {
44
- if (buf.length > 0) {
45
- buf = buf.slice(0, -1);
46
- output.write("\b \b");
47
- }
37
+ if (buf.length > 0) { buf = buf.slice(0, -1); output.write("\b \b"); }
48
38
  continue;
49
39
  }
50
- // Skip non-printable control chars
51
40
  if (ch.charCodeAt(0) < 32) continue;
52
41
  buf += ch;
53
42
  output.write(ch);
54
43
  }
55
44
  }
45
+
56
46
  function onAbort() {
57
- if (resolved) return;
58
- cleanup();
59
- reject(signal.reason || new DOMException("The operation was aborted.", "AbortError"));
47
+ finish("", signal.reason || new DOMException("The operation was aborted.", "AbortError"));
60
48
  }
61
49
 
62
- // Ensure clean stdin state before listening
63
- if (isTTY) {
64
- try { input.setRawMode(false); } catch {}
65
- }
50
+ // Clean stdin state
51
+ if (isTTY) { try { input.setRawMode(false); } catch {} }
66
52
  input.pause();
67
- if (isTTY) {
68
- input.setRawMode(true);
69
- }
53
+ if (isTTY) input.setRawMode(true);
70
54
  input.on("data", onData);
71
55
  input.resume();
72
56
  if (signal) signal.addEventListener("abort", onAbort, { once: true });
package/src/workflow.js CHANGED
@@ -8,6 +8,8 @@ import {
8
8
  captureFrontendScreenshot,
9
9
  detectFrontendSlop,
10
10
  findFrontendPreviewEntry,
11
+ inspectFrontendArtifacts,
12
+ mergeFrontendSlop,
11
13
  reviewFrontendTurn,
12
14
  reviewFrontendScreenshot,
13
15
  shouldForceFrontendRebuild,
@@ -180,9 +182,13 @@ export async function runBuildWorkflow({
180
182
  }
181
183
  }
182
184
 
183
- const designSlop = designReview
185
+ const deterministicSlop = designReview
184
186
  ? detectFrontendSlop({ promptText, assistantText: activeResponse.content || "", receipt: activeReceipt, designReview })
185
187
  : null;
188
+ const artifactSlop = designReview
189
+ ? await inspectFrontendArtifacts({ cwd: context.cwd, receipt: activeReceipt, frontend: frontendCtx?.frontend || null })
190
+ : null;
191
+ const designSlop = mergeFrontendSlop(deterministicSlop, artifactSlop);
186
192
 
187
193
  return { impact, review, designReview, designSlop, screenshotReview, screenshotPath };
188
194
  }