@tritard/waterbrother 0.8.39 → 0.8.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tritard/waterbrother",
3
- "version": "0.8.39",
3
+ "version": "0.8.41",
4
4
  "description": "Waterbrother: Grok-powered coding CLI with local tools, sessions, operator modes, and approval controls",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli.js CHANGED
@@ -621,6 +621,63 @@ function buildSyntheticAssistantOutput(receipt) {
621
621
  return null;
622
622
  }
623
623
 
624
/**
 * Collect the changed file paths recorded on a receipt for display.
 *
 * Entries are stringified and trimmed, blank entries are dropped, duplicates
 * are removed (first occurrence wins), and the result is capped at `limit`.
 *
 * @param {{ changedFiles?: unknown[] } | null | undefined} receipt - Turn receipt; anything without an array `changedFiles` yields an empty list.
 * @param {number} [limit=6] - Maximum number of paths to return.
 * @returns {string[]} Unique, trimmed, non-empty paths in original order.
 */
function formatReceiptFileList(receipt, limit = 6) {
  if (!Array.isArray(receipt?.changedFiles)) return [];
  const uniquePaths = new Set();
  for (const entry of receipt.changedFiles) {
    const cleaned = String(entry || "").trim();
    if (cleaned) uniquePaths.add(cleaned);
  }
  return Array.from(uniquePaths).slice(0, limit);
}
630
+
631
/**
 * Swap the assistant's final message for a plain status report when a
 * benchmark frontend turn did not clear review.
 *
 * The original `assistantText` is returned untouched when the turn is not in
 * benchmark mode, or when nothing signals a downgrade. A downgrade is signalled
 * by an acceptance failure, a "weak" design or render verdict, or a "caution"
 * design verdict that is not offset by a "strong" render verdict.
 *
 * Review data is read from `artifacts` first and falls back to `receipt`.
 *
 * @param {object} [options]
 * @param {string} [options.assistantText=""] - Text the assistant produced for the turn.
 * @param {object|null} [options.receipt=null] - Turn receipt (changed files, optional reviews).
 * @param {object|null} [options.artifacts=null] - Analysed turn artifacts (reviews, slop, revision info).
 * @param {object|null} [options.frontend=null] - Frontend execution context; `benchmarkMode` gates everything.
 * @returns {string} Either `assistantText` or the downgraded status report, paragraphs separated by blank lines.
 */
function buildBenchmarkFrontendStatusOutput({
  assistantText = "",
  receipt = null,
  artifacts = null,
  frontend = null
} = {}) {
  if (!frontend?.benchmarkMode) return assistantText;
  const designReview = artifacts?.designReview || receipt?.designReview || null;
  const screenshotReview = artifacts?.screenshotReview || receipt?.screenshotReview || null;
  const designSlop = artifacts?.designSlop || receipt?.designSlop || null;
  const acceptanceFailure = getFrontendAcceptanceFailure({
    frontend,
    slop: designSlop,
    designReview,
    screenshotReview
  });
  const designVerdict = String(designReview?.verdict || "").trim().toLowerCase();
  const renderVerdict = String(screenshotReview?.verdict || "").trim().toLowerCase();
  const shouldDowngrade =
    Boolean(acceptanceFailure) ||
    designVerdict === "weak" ||
    renderVerdict === "weak" ||
    (designVerdict === "caution" && renderVerdict !== "strong");
  if (!shouldDowngrade) return assistantText;

  const lines = [];
  const files = formatReceiptFileList(receipt);
  if (files.length > 0) {
    lines.push("Updated benchmark frontend files:");
    for (const filePath of files) lines.push(`- ${filePath}`);
  }

  let statusBody = "";
  if (acceptanceFailure?.reason) {
    statusBody = String(acceptanceFailure.reason).trim();
  } else {
    const statusParts = [];
    if (designVerdict) statusParts.push(`design ${designVerdict}`);
    if (renderVerdict) statusParts.push(`render ${renderVerdict}`);
    if (artifacts?.designRevision?.triggered) {
      statusParts.push(`auto-revised ${artifacts.designRevision.passes}x`);
    }
    statusBody = statusParts.join(", ");
  }
  // Reasons usually end with a review summary that already carries its own
  // full stop; strip it so the status line does not end in "..".
  const statusText = statusBody.replace(/[.\s]+$/, "");
  if (statusText) lines.push(`Current status: ${statusText}.`);

  // The acceptance-failure reason for a weak review embeds that review's
  // summary, so only append a summary the status line does not already show.
  const appendSummary = (summary) => {
    const text = String(summary || "").trim();
    if (text && !statusBody.includes(text)) lines.push(text);
  };
  appendSummary(designReview?.summary);
  if (renderVerdict === "weak") appendSummary(screenshotReview?.summary);

  lines.push("Result needs another pass before it should be treated as finished.");
  return lines.filter(Boolean).join("\n\n");
}
680
+
624
681
  function hasFrontendCodeEcho(text) {
625
682
  const body = String(text || "");
626
683
  return /```(?:html|css|js|javascript|jsx|tsx)?[\s\S]{120,}```/i.test(body) || /<!DOCTYPE html>/i.test(body);
@@ -1145,7 +1202,7 @@ async function analyzeTurnArtifacts({
1145
1202
  ? detectFrontendSlop({ promptText, assistantText, receipt, designReview })
1146
1203
  : null;
1147
1204
  const artifactSlop = designReview
1148
- ? await inspectFrontendArtifacts({ cwd: context.cwd, receipt, frontend })
1205
+ ? await inspectFrontendArtifacts({ cwd: context.cwd, promptText, receipt, frontend })
1149
1206
  : null;
1150
1207
  const designSlop = mergeFrontendSlop(deterministicSlop, artifactSlop);
1151
1208
 
@@ -1319,7 +1376,9 @@ async function maybeReviseInteractiveFrontend({
1319
1376
 
1320
1377
  const acceptanceFailure = getFrontendAcceptanceFailure({
1321
1378
  frontend: frontendExecutionContext.frontend || null,
1322
- slop: artifacts.designSlop
1379
+ slop: artifacts.designSlop,
1380
+ designReview: artifacts.designReview,
1381
+ screenshotReview: artifacts.screenshotReview
1323
1382
  });
1324
1383
  if (acceptanceFailure) {
1325
1384
  throw new Error(acceptanceFailure.reason);
@@ -3964,6 +4023,13 @@ async function runTextTurnInteractive({
3964
4023
  finalizedArtifacts = revisedFrontendTurn.artifacts || null;
3965
4024
  }
3966
4025
 
4026
+ renderedAssistantText = buildBenchmarkFrontendStatusOutput({
4027
+ assistantText: renderedAssistantText,
4028
+ receipt: finalizedReceipt,
4029
+ artifacts: finalizedArtifacts,
4030
+ frontend: frontendExecutionContext?.frontend || null
4031
+ });
4032
+
3967
4033
  printAssistantOutput(renderedAssistantText);
3968
4034
  await setSessionRunState(currentSession, agent, "done");
3969
4035
  printTurnSummary(turnSummary, response, { modelId: agent.getModel(), costTracker: context.costTracker, traceMode: context.runtime.traceMode });
package/src/frontend.js CHANGED
@@ -80,6 +80,7 @@ const BENCHMARK_SITE_TYPE_RULES = {
80
80
  store: [
81
81
  "Benchmark ecommerce mode: prioritize conversion architecture over editorial styling. The page should sell, not just look clean.",
82
82
  "Benchmark ecommerce mode: include proof, objection handling, and trust signals that belong on a real single-product PDP.",
83
+ "Benchmark ecommerce mode: if the prompt does not specify a product category, default to a neutral physical product class like headphones, a bag, a lamp, a jacket, or a speaker. Do not drift into skincare, beauty, supplement, or launch-ad landing-page tropes unless the prompt asks for them.",
83
84
  "Benchmark ecommerce mode: do not leave the main merchandising surface as a literal placeholder, dashed demo box, emoji stand-in, or visual wireframe.",
84
85
  "Benchmark ecommerce mode: do not use Tailwind CDN starter theming, Font Awesome chrome, or placeholder product images such as picsum/placehold on the live merchandising surface.",
85
86
  "Benchmark ecommerce mode: fake command palettes, fake app shortcuts, demo-only cart gimmicks, and unrelated theme chrome are disallowed.",
@@ -245,6 +246,7 @@ const BENCHMARK_STARTERS = {
245
246
  ],
246
247
  store: [
247
248
  "Benchmark store starter: build a flagship single-product PDP with 4-5 sections max.",
249
+ "If the prompt does not specify a product category, choose a neutral physical product such as headphones, a bag, a lamp, a speaker, or a jacket. Do not invent skincare/beauty/supplement DTC landing pages.",
248
250
  "Section 1: product-first hero with a product-shaped visual composition and purchase stack side by side.",
249
251
  "Section 2: trust or shipping/returns strip.",
250
252
  "Section 3: three focused benefit/features blocks.",
@@ -257,6 +259,67 @@ const BENCHMARK_STARTERS = {
257
259
  ]
258
260
  };
259
261
 
262
// Per-site-type stylistic directions that can be layered on a benchmark
// starter. Each row is [key, label, starter lines]; rows are expanded into
// `{ key, label, starter }` objects, grouped by site type, in the order listed.
const BENCHMARK_STARTER_VARIANTS = Object.fromEntries(
  [
    [
      "blog",
      [
        [
          "architectural-journal",
          "architectural journal",
          [
            "Variant shape: severe image-text split with one offset note card and one lean secondary pair.",
            "Palette should feel mineral, paper, and shadow-driven rather than warm lifestyle-editorial.",
            "Finish with one restrained final gesture, not a generic footer label."
          ]
        ],
        [
          "art-book-minimal",
          "art-book minimal",
          [
            "Variant shape: oversized white space, one dominant visual surface, one short text block, one caption-like secondary band.",
            "Treat typography like exhibition text: sparse, quiet, and deliberate.",
            "Avoid default editorial chrome entirely; let spacing and crop do the work."
          ]
        ],
        [
          "review-folio",
          "review folio",
          [
            "Variant shape: folio-like review page with strong masthead mark, one lead essay, and one compact pair of related notes.",
            "Use stronger typographic contrast and one unexpected compositional move, not a generic archive scaffold.",
            "Closing section should feel like a designed coda rather than a footer."
          ]
        ]
      ]
    ],
    [
      "store",
      [
        [
          "cold-premium-tech",
          "cold premium tech",
          [
            "Variant shape: hard-edged flagship PDP with dark hero, precise specification accents, and a compact proof/guarantee band.",
            "Use crisp hierarchy, hard contrast, and performance language rather than luxury-lifestyle filler.",
            "Cart UI should feel real and quiet; no gimmicks, no fake app chrome."
          ]
        ],
        [
          "warm-object-craft",
          "warm object craft",
          [
            "Variant shape: tactile product story with warm neutral palette, close material crop, and trust/objection handling near the CTA.",
            "Lead with material, construction, weight, care, and use-case details.",
            "Keep the page commercially sharp, but let the object feel physical and desirable."
          ]
        ],
        [
          "dense-conversion-pdp",
          "dense conversion pdp",
          [
            "Variant shape: stronger purchase stack with nearby shipping, returns, warranty, and one compact objection-handling block.",
            "Use a more aggressive conversion rhythm than the minimalist variants, but keep it authored and clean.",
            "Hero and cart item surfaces must both feel product-real, not abstract placeholders."
          ]
        ]
      ]
    ]
  ].map(([siteType, rows]) => [
    siteType,
    rows.map(([key, label, starter]) => ({ key, label, starter }))
  ])
);
322
+
260
323
  const BENCHMARK_FEWSHOT_EXAMPLES = {
261
324
  blog: {
262
325
  good: [
@@ -277,10 +340,12 @@ const BENCHMARK_FEWSHOT_EXAMPLES = {
277
340
  "Product-first hero with believable merchandising image or authored product render.",
278
341
  "Purchase stack plus trust, objections, and compact FAQ only if needed.",
279
342
  "Copy names material, dimensions, carry/use case, warranty, or care.",
343
+ "When the prompt is generic, stay with a neutral physical product class instead of drifting into skincare/beauty campaign pages.",
280
344
  "No alert-based checkout, no fake review counts, no fake featured-in strip, no demo footer."
281
345
  ],
282
346
  bad: [
283
347
  "Generic luxury filler like considered life, timeless essentials, or modern living.",
348
+ "Beauty/skincare serum landing pages when the user did not ask for that product category.",
284
349
  "Demo cart toasts, alert-based cart fallbacks, or 'in a real Shopify store' copy.",
285
350
  "Placeholder gallery or weak hero that does not merchandise the product.",
286
351
  "Fake proof chrome like bestseller badges, sold-this-month counters, or made-up review totals."
@@ -304,6 +369,16 @@ const BENCHMARK_GRADERS = {
304
369
  ]
305
370
  };
306
371
 
372
// Case-insensitive, whole-word matchers keyed by product category. Insertion
// order is significant to any consumer that takes the first match, so
// "beauty" stays first.
const STORE_PROMPT_CATEGORY_PATTERNS = Object.fromEntries(
  [
    ["beauty", ["beauty", "skincare", "skin care", "serum", "vitamin c", "retinol", "cleanser", "moisturizer", "cream", "cosmetic", "face oil", "face serum", "glow serum", "supplement", "wellness"]],
    ["audio", ["audio", "headphones", "earbuds", "speaker", "wireless", "anc", "bluetooth"]],
    ["bag", ["bag", "backpack", "tote", "wallet", "duffel", "carryall", "briefcase", "messenger"]],
    ["apparel", ["jacket", "coat", "shirt", "hoodie", "sweater", "pants", "denim", "apparel", "clothing", "garment"]],
    ["object", ["lamp", "chair", "desk", "ceramic", "table", "watch", "knife", "fountain pen", "pen", "bottle", "object", "furniture", "wood"]]
  ].map(([category, terms]) => [category, new RegExp(`\\b(?:${terms.join("|")})\\b`, "i")])
);

// Whole-word matcher for beauty/skincare vocabulary. Entries are regex
// fragments, not plain words (note the optional plural and whitespace forms).
const STORE_BEAUTY_LANDING_DRIFT_PATTERN = new RegExp(
  `\\b(?:${[
    "beauty",
    "skincare",
    "skin care",
    "serum",
    "vitamin c",
    "retinol",
    "cleanser",
    "moisturizer",
    "cream",
    "complexion",
    "dermatologist",
    "clinical results?",
    "ingredient(?:s)?",
    "before\\s*&\\s*after",
    "glow serum",
    "face oil"
  ].join("|")})\\b`,
  "i"
);
381
+
307
382
  const GENERIC_EDITORIAL_LAYOUT_PATTERNS = [
308
383
  /\bsticky\s+nav\b/i,
309
384
  /\bsticky\b/i,
@@ -351,6 +426,58 @@ function getBenchmarkStarter(siteType, benchmarkMode) {
351
426
  return BENCHMARK_STARTERS[siteType] || null;
352
427
  }
353
428
 
429
/**
 * Deterministic 32-bit hash of a string (multiply-by-31 rolling hash over
 * UTF-16 code units, wrapped to an unsigned 32-bit integer after each step).
 *
 * @param {string} [text=""] - Input; falsy values hash as the empty string.
 * @returns {number} Unsigned 32-bit integer; 0 for empty input.
 */
function stablePromptHash(text = "") {
  const source = String(text || "");
  let accumulator = 0;
  let position = 0;
  while (position < source.length) {
    // `>>> 0` keeps the running value in the unsigned 32-bit range.
    accumulator = (accumulator * 31 + source.charCodeAt(position)) >>> 0;
    position += 1;
  }
  return accumulator;
}
437
+
438
/**
 * Pick a starter variant for the given site type.
 *
 * Keyword rules on the lower-cased prompt are tried first. If none applies,
 * store prompts in benchmark mode get the dense conversion variant, and
 * everything else is chosen by a stable hash of the prompt so the same prompt
 * always maps to the same variant.
 *
 * @param {object} [options]
 * @param {string} [options.siteType] - Key into BENCHMARK_STARTER_VARIANTS.
 * @param {string} [options.promptText=""] - User prompt used for keyword and hash selection.
 * @param {string} [options.archetype=""] - "high-contrast-tech" forces the tech store variant.
 * @param {boolean} [options.benchmarkMode=false] - Enables the store fallback to the dense conversion variant.
 * @returns {{ key: string, label: string, starter: string[] } | null} The chosen variant, or null when the site type has no variants.
 */
function chooseBenchmarkStarterVariant({ siteType, promptText = "", archetype = "", benchmarkMode = false } = {}) {
  const variants = BENCHMARK_STARTER_VARIANTS[siteType] || [];
  if (variants.length === 0) return null;
  const text = String(promptText || "").toLowerCase();
  // A missing key falls back to the first variant rather than failing.
  const pickVariant = (key) => variants.find((variant) => variant.key === key) || variants[0];

  if (siteType === "blog") {
    // `brutalis\w*` is a stem match: with a bare `brutalis` the trailing \b
    // rejected "brutalist" and "brutalism". `s[aã]o` accepts the accented form.
    if (/\b(?:architecture|architectural|concrete|brutalis\w*|ando|chapel|osaka|kyoto|s[aã]o paulo)\b/.test(text)) {
      return pickVariant("architectural-journal");
    }
    if (/\b(?:gallery|art book|art-book|folio|exhibition|museum)\b/.test(text)) {
      return pickVariant("art-book-minimal");
    }
  }

  if (siteType === "store") {
    if (/\b(?:audio|headphones|speaker|tech|wireless|anc|bluetooth)\b/.test(text) || archetype === "high-contrast-tech") {
      return pickVariant("cold-premium-tech");
    }
    if (/\b(?:lamp|chair|desk|leather|tote|wallet|wood|ceramic|object)\b/.test(text)) {
      return pickVariant("warm-object-craft");
    }
    if (benchmarkMode) {
      return pickVariant("dense-conversion-pdp");
    }
  }

  return variants[stablePromptHash(text) % variants.length];
}
463
+
464
/**
 * Classify a store prompt by the first category pattern it matches.
 *
 * Categories are tried in the key order of STORE_PROMPT_CATEGORY_PATTERNS.
 *
 * @param {string} [promptText=""] - User prompt; falsy values are treated as empty.
 * @returns {string} The matching category key, or "generic" when nothing matches.
 */
function inferBenchmarkStorePromptCategory(promptText = "") {
  const prompt = String(promptText || "");
  const matchedCategory = Object.keys(STORE_PROMPT_CATEGORY_PATTERNS).find((category) =>
    STORE_PROMPT_CATEGORY_PATTERNS[category].test(prompt)
  );
  return matchedCategory ?? "generic";
}
471
+
472
/**
 * Flag store output that uses beauty/skincare vocabulary when the prompt did
 * not ask for a beauty product.
 *
 * @param {string} [promptText=""] - Original user prompt, used to infer the requested category.
 * @param {string} [haystack=""] - Generated text or markup to scan.
 * @returns {string|null} A flag message when drift is detected, otherwise null.
 */
function detectBenchmarkStoreSemanticDrift(promptText = "", haystack = "") {
  const promptAskedForBeauty = inferBenchmarkStorePromptCategory(promptText) === "beauty";
  // Beauty prompts are exempt; the output is only scanned for other prompts.
  const driftedIntoBeauty = !promptAskedForBeauty && STORE_BEAUTY_LANDING_DRIFT_PATTERN.test(haystack);
  return driftedIntoBeauty
    ? "benchmark store drifted into beauty/skincare landing-page tropes that do not match the prompt"
    : null;
}
480
+
354
481
  function getBenchmarkExamples(siteType, benchmarkMode) {
355
482
  if (!benchmarkMode) return null;
356
483
  return BENCHMARK_FEWSHOT_EXAMPLES[siteType] || null;
@@ -499,6 +626,7 @@ export function buildFrontendExecutionContext({ promptText = "", profile = "code
499
626
  const assetPolicy = inferAssetPolicy(text);
500
627
  const layoutStarter = getLayoutStarter(siteType, archetype);
501
628
  const benchmarkStarter = getBenchmarkStarter(siteType, benchmarkMode);
629
+ const benchmarkVariant = chooseBenchmarkStarterVariant({ siteType, promptText: text, archetype, benchmarkMode });
502
630
  const benchmarkExamples = getBenchmarkExamples(siteType, benchmarkMode);
503
631
  const siteTypeRules = getSiteTypeRules(siteType);
504
632
  const benchmarkSiteTypeRules = getBenchmarkSiteTypeRules(siteType, benchmarkMode);
@@ -507,6 +635,7 @@ export function buildFrontendExecutionContext({ promptText = "", profile = "code
507
635
  `Content mode: ${contentMode}. Asset policy: ${assetPolicy}.`,
508
636
  layoutStarter ? `Layout starter: ${layoutStarter[0]}` : "",
509
637
  ...(benchmarkStarter ? [`Benchmark starter skeleton:\n- ${benchmarkStarter.join("\n- ")}`] : []),
638
+ ...(benchmarkVariant ? [`Benchmark starter variant: ${benchmarkVariant.label}\n- ${benchmarkVariant.starter.join("\n- ")}`] : []),
510
639
  ...(benchmarkExamples ? [
511
640
  `Benchmark few-shot guide:\nGood:\n- ${benchmarkExamples.good.join("\n- ")}\nBad:\n- ${benchmarkExamples.bad.join("\n- ")}`
512
641
  ] : []),
@@ -528,6 +657,8 @@ export function buildFrontendExecutionContext({ promptText = "", profile = "code
528
657
  assetPolicy,
529
658
  layoutStarter: layoutStarter ? layoutStarter[0] : null,
530
659
  benchmarkStarter: benchmarkStarter ? benchmarkStarter[0] : null,
660
+ benchmarkVariant: benchmarkVariant?.label || null,
661
+ benchmarkVariantStarter: benchmarkVariant?.starter || null,
531
662
  benchmarkMode,
532
663
  siteTypeRules,
533
664
  benchmarkSiteTypeRules
@@ -594,6 +725,13 @@ export function detectFrontendSlop({ promptText = "", assistantText = "", receip
594
725
  flags.push("benchmark store fell back to generic luxury-commerce copy");
595
726
  score += 2;
596
727
  }
728
+ const semanticDrift = siteType === "store" && benchmarkMode
729
+ ? detectBenchmarkStoreSemanticDrift(promptText, haystack)
730
+ : null;
731
+ if (semanticDrift) {
732
+ flags.push(semanticDrift);
733
+ score += 4;
734
+ }
597
735
  if (siteType === "store" && benchmarkMode && /\b(?:metaKey && e\.key === ['"]k['"]|command palette|keyboard shortcuts?)\b/i.test(haystack)) {
598
736
  flags.push("benchmark store used fake keyboard or shortcut chrome");
599
737
  score += 4;
@@ -665,13 +803,13 @@ export function detectFrontendSlop({ promptText = "", assistantText = "", receip
665
803
  return {
666
804
  score,
667
805
  flags,
668
- hardBlock: graderResult.hardBlock || flags.some((flag) => /fictional publication identity|generic publication-shell framing|essay-magazine placeholder language|invented author or contributor bylines|fake keyboard|reflective-editorial atmosphere|primary merchandising surface as a placeholder|placeholder hero surface|Tailwind CDN starter theming|Font Awesome chrome|placeholder product imagery|placeholder imagery|fake proof or badge chrome|demo interaction chrome|demo page explanation copy/.test(flag)),
806
+ hardBlock: graderResult.hardBlock || flags.some((flag) => /fictional publication identity|generic publication-shell framing|essay-magazine placeholder language|invented author or contributor bylines|fake keyboard|reflective-editorial atmosphere|primary merchandising surface as a placeholder|placeholder hero surface|Tailwind CDN starter theming|Font Awesome chrome|placeholder product imagery|placeholder imagery|fake proof or badge chrome|demo interaction chrome|demo page explanation copy|beauty\/skincare landing-page tropes|semantic drift/.test(flag)),
669
807
  severe: score >= 5,
670
808
  summary: flags.length > 0 ? `frontend slop flags: ${flags.join(", ")}` : "no deterministic frontend slop flags"
671
809
  };
672
810
  }
673
811
 
674
- export async function inspectFrontendArtifacts({ cwd, receipt = null, frontend = null } = {}) {
812
+ export async function inspectFrontendArtifacts({ cwd, promptText = "", receipt = null, frontend = null } = {}) {
675
813
  const changedFiles = Array.isArray(receipt?.changedFiles) ? receipt.changedFiles : [];
676
814
  const htmlFiles = changedFiles
677
815
  .map((filePath) => String(filePath || "").trim())
@@ -727,6 +865,11 @@ export async function inspectFrontendArtifacts({ cwd, receipt = null, frontend =
727
865
  score += 4;
728
866
  }
729
867
  if (frontend.siteType === "store") {
868
+ const semanticDrift = detectBenchmarkStoreSemanticDrift(promptText, htmlText);
869
+ if (semanticDrift) {
870
+ flags.push(semanticDrift);
871
+ score += 4;
872
+ }
730
873
  if (/\b(?:cart would open here|would go to checkout|would continue to checkout|would appear here)\b/i.test(htmlText)) {
731
874
  flags.push("benchmark store still contains fake cart or checkout fallback behavior");
732
875
  score += 4;
@@ -806,12 +949,27 @@ export function shouldForceFrontendRebuild({ frontend = null, slop = null, revis
806
949
  return revisionCount < 2;
807
950
  }
808
951
 
809
- export function getFrontendAcceptanceFailure({ frontend = null, slop = null } = {}) {
810
- if (!frontend?.benchmarkMode || !slop?.hardBlock) return null;
952
+ export function getFrontendAcceptanceFailure({ frontend = null, slop = null, designReview = null, screenshotReview = null } = {}) {
953
+ if (!frontend?.benchmarkMode) return null;
954
+ if (screenshotReview?.verdict === "weak") {
955
+ return {
956
+ siteType: frontend.siteType,
957
+ reason: `benchmark ${frontend.siteType} render review is weak: ${String(screenshotReview.summary || "").trim() || "visual result did not clear benchmark review"}`,
958
+ flags: Array.isArray(slop?.flags) ? slop.flags : []
959
+ };
960
+ }
961
+ if (designReview?.verdict === "weak") {
962
+ return {
963
+ siteType: frontend.siteType,
964
+ reason: `benchmark ${frontend.siteType} design review is weak: ${String(designReview.summary || "").trim() || "design did not clear benchmark review"}`,
965
+ flags: Array.isArray(slop?.flags) ? slop.flags : []
966
+ };
967
+ }
968
+ if (!slop?.hardBlock) return null;
811
969
  const flags = Array.isArray(slop.flags) ? slop.flags : [];
812
970
  if (frontend.siteType === "store") {
813
971
  const blocking = flags.filter((flag) =>
814
- /Tailwind CDN|Font Awesome|placeholder product imagery|placeholder imagery|fake proof or badge chrome|fake keyboard or shortcut chrome|demo behavior|demo page explanation copy|fake cart or checkout fallback behavior|generic luxury-commerce copy/i.test(flag)
972
+ /Tailwind CDN|Font Awesome|placeholder product imagery|placeholder imagery|fake proof or badge chrome|fake keyboard or shortcut chrome|demo behavior|demo page explanation copy|fake cart or checkout fallback behavior|generic luxury-commerce copy|beauty\/skincare landing-page tropes|semantic drift/i.test(flag)
815
973
  );
816
974
  if (blocking.length > 0) {
817
975
  return {
@@ -832,6 +990,7 @@ export function buildFrontendRevisionPrompt({
832
990
  } = {}) {
833
991
  const siteType = pickByPattern(String(originalPrompt || ""), SITE_TYPES, "landing");
834
992
  const benchmarkMode = isBenchmarkFrontendTask(originalPrompt);
993
+ const benchmarkVariant = chooseBenchmarkStarterVariant({ siteType, promptText: originalPrompt, benchmarkMode });
835
994
  const benchmarkExamples = getBenchmarkExamples(siteType, benchmarkMode);
836
995
  const issues = Array.isArray(designReview?.issues) ? designReview.issues.slice(0, 6) : [];
837
996
  const nextPass = Array.isArray(designReview?.nextPass) ? designReview.nextPass.slice(0, 6) : [];
@@ -844,6 +1003,7 @@ export function buildFrontendRevisionPrompt({
844
1003
  issues.length > 0 ? `Problems to fix:\n- ${issues.join("\n- ")}` : "",
845
1004
  visualIssues.length > 0 ? `Visible screenshot problems:\n- ${visualIssues.join("\n- ")}` : "",
846
1005
  slopFlags.length > 0 ? `Deterministic slop flags:\n- ${slopFlags.join("\n- ")}` : "",
1006
+ benchmarkVariant ? `Preserve this benchmark variant direction: ${benchmarkVariant.label}\n- ${benchmarkVariant.starter.join("\n- ")}` : "",
847
1007
  benchmarkExamples ? `Benchmark example guide:\nGood:\n- ${benchmarkExamples.good.join("\n- ")}\nBad:\n- ${benchmarkExamples.bad.join("\n- ")}` : "",
848
1008
  nextPass.length > 0 ? `Revision priorities:\n- ${nextPass.join("\n- ")}` : "",
849
1009
  visualNextPass.length > 0 ? `Screenshot revision priorities:\n- ${visualNextPass.join("\n- ")}` : "",
@@ -880,6 +1040,9 @@ export function buildFrontendRebuildPrompt({
880
1040
  } = {}) {
881
1041
  const siteType = pickByPattern(String(originalPrompt || ""), SITE_TYPES, "landing");
882
1042
  const benchmarkMode = isBenchmarkFrontendTask(originalPrompt);
1043
+ const benchmarkVariant = frontend?.benchmarkVariantStarter?.length
1044
+ ? { label: frontend.benchmarkVariant || "selected benchmark variant", starter: frontend.benchmarkVariantStarter }
1045
+ : chooseBenchmarkStarterVariant({ siteType, promptText: originalPrompt, archetype: frontend?.archetype || "", benchmarkMode });
883
1046
  const benchmarkExamples = getBenchmarkExamples(siteType, benchmarkMode);
884
1047
  const slopFlags = Array.isArray(slop?.flags) ? slop.flags.slice(0, 8) : [];
885
1048
  const issues = Array.isArray(designReview?.issues) ? designReview.issues.slice(0, 6) : [];
@@ -889,6 +1052,7 @@ export function buildFrontendRebuildPrompt({
889
1052
  "Rebuild the affected frontend files from scratch within the same contract.",
890
1053
  `Original task: ${String(originalPrompt || "").trim()}`,
891
1054
  starter ? `Required starter skeleton:\n- ${starter.join("\n- ")}` : "",
1055
+ benchmarkVariant ? `Required benchmark variant direction: ${benchmarkVariant.label}\n- ${benchmarkVariant.starter.join("\n- ")}` : "",
892
1056
  benchmarkExamples ? `Benchmark example guide:\nGood:\n- ${benchmarkExamples.good.join("\n- ")}\nBad:\n- ${benchmarkExamples.bad.join("\n- ")}` : "",
893
1057
  slopFlags.length > 0 ? `Banned patterns that must be removed completely:\n- ${slopFlags.join("\n- ")}` : "",
894
1058
  issues.length > 0 ? `Design issues to correct in the rebuild:\n- ${issues.join("\n- ")}` : "",
package/src/workflow.js CHANGED
@@ -187,7 +187,7 @@ export async function runBuildWorkflow({
187
187
  ? detectFrontendSlop({ promptText, assistantText: activeResponse.content || "", receipt: activeReceipt, designReview })
188
188
  : null;
189
189
  const artifactSlop = designReview
190
- ? await inspectFrontendArtifacts({ cwd: context.cwd, receipt: activeReceipt, frontend: frontendCtx?.frontend || null })
190
+ ? await inspectFrontendArtifacts({ cwd: context.cwd, promptText, receipt: activeReceipt, frontend: frontendCtx?.frontend || null })
191
191
  : null;
192
192
  const designSlop = mergeFrontendSlop(deterministicSlop, artifactSlop);
193
193
 
@@ -256,7 +256,9 @@ export async function runBuildWorkflow({
256
256
 
257
257
  const acceptanceFailure = getFrontendAcceptanceFailure({
258
258
  frontend: frontendCtx?.frontend || null,
259
- slop: designSlop
259
+ slop: designSlop,
260
+ designReview,
261
+ screenshotReview
260
262
  });
261
263
  if (acceptanceFailure) {
262
264
  throw new Error(acceptanceFailure.reason);