@tritard/waterbrother 0.8.39 → 0.8.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli.js +68 -2
- package/src/frontend.js +169 -5
- package/src/workflow.js +4 -2
package/package.json
CHANGED
package/src/cli.js
CHANGED
|
@@ -621,6 +621,63 @@ function buildSyntheticAssistantOutput(receipt) {
|
|
|
621
621
|
return null;
|
|
622
622
|
}
|
|
623
623
|
|
|
624
|
+
/**
 * Collect the unique, trimmed file paths recorded on a turn receipt.
 *
 * @param {{ changedFiles?: unknown[] } | null | undefined} receipt - Turn receipt. Anything
 *   whose `changedFiles` is not an array yields an empty list.
 * @param {number} [limit=6] - Maximum number of paths to return. A value that is not a
 *   non-negative number falls back to the default of 6.
 * @returns {string[]} Paths in first-seen order, de-duplicated, at most `limit` long.
 */
function formatReceiptFileList(receipt, limit = 6) {
  // slice(0, -1) would silently drop the last path and slice(0, NaN) would drop every
  // path, so anything that is not a usable count is replaced by the default.
  const maxFiles = typeof limit === "number" && limit >= 0 ? limit : 6;
  const files = Array.isArray(receipt?.changedFiles)
    ? receipt.changedFiles.map((filePath) => String(filePath || "").trim()).filter(Boolean)
    : [];
  return [...new Set(files)].slice(0, maxFiles);
}
|
|
630
|
+
|
|
631
|
+
/**
 * Swap the assistant's final message for a plain status report when a benchmark
 * frontend turn did not clear review.
 *
 * The original `assistantText` is returned untouched when the turn is not in
 * benchmark mode, or when nothing calls for a downgrade. A downgrade happens when
 * `getFrontendAcceptanceFailure` reports a failure, when either the design or the
 * render verdict is "weak", or when the design verdict is "caution" and the render
 * verdict is anything other than "strong".
 *
 * @param {object} [options]
 * @param {string} [options.assistantText=""] - Message that would otherwise be printed.
 * @param {object|null} [options.receipt=null] - Turn receipt; supplies the changed files
 *   and acts as a fallback source for the review objects.
 * @param {object|null} [options.artifacts=null] - Turn artifacts; preferred source of
 *   `designReview`, `screenshotReview`, `designSlop` and `designRevision`.
 * @param {object|null} [options.frontend=null] - Frontend execution context; only
 *   `benchmarkMode` is read here, the rest is forwarded to the acceptance check.
 * @returns {string} Either `assistantText` or a report made of blank-line separated
 *   sections: changed files, current status, review summaries, closing note.
 */
function buildBenchmarkFrontendStatusOutput({
  assistantText = "",
  receipt = null,
  artifacts = null,
  frontend = null
} = {}) {
  if (!frontend?.benchmarkMode) return assistantText;

  const designReview = artifacts?.designReview || receipt?.designReview || null;
  const screenshotReview = artifacts?.screenshotReview || receipt?.screenshotReview || null;
  const designSlop = artifacts?.designSlop || receipt?.designSlop || null;
  const acceptanceFailure = getFrontendAcceptanceFailure({
    frontend,
    slop: designSlop,
    designReview,
    screenshotReview
  });
  const designVerdict = String(designReview?.verdict || "").trim().toLowerCase();
  const renderVerdict = String(screenshotReview?.verdict || "").trim().toLowerCase();
  const shouldDowngrade =
    Boolean(acceptanceFailure) ||
    designVerdict === "weak" ||
    renderVerdict === "weak" ||
    (designVerdict === "caution" && renderVerdict !== "strong");
  if (!shouldDowngrade) return assistantText;

  const sections = [];

  // Keep the heading and its bullets in one section so the list is not double-spaced
  // by the blank-line join at the end.
  const files = formatReceiptFileList(receipt);
  if (files.length > 0) {
    sections.push(["Updated benchmark frontend files:", ...files.map((filePath) => `- ${filePath}`)].join("\n"));
  }

  let statusLine = "";
  const failureReason = String(acceptanceFailure?.reason || "").trim();
  if (failureReason) {
    // The reason often ends with a review summary that is already a full sentence.
    const terminator = /[.!?]$/.test(failureReason) ? "" : ".";
    statusLine = `Current status: ${failureReason}${terminator}`;
  } else {
    const statusParts = [];
    if (designVerdict) statusParts.push(`design ${designVerdict}`);
    if (renderVerdict) statusParts.push(`render ${renderVerdict}`);
    if (artifacts?.designRevision?.triggered) {
      const passes = artifacts.designRevision.passes;
      // Never print "undefinedx" when the pass count was not recorded.
      statusParts.push(
        typeof passes === "number" && Number.isFinite(passes) ? `auto-revised ${passes}x` : "auto-revised"
      );
    }
    if (statusParts.length > 0) {
      statusLine = `Current status: ${statusParts.join(", ")}.`;
    }
  }
  if (statusLine) sections.push(statusLine);

  // Review summaries are only worth a section of their own when the status line (or an
  // earlier summary) has not already quoted them verbatim.
  const summaries = [String(designReview?.summary || "").trim()];
  if (renderVerdict === "weak") summaries.push(String(screenshotReview?.summary || "").trim());
  const emitted = [statusLine];
  for (const summary of summaries) {
    if (!summary) continue;
    if (emitted.some((text) => text.includes(summary))) continue;
    sections.push(summary);
    emitted.push(summary);
  }

  sections.push("Result needs another pass before it should be treated as finished.");
  return sections.join("\n\n");
}
|
|
680
|
+
|
|
624
681
|
function hasFrontendCodeEcho(text) {
|
|
625
682
|
const body = String(text || "");
|
|
626
683
|
return /```(?:html|css|js|javascript|jsx|tsx)?[\s\S]{120,}```/i.test(body) || /<!DOCTYPE html>/i.test(body);
|
|
@@ -1145,7 +1202,7 @@ async function analyzeTurnArtifacts({
|
|
|
1145
1202
|
? detectFrontendSlop({ promptText, assistantText, receipt, designReview })
|
|
1146
1203
|
: null;
|
|
1147
1204
|
const artifactSlop = designReview
|
|
1148
|
-
? await inspectFrontendArtifacts({ cwd: context.cwd, receipt, frontend })
|
|
1205
|
+
? await inspectFrontendArtifacts({ cwd: context.cwd, promptText, receipt, frontend })
|
|
1149
1206
|
: null;
|
|
1150
1207
|
const designSlop = mergeFrontendSlop(deterministicSlop, artifactSlop);
|
|
1151
1208
|
|
|
@@ -1319,7 +1376,9 @@ async function maybeReviseInteractiveFrontend({
|
|
|
1319
1376
|
|
|
1320
1377
|
const acceptanceFailure = getFrontendAcceptanceFailure({
|
|
1321
1378
|
frontend: frontendExecutionContext.frontend || null,
|
|
1322
|
-
slop: artifacts.designSlop
|
|
1379
|
+
slop: artifacts.designSlop,
|
|
1380
|
+
designReview: artifacts.designReview,
|
|
1381
|
+
screenshotReview: artifacts.screenshotReview
|
|
1323
1382
|
});
|
|
1324
1383
|
if (acceptanceFailure) {
|
|
1325
1384
|
throw new Error(acceptanceFailure.reason);
|
|
@@ -3964,6 +4023,13 @@ async function runTextTurnInteractive({
|
|
|
3964
4023
|
finalizedArtifacts = revisedFrontendTurn.artifacts || null;
|
|
3965
4024
|
}
|
|
3966
4025
|
|
|
4026
|
+
renderedAssistantText = buildBenchmarkFrontendStatusOutput({
|
|
4027
|
+
assistantText: renderedAssistantText,
|
|
4028
|
+
receipt: finalizedReceipt,
|
|
4029
|
+
artifacts: finalizedArtifacts,
|
|
4030
|
+
frontend: frontendExecutionContext?.frontend || null
|
|
4031
|
+
});
|
|
4032
|
+
|
|
3967
4033
|
printAssistantOutput(renderedAssistantText);
|
|
3968
4034
|
await setSessionRunState(currentSession, agent, "done");
|
|
3969
4035
|
printTurnSummary(turnSummary, response, { modelId: agent.getModel(), costTracker: context.costTracker, traceMode: context.runtime.traceMode });
|
package/src/frontend.js
CHANGED
|
@@ -80,6 +80,7 @@ const BENCHMARK_SITE_TYPE_RULES = {
|
|
|
80
80
|
store: [
|
|
81
81
|
"Benchmark ecommerce mode: prioritize conversion architecture over editorial styling. The page should sell, not just look clean.",
|
|
82
82
|
"Benchmark ecommerce mode: include proof, objection handling, and trust signals that belong on a real single-product PDP.",
|
|
83
|
+
"Benchmark ecommerce mode: if the prompt does not specify a product category, default to a neutral physical product class like headphones, a bag, a lamp, a jacket, or a speaker. Do not drift into skincare, beauty, supplement, or launch-ad landing-page tropes unless the prompt asks for them.",
|
|
83
84
|
"Benchmark ecommerce mode: do not leave the main merchandising surface as a literal placeholder, dashed demo box, emoji stand-in, or visual wireframe.",
|
|
84
85
|
"Benchmark ecommerce mode: do not use Tailwind CDN starter theming, Font Awesome chrome, or placeholder product images such as picsum/placehold on the live merchandising surface.",
|
|
85
86
|
"Benchmark ecommerce mode: fake command palettes, fake app shortcuts, demo-only cart gimmicks, and unrelated theme chrome are disallowed.",
|
|
@@ -245,6 +246,7 @@ const BENCHMARK_STARTERS = {
|
|
|
245
246
|
],
|
|
246
247
|
store: [
|
|
247
248
|
"Benchmark store starter: build a flagship single-product PDP with 4-5 sections max.",
|
|
249
|
+
"If the prompt does not specify a product category, choose a neutral physical product such as headphones, a bag, a lamp, a speaker, or a jacket. Do not invent skincare/beauty/supplement DTC landing pages.",
|
|
248
250
|
"Section 1: product-first hero with a product-shaped visual composition and purchase stack side by side.",
|
|
249
251
|
"Section 2: trust or shipping/returns strip.",
|
|
250
252
|
"Section 3: three focused benefit/features blocks.",
|
|
@@ -257,6 +259,67 @@ const BENCHMARK_STARTERS = {
|
|
|
257
259
|
]
|
|
258
260
|
};
|
|
259
261
|
|
|
262
|
+
// Art-direction variants layered on top of the benchmark starter skeleton.
// Keyed by site type ("blog", "store"); each entry carries:
//   key     - identifier that chooseBenchmarkStarterVariant matches on
//   label   - short name interpolated into the generated prompt text
//   starter - instruction lines that are joined into a "- " bullet list in prompts
const BENCHMARK_STARTER_VARIANTS = {
  blog: [
    {
      key: "architectural-journal",
      label: "architectural journal",
      starter: [
        "Variant shape: severe image-text split with one offset note card and one lean secondary pair.",
        "Palette should feel mineral, paper, and shadow-driven rather than warm lifestyle-editorial.",
        "Finish with one restrained final gesture, not a generic footer label."
      ]
    },
    {
      key: "art-book-minimal",
      label: "art-book minimal",
      starter: [
        "Variant shape: oversized white space, one dominant visual surface, one short text block, one caption-like secondary band.",
        "Treat typography like exhibition text: sparse, quiet, and deliberate.",
        "Avoid default editorial chrome entirely; let spacing and crop do the work."
      ]
    },
    {
      // Not selected by any keyword rule in chooseBenchmarkStarterVariant; reachable
      // only through its prompt-hash fallback.
      key: "review-folio",
      label: "review folio",
      starter: [
        "Variant shape: folio-like review page with strong masthead mark, one lead essay, and one compact pair of related notes.",
        "Use stronger typographic contrast and one unexpected compositional move, not a generic archive scaffold.",
        "Closing section should feel like a designed coda rather than a footer."
      ]
    }
  ],
  store: [
    {
      key: "cold-premium-tech",
      label: "cold premium tech",
      starter: [
        "Variant shape: hard-edged flagship PDP with dark hero, precise specification accents, and a compact proof/guarantee band.",
        "Use crisp hierarchy, hard contrast, and performance language rather than luxury-lifestyle filler.",
        "Cart UI should feel real and quiet; no gimmicks, no fake app chrome."
      ]
    },
    {
      key: "warm-object-craft",
      label: "warm object craft",
      starter: [
        "Variant shape: tactile product story with warm neutral palette, close material crop, and trust/objection handling near the CTA.",
        "Lead with material, construction, weight, care, and use-case details.",
        "Keep the page commercially sharp, but let the object feel physical and desirable."
      ]
    },
    {
      // Used by chooseBenchmarkStarterVariant as the benchmark-mode default for store
      // prompts that match no product keyword.
      key: "dense-conversion-pdp",
      label: "dense conversion pdp",
      starter: [
        "Variant shape: stronger purchase stack with nearby shipping, returns, warranty, and one compact objection-handling block.",
        "Use a more aggressive conversion rhythm than the minimalist variants, but keep it authored and clean.",
        "Hero and cart item surfaces must both feel product-real, not abstract placeholders."
      ]
    }
  ]
};
|
|
322
|
+
|
|
260
323
|
const BENCHMARK_FEWSHOT_EXAMPLES = {
|
|
261
324
|
blog: {
|
|
262
325
|
good: [
|
|
@@ -277,10 +340,12 @@ const BENCHMARK_FEWSHOT_EXAMPLES = {
|
|
|
277
340
|
"Product-first hero with believable merchandising image or authored product render.",
|
|
278
341
|
"Purchase stack plus trust, objections, and compact FAQ only if needed.",
|
|
279
342
|
"Copy names material, dimensions, carry/use case, warranty, or care.",
|
|
343
|
+
"When the prompt is generic, stay with a neutral physical product class instead of drifting into skincare/beauty campaign pages.",
|
|
280
344
|
"No alert-based checkout, no fake review counts, no fake featured-in strip, no demo footer."
|
|
281
345
|
],
|
|
282
346
|
bad: [
|
|
283
347
|
"Generic luxury filler like considered life, timeless essentials, or modern living.",
|
|
348
|
+
"Beauty/skincare serum landing pages when the user did not ask for that product category.",
|
|
284
349
|
"Demo cart toasts, alert-based cart fallbacks, or 'in a real Shopify store' copy.",
|
|
285
350
|
"Placeholder gallery or weak hero that does not merchandise the product.",
|
|
286
351
|
"Fake proof chrome like bestseller badges, sold-this-month counters, or made-up review totals."
|
|
@@ -304,6 +369,16 @@ const BENCHMARK_GRADERS = {
|
|
|
304
369
|
]
|
|
305
370
|
};
|
|
306
371
|
|
|
372
|
+
// Keyword patterns used to guess which product category a store prompt asks for.
// inferBenchmarkStorePromptCategory tests them in insertion order and returns the
// first key that matches, so earlier categories win when a prompt mentions several.
// NOTE(review): some tokens are broad ("cream" can be a colour, "wireless" is not
// audio-specific) — confirm the resulting category bias is intended.
const STORE_PROMPT_CATEGORY_PATTERNS = {
  beauty: /\b(?:beauty|skincare|skin care|serum|vitamin c|retinol|cleanser|moisturizer|cream|cosmetic|face oil|face serum|glow serum|supplement|wellness)\b/i,
  audio: /\b(?:audio|headphones|earbuds|speaker|wireless|anc|bluetooth)\b/i,
  bag: /\b(?:bag|backpack|tote|wallet|duffel|carryall|briefcase|messenger)\b/i,
  apparel: /\b(?:jacket|coat|shirt|hoodie|sweater|pants|denim|apparel|clothing|garment)\b/i,
  object: /\b(?:lamp|chair|desk|ceramic|table|watch|knife|fountain pen|pen|bottle|object|furniture|wood)\b/i
};

// Vocabulary that detectBenchmarkStoreSemanticDrift looks for in generated output.
// A single match, when the prompt category is not "beauty", produces the
// beauty/skincare drift flag.
// NOTE(review): one-word hits such as "cream" or "ingredients" may also occur in
// non-beauty product copy — verify the false-positive rate is acceptable.
const STORE_BEAUTY_LANDING_DRIFT_PATTERN = /\b(?:beauty|skincare|skin care|serum|vitamin c|retinol|cleanser|moisturizer|cream|complexion|dermatologist|clinical results?|ingredient(?:s)?|before\s*&\s*after|glow serum|face oil)\b/i;
|
|
381
|
+
|
|
307
382
|
const GENERIC_EDITORIAL_LAYOUT_PATTERNS = [
|
|
308
383
|
/\bsticky\s+nav\b/i,
|
|
309
384
|
/\bsticky\b/i,
|
|
@@ -351,6 +426,58 @@ function getBenchmarkStarter(siteType, benchmarkMode) {
|
|
|
351
426
|
return BENCHMARK_STARTERS[siteType] || null;
|
|
352
427
|
}
|
|
353
428
|
|
|
429
|
+
/**
 * Deterministic 32-bit hash of a prompt string (multiply-by-31 rolling hash over
 * UTF-16 code units).
 *
 * @param {string} [text=""] - Text to hash; falsy values are hashed as the empty string.
 * @returns {number} Unsigned 32-bit integer; 0 for empty input.
 */
function stablePromptHash(text = "") {
  const source = String(text || "");
  // split("") walks UTF-16 code units, matching an index-based charCodeAt loop.
  return source
    .split("")
    .reduce((acc, unit) => (Math.imul(acc, 31) + unit.charCodeAt(0)) >>> 0, 0);
}
|
|
437
|
+
|
|
438
|
+
/**
 * Pick the starter variant (art direction) for a site type.
 *
 * Keyword rules on the lower-cased prompt are tried first. Store prompts in benchmark
 * mode that match no keyword get the "dense-conversion-pdp" variant. Everything else
 * falls back to a variant chosen by a stable hash of the prompt, so the same prompt
 * always receives the same variant.
 *
 * @param {object} [options]
 * @param {string} [options.siteType] - Key into BENCHMARK_STARTER_VARIANTS.
 * @param {string} [options.promptText=""] - User prompt.
 * @param {string} [options.archetype=""] - Design archetype; "high-contrast-tech" forces
 *   the "cold-premium-tech" store variant.
 * @param {boolean} [options.benchmarkMode=false] - Whether the turn is a benchmark task.
 * @returns {{ key: string, label: string, starter: string[] } | null} The chosen variant,
 *   or null when the site type has no variants.
 */
function chooseBenchmarkStarterVariant({ siteType, promptText = "", archetype = "", benchmarkMode = false } = {}) {
  // Array.isArray keeps inherited properties (e.g. "constructor") from being treated
  // as a variant list.
  const candidates = BENCHMARK_STARTER_VARIANTS[siteType];
  const variants = Array.isArray(candidates) ? candidates : [];
  if (variants.length === 0) return null;

  const text = String(promptText || "").toLowerCase();
  const pick = (key) => variants.find((variant) => variant.key === key) || variants[0];

  if (siteType === "blog") {
    // "brutalis" is a stem: it needs \w* to reach "brutalism"/"brutalist" before the
    // closing word boundary. "são" is accepted with or without the diacritic.
    if (/\b(?:architecture|architectural|concrete|brutalis\w*|ando|chapel|osaka|kyoto|s[aã]o paulo)\b/.test(text)) {
      return pick("architectural-journal");
    }
    if (/\b(?:gallery|art book|art-book|folio|exhibition|museum)\b/.test(text)) {
      return pick("art-book-minimal");
    }
  }

  if (siteType === "store") {
    if (/\b(?:audio|headphones|speaker|tech|wireless|anc|bluetooth)\b/.test(text) || archetype === "high-contrast-tech") {
      return pick("cold-premium-tech");
    }
    if (/\b(?:lamp|chair|desk|leather|tote|wallet|wood|ceramic|object)\b/.test(text)) {
      return pick("warm-object-craft");
    }
    if (benchmarkMode) {
      return pick("dense-conversion-pdp");
    }
  }

  return variants[stablePromptHash(text) % variants.length];
}
|
|
463
|
+
|
|
464
|
+
/**
 * Guess the product category a store prompt is asking for.
 *
 * @param {string} [promptText=""] - User prompt; falsy values are treated as empty.
 * @returns {string} The first key of STORE_PROMPT_CATEGORY_PATTERNS whose pattern
 *   matches the prompt, or "generic" when none does.
 */
function inferBenchmarkStorePromptCategory(promptText = "") {
  const prompt = String(promptText || "");
  const matchedCategory = Object.keys(STORE_PROMPT_CATEGORY_PATTERNS).find((category) =>
    STORE_PROMPT_CATEGORY_PATTERNS[category].test(prompt)
  );
  return matchedCategory ?? "generic";
}
|
|
471
|
+
|
|
472
|
+
/**
 * Flag benchmark store output that reads like a beauty/skincare landing page even
 * though the prompt did not ask for that category.
 *
 * @param {string} [promptText=""] - Original user prompt, used to infer the category.
 * @param {string} [haystack=""] - Generated text to scan with
 *   STORE_BEAUTY_LANDING_DRIFT_PATTERN.
 * @returns {string|null} A slop-flag message when drift is detected, otherwise null.
 */
function detectBenchmarkStoreSemanticDrift(promptText = "", haystack = "") {
  // Beauty vocabulary is expected, not drift, when the prompt itself asks for it.
  const promptAsksForBeauty = inferBenchmarkStorePromptCategory(promptText) === "beauty";
  const hasDrifted = !promptAsksForBeauty && STORE_BEAUTY_LANDING_DRIFT_PATTERN.test(haystack);
  return hasDrifted
    ? "benchmark store drifted into beauty/skincare landing-page tropes that do not match the prompt"
    : null;
}
|
|
480
|
+
|
|
354
481
|
function getBenchmarkExamples(siteType, benchmarkMode) {
|
|
355
482
|
if (!benchmarkMode) return null;
|
|
356
483
|
return BENCHMARK_FEWSHOT_EXAMPLES[siteType] || null;
|
|
@@ -499,6 +626,7 @@ export function buildFrontendExecutionContext({ promptText = "", profile = "code
|
|
|
499
626
|
const assetPolicy = inferAssetPolicy(text);
|
|
500
627
|
const layoutStarter = getLayoutStarter(siteType, archetype);
|
|
501
628
|
const benchmarkStarter = getBenchmarkStarter(siteType, benchmarkMode);
|
|
629
|
+
const benchmarkVariant = chooseBenchmarkStarterVariant({ siteType, promptText: text, archetype, benchmarkMode });
|
|
502
630
|
const benchmarkExamples = getBenchmarkExamples(siteType, benchmarkMode);
|
|
503
631
|
const siteTypeRules = getSiteTypeRules(siteType);
|
|
504
632
|
const benchmarkSiteTypeRules = getBenchmarkSiteTypeRules(siteType, benchmarkMode);
|
|
@@ -507,6 +635,7 @@ export function buildFrontendExecutionContext({ promptText = "", profile = "code
|
|
|
507
635
|
`Content mode: ${contentMode}. Asset policy: ${assetPolicy}.`,
|
|
508
636
|
layoutStarter ? `Layout starter: ${layoutStarter[0]}` : "",
|
|
509
637
|
...(benchmarkStarter ? [`Benchmark starter skeleton:\n- ${benchmarkStarter.join("\n- ")}`] : []),
|
|
638
|
+
...(benchmarkVariant ? [`Benchmark starter variant: ${benchmarkVariant.label}\n- ${benchmarkVariant.starter.join("\n- ")}`] : []),
|
|
510
639
|
...(benchmarkExamples ? [
|
|
511
640
|
`Benchmark few-shot guide:\nGood:\n- ${benchmarkExamples.good.join("\n- ")}\nBad:\n- ${benchmarkExamples.bad.join("\n- ")}`
|
|
512
641
|
] : []),
|
|
@@ -528,6 +657,8 @@ export function buildFrontendExecutionContext({ promptText = "", profile = "code
|
|
|
528
657
|
assetPolicy,
|
|
529
658
|
layoutStarter: layoutStarter ? layoutStarter[0] : null,
|
|
530
659
|
benchmarkStarter: benchmarkStarter ? benchmarkStarter[0] : null,
|
|
660
|
+
benchmarkVariant: benchmarkVariant?.label || null,
|
|
661
|
+
benchmarkVariantStarter: benchmarkVariant?.starter || null,
|
|
531
662
|
benchmarkMode,
|
|
532
663
|
siteTypeRules,
|
|
533
664
|
benchmarkSiteTypeRules
|
|
@@ -594,6 +725,13 @@ export function detectFrontendSlop({ promptText = "", assistantText = "", receip
|
|
|
594
725
|
flags.push("benchmark store fell back to generic luxury-commerce copy");
|
|
595
726
|
score += 2;
|
|
596
727
|
}
|
|
728
|
+
const semanticDrift = siteType === "store" && benchmarkMode
|
|
729
|
+
? detectBenchmarkStoreSemanticDrift(promptText, haystack)
|
|
730
|
+
: null;
|
|
731
|
+
if (semanticDrift) {
|
|
732
|
+
flags.push(semanticDrift);
|
|
733
|
+
score += 4;
|
|
734
|
+
}
|
|
597
735
|
if (siteType === "store" && benchmarkMode && /\b(?:metaKey && e\.key === ['"]k['"]|command palette|keyboard shortcuts?)\b/i.test(haystack)) {
|
|
598
736
|
flags.push("benchmark store used fake keyboard or shortcut chrome");
|
|
599
737
|
score += 4;
|
|
@@ -665,13 +803,13 @@ export function detectFrontendSlop({ promptText = "", assistantText = "", receip
|
|
|
665
803
|
return {
|
|
666
804
|
score,
|
|
667
805
|
flags,
|
|
668
|
-
hardBlock: graderResult.hardBlock || flags.some((flag) => /fictional publication identity|generic publication-shell framing|essay-magazine placeholder language|invented author or contributor bylines|fake keyboard|reflective-editorial atmosphere|primary merchandising surface as a placeholder|placeholder hero surface|Tailwind CDN starter theming|Font Awesome chrome|placeholder product imagery|placeholder imagery|fake proof or badge chrome|demo interaction chrome|demo page explanation copy/.test(flag)),
|
|
806
|
+
hardBlock: graderResult.hardBlock || flags.some((flag) => /fictional publication identity|generic publication-shell framing|essay-magazine placeholder language|invented author or contributor bylines|fake keyboard|reflective-editorial atmosphere|primary merchandising surface as a placeholder|placeholder hero surface|Tailwind CDN starter theming|Font Awesome chrome|placeholder product imagery|placeholder imagery|fake proof or badge chrome|demo interaction chrome|demo page explanation copy|beauty\/skincare landing-page tropes|semantic drift/.test(flag)),
|
|
669
807
|
severe: score >= 5,
|
|
670
808
|
summary: flags.length > 0 ? `frontend slop flags: ${flags.join(", ")}` : "no deterministic frontend slop flags"
|
|
671
809
|
};
|
|
672
810
|
}
|
|
673
811
|
|
|
674
|
-
export async function inspectFrontendArtifacts({ cwd, receipt = null, frontend = null } = {}) {
|
|
812
|
+
export async function inspectFrontendArtifacts({ cwd, promptText = "", receipt = null, frontend = null } = {}) {
|
|
675
813
|
const changedFiles = Array.isArray(receipt?.changedFiles) ? receipt.changedFiles : [];
|
|
676
814
|
const htmlFiles = changedFiles
|
|
677
815
|
.map((filePath) => String(filePath || "").trim())
|
|
@@ -727,6 +865,11 @@ export async function inspectFrontendArtifacts({ cwd, receipt = null, frontend =
|
|
|
727
865
|
score += 4;
|
|
728
866
|
}
|
|
729
867
|
if (frontend.siteType === "store") {
|
|
868
|
+
const semanticDrift = detectBenchmarkStoreSemanticDrift(promptText, htmlText);
|
|
869
|
+
if (semanticDrift) {
|
|
870
|
+
flags.push(semanticDrift);
|
|
871
|
+
score += 4;
|
|
872
|
+
}
|
|
730
873
|
if (/\b(?:cart would open here|would go to checkout|would continue to checkout|would appear here)\b/i.test(htmlText)) {
|
|
731
874
|
flags.push("benchmark store still contains fake cart or checkout fallback behavior");
|
|
732
875
|
score += 4;
|
|
@@ -806,12 +949,27 @@ export function shouldForceFrontendRebuild({ frontend = null, slop = null, revis
|
|
|
806
949
|
return revisionCount < 2;
|
|
807
950
|
}
|
|
808
951
|
|
|
809
|
-
export function getFrontendAcceptanceFailure({ frontend = null, slop = null } = {}) {
|
|
810
|
-
if (!frontend?.benchmarkMode
|
|
952
|
+
export function getFrontendAcceptanceFailure({ frontend = null, slop = null, designReview = null, screenshotReview = null } = {}) {
|
|
953
|
+
if (!frontend?.benchmarkMode) return null;
|
|
954
|
+
if (screenshotReview?.verdict === "weak") {
|
|
955
|
+
return {
|
|
956
|
+
siteType: frontend.siteType,
|
|
957
|
+
reason: `benchmark ${frontend.siteType} render review is weak: ${String(screenshotReview.summary || "").trim() || "visual result did not clear benchmark review"}`,
|
|
958
|
+
flags: Array.isArray(slop?.flags) ? slop.flags : []
|
|
959
|
+
};
|
|
960
|
+
}
|
|
961
|
+
if (designReview?.verdict === "weak") {
|
|
962
|
+
return {
|
|
963
|
+
siteType: frontend.siteType,
|
|
964
|
+
reason: `benchmark ${frontend.siteType} design review is weak: ${String(designReview.summary || "").trim() || "design did not clear benchmark review"}`,
|
|
965
|
+
flags: Array.isArray(slop?.flags) ? slop.flags : []
|
|
966
|
+
};
|
|
967
|
+
}
|
|
968
|
+
if (!slop?.hardBlock) return null;
|
|
811
969
|
const flags = Array.isArray(slop.flags) ? slop.flags : [];
|
|
812
970
|
if (frontend.siteType === "store") {
|
|
813
971
|
const blocking = flags.filter((flag) =>
|
|
814
|
-
/Tailwind CDN|Font Awesome|placeholder product imagery|placeholder imagery|fake proof or badge chrome|fake keyboard or shortcut chrome|demo behavior|demo page explanation copy|fake cart or checkout fallback behavior|generic luxury-commerce copy/i.test(flag)
|
|
972
|
+
/Tailwind CDN|Font Awesome|placeholder product imagery|placeholder imagery|fake proof or badge chrome|fake keyboard or shortcut chrome|demo behavior|demo page explanation copy|fake cart or checkout fallback behavior|generic luxury-commerce copy|beauty\/skincare landing-page tropes|semantic drift/i.test(flag)
|
|
815
973
|
);
|
|
816
974
|
if (blocking.length > 0) {
|
|
817
975
|
return {
|
|
@@ -832,6 +990,7 @@ export function buildFrontendRevisionPrompt({
|
|
|
832
990
|
} = {}) {
|
|
833
991
|
const siteType = pickByPattern(String(originalPrompt || ""), SITE_TYPES, "landing");
|
|
834
992
|
const benchmarkMode = isBenchmarkFrontendTask(originalPrompt);
|
|
993
|
+
const benchmarkVariant = chooseBenchmarkStarterVariant({ siteType, promptText: originalPrompt, benchmarkMode });
|
|
835
994
|
const benchmarkExamples = getBenchmarkExamples(siteType, benchmarkMode);
|
|
836
995
|
const issues = Array.isArray(designReview?.issues) ? designReview.issues.slice(0, 6) : [];
|
|
837
996
|
const nextPass = Array.isArray(designReview?.nextPass) ? designReview.nextPass.slice(0, 6) : [];
|
|
@@ -844,6 +1003,7 @@ export function buildFrontendRevisionPrompt({
|
|
|
844
1003
|
issues.length > 0 ? `Problems to fix:\n- ${issues.join("\n- ")}` : "",
|
|
845
1004
|
visualIssues.length > 0 ? `Visible screenshot problems:\n- ${visualIssues.join("\n- ")}` : "",
|
|
846
1005
|
slopFlags.length > 0 ? `Deterministic slop flags:\n- ${slopFlags.join("\n- ")}` : "",
|
|
1006
|
+
benchmarkVariant ? `Preserve this benchmark variant direction: ${benchmarkVariant.label}\n- ${benchmarkVariant.starter.join("\n- ")}` : "",
|
|
847
1007
|
benchmarkExamples ? `Benchmark example guide:\nGood:\n- ${benchmarkExamples.good.join("\n- ")}\nBad:\n- ${benchmarkExamples.bad.join("\n- ")}` : "",
|
|
848
1008
|
nextPass.length > 0 ? `Revision priorities:\n- ${nextPass.join("\n- ")}` : "",
|
|
849
1009
|
visualNextPass.length > 0 ? `Screenshot revision priorities:\n- ${visualNextPass.join("\n- ")}` : "",
|
|
@@ -880,6 +1040,9 @@ export function buildFrontendRebuildPrompt({
|
|
|
880
1040
|
} = {}) {
|
|
881
1041
|
const siteType = pickByPattern(String(originalPrompt || ""), SITE_TYPES, "landing");
|
|
882
1042
|
const benchmarkMode = isBenchmarkFrontendTask(originalPrompt);
|
|
1043
|
+
const benchmarkVariant = frontend?.benchmarkVariantStarter?.length
|
|
1044
|
+
? { label: frontend.benchmarkVariant || "selected benchmark variant", starter: frontend.benchmarkVariantStarter }
|
|
1045
|
+
: chooseBenchmarkStarterVariant({ siteType, promptText: originalPrompt, archetype: frontend?.archetype || "", benchmarkMode });
|
|
883
1046
|
const benchmarkExamples = getBenchmarkExamples(siteType, benchmarkMode);
|
|
884
1047
|
const slopFlags = Array.isArray(slop?.flags) ? slop.flags.slice(0, 8) : [];
|
|
885
1048
|
const issues = Array.isArray(designReview?.issues) ? designReview.issues.slice(0, 6) : [];
|
|
@@ -889,6 +1052,7 @@ export function buildFrontendRebuildPrompt({
|
|
|
889
1052
|
"Rebuild the affected frontend files from scratch within the same contract.",
|
|
890
1053
|
`Original task: ${String(originalPrompt || "").trim()}`,
|
|
891
1054
|
starter ? `Required starter skeleton:\n- ${starter.join("\n- ")}` : "",
|
|
1055
|
+
benchmarkVariant ? `Required benchmark variant direction: ${benchmarkVariant.label}\n- ${benchmarkVariant.starter.join("\n- ")}` : "",
|
|
892
1056
|
benchmarkExamples ? `Benchmark example guide:\nGood:\n- ${benchmarkExamples.good.join("\n- ")}\nBad:\n- ${benchmarkExamples.bad.join("\n- ")}` : "",
|
|
893
1057
|
slopFlags.length > 0 ? `Banned patterns that must be removed completely:\n- ${slopFlags.join("\n- ")}` : "",
|
|
894
1058
|
issues.length > 0 ? `Design issues to correct in the rebuild:\n- ${issues.join("\n- ")}` : "",
|
package/src/workflow.js
CHANGED
|
@@ -187,7 +187,7 @@ export async function runBuildWorkflow({
|
|
|
187
187
|
? detectFrontendSlop({ promptText, assistantText: activeResponse.content || "", receipt: activeReceipt, designReview })
|
|
188
188
|
: null;
|
|
189
189
|
const artifactSlop = designReview
|
|
190
|
-
? await inspectFrontendArtifacts({ cwd: context.cwd, receipt: activeReceipt, frontend: frontendCtx?.frontend || null })
|
|
190
|
+
? await inspectFrontendArtifacts({ cwd: context.cwd, promptText, receipt: activeReceipt, frontend: frontendCtx?.frontend || null })
|
|
191
191
|
: null;
|
|
192
192
|
const designSlop = mergeFrontendSlop(deterministicSlop, artifactSlop);
|
|
193
193
|
|
|
@@ -256,7 +256,9 @@ export async function runBuildWorkflow({
|
|
|
256
256
|
|
|
257
257
|
const acceptanceFailure = getFrontendAcceptanceFailure({
|
|
258
258
|
frontend: frontendCtx?.frontend || null,
|
|
259
|
-
slop: designSlop
|
|
259
|
+
slop: designSlop,
|
|
260
|
+
designReview,
|
|
261
|
+
screenshotReview
|
|
260
262
|
});
|
|
261
263
|
if (acceptanceFailure) {
|
|
262
264
|
throw new Error(acceptanceFailure.reason);
|