@tritard/waterbrother 0.8.20 → 0.8.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tritard/waterbrother",
3
- "version": "0.8.20",
3
+ "version": "0.8.22",
4
4
  "description": "Waterbrother: Grok-powered coding CLI with local tools, sessions, operator modes, and approval controls",
5
5
  "type": "module",
6
6
  "bin": {
package/src/agent.js CHANGED
@@ -59,12 +59,11 @@ When you use tools:
59
59
  - define the typography, color system, spacing rhythm, and motion restraint through the code you write
60
60
  - avoid fake press badges, fake testimonials, fake founder lore, and placeholder image services unless explicitly requested
61
61
  - avoid generic Tailwind CDN template output and default premium-blog serif/sans pairings
62
- - avoid invented publication brands, named authors, issue numbers, staff credits, or editorial worldbuilding unless the user explicitly asked for fictional framing
63
62
  - avoid browser-default Georgia/Times plus system-sans typography as a final design answer
64
63
  - avoid fake keyboard shortcuts, fake command palettes, and demo-only interaction chrome
65
- - avoid generic reflective-editorial copy about slowness, attention, or the examined life unless the user explicitly asked for that voice
66
- - prefer fewer sections and at least one asymmetrical or compositionally distinctive move over a fully balanced template layout
67
64
  - prefer fewer stronger sections over a busy page with fake search, fake filters, or filler widgets
65
+ - adapt the above rules to the site type; docs and dashboards may need clearer repetitive utility layouts than editorial or portfolio pages
66
+ - only apply benchmark-grade strictness about publication framing, reflective-editorial copy, and forced asymmetry when the task is clearly a benchmark/comparison ask
68
67
  - After making code or file edits:
69
68
  - lead with the file path or paths you changed
70
69
  - summarize concrete changes, not vibes or quality claims
@@ -124,9 +123,13 @@ function buildSystemPrompt(profile, experienceMode = "standard", autonomyMode =
124
123
  `- tone: ${frontend.tone || "unknown"}`,
125
124
  `- content mode: ${frontend.contentMode || "designed-placeholder"}`,
126
125
  `- asset policy: ${frontend.assetPolicy || "no-placeholder-images-unless-explicit"}`,
127
- `- layout starter: ${frontend.layoutStarter || "choose a strong non-generic section grammar"}`
126
+ `- layout starter: ${frontend.layoutStarter || "choose a strong non-generic section grammar"}`,
127
+ `- benchmark mode: ${frontend.benchmarkMode ? "on" : "off"}`
128
128
  ].join("\n")
129
129
  );
130
+ if (Array.isArray(frontend.siteTypeRules) && frontend.siteTypeRules.length > 0) {
131
+ ctxLines.push(`Site-type rules:\n- ${frontend.siteTypeRules.join("\n- ")}`);
132
+ }
130
133
  }
131
134
  if (executionContext.reminders) ctxLines.push(`Scope reminders:\n${executionContext.reminders}`);
132
135
  if (ctxLines.length > 0) base += `\n\nExecution context:\n${ctxLines.join("\n")}`;
package/src/frontend.js CHANGED
@@ -47,6 +47,24 @@ Rules:
47
47
  - Be concrete about visible layout, spacing, typography, contrast, composition, and interaction cues.
48
48
  - Do not wrap JSON in markdown.`;
49
49
 
50
+ const BENCHMARK_FRONTEND_PROMPT = /\b(?:benchmark|squarespace quality|ultimate design|first class|on par|codex|claude code|cc)\b/i;
51
+
52
+ const UNIVERSAL_FRONTEND_REMINDERS = [
53
+ "Choose one visual direction and stay consistent across typography, spacing, color, and motion.",
54
+ "Choose a real type system with deliberate character. Browser-default Georgia/Times plus system sans is not an acceptable final design direction.",
55
+ "Prefer hand-authored CSS variables and layout rules over generic template utility sprawl when feasible.",
56
+ "Cut fake credibility elements, fake brands, fake testimonials, and filler interface chrome unless explicitly requested.",
57
+ "Do not add fake keyboard shortcuts, fake command palettes, or demo-only interactive chrome unless the interface truly needs them.",
58
+ "Avoid placeholder image services, Inter/Playfair default pairings, Tailwind CDN starter aesthetics, and generic premium-blog tropes.",
59
+ "Prefer fewer sections with stronger hierarchy over a long page full of low-value widgets."
60
+ ];
61
+
62
+ const BENCHMARK_FRONTEND_REMINDERS = [
63
+ "For benchmark frontend tasks, cut generic reflective-editorial copy. Use sharper, more concrete language or neutral structural placeholders instead of atmosphere-writing.",
64
+ "For benchmark frontend tasks, force at least one asymmetrical or compositionally distinctive move instead of a fully balanced template layout.",
65
+ "Benchmark mode: treat fake issue framing, fake publication history, fake keyboard shortcuts, and invented named contributors as disallowed outputs. Use neutral structural placeholders if needed."
66
+ ];
67
+
50
68
  const SITE_TYPES = [
51
69
  ["blog", /\b(blog|journal|essays?|articles?|publication|editorial)\b/i],
52
70
  ["landing", /\b(landing page|homepage|home page|marketing site|product site|launch page|hero section)\b/i],
@@ -71,6 +89,30 @@ const AUDIENCE_HINTS = [
71
89
  ["operators", /\b(founders?|operators?|engineers?|designers?)\b/i]
72
90
  ];
73
91
 
92
+ const SITE_TYPE_RULES = {
93
+ blog: [
94
+ "For blogs and editorial sites, prefer structural placeholder labels over inventing fictional publication brands, named editors, or fake biographical lore.",
95
+ "Do not invent issue numbers, seasonal issue metadata, staff credits, or named authors/photographers unless the user explicitly asked for fictional worldbuilding.",
96
+ "Do not add a newsletter/signup block unless the user explicitly asked for one or the site type clearly requires capture."
97
+ ],
98
+ landing: [
99
+ "For landing pages, prioritize the value proposition, proof, and one primary action over editorial atmosphere.",
100
+ "CTA sections are acceptable when they are genuinely part of the requested product flow; avoid fake testimonials or fake customer logos."
101
+ ],
102
+ portfolio: [
103
+ "For portfolios, let work samples, case studies, and identity carry the page rather than editorial filler.",
104
+ "Do not add newsletter/signup blocks or publication framing unless explicitly requested."
105
+ ],
106
+ docs: [
107
+ "For docs and help centers, clarity and information scent matter more than asymmetry or art direction.",
108
+ "Balanced, repetitive utility layouts are acceptable when they improve scanability."
109
+ ],
110
+ dashboard: [
111
+ "For dashboards and app UI, useful controls and information density matter more than editorial composition.",
112
+ "Do not remove necessary navigation or controls just to make the page feel minimal."
113
+ ]
114
+ };
115
+
74
116
  const ARCHETYPE_RULES = {
75
117
  "editorial-minimal": [
76
118
  "Use restrained editorial hierarchy with fewer, larger blocks of content.",
@@ -240,12 +282,20 @@ function inferContentMode(text) {
240
282
  return "designed-placeholder";
241
283
  }
242
284
 
285
+ function isBenchmarkFrontendTask(text) {
286
+ return BENCHMARK_FRONTEND_PROMPT.test(String(text || ""));
287
+ }
288
+
243
289
  function inferAssetPolicy(text) {
244
290
  if (/\b(no images|text only)\b/i.test(text)) return "text-only";
245
291
  if (/\b(use stock images|placeholder images)\b/i.test(text)) return "placeholders-allowed";
246
292
  return "no-placeholder-images-unless-explicit";
247
293
  }
248
294
 
295
+ function getSiteTypeRules(siteType) {
296
+ return SITE_TYPE_RULES[siteType] || [];
297
+ }
298
+
249
299
  export function isFrontendGenerationPrompt(promptText = "", profile = "coder") {
250
300
  const text = String(promptText || "").trim();
251
301
  if (!text) return false;
@@ -264,25 +314,21 @@ export function buildFrontendExecutionContext({ promptText = "", profile = "code
264
314
  const archetype = pickByPattern(text, ARCHETYPES, siteType === "blog" ? "editorial-minimal" : "high-contrast-tech");
265
315
  const audience = pickByPattern(text, AUDIENCE_HINTS, siteType === "portfolio" ? "recruiters" : "readers");
266
316
  const tone = inferTone(text);
267
- const contentMode = inferContentMode(text);
317
+ const benchmarkMode = isBenchmarkFrontendTask(text);
318
+ let contentMode = inferContentMode(text);
319
+ if (benchmarkMode && siteType === "blog" && contentMode === "designed-placeholder") {
320
+ contentMode = "structural-placeholder";
321
+ }
268
322
  const assetPolicy = inferAssetPolicy(text);
269
323
  const layoutStarter = getLayoutStarter(siteType, archetype);
324
+ const siteTypeRules = getSiteTypeRules(siteType);
270
325
  const reminders = [
271
326
  `Frontend brief: site type = ${siteType}, archetype = ${archetype}, audience = ${audience}, tone = ${tone}.`,
272
327
  `Content mode: ${contentMode}. Asset policy: ${assetPolicy}.`,
273
328
  layoutStarter ? `Layout starter: ${layoutStarter[0]}` : "",
274
- "Choose one visual direction and stay consistent across typography, spacing, color, and motion.",
275
- "Choose a real type system with deliberate character. Browser-default Georgia/Times plus system sans is not an acceptable final design direction.",
276
- "Prefer hand-authored CSS variables and layout rules over generic template utility sprawl when feasible.",
277
- "Cut fake credibility elements, fake brands, fake testimonials, and filler interface chrome unless explicitly requested.",
278
- "For blogs and editorial sites, prefer structural placeholder labels over inventing fictional publication brands, named editors, or fake biographical lore.",
279
- "Do not invent issue numbers, seasonal issue metadata, staff credits, or named authors/photographers unless the user explicitly asked for fictional worldbuilding.",
280
- "Do not add a newsletter/signup block unless the user explicitly asked for one or the site type clearly requires capture.",
281
- "Do not add fake keyboard shortcuts, fake command palettes, or demo-only interactive chrome unless the interface truly needs them.",
282
- "Avoid placeholder image services, Inter/Playfair default pairings, Tailwind CDN starter aesthetics, and generic premium-blog tropes.",
283
- "Prefer fewer sections with stronger hierarchy over a long page full of low-value widgets.",
284
- "For benchmark frontend tasks, cut generic reflective-editorial copy. Use sharper, more concrete language or neutral structural placeholders instead of atmosphere-writing.",
285
- "For benchmark frontend tasks, force at least one asymmetrical or compositionally distinctive move instead of a fully balanced template layout.",
329
+ ...UNIVERSAL_FRONTEND_REMINDERS,
330
+ ...siteTypeRules,
331
+ ...(benchmarkMode ? BENCHMARK_FRONTEND_REMINDERS : []),
286
332
  ...(layoutStarter ? [layoutStarter[1]] : []),
287
333
  ...(ARCHETYPE_RULES[archetype] || [])
288
334
  ].join("\n");
@@ -295,7 +341,9 @@ export function buildFrontendExecutionContext({ promptText = "", profile = "code
295
341
  tone,
296
342
  contentMode,
297
343
  assetPolicy,
298
- layoutStarter: layoutStarter ? layoutStarter[0] : null
344
+ layoutStarter: layoutStarter ? layoutStarter[0] : null,
345
+ benchmarkMode,
346
+ siteTypeRules
299
347
  },
300
348
  reminders
301
349
  };
@@ -330,20 +378,25 @@ export function detectFrontendSlop({ promptText = "", assistantText = "", receip
330
378
  }
331
379
  }
332
380
  const siteType = pickByPattern(String(promptText || ""), SITE_TYPES, "landing");
381
+ const benchmarkMode = isBenchmarkFrontendTask(promptText);
333
382
  if (siteType === "blog" && !hasExplicitNewsletterRequest(promptText) && /\bnewsletter|subscribe|email\b/i.test(haystack)) {
334
383
  flags.push("newsletter block added without being requested");
335
384
  score += 3;
336
385
  }
337
- if (siteType === "blog" && /\b(?:by [A-Z][a-z]+ [A-Z][a-z]+|photography by [A-Z][a-z]+ [A-Z][a-z]+|A quiet publication|Pacific Northwest|Journal of Attention|Receive the next dispatch|VOL\.\s*\d+|The Quarterly)\b/i.test(haystack)) {
386
+ if (siteType === "blog" && /\b(?:by [A-Z][a-z]+ [A-Z][a-z]+|photography by [A-Z][a-z]+ [A-Z][a-z]+|A quiet publication|Pacific Northwest|Journal of Attention|Receive the next dispatch|VOL\.\s*\d+|The Quarterly|Since 20\d{2}|Featured\s+•\s+(?:Winter|Spring|Summer|Autumn)\s+20\d{2})\b/i.test(haystack)) {
338
387
  flags.push("fictional publication identity replacing structural placeholder content");
339
388
  score += 3;
340
389
  }
341
390
  if (/\b(?:command palette would open here|metaKey && e\.key === ['"]k['"]|keyboard accessibility)\b/i.test(haystack)) {
342
391
  flags.push("fake keyboard or command-palette gimmick");
343
- score += 2;
392
+ score += 3;
393
+ }
394
+ if (siteType === "blog" && benchmarkMode && /\b(?:measure of time in ordinary things|quiet rituals|architecture of memory|cult of productivity|considered writing|the slow accumulation of days|craft of attention|in defense of silence|meditation on permanence and ephemerality|stillness and the act of looking|deliberate observation)\b/i.test(haystack)) {
395
+ flags.push("benchmark blog fell back to reflective-editorial atmosphere copy");
396
+ score += 3;
344
397
  }
345
398
  const editorialLayoutHits = GENERIC_EDITORIAL_LAYOUT_PATTERNS.filter((pattern) => pattern.test(haystack)).length;
346
- if (siteType === "blog" && editorialLayoutHits >= 3) {
399
+ if (siteType === "blog" && benchmarkMode && editorialLayoutHits >= 3) {
347
400
  flags.push("generic editorial starter layout");
348
401
  score += 2;
349
402
  }
@@ -352,14 +405,16 @@ export function detectFrontendSlop({ promptText = "", assistantText = "", receip
352
405
  return {
353
406
  score,
354
407
  flags,
408
+ hardBlock: flags.some((flag) => /fictional publication identity|fake keyboard|reflective-editorial atmosphere/.test(flag)),
355
409
  severe: score >= 5,
356
410
  summary: flags.length > 0 ? `frontend slop flags: ${flags.join(", ")}` : "no deterministic frontend slop flags"
357
411
  };
358
412
  }
359
413
 
360
414
  export function shouldAutoReviseFrontend({ designReview = null, slop = null, revisionCount = 0 } = {}) {
361
- if (revisionCount >= 1) return false;
415
+ if (revisionCount >= 2) return false;
362
416
  if (!designReview) return false;
417
+ if (slop?.hardBlock) return true;
363
418
  if (designReview.verdict === "weak") return true;
364
419
  if (designReview.verdict === "caution" && (slop?.score || 0) >= 3) return true;
365
420
  return false;
@@ -390,6 +445,7 @@ export function buildFrontendRevisionPrompt({
390
445
  "Do not use browser-default serif/sans fallback stacks as the end-state typography. Pick a deliberate type system or simplify until the typography feels intentional.",
391
446
  "Remove fake keyboard shortcuts, fake command palettes, and demo-only interaction flourishes that do not help the page.",
392
447
  "Cut reflective-editorial filler copy and replace it with either concrete language or neutral structural placeholders.",
448
+ "For benchmark blog tasks, default to neutral structural placeholder content instead of invented publication framing, issue metadata, or named contributors.",
393
449
  "Reduce section count if needed and push one stronger asymmetrical composition instead of a sequence of balanced blocks.",
394
450
  "Simplify the page if needed. Stronger direction with fewer elements is preferred over busier generic output.",
395
451
  "Rewrite the weakest sections rather than making superficial tweaks."
package/src/workflow.js CHANGED
@@ -187,11 +187,17 @@ export async function runBuildWorkflow({
187
187
 
188
188
  let { impact, review, designReview, designSlop, screenshotReview, screenshotPath } = await analyze(receipt, response);
189
189
  let designRevision = null;
190
+ let revisionCount = 0;
191
+ const revisionHistory = [];
190
192
 
191
- if (shouldAutoReviseFrontend({ designReview, slop: designSlop, revisionCount: 0 })) {
192
- const firstPassVerdict = designReview?.verdict || null;
193
- const firstPassSummary = String(designReview?.summary || "").trim();
194
- const firstPassSlopFlags = Array.isArray(designSlop?.flags) ? [...designSlop.flags] : [];
193
+ while (shouldAutoReviseFrontend({ designReview, slop: designSlop, revisionCount })) {
194
+ const passNumber = revisionCount + 1;
195
+ revisionHistory.push({
196
+ passNumber,
197
+ verdict: designReview?.verdict || null,
198
+ summary: String(designReview?.summary || "").trim(),
199
+ slopFlags: Array.isArray(designSlop?.flags) ? [...designSlop.flags] : []
200
+ });
195
201
  const revisionPrompt = buildFrontendRevisionPrompt({
196
202
  originalPrompt: promptText,
197
203
  designReview,
@@ -200,10 +206,12 @@ export async function runBuildWorkflow({
200
206
  });
201
207
  const revisionCtx = {
202
208
  ...executionCtx,
203
- phase: "design-revision",
209
+ phase: revisionCount === 0 ? "design-revision" : `design-revision-${passNumber}`,
204
210
  reminders: [
205
211
  executionCtx.reminders || "",
206
- "Automatic second pass: fix the flagged frontend design issues without widening scope."
212
+ revisionCount === 0
213
+ ? "Automatic second pass: fix the flagged frontend design issues without widening scope."
214
+ : `Automatic follow-up pass ${passNumber}: remove any remaining benchmark hard-fail patterns.`
207
215
  ].filter(Boolean).join("\n")
208
216
  };
209
217
  agent.setExecutionContext(revisionCtx);
@@ -212,16 +220,15 @@ export async function runBuildWorkflow({
212
220
  }
213
221
  response = await agent.runBuildTurn(revisionPrompt, handlers);
214
222
  const revisedReceipt = await agent.toolRuntime.completeTurn({ signal: handlers.signal });
215
- if (revisedReceipt) {
216
- receipt = revisedReceipt;
217
- ({ impact, review, designReview, designSlop, screenshotReview, screenshotPath } = await analyze(receipt, response));
218
- designRevision = {
219
- triggered: true,
220
- firstPassVerdict,
221
- initialSummary: firstPassSummary,
222
- slopFlags: firstPassSlopFlags
223
- };
224
- }
223
+ if (!revisedReceipt) break;
224
+ receipt = revisedReceipt;
225
+ ({ impact, review, designReview, designSlop, screenshotReview, screenshotPath } = await analyze(receipt, response));
226
+ revisionCount += 1;
227
+ designRevision = {
228
+ triggered: true,
229
+ passes: revisionCount,
230
+ history: revisionHistory
231
+ };
225
232
  }
226
233
 
227
234
  // Update receipt with impact and review