@runchr/gstack-antigravity 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of @runchr/gstack-antigravity might be problematic. Click here for more details.

Files changed (231) hide show
  1. package/.agents/skills/gstack/.agents/skills/gstack/SKILL.md +651 -0
  2. package/.agents/skills/gstack/.agents/skills/gstack-autoplan/SKILL.md +678 -0
  3. package/.agents/skills/gstack/.agents/skills/gstack-benchmark/SKILL.md +482 -0
  4. package/.agents/skills/gstack/.agents/skills/gstack-browse/SKILL.md +511 -0
  5. package/.agents/skills/gstack/.agents/skills/gstack-canary/SKILL.md +486 -0
  6. package/.agents/skills/gstack/.agents/skills/gstack-careful/SKILL.md +50 -0
  7. package/.agents/skills/gstack/.agents/skills/gstack-cso/SKILL.md +607 -0
  8. package/.agents/skills/gstack/.agents/skills/gstack-design-consultation/SKILL.md +615 -0
  9. package/.agents/skills/gstack/.agents/skills/gstack-design-review/SKILL.md +988 -0
  10. package/.agents/skills/gstack/.agents/skills/gstack-document-release/SKILL.md +604 -0
  11. package/.agents/skills/gstack/.agents/skills/gstack-freeze/SKILL.md +67 -0
  12. package/.agents/skills/gstack/.agents/skills/gstack-guard/SKILL.md +62 -0
  13. package/.agents/skills/gstack/.agents/skills/gstack-investigate/SKILL.md +415 -0
  14. package/.agents/skills/gstack/.agents/skills/gstack-land-and-deploy/SKILL.md +873 -0
  15. package/.agents/skills/gstack/.agents/skills/gstack-office-hours/SKILL.md +986 -0
  16. package/.agents/skills/gstack/.agents/skills/gstack-plan-ceo-review/SKILL.md +1268 -0
  17. package/.agents/skills/gstack/.agents/skills/gstack-plan-design-review/SKILL.md +668 -0
  18. package/.agents/skills/gstack/.agents/skills/gstack-plan-eng-review/SKILL.md +826 -0
  19. package/.agents/skills/gstack/.agents/skills/gstack-qa/SKILL.md +1006 -0
  20. package/.agents/skills/gstack/.agents/skills/gstack-qa-only/SKILL.md +626 -0
  21. package/.agents/skills/gstack/.agents/skills/gstack-retro/SKILL.md +1065 -0
  22. package/.agents/skills/gstack/.agents/skills/gstack-review/SKILL.md +704 -0
  23. package/.agents/skills/gstack/.agents/skills/gstack-setup-browser-cookies/SKILL.md +325 -0
  24. package/.agents/skills/gstack/.agents/skills/gstack-setup-deploy/SKILL.md +450 -0
  25. package/.agents/skills/gstack/.agents/skills/gstack-ship/SKILL.md +1312 -0
  26. package/.agents/skills/gstack/.agents/skills/gstack-unfreeze/SKILL.md +36 -0
  27. package/.agents/skills/gstack/.agents/skills/gstack-upgrade/SKILL.md +220 -0
  28. package/.agents/skills/gstack/.env.example +5 -0
  29. package/.agents/skills/gstack/.github/workflows/skill-docs.yml +17 -0
  30. package/.agents/skills/gstack/AGENTS.md +49 -0
  31. package/.agents/skills/gstack/ARCHITECTURE.md +359 -0
  32. package/.agents/skills/gstack/BROWSER.md +271 -0
  33. package/.agents/skills/gstack/CHANGELOG.md +800 -0
  34. package/.agents/skills/gstack/CLAUDE.md +284 -0
  35. package/.agents/skills/gstack/CONTRIBUTING.md +370 -0
  36. package/.agents/skills/gstack/ETHOS.md +129 -0
  37. package/.agents/skills/gstack/LICENSE +21 -0
  38. package/.agents/skills/gstack/README.md +228 -0
  39. package/.agents/skills/gstack/SKILL.md +657 -0
  40. package/.agents/skills/gstack/SKILL.md.tmpl +281 -0
  41. package/.agents/skills/gstack/TODOS.md +564 -0
  42. package/.agents/skills/gstack/VERSION +1 -0
  43. package/.agents/skills/gstack/autoplan/SKILL.md +689 -0
  44. package/.agents/skills/gstack/autoplan/SKILL.md.tmpl +416 -0
  45. package/.agents/skills/gstack/benchmark/SKILL.md +489 -0
  46. package/.agents/skills/gstack/benchmark/SKILL.md.tmpl +233 -0
  47. package/.agents/skills/gstack/bin/dev-setup +68 -0
  48. package/.agents/skills/gstack/bin/dev-teardown +56 -0
  49. package/.agents/skills/gstack/bin/gstack-analytics +191 -0
  50. package/.agents/skills/gstack/bin/gstack-community-dashboard +113 -0
  51. package/.agents/skills/gstack/bin/gstack-config +38 -0
  52. package/.agents/skills/gstack/bin/gstack-diff-scope +71 -0
  53. package/.agents/skills/gstack/bin/gstack-global-discover.ts +591 -0
  54. package/.agents/skills/gstack/bin/gstack-repo-mode +93 -0
  55. package/.agents/skills/gstack/bin/gstack-review-log +9 -0
  56. package/.agents/skills/gstack/bin/gstack-review-read +12 -0
  57. package/.agents/skills/gstack/bin/gstack-slug +15 -0
  58. package/.agents/skills/gstack/bin/gstack-telemetry-log +158 -0
  59. package/.agents/skills/gstack/bin/gstack-telemetry-sync +127 -0
  60. package/.agents/skills/gstack/bin/gstack-update-check +196 -0
  61. package/.agents/skills/gstack/browse/SKILL.md +517 -0
  62. package/.agents/skills/gstack/browse/SKILL.md.tmpl +141 -0
  63. package/.agents/skills/gstack/browse/bin/find-browse +21 -0
  64. package/.agents/skills/gstack/browse/bin/remote-slug +14 -0
  65. package/.agents/skills/gstack/browse/scripts/build-node-server.sh +48 -0
  66. package/.agents/skills/gstack/browse/src/browser-manager.ts +634 -0
  67. package/.agents/skills/gstack/browse/src/buffers.ts +137 -0
  68. package/.agents/skills/gstack/browse/src/bun-polyfill.cjs +109 -0
  69. package/.agents/skills/gstack/browse/src/cli.ts +420 -0
  70. package/.agents/skills/gstack/browse/src/commands.ts +111 -0
  71. package/.agents/skills/gstack/browse/src/config.ts +150 -0
  72. package/.agents/skills/gstack/browse/src/cookie-import-browser.ts +417 -0
  73. package/.agents/skills/gstack/browse/src/cookie-picker-routes.ts +207 -0
  74. package/.agents/skills/gstack/browse/src/cookie-picker-ui.ts +541 -0
  75. package/.agents/skills/gstack/browse/src/find-browse.ts +61 -0
  76. package/.agents/skills/gstack/browse/src/meta-commands.ts +269 -0
  77. package/.agents/skills/gstack/browse/src/platform.ts +17 -0
  78. package/.agents/skills/gstack/browse/src/read-commands.ts +335 -0
  79. package/.agents/skills/gstack/browse/src/server.ts +369 -0
  80. package/.agents/skills/gstack/browse/src/snapshot.ts +398 -0
  81. package/.agents/skills/gstack/browse/src/url-validation.ts +91 -0
  82. package/.agents/skills/gstack/browse/src/write-commands.ts +352 -0
  83. package/.agents/skills/gstack/browse/test/bun-polyfill.test.ts +72 -0
  84. package/.agents/skills/gstack/browse/test/commands.test.ts +1836 -0
  85. package/.agents/skills/gstack/browse/test/config.test.ts +250 -0
  86. package/.agents/skills/gstack/browse/test/cookie-import-browser.test.ts +397 -0
  87. package/.agents/skills/gstack/browse/test/cookie-picker-routes.test.ts +205 -0
  88. package/.agents/skills/gstack/browse/test/find-browse.test.ts +50 -0
  89. package/.agents/skills/gstack/browse/test/fixtures/basic.html +33 -0
  90. package/.agents/skills/gstack/browse/test/fixtures/cursor-interactive.html +22 -0
  91. package/.agents/skills/gstack/browse/test/fixtures/dialog.html +15 -0
  92. package/.agents/skills/gstack/browse/test/fixtures/empty.html +2 -0
  93. package/.agents/skills/gstack/browse/test/fixtures/forms.html +55 -0
  94. package/.agents/skills/gstack/browse/test/fixtures/qa-eval-checkout.html +108 -0
  95. package/.agents/skills/gstack/browse/test/fixtures/qa-eval-spa.html +98 -0
  96. package/.agents/skills/gstack/browse/test/fixtures/qa-eval.html +51 -0
  97. package/.agents/skills/gstack/browse/test/fixtures/responsive.html +49 -0
  98. package/.agents/skills/gstack/browse/test/fixtures/snapshot.html +55 -0
  99. package/.agents/skills/gstack/browse/test/fixtures/spa.html +24 -0
  100. package/.agents/skills/gstack/browse/test/fixtures/states.html +17 -0
  101. package/.agents/skills/gstack/browse/test/fixtures/upload.html +25 -0
  102. package/.agents/skills/gstack/browse/test/gstack-config.test.ts +125 -0
  103. package/.agents/skills/gstack/browse/test/gstack-update-check.test.ts +467 -0
  104. package/.agents/skills/gstack/browse/test/handoff.test.ts +235 -0
  105. package/.agents/skills/gstack/browse/test/path-validation.test.ts +63 -0
  106. package/.agents/skills/gstack/browse/test/platform.test.ts +37 -0
  107. package/.agents/skills/gstack/browse/test/snapshot.test.ts +467 -0
  108. package/.agents/skills/gstack/browse/test/test-server.ts +57 -0
  109. package/.agents/skills/gstack/browse/test/url-validation.test.ts +72 -0
  110. package/.agents/skills/gstack/canary/SKILL.md +493 -0
  111. package/.agents/skills/gstack/canary/SKILL.md.tmpl +220 -0
  112. package/.agents/skills/gstack/careful/SKILL.md +59 -0
  113. package/.agents/skills/gstack/careful/SKILL.md.tmpl +57 -0
  114. package/.agents/skills/gstack/careful/bin/check-careful.sh +112 -0
  115. package/.agents/skills/gstack/codex/SKILL.md +677 -0
  116. package/.agents/skills/gstack/codex/SKILL.md.tmpl +356 -0
  117. package/.agents/skills/gstack/conductor.json +6 -0
  118. package/.agents/skills/gstack/cso/SKILL.md +615 -0
  119. package/.agents/skills/gstack/cso/SKILL.md.tmpl +376 -0
  120. package/.agents/skills/gstack/design-consultation/SKILL.md +625 -0
  121. package/.agents/skills/gstack/design-consultation/SKILL.md.tmpl +369 -0
  122. package/.agents/skills/gstack/design-review/SKILL.md +998 -0
  123. package/.agents/skills/gstack/design-review/SKILL.md.tmpl +262 -0
  124. package/.agents/skills/gstack/docs/images/github-2013.png +0 -0
  125. package/.agents/skills/gstack/docs/images/github-2026.png +0 -0
  126. package/.agents/skills/gstack/docs/skills.md +877 -0
  127. package/.agents/skills/gstack/document-release/SKILL.md +613 -0
  128. package/.agents/skills/gstack/document-release/SKILL.md.tmpl +357 -0
  129. package/.agents/skills/gstack/freeze/SKILL.md +82 -0
  130. package/.agents/skills/gstack/freeze/SKILL.md.tmpl +80 -0
  131. package/.agents/skills/gstack/freeze/bin/check-freeze.sh +68 -0
  132. package/.agents/skills/gstack/gstack-upgrade/SKILL.md +226 -0
  133. package/.agents/skills/gstack/gstack-upgrade/SKILL.md.tmpl +224 -0
  134. package/.agents/skills/gstack/guard/SKILL.md +82 -0
  135. package/.agents/skills/gstack/guard/SKILL.md.tmpl +80 -0
  136. package/.agents/skills/gstack/investigate/SKILL.md +435 -0
  137. package/.agents/skills/gstack/investigate/SKILL.md.tmpl +196 -0
  138. package/.agents/skills/gstack/land-and-deploy/SKILL.md +880 -0
  139. package/.agents/skills/gstack/land-and-deploy/SKILL.md.tmpl +575 -0
  140. package/.agents/skills/gstack/office-hours/SKILL.md +996 -0
  141. package/.agents/skills/gstack/office-hours/SKILL.md.tmpl +624 -0
  142. package/.agents/skills/gstack/package.json +55 -0
  143. package/.agents/skills/gstack/plan-ceo-review/SKILL.md +1277 -0
  144. package/.agents/skills/gstack/plan-ceo-review/SKILL.md.tmpl +838 -0
  145. package/.agents/skills/gstack/plan-design-review/SKILL.md +676 -0
  146. package/.agents/skills/gstack/plan-design-review/SKILL.md.tmpl +314 -0
  147. package/.agents/skills/gstack/plan-eng-review/SKILL.md +836 -0
  148. package/.agents/skills/gstack/plan-eng-review/SKILL.md.tmpl +279 -0
  149. package/.agents/skills/gstack/qa/SKILL.md +1016 -0
  150. package/.agents/skills/gstack/qa/SKILL.md.tmpl +316 -0
  151. package/.agents/skills/gstack/qa/references/issue-taxonomy.md +85 -0
  152. package/.agents/skills/gstack/qa/templates/qa-report-template.md +126 -0
  153. package/.agents/skills/gstack/qa-only/SKILL.md +633 -0
  154. package/.agents/skills/gstack/qa-only/SKILL.md.tmpl +101 -0
  155. package/.agents/skills/gstack/retro/SKILL.md +1072 -0
  156. package/.agents/skills/gstack/retro/SKILL.md.tmpl +833 -0
  157. package/.agents/skills/gstack/review/SKILL.md +849 -0
  158. package/.agents/skills/gstack/review/SKILL.md.tmpl +259 -0
  159. package/.agents/skills/gstack/review/TODOS-format.md +62 -0
  160. package/.agents/skills/gstack/review/checklist.md +190 -0
  161. package/.agents/skills/gstack/review/design-checklist.md +132 -0
  162. package/.agents/skills/gstack/review/greptile-triage.md +220 -0
  163. package/.agents/skills/gstack/scripts/analytics.ts +190 -0
  164. package/.agents/skills/gstack/scripts/dev-skill.ts +82 -0
  165. package/.agents/skills/gstack/scripts/eval-compare.ts +96 -0
  166. package/.agents/skills/gstack/scripts/eval-list.ts +116 -0
  167. package/.agents/skills/gstack/scripts/eval-select.ts +86 -0
  168. package/.agents/skills/gstack/scripts/eval-summary.ts +187 -0
  169. package/.agents/skills/gstack/scripts/eval-watch.ts +172 -0
  170. package/.agents/skills/gstack/scripts/gen-skill-docs.ts +2414 -0
  171. package/.agents/skills/gstack/scripts/skill-check.ts +167 -0
  172. package/.agents/skills/gstack/setup +269 -0
  173. package/.agents/skills/gstack/setup-browser-cookies/SKILL.md +330 -0
  174. package/.agents/skills/gstack/setup-browser-cookies/SKILL.md.tmpl +74 -0
  175. package/.agents/skills/gstack/setup-deploy/SKILL.md +459 -0
  176. package/.agents/skills/gstack/setup-deploy/SKILL.md.tmpl +220 -0
  177. package/.agents/skills/gstack/ship/SKILL.md +1457 -0
  178. package/.agents/skills/gstack/ship/SKILL.md.tmpl +528 -0
  179. package/.agents/skills/gstack/supabase/config.sh +10 -0
  180. package/.agents/skills/gstack/supabase/functions/community-pulse/index.ts +59 -0
  181. package/.agents/skills/gstack/supabase/functions/telemetry-ingest/index.ts +135 -0
  182. package/.agents/skills/gstack/supabase/functions/update-check/index.ts +37 -0
  183. package/.agents/skills/gstack/supabase/migrations/001_telemetry.sql +89 -0
  184. package/.agents/skills/gstack/test/analytics.test.ts +277 -0
  185. package/.agents/skills/gstack/test/codex-e2e.test.ts +197 -0
  186. package/.agents/skills/gstack/test/fixtures/coverage-audit-fixture.ts +76 -0
  187. package/.agents/skills/gstack/test/fixtures/eval-baselines.json +7 -0
  188. package/.agents/skills/gstack/test/fixtures/qa-eval-checkout-ground-truth.json +43 -0
  189. package/.agents/skills/gstack/test/fixtures/qa-eval-ground-truth.json +43 -0
  190. package/.agents/skills/gstack/test/fixtures/qa-eval-spa-ground-truth.json +43 -0
  191. package/.agents/skills/gstack/test/fixtures/review-eval-design-slop.css +86 -0
  192. package/.agents/skills/gstack/test/fixtures/review-eval-design-slop.html +41 -0
  193. package/.agents/skills/gstack/test/fixtures/review-eval-enum-diff.rb +30 -0
  194. package/.agents/skills/gstack/test/fixtures/review-eval-enum.rb +27 -0
  195. package/.agents/skills/gstack/test/fixtures/review-eval-vuln.rb +14 -0
  196. package/.agents/skills/gstack/test/gemini-e2e.test.ts +173 -0
  197. package/.agents/skills/gstack/test/gen-skill-docs.test.ts +1049 -0
  198. package/.agents/skills/gstack/test/global-discover.test.ts +187 -0
  199. package/.agents/skills/gstack/test/helpers/codex-session-runner.ts +282 -0
  200. package/.agents/skills/gstack/test/helpers/e2e-helpers.ts +239 -0
  201. package/.agents/skills/gstack/test/helpers/eval-store.test.ts +548 -0
  202. package/.agents/skills/gstack/test/helpers/eval-store.ts +689 -0
  203. package/.agents/skills/gstack/test/helpers/gemini-session-runner.test.ts +104 -0
  204. package/.agents/skills/gstack/test/helpers/gemini-session-runner.ts +201 -0
  205. package/.agents/skills/gstack/test/helpers/llm-judge.ts +130 -0
  206. package/.agents/skills/gstack/test/helpers/observability.test.ts +283 -0
  207. package/.agents/skills/gstack/test/helpers/session-runner.test.ts +96 -0
  208. package/.agents/skills/gstack/test/helpers/session-runner.ts +357 -0
  209. package/.agents/skills/gstack/test/helpers/skill-parser.ts +206 -0
  210. package/.agents/skills/gstack/test/helpers/touchfiles.ts +260 -0
  211. package/.agents/skills/gstack/test/hook-scripts.test.ts +373 -0
  212. package/.agents/skills/gstack/test/skill-e2e-browse.test.ts +293 -0
  213. package/.agents/skills/gstack/test/skill-e2e-deploy.test.ts +279 -0
  214. package/.agents/skills/gstack/test/skill-e2e-design.test.ts +614 -0
  215. package/.agents/skills/gstack/test/skill-e2e-plan.test.ts +538 -0
  216. package/.agents/skills/gstack/test/skill-e2e-qa-bugs.test.ts +194 -0
  217. package/.agents/skills/gstack/test/skill-e2e-qa-workflow.test.ts +412 -0
  218. package/.agents/skills/gstack/test/skill-e2e-review.test.ts +535 -0
  219. package/.agents/skills/gstack/test/skill-e2e-workflow.test.ts +586 -0
  220. package/.agents/skills/gstack/test/skill-e2e.test.ts +3325 -0
  221. package/.agents/skills/gstack/test/skill-llm-eval.test.ts +787 -0
  222. package/.agents/skills/gstack/test/skill-parser.test.ts +179 -0
  223. package/.agents/skills/gstack/test/skill-routing-e2e.test.ts +605 -0
  224. package/.agents/skills/gstack/test/skill-validation.test.ts +1520 -0
  225. package/.agents/skills/gstack/test/telemetry.test.ts +278 -0
  226. package/.agents/skills/gstack/test/touchfiles.test.ts +262 -0
  227. package/.agents/skills/gstack/unfreeze/SKILL.md +40 -0
  228. package/.agents/skills/gstack/unfreeze/SKILL.md.tmpl +38 -0
  229. package/README.md +12 -7
  230. package/README_KO.md +12 -6
  231. package/package.json +3 -2
@@ -0,0 +1,614 @@
1
+ import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
2
+ import { runSkillTest } from './helpers/session-runner';
3
+ import { callJudge } from './helpers/llm-judge';
4
+ import {
5
+ ROOT, browseBin, runId, evalsEnabled,
6
+ describeIfSelected, testConcurrentIfSelected,
7
+ copyDirSync, setupBrowseShims, logCost, recordE2E,
8
+ createEvalCollector, finalizeEvalCollector,
9
+ } from './helpers/e2e-helpers';
10
+ import { spawnSync } from 'child_process';
11
+ import * as fs from 'fs';
12
+ import * as path from 'path';
13
+ import * as os from 'os';
14
+
15
+ const evalCollector = createEvalCollector('e2e-design'); // Collector created with the 'e2e-design' argument; passed as the first argument to every recordE2E() call in the tests below.
16
+
/**
 * Asks the LLM judge to grade a generated DESIGN.md.
 *
 * The prompt lists seven criteria (font blacklist, aesthetic/color coherence,
 * concrete font names, real hex values, rationale, no AI-slop patterns,
 * product-context fit) and requests a JSON verdict.
 *
 * @param designMd Full text of the DESIGN.md under evaluation; embedded verbatim in the prompt.
 * @returns Whatever `callJudge` resolves to, typed as `{ passed, reasoning }`.
 */
async function designQualityJudge(designMd: string): Promise<{ passed: boolean; reasoning: string }> {
  type Verdict = { passed: boolean; reasoning: string };

  const judgePrompt = `You are evaluating a generated DESIGN.md file for quality.

Evaluate against these criteria — ALL must pass for an overall "passed: true":
1. Does NOT recommend Inter, Roboto, Arial, Helvetica, Open Sans, Lato, Montserrat, or Poppins as primary fonts
2. Aesthetic direction is coherent with color approach (e.g., brutalist aesthetic doesn't pair with expressive color without explanation)
3. Font recommendations include specific font names (not generic like "a sans-serif font")
4. Color palette includes actual hex values, not placeholders like "[hex]"
5. Rationale is provided for major decisions (not just "because it looks good")
6. No AI slop patterns: purple gradients mentioned positively, "3-column feature grid" language, generic marketing speak
7. Product context is reflected in design choices (civic tech → should have appropriate, professional aesthetic)

DESIGN.md content:
\`\`\`
${designMd}
\`\`\`

Return JSON: { "passed": true/false, "reasoning": "one paragraph explaining your evaluation" }`;

  return callJudge<Verdict>(judgePrompt);
}
40
+
41
+ // --- Design Consultation E2E ---
42
+
43
+ describeIfSelected('Design Consultation E2E', [
44
+ 'design-consultation-core',
45
+ 'design-consultation-existing',
46
+ 'design-consultation-research',
47
+ 'design-consultation-preview',
48
+ ], () => {
49
+ let designDir: string;
50
+
// Builds the shared fixture: a temp git repo holding a small "CivicPulse"
// project (README.md + package.json, committed) plus a copy of the
// design-consultation skill file. The directory path is stored in `designDir`.
beforeAll(() => {
  designDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-design-consultation-'));

  // Every git invocation runs inside the fixture dir; results are not inspected.
  const git = (...gitArgs: string[]) =>
    spawnSync('git', gitArgs, { cwd: designDir, stdio: 'pipe', timeout: 5000 });

  git('init', '-b', 'main');
  git('config', 'user.email', 'test@test.com');
  git('config', 'user.name', 'Test');

  // Create a realistic project context
  const readmeText = `# CivicPulse

A civic tech data platform for government employees to access, visualize, and share public data. Built with Next.js and PostgreSQL.

## Features
- Real-time data dashboards for municipal budgets
- Public records search with faceted filtering
- Data export and sharing tools for inter-department collaboration
`;
  fs.writeFileSync(path.join(designDir, 'README.md'), readmeText);

  const manifest = {
    name: 'civicpulse',
    version: '0.1.0',
    dependencies: { next: '^14.0.0', react: '^18.2.0', 'tailwindcss': '^3.4.0' },
  };
  fs.writeFileSync(path.join(designDir, 'package.json'), JSON.stringify(manifest, null, 2));

  git('add', '.');
  git('commit', '-m', 'initial project setup');

  // Copy design-consultation skill (after the commit, so it stays untracked)
  const skillDir = path.join(designDir, 'design-consultation');
  fs.mkdirSync(skillDir, { recursive: true });
  fs.copyFileSync(
    path.join(ROOT, 'design-consultation', 'SKILL.md'),
    path.join(skillDir, 'SKILL.md'),
  );
});
86
+
// Best-effort removal of the shared fixture directory once the suite is done.
afterAll(() => {
  try {
    fs.rmSync(designDir, { force: true, recursive: true });
  } catch {
    // Cleanup failures are intentionally ignored.
  }
});
90
+
// Core flow: run the design-consultation skill non-interactively in the shared
// fixture repo, then check that DESIGN.md exists, mentions every expected
// section (by fuzzy synonym), and that CLAUDE.md — if written — references it.
// An LLM judge also grades the document; its verdict feeds the recorded eval
// result but is not asserted on directly.
testConcurrentIfSelected('design-consultation-core', async () => {
  const acceptableExits = ['success', 'error_max_turns'];

  const corePrompt = `Read design-consultation/SKILL.md for the design consultation workflow.
Skip the preamble bash block, lake intro, telemetry, and contributor mode sections — go straight to the design workflow.

This is a civic tech data platform called CivicPulse for government employees who need to access public data. Read the README.md for details.

Skip research — work from your design knowledge. Skip the font preview page. Skip any AskUserQuestion calls — this is non-interactive. Accept your first design system proposal.

Write DESIGN.md and CLAUDE.md (or update it) in the working directory.`;

  const result = await runSkillTest({
    prompt: corePrompt,
    workingDirectory: designDir,
    maxTurns: 20,
    timeout: 360_000,
    testName: 'design-consultation-core',
    runId,
    model: 'claude-opus-4-6',
  });

  logCost('/design-consultation core', result);

  const designPath = path.join(designDir, 'DESIGN.md');
  const claudePath = path.join(designDir, 'CLAUDE.md');
  const designExists = fs.existsSync(designPath);
  const claudeExists = fs.existsSync(claudePath);
  const designContent = designExists ? fs.readFileSync(designPath, 'utf-8') : '';

  // Structural checks — fuzzy synonym matching to handle agent variation
  const sectionSynonyms: Record<string, string[]> = {
    'Product Context': ['product', 'context', 'overview', 'about'],
    'Aesthetic': ['aesthetic', 'visual direction', 'design direction', 'visual identity'],
    'Typography': ['typography', 'type', 'font', 'typeface'],
    'Color': ['color', 'colour', 'palette', 'colors'],
    'Spacing': ['spacing', 'space', 'whitespace', 'gap'],
    'Layout': ['layout', 'grid', 'structure', 'composition'],
    'Motion': ['motion', 'animation', 'transition', 'movement'],
  };
  const loweredDesign = designContent.toLowerCase();
  const missingSections: string[] = [];
  for (const [sectionName, synonyms] of Object.entries(sectionSynonyms)) {
    const mentioned = synonyms.some(word => loweredDesign.includes(word));
    if (!mentioned) {
      missingSections.push(sectionName);
    }
  }

  // LLM judge for quality — only consulted when there is a non-trivial document.
  let judgeResult = { passed: false, reasoning: 'judge not run' };
  const worthJudging = designExists && designContent.length > 100;
  if (worthJudging) {
    try {
      judgeResult = await designQualityJudge(designContent);
      console.log('Design quality judge:', JSON.stringify(judgeResult, null, 2));
    } catch (err) {
      // A judge failure is treated as a pass so it cannot sink the recorded result.
      console.warn('Judge failed:', err);
      judgeResult = { passed: true, reasoning: 'judge error — defaulting to pass' };
    }
  }

  const structuralPass = designExists && claudeExists && missingSections.length === 0;
  const exitOk = acceptableExits.includes(result.exitReason);
  recordE2E(evalCollector, '/design-consultation core', 'Design Consultation E2E', result, {
    passed: structuralPass && judgeResult.passed && exitOk,
  });

  expect(acceptableExits).toContain(result.exitReason);
  expect(designExists).toBe(true);
  if (designExists) {
    expect(missingSections).toHaveLength(0);
  }
  if (claudeExists) {
    const claudeText = fs.readFileSync(claudePath, 'utf-8');
    expect(claudeText.toLowerCase()).toContain('design.md');
  }
}, 420_000);
162
+
// Research phase only: the agent is asked to run two WebSearch queries and
// write its findings to research-notes.md in a private temp directory.
// No DESIGN.md is generated here.
testConcurrentIfSelected('design-consultation-research', async () => {
  // Test WebSearch integration — research phase only, no DESIGN.md generation
  const researchDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-research-'));

  // try/finally so the temp dir is removed even when an assertion below throws.
  try {
    const result = await runSkillTest({
      prompt: `You have access to WebSearch. Research civic tech data platform designs.

Do exactly 2 WebSearch queries:
1. 'civic tech government data platform design 2025'
2. 'open data portal UX best practices'

Summarize the key design patterns you found to ${researchDir}/research-notes.md.
Include: color trends, typography patterns, and layout conventions you observed.
Do NOT generate a full DESIGN.md — just research notes.`,
      workingDirectory: researchDir,
      maxTurns: 8,
      timeout: 90_000,
      testName: 'design-consultation-research',
      runId,
    });

    logCost('/design-consultation research', result);

    const notesPath = path.join(researchDir, 'research-notes.md');
    const notesExist = fs.existsSync(notesPath);
    const notesContent = notesExist ? fs.readFileSync(notesPath, 'utf-8') : '';

    // Check if WebSearch was used (informational only — not asserted)
    const webSearchCalls = result.toolCalls.filter(tc => tc.tool === 'WebSearch');
    if (webSearchCalls.length > 0) {
      console.log(`WebSearch used ${webSearchCalls.length} times`);
    } else {
      console.warn('WebSearch not used — may be unavailable in test env');
    }

    recordE2E(evalCollector, '/design-consultation research', 'Design Consultation E2E', result, {
      passed: notesExist && notesContent.length > 200 && ['success', 'error_max_turns'].includes(result.exitReason),
    });

    expect(['success', 'error_max_turns']).toContain(result.exitReason);
    expect(notesExist).toBe(true);
    if (notesExist) {
      expect(notesContent.length).toBeGreaterThan(200);
    }
  } finally {
    // Best-effort cleanup; a failure to delete must not mask the test outcome.
    try { fs.rmSync(researchDir, { recursive: true, force: true }); } catch {}
  }
}, 120_000);
210
+
// Update flow: the repo already contains a minimal DESIGN.md and the skill is
// asked to expand it into a complete design system.
//
// This test works in its OWN temp repo rather than the shared `designDir`.
// The core test reads and asserts on `designDir/DESIGN.md`; pre-creating that
// file there would let the core test's existence check pass vacuously and
// would have both agents editing the same file (the tests are registered via
// testConcurrentIfSelected — presumably they may overlap in time).
testConcurrentIfSelected('design-consultation-existing', async () => {
  const existingDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-design-existing-'));

  try {
    const git = (...gitArgs: string[]) =>
      spawnSync('git', gitArgs, { cwd: existingDir, stdio: 'pipe', timeout: 5000 });

    git('init', '-b', 'main');
    git('config', 'user.email', 'test@test.com');
    git('config', 'user.name', 'Test');

    // Reuse the project context files that beforeAll wrote into the shared fixture.
    for (const fileName of ['README.md', 'package.json']) {
      fs.copyFileSync(path.join(designDir, fileName), path.join(existingDir, fileName));
    }

    git('add', '.');
    git('commit', '-m', 'initial project setup');

    // Copy design-consultation skill
    fs.mkdirSync(path.join(existingDir, 'design-consultation'), { recursive: true });
    fs.copyFileSync(
      path.join(ROOT, 'design-consultation', 'SKILL.md'),
      path.join(existingDir, 'design-consultation', 'SKILL.md'),
    );

    // Pre-create a minimal DESIGN.md (independent of core test)
    const designPath = path.join(existingDir, 'DESIGN.md');
    fs.writeFileSync(designPath, `# Design System — CivicPulse

## Typography
Body: system-ui
`);

    const result = await runSkillTest({
      prompt: `Read design-consultation/SKILL.md for the design consultation workflow.

There is already a DESIGN.md in this repo. Update it with a complete design system for CivicPulse, a civic tech data platform for government employees.

Skip research. Skip font preview. Skip any AskUserQuestion calls — this is non-interactive.`,
      workingDirectory: existingDir,
      maxTurns: 20,
      timeout: 360_000,
      testName: 'design-consultation-existing',
      runId,
      model: 'claude-opus-4-6',
    });

    logCost('/design-consultation existing', result);

    const designExists = fs.existsSync(designPath);
    const designContent = designExists ? fs.readFileSync(designPath, 'utf-8') : '';

    // Should have more content than the minimal version
    const loweredDesign = designContent.toLowerCase();
    const hasColor = loweredDesign.includes('color');
    const hasSpacing = loweredDesign.includes('spacing');

    recordE2E(evalCollector, '/design-consultation existing', 'Design Consultation E2E', result, {
      passed: designExists && hasColor && hasSpacing && ['success', 'error_max_turns'].includes(result.exitReason),
    });

    expect(['success', 'error_max_turns']).toContain(result.exitReason);
    expect(designExists).toBe(true);
    if (designExists) {
      expect(hasColor).toBe(true);
      expect(hasSpacing).toBe(true);
    }
  } finally {
    // Best-effort cleanup of this test's private repo.
    try { fs.rmSync(existingDir, { recursive: true, force: true }); } catch {}
  }
}, 420_000);
257
+
// Preview generation only: the agent must write a single design-preview.html
// (font specimens, color swatches, light/dark toggle) into a private temp dir.
testConcurrentIfSelected('design-consultation-preview', async () => {
  // Test preview HTML generation only — no DESIGN.md (covered by core test)
  const previewDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-preview-'));

  // try/finally so the temp dir is removed even when an assertion below throws.
  try {
    const result = await runSkillTest({
      prompt: `Generate a font and color preview page for a civic tech data platform.

The design system uses:
- Primary font: Cabinet Grotesk (headings), Source Sans 3 (body)
- Colors: #1B4D8E (civic blue), #C4501A (alert orange), #2D6A4F (success green)
- Neutral: #F8F7F6 (warm white), #1A1A1A (near black)

Write a single HTML file to ${previewDir}/design-preview.html that shows:
- Font specimens for each font at different sizes
- Color swatches with hex values
- A light/dark toggle
Do NOT write DESIGN.md — only the preview HTML.`,
      workingDirectory: previewDir,
      maxTurns: 8,
      timeout: 90_000,
      testName: 'design-consultation-preview',
      runId,
    });

    logCost('/design-consultation preview', result);

    const previewPath = path.join(previewDir, 'design-preview.html');
    const previewExists = fs.existsSync(previewPath);
    const previewContent = previewExists ? fs.readFileSync(previewPath, 'utf-8') : '';

    const hasHtml = previewContent.includes('<html') || previewContent.includes('<!DOCTYPE');
    const hasFontRef = previewContent.includes('font-family') || previewContent.includes('fonts.googleapis') || previewContent.includes('fonts.bunny');

    // The recorded verdict mirrors the assertions below, including the font
    // reference check, so the eval record cannot report a pass for a run
    // whose test fails on hasFontRef.
    recordE2E(evalCollector, '/design-consultation preview', 'Design Consultation E2E', result, {
      passed: previewExists && hasHtml && hasFontRef && ['success', 'error_max_turns'].includes(result.exitReason),
    });

    expect(['success', 'error_max_turns']).toContain(result.exitReason);
    expect(previewExists).toBe(true);
    if (previewExists) {
      expect(hasHtml).toBe(true);
      expect(hasFontRef).toBe(true);
    }
  } finally {
    // Best-effort cleanup; a failure to delete must not mask the test outcome.
    try { fs.rmSync(previewDir, { recursive: true, force: true }); } catch {}
  }
}, 120_000);
307
+ });
308
+
309
+ // --- Plan Design Review E2E (plan-mode) ---
310
+
311
+ describeIfSelected('Plan Design Review E2E', ['plan-design-review-plan-mode', 'plan-design-review-no-ui-scope'], () => {
312
+
/**
 * Creates a fresh temp directory, initialises a git repo in it (branch `main`,
 * test identity configured) and copies the plan-design-review SKILL.md into
 * a `plan-design-review/` subfolder.
 *
 * @returns Absolute path of the new directory; the caller is responsible for removing it.
 */
function setupReviewDir(): string {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-plan-design-'));

  // git bootstrap — each command runs inside the new dir; results are not inspected.
  const gitSteps: string[][] = [
    ['init', '-b', 'main'],
    ['config', 'user.email', 'test@test.com'],
    ['config', 'user.name', 'Test'],
  ];
  for (const gitArgs of gitSteps) {
    spawnSync('git', gitArgs, { cwd: dir, stdio: 'pipe', timeout: 5000 });
  }

  // Copy plan-design-review skill
  const skillDir = path.join(dir, 'plan-design-review');
  fs.mkdirSync(skillDir, { recursive: true });
  fs.copyFileSync(
    path.join(ROOT, 'plan-design-review', 'SKILL.md'),
    path.join(skillDir, 'SKILL.md'),
  );

  return dir;
}
332
+
333
+ testConcurrentIfSelected('plan-design-review-plan-mode', async () => {
334
+ const reviewDir = setupReviewDir();
335
+ try {
336
+ const run = (cmd: string, args: string[]) =>
337
+ spawnSync(cmd, args, { cwd: reviewDir, stdio: 'pipe', timeout: 5000 });
338
+
339
+ // Create a plan file with intentional design gaps
340
+ fs.writeFileSync(path.join(reviewDir, 'plan.md'), `# Plan: User Dashboard
341
+
342
+ ## Context
343
+ Build a user dashboard that shows account stats, recent activity, and settings.
344
+
345
+ ## Implementation
346
+ 1. Create a dashboard page at /dashboard
347
+ 2. Show user stats (posts, followers, engagement rate)
348
+ 3. Add a recent activity feed
349
+ 4. Add a settings panel
350
+ 5. Use a clean, modern UI with cards and icons
351
+ 6. Add a hero section at the top with a gradient background
352
+
353
+ ## Technical Details
354
+ - React components with Tailwind CSS
355
+ - API endpoint: GET /api/dashboard
356
+ - WebSocket for real-time activity updates
357
+ `);
358
+
359
+ run('git', ['add', '.']);
360
+ run('git', ['commit', '-m', 'initial plan']);
361
+
362
+ const result = await runSkillTest({
363
+ prompt: `Read plan-design-review/SKILL.md for the design review workflow.
364
+
365
+ Review the plan in ./plan.md. This plan has several design gaps — it uses vague language like "clean, modern UI" and "cards and icons", mentions a "hero section with gradient" (AI slop), and doesn't specify empty states, error states, loading states, responsive behavior, or accessibility.
366
+
367
+ Skip the preamble bash block. Skip any AskUserQuestion calls — this is non-interactive. Rate each design dimension 0-10 and explain what would make it a 10. Then EDIT plan.md to add the missing design decisions (interaction state table, empty states, responsive behavior, etc.).
368
+
369
+ IMPORTANT: Do NOT try to browse any URLs or use a browse binary. This is a plan review, not a live site audit. Just read the plan file, review it, and edit it to fix the gaps.`,
370
+ workingDirectory: reviewDir,
371
+ maxTurns: 15,
372
+ timeout: 300_000,
373
+ testName: 'plan-design-review-plan-mode',
374
+ runId,
375
+ });
376
+
377
+ logCost('/plan-design-review plan-mode', result);
378
+
379
+ // Check that the agent produced design ratings (0-10 scale)
380
+ const output = result.output || '';
381
+ const hasRatings = /\d+\/10/.test(output);
382
+ const hasDesignContent = output.toLowerCase().includes('information architecture') ||
383
+ output.toLowerCase().includes('interaction state') ||
384
+ output.toLowerCase().includes('ai slop') ||
385
+ output.toLowerCase().includes('hierarchy');
386
+
387
+ // Check that the plan file was edited (the core new behavior)
388
+ const planAfter = fs.readFileSync(path.join(reviewDir, 'plan.md'), 'utf-8');
389
+ const planOriginal = `# Plan: User Dashboard`;
390
+ const planWasEdited = planAfter.length > 300; // Original is ~450 chars, edited should be much longer
391
+ const planHasDesignAdditions = planAfter.toLowerCase().includes('empty') ||
392
+ planAfter.toLowerCase().includes('loading') ||
393
+ planAfter.toLowerCase().includes('error') ||
394
+ planAfter.toLowerCase().includes('state') ||
395
+ planAfter.toLowerCase().includes('responsive') ||
396
+ planAfter.toLowerCase().includes('accessibility');
397
+
398
+ recordE2E(evalCollector, '/plan-design-review plan-mode', 'Plan Design Review E2E', result, {
399
+ passed: hasDesignContent && planWasEdited && ['success', 'error_max_turns'].includes(result.exitReason),
400
+ });
401
+
402
+ expect(['success', 'error_max_turns']).toContain(result.exitReason);
403
+ // Agent should produce design-relevant output about the plan
404
+ expect(hasDesignContent).toBe(true);
405
+ // Agent should have edited the plan file to add missing design decisions
406
+ expect(planWasEdited).toBe(true);
407
+ expect(planHasDesignAdditions).toBe(true);
408
+ } finally {
409
+ try { fs.rmSync(reviewDir, { recursive: true, force: true }); } catch {}
410
+ }
411
+ }, 360_000);
412
+
413
+ testConcurrentIfSelected('plan-design-review-no-ui-scope', async () => {
414
+ const reviewDir = setupReviewDir();
415
+ try {
416
+ const run = (cmd: string, args: string[]) =>
417
+ spawnSync(cmd, args, { cwd: reviewDir, stdio: 'pipe', timeout: 5000 });
418
+
419
+ // Write a backend-only plan
420
+ fs.writeFileSync(path.join(reviewDir, 'backend-plan.md'), `# Plan: Database Migration
421
+
422
+ ## Context
423
+ Migrate user records from PostgreSQL to a new schema with better indexing.
424
+
425
+ ## Implementation
426
+ 1. Create migration to add new columns to users table
427
+ 2. Backfill data from legacy columns
428
+ 3. Add database indexes for common query patterns
429
+ 4. Update ActiveRecord models
430
+ 5. Run migration in staging first, then production
431
+ `);
432
+
433
+ run('git', ['add', '.']);
434
+ run('git', ['commit', '-m', 'initial plan']);
435
+
436
+ const result = await runSkillTest({
437
+ prompt: `Read plan-design-review/SKILL.md for the design review workflow.
438
+
439
+ Review the plan in ./backend-plan.md. This is a pure backend database migration plan with no UI changes.
440
+
441
+ Skip the preamble bash block. Skip any AskUserQuestion calls — this is non-interactive. Write your findings directly to stdout.
442
+
443
+ IMPORTANT: Do NOT try to browse any URLs or use a browse binary. This is a plan review, not a live site audit.`,
444
+ workingDirectory: reviewDir,
445
+ maxTurns: 10,
446
+ timeout: 180_000,
447
+ testName: 'plan-design-review-no-ui-scope',
448
+ runId,
449
+ });
450
+
451
+ logCost('/plan-design-review no-ui-scope', result);
452
+
453
+ // Agent should detect no UI scope and exit early
454
+ const output = result.output || '';
455
+ const detectsNoUI = output.toLowerCase().includes('no ui') ||
456
+ output.toLowerCase().includes('no frontend') ||
457
+ output.toLowerCase().includes('no design') ||
458
+ output.toLowerCase().includes('not applicable') ||
459
+ output.toLowerCase().includes('backend');
460
+
461
+ recordE2E(evalCollector, '/plan-design-review no-ui-scope', 'Plan Design Review E2E', result, {
462
+ passed: detectsNoUI && ['success', 'error_max_turns'].includes(result.exitReason),
463
+ });
464
+
465
+ expect(['success', 'error_max_turns']).toContain(result.exitReason);
466
+ expect(detectsNoUI).toBe(true);
467
+ } finally {
468
+ try { fs.rmSync(reviewDir, { recursive: true, force: true }); } catch {}
469
+ }
470
+ }, 240_000);
471
+ });
472
+
473
// --- Design Review E2E (live-site audit + fix) ---

/**
 * E2E coverage for the design-review skill against a locally served page.
 *
 * `beforeAll` builds a throwaway git repo containing an HTML/CSS page with
 * intentional design inconsistencies, serves it over HTTP on an OS-assigned
 * port, and copies the design-review skill into the repo. The single test asks
 * the agent to audit the page, fix up to three issues and write a report.
 * Only the exit reason is asserted; the report and fix commits are logged as
 * best-effort signals.
 */
describeIfSelected('Design Review E2E', ['design-review-fix'], () => {
  let qaDesignDir: string;
  let qaDesignServer: ReturnType<typeof Bun.serve> | null = null;

  /** Content-Type per file extension; anything else is served as text/plain. */
  const contentTypes: Record<string, string> = {
    '.css': 'text/css',
    '.html': 'text/html',
  };

  beforeAll(() => {
    qaDesignDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-qa-design-'));
    // Helper defined elsewhere in this file; presumably installs the browse
    // binary shims into the working directory — confirm against its definition.
    setupBrowseShims(qaDesignDir);

    const run = (cmd: string, args: string[]) =>
      spawnSync(cmd, args, { cwd: qaDesignDir, stdio: 'pipe', timeout: 5000 });

    run('git', ['init', '-b', 'main']);
    run('git', ['config', 'user.email', 'test@test.com']);
    run('git', ['config', 'user.name', 'Test']);

    // Create HTML/CSS with intentional design issues
    fs.writeFileSync(path.join(qaDesignDir, 'index.html'), `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Design Test App</title>
<link rel="stylesheet" href="style.css">
</head>
<body>
<header>
<h1 style="font-size: 48px; color: #333;">Welcome</h1>
<h2 style="font-size: 47px; color: #334;">Subtitle Here</h2>
</header>
<main>
<div class="card" style="padding: 10px; margin: 20px;">
<h3 style="color: blue;">Card Title</h3>
<p style="color: #666; font-size: 14px; line-height: 1.2;">Some content here with tight line height.</p>
</div>
<div class="card" style="padding: 30px; margin: 5px;">
<h3 style="color: green;">Another Card</h3>
<p style="color: #999; font-size: 16px;">Different spacing and colors for no reason.</p>
</div>
<button style="background: red; color: white; padding: 5px 10px; border: none;">Click Me</button>
<button style="background: #007bff; color: white; padding: 12px 24px; border: none; border-radius: 20px;">Also Click</button>
</main>
</body>
</html>`);

    fs.writeFileSync(path.join(qaDesignDir, 'style.css'), `body {
font-family: Arial, sans-serif;
margin: 0;
padding: 20px;
}
.card {
border: 1px solid #ddd;
border-radius: 4px;
}
`);

    run('git', ['add', '.']);
    run('git', ['commit', '-m', 'initial design test page']);

    // Start a simple file server for the design test page.
    // Port 0 lets the OS pick a free port; the test reads it back from the server.
    qaDesignServer = Bun.serve({
      port: 0,
      fetch(req) {
        const { pathname } = new URL(req.url);
        const relative = pathname === '/' ? 'index.html' : pathname.slice(1);
        const filePath = path.join(qaDesignDir, relative);
        try {
          const content = fs.readFileSync(filePath);
          const contentType = contentTypes[path.extname(filePath)] ?? 'text/plain';
          return new Response(content, { headers: { 'Content-Type': contentType } });
        } catch {
          // Any read failure (missing file, directory, ...) is reported as 404.
          return new Response('Not Found', { status: 404 });
        }
      },
    });

    // Copy design-review skill. This happens after the initial commit, so the
    // skill file is not part of that commit.
    fs.mkdirSync(path.join(qaDesignDir, 'design-review'), { recursive: true });
    fs.copyFileSync(
      path.join(ROOT, 'design-review', 'SKILL.md'),
      path.join(qaDesignDir, 'design-review', 'SKILL.md'),
    );
  });

  afterAll(() => {
    qaDesignServer?.stop();
    // Best-effort cleanup of the temporary repo.
    try { fs.rmSync(qaDesignDir, { recursive: true, force: true }); } catch {}
  });

  test('Test 7: /design-review audits and fixes design issues', async () => {
    // Fail fast when setup did not produce a server, instead of sending the
    // agent to "http://localhost:undefined" and spending the whole run budget.
    const server = qaDesignServer;
    if (!server) {
      throw new Error('Design Review E2E: test file server was not started');
    }
    const serverUrl = `http://localhost:${server.port}`;

    const result = await runSkillTest({
      prompt: `IMPORTANT: The browse binary is already assigned below as B. Do NOT search for it or run the SKILL.md setup block — just use $B directly.

B="${browseBin}"

Read design-review/SKILL.md for the design review + fix workflow.

Review the site at ${serverUrl}. Use --quick mode. Skip any AskUserQuestion calls — this is non-interactive. Fix up to 3 issues max. Write your report to ./design-audit.md.`,
      workingDirectory: qaDesignDir,
      maxTurns: 30,
      timeout: 360_000,
      testName: 'design-review-fix',
      runId,
    });

    logCost('/design-review fix', result);

    const reportPath = path.join(qaDesignDir, 'design-audit.md');
    const reportExists = fs.existsSync(reportPath);

    // Check if any design fix commits were made.
    // stdout is null when git could not be spawned; treat that as "no commits".
    const gitLog = spawnSync('git', ['log', '--oneline'], {
      cwd: qaDesignDir, stdio: 'pipe',
    });
    const logText = gitLog.stdout?.toString() ?? '';
    const commits = logText.trim().split('\n');
    const designFixCommits = commits.filter((c: string) => c.includes('style(design)'));

    recordE2E(evalCollector, '/design-review fix', 'Design Review E2E', result, {
      passed: ['success', 'error_max_turns'].includes(result.exitReason),
    });

    // Accept error_max_turns — the fix loop is complex
    expect(['success', 'error_max_turns']).toContain(result.exitReason);

    // Report and commits are best-effort — log what happened
    if (reportExists) {
      const report = fs.readFileSync(reportPath, 'utf-8');
      console.log(`Design audit report: ${report.length} chars`);
    } else {
      console.warn('No design-audit.md generated');
    }
    console.log(`Design fix commits: ${designFixCommits.length}`);
  }, 420_000);
});
610
+
611
// File-wide teardown: registered outside every describe block so it runs once
// all suites above have finished, then finalizes the shared eval collector.
afterAll(async function finalizeEvals() {
  await finalizeEvalCollector(evalCollector);
});