@agjs/tsforge 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@agjs/tsforge",
3
3
  "type": "module",
4
- "version": "0.2.0",
4
+ "version": "0.2.2",
5
5
  "license": "MIT",
6
6
  "description": "TypeScript coding harness with a deterministic gate, stack-aware guardrails, and stream-level correction.",
7
7
  "repository": {
@@ -19,7 +19,8 @@
19
19
  "src",
20
20
  "scripts",
21
21
  "strict.eslint.config.mjs",
22
- "strict.web.eslint.config.mjs"
22
+ "strict.web.eslint.config.mjs",
23
+ "strict.type-aware.eslint.config.mjs"
23
24
  ],
24
25
  "engines": {
25
26
  "bun": ">=1.3.14"
@@ -5,27 +5,59 @@
5
5
  //
6
6
  // bun browser-check.ts <htmlFile> # render-only (no errors)
7
7
  // bun browser-check.ts <htmlFile> --smoke # render + generic behaviour smoke
8
+ // bun browser-check.ts <htmlFile> --a11y # + axe accessibility (serious/critical fail)
9
+ // bun browser-check.ts <htmlFile> --screenshots[=dir] # + per-route PNGs (artifact)
10
+ // bun browser-check.ts <htmlFile> --perf # + a basic DOM-size/mount-time budget
8
11
  // bun browser-check.ts <htmlFile> <checks.json> # render + interaction checks
9
12
  // bun browser-check.ts <htmlFile> <selector> [text]
10
13
  import { readdir } from "node:fs/promises";
11
14
  import { dirname, join } from "node:path";
12
- import { renderCheck, parseChecks, type IRenderOptions } from "../src/browser";
15
+ import {
16
+ renderCheck,
17
+ parseChecks,
18
+ type IRenderOptions,
19
+ type IPerfBudget,
20
+ } from "../src/browser";
13
21
  import { crawlableRoutePaths } from "../src/web-routes";
14
22
 
15
23
  const rawArgs = process.argv.slice(2);
16
24
  const smoke = rawArgs.includes("--smoke");
17
25
  const crawl = rawArgs.includes("--crawl");
18
- const [file, arg2, arg3] = rawArgs.filter(
19
- (a) => a !== "--smoke" && a !== "--crawl"
20
- );
26
+ const a11y = rawArgs.includes("--a11y");
27
+ const perf = rawArgs.includes("--perf");
28
+ const screenshotsArg = rawArgs.find((a) => a.startsWith("--screenshots"));
29
+ // Positionals are every argument that doesn't start with `--` (recognized flag or not).
30
+ const [file, arg2, arg3] = rawArgs.filter((a) => !a.startsWith("--"));
21
31
 
22
32
  if (file === undefined) {
23
33
  process.stderr.write(
24
- "usage: browser-check.ts <htmlFile> [--smoke] [--crawl] [checks.json | selector [text]]\n"
34
+ "usage: browser-check.ts <htmlFile> [--smoke] [--crawl] [--a11y] " +
35
+ "[--screenshots[=dir]] [--perf] [checks.json | selector [text]]\n"
25
36
  );
26
37
  process.exit(2);
27
38
  }
28
39
 
40
+ /** A conservative default budget — a tripwire for runaway render trees / slow
41
+ * mounts, not a tuned Lighthouse target. */
42
+ const DEFAULT_PERF_BUDGET: IPerfBudget = {
43
+ maxDomNodes: 5000,
44
+ maxMountMs: 6000,
45
+ };
46
+
47
+ /** The screenshot dir: `--screenshots=<dir>`, else a `screenshots/` folder next
48
+ * to the HTML file. undefined when `--screenshots` wasn't passed. */
49
+ function screenshotDir(): string | undefined {
50
+ if (screenshotsArg === undefined) {
51
+ return undefined;
52
+ }
53
+
54
+ const eq = screenshotsArg.indexOf("=");
55
+
56
+ return eq === -1
57
+ ? join(dirname(file ?? "."), "screenshots")
58
+ : screenshotsArg.slice(eq + 1);
59
+ }
60
+
29
61
  /** With --crawl, enumerate the app's static routes from `<buildDir>/src/routes/`
30
62
  * (the build dir is the parent of dist/) so every page — not just the home —
31
63
  * is render-checked. Dynamic ($param) routes are skipped. */
@@ -66,10 +98,14 @@ async function checksFor(): Promise<Partial<IRenderOptions>> {
66
98
  };
67
99
  }
68
100
 
101
+ const shots = screenshotDir();
69
102
  const result = await renderCheck({
70
103
  file,
71
104
  smoke,
105
+ a11y,
72
106
  routes: await routesFor(),
107
+ ...(perf ? { perfBudget: DEFAULT_PERF_BUDGET } : {}),
108
+ ...(shots !== undefined ? { screenshotDir: shots } : {}),
73
109
  ...(await checksFor()),
74
110
  });
75
111
 
@@ -10,6 +10,7 @@ import { readdir } from "node:fs/promises";
10
10
  import { homedir } from "node:os";
11
11
  import { join } from "node:path";
12
12
  import { isRecord } from "../src/lib/guards";
13
+ import { classifyRun, parseEventLog } from "../src/eval";
13
14
 
14
15
  function num(value: unknown): number {
15
16
  return typeof value === "number" ? value : 0;
@@ -168,6 +169,9 @@ async function main(): Promise<void> {
168
169
  const text = await Bun.file(path).text();
169
170
  const lines = text.split("\n").filter((l) => l.trim().length > 0);
170
171
  const m = analyze(lines);
172
+ // Single source of truth for WHY a run failed — the same classifier the eval
173
+ // sweep and the reusable analyzeEvents() use, fed the typed event stream.
174
+ const failure = classifyRun(parseEventLog(text));
171
175
  const pct =
172
176
  m.contextWindow > 0
173
177
  ? Math.round((m.peakContext / m.contextWindow) * 100)
@@ -182,6 +186,12 @@ async function main(): Promise<void> {
182
186
  ["model", m.model],
183
187
  ["context window", String(m.contextWindow)],
184
188
  ["final status", m.finalStatus],
189
+ [
190
+ "failure class",
191
+ failure.detail === undefined
192
+ ? failure.failureClass
193
+ : `${failure.failureClass} (${failure.detail})`,
194
+ ],
185
195
  ["turns (repair iterations)", String(m.turns)],
186
196
  ["model calls", String(m.modelCalls)],
187
197
  ["tokens out (→ solution)", String(m.tokensOut)],
package/scripts/sweep.ts CHANGED
@@ -6,13 +6,19 @@
6
6
  import { mkdir, readdir, rm, stat } from "node:fs/promises";
7
7
  import { join } from "node:path";
8
8
  import { parseSpec } from "../src/spec";
9
- import { buildGate, prettierWriteCommand } from "../src/detect-gate";
9
+ import { buildGate, buildCoreFix } from "../src/detect-gate";
10
10
  import { runSpec, qualityRepair } from "../src/loop";
11
11
  import { modelAgent } from "../src/agent";
12
12
  import { OpenAICompatibleProvider } from "../src/inference";
13
13
  import { resolveActiveModel, resolveApiKey } from "../src/models-config";
14
14
  import { providerConfig } from "../src/cli";
15
- import { summarize, type IRunRecord } from "../src/eval";
15
+ import {
16
+ summarize,
17
+ classifyRun,
18
+ renderSweepReportMarkdown,
19
+ buildSweepReport,
20
+ type IRunRecord,
21
+ } from "../src/eval";
16
22
  import { renderEvent } from "../src/render";
17
23
  import type { ILoopEvent } from "../src/loop";
18
24
 
@@ -252,24 +258,33 @@ async function runOne(
252
258
  // (an unguarded index access, an `as any`) slipped through as GREEN. Now
253
259
  // every task and the whole-spec verify must clear the strict floor BEFORE
254
260
  // its functional tests count.
255
- // prettier --write FIRST (auto-format), then tsc-strict + eslint. The model
256
- // never hand-formats, but the gate still enforces type-safety + idioms.
257
- const strictGate = `${prettierWriteCommand()} && ${(await buildGate(runDir)).command}`;
261
+ // buildCoreFix (eslint --fix + prettier) runs as task.fix before each gate
262
+ // check — the same janitor as the interactive CLI — so padding-line, prefer-const,
263
+ // etc. are squashed without model turns.
264
+ const gateCommand = (await buildGate(runDir)).command;
265
+ const fixCommand = buildCoreFix();
258
266
  const gatedSpec = {
259
267
  ...spec,
260
268
  tasks: spec.tasks.map((t) => ({
261
269
  ...t,
262
- accept: `${strictGate} && ${t.accept}`,
270
+ fix: fixCommand,
271
+ accept: `${gateCommand} && ${t.accept}`,
263
272
  })),
264
273
  verify:
265
- spec.verify.length > 0 ? `${strictGate} && ${spec.verify}` : strictGate,
274
+ spec.verify.length > 0
275
+ ? `${gateCommand} && ${spec.verify}`
276
+ : gateCommand,
266
277
  };
267
278
 
268
279
  // Every run gets a full transcript at <runDir>/run.log; stream to the
269
280
  // terminal too when TSFORGE_STREAM=1.
270
281
  const log = Bun.file(join(runDir, "run.log")).writer();
282
+ // Keep the structured events so a failed run can be classified (WHY it
283
+ // failed), not just counted — fed to classifyRun below.
284
+ const runEvents: ILoopEvent[] = [];
271
285
 
272
286
  const onEvent = (e: ILoopEvent): void => {
287
+ runEvents.push(e);
273
288
  void log.write(renderEvent(e, { color: false }));
274
289
  // Flush per event — otherwise Bun's FileSink buffers and `tail -f` shows
275
290
  // nothing until the run ends. The log must be live.
@@ -359,6 +374,9 @@ async function runOne(
359
374
  );
360
375
 
361
376
  const vLabel = variantLabel(variantEnv);
377
+ const failureClass = passed
378
+ ? undefined
379
+ : classifyRun(runEvents).failureClass;
362
380
 
363
381
  records.push({
364
382
  label: `${vLabel} temp=${temp}`,
@@ -366,9 +384,10 @@ async function runOne(
366
384
  cycles,
367
385
  ms,
368
386
  quality,
387
+ ...(failureClass === undefined ? {} : { failureClass }),
369
388
  });
370
389
  process.stdout.write(
371
- ` ${seed} ${vLabel} temp=${temp} #${i + 1}: ${passed ? "done" : "blocked"} (${cycles} cyc, ${edits} edits, ${regressions} regress, ${ms}ms${quality === undefined ? "" : `, Q${quality}/5`}) → ${runId}\n`
390
+ ` ${seed} ${vLabel} temp=${temp} #${i + 1}: ${passed ? "done" : `blocked[${failureClass ?? "unknown"}]`} (${cycles} cyc, ${edits} edits, ${regressions} regress, ${ms}ms${quality === undefined ? "" : `, Q${quality}/5`}) → ${runId}\n`
372
391
  );
373
392
  } finally {
374
393
  restore();
@@ -380,11 +399,22 @@ const summaries = summarize(records);
380
399
  process.stdout.write(`\n=== sweep: ${seed} (${repeats} runs/variant) ===\n`);
381
400
 
382
401
  for (const s of summaries) {
402
+ const failures = Object.entries(s.failureClasses)
403
+ .sort(([, a], [, b]) => b - a)
404
+ .map(([cls, n]) => `${cls}×${String(n)}`)
405
+ .join(", ");
406
+
383
407
  process.stdout.write(
384
- `${s.label.padEnd(10)} pass ${Math.round(s.passRate * 100)}% (${s.passed}/${s.runs}) Q ${s.avgQuality.toFixed(1)}/5 avg ${s.avgCycles.toFixed(1)} cyc ${Math.round(s.avgMs)}ms\n`
408
+ `${s.label.padEnd(10)} pass ${Math.round(s.passRate * 100)}% (${s.passed}/${s.runs}) Q ${s.avgQuality.toFixed(1)}/5 avg ${s.avgCycles.toFixed(1)} cyc ${Math.round(s.avgMs)}ms${failures.length > 0 ? ` [${failures}]` : ""}\n`
385
409
  );
386
410
  }
387
411
 
412
+ // The statistical report (Wilson CI + z-test vs baseline) now also tabulates a
413
+ // per-variant failure-class breakdown — WHY runs failed, not just how often.
414
+ process.stdout.write(
415
+ `\n${renderSweepReportMarkdown(buildSweepReport(records))}\n`
416
+ );
417
+
388
418
  const outPath = join(evalsRoot, "runs", `sweep-${seed}-${stamp()}.json`);
389
419
 
390
420
  await Bun.write(
@@ -1,8 +1,11 @@
1
1
  export {
2
2
  renderCheck,
3
+ summarizeAxeViolations,
4
+ checkPerfBudget,
3
5
  type IRenderOptions,
4
6
  type IRenderExpect,
5
7
  type IRenderResult,
6
8
  type IStep,
9
+ type IPerfBudget,
7
10
  } from "./oracle";
8
11
  export { parseChecks } from "./checks";
@@ -1,4 +1,5 @@
1
1
  import { resolve, dirname, basename, join } from "node:path";
2
+ import { isRecord } from "../lib/guards";
2
3
  // `playwright` is an OPTIONAL peer: bundling it (+ a browser binary) into every
3
4
  // install is too heavy, so the import is dynamic and the render-check skips when
4
5
  // it's absent. The type-only import is erased at runtime, so it can't crash a
@@ -14,6 +15,20 @@ async function loadChromium(): Promise<typeof Chromium | null> {
14
15
  }
15
16
  }
16
17
 
18
+ /** Run axe against a page and return its raw result; null when @axe-core/
19
+ * playwright isn't installed (a11y is an optional enhancement, like the browser
20
+ * itself) — or when the axe run itself throws. Kept untyped at the boundary; extractAxeViolations narrows it. */
21
+ async function runAxe(page: Page): Promise<unknown> {
22
+ try {
23
+ const mod = await import("@axe-core/playwright");
24
+ const builder = new mod.AxeBuilder({ page });
25
+
26
+ return await builder.analyze();
27
+ } catch {
28
+ return null;
29
+ }
30
+ }
31
+
17
32
  /**
18
33
  * The browser oracle — renders a built web page in headless chromium and reports
19
34
  * whether it actually WORKS, beyond what tsc/eslint can see: it fails on uncaught
@@ -55,22 +70,121 @@ export interface IRenderOptions {
55
70
  * single-page smoke misses them. Served with SPA fallback so the client
56
71
  * router handles the path. Empty/undefined → no crawl (unchanged behavior). */
57
72
  routes?: string[];
73
+ /** Run axe accessibility checks on the page (and each crawled route). Serious
74
+ * and critical violations become gate errors; minor/moderate are skipped.
75
+ * Skipped gracefully when @axe-core/playwright isn't installed. */
76
+ a11y?: boolean;
77
+ /** Directory to write a screenshot per page/route into (desktop + mobile
78
+ * viewports). An artifact for human/visual review — never a pass/fail signal. */
79
+ screenshotDir?: string;
80
+ /** A perf budget (DOM node count + mount time) checked on the initial page. */
81
+ perfBudget?: IPerfBudget;
58
82
  /** Navigation timeout (default 15s). */
59
83
  timeoutMs?: number;
60
84
  }
61
85
 
86
+ /** Screenshot viewports — a desktop and a mobile pass per page. */
87
+ const VIEWPORTS = [
88
+ { name: "desktop", width: 1280, height: 800 },
89
+ { name: "mobile", width: 390, height: 844 },
90
+ ] as const;
91
+
62
92
  export interface IRenderResult {
63
93
  ok: boolean;
64
94
  /** Human-readable failures (console errors, page errors, missing content). */
65
95
  errors: string[];
66
96
  /** True when the check was skipped because playwright isn't installed. */
67
97
  skipped?: boolean;
98
+ /** Paths of screenshots captured (when `screenshotDir` was set). */
99
+ screenshots?: string[];
100
+ }
101
+
102
+ /** A simple performance budget: fail the render when the built app blows past
103
+ * these. Intentionally minimal (no full Lighthouse) — a tripwire, not a profiler. */
104
+ export interface IPerfBudget {
105
+ /** Max total DOM nodes after load (a proxy for over-heavy render trees). */
106
+ maxDomNodes?: number;
107
+ /** Max time from navigation start to DOMContentLoaded, in ms. */
108
+ maxMountMs?: number;
109
+ }
110
+
111
+ /** axe impact levels that FAIL the a11y check — minor/moderate are reported by
112
+ * axe but don't gate (too noisy to block a build on). */
113
+ const AXE_FAIL_IMPACTS = new Set(["serious", "critical"]);
114
+
115
+ /** The subset of an axe violation the oracle reports on. */
116
+ interface IAxeViolation {
117
+ id: string;
118
+ impact: string | undefined;
119
+ nodeCount: number;
120
+ }
121
+
122
+ /** Extract the reportable violations from axe's (untyped, dynamically-imported)
123
+ * result — narrowed with guards, no casts. */
124
+ function extractAxeViolations(result: unknown): IAxeViolation[] {
125
+ if (!isRecord(result) || !Array.isArray(result.violations)) {
126
+ return [];
127
+ }
128
+
129
+ const out: IAxeViolation[] = [];
130
+
131
+ for (const v of result.violations) {
132
+ if (!isRecord(v) || typeof v.id !== "string") {
133
+ continue;
134
+ }
135
+
136
+ out.push({
137
+ id: v.id,
138
+ impact: typeof v.impact === "string" ? v.impact : undefined,
139
+ nodeCount: Array.isArray(v.nodes) ? v.nodes.length : 0,
140
+ });
141
+ }
142
+
143
+ return out;
144
+ }
145
+
146
+ /** Turn axe violations into gate errors — only serious/critical fail. Pure. */
147
+ export function summarizeAxeViolations(
148
+ violations: readonly IAxeViolation[],
149
+ where: string
150
+ ): string[] {
151
+ return violations
152
+ .filter((v) => v.impact !== undefined && AXE_FAIL_IMPACTS.has(v.impact))
153
+ .map(
154
+ (v) =>
155
+ `a11y ${v.impact ?? "?"} at ${where}: ${v.id} (${String(v.nodeCount)} node(s))`
156
+ );
157
+ }
158
+
159
+ /** Evaluate a perf budget against measured values → gate errors. Pure. */
160
+ export function checkPerfBudget(
161
+ domNodes: number,
162
+ mountMs: number,
163
+ budget: IPerfBudget,
164
+ where: string
165
+ ): string[] {
166
+ const errors: string[] = [];
167
+
168
+ if (budget.maxDomNodes !== undefined && domNodes > budget.maxDomNodes) {
169
+ errors.push(
170
+ `perf at ${where}: ${String(domNodes)} DOM nodes > budget ${String(budget.maxDomNodes)}`
171
+ );
172
+ }
173
+
174
+ if (budget.maxMountMs !== undefined && mountMs > budget.maxMountMs) {
175
+ errors.push(
176
+ `perf at ${where}: mount ${String(Math.round(mountMs))}ms > budget ${String(budget.maxMountMs)}ms`
177
+ );
178
+ }
179
+
180
+ return errors;
68
181
  }
69
182
 
70
183
  export async function renderCheck(
71
184
  opts: IRenderOptions
72
185
  ): Promise<IRenderResult> {
73
186
  const errors: string[] = [];
187
+ const screenshots: string[] = [];
74
188
  const chromium = await loadChromium();
75
189
 
76
190
  // No playwright → skip the render check rather than fail the gate. The build
@@ -87,7 +201,10 @@ export async function renderCheck(
87
201
  const browser = await chromium.launch({ args: ["--no-sandbox"] });
88
202
 
89
203
  try {
90
- const page = await browser.newPage();
204
+ // Page via an explicit context (not browser.newPage()) — axe-core/playwright
205
+ // requires a context-owned page; browser.close() tears the context down too.
206
+ const context = await browser.newContext();
207
+ const page = await context.newPage();
91
208
  const timeout = opts.timeoutMs ?? 15_000;
92
209
 
93
210
  page.on("console", (message) => {
@@ -113,30 +230,39 @@ export async function renderCheck(
113
230
  waitUntil: "load",
114
231
  timeout,
115
232
  });
116
- await runChecks(page, opts, errors);
233
+ await runChecks(page, opts, errors, screenshots);
117
234
 
118
235
  if (opts.routes !== undefined && opts.routes.length > 0) {
119
- await crawlRoutes(page, base, opts.routes, errors, timeout);
236
+ await crawlRoutes(page, base, opts.routes, errors, timeout, {
237
+ opts,
238
+ screenshots,
239
+ });
120
240
  }
121
241
  } finally {
122
242
  await server.stop(true);
123
243
  }
124
244
  } else {
125
245
  await page.setContent(opts.html ?? "", { waitUntil: "load", timeout });
126
- await runChecks(page, opts, errors);
246
+ await runChecks(page, opts, errors, screenshots);
127
247
  }
128
248
 
129
- return { ok: errors.length === 0, errors };
249
+ return {
250
+ ok: errors.length === 0,
251
+ errors,
252
+ ...(screenshots.length > 0 ? { screenshots } : {}),
253
+ };
130
254
  } finally {
131
255
  await browser.close();
132
256
  }
133
257
  }
134
258
 
135
- /** The expectation + step + smoke checks that run against the loaded page. */
259
+ /** The expectation + step + smoke checks that run against the loaded page, then
260
+ * the optional quality oracles (a11y, perf budget, screenshots). */
136
261
  async function runChecks(
137
262
  page: Page,
138
263
  opts: IRenderOptions,
139
- errors: string[]
264
+ errors: string[],
265
+ screenshots: string[]
140
266
  ): Promise<void> {
141
267
  await checkExpectations(page, opts.expect, errors);
142
268
 
@@ -147,6 +273,76 @@ async function runChecks(
147
273
  if (opts.smoke === true) {
148
274
  await runSmoke(page, errors);
149
275
  }
276
+
277
+ await runQualityOracles(page, opts, "index", errors, screenshots);
278
+ }
279
+
280
+ /** The opt-in quality layer: accessibility (axe), a perf budget, and screenshots.
281
+ * Each is independent and skips cleanly when not requested / dep absent. */
282
+ async function runQualityOracles(
283
+ page: Page,
284
+ opts: IRenderOptions,
285
+ where: string,
286
+ errors: string[],
287
+ screenshots: string[]
288
+ ): Promise<void> {
289
+ if (opts.a11y === true) {
290
+ const violations = extractAxeViolations(await runAxe(page));
291
+
292
+ errors.push(...summarizeAxeViolations(violations, where));
293
+ }
294
+
295
+ if (opts.perfBudget !== undefined) {
296
+ const { domNodes, mountMs } = await measurePage(page);
297
+
298
+ errors.push(...checkPerfBudget(domNodes, mountMs, opts.perfBudget, where));
299
+ }
300
+
301
+ if (opts.screenshotDir !== undefined) {
302
+ await capturePage(page, opts.screenshotDir, where, screenshots);
303
+ }
304
+ }
305
+
306
+ /** Measure DOM size + mount time for the perf budget. */
307
+ async function measurePage(
308
+ page: Page
309
+ ): Promise<{ domNodes: number; mountMs: number }> {
310
+ return page.evaluate(() => {
311
+ const nav = performance.getEntriesByType("navigation")[0];
312
+ const mountMs =
313
+ nav instanceof PerformanceNavigationTiming
314
+ ? nav.domContentLoadedEventEnd - nav.startTime
315
+ : 0;
316
+
317
+ return { domNodes: document.querySelectorAll("*").length, mountMs };
318
+ });
319
+ }
320
+
321
+ /** Filesystem-safe label for a route (e.g. "/a/b" → "a-b", "/" → "index"). */
322
+ function routeLabel(route: string): string {
323
+ const cleaned = route.replace(/^\/+|\/+$/g, "").replace(/\//g, "-");
324
+
325
+ return cleaned.length === 0 ? "index" : cleaned;
326
+ }
327
+
328
+ /** Capture a desktop + mobile screenshot of the current page into `dir`. NOTE: the viewport is not restored — the page is left at the last (mobile) size. */
329
+ async function capturePage(
330
+ page: Page,
331
+ dir: string,
332
+ label: string,
333
+ screenshots: string[]
334
+ ): Promise<void> {
335
+ for (const vp of VIEWPORTS) {
336
+ const path = join(dir, `${label}-${vp.name}.png`);
337
+
338
+ try {
339
+ await page.setViewportSize({ width: vp.width, height: vp.height });
340
+ await page.screenshot({ path, fullPage: true });
341
+ screenshots.push(path);
342
+ } catch {
343
+ // A screenshot is a best-effort artifact, never a gate failure.
344
+ }
345
+ }
150
346
  }
151
347
 
152
348
  /** Serve a directory on an ephemeral localhost port. SPA FALLBACK: an
@@ -187,7 +383,8 @@ async function crawlRoutes(
187
383
  base: string,
188
384
  routes: readonly string[],
189
385
  errors: string[],
190
- timeout: number
386
+ timeout: number,
387
+ quality: { opts: IRenderOptions; screenshots: string[] }
191
388
  ): Promise<void> {
192
389
  for (const route of routes) {
193
390
  try {
@@ -207,7 +404,17 @@ async function crawlRoutes(
207
404
 
208
405
  if (blank) {
209
406
  errors.push(`route ${route} rendered blank`);
407
+ continue;
210
408
  }
409
+
410
+ // a11y + screenshots per route (perf budget stays an initial-page check).
411
+ await runQualityOracles(
412
+ page,
413
+ { ...quality.opts, perfBudget: undefined },
414
+ routeLabel(route),
415
+ errors,
416
+ quality.screenshots
417
+ );
211
418
  } catch (error) {
212
419
  errors.push(
213
420
  `route ${route} failed to load: ${error instanceof Error ? error.message : String(error)}`
package/src/cli.ts CHANGED
@@ -38,6 +38,7 @@ import {
38
38
  buildGate,
39
39
  buildWebGate,
40
40
  buildWebFix,
41
+ buildCoreFix,
41
42
  buildWebTypeGate,
42
43
  buildWebTscCheck,
43
44
  scaffoldWeb,
@@ -102,11 +103,15 @@ export interface ICliArgs {
102
103
  /** Plan mode: a from-scratch build pauses after the design phase to show its
103
104
  * plan for review/edit before implementing (`--plan`; also toggled by /plan). */
104
105
  plan: boolean;
106
+ /** Keep the auto-gate at the strict TS floor only — do NOT append the
107
+ * project's discovered tests (`--strict-floor-only`). By default the auto-gate
108
+ * also runs the project's tests, so "green" means floor + tests pass. */
109
+ strictFloorOnly: boolean;
105
110
  }
106
111
 
107
112
  const BOOL_FLAGS: Record<
108
113
  string,
109
- "continue" | "noGate" | "web" | "log" | "plan"
114
+ "continue" | "noGate" | "web" | "log" | "plan" | "strictFloorOnly"
110
115
  > = {
111
116
  "--continue": "continue",
112
117
  "-c": "continue",
@@ -114,6 +119,7 @@ const BOOL_FLAGS: Record<
114
119
  "--web": "web",
115
120
  "--log": "log",
116
121
  "--plan": "plan",
122
+ "--strict-floor-only": "strictFloorOnly",
117
123
  };
118
124
 
119
125
  const VALUE_FLAGS = new Set([
@@ -140,6 +146,7 @@ export function parseArgs(argv: readonly string[]): ICliArgs {
140
146
  web: false,
141
147
  log: false,
142
148
  plan: false,
149
+ strictFloorOnly: false,
143
150
  };
144
151
 
145
152
  for (let i = 0; i < argv.length; i += 1) {
@@ -812,7 +819,13 @@ async function baseGate(
812
819
  args.dir,
813
820
  activePacks,
814
821
  Object.keys(ruleOverrides).length > 0 ? ruleOverrides : undefined,
815
- { enableTypeAware: profile === "strict" }
822
+ {
823
+ enableTypeAware: profile === "strict",
824
+ // "Green" should mean the strict floor AND the project's own tests pass —
825
+ // not just that it type-checks and lints. discoverTestCommand appends them
826
+ // only when the project actually has tests; --strict-floor-only opts out.
827
+ includeTests: !args.strictFloorOnly,
828
+ }
816
829
  );
817
830
 
818
831
  return { accept: auto.command, gateLabel: auto.label };
@@ -891,7 +904,7 @@ async function repl(args: ICliArgs): Promise<number> {
891
904
  fix: buildWebFix("react"),
892
905
  incrementalCheck: buildWebTscCheck(),
893
906
  }
894
- : { scaffoldWeb: true }),
907
+ : { scaffoldWeb: true, fix: buildCoreFix() }),
895
908
  ...(thinkingTokenBudget === undefined ? {} : { thinkingTokenBudget }),
896
909
  ...(autoCompactAt === undefined ? {} : { autoCompactAt }),
897
910
  // Thinking OFF for interactive replies so they STREAM immediately instead of