@agjs/tsforge 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -2
- package/scripts/browser-check.ts +41 -5
- package/scripts/cli-metrics.ts +10 -0
- package/scripts/sweep.ts +39 -9
- package/src/browser/index.ts +3 -0
- package/src/browser/oracle.ts +215 -8
- package/src/cli.ts +16 -3
- package/src/detect-gate.ts +127 -13
- package/src/eval/eval.types.ts +9 -0
- package/src/eval/failure-class.ts +263 -0
- package/src/eval/index.ts +8 -0
- package/src/eval/metrics.ts +7 -0
- package/src/eval/parse-log.ts +105 -0
- package/src/eval/report.ts +19 -0
- package/src/eval/score.ts +10 -0
- package/src/loop/loop.types.ts +4 -0
- package/src/loop/turn.ts +3 -0
- package/strict.eslint.config.mjs +24 -1
- package/strict.type-aware.eslint.config.mjs +33 -0
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agjs/tsforge",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.2.0",
|
|
4
|
+
"version": "0.2.2",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"description": "TypeScript coding harness with a deterministic gate, stack-aware guardrails, and stream-level correction.",
|
|
7
7
|
"repository": {
|
|
@@ -19,7 +19,8 @@
|
|
|
19
19
|
"src",
|
|
20
20
|
"scripts",
|
|
21
21
|
"strict.eslint.config.mjs",
|
|
22
|
-
"strict.web.eslint.config.mjs"
|
|
22
|
+
"strict.web.eslint.config.mjs",
|
|
23
|
+
"strict.type-aware.eslint.config.mjs"
|
|
23
24
|
],
|
|
24
25
|
"engines": {
|
|
25
26
|
"bun": ">=1.3.14"
|
package/scripts/browser-check.ts
CHANGED
|
@@ -5,27 +5,59 @@
|
|
|
5
5
|
//
|
|
6
6
|
// bun browser-check.ts <htmlFile> # render-only (no errors)
|
|
7
7
|
// bun browser-check.ts <htmlFile> --smoke # render + generic behaviour smoke
|
|
8
|
+
// bun browser-check.ts <htmlFile> --a11y # + axe accessibility (serious/critical fail)
|
|
9
|
+
// bun browser-check.ts <htmlFile> --screenshots[=dir] # + per-route PNGs (artifact)
|
|
10
|
+
// bun browser-check.ts <htmlFile> --perf # + a basic DOM-size/mount-time budget
|
|
8
11
|
// bun browser-check.ts <htmlFile> <checks.json> # render + interaction checks
|
|
9
12
|
// bun browser-check.ts <htmlFile> <selector> [text]
|
|
10
13
|
import { readdir } from "node:fs/promises";
|
|
11
14
|
import { dirname, join } from "node:path";
|
|
12
|
-
import {
|
|
15
|
+
import {
|
|
16
|
+
renderCheck,
|
|
17
|
+
parseChecks,
|
|
18
|
+
type IRenderOptions,
|
|
19
|
+
type IPerfBudget,
|
|
20
|
+
} from "../src/browser";
|
|
13
21
|
import { crawlableRoutePaths } from "../src/web-routes";
|
|
14
22
|
|
|
15
23
|
const rawArgs = process.argv.slice(2);
|
|
16
24
|
const smoke = rawArgs.includes("--smoke");
|
|
17
25
|
const crawl = rawArgs.includes("--crawl");
|
|
18
|
-
const
|
|
19
|
-
|
|
20
|
-
);
|
|
26
|
+
const a11y = rawArgs.includes("--a11y");
|
|
27
|
+
const perf = rawArgs.includes("--perf");
|
|
28
|
+
const screenshotsArg = rawArgs.find((a) => a.startsWith("--screenshots"));
|
|
29
|
+
// Positionals are anything that isn't a recognized `--flag`.
|
|
30
|
+
const [file, arg2, arg3] = rawArgs.filter((a) => !a.startsWith("--"));
|
|
21
31
|
|
|
22
32
|
if (file === undefined) {
|
|
23
33
|
process.stderr.write(
|
|
24
|
-
"usage: browser-check.ts <htmlFile> [--smoke] [--crawl] [
|
|
34
|
+
"usage: browser-check.ts <htmlFile> [--smoke] [--crawl] [--a11y] " +
|
|
35
|
+
"[--screenshots[=dir]] [--perf] [checks.json | selector [text]]\n"
|
|
25
36
|
);
|
|
26
37
|
process.exit(2);
|
|
27
38
|
}
|
|
28
39
|
|
|
40
|
+
/** A conservative default budget — a tripwire for runaway render trees / slow
|
|
41
|
+
* mounts, not a tuned Lighthouse target. */
|
|
42
|
+
const DEFAULT_PERF_BUDGET: IPerfBudget = {
|
|
43
|
+
maxDomNodes: 5000,
|
|
44
|
+
maxMountMs: 6000,
|
|
45
|
+
};
|
|
46
|
+
|
|
47
|
+
/** The screenshot dir: `--screenshots=<dir>`, else a `screenshots/` folder next
|
|
48
|
+
* to the HTML file. undefined when `--screenshots` wasn't passed. */
|
|
49
|
+
function screenshotDir(): string | undefined {
|
|
50
|
+
if (screenshotsArg === undefined) {
|
|
51
|
+
return undefined;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
const eq = screenshotsArg.indexOf("=");
|
|
55
|
+
|
|
56
|
+
return eq === -1
|
|
57
|
+
? join(dirname(file ?? "."), "screenshots")
|
|
58
|
+
: screenshotsArg.slice(eq + 1);
|
|
59
|
+
}
|
|
60
|
+
|
|
29
61
|
/** With --crawl, enumerate the app's static routes from `<buildDir>/src/routes/`
|
|
30
62
|
* (the build dir is the parent of dist/) so every page — not just the home —
|
|
31
63
|
* is render-checked. Dynamic ($param) routes are skipped. */
|
|
@@ -66,10 +98,14 @@ async function checksFor(): Promise<Partial<IRenderOptions>> {
|
|
|
66
98
|
};
|
|
67
99
|
}
|
|
68
100
|
|
|
101
|
+
const shots = screenshotDir();
|
|
69
102
|
const result = await renderCheck({
|
|
70
103
|
file,
|
|
71
104
|
smoke,
|
|
105
|
+
a11y,
|
|
72
106
|
routes: await routesFor(),
|
|
107
|
+
...(perf ? { perfBudget: DEFAULT_PERF_BUDGET } : {}),
|
|
108
|
+
...(shots !== undefined ? { screenshotDir: shots } : {}),
|
|
73
109
|
...(await checksFor()),
|
|
74
110
|
});
|
|
75
111
|
|
package/scripts/cli-metrics.ts
CHANGED
|
@@ -10,6 +10,7 @@ import { readdir } from "node:fs/promises";
|
|
|
10
10
|
import { homedir } from "node:os";
|
|
11
11
|
import { join } from "node:path";
|
|
12
12
|
import { isRecord } from "../src/lib/guards";
|
|
13
|
+
import { classifyRun, parseEventLog } from "../src/eval";
|
|
13
14
|
|
|
14
15
|
function num(value: unknown): number {
|
|
15
16
|
return typeof value === "number" ? value : 0;
|
|
@@ -168,6 +169,9 @@ async function main(): Promise<void> {
|
|
|
168
169
|
const text = await Bun.file(path).text();
|
|
169
170
|
const lines = text.split("\n").filter((l) => l.trim().length > 0);
|
|
170
171
|
const m = analyze(lines);
|
|
172
|
+
// Single source of truth for WHY a run failed — the same classifier the eval
|
|
173
|
+
// sweep and the reusable analyzeEvents() use, fed the typed event stream.
|
|
174
|
+
const failure = classifyRun(parseEventLog(text));
|
|
171
175
|
const pct =
|
|
172
176
|
m.contextWindow > 0
|
|
173
177
|
? Math.round((m.peakContext / m.contextWindow) * 100)
|
|
@@ -182,6 +186,12 @@ async function main(): Promise<void> {
|
|
|
182
186
|
["model", m.model],
|
|
183
187
|
["context window", String(m.contextWindow)],
|
|
184
188
|
["final status", m.finalStatus],
|
|
189
|
+
[
|
|
190
|
+
"failure class",
|
|
191
|
+
failure.detail === undefined
|
|
192
|
+
? failure.failureClass
|
|
193
|
+
: `${failure.failureClass} (${failure.detail})`,
|
|
194
|
+
],
|
|
185
195
|
["turns (repair iterations)", String(m.turns)],
|
|
186
196
|
["model calls", String(m.modelCalls)],
|
|
187
197
|
["tokens out (→ solution)", String(m.tokensOut)],
|
package/scripts/sweep.ts
CHANGED
|
@@ -6,13 +6,19 @@
|
|
|
6
6
|
import { mkdir, readdir, rm, stat } from "node:fs/promises";
|
|
7
7
|
import { join } from "node:path";
|
|
8
8
|
import { parseSpec } from "../src/spec";
|
|
9
|
-
import { buildGate,
|
|
9
|
+
import { buildGate, buildCoreFix } from "../src/detect-gate";
|
|
10
10
|
import { runSpec, qualityRepair } from "../src/loop";
|
|
11
11
|
import { modelAgent } from "../src/agent";
|
|
12
12
|
import { OpenAICompatibleProvider } from "../src/inference";
|
|
13
13
|
import { resolveActiveModel, resolveApiKey } from "../src/models-config";
|
|
14
14
|
import { providerConfig } from "../src/cli";
|
|
15
|
-
import {
|
|
15
|
+
import {
|
|
16
|
+
summarize,
|
|
17
|
+
classifyRun,
|
|
18
|
+
renderSweepReportMarkdown,
|
|
19
|
+
buildSweepReport,
|
|
20
|
+
type IRunRecord,
|
|
21
|
+
} from "../src/eval";
|
|
16
22
|
import { renderEvent } from "../src/render";
|
|
17
23
|
import type { ILoopEvent } from "../src/loop";
|
|
18
24
|
|
|
@@ -252,24 +258,33 @@ async function runOne(
|
|
|
252
258
|
// (an unguarded index access, an `as any`) slipped through as GREEN. Now
|
|
253
259
|
// every task and the whole-spec verify must clear the strict floor BEFORE
|
|
254
260
|
// its functional tests count.
|
|
255
|
-
//
|
|
256
|
-
//
|
|
257
|
-
|
|
261
|
+
// buildCoreFix (eslint --fix + prettier) runs as task.fix before each gate
|
|
262
|
+
// check — same janitor as the interactive CLI — so padding-line, prefer-const,
|
|
263
|
+
// etc. are squashed without model turns.
|
|
264
|
+
const gateCommand = (await buildGate(runDir)).command;
|
|
265
|
+
const fixCommand = buildCoreFix();
|
|
258
266
|
const gatedSpec = {
|
|
259
267
|
...spec,
|
|
260
268
|
tasks: spec.tasks.map((t) => ({
|
|
261
269
|
...t,
|
|
262
|
-
|
|
270
|
+
fix: fixCommand,
|
|
271
|
+
accept: `${gateCommand} && ${t.accept}`,
|
|
263
272
|
})),
|
|
264
273
|
verify:
|
|
265
|
-
spec.verify.length > 0
|
|
274
|
+
spec.verify.length > 0
|
|
275
|
+
? `${gateCommand} && ${spec.verify}`
|
|
276
|
+
: gateCommand,
|
|
266
277
|
};
|
|
267
278
|
|
|
268
279
|
// Every run gets a full transcript at <runDir>/run.log; stream to the
|
|
269
280
|
// terminal too when TSFORGE_STREAM=1.
|
|
270
281
|
const log = Bun.file(join(runDir, "run.log")).writer();
|
|
282
|
+
// Keep the structured events so a failed run can be classified (WHY it
|
|
283
|
+
// failed), not just counted — fed to classifyRun below.
|
|
284
|
+
const runEvents: ILoopEvent[] = [];
|
|
271
285
|
|
|
272
286
|
const onEvent = (e: ILoopEvent): void => {
|
|
287
|
+
runEvents.push(e);
|
|
273
288
|
void log.write(renderEvent(e, { color: false }));
|
|
274
289
|
// Flush per event — otherwise Bun's FileSink buffers and `tail -f` shows
|
|
275
290
|
// nothing until the run ends. The log must be live.
|
|
@@ -359,6 +374,9 @@ async function runOne(
|
|
|
359
374
|
);
|
|
360
375
|
|
|
361
376
|
const vLabel = variantLabel(variantEnv);
|
|
377
|
+
const failureClass = passed
|
|
378
|
+
? undefined
|
|
379
|
+
: classifyRun(runEvents).failureClass;
|
|
362
380
|
|
|
363
381
|
records.push({
|
|
364
382
|
label: `${vLabel} temp=${temp}`,
|
|
@@ -366,9 +384,10 @@ async function runOne(
|
|
|
366
384
|
cycles,
|
|
367
385
|
ms,
|
|
368
386
|
quality,
|
|
387
|
+
...(failureClass === undefined ? {} : { failureClass }),
|
|
369
388
|
});
|
|
370
389
|
process.stdout.write(
|
|
371
|
-
` ${seed} ${vLabel} temp=${temp} #${i + 1}: ${passed ? "done" :
|
|
390
|
+
` ${seed} ${vLabel} temp=${temp} #${i + 1}: ${passed ? "done" : `blocked[${failureClass ?? "unknown"}]`} (${cycles} cyc, ${edits} edits, ${regressions} regress, ${ms}ms${quality === undefined ? "" : `, Q${quality}/5`}) → ${runId}\n`
|
|
372
391
|
);
|
|
373
392
|
} finally {
|
|
374
393
|
restore();
|
|
@@ -380,11 +399,22 @@ const summaries = summarize(records);
|
|
|
380
399
|
process.stdout.write(`\n=== sweep: ${seed} (${repeats} runs/variant) ===\n`);
|
|
381
400
|
|
|
382
401
|
for (const s of summaries) {
|
|
402
|
+
const failures = Object.entries(s.failureClasses)
|
|
403
|
+
.sort(([, a], [, b]) => b - a)
|
|
404
|
+
.map(([cls, n]) => `${cls}×${String(n)}`)
|
|
405
|
+
.join(", ");
|
|
406
|
+
|
|
383
407
|
process.stdout.write(
|
|
384
|
-
`${s.label.padEnd(10)} pass ${Math.round(s.passRate * 100)}% (${s.passed}/${s.runs}) Q ${s.avgQuality.toFixed(1)}/5 avg ${s.avgCycles.toFixed(1)} cyc ${Math.round(s.avgMs)}ms\n`
|
|
408
|
+
`${s.label.padEnd(10)} pass ${Math.round(s.passRate * 100)}% (${s.passed}/${s.runs}) Q ${s.avgQuality.toFixed(1)}/5 avg ${s.avgCycles.toFixed(1)} cyc ${Math.round(s.avgMs)}ms${failures.length > 0 ? ` [${failures}]` : ""}\n`
|
|
385
409
|
);
|
|
386
410
|
}
|
|
387
411
|
|
|
412
|
+
// The statistical report (Wilson CI + z-test vs baseline) now also tabulates a
|
|
413
|
+
// per-variant failure-class breakdown — WHY runs failed, not just how often.
|
|
414
|
+
process.stdout.write(
|
|
415
|
+
`\n${renderSweepReportMarkdown(buildSweepReport(records))}\n`
|
|
416
|
+
);
|
|
417
|
+
|
|
388
418
|
const outPath = join(evalsRoot, "runs", `sweep-${seed}-${stamp()}.json`);
|
|
389
419
|
|
|
390
420
|
await Bun.write(
|
package/src/browser/index.ts
CHANGED
package/src/browser/oracle.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { resolve, dirname, basename, join } from "node:path";
|
|
2
|
+
import { isRecord } from "../lib/guards";
|
|
2
3
|
// `playwright` is an OPTIONAL peer: bundling it (+ a browser binary) into every
|
|
3
4
|
// install is too heavy, so the import is dynamic and the render-check skips when
|
|
4
5
|
// it's absent. The type-only import is erased at runtime, so it can't crash a
|
|
@@ -14,6 +15,20 @@ async function loadChromium(): Promise<typeof Chromium | null> {
|
|
|
14
15
|
}
|
|
15
16
|
}
|
|
16
17
|
|
|
18
|
+
/** Run axe against a page and return its raw result; null when @axe-core/
|
|
19
|
+
* playwright isn't installed (a11y is an optional enhancement, like the browser
|
|
20
|
+
* itself). Kept untyped at the boundary — extractAxeViolations narrows it. */
|
|
21
|
+
async function runAxe(page: Page): Promise<unknown> {
|
|
22
|
+
try {
|
|
23
|
+
const mod = await import("@axe-core/playwright");
|
|
24
|
+
const builder = new mod.AxeBuilder({ page });
|
|
25
|
+
|
|
26
|
+
return await builder.analyze();
|
|
27
|
+
} catch {
|
|
28
|
+
return null;
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
|
|
17
32
|
/**
|
|
18
33
|
* The browser oracle — renders a built web page in headless chromium and reports
|
|
19
34
|
* whether it actually WORKS, beyond what tsc/eslint can see: it fails on uncaught
|
|
@@ -55,22 +70,121 @@ export interface IRenderOptions {
|
|
|
55
70
|
* single-page smoke misses them. Served with SPA fallback so the client
|
|
56
71
|
* router handles the path. Empty/undefined → no crawl (unchanged behavior). */
|
|
57
72
|
routes?: string[];
|
|
73
|
+
/** Run axe accessibility checks on the page (and each crawled route). Serious
|
|
74
|
+
* and critical violations become gate errors; minor/moderate are skipped.
|
|
75
|
+
* Skipped gracefully when @axe-core/playwright isn't installed. */
|
|
76
|
+
a11y?: boolean;
|
|
77
|
+
/** Directory to write a screenshot per page/route into (desktop + mobile
|
|
78
|
+
* viewports). An artifact for human/visual review — never a pass/fail signal. */
|
|
79
|
+
screenshotDir?: string;
|
|
80
|
+
/** A perf budget (DOM node count + mount time) checked on the initial page. */
|
|
81
|
+
perfBudget?: IPerfBudget;
|
|
58
82
|
/** Navigation timeout (default 15s). */
|
|
59
83
|
timeoutMs?: number;
|
|
60
84
|
}
|
|
61
85
|
|
|
86
|
+
/** Screenshot viewports — a desktop and a mobile pass per page. */
|
|
87
|
+
const VIEWPORTS = [
|
|
88
|
+
{ name: "desktop", width: 1280, height: 800 },
|
|
89
|
+
{ name: "mobile", width: 390, height: 844 },
|
|
90
|
+
] as const;
|
|
91
|
+
|
|
62
92
|
export interface IRenderResult {
|
|
63
93
|
ok: boolean;
|
|
64
94
|
/** Human-readable failures (console errors, page errors, missing content). */
|
|
65
95
|
errors: string[];
|
|
66
96
|
/** True when the check was skipped because playwright isn't installed. */
|
|
67
97
|
skipped?: boolean;
|
|
98
|
+
/** Paths of screenshots captured (when `screenshotDir` was set). */
|
|
99
|
+
screenshots?: string[];
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
/** A simple performance budget: fail the render when the built app blows past
|
|
103
|
+
* these. Intentionally minimal (no full Lighthouse) — a tripwire, not a profiler. */
|
|
104
|
+
export interface IPerfBudget {
|
|
105
|
+
/** Max total DOM nodes after load (a proxy for over-heavy render trees). */
|
|
106
|
+
maxDomNodes?: number;
|
|
107
|
+
/** Max time from navigation start to DOMContentLoaded, in ms. */
|
|
108
|
+
maxMountMs?: number;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/** axe impact levels that FAIL the a11y check — minor/moderate are reported by
|
|
112
|
+
* axe but don't gate (too noisy to block a build on). */
|
|
113
|
+
const AXE_FAIL_IMPACTS = new Set(["serious", "critical"]);
|
|
114
|
+
|
|
115
|
+
/** The subset of an axe violation the oracle reports on. */
|
|
116
|
+
interface IAxeViolation {
|
|
117
|
+
id: string;
|
|
118
|
+
impact: string | undefined;
|
|
119
|
+
nodeCount: number;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
/** Extract the reportable violations from axe's (untyped, dynamically-imported)
|
|
123
|
+
* result — narrowed with guards, no casts. */
|
|
124
|
+
function extractAxeViolations(result: unknown): IAxeViolation[] {
|
|
125
|
+
if (!isRecord(result) || !Array.isArray(result.violations)) {
|
|
126
|
+
return [];
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
const out: IAxeViolation[] = [];
|
|
130
|
+
|
|
131
|
+
for (const v of result.violations) {
|
|
132
|
+
if (!isRecord(v) || typeof v.id !== "string") {
|
|
133
|
+
continue;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
out.push({
|
|
137
|
+
id: v.id,
|
|
138
|
+
impact: typeof v.impact === "string" ? v.impact : undefined,
|
|
139
|
+
nodeCount: Array.isArray(v.nodes) ? v.nodes.length : 0,
|
|
140
|
+
});
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
return out;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/** Turn axe violations into gate errors — only serious/critical fail. Pure. */
|
|
147
|
+
export function summarizeAxeViolations(
|
|
148
|
+
violations: readonly IAxeViolation[],
|
|
149
|
+
where: string
|
|
150
|
+
): string[] {
|
|
151
|
+
return violations
|
|
152
|
+
.filter((v) => v.impact !== undefined && AXE_FAIL_IMPACTS.has(v.impact))
|
|
153
|
+
.map(
|
|
154
|
+
(v) =>
|
|
155
|
+
`a11y ${v.impact ?? "?"} at ${where}: ${v.id} (${String(v.nodeCount)} node(s))`
|
|
156
|
+
);
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
/** Evaluate a perf budget against measured values → gate errors. Pure. */
|
|
160
|
+
export function checkPerfBudget(
|
|
161
|
+
domNodes: number,
|
|
162
|
+
mountMs: number,
|
|
163
|
+
budget: IPerfBudget,
|
|
164
|
+
where: string
|
|
165
|
+
): string[] {
|
|
166
|
+
const errors: string[] = [];
|
|
167
|
+
|
|
168
|
+
if (budget.maxDomNodes !== undefined && domNodes > budget.maxDomNodes) {
|
|
169
|
+
errors.push(
|
|
170
|
+
`perf at ${where}: ${String(domNodes)} DOM nodes > budget ${String(budget.maxDomNodes)}`
|
|
171
|
+
);
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
if (budget.maxMountMs !== undefined && mountMs > budget.maxMountMs) {
|
|
175
|
+
errors.push(
|
|
176
|
+
`perf at ${where}: mount ${String(Math.round(mountMs))}ms > budget ${String(budget.maxMountMs)}ms`
|
|
177
|
+
);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
return errors;
|
|
68
181
|
}
|
|
69
182
|
|
|
70
183
|
export async function renderCheck(
|
|
71
184
|
opts: IRenderOptions
|
|
72
185
|
): Promise<IRenderResult> {
|
|
73
186
|
const errors: string[] = [];
|
|
187
|
+
const screenshots: string[] = [];
|
|
74
188
|
const chromium = await loadChromium();
|
|
75
189
|
|
|
76
190
|
// No playwright → skip the render check rather than fail the gate. The build
|
|
@@ -87,7 +201,10 @@ export async function renderCheck(
|
|
|
87
201
|
const browser = await chromium.launch({ args: ["--no-sandbox"] });
|
|
88
202
|
|
|
89
203
|
try {
|
|
90
|
-
|
|
204
|
+
// Page via an explicit context (not browser.newPage()) — axe-core/playwright
|
|
205
|
+
// requires a context-owned page; browser.close() tears the context down too.
|
|
206
|
+
const context = await browser.newContext();
|
|
207
|
+
const page = await context.newPage();
|
|
91
208
|
const timeout = opts.timeoutMs ?? 15_000;
|
|
92
209
|
|
|
93
210
|
page.on("console", (message) => {
|
|
@@ -113,30 +230,39 @@ export async function renderCheck(
|
|
|
113
230
|
waitUntil: "load",
|
|
114
231
|
timeout,
|
|
115
232
|
});
|
|
116
|
-
await runChecks(page, opts, errors);
|
|
233
|
+
await runChecks(page, opts, errors, screenshots);
|
|
117
234
|
|
|
118
235
|
if (opts.routes !== undefined && opts.routes.length > 0) {
|
|
119
|
-
await crawlRoutes(page, base, opts.routes, errors, timeout);
|
|
236
|
+
await crawlRoutes(page, base, opts.routes, errors, timeout, {
|
|
237
|
+
opts,
|
|
238
|
+
screenshots,
|
|
239
|
+
});
|
|
120
240
|
}
|
|
121
241
|
} finally {
|
|
122
242
|
await server.stop(true);
|
|
123
243
|
}
|
|
124
244
|
} else {
|
|
125
245
|
await page.setContent(opts.html ?? "", { waitUntil: "load", timeout });
|
|
126
|
-
await runChecks(page, opts, errors);
|
|
246
|
+
await runChecks(page, opts, errors, screenshots);
|
|
127
247
|
}
|
|
128
248
|
|
|
129
|
-
return { ok: errors.length === 0, errors };
|
|
249
|
+
return {
|
|
250
|
+
ok: errors.length === 0,
|
|
251
|
+
errors,
|
|
252
|
+
...(screenshots.length > 0 ? { screenshots } : {}),
|
|
253
|
+
};
|
|
130
254
|
} finally {
|
|
131
255
|
await browser.close();
|
|
132
256
|
}
|
|
133
257
|
}
|
|
134
258
|
|
|
135
|
-
/** The expectation + step + smoke checks that run against the loaded page. */
|
|
259
|
+
/** The expectation + step + smoke checks that run against the loaded page, then
|
|
260
|
+
* the optional quality oracles (a11y, perf budget, screenshots). */
|
|
136
261
|
async function runChecks(
|
|
137
262
|
page: Page,
|
|
138
263
|
opts: IRenderOptions,
|
|
139
|
-
errors: string[]
|
|
264
|
+
errors: string[],
|
|
265
|
+
screenshots: string[]
|
|
140
266
|
): Promise<void> {
|
|
141
267
|
await checkExpectations(page, opts.expect, errors);
|
|
142
268
|
|
|
@@ -147,6 +273,76 @@ async function runChecks(
|
|
|
147
273
|
if (opts.smoke === true) {
|
|
148
274
|
await runSmoke(page, errors);
|
|
149
275
|
}
|
|
276
|
+
|
|
277
|
+
await runQualityOracles(page, opts, "index", errors, screenshots);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
/** The opt-in quality layer: accessibility (axe), a perf budget, and screenshots.
|
|
281
|
+
* Each is independent and skips cleanly when not requested / dep absent. */
|
|
282
|
+
async function runQualityOracles(
|
|
283
|
+
page: Page,
|
|
284
|
+
opts: IRenderOptions,
|
|
285
|
+
where: string,
|
|
286
|
+
errors: string[],
|
|
287
|
+
screenshots: string[]
|
|
288
|
+
): Promise<void> {
|
|
289
|
+
if (opts.a11y === true) {
|
|
290
|
+
const violations = extractAxeViolations(await runAxe(page));
|
|
291
|
+
|
|
292
|
+
errors.push(...summarizeAxeViolations(violations, where));
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
if (opts.perfBudget !== undefined) {
|
|
296
|
+
const { domNodes, mountMs } = await measurePage(page);
|
|
297
|
+
|
|
298
|
+
errors.push(...checkPerfBudget(domNodes, mountMs, opts.perfBudget, where));
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
if (opts.screenshotDir !== undefined) {
|
|
302
|
+
await capturePage(page, opts.screenshotDir, where, screenshots);
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
/** Measure DOM size + mount time for the perf budget. */
|
|
307
|
+
async function measurePage(
|
|
308
|
+
page: Page
|
|
309
|
+
): Promise<{ domNodes: number; mountMs: number }> {
|
|
310
|
+
return page.evaluate(() => {
|
|
311
|
+
const nav = performance.getEntriesByType("navigation")[0];
|
|
312
|
+
const mountMs =
|
|
313
|
+
nav instanceof PerformanceNavigationTiming
|
|
314
|
+
? nav.domContentLoadedEventEnd - nav.startTime
|
|
315
|
+
: 0;
|
|
316
|
+
|
|
317
|
+
return { domNodes: document.querySelectorAll("*").length, mountMs };
|
|
318
|
+
});
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
/** Filesystem-safe label for a route (e.g. "/a/b" → "a-b", "/" → "index"). */
|
|
322
|
+
function routeLabel(route: string): string {
|
|
323
|
+
const cleaned = route.replace(/^\/+|\/+$/g, "").replace(/\//g, "-");
|
|
324
|
+
|
|
325
|
+
return cleaned.length === 0 ? "index" : cleaned;
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
/** Capture a desktop + mobile screenshot of the current page into `dir`. */
|
|
329
|
+
async function capturePage(
|
|
330
|
+
page: Page,
|
|
331
|
+
dir: string,
|
|
332
|
+
label: string,
|
|
333
|
+
screenshots: string[]
|
|
334
|
+
): Promise<void> {
|
|
335
|
+
for (const vp of VIEWPORTS) {
|
|
336
|
+
const path = join(dir, `${label}-${vp.name}.png`);
|
|
337
|
+
|
|
338
|
+
try {
|
|
339
|
+
await page.setViewportSize({ width: vp.width, height: vp.height });
|
|
340
|
+
await page.screenshot({ path, fullPage: true });
|
|
341
|
+
screenshots.push(path);
|
|
342
|
+
} catch {
|
|
343
|
+
// A screenshot is a best-effort artifact, never a gate failure.
|
|
344
|
+
}
|
|
345
|
+
}
|
|
150
346
|
}
|
|
151
347
|
|
|
152
348
|
/** Serve a directory on an ephemeral localhost port. SPA FALLBACK: an
|
|
@@ -187,7 +383,8 @@ async function crawlRoutes(
|
|
|
187
383
|
base: string,
|
|
188
384
|
routes: readonly string[],
|
|
189
385
|
errors: string[],
|
|
190
|
-
timeout: number
|
|
386
|
+
timeout: number,
|
|
387
|
+
quality: { opts: IRenderOptions; screenshots: string[] }
|
|
191
388
|
): Promise<void> {
|
|
192
389
|
for (const route of routes) {
|
|
193
390
|
try {
|
|
@@ -207,7 +404,17 @@ async function crawlRoutes(
|
|
|
207
404
|
|
|
208
405
|
if (blank) {
|
|
209
406
|
errors.push(`route ${route} rendered blank`);
|
|
407
|
+
continue;
|
|
210
408
|
}
|
|
409
|
+
|
|
410
|
+
// a11y + screenshots per route (perf budget stays an initial-page check).
|
|
411
|
+
await runQualityOracles(
|
|
412
|
+
page,
|
|
413
|
+
{ ...quality.opts, perfBudget: undefined },
|
|
414
|
+
routeLabel(route),
|
|
415
|
+
errors,
|
|
416
|
+
quality.screenshots
|
|
417
|
+
);
|
|
211
418
|
} catch (error) {
|
|
212
419
|
errors.push(
|
|
213
420
|
`route ${route} failed to load: ${error instanceof Error ? error.message : String(error)}`
|
package/src/cli.ts
CHANGED
|
@@ -38,6 +38,7 @@ import {
|
|
|
38
38
|
buildGate,
|
|
39
39
|
buildWebGate,
|
|
40
40
|
buildWebFix,
|
|
41
|
+
buildCoreFix,
|
|
41
42
|
buildWebTypeGate,
|
|
42
43
|
buildWebTscCheck,
|
|
43
44
|
scaffoldWeb,
|
|
@@ -102,11 +103,15 @@ export interface ICliArgs {
|
|
|
102
103
|
/** Plan mode: a from-scratch build pauses after the design phase to show its
|
|
103
104
|
* plan for review/edit before implementing (`--plan`; also toggled by /plan). */
|
|
104
105
|
plan: boolean;
|
|
106
|
+
/** Keep the auto-gate at the strict TS floor only — do NOT append the
|
|
107
|
+
* project's discovered tests (`--strict-floor-only`). By default the auto-gate
|
|
108
|
+
* also runs the project's tests, so "green" means floor + tests pass. */
|
|
109
|
+
strictFloorOnly: boolean;
|
|
105
110
|
}
|
|
106
111
|
|
|
107
112
|
const BOOL_FLAGS: Record<
|
|
108
113
|
string,
|
|
109
|
-
"continue" | "noGate" | "web" | "log" | "plan"
|
|
114
|
+
"continue" | "noGate" | "web" | "log" | "plan" | "strictFloorOnly"
|
|
110
115
|
> = {
|
|
111
116
|
"--continue": "continue",
|
|
112
117
|
"-c": "continue",
|
|
@@ -114,6 +119,7 @@ const BOOL_FLAGS: Record<
|
|
|
114
119
|
"--web": "web",
|
|
115
120
|
"--log": "log",
|
|
116
121
|
"--plan": "plan",
|
|
122
|
+
"--strict-floor-only": "strictFloorOnly",
|
|
117
123
|
};
|
|
118
124
|
|
|
119
125
|
const VALUE_FLAGS = new Set([
|
|
@@ -140,6 +146,7 @@ export function parseArgs(argv: readonly string[]): ICliArgs {
|
|
|
140
146
|
web: false,
|
|
141
147
|
log: false,
|
|
142
148
|
plan: false,
|
|
149
|
+
strictFloorOnly: false,
|
|
143
150
|
};
|
|
144
151
|
|
|
145
152
|
for (let i = 0; i < argv.length; i += 1) {
|
|
@@ -812,7 +819,13 @@ async function baseGate(
|
|
|
812
819
|
args.dir,
|
|
813
820
|
activePacks,
|
|
814
821
|
Object.keys(ruleOverrides).length > 0 ? ruleOverrides : undefined,
|
|
815
|
-
{ enableTypeAware: profile === "strict" }
|
|
822
|
+
{
|
|
823
|
+
enableTypeAware: profile === "strict",
|
|
824
|
+
// "Green" should mean the strict floor AND the project's own tests pass —
|
|
825
|
+
// not just that it type-checks and lints. discoverTestCommand appends them
|
|
826
|
+
// only when the project actually has tests; --strict-floor-only opts out.
|
|
827
|
+
includeTests: !args.strictFloorOnly,
|
|
828
|
+
}
|
|
816
829
|
);
|
|
817
830
|
|
|
818
831
|
return { accept: auto.command, gateLabel: auto.label };
|
|
@@ -891,7 +904,7 @@ async function repl(args: ICliArgs): Promise<number> {
|
|
|
891
904
|
fix: buildWebFix("react"),
|
|
892
905
|
incrementalCheck: buildWebTscCheck(),
|
|
893
906
|
}
|
|
894
|
-
: { scaffoldWeb: true }),
|
|
907
|
+
: { scaffoldWeb: true, fix: buildCoreFix() }),
|
|
895
908
|
...(thinkingTokenBudget === undefined ? {} : { thinkingTokenBudget }),
|
|
896
909
|
...(autoCompactAt === undefined ? {} : { autoCompactAt }),
|
|
897
910
|
// Thinking OFF for interactive replies so they STREAM immediately instead of
|