@agjs/tsforge 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -2
- package/scripts/browser-check.ts +41 -5
- package/scripts/cli-metrics.ts +10 -0
- package/scripts/sweep.ts +28 -3
- package/src/browser/index.ts +3 -0
- package/src/browser/oracle.ts +215 -8
- package/src/cli.ts +14 -2
- package/src/detect-gate.ts +111 -13
- package/src/eval/eval.types.ts +9 -0
- package/src/eval/failure-class.ts +263 -0
- package/src/eval/index.ts +8 -0
- package/src/eval/metrics.ts +7 -0
- package/src/eval/parse-log.ts +105 -0
- package/src/eval/report.ts +19 -0
- package/src/eval/score.ts +10 -0
- package/src/loop/loop.types.ts +4 -0
- package/src/loop/turn.ts +3 -0
- package/strict.type-aware.eslint.config.mjs +33 -0
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agjs/tsforge",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.2.0",
|
|
4
|
+
"version": "0.2.1",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"description": "TypeScript coding harness with a deterministic gate, stack-aware guardrails, and stream-level correction.",
|
|
7
7
|
"repository": {
|
|
@@ -19,7 +19,8 @@
|
|
|
19
19
|
"src",
|
|
20
20
|
"scripts",
|
|
21
21
|
"strict.eslint.config.mjs",
|
|
22
|
-
"strict.web.eslint.config.mjs"
|
|
22
|
+
"strict.web.eslint.config.mjs",
|
|
23
|
+
"strict.type-aware.eslint.config.mjs"
|
|
23
24
|
],
|
|
24
25
|
"engines": {
|
|
25
26
|
"bun": ">=1.3.14"
|
package/scripts/browser-check.ts
CHANGED
|
@@ -5,27 +5,59 @@
|
|
|
5
5
|
//
|
|
6
6
|
// bun browser-check.ts <htmlFile> # render-only (no errors)
|
|
7
7
|
// bun browser-check.ts <htmlFile> --smoke # render + generic behaviour smoke
|
|
8
|
+
// bun browser-check.ts <htmlFile> --a11y # + axe accessibility (serious/critical fail)
|
|
9
|
+
// bun browser-check.ts <htmlFile> --screenshots[=dir] # + per-route PNGs (artifact)
|
|
10
|
+
// bun browser-check.ts <htmlFile> --perf # + a basic DOM-size/mount-time budget
|
|
8
11
|
// bun browser-check.ts <htmlFile> <checks.json> # render + interaction checks
|
|
9
12
|
// bun browser-check.ts <htmlFile> <selector> [text]
|
|
10
13
|
import { readdir } from "node:fs/promises";
|
|
11
14
|
import { dirname, join } from "node:path";
|
|
12
|
-
import {
|
|
15
|
+
import {
|
|
16
|
+
renderCheck,
|
|
17
|
+
parseChecks,
|
|
18
|
+
type IRenderOptions,
|
|
19
|
+
type IPerfBudget,
|
|
20
|
+
} from "../src/browser";
|
|
13
21
|
import { crawlableRoutePaths } from "../src/web-routes";
|
|
14
22
|
|
|
15
23
|
const rawArgs = process.argv.slice(2);
|
|
16
24
|
const smoke = rawArgs.includes("--smoke");
|
|
17
25
|
const crawl = rawArgs.includes("--crawl");
|
|
18
|
-
const
|
|
19
|
-
|
|
20
|
-
);
|
|
26
|
+
const a11y = rawArgs.includes("--a11y");
|
|
27
|
+
const perf = rawArgs.includes("--perf");
|
|
28
|
+
const screenshotsArg = rawArgs.find((a) => a.startsWith("--screenshots"));
|
|
29
|
+
// Positionals are anything that isn't a recognized `--flag`.
|
|
30
|
+
const [file, arg2, arg3] = rawArgs.filter((a) => !a.startsWith("--"));
|
|
21
31
|
|
|
22
32
|
if (file === undefined) {
|
|
23
33
|
process.stderr.write(
|
|
24
|
-
"usage: browser-check.ts <htmlFile> [--smoke] [--crawl] [
|
|
34
|
+
"usage: browser-check.ts <htmlFile> [--smoke] [--crawl] [--a11y] " +
|
|
35
|
+
"[--screenshots[=dir]] [--perf] [checks.json | selector [text]]\n"
|
|
25
36
|
);
|
|
26
37
|
process.exit(2);
|
|
27
38
|
}
|
|
28
39
|
|
|
40
|
+
/** A conservative default budget — a tripwire for runaway render trees / slow
|
|
41
|
+
* mounts, not a tuned Lighthouse target. */
|
|
42
|
+
const DEFAULT_PERF_BUDGET: IPerfBudget = {
|
|
43
|
+
maxDomNodes: 5000,
|
|
44
|
+
maxMountMs: 6000,
|
|
45
|
+
};
|
|
46
|
+
|
|
47
|
+
/** The screenshot dir: `--screenshots=<dir>`, else a `screenshots/` folder next
|
|
48
|
+
* to the HTML file. undefined when `--screenshots` wasn't passed. */
|
|
49
|
+
function screenshotDir(): string | undefined {
|
|
50
|
+
if (screenshotsArg === undefined) {
|
|
51
|
+
return undefined;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
const eq = screenshotsArg.indexOf("=");
|
|
55
|
+
|
|
56
|
+
return eq === -1
|
|
57
|
+
? join(dirname(file ?? "."), "screenshots")
|
|
58
|
+
: screenshotsArg.slice(eq + 1);
|
|
59
|
+
}
|
|
60
|
+
|
|
29
61
|
/** With --crawl, enumerate the app's static routes from `<buildDir>/src/routes/`
|
|
30
62
|
* (the build dir is the parent of dist/) so every page — not just the home —
|
|
31
63
|
* is render-checked. Dynamic ($param) routes are skipped. */
|
|
@@ -66,10 +98,14 @@ async function checksFor(): Promise<Partial<IRenderOptions>> {
|
|
|
66
98
|
};
|
|
67
99
|
}
|
|
68
100
|
|
|
101
|
+
const shots = screenshotDir();
|
|
69
102
|
const result = await renderCheck({
|
|
70
103
|
file,
|
|
71
104
|
smoke,
|
|
105
|
+
a11y,
|
|
72
106
|
routes: await routesFor(),
|
|
107
|
+
...(perf ? { perfBudget: DEFAULT_PERF_BUDGET } : {}),
|
|
108
|
+
...(shots !== undefined ? { screenshotDir: shots } : {}),
|
|
73
109
|
...(await checksFor()),
|
|
74
110
|
});
|
|
75
111
|
|
package/scripts/cli-metrics.ts
CHANGED
|
@@ -10,6 +10,7 @@ import { readdir } from "node:fs/promises";
|
|
|
10
10
|
import { homedir } from "node:os";
|
|
11
11
|
import { join } from "node:path";
|
|
12
12
|
import { isRecord } from "../src/lib/guards";
|
|
13
|
+
import { classifyRun, parseEventLog } from "../src/eval";
|
|
13
14
|
|
|
14
15
|
function num(value: unknown): number {
|
|
15
16
|
return typeof value === "number" ? value : 0;
|
|
@@ -168,6 +169,9 @@ async function main(): Promise<void> {
|
|
|
168
169
|
const text = await Bun.file(path).text();
|
|
169
170
|
const lines = text.split("\n").filter((l) => l.trim().length > 0);
|
|
170
171
|
const m = analyze(lines);
|
|
172
|
+
// Single source of truth for WHY a run failed — the same classifier the eval
|
|
173
|
+
// sweep and the reusable analyzeEvents() use, fed the typed event stream.
|
|
174
|
+
const failure = classifyRun(parseEventLog(text));
|
|
171
175
|
const pct =
|
|
172
176
|
m.contextWindow > 0
|
|
173
177
|
? Math.round((m.peakContext / m.contextWindow) * 100)
|
|
@@ -182,6 +186,12 @@ async function main(): Promise<void> {
|
|
|
182
186
|
["model", m.model],
|
|
183
187
|
["context window", String(m.contextWindow)],
|
|
184
188
|
["final status", m.finalStatus],
|
|
189
|
+
[
|
|
190
|
+
"failure class",
|
|
191
|
+
failure.detail === undefined
|
|
192
|
+
? failure.failureClass
|
|
193
|
+
: `${failure.failureClass} (${failure.detail})`,
|
|
194
|
+
],
|
|
185
195
|
["turns (repair iterations)", String(m.turns)],
|
|
186
196
|
["model calls", String(m.modelCalls)],
|
|
187
197
|
["tokens out (→ solution)", String(m.tokensOut)],
|
package/scripts/sweep.ts
CHANGED
|
@@ -12,7 +12,13 @@ import { modelAgent } from "../src/agent";
|
|
|
12
12
|
import { OpenAICompatibleProvider } from "../src/inference";
|
|
13
13
|
import { resolveActiveModel, resolveApiKey } from "../src/models-config";
|
|
14
14
|
import { providerConfig } from "../src/cli";
|
|
15
|
-
import {
|
|
15
|
+
import {
|
|
16
|
+
summarize,
|
|
17
|
+
classifyRun,
|
|
18
|
+
renderSweepReportMarkdown,
|
|
19
|
+
buildSweepReport,
|
|
20
|
+
type IRunRecord,
|
|
21
|
+
} from "../src/eval";
|
|
16
22
|
import { renderEvent } from "../src/render";
|
|
17
23
|
import type { ILoopEvent } from "../src/loop";
|
|
18
24
|
|
|
@@ -268,8 +274,12 @@ async function runOne(
|
|
|
268
274
|
// Every run gets a full transcript at <runDir>/run.log; stream to the
|
|
269
275
|
// terminal too when TSFORGE_STREAM=1.
|
|
270
276
|
const log = Bun.file(join(runDir, "run.log")).writer();
|
|
277
|
+
// Keep the structured events so a failed run can be classified (WHY it
|
|
278
|
+
// failed), not just counted — fed to classifyRun below.
|
|
279
|
+
const runEvents: ILoopEvent[] = [];
|
|
271
280
|
|
|
272
281
|
const onEvent = (e: ILoopEvent): void => {
|
|
282
|
+
runEvents.push(e);
|
|
273
283
|
void log.write(renderEvent(e, { color: false }));
|
|
274
284
|
// Flush per event — otherwise Bun's FileSink buffers and `tail -f` shows
|
|
275
285
|
// nothing until the run ends. The log must be live.
|
|
@@ -359,6 +369,9 @@ async function runOne(
|
|
|
359
369
|
);
|
|
360
370
|
|
|
361
371
|
const vLabel = variantLabel(variantEnv);
|
|
372
|
+
const failureClass = passed
|
|
373
|
+
? undefined
|
|
374
|
+
: classifyRun(runEvents).failureClass;
|
|
362
375
|
|
|
363
376
|
records.push({
|
|
364
377
|
label: `${vLabel} temp=${temp}`,
|
|
@@ -366,9 +379,10 @@ async function runOne(
|
|
|
366
379
|
cycles,
|
|
367
380
|
ms,
|
|
368
381
|
quality,
|
|
382
|
+
...(failureClass === undefined ? {} : { failureClass }),
|
|
369
383
|
});
|
|
370
384
|
process.stdout.write(
|
|
371
|
-
` ${seed} ${vLabel} temp=${temp} #${i + 1}: ${passed ? "done" :
|
|
385
|
+
` ${seed} ${vLabel} temp=${temp} #${i + 1}: ${passed ? "done" : `blocked[${failureClass ?? "unknown"}]`} (${cycles} cyc, ${edits} edits, ${regressions} regress, ${ms}ms${quality === undefined ? "" : `, Q${quality}/5`}) → ${runId}\n`
|
|
372
386
|
);
|
|
373
387
|
} finally {
|
|
374
388
|
restore();
|
|
@@ -380,11 +394,22 @@ const summaries = summarize(records);
|
|
|
380
394
|
process.stdout.write(`\n=== sweep: ${seed} (${repeats} runs/variant) ===\n`);
|
|
381
395
|
|
|
382
396
|
for (const s of summaries) {
|
|
397
|
+
const failures = Object.entries(s.failureClasses)
|
|
398
|
+
.sort(([, a], [, b]) => b - a)
|
|
399
|
+
.map(([cls, n]) => `${cls}×${String(n)}`)
|
|
400
|
+
.join(", ");
|
|
401
|
+
|
|
383
402
|
process.stdout.write(
|
|
384
|
-
`${s.label.padEnd(10)} pass ${Math.round(s.passRate * 100)}% (${s.passed}/${s.runs}) Q ${s.avgQuality.toFixed(1)}/5 avg ${s.avgCycles.toFixed(1)} cyc ${Math.round(s.avgMs)}ms\n`
|
|
403
|
+
`${s.label.padEnd(10)} pass ${Math.round(s.passRate * 100)}% (${s.passed}/${s.runs}) Q ${s.avgQuality.toFixed(1)}/5 avg ${s.avgCycles.toFixed(1)} cyc ${Math.round(s.avgMs)}ms${failures.length > 0 ? ` [${failures}]` : ""}\n`
|
|
385
404
|
);
|
|
386
405
|
}
|
|
387
406
|
|
|
407
|
+
// The statistical report (Wilson CI + z-test vs baseline) now also tabulates a
|
|
408
|
+
// per-variant failure-class breakdown — WHY runs failed, not just how often.
|
|
409
|
+
process.stdout.write(
|
|
410
|
+
`\n${renderSweepReportMarkdown(buildSweepReport(records))}\n`
|
|
411
|
+
);
|
|
412
|
+
|
|
388
413
|
const outPath = join(evalsRoot, "runs", `sweep-${seed}-${stamp()}.json`);
|
|
389
414
|
|
|
390
415
|
await Bun.write(
|
package/src/browser/index.ts
CHANGED
package/src/browser/oracle.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { resolve, dirname, basename, join } from "node:path";
|
|
2
|
+
import { isRecord } from "../lib/guards";
|
|
2
3
|
// `playwright` is an OPTIONAL peer: bundling it (+ a browser binary) into every
|
|
3
4
|
// install is too heavy, so the import is dynamic and the render-check skips when
|
|
4
5
|
// it's absent. The type-only import is erased at runtime, so it can't crash a
|
|
@@ -14,6 +15,20 @@ async function loadChromium(): Promise<typeof Chromium | null> {
|
|
|
14
15
|
}
|
|
15
16
|
}
|
|
16
17
|
|
|
18
|
+
/** Run axe against a page and return its raw result; null when @axe-core/
|
|
19
|
+
* playwright isn't installed (a11y is an optional enhancement, like the browser
|
|
20
|
+
* itself). Kept untyped at the boundary — extractAxeViolations narrows it. */
|
|
21
|
+
async function runAxe(page: Page): Promise<unknown> {
|
|
22
|
+
try {
|
|
23
|
+
const mod = await import("@axe-core/playwright");
|
|
24
|
+
const builder = new mod.AxeBuilder({ page });
|
|
25
|
+
|
|
26
|
+
return await builder.analyze();
|
|
27
|
+
} catch {
|
|
28
|
+
return null;
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
|
|
17
32
|
/**
|
|
18
33
|
* The browser oracle — renders a built web page in headless chromium and reports
|
|
19
34
|
* whether it actually WORKS, beyond what tsc/eslint can see: it fails on uncaught
|
|
@@ -55,22 +70,121 @@ export interface IRenderOptions {
|
|
|
55
70
|
* single-page smoke misses them. Served with SPA fallback so the client
|
|
56
71
|
* router handles the path. Empty/undefined → no crawl (unchanged behavior). */
|
|
57
72
|
routes?: string[];
|
|
73
|
+
/** Run axe accessibility checks on the page (and each crawled route). Serious
|
|
74
|
+
* and critical violations become gate errors; minor/moderate are skipped.
|
|
75
|
+
* Skipped gracefully when @axe-core/playwright isn't installed. */
|
|
76
|
+
a11y?: boolean;
|
|
77
|
+
/** Directory to write a screenshot per page/route into (desktop + mobile
|
|
78
|
+
* viewports). An artifact for human/visual review — never a pass/fail signal. */
|
|
79
|
+
screenshotDir?: string;
|
|
80
|
+
/** A perf budget (DOM node count + mount time) checked on the initial page. */
|
|
81
|
+
perfBudget?: IPerfBudget;
|
|
58
82
|
/** Navigation timeout (default 15s). */
|
|
59
83
|
timeoutMs?: number;
|
|
60
84
|
}
|
|
61
85
|
|
|
86
|
+
/** Screenshot viewports — a desktop and a mobile pass per page. */
|
|
87
|
+
const VIEWPORTS = [
|
|
88
|
+
{ name: "desktop", width: 1280, height: 800 },
|
|
89
|
+
{ name: "mobile", width: 390, height: 844 },
|
|
90
|
+
] as const;
|
|
91
|
+
|
|
62
92
|
export interface IRenderResult {
|
|
63
93
|
ok: boolean;
|
|
64
94
|
/** Human-readable failures (console errors, page errors, missing content). */
|
|
65
95
|
errors: string[];
|
|
66
96
|
/** True when the check was skipped because playwright isn't installed. */
|
|
67
97
|
skipped?: boolean;
|
|
98
|
+
/** Paths of screenshots captured (when `screenshotDir` was set). */
|
|
99
|
+
screenshots?: string[];
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
/** A simple performance budget: fail the render when the built app blows past
|
|
103
|
+
* these. Intentionally minimal (no full Lighthouse) — a tripwire, not a profiler. */
|
|
104
|
+
export interface IPerfBudget {
|
|
105
|
+
/** Max total DOM nodes after load (a proxy for over-heavy render trees). */
|
|
106
|
+
maxDomNodes?: number;
|
|
107
|
+
/** Max time from navigation start to DOMContentLoaded, in ms. */
|
|
108
|
+
maxMountMs?: number;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/** axe impact levels that FAIL the a11y check — minor/moderate are reported by
|
|
112
|
+
* axe but don't gate (too noisy to block a build on). */
|
|
113
|
+
const AXE_FAIL_IMPACTS = new Set(["serious", "critical"]);
|
|
114
|
+
|
|
115
|
+
/** The subset of an axe violation the oracle reports on. */
|
|
116
|
+
interface IAxeViolation {
|
|
117
|
+
id: string;
|
|
118
|
+
impact: string | undefined;
|
|
119
|
+
nodeCount: number;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
/** Extract the reportable violations from axe's (untyped, dynamically-imported)
|
|
123
|
+
* result — narrowed with guards, no casts. */
|
|
124
|
+
function extractAxeViolations(result: unknown): IAxeViolation[] {
|
|
125
|
+
if (!isRecord(result) || !Array.isArray(result.violations)) {
|
|
126
|
+
return [];
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
const out: IAxeViolation[] = [];
|
|
130
|
+
|
|
131
|
+
for (const v of result.violations) {
|
|
132
|
+
if (!isRecord(v) || typeof v.id !== "string") {
|
|
133
|
+
continue;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
out.push({
|
|
137
|
+
id: v.id,
|
|
138
|
+
impact: typeof v.impact === "string" ? v.impact : undefined,
|
|
139
|
+
nodeCount: Array.isArray(v.nodes) ? v.nodes.length : 0,
|
|
140
|
+
});
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
return out;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/** Turn axe violations into gate errors — only serious/critical fail. Pure. */
|
|
147
|
+
export function summarizeAxeViolations(
|
|
148
|
+
violations: readonly IAxeViolation[],
|
|
149
|
+
where: string
|
|
150
|
+
): string[] {
|
|
151
|
+
return violations
|
|
152
|
+
.filter((v) => v.impact !== undefined && AXE_FAIL_IMPACTS.has(v.impact))
|
|
153
|
+
.map(
|
|
154
|
+
(v) =>
|
|
155
|
+
`a11y ${v.impact ?? "?"} at ${where}: ${v.id} (${String(v.nodeCount)} node(s))`
|
|
156
|
+
);
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
/** Evaluate a perf budget against measured values → gate errors. Pure. */
|
|
160
|
+
export function checkPerfBudget(
|
|
161
|
+
domNodes: number,
|
|
162
|
+
mountMs: number,
|
|
163
|
+
budget: IPerfBudget,
|
|
164
|
+
where: string
|
|
165
|
+
): string[] {
|
|
166
|
+
const errors: string[] = [];
|
|
167
|
+
|
|
168
|
+
if (budget.maxDomNodes !== undefined && domNodes > budget.maxDomNodes) {
|
|
169
|
+
errors.push(
|
|
170
|
+
`perf at ${where}: ${String(domNodes)} DOM nodes > budget ${String(budget.maxDomNodes)}`
|
|
171
|
+
);
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
if (budget.maxMountMs !== undefined && mountMs > budget.maxMountMs) {
|
|
175
|
+
errors.push(
|
|
176
|
+
`perf at ${where}: mount ${String(Math.round(mountMs))}ms > budget ${String(budget.maxMountMs)}ms`
|
|
177
|
+
);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
return errors;
|
|
68
181
|
}
|
|
69
182
|
|
|
70
183
|
export async function renderCheck(
|
|
71
184
|
opts: IRenderOptions
|
|
72
185
|
): Promise<IRenderResult> {
|
|
73
186
|
const errors: string[] = [];
|
|
187
|
+
const screenshots: string[] = [];
|
|
74
188
|
const chromium = await loadChromium();
|
|
75
189
|
|
|
76
190
|
// No playwright → skip the render check rather than fail the gate. The build
|
|
@@ -87,7 +201,10 @@ export async function renderCheck(
|
|
|
87
201
|
const browser = await chromium.launch({ args: ["--no-sandbox"] });
|
|
88
202
|
|
|
89
203
|
try {
|
|
90
|
-
|
|
204
|
+
// Page via an explicit context (not browser.newPage()) — axe-core/playwright
|
|
205
|
+
// requires a context-owned page; browser.close() tears the context down too.
|
|
206
|
+
const context = await browser.newContext();
|
|
207
|
+
const page = await context.newPage();
|
|
91
208
|
const timeout = opts.timeoutMs ?? 15_000;
|
|
92
209
|
|
|
93
210
|
page.on("console", (message) => {
|
|
@@ -113,30 +230,39 @@ export async function renderCheck(
|
|
|
113
230
|
waitUntil: "load",
|
|
114
231
|
timeout,
|
|
115
232
|
});
|
|
116
|
-
await runChecks(page, opts, errors);
|
|
233
|
+
await runChecks(page, opts, errors, screenshots);
|
|
117
234
|
|
|
118
235
|
if (opts.routes !== undefined && opts.routes.length > 0) {
|
|
119
|
-
await crawlRoutes(page, base, opts.routes, errors, timeout);
|
|
236
|
+
await crawlRoutes(page, base, opts.routes, errors, timeout, {
|
|
237
|
+
opts,
|
|
238
|
+
screenshots,
|
|
239
|
+
});
|
|
120
240
|
}
|
|
121
241
|
} finally {
|
|
122
242
|
await server.stop(true);
|
|
123
243
|
}
|
|
124
244
|
} else {
|
|
125
245
|
await page.setContent(opts.html ?? "", { waitUntil: "load", timeout });
|
|
126
|
-
await runChecks(page, opts, errors);
|
|
246
|
+
await runChecks(page, opts, errors, screenshots);
|
|
127
247
|
}
|
|
128
248
|
|
|
129
|
-
return { ok: errors.length === 0, errors };
|
|
249
|
+
return {
|
|
250
|
+
ok: errors.length === 0,
|
|
251
|
+
errors,
|
|
252
|
+
...(screenshots.length > 0 ? { screenshots } : {}),
|
|
253
|
+
};
|
|
130
254
|
} finally {
|
|
131
255
|
await browser.close();
|
|
132
256
|
}
|
|
133
257
|
}
|
|
134
258
|
|
|
135
|
-
/** The expectation + step + smoke checks that run against the loaded page
|
|
259
|
+
/** The expectation + step + smoke checks that run against the loaded page, then
|
|
260
|
+
* the optional quality oracles (a11y, perf budget, screenshots). */
|
|
136
261
|
async function runChecks(
|
|
137
262
|
page: Page,
|
|
138
263
|
opts: IRenderOptions,
|
|
139
|
-
errors: string[]
|
|
264
|
+
errors: string[],
|
|
265
|
+
screenshots: string[]
|
|
140
266
|
): Promise<void> {
|
|
141
267
|
await checkExpectations(page, opts.expect, errors);
|
|
142
268
|
|
|
@@ -147,6 +273,76 @@ async function runChecks(
|
|
|
147
273
|
if (opts.smoke === true) {
|
|
148
274
|
await runSmoke(page, errors);
|
|
149
275
|
}
|
|
276
|
+
|
|
277
|
+
await runQualityOracles(page, opts, "index", errors, screenshots);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
/** The opt-in quality layer: accessibility (axe), a perf budget, and screenshots.
|
|
281
|
+
* Each is independent and skips cleanly when not requested / dep absent. */
|
|
282
|
+
async function runQualityOracles(
|
|
283
|
+
page: Page,
|
|
284
|
+
opts: IRenderOptions,
|
|
285
|
+
where: string,
|
|
286
|
+
errors: string[],
|
|
287
|
+
screenshots: string[]
|
|
288
|
+
): Promise<void> {
|
|
289
|
+
if (opts.a11y === true) {
|
|
290
|
+
const violations = extractAxeViolations(await runAxe(page));
|
|
291
|
+
|
|
292
|
+
errors.push(...summarizeAxeViolations(violations, where));
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
if (opts.perfBudget !== undefined) {
|
|
296
|
+
const { domNodes, mountMs } = await measurePage(page);
|
|
297
|
+
|
|
298
|
+
errors.push(...checkPerfBudget(domNodes, mountMs, opts.perfBudget, where));
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
if (opts.screenshotDir !== undefined) {
|
|
302
|
+
await capturePage(page, opts.screenshotDir, where, screenshots);
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
/** Measure DOM size + mount time for the perf budget. */
|
|
307
|
+
async function measurePage(
|
|
308
|
+
page: Page
|
|
309
|
+
): Promise<{ domNodes: number; mountMs: number }> {
|
|
310
|
+
return page.evaluate(() => {
|
|
311
|
+
const nav = performance.getEntriesByType("navigation")[0];
|
|
312
|
+
const mountMs =
|
|
313
|
+
nav instanceof PerformanceNavigationTiming
|
|
314
|
+
? nav.domContentLoadedEventEnd - nav.startTime
|
|
315
|
+
: 0;
|
|
316
|
+
|
|
317
|
+
return { domNodes: document.querySelectorAll("*").length, mountMs };
|
|
318
|
+
});
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
/** Filesystem-safe label for a route (e.g. "/a/b" → "a-b", "/" → "index"). */
|
|
322
|
+
function routeLabel(route: string): string {
|
|
323
|
+
const cleaned = route.replace(/^\/+|\/+$/g, "").replace(/\//g, "-");
|
|
324
|
+
|
|
325
|
+
return cleaned.length === 0 ? "index" : cleaned;
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
/** Capture a desktop + mobile screenshot of the current page into `dir`. */
|
|
329
|
+
async function capturePage(
|
|
330
|
+
page: Page,
|
|
331
|
+
dir: string,
|
|
332
|
+
label: string,
|
|
333
|
+
screenshots: string[]
|
|
334
|
+
): Promise<void> {
|
|
335
|
+
for (const vp of VIEWPORTS) {
|
|
336
|
+
const path = join(dir, `${label}-${vp.name}.png`);
|
|
337
|
+
|
|
338
|
+
try {
|
|
339
|
+
await page.setViewportSize({ width: vp.width, height: vp.height });
|
|
340
|
+
await page.screenshot({ path, fullPage: true });
|
|
341
|
+
screenshots.push(path);
|
|
342
|
+
} catch {
|
|
343
|
+
// A screenshot is a best-effort artifact, never a gate failure.
|
|
344
|
+
}
|
|
345
|
+
}
|
|
150
346
|
}
|
|
151
347
|
|
|
152
348
|
/** Serve a directory on an ephemeral localhost port. SPA FALLBACK: an
|
|
@@ -187,7 +383,8 @@ async function crawlRoutes(
|
|
|
187
383
|
base: string,
|
|
188
384
|
routes: readonly string[],
|
|
189
385
|
errors: string[],
|
|
190
|
-
timeout: number
|
|
386
|
+
timeout: number,
|
|
387
|
+
quality: { opts: IRenderOptions; screenshots: string[] }
|
|
191
388
|
): Promise<void> {
|
|
192
389
|
for (const route of routes) {
|
|
193
390
|
try {
|
|
@@ -207,7 +404,17 @@ async function crawlRoutes(
|
|
|
207
404
|
|
|
208
405
|
if (blank) {
|
|
209
406
|
errors.push(`route ${route} rendered blank`);
|
|
407
|
+
continue;
|
|
210
408
|
}
|
|
409
|
+
|
|
410
|
+
// a11y + screenshots per route (perf budget stays an initial-page check).
|
|
411
|
+
await runQualityOracles(
|
|
412
|
+
page,
|
|
413
|
+
{ ...quality.opts, perfBudget: undefined },
|
|
414
|
+
routeLabel(route),
|
|
415
|
+
errors,
|
|
416
|
+
quality.screenshots
|
|
417
|
+
);
|
|
211
418
|
} catch (error) {
|
|
212
419
|
errors.push(
|
|
213
420
|
`route ${route} failed to load: ${error instanceof Error ? error.message : String(error)}`
|
package/src/cli.ts
CHANGED
|
@@ -102,11 +102,15 @@ export interface ICliArgs {
|
|
|
102
102
|
/** Plan mode: a from-scratch build pauses after the design phase to show its
|
|
103
103
|
* plan for review/edit before implementing (`--plan`; also toggled by /plan). */
|
|
104
104
|
plan: boolean;
|
|
105
|
+
/** Keep the auto-gate at the strict TS floor only — do NOT append the
|
|
106
|
+
* project's discovered tests (`--strict-floor-only`). By default the auto-gate
|
|
107
|
+
* also runs the project's tests, so "green" means floor + tests pass. */
|
|
108
|
+
strictFloorOnly: boolean;
|
|
105
109
|
}
|
|
106
110
|
|
|
107
111
|
const BOOL_FLAGS: Record<
|
|
108
112
|
string,
|
|
109
|
-
"continue" | "noGate" | "web" | "log" | "plan"
|
|
113
|
+
"continue" | "noGate" | "web" | "log" | "plan" | "strictFloorOnly"
|
|
110
114
|
> = {
|
|
111
115
|
"--continue": "continue",
|
|
112
116
|
"-c": "continue",
|
|
@@ -114,6 +118,7 @@ const BOOL_FLAGS: Record<
|
|
|
114
118
|
"--web": "web",
|
|
115
119
|
"--log": "log",
|
|
116
120
|
"--plan": "plan",
|
|
121
|
+
"--strict-floor-only": "strictFloorOnly",
|
|
117
122
|
};
|
|
118
123
|
|
|
119
124
|
const VALUE_FLAGS = new Set([
|
|
@@ -140,6 +145,7 @@ export function parseArgs(argv: readonly string[]): ICliArgs {
|
|
|
140
145
|
web: false,
|
|
141
146
|
log: false,
|
|
142
147
|
plan: false,
|
|
148
|
+
strictFloorOnly: false,
|
|
143
149
|
};
|
|
144
150
|
|
|
145
151
|
for (let i = 0; i < argv.length; i += 1) {
|
|
@@ -812,7 +818,13 @@ async function baseGate(
|
|
|
812
818
|
args.dir,
|
|
813
819
|
activePacks,
|
|
814
820
|
Object.keys(ruleOverrides).length > 0 ? ruleOverrides : undefined,
|
|
815
|
-
{ enableTypeAware: profile === "strict" }
|
|
821
|
+
{
|
|
822
|
+
enableTypeAware: profile === "strict",
|
|
823
|
+
// "Green" should mean the strict floor AND the project's own tests pass —
|
|
824
|
+
// not just that it type-checks and lints. discoverTestCommand appends them
|
|
825
|
+
// only when the project actually has tests; --strict-floor-only opts out.
|
|
826
|
+
includeTests: !args.strictFloorOnly,
|
|
827
|
+
}
|
|
816
828
|
);
|
|
817
829
|
|
|
818
830
|
return { accept: auto.command, gateLabel: auto.label };
|
package/src/detect-gate.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { join, dirname } from "node:path";
|
|
|
2
2
|
import { existsSync } from "node:fs";
|
|
3
3
|
import { ESLint } from "eslint";
|
|
4
4
|
import { WEB_TEMPLATES, type WebFramework } from "./web-templates";
|
|
5
|
+
import { isRecord } from "./lib/guards";
|
|
5
6
|
|
|
6
7
|
/**
|
|
7
8
|
* Build the gate that confirms "done" — and makes tsforge a TypeScript-SPECIALIZED
|
|
@@ -106,10 +107,16 @@ const STRICT_TSCONFIG = `{
|
|
|
106
107
|
/** Strict overlay for a project that ALREADY has a tsconfig: extend it (so the
|
|
107
108
|
* project's paths/jsx/module/lib still resolve — a bare strict config would
|
|
108
109
|
* mis-compile a real app) but FORCE every strictness flag on top, so a loosely-
|
|
109
|
-
* configured repo still gets tsforge's strict-TS floor.
|
|
110
|
-
*
|
|
111
|
-
|
|
112
|
-
|
|
110
|
+
* configured repo still gets tsforge's strict-TS floor.
|
|
111
|
+
*
|
|
112
|
+
* PERSISTENCE POLICY: written under `.tsforge/` (tsforge's cache namespace), NOT
|
|
113
|
+
* as a sibling in the project root — so the gate never litters the user's repo
|
|
114
|
+
* with a `tsforge.tsconfig.json`. `extends` points one level up to the project's
|
|
115
|
+
* own config, and `include`/`exclude` are re-stated relative to the subdir
|
|
116
|
+
* because `extends` does not inherit them (they default to the config's own
|
|
117
|
+
* directory otherwise — which under `.tsforge/` would compile nothing). */
|
|
118
|
+
const STRICT_TSCONFIG_OVERLAY = `{
|
|
119
|
+
"extends": "../tsconfig.json",
|
|
113
120
|
"compilerOptions": {
|
|
114
121
|
"strict": true,
|
|
115
122
|
"noUncheckedIndexedAccess": true,
|
|
@@ -119,10 +126,16 @@ const STRICT_TSCONFIG_OVERRIDE = `{
|
|
|
119
126
|
"erasableSyntaxOnly": true,
|
|
120
127
|
"skipLibCheck": true,
|
|
121
128
|
"noEmit": true
|
|
122
|
-
}
|
|
129
|
+
},
|
|
130
|
+
"include": ["../**/*.ts", "../**/*.tsx"],
|
|
131
|
+
"exclude": ["../node_modules", "../dist", "../build", "../scratch", "../.tsforge"]
|
|
123
132
|
}
|
|
124
133
|
`;
|
|
125
134
|
|
|
135
|
+
/** The gate overlay's home: tsforge's cache dir + the overlay filename. */
|
|
136
|
+
const GATE_TSCONFIG_DIR = ".tsforge";
|
|
137
|
+
const GATE_TSCONFIG_FILE = "tsconfig.gate.json";
|
|
138
|
+
|
|
126
139
|
// The web-stack scaffolds (Vite + React full-kit, or Vite vanilla) live in the
|
|
127
140
|
// registry; this module just lays them down and builds their gate. shadcn/TanStack
|
|
128
141
|
// boilerplate is held to a web-tailored strict config (no `I`-prefix — React names
|
|
@@ -373,7 +386,12 @@ export function buildWebGate(framework: WebFramework): IGate {
|
|
|
373
386
|
// HARNESS-authored and app-agnostic: we deliberately do NOT run a model-authored
|
|
374
387
|
// checks.json — the 27b writes over-strict interaction assertions (exact
|
|
375
388
|
// placeholders/fill flows) it then can't satisfy and spirals on (iter3/4).
|
|
376
|
-
|
|
389
|
+
// OPT-IN quality oracles (default OFF so existing web runs are unchanged):
|
|
390
|
+
// TSFORGE_A11Y=1 adds axe (serious/critical fail), TSFORGE_SCREENSHOTS=1 writes
|
|
391
|
+
// per-route PNGs. A "frontend"/"strict" profile can set these.
|
|
392
|
+
const a11y = process.env.TSFORGE_A11Y === "1" ? " --a11y" : "";
|
|
393
|
+
const shots = process.env.TSFORGE_SCREENSHOTS === "1" ? " --screenshots" : "";
|
|
394
|
+
const render = `bun "${BROWSER_CHECK}" dist/index.html --smoke --crawl${a11y}${shots}`;
|
|
377
395
|
// Prettier enforces formatting (the fix step runs `prettier --write` first, so
|
|
378
396
|
// this passes without the model ever hand-formatting). Respects .prettierignore
|
|
379
397
|
// (vendored ui/ + lib/ skipped). Runs after lint so a parse error fails there.
|
|
@@ -468,7 +486,7 @@ export async function buildGate(
|
|
|
468
486
|
cwd: string,
|
|
469
487
|
packs?: readonly string[],
|
|
470
488
|
ruleOverrides?: Readonly<Record<string, "error" | "warn" | "off">>,
|
|
471
|
-
options?: { enableTypeAware?: boolean }
|
|
489
|
+
options?: { enableTypeAware?: boolean; includeTests?: boolean }
|
|
472
490
|
): Promise<IGate> {
|
|
473
491
|
const parts: string[] = [];
|
|
474
492
|
const labels: string[] = [];
|
|
@@ -494,29 +512,95 @@ export async function buildGate(
|
|
|
494
512
|
}
|
|
495
513
|
}
|
|
496
514
|
|
|
515
|
+
// Tests run LAST (after the cheap static floor) so a type/lint error fails
|
|
516
|
+
// fast without paying for a test run. Only appended when the project actually
|
|
517
|
+
// has tests to run — a strict-floor-only run, or a project with none, skips it.
|
|
518
|
+
if (options?.includeTests === true) {
|
|
519
|
+
const test = await discoverTestCommand(cwd);
|
|
520
|
+
|
|
521
|
+
if (test !== null) {
|
|
522
|
+
parts.push(test);
|
|
523
|
+
labels.push("tests");
|
|
524
|
+
}
|
|
525
|
+
}
|
|
526
|
+
|
|
497
527
|
return { command: parts.join(" && "), label: labels.join(" + ") };
|
|
498
528
|
}
|
|
499
529
|
|
|
530
|
+
/** The npm-init placeholder test script — running it always fails, so it must
|
|
531
|
+
* NOT count as "the project has tests". */
|
|
532
|
+
const PLACEHOLDER_TEST = /no test specified/i;
|
|
533
|
+
|
|
534
|
+
/**
|
|
535
|
+
* The project's test command for the gate, or null when there's nothing to run.
|
|
536
|
+
* Prefers an explicit, real package.json `test` script (run via `bun run test`);
|
|
537
|
+
* else falls back to `bun test` when the project has test files; else null — so
|
|
538
|
+
* a greenfield app with no tests yet stays at the strict floor instead of
|
|
539
|
+
* failing a gate that runs a placeholder/absent test command.
|
|
540
|
+
*/
|
|
541
|
+
export async function discoverTestCommand(cwd: string): Promise<string | null> {
|
|
542
|
+
const pkgFile = Bun.file(join(cwd, "package.json"));
|
|
543
|
+
|
|
544
|
+
if (await pkgFile.exists()) {
|
|
545
|
+
try {
|
|
546
|
+
const pkg: unknown = await pkgFile.json();
|
|
547
|
+
const scripts = isRecord(pkg) ? pkg.scripts : undefined;
|
|
548
|
+
const script = isRecord(scripts) ? scripts.test : undefined;
|
|
549
|
+
|
|
550
|
+
if (
|
|
551
|
+
typeof script === "string" &&
|
|
552
|
+
script.trim().length > 0 &&
|
|
553
|
+
!PLACEHOLDER_TEST.test(script)
|
|
554
|
+
) {
|
|
555
|
+
return "bun run test";
|
|
556
|
+
}
|
|
557
|
+
} catch {
|
|
558
|
+
// Malformed package.json — fall through to file detection.
|
|
559
|
+
}
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
return (await hasTestFiles(cwd)) ? "bun test" : null;
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
/** True when the project has at least one *.test.* / *.spec.* file (outside
|
|
566
|
+
* node_modules) — the signal that a bare `bun test` has something to run. */
|
|
567
|
+
async function hasTestFiles(cwd: string): Promise<boolean> {
|
|
568
|
+
const glob = new Bun.Glob("**/*.{test,spec}.{ts,tsx,js,jsx}");
|
|
569
|
+
|
|
570
|
+
for await (const path of glob.scan({ cwd, onlyFiles: true })) {
|
|
571
|
+
if (!path.includes("node_modules")) {
|
|
572
|
+
return true;
|
|
573
|
+
}
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
return false;
|
|
577
|
+
}
|
|
578
|
+
|
|
500
579
|
/**
|
|
501
580
|
* The type-aware floor — ALWAYS tsforge-strict (user policy: a repo's own config
|
|
502
|
-
* is never trusted to be strict enough). With a project tsconfig, extend it
|
|
503
|
-
* force the strict flags; greenfield, bring the full strict one.
|
|
504
|
-
* TS project. (The strict
|
|
581
|
+
* is never trusted to be strict enough). With a project tsconfig, extend it under
|
|
582
|
+
* `.tsforge/` but force the strict flags; greenfield, bring the full strict one.
|
|
583
|
+
* null when not a TS project. (The strict overlay / bundled config win over
|
|
584
|
+
* whatever the repo set.)
|
|
505
585
|
*/
|
|
506
586
|
async function tscPart(cwd: string): Promise<string | null> {
|
|
507
587
|
const hasTsconfig = await Bun.file(join(cwd, "tsconfig.json")).exists();
|
|
508
588
|
|
|
509
589
|
if (hasTsconfig) {
|
|
590
|
+
// EPHEMERAL gate artifact: lives in .tsforge/ (Bun.write makes the dir), so
|
|
591
|
+
// we never drop a tsforge.tsconfig.json in the user's project root.
|
|
510
592
|
await Bun.write(
|
|
511
|
-
join(cwd,
|
|
512
|
-
|
|
593
|
+
join(cwd, GATE_TSCONFIG_DIR, GATE_TSCONFIG_FILE),
|
|
594
|
+
STRICT_TSCONFIG_OVERLAY
|
|
513
595
|
);
|
|
596
|
+
await ignoreGateArtifact(cwd);
|
|
514
597
|
|
|
515
|
-
return `"${TSC_BIN}" --noEmit -p
|
|
598
|
+
return `"${TSC_BIN}" --noEmit -p ${GATE_TSCONFIG_DIR}/${GATE_TSCONFIG_FILE}`;
|
|
516
599
|
}
|
|
517
600
|
|
|
518
601
|
// Greenfield: bring a strict tsconfig so tsc can gate — but only when this is
|
|
519
602
|
// actually a TS project (has a package.json), so we never litter a random dir.
|
|
603
|
+
// Unlike the overlay, a greenfield tsconfig.json is a DURABLE project file.
|
|
520
604
|
if (await Bun.file(join(cwd, "package.json")).exists()) {
|
|
521
605
|
await Bun.write(join(cwd, "tsconfig.json"), STRICT_TSCONFIG);
|
|
522
606
|
|
|
@@ -526,6 +610,20 @@ async function tscPart(cwd: string): Promise<string | null> {
|
|
|
526
610
|
return null;
|
|
527
611
|
}
|
|
528
612
|
|
|
613
|
+
/** Keep the ephemeral gate overlay out of git WITHOUT touching the user's root
|
|
614
|
+
* .gitignore: drop a scoped `.tsforge/.gitignore` ignoring just the overlay.
|
|
615
|
+
* Created only when absent, so a user-authored `.tsforge/.gitignore` (e.g. one
|
|
616
|
+
* that intentionally tracks rules.json) is never clobbered. */
|
|
617
|
+
async function ignoreGateArtifact(cwd: string): Promise<void> {
|
|
618
|
+
const ignore = join(cwd, GATE_TSCONFIG_DIR, ".gitignore");
|
|
619
|
+
|
|
620
|
+
if (await Bun.file(ignore).exists()) {
|
|
621
|
+
return;
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
await Bun.write(ignore, `${GATE_TSCONFIG_FILE}\n`);
|
|
625
|
+
}
|
|
626
|
+
|
|
529
627
|
/** The syntactic idiom layer — ALWAYS tsforge's bundled strict eslint config
|
|
530
628
|
* (user policy). We deliberately do NOT defer to the project's own `lint`
|
|
531
629
|
* script: that's exactly how a weak repo would dodge the strict-TS floor. The
|
package/src/eval/eval.types.ts
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import type { FailureClass } from "./failure-class";
|
|
2
|
+
|
|
1
3
|
export interface IJudgeInput {
|
|
2
4
|
goal: string;
|
|
3
5
|
criteria: string;
|
|
@@ -21,6 +23,9 @@ export interface IRunRecord {
|
|
|
21
23
|
ms: number;
|
|
22
24
|
/** LLM-judge quality score (1–5), when available. */
|
|
23
25
|
quality?: number;
|
|
26
|
+
/** Structured reason a failed run failed (from classifyRun); omitted/`none`
|
|
27
|
+
* for a passing run. The substrate for turning failures into interventions. */
|
|
28
|
+
failureClass?: FailureClass;
|
|
24
29
|
}
|
|
25
30
|
|
|
26
31
|
/** Aggregated metrics for a variant across its runs. */
|
|
@@ -33,4 +38,8 @@ export interface IVariantSummary {
|
|
|
33
38
|
avgMs: number;
|
|
34
39
|
/** Average quality across runs that were scored (0 if none). */
|
|
35
40
|
avgQuality: number;
|
|
41
|
+
/** Count of failed runs by failure class (e.g. {"type-error": 2}); empty when
|
|
42
|
+
* no run carried a class. Lets a sweep show WHY a variant failed, not just how
|
|
43
|
+
* often. */
|
|
44
|
+
failureClasses: Record<string, number>;
|
|
36
45
|
}
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
import type { ILoopEvent } from "../loop/loop.types";
|
|
2
|
+
import type { ErrorSet } from "../validate/validate.types";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Why a run failed — a structured reason, so every failed run maps to a possible
|
|
6
|
+
* harness intervention (the self-improving north-star). Derived purely from the
|
|
7
|
+
* event stream (+ an optional final gate error set), so the same classifier
|
|
8
|
+
* serves the live loop, the eval sweep, and the offline log analyzer.
|
|
9
|
+
*/
|
|
10
|
+
export const FAILURE_CLASS = {
|
|
11
|
+
/** The run reached a green gate — no failure. */
|
|
12
|
+
none: "none",
|
|
13
|
+
/** Model emitted tool calls the parser couldn't read (repair L3 / salvage). */
|
|
14
|
+
toolMalformed: "tool-malformed",
|
|
15
|
+
/** Edits kept missing their target (missing-file / not-found / ambiguous). */
|
|
16
|
+
editReject: "edit-reject",
|
|
17
|
+
/** Hit the turn cap or the gate stalled with no decisive error class. */
|
|
18
|
+
noProgress: "no-progress",
|
|
19
|
+
/** Final gate red dominated by tsc type errors. */
|
|
20
|
+
typeError: "type-error",
|
|
21
|
+
/** Final gate red dominated by ESLint rule violations. */
|
|
22
|
+
lintRule: "lint-rule",
|
|
23
|
+
/** Imported a module that doesn't exist (TS2307 / "Cannot find module"). */
|
|
24
|
+
hallucinatedImport: "hallucinated-import",
|
|
25
|
+
/** Output degenerated into a repetition loop (StreamGuard fired). */
|
|
26
|
+
degeneration: "degeneration",
|
|
27
|
+
/** A per-call/timeout backstop tripped. */
|
|
28
|
+
timeout: "timeout",
|
|
29
|
+
/** A route rendered as an empty/phantom page. */
|
|
30
|
+
routePhantom: "route-phantom",
|
|
31
|
+
/** The built app failed to render / threw in the browser oracle. */
|
|
32
|
+
browserFail: "browser-fail",
|
|
33
|
+
/** The bundler/build step (vite) failed. */
|
|
34
|
+
buildFail: "build-fail",
|
|
35
|
+
/** Failed, but no signal was decisive. */
|
|
36
|
+
unknown: "unknown",
|
|
37
|
+
} as const;
|
|
38
|
+
|
|
39
|
+
export type FailureClass = (typeof FAILURE_CLASS)[keyof typeof FAILURE_CLASS];
|
|
40
|
+
|
|
41
|
+
/** Per-signal tallies behind a classification — kept for debugging/telemetry. */
|
|
42
|
+
export interface IFailureSignals {
|
|
43
|
+
repairs: number;
|
|
44
|
+
salvages: number;
|
|
45
|
+
editRejects: number;
|
|
46
|
+
degenerated: boolean;
|
|
47
|
+
tsErrors: number;
|
|
48
|
+
lintErrors: number;
|
|
49
|
+
missingModule: number;
|
|
50
|
+
browser: number;
|
|
51
|
+
build: number;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
export interface IFailureSummary {
|
|
55
|
+
failureClass: FailureClass;
|
|
56
|
+
/** The dominant rule/code for type-error|lint-rule (e.g. "TS18048", "no-as"). */
|
|
57
|
+
detail?: string;
|
|
58
|
+
signals: IFailureSignals;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
const TS_CODE = /^TS\d+$/;
|
|
62
|
+
const MISSING_MODULE = /cannot find module/i;
|
|
63
|
+
const DEGENERATE = /degenerat/i;
|
|
64
|
+
const TOOL_MALFORMED = /salvage|recovered|malformed|re-ask/i;
|
|
65
|
+
const REJECTED = /reject/i;
|
|
66
|
+
const BROWSER = /blank|did not render|did not mount|page error|uncaught/i;
|
|
67
|
+
const ROUTE = /route|phantom|stub/i;
|
|
68
|
+
const BUILD = /vite|esbuild|build failed|bundl/i;
|
|
69
|
+
|
|
70
|
+
/** The most frequently occurring string, or undefined for an empty list. */
|
|
71
|
+
function mostCommon(values: readonly string[]): string | undefined {
|
|
72
|
+
const counts = new Map<string, number>();
|
|
73
|
+
let best: string | undefined;
|
|
74
|
+
let bestN = 0;
|
|
75
|
+
|
|
76
|
+
for (const value of values) {
|
|
77
|
+
const n = (counts.get(value) ?? 0) + 1;
|
|
78
|
+
|
|
79
|
+
counts.set(value, n);
|
|
80
|
+
|
|
81
|
+
if (n > bestN) {
|
|
82
|
+
bestN = n;
|
|
83
|
+
best = value;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
return best;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/** The final red gate's rules: prefer the explicit error set, else the rules
|
|
91
|
+
* carried on the last failing `validated` event. */
|
|
92
|
+
function finalRules(
|
|
93
|
+
events: readonly ILoopEvent[],
|
|
94
|
+
finalErrors?: ErrorSet
|
|
95
|
+
): string[] {
|
|
96
|
+
if (finalErrors !== undefined) {
|
|
97
|
+
return finalErrors.flatMap((e) => (e.rule === undefined ? [] : [e.rule]));
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
let last: readonly string[] = [];
|
|
101
|
+
|
|
102
|
+
for (const event of events) {
|
|
103
|
+
if (event.kind === "validated" && event.passed === false && event.rules) {
|
|
104
|
+
last = event.rules;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
return [...last];
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/** Concatenated message/output text across the run — for keyword signals that
|
|
112
|
+
* aren't structured into a dedicated field (missing module, browser, build). */
|
|
113
|
+
function runText(
|
|
114
|
+
events: readonly ILoopEvent[],
|
|
115
|
+
finalErrors?: ErrorSet
|
|
116
|
+
): string {
|
|
117
|
+
const parts: string[] = [];
|
|
118
|
+
|
|
119
|
+
for (const event of events) {
|
|
120
|
+
parts.push(event.message);
|
|
121
|
+
|
|
122
|
+
if (event.output !== undefined) {
|
|
123
|
+
parts.push(event.output);
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
for (const e of finalErrors ?? []) {
|
|
128
|
+
parts.push(e.message);
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
return parts.join("\n");
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
function gatherSignals(
|
|
135
|
+
events: readonly ILoopEvent[],
|
|
136
|
+
finalErrors?: ErrorSet
|
|
137
|
+
): IFailureSignals {
|
|
138
|
+
const rules = finalRules(events, finalErrors);
|
|
139
|
+
const text = runText(events, finalErrors);
|
|
140
|
+
const missingModule =
|
|
141
|
+
rules.filter((r) => r === "TS2307").length +
|
|
142
|
+
(MISSING_MODULE.test(text) ? 1 : 0);
|
|
143
|
+
|
|
144
|
+
return {
|
|
145
|
+
repairs: events.filter((e) => e.kind === "repair").length,
|
|
146
|
+
salvages: events.filter(
|
|
147
|
+
(e) => e.kind === "tool" && TOOL_MALFORMED.test(e.message)
|
|
148
|
+
).length,
|
|
149
|
+
editRejects: events.filter(
|
|
150
|
+
(e) => e.kind === "edit" && REJECTED.test(e.message)
|
|
151
|
+
).length,
|
|
152
|
+
degenerated: events.some((e) => DEGENERATE.test(e.message)),
|
|
153
|
+
tsErrors: rules.filter((r) => TS_CODE.test(r) && r !== "TS2307").length,
|
|
154
|
+
lintErrors: rules.filter((r) => !TS_CODE.test(r)).length,
|
|
155
|
+
missingModule,
|
|
156
|
+
browser: BROWSER.test(text) ? 1 : 0,
|
|
157
|
+
build: BUILD.test(text) ? 1 : 0,
|
|
158
|
+
};
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
function finalStatusOf(
|
|
162
|
+
events: readonly ILoopEvent[]
|
|
163
|
+
): "done" | "stuck" | "none" {
|
|
164
|
+
let status: "done" | "stuck" | "none" = "none";
|
|
165
|
+
|
|
166
|
+
for (const event of events) {
|
|
167
|
+
if (event.kind === "done") {
|
|
168
|
+
status = "done";
|
|
169
|
+
} else if (event.kind === "stuck") {
|
|
170
|
+
status = "stuck";
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
return status;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
/** Pick the dominant gate-error class (type vs lint), with its commonest code. */
|
|
178
|
+
function classifyGateErrors(
|
|
179
|
+
events: readonly ILoopEvent[],
|
|
180
|
+
finalErrors: ErrorSet | undefined,
|
|
181
|
+
signals: IFailureSignals
|
|
182
|
+
): IFailureSummary | undefined {
|
|
183
|
+
const rules = finalRules(events, finalErrors);
|
|
184
|
+
|
|
185
|
+
if (signals.tsErrors > 0 && signals.tsErrors >= signals.lintErrors) {
|
|
186
|
+
return {
|
|
187
|
+
failureClass: FAILURE_CLASS.typeError,
|
|
188
|
+
detail: mostCommon(rules.filter((r) => TS_CODE.test(r))),
|
|
189
|
+
signals,
|
|
190
|
+
};
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
if (signals.lintErrors > 0) {
|
|
194
|
+
return {
|
|
195
|
+
failureClass: FAILURE_CLASS.lintRule,
|
|
196
|
+
detail: mostCommon(rules.filter((r) => !TS_CODE.test(r))),
|
|
197
|
+
signals,
|
|
198
|
+
};
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
return undefined;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
/** Behavioral fallback when no gate-error class dominates. */
|
|
205
|
+
function classifyBehavior(signals: IFailureSignals): FailureClass {
|
|
206
|
+
if (signals.degenerated) {
|
|
207
|
+
return FAILURE_CLASS.degeneration;
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
if (signals.salvages > 0 || signals.repairs > 0) {
|
|
211
|
+
return FAILURE_CLASS.toolMalformed;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
if (signals.editRejects > 0) {
|
|
215
|
+
return FAILURE_CLASS.editReject;
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
return FAILURE_CLASS.noProgress;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
/**
|
|
222
|
+
* Classify a run from its event stream. Pass the final gate `ErrorSet` when the
|
|
223
|
+
* caller has it (authoritative); otherwise the classifier falls back to the
|
|
224
|
+
* `rules` carried on the last failing `validated` event and keyword signals.
|
|
225
|
+
* A run that reached a green gate classifies as `none`.
|
|
226
|
+
*/
|
|
227
|
+
export function classifyRun(
|
|
228
|
+
events: readonly ILoopEvent[],
|
|
229
|
+
finalErrors?: ErrorSet
|
|
230
|
+
): IFailureSummary {
|
|
231
|
+
const signals = gatherSignals(events, finalErrors);
|
|
232
|
+
|
|
233
|
+
if (finalStatusOf(events) === "done") {
|
|
234
|
+
return { failureClass: FAILURE_CLASS.none, signals };
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
if (signals.missingModule > 0) {
|
|
238
|
+
return { failureClass: FAILURE_CLASS.hallucinatedImport, signals };
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
if (signals.browser > 0) {
|
|
242
|
+
const text = runText(events, finalErrors);
|
|
243
|
+
|
|
244
|
+
return {
|
|
245
|
+
failureClass: ROUTE.test(text)
|
|
246
|
+
? FAILURE_CLASS.routePhantom
|
|
247
|
+
: FAILURE_CLASS.browserFail,
|
|
248
|
+
signals,
|
|
249
|
+
};
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
if (signals.build > 0 && signals.tsErrors === 0 && signals.lintErrors === 0) {
|
|
253
|
+
return { failureClass: FAILURE_CLASS.buildFail, signals };
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
const gate = classifyGateErrors(events, finalErrors, signals);
|
|
257
|
+
|
|
258
|
+
if (gate !== undefined) {
|
|
259
|
+
return gate;
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
return { failureClass: classifyBehavior(signals), signals };
|
|
263
|
+
}
|
package/src/eval/index.ts
CHANGED
|
@@ -2,6 +2,14 @@ export * from "./eval.types";
|
|
|
2
2
|
export { judge } from "./judge";
|
|
3
3
|
export { summarize } from "./score";
|
|
4
4
|
export { analyzeEvents, type IRunMetrics } from "./metrics";
|
|
5
|
+
export {
|
|
6
|
+
classifyRun,
|
|
7
|
+
FAILURE_CLASS,
|
|
8
|
+
type FailureClass,
|
|
9
|
+
type IFailureSummary,
|
|
10
|
+
type IFailureSignals,
|
|
11
|
+
} from "./failure-class";
|
|
12
|
+
export { parseEventLog } from "./parse-log";
|
|
5
13
|
export {
|
|
6
14
|
buildSweepReport,
|
|
7
15
|
renderSweepReportMarkdown,
|
package/src/eval/metrics.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ILoopEvent } from "../loop/loop.types";
|
|
2
|
+
import { classifyRun, type FailureClass } from "./failure-class";
|
|
2
3
|
|
|
3
4
|
/** Behavioral metrics distilled from a run's event stream — the signals the
|
|
4
5
|
* local-model literature says predict outcomes (tokens-to-solution, repair
|
|
@@ -6,6 +7,10 @@ import type { ILoopEvent } from "../loop/loop.types";
|
|
|
6
7
|
* the cli-metrics script. */
|
|
7
8
|
export interface IRunMetrics {
|
|
8
9
|
finalStatus: "done" | "stuck" | "none";
|
|
10
|
+
/** Structured reason the run failed (`none` when it reached green). The single
|
|
11
|
+
* source of truth for failure classification — the cli-metrics analyzer and
|
|
12
|
+
* the eval sweep both read this rather than re-deriving it. */
|
|
13
|
+
failureClass: FailureClass;
|
|
9
14
|
/** Model turns (one per `cycle` event). */
|
|
10
15
|
turns: number;
|
|
11
16
|
/** Model calls (one per `usage` event). */
|
|
@@ -29,6 +34,7 @@ export interface IRunMetrics {
|
|
|
29
34
|
function emptyMetrics(): IRunMetrics {
|
|
30
35
|
return {
|
|
31
36
|
finalStatus: "none",
|
|
37
|
+
failureClass: "none",
|
|
32
38
|
turns: 0,
|
|
33
39
|
modelCalls: 0,
|
|
34
40
|
tokensOut: 0,
|
|
@@ -82,6 +88,7 @@ export function analyzeEvents(events: readonly ILoopEvent[]): IRunMetrics {
|
|
|
82
88
|
|
|
83
89
|
m.filesCreated = created.size;
|
|
84
90
|
m.avgTokensPerSecond = tpsCount > 0 ? Math.round(tpsSum / tpsCount) : 0;
|
|
91
|
+
m.failureClass = classifyRun(events).failureClass;
|
|
85
92
|
|
|
86
93
|
return m;
|
|
87
94
|
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import type { ILoopEvent } from "../loop/loop.types";
|
|
2
|
+
import { isRecord } from "../lib/guards";
|
|
3
|
+
|
|
4
|
+
/** The known event kinds, as a runtime set, so a JSONL line can be validated
|
|
5
|
+
* into a typed ILoopEvent without an `as` cast. Keep in sync with ILoopEvent. */
|
|
6
|
+
const KNOWN_KINDS = new Set<string>([
|
|
7
|
+
"start",
|
|
8
|
+
"red",
|
|
9
|
+
"cycle",
|
|
10
|
+
"token",
|
|
11
|
+
"message",
|
|
12
|
+
"fix",
|
|
13
|
+
"edit",
|
|
14
|
+
"create",
|
|
15
|
+
"validated",
|
|
16
|
+
"done",
|
|
17
|
+
"stuck",
|
|
18
|
+
"run",
|
|
19
|
+
"tool",
|
|
20
|
+
"repair",
|
|
21
|
+
"timing",
|
|
22
|
+
"usage",
|
|
23
|
+
"ttsr",
|
|
24
|
+
]);
|
|
25
|
+
|
|
26
|
+
function isKind(value: string): value is ILoopEvent["kind"] {
|
|
27
|
+
return KNOWN_KINDS.has(value);
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
function optionalString(value: unknown): string | undefined {
|
|
31
|
+
return typeof value === "string" ? value : undefined;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
function stringArray(value: unknown): string[] | undefined {
|
|
35
|
+
if (!Array.isArray(value)) {
|
|
36
|
+
return undefined;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
return value.filter((v): v is string => typeof v === "string");
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/** Coerce one parsed JSONL record into an ILoopEvent, or null when it isn't one.
|
|
43
|
+
* Reads only the fields the failure classifier + metrics consume — enough to
|
|
44
|
+
* reconstruct a typed event stream from a `--log` file. */
|
|
45
|
+
function coerceEvent(record: unknown): ILoopEvent | null {
|
|
46
|
+
if (!isRecord(record)) {
|
|
47
|
+
return null;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
const kind = record.kind;
|
|
51
|
+
|
|
52
|
+
if (typeof kind !== "string" || !isKind(kind)) {
|
|
53
|
+
return null;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
const event: ILoopEvent = {
|
|
57
|
+
kind,
|
|
58
|
+
task: optionalString(record.task) ?? "",
|
|
59
|
+
message: optionalString(record.message) ?? "",
|
|
60
|
+
};
|
|
61
|
+
const output = optionalString(record.output);
|
|
62
|
+
const rules = stringArray(record.rules);
|
|
63
|
+
|
|
64
|
+
if (output !== undefined) {
|
|
65
|
+
event.output = output;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
if (typeof record.passed === "boolean") {
|
|
69
|
+
event.passed = record.passed;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
if (rules !== undefined) {
|
|
73
|
+
event.rules = rules;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
return event;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
/** Parse a `--log` JSONL transcript (one serialized event per line) into a typed
|
|
80
|
+
* event stream. Malformed lines and non-event records are skipped. */
|
|
81
|
+
export function parseEventLog(jsonl: string): ILoopEvent[] {
|
|
82
|
+
const events: ILoopEvent[] = [];
|
|
83
|
+
|
|
84
|
+
for (const line of jsonl.split("\n")) {
|
|
85
|
+
if (line.trim().length === 0) {
|
|
86
|
+
continue;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
let parsed: unknown;
|
|
90
|
+
|
|
91
|
+
try {
|
|
92
|
+
parsed = JSON.parse(line);
|
|
93
|
+
} catch {
|
|
94
|
+
continue;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
const event = coerceEvent(parsed);
|
|
98
|
+
|
|
99
|
+
if (event !== null) {
|
|
100
|
+
events.push(event);
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
return events;
|
|
105
|
+
}
|
package/src/eval/report.ts
CHANGED
|
@@ -164,5 +164,24 @@ export function renderSweepReportMarkdown(report: ISweepReport): string {
|
|
|
164
164
|
...rows,
|
|
165
165
|
"",
|
|
166
166
|
"`*` = significant at p < 0.05 (two-proportion z-test vs baseline).",
|
|
167
|
+
...failureSection(report),
|
|
167
168
|
].join("\n");
|
|
168
169
|
}
|
|
170
|
+
|
|
171
|
+
/** Format a variant's failure-class tally, e.g. "type-error×2, no-progress×1". */
|
|
172
|
+
function formatFailureClasses(classes: Record<string, number>): string {
|
|
173
|
+
return Object.entries(classes)
|
|
174
|
+
.sort(([, a], [, b]) => b - a)
|
|
175
|
+
.map(([cls, n]) => `${cls}×${String(n)}`)
|
|
176
|
+
.join(", ");
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
/** A "why failures happened" section — per-variant failure-class breakdown.
|
|
180
|
+
* Empty (no lines) when every run passed, so a clean sweep stays terse. */
|
|
181
|
+
function failureSection(report: ISweepReport): string[] {
|
|
182
|
+
const lines = report.variants
|
|
183
|
+
.filter((v) => Object.keys(v.failureClasses).length > 0)
|
|
184
|
+
.map((v) => `- **${v.label}**: ${formatFailureClasses(v.failureClasses)}`);
|
|
185
|
+
|
|
186
|
+
return lines.length === 0 ? [] : ["", "### Failure breakdown", ...lines];
|
|
187
|
+
}
|
package/src/eval/score.ts
CHANGED
|
@@ -20,6 +20,15 @@ export function summarize(records: IRunRecord[]): IVariantSummary[] {
|
|
|
20
20
|
const sum = (select: (r: IRunRecord) => number): number =>
|
|
21
21
|
list.reduce((acc, r) => acc + select(r), 0);
|
|
22
22
|
const scored = list.filter((r) => r.quality !== undefined);
|
|
23
|
+
const failureClasses: Record<string, number> = {};
|
|
24
|
+
|
|
25
|
+
for (const r of list) {
|
|
26
|
+
const fc = r.failureClass;
|
|
27
|
+
|
|
28
|
+
if (!r.passed && fc !== undefined && fc !== "none") {
|
|
29
|
+
failureClasses[fc] = (failureClasses[fc] ?? 0) + 1;
|
|
30
|
+
}
|
|
31
|
+
}
|
|
23
32
|
|
|
24
33
|
summaries.push({
|
|
25
34
|
label,
|
|
@@ -32,6 +41,7 @@ export function summarize(records: IRunRecord[]): IVariantSummary[] {
|
|
|
32
41
|
scored.length > 0
|
|
33
42
|
? scored.reduce((acc, r) => acc + (r.quality ?? 0), 0) / scored.length
|
|
34
43
|
: 0,
|
|
44
|
+
failureClasses,
|
|
35
45
|
});
|
|
36
46
|
}
|
|
37
47
|
|
package/src/loop/loop.types.ts
CHANGED
|
@@ -32,6 +32,10 @@ export interface ILoopEvent {
|
|
|
32
32
|
/** For `timing` events: how long the turn took, in milliseconds. */
|
|
33
33
|
ms?: number;
|
|
34
34
|
errors?: number;
|
|
35
|
+
/** For `validated` events: the failing gate rules/codes (e.g. "TS18048",
|
|
36
|
+
* "no-restricted-syntax") — the structured substrate the failure classifier
|
|
37
|
+
* reads to tell a type error from a lint rule, not just a count. */
|
|
38
|
+
rules?: readonly string[];
|
|
35
39
|
passed?: boolean;
|
|
36
40
|
file?: string;
|
|
37
41
|
/** For `create` events: the new file's content (rendered as a code block). */
|
package/src/loop/turn.ts
CHANGED
|
@@ -784,6 +784,9 @@ export async function settleGate(
|
|
|
784
784
|
cycle: turn,
|
|
785
785
|
passed: gatePassed,
|
|
786
786
|
errors: gateErrors.length,
|
|
787
|
+
// Structured rule/code list (not just a count) so the failure classifier can
|
|
788
|
+
// tell a type error from a lint rule without re-parsing the gate output.
|
|
789
|
+
rules: gateErrors.flatMap((e) => (e.rule === undefined ? [] : [e.rule])),
|
|
787
790
|
message: gatePassed
|
|
788
791
|
? `task ${task.id} · turn ${turn}: GREEN`
|
|
789
792
|
: `task ${task.id} · turn ${turn}: red (${String(gateErrors.length)} error(s))${gateDetail}`,
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
// Optional type-aware ESLint overlay — enabled only when the target has a
|
|
2
|
+
// compiling tsconfig (see detect-gate.ts). Adds async correctness rules that
|
|
3
|
+
// require parserOptions.project; kept separate from strict.eslint.config.mjs
|
|
4
|
+
// so the syntactic gate still runs on any .ts file without type info.
|
|
5
|
+
import tseslint from "typescript-eslint";
|
|
6
|
+
|
|
7
|
+
export default tseslint.config(
|
|
8
|
+
{ ignores: ["**/node_modules/**", "**/dist/**", "**/build/**"] },
|
|
9
|
+
{
|
|
10
|
+
files: ["**/*.ts", "**/*.tsx"],
|
|
11
|
+
languageOptions: {
|
|
12
|
+
parser: tseslint.parser,
|
|
13
|
+
parserOptions: {
|
|
14
|
+
projectService: true,
|
|
15
|
+
tsconfigRootDir: process.cwd(),
|
|
16
|
+
},
|
|
17
|
+
},
|
|
18
|
+
plugins: {
|
|
19
|
+
"@typescript-eslint": tseslint.plugin,
|
|
20
|
+
},
|
|
21
|
+
rules: {
|
|
22
|
+
"@typescript-eslint/no-floating-promises": "error",
|
|
23
|
+
"@typescript-eslint/no-misused-promises": [
|
|
24
|
+
"error",
|
|
25
|
+
{
|
|
26
|
+
checksVoidReturn: {
|
|
27
|
+
attributes: false,
|
|
28
|
+
},
|
|
29
|
+
},
|
|
30
|
+
],
|
|
31
|
+
},
|
|
32
|
+
}
|
|
33
|
+
);
|