@agjs/tsforge 0.1.19 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -2
- package/scripts/browser-check.ts +41 -5
- package/scripts/build-rules-md.ts +78 -21
- package/scripts/cli-metrics.ts +10 -0
- package/scripts/sweep.ts +53 -23
- package/scripts/web-sweep.ts +292 -0
- package/src/browser/index.ts +3 -0
- package/src/browser/oracle.ts +215 -8
- package/src/cli.ts +22 -4
- package/src/config/index.ts +8 -0
- package/src/config/profiles.ts +150 -0
- package/src/config/tsforge-config.ts +64 -5
- package/src/detect-gate.ts +144 -13
- package/src/eval/eval.types.ts +9 -0
- package/src/eval/failure-class.ts +263 -0
- package/src/eval/index.ts +8 -0
- package/src/eval/metrics.ts +7 -0
- package/src/eval/parse-log.ts +105 -0
- package/src/eval/report.ts +19 -0
- package/src/eval/score.ts +10 -0
- package/src/loop/feedback/meta-rule-docs.ts +48 -0
- package/src/loop/feedback/rule-docs.ts +150 -0
- package/src/loop/loop.types.ts +4 -0
- package/src/loop/rule-docs.generated.json +131 -1
- package/src/loop/ttsr-defaults.ts +175 -4
- package/src/loop/turn.ts +3 -0
- package/src/meta-rules/registry.ts +32 -0
- package/src/meta-rules/rules/ci/no-github-context-in-shell.ts +40 -0
- package/src/meta-rules/rules/ci/no-pull-request-target-untrusted-checkout.ts +42 -0
- package/src/meta-rules/rules/ci/workflow-permissions-explicit.ts +49 -0
- package/src/meta-rules/rules/ci/workflow-permissions-least-privilege.ts +44 -0
- package/src/meta-rules/rules/config/next-image-remote-patterns-no-wildcards.ts +77 -0
- package/src/meta-rules/rules/config/next-instrumentation-present.ts +66 -0
- package/src/meta-rules/rules/config/next-proxy-over-middleware.ts +64 -0
- package/src/meta-rules/rules/config/tsconfig-recommended-flags.ts +75 -0
- package/src/meta-rules/rules/supply-chain/dependency-overrides-require-comment.ts +61 -0
- package/src/meta-rules/rules/supply-chain/fastify-security-plugins.ts +54 -0
- package/src/meta-rules/rules/supply-chain/lockfile-required.ts +51 -0
- package/src/meta-rules/rules/supply-chain/migrations-must-be-checked-in.ts +49 -0
- package/src/meta-rules/rules/supply-chain/no-git-or-tarball-dependencies.ts +70 -0
- package/src/meta-rules/rules/supply-chain/package-manager-field-required.ts +31 -0
- package/src/meta-rules/rules/supply-chain/production-must-not-use-drizzle-push.ts +75 -0
- package/src/meta-rules/rules/supply-chain/single-package-manager.ts +30 -0
- package/src/meta-rules/utils/lockfiles.ts +105 -0
- package/src/meta-rules/utils/workflow-yaml.ts +86 -0
- package/src/rule-packs/authorization/index.ts +26 -0
- package/src/rule-packs/authorization/rules/id-param-requires-object-authz.ts +87 -0
- package/src/rule-packs/authorization/rules/mutating-route-requires-authz.ts +116 -0
- package/src/rule-packs/authorization/rules/server-action-requires-authz.ts +101 -0
- package/src/rule-packs/authorization/utils.ts +285 -0
- package/src/rule-packs/boundary-utils.ts +13 -0
- package/src/rule-packs/code-flow/index.ts +4 -1
- package/src/rule-packs/code-flow/rules/no-throw-literal.ts +67 -0
- package/src/rule-packs/drizzle/index.ts +7 -0
- package/src/rule-packs/drizzle/rules/update-delete-account-scoped-must-filter-scope.ts +106 -0
- package/src/rule-packs/drizzle/rules/update-delete-must-have-where.ts +73 -0
- package/src/rule-packs/drizzle/utils.ts +133 -1
- package/src/rule-packs/fastify/index.ts +38 -0
- package/src/rule-packs/fastify/rules/error-handler-must-set-status.ts +78 -0
- package/src/rule-packs/fastify/rules/prefer-return-over-reply-send.ts +104 -0
- package/src/rule-packs/fastify/rules/require-fp-for-shared-plugins.ts +106 -0
- package/src/rule-packs/fastify/rules/require-plugin-name.ts +54 -0
- package/src/rule-packs/fastify/rules/require-response-schema.ts +62 -0
- package/src/rule-packs/fastify/rules/require-route-schema.ts +104 -0
- package/src/rule-packs/fastify/rules/test-inject-must-close-app.ts +44 -0
- package/src/rule-packs/fastify/utils/fastifyChain.ts +231 -0
- package/src/rule-packs/index.ts +10 -0
- package/src/rule-packs/jwt-cookies/index.ts +10 -0
- package/src/rule-packs/jwt-cookies/rules/auth-cookie-must-set-maxage-or-expires.ts +132 -0
- package/src/rule-packs/jwt-cookies/rules/auth-cookie-must-set-samesite.ts +151 -0
- package/src/rule-packs/jwt-cookies/rules/jwt-must-verify-not-decode.ts +124 -0
- package/src/rule-packs/module-boundaries/index.ts +3 -0
- package/src/rule-packs/module-boundaries/rules/no-react-in-services.ts +111 -0
- package/src/rule-packs/nextjs/index.ts +32 -0
- package/src/rule-packs/nextjs/rules/await-dynamic-request-apis.ts +65 -0
- package/src/rule-packs/nextjs/rules/error-boundary-require-use-client.ts +38 -0
- package/src/rule-packs/nextjs/rules/mutation-should-revalidate-cache.ts +152 -0
- package/src/rule-packs/nextjs/rules/no-html-img-element.ts +45 -0
- package/src/rule-packs/nextjs/rules/no-internal-api-fetch.ts +126 -0
- package/src/rule-packs/nextjs/rules/no-secret-props-to-client.ts +118 -0
- package/src/rule-packs/nextjs/rules/no-sensitive-next-public-env.ts +72 -0
- package/src/rule-packs/nextjs/rules/prefer-lazy-use-state-init.ts +85 -0
- package/src/rule-packs/nextjs/rules/server-action-requires-authz-and-validation.ts +178 -0
- package/src/rule-packs/nextjs/rules/server-only-modules-import-server-only.ts +87 -0
- package/src/rule-packs/nextjs/utils.ts +18 -0
- package/src/rule-packs/react-component-architecture/index.ts +18 -0
- package/src/rule-packs/react-component-architecture/rules/dangerous-html-requires-sanitize.ts +83 -0
- package/src/rule-packs/react-component-architecture/rules/no-anonymous-useEffect.ts +61 -0
- package/src/rule-packs/react-component-architecture/rules/no-component-invocation.ts +55 -0
- package/src/rule-packs/react-component-architecture/rules/no-derived-state-in-effect.ts +204 -0
- package/src/rule-packs/react-component-architecture/rules/no-nested-component.ts +152 -0
- package/src/rule-packs/react-component-architecture/rules/no-react-fc.ts +57 -0
- package/src/rule-packs/rule-catalog.types.ts +21 -0
- package/src/rule-packs/rule-metadata.ts +163 -0
- package/src/rule-packs/runtime-boundaries/index.ts +33 -0
- package/src/rule-packs/runtime-boundaries/rules/no-prototype-polluting-merge.ts +113 -0
- package/src/rule-packs/runtime-boundaries/rules/no-user-controlled-fetch-url.ts +69 -0
- package/src/rule-packs/runtime-boundaries/rules/no-user-controlled-redirect.ts +79 -0
- package/src/rule-packs/runtime-boundaries/rules/upload-must-set-limits.ts +126 -0
- package/src/rule-packs/runtime-boundaries/rules/webhook-must-verify-signature-before-parse.ts +87 -0
- package/src/rule-packs/security/index.ts +35 -0
- package/src/rule-packs/security/rules/catch-must-handle.ts +126 -0
- package/src/rule-packs/security/rules/no-auth-token-in-storage.ts +107 -0
- package/src/rule-packs/security/rules/no-child-process-exec.ts +72 -0
- package/src/rule-packs/security/rules/no-dynamic-regexp.ts +56 -0
- package/src/rule-packs/security/rules/no-inner-html-assignment.ts +42 -0
- package/src/rule-packs/security/rules/no-spawn-with-shell.ts +106 -0
- package/src/rule-packs/structured-logging/index.ts +6 -0
- package/src/rule-packs/structured-logging/rules/caught-error-log-requires-cause.ts +234 -0
- package/src/rule-packs/structured-logging/rules/logger-not-console.ts +146 -0
- package/src/rule-packs/test-conventions/index.ts +9 -0
- package/src/rule-packs/test-conventions/rules/fake-timers-must-be-restored.ts +143 -0
- package/src/rule-packs/test-conventions/rules/no-conditional-expect.ts +77 -0
- package/src/rule-packs/test-conventions/rules/no-real-network-in-unit-tests.ts +174 -0
- package/src/rule-packs/typescript-core/index.ts +30 -0
- package/src/rule-packs/typescript-core/rules/exported-functions-require-return-type.ts +74 -0
- package/src/rule-packs/typescript-core/rules/fetch-must-check-ok.ts +106 -0
- package/src/rule-packs/typescript-core/rules/json-parse-must-validate.ts +97 -0
- package/src/rule-packs/typescript-core/rules/no-unsafe-boundary-cast.ts +70 -0
- package/src/stack-detection/packs.ts +57 -0
- package/strict.type-aware.eslint.config.mjs +33 -0
- package/strict.web.eslint.config.mjs +32 -1
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
// A/B sweep over the REAL thing: full web-app builds from the benchmark catalog,
|
|
2
|
+
// not toy logic seeds. Orchestrates headless-build.ts as a subprocess per
|
|
3
|
+
// (feature-variant x repeat), toggling features via env (TSFORGE_TTSR etc.),
|
|
4
|
+
// then aggregates pass-rate + turns into the same statistical report the logic
|
|
5
|
+
// sweep uses (Wilson intervals + two-proportion z-test vs a baseline variant).
|
|
6
|
+
//
|
|
7
|
+
// Each build is a from-scratch multi-entity app (up to webMaxTurns turns, large
|
|
8
|
+
// token spend), so this is GATED: it prints the plan and exits unless
|
|
9
|
+
// TSFORGE_WEB_CONFIRM=1 is set — a real run can cost hours and significant API
|
|
10
|
+
// credits on a cloud flagship.
|
|
11
|
+
//
|
|
12
|
+
// Run (dry-run plan): TSFORGE_WEB_APP=saas-crm bun run packages/core/scripts/web-sweep.ts
|
|
13
|
+
// Run (for real): TSFORGE_WEB_APP=saas-crm TSFORGE_FEATURE_VARIANTS=ttsr \
|
|
14
|
+
// TSFORGE_WEB_REPEATS=2 TSFORGE_WEB_CONFIRM=1 \
|
|
15
|
+
// bun run packages/core/scripts/web-sweep.ts [react|vanilla]
|
|
16
|
+
import { mkdirSync, writeFileSync } from "node:fs";
|
|
17
|
+
import { join } from "node:path";
|
|
18
|
+
import { resolveActiveModel } from "../src/models-config";
|
|
19
|
+
import { LOOP_LIMITS } from "../src/loop";
|
|
20
|
+
import {
|
|
21
|
+
buildSweepReport,
|
|
22
|
+
renderSweepReportMarkdown,
|
|
23
|
+
type IRunRecord,
|
|
24
|
+
} from "../src/eval";
|
|
25
|
+
import { BENCHMARK_CATALOG, findBenchmarkApp } from "./benchmark-catalog";
|
|
26
|
+
|
|
27
|
+
/** A feature variant: dimension name -> "1" (on) | "0" (off). */
|
|
28
|
+
type IFeatureVariant = Record<string, string>;
|
|
29
|
+
|
|
30
|
+
/** The env var each known feature dimension toggles (mirrors sweep.ts so a web
|
|
31
|
+
* A/B reads the same flags the logic A/B does). */
|
|
32
|
+
const DIMENSION_ENV: Record<string, string> = {
|
|
33
|
+
ttsr: "TSFORGE_TTSR",
|
|
34
|
+
hashline: "TSFORGE_HASHLINE",
|
|
35
|
+
lsp_write_feedback: "TSFORGE_LSP_WRITE_FEEDBACK",
|
|
36
|
+
};
|
|
37
|
+
|
|
38
|
+
/** Parse `TSFORGE_FEATURE_VARIANTS` ("ttsr,hashline") into the cartesian product
 * of on/off per dimension. Empty -> a single unnamed baseline variant.
 *
 * Names are trimmed, blank entries are dropped, and a name listed more than
 * once is collapsed to its first occurrence. Without that collapse
 * "ttsr,ttsr" would yield four variants that share two labels, so the same
 * build would run twice into the same directory.
 *
 * @param spec Comma-separated dimension names.
 * @returns One variant per on/off combination, "on" before "off" for each
 *   dimension, in the order the dimensions were listed.
 */
function parseVariants(spec: string): IFeatureVariant[] {
  const dims = [
    ...new Set(
      spec
        .split(",")
        .map((s) => s.trim())
        .filter((s) => s.length > 0)
    ),
  ];

  let combos: IFeatureVariant[] = [{}];

  for (const dim of dims) {
    // Every existing combo forks into an "on" and an "off" copy.
    combos = combos.flatMap((combo) => [
      { ...combo, [dim]: "1" },
      { ...combo, [dim]: "0" },
    ]);
  }

  return combos;
}
|
|
60
|
+
|
|
61
|
+
/** Translate a variant into the env-var overrides that realize it. A dimension
 * with no entry in DIMENSION_ENV contributes nothing; any state other than "1"
 * is written as "0". */
function variantEnv(variant: IFeatureVariant): Record<string, string> {
  const overrides: Record<string, string> = {};

  Object.keys(variant).forEach((dim) => {
    const envName = DIMENSION_ENV[dim];

    if (envName === undefined) {
      return;
    }

    overrides[envName] = variant[dim] === "1" ? "1" : "0";
  });

  return overrides;
}
|
|
75
|
+
|
|
76
|
+
/** Build the display label for a variant: its dimensions sorted by name, each
 * rendered as `name=on` / `name=off` and joined with commas (for example
 * "hashline=off,ttsr=on"). A variant with no dimensions is "baseline". */
function variantLabel(variant: IFeatureVariant): string {
  const dims = Object.keys(variant).sort((a, b) => a.localeCompare(b));

  if (dims.length === 0) {
    return "baseline";
  }

  return dims
    .map((dim) => `${dim}=${variant[dim] === "1" ? "on" : "off"}`)
    .join(",");
}
|
|
84
|
+
|
|
85
|
+
/** Pick the label that other variants are compared against: the first variant
 * in which no dimension is switched on (so deltas read as "feature ON vs OFF").
 * A variant with no dimensions at all also qualifies. Falls back to "baseline"
 * when no such variant exists. */
function baselineLabel(variants: IFeatureVariant[]): string {
  for (const candidate of variants) {
    const hasEnabled = Object.values(candidate).some((state) => state !== "0");

    if (!hasEnabled) {
      return variantLabel(candidate);
    }
  }

  return "baseline";
}
|
|
94
|
+
|
|
95
|
+
interface ISweepConfig {
|
|
96
|
+
readonly slug: string;
|
|
97
|
+
readonly framework: string;
|
|
98
|
+
readonly variants: IFeatureVariant[];
|
|
99
|
+
readonly repeats: number;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
/** Current local time as a sortable `YYYYMMDD-HHMMSS` string. Month, day, hour,
 * minute and second are zero-padded to two digits; the year is emitted as-is. */
function stamp(): string {
  const now = new Date();
  const two = (n: number): string => String(n).padStart(2, "0");

  const date = `${now.getFullYear()}${two(now.getMonth() + 1)}${two(now.getDate())}`;
  const time = [now.getHours(), now.getMinutes(), now.getSeconds()]
    .map((part) => two(part))
    .join("");

  return `${date}-${time}`;
}
|
|
109
|
+
|
|
110
|
+
const EVALS_ROOT = join(import.meta.dir, "..", "..", "..", "evals");
|
|
111
|
+
const HEADLESS = join(import.meta.dir, "headless-build.ts");
|
|
112
|
+
|
|
113
|
+
/** Stream a child's stdout to our terminal while keeping a small tail buffer so
 * we can parse its final `[status · N turn(s)]` summary line.
 *
 * Decoding is done in streaming mode: a multi-byte UTF-8 character (such as the
 * `·` in the summary line) that is split across two chunks is held back and
 * emitted whole, instead of being turned into replacement characters that the
 * summary regex would never match.
 *
 * @param stream The child's stdout byte stream.
 * @returns At most the last 4096 characters of the decoded output.
 */
async function teeStdout(stream: ReadableStream<Uint8Array>): Promise<string> {
  const TAIL_CHARS = 4096;
  const reader = stream.getReader();
  const decoder = new TextDecoder();
  let tail = "";

  // Echo a decoded piece and fold it into the bounded tail buffer.
  const emit = (text: string): void => {
    if (text.length === 0) {
      return;
    }

    process.stdout.write(text);
    tail = `${tail}${text}`.slice(-TAIL_CHARS);
  };

  for (;;) {
    const chunk = await reader.read();

    if (chunk.done) {
      break;
    }

    // stream: true keeps an incomplete trailing byte sequence for the next chunk.
    emit(decoder.decode(chunk.value, { stream: true }));
  }

  // Flush whatever the decoder is still holding (a truncated final sequence).
  emit(decoder.decode());

  return tail;
}
|
|
135
|
+
|
|
136
|
+
interface IBuildOutcome {
|
|
137
|
+
readonly passed: boolean;
|
|
138
|
+
readonly turns: number;
|
|
139
|
+
readonly ms: number;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/** Run ONE headless web build in its own dir with the variant's feature env.
 *
 * Spawns `bun <headless-build.ts> --app <slug> <framework> <dir>` with the
 * current environment plus the variant's overrides, streams its stdout through
 * teeStdout, and waits for it to exit.
 *
 * @param config Sweep configuration (supplies the app slug and framework).
 * @param variant Feature variant whose env overrides are applied to the child.
 * @param dir Directory passed to the child as its last argument.
 * @returns `passed` is true when the child exits with code 0; `turns` is the
 *   count from the LAST `[status · N turn...]` line found in the captured
 *   stdout tail (0 when none is found); `ms` is the wall-clock duration.
 */
async function runOneBuild(
  config: ISweepConfig,
  variant: IFeatureVariant,
  dir: string
): Promise<IBuildOutcome> {
  const started = performance.now();
  const proc = Bun.spawn(
    ["bun", HEADLESS, "--app", config.slug, config.framework, dir],
    {
      env: { ...process.env, ...variantEnv(variant) },
      stdout: "pipe",
      stderr: "inherit",
    }
  );
  const tail = await teeStdout(proc.stdout);
  const code = await proc.exited;
  const ms = performance.now() - started;

  // If the tail holds more than one summary-shaped line, the final one is the
  // build's verdict, so take the last match rather than the first.
  const summaries = [...tail.matchAll(/\[\w+ · (\d+) turn/g)];
  const turnsText = summaries[summaries.length - 1]?.[1];

  return {
    passed: code === 0,
    turns: turnsText === undefined ? 0 : Number(turnsText),
    ms,
  };
}
|
|
168
|
+
|
|
169
|
+
/** Write the sweep plan (app, model, variants, repeats, build count) followed by
 * the cost warning to stdout, and return the total number of builds
 * (variants x repeats). */
function printPlan(config: ISweepConfig, model: string): number {
  const total = config.variants.length * config.repeats;
  const labels = config.variants.map((variant) => variantLabel(variant));

  const pieces = [
    `\nWEB A/B SWEEP — the real thing (full app builds)\n`,
    ` app: ${config.slug} (${config.framework})\n`,
    ` model: ${model}\n`,
    ` variants: ${labels.join(", ")}\n`,
    ` repeats: ${config.repeats}\n`,
    ` builds: ${total} total\n\n`,
    `Each build is a from-scratch multi-entity app: up to `,
    `${LOOP_LIMITS.webMaxTurns} model turns, vite build + browser render gate, `,
    `large token spend. ${total} of them runs SEQUENTIALLY and can take hours `,
    `and significant API credits on a cloud flagship.\n`,
  ];

  process.stdout.write(pieces.join(""));

  return total;
}
|
|
188
|
+
|
|
189
|
+
/** Execute every (variant, repeat) build one after another and collect one
 * record per build for aggregation. Each build gets its own directory
 * `<runDir>/<label>-<repeat>`, created before the build starts; a progress
 * banner and a PASS/FAIL line are written to stdout around each build. */
async function runSweep(
  config: ISweepConfig,
  runDir: string
): Promise<IRunRecord[]> {
  const total = config.variants.length * config.repeats;
  const records: IRunRecord[] = [];

  for (const variant of config.variants) {
    const label = variantLabel(variant);

    for (let repeat = 1; repeat <= config.repeats; repeat += 1) {
      // One record is pushed per build, so the running ordinal is length + 1.
      const ordinal = records.length + 1;
      const dir = join(runDir, `${label}-${String(repeat)}`);

      mkdirSync(dir, { recursive: true });
      process.stdout.write(
        `\n=== build ${String(ordinal)}/${String(total)}: ${config.slug} ${label} #${String(repeat)} ===\n`
      );

      const { passed, turns, ms } = await runOneBuild(config, variant, dir);

      records.push({ label, passed, cycles: turns, ms });

      const verdict = passed ? "PASS" : "FAIL";
      const seconds = (ms / 1000).toFixed(0);

      process.stdout.write(
        ` -> ${verdict} (${String(turns)} turns, ${seconds}s)\n`
      );
    }
  }

  return records;
}
|
|
226
|
+
|
|
227
|
+
/** Resolve the sweep config from env/argv, or print the catalog and return
 * undefined.
 *
 * Inputs:
 * - `TSFORGE_WEB_APP`: benchmark slug. When it matches no catalog entry the
 *   catalog is listed on stderr and undefined is returned.
 * - `process.argv[2]`: "vanilla" selects the vanilla framework; anything else
 *   (including nothing) selects "react".
 * - `TSFORGE_FEATURE_VARIANTS`: comma-separated dimensions, see parseVariants.
 * - `TSFORGE_WEB_REPEATS`: builds per variant. Coerced to a whole number of at
 *   least 1; a value that is not a finite number falls back to 1. Without this
 *   guard "abc" became NaN (the sweep loop then ran zero builds) and "2.5"
 *   made the planned total disagree with the number of builds actually run.
 */
function resolveConfig(): ISweepConfig | undefined {
  const slug = process.env.TSFORGE_WEB_APP ?? "";
  const app = findBenchmarkApp(slug);

  if (app === undefined) {
    const list = BENCHMARK_CATALOG.map(
      (a, i) => ` ${String(i + 1)}. ${a.slug} — ${a.name}`
    ).join("\n");

    process.stderr.write(
      `set TSFORGE_WEB_APP to a benchmark slug. catalog:\n${list}\n`
    );

    return undefined;
  }

  const framework = process.argv[2] === "vanilla" ? "vanilla" : "react";
  const variants = parseVariants(process.env.TSFORGE_FEATURE_VARIANTS ?? "");
  const requested = Number(process.env.TSFORGE_WEB_REPEATS ?? "1");
  const repeats = Number.isFinite(requested)
    ? Math.max(1, Math.floor(requested))
    : 1;

  return { slug: app.slug, framework, variants, repeats };
}
|
|
250
|
+
|
|
251
|
+
/** Script entry point. Resolves the config (exit code 2 when there is none),
 * prints the plan, and stops with exit code 0 unless TSFORGE_WEB_CONFIRM is
 * exactly "1". Otherwise it creates a timestamped run directory under
 * `<EVALS_ROOT>/runs`, runs the sweep, prints the markdown report, and saves
 * config + records + report as `report.json` in the run directory. */
async function main(): Promise<void> {
  const config = resolveConfig();

  if (config === undefined) {
    process.exit(2);
  }

  const active = await resolveActiveModel();

  printPlan(config, active.entry.model);

  const confirmed = process.env.TSFORGE_WEB_CONFIRM === "1";

  if (!confirmed) {
    process.stdout.write(
      `\nDRY RUN — set TSFORGE_WEB_CONFIRM=1 to actually run these builds.\n`
    );
    process.exit(0);
  }

  const runName = `web-sweep-${config.slug}-${stamp()}`;
  const runDir = join(EVALS_ROOT, "runs", runName);

  mkdirSync(runDir, { recursive: true });

  const records = await runSweep(config, runDir);
  const baseline = baselineLabel(config.variants);
  const report = buildSweepReport(records, baseline);

  process.stdout.write(`\n${renderSweepReportMarkdown(report)}\n`);

  const reportPath = join(runDir, "report.json");
  const payload = JSON.stringify({ config, records, report }, null, 2);

  writeFileSync(reportPath, `${payload}\n`);
  process.stdout.write(`\nsaved ${reportPath}\n`);
}
|
|
291
|
+
|
|
292
|
+
await main();
|
package/src/browser/index.ts
CHANGED
package/src/browser/oracle.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { resolve, dirname, basename, join } from "node:path";
|
|
2
|
+
import { isRecord } from "../lib/guards";
|
|
2
3
|
// `playwright` is an OPTIONAL peer: bundling it (+ a browser binary) into every
|
|
3
4
|
// install is too heavy, so the import is dynamic and the render-check skips when
|
|
4
5
|
// it's absent. The type-only import is erased at runtime, so it can't crash a
|
|
@@ -14,6 +15,20 @@ async function loadChromium(): Promise<typeof Chromium | null> {
|
|
|
14
15
|
}
|
|
15
16
|
}
|
|
16
17
|
|
|
18
|
+
/** Analyze a page with axe and hand back the raw, untyped result for
 * extractAxeViolations to narrow. Any failure along the way (the dynamic import
 * of @axe-core/playwright, building the analyzer, or the analysis itself)
 * yields null, so a11y behaves as an optional enhancement. */
async function runAxe(page: Page): Promise<unknown> {
  try {
    const { AxeBuilder } = await import("@axe-core/playwright");

    return await new AxeBuilder({ page }).analyze();
  } catch {
    return null;
  }
}
|
|
31
|
+
|
|
17
32
|
/**
|
|
18
33
|
* The browser oracle — renders a built web page in headless chromium and reports
|
|
19
34
|
* whether it actually WORKS, beyond what tsc/eslint can see: it fails on uncaught
|
|
@@ -55,22 +70,121 @@ export interface IRenderOptions {
|
|
|
55
70
|
* single-page smoke misses them. Served with SPA fallback so the client
|
|
56
71
|
* router handles the path. Empty/undefined → no crawl (unchanged behavior). */
|
|
57
72
|
routes?: string[];
|
|
73
|
+
/** Run axe accessibility checks on the page (and each crawled route). Serious
|
|
74
|
+
* and critical violations become gate errors; minor/moderate are skipped.
|
|
75
|
+
* Skipped gracefully when @axe-core/playwright isn't installed. */
|
|
76
|
+
a11y?: boolean;
|
|
77
|
+
/** Directory to write a screenshot per page/route into (desktop + mobile
|
|
78
|
+
* viewports). An artifact for human/visual review — never a pass/fail signal. */
|
|
79
|
+
screenshotDir?: string;
|
|
80
|
+
/** A perf budget (DOM node count + mount time) checked on the initial page. */
|
|
81
|
+
perfBudget?: IPerfBudget;
|
|
58
82
|
/** Navigation timeout (default 15s). */
|
|
59
83
|
timeoutMs?: number;
|
|
60
84
|
}
|
|
61
85
|
|
|
86
|
+
/** Screenshot viewports — a desktop and a mobile pass per page. */
|
|
87
|
+
const VIEWPORTS = [
|
|
88
|
+
{ name: "desktop", width: 1280, height: 800 },
|
|
89
|
+
{ name: "mobile", width: 390, height: 844 },
|
|
90
|
+
] as const;
|
|
91
|
+
|
|
62
92
|
export interface IRenderResult {
|
|
63
93
|
ok: boolean;
|
|
64
94
|
/** Human-readable failures (console errors, page errors, missing content). */
|
|
65
95
|
errors: string[];
|
|
66
96
|
/** True when the check was skipped because playwright isn't installed. */
|
|
67
97
|
skipped?: boolean;
|
|
98
|
+
/** Paths of screenshots captured (when `screenshotDir` was set). */
|
|
99
|
+
screenshots?: string[];
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
/** A simple performance budget: fail the render when the built app blows past
|
|
103
|
+
* these. Intentionally minimal (no full Lighthouse) — a tripwire, not a profiler. */
|
|
104
|
+
export interface IPerfBudget {
|
|
105
|
+
/** Max total DOM nodes after load (a proxy for over-heavy render trees). */
|
|
106
|
+
maxDomNodes?: number;
|
|
107
|
+
/** Max time from navigation start to DOMContentLoaded, in ms. */
|
|
108
|
+
maxMountMs?: number;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/** axe impact levels that FAIL the a11y check — minor/moderate are reported by
|
|
112
|
+
* axe but don't gate (too noisy to block a build on). */
|
|
113
|
+
const AXE_FAIL_IMPACTS = new Set(["serious", "critical"]);
|
|
114
|
+
|
|
115
|
+
/** The subset of an axe violation the oracle reports on. */
|
|
116
|
+
interface IAxeViolation {
|
|
117
|
+
id: string;
|
|
118
|
+
impact: string | undefined;
|
|
119
|
+
nodeCount: number;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
/** Narrow axe's untyped result into the violations the oracle reports on, using
 * guards only (no casts). A result that is not a record, or whose `violations`
 * is not an array, yields an empty list. Entries that are not records or lack a
 * string `id` are dropped; a non-string `impact` becomes undefined and a
 * non-array `nodes` counts as 0. */
function extractAxeViolations(result: unknown): IAxeViolation[] {
  if (!isRecord(result)) {
    return [];
  }

  const { violations } = result;

  if (!Array.isArray(violations)) {
    return [];
  }

  return violations.flatMap((entry: unknown): IAxeViolation[] => {
    if (!isRecord(entry)) {
      return [];
    }

    const { id, impact, nodes } = entry;

    if (typeof id !== "string") {
      return [];
    }

    return [
      {
        id,
        impact: typeof impact === "string" ? impact : undefined,
        nodeCount: Array.isArray(nodes) ? nodes.length : 0,
      },
    ];
  });
}
|
|
145
|
+
|
|
146
|
+
/** Convert axe violations into gate error messages. Only violations whose
 * impact is listed in AXE_FAIL_IMPACTS produce a message; the rest, including
 * those with no impact, are ignored. Pure. */
export function summarizeAxeViolations(
  violations: readonly IAxeViolation[],
  where: string
): string[] {
  const messages: string[] = [];

  for (const { id, impact, nodeCount } of violations) {
    if (impact === undefined || !AXE_FAIL_IMPACTS.has(impact)) {
      continue;
    }

    messages.push(
      `a11y ${impact} at ${where}: ${id} (${String(nodeCount)} node(s))`
    );
  }

  return messages;
}
|
|
158
|
+
|
|
159
|
+
/** Compare measured values against a perf budget and return one gate error per
 * limit that is exceeded (DOM nodes first, then mount time). A limit that is
 * undefined is not checked; a value equal to its limit passes. Pure. */
export function checkPerfBudget(
  domNodes: number,
  mountMs: number,
  budget: IPerfBudget,
  where: string
): string[] {
  const { maxDomNodes, maxMountMs } = budget;
  const domOver = maxDomNodes !== undefined && domNodes > maxDomNodes;
  const mountOver = maxMountMs !== undefined && mountMs > maxMountMs;

  return [
    ...(domOver
      ? [
          `perf at ${where}: ${String(domNodes)} DOM nodes > budget ${String(maxDomNodes)}`,
        ]
      : []),
    ...(mountOver
      ? [
          `perf at ${where}: mount ${String(Math.round(mountMs))}ms > budget ${String(maxMountMs)}ms`,
        ]
      : []),
  ];
}
|
|
69
182
|
|
|
70
183
|
export async function renderCheck(
|
|
71
184
|
opts: IRenderOptions
|
|
72
185
|
): Promise<IRenderResult> {
|
|
73
186
|
const errors: string[] = [];
|
|
187
|
+
const screenshots: string[] = [];
|
|
74
188
|
const chromium = await loadChromium();
|
|
75
189
|
|
|
76
190
|
// No playwright → skip the render check rather than fail the gate. The build
|
|
@@ -87,7 +201,10 @@ export async function renderCheck(
|
|
|
87
201
|
const browser = await chromium.launch({ args: ["--no-sandbox"] });
|
|
88
202
|
|
|
89
203
|
try {
|
|
90
|
-
|
|
204
|
+
// Page via an explicit context (not browser.newPage()) — axe-core/playwright
|
|
205
|
+
// requires a context-owned page; browser.close() tears the context down too.
|
|
206
|
+
const context = await browser.newContext();
|
|
207
|
+
const page = await context.newPage();
|
|
91
208
|
const timeout = opts.timeoutMs ?? 15_000;
|
|
92
209
|
|
|
93
210
|
page.on("console", (message) => {
|
|
@@ -113,30 +230,39 @@ export async function renderCheck(
|
|
|
113
230
|
waitUntil: "load",
|
|
114
231
|
timeout,
|
|
115
232
|
});
|
|
116
|
-
await runChecks(page, opts, errors);
|
|
233
|
+
await runChecks(page, opts, errors, screenshots);
|
|
117
234
|
|
|
118
235
|
if (opts.routes !== undefined && opts.routes.length > 0) {
|
|
119
|
-
await crawlRoutes(page, base, opts.routes, errors, timeout
|
|
236
|
+
await crawlRoutes(page, base, opts.routes, errors, timeout, {
|
|
237
|
+
opts,
|
|
238
|
+
screenshots,
|
|
239
|
+
});
|
|
120
240
|
}
|
|
121
241
|
} finally {
|
|
122
242
|
await server.stop(true);
|
|
123
243
|
}
|
|
124
244
|
} else {
|
|
125
245
|
await page.setContent(opts.html ?? "", { waitUntil: "load", timeout });
|
|
126
|
-
await runChecks(page, opts, errors);
|
|
246
|
+
await runChecks(page, opts, errors, screenshots);
|
|
127
247
|
}
|
|
128
248
|
|
|
129
|
-
return {
|
|
249
|
+
return {
|
|
250
|
+
ok: errors.length === 0,
|
|
251
|
+
errors,
|
|
252
|
+
...(screenshots.length > 0 ? { screenshots } : {}),
|
|
253
|
+
};
|
|
130
254
|
} finally {
|
|
131
255
|
await browser.close();
|
|
132
256
|
}
|
|
133
257
|
}
|
|
134
258
|
|
|
135
|
-
/** The expectation + step + smoke checks that run against the loaded page
|
|
259
|
+
/** The expectation + step + smoke checks that run against the loaded page, then
|
|
260
|
+
* the optional quality oracles (a11y, perf budget, screenshots). */
|
|
136
261
|
async function runChecks(
|
|
137
262
|
page: Page,
|
|
138
263
|
opts: IRenderOptions,
|
|
139
|
-
errors: string[]
|
|
264
|
+
errors: string[],
|
|
265
|
+
screenshots: string[]
|
|
140
266
|
): Promise<void> {
|
|
141
267
|
await checkExpectations(page, opts.expect, errors);
|
|
142
268
|
|
|
@@ -147,6 +273,76 @@ async function runChecks(
|
|
|
147
273
|
if (opts.smoke === true) {
|
|
148
274
|
await runSmoke(page, errors);
|
|
149
275
|
}
|
|
276
|
+
|
|
277
|
+
await runQualityOracles(page, opts, "index", errors, screenshots);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
/** Run the opt-in quality checks against the current page, in this order:
 * accessibility (when `opts.a11y` is true), perf budget (when `opts.perfBudget`
 * is set), screenshots (when `opts.screenshotDir` is set). Findings are
 * appended to `errors`, screenshot paths to `screenshots`; `where` labels the
 * page in messages and file names. */
async function runQualityOracles(
  page: Page,
  opts: IRenderOptions,
  where: string,
  errors: string[],
  screenshots: string[]
): Promise<void> {
  const { a11y, perfBudget, screenshotDir } = opts;

  if (a11y === true) {
    const raw = await runAxe(page);

    errors.push(...summarizeAxeViolations(extractAxeViolations(raw), where));
  }

  if (perfBudget !== undefined) {
    const measured = await measurePage(page);

    errors.push(
      ...checkPerfBudget(measured.domNodes, measured.mountMs, perfBudget, where)
    );
  }

  if (screenshotDir !== undefined) {
    await capturePage(page, screenshotDir, where, screenshots);
  }
}
|
|
305
|
+
|
|
306
|
+
/** Collect the perf-budget inputs inside the page: the number of elements
 * matched by `*`, and the time from the navigation entry's start to the end of
 * DOMContentLoaded. The mount time is 0 when the page exposes no navigation
 * timing entry. */
async function measurePage(
  page: Page
): Promise<{ domNodes: number; mountMs: number }> {
  return page.evaluate(() => {
    const entries = performance.getEntriesByType("navigation");
    const first = entries[0];
    let mountMs = 0;

    if (first instanceof PerformanceNavigationTiming) {
      mountMs = first.domContentLoadedEventEnd - first.startTime;
    }

    const domNodes = document.querySelectorAll("*").length;

    return { domNodes, mountMs };
  });
}
|
|
320
|
+
|
|
321
|
+
/** Filesystem-safe label for a route (e.g. "/a/b" → "a-b", "/" → "index").
 *
 * Leading and trailing slashes are stripped and each remaining slash becomes
 * "-". Every run of characters that is not a letter, digit, ".", "_" or "-"
 * is then collapsed to a single "-", so query strings, ":" params, "*"
 * wildcards and backslashes cannot end up in a screenshot file name. Routes
 * made only of letters, digits, ".", "_", "-" and "/" keep the labels they
 * had before.
 *
 * @param route Route path as passed to the crawler.
 * @returns The label, or "index" when nothing is left after stripping slashes.
 */
function routeLabel(route: string): string {
  const cleaned = route
    .replace(/^\/+|\/+$/g, "")
    .replace(/\//g, "-")
    .replace(/[^\p{L}\p{N}._-]+/gu, "-");

  return cleaned.length === 0 ? "index" : cleaned;
}
|
|
327
|
+
|
|
328
|
+
/** Capture a desktop + mobile screenshot of the current page into `dir`.
 *
 * For each entry in VIEWPORTS the page is resized, a full-page screenshot is
 * written to `<dir>/<label>-<viewport name>.png`, and the path is appended to
 * `screenshots`. A failure for one viewport is swallowed on purpose: a
 * screenshot is a best-effort artifact, never a gate failure.
 *
 * The viewport the page had on entry is restored afterwards. Otherwise the
 * page would stay at the last (mobile) size, and any check run on it later
 * would see a different layout only when screenshots are enabled.
 *
 * @param page Page to capture.
 * @param dir Directory the PNG files are written into.
 * @param label File-name prefix identifying the page or route.
 * @param screenshots Output list that receives each written path.
 */
async function capturePage(
  page: Page,
  dir: string,
  label: string,
  screenshots: string[]
): Promise<void> {
  const previous = page.viewportSize();

  try {
    for (const vp of VIEWPORTS) {
      const path = join(dir, `${label}-${vp.name}.png`);

      try {
        await page.setViewportSize({ width: vp.width, height: vp.height });
        await page.screenshot({ path, fullPage: true });
        screenshots.push(path);
      } catch {
        // A screenshot is a best-effort artifact, never a gate failure.
      }
    }
  } finally {
    if (previous !== null) {
      try {
        await page.setViewportSize(previous);
      } catch {
        // Restoring the viewport is best-effort too; never fail the gate on it.
      }
    }
  }
}
|
|
151
347
|
|
|
152
348
|
/** Serve a directory on an ephemeral localhost port. SPA FALLBACK: an
|
|
@@ -187,7 +383,8 @@ async function crawlRoutes(
|
|
|
187
383
|
base: string,
|
|
188
384
|
routes: readonly string[],
|
|
189
385
|
errors: string[],
|
|
190
|
-
timeout: number
|
|
386
|
+
timeout: number,
|
|
387
|
+
quality: { opts: IRenderOptions; screenshots: string[] }
|
|
191
388
|
): Promise<void> {
|
|
192
389
|
for (const route of routes) {
|
|
193
390
|
try {
|
|
@@ -207,7 +404,17 @@ async function crawlRoutes(
|
|
|
207
404
|
|
|
208
405
|
if (blank) {
|
|
209
406
|
errors.push(`route ${route} rendered blank`);
|
|
407
|
+
continue;
|
|
210
408
|
}
|
|
409
|
+
|
|
410
|
+
// a11y + screenshots per route (perf budget stays an initial-page check).
|
|
411
|
+
await runQualityOracles(
|
|
412
|
+
page,
|
|
413
|
+
{ ...quality.opts, perfBudget: undefined },
|
|
414
|
+
routeLabel(route),
|
|
415
|
+
errors,
|
|
416
|
+
quality.screenshots
|
|
417
|
+
);
|
|
211
418
|
} catch (error) {
|
|
212
419
|
errors.push(
|
|
213
420
|
`route ${route} failed to load: ${error instanceof Error ? error.message : String(error)}`
|
package/src/cli.ts
CHANGED
|
@@ -102,11 +102,15 @@ export interface ICliArgs {
|
|
|
102
102
|
/** Plan mode: a from-scratch build pauses after the design phase to show its
|
|
103
103
|
* plan for review/edit before implementing (`--plan`; also toggled by /plan). */
|
|
104
104
|
plan: boolean;
|
|
105
|
+
/** Keep the auto-gate at the strict TS floor only — do NOT append the
|
|
106
|
+
* project's discovered tests (`--strict-floor-only`). By default the auto-gate
|
|
107
|
+
* also runs the project's tests, so "green" means floor + tests pass. */
|
|
108
|
+
strictFloorOnly: boolean;
|
|
105
109
|
}
|
|
106
110
|
|
|
107
111
|
const BOOL_FLAGS: Record<
|
|
108
112
|
string,
|
|
109
|
-
"continue" | "noGate" | "web" | "log" | "plan"
|
|
113
|
+
"continue" | "noGate" | "web" | "log" | "plan" | "strictFloorOnly"
|
|
110
114
|
> = {
|
|
111
115
|
"--continue": "continue",
|
|
112
116
|
"-c": "continue",
|
|
@@ -114,6 +118,7 @@ const BOOL_FLAGS: Record<
|
|
|
114
118
|
"--web": "web",
|
|
115
119
|
"--log": "log",
|
|
116
120
|
"--plan": "plan",
|
|
121
|
+
"--strict-floor-only": "strictFloorOnly",
|
|
117
122
|
};
|
|
118
123
|
|
|
119
124
|
const VALUE_FLAGS = new Set([
|
|
@@ -140,6 +145,7 @@ export function parseArgs(argv: readonly string[]): ICliArgs {
|
|
|
140
145
|
web: false,
|
|
141
146
|
log: false,
|
|
142
147
|
plan: false,
|
|
148
|
+
strictFloorOnly: false,
|
|
143
149
|
};
|
|
144
150
|
|
|
145
151
|
for (let i = 0; i < argv.length; i += 1) {
|
|
@@ -795,18 +801,30 @@ async function baseGate(
|
|
|
795
801
|
}
|
|
796
802
|
|
|
797
803
|
const { detectStack } = await import("./stack-detection");
|
|
798
|
-
const {
|
|
799
|
-
|
|
804
|
+
const {
|
|
805
|
+
loadTsforgeConfig,
|
|
806
|
+
resolveActivePacks,
|
|
807
|
+
normalizeRuleOverrides,
|
|
808
|
+
resolveProjectProfile,
|
|
809
|
+
} = await import("./config/tsforge-config");
|
|
800
810
|
|
|
801
811
|
const stackProfile = await detectStack(args.dir);
|
|
802
812
|
const config = await loadTsforgeConfig(args.dir);
|
|
803
813
|
const activePacks = resolveActivePacks(stackProfile.packs, config);
|
|
804
814
|
const ruleOverrides = normalizeRuleOverrides(config);
|
|
815
|
+
const profile = resolveProjectProfile(config);
|
|
805
816
|
|
|
806
817
|
const auto = await buildGate(
|
|
807
818
|
args.dir,
|
|
808
819
|
activePacks,
|
|
809
|
-
Object.keys(ruleOverrides).length > 0 ? ruleOverrides : undefined
|
|
820
|
+
Object.keys(ruleOverrides).length > 0 ? ruleOverrides : undefined,
|
|
821
|
+
{
|
|
822
|
+
enableTypeAware: profile === "strict",
|
|
823
|
+
// "Green" should mean the strict floor AND the project's own tests pass —
|
|
824
|
+
// not just that it type-checks and lints. discoverTestCommand appends them
|
|
825
|
+
// only when the project actually has tests; --strict-floor-only opts out.
|
|
826
|
+
includeTests: !args.strictFloorOnly,
|
|
827
|
+
}
|
|
810
828
|
);
|
|
811
829
|
|
|
812
830
|
return { accept: auto.command, gateLabel: auto.label };
|
package/src/config/index.ts
CHANGED
|
@@ -4,5 +4,13 @@ export {
|
|
|
4
4
|
loadTsforgeConfig,
|
|
5
5
|
resolveActivePacks,
|
|
6
6
|
normalizeRuleOverrides,
|
|
7
|
+
resolveProjectProfile,
|
|
7
8
|
type ITsforgeProjectConfig,
|
|
8
9
|
} from "./tsforge-config";
|
|
10
|
+
export {
|
|
11
|
+
PROFILE_DEFINITIONS,
|
|
12
|
+
DEFAULT_PROFILE,
|
|
13
|
+
isProfileId,
|
|
14
|
+
resolveProfileMetaRuleOverrides,
|
|
15
|
+
type ProfileId,
|
|
16
|
+
} from "./profiles";
|