@agjs/tsforge 0.1.18 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -1
- package/scripts/build-rules-md.ts +78 -21
- package/scripts/sweep.ts +25 -20
- package/scripts/web-sweep.ts +292 -0
- package/src/browser/oracle.ts +29 -1
- package/src/cli.ts +9 -3
- package/src/config/index.ts +8 -0
- package/src/config/profiles.ts +150 -0
- package/src/config/tsforge-config.ts +64 -5
- package/src/detect-gate.ts +34 -1
- package/src/inference/inference.types.ts +8 -0
- package/src/inference/request.ts +5 -1
- package/src/inference/stream.ts +21 -2
- package/src/inference/wire.ts +0 -0
- package/src/loop/feedback/meta-rule-docs.ts +48 -0
- package/src/loop/feedback/rule-docs.ts +150 -0
- package/src/loop/rule-docs.generated.json +131 -1
- package/src/loop/run.ts +3 -0
- package/src/loop/session.ts +12 -5
- package/src/loop/ttsr-defaults.ts +175 -4
- package/src/meta-rules/registry.ts +32 -0
- package/src/meta-rules/rules/ci/no-github-context-in-shell.ts +40 -0
- package/src/meta-rules/rules/ci/no-pull-request-target-untrusted-checkout.ts +42 -0
- package/src/meta-rules/rules/ci/workflow-permissions-explicit.ts +49 -0
- package/src/meta-rules/rules/ci/workflow-permissions-least-privilege.ts +44 -0
- package/src/meta-rules/rules/config/next-image-remote-patterns-no-wildcards.ts +77 -0
- package/src/meta-rules/rules/config/next-instrumentation-present.ts +66 -0
- package/src/meta-rules/rules/config/next-proxy-over-middleware.ts +64 -0
- package/src/meta-rules/rules/config/tsconfig-recommended-flags.ts +75 -0
- package/src/meta-rules/rules/supply-chain/dependency-overrides-require-comment.ts +61 -0
- package/src/meta-rules/rules/supply-chain/fastify-security-plugins.ts +54 -0
- package/src/meta-rules/rules/supply-chain/lockfile-required.ts +51 -0
- package/src/meta-rules/rules/supply-chain/migrations-must-be-checked-in.ts +49 -0
- package/src/meta-rules/rules/supply-chain/no-git-or-tarball-dependencies.ts +70 -0
- package/src/meta-rules/rules/supply-chain/package-manager-field-required.ts +31 -0
- package/src/meta-rules/rules/supply-chain/production-must-not-use-drizzle-push.ts +75 -0
- package/src/meta-rules/rules/supply-chain/single-package-manager.ts +30 -0
- package/src/meta-rules/utils/lockfiles.ts +105 -0
- package/src/meta-rules/utils/workflow-yaml.ts +86 -0
- package/src/rule-packs/authorization/index.ts +26 -0
- package/src/rule-packs/authorization/rules/id-param-requires-object-authz.ts +87 -0
- package/src/rule-packs/authorization/rules/mutating-route-requires-authz.ts +116 -0
- package/src/rule-packs/authorization/rules/server-action-requires-authz.ts +101 -0
- package/src/rule-packs/authorization/utils.ts +285 -0
- package/src/rule-packs/boundary-utils.ts +13 -0
- package/src/rule-packs/code-flow/index.ts +4 -1
- package/src/rule-packs/code-flow/rules/no-throw-literal.ts +67 -0
- package/src/rule-packs/drizzle/index.ts +7 -0
- package/src/rule-packs/drizzle/rules/update-delete-account-scoped-must-filter-scope.ts +106 -0
- package/src/rule-packs/drizzle/rules/update-delete-must-have-where.ts +73 -0
- package/src/rule-packs/drizzle/utils.ts +133 -1
- package/src/rule-packs/fastify/index.ts +38 -0
- package/src/rule-packs/fastify/rules/error-handler-must-set-status.ts +78 -0
- package/src/rule-packs/fastify/rules/prefer-return-over-reply-send.ts +104 -0
- package/src/rule-packs/fastify/rules/require-fp-for-shared-plugins.ts +106 -0
- package/src/rule-packs/fastify/rules/require-plugin-name.ts +54 -0
- package/src/rule-packs/fastify/rules/require-response-schema.ts +62 -0
- package/src/rule-packs/fastify/rules/require-route-schema.ts +104 -0
- package/src/rule-packs/fastify/rules/test-inject-must-close-app.ts +44 -0
- package/src/rule-packs/fastify/utils/fastifyChain.ts +231 -0
- package/src/rule-packs/index.ts +10 -0
- package/src/rule-packs/jwt-cookies/index.ts +10 -0
- package/src/rule-packs/jwt-cookies/rules/auth-cookie-must-set-maxage-or-expires.ts +132 -0
- package/src/rule-packs/jwt-cookies/rules/auth-cookie-must-set-samesite.ts +151 -0
- package/src/rule-packs/jwt-cookies/rules/jwt-must-verify-not-decode.ts +124 -0
- package/src/rule-packs/module-boundaries/index.ts +3 -0
- package/src/rule-packs/module-boundaries/rules/no-react-in-services.ts +111 -0
- package/src/rule-packs/nextjs/index.ts +32 -0
- package/src/rule-packs/nextjs/rules/await-dynamic-request-apis.ts +65 -0
- package/src/rule-packs/nextjs/rules/error-boundary-require-use-client.ts +38 -0
- package/src/rule-packs/nextjs/rules/mutation-should-revalidate-cache.ts +152 -0
- package/src/rule-packs/nextjs/rules/no-html-img-element.ts +45 -0
- package/src/rule-packs/nextjs/rules/no-internal-api-fetch.ts +126 -0
- package/src/rule-packs/nextjs/rules/no-secret-props-to-client.ts +118 -0
- package/src/rule-packs/nextjs/rules/no-sensitive-next-public-env.ts +72 -0
- package/src/rule-packs/nextjs/rules/prefer-lazy-use-state-init.ts +85 -0
- package/src/rule-packs/nextjs/rules/server-action-requires-authz-and-validation.ts +178 -0
- package/src/rule-packs/nextjs/rules/server-only-modules-import-server-only.ts +87 -0
- package/src/rule-packs/nextjs/utils.ts +18 -0
- package/src/rule-packs/react-component-architecture/index.ts +18 -0
- package/src/rule-packs/react-component-architecture/rules/dangerous-html-requires-sanitize.ts +83 -0
- package/src/rule-packs/react-component-architecture/rules/no-anonymous-useEffect.ts +61 -0
- package/src/rule-packs/react-component-architecture/rules/no-component-invocation.ts +55 -0
- package/src/rule-packs/react-component-architecture/rules/no-derived-state-in-effect.ts +204 -0
- package/src/rule-packs/react-component-architecture/rules/no-nested-component.ts +152 -0
- package/src/rule-packs/react-component-architecture/rules/no-react-fc.ts +57 -0
- package/src/rule-packs/rule-catalog.types.ts +21 -0
- package/src/rule-packs/rule-metadata.ts +163 -0
- package/src/rule-packs/runtime-boundaries/index.ts +33 -0
- package/src/rule-packs/runtime-boundaries/rules/no-prototype-polluting-merge.ts +113 -0
- package/src/rule-packs/runtime-boundaries/rules/no-user-controlled-fetch-url.ts +69 -0
- package/src/rule-packs/runtime-boundaries/rules/no-user-controlled-redirect.ts +79 -0
- package/src/rule-packs/runtime-boundaries/rules/upload-must-set-limits.ts +126 -0
- package/src/rule-packs/runtime-boundaries/rules/webhook-must-verify-signature-before-parse.ts +87 -0
- package/src/rule-packs/security/index.ts +35 -0
- package/src/rule-packs/security/rules/catch-must-handle.ts +126 -0
- package/src/rule-packs/security/rules/no-auth-token-in-storage.ts +107 -0
- package/src/rule-packs/security/rules/no-child-process-exec.ts +72 -0
- package/src/rule-packs/security/rules/no-dynamic-regexp.ts +56 -0
- package/src/rule-packs/security/rules/no-inner-html-assignment.ts +42 -0
- package/src/rule-packs/security/rules/no-spawn-with-shell.ts +106 -0
- package/src/rule-packs/structured-logging/index.ts +6 -0
- package/src/rule-packs/structured-logging/rules/caught-error-log-requires-cause.ts +234 -0
- package/src/rule-packs/structured-logging/rules/logger-not-console.ts +146 -0
- package/src/rule-packs/test-conventions/index.ts +9 -0
- package/src/rule-packs/test-conventions/rules/fake-timers-must-be-restored.ts +143 -0
- package/src/rule-packs/test-conventions/rules/no-conditional-expect.ts +77 -0
- package/src/rule-packs/test-conventions/rules/no-real-network-in-unit-tests.ts +174 -0
- package/src/rule-packs/typescript-core/index.ts +30 -0
- package/src/rule-packs/typescript-core/rules/exported-functions-require-return-type.ts +74 -0
- package/src/rule-packs/typescript-core/rules/fetch-must-check-ok.ts +106 -0
- package/src/rule-packs/typescript-core/rules/json-parse-must-validate.ts +97 -0
- package/src/rule-packs/typescript-core/rules/no-unsafe-boundary-cast.ts +70 -0
- package/src/stack-detection/packs.ts +57 -0
- package/strict.web.eslint.config.mjs +32 -1
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agjs/tsforge",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.
|
|
4
|
+
"version": "0.2.0",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"description": "TypeScript coding harness with a deterministic gate, stack-aware guardrails, and stream-level correction.",
|
|
7
7
|
"repository": {
|
|
@@ -34,6 +34,9 @@
|
|
|
34
34
|
"@stylistic/eslint-plugin": "^5.10.0",
|
|
35
35
|
"@typescript-eslint/utils": "8.60.0",
|
|
36
36
|
"cli-highlight": "2.1.11",
|
|
37
|
+
"eslint-plugin-react": "^7.37.5",
|
|
38
|
+
"eslint-plugin-react-hooks": "^7.1.1",
|
|
39
|
+
"eslint-plugin-jsx-a11y": "^6.10.2",
|
|
37
40
|
"eslint": "10.4.0",
|
|
38
41
|
"prettier": "3.8.3",
|
|
39
42
|
"typescript": "6.0.3",
|
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
// Generate RULES.md
|
|
2
|
-
// This produces a deterministic, human-readable reference of what gets enforced.
|
|
3
|
-
// bun run packages/core/scripts/build-rules-md.ts
|
|
1
|
+
// Generate RULES.md grouped by adoption tier, then pack.
|
|
4
2
|
import { join } from "node:path";
|
|
5
3
|
import { RULE_PACKS } from "../src/rule-packs";
|
|
6
4
|
import { META_RULES } from "../src/meta-rules";
|
|
5
|
+
import { getRuleCatalogEntry } from "../src/rule-packs/rule-metadata";
|
|
6
|
+
import type { RuleTier } from "../src/rule-packs/rule-catalog.types";
|
|
7
|
+
import { PROFILE_DEFINITIONS } from "../src/config/profiles";
|
|
7
8
|
|
|
8
9
|
function getRuleDescription(obj: unknown): string | undefined {
|
|
9
10
|
const isObject = (val: unknown): val is Record<string, unknown> =>
|
|
@@ -30,15 +31,28 @@ function getRuleDescription(obj: unknown): string | undefined {
|
|
|
30
31
|
return typeof description === "string" ? description : undefined;
|
|
31
32
|
}
|
|
32
33
|
|
|
34
|
+
const TIER_ORDER: readonly RuleTier[] = [
|
|
35
|
+
"safety",
|
|
36
|
+
"framework",
|
|
37
|
+
"architecture",
|
|
38
|
+
"experimental",
|
|
39
|
+
];
|
|
40
|
+
|
|
33
41
|
const out: string[] = [
|
|
34
42
|
"# Rules and Meta-Rules Catalog",
|
|
35
43
|
"",
|
|
36
|
-
"
|
|
44
|
+
"Rules are grouped by **adoption tier**. Use `profile` in `tsforge.config.json` to control which tiers are active by default.",
|
|
45
|
+
"",
|
|
46
|
+
"## Profiles",
|
|
37
47
|
"",
|
|
38
48
|
];
|
|
39
49
|
|
|
40
|
-
|
|
41
|
-
out.push(
|
|
50
|
+
for (const profile of Object.values(PROFILE_DEFINITIONS)) {
|
|
51
|
+
out.push(`- **${profile.id}**: ${profile.description}`);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
out.push("");
|
|
55
|
+
out.push("## Rule Packs by Tier");
|
|
42
56
|
out.push("");
|
|
43
57
|
|
|
44
58
|
type PackId = keyof typeof RULE_PACKS;
|
|
@@ -47,36 +61,62 @@ function isPackId(id: string): id is PackId {
|
|
|
47
61
|
return id in RULE_PACKS;
|
|
48
62
|
}
|
|
49
63
|
|
|
50
|
-
const
|
|
64
|
+
const entriesByTier = new Map<
|
|
65
|
+
RuleTier,
|
|
66
|
+
{ packId: string; ruleName: string; severity: string; description: string }[]
|
|
67
|
+
>();
|
|
51
68
|
|
|
52
|
-
for (const packId of
|
|
69
|
+
for (const packId of Object.keys(RULE_PACKS).sort()) {
|
|
53
70
|
if (!isPackId(packId)) {
|
|
54
71
|
continue;
|
|
55
72
|
}
|
|
56
73
|
|
|
57
74
|
const pack = RULE_PACKS[packId];
|
|
58
75
|
|
|
59
|
-
|
|
60
|
-
out.push("");
|
|
61
|
-
out.push(pack.description);
|
|
62
|
-
out.push("");
|
|
63
|
-
|
|
64
|
-
const ruleNames = Object.keys(pack.rules).sort();
|
|
65
|
-
|
|
66
|
-
for (const ruleName of ruleNames) {
|
|
76
|
+
for (const ruleName of Object.keys(pack.rules).sort()) {
|
|
67
77
|
const rule = pack.rules[ruleName];
|
|
68
78
|
const severity = pack.rulesConfig[ruleName] ?? "warn";
|
|
69
79
|
const description = getRuleDescription(rule) ?? ruleName;
|
|
70
|
-
const
|
|
71
|
-
const
|
|
80
|
+
const tier = getRuleCatalogEntry(ruleName, packId).tier;
|
|
81
|
+
const list = entriesByTier.get(tier) ?? [];
|
|
82
|
+
|
|
83
|
+
list.push({
|
|
84
|
+
packId,
|
|
85
|
+
ruleName,
|
|
86
|
+
severity: severity.toUpperCase(),
|
|
87
|
+
description,
|
|
88
|
+
});
|
|
89
|
+
entriesByTier.set(tier, list);
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
for (const tier of TIER_ORDER) {
|
|
94
|
+
const entries = entriesByTier.get(tier) ?? [];
|
|
95
|
+
|
|
96
|
+
if (entries.length === 0) {
|
|
97
|
+
continue;
|
|
98
|
+
}
|
|
72
99
|
|
|
73
|
-
|
|
100
|
+
out.push(`### Tier: ${tier}`);
|
|
101
|
+
out.push("");
|
|
102
|
+
|
|
103
|
+
for (const entry of entries.sort((a, b) => {
|
|
104
|
+
const byPack = a.packId.localeCompare(b.packId);
|
|
105
|
+
|
|
106
|
+
if (byPack !== 0) {
|
|
107
|
+
return byPack;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
return a.ruleName.localeCompare(b.ruleName);
|
|
111
|
+
})) {
|
|
112
|
+
out.push(
|
|
113
|
+
`- **${entry.packId}/${entry.ruleName}** [${entry.severity}]: ${entry.description}`
|
|
114
|
+
);
|
|
74
115
|
}
|
|
75
116
|
|
|
76
117
|
out.push("");
|
|
77
118
|
}
|
|
78
119
|
|
|
79
|
-
// Section: Meta-Rules
|
|
80
120
|
out.push("## Meta-Rules");
|
|
81
121
|
out.push("");
|
|
82
122
|
out.push(
|
|
@@ -103,7 +143,6 @@ for (const rule of META_RULES) {
|
|
|
103
143
|
rulesByCategory.set(cat, rules);
|
|
104
144
|
}
|
|
105
145
|
|
|
106
|
-
// Render meta-rules by category.
|
|
107
146
|
for (const category of categoryOrder) {
|
|
108
147
|
const rules = rulesByCategory.get(category) ?? [];
|
|
109
148
|
|
|
@@ -123,6 +162,24 @@ for (const category of categoryOrder) {
|
|
|
123
162
|
out.push("");
|
|
124
163
|
}
|
|
125
164
|
|
|
165
|
+
out.push("## Out of scope");
|
|
166
|
+
out.push("");
|
|
167
|
+
out.push(
|
|
168
|
+
"The following are intentionally deferred — wrong tool for the syntactic ESLint gate, or require cross-file analysis:"
|
|
169
|
+
);
|
|
170
|
+
out.push("");
|
|
171
|
+
out.push(
|
|
172
|
+
"- GraphQL/WebSocket/OpenAPI contract rules (until OpenAPI dep + parser)"
|
|
173
|
+
);
|
|
174
|
+
out.push(
|
|
175
|
+
"- Container/Kubernetes YAML hardening (future meta-rules when Dockerfile/k8s detected)"
|
|
176
|
+
);
|
|
177
|
+
out.push("- LLM/MCP security packs (opt-in when AI SDK deps detected)");
|
|
178
|
+
out.push("- FSD layer DAG / full authorization taint tracking");
|
|
179
|
+
out.push("- Lighthouse / bundle-analyzer CI gates");
|
|
180
|
+
out.push("- Violation ratcheting / baseline snapshots (Phase 5)");
|
|
181
|
+
out.push("");
|
|
182
|
+
|
|
126
183
|
const path = join(import.meta.dir, "..", "RULES.md");
|
|
127
184
|
|
|
128
185
|
await Bun.write(path, out.join("\n"));
|
package/scripts/sweep.ts
CHANGED
|
@@ -11,6 +11,7 @@ import { runSpec, qualityRepair } from "../src/loop";
|
|
|
11
11
|
import { modelAgent } from "../src/agent";
|
|
12
12
|
import { OpenAICompatibleProvider } from "../src/inference";
|
|
13
13
|
import { resolveActiveModel, resolveApiKey } from "../src/models-config";
|
|
14
|
+
import { providerConfig } from "../src/cli";
|
|
14
15
|
import { summarize, type IRunRecord } from "../src/eval";
|
|
15
16
|
import { renderEvent } from "../src/render";
|
|
16
17
|
import type { ILoopEvent } from "../src/loop";
|
|
@@ -109,28 +110,32 @@ const seedFiles = await readdir(seedDir, { recursive: true });
|
|
|
109
110
|
// unreachable endpoint and hung with an empty run.log.)
|
|
110
111
|
const { entry: activeModel } = await resolveActiveModel();
|
|
111
112
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
// strict gate). Default off; enable via env if a target genuinely loops.
|
|
121
|
-
repetitionPenalty:
|
|
122
|
-
process.env.TSFORGE_REPETITION_PENALTY === undefined
|
|
123
|
-
? undefined
|
|
124
|
-
: Number(process.env.TSFORGE_REPETITION_PENALTY),
|
|
125
|
-
});
|
|
113
|
+
// Build the wire config the SAME way the CLI does (`providerConfig`), so the
|
|
114
|
+
// sweep inherits the active entry's provider dialect — `reasoning`,
|
|
115
|
+
// `reasoningEffort`, `extraBody`, `extraHeaders`. Hand-rolling the config here
|
|
116
|
+
// dropped those fields, so a DeepSeek sweep sent qwen-only params and hit the
|
|
117
|
+
// 400s the interactive path already handles. maxTokens still defaults to
|
|
118
|
+
// PROVIDER_LIMITS (16384) — thinking tokens count against it, so reasoning +
|
|
119
|
+
// code get room. Repetition penalty stays opt-in via TSFORGE_REPETITION_PENALTY.
|
|
120
|
+
const provider = new OpenAICompatibleProvider(providerConfig(activeModel));
|
|
126
121
|
|
|
127
122
|
// The judge scores quality. Point it at a flagship via TSFORGE_JUDGE_URL/MODEL
|
|
128
|
-
// (+ TSFORGE_JUDGE_KEY) to measure the gap
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
123
|
+
// (+ TSFORGE_JUDGE_KEY) to measure the gap. When NOT overridden, the active
|
|
124
|
+
// model judges itself — reuse its full dialect via providerConfig so a
|
|
125
|
+
// self-judge against DeepSeek speaks DeepSeek too. An explicit external judge
|
|
126
|
+
// is a plain generic call (its own endpoint, no inherited reasoning dialect).
|
|
127
|
+
const judgeOverridden =
|
|
128
|
+
process.env.TSFORGE_JUDGE_URL !== undefined ||
|
|
129
|
+
process.env.TSFORGE_JUDGE_MODEL !== undefined;
|
|
130
|
+
const judgeProvider = new OpenAICompatibleProvider(
|
|
131
|
+
judgeOverridden
|
|
132
|
+
? {
|
|
133
|
+
baseUrl: process.env.TSFORGE_JUDGE_URL ?? activeModel.baseUrl,
|
|
134
|
+
model: process.env.TSFORGE_JUDGE_MODEL ?? activeModel.model,
|
|
135
|
+
apiKey: process.env.TSFORGE_JUDGE_KEY ?? resolveApiKey(activeModel),
|
|
136
|
+
}
|
|
137
|
+
: providerConfig(activeModel)
|
|
138
|
+
);
|
|
134
139
|
|
|
135
140
|
/** Sortable timestamp `YYYYMMDD-HHMMSS` so run dirs sort newest-last by name. */
|
|
136
141
|
function stamp(): string {
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
// A/B sweep over the REAL thing: full web-app builds from the benchmark catalog,
|
|
2
|
+
// not toy logic seeds. Orchestrates headless-build.ts as a subprocess per
|
|
3
|
+
// (feature-variant x repeat), toggling features via env (TSFORGE_TTSR etc.),
|
|
4
|
+
// then aggregates pass-rate + turns into the same statistical report the logic
|
|
5
|
+
// sweep uses (Wilson intervals + two-proportion z-test vs a baseline variant).
|
|
6
|
+
//
|
|
7
|
+
// Each build is a from-scratch multi-entity app (up to webMaxTurns turns, large
|
|
8
|
+
// token spend), so this is GATED: it prints the plan and exits unless
|
|
9
|
+
// TSFORGE_WEB_CONFIRM=1 is set — a real run can cost hours and significant API
|
|
10
|
+
// credits on a cloud flagship.
|
|
11
|
+
//
|
|
12
|
+
// Run (dry-run plan): TSFORGE_WEB_APP=saas-crm bun run packages/core/scripts/web-sweep.ts
|
|
13
|
+
// Run (for real): TSFORGE_WEB_APP=saas-crm TSFORGE_FEATURE_VARIANTS=ttsr \
|
|
14
|
+
// TSFORGE_WEB_REPEATS=2 TSFORGE_WEB_CONFIRM=1 \
|
|
15
|
+
// bun run packages/core/scripts/web-sweep.ts [react|vanilla]
|
|
16
|
+
import { mkdirSync, writeFileSync } from "node:fs";
|
|
17
|
+
import { join } from "node:path";
|
|
18
|
+
import { resolveActiveModel } from "../src/models-config";
|
|
19
|
+
import { LOOP_LIMITS } from "../src/loop";
|
|
20
|
+
import {
|
|
21
|
+
buildSweepReport,
|
|
22
|
+
renderSweepReportMarkdown,
|
|
23
|
+
type IRunRecord,
|
|
24
|
+
} from "../src/eval";
|
|
25
|
+
import { BENCHMARK_CATALOG, findBenchmarkApp } from "./benchmark-catalog";
|
|
26
|
+
|
|
27
|
+
/** A feature variant: dimension name -> "1" (on) | "0" (off). */
|
|
28
|
+
type IFeatureVariant = Record<string, string>;
|
|
29
|
+
|
|
30
|
+
/** The env var each known feature dimension toggles (mirrors sweep.ts so a web
|
|
31
|
+
* A/B reads the same flags the logic A/B does). */
|
|
32
|
+
const DIMENSION_ENV: Record<string, string> = {
|
|
33
|
+
ttsr: "TSFORGE_TTSR",
|
|
34
|
+
hashline: "TSFORGE_HASHLINE",
|
|
35
|
+
lsp_write_feedback: "TSFORGE_LSP_WRITE_FEEDBACK",
|
|
36
|
+
};
|
|
37
|
+
|
|
38
|
+
/**
 * Parse `TSFORGE_FEATURE_VARIANTS` ("ttsr,hashline") into the cartesian product
 * of on/off per dimension.
 *
 * Names are trimmed and blank entries are dropped. A dimension listed more than
 * once is expanded only once (its first position decides ordering): expanding
 * it again would yield duplicate variants with identical labels, which doubles
 * the number of builds and makes them share one `${label}-${repeat}` directory.
 *
 * @param spec Comma-separated dimension names; may be empty.
 * @returns One variant per combination ("1" = on, "0" = off), with "on" listed
 *   before "off" for each dimension. An empty spec yields a single empty
 *   variant (the unnamed baseline).
 */
function parseVariants(spec: string): IFeatureVariant[] {
  const dims = [
    ...new Set(
      spec
        .split(",")
        .map((s) => s.trim())
        .filter((s) => s.length > 0)
    ),
  ];

  let combos: IFeatureVariant[] = [{}];

  for (const dim of dims) {
    const next: IFeatureVariant[] = [];

    // Every combination so far forks into an "on" and an "off" branch.
    for (const combo of combos) {
      next.push({ ...combo, [dim]: "1" }, { ...combo, [dim]: "0" });
    }

    combos = next;
  }

  return combos;
}
|
|
60
|
+
|
|
61
|
+
/**
 * The env overrides that realize a variant.
 *
 * Only dimensions that are own keys of `DIMENSION_ENV` are mapped. The
 * own-property check matters because dimension names come from free-form user
 * input: a name such as "toString" would otherwise resolve to an inherited
 * `Object.prototype` member and be used as an env var name.
 *
 * @param variant Dimension name -> "1" (on) | "0" (off).
 * @returns Env var name -> "1" | "0"; any state other than "1" is written as "0".
 */
function variantEnv(variant: IFeatureVariant): Record<string, string> {
  const env: Record<string, string> = {};

  for (const [dim, state] of Object.entries(variant)) {
    const key = Object.hasOwn(DIMENSION_ENV, dim)
      ? DIMENSION_ENV[dim]
      : undefined;

    if (key !== undefined) {
      env[key] = state === "1" ? "1" : "0";
    }
  }

  return env;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Build a stable, human-readable label for a variant, e.g.
 * "hashline=off,ttsr=on". Dimensions are ordered with `localeCompare` so the
 * label does not depend on insertion order; a variant with no dimensions is
 * labelled "baseline".
 */
function variantLabel(variant: IFeatureVariant): string {
  const dims = Object.keys(variant).sort((a, b) => a.localeCompare(b));

  if (dims.length === 0) {
    return "baseline";
  }

  const parts: string[] = [];

  for (const dim of dims) {
    // Anything other than the literal "1" reads as off.
    const word = variant[dim] === "1" ? "on" : "off";

    parts.push(`${dim}=${word}`);
  }

  return parts.join(",");
}
|
|
84
|
+
|
|
85
|
+
/**
 * Pick the label the report compares every other variant against: the first
 * variant whose dimensions are all "0" (so deltas read as "feature ON vs OFF").
 * A variant with no dimensions also counts as all-off. Falls back to
 * "baseline" when no such variant exists.
 */
function baselineLabel(variants: IFeatureVariant[]): string {
  for (const candidate of variants) {
    const hasEnabledDimension = Object.values(candidate).some(
      (state) => state !== "0"
    );

    if (!hasEnabledDimension) {
      return variantLabel(candidate);
    }
  }

  return "baseline";
}
|
|
94
|
+
|
|
95
|
+
interface ISweepConfig {
|
|
96
|
+
readonly slug: string;
|
|
97
|
+
readonly framework: string;
|
|
98
|
+
readonly variants: IFeatureVariant[];
|
|
99
|
+
readonly repeats: number;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
/**
 * Sortable timestamp `YYYYMMDD-HHMMSS` built from the current local time;
 * every field after the year is zero-padded to two digits.
 */
function stamp(): string {
  const now = new Date();
  const two = (n: number): string => String(n).padStart(2, "0");

  const date = [
    String(now.getFullYear()),
    two(now.getMonth() + 1), // getMonth() is zero-based
    two(now.getDate()),
  ].join("");
  const time = [
    two(now.getHours()),
    two(now.getMinutes()),
    two(now.getSeconds()),
  ].join("");

  return `${date}-${time}`;
}
|
|
109
|
+
|
|
110
|
+
const EVALS_ROOT = join(import.meta.dir, "..", "..", "..", "evals");
|
|
111
|
+
const HEADLESS = join(import.meta.dir, "headless-build.ts");
|
|
112
|
+
|
|
113
|
+
/**
 * Stream a child's stdout to our terminal while keeping a small tail buffer so
 * the caller can parse its final `[status · N turn(s)]` summary line.
 *
 * Bytes are decoded in streaming mode: a multi-byte UTF-8 character (such as
 * the "·" in the summary line) that straddles two chunks is held back until it
 * is complete instead of being emitted as U+FFFD replacement characters.
 *
 * @param stream The child's stdout byte stream; it is read to completion.
 * @returns At most the last 4096 UTF-16 code units of the decoded output.
 */
async function teeStdout(stream: ReadableStream<Uint8Array>): Promise<string> {
  const reader = stream.getReader();
  const decoder = new TextDecoder();
  let tail = "";

  const emit = (text: string): void => {
    if (text.length === 0) {
      return;
    }

    process.stdout.write(text);
    tail = `${tail}${text}`.slice(-4096);
  };

  for (;;) {
    const chunk = await reader.read();

    if (chunk.done) {
      break;
    }

    emit(decoder.decode(chunk.value, { stream: true }));
  }

  // Flush whatever the decoder is still holding (an incomplete trailing
  // sequence is reported as U+FFFD here rather than silently dropped).
  emit(decoder.decode());

  return tail;
}
|
|
135
|
+
|
|
136
|
+
interface IBuildOutcome {
|
|
137
|
+
readonly passed: boolean;
|
|
138
|
+
readonly turns: number;
|
|
139
|
+
readonly ms: number;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/**
 * Run ONE headless web build in its own dir with the variant's feature env.
 *
 * The build runs as a `bun` subprocess of the headless-build script. Its
 * stdout is teed to our terminal, its stderr is inherited, and the variant's
 * env overrides are layered on top of the current environment.
 *
 * @param config Sweep config; supplies the app slug and framework.
 * @param variant Feature variant whose env overrides are applied to the child.
 * @param dir Directory passed to the child as its build dir.
 * @returns `passed` is true when the child exits with code 0. `turns` is the
 *   count from the LAST `[status · N turn` line in the captured tail, or 0
 *   when none is found. `ms` is the wall-clock duration.
 */
async function runOneBuild(
  config: ISweepConfig,
  variant: IFeatureVariant,
  dir: string
): Promise<IBuildOutcome> {
  const started = performance.now();
  const proc = Bun.spawn(
    ["bun", HEADLESS, "--app", config.slug, config.framework, dir],
    {
      env: { ...process.env, ...variantEnv(variant) },
      stdout: "pipe",
      stderr: "inherit",
    }
  );
  const tail = await teeStdout(proc.stdout);
  const code = await proc.exited;
  const ms = performance.now() - started;

  // The tail can contain more than one "[status · N turn" line; the verdict of
  // the run is the final one, so take the last match rather than the first.
  const summaries = [...tail.matchAll(/\[\w+ · (\d+) turn/g)];
  const turnsText = summaries.at(-1)?.[1];

  return {
    passed: code === 0,
    turns: turnsText === undefined ? 0 : Number(turnsText),
    ms,
  };
}
|
|
168
|
+
|
|
169
|
+
/**
 * Write the run plan (app, model, variants, repeats, build count) followed by
 * the cost warning to stdout.
 *
 * @param config Sweep config to describe.
 * @param model Model name shown in the plan.
 * @returns The total number of builds (`variants x repeats`).
 */
function printPlan(config: ISweepConfig, model: string): number {
  const total = config.variants.length * config.repeats;
  const labels = config.variants.map(variantLabel).join(", ");

  // Leading "" gives the opening newline; the two trailing "" entries give the
  // blank line that separates the plan from the warning.
  const plan = [
    "",
    "WEB A/B SWEEP — the real thing (full app builds)",
    ` app: ${config.slug} (${config.framework})`,
    ` model: ${model}`,
    ` variants: ${labels}`,
    ` repeats: ${config.repeats}`,
    ` builds: ${total} total`,
    "",
    "",
  ].join("\n");

  const warning =
    `Each build is a from-scratch multi-entity app: up to ` +
    `${LOOP_LIMITS.webMaxTurns} model turns, vite build + browser render gate, ` +
    `large token spend. ${total} of them runs SEQUENTIALLY and can take hours ` +
    `and significant API credits on a cloud flagship.\n`;

  process.stdout.write(`${plan}${warning}`);

  return total;
}
|
|
188
|
+
|
|
189
|
+
/**
 * Run every (variant x repeat) build one after another and collect one record
 * per build for aggregation.
 *
 * Each build gets its own `${label}-${repeat}` directory under `runDir`
 * (created if missing). A banner is printed before each build and a PASS/FAIL
 * line after it.
 *
 * @param config Sweep config (variants, repeats, app slug).
 * @param runDir Parent directory for the per-build directories.
 * @returns Records in execution order; `cycles` carries the build's turn count.
 */
async function runSweep(
  config: ISweepConfig,
  runDir: string
): Promise<IRunRecord[]> {
  const total = config.variants.length * config.repeats;
  const records: IRunRecord[] = [];
  let started = 0;

  for (const variant of config.variants) {
    const label = variantLabel(variant);

    for (let repeat = 1; repeat <= config.repeats; repeat += 1) {
      started += 1;

      const buildDir = join(runDir, `${label}-${String(repeat)}`);

      mkdirSync(buildDir, { recursive: true });
      process.stdout.write(
        `\n=== build ${String(started)}/${String(total)}: ${config.slug} ${label} #${String(repeat)} ===\n`
      );

      const { passed, turns, ms } = await runOneBuild(
        config,
        variant,
        buildDir
      );
      const verdict = passed ? "PASS" : "FAIL";

      records.push({ label, passed, cycles: turns, ms });
      process.stdout.write(
        ` -> ${verdict} (${String(turns)} turns, ${(ms / 1000).toFixed(0)}s)\n`
      );
    }
  }

  return records;
}
|
|
226
|
+
|
|
227
|
+
/**
 * Resolve the sweep config from env/argv, or print the catalog and give up.
 *
 * Inputs:
 * - `TSFORGE_WEB_APP`: benchmark slug (required).
 * - `argv[2]`: "vanilla" selects the vanilla framework; anything else is react.
 * - `TSFORGE_FEATURE_VARIANTS`: comma-separated feature dimensions.
 * - `TSFORGE_WEB_REPEATS`: builds per variant. It is floored to an integer and
 *   clamped to at least 1. A value that is not a finite number falls back to 1
 *   with a warning; without that guard `Math.max(1, NaN)` is `NaN`, which
 *   makes the sweep run zero builds and report "NaN total".
 *
 * @returns The resolved config, or `undefined` after writing the catalog to
 *   stderr when the slug does not match a benchmark app.
 */
function resolveConfig(): ISweepConfig | undefined {
  const slug = process.env.TSFORGE_WEB_APP ?? "";
  const app = findBenchmarkApp(slug);

  if (app === undefined) {
    const list = BENCHMARK_CATALOG.map(
      (a, i) => ` ${String(i + 1)}. ${a.slug} — ${a.name}`
    ).join("\n");

    process.stderr.write(
      `set TSFORGE_WEB_APP to a benchmark slug. catalog:\n${list}\n`
    );

    return undefined;
  }

  const framework = process.argv[2] === "vanilla" ? "vanilla" : "react";
  const variants = parseVariants(process.env.TSFORGE_FEATURE_VARIANTS ?? "");

  const rawRepeats = process.env.TSFORGE_WEB_REPEATS ?? "1";
  const requested = Math.floor(Number(rawRepeats));
  let repeats = 1;

  if (Number.isFinite(requested)) {
    repeats = Math.max(1, requested);
  } else {
    process.stderr.write(
      `TSFORGE_WEB_REPEATS=${JSON.stringify(rawRepeats)} is not a finite number; using 1\n`
    );
  }

  return { slug: app.slug, framework, variants, repeats };
}
|
|
250
|
+
|
|
251
|
+
/**
 * Entry point: resolve the config, print the plan, and — only when
 * `TSFORGE_WEB_CONFIRM` is exactly "1" — run the sweep, print the markdown
 * report and save `report.json` into a fresh timestamped run directory.
 *
 * Exits with code 2 when no config could be resolved, and with code 0 after
 * the plan on a dry run.
 */
async function main(): Promise<void> {
  const config = resolveConfig();

  if (config === undefined) {
    process.exit(2);
  }

  const active = await resolveActiveModel();

  printPlan(config, active.entry.model);

  const confirmed = process.env.TSFORGE_WEB_CONFIRM === "1";

  if (!confirmed) {
    process.stdout.write(
      `\nDRY RUN — set TSFORGE_WEB_CONFIRM=1 to actually run these builds.\n`
    );
    process.exit(0);
  }

  const runName = `web-sweep-${config.slug}-${stamp()}`;
  const runDir = join(EVALS_ROOT, "runs", runName);

  mkdirSync(runDir, { recursive: true });

  const records = await runSweep(config, runDir);
  const report = buildSweepReport(records, baselineLabel(config.variants));

  process.stdout.write(`\n${renderSweepReportMarkdown(report)}\n`);

  // Persist the raw inputs next to the aggregate so a run can be re-analysed.
  const reportPath = join(runDir, "report.json");
  const payload = JSON.stringify({ config, records, report }, null, 2);

  writeFileSync(reportPath, `${payload}\n`);
  process.stdout.write(`\nsaved ${reportPath}\n`);
}
|
|
291
|
+
|
|
292
|
+
await main();
|
package/src/browser/oracle.ts
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
import { resolve, dirname, basename, join } from "node:path";
|
|
2
|
-
|
|
2
|
+
// `playwright` is an OPTIONAL peer: bundling it (+ a browser binary) into every
|
|
3
|
+
// install is too heavy, so the import is dynamic and the render-check skips when
|
|
4
|
+
// it's absent. The type-only import is erased at runtime, so it can't crash a
|
|
5
|
+
// playwright-less install.
|
|
6
|
+
import type { Page, chromium as Chromium } from "playwright";
|
|
7
|
+
|
|
8
|
+
/**
 * Lazily import `playwright` and hand back its `chromium` launcher.
 *
 * @returns The `chromium` export, or `null` when the dynamic import throws
 *   (for example because playwright is not installed).
 */
async function loadChromium(): Promise<typeof Chromium | null> {
  let launcher: typeof Chromium | null = null;

  try {
    const playwright = await import("playwright");

    launcher = playwright.chromium;
  } catch {
    // Import failed: keep `null` so the caller can skip the render check.
  }

  return launcher;
}
|
|
3
16
|
|
|
4
17
|
/**
|
|
5
18
|
* The browser oracle — renders a built web page in headless chromium and reports
|
|
@@ -50,12 +63,27 @@ export interface IRenderResult {
|
|
|
50
63
|
ok: boolean;
|
|
51
64
|
/** Human-readable failures (console errors, page errors, missing content). */
|
|
52
65
|
errors: string[];
|
|
66
|
+
/** True when the check was skipped because playwright isn't installed. */
|
|
67
|
+
skipped?: boolean;
|
|
53
68
|
}
|
|
54
69
|
|
|
55
70
|
export async function renderCheck(
|
|
56
71
|
opts: IRenderOptions
|
|
57
72
|
): Promise<IRenderResult> {
|
|
58
73
|
const errors: string[] = [];
|
|
74
|
+
const chromium = await loadChromium();
|
|
75
|
+
|
|
76
|
+
// No playwright → skip the render check rather than fail the gate. The build
|
|
77
|
+
// still ran tsc/eslint/build/stub-check; the browser smoke is an enhancement.
|
|
78
|
+
if (chromium === null) {
|
|
79
|
+
process.stderr.write(
|
|
80
|
+
"browser render-check skipped: playwright not installed " +
|
|
81
|
+
"(run `bunx playwright install chromium` to enable it)\n"
|
|
82
|
+
);
|
|
83
|
+
|
|
84
|
+
return { ok: true, errors: [], skipped: true };
|
|
85
|
+
}
|
|
86
|
+
|
|
59
87
|
const browser = await chromium.launch({ args: ["--no-sandbox"] });
|
|
60
88
|
|
|
61
89
|
try {
|
package/src/cli.ts
CHANGED
|
@@ -795,18 +795,24 @@ async function baseGate(
|
|
|
795
795
|
}
|
|
796
796
|
|
|
797
797
|
const { detectStack } = await import("./stack-detection");
|
|
798
|
-
const {
|
|
799
|
-
|
|
798
|
+
const {
|
|
799
|
+
loadTsforgeConfig,
|
|
800
|
+
resolveActivePacks,
|
|
801
|
+
normalizeRuleOverrides,
|
|
802
|
+
resolveProjectProfile,
|
|
803
|
+
} = await import("./config/tsforge-config");
|
|
800
804
|
|
|
801
805
|
const stackProfile = await detectStack(args.dir);
|
|
802
806
|
const config = await loadTsforgeConfig(args.dir);
|
|
803
807
|
const activePacks = resolveActivePacks(stackProfile.packs, config);
|
|
804
808
|
const ruleOverrides = normalizeRuleOverrides(config);
|
|
809
|
+
const profile = resolveProjectProfile(config);
|
|
805
810
|
|
|
806
811
|
const auto = await buildGate(
|
|
807
812
|
args.dir,
|
|
808
813
|
activePacks,
|
|
809
|
-
Object.keys(ruleOverrides).length > 0 ? ruleOverrides : undefined
|
|
814
|
+
Object.keys(ruleOverrides).length > 0 ? ruleOverrides : undefined,
|
|
815
|
+
{ enableTypeAware: profile === "strict" }
|
|
810
816
|
);
|
|
811
817
|
|
|
812
818
|
return { accept: auto.command, gateLabel: auto.label };
|
package/src/config/index.ts
CHANGED
|
@@ -4,5 +4,13 @@ export {
|
|
|
4
4
|
loadTsforgeConfig,
|
|
5
5
|
resolveActivePacks,
|
|
6
6
|
normalizeRuleOverrides,
|
|
7
|
+
resolveProjectProfile,
|
|
7
8
|
type ITsforgeProjectConfig,
|
|
8
9
|
} from "./tsforge-config";
|
|
10
|
+
export {
|
|
11
|
+
PROFILE_DEFINITIONS,
|
|
12
|
+
DEFAULT_PROFILE,
|
|
13
|
+
isProfileId,
|
|
14
|
+
resolveProfileMetaRuleOverrides,
|
|
15
|
+
type ProfileId,
|
|
16
|
+
} from "./profiles";
|