@agjs/tsforge 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/tsforge.js +2 -0
- package/package.json +35 -0
- package/src/agent/agent.constants.ts +382 -0
- package/src/agent/agent.types.ts +34 -0
- package/src/agent/index.ts +4 -0
- package/src/agent/model-agent.ts +297 -0
- package/src/agent/tool-repair.ts +194 -0
- package/src/agent/tools.ts +190 -0
- package/src/browser/checks.ts +96 -0
- package/src/browser/index.ts +8 -0
- package/src/browser/oracle.ts +303 -0
- package/src/classify.ts +48 -0
- package/src/cli.ts +1333 -0
- package/src/config/config.constants.ts +9 -0
- package/src/config/flags.ts +32 -0
- package/src/config/index.ts +8 -0
- package/src/config/tsforge-config.ts +301 -0
- package/src/constitution/baseline.ts +257 -0
- package/src/detect-gate.ts +498 -0
- package/src/eval/eval.types.ts +36 -0
- package/src/eval/index.ts +3 -0
- package/src/eval/judge.ts +62 -0
- package/src/eval/score.ts +39 -0
- package/src/files/create.ts +22 -0
- package/src/files/edit.ts +193 -0
- package/src/files/files.constants.ts +11 -0
- package/src/files/files.types.ts +81 -0
- package/src/files/hashline-format.ts +110 -0
- package/src/files/hashline.ts +689 -0
- package/src/files/index.ts +19 -0
- package/src/index.ts +8 -0
- package/src/inference/index.ts +6 -0
- package/src/inference/inference.constants.ts +34 -0
- package/src/inference/inference.types.ts +123 -0
- package/src/inference/openai-compatible.ts +113 -0
- package/src/inference/stream-guard.ts +161 -0
- package/src/inference/stream.ts +370 -0
- package/src/inference/transport.ts +78 -0
- package/src/inference/wire.ts +0 -0
- package/src/lib/fs/fs.ts +126 -0
- package/src/lib/fs/fs.types.ts +5 -0
- package/src/lib/fs/index.ts +3 -0
- package/src/lib/fs/process.ts +146 -0
- package/src/lib/guards/guards.ts +9 -0
- package/src/lib/guards/index.ts +1 -0
- package/src/lib/json/index.ts +1 -0
- package/src/lib/json/json.ts +12 -0
- package/src/lib/scope/index.ts +2 -0
- package/src/lib/scope/scope.constants.ts +3 -0
- package/src/lib/scope/scope.ts +40 -0
- package/src/loop/astgrep-fix.ts +228 -0
- package/src/loop/feedback/feedback.ts +138 -0
- package/src/loop/feedback/index.ts +8 -0
- package/src/loop/feedback/meta-rule-docs.ts +41 -0
- package/src/loop/feedback/meta-rule-feedback.ts +61 -0
- package/src/loop/feedback/rule-docs.generated.json +112 -0
- package/src/loop/feedback/rule-docs.ts +342 -0
- package/src/loop/index.ts +19 -0
- package/src/loop/loop.constants.ts +68 -0
- package/src/loop/loop.types.ts +99 -0
- package/src/loop/prompt/index.ts +2 -0
- package/src/loop/prompt/project-map.ts +69 -0
- package/src/loop/prompt/prompt.ts +107 -0
- package/src/loop/quality.ts +174 -0
- package/src/loop/rule-docs.generated.json +367 -0
- package/src/loop/run-spec.ts +88 -0
- package/src/loop/run.ts +400 -0
- package/src/loop/session.ts +1410 -0
- package/src/loop/tools/add-dependency.ts +71 -0
- package/src/loop/tools/condense.ts +498 -0
- package/src/loop/tools/edit-hashline.ts +80 -0
- package/src/loop/tools/execute-tool.ts +80 -0
- package/src/loop/tools/file-ops.ts +323 -0
- package/src/loop/tools/index.ts +2 -0
- package/src/loop/tools/lsp-ops.ts +222 -0
- package/src/loop/tools/scaffold-routes.ts +68 -0
- package/src/loop/tools/scaffold-ui.ts +62 -0
- package/src/loop/tools/scaffold-web.ts +35 -0
- package/src/loop/tools/tool-context.ts +126 -0
- package/src/loop/ttsr-defaults.ts +53 -0
- package/src/loop/ttsr.ts +322 -0
- package/src/loop/turn.ts +856 -0
- package/src/lsp/index.ts +2 -0
- package/src/lsp/lsp.types.ts +56 -0
- package/src/lsp/service.ts +500 -0
- package/src/meta-rules/context.ts +195 -0
- package/src/meta-rules/index.ts +9 -0
- package/src/meta-rules/meta-rules.types.ts +47 -0
- package/src/meta-rules/parsers/package-json-parser.ts +51 -0
- package/src/meta-rules/registry.ts +37 -0
- package/src/meta-rules/rules/ci/workflow-actions-pinned.ts +59 -0
- package/src/meta-rules/rules/ci/workflow-runner-pinned.ts +57 -0
- package/src/meta-rules/rules/ci/workflow-timeout-required.ts +114 -0
- package/src/meta-rules/rules/config/tsconfig-paths-exist.ts +117 -0
- package/src/meta-rules/rules/config/tsconfig-strict.ts +91 -0
- package/src/meta-rules/rules/source-text/no-eslint-disable-comments.ts +34 -0
- package/src/meta-rules/rules/source-text/no-ts-suppressions.ts +38 -0
- package/src/meta-rules/rules/supply-chain/no-overlapping-libs.ts +57 -0
- package/src/meta-rules/rules/supply-chain/package-exact-deps.ts +55 -0
- package/src/meta-rules/rules/testing/test-sibling-required.ts +110 -0
- package/src/meta-rules/runner.ts +64 -0
- package/src/models-config.ts +196 -0
- package/src/render/ansi.ts +289 -0
- package/src/render/banner.ts +113 -0
- package/src/render/box.ts +134 -0
- package/src/render/index.ts +7 -0
- package/src/render/markdown.ts +123 -0
- package/src/render/render.types.ts +21 -0
- package/src/render/stream-markdown.ts +128 -0
- package/src/render/style.ts +26 -0
- package/src/rule-packs/bullmq/index.ts +39 -0
- package/src/rule-packs/bullmq/rules/index.ts +7 -0
- package/src/rule-packs/bullmq/rules/job-name-must-be-constant.ts +141 -0
- package/src/rule-packs/bullmq/rules/job-options-must-set-attempts.ts +174 -0
- package/src/rule-packs/bullmq/rules/no-blocking-concurrency-zero.ts +103 -0
- package/src/rule-packs/bullmq/rules/queue-options-must-set-removeoncomplete.ts +130 -0
- package/src/rule-packs/bullmq/rules/queue-options-must-set-removeonfail.ts +130 -0
- package/src/rule-packs/bullmq/rules/worker-must-implement-close.ts +182 -0
- package/src/rule-packs/bullmq/rules/worker-must-listen-failed.ts +140 -0
- package/src/rule-packs/bullmq/utils.ts +334 -0
- package/src/rule-packs/code-flow/index.ts +25 -0
- package/src/rule-packs/code-flow/rules/index.ts +3 -0
- package/src/rule-packs/code-flow/rules/no-bare-date-now.ts +138 -0
- package/src/rule-packs/code-flow/rules/no-template-trim-empty-ternary.ts +87 -0
- package/src/rule-packs/code-flow/rules/prefer-early-return.ts +80 -0
- package/src/rule-packs/code-flow/utils/prefer-early-return.ts +132 -0
- package/src/rule-packs/comment-hygiene/index.ts +25 -0
- package/src/rule-packs/comment-hygiene/rules/index.ts +3 -0
- package/src/rule-packs/comment-hygiene/rules/no-historical-comments.ts +102 -0
- package/src/rule-packs/comment-hygiene/rules/no-narration-comments.ts +83 -0
- package/src/rule-packs/comment-hygiene/rules/no-pr-reference-comments.ts +90 -0
- package/src/rule-packs/create-rule.ts +9 -0
- package/src/rule-packs/drizzle/index.ts +41 -0
- package/src/rule-packs/drizzle/rules/account-scoped-tables-require-where.ts +371 -0
- package/src/rule-packs/drizzle/rules/index.ts +8 -0
- package/src/rule-packs/drizzle/rules/no-nested-db-transaction.ts +127 -0
- package/src/rule-packs/drizzle/rules/no-raw-sql-outside-allowlist.ts +100 -0
- package/src/rule-packs/drizzle/rules/relations-must-cover-fks.ts +209 -0
- package/src/rule-packs/drizzle/rules/schema-files-must-not-import-driver.ts +127 -0
- package/src/rule-packs/drizzle/rules/schema-files-must-only-export-schema.ts +149 -0
- package/src/rule-packs/drizzle/rules/tables-must-have-timestamps.ts +312 -0
- package/src/rule-packs/drizzle/rules/timestamp-must-specify-mode.ts +166 -0
- package/src/rule-packs/drizzle/utils.ts +115 -0
- package/src/rule-packs/elysia/index.ts +43 -0
- package/src/rule-packs/elysia/rules/consistent-status-via-set.ts +69 -0
- package/src/rule-packs/elysia/rules/no-decorate-state-collision.ts +276 -0
- package/src/rule-packs/elysia/rules/no-separate-model-interfaces.ts +144 -0
- package/src/rule-packs/elysia/rules/prefer-destructured-context.ts +155 -0
- package/src/rule-packs/elysia/rules/prefer-direct-return.ts +176 -0
- package/src/rule-packs/elysia/rules/prefer-static-services.ts +159 -0
- package/src/rule-packs/elysia/rules/prefer-throw-status.ts +151 -0
- package/src/rule-packs/elysia/rules/require-hooks-before-routes.ts +209 -0
- package/src/rule-packs/elysia/rules/require-plugin-name.ts +107 -0
- package/src/rule-packs/elysia/utils/elysiaChain.ts +306 -0
- package/src/rule-packs/env-access/index.ts +23 -0
- package/src/rule-packs/env-access/rules/index.ts +2 -0
- package/src/rule-packs/env-access/rules/no-direct-process-env.ts +133 -0
- package/src/rule-packs/env-access/rules/no-process-exit.ts +95 -0
- package/src/rule-packs/i18n-keys/index.ts +19 -0
- package/src/rule-packs/i18n-keys/rules/static-translation-key-exists.ts +173 -0
- package/src/rule-packs/index.ts +139 -0
- package/src/rule-packs/jwt-cookies/index.ts +25 -0
- package/src/rule-packs/jwt-cookies/rules/auth-cookie-must-be-httponly.ts +150 -0
- package/src/rule-packs/jwt-cookies/rules/auth-cookie-must-be-secure-in-prod.ts +149 -0
- package/src/rule-packs/jwt-cookies/rules/bcrypt-rounds-min.ts +195 -0
- package/src/rule-packs/jwt-cookies/utils.ts +188 -0
- package/src/rule-packs/oauth-security/index.ts +25 -0
- package/src/rule-packs/oauth-security/rules/pkce-required-for-oidc.ts +296 -0
- package/src/rule-packs/oauth-security/rules/state-must-be-redis-backed.ts +193 -0
- package/src/rule-packs/oauth-security/rules/state-ttl-bounded.ts +219 -0
- package/src/rule-packs/oauth-security/utils.ts +127 -0
- package/src/rule-packs/react-component-architecture/index.ts +35 -0
- package/src/rule-packs/react-component-architecture/rules/component-folder-structure.ts +123 -0
- package/src/rule-packs/react-component-architecture/rules/forwardref-display-name.ts +93 -0
- package/src/rule-packs/react-component-architecture/rules/index-must-reexport-default.ts +123 -0
- package/src/rule-packs/react-component-architecture/rules/max-hooks-per-file.ts +122 -0
- package/src/rule-packs/react-component-architecture/rules/no-cross-feature-imports.ts +170 -0
- package/src/rule-packs/react-component-architecture/rules/no-inline-jsx-functions.ts +66 -0
- package/src/rule-packs/react-component-architecture/utils.ts +47 -0
- package/src/rule-packs/rule-packs.types.ts +18 -0
- package/src/rule-packs/structured-logging/index.ts +26 -0
- package/src/rule-packs/structured-logging/rules/mask-pii-fields.ts +221 -0
- package/src/rule-packs/structured-logging/rules/no-error-stringify.ts +217 -0
- package/src/rule-packs/structured-logging/rules/require-event-field.ts +136 -0
- package/src/rule-packs/structured-logging/utils/logger.ts +104 -0
- package/src/rule-packs/tanstack-query/index.ts +20 -0
- package/src/rule-packs/tanstack-query/rules/prefix-query-key-must-use-set-queries-data.ts +321 -0
- package/src/rule-packs/test-conventions/index.ts +23 -0
- package/src/rule-packs/test-conventions/rules/index.ts +2 -0
- package/src/rule-packs/test-conventions/rules/no-focused-tests.ts +170 -0
- package/src/rule-packs/test-conventions/rules/test-file-mirrors-source.ts +127 -0
- package/src/rule-packs/utils.ts +142 -0
- package/src/session-store.ts +359 -0
- package/src/spec/generate-tests.ts +213 -0
- package/src/spec/index.ts +5 -0
- package/src/spec/parse.ts +152 -0
- package/src/spec/review-tests.ts +162 -0
- package/src/spec/spec.constants.ts +13 -0
- package/src/spec/spec.types.ts +79 -0
- package/src/stack-detection/detect.ts +246 -0
- package/src/stack-detection/index.ts +3 -0
- package/src/stack-detection/packs.ts +174 -0
- package/src/stack-detection/stack-detection.types.ts +47 -0
- package/src/validate/accept.ts +49 -0
- package/src/validate/errors.ts +35 -0
- package/src/validate/index.ts +12 -0
- package/src/validate/parse.ts +148 -0
- package/src/validate/run-tests.ts +59 -0
- package/src/validate/validate.ts +40 -0
- package/src/validate/validate.types.ts +52 -0
- package/src/web-components.ts +638 -0
- package/src/web-coverage.ts +89 -0
- package/src/web-routes.ts +151 -0
- package/src/web-templates.ts +1011 -0
- package/strict.eslint.config.mjs +84 -0
- package/strict.web.eslint.config.mjs +185 -0
|
@@ -0,0 +1,498 @@
|
|
|
1
|
+
import { join } from "node:path";
|
|
2
|
+
import { ESLint } from "eslint";
|
|
3
|
+
import { WEB_TEMPLATES, type WebFramework } from "./web-templates";
|
|
4
|
+
|
|
5
|
+
/**
 * The gate that confirms "done" — the piece that makes tsforge a
 * TypeScript-SPECIALIZED harness rather than a generic file editor. Whatever the
 * model writes is held to strict TS in two layers, both run with tsforge's OWN
 * bundled toolchain so the check works on any target regardless of how that
 * project is set up:
 *   1. `tsc --strict --noUncheckedIndexedAccess` — the TYPE-aware floor
 *      (unguarded `arr[i]`, null-safety, real type errors). A greenfield project
 *      is given a strict tsconfig; an existing project's tsconfig is respected.
 *   2. the bundled eslint strict config — the SYNTACTIC idioms (no
 *      `as`/`any`/`!`, no over-annotation), which need no type info or deps.
 * The deterministic gate loop, rule-docs cards and ast-grep polish then push the
 * local model's output up to that bar — that is the uplift.
 */
export interface IGate {
  /** Shell command that verifies the work; it passes only when it exits 0. */
  command: string;
  /** Short human-readable label shown in the banner. */
  label: string;
}
|
|
24
|
+
|
|
25
|
+
// Locations of tsforge's own bundled toolchain. Everything is derived from this
// module's directory so the paths resolve wherever the harness itself lives.
// NOTE(review): ROOT is three directories above this module — confirm that is
// where `node_modules/.bin` sits in every install layout (monorepo vs. published
// package).
const ROOT = join(import.meta.dir, "..", "..", "..");
const ESLINT_BIN = join(ROOT, "node_modules", ".bin", "eslint");
const TSC_BIN = join(ROOT, "node_modules", ".bin", "tsc");
const PRETTIER_BIN = join(ROOT, "node_modules", ".bin", "prettier");

// The bundled strict ESLint config, one directory above this module.
const STRICT_CONFIG = join(import.meta.dir, "..", "strict.eslint.config.mjs");

// Helper scripts, expected in a `scripts/` directory beside this module's parent.
// NOTE(review): confirm `scripts/` is actually shipped with the published
// package — the gate commands built in this file invoke both scripts.
const BROWSER_CHECK = join(
  import.meta.dir,
  "..",
  "scripts",
  "browser-check.ts"
);

const STUB_CHECK = join(import.meta.dir, "..", "scripts", "stub-check.ts");
|
|
40
|
+
|
|
41
|
+
// The tsconfig tsforge lays down for a greenfield project. It turns on `strict`
// plus the index-safety checks the local model is weakest at, pulls in the DOM
// libs and the `react-jsx` JSX mode so browser / React code type-checks, and
// sets `skipLibCheck` so dependency `.d.ts` files never trip the gate. It is
// check-only (`noEmit`).
const STRICT_TSCONFIG = `{
  "compilerOptions": {
    "target": "ES2022",
    "module": "ESNext",
    "moduleResolution": "bundler",
    "lib": ["ES2022", "DOM", "DOM.Iterable"],
    "jsx": "react-jsx",
    "strict": true,
    "noUncheckedIndexedAccess": true,
    "noImplicitOverride": true,
    "noFallthroughCasesInSwitch": true,
    "esModuleInterop": true,
    "forceConsistentCasingInFileNames": true,
    "skipLibCheck": true,
    "noEmit": true
  },
  "include": ["**/*.ts", "**/*.tsx"],
  "exclude": ["node_modules", "dist", "build", "scratch"]
}
`;
|
|
64
|
+
|
|
65
|
+
/**
 * Strict overlay for a project that ALREADY has a tsconfig. It extends the
 * project's own `./tsconfig.json` (so its paths/jsx/module/lib still resolve — a
 * bare strict config would mis-compile a real app) and FORCES every strictness
 * flag on top, so a loosely-configured repo still gets tsforge's strict-TS
 * floor. Written as a sibling `tsforge.tsconfig.json` and gated with `tsc -p`.
 *
 * Every member of the `strict` family is spelled out explicitly on purpose:
 * `extends` merges `compilerOptions` key by key and an individual flag wins over
 * `strict`, so a base config containing e.g. `"strictNullChecks": false` would
 * otherwise stay loose even with `"strict": true` set here.
 */
const STRICT_TSCONFIG_OVERRIDE = `{
  "extends": "./tsconfig.json",
  "compilerOptions": {
    "strict": true,
    "noImplicitAny": true,
    "noImplicitThis": true,
    "strictNullChecks": true,
    "strictFunctionTypes": true,
    "strictBindCallApply": true,
    "strictPropertyInitialization": true,
    "alwaysStrict": true,
    "useUnknownInCatchVariables": true,
    "noUncheckedIndexedAccess": true,
    "noImplicitOverride": true,
    "noFallthroughCasesInSwitch": true,
    "skipLibCheck": true,
    "noEmit": true
  }
}
`;
|
|
82
|
+
|
|
83
|
+
// Path to the bundled web-tailored strict ESLint config, one directory above
// this module. The web-stack scaffolds themselves (Vite + React full-kit, or
// Vite vanilla) live in the registry; this module only lays them down and builds
// their gate. shadcn/TanStack boilerplate is held to this web config (described
// as having no `I`-prefix rule — React names interfaces `Props`, not `IProps`),
// with vendored/generated dirs exempted.
// NOTE(review): the build preamble in this file tells the model to `I`-prefix
// interfaces — confirm which convention the web config really enforces.
const STRICT_WEB_CONFIG = join(
  import.meta.dir,
  "..",
  "strict.web.eslint.config.mjs"
);
|
|
92
|
+
|
|
93
|
+
/** Every web framework the spec Q&A is able to scaffold, in offer order. */
export const WEB_FRAMEWORKS: readonly WebFramework[] = ["react", "vanilla"];
|
|
95
|
+
|
|
96
|
+
/**
 * A single lint violation reported for one file. Only errors are represented;
 * this is the shape handed back as write-time feedback.
 */
export interface IFileLintProblem {
  /** Line number ESLint attached to the message. */
  line: number;
  /** The rule's human-readable message. */
  message: string;
  /** Id of the rule that fired. */
  ruleId: string;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Lints ONE just-written file, identified by absolute path, and resolves to its
 * errors. A linter instance is meant to be reused for every write.
 */
export type FileLinter = (absPath: string) => Promise<IFileLintProblem[]>;
|
|
105
|
+
|
|
106
|
+
/**
 * Create a WRITE-TIME, single-file linter backed by the SAME bundled strict
 * config the gate's eslint step uses.
 *
 * Why it exists: the write-guard type-checks each new file with tsc, but tsc
 * cannot see the lint-only strictness rules (the `no-as` cast ban, `I`-prefix,
 * `prefer-template`). A run log showed `Object.keys(x) as unknown as ...` in
 * every domain file — type-valid, so it passed the type guard, and the `as`
 * violations piled up unseen until the gate. Linting at write time lets the
 * model fix them in-context instead of in a late repair spiral.
 *
 * The linter runs in-process through the ESLint API; the engine is created on
 * first use and reused afterwards, so there is no per-write cold start. It is
 * best-effort: any failure yields `[]` and never breaks the build — the gate
 * stays the authority.
 *
 * @param framework "core" selects the core strict config; a web framework
 *   selects the web strict config plus that template's eslint ignore globs.
 * @param cwd The app directory, so the vendored-code ignore globs (ui/, lib/,
 *   *.gen.ts) resolve correctly.
 * @param packIds Optional rule packs, appended through `overrideConfig` (which
 *   applies after the bundled config) for stack-aware write-time feedback.
 * @param ruleOverrides Optional severities keyed by bare rule name ("off"
 *   silences a rule). Only consulted when `packIds` is non-empty.
 * @returns A linter resolving to the hand-fix-required errors of one file.
 */
export function makeFileLinter(
  framework: WebFramework | "core",
  cwd: string,
  packIds?: readonly string[],
  ruleOverrides?: Readonly<Record<string, "error" | "warn" | "off">>
): FileLinter {
  interface IEslintOptions {
    cwd: string;
    overrideConfigFile: string;
    overrideConfig?: Record<string, unknown>[];
  }

  const overrideConfigFile =
    framework === "core" ? STRICT_CONFIG : STRICT_WEB_CONFIG;
  const ignores =
    framework === "core" ? [] : WEB_TEMPLATES[framework].eslintIgnore;

  // Created lazily on the first lint; stays null if construction ever throws,
  // so the next call simply tries again.
  let cached: ESLint | null = null;

  const buildEngine = async (): Promise<ESLint> => {
    // Override layers, in application order: ignore globs first, packs second.
    const layers: Record<string, unknown>[] = [];

    if (ignores.length > 0) {
      layers.push({ ignores });
    }

    if (packIds !== undefined && packIds.length > 0) {
      const { buildPackEslintConfig } = await import("./rule-packs/index");
      const { plugin, rules } = buildPackEslintConfig(packIds, ruleOverrides);

      layers.push({
        files: ["**/*.ts", "**/*.tsx"],
        plugins: { tsforge: plugin },
        rules,
      });
    }

    const options: IEslintOptions = { cwd, overrideConfigFile };

    if (layers.length > 0) {
      options.overrideConfig = layers;
    }

    return new ESLint(options);
  };

  return async (absPath) => {
    try {
      let engine = cached;

      if (engine === null) {
        engine = await buildEngine();
        cached = engine;
      }

      const [report] = await engine.lintFiles([absPath]);

      if (report === undefined) {
        return [];
      }

      // Report ONLY errors the model has to fix BY HAND. ESLint attaches `fix`
      // to a message when the rule is auto-fixable; those (padding-line, quotes,
      // semis, curly, prefer-const…) are cleaned up for free by the gate's
      // `eslint --fix`/`prettier` janitor. Nagging about them burns turns and,
      // for interdependent rules like padding-line, OSCILLATES (fix one blank
      // line, the rule flags the next) — a thrash seen in a run log. What is
      // left is the hand-fix set: `as`-casts, `any`, I-prefix, one-component…
      const problems: IFileLintProblem[] = [];

      for (const entry of report.messages) {
        if (entry.severity !== 2 || entry.fix !== undefined) {
          continue;
        }

        problems.push({
          line: entry.line,
          message: entry.message,
          ruleId: entry.ruleId ?? "?",
        });
      }

      return problems;
    } catch {
      // Deliberate best-effort: a broken linter must never block a write.
      return [];
    }
  };
}
|
|
207
|
+
|
|
208
|
+
/**
 * Lay down a stack's opinionated skeleton. Non-destructive: each template file
 * is written only when it is missing. Dependency install is a separate step
 * (`installWebDeps`), which keeps this pure, fast and offline-testable.
 *
 * @param cwd Directory the skeleton is written into.
 * @param framework Which web template's file set to lay down.
 */
export async function scaffoldWeb(
  cwd: string,
  framework: WebFramework
): Promise<void> {
  const { files } = WEB_TEMPLATES[framework];

  // One file at a time, in the template's own order.
  for (const [relativePath, contents] of Object.entries(files)) {
    await ensureFile(cwd, relativePath, contents);
  }
}
|
|
221
|
+
|
|
222
|
+
/**
 * How a build turn must behave — prepended to every stack's guidance. The base
 * CLI prompt is conversational ("reply with the code") and carries the CORE
 * harness's TS house-rules (I-prefixed interfaces, no `as`). Both are WRONG for a
 * web build: it must write files via tools, and a Vite/React app's gate uses the
 * web lint config (no I-prefix, `as const` allowed). This block overrides both,
 * so the model writes conforming code up front instead of writing idiomatic code
 * and then "correcting" it toward rules the web gate never enforces.
 *
 * NOTE(review): the TYPE STYLE section of the text below tells the model that
 * interfaces ARE `I`-prefixed, which contradicts the "no I-prefix" statement
 * above. Confirm which convention the web lint config actually enforces and
 * bring this comment and the prompt text into agreement.
 */
|
|
231
|
+
const BUILD_PREAMBLE = [
|
|
232
|
+
"You are BUILDING this app. You produce files by CALLING TOOLS, not by writing",
|
|
233
|
+
"them in your reply: a chat message is never saved to disk and cannot run.",
|
|
234
|
+
"Call `create` once per file (relative path + full contents), ONE file per call,",
|
|
235
|
+
"starting with the first file NOW — do not pre-write everything in prose. After",
|
|
236
|
+
"you stop, the gate builds the app and reports what to fix; then edit and",
|
|
237
|
+
"continue until it passes. Never paste file contents into your message.",
|
|
238
|
+
"",
|
|
239
|
+
"TYPE STYLE — the gate checks these; write them this way the FIRST time (the",
|
|
240
|
+
"gate rejects code that breaks them, and fixing after costs extra turns):",
|
|
241
|
+
" • Interfaces are `I`-prefixed PascalCase: `interface IIssue`, `interface",
|
|
242
|
+
" IButtonProps` — NOT `Issue` / `ButtonProps`. Write the `I` from the start;",
|
|
243
|
+
" do not emit a bare name and then rename it. (Type ALIASES — `type Status =`",
|
|
244
|
+
" — are not prefixed.)",
|
|
245
|
+
" • `as const` IS allowed and PREFERRED for literal data and registries (e.g.",
|
|
246
|
+
" `const STATUS = {...} as const`). Still forbidden: `any`, value-changing",
|
|
247
|
+
" `as` casts, non-null `!`. Use `===`, never `var`.",
|
|
248
|
+
" • REGISTRIES (the #1 source of type errors): for an `as const` object, DERIVE",
|
|
249
|
+
" its types — `type Status = keyof typeof STATUSES`, `type StatusInfo =",
|
|
250
|
+
" (typeof STATUSES)[Status]`. Do NOT declare a separate interface the object",
|
|
251
|
+
" must match (its `readonly`/literal types won't assign → a wall of TS2322).",
|
|
252
|
+
" To VALIDATE a registry's shape, append `satisfies` — `const STATUSES = {...}",
|
|
253
|
+
" as const satisfies Record<string, IStatusInfo>` — it checks the shape while",
|
|
254
|
+
" keeping the literals, and is NOT an `as` cast (allowed). Need a typed key",
|
|
255
|
+
" array? `Object.keys(x)` is `string[]`; do NOT cast it — make the array the",
|
|
256
|
+
" source (`const STATUS_KEYS = [...] as const; type Status = (typeof",
|
|
257
|
+
" STATUS_KEYS)[number]`) and build the registry from it.",
|
|
258
|
+
"",
|
|
259
|
+
"Write it RIGHT the first time — these are the gate's hard rules; code that",
|
|
260
|
+
"breaks them is rejected and costs you extra turns. The fixes are not optional",
|
|
261
|
+
"polish, they are how you write the line:",
|
|
262
|
+
" • No `x as Foo`. Narrow instead: `if (!(x instanceof Foo)) return;` or a type",
|
|
263
|
+
" guard, or type the value at its source. For event targets, check the type.",
|
|
264
|
+
" • SEED/DATA arrays: an UNANNOTATED literal widens (`priority: 'high'` becomes",
|
|
265
|
+
" `string`), so it won't fit `IThing[]` and you CANNOT cast it (`as` is banned).",
|
|
266
|
+
" Always pin the type ONE of two ways, then write PLAIN literals (no per-field",
|
|
267
|
+
" `as`): annotate — `const SEED: readonly IThing[] = [...]` — OR append",
|
|
268
|
+
" `satisfies` — `const SEED = [...] satisfies readonly IThing[]` (also flags a",
|
|
269
|
+
" WRONG enum value, e.g. a `priority` not in the union). A literal that's a member",
|
|
270
|
+
" of the union is already assignable; never write `'high' as Priority`.",
|
|
271
|
+
" • No `arr[i]!` / `obj.maybe!`. Guard: `const v = arr[i]; if (v === undefined)",
|
|
272
|
+
" return;` — array/Map index access is `T | undefined` here.",
|
|
273
|
+
" • No `any`. Use `unknown` + a narrow, or write the real type.",
|
|
274
|
+
" • Type every function parameter and every `useState`/`useRef` generic.",
|
|
275
|
+
"",
|
|
276
|
+
"Work directly — do NOT restate the task, announce a plan, or narrate progress",
|
|
277
|
+
"between steps ('The user wants me to…', 'I was in the middle of…', 'Now let me…').",
|
|
278
|
+
"That text is wasted. Emit the next tool call.",
|
|
279
|
+
"",
|
|
280
|
+
"NO COMMENTS in the code you write. A comment is generated text that costs you",
|
|
281
|
+
"time, and these add nothing: file-header banners that restate the filename,",
|
|
282
|
+
"decorative section dividers, and lines that restate the code or narrate where a",
|
|
283
|
+
"symbol is defined. Write self-explanatory names instead. The ONLY allowed comment",
|
|
284
|
+
"explains a non-obvious WHY the code cannot — most files need none. No JSDoc.",
|
|
285
|
+
].join("\n");
|
|
286
|
+
|
|
287
|
+
/**
 * Build the system-prompt guidance for a stack: the shared build preamble,
 * a blank line, then the template's own structure/conventions guidance.
 *
 * @param framework Which web template's guidance to append.
 * @returns The combined guidance text.
 */
export function webGuidance(framework: WebFramework): string {
  const { guidance } = WEB_TEMPLATES[framework];

  return `${BUILD_PREAMBLE}\n\n${guidance}`;
}
|
|
291
|
+
|
|
292
|
+
/**
 * Install the scaffold's dependencies (react/vite/tailwind/…) by running
 * `bun install` in `cwd`, with its output streamed straight to the terminal.
 * This has to happen before the gate's tsc + vite build can run. The install is
 * skipped when `node_modules/.bin/vite` already exists under `cwd`.
 *
 * @param cwd The app directory to install into.
 * @returns `true` when deps were already present or the install exited 0,
 *   `false` when the install failed.
 */
export async function installWebDeps(cwd: string): Promise<boolean> {
  const viteBin = Bun.file(join(cwd, "node_modules", ".bin", "vite"));
  const alreadyInstalled = await viteBin.exists();

  if (alreadyInstalled) {
    return true;
  }

  const install = Bun.spawn(["bun", "install"], {
    cwd,
    stdout: "inherit",
    stderr: "inherit",
  });
  const exitCode = await install.exited;

  return exitCode === 0;
}
|
|
308
|
+
|
|
309
|
+
/**
 * The full web ladder: `vite build` + tsc strict + web eslint (vendored-exempt) +
 * stub check + prettier check + browser render of the built `dist/`. Build runs
 * FIRST so any codegen (e.g. TanStack Router's routeTree.gen.ts) exists before
 * tsc; `vite build` is itself the bundler oracle — it resolves imports, compiles
 * JSX/Tailwind, and fails on anything broken.
 *
 * @param framework Which web template supplies the label and eslint ignores.
 * @returns The gate: one `&&`-chained shell command plus its banner label.
 */
export function buildWebGate(framework: WebFramework): IGate {
  const template = WEB_TEMPLATES[framework];
  const build = "bun run build";
  const tsc = `"${TSC_BIN}" --noEmit -p tsconfig.json`;
  // Assembled from separate arguments so an empty ignore list leaves no gap.
  // Collapsing whitespace on the finished string instead would also rewrite any
  // whitespace run inside the quoted binary/config paths or the globs.
  const lint = [
    `"${ESLINT_BIN}"`,
    "--no-config-lookup",
    `-c "${STRICT_WEB_CONFIG}"`,
    ...template.eslintIgnore.map((glob) => `--ignore-pattern "${glob}"`),
    "--format json .",
  ].join(" ");
  // GENERIC BEHAVIOUR SMOKE (--smoke): the gate proves the built app mounts in a
  // real browser AND survives interaction — it asserts the React root rendered
  // content (a blank white screen is a silent failure tsc/eslint never catch) and
  // clicks the first few buttons with zero uncaught/console errors. This is
  // HARNESS-authored and app-agnostic: we deliberately do NOT run a model-authored
  // checks.json — the 27b writes over-strict interaction assertions (exact
  // placeholders/fill flows) it then can't satisfy and spirals on (iter3/4).
  const render = `bun "${BROWSER_CHECK}" dist/index.html --smoke --crawl`;
  // Prettier enforces formatting (the fix step runs `prettier --write` first, so
  // this passes without the model ever hand-formatting). Respects .prettierignore
  // (vendored ui/ + lib/ skipped). Runs after lint so a parse error fails there.
  const format = `"${PRETTIER_BIN}" --check .`;

  // Fail if any route is still an unfilled scaffold stub (empty page that coverage
  // + the render smoke both miss). Runs before the browser so the cheap check
  // fails fast.
  const stubs = `bun "${STUB_CHECK}" .`;

  return {
    command: [build, tsc, lint, stubs, format, render].join(" && "),
    label: `${template.label} (build + behaviour smoke)`,
  };
}
|
|
349
|
+
|
|
350
|
+
/**
 * A TYPES-only gate for the staged DESIGN phase: `tsc --noEmit` + web eslint, but
 * NO vite build / browser (the app has no UI yet). This surfaces the `as const`↔
 * interface `TS2322` errors and the I-prefix/`as`-cast lint on the TYPE CONTRACT
 * ALONE — caught small and isolated, before any component is built — instead of
 * as a 20-error avalanche at the very end (the Linear-clone failure mode).
 *
 * @param framework Which web template supplies the label and eslint ignores.
 * @returns The gate: `tsc && eslint` as one shell command plus its banner label.
 */
export function buildWebTypeGate(framework: WebFramework): IGate {
  const template = WEB_TEMPLATES[framework];
  const tsc = `"${TSC_BIN}" --noEmit -p tsconfig.json`;
  // Assembled from separate arguments so an empty ignore list leaves no gap.
  // Collapsing whitespace on the finished string instead would also rewrite any
  // whitespace run inside the quoted binary/config paths or the globs.
  const lint = [
    `"${ESLINT_BIN}"`,
    "--no-config-lookup",
    `-c "${STRICT_WEB_CONFIG}"`,
    ...template.eslintIgnore.map((glob) => `--ignore-pattern "${glob}"`),
    "--format json .",
  ].join(" ");

  return { command: `${tsc} && ${lint}`, label: `${template.label} (types)` };
}
|
|
371
|
+
|
|
372
|
+
/**
 * The FAST incremental check: nothing but `tsc --noEmit` against the project's
 * `tsconfig.json`, run every few edits while building so type errors (the
 * avalanche source) surface early. Lint is left to the full gate — running it
 * every few edits is noisy on half-written files.
 *
 * @returns The shell command string.
 */
export function buildWebTscCheck(): string {
  const argv = [`"${TSC_BIN}"`, "--noEmit", "-p", "tsconfig.json"];

  return argv.join(" ");
}
|
|
378
|
+
|
|
379
|
+
/**
 * The web auto-fix command — the deterministic JANITOR, run BEFORE the gate each
 * cycle so the model NEVER spends (slow, costly) tokens on mechanical cleanup:
 *   1. `eslint --fix` — prefer-const, no-var, curly, inferrable types, AND the
 *      boringstack blank-lines (padding-line-between-statements is auto-fixable).
 *   2. `prettier --write` — all whitespace/quotes/semis/width formatting.
 * (Unused/missing imports are handled separately by the TS quick-fix pass.) The
 * unfixable rules (`any`/`as`/`!`) still need the model.
 *
 * The two steps are joined with `;` rather than `&&`, so prettier still runs
 * when eslint exits non-zero because unfixable errors remain.
 *
 * @param framework - Key into `WEB_TEMPLATES`; supplies the eslint ignore globs.
 * @returns The shell command string `<eslint --fix …> ; <prettier --write .>`.
 */
export function buildWebFix(framework: WebFramework): string {
  // Assemble the eslint invocation from discrete arguments. Collapsing
  // whitespace over the finished string would also rewrite runs of whitespace
  // INSIDE the quoted binary/config paths and ignore globs, corrupting them.
  const lintFix = [
    `"${ESLINT_BIN}"`,
    "--no-config-lookup",
    `-c "${STRICT_WEB_CONFIG}"`,
    ...WEB_TEMPLATES[framework].eslintIgnore.map(
      (glob) => `--ignore-pattern "${glob}"`
    ),
    "--fix",
    ".",
  ].join(" ");
  const format = `"${PRETTIER_BIN}" --write .`;

  return `${lintFix} ; ${format}`;
}
|
|
403
|
+
|
|
404
|
+
/**
 * Write `content` to `<cwd>/<name>` only when no file exists at that path yet;
 * an existing file is left untouched.
 *
 * @param cwd - Directory the file lives in.
 * @param name - File name (joined onto `cwd`).
 * @param content - Text written when the file is missing.
 */
async function ensureFile(
  cwd: string,
  name: string,
  content: string
): Promise<void> {
  const target = Bun.file(join(cwd, name));
  const alreadyPresent = await target.exists();

  if (alreadyPresent) {
    return;
  }

  await Bun.write(target, content);
}
|
|
415
|
+
|
|
416
|
+
/**
 * Assemble the default gate for a project: the strict tsc step (when `tscPart`
 * yields one) followed by the bundled strict eslint step.
 *
 * @param cwd - Project directory, handed to `tscPart`.
 * @param packs - Optional pack names forwarded to `lintPart`.
 * @param ruleOverrides - Optional per-rule severity overrides forwarded to `lintPart`.
 * @returns A gate whose command chains the steps with `&&` and whose label
 *   joins the step labels with ` + `.
 */
export async function buildGate(
  cwd: string,
  packs?: readonly string[],
  ruleOverrides?: Readonly<Record<string, "error" | "warn" | "off">>
): Promise<IGate> {
  const typeCheck = await tscPart(cwd);
  const lint = lintPart(packs, ruleOverrides);

  // tsc (if any) always precedes lint.
  const steps: IGate[] =
    typeCheck === null
      ? [lint]
      : [{ command: typeCheck, label: "tsc --strict" }, lint];

  return {
    command: steps.map((step) => step.command).join(" && "),
    label: steps.map((step) => step.label).join(" + "),
  };
}
|
|
438
|
+
|
|
439
|
+
/**
 * The type-aware floor — ALWAYS tsforge-strict (user policy: a repo's own config
 * is never trusted to be strict enough).
 *
 * - Project already has a `tsconfig.json`: (re)write `tsforge.tsconfig.json`
 *   with `STRICT_TSCONFIG_OVERRIDE` and type-check against that file.
 * - Greenfield with a `package.json`: write a `tsconfig.json` containing
 *   `STRICT_TSCONFIG` and type-check against it.
 * - Neither file present: not treated as a TS project; nothing is written.
 *
 * @param cwd - Project directory to inspect (and write the config into).
 * @returns The tsc shell command, or `null` when there is nothing to gate.
 */
async function tscPart(cwd: string): Promise<string | null> {
  const typeCheckAgainst = (project: string): string =>
    `"${TSC_BIN}" --noEmit -p ${project}`;

  if (await Bun.file(join(cwd, "tsconfig.json")).exists()) {
    await Bun.write(
      join(cwd, "tsforge.tsconfig.json"),
      STRICT_TSCONFIG_OVERRIDE
    );

    return typeCheckAgainst("tsforge.tsconfig.json");
  }

  // Greenfield: only bring a tsconfig when a package.json marks this as a real
  // project, so a random directory is never littered.
  const isPackage = await Bun.file(join(cwd, "package.json")).exists();

  if (!isPackage) {
    return null;
  }

  await Bun.write(join(cwd, "tsconfig.json"), STRICT_TSCONFIG);

  return typeCheckAgainst("tsconfig.json");
}
|
|
467
|
+
|
|
468
|
+
/**
 * The syntactic idiom layer — ALWAYS tsforge's bundled strict eslint config
 * (user policy). We deliberately do NOT defer to the project's own `lint`
 * script: that's exactly how a weak repo would dodge the strict-TS floor. The
 * bundled config needs no deps in the target.
 *
 * Optional inputs travel to the config as environment assignments prefixed to
 * the command:
 *  - `TSFORGE_PACKS` — comma-joined pack names (omitted when none are given);
 *  - `TSFORGE_RULE_OVERRIDES` — the JSON-encoded override map (omitted when empty).
 *
 * Values are shell-quoted when they contain anything beyond a conservative safe
 * character set. Without that, the shell's quote removal strips the double
 * quotes out of the JSON and the config receives an unparseable value.
 *
 * @param packs - Optional pack names to enable.
 * @param ruleOverrides - Optional per-rule severity overrides.
 * @returns The eslint gate step (command + label).
 */
function lintPart(
  packs?: readonly string[],
  ruleOverrides?: Readonly<Record<string, "error" | "warn" | "off">>
): IGate {
  // Leave plain values untouched; wrap everything else in single quotes,
  // escaping embedded single quotes with the POSIX `'\''` idiom.
  const shellQuote = (value: string): string =>
    /^[A-Za-z0-9_@%+=:,./-]+$/.test(value)
      ? value
      : `'${value.replace(/'/g, `'\\''`)}'`;

  const envParts: string[] = [];

  if (packs !== undefined && packs.length > 0) {
    envParts.push(`TSFORGE_PACKS=${shellQuote(packs.join(","))}`);
  }

  if (
    ruleOverrides !== undefined &&
    typeof ruleOverrides === "object" &&
    Object.keys(ruleOverrides).length > 0
  ) {
    envParts.push(
      `TSFORGE_RULE_OVERRIDES=${shellQuote(JSON.stringify(ruleOverrides))}`
    );
  }

  const envPrefix = envParts.length > 0 ? `${envParts.join(" ")} ` : "";

  return {
    command: `${envPrefix}bun "${ESLINT_BIN}" --no-config-lookup -c "${STRICT_CONFIG}" --format json .`,
    label: "strict TypeScript (tsforge)",
  };
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/** Everything the LLM judge is shown about one solution. */
export interface IJudgeInput {
  /** The task statement; sent under the `Goal:` heading of the judge prompt. */
  goal: string;
  /** Acceptance criteria text; sent under `Acceptance criteria:`. */
  criteria: string;
  /** The solution source being scored; sent under `Solution:`. */
  code: string;
}
|
|
6
|
+
|
|
7
|
+
/**
 * A quality score from an LLM reviewer — what the gate can't see. Each numeric
 * dimension is 1–5; `judge` stores 0 for a dimension that was missing, not a
 * number, or outside that range.
 */
export interface IJudgeScore {
  /** Overall score. */
  overall: number;
  /** Correctness/robustness score. */
  correctness: number;
  /** Design score. */
  design: number;
  /** Readability / idiomatic-TS score. */
  readability: number;
  /** Short free-text remark from the reviewer (empty when none was given). */
  notes: string;
}
|
|
15
|
+
|
|
16
|
+
/** The outcome of a single eval run of one variant. */
export interface IRunRecord {
  /** Variant label (e.g. "temp=0"); runs are grouped by it when summarised. */
  label: string;
  /** Whether the run passed. */
  passed: boolean;
  /** Cycle count for the run (presumably fix/gate iterations — confirm with the producer). */
  cycles: number;
  /** Duration of the run (presumably milliseconds, per the name — confirm with the producer). */
  ms: number;
  /** LLM-judge quality score (1–5), when available. */
  quality?: number;
}
|
|
25
|
+
|
|
26
|
+
/** Metrics for one variant, aggregated over all of its runs. */
export interface IVariantSummary {
  /** The variant label the runs were grouped under. */
  label: string;
  /** Number of runs recorded for the variant. */
  runs: number;
  /** How many of those runs passed. */
  passed: number;
  /** `passed / runs`. */
  passRate: number;
  /** Mean `cycles` across the runs. */
  avgCycles: number;
  /** Mean `ms` across the runs. */
  avgMs: number;
  /** Average quality across runs that were scored (0 if none). */
  avgQuality: number;
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import type { IJudgeInput, IJudgeScore } from "./eval.types";
|
|
2
|
+
import type { IProvider } from "../inference";
|
|
3
|
+
import { isRecord } from "../lib/guards";
|
|
4
|
+
import { extractJson } from "../lib/json";
|
|
5
|
+
|
|
6
|
+
/**
 * System prompt for the LLM judge. The judge scores a green solution on the
 * quality dimensions the deterministic gate can't assess. It is
 * provider-agnostic: point it at a flagship model to measure the local model's
 * gap to flagship quality. The prompt demands a bare JSON object so the reply
 * can be parsed mechanically.
 */
const SYSTEM = [
  "You are a senior TypeScript reviewer. Score the solution 1–5 on each of: ",
  "correctness/robustness (beyond the given tests), design, and readability/idiomatic TS. ",
  'Respond with ONLY a JSON object: {"overall":1-5,"correctness":1-5,"design":1-5,"readability":1-5,"notes":"<one sentence>"}.',
].join("");
|
|
15
|
+
|
|
16
|
+
/**
 * Sentinel score handed back when the judge's reply cannot be turned into a
 * JSON object: every dimension is 0 and `notes` records why.
 */
const UNPARSEABLE: IJudgeScore = {
  overall: 0,
  correctness: 0,
  design: 0,
  readability: 0,
  notes: "unparseable judge response",
};
|
|
23
|
+
|
|
24
|
+
/**
 * Ask `provider` to review one solution and turn its reply into a score.
 *
 * The request is a system message (`SYSTEM`) plus one user message carrying the
 * goal, acceptance criteria and solution, sent with `temperature: 0`.
 *
 * @param provider - Completion provider to query.
 * @param input - Goal, acceptance criteria and solution source.
 * @returns The parsed score. `UNPARSEABLE` is returned when the reply contains
 *   no parseable JSON or the JSON is not a record; individual dimensions that
 *   are missing or out of range come back as 0, and non-string `notes` as "".
 */
export async function judge(
  provider: IProvider,
  input: IJudgeInput
): Promise<IJudgeScore> {
  const prompt = `Goal: ${input.goal}\n\nAcceptance criteria:\n${input.criteria}\n\nSolution:\n${input.code}`;
  const reply = await provider.complete(
    [
      { role: "system", content: SYSTEM },
      { role: "user", content: prompt },
    ],
    { temperature: 0 }
  );

  let payload: unknown;

  try {
    payload = JSON.parse(extractJson(reply.content));
  } catch {
    // Either no JSON could be extracted or it failed to parse.
    return UNPARSEABLE;
  }

  if (!isRecord(payload)) {
    return UNPARSEABLE;
  }

  const { overall, correctness, design, readability, notes } = payload;

  return {
    overall: clampScore(overall),
    correctness: clampScore(correctness),
    design: clampScore(design),
    readability: clampScore(readability),
    notes: typeof notes === "string" ? notes : "",
  };
}
|
|
59
|
+
|
|
60
|
+
/**
 * Validate one score dimension from the judge's JSON.
 *
 * Despite the name, out-of-range values are NOT pulled to the nearest bound:
 * anything that is not a number within 1..5 (inclusive) becomes 0.
 *
 * @param value - Raw value read from the parsed reply.
 * @returns `value` unchanged when it is a number in [1, 5]; otherwise 0.
 */
function clampScore(value: unknown): number {
  if (typeof value !== "number") {
    return 0;
  }

  const withinScale = value >= 1 && value <= 5;

  return withinScale ? value : 0;
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { IRunRecord, IVariantSummary } from "./eval.types";
|
|
2
|
+
|
|
3
|
+
/**
 * Aggregate run records per variant label.
 *
 * Records are grouped by `label`; summaries come out in the order each label
 * first appears in `records`.
 *
 * @param records - Individual run outcomes, in any order.
 * @returns One summary per distinct label: run count, pass count and rate, and
 *   the mean cycles/ms. `avgQuality` averages only runs that carry a `quality`
 *   value and is 0 when none do.
 */
export function summarize(records: IRunRecord[]): IVariantSummary[] {
  const emptyTally = () => ({
    runs: 0,
    passed: 0,
    cycles: 0,
    ms: 0,
    scoredRuns: 0,
    quality: 0,
  });
  const tallies = new Map<string, ReturnType<typeof emptyTally>>();

  for (const record of records) {
    let tally = tallies.get(record.label);

    if (tally === undefined) {
      tally = emptyTally();
      tallies.set(record.label, tally);
    }

    tally.runs += 1;
    tally.cycles += record.cycles;
    tally.ms += record.ms;

    if (record.passed) {
      tally.passed += 1;
    }

    if (record.quality !== undefined) {
      tally.scoredRuns += 1;
      tally.quality += record.quality;
    }
  }

  return Array.from(tallies, ([label, tally]) => ({
    label,
    runs: tally.runs,
    passed: tally.passed,
    passRate: tally.passed / tally.runs,
    avgCycles: tally.cycles / tally.runs,
    avgMs: tally.ms / tally.runs,
    avgQuality: tally.scoredRuns > 0 ? tally.quality / tally.scoredRuns : 0,
  }));
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { join } from "node:path";
|
|
2
|
+
import { CREATE_FAIL_REASON } from "./files.constants";
|
|
3
|
+
import type { CreateResult, ICreateFile } from "./files.types";
|
|
4
|
+
|
|
5
|
+
/**
 * Create a new file at `<cwd>/<create.file>` with `create.content`.
 *
 * An existing file is never overwritten (that's `edit`'s job), so the model
 * can't silently clobber work; the call reports `CREATE_FAIL_REASON.exists`
 * instead. Parent dirs are created as needed (relies on `Bun.write`).
 *
 * NOTE(review): `create.file` is joined onto `cwd` as-is; a `../` segment is
 * not rejected here — confirm callers validate the path.
 *
 * @param cwd - Base directory the relative file path is resolved against.
 * @param create - The file path (relative to `cwd`) and its content.
 * @returns `{ ok: true, file }` on success, or `{ ok: false, file, reason }`
 *   when a file already exists at the target path.
 */
export async function applyCreate(
  cwd: string,
  create: ICreateFile
): Promise<CreateResult> {
  const { file, content } = create;
  const target = join(cwd, file);
  const alreadyExists = await Bun.file(target).exists();

  if (alreadyExists) {
    return { ok: false, file, reason: CREATE_FAIL_REASON.exists };
  }

  await Bun.write(target, content);

  return { ok: true, file };
}
|