@agjs/tsforge 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@agjs/tsforge",
3
3
  "type": "module",
4
- "version": "0.3.1",
4
+ "version": "0.3.3",
5
5
  "license": "MIT",
6
6
  "description": "TypeScript coding harness with a deterministic gate, stack-aware guardrails, and stream-level correction.",
7
7
  "repository": {
package/scripts/sweep.ts CHANGED
@@ -1,5 +1,7 @@
1
- // Eval sweep: run a seed spec N times across temperature + feature flag variants, score, tabulate.
1
+ // Eval sweep: run seed spec(s) N times across temperature + feature flag variants, score, tabulate.
2
2
  // Run: TSFORGE_SEED=money TSFORGE_TEMPS=0,0.5 TSFORGE_REPEATS=3 bun run packages/core/scripts/sweep.ts
3
+ // TSFORGE_SEED accepts a comma-separated list (e.g. slugify,debounce,rate-limit) — each seed
4
+ // runs the full variant matrix and gets its own report + saved JSON.
3
5
  // A/B feature variants:
4
6
  // TSFORGE_FEATURE_VARIANTS=ttsr,hashline (sweep across feature toggles)
5
7
  // Each variant is dim=on|off (e.g. ttsr=on×hashline=off) creating a cartesian product.
@@ -15,6 +17,7 @@ import { providerConfig } from "../src/cli";
15
17
  import {
16
18
  summarize,
17
19
  classifyRun,
20
+ countTaskLoc,
18
21
  renderSweepReportMarkdown,
19
22
  buildSweepReport,
20
23
  type IRunRecord,
@@ -22,7 +25,10 @@ import {
22
25
  import { renderEvent } from "../src/render";
23
26
  import type { ILoopEvent } from "../src/loop";
24
27
 
25
- const seed = process.env.TSFORGE_SEED ?? "todo";
28
+ const seeds = (process.env.TSFORGE_SEED ?? "todo")
29
+ .split(",")
30
+ .map((s) => s.trim())
31
+ .filter((s) => s.length > 0);
26
32
  const temps = (process.env.TSFORGE_TEMPS ?? "0,0.5")
27
33
  .split(",")
28
34
  .map((t) => Number(t.trim()));
@@ -81,6 +87,8 @@ function variantToEnvVars(variant: IFeatureVariant): Record<string, string> {
81
87
  envVars.TSFORGE_HASHLINE = state === "1" ? "1" : "0";
82
88
  } else if (dim === "lsp_write_feedback") {
83
89
  envVars.TSFORGE_LSP_WRITE_FEEDBACK = state === "1" ? "1" : "0";
90
+ } else if (dim === "simplicity") {
91
+ envVars.TSFORGE_SIMPLICITY = state === "1" ? "1" : "0";
84
92
  }
85
93
  // else: unknown dimension, skip
86
94
  }
@@ -100,15 +108,17 @@ function variantLabel(variant: IFeatureVariant): string {
100
108
  const featureVariants = parseFeatureVariants();
101
109
 
102
110
  const evalsRoot = join(import.meta.dir, "..", "..", "..", "evals");
103
- // Prefer a local working seed (evals/<seed>); fall back to the committed corpus
104
- // (evals/corpus/<seed>) so checked-in seeds run with no manual copy step.
105
- const localSeedDir = join(evalsRoot, seed);
106
- const seedDir = (await Bun.file(join(localSeedDir, `${seed}.spec.md`)).exists())
107
- ? localSeedDir
108
- : join(evalsRoot, "corpus", seed);
109
- // Recursive so nested-directory apps (e.g. a React app under `src/`) copy whole;
110
- // flat single-dir evals are unaffected (recursive readdir returns the same list).
111
- const seedFiles = await readdir(seedDir, { recursive: true });
111
+
112
+ /** Resolve a seed's directory: prefer a local working seed (evals/<seed>); fall
113
+ * back to the committed corpus (evals/corpus/<seed>) so checked-in seeds run with
114
+ * no manual copy step. */
115
+ async function resolveSeedDir(seed: string): Promise<string> {
116
+ const local = join(evalsRoot, seed);
117
+
118
+ return (await Bun.file(join(local, `${seed}.spec.md`)).exists())
119
+ ? local
120
+ : join(evalsRoot, "corpus", seed);
121
+ }
112
122
 
113
123
  // Resolve the model the same way the CLI does: explicit TSFORGE_* env wins, else
114
124
  // the active entry from ~/.tsforge/models.json. (Previously this hardcoded the
@@ -151,36 +161,55 @@ function stamp(): string {
151
161
  return `${d.getFullYear()}${p(d.getMonth() + 1)}${p(d.getDate())}-${p(d.getHours())}${p(d.getMinutes())}${p(d.getSeconds())}`;
152
162
  }
153
163
 
154
- const records: IRunRecord[] = [];
155
-
156
- for (const variant of featureVariants) {
157
- const variantEnv = variantToEnvVars(variant);
158
- const vLabel = variantLabel(variant);
159
-
160
- for (const temp of temps) {
161
- for (let i = 0; i < repeats; i += 1) {
162
- const runId = `${seed}-${vLabel}-t${temp}-${stamp()}-${i + 1}`;
163
- const runDir = join(evalsRoot, "runs", runId);
164
-
165
- // One run's failure (e.g. a request timing out) must not abort the sweep —
166
- // record it as a blocked run and carry on, so a long batch is resilient.
167
- try {
168
- await runOne(runId, runDir, temp, i, variantEnv);
169
- } catch (err) {
170
- const message = err instanceof Error ? err.message : String(err);
171
-
172
- records.push({
173
- label: `${vLabel} temp=${temp}`,
174
- passed: false,
175
- cycles: 0,
176
- ms: 0,
177
- });
178
- process.stdout.write(
179
- ` ${seed} ${vLabel} temp=${temp} #${i + 1}: ERRORED (${message}) → ${runId}\n`
180
- );
164
+ for (const seed of seeds) {
165
+ const seedDir = await resolveSeedDir(seed);
166
+ // Recursive so nested-directory apps (e.g. a React app under `src/`) copy whole;
167
+ // flat single-dir evals are unaffected (recursive readdir returns the same list).
168
+ const seedFiles = await readdir(seedDir, { recursive: true });
169
+ const records: IRunRecord[] = [];
170
+
171
+ for (const variant of featureVariants) {
172
+ const variantEnv = variantToEnvVars(variant);
173
+ const vLabel = variantLabel(variant);
174
+
175
+ for (const temp of temps) {
176
+ for (let i = 0; i < repeats; i += 1) {
177
+ const runId = `${seed}-${vLabel}-t${temp}-${stamp()}-${i + 1}`;
178
+ const runDir = join(evalsRoot, "runs", runId);
179
+
180
+ // One run's failure (e.g. a request timing out) must not abort the sweep —
181
+ // record it as a blocked run and carry on, so a long batch is resilient.
182
+ try {
183
+ records.push(
184
+ await runOne(
185
+ seed,
186
+ seedDir,
187
+ seedFiles,
188
+ runId,
189
+ runDir,
190
+ temp,
191
+ i,
192
+ variantEnv
193
+ )
194
+ );
195
+ } catch (err) {
196
+ const message = err instanceof Error ? err.message : String(err);
197
+
198
+ records.push({
199
+ label: `${vLabel} temp=${temp}`,
200
+ passed: false,
201
+ cycles: 0,
202
+ ms: 0,
203
+ });
204
+ process.stdout.write(
205
+ ` ${seed} ${vLabel} temp=${temp} #${i + 1}: ERRORED (${message}) → ${runId}\n`
206
+ );
207
+ }
181
208
  }
182
209
  }
183
210
  }
211
+
212
+ await reportSeed(seed, records);
184
213
  }
185
214
 
186
215
  /** Set env vars for a variant, returning a restore function. */
@@ -206,7 +235,11 @@ function setVariantEnv(variant: Record<string, string>): () => void {
206
235
  }
207
236
 
208
237
  /** Copy seed files and prepare the run directory. */
209
- async function setupRunDir(dir: string): Promise<void> {
238
+ async function setupRunDir(
239
+ dir: string,
240
+ seedDir: string,
241
+ seedFiles: string[]
242
+ ): Promise<void> {
210
243
  await mkdir(dir, { recursive: true });
211
244
 
212
245
  for (const file of seedFiles) {
@@ -235,16 +268,19 @@ async function startRed(
235
268
  }
236
269
 
237
270
  async function runOne(
271
+ seed: string,
272
+ seedDir: string,
273
+ seedFiles: string[],
238
274
  runId: string,
239
275
  runDir: string,
240
276
  temp: number,
241
277
  i: number,
242
278
  variantEnv: Record<string, string> = {}
243
- ): Promise<void> {
279
+ ): Promise<IRunRecord> {
244
280
  const restore = setVariantEnv(variantEnv);
245
281
 
246
282
  try {
247
- await setupRunDir(runDir);
283
+ await setupRunDir(runDir, seedDir, seedFiles);
248
284
 
249
285
  const spec = parseSpec(
250
286
  await Bun.file(join(runDir, `${seed}.spec.md`)).text()
@@ -315,6 +351,16 @@ async function runOne(
315
351
  const cycles = result.results.reduce((acc, r) => acc + r.cycles, 0);
316
352
  const passed = result.status === "done";
317
353
 
354
+ // LOC is the concision signal the gate can't see — measured post-hoc on the
355
+ // GREEN solution's task files (a failed run has no shipped solution to size).
356
+ let loc: number | undefined;
357
+
358
+ if (passed) {
359
+ const taskFiles = spec.tasks.flatMap((t) => t.files);
360
+
361
+ loc = (await countTaskLoc(runDir, taskFiles)).totalLoc;
362
+ }
363
+
318
364
  // Once green, drive QUALITY up: judge → improve-per-critique → re-judge.
319
365
  let quality: number | undefined;
320
366
  let judgeNotes = "";
@@ -359,6 +405,7 @@ async function runOne(
359
405
  cycles,
360
406
  ms,
361
407
  quality,
408
+ loc,
362
409
  judgeNotes,
363
410
  tasks: result.results,
364
411
  },
@@ -378,47 +425,52 @@ async function runOne(
378
425
  ? undefined
379
426
  : classifyRun(runEvents).failureClass;
380
427
 
381
- records.push({
428
+ process.stdout.write(
429
+ ` ${seed} ${vLabel} temp=${temp} #${i + 1}: ${passed ? "done" : `blocked[${failureClass ?? "unknown"}]`} (${cycles} cyc, ${edits} edits, ${regressions} regress, ${ms}ms${quality === undefined ? "" : `, Q${quality}/5`}${loc === undefined ? "" : `, ${String(loc)} loc`}) → ${runId}\n`
430
+ );
431
+
432
+ return {
382
433
  label: `${vLabel} temp=${temp}`,
383
434
  passed,
384
435
  cycles,
385
436
  ms,
386
437
  quality,
438
+ ...(loc === undefined ? {} : { loc }),
387
439
  ...(failureClass === undefined ? {} : { failureClass }),
388
- });
389
- process.stdout.write(
390
- ` ${seed} ${vLabel} temp=${temp} #${i + 1}: ${passed ? "done" : `blocked[${failureClass ?? "unknown"}]`} (${cycles} cyc, ${edits} edits, ${regressions} regress, ${ms}ms${quality === undefined ? "" : `, Q${quality}/5`}) → ${runId}\n`
391
- );
440
+ };
392
441
  } finally {
393
442
  restore();
394
443
  }
395
444
  }
396
445
 
397
- const summaries = summarize(records);
446
+ /** Print one seed's per-variant summary + statistical report, and save its JSON. */
447
+ async function reportSeed(seed: string, records: IRunRecord[]): Promise<void> {
448
+ const summaries = summarize(records);
398
449
 
399
- process.stdout.write(`\n=== sweep: ${seed} (${repeats} runs/variant) ===\n`);
450
+ process.stdout.write(`\n=== sweep: ${seed} (${repeats} runs/variant) ===\n`);
400
451
 
401
- for (const s of summaries) {
402
- const failures = Object.entries(s.failureClasses)
403
- .sort(([, a], [, b]) => b - a)
404
- .map(([cls, n]) => `${cls}×${String(n)}`)
405
- .join(", ");
452
+ for (const s of summaries) {
453
+ const failures = Object.entries(s.failureClasses)
454
+ .sort(([, a], [, b]) => b - a)
455
+ .map(([cls, n]) => `${cls}×${String(n)}`)
456
+ .join(", ");
406
457
 
458
+ process.stdout.write(
459
+ `${s.label.padEnd(10)} pass ${Math.round(s.passRate * 100)}% (${s.passed}/${s.runs}) Q ${s.avgQuality.toFixed(1)}/5 ${s.avgLoc.toFixed(1)} loc avg ${s.avgCycles.toFixed(1)} cyc ${Math.round(s.avgMs)}ms${failures.length > 0 ? ` [${failures}]` : ""}\n`
460
+ );
461
+ }
462
+
463
+ // The statistical report (Wilson CI + z-test vs baseline) now also tabulates a
464
+ // per-variant failure-class breakdown — WHY runs failed, not just how often.
407
465
  process.stdout.write(
408
- `${s.label.padEnd(10)} pass ${Math.round(s.passRate * 100)}% (${s.passed}/${s.runs}) Q ${s.avgQuality.toFixed(1)}/5 avg ${s.avgCycles.toFixed(1)} cyc ${Math.round(s.avgMs)}ms${failures.length > 0 ? ` [${failures}]` : ""}\n`
466
+ `\n${renderSweepReportMarkdown(buildSweepReport(records))}\n`
409
467
  );
410
- }
411
-
412
- // The statistical report (Wilson CI + z-test vs baseline) now also tabulates a
413
- // per-variant failure-class breakdown — WHY runs failed, not just how often.
414
- process.stdout.write(
415
- `\n${renderSweepReportMarkdown(buildSweepReport(records))}\n`
416
- );
417
468
 
418
- const outPath = join(evalsRoot, "runs", `sweep-${seed}-${stamp()}.json`);
469
+ const outPath = join(evalsRoot, "runs", `sweep-${seed}-${stamp()}.json`);
419
470
 
420
- await Bun.write(
421
- outPath,
422
- JSON.stringify({ seed, temps, repeats, records, summaries }, null, 2)
423
- );
424
- process.stdout.write(`\nsaved ${outPath}\n`);
471
+ await Bun.write(
472
+ outPath,
473
+ JSON.stringify({ seed, temps, repeats, records, summaries }, null, 2)
474
+ );
475
+ process.stdout.write(`\nsaved ${outPath}\n`);
476
+ }
package/src/cli.ts CHANGED
@@ -896,6 +896,11 @@ async function repl(args: ICliArgs): Promise<number> {
896
896
  // in the model's list; setSetupWeb() below only wires its callback.
897
897
  ...(args.web
898
898
  ? {
899
+ // --web pre-scaffolds the app, so scaffold_web isn't needed — but the
900
+ // build still needs scaffold_ui + scaffold_routes (+ add_dependency),
901
+ // which `scaffoldUi: true` registers. Without this the web guidance
902
+ // tells the model to call tools that aren't in its list and it deadlocks.
903
+ scaffoldUi: true,
899
904
  guidance: webGuidance("react"),
900
905
  fix: buildWebFix("react"),
901
906
  incrementalCheck: buildWebTscCheck(),
@@ -6,4 +6,5 @@ export const ENV_FLAG = {
6
6
  legacyFeedback: "TSFORGE_LEGACY_FEEDBACK",
7
7
  noAstgrep: "TSFORGE_NO_ASTGREP",
8
8
  forceTools: "TSFORGE_FORCE_TOOLS",
9
+ simplicity: "TSFORGE_SIMPLICITY",
9
10
  } as const;
@@ -29,4 +29,8 @@ export const flags = {
29
29
  * (A/B control, default ON — set to "0" to disable). */
30
30
  lspWriteFeedback: (): boolean =>
31
31
  process.env.TSFORGE_LSP_WRITE_FEEDBACK !== "0",
32
+ /** Scratch-utility simplicity guidance — appends a "shortest correct solution"
33
+ * block to the build prompt for from-scratch, non-web tasks (A/B control,
34
+ * default OFF until a sweep validates it). */
35
+ simplicity: (): boolean => isOn(ENV_FLAG.simplicity),
32
36
  };
@@ -23,6 +23,10 @@ export interface IRunRecord {
23
23
  ms: number;
24
24
  /** LLM-judge quality score (1–5), when available. */
25
25
  quality?: number;
26
+ /** Lines of code in the solution's task files (non-blank, non-comment), measured
27
+ * post-hoc on a green run. The concision signal the gate is blind to; omitted
28
+ * for a failed run (there's no shipped solution to measure). */
29
+ loc?: number;
26
30
  /** Structured reason a failed run failed (from classifyRun); omitted/`none`
27
31
  * for a passing run. The substrate for turning failures into interventions. */
28
32
  failureClass?: FailureClass;
@@ -38,6 +42,9 @@ export interface IVariantSummary {
38
42
  avgMs: number;
39
43
  /** Average quality across runs that were scored (0 if none). */
40
44
  avgQuality: number;
45
+ /** Average LOC across runs that recorded it — i.e. green runs (0 if none). The
46
+ * lower-is-better concision metric, compared per task across variants. */
47
+ avgLoc: number;
41
48
  /** Count of failed runs by failure class (e.g. {"type-error": 2}); empty when
42
49
  * no run carried a class. Lets a sweep show WHY a variant failed, not just how
43
50
  * often. */
package/src/eval/index.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  export * from "./eval.types";
2
2
  export { judge } from "./judge";
3
3
  export { summarize } from "./score";
4
+ export { countLoc, countTaskLoc, type ITaskLoc } from "./loc";
4
5
  export { analyzeEvents, type IRunMetrics } from "./metrics";
5
6
  export {
6
7
  classifyRun,
@@ -0,0 +1,56 @@
1
+ import { join } from "node:path";
2
+
3
+ /**
4
+ * Lines-of-code counter — a cheap structural proxy for solution SIZE, used by the
5
+ * eval sweep to measure concision (the axis the gate is blind to: it checks that
6
+ * code is correct, never that it is lean).
7
+ *
8
+ * Counts non-blank, non-comment lines. This is deliberately a HEURISTIC (the
9
+ * ponytail-benchmark approach), not a parse: block comments are stripped, then
10
+ * blank lines and line-comment-only lines are dropped. A comment marker inside a
11
+ * string literal is treated as a comment — acceptable, because LOC is only ever
12
+ * compared between solutions to the SAME task, where that noise is constant.
13
+ */
14
+ export function countLoc(content: string): number {
15
+ const withoutBlocks = content.replace(/\/\*[\s\S]*?\*\//g, "");
16
+
17
+ return withoutBlocks
18
+ .split("\n")
19
+ .map((line) => line.trim())
20
+ .filter((line) => line.length > 0 && !line.startsWith("//")).length;
21
+ }
22
+
23
+ /** Total + per-file LOC for a task's editable files. */
24
+ export interface ITaskLoc {
25
+ totalLoc: number;
26
+ perFile: Record<string, number>;
27
+ }
28
+
29
+ /**
30
+ * Sum LOC across a task's editable `files` (resolved under `cwd`; glob patterns
31
+ * are expanded, plain filenames match themselves). Run AFTER a green solution
32
+ * exists, so it measures what the model actually shipped. A pattern that matches
33
+ * nothing contributes 0.
34
+ */
35
+ export async function countTaskLoc(
36
+ cwd: string,
37
+ patterns: readonly string[]
38
+ ): Promise<ITaskLoc> {
39
+ const perFile: Record<string, number> = {};
40
+
41
+ for (const pattern of patterns) {
42
+ const glob = new Bun.Glob(pattern);
43
+
44
+ for await (const rel of glob.scan({ cwd, onlyFiles: true })) {
45
+ if (rel in perFile) {
46
+ continue;
47
+ }
48
+
49
+ perFile[rel] = countLoc(await Bun.file(join(cwd, rel)).text());
50
+ }
51
+ }
52
+
53
+ const totalLoc = Object.values(perFile).reduce((acc, n) => acc + n, 0);
54
+
55
+ return { totalLoc, perFile };
56
+ }
@@ -144,8 +144,8 @@ function baselineCell(report: IVariantReport, baseline: string | null): string {
144
144
  * (p < 0.05) from the baseline. */
145
145
  export function renderSweepReportMarkdown(report: ISweepReport): string {
146
146
  const header =
147
- "| Variant | Runs | Pass | 95% CI | Cycles | Ms | Quality | vs baseline |\n" +
148
- "| --- | --- | --- | --- | --- | --- | --- | --- |";
147
+ "| Variant | Runs | Pass | 95% CI | Cycles | Ms | Quality | LOC | vs baseline |\n" +
148
+ "| --- | --- | --- | --- | --- | --- | --- | --- | --- |";
149
149
 
150
150
  const rows = report.variants.map((v) => {
151
151
  const ci = `${pct(v.passRateCI[0])}–${pct(v.passRateCI[1])}`;
@@ -153,7 +153,7 @@ export function renderSweepReportMarkdown(report: ISweepReport): string {
153
153
  return (
154
154
  `| ${v.label} | ${String(v.runs)} | ${pct(v.passRate)} | ${ci} | ` +
155
155
  `${v.avgCycles.toFixed(1)} | ${String(Math.round(v.avgMs))} | ` +
156
- `${v.avgQuality.toFixed(1)} | ${baselineCell(v, report.baseline)} |`
156
+ `${v.avgQuality.toFixed(1)} | ${v.avgLoc.toFixed(1)} | ${baselineCell(v, report.baseline)} |`
157
157
  );
158
158
  });
159
159
 
package/src/eval/score.ts CHANGED
@@ -20,6 +20,7 @@ export function summarize(records: IRunRecord[]): IVariantSummary[] {
20
20
  const sum = (select: (r: IRunRecord) => number): number =>
21
21
  list.reduce((acc, r) => acc + select(r), 0);
22
22
  const scored = list.filter((r) => r.quality !== undefined);
23
+ const sized = list.filter((r) => r.loc !== undefined);
23
24
  const failureClasses: Record<string, number> = {};
24
25
 
25
26
  for (const r of list) {
@@ -41,6 +42,10 @@ export function summarize(records: IRunRecord[]): IVariantSummary[] {
41
42
  scored.length > 0
42
43
  ? scored.reduce((acc, r) => acc + (r.quality ?? 0), 0) / scored.length
43
44
  : 0,
45
+ avgLoc:
46
+ sized.length > 0
47
+ ? sized.reduce((acc, r) => acc + (r.loc ?? 0), 0) / sized.length
48
+ : 0,
44
49
  failureClasses,
45
50
  });
46
51
  }
@@ -3,20 +3,28 @@
3
3
  export const SCRATCH_PREFIX = "scratch/";
4
4
 
5
5
  /**
6
- * VENDORED, harness-authored files the model must NEVER edit or create. These are
7
- * tested, already-type-correct SDK/primitive/generated files: the web scaffold's
8
- * `src/lib/**` toolkit, the `src/components/ui/**` primitives, the MSW mock
9
- * machinery (`src/mocks/db.ts` + `src/mocks/browser.ts`), and any `*.gen.ts`
10
- * codegen output (TanStack's route tree). They are eslint- and prettier-ignored,
11
- * so a model that touches them sees tsc errors it cannot fix and — with
12
- * eslint-disable + `@ts-*` suppressions banned — has no escape, looping to the
13
- * turn cap. A write to any of these is rejected: a type error involving them is
14
- * always a wrong CALL SITE, never the library. (`src/mocks/handlers.ts` is NOT
15
- * vendored — the model registers its mock resources there.)
6
+ * VENDORED, harness-authored files the model must NEVER rewrite — the SPECIFIC
7
+ * tested/generated files the web scaffold ships, NOT whole directories. The guard
8
+ * exists for ONE reason: stop the model from "fixing" the generic SDK files
9
+ * (`use-resource`/`api`/`result`/…), whose strict-TS errors are unfixable and
10
+ * with eslint-disable + `@ts-*` suppressions banned trap it in a loop. A type
11
+ * error involving one is always a wrong CALL SITE, never the library.
12
+ *
13
+ * Deliberately scoped to exact files so the model stays FREE to do what the
14
+ * guidance tells it: create its own helpers in `src/lib/<name>.ts` and primitives
15
+ * in `src/components/ui/<x>.tsx` (and edit `src/components/ui/button.tsx`). It is
16
+ * also applied ONLY to web-scaffold sessions (via `IToolContext.vendored`), so a
17
+ * normal repo that happens to have a `src/lib/` is never affected. `src/mocks/
18
+ * handlers.ts` is NOT vendored — the model registers its mock resources there.
16
19
  */
17
- export const VENDORED_PATTERNS = [
18
- "src/lib/**",
19
- "src/components/ui/**",
20
+ export const WEB_VENDORED_PATTERNS = [
21
+ "src/lib/utils.ts",
22
+ "src/lib/result.ts",
23
+ "src/lib/object.ts",
24
+ "src/lib/sort.ts",
25
+ "src/lib/api.ts",
26
+ "src/lib/use-resource.ts",
27
+ "src/lib/use-form.ts",
20
28
  "src/mocks/db.ts",
21
29
  "src/mocks/browser.ts",
22
30
  "**/*.gen.ts",
@@ -1,5 +1,5 @@
1
1
  import { resolve, relative } from "node:path";
2
- import { SCRATCH_PREFIX, VENDORED_PATTERNS } from "./scope.constants";
2
+ import { SCRATCH_PREFIX } from "./scope.constants";
3
3
 
4
4
  /**
5
5
  * Normalize a model-supplied path against the workspace root, fixing the common
@@ -27,11 +27,12 @@ export function isInScope(file: string, patterns: string[]): boolean {
27
27
  return patterns.some((pattern) => new Bun.Glob(pattern).match(file));
28
28
  }
29
29
 
30
- /** True when `file` is a VENDORED, harness-authored file the model must not
31
- * touch (`src/lib/**`, `src/components/ui/**`, the MSW machinery, `*.gen.ts`).
32
- * Expects the workspace-relative form (`normalizeWorkspacePath` first). */
33
- export function isVendored(file: string): boolean {
34
- return VENDORED_PATTERNS.some((pattern) => new Bun.Glob(pattern).match(file));
30
+ /** True when `file` matches one of `patterns` — the VENDORED, harness-authored
31
+ * files the model must not rewrite. `patterns` is supplied per-session
32
+ * (`IToolContext.vendored`), so it is empty (⇒ always false) outside a web
33
+ * scaffold. Expects the workspace-relative form (`normalizeWorkspacePath` first). */
34
+ export function isVendored(file: string, patterns: readonly string[]): boolean {
35
+ return patterns.some((pattern) => new Bun.Glob(pattern).match(file));
35
36
  }
36
37
 
37
38
  /** A file the model may write: its editable scope, OR a throwaway scratch file.
@@ -1,2 +1,9 @@
1
- export { SYSTEM, CHAT_SYSTEM, COMPACT_SYSTEM, seedPrompt } from "./prompt";
1
+ export {
2
+ SYSTEM,
3
+ CHAT_SYSTEM,
4
+ COMPACT_SYSTEM,
5
+ SCRATCH_SIMPLICITY_GUIDANCE,
6
+ buildSystemPrompt,
7
+ seedPrompt,
8
+ } from "./prompt";
2
9
  export { renderFileSection, exportedSymbols } from "./project-map";
@@ -1,7 +1,8 @@
1
1
  import type { ITask } from "../../spec";
2
2
  import type { IFileView } from "../../lib/fs";
3
- import { PACK_REGISTRY } from "../../stack-detection";
3
+ import { PACK_REGISTRY, isWebStack } from "../../stack-detection";
4
4
  import type { IStackProfile } from "../../stack-detection";
5
+ import { flags } from "../../config";
5
6
  import { renderFileSection } from "./project-map";
6
7
 
7
8
  /** The implement-agent system prompt: who it is, the tools, and the strict-TS
@@ -16,6 +17,40 @@ export const SYSTEM = [
16
17
  "The gate is `tsc` strict + eslint with every rule an error, so write TypeScript that satisfies it: interfaces are `I`-prefixed; `===`; no `var`; never the non-null `!` — guard index access (`const x = arr[i]; if (x === undefined) {...}`); no `any` and no `as` — type every parameter (e.g. `.reduce((acc: number, r: number) => …, 0)`); explicit boolean conditions. When the gate flags errors in read-only files (tests/types), they come from your editable file being missing or wrong-shaped and vanish once it's correct — don't edit them.",
17
18
  ].join("\n");
18
19
 
20
+ /** Appended to SYSTEM for from-scratch, NON-web utility builds when the simplicity
21
+ * flag is on. Pushes the model toward the shortest correct solution — the axis the
22
+ * gate is blind to (it checks correctness, never concision). Carve-outs keep it
23
+ * from fighting the gate's hard rules. NOT for web builds (the views/components
24
+ * architecture legitimately needs many small files). */
25
+ export const SCRATCH_SIMPLICITY_GUIDANCE = [
26
+ "SIMPLICITY — write the SHORTEST correct solution that passes the gate:",
27
+ " • The task's `files:` are the ceiling — do NOT add modules, classes, or",
28
+ " abstractions the task didn't ask for. One focused implementation.",
29
+ " • Prefer built-ins and a direct expression over step-by-step temporaries:",
30
+ " chain the transforms (`xs.filter(...).map(...)`) instead of naming each",
31
+ " intermediate, when it stays readable.",
32
+ " • NO narration/step comments ('// Step 1', '// first we…') — the code is the",
33
+ " explanation. A comment earns its place only for a non-obvious WHY.",
34
+ " • This NEVER overrides the gate: keep `I`-prefixed interfaces, no `as`/`any`/`!`,",
35
+ " real validation at trust boundaries, and any test siblings the gate requires.",
36
+ ].join("\n");
37
+
38
+ /** SYSTEM + the simplicity block when it applies, else SYSTEM unchanged. Gated on
39
+ * the `simplicity` flag AND a from-scratch (`!hasExistingCode`) NON-web build —
40
+ * so it never touches existing-repo edits or web/UI apps. */
41
+ export function buildSystemPrompt(
42
+ hasExistingCode: boolean,
43
+ stack: IStackProfile | undefined
44
+ ): string {
45
+ const webish = stack !== undefined && isWebStack(stack);
46
+
47
+ if (!flags.simplicity() || hasExistingCode || webish) {
48
+ return SYSTEM;
49
+ }
50
+
51
+ return `${SYSTEM}\n\n${SCRATCH_SIMPLICITY_GUIDANCE}`;
52
+ }
53
+
19
54
  /**
20
55
  * The INTERACTIVE assistant prompt (the CLI's `Session`). Unlike `SYSTEM` — which
21
56
  * drives a single task to a gate and is told to "keep going until green" — this
package/src/loop/run.ts CHANGED
@@ -17,7 +17,7 @@ import type {
17
17
  } from "./loop.types";
18
18
  import { mineLessons, consolidate as consolidateMemory } from "./memory";
19
19
  import { flags } from "../config";
20
- import { SYSTEM, seedPrompt } from "./prompt";
20
+ import { buildSystemPrompt, seedPrompt } from "./prompt";
21
21
  import { detectStack } from "../stack-detection";
22
22
  import type { TtsrManager } from "./ttsr";
23
23
  import {
@@ -295,17 +295,23 @@ export async function runTask(
295
295
 
296
296
  const editable = await readFiles(cwd, task.files);
297
297
  const context = await readFiles(cwd, task.context ?? []);
298
+
299
+ // Existing code to navigate? (editable files already have content). Only then
300
+ // do the LSP nav tools earn their decision-surface cost — see toolsFor(). Also
301
+ // gates the scratch-simplicity guidance (from-scratch builds only).
302
+ const hasExistingCode = editable.some((f) => f.content.trim().length > 0);
303
+
298
304
  const messages: IChatMessage[] = [
299
- { role: "system", content: SYSTEM },
305
+ {
306
+ role: "system",
307
+ content: buildSystemPrompt(hasExistingCode, stackProfile),
308
+ },
300
309
  {
301
310
  role: "user",
302
311
  content: seedPrompt(task, editable, context, stackProfile),
303
312
  },
304
313
  ];
305
314
 
306
- // Existing code to navigate? (editable files already have content). Only then
307
- // do the LSP nav tools earn their decision-surface cost — see toolsFor().
308
- const hasExistingCode = editable.some((f) => f.content.trim().length > 0);
309
315
  const tools = toolsFor(hasExistingCode);
310
316
 
311
317
  // Mode-aware reasoning cap: scratch tasks over-think unbounded, so default
@@ -18,6 +18,7 @@ import {
18
18
  } from "../agent";
19
19
  import { flags } from "../config";
20
20
  import { readFiles } from "../lib/fs";
21
+ import { WEB_VENDORED_PATTERNS } from "../lib/scope";
21
22
  import { validate, type ErrorParser } from "../validate";
22
23
  import { detectStack } from "../stack-detection";
23
24
  import {
@@ -521,9 +522,11 @@ export class Session {
521
522
  report({ kind: "tool", task: SESSION_ID, message });
522
523
  });
523
524
 
525
+ const isWebScaffold = cfg.scaffoldWeb === true || cfg.scaffoldUi === true;
524
526
  const ctx: ILoopCtx = {
525
527
  task,
526
528
  cwd: cfg.cwd,
529
+ ...(isWebScaffold ? { vendored: WEB_VENDORED_PATTERNS } : {}),
527
530
  tsService: await buildTsService(cfg.cwd),
528
531
  ...(cfg.lintFile === undefined ? {} : { lintFile: cfg.lintFile }),
529
532
  parse: cfg.parse,
@@ -206,7 +206,7 @@ export async function doEdit(
206
206
 
207
207
  edit.file = normalizeWorkspacePath(ctx.cwd, edit.file);
208
208
 
209
- if (isVendored(edit.file)) {
209
+ if (isVendored(edit.file, ctx.vendored ?? [])) {
210
210
  return reject(
211
211
  ctx,
212
212
  "edit:vendored",
@@ -312,7 +312,7 @@ export async function doCreate(
312
312
 
313
313
  create.file = normalizeWorkspacePath(ctx.cwd, create.file);
314
314
 
315
- if (isVendored(create.file)) {
315
+ if (isVendored(create.file, ctx.vendored ?? [])) {
316
316
  return reject(
317
317
  ctx,
318
318
  "create:vendored",
@@ -8,6 +8,10 @@ export interface IToolContext {
8
8
  cwd: string;
9
9
  /** Editable scope — `edit`/`create` outside it are rejected. */
10
10
  files: string[];
11
+ /** VENDORED file globs the model must not rewrite (the web scaffold's shipped
12
+ * SDK/generated files). Set only for web-scaffold sessions; absent/empty ⇒ the
13
+ * vendored guard is inert (non-web builds and normal repos are unaffected). */
14
+ vendored?: readonly string[];
11
15
  report: Reporter;
12
16
  task: string;
13
17
  /** In-process TypeScript LanguageService — backs the semantic tools
package/src/loop/turn.ts CHANGED
@@ -115,6 +115,9 @@ export interface ILoopCtx {
115
115
  /** Wired by the interactive CLI: turn this workspace into a web project (the
116
116
  * `scaffold_web` tool calls it). Threaded into the tool context. */
117
117
  setupWeb?: (framework: string) => Promise<void>;
118
+ /** VENDORED file globs the model must not rewrite (web-scaffold sessions only).
119
+ * Threaded into the tool context; absent ⇒ the vendored guard is inert. */
120
+ vendored?: readonly string[];
118
121
  /** PLAN MODE (set via Session.setPlanMode): threaded into the tool context so
119
122
  * mutating tools are rejected at dispatch — the model only plans. */
120
123
  readOnly?: boolean;
@@ -462,6 +465,7 @@ export async function runToolCalls(
462
465
  tsService: ctx.tsService,
463
466
  ...(ctx.signal === undefined ? {} : { signal: ctx.signal }),
464
467
  ...(ctx.setupWeb === undefined ? {} : { setupWeb: ctx.setupWeb }),
468
+ ...(ctx.vendored === undefined ? {} : { vendored: ctx.vendored }),
465
469
  ...(ctx.readOnly === undefined ? {} : { readOnly: ctx.readOnly }),
466
470
  ...(ctx.mcpRegistry === undefined
467
471
  ? {}
@@ -9,6 +9,21 @@ import {
9
9
  type IPackId,
10
10
  } from "./packs";
11
11
 
12
+ /** The pack ids that identify a WEB (browser UI) build. Used to scope behaviours
13
+ * that must NOT apply to web apps (e.g. the scratch-simplicity prompt, whose
14
+ * "shortest solution / no extra files" advice fights the views/components
15
+ * architecture the web scaffold requires). */
16
+ const WEB_PACK_IDS: readonly string[] = [
17
+ "react",
18
+ "react-component-architecture",
19
+ "tanstack-query",
20
+ ];
21
+
22
+ /** True when the detected stack is a web/browser UI build. */
23
+ export function isWebStack(profile: IStackProfile): boolean {
24
+ return profile.packs.some((p) => WEB_PACK_IDS.includes(p));
25
+ }
26
+
12
27
  /** Parse package.json and extract deps/devDeps, tolerating missing/invalid JSON. */
13
28
  async function loadPackageDeps(cwd: string): Promise<{
14
29
  deps: Set<string>;
@@ -1,3 +1,3 @@
1
1
  export * from "./stack-detection.types";
2
- export { detectStack } from "./detect";
2
+ export { detectStack, isWebStack } from "./detect";
3
3
  export { PACK_REGISTRY, ALWAYS_ON_PACKS } from "./packs";
@@ -317,6 +317,12 @@ export const Route = createRootRoute({
317
317
  });
318
318
  `;
319
319
 
320
+ // The placeholder home carries `data-tsforge-stub` (the SAME sentinel scaffold_routes
321
+ // stubs use) so the gate's stub-check FAILS until the model replaces it with the real
322
+ // home. Without this, an unbuilt app — just the scaffold + maybe some types — passes
323
+ // the gate (vite builds, this page renders non-blank, no scaffold_routes stubs to
324
+ // catch) and is falsely declared "done". The model removes the marker when it builds
325
+ // the real home.
320
326
  const INDEX_ROUTE_TSX = `import { createFileRoute } from "@tanstack/react-router";
321
327
 
322
328
  import { Button } from "@/components/ui/button";
@@ -327,7 +333,7 @@ export const Route = createFileRoute("/")({
327
333
 
328
334
  function Home() {
329
335
  return (
330
- <main className="flex min-h-screen flex-col items-center justify-center gap-6 bg-background text-foreground">
336
+ <main data-tsforge-stub className="flex min-h-screen flex-col items-center justify-center gap-6 bg-background text-foreground">
331
337
  <h1 className="text-3xl font-bold">app</h1>
332
338
  <Button>Get started</Button>
333
339
  </main>