npm - @agjs/tsforge - Version diff - 0.3.1 → 0.3.3 - Mend

@agjs/tsforge 0.3.1 → 0.3.3

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (22)

package/package.json +1 -1
package/scripts/sweep.ts +119 -67
package/src/cli.ts +5 -0
package/src/config/config.constants.ts +1 -0
package/src/config/flags.ts +4 -0
package/src/eval/eval.types.ts +7 -0
package/src/eval/index.ts +1 -0
package/src/eval/loc.ts +56 -0
package/src/eval/report.ts +3 -3
package/src/eval/score.ts +5 -0
package/src/lib/scope/scope.constants.ts +21 -13
package/src/lib/scope/scope.ts +7 -6
package/src/loop/prompt/index.ts +8 -1
package/src/loop/prompt/prompt.ts +36 -1
package/src/loop/run.ts +11 -5
package/src/loop/session.ts +3 -0
package/src/loop/tools/file-ops.ts +2 -2
package/src/loop/tools/tool-context.ts +4 -0
package/src/loop/turn.ts +4 -0
package/src/stack-detection/detect.ts +15 -0
package/src/stack-detection/index.ts +1 -1
package/src/web-templates.ts +7 -1

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@agjs/tsforge",
   "type": "module",
-  "version": "0.3.1",
+  "version": "0.3.3",
   "license": "MIT",
   "description": "TypeScript coding harness with a deterministic gate, stack-aware guardrails, and stream-level correction.",
   "repository": {

package/scripts/sweep.ts CHANGED Viewed

@@ -1,5 +1,7 @@
-// Eval sweep: run a seed spec N times across temperature + feature flag variants, score, tabulate.
+// Eval sweep: run seed spec(s) N times across temperature + feature flag variants, score, tabulate.
 // Run:  TSFORGE_SEED=money TSFORGE_TEMPS=0,0.5 TSFORGE_REPEATS=3 bun run packages/core/scripts/sweep.ts
+// TSFORGE_SEED accepts a comma-separated list (e.g. slugify,debounce,rate-limit) — each seed
+// runs the full variant matrix and gets its own report + saved JSON.
 // A/B feature variants:
 //   TSFORGE_FEATURE_VARIANTS=ttsr,hashline (sweep across feature toggles)
 //   Each variant is dim=on|off (e.g. ttsr=on×hashline=off) creating a cartesian product.
@@ -15,6 +17,7 @@ import { providerConfig } from "../src/cli";
 import {
   summarize,
   classifyRun,
+  countTaskLoc,
   renderSweepReportMarkdown,
   buildSweepReport,
   type IRunRecord,
@@ -22,7 +25,10 @@ import {
 import { renderEvent } from "../src/render";
 import type { ILoopEvent } from "../src/loop";
-const seed = process.env.TSFORGE_SEED ?? "todo";
+const seeds = (process.env.TSFORGE_SEED ?? "todo")
+  .split(",")
+  .map((s) => s.trim())
+  .filter((s) => s.length > 0);
 const temps = (process.env.TSFORGE_TEMPS ?? "0,0.5")
   .split(",")
   .map((t) => Number(t.trim()));
@@ -81,6 +87,8 @@ function variantToEnvVars(variant: IFeatureVariant): Record<string, string> {
       envVars.TSFORGE_HASHLINE = state === "1" ? "1" : "0";
     } else if (dim === "lsp_write_feedback") {
       envVars.TSFORGE_LSP_WRITE_FEEDBACK = state === "1" ? "1" : "0";
+    } else if (dim === "simplicity") {
+      envVars.TSFORGE_SIMPLICITY = state === "1" ? "1" : "0";
     }
     // else: unknown dimension, skip
   }
@@ -100,15 +108,17 @@ function variantLabel(variant: IFeatureVariant): string {
 const featureVariants = parseFeatureVariants();
 const evalsRoot = join(import.meta.dir, "..", "..", "..", "evals");
-// Prefer a local working seed (evals/<seed>); fall back to the committed corpus
-// (evals/corpus/<seed>) so checked-in seeds run with no manual copy step.
-const localSeedDir = join(evalsRoot, seed);
-const seedDir = (await Bun.file(join(localSeedDir, `${seed}.spec.md`)).exists())
-  ? localSeedDir
-  : join(evalsRoot, "corpus", seed);
-// Recursive so nested-directory apps (e.g. a React app under `src/`) copy whole;
-// flat single-dir evals are unaffected (recursive readdir returns the same list).
-const seedFiles = await readdir(seedDir, { recursive: true });
+/** Resolve a seed's directory: prefer a local working seed (evals/<seed>); fall
+ *  back to the committed corpus (evals/corpus/<seed>) so checked-in seeds run with
+ *  no manual copy step. */
+async function resolveSeedDir(seed: string): Promise<string> {
+  const local = join(evalsRoot, seed);
+  return (await Bun.file(join(local, `${seed}.spec.md`)).exists())
+    ? local
+    : join(evalsRoot, "corpus", seed);
+}
 // Resolve the model the same way the CLI does: explicit TSFORGE_* env wins, else
 // the active entry from ~/.tsforge/models.json. (Previously this hardcoded the
@@ -151,36 +161,55 @@ function stamp(): string {
   return `${d.getFullYear()}${p(d.getMonth() + 1)}${p(d.getDate())}-${p(d.getHours())}${p(d.getMinutes())}${p(d.getSeconds())}`;
 }
-const records: IRunRecord[] = [];
-for (const variant of featureVariants) {
-  const variantEnv = variantToEnvVars(variant);
-  const vLabel = variantLabel(variant);
-  for (const temp of temps) {
-    for (let i = 0; i < repeats; i += 1) {
-      const runId = `${seed}-${vLabel}-t${temp}-${stamp()}-${i + 1}`;
-      const runDir = join(evalsRoot, "runs", runId);
-      // One run's failure (e.g. a request timing out) must not abort the sweep —
-      // record it as a blocked run and carry on, so a long batch is resilient.
-      try {
-        await runOne(runId, runDir, temp, i, variantEnv);
-      } catch (err) {
-        const message = err instanceof Error ? err.message : String(err);
-        records.push({
-          label: `${vLabel} temp=${temp}`,
-          passed: false,
-          cycles: 0,
-          ms: 0,
-        });
-        process.stdout.write(
-          `  ${seed} ${vLabel} temp=${temp} #${i + 1}: ERRORED (${message}) → ${runId}\n`
-        );
+for (const seed of seeds) {
+  const seedDir = await resolveSeedDir(seed);
+  // Recursive so nested-directory apps (e.g. a React app under `src/`) copy whole;
+  // flat single-dir evals are unaffected (recursive readdir returns the same list).
+  const seedFiles = await readdir(seedDir, { recursive: true });
+  const records: IRunRecord[] = [];
+  for (const variant of featureVariants) {
+    const variantEnv = variantToEnvVars(variant);
+    const vLabel = variantLabel(variant);
+    for (const temp of temps) {
+      for (let i = 0; i < repeats; i += 1) {
+        const runId = `${seed}-${vLabel}-t${temp}-${stamp()}-${i + 1}`;
+        const runDir = join(evalsRoot, "runs", runId);
+        // One run's failure (e.g. a request timing out) must not abort the sweep —
+        // record it as a blocked run and carry on, so a long batch is resilient.
+        try {
+          records.push(
+            await runOne(
+              seed,
+              seedDir,
+              seedFiles,
+              runId,
+              runDir,
+              temp,
+              i,
+              variantEnv
+            )
+          );
+        } catch (err) {
+          const message = err instanceof Error ? err.message : String(err);
+          records.push({
+            label: `${vLabel} temp=${temp}`,
+            passed: false,
+            cycles: 0,
+            ms: 0,
+          });
+          process.stdout.write(
+            `  ${seed} ${vLabel} temp=${temp} #${i + 1}: ERRORED (${message}) → ${runId}\n`
+          );
+        }
       }
     }
   }
+  await reportSeed(seed, records);
 }
 /** Set env vars for a variant, returning a restore function. */
@@ -206,7 +235,11 @@ function setVariantEnv(variant: Record<string, string>): () => void {
 }
 /** Copy seed files and prepare the run directory. */
-async function setupRunDir(dir: string): Promise<void> {
+async function setupRunDir(
+  dir: string,
+  seedDir: string,
+  seedFiles: string[]
+): Promise<void> {
   await mkdir(dir, { recursive: true });
   for (const file of seedFiles) {
@@ -235,16 +268,19 @@ async function startRed(
 }
 async function runOne(
+  seed: string,
+  seedDir: string,
+  seedFiles: string[],
   runId: string,
   runDir: string,
   temp: number,
   i: number,
   variantEnv: Record<string, string> = {}
-): Promise<void> {
+): Promise<IRunRecord> {
   const restore = setVariantEnv(variantEnv);
   try {
-    await setupRunDir(runDir);
+    await setupRunDir(runDir, seedDir, seedFiles);
     const spec = parseSpec(
       await Bun.file(join(runDir, `${seed}.spec.md`)).text()
@@ -315,6 +351,16 @@ async function runOne(
     const cycles = result.results.reduce((acc, r) => acc + r.cycles, 0);
     const passed = result.status === "done";
+    // LOC is the concision signal the gate can't see — measured post-hoc on the
+    // GREEN solution's task files (a failed run has no shipped solution to size).
+    let loc: number | undefined;
+    if (passed) {
+      const taskFiles = spec.tasks.flatMap((t) => t.files);
+      loc = (await countTaskLoc(runDir, taskFiles)).totalLoc;
+    }
     // Once green, drive QUALITY up: judge → improve-per-critique → re-judge.
     let quality: number | undefined;
     let judgeNotes = "";
@@ -359,6 +405,7 @@ async function runOne(
           cycles,
           ms,
           quality,
+          loc,
           judgeNotes,
           tasks: result.results,
         },
@@ -378,47 +425,52 @@ async function runOne(
       ? undefined
       : classifyRun(runEvents).failureClass;
-    records.push({
+    process.stdout.write(
+      `  ${seed} ${vLabel} temp=${temp} #${i + 1}: ${passed ? "done" : `blocked[${failureClass ?? "unknown"}]`} (${cycles} cyc, ${edits} edits, ${regressions} regress, ${ms}ms${quality === undefined ? "" : `, Q${quality}/5`}${loc === undefined ? "" : `, ${String(loc)} loc`}) → ${runId}\n`
+    );
+    return {
       label: `${vLabel} temp=${temp}`,
       passed,
       cycles,
       ms,
       quality,
+      ...(loc === undefined ? {} : { loc }),
       ...(failureClass === undefined ? {} : { failureClass }),
-    });
-    process.stdout.write(
-      `  ${seed} ${vLabel} temp=${temp} #${i + 1}: ${passed ? "done" : `blocked[${failureClass ?? "unknown"}]`} (${cycles} cyc, ${edits} edits, ${regressions} regress, ${ms}ms${quality === undefined ? "" : `, Q${quality}/5`}) → ${runId}\n`
-    );
+    };
   } finally {
     restore();
   }
 }
-const summaries = summarize(records);
+/** Print one seed's per-variant summary + statistical report, and save its JSON. */
+async function reportSeed(seed: string, records: IRunRecord[]): Promise<void> {
+  const summaries = summarize(records);
-process.stdout.write(`\n=== sweep: ${seed} (${repeats} runs/variant) ===\n`);
+  process.stdout.write(`\n=== sweep: ${seed} (${repeats} runs/variant) ===\n`);
-for (const s of summaries) {
-  const failures = Object.entries(s.failureClasses)
-    .sort(([, a], [, b]) => b - a)
-    .map(([cls, n]) => `${cls}×${String(n)}`)
-    .join(", ");
+  for (const s of summaries) {
+    const failures = Object.entries(s.failureClasses)
+      .sort(([, a], [, b]) => b - a)
+      .map(([cls, n]) => `${cls}×${String(n)}`)
+      .join(", ");
+    process.stdout.write(
+      `${s.label.padEnd(10)}  pass ${Math.round(s.passRate * 100)}% (${s.passed}/${s.runs})  Q ${s.avgQuality.toFixed(1)}/5  ${s.avgLoc.toFixed(1)} loc  avg ${s.avgCycles.toFixed(1)} cyc  ${Math.round(s.avgMs)}ms${failures.length > 0 ? `  [${failures}]` : ""}\n`
+    );
+  }
+  // The statistical report (Wilson CI + z-test vs baseline) now also tabulates a
+  // per-variant failure-class breakdown — WHY runs failed, not just how often.
   process.stdout.write(
-    `${s.label.padEnd(10)}  pass ${Math.round(s.passRate * 100)}% (${s.passed}/${s.runs})  Q ${s.avgQuality.toFixed(1)}/5  avg ${s.avgCycles.toFixed(1)} cyc  ${Math.round(s.avgMs)}ms${failures.length > 0 ? `  [${failures}]` : ""}\n`
+    `\n${renderSweepReportMarkdown(buildSweepReport(records))}\n`
   );
-}
-// The statistical report (Wilson CI + z-test vs baseline) now also tabulates a
-// per-variant failure-class breakdown — WHY runs failed, not just how often.
-process.stdout.write(
-  `\n${renderSweepReportMarkdown(buildSweepReport(records))}\n`
-);
-const outPath = join(evalsRoot, "runs", `sweep-${seed}-${stamp()}.json`);
+  const outPath = join(evalsRoot, "runs", `sweep-${seed}-${stamp()}.json`);
-await Bun.write(
-  outPath,
-  JSON.stringify({ seed, temps, repeats, records, summaries }, null, 2)
-);
-process.stdout.write(`\nsaved ${outPath}\n`);
+  await Bun.write(
+    outPath,
+    JSON.stringify({ seed, temps, repeats, records, summaries }, null, 2)
+  );
+  process.stdout.write(`\nsaved ${outPath}\n`);
+}

package/src/cli.ts CHANGED Viewed

@@ -896,6 +896,11 @@ async function repl(args: ICliArgs): Promise<number> {
     // in the model's list; setSetupWeb() below only wires its callback.
     ...(args.web
       ? {
+          // --web pre-scaffolds the app, so scaffold_web isn't needed — but the
+          // build still needs scaffold_ui + scaffold_routes (+ add_dependency),
+          // which `scaffoldUi: true` registers. Without this the web guidance
+          // tells the model to call tools that aren't in its list and it deadlocks.
+          scaffoldUi: true,
           guidance: webGuidance("react"),
           fix: buildWebFix("react"),
           incrementalCheck: buildWebTscCheck(),

package/src/config/config.constants.ts CHANGED Viewed

@@ -6,4 +6,5 @@ export const ENV_FLAG = {
   legacyFeedback: "TSFORGE_LEGACY_FEEDBACK",
   noAstgrep: "TSFORGE_NO_ASTGREP",
   forceTools: "TSFORGE_FORCE_TOOLS",
+  simplicity: "TSFORGE_SIMPLICITY",
 } as const;

package/src/config/flags.ts CHANGED Viewed

@@ -29,4 +29,8 @@ export const flags = {
    *  (A/B control, default ON — set to "0" to disable). */
   lspWriteFeedback: (): boolean =>
     process.env.TSFORGE_LSP_WRITE_FEEDBACK !== "0",
+  /** Scratch-utility simplicity guidance — appends a "shortest correct solution"
+   *  block to the build prompt for from-scratch, non-web tasks (A/B control,
+   *  default OFF until a sweep validates it). */
+  simplicity: (): boolean => isOn(ENV_FLAG.simplicity),
 };

package/src/eval/eval.types.ts CHANGED Viewed

@@ -23,6 +23,10 @@ export interface IRunRecord {
   ms: number;
   /** LLM-judge quality score (1–5), when available. */
   quality?: number;
+  /** Lines of code in the solution's task files (non-blank, non-comment), measured
+   *  post-hoc on a green run. The concision signal the gate is blind to; omitted
+   *  for a failed run (there's no shipped solution to measure). */
+  loc?: number;
   /** Structured reason a failed run failed (from classifyRun); omitted/`none`
    *  for a passing run. The substrate for turning failures into interventions. */
   failureClass?: FailureClass;
@@ -38,6 +42,9 @@ export interface IVariantSummary {
   avgMs: number;
   /** Average quality across runs that were scored (0 if none). */
   avgQuality: number;
+  /** Average LOC across runs that recorded it — i.e. green runs (0 if none). The
+   *  lower-is-better concision metric, compared per task across variants. */
+  avgLoc: number;
   /** Count of failed runs by failure class (e.g. {"type-error": 2}); empty when
    *  no run carried a class. Lets a sweep show WHY a variant failed, not just how
    *  often. */

package/src/eval/index.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 export * from "./eval.types";
 export { judge } from "./judge";
 export { summarize } from "./score";
+export { countLoc, countTaskLoc, type ITaskLoc } from "./loc";
 export { analyzeEvents, type IRunMetrics } from "./metrics";
 export {
   classifyRun,

package/src/eval/loc.ts ADDED Viewed

@@ -0,0 +1,56 @@
+import { join } from "node:path";
+/**
+ * Lines-of-code counter — a cheap structural proxy for solution SIZE, used by the
+ * eval sweep to measure concision (the axis the gate is blind to: it checks that
+ * code is correct, never that it is lean).
+ *
+ * Counts non-blank, non-comment lines. This is deliberately a HEURISTIC (the
+ * ponytail-benchmark approach), not a parse: block comments are stripped, then
+ * blank lines and line-comment-only lines are dropped. A comment marker inside a
+ * string literal is treated as a comment — acceptable, because LOC is only ever
+ * compared between solutions to the SAME task, where that noise is constant.
+ */
+export function countLoc(content: string): number {
+  const withoutBlocks = content.replace(/\/\*[\s\S]*?\*\//g, "");
+  return withoutBlocks
+    .split("\n")
+    .map((line) => line.trim())
+    .filter((line) => line.length > 0 && !line.startsWith("//")).length;
+}
+/** Total + per-file LOC for a task's editable files. */
+export interface ITaskLoc {
+  totalLoc: number;
+  perFile: Record<string, number>;
+}
+/**
+ * Sum LOC across a task's editable `files` (resolved under `cwd`; glob patterns
+ * are expanded, plain filenames match themselves). Run AFTER a green solution
+ * exists, so it measures what the model actually shipped. A pattern that matches
+ * nothing contributes 0.
+ */
+export async function countTaskLoc(
+  cwd: string,
+  patterns: readonly string[]
+): Promise<ITaskLoc> {
+  const perFile: Record<string, number> = {};
+  for (const pattern of patterns) {
+    const glob = new Bun.Glob(pattern);
+    for await (const rel of glob.scan({ cwd, onlyFiles: true })) {
+      if (rel in perFile) {
+        continue;
+      }
+      perFile[rel] = countLoc(await Bun.file(join(cwd, rel)).text());
+    }
+  }
+  const totalLoc = Object.values(perFile).reduce((acc, n) => acc + n, 0);
+  return { totalLoc, perFile };
+}

package/src/eval/report.ts CHANGED Viewed

@@ -144,8 +144,8 @@ function baselineCell(report: IVariantReport, baseline: string | null): string {
  *  (p < 0.05) from the baseline. */
 export function renderSweepReportMarkdown(report: ISweepReport): string {
   const header =
-    "| Variant | Runs | Pass | 95% CI | Cycles | Ms | Quality | vs baseline |\n" +
-    "| --- | --- | --- | --- | --- | --- | --- | --- |";
+    "| Variant | Runs | Pass | 95% CI | Cycles | Ms | Quality | LOC | vs baseline |\n" +
+    "| --- | --- | --- | --- | --- | --- | --- | --- | --- |";
   const rows = report.variants.map((v) => {
     const ci = `${pct(v.passRateCI[0])}–${pct(v.passRateCI[1])}`;
@@ -153,7 +153,7 @@ export function renderSweepReportMarkdown(report: ISweepReport): string {
     return (
       `| ${v.label} | ${String(v.runs)} | ${pct(v.passRate)} | ${ci} | ` +
       `${v.avgCycles.toFixed(1)} | ${String(Math.round(v.avgMs))} | ` +
-      `${v.avgQuality.toFixed(1)} | ${baselineCell(v, report.baseline)} |`
+      `${v.avgQuality.toFixed(1)} | ${v.avgLoc.toFixed(1)} | ${baselineCell(v, report.baseline)} |`
     );
   });

package/src/eval/score.ts CHANGED Viewed

@@ -20,6 +20,7 @@ export function summarize(records: IRunRecord[]): IVariantSummary[] {
     const sum = (select: (r: IRunRecord) => number): number =>
       list.reduce((acc, r) => acc + select(r), 0);
     const scored = list.filter((r) => r.quality !== undefined);
+    const sized = list.filter((r) => r.loc !== undefined);
     const failureClasses: Record<string, number> = {};
     for (const r of list) {
@@ -41,6 +42,10 @@ export function summarize(records: IRunRecord[]): IVariantSummary[] {
         scored.length > 0
           ? scored.reduce((acc, r) => acc + (r.quality ?? 0), 0) / scored.length
           : 0,
+      avgLoc:
+        sized.length > 0
+          ? sized.reduce((acc, r) => acc + (r.loc ?? 0), 0) / sized.length
+          : 0,
       failureClasses,
     });
   }

package/src/lib/scope/scope.constants.ts CHANGED Viewed

@@ -3,20 +3,28 @@
 export const SCRATCH_PREFIX = "scratch/";
 /**
- * VENDORED, harness-authored files the model must NEVER edit or create. These are
- * tested, already-type-correct SDK/primitive/generated files: the web scaffold's
- * `src/lib/**` toolkit, the `src/components/ui/**` primitives, the MSW mock
- * machinery (`src/mocks/db.ts` + `src/mocks/browser.ts`), and any `*.gen.ts`
- * codegen output (TanStack's route tree). They are eslint- and prettier-ignored,
- * so a model that touches them sees tsc errors it cannot fix and — with
- * eslint-disable + `@ts-*` suppressions banned — has no escape, looping to the
- * turn cap. A write to any of these is rejected: a type error involving them is
- * always a wrong CALL SITE, never the library. (`src/mocks/handlers.ts` is NOT
- * vendored — the model registers its mock resources there.)
+ * VENDORED, harness-authored files the model must NEVER rewrite — the SPECIFIC
+ * tested/generated files the web scaffold ships, NOT whole directories. The guard
+ * exists for ONE reason: stop the model from "fixing" the generic SDK files
+ * (`use-resource`/`api`/`result`/…), whose strict-TS errors are unfixable and —
+ * with eslint-disable + `@ts-*` suppressions banned — trap it in a loop. A type
+ * error involving one is always a wrong CALL SITE, never the library.
+ *
+ * Deliberately scoped to exact files so the model stays FREE to do what the
+ * guidance tells it: create its own helpers in `src/lib/<name>.ts` and primitives
+ * in `src/components/ui/<x>.tsx` (and edit `src/components/ui/button.tsx`). It is
+ * also applied ONLY to web-scaffold sessions (via `IToolContext.vendored`), so a
+ * normal repo that happens to have a `src/lib/` is never affected. `src/mocks/
+ * handlers.ts` is NOT vendored — the model registers its mock resources there.
  */
-export const VENDORED_PATTERNS = [
-  "src/lib/**",
-  "src/components/ui/**",
+export const WEB_VENDORED_PATTERNS = [
+  "src/lib/utils.ts",
+  "src/lib/result.ts",
+  "src/lib/object.ts",
+  "src/lib/sort.ts",
+  "src/lib/api.ts",
+  "src/lib/use-resource.ts",
+  "src/lib/use-form.ts",
   "src/mocks/db.ts",
   "src/mocks/browser.ts",
   "**/*.gen.ts",

package/src/lib/scope/scope.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { resolve, relative } from "node:path";
-import { SCRATCH_PREFIX, VENDORED_PATTERNS } from "./scope.constants";
+import { SCRATCH_PREFIX } from "./scope.constants";
 /**
  * Normalize a model-supplied path against the workspace root, fixing the common
@@ -27,11 +27,12 @@ export function isInScope(file: string, patterns: string[]): boolean {
   return patterns.some((pattern) => new Bun.Glob(pattern).match(file));
 }
-/** True when `file` is a VENDORED, harness-authored file the model must not
- *  touch (`src/lib/**`, `src/components/ui/**`, the MSW machinery, `*.gen.ts`).
- *  Expects the workspace-relative form (`normalizeWorkspacePath` first). */
-export function isVendored(file: string): boolean {
-  return VENDORED_PATTERNS.some((pattern) => new Bun.Glob(pattern).match(file));
+/** True when `file` matches one of `patterns` — the VENDORED, harness-authored
+ *  files the model must not rewrite. `patterns` is supplied per-session
+ *  (`IToolContext.vendored`), so it is empty (⇒ always false) outside a web
+ *  scaffold. Expects the workspace-relative form (`normalizeWorkspacePath` first). */
+export function isVendored(file: string, patterns: readonly string[]): boolean {
+  return patterns.some((pattern) => new Bun.Glob(pattern).match(file));
 }
 /** A file the model may write: its editable scope, OR a throwaway scratch file.

package/src/loop/prompt/index.ts CHANGED Viewed

@@ -1,2 +1,9 @@
-export { SYSTEM, CHAT_SYSTEM, COMPACT_SYSTEM, seedPrompt } from "./prompt";
+export {
+  SYSTEM,
+  CHAT_SYSTEM,
+  COMPACT_SYSTEM,
+  SCRATCH_SIMPLICITY_GUIDANCE,
+  buildSystemPrompt,
+  seedPrompt,
+} from "./prompt";
 export { renderFileSection, exportedSymbols } from "./project-map";

package/src/loop/prompt/prompt.ts CHANGED Viewed

@@ -1,7 +1,8 @@
 import type { ITask } from "../../spec";
 import type { IFileView } from "../../lib/fs";
-import { PACK_REGISTRY } from "../../stack-detection";
+import { PACK_REGISTRY, isWebStack } from "../../stack-detection";
 import type { IStackProfile } from "../../stack-detection";
+import { flags } from "../../config";
 import { renderFileSection } from "./project-map";
 /** The implement-agent system prompt: who it is, the tools, and the strict-TS
@@ -16,6 +17,40 @@ export const SYSTEM = [
   "The gate is `tsc` strict + eslint with every rule an error, so write TypeScript that satisfies it: interfaces are `I`-prefixed; `===`; no `var`; never the non-null `!` — guard index access (`const x = arr[i]; if (x === undefined) {...}`); no `any` and no `as` — type every parameter (e.g. `.reduce((acc: number, r: number) => …, 0)`); explicit boolean conditions. When the gate flags errors in read-only files (tests/types), they come from your editable file being missing or wrong-shaped and vanish once it's correct — don't edit them.",
 ].join("\n");
+/** Appended to SYSTEM for from-scratch, NON-web utility builds when the simplicity
+ *  flag is on. Pushes the model toward the shortest correct solution — the axis the
+ *  gate is blind to (it checks correctness, never concision). Carve-outs keep it
+ *  from fighting the gate's hard rules. NOT for web builds (the views/components
+ *  architecture legitimately needs many small files). */
+export const SCRATCH_SIMPLICITY_GUIDANCE = [
+  "SIMPLICITY — write the SHORTEST correct solution that passes the gate:",
+  "  • The task's `files:` are the ceiling — do NOT add modules, classes, or",
+  "    abstractions the task didn't ask for. One focused implementation.",
+  "  • Prefer built-ins and a direct expression over step-by-step temporaries:",
+  "    chain the transforms (`xs.filter(...).map(...)`) instead of naming each",
+  "    intermediate, when it stays readable.",
+  "  • NO narration/step comments ('// Step 1', '// first we…') — the code is the",
+  "    explanation. A comment earns its place only for a non-obvious WHY.",
+  "  • This NEVER overrides the gate: keep `I`-prefixed interfaces, no `as`/`any`/`!`,",
+  "    real validation at trust boundaries, and any test siblings the gate requires.",
+].join("\n");
+/** SYSTEM + the simplicity block when it applies, else SYSTEM unchanged. Gated on
+ *  the `simplicity` flag AND a from-scratch (`!hasExistingCode`) NON-web build —
+ *  so it never touches existing-repo edits or web/UI apps. */
+export function buildSystemPrompt(
+  hasExistingCode: boolean,
+  stack: IStackProfile | undefined
+): string {
+  const webish = stack !== undefined && isWebStack(stack);
+  if (!flags.simplicity() || hasExistingCode || webish) {
+    return SYSTEM;
+  }
+  return `${SYSTEM}\n\n${SCRATCH_SIMPLICITY_GUIDANCE}`;
+}
 /**
  * The INTERACTIVE assistant prompt (the CLI's `Session`). Unlike `SYSTEM` — which
  * drives a single task to a gate and is told to "keep going until green" — this

package/src/loop/run.ts CHANGED Viewed

@@ -17,7 +17,7 @@ import type {
 } from "./loop.types";
 import { mineLessons, consolidate as consolidateMemory } from "./memory";
 import { flags } from "../config";
-import { SYSTEM, seedPrompt } from "./prompt";
+import { buildSystemPrompt, seedPrompt } from "./prompt";
 import { detectStack } from "../stack-detection";
 import type { TtsrManager } from "./ttsr";
 import {
@@ -295,17 +295,23 @@ export async function runTask(
   const editable = await readFiles(cwd, task.files);
   const context = await readFiles(cwd, task.context ?? []);
+  // Existing code to navigate? (editable files already have content). Only then
+  // do the LSP nav tools earn their decision-surface cost — see toolsFor(). Also
+  // gates the scratch-simplicity guidance (from-scratch builds only).
+  const hasExistingCode = editable.some((f) => f.content.trim().length > 0);
   const messages: IChatMessage[] = [
-    { role: "system", content: SYSTEM },
+    {
+      role: "system",
+      content: buildSystemPrompt(hasExistingCode, stackProfile),
+    },
     {
       role: "user",
       content: seedPrompt(task, editable, context, stackProfile),
     },
   ];
-  // Existing code to navigate? (editable files already have content). Only then
-  // do the LSP nav tools earn their decision-surface cost — see toolsFor().
-  const hasExistingCode = editable.some((f) => f.content.trim().length > 0);
   const tools = toolsFor(hasExistingCode);
   // Mode-aware reasoning cap: scratch tasks over-think unbounded, so default

package/src/loop/session.ts CHANGED Viewed

@@ -18,6 +18,7 @@ import {
 } from "../agent";
 import { flags } from "../config";
 import { readFiles } from "../lib/fs";
+import { WEB_VENDORED_PATTERNS } from "../lib/scope";
 import { validate, type ErrorParser } from "../validate";
 import { detectStack } from "../stack-detection";
 import {
@@ -521,9 +522,11 @@ export class Session {
             report({ kind: "tool", task: SESSION_ID, message });
           });
+    const isWebScaffold = cfg.scaffoldWeb === true || cfg.scaffoldUi === true;
     const ctx: ILoopCtx = {
       task,
       cwd: cfg.cwd,
+      ...(isWebScaffold ? { vendored: WEB_VENDORED_PATTERNS } : {}),
       tsService: await buildTsService(cfg.cwd),
       ...(cfg.lintFile === undefined ? {} : { lintFile: cfg.lintFile }),
       parse: cfg.parse,

package/src/loop/tools/file-ops.ts CHANGED Viewed

@@ -206,7 +206,7 @@ export async function doEdit(
   edit.file = normalizeWorkspacePath(ctx.cwd, edit.file);
-  if (isVendored(edit.file)) {
+  if (isVendored(edit.file, ctx.vendored ?? [])) {
     return reject(
       ctx,
       "edit:vendored",
@@ -312,7 +312,7 @@ export async function doCreate(
   create.file = normalizeWorkspacePath(ctx.cwd, create.file);
-  if (isVendored(create.file)) {
+  if (isVendored(create.file, ctx.vendored ?? [])) {
     return reject(
       ctx,
       "create:vendored",

package/src/loop/tools/tool-context.ts CHANGED Viewed

@@ -8,6 +8,10 @@ export interface IToolContext {
   cwd: string;
   /** Editable scope — `edit`/`create` outside it are rejected. */
   files: string[];
+  /** VENDORED file globs the model must not rewrite (the web scaffold's shipped
+   *  SDK/generated files). Set only for web-scaffold sessions; absent/empty ⇒ the
+   *  vendored guard is inert (non-web builds and normal repos are unaffected). */
+  vendored?: readonly string[];
   report: Reporter;
   task: string;
   /** In-process TypeScript LanguageService — backs the semantic tools

package/src/loop/turn.ts CHANGED Viewed

@@ -115,6 +115,9 @@ export interface ILoopCtx {
   /** Wired by the interactive CLI: turn this workspace into a web project (the
    *  `scaffold_web` tool calls it). Threaded into the tool context. */
   setupWeb?: (framework: string) => Promise<void>;
+  /** VENDORED file globs the model must not rewrite (web-scaffold sessions only).
+   *  Threaded into the tool context; absent ⇒ the vendored guard is inert. */
+  vendored?: readonly string[];
   /** PLAN MODE (set via Session.setPlanMode): threaded into the tool context so
    *  mutating tools are rejected at dispatch — the model only plans. */
   readOnly?: boolean;
@@ -462,6 +465,7 @@ export async function runToolCalls(
       tsService: ctx.tsService,
       ...(ctx.signal === undefined ? {} : { signal: ctx.signal }),
       ...(ctx.setupWeb === undefined ? {} : { setupWeb: ctx.setupWeb }),
+      ...(ctx.vendored === undefined ? {} : { vendored: ctx.vendored }),
       ...(ctx.readOnly === undefined ? {} : { readOnly: ctx.readOnly }),
       ...(ctx.mcpRegistry === undefined
         ? {}

package/src/stack-detection/detect.ts CHANGED Viewed

@@ -9,6 +9,21 @@ import {
   type IPackId,
 } from "./packs";
+/** The pack ids that identify a WEB (browser UI) build. Used to scope behaviours
+ *  that must NOT apply to web apps (e.g. the scratch-simplicity prompt, whose
+ *  "shortest solution / no extra files" advice fights the views/components
+ *  architecture the web scaffold requires). */
+const WEB_PACK_IDS: readonly string[] = [
+  "react",
+  "react-component-architecture",
+  "tanstack-query",
+];
+/** True when the detected stack is a web/browser UI build. */
+export function isWebStack(profile: IStackProfile): boolean {
+  return profile.packs.some((p) => WEB_PACK_IDS.includes(p));
+}
 /** Parse package.json and extract deps/devDeps, tolerating missing/invalid JSON. */
 async function loadPackageDeps(cwd: string): Promise<{
   deps: Set<string>;

package/src/stack-detection/index.ts CHANGED Viewed

@@ -1,3 +1,3 @@
 export * from "./stack-detection.types";
-export { detectStack } from "./detect";
+export { detectStack, isWebStack } from "./detect";
 export { PACK_REGISTRY, ALWAYS_ON_PACKS } from "./packs";

package/src/web-templates.ts CHANGED Viewed

@@ -317,6 +317,12 @@ export const Route = createRootRoute({
 });
 `;
+// The placeholder home carries `data-tsforge-stub` (the SAME sentinel scaffold_routes
+// stubs use) so the gate's stub-check FAILS until the model replaces it with the real
+// home. Without this, an unbuilt app — just the scaffold + maybe some types — passes
+// the gate (vite builds, this page renders non-blank, no scaffold_routes stubs to
+// catch) and is falsely declared "done". The model removes the marker when it builds
+// the real home.
 const INDEX_ROUTE_TSX = `import { createFileRoute } from "@tanstack/react-router";
 import { Button } from "@/components/ui/button";
@@ -327,7 +333,7 @@ export const Route = createFileRoute("/")({
 function Home() {
   return (
-    <main className="flex min-h-screen flex-col items-center justify-center gap-6 bg-background text-foreground">
+    <main data-tsforge-stub className="flex min-h-screen flex-col items-center justify-center gap-6 bg-background text-foreground">
       <h1 className="text-3xl font-bold">app</h1>
       <Button>Get started</Button>
     </main>