npm - @drawcall/create - Versions diffs - 0.0.0 → 0.1.2 - Mend

@drawcall/create 0.0.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/dist/command.d.ts CHANGED

@@ -1,9 +1,11 @@
 import { Command } from "commander";
 import { type HarnessName, type Stage } from "./constants.js";
 import { createProject } from "./create.js";
+import { superviseBuild } from "./supervisor.js";
 export declare function createCreateCommand(command?: Command, options?: {
     version?: string;
     createProject?: typeof createProject;
+    superviseBuild?: typeof superviseBuild;
 }): Command;
 export declare function splitHarnessArgs(args: string[]): {
     promptParts: string[];

package/dist/command.js CHANGED

@@ -1,6 +1,7 @@
 import { Command } from "commander";
 import { CliError, DEFAULT_HARNESS_TIMEOUT_MS, HARNESS_NAMES, MAX_BUILD_TURNS, STAGES } from "./constants.js";
 import { createProject } from "./create.js";
+import { superviseBuild } from "./supervisor.js";
 import { formatDuration } from "./progress-log.js";
 const CLI_OPTION_NAMES = [
     "--stage",
@@ -8,24 +9,38 @@ const CLI_OPTION_NAMES = [
     "--harness-timeout-minutes",
     "--max-turns",
     "--name",
-    "--skip-template"
+    "--skip-template",
+    "--supervise"
 ];
 export function createCreateCommand(command = new Command(), options = {}) {
     const create = options.createProject ?? createProject;
+    const supervise = options.superviseBuild ?? superviseBuild;
     command
         .name("drawcall-create")
         .description("Create a project with an installed local harness")
-        .argument("<args...>", "what should be created; use -- to pass following args to the harness")
+        .argument("[args...]", "what should be created; use -- to pass following args to the harness (omit with --supervise)")
         .option("--stage <name>", `which stage to run (${STAGES.join(", ")})`)
         .option("--harness <name>", `harness to use (${HARNESS_NAMES.join(", ")})`)
         .option("--harness-timeout-minutes <count>", `timeout for each harness invocation in minutes (default: ${DEFAULT_HARNESS_TIMEOUT_MS / 60_000})`)
         .option("--max-turns <count>", `maximum build turns (default: ${MAX_BUILD_TURNS})`)
         .option("--name <name>", "project directory name (default: a generated dc-xxxxxx name)")
         .option("--skip-template", "during a full run, skip starter template search and build from scratch")
+        .option("--supervise", "run only the build loop, crash-safe: each turn is a separate child process, the tree is reset to the last good commit before every turn, and a killed turn resumes automatically")
         .passThroughOptions()
         .version(options.version ?? "0.0.0")
-        .action(async (args, commandOptions) => {
+        .action(async (args = [], commandOptions) => {
         const { promptParts, harnessArgs } = splitHarnessArgs(args);
+        if (commandOptions.supervise === true) {
+            const result = await supervise({
+                harness: parseHarnessName(commandOptions.harness),
+                harnessArgs,
+                harnessTimeoutMinutes: parsePositiveInteger(commandOptions.harnessTimeoutMinutes, "--harness-timeout-minutes"),
+                maxTurns: parsePositiveInteger(commandOptions.maxTurns, "--max-turns")
+            });
+            console.log(formatSupervised(result));
+            process.exitCode = result.stop === "stuck" ? 1 : 0;
+            return;
+        }
         const prompt = parsePromptParts(promptParts);
         const result = await create(prompt, {
             stage: parseStage(commandOptions.stage),
@@ -91,6 +106,14 @@ export function parseHarnessTimeoutMs(value) {
     const minutes = parsePositiveInteger(value, "--harness-timeout-minutes");
     return minutes === undefined ? undefined : minutes * 60_000;
 }
+function formatSupervised(result) {
+    const reason = {
+        "plan-consumed": "Plan consumed — build complete",
+        "budget-exhausted": "Reached the build-turn budget; PLAN.md still records remaining work",
+        stuck: "Stopped: no forward progress across consecutive turns"
+    };
+    return ["", reason[result.stop], `Turns ${result.turns}`, `Path  ${result.projectDir}`].join("\n");
+}
 function formatSuccess(result) {
     const lines = [
         "",

package/dist/constants.d.ts CHANGED

@@ -2,7 +2,7 @@ export declare const HARNESS_NAMES: readonly ["opencode", "codex", "claude", "pi
 export type HarnessName = (typeof HARNESS_NAMES)[number];
 export declare const STAGES: readonly ["scaffold", "template", "survey-assets", "survey-technology", "goal", "plan", "build", "full"];
 export type Stage = (typeof STAGES)[number];
-export declare const PARALLEL_STAGES: readonly ["template", "survey-assets", "survey-technology"];
+export declare const PARALLEL_STAGES: readonly ["survey-assets", "survey-technology"];
 export declare const SKILLS: readonly ["drawcall-ai/vitexec", "drawcall-ai/uikitml", "drawcall-ai/acta", "drawcall-ai/market", "drawcall-ai/speech", "drawcall-ai/flipbook", "drawcall-ai/skills"];
 export declare const PACKAGES: readonly ["vitexec@latest", "@drawcall/uikitml@latest", "@drawcall/acta@latest", "@drawcall/market@latest", "@drawcall/flipbook@latest", "@pmndrs/uikit@latest", "@pmndrs/pointer-events@latest", "@pmndrs/viverse@latest", "navcat@^0.4.1", "three@^0.184.0", "vite@^8.0.16", "typescript@^6.0.3", "elics@^3.4.2", "postprocessing@^6.39.1"];
 export declare const PACKAGE_NAMES: readonly ["vitexec", "@drawcall/uikitml", "@drawcall/acta", "@drawcall/market", "@drawcall/flipbook", "@pmndrs/uikit", "@pmndrs/pointer-events", "@pmndrs/viverse", "navcat", "three", "vite", "typescript", "elics"];

package/dist/constants.js CHANGED

@@ -1,4 +1,12 @@
-export const HARNESS_NAMES = ["opencode", "codex", "claude", "pi", "gemini", "grok", "forge"];
+export const HARNESS_NAMES = [
+    "opencode",
+    "codex",
+    "claude",
+    "pi",
+    "gemini",
+    "grok",
+    "forge"
+];
 export const STAGES = [
     "scaffold",
     "template",
@@ -9,10 +17,10 @@ export const STAGES = [
     "build",
     "full"
 ];
-// In a "full" run these three stages run concurrently after scaffolding: they write disjoint
-// outputs (template touches the product; the surveys write gitignored scratch files) and none
-// of them depends on the others, so they share a barrier before the goal stage.
-export const PARALLEL_STAGES = ["template", "survey-assets", "survey-technology"];
+// In a "full" run the template is applied first; then these two surveys run concurrently over the
+// applied state (they write disjoint gitignored scratch files and don't depend on each other) and
+// share a barrier before the goal stage.
+export const PARALLEL_STAGES = ["survey-assets", "survey-technology"];
 export const SKILLS = [
     "drawcall-ai/vitexec",
     "drawcall-ai/uikitml",

package/dist/create.js CHANGED

@@ -1,4 +1,4 @@
-import { existsSync } from "node:fs";
+import { appendFileSync, existsSync } from "node:fs";
 import { mkdir } from "node:fs/promises";
 import { basename, join, resolve } from "node:path";
 import { CliError, DEFAULT_HARNESS_TIMEOUT_MS, GOAL_FILE, MAX_BUILD_TURNS, PLAN_FILE, SESSION_LOG_FILE } from "./constants.js";
@@ -20,11 +20,24 @@ const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
 async function runTurn(harnessRunner, prompt) {
     let result = await runHarnessTurn(harnessRunner, prompt);
     for (let attempt = 2; attempt <= HARNESS_TURN_ATTEMPTS && result.exitCode !== 0 && !result.timedOut; attempt += 1) {
-        await delay(RETRY_BACKOFF_MS * (attempt - 1));
+        const waitMs = RETRY_BACKOFF_MS * (attempt - 1);
+        // A retry is otherwise invisible: the runner just logs a second identical "$ <harness> …"
+        // invocation. Mark it so the log reads as "turn failed → retrying" rather than as a mysterious
+        // duplicate run, and so a slow build turn can be told apart from one that silently re-ran.
+        logToSession(harnessRunner.cwd, `[drawcall-create] harness turn failed (exit ${result.exitCode}); retry ${attempt}/${HARNESS_TURN_ATTEMPTS} after ${Math.round(waitMs / 1000)}s`);
+        await delay(waitMs);
         result = await runHarnessTurn(harnessRunner, prompt);
     }
     return result;
 }
+function logToSession(cwd, line) {
+    try {
+        appendFileSync(join(cwd, SESSION_LOG_FILE), `${line}\n`);
+    }
+    catch {
+        // No session log on disk (e.g. a test's caller-supplied runner) — the retry proceeds anyway.
+    }
+}
 export async function createProject(prompt, options = {}) {
     const { env = process.env, commandExists = isCommandAvailable } = options;
     const stage = options.stage ?? "full";
@@ -94,9 +107,10 @@ async function runStages(stage, harnessRunner, progressLog, prompt, projectName,
     }
     if (stage === "scaffold")
         return stopped(0);
-    // template, survey-assets, and survey-technology are independent (disjoint outputs, no git,
-    // surveys don't depend on the template). A "full" run does all three concurrently behind one
-    // barrier; a single-stage run does just the requested one.
+    // A "full" run applies the template first, then surveys the applied state (assets + technology)
+    // concurrently behind one barrier — the surveys read the implementation the template installed,
+    // so they cannot run alongside it. A skip-template full run surveys a bare scaffold instead. A
+    // single-stage run does just the requested one (surveys then read whatever is already in the cwd).
     if (stage === "full") {
         const { exitCode } = options.skipTemplate
             ? await runSurveyGroup(harnessRunner, progressLog, prompt)
@@ -169,27 +183,43 @@ async function runGroupTurns(harnessRunner, prompts) {
     }
     return Promise.all(prompts.map((prompt) => runTurn(harnessRunner, prompt)));
 }
-/** Apply a fitting Market starter and survey assets/technology concurrently, then commit. */
+/** Apply a fitting Market starter, then survey assets/technology over the applied state. */
 async function runTemplateGroup(harnessRunner, progressLog, prompt) {
-    progressLog.start("template/surveys");
+    // The template runs first and commits, so the surveys that follow can read the applied
+    // implementation (its src/ and the assets it installed via Market), not race them — the
+    // whole point of the template path is to survey what is already here and modify it.
+    progressLog.start("template");
+    try {
+        const template = await runTurn(harnessRunner, buildTemplatePrompt(prompt));
+        if (template.exitCode !== 0) {
+            progressLog.fail("template");
+            return template;
+        }
+        await finishTemplate(harnessRunner);
+        progressLog.succeed("template");
+    }
+    catch (error) {
+        progressLog.fail("template");
+        throw error;
+    }
+    // The two surveys write disjoint scratch files and don't depend on each other, so they run
+    // concurrently (serially for opencode — see SERIAL_GROUP_HARNESSES).
+    progressLog.start("surveys");
     try {
-        // Array order is the call order: each runner pushes synchronously before its first await.
         const results = await runGroupTurns(harnessRunner, [
-            buildTemplatePrompt(prompt),
             buildSurveyAssetsPrompt(prompt),
             buildSurveyTechnologyPrompt(prompt)
         ]);
         const failed = results.find((result) => result.exitCode !== 0);
         if (failed) {
-            progressLog.fail("template/surveys");
+            progressLog.fail("surveys");
             return failed;
         }
-        await finishTemplate(harnessRunner);
-        progressLog.succeed("template/surveys");
+        progressLog.succeed("surveys");
         return { exitCode: 0 };
     }
     catch (error) {
-        progressLog.fail("template/surveys");
+        progressLog.fail("surveys");
         throw error;
     }
 }

package/dist/index.d.ts CHANGED

@@ -5,4 +5,5 @@ export * from "./progress-log.js";
 export * from "./harness.js";
 export * from "./scaffold.js";
 export * from "./create.js";
+export * from "./supervisor.js";
 export * from "./command.js";

package/dist/index.js CHANGED

@@ -5,4 +5,5 @@ export * from "./progress-log.js";
 export * from "./harness.js";
 export * from "./scaffold.js";
 export * from "./create.js";
+export * from "./supervisor.js";
 export * from "./command.js";

package/dist/prompts.js CHANGED

@@ -38,31 +38,37 @@ export function buildSurveyAssetsPrompt(userPrompt) {
 Goal: ${userPrompt}
-You are surveying, not planning or building: catalogue the Market assets that already exist and fit this goal, so the goal and plan stages can draw on real options.
+You are surveying, not planning or building: map the content (assets) this goal needs against what the project already has and what the Market can still add, so the goal and plan stages can modify what exists efficiently rather than re-acquire content that is already here.
-Search and preview the Drawcall Market with \`npx @drawcall/market\` (the \`market\` skill doc has the commands and the asset types). Start from the user-visible experience, not from obvious nouns alone: include the things the player sees, hears, controls, collides with, collects, inhabits, and receives as moment-to-moment feedback. For each distinct need, find the assets that fit and judge fit from the real preview — metadata, screenshots, files — not the name.
+Start with what is already installed. A starter may already be applied to this project, and the assets it brought in are the substrate the build modifies — not gaps to fill. Enumerate them with the \`market\` skill's installed-asset listing (read the skill doc for the command; it reads the project's lock file and prints every installed asset's name, version, type, and file path), and skim the project's own files (\`public/\` and the implementation) to see which of those are already wired in and as what. If nothing is installed yet — a bare scaffold — say so plainly; the survey is then purely about what to add.
-Before writing, cross-check the goal against the Market asset types that could carry it — templates, models, humanoid models and animations, textures, environments, sound effects, background music, and flipbooks. This is a coverage pass, not a checklist to pad the file: mention a type only when it serves this goal or when its absence creates a real gap. Pay special attention to asset needs that are easy to misclassify as "implementation": animation clips, surface materials, sky/HDR, continuous audio such as ambience or background music, one-shot sound effects, and visual feedback such as fire, impacts, magic, weather, UI/world pings, or explosions. When the goal needs both event feedback and ongoing mood, survey both; do not let sound effects stand in for music/ambience, or a visual effect stand in for its matching sound.
+Then survey what the goal still needs that is not already here. Start from the user-visible experience, not from obvious nouns alone: include the things the player sees, hears, controls, collides with, collects, inhabits, and receives as moment-to-moment feedback. For each distinct need, first check whether an already-installed asset covers it; only where none does, search and preview the Drawcall Market for a composable addition (\`npx @drawcall/market\`; the \`market\` skill doc has the commands and the asset types), and judge fit from the real preview — metadata, screenshots, files — not the name. Survey composable building blocks, not another turnkey starter: a second whole-game starter would substitute for building rather than supply a piece, and one may already be applied.
-Before finishing, check that each asset-shaped word or sensory promise in the goal has an entry or an explicit gap. If the goal names music, a storm, muzzle flashes, impacts, explosions, collectibles, a place type, a character type, or any similar visible/audible thing, the survey should either name fitting assets for it or say that Market has no fitting asset.
+Before writing, cross-check the goal against the Market asset types that could carry it — models, humanoid models and animations, textures, environments, sound effects, background music, and flipbooks. This is a coverage pass, not a checklist to pad the file: mention a type only when it serves this goal or when its absence creates a real gap. Pay special attention to asset needs that are easy to misclassify as "implementation": animation clips, surface materials, sky/HDR, continuous audio such as ambience or background music, one-shot sound effects, and visual feedback such as fire, impacts, magic, weather, UI/world pings, or explosions. When the goal needs both event feedback and ongoing mood, survey both; do not let sound effects stand in for music/ambience, or a visual effect stand in for its matching sound.
-Write your findings to ${ASSET_SURVEY_FILE} (scratch, not committed): one entry per user-visible need with the fitting asset(s), what each actually is, and the gaps where nothing fits. Keep it concrete and skimmable, e.g.:
-- Collectible / pickup feedback: \`pickups-coin@1.0.2\` plus a fitting pickup sound — low-poly gold disc and a short reward cue. Strong fit.
-- Large outdoor place: no fitting island terrain model found — gap; use terrain/material assets instead.`;
+Before finishing, check that each asset-shaped word or sensory promise in the goal has an entry — resolved as one of three: already installed, a fitting Market asset to add, or a gap where Market has no fitting asset. If the goal names music, a storm, muzzle flashes, impacts, explosions, collectibles, a place type, a character type, or any similar visible/audible thing, the survey should say which of the three it is.
+Write your findings to ${ASSET_SURVEY_FILE} (scratch, not committed): one entry per user-visible need, marking whether it is already installed (with the asset name and how it is wired), needs a Market addition (with the fitting asset and what it actually is), or is a gap where nothing fits. Keep it concrete and skimmable, e.g.:
+- Player character model: already installed — \`humanoid-tactical-commando@1.0.0\`, wired as the player. Reuse as-is.
+- Enemy zombie model: not installed — \`humanoid-undead-zombie@1.0.0\` fits (a hunched, decayed humanoid); add it.
+- Large outdoor terrain mesh: no fitting Market asset — gap; build from ground textures + scattered props.`;
 }
 export function buildSurveyTechnologyPrompt(userPrompt) {
     return `Stay in the current project directory.
 Goal: ${userPrompt}
-You are surveying, not planning or building: catalogue the installed technology that fits this goal, so the goal and plan stages can draw on what is real.
+You are surveying, not planning or building: catalogue the capability (technology) situation so the goal and plan stages can modify efficiently — what the project already builds and how, what installed technology serves the goal's remaining needs, and where nothing fits.
+Read the existing implementation first. A starter may already be applied: read the source it ships (the entry point, \`src/\`, and config) and map what the project already does — the systems it already has (the game loop and architecture, the controller and camera, combat, world generation, UI, audio, and so on) and the technology each is built on. This is what the build will keep or modify, so name it concretely: the system, what it does, and the skill/package/API it uses. If \`src/\` is a bare scaffold with nothing built yet, say so plainly; the survey is then about the toolkit and the goal's needs built from scratch.
-This project ships with a set of installed agent skills — each a \`SKILL.md\` doc covering one capability — and a set of npm packages, and that set changes over time. Discover what is actually installed now rather than assuming or working from memory: list and read every installed skill's \`SKILL.md\`, and read package.json for the runtime packages. Skim them all first so you know the full toolkit before routing a single need — the skill you skip is the one a need belongs to. Each \`SKILL.md\` is the authority on what its technology does, when it fits, and its limits; for installed library packages, confirm the specific exports against the real types, which can lag the docs, and for technology a skill adds on demand (assets, optional packages) trust the \`SKILL.md\` and note it as an install-time addition. Follow each skill's own routing and limits — when a skill says to compose specific pieces or warns against a shortcut, do that rather than guessing a turnkey that conflicts.
+This project also ships a set of installed agent skills — each a \`SKILL.md\` doc covering one capability — and a set of npm packages, and that set changes over time. Discover what is actually installed now rather than assuming or working from memory: list and read every installed skill's \`SKILL.md\`, and read package.json for the runtime packages. Skim them all first so you know the full toolkit before routing a single need — the skill you skip is the one a need belongs to. Each \`SKILL.md\` is the authority on what its technology does, when it fits, and its limits; for installed library packages, confirm the specific exports against the real types, which can lag the docs, and for technology a skill adds on demand (assets, optional packages) trust the \`SKILL.md\` and note it as an install-time addition. Follow each skill's own routing and limits — when a skill says to compose specific pieces or warns against a shortcut, do that rather than guessing a turnkey that conflicts.
-Decompose the goal into its concrete needs and map each to the technology that fits. Where no installed technology fits a need — often a game-system need like quests, inventory, AI, or a signature custom mechanic — name it as a fit-gap the build must implement itself, rather than forcing a poor fit.
+Decompose the goal into its concrete needs and map each to where it stands: already built in the implementation (name the system and whether it is reused as-is or modified toward the goal), served by an installed skill/package (the real API/exports that fit), or a fit-gap with no installed technology — often a game-system need like quests, inventory, AI, or a signature custom mechanic — that the build must implement itself. A need the current implementation already satisfies is the cheapest kind; say so rather than re-deriving it, and rather than forcing a poor fit where nothing matches.
-Write your findings to ${TECH_SURVEY_FILE} (scratch, not committed): one entry per need, with the fitting skill/package and the real API/exports/asset that serves it, or the gap where nothing fits. Keep it concrete and skimmable, e.g. one bullet per need:
-- <need>: <fitting skill/package> — <the real API/exports that matter> — strong fit, following the skill's recommended composition.
+Write your findings to ${TECH_SURVEY_FILE} (scratch, not committed): one entry per need, marking it already-built (the system and the tech it uses, reused or modified), installed-tech-fits (the skill/package and the real API/exports), or fit-gap (the build implements it). Keep it concrete and skimmable, e.g. one bullet per need:
+- <need>: already built — <system / file role> on <tech> — reuse as-is, or modify <how> toward the goal.
+- <need>: <fitting skill/package> — <the real API/exports that matter> — following the skill's recommended composition.
 - <need>: no installed technology fits — fit-gap, the build implements it.`;
 }
 export function buildGoalPrompt(userPrompt) {
@@ -73,7 +79,7 @@ Goal: ${userPrompt}
 ${buildPrinciples()}
 Create ${GOAL_FILE}: a concrete, fixed picture of the finished game that every later turn builds toward. It is the target, not a plan or task list, and stays stable as the project grows.
-Ground it in what actually exists: read the surveys ${ASSET_SURVEY_FILE} and ${TECH_SURVEY_FILE} for the assets and technology that fit, and read ${README_FILE} for the current state — weighing how close that state already is to what the request wants. When ${README_FILE} shows a substantial, coherent game already in the same space as the request (an applied starter, not a bare scaffold), anchor the finished picture on that real implementation: take the scope and shape it has already settled as the substrate, and reach past it only for what the request genuinely needs and the current state does not yet deliver — rather than re-deriving an idealized version that re-opens settled scope or adds large systems the starter deliberately leaves out. The closer the current state already is to realizing the request, the more the goal is bound to it; a bare scaffold or a poor-fit starter binds it not at all, and the goal is then the full target the request deserves. Name the handful that define the look, feel, and mechanics (e.g. "low-poly style, terrain from asset A, props B/C, movement via skill D") — the defining choices, not a full asset manifest (the surveys already hold that). Where nothing fits a needed part, say so as a fit-gap rather than quietly dropping it.
+Ground it in what actually exists: read the surveys ${ASSET_SURVEY_FILE} and ${TECH_SURVEY_FILE} for the assets and technology that fit, and read ${README_FILE} for the current state — weighing how close that state already is to what the request wants. When ${README_FILE} shows a substantial, coherent game already in the same space as the request (an applied starter, not a bare scaffold), anchor the finished picture on that real implementation: take the scope and shape it has already settled as the substrate, and reach past it only for what the request genuinely needs and the current state does not yet deliver — rather than re-deriving an idealized version that re-opens settled scope or adds large systems the starter deliberately leaves out. The closer the current state already is to realizing the request, the more the goal is bound to it; a bare scaffold or a poor-fit starter binds it not at all, and the goal is then the full target the request deserves. Binding to the substrate means reusing what works, not holding back on what is cheap to change and decisive to the player. A template becomes the request's own game only when it stops reading as the recognizable starter and reads as the requested game: the working systems and the settled scope stay, but the surface the player meets is re-shaped to the request — above all the game's own name and identity, and the signature screens and UI language that brand the original (a title card, an end-screen verdict, the framing copy a player would recognize). These changes are inexpensive and decisive, so the goal commits to a distinct identity for this game and treats re-shaping that recognizable surface as part of what the request needs, not optional polish. Reason from this specific request about which cheap, high-leverage changes carry the most of its feeling — do not work down a fixed list, and do not stop at swapping one system while the game still announces itself as the starter. 
Name the handful that define the look, feel, and mechanics (e.g. "low-poly style, terrain from asset A, props B/C, movement via skill D") — the defining choices, not a full asset manifest (the surveys already hold that). Where nothing fits a needed part, say so as a fit-gap rather than quietly dropping it.
 Write it the way a strong, short game design document reads — concrete, decisive, easy to picture — but as a layered design, not a flat list of headings. A game's design runs from the experience it is for at the top down to how each thing looks and feels in the player's hands at the bottom, and the levels are causal in both directions: the top decides why every lower thing exists, while the player meets the game from the bottom up — touching the feel of a single action first, and only through it sensing the experience you aimed at. So design top-down and keep each lower choice traceable to the level above (this enemy, this loop, this verb earns its place by serving the experience), and never stop at rules-on-paper — because the player lives at the bottom, the bottom must be drawn as concretely as the top.
 The levels below, with the questions that live at each, are context to reason from, not a template to fill or a checklist to tick. Reason from this specific game about which levels carry it and how deep each goes — a puzzler lives on its mechanics, an exploration game on its world and mood, an arcade game on feel and mastery, a story RPG on its characters and the plot and world they inhabit — and say plainly what you are deliberately keeping thin or absent, which is itself a design decision. Go as deep on the level a game lives on as that game needs: the depth a story game owes its characters and plot is the depth an action game owes its feel — do not let any one level default to thin because it is harder to write. Add a question a level needs that is not here.
@@ -104,6 +110,8 @@ Write ${PLAN_FILE}: the ordered steps from the current state to the goal. A step
 Size each step to the most a single build-turn can confidently build and stand behind in one go, and group into it the features that complete one testable capability together. Resolve the real tension between two failure modes: a pure one-mechanic-at-a-time slice proves out every turn but tends to build throwaway scaffolding (a stand-in you later discard, a thin version you rewrite) and pay for the same area twice; a pure feature-batch implements each feature once but is too big to build and prove in a turn. Aim for the middle — group the features that naturally belong to one capability so each is implemented once, against its real collaborators, while the step still fits and proves in one turn. A system that only proves out with another (shooting needs something to shoot, loot needs someone to drop it) is usually a cue to group the two into one step, not to split them behind a stand-in: prefer building a feature with its real collaborators when they fit the same turn. Reach for a deliberate stand-in only when the real collaborator genuinely cannot fit the same turn and the stand-in is cheap and minimal — never substantial scaffolding you will throw away. Build shared foundations right the first time: the ECS spine, the controller/camera rig, the audio system and other substrate should be established correctly when first needed and reused — not built thin and "consolidated" in a later refactor (a planned pure-refactor step is a sign a foundation was under-built, and a build-turn spent on rework is a step the budget can't afford). A step that cannot exist until an earlier one lands comes after it. Size each step to one build-turn's worth, and let the plan run exactly as long as the real distance from the current state to the goal demands — no longer. 
The build-turn budget is a ceiling, not a quota to fill: when the current state already realizes most of the goal the plan is correspondingly short — as few as a single step — and you neither pad it with generic polish or re-verification of what already works nor stretch a small delta to resemble a full build; when the distance is large, use as many right-sized steps as it takes, preferring a few whole steps to a long trail of fragments and never a step too large for one turn to finish and stand behind.
+When the plan starts from an applied starter rather than a blank repo, most steps are modifications of what already runs, not new construction: reskinning the look, renaming and re-theming the cast, swapping a model or texture, re-grading the lighting, re-parameterizing the procedural generation, re-pointing a system's inputs or win condition. Favor the cheapest change that lands the goal's intended shift in how the game reads and feels — a deep change in the experience need not be a deep change in the code, and re-shaping what exists usually beats rebuilding it. Plan the cheap, high-impact re-shapings of the player-facing surface that the goal commits to — the game's name and identity, the signature screens, the framing copy that brands the original — as first-class step content, not afterthoughts: each is a small edit, but together they are the highest-leverage work in a reskin, the easiest to drop under pressure, and what stops the result reading as the recognizable starter; leaving them undone leaves the player facing the old game's identity. Size and gate such a step exactly like a built-from-scratch slice — outcome, fit, and a gate that confirms it reads right when run — and do not add a step for any part of the goal the current state already satisfies.
 A slice is the whole of the thing it introduces, not its mechanic alone. Reason from the goal about what makes each one real in the player's hands — its animation and feedback, how the player comes to know what to do and can see their current goal, the transitions and screens that frame it — and carry those into the same step, never deferring them to a later polish the embodiment principle forbids. Which of these a slice lives on depends on what it is; weigh them, do not tick them off a list.
 Some of the game is not a vertical slice at all — the continuous, cross-cutting layers no single feature owns: the audio bed (ambience and music), the atmospheric look (lighting mood and the postprocessing pass), the shared HUD frame, and the first-load/loading screen. Place these deliberately instead of letting them fall to a final step a stalled plan may never reach. Introduce a layer's foundation in the first step where the player would feel its absence — world ambience and music with the first explorable world, the look pass once there is a scene to grade, the loading screen in the very first step (any real build loads assets before it can show anything, so a blank canvas is felt from the start) — and extend it as the game grows. Foundational mood is part of the feel the goal commits to, not last-minute polish; a game that reaches its last planned step before it has any music or atmosphere was planned in the wrong order.
@@ -132,7 +140,8 @@ ${buildSliceMethod()}
 Use ${README_FILE} as the claimed current state, ${GOAL_FILE} as the fixed goal, and ${PLAN_FILE} as the plan.
 Take the first remaining ${PLAN_FILE} step as this turn's task and build the whole of it — the grouped features it names — with the fitting skills/packages/assets, allowing only the small prerequisites or repairs that make the step actually work. A right-sized step is one turn's work, so complete it rather than fragmenting it.
 Only if the step genuinely cannot fit one turn, split off the smallest coherent remainder as a single new ${PLAN_FILE} step (not a trail of fragments), and finish the rest now. Do not add pure-refactor, cleanup, or "consolidate the architecture" steps that don't advance the product — build the foundation correctly here, which means laying the code out as cohesive modules from the first turn (follow each skill's recommended file layout, such as the ecs skill's one-file-per-component and one-file-per-system split, rather than piling the game into one growing main file) instead of leaving rework for a future turn the budget can't afford. When a feature needs a collaborator that exists later in the plan, prefer pulling it forward into this step over building a throwaway stand-in you will discard.
-Prove the result with a proof-run that actually launches and drives the real running repo this turn — a written description is never a substitute for a run. The proof is a machine-produced artifact saved under \`${PROOF_DIR}/\` (gitignored scratch): a screenshot or clip captured from the running app, or — if you cannot view images — a recorded runtime-state dump that asserts the real-done runtime facts, each produced by the command you actually ran rather than authored by hand. Look at the screenshots/clips from the player's seat and judge them against the goal's real-done bar, iterating on the build until it reads right; a prose "verification" note with no run behind it does not satisfy the gate. Then close or rewrite the ${PLAN_FILE} step to reflect what is actually proven; if a turn only proved part of the step, keep the unproven parts as first-class remaining steps rather than caveats.
+Prove the result with a proof-run that actually launches and drives the real running repo this turn — a written description is never a substitute for a run. The proof is a machine-produced artifact saved under \`${PROOF_DIR}/\` (gitignored scratch): a screenshot or clip captured from the running app, or — if you cannot view images — a recorded runtime-state dump that asserts the real-done runtime facts, each produced by the command you actually ran rather than authored by hand. Look at the screenshots/clips from the player's seat and judge them against the goal's real-done bar, iterating on the build until it reads right; a prose "verification" note with no run behind it does not satisfy the gate. When the step modifies an already-proven product, scope the proof to what this step actually changed — the new behavior, the new look, the new feedback — and rely on the carried proof for systems reused unchanged rather than re-driving and re-proving the whole game each turn; that re-verification is the budget the turn cannot afford. Spend the time it saves on completing the step's full named scope: the cheap, high-impact edits — the identity, the wordmark, the title and HUD copy, the palette — whose deep effect on how the product reads is easy to drop under time pressure are exactly the ones that make it read as its new self, so land them, never leave the player-facing name or framing describing the old product. Then update ${PLAN_FILE} to reflect what is actually proven: close the step you finished, and if a turn only proved part of it, keep the unproven parts as first-class remaining steps rather than caveats.
+When the last step is proven and the goal is genuinely realized in the running product — its playthroughs reproduce end to end and every real-done aspect is met — delete ${PLAN_FILE} so the pipeline knows the build is complete and stops; this is the done signal, so do not spend a further turn re-verifying what is already proven. While any real work remains, keep ${PLAN_FILE} holding only the steps that are still open.
 If real findings change the project understanding, update ${GOAL_FILE} lightly and honestly.
 Finally, update ${README_FILE} so it truthfully describes the new proven state, what changed, what remains in ${PLAN_FILE}, and any remaining gaps.`;
 }

package/dist/scaffold.js CHANGED Viewed

@@ -12,7 +12,24 @@ export async function initGitRepo(cwd, runner) {
 }
 export async function commitAll(cwd, runner, message) {
     await assertExitCode(runner({ command: "git", args: ["add", "-A"], cwd }), "failed to stage changes");
-    await assertExitCode(runner({ command: "git", args: ["commit", "--allow-empty", "-m", message], cwd }), "failed to commit changes");
+    // A build turn often commits its own work mid-turn (the harness runs git itself). When it does,
+    // the staged tree is already clean here and an --allow-empty commit would mint a second, empty
+    // "feat: build turn N" — so commit count drifts above turn count. Only commit when something is
+    // actually staged; otherwise the turn's own commit already records this turn's completion.
+    if (await isTreeClean(cwd, runner))
+        return;
+    await assertExitCode(runner({ command: "git", args: ["commit", "-m", message], cwd }), "failed to commit changes");
+}
+// True when `git add -A` left nothing staged (a clean index against HEAD). `git diff --cached
+// --quiet` exits 0 when there are no staged changes and 1 when there are, which is exactly the
+// signal we want — so we read its exit code rather than asserting it.
+async function isTreeClean(cwd, runner) {
+    const { exitCode } = await runner({
+        command: "git",
+        args: ["diff", "--cached", "--quiet"],
+        cwd
+    });
+    return exitCode === 0;
 }
 export async function initNpmProject(cwd, _runner) {
     const packageJsonPath = join(cwd, "package.json");

package/dist/subprocess.js CHANGED Viewed

@@ -1,13 +1,16 @@
 import { spawn } from "node:child_process";
 import { createWriteStream } from "node:fs";
 import { delimiter, dirname, join, resolve } from "node:path";
+import { createInterface } from "node:readline";
 import which from "which";
 import { CliError, TIMEOUT_EXIT_CODE, TIMEOUT_KILL_GRACE_MS } from "./constants.js";
+import { formatDuration } from "./progress-log.js";
 export function createSubprocessRunner(options = { stdio: "inherit" }) {
     return (invocation) => runSubprocess(invocation, options);
 }
 function runSubprocess({ command, args, cwd, timeoutMs }, options) {
     return new Promise((resolveResult, reject) => {
+        const startedAt = Date.now();
         // A separate process group lets a timeout kill the whole child tree at once.
         const detached = process.platform !== "win32";
         // When a log file is set, capture all child output into it and keep the
@@ -20,8 +23,14 @@ function runSubprocess({ command, args, cwd, timeoutMs }, options) {
             stdio: logStream ? ["ignore", "pipe", "pipe"] : (options.stdio ?? "inherit")
         });
         if (logStream) {
-            child.stdout?.pipe(logStream, { end: false });
-            child.stderr?.pipe(logStream, { end: false });
+            // Stamp every captured line with the wall-clock time we received it instead of piping raw
+            // bytes. Reading the harness's output at our own boundary and timing it with our own clock is
+            // the one timing signal robust across every harness and version — it depends on no private
+            // session-transcript format or structured-output schema (both drift, and for backgrounded
+            // tool calls misreport durations). The gaps between timestamps are then where the wall-clock
+            // went: a long stall with no output is the model thinking or one command grinding.
+            forwardWithTimestamps(child.stdout, logStream);
+            forwardWithTimestamps(child.stderr, logStream);
         }
         let timedOut = false;
         let killTimer;
@@ -60,10 +69,21 @@ function runSubprocess({ command, args, cwd, timeoutMs }, options) {
             reject(error);
         });
         child.once("exit", (code, signal) => {
+            const exitCode = timedOut ? TIMEOUT_EXIT_CODE : signal ? 1 : (code ?? 1);
+            // Always record how long the command ran, so the log shows where wall-clock goes (each
+            // scaffold step, and each harness turn's total) without digging into the harness's own
+            // session transcript. A non-zero exit is named on the same line so a failure is unmistakable
+            // and the boundary between a failed turn and a following retry stays legible. Timeouts
+            // already logged their own notice, so only the duration is added for them.
+            if (logStream) {
+                const elapsed = formatDuration(Date.now() - startedAt);
+                const failure = !timedOut && exitCode !== 0
+                    ? ` — exit ${exitCode}${signal ? ` (signal ${signal})` : ""}`
+                    : "";
+                logStream.write(`[drawcall-create] ${command} done in ${elapsed}${failure}\n`);
+            }
             cleanup();
-            if (timedOut)
-                return resolveResult({ exitCode: TIMEOUT_EXIT_CODE, timedOut: true });
-            resolveResult({ exitCode: signal ? 1 : (code ?? 1), timedOut: false });
+            resolveResult({ exitCode, timedOut });
         });
     });
 }
@@ -100,6 +120,23 @@ function openSessionLog(logFile, command) {
     stream.write(`\n$ ${command.join(" ")}\n`);
     return stream;
 }
+// Copy a child stream into the log a line at a time, stamping each line with the wall-clock time we
+// received it. readline does the line splitting (including a final line with no trailing newline,
+// and \r\n) so a timestamp only ever prefixes a whole line.
+function forwardWithTimestamps(source, sink) {
+    if (!source)
+        return;
+    const lines = createInterface({ input: source });
+    lines.on("line", (line) => sink.write(`${logTimestamp()} ${line}\n`));
+}
+// Absolute wall-clock HH:MM:SS.mmm so any two lines anywhere in the log — within a turn or across
+// stages — can be subtracted directly, and the gap from the last output line to the "done in"
+// footer (the idle tail before the process exits) is visible too.
+function logTimestamp() {
+    const now = new Date();
+    const pad = (value, length = 2) => String(value).padStart(length, "0");
+    return `[${pad(now.getHours())}:${pad(now.getMinutes())}:${pad(now.getSeconds())}.${pad(now.getMilliseconds(), 3)}]`;
+}
 function killChildProcess(pid, detached, signal) {
     if (!pid)
         return;

package/dist/supervisor.d.ts ADDED Viewed

@@ -0,0 +1,28 @@
+import { type HarnessName } from "./constants.js";
+import { type CommandResult } from "./subprocess.js";
+export type RunBuildTurnChild = (cwd: string) => Promise<CommandResult>;
+export type SuperviseBuildOptions = {
+    cwd?: string;
+    env?: NodeJS.ProcessEnv;
+    maxTurns?: number;
+    harness?: HarnessName;
+    harnessArgs?: string[];
+    harnessTimeoutMinutes?: number;
+    runBuildTurnChild?: RunBuildTurnChild;
+};
+export type SuperviseBuildResult = {
+    projectDir: string;
+    turns: number;
+    /** Why the loop stopped — surfaced to the caller and the session log. */
+    stop: "plan-consumed" | "budget-exhausted" | "stuck";
+};
+/**
+ * Run the build stage so it ALWAYS completes or cleanly resumes, even when a turn's child process
+ * is killed mid-turn by an uncatchable OOM/jetsam SIGKILL. Each turn runs as a separate child, so
+ * this supervisor holds almost no memory and is an unlikely OOM victim itself; if a turn-child dies
+ * without committing, the supervisor resets to the last known-good commit and retries.
+ *
+ * The repo must already be scaffolded, surveyed, and planned (a committed PLAN.md) — run the
+ * earlier stages with a normal `createProject` first. This is the build loop only.
+ */
+export declare function superviseBuild(options?: SuperviseBuildOptions): Promise<SuperviseBuildResult>;

package/dist/supervisor.js ADDED Viewed

@@ -0,0 +1,110 @@
+import { execFileSync } from "node:child_process";
+import { appendFileSync, existsSync, readFileSync } from "node:fs";
+import { join, resolve } from "node:path";
+import { fileURLToPath } from "node:url";
+import { CliError, MAX_BUILD_TURNS, PLAN_FILE, SESSION_LOG_FILE } from "./constants.js";
+import { createSubprocessRunner } from "./subprocess.js";
+// Two consecutive turns that neither move HEAD to a new commit nor change PLAN.md's contents mean
+// the build is wedged (a turn that keeps crashing the same way, or a no-op turn the harness can't
+// get past). Stopping then is honest: looping further just burns hours re-running the same dead step.
+const STUCK_ATTEMPT_LIMIT = 2;
+/**
+ * Run the build stage so it ALWAYS completes or cleanly resumes, even when a turn's child process
+ * is killed mid-turn by an uncatchable OOM/jetsam SIGKILL. Each turn runs as a separate child, so
+ * this supervisor holds almost no memory and is an unlikely OOM victim itself; if a turn-child dies
+ * without committing, the supervisor resets to the last known-good commit and retries.
+ *
+ * The repo must already be scaffolded, surveyed, and planned (a committed PLAN.md) — run the
+ * earlier stages with a normal `createProject` first. This is the build loop only.
+ */
+export async function superviseBuild(options = {}) {
+    const cwd = resolve(options.cwd ?? process.cwd());
+    const env = options.env ?? process.env;
+    const maxTurns = options.maxTurns ?? MAX_BUILD_TURNS;
+    const runBuildTurnChild = options.runBuildTurnChild ?? defaultRunBuildTurnChild(env, options);
+    if (!existsSync(join(cwd, ".git"))) {
+        throw new CliError(`supervised build expects an existing git repo at ${cwd}`);
+    }
+    const planPath = join(cwd, PLAN_FILE);
+    let staleAttempts = 0;
+    for (let turn = 1; turn <= maxTurns; turn += 1) {
+        if (!existsSync(planPath))
+            return done(cwd, turn - 1, "plan-consumed");
+        // The soccer hazard: a prior turn killed mid-edit can leave the tree dirty with partial,
+        // partially-destructive changes (e.g. half-deleted source). Build turns re-read repo state and
+        // redo the step, so resume ONLY from a clean committed state. `clean -fd` (not -x) preserves
+        // node_modules and the gitignored surveys/proof scratch; never `git add -A` here.
+        resetToLastGoodCommit(cwd);
+        const before = captureProgressMarker(cwd, planPath);
+        await runBuildTurnChild(cwd);
+        const after = captureProgressMarker(cwd, planPath);
+        if (!existsSync(planPath))
+            return done(cwd, turn, "plan-consumed");
+        if (madeProgress(before, after)) {
+            staleAttempts = 0;
+            continue;
+        }
+        staleAttempts += 1;
+        if (staleAttempts >= STUCK_ATTEMPT_LIMIT) {
+            logToSession(cwd, `[drawcall-create] supervised build stuck: ${STUCK_ATTEMPT_LIMIT} turns with no new commit and no ${PLAN_FILE} change`);
+            return done(cwd, turn, "stuck");
+        }
+    }
+    return done(cwd, maxTurns, existsSync(planPath) ? "budget-exhausted" : "plan-consumed");
+}
+function madeProgress(before, after) {
+    return after.head !== before.head || after.planText !== before.planText;
+}
+function captureProgressMarker(cwd, planPath) {
+    return {
+        head: headCommit(cwd),
+        planText: existsSync(planPath) ? readFileSync(planPath, "utf8") : ""
+    };
+}
+// HEAD's commit hash, or "" before the first commit exists. `git rev-parse HEAD` throws on an empty
+// repo, which we read as "no commit yet" rather than a failure.
+function headCommit(cwd) {
+    try {
+        return execFileSync("git", ["rev-parse", "HEAD"], { cwd, encoding: "utf8" }).trim();
+    }
+    catch {
+        return "";
+    }
+}
+function resetToLastGoodCommit(cwd) {
+    execFileSync("git", ["reset", "--hard", "HEAD"], { cwd, stdio: "ignore" });
+    execFileSync("git", ["clean", "-fd"], { cwd, stdio: "ignore" });
+}
+function done(cwd, turns, stop) {
+    return { projectDir: cwd, turns, stop };
+}
+function logToSession(cwd, line) {
+    try {
+        appendFileSync(join(cwd, SESSION_LOG_FILE), `${line}\n`);
+    }
+    catch {
+        // No session log on disk (e.g. a test working dir) — the supervisor proceeds anyway.
+    }
+}
+// Re-invoke this CLI's own build stage as a fresh child: `--stage build` runs exactly one turn
+// (create.ts turnBudget), so the heavy turn state lives and dies in the child, not in the
+// supervisor. cli.js sits next to this compiled module.
+function defaultRunBuildTurnChild(env, options) {
+    const cliPath = fileURLToPath(new URL("./cli.js", import.meta.url));
+    const runner = createSubprocessRunner({ env, stdio: "inherit" });
+    const args = [cliPath, "--stage", "build"];
+    if (options.harness)
+        args.push("--harness", options.harness);
+    if (options.harnessTimeoutMinutes !== undefined) {
+        args.push("--harness-timeout-minutes", String(options.harnessTimeoutMinutes));
+    }
+    args.push(BUILD_RESUME_PROMPT);
+    if (options.harnessArgs && options.harnessArgs.length > 0) {
+        args.push("--", ...options.harnessArgs);
+    }
+    return (cwd) => runner({ command: process.execPath, args, cwd });
+}
+// The build stage re-reads the committed records (GOAL.md/PLAN.md/README.md) and continues the
+// plan, so the prompt is just a resume marker — the real instructions live in the build prompt the
+// child assembles. The CLI requires a non-empty prompt.
+const BUILD_RESUME_PROMPT = "resume the build from the committed plan";

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@drawcall/create",
-  "version": "0.0.0",
+  "version": "0.1.2",
   "type": "module",
   "description": "Create projects with an installed local harness.",
   "license": "MIT",
@@ -12,7 +12,7 @@
     "node": ">=20"
   },
   "bin": {
-    "drawcall-create": "./dist/cli.js"
+    "drawcall-create": "dist/cli.js"
   },
   "exports": {
     ".": {
@@ -24,6 +24,15 @@
   "files": [
     "dist"
   ],
+  "scripts": {
+    "dev": "tsx src/cli.ts",
+    "build": "node -e \"require('fs').rmSync('dist', { recursive: true, force: true })\" && tsc -p tsconfig.json && node -e \"require('fs').chmodSync('dist/cli.js', 0o755)\"",
+    "typecheck": "tsc -p tsconfig.json --noEmit && tsc -p tsconfig.test.json",
+    "test": "vitest run",
+    "format": "prettier --write .",
+    "format:check": "prettier --check .",
+    "prepublishOnly": "npm run typecheck && npm test && npm run build"
+  },
   "dependencies": {
     "commander": "^14.0.3",
     "which": "^6.0.1"
@@ -35,13 +44,5 @@
     "tsx": "^4.19.2",
     "typescript": "^5.7.2",
     "vitest": "^4.1.8"
-  },
-  "scripts": {
-    "dev": "tsx src/cli.ts",
-    "build": "node -e \"require('fs').rmSync('dist', { recursive: true, force: true })\" && tsc -p tsconfig.json && node -e \"require('fs').chmodSync('dist/cli.js', 0o755)\"",
-    "typecheck": "tsc -p tsconfig.json --noEmit && tsc -p tsconfig.test.json",
-    "test": "vitest run",
-    "format": "prettier --write .",
-    "format:check": "prettier --check ."
   }
-}
+}