npm - @drawcall/create - Versions diffs - 0.2.1 → 0.2.3 - Mend

@drawcall/create 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/dist/harness.d.ts CHANGED Viewed

@@ -2,7 +2,8 @@ import { Context, Effect, Layer } from "effect";
 import { type HarnessName } from "./constants.js";
 import { PreflightError } from "./errors.js";
 import { type CommandResult, Shell } from "./shell.js";
-export declare function harnessInvocation(harness: HarnessName, prompt: string, cwd: string, harnessArgs?: ReadonlyArray<string>): {
+export declare function sessionCapable(harness: HarnessName): boolean;
+export declare function harnessInvocation(harness: HarnessName, prompt: string, cwd: string, harnessArgs?: ReadonlyArray<string>, sessionArgs?: ReadonlyArray<string>): {
     command: string;
     args: string[];
 };
@@ -12,9 +13,15 @@ export interface HarnessTurn {
     readonly cwd: string;
     readonly harnessArgs?: ReadonlyArray<string>;
     readonly timeoutMs?: number;
+    readonly session?: {
+        readonly token?: string;
+    };
+}
+export interface HarnessResult extends CommandResult {
+    readonly sessionToken?: string;
 }
 export interface HarnessService {
-    readonly runTurn: (turn: HarnessTurn) => Effect.Effect<CommandResult>;
+    readonly runTurn: (turn: HarnessTurn) => Effect.Effect<HarnessResult>;
     readonly select: (requested: HarnessName | undefined) => Effect.Effect<HarnessName, PreflightError>;
 }
 declare const Harness_base: Context.TagClass<Harness, "Harness", HarnessService>;

package/dist/harness.js CHANGED Viewed

@@ -1,48 +1,140 @@
+import { randomUUID } from "node:crypto";
 import { Context, Effect, Layer } from "effect";
 import { HARNESS_NAMES } from "./constants.js";
 import { PreflightError } from "./errors.js";
 import { Shell } from "./shell.js";
+const preassign = (begin, resume) => ({
+    begin: () => {
+        const id = randomUUID();
+        return { args: begin(id), id };
+    },
+    resume
+});
+// Per-harness session continuity. Mechanisms verified against each harness's docs/source (2026).
+const SESSION_ADAPTERS = {
+    // opencode mints `ses_…`; `run --print-logs` surfaces it on the first turn, `run --session <id>`
+    // continues it on later turns (validated: context is retained across invocations).
+    opencode: {
+        begin: () => ({ args: ["--print-logs"] }),
+        resume: (id) => ["--session", id],
+        capture: (output) => output.match(/\bses_[A-Za-z0-9]{8,}\b/)?.[0]
+    },
+    // Claude Code: pre-set the id with --session-id, continue with --resume.
+    claude: preassign((id) => ["--session-id", id], (id) => ["--resume", id]),
+    // Gemini CLI: same shape as Claude.
+    gemini: preassign((id) => ["--session-id", id], (id) => ["--resume", id]),
+    // Grok CLI: a single --session flag both creates-with-id and resumes.
+    grok: preassign((id) => ["--session", id], (id) => ["--session", id]),
+    // forge: --conversation-id both starts and resumes a stored conversation.
+    forge: preassign((id) => ["--conversation-id", id], (id) => ["--conversation-id", id]),
+    // pi: --session takes the id for both start and resume.
+    pi: preassign((id) => ["--session", id], (id) => ["--session", id]),
+    // codex mints a thread id; `exec --json` surfaces it, `exec resume <id>` continues it.
+    codex: {
+        begin: () => ({ args: ["--json"] }),
+        resume: (id) => ["resume", id],
+        capture: (output) => output.match(/\bthread_[A-Za-z0-9-]{8,}\b/)?.[0]
+    }
+};
+// Whether a harness can continue a session across build turns. All currently-supported harnesses can;
+// the supervisor only enables same-session for those that do, and degrades to cold turns otherwise.
+export function sessionCapable(harness) {
+    return SESSION_ADAPTERS[harness] !== undefined;
+}
 // How each harness is invoked headlessly: the flags that make it run one non-interactive turn and
-// auto-approve the tool calls a build needs (writes, npm, proof runs). This is hard-won per-harness
-// knowledge, kept verbatim from the original — the rewrite changes the orchestration around it, not
-// the invocations themselves.
-export function harnessInvocation(harness, prompt, cwd, harnessArgs = []) {
+// auto-approve the tool calls a build needs (writes, npm, proof runs). `sessionArgs` are the adapter's
+// continuity flags, placed right after the harness's leading subcommand/mode flags so they apply to
+// this turn. This is hard-won per-harness knowledge — the rewrite changes the orchestration around it.
+export function harnessInvocation(harness, prompt, cwd, harnessArgs = [], sessionArgs = []) {
     switch (harness) {
         case "opencode":
             return {
                 command: "opencode",
-                args: ["run", "--dangerously-skip-permissions", "--dir", cwd, ...harnessArgs, prompt]
+                args: [
+                    "run",
+                    "--dangerously-skip-permissions",
+                    ...sessionArgs,
+                    "--dir",
+                    cwd,
+                    ...harnessArgs,
+                    prompt
+                ]
             };
         case "codex":
-            return { command: "codex", args: ["exec", "--skip-git-repo-check", ...harnessArgs, prompt] };
+            return {
+                command: "codex",
+                args: ["exec", ...sessionArgs, "--skip-git-repo-check", ...harnessArgs, prompt]
+            };
         case "claude":
             return {
                 command: "claude",
-                args: ["--print", "--permission-mode", "bypassPermissions", ...harnessArgs, prompt]
+                args: [
+                    "--print",
+                    ...sessionArgs,
+                    "--permission-mode",
+                    "bypassPermissions",
+                    ...harnessArgs,
+                    prompt
+                ]
             };
         case "pi":
-            return { command: "pi", args: [...harnessArgs, "-p", prompt] };
+            return { command: "pi", args: [...sessionArgs, ...harnessArgs, "-p", prompt] };
         case "gemini":
             return {
                 command: "gemini",
-                args: ["--approval-mode", "yolo", "--skip-trust", ...harnessArgs, "--prompt", prompt]
+                args: [
+                    "--approval-mode",
+                    "yolo",
+                    "--skip-trust",
+                    ...sessionArgs,
+                    ...harnessArgs,
+                    "--prompt",
+                    prompt
+                ]
             };
         case "grok":
             return {
                 command: "grok",
-                args: ["--always-approve", "--output-format", "plain", ...harnessArgs, "-p", prompt]
+                args: [
+                    "--always-approve",
+                    "--output-format",
+                    "plain",
+                    ...sessionArgs,
+                    ...harnessArgs,
+                    "-p",
+                    prompt
+                ]
             };
         case "forge":
-            return { command: "forge", args: ["-C", cwd, ...harnessArgs, "-p", prompt] };
+            return { command: "forge", args: ["-C", cwd, ...sessionArgs, ...harnessArgs, "-p", prompt] };
     }
 }
 export class Harness extends Context.Tag("Harness")() {
 }
 const live = (shell) => ({
-    runTurn: ({ harness, prompt, cwd, harnessArgs, timeoutMs }) => {
-        const { command, args } = harnessInvocation(harness, prompt, cwd, harnessArgs);
-        return shell.run({ command, args, cwd, timeoutMs });
-    },
+    runTurn: ({ harness, prompt, cwd, harnessArgs, timeoutMs, session }) => Effect.gen(function* () {
+        const adapter = session ? SESSION_ADAPTERS[harness] : undefined;
+        let sessionArgs = [];
+        let begunId;
+        if (adapter && session) {
+            if (session.token) {
+                sessionArgs = adapter.resume(session.token);
+            }
+            else {
+                const begun = adapter.begin();
+                sessionArgs = begun.args;
+                begunId = begun.id;
+            }
+        }
+        const { command, args } = harnessInvocation(harness, prompt, cwd, harnessArgs, sessionArgs);
+        const result = yield* shell.run({ command, args, cwd, timeoutMs });
+        // The continuation token: the one we resumed, the one we minted (preassign), or — for a capture
+        // harness's first turn — whatever id it surfaced in its output. Undefined keeps the next turn cold.
+        const sessionToken = session?.token ??
+            begunId ??
+            (session && adapter?.capture ? adapter.capture(result.output) : undefined);
+        return { ...result, sessionToken };
+    }),
     select: (requested) => Effect.gen(function* () {
         if (requested) {
             if (yield* shell.exists(requested))

package/dist/prompts.js CHANGED Viewed

@@ -138,6 +138,7 @@ ${buildPrinciples()}
 ${buildSliceMethod()}
 Use ${README_FILE} as the claimed current state, ${GOAL_FILE} as the fixed goal, and ${PLAN_FILE} as the plan.
+For how to use a package, the authoritative references are each skill's \`SKILL.md\` (its real API + examples) and the survey notes from the survey stage — rely on those. Do not re-derive APIs by exhaustively reading \`node_modules\` type-definitions: that reading can consume the entire turn before a single line of the game is written. Write the feature against the documented API and let \`tsc\`/runtime/proof-run errors point you at the few specifics still worth checking — building and correcting from real errors is faster and more reliable than reading every type up front, so favour getting a runnable slice on screen early in the turn over front-loading research.
 Take the first remaining ${PLAN_FILE} step as this turn's task and build the whole of it — the grouped features it names — with the fitting skills/packages/assets, allowing only the small prerequisites or repairs that make the step actually work. A right-sized step is one turn's work, so complete it rather than fragmenting it.
 Only if the step genuinely cannot fit one turn, split off the smallest coherent remainder as a single new ${PLAN_FILE} step (not a trail of fragments), and finish the rest now. Do not add pure-refactor, cleanup, or "consolidate the architecture" steps that don't advance the product — build the foundation correctly here, which means laying the code out as cohesive modules from the first turn (follow each skill's recommended file layout, such as the ecs skill's one-file-per-component and one-file-per-system split, rather than piling the game into one growing main file) instead of leaving rework for a future turn the budget can't afford. When a feature needs a collaborator that exists later in the plan, prefer pulling it forward into this step over building a throwaway stand-in you will discard.
 Prove the result with a proof-run that actually launches and drives the real running repo this turn — a written description is never a substitute for a run. The proof is a machine-produced artifact saved under \`${PROOF_DIR}/\` (gitignored scratch): a screenshot or clip captured from the running app, or — if you cannot view images — a recorded runtime-state dump that asserts the real-done runtime facts, each produced by the command you actually ran rather than authored by hand. Look at the screenshots/clips from the player's seat and judge them against the goal's real-done bar, iterating on the build until it reads right; a prose "verification" note with no run behind it does not satisfy the gate. When the step modifies an already-proven product, scope the proof to what this step actually changed — the new behavior, the new look, the new feedback — and rely on the carried proof for systems reused unchanged rather than re-driving and re-proving the whole game each turn; that re-verification is the budget the turn cannot afford. Spend the time it saves on completing the step's full named scope: the cheap, high-impact edits — the identity, the wordmark, the title and HUD copy, the palette — whose deep effect on how the product reads is easy to drop under time pressure are exactly the ones that make it read as its new self, so land them, never leave the player-facing name or framing describing the old product. Then update ${PLAN_FILE} to reflect what is actually proven: close the step you finished, and if a turn only proved part of it, keep the unproven parts as first-class remaining steps rather than caveats.

package/dist/stages.d.ts CHANGED Viewed

@@ -14,5 +14,7 @@ export interface RunContext {
 }
 type StageEnv = Harness | Scaffold | Git;
 export declare const runStage: (stage: Exclude<PipelineStage, "build">, ctx: RunContext, runMeta: RunMeta | undefined) => Effect.Effect<void, RunFailure, StageEnv>;
-export declare const buildTurn: (ctx: RunContext, turnNumber: number) => Effect.Effect<void, RunFailure, StageEnv>;
+export declare const buildTurn: (ctx: RunContext, turnNumber: number, session?: {
+    readonly token?: string;
+}) => Effect.Effect<string | undefined, RunFailure, StageEnv>;
 export {};

package/dist/stages.js CHANGED Viewed

@@ -12,16 +12,17 @@ import { buildBuildPrompt, buildGoalPrompt, buildPlanPrompt, buildSurveyAssetsPr
 // Run one harness turn for a stage, turning a non-zero exit / timeout into a classified, explainable
 // StageFailure. A clean exit is success — the stage's own assertions (did GOAL.md appear?) catch the
 // "ran fine but produced nothing" case separately.
-const turn = (stage, prompt, ctx) => Effect.flatMap(Harness, (harness) => harness
+const turn = (stage, prompt, ctx, session) => Effect.flatMap(Harness, (harness) => harness
     .runTurn({
     harness: ctx.harness,
     prompt,
     cwd: ctx.cwd,
     harnessArgs: ctx.harnessArgs,
-    timeoutMs: ctx.timeoutMs
+    timeoutMs: ctx.timeoutMs,
+    session
 })
     .pipe(Effect.flatMap((r) => r.exitCode === 0
-    ? Effect.void
+    ? Effect.succeed(r)
     : Effect.fail(new StageFailure({
         stage,
         reason: classifyHarnessOutput(r.output, r.exitCode, r.timedOut),
@@ -96,10 +97,12 @@ const plan = (ctx) => Effect.gen(function* () {
 // genuinely consumed it deletes PLAN.md, which is our "done" signal. The checkpoint is --allow-empty,
 // so a turn that committed real work and a turn that did nothing both leave a marker — and the
 // supervisor tells them apart by the tree hash, not by whether a commit appeared.
-const build = (ctx, turnNumber) => Effect.gen(function* () {
-    yield* turn("build", buildBuildPrompt(ctx.prompt, turnNumber), ctx);
+const build = (ctx, turnNumber, session) => Effect.gen(function* () {
+    const result = yield* turn("build", buildBuildPrompt(ctx.prompt, turnNumber), ctx, session);
     const done = !existsSync(join(ctx.cwd, PLAN_FILE));
     yield* checkpoint(ctx.cwd, done ? "done" : "build");
+    // The token to continue this build's warm session next turn (undefined keeps the next turn cold).
+    return result.sessionToken;
 });
 // Dispatch a non-build stage. Build is driven directly by the supervisor (it owns the turn number
 // and budget), so it is intentionally excluded from this signature.
@@ -117,4 +120,4 @@ export const runStage = (stage, ctx, runMeta) => {
             return plan(ctx);
     }
 };
-export const buildTurn = (ctx, turnNumber) => build(ctx, turnNumber);
+export const buildTurn = (ctx, turnNumber, session) => build(ctx, turnNumber, session);

package/dist/supervisor.d.ts CHANGED Viewed

@@ -17,6 +17,7 @@ export interface RunOptions {
     readonly harnessTimeoutMs?: number;
     readonly maxTurns?: number;
     readonly skipTemplate?: boolean;
+    readonly buildSession?: boolean;
     readonly backoff?: (attempt: number) => Duration.Duration;
 }
 export type Outcome = "done" | "stuck" | "budget-exhausted";

package/dist/supervisor.js CHANGED Viewed

@@ -5,7 +5,7 @@ import { humanReason } from "./classify.js";
 import { MAX_BUILD_TURNS } from "./constants.js";
 import { PreflightError, reasonOf } from "./errors.js";
 import { Git } from "./git.js";
-import { Harness } from "./harness.js";
+import { Harness, sessionCapable } from "./harness.js";
 import { Logger } from "./logger.js";
 import { planNext } from "./resume.js";
 import { buildTurn, runStage } from "./stages.js";
@@ -55,12 +55,10 @@ export const run = (options) => Effect.gen(function* () {
         maxTurns,
         skipTemplate: ctx.skipTemplate
     };
-    return yield* loop(ctx, runMeta, maxTurns, options.backoff ?? backoff, {
-        staleAttempts: 0,
-        errors: []
-    });
+    const sessionOn = options.buildSession ?? process.env.DC_BUILD_SESSION !== "off";
+    return yield* loop(ctx, runMeta, maxTurns, options.backoff ?? backoff, { staleAttempts: 0, errors: [] }, sessionOn, undefined);
 });
-const loop = (ctx, runMeta, maxTurns, backoffFor, state) => Effect.gen(function* () {
+const loop = (ctx, runMeta, maxTurns, backoffFor, state, sessionOn, buildToken) => Effect.gen(function* () {
     const git = yield* Git;
     const before = yield* fingerprint(ctx.cwd);
     const step = yield* planNext(ctx.cwd);
@@ -72,10 +70,18 @@ const loop = (ctx, runMeta, maxTurns, backoffFor, state) => Effect.gen(function*
         return finish(ctx.cwd, "budget-exhausted", state, turns);
     }
     yield* Console.log(`→ ${label(step.stage, ctx.harness)}`);
+    // Same-session only spans consecutive build turns, and only for harnesses that support it.
+    const session = step.stage === "build" && sessionOn && sessionCapable(ctx.harness)
+        ? { token: buildToken }
+        : undefined;
     const work = step.stage === "build"
-        ? buildTurn(ctx, turns + 1)
-        : runStage(step.stage, ctx, step.stage === "scaffold" ? runMeta : undefined);
+        ? buildTurn(ctx, turns + 1, session)
+        : runStage(step.stage, ctx, step.stage === "scaffold" ? runMeta : undefined).pipe(Effect.as(undefined));
     const outcome = yield* Effect.either(work);
+    // The token to continue with next turn — only from a successful build turn. Any failure runs
+    // resetClean (the tree returns to the last checkpoint), which makes the warm session inconsistent
+    // with the working tree, so we drop it and the next build turn begins a fresh session.
+    const nextToken = outcome._tag === "Right" && step.stage === "build" ? outcome.right : undefined;
     let reason;
     if (outcome._tag === "Left") {
         reason = reasonOf(outcome.left);
@@ -93,8 +99,8 @@ const loop = (ctx, runMeta, maxTurns, backoffFor, state) => Effect.gen(function*
         if (outcome._tag === "Right")
             yield* Console.log(`✓ ${label(step.stage, ctx.harness)}`);
         // Forward progress — reset the stale counter and the error accumulator; only the failures that
-        // actually precede a stop should appear in its report.
-        return yield* loop(ctx, runMeta, maxTurns, backoffFor, { staleAttempts: 0, errors: [] });
+        // actually precede a stop should appear in its report. Carry the warm-session token forward.
+        return yield* loop(ctx, runMeta, maxTurns, backoffFor, { staleAttempts: 0, errors: [] }, sessionOn, nextToken);
     }
     const staleAttempts = state.staleAttempts + 1;
     const errors = reason ? [...state.errors, reason] : state.errors;
@@ -105,7 +111,7 @@ const loop = (ctx, runMeta, maxTurns, backoffFor, state) => Effect.gen(function*
     }
     yield* note(`no progress; attempt ${staleAttempts}/${MAX_STALE_ATTEMPTS}, backing off`);
     yield* Effect.sleep(backoffFor(staleAttempts));
-    return yield* loop(ctx, runMeta, maxTurns, backoffFor, { staleAttempts, errors });
+    return yield* loop(ctx, runMeta, maxTurns, backoffFor, { staleAttempts, errors }, sessionOn, undefined);
 });
 const finish = (projectDir, outcome, state, buildTurns) => ({
     projectDir,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@drawcall/create",
-  "version": "0.2.1",
+  "version": "0.2.3",
   "type": "module",
   "description": "Create projects with an installed local harness.",
   "license": "MIT",