@drawcall/create 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/harness.d.ts +9 -2
- package/dist/harness.js +107 -15
- package/dist/prompts.js +1 -0
- package/dist/stages.d.ts +3 -1
- package/dist/stages.js +9 -6
- package/dist/supervisor.d.ts +1 -0
- package/dist/supervisor.js +17 -11
- package/package.json +1 -1
package/dist/harness.d.ts
CHANGED
|
@@ -2,7 +2,8 @@ import { Context, Effect, Layer } from "effect";
|
|
|
2
2
|
import { type HarnessName } from "./constants.js";
|
|
3
3
|
import { PreflightError } from "./errors.js";
|
|
4
4
|
import { type CommandResult, Shell } from "./shell.js";
|
|
5
|
-
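export declare function harnessInvocation(harness: HarnessName, prompt: string, cwd: string, harnessArgs?: ReadonlyArray<string>): {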
|
|
5
|
+
export declare function sessionCapable(harness: HarnessName): boolean;
|
|
6
|
+
export declare function harnessInvocation(harness: HarnessName, prompt: string, cwd: string, harnessArgs?: ReadonlyArray<string>, sessionArgs?: ReadonlyArray<string>): {
|
|
6
7
|
command: string;
|
|
7
8
|
args: string[];
|
|
8
9
|
};
|
|
@@ -12,9 +13,15 @@ export interface HarnessTurn {
|
|
|
12
13
|
readonly cwd: string;
|
|
13
14
|
readonly harnessArgs?: ReadonlyArray<string>;
|
|
14
15
|
readonly timeoutMs?: number;
|
|
16
|
+
readonly session?: {
|
|
17
|
+
readonly token?: string;
|
|
18
|
+
};
|
|
19
|
+
}
|
|
20
|
+
export interface HarnessResult extends CommandResult {
|
|
21
|
+
readonly sessionToken?: string;
|
|
15
22
|
}
|
|
16
23
|
export interface HarnessService {
|
|
17
|
-
readonly runTurn: (turn: HarnessTurn) => Effect.Effect<CommandResult>;
|
|
24
|
+
readonly runTurn: (turn: HarnessTurn) => Effect.Effect<HarnessResult>;
|
|
18
25
|
readonly select: (requested: HarnessName | undefined) => Effect.Effect<HarnessName, PreflightError>;
|
|
19
26
|
}
|
|
20
27
|
declare const Harness_base: Context.TagClass<Harness, "Harness", HarnessService>;
|
package/dist/harness.js
CHANGED
|
@@ -1,48 +1,140 @@
|
|
|
1
|
+
import { randomUUID } from "node:crypto";
|
|
1
2
|
import { Context, Effect, Layer } from "effect";
|
|
2
3
|
import { HARNESS_NAMES } from "./constants.js";
|
|
3
4
|
import { PreflightError } from "./errors.js";
|
|
4
5
|
import { Shell } from "./shell.js";
|
|
6
|
+
const preassign = (begin, resume) => ({
|
|
7
|
+
begin: () => {
|
|
8
|
+
const id = randomUUID();
|
|
9
|
+
return { args: begin(id), id };
|
|
10
|
+
},
|
|
11
|
+
resume
|
|
12
|
+
});
|
|
13
|
+
// Per-harness session continuity. Mechanisms verified against each harness's docs/source (2026).
|
|
14
|
+
const SESSION_ADAPTERS = {
|
|
15
|
+
// opencode mints `ses_…`; `run --print-logs` surfaces it on the first turn, `run --session <id>`
|
|
16
|
+
// continues it on later turns (validated: context is retained across invocations).
|
|
17
|
+
opencode: {
|
|
18
|
+
begin: () => ({ args: ["--print-logs"] }),
|
|
19
|
+
resume: (id) => ["--session", id],
|
|
20
|
+
capture: (output) => output.match(/\bses_[A-Za-z0-9]{8,}\b/)?.[0]
|
|
21
|
+
},
|
|
22
|
+
// Claude Code: pre-set the id with --session-id, continue with --resume.
|
|
23
|
+
claude: preassign((id) => ["--session-id", id], (id) => ["--resume", id]),
|
|
24
|
+
// Gemini CLI: same shape as Claude.
|
|
25
|
+
gemini: preassign((id) => ["--session-id", id], (id) => ["--resume", id]),
|
|
26
|
+
// Grok CLI: a single --session flag both creates-with-id and resumes.
|
|
27
|
+
grok: preassign((id) => ["--session", id], (id) => ["--session", id]),
|
|
28
|
+
// forge: --conversation-id both starts and resumes a stored conversation.
|
|
29
|
+
forge: preassign((id) => ["--conversation-id", id], (id) => ["--conversation-id", id]),
|
|
30
|
+
// pi: --session takes the id for both start and resume.
|
|
31
|
+
pi: preassign((id) => ["--session", id], (id) => ["--session", id]),
|
|
32
|
+
// codex mints a thread id; `exec --json` surfaces it, `exec resume <id>` continues it.
|
|
33
|
+
codex: {
|
|
34
|
+
begin: () => ({ args: ["--json"] }),
|
|
35
|
+
resume: (id) => ["resume", id],
|
|
36
|
+
capture: (output) => output.match(/\bthread_[A-Za-z0-9-]{8,}\b/)?.[0]
|
|
37
|
+
}
|
|
38
|
+
};
|
|
39
|
+
// Whether a harness can continue a session across build turns. All currently-supported harnesses can;
|
|
40
|
+
// the supervisor only enables same-session for those that do, and degrades to cold turns otherwise.
|
|
41
|
+
export function sessionCapable(harness) {
|
|
42
|
+
return SESSION_ADAPTERS[harness] !== undefined;
|
|
43
|
+
}
|
|
5
44
|
// How each harness is invoked headlessly: the flags that make it run one non-interactive turn and
|
|
6
|
-
// auto-approve the tool calls a build needs (writes, npm, proof runs).
|
|
7
|
-
//
|
|
8
|
-
// the
|
|
9
|
-
export function harnessInvocation(harness, prompt, cwd, harnessArgs = []) {
|
|
45
|
+
// auto-approve the tool calls a build needs (writes, npm, proof runs). `sessionArgs` are the adapter's
|
|
46
|
+
// continuity flags, placed right after the harness's leading subcommand/mode flags so they apply to
|
|
47
|
+
// this turn. This is hard-won per-harness knowledge — the rewrite changes the orchestration around it.
|
|
48
|
+
export function harnessInvocation(harness, prompt, cwd, harnessArgs = [], sessionArgs = []) {
|
|
10
49
|
switch (harness) {
|
|
11
50
|
case "opencode":
|
|
12
51
|
return {
|
|
13
52
|
command: "opencode",
|
|
14
|
-
args: [
|
|
53
|
+
args: [
|
|
54
|
+
"run",
|
|
55
|
+
"--dangerously-skip-permissions",
|
|
56
|
+
...sessionArgs,
|
|
57
|
+
"--dir",
|
|
58
|
+
cwd,
|
|
59
|
+
...harnessArgs,
|
|
60
|
+
prompt
|
|
61
|
+
]
|
|
15
62
|
};
|
|
16
63
|
case "codex":
|
|
17
|
-
return {
|
|
64
|
+
return {
|
|
65
|
+
command: "codex",
|
|
66
|
+
args: ["exec", ...sessionArgs, "--skip-git-repo-check", ...harnessArgs, prompt]
|
|
67
|
+
};
|
|
18
68
|
case "claude":
|
|
19
69
|
return {
|
|
20
70
|
command: "claude",
|
|
21
|
-
args: [
|
|
71
|
+
args: [
|
|
72
|
+
"--print",
|
|
73
|
+
...sessionArgs,
|
|
74
|
+
"--permission-mode",
|
|
75
|
+
"bypassPermissions",
|
|
76
|
+
...harnessArgs,
|
|
77
|
+
prompt
|
|
78
|
+
]
|
|
22
79
|
};
|
|
23
80
|
case "pi":
|
|
24
|
-
return { command: "pi", args: [...harnessArgs, "-p", prompt] };
|
|
81
|
+
return { command: "pi", args: [...sessionArgs, ...harnessArgs, "-p", prompt] };
|
|
25
82
|
case "gemini":
|
|
26
83
|
return {
|
|
27
84
|
command: "gemini",
|
|
28
|
-
args: [
|
|
85
|
+
args: [
|
|
86
|
+
"--approval-mode",
|
|
87
|
+
"yolo",
|
|
88
|
+
"--skip-trust",
|
|
89
|
+
...sessionArgs,
|
|
90
|
+
...harnessArgs,
|
|
91
|
+
"--prompt",
|
|
92
|
+
prompt
|
|
93
|
+
]
|
|
29
94
|
};
|
|
30
95
|
case "grok":
|
|
31
96
|
return {
|
|
32
97
|
command: "grok",
|
|
33
|
-
args: [
|
|
98
|
+
args: [
|
|
99
|
+
"--always-approve",
|
|
100
|
+
"--output-format",
|
|
101
|
+
"plain",
|
|
102
|
+
...sessionArgs,
|
|
103
|
+
...harnessArgs,
|
|
104
|
+
"-p",
|
|
105
|
+
prompt
|
|
106
|
+
]
|
|
34
107
|
};
|
|
35
108
|
case "forge":
|
|
36
|
-
return { command: "forge", args: ["-C", cwd, ...harnessArgs, "-p", prompt] };
|
|
109
|
+
return { command: "forge", args: ["-C", cwd, ...sessionArgs, ...harnessArgs, "-p", prompt] };
|
|
37
110
|
}
|
|
38
111
|
}
|
|
39
112
|
export class Harness extends Context.Tag("Harness")() {
|
|
40
113
|
}
|
|
41
114
|
const live = (shell) => ({
|
|
42
|
-
runTurn: ({ harness, prompt, cwd, harnessArgs, timeoutMs }) => {
|
|
43
|
-
const
|
|
44
|
-
|
|
45
|
-
|
|
115
|
+
runTurn: ({ harness, prompt, cwd, harnessArgs, timeoutMs, session }) => Effect.gen(function* () {
|
|
116
|
+
const adapter = session ? SESSION_ADAPTERS[harness] : undefined;
|
|
117
|
+
let sessionArgs = [];
|
|
118
|
+
let begunId;
|
|
119
|
+
if (adapter && session) {
|
|
120
|
+
if (session.token) {
|
|
121
|
+
sessionArgs = adapter.resume(session.token);
|
|
122
|
+
}
|
|
123
|
+
else {
|
|
124
|
+
const begun = adapter.begin();
|
|
125
|
+
sessionArgs = begun.args;
|
|
126
|
+
begunId = begun.id;
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
const { command, args } = harnessInvocation(harness, prompt, cwd, harnessArgs, sessionArgs);
|
|
130
|
+
const result = yield* shell.run({ command, args, cwd, timeoutMs });
|
|
131
|
+
// The continuation token: the one we resumed, the one we minted (preassign), or — for a capture
|
|
132
|
+
// harness's first turn — whatever id it surfaced in its output. Undefined keeps the next turn cold.
|
|
133
|
+
const sessionToken = session?.token ??
|
|
134
|
+
begunId ??
|
|
135
|
+
(session && adapter?.capture ? adapter.capture(result.output) : undefined);
|
|
136
|
+
return { ...result, sessionToken };
|
|
137
|
+
}),
|
|
46
138
|
select: (requested) => Effect.gen(function* () {
|
|
47
139
|
if (requested) {
|
|
48
140
|
if (yield* shell.exists(requested))
|
package/dist/prompts.js
CHANGED
|
@@ -138,6 +138,7 @@ ${buildPrinciples()}
|
|
|
138
138
|
${buildSliceMethod()}
|
|
139
139
|
|
|
140
140
|
Use ${README_FILE} as the claimed current state, ${GOAL_FILE} as the fixed goal, and ${PLAN_FILE} as the plan.
|
|
141
|
+
For how to use a package, the authoritative references are each skill's \`SKILL.md\` (its real API + examples) and the survey notes from the survey stage — rely on those. Do not re-derive APIs by exhaustively reading \`node_modules\` type-definitions: that reading can consume the entire turn before a single line of the game is written. Write the feature against the documented API and let \`tsc\`/runtime/proof-run errors point you at the few specifics still worth checking — building and correcting from real errors is faster and more reliable than reading every type up front, so favour getting a runnable slice on screen early in the turn over front-loading research.
|
|
141
142
|
Take the first remaining ${PLAN_FILE} step as this turn's task and build the whole of it — the grouped features it names — with the fitting skills/packages/assets, allowing only the small prerequisites or repairs that make the step actually work. A right-sized step is one turn's work, so complete it rather than fragmenting it.
|
|
142
143
|
Only if the step genuinely cannot fit one turn, split off the smallest coherent remainder as a single new ${PLAN_FILE} step (not a trail of fragments), and finish the rest now. Do not add pure-refactor, cleanup, or "consolidate the architecture" steps that don't advance the product — build the foundation correctly here, which means laying the code out as cohesive modules from the first turn (follow each skill's recommended file layout, such as the ecs skill's one-file-per-component and one-file-per-system split, rather than piling the game into one growing main file) instead of leaving rework for a future turn the budget can't afford. When a feature needs a collaborator that exists later in the plan, prefer pulling it forward into this step over building a throwaway stand-in you will discard.
|
|
143
144
|
Prove the result with a proof-run that actually launches and drives the real running repo this turn — a written description is never a substitute for a run. The proof is a machine-produced artifact saved under \`${PROOF_DIR}/\` (gitignored scratch): a screenshot or clip captured from the running app, or — if you cannot view images — a recorded runtime-state dump that asserts the real-done runtime facts, each produced by the command you actually ran rather than authored by hand. Look at the screenshots/clips from the player's seat and judge them against the goal's real-done bar, iterating on the build until it reads right; a prose "verification" note with no run behind it does not satisfy the gate. When the step modifies an already-proven product, scope the proof to what this step actually changed — the new behavior, the new look, the new feedback — and rely on the carried proof for systems reused unchanged rather than re-driving and re-proving the whole game each turn; that re-verification is the budget the turn cannot afford. Spend the time it saves on completing the step's full named scope: the cheap, high-impact edits — the identity, the wordmark, the title and HUD copy, the palette — whose deep effect on how the product reads is easy to drop under time pressure are exactly the ones that make it read as its new self, so land them, never leave the player-facing name or framing describing the old product. Then update ${PLAN_FILE} to reflect what is actually proven: close the step you finished, and if a turn only proved part of it, keep the unproven parts as first-class remaining steps rather than caveats.
|
package/dist/stages.d.ts
CHANGED
|
@@ -14,5 +14,7 @@ export interface RunContext {
|
|
|
14
14
|
}
|
|
15
15
|
type StageEnv = Harness | Scaffold | Git;
|
|
16
16
|
export declare const runStage: (stage: Exclude<PipelineStage, "build">, ctx: RunContext, runMeta: RunMeta | undefined) => Effect.Effect<void, RunFailure, StageEnv>;
|
|
17
|
-
export declare const buildTurn: (ctx: RunContext, turnNumber: number) => Effect.Effect<void, RunFailure, StageEnv>;
|
|
17
|
+
export declare const buildTurn: (ctx: RunContext, turnNumber: number, session?: {
|
|
18
|
+
readonly token?: string;
|
|
19
|
+
}) => Effect.Effect<string | undefined, RunFailure, StageEnv>;
|
|
18
20
|
export {};
|
package/dist/stages.js
CHANGED
|
@@ -12,16 +12,17 @@ import { buildBuildPrompt, buildGoalPrompt, buildPlanPrompt, buildSurveyAssetsPr
|
|
|
12
12
|
// Run one harness turn for a stage, turning a non-zero exit / timeout into a classified, explainable
|
|
13
13
|
// StageFailure. A clean exit is success — the stage's own assertions (did GOAL.md appear?) catch the
|
|
14
14
|
// "ran fine but produced nothing" case separately.
|
|
15
|
-
const turn = (stage, prompt, ctx) => Effect.flatMap(Harness, (harness) => harness
|
|
15
|
+
const turn = (stage, prompt, ctx, session) => Effect.flatMap(Harness, (harness) => harness
|
|
16
16
|
.runTurn({
|
|
17
17
|
harness: ctx.harness,
|
|
18
18
|
prompt,
|
|
19
19
|
cwd: ctx.cwd,
|
|
20
20
|
harnessArgs: ctx.harnessArgs,
|
|
21
|
-
timeoutMs: ctx.timeoutMs
|
|
21
|
+
timeoutMs: ctx.timeoutMs,
|
|
22
|
+
session
|
|
22
23
|
})
|
|
23
24
|
.pipe(Effect.flatMap((r) => r.exitCode === 0
|
|
24
|
-
? Effect.
|
|
25
|
+
? Effect.succeed(r)
|
|
25
26
|
: Effect.fail(new StageFailure({
|
|
26
27
|
stage,
|
|
27
28
|
reason: classifyHarnessOutput(r.output, r.exitCode, r.timedOut),
|
|
@@ -96,10 +97,12 @@ const plan = (ctx) => Effect.gen(function* () {
|
|
|
96
97
|
// genuinely consumed it deletes PLAN.md, which is our "done" signal. The checkpoint is --allow-empty,
|
|
97
98
|
// so a turn that committed real work and a turn that did nothing both leave a marker — and the
|
|
98
99
|
// supervisor tells them apart by the tree hash, not by whether a commit appeared.
|
|
99
|
-
const build = (ctx, turnNumber) => Effect.gen(function* () {
|
|
100
|
-
yield* turn("build", buildBuildPrompt(ctx.prompt, turnNumber), ctx);
|
|
100
|
+
const build = (ctx, turnNumber, session) => Effect.gen(function* () {
|
|
101
|
+
const result = yield* turn("build", buildBuildPrompt(ctx.prompt, turnNumber), ctx, session);
|
|
101
102
|
const done = !existsSync(join(ctx.cwd, PLAN_FILE));
|
|
102
103
|
yield* checkpoint(ctx.cwd, done ? "done" : "build");
|
|
104
|
+
// The token to continue this build's warm session next turn (undefined keeps the next turn cold).
|
|
105
|
+
return result.sessionToken;
|
|
103
106
|
});
|
|
104
107
|
// Dispatch a non-build stage. Build is driven directly by the supervisor (it owns the turn number
|
|
105
108
|
// and budget), so it is intentionally excluded from this signature.
|
|
@@ -117,4 +120,4 @@ export const runStage = (stage, ctx, runMeta) => {
|
|
|
117
120
|
return plan(ctx);
|
|
118
121
|
}
|
|
119
122
|
};
|
|
120
|
-
export const buildTurn = (ctx, turnNumber) => build(ctx, turnNumber);
|
|
123
|
+
export const buildTurn = (ctx, turnNumber, session) => build(ctx, turnNumber, session);
|
package/dist/supervisor.d.ts
CHANGED
|
@@ -17,6 +17,7 @@ export interface RunOptions {
|
|
|
17
17
|
readonly harnessTimeoutMs?: number;
|
|
18
18
|
readonly maxTurns?: number;
|
|
19
19
|
readonly skipTemplate?: boolean;
|
|
20
|
+
readonly buildSession?: boolean;
|
|
20
21
|
readonly backoff?: (attempt: number) => Duration.Duration;
|
|
21
22
|
}
|
|
22
23
|
export type Outcome = "done" | "stuck" | "budget-exhausted";
|
package/dist/supervisor.js
CHANGED
|
@@ -5,7 +5,7 @@ import { humanReason } from "./classify.js";
|
|
|
5
5
|
import { MAX_BUILD_TURNS } from "./constants.js";
|
|
6
6
|
import { PreflightError, reasonOf } from "./errors.js";
|
|
7
7
|
import { Git } from "./git.js";
|
|
8
|
-
import { Harness } from "./harness.js";
|
|
8
|
+
import { Harness, sessionCapable } from "./harness.js";
|
|
9
9
|
import { Logger } from "./logger.js";
|
|
10
10
|
import { planNext } from "./resume.js";
|
|
11
11
|
import { buildTurn, runStage } from "./stages.js";
|
|
@@ -55,12 +55,10 @@ export const run = (options) => Effect.gen(function* () {
|
|
|
55
55
|
maxTurns,
|
|
56
56
|
skipTemplate: ctx.skipTemplate
|
|
57
57
|
};
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
errors: []
|
|
61
|
-
});
|
|
58
|
+
const sessionOn = options.buildSession ?? process.env.DC_BUILD_SESSION !== "off";
|
|
59
|
+
return yield* loop(ctx, runMeta, maxTurns, options.backoff ?? backoff, { staleAttempts: 0, errors: [] }, sessionOn, undefined);
|
|
62
60
|
});
|
|
63
|
-
const loop = (ctx, runMeta, maxTurns, backoffFor, state) => Effect.gen(function* () {
|
|
61
|
+
const loop = (ctx, runMeta, maxTurns, backoffFor, state, sessionOn, buildToken) => Effect.gen(function* () {
|
|
64
62
|
const git = yield* Git;
|
|
65
63
|
const before = yield* fingerprint(ctx.cwd);
|
|
66
64
|
const step = yield* planNext(ctx.cwd);
|
|
@@ -72,10 +70,18 @@ const loop = (ctx, runMeta, maxTurns, backoffFor, state) => Effect.gen(function*
|
|
|
72
70
|
return finish(ctx.cwd, "budget-exhausted", state, turns);
|
|
73
71
|
}
|
|
74
72
|
yield* Console.log(`→ ${label(step.stage, ctx.harness)}`);
|
|
73
|
+
// Same-session only spans consecutive build turns, and only for harnesses that support it.
|
|
74
|
+
const session = step.stage === "build" && sessionOn && sessionCapable(ctx.harness)
|
|
75
|
+
? { token: buildToken }
|
|
76
|
+
: undefined;
|
|
75
77
|
const work = step.stage === "build"
|
|
76
|
-
? buildTurn(ctx, turns + 1)
|
|
77
|
-
: runStage(step.stage, ctx, step.stage === "scaffold" ? runMeta : undefined);
|
|
78
|
+
? buildTurn(ctx, turns + 1, session)
|
|
79
|
+
: runStage(step.stage, ctx, step.stage === "scaffold" ? runMeta : undefined).pipe(Effect.as(undefined));
|
|
78
80
|
const outcome = yield* Effect.either(work);
|
|
81
|
+
// The token to continue with next turn — only from a successful build turn. Any failure runs
|
|
82
|
+
// resetClean (the tree returns to the last checkpoint), which makes the warm session inconsistent
|
|
83
|
+
// with the working tree, so we drop it and the next build turn begins a fresh session.
|
|
84
|
+
const nextToken = outcome._tag === "Right" && step.stage === "build" ? outcome.right : undefined;
|
|
79
85
|
let reason;
|
|
80
86
|
if (outcome._tag === "Left") {
|
|
81
87
|
reason = reasonOf(outcome.left);
|
|
@@ -93,8 +99,8 @@ const loop = (ctx, runMeta, maxTurns, backoffFor, state) => Effect.gen(function*
|
|
|
93
99
|
if (outcome._tag === "Right")
|
|
94
100
|
yield* Console.log(`✓ ${label(step.stage, ctx.harness)}`);
|
|
95
101
|
// Forward progress — reset the stale counter and the error accumulator; only the failures that
|
|
96
|
-
// actually precede a stop should appear in its report.
|
|
97
|
-
return yield* loop(ctx, runMeta, maxTurns, backoffFor, { staleAttempts: 0, errors: [] });
|
|
102
|
+
// actually precede a stop should appear in its report. Carry the warm-session token forward.
|
|
103
|
+
return yield* loop(ctx, runMeta, maxTurns, backoffFor, { staleAttempts: 0, errors: [] }, sessionOn, nextToken);
|
|
98
104
|
}
|
|
99
105
|
const staleAttempts = state.staleAttempts + 1;
|
|
100
106
|
const errors = reason ? [...state.errors, reason] : state.errors;
|
|
@@ -105,7 +111,7 @@ const loop = (ctx, runMeta, maxTurns, backoffFor, state) => Effect.gen(function*
|
|
|
105
111
|
}
|
|
106
112
|
yield* note(`no progress; attempt ${staleAttempts}/${MAX_STALE_ATTEMPTS}, backing off`);
|
|
107
113
|
yield* Effect.sleep(backoffFor(staleAttempts));
|
|
108
|
-
return yield* loop(ctx, runMeta, maxTurns, backoffFor, { staleAttempts, errors });
|
|
114
|
+
return yield* loop(ctx, runMeta, maxTurns, backoffFor, { staleAttempts, errors }, sessionOn, undefined);
|
|
109
115
|
});
|
|
110
116
|
const finish = (projectDir, outcome, state, buildTurns) => ({
|
|
111
117
|
projectDir,
|