npm - pi-super-dev - Versions diffs - 0.1.0 - Mend

pi-super-dev 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

package/CHANGELOG.md +35 -0
package/LICENSE +21 -0
package/README.md +135 -0
package/agents/adversarial-reviewer.md +64 -0
package/agents/architecture-designer.md +43 -0
package/agents/architecture-improver.md +46 -0
package/agents/bdd-scenario-writer.md +37 -0
package/agents/build-cleaner.md +44 -0
package/agents/code-assessor.md +24 -0
package/agents/code-reviewer.md +59 -0
package/agents/debug-analyzer.md +54 -0
package/agents/docs-executor.md +49 -0
package/agents/handoff-writer.md +62 -0
package/agents/implementer.md +47 -0
package/agents/orchestrator.md +42 -0
package/agents/product-designer.md +42 -0
package/agents/prototype-runner.md +36 -0
package/agents/qa-agent.md +76 -0
package/agents/requirements-clarifier.md +58 -0
package/agents/research-agent.md +33 -0
package/agents/spec-reviewer.md +46 -0
package/agents/spec-writer.md +32 -0
package/agents/tdd-guide.md +51 -0
package/agents/ui-ux-designer.md +50 -0
package/package.json +40 -0
package/skills/super-dev/SKILL.md +35 -0
package/src/agents.ts +38 -0
package/src/control.ts +85 -0
package/src/doc-validators.ts +164 -0
package/src/extension.ts +164 -0
package/src/helpers.ts +263 -0
package/src/nodes.ts +550 -0
package/src/pi-spawn.ts +296 -0
package/src/pipeline.ts +15 -0
package/src/prompts.ts +120 -0
package/src/session-agent.ts +305 -0
package/src/setup.ts +141 -0
package/src/stages/design.ts +33 -0
package/src/stages/implementation.ts +80 -0
package/src/stages/index.ts +172 -0
package/src/stages/prototype.ts +43 -0
package/src/stages/setup.ts +32 -0
package/src/stages/writers.ts +105 -0
package/src/types.ts +235 -0
package/src/workflow.ts +181 -0

package/src/session-agent.ts ADDED Viewed

@@ -0,0 +1,305 @@
+/**
+ * In-process specialist execution via the pi SDK (`createAgentSession`).
+ *
+ * This is the alternative to {@link spawnAgent} (raw `pi` subprocess). It runs a
+ * specialist in-process, in-memory, and captures its result via a
+ * `structured_output` tool (schema-validated) instead of parsing `<control>`
+ * text from subprocess stdout. Same return contract as spawnAgent
+ * ({@link SpawnResult}) so the workflow engine is unchanged.
+ *
+ * Why: the subprocess path carried a whole class of bugs (spawn ENOENT,
+ * RangeError on stdout buffering, <control> parse fragility, process timeouts).
+ * The session path uses the same `@earendil-works/pi-coding-agent` SDK we
+ * already peer-depend on — no new dependency — and gets structured output,
+ * abort, and host config reuse (auth/model) for free.
+ *
+ * Select at runtime via `ctx.agent` (see workflow.ts): backend "session" uses
+ * this; "subprocess" uses spawnAgent.
+ */
+import {
+	createAgentSession,
+	createCodingTools,
+	defineTool,
+	getAgentDir,
+	type ToolDefinition,
+	SessionManager,
+	SettingsManager,
+} from "@earendil-works/pi-coding-agent";
+import { Type } from "typebox";
+import { mkdirSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import { loadAgentPrompt } from "./agents.ts";
+import { extractControl } from "./control.ts";
+import { sanitizeSlug } from "./setup.ts";
+import type { AgentProgress, SpawnResult } from "./types.ts";
+/** Options accepted by runAgentViaSession. */
+export interface SessionAgentOptions {
+	/** Agent name; passed to loadAgentPrompt to obtain the system prompt. */
+	agent: string;
+	/** Task text, placed under the "## Task" heading of the composed prompt. */
+	prompt: string;
+	/** Working directory handed to the session, its managers and the coding tools. */
+	cwd: string;
+	/** NOTE(review): not read anywhere in this file — confirm how/if the model is selected. */
+	model?: string;
+	/** When it fires "abort", the running session is aborted. */
+	signal?: AbortSignal;
+	/** Optional label; preferred over `agent` in progress events and debug trace file names. */
+	id?: string;
+	/** Abort the session after this many milliseconds (runAgentViaSession defaults to 480_000). */
+	timeoutMs?: number;
+	/** Control keys the caller expects in structured_output (declares them in the
+	 *  tool schema so the model fills them). When omitted, a fully permissive
+	 *  schema is used. Derived from the prompt by workflow.ts. */
+	controlKeys?: string[];
+	/** Sink for live progress (tool calls, turn counters, streamed text). */
+	onProgress?: AgentProgress;
+}
+/** Construct the parameter schema for the structured_output tool.
+ *  Every entry of `keys` becomes a declared property typed Optional(Any), and
+ *  extra properties are always allowed. Declaring the keys is what makes the
+ *  model treat them as part of the contract (a schema that declared only
+ *  `summary` made GLM return only `summary`); keeping them Optional means tool
+ *  validation never rejects a partially-filled object — completeness is
+ *  checked separately via a corrective re-prompt. With no keys the schema is
+ *  fully permissive. */
+function controlSchema(keys: string[]) {
+	const declared: Record<string, ReturnType<typeof Type.Any>> = {};
+	keys.forEach((key) => {
+		declared[key] = Type.Optional(Type.Any());
+	});
+	return Type.Object(declared, { additionalProperties: true });
+}
+/** List the declared keys that the captured control object leaves unfilled.
+ *  A key is unfilled when its value is `undefined`, `null`, the empty string,
+ *  or an empty array. When nothing was captured at all, every key is missing
+ *  and the `keys` array itself is returned. */
+export function missingKeys(captured: Record<string, unknown> | null | undefined, keys: string[]): string[] {
+	if (!captured) return keys;
+	const isBlank = (value: unknown): boolean => {
+		if (value === undefined || value === null) return true;
+		if (value === "") return true;
+		return Array.isArray(value) && value.length === 0;
+	};
+	return keys.filter((key) => isBlank(captured[key]));
+}
+/** Mutable holder shared between a structured_output tool and the code that
+ *  created it: the tool's `execute` writes into it, the caller reads it back. */
+interface Capture {
+	/** Set to true by the tool's `execute` once structured_output has been invoked. */
+	called: boolean;
+	/** Params received by structured_output; stays undefined until the tool is called. */
+	value: unknown;
+}
+/** Create the `structured_output` tool that ends the run and records its result.
+ *  The expected keys are named in the description and guidelines and declared
+ *  in the parameter schema (controlSchema), so the model fills each of them
+ *  rather than packing everything into a single field. Each call merges its
+ *  params over whatever `capture.value` already holds and marks the capture
+ *  as called; the tool result asks the session to terminate. */
+function structuredOutputTool(capture: Capture, keys: string[]): ToolDefinition {
+	const requiredFields = keys.length ? keys.join(", ") : "the fields the task requested";
+	const description = `Return the final result object. It MUST include every one of these keys: ${requiredFields}.`;
+	const promptGuidelines = [
+		`structured_output is the final answer channel; call it exactly once when the task is complete. Your object MUST contain ALL of: ${requiredFields}.`,
+		"Do not write a prose final answer after calling structured_output.",
+	];
+	return defineTool({
+		name: "structured_output",
+		label: "Structured Output",
+		description,
+		promptSnippet: "Return final machine-readable result",
+		promptGuidelines,
+		parameters: controlSchema(keys),
+		async execute(_toolCallId, params) {
+			// Merge rather than overwrite so a later (corrective) call adds to an earlier one.
+			const previous = capture.value as Record<string, unknown> | undefined;
+			capture.value = { ...previous, ...params };
+			capture.called = true;
+			return {
+				content: [{ type: "text", text: "Structured output received." }],
+				details: params,
+				terminate: true,
+			};
+		},
+	});
+}
+/** Subscribe to session events and relay them to the progress sink; returns
+ *  the unsubscribe function handed back by `session.subscribe`.
+ *  - `tool_execution_start` (with a tool name) → `onProgress.event("→ …")`
+ *  - `turn_start` → `onProgress.event("turn N")` from the second turn onwards
+ *  - `message_update` whose nested `assistantMessageEvent` is `text_delta` or
+ *    `text_end` → `onProgress.text(...)` with the accumulated text of
+ *    `partial.content`, minus any `<control>…</control>` section.
+ *  Text is only forwarded when it differs from the last forwarded text, and
+ *  that memory is cleared at every tool call. */
+function forwardProgress(session: { subscribe(listener: (e: unknown) => void): () => void }, onProgress: AgentProgress): () => void {
+	let turnCount = 0;
+	let previousText = ""; // last text forwarded; cleared whenever a tool call starts
+	return session.subscribe((raw: unknown) => {
+		const evt = raw as { type?: string; toolName?: string; args?: Record<string, unknown>; assistantMessageEvent?: { type?: string; partial?: { content?: Array<{ type: string; text?: string }> } } };
+		if (!evt?.type) return;
+		switch (evt.type) {
+			case "tool_execution_start": {
+				if (!evt.toolName) return;
+				previousText = "";
+				onProgress.event(`→ ${summarize(evt.toolName, evt.args)}`);
+				return;
+			}
+			case "turn_start": {
+				turnCount += 1;
+				if (turnCount > 1) onProgress.event(`turn ${turnCount}`);
+				return;
+			}
+			case "message_update": {
+				const update = evt.assistantMessageEvent;
+				if (update?.type !== "text_delta" && update?.type !== "text_end") return;
+				let combined = "";
+				for (const block of update?.partial?.content ?? []) {
+					if (block.type === "text") combined += block.text ?? "";
+				}
+				const visible = combined.replace(/<control>[\s\S]*?<\/control>/gi, "").trim();
+				if (!visible || visible === previousText) return;
+				previousText = visible;
+				onProgress.text(visible);
+				return;
+			}
+			default:
+				return;
+		}
+	});
+}
+/** Produce a one-line label for a tool call.
+ *  - write/edit/read: tool name plus `path` (or `file_path`)
+ *  - bash: `$ ` plus the first line of `command`
+ *  - ffgrep/fffind: tool name plus the quoted `pattern`
+ *  - structured_output: a fixed check-marked label
+ *  - anything else: the bare tool name */
+function summarize(name: string, args: Record<string, unknown> | undefined): string {
+	const params = args ?? {};
+	if (name === "write" || name === "edit" || name === "read") {
+		return `${name} ${params.path ?? params.file_path ?? ""}`;
+	}
+	if (name === "bash") {
+		const [firstLine] = String(params.command ?? "").split("\n");
+		return `$ ${firstLine}`;
+	}
+	if (name === "ffgrep" || name === "fffind") {
+		return `${name} "${params.pattern ?? ""}"`;
+	}
+	if (name === "structured_output") return "structured_output ✓";
+	return name;
+}
+/** Walk the message list from newest to oldest and return the concatenated
+ *  text parts of the first assistant message whose text is not blank.
+ *  Returns "" when no such message exists. */
+function lastAssistantText(messages: Array<{ role?: string; content?: Array<{ type: string; text?: string }> }>): string {
+	let idx = messages.length;
+	while (idx-- > 0) {
+		const message = messages[idx];
+		if (message?.role !== "assistant") continue;
+		const parts = message.content;
+		if (!Array.isArray(parts)) continue;
+		let joined = "";
+		for (const part of parts) {
+			if (part.type === "text" && typeof part.text === "string") joined += part.text;
+		}
+		if (joined.trim()) return joined;
+	}
+	return "";
+}
+/** Ask the model for a concise 2-5 word kebab-case slug summarizing the task.
+ *  Uses a minimal session whose only custom tool is a `structured_output`
+ *  tool taking `{ slug }`, and passes the answer through sanitizeSlug.
+ *
+ *  @param task  Free-text task description embedded in the prompt.
+ *  @param cwd   Working directory for the session and its managers.
+ *  @param opts  `signal` aborts the session; `timeoutMs` (default 20_000)
+ *               aborts it after that many milliseconds.
+ *  @returns The sanitized slug, or "" when the signal is already aborted, the
+ *           session cannot be created, the prompt fails/is aborted, or the
+ *           tool was never called — so the caller can fall back to the
+ *           deterministic slugifyTask. */
+export async function summarizeSlug(task: string, cwd: string, opts: { signal?: AbortSignal; timeoutMs?: number } = {}): Promise<string> {
+	// An "abort" listener attached to an already-aborted signal never fires, so
+	// honour a pre-aborted signal up front instead of running the whole prompt.
+	if (opts.signal?.aborted) return "";
+	const timeoutMs = opts.timeoutMs ?? 20_000;
+	const capture: Capture = { called: false, value: undefined };
+	const agentDir = getAgentDir();
+	let session;
+	try {
+		({ session } = await createAgentSession({
+			cwd,
+			agentDir,
+			sessionManager: SessionManager.inMemory(cwd),
+			settingsManager: SettingsManager.create(cwd, agentDir),
+			customTools: [defineTool({
+				name: "structured_output",
+				label: "Slug",
+				description: "Return the summary slug.",
+				promptSnippet: "Return the slug",
+				promptGuidelines: ["Call structured_output once with the slug."],
+				parameters: Type.Object({ slug: Type.String() }),
+				async execute(_id, params) { capture.value = params; capture.called = true; return { content: [{ type: "text", text: "ok" }], details: params, terminate: true }; },
+			})],
+		}));
+	} catch {
+		return "";
+	}
+	const timer = setTimeout(() => { try { void session.abort(); } catch { /* ignore */ } }, timeoutMs);
+	const onAbort = () => void session.abort();
+	opts.signal?.addEventListener("abort", onAbort, { once: true });
+	try {
+		await session.prompt(`Summarize this software task into a concise 2-5 word kebab-case slug (lowercase, words joined by single hyphens, no articles or filler words like "implement/add/feature"). Task:\n"""${task}"""\nCall structured_output with {slug}.`);
+	} catch {
+		/* timeout/abort → fallback */
+	} finally {
+		// Always release the timer, the listener and the session, whatever happened above.
+		clearTimeout(timer);
+		opts.signal?.removeEventListener("abort", onAbort);
+		session.dispose();
+	}
+	const raw = capture.called ? String((capture.value as { slug?: unknown })?.slug ?? "") : "";
+	return sanitizeSlug(raw);
+}
+/** Run a specialist in-process and return its result (SpawnResult contract).
+ *  Per-stage `controlKeys` are declared in the structured_output schema so the
+ *  model fills them. If the first turn omits any, a single corrective re-prompt
+ *  is sent IN THE SAME SESSION (context preserved) before giving up — this is
+ *  what turns the old "gate failed after 3 attempts" into a self-healing step.
+ *  Set SUPER_DEV_DEBUG=1 to dump the full per-agent message trace to a temp
+ *  file (sessions are otherwise in-memory and unobservable).
+ *
+ *  @param opts See SessionAgentOptions. `timeoutMs` defaults to 480_000.
+ *  @returns `text` is the last non-blank assistant text; `control` is the
+ *           captured structured_output object when the tool was called,
+ *           otherwise whatever extractControl finds in `text` (null if
+ *           nothing); `error` is set on timeout or when the prompt throws for
+ *           a reason other than timeout/abort. An abort via `opts.signal`
+ *           (before or during the run) yields no `error`.
+ *  @throws Only if loading the agent prompt or creating the session fails;
+ *          errors raised while prompting are returned in `error` instead. */
+export async function runAgentViaSession(opts: SessionAgentOptions): Promise<SpawnResult> {
+	const systemPrompt = loadAgentPrompt(opts.agent);
+	const keys = opts.controlKeys ?? [];
+	const capture: Capture = { called: false, value: undefined };
+	const timeoutMs = opts.timeoutMs ?? 480_000;
+	const agentDir = getAgentDir();
+	// NOTE(review): `opts.model` is accepted but never passed to the session here — confirm that is intended.
+	const { session } = await createAgentSession({
+		cwd: opts.cwd,
+		agentDir,
+		sessionManager: SessionManager.inMemory(opts.cwd),
+		settingsManager: SettingsManager.create(opts.cwd, agentDir),
+		customTools: [...createCodingTools(opts.cwd), structuredOutputTool(capture, keys)],
+	});
+	const unsub = opts.onProgress ? forwardProgress(session, opts.onProgress) : undefined;
+	let timedOut = false;
+	const onAbort = () => void session.abort();
+	const timer = setTimeout(() => {
+		timedOut = true;
+		try { void session.abort(); } catch { /* ignore */ }
+	}, timeoutMs);
+	opts.signal?.addEventListener("abort", onAbort, { once: true });
+	const finalOutputLine = keys.length
+		? `When the task is complete, call the \`structured_output\` tool exactly once with an object containing ALL of these keys: ${keys.join(", ")}. Do not omit any. Do not emit a prose final answer after that.`
+		: "When the task is complete, call the `structured_output` tool exactly once with an object containing the fields requested above. Do not emit a prose final answer after that.";
+	// Delivery discipline — the systemic fix for the recurring "agent explores for
+	// 10-27 tool calls then times out before writing" pattern. The ported agent
+	// prompts demand Claude-grade exhaustive verification; glm is slower and runs
+	// out of time. This preamble overrides that: bound exploration, write early.
+	const deliveryDiscipline = [
+		"## Delivery discipline (OVERRIDES any contrary instruction above)",
+		"You have a LIMITED time budget. The ONLY deliverable that matters is the written document + your structured_output call.",
+		"- Explore with AT MOST ~6 tool calls total (read/bash/grep/web). You do NOT need to read every file, run the full test suite, or verify every claim independently.",
+		"- Never re-read a file you already read. Never loop on self-auditing, self-scoring, or revision.",
+		"- START WRITING the document once you have the gist — well before you feel 'done' exploring. Written-but-imperfect beats thorough-but-unfinished (a timeout produces NOTHING).",
+		"- After writing, immediately call structured_output and STOP.",
+	].join("\n");
+	const task = [systemPrompt, "", "## Task", opts.prompt, "", deliveryDiscipline, "", "## Final output", finalOutputLine].join("\n");
+	let correctiveNote = "";
+	try {
+		// An "abort" listener attached to an already-aborted signal never fires,
+		// so a pre-aborted signal would otherwise let the agent run until the
+		// timeout. Skip the prompt in that case; the result below then has the
+		// same shape as for an abort that arrives mid-run.
+		if (!opts.signal?.aborted) {
+			try {
+				await session.prompt(task);
+			} catch (err) {
+				if (!timedOut && !opts.signal?.aborted) throw err;
+			}
+		}
+		// Self-heal: ONLY when the model actually called structured_output but
+		// omitted declared keys, send ONE corrective turn in the same session
+		// (same context, same files written) naming exactly what's missing. If it
+		// never called the tool, a "you omitted keys" message would be a false
+		// premise — leave that to the gate's cold retry instead.
+		const afterFirst = capture.called ? (capture.value as Record<string, unknown> | undefined) : undefined;
+		const missing = missingKeys(afterFirst, keys);
+		if (capture.called && missing.length > 0 && !timedOut && !opts.signal?.aborted) {
+			correctiveNote = `corrective re-prompt (missing: ${missing.join(", ")})`;
+			opts.onProgress?.event(`↻ ${opts.id ?? opts.agent}: ${correctiveNote}`);
+			const fix = `Your previous structured_output was missing required keys: ${missing.join(", ")}. Call structured_output AGAIN, this time with ALL of these keys filled from the work you already did: ${keys.join(", ")}. Do not redo the work — just return the complete object.`;
+			try {
+				await session.prompt(fix);
+			} catch (err) {
+				if (!timedOut && !opts.signal?.aborted) throw err;
+			}
+		}
+		const text = lastAssistantText(session.messages as Parameters<typeof lastAssistantText>[0]);
+		const control = capture.called ? (capture.value as Record<string, unknown>) : extractControl(text);
+		return { text, control: control ?? null, error: timedOut ? `timed out after ${Math.round(timeoutMs / 1000)}s${capture.called ? " (structured_output captured before abort)" : ""}` : undefined };
+	} catch (err) {
+		return { text: "", control: null, error: err instanceof Error ? err.message : String(err) };
+	} finally {
+		// Release everything in one place; the trace must be dumped before dispose.
+		clearTimeout(timer);
+		opts.signal?.removeEventListener("abort", onAbort);
+		unsub?.();
+		if (process.env.SUPER_DEV_DEBUG) dumpTrace(opts, keys, capture, correctiveNote, session.messages);
+		session.dispose();
+	}
+}
+/** Write the full in-memory message trace to a temp file. The session backend
+ *  keeps everything in memory (SessionManager.inMemory), so without this there
+ *  are zero logs to debug a failed/garbled agent run.
+ *
+ *  The file is `<tmpdir>/super-dev-debug/<timestamp>-<id or agent>.json` and
+ *  holds the agent name, id, cwd, control keys, the captured structured_output
+ *  state, the corrective note and the raw messages. Because the trace contains
+ *  the complete conversation, the directory and file are requested as
+ *  owner-only (0o700 / 0o600) instead of the process default. Best-effort:
+ *  any failure (I/O, unserializable messages) is swallowed so that debugging
+ *  output can never break an agent run. */
+function dumpTrace(opts: SessionAgentOptions, keys: string[], capture: Capture, correctiveNote: string, messages: unknown): void {
+	try {
+		const dir = join(tmpdir(), "super-dev-debug");
+		// `mode` only applies when the directory is created by this call.
+		mkdirSync(dir, { recursive: true, mode: 0o700 });
+		// Keep the file name filesystem-safe whatever the id/agent contains.
+		const safe = (opts.id ?? opts.agent).replace(/[^A-Za-z0-9_.-]+/g, "_");
+		const file = join(dir, `${Date.now()}-${safe}.json`);
+		const payload = JSON.stringify({
+			agent: opts.agent,
+			id: opts.id,
+			cwd: opts.cwd,
+			controlKeys: keys,
+			structuredOutputCalled: capture.called,
+			structuredOutputValue: capture.value,
+			correctiveNote,
+			messages,
+		}, null, 2);
+		writeFileSync(file, payload, { mode: 0o600 });
+	} catch { /* best-effort */ }
+}

package/src/setup.ts ADDED Viewed

@@ -0,0 +1,141 @@
+/**
+ * Deterministic setup stage — detects language/framework, derives a spec id,
+ * creates a git worktree (unless skipped), and creates the spec directory.
+ * Replaces the original LLM-driven setup agent; no model round-trip.
+ */
+import { execFileSync } from "node:child_process";
+import { existsSync, mkdirSync, readdirSync, readFileSync } from "node:fs";
+import { join, resolve } from "node:path";
+import type { SetupControl } from "./types.ts";
+/** Run `git <args>` in `cwd` and return its trimmed stdout.
+ *  stdin and stderr are ignored. Returns null when execFileSync throws
+ *  (git missing, bad cwd, non-zero exit, …). */
+function git(args: string[], cwd: string): string | null {
+	let stdout: string;
+	try {
+		stdout = execFileSync("git", args, { cwd, encoding: "utf-8", stdio: ["ignore", "pipe", "ignore"] });
+	} catch {
+		return null;
+	}
+	return stdout.trim();
+}
+/** Guess the project's language/stack and whether it has a web UI.
+ *
+ *  Manifest files in `cwd` win, checked in this order: Cargo.toml → rust,
+ *  go.mod → go, pyproject.toml/setup.py/requirements.txt → python,
+ *  package.json → "backend" when express/fastify/@hono/node-server is a
+ *  (dev)dependency, otherwise "frontend"; `isWebUi` is true when react, next,
+ *  vue, svelte or @sveltejs/kit is a (dev)dependency. An unreadable or invalid
+ *  package.json yields `{ language: "frontend", isWebUi: true }`.
+ *
+ *  With no manifest (greenfield) the stack is inferred from keywords in
+ *  `task`. Keywords are matched as whole words (not preceded or followed by a
+ *  letter, so "python3" still counts) — plain substring matching misread
+ *  "pipeline" as pip, "trust" as rust and "bundle" as bun.
+ *
+ *  @param cwd  Directory to inspect for manifest files.
+ *  @param task Task description used only for the greenfield fallback.
+ *  @returns `language` is one of rust, go, python, backend, frontend, mixed. */
+export function detectLanguage(cwd: string, task = ""): { language: string; isWebUi: boolean } {
+	const has = (f: string) => existsSync(join(cwd, f));
+	if (has("Cargo.toml")) return { language: "rust", isWebUi: false };
+	if (has("go.mod")) return { language: "go", isWebUi: false };
+	if (has("pyproject.toml") || has("setup.py") || has("requirements.txt")) return { language: "python", isWebUi: false };
+	if (has("package.json")) {
+		try {
+			const pkg = JSON.parse(readFileSync(join(cwd, "package.json"), "utf8")) as { dependencies?: Record<string, string>; devDependencies?: Record<string, string> };
+			const deps = { ...(pkg.dependencies ?? {}), ...(pkg.devDependencies ?? {}) };
+			const isWebUi = Boolean(deps["react"] || deps["next"] || deps["vue"] || deps["svelte"] || deps["@sveltejs/kit"]);
+			if (deps["express"] || deps["fastify"] || deps["@hono/node-server"]) return { language: "backend", isWebUi };
+			return { language: "frontend", isWebUi };
+		} catch {
+			return { language: "frontend", isWebUi: true };
+		}
+	}
+	// Greenfield (no manifest): infer the target stack from the task text so
+	// downstream prompts and the implementation know what to build.
+	const t = task.toLowerCase();
+	const escapeRe = (s: string) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+	// Whole-word match: the keyword may not touch another letter on either side.
+	const mentions = (...kw: string[]) => kw.some((k) => new RegExp(`(?<![a-z])${escapeRe(k)}(?![a-z])`).test(t));
+	if (mentions("node", "nodejs", "node.js", "express", "expressjs", "fastify", "npm", "deno", "bun")) return { language: "backend", isWebUi: false };
+	if (mentions("python", "django", "flask", "fastapi", "pip")) return { language: "python", isWebUi: false };
+	if (mentions("golang") || /\bgo\b/.test(t)) return { language: "go", isWebUi: false };
+	if (mentions("rust", "cargo")) return { language: "rust", isWebUi: false };
+	return { language: "mixed", isWebUi: false };
+}
+/** Turn arbitrary text (LLM output or raw input) into a kebab-case slug.
+ *  Lowercases, collapses every run of non-alphanumerics into one hyphen and
+ *  strips leading/trailing hyphens. Slugs longer than 40 characters are cut to
+ *  40 and then shortened back to the last hyphen (when that hyphen sits past
+ *  index 8), so the result never ends mid-word. */
+export function sanitizeSlug(raw: string): string {
+	const maxLength = 40;
+	const hyphenated = raw.toLowerCase().replace(/[^a-z0-9]+/g, "-");
+	let slug = hyphenated.replace(/^-+|-+$/g, "");
+	if (slug.length > maxLength) {
+		slug = slug.slice(0, maxLength);
+		const lastHyphen = slug.lastIndexOf("-");
+		if (lastHyphen > 8) {
+			slug = slug.slice(0, lastHyphen);
+		}
+	}
+	return slug.replace(/-+$/g, "");
+}
+/** Filler words that slugifyTask discards before building a slug. */
+const STOPWORDS = new Set("a an the to of for and or nor but in on at by with from into is are be as that this it its our your their we you they please need want implement add build create make new feature features simple app application page use using used based get one two three next".split(" "));
+/** Deterministic fallback slug: lowercase the task, replace everything except
+ *  letters, digits and whitespace with spaces, drop filler words, keep the
+ *  first five remaining words and sanitize them. Falls back to "task" when
+ *  nothing is left. */
+export function slugifyTask(task: string): string {
+	const tokens = task.toLowerCase().replace(/[^a-z0-9\s]/g, " ").split(/\s+/);
+	const contentWords: string[] = [];
+	for (const token of tokens) {
+		if (token && !STOPWORDS.has(token)) contentWords.push(token);
+	}
+	const slug = sanitizeSlug(contentWords.slice(0, 5).join("-"));
+	return slug || "task";
+}
+/** Return the next free spec number: one more than the highest numeric prefix
+ *  (`<digits>-…`) among the entries of `<cwd>/docs/specifications`, or 1 when
+ *  that directory cannot be read or holds no numbered entries. */
+function nextSpecNumber(cwd: string): number {
+	const specsDir = join(cwd, "docs", "specifications");
+	let entries: string[];
+	try {
+		entries = readdirSync(specsDir);
+	} catch {
+		// no specs dir yet
+		return 1;
+	}
+	let highest = 0;
+	for (const name of entries) {
+		const match = /^(\d+)-/.exec(name);
+		if (match) highest = Math.max(highest, Number(match[1]));
+	}
+	return highest + 1;
+}
+/** Work out the repository's base branch.
+ *  Prefers the branch that `refs/remotes/origin/HEAD` points at; otherwise
+ *  uses the currently checked-out branch (unless git reports the literal
+ *  "HEAD"); otherwise falls back to "main". */
+function detectDefaultBranch(cwd: string): string {
+	const remotePrefix = "origin/";
+	const remoteHead = git(["symbolic-ref", "--short", "refs/remotes/origin/HEAD"], cwd);
+	if (remoteHead?.startsWith(remotePrefix)) {
+		return remoteHead.slice(remotePrefix.length);
+	}
+	const checkedOut = git(["rev-parse", "--abbrev-ref", "HEAD"], cwd);
+	return checkedOut && checkedOut !== "HEAD" ? checkedOut : "main";
+}
+/** True when `git rev-parse --is-inside-work-tree` succeeds in `cwd`. */
+function isGitRepo(cwd: string): boolean {
+	const probe = git(["rev-parse", "--is-inside-work-tree"], cwd);
+	return probe !== null;
+}
+/** True when HEAD resolves to a commit, i.e. `git rev-parse --verify HEAD` succeeds. */
+function headExists(cwd: string): boolean {
+	const head = git(["rev-parse", "--verify", "HEAD"], cwd);
+	return head !== null;
+}
+/** Set placeholder values for `user.email` and `user.name` via `git config`
+ *  for each one that is not currently set (or is empty). */
+function ensureGitIdentity(cwd: string): void {
+	const fallbacks: Array<[string, string]> = [
+		["user.email", "pi-super-dev@local"],
+		["user.name", "pi-super-dev"],
+	];
+	for (const [key, value] of fallbacks) {
+		const current = git(["config", key], cwd);
+		if (!current) git(["config", key, value], cwd);
+	}
+}
+/** Options for runSetup. */
+export interface SetupOptions {
+	/** Directory to set up in; runSetup resolves it and defaults to process.cwd(). */
+	cwd?: string;
+	/** When true, no git worktree is created and work happens directly in `cwd`. */
+	skipWorktree?: boolean;
+	/** Descriptive slug for the spec id (e.g. LLM-summarized). Falls back to
+	 *  slugifyTask(task) when empty/invalid. */
+	slug?: string;
+}
+/** Deterministic setup for one task. In order:
+ *  1. `git init` when `cwd` is not inside a work tree;
+ *  2. an empty initial commit (with a placeholder identity if none is
+ *     configured) when HEAD does not exist yet;
+ *  3. language / web-UI detection and default-branch detection;
+ *  4. spec identifier `<NN>-<slug>` from the next spec number and the slug
+ *     (sanitized `options.slug`, else slugifyTask(task));
+ *  5. unless `skipWorktree`, `git worktree add -b <id> <cwd>/.worktree/<id>
+ *     <defaultBranch>`; the worktree is used when that succeeds or the path
+ *     already exists, otherwise work stays in `cwd`;
+ *  6. creation of `<worktree>/docs/specifications/<id>/`.
+ *
+ *  Git failures are not raised (the `git` helper returns null), so a failed
+ *  init/commit/worktree shows up only through the returned flags.
+ *
+ *  @param task    Task description; feeds language detection and the fallback slug.
+ *  @param options See SetupOptions.
+ *  @returns The paths, detected stack, spec identifier and the
+ *           `worktreeCreated` / `initializedRepo` flags. */
+export function runSetup(task: string, options: SetupOptions = {}): SetupControl {
+	const cwd = resolve(options.cwd ?? process.cwd());
+	// Ensure cwd is a git repo (worktree + later commits/merge require it).
+	let initializedRepo = false;
+	if (!isGitRepo(cwd)) {
+		git(["init"], cwd);
+		initializedRepo = true;
+	}
+	// A worktree (and later commits/merge) needs at least one commit on the
+	// base branch. Empty repos with an unborn HEAD break `git worktree add`
+	// ("fatal: invalid reference: main"), causing setup to silently fall back
+	// to operating in the cwd with no isolation.
+	if (!headExists(cwd)) {
+		ensureGitIdentity(cwd);
+		git(["commit", "--allow-empty", "-m", "chore: initial commit (pi-super-dev)"], cwd);
+	}
+	const { language, isWebUi } = detectLanguage(cwd, task);
+	const defaultBranch = detectDefaultBranch(cwd);
+	const slug = sanitizeSlug(options.slug ?? "") || slugifyTask(task);
+	const specIdentifier = `${String(nextSpecNumber(cwd)).padStart(2, "0")}-${slug}`;
+	let worktreePath = cwd;
+	let worktreeCreated = false;
+	if (!options.skipWorktree) {
+		const wtPath = join(cwd, ".worktree", specIdentifier);
+		const created = git(["worktree", "add", "-b", specIdentifier, wtPath, defaultBranch], cwd);
+		// NOTE(review): an existing path is accepted even when `git worktree add` failed — confirm this reuse is intended.
+		if (created !== null || existsSync(wtPath)) {
+			worktreePath = wtPath;
+			worktreeCreated = true;
+		}
+	}
+	// The trailing "/" is part of the returned specDirectory value.
+	const specDirectory = join(worktreePath, "docs", "specifications", specIdentifier) + "/";
+	mkdirSync(specDirectory, { recursive: true });
+	return { worktreePath, specDirectory, defaultBranch, language, isWebUi, specIdentifier, worktreeCreated, initializedRepo };
+}

package/src/stages/design.ts ADDED Viewed

@@ -0,0 +1,33 @@
+/**
+ * Stage 6 — Design (routed).
+ * Self-contained task: route-designer helper picks the specialist designer
+ * (or skips for bug fixes), then spawns it.
+ */
+import type { Stage } from "../types.ts";
+import { buildDesignPrompt } from "../prompts.ts";
+/** Stage 6 — Design. Asks the "route-designer" helper which designer agent
+ *  fits the classified task, then runs that agent with the design prompt.
+ *  Resolves to the agent's control object, or null when the router names no
+ *  designer, the budget is exhausted, or the agent returned no control. */
+export const designStage: Stage = {
+	id: "design",
+	label: "Stage 6 — Design",
+	async run(state, ctx) {
+		const { value: route } = await ctx.helper({ name: "route-designer", sources: { "classify-task": state.classify } });
+		const designerAgent = (route.designerAgent as string) ?? null;
+		if (!designerAgent) {
+			ctx.log(`Design skipped: ${route.reason as string}`);
+			return null;
+		}
+		const withinBudget = ctx.budget.check();
+		if (!withinBudget) {
+			ctx.log("Design: budget exhausted");
+			return null;
+		}
+		const setup = state.setup!;
+		const prompt = buildDesignPrompt(setup, state.classify ?? null, ctx.task, state.requirements ?? null, state.research ?? null, state.assessment ?? null, designerAgent);
+		const { control } = await ctx.agent({ id: "pipeline.design", agent: designerAgent, prompt });
+		ctx.log(`Design complete (agent: ${designerAgent})`);
+		return control ?? null;
+	},
+};

package/src/stages/implementation.ts ADDED Viewed

@@ -0,0 +1,80 @@
+/**
+ * Stage 9 — Implementation (per-phase TDD).
+ * Self-contained task: iterates the spec's phased task list. For each phase,
+ * up to 3 attempts of TDD-write → implement → QA → build-gate; commits on green.
+ */
+import type { ControlObj, Stage } from "../types.ts";
+import { buildTddPrompt, buildImplementPrompt, buildQaPrompt, buildCommitPrompt, buildImplementationSummaryPrompt } from "../prompts.ts";
+import { normalizePhases } from "../doc-validators.ts";
+/** Maximum TDD → implement → QA → gate attempts per phase. */
+const MAX_ATTEMPTS = 3;
+/** Left-pad a number with zeros to at least two digits (1 → "01"). */
+const pad = (n: number): string => `${n}`.padStart(2, "0");
+/** Stage 9 — Implementation. Walks the spec's phases in order; each phase
+ *  gets up to MAX_ATTEMPTS rounds of tdd-guide → implementer → qa-agent →
+ *  "gate-build" helper. A phase that passes the gate is committed (budget
+ *  permitting) via the orchestrator agent; the first phase that never passes
+ *  stops the stage. Budget exhaustion mid-phase returns immediately.
+ *
+ *  Resolves to a control object with `phasesCompleted`, `totalPhases`,
+ *  `allGreen`, and (except when the spec has no phases) `filesModified` and
+ *  `summary`. Agent-supplied control fields are never trusted to have the
+ *  expected shape: `phases`, `filesModified` and the gate's `errors` are all
+ *  checked before use. */
+export const implementationStage: Stage = {
+	id: "implementation",
+	label: "Stage 9 — Implementation",
+	async run(state, ctx) {
+		// Defensively normalize: agents sometimes return `phases` as a string or
+		// object instead of an array, which crashed `phases.entries()` (Stage 9:
+		// "phases.entries is not a function"). Never trust the control shape.
+		const phases = normalizePhases(state.spec?.phases);
+		if (!Array.isArray(state.spec?.phases) && state.spec?.phases != null) {
+			ctx.log(`Implementation: spec.phases was ${typeof state.spec.phases}, expected an array — normalized to ${phases.length} phase(s)`);
+		}
+		if (phases.length === 0) {
+			ctx.log("Implementation: no phases defined in spec — skipping");
+			return { phasesCompleted: 0, totalPhases: 0, allGreen: false };
+		}
+		const setup = state.setup!;
+		let phasesCompleted = 0;
+		let allGreen = true;
+		const filesModified: string[] = [];
+		for (const [idx, phase] of phases.entries()) {
+			const phaseId = `phase-${pad(idx + 1)}`;
+			let green = false;
+			for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
+				if (!ctx.budget.check()) {
+					allGreen = false;
+					return { phasesCompleted, totalPhases: phases.length, allGreen, filesModified, summary: "Budget exhausted" };
+				}
+				await ctx.agent({ id: `pipeline.implementation.${phaseId}.tdd.a${attempt}`, agent: "tdd-guide", prompt: buildTddPrompt(setup, state.classify ?? null, phase, state.spec ?? null) });
+				const specialist = await ctx.helper({ name: "route-specialist", sources: { "classify-task": state.classify }, options: { phase } });
+				const impl = await ctx.agent({ id: `pipeline.implementation.${phaseId}.impl.a${attempt}`, agent: "implementer", prompt: buildImplementPrompt(setup, state.classify ?? null, phase, specialist.value, state.spec ?? null) });
+				// Same rule as for `phases`: the agent may return `filesModified` as a
+				// string (which would be iterated character by character) or as a
+				// non-iterable object (which would throw). Accept only arrays, and
+				// only their string entries.
+				const reported: unknown = (impl.control as { filesModified?: unknown } | null)?.filesModified;
+				for (const f of Array.isArray(reported) ? reported : []) {
+					if (typeof f === "string" && !filesModified.includes(f)) filesModified.push(f);
+				}
+				const qa = await ctx.agent({ id: `pipeline.implementation.${phaseId}.qa.a${attempt}`, agent: "qa-agent", prompt: buildQaPrompt(setup, state.classify ?? null, phase) });
+				const qaControl: ControlObj = qa.control ?? {};
+				const gate = await ctx.helper({ name: "gate-build", sources: { "qa-check": qaControl } });
+				if (gate.value.pass) {
+					green = true;
+					ctx.log(`Implementation ${phaseId} GREEN on attempt ${attempt}`);
+					break;
+				}
+				// `errors` may be missing or a single value rather than an array;
+				// calling `.join` on it directly would crash the stage while logging.
+				const rawErrors: unknown = gate.value.errors;
+				const errors: unknown[] = Array.isArray(rawErrors) ? rawErrors : rawErrors == null ? [] : [rawErrors];
+				ctx.log(`Implementation ${phaseId} attempt ${attempt}/${MAX_ATTEMPTS} FAIL: ${errors.join(", ")}`);
+			}
+			if (!green) {
+				ctx.log(`Implementation ${phaseId} failed after ${MAX_ATTEMPTS} attempts — terminating early`);
+				allGreen = false;
+				break;
+			}
+			phasesCompleted++;
+			if (ctx.budget.check()) {
+				await ctx.agent({ id: `pipeline.implementation.${phaseId}.commit`, agent: "orchestrator", prompt: buildCommitPrompt(setup, phase.name) });
+			}
+		}
+		const control: ControlObj = {
+			phasesCompleted,
+			totalPhases: phases.length,
+			allGreen,
+			filesModified,
+			summary: allGreen ? `All ${phases.length} phases completed successfully` : `${phasesCompleted}/${phases.length} phases completed`,
+		};
+		if (ctx.budget.check()) {
+			await ctx.agent({ id: "pipeline.implementation.summary", agent: "orchestrator", prompt: buildImplementationSummaryPrompt(setup, state.classify ?? null, control) });
+		}
+		return control;
+	},
+};