substrate-ai 0.2.11 → 0.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -2,7 +2,7 @@
2
2
  import { createLogger, deepMask } from "../logger-C6n1g8uP.js";
3
3
  import { AdapterRegistry, createEventBus } from "../event-bus-J-bw-pkp.js";
4
4
  import { CURRENT_CONFIG_FORMAT_VERSION, CURRENT_TASK_GRAPH_VERSION, PartialSubstrateConfigSchema, SUPPORTED_CONFIG_FORMAT_VERSIONS, SubstrateConfigSchema, defaultConfigMigrator } from "../version-manager-impl-BpVx2DkY.js";
5
- import { DatabaseWrapper, SUBSTRATE_OWNED_SETTINGS_KEYS, VALID_PHASES, buildPipelineStatusOutput, createContextCompiler, createDispatcher, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, findPackageRoot, formatOutput, formatPhaseCompletionSummary, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getSubstrateDefaultSettings, parseDbTimestampAsUtc, registerRunCommand, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveMainRepoRoot, runAnalysisPhase, runMigrations, runPlanningPhase, runSolutioningPhase, validateStopAfterFromConflict } from "../run-L-R_XYNT.js";
5
+ import { DatabaseWrapper, SUBSTRATE_OWNED_SETTINGS_KEYS, VALID_PHASES, buildPipelineStatusOutput, createContextCompiler, createDispatcher, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, findPackageRoot, formatOutput, formatPhaseCompletionSummary, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getSubstrateDefaultSettings, parseDbTimestampAsUtc, registerRunCommand, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveMainRepoRoot, runAnalysisPhase, runMigrations, runPlanningPhase, runSolutioningPhase, validateStopAfterFromConflict } from "../run-CoP8UQU3.js";
6
6
  import { ConfigError, ConfigIncompatibleFormatError } from "../errors-BPqtzQ4U.js";
7
7
  import { addTokenUsage, createDecision, getDecisionsByPhaseForRun, getLatestRun, getPipelineRunById, getTokenUsageSummary, listRequirements, updatePipelineRun } from "../decisions-DNYByk0U.js";
8
8
  import { aggregateTokenUsageForRun, compareRunMetrics, getBaselineRunMetrics, getRunMetrics, getStoryMetricsForRun, incrementRunRestarts, listRunMetrics, tagRunAsBaseline } from "../metrics-BSg8VIHd.js";
@@ -2414,16 +2414,24 @@ const DEFAULT_STALL_THRESHOLD_SECONDS = 600;
2414
2414
  * - `node dist/cli/index.js run` (npm run substrate:dev)
2415
2415
  * - `npx substrate run`
2416
2416
  * - any node process whose command contains `run` with `--events` or `--stories`
2417
+ *
2418
+ * When `projectRoot` is provided, additionally checks that the command line
2419
+ * contains that path (via `--project-root` flag or as part of the binary/CWD path).
2420
+ * This ensures multi-project environments match the correct orchestrator.
2417
2421
  */
2418
- function isOrchestratorProcessLine(line) {
2422
+ function isOrchestratorProcessLine(line, projectRoot) {
2419
2423
  if (line.includes("grep")) return false;
2420
- if (line.includes("substrate run")) return true;
2421
- if (line.includes("substrate-ai run")) return true;
2422
- if (line.includes("index.js run")) return true;
2423
- if (line.includes("node") && /\srun(\s|$)/.test(line) && (line.includes("--events") || line.includes("--stories"))) return true;
2424
- return false;
2425
- }
2426
- function inspectProcessTree(execFileSyncOverride) {
2424
+ let isOrchestrator = false;
2425
+ if (line.includes("substrate run")) isOrchestrator = true;
2426
+ else if (line.includes("substrate-ai run")) isOrchestrator = true;
2427
+ else if (line.includes("index.js run")) isOrchestrator = true;
2428
+ else if (line.includes("node") && /\srun(\s|$)/.test(line) && (line.includes("--events") || line.includes("--stories"))) isOrchestrator = true;
2429
+ if (!isOrchestrator) return false;
2430
+ if (projectRoot !== void 0) return line.includes(projectRoot);
2431
+ return true;
2432
+ }
2433
+ function inspectProcessTree(opts) {
2434
+ const { projectRoot, execFileSync: execFileSyncOverride } = opts ?? {};
2427
2435
  const result = {
2428
2436
  orchestrator_pid: null,
2429
2437
  child_pids: [],
@@ -2443,7 +2451,7 @@ function inspectProcessTree(execFileSyncOverride) {
2443
2451
  });
2444
2452
  }
2445
2453
  const lines = psOutput.split("\n");
2446
- for (const line of lines) if (isOrchestratorProcessLine(line)) {
2454
+ for (const line of lines) if (isOrchestratorProcessLine(line, projectRoot)) {
2447
2455
  const match = line.trim().match(/^(\d+)/);
2448
2456
  if (match) {
2449
2457
  result.orchestrator_pid = parseInt(match[1], 10);
@@ -2466,6 +2474,58 @@ function inspectProcessTree(execFileSyncOverride) {
2466
2474
  return result;
2467
2475
  }
2468
2476
  /**
2477
+ * Collect all descendant PIDs of the given root PIDs by walking the process
2478
+ * tree recursively. This ensures that grandchildren of the orchestrator
2479
+ * (e.g. node subprocesses spawned by `claude -p`) are also killed during
2480
+ * stall recovery, leaving no orphan processes.
2481
+ *
2482
+ * Returns only the descendants — the root PIDs themselves are NOT included.
2483
+ */
2484
+ function getAllDescendantPids(rootPids, execFileSyncOverride) {
2485
+ if (rootPids.length === 0) return [];
2486
+ try {
2487
+ let psOutput;
2488
+ if (execFileSyncOverride !== void 0) psOutput = execFileSyncOverride("ps", ["-eo", "pid,ppid"], {
2489
+ encoding: "utf-8",
2490
+ timeout: 5e3
2491
+ });
2492
+ else {
2493
+ const { execFileSync } = __require("node:child_process");
2494
+ psOutput = execFileSync("ps", ["-eo", "pid,ppid"], {
2495
+ encoding: "utf-8",
2496
+ timeout: 5e3
2497
+ });
2498
+ }
2499
+ const childrenOf = new Map();
2500
+ for (const line of psOutput.split("\n")) {
2501
+ const parts = line.trim().split(/\s+/);
2502
+ if (parts.length >= 2) {
2503
+ const pid = parseInt(parts[0], 10);
2504
+ const ppid = parseInt(parts[1], 10);
2505
+ if (!isNaN(pid) && !isNaN(ppid) && pid > 0) {
2506
+ if (!childrenOf.has(ppid)) childrenOf.set(ppid, []);
2507
+ childrenOf.get(ppid).push(pid);
2508
+ }
2509
+ }
2510
+ }
2511
+ const descendants = [];
2512
+ const seen = new Set(rootPids);
2513
+ const queue = [...rootPids];
2514
+ while (queue.length > 0) {
2515
+ const current = queue.shift();
2516
+ const children = childrenOf.get(current) ?? [];
2517
+ for (const child of children) if (!seen.has(child)) {
2518
+ seen.add(child);
2519
+ descendants.push(child);
2520
+ queue.push(child);
2521
+ }
2522
+ }
2523
+ return descendants;
2524
+ } catch {
2525
+ return [];
2526
+ }
2527
+ }
2528
+ /**
2469
2529
  * Fetch pipeline health data as a structured object without any stdout side-effects.
2470
2530
  * Used by runSupervisorAction to poll health without formatting overhead.
2471
2531
  *
@@ -2524,10 +2584,11 @@ async function getAutoHealthData(options) {
2524
2584
  }
2525
2585
  }
2526
2586
  } catch {}
2527
- const processInfo = inspectProcessTree();
2587
+ const processInfo = inspectProcessTree({ projectRoot });
2528
2588
  let verdict = "NO_PIPELINE_RUNNING";
2529
2589
  if (run.status === "running") if (processInfo.orchestrator_pid === null && active === 0 && completed > 0) verdict = "NO_PIPELINE_RUNNING";
2530
2590
  else if (processInfo.zombies.length > 0) verdict = "STALLED";
2591
+ else if (processInfo.orchestrator_pid !== null && processInfo.child_pids.length > 0 && stalenessSeconds > DEFAULT_STALL_THRESHOLD_SECONDS) verdict = "HEALTHY";
2531
2592
  else if (stalenessSeconds > DEFAULT_STALL_THRESHOLD_SECONDS) verdict = "STALLED";
2532
2593
  else if (processInfo.orchestrator_pid !== null && processInfo.child_pids.length === 0 && active > 0) verdict = "STALLED";
2533
2594
  else verdict = "HEALTHY";
@@ -2660,6 +2721,7 @@ function defaultSupervisorDeps() {
2660
2721
  };
2661
2722
  }
2662
2723
  },
2724
+ getAllDescendants: (rootPids) => getAllDescendantPids(rootPids),
2663
2725
  runAnalysis: async (runId, projectRoot) => {
2664
2726
  const dbPath = join(projectRoot, ".substrate", "substrate.db");
2665
2727
  if (!existsSync(dbPath)) return;
@@ -2688,6 +2750,140 @@ function defaultSupervisorDeps() {
2688
2750
  }
2689
2751
  };
2690
2752
  }
2753
+ /** Build the supervisor:poll event payload. */
2754
+ function buildPollEvent(health, projectRoot, tokenSnapshot, extraFields) {
2755
+ const proc = health.process ?? {
2756
+ orchestrator_pid: null,
2757
+ child_pids: [],
2758
+ zombies: []
2759
+ };
2760
+ return {
2761
+ type: "supervisor:poll",
2762
+ run_id: health.run_id,
2763
+ verdict: health.verdict,
2764
+ staleness_seconds: health.staleness_seconds,
2765
+ stories: {
2766
+ active: health.stories.active,
2767
+ completed: health.stories.completed,
2768
+ escalated: health.stories.escalated
2769
+ },
2770
+ story_details: health.stories.details,
2771
+ tokens: tokenSnapshot,
2772
+ process: {
2773
+ orchestrator_pid: proc.orchestrator_pid,
2774
+ child_count: proc.child_pids.length,
2775
+ zombie_count: proc.zombies.length
2776
+ },
2777
+ ...extraFields
2778
+ };
2779
+ }
2780
+ /** Extract succeeded / failed / escalated story keys from health details. */
2781
+ function buildTerminalSummary(storyDetails) {
2782
+ const succeeded = [];
2783
+ const failed = [];
2784
+ const escalated = [];
2785
+ for (const [k, s] of Object.entries(storyDetails)) if (s.phase === "COMPLETE") succeeded.push(k);
2786
+ else if (s.phase === "ESCALATED") escalated.push(k);
2787
+ else if (s.phase !== "PENDING") failed.push(k);
2788
+ return {
2789
+ succeeded,
2790
+ failed,
2791
+ escalated
2792
+ };
2793
+ }
2794
+ /**
2795
+ * Handle stall recovery for a single project: kill stalled processes, restart pipeline.
2796
+ *
2797
+ * Returns null if no stall detected (staleness below threshold).
2798
+ * Returns updated state + maxRestartsExceeded flag otherwise.
2799
+ */
2800
+ async function handleStallRecovery(health, state, config, deps, io) {
2801
+ const { stallThreshold, maxRestarts, pack, outputFormat } = config;
2802
+ const { killPid, resumePipeline, sleep, incrementRestarts, getAllDescendants } = deps;
2803
+ const { emitEvent, log } = io;
2804
+ const { projectRoot } = state;
2805
+ if (health.staleness_seconds < stallThreshold) return null;
2806
+ const directPids = [...health.process.orchestrator_pid !== null ? [health.process.orchestrator_pid] : [], ...health.process.child_pids];
2807
+ const descendantPids = getAllDescendants(directPids);
2808
+ const directPidSet = new Set(directPids);
2809
+ const pids = [...directPids, ...descendantPids.filter((p) => !directPidSet.has(p))];
2810
+ emitEvent({
2811
+ type: "supervisor:kill",
2812
+ run_id: health.run_id,
2813
+ reason: "stall",
2814
+ staleness_seconds: health.staleness_seconds,
2815
+ pids
2816
+ });
2817
+ log(`Supervisor: Stall confirmed (${health.staleness_seconds}s ≥ ${stallThreshold}s threshold). Killing PIDs: ${pids.join(", ") || "none"}`);
2818
+ for (const pid of pids) try {
2819
+ killPid(pid, "SIGTERM");
2820
+ } catch {}
2821
+ await sleep(5e3);
2822
+ for (const pid of pids) try {
2823
+ killPid(pid, "SIGKILL");
2824
+ } catch {}
2825
+ if (pids.length > 0) {
2826
+ let allDead = false;
2827
+ for (let attempt = 0; attempt < 5; attempt++) {
2828
+ await sleep(1e3);
2829
+ allDead = pids.every((pid) => {
2830
+ try {
2831
+ process.kill(pid, 0);
2832
+ return false;
2833
+ } catch {
2834
+ return true;
2835
+ }
2836
+ });
2837
+ if (allDead) break;
2838
+ }
2839
+ if (!allDead) log(`Supervisor: Warning: Some PIDs may still be alive after SIGKILL`);
2840
+ }
2841
+ if (state.restartCount >= maxRestarts) {
2842
+ emitEvent({
2843
+ type: "supervisor:abort",
2844
+ run_id: health.run_id,
2845
+ reason: "max_restarts_exceeded",
2846
+ attempts: state.restartCount
2847
+ });
2848
+ log(`Supervisor: Max restarts (${maxRestarts}) exceeded. Aborting.`);
2849
+ return {
2850
+ state,
2851
+ maxRestartsExceeded: true
2852
+ };
2853
+ }
2854
+ const newRestartCount = state.restartCount + 1;
2855
+ if (health.run_id !== null) incrementRestarts(health.run_id, projectRoot);
2856
+ emitEvent({
2857
+ type: "supervisor:restart",
2858
+ run_id: health.run_id,
2859
+ attempt: newRestartCount
2860
+ });
2861
+ log(`Supervisor: Restarting pipeline (attempt ${newRestartCount}/${maxRestarts})`);
2862
+ try {
2863
+ await resumePipeline({
2864
+ runId: health.run_id ?? void 0,
2865
+ outputFormat,
2866
+ projectRoot,
2867
+ concurrency: 3,
2868
+ pack
2869
+ });
2870
+ } catch (err) {
2871
+ const message = err instanceof Error ? err.message : String(err);
2872
+ log(`Supervisor: Resume error: ${message}`);
2873
+ emitEvent({
2874
+ type: "supervisor:error",
2875
+ reason: "resume_failed",
2876
+ message
2877
+ });
2878
+ }
2879
+ return {
2880
+ state: {
2881
+ ...state,
2882
+ restartCount: newRestartCount
2883
+ },
2884
+ maxRestartsExceeded: false
2885
+ };
2886
+ }
2691
2887
  /**
2692
2888
  * Run the pipeline supervisor — a long-running watchdog that polls pipeline health
2693
2889
  * and automatically kills and restarts stalled pipelines.
@@ -2701,11 +2897,16 @@ function defaultSupervisorDeps() {
2701
2897
  */
2702
2898
  async function runSupervisorAction(options, deps = {}) {
2703
2899
  const { pollInterval, stallThreshold, maxRestarts, outputFormat, projectRoot, runId, pack, experiment, maxExperiments } = options;
2704
- const { getHealth, killPid, resumePipeline, sleep, incrementRestarts, runAnalysis, getTokenSnapshot } = {
2900
+ const resolvedDeps = {
2705
2901
  ...defaultSupervisorDeps(),
2706
2902
  ...deps
2707
2903
  };
2708
- let restartCount = 0;
2904
+ const { getHealth, sleep, runAnalysis, getTokenSnapshot } = resolvedDeps;
2905
+ let state = {
2906
+ projectRoot,
2907
+ runId,
2908
+ restartCount: 0
2909
+ };
2709
2910
  const startTime = Date.now();
2710
2911
  function emitEvent(event) {
2711
2912
  if (outputFormat === "json") {
@@ -2731,46 +2932,20 @@ async function runSupervisorAction(options, deps = {}) {
2731
2932
  output: 0,
2732
2933
  cost_usd: 0
2733
2934
  };
2734
- const proc = health.process ?? {
2735
- orchestrator_pid: null,
2736
- child_pids: [],
2737
- zombies: []
2738
- };
2739
- emitEvent({
2740
- type: "supervisor:poll",
2741
- run_id: health.run_id,
2742
- verdict: health.verdict,
2743
- staleness_seconds: health.staleness_seconds,
2744
- stories: {
2745
- active: health.stories.active,
2746
- completed: health.stories.completed,
2747
- escalated: health.stories.escalated
2748
- },
2749
- story_details: health.stories.details,
2750
- tokens: tokenSnapshot,
2751
- process: {
2752
- orchestrator_pid: proc.orchestrator_pid,
2753
- child_count: proc.child_pids.length,
2754
- zombie_count: proc.zombies.length
2755
- }
2756
- });
2935
+ emitEvent(buildPollEvent(health, projectRoot, tokenSnapshot));
2757
2936
  }
2758
2937
  log(`[${ts}] Health: ${health.verdict} | staleness=${health.staleness_seconds}s | stories: active=${health.stories.active} completed=${health.stories.completed} escalated=${health.stories.escalated}`);
2759
2938
  if (health.verdict === "NO_PIPELINE_RUNNING") {
2760
2939
  const elapsedSeconds = Math.round((Date.now() - startTime) / 1e3);
2761
- const succeeded = Object.entries(health.stories.details).filter(([, s]) => s.phase === "COMPLETE").map(([k]) => k);
2762
- const failed = Object.entries(health.stories.details).filter(([, s]) => s.phase !== "COMPLETE" && s.phase !== "PENDING" && s.phase !== "ESCALATED").map(([k]) => k);
2763
- const escalated = Object.entries(health.stories.details).filter(([, s]) => s.phase === "ESCALATED").map(([k]) => k);
2940
+ const summary = buildTerminalSummary(health.stories.details);
2764
2941
  emitEvent({
2765
2942
  type: "supervisor:summary",
2766
2943
  run_id: health.run_id,
2767
2944
  elapsed_seconds: elapsedSeconds,
2768
- succeeded,
2769
- failed,
2770
- escalated,
2771
- restarts: restartCount
2945
+ ...summary,
2946
+ restarts: state.restartCount
2772
2947
  });
2773
- log(`\nPipeline reached terminal state. Elapsed: ${elapsedSeconds}s | succeeded: ${succeeded.length} | failed: ${failed.length} | restarts: ${restartCount}`);
2948
+ log(`\nPipeline reached terminal state. Elapsed: ${elapsedSeconds}s | succeeded: ${summary.succeeded.length} | failed: ${summary.failed.length} | restarts: ${state.restartCount}`);
2774
2949
  if (health.run_id !== null && runAnalysis !== void 0) {
2775
2950
  log(`[supervisor] Running post-run analysis for ${health.run_id}...`);
2776
2951
  try {
@@ -2833,7 +3008,7 @@ async function runSupervisorAction(options, deps = {}) {
2833
3008
  const expDb = expDbWrapper.db;
2834
3009
  const { runRunAction: runPipeline } = await import(
2835
3010
  /* @vite-ignore */
2836
- "../run-C8aOWnKG.js"
3011
+ "../run-B9IglY4m.js"
2837
3012
  );
2838
3013
  const runStoryFn = async (opts) => {
2839
3014
  const exitCode = await runPipeline({
@@ -2896,84 +3071,162 @@ async function runSupervisorAction(options, deps = {}) {
2896
3071
  });
2897
3072
  }
2898
3073
  }
2899
- return failed.length > 0 || escalated.length > 0 ? 1 : 0;
3074
+ return summary.failed.length > 0 || summary.escalated.length > 0 ? 1 : 0;
2900
3075
  }
2901
- if (health.staleness_seconds >= stallThreshold) {
2902
- const pids = [...health.process.orchestrator_pid !== null ? [health.process.orchestrator_pid] : [], ...health.process.child_pids];
2903
- emitEvent({
2904
- type: "supervisor:kill",
2905
- run_id: health.run_id,
2906
- reason: "stall",
2907
- staleness_seconds: health.staleness_seconds,
2908
- pids
2909
- });
2910
- log(`Supervisor: Stall confirmed (${health.staleness_seconds}s ≥ ${stallThreshold}s threshold). Killing PIDs: ${pids.join(", ") || "none"}`);
2911
- for (const pid of pids) try {
2912
- killPid(pid, "SIGTERM");
2913
- } catch {}
2914
- await sleep(5e3);
2915
- for (const pid of pids) try {
2916
- killPid(pid, "SIGKILL");
2917
- } catch {}
2918
- if (pids.length > 0) {
2919
- let allDead = false;
2920
- for (let attempt = 0; attempt < 5; attempt++) {
2921
- await sleep(1e3);
2922
- allDead = pids.every((pid) => {
2923
- try {
2924
- process.kill(pid, 0);
2925
- return false;
2926
- } catch {
2927
- return true;
2928
- }
2929
- });
2930
- if (allDead) break;
2931
- }
2932
- if (!allDead) log(`Supervisor: Warning: Some PIDs may still be alive after SIGKILL`);
3076
+ const stallResult = await handleStallRecovery(health, state, {
3077
+ stallThreshold,
3078
+ maxRestarts,
3079
+ pack,
3080
+ outputFormat
3081
+ }, resolvedDeps, {
3082
+ emitEvent,
3083
+ log
3084
+ });
3085
+ if (stallResult !== null) {
3086
+ if (stallResult.maxRestartsExceeded) return 2;
3087
+ state = stallResult.state;
3088
+ }
3089
+ await sleep(pollInterval * 1e3);
3090
+ }
3091
+ }
3092
+ /**
3093
+ * Run the supervisor across multiple projects simultaneously.
3094
+ * Polls each project sequentially within each cycle, tagging events with `project`.
3095
+ *
3096
+ * Exit codes:
3097
+ * 0 — all projects completed without failures
3098
+ * 1 — at least one project completed with failures or escalations
3099
+ * 2 — at least one project hit max restarts
3100
+ */
3101
+ async function runMultiProjectSupervisor(options, deps = {}) {
3102
+ const { projects, pollInterval, stallThreshold, maxRestarts, outputFormat, pack } = options;
3103
+ const resolvedDeps = {
3104
+ ...defaultSupervisorDeps(),
3105
+ ...deps
3106
+ };
3107
+ const { getHealth, sleep, getTokenSnapshot } = resolvedDeps;
3108
+ if (projects.length === 0) {
3109
+ process.stderr.write("Error: --projects requires at least one project path\n");
3110
+ return 1;
3111
+ }
3112
+ const states = new Map(projects.map((p) => [p, {
3113
+ projectRoot: p,
3114
+ restartCount: 0
3115
+ }]));
3116
+ const doneProjects = new Set();
3117
+ const projectExitCodes = new Map();
3118
+ const startTime = Date.now();
3119
+ function emitEvent(event) {
3120
+ if (outputFormat === "json") {
3121
+ const stamped = {
3122
+ ...event,
3123
+ ts: new Date().toISOString()
3124
+ };
3125
+ process.stdout.write(JSON.stringify(stamped) + "\n");
3126
+ }
3127
+ }
3128
+ function log(message) {
3129
+ if (outputFormat === "human") process.stdout.write(message + "\n");
3130
+ }
3131
+ while (true) {
3132
+ for (const projectRoot of projects) {
3133
+ if (doneProjects.has(projectRoot)) continue;
3134
+ let health;
3135
+ try {
3136
+ health = await getHealth({ projectRoot });
3137
+ } catch {
3138
+ log(`[supervisor] ${projectRoot}: health check failed — marking as done`);
3139
+ emitEvent({
3140
+ type: "supervisor:error",
3141
+ project: projectRoot,
3142
+ reason: "health_check_failed"
3143
+ });
3144
+ doneProjects.add(projectRoot);
3145
+ projectExitCodes.set(projectRoot, 1);
3146
+ continue;
3147
+ }
3148
+ const state = states.get(projectRoot);
3149
+ if (outputFormat === "json") {
3150
+ const tokenSnapshot = health.run_id !== null ? getTokenSnapshot(health.run_id, projectRoot) : {
3151
+ input: 0,
3152
+ output: 0,
3153
+ cost_usd: 0
3154
+ };
3155
+ emitEvent(buildPollEvent(health, projectRoot, tokenSnapshot, { project: projectRoot }));
2933
3156
  }
2934
- if (restartCount >= maxRestarts) {
3157
+ log(`[${projectRoot}] Health: ${health.verdict} | staleness=${health.staleness_seconds}s | active=${health.stories.active} completed=${health.stories.completed} escalated=${health.stories.escalated}`);
3158
+ if (health.verdict === "NO_PIPELINE_RUNNING") {
3159
+ const elapsedSeconds = Math.round((Date.now() - startTime) / 1e3);
3160
+ const summary = buildTerminalSummary(health.stories.details);
2935
3161
  emitEvent({
2936
- type: "supervisor:abort",
3162
+ type: "supervisor:summary",
3163
+ project: projectRoot,
2937
3164
  run_id: health.run_id,
2938
- reason: "max_restarts_exceeded",
2939
- attempts: restartCount
3165
+ elapsed_seconds: elapsedSeconds,
3166
+ ...summary,
3167
+ restarts: state.restartCount
2940
3168
  });
2941
- log(`Supervisor: Max restarts (${maxRestarts}) exceeded. Aborting.`);
2942
- return 2;
3169
+ log(`[${projectRoot}] Terminal. succeeded=${summary.succeeded.length} failed=${summary.failed.length} restarts=${state.restartCount}`);
3170
+ doneProjects.add(projectRoot);
3171
+ projectExitCodes.set(projectRoot, summary.failed.length > 0 || summary.escalated.length > 0 ? 1 : 0);
3172
+ continue;
2943
3173
  }
2944
- restartCount++;
2945
- if (health.run_id !== null) incrementRestarts(health.run_id, projectRoot);
3174
+ const stallResult = await handleStallRecovery(health, state, {
3175
+ stallThreshold,
3176
+ maxRestarts,
3177
+ pack,
3178
+ outputFormat
3179
+ }, resolvedDeps, {
3180
+ emitEvent: (evt) => emitEvent({
3181
+ ...evt,
3182
+ project: projectRoot
3183
+ }),
3184
+ log: (msg) => log(`[${projectRoot}] ${msg}`)
3185
+ });
3186
+ if (stallResult !== null) if (stallResult.maxRestartsExceeded) {
3187
+ doneProjects.add(projectRoot);
3188
+ projectExitCodes.set(projectRoot, 2);
3189
+ } else states.set(projectRoot, stallResult.state);
3190
+ }
3191
+ if (doneProjects.size >= projects.length) {
3192
+ const elapsedSeconds = Math.round((Date.now() - startTime) / 1e3);
2946
3193
  emitEvent({
2947
- type: "supervisor:restart",
2948
- run_id: health.run_id,
2949
- attempt: restartCount
3194
+ type: "supervisor:done",
3195
+ elapsed_seconds: elapsedSeconds,
3196
+ project_results: Object.fromEntries(projectExitCodes)
2950
3197
  });
2951
- log(`Supervisor: Restarting pipeline (attempt ${restartCount}/${maxRestarts})`);
2952
- try {
2953
- await resumePipeline({
2954
- runId: health.run_id ?? void 0,
2955
- outputFormat,
2956
- projectRoot,
2957
- concurrency: 3,
2958
- pack
2959
- });
2960
- } catch (err) {
2961
- const message = err instanceof Error ? err.message : String(err);
2962
- log(`Supervisor: Resume error: ${message}`);
2963
- if (outputFormat === "json") emitEvent({
2964
- type: "supervisor:error",
2965
- reason: "resume_failed",
2966
- message
2967
- });
2968
- }
3198
+ log(`\nAll projects reached terminal state. Elapsed: ${elapsedSeconds}s`);
3199
+ const exitCodes = [...projectExitCodes.values()];
3200
+ if (exitCodes.includes(2)) return 2;
3201
+ if (exitCodes.includes(1)) return 1;
3202
+ return 0;
2969
3203
  }
2970
3204
  await sleep(pollInterval * 1e3);
2971
3205
  }
2972
3206
  }
2973
3207
  function registerSupervisorCommand(program, _version = "0.0.0", projectRoot = process.cwd()) {
2974
- program.command("supervisor").description("Monitor a pipeline run and automatically recover from stalls").option("--poll-interval <seconds>", "Health poll interval in seconds", (v) => parseInt(v, 10), 60).option("--stall-threshold <seconds>", "Staleness in seconds before killing a stalled pipeline", (v) => parseInt(v, 10), 600).option("--max-restarts <n>", "Maximum automatic restarts before aborting", (v) => parseInt(v, 10), 3).option("--run-id <id>", "Pipeline run ID to monitor (defaults to latest)").option("--pack <name>", "Methodology pack name", "bmad").option("--project-root <path>", "Project root directory", projectRoot).option("--output-format <format>", "Output format: human (default) or json", "human").option("--experiment", "After post-run analysis, enter experiment mode: create branches, apply modifications, run single-story experiments, and report verdicts (Story 17-4)", false).option("--max-experiments <n>", "Maximum number of experiments to run per analysis cycle (default: 2, Story 17-4 AC6)", (v) => parseInt(v, 10), 2).action(async (opts) => {
3208
+ program.command("supervisor").description("Monitor a pipeline run and automatically recover from stalls").option("--poll-interval <seconds>", "Health poll interval in seconds", (v) => parseInt(v, 10), 60).option("--stall-threshold <seconds>", "Staleness in seconds before killing a stalled pipeline", (v) => parseInt(v, 10), 600).option("--max-restarts <n>", "Maximum automatic restarts before aborting", (v) => parseInt(v, 10), 3).option("--run-id <id>", "Pipeline run ID to monitor (defaults to latest)").option("--pack <name>", "Methodology pack name", "bmad").option("--project-root <path>", "Project root directory", projectRoot).option("--projects <paths>", "Comma-separated project root directories to monitor (multi-project mode)").option("--output-format <format>", "Output format: human (default) or json", "human").option("--experiment", "After post-run analysis, enter experiment mode: create branches, apply modifications, run single-story experiments, and report verdicts (Story 17-4)", false).option("--max-experiments <n>", "Maximum number of experiments to run per analysis cycle (default: 2, Story 17-4 AC6)", (v) => parseInt(v, 10), 2).action(async (opts) => {
2975
3209
  const outputFormat = opts.outputFormat === "json" ? "json" : "human";
2976
3210
  if (opts.stallThreshold < 120) console.warn(`Warning: --stall-threshold ${opts.stallThreshold}s is below 120s. Agent steps typically take 45-90s. This may cause false stall detections and wasted restarts.`);
3211
+ if (opts.projects) {
3212
+ if (opts.runId) {
3213
+ console.error("Error: --run-id cannot be used with --projects (ambiguous)");
3214
+ process.exitCode = 1;
3215
+ return;
3216
+ }
3217
+ if (opts.experiment) console.warn("Warning: --experiment is not supported in multi-project mode — ignored.");
3218
+ const projects = opts.projects.split(",").map((p) => resolve(p.trim()));
3219
+ const exitCode$1 = await runMultiProjectSupervisor({
3220
+ projects,
3221
+ pollInterval: opts.pollInterval,
3222
+ stallThreshold: opts.stallThreshold,
3223
+ maxRestarts: opts.maxRestarts,
3224
+ outputFormat,
3225
+ pack: opts.pack
3226
+ });
3227
+ process.exitCode = exitCode$1;
3228
+ return;
3229
+ }
2977
3230
  const exitCode = await runSupervisorAction({
2978
3231
  pollInterval: opts.pollInterval,
2979
3232
  stallThreshold: opts.stallThreshold,
package/dist/index.d.ts CHANGED
@@ -1025,6 +1025,8 @@ interface OrchestratorEvents {
1025
1025
  storyKey: string;
1026
1026
  phase: string;
1027
1027
  elapsedMs: number;
1028
+ /** PID of the stalled child process, or null if not tracked */
1029
+ childPid: number | null;
1028
1030
  };
1029
1031
  /** Readiness check has completed — emitted for all verdicts (READY, NEEDS_WORK, NOT_READY) */
1030
1032
  'solutioning:readiness-check': {
@@ -1,6 +1,6 @@
1
1
  import "./logger-C6n1g8uP.js";
2
2
  import "./event-bus-J-bw-pkp.js";
3
- import { registerRunCommand, runRunAction } from "./run-L-R_XYNT.js";
3
+ import { registerRunCommand, runRunAction } from "./run-CoP8UQU3.js";
4
4
  import "./decisions-DNYByk0U.js";
5
5
  import "./metrics-BSg8VIHd.js";
6
6
 
@@ -1202,6 +1202,15 @@ function buildPipelineStatusOutput(run, tokenSummary, decisionsCount, storiesCou
1202
1202
  totalOutput += row.total_output_tokens;
1203
1203
  totalCost += row.total_cost_usd;
1204
1204
  }
1205
+ let activeDispatches = 0;
1206
+ try {
1207
+ if (run.token_usage_json) {
1208
+ const state = JSON.parse(run.token_usage_json);
1209
+ if (state.stories) {
1210
+ for (const s of Object.values(state.stories)) if (s.phase !== "PENDING" && s.phase !== "COMPLETE" && s.phase !== "ESCALATED") activeDispatches++;
1211
+ }
1212
+ }
1213
+ } catch {}
1205
1214
  return {
1206
1215
  run_id: run.id,
1207
1216
  current_phase: currentPhase,
@@ -1214,7 +1223,9 @@ function buildPipelineStatusOutput(run, tokenSummary, decisionsCount, storiesCou
1214
1223
  decisions_count: decisionsCount,
1215
1224
  stories_count: storiesCount,
1216
1225
  last_activity: run.updated_at,
1217
- staleness_seconds: Math.round((Date.now() - parseDbTimestampAsUtc(run.updated_at).getTime()) / 1e3)
1226
+ staleness_seconds: Math.round((Date.now() - parseDbTimestampAsUtc(run.updated_at).getTime()) / 1e3),
1227
+ last_event_ts: run.updated_at,
1228
+ active_dispatches: activeDispatches
1218
1229
  };
1219
1230
  }
1220
1231
  /**
@@ -5359,6 +5370,7 @@ function createImplementationOrchestrator(deps) {
5359
5370
  let _heartbeatTimer = null;
5360
5371
  const HEARTBEAT_INTERVAL_MS = 3e4;
5361
5372
  const WATCHDOG_TIMEOUT_MS = 6e5;
5373
+ const _stalledStories = new Set();
5362
5374
  const _phaseStartMs = new Map();
5363
5375
  const _phaseEndMs = new Map();
5364
5376
  const _storyDispatches = new Map();
@@ -5454,6 +5466,7 @@ function createImplementationOrchestrator(deps) {
5454
5466
  }
5455
5467
  function recordProgress() {
5456
5468
  _lastProgressTs = Date.now();
5469
+ _stalledStories.clear();
5457
5470
  }
5458
5471
  function startHeartbeat() {
5459
5472
  if (_heartbeatTimer !== null) return;
@@ -5465,7 +5478,8 @@ function createImplementationOrchestrator(deps) {
5465
5478
  for (const s of _stories.values()) if (s.phase === "COMPLETE" || s.phase === "ESCALATED") completed++;
5466
5479
  else if (s.phase === "PENDING") queued++;
5467
5480
  else active++;
5468
- eventBus.emit("orchestrator:heartbeat", {
5481
+ const timeSinceProgress = Date.now() - _lastProgressTs;
5482
+ if (timeSinceProgress >= HEARTBEAT_INTERVAL_MS) eventBus.emit("orchestrator:heartbeat", {
5469
5483
  runId: config.pipelineRunId ?? "",
5470
5484
  activeDispatches: active,
5471
5485
  completedDispatches: completed,
@@ -5474,6 +5488,8 @@ function createImplementationOrchestrator(deps) {
5474
5488
  const elapsed = Date.now() - _lastProgressTs;
5475
5489
  if (elapsed >= WATCHDOG_TIMEOUT_MS) {
5476
5490
  for (const [key, s] of _stories) if (s.phase !== "PENDING" && s.phase !== "COMPLETE" && s.phase !== "ESCALATED") {
5491
+ if (_stalledStories.has(key)) continue;
5492
+ _stalledStories.add(key);
5477
5493
  logger$16.warn({
5478
5494
  storyKey: key,
5479
5495
  phase: s.phase,
@@ -5483,7 +5499,8 @@ function createImplementationOrchestrator(deps) {
5483
5499
  runId: config.pipelineRunId ?? "",
5484
5500
  storyKey: key,
5485
5501
  phase: s.phase,
5486
- elapsedMs: elapsed
5502
+ elapsedMs: elapsed,
5503
+ childPid: null
5487
5504
  });
5488
5505
  }
5489
5506
  }
@@ -6244,7 +6261,7 @@ function createImplementationOrchestrator(deps) {
6244
6261
  });
6245
6262
  persistState();
6246
6263
  recordProgress();
6247
- startHeartbeat();
6264
+ if (config.enableHeartbeat) startHeartbeat();
6248
6265
  if (projectRoot !== void 0) {
6249
6266
  const seedResult = seedMethodologyContext(db, projectRoot);
6250
6267
  if (seedResult.decisionsCreated > 0) logger$16.info({
@@ -10697,7 +10714,8 @@ async function runRunAction(options) {
10697
10714
  run_id: payload.runId,
10698
10715
  story_key: payload.storyKey,
10699
10716
  phase: payload.phase,
10700
- elapsed_ms: payload.elapsedMs
10717
+ elapsed_ms: payload.elapsedMs,
10718
+ child_pid: payload.childPid
10701
10719
  });
10702
10720
  });
10703
10721
  }
@@ -10710,7 +10728,8 @@ async function runRunAction(options) {
10710
10728
  config: {
10711
10729
  maxConcurrency: concurrency,
10712
10730
  maxReviewCycles: 2,
10713
- pipelineRunId: pipelineRun.id
10731
+ pipelineRunId: pipelineRun.id,
10732
+ enableHeartbeat: eventsFlag === true
10714
10733
  },
10715
10734
  projectRoot
10716
10735
  });
@@ -11153,4 +11172,4 @@ function registerRunCommand(program, _version = "0.0.0", projectRoot = process.c
11153
11172
 
11154
11173
  //#endregion
11155
11174
  export { DatabaseWrapper, SUBSTRATE_OWNED_SETTINGS_KEYS, VALID_PHASES, buildPipelineStatusOutput, createContextCompiler, createDispatcher, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, findPackageRoot, formatOutput, formatPhaseCompletionSummary, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getSubstrateDefaultSettings, parseDbTimestampAsUtc, registerRunCommand, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveMainRepoRoot, runAnalysisPhase, runMigrations, runPlanningPhase, runRunAction, runSolutioningPhase, validateStopAfterFromConflict };
11156
- //# sourceMappingURL=run-L-R_XYNT.js.map
11175
+ //# sourceMappingURL=run-CoP8UQU3.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "substrate-ai",
3
- "version": "0.2.11",
3
+ "version": "0.2.14",
4
4
  "description": "Substrate — multi-agent orchestration daemon for AI coding agents",
5
5
  "type": "module",
6
6
  "license": "MIT",