substrate-ai 0.19.28 → 0.19.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,10 +2,13 @@ import { createLogger } from "./logger-KeHncl-f.js";
2
2
  import { DoltClient, DoltQueryError, createDatabaseAdapter$1 as createDatabaseAdapter, getLatestRun, getPipelineRunById, initSchema } from "./dist-R0W4ofKv.js";
3
3
  import { createRequire } from "module";
4
4
  import { dirname, join } from "path";
5
- import { existsSync, readFileSync } from "node:fs";
5
+ import { readFile } from "fs/promises";
6
+ import { EventEmitter } from "node:events";
7
+ import { existsSync, promises, readFileSync } from "node:fs";
6
8
  import { spawn, spawnSync } from "node:child_process";
7
9
  import { dirname as dirname$1, join as join$1, resolve as resolve$1 } from "node:path";
8
- import { readFile, writeFile } from "node:fs/promises";
10
+ import { z } from "zod";
11
+ import { mkdir as mkdir$1, open, readFile as readFile$1, unlink, writeFile as writeFile$1 } from "node:fs/promises";
9
12
  import { existsSync as existsSync$1 } from "fs";
10
13
  import { createRequire as createRequire$1 } from "node:module";
11
14
  import { fileURLToPath } from "node:url";
@@ -133,7 +136,7 @@ function parseDbTimestampAsUtc(ts) {
133
136
  return new Date(ts.replace(" ", "T") + "Z");
134
137
  }
135
138
  const __filename = fileURLToPath(import.meta.url);
136
- const __dirname = dirname(__filename);
139
+ const __dirname$1 = dirname(__filename);
137
140
  /**
138
141
  * Find the package root by walking up until we find package.json.
139
142
  * Works regardless of build output structure (tsdown bundles into
@@ -147,13 +150,13 @@ function findPackageRoot(startDir) {
147
150
  }
148
151
  return startDir;
149
152
  }
150
- const PACKAGE_ROOT = join(__dirname, "..", "..", "..");
153
+ const PACKAGE_ROOT = join(__dirname$1, "..", "..", "..");
151
154
  /**
152
155
  * Resolve the absolute path to the bmad-method package's src/ directory.
153
156
  * Uses createRequire so it works in ESM without import.meta.resolve polyfills.
154
157
  * Returns null if bmad-method is not installed.
155
158
  */
156
- function resolveBmadMethodSrcPath(fromDir = __dirname) {
159
+ function resolveBmadMethodSrcPath(fromDir = __dirname$1) {
157
160
  try {
158
161
  const require$1 = createRequire$1(join(fromDir, "synthetic.js"));
159
162
  const pkgJsonPath = require$1.resolve("bmad-method/package.json");
@@ -166,7 +169,7 @@ function resolveBmadMethodSrcPath(fromDir = __dirname) {
166
169
  * Read the version field from bmad-method's package.json.
167
170
  * Returns 'unknown' if not resolvable.
168
171
  */
169
- function resolveBmadMethodVersion(fromDir = __dirname) {
172
+ function resolveBmadMethodVersion(fromDir = __dirname$1) {
170
173
  try {
171
174
  const require$1 = createRequire$1(join(fromDir, "synthetic.js"));
172
175
  const pkgJsonPath = require$1.resolve("bmad-method/package.json");
@@ -782,7 +785,7 @@ var FileStateStore = class {
782
785
  if (inMemory !== void 0) return inMemory;
783
786
  if (this._basePath !== void 0) try {
784
787
  const filePath = join$1(this._basePath, "kv-metrics.json");
785
- const content = await readFile(filePath, "utf-8");
788
+ const content = await readFile$1(filePath, "utf-8");
786
789
  const parsed = JSON.parse(content);
787
790
  return parsed[runId]?.[key] ?? void 0;
788
791
  } catch {}
@@ -797,7 +800,7 @@ var FileStateStore = class {
797
800
  for (const [key, value] of runMap) serialized[runId][key] = value;
798
801
  }
799
802
  const filePath = join$1(this._basePath, "kv-metrics.json");
800
- await writeFile(filePath, JSON.stringify(serialized, null, 2), "utf-8");
803
+ await writeFile$1(filePath, JSON.stringify(serialized, null, 2), "utf-8");
801
804
  }
802
805
  async getContracts(storyKey) {
803
806
  return this._contracts.get(storyKey) ?? [];
@@ -820,7 +823,7 @@ var FileStateStore = class {
820
823
  const serialized = {};
821
824
  for (const [key, records] of this._contractVerifications) serialized[key] = records;
822
825
  const filePath = join$1(this._basePath, "contract-verifications.json");
823
- await writeFile(filePath, JSON.stringify(serialized, null, 2), "utf-8");
826
+ await writeFile$1(filePath, JSON.stringify(serialized, null, 2), "utf-8");
824
827
  }
825
828
  }
826
829
  async getContractVerification(storyKey) {
@@ -1461,7 +1464,7 @@ var DoltStateStore = class DoltStateStore {
1461
1464
 
1462
1465
  //#endregion
1463
1466
  //#region src/modules/state/index.ts
1464
- const logger$1 = createLogger("state:factory");
1467
+ const logger$2 = createLogger("state:factory");
1465
1468
  /**
1466
1469
  * Synchronously check whether Dolt is available and a Dolt repo exists at the
1467
1470
  * canonical state path under `basePath`.
@@ -1508,20 +1511,1154 @@ function createStateStore(config = {}) {
1508
1511
  const repoPath = config.basePath ?? process.cwd();
1509
1512
  const detection = detectDoltAvailableSync(repoPath);
1510
1513
  if (detection.available) {
1511
- logger$1.debug(`Dolt detected, using DoltStateStore (state path: ${join$1(repoPath, ".substrate", "state")})`);
1514
+ logger$2.debug(`Dolt detected, using DoltStateStore (state path: ${join$1(repoPath, ".substrate", "state")})`);
1512
1515
  const client = new DoltClient({ repoPath });
1513
1516
  return new DoltStateStore({
1514
1517
  repoPath,
1515
1518
  client
1516
1519
  });
1517
1520
  } else {
1518
- logger$1.debug(`Dolt not found, using FileStateStore (reason: ${detection.reason})`);
1521
+ logger$2.debug(`Dolt not found, using FileStateStore (reason: ${detection.reason})`);
1519
1522
  return new FileStateStore({ basePath: config.basePath });
1520
1523
  }
1521
1524
  }
1522
1525
  return new FileStateStore({ basePath: config.basePath });
1523
1526
  }
1524
1527
 
1528
+ //#endregion
1529
+ //#region packages/sdlc/dist/handlers/event-bridge.js
1530
+ /**
1531
+ * SDLC Event Bridge — translates factory graph executor lifecycle events into
1532
+ * SDLC orchestrator events for backward compatibility with existing consumers.
1533
+ *
1534
+ * Story 43-9: SDLC-as-Graph NDJSON Event Compatibility.
1535
+ *
1536
+ * ADR-003: This file MUST NOT import any runtime value from `@substrate-ai/factory`.
1537
+ * All coupling to the factory event bus shape is via local duck-typed interfaces.
1538
+ */
1539
/**
 * Lookup table from factory graph node IDs to SDLC phase names.
 *
 * Only the six SDLC nodes are listed; any other node ID (for example `start`
 * or `exit`) has no entry, which lets callers skip it with a falsy check.
 *
 *   analysis     -> 'analysis'
 *   planning     -> 'planning'
 *   solutioning  -> 'solutioning'
 *   create_story -> 'create'
 *   dev_story    -> 'dev'
 *   code_review  -> 'review'
 */
const SDLC_NODE_PHASE_MAP = Object.fromEntries([
  ["analysis", "analysis"],
  ["planning", "planning"],
  ["solutioning", "solutioning"],
  ["create_story", "create"],
  ["dev_story", "dev"],
  ["code_review", "review"],
]);
1562
/**
 * Wires a set of listeners onto `opts.graphEvents` that re-emit factory graph
 * lifecycle events on `opts.sdlcBus` under their SDLC orchestrator names.
 *
 * Translations performed:
 * - `graph:node-started`          -> `orchestrator:story-phase-start`
 * - `graph:node-completed`        -> `orchestrator:story-phase-complete`
 * - `graph:node-retried`          -> nothing emitted; counts `dev_story` retries
 * - `graph:completed` (SUCCESS)   -> `orchestrator:story-complete`
 * - `graph:goal-gate-unsatisfied` -> `orchestrator:story-escalated` (only for `dev_story`)
 *
 * Node IDs without an entry in `SDLC_NODE_PHASE_MAP` are ignored.
 *
 * @param opts `{ storyKey, pipelineRunId, sdlcBus, graphEvents }`; `sdlcBus`
 *             needs `emit(name, payload)` and `graphEvents` needs `on`/`off`.
 * @returns An object whose `teardown()` removes every listener registered here.
 */
function createSdlcEventBridge(opts) {
  const { storyKey, pipelineRunId, sdlcBus, graphEvents } = opts;
  // Number of `graph:node-retried` events seen for the dev_story node.
  let devStoryRetries = 0;

  // One [eventName, listener] pair per translated graph event, in registration order.
  const subscriptions = [
    ["graph:node-started", (data) => {
      const phase = SDLC_NODE_PHASE_MAP[data.nodeId];
      if (phase) {
        sdlcBus.emit("orchestrator:story-phase-start", {
          storyKey,
          phase,
          pipelineRunId,
        });
      }
    }],
    ["graph:node-completed", (data) => {
      const phase = SDLC_NODE_PHASE_MAP[data.nodeId];
      if (phase) {
        sdlcBus.emit("orchestrator:story-phase-complete", {
          storyKey,
          phase,
          result: data.outcome,
          pipelineRunId,
        });
      }
    }],
    ["graph:node-retried", (data) => {
      if (data.nodeId === "dev_story") devStoryRetries += 1;
    }],
    ["graph:completed", (data) => {
      if (data.finalOutcome.status !== "SUCCESS") return;
      sdlcBus.emit("orchestrator:story-complete", {
        storyKey,
        reviewCycles: devStoryRetries,
      });
    }],
    ["graph:goal-gate-unsatisfied", (data) => {
      if (data.nodeId !== "dev_story") return;
      // Missing fields on the event fall back to defaults / the local retry counter.
      sdlcBus.emit("orchestrator:story-escalated", {
        storyKey,
        lastVerdict: data.lastVerdict ?? "NEEDS_MAJOR_REWORK",
        reviewCycles: data.reviewCycles ?? devStoryRetries,
        issues: data.issues ?? [],
      });
    }],
  ];

  for (const [eventName, listener] of subscriptions) graphEvents.on(eventName, listener);

  return {
    teardown() {
      for (const [eventName, listener] of subscriptions) graphEvents.off(eventName, listener);
    },
  };
}
1636
+
1637
+ //#endregion
1638
+ //#region packages/sdlc/dist/orchestrator/graph-orchestrator.js
1639
// Directory containing this module file.
const __dirname = dirname$1(fileURLToPath(import.meta.url));
/**
 * Locates the bundled `sdlc-pipeline.dot` file and returns its path.
 *
 * Candidates are probed in this order and the first existing one wins:
 *   1. `../../graphs/`, `../graphs/` and `graphs/` relative to this module's directory
 *   2. `graphs/` next to the `@substrate-ai/sdlc` package.json, when that
 *      package can be resolved through `createRequire`
 *
 * @returns {string} Path of the first candidate that exists on disk.
 * @throws {Error} listing every searched path when none of them exists.
 */
function resolveGraphPath() {
  const relativeLocations = [
    "../../graphs/sdlc-pipeline.dot",
    "../graphs/sdlc-pipeline.dot",
    "graphs/sdlc-pipeline.dot",
  ];
  const candidates = relativeLocations.map((relative) => join$1(__dirname, relative));
  try {
    const sdlcPkgPath = createRequire$1(import.meta.url).resolve("@substrate-ai/sdlc/package.json");
    candidates.push(join$1(dirname$1(sdlcPkgPath), "graphs", "sdlc-pipeline.dot"));
  } catch {
    // Package not resolvable from here: only the relative candidates are probed.
  }
  const located = candidates.find((candidate) => existsSync(candidate));
  if (located !== undefined) return located;
  const searched = candidates.map((c) => ` ${c}`).join("\n");
  throw new Error(`Cannot locate sdlc-pipeline.dot. Searched:\n${searched}`);
}
1664
/**
 * Error type raised by `createGraphOrchestrator` when the supplied graph is
 * structurally invalid. Its `name` is "GraphOrchestratorInitError".
 */
var GraphOrchestratorInitError = class extends Error {
  name = "GraphOrchestratorInitError";
  /** @param message Human-readable description of what is wrong with the graph. */
  constructor(message) {
    super(message);
  }
};
1671
/**
 * Creates a graph orchestrator that runs `config.executor` once per story,
 * serializing the stories inside a conflict group and running at most
 * `config.maxConcurrency` groups of a batch at the same time.
 *
 * When `config.eventBus` is provided, a per-story SDLC event bridge translates
 * the executor's `graph:*` events into `orchestrator:story-*` events on that
 * bus; the bridge is torn down after each story, whether it succeeded or not.
 *
 * Recognized `config` fields: `graph`, `executor`, `runId`, `logsRoot`,
 * `handlerRegistry`, `projectRoot`, `methodologyPack`, `pipelineRunId`,
 * `eventBus`, `conflictGrouper`, `maxConcurrency` and `gcPauseMs` (pause after
 * each story in milliseconds, default 2000).
 *
 * `maxConcurrency` falls back to 1 when it is missing or not greater than
 * zero. Without that fallback the scheduling loop could never start a group
 * and would spin forever.
 *
 * @returns `{ run(storyKeys) }`; `run` resolves to
 *          `{ successCount, failureCount, totalStories, stories }`.
 * @throws {GraphOrchestratorInitError} if `config.graph` is missing `nodes` or `edges`.
 */
function createGraphOrchestrator(config) {
  if (!config.graph?.nodes || !config.graph?.edges) throw new GraphOrchestratorInitError("Invalid graph: missing nodes or edges arrays");
  const gcPauseMs = config.gcPauseMs ?? 2e3;
  // undefined / NaN / 0 / negative limits would stall runBatch; treat them as "one group at a time".
  const maxConcurrency = config.maxConcurrency > 0 ? config.maxConcurrency : 1;

  /** Runs the graph for one story and records its outcome in `summary`. */
  async function runStoryGraph(storyKey, summary) {
    // The epic ID is the text before the first "-" of the story key.
    const epicId = storyKey.split("-")[0];
    const initialContext = {
      storyKey,
      epicId,
      projectRoot: config.projectRoot,
      methodologyPack: config.methodologyPack,
      ...config.pipelineRunId !== void 0 ? {
        runId: config.pipelineRunId,
        pipelineRunId: config.pipelineRunId
      } : {}
    };
    // Each story gets a private bus, so listeners never leak between stories.
    const factoryBus = new EventEmitter();
    const bridge = config.eventBus != null ? createSdlcEventBridge({
      storyKey,
      ...config.pipelineRunId !== void 0 ? { pipelineRunId: config.pipelineRunId } : {},
      sdlcBus: config.eventBus,
      graphEvents: factoryBus
    }) : void 0;
    // An unsatisfied goal gate turns a non-success result into ESCALATED instead of FAILED.
    let escalated = false;
    factoryBus.on("graph:goal-gate-unsatisfied", () => {
      escalated = true;
    });
    let result;
    try {
      result = await config.executor.run(config.graph, {
        runId: `${config.runId}:${storyKey}`,
        logsRoot: config.logsRoot,
        handlerRegistry: config.handlerRegistry,
        initialContext,
        eventBus: factoryBus
      });
    } catch (err) {
      const errMsg = err instanceof Error ? err.message : String(err);
      process.stderr.write(`[graph-orchestrator] Story ${storyKey} failed: ${errMsg}\n`);
      summary.stories[storyKey] = {
        outcome: escalated ? "ESCALATED" : "FAILED",
        error: errMsg
      };
      summary.f++;
    } finally {
      bridge?.teardown();
    }
    // The executor threw: the failure has already been recorded above.
    if (result === void 0) return;
    if (result.status === "SUCCESS") {
      summary.stories[storyKey] = { outcome: "SUCCESS" };
      summary.s++;
    } else if (escalated) {
      summary.stories[storyKey] = { outcome: "ESCALATED" };
      summary.f++;
    } else {
      const reason = result.failureReason ?? `status=${result.status}`;
      process.stderr.write(`[graph-orchestrator] Story ${storyKey} failed: ${reason}\n`);
      summary.stories[storyKey] = {
        outcome: "FAILED",
        error: reason
      };
      summary.f++;
    }
  }

  /** Runs the stories of one conflict group strictly one after another. */
  async function runGroup(group, summary) {
    for (const storyKey of group) {
      await runStoryGraph(storyKey, summary);
      if (gcPauseMs > 0) await new Promise((r) => setTimeout(r, gcPauseMs));
    }
  }

  /** Runs the groups of one batch, keeping at most `maxConcurrency` in flight. */
  async function runBatch(groups, summary) {
    const queue = [...groups];
    const active = [];
    while (queue.length > 0 || active.length > 0) {
      while (active.length < maxConcurrency && queue.length > 0) {
        const group = queue.shift();
        const p = runGroup(group, summary).finally(() => {
          active.splice(active.indexOf(p), 1);
        });
        active.push(p);
      }
      // Wait for any slot to free up before trying to schedule again.
      if (active.length > 0) await Promise.race(active);
    }
  }

  return { async run(storyKeys) {
    // Default grouping: a single batch in which every story is its own group.
    const grouper = config.conflictGrouper ?? ((keys) => [keys.map((k) => [k])]);
    const batches = grouper(storyKeys);
    const summary = {
      s: 0,
      f: 0,
      stories: {}
    };
    // Batches run sequentially; only groups within a batch run concurrently.
    for (const batchGroups of batches) await runBatch(batchGroups, summary);
    return {
      successCount: summary.s,
      failureCount: summary.f,
      totalStories: storyKeys.length,
      stories: summary.stories
    };
  } };
}
1781
/**
 * Applies runtime configuration to an already-loaded pipeline graph, in place.
 *
 * `options.maxReviewCycles` is copied to both the `dev_story` node's
 * `maxRetries` and the graph-level `defaultMaxRetries`.
 *
 * @param graph   Graph whose `nodes` supports `get(id)` (e.g. a Map).
 * @param options Object carrying `maxReviewCycles`.
 * @throws {Error} if `graph.nodes` has no `dev_story` entry.
 */
function applyConfigToGraph(graph, options) {
  const devStoryNode = graph.nodes.get("dev_story");
  if (!devStoryNode) {
    throw new Error("applyConfigToGraph: graph does not contain a 'dev_story' node");
  }
  const retryBudget = options.maxReviewCycles;
  devStoryNode.maxRetries = retryBudget;
  graph.defaultMaxRetries = retryBudget;
}
1799
+
1800
+ //#endregion
1801
+ //#region packages/sdlc/dist/run-model/cli-flags.js
1802
/**
 * Zod schema describing the CLI flags stored in the run manifest.
 * Every field is optional, so an empty object is valid.
 */
const CliFlagsSchema = z.object({
  // Story keys named on the command line.
  stories: z.array(z.string()).optional(),
  // One of "all", "critical" or "none".
  halt_on: z.enum(["all", "critical", "none"]).optional(),
  // Must be strictly greater than zero when present.
  cost_ceiling: z.number().positive().optional(),
  agent: z.string().optional(),
  skip_verification: z.boolean().optional(),
  events: z.boolean().optional(),
});
1820
+
1821
+ //#endregion
1822
+ //#region packages/sdlc/dist/run-model/verification-result.js
1823
/**
 * Zod schema for one verification check result as stored in the manifest:
 * the check's name, its pass/warn/fail status, a details string and a
 * non-negative duration in milliseconds. All four fields are required.
 */
const StoredVerificationCheckResultSchema = z.object({
  checkName: z.string(),
  status: z.enum(["pass", "warn", "fail"]),
  details: z.string(),
  duration_ms: z.number().nonnegative(),
});
1839
/**
 * Zod schema for the aggregated verification summary stored in the manifest:
 * the story key, the list of per-check results, an overall pass/warn/fail
 * status and a non-negative total duration in milliseconds.
 */
const StoredVerificationSummarySchema = z.object({
  storyKey: z.string(),
  // Each entry is validated with the per-check result schema.
  checks: z.array(StoredVerificationCheckResultSchema),
  status: z.enum(["pass", "warn", "fail"]),
  duration_ms: z.number().nonnegative(),
});
1855
+
1856
+ //#endregion
1857
+ //#region packages/sdlc/dist/run-model/per-story-state.js
1858
/**
 * Zod schema for the high-level status of a story in the run manifest.
 *
 * The known statuses are tried as literals first; the trailing `z.string()`
 * must stay last so that statuses not listed here still deserialize.
 */
const PerStoryStatusSchema = z.union([
  ...[
    "pending",
    "dispatched",
    "in-review",
    "complete",
    "failed",
    "escalated",
    "recovered",
    "verification-failed",
    "gated",
    "skipped",
  ].map((status) => z.literal(status)),
  z.string(),
]);
1879
/**
 * Zod schema for one story's entry in the run manifest.
 *
 * `status`, `phase` and `started_at` are required; every other field is optional.
 */
const PerStoryStateSchema = z.object({
  // High-level status; see PerStoryStatusSchema.
  status: PerStoryStatusSchema,
  // Free-form phase string; not constrained by this schema.
  phase: z.string(),
  // Timestamps are stored as plain strings; the format is not validated here.
  started_at: z.string(),
  completed_at: z.string().optional(),
  verification_result: StoredVerificationSummarySchema.optional(),
  cost_usd: z.number().nonnegative().optional(),
  // Counters must be non-negative integers when present.
  review_cycles: z.number().int().nonnegative().optional(),
  dispatches: z.number().int().nonnegative().optional(),
});
1902
+
1903
+ //#endregion
1904
+ //#region packages/sdlc/dist/run-model/recovery-history.js
1905
/**
 * Zod schema for the outcome of a recovery attempt.
 *
 * The literal outcomes come first and the catch-all `z.string()` stays last,
 * so the known values are matched before the open-ended fallback.
 */
const RecoveryOutcomeSchema = z.union([
  ...["retried", "escalated", "skipped"].map((outcome) => z.literal(outcome)),
  z.string(),
]);
1918
/**
 * Zod schema for one recovery attempt recorded in the run manifest.
 * All seven fields are required.
 */
const RecoveryEntrySchema = z.object({
  story_key: z.string(),
  // NOTE(review): earlier documentation calls this 1-indexed, but the schema
  // also accepts 0 — confirm which is intended before tightening.
  attempt_number: z.number().int().nonnegative(),
  // Free-form strategy label; not constrained by this schema.
  strategy: z.string(),
  root_cause: z.string(),
  outcome: RecoveryOutcomeSchema,
  // Must be zero or greater.
  cost_usd: z.number().nonnegative(),
  timestamp: z.string(),
});
1938
/**
 * Zod schema for accumulated cost data of a pipeline run.
 *
 * `per_story` is a record from string keys to non-negative numbers and
 * `run_total` is a single non-negative number, so
 * `{ per_story: {}, run_total: 0 }` is a valid value.
 */
const CostAccumulationSchema = z.object({
  per_story: z.record(z.string(), z.number().nonnegative()),
  run_total: z.number().nonnegative(),
});
1954
+
1955
+ //#endregion
1956
+ //#region packages/sdlc/dist/run-model/schemas.js
1957
/**
 * Zod schema for a pending supervisor proposal.
 *
 * `type` accepts the four known literals and, through the trailing
 * `z.string()`, any other string. `story_key` and `payload` are optional.
 */
const ProposalSchema = z.object({
  id: z.string(),
  created_at: z.string(),
  description: z.string(),
  type: z.union([
    ...["retry", "fix", "escalate", "skip"].map((kind) => z.literal(kind)),
    z.string(),
  ]),
  story_key: z.string().optional(),
  // Arbitrary string-keyed data attached to the proposal.
  payload: z.record(z.string(), z.unknown()).optional(),
});
1975
/**
 * Zod schema for the complete run manifest document.
 *
 * Every field is required except `cost_accumulation`, which defaults to
 * `{ per_story: {}, run_total: 0 }` so manifests that omit it still parse.
 */
const RunManifestSchema = z.object({
  run_id: z.string(),
  // Identity transform; the parsed flags are passed through unchanged.
  cli_flags: CliFlagsSchema.transform((flags) => flags),
  story_scope: z.array(z.string()),
  // Both supervisor fields must be present but may be null.
  supervisor_pid: z.number().nullable(),
  supervisor_session_id: z.string().nullable(),
  per_story_state: z.record(z.string(), PerStoryStateSchema),
  recovery_history: z.array(RecoveryEntrySchema),
  cost_accumulation: CostAccumulationSchema.default({ per_story: {}, run_total: 0 }),
  pending_proposals: z.array(ProposalSchema),
  // Non-negative integer write counter.
  generation: z.number().int().nonnegative(),
  created_at: z.string(),
  updated_at: z.string(),
});
1999
/**
 * Error carrying the list of sources that were tried when a manifest could
 * not be read. Its `name` is "ManifestReadError".
 */
var ManifestReadError = class extends Error {
  /** Sources (file paths or source names) that were attempted. */
  attempted_sources;
  name = "ManifestReadError";
  /**
   * @param message           Description of the failure.
   * @param attempted_sources Stored as-is on the `attempted_sources` property.
   */
  constructor(message, attempted_sources) {
    super(message);
    this.attempted_sources = attempted_sources;
  }
};
2014
+
2015
+ //#endregion
2016
+ //#region packages/sdlc/dist/run-model/run-manifest.js
2017
/** Returns `<current working directory>/.substrate/runs`, the default manifest directory. */
function defaultBaseDir() {
  const segments = [".substrate", "runs"];
  return join$1(process.cwd(), ...segments);
}
2021
/** Returns the primary manifest path, `<baseDir>/<runId>.json`. */
function primaryPath(baseDir, runId) {
  const fileName = `${runId}.json`;
  return join$1(baseDir, fileName);
}
2025
/** Returns the backup manifest path, `<baseDir>/<runId>.json.bak`. */
function bakPath(baseDir, runId) {
  const fileName = `${runId}.json.bak`;
  return join$1(baseDir, fileName);
}
2029
/** Returns the temporary write path, `<baseDir>/<runId>.json.tmp`. */
function tmpPath(baseDir, runId) {
  const fileName = `${runId}.json.tmp`;
  return join$1(baseDir, fileName);
}
2033
/**
 * Reads `filePath` as UTF-8, parses it as JSON and validates it against
 * `RunManifestSchema`.
 *
 * @returns The validated manifest data, or `null` when the file cannot be
 *          read, is not valid JSON, or does not satisfy the schema.
 */
async function tryReadFile(filePath) {
  try {
    const text = await promises.readFile(filePath, "utf-8");
    const validation = RunManifestSchema.safeParse(JSON.parse(text));
    return validation.success ? validation.data : null;
  } catch {
    // Read errors and JSON syntax errors both mean "no usable manifest here".
    return null;
  }
}
2053
/**
 * Builds a minimal manifest object for `runId` from the `pipeline_runs` table.
 *
 * Only `cli_flags` (from `config_json`, when it parses to a plain object) and
 * the two timestamps come from the row; every other field is an empty default
 * and `generation` is 0.
 *
 * @param runId   Run ID used as the query parameter and as `run_id`.
 * @param adapter Object exposing `query(sql, params)` that resolves to an array of rows.
 * @returns The reconstructed data, or `null` when no row matches or the query throws.
 */
async function reconstructFromDolt(runId, adapter) {
  // Returns the parsed config when it is a non-null, non-array object; otherwise {}.
  const readCliFlags = (configJson) => {
    if (!configJson) return {};
    try {
      const parsed = JSON.parse(configJson);
      const isPlainRecord = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
      return isPlainRecord ? parsed : {};
    } catch {
      return {};
    }
  };

  try {
    const rows = await adapter.query("SELECT id, config_json, created_at, updated_at FROM pipeline_runs WHERE id = ?", [runId]);
    if (rows.length === 0) return null;
    const row = rows[0];
    const cliFlags = readCliFlags(row.config_json);
    // Fallback for rows whose timestamps are null/undefined.
    const now = new Date().toISOString();
    return {
      run_id: runId,
      cli_flags: cliFlags,
      story_scope: [],
      supervisor_pid: null,
      supervisor_session_id: null,
      per_story_state: {},
      recovery_history: [],
      cost_accumulation: {
        per_story: {},
        run_total: 0,
      },
      pending_proposals: [],
      generation: 0,
      created_at: row.created_at ?? now,
      updated_at: row.updated_at ?? now,
    };
  } catch {
    return null;
  }
}
2090
+ /**
2091
+ * Typed, atomic file-backed run manifest.
2092
+ *
2093
+ * Each instance is bound to a specific run ID and base directory.
2094
+ * Use `RunManifest.create()` to initialize a new manifest,
2095
+ * or `RunManifest.read()` to load an existing one.
2096
+ */
2097
+ var RunManifest = class RunManifest {
2098
+ runId;
2099
+ baseDir;
2100
+ /** Optional Dolt adapter for degraded-mode fallback on read. */
2101
+ doltAdapter;
2102
+ constructor(runId, baseDir = defaultBaseDir(), doltAdapter = null) {
2103
+ this.runId = runId;
2104
+ this.baseDir = baseDir;
2105
+ this.doltAdapter = doltAdapter;
2106
+ }
2107
+ get primaryPath() {
2108
+ return primaryPath(this.baseDir, this.runId);
2109
+ }
2110
+ get bakPath() {
2111
+ return bakPath(this.baseDir, this.runId);
2112
+ }
2113
+ get tmpPath() {
2114
+ return tmpPath(this.baseDir, this.runId);
2115
+ }
2116
+ /**
2117
+ * Read this manifest from disk (multi-tier fallback).
2118
+ *
2119
+ * Delegates to `RunManifest.read()` with this instance's runId, baseDir,
2120
+ * and doltAdapter. Primarily used by `SupervisorLock` (and tests that mock it).
2121
+ *
2122
+ * @throws ManifestReadError if all sources fail
2123
+ */
2124
+ async read() {
2125
+ return RunManifest.read(this.runId, this.baseDir, this.doltAdapter);
2126
+ }
2127
+ /**
2128
+ * Atomically update specific fields in the manifest.
2129
+ *
2130
+ * Reads the current manifest, merges in the provided partial data (shallow
2131
+ * merge), then writes the result atomically. Generation is incremented and
2132
+ * `updated_at` is refreshed by `write()`.
2133
+ *
2134
+ * Callers should pass only the fields they intend to change. Do NOT use this
2135
+ * to change `run_id` or `created_at` — those are immutable after creation.
2136
+ *
2137
+ * @throws ManifestReadError if the current manifest cannot be read
2138
+ */
2139
+ async update(partial) {
2140
+ const current = await this.read();
2141
+ const merged = {
2142
+ ...current,
2143
+ ...partial
2144
+ };
2145
+ await this.write(merged);
2146
+ }
2147
+ /**
2148
+ * Atomically write the manifest to disk.
2149
+ *
2150
+ * Sequence:
2151
+ * 1. Auto-increment `generation`, set `updated_at`
2152
+ * 2. Serialize to JSON and validate round-trip
2153
+ * 3. Ensure baseDir exists (mkdir -p)
2154
+ * 4. Write to `.tmp` via open → write → datasync → close (fsync)
2155
+ * 5. If primary exists, copy to `.bak`
2156
+ * 6. Rename `.tmp` → primary path
2157
+ */
2158
+ async write(data) {
2159
+ let currentGeneration = 0;
2160
+ const existing = await tryReadFile(this.primaryPath);
2161
+ if (existing !== null) currentGeneration = existing.generation;
2162
+ const fullData = {
2163
+ ...data,
2164
+ generation: currentGeneration + 1,
2165
+ updated_at: new Date().toISOString()
2166
+ };
2167
+ const json = JSON.stringify(fullData, null, 2);
2168
+ JSON.parse(json);
2169
+ await promises.mkdir(this.baseDir, { recursive: true });
2170
+ const tmp = this.tmpPath;
2171
+ const fileHandle = await promises.open(tmp, "w");
2172
+ try {
2173
+ await fileHandle.write(json, 0, "utf-8");
2174
+ await fileHandle.datasync();
2175
+ } finally {
2176
+ await fileHandle.close();
2177
+ }
2178
+ try {
2179
+ await promises.copyFile(this.primaryPath, this.bakPath);
2180
+ } catch {}
2181
+ await promises.rename(tmp, this.primaryPath);
2182
+ }
2183
+ /**
2184
+ * Return a bound `RunManifest` instance without performing any file I/O.
2185
+ *
2186
+ * Use `open()` when you want to call instance methods (`read()`, `patchCLIFlags()`)
2187
+ * on an existing run without reading the manifest upfront.
2188
+ *
2189
+ * ```typescript
2190
+ * await RunManifest.open(runId, runsDir).patchCLIFlags(cliFlags)
2191
+ * ```
2192
+ */
2193
+ static open(runId, baseDir = defaultBaseDir(), doltAdapter = null) {
2194
+ return new RunManifest(runId, baseDir, doltAdapter);
2195
+ }
2196
+ /**
2197
+ * Read the current manifest (or create a minimal default), merge the provided
2198
+ * CLI flags into `cli_flags`, and write the result atomically.
2199
+ *
2200
+ * Non-fatal: callers should wrap in try/catch and log a warning on failure.
2201
+ * The pipeline must not abort if manifest write fails.
2202
+ */
2203
+ async patchCLIFlags(flags) {
2204
+ let existingData;
2205
+ try {
2206
+ const read = await RunManifest.read(this.runId, this.baseDir, this.doltAdapter);
2207
+ const { generation: _gen, updated_at: _ts,...rest } = read;
2208
+ existingData = rest;
2209
+ } catch {
2210
+ const now = new Date().toISOString();
2211
+ existingData = {
2212
+ run_id: this.runId,
2213
+ cli_flags: {},
2214
+ story_scope: [],
2215
+ supervisor_pid: null,
2216
+ supervisor_session_id: null,
2217
+ per_story_state: {},
2218
+ recovery_history: [],
2219
+ cost_accumulation: {
2220
+ per_story: {},
2221
+ run_total: 0
2222
+ },
2223
+ pending_proposals: [],
2224
+ created_at: now
2225
+ };
2226
+ }
2227
+ await this.write({
2228
+ ...existingData,
2229
+ cli_flags: {
2230
+ ...existingData.cli_flags,
2231
+ ...flags
2232
+ }
2233
+ });
2234
+ }
2235
+ /**
2236
+ * Atomically upsert the per-story lifecycle state for a single story key.
2237
+ *
2238
+ * Reads the current manifest (or creates a minimal default if absent),
2239
+ * shallowly merges `updates` into `per_story_state[storyKey]`, and writes
2240
+ * the result atomically via a single `write()` call.
2241
+ *
2242
+ * Fields not included in `updates` on an existing entry are preserved unchanged.
2243
+ *
2244
+ * Non-fatal: callers MUST wrap in `.catch((err) => logger.warn(...))`.
2245
+ * The pipeline must never abort due to a manifest write failure.
2246
+ *
2247
+ * @param storyKey - Story key (e.g. '52-4')
2248
+ * @param updates - Partial PerStoryState fields to merge
2249
+ */
2250
+ async patchStoryState(storyKey, updates) {
2251
+ let existingData;
2252
+ try {
2253
+ const read = await RunManifest.read(this.runId, this.baseDir, this.doltAdapter);
2254
+ const { generation: _gen, updated_at: _ts,...rest } = read;
2255
+ existingData = rest;
2256
+ } catch {
2257
+ const now = new Date().toISOString();
2258
+ existingData = {
2259
+ run_id: this.runId,
2260
+ cli_flags: {},
2261
+ story_scope: [],
2262
+ supervisor_pid: null,
2263
+ supervisor_session_id: null,
2264
+ per_story_state: {},
2265
+ recovery_history: [],
2266
+ cost_accumulation: {
2267
+ per_story: {},
2268
+ run_total: 0
2269
+ },
2270
+ pending_proposals: [],
2271
+ created_at: now
2272
+ };
2273
+ }
2274
+ const existing = existingData.per_story_state[storyKey];
2275
+ const merged = {
2276
+ status: "pending",
2277
+ phase: "",
2278
+ started_at: new Date().toISOString(),
2279
+ ...existing,
2280
+ ...updates
2281
+ };
2282
+ await this.write({
2283
+ ...existingData,
2284
+ per_story_state: {
2285
+ ...existingData.per_story_state,
2286
+ [storyKey]: merged
2287
+ }
2288
+ });
2289
+ }
2290
+ /**
2291
+ * Atomically append a recovery entry and update cost accumulation.
2292
+ *
2293
+ * Reads the current manifest, appends `entry` to `recovery_history[]`,
2294
+ * increments `cost_accumulation.per_story[entry.story_key]` by `entry.cost_usd`,
2295
+ * increments `cost_accumulation.run_total` by `entry.cost_usd`, then writes
2296
+ * atomically via a single `write()` call.
2297
+ *
2298
+ * Non-fatal: callers MUST wrap in `.catch((err) => logger.warn(...))`.
2299
+ * The pipeline must never abort due to a manifest write failure.
2300
+ *
2301
+ * `entry.cost_usd` is the cost of this single retry attempt only (NOT cumulative).
2302
+ * Cumulative per-story retry cost is tracked in `cost_accumulation.per_story`.
2303
+ *
2304
+ * @param entry - Recovery entry to append (attempt_number is 1-indexed)
2305
+ */
2306
+ async appendRecoveryEntry(entry) {
2307
+ let existingData;
2308
+ try {
2309
+ const read = await RunManifest.read(this.runId, this.baseDir, this.doltAdapter);
2310
+ const { generation: _gen, updated_at: _ts,...rest } = read;
2311
+ existingData = rest;
2312
+ } catch {
2313
+ const now = new Date().toISOString();
2314
+ existingData = {
2315
+ run_id: this.runId,
2316
+ cli_flags: {},
2317
+ story_scope: [],
2318
+ supervisor_pid: null,
2319
+ supervisor_session_id: null,
2320
+ per_story_state: {},
2321
+ recovery_history: [],
2322
+ cost_accumulation: {
2323
+ per_story: {},
2324
+ run_total: 0
2325
+ },
2326
+ pending_proposals: [],
2327
+ created_at: now
2328
+ };
2329
+ }
2330
+ const prevStoryCost = existingData.cost_accumulation.per_story[entry.story_key] ?? 0;
2331
+ const updated = {
2332
+ ...existingData,
2333
+ recovery_history: [...existingData.recovery_history, entry],
2334
+ cost_accumulation: {
2335
+ per_story: {
2336
+ ...existingData.cost_accumulation.per_story,
2337
+ [entry.story_key]: prevStoryCost + entry.cost_usd
2338
+ },
2339
+ run_total: existingData.cost_accumulation.run_total + entry.cost_usd
2340
+ }
2341
+ };
2342
+ await this.write(updated);
2343
+ }
2344
+ /**
2345
+ * Create a new manifest with `generation: 0` and write it.
2346
+ * Returns a bound `RunManifest` instance.
2347
+ */
2348
+ static async create(runId, initialData, baseDir = defaultBaseDir(), doltAdapter = null) {
2349
+ const instance = new RunManifest(runId, baseDir, doltAdapter);
2350
+ const now = new Date().toISOString();
2351
+ const data = {
2352
+ ...initialData,
2353
+ created_at: now
2354
+ };
2355
+ await instance.write(data);
2356
+ return instance;
2357
+ }
2358
+ /**
2359
+ * Read a manifest from disk with multi-tier fallback.
2360
+ *
2361
+ * Attempts sources in order:
2362
+ * 1. Primary `.json`
2363
+ * 2. Backup `.json.bak` (preferred over primary if generation is higher)
2364
+ * 3. Temporary `.json.tmp`
2365
+ * 4. Dolt degraded reconstruction (if doltAdapter is provided)
2366
+ *
2367
+ * Generation tiebreak: if `.bak` has a higher `generation` than primary,
2368
+ * `.bak` is preferred (indicates primary was overwritten mid-rename).
2369
+ *
2370
+ * @throws ManifestReadError if all sources fail
2371
+ */
2372
+ static async read(runId, baseDir = defaultBaseDir(), doltAdapter = null) {
2373
+ const attempted = [];
2374
+ const primary = primaryPath(baseDir, runId);
2375
+ const bak = bakPath(baseDir, runId);
2376
+ const tmp = tmpPath(baseDir, runId);
2377
+ attempted.push(primary);
2378
+ const primaryData = await tryReadFile(primary);
2379
+ attempted.push(bak);
2380
+ const bakData = await tryReadFile(bak);
2381
+ if (primaryData !== null && bakData !== null) {
2382
+ if (bakData.generation > primaryData.generation) return bakData;
2383
+ return primaryData;
2384
+ }
2385
+ if (primaryData !== null) return primaryData;
2386
+ if (bakData !== null) return bakData;
2387
+ attempted.push(tmp);
2388
+ const tmpData = await tryReadFile(tmp);
2389
+ if (tmpData !== null) return tmpData;
2390
+ if (doltAdapter !== null) {
2391
+ const doltSource = "dolt:pipeline_runs";
2392
+ attempted.push(doltSource);
2393
+ const doltData = await reconstructFromDolt(runId, doltAdapter);
2394
+ if (doltData !== null) {
2395
+ console.warn(`[RunManifest] Degraded mode: reconstructed run ${runId} from Dolt pipeline_runs. per_story_state and recovery_history are empty.`);
2396
+ return doltData;
2397
+ }
2398
+ }
2399
+ throw new ManifestReadError(`Failed to read manifest for run ${runId}: all sources exhausted`, attempted);
2400
+ }
2401
+ };
2402
+
2403
+ //#endregion
2404
+ //#region packages/sdlc/dist/run-model/supervisor-lock.js
2405
const defaultLogger = console;
/**
 * Advisory single-owner lock for the supervisor of one run.
 *
 * Ownership is represented by a lock file created with an exclusive open
 * (mode `"flock"`), or by a PID file when exclusive open is not supported
 * (mode `"pid-file"`). The owner's PID and session id are mirrored into the
 * run manifest through `manifest.update()`.
 *
 * Usage:
 * ```ts
 * const lock = new SupervisorLock(runId, manifest)
 * await lock.acquire(process.pid, sessionId, { force: opts.force })
 * // ... supervisor work ...
 * await lock.release()
 * ```
 *
 * This class installs no process exit handlers itself; callers that need the
 * lock released on exit must arrange to call `release()`.
 */
var SupervisorLock = class {
  /** Maximum open → clear → retry cycles `acquire()` performs before giving up. */
  static MAX_ACQUIRE_ATTEMPTS = 5;
  runId;
  manifest;
  baseDir;
  logger;
  /** Current lock mode — null until `acquire()` succeeds. */
  mode = null;
  /** File handle kept open while the lock file is held ("flock" mode only). */
  lockHandle = null;
  /**
   * @param runId - Identifier of the run being supervised
   * @param manifest - Object exposing `baseDir`, `read()` and `update()`
   * @param logger$3 - Optional logger with a `warn` method (defaults to `console`)
   */
  constructor(runId, manifest, logger$3) {
    this.runId = runId;
    this.manifest = manifest;
    this.baseDir = manifest.baseDir;
    this.logger = logger$3 ?? defaultLogger;
  }
  /** Advisory lock file path (primary mechanism). */
  get lockPath() {
    return join$1(this.baseDir, `${this.runId}.lock`);
  }
  /** PID-file path (fallback mechanism). */
  get pidPath() {
    return join$1(this.baseDir, `${this.runId}.pid`);
  }
  /**
   * Acquire exclusive ownership of the run.
   *
   * Opens the lock file with the `'wx'` flag, which only succeeds when the
   * file does not exist yet. On success the PID is written into the lock file
   * and `supervisor_pid` / `supervisor_session_id` are stored in the manifest.
   *
   * On EEXIST the manifest's `supervisor_pid` decides what happens:
   *   - no PID recorded, or the recorded process is dead → the stale lock
   *     file is removed and the open is retried;
   *   - process alive and `force` set → it is sent SIGTERM, the lock file is
   *     removed and the open is retried;
   *   - process alive without `force` → the prescribed rejection is thrown.
   * Retries are capped at `MAX_ACQUIRE_ATTEMPTS` so a lock path that cannot
   * be removed ends in an error instead of an endless retry.
   *
   * On ENOSYS or EOPNOTSUPP a warning is logged and ownership falls back to
   * the PID file.
   *
   * @param pid - PID of the process taking ownership
   * @param sessionId - Session id stored next to the PID in the manifest
   * @param opts - Optional `{ force }`; `force` evicts a live owner
   * @throws Error with exact message: "Run {id} is already supervised by PID {pid}. Use --force to take over."
   * @throws Error when the lock path still exists after the last attempt
   */
  async acquire(pid, sessionId, opts) {
    const force = opts?.force ?? false;
    await mkdir$1(this.baseDir, { recursive: true });
    let fh;
    for (let attempt = 1; ; attempt++) {
      try {
        fh = await open(this.lockPath, "wx");
        break;
      } catch (err) {
        const code = err?.code;
        if (code === "ENOSYS" || code === "EOPNOTSUPP") {
          this.logger.warn(`[SupervisorLock] flock not available on this filesystem (${code}). Falling back to PID-file for run ${this.runId}.`);
          await this.acquireViaPidFile(pid, sessionId, opts);
          return;
        }
        if (code !== "EEXIST") throw err;
        // Throws the prescribed rejection when a live owner must be respected.
        await this.clearContendedLock(force);
        if (attempt >= SupervisorLock.MAX_ACQUIRE_ATTEMPTS) throw new Error(`Run ${this.runId}: could not clear lock file ${this.lockPath} after ${attempt} attempts. Remove it manually and retry.`, { cause: err });
      }
    }
    this.lockHandle = fh;
    this.mode = "flock";
    try {
      await fh.write(String(pid), 0, "utf-8");
      await this.manifest.update({
        supervisor_pid: pid,
        supervisor_session_id: sessionId
      });
    } catch (postOpenErr) {
      // Roll back completely so a half-acquired lock never lingers.
      try {
        await fh.close();
      } catch {}
      this.lockHandle = null;
      this.mode = null;
      await unlink(this.lockPath).catch(() => void 0);
      throw postOpenErr;
    }
  }
  /**
   * Deal with an already existing lock file: respect a live owner (throw),
   * evict it when `force` is set, and otherwise remove the stale file.
   *
   * @param force - Whether a live owner may be terminated
   * @throws Error with the prescribed "already supervised" message
   */
  async clearContendedLock(force) {
    let existingPid = null;
    try {
      const data = await this.manifest.read();
      existingPid = data.supervisor_pid ?? null;
    } catch {
      // Unreadable manifest: no owner can be identified, treat the lock as stale.
    }
    if (existingPid !== null && this.isPidAlive(existingPid)) {
      if (!force) throw new Error(`Run ${this.runId} is already supervised by PID ${existingPid}. Use --force to take over.`);
      await this.forceKillOwner(existingPid);
    }
    await unlink(this.lockPath).catch(() => void 0);
  }
  /**
   * Release ownership of the run.
   *
   * Removes the lock file ("flock" mode) or the PID file ("pid-file" mode)
   * and clears `supervisor_pid` / `supervisor_session_id` in the manifest.
   *
   * When this instance holds no lock (never acquired, acquisition failed, or
   * already released) the call does nothing — in particular it leaves the
   * manifest alone, so a rejected contender cannot erase the real owner's
   * record.
   */
  async release() {
    if (this.mode === null) return;
    if (this.mode === "flock") {
      if (this.lockHandle !== null) {
        try {
          await this.lockHandle.close();
        } catch {}
        this.lockHandle = null;
      }
      await unlink(this.lockPath).catch(() => void 0);
    } else if (this.mode === "pid-file") await this.releaseViaPidFile();
    this.mode = null;
    await this.manifest.update({
      supervisor_pid: null,
      supervisor_session_id: null
    });
  }
  /**
   * Acquire ownership through the PID file `{baseDir}/{run-id}.pid`.
   *
   * If the PID file exists and holds a number:
   *   - dead PID → overwritten without needing `force`
   *   - live PID without `force` → prescribed rejection error
   *   - live PID with `force` → SIGTERM + wait, then proceed
   *
   * If the manifest update fails, the PID file written by this call is
   * removed again before the error is rethrown.
   *
   * @param pid - PID of the process taking ownership
   * @param sessionId - Session id stored next to the PID in the manifest
   * @param opts - Optional `{ force }`
   */
  async acquireViaPidFile(pid, sessionId, opts) {
    const force = opts?.force ?? false;
    let existingPid = null;
    try {
      const content = await readFile$1(this.pidPath, "utf-8");
      const parsed = Number.parseInt(content.trim(), 10);
      if (!Number.isNaN(parsed)) existingPid = parsed;
    } catch (e) {
      // A missing PID file simply means nobody owns the run.
      if (e?.code !== "ENOENT") throw e;
    }
    if (existingPid !== null && this.isPidAlive(existingPid)) {
      if (!force) throw new Error(`Run ${this.runId} is already supervised by PID ${existingPid}. Use --force to take over.`);
      await this.forceKillOwner(existingPid);
    }
    await writeFile$1(this.pidPath, String(pid), { flag: "w" });
    try {
      await this.manifest.update({
        supervisor_pid: pid,
        supervisor_session_id: sessionId
      });
    } catch (updateErr) {
      // mode is still null, so release() would never clean this file up.
      await unlink(this.pidPath).catch(() => void 0);
      throw updateErr;
    }
    this.mode = "pid-file";
  }
  /** Remove the PID file, ignoring any error. */
  async releaseViaPidFile() {
    await unlink(this.pidPath).catch(() => void 0);
  }
  /**
   * Send SIGTERM to the existing supervisor and wait 500ms for it to exit.
   *
   * Returns immediately when the process is already gone by the time the
   * signal is sent (ESRCH).
   *
   * @param existingPid - PID of the current owner
   * @throws Error if the process is still alive after 500ms.
   */
  async forceKillOwner(existingPid) {
    try {
      process.kill(existingPid, "SIGTERM");
    } catch (e) {
      // The owner exited between the liveness check and the signal.
      if (e?.code === "ESRCH") return;
      throw e;
    }
    await new Promise((resolve$2) => setTimeout(resolve$2, 500));
    const stillAlive = this.isPidAlive(existingPid);
    if (stillAlive) throw new Error(`Existing supervisor PID ${existingPid} did not exit after SIGTERM. Kill manually and retry.`);
  }
  /**
   * Test whether a PID is alive by sending signal 0.
   *
   * Returns true if the process exists, false if ESRCH (not found).
   * Other errors (e.g. EPERM) are treated as "alive" to avoid false stale
   * detections when we lack permission to signal the process.
   *
   * Numeric PIDs that are not positive integers are reported as not alive
   * without signalling: 0 and negative values address process groups rather
   * than a single process.
   */
  isPidAlive(pid) {
    if (typeof pid === "number" && !(Number.isInteger(pid) && pid > 0)) return false;
    try {
      process.kill(pid, 0);
      return true;
    } catch (e) {
      return e?.code !== "ESRCH";
    }
  }
};
2604
+
2605
+ //#endregion
2606
+ //#region src/cli/commands/manifest-read.ts
2607
+ const logger$1 = createLogger("manifest-read");
2608
/**
 * Look up the active run ID stored in `.substrate/current-run-id` under
 * `dbRoot`.
 *
 * @param dbRoot - Directory containing the `.substrate` folder
 * @returns The file content with surrounding whitespace removed, or `null`
 *   when the file cannot be read or contains only whitespace. Errors raised
 *   while building the path or reading the file are swallowed and reported
 *   as `null`.
 */
async function readCurrentRunId(dbRoot) {
  let runId = null;
  try {
    const pointerFile = join(dbRoot, ".substrate", "current-run-id");
    const raw = await readFile(pointerFile, "utf8");
    const trimmed = raw.trim();
    if (trimmed !== "") runId = trimmed;
  } catch {
    runId = null;
  }
  return runId;
}
2622
/**
 * Locate the run manifest a CLI command should operate on.
 *
 * Resolution:
 *   1. Use `runId` when it is neither `null` nor `undefined`.
 *   2. Otherwise ask `readCurrentRunId(dbRoot)`.
 *   3. No usable run ID → `{ manifest: null, runId: null }`.
 *   4. Build `RunManifest(runId, <dbRoot>/.substrate/runs)` and call `.read()`
 *      on it to confirm the manifest can be loaded.
 *   5. If construction or the read throws → `{ manifest: null, runId }`.
 *
 * Both failure paths emit a debug log entry.
 *
 * @param dbRoot - Resolved project root (typically from `resolveMainRepoRoot`)
 * @param runId - Optional explicit run ID; skips the `current-run-id` lookup when provided
 * @returns `{ manifest, runId }` where `manifest` is the readable instance or `null`
 */
async function resolveRunManifest(dbRoot, runId) {
  let effectiveRunId = runId;
  if (effectiveRunId === null || effectiveRunId === void 0) effectiveRunId = await readCurrentRunId(dbRoot);
  if (!effectiveRunId) {
    logger$1.debug("run manifest not found — falling back to Dolt (no current-run-id)");
    return { manifest: null, runId: null };
  }
  const runsDir = join(dbRoot, ".substrate", "runs");
  let manifest = null;
  try {
    const candidate = new RunManifest(effectiveRunId, runsDir);
    // Only hand the instance back once a read has succeeded.
    await candidate.read();
    manifest = candidate;
  } catch {
    logger$1.debug({ runId: effectiveRunId }, "run manifest not found — falling back to Dolt");
  }
  return { manifest, runId: effectiveRunId };
}
2661
+
1525
2662
  //#endregion
1526
2663
  //#region src/cli/commands/health.ts
1527
2664
  const logger = createLogger("health-cmd");
@@ -1600,10 +2737,10 @@ function inspectProcessTree(opts) {
1600
2737
  if (parts.length >= 3) {
1601
2738
  const pid = parseInt(parts[0], 10);
1602
2739
  const ppid = parseInt(parts[1], 10);
1603
- const stat$1 = parts[2];
2740
+ const stat$2 = parts[2];
1604
2741
  if (ppid === result.orchestrator_pid && pid !== result.orchestrator_pid) {
1605
2742
  result.child_pids.push(pid);
1606
- if (stat$1.includes("Z")) result.zombies.push(pid);
2743
+ if (stat$2.includes("Z")) result.zombies.push(pid);
1607
2744
  }
1608
2745
  }
1609
2746
  }
@@ -1663,6 +2800,42 @@ function getAllDescendantPids(rootPids, execFileSyncOverride) {
1663
2800
  }
1664
2801
  }
1665
2802
/**
 * Tally the entries of a manifest's `per_story_state` into the buckets used
 * by the health output.
 *
 * Status → bucket:
 *   - "complete"                        → completed
 *   - "escalated"                       → escalated
 *   - "failed", "verification-failed"   → failed
 *   - "pending", "gated"                → pending
 *   - anything else (including "dispatched", "in-review", "recovered") → active
 *
 * @param perStoryState - Object whose values each carry a `status` field
 * @returns Counts keyed by `active`, `completed`, `escalated`, `pending`, `failed`
 */
function buildHealthStoryCountsFromManifest(perStoryState) {
  const bucketByStatus = new Map([
    ["complete", "completed"],
    ["escalated", "escalated"],
    ["failed", "failed"],
    ["verification-failed", "failed"],
    ["pending", "pending"],
    ["gated", "pending"]
  ]);
  const tally = {
    active: 0,
    completed: 0,
    escalated: 0,
    pending: 0,
    failed: 0
  };
  for (const { status } of Object.values(perStoryState)) {
    // Statuses without an explicit mapping count as active.
    const bucket = bucketByStatus.get(status) ?? "active";
    tally[bucket] += 1;
  }
  return tally;
}
2838
+ /**
1666
2839
  * Fetch pipeline health data as a structured object without any stdout side-effects.
1667
2840
  * Used by runSupervisorAction to poll health without formatting overhead.
1668
2841
  *
@@ -1806,6 +2979,27 @@ async function getAutoHealthData(options) {
1806
2979
  }
1807
2980
  }
1808
2981
  } catch {}
2982
+ let manifestSupervisor;
2983
+ let manifestStoryCounts;
2984
+ try {
2985
+ const { manifest: resolvedManifest } = await resolveRunManifest(dbRoot, run.id);
2986
+ if (resolvedManifest !== null) {
2987
+ const manifestData = await resolvedManifest.read();
2988
+ manifestSupervisor = {
2989
+ pid: manifestData.supervisor_pid,
2990
+ session_id: manifestData.supervisor_session_id
2991
+ };
2992
+ manifestStoryCounts = buildHealthStoryCountsFromManifest(manifestData.per_story_state);
2993
+ logger.debug({ runId: run.id }, "health: story counts and supervisor read from manifest");
2994
+ }
2995
+ } catch {
2996
+ logger.debug({ runId: run.id }, "health: manifest read failed — using token_usage_json counts");
2997
+ }
2998
+ const finalActive = manifestStoryCounts?.active ?? active;
2999
+ const finalCompleted = manifestStoryCounts?.completed ?? completed;
3000
+ const finalEscalated = manifestStoryCounts?.escalated ?? escalated;
3001
+ const finalPending = manifestStoryCounts?.pending ?? pending;
3002
+ const finalFailed = manifestStoryCounts?.failed;
1809
3003
  const substrateDirPath = join(dbRoot, ".substrate");
1810
3004
  const processInfo = options._processInfoOverride ?? inspectProcessTree({
1811
3005
  projectRoot,
@@ -1816,12 +3010,12 @@ async function getAutoHealthData(options) {
1816
3010
  else if (processInfo.pid_file_dead === true) verdict = "STALLED";
1817
3011
  else if (processInfo.zombies.length > 0) verdict = "STALLED";
1818
3012
  else if (stalenessSeconds > DEFAULT_STALL_THRESHOLD_SECONDS) verdict = "STALLED";
1819
- else if (active > 0) verdict = "STALLED";
3013
+ else if (finalActive > 0) verdict = "STALLED";
1820
3014
  else verdict = "HEALTHY";
1821
3015
  else if (run.status === "completed" || run.status === "failed" || run.status === "stopped") verdict = "NO_PIPELINE_RUNNING";
1822
3016
  const warnings = [];
1823
3017
  if (doltStateInfo !== void 0 && doltStateInfo.responsive === false) warnings.push("Dolt not connected — decision store queries may fail, story context will be degraded");
1824
- if (escalated > 0) warnings.push(`${escalated} story(ies) escalated — operator intervention may be needed`);
3018
+ if (finalEscalated > 0) warnings.push(`${finalEscalated} story(ies) escalated — operator intervention may be needed`);
1825
3019
  const healthOutput = {
1826
3020
  verdict,
1827
3021
  run_id: run.id,
@@ -1831,12 +3025,14 @@ async function getAutoHealthData(options) {
1831
3025
  last_activity: run.updated_at ?? "",
1832
3026
  process: processInfo,
1833
3027
  stories: {
1834
- active,
1835
- completed,
1836
- escalated,
1837
- pending,
3028
+ active: finalActive,
3029
+ completed: finalCompleted,
3030
+ escalated: finalEscalated,
3031
+ pending: finalPending,
3032
+ ...finalFailed !== void 0 ? { failed: finalFailed } : {},
1838
3033
  details: storyDetails
1839
3034
  },
3035
+ ...manifestSupervisor !== void 0 ? { manifest_supervisor: manifestSupervisor } : {},
1840
3036
  ...doltStateInfo !== void 0 ? { dolt_state: doltStateInfo } : {},
1841
3037
  ...warnings.length > 0 ? { warnings } : {}
1842
3038
  };
@@ -1948,5 +3144,5 @@ function registerHealthCommand(program, _version = "0.0.0", projectRoot = proces
1948
3144
  }
1949
3145
 
1950
3146
  //#endregion
1951
- export { BMAD_BASELINE_TOKENS_FULL, DEFAULT_STALL_THRESHOLD_SECONDS, DoltMergeConflict, FileStateStore, STOP_AFTER_VALID_PHASES, STORY_KEY_PATTERN$1 as STORY_KEY_PATTERN, SUBSTRATE_OWNED_SETTINGS_KEYS, VALID_PHASES, WorkGraphRepository, __commonJS, __require, __toESM, buildPipelineStatusOutput, createDatabaseAdapter$1 as createDatabaseAdapter, createStateStore, detectCycles, findPackageRoot, formatOutput, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getAllDescendantPids, getAutoHealthData, getSubstrateDefaultSettings, inspectProcessTree, isOrchestratorProcessLine, parseDbTimestampAsUtc, registerHealthCommand, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveMainRepoRoot, runHealthAction, validateStoryKey };
1952
- //# sourceMappingURL=health-M0iCuP26.js.map
3147
+ export { BMAD_BASELINE_TOKENS_FULL, DEFAULT_STALL_THRESHOLD_SECONDS, DoltMergeConflict, FileStateStore, RunManifest, STOP_AFTER_VALID_PHASES, STORY_KEY_PATTERN$1 as STORY_KEY_PATTERN, SUBSTRATE_OWNED_SETTINGS_KEYS, SupervisorLock, VALID_PHASES, WorkGraphRepository, __commonJS, __require, __toESM, applyConfigToGraph, buildPipelineStatusOutput, createDatabaseAdapter$1 as createDatabaseAdapter, createGraphOrchestrator, createStateStore, detectCycles, findPackageRoot, formatOutput, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getAllDescendantPids, getAutoHealthData, getSubstrateDefaultSettings, inspectProcessTree, isOrchestratorProcessLine, parseDbTimestampAsUtc, registerHealthCommand, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveGraphPath, resolveMainRepoRoot, resolveRunManifest, runHealthAction, validateStoryKey };
3148
+ //# sourceMappingURL=health-DKallkoo.js.map