substrate-ai 0.2.15 → 0.2.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -2,10 +2,10 @@
2
2
  import { createLogger, deepMask } from "../logger-C6n1g8uP.js";
3
3
  import { AdapterRegistry, createEventBus } from "../event-bus-J-bw-pkp.js";
4
4
  import { CURRENT_CONFIG_FORMAT_VERSION, CURRENT_TASK_GRAPH_VERSION, PartialSubstrateConfigSchema, SUPPORTED_CONFIG_FORMAT_VERSIONS, SubstrateConfigSchema, defaultConfigMigrator } from "../version-manager-impl-BpVx2DkY.js";
5
- import { DatabaseWrapper, SUBSTRATE_OWNED_SETTINGS_KEYS, VALID_PHASES, buildPipelineStatusOutput, createContextCompiler, createDispatcher, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, findPackageRoot, formatOutput, formatPhaseCompletionSummary, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getSubstrateDefaultSettings, parseDbTimestampAsUtc, registerRunCommand, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveMainRepoRoot, runAnalysisPhase, runMigrations, runPlanningPhase, runSolutioningPhase, validateStopAfterFromConflict } from "../run-CoP8UQU3.js";
5
+ import { DatabaseWrapper, SUBSTRATE_OWNED_SETTINGS_KEYS, VALID_PHASES, buildPipelineStatusOutput, createContextCompiler, createDispatcher, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, findPackageRoot, formatOutput, formatPhaseCompletionSummary, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getSubstrateDefaultSettings, parseDbTimestampAsUtc, registerRunCommand, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveMainRepoRoot, runAnalysisPhase, runMigrations, runPlanningPhase, runSolutioningPhase, validateStopAfterFromConflict } from "../run-BrI2xzk7.js";
6
6
  import { ConfigError, ConfigIncompatibleFormatError } from "../errors-BPqtzQ4U.js";
7
- import { addTokenUsage, createDecision, getDecisionsByPhaseForRun, getLatestRun, getPipelineRunById, getTokenUsageSummary, listRequirements, updatePipelineRun } from "../decisions-DNYByk0U.js";
8
- import { aggregateTokenUsageForRun, compareRunMetrics, getBaselineRunMetrics, getRunMetrics, getStoryMetricsForRun, incrementRunRestarts, listRunMetrics, tagRunAsBaseline } from "../metrics-BSg8VIHd.js";
7
+ import { addTokenUsage, createDecision, getDecisionsByCategory, getDecisionsByPhaseForRun, getLatestRun, getPipelineRunById, getTokenUsageSummary, listRequirements, updatePipelineRun } from "../decisions-SyswIRKz.js";
8
+ import { EXPERIMENT_RESULT, OPERATIONAL_FINDING, STORY_METRICS, aggregateTokenUsageForRun, compareRunMetrics, getBaselineRunMetrics, getRunMetrics, getStoryMetricsForRun, incrementRunRestarts, listRunMetrics, tagRunAsBaseline } from "../operational-CobuCGbM.js";
9
9
  import { abortMerge, createWorktree, getConflictingFiles, getMergedFiles, getOrphanedWorktrees, performMerge, removeBranch, removeWorktree, simulateMerge, verifyGitVersion } from "../git-utils-BtI5eNoN.js";
10
10
  import { registerUpgradeCommand } from "../upgrade-rV26kdh3.js";
11
11
  import { createRequire } from "module";
@@ -2723,6 +2723,73 @@ function defaultSupervisorDeps() {
2723
2723
  }
2724
2724
  },
2725
2725
  getAllDescendants: (rootPids) => getAllDescendantPids(rootPids),
2726
+ writeStallFindings: async (opts) => {
2727
+ try {
2728
+ const dbRoot = await resolveMainRepoRoot(opts.projectRoot);
2729
+ const dbPath = join(dbRoot, ".substrate", "substrate.db");
2730
+ if (!existsSync(dbPath)) return;
2731
+ const dbWrapper = new DatabaseWrapper(dbPath);
2732
+ try {
2733
+ dbWrapper.open();
2734
+ const db = dbWrapper.db;
2735
+ const activeStories = Object.entries(opts.storyDetails).filter(([, s]) => s.phase !== "PENDING" && s.phase !== "COMPLETE" && s.phase !== "ESCALATED");
2736
+ const now = Date.now();
2737
+ for (const [storyKey, storyState] of activeStories) createDecision(db, {
2738
+ pipeline_run_id: opts.runId ?? null,
2739
+ phase: "supervisor",
2740
+ category: OPERATIONAL_FINDING,
2741
+ key: `stall:${storyKey}:${now}`,
2742
+ value: JSON.stringify({
2743
+ phase: storyState.phase,
2744
+ staleness_secs: opts.staleness_secs,
2745
+ attempt: opts.attempt,
2746
+ outcome: opts.outcome
2747
+ }),
2748
+ rationale: `Supervisor stall recovery: story ${storyKey} was in phase ${storyState.phase} when pipeline stalled after ${opts.staleness_secs}s. Attempt ${opts.attempt}. Outcome: ${opts.outcome}.`
2749
+ });
2750
+ } finally {
2751
+ try {
2752
+ dbWrapper.close();
2753
+ } catch {}
2754
+ }
2755
+ } catch {}
2756
+ },
2757
+ writeRunSummary: async (opts) => {
2758
+ const totalStories = opts.succeeded.length + opts.failed.length + opts.escalated.length;
2759
+ if (totalStories === 0) return;
2760
+ if (opts.runId === null) return;
2761
+ try {
2762
+ const dbRoot = await resolveMainRepoRoot(opts.projectRoot);
2763
+ const dbPath = join(dbRoot, ".substrate", "substrate.db");
2764
+ if (!existsSync(dbPath)) return;
2765
+ const dbWrapper = new DatabaseWrapper(dbPath);
2766
+ try {
2767
+ dbWrapper.open();
2768
+ const db = dbWrapper.db;
2769
+ const tokenAgg = aggregateTokenUsageForRun(db, opts.runId);
2770
+ createDecision(db, {
2771
+ pipeline_run_id: opts.runId,
2772
+ phase: "supervisor",
2773
+ category: OPERATIONAL_FINDING,
2774
+ key: `run-summary:${opts.runId}`,
2775
+ value: JSON.stringify({
2776
+ succeeded: opts.succeeded,
2777
+ failed: opts.failed,
2778
+ escalated: opts.escalated,
2779
+ total_restarts: opts.total_restarts,
2780
+ elapsed_seconds: opts.elapsed_seconds,
2781
+ total_input_tokens: tokenAgg.input,
2782
+ total_output_tokens: tokenAgg.output
2783
+ }),
2784
+ rationale: `Run summary: ${opts.succeeded.length} succeeded, ${opts.failed.length} failed, ${opts.escalated.length} escalated. ${opts.total_restarts} restarts. Elapsed: ${opts.elapsed_seconds}s.`
2785
+ });
2786
+ } finally {
2787
+ try {
2788
+ dbWrapper.close();
2789
+ } catch {}
2790
+ }
2791
+ } catch {}
2792
+ },
2726
2793
  runAnalysis: async (runId, projectRoot) => {
2727
2794
  const dbPath = join(projectRoot, ".substrate", "substrate.db");
2728
2795
  if (!existsSync(dbPath)) return;
@@ -2800,7 +2867,7 @@ function buildTerminalSummary(storyDetails) {
2800
2867
  */
2801
2868
  async function handleStallRecovery(health, state, config, deps, io) {
2802
2869
  const { stallThreshold, maxRestarts, pack, outputFormat } = config;
2803
- const { killPid, resumePipeline, sleep, incrementRestarts, getAllDescendants } = deps;
2870
+ const { killPid, resumePipeline, sleep, incrementRestarts, getAllDescendants, writeStallFindings } = deps;
2804
2871
  const { emitEvent, log } = io;
2805
2872
  const { projectRoot } = state;
2806
2873
  if (health.staleness_seconds < stallThreshold) return null;
@@ -2847,6 +2914,14 @@ async function handleStallRecovery(health, state, config, deps, io) {
2847
2914
  attempts: state.restartCount
2848
2915
  });
2849
2916
  log(`Supervisor: Max restarts (${maxRestarts}) exceeded. Aborting.`);
2917
+ if (writeStallFindings) await writeStallFindings({
2918
+ runId: health.run_id,
2919
+ storyDetails: health.stories.details,
2920
+ staleness_secs: health.staleness_seconds,
2921
+ attempt: state.restartCount,
2922
+ outcome: "max-restarts-escalated",
2923
+ projectRoot
2924
+ });
2850
2925
  return {
2851
2926
  state,
2852
2927
  maxRestartsExceeded: true
@@ -2868,6 +2943,14 @@ async function handleStallRecovery(health, state, config, deps, io) {
2868
2943
  concurrency: 3,
2869
2944
  pack
2870
2945
  });
2946
+ if (writeStallFindings) await writeStallFindings({
2947
+ runId: health.run_id,
2948
+ storyDetails: health.stories.details,
2949
+ staleness_secs: health.staleness_seconds,
2950
+ attempt: newRestartCount,
2951
+ outcome: "recovered",
2952
+ projectRoot
2953
+ });
2871
2954
  } catch (err) {
2872
2955
  const message = err instanceof Error ? err.message : String(err);
2873
2956
  log(`Supervisor: Resume error: ${message}`);
@@ -2876,6 +2959,14 @@ async function handleStallRecovery(health, state, config, deps, io) {
2876
2959
  reason: "resume_failed",
2877
2960
  message
2878
2961
  });
2962
+ if (writeStallFindings) await writeStallFindings({
2963
+ runId: health.run_id,
2964
+ storyDetails: health.stories.details,
2965
+ staleness_secs: health.staleness_seconds,
2966
+ attempt: newRestartCount,
2967
+ outcome: "failed",
2968
+ projectRoot
2969
+ });
2879
2970
  }
2880
2971
  return {
2881
2972
  state: {
@@ -2902,7 +2993,7 @@ async function runSupervisorAction(options, deps = {}) {
2902
2993
  ...defaultSupervisorDeps(),
2903
2994
  ...deps
2904
2995
  };
2905
- const { getHealth, sleep, runAnalysis, getTokenSnapshot } = resolvedDeps;
2996
+ const { getHealth, sleep, runAnalysis, getTokenSnapshot, writeRunSummary } = resolvedDeps;
2906
2997
  let state = {
2907
2998
  projectRoot,
2908
2999
  runId,
@@ -2947,6 +3038,15 @@ async function runSupervisorAction(options, deps = {}) {
2947
3038
  restarts: state.restartCount
2948
3039
  });
2949
3040
  log(`\nPipeline reached terminal state. Elapsed: ${elapsedSeconds}s | succeeded: ${summary.succeeded.length} | failed: ${summary.failed.length} | restarts: ${state.restartCount}`);
3041
+ if (writeRunSummary !== void 0) await writeRunSummary({
3042
+ runId: health.run_id,
3043
+ succeeded: summary.succeeded,
3044
+ failed: summary.failed,
3045
+ escalated: summary.escalated,
3046
+ total_restarts: state.restartCount,
3047
+ elapsed_seconds: elapsedSeconds,
3048
+ projectRoot
3049
+ });
2950
3050
  if (health.run_id !== null && runAnalysis !== void 0) {
2951
3051
  log(`[supervisor] Running post-run analysis for ${health.run_id}...`);
2952
3052
  try {
@@ -2995,11 +3095,11 @@ async function runSupervisorAction(options, deps = {}) {
2995
3095
  try {
2996
3096
  const { createExperimenter } = await import(
2997
3097
  /* @vite-ignore */
2998
- "../experimenter-BSu2ie3J.js"
3098
+ "../experimenter-Cd04gAWQ.js"
2999
3099
  );
3000
3100
  const { getLatestRun: getLatest } = await import(
3001
3101
  /* @vite-ignore */
3002
- "../decisions-DKXc-jnv.js"
3102
+ "../decisions-BmqXQ3Se.js"
3003
3103
  );
3004
3104
  const dbPath = join(projectRoot, ".substrate", "substrate.db");
3005
3105
  const expDbWrapper = new DatabaseWrapper(dbPath);
@@ -3009,7 +3109,7 @@ async function runSupervisorAction(options, deps = {}) {
3009
3109
  const expDb = expDbWrapper.db;
3010
3110
  const { runRunAction: runPipeline } = await import(
3011
3111
  /* @vite-ignore */
3012
- "../run-B9IglY4m.js"
3112
+ "../run-fjuwOUib.js"
3013
3113
  );
3014
3114
  const runStoryFn = async (opts) => {
3015
3115
  const exitCode = await runPipeline({
@@ -3079,7 +3179,14 @@ async function runSupervisorAction(options, deps = {}) {
3079
3179
  maxRestarts,
3080
3180
  pack,
3081
3181
  outputFormat
3082
- }, resolvedDeps, {
3182
+ }, {
3183
+ killPid: resolvedDeps.killPid,
3184
+ resumePipeline: resolvedDeps.resumePipeline,
3185
+ sleep: resolvedDeps.sleep,
3186
+ incrementRestarts: resolvedDeps.incrementRestarts,
3187
+ getAllDescendants: resolvedDeps.getAllDescendants,
3188
+ writeStallFindings: resolvedDeps.writeStallFindings
3189
+ }, {
3083
3190
  emitEvent,
3084
3191
  log
3085
3192
  });
@@ -3327,23 +3434,69 @@ async function runMetricsAction(options) {
3327
3434
  return 0;
3328
3435
  }
3329
3436
  const runs = listRunMetrics(db, limit);
3330
- if (outputFormat === "json") process.stdout.write(formatOutput({ runs }, "json", true) + "\n");
3437
+ const storyMetricDecisions = getDecisionsByCategory(db, STORY_METRICS);
3438
+ const storyMetrics = storyMetricDecisions.map((d) => {
3439
+ const colonIdx = d.key.indexOf(":");
3440
+ const storyKey = colonIdx !== -1 ? d.key.slice(0, colonIdx) : d.key;
3441
+ const runId = colonIdx !== -1 ? d.key.slice(colonIdx + 1) : d.pipeline_run_id ?? "";
3442
+ try {
3443
+ const v = JSON.parse(d.value);
3444
+ return {
3445
+ story_key: storyKey,
3446
+ run_id: runId,
3447
+ wall_clock_seconds: v.wall_clock_seconds ?? 0,
3448
+ input_tokens: v.input_tokens ?? 0,
3449
+ output_tokens: v.output_tokens ?? 0,
3450
+ review_cycles: v.review_cycles ?? 0,
3451
+ stalled: v.stalled ?? false,
3452
+ ...v.cost_usd !== void 0 && v.cost_usd > 0 ? { cost_usd: v.cost_usd } : {}
3453
+ };
3454
+ } catch {
3455
+ return {
3456
+ story_key: storyKey,
3457
+ run_id: runId,
3458
+ wall_clock_seconds: 0,
3459
+ input_tokens: 0,
3460
+ output_tokens: 0,
3461
+ review_cycles: 0,
3462
+ stalled: false
3463
+ };
3464
+ }
3465
+ });
3466
+ if (outputFormat === "json") process.stdout.write(formatOutput({
3467
+ runs,
3468
+ story_metrics: storyMetrics
3469
+ }, "json", true) + "\n");
3331
3470
  else {
3332
- if (runs.length === 0) {
3471
+ if (runs.length === 0 && storyMetrics.length === 0) {
3333
3472
  process.stdout.write("No run metrics recorded yet. Run `substrate run` to generate metrics.\n");
3334
3473
  return 0;
3335
3474
  }
3336
- process.stdout.write(`\nPipeline Run Metrics (last ${runs.length} runs)\n`);
3337
- process.stdout.write("─".repeat(80) + "\n");
3338
- for (const run of runs) {
3339
- const isBaseline = run.is_baseline ? " [BASELINE]" : "";
3340
- process.stdout.write(`\nRun: ${run.run_id}${isBaseline}\n`);
3341
- process.stdout.write(` Status: ${run.status} | Methodology: ${run.methodology}\n`);
3342
- process.stdout.write(` Started: ${run.started_at}\n`);
3343
- if (run.completed_at) process.stdout.write(` Completed: ${run.completed_at} (${run.wall_clock_seconds}s)\n`);
3344
- process.stdout.write(` Stories: attempted=${run.stories_attempted} succeeded=${run.stories_succeeded} failed=${run.stories_failed} escalated=${run.stories_escalated}\n`);
3345
- process.stdout.write(` Tokens: ${(run.total_input_tokens ?? 0).toLocaleString()} in / ${(run.total_output_tokens ?? 0).toLocaleString()} out $${(run.total_cost_usd ?? 0).toFixed(4)}\n`);
3346
- process.stdout.write(` Cycles: ${run.total_review_cycles} | Dispatches: ${run.total_dispatches} | Concurrency: ${run.concurrency_setting}\n`);
3475
+ if (runs.length > 0) {
3476
+ process.stdout.write(`\nPipeline Run Metrics (last ${runs.length} runs)\n`);
3477
+ process.stdout.write("─".repeat(80) + "\n");
3478
+ for (const run of runs) {
3479
+ const isBaseline = run.is_baseline ? " [BASELINE]" : "";
3480
+ process.stdout.write(`\nRun: ${run.run_id}${isBaseline}\n`);
3481
+ process.stdout.write(` Status: ${run.status} | Methodology: ${run.methodology}\n`);
3482
+ process.stdout.write(` Started: ${run.started_at}\n`);
3483
+ if (run.completed_at) process.stdout.write(` Completed: ${run.completed_at} (${run.wall_clock_seconds}s)\n`);
3484
+ process.stdout.write(` Stories: attempted=${run.stories_attempted} succeeded=${run.stories_succeeded} failed=${run.stories_failed} escalated=${run.stories_escalated}\n`);
3485
+ process.stdout.write(` Tokens: ${(run.total_input_tokens ?? 0).toLocaleString()} in / ${(run.total_output_tokens ?? 0).toLocaleString()} out $${(run.total_cost_usd ?? 0).toFixed(4)}\n`);
3486
+ process.stdout.write(` Cycles: ${run.total_review_cycles} | Dispatches: ${run.total_dispatches} | Concurrency: ${run.concurrency_setting}\n`);
3487
+ }
3488
+ }
3489
+ if (storyMetrics.length > 0) {
3490
+ process.stdout.write(`\nPer-Story Efficiency Metrics (${storyMetrics.length} stories)\n`);
3491
+ process.stdout.write("─".repeat(80) + "\n");
3492
+ process.stdout.write(` ${"Story".padEnd(16)} ${"Run".padEnd(12)} ${"Wall(s)".padStart(8)} ${"Tokens In".padStart(10)} ${"Tokens Out".padStart(11)} ${"Cycles".padStart(7)} ${"Stalled".padStart(8)}\n`);
3493
+ process.stdout.write(" " + "─".repeat(76) + "\n");
3494
+ for (const sm of storyMetrics) {
3495
+ const runShort = sm.run_id.slice(0, 8);
3496
+ const stalledStr = sm.stalled ? "yes" : "no";
3497
+ const costStr = sm.cost_usd !== void 0 && sm.cost_usd > 0 ? ` $${sm.cost_usd.toFixed(4)}` : "";
3498
+ process.stdout.write(` ${sm.story_key.padEnd(16)} ${runShort.padEnd(12)} ${String(sm.wall_clock_seconds).padStart(8)} ${sm.input_tokens.toLocaleString().padStart(10)} ${sm.output_tokens.toLocaleString().padStart(11)} ${String(sm.review_cycles).padStart(7)} ${stalledStr.padStart(8)}${costStr}\n`);
3499
+ }
3347
3500
  }
3348
3501
  }
3349
3502
  return 0;
@@ -6442,6 +6595,100 @@ function renderEpics(decisions) {
6442
6595
  return parts.join("\n");
6443
6596
  }
6444
6597
  /**
6598
+ * Render `operational-finding` category decisions as an "Operational Findings" section.
6599
+ *
6600
+ * Groups findings by run key (for run-summary decisions) and stall key (for stall decisions).
6601
+ * Returns '' if no matching decisions are found.
6602
+ *
6603
+ * @param decisions - Decisions of any category; filters for 'operational-finding'
6604
+ * @returns Formatted markdown content, or '' if empty
6605
+ */
6606
+ function renderOperationalFindings(decisions) {
6607
+ const findings = decisions.filter((d) => d.category === "operational-finding");
6608
+ if (findings.length === 0) return "";
6609
+ const parts = ["## Operational Findings", ""];
6610
+ const runSummaries = findings.filter((d) => d.key.startsWith("run-summary:"));
6611
+ const stallFindings = findings.filter((d) => d.key.startsWith("stall:"));
6612
+ const otherFindings = findings.filter((d) => !d.key.startsWith("run-summary:") && !d.key.startsWith("stall:"));
6613
+ if (runSummaries.length > 0) {
6614
+ parts.push("### Run Summaries");
6615
+ parts.push("");
6616
+ for (const d of runSummaries) {
6617
+ const runId = d.key.replace("run-summary:", "");
6618
+ const parsed = safeParseJson(d.value);
6619
+ if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
6620
+ const s = parsed;
6621
+ parts.push(`**Run: ${runId}**`);
6622
+ parts.push(`- Succeeded: ${(s.succeeded ?? []).join(", ") || "none"}`);
6623
+ parts.push(`- Failed: ${(s.failed ?? []).join(", ") || "none"}`);
6624
+ parts.push(`- Escalated: ${(s.escalated ?? []).join(", ") || "none"}`);
6625
+ parts.push(`- Total restarts: ${s.total_restarts ?? 0}`);
6626
+ parts.push(`- Elapsed: ${s.elapsed_seconds ?? 0}s`);
6627
+ parts.push(`- Tokens: ${s.total_input_tokens ?? 0} in / ${s.total_output_tokens ?? 0} out`);
6628
+ } else parts.push(`**Run: ${runId}**: ${String(parsed)}`);
6629
+ parts.push("");
6630
+ }
6631
+ }
6632
+ if (stallFindings.length > 0) {
6633
+ parts.push("### Stall Events");
6634
+ parts.push("");
6635
+ for (const d of stallFindings) {
6636
+ const parsed = safeParseJson(d.value);
6637
+ if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
6638
+ const s = parsed;
6639
+ const outcome = s.outcome ?? "unknown";
6640
+ parts.push(`- **${d.key}**: phase=${s.phase ?? "?"} staleness=${s.staleness_secs ?? 0}s attempt=${s.attempt ?? 0} outcome=${outcome}`);
6641
+ } else parts.push(`- **${d.key}**: ${String(parsed)}`);
6642
+ }
6643
+ parts.push("");
6644
+ }
6645
+ if (otherFindings.length > 0) {
6646
+ for (const d of otherFindings) parts.push(`- **${d.key}**: ${renderValue(d.value)}`);
6647
+ parts.push("");
6648
+ }
6649
+ return parts.join("\n");
6650
+ }
6651
/**
 * Render `experiment-result` category decisions as an "Experiments" section.
 *
 * Emits a totals line (total / improved / mixed / regressed) followed by one
 * bullet per experiment showing its verdict, target metric, before/after
 * values and, when present, the branch name.
 * Returns '' if no matching decisions are found.
 *
 * Each decision value is parsed exactly once; the parsed result is reused for
 * both the verdict tallies and the per-experiment bullets.
 *
 * @param decisions - Decisions of any category; filters for 'experiment-result'
 * @returns Formatted markdown content, or '' if empty
 */
function renderExperiments(decisions) {
  const experiments = decisions.filter((d) => d.category === "experiment-result");
  if (experiments.length === 0) return "";
  const parsedValues = experiments.map((d) => safeParseJson(d.value));
  // Tallies accept any non-null object, so they match the verdict read below.
  const countVerdict = (verdict) => parsedValues.filter((p) => typeof p === "object" && p !== null && p.verdict === verdict).length;
  const parts = ["## Experiments", ""];
  parts.push(`**Total**: ${experiments.length} | **Improved**: ${countVerdict("IMPROVED")} | **Mixed**: ${countVerdict("MIXED")} | **Regressed**: ${countVerdict("REGRESSED")}`);
  parts.push("");
  for (const parsed of parsedValues) {
    if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
      const verdict = parsed.verdict ?? "UNKNOWN";
      const metric = parsed.target_metric ?? "unknown";
      // A null or empty branch name simply omits the arrow suffix.
      const branch = parsed.branch_name ? ` → \`${parsed.branch_name}\`` : "";
      parts.push(`- **[${verdict}]** ${metric}: before=${parsed.before ?? "?"} after=${parsed.after ?? "?"}${branch}`);
    } else parts.push(`- ${String(parsed)}`);
  }
  parts.push("");
  return parts.join("\n");
}
6691
+ /**
6445
6692
  * Render solutioning-phase readiness-findings decisions as a `readiness-report.md`.
6446
6693
  *
6447
6694
  * Groups findings by category, shows severity per finding, and emits an
@@ -6600,6 +6847,28 @@ async function runExportAction(options) {
6600
6847
  if (outputFormat === "human") process.stdout.write(` Written: ${filePath}\n`);
6601
6848
  }
6602
6849
  }
6850
+ const operationalDecisions = getDecisionsByCategory(db, OPERATIONAL_FINDING);
6851
+ if (operationalDecisions.length > 0) {
6852
+ const operationalContent = renderOperationalFindings(operationalDecisions);
6853
+ if (operationalContent !== "") {
6854
+ const filePath = join$1(resolvedOutputDir, "operational-findings.md");
6855
+ writeFileSync$1(filePath, operationalContent, "utf-8");
6856
+ filesWritten.push(filePath);
6857
+ if (!phasesExported.includes("operational")) phasesExported.push("operational");
6858
+ if (outputFormat === "human") process.stdout.write(` Written: ${filePath}\n`);
6859
+ }
6860
+ }
6861
+ const experimentDecisions = getDecisionsByCategory(db, EXPERIMENT_RESULT);
6862
+ if (experimentDecisions.length > 0) {
6863
+ const experimentsContent = renderExperiments(experimentDecisions);
6864
+ if (experimentsContent !== "") {
6865
+ const filePath = join$1(resolvedOutputDir, "experiments.md");
6866
+ writeFileSync$1(filePath, experimentsContent, "utf-8");
6867
+ filesWritten.push(filePath);
6868
+ if (!phasesExported.includes("operational")) phasesExported.push("operational");
6869
+ if (outputFormat === "human") process.stdout.write(` Written: ${filePath}\n`);
6870
+ }
6871
+ }
6603
6872
  if (outputFormat === "json") {
6604
6873
  const result = {
6605
6874
  files_written: filesWritten,
@@ -0,0 +1,3 @@
1
+ import { addTokenUsage, createDecision, createPipelineRun, createRequirement, getArtifactByTypeForRun, getArtifactsByRun, getDecisionsByCategory, getDecisionsByPhase, getDecisionsByPhaseForRun, getLatestRun, getPipelineRunById, getTokenUsageSummary, listRequirements, registerArtifact, updateDecision, updatePipelineRun, updatePipelineRunConfig, upsertDecision } from "./decisions-SyswIRKz.js";
2
+
3
+ export { getLatestRun };
@@ -179,6 +179,13 @@ function getDecisionsByPhaseForRun(db, runId, phase) {
179
179
  return stmt.all(runId, phase);
180
180
  }
181
181
  /**
182
+ * Get all decisions for a given category, ordered by created_at ascending.
183
+ */
184
+ function getDecisionsByCategory(db, category) {
185
+ const stmt = db.prepare("SELECT * FROM decisions WHERE category = ? ORDER BY created_at ASC");
186
+ return stmt.all(category);
187
+ }
188
+ /**
182
189
  * Update a decision's value and/or rationale and set updated_at.
183
190
  */
184
191
  function updateDecision(db, id, updates) {
@@ -354,5 +361,5 @@ function getTokenUsageSummary(db, runId) {
354
361
  }
355
362
 
356
363
  //#endregion
357
- export { addTokenUsage, createDecision, createPipelineRun, createRequirement, getArtifactByTypeForRun, getArtifactsByRun, getDecisionsByPhase, getDecisionsByPhaseForRun, getLatestRun, getPipelineRunById, getTokenUsageSummary, listRequirements, registerArtifact, updateDecision, updatePipelineRun, updatePipelineRunConfig, upsertDecision };
358
- //# sourceMappingURL=decisions-DNYByk0U.js.map
364
+ export { addTokenUsage, createDecision, createPipelineRun, createRequirement, getArtifactByTypeForRun, getArtifactsByRun, getDecisionsByCategory, getDecisionsByPhase, getDecisionsByPhaseForRun, getLatestRun, getPipelineRunById, getTokenUsageSummary, listRequirements, registerArtifact, updateDecision, updatePipelineRun, updatePipelineRunConfig, upsertDecision };
365
+ //# sourceMappingURL=decisions-SyswIRKz.js.map
@@ -1,5 +1,6 @@
1
1
  import "./logger-C6n1g8uP.js";
2
- import { getRunMetrics, getStoryMetricsForRun } from "./metrics-BSg8VIHd.js";
2
+ import { createDecision } from "./decisions-SyswIRKz.js";
3
+ import { EXPERIMENT_RESULT, getRunMetrics, getStoryMetricsForRun } from "./operational-CobuCGbM.js";
3
4
  import { spawnGit } from "./git-utils-BtI5eNoN.js";
4
5
  import { spawn } from "node:child_process";
5
6
  import { join } from "node:path";
@@ -459,6 +460,24 @@ function createExperimenter(config, deps) {
459
460
  prLink
460
461
  };
461
462
  await appendExperimentLog(finalResult);
463
+ try {
464
+ const targetMetricValue = rec.type === "token_regression" ? rec.tokens_actual ?? 0 : rec.type === "review_cycles" ? rec.review_cycles ?? 0 : rec.timing_seconds ?? 0;
465
+ const afterValue = rec.type === "token_regression" ? deltas.tokens_pct !== null ? Math.round(targetMetricValue * (1 + deltas.tokens_pct / 100)) : targetMetricValue : rec.type === "review_cycles" ? deltas.review_cycles_pct !== null ? Math.round(targetMetricValue * (1 + deltas.review_cycles_pct / 100)) : targetMetricValue : deltas.wall_clock_pct !== null ? Math.round(targetMetricValue * (1 + deltas.wall_clock_pct / 100)) : targetMetricValue;
466
+ createDecision(db, {
467
+ pipeline_run_id: baselineRunId,
468
+ phase: "supervisor",
469
+ category: EXPERIMENT_RESULT,
470
+ key: `experiment:${baselineRunId}:${Date.now()}`,
471
+ value: JSON.stringify({
472
+ target_metric: rec.type,
473
+ before: targetMetricValue,
474
+ after: afterValue,
475
+ verdict,
476
+ branch_name: verdict === "IMPROVED" || verdict === "MIXED" ? branchName : null
477
+ }),
478
+ rationale: `Experiment for ${rec.story_key}/${rec.phase}: ${rec.description}. Verdict: ${verdict}.`
479
+ });
480
+ } catch {}
462
481
  return finalResult;
463
482
  }
464
483
  return { async runExperiments(db, recommendations, baselineRunId) {
@@ -481,4 +500,4 @@ function createExperimenter(config, deps) {
481
500
 
482
501
  //#endregion
483
502
  export { createExperimenter };
484
- //# sourceMappingURL=experimenter-BSu2ie3J.js.map
503
+ //# sourceMappingURL=experimenter-Cd04gAWQ.js.map
@@ -180,5 +180,82 @@ function aggregateTokenUsageForStory(db, runId, storyKey) {
180
180
  }
181
181
 
182
182
  //#endregion
183
- export { aggregateTokenUsageForRun, aggregateTokenUsageForStory, compareRunMetrics, getBaselineRunMetrics, getRunMetrics, getStoryMetricsForRun, incrementRunRestarts, listRunMetrics, tagRunAsBaseline, writeRunMetrics, writeStoryMetrics };
184
- //# sourceMappingURL=metrics-BSg8VIHd.js.map
183
+ //#region src/persistence/schemas/operational.ts
184
+ /**
185
+ * Category and key-schema constants for operational findings in the decision store.
186
+ *
187
+ * These constants avoid string literals scattered across supervisor, orchestrator,
188
+ * and experimenter code. Import them wherever you need to insert or query
189
+ * operational decisions.
190
+ */
191
+ /**
192
+ * Category for supervisor stall events and run-level summaries.
193
+ *
194
+ * Key schemas:
195
+ * - Stall finding: "stall:{storyKey}:{timestamp}"
196
+ * - Run summary: "run-summary:{runId}"
+ *
+ * The stall {timestamp} is the `Date.now()` millisecond value taken when the
+ * findings are written; all stories recorded by the same write share it.
+ * Both kinds of finding are stored with phase "supervisor".
+ * A run summary is skipped when no story succeeded, failed or escalated, or
+ * when the run id is null.
197
+ *
198
+ * Value shapes:
199
+ *
200
+ * Stall finding:
201
+ * ```json
202
+ * {
203
+ * "phase": "string", // story phase at stall time, e.g. "code-review"
204
+ * "staleness_secs": 700,
205
+ * "attempt": 1, // restart attempt number (1-based)
206
+ * "outcome": "recovered" // "recovered" | "failed" | "max-restarts-escalated"
207
+ * }
208
+ * ```
209
+ *
210
+ * Run summary:
211
+ * ```json
212
+ * {
213
+ * "succeeded": ["1-1", "1-2"],
214
+ * "failed": ["1-3"],
215
+ * "escalated": [],
216
+ * "total_restarts": 0,
217
+ * "elapsed_seconds": 1234,
218
+ * "total_input_tokens": 50000,
219
+ * "total_output_tokens": 10000
220
+ * }
221
+ * ```
222
+ */
223
+ const OPERATIONAL_FINDING = "operational-finding";
224
+ /**
225
+ * Category for supervisor experiment results.
226
+ *
227
+ * Key schema: "experiment:{runId}:{timestamp}"
+ *
+ * In the key, {runId} is the baseline run id the experiment was measured
+ * against and {timestamp} is the `Date.now()` value at write time.
228
+ *
229
+ * Value shape:
230
+ * ```json
231
+ * {
232
+ * "target_metric": "token_regression",
233
+ * "before": 12000,
234
+ * "after": 9500,
235
+ * "verdict": "IMPROVED", // "IMPROVED" | "MIXED" | "REGRESSED"
236
+ * "branch_name": "supervisor/experiment/abc12345-dev-story-token-regression"
237
+ * }
238
+ * ```
+ *
+ * `branch_name` is stored as null unless the verdict is "IMPROVED" or "MIXED".
239
+ */
240
+ const EXPERIMENT_RESULT = "experiment-result";
241
+ /**
242
+ * Category for per-story wall-clock and efficiency metrics.
243
+ *
244
+ * Key schema: "{storyKey}:{runId}"
+ *
+ * {runId} falls back to the literal "unknown" when the orchestrator has no
+ * pipeline run id configured.
245
+ *
246
+ * Value shape:
247
+ * ```json
248
+ * {
249
+ * "wall_clock_seconds": 180,
250
+ * "input_tokens": 8000,
251
+ * "output_tokens": 2000,
252
+ * "review_cycles": 2,
253
+ * "stalled": false
254
+ * }
255
+ * ```
+ *
+ * The metrics CLI reader also accepts an optional `cost_usd` field and
+ * reports it only when it is greater than 0.
256
+ */
257
+ const STORY_METRICS = "story-metrics";
258
+
259
+ //#endregion
260
+ export { EXPERIMENT_RESULT, OPERATIONAL_FINDING, STORY_METRICS, aggregateTokenUsageForRun, aggregateTokenUsageForStory, compareRunMetrics, getBaselineRunMetrics, getRunMetrics, getStoryMetricsForRun, incrementRunRestarts, listRunMetrics, tagRunAsBaseline, writeRunMetrics, writeStoryMetrics };
261
+ //# sourceMappingURL=operational-CobuCGbM.js.map
@@ -1,7 +1,7 @@
1
1
  import { createLogger } from "./logger-C6n1g8uP.js";
2
2
  import { AdapterRegistry, createEventBus, createTuiApp, isTuiCapable, printNonTtyWarning } from "./event-bus-J-bw-pkp.js";
3
- import { addTokenUsage, createDecision, createPipelineRun, createRequirement, getArtifactByTypeForRun, getArtifactsByRun, getDecisionsByPhase, getDecisionsByPhaseForRun, getPipelineRunById, getTokenUsageSummary, registerArtifact, updatePipelineRun, updatePipelineRunConfig, upsertDecision } from "./decisions-DNYByk0U.js";
4
- import { aggregateTokenUsageForRun, aggregateTokenUsageForStory, getStoryMetricsForRun, writeRunMetrics, writeStoryMetrics } from "./metrics-BSg8VIHd.js";
3
+ import { addTokenUsage, createDecision, createPipelineRun, createRequirement, getArtifactByTypeForRun, getArtifactsByRun, getDecisionsByPhase, getDecisionsByPhaseForRun, getPipelineRunById, getTokenUsageSummary, registerArtifact, updatePipelineRun, updatePipelineRunConfig, upsertDecision } from "./decisions-SyswIRKz.js";
4
+ import { STORY_METRICS, aggregateTokenUsageForRun, aggregateTokenUsageForStory, getStoryMetricsForRun, writeRunMetrics, writeStoryMetrics } from "./operational-CobuCGbM.js";
5
5
  import { dirname, join } from "path";
6
6
  import { access, readFile, readdir, stat } from "fs/promises";
7
7
  import { existsSync, mkdirSync, readFileSync } from "fs";
@@ -5371,6 +5371,7 @@ function createImplementationOrchestrator(deps) {
5371
5371
  const HEARTBEAT_INTERVAL_MS = 3e4;
5372
5372
  const WATCHDOG_TIMEOUT_MS = 6e5;
5373
5373
  const _stalledStories = new Set();
5374
+ const _storiesWithStall = new Set();
5374
5375
  const _phaseStartMs = new Map();
5375
5376
  const _phaseEndMs = new Map();
5376
5377
  const _storyDispatches = new Map();
@@ -5424,6 +5425,28 @@ function createImplementationOrchestrator(deps) {
5424
5425
  review_cycles: reviewCycles,
5425
5426
  dispatches: _storyDispatches.get(storyKey) ?? 0
5426
5427
  });
5428
+ try {
5429
+ const runId = config.pipelineRunId ?? "unknown";
5430
+ createDecision(db, {
5431
+ pipeline_run_id: config.pipelineRunId,
5432
+ phase: "implementation",
5433
+ category: STORY_METRICS,
5434
+ key: `${storyKey}:${runId}`,
5435
+ value: JSON.stringify({
5436
+ wall_clock_seconds: wallClockSeconds,
5437
+ input_tokens: tokenAgg.input,
5438
+ output_tokens: tokenAgg.output,
5439
+ review_cycles: reviewCycles,
5440
+ stalled: _storiesWithStall.has(storyKey)
5441
+ }),
5442
+ rationale: `Story ${storyKey} completed with result=${result} in ${wallClockSeconds}s. Tokens: ${tokenAgg.input}+${tokenAgg.output}. Review cycles: ${reviewCycles}.`
5443
+ });
5444
+ } catch (decisionErr) {
5445
+ logger$16.warn({
5446
+ err: decisionErr,
5447
+ storyKey
5448
+ }, "Failed to write story-metrics decision (best-effort)");
5449
+ }
5427
5450
  } catch (err) {
5428
5451
  logger$16.warn({
5429
5452
  err,
@@ -5490,6 +5513,7 @@ function createImplementationOrchestrator(deps) {
5490
5513
  for (const [key, s] of _stories) if (s.phase !== "PENDING" && s.phase !== "COMPLETE" && s.phase !== "ESCALATED") {
5491
5514
  if (_stalledStories.has(key)) continue;
5492
5515
  _stalledStories.add(key);
5516
+ _storiesWithStall.add(key);
5493
5517
  logger$16.warn({
5494
5518
  storyKey: key,
5495
5519
  phase: s.phase,
@@ -11172,4 +11196,4 @@ function registerRunCommand(program, _version = "0.0.0", projectRoot = process.c
11172
11196
 
11173
11197
  //#endregion
11174
11198
  export { DatabaseWrapper, SUBSTRATE_OWNED_SETTINGS_KEYS, VALID_PHASES, buildPipelineStatusOutput, createContextCompiler, createDispatcher, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, findPackageRoot, formatOutput, formatPhaseCompletionSummary, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getSubstrateDefaultSettings, parseDbTimestampAsUtc, registerRunCommand, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveMainRepoRoot, runAnalysisPhase, runMigrations, runPlanningPhase, runRunAction, runSolutioningPhase, validateStopAfterFromConflict };
11175
- //# sourceMappingURL=run-CoP8UQU3.js.map
11199
+ //# sourceMappingURL=run-BrI2xzk7.js.map
@@ -0,0 +1,7 @@
1
+ import "./logger-C6n1g8uP.js";
2
+ import "./event-bus-J-bw-pkp.js";
3
+ import { registerRunCommand, runRunAction } from "./run-BrI2xzk7.js";
4
+ import "./decisions-SyswIRKz.js";
5
+ import "./operational-CobuCGbM.js";
6
+
7
+ export { runRunAction };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "substrate-ai",
3
- "version": "0.2.15",
3
+ "version": "0.2.16",
4
4
  "description": "Substrate — multi-agent orchestration daemon for AI coding agents",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -1,3 +0,0 @@
1
- import { addTokenUsage, createDecision, createPipelineRun, createRequirement, getArtifactByTypeForRun, getArtifactsByRun, getDecisionsByPhase, getDecisionsByPhaseForRun, getLatestRun, getPipelineRunById, getTokenUsageSummary, listRequirements, registerArtifact, updateDecision, updatePipelineRun, updatePipelineRunConfig, upsertDecision } from "./decisions-DNYByk0U.js";
2
-
3
- export { getLatestRun };
@@ -1,7 +0,0 @@
1
- import "./logger-C6n1g8uP.js";
2
- import "./event-bus-J-bw-pkp.js";
3
- import { registerRunCommand, runRunAction } from "./run-CoP8UQU3.js";
4
- import "./decisions-DNYByk0U.js";
5
- import "./metrics-BSg8VIHd.js";
6
-
7
- export { runRunAction };