substrate-ai 0.14.0 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -4,7 +4,7 @@ import { createLogger } from "../logger-KeHncl-f.js";
4
4
  import { createEventBus } from "../helpers-CElYrONe.js";
5
5
  import { AdapterRegistry, BudgetConfigSchema, CURRENT_CONFIG_FORMAT_VERSION, CURRENT_TASK_GRAPH_VERSION, ConfigError, CostTrackerConfigSchema, DEFAULT_CONFIG, DoltClient, DoltNotInstalled, EXPERIMENT_RESULT, GlobalSettingsSchema, IngestionServer, MonitorDatabaseImpl, OPERATIONAL_FINDING, PartialGlobalSettingsSchema, PartialProviderConfigSchema, ProvidersSchema, RoutingRecommender, STORY_METRICS, TelemetryConfigSchema, addTokenUsage, aggregateTokenUsageForRun, checkDoltInstalled, compareRunMetrics, createAmendmentRun, createConfigSystem, createDecision, createDoltClient, createPipelineRun, getActiveDecisions, getAllCostEntriesFiltered, getBaselineRunMetrics, getDecisionsByCategory, getDecisionsByPhaseForRun, getLatestCompletedRun, getLatestRun, getPipelineRunById, getPlanningCostTotal, getRetryableEscalations, getRunMetrics, getSessionCostSummary, getSessionCostSummaryFiltered, getStoryMetricsForRun, getTokenUsageSummary, incrementRunRestarts, initSchema, initializeDolt, listRequirements, listRunMetrics, loadParentRunDecisions, supersedeDecision, tagRunAsBaseline, updatePipelineRun } from "../dist-CLvAwmT7.js";
6
6
  import "../adapter-registry-DXLMTmfD.js";
7
- import { AdapterTelemetryPersistence, AppError, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, GitClient, GrammarLoader, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SymbolParser, createContextCompiler, createDispatcher, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, createTelemetryAdvisor, formatPhaseCompletionSummary, registerFactoryCommand, registerRunCommand, registerScenariosCommand, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runSolutioningPhase, validateStopAfterFromConflict } from "../run-CUMPhuVq.js";
7
+ import { AdapterTelemetryPersistence, AppError, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, GitClient, GrammarLoader, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SymbolParser, createContextCompiler, createDispatcher, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, createTelemetryAdvisor, formatPhaseCompletionSummary, getFactoryRunSummaries, getScenarioResultsForRun, listGraphRuns, registerFactoryCommand, registerRunCommand, registerScenariosCommand, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runSolutioningPhase, validateStopAfterFromConflict } from "../run-CRP9jiCz.js";
8
8
  import "../errors-D1LU8CZ9.js";
9
9
  import "../routing-CcBOCuC9.js";
10
10
  import "../decisions-C0pz9Clx.js";
@@ -4359,7 +4359,7 @@ async function runSupervisorAction(options, deps = {}) {
4359
4359
  await initSchema(expAdapter);
4360
4360
  const { runRunAction: runPipeline } = await import(
4361
4361
  /* @vite-ignore */
4362
- "../run-CZo7hpsh.js"
4362
+ "../run-BPSzSeU8.js"
4363
4363
  );
4364
4364
  const runStoryFn = async (opts) => {
4365
4365
  const exitCode = await runPipeline({
@@ -4696,8 +4696,22 @@ function printCategoryTable(stats, label) {
4696
4696
  process.stdout.write(` ${c.category.padEnd(22)} ${c.totalTokens.toLocaleString().padStart(12)} ${pct.padStart(8)} ${String(c.eventCount).padStart(8)} ${avg.padStart(10)} ${c.trend}\n`);
4697
4697
  }
4698
4698
  }
4699
/**
 * Print a fixed-width table of factory graph runs to stdout.
 *
 * Writes a title line with the record count, a header row, and then one row per
 * run containing: the first 8 characters of the run id, the satisfaction score
 * as a percentage, a pass/fail mark, the start timestamp truncated to 19
 * characters, the total cost in USD (4 decimals), and the convergence status.
 * Missing (null/undefined) score, passes, start time, and status are rendered
 * as "—"; a missing cost is rendered as $0.0000.
 *
 * @param {Array<object>} runs - Rows exposing run_id, satisfaction_score,
 *   passes, started_at, total_cost_usd and convergence_status.
 * @returns {void}
 */
function printFactoryRunTable(runs) {
	process.stdout.write(`\nFactory Runs (${runs.length} records)\n`);
	process.stdout.write("─".repeat(80) + "\n");
	process.stdout.write(` ${"run_id".padEnd(10)} ${"score".padStart(7)} ${"passes".padStart(7)} ${"started_at".padEnd(20)} ${"cost_usd".padStart(10)} ${"status".padEnd(16)}\n`);
	process.stdout.write(" " + "─".repeat(74) + "\n");
	for (const run of runs) {
		// `!= null` covers both null and undefined so a missing score never prints "NaN%".
		const scoreStr = run.satisfaction_score != null ? `${(Number(run.satisfaction_score) * 100).toFixed(1)}%` : "—";
		const passesStr = run.passes != null ? run.passes ? "✓" : "✗" : "—";
		// String() keeps this safe when the value is not already a string (e.g. a Date).
		const startedAt = run.started_at != null ? String(run.started_at).slice(0, 19) : "—";
		// NOTE(review): cost may be null or a numeric string depending on the DB driver — coerce before toFixed.
		const costStr = `$${Number(run.total_cost_usd ?? 0).toFixed(4)}`;
		const statusStr = run.convergence_status ?? "—";
		process.stdout.write(` ${String(run.run_id).slice(0, 8).padEnd(10)} ${scoreStr.padStart(7)} ${passesStr.padStart(7)} ${startedAt.padEnd(20)} ${costStr.padStart(10)} ${statusStr.padEnd(16)}\n`);
	}
}
4699
4713
  async function runMetricsAction(options) {
4700
- const { outputFormat, projectRoot, limit = 10, compare, tagBaseline, analysis, sprint, story, taskType, since, aggregate, efficiency, recommendations, turns, consumers, categories, compareStories, routingRecommendations } = options;
4714
+ const { outputFormat, projectRoot, limit = 10, compare, tagBaseline, analysis, sprint, story, taskType, since, aggregate, efficiency, recommendations, turns, consumers, categories, compareStories, routingRecommendations, run, factory } = options;
4701
4715
  const telemetryModes = [
4702
4716
  efficiency,
4703
4717
  recommendations,
@@ -4931,7 +4945,7 @@ async function runMetricsAction(options) {
4931
4945
  }
4932
4946
  const dbRoot = await resolveMainRepoRoot(projectRoot);
4933
4947
  const doltStateDir = join(dbRoot, ".substrate", "state", ".dolt");
4934
- if (!existsSync$1(doltStateDir)) {
4948
+ if (!existsSync$1(doltStateDir) && factory !== true && run === void 0) {
4935
4949
  if (outputFormat === "json") process.stdout.write(formatOutput({
4936
4950
  runs: [],
4937
4951
  message: "No metrics yet — no pipeline database found. Initialize Dolt with `substrate init`."
@@ -4945,6 +4959,60 @@ async function runMetricsAction(options) {
4945
4959
  });
4946
4960
  try {
4947
4961
  await initSchema(adapter);
4962
+ if (run !== void 0) {
4963
+ let rows = [];
4964
+ let resolvedRunId = run;
4965
+ try {
4966
+ rows = await getScenarioResultsForRun(adapter, run);
4967
+ if (rows.length === 0 && run.length < 36) {
4968
+ const allRuns = await listGraphRuns(adapter, 100);
4969
+ const matching = allRuns.filter((r) => r.id.startsWith(run));
4970
+ if (matching.length === 1 && matching[0] !== void 0) {
4971
+ resolvedRunId = matching[0].id;
4972
+ rows = await getScenarioResultsForRun(adapter, resolvedRunId);
4973
+ }
4974
+ }
4975
+ } catch (err) {
4976
+ logger$10.debug({ err }, "getScenarioResultsForRun failed");
4977
+ }
4978
+ if (rows.length === 0) {
4979
+ const msg = `No factory run found with id: ${run}`;
4980
+ if (outputFormat === "json") process.stdout.write(formatOutput({ message: msg }, "json", true) + "\n");
4981
+ else process.stderr.write(`Error: ${msg}\n`);
4982
+ return 1;
4983
+ }
4984
+ if (outputFormat === "json") process.stdout.write(formatOutput({
4985
+ run_id: resolvedRunId,
4986
+ type: "factory",
4987
+ iterations: rows
4988
+ }, "json", true) + "\n");
4989
+ else {
4990
+ process.stdout.write(`\nFactory Run: ${resolvedRunId}\n`);
4991
+ process.stdout.write("─".repeat(80) + "\n");
4992
+ process.stdout.write(` ${"#".padStart(3)} ${"score".padStart(7)} ${"passes".padStart(7)} ${"passed/total".padStart(13)} ${"executed_at".padEnd(20)}\n`);
4993
+ process.stdout.write(" " + "─".repeat(54) + "\n");
4994
+ for (const r of rows) {
4995
+ const scoreStr = `${(r.satisfaction_score * 100).toFixed(1)}%`;
4996
+ const passesStr = r.passes ? "✓" : "✗";
4997
+ const passedTotal = `${r.passed}/${r.total_scenarios}`;
4998
+ const execAt = String(r.executed_at).slice(0, 19);
4999
+ process.stdout.write(` ${String(r.iteration).padStart(3)} ${scoreStr.padStart(7)} ${passesStr.padStart(7)} ${passedTotal.padStart(13)} ${execAt.padEnd(20)}\n`);
5000
+ }
5001
+ }
5002
+ return 0;
5003
+ }
5004
+ if (factory === true) {
5005
+ let factoryRuns$1 = [];
5006
+ try {
5007
+ factoryRuns$1 = await getFactoryRunSummaries(adapter, limit);
5008
+ } catch (err) {
5009
+ logger$10.debug({ err }, "getFactoryRunSummaries failed in factory-only mode");
5010
+ }
5011
+ if (outputFormat === "json") process.stdout.write(formatOutput({ graph_runs: factoryRuns$1 }, "json", true) + "\n");
5012
+ else if (factoryRuns$1.length === 0) process.stdout.write("No factory runs recorded yet.\n");
5013
+ else printFactoryRunTable(factoryRuns$1);
5014
+ return 0;
5015
+ }
4948
5016
  if (tagBaseline !== void 0) {
4949
5017
  const row = await getRunMetrics(adapter, tagBaseline);
4950
5018
  if (!row) {
@@ -5033,18 +5101,26 @@ async function runMetricsAction(options) {
5033
5101
  const phaseBreakdownMap = {};
5034
5102
  try {
5035
5103
  const kvStore = new FileStateStore({ basePath: join(dbRoot, ".substrate") });
5036
- for (const run of runs) {
5037
- const raw = await kvStore.getMetric(run.run_id, "phase_token_breakdown");
5038
- phaseBreakdownMap[run.run_id] = raw !== void 0 ? raw : null;
5104
+ for (const run$1 of runs) {
5105
+ const raw = await kvStore.getMetric(run$1.run_id, "phase_token_breakdown");
5106
+ phaseBreakdownMap[run$1.run_id] = raw !== void 0 ? raw : null;
5039
5107
  }
5040
5108
  } catch {}
5109
+ let factoryRuns = [];
5110
+ try {
5111
+ factoryRuns = await getFactoryRunSummaries(adapter, limit);
5112
+ } catch (err) {
5113
+ logger$10.debug({ err }, "getFactoryRunSummaries failed — table may not exist in older databases");
5114
+ }
5041
5115
  if (outputFormat === "json") {
5042
- const runsWithBreakdown = runs.map((run) => ({
5043
- ...run,
5044
- phase_token_breakdown: phaseBreakdownMap[run.run_id] ?? null
5116
+ const runsWithBreakdown = runs.map((run$1) => ({
5117
+ ...run$1,
5118
+ type: "sdlc",
5119
+ phase_token_breakdown: phaseBreakdownMap[run$1.run_id] ?? null
5045
5120
  }));
5046
5121
  const jsonPayload = {
5047
5122
  runs: runsWithBreakdown,
5123
+ graph_runs: factoryRuns,
5048
5124
  story_metrics: storyMetrics
5049
5125
  };
5050
5126
  if (doltMetrics !== void 0) if (aggregate) {
@@ -5066,23 +5142,23 @@ async function runMetricsAction(options) {
5066
5142
  } else jsonPayload.dolt_metrics = doltMetrics;
5067
5143
  process.stdout.write(formatOutput(jsonPayload, "json", true) + "\n");
5068
5144
  } else {
5069
- if (runs.length === 0 && storyMetrics.length === 0 && (doltMetrics === void 0 || doltMetrics.length === 0)) {
5145
+ if (runs.length === 0 && storyMetrics.length === 0 && (doltMetrics === void 0 || doltMetrics.length === 0) && factoryRuns.length === 0) {
5070
5146
  process.stdout.write("No run metrics recorded yet. Run `substrate run` to generate metrics.\n");
5071
5147
  return 0;
5072
5148
  }
5073
5149
  if (runs.length > 0) {
5074
5150
  process.stdout.write(`\nPipeline Run Metrics (last ${runs.length} runs)\n`);
5075
5151
  process.stdout.write("─".repeat(80) + "\n");
5076
- for (const run of runs) {
5077
- const isBaseline = run.is_baseline ? " [BASELINE]" : "";
5078
- process.stdout.write(`\nRun: ${run.run_id}${isBaseline}\n`);
5079
- process.stdout.write(` Status: ${run.status} | Methodology: ${run.methodology}\n`);
5080
- process.stdout.write(` Started: ${run.started_at}\n`);
5081
- if (run.completed_at) process.stdout.write(` Completed: ${run.completed_at} (${run.wall_clock_seconds}s)\n`);
5082
- process.stdout.write(` Stories: attempted=${run.stories_attempted} succeeded=${run.stories_succeeded} failed=${run.stories_failed} escalated=${run.stories_escalated}\n`);
5083
- process.stdout.write(` Tokens: ${(run.total_input_tokens ?? 0).toLocaleString()} in / ${(run.total_output_tokens ?? 0).toLocaleString()} out $${(run.total_cost_usd ?? 0).toFixed(4)}\n`);
5084
- process.stdout.write(` Cycles: ${run.total_review_cycles} | Dispatches: ${run.total_dispatches} | Concurrency: ${run.concurrency_setting}\n`);
5085
- const breakdown = phaseBreakdownMap[run.run_id];
5152
+ for (const run$1 of runs) {
5153
+ const isBaseline = run$1.is_baseline ? " [BASELINE]" : "";
5154
+ process.stdout.write(`\nRun: ${run$1.run_id}${isBaseline}\n`);
5155
+ process.stdout.write(` Status: ${run$1.status} | Methodology: ${run$1.methodology}\n`);
5156
+ process.stdout.write(` Started: ${run$1.started_at}\n`);
5157
+ if (run$1.completed_at) process.stdout.write(` Completed: ${run$1.completed_at} (${run$1.wall_clock_seconds}s)\n`);
5158
+ process.stdout.write(` Stories: attempted=${run$1.stories_attempted} succeeded=${run$1.stories_succeeded} failed=${run$1.stories_failed} escalated=${run$1.stories_escalated}\n`);
5159
+ process.stdout.write(` Tokens: ${(run$1.total_input_tokens ?? 0).toLocaleString()} in / ${(run$1.total_output_tokens ?? 0).toLocaleString()} out $${(run$1.total_cost_usd ?? 0).toFixed(4)}\n`);
5160
+ process.stdout.write(` Cycles: ${run$1.total_review_cycles} | Dispatches: ${run$1.total_dispatches} | Concurrency: ${run$1.concurrency_setting}\n`);
5161
+ const breakdown = phaseBreakdownMap[run$1.run_id];
5086
5162
  if (breakdown !== null && breakdown !== void 0 && breakdown.entries.length > 0) {
5087
5163
  process.stdout.write(" Phase Token Breakdown:\n");
5088
5164
  for (const entry of breakdown.entries) process.stdout.write(` ${entry.phase.padEnd(10)} | ${entry.model.padEnd(30)} | in: ${entry.inputTokens} | out: ${entry.outputTokens} | dispatches: ${entry.dispatchCount}\n`);
@@ -5136,6 +5212,7 @@ async function runMetricsAction(options) {
5136
5212
  process.stdout.write(` ${m.storyKey.padEnd(16)} ${m.taskType.padEnd(16)} ${tokIn.padStart(10)} ${tokOut.padStart(11)} ${wall.padStart(10)} ${res.padEnd(12)}\n`);
5137
5213
  }
5138
5214
  }
5215
+ if (factoryRuns.length > 0) printFactoryRunTable(factoryRuns);
5139
5216
  }
5140
5217
  return 0;
5141
5218
  } catch (err) {
@@ -5151,7 +5228,7 @@ async function runMetricsAction(options) {
5151
5228
  }
5152
5229
  }
5153
5230
  function registerMetricsCommand(program, _version = "0.0.0", projectRoot = process.cwd()) {
5154
- program.command("metrics").description("Show historical pipeline run metrics and cross-run comparison").option("--project-root <path>", "Project root directory", projectRoot).option("--output-format <format>", "Output format: human (default) or json", "human").option("--limit <n>", "Number of runs to show (default: 10)", (v) => parseInt(v, 10), 10).option("--compare <run-id-a,run-id-b>", "Compare two runs side-by-side (comma-separated IDs, e.g. abc123,def456)").option("--tag-baseline <run-id>", "Mark a run as the performance baseline").option("--analysis <run-id>", "Read and output the analysis report for the specified run (AC5 of Story 17-3)").option("--sprint <sprint>", "Filter StateStore metrics by sprint (e.g. sprint-1)").option("--story <story-key>", "Filter StateStore metrics by story key (e.g. 26-1)").option("--task-type <type>", "Filter StateStore metrics by task type (e.g. dev-story)").option("--since <iso-date>", "Filter StateStore metrics at or after this ISO timestamp").option("--aggregate", "Aggregate StateStore metrics grouped by task_type").option("--efficiency", "Show telemetry efficiency scores for recent stories").option("--recommendations", "Show all telemetry recommendations across stories").option("--turns <storyKey>", "Show per-turn analysis for a specific story").option("--consumers <storyKey>", "Show consumer stats for a specific story").option("--categories", "Show category stats (optionally scoped by --story <storyKey>)").option("--compare-stories <storyA,storyB>", "Compare efficiency scores of two stories side-by-side (comma-separated keys)").option("--routing-recommendations", "Show routing recommendations derived from phase token breakdown history").action(async (opts) => {
5231
+ program.command("metrics").description("Show historical pipeline run metrics and cross-run comparison").option("--project-root <path>", "Project root directory", projectRoot).option("--output-format <format>", "Output format: human (default) or json", "human").option("--limit <n>", "Number of runs to show (default: 10)", (v) => parseInt(v, 10), 10).option("--compare <run-id-a,run-id-b>", "Compare two runs side-by-side (comma-separated IDs, e.g. abc123,def456)").option("--tag-baseline <run-id>", "Mark a run as the performance baseline").option("--analysis <run-id>", "Read and output the analysis report for the specified run (AC5 of Story 17-3)").option("--sprint <sprint>", "Filter StateStore metrics by sprint (e.g. sprint-1)").option("--story <story-key>", "Filter StateStore metrics by story key (e.g. 26-1)").option("--task-type <type>", "Filter StateStore metrics by task type (e.g. dev-story)").option("--since <iso-date>", "Filter StateStore metrics at or after this ISO timestamp").option("--aggregate", "Aggregate StateStore metrics grouped by task_type").option("--efficiency", "Show telemetry efficiency scores for recent stories").option("--recommendations", "Show all telemetry recommendations across stories").option("--turns <storyKey>", "Show per-turn analysis for a specific story").option("--consumers <storyKey>", "Show consumer stats for a specific story").option("--categories", "Show category stats (optionally scoped by --story <storyKey>)").option("--compare-stories <storyA,storyB>", "Compare efficiency scores of two stories side-by-side (comma-separated keys)").option("--routing-recommendations", "Show routing recommendations derived from phase token breakdown history").option("--run <run-id>", "Show per-iteration score history for a specific factory run").option("--factory", "Show only factory graph run metrics (excludes SDLC runs)").action(async (opts) => {
5155
5232
  const outputFormat = opts.outputFormat === "json" ? "json" : "human";
5156
5233
  let compareIds;
5157
5234
  if (opts.compare !== void 0) {
@@ -5186,7 +5263,9 @@ function registerMetricsCommand(program, _version = "0.0.0", projectRoot = proce
5186
5263
  ...opts.consumers !== void 0 && { consumers: opts.consumers },
5187
5264
  ...opts.categories !== void 0 && { categories: opts.categories },
5188
5265
  ...compareStoriesIds !== void 0 && { compareStories: compareStoriesIds },
5189
- ...opts.routingRecommendations !== void 0 && { routingRecommendations: opts.routingRecommendations }
5266
+ ...opts.routingRecommendations !== void 0 && { routingRecommendations: opts.routingRecommendations },
5267
+ ...opts.run !== void 0 && { run: opts.run },
5268
+ ...opts.factory !== void 0 && { factory: opts.factory }
5190
5269
  };
5191
5270
  const exitCode = await runMetricsAction(metricsOpts);
5192
5271
  process.exitCode = exitCode;
@@ -2,7 +2,7 @@ import "./health-DswaC1q5.js";
2
2
  import "./logger-KeHncl-f.js";
3
3
  import "./helpers-CElYrONe.js";
4
4
  import "./dist-CLvAwmT7.js";
5
- import { normalizeGraphSummaryToStatus, registerRunCommand, runRunAction } from "./run-CUMPhuVq.js";
5
+ import { normalizeGraphSummaryToStatus, registerRunCommand, runRunAction } from "./run-CRP9jiCz.js";
6
6
  import "./routing-CcBOCuC9.js";
7
7
  import "./decisions-C0pz9Clx.js";
8
8
 
@@ -1,7 +1,7 @@
1
1
  import { BMAD_BASELINE_TOKENS_FULL, DoltMergeConflict, FileStateStore, STOP_AFTER_VALID_PHASES, STORY_KEY_PATTERN, VALID_PHASES, WorkGraphRepository, __commonJS, __require, __toESM, buildPipelineStatusOutput, createDatabaseAdapter, formatOutput, formatPipelineSummary, formatTokenTelemetry, inspectProcessTree, parseDbTimestampAsUtc, resolveMainRepoRoot, validateStoryKey } from "./health-DswaC1q5.js";
2
2
  import { createLogger } from "./logger-KeHncl-f.js";
3
3
  import { TypedEventBusImpl, createEventBus, createTuiApp, isTuiCapable, printNonTtyWarning, sleep } from "./helpers-CElYrONe.js";
4
- import { ADVISORY_NOTES, Categorizer, ConsumerAnalyzer, DEFAULT_GLOBAL_SETTINGS, DispatcherImpl, DoltClient, ESCALATION_DIAGNOSIS, EfficiencyScorer, IngestionServer, LogTurnAnalyzer, OPERATIONAL_FINDING, Recommender, RoutingRecommender, RoutingResolver, RoutingTelemetry, RoutingTokenAccumulator, RoutingTuner, STORY_METRICS, STORY_OUTCOME, SubstrateConfigSchema, TEST_EXPANSION_FINDING, TEST_PLAN, TelemetryNormalizer, TelemetryPipeline, TurnAnalyzer, addTokenUsage, aggregateTokenUsageForRun, aggregateTokenUsageForStory, callLLM, createConfigSystem, createDecision, createPipelineRun, createRequirement, detectInterfaceChanges, getArtifactByTypeForRun, getArtifactsByRun, getDecisionsByCategory, getDecisionsByPhase, getDecisionsByPhaseForRun, getPipelineRunById, getRunningPipelineRuns, getStoryMetricsForRun, getTokenUsageSummary, initSchema, loadModelRoutingConfig, registerArtifact, updatePipelineRun, updatePipelineRunConfig, upsertDecision, writeRunMetrics, writeStoryMetrics } from "./dist-CLvAwmT7.js";
4
+ import { ADVISORY_NOTES, Categorizer, ConsumerAnalyzer, DEFAULT_GLOBAL_SETTINGS, DispatcherImpl, DoltClient, ESCALATION_DIAGNOSIS, EfficiencyScorer, IngestionServer, LogTurnAnalyzer, OPERATIONAL_FINDING, Recommender, RoutingRecommender, RoutingResolver, RoutingTelemetry, RoutingTokenAccumulator, RoutingTuner, STORY_METRICS, STORY_OUTCOME, SubstrateConfigSchema, TEST_EXPANSION_FINDING, TEST_PLAN, TelemetryNormalizer, TelemetryPipeline, TurnAnalyzer, addTokenUsage, aggregateTokenUsageForRun, aggregateTokenUsageForStory, callLLM, createConfigSystem, createDatabaseAdapter$1, createDecision, createPipelineRun, createRequirement, detectInterfaceChanges, getArtifactByTypeForRun, getArtifactsByRun, getDecisionsByCategory, getDecisionsByPhase, getDecisionsByPhaseForRun, getPipelineRunById, getRunningPipelineRuns, getStoryMetricsForRun, getTokenUsageSummary, initSchema, loadModelRoutingConfig, registerArtifact, updatePipelineRun, updatePipelineRunConfig, upsertDecision, writeRunMetrics, writeStoryMetrics } from "./dist-CLvAwmT7.js";
5
5
  import { basename, dirname, extname, join } from "path";
6
6
  import { access, readFile, readdir, stat } from "fs/promises";
7
7
  import { EventEmitter } from "node:events";
@@ -22182,18 +22182,30 @@ function createConvergenceController() {
22182
22182
  failingNodes
22183
22183
  };
22184
22184
  },
22185
- checkGoalGates(graph, runId, eventBus) {
22185
+ checkGoalGates(graph, runId, eventBus, options) {
22186
22186
  const failedGates = [];
22187
22187
  for (const [id, node] of graph.nodes) {
22188
22188
  if (!node.goalGate) continue;
22189
- const status = outcomes.get(id);
22190
- const satisfied = status === "SUCCESS" || status === "PARTIAL_SUCCESS";
22191
- eventBus?.emit("graph:goal-gate-checked", {
22192
- runId,
22193
- nodeId: id,
22194
- satisfied
22195
- });
22196
- if (!satisfied) failedGates.push(id);
22189
+ if (options?.satisfactionThreshold !== void 0 && options?.context !== void 0) {
22190
+ const score = options.context.getNumber("satisfaction_score", 0);
22191
+ const satisfied = score >= options.satisfactionThreshold;
22192
+ eventBus?.emit("graph:goal-gate-checked", {
22193
+ runId,
22194
+ nodeId: id,
22195
+ satisfied,
22196
+ score
22197
+ });
22198
+ if (!satisfied) failedGates.push(id);
22199
+ } else {
22200
+ const status = outcomes.get(id);
22201
+ const satisfied = status === "SUCCESS" || status === "PARTIAL_SUCCESS";
22202
+ eventBus?.emit("graph:goal-gate-checked", {
22203
+ runId,
22204
+ nodeId: id,
22205
+ satisfied
22206
+ });
22207
+ if (!satisfied) failedGates.push(id);
22208
+ }
22197
22209
  }
22198
22210
  return {
22199
22211
  satisfied: failedGates.length === 0,
@@ -22562,6 +22574,263 @@ function injectRemediationContext(context, remediation) {
22562
22574
  context.set(REMEDIATION_CONTEXT_KEY, remediation);
22563
22575
  }
22564
22576
 
22577
+ //#endregion
22578
+ //#region packages/factory/dist/convergence/dual-signal.js
22579
+ /**
22580
+ * Dual-signal coordinator — evaluates code review and scenario signals together.
22581
+ *
22582
+ * Code review is the authoritative Phase 2 decision-maker.
22583
+ * Scenario score is a parallel signal used for monitoring and agreement tracking.
22584
+ *
22585
+ * Story 46-5.
22586
+ */
22587
+ /**
22588
+ * Context key under which code review handlers store their verdict.
22589
+ * Used by the executor to read the verdict when emitting advisory events.
22590
+ * Story 46-6.
22591
+ */
22592
+ const CONTEXT_KEY_CODE_REVIEW_VERDICT = "factory.codeReviewVerdict";
22593
/**
 * Compare the code review verdict with the scenario score, without side effects.
 *
 * A verdict of `SHIP_IT` or `LGTM_WITH_NOTES` counts as a code review pass.
 * The scenario signal passes when `score >= threshold`. The verdict is echoed
 * back unchanged as the authoritative decision.
 *
 * @param verdict - Code review verdict string.
 * @param score - Scenario satisfaction score.
 * @param threshold - Minimum score for the scenario signal to pass.
 * @returns Object with both pass flags, an "AGREE"/"DISAGREE" marker, the
 *   verdict, and the score/threshold that were compared.
 */
function evaluateDualSignal(verdict, score, threshold) {
	const passingVerdicts = ["SHIP_IT", "LGTM_WITH_NOTES"];
	const codeReviewPassed = passingVerdicts.includes(verdict);
	const scenarioPassed = score >= threshold;
	let agreement = "DISAGREE";
	if (codeReviewPassed === scenarioPassed) agreement = "AGREE";
	return {
		codeReviewPassed,
		scenarioPassed,
		agreement,
		authoritativeDecision: verdict,
		score,
		threshold
	};
}
22612
/**
 * Build a coordinator whose `evaluate` call scores both signals and publishes
 * the outcome on the supplied event bus.
 *
 * Every call emits `scenario:score-computed`. When
 * `options.qualityMode === 'scenario-primary'` it additionally emits
 * `scenario:advisory-computed`, carrying the code review verdict alongside the
 * score, threshold and agreement marker.
 *
 * @param options - Must provide `eventBus` and `threshold`; `qualityMode` is optional.
 * @returns Object with an `evaluate(verdict, score, runId)` method that returns
 *   the evaluation result.
 */
function createDualSignalCoordinator(options) {
	return { evaluate(verdict, score, runId) {
		const result = evaluateDualSignal(verdict, score, options.threshold);
		const scorePayload = {
			runId,
			score: result.score,
			threshold: result.threshold,
			passes: result.scenarioPassed,
			agreement: result.agreement,
			codeReviewPassed: result.codeReviewPassed,
			scenarioPassed: result.scenarioPassed,
			authoritativeDecision: result.authoritativeDecision
		};
		options.eventBus.emit("scenario:score-computed", scorePayload);
		// The advisory event is only published in scenario-primary mode.
		if (options.qualityMode !== "scenario-primary") return result;
		const advisoryPayload = {
			runId,
			verdict: result.authoritativeDecision,
			codeReviewPassed: result.codeReviewPassed,
			score: result.score,
			threshold: result.threshold,
			agreement: result.agreement
		};
		options.eventBus.emit("scenario:advisory-computed", advisoryPayload);
		return result;
	} };
}
22644
+
22645
+ //#endregion
22646
+ //#region packages/factory/dist/scenarios/scorer.js
22647
/**
 * Derive a satisfaction score from an aggregated scenario run result.
 *
 * The overall score is `summary.passed / summary.total` (0 when total is 0).
 * The `breakdown` array has one entry per item in `result.scenarios`; every
 * scenario has weight 1, and a passing scenario contributes `1 / scenarios.length`.
 *
 * @param result - Object with a `scenarios` array (each with `name` and
 *   `status`) and a `summary` holding `total` and `passed`.
 * @param threshold - Minimum score to consider passing (default 0.8).
 * @returns Object with `score`, `passes`, `threshold` and `breakdown`.
 */
function computeSatisfactionScore(result, threshold = .8) {
	const scenarios = result.scenarios;
	const scenarioCount = scenarios.length;
	const breakdown = scenarios.map((scenario) => {
		const didPass = scenario.status === "pass";
		let contribution = 0;
		if (scenarioCount > 0 && didPass) contribution = 1 / scenarioCount;
		return {
			name: scenario.name,
			passed: didPass,
			weight: 1,
			contribution
		};
	});
	const summary = result.summary;
	const totalRun = summary.total;
	const totalPassed = summary.passed;
	let score = 0;
	if (totalRun !== 0) score = totalPassed / totalRun;
	return {
		score,
		passes: score >= threshold,
		threshold,
		breakdown
	};
}
22679
+
22680
+ //#endregion
22681
+ //#region packages/factory/dist/persistence/factory-queries.js
22682
+ /**
22683
+ * Factory persistence query functions for graph runs, node results, and scenario results.
22684
+ *
22685
+ * All functions accept a DatabaseAdapter as first argument, follow the established
22686
+ * pattern from @substrate-ai/core's metrics queries, and use portable SQL patterns
22687
+ * (select-then-delete-then-insert in transactions) compatible with both
22688
+ * InMemoryDatabaseAdapter and DoltDatabaseAdapter.
22689
+ *
22690
+ * Story 46-3: Score Persistence to Database.
22691
+ */
22692
/**
 * Write a graph_runs row, replacing any existing row with the same id.
 *
 * Runs inside a single adapter transaction: look up the id, delete the row if
 * one is found, then insert a fresh row built from `input`. This avoids
 * `INSERT OR REPLACE`, which is SQLite-specific.
 *
 * Optional fields default as follows: graph_goal, completed_at, final_outcome
 * and checkpoint_path become null; total_cost_usd and node_count become 0.
 *
 * @param adapter - Database adapter exposing `transaction(fn)`.
 * @param input - Row values; `id`, `graph_file`, `status` and `started_at` are passed through as-is.
 */
async function upsertGraphRun(adapter, input) {
	await adapter.transaction(async (tx) => {
		const runId = input.id;
		const priorRows = await tx.query("SELECT id FROM graph_runs WHERE id = ?", [runId]);
		if (priorRows.length > 0) await tx.query("DELETE FROM graph_runs WHERE id = ?", [input.id]);
		const values = [
			input.id,
			input.graph_file,
			input.graph_goal ?? null,
			input.status,
			input.started_at,
			input.completed_at ?? null,
			input.total_cost_usd ?? 0,
			input.node_count ?? 0,
			input.final_outcome ?? null,
			input.checkpoint_path ?? null
		];
		await tx.query(`INSERT INTO graph_runs (
id, graph_file, graph_goal, status, started_at, completed_at,
total_cost_usd, node_count, final_outcome, checkpoint_path
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, values);
	});
}
22723
/**
 * Insert one graph_node_results row describing a single node execution attempt.
 *
 * The insert is issued inside an adapter transaction. Optional fields default
 * to null, except `cost_usd`, which defaults to 0.
 *
 * @param adapter - Database adapter exposing `transaction(fn)`.
 * @param input - Row values: run_id, node_id, attempt, status, started_at and
 *   the optional completed_at, duration_ms, cost_usd, failure_reason, context_snapshot.
 */
async function insertGraphNodeResult(adapter, input) {
	await adapter.transaction(async (tx) => {
		const values = [
			input.run_id,
			input.node_id,
			input.attempt,
			input.status,
			input.started_at,
			input.completed_at ?? null,
			input.duration_ms ?? null,
			input.cost_usd ?? 0,
			input.failure_reason ?? null,
			input.context_snapshot ?? null
		];
		await tx.query(`INSERT INTO graph_node_results (
run_id, node_id, attempt, status, started_at, completed_at,
duration_ms, cost_usd, failure_reason, context_snapshot
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, values);
	});
}
22748
/**
 * Insert one scenario_results row for a single scenario run iteration.
 *
 * The insert is issued inside an adapter transaction. `passes` is stored as
 * 1 or 0, `details` defaults to null, and `executed_at` defaults to the current
 * time as an ISO string when not supplied. `details` is passed through as given,
 * so callers should serialize it beforehand.
 *
 * @param adapter - Database adapter exposing `transaction(fn)`.
 * @param input - Row values for the scenario result.
 */
async function insertScenarioResult(adapter, input) {
	await adapter.transaction(async (tx) => {
		const passesFlag = input.passes ? 1 : 0;
		const values = [
			input.run_id,
			input.node_id,
			input.iteration,
			input.total_scenarios,
			input.passed,
			input.failed,
			input.satisfaction_score,
			input.threshold,
			passesFlag,
			input.details ?? null,
			input.executed_at ?? new Date().toISOString()
		];
		await tx.query(`INSERT INTO scenario_results (
run_id, node_id, iteration, total_scenarios, passed, failed,
satisfaction_score, threshold, passes, details, executed_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, values);
	});
}
22774
/**
 * Fetch every scenario_results row for one run, ordered by iteration ascending.
 *
 * Each returned row is a copy of the stored row with `passes` coerced to a boolean.
 *
 * @param adapter - Database adapter exposing `query(sql, params)`.
 * @param runId - The run id to filter on.
 * @returns Array of rows; empty when the run has no scenario results.
 */
async function getScenarioResultsForRun(adapter, runId) {
	const sql = "SELECT * FROM scenario_results WHERE run_id = ? ORDER BY iteration ASC";
	const storedRows = await adapter.query(sql, [runId]);
	return storedRows.map((stored) => {
		const passes = Boolean(stored.passes);
		return { ...stored, passes };
	});
}
22786
/**
 * Return graph_runs rows, newest first by started_at.
 *
 * @param adapter - Database adapter exposing `query(sql, params)`.
 * @param limit - Maximum number of rows to return (default 20).
 * @returns Whatever the adapter's query resolves to for the graph_runs select.
 */
async function listGraphRuns(adapter, limit = 20) {
	const sql = "SELECT * FROM graph_runs ORDER BY started_at DESC LIMIT ?";
	const params = [limit];
	return adapter.query(sql, params);
}
22794
/**
 * Retrieve a summarized list of factory graph runs, enriched with per-run
 * iteration counts, scores and the latest pass flag from `scenario_results`.
 *
 * Issues three queries:
 *   1. SELECT from graph_runs ordered by started_at DESC, limited to `limit`
 *   2. GROUP BY aggregation on scenario_results for the iteration count and
 *      the MAX satisfaction score per run
 *   3. a self-join on scenario_results to read `passes` from each run's
 *      highest iteration
 *
 * NOTE(review): `satisfaction_score` is the per-run maximum while `passes`
 * reflects the latest iteration — confirm this mix is intended.
 *
 * Query errors are not caught here; the returned promise rejects and callers
 * are expected to handle it.
 *
 * @param adapter - Database adapter (already opened)
 * @param limit - Maximum number of graph runs to return (default 20)
 * @returns Array of run summaries ordered by started_at DESC; empty array when
 *   there are no graph runs. Runs without scenario results get
 *   `satisfaction_score: null`, `iterations: 0` and `passes: null`.
 */
async function getFactoryRunSummaries(adapter, limit = 20) {
	const runs = await adapter.query("SELECT id, started_at, completed_at, total_cost_usd, final_outcome FROM graph_runs ORDER BY started_at DESC LIMIT ?", [limit]);
	if (runs.length === 0) return [];
	const scenarioAgg = await adapter.query("SELECT run_id, COUNT(*) as iterations, MAX(satisfaction_score) as satisfaction_score FROM scenario_results GROUP BY run_id", []);
	const latestPassesRows = await adapter.query("SELECT s.run_id, s.passes FROM scenario_results s INNER JOIN (SELECT run_id, MAX(iteration) AS max_iter FROM scenario_results GROUP BY run_id) latest ON s.run_id = latest.run_id AND s.iteration = latest.max_iter", []);
	const scenarioMap = new Map();
	for (const row of scenarioAgg) scenarioMap.set(row.run_id, {
		iterations: row.iterations,
		satisfaction_score: row.satisfaction_score
	});
	const latestPassesMap = new Map();
	// Boolean() rather than `!== 0`, so a stored `false` is not reported as a pass;
	// this matches how getScenarioResultsForRun coerces the same column.
	for (const row of latestPassesRows) latestPassesMap.set(row.run_id, Boolean(row.passes));
	return runs.map((run) => {
		const agg = scenarioMap.get(run.id);
		return {
			run_id: run.id,
			satisfaction_score: agg !== void 0 ? agg.satisfaction_score : null,
			iterations: agg !== void 0 ? agg.iterations : 0,
			convergence_status: run.final_outcome,
			started_at: run.started_at,
			completed_at: run.completed_at,
			total_cost_usd: run.total_cost_usd,
			type: "factory",
			// null means the run has no scenario results at all.
			passes: latestPassesMap.get(run.id) ?? null
		};
	});
}
22833
+
22565
22834
  //#endregion
22566
22835
  //#region packages/factory/dist/graph/executor.js
22567
22836
  /**
@@ -22666,6 +22935,24 @@ function createGraphExecutor() {
22666
22935
  ...config.plateauThreshold !== void 0 ? { threshold: config.plateauThreshold } : {}
22667
22936
  });
22668
22937
  let convergenceIteration = 0;
22938
+ const runStartedAt = new Date().toISOString();
22939
+ let lastScenarioNodeId = "";
22940
+ const persistExit = async (finalStatus, finalOutcome) => {
22941
+ if (!config.adapter) return;
22942
+ try {
22943
+ await upsertGraphRun(config.adapter, {
22944
+ id: config.runId,
22945
+ graph_file: graph.id || config.runId,
22946
+ ...graph.goal ? { graph_goal: graph.goal } : {},
22947
+ status: finalStatus,
22948
+ started_at: runStartedAt,
22949
+ completed_at: new Date().toISOString(),
22950
+ final_outcome: finalOutcome,
22951
+ total_cost_usd: pipelineManager.getTotalCost(),
22952
+ node_count: graph.nodes.size
22953
+ });
22954
+ } catch {}
22955
+ };
22669
22956
  let completedNodes = [];
22670
22957
  let nodeRetries = {};
22671
22958
  let context = new GraphContext();
@@ -22679,6 +22966,40 @@ function createGraphExecutor() {
22679
22966
  if (config.scenarioStore) scenarioManifest = await config.scenarioStore.discover();
22680
22967
  const runStateManager = config.dotSource ? new RunStateManager({ runDir: config.logsRoot }) : null;
22681
22968
  if (runStateManager) await runStateManager.initRun(config.dotSource);
22969
+ if (config.adapter && config.eventBus) {
22970
+ const persistAdapter = config.adapter;
22971
+ const scenarioHandler = (payload) => {
22972
+ const threshold = config.satisfactionThreshold ?? .8;
22973
+ const scored = computeSatisfactionScore(payload.results, threshold);
22974
+ insertScenarioResult(persistAdapter, {
22975
+ run_id: config.runId,
22976
+ node_id: lastScenarioNodeId || "unknown",
22977
+ iteration: payload.iteration,
22978
+ total_scenarios: payload.results.summary.total,
22979
+ passed: payload.results.summary.passed,
22980
+ failed: payload.results.summary.failed,
22981
+ satisfaction_score: scored.score,
22982
+ threshold: scored.threshold,
22983
+ passes: scored.passes,
22984
+ details: JSON.stringify(scored.breakdown),
22985
+ executed_at: new Date().toISOString()
22986
+ }).catch((err) => {
22987
+ const msg = err instanceof Error ? err.message : String(err);
22988
+ console.debug(`[executor] scenario:completed persistence failed for run ${config.runId}: ${msg}`);
22989
+ });
22990
+ };
22991
+ config.eventBus.on("scenario:completed", scenarioHandler);
22992
+ }
22993
+ if (config.adapter) try {
22994
+ await upsertGraphRun(config.adapter, {
22995
+ id: config.runId,
22996
+ graph_file: graph.id || config.runId,
22997
+ ...graph.goal ? { graph_goal: graph.goal } : {},
22998
+ status: "running",
22999
+ started_at: runStartedAt,
23000
+ node_count: graph.nodes.size
23001
+ });
23002
+ } catch {}
22682
23003
  let currentNode;
22683
23004
  if (config.checkpointPath) {
22684
23005
  const checkpoint = await checkpointManager.load(config.checkpointPath);
@@ -22712,6 +23033,7 @@ function createGraphExecutor() {
22712
23033
  level: "session",
22713
23034
  reason: sessionResult.reason
22714
23035
  });
23036
+ await persistExit("failed", `Session budget exceeded: ${sessionResult.reason}`);
22715
23037
  return {
22716
23038
  status: "FAIL",
22717
23039
  failureReason: `Session budget exceeded: ${sessionResult.reason}`
@@ -22724,6 +23046,7 @@ function createGraphExecutor() {
22724
23046
  level: "pipeline",
22725
23047
  reason: pipelineResult.reason
22726
23048
  });
23049
+ await persistExit("failed", `Pipeline budget exceeded: ${pipelineResult.reason}`);
22727
23050
  return {
22728
23051
  status: "FAIL",
22729
23052
  failureReason: `Pipeline budget exceeded: ${pipelineResult.reason}`
@@ -22731,15 +23054,35 @@ function createGraphExecutor() {
22731
23054
  }
22732
23055
  const exitNode = graph.exitNode();
22733
23056
  if (currentNode.id === exitNode.id) {
22734
- const gateResult = controller.checkGoalGates(graph, config.runId, config.eventBus);
23057
+ const useScenarioPrimary = config.qualityMode === "scenario-primary";
23058
+ const gateResult = controller.checkGoalGates(graph, config.runId, config.eventBus, useScenarioPrimary || config.satisfactionThreshold !== void 0 ? {
23059
+ context,
23060
+ satisfactionThreshold: config.satisfactionThreshold ?? .8
23061
+ } : void 0);
23062
+ if (useScenarioPrimary && config.eventBus) {
23063
+ const rawVerdict = context.getString(CONTEXT_KEY_CODE_REVIEW_VERDICT, "");
23064
+ if (rawVerdict !== "") {
23065
+ const codeReviewVerdict = rawVerdict;
23066
+ const coordinator = createDualSignalCoordinator({
23067
+ eventBus: config.eventBus,
23068
+ threshold: config.satisfactionThreshold ?? .8,
23069
+ qualityMode: "scenario-primary"
23070
+ });
23071
+ const score = context.getNumber("satisfaction_score", 0);
23072
+ coordinator.evaluate(codeReviewVerdict, score, config.runId);
23073
+ }
23074
+ }
22735
23075
  if (!gateResult.satisfied) {
22736
23076
  const failingNodeId = gateResult.failedGates[0];
22737
23077
  const failingGateNode = graph.nodes.get(failingNodeId);
22738
23078
  const retryTargetId = failingGateNode ? controller.resolveRetryTarget(failingGateNode, graph) : null;
22739
- if (!retryTargetId) return {
22740
- status: "FAIL",
22741
- failureReason: "Goal gate failed: no retry target"
22742
- };
23079
+ if (!retryTargetId) {
23080
+ await persistExit("failed", "Goal gate failed: no retry target");
23081
+ return {
23082
+ status: "FAIL",
23083
+ failureReason: "Goal gate failed: no retry target"
23084
+ };
23085
+ }
22743
23086
  const retryNode = graph.nodes.get(retryTargetId);
22744
23087
  if (!retryNode) throw new Error(`Retry target node "${retryTargetId}" not found in graph`);
22745
23088
  convergenceIteration++;
@@ -22750,10 +23093,13 @@ function createGraphExecutor() {
22750
23093
  nodeId: retryTargetId,
22751
23094
  ...config.eventBus ? { eventBus: config.eventBus } : {}
22752
23095
  });
22753
- if (plateauResult.plateaued) return {
22754
- status: "FAIL",
22755
- failureReason: `Convergence plateau detected after ${convergenceIteration} iteration(s): scores plateaued at [${plateauResult.scores.join(", ")}]`
22756
- };
23096
+ if (plateauResult.plateaued) {
23097
+ await persistExit("failed", `Convergence plateau detected after ${convergenceIteration} iteration(s)`);
23098
+ return {
23099
+ status: "FAIL",
23100
+ failureReason: `Convergence plateau detected after ${convergenceIteration} iteration(s): scores plateaued at [${plateauResult.scores.join(", ")}]`
23101
+ };
23102
+ }
22757
23103
  const remediation = buildRemediationContext({
22758
23104
  previousFailureReason: `Goal gate unsatisfied: ${gateResult.failedGates.join(", ")}`,
22759
23105
  iterationCount: convergenceIteration,
@@ -22764,14 +23110,18 @@ function createGraphExecutor() {
22764
23110
  currentNode = retryNode;
22765
23111
  continue;
22766
23112
  }
23113
+ await persistExit("completed", "SUCCESS");
22767
23114
  return { status: "SUCCESS" };
22768
23115
  }
22769
23116
  if (resumeCompletedSet?.has(currentNode.id)) {
22770
23117
  const skipEdge = selectEdge(currentNode, { status: "SUCCESS" }, context, graph);
22771
- if (!skipEdge) return {
22772
- status: "FAIL",
22773
- failureReason: `No outgoing edge from node ${currentNode.id}`
22774
- };
23118
+ if (!skipEdge) {
23119
+ await persistExit("failed", `No outgoing edge from node ${currentNode.id}`);
23120
+ return {
23121
+ status: "FAIL",
23122
+ failureReason: `No outgoing edge from node ${currentNode.id}`
23123
+ };
23124
+ }
22775
23125
  config.eventBus?.emit("graph:edge-selected", {
22776
23126
  runId: config.runId,
22777
23127
  fromNode: currentNode.id,
@@ -22799,6 +23149,7 @@ function createGraphExecutor() {
22799
23149
  nodeId: currentNode.id,
22800
23150
  tampered: integrityResult.tampered
22801
23151
  });
23152
+ await persistExit("failed", `Scenario integrity violation before node "${currentNode.id}"`);
22802
23153
  return {
22803
23154
  status: "FAIL",
22804
23155
  failureReason: `Scenario integrity violation detected before node "${currentNode.id}": tampered files: ${integrityResult.tampered.join(", ")}`
@@ -22810,6 +23161,7 @@ function createGraphExecutor() {
22810
23161
  scenarioCount: scenarioManifest.scenarios.length
22811
23162
  });
22812
23163
  }
23164
+ lastScenarioNodeId = currentNode.id;
22813
23165
  config.eventBus?.emit("graph:node-started", {
22814
23166
  runId: config.runId,
22815
23167
  nodeId: currentNode.id,
@@ -22822,6 +23174,7 @@ function createGraphExecutor() {
22822
23174
  firstResumedFidelity = "";
22823
23175
  const startedAt = Date.now();
22824
23176
  let outcome = await dispatchWithRetry(nodeToDispatch, context, graph, config, nodeRetries);
23177
+ if (Date.now() - startedAt < 50) await new Promise((r) => setTimeout(r, 50));
22825
23178
  if (outcome.status === "PARTIAL_SUCCESS" && !nodeToDispatch.allowPartial) outcome = {
22826
23179
  ...outcome,
22827
23180
  status: "FAIL",
@@ -22861,6 +23214,22 @@ function createGraphExecutor() {
22861
23214
  if (outcome.contextUpdates) for (const [key, value] of Object.entries(outcome.contextUpdates)) context.set(key, value);
22862
23215
  const nodeCost = context.getNumber("factory.lastNodeCostUsd", 0);
22863
23216
  if (nodeCost > 0) pipelineManager.addCost(nodeCost);
23217
+ if (config.adapter) {
23218
+ const nodeCompletedAt = Date.now();
23219
+ try {
23220
+ await insertGraphNodeResult(config.adapter, {
23221
+ run_id: config.runId,
23222
+ node_id: nodeToDispatch.id,
23223
+ attempt: (nodeRetries[nodeToDispatch.id] ?? 0) + 1,
23224
+ status: outcome.status,
23225
+ started_at: new Date(startedAt).toISOString(),
23226
+ completed_at: new Date(nodeCompletedAt).toISOString(),
23227
+ duration_ms: nodeCompletedAt - startedAt,
23228
+ cost_usd: nodeCost,
23229
+ ...outcome.failureReason !== void 0 ? { failure_reason: outcome.failureReason } : {}
23230
+ });
23231
+ } catch {}
23232
+ }
22864
23233
  if (!skipCompletedPush) completedNodes.push(currentNode.id);
22865
23234
  skipCompletedPush = false;
22866
23235
  await checkpointManager.save(config.logsRoot, {
@@ -22883,16 +23252,20 @@ function createGraphExecutor() {
22883
23252
  currentNode = retryNode;
22884
23253
  continue;
22885
23254
  }
23255
+ await persistExit("failed", outcome.failureReason ?? "FAIL");
22886
23256
  return {
22887
23257
  status: "FAIL",
22888
23258
  ...outcome.failureReason !== void 0 && { failureReason: outcome.failureReason }
22889
23259
  };
22890
23260
  }
22891
23261
  const edge = selectEdge(currentNode, outcome, context, graph);
22892
- if (!edge) return {
22893
- status: "FAIL",
22894
- failureReason: `No outgoing edge from node ${currentNode.id}`
22895
- };
23262
+ if (!edge) {
23263
+ await persistExit("failed", `No outgoing edge from node ${currentNode.id}`);
23264
+ return {
23265
+ status: "FAIL",
23266
+ failureReason: `No outgoing edge from node ${currentNode.id}`
23267
+ };
23268
+ }
22896
23269
  config.eventBus?.emit("graph:edge-selected", {
22897
23270
  runId: config.runId,
22898
23271
  fromNode: currentNode.id,
@@ -23126,33 +23499,6 @@ function createCodergenHandler(options) {
23126
23499
  };
23127
23500
  }
23128
23501
 
23129
- //#endregion
23130
- //#region packages/factory/dist/scenarios/scorer.js
23131
- /**
23132
- * SatisfactionScorer — computes a satisfaction score from ScenarioRunResult.
23133
- *
23134
- * Score = passed / total (0.0 when total === 0).
23135
- * Passes = score >= threshold (default 0.8).
23136
- *
23137
- * Story 44-5.
23138
- */
23139
- /**
23140
- * Compute a satisfaction score from a ScenarioRunResult.
23141
- *
23142
- * @param result - The aggregated scenario run result.
23143
- * @param threshold - Minimum score to consider passing (default 0.8).
23144
- * @returns A SatisfactionScore with score, passes, and threshold.
23145
- */
23146
- function computeSatisfactionScore(result, threshold = .8) {
23147
- const { total, passed } = result.summary;
23148
- const score = total === 0 ? 0 : passed / total;
23149
- return {
23150
- score,
23151
- passes: score >= threshold,
23152
- threshold
23153
- };
23154
- }
23155
-
23156
23502
  //#endregion
23157
23503
  //#region packages/factory/dist/handlers/tool.js
23158
23504
  /**
@@ -23203,7 +23549,7 @@ function createToolHandler(options) {
23203
23549
  parsed = JSON.parse(stdoutBuf.trim());
23204
23550
  } catch {}
23205
23551
  if (isScenarioRunResult(parsed)) {
23206
- const scored = computeSatisfactionScore(parsed);
23552
+ const scored = computeSatisfactionScore(parsed, options?.satisfactionThreshold);
23207
23553
  resolve$2({
23208
23554
  status: "SUCCESS",
23209
23555
  contextUpdates: { satisfaction_score: scored.score }
@@ -28155,10 +28501,16 @@ const FactoryConfigSchema = z.object({
28155
28501
  scenario_dir: z.string().default(".substrate/scenarios/"),
28156
28502
  satisfaction_threshold: z.number().min(0).max(1).default(.8),
28157
28503
  budget_cap_usd: z.number().min(0).default(0),
28158
- wall_clock_cap_seconds: z.number().min(0).default(0),
28504
+ wall_clock_cap_seconds: z.number().min(0).default(3600),
28159
28505
  plateau_window: z.number().int().min(2).default(3),
28160
28506
  plateau_threshold: z.number().min(0).max(1).default(.05),
28161
- backend: z.enum(["cli", "direct"]).default("cli")
28507
+ backend: z.enum(["cli", "direct"]).default("cli"),
28508
+ quality_mode: z.enum([
28509
+ "code-review",
28510
+ "dual-signal",
28511
+ "scenario-primary",
28512
+ "scenario-only"
28513
+ ]).default("dual-signal")
28162
28514
  }).strict();
28163
28515
  /**
28164
28516
  * Extends SubstrateConfigSchema with an optional `factory:` section.
@@ -28196,6 +28548,71 @@ async function loadFactoryConfig(projectDir, explicitConfigPath) {
28196
28548
  });
28197
28549
  }
28198
28550
 
28551
+ //#endregion
28552
+ //#region packages/factory/dist/persistence/factory-schema.js
28553
+ /**
28554
+ * Factory schema DDL for graph execution and scenario validation tables.
28555
+ * Companion to `@substrate-ai/core`'s `initSchema` — call both during factory initialization.
28556
+ */
28557
+ /**
28558
+ * Initialize all factory-specific persistence tables on the given adapter.
28559
+ * Idempotent — safe to call multiple times.
28560
+ *
28561
+ * Creates:
28562
+ * - graph_runs: top-level graph execution run records
28563
+ * - graph_node_results: per-node execution results within a run
28564
+ * - scenario_results: scenario validation outcomes within a run
28565
+ */
28566
async function factorySchema(adapter) {
	// graph_runs: one row per graph execution.
	// final_outcome is TEXT rather than VARCHAR(32): the executor stores
	// free-form failure reasons there (e.g. "Goal gate failed: no retry target",
	// budget and plateau messages) that exceed 32 characters, and a backend that
	// enforces the declared length would reject the write.
	// CREATE TABLE IF NOT EXISTS does not alter a table that already exists.
	await adapter.exec(`
		CREATE TABLE IF NOT EXISTS graph_runs (
			id VARCHAR(255) PRIMARY KEY,
			graph_file TEXT NOT NULL,
			graph_goal TEXT,
			status VARCHAR(32) NOT NULL DEFAULT 'running',
			started_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
			completed_at DATETIME,
			total_cost_usd DOUBLE NOT NULL DEFAULT 0.0,
			node_count INTEGER NOT NULL DEFAULT 0,
			final_outcome TEXT,
			checkpoint_path TEXT
		)
	`);
	// graph_node_results: one row per node dispatch within a run.
	await adapter.exec(`
		CREATE TABLE IF NOT EXISTS graph_node_results (
			id INTEGER PRIMARY KEY AUTOINCREMENT,
			run_id VARCHAR(255) NOT NULL REFERENCES graph_runs(id),
			node_id VARCHAR(255) NOT NULL,
			attempt INTEGER NOT NULL DEFAULT 1,
			status VARCHAR(32) NOT NULL,
			started_at DATETIME NOT NULL,
			completed_at DATETIME,
			duration_ms INTEGER,
			cost_usd DOUBLE NOT NULL DEFAULT 0.0,
			failure_reason TEXT,
			context_snapshot TEXT
		)
	`);
	await adapter.exec("CREATE INDEX IF NOT EXISTS idx_graph_node_results_run ON graph_node_results(run_id)");
	// scenario_results: one row per scenario validation pass within a run.
	await adapter.exec(`
		CREATE TABLE IF NOT EXISTS scenario_results (
			id INTEGER PRIMARY KEY AUTOINCREMENT,
			run_id VARCHAR(255) NOT NULL REFERENCES graph_runs(id),
			node_id VARCHAR(255) NOT NULL,
			iteration INTEGER NOT NULL DEFAULT 1,
			total_scenarios INTEGER NOT NULL,
			passed INTEGER NOT NULL,
			failed INTEGER NOT NULL,
			satisfaction_score DOUBLE NOT NULL,
			threshold DOUBLE NOT NULL DEFAULT 0.8,
			passes BOOLEAN NOT NULL,
			details TEXT,
			executed_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
		)
	`);
	await adapter.exec("CREATE INDEX IF NOT EXISTS idx_scenario_results_run ON scenario_results(run_id)");
}
28615
+
28199
28616
  //#endregion
28200
28617
  //#region packages/factory/dist/factory-command.js
28201
28618
  /**
@@ -28225,10 +28642,17 @@ async function resolveGraphPath(opts, projectDir) {
28225
28642
  return null;
28226
28643
  }
28227
28644
  /**
28645
+ * Total number of validation rules loaded by `createValidator()`.
28646
+ * 8 error rules + 5 warning rules = 13 total (stories 42-4, 42-5).
28647
+ * This is a fixed constant — the GraphValidator interface does not expose a rule count.
28648
+ */
28649
+ const TOTAL_RULE_COUNT = 13;
28650
+ /**
28228
28651
  * Register the `factory` command group on the provided Commander program.
28229
28652
  *
28230
28653
  * Story 44-8: registers the `scenarios` subcommand.
28231
28654
  * Story 44-9: registers the `run` subcommand.
28655
+ * Story 46-7: registers the `validate` subcommand.
28232
28656
  */
28233
28657
  function registerFactoryCommand(program) {
28234
28658
  const factoryCmd = program.command("factory").description("Factory pipeline and scenario management commands");
@@ -28273,6 +28697,11 @@ function registerFactoryCommand(program) {
28273
28697
  await stateManager.initRun(dotSource);
28274
28698
  /** wallClockCapMs: FactoryConfig.wall_clock_cap_seconds × 1000 (story 45-10) */
28275
28699
  const factoryConfig = await loadFactoryConfig(projectDir, opts.config);
28700
+ const adapter = createDatabaseAdapter$1({
28701
+ backend: "auto",
28702
+ basePath: projectDir
28703
+ });
28704
+ await factorySchema(adapter);
28276
28705
  const executor = createGraphExecutor();
28277
28706
  await executor.run(graph, {
28278
28707
  runId,
@@ -28280,10 +28709,13 @@ function registerFactoryCommand(program) {
28280
28709
  handlerRegistry: createDefaultRegistry(),
28281
28710
  eventBus,
28282
28711
  dotSource,
28712
+ adapter,
28283
28713
  wallClockCapMs: (factoryConfig.factory?.wall_clock_cap_seconds ?? 0) * 1e3,
28284
28714
  pipelineBudgetCapUsd: factoryConfig.factory?.budget_cap_usd ?? 0,
28285
28715
  plateauWindow: factoryConfig.factory?.plateau_window ?? 3,
28286
- plateauThreshold: factoryConfig.factory?.plateau_threshold ?? .05
28716
+ plateauThreshold: factoryConfig.factory?.plateau_threshold ?? .05,
28717
+ satisfactionThreshold: factoryConfig.factory?.satisfaction_threshold ?? .8,
28718
+ qualityMode: factoryConfig.factory?.quality_mode ?? "dual-signal"
28287
28719
  });
28288
28720
  } catch (err) {
28289
28721
  const msg = err instanceof Error ? err.message : String(err);
@@ -28291,6 +28723,53 @@ function registerFactoryCommand(program) {
28291
28723
  process.exit(1);
28292
28724
  }
28293
28725
  });
28726
+ factoryCmd.command("validate <graph-file>").description("Parse and lint a DOT graph against all 13 validation rules").option("--output-format <format>", "Output format: json | text", "text").action(async (graphFile, opts) => {
28727
+ let source;
28728
+ try {
28729
+ source = await readFile$1(graphFile, "utf-8");
28730
+ } catch (err) {
28731
+ const isEnoent = err instanceof Error && err.code === "ENOENT";
28732
+ if (isEnoent) process.stderr.write(`Error: file not found: ${graphFile}\n`);
28733
+ else {
28734
+ const msg = err instanceof Error ? err.message : String(err);
28735
+ process.stderr.write(`Error: file not found: ${graphFile} (${msg})\n`);
28736
+ }
28737
+ process.exit(2);
28738
+ return;
28739
+ }
28740
+ let graph;
28741
+ try {
28742
+ graph = parseGraph(source);
28743
+ } catch (err) {
28744
+ const msg = err instanceof Error ? err.message : String(err);
28745
+ process.stderr.write(`Error: failed to parse graph: ${msg}\n`);
28746
+ process.exit(2);
28747
+ return;
28748
+ }
28749
+ const diagnostics = createValidator().validate(graph);
28750
+ const errors = diagnostics.filter((d) => d.severity === "error");
28751
+ const warnings = diagnostics.filter((d) => d.severity === "warning");
28752
+ const firedRuleIds = new Set(diagnostics.map((d) => d.ruleId));
28753
+ const passedCount = TOTAL_RULE_COUNT - firedRuleIds.size;
28754
+ if (opts.outputFormat === "json") {
28755
+ process.stdout.write(JSON.stringify(diagnostics, null, 2) + "\n");
28756
+ if (errors.length > 0) process.exit(1);
28757
+ return;
28758
+ }
28759
+ if (diagnostics.length > 0) {
28760
+ for (const d of diagnostics) {
28761
+ const nodeStr = d.nodeId ? ` [node: ${d.nodeId}]` : "";
28762
+ const edgeStr = d.edgeIndex !== void 0 ? ` [edge: ${d.edgeIndex}]` : "";
28763
+ process.stdout.write(` ${d.severity.padEnd(7)} ${d.ruleId.padEnd(24)} ${d.message}${nodeStr}${edgeStr}\n`);
28764
+ }
28765
+ process.stdout.write("\n");
28766
+ }
28767
+ const errLabel = errors.length !== 1 ? "errors" : "error";
28768
+ const warnLabel = warnings.length !== 1 ? "warnings" : "warning";
28769
+ if (diagnostics.length === 0) process.stdout.write(`✓ ${TOTAL_RULE_COUNT}/${TOTAL_RULE_COUNT} rules passed, 0 errors, 0 warnings\n`);
28770
+ else process.stdout.write(`✗ ${passedCount}/${TOTAL_RULE_COUNT} rules passed, ${errors.length} ${errLabel}, ${warnings.length} ${warnLabel}\n`);
28771
+ if (errors.length > 0) process.exit(1);
28772
+ });
28294
28773
  }
28295
28774
 
28296
28775
  //#endregion
@@ -29765,5 +30244,5 @@ function registerRunCommand(program, _version = "0.0.0", projectRoot = process.c
29765
30244
  }
29766
30245
 
29767
30246
  //#endregion
29768
- export { AdapterTelemetryPersistence, AppError, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, GitClient, GrammarLoader, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SymbolParser, createContextCompiler, createDispatcher, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, createTelemetryAdvisor, formatPhaseCompletionSummary, normalizeGraphSummaryToStatus, registerFactoryCommand, registerRunCommand, registerScenariosCommand, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runRunAction, runSolutioningPhase, validateStopAfterFromConflict };
29769
- //# sourceMappingURL=run-CUMPhuVq.js.map
30247
+ export { AdapterTelemetryPersistence, AppError, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, GitClient, GrammarLoader, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SymbolParser, createContextCompiler, createDispatcher, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, createTelemetryAdvisor, formatPhaseCompletionSummary, getFactoryRunSummaries, getScenarioResultsForRun, listGraphRuns, normalizeGraphSummaryToStatus, registerFactoryCommand, registerRunCommand, registerScenariosCommand, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runRunAction, runSolutioningPhase, validateStopAfterFromConflict };
30248
+ //# sourceMappingURL=run-CRP9jiCz.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "substrate-ai",
3
- "version": "0.14.0",
3
+ "version": "0.15.1",
4
4
  "description": "Substrate — multi-agent orchestration daemon for AI coding agents",
5
5
  "type": "module",
6
6
  "license": "MIT",