substrate-ai 0.13.1 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -4,7 +4,7 @@ import { createLogger } from "../logger-KeHncl-f.js";
4
4
  import { createEventBus } from "../helpers-CElYrONe.js";
5
5
  import { AdapterRegistry, BudgetConfigSchema, CURRENT_CONFIG_FORMAT_VERSION, CURRENT_TASK_GRAPH_VERSION, ConfigError, CostTrackerConfigSchema, DEFAULT_CONFIG, DoltClient, DoltNotInstalled, EXPERIMENT_RESULT, GlobalSettingsSchema, IngestionServer, MonitorDatabaseImpl, OPERATIONAL_FINDING, PartialGlobalSettingsSchema, PartialProviderConfigSchema, ProvidersSchema, RoutingRecommender, STORY_METRICS, TelemetryConfigSchema, addTokenUsage, aggregateTokenUsageForRun, checkDoltInstalled, compareRunMetrics, createAmendmentRun, createConfigSystem, createDecision, createDoltClient, createPipelineRun, getActiveDecisions, getAllCostEntriesFiltered, getBaselineRunMetrics, getDecisionsByCategory, getDecisionsByPhaseForRun, getLatestCompletedRun, getLatestRun, getPipelineRunById, getPlanningCostTotal, getRetryableEscalations, getRunMetrics, getSessionCostSummary, getSessionCostSummaryFiltered, getStoryMetricsForRun, getTokenUsageSummary, incrementRunRestarts, initSchema, initializeDolt, listRequirements, listRunMetrics, loadParentRunDecisions, supersedeDecision, tagRunAsBaseline, updatePipelineRun } from "../dist-CLvAwmT7.js";
6
6
  import "../adapter-registry-DXLMTmfD.js";
7
- import { AdapterTelemetryPersistence, AppError, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, GitClient, GrammarLoader, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SymbolParser, createContextCompiler, createDispatcher, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, createTelemetryAdvisor, formatPhaseCompletionSummary, registerFactoryCommand, registerRunCommand, registerScenariosCommand, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runSolutioningPhase, validateStopAfterFromConflict } from "../run-DDUeFC-I.js";
7
+ import { AdapterTelemetryPersistence, AppError, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, GitClient, GrammarLoader, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SymbolParser, createContextCompiler, createDispatcher, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, createTelemetryAdvisor, formatPhaseCompletionSummary, getFactoryRunSummaries, getScenarioResultsForRun, listGraphRuns, registerFactoryCommand, registerRunCommand, registerScenariosCommand, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runSolutioningPhase, validateStopAfterFromConflict } from "../run-bhGoAbu9.js";
8
8
  import "../errors-D1LU8CZ9.js";
9
9
  import "../routing-CcBOCuC9.js";
10
10
  import "../decisions-C0pz9Clx.js";
@@ -4359,7 +4359,7 @@ async function runSupervisorAction(options, deps = {}) {
4359
4359
  await initSchema(expAdapter);
4360
4360
  const { runRunAction: runPipeline } = await import(
4361
4361
  /* @vite-ignore */
4362
- "../run-BV2zNwIC.js"
4362
+ "../run-BQmRdbhV.js"
4363
4363
  );
4364
4364
  const runStoryFn = async (opts) => {
4365
4365
  const exitCode = await runPipeline({
@@ -4696,8 +4696,22 @@ function printCategoryTable(stats, label) {
4696
4696
  process.stdout.write(` ${c.category.padEnd(22)} ${c.totalTokens.toLocaleString().padStart(12)} ${pct.padStart(8)} ${String(c.eventCount).padStart(8)} ${avg.padStart(10)} ${c.trend}\n`);
4697
4697
  }
4698
4698
  }
4699
/**
 * Write a fixed-width summary table of factory runs to stdout.
 *
 * Emits a title line with the record count, a rule, a column header, a
 * second rule, and then one row per entry in `runs`.
 *
 * Fields read from each run: `run_id`, `satisfaction_score`, `passes`,
 * `started_at`, `total_cost_usd`, `convergence_status`.
 *
 * Missing (null or undefined) values never throw: score, passes, start time
 * and status render as "—", and a missing cost renders as $0.0000, matching
 * the `?? 0` fallback the SDLC run listing applies to its cost column.
 *
 * @param {Array<object>} runs - Factory run summary records to print.
 * @returns {void}
 */
function printFactoryRunTable(runs) {
  const MISSING = "—";

  process.stdout.write(`\nFactory Runs (${runs.length} records)\n`);
  process.stdout.write("─".repeat(80) + "\n");
  process.stdout.write(` ${"run_id".padEnd(10)} ${"score".padStart(7)} ${"passes".padStart(7)} ${"started_at".padEnd(20)} ${"cost_usd".padStart(10)} ${"status".padEnd(16)}\n`);
  process.stdout.write(" " + "─".repeat(74) + "\n");

  for (const run of runs) {
    // `== null` deliberately matches both null and undefined so that an
    // absent score is shown as a placeholder rather than "NaN%".
    const score = run.satisfaction_score;
    const scoreStr = score == null ? MISSING : `${(Number(score) * 100).toFixed(1)}%`;

    // Only a present value is interpreted as pass/fail; absent stays "—".
    const passes = run.passes;
    let passesStr = MISSING;
    if (passes != null) passesStr = passes ? "✓" : "✗";

    // NOTE(review): the type of started_at depends on the database adapter —
    // confirm. Strings are truncated to the first 19 characters as before;
    // valid Date objects are converted to ISO-8601 first so the same
    // truncation yields "YYYY-MM-DDTHH:MM:SS".
    const started = run.started_at;
    let startedAt = MISSING;
    if (started instanceof Date && !Number.isNaN(started.getTime())) {
      startedAt = started.toISOString().slice(0, 19);
    } else if (started != null) {
      startedAt = String(started).slice(0, 19);
    }

    // Number(...) tolerates numeric strings; `?? 0` covers a missing cost.
    const costStr = `$${Number(run.total_cost_usd ?? 0).toFixed(4)}`;
    const statusStr = run.convergence_status ?? MISSING;
    const shortId = String(run.run_id).slice(0, 8);

    process.stdout.write(` ${shortId.padEnd(10)} ${scoreStr.padStart(7)} ${passesStr.padStart(7)} ${startedAt.padEnd(20)} ${costStr.padStart(10)} ${statusStr.padEnd(16)}\n`);
  }
}
4699
4713
  async function runMetricsAction(options) {
4700
- const { outputFormat, projectRoot, limit = 10, compare, tagBaseline, analysis, sprint, story, taskType, since, aggregate, efficiency, recommendations, turns, consumers, categories, compareStories, routingRecommendations } = options;
4714
+ const { outputFormat, projectRoot, limit = 10, compare, tagBaseline, analysis, sprint, story, taskType, since, aggregate, efficiency, recommendations, turns, consumers, categories, compareStories, routingRecommendations, run, factory } = options;
4701
4715
  const telemetryModes = [
4702
4716
  efficiency,
4703
4717
  recommendations,
@@ -4931,7 +4945,7 @@ async function runMetricsAction(options) {
4931
4945
  }
4932
4946
  const dbRoot = await resolveMainRepoRoot(projectRoot);
4933
4947
  const doltStateDir = join(dbRoot, ".substrate", "state", ".dolt");
4934
- if (!existsSync$1(doltStateDir)) {
4948
+ if (!existsSync$1(doltStateDir) && factory !== true && run === void 0) {
4935
4949
  if (outputFormat === "json") process.stdout.write(formatOutput({
4936
4950
  runs: [],
4937
4951
  message: "No metrics yet — no pipeline database found. Initialize Dolt with `substrate init`."
@@ -4945,6 +4959,60 @@ async function runMetricsAction(options) {
4945
4959
  });
4946
4960
  try {
4947
4961
  await initSchema(adapter);
4962
+ if (run !== void 0) {
4963
+ let rows = [];
4964
+ let resolvedRunId = run;
4965
+ try {
4966
+ rows = await getScenarioResultsForRun(adapter, run);
4967
+ if (rows.length === 0 && run.length < 36) {
4968
+ const allRuns = await listGraphRuns(adapter, 100);
4969
+ const matching = allRuns.filter((r) => r.id.startsWith(run));
4970
+ if (matching.length === 1 && matching[0] !== void 0) {
4971
+ resolvedRunId = matching[0].id;
4972
+ rows = await getScenarioResultsForRun(adapter, resolvedRunId);
4973
+ }
4974
+ }
4975
+ } catch (err) {
4976
+ logger$10.debug({ err }, "getScenarioResultsForRun failed");
4977
+ }
4978
+ if (rows.length === 0) {
4979
+ const msg = `No factory run found with id: ${run}`;
4980
+ if (outputFormat === "json") process.stdout.write(formatOutput({ message: msg }, "json", true) + "\n");
4981
+ else process.stderr.write(`Error: ${msg}\n`);
4982
+ return 1;
4983
+ }
4984
+ if (outputFormat === "json") process.stdout.write(formatOutput({
4985
+ run_id: resolvedRunId,
4986
+ type: "factory",
4987
+ iterations: rows
4988
+ }, "json", true) + "\n");
4989
+ else {
4990
+ process.stdout.write(`\nFactory Run: ${resolvedRunId}\n`);
4991
+ process.stdout.write("─".repeat(80) + "\n");
4992
+ process.stdout.write(` ${"#".padStart(3)} ${"score".padStart(7)} ${"passes".padStart(7)} ${"passed/total".padStart(13)} ${"executed_at".padEnd(20)}\n`);
4993
+ process.stdout.write(" " + "─".repeat(54) + "\n");
4994
+ for (const r of rows) {
4995
+ const scoreStr = `${(r.satisfaction_score * 100).toFixed(1)}%`;
4996
+ const passesStr = r.passes ? "✓" : "✗";
4997
+ const passedTotal = `${r.passed}/${r.total_scenarios}`;
4998
+ const execAt = String(r.executed_at).slice(0, 19);
4999
+ process.stdout.write(` ${String(r.iteration).padStart(3)} ${scoreStr.padStart(7)} ${passesStr.padStart(7)} ${passedTotal.padStart(13)} ${execAt.padEnd(20)}\n`);
5000
+ }
5001
+ }
5002
+ return 0;
5003
+ }
5004
+ if (factory === true) {
5005
+ let factoryRuns$1 = [];
5006
+ try {
5007
+ factoryRuns$1 = await getFactoryRunSummaries(adapter, limit);
5008
+ } catch (err) {
5009
+ logger$10.debug({ err }, "getFactoryRunSummaries failed in factory-only mode");
5010
+ }
5011
+ if (outputFormat === "json") process.stdout.write(formatOutput({ graph_runs: factoryRuns$1 }, "json", true) + "\n");
5012
+ else if (factoryRuns$1.length === 0) process.stdout.write("No factory runs recorded yet.\n");
5013
+ else printFactoryRunTable(factoryRuns$1);
5014
+ return 0;
5015
+ }
4948
5016
  if (tagBaseline !== void 0) {
4949
5017
  const row = await getRunMetrics(adapter, tagBaseline);
4950
5018
  if (!row) {
@@ -5033,18 +5101,26 @@ async function runMetricsAction(options) {
5033
5101
  const phaseBreakdownMap = {};
5034
5102
  try {
5035
5103
  const kvStore = new FileStateStore({ basePath: join(dbRoot, ".substrate") });
5036
- for (const run of runs) {
5037
- const raw = await kvStore.getMetric(run.run_id, "phase_token_breakdown");
5038
- phaseBreakdownMap[run.run_id] = raw !== void 0 ? raw : null;
5104
+ for (const run$1 of runs) {
5105
+ const raw = await kvStore.getMetric(run$1.run_id, "phase_token_breakdown");
5106
+ phaseBreakdownMap[run$1.run_id] = raw !== void 0 ? raw : null;
5039
5107
  }
5040
5108
  } catch {}
5109
+ let factoryRuns = [];
5110
+ try {
5111
+ factoryRuns = await getFactoryRunSummaries(adapter, limit);
5112
+ } catch (err) {
5113
+ logger$10.debug({ err }, "getFactoryRunSummaries failed — table may not exist in older databases");
5114
+ }
5041
5115
  if (outputFormat === "json") {
5042
- const runsWithBreakdown = runs.map((run) => ({
5043
- ...run,
5044
- phase_token_breakdown: phaseBreakdownMap[run.run_id] ?? null
5116
+ const runsWithBreakdown = runs.map((run$1) => ({
5117
+ ...run$1,
5118
+ type: "sdlc",
5119
+ phase_token_breakdown: phaseBreakdownMap[run$1.run_id] ?? null
5045
5120
  }));
5046
5121
  const jsonPayload = {
5047
5122
  runs: runsWithBreakdown,
5123
+ graph_runs: factoryRuns,
5048
5124
  story_metrics: storyMetrics
5049
5125
  };
5050
5126
  if (doltMetrics !== void 0) if (aggregate) {
@@ -5066,23 +5142,23 @@ async function runMetricsAction(options) {
5066
5142
  } else jsonPayload.dolt_metrics = doltMetrics;
5067
5143
  process.stdout.write(formatOutput(jsonPayload, "json", true) + "\n");
5068
5144
  } else {
5069
- if (runs.length === 0 && storyMetrics.length === 0 && (doltMetrics === void 0 || doltMetrics.length === 0)) {
5145
+ if (runs.length === 0 && storyMetrics.length === 0 && (doltMetrics === void 0 || doltMetrics.length === 0) && factoryRuns.length === 0) {
5070
5146
  process.stdout.write("No run metrics recorded yet. Run `substrate run` to generate metrics.\n");
5071
5147
  return 0;
5072
5148
  }
5073
5149
  if (runs.length > 0) {
5074
5150
  process.stdout.write(`\nPipeline Run Metrics (last ${runs.length} runs)\n`);
5075
5151
  process.stdout.write("─".repeat(80) + "\n");
5076
- for (const run of runs) {
5077
- const isBaseline = run.is_baseline ? " [BASELINE]" : "";
5078
- process.stdout.write(`\nRun: ${run.run_id}${isBaseline}\n`);
5079
- process.stdout.write(` Status: ${run.status} | Methodology: ${run.methodology}\n`);
5080
- process.stdout.write(` Started: ${run.started_at}\n`);
5081
- if (run.completed_at) process.stdout.write(` Completed: ${run.completed_at} (${run.wall_clock_seconds}s)\n`);
5082
- process.stdout.write(` Stories: attempted=${run.stories_attempted} succeeded=${run.stories_succeeded} failed=${run.stories_failed} escalated=${run.stories_escalated}\n`);
5083
- process.stdout.write(` Tokens: ${(run.total_input_tokens ?? 0).toLocaleString()} in / ${(run.total_output_tokens ?? 0).toLocaleString()} out $${(run.total_cost_usd ?? 0).toFixed(4)}\n`);
5084
- process.stdout.write(` Cycles: ${run.total_review_cycles} | Dispatches: ${run.total_dispatches} | Concurrency: ${run.concurrency_setting}\n`);
5085
- const breakdown = phaseBreakdownMap[run.run_id];
5152
+ for (const run$1 of runs) {
5153
+ const isBaseline = run$1.is_baseline ? " [BASELINE]" : "";
5154
+ process.stdout.write(`\nRun: ${run$1.run_id}${isBaseline}\n`);
5155
+ process.stdout.write(` Status: ${run$1.status} | Methodology: ${run$1.methodology}\n`);
5156
+ process.stdout.write(` Started: ${run$1.started_at}\n`);
5157
+ if (run$1.completed_at) process.stdout.write(` Completed: ${run$1.completed_at} (${run$1.wall_clock_seconds}s)\n`);
5158
+ process.stdout.write(` Stories: attempted=${run$1.stories_attempted} succeeded=${run$1.stories_succeeded} failed=${run$1.stories_failed} escalated=${run$1.stories_escalated}\n`);
5159
+ process.stdout.write(` Tokens: ${(run$1.total_input_tokens ?? 0).toLocaleString()} in / ${(run$1.total_output_tokens ?? 0).toLocaleString()} out $${(run$1.total_cost_usd ?? 0).toFixed(4)}\n`);
5160
+ process.stdout.write(` Cycles: ${run$1.total_review_cycles} | Dispatches: ${run$1.total_dispatches} | Concurrency: ${run$1.concurrency_setting}\n`);
5161
+ const breakdown = phaseBreakdownMap[run$1.run_id];
5086
5162
  if (breakdown !== null && breakdown !== void 0 && breakdown.entries.length > 0) {
5087
5163
  process.stdout.write(" Phase Token Breakdown:\n");
5088
5164
  for (const entry of breakdown.entries) process.stdout.write(` ${entry.phase.padEnd(10)} | ${entry.model.padEnd(30)} | in: ${entry.inputTokens} | out: ${entry.outputTokens} | dispatches: ${entry.dispatchCount}\n`);
@@ -5136,6 +5212,7 @@ async function runMetricsAction(options) {
5136
5212
  process.stdout.write(` ${m.storyKey.padEnd(16)} ${m.taskType.padEnd(16)} ${tokIn.padStart(10)} ${tokOut.padStart(11)} ${wall.padStart(10)} ${res.padEnd(12)}\n`);
5137
5213
  }
5138
5214
  }
5215
+ if (factoryRuns.length > 0) printFactoryRunTable(factoryRuns);
5139
5216
  }
5140
5217
  return 0;
5141
5218
  } catch (err) {
@@ -5151,7 +5228,7 @@ async function runMetricsAction(options) {
5151
5228
  }
5152
5229
  }
5153
5230
  function registerMetricsCommand(program, _version = "0.0.0", projectRoot = process.cwd()) {
5154
- program.command("metrics").description("Show historical pipeline run metrics and cross-run comparison").option("--project-root <path>", "Project root directory", projectRoot).option("--output-format <format>", "Output format: human (default) or json", "human").option("--limit <n>", "Number of runs to show (default: 10)", (v) => parseInt(v, 10), 10).option("--compare <run-id-a,run-id-b>", "Compare two runs side-by-side (comma-separated IDs, e.g. abc123,def456)").option("--tag-baseline <run-id>", "Mark a run as the performance baseline").option("--analysis <run-id>", "Read and output the analysis report for the specified run (AC5 of Story 17-3)").option("--sprint <sprint>", "Filter StateStore metrics by sprint (e.g. sprint-1)").option("--story <story-key>", "Filter StateStore metrics by story key (e.g. 26-1)").option("--task-type <type>", "Filter StateStore metrics by task type (e.g. dev-story)").option("--since <iso-date>", "Filter StateStore metrics at or after this ISO timestamp").option("--aggregate", "Aggregate StateStore metrics grouped by task_type").option("--efficiency", "Show telemetry efficiency scores for recent stories").option("--recommendations", "Show all telemetry recommendations across stories").option("--turns <storyKey>", "Show per-turn analysis for a specific story").option("--consumers <storyKey>", "Show consumer stats for a specific story").option("--categories", "Show category stats (optionally scoped by --story <storyKey>)").option("--compare-stories <storyA,storyB>", "Compare efficiency scores of two stories side-by-side (comma-separated keys)").option("--routing-recommendations", "Show routing recommendations derived from phase token breakdown history").action(async (opts) => {
5231
+ program.command("metrics").description("Show historical pipeline run metrics and cross-run comparison").option("--project-root <path>", "Project root directory", projectRoot).option("--output-format <format>", "Output format: human (default) or json", "human").option("--limit <n>", "Number of runs to show (default: 10)", (v) => parseInt(v, 10), 10).option("--compare <run-id-a,run-id-b>", "Compare two runs side-by-side (comma-separated IDs, e.g. abc123,def456)").option("--tag-baseline <run-id>", "Mark a run as the performance baseline").option("--analysis <run-id>", "Read and output the analysis report for the specified run (AC5 of Story 17-3)").option("--sprint <sprint>", "Filter StateStore metrics by sprint (e.g. sprint-1)").option("--story <story-key>", "Filter StateStore metrics by story key (e.g. 26-1)").option("--task-type <type>", "Filter StateStore metrics by task type (e.g. dev-story)").option("--since <iso-date>", "Filter StateStore metrics at or after this ISO timestamp").option("--aggregate", "Aggregate StateStore metrics grouped by task_type").option("--efficiency", "Show telemetry efficiency scores for recent stories").option("--recommendations", "Show all telemetry recommendations across stories").option("--turns <storyKey>", "Show per-turn analysis for a specific story").option("--consumers <storyKey>", "Show consumer stats for a specific story").option("--categories", "Show category stats (optionally scoped by --story <storyKey>)").option("--compare-stories <storyA,storyB>", "Compare efficiency scores of two stories side-by-side (comma-separated keys)").option("--routing-recommendations", "Show routing recommendations derived from phase token breakdown history").option("--run <run-id>", "Show per-iteration score history for a specific factory run").option("--factory", "Show only factory graph run metrics (excludes SDLC runs)").action(async (opts) => {
5155
5232
  const outputFormat = opts.outputFormat === "json" ? "json" : "human";
5156
5233
  let compareIds;
5157
5234
  if (opts.compare !== void 0) {
@@ -5186,7 +5263,9 @@ function registerMetricsCommand(program, _version = "0.0.0", projectRoot = proce
5186
5263
  ...opts.consumers !== void 0 && { consumers: opts.consumers },
5187
5264
  ...opts.categories !== void 0 && { categories: opts.categories },
5188
5265
  ...compareStoriesIds !== void 0 && { compareStories: compareStoriesIds },
5189
- ...opts.routingRecommendations !== void 0 && { routingRecommendations: opts.routingRecommendations }
5266
+ ...opts.routingRecommendations !== void 0 && { routingRecommendations: opts.routingRecommendations },
5267
+ ...opts.run !== void 0 && { run: opts.run },
5268
+ ...opts.factory !== void 0 && { factory: opts.factory }
5190
5269
  };
5191
5270
  const exitCode = await runMetricsAction(metricsOpts);
5192
5271
  process.exitCode = exitCode;
@@ -2,7 +2,7 @@ import "./health-DswaC1q5.js";
2
2
  import "./logger-KeHncl-f.js";
3
3
  import "./helpers-CElYrONe.js";
4
4
  import "./dist-CLvAwmT7.js";
5
- import { normalizeGraphSummaryToStatus, registerRunCommand, runRunAction } from "./run-DDUeFC-I.js";
5
+ import { normalizeGraphSummaryToStatus, registerRunCommand, runRunAction } from "./run-bhGoAbu9.js";
6
6
  import "./routing-CcBOCuC9.js";
7
7
  import "./decisions-C0pz9Clx.js";
8
8