npm - substrate-ai - Versions diffs - 0.13.1 → 0.15.0 - Mend

substrate-ai 0.13.1 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/cli/index.js +102 -23
package/dist/{run-BV2zNwIC.js → run-BQmRdbhV.js} +1 -1
package/dist/{run-DDUeFC-I.js → run-bhGoAbu9.js} +969 -65
package/package.json +1 -1

package/dist/cli/index.js CHANGED

@@ -4,7 +4,7 @@ import { createLogger } from "../logger-KeHncl-f.js";
 import { createEventBus } from "../helpers-CElYrONe.js";
 import { AdapterRegistry, BudgetConfigSchema, CURRENT_CONFIG_FORMAT_VERSION, CURRENT_TASK_GRAPH_VERSION, ConfigError, CostTrackerConfigSchema, DEFAULT_CONFIG, DoltClient, DoltNotInstalled, EXPERIMENT_RESULT, GlobalSettingsSchema, IngestionServer, MonitorDatabaseImpl, OPERATIONAL_FINDING, PartialGlobalSettingsSchema, PartialProviderConfigSchema, ProvidersSchema, RoutingRecommender, STORY_METRICS, TelemetryConfigSchema, addTokenUsage, aggregateTokenUsageForRun, checkDoltInstalled, compareRunMetrics, createAmendmentRun, createConfigSystem, createDecision, createDoltClient, createPipelineRun, getActiveDecisions, getAllCostEntriesFiltered, getBaselineRunMetrics, getDecisionsByCategory, getDecisionsByPhaseForRun, getLatestCompletedRun, getLatestRun, getPipelineRunById, getPlanningCostTotal, getRetryableEscalations, getRunMetrics, getSessionCostSummary, getSessionCostSummaryFiltered, getStoryMetricsForRun, getTokenUsageSummary, incrementRunRestarts, initSchema, initializeDolt, listRequirements, listRunMetrics, loadParentRunDecisions, supersedeDecision, tagRunAsBaseline, updatePipelineRun } from "../dist-CLvAwmT7.js";
 import "../adapter-registry-DXLMTmfD.js";
-import { AdapterTelemetryPersistence, AppError, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, GitClient, GrammarLoader, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SymbolParser, createContextCompiler, createDispatcher, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, createTelemetryAdvisor, formatPhaseCompletionSummary, registerFactoryCommand, registerRunCommand, registerScenariosCommand, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runSolutioningPhase, validateStopAfterFromConflict } from "../run-DDUeFC-I.js";
+import { AdapterTelemetryPersistence, AppError, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, GitClient, GrammarLoader, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SymbolParser, createContextCompiler, createDispatcher, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, createTelemetryAdvisor, formatPhaseCompletionSummary, getFactoryRunSummaries, getScenarioResultsForRun, listGraphRuns, registerFactoryCommand, registerRunCommand, registerScenariosCommand, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runSolutioningPhase, validateStopAfterFromConflict } from "../run-bhGoAbu9.js";
 import "../errors-D1LU8CZ9.js";
 import "../routing-CcBOCuC9.js";
 import "../decisions-C0pz9Clx.js";
@@ -4359,7 +4359,7 @@ async function runSupervisorAction(options, deps = {}) {
 								await initSchema(expAdapter);
 								const { runRunAction: runPipeline } = await import(
 									/* @vite-ignore */
-									"../run-BV2zNwIC.js"
+									"../run-BQmRdbhV.js"
 );
 								const runStoryFn = async (opts) => {
 									const exitCode = await runPipeline({
@@ -4696,8 +4696,22 @@ function printCategoryTable(stats, label) {
 		process.stdout.write(`  ${c.category.padEnd(22)} ${c.totalTokens.toLocaleString().padStart(12)} ${pct.padStart(8)} ${String(c.eventCount).padStart(8)} ${avg.padStart(10)} ${c.trend}\n`);
 	}
 }
+function printFactoryRunTable(runs) {
+	process.stdout.write(`\nFactory Runs (${runs.length} records)\n`);
+	process.stdout.write("─".repeat(80) + "\n");
+	process.stdout.write(`  ${"run_id".padEnd(10)} ${"score".padStart(7)} ${"passes".padStart(7)} ${"started_at".padEnd(20)} ${"cost_usd".padStart(10)} ${"status".padEnd(16)}\n`);
+	process.stdout.write("  " + "─".repeat(74) + "\n");
+	for (const run of runs) {
+		const scoreStr = run.satisfaction_score !== null ? `${(run.satisfaction_score * 100).toFixed(1)}%` : "—";
+		const passesStr = run.passes !== null ? run.passes ? "✓" : "✗" : "—";
+		const startedAt = run.started_at.slice(0, 19);
+		const costStr = `$${run.total_cost_usd.toFixed(4)}`;
+		const statusStr = run.convergence_status ?? "—";
+		process.stdout.write(`  ${run.run_id.slice(0, 8).padEnd(10)} ${scoreStr.padStart(7)} ${passesStr.padStart(7)} ${startedAt.padEnd(20)} ${costStr.padStart(10)} ${statusStr.padEnd(16)}\n`);
+	}
+}
 async function runMetricsAction(options) {
-	const { outputFormat, projectRoot, limit = 10, compare, tagBaseline, analysis, sprint, story, taskType, since, aggregate, efficiency, recommendations, turns, consumers, categories, compareStories, routingRecommendations } = options;
+	const { outputFormat, projectRoot, limit = 10, compare, tagBaseline, analysis, sprint, story, taskType, since, aggregate, efficiency, recommendations, turns, consumers, categories, compareStories, routingRecommendations, run, factory } = options;
 	const telemetryModes = [
 		efficiency,
 		recommendations,
@@ -4931,7 +4945,7 @@ async function runMetricsAction(options) {
 	}
 	const dbRoot = await resolveMainRepoRoot(projectRoot);
 	const doltStateDir = join(dbRoot, ".substrate", "state", ".dolt");
-	if (!existsSync$1(doltStateDir)) {
+	if (!existsSync$1(doltStateDir) && factory !== true && run === void 0) {
 		if (outputFormat === "json") process.stdout.write(formatOutput({
 			runs: [],
 			message: "No metrics yet — no pipeline database found. Initialize Dolt with `substrate init`."
@@ -4945,6 +4959,60 @@ async function runMetricsAction(options) {
 	});
 	try {
 		await initSchema(adapter);
+		if (run !== void 0) {
+			let rows = [];
+			let resolvedRunId = run;
+			try {
+				rows = await getScenarioResultsForRun(adapter, run);
+				if (rows.length === 0 && run.length < 36) {
+					const allRuns = await listGraphRuns(adapter, 100);
+					const matching = allRuns.filter((r) => r.id.startsWith(run));
+					if (matching.length === 1 && matching[0] !== void 0) {
+						resolvedRunId = matching[0].id;
+						rows = await getScenarioResultsForRun(adapter, resolvedRunId);
+					}
+				}
+			} catch (err) {
+				logger$10.debug({ err }, "getScenarioResultsForRun failed");
+			}
+			if (rows.length === 0) {
+				const msg = `No factory run found with id: ${run}`;
+				if (outputFormat === "json") process.stdout.write(formatOutput({ message: msg }, "json", true) + "\n");
+				else process.stderr.write(`Error: ${msg}\n`);
+				return 1;
+			}
+			if (outputFormat === "json") process.stdout.write(formatOutput({
+				run_id: resolvedRunId,
+				type: "factory",
+				iterations: rows
+			}, "json", true) + "\n");
+			else {
+				process.stdout.write(`\nFactory Run: ${resolvedRunId}\n`);
+				process.stdout.write("─".repeat(80) + "\n");
+				process.stdout.write(`  ${"#".padStart(3)} ${"score".padStart(7)} ${"passes".padStart(7)} ${"passed/total".padStart(13)} ${"executed_at".padEnd(20)}\n`);
+				process.stdout.write("  " + "─".repeat(54) + "\n");
+				for (const r of rows) {
+					const scoreStr = `${(r.satisfaction_score * 100).toFixed(1)}%`;
+					const passesStr = r.passes ? "✓" : "✗";
+					const passedTotal = `${r.passed}/${r.total_scenarios}`;
+					const execAt = String(r.executed_at).slice(0, 19);
+					process.stdout.write(`  ${String(r.iteration).padStart(3)} ${scoreStr.padStart(7)} ${passesStr.padStart(7)} ${passedTotal.padStart(13)} ${execAt.padEnd(20)}\n`);
+				}
+			}
+			return 0;
+		}
+		if (factory === true) {
+			let factoryRuns$1 = [];
+			try {
+				factoryRuns$1 = await getFactoryRunSummaries(adapter, limit);
+			} catch (err) {
+				logger$10.debug({ err }, "getFactoryRunSummaries failed in factory-only mode");
+			}
+			if (outputFormat === "json") process.stdout.write(formatOutput({ graph_runs: factoryRuns$1 }, "json", true) + "\n");
+			else if (factoryRuns$1.length === 0) process.stdout.write("No factory runs recorded yet.\n");
+			else printFactoryRunTable(factoryRuns$1);
+			return 0;
+		}
 		if (tagBaseline !== void 0) {
 			const row = await getRunMetrics(adapter, tagBaseline);
 			if (!row) {
@@ -5033,18 +5101,26 @@ async function runMetricsAction(options) {
 		const phaseBreakdownMap = {};
 		try {
 			const kvStore = new FileStateStore({ basePath: join(dbRoot, ".substrate") });
-			for (const run of runs) {
-				const raw = await kvStore.getMetric(run.run_id, "phase_token_breakdown");
-				phaseBreakdownMap[run.run_id] = raw !== void 0 ? raw : null;
+			for (const run$1 of runs) {
+				const raw = await kvStore.getMetric(run$1.run_id, "phase_token_breakdown");
+				phaseBreakdownMap[run$1.run_id] = raw !== void 0 ? raw : null;
 			}
 		} catch {}
+		let factoryRuns = [];
+		try {
+			factoryRuns = await getFactoryRunSummaries(adapter, limit);
+		} catch (err) {
+			logger$10.debug({ err }, "getFactoryRunSummaries failed — table may not exist in older databases");
+		}
 		if (outputFormat === "json") {
-			const runsWithBreakdown = runs.map((run) => ({
-				...run,
-				phase_token_breakdown: phaseBreakdownMap[run.run_id] ?? null
+			const runsWithBreakdown = runs.map((run$1) => ({
+				...run$1,
+				type: "sdlc",
+				phase_token_breakdown: phaseBreakdownMap[run$1.run_id] ?? null
 			}));
 			const jsonPayload = {
 				runs: runsWithBreakdown,
+				graph_runs: factoryRuns,
 				story_metrics: storyMetrics
 			};
 			if (doltMetrics !== void 0) if (aggregate) {
@@ -5066,23 +5142,23 @@ async function runMetricsAction(options) {
 			} else jsonPayload.dolt_metrics = doltMetrics;
 			process.stdout.write(formatOutput(jsonPayload, "json", true) + "\n");
 		} else {
-			if (runs.length === 0 && storyMetrics.length === 0 && (doltMetrics === void 0 || doltMetrics.length === 0)) {
+			if (runs.length === 0 && storyMetrics.length === 0 && (doltMetrics === void 0 || doltMetrics.length === 0) && factoryRuns.length === 0) {
 				process.stdout.write("No run metrics recorded yet. Run `substrate run` to generate metrics.\n");
 				return 0;
 			}
 			if (runs.length > 0) {
 				process.stdout.write(`\nPipeline Run Metrics (last ${runs.length} runs)\n`);
 				process.stdout.write("─".repeat(80) + "\n");
-				for (const run of runs) {
-					const isBaseline = run.is_baseline ? " [BASELINE]" : "";
-					process.stdout.write(`\nRun: ${run.run_id}${isBaseline}\n`);
-					process.stdout.write(`  Status:    ${run.status}  |  Methodology: ${run.methodology}\n`);
-					process.stdout.write(`  Started:   ${run.started_at}\n`);
-					if (run.completed_at) process.stdout.write(`  Completed: ${run.completed_at}  (${run.wall_clock_seconds}s)\n`);
-					process.stdout.write(`  Stories:   attempted=${run.stories_attempted} succeeded=${run.stories_succeeded} failed=${run.stories_failed} escalated=${run.stories_escalated}\n`);
-					process.stdout.write(`  Tokens:    ${(run.total_input_tokens ?? 0).toLocaleString()} in / ${(run.total_output_tokens ?? 0).toLocaleString()} out  $${(run.total_cost_usd ?? 0).toFixed(4)}\n`);
-					process.stdout.write(`  Cycles:    ${run.total_review_cycles}  |  Dispatches: ${run.total_dispatches}  |  Concurrency: ${run.concurrency_setting}\n`);
-					const breakdown = phaseBreakdownMap[run.run_id];
+				for (const run$1 of runs) {
+					const isBaseline = run$1.is_baseline ? " [BASELINE]" : "";
+					process.stdout.write(`\nRun: ${run$1.run_id}${isBaseline}\n`);
+					process.stdout.write(`  Status:    ${run$1.status}  |  Methodology: ${run$1.methodology}\n`);
+					process.stdout.write(`  Started:   ${run$1.started_at}\n`);
+					if (run$1.completed_at) process.stdout.write(`  Completed: ${run$1.completed_at}  (${run$1.wall_clock_seconds}s)\n`);
+					process.stdout.write(`  Stories:   attempted=${run$1.stories_attempted} succeeded=${run$1.stories_succeeded} failed=${run$1.stories_failed} escalated=${run$1.stories_escalated}\n`);
+					process.stdout.write(`  Tokens:    ${(run$1.total_input_tokens ?? 0).toLocaleString()} in / ${(run$1.total_output_tokens ?? 0).toLocaleString()} out  $${(run$1.total_cost_usd ?? 0).toFixed(4)}\n`);
+					process.stdout.write(`  Cycles:    ${run$1.total_review_cycles}  |  Dispatches: ${run$1.total_dispatches}  |  Concurrency: ${run$1.concurrency_setting}\n`);
+					const breakdown = phaseBreakdownMap[run$1.run_id];
 					if (breakdown !== null && breakdown !== void 0 && breakdown.entries.length > 0) {
 						process.stdout.write("  Phase Token Breakdown:\n");
 						for (const entry of breakdown.entries) process.stdout.write(`    ${entry.phase.padEnd(10)} | ${entry.model.padEnd(30)} | in: ${entry.inputTokens} | out: ${entry.outputTokens} | dispatches: ${entry.dispatchCount}\n`);
@@ -5136,6 +5212,7 @@ async function runMetricsAction(options) {
 					process.stdout.write(`  ${m.storyKey.padEnd(16)} ${m.taskType.padEnd(16)} ${tokIn.padStart(10)} ${tokOut.padStart(11)} ${wall.padStart(10)} ${res.padEnd(12)}\n`);
 				}
 			}
+			if (factoryRuns.length > 0) printFactoryRunTable(factoryRuns);
 		}
 		return 0;
 	} catch (err) {
@@ -5151,7 +5228,7 @@ async function runMetricsAction(options) {
 	}
 }
 function registerMetricsCommand(program, _version = "0.0.0", projectRoot = process.cwd()) {
-	program.command("metrics").description("Show historical pipeline run metrics and cross-run comparison").option("--project-root <path>", "Project root directory", projectRoot).option("--output-format <format>", "Output format: human (default) or json", "human").option("--limit <n>", "Number of runs to show (default: 10)", (v) => parseInt(v, 10), 10).option("--compare <run-id-a,run-id-b>", "Compare two runs side-by-side (comma-separated IDs, e.g. abc123,def456)").option("--tag-baseline <run-id>", "Mark a run as the performance baseline").option("--analysis <run-id>", "Read and output the analysis report for the specified run (AC5 of Story 17-3)").option("--sprint <sprint>", "Filter StateStore metrics by sprint (e.g. sprint-1)").option("--story <story-key>", "Filter StateStore metrics by story key (e.g. 26-1)").option("--task-type <type>", "Filter StateStore metrics by task type (e.g. dev-story)").option("--since <iso-date>", "Filter StateStore metrics at or after this ISO timestamp").option("--aggregate", "Aggregate StateStore metrics grouped by task_type").option("--efficiency", "Show telemetry efficiency scores for recent stories").option("--recommendations", "Show all telemetry recommendations across stories").option("--turns <storyKey>", "Show per-turn analysis for a specific story").option("--consumers <storyKey>", "Show consumer stats for a specific story").option("--categories", "Show category stats (optionally scoped by --story <storyKey>)").option("--compare-stories <storyA,storyB>", "Compare efficiency scores of two stories side-by-side (comma-separated keys)").option("--routing-recommendations", "Show routing recommendations derived from phase token breakdown history").action(async (opts) => {
+	program.command("metrics").description("Show historical pipeline run metrics and cross-run comparison").option("--project-root <path>", "Project root directory", projectRoot).option("--output-format <format>", "Output format: human (default) or json", "human").option("--limit <n>", "Number of runs to show (default: 10)", (v) => parseInt(v, 10), 10).option("--compare <run-id-a,run-id-b>", "Compare two runs side-by-side (comma-separated IDs, e.g. abc123,def456)").option("--tag-baseline <run-id>", "Mark a run as the performance baseline").option("--analysis <run-id>", "Read and output the analysis report for the specified run (AC5 of Story 17-3)").option("--sprint <sprint>", "Filter StateStore metrics by sprint (e.g. sprint-1)").option("--story <story-key>", "Filter StateStore metrics by story key (e.g. 26-1)").option("--task-type <type>", "Filter StateStore metrics by task type (e.g. dev-story)").option("--since <iso-date>", "Filter StateStore metrics at or after this ISO timestamp").option("--aggregate", "Aggregate StateStore metrics grouped by task_type").option("--efficiency", "Show telemetry efficiency scores for recent stories").option("--recommendations", "Show all telemetry recommendations across stories").option("--turns <storyKey>", "Show per-turn analysis for a specific story").option("--consumers <storyKey>", "Show consumer stats for a specific story").option("--categories", "Show category stats (optionally scoped by --story <storyKey>)").option("--compare-stories <storyA,storyB>", "Compare efficiency scores of two stories side-by-side (comma-separated keys)").option("--routing-recommendations", "Show routing recommendations derived from phase token breakdown history").option("--run <run-id>", "Show per-iteration score history for a specific factory run").option("--factory", "Show only factory graph run metrics (excludes SDLC runs)").action(async (opts) => {
 		const outputFormat = opts.outputFormat === "json" ? "json" : "human";
 		let compareIds;
 		if (opts.compare !== void 0) {
@@ -5186,7 +5263,9 @@ function registerMetricsCommand(program, _version = "0.0.0", projectRoot = proce
 			...opts.consumers !== void 0 && { consumers: opts.consumers },
 			...opts.categories !== void 0 && { categories: opts.categories },
 			...compareStoriesIds !== void 0 && { compareStories: compareStoriesIds },
-			...opts.routingRecommendations !== void 0 && { routingRecommendations: opts.routingRecommendations }
+			...opts.routingRecommendations !== void 0 && { routingRecommendations: opts.routingRecommendations },
+			...opts.run !== void 0 && { run: opts.run },
+			...opts.factory !== void 0 && { factory: opts.factory }
 		};
 		const exitCode = await runMetricsAction(metricsOpts);
 		process.exitCode = exitCode;

package/dist/{run-BV2zNwIC.js → run-BQmRdbhV.js} RENAMED

@@ -2,7 +2,7 @@ import "./health-DswaC1q5.js";
 import "./logger-KeHncl-f.js";
 import "./helpers-CElYrONe.js";
 import "./dist-CLvAwmT7.js";
-import { normalizeGraphSummaryToStatus, registerRunCommand, runRunAction } from "./run-DDUeFC-I.js";
+import { normalizeGraphSummaryToStatus, registerRunCommand, runRunAction } from "./run-bhGoAbu9.js";
 import "./routing-CcBOCuC9.js";
 import "./decisions-C0pz9Clx.js";