substrate-ai 0.20.39 → 0.20.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +120 -17
- package/dist/{health-R9GiVIiQ.js → health-BmEu3n9Z.js} +295 -11
- package/dist/{health-C-ZrHJwL.js → health-CAV-PNE4.js} +1 -1
- package/dist/index.d.ts +52 -0
- package/dist/{run-CL1C4M2f.js → run-CXTAvjCK.js} +2 -2
- package/dist/{run-Dm70k8bR.js → run-DnwsCYYA.js} +27 -3
- package/package.json +1 -1
- package/packs/bmad/prompts/create-story.md +7 -0
package/dist/cli/index.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { FileStateStore, RunManifest, SUBSTRATE_OWNED_SETTINGS_KEYS, SupervisorLock, VALID_PHASES, WorkGraphRepository, ZERO_FINDING_COUNTS, buildPipelineStatusOutput, createDatabaseAdapter, createStateStore, findPackageRoot, formatOutput, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getAllDescendantPids, getAutoHealthData, getSubstrateDefaultSettings, inspectProcessTree, parseDbTimestampAsUtc, parseRuntimeProbes, registerHealthCommand, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveMainRepoRoot, resolveRunManifest, rollupFindingCounts } from "../health-R9GiVIiQ.js";
|
|
2
|
+
import { FileStateStore, RunManifest, SUBSTRATE_OWNED_SETTINGS_KEYS, SupervisorLock, VALID_PHASES, WorkGraphRepository, ZERO_FINDINGS_BY_AUTHOR, ZERO_FINDING_COUNTS, ZERO_PROBE_AUTHOR_METRICS, aggregateProbeAuthorMetrics, buildPipelineStatusOutput, createDatabaseAdapter, createStateStore, findPackageRoot, formatOutput, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getAllDescendantPids, getAutoHealthData, getSubstrateDefaultSettings, inspectProcessTree, parseDbTimestampAsUtc, parseRuntimeProbes, registerHealthCommand, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveMainRepoRoot, resolveRunManifest, rollupFindingCounts, rollupFindingsByAuthor, rollupProbeAuthorMetrics } from "../health-BmEu3n9Z.js";
|
|
3
3
|
import { createLogger } from "../logger-KeHncl-f.js";
|
|
4
4
|
import { createEventBus } from "../helpers-CElYrONe.js";
|
|
5
5
|
import { AdapterRegistry, BudgetConfigSchema, CURRENT_CONFIG_FORMAT_VERSION, CURRENT_TASK_GRAPH_VERSION, ConfigError, CostTrackerConfigSchema, DEFAULT_CONFIG, DoltClient, DoltNotInstalled, GlobalSettingsSchema, InMemoryDatabaseAdapter, IngestionServer, MonitorDatabaseImpl, OPERATIONAL_FINDING, PartialGlobalSettingsSchema, PartialProviderConfigSchema, ProvidersSchema, RoutingRecommender, STORY_METRICS, TelemetryConfigSchema, addTokenUsage, aggregateTokenUsageForRun, checkDoltInstalled, compareRunMetrics, createAmendmentRun, createConfigSystem, createDecision, createDoltClient, createPipelineRun, getActiveDecisions, getAllCostEntriesFiltered, getBaselineRunMetrics, getDecisionsByCategory, getDecisionsByPhaseForRun, getLatestCompletedRun, getLatestRun, getPipelineRunById, getPlanningCostTotal, getRetryableEscalations, getRunMetrics, getRunningPipelineRuns, getSessionCostSummary, getSessionCostSummaryFiltered, getStoryMetricsForRun, getTokenUsageSummary, incrementRunRestarts, initSchema, initializeDolt, listRunMetrics, loadParentRunDecisions, supersedeDecision, tagRunAsBaseline, updatePipelineRun } from "../dist-VcMmfo2w.js";
|
|
6
6
|
import "../adapter-registry-DXLMTmfD.js";
|
|
7
|
-
import { AdapterTelemetryPersistence, AppError, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, EpicIngester, GitClient, GrammarLoader, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SymbolParser, createContextCompiler, createDispatcher, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, createTelemetryAdvisor, formatPhaseCompletionSummary, getFactoryRunSummaries, getScenarioResultsForRun, getTwinRunsForRun, listGraphRuns, registerExportCommand, registerFactoryCommand, registerRunCommand, registerScenariosCommand, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runProbeAuthor, runSolutioningPhase, validateStopAfterFromConflict } from "../run-Dm70k8bR.js";
|
|
7
|
+
import { AdapterTelemetryPersistence, AppError, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, EpicIngester, GitClient, GrammarLoader, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SymbolParser, createContextCompiler, createDispatcher, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, createTelemetryAdvisor, formatPhaseCompletionSummary, getFactoryRunSummaries, getScenarioResultsForRun, getTwinRunsForRun, listGraphRuns, registerExportCommand, registerFactoryCommand, registerRunCommand, registerScenariosCommand, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runProbeAuthor, runSolutioningPhase, validateStopAfterFromConflict } from "../run-DnwsCYYA.js";
|
|
8
8
|
import "../errors-CogpxBUg.js";
|
|
9
9
|
import "../routing-CcBOCuC9.js";
|
|
10
10
|
import "../decisions-C0pz9Clx.js";
|
|
@@ -3667,7 +3667,7 @@ async function runStatusAction(options) {
|
|
|
3667
3667
|
logger$13.debug({ err }, "Work graph query failed, continuing without work graph data");
|
|
3668
3668
|
}
|
|
3669
3669
|
if (run === void 0) {
|
|
3670
|
-
const { inspectProcessTree: inspectProcessTree$1 } = await import("../health-C-ZrHJwL.js");
|
|
3670
|
+
const { inspectProcessTree: inspectProcessTree$1 } = await import("../health-CAV-PNE4.js");
|
|
3671
3671
|
const substrateDirPath = join(projectRoot, ".substrate");
|
|
3672
3672
|
const processInfo = inspectProcessTree$1({
|
|
3673
3673
|
projectRoot,
|
|
@@ -3712,6 +3712,8 @@ async function runStatusAction(options) {
|
|
|
3712
3712
|
} catch {}
|
|
3713
3713
|
const verificationResult = manifestPerStoryState?.[row.story_key]?.verification_result;
|
|
3714
3714
|
const verificationFindings = rollupFindingCounts(verificationResult);
|
|
3715
|
+
const probeAuthorMetrics = rollupProbeAuthorMetrics(verificationResult);
|
|
3716
|
+
const findingsByAuthor = rollupFindingsByAuthor(verificationResult);
|
|
3715
3717
|
const verificationRan = verificationResult !== void 0 && verificationResult !== null;
|
|
3716
3718
|
return {
|
|
3717
3719
|
story_key: row.story_key,
|
|
@@ -3724,8 +3726,12 @@ async function runStatusAction(options) {
|
|
|
3724
3726
|
},
|
|
3725
3727
|
review_cycles: row.review_cycles ?? 0,
|
|
3726
3728
|
dispatches: row.dispatches ?? 0,
|
|
3727
|
-
verification_findings:
|
|
3728
|
-
|
|
3729
|
+
verification_findings: {
|
|
3730
|
+
...verificationFindings,
|
|
3731
|
+
byAuthor: findingsByAuthor
|
|
3732
|
+
},
|
|
3733
|
+
verification_ran: verificationRan,
|
|
3734
|
+
probe_author: probeAuthorMetrics
|
|
3729
3735
|
};
|
|
3730
3736
|
});
|
|
3731
3737
|
let pipelineWallClockMs = 0;
|
|
@@ -5198,7 +5204,7 @@ async function runSupervisorAction(options, deps = {}) {
|
|
|
5198
5204
|
await initSchema(expAdapter);
|
|
5199
5205
|
const { runRunAction: runPipeline } = await import(
|
|
5200
5206
|
/* @vite-ignore */
|
|
5201
|
-
"../run-CL1C4M2f.js"
|
|
5207
|
+
"../run-CXTAvjCK.js"
|
|
5202
5208
|
);
|
|
5203
5209
|
const runStoryFn = async (opts) => {
|
|
5204
5210
|
const exitCode = await runPipeline({
|
|
@@ -5552,7 +5558,7 @@ function printFactoryRunTable(runs) {
|
|
|
5552
5558
|
}
|
|
5553
5559
|
}
|
|
5554
5560
|
async function runMetricsAction(options) {
|
|
5555
|
-
const { outputFormat, projectRoot, limit = 10, compare, tagBaseline, analysis, sprint, story, taskType, since, aggregate, efficiency, recommendations, turns, consumers, categories, compareStories, routingRecommendations, run, factory } = options;
|
|
5561
|
+
const { outputFormat, projectRoot, limit = 10, compare, tagBaseline, analysis, sprint, story, taskType, since, aggregate, efficiency, recommendations, turns, consumers, categories, compareStories, routingRecommendations, run, factory, probeAuthorSummary } = options;
|
|
5556
5562
|
const telemetryModes = [
|
|
5557
5563
|
efficiency,
|
|
5558
5564
|
recommendations,
|
|
@@ -5962,6 +5968,8 @@ async function runMetricsAction(options) {
|
|
|
5962
5968
|
} catch {}
|
|
5963
5969
|
const findingCountsByStoryRun = new Map();
|
|
5964
5970
|
const verificationRanByStoryRun = new Map();
|
|
5971
|
+
const probeAuthorByStoryRun = new Map();
|
|
5972
|
+
const findingsByAuthorByStoryRun = new Map();
|
|
5965
5973
|
const uniqueRunIds = Array.from(new Set(storyMetrics.map((sm) => sm.run_id).filter((id) => id !== "")));
|
|
5966
5974
|
for (const uniqueRunId of uniqueRunIds) try {
|
|
5967
5975
|
const { manifest } = await resolveRunManifest(dbRoot, uniqueRunId);
|
|
@@ -5971,6 +5979,8 @@ async function runMetricsAction(options) {
|
|
|
5971
5979
|
const key = `${storyKey}:${uniqueRunId}`;
|
|
5972
5980
|
findingCountsByStoryRun.set(key, rollupFindingCounts(entry.verification_result));
|
|
5973
5981
|
verificationRanByStoryRun.set(key, entry.verification_result !== void 0 && entry.verification_result !== null);
|
|
5982
|
+
probeAuthorByStoryRun.set(key, rollupProbeAuthorMetrics(entry.verification_result));
|
|
5983
|
+
findingsByAuthorByStoryRun.set(key, rollupFindingsByAuthor(entry.verification_result));
|
|
5974
5984
|
}
|
|
5975
5985
|
} catch {}
|
|
5976
5986
|
let factoryRuns = [];
|
|
@@ -5985,16 +5995,30 @@ async function runMetricsAction(options) {
|
|
|
5985
5995
|
type: "sdlc",
|
|
5986
5996
|
phase_token_breakdown: phaseBreakdownMap[run$1.run_id] ?? null
|
|
5987
5997
|
}));
|
|
5988
|
-
const storyMetricsWithFindings = storyMetrics.map((sm) =>
|
|
5989
|
-
|
|
5990
|
-
|
|
5991
|
-
|
|
5992
|
-
|
|
5998
|
+
const storyMetricsWithFindings = storyMetrics.map((sm) => {
|
|
5999
|
+
const key = `${sm.story_key}:${sm.run_id}`;
|
|
6000
|
+
const counts = findingCountsByStoryRun.get(key) ?? { ...ZERO_FINDING_COUNTS };
|
|
6001
|
+
const byAuthor = findingsByAuthorByStoryRun.get(key) ?? { ...ZERO_FINDINGS_BY_AUTHOR };
|
|
6002
|
+
return {
|
|
6003
|
+
...sm,
|
|
6004
|
+
verification_findings: {
|
|
6005
|
+
...counts,
|
|
6006
|
+
byAuthor
|
|
6007
|
+
},
|
|
6008
|
+
verification_ran: verificationRanByStoryRun.get(key) ?? false,
|
|
6009
|
+
probe_author: probeAuthorByStoryRun.get(key) ?? { ...ZERO_PROBE_AUTHOR_METRICS }
|
|
6010
|
+
};
|
|
6011
|
+
});
|
|
5993
6012
|
const jsonPayload = {
|
|
5994
6013
|
runs: runsWithBreakdown,
|
|
5995
6014
|
graph_runs: factoryRuns,
|
|
5996
6015
|
story_metrics: storyMetricsWithFindings
|
|
5997
6016
|
};
|
|
6017
|
+
if (probeAuthorSummary) {
|
|
6018
|
+
const allMetrics = storyMetricsWithFindings.map((sm) => sm.probe_author);
|
|
6019
|
+
const aggregate$1 = aggregateProbeAuthorMetrics(allMetrics, storyMetricsWithFindings.length);
|
|
6020
|
+
jsonPayload.probe_author_summary = aggregate$1;
|
|
6021
|
+
}
|
|
5998
6022
|
if (doltMetrics !== void 0) if (aggregate) {
|
|
5999
6023
|
const aggregateResults = doltMetrics.map((m) => ({
|
|
6000
6024
|
task_type: m.taskType,
|
|
@@ -6100,7 +6124,7 @@ async function runMetricsAction(options) {
|
|
|
6100
6124
|
}
|
|
6101
6125
|
}
|
|
6102
6126
|
function registerMetricsCommand(program, _version = "0.0.0", projectRoot = process.cwd()) {
|
|
6103
|
-
program.command("metrics").description("Show historical pipeline run metrics and cross-run comparison").option("--project-root <path>", "Project root directory", projectRoot).option("--output-format <format>", "Output format: human (default) or json", "human").option("--limit <n>", "Number of runs to show (default: 10)", (v) => parseInt(v, 10), 10).option("--compare <run-id-a,run-id-b>", "Compare two runs side-by-side (comma-separated IDs, e.g. abc123,def456)").option("--tag-baseline <run-id>", "Mark a run as the performance baseline").option("--analysis <run-id>", "Read and output the analysis report for the specified run (AC5 of Story 17-3)").option("--sprint <sprint>", "Filter StateStore metrics by sprint (e.g. sprint-1)").option("--story <story-key>", "Filter StateStore metrics by story key (e.g. 26-1)").option("--task-type <type>", "Filter StateStore metrics by task type (e.g. dev-story)").option("--since <iso-date>", "Filter StateStore metrics at or after this ISO timestamp").option("--aggregate", "Aggregate StateStore metrics grouped by task_type").option("--efficiency", "Show telemetry efficiency scores for recent stories").option("--recommendations", "Show all telemetry recommendations across stories").option("--turns <storyKey>", "Show per-turn analysis for a specific story").option("--consumers <storyKey>", "Show consumer stats for a specific story").option("--categories", "Show category stats (optionally scoped by --story <storyKey>)").option("--compare-stories <storyA,storyB>", "Compare efficiency scores of two stories side-by-side (comma-separated keys)").option("--routing-recommendations", "Show routing recommendations derived from phase token breakdown history").option("--run <run-id>", "Show per-iteration score history for a specific factory run").option("--factory", "Show only factory graph run metrics (excludes SDLC runs)").action(async (opts) => {
|
|
6127
|
+
program.command("metrics").description("Show historical pipeline run metrics and cross-run comparison").option("--project-root <path>", "Project root directory", projectRoot).option("--output-format <format>", "Output format: human (default) or json", "human").option("--limit <n>", "Number of runs to show (default: 10)", (v) => parseInt(v, 10), 10).option("--compare <run-id-a,run-id-b>", "Compare two runs side-by-side (comma-separated IDs, e.g. abc123,def456)").option("--tag-baseline <run-id>", "Mark a run as the performance baseline").option("--analysis <run-id>", "Read and output the analysis report for the specified run (AC5 of Story 17-3)").option("--sprint <sprint>", "Filter StateStore metrics by sprint (e.g. sprint-1)").option("--story <story-key>", "Filter StateStore metrics by story key (e.g. 26-1)").option("--task-type <type>", "Filter StateStore metrics by task type (e.g. dev-story)").option("--since <iso-date>", "Filter StateStore metrics at or after this ISO timestamp").option("--aggregate", "Aggregate StateStore metrics grouped by task_type").option("--efficiency", "Show telemetry efficiency scores for recent stories").option("--recommendations", "Show all telemetry recommendations across stories").option("--turns <storyKey>", "Show per-turn analysis for a specific story").option("--consumers <storyKey>", "Show consumer stats for a specific story").option("--categories", "Show category stats (optionally scoped by --story <storyKey>)").option("--compare-stories <storyA,storyB>", "Compare efficiency scores of two stories side-by-side (comma-separated keys)").option("--routing-recommendations", "Show routing recommendations derived from phase token breakdown history").option("--run <run-id>", "Show per-iteration score history for a specific factory run").option("--factory", "Show only factory graph run metrics (excludes SDLC runs)").option("--probe-author-summary", "Print cross-run probe-author KPI aggregate (Story 60-15)").action(async (opts) => {
|
|
6104
6128
|
const outputFormat = opts.outputFormat === "json" ? "json" : "human";
|
|
6105
6129
|
let compareIds;
|
|
6106
6130
|
if (opts.compare !== void 0) {
|
|
@@ -6137,7 +6161,8 @@ function registerMetricsCommand(program, _version = "0.0.0", projectRoot = proce
|
|
|
6137
6161
|
...compareStoriesIds !== void 0 && { compareStories: compareStoriesIds },
|
|
6138
6162
|
...opts.routingRecommendations !== void 0 && { routingRecommendations: opts.routingRecommendations },
|
|
6139
6163
|
...opts.run !== void 0 && { run: opts.run },
|
|
6140
|
-
...opts.factory !== void 0 && { factory: opts.factory }
|
|
6164
|
+
...opts.factory !== void 0 && { factory: opts.factory },
|
|
6165
|
+
...opts.probeAuthorSummary !== void 0 && { probeAuthorSummary: opts.probeAuthorSummary }
|
|
6141
6166
|
};
|
|
6142
6167
|
const exitCode = await runMetricsAction(metricsOpts);
|
|
6143
6168
|
process.exitCode = exitCode;
|
|
@@ -8440,7 +8465,7 @@ function registerProbesCommand(program) {
|
|
|
8440
8465
|
probesA = extractProbesFromArtifact(artifactA);
|
|
8441
8466
|
} catch (err) {
|
|
8442
8467
|
const msg = err instanceof Error ? err.message : String(err);
|
|
8443
|
-
emitError(format, `failed to read artifact A: ${msg}`);
|
|
8468
|
+
emitError$1(format, `failed to read artifact A: ${msg}`);
|
|
8444
8469
|
process.exitCode = 1;
|
|
8445
8470
|
return;
|
|
8446
8471
|
}
|
|
@@ -8448,7 +8473,7 @@ function registerProbesCommand(program) {
|
|
|
8448
8473
|
probesB = extractProbesFromArtifact(artifactB);
|
|
8449
8474
|
} catch (err) {
|
|
8450
8475
|
const msg = err instanceof Error ? err.message : String(err);
|
|
8451
|
-
emitError(format, `failed to read artifact B: ${msg}`);
|
|
8476
|
+
emitError$1(format, `failed to read artifact B: ${msg}`);
|
|
8452
8477
|
process.exitCode = 1;
|
|
8453
8478
|
return;
|
|
8454
8479
|
}
|
|
@@ -8463,7 +8488,7 @@ function registerProbesCommand(program) {
|
|
|
8463
8488
|
});
|
|
8464
8489
|
});
|
|
8465
8490
|
}
|
|
8466
|
-
function emitError(format, message) {
|
|
8491
|
+
function emitError$1(format, message) {
|
|
8467
8492
|
if (format === "json") process.stdout.write(JSON.stringify({
|
|
8468
8493
|
success: false,
|
|
8469
8494
|
error: message
|
|
@@ -8633,6 +8658,83 @@ async function runProbeAuthorDispatch(opts, projectRoot, registry) {
|
|
|
8633
8658
|
return result.result === "success" ? 0 : 1;
|
|
8634
8659
|
}
|
|
8635
8660
|
|
|
8661
|
+
//#endregion
|
|
8662
|
+
//#region src/cli/commands/annotate.ts
|
|
8663
|
+
function registerAnnotateCommand(program, _version, projectRoot) {
|
|
8664
|
+
program.command("annotate").description("Post-hoc operator annotation on a verification finding (probe-author KPI confirmed-defect tagging — Story 60-15)").requiredOption("--story <key>", "Story key (e.g. 1-12)").requiredOption("--finding-category <category>", "Finding category to annotate (e.g. runtime-probe-fail, runtime-probe-error-response)").option("--probe-name <name>", "Optional probe name to narrow the annotation when multiple probes share a category").option("--note <text>", "Free-form note explaining the judgment").option("--run-id <id>", "Run ID to annotate (defaults to the latest run for this project)").option("--confirmed-defect", "Annotate as: probe failure caught a real defect").option("--false-positive", "Annotate as: probe failure was not a real defect").option("--probe-bug", "Annotate as: probe itself was buggy (not a defect catch or false positive)").option("--output-format <format>", "Output format: human (default) or json", "human").option("--project-root <path>", "Project root directory", projectRoot).action(async (opts) => {
|
|
8665
|
+
const exitCode = await runAnnotateAction(opts);
|
|
8666
|
+
process.exitCode = exitCode;
|
|
8667
|
+
});
|
|
8668
|
+
}
|
|
8669
|
+
async function runAnnotateAction(opts) {
|
|
8670
|
+
const format = opts.outputFormat === "json" ? "json" : "human";
|
|
8671
|
+
const judgmentCount = (opts.confirmedDefect ? 1 : 0) + (opts.falsePositive ? 1 : 0) + (opts.probeBug ? 1 : 0);
|
|
8672
|
+
if (judgmentCount !== 1) return emitError(format, `exactly one of --confirmed-defect, --false-positive, --probe-bug must be provided (got ${judgmentCount})`);
|
|
8673
|
+
const judgment = opts.confirmedDefect ? "confirmed-defect" : opts.falsePositive ? "false-positive" : "probe-bug";
|
|
8674
|
+
const dbRoot = await resolveMainRepoRoot(opts.projectRoot);
|
|
8675
|
+
let runId = opts.runId;
|
|
8676
|
+
if (runId === void 0) {
|
|
8677
|
+
const adapter = createDatabaseAdapter({
|
|
8678
|
+
backend: "auto",
|
|
8679
|
+
basePath: opts.projectRoot
|
|
8680
|
+
});
|
|
8681
|
+
try {
|
|
8682
|
+
const latest = await getLatestRun(adapter);
|
|
8683
|
+
if (latest === null || latest === void 0) return emitError(format, "no runs found — pass --run-id explicitly");
|
|
8684
|
+
runId = latest.id;
|
|
8685
|
+
} finally {
|
|
8686
|
+
await adapter.close();
|
|
8687
|
+
}
|
|
8688
|
+
}
|
|
8689
|
+
const manifest = RunManifest.open(runId, join$1(dbRoot, "runs"));
|
|
8690
|
+
let existingData;
|
|
8691
|
+
try {
|
|
8692
|
+
existingData = await manifest.read();
|
|
8693
|
+
} catch (err) {
|
|
8694
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
8695
|
+
return emitError(format, `failed to read manifest for run ${runId}: ${msg}`);
|
|
8696
|
+
}
|
|
8697
|
+
const storyState = existingData.per_story_state[opts.story];
|
|
8698
|
+
if (storyState === void 0) return emitError(format, `story ${opts.story} not found in run ${runId}`);
|
|
8699
|
+
const verificationResult = storyState.verification_result;
|
|
8700
|
+
if (verificationResult === void 0 || verificationResult === null) return emitError(format, `story ${opts.story} has no verification_result to annotate`);
|
|
8701
|
+
const newAnnotation = {
|
|
8702
|
+
findingCategory: opts.findingCategory,
|
|
8703
|
+
judgment,
|
|
8704
|
+
createdAt: new Date().toISOString(),
|
|
8705
|
+
...opts.probeName !== void 0 ? { probeName: opts.probeName } : {},
|
|
8706
|
+
...opts.note !== void 0 ? { note: opts.note } : {}
|
|
8707
|
+
};
|
|
8708
|
+
const existingAnnotations = verificationResult.annotations ?? [];
|
|
8709
|
+
const updatedVerificationResult = {
|
|
8710
|
+
...verificationResult,
|
|
8711
|
+
annotations: [...existingAnnotations, newAnnotation]
|
|
8712
|
+
};
|
|
8713
|
+
try {
|
|
8714
|
+
await manifest.patchStoryState(opts.story, { verification_result: updatedVerificationResult });
|
|
8715
|
+
} catch (err) {
|
|
8716
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
8717
|
+
return emitError(format, `failed to write annotation: ${msg}`);
|
|
8718
|
+
}
|
|
8719
|
+
if (format === "json") process.stdout.write(JSON.stringify({
|
|
8720
|
+
success: true,
|
|
8721
|
+
runId,
|
|
8722
|
+
storyKey: opts.story,
|
|
8723
|
+
annotation: newAnnotation,
|
|
8724
|
+
totalAnnotations: existingAnnotations.length + 1
|
|
8725
|
+
}) + "\n");
|
|
8726
|
+
else process.stdout.write(`annotated story ${opts.story} (${runId}): ${judgment} on ${opts.findingCategory}` + (opts.probeName !== void 0 ? ` [${opts.probeName}]` : "") + "\n");
|
|
8727
|
+
return 0;
|
|
8728
|
+
}
|
|
8729
|
+
function emitError(format, message) {
|
|
8730
|
+
if (format === "json") process.stdout.write(JSON.stringify({
|
|
8731
|
+
success: false,
|
|
8732
|
+
error: message
|
|
8733
|
+
}) + "\n");
|
|
8734
|
+
else process.stderr.write(`Error: ${message}\n`);
|
|
8735
|
+
return 1;
|
|
8736
|
+
}
|
|
8737
|
+
|
|
8636
8738
|
//#endregion
|
|
8637
8739
|
//#region src/cli/commands/history.ts
|
|
8638
8740
|
function registerHistoryCommand(program) {
|
|
@@ -9238,6 +9340,7 @@ async function createProgram() {
|
|
|
9238
9340
|
registerMigrateCommand(program);
|
|
9239
9341
|
registerProbesCommand(program);
|
|
9240
9342
|
registerProbeAuthorCommand(program, version, process.cwd(), registry);
|
|
9343
|
+
registerAnnotateCommand(program, version, process.cwd());
|
|
9241
9344
|
registerRepoMapCommand(program);
|
|
9242
9345
|
registerRoutingCommand(program);
|
|
9243
9346
|
registerCostCommand(program, version);
|
|
@@ -3666,7 +3666,8 @@ const RuntimeProbeSchema = z.object({
|
|
|
3666
3666
|
timeout_ms: z.number().int().positive().optional(),
|
|
3667
3667
|
description: z.string().optional(),
|
|
3668
3668
|
expect_stdout_no_regex: z.array(z.string().min(1)).optional(),
|
|
3669
|
-
expect_stdout_regex: z.array(z.string().min(1)).optional()
|
|
3669
|
+
expect_stdout_regex: z.array(z.string().min(1)).optional(),
|
|
3670
|
+
_authoredBy: z.enum(["probe-author", "create-story-ac-transfer"]).optional()
|
|
3670
3671
|
});
|
|
3671
3672
|
/** Zod schema for the full list (wrapping the per-probe schema). */
|
|
3672
3673
|
const RuntimeProbeListSchema = z.array(RuntimeProbeSchema);
|
|
@@ -4140,8 +4141,8 @@ var RuntimeProbeCheck = class {
|
|
|
4140
4141
|
if (context.sourceEpicContent !== void 0) {
|
|
4141
4142
|
if (detectsEventDrivenAC(context.sourceEpicContent) && !probesInvokeProductionTrigger(parsed.probes)) findings.push({
|
|
4142
4143
|
category: CATEGORY_MISSING_TRIGGER,
|
|
4143
|
-
severity: "
|
|
4144
|
-
message: "source AC describes an event-driven mechanism (hook / timer / signal / webhook) but no probe's command invokes a known production trigger (git merge/pull/push, systemctl, crontab, kill -<sig>, curl -X POST, etc.). Probes that call the implementation directly skip the wiring layer the AC's user-facing event would exercise — see strata Run 13 / Story 1-12 for the canonical case (post-merge hook never fires under git's conflict semantic). Authoring guidance: probes/event-driven section of create-story.md."
|
|
4144
|
+
severity: "error",
|
|
4145
|
+
message: "source AC describes an event-driven mechanism (hook / timer / signal / webhook) but no probe's command invokes a known production trigger (git merge/pull/push, systemctl, crontab, kill -<sig>, curl -X POST, etc.). Probes that call the implementation directly skip the wiring layer the AC's user-facing event would exercise — see strata Run 13 / Story 1-12 for the canonical case (post-merge hook never fires under git's conflict semantic). Authoring guidance: probes/event-driven section of create-story.md, or invoke probe-author to derive AC-grounded probes automatically (Epic 60 Phase 2)."
|
|
4145
4146
|
});
|
|
4146
4147
|
}
|
|
4147
4148
|
for (const probe of parsed.probes) {
|
|
@@ -4170,7 +4171,8 @@ var RuntimeProbeCheck = class {
|
|
|
4170
4171
|
...result.exitCode !== void 0 ? { exitCode: result.exitCode } : {},
|
|
4171
4172
|
stdoutTail: result.stdoutTail,
|
|
4172
4173
|
stderrTail: result.stderrTail,
|
|
4173
|
-
durationMs: result.durationMs
|
|
4174
|
+
durationMs: result.durationMs,
|
|
4175
|
+
_authoredBy: probe._authoredBy ?? "create-story-ac-transfer"
|
|
4174
4176
|
});
|
|
4175
4177
|
}
|
|
4176
4178
|
const status = findings.some((f) => f.severity === "error") ? "fail" : findings.some((f) => f.severity === "warn") ? "warn" : "pass";
|
|
@@ -4508,10 +4510,90 @@ function computeTakenOptionPerGroup(hardClauses, workingDir) {
|
|
|
4508
4510
|
* 3. The presence of `## Runtime Probes` heading followed by a fenced yaml block
|
|
4509
4511
|
* (represented as a single "runtime-probes-section" clause)
|
|
4510
4512
|
*/
|
|
4513
|
+
/**
|
|
4514
|
+
* Negation phrases that mark a paragraph as "the paths in this paragraph
|
|
4515
|
+
* are references the dev should NOT deliver/modify". When any of these
|
|
4516
|
+
* phrases appears in a paragraph, every path-clause in that paragraph is
|
|
4517
|
+
* tagged `negation: true` so the verification emit routes them to the
|
|
4518
|
+
* info-severity `source-ac-negation-reference` finding instead of the
|
|
4519
|
+
* under-delivery error path.
|
|
4520
|
+
*
|
|
4521
|
+
* Strata Run 16 (Story 1-16, 2026-04-27): the AC contained
|
|
4522
|
+
*
|
|
4523
|
+
* "the existing test scaffolding is documented (NOT replaced):
|
|
4524
|
+
* `packages/memory` already uses vitest (Story 1.8+);
|
|
4525
|
+
* `packages/memory-mcp` already uses pytest (Story 1.10+); ...
|
|
4526
|
+
* 1.16 does NOT replace or rewrite existing test infrastructure."
|
|
4527
|
+
*
|
|
4528
|
+
* Substrate emitted 6 ERROR-level under-delivery findings on the listed
|
|
4529
|
+
* paths; the dev had correctly NOT modified them. The flood of
|
|
4530
|
+
* false-positive ERRORs masked a real WARN about the missing `## Runtime
|
|
4531
|
+
* Probes` section (which itself would have surfaced two real defects in
|
|
4532
|
+
* the delivery). Detecting the negation context up-front separates the
|
|
4533
|
+
* legitimate "paths the AC mentions but the dev should not modify" case
|
|
4534
|
+
* from genuine under-delivery.
|
|
4535
|
+
*
|
|
4536
|
+
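* The keyword patterns (NOT, MUST NOT, SHALL NOT) are case-sensitive; the
* phrase patterns ("documented (NOT", "deferred to", "is/are gitignored")
* carry the `i` flag. The keywords stay case-sensitive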
|
|
4537
|
+
* because lowercased forms are common in non-imperative prose ("not
|
|
4538
|
+
* really" / "must not exceed" appear in unrelated contexts and
|
|
4539
|
+
* over-trigger). The strata observation enumerates the canonical forms.
|
|
4540
|
+
*/
|
|
4541
|
+
const NEGATION_PHRASE_PATTERNS = [
|
|
4542
|
+
/\(NOT\s+\w+/,
|
|
4543
|
+
/documented\s*\(NOT/i,
|
|
4544
|
+
/\bMUST\s+NOT\b|\bSHALL\s+NOT\b/,
|
|
4545
|
+
/\bdo(?:es)?\s+NOT\s+\w+/,
|
|
4546
|
+
/\bdeferred\s+to\b/i,
|
|
4547
|
+
/\b(?:is|are)\s+gitignored\b/i
|
|
4548
|
+
];
|
|
4549
|
+
/**
|
|
4550
|
+
* Find all line indices that fall within a negation context. The scope of
|
|
4551
|
+
* a single negation context is the line that contains the negation phrase
|
|
4552
|
+
* PLUS any markdown indented-continuation lines following it (so a bullet
|
|
4553
|
+
* that wraps onto multiple indented lines is treated as one logical unit).
|
|
4554
|
+
*
|
|
4555
|
+
* The continuation walk stops at the first of: blank line, next markdown
|
|
4556
|
+
* bullet (`- `, `* `, numbered list), or any non-indented non-blank line.
|
|
4557
|
+
*
|
|
4558
|
+
* Coarser scopes (e.g., paragraph-wide aggregation) over-triggered on
|
|
4559
|
+
* test fixtures like:
|
|
4560
|
+
*
|
|
4561
|
+
* The implementation MUST validate input.
|
|
4562
|
+
* The system MUST NOT skip authentication.
|
|
4563
|
+
* Files SHALL be placed in `src/auth/validator.ts`.
|
|
4564
|
+
*
|
|
4565
|
+
* — three independent statements wrapped without blank-line separators.
|
|
4566
|
+
* The "MUST NOT" on line 2 should not mark the unrelated path on line 3
|
|
4567
|
+
* as a negation reference. Only the bullet's CONTINUATION-LINE structure
|
|
4568
|
+
* constitutes "the same logical reference unit" as the negation phrase.
|
|
4569
|
+
*/
|
|
4570
|
+
function detectNegationContextLines(lines) {
|
|
4571
|
+
const result = new Set();
|
|
4572
|
+
for (let i = 0; i < lines.length; i++) {
|
|
4573
|
+
const line = lines[i] ?? "";
|
|
4574
|
+
if (NEGATION_PHRASE_PATTERNS.some((pat) => pat.test(line))) {
|
|
4575
|
+
result.add(i);
|
|
4576
|
+
let j = i + 1;
|
|
4577
|
+
while (j < lines.length) {
|
|
4578
|
+
const next = lines[j] ?? "";
|
|
4579
|
+
if (next.trim() === "") break;
|
|
4580
|
+
if (/^\s*(?:-|\*|\d+\.)\s+/.test(next)) break;
|
|
4581
|
+
if (/^\s+\S/.test(next)) {
|
|
4582
|
+
result.add(j);
|
|
4583
|
+
j++;
|
|
4584
|
+
continue;
|
|
4585
|
+
}
|
|
4586
|
+
break;
|
|
4587
|
+
}
|
|
4588
|
+
}
|
|
4589
|
+
}
|
|
4590
|
+
return result;
|
|
4591
|
+
}
|
|
4511
4592
|
function extractHardClauses(sectionContent) {
|
|
4512
4593
|
const clauses = [];
|
|
4513
4594
|
const lines = sectionContent.split("\n");
|
|
4514
4595
|
const alternativeOptions = detectAlternativeOptions(lines);
|
|
4596
|
+
const negationContextLines = detectNegationContextLines(lines);
|
|
4515
4597
|
const mustPattern = /\b(MUST NOT|MUST|SHALL NOT|SHALL)\b/;
|
|
4516
4598
|
for (const line of lines) {
|
|
4517
4599
|
const match = mustPattern.exec(line);
|
|
@@ -4530,10 +4612,12 @@ function extractHardClauses(sectionContent) {
|
|
|
4530
4612
|
let pathMatch;
|
|
4531
4613
|
while ((pathMatch = pathPattern.exec(line)) !== null) {
|
|
4532
4614
|
const alt = findOptionForLine(lineIdx, alternativeOptions);
|
|
4615
|
+
const inNegation = negationContextLines.has(lineIdx);
|
|
4533
4616
|
clauses.push({
|
|
4534
4617
|
type: "path",
|
|
4535
4618
|
text: `\`${pathMatch[1]}\``,
|
|
4536
|
-
...alt ? { alternative: alt } : {}
|
|
4619
|
+
...alt ? { alternative: alt } : {},
|
|
4620
|
+
...inNegation ? { negation: true } : {}
|
|
4537
4621
|
});
|
|
4538
4622
|
}
|
|
4539
4623
|
}
|
|
@@ -4583,15 +4667,24 @@ var SourceAcFidelityCheck = class {
|
|
|
4583
4667
|
for (const clause of hardClauses) if (clause.type === "runtime-probes-section") {
|
|
4584
4668
|
if (!storyContent.includes("## Runtime Probes")) {
|
|
4585
4669
|
const truncated = clause.text.length > 120 ? clause.text.slice(0, 120) : clause.text;
|
|
4670
|
+
const isEventDrivenAc = detectsEventDrivenAC(context.sourceEpicContent);
|
|
4586
4671
|
findings.push({
|
|
4587
4672
|
category: "source-ac-drift",
|
|
4588
|
-
severity: "warn",
|
|
4589
|
-
message: `runtime-probes-section: "${truncated}" present in epics source but absent in story artifact`
|
|
4673
|
+
severity: isEventDrivenAc ? "error" : "warn",
|
|
4674
|
+
message: isEventDrivenAc ? `runtime-probes-section: "${truncated}" present in epics source but absent in story artifact AND source AC is event-driven (probes are required for event-driven ACs — runtime-probes check will skip without the section)` : `runtime-probes-section: "${truncated}" present in epics source but absent in story artifact`
|
|
4590
4675
|
});
|
|
4591
4676
|
}
|
|
4592
4677
|
} else if (!storyContent.includes(clause.text)) {
|
|
4593
4678
|
const truncated = clause.text.length > 120 ? clause.text.slice(0, 120) : clause.text;
|
|
4594
4679
|
if (clause.type === "path") {
|
|
4680
|
+
if (clause.negation === true) {
|
|
4681
|
+
findings.push({
|
|
4682
|
+
category: "source-ac-negation-reference",
|
|
4683
|
+
severity: "info",
|
|
4684
|
+
message: `path: "${truncated}" referenced in source AC inside a negation context (e.g., "(NOT replaced)", "MUST NOT", "deferred to", "documented (NOT", "does NOT replace", "is gitignored") — the AC explicitly directed the dev NOT to deliver/modify this path; treated as reference-only, not a deliverable`
|
|
4685
|
+
});
|
|
4686
|
+
continue;
|
|
4687
|
+
}
|
|
4595
4688
|
if (isOperationalPath(clause.text)) {
|
|
4596
4689
|
findings.push({
|
|
4597
4690
|
category: "source-ac-operational-path-reference",
|
|
@@ -4821,7 +4914,8 @@ const StoredVerificationFindingSchema = z.object({
|
|
|
4821
4914
|
exitCode: z.number().int().optional(),
|
|
4822
4915
|
stdoutTail: z.string().optional(),
|
|
4823
4916
|
stderrTail: z.string().optional(),
|
|
4824
|
-
durationMs: z.number().nonnegative().optional()
|
|
4917
|
+
durationMs: z.number().nonnegative().optional(),
|
|
4918
|
+
_authoredBy: z.enum(["probe-author", "create-story-ac-transfer"]).optional()
|
|
4825
4919
|
});
|
|
4826
4920
|
/**
|
|
4827
4921
|
* Schema for a single per-check verification result stored in the manifest.
|
|
@@ -4846,6 +4940,30 @@ const StoredVerificationCheckResultSchema = z.object({
|
|
|
4846
4940
|
* Mirrors VerificationSummary from packages/sdlc/src/verification/types.ts
|
|
4847
4941
|
* without importing from that module (avoids circular dependency).
|
|
4848
4942
|
*/
|
|
4943
|
+
/**
 * Story 60-15: schema for one operator-added annotation attached to a
 * verification finding. An annotation records the operator's after-the-fact
 * judgment on a probe failure: a genuine defect catch, a false positive,
 * or a bug in the probe itself.
 *
 * Stored at `per_story_state[storyKey].verification_result.annotations[]`
 * and written by `substrate annotate --story <key> --finding-category <cat>
 * --confirmed-defect|--false-positive|--probe-bug`.
 *
 * `rollupProbeAuthorMetrics` reads these entries to derive the
 * `confirmed-defect` count behind the catch-rate KPI, i.e. probe-author
 * probe failures that the operator later confirmed as real defects.
 */
const StoredVerificationAnnotationSchema = z.object({
	// Category of the finding this annotation refers to.
	findingCategory: z.string(),
	// The operator's verdict on the finding.
	judgment: z.enum(["confirmed-defect", "false-positive", "probe-bug"]),
	// Optional: limits the annotation to one named probe.
	probeName: z.string().optional(),
	// Optional free-form operator comment.
	note: z.string().optional(),
	// Creation timestamp, stored as a plain string (format is not validated here).
	createdAt: z.string()
});
|
|
4849
4967
|
const StoredVerificationSummarySchema = z.object({
|
|
4850
4968
|
storyKey: z.string(),
|
|
4851
4969
|
checks: z.array(StoredVerificationCheckResultSchema),
|
|
@@ -4854,7 +4972,8 @@ const StoredVerificationSummarySchema = z.object({
|
|
|
4854
4972
|
"warn",
|
|
4855
4973
|
"fail"
|
|
4856
4974
|
]),
|
|
4857
|
-
duration_ms: z.number().nonnegative()
|
|
4975
|
+
duration_ms: z.number().nonnegative(),
|
|
4976
|
+
annotations: z.array(StoredVerificationAnnotationSchema).optional()
|
|
4858
4977
|
});
|
|
4859
4978
|
|
|
4860
4979
|
//#endregion
|
|
@@ -5599,6 +5718,171 @@ function rollupFindingCounts(summary) {
|
|
|
5599
5718
|
};
|
|
5600
5719
|
}
|
|
5601
5720
|
|
|
5721
|
+
//#endregion
|
|
5722
|
+
//#region packages/sdlc/dist/run-model/probe-author-metrics.js
|
|
5723
|
+
/**
|
|
5724
|
+
* Probe-author per-story rollup — Story 60-15.
|
|
5725
|
+
*
|
|
5726
|
+
* Collapses every probe-author-relevant signal in a
|
|
5727
|
+
* StoredVerificationSummary (including any operator annotations on
|
|
5728
|
+
* probe-failures) into a flat `{dispatched, probesAuthoredCount,
|
|
5729
|
+
* authoredProbesFailedCount, authoredProbesCaughtConfirmedDefectCount}`
|
|
5730
|
+
* shape suitable for per-story surfacing in `substrate status`/`metrics`
|
|
5731
|
+
* JSON output and for cross-run aggregation in the `--probe-author-summary`
|
|
5732
|
+
* flag.
|
|
5733
|
+
*
|
|
5734
|
+
* Intentionally pure (mirrors `rollupFindingCounts` shape): no I/O, no
|
|
5735
|
+
* logger, no throw. Backward-compat is load-bearing — pre-60-15 manifests
|
|
5736
|
+
* have no `_authoredBy` discriminator on their stored findings, no
|
|
5737
|
+
* `annotations` array, and probe-author wasn't actually running (Sprint
|
|
5738
|
+
* 13/Sprint 20 lesson — the manifest registration bug). The rollup must
|
|
5739
|
+
* produce sensible zero values on every legacy code path.
|
|
5740
|
+
*/
|
|
5741
|
+
/**
 * All-zero per-story probe-author rollup. Serves as the default value when
 * a story carries no probe-author signal. Frozen so the shared default
 * cannot be mutated; spread it (`{ ...ZERO_PROBE_AUTHOR_METRICS }`) to get
 * a writable copy.
 */
const ZERO_PROBE_AUTHOR_METRICS = Object.freeze({
	dispatched: false, // no evidence that probe-author ran for the story
	probesAuthoredCount: 0,
	authoredProbesFailedCount: 0,
	authoredProbesCaughtConfirmedDefectCount: 0
});
|
|
5748
|
+
/**
 * All-zero per-author finding counts, keyed by the two `_authoredBy`
 * values (`"probe-author"` and `"create-story-ac-transfer"`), each with
 * `error` / `warn` / `info` tallies.
 *
 * `Object.freeze` is shallow, so each inner bucket is frozen as well.
 * Otherwise a caller that incremented a bucket on this shared constant
 * would silently corrupt the zero value for every later reader. Code that
 * needs a mutable tally must build its own object rather than mutate this.
 */
const ZERO_FINDINGS_BY_AUTHOR = Object.freeze({
	"probe-author": Object.freeze({
		error: 0,
		warn: 0,
		info: 0
	}),
	"create-story-ac-transfer": Object.freeze({
		error: 0,
		warn: 0,
		info: 0
	})
});
|
|
5760
|
+
/**
 * Build the per-story probe-author rollup from a stored verification summary.
 *
 * @param summary The story's stored verification summary
 *   (`per_story_state[storyKey].verification_result` in the run manifest).
 *   `undefined` / `null` yields the zero rollup.
 * @param dispatchedHint Optional override. Pass `true` when the caller
 *   knows probe-author dispatched (for example from a captured
 *   `probe-author:appended-to-artifact` event). This covers the case where
 *   every authored probe passed, so there are no failure findings to infer
 *   dispatch from.
 * @returns `{dispatched, probesAuthoredCount, authoredProbesFailedCount,
 *   authoredProbesCaughtConfirmedDefectCount}`.
 *
 * Backward compatibility:
 * - a finding without `_authoredBy` is not attributed to probe-author;
 * - a missing `annotations` array gives a confirmed-defect count of 0.
 *
 * NOTE(review): both `probesAuthoredCount` and `authoredProbesFailedCount`
 * are taken from the same list of failed probe-author findings, so they are
 * always equal here. Passing probes leave no finding to count.
 */
function rollupProbeAuthorMetrics(summary, dispatchedHint) {
	const hinted = dispatchedHint === true;
	if (summary === void 0 || summary === null) {
		return hinted ? {
			...ZERO_PROBE_AUTHOR_METRICS,
			dispatched: true
		} : { ...ZERO_PROBE_AUTHOR_METRICS };
	}

	// Gather every runtime-probe finding that was authored by probe-author.
	const failedProbes = [];
	for (const check of summary.checks) {
		for (const finding of check.findings ?? []) {
			// An absent discriminator means "create-story-ac-transfer", so only an
			// explicit "probe-author" qualifies.
			if (finding._authoredBy !== "probe-author") continue;
			if (!finding.category.startsWith("runtime-probe-")) continue;
			failedProbes.push({
				// Fall back to the category when the message has no probe name.
				probeName: extractProbeName(finding.message) ?? finding.category,
				category: finding.category
			});
		}
	}

	// A failure is a confirmed defect when some annotation with that judgment
	// targets its category and either names no probe or names this probe.
	const annotations = summary.annotations ?? [];
	const isConfirmedDefect = ({ category, probeName }) => annotations.some((note) => {
		return note.judgment === "confirmed-defect" && note.findingCategory === category && (note.probeName === void 0 || note.probeName === probeName);
	});
	const confirmedDefectCount = failedProbes.filter(isConfirmedDefect).length;

	const failedCount = failedProbes.length;
	return {
		dispatched: hinted || failedCount > 0,
		probesAuthoredCount: failedCount,
		authoredProbesFailedCount: failedCount,
		authoredProbesCaughtConfirmedDefectCount: confirmedDefectCount
	};
}
|
|
5809
|
+
/**
 * Break a verification summary's findings down by author and severity.
 *
 * Every finding is tallied into exactly one of two buckets:
 * - `"probe-author"` when `_authoredBy === "probe-author"`;
 * - `"create-story-ac-transfer"` otherwise. This covers findings with no
 *   `_authoredBy` (legacy manifests) and any unrecognised value.
 *
 * The bucket is chosen by explicit comparison rather than by indexing the
 * result object with the raw `_authoredBy` string. Indexing would throw on
 * an unknown author, and a value such as `"constructor"` would resolve
 * through the prototype chain and be written to. Either outcome would break
 * the rollup helpers' "no throw" contract.
 *
 * @param summary Stored verification summary; `undefined` / `null` (or a
 *   summary without `checks`) yields all-zero buckets.
 * @returns A freshly allocated
 *   `{ "probe-author": {error, warn, info}, "create-story-ac-transfer": {error, warn, info} }`.
 *   Findings with a severity other than `error` / `warn` / `info` are ignored.
 */
function rollupFindingsByAuthor(summary) {
	const result = {
		"probe-author": {
			error: 0,
			warn: 0,
			info: 0
		},
		"create-story-ac-transfer": {
			error: 0,
			warn: 0,
			info: 0
		}
	};
	if (summary === void 0 || summary === null) return result;
	for (const check of summary.checks ?? []) {
		const findings = check.findings ?? [];
		for (const f of findings) {
			// Anything that is not explicitly probe-author falls into the default bucket.
			const bucket = f._authoredBy === "probe-author" ? result["probe-author"] : result["create-story-ac-transfer"];
			switch (f.severity) {
				case "error":
					bucket.error += 1;
					break;
				case "warn":
					bucket.warn += 1;
					break;
				case "info":
					bucket.info += 1;
					break;
			}
		}
	}
	return result;
}
|
|
5848
|
+
/**
 * Combine per-story probe-author rollups into one aggregate.
 *
 * @param perStory Iterable of per-story rollups (the shape returned by
 *   `rollupProbeAuthorMetrics`).
 * @param totalStories Denominator for `probeAuthorDispatchedPct`: the number
 *   of stories the run touched, i.e. every story where dispatch could have
 *   happened, NOT only those where it did.
 * @returns Totals plus three ratios. Each ratio is a plain quotient (a
 *   0.5 means 50%), and is 0 when its denominator is not positive.
 *   `totalStoriesDispatched` simply echoes `totalStories`.
 */
function aggregateProbeAuthorMetrics(perStory, totalStories) {
	const totals = {
		dispatched: 0,
		authored: 0,
		failed: 0,
		confirmed: 0
	};
	for (const { dispatched, probesAuthoredCount, authoredProbesFailedCount, authoredProbesCaughtConfirmedDefectCount } of perStory) {
		totals.dispatched += dispatched ? 1 : 0;
		totals.authored += probesAuthoredCount;
		totals.failed += authoredProbesFailedCount;
		totals.confirmed += authoredProbesCaughtConfirmedDefectCount;
	}

	// Guarded division: a non-positive denominator yields 0 instead of NaN/Infinity.
	const ratio = (numerator, denominator) => denominator > 0 ? numerator / denominator : 0;

	return {
		totalStoriesDispatched: totalStories,
		probeAuthorDispatchedCount: totals.dispatched,
		probeAuthorDispatchedPct: ratio(totals.dispatched, totalStories),
		totalAuthoredProbes: totals.authored,
		totalAuthoredProbesFailed: totals.failed,
		totalConfirmedDefectsCaught: totals.confirmed,
		catchRateByCount: ratio(totals.failed, totals.authored),
		catchRateByConfirmedDefect: ratio(totals.confirmed, totals.authored)
	};
}
|
|
5876
|
+
/**
 * Pull the probe name out of a runtime-probe finding message.
 *
 * Expects the message to begin with `probe "<name>"`. The original note
 * here says the runtime-probe-check formatter uses this leading pattern for
 * every category (fail / timeout / assertion-fail / error-response).
 *
 * @param message Finding message text.
 * @returns The quoted name, or `undefined` when the message does not start
 *   with the pattern. Callers then fall back to using the finding category
 *   as the name.
 */
function extractProbeName(message) {
	const leadingProbeName = /^probe\s+"([^"]+)"/;
	const captured = leadingProbeName.exec(message);
	return captured === null ? void 0 : captured[1];
}
|
|
5885
|
+
|
|
5602
5886
|
//#endregion
|
|
5603
5887
|
//#region packages/sdlc/dist/run-model/supervisor-lock.js
|
|
5604
5888
|
const defaultLogger = console;
|
|
@@ -6343,5 +6627,5 @@ function registerHealthCommand(program, _version = "0.0.0", projectRoot = proces
|
|
|
6343
6627
|
}
|
|
6344
6628
|
|
|
6345
6629
|
//#endregion
|
|
6346
|
-
export { BMAD_BASELINE_TOKENS_FULL, DEFAULT_STALL_THRESHOLD_SECONDS, DoltMergeConflict, FileStateStore, FindingsInjector, RunManifest, RuntimeProbeListSchema, STOP_AFTER_VALID_PHASES, STORY_KEY_PATTERN$1 as STORY_KEY_PATTERN, SUBSTRATE_OWNED_SETTINGS_KEYS, SupervisorLock, VALID_PHASES, WorkGraphRepository, ZERO_FINDING_COUNTS, __commonJS, __require, __toESM, applyConfigToGraph, buildPipelineStatusOutput, createDatabaseAdapter$1 as createDatabaseAdapter, createDefaultVerificationPipeline, createGraphOrchestrator, createSdlcCodeReviewHandler, createSdlcCreateStoryHandler, createSdlcDevStoryHandler, createSdlcPhaseHandler, createStateStore, detectCycles, detectsEventDrivenAC, extractTargetFilesFromStoryContent, findPackageRoot, formatOutput, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getAllDescendantPids, getAutoHealthData, getSubstrateDefaultSettings, inspectProcessTree, isOrchestratorProcessLine, parseDbTimestampAsUtc, parseRuntimeProbes, registerHealthCommand, renderFindings, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveGraphPath, resolveMainRepoRoot, resolveRunManifest, rollupFindingCounts, runHealthAction, validateStoryKey };
|
|
6347
|
-
//# sourceMappingURL=health-
|
|
6630
|
+
export { BMAD_BASELINE_TOKENS_FULL, DEFAULT_STALL_THRESHOLD_SECONDS, DoltMergeConflict, FileStateStore, FindingsInjector, RunManifest, RuntimeProbeListSchema, STOP_AFTER_VALID_PHASES, STORY_KEY_PATTERN$1 as STORY_KEY_PATTERN, SUBSTRATE_OWNED_SETTINGS_KEYS, SupervisorLock, VALID_PHASES, WorkGraphRepository, ZERO_FINDINGS_BY_AUTHOR, ZERO_FINDING_COUNTS, ZERO_PROBE_AUTHOR_METRICS, __commonJS, __require, __toESM, aggregateProbeAuthorMetrics, applyConfigToGraph, buildPipelineStatusOutput, createDatabaseAdapter$1 as createDatabaseAdapter, createDefaultVerificationPipeline, createGraphOrchestrator, createSdlcCodeReviewHandler, createSdlcCreateStoryHandler, createSdlcDevStoryHandler, createSdlcPhaseHandler, createStateStore, detectCycles, detectsEventDrivenAC, extractTargetFilesFromStoryContent, findPackageRoot, formatOutput, formatPipelineStatusHuman, formatPipelineSummary, formatTokenTelemetry, getAllDescendantPids, getAutoHealthData, getSubstrateDefaultSettings, inspectProcessTree, isOrchestratorProcessLine, parseDbTimestampAsUtc, parseRuntimeProbes, registerHealthCommand, renderFindings, resolveBmadMethodSrcPath, resolveBmadMethodVersion, resolveGraphPath, resolveMainRepoRoot, resolveRunManifest, rollupFindingCounts, rollupFindingsByAuthor, rollupProbeAuthorMetrics, runHealthAction, validateStoryKey };
|
|
6631
|
+
//# sourceMappingURL=health-BmEu3n9Z.js.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { DEFAULT_STALL_THRESHOLD_SECONDS, getAllDescendantPids, getAutoHealthData, inspectProcessTree, isOrchestratorProcessLine, registerHealthCommand, runHealthAction } from "./health-
|
|
1
|
+
import { DEFAULT_STALL_THRESHOLD_SECONDS, getAllDescendantPids, getAutoHealthData, inspectProcessTree, isOrchestratorProcessLine, registerHealthCommand, runHealthAction } from "./health-BmEu3n9Z.js";
|
|
2
2
|
import "./logger-KeHncl-f.js";
|
|
3
3
|
import "./dist-VcMmfo2w.js";
|
|
4
4
|
import "./decisions-C0pz9Clx.js";
|
package/dist/index.d.ts
CHANGED
|
@@ -2117,6 +2117,58 @@ interface OrchestratorEvents {
|
|
|
2117
2117
|
mode: 'enabled' | 'disabled';
|
|
2118
2118
|
source: 'cli' | 'env' | 'default';
|
|
2119
2119
|
};
|
|
2120
|
+
/**
|
|
2121
|
+
* Story 60-15: probe-author dispatch lifecycle events. Each event fires
|
|
2122
|
+
* exactly once per probe-author phase invocation (or zero times when the
|
|
2123
|
+
* phase didn't run). Powers the per-story telemetry breakdown surfaced
|
|
2124
|
+
* via `substrate status`/`metrics` and the cross-run probe-author
|
|
2125
|
+
* catch-rate KPI.
|
|
2126
|
+
*/
|
|
2127
|
+
/** Probe-author dispatch completed for a story (success or skip-after-
|
|
2128
|
+
* re-read). Existing event emitted by 60-13's runProbeAuthor; 60-15
|
|
2129
|
+
* formalizes the schema as part of the lifecycle event family. */
|
|
2130
|
+
'probe-author:dispatched': {
|
|
2131
|
+
storyKey: string;
|
|
2132
|
+
runId: string;
|
|
2133
|
+
probesAuthoredCount: number;
|
|
2134
|
+
dispatchDurationMs: number;
|
|
2135
|
+
costUsd: number;
|
|
2136
|
+
};
|
|
2137
|
+
/** Probe-author agent's YAML output successfully parsed. Counts probes
|
|
2138
|
+
* authored before any append/idempotency check. */
|
|
2139
|
+
'probe-author:output-parsed': {
|
|
2140
|
+
storyKey: string;
|
|
2141
|
+
runId: string;
|
|
2142
|
+
probesParsedCount: number;
|
|
2143
|
+
};
|
|
2144
|
+
/** Probe-author probes appended to the story artifact. The terminal
|
|
2145
|
+
* success event for the phase. `probesAuthoredCount` is the count of
|
|
2146
|
+
* NEW probes added (excluding any that were already present). */
|
|
2147
|
+
'probe-author:appended-to-artifact': {
|
|
2148
|
+
storyKey: string;
|
|
2149
|
+
runId: string;
|
|
2150
|
+
probesAuthoredCount: number;
|
|
2151
|
+
storyFilePath: string;
|
|
2152
|
+
};
|
|
2153
|
+
/** Probe-author phase skipped (gate fired). Distinguishes the two skip
|
|
2154
|
+
* reasons so operators can debug the gating decision. */
|
|
2155
|
+
'probe-author:skipped': {
|
|
2156
|
+
storyKey: string;
|
|
2157
|
+
runId: string;
|
|
2158
|
+
reason: 'non-event-driven' | 'author-declared-probes-present';
|
|
2159
|
+
};
|
|
2160
|
+
/** A probe carrying `_authoredBy: 'probe-author'` failed at runtime. Emitted
|
|
2161
|
+
* by runtime-probe-check when it produces a probe-failure finding whose
|
|
2162
|
+
* source probe was authored by probe-author. NOTE: probe-failure is NOT
|
|
2163
|
+
* the same as defect-caught — operators tag confirmed-defect via
|
|
2164
|
+
* `substrate annotate`. This event surfaces failures regardless of
|
|
2165
|
+
* annotation status; the rollup helper distinguishes them downstream. */
|
|
2166
|
+
'probe-author:authored-probe-failed': {
|
|
2167
|
+
storyKey: string;
|
|
2168
|
+
runId: string;
|
|
2169
|
+
probeName: string;
|
|
2170
|
+
findingCategory: string;
|
|
2171
|
+
};
|
|
2120
2172
|
/**
|
|
2121
2173
|
* Story 62-3: code-review agent emitted YAML output that failed schema
|
|
2122
2174
|
* validation (typically a parse error from unquoted-colon-in-value or
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import "./health-
|
|
1
|
+
import "./health-BmEu3n9Z.js";
|
|
2
2
|
import "./logger-KeHncl-f.js";
|
|
3
3
|
import "./helpers-CElYrONe.js";
|
|
4
4
|
import "./dist-VcMmfo2w.js";
|
|
5
|
-
import { normalizeGraphSummaryToStatus, registerRunCommand, resolveMaxReviewCycles, runRunAction, wireNdjsonEmitter } from "./run-
|
|
5
|
+
import { normalizeGraphSummaryToStatus, registerRunCommand, resolveMaxReviewCycles, runRunAction, wireNdjsonEmitter } from "./run-DnwsCYYA.js";
|
|
6
6
|
import "./routing-CcBOCuC9.js";
|
|
7
7
|
import "./decisions-C0pz9Clx.js";
|
|
8
8
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { BMAD_BASELINE_TOKENS_FULL, DoltMergeConflict, FileStateStore, FindingsInjector, RunManifest, RuntimeProbeListSchema, STOP_AFTER_VALID_PHASES, STORY_KEY_PATTERN, VALID_PHASES, WorkGraphRepository, __commonJS, __require, __toESM, applyConfigToGraph, buildPipelineStatusOutput, createDatabaseAdapter, createDefaultVerificationPipeline, createGraphOrchestrator, createSdlcCodeReviewHandler, createSdlcCreateStoryHandler, createSdlcDevStoryHandler, createSdlcPhaseHandler, detectCycles, detectsEventDrivenAC, extractTargetFilesFromStoryContent, formatOutput, formatPipelineSummary, formatTokenTelemetry, inspectProcessTree, parseDbTimestampAsUtc, renderFindings, resolveGraphPath, resolveMainRepoRoot, validateStoryKey } from "./health-
|
|
1
|
+
import { BMAD_BASELINE_TOKENS_FULL, DoltMergeConflict, FileStateStore, FindingsInjector, RunManifest, RuntimeProbeListSchema, STOP_AFTER_VALID_PHASES, STORY_KEY_PATTERN, VALID_PHASES, WorkGraphRepository, __commonJS, __require, __toESM, applyConfigToGraph, buildPipelineStatusOutput, createDatabaseAdapter, createDefaultVerificationPipeline, createGraphOrchestrator, createSdlcCodeReviewHandler, createSdlcCreateStoryHandler, createSdlcDevStoryHandler, createSdlcPhaseHandler, detectCycles, detectsEventDrivenAC, extractTargetFilesFromStoryContent, formatOutput, formatPipelineSummary, formatTokenTelemetry, inspectProcessTree, parseDbTimestampAsUtc, renderFindings, resolveGraphPath, resolveMainRepoRoot, validateStoryKey } from "./health-BmEu3n9Z.js";
|
|
2
2
|
import { createLogger } from "./logger-KeHncl-f.js";
|
|
3
3
|
import { TypedEventBusImpl, createEventBus, createTuiApp, isTuiCapable, printNonTtyWarning, sleep } from "./helpers-CElYrONe.js";
|
|
4
4
|
import { ADVISORY_NOTES, Categorizer, ConsumerAnalyzer, DEFAULT_GLOBAL_SETTINGS, DispatcherImpl, DoltClient, ESCALATION_DIAGNOSIS, EXPERIMENT_RESULT, EfficiencyScorer, IngestionServer, LogTurnAnalyzer, OPERATIONAL_FINDING, Recommender, RoutingRecommender, RoutingResolver, RoutingTelemetry, RoutingTokenAccumulator, RoutingTuner, STORY_METRICS, STORY_OUTCOME, SubstrateConfigSchema, TEST_EXPANSION_FINDING, TEST_PLAN, TelemetryNormalizer, TelemetryPipeline, TurnAnalyzer, addTokenUsage, aggregateTokenUsageForRun, aggregateTokenUsageForStory, callLLM, createConfigSystem, createDatabaseAdapter$1, createDecision, createPipelineRun, createRequirement, detectInterfaceChanges, getArtifactByTypeForRun, getArtifactsByRun, getDecisionsByCategory, getDecisionsByPhase, getDecisionsByPhaseForRun, getLatestRun, getPipelineRunById, getRunMetrics, getRunningPipelineRuns, getStoryMetricsForRun, getTokenUsageSummary, initSchema, listRequirements, loadModelRoutingConfig, registerArtifact, updatePipelineRun, updatePipelineRunConfig, upsertDecision, writeRunMetrics, writeStoryMetrics } from "./dist-VcMmfo2w.js";
|
|
@@ -8353,6 +8353,11 @@ async function runProbeAuthor(deps, params) {
|
|
|
8353
8353
|
};
|
|
8354
8354
|
if (bypassGates !== true && !detectsEventDrivenAC(epicContent)) {
|
|
8355
8355
|
logger$14.debug({ storyKey }, "probe-author: source AC not event-driven — skipping");
|
|
8356
|
+
emitEvent?.("probe-author:skipped", {
|
|
8357
|
+
storyKey,
|
|
8358
|
+
runId: pipelineRunId,
|
|
8359
|
+
reason: "non-event-driven"
|
|
8360
|
+
});
|
|
8356
8361
|
return makeSkippedResult(tokenUsage, start);
|
|
8357
8362
|
}
|
|
8358
8363
|
let storyContent;
|
|
@@ -8360,6 +8365,11 @@ async function runProbeAuthor(deps, params) {
|
|
|
8360
8365
|
storyContent = await readFile$1(storyFilePath, "utf-8");
|
|
8361
8366
|
if (bypassGates !== true && /^## Runtime Probes/m.test(storyContent)) {
|
|
8362
8367
|
logger$14.info({ storyKey }, "probe-author: story artifact already has ## Runtime Probes — skipping");
|
|
8368
|
+
emitEvent?.("probe-author:skipped", {
|
|
8369
|
+
storyKey,
|
|
8370
|
+
runId: pipelineRunId,
|
|
8371
|
+
reason: "author-declared-probes-present"
|
|
8372
|
+
});
|
|
8363
8373
|
return makeSkippedResult(tokenUsage, start);
|
|
8364
8374
|
}
|
|
8365
8375
|
} catch (err) {
|
|
@@ -8516,7 +8526,15 @@ async function runProbeAuthor(deps, params) {
|
|
|
8516
8526
|
}, "probe-author: probes failed RuntimeProbeListSchema — falling through");
|
|
8517
8527
|
return makeFailedResult(`schema_validation_failed: ${validationError}`, tokenUsage, start);
|
|
8518
8528
|
}
|
|
8519
|
-
const probes = probeValidation.data
|
|
8529
|
+
const probes = probeValidation.data.map((p) => ({
|
|
8530
|
+
...p,
|
|
8531
|
+
_authoredBy: "probe-author"
|
|
8532
|
+
}));
|
|
8533
|
+
emitEvent?.("probe-author:output-parsed", {
|
|
8534
|
+
storyKey,
|
|
8535
|
+
runId: pipelineRunId,
|
|
8536
|
+
probesParsedCount: probes.length
|
|
8537
|
+
});
|
|
8520
8538
|
if (probes.length === 0) {
|
|
8521
8539
|
logger$14.info({ storyKey }, "probe-author: authored empty probes list — no probes needed");
|
|
8522
8540
|
emitEvent?.("probe-author:no-probes-authored", {
|
|
@@ -8555,6 +8573,12 @@ async function runProbeAuthor(deps, params) {
|
|
|
8555
8573
|
storyKey,
|
|
8556
8574
|
probesCount: probes.length
|
|
8557
8575
|
}, "probe-author: appended ## Runtime Probes section");
|
|
8576
|
+
emitEvent?.("probe-author:appended-to-artifact", {
|
|
8577
|
+
storyKey,
|
|
8578
|
+
runId: pipelineRunId,
|
|
8579
|
+
probesAuthoredCount: probes.length,
|
|
8580
|
+
storyFilePath
|
|
8581
|
+
});
|
|
8558
8582
|
} catch (err) {
|
|
8559
8583
|
const error = err instanceof Error ? err.message : String(err);
|
|
8560
8584
|
logger$14.warn({
|
|
@@ -45151,4 +45175,4 @@ function registerRunCommand(program, _version = "0.0.0", projectRoot = process.c
|
|
|
45151
45175
|
|
|
45152
45176
|
//#endregion
|
|
45153
45177
|
export { AdapterTelemetryPersistence, AppError, DoltRepoMapMetaRepository, DoltSymbolRepository, ERR_REPO_MAP_STORAGE_WRITE, EpicIngester, GitClient, GrammarLoader, RepoMapInjector, RepoMapModule, RepoMapQueryEngine, RepoMapStorage, SymbolParser, createContextCompiler, createDispatcher, createEventEmitter, createImplementationOrchestrator, createPackLoader, createPhaseOrchestrator, createStopAfterGate, createTelemetryAdvisor, formatPhaseCompletionSummary, getFactoryRunSummaries, getScenarioResultsForRun, getTwinRunsForRun, listGraphRuns, normalizeGraphSummaryToStatus, registerExportCommand, registerFactoryCommand, registerRunCommand, registerScenariosCommand, resolveMaxReviewCycles, resolveStoryKeys, runAnalysisPhase, runPlanningPhase, runProbeAuthor, runRunAction, runSolutioningPhase, validateStopAfterFromConflict, wireNdjsonEmitter };
|
|
45154
|
-
//# sourceMappingURL=run-
|
|
45178
|
+
//# sourceMappingURL=run-DnwsCYYA.js.map
|
package/package.json
CHANGED
|
@@ -206,6 +206,13 @@ Strata Run 13 (Story 1-12, post-merge git hook) shipped SHIP_IT after the dev's
|
|
|
206
206
|
|
|
207
207
|
Note this example, taken to production, would have caught the strata 1-12 bug at runtime-probe phase rather than only at e2e smoke pass. That's the standard 60-10 sets.
|
|
208
208
|
|
|
209
|
+
**This is now a blocking gate (Story 60-16, v0.20.41+).** Probes for stories whose AC describes an event-driven mechanism (hook, timer, signal, webhook) MUST invoke the production trigger via a known command pattern (`git merge|pull|push`, `systemctl start <unit>`, `crontab`, `kill -<signal>`, `curl -X POST`, etc.). When the gate fires (`runtime-probe-missing-production-trigger` finding category, error severity), verification status becomes `fail` and the story cannot SHIP_IT until probes invoke the trigger. Two ways to satisfy:
|
|
210
|
+
|
|
211
|
+
1. **Author trigger-invoking probes directly** in this `## Runtime Probes` section — exercise the production trigger as the example above does.
|
|
212
|
+
2. **Let probe-author derive probes** from the AC (Epic 60 Phase 2). When probe-author dispatches for an event-driven AC, it authors AC-grounded probes that exercise production triggers by design. The eval-validated catch rate on the v1 corpus was 4/4 (100%) under v0.20.39+. The probes carry `_authoredBy: 'probe-author'` metadata for telemetry attribution.
|
|
213
|
+
|
|
214
|
+
Before Sprint 22, when the gate was only a warn-severity advisory, it produced false negatives at SHIP_IT time. Now that it has been flipped to error severity, the gate is the load-bearing line of defense for the trigger-invocation property.
|
|
215
|
+
|
|
209
216
|
### Examples by artifact class
|
|
210
217
|
|
|
211
218
|
**Systemd unit:**
|