mustflow 1.31.0 → 2.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/README.md +23 -9
  2. package/dist/cli/commands/classify.js +61 -6
  3. package/dist/cli/commands/contract-lint.js +13 -4
  4. package/dist/cli/commands/dashboard.js +77 -2
  5. package/dist/cli/commands/explain-verify.js +11 -1
  6. package/dist/cli/commands/index.js +14 -0
  7. package/dist/cli/commands/run.js +4 -1
  8. package/dist/cli/commands/verify.js +986 -43
  9. package/dist/cli/i18n/en.js +61 -10
  10. package/dist/cli/i18n/es.js +61 -10
  11. package/dist/cli/i18n/fr.js +61 -10
  12. package/dist/cli/i18n/hi.js +61 -10
  13. package/dist/cli/i18n/ko.js +61 -10
  14. package/dist/cli/i18n/zh.js +61 -10
  15. package/dist/cli/lib/dashboard-export.js +62 -12
  16. package/dist/cli/lib/dashboard-html/client-script.js +1936 -0
  17. package/dist/cli/lib/dashboard-html/locale-bootstrap.js +8 -0
  18. package/dist/cli/lib/dashboard-html/styles.js +572 -0
  19. package/dist/cli/lib/dashboard-html/template.js +134 -0
  20. package/dist/cli/lib/dashboard-html/types.js +1 -0
  21. package/dist/cli/lib/dashboard-html.js +1 -1907
  22. package/dist/cli/lib/dashboard-locale.js +37 -0
  23. package/dist/cli/lib/local-index/constants.js +48 -0
  24. package/dist/cli/lib/local-index/index.js +2951 -0
  25. package/dist/cli/lib/local-index/sql.js +15 -0
  26. package/dist/cli/lib/local-index/types.js +1 -0
  27. package/dist/cli/lib/local-index.js +1 -1911
  28. package/dist/cli/lib/run-plan.js +76 -1
  29. package/dist/cli/lib/templates.js +18 -1
  30. package/dist/cli/lib/validation/command-intents.js +11 -0
  31. package/dist/cli/lib/validation/constants.js +238 -0
  32. package/dist/cli/lib/validation/index.js +1384 -0
  33. package/dist/cli/lib/validation/primitives.js +198 -0
  34. package/dist/cli/lib/validation/test-selection.js +95 -0
  35. package/dist/cli/lib/validation/types.js +1 -0
  36. package/dist/cli/lib/validation.js +1 -1770
  37. package/dist/core/check-issues.js +6 -0
  38. package/dist/core/completion-verdict.js +341 -0
  39. package/dist/core/contract-lint.js +221 -6
  40. package/dist/core/external-evidence.js +9 -0
  41. package/dist/core/public-json-contracts.js +21 -0
  42. package/dist/core/repeated-failure.js +179 -0
  43. package/dist/core/repro-evidence.js +134 -0
  44. package/dist/core/scope-risk.js +64 -0
  45. package/dist/core/skill-route-alignment.js +20 -0
  46. package/dist/core/source-anchor-status.js +4 -1
  47. package/dist/core/test-selection.js +3 -0
  48. package/dist/core/validation-ratchet.js +196 -0
  49. package/dist/core/verification-evidence.js +249 -0
  50. package/examples/README.md +12 -4
  51. package/package.json +3 -3
  52. package/schemas/README.md +13 -3
  53. package/schemas/change-verification-report.schema.json +16 -2
  54. package/schemas/commands.schema.json +4 -0
  55. package/schemas/contract-lint-report.schema.json +29 -0
  56. package/schemas/dashboard-export.schema.json +310 -0
  57. package/schemas/explain-report.schema.json +173 -1
  58. package/schemas/latest-run-pointer.schema.json +601 -0
  59. package/schemas/run-receipt.schema.json +4 -0
  60. package/schemas/test-selection.schema.json +81 -0
  61. package/schemas/verify-report.schema.json +578 -1
  62. package/schemas/verify-run-manifest.schema.json +627 -0
  63. package/templates/default/i18n.toml +1 -1
  64. package/templates/default/locales/en/.mustflow/skills/INDEX.md +124 -29
  65. package/templates/default/locales/en/.mustflow/skills/routes.toml +289 -0
  66. package/templates/default/manifest.toml +29 -2
@@ -0,0 +1,179 @@
1
+ import { createHash } from 'node:crypto';
2
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
3
+ import path from 'node:path';
4
+ export const REPEATED_FAILURE_STATE_PATH = '.mustflow/state/repeated-failures.json';
5
+ export const REPEATED_FAILURE_STATE_LIMIT = 50;
6
+ const UNRESOLVED_VERIFY_STATUSES = new Set(['failed', 'blocked', 'partial']);
7
+ function sha256Json(value) {
8
+ return `sha256:${createHash('sha256').update(JSON.stringify(value)).digest('hex')}`;
9
+ }
10
+ function normalizeStrings(values) {
11
+ return [...new Set(values.map((value) => value.trim()).filter((value) => value.length > 0))].sort((left, right) => left.localeCompare(right));
12
+ }
13
+ function hashStrings(values) {
14
+ return sha256Json(normalizeStrings(values));
15
+ }
16
+ function hashBooleans(values) {
17
+ return sha256Json([...new Set(values)].sort((left, right) => Number(left) - Number(right)));
18
+ }
19
+ function isString(value) {
20
+ return typeof value === 'string' && value.length > 0;
21
+ }
22
+ function isRepeatedFailureSummary(value) {
23
+ if (!value || typeof value !== 'object' || Array.isArray(value)) {
24
+ return false;
25
+ }
26
+ const record = value;
27
+ return (record.schema_version === '1' &&
28
+ isString(record.fingerprint) &&
29
+ isString(record.verification_plan_id) &&
30
+ isString(record.status) &&
31
+ isString(record.failed_intents_hash) &&
32
+ isString(record.risk_codes_hash) &&
33
+ isString(record.affected_surfaces_hash) &&
34
+ isString(record.first_seen_at) &&
35
+ isString(record.last_seen_at) &&
36
+ typeof record.seen_count === 'number' &&
37
+ Number.isInteger(record.seen_count) &&
38
+ record.seen_count > 0 &&
39
+ typeof record.requires_new_evidence === 'boolean');
40
+ }
41
+ function repeatedFailureStatePath(projectRoot) {
42
+ return path.join(projectRoot, ...REPEATED_FAILURE_STATE_PATH.split('/'));
43
+ }
44
+ function readRepeatedFailureState(projectRoot) {
45
+ const statePath = repeatedFailureStatePath(projectRoot);
46
+ if (!existsSync(statePath)) {
47
+ return { schema_version: '1', fingerprints: [] };
48
+ }
49
+ try {
50
+ const parsed = JSON.parse(readFileSync(statePath, 'utf8'));
51
+ const fingerprints = Array.isArray(parsed.fingerprints)
52
+ ? parsed.fingerprints.filter(isRepeatedFailureSummary)
53
+ : [];
54
+ return { schema_version: '1', fingerprints };
55
+ }
56
+ catch {
57
+ return { schema_version: '1', fingerprints: [] };
58
+ }
59
+ }
60
+ function writeRepeatedFailureState(projectRoot, state) {
61
+ const statePath = repeatedFailureStatePath(projectRoot);
62
+ mkdirSync(path.dirname(statePath), { recursive: true });
63
+ writeFileSync(statePath, `${JSON.stringify(state, null, 2)}\n`, 'utf8');
64
+ }
65
+ export function createVerificationFailureFingerprint(input) {
66
+ const failedIntents = normalizeStrings(input.failedIntents);
67
+ const riskCodes = normalizeStrings(input.riskCodes);
68
+ if (failedIntents.length === 0 && riskCodes.length === 0) {
69
+ return null;
70
+ }
71
+ const exitCodeClasses = normalizeStrings(input.exitCodeClasses);
72
+ const timeoutFlags = [...new Set(input.timeoutFlags)].sort((left, right) => Number(left) - Number(right));
73
+ const errorKinds = normalizeStrings(input.errorKinds);
74
+ const affectedSurfaces = normalizeStrings(input.affectedSurfaces);
75
+ const commandFingerprints = normalizeStrings(input.commandFingerprints);
76
+ const diagnosticSignals = {
77
+ exit_code_classes: exitCodeClasses,
78
+ timeout_flags: timeoutFlags,
79
+ error_kinds: errorKinds,
80
+ };
81
+ const fingerprintSource = {
82
+ schema_version: '1',
83
+ verification_plan_id: input.verificationPlanId,
84
+ failed_intents: failedIntents,
85
+ diagnostic_signals: diagnosticSignals,
86
+ risk_codes: riskCodes,
87
+ affected_surfaces: affectedSurfaces,
88
+ command_fingerprints: commandFingerprints,
89
+ };
90
+ return {
91
+ schema_version: '1',
92
+ fingerprint: sha256Json(fingerprintSource),
93
+ verification_plan_id: input.verificationPlanId,
94
+ failed_intents_hash: hashStrings(failedIntents),
95
+ exit_code_classes_hash: hashStrings(exitCodeClasses),
96
+ timeout_flags_hash: hashBooleans(timeoutFlags),
97
+ error_kinds_hash: hashStrings(errorKinds),
98
+ diagnostic_hash: sha256Json(diagnosticSignals),
99
+ risk_codes_hash: hashStrings(riskCodes),
100
+ affected_surfaces_hash: hashStrings(affectedSurfaces),
101
+ command_fingerprints_hash: hashStrings(commandFingerprints),
102
+ };
103
+ }
104
+ export function updateRepeatedFailureState(input) {
105
+ const failureFingerprint = input.failureFingerprint;
106
+ if (!failureFingerprint) {
107
+ return null;
108
+ }
109
+ const state = readRepeatedFailureState(input.projectRoot);
110
+ const observedAt = (input.observedAt ?? new Date()).toISOString();
111
+ const existing = state.fingerprints.find((entry) => entry.fingerprint === failureFingerprint.fingerprint);
112
+ const seenCount = (existing?.seen_count ?? 0) + 1;
113
+ const summary = {
114
+ schema_version: '1',
115
+ fingerprint: failureFingerprint.fingerprint,
116
+ verification_plan_id: failureFingerprint.verification_plan_id,
117
+ status: input.status,
118
+ failed_intents_hash: failureFingerprint.failed_intents_hash,
119
+ risk_codes_hash: failureFingerprint.risk_codes_hash,
120
+ affected_surfaces_hash: failureFingerprint.affected_surfaces_hash,
121
+ first_seen_at: existing?.first_seen_at ?? observedAt,
122
+ last_seen_at: observedAt,
123
+ seen_count: seenCount,
124
+ requires_new_evidence: UNRESOLVED_VERIFY_STATUSES.has(input.status) && seenCount >= 2,
125
+ };
126
+ const nextFingerprints = [summary, ...state.fingerprints.filter((entry) => entry.fingerprint !== summary.fingerprint)]
127
+ .sort((left, right) => right.last_seen_at.localeCompare(left.last_seen_at))
128
+ .slice(0, REPEATED_FAILURE_STATE_LIMIT);
129
+ writeRepeatedFailureState(input.projectRoot, {
130
+ schema_version: '1',
131
+ fingerprints: nextFingerprints,
132
+ });
133
+ return summary;
134
+ }
135
+ function createRepeatedFailureRisk(code, currentFingerprint, previousStatus) {
136
+ const detail = code === 'repeated_verification_failure'
137
+ ? 'The previous verify summary has the same failure fingerprint and an unresolved status; provide new evidence or a narrower hypothesis before marking the task complete.'
138
+ : code === 'no_new_evidence_since_previous_failure'
139
+ ? 'The previous verify summary has the same plan, failed-intent hash, and affected-surface hash; provide new source or reproduction evidence before treating the next completion claim as verifiable.'
140
+ : 'The same unresolved failure fingerprint has repeated three or more times; new evidence is required before another completion claim can be treated as verifiable.';
141
+ return {
142
+ code,
143
+ severity: 'high',
144
+ verdict_effect: code === 'repeated_verification_failure' ? 'contradiction' : 'blocker',
145
+ previous_status: previousStatus,
146
+ verification_plan_id: currentFingerprint.verification_plan_id,
147
+ failure_fingerprint: currentFingerprint.fingerprint,
148
+ failed_intents_hash: currentFingerprint.failed_intents_hash,
149
+ risk_codes_hash: currentFingerprint.risk_codes_hash,
150
+ affected_surfaces_hash: currentFingerprint.affected_surfaces_hash,
151
+ detail,
152
+ };
153
+ }
154
+ export function createRepeatedFailureRisks(input) {
155
+ const currentFingerprint = input.currentFailureFingerprint;
156
+ if (input.previousFailureFingerprint === null ||
157
+ input.previousStatus === null ||
158
+ currentFingerprint === null ||
159
+ !UNRESOLVED_VERIFY_STATUSES.has(input.previousStatus) ||
160
+ !UNRESOLVED_VERIFY_STATUSES.has(input.currentStatus)) {
161
+ return [];
162
+ }
163
+ const risks = [];
164
+ const previousFingerprint = input.previousFailureFingerprint;
165
+ const sameFingerprint = previousFingerprint.fingerprint === currentFingerprint.fingerprint;
166
+ const samePlanAndNoNewSourceEvidence = previousFingerprint.verification_plan_id === currentFingerprint.verification_plan_id &&
167
+ previousFingerprint.failed_intents_hash === currentFingerprint.failed_intents_hash &&
168
+ previousFingerprint.affected_surfaces_hash === currentFingerprint.affected_surfaces_hash;
169
+ if (sameFingerprint) {
170
+ risks.push(createRepeatedFailureRisk('repeated_verification_failure', currentFingerprint, input.previousStatus));
171
+ }
172
+ if (samePlanAndNoNewSourceEvidence && !sameFingerprint) {
173
+ risks.push(createRepeatedFailureRisk('no_new_evidence_since_previous_failure', currentFingerprint, input.previousStatus));
174
+ }
175
+ if ((input.currentSummary?.seen_count ?? 0) >= 3 && input.currentSummary?.requires_new_evidence === true) {
176
+ risks.push(createRepeatedFailureRisk('repeated_failure_requires_new_evidence', currentFingerprint, input.previousStatus));
177
+ }
178
+ return risks;
179
+ }
@@ -0,0 +1,134 @@
1
+ const TEXT_FIELD_LABELS = {
2
+ reported_symptom: 'reported symptom',
3
+ expected_behavior: 'expected behavior',
4
+ observed_behavior: 'observed behavior',
5
+ };
6
+ function pushRisk(risks, detail, verdictEffect = 'partial') {
7
+ risks.push({
8
+ code: 'repro_evidence_missing',
9
+ severity: verdictEffect === 'contradicted' ? 'critical' : 'high',
10
+ detail,
11
+ verdict_effect: verdictEffect,
12
+ });
13
+ }
14
+ function collectReceiptBindingRisks(phaseLabel, evidence, options, risks) {
15
+ if (!evidence.receipt_path || !evidence.receipt_sha256 || !evidence.verification_plan_id) {
16
+ pushRisk(risks, `Bug-fix repro evidence ${phaseLabel} observation is not bound to receipt_path, receipt_sha256, and verification_plan_id.`);
17
+ return;
18
+ }
19
+ if (options.verificationPlanId && evidence.verification_plan_id !== options.verificationPlanId) {
20
+ pushRisk(risks, `Bug-fix repro evidence ${phaseLabel} receipt is stale for the current verification plan.`);
21
+ }
22
+ }
23
+ function collectBeforeFixRisks(report, options, risks) {
24
+ if (report.before_fix.status === 'missing') {
25
+ pushRisk(risks, 'Bug-fix repro evidence is missing before-fix reproduction; reproduce the original failure or mark it unavailable before claiming verification.');
26
+ return;
27
+ }
28
+ if (report.before_fix.status === 'unavailable') {
29
+ pushRisk(risks, report.before_fix.reason
30
+ ? 'Bug-fix repro evidence marks before-fix reproduction unavailable; the result cannot be verified without the original failure being observed.'
31
+ : 'Bug-fix repro evidence marks before-fix reproduction unavailable without explaining why.');
32
+ return;
33
+ }
34
+ if (!report.before_fix.summary) {
35
+ pushRisk(risks, 'Bug-fix repro evidence reproduced the before-fix failure but does not summarize the evidence.');
36
+ }
37
+ if (report.before_fix.outcome !== 'failed_as_expected') {
38
+ pushRisk(risks, 'Bug-fix repro evidence reproduced the before-fix path without outcome failed_as_expected.');
39
+ }
40
+ collectReceiptBindingRisks('before-fix', report.before_fix, options, risks);
41
+ }
42
+ function collectRouteIdentityRisks(report, risks) {
43
+ if (!report.reproduction_route.route_id) {
44
+ pushRisk(risks, 'Bug-fix repro evidence is missing reproduction_route.route_id.', 'unverified');
45
+ }
46
+ if (!report.reproduction_route.route_kind) {
47
+ pushRisk(risks, 'Bug-fix repro evidence is missing reproduction_route.route_kind.');
48
+ }
49
+ if (!report.reproduction_route.route_digest) {
50
+ pushRisk(risks, 'Bug-fix repro evidence is missing reproduction_route.route_digest.', 'unverified');
51
+ }
52
+ if (!report.reproduction_route.failure_oracle_hash) {
53
+ pushRisk(risks, 'Bug-fix repro evidence is missing reproduction_route.failure_oracle_hash.');
54
+ }
55
+ if (report.reproduction_route.steps.length === 0) {
56
+ pushRisk(risks, 'Bug-fix repro evidence is missing bounded reproduction route steps.', 'unverified');
57
+ }
58
+ }
59
+ function collectAfterFixRisks(report, options, risks) {
60
+ if (report.after_fix.status === 'missing') {
61
+ pushRisk(risks, 'Bug-fix repro evidence is missing after-fix same-route evidence; rerun the original route after the fix before claiming verification.', 'unverified');
62
+ return;
63
+ }
64
+ if (report.after_fix.status === 'unavailable') {
65
+ pushRisk(risks, report.after_fix.reason
66
+ ? 'Bug-fix repro evidence marks after-fix same-route evidence unavailable; the result cannot be verified without a post-fix pass.'
67
+ : 'Bug-fix repro evidence marks after-fix same-route evidence unavailable without explaining why.', 'unverified');
68
+ return;
69
+ }
70
+ if (report.after_fix.status === 'failed') {
71
+ pushRisk(risks, 'Bug-fix repro evidence says the after-fix route still failed.', 'contradicted');
72
+ return;
73
+ }
74
+ if (!report.after_fix.summary) {
75
+ pushRisk(risks, 'Bug-fix repro evidence marks after-fix evidence passed but does not summarize the evidence.');
76
+ }
77
+ if (report.after_fix.outcome !== 'passed_expected_behavior') {
78
+ pushRisk(risks, 'Bug-fix repro evidence marks after-fix evidence passed without outcome passed_expected_behavior.', 'unverified');
79
+ }
80
+ if (!report.after_fix.same_route_as) {
81
+ pushRisk(risks, 'Bug-fix repro evidence marks after-fix evidence passed without same_route_as.', 'unverified');
82
+ }
83
+ if (report.reproduction_route.route_id &&
84
+ report.after_fix.same_route_as &&
85
+ report.after_fix.same_route_as !== report.reproduction_route.route_id) {
86
+ pushRisk(risks, 'Bug-fix repro evidence after_fix.same_route_as does not match reproduction_route.route_id.');
87
+ }
88
+ collectReceiptBindingRisks('after-fix', report.after_fix, options, risks);
89
+ }
90
+ function collectRegressionGuardRisks(report, options, risks) {
91
+ if (report.regression_guard.status === 'missing') {
92
+ pushRisk(risks, 'Bug-fix repro evidence is missing a regression guard; add or identify the guard before claiming verification.');
93
+ return;
94
+ }
95
+ if (report.regression_guard.status === 'unavailable') {
96
+ pushRisk(risks, report.regression_guard.reason
97
+ ? 'Bug-fix repro evidence marks the regression guard unavailable; the result cannot be verified without a guard or explicit limitation.'
98
+ : 'Bug-fix repro evidence marks the regression guard unavailable without explaining why.');
99
+ return;
100
+ }
101
+ if (report.regression_guard.status === 'failed') {
102
+ pushRisk(risks, 'Bug-fix repro evidence says the regression guard failed.', 'contradicted');
103
+ return;
104
+ }
105
+ if (!report.regression_guard.summary) {
106
+ pushRisk(risks, 'Bug-fix repro evidence marks the regression guard passed but does not summarize the evidence.');
107
+ }
108
+ if (!report.regression_guard.intent && !report.regression_guard.test_path) {
109
+ pushRisk(risks, 'Bug-fix repro evidence marks the regression guard passed without an intent or test path.');
110
+ }
111
+ collectReceiptBindingRisks('regression-guard', report.regression_guard, options, risks);
112
+ }
113
+ export function createReproEvidenceRisks(report, options = {}) {
114
+ if (!report) {
115
+ return [];
116
+ }
117
+ const risks = [];
118
+ for (const [field, label] of Object.entries(TEXT_FIELD_LABELS)) {
119
+ if (!report[field]) {
120
+ pushRisk(risks, `Bug-fix repro evidence is missing ${label}; do not mark the task verified from command receipts alone.`);
121
+ }
122
+ }
123
+ collectRouteIdentityRisks(report, risks);
124
+ collectBeforeFixRisks(report, options, risks);
125
+ collectAfterFixRisks(report, options, risks);
126
+ collectRegressionGuardRisks(report, options, risks);
127
+ return risks;
128
+ }
129
+ export function countReproEvidenceVerdictEffects(risks) {
130
+ return {
131
+ contradicted: risks.filter((risk) => risk.verdict_effect === 'contradicted').length,
132
+ unverified: risks.filter((risk) => risk.verdict_effect === 'unverified').length,
133
+ };
134
+ }
@@ -0,0 +1,64 @@
1
+ export const SCOPE_DIFF_BUDGET_DEFAULTS = {
2
+ maxChangedFiles: 8,
3
+ maxPublicSurfaces: 4,
4
+ maxChangeKindFamilies: 3,
5
+ maxPathsPerRisk: 8,
6
+ };
7
+ const CHANGE_KIND_FAMILY_BY_KIND = {
8
+ documentation: 'documentation',
9
+ example: 'documentation',
10
+ translation: 'documentation',
11
+ workflow: 'workflow',
12
+ host_instruction: 'workflow',
13
+ installed_template: 'template',
14
+ package_metadata: 'release',
15
+ schema: 'contract',
16
+ test: 'test',
17
+ test_fixture: 'test',
18
+ implementation: 'implementation',
19
+ unknown: 'unknown',
20
+ };
21
+ function uniqueSorted(values) {
22
+ return [...new Set(values)].sort((left, right) => left.localeCompare(right));
23
+ }
24
+ function changeKindFamily(kind) {
25
+ return CHANGE_KIND_FAMILY_BY_KIND[kind] ?? kind;
26
+ }
27
+ function firstPaths(paths) {
28
+ return paths.slice(0, SCOPE_DIFF_BUDGET_DEFAULTS.maxPathsPerRisk);
29
+ }
30
+ export function createScopeDiffRisks(report) {
31
+ const risks = [];
32
+ if (report.summary.fileCount > SCOPE_DIFF_BUDGET_DEFAULTS.maxChangedFiles) {
33
+ risks.push({
34
+ code: 'diff_budget_exceeded',
35
+ severity: 'high',
36
+ detail: `Changed file count ${report.summary.fileCount} exceeds the conservative completion budget of ${SCOPE_DIFF_BUDGET_DEFAULTS.maxChangedFiles}.`,
37
+ count: report.summary.fileCount,
38
+ limit: SCOPE_DIFF_BUDGET_DEFAULTS.maxChangedFiles,
39
+ paths: firstPaths(report.files),
40
+ });
41
+ }
42
+ if (report.summary.publicSurfaceCount > SCOPE_DIFF_BUDGET_DEFAULTS.maxPublicSurfaces) {
43
+ risks.push({
44
+ code: 'public_surface_budget_exceeded',
45
+ severity: 'high',
46
+ detail: `Public surface count ${report.summary.publicSurfaceCount} exceeds the conservative completion budget of ${SCOPE_DIFF_BUDGET_DEFAULTS.maxPublicSurfaces}.`,
47
+ count: report.summary.publicSurfaceCount,
48
+ limit: SCOPE_DIFF_BUDGET_DEFAULTS.maxPublicSurfaces,
49
+ paths: firstPaths(report.classifications.filter((classification) => classification.surface.isPublicSurface).map((classification) => classification.path)),
50
+ });
51
+ }
52
+ const changeKindFamilies = uniqueSorted(report.summary.changeKinds.map(changeKindFamily));
53
+ if (changeKindFamilies.length > SCOPE_DIFF_BUDGET_DEFAULTS.maxChangeKindFamilies) {
54
+ risks.push({
55
+ code: 'mixed_change_kind_budget_exceeded',
56
+ severity: 'medium',
57
+ detail: `Change kind family count ${changeKindFamilies.length} exceeds the conservative completion budget of ${SCOPE_DIFF_BUDGET_DEFAULTS.maxChangeKindFamilies}: ${changeKindFamilies.join(', ')}.`,
58
+ count: changeKindFamilies.length,
59
+ limit: SCOPE_DIFF_BUDGET_DEFAULTS.maxChangeKindFamilies,
60
+ paths: firstPaths(report.files),
61
+ });
62
+ }
63
+ return risks;
64
+ }
@@ -1,5 +1,6 @@
1
1
  const SKILL_ROUTE_SOURCE_FILES = [
2
2
  '.mustflow/skills/INDEX.md',
3
+ '.mustflow/skills/routes.toml',
3
4
  '.mustflow/skills/*/SKILL.md',
4
5
  '.mustflow/config/commands.toml',
5
6
  '.mustflow/docs/agent-workflow.md',
@@ -20,6 +21,18 @@ export const SKILL_INDEX_ROUTE_COLUMN_COUNT = 7;
20
21
  export const SKILL_INDEX_SKILL_PATH_COLUMN_INDEX = 1;
21
22
  export const SKILL_INDEX_VERIFICATION_INTENTS_COLUMN_INDEX = 5;
22
23
  export const SKILL_INDEX_ROUTE_COLUMNS = 'Trigger, Skill Document, Required Input, Edit Scope, Risk, Verification Intents, Expected Output';
24
+ export const SKILL_ROUTE_CATEGORY_LABELS = {
25
+ bug_failure: 'Bug and Failure',
26
+ general_code: 'General Code Change',
27
+ tests: 'Tests and Regression',
28
+ docs_release: 'Documentation and Release',
29
+ security_privacy: 'Security and Privacy',
30
+ data_external: 'Data and External Systems',
31
+ ui_assets: 'UI and Assets',
32
+ architecture_patterns: 'Architecture Patterns',
33
+ workflow_contracts: 'Workflow and Contract Maintenance',
34
+ };
35
+ const SKILL_ROUTE_CATEGORY_BY_HEADING = new Map(Object.entries(SKILL_ROUTE_CATEGORY_LABELS).map(([category, label]) => [label, category]));
23
36
  function splitMarkdownTableRow(line) {
24
37
  return line
25
38
  .trim()
@@ -39,7 +52,13 @@ export function findSkillIndexRoutePathColumn(cells) {
39
52
  }
40
53
  export function parseSkillIndexRoutes(content) {
41
54
  const routes = [];
55
+ let currentCategory;
42
56
  for (const line of content.split(/\r?\n/u)) {
57
+ const categoryHeading = /^###\s+(.+?)\s*$/u.exec(line.trim())?.[1];
58
+ if (categoryHeading) {
59
+ currentCategory = SKILL_ROUTE_CATEGORY_BY_HEADING.get(categoryHeading);
60
+ continue;
61
+ }
43
62
  if (!line.trim().startsWith('|')) {
44
63
  continue;
45
64
  }
@@ -63,6 +82,7 @@ export function parseSkillIndexRoutes(content) {
63
82
  risk: cells[4] ?? '',
64
83
  commandIntents: readBacktickValues(cells[SKILL_INDEX_VERIFICATION_INTENTS_COLUMN_INDEX] ?? ''),
65
84
  expectedOutput: cells[6] ?? '',
85
+ category: currentCategory,
66
86
  });
67
87
  }
68
88
  return routes;
@@ -19,6 +19,9 @@ const HIGH_RISK_SOURCE_ANCHOR_TAGS = new Set([
19
19
  'ssrf',
20
20
  'xss',
21
21
  ]);
22
+ export function hasHighRiskSourceAnchorRiskTags(risk) {
23
+ return risk.some((tag) => HIGH_RISK_SOURCE_ANCHOR_TAGS.has(tag));
24
+ }
22
25
  function sha256(value) {
23
26
  return `sha256:${createHash('sha256').update(value).digest('hex')}`;
24
27
  }
@@ -62,7 +65,7 @@ function currentAnchorSignals(risk) {
62
65
  };
63
66
  }
64
67
  function hasHighRisk(risk) {
65
- return risk.some((tag) => HIGH_RISK_SOURCE_ANCHOR_TAGS.has(tag));
68
+ return hasHighRiskSourceAnchorRiskTags(risk);
66
69
  }
67
70
  function sameSymbolIdentity(left, right) {
68
71
  return left.kind === right.kind && left.name !== null && left.name === right.name;
@@ -3,6 +3,7 @@ import { isRecord, readStringArray, resolveMustflowConfigPath, } from './config-
3
3
  import { readTomlFile } from './toml.js';
4
4
  import { classifyVerificationCandidate, } from './verification-plan.js';
5
5
  export const TEST_SELECTION_CONFIG_RELATIVE_PATH = '.mustflow/config/test-selection.toml';
6
+ const STALE_OR_MISSING_RULES_NOTE = 'Project-declared test selection rules did not cover the current changed files; review .mustflow/config/test-selection.toml for stale or missing rules.';
6
7
  function uniqueSorted(values) {
7
8
  return [...new Set(values)].sort((left, right) => left.localeCompare(right));
8
9
  }
@@ -203,7 +204,9 @@ export function createProjectTestSelectionPlan(projectRoot, classificationReport
203
204
  matches.length > 0
204
205
  ? 'Matched project-declared test selection rules are treated as a minimum selected set.'
205
206
  : 'No project-declared test selection rules matched the current changed files.',
207
+ ...(matches.length > 0 ? [] : [STALE_OR_MISSING_RULES_NOTE]),
206
208
  'Local index data and performance history may add suggestions later, but must not remove manifest-selected tests.',
209
+ 'Absence of historical failures is not evidence that a test can be omitted.',
207
210
  'Test targets are passed only when the selected command intent declares selection.accepts_test_targets = true.',
208
211
  ];
209
212
  return {
@@ -0,0 +1,196 @@
1
+ import { spawnSync } from 'node:child_process';
2
+ import { existsSync, readFileSync } from 'node:fs';
3
+ import path from 'node:path';
4
+ const TEST_CHANGE_KINDS = new Set(['test', 'test_fixture']);
5
+ const SKIP_OR_ONLY_MARKER = /\b(?:describe|it|test)\s*\.\s*(?:skip|only)\s*\(/u;
6
+ const TODO_OR_PENDING_MARKER = /\b(?:describe|it|test)\s*\.\s*(?:todo|pending)\s*\(/u;
7
+ const ASSERTION_CALL = /\b(?:assert(?:\.\w+)?|expect)\s*\(/u;
8
+ const STRONG_ASSERTION = /\b(?:assert\.(?:equal|deepEqual|strictEqual|throws|rejects)|to(?:Equal|StrictEqual|Be|Throw)|throws|rejects)\b/u;
9
+ const WEAK_ASSERTION = /\b(?:assert\.ok|toBeDefined|toBeTruthy|toBeFalsy)\b/u;
10
+ const NEGATIVE_ASSERTION = /\b(?:notEqual|notDeepEqual|doesNotMatch|doesNotThrow|\.not\.)\b/u;
11
+ const EXCEPTION_ASSERTION = /\b(?:assert\.(?:throws|rejects|doesNotThrow|doesNotReject)|toThrow|rejects|throws)\b/u;
12
+ const SNAPSHOT_PATH = /(?:^|\/)(?:__snapshots__\/|snapshots\/)|\.snap$/u;
13
+ const GOLDEN_PATH = /(?:^|\/)(?:golden|fixtures|expected)(?:\/|-)|(?:\.golden\.|\.expected\.)/u;
14
+ const JAVASCRIPT_TEST_PATH = /(?:^|\/)[^/]+\.(?:test|spec)\.[cm]?[jt]sx?$/u;
15
+ const COMMAND_CONTRACT_PATH = '.mustflow/config/commands.toml';
16
+ const TEST_SELECTION_PATTERN = /(?:--(?:grep|testNamePattern|testPathPattern|runTestsByPath|test-name-pattern)|\bgrep\s*=|\btestNamePattern\b|\btestPathPattern\b)/u;
17
+ const COMMAND_ALLOWS_NO_TESTS_PATTERN = /(?:passWithNoTests|--pass-with-no-tests|--passWithNoTests)/u;
18
+ const COMMAND_FORCES_SNAPSHOT_UPDATE_PATTERN = /(?:updateSnapshot|--update-snapshot|--updateSnapshot|\s-u(?:\s|"))/u;
19
+ const COMMAND_HIDES_FAILURE_PATTERN = /(?:\|\|\s*true|;\s*true\b|&&\s*true\b|\bexit\s+0\b)/u;
20
+ const CRITICAL_RATCHET_CODES = new Set([
21
+ 'success_exit_codes_widened',
22
+ 'command_allows_no_tests',
23
+ 'command_hides_failure',
24
+ ]);
25
+ function isTestClassification(classification) {
26
+ return classification.surface.category === 'test' || classification.changeKinds.some((kind) => TEST_CHANGE_KINDS.has(kind));
27
+ }
28
+ function isJavaScriptTestPath(relativePath) {
29
+ return JAVASCRIPT_TEST_PATH.test(relativePath);
30
+ }
31
+ function resolveInsideRoot(projectRoot, relativePath) {
32
+ const resolvedPath = path.resolve(projectRoot, relativePath);
33
+ const relative = path.relative(projectRoot, resolvedPath);
34
+ if (relative.startsWith('..') || path.isAbsolute(relative)) {
35
+ return null;
36
+ }
37
+ return resolvedPath;
38
+ }
39
+ function fileTextIfReadable(projectRoot, relativePath) {
40
+ const resolvedPath = resolveInsideRoot(projectRoot, relativePath);
41
+ if (resolvedPath === null || !existsSync(resolvedPath)) {
42
+ return null;
43
+ }
44
+ try {
45
+ return readFileSync(resolvedPath, 'utf8');
46
+ }
47
+ catch {
48
+ return null;
49
+ }
50
+ }
51
+ function gitDiffLines(projectRoot, relativePath) {
52
+ const result = spawnSync('git', ['diff', '--no-ext-diff', '--unified=0', '--', relativePath], {
53
+ cwd: projectRoot,
54
+ encoding: 'utf8',
55
+ windowsHide: true,
56
+ });
57
+ if (result.status !== 0 || typeof result.stdout !== 'string' || result.stdout.length === 0) {
58
+ return { added: [], removed: [] };
59
+ }
60
+ const added = [];
61
+ const removed = [];
62
+ for (const line of result.stdout.split(/\r?\n/u)) {
63
+ if (line.startsWith('+++') || line.startsWith('---')) {
64
+ continue;
65
+ }
66
+ if (line.startsWith('+')) {
67
+ added.push(line.slice(1));
68
+ }
69
+ else if (line.startsWith('-')) {
70
+ removed.push(line.slice(1));
71
+ }
72
+ }
73
+ return { added, removed };
74
+ }
75
+ function countMatching(lines, pattern) {
76
+ return lines.filter((line) => pattern.test(line)).length;
77
+ }
78
+ function hasAny(lines, pattern) {
79
+ return lines.some((line) => pattern.test(line));
80
+ }
81
+ function extractCoverageNumbers(lines) {
82
+ return lines
83
+ .filter((line) => /\b(?:coverage|threshold|branches|functions|lines|statements)\b/iu.test(line))
84
+ .flatMap((line) => [...line.matchAll(/\b\d+(?:\.\d+)?\b/gu)].map((match) => Number(match[0])))
85
+ .filter((value) => Number.isFinite(value));
86
+ }
87
+ function isCommandContractPath(relativePath) {
88
+ return relativePath === COMMAND_CONTRACT_PATH;
89
+ }
90
+ function isValidationConfigPath(relativePath) {
91
+ return (isCommandContractPath(relativePath) ||
92
+ /(?:^|\/)(?:package\.json|jest\.config\.[cm]?[jt]s|vitest\.config\.[cm]?[jt]s|nyc\.config\.[cm]?[jt]s)$/u.test(relativePath) ||
93
+ /\bcoverage\b/u.test(relativePath));
94
+ }
95
+ function riskDetail(pathText, message) {
96
+ return `Changed validation path ${pathText} ${message}`;
97
+ }
98
+ export function countValidationRatchetVerdictEffects(risks) {
99
+ return {
100
+ contradicted: risks.filter((risk) => risk.severity === 'critical' && CRITICAL_RATCHET_CODES.has(risk.code)).length,
101
+ };
102
+ }
103
+ export function createValidationRatchetRisks(report, projectRoot) {
104
+ const risks = [];
105
+ const seenRisks = new Set();
106
+ function addRisk(code, severity, pathText, detail) {
107
+ const key = `${pathText}\0${code}`;
108
+ if (seenRisks.has(key)) {
109
+ return;
110
+ }
111
+ seenRisks.add(key);
112
+ risks.push({ code, severity, path: pathText, detail });
113
+ }
114
+ for (const classification of report.classifications) {
115
+ const resolvedPath = resolveInsideRoot(projectRoot, classification.path);
116
+ const diff = report.source === 'changed' ? gitDiffLines(projectRoot, classification.path) : { added: [], removed: [] };
117
+ const addedText = diff.added.join('\n');
118
+ if (isTestClassification(classification)) {
119
+ if (report.source === 'changed' && (resolvedPath === null || !existsSync(resolvedPath))) {
120
+ addRisk('related_test_deleted', 'high', classification.path, `Changed test path ${classification.path} is absent; deleted related tests require review before marking the task verified.`);
121
+ continue;
122
+ }
123
+ if (isJavaScriptTestPath(classification.path)) {
124
+ const fileText = fileTextIfReadable(projectRoot, classification.path);
125
+ if (fileText !== null && SKIP_OR_ONLY_MARKER.test(fileText)) {
126
+ addRisk('skip_or_only_marker_present', 'medium', classification.path, `Changed test path ${classification.path} contains a .skip or .only marker; review whether validation was weakened before marking the task verified.`);
127
+ }
128
+ if ((fileText !== null && TODO_OR_PENDING_MARKER.test(fileText)) || TODO_OR_PENDING_MARKER.test(addedText)) {
129
+ addRisk('todo_or_pending_marker_added', 'medium', classification.path, `Changed test path ${classification.path} contains a todo or pending marker; review whether validation was deferred before marking the task verified.`);
130
+ }
131
+ const removedAssertionCount = countMatching(diff.removed, ASSERTION_CALL);
132
+ const addedAssertionCount = countMatching(diff.added, ASSERTION_CALL);
133
+ if (removedAssertionCount > addedAssertionCount) {
134
+ addRisk('assertion_count_decreased', 'high', classification.path, riskDetail(classification.path, `removes ${removedAssertionCount} assertion line(s) and adds ${addedAssertionCount}; review whether validation strength decreased.`));
135
+ }
136
+ if (hasAny(diff.removed, STRONG_ASSERTION) && hasAny(diff.added, WEAK_ASSERTION)) {
137
+ addRisk('assertion_matcher_weakened', 'high', classification.path, riskDetail(classification.path, 'replaces a stronger assertion with a weaker presence or truthiness assertion.'));
138
+ }
139
+ if (hasAny(diff.removed, NEGATIVE_ASSERTION)) {
140
+ addRisk('negative_assertion_removed', 'high', classification.path, riskDetail(classification.path, 'removes a negative assertion; confirm the denied behavior is still covered.'));
141
+ }
142
+ if (hasAny(diff.removed, EXCEPTION_ASSERTION)) {
143
+ addRisk('exception_assertion_removed', 'high', classification.path, riskDetail(classification.path, 'removes an exception assertion; confirm failure behavior is still covered.'));
144
+ }
145
+ }
146
+ if (SNAPSHOT_PATH.test(classification.path) && diff.added.length + diff.removed.length >= 20) {
147
+ addRisk('snapshot_mass_updated', 'medium', classification.path, riskDetail(classification.path, 'changes a large snapshot region; review that the update does not hide a regression.'));
148
+ }
149
+ if (GOLDEN_PATH.test(`${classification.path} `) && diff.added.length + diff.removed.length >= 20) {
150
+ addRisk('golden_output_replaced', 'medium', classification.path, riskDetail(classification.path, 'replaces a broad golden or expected-output region; review the behavioral reason.'));
151
+ }
152
+ }
153
+ if (isCommandContractPath(classification.path)) {
154
+ if (hasAny(diff.added, /\bstatus\s*=\s*"(?:manual_only|disabled|unknown)"/u)) {
155
+ addRisk('verification_intent_disabled', 'high', classification.path, riskDetail(classification.path, 'adds a non-runnable verification intent status.'));
156
+ }
157
+ if (hasAny(diff.removed, /\brequired_after\s*=/u) && !hasAny(diff.added, /\brequired_after\s*=/u)) {
158
+ addRisk('verification_required_after_removed', 'high', classification.path, riskDetail(classification.path, 'removes a required_after mapping from the command contract.'));
159
+ }
160
+ if (hasAny(diff.added, /\bsuccess_exit_codes\s*=\s*\[[^\]]*(?:[1-9]\d*|true)[^\]]*\]/u)) {
161
+ addRisk('success_exit_codes_widened', 'critical', classification.path, riskDetail(classification.path, 'widens success exit codes beyond the normal zero-exit contract.'));
162
+ }
163
+ if (hasAny(diff.added, COMMAND_ALLOWS_NO_TESTS_PATTERN)) {
164
+ addRisk('command_allows_no_tests', 'critical', classification.path, riskDetail(classification.path, 'allows a test command to pass when no tests run.'));
165
+ }
166
+ if (hasAny(diff.added, COMMAND_FORCES_SNAPSHOT_UPDATE_PATTERN)) {
167
+ addRisk('command_forces_snapshot_update', 'medium', classification.path, riskDetail(classification.path, 'adds snapshot update behavior to a verification command.'));
168
+ }
169
+ if (hasAny(diff.added, COMMAND_HIDES_FAILURE_PATTERN)) {
170
+ addRisk('command_hides_failure', 'critical', classification.path, riskDetail(classification.path, 'adds shell behavior that can hide command failure.'));
171
+ }
172
+ }
173
+ if (isValidationConfigPath(classification.path)) {
174
+ const removedCoverageNumbers = extractCoverageNumbers(diff.removed);
175
+ const addedCoverageNumbers = extractCoverageNumbers(diff.added);
176
+ if (removedCoverageNumbers.length > 0 &&
177
+ addedCoverageNumbers.length > 0 &&
178
+ Math.min(...addedCoverageNumbers) < Math.max(...removedCoverageNumbers)) {
179
+ addRisk('coverage_threshold_lowered', 'medium', classification.path, riskDetail(classification.path, 'lowers a coverage-related numeric threshold.'));
180
+ }
181
+ if (hasAny(diff.added, COMMAND_ALLOWS_NO_TESTS_PATTERN)) {
182
+ addRisk('command_allows_no_tests', 'critical', classification.path, riskDetail(classification.path, 'allows a validation script to pass when no tests run.'));
183
+ }
184
+ if (hasAny(diff.added, COMMAND_FORCES_SNAPSHOT_UPDATE_PATTERN)) {
185
+ addRisk('command_forces_snapshot_update', 'medium', classification.path, riskDetail(classification.path, 'adds snapshot update behavior to a validation script.'));
186
+ }
187
+ if (hasAny(diff.added, COMMAND_HIDES_FAILURE_PATTERN)) {
188
+ addRisk('command_hides_failure', 'critical', classification.path, riskDetail(classification.path, 'adds shell behavior that can hide validation failure.'));
189
+ }
190
+ if (hasAny(diff.added, TEST_SELECTION_PATTERN)) {
191
+ addRisk('test_selection_narrowed', 'medium', classification.path, riskDetail(classification.path, 'adds a test-selection filter; confirm coverage still matches the change.'));
192
+ }
193
+ }
194
+ }
195
+ return risks;
196
+ }