mustflow 2.11.0 → 2.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,17 +1,90 @@
1
+ function createRiskEvidence(input) {
2
+ return {
3
+ source_anchor: input.sourceAnchorRiskCount ?? 0,
4
+ scope_diff: input.scopeDiffRiskCount ?? 0,
5
+ repeated_failure: input.repeatedFailureCount ?? 0,
6
+ validation_ratchet: input.validationRatchetRiskCount ?? 0,
7
+ repro_evidence: input.reproEvidenceRiskCount ?? 0,
8
+ external_evidence: input.externalEvidenceRiskCount ?? 0,
9
+ write_drift: input.writeDriftRiskCount ?? 0,
10
+ receipt_binding: input.receiptBindingRiskCount ?? 0,
11
+ stale_receipt: input.staleReceiptCount ?? 0,
12
+ plan_mismatch: input.planMismatchCount ?? 0,
13
+ };
14
+ }
15
+ function emptyReceiptBindingEvidence() {
16
+ return {
17
+ plan_bound_count: 0,
18
+ plan_unbound_count: 0,
19
+ fingerprint_bound_count: 0,
20
+ fingerprint_unbound_count: 0,
21
+ current_state_bound_count: 0,
22
+ current_state_unavailable_count: 0,
23
+ stale_count: 0,
24
+ plan_mismatch_count: 0,
25
+ };
26
+ }
27
+ function emptyCriteriaEvidence() {
28
+ return {
29
+ total: 0,
30
+ covered: 0,
31
+ partially_covered: 0,
32
+ uncovered: 0,
33
+ blocked: 0,
34
+ contradicted: 0,
35
+ };
36
+ }
37
+ function normalizeVerifyCompletionInput(input) {
38
+ const missingReceiptCount = Math.max(0, input.ranIntents - input.receiptCount);
39
+ if (missingReceiptCount === 0) {
40
+ return input;
41
+ }
42
+ return {
43
+ ...input,
44
+ receiptBindingRiskCount: (input.receiptBindingRiskCount ?? 0) + missingReceiptCount,
45
+ };
46
+ }
1
47
  function verifyStatus(input) {
48
+ const contradictions = [];
2
49
  if (input.failedIntents > 0) {
3
- const contradictions = ['one_or_more_selected_verification_intents_failed'];
4
- if ((input.repeatedFailureCount ?? 0) > 0) {
50
+ contradictions.push('one_or_more_selected_verification_intents_failed');
51
+ }
52
+ if ((input.planMismatchCount ?? 0) > 0) {
53
+ contradictions.push('plan_receipt_mismatch');
54
+ }
55
+ if ((input.reproEvidenceContradictionCount ?? 0) > 0) {
56
+ contradictions.push('repro_evidence_contradicted');
57
+ }
58
+ if ((input.validationRatchetContradictionCount ?? 0) > 0) {
59
+ contradictions.push('validation_ratchet_contradicted');
60
+ }
61
+ if (contradictions.length > 0) {
62
+ if (input.failedIntents > 0 && (input.repeatedFailureCount ?? 0) > 0) {
5
63
  contradictions.push('repeated_verification_failure');
6
64
  }
7
65
  return {
8
66
  status: 'contradicted',
9
- primaryReason: 'verification_failed',
67
+ primaryReason: input.failedIntents > 0
68
+ ? 'verification_failed'
69
+ : (input.planMismatchCount ?? 0) > 0
70
+ ? 'plan_receipt_mismatch'
71
+ : (input.reproEvidenceContradictionCount ?? 0) > 0
72
+ ? 'repro_evidence_contradicted'
73
+ : 'validation_ratchet_contradicted',
10
74
  blockers: [],
11
75
  contradictions,
12
76
  limitations: [],
13
77
  };
14
78
  }
79
+ if ((input.repeatedFailureBlockerCount ?? 0) > 0) {
80
+ return {
81
+ status: 'blocked',
82
+ primaryReason: 'repeated_failure_requires_new_evidence',
83
+ blockers: ['repeated_failure_requires_new_evidence'],
84
+ contradictions: [],
85
+ limitations: [],
86
+ };
87
+ }
15
88
  if (input.ranIntents === 0 && input.skippedIntents > 0) {
16
89
  const blockers = ['all_matching_verification_intents_were_skipped'];
17
90
  if ((input.repeatedFailureCount ?? 0) > 0) {
@@ -51,6 +124,15 @@ function verifyStatus(input) {
51
124
  limitations,
52
125
  };
53
126
  }
127
+ if ((input.reproEvidenceUnverifiedCount ?? 0) > 0) {
128
+ return {
129
+ status: 'unverified',
130
+ primaryReason: 'repro_evidence_unverified',
131
+ blockers: [],
132
+ contradictions: [],
133
+ limitations: ['repro_evidence_missing'],
134
+ };
135
+ }
54
136
  const downgradeLimitations = [];
55
137
  if ((input.sourceAnchorRiskCount ?? 0) > 0) {
56
138
  downgradeLimitations.push('high_risk_source_anchor_requires_review');
@@ -61,6 +143,15 @@ function verifyStatus(input) {
61
143
  if ((input.validationRatchetRiskCount ?? 0) > 0) {
62
144
  downgradeLimitations.push('validation_ratchet_risk_requires_review');
63
145
  }
146
+ if ((input.writeDriftRiskCount ?? 0) > 0) {
147
+ downgradeLimitations.push('write_drift_requires_review');
148
+ }
149
+ if ((input.receiptBindingRiskCount ?? 0) > 0) {
150
+ downgradeLimitations.push('receipt_binding_requires_review');
151
+ }
152
+ if ((input.staleReceiptCount ?? 0) > 0) {
153
+ downgradeLimitations.push('stale_receipt_requires_review');
154
+ }
64
155
  if ((input.reproEvidenceRiskCount ?? 0) > 0) {
65
156
  downgradeLimitations.push('repro_evidence_missing');
66
157
  }
@@ -76,9 +167,15 @@ function verifyStatus(input) {
76
167
  ? 'scope_diff_review_required'
77
168
  : (input.validationRatchetRiskCount ?? 0) > 0
78
169
  ? 'validation_ratchet_review_required'
79
- : (input.reproEvidenceRiskCount ?? 0) > 0
80
- ? 'repro_evidence_missing'
81
- : 'external_evidence_review_required',
170
+ : (input.writeDriftRiskCount ?? 0) > 0
171
+ ? 'write_drift_review_required'
172
+ : (input.receiptBindingRiskCount ?? 0) > 0
173
+ ? 'receipt_binding_review_required'
174
+ : (input.staleReceiptCount ?? 0) > 0
175
+ ? 'stale_receipt_review_required'
176
+ : (input.reproEvidenceRiskCount ?? 0) > 0
177
+ ? 'repro_evidence_missing'
178
+ : 'external_evidence_review_required',
82
179
  blockers: [],
83
180
  contradictions: [],
84
181
  limitations: downgradeLimitations,
@@ -102,26 +199,39 @@ function verifyStatus(input) {
102
199
  };
103
200
  }
104
201
  export function createVerifyCompletionVerdict(input) {
105
- const result = verifyStatus(input);
202
+ const normalizedInput = normalizeVerifyCompletionInput(input);
203
+ const result = verifyStatus(normalizedInput);
204
+ const risks = createRiskEvidence(normalizedInput);
205
+ const receiptBinding = normalizedInput.receiptBinding ?? emptyReceiptBindingEvidence();
206
+ const criteria = normalizedInput.criteria ?? emptyCriteriaEvidence();
106
207
  return {
107
208
  schema_version: '1',
108
209
  status: result.status,
109
210
  primary_reason: result.primaryReason,
110
211
  evidence: {
111
212
  source: 'mf_verify',
112
- verification_plan_id: input.verificationPlanId,
213
+ verification_plan_id: normalizedInput.verificationPlanId,
113
214
  changed_file_count: null,
114
- matched_intents: input.matchedIntents,
115
- ran_intents: input.ranIntents,
116
- passed_intents: input.passedIntents,
117
- failed_intents: input.failedIntents,
118
- skipped_intents: input.skippedIntents,
119
- receipt_count: input.receiptCount,
120
- gap_count: input.skippedIntents,
121
- source_anchor_risk_count: input.sourceAnchorRiskCount ?? 0,
122
- scope_diff_risk_count: input.scopeDiffRiskCount ?? 0,
123
- repeated_failure_count: input.repeatedFailureCount ?? 0,
124
- validation_ratchet_risk_count: input.validationRatchetRiskCount ?? 0,
215
+ criteria,
216
+ matched_intents: normalizedInput.matchedIntents,
217
+ ran_intents: normalizedInput.ranIntents,
218
+ passed_intents: normalizedInput.passedIntents,
219
+ failed_intents: normalizedInput.failedIntents,
220
+ skipped_intents: normalizedInput.skippedIntents,
221
+ receipt_count: normalizedInput.receiptCount,
222
+ gap_count: normalizedInput.skippedIntents,
223
+ source_anchor_risk_count: normalizedInput.sourceAnchorRiskCount ?? 0,
224
+ scope_diff_risk_count: normalizedInput.scopeDiffRiskCount ?? 0,
225
+ repeated_failure_count: normalizedInput.repeatedFailureCount ?? 0,
226
+ validation_ratchet_risk_count: normalizedInput.validationRatchetRiskCount ?? 0,
227
+ repro_evidence_risk_count: normalizedInput.reproEvidenceRiskCount ?? 0,
228
+ external_evidence_risk_count: normalizedInput.externalEvidenceRiskCount ?? 0,
229
+ write_drift_risk_count: normalizedInput.writeDriftRiskCount ?? 0,
230
+ receipt_binding_risk_count: normalizedInput.receiptBindingRiskCount ?? 0,
231
+ stale_receipt_count: normalizedInput.staleReceiptCount ?? 0,
232
+ plan_mismatch_count: normalizedInput.planMismatchCount ?? 0,
233
+ risks,
234
+ receipt_binding: receiptBinding,
125
235
  latest_run_status: null,
126
236
  },
127
237
  blockers: result.blockers,
@@ -130,6 +240,8 @@ export function createVerifyCompletionVerdict(input) {
130
240
  };
131
241
  }
132
242
  export function createDashboardCompletionVerdict(input) {
243
+ const risks = createRiskEvidence(input);
244
+ const receiptBinding = input.receiptBinding ?? emptyReceiptBindingEvidence();
133
245
  const latestRunFailed = input.latestRunStatus === 'failed' ||
134
246
  input.latestRunStatus === 'timed_out' ||
135
247
  input.latestRunStatus === 'start_failed';
@@ -181,6 +293,17 @@ export function createDashboardCompletionVerdict(input) {
181
293
  primaryReason = 'latest_run_passed_without_current_claim_binding';
182
294
  limitations.push('latest_run_is_not_bound_to_a_current_completion_claim');
183
295
  }
296
+ const criteria = input.criteria ??
297
+ (input.changedFileCount > 0 || input.runnableIntentCount > 0 || input.skippedIntentCount > 0 || input.gapCount > 0
298
+ ? {
299
+ total: 1,
300
+ covered: 0,
301
+ partially_covered: status === 'partially_verified' ? 1 : 0,
302
+ uncovered: status === 'unverified' ? 1 : 0,
303
+ blocked: status === 'blocked' ? 1 : 0,
304
+ contradicted: status === 'contradicted' ? 1 : 0,
305
+ }
306
+ : emptyCriteriaEvidence());
184
307
  return {
185
308
  schema_version: '1',
186
309
  status,
@@ -189,6 +312,7 @@ export function createDashboardCompletionVerdict(input) {
189
312
  source: 'dashboard_export',
190
313
  verification_plan_id: null,
191
314
  changed_file_count: input.changedFileCount,
315
+ criteria,
192
316
  matched_intents: input.runnableIntentCount + input.skippedIntentCount,
193
317
  ran_intents: 0,
194
318
  passed_intents: 0,
@@ -200,6 +324,14 @@ export function createDashboardCompletionVerdict(input) {
200
324
  scope_diff_risk_count: input.scopeDiffRiskCount ?? 0,
201
325
  repeated_failure_count: input.repeatedFailureCount ?? 0,
202
326
  validation_ratchet_risk_count: input.validationRatchetRiskCount ?? 0,
327
+ repro_evidence_risk_count: input.reproEvidenceRiskCount ?? 0,
328
+ external_evidence_risk_count: input.externalEvidenceRiskCount ?? 0,
329
+ write_drift_risk_count: input.writeDriftRiskCount ?? 0,
330
+ receipt_binding_risk_count: input.receiptBindingRiskCount ?? 0,
331
+ stale_receipt_count: input.staleReceiptCount ?? 0,
332
+ plan_mismatch_count: input.planMismatchCount ?? 0,
333
+ risks,
334
+ receipt_binding: receiptBinding,
203
335
  latest_run_status: input.latestRunStatus,
204
336
  },
205
337
  blockers,
@@ -1,17 +1,179 @@
1
+ import { createHash } from 'node:crypto';
2
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
3
+ import path from 'node:path';
4
+ export const REPEATED_FAILURE_STATE_PATH = '.mustflow/state/repeated-failures.json';
5
+ export const REPEATED_FAILURE_STATE_LIMIT = 50;
1
6
  const UNRESOLVED_VERIFY_STATUSES = new Set(['failed', 'blocked', 'partial']);
2
- export function createRepeatedFailureRisk(input) {
3
- if (input.previousVerificationPlanId === null ||
4
- input.previousStatus === null ||
5
- input.previousVerificationPlanId !== input.currentVerificationPlanId ||
6
- !UNRESOLVED_VERIFY_STATUSES.has(input.previousStatus) ||
7
- !UNRESOLVED_VERIFY_STATUSES.has(input.currentStatus)) {
7
+ function sha256Json(value) {
8
+ return `sha256:${createHash('sha256').update(JSON.stringify(value)).digest('hex')}`;
9
+ }
10
+ function normalizeStrings(values) {
11
+ return [...new Set(values.map((value) => value.trim()).filter((value) => value.length > 0))].sort((left, right) => left.localeCompare(right));
12
+ }
13
+ function hashStrings(values) {
14
+ return sha256Json(normalizeStrings(values));
15
+ }
16
+ function hashBooleans(values) {
17
+ return sha256Json([...new Set(values)].sort((left, right) => Number(left) - Number(right)));
18
+ }
19
+ function isString(value) {
20
+ return typeof value === 'string' && value.length > 0;
21
+ }
22
+ function isRepeatedFailureSummary(value) {
23
+ if (!value || typeof value !== 'object' || Array.isArray(value)) {
24
+ return false;
25
+ }
26
+ const record = value;
27
+ return (record.schema_version === '1' &&
28
+ isString(record.fingerprint) &&
29
+ isString(record.verification_plan_id) &&
30
+ isString(record.status) &&
31
+ isString(record.failed_intents_hash) &&
32
+ isString(record.risk_codes_hash) &&
33
+ isString(record.affected_surfaces_hash) &&
34
+ isString(record.first_seen_at) &&
35
+ isString(record.last_seen_at) &&
36
+ typeof record.seen_count === 'number' &&
37
+ Number.isInteger(record.seen_count) &&
38
+ record.seen_count > 0 &&
39
+ typeof record.requires_new_evidence === 'boolean');
40
+ }
41
+ function repeatedFailureStatePath(projectRoot) {
42
+ return path.join(projectRoot, ...REPEATED_FAILURE_STATE_PATH.split('/'));
43
+ }
44
+ function readRepeatedFailureState(projectRoot) {
45
+ const statePath = repeatedFailureStatePath(projectRoot);
46
+ if (!existsSync(statePath)) {
47
+ return { schema_version: '1', fingerprints: [] };
48
+ }
49
+ try {
50
+ const parsed = JSON.parse(readFileSync(statePath, 'utf8'));
51
+ const fingerprints = Array.isArray(parsed.fingerprints)
52
+ ? parsed.fingerprints.filter(isRepeatedFailureSummary)
53
+ : [];
54
+ return { schema_version: '1', fingerprints };
55
+ }
56
+ catch {
57
+ return { schema_version: '1', fingerprints: [] };
58
+ }
59
+ }
60
+ function writeRepeatedFailureState(projectRoot, state) {
61
+ const statePath = repeatedFailureStatePath(projectRoot);
62
+ mkdirSync(path.dirname(statePath), { recursive: true });
63
+ writeFileSync(statePath, `${JSON.stringify(state, null, 2)}\n`, 'utf8');
64
+ }
65
+ export function createVerificationFailureFingerprint(input) {
66
+ const failedIntents = normalizeStrings(input.failedIntents);
67
+ const riskCodes = normalizeStrings(input.riskCodes);
68
+ if (failedIntents.length === 0 && riskCodes.length === 0) {
8
69
  return null;
9
70
  }
71
+ const exitCodeClasses = normalizeStrings(input.exitCodeClasses);
72
+ const timeoutFlags = [...new Set(input.timeoutFlags)].sort((left, right) => Number(left) - Number(right));
73
+ const errorKinds = normalizeStrings(input.errorKinds);
74
+ const affectedSurfaces = normalizeStrings(input.affectedSurfaces);
75
+ const commandFingerprints = normalizeStrings(input.commandFingerprints);
76
+ const diagnosticSignals = {
77
+ exit_code_classes: exitCodeClasses,
78
+ timeout_flags: timeoutFlags,
79
+ error_kinds: errorKinds,
80
+ };
81
+ const fingerprintSource = {
82
+ schema_version: '1',
83
+ verification_plan_id: input.verificationPlanId,
84
+ failed_intents: failedIntents,
85
+ diagnostic_signals: diagnosticSignals,
86
+ risk_codes: riskCodes,
87
+ affected_surfaces: affectedSurfaces,
88
+ command_fingerprints: commandFingerprints,
89
+ };
90
+ return {
91
+ schema_version: '1',
92
+ fingerprint: sha256Json(fingerprintSource),
93
+ verification_plan_id: input.verificationPlanId,
94
+ failed_intents_hash: hashStrings(failedIntents),
95
+ exit_code_classes_hash: hashStrings(exitCodeClasses),
96
+ timeout_flags_hash: hashBooleans(timeoutFlags),
97
+ error_kinds_hash: hashStrings(errorKinds),
98
+ diagnostic_hash: sha256Json(diagnosticSignals),
99
+ risk_codes_hash: hashStrings(riskCodes),
100
+ affected_surfaces_hash: hashStrings(affectedSurfaces),
101
+ command_fingerprints_hash: hashStrings(commandFingerprints),
102
+ };
103
+ }
104
+ export function updateRepeatedFailureState(input) {
105
+ const failureFingerprint = input.failureFingerprint;
106
+ if (!failureFingerprint) {
107
+ return null;
108
+ }
109
+ const state = readRepeatedFailureState(input.projectRoot);
110
+ const observedAt = (input.observedAt ?? new Date()).toISOString();
111
+ const existing = state.fingerprints.find((entry) => entry.fingerprint === failureFingerprint.fingerprint);
112
+ const seenCount = (existing?.seen_count ?? 0) + 1;
113
+ const summary = {
114
+ schema_version: '1',
115
+ fingerprint: failureFingerprint.fingerprint,
116
+ verification_plan_id: failureFingerprint.verification_plan_id,
117
+ status: input.status,
118
+ failed_intents_hash: failureFingerprint.failed_intents_hash,
119
+ risk_codes_hash: failureFingerprint.risk_codes_hash,
120
+ affected_surfaces_hash: failureFingerprint.affected_surfaces_hash,
121
+ first_seen_at: existing?.first_seen_at ?? observedAt,
122
+ last_seen_at: observedAt,
123
+ seen_count: seenCount,
124
+ requires_new_evidence: UNRESOLVED_VERIFY_STATUSES.has(input.status) && seenCount >= 2,
125
+ };
126
+ const nextFingerprints = [summary, ...state.fingerprints.filter((entry) => entry.fingerprint !== summary.fingerprint)]
127
+ .sort((left, right) => right.last_seen_at.localeCompare(left.last_seen_at))
128
+ .slice(0, REPEATED_FAILURE_STATE_LIMIT);
129
+ writeRepeatedFailureState(input.projectRoot, {
130
+ schema_version: '1',
131
+ fingerprints: nextFingerprints,
132
+ });
133
+ return summary;
134
+ }
135
+ function createRepeatedFailureRisk(code, currentFingerprint, previousStatus) {
136
+ const detail = code === 'repeated_verification_failure'
137
+ ? 'The previous verify summary has the same failure fingerprint and an unresolved status; provide new evidence or a narrower hypothesis before marking the task complete.'
138
+ : code === 'no_new_evidence_since_previous_failure'
139
+ ? 'The previous verify summary has the same plan, failed-intent hash, and affected-surface hash; provide new source or reproduction evidence before treating the next completion claim as verifiable.'
140
+ : 'The same unresolved failure fingerprint has repeated three or more times; new evidence is required before another completion claim can be treated as verifiable.';
10
141
  return {
11
- code: 'repeated_verification_failure',
142
+ code,
12
143
  severity: 'high',
13
- previous_status: input.previousStatus,
14
- verification_plan_id: input.currentVerificationPlanId,
15
- detail: 'The previous verify summary has the same verification_plan_id and an unresolved status; provide new evidence or a narrower hypothesis before marking the task complete.',
144
+ verdict_effect: code === 'repeated_verification_failure' ? 'contradiction' : 'blocker',
145
+ previous_status: previousStatus,
146
+ verification_plan_id: currentFingerprint.verification_plan_id,
147
+ failure_fingerprint: currentFingerprint.fingerprint,
148
+ failed_intents_hash: currentFingerprint.failed_intents_hash,
149
+ risk_codes_hash: currentFingerprint.risk_codes_hash,
150
+ affected_surfaces_hash: currentFingerprint.affected_surfaces_hash,
151
+ detail,
16
152
  };
17
153
  }
154
+ export function createRepeatedFailureRisks(input) {
155
+ const currentFingerprint = input.currentFailureFingerprint;
156
+ if (input.previousFailureFingerprint === null ||
157
+ input.previousStatus === null ||
158
+ currentFingerprint === null ||
159
+ !UNRESOLVED_VERIFY_STATUSES.has(input.previousStatus) ||
160
+ !UNRESOLVED_VERIFY_STATUSES.has(input.currentStatus)) {
161
+ return [];
162
+ }
163
+ const risks = [];
164
+ const previousFingerprint = input.previousFailureFingerprint;
165
+ const sameFingerprint = previousFingerprint.fingerprint === currentFingerprint.fingerprint;
166
+ const samePlanAndNoNewSourceEvidence = previousFingerprint.verification_plan_id === currentFingerprint.verification_plan_id &&
167
+ previousFingerprint.failed_intents_hash === currentFingerprint.failed_intents_hash &&
168
+ previousFingerprint.affected_surfaces_hash === currentFingerprint.affected_surfaces_hash;
169
+ if (sameFingerprint) {
170
+ risks.push(createRepeatedFailureRisk('repeated_verification_failure', currentFingerprint, input.previousStatus));
171
+ }
172
+ if (samePlanAndNoNewSourceEvidence && !sameFingerprint) {
173
+ risks.push(createRepeatedFailureRisk('no_new_evidence_since_previous_failure', currentFingerprint, input.previousStatus));
174
+ }
175
+ if ((input.currentSummary?.seen_count ?? 0) >= 3 && input.currentSummary?.requires_new_evidence === true) {
176
+ risks.push(createRepeatedFailureRisk('repeated_failure_requires_new_evidence', currentFingerprint, input.previousStatus));
177
+ }
178
+ return risks;
179
+ }
@@ -3,51 +3,132 @@ const TEXT_FIELD_LABELS = {
3
3
  expected_behavior: 'expected behavior',
4
4
  observed_behavior: 'observed behavior',
5
5
  };
6
- const ITEM_FIELD_LABELS = {
7
- original_reproduction: 'original reproduction path',
8
- evidence_before_fix: 'before-fix evidence',
9
- evidence_after_fix: 'after-fix evidence',
10
- regression_guard: 'regression guard',
11
- };
12
- export function createReproEvidenceRisks(report) {
6
+ function pushRisk(risks, detail, verdictEffect = 'partial') {
7
+ risks.push({
8
+ code: 'repro_evidence_missing',
9
+ severity: verdictEffect === 'contradicted' ? 'critical' : 'high',
10
+ detail,
11
+ verdict_effect: verdictEffect,
12
+ });
13
+ }
14
+ function collectReceiptBindingRisks(phaseLabel, evidence, options, risks) {
15
+ if (!evidence.receipt_path || !evidence.receipt_sha256 || !evidence.verification_plan_id) {
16
+ pushRisk(risks, `Bug-fix repro evidence ${phaseLabel} observation is not bound to receipt_path, receipt_sha256, and verification_plan_id.`);
17
+ return;
18
+ }
19
+ if (options.verificationPlanId && evidence.verification_plan_id !== options.verificationPlanId) {
20
+ pushRisk(risks, `Bug-fix repro evidence ${phaseLabel} receipt is stale for the current verification plan.`);
21
+ }
22
+ }
23
+ function collectBeforeFixRisks(report, options, risks) {
24
+ if (report.before_fix.status === 'missing') {
25
+ pushRisk(risks, 'Bug-fix repro evidence is missing before-fix reproduction; reproduce the original failure or mark it unavailable before claiming verification.');
26
+ return;
27
+ }
28
+ if (report.before_fix.status === 'unavailable') {
29
+ pushRisk(risks, report.before_fix.reason
30
+ ? 'Bug-fix repro evidence marks before-fix reproduction unavailable; the result cannot be verified without the original failure being observed.'
31
+ : 'Bug-fix repro evidence marks before-fix reproduction unavailable without explaining why.');
32
+ return;
33
+ }
34
+ if (!report.before_fix.summary) {
35
+ pushRisk(risks, 'Bug-fix repro evidence reproduced the before-fix failure but does not summarize the evidence.');
36
+ }
37
+ if (report.before_fix.outcome !== 'failed_as_expected') {
38
+ pushRisk(risks, 'Bug-fix repro evidence reproduced the before-fix path without outcome failed_as_expected.');
39
+ }
40
+ collectReceiptBindingRisks('before-fix', report.before_fix, options, risks);
41
+ }
42
+ function collectRouteIdentityRisks(report, risks) {
43
+ if (!report.reproduction_route.route_id) {
44
+ pushRisk(risks, 'Bug-fix repro evidence is missing reproduction_route.route_id.', 'unverified');
45
+ }
46
+ if (!report.reproduction_route.route_kind) {
47
+ pushRisk(risks, 'Bug-fix repro evidence is missing reproduction_route.route_kind.');
48
+ }
49
+ if (!report.reproduction_route.route_digest) {
50
+ pushRisk(risks, 'Bug-fix repro evidence is missing reproduction_route.route_digest.', 'unverified');
51
+ }
52
+ if (!report.reproduction_route.failure_oracle_hash) {
53
+ pushRisk(risks, 'Bug-fix repro evidence is missing reproduction_route.failure_oracle_hash.');
54
+ }
55
+ if (report.reproduction_route.steps.length === 0) {
56
+ pushRisk(risks, 'Bug-fix repro evidence is missing bounded reproduction route steps.', 'unverified');
57
+ }
58
+ }
59
+ function collectAfterFixRisks(report, options, risks) {
60
+ if (report.after_fix.status === 'missing') {
61
+ pushRisk(risks, 'Bug-fix repro evidence is missing after-fix same-route evidence; rerun the original route after the fix before claiming verification.', 'unverified');
62
+ return;
63
+ }
64
+ if (report.after_fix.status === 'unavailable') {
65
+ pushRisk(risks, report.after_fix.reason
66
+ ? 'Bug-fix repro evidence marks after-fix same-route evidence unavailable; the result cannot be verified without a post-fix pass.'
67
+ : 'Bug-fix repro evidence marks after-fix same-route evidence unavailable without explaining why.', 'unverified');
68
+ return;
69
+ }
70
+ if (report.after_fix.status === 'failed') {
71
+ pushRisk(risks, 'Bug-fix repro evidence says the after-fix route still failed.', 'contradicted');
72
+ return;
73
+ }
74
+ if (!report.after_fix.summary) {
75
+ pushRisk(risks, 'Bug-fix repro evidence marks after-fix evidence passed but does not summarize the evidence.');
76
+ }
77
+ if (report.after_fix.outcome !== 'passed_expected_behavior') {
78
+ pushRisk(risks, 'Bug-fix repro evidence marks after-fix evidence passed without outcome passed_expected_behavior.', 'unverified');
79
+ }
80
+ if (!report.after_fix.same_route_as) {
81
+ pushRisk(risks, 'Bug-fix repro evidence marks after-fix evidence passed without same_route_as.', 'unverified');
82
+ }
83
+ if (report.reproduction_route.route_id &&
84
+ report.after_fix.same_route_as &&
85
+ report.after_fix.same_route_as !== report.reproduction_route.route_id) {
86
+ pushRisk(risks, 'Bug-fix repro evidence after_fix.same_route_as does not match reproduction_route.route_id.');
87
+ }
88
+ collectReceiptBindingRisks('after-fix', report.after_fix, options, risks);
89
+ }
90
+ function collectRegressionGuardRisks(report, options, risks) {
91
+ if (report.regression_guard.status === 'missing') {
92
+ pushRisk(risks, 'Bug-fix repro evidence is missing a regression guard; add or identify the guard before claiming verification.');
93
+ return;
94
+ }
95
+ if (report.regression_guard.status === 'unavailable') {
96
+ pushRisk(risks, report.regression_guard.reason
97
+ ? 'Bug-fix repro evidence marks the regression guard unavailable; the result cannot be verified without a guard or explicit limitation.'
98
+ : 'Bug-fix repro evidence marks the regression guard unavailable without explaining why.');
99
+ return;
100
+ }
101
+ if (report.regression_guard.status === 'failed') {
102
+ pushRisk(risks, 'Bug-fix repro evidence says the regression guard failed.', 'contradicted');
103
+ return;
104
+ }
105
+ if (!report.regression_guard.summary) {
106
+ pushRisk(risks, 'Bug-fix repro evidence marks the regression guard passed but does not summarize the evidence.');
107
+ }
108
+ if (!report.regression_guard.intent && !report.regression_guard.test_path) {
109
+ pushRisk(risks, 'Bug-fix repro evidence marks the regression guard passed without an intent or test path.');
110
+ }
111
+ collectReceiptBindingRisks('regression-guard', report.regression_guard, options, risks);
112
+ }
113
+ export function createReproEvidenceRisks(report, options = {}) {
13
114
  if (!report) {
14
115
  return [];
15
116
  }
16
117
  const risks = [];
17
118
  for (const [field, label] of Object.entries(TEXT_FIELD_LABELS)) {
18
119
  if (!report[field]) {
19
- risks.push({
20
- code: 'repro_evidence_missing',
21
- severity: 'high',
22
- detail: `Bug-fix repro evidence is missing ${label}; do not mark the task verified from command receipts alone.`,
23
- });
24
- }
25
- }
26
- for (const [field, label] of Object.entries(ITEM_FIELD_LABELS)) {
27
- const item = report[field];
28
- if (item.status === 'missing') {
29
- risks.push({
30
- code: 'repro_evidence_missing',
31
- severity: 'high',
32
- detail: `Bug-fix repro evidence is missing ${label}; rerun or explicitly mark it unavailable before claiming verification.`,
33
- });
34
- continue;
35
- }
36
- if (item.status === 'present' && !item.summary) {
37
- risks.push({
38
- code: 'repro_evidence_missing',
39
- severity: 'high',
40
- detail: `Bug-fix repro evidence marks ${label} present but does not summarize the evidence.`,
41
- });
42
- continue;
43
- }
44
- if (item.status === 'unavailable' && !item.reason) {
45
- risks.push({
46
- code: 'repro_evidence_missing',
47
- severity: 'high',
48
- detail: `Bug-fix repro evidence marks ${label} unavailable without explaining why.`,
49
- });
120
+ pushRisk(risks, `Bug-fix repro evidence is missing ${label}; do not mark the task verified from command receipts alone.`);
50
121
  }
51
122
  }
123
+ collectRouteIdentityRisks(report, risks);
124
+ collectBeforeFixRisks(report, options, risks);
125
+ collectAfterFixRisks(report, options, risks);
126
+ collectRegressionGuardRisks(report, options, risks);
52
127
  return risks;
53
128
  }
129
+ export function countReproEvidenceVerdictEffects(risks) {
130
+ return {
131
+ contradicted: risks.filter((risk) => risk.verdict_effect === 'contradicted').length,
132
+ unverified: risks.filter((risk) => risk.verdict_effect === 'unverified').length,
133
+ };
134
+ }