mustflow 2.11.0 → 2.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/dashboard.js +71 -2
- package/dist/cli/commands/explain-verify.js +11 -1
- package/dist/cli/commands/index.js +9 -0
- package/dist/cli/commands/verify.js +528 -30
- package/dist/cli/lib/local-index/constants.js +1 -1
- package/dist/cli/lib/local-index/index.js +708 -13
- package/dist/core/completion-verdict.js +151 -19
- package/dist/core/repeated-failure.js +172 -10
- package/dist/core/repro-evidence.js +119 -38
- package/dist/core/validation-ratchet.js +161 -17
- package/package.json +3 -3
- package/schemas/dashboard-export.schema.json +83 -0
- package/schemas/explain-report.schema.json +173 -1
- package/schemas/latest-run-pointer.schema.json +227 -10
- package/schemas/verify-report.schema.json +227 -10
- package/schemas/verify-run-manifest.schema.json +227 -10
- package/templates/default/manifest.toml +1 -1
|
@@ -1,17 +1,90 @@
|
|
|
1
|
+
/**
 * Collects the optional risk counters found on `input` into a single
 * snake_case record.
 *
 * @param {object} input - Object carrying the optional `*RiskCount` / `*Count` fields.
 * @returns {object} One numeric entry per risk category; a counter that is
 *   null or undefined on `input` is reported as 0.
 */
function createRiskEvidence(input) {
    // Only null/undefined fall back to zero; any other value is passed through.
    const countOf = (field) => input[field] ?? 0;
    return {
        source_anchor: countOf('sourceAnchorRiskCount'),
        scope_diff: countOf('scopeDiffRiskCount'),
        repeated_failure: countOf('repeatedFailureCount'),
        validation_ratchet: countOf('validationRatchetRiskCount'),
        repro_evidence: countOf('reproEvidenceRiskCount'),
        external_evidence: countOf('externalEvidenceRiskCount'),
        write_drift: countOf('writeDriftRiskCount'),
        receipt_binding: countOf('receiptBindingRiskCount'),
        stale_receipt: countOf('staleReceiptCount'),
        plan_mismatch: countOf('planMismatchCount'),
    };
}
|
|
15
|
+
/**
 * Produces a receipt-binding evidence record with every counter at zero.
 *
 * @returns {object} A freshly created object on each call.
 */
function emptyReceiptBindingEvidence() {
    const counterNames = [
        'plan_bound_count',
        'plan_unbound_count',
        'fingerprint_bound_count',
        'fingerprint_unbound_count',
        'current_state_bound_count',
        'current_state_unavailable_count',
        'stale_count',
        'plan_mismatch_count',
    ];
    return Object.fromEntries(counterNames.map((name) => [name, 0]));
}
|
|
27
|
+
/**
 * Produces a criteria-coverage record with every bucket at zero.
 *
 * @returns {object} A freshly created object on each call.
 */
function emptyCriteriaEvidence() {
    const tally = {};
    for (const bucket of ['total', 'covered', 'partially_covered', 'uncovered', 'blocked', 'contradicted']) {
        tally[bucket] = 0;
    }
    return tally;
}
|
|
37
|
+
/**
 * Adds the number of intents that ran without a receipt to the
 * receipt-binding risk counter.
 *
 * @param {object} input - Reads `ranIntents`, `receiptCount` and the optional
 *   `receiptBindingRiskCount`.
 * @returns {object} `input` itself when `ranIntents - receiptCount` is not
 *   positive; otherwise a shallow copy whose `receiptBindingRiskCount` is
 *   increased by that difference.
 */
function normalizeVerifyCompletionInput(input) {
    // Clamp at zero so surplus receipts never lower the risk counter.
    const receiptShortfall = Math.max(0, input.ranIntents - input.receiptCount);
    return receiptShortfall === 0
        ? input
        : {
            ...input,
            receiptBindingRiskCount: (input.receiptBindingRiskCount ?? 0) + receiptShortfall,
        };
}
|
|
1
47
|
function verifyStatus(input) {
|
|
48
|
+
const contradictions = [];
|
|
2
49
|
if (input.failedIntents > 0) {
|
|
3
|
-
|
|
4
|
-
|
|
50
|
+
contradictions.push('one_or_more_selected_verification_intents_failed');
|
|
51
|
+
}
|
|
52
|
+
if ((input.planMismatchCount ?? 0) > 0) {
|
|
53
|
+
contradictions.push('plan_receipt_mismatch');
|
|
54
|
+
}
|
|
55
|
+
if ((input.reproEvidenceContradictionCount ?? 0) > 0) {
|
|
56
|
+
contradictions.push('repro_evidence_contradicted');
|
|
57
|
+
}
|
|
58
|
+
if ((input.validationRatchetContradictionCount ?? 0) > 0) {
|
|
59
|
+
contradictions.push('validation_ratchet_contradicted');
|
|
60
|
+
}
|
|
61
|
+
if (contradictions.length > 0) {
|
|
62
|
+
if (input.failedIntents > 0 && (input.repeatedFailureCount ?? 0) > 0) {
|
|
5
63
|
contradictions.push('repeated_verification_failure');
|
|
6
64
|
}
|
|
7
65
|
return {
|
|
8
66
|
status: 'contradicted',
|
|
9
|
-
primaryReason:
|
|
67
|
+
primaryReason: input.failedIntents > 0
|
|
68
|
+
? 'verification_failed'
|
|
69
|
+
: (input.planMismatchCount ?? 0) > 0
|
|
70
|
+
? 'plan_receipt_mismatch'
|
|
71
|
+
: (input.reproEvidenceContradictionCount ?? 0) > 0
|
|
72
|
+
? 'repro_evidence_contradicted'
|
|
73
|
+
: 'validation_ratchet_contradicted',
|
|
10
74
|
blockers: [],
|
|
11
75
|
contradictions,
|
|
12
76
|
limitations: [],
|
|
13
77
|
};
|
|
14
78
|
}
|
|
79
|
+
if ((input.repeatedFailureBlockerCount ?? 0) > 0) {
|
|
80
|
+
return {
|
|
81
|
+
status: 'blocked',
|
|
82
|
+
primaryReason: 'repeated_failure_requires_new_evidence',
|
|
83
|
+
blockers: ['repeated_failure_requires_new_evidence'],
|
|
84
|
+
contradictions: [],
|
|
85
|
+
limitations: [],
|
|
86
|
+
};
|
|
87
|
+
}
|
|
15
88
|
if (input.ranIntents === 0 && input.skippedIntents > 0) {
|
|
16
89
|
const blockers = ['all_matching_verification_intents_were_skipped'];
|
|
17
90
|
if ((input.repeatedFailureCount ?? 0) > 0) {
|
|
@@ -51,6 +124,15 @@ function verifyStatus(input) {
|
|
|
51
124
|
limitations,
|
|
52
125
|
};
|
|
53
126
|
}
|
|
127
|
+
if ((input.reproEvidenceUnverifiedCount ?? 0) > 0) {
|
|
128
|
+
return {
|
|
129
|
+
status: 'unverified',
|
|
130
|
+
primaryReason: 'repro_evidence_unverified',
|
|
131
|
+
blockers: [],
|
|
132
|
+
contradictions: [],
|
|
133
|
+
limitations: ['repro_evidence_missing'],
|
|
134
|
+
};
|
|
135
|
+
}
|
|
54
136
|
const downgradeLimitations = [];
|
|
55
137
|
if ((input.sourceAnchorRiskCount ?? 0) > 0) {
|
|
56
138
|
downgradeLimitations.push('high_risk_source_anchor_requires_review');
|
|
@@ -61,6 +143,15 @@ function verifyStatus(input) {
|
|
|
61
143
|
if ((input.validationRatchetRiskCount ?? 0) > 0) {
|
|
62
144
|
downgradeLimitations.push('validation_ratchet_risk_requires_review');
|
|
63
145
|
}
|
|
146
|
+
if ((input.writeDriftRiskCount ?? 0) > 0) {
|
|
147
|
+
downgradeLimitations.push('write_drift_requires_review');
|
|
148
|
+
}
|
|
149
|
+
if ((input.receiptBindingRiskCount ?? 0) > 0) {
|
|
150
|
+
downgradeLimitations.push('receipt_binding_requires_review');
|
|
151
|
+
}
|
|
152
|
+
if ((input.staleReceiptCount ?? 0) > 0) {
|
|
153
|
+
downgradeLimitations.push('stale_receipt_requires_review');
|
|
154
|
+
}
|
|
64
155
|
if ((input.reproEvidenceRiskCount ?? 0) > 0) {
|
|
65
156
|
downgradeLimitations.push('repro_evidence_missing');
|
|
66
157
|
}
|
|
@@ -76,9 +167,15 @@ function verifyStatus(input) {
|
|
|
76
167
|
? 'scope_diff_review_required'
|
|
77
168
|
: (input.validationRatchetRiskCount ?? 0) > 0
|
|
78
169
|
? 'validation_ratchet_review_required'
|
|
79
|
-
: (input.
|
|
80
|
-
? '
|
|
81
|
-
:
|
|
170
|
+
: (input.writeDriftRiskCount ?? 0) > 0
|
|
171
|
+
? 'write_drift_review_required'
|
|
172
|
+
: (input.receiptBindingRiskCount ?? 0) > 0
|
|
173
|
+
? 'receipt_binding_review_required'
|
|
174
|
+
: (input.staleReceiptCount ?? 0) > 0
|
|
175
|
+
? 'stale_receipt_review_required'
|
|
176
|
+
: (input.reproEvidenceRiskCount ?? 0) > 0
|
|
177
|
+
? 'repro_evidence_missing'
|
|
178
|
+
: 'external_evidence_review_required',
|
|
82
179
|
blockers: [],
|
|
83
180
|
contradictions: [],
|
|
84
181
|
limitations: downgradeLimitations,
|
|
@@ -102,26 +199,39 @@ function verifyStatus(input) {
|
|
|
102
199
|
};
|
|
103
200
|
}
|
|
104
201
|
export function createVerifyCompletionVerdict(input) {
|
|
105
|
-
const
|
|
202
|
+
const normalizedInput = normalizeVerifyCompletionInput(input);
|
|
203
|
+
const result = verifyStatus(normalizedInput);
|
|
204
|
+
const risks = createRiskEvidence(normalizedInput);
|
|
205
|
+
const receiptBinding = normalizedInput.receiptBinding ?? emptyReceiptBindingEvidence();
|
|
206
|
+
const criteria = normalizedInput.criteria ?? emptyCriteriaEvidence();
|
|
106
207
|
return {
|
|
107
208
|
schema_version: '1',
|
|
108
209
|
status: result.status,
|
|
109
210
|
primary_reason: result.primaryReason,
|
|
110
211
|
evidence: {
|
|
111
212
|
source: 'mf_verify',
|
|
112
|
-
verification_plan_id:
|
|
213
|
+
verification_plan_id: normalizedInput.verificationPlanId,
|
|
113
214
|
changed_file_count: null,
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
215
|
+
criteria,
|
|
216
|
+
matched_intents: normalizedInput.matchedIntents,
|
|
217
|
+
ran_intents: normalizedInput.ranIntents,
|
|
218
|
+
passed_intents: normalizedInput.passedIntents,
|
|
219
|
+
failed_intents: normalizedInput.failedIntents,
|
|
220
|
+
skipped_intents: normalizedInput.skippedIntents,
|
|
221
|
+
receipt_count: normalizedInput.receiptCount,
|
|
222
|
+
gap_count: normalizedInput.skippedIntents,
|
|
223
|
+
source_anchor_risk_count: normalizedInput.sourceAnchorRiskCount ?? 0,
|
|
224
|
+
scope_diff_risk_count: normalizedInput.scopeDiffRiskCount ?? 0,
|
|
225
|
+
repeated_failure_count: normalizedInput.repeatedFailureCount ?? 0,
|
|
226
|
+
validation_ratchet_risk_count: normalizedInput.validationRatchetRiskCount ?? 0,
|
|
227
|
+
repro_evidence_risk_count: normalizedInput.reproEvidenceRiskCount ?? 0,
|
|
228
|
+
external_evidence_risk_count: normalizedInput.externalEvidenceRiskCount ?? 0,
|
|
229
|
+
write_drift_risk_count: normalizedInput.writeDriftRiskCount ?? 0,
|
|
230
|
+
receipt_binding_risk_count: normalizedInput.receiptBindingRiskCount ?? 0,
|
|
231
|
+
stale_receipt_count: normalizedInput.staleReceiptCount ?? 0,
|
|
232
|
+
plan_mismatch_count: normalizedInput.planMismatchCount ?? 0,
|
|
233
|
+
risks,
|
|
234
|
+
receipt_binding: receiptBinding,
|
|
125
235
|
latest_run_status: null,
|
|
126
236
|
},
|
|
127
237
|
blockers: result.blockers,
|
|
@@ -130,6 +240,8 @@ export function createVerifyCompletionVerdict(input) {
|
|
|
130
240
|
};
|
|
131
241
|
}
|
|
132
242
|
export function createDashboardCompletionVerdict(input) {
|
|
243
|
+
const risks = createRiskEvidence(input);
|
|
244
|
+
const receiptBinding = input.receiptBinding ?? emptyReceiptBindingEvidence();
|
|
133
245
|
const latestRunFailed = input.latestRunStatus === 'failed' ||
|
|
134
246
|
input.latestRunStatus === 'timed_out' ||
|
|
135
247
|
input.latestRunStatus === 'start_failed';
|
|
@@ -181,6 +293,17 @@ export function createDashboardCompletionVerdict(input) {
|
|
|
181
293
|
primaryReason = 'latest_run_passed_without_current_claim_binding';
|
|
182
294
|
limitations.push('latest_run_is_not_bound_to_a_current_completion_claim');
|
|
183
295
|
}
|
|
296
|
+
const criteria = input.criteria ??
|
|
297
|
+
(input.changedFileCount > 0 || input.runnableIntentCount > 0 || input.skippedIntentCount > 0 || input.gapCount > 0
|
|
298
|
+
? {
|
|
299
|
+
total: 1,
|
|
300
|
+
covered: 0,
|
|
301
|
+
partially_covered: status === 'partially_verified' ? 1 : 0,
|
|
302
|
+
uncovered: status === 'unverified' ? 1 : 0,
|
|
303
|
+
blocked: status === 'blocked' ? 1 : 0,
|
|
304
|
+
contradicted: status === 'contradicted' ? 1 : 0,
|
|
305
|
+
}
|
|
306
|
+
: emptyCriteriaEvidence());
|
|
184
307
|
return {
|
|
185
308
|
schema_version: '1',
|
|
186
309
|
status,
|
|
@@ -189,6 +312,7 @@ export function createDashboardCompletionVerdict(input) {
|
|
|
189
312
|
source: 'dashboard_export',
|
|
190
313
|
verification_plan_id: null,
|
|
191
314
|
changed_file_count: input.changedFileCount,
|
|
315
|
+
criteria,
|
|
192
316
|
matched_intents: input.runnableIntentCount + input.skippedIntentCount,
|
|
193
317
|
ran_intents: 0,
|
|
194
318
|
passed_intents: 0,
|
|
@@ -200,6 +324,14 @@ export function createDashboardCompletionVerdict(input) {
|
|
|
200
324
|
scope_diff_risk_count: input.scopeDiffRiskCount ?? 0,
|
|
201
325
|
repeated_failure_count: input.repeatedFailureCount ?? 0,
|
|
202
326
|
validation_ratchet_risk_count: input.validationRatchetRiskCount ?? 0,
|
|
327
|
+
repro_evidence_risk_count: input.reproEvidenceRiskCount ?? 0,
|
|
328
|
+
external_evidence_risk_count: input.externalEvidenceRiskCount ?? 0,
|
|
329
|
+
write_drift_risk_count: input.writeDriftRiskCount ?? 0,
|
|
330
|
+
receipt_binding_risk_count: input.receiptBindingRiskCount ?? 0,
|
|
331
|
+
stale_receipt_count: input.staleReceiptCount ?? 0,
|
|
332
|
+
plan_mismatch_count: input.planMismatchCount ?? 0,
|
|
333
|
+
risks,
|
|
334
|
+
receipt_binding: receiptBinding,
|
|
203
335
|
latest_run_status: input.latestRunStatus,
|
|
204
336
|
},
|
|
205
337
|
blockers,
|
|
@@ -1,17 +1,179 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
export const REPEATED_FAILURE_STATE_PATH = '.mustflow/state/repeated-failures.json';
|
|
5
|
+
export const REPEATED_FAILURE_STATE_LIMIT = 50;
|
|
1
6
|
const UNRESOLVED_VERIFY_STATUSES = new Set(['failed', 'blocked', 'partial']);
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
7
|
+
/**
 * Hashes the JSON serialization of `value` with SHA-256.
 *
 * @param {*} value - Anything `JSON.stringify` turns into a string.
 * @returns {string} The lowercase hex digest prefixed with `sha256:`.
 */
function sha256Json(value) {
    const hasher = createHash('sha256');
    hasher.update(JSON.stringify(value));
    return ['sha256', hasher.digest('hex')].join(':');
}
|
|
10
|
+
/**
 * Canonicalizes a list of strings: trims each entry, drops entries that are
 * empty after trimming, removes duplicates and sorts the remainder.
 *
 * The ordering is a plain UTF-16 code-unit comparison rather than
 * `localeCompare`. The result is serialized and hashed by callers in this
 * module, and a locale-sensitive collation can order the same values
 * differently depending on the runtime's locale and ICU data, which would
 * change the hash for identical input.
 *
 * @param {string[]} values - Strings to normalize.
 * @returns {string[]} A new, deduplicated, deterministically sorted array.
 */
function normalizeStrings(values) {
    const distinct = new Set();
    for (const value of values) {
        const trimmed = value.trim();
        if (trimmed.length > 0) {
            distinct.add(trimmed);
        }
    }
    // Locale-independent ordering keeps derived hashes stable across machines.
    return [...distinct].sort((left, right) => (left < right ? -1 : left > right ? 1 : 0));
}
|
|
13
|
+
/**
 * Hashes a list of strings after passing it through `normalizeStrings`.
 *
 * @param {string[]} values - Strings to hash.
 * @returns {string} Whatever `sha256Json` returns for the normalized list.
 */
function hashStrings(values) {
    const canonical = normalizeStrings(values);
    return sha256Json(canonical);
}
|
|
16
|
+
/**
 * Hashes the distinct values of a boolean list, ordered by their numeric
 * value so that `false` comes before `true`.
 *
 * @param {Iterable<boolean>} values - Flags to hash.
 * @returns {string} Whatever `sha256Json` returns for the deduplicated, sorted list.
 */
function hashBooleans(values) {
    const distinctFlags = Array.from(new Set(values));
    distinctFlags.sort((a, b) => Number(a) - Number(b));
    return sha256Json(distinctFlags);
}
|
|
19
|
+
/**
 * Tells whether `value` is a primitive string with at least one character.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} True only for non-empty primitive strings.
 */
function isString(value) {
    if (typeof value !== 'string') {
        return false;
    }
    return value !== '';
}
|
|
22
|
+
/**
 * Structural check for one stored repeated-failure summary entry.
 *
 * An entry is accepted when it is a non-array object with
 * `schema_version === '1'`, when every text field listed below satisfies
 * `isString`, when `seen_count` is a positive integer and when
 * `requires_new_evidence` is a boolean.
 *
 * @param {*} value - Candidate entry.
 * @returns {boolean} True when all of the checks above pass.
 */
function isRepeatedFailureSummary(value) {
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
        return false;
    }
    if (value.schema_version !== '1') {
        return false;
    }
    const requiredTextFields = [
        'fingerprint',
        'verification_plan_id',
        'status',
        'failed_intents_hash',
        'risk_codes_hash',
        'affected_surfaces_hash',
        'first_seen_at',
        'last_seen_at',
    ];
    if (!requiredTextFields.every((field) => isString(value[field]))) {
        return false;
    }
    const seen = value.seen_count;
    // Number.isInteger is already false for every non-number value.
    return Number.isInteger(seen) && seen > 0 && typeof value.requires_new_evidence === 'boolean';
}
|
|
41
|
+
/**
 * Resolves the repeated-failure state file location under a project root.
 *
 * @param {string} projectRoot - Directory the state path is joined onto.
 * @returns {string} `projectRoot` joined with the `/`-separated segments of
 *   `REPEATED_FAILURE_STATE_PATH`.
 */
function repeatedFailureStatePath(projectRoot) {
    const segments = REPEATED_FAILURE_STATE_PATH.split('/');
    return path.join(projectRoot, ...segments);
}
|
|
44
|
+
/**
 * Loads the repeated-failure state stored for a project.
 *
 * This is a best-effort read: when the state file does not exist, or when
 * reading, parsing or filtering it throws, an empty state is returned
 * instead of an error. Stored entries that do not satisfy
 * `isRepeatedFailureSummary` are discarded.
 *
 * @param {string} projectRoot - Passed to `repeatedFailureStatePath`.
 * @returns {{schema_version: string, fingerprints: object[]}} The loaded or empty state.
 */
function readRepeatedFailureState(projectRoot) {
    const emptyState = () => ({ schema_version: '1', fingerprints: [] });
    const stateFile = repeatedFailureStatePath(projectRoot);
    if (!existsSync(stateFile)) {
        return emptyState();
    }
    try {
        // Destructuring a parsed `null` throws, which lands in the catch below.
        const { fingerprints: stored } = JSON.parse(readFileSync(stateFile, 'utf8'));
        if (!Array.isArray(stored)) {
            return emptyState();
        }
        return { schema_version: '1', fingerprints: stored.filter(isRepeatedFailureSummary) };
    }
    catch {
        return emptyState();
    }
}
|
|
60
|
+
/**
 * Persists the repeated-failure state for a project as pretty-printed JSON
 * (two-space indent, trailing newline), creating parent directories as needed.
 *
 * @param {string} projectRoot - Passed to `repeatedFailureStatePath`.
 * @param {object} state - Value to serialize.
 */
function writeRepeatedFailureState(projectRoot, state) {
    const target = repeatedFailureStatePath(projectRoot);
    const parentDir = path.dirname(target);
    mkdirSync(parentDir, { recursive: true });
    const serialized = `${JSON.stringify(state, null, 2)}\n`;
    writeFileSync(target, serialized, 'utf8');
}
|
|
65
|
+
/**
 * Derives a fingerprint record for one verification failure.
 *
 * The string lists on `input` are passed through `normalizeStrings` and the
 * timeout flags are deduplicated and ordered `false` before `true`. The
 * `fingerprint` field hashes the plan id together with all normalized lists;
 * the remaining `*_hash` fields hash individual lists.
 *
 * @param {object} input - Reads `verificationPlanId`, `failedIntents`,
 *   `riskCodes`, `exitCodeClasses`, `timeoutFlags`, `errorKinds`,
 *   `affectedSurfaces` and `commandFingerprints`.
 * @returns {object|null} The fingerprint record, or null when both the
 *   normalized failed intents and the normalized risk codes are empty.
 */
export function createVerificationFailureFingerprint(input) {
    const failedIntents = normalizeStrings(input.failedIntents);
    const riskCodes = normalizeStrings(input.riskCodes);
    const nothingToFingerprint = failedIntents.length === 0 && riskCodes.length === 0;
    if (nothingToFingerprint) {
        return null;
    }
    const planId = input.verificationPlanId;
    // Property order matters here: these objects are serialized and hashed.
    const diagnosticSignals = {
        exit_code_classes: normalizeStrings(input.exitCodeClasses),
        timeout_flags: Array.from(new Set(input.timeoutFlags)).sort((a, b) => Number(a) - Number(b)),
        error_kinds: normalizeStrings(input.errorKinds),
    };
    const affectedSurfaces = normalizeStrings(input.affectedSurfaces);
    const commandFingerprints = normalizeStrings(input.commandFingerprints);
    const fingerprint = sha256Json({
        schema_version: '1',
        verification_plan_id: planId,
        failed_intents: failedIntents,
        diagnostic_signals: diagnosticSignals,
        risk_codes: riskCodes,
        affected_surfaces: affectedSurfaces,
        command_fingerprints: commandFingerprints,
    });
    return {
        schema_version: '1',
        fingerprint,
        verification_plan_id: planId,
        failed_intents_hash: hashStrings(failedIntents),
        exit_code_classes_hash: hashStrings(diagnosticSignals.exit_code_classes),
        timeout_flags_hash: hashBooleans(diagnosticSignals.timeout_flags),
        error_kinds_hash: hashStrings(diagnosticSignals.error_kinds),
        diagnostic_hash: sha256Json(diagnosticSignals),
        risk_codes_hash: hashStrings(riskCodes),
        affected_surfaces_hash: hashStrings(affectedSurfaces),
        command_fingerprints_hash: hashStrings(commandFingerprints),
    };
}
|
|
104
|
+
/**
 * Records one observation of a failure fingerprint in the stored
 * repeated-failure state and returns the updated summary entry.
 *
 * The entry for the fingerprint is replaced by a summary whose `seen_count`
 * is one higher than the stored one (or 1 when none was stored). The stored
 * list is then ordered by `last_seen_at`, newest first, truncated to
 * `REPEATED_FAILURE_STATE_LIMIT` entries and written back.
 *
 * @param {object} input - Reads `failureFingerprint`, `projectRoot`,
 *   `status` and the optional `observedAt` (anything with `toISOString()`;
 *   defaults to the current time).
 * @returns {object|null} The new summary, or null (with nothing read or
 *   written) when `failureFingerprint` is falsy.
 */
export function updateRepeatedFailureState(input) {
    const fingerprintRecord = input.failureFingerprint;
    if (!fingerprintRecord) {
        return null;
    }
    const { fingerprints: storedEntries } = readRepeatedFailureState(input.projectRoot);
    const timestamp = (input.observedAt ?? new Date()).toISOString();
    const matches = (entry) => entry.fingerprint === fingerprintRecord.fingerprint;
    const prior = storedEntries.find(matches);
    const occurrences = (prior?.seen_count ?? 0) + 1;
    // New evidence is demanded from the second unresolved sighting onwards.
    const needsNewEvidence = UNRESOLVED_VERIFY_STATUSES.has(input.status) && occurrences >= 2;
    const summary = {
        schema_version: '1',
        fingerprint: fingerprintRecord.fingerprint,
        verification_plan_id: fingerprintRecord.verification_plan_id,
        status: input.status,
        failed_intents_hash: fingerprintRecord.failed_intents_hash,
        risk_codes_hash: fingerprintRecord.risk_codes_hash,
        affected_surfaces_hash: fingerprintRecord.affected_surfaces_hash,
        first_seen_at: prior?.first_seen_at ?? timestamp,
        last_seen_at: timestamp,
        seen_count: occurrences,
        requires_new_evidence: needsNewEvidence,
    };
    const retained = [summary];
    for (const entry of storedEntries) {
        if (!matches(entry)) {
            retained.push(entry);
        }
    }
    retained.sort((left, right) => right.last_seen_at.localeCompare(left.last_seen_at));
    writeRepeatedFailureState(input.projectRoot, {
        schema_version: '1',
        fingerprints: retained.slice(0, REPEATED_FAILURE_STATE_LIMIT),
    });
    return summary;
}
|
|
135
|
+
/**
 * Builds one high-severity repeated-failure risk entry.
 *
 * @param {string} code - Risk code; it selects the detail text, and any code
 *   other than the two named cases below receives the "three or more times"
 *   wording.
 * @param {object} currentFingerprint - Supplies the plan id, fingerprint and
 *   hash fields copied onto the risk.
 * @param {string} previousStatus - Copied onto the risk as `previous_status`.
 * @returns {object} The risk entry; `verdict_effect` is `'contradiction'` for
 *   `repeated_verification_failure` and `'blocker'` otherwise.
 */
function createRepeatedFailureRisk(code, currentFingerprint, previousStatus) {
    let detail;
    switch (code) {
        case 'repeated_verification_failure':
            detail = 'The previous verify summary has the same failure fingerprint and an unresolved status; provide new evidence or a narrower hypothesis before marking the task complete.';
            break;
        case 'no_new_evidence_since_previous_failure':
            detail = 'The previous verify summary has the same plan, failed-intent hash, and affected-surface hash; provide new source or reproduction evidence before treating the next completion claim as verifiable.';
            break;
        default:
            detail = 'The same unresolved failure fingerprint has repeated three or more times; new evidence is required before another completion claim can be treated as verifiable.';
    }
    const verdictEffect = code === 'repeated_verification_failure' ? 'contradiction' : 'blocker';
    return {
        code,
        severity: 'high',
        verdict_effect: verdictEffect,
        previous_status: previousStatus,
        verification_plan_id: currentFingerprint.verification_plan_id,
        failure_fingerprint: currentFingerprint.fingerprint,
        failed_intents_hash: currentFingerprint.failed_intents_hash,
        risk_codes_hash: currentFingerprint.risk_codes_hash,
        affected_surfaces_hash: currentFingerprint.affected_surfaces_hash,
        detail,
    };
}
|
|
154
|
+
/**
 * Compares the current failure fingerprint with the previous one and lists
 * the repeated-failure risks that apply.
 *
 * No risks are produced unless both fingerprints and the previous status are
 * present and both the previous and the current status are in
 * `UNRESOLVED_VERIFY_STATUSES`. Otherwise:
 * - identical fingerprints add `repeated_verification_failure`;
 * - a different fingerprint with the same plan id, failed-intents hash and
 *   affected-surfaces hash adds `no_new_evidence_since_previous_failure`;
 * - a `currentSummary` with `seen_count >= 3` and
 *   `requires_new_evidence === true` adds
 *   `repeated_failure_requires_new_evidence`.
 *
 * @param {object} input - Reads `currentFailureFingerprint`,
 *   `previousFailureFingerprint`, `previousStatus`, `currentStatus` and the
 *   optional `currentSummary`.
 * @returns {object[]} Risk entries built by `createRepeatedFailureRisk`;
 *   possibly empty.
 */
export function createRepeatedFailureRisks(input) {
    const currentFingerprint = input.currentFailureFingerprint;
    const previousFingerprint = input.previousFailureFingerprint;
    // `== null` matches undefined as well as null, so an omitted fingerprint
    // is treated as "nothing to compare" instead of throwing on the property
    // reads further down.
    if (previousFingerprint == null ||
        input.previousStatus == null ||
        currentFingerprint == null ||
        !UNRESOLVED_VERIFY_STATUSES.has(input.previousStatus) ||
        !UNRESOLVED_VERIFY_STATUSES.has(input.currentStatus)) {
        return [];
    }
    const risks = [];
    const sameFingerprint = previousFingerprint.fingerprint === currentFingerprint.fingerprint;
    const samePlanAndNoNewSourceEvidence = previousFingerprint.verification_plan_id === currentFingerprint.verification_plan_id &&
        previousFingerprint.failed_intents_hash === currentFingerprint.failed_intents_hash &&
        previousFingerprint.affected_surfaces_hash === currentFingerprint.affected_surfaces_hash;
    if (sameFingerprint) {
        risks.push(createRepeatedFailureRisk('repeated_verification_failure', currentFingerprint, input.previousStatus));
    }
    if (samePlanAndNoNewSourceEvidence && !sameFingerprint) {
        risks.push(createRepeatedFailureRisk('no_new_evidence_since_previous_failure', currentFingerprint, input.previousStatus));
    }
    if ((input.currentSummary?.seen_count ?? 0) >= 3 && input.currentSummary?.requires_new_evidence === true) {
        risks.push(createRepeatedFailureRisk('repeated_failure_requires_new_evidence', currentFingerprint, input.previousStatus));
    }
    return risks;
}
|
|
@@ -3,51 +3,132 @@ const TEXT_FIELD_LABELS = {
|
|
|
3
3
|
expected_behavior: 'expected behavior',
|
|
4
4
|
observed_behavior: 'observed behavior',
|
|
5
5
|
};
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
6
|
+
/**
 * Appends one `repro_evidence_missing` risk entry to `risks`.
 *
 * @param {object[]} risks - Array that receives the entry (mutated).
 * @param {string} detail - Message stored on the entry.
 * @param {string} [verdictEffect='partial'] - Stored as `verdict_effect`;
 *   `'contradicted'` yields severity `'critical'`, anything else `'high'`.
 */
function pushRisk(risks, detail, verdictEffect = 'partial') {
    let severity = 'high';
    if (verdictEffect === 'contradicted') {
        severity = 'critical';
    }
    const entry = {
        code: 'repro_evidence_missing',
        severity,
        detail,
        verdict_effect: verdictEffect,
    };
    risks.push(entry);
}
|
|
14
|
+
/**
 * Checks that one repro-evidence observation is tied to a receipt and to the
 * expected verification plan, recording a risk through `pushRisk` otherwise.
 *
 * @param {string} phaseLabel - Inserted into the risk messages.
 * @param {object} evidence - Reads `receipt_path`, `receipt_sha256` and
 *   `verification_plan_id`.
 * @param {object} options - Reads the optional `verificationPlanId`.
 * @param {object[]} risks - Array the risks are appended to.
 */
function collectReceiptBindingRisks(phaseLabel, evidence, options, risks) {
    const isBound = evidence.receipt_path && evidence.receipt_sha256 && evidence.verification_plan_id;
    if (!isBound) {
        pushRisk(risks, `Bug-fix repro evidence ${phaseLabel} observation is not bound to receipt_path, receipt_sha256, and verification_plan_id.`);
        return;
    }
    const expectedPlanId = options.verificationPlanId;
    // The plan comparison is skipped when no expected plan id was supplied.
    if (expectedPlanId && evidence.verification_plan_id !== expectedPlanId) {
        pushRisk(risks, `Bug-fix repro evidence ${phaseLabel} receipt is stale for the current verification plan.`);
    }
}
|
|
23
|
+
/**
 * Records risks for the before-fix section of a repro-evidence report.
 *
 * A `missing` or `unavailable` status produces a single risk and stops.
 * Any other status is checked for a summary and for the
 * `failed_as_expected` outcome, then handed to `collectReceiptBindingRisks`.
 *
 * @param {object} report - Reads `before_fix`.
 * @param {object} options - Forwarded to `collectReceiptBindingRisks`.
 * @param {object[]} risks - Array the risks are appended to.
 */
function collectBeforeFixRisks(report, options, risks) {
    switch (report.before_fix.status) {
        case 'missing':
            pushRisk(risks, 'Bug-fix repro evidence is missing before-fix reproduction; reproduce the original failure or mark it unavailable before claiming verification.');
            return;
        case 'unavailable': {
            const message = report.before_fix.reason
                ? 'Bug-fix repro evidence marks before-fix reproduction unavailable; the result cannot be verified without the original failure being observed.'
                : 'Bug-fix repro evidence marks before-fix reproduction unavailable without explaining why.';
            pushRisk(risks, message);
            return;
        }
        default:
            break;
    }
    if (!report.before_fix.summary) {
        pushRisk(risks, 'Bug-fix repro evidence reproduced the before-fix failure but does not summarize the evidence.');
    }
    if (report.before_fix.outcome !== 'failed_as_expected') {
        pushRisk(risks, 'Bug-fix repro evidence reproduced the before-fix path without outcome failed_as_expected.');
    }
    collectReceiptBindingRisks('before-fix', report.before_fix, options, risks);
}
|
|
42
|
+
/**
 * Records a risk for every identifying field that is missing from the
 * report's reproduction route.
 *
 * A missing `route_id`, `route_digest` or step list is recorded with the
 * `'unverified'` verdict effect; a missing `route_kind` or
 * `failure_oracle_hash` uses `pushRisk`'s default effect.
 *
 * A `steps` value that is absent or not an array is treated the same as an
 * empty list, so an incomplete route is reported as a risk rather than
 * raising a TypeError on `.length`.
 *
 * @param {object} report - Reads `reproduction_route`.
 * @param {object[]} risks - Array the risks are appended to.
 */
function collectRouteIdentityRisks(report, risks) {
    const route = report.reproduction_route;
    if (!route.route_id) {
        pushRisk(risks, 'Bug-fix repro evidence is missing reproduction_route.route_id.', 'unverified');
    }
    if (!route.route_kind) {
        pushRisk(risks, 'Bug-fix repro evidence is missing reproduction_route.route_kind.');
    }
    if (!route.route_digest) {
        pushRisk(risks, 'Bug-fix repro evidence is missing reproduction_route.route_digest.', 'unverified');
    }
    if (!route.failure_oracle_hash) {
        pushRisk(risks, 'Bug-fix repro evidence is missing reproduction_route.failure_oracle_hash.');
    }
    if (!Array.isArray(route.steps) || route.steps.length === 0) {
        pushRisk(risks, 'Bug-fix repro evidence is missing bounded reproduction route steps.', 'unverified');
    }
}
|
|
59
|
+
/**
 * Records risks for the after-fix section of a repro-evidence report.
 *
 * A `missing`, `unavailable` or `failed` status produces a single risk and
 * stops. Any other status is checked for a summary, for the
 * `passed_expected_behavior` outcome, and for a `same_route_as` value that
 * matches `reproduction_route.route_id`, then handed to
 * `collectReceiptBindingRisks`.
 *
 * @param {object} report - Reads `after_fix` and `reproduction_route`.
 * @param {object} options - Forwarded to `collectReceiptBindingRisks`.
 * @param {object[]} risks - Array the risks are appended to.
 */
function collectAfterFixRisks(report, options, risks) {
    switch (report.after_fix.status) {
        case 'missing':
            pushRisk(risks, 'Bug-fix repro evidence is missing after-fix same-route evidence; rerun the original route after the fix before claiming verification.', 'unverified');
            return;
        case 'unavailable': {
            const message = report.after_fix.reason
                ? 'Bug-fix repro evidence marks after-fix same-route evidence unavailable; the result cannot be verified without a post-fix pass.'
                : 'Bug-fix repro evidence marks after-fix same-route evidence unavailable without explaining why.';
            pushRisk(risks, message, 'unverified');
            return;
        }
        case 'failed':
            pushRisk(risks, 'Bug-fix repro evidence says the after-fix route still failed.', 'contradicted');
            return;
        default:
            break;
    }
    if (!report.after_fix.summary) {
        pushRisk(risks, 'Bug-fix repro evidence marks after-fix evidence passed but does not summarize the evidence.');
    }
    if (report.after_fix.outcome !== 'passed_expected_behavior') {
        pushRisk(risks, 'Bug-fix repro evidence marks after-fix evidence passed without outcome passed_expected_behavior.', 'unverified');
    }
    if (!report.after_fix.same_route_as) {
        pushRisk(risks, 'Bug-fix repro evidence marks after-fix evidence passed without same_route_as.', 'unverified');
    }
    // The mismatch is only reported when both identifiers are present.
    const routesDiffer = report.reproduction_route.route_id &&
        report.after_fix.same_route_as &&
        report.after_fix.same_route_as !== report.reproduction_route.route_id;
    if (routesDiffer) {
        pushRisk(risks, 'Bug-fix repro evidence after_fix.same_route_as does not match reproduction_route.route_id.');
    }
    collectReceiptBindingRisks('after-fix', report.after_fix, options, risks);
}
|
|
90
|
+
/**
 * Records risks for the regression-guard section of a repro-evidence report.
 *
 * A `missing`, `unavailable` or `failed` status produces a single risk and
 * stops. Any other status is checked for a summary and for an `intent` or
 * `test_path`, then handed to `collectReceiptBindingRisks`.
 *
 * @param {object} report - Reads `regression_guard`.
 * @param {object} options - Forwarded to `collectReceiptBindingRisks`.
 * @param {object[]} risks - Array the risks are appended to.
 */
function collectRegressionGuardRisks(report, options, risks) {
    switch (report.regression_guard.status) {
        case 'missing':
            pushRisk(risks, 'Bug-fix repro evidence is missing a regression guard; add or identify the guard before claiming verification.');
            return;
        case 'unavailable': {
            const message = report.regression_guard.reason
                ? 'Bug-fix repro evidence marks the regression guard unavailable; the result cannot be verified without a guard or explicit limitation.'
                : 'Bug-fix repro evidence marks the regression guard unavailable without explaining why.';
            pushRisk(risks, message);
            return;
        }
        case 'failed':
            pushRisk(risks, 'Bug-fix repro evidence says the regression guard failed.', 'contradicted');
            return;
        default:
            break;
    }
    if (!report.regression_guard.summary) {
        pushRisk(risks, 'Bug-fix repro evidence marks the regression guard passed but does not summarize the evidence.');
    }
    const hasLocator = report.regression_guard.intent || report.regression_guard.test_path;
    if (!hasLocator) {
        pushRisk(risks, 'Bug-fix repro evidence marks the regression guard passed without an intent or test path.');
    }
    collectReceiptBindingRisks('regression-guard', report.regression_guard, options, risks);
}
|
|
113
|
+
export function createReproEvidenceRisks(report, options = {}) {
|
|
13
114
|
if (!report) {
|
|
14
115
|
return [];
|
|
15
116
|
}
|
|
16
117
|
const risks = [];
|
|
17
118
|
for (const [field, label] of Object.entries(TEXT_FIELD_LABELS)) {
|
|
18
119
|
if (!report[field]) {
|
|
19
|
-
risks
|
|
20
|
-
code: 'repro_evidence_missing',
|
|
21
|
-
severity: 'high',
|
|
22
|
-
detail: `Bug-fix repro evidence is missing ${label}; do not mark the task verified from command receipts alone.`,
|
|
23
|
-
});
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
for (const [field, label] of Object.entries(ITEM_FIELD_LABELS)) {
|
|
27
|
-
const item = report[field];
|
|
28
|
-
if (item.status === 'missing') {
|
|
29
|
-
risks.push({
|
|
30
|
-
code: 'repro_evidence_missing',
|
|
31
|
-
severity: 'high',
|
|
32
|
-
detail: `Bug-fix repro evidence is missing ${label}; rerun or explicitly mark it unavailable before claiming verification.`,
|
|
33
|
-
});
|
|
34
|
-
continue;
|
|
35
|
-
}
|
|
36
|
-
if (item.status === 'present' && !item.summary) {
|
|
37
|
-
risks.push({
|
|
38
|
-
code: 'repro_evidence_missing',
|
|
39
|
-
severity: 'high',
|
|
40
|
-
detail: `Bug-fix repro evidence marks ${label} present but does not summarize the evidence.`,
|
|
41
|
-
});
|
|
42
|
-
continue;
|
|
43
|
-
}
|
|
44
|
-
if (item.status === 'unavailable' && !item.reason) {
|
|
45
|
-
risks.push({
|
|
46
|
-
code: 'repro_evidence_missing',
|
|
47
|
-
severity: 'high',
|
|
48
|
-
detail: `Bug-fix repro evidence marks ${label} unavailable without explaining why.`,
|
|
49
|
-
});
|
|
120
|
+
pushRisk(risks, `Bug-fix repro evidence is missing ${label}; do not mark the task verified from command receipts alone.`);
|
|
50
121
|
}
|
|
51
122
|
}
|
|
123
|
+
collectRouteIdentityRisks(report, risks);
|
|
124
|
+
collectBeforeFixRisks(report, options, risks);
|
|
125
|
+
collectAfterFixRisks(report, options, risks);
|
|
126
|
+
collectRegressionGuardRisks(report, options, risks);
|
|
52
127
|
return risks;
|
|
53
128
|
}
|
|
129
|
+
/**
 * Counts how many risks carry the `'contradicted'` and the `'unverified'`
 * verdict effect.
 *
 * @param {object[]} risks - Entries whose `verdict_effect` is inspected.
 * @returns {{contradicted: number, unverified: number}} The two tallies;
 *   entries with any other effect are not counted.
 */
export function countReproEvidenceVerdictEffects(risks) {
    const countWithEffect = (effect) => risks.reduce((total, risk) => (risk.verdict_effect === effect ? total + 1 : total), 0);
    return {
        contradicted: countWithEffect('contradicted'),
        unverified: countWithEffect('unverified'),
    };
}
|