mustflow 2.11.0 → 2.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/dist/cli/commands/dashboard.js +71 -2
- package/dist/cli/commands/explain-verify.js +11 -1
- package/dist/cli/commands/index.js +9 -0
- package/dist/cli/commands/upgrade.js +3 -1
- package/dist/cli/commands/verify.js +528 -30
- package/dist/cli/commands/version.js +1 -1
- package/dist/cli/i18n/en.js +1 -1
- package/dist/cli/i18n/es.js +1 -1
- package/dist/cli/i18n/fr.js +1 -1
- package/dist/cli/i18n/hi.js +1 -1
- package/dist/cli/lib/local-index/constants.js +1 -1
- package/dist/cli/lib/local-index/index.js +708 -13
- package/dist/cli/lib/npm-version-check.js +71 -1
- package/dist/core/completion-verdict.js +151 -19
- package/dist/core/repeated-failure.js +172 -10
- package/dist/core/repro-evidence.js +119 -38
- package/dist/core/validation-ratchet.js +161 -17
- package/package.json +3 -3
- package/schemas/dashboard-export.schema.json +83 -0
- package/schemas/explain-report.schema.json +173 -1
- package/schemas/latest-run-pointer.schema.json +227 -10
- package/schemas/verify-report.schema.json +227 -10
- package/schemas/verify-run-manifest.schema.json +227 -10
- package/templates/default/manifest.toml +1 -1
|
@@ -1,5 +1,42 @@
|
|
|
1
1
|
const DEFAULT_NPM_REGISTRY_URL = 'https://registry.npmjs.org';
|
|
2
2
|
const DEFAULT_VERSION_CHECK_TIMEOUT_MS = 3_000;
|
|
3
|
+
/**
 * Global-install command templates, one entry per supported package manager.
 *
 * Every entry has an `id`, a `label` (identical to the id), and a
 * `command(packageName)` function that returns the shell command installing
 * the `@latest` version of `packageName` globally. The deno template passes
 * `-n mf` and uses the `npm:` specifier prefix.
 */
const PACKAGE_MANAGER_COMMANDS = [
    ['npm', (packageName) => `npm install -g ${packageName}@latest`],
    ['bun', (packageName) => `bun add -g ${packageName}@latest`],
    ['pnpm', (packageName) => `pnpm add -g ${packageName}@latest`],
    ['yarn', (packageName) => `yarn global add ${packageName}@latest`],
    ['deno', (packageName) => `deno install -g -A -n mf npm:${packageName}@latest`],
].map(([id, command]) => ({ id, label: id, command }));
|
|
3
40
|
/**
 * Tells whether `value` is a non-null, non-array object.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} `true` only for objects that are neither `null` nor arrays.
 */
function isRecord(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === 'object';
}
|
|
@@ -65,6 +102,37 @@ function getTimeoutMs() {
|
|
|
65
102
|
const parsed = rawValue ? Number(rawValue) : DEFAULT_VERSION_CHECK_TIMEOUT_MS;
|
|
66
103
|
return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : DEFAULT_VERSION_CHECK_TIMEOUT_MS;
|
|
67
104
|
}
|
|
105
|
+
/**
 * Guesses which package manager installed or launched this CLI.
 *
 * The guess is based on these signals, lower-cased and with missing or empty
 * values dropped: the `npm_config_user_agent` and `npm_execpath` environment
 * variables, the runtime executable path, the entry script path, and this
 * module's URL.
 *
 * Candidate ids are tried in a fixed priority order with `npm` last, and the
 * first id found in any signal wins.
 *
 * @returns {string | null} One of `'bun'`, `'pnpm'`, `'yarn'`, `'deno'`,
 *   `'npm'`, or `null` when no signal names a known package manager.
 */
function detectPackageManagerId() {
    const signals = [
        process.env.npm_config_user_agent,
        process.env.npm_execpath,
        process.execPath,
        process.argv[1],
        import.meta.url,
    ]
        .filter((signal) => typeof signal === 'string' && signal.length > 0)
        .map((signal) => signal.toLowerCase());
    for (const id of ['bun', 'pnpm', 'yarn', 'deno', 'npm']) {
        // Match the id only as a whole token (bounded by non-alphanumerics or
        // the string edges). A plain substring test misfires on unrelated path
        // segments, e.g. "/home/ubuntu/..." contains "bun".
        const tokenPattern = new RegExp(`(?:^|[^a-z0-9])${id}(?:[^a-z0-9]|$)`, 'u');
        if (signals.some((signal) => tokenPattern.test(signal))) {
            return id;
        }
    }
    return null;
}
|
|
122
|
+
/**
 * Builds the list of update commands for `packageName`, one per entry of
 * `PACKAGE_MANAGER_COMMANDS`.
 *
 * When a package manager is detected and has an entry, that entry is moved to
 * the front of the list and flagged as recommended; all other entries keep
 * their relative order.
 *
 * @param {string} packageName - Package name interpolated into each command.
 * @returns {{ manager: string, command: string, recommended: boolean }[]}
 *   Commands in display order; at most the first one has `recommended: true`.
 */
function getPackageInstallCommands(packageName) {
    const detectedId = detectPackageManagerId();
    const ordered = [...PACKAGE_MANAGER_COMMANDS];
    if (detectedId) {
        const position = ordered.findIndex((candidate) => candidate.id === detectedId);
        // Position 0 is already first and -1 means "not found"; neither needs a move.
        if (position > 0) {
            ordered.unshift(...ordered.splice(position, 1));
        }
    }
    return ordered.map((entry, position) => {
        const isRecommended = position === 0 && detectedId === entry.id;
        return {
            manager: entry.label,
            command: entry.command(packageName),
            recommended: isRecommended,
        };
    });
}
|
|
68
136
|
function buildLatestPackageUrl(registryUrl, packageName) {
|
|
69
137
|
const trimmedRegistryUrl = registryUrl.replace(/\/+$/u, '');
|
|
70
138
|
const encodedPackageName = packageName.startsWith('@')
|
|
@@ -86,12 +154,14 @@ export async function checkNpmLatestVersion(metadata) {
|
|
|
86
154
|
if (!latestVersion) {
|
|
87
155
|
throw new Error('npm registry response did not include a version');
|
|
88
156
|
}
|
|
157
|
+
const updateCommands = getPackageInstallCommands(metadata.name);
|
|
89
158
|
return {
|
|
90
159
|
packageName: metadata.name,
|
|
91
160
|
currentVersion: metadata.version,
|
|
92
161
|
latestVersion,
|
|
93
162
|
updateAvailable: comparePackageVersions(metadata.version, latestVersion) < 0,
|
|
94
163
|
registryUrl,
|
|
95
|
-
updateCommand: `npm install -g ${metadata.name}@latest`,
|
|
164
|
+
updateCommand: updateCommands[0]?.command ?? `npm install -g ${metadata.name}@latest`,
|
|
165
|
+
updateCommands,
|
|
96
166
|
};
|
|
97
167
|
}
|
|
@@ -1,17 +1,90 @@
|
|
|
1
|
+
/**
 * Collects the per-category risk counters of a verdict input into one
 * snake_case keyed object.
 *
 * @param {object} input - Verdict input carrying optional camelCase counters.
 * @returns {object} One numeric entry per risk category; a counter that is
 *   `null` or `undefined` on `input` is reported as `0`.
 */
function createRiskEvidence(input) {
    // [output key, input field] pairs, in the order the keys are emitted.
    const counterFields = [
        ['source_anchor', 'sourceAnchorRiskCount'],
        ['scope_diff', 'scopeDiffRiskCount'],
        ['repeated_failure', 'repeatedFailureCount'],
        ['validation_ratchet', 'validationRatchetRiskCount'],
        ['repro_evidence', 'reproEvidenceRiskCount'],
        ['external_evidence', 'externalEvidenceRiskCount'],
        ['write_drift', 'writeDriftRiskCount'],
        ['receipt_binding', 'receiptBindingRiskCount'],
        ['stale_receipt', 'staleReceiptCount'],
        ['plan_mismatch', 'planMismatchCount'],
    ];
    return Object.fromEntries(counterFields.map(([key, field]) => [key, input[field] ?? 0]));
}
|
|
15
|
+
/**
 * Creates a receipt-binding evidence record with every counter set to zero.
 *
 * @returns {object} A fresh object on each call, so callers may mutate it.
 */
function emptyReceiptBindingEvidence() {
    const counterNames = [
        'plan_bound_count',
        'plan_unbound_count',
        'fingerprint_bound_count',
        'fingerprint_unbound_count',
        'current_state_bound_count',
        'current_state_unavailable_count',
        'stale_count',
        'plan_mismatch_count',
    ];
    return Object.fromEntries(counterNames.map((name) => [name, 0]));
}
|
|
27
|
+
/**
 * Creates a criteria evidence record with every bucket set to zero.
 *
 * @returns {object} A fresh object on each call, so callers may mutate it.
 */
function emptyCriteriaEvidence() {
    const bucketNames = ['total', 'covered', 'partially_covered', 'uncovered', 'blocked', 'contradicted'];
    return Object.fromEntries(bucketNames.map((name) => [name, 0]));
}
|
|
37
|
+
/**
 * Counts intents that ran without producing a receipt as receipt-binding risk.
 *
 * @param {object} input - Verdict input; `ranIntents` and `receiptCount` are
 *   read, and `receiptBindingRiskCount` is treated as `0` when nullish.
 * @returns {object} `input` itself when no receipts are missing, otherwise a
 *   shallow copy whose `receiptBindingRiskCount` is increased by the number
 *   of missing receipts.
 */
function normalizeVerifyCompletionInput(input) {
    // More receipts than ran intents is clamped to "nothing missing".
    const shortfall = Math.max(0, input.ranIntents - input.receiptCount);
    if (shortfall !== 0) {
        const adjusted = { ...input };
        adjusted.receiptBindingRiskCount = (input.receiptBindingRiskCount ?? 0) + shortfall;
        return adjusted;
    }
    return input;
}
|
|
1
47
|
function verifyStatus(input) {
|
|
48
|
+
const contradictions = [];
|
|
2
49
|
if (input.failedIntents > 0) {
|
|
3
|
-
|
|
4
|
-
|
|
50
|
+
contradictions.push('one_or_more_selected_verification_intents_failed');
|
|
51
|
+
}
|
|
52
|
+
if ((input.planMismatchCount ?? 0) > 0) {
|
|
53
|
+
contradictions.push('plan_receipt_mismatch');
|
|
54
|
+
}
|
|
55
|
+
if ((input.reproEvidenceContradictionCount ?? 0) > 0) {
|
|
56
|
+
contradictions.push('repro_evidence_contradicted');
|
|
57
|
+
}
|
|
58
|
+
if ((input.validationRatchetContradictionCount ?? 0) > 0) {
|
|
59
|
+
contradictions.push('validation_ratchet_contradicted');
|
|
60
|
+
}
|
|
61
|
+
if (contradictions.length > 0) {
|
|
62
|
+
if (input.failedIntents > 0 && (input.repeatedFailureCount ?? 0) > 0) {
|
|
5
63
|
contradictions.push('repeated_verification_failure');
|
|
6
64
|
}
|
|
7
65
|
return {
|
|
8
66
|
status: 'contradicted',
|
|
9
|
-
primaryReason:
|
|
67
|
+
primaryReason: input.failedIntents > 0
|
|
68
|
+
? 'verification_failed'
|
|
69
|
+
: (input.planMismatchCount ?? 0) > 0
|
|
70
|
+
? 'plan_receipt_mismatch'
|
|
71
|
+
: (input.reproEvidenceContradictionCount ?? 0) > 0
|
|
72
|
+
? 'repro_evidence_contradicted'
|
|
73
|
+
: 'validation_ratchet_contradicted',
|
|
10
74
|
blockers: [],
|
|
11
75
|
contradictions,
|
|
12
76
|
limitations: [],
|
|
13
77
|
};
|
|
14
78
|
}
|
|
79
|
+
if ((input.repeatedFailureBlockerCount ?? 0) > 0) {
|
|
80
|
+
return {
|
|
81
|
+
status: 'blocked',
|
|
82
|
+
primaryReason: 'repeated_failure_requires_new_evidence',
|
|
83
|
+
blockers: ['repeated_failure_requires_new_evidence'],
|
|
84
|
+
contradictions: [],
|
|
85
|
+
limitations: [],
|
|
86
|
+
};
|
|
87
|
+
}
|
|
15
88
|
if (input.ranIntents === 0 && input.skippedIntents > 0) {
|
|
16
89
|
const blockers = ['all_matching_verification_intents_were_skipped'];
|
|
17
90
|
if ((input.repeatedFailureCount ?? 0) > 0) {
|
|
@@ -51,6 +124,15 @@ function verifyStatus(input) {
|
|
|
51
124
|
limitations,
|
|
52
125
|
};
|
|
53
126
|
}
|
|
127
|
+
if ((input.reproEvidenceUnverifiedCount ?? 0) > 0) {
|
|
128
|
+
return {
|
|
129
|
+
status: 'unverified',
|
|
130
|
+
primaryReason: 'repro_evidence_unverified',
|
|
131
|
+
blockers: [],
|
|
132
|
+
contradictions: [],
|
|
133
|
+
limitations: ['repro_evidence_missing'],
|
|
134
|
+
};
|
|
135
|
+
}
|
|
54
136
|
const downgradeLimitations = [];
|
|
55
137
|
if ((input.sourceAnchorRiskCount ?? 0) > 0) {
|
|
56
138
|
downgradeLimitations.push('high_risk_source_anchor_requires_review');
|
|
@@ -61,6 +143,15 @@ function verifyStatus(input) {
|
|
|
61
143
|
if ((input.validationRatchetRiskCount ?? 0) > 0) {
|
|
62
144
|
downgradeLimitations.push('validation_ratchet_risk_requires_review');
|
|
63
145
|
}
|
|
146
|
+
if ((input.writeDriftRiskCount ?? 0) > 0) {
|
|
147
|
+
downgradeLimitations.push('write_drift_requires_review');
|
|
148
|
+
}
|
|
149
|
+
if ((input.receiptBindingRiskCount ?? 0) > 0) {
|
|
150
|
+
downgradeLimitations.push('receipt_binding_requires_review');
|
|
151
|
+
}
|
|
152
|
+
if ((input.staleReceiptCount ?? 0) > 0) {
|
|
153
|
+
downgradeLimitations.push('stale_receipt_requires_review');
|
|
154
|
+
}
|
|
64
155
|
if ((input.reproEvidenceRiskCount ?? 0) > 0) {
|
|
65
156
|
downgradeLimitations.push('repro_evidence_missing');
|
|
66
157
|
}
|
|
@@ -76,9 +167,15 @@ function verifyStatus(input) {
|
|
|
76
167
|
? 'scope_diff_review_required'
|
|
77
168
|
: (input.validationRatchetRiskCount ?? 0) > 0
|
|
78
169
|
? 'validation_ratchet_review_required'
|
|
79
|
-
: (input.
|
|
80
|
-
? '
|
|
81
|
-
:
|
|
170
|
+
: (input.writeDriftRiskCount ?? 0) > 0
|
|
171
|
+
? 'write_drift_review_required'
|
|
172
|
+
: (input.receiptBindingRiskCount ?? 0) > 0
|
|
173
|
+
? 'receipt_binding_review_required'
|
|
174
|
+
: (input.staleReceiptCount ?? 0) > 0
|
|
175
|
+
? 'stale_receipt_review_required'
|
|
176
|
+
: (input.reproEvidenceRiskCount ?? 0) > 0
|
|
177
|
+
? 'repro_evidence_missing'
|
|
178
|
+
: 'external_evidence_review_required',
|
|
82
179
|
blockers: [],
|
|
83
180
|
contradictions: [],
|
|
84
181
|
limitations: downgradeLimitations,
|
|
@@ -102,26 +199,39 @@ function verifyStatus(input) {
|
|
|
102
199
|
};
|
|
103
200
|
}
|
|
104
201
|
export function createVerifyCompletionVerdict(input) {
|
|
105
|
-
const
|
|
202
|
+
const normalizedInput = normalizeVerifyCompletionInput(input);
|
|
203
|
+
const result = verifyStatus(normalizedInput);
|
|
204
|
+
const risks = createRiskEvidence(normalizedInput);
|
|
205
|
+
const receiptBinding = normalizedInput.receiptBinding ?? emptyReceiptBindingEvidence();
|
|
206
|
+
const criteria = normalizedInput.criteria ?? emptyCriteriaEvidence();
|
|
106
207
|
return {
|
|
107
208
|
schema_version: '1',
|
|
108
209
|
status: result.status,
|
|
109
210
|
primary_reason: result.primaryReason,
|
|
110
211
|
evidence: {
|
|
111
212
|
source: 'mf_verify',
|
|
112
|
-
verification_plan_id:
|
|
213
|
+
verification_plan_id: normalizedInput.verificationPlanId,
|
|
113
214
|
changed_file_count: null,
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
215
|
+
criteria,
|
|
216
|
+
matched_intents: normalizedInput.matchedIntents,
|
|
217
|
+
ran_intents: normalizedInput.ranIntents,
|
|
218
|
+
passed_intents: normalizedInput.passedIntents,
|
|
219
|
+
failed_intents: normalizedInput.failedIntents,
|
|
220
|
+
skipped_intents: normalizedInput.skippedIntents,
|
|
221
|
+
receipt_count: normalizedInput.receiptCount,
|
|
222
|
+
gap_count: normalizedInput.skippedIntents,
|
|
223
|
+
source_anchor_risk_count: normalizedInput.sourceAnchorRiskCount ?? 0,
|
|
224
|
+
scope_diff_risk_count: normalizedInput.scopeDiffRiskCount ?? 0,
|
|
225
|
+
repeated_failure_count: normalizedInput.repeatedFailureCount ?? 0,
|
|
226
|
+
validation_ratchet_risk_count: normalizedInput.validationRatchetRiskCount ?? 0,
|
|
227
|
+
repro_evidence_risk_count: normalizedInput.reproEvidenceRiskCount ?? 0,
|
|
228
|
+
external_evidence_risk_count: normalizedInput.externalEvidenceRiskCount ?? 0,
|
|
229
|
+
write_drift_risk_count: normalizedInput.writeDriftRiskCount ?? 0,
|
|
230
|
+
receipt_binding_risk_count: normalizedInput.receiptBindingRiskCount ?? 0,
|
|
231
|
+
stale_receipt_count: normalizedInput.staleReceiptCount ?? 0,
|
|
232
|
+
plan_mismatch_count: normalizedInput.planMismatchCount ?? 0,
|
|
233
|
+
risks,
|
|
234
|
+
receipt_binding: receiptBinding,
|
|
125
235
|
latest_run_status: null,
|
|
126
236
|
},
|
|
127
237
|
blockers: result.blockers,
|
|
@@ -130,6 +240,8 @@ export function createVerifyCompletionVerdict(input) {
|
|
|
130
240
|
};
|
|
131
241
|
}
|
|
132
242
|
export function createDashboardCompletionVerdict(input) {
|
|
243
|
+
const risks = createRiskEvidence(input);
|
|
244
|
+
const receiptBinding = input.receiptBinding ?? emptyReceiptBindingEvidence();
|
|
133
245
|
const latestRunFailed = input.latestRunStatus === 'failed' ||
|
|
134
246
|
input.latestRunStatus === 'timed_out' ||
|
|
135
247
|
input.latestRunStatus === 'start_failed';
|
|
@@ -181,6 +293,17 @@ export function createDashboardCompletionVerdict(input) {
|
|
|
181
293
|
primaryReason = 'latest_run_passed_without_current_claim_binding';
|
|
182
294
|
limitations.push('latest_run_is_not_bound_to_a_current_completion_claim');
|
|
183
295
|
}
|
|
296
|
+
const criteria = input.criteria ??
|
|
297
|
+
(input.changedFileCount > 0 || input.runnableIntentCount > 0 || input.skippedIntentCount > 0 || input.gapCount > 0
|
|
298
|
+
? {
|
|
299
|
+
total: 1,
|
|
300
|
+
covered: 0,
|
|
301
|
+
partially_covered: status === 'partially_verified' ? 1 : 0,
|
|
302
|
+
uncovered: status === 'unverified' ? 1 : 0,
|
|
303
|
+
blocked: status === 'blocked' ? 1 : 0,
|
|
304
|
+
contradicted: status === 'contradicted' ? 1 : 0,
|
|
305
|
+
}
|
|
306
|
+
: emptyCriteriaEvidence());
|
|
184
307
|
return {
|
|
185
308
|
schema_version: '1',
|
|
186
309
|
status,
|
|
@@ -189,6 +312,7 @@ export function createDashboardCompletionVerdict(input) {
|
|
|
189
312
|
source: 'dashboard_export',
|
|
190
313
|
verification_plan_id: null,
|
|
191
314
|
changed_file_count: input.changedFileCount,
|
|
315
|
+
criteria,
|
|
192
316
|
matched_intents: input.runnableIntentCount + input.skippedIntentCount,
|
|
193
317
|
ran_intents: 0,
|
|
194
318
|
passed_intents: 0,
|
|
@@ -200,6 +324,14 @@ export function createDashboardCompletionVerdict(input) {
|
|
|
200
324
|
scope_diff_risk_count: input.scopeDiffRiskCount ?? 0,
|
|
201
325
|
repeated_failure_count: input.repeatedFailureCount ?? 0,
|
|
202
326
|
validation_ratchet_risk_count: input.validationRatchetRiskCount ?? 0,
|
|
327
|
+
repro_evidence_risk_count: input.reproEvidenceRiskCount ?? 0,
|
|
328
|
+
external_evidence_risk_count: input.externalEvidenceRiskCount ?? 0,
|
|
329
|
+
write_drift_risk_count: input.writeDriftRiskCount ?? 0,
|
|
330
|
+
receipt_binding_risk_count: input.receiptBindingRiskCount ?? 0,
|
|
331
|
+
stale_receipt_count: input.staleReceiptCount ?? 0,
|
|
332
|
+
plan_mismatch_count: input.planMismatchCount ?? 0,
|
|
333
|
+
risks,
|
|
334
|
+
receipt_binding: receiptBinding,
|
|
203
335
|
latest_run_status: input.latestRunStatus,
|
|
204
336
|
},
|
|
205
337
|
blockers,
|
|
@@ -1,17 +1,179 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
export const REPEATED_FAILURE_STATE_PATH = '.mustflow/state/repeated-failures.json';
|
|
5
|
+
export const REPEATED_FAILURE_STATE_LIMIT = 50;
|
|
1
6
|
const UNRESOLVED_VERIFY_STATUSES = new Set(['failed', 'blocked', 'partial']);
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
7
|
+
/**
 * Hashes the JSON serialization of `value` with SHA-256.
 *
 * @param {unknown} value - Value passed to `JSON.stringify` before hashing.
 * @returns {string} The lowercase hex digest prefixed with `sha256:`.
 */
function sha256Json(value) {
    const hasher = createHash('sha256');
    hasher.update(JSON.stringify(value));
    const hexDigest = hasher.digest('hex');
    return `sha256:${hexDigest}`;
}
|
|
10
|
+
/**
 * Produces a canonical form of a string list: entries are trimmed, empty
 * results are dropped, duplicates are removed, and the remainder is sorted
 * with `localeCompare`.
 *
 * @param {string[]} values - Raw strings.
 * @returns {string[]} A new, sorted array of unique non-empty trimmed strings.
 */
function normalizeStrings(values) {
    const unique = new Set();
    values
        .map((raw) => raw.trim())
        .forEach((trimmed) => {
            if (trimmed.length > 0) {
                unique.add(trimmed);
            }
        });
    return Array.from(unique).sort((left, right) => left.localeCompare(right));
}
|
|
13
|
+
/**
 * Hashes a string list in an order- and duplicate-insensitive way by
 * normalizing it with `normalizeStrings` before hashing with `sha256Json`.
 *
 * @param {string[]} values - Raw strings.
 * @returns {string} The hash string produced by `sha256Json`.
 */
function hashStrings(values) {
    const canonical = normalizeStrings(values);
    return sha256Json(canonical);
}
|
|
16
|
+
/**
 * Hashes the distinct values of a boolean list, ordered by their numeric
 * value (so `false` sorts before `true`), using `sha256Json`.
 *
 * @param {Iterable<boolean>} values - Flags to hash.
 * @returns {string} The hash string produced by `sha256Json`.
 */
function hashBooleans(values) {
    const distinct = Array.from(new Set(values));
    distinct.sort((left, right) => Number(left) - Number(right));
    return sha256Json(distinct);
}
|
|
19
|
+
/**
 * Tells whether `value` is a non-empty string.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} `true` only for strings with at least one character.
 */
function isString(value) {
    if (typeof value !== 'string') {
        return false;
    }
    return value.length > 0;
}
|
|
22
|
+
/**
 * Validates the shape of one persisted repeated-failure summary entry.
 *
 * An entry is accepted when it is a non-array object with
 * `schema_version === '1'`, non-empty strings in every identifying, hash and
 * timestamp field, a positive integer `seen_count`, and a boolean
 * `requires_new_evidence`.
 *
 * @param {unknown} value - Candidate entry, e.g. parsed from the state file.
 * @returns {boolean} Whether `value` has the expected shape.
 */
function isRepeatedFailureSummary(value) {
    if (!value || typeof value !== 'object' || Array.isArray(value)) {
        return false;
    }
    if (value.schema_version !== '1') {
        return false;
    }
    const requiredStringFields = [
        'fingerprint',
        'verification_plan_id',
        'status',
        'failed_intents_hash',
        'risk_codes_hash',
        'affected_surfaces_hash',
        'first_seen_at',
        'last_seen_at',
    ];
    if (!requiredStringFields.every((field) => isString(value[field]))) {
        return false;
    }
    const seenCount = value.seen_count;
    if (typeof seenCount !== 'number' || !Number.isInteger(seenCount) || seenCount <= 0) {
        return false;
    }
    return typeof value.requires_new_evidence === 'boolean';
}
|
|
41
|
+
/**
 * Resolves the repeated-failure state file location inside a project.
 *
 * @param {string} projectRoot - Project root directory.
 * @returns {string} `projectRoot` joined with the segments of
 *   `REPEATED_FAILURE_STATE_PATH`, using the platform path separator.
 */
function repeatedFailureStatePath(projectRoot) {
    const relativeSegments = REPEATED_FAILURE_STATE_PATH.split('/');
    return path.join(projectRoot, ...relativeSegments);
}
|
|
44
|
+
/**
 * Loads the persisted repeated-failure state for a project.
 *
 * Reading is best-effort: a missing file, unreadable or unparsable content,
 * or a non-array `fingerprints` field all yield an empty state. Entries that
 * fail `isRepeatedFailureSummary` are dropped.
 *
 * @param {string} projectRoot - Project root directory.
 * @returns {{ schema_version: '1', fingerprints: object[] }} The loaded state.
 */
function readRepeatedFailureState(projectRoot) {
    const createEmptyState = () => ({ schema_version: '1', fingerprints: [] });
    const statePath = repeatedFailureStatePath(projectRoot);
    if (!existsSync(statePath)) {
        return createEmptyState();
    }
    try {
        const { fingerprints: storedEntries } = JSON.parse(readFileSync(statePath, 'utf8'));
        if (!Array.isArray(storedEntries)) {
            return createEmptyState();
        }
        return { schema_version: '1', fingerprints: storedEntries.filter(isRepeatedFailureSummary) };
    }
    catch {
        // Deliberately tolerant: a damaged state file is treated as no history.
        return createEmptyState();
    }
}
|
|
60
|
+
/**
 * Persists the repeated-failure state as pretty-printed JSON followed by a
 * newline, creating the parent directory when it does not exist.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {object} state - State object to serialize.
 * @returns {void}
 */
function writeRepeatedFailureState(projectRoot, state) {
    const targetPath = repeatedFailureStatePath(projectRoot);
    const targetDirectory = path.dirname(targetPath);
    mkdirSync(targetDirectory, { recursive: true });
    const serialized = JSON.stringify(state, null, 2);
    writeFileSync(targetPath, `${serialized}\n`, 'utf8');
}
|
|
65
|
+
/**
 * Derives a fingerprint describing one verification failure.
 *
 * All string lists are normalized with `normalizeStrings`, and timeout flags
 * are de-duplicated and ordered, before hashing. The overall `fingerprint`
 * covers the plan id, failed intents, diagnostic signals (exit code classes,
 * timeout flags, error kinds), risk codes, affected surfaces and command
 * fingerprints; the remaining fields are hashes of the individual components.
 *
 * @param {object} input - Failure description: `verificationPlanId`,
 *   `failedIntents`, `riskCodes`, `exitCodeClasses`, `timeoutFlags`,
 *   `errorKinds`, `affectedSurfaces`, `commandFingerprints`.
 * @returns {object | null} The fingerprint record, or `null` when there are
 *   neither failed intents nor risk codes after normalization.
 */
export function createVerificationFailureFingerprint(input) {
    const failedIntents = normalizeStrings(input.failedIntents);
    const riskCodes = normalizeStrings(input.riskCodes);
    const hasNothingToFingerprint = failedIntents.length === 0 && riskCodes.length === 0;
    if (hasNothingToFingerprint) {
        return null;
    }
    const planId = input.verificationPlanId;
    const exitCodeClasses = normalizeStrings(input.exitCodeClasses);
    const timeoutFlags = Array.from(new Set(input.timeoutFlags));
    timeoutFlags.sort((left, right) => Number(left) - Number(right));
    const errorKinds = normalizeStrings(input.errorKinds);
    const affectedSurfaces = normalizeStrings(input.affectedSurfaces);
    const commandFingerprints = normalizeStrings(input.commandFingerprints);
    const diagnosticSignals = {
        exit_code_classes: exitCodeClasses,
        timeout_flags: timeoutFlags,
        error_kinds: errorKinds,
    };
    // Key order is part of the hashed JSON text, so it must stay fixed.
    const fingerprint = sha256Json({
        schema_version: '1',
        verification_plan_id: planId,
        failed_intents: failedIntents,
        diagnostic_signals: diagnosticSignals,
        risk_codes: riskCodes,
        affected_surfaces: affectedSurfaces,
        command_fingerprints: commandFingerprints,
    });
    return {
        schema_version: '1',
        fingerprint,
        verification_plan_id: planId,
        failed_intents_hash: hashStrings(failedIntents),
        exit_code_classes_hash: hashStrings(exitCodeClasses),
        timeout_flags_hash: hashBooleans(timeoutFlags),
        error_kinds_hash: hashStrings(errorKinds),
        diagnostic_hash: sha256Json(diagnosticSignals),
        risk_codes_hash: hashStrings(riskCodes),
        affected_surfaces_hash: hashStrings(affectedSurfaces),
        command_fingerprints_hash: hashStrings(commandFingerprints),
    };
}
|
|
104
|
+
/**
 * Records one observation of a failure fingerprint in the project's
 * repeated-failure state file and returns the updated summary.
 *
 * The summary's `seen_count` is the previously stored count plus one, and
 * `first_seen_at` is carried over from the stored entry when there is one.
 * `requires_new_evidence` is set when the status is an unresolved one and the
 * fingerprint has been seen at least twice. The stored list is ordered by
 * most recent `last_seen_at` and capped at `REPEATED_FAILURE_STATE_LIMIT`.
 *
 * @param {object} input - `projectRoot`, `failureFingerprint`, `status`, and
 *   an optional `observedAt` Date (defaults to the current time).
 * @returns {object | null} The written summary, or `null` (without touching
 *   the state file) when `failureFingerprint` is falsy.
 */
export function updateRepeatedFailureState(input) {
    const { failureFingerprint } = input;
    if (!failureFingerprint) {
        return null;
    }
    const state = readRepeatedFailureState(input.projectRoot);
    const observedAt = (input.observedAt ?? new Date()).toISOString();
    const targetFingerprint = failureFingerprint.fingerprint;
    const existing = state.fingerprints.find((entry) => entry.fingerprint === targetFingerprint);
    const seenCount = (existing?.seen_count ?? 0) + 1;
    const isUnresolved = UNRESOLVED_VERIFY_STATUSES.has(input.status);
    const summary = {
        schema_version: '1',
        fingerprint: targetFingerprint,
        verification_plan_id: failureFingerprint.verification_plan_id,
        status: input.status,
        failed_intents_hash: failureFingerprint.failed_intents_hash,
        risk_codes_hash: failureFingerprint.risk_codes_hash,
        affected_surfaces_hash: failureFingerprint.affected_surfaces_hash,
        first_seen_at: existing?.first_seen_at ?? observedAt,
        last_seen_at: observedAt,
        seen_count: seenCount,
        requires_new_evidence: isUnresolved && seenCount >= 2,
    };
    // Replace any stored entry for this fingerprint with the fresh summary.
    const otherEntries = state.fingerprints.filter((entry) => entry.fingerprint !== summary.fingerprint);
    const nextFingerprints = [summary, ...otherEntries]
        .sort((left, right) => right.last_seen_at.localeCompare(left.last_seen_at))
        .slice(0, REPEATED_FAILURE_STATE_LIMIT);
    writeRepeatedFailureState(input.projectRoot, {
        schema_version: '1',
        fingerprints: nextFingerprints,
    });
    return summary;
}
|
|
135
|
+
/**
 * Builds one high-severity repeated-failure risk record.
 *
 * @param {string} code - Risk code. `'repeated_verification_failure'` yields
 *   a `'contradiction'` verdict effect; every other code yields `'blocker'`.
 * @param {object} currentFingerprint - Fingerprint record of the current
 *   failure; its plan id, fingerprint and component hashes are copied over.
 * @param {string} previousStatus - Status of the previous verify summary.
 * @returns {object} The risk record, including a human-readable `detail`.
 */
function createRepeatedFailureRisk(code, currentFingerprint, previousStatus) {
    let detail;
    switch (code) {
        case 'repeated_verification_failure':
            detail = 'The previous verify summary has the same failure fingerprint and an unresolved status; provide new evidence or a narrower hypothesis before marking the task complete.';
            break;
        case 'no_new_evidence_since_previous_failure':
            detail = 'The previous verify summary has the same plan, failed-intent hash, and affected-surface hash; provide new source or reproduction evidence before treating the next completion claim as verifiable.';
            break;
        default:
            detail = 'The same unresolved failure fingerprint has repeated three or more times; new evidence is required before another completion claim can be treated as verifiable.';
    }
    const verdictEffect = code === 'repeated_verification_failure' ? 'contradiction' : 'blocker';
    const {
        verification_plan_id: planId,
        fingerprint,
        failed_intents_hash: failedIntentsHash,
        risk_codes_hash: riskCodesHash,
        affected_surfaces_hash: affectedSurfacesHash,
    } = currentFingerprint;
    return {
        code,
        severity: 'high',
        verdict_effect: verdictEffect,
        previous_status: previousStatus,
        verification_plan_id: planId,
        failure_fingerprint: fingerprint,
        failed_intents_hash: failedIntentsHash,
        risk_codes_hash: riskCodesHash,
        affected_surfaces_hash: affectedSurfacesHash,
        detail,
    };
}
|
|
154
|
+
/**
 * Compares the current failure with the previous one and lists the
 * repeated-failure risks that apply.
 *
 * No risks are reported unless both fingerprints and the previous status are
 * non-null and both the previous and current statuses are unresolved. Then:
 * - an identical fingerprint adds `repeated_verification_failure`;
 * - a different fingerprint with the same plan id, failed-intent hash and
 *   affected-surface hash adds `no_new_evidence_since_previous_failure`;
 * - a current summary seen at least three times and flagged
 *   `requires_new_evidence` adds `repeated_failure_requires_new_evidence`.
 *
 * @param {object} input - `previousFailureFingerprint`, `previousStatus`,
 *   `currentFailureFingerprint`, `currentStatus`, optional `currentSummary`.
 * @returns {object[]} Risk records in the order listed above; possibly empty.
 */
export function createRepeatedFailureRisks(input) {
    const current = input.currentFailureFingerprint;
    const previous = input.previousFailureFingerprint;
    if (previous === null || input.previousStatus === null || current === null) {
        return [];
    }
    const bothUnresolved = UNRESOLVED_VERIFY_STATUSES.has(input.previousStatus) &&
        UNRESOLVED_VERIFY_STATUSES.has(input.currentStatus);
    if (!bothUnresolved) {
        return [];
    }
    const applicableCodes = [];
    if (previous.fingerprint === current.fingerprint) {
        applicableCodes.push('repeated_verification_failure');
    }
    else if (previous.verification_plan_id === current.verification_plan_id &&
        previous.failed_intents_hash === current.failed_intents_hash &&
        previous.affected_surfaces_hash === current.affected_surfaces_hash) {
        applicableCodes.push('no_new_evidence_since_previous_failure');
    }
    const summary = input.currentSummary;
    if ((summary?.seen_count ?? 0) >= 3 && summary?.requires_new_evidence === true) {
        applicableCodes.push('repeated_failure_requires_new_evidence');
    }
    return applicableCodes.map((code) => createRepeatedFailureRisk(code, current, input.previousStatus));
}
|