mustflow 2.11.0 → 2.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/dashboard.js +71 -2
- package/dist/cli/commands/explain-verify.js +11 -1
- package/dist/cli/commands/index.js +9 -0
- package/dist/cli/commands/verify.js +528 -30
- package/dist/cli/lib/local-index/constants.js +1 -1
- package/dist/cli/lib/local-index/index.js +708 -13
- package/dist/core/completion-verdict.js +151 -19
- package/dist/core/repeated-failure.js +172 -10
- package/dist/core/repro-evidence.js +119 -38
- package/dist/core/validation-ratchet.js +161 -17
- package/package.json +3 -3
- package/schemas/dashboard-export.schema.json +83 -0
- package/schemas/explain-report.schema.json +173 -1
- package/schemas/latest-run-pointer.schema.json +227 -10
- package/schemas/verify-report.schema.json +227 -10
- package/schemas/verify-run-manifest.schema.json +227 -10
- package/templates/default/manifest.toml +1 -1
|
@@ -4,13 +4,13 @@ import path from 'node:path';
|
|
|
4
4
|
import { createClassifyOutput } from './classify.js';
|
|
5
5
|
import { runRun } from './run.js';
|
|
6
6
|
import { createChangeVerificationReport, } from '../../core/change-verification.js';
|
|
7
|
-
import { createVerifyCompletionVerdict } from '../../core/completion-verdict.js';
|
|
7
|
+
import { createVerifyCompletionVerdict, } from '../../core/completion-verdict.js';
|
|
8
8
|
import { createExternalEvidenceRisks, } from '../../core/external-evidence.js';
|
|
9
|
-
import {
|
|
10
|
-
import { createReproEvidenceRisks, } from '../../core/repro-evidence.js';
|
|
9
|
+
import { createRepeatedFailureRisks, createVerificationFailureFingerprint, updateRepeatedFailureState, } from '../../core/repeated-failure.js';
|
|
10
|
+
import { countReproEvidenceVerdictEffects, createReproEvidenceRisks, } from '../../core/repro-evidence.js';
|
|
11
11
|
import { createVerifyEvidenceModel } from '../../core/verification-evidence.js';
|
|
12
12
|
import { createScopeDiffRisks } from '../../core/scope-risk.js';
|
|
13
|
-
import { createValidationRatchetRisks } from '../../core/validation-ratchet.js';
|
|
13
|
+
import { countValidationRatchetVerdictEffects, createValidationRatchetRisks, } from '../../core/validation-ratchet.js';
|
|
14
14
|
import { readCommandContract } from '../../core/config-loading.js';
|
|
15
15
|
import { printUsageError, renderHelp } from '../lib/cli-output.js';
|
|
16
16
|
import { t } from '../lib/i18n.js';
|
|
@@ -463,25 +463,212 @@ export function readInputFromPlan(projectRoot, inputPath) {
|
|
|
463
463
|
function isExternalEvidenceStatus(value) {
|
|
464
464
|
return value === 'passed' || value === 'failed' || value === 'cancelled' || value === 'unknown';
|
|
465
465
|
}
|
|
466
|
-
function
|
|
466
|
+
function isLegacyReproEvidenceStatus(value) {
|
|
467
467
|
return value === 'present' || value === 'unavailable' || value === 'missing';
|
|
468
468
|
}
|
|
469
|
+
function isReproBeforeFixStatus(value) {
|
|
470
|
+
return value === 'reproduced' || value === 'unavailable' || value === 'missing';
|
|
471
|
+
}
|
|
472
|
+
function isReproBeforeFixOutcome(value) {
|
|
473
|
+
return value === 'failed_as_expected' || value === 'failed_differently' || value === 'passed_unexpectedly' || value === null;
|
|
474
|
+
}
|
|
475
|
+
function isReproAfterFixStatus(value) {
|
|
476
|
+
return value === 'passed' || value === 'failed' || value === 'unavailable' || value === 'missing';
|
|
477
|
+
}
|
|
478
|
+
function isReproAfterFixOutcome(value) {
|
|
479
|
+
return value === 'passed_expected_behavior' || value === 'failed_same_route' || value === 'failed_differently' || value === null;
|
|
480
|
+
}
|
|
481
|
+
function isReproRegressionGuardStatus(value) {
|
|
482
|
+
return value === 'passed' || value === 'failed' || value === 'unavailable' || value === 'missing';
|
|
483
|
+
}
|
|
484
|
+
function isReproRouteKind(value) {
|
|
485
|
+
return (value === 'test' ||
|
|
486
|
+
value === 'cli' ||
|
|
487
|
+
value === 'browser' ||
|
|
488
|
+
value === 'api' ||
|
|
489
|
+
value === 'manual' ||
|
|
490
|
+
value === 'unknown' ||
|
|
491
|
+
value === null);
|
|
492
|
+
}
|
|
469
493
|
function readOptionalString(value) {
|
|
470
494
|
return typeof value === 'string' && value.length > 0 ? value : null;
|
|
471
495
|
}
|
|
472
|
-
function
|
|
496
|
+
function readRouteStep(value, index) {
|
|
497
|
+
if (!isPlainRecord(value)) {
|
|
498
|
+
return {
|
|
499
|
+
ordinal: index + 1,
|
|
500
|
+
action: null,
|
|
501
|
+
target: null,
|
|
502
|
+
input_digest: null,
|
|
503
|
+
observation_digest: null,
|
|
504
|
+
summary: null,
|
|
505
|
+
};
|
|
506
|
+
}
|
|
507
|
+
const ordinal = typeof value.ordinal === 'number' && Number.isInteger(value.ordinal) && value.ordinal > 0 ? value.ordinal : index + 1;
|
|
508
|
+
return {
|
|
509
|
+
ordinal,
|
|
510
|
+
action: readOptionalString(value.action),
|
|
511
|
+
target: readOptionalString(value.target),
|
|
512
|
+
input_digest: readOptionalString(value.input_digest),
|
|
513
|
+
observation_digest: readOptionalString(value.observation_digest),
|
|
514
|
+
summary: readOptionalString(value.summary),
|
|
515
|
+
};
|
|
516
|
+
}
|
|
517
|
+
function readReproductionRoute(value) {
|
|
518
|
+
if (!isPlainRecord(value)) {
|
|
519
|
+
return {
|
|
520
|
+
route_id: null,
|
|
521
|
+
route_kind: null,
|
|
522
|
+
route_digest: null,
|
|
523
|
+
failure_oracle_hash: null,
|
|
524
|
+
steps: [],
|
|
525
|
+
};
|
|
526
|
+
}
|
|
527
|
+
const routeKind = value.route_kind ?? null;
|
|
528
|
+
if (!isReproRouteKind(routeKind)) {
|
|
529
|
+
throw new Error('invalid_repro_evidence_file');
|
|
530
|
+
}
|
|
531
|
+
const rawSteps = Array.isArray(value.steps) ? value.steps : [];
|
|
532
|
+
return {
|
|
533
|
+
route_id: readOptionalString(value.route_id),
|
|
534
|
+
route_kind: routeKind,
|
|
535
|
+
route_digest: readOptionalString(value.route_digest),
|
|
536
|
+
failure_oracle_hash: readOptionalString(value.failure_oracle_hash),
|
|
537
|
+
steps: rawSteps.map((step, index) => readRouteStep(step, index)),
|
|
538
|
+
};
|
|
539
|
+
}
|
|
540
|
+
function readLegacyReproEvidenceItem(value) {
|
|
541
|
+
if (!isPlainRecord(value)) {
|
|
542
|
+
return {
|
|
543
|
+
status: 'missing',
|
|
544
|
+
summary: null,
|
|
545
|
+
reason: null,
|
|
546
|
+
};
|
|
547
|
+
}
|
|
548
|
+
if (!isLegacyReproEvidenceStatus(value.status)) {
|
|
549
|
+
throw new Error('invalid_repro_evidence_file');
|
|
550
|
+
}
|
|
551
|
+
return {
|
|
552
|
+
status: value.status,
|
|
553
|
+
summary: readOptionalString(value.summary),
|
|
554
|
+
reason: readOptionalString(value.reason),
|
|
555
|
+
};
|
|
556
|
+
}
|
|
557
|
+
function legacyBeforeFixEvidence(value) {
|
|
558
|
+
const item = readLegacyReproEvidenceItem(value);
|
|
559
|
+
return {
|
|
560
|
+
status: item.status === 'present' ? 'reproduced' : item.status,
|
|
561
|
+
outcome: item.status === 'present' ? 'failed_as_expected' : null,
|
|
562
|
+
receipt_path: null,
|
|
563
|
+
receipt_sha256: null,
|
|
564
|
+
verification_plan_id: null,
|
|
565
|
+
summary: item.summary,
|
|
566
|
+
reason: item.reason,
|
|
567
|
+
};
|
|
568
|
+
}
|
|
569
|
+
function legacyAfterFixEvidence(value) {
|
|
570
|
+
const item = readLegacyReproEvidenceItem(value);
|
|
571
|
+
return {
|
|
572
|
+
status: item.status === 'present' ? 'passed' : item.status,
|
|
573
|
+
outcome: item.status === 'present' ? 'passed_expected_behavior' : null,
|
|
574
|
+
same_route_as: null,
|
|
575
|
+
receipt_path: null,
|
|
576
|
+
receipt_sha256: null,
|
|
577
|
+
verification_plan_id: null,
|
|
578
|
+
summary: item.summary,
|
|
579
|
+
reason: item.reason,
|
|
580
|
+
};
|
|
581
|
+
}
|
|
582
|
+
function legacyRegressionGuardEvidence(value) {
|
|
583
|
+
const item = readLegacyReproEvidenceItem(value);
|
|
584
|
+
return {
|
|
585
|
+
status: item.status === 'present' ? 'passed' : item.status,
|
|
586
|
+
intent: null,
|
|
587
|
+
test_path: null,
|
|
588
|
+
receipt_path: null,
|
|
589
|
+
receipt_sha256: null,
|
|
590
|
+
verification_plan_id: null,
|
|
591
|
+
summary: item.summary,
|
|
592
|
+
reason: item.reason,
|
|
593
|
+
};
|
|
594
|
+
}
|
|
595
|
+
function readBeforeFixEvidence(value) {
|
|
596
|
+
if (!isPlainRecord(value)) {
|
|
597
|
+
return {
|
|
598
|
+
status: 'missing',
|
|
599
|
+
outcome: null,
|
|
600
|
+
receipt_path: null,
|
|
601
|
+
receipt_sha256: null,
|
|
602
|
+
verification_plan_id: null,
|
|
603
|
+
summary: null,
|
|
604
|
+
reason: null,
|
|
605
|
+
};
|
|
606
|
+
}
|
|
607
|
+
const outcome = value.outcome ?? null;
|
|
608
|
+
if (!isReproBeforeFixStatus(value.status) || !isReproBeforeFixOutcome(outcome)) {
|
|
609
|
+
throw new Error('invalid_repro_evidence_file');
|
|
610
|
+
}
|
|
611
|
+
return {
|
|
612
|
+
status: value.status,
|
|
613
|
+
outcome,
|
|
614
|
+
receipt_path: readOptionalString(value.receipt_path),
|
|
615
|
+
receipt_sha256: readOptionalString(value.receipt_sha256),
|
|
616
|
+
verification_plan_id: readOptionalString(value.verification_plan_id),
|
|
617
|
+
summary: readOptionalString(value.summary),
|
|
618
|
+
reason: readOptionalString(value.reason),
|
|
619
|
+
};
|
|
620
|
+
}
|
|
621
|
+
function readAfterFixEvidence(value) {
|
|
622
|
+
if (!isPlainRecord(value)) {
|
|
623
|
+
return {
|
|
624
|
+
status: 'missing',
|
|
625
|
+
outcome: null,
|
|
626
|
+
same_route_as: null,
|
|
627
|
+
receipt_path: null,
|
|
628
|
+
receipt_sha256: null,
|
|
629
|
+
verification_plan_id: null,
|
|
630
|
+
summary: null,
|
|
631
|
+
reason: null,
|
|
632
|
+
};
|
|
633
|
+
}
|
|
634
|
+
const outcome = value.outcome ?? null;
|
|
635
|
+
if (!isReproAfterFixStatus(value.status) || !isReproAfterFixOutcome(outcome)) {
|
|
636
|
+
throw new Error('invalid_repro_evidence_file');
|
|
637
|
+
}
|
|
638
|
+
return {
|
|
639
|
+
status: value.status,
|
|
640
|
+
outcome,
|
|
641
|
+
same_route_as: readOptionalString(value.same_route_as),
|
|
642
|
+
receipt_path: readOptionalString(value.receipt_path),
|
|
643
|
+
receipt_sha256: readOptionalString(value.receipt_sha256),
|
|
644
|
+
verification_plan_id: readOptionalString(value.verification_plan_id),
|
|
645
|
+
summary: readOptionalString(value.summary),
|
|
646
|
+
reason: readOptionalString(value.reason),
|
|
647
|
+
};
|
|
648
|
+
}
|
|
649
|
+
function readRegressionGuardEvidence(value) {
|
|
473
650
|
if (!isPlainRecord(value)) {
|
|
474
651
|
return {
|
|
475
652
|
status: 'missing',
|
|
653
|
+
intent: null,
|
|
654
|
+
test_path: null,
|
|
655
|
+
receipt_path: null,
|
|
656
|
+
receipt_sha256: null,
|
|
657
|
+
verification_plan_id: null,
|
|
476
658
|
summary: null,
|
|
477
659
|
reason: null,
|
|
478
660
|
};
|
|
479
661
|
}
|
|
480
|
-
if (!
|
|
662
|
+
if (!isReproRegressionGuardStatus(value.status)) {
|
|
481
663
|
throw new Error('invalid_repro_evidence_file');
|
|
482
664
|
}
|
|
483
665
|
return {
|
|
484
666
|
status: value.status,
|
|
667
|
+
intent: readOptionalString(value.intent),
|
|
668
|
+
test_path: readOptionalString(value.test_path),
|
|
669
|
+
receipt_path: readOptionalString(value.receipt_path),
|
|
670
|
+
receipt_sha256: readOptionalString(value.receipt_sha256),
|
|
671
|
+
verification_plan_id: readOptionalString(value.verification_plan_id),
|
|
485
672
|
summary: readOptionalString(value.summary),
|
|
486
673
|
reason: readOptionalString(value.reason),
|
|
487
674
|
};
|
|
@@ -498,16 +685,23 @@ function readReproEvidenceFile(projectRoot, inputPath) {
|
|
|
498
685
|
if (!isPlainRecord(parsed) || parsed.schema_version !== '1' || parsed.command !== 'repro-evidence') {
|
|
499
686
|
throw new Error('unsupported_repro_evidence_source');
|
|
500
687
|
}
|
|
688
|
+
const regressionGuard = isPlainRecord(parsed.regression_guard) && isReproRegressionGuardStatus(parsed.regression_guard.status)
|
|
689
|
+
? readRegressionGuardEvidence(parsed.regression_guard)
|
|
690
|
+
: legacyRegressionGuardEvidence(parsed.regression_guard);
|
|
501
691
|
return {
|
|
502
692
|
source: 'repro_first_debug',
|
|
503
693
|
authority: 'claim_evidence',
|
|
504
694
|
reported_symptom: readOptionalString(parsed.reported_symptom),
|
|
505
695
|
expected_behavior: readOptionalString(parsed.expected_behavior),
|
|
506
696
|
observed_behavior: readOptionalString(parsed.observed_behavior),
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
697
|
+
reproduction_route: readReproductionRoute(parsed.reproduction_route),
|
|
698
|
+
before_fix: isPlainRecord(parsed.before_fix)
|
|
699
|
+
? readBeforeFixEvidence(parsed.before_fix)
|
|
700
|
+
: legacyBeforeFixEvidence(parsed.evidence_before_fix),
|
|
701
|
+
after_fix: isPlainRecord(parsed.after_fix)
|
|
702
|
+
? readAfterFixEvidence(parsed.after_fix)
|
|
703
|
+
: legacyAfterFixEvidence(parsed.evidence_after_fix),
|
|
704
|
+
regression_guard: regressionGuard,
|
|
511
705
|
};
|
|
512
706
|
}
|
|
513
707
|
function readExternalEvidenceFile(projectRoot, inputPath) {
|
|
@@ -674,6 +868,210 @@ function summarizeResults(results) {
|
|
|
674
868
|
skipped,
|
|
675
869
|
};
|
|
676
870
|
}
|
|
871
|
+
function countUndeclaredWriteDrift(results) {
|
|
872
|
+
return results.filter((result) => {
|
|
873
|
+
const writeDrift = result.receipt?.write_drift;
|
|
874
|
+
if (typeof writeDrift !== 'object' || writeDrift === null) {
|
|
875
|
+
return false;
|
|
876
|
+
}
|
|
877
|
+
return writeDrift.has_undeclared_changes === true;
|
|
878
|
+
}).length;
|
|
879
|
+
}
|
|
880
|
+
function stringField(value) {
|
|
881
|
+
return typeof value === 'string' && value.length > 0 ? value : null;
|
|
882
|
+
}
|
|
883
|
+
function objectField(value) {
|
|
884
|
+
return typeof value === 'object' && value !== null ? value : null;
|
|
885
|
+
}
|
|
886
|
+
function performanceForResult(result) {
|
|
887
|
+
return objectField(result.receipt?.performance);
|
|
888
|
+
}
|
|
889
|
+
function resultSummaryForResult(result) {
|
|
890
|
+
return objectField(performanceForResult(result)?.result_summary);
|
|
891
|
+
}
|
|
892
|
+
function commandFingerprintForResult(result) {
|
|
893
|
+
return stringField(performanceForResult(result)?.command_fingerprint);
|
|
894
|
+
}
|
|
895
|
+
function exitCodeClassForResult(result) {
|
|
896
|
+
const resultSummary = resultSummaryForResult(result);
|
|
897
|
+
const explicitClass = stringField(resultSummary?.exit_code_class);
|
|
898
|
+
if (explicitClass) {
|
|
899
|
+
return explicitClass;
|
|
900
|
+
}
|
|
901
|
+
if (result.exit_code === null) {
|
|
902
|
+
return 'no_exit_code';
|
|
903
|
+
}
|
|
904
|
+
return result.exit_code === 0 ? 'success' : 'failure';
|
|
905
|
+
}
|
|
906
|
+
function timedOutForResult(result) {
|
|
907
|
+
const resultSummary = resultSummaryForResult(result);
|
|
908
|
+
return result.status === 'timed_out' || resultSummary?.timed_out === true;
|
|
909
|
+
}
|
|
910
|
+
function errorKindForResult(result) {
|
|
911
|
+
return stringField(resultSummaryForResult(result)?.error_kind) ?? (result.status === 'start_failed' ? 'start_failed' : null);
|
|
912
|
+
}
|
|
913
|
+
function failedResults(results) {
|
|
914
|
+
return results.filter((result) => !result.skipped &&
|
|
915
|
+
(result.status === 'failed' || result.status === 'timed_out' || result.status === 'start_failed'));
|
|
916
|
+
}
|
|
917
|
+
function createFailureFingerprintForVerify(input) {
|
|
918
|
+
const failures = failedResults(input.results);
|
|
919
|
+
return createVerificationFailureFingerprint({
|
|
920
|
+
verificationPlanId: input.verificationPlanId,
|
|
921
|
+
failedIntents: failures.map((result) => result.intent).filter((intent) => intent !== null),
|
|
922
|
+
exitCodeClasses: failures.map(exitCodeClassForResult).filter((value) => value !== null),
|
|
923
|
+
timeoutFlags: failures.map(timedOutForResult),
|
|
924
|
+
errorKinds: failures.map(errorKindForResult).filter((value) => value !== null),
|
|
925
|
+
riskCodes: input.riskCodes,
|
|
926
|
+
affectedSurfaces: input.report.requirements.flatMap((requirement) => requirement.surfaces),
|
|
927
|
+
commandFingerprints: failures.map(commandFingerprintForResult).filter((value) => value !== null),
|
|
928
|
+
});
|
|
929
|
+
}
|
|
930
|
+
function riskCodesForFailureFingerprint(input) {
|
|
931
|
+
const writeDriftRiskCodes = countUndeclaredWriteDrift(input.results) > 0 ? ['undeclared_write_drift'] : [];
|
|
932
|
+
return [
|
|
933
|
+
...input.sourceAnchorRisks.map(() => 'source_anchor_invariant_review_required'),
|
|
934
|
+
...input.scopeDiffRisks.map((risk) => risk.code),
|
|
935
|
+
...input.validationRatchetRisks.map((risk) => risk.code),
|
|
936
|
+
...input.reproEvidenceRisks.map((risk) => risk.code),
|
|
937
|
+
...input.externalEvidenceRisks.map((risk) => risk.code),
|
|
938
|
+
...writeDriftRiskCodes,
|
|
939
|
+
];
|
|
940
|
+
}
|
|
941
|
+
function createReceiptBindingEvidence(results, verificationPlanId) {
|
|
942
|
+
let planBoundCount = 0;
|
|
943
|
+
let planUnboundCount = 0;
|
|
944
|
+
let fingerprintBoundCount = 0;
|
|
945
|
+
let fingerprintUnboundCount = 0;
|
|
946
|
+
let currentStateBoundCount = 0;
|
|
947
|
+
let currentStateUnavailableCount = 0;
|
|
948
|
+
let staleCount = 0;
|
|
949
|
+
let planMismatchCount = 0;
|
|
950
|
+
for (const result of results) {
|
|
951
|
+
if (!result.receipt) {
|
|
952
|
+
continue;
|
|
953
|
+
}
|
|
954
|
+
const receiptPlanId = stringField(result.receipt.verification_plan_id);
|
|
955
|
+
const resultPlanId = result.verification_plan_id;
|
|
956
|
+
const resultPlanMatches = resultPlanId === verificationPlanId;
|
|
957
|
+
const receiptPlanMatches = receiptPlanId === verificationPlanId;
|
|
958
|
+
if (resultPlanMatches && receiptPlanMatches) {
|
|
959
|
+
planBoundCount += 1;
|
|
960
|
+
}
|
|
961
|
+
else if ((typeof resultPlanId === 'string' && resultPlanId !== verificationPlanId) ||
|
|
962
|
+
(receiptPlanId !== null && receiptPlanId !== verificationPlanId)) {
|
|
963
|
+
planMismatchCount += 1;
|
|
964
|
+
}
|
|
965
|
+
else {
|
|
966
|
+
planUnboundCount += 1;
|
|
967
|
+
}
|
|
968
|
+
const performance = objectField(result.receipt.performance);
|
|
969
|
+
const hasFingerprints = performance !== null &&
|
|
970
|
+
stringField(performance.command_fingerprint) !== null &&
|
|
971
|
+
stringField(performance.intent_fingerprint) !== null &&
|
|
972
|
+
stringField(performance.contract_fingerprint) !== null;
|
|
973
|
+
if (hasFingerprints) {
|
|
974
|
+
fingerprintBoundCount += 1;
|
|
975
|
+
}
|
|
976
|
+
else {
|
|
977
|
+
fingerprintUnboundCount += 1;
|
|
978
|
+
}
|
|
979
|
+
const currentStateBinding = stringField(result.receipt.head_tree_hash) ??
|
|
980
|
+
stringField(result.receipt.changed_files_hash) ??
|
|
981
|
+
stringField(result.receipt.current_state_hash);
|
|
982
|
+
if (currentStateBinding !== null) {
|
|
983
|
+
currentStateBoundCount += 1;
|
|
984
|
+
}
|
|
985
|
+
else {
|
|
986
|
+
currentStateUnavailableCount += 1;
|
|
987
|
+
}
|
|
988
|
+
if (!result.receipt_path || !result.receipt_sha256) {
|
|
989
|
+
staleCount += 1;
|
|
990
|
+
}
|
|
991
|
+
}
|
|
992
|
+
return {
|
|
993
|
+
plan_bound_count: planBoundCount,
|
|
994
|
+
plan_unbound_count: planUnboundCount,
|
|
995
|
+
fingerprint_bound_count: fingerprintBoundCount,
|
|
996
|
+
fingerprint_unbound_count: fingerprintUnboundCount,
|
|
997
|
+
current_state_bound_count: currentStateBoundCount,
|
|
998
|
+
current_state_unavailable_count: currentStateUnavailableCount,
|
|
999
|
+
stale_count: staleCount,
|
|
1000
|
+
plan_mismatch_count: planMismatchCount,
|
|
1001
|
+
};
|
|
1002
|
+
}
|
|
1003
|
+
function resultForSelectedIntent(results, intent) {
|
|
1004
|
+
return results.find((result) => result.intent === intent && result.status !== 'skipped') ?? null;
|
|
1005
|
+
}
|
|
1006
|
+
function createCriteriaEvidence(report, results) {
|
|
1007
|
+
const evidence = {
|
|
1008
|
+
total: report.requirements.length,
|
|
1009
|
+
covered: 0,
|
|
1010
|
+
partially_covered: 0,
|
|
1011
|
+
uncovered: 0,
|
|
1012
|
+
blocked: 0,
|
|
1013
|
+
contradicted: 0,
|
|
1014
|
+
};
|
|
1015
|
+
return report.requirements.reduce((current, requirement) => {
|
|
1016
|
+
const candidates = report.candidates.filter((candidate) => candidate.reason === requirement.reason);
|
|
1017
|
+
const selectedIntents = candidates
|
|
1018
|
+
.filter((candidate) => candidate.selectionState === 'selected')
|
|
1019
|
+
.map((candidate) => candidate.intent)
|
|
1020
|
+
.filter((intent) => intent !== null);
|
|
1021
|
+
const skippedIntents = candidates
|
|
1022
|
+
.filter((candidate) => candidate.status !== 'runnable')
|
|
1023
|
+
.map((candidate) => candidate.intent)
|
|
1024
|
+
.filter((intent) => intent !== null);
|
|
1025
|
+
const gapCount = report.gaps.filter((gap) => gap.reason === requirement.reason).length;
|
|
1026
|
+
const selectedResults = selectedIntents.map((intent) => resultForSelectedIntent(results, intent));
|
|
1027
|
+
if (selectedResults.some((result) => result?.status === 'failed' || result?.status === 'timed_out' || result?.status === 'start_failed')) {
|
|
1028
|
+
return { ...current, contradicted: current.contradicted + 1 };
|
|
1029
|
+
}
|
|
1030
|
+
if (gapCount > 0 || (selectedIntents.length === 0 && skippedIntents.length > 0)) {
|
|
1031
|
+
return { ...current, blocked: current.blocked + 1 };
|
|
1032
|
+
}
|
|
1033
|
+
if (selectedIntents.length === 0) {
|
|
1034
|
+
return { ...current, uncovered: current.uncovered + 1 };
|
|
1035
|
+
}
|
|
1036
|
+
if (skippedIntents.length > 0) {
|
|
1037
|
+
return { ...current, partially_covered: current.partially_covered + 1 };
|
|
1038
|
+
}
|
|
1039
|
+
if (selectedResults.every((result) => result?.status === 'passed')) {
|
|
1040
|
+
return { ...current, covered: current.covered + 1 };
|
|
1041
|
+
}
|
|
1042
|
+
return { ...current, uncovered: current.uncovered + 1 };
|
|
1043
|
+
}, evidence);
|
|
1044
|
+
}
|
|
1045
|
+
function createCompletionVerdictForResults(input) {
|
|
1046
|
+
const receiptBinding = createReceiptBindingEvidence(input.results, input.verificationPlanId);
|
|
1047
|
+
const receiptBindingRiskCount = receiptBinding.plan_unbound_count + receiptBinding.fingerprint_unbound_count;
|
|
1048
|
+
const repeatedFailureBlockerCount = input.repeatedFailureRisks.filter((risk) => risk.verdict_effect === 'blocker').length;
|
|
1049
|
+
return createVerifyCompletionVerdict({
|
|
1050
|
+
verificationPlanId: input.verificationPlanId,
|
|
1051
|
+
matchedIntents: input.summary.matched,
|
|
1052
|
+
ranIntents: input.summary.ran,
|
|
1053
|
+
passedIntents: input.summary.passed,
|
|
1054
|
+
failedIntents: input.summary.failed,
|
|
1055
|
+
skippedIntents: input.summary.skipped,
|
|
1056
|
+
receiptCount: input.results.filter((result) => result.receipt !== null).length,
|
|
1057
|
+
sourceAnchorRiskCount: input.sourceAnchorRiskCount,
|
|
1058
|
+
scopeDiffRiskCount: input.scopeDiffRiskCount,
|
|
1059
|
+
repeatedFailureCount: input.repeatedFailureRisks.length,
|
|
1060
|
+
repeatedFailureBlockerCount,
|
|
1061
|
+
validationRatchetRiskCount: input.validationRatchetRiskCount,
|
|
1062
|
+
validationRatchetContradictionCount: input.validationRatchetContradictionCount,
|
|
1063
|
+
reproEvidenceRiskCount: input.reproEvidenceRiskCount,
|
|
1064
|
+
reproEvidenceContradictionCount: input.reproEvidenceContradictionCount,
|
|
1065
|
+
reproEvidenceUnverifiedCount: input.reproEvidenceUnverifiedCount,
|
|
1066
|
+
externalEvidenceRiskCount: input.externalEvidenceRiskCount,
|
|
1067
|
+
writeDriftRiskCount: countUndeclaredWriteDrift(input.results),
|
|
1068
|
+
receiptBindingRiskCount,
|
|
1069
|
+
staleReceiptCount: receiptBinding.stale_count,
|
|
1070
|
+
planMismatchCount: receiptBinding.plan_mismatch_count,
|
|
1071
|
+
criteria: createCriteriaEvidence(input.report, input.results),
|
|
1072
|
+
receiptBinding,
|
|
1073
|
+
});
|
|
1074
|
+
}
|
|
677
1075
|
function getVerificationStatus(summary) {
|
|
678
1076
|
if (summary.failed > 0) {
|
|
679
1077
|
return 'failed';
|
|
@@ -689,6 +1087,35 @@ function getVerificationStatus(summary) {
|
|
|
689
1087
|
function isVerificationStatus(value) {
|
|
690
1088
|
return value === 'passed' || value === 'partial' || value === 'failed' || value === 'blocked';
|
|
691
1089
|
}
|
|
1090
|
+
function readVerificationFailureFingerprint(value) {
|
|
1091
|
+
const record = objectField(value);
|
|
1092
|
+
if (record?.schema_version !== '1' ||
|
|
1093
|
+
typeof record.fingerprint !== 'string' ||
|
|
1094
|
+
typeof record.verification_plan_id !== 'string' ||
|
|
1095
|
+
typeof record.failed_intents_hash !== 'string' ||
|
|
1096
|
+
typeof record.exit_code_classes_hash !== 'string' ||
|
|
1097
|
+
typeof record.timeout_flags_hash !== 'string' ||
|
|
1098
|
+
typeof record.error_kinds_hash !== 'string' ||
|
|
1099
|
+
typeof record.diagnostic_hash !== 'string' ||
|
|
1100
|
+
typeof record.risk_codes_hash !== 'string' ||
|
|
1101
|
+
typeof record.affected_surfaces_hash !== 'string' ||
|
|
1102
|
+
typeof record.command_fingerprints_hash !== 'string') {
|
|
1103
|
+
return null;
|
|
1104
|
+
}
|
|
1105
|
+
return {
|
|
1106
|
+
schema_version: '1',
|
|
1107
|
+
fingerprint: record.fingerprint,
|
|
1108
|
+
verification_plan_id: record.verification_plan_id,
|
|
1109
|
+
failed_intents_hash: record.failed_intents_hash,
|
|
1110
|
+
exit_code_classes_hash: record.exit_code_classes_hash,
|
|
1111
|
+
timeout_flags_hash: record.timeout_flags_hash,
|
|
1112
|
+
error_kinds_hash: record.error_kinds_hash,
|
|
1113
|
+
diagnostic_hash: record.diagnostic_hash,
|
|
1114
|
+
risk_codes_hash: record.risk_codes_hash,
|
|
1115
|
+
affected_surfaces_hash: record.affected_surfaces_hash,
|
|
1116
|
+
command_fingerprints_hash: record.command_fingerprints_hash,
|
|
1117
|
+
};
|
|
1118
|
+
}
|
|
692
1119
|
function readPreviousVerifyLatestSummary(projectRoot) {
|
|
693
1120
|
try {
|
|
694
1121
|
const parsed = JSON.parse(readFileSync(path.join(projectRoot, LATEST_RUN_RECEIPT_PATH), 'utf8'));
|
|
@@ -701,6 +1128,7 @@ function readPreviousVerifyLatestSummary(projectRoot) {
|
|
|
701
1128
|
return {
|
|
702
1129
|
verification_plan_id: parsed.verification_plan_id,
|
|
703
1130
|
status: parsed.status,
|
|
1131
|
+
failure_fingerprint: readVerificationFailureFingerprint(parsed.failure_fingerprint),
|
|
704
1132
|
};
|
|
705
1133
|
}
|
|
706
1134
|
catch {
|
|
@@ -750,7 +1178,7 @@ function createVerificationPlanId(report, contract) {
|
|
|
750
1178
|
};
|
|
751
1179
|
return hashTextSha256(stableJson(fingerprintSource));
|
|
752
1180
|
}
|
|
753
|
-
function writeVerifyRunReceipts(projectRoot, output, report, sourceAnchorRisks, scopeDiffRisks,
|
|
1181
|
+
function writeVerifyRunReceipts(projectRoot, output, report, sourceAnchorRisks, scopeDiffRisks, validationRatchetRisks, reproEvidence, externalChecks) {
|
|
754
1182
|
const runDir = path.join(projectRoot, VERIFY_RUN_DIR);
|
|
755
1183
|
const intentDir = path.join(runDir, 'intents');
|
|
756
1184
|
const receipts = [];
|
|
@@ -790,22 +1218,72 @@ function writeVerifyRunReceipts(projectRoot, output, report, sourceAnchorRisks,
|
|
|
790
1218
|
receipt,
|
|
791
1219
|
});
|
|
792
1220
|
}
|
|
1221
|
+
const reproEvidenceRisks = createReproEvidenceRisks(reproEvidence, {
|
|
1222
|
+
verificationPlanId: output.verification_plan_id,
|
|
1223
|
+
});
|
|
1224
|
+
const reproEvidenceVerdictEffects = countReproEvidenceVerdictEffects(reproEvidenceRisks);
|
|
1225
|
+
const validationRatchetVerdictEffects = countValidationRatchetVerdictEffects(validationRatchetRisks);
|
|
1226
|
+
const externalEvidenceRisks = createExternalEvidenceRisks(externalChecks);
|
|
1227
|
+
const failureFingerprint = createFailureFingerprintForVerify({
|
|
1228
|
+
verificationPlanId: output.verification_plan_id,
|
|
1229
|
+
report,
|
|
1230
|
+
results,
|
|
1231
|
+
riskCodes: riskCodesForFailureFingerprint({
|
|
1232
|
+
sourceAnchorRisks,
|
|
1233
|
+
scopeDiffRisks,
|
|
1234
|
+
validationRatchetRisks,
|
|
1235
|
+
reproEvidenceRisks,
|
|
1236
|
+
externalEvidenceRisks,
|
|
1237
|
+
results,
|
|
1238
|
+
}),
|
|
1239
|
+
});
|
|
1240
|
+
const repeatedFailureSummary = updateRepeatedFailureState({
|
|
1241
|
+
projectRoot,
|
|
1242
|
+
failureFingerprint,
|
|
1243
|
+
status: output.status,
|
|
1244
|
+
});
|
|
1245
|
+
const previousVerifyLatest = readPreviousVerifyLatestSummary(projectRoot);
|
|
1246
|
+
const finalRepeatedFailureRisks = createRepeatedFailureRisks({
|
|
1247
|
+
previousFailureFingerprint: previousVerifyLatest?.failure_fingerprint ?? null,
|
|
1248
|
+
previousStatus: previousVerifyLatest?.status ?? null,
|
|
1249
|
+
currentFailureFingerprint: failureFingerprint,
|
|
1250
|
+
currentStatus: output.status,
|
|
1251
|
+
currentSummary: repeatedFailureSummary,
|
|
1252
|
+
});
|
|
1253
|
+
const completionVerdict = createCompletionVerdictForResults({
|
|
1254
|
+
report,
|
|
1255
|
+
verificationPlanId: output.verification_plan_id,
|
|
1256
|
+
summary: output.summary,
|
|
1257
|
+
results,
|
|
1258
|
+
sourceAnchorRiskCount: sourceAnchorRisks.length,
|
|
1259
|
+
scopeDiffRiskCount: scopeDiffRisks.length,
|
|
1260
|
+
repeatedFailureRisks: finalRepeatedFailureRisks,
|
|
1261
|
+
validationRatchetRiskCount: validationRatchetRisks.length,
|
|
1262
|
+
validationRatchetContradictionCount: validationRatchetVerdictEffects.contradicted,
|
|
1263
|
+
reproEvidenceRiskCount: reproEvidenceRisks.length,
|
|
1264
|
+
reproEvidenceContradictionCount: reproEvidenceVerdictEffects.contradicted,
|
|
1265
|
+
reproEvidenceUnverifiedCount: reproEvidenceVerdictEffects.unverified,
|
|
1266
|
+
externalEvidenceRiskCount: externalEvidenceRisks.length,
|
|
1267
|
+
});
|
|
793
1268
|
const outputWithReceiptPaths = {
|
|
794
1269
|
...output,
|
|
1270
|
+
completion_verdict: completionVerdict,
|
|
1271
|
+
failure_fingerprint: failureFingerprint,
|
|
1272
|
+
repeated_failure_summary: repeatedFailureSummary,
|
|
795
1273
|
results,
|
|
796
1274
|
evidence_model: createVerifyEvidenceModel({
|
|
797
1275
|
report,
|
|
798
1276
|
results,
|
|
799
1277
|
verificationPlanId: output.verification_plan_id,
|
|
800
|
-
verdict:
|
|
1278
|
+
verdict: completionVerdict,
|
|
801
1279
|
sourceAnchorRisks,
|
|
802
1280
|
scopeDiffRisks,
|
|
803
|
-
repeatedFailureRisks,
|
|
1281
|
+
repeatedFailureRisks: finalRepeatedFailureRisks,
|
|
804
1282
|
validationRatchetRisks,
|
|
805
1283
|
reproEvidence,
|
|
806
|
-
reproEvidenceRisks
|
|
1284
|
+
reproEvidenceRisks,
|
|
807
1285
|
externalChecks,
|
|
808
|
-
externalEvidenceRisks
|
|
1286
|
+
externalEvidenceRisks,
|
|
809
1287
|
}),
|
|
810
1288
|
};
|
|
811
1289
|
const manifest = {
|
|
@@ -818,6 +1296,8 @@ function writeVerifyRunReceipts(projectRoot, output, report, sourceAnchorRisks,
|
|
|
818
1296
|
status: outputWithReceiptPaths.status,
|
|
819
1297
|
completion_verdict: outputWithReceiptPaths.completion_verdict,
|
|
820
1298
|
evidence_model: outputWithReceiptPaths.evidence_model,
|
|
1299
|
+
failure_fingerprint: outputWithReceiptPaths.failure_fingerprint,
|
|
1300
|
+
repeated_failure_summary: outputWithReceiptPaths.repeated_failure_summary,
|
|
821
1301
|
summary: outputWithReceiptPaths.summary,
|
|
822
1302
|
...(outputWithReceiptPaths.repro_evidence ? { repro_evidence: outputWithReceiptPaths.repro_evidence } : {}),
|
|
823
1303
|
...(outputWithReceiptPaths.external_checks ? { external_checks: outputWithReceiptPaths.external_checks } : {}),
|
|
@@ -835,6 +1315,8 @@ function writeVerifyRunReceipts(projectRoot, output, report, sourceAnchorRisks,
|
|
|
835
1315
|
status: outputWithReceiptPaths.status,
|
|
836
1316
|
completion_verdict: outputWithReceiptPaths.completion_verdict,
|
|
837
1317
|
evidence_model: outputWithReceiptPaths.evidence_model,
|
|
1318
|
+
failure_fingerprint: outputWithReceiptPaths.failure_fingerprint,
|
|
1319
|
+
repeated_failure_summary: outputWithReceiptPaths.repeated_failure_summary,
|
|
838
1320
|
summary: outputWithReceiptPaths.summary,
|
|
839
1321
|
...(outputWithReceiptPaths.repro_evidence ? { repro_evidence: outputWithReceiptPaths.repro_evidence } : {}),
|
|
840
1322
|
...(outputWithReceiptPaths.external_checks ? { external_checks: outputWithReceiptPaths.external_checks } : {}),
|
|
@@ -853,7 +1335,9 @@ async function createVerifyOutput(input, planSource, projectRoot, lang, reproEvi
|
|
|
853
1335
|
const sourceAnchorRisks = await readLocalSourceAnchorVerdictRisks(projectRoot, report.files);
|
|
854
1336
|
const scopeDiffRisks = createScopeDiffRisks(input.classificationReport);
|
|
855
1337
|
const validationRatchetRisks = createValidationRatchetRisks(input.classificationReport, projectRoot);
|
|
856
|
-
const
|
|
1338
|
+
const validationRatchetVerdictEffects = countValidationRatchetVerdictEffects(validationRatchetRisks);
|
|
1339
|
+
const reproEvidenceRisks = createReproEvidenceRisks(reproEvidence, { verificationPlanId });
|
|
1340
|
+
const reproEvidenceVerdictEffects = countReproEvidenceVerdictEffects(reproEvidenceRisks);
|
|
857
1341
|
const externalEvidenceRisks = createExternalEvidenceRisks(externalChecks);
|
|
858
1342
|
const results = [];
|
|
859
1343
|
for (const entry of report.schedule.entries) {
|
|
@@ -863,26 +1347,38 @@ async function createVerifyOutput(input, planSource, projectRoot, lang, reproEvi
|
|
|
863
1347
|
const summary = summarizeResults(results);
|
|
864
1348
|
const status = getVerificationStatus(summary);
|
|
865
1349
|
const previousVerifyLatest = readPreviousVerifyLatestSummary(projectRoot);
|
|
866
|
-
const
|
|
867
|
-
|
|
1350
|
+
const failureFingerprint = createFailureFingerprintForVerify({
|
|
1351
|
+
verificationPlanId,
|
|
1352
|
+
report,
|
|
1353
|
+
results,
|
|
1354
|
+
riskCodes: riskCodesForFailureFingerprint({
|
|
1355
|
+
sourceAnchorRisks,
|
|
1356
|
+
scopeDiffRisks,
|
|
1357
|
+
validationRatchetRisks,
|
|
1358
|
+
reproEvidenceRisks,
|
|
1359
|
+
externalEvidenceRisks,
|
|
1360
|
+
results,
|
|
1361
|
+
}),
|
|
1362
|
+
});
|
|
1363
|
+
const repeatedFailureRisks = createRepeatedFailureRisks({
|
|
1364
|
+
previousFailureFingerprint: previousVerifyLatest?.failure_fingerprint ?? null,
|
|
868
1365
|
previousStatus: previousVerifyLatest?.status ?? null,
|
|
869
|
-
|
|
1366
|
+
currentFailureFingerprint: failureFingerprint,
|
|
870
1367
|
currentStatus: status,
|
|
871
1368
|
});
|
|
872
|
-
const
|
|
873
|
-
|
|
1369
|
+
const completionVerdict = createCompletionVerdictForResults({
|
|
1370
|
+
report,
|
|
874
1371
|
verificationPlanId,
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
passedIntents: summary.passed,
|
|
878
|
-
failedIntents: summary.failed,
|
|
879
|
-
skippedIntents: summary.skipped,
|
|
880
|
-
receiptCount: results.filter((result) => result.receipt !== null).length,
|
|
1372
|
+
summary,
|
|
1373
|
+
results,
|
|
881
1374
|
sourceAnchorRiskCount: sourceAnchorRisks.length,
|
|
882
1375
|
scopeDiffRiskCount: scopeDiffRisks.length,
|
|
883
|
-
|
|
1376
|
+
repeatedFailureRisks,
|
|
884
1377
|
validationRatchetRiskCount: validationRatchetRisks.length,
|
|
1378
|
+
validationRatchetContradictionCount: validationRatchetVerdictEffects.contradicted,
|
|
885
1379
|
reproEvidenceRiskCount: reproEvidenceRisks.length,
|
|
1380
|
+
reproEvidenceContradictionCount: reproEvidenceVerdictEffects.contradicted,
|
|
1381
|
+
reproEvidenceUnverifiedCount: reproEvidenceVerdictEffects.unverified,
|
|
886
1382
|
externalEvidenceRiskCount: externalEvidenceRisks.length,
|
|
887
1383
|
});
|
|
888
1384
|
const evidenceModel = createVerifyEvidenceModel({
|
|
@@ -910,6 +1406,8 @@ async function createVerifyOutput(input, planSource, projectRoot, lang, reproEvi
|
|
|
910
1406
|
status,
|
|
911
1407
|
completion_verdict: completionVerdict,
|
|
912
1408
|
evidence_model: evidenceModel,
|
|
1409
|
+
failure_fingerprint: failureFingerprint,
|
|
1410
|
+
repeated_failure_summary: null,
|
|
913
1411
|
summary,
|
|
914
1412
|
...(reproEvidence ? { repro_evidence: reproEvidence } : {}),
|
|
915
1413
|
...(externalChecks.length > 0 ? { external_checks: externalChecks } : {}),
|
|
@@ -917,7 +1415,7 @@ async function createVerifyOutput(input, planSource, projectRoot, lang, reproEvi
|
|
|
917
1415
|
manifest_path: toPosixPath(VERIFY_MANIFEST_PATH),
|
|
918
1416
|
results,
|
|
919
1417
|
};
|
|
920
|
-
return writeVerifyRunReceipts(projectRoot, output, report, sourceAnchorRisks, scopeDiffRisks,
|
|
1418
|
+
return writeVerifyRunReceipts(projectRoot, output, report, sourceAnchorRisks, scopeDiffRisks, validationRatchetRisks, reproEvidence, externalChecks);
|
|
921
1419
|
}
|
|
922
1420
|
async function createPlanOnlyOutput(input, projectRoot) {
|
|
923
1421
|
const contract = readCommandContract(projectRoot);
|