invarlock 0.3.5__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +1 -1
- invarlock/_data/runtime/tiers.yaml +57 -30
- invarlock/adapters/__init__.py +1 -1
- invarlock/calibration/spectral_null.py +15 -10
- invarlock/calibration/variance_ve.py +0 -2
- invarlock/cli/commands/calibrate.py +6 -2
- invarlock/cli/commands/certify.py +58 -39
- invarlock/cli/commands/doctor.py +3 -1
- invarlock/cli/commands/explain_gates.py +57 -8
- invarlock/cli/commands/report.py +1 -1
- invarlock/cli/commands/run.py +159 -61
- invarlock/cli/commands/verify.py +78 -4
- invarlock/cli/config.py +21 -5
- invarlock/core/api.py +45 -5
- invarlock/core/auto_tuning.py +65 -20
- invarlock/core/contracts.py +7 -1
- invarlock/core/registry.py +2 -2
- invarlock/core/runner.py +314 -50
- invarlock/eval/bench.py +0 -13
- invarlock/eval/data.py +14 -28
- invarlock/eval/metrics.py +4 -1
- invarlock/eval/primary_metric.py +23 -0
- invarlock/eval/tail_stats.py +230 -0
- invarlock/guards/_estimators.py +154 -0
- invarlock/guards/policies.py +16 -6
- invarlock/guards/rmt.py +625 -544
- invarlock/guards/spectral.py +348 -110
- invarlock/guards/tier_config.py +32 -30
- invarlock/guards/variance.py +5 -29
- invarlock/guards_ref/rmt_ref.py +23 -23
- invarlock/model_profile.py +42 -15
- invarlock/reporting/certificate.py +225 -46
- invarlock/reporting/certificate_schema.py +2 -1
- invarlock/reporting/dataset_hashing.py +15 -2
- invarlock/reporting/guards_analysis.py +197 -274
- invarlock/reporting/normalizer.py +6 -0
- invarlock/reporting/policy_utils.py +38 -36
- invarlock/reporting/primary_metric_utils.py +71 -17
- invarlock/reporting/render.py +61 -0
- invarlock/reporting/report.py +1 -1
- invarlock/reporting/report_types.py +5 -2
- invarlock/reporting/validate.py +1 -18
- {invarlock-0.3.5.dist-info → invarlock-0.3.6.dist-info}/METADATA +6 -6
- {invarlock-0.3.5.dist-info → invarlock-0.3.6.dist-info}/RECORD +48 -46
- {invarlock-0.3.5.dist-info → invarlock-0.3.6.dist-info}/WHEEL +0 -0
- {invarlock-0.3.5.dist-info → invarlock-0.3.6.dist-info}/entry_points.txt +0 -0
- {invarlock-0.3.5.dist-info → invarlock-0.3.6.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.5.dist-info → invarlock-0.3.6.dist-info}/top_level.txt +0 -0
|
@@ -1,18 +1,30 @@
|
|
|
1
1
|
# mypy: ignore-errors
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
|
|
4
|
+
import hashlib
|
|
5
|
+
import json
|
|
4
6
|
import math
|
|
5
7
|
from typing import Any, no_type_check
|
|
6
8
|
|
|
7
9
|
from invarlock.core.auto_tuning import get_tier_policies
|
|
8
10
|
|
|
9
|
-
from .policy_utils import
|
|
11
|
+
from .policy_utils import _resolve_policy_tier
|
|
10
12
|
from .report_types import RunReport
|
|
11
13
|
|
|
12
14
|
|
|
15
|
+
def _measurement_contract_digest(contract: Any) -> str | None:
|
|
16
|
+
if not isinstance(contract, dict) or not contract:
|
|
17
|
+
return None
|
|
18
|
+
try:
|
|
19
|
+
canonical = json.dumps(contract, sort_keys=True, default=str)
|
|
20
|
+
except Exception:
|
|
21
|
+
return None
|
|
22
|
+
return hashlib.sha256(canonical.encode()).hexdigest()[:16]
|
|
23
|
+
|
|
24
|
+
|
|
13
25
|
@no_type_check
|
|
14
26
|
def _extract_invariants(report: RunReport) -> dict[str, Any]:
|
|
15
|
-
"""Extract invariant check results (matches
|
|
27
|
+
"""Extract invariant check results (matches the shape used in tests)."""
|
|
16
28
|
invariants_data = (report.get("metrics", {}) or {}).get("invariants", {})
|
|
17
29
|
failures: list[dict[str, Any]] = []
|
|
18
30
|
summary: dict[str, Any] = {}
|
|
@@ -299,10 +311,10 @@ def _extract_spectral_analysis(
|
|
|
299
311
|
families: dict[str, dict[str, Any]] = {}
|
|
300
312
|
family_caps: dict[str, dict[str, float]] = {}
|
|
301
313
|
top_z_scores: dict[str, list[dict[str, Any]]] = {}
|
|
314
|
+
deadband_used: float | None = None
|
|
302
315
|
|
|
303
316
|
if isinstance(guard_metrics, dict):
|
|
304
317
|
# Resolve deadband from policy/metrics/defaults
|
|
305
|
-
deadband_used: float | None = None
|
|
306
318
|
try:
|
|
307
319
|
db_raw = guard_policy.get("deadband") if guard_policy else None
|
|
308
320
|
if db_raw is None and isinstance(guard_metrics, dict):
|
|
@@ -314,16 +326,12 @@ def _extract_spectral_analysis(
|
|
|
314
326
|
except Exception:
|
|
315
327
|
deadband_used = None
|
|
316
328
|
|
|
317
|
-
# Resolve sigma_quantile for summary
|
|
329
|
+
# Resolve sigma_quantile for summary
|
|
318
330
|
sigma_q_used: float | None = None
|
|
319
331
|
try:
|
|
320
332
|
pol_sq = None
|
|
321
333
|
if isinstance(guard_policy, dict):
|
|
322
|
-
pol_sq = (
|
|
323
|
-
guard_policy.get("sigma_quantile")
|
|
324
|
-
or guard_policy.get("contraction")
|
|
325
|
-
or guard_policy.get("kappa")
|
|
326
|
-
)
|
|
334
|
+
pol_sq = guard_policy.get("sigma_quantile")
|
|
327
335
|
if pol_sq is None:
|
|
328
336
|
pol_sq = default_sigma_quantile
|
|
329
337
|
if pol_sq is not None:
|
|
@@ -371,7 +379,7 @@ def _extract_spectral_analysis(
|
|
|
371
379
|
else {}
|
|
372
380
|
)
|
|
373
381
|
if not families:
|
|
374
|
-
# Prefer z-summary when available; accept
|
|
382
|
+
# Prefer z-summary when available; accept 'family_stats' too
|
|
375
383
|
fzs = guard_metrics.get("family_z_summary")
|
|
376
384
|
if not isinstance(fzs, dict) or not fzs:
|
|
377
385
|
fzs = guard_metrics.get("family_stats")
|
|
@@ -493,7 +501,7 @@ def _extract_spectral_analysis(
|
|
|
493
501
|
for source in sources:
|
|
494
502
|
if not isinstance(source, dict):
|
|
495
503
|
continue
|
|
496
|
-
candidate = source.get("multiple_testing")
|
|
504
|
+
candidate = source.get("multiple_testing")
|
|
497
505
|
if isinstance(candidate, dict) and candidate:
|
|
498
506
|
return candidate
|
|
499
507
|
return None
|
|
@@ -505,20 +513,13 @@ def _extract_spectral_analysis(
|
|
|
505
513
|
policy_out: dict[str, Any] | None = None
|
|
506
514
|
if isinstance(guard_policy, dict) and guard_policy:
|
|
507
515
|
policy_out = dict(guard_policy)
|
|
508
|
-
_promote_legacy_multiple_testing_key(policy_out)
|
|
509
516
|
if default_sigma_quantile is not None:
|
|
510
|
-
sq = (
|
|
511
|
-
policy_out.get("sigma_quantile")
|
|
512
|
-
or policy_out.get("contraction")
|
|
513
|
-
or policy_out.get("kappa")
|
|
514
|
-
)
|
|
517
|
+
sq = policy_out.get("sigma_quantile")
|
|
515
518
|
if sq is not None:
|
|
516
519
|
try:
|
|
517
520
|
policy_out["sigma_quantile"] = float(sq)
|
|
518
521
|
except Exception:
|
|
519
522
|
pass
|
|
520
|
-
policy_out.pop("contraction", None)
|
|
521
|
-
policy_out.pop("kappa", None)
|
|
522
523
|
if tier == "balanced":
|
|
523
524
|
policy_out["correction_enabled"] = False
|
|
524
525
|
policy_out["max_spectral_norm"] = None
|
|
@@ -532,7 +533,7 @@ def _extract_spectral_analysis(
|
|
|
532
533
|
"families": families,
|
|
533
534
|
"family_caps": family_caps,
|
|
534
535
|
}
|
|
535
|
-
#
|
|
536
|
+
# Surface a stable/capped status on the summary for schema parity.
|
|
536
537
|
try:
|
|
537
538
|
summary["status"] = "stable" if int(caps_applied) == 0 else "capped"
|
|
538
539
|
except Exception:
|
|
@@ -594,6 +595,40 @@ def _extract_spectral_analysis(
|
|
|
594
595
|
result["top_violations"] = top_violations
|
|
595
596
|
if family_quantiles:
|
|
596
597
|
result["family_z_quantiles"] = family_quantiles
|
|
598
|
+
result["evaluated"] = bool(spectral_guard)
|
|
599
|
+
|
|
600
|
+
measurement_contract = None
|
|
601
|
+
try:
|
|
602
|
+
mc = (
|
|
603
|
+
guard_metrics.get("measurement_contract")
|
|
604
|
+
if isinstance(guard_metrics, dict)
|
|
605
|
+
else None
|
|
606
|
+
)
|
|
607
|
+
if isinstance(mc, dict) and mc:
|
|
608
|
+
measurement_contract = mc
|
|
609
|
+
except Exception:
|
|
610
|
+
measurement_contract = None
|
|
611
|
+
baseline_contract = None
|
|
612
|
+
try:
|
|
613
|
+
bc = (
|
|
614
|
+
baseline_spectral.get("measurement_contract")
|
|
615
|
+
if isinstance(baseline_spectral, dict)
|
|
616
|
+
else None
|
|
617
|
+
)
|
|
618
|
+
if isinstance(bc, dict) and bc:
|
|
619
|
+
baseline_contract = bc
|
|
620
|
+
except Exception:
|
|
621
|
+
baseline_contract = None
|
|
622
|
+
mc_hash = _measurement_contract_digest(measurement_contract)
|
|
623
|
+
baseline_hash = _measurement_contract_digest(baseline_contract)
|
|
624
|
+
if measurement_contract is not None:
|
|
625
|
+
result["measurement_contract"] = measurement_contract
|
|
626
|
+
if mc_hash:
|
|
627
|
+
result["measurement_contract_hash"] = mc_hash
|
|
628
|
+
if baseline_hash:
|
|
629
|
+
result["baseline_measurement_contract_hash"] = baseline_hash
|
|
630
|
+
if mc_hash and baseline_hash:
|
|
631
|
+
result["measurement_contract_match"] = bool(mc_hash == baseline_hash)
|
|
597
632
|
result["caps_exceeded"] = bool(caps_exceeded)
|
|
598
633
|
try:
|
|
599
634
|
summary["caps_exceeded"] = bool(caps_exceeded)
|
|
@@ -624,24 +659,22 @@ def _extract_spectral_analysis(
|
|
|
624
659
|
def _extract_rmt_analysis(
|
|
625
660
|
report: RunReport, baseline: dict[str, Any]
|
|
626
661
|
) -> dict[str, Any]:
|
|
662
|
+
"""Extract RMT analysis using activation edge-risk ε-band semantics."""
|
|
627
663
|
tier = _resolve_policy_tier(report)
|
|
628
664
|
tier_policies = get_tier_policies()
|
|
629
665
|
tier_defaults = tier_policies.get(tier, tier_policies.get("balanced", {}))
|
|
666
|
+
|
|
630
667
|
default_epsilon_map = (
|
|
631
668
|
tier_defaults.get("rmt", {}).get("epsilon_by_family")
|
|
632
669
|
if isinstance(tier_defaults, dict)
|
|
633
670
|
else {}
|
|
634
671
|
)
|
|
635
|
-
if not default_epsilon_map and isinstance(tier_defaults, dict):
|
|
636
|
-
default_epsilon_map = (tier_defaults.get("rmt", {}) or {}).get("epsilon", {})
|
|
637
672
|
default_epsilon_map = {
|
|
638
673
|
str(family): float(value)
|
|
639
674
|
for family, value in (default_epsilon_map or {}).items()
|
|
640
|
-
if isinstance(value, int | float)
|
|
675
|
+
if isinstance(value, int | float) and math.isfinite(float(value))
|
|
641
676
|
}
|
|
642
677
|
|
|
643
|
-
outliers_guarded = 0
|
|
644
|
-
outliers_bare = 0
|
|
645
678
|
epsilon_default = 0.1
|
|
646
679
|
try:
|
|
647
680
|
eps_def = (
|
|
@@ -653,278 +686,168 @@ def _extract_rmt_analysis(
|
|
|
653
686
|
epsilon_default = float(eps_def)
|
|
654
687
|
except Exception:
|
|
655
688
|
pass
|
|
656
|
-
stable = True
|
|
657
|
-
explicit_stability = False
|
|
658
|
-
max_ratio = 0.0
|
|
659
|
-
max_deviation_ratio = 1.0
|
|
660
|
-
mean_deviation_ratio = 1.0
|
|
661
|
-
epsilon_map: dict[str, float] = {}
|
|
662
|
-
baseline_outliers_per_family: dict[str, int] = {}
|
|
663
|
-
outliers_per_family: dict[str, int] = {}
|
|
664
|
-
epsilon_violations: list[Any] = []
|
|
665
|
-
margin_used = None
|
|
666
|
-
deadband_used = None
|
|
667
|
-
policy_out: dict[str, Any] | None = None
|
|
668
689
|
|
|
690
|
+
baseline_rmt = baseline.get("rmt", {}) if isinstance(baseline, dict) else {}
|
|
691
|
+
baseline_edge_by_family: dict[str, float] = {}
|
|
692
|
+
baseline_contract = None
|
|
693
|
+
if isinstance(baseline_rmt, dict) and baseline_rmt:
|
|
694
|
+
bc = baseline_rmt.get("measurement_contract")
|
|
695
|
+
if isinstance(bc, dict) and bc:
|
|
696
|
+
baseline_contract = bc
|
|
697
|
+
base = baseline_rmt.get("edge_risk_by_family") or baseline_rmt.get(
|
|
698
|
+
"edge_risk_by_family_base"
|
|
699
|
+
)
|
|
700
|
+
if isinstance(base, dict):
|
|
701
|
+
for k, v in base.items():
|
|
702
|
+
if isinstance(v, int | float) and math.isfinite(float(v)):
|
|
703
|
+
baseline_edge_by_family[str(k)] = float(v)
|
|
704
|
+
|
|
705
|
+
rmt_guard = None
|
|
706
|
+
guard_metrics: dict[str, Any] = {}
|
|
707
|
+
guard_policy: dict[str, Any] = {}
|
|
669
708
|
for guard in report.get("guards", []) or []:
|
|
670
709
|
if str(guard.get("name", "")).lower() == "rmt":
|
|
710
|
+
rmt_guard = guard
|
|
671
711
|
guard_metrics = guard.get("metrics", {}) or {}
|
|
672
712
|
guard_policy = guard.get("policy", {}) or {}
|
|
673
|
-
if isinstance(guard_policy, dict) and guard_policy:
|
|
674
|
-
policy_out = dict(guard_policy)
|
|
675
|
-
if "epsilon_by_family" not in policy_out and isinstance(
|
|
676
|
-
policy_out.get("epsilon"), dict
|
|
677
|
-
):
|
|
678
|
-
policy_out["epsilon_by_family"] = dict(policy_out["epsilon"])
|
|
679
|
-
if isinstance(policy_out.get("margin"), int | float) and math.isfinite(
|
|
680
|
-
float(policy_out.get("margin"))
|
|
681
|
-
):
|
|
682
|
-
margin_used = float(policy_out.get("margin"))
|
|
683
|
-
if isinstance(
|
|
684
|
-
policy_out.get("deadband"), int | float
|
|
685
|
-
) and math.isfinite(float(policy_out.get("deadband"))):
|
|
686
|
-
deadband_used = float(policy_out.get("deadband"))
|
|
687
|
-
if isinstance(
|
|
688
|
-
policy_out.get("epsilon_default"), int | float
|
|
689
|
-
) and math.isfinite(float(policy_out.get("epsilon_default"))):
|
|
690
|
-
epsilon_default = float(policy_out.get("epsilon_default"))
|
|
691
|
-
if isinstance(
|
|
692
|
-
guard_metrics.get("epsilon_default"), int | float
|
|
693
|
-
) and math.isfinite(float(guard_metrics.get("epsilon_default"))):
|
|
694
|
-
epsilon_default = float(guard_metrics.get("epsilon_default"))
|
|
695
|
-
outliers_guarded = guard_metrics.get(
|
|
696
|
-
"rmt_outliers", guard_metrics.get("layers_flagged", outliers_guarded)
|
|
697
|
-
)
|
|
698
|
-
max_ratio = guard_metrics.get("max_ratio", 0.0)
|
|
699
|
-
epsilon_map = guard_metrics.get("epsilon_by_family", {}) or epsilon_map
|
|
700
|
-
if not epsilon_map and isinstance(guard_policy, dict):
|
|
701
|
-
eps_src = guard_policy.get("epsilon_by_family") or guard_policy.get(
|
|
702
|
-
"epsilon"
|
|
703
|
-
)
|
|
704
|
-
if isinstance(eps_src, dict):
|
|
705
|
-
try:
|
|
706
|
-
epsilon_map = {
|
|
707
|
-
str(k): float(v)
|
|
708
|
-
for k, v in eps_src.items()
|
|
709
|
-
if isinstance(v, int | float) and math.isfinite(float(v))
|
|
710
|
-
}
|
|
711
|
-
except Exception:
|
|
712
|
-
pass
|
|
713
|
-
baseline_outliers_per_family = (
|
|
714
|
-
guard_metrics.get("baseline_outliers_per_family", {})
|
|
715
|
-
or baseline_outliers_per_family
|
|
716
|
-
)
|
|
717
|
-
outliers_per_family = (
|
|
718
|
-
guard_metrics.get("outliers_per_family", {}) or outliers_per_family
|
|
719
|
-
)
|
|
720
|
-
epsilon_violations = guard_metrics.get(
|
|
721
|
-
"epsilon_violations", epsilon_violations
|
|
722
|
-
)
|
|
723
|
-
if outliers_per_family:
|
|
724
|
-
outliers_guarded = sum(
|
|
725
|
-
int(v)
|
|
726
|
-
for v in outliers_per_family.values()
|
|
727
|
-
if isinstance(v, int | float)
|
|
728
|
-
)
|
|
729
|
-
if baseline_outliers_per_family:
|
|
730
|
-
outliers_bare = sum(
|
|
731
|
-
int(v)
|
|
732
|
-
for v in baseline_outliers_per_family.values()
|
|
733
|
-
if isinstance(v, int | float)
|
|
734
|
-
)
|
|
735
|
-
flagged_rate = guard_metrics.get("flagged_rate", 0.0)
|
|
736
|
-
stable = flagged_rate <= 0.5
|
|
737
|
-
max_mp_ratio = guard_metrics.get("max_mp_ratio_final", 0.0)
|
|
738
|
-
mean_mp_ratio = guard_metrics.get("mean_mp_ratio_final", 0.0)
|
|
739
|
-
|
|
740
|
-
baseline_max = None
|
|
741
|
-
baseline_mean = None
|
|
742
|
-
baseline_rmt = baseline.get("rmt", {}) if isinstance(baseline, dict) else {}
|
|
743
|
-
if baseline_rmt:
|
|
744
|
-
baseline_max = baseline_rmt.get(
|
|
745
|
-
"max_mp_ratio", baseline_rmt.get("max_mp_ratio_final")
|
|
746
|
-
)
|
|
747
|
-
baseline_mean = baseline_rmt.get(
|
|
748
|
-
"mean_mp_ratio", baseline_rmt.get("mean_mp_ratio_final")
|
|
749
|
-
)
|
|
750
|
-
outliers_bare = baseline_rmt.get(
|
|
751
|
-
"outliers", baseline_rmt.get("rmt_outliers", 0)
|
|
752
|
-
)
|
|
753
|
-
if baseline_max is None:
|
|
754
|
-
baseline_metrics = (
|
|
755
|
-
baseline.get("metrics", {}) if isinstance(baseline, dict) else {}
|
|
756
|
-
)
|
|
757
|
-
if "rmt" in baseline_metrics:
|
|
758
|
-
baseline_rmt_metrics = baseline_metrics["rmt"]
|
|
759
|
-
baseline_max = baseline_rmt_metrics.get("max_mp_ratio_final")
|
|
760
|
-
baseline_mean = baseline_rmt_metrics.get("mean_mp_ratio_final")
|
|
761
|
-
if baseline_max is None and isinstance(guard.get("baseline_metrics"), dict):
|
|
762
|
-
gb = guard.get("baseline_metrics")
|
|
763
|
-
baseline_max = gb.get("max_mp_ratio")
|
|
764
|
-
baseline_mean = gb.get("mean_mp_ratio")
|
|
765
|
-
if baseline_max is not None and baseline_max > 0:
|
|
766
|
-
max_deviation_ratio = max_mp_ratio / baseline_max
|
|
767
|
-
else:
|
|
768
|
-
max_deviation_ratio = 1.0
|
|
769
|
-
if baseline_mean is not None and baseline_mean > 0:
|
|
770
|
-
mean_deviation_ratio = mean_mp_ratio / baseline_mean
|
|
771
|
-
else:
|
|
772
|
-
mean_deviation_ratio = 1.0
|
|
773
|
-
if isinstance(guard_metrics.get("stable"), bool):
|
|
774
|
-
stable = bool(guard_metrics.get("stable"))
|
|
775
|
-
explicit_stability = True
|
|
776
713
|
break
|
|
777
714
|
|
|
778
|
-
|
|
779
|
-
if
|
|
780
|
-
|
|
781
|
-
if isinstance(
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
except Exception:
|
|
807
|
-
pass
|
|
808
|
-
except Exception:
|
|
809
|
-
continue
|
|
810
|
-
try:
|
|
811
|
-
if outliers_bare == 0:
|
|
812
|
-
outliers_bare = int(rmt_top.get("outliers", 0) or 0)
|
|
813
|
-
except Exception:
|
|
814
|
-
pass
|
|
815
|
-
|
|
816
|
-
# If stability not explicitly provided, derive from outlier behavior
|
|
817
|
-
if not explicit_stability:
|
|
818
|
-
try:
|
|
819
|
-
if outliers_guarded == 0 and outliers_bare == 0:
|
|
820
|
-
stable = True
|
|
821
|
-
elif outliers_guarded <= outliers_bare:
|
|
822
|
-
stable = True
|
|
823
|
-
else:
|
|
824
|
-
stable = (outliers_guarded - outliers_bare) / max(
|
|
825
|
-
outliers_bare, 1
|
|
826
|
-
) <= 0.5
|
|
827
|
-
except Exception:
|
|
828
|
-
pass
|
|
715
|
+
policy_out: dict[str, Any] | None = None
|
|
716
|
+
if isinstance(guard_policy, dict) and guard_policy:
|
|
717
|
+
policy_out = dict(guard_policy)
|
|
718
|
+
if isinstance(policy_out.get("epsilon_default"), int | float) and math.isfinite(
|
|
719
|
+
float(policy_out.get("epsilon_default"))
|
|
720
|
+
):
|
|
721
|
+
epsilon_default = float(policy_out.get("epsilon_default"))
|
|
722
|
+
|
|
723
|
+
if isinstance(guard_metrics.get("epsilon_default"), int | float) and math.isfinite(
|
|
724
|
+
float(guard_metrics.get("epsilon_default"))
|
|
725
|
+
):
|
|
726
|
+
epsilon_default = float(guard_metrics.get("epsilon_default"))
|
|
727
|
+
|
|
728
|
+
edge_base: dict[str, float] = {}
|
|
729
|
+
edge_cur: dict[str, float] = {}
|
|
730
|
+
if isinstance(guard_metrics, dict) and guard_metrics:
|
|
731
|
+
base = guard_metrics.get("edge_risk_by_family_base") or {}
|
|
732
|
+
cur = guard_metrics.get("edge_risk_by_family") or {}
|
|
733
|
+
if isinstance(base, dict):
|
|
734
|
+
for k, v in base.items():
|
|
735
|
+
if isinstance(v, int | float) and math.isfinite(float(v)):
|
|
736
|
+
edge_base[str(k)] = float(v)
|
|
737
|
+
if isinstance(cur, dict):
|
|
738
|
+
for k, v in cur.items():
|
|
739
|
+
if isinstance(v, int | float) and math.isfinite(float(v)):
|
|
740
|
+
edge_cur[str(k)] = float(v)
|
|
741
|
+
if not edge_base and baseline_edge_by_family:
|
|
742
|
+
edge_base = dict(baseline_edge_by_family)
|
|
829
743
|
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
744
|
+
epsilon_map: dict[str, float] = {}
|
|
745
|
+
eps_src = guard_metrics.get("epsilon_by_family") or {}
|
|
746
|
+
if not eps_src and isinstance(guard_policy, dict):
|
|
747
|
+
eps_src = guard_policy.get("epsilon_by_family") or {}
|
|
748
|
+
if isinstance(eps_src, dict):
|
|
749
|
+
for k, v in eps_src.items():
|
|
750
|
+
if isinstance(v, int | float) and math.isfinite(float(v)):
|
|
751
|
+
epsilon_map[str(k)] = float(v)
|
|
752
|
+
|
|
753
|
+
epsilon_violations = guard_metrics.get("epsilon_violations") or []
|
|
754
|
+
if not (isinstance(epsilon_violations, list) and epsilon_violations):
|
|
755
|
+
epsilon_violations = []
|
|
756
|
+
families = set(edge_cur) | set(edge_base)
|
|
757
|
+
for family in families:
|
|
758
|
+
base = float(edge_base.get(family, 0.0) or 0.0)
|
|
759
|
+
cur = float(edge_cur.get(family, 0.0) or 0.0)
|
|
760
|
+
if base <= 0.0:
|
|
761
|
+
continue
|
|
762
|
+
eps = float(
|
|
763
|
+
epsilon_map.get(
|
|
764
|
+
family, default_epsilon_map.get(family, epsilon_default)
|
|
847
765
|
)
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
766
|
+
)
|
|
767
|
+
allowed = (1.0 + eps) * base
|
|
768
|
+
if cur > allowed:
|
|
769
|
+
delta = (cur / base) - 1.0 if base > 0 else float("inf")
|
|
770
|
+
epsilon_violations.append(
|
|
771
|
+
{
|
|
772
|
+
"family": family,
|
|
773
|
+
"edge_base": base,
|
|
774
|
+
"edge_cur": cur,
|
|
775
|
+
"delta": float(delta),
|
|
776
|
+
"allowed": allowed,
|
|
777
|
+
"epsilon": eps,
|
|
778
|
+
}
|
|
860
779
|
)
|
|
861
|
-
except Exception:
|
|
862
|
-
pass
|
|
863
|
-
|
|
864
|
-
# Compute epsilon scalar (fallback) and detailed family breakdown
|
|
865
|
-
if epsilon_map:
|
|
866
|
-
epsilon_scalar = max(float(v) for v in epsilon_map.values())
|
|
867
|
-
elif default_epsilon_map:
|
|
868
|
-
try:
|
|
869
|
-
epsilon_scalar = max(float(v) for v in default_epsilon_map.values())
|
|
870
|
-
except Exception:
|
|
871
|
-
epsilon_scalar = float(epsilon_default)
|
|
872
|
-
else:
|
|
873
|
-
epsilon_scalar = float(epsilon_default)
|
|
874
|
-
try:
|
|
875
|
-
epsilon_scalar = round(float(epsilon_scalar), 3)
|
|
876
|
-
except Exception:
|
|
877
|
-
epsilon_scalar = float(epsilon_default)
|
|
878
780
|
|
|
879
|
-
|
|
880
|
-
try:
|
|
881
|
-
return int(v)
|
|
882
|
-
except (TypeError, ValueError):
|
|
883
|
-
return 0
|
|
781
|
+
stable = bool(guard_metrics.get("stable", not epsilon_violations))
|
|
884
782
|
|
|
885
|
-
|
|
886
|
-
set(
|
|
783
|
+
families_all = sorted(
|
|
784
|
+
set(edge_base) | set(edge_cur) | set(epsilon_map) | set(default_epsilon_map)
|
|
887
785
|
)
|
|
888
|
-
family_breakdown = {
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
786
|
+
family_breakdown: dict[str, dict[str, Any]] = {}
|
|
787
|
+
ratios: list[float] = []
|
|
788
|
+
deltas: list[float] = []
|
|
789
|
+
for family in families_all:
|
|
790
|
+
base = float(edge_base.get(family, 0.0) or 0.0)
|
|
791
|
+
cur = float(edge_cur.get(family, 0.0) or 0.0)
|
|
792
|
+
eps = float(
|
|
793
|
+
epsilon_map.get(family, default_epsilon_map.get(family, epsilon_default))
|
|
794
|
+
)
|
|
795
|
+
allowed = (1.0 + eps) * base if base > 0.0 else None
|
|
796
|
+
ratio = (cur / base) if base > 0.0 else None
|
|
797
|
+
delta = ((cur / base) - 1.0) if base > 0.0 else None
|
|
798
|
+
if isinstance(ratio, float) and math.isfinite(ratio):
|
|
799
|
+
ratios.append(ratio)
|
|
800
|
+
if isinstance(delta, float) and math.isfinite(delta):
|
|
801
|
+
deltas.append(delta)
|
|
802
|
+
family_breakdown[family] = {
|
|
803
|
+
"edge_base": base,
|
|
804
|
+
"edge_cur": cur,
|
|
805
|
+
"epsilon": eps,
|
|
806
|
+
"allowed": allowed,
|
|
807
|
+
"ratio": ratio,
|
|
808
|
+
"delta": delta,
|
|
893
809
|
}
|
|
894
|
-
for family in sorted(families)
|
|
895
|
-
}
|
|
896
810
|
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
811
|
+
measurement_contract = None
|
|
812
|
+
try:
|
|
813
|
+
mc = (
|
|
814
|
+
guard_metrics.get("measurement_contract")
|
|
815
|
+
if isinstance(guard_metrics, dict)
|
|
816
|
+
else None
|
|
817
|
+
)
|
|
818
|
+
if isinstance(mc, dict) and mc:
|
|
819
|
+
measurement_contract = mc
|
|
820
|
+
except Exception:
|
|
821
|
+
measurement_contract = None
|
|
822
|
+
|
|
823
|
+
mc_hash = _measurement_contract_digest(measurement_contract)
|
|
824
|
+
baseline_hash = _measurement_contract_digest(baseline_contract)
|
|
903
825
|
|
|
904
|
-
result = {
|
|
905
|
-
"
|
|
906
|
-
"
|
|
907
|
-
"
|
|
826
|
+
result: dict[str, Any] = {
|
|
827
|
+
"tier": tier,
|
|
828
|
+
"edge_risk_by_family_base": dict(edge_base),
|
|
829
|
+
"edge_risk_by_family": dict(edge_cur),
|
|
908
830
|
"epsilon_default": float(epsilon_default),
|
|
909
|
-
"epsilon_by_family": epsilon_map,
|
|
910
|
-
"
|
|
911
|
-
"baseline_outliers_per_family": baseline_outliers_per_family,
|
|
912
|
-
"delta_per_family": delta_per_family,
|
|
913
|
-
"delta_total": delta_total,
|
|
914
|
-
"epsilon_violations": epsilon_violations,
|
|
831
|
+
"epsilon_by_family": dict(epsilon_map),
|
|
832
|
+
"epsilon_violations": list(epsilon_violations),
|
|
915
833
|
"stable": stable,
|
|
916
834
|
"status": "stable" if stable else "unstable",
|
|
917
|
-
"
|
|
918
|
-
"
|
|
919
|
-
"
|
|
835
|
+
"max_edge_ratio": max(ratios) if ratios else None,
|
|
836
|
+
"max_edge_delta": max(deltas) if deltas else None,
|
|
837
|
+
"mean_edge_delta": (sum(deltas) / len(deltas)) if deltas else None,
|
|
920
838
|
"families": family_breakdown,
|
|
839
|
+
"evaluated": bool(rmt_guard),
|
|
921
840
|
}
|
|
922
|
-
if margin_used is not None:
|
|
923
|
-
result["margin"] = float(margin_used)
|
|
924
|
-
if deadband_used is not None:
|
|
925
|
-
result["deadband"] = float(deadband_used)
|
|
926
841
|
if policy_out:
|
|
927
842
|
result["policy"] = policy_out
|
|
843
|
+
if measurement_contract is not None:
|
|
844
|
+
result["measurement_contract"] = measurement_contract
|
|
845
|
+
if mc_hash:
|
|
846
|
+
result["measurement_contract_hash"] = mc_hash
|
|
847
|
+
if baseline_hash:
|
|
848
|
+
result["baseline_measurement_contract_hash"] = baseline_hash
|
|
849
|
+
if mc_hash and baseline_hash:
|
|
850
|
+
result["measurement_contract_match"] = bool(mc_hash == baseline_hash)
|
|
928
851
|
return result
|
|
929
852
|
|
|
930
853
|
|
|
@@ -183,6 +183,7 @@ def normalize_run_report(report: Mapping[str, Any] | RunReport) -> RunReport:
|
|
|
183
183
|
"spectral",
|
|
184
184
|
"rmt",
|
|
185
185
|
"invariants",
|
|
186
|
+
"primary_metric_tail",
|
|
186
187
|
"logloss_delta_ci",
|
|
187
188
|
"bootstrap",
|
|
188
189
|
"reduction",
|
|
@@ -237,6 +238,11 @@ def normalize_run_report(report: Mapping[str, Any] | RunReport) -> RunReport:
|
|
|
237
238
|
flags=flags,
|
|
238
239
|
)
|
|
239
240
|
|
|
241
|
+
# keep context when provided (profile/assurance provenance)
|
|
242
|
+
ctx = src.get("context")
|
|
243
|
+
if isinstance(ctx, Mapping):
|
|
244
|
+
out["context"] = dict(ctx)
|
|
245
|
+
|
|
240
246
|
# keep evaluation_windows if provided (for deeper pairing-based features)
|
|
241
247
|
ew = src.get("evaluation_windows")
|
|
242
248
|
if isinstance(ew, dict):
|