invarlock 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. invarlock/__init__.py +1 -1
  2. invarlock/_data/runtime/tiers.yaml +57 -30
  3. invarlock/adapters/__init__.py +1 -1
  4. invarlock/calibration/spectral_null.py +15 -10
  5. invarlock/calibration/variance_ve.py +0 -2
  6. invarlock/cli/commands/calibrate.py +6 -2
  7. invarlock/cli/commands/certify.py +58 -39
  8. invarlock/cli/commands/doctor.py +3 -1
  9. invarlock/cli/commands/explain_gates.py +57 -8
  10. invarlock/cli/commands/report.py +1 -1
  11. invarlock/cli/commands/run.py +159 -61
  12. invarlock/cli/commands/verify.py +78 -4
  13. invarlock/cli/config.py +21 -5
  14. invarlock/core/api.py +45 -5
  15. invarlock/core/auto_tuning.py +65 -20
  16. invarlock/core/contracts.py +7 -1
  17. invarlock/core/registry.py +2 -2
  18. invarlock/core/runner.py +314 -50
  19. invarlock/eval/bench.py +0 -13
  20. invarlock/eval/data.py +73 -283
  21. invarlock/eval/metrics.py +134 -4
  22. invarlock/eval/primary_metric.py +23 -0
  23. invarlock/eval/tail_stats.py +230 -0
  24. invarlock/guards/_estimators.py +154 -0
  25. invarlock/guards/policies.py +16 -6
  26. invarlock/guards/rmt.py +625 -544
  27. invarlock/guards/spectral.py +348 -110
  28. invarlock/guards/tier_config.py +32 -30
  29. invarlock/guards/variance.py +5 -29
  30. invarlock/guards_ref/rmt_ref.py +23 -23
  31. invarlock/model_profile.py +42 -15
  32. invarlock/reporting/certificate.py +225 -46
  33. invarlock/reporting/certificate_schema.py +2 -1
  34. invarlock/reporting/dataset_hashing.py +15 -2
  35. invarlock/reporting/guards_analysis.py +197 -274
  36. invarlock/reporting/normalizer.py +6 -0
  37. invarlock/reporting/policy_utils.py +38 -36
  38. invarlock/reporting/primary_metric_utils.py +71 -17
  39. invarlock/reporting/render.py +61 -0
  40. invarlock/reporting/report.py +1 -1
  41. invarlock/reporting/report_types.py +5 -2
  42. invarlock/reporting/validate.py +1 -18
  43. {invarlock-0.3.4.dist-info → invarlock-0.3.6.dist-info}/METADATA +6 -6
  44. {invarlock-0.3.4.dist-info → invarlock-0.3.6.dist-info}/RECORD +48 -46
  45. {invarlock-0.3.4.dist-info → invarlock-0.3.6.dist-info}/WHEEL +0 -0
  46. {invarlock-0.3.4.dist-info → invarlock-0.3.6.dist-info}/entry_points.txt +0 -0
  47. {invarlock-0.3.4.dist-info → invarlock-0.3.6.dist-info}/licenses/LICENSE +0 -0
  48. {invarlock-0.3.4.dist-info → invarlock-0.3.6.dist-info}/top_level.txt +0 -0
@@ -1,18 +1,30 @@
1
1
  # mypy: ignore-errors
2
2
  from __future__ import annotations
3
3
 
4
+ import hashlib
5
+ import json
4
6
  import math
5
7
  from typing import Any, no_type_check
6
8
 
7
9
  from invarlock.core.auto_tuning import get_tier_policies
8
10
 
9
- from .policy_utils import _promote_legacy_multiple_testing_key, _resolve_policy_tier
11
+ from .policy_utils import _resolve_policy_tier
10
12
  from .report_types import RunReport
11
13
 
12
14
 
15
+ def _measurement_contract_digest(contract: Any) -> str | None:
16
+ if not isinstance(contract, dict) or not contract:
17
+ return None
18
+ try:
19
+ canonical = json.dumps(contract, sort_keys=True, default=str)
20
+ except Exception:
21
+ return None
22
+ return hashlib.sha256(canonical.encode()).hexdigest()[:16]
23
+
24
+
13
25
  @no_type_check
14
26
  def _extract_invariants(report: RunReport) -> dict[str, Any]:
15
- """Extract invariant check results (matches legacy shape used in tests)."""
27
+ """Extract invariant check results (matches the shape used in tests)."""
16
28
  invariants_data = (report.get("metrics", {}) or {}).get("invariants", {})
17
29
  failures: list[dict[str, Any]] = []
18
30
  summary: dict[str, Any] = {}
@@ -299,10 +311,10 @@ def _extract_spectral_analysis(
299
311
  families: dict[str, dict[str, Any]] = {}
300
312
  family_caps: dict[str, dict[str, float]] = {}
301
313
  top_z_scores: dict[str, list[dict[str, Any]]] = {}
314
+ deadband_used: float | None = None
302
315
 
303
316
  if isinstance(guard_metrics, dict):
304
317
  # Resolve deadband from policy/metrics/defaults
305
- deadband_used: float | None = None
306
318
  try:
307
319
  db_raw = guard_policy.get("deadband") if guard_policy else None
308
320
  if db_raw is None and isinstance(guard_metrics, dict):
@@ -314,16 +326,12 @@ def _extract_spectral_analysis(
314
326
  except Exception:
315
327
  deadband_used = None
316
328
 
317
- # Resolve sigma_quantile for summary (policy aliases supported)
329
+ # Resolve sigma_quantile for summary
318
330
  sigma_q_used: float | None = None
319
331
  try:
320
332
  pol_sq = None
321
333
  if isinstance(guard_policy, dict):
322
- pol_sq = (
323
- guard_policy.get("sigma_quantile")
324
- or guard_policy.get("contraction")
325
- or guard_policy.get("kappa")
326
- )
334
+ pol_sq = guard_policy.get("sigma_quantile")
327
335
  if pol_sq is None:
328
336
  pol_sq = default_sigma_quantile
329
337
  if pol_sq is not None:
@@ -371,7 +379,7 @@ def _extract_spectral_analysis(
371
379
  else {}
372
380
  )
373
381
  if not families:
374
- # Prefer z-summary when available; accept legacy 'family_stats' too
382
+ # Prefer z-summary when available; accept 'family_stats' too
375
383
  fzs = guard_metrics.get("family_z_summary")
376
384
  if not isinstance(fzs, dict) or not fzs:
377
385
  fzs = guard_metrics.get("family_stats")
@@ -493,7 +501,7 @@ def _extract_spectral_analysis(
493
501
  for source in sources:
494
502
  if not isinstance(source, dict):
495
503
  continue
496
- candidate = source.get("multiple_testing") or source.get("multipletesting")
504
+ candidate = source.get("multiple_testing")
497
505
  if isinstance(candidate, dict) and candidate:
498
506
  return candidate
499
507
  return None
@@ -505,20 +513,13 @@ def _extract_spectral_analysis(
505
513
  policy_out: dict[str, Any] | None = None
506
514
  if isinstance(guard_policy, dict) and guard_policy:
507
515
  policy_out = dict(guard_policy)
508
- _promote_legacy_multiple_testing_key(policy_out)
509
516
  if default_sigma_quantile is not None:
510
- sq = (
511
- policy_out.get("sigma_quantile")
512
- or policy_out.get("contraction")
513
- or policy_out.get("kappa")
514
- )
517
+ sq = policy_out.get("sigma_quantile")
515
518
  if sq is not None:
516
519
  try:
517
520
  policy_out["sigma_quantile"] = float(sq)
518
521
  except Exception:
519
522
  pass
520
- policy_out.pop("contraction", None)
521
- policy_out.pop("kappa", None)
522
523
  if tier == "balanced":
523
524
  policy_out["correction_enabled"] = False
524
525
  policy_out["max_spectral_norm"] = None
@@ -532,7 +533,7 @@ def _extract_spectral_analysis(
532
533
  "families": families,
533
534
  "family_caps": family_caps,
534
535
  }
535
- # Attach status to summary for backward-compatibility in tests
536
+ # Surface a stable/capped status on the summary for schema parity.
536
537
  try:
537
538
  summary["status"] = "stable" if int(caps_applied) == 0 else "capped"
538
539
  except Exception:
@@ -594,6 +595,40 @@ def _extract_spectral_analysis(
594
595
  result["top_violations"] = top_violations
595
596
  if family_quantiles:
596
597
  result["family_z_quantiles"] = family_quantiles
598
+ result["evaluated"] = bool(spectral_guard)
599
+
600
+ measurement_contract = None
601
+ try:
602
+ mc = (
603
+ guard_metrics.get("measurement_contract")
604
+ if isinstance(guard_metrics, dict)
605
+ else None
606
+ )
607
+ if isinstance(mc, dict) and mc:
608
+ measurement_contract = mc
609
+ except Exception:
610
+ measurement_contract = None
611
+ baseline_contract = None
612
+ try:
613
+ bc = (
614
+ baseline_spectral.get("measurement_contract")
615
+ if isinstance(baseline_spectral, dict)
616
+ else None
617
+ )
618
+ if isinstance(bc, dict) and bc:
619
+ baseline_contract = bc
620
+ except Exception:
621
+ baseline_contract = None
622
+ mc_hash = _measurement_contract_digest(measurement_contract)
623
+ baseline_hash = _measurement_contract_digest(baseline_contract)
624
+ if measurement_contract is not None:
625
+ result["measurement_contract"] = measurement_contract
626
+ if mc_hash:
627
+ result["measurement_contract_hash"] = mc_hash
628
+ if baseline_hash:
629
+ result["baseline_measurement_contract_hash"] = baseline_hash
630
+ if mc_hash and baseline_hash:
631
+ result["measurement_contract_match"] = bool(mc_hash == baseline_hash)
597
632
  result["caps_exceeded"] = bool(caps_exceeded)
598
633
  try:
599
634
  summary["caps_exceeded"] = bool(caps_exceeded)
@@ -624,24 +659,22 @@ def _extract_spectral_analysis(
624
659
  def _extract_rmt_analysis(
625
660
  report: RunReport, baseline: dict[str, Any]
626
661
  ) -> dict[str, Any]:
662
+ """Extract RMT analysis using activation edge-risk ε-band semantics."""
627
663
  tier = _resolve_policy_tier(report)
628
664
  tier_policies = get_tier_policies()
629
665
  tier_defaults = tier_policies.get(tier, tier_policies.get("balanced", {}))
666
+
630
667
  default_epsilon_map = (
631
668
  tier_defaults.get("rmt", {}).get("epsilon_by_family")
632
669
  if isinstance(tier_defaults, dict)
633
670
  else {}
634
671
  )
635
- if not default_epsilon_map and isinstance(tier_defaults, dict):
636
- default_epsilon_map = (tier_defaults.get("rmt", {}) or {}).get("epsilon", {})
637
672
  default_epsilon_map = {
638
673
  str(family): float(value)
639
674
  for family, value in (default_epsilon_map or {}).items()
640
- if isinstance(value, int | float)
675
+ if isinstance(value, int | float) and math.isfinite(float(value))
641
676
  }
642
677
 
643
- outliers_guarded = 0
644
- outliers_bare = 0
645
678
  epsilon_default = 0.1
646
679
  try:
647
680
  eps_def = (
@@ -653,278 +686,168 @@ def _extract_rmt_analysis(
653
686
  epsilon_default = float(eps_def)
654
687
  except Exception:
655
688
  pass
656
- stable = True
657
- explicit_stability = False
658
- max_ratio = 0.0
659
- max_deviation_ratio = 1.0
660
- mean_deviation_ratio = 1.0
661
- epsilon_map: dict[str, float] = {}
662
- baseline_outliers_per_family: dict[str, int] = {}
663
- outliers_per_family: dict[str, int] = {}
664
- epsilon_violations: list[Any] = []
665
- margin_used = None
666
- deadband_used = None
667
- policy_out: dict[str, Any] | None = None
668
689
 
690
+ baseline_rmt = baseline.get("rmt", {}) if isinstance(baseline, dict) else {}
691
+ baseline_edge_by_family: dict[str, float] = {}
692
+ baseline_contract = None
693
+ if isinstance(baseline_rmt, dict) and baseline_rmt:
694
+ bc = baseline_rmt.get("measurement_contract")
695
+ if isinstance(bc, dict) and bc:
696
+ baseline_contract = bc
697
+ base = baseline_rmt.get("edge_risk_by_family") or baseline_rmt.get(
698
+ "edge_risk_by_family_base"
699
+ )
700
+ if isinstance(base, dict):
701
+ for k, v in base.items():
702
+ if isinstance(v, int | float) and math.isfinite(float(v)):
703
+ baseline_edge_by_family[str(k)] = float(v)
704
+
705
+ rmt_guard = None
706
+ guard_metrics: dict[str, Any] = {}
707
+ guard_policy: dict[str, Any] = {}
669
708
  for guard in report.get("guards", []) or []:
670
709
  if str(guard.get("name", "")).lower() == "rmt":
710
+ rmt_guard = guard
671
711
  guard_metrics = guard.get("metrics", {}) or {}
672
712
  guard_policy = guard.get("policy", {}) or {}
673
- if isinstance(guard_policy, dict) and guard_policy:
674
- policy_out = dict(guard_policy)
675
- if "epsilon_by_family" not in policy_out and isinstance(
676
- policy_out.get("epsilon"), dict
677
- ):
678
- policy_out["epsilon_by_family"] = dict(policy_out["epsilon"])
679
- if isinstance(policy_out.get("margin"), int | float) and math.isfinite(
680
- float(policy_out.get("margin"))
681
- ):
682
- margin_used = float(policy_out.get("margin"))
683
- if isinstance(
684
- policy_out.get("deadband"), int | float
685
- ) and math.isfinite(float(policy_out.get("deadband"))):
686
- deadband_used = float(policy_out.get("deadband"))
687
- if isinstance(
688
- policy_out.get("epsilon_default"), int | float
689
- ) and math.isfinite(float(policy_out.get("epsilon_default"))):
690
- epsilon_default = float(policy_out.get("epsilon_default"))
691
- if isinstance(
692
- guard_metrics.get("epsilon_default"), int | float
693
- ) and math.isfinite(float(guard_metrics.get("epsilon_default"))):
694
- epsilon_default = float(guard_metrics.get("epsilon_default"))
695
- outliers_guarded = guard_metrics.get(
696
- "rmt_outliers", guard_metrics.get("layers_flagged", outliers_guarded)
697
- )
698
- max_ratio = guard_metrics.get("max_ratio", 0.0)
699
- epsilon_map = guard_metrics.get("epsilon_by_family", {}) or epsilon_map
700
- if not epsilon_map and isinstance(guard_policy, dict):
701
- eps_src = guard_policy.get("epsilon_by_family") or guard_policy.get(
702
- "epsilon"
703
- )
704
- if isinstance(eps_src, dict):
705
- try:
706
- epsilon_map = {
707
- str(k): float(v)
708
- for k, v in eps_src.items()
709
- if isinstance(v, int | float) and math.isfinite(float(v))
710
- }
711
- except Exception:
712
- pass
713
- baseline_outliers_per_family = (
714
- guard_metrics.get("baseline_outliers_per_family", {})
715
- or baseline_outliers_per_family
716
- )
717
- outliers_per_family = (
718
- guard_metrics.get("outliers_per_family", {}) or outliers_per_family
719
- )
720
- epsilon_violations = guard_metrics.get(
721
- "epsilon_violations", epsilon_violations
722
- )
723
- if outliers_per_family:
724
- outliers_guarded = sum(
725
- int(v)
726
- for v in outliers_per_family.values()
727
- if isinstance(v, int | float)
728
- )
729
- if baseline_outliers_per_family:
730
- outliers_bare = sum(
731
- int(v)
732
- for v in baseline_outliers_per_family.values()
733
- if isinstance(v, int | float)
734
- )
735
- flagged_rate = guard_metrics.get("flagged_rate", 0.0)
736
- stable = flagged_rate <= 0.5
737
- max_mp_ratio = guard_metrics.get("max_mp_ratio_final", 0.0)
738
- mean_mp_ratio = guard_metrics.get("mean_mp_ratio_final", 0.0)
739
-
740
- baseline_max = None
741
- baseline_mean = None
742
- baseline_rmt = baseline.get("rmt", {}) if isinstance(baseline, dict) else {}
743
- if baseline_rmt:
744
- baseline_max = baseline_rmt.get(
745
- "max_mp_ratio", baseline_rmt.get("max_mp_ratio_final")
746
- )
747
- baseline_mean = baseline_rmt.get(
748
- "mean_mp_ratio", baseline_rmt.get("mean_mp_ratio_final")
749
- )
750
- outliers_bare = baseline_rmt.get(
751
- "outliers", baseline_rmt.get("rmt_outliers", 0)
752
- )
753
- if baseline_max is None:
754
- baseline_metrics = (
755
- baseline.get("metrics", {}) if isinstance(baseline, dict) else {}
756
- )
757
- if "rmt" in baseline_metrics:
758
- baseline_rmt_metrics = baseline_metrics["rmt"]
759
- baseline_max = baseline_rmt_metrics.get("max_mp_ratio_final")
760
- baseline_mean = baseline_rmt_metrics.get("mean_mp_ratio_final")
761
- if baseline_max is None and isinstance(guard.get("baseline_metrics"), dict):
762
- gb = guard.get("baseline_metrics")
763
- baseline_max = gb.get("max_mp_ratio")
764
- baseline_mean = gb.get("mean_mp_ratio")
765
- if baseline_max is not None and baseline_max > 0:
766
- max_deviation_ratio = max_mp_ratio / baseline_max
767
- else:
768
- max_deviation_ratio = 1.0
769
- if baseline_mean is not None and baseline_mean > 0:
770
- mean_deviation_ratio = mean_mp_ratio / baseline_mean
771
- else:
772
- mean_deviation_ratio = 1.0
773
- if isinstance(guard_metrics.get("stable"), bool):
774
- stable = bool(guard_metrics.get("stable"))
775
- explicit_stability = True
776
713
  break
777
714
 
778
- # Fallback: use metrics.rmt and/or top-level rmt section when guard is absent
779
- if outliers_guarded == 0:
780
- rmt_metrics = (report.get("metrics", {}) or {}).get("rmt", {})
781
- if isinstance(rmt_metrics, dict):
782
- try:
783
- outliers_guarded = int(rmt_metrics.get("outliers", 0) or 0)
784
- except Exception:
785
- outliers_guarded = 0
786
- if isinstance(rmt_metrics.get("stable"), bool):
787
- stable = bool(rmt_metrics.get("stable"))
788
- explicit_stability = True
789
- rmt_top = report.get("rmt", {}) if isinstance(report.get("rmt"), dict) else {}
790
- if isinstance(rmt_top, dict):
791
- fams = rmt_top.get("families", {})
792
- if isinstance(fams, dict) and fams:
793
- for fam, rec in fams.items():
794
- if not isinstance(rec, dict):
795
- continue
796
- try:
797
- outliers_per_family[str(fam)] = int(
798
- rec.get("outliers_guarded", 0) or 0
799
- )
800
- baseline_outliers_per_family[str(fam)] = int(
801
- rec.get("outliers_bare", 0) or 0
802
- )
803
- if rec.get("epsilon") is not None:
804
- try:
805
- epsilon_map[str(fam)] = float(rec.get("epsilon"))
806
- except Exception:
807
- pass
808
- except Exception:
809
- continue
810
- try:
811
- if outliers_bare == 0:
812
- outliers_bare = int(rmt_top.get("outliers", 0) or 0)
813
- except Exception:
814
- pass
815
-
816
- # If stability not explicitly provided, derive from outlier behavior
817
- if not explicit_stability:
818
- try:
819
- if outliers_guarded == 0 and outliers_bare == 0:
820
- stable = True
821
- elif outliers_guarded <= outliers_bare:
822
- stable = True
823
- else:
824
- stable = (outliers_guarded - outliers_bare) / max(
825
- outliers_bare, 1
826
- ) <= 0.5
827
- except Exception:
828
- pass
715
+ policy_out: dict[str, Any] | None = None
716
+ if isinstance(guard_policy, dict) and guard_policy:
717
+ policy_out = dict(guard_policy)
718
+ if isinstance(policy_out.get("epsilon_default"), int | float) and math.isfinite(
719
+ float(policy_out.get("epsilon_default"))
720
+ ):
721
+ epsilon_default = float(policy_out.get("epsilon_default"))
722
+
723
+ if isinstance(guard_metrics.get("epsilon_default"), int | float) and math.isfinite(
724
+ float(guard_metrics.get("epsilon_default"))
725
+ ):
726
+ epsilon_default = float(guard_metrics.get("epsilon_default"))
727
+
728
+ edge_base: dict[str, float] = {}
729
+ edge_cur: dict[str, float] = {}
730
+ if isinstance(guard_metrics, dict) and guard_metrics:
731
+ base = guard_metrics.get("edge_risk_by_family_base") or {}
732
+ cur = guard_metrics.get("edge_risk_by_family") or {}
733
+ if isinstance(base, dict):
734
+ for k, v in base.items():
735
+ if isinstance(v, int | float) and math.isfinite(float(v)):
736
+ edge_base[str(k)] = float(v)
737
+ if isinstance(cur, dict):
738
+ for k, v in cur.items():
739
+ if isinstance(v, int | float) and math.isfinite(float(v)):
740
+ edge_cur[str(k)] = float(v)
741
+ if not edge_base and baseline_edge_by_family:
742
+ edge_base = dict(baseline_edge_by_family)
829
743
 
830
- delta_per_family = {
831
- k: int(outliers_per_family.get(k, 0))
832
- - int(baseline_outliers_per_family.get(k, 0))
833
- for k in set(outliers_per_family) | set(baseline_outliers_per_family)
834
- }
835
- delta_total = int(outliers_guarded) - int(outliers_bare)
836
- # Conservative baseline fallback when not available
837
- if outliers_bare == 0 and outliers_guarded > 0:
838
- # Assume baseline had fewer outliers to make acceptance harder
839
- outliers_bare = max(0, outliers_guarded - 1)
840
-
841
- # Recompute stability from epsilon rule when not explicitly provided
842
- if not explicit_stability:
843
- try:
844
- if outliers_per_family and baseline_outliers_per_family:
845
- families_union = set(outliers_per_family) | set(
846
- baseline_outliers_per_family
744
+ epsilon_map: dict[str, float] = {}
745
+ eps_src = guard_metrics.get("epsilon_by_family") or {}
746
+ if not eps_src and isinstance(guard_policy, dict):
747
+ eps_src = guard_policy.get("epsilon_by_family") or {}
748
+ if isinstance(eps_src, dict):
749
+ for k, v in eps_src.items():
750
+ if isinstance(v, int | float) and math.isfinite(float(v)):
751
+ epsilon_map[str(k)] = float(v)
752
+
753
+ epsilon_violations = guard_metrics.get("epsilon_violations") or []
754
+ if not (isinstance(epsilon_violations, list) and epsilon_violations):
755
+ epsilon_violations = []
756
+ families = set(edge_cur) | set(edge_base)
757
+ for family in families:
758
+ base = float(edge_base.get(family, 0.0) or 0.0)
759
+ cur = float(edge_cur.get(family, 0.0) or 0.0)
760
+ if base <= 0.0:
761
+ continue
762
+ eps = float(
763
+ epsilon_map.get(
764
+ family, default_epsilon_map.get(family, epsilon_default)
847
765
  )
848
- checks: list[bool] = []
849
- for fam in families_union:
850
- guarded = int(outliers_per_family.get(fam, 0) or 0)
851
- bare = int(baseline_outliers_per_family.get(fam, 0) or 0)
852
- eps_val = float(epsilon_map.get(fam, epsilon_default))
853
- allowed = math.ceil(bare * (1.0 + eps_val))
854
- checks.append(guarded <= allowed)
855
- if checks:
856
- stable = all(checks)
857
- elif outliers_bare > 0:
858
- stable = outliers_guarded <= (
859
- outliers_bare * (1.0 + float(epsilon_default))
766
+ )
767
+ allowed = (1.0 + eps) * base
768
+ if cur > allowed:
769
+ delta = (cur / base) - 1.0 if base > 0 else float("inf")
770
+ epsilon_violations.append(
771
+ {
772
+ "family": family,
773
+ "edge_base": base,
774
+ "edge_cur": cur,
775
+ "delta": float(delta),
776
+ "allowed": allowed,
777
+ "epsilon": eps,
778
+ }
860
779
  )
861
- except Exception:
862
- pass
863
-
864
- # Compute epsilon scalar (fallback) and detailed family breakdown
865
- if epsilon_map:
866
- epsilon_scalar = max(float(v) for v in epsilon_map.values())
867
- elif default_epsilon_map:
868
- try:
869
- epsilon_scalar = max(float(v) for v in default_epsilon_map.values())
870
- except Exception:
871
- epsilon_scalar = float(epsilon_default)
872
- else:
873
- epsilon_scalar = float(epsilon_default)
874
- try:
875
- epsilon_scalar = round(float(epsilon_scalar), 3)
876
- except Exception:
877
- epsilon_scalar = float(epsilon_default)
878
780
 
879
- def _to_int(v: Any) -> int:
880
- try:
881
- return int(v)
882
- except (TypeError, ValueError):
883
- return 0
781
+ stable = bool(guard_metrics.get("stable", not epsilon_violations))
884
782
 
885
- families = (
886
- set(outliers_per_family) | set(baseline_outliers_per_family) | set(epsilon_map)
783
+ families_all = sorted(
784
+ set(edge_base) | set(edge_cur) | set(epsilon_map) | set(default_epsilon_map)
887
785
  )
888
- family_breakdown = {
889
- family: {
890
- "bare": _to_int(baseline_outliers_per_family.get(family, 0)),
891
- "guarded": _to_int(outliers_per_family.get(family, 0)),
892
- "epsilon": float(epsilon_map.get(family, epsilon_scalar)),
786
+ family_breakdown: dict[str, dict[str, Any]] = {}
787
+ ratios: list[float] = []
788
+ deltas: list[float] = []
789
+ for family in families_all:
790
+ base = float(edge_base.get(family, 0.0) or 0.0)
791
+ cur = float(edge_cur.get(family, 0.0) or 0.0)
792
+ eps = float(
793
+ epsilon_map.get(family, default_epsilon_map.get(family, epsilon_default))
794
+ )
795
+ allowed = (1.0 + eps) * base if base > 0.0 else None
796
+ ratio = (cur / base) if base > 0.0 else None
797
+ delta = ((cur / base) - 1.0) if base > 0.0 else None
798
+ if isinstance(ratio, float) and math.isfinite(ratio):
799
+ ratios.append(ratio)
800
+ if isinstance(delta, float) and math.isfinite(delta):
801
+ deltas.append(delta)
802
+ family_breakdown[family] = {
803
+ "edge_base": base,
804
+ "edge_cur": cur,
805
+ "epsilon": eps,
806
+ "allowed": allowed,
807
+ "ratio": ratio,
808
+ "delta": delta,
893
809
  }
894
- for family in sorted(families)
895
- }
896
810
 
897
- # Stringify per-family dict keys for stability
898
- outliers_per_family = {str(k): _to_int(v) for k, v in outliers_per_family.items()}
899
- baseline_outliers_per_family = {
900
- str(k): _to_int(v) for k, v in baseline_outliers_per_family.items()
901
- }
902
- delta_per_family = {str(k): _to_int(v) for k, v in delta_per_family.items()}
811
+ measurement_contract = None
812
+ try:
813
+ mc = (
814
+ guard_metrics.get("measurement_contract")
815
+ if isinstance(guard_metrics, dict)
816
+ else None
817
+ )
818
+ if isinstance(mc, dict) and mc:
819
+ measurement_contract = mc
820
+ except Exception:
821
+ measurement_contract = None
822
+
823
+ mc_hash = _measurement_contract_digest(measurement_contract)
824
+ baseline_hash = _measurement_contract_digest(baseline_contract)
903
825
 
904
- result = {
905
- "outliers_bare": outliers_bare,
906
- "outliers_guarded": outliers_guarded,
907
- "epsilon": epsilon_scalar,
826
+ result: dict[str, Any] = {
827
+ "tier": tier,
828
+ "edge_risk_by_family_base": dict(edge_base),
829
+ "edge_risk_by_family": dict(edge_cur),
908
830
  "epsilon_default": float(epsilon_default),
909
- "epsilon_by_family": epsilon_map,
910
- "outliers_per_family": outliers_per_family,
911
- "baseline_outliers_per_family": baseline_outliers_per_family,
912
- "delta_per_family": delta_per_family,
913
- "delta_total": delta_total,
914
- "epsilon_violations": epsilon_violations,
831
+ "epsilon_by_family": dict(epsilon_map),
832
+ "epsilon_violations": list(epsilon_violations),
915
833
  "stable": stable,
916
834
  "status": "stable" if stable else "unstable",
917
- "max_ratio": max_ratio,
918
- "max_deviation_ratio": max_deviation_ratio,
919
- "mean_deviation_ratio": mean_deviation_ratio,
835
+ "max_edge_ratio": max(ratios) if ratios else None,
836
+ "max_edge_delta": max(deltas) if deltas else None,
837
+ "mean_edge_delta": (sum(deltas) / len(deltas)) if deltas else None,
920
838
  "families": family_breakdown,
839
+ "evaluated": bool(rmt_guard),
921
840
  }
922
- if margin_used is not None:
923
- result["margin"] = float(margin_used)
924
- if deadband_used is not None:
925
- result["deadband"] = float(deadband_used)
926
841
  if policy_out:
927
842
  result["policy"] = policy_out
843
+ if measurement_contract is not None:
844
+ result["measurement_contract"] = measurement_contract
845
+ if mc_hash:
846
+ result["measurement_contract_hash"] = mc_hash
847
+ if baseline_hash:
848
+ result["baseline_measurement_contract_hash"] = baseline_hash
849
+ if mc_hash and baseline_hash:
850
+ result["measurement_contract_match"] = bool(mc_hash == baseline_hash)
928
851
  return result
929
852
 
930
853
 
@@ -183,6 +183,7 @@ def normalize_run_report(report: Mapping[str, Any] | RunReport) -> RunReport:
183
183
  "spectral",
184
184
  "rmt",
185
185
  "invariants",
186
+ "primary_metric_tail",
186
187
  "logloss_delta_ci",
187
188
  "bootstrap",
188
189
  "reduction",
@@ -237,6 +238,11 @@ def normalize_run_report(report: Mapping[str, Any] | RunReport) -> RunReport:
237
238
  flags=flags,
238
239
  )
239
240
 
241
+ # keep context when provided (profile/assurance provenance)
242
+ ctx = src.get("context")
243
+ if isinstance(ctx, Mapping):
244
+ out["context"] = dict(ctx)
245
+
240
246
  # keep evaluation_windows if provided (for deeper pairing-based features)
241
247
  ew = src.get("evaluation_windows")
242
248
  if isinstance(ew, dict):