invarlock 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +2 -2
- invarlock/_data/runtime/tiers.yaml +57 -30
- invarlock/adapters/__init__.py +11 -15
- invarlock/adapters/auto.py +35 -40
- invarlock/adapters/capabilities.py +2 -2
- invarlock/adapters/hf_causal.py +418 -0
- invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
- invarlock/adapters/hf_mixin.py +25 -4
- invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
- invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
- invarlock/calibration/spectral_null.py +15 -10
- invarlock/calibration/variance_ve.py +0 -2
- invarlock/cli/adapter_auto.py +31 -21
- invarlock/cli/app.py +73 -2
- invarlock/cli/commands/calibrate.py +6 -2
- invarlock/cli/commands/certify.py +651 -91
- invarlock/cli/commands/doctor.py +11 -11
- invarlock/cli/commands/explain_gates.py +57 -8
- invarlock/cli/commands/plugins.py +13 -9
- invarlock/cli/commands/report.py +233 -69
- invarlock/cli/commands/run.py +1066 -244
- invarlock/cli/commands/verify.py +154 -15
- invarlock/cli/config.py +22 -6
- invarlock/cli/doctor_helpers.py +4 -5
- invarlock/cli/output.py +193 -0
- invarlock/cli/provenance.py +1 -1
- invarlock/core/api.py +45 -5
- invarlock/core/auto_tuning.py +65 -20
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/contracts.py +7 -1
- invarlock/core/registry.py +11 -13
- invarlock/core/runner.py +425 -75
- invarlock/edits/quant_rtn.py +65 -37
- invarlock/eval/bench.py +3 -16
- invarlock/eval/data.py +82 -51
- invarlock/eval/metrics.py +63 -2
- invarlock/eval/primary_metric.py +23 -0
- invarlock/eval/tail_stats.py +230 -0
- invarlock/eval/tasks/__init__.py +12 -0
- invarlock/eval/tasks/classification.py +48 -0
- invarlock/eval/tasks/qa.py +36 -0
- invarlock/eval/tasks/text_generation.py +102 -0
- invarlock/guards/_estimators.py +154 -0
- invarlock/guards/invariants.py +19 -10
- invarlock/guards/policies.py +16 -6
- invarlock/guards/rmt.py +627 -546
- invarlock/guards/spectral.py +348 -110
- invarlock/guards/tier_config.py +32 -30
- invarlock/guards/variance.py +7 -31
- invarlock/guards_ref/rmt_ref.py +23 -23
- invarlock/model_profile.py +90 -42
- invarlock/observability/health.py +6 -6
- invarlock/observability/metrics.py +108 -0
- invarlock/reporting/certificate.py +384 -55
- invarlock/reporting/certificate_schema.py +3 -2
- invarlock/reporting/dataset_hashing.py +15 -2
- invarlock/reporting/guards_analysis.py +350 -277
- invarlock/reporting/html.py +55 -5
- invarlock/reporting/normalizer.py +13 -0
- invarlock/reporting/policy_utils.py +38 -36
- invarlock/reporting/primary_metric_utils.py +71 -17
- invarlock/reporting/render.py +852 -431
- invarlock/reporting/report.py +40 -4
- invarlock/reporting/report_types.py +11 -3
- invarlock/reporting/telemetry.py +86 -0
- invarlock/reporting/validate.py +1 -18
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/METADATA +27 -13
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/RECORD +72 -65
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
- invarlock/adapters/hf_gpt2.py +0 -404
- invarlock/adapters/hf_llama.py +0 -487
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
|
@@ -1,18 +1,32 @@
|
|
|
1
1
|
# mypy: ignore-errors
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
|
|
4
|
+
import hashlib
|
|
5
|
+
import json
|
|
4
6
|
import math
|
|
5
7
|
from typing import Any, no_type_check
|
|
6
8
|
|
|
7
9
|
from invarlock.core.auto_tuning import get_tier_policies
|
|
8
10
|
|
|
9
|
-
from .policy_utils import
|
|
11
|
+
from .policy_utils import _resolve_policy_tier
|
|
10
12
|
from .report_types import RunReport
|
|
11
13
|
|
|
12
14
|
|
|
15
|
+
def _measurement_contract_digest(contract: Any) -> str | None:
|
|
16
|
+
if not isinstance(contract, dict) or not contract:
|
|
17
|
+
return None
|
|
18
|
+
try:
|
|
19
|
+
canonical = json.dumps(contract, sort_keys=True, default=str)
|
|
20
|
+
except Exception:
|
|
21
|
+
return None
|
|
22
|
+
return hashlib.sha256(canonical.encode()).hexdigest()[:16]
|
|
23
|
+
|
|
24
|
+
|
|
13
25
|
@no_type_check
|
|
14
|
-
def _extract_invariants(
|
|
15
|
-
|
|
26
|
+
def _extract_invariants(
|
|
27
|
+
report: RunReport, baseline: RunReport | None = None
|
|
28
|
+
) -> dict[str, Any]:
|
|
29
|
+
"""Extract invariant check results (matches the shape used in tests)."""
|
|
16
30
|
invariants_data = (report.get("metrics", {}) or {}).get("invariants", {})
|
|
17
31
|
failures: list[dict[str, Any]] = []
|
|
18
32
|
summary: dict[str, Any] = {}
|
|
@@ -69,6 +83,108 @@ def _extract_invariants(report: RunReport) -> dict[str, Any]:
|
|
|
69
83
|
guard_entry = guard
|
|
70
84
|
break
|
|
71
85
|
|
|
86
|
+
baseline_guard_entry = None
|
|
87
|
+
if baseline is not None:
|
|
88
|
+
for guard in baseline.get("guards", []) or []:
|
|
89
|
+
if str(guard.get("name", "")).lower() == "invariants":
|
|
90
|
+
baseline_guard_entry = guard
|
|
91
|
+
break
|
|
92
|
+
|
|
93
|
+
def _coerce_checks(value: Any) -> dict[str, Any] | None:
|
|
94
|
+
return value if isinstance(value, dict) else None
|
|
95
|
+
|
|
96
|
+
def _extract_guard_checks(
|
|
97
|
+
entry: Any,
|
|
98
|
+
) -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
|
|
99
|
+
if not isinstance(entry, dict):
|
|
100
|
+
return None, None
|
|
101
|
+
details = entry.get("details")
|
|
102
|
+
if not isinstance(details, dict):
|
|
103
|
+
return None, None
|
|
104
|
+
return _coerce_checks(details.get("baseline_checks")), _coerce_checks(
|
|
105
|
+
details.get("current_checks")
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
def _compare_invariants(
|
|
109
|
+
baseline_checks: dict[str, Any],
|
|
110
|
+
current_checks: dict[str, Any],
|
|
111
|
+
) -> tuple[list[dict[str, Any]], int, int]:
|
|
112
|
+
violations: list[dict[str, Any]] = []
|
|
113
|
+
|
|
114
|
+
# LayerNorm coverage check
|
|
115
|
+
baseline_layer_norms = set(baseline_checks.get("layer_norm_paths", ()))
|
|
116
|
+
current_layer_norms = set(current_checks.get("layer_norm_paths", ()))
|
|
117
|
+
missing_layer_norms = sorted(baseline_layer_norms - current_layer_norms)
|
|
118
|
+
if missing_layer_norms:
|
|
119
|
+
violations.append(
|
|
120
|
+
{
|
|
121
|
+
"type": "layer_norm_missing",
|
|
122
|
+
"missing": missing_layer_norms,
|
|
123
|
+
"message": "Expected LayerNorm modules are missing vs baseline",
|
|
124
|
+
}
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
# Tokenizer / vocab alignment
|
|
128
|
+
baseline_vocab_sizes = baseline_checks.get("embedding_vocab_sizes")
|
|
129
|
+
current_vocab_sizes = current_checks.get("embedding_vocab_sizes")
|
|
130
|
+
if isinstance(baseline_vocab_sizes, dict):
|
|
131
|
+
for module_name, baseline_size in baseline_vocab_sizes.items():
|
|
132
|
+
current_size = None
|
|
133
|
+
if isinstance(current_vocab_sizes, dict):
|
|
134
|
+
current_size = current_vocab_sizes.get(module_name)
|
|
135
|
+
if current_size is None or int(current_size) != int(baseline_size):
|
|
136
|
+
mismatch = {
|
|
137
|
+
"module": module_name,
|
|
138
|
+
"baseline": int(baseline_size),
|
|
139
|
+
"current": None if current_size is None else int(current_size),
|
|
140
|
+
}
|
|
141
|
+
violations.append(
|
|
142
|
+
{
|
|
143
|
+
"type": "tokenizer_mismatch",
|
|
144
|
+
"message": "Embedding vocabulary size changed vs baseline",
|
|
145
|
+
**mismatch,
|
|
146
|
+
}
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
handled_keys = {
|
|
150
|
+
"layer_norm_paths",
|
|
151
|
+
"embedding_vocab_sizes",
|
|
152
|
+
"config_vocab_size",
|
|
153
|
+
}
|
|
154
|
+
for check_name, baseline_value in baseline_checks.items():
|
|
155
|
+
if check_name in handled_keys:
|
|
156
|
+
continue
|
|
157
|
+
current_value = current_checks.get(check_name)
|
|
158
|
+
if current_value != baseline_value:
|
|
159
|
+
violations.append(
|
|
160
|
+
{
|
|
161
|
+
"type": "invariant_violation",
|
|
162
|
+
"check": check_name,
|
|
163
|
+
"baseline": baseline_value,
|
|
164
|
+
"current": current_value,
|
|
165
|
+
"message": (
|
|
166
|
+
f"Invariant {check_name} changed from {baseline_value} to {current_value}"
|
|
167
|
+
),
|
|
168
|
+
}
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
fatal_violation_types = {"tokenizer_mismatch"}
|
|
172
|
+
fatal_count = 0
|
|
173
|
+
warning_count = 0
|
|
174
|
+
annotated: list[dict[str, Any]] = []
|
|
175
|
+
for violation in violations:
|
|
176
|
+
violation_type = str(violation.get("type") or "")
|
|
177
|
+
severity = "fatal" if violation_type in fatal_violation_types else "warning"
|
|
178
|
+
annotated_violation = dict(violation)
|
|
179
|
+
annotated_violation.setdefault("severity", severity)
|
|
180
|
+
annotated.append(annotated_violation)
|
|
181
|
+
if severity == "fatal":
|
|
182
|
+
fatal_count += 1
|
|
183
|
+
else:
|
|
184
|
+
warning_count += 1
|
|
185
|
+
|
|
186
|
+
return annotated, fatal_count, warning_count
|
|
187
|
+
|
|
72
188
|
severity_status = "pass"
|
|
73
189
|
if guard_entry:
|
|
74
190
|
gm = guard_entry.get("metrics", {}) or {}
|
|
@@ -96,9 +212,51 @@ def _extract_invariants(report: RunReport) -> dict[str, Any]:
|
|
|
96
212
|
if detail:
|
|
97
213
|
row["detail"] = detail
|
|
98
214
|
failures.append(row)
|
|
99
|
-
|
|
215
|
+
base_fatal = 0
|
|
216
|
+
base_warn = 0
|
|
217
|
+
baseline_failures: list[dict[str, Any]] = []
|
|
218
|
+
if baseline_guard_entry is not None:
|
|
219
|
+
baseline_pre, baseline_post = _extract_guard_checks(baseline_guard_entry)
|
|
220
|
+
current_pre, current_post = _extract_guard_checks(guard_entry)
|
|
221
|
+
baseline_snapshot = baseline_pre or baseline_post
|
|
222
|
+
current_snapshot = current_post or current_pre
|
|
223
|
+
if isinstance(baseline_snapshot, dict) and isinstance(
|
|
224
|
+
current_snapshot, dict
|
|
225
|
+
):
|
|
226
|
+
baseline_failures, base_fatal, base_warn = _compare_invariants(
|
|
227
|
+
baseline_snapshot, current_snapshot
|
|
228
|
+
)
|
|
229
|
+
for violation in baseline_failures:
|
|
230
|
+
check_name = violation.get("check")
|
|
231
|
+
if not check_name:
|
|
232
|
+
check_name = (
|
|
233
|
+
violation.get("module")
|
|
234
|
+
or violation.get("type")
|
|
235
|
+
or "invariant"
|
|
236
|
+
)
|
|
237
|
+
row = {
|
|
238
|
+
"check": str(check_name),
|
|
239
|
+
"type": str(violation.get("type") or "violation"),
|
|
240
|
+
"severity": str(violation.get("severity") or "warning"),
|
|
241
|
+
}
|
|
242
|
+
detail = {k: v for k, v in violation.items() if k not in row}
|
|
243
|
+
if detail:
|
|
244
|
+
detail.setdefault("source", "baseline_compare")
|
|
245
|
+
row["detail"] = detail
|
|
246
|
+
failures.append(row)
|
|
247
|
+
|
|
248
|
+
fatal_total = fatal_count + base_fatal
|
|
249
|
+
warn_total = warning_count + base_warn
|
|
250
|
+
try:
|
|
251
|
+
summary["fatal_violations"] = fatal_total
|
|
252
|
+
summary["warning_violations"] = warn_total
|
|
253
|
+
summary["violations_found"] = fatal_total + warn_total
|
|
254
|
+
except Exception:
|
|
255
|
+
pass
|
|
256
|
+
|
|
257
|
+
if fatal_total > 0:
|
|
100
258
|
severity_status = "fail"
|
|
101
|
-
elif
|
|
259
|
+
elif warn_total > 0 or violations:
|
|
102
260
|
severity_status = "warn"
|
|
103
261
|
|
|
104
262
|
# If any error-severity entry exists among failures, escalate to fail
|
|
@@ -118,12 +276,16 @@ def _extract_invariants(report: RunReport) -> dict[str, Any]:
|
|
|
118
276
|
"warning_violations": len(failures),
|
|
119
277
|
}
|
|
120
278
|
|
|
279
|
+
details_out = invariants_data
|
|
280
|
+
if not details_out and guard_entry and isinstance(guard_entry.get("details"), dict):
|
|
281
|
+
details_out = guard_entry.get("details", {})
|
|
282
|
+
|
|
121
283
|
return {
|
|
122
284
|
"pre": "pass",
|
|
123
285
|
"post": status,
|
|
124
286
|
"status": status,
|
|
125
287
|
"summary": summary,
|
|
126
|
-
"details":
|
|
288
|
+
"details": details_out,
|
|
127
289
|
"failures": failures,
|
|
128
290
|
}
|
|
129
291
|
|
|
@@ -299,10 +461,10 @@ def _extract_spectral_analysis(
|
|
|
299
461
|
families: dict[str, dict[str, Any]] = {}
|
|
300
462
|
family_caps: dict[str, dict[str, float]] = {}
|
|
301
463
|
top_z_scores: dict[str, list[dict[str, Any]]] = {}
|
|
464
|
+
deadband_used: float | None = None
|
|
302
465
|
|
|
303
466
|
if isinstance(guard_metrics, dict):
|
|
304
467
|
# Resolve deadband from policy/metrics/defaults
|
|
305
|
-
deadband_used: float | None = None
|
|
306
468
|
try:
|
|
307
469
|
db_raw = guard_policy.get("deadband") if guard_policy else None
|
|
308
470
|
if db_raw is None and isinstance(guard_metrics, dict):
|
|
@@ -314,16 +476,12 @@ def _extract_spectral_analysis(
|
|
|
314
476
|
except Exception:
|
|
315
477
|
deadband_used = None
|
|
316
478
|
|
|
317
|
-
# Resolve sigma_quantile for summary
|
|
479
|
+
# Resolve sigma_quantile for summary
|
|
318
480
|
sigma_q_used: float | None = None
|
|
319
481
|
try:
|
|
320
482
|
pol_sq = None
|
|
321
483
|
if isinstance(guard_policy, dict):
|
|
322
|
-
pol_sq = (
|
|
323
|
-
guard_policy.get("sigma_quantile")
|
|
324
|
-
or guard_policy.get("contraction")
|
|
325
|
-
or guard_policy.get("kappa")
|
|
326
|
-
)
|
|
484
|
+
pol_sq = guard_policy.get("sigma_quantile")
|
|
327
485
|
if pol_sq is None:
|
|
328
486
|
pol_sq = default_sigma_quantile
|
|
329
487
|
if pol_sq is not None:
|
|
@@ -371,7 +529,7 @@ def _extract_spectral_analysis(
|
|
|
371
529
|
else {}
|
|
372
530
|
)
|
|
373
531
|
if not families:
|
|
374
|
-
# Prefer z-summary when available; accept
|
|
532
|
+
# Prefer z-summary when available; accept 'family_stats' too
|
|
375
533
|
fzs = guard_metrics.get("family_z_summary")
|
|
376
534
|
if not isinstance(fzs, dict) or not fzs:
|
|
377
535
|
fzs = guard_metrics.get("family_stats")
|
|
@@ -493,7 +651,7 @@ def _extract_spectral_analysis(
|
|
|
493
651
|
for source in sources:
|
|
494
652
|
if not isinstance(source, dict):
|
|
495
653
|
continue
|
|
496
|
-
candidate = source.get("multiple_testing")
|
|
654
|
+
candidate = source.get("multiple_testing")
|
|
497
655
|
if isinstance(candidate, dict) and candidate:
|
|
498
656
|
return candidate
|
|
499
657
|
return None
|
|
@@ -505,20 +663,13 @@ def _extract_spectral_analysis(
|
|
|
505
663
|
policy_out: dict[str, Any] | None = None
|
|
506
664
|
if isinstance(guard_policy, dict) and guard_policy:
|
|
507
665
|
policy_out = dict(guard_policy)
|
|
508
|
-
_promote_legacy_multiple_testing_key(policy_out)
|
|
509
666
|
if default_sigma_quantile is not None:
|
|
510
|
-
sq = (
|
|
511
|
-
policy_out.get("sigma_quantile")
|
|
512
|
-
or policy_out.get("contraction")
|
|
513
|
-
or policy_out.get("kappa")
|
|
514
|
-
)
|
|
667
|
+
sq = policy_out.get("sigma_quantile")
|
|
515
668
|
if sq is not None:
|
|
516
669
|
try:
|
|
517
670
|
policy_out["sigma_quantile"] = float(sq)
|
|
518
671
|
except Exception:
|
|
519
672
|
pass
|
|
520
|
-
policy_out.pop("contraction", None)
|
|
521
|
-
policy_out.pop("kappa", None)
|
|
522
673
|
if tier == "balanced":
|
|
523
674
|
policy_out["correction_enabled"] = False
|
|
524
675
|
policy_out["max_spectral_norm"] = None
|
|
@@ -532,7 +683,7 @@ def _extract_spectral_analysis(
|
|
|
532
683
|
"families": families,
|
|
533
684
|
"family_caps": family_caps,
|
|
534
685
|
}
|
|
535
|
-
#
|
|
686
|
+
# Surface a stable/capped status on the summary for schema parity.
|
|
536
687
|
try:
|
|
537
688
|
summary["status"] = "stable" if int(caps_applied) == 0 else "capped"
|
|
538
689
|
except Exception:
|
|
@@ -594,6 +745,40 @@ def _extract_spectral_analysis(
|
|
|
594
745
|
result["top_violations"] = top_violations
|
|
595
746
|
if family_quantiles:
|
|
596
747
|
result["family_z_quantiles"] = family_quantiles
|
|
748
|
+
result["evaluated"] = bool(spectral_guard)
|
|
749
|
+
|
|
750
|
+
measurement_contract = None
|
|
751
|
+
try:
|
|
752
|
+
mc = (
|
|
753
|
+
guard_metrics.get("measurement_contract")
|
|
754
|
+
if isinstance(guard_metrics, dict)
|
|
755
|
+
else None
|
|
756
|
+
)
|
|
757
|
+
if isinstance(mc, dict) and mc:
|
|
758
|
+
measurement_contract = mc
|
|
759
|
+
except Exception:
|
|
760
|
+
measurement_contract = None
|
|
761
|
+
baseline_contract = None
|
|
762
|
+
try:
|
|
763
|
+
bc = (
|
|
764
|
+
baseline_spectral.get("measurement_contract")
|
|
765
|
+
if isinstance(baseline_spectral, dict)
|
|
766
|
+
else None
|
|
767
|
+
)
|
|
768
|
+
if isinstance(bc, dict) and bc:
|
|
769
|
+
baseline_contract = bc
|
|
770
|
+
except Exception:
|
|
771
|
+
baseline_contract = None
|
|
772
|
+
mc_hash = _measurement_contract_digest(measurement_contract)
|
|
773
|
+
baseline_hash = _measurement_contract_digest(baseline_contract)
|
|
774
|
+
if measurement_contract is not None:
|
|
775
|
+
result["measurement_contract"] = measurement_contract
|
|
776
|
+
if mc_hash:
|
|
777
|
+
result["measurement_contract_hash"] = mc_hash
|
|
778
|
+
if baseline_hash:
|
|
779
|
+
result["baseline_measurement_contract_hash"] = baseline_hash
|
|
780
|
+
if mc_hash and baseline_hash:
|
|
781
|
+
result["measurement_contract_match"] = bool(mc_hash == baseline_hash)
|
|
597
782
|
result["caps_exceeded"] = bool(caps_exceeded)
|
|
598
783
|
try:
|
|
599
784
|
summary["caps_exceeded"] = bool(caps_exceeded)
|
|
@@ -624,24 +809,22 @@ def _extract_spectral_analysis(
|
|
|
624
809
|
def _extract_rmt_analysis(
|
|
625
810
|
report: RunReport, baseline: dict[str, Any]
|
|
626
811
|
) -> dict[str, Any]:
|
|
812
|
+
"""Extract RMT analysis using activation edge-risk ε-band semantics."""
|
|
627
813
|
tier = _resolve_policy_tier(report)
|
|
628
814
|
tier_policies = get_tier_policies()
|
|
629
815
|
tier_defaults = tier_policies.get(tier, tier_policies.get("balanced", {}))
|
|
816
|
+
|
|
630
817
|
default_epsilon_map = (
|
|
631
818
|
tier_defaults.get("rmt", {}).get("epsilon_by_family")
|
|
632
819
|
if isinstance(tier_defaults, dict)
|
|
633
820
|
else {}
|
|
634
821
|
)
|
|
635
|
-
if not default_epsilon_map and isinstance(tier_defaults, dict):
|
|
636
|
-
default_epsilon_map = (tier_defaults.get("rmt", {}) or {}).get("epsilon", {})
|
|
637
822
|
default_epsilon_map = {
|
|
638
823
|
str(family): float(value)
|
|
639
824
|
for family, value in (default_epsilon_map or {}).items()
|
|
640
|
-
if isinstance(value, int | float)
|
|
825
|
+
if isinstance(value, int | float) and math.isfinite(float(value))
|
|
641
826
|
}
|
|
642
827
|
|
|
643
|
-
outliers_guarded = 0
|
|
644
|
-
outliers_bare = 0
|
|
645
828
|
epsilon_default = 0.1
|
|
646
829
|
try:
|
|
647
830
|
eps_def = (
|
|
@@ -653,278 +836,168 @@ def _extract_rmt_analysis(
|
|
|
653
836
|
epsilon_default = float(eps_def)
|
|
654
837
|
except Exception:
|
|
655
838
|
pass
|
|
656
|
-
stable = True
|
|
657
|
-
explicit_stability = False
|
|
658
|
-
max_ratio = 0.0
|
|
659
|
-
max_deviation_ratio = 1.0
|
|
660
|
-
mean_deviation_ratio = 1.0
|
|
661
|
-
epsilon_map: dict[str, float] = {}
|
|
662
|
-
baseline_outliers_per_family: dict[str, int] = {}
|
|
663
|
-
outliers_per_family: dict[str, int] = {}
|
|
664
|
-
epsilon_violations: list[Any] = []
|
|
665
|
-
margin_used = None
|
|
666
|
-
deadband_used = None
|
|
667
|
-
policy_out: dict[str, Any] | None = None
|
|
668
839
|
|
|
840
|
+
baseline_rmt = baseline.get("rmt", {}) if isinstance(baseline, dict) else {}
|
|
841
|
+
baseline_edge_by_family: dict[str, float] = {}
|
|
842
|
+
baseline_contract = None
|
|
843
|
+
if isinstance(baseline_rmt, dict) and baseline_rmt:
|
|
844
|
+
bc = baseline_rmt.get("measurement_contract")
|
|
845
|
+
if isinstance(bc, dict) and bc:
|
|
846
|
+
baseline_contract = bc
|
|
847
|
+
base = baseline_rmt.get("edge_risk_by_family") or baseline_rmt.get(
|
|
848
|
+
"edge_risk_by_family_base"
|
|
849
|
+
)
|
|
850
|
+
if isinstance(base, dict):
|
|
851
|
+
for k, v in base.items():
|
|
852
|
+
if isinstance(v, int | float) and math.isfinite(float(v)):
|
|
853
|
+
baseline_edge_by_family[str(k)] = float(v)
|
|
854
|
+
|
|
855
|
+
rmt_guard = None
|
|
856
|
+
guard_metrics: dict[str, Any] = {}
|
|
857
|
+
guard_policy: dict[str, Any] = {}
|
|
669
858
|
for guard in report.get("guards", []) or []:
|
|
670
859
|
if str(guard.get("name", "")).lower() == "rmt":
|
|
860
|
+
rmt_guard = guard
|
|
671
861
|
guard_metrics = guard.get("metrics", {}) or {}
|
|
672
862
|
guard_policy = guard.get("policy", {}) or {}
|
|
673
|
-
if isinstance(guard_policy, dict) and guard_policy:
|
|
674
|
-
policy_out = dict(guard_policy)
|
|
675
|
-
if "epsilon_by_family" not in policy_out and isinstance(
|
|
676
|
-
policy_out.get("epsilon"), dict
|
|
677
|
-
):
|
|
678
|
-
policy_out["epsilon_by_family"] = dict(policy_out["epsilon"])
|
|
679
|
-
if isinstance(policy_out.get("margin"), int | float) and math.isfinite(
|
|
680
|
-
float(policy_out.get("margin"))
|
|
681
|
-
):
|
|
682
|
-
margin_used = float(policy_out.get("margin"))
|
|
683
|
-
if isinstance(
|
|
684
|
-
policy_out.get("deadband"), int | float
|
|
685
|
-
) and math.isfinite(float(policy_out.get("deadband"))):
|
|
686
|
-
deadband_used = float(policy_out.get("deadband"))
|
|
687
|
-
if isinstance(
|
|
688
|
-
policy_out.get("epsilon_default"), int | float
|
|
689
|
-
) and math.isfinite(float(policy_out.get("epsilon_default"))):
|
|
690
|
-
epsilon_default = float(policy_out.get("epsilon_default"))
|
|
691
|
-
if isinstance(
|
|
692
|
-
guard_metrics.get("epsilon_default"), int | float
|
|
693
|
-
) and math.isfinite(float(guard_metrics.get("epsilon_default"))):
|
|
694
|
-
epsilon_default = float(guard_metrics.get("epsilon_default"))
|
|
695
|
-
outliers_guarded = guard_metrics.get(
|
|
696
|
-
"rmt_outliers", guard_metrics.get("layers_flagged", outliers_guarded)
|
|
697
|
-
)
|
|
698
|
-
max_ratio = guard_metrics.get("max_ratio", 0.0)
|
|
699
|
-
epsilon_map = guard_metrics.get("epsilon_by_family", {}) or epsilon_map
|
|
700
|
-
if not epsilon_map and isinstance(guard_policy, dict):
|
|
701
|
-
eps_src = guard_policy.get("epsilon_by_family") or guard_policy.get(
|
|
702
|
-
"epsilon"
|
|
703
|
-
)
|
|
704
|
-
if isinstance(eps_src, dict):
|
|
705
|
-
try:
|
|
706
|
-
epsilon_map = {
|
|
707
|
-
str(k): float(v)
|
|
708
|
-
for k, v in eps_src.items()
|
|
709
|
-
if isinstance(v, int | float) and math.isfinite(float(v))
|
|
710
|
-
}
|
|
711
|
-
except Exception:
|
|
712
|
-
pass
|
|
713
|
-
baseline_outliers_per_family = (
|
|
714
|
-
guard_metrics.get("baseline_outliers_per_family", {})
|
|
715
|
-
or baseline_outliers_per_family
|
|
716
|
-
)
|
|
717
|
-
outliers_per_family = (
|
|
718
|
-
guard_metrics.get("outliers_per_family", {}) or outliers_per_family
|
|
719
|
-
)
|
|
720
|
-
epsilon_violations = guard_metrics.get(
|
|
721
|
-
"epsilon_violations", epsilon_violations
|
|
722
|
-
)
|
|
723
|
-
if outliers_per_family:
|
|
724
|
-
outliers_guarded = sum(
|
|
725
|
-
int(v)
|
|
726
|
-
for v in outliers_per_family.values()
|
|
727
|
-
if isinstance(v, int | float)
|
|
728
|
-
)
|
|
729
|
-
if baseline_outliers_per_family:
|
|
730
|
-
outliers_bare = sum(
|
|
731
|
-
int(v)
|
|
732
|
-
for v in baseline_outliers_per_family.values()
|
|
733
|
-
if isinstance(v, int | float)
|
|
734
|
-
)
|
|
735
|
-
flagged_rate = guard_metrics.get("flagged_rate", 0.0)
|
|
736
|
-
stable = flagged_rate <= 0.5
|
|
737
|
-
max_mp_ratio = guard_metrics.get("max_mp_ratio_final", 0.0)
|
|
738
|
-
mean_mp_ratio = guard_metrics.get("mean_mp_ratio_final", 0.0)
|
|
739
|
-
|
|
740
|
-
baseline_max = None
|
|
741
|
-
baseline_mean = None
|
|
742
|
-
baseline_rmt = baseline.get("rmt", {}) if isinstance(baseline, dict) else {}
|
|
743
|
-
if baseline_rmt:
|
|
744
|
-
baseline_max = baseline_rmt.get(
|
|
745
|
-
"max_mp_ratio", baseline_rmt.get("max_mp_ratio_final")
|
|
746
|
-
)
|
|
747
|
-
baseline_mean = baseline_rmt.get(
|
|
748
|
-
"mean_mp_ratio", baseline_rmt.get("mean_mp_ratio_final")
|
|
749
|
-
)
|
|
750
|
-
outliers_bare = baseline_rmt.get(
|
|
751
|
-
"outliers", baseline_rmt.get("rmt_outliers", 0)
|
|
752
|
-
)
|
|
753
|
-
if baseline_max is None:
|
|
754
|
-
baseline_metrics = (
|
|
755
|
-
baseline.get("metrics", {}) if isinstance(baseline, dict) else {}
|
|
756
|
-
)
|
|
757
|
-
if "rmt" in baseline_metrics:
|
|
758
|
-
baseline_rmt_metrics = baseline_metrics["rmt"]
|
|
759
|
-
baseline_max = baseline_rmt_metrics.get("max_mp_ratio_final")
|
|
760
|
-
baseline_mean = baseline_rmt_metrics.get("mean_mp_ratio_final")
|
|
761
|
-
if baseline_max is None and isinstance(guard.get("baseline_metrics"), dict):
|
|
762
|
-
gb = guard.get("baseline_metrics")
|
|
763
|
-
baseline_max = gb.get("max_mp_ratio")
|
|
764
|
-
baseline_mean = gb.get("mean_mp_ratio")
|
|
765
|
-
if baseline_max is not None and baseline_max > 0:
|
|
766
|
-
max_deviation_ratio = max_mp_ratio / baseline_max
|
|
767
|
-
else:
|
|
768
|
-
max_deviation_ratio = 1.0
|
|
769
|
-
if baseline_mean is not None and baseline_mean > 0:
|
|
770
|
-
mean_deviation_ratio = mean_mp_ratio / baseline_mean
|
|
771
|
-
else:
|
|
772
|
-
mean_deviation_ratio = 1.0
|
|
773
|
-
if isinstance(guard_metrics.get("stable"), bool):
|
|
774
|
-
stable = bool(guard_metrics.get("stable"))
|
|
775
|
-
explicit_stability = True
|
|
776
863
|
break
|
|
777
864
|
|
|
778
|
-
|
|
779
|
-
if
|
|
780
|
-
|
|
781
|
-
if isinstance(
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
outliers_guarded = 0
|
|
786
|
-
if isinstance(rmt_metrics.get("stable"), bool):
|
|
787
|
-
stable = bool(rmt_metrics.get("stable"))
|
|
788
|
-
explicit_stability = True
|
|
789
|
-
rmt_top = report.get("rmt", {}) if isinstance(report.get("rmt"), dict) else {}
|
|
790
|
-
if isinstance(rmt_top, dict):
|
|
791
|
-
fams = rmt_top.get("families", {})
|
|
792
|
-
if isinstance(fams, dict) and fams:
|
|
793
|
-
for fam, rec in fams.items():
|
|
794
|
-
if not isinstance(rec, dict):
|
|
795
|
-
continue
|
|
796
|
-
try:
|
|
797
|
-
outliers_per_family[str(fam)] = int(
|
|
798
|
-
rec.get("outliers_guarded", 0) or 0
|
|
799
|
-
)
|
|
800
|
-
baseline_outliers_per_family[str(fam)] = int(
|
|
801
|
-
rec.get("outliers_bare", 0) or 0
|
|
802
|
-
)
|
|
803
|
-
if rec.get("epsilon") is not None:
|
|
804
|
-
try:
|
|
805
|
-
epsilon_map[str(fam)] = float(rec.get("epsilon"))
|
|
806
|
-
except Exception:
|
|
807
|
-
pass
|
|
808
|
-
except Exception:
|
|
809
|
-
continue
|
|
810
|
-
try:
|
|
811
|
-
if outliers_bare == 0:
|
|
812
|
-
outliers_bare = int(rmt_top.get("outliers", 0) or 0)
|
|
813
|
-
except Exception:
|
|
814
|
-
pass
|
|
865
|
+
policy_out: dict[str, Any] | None = None
|
|
866
|
+
if isinstance(guard_policy, dict) and guard_policy:
|
|
867
|
+
policy_out = dict(guard_policy)
|
|
868
|
+
if isinstance(policy_out.get("epsilon_default"), int | float) and math.isfinite(
|
|
869
|
+
float(policy_out.get("epsilon_default"))
|
|
870
|
+
):
|
|
871
|
+
epsilon_default = float(policy_out.get("epsilon_default"))
|
|
815
872
|
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
stable = True
|
|
821
|
-
elif outliers_guarded <= outliers_bare:
|
|
822
|
-
stable = True
|
|
823
|
-
else:
|
|
824
|
-
stable = (outliers_guarded - outliers_bare) / max(
|
|
825
|
-
outliers_bare, 1
|
|
826
|
-
) <= 0.5
|
|
827
|
-
except Exception:
|
|
828
|
-
pass
|
|
873
|
+
if isinstance(guard_metrics.get("epsilon_default"), int | float) and math.isfinite(
|
|
874
|
+
float(guard_metrics.get("epsilon_default"))
|
|
875
|
+
):
|
|
876
|
+
epsilon_default = float(guard_metrics.get("epsilon_default"))
|
|
829
877
|
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
878
|
+
edge_base: dict[str, float] = {}
|
|
879
|
+
edge_cur: dict[str, float] = {}
|
|
880
|
+
if isinstance(guard_metrics, dict) and guard_metrics:
|
|
881
|
+
base = guard_metrics.get("edge_risk_by_family_base") or {}
|
|
882
|
+
cur = guard_metrics.get("edge_risk_by_family") or {}
|
|
883
|
+
if isinstance(base, dict):
|
|
884
|
+
for k, v in base.items():
|
|
885
|
+
if isinstance(v, int | float) and math.isfinite(float(v)):
|
|
886
|
+
edge_base[str(k)] = float(v)
|
|
887
|
+
if isinstance(cur, dict):
|
|
888
|
+
for k, v in cur.items():
|
|
889
|
+
if isinstance(v, int | float) and math.isfinite(float(v)):
|
|
890
|
+
edge_cur[str(k)] = float(v)
|
|
891
|
+
if not edge_base and baseline_edge_by_family:
|
|
892
|
+
edge_base = dict(baseline_edge_by_family)
|
|
893
|
+
|
|
894
|
+
epsilon_map: dict[str, float] = {}
|
|
895
|
+
eps_src = guard_metrics.get("epsilon_by_family") or {}
|
|
896
|
+
if not eps_src and isinstance(guard_policy, dict):
|
|
897
|
+
eps_src = guard_policy.get("epsilon_by_family") or {}
|
|
898
|
+
if isinstance(eps_src, dict):
|
|
899
|
+
for k, v in eps_src.items():
|
|
900
|
+
if isinstance(v, int | float) and math.isfinite(float(v)):
|
|
901
|
+
epsilon_map[str(k)] = float(v)
|
|
902
|
+
|
|
903
|
+
epsilon_violations = guard_metrics.get("epsilon_violations") or []
|
|
904
|
+
if not (isinstance(epsilon_violations, list) and epsilon_violations):
|
|
905
|
+
epsilon_violations = []
|
|
906
|
+
families = set(edge_cur) | set(edge_base)
|
|
907
|
+
for family in families:
|
|
908
|
+
base = float(edge_base.get(family, 0.0) or 0.0)
|
|
909
|
+
cur = float(edge_cur.get(family, 0.0) or 0.0)
|
|
910
|
+
if base <= 0.0:
|
|
911
|
+
continue
|
|
912
|
+
eps = float(
|
|
913
|
+
epsilon_map.get(
|
|
914
|
+
family, default_epsilon_map.get(family, epsilon_default)
|
|
847
915
|
)
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
916
|
+
)
|
|
917
|
+
allowed = (1.0 + eps) * base
|
|
918
|
+
if cur > allowed:
|
|
919
|
+
delta = (cur / base) - 1.0 if base > 0 else float("inf")
|
|
920
|
+
epsilon_violations.append(
|
|
921
|
+
{
|
|
922
|
+
"family": family,
|
|
923
|
+
"edge_base": base,
|
|
924
|
+
"edge_cur": cur,
|
|
925
|
+
"delta": float(delta),
|
|
926
|
+
"allowed": allowed,
|
|
927
|
+
"epsilon": eps,
|
|
928
|
+
}
|
|
860
929
|
)
|
|
861
|
-
except Exception:
|
|
862
|
-
pass
|
|
863
|
-
|
|
864
|
-
# Compute epsilon scalar (fallback) and detailed family breakdown
|
|
865
|
-
if epsilon_map:
|
|
866
|
-
epsilon_scalar = max(float(v) for v in epsilon_map.values())
|
|
867
|
-
elif default_epsilon_map:
|
|
868
|
-
try:
|
|
869
|
-
epsilon_scalar = max(float(v) for v in default_epsilon_map.values())
|
|
870
|
-
except Exception:
|
|
871
|
-
epsilon_scalar = float(epsilon_default)
|
|
872
|
-
else:
|
|
873
|
-
epsilon_scalar = float(epsilon_default)
|
|
874
|
-
try:
|
|
875
|
-
epsilon_scalar = round(float(epsilon_scalar), 3)
|
|
876
|
-
except Exception:
|
|
877
|
-
epsilon_scalar = float(epsilon_default)
|
|
878
930
|
|
|
879
|
-
|
|
880
|
-
try:
|
|
881
|
-
return int(v)
|
|
882
|
-
except (TypeError, ValueError):
|
|
883
|
-
return 0
|
|
931
|
+
stable = bool(guard_metrics.get("stable", not epsilon_violations))
|
|
884
932
|
|
|
885
|
-
|
|
886
|
-
set(
|
|
933
|
+
families_all = sorted(
|
|
934
|
+
set(edge_base) | set(edge_cur) | set(epsilon_map) | set(default_epsilon_map)
|
|
887
935
|
)
|
|
888
|
-
family_breakdown = {
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
936
|
+
family_breakdown: dict[str, dict[str, Any]] = {}
|
|
937
|
+
ratios: list[float] = []
|
|
938
|
+
deltas: list[float] = []
|
|
939
|
+
for family in families_all:
|
|
940
|
+
base = float(edge_base.get(family, 0.0) or 0.0)
|
|
941
|
+
cur = float(edge_cur.get(family, 0.0) or 0.0)
|
|
942
|
+
eps = float(
|
|
943
|
+
epsilon_map.get(family, default_epsilon_map.get(family, epsilon_default))
|
|
944
|
+
)
|
|
945
|
+
allowed = (1.0 + eps) * base if base > 0.0 else None
|
|
946
|
+
ratio = (cur / base) if base > 0.0 else None
|
|
947
|
+
delta = ((cur / base) - 1.0) if base > 0.0 else None
|
|
948
|
+
if isinstance(ratio, float) and math.isfinite(ratio):
|
|
949
|
+
ratios.append(ratio)
|
|
950
|
+
if isinstance(delta, float) and math.isfinite(delta):
|
|
951
|
+
deltas.append(delta)
|
|
952
|
+
family_breakdown[family] = {
|
|
953
|
+
"edge_base": base,
|
|
954
|
+
"edge_cur": cur,
|
|
955
|
+
"epsilon": eps,
|
|
956
|
+
"allowed": allowed,
|
|
957
|
+
"ratio": ratio,
|
|
958
|
+
"delta": delta,
|
|
893
959
|
}
|
|
894
|
-
for family in sorted(families)
|
|
895
|
-
}
|
|
896
960
|
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
961
|
+
measurement_contract = None
|
|
962
|
+
try:
|
|
963
|
+
mc = (
|
|
964
|
+
guard_metrics.get("measurement_contract")
|
|
965
|
+
if isinstance(guard_metrics, dict)
|
|
966
|
+
else None
|
|
967
|
+
)
|
|
968
|
+
if isinstance(mc, dict) and mc:
|
|
969
|
+
measurement_contract = mc
|
|
970
|
+
except Exception:
|
|
971
|
+
measurement_contract = None
|
|
972
|
+
|
|
973
|
+
mc_hash = _measurement_contract_digest(measurement_contract)
|
|
974
|
+
baseline_hash = _measurement_contract_digest(baseline_contract)
|
|
903
975
|
|
|
904
|
-
result = {
|
|
905
|
-
"
|
|
906
|
-
"
|
|
907
|
-
"
|
|
976
|
+
result: dict[str, Any] = {
|
|
977
|
+
"tier": tier,
|
|
978
|
+
"edge_risk_by_family_base": dict(edge_base),
|
|
979
|
+
"edge_risk_by_family": dict(edge_cur),
|
|
908
980
|
"epsilon_default": float(epsilon_default),
|
|
909
|
-
"epsilon_by_family": epsilon_map,
|
|
910
|
-
"
|
|
911
|
-
"baseline_outliers_per_family": baseline_outliers_per_family,
|
|
912
|
-
"delta_per_family": delta_per_family,
|
|
913
|
-
"delta_total": delta_total,
|
|
914
|
-
"epsilon_violations": epsilon_violations,
|
|
981
|
+
"epsilon_by_family": dict(epsilon_map),
|
|
982
|
+
"epsilon_violations": list(epsilon_violations),
|
|
915
983
|
"stable": stable,
|
|
916
984
|
"status": "stable" if stable else "unstable",
|
|
917
|
-
"
|
|
918
|
-
"
|
|
919
|
-
"
|
|
985
|
+
"max_edge_ratio": max(ratios) if ratios else None,
|
|
986
|
+
"max_edge_delta": max(deltas) if deltas else None,
|
|
987
|
+
"mean_edge_delta": (sum(deltas) / len(deltas)) if deltas else None,
|
|
920
988
|
"families": family_breakdown,
|
|
989
|
+
"evaluated": bool(rmt_guard),
|
|
921
990
|
}
|
|
922
|
-
if margin_used is not None:
|
|
923
|
-
result["margin"] = float(margin_used)
|
|
924
|
-
if deadband_used is not None:
|
|
925
|
-
result["deadband"] = float(deadband_used)
|
|
926
991
|
if policy_out:
|
|
927
992
|
result["policy"] = policy_out
|
|
993
|
+
if measurement_contract is not None:
|
|
994
|
+
result["measurement_contract"] = measurement_contract
|
|
995
|
+
if mc_hash:
|
|
996
|
+
result["measurement_contract_hash"] = mc_hash
|
|
997
|
+
if baseline_hash:
|
|
998
|
+
result["baseline_measurement_contract_hash"] = baseline_hash
|
|
999
|
+
if mc_hash and baseline_hash:
|
|
1000
|
+
result["measurement_contract_match"] = bool(mc_hash == baseline_hash)
|
|
928
1001
|
return result
|
|
929
1002
|
|
|
930
1003
|
|