invarlock 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +1 -1
- invarlock/_data/runtime/tiers.yaml +61 -0
- invarlock/adapters/hf_loading.py +97 -0
- invarlock/calibration/__init__.py +6 -0
- invarlock/calibration/spectral_null.py +301 -0
- invarlock/calibration/variance_ve.py +154 -0
- invarlock/cli/app.py +15 -0
- invarlock/cli/commands/calibrate.py +576 -0
- invarlock/cli/commands/doctor.py +9 -3
- invarlock/cli/commands/explain_gates.py +53 -9
- invarlock/cli/commands/plugins.py +12 -2
- invarlock/cli/commands/run.py +181 -79
- invarlock/cli/commands/verify.py +40 -0
- invarlock/cli/config.py +11 -1
- invarlock/cli/determinism.py +252 -0
- invarlock/core/auto_tuning.py +215 -17
- invarlock/core/bootstrap.py +137 -5
- invarlock/core/registry.py +9 -4
- invarlock/core/runner.py +305 -35
- invarlock/eval/bench.py +467 -141
- invarlock/eval/bench_regression.py +12 -0
- invarlock/eval/bootstrap.py +3 -1
- invarlock/eval/data.py +29 -7
- invarlock/eval/primary_metric.py +20 -5
- invarlock/guards/rmt.py +536 -46
- invarlock/guards/spectral.py +217 -10
- invarlock/guards/variance.py +124 -42
- invarlock/reporting/certificate.py +476 -45
- invarlock/reporting/certificate_schema.py +4 -1
- invarlock/reporting/guards_analysis.py +108 -10
- invarlock/reporting/normalizer.py +24 -1
- invarlock/reporting/policy_utils.py +97 -15
- invarlock/reporting/primary_metric_utils.py +17 -0
- invarlock/reporting/validate.py +10 -10
- {invarlock-0.3.1.dist-info → invarlock-0.3.3.dist-info}/METADATA +12 -10
- {invarlock-0.3.1.dist-info → invarlock-0.3.3.dist-info}/RECORD +40 -33
- {invarlock-0.3.1.dist-info → invarlock-0.3.3.dist-info}/WHEEL +0 -0
- {invarlock-0.3.1.dist-info → invarlock-0.3.3.dist-info}/entry_points.txt +0 -0
- {invarlock-0.3.1.dist-info → invarlock-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.1.dist-info → invarlock-0.3.3.dist-info}/top_level.txt +0 -0
|
@@ -29,6 +29,7 @@ CERTIFICATE_JSON_SCHEMA: dict[str, Any] = {
|
|
|
29
29
|
"plugins",
|
|
30
30
|
"meta",
|
|
31
31
|
"dataset",
|
|
32
|
+
"primary_metric",
|
|
32
33
|
],
|
|
33
34
|
"properties": {
|
|
34
35
|
"schema_version": {"const": CERTIFICATE_SCHEMA_VERSION},
|
|
@@ -64,11 +65,12 @@ CERTIFICATE_JSON_SCHEMA: dict[str, Any] = {
|
|
|
64
65
|
"seq_len": {"type": "integer", "minimum": 1},
|
|
65
66
|
"windows": {
|
|
66
67
|
"type": "object",
|
|
67
|
-
"required": ["preview", "final"],
|
|
68
|
+
"required": ["preview", "final", "stats"],
|
|
68
69
|
"properties": {
|
|
69
70
|
"preview": {"type": "integer", "minimum": 0},
|
|
70
71
|
"final": {"type": "integer", "minimum": 0},
|
|
71
72
|
"seed": {"type": "integer"},
|
|
73
|
+
"stats": {"type": "object"},
|
|
72
74
|
},
|
|
73
75
|
},
|
|
74
76
|
},
|
|
@@ -77,6 +79,7 @@ CERTIFICATE_JSON_SCHEMA: dict[str, Any] = {
|
|
|
77
79
|
# ppl_* block removed from required schema; may appear for ppl-like tasks but is optional
|
|
78
80
|
"primary_metric": {
|
|
79
81
|
"type": "object",
|
|
82
|
+
"required": ["kind"],
|
|
80
83
|
"properties": {
|
|
81
84
|
"kind": {"type": "string"},
|
|
82
85
|
"unit": {"type": "string"},
|
|
@@ -4,7 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
import math
|
|
5
5
|
from typing import Any, no_type_check
|
|
6
6
|
|
|
7
|
-
from invarlock.core.auto_tuning import
|
|
7
|
+
from invarlock.core.auto_tuning import get_tier_policies
|
|
8
8
|
|
|
9
9
|
from .policy_utils import _promote_legacy_multiple_testing_key, _resolve_policy_tier
|
|
10
10
|
from .report_types import RunReport
|
|
@@ -133,7 +133,8 @@ def _extract_spectral_analysis(
|
|
|
133
133
|
report: RunReport, baseline: dict[str, Any]
|
|
134
134
|
) -> dict[str, Any]:
|
|
135
135
|
tier = _resolve_policy_tier(report)
|
|
136
|
-
|
|
136
|
+
tier_policies = get_tier_policies()
|
|
137
|
+
tier_defaults = tier_policies.get(tier, tier_policies.get("balanced", {}))
|
|
137
138
|
spectral_defaults = tier_defaults.get("spectral", {}) if tier_defaults else {}
|
|
138
139
|
default_sigma_quantile = spectral_defaults.get("sigma_quantile", 0.95)
|
|
139
140
|
default_deadband = spectral_defaults.get("deadband", 0.1)
|
|
@@ -166,9 +167,15 @@ def _extract_spectral_analysis(
|
|
|
166
167
|
caps_exceeded = (
|
|
167
168
|
bool(guard_metrics.get("caps_exceeded", False)) if guard_metrics else False
|
|
168
169
|
)
|
|
169
|
-
max_caps =
|
|
170
|
+
max_caps = guard_metrics.get("max_caps") if guard_metrics else None
|
|
171
|
+
if max_caps is None and guard_policy:
|
|
172
|
+
max_caps = guard_policy.get("max_caps")
|
|
170
173
|
if max_caps is None:
|
|
171
174
|
max_caps = default_max_caps
|
|
175
|
+
try:
|
|
176
|
+
max_caps = int(max_caps)
|
|
177
|
+
except Exception:
|
|
178
|
+
max_caps = int(default_max_caps)
|
|
172
179
|
|
|
173
180
|
try:
|
|
174
181
|
max_spectral_norm = float(
|
|
@@ -618,10 +625,15 @@ def _extract_rmt_analysis(
|
|
|
618
625
|
report: RunReport, baseline: dict[str, Any]
|
|
619
626
|
) -> dict[str, Any]:
|
|
620
627
|
tier = _resolve_policy_tier(report)
|
|
621
|
-
|
|
628
|
+
tier_policies = get_tier_policies()
|
|
629
|
+
tier_defaults = tier_policies.get(tier, tier_policies.get("balanced", {}))
|
|
622
630
|
default_epsilon_map = (
|
|
623
|
-
tier_defaults.get("rmt", {}).get("
|
|
631
|
+
tier_defaults.get("rmt", {}).get("epsilon_by_family")
|
|
632
|
+
if isinstance(tier_defaults, dict)
|
|
633
|
+
else {}
|
|
624
634
|
)
|
|
635
|
+
if not default_epsilon_map and isinstance(tier_defaults, dict):
|
|
636
|
+
default_epsilon_map = (tier_defaults.get("rmt", {}) or {}).get("epsilon", {})
|
|
625
637
|
default_epsilon_map = {
|
|
626
638
|
str(family): float(value)
|
|
627
639
|
for family, value in (default_epsilon_map or {}).items()
|
|
@@ -631,6 +643,16 @@ def _extract_rmt_analysis(
|
|
|
631
643
|
outliers_guarded = 0
|
|
632
644
|
outliers_bare = 0
|
|
633
645
|
epsilon_default = 0.1
|
|
646
|
+
try:
|
|
647
|
+
eps_def = (
|
|
648
|
+
tier_defaults.get("rmt", {}).get("epsilon_default")
|
|
649
|
+
if isinstance(tier_defaults, dict)
|
|
650
|
+
else None
|
|
651
|
+
)
|
|
652
|
+
if isinstance(eps_def, int | float) and math.isfinite(float(eps_def)):
|
|
653
|
+
epsilon_default = float(eps_def)
|
|
654
|
+
except Exception:
|
|
655
|
+
pass
|
|
634
656
|
stable = True
|
|
635
657
|
explicit_stability = False
|
|
636
658
|
max_ratio = 0.0
|
|
@@ -640,19 +662,54 @@ def _extract_rmt_analysis(
|
|
|
640
662
|
baseline_outliers_per_family: dict[str, int] = {}
|
|
641
663
|
outliers_per_family: dict[str, int] = {}
|
|
642
664
|
epsilon_violations: list[Any] = []
|
|
665
|
+
margin_used = None
|
|
666
|
+
deadband_used = None
|
|
667
|
+
policy_out: dict[str, Any] | None = None
|
|
643
668
|
|
|
644
669
|
for guard in report.get("guards", []) or []:
|
|
645
670
|
if str(guard.get("name", "")).lower() == "rmt":
|
|
646
671
|
guard_metrics = guard.get("metrics", {}) or {}
|
|
647
672
|
guard_policy = guard.get("policy", {}) or {}
|
|
673
|
+
if isinstance(guard_policy, dict) and guard_policy:
|
|
674
|
+
policy_out = dict(guard_policy)
|
|
675
|
+
if "epsilon_by_family" not in policy_out and isinstance(
|
|
676
|
+
policy_out.get("epsilon"), dict
|
|
677
|
+
):
|
|
678
|
+
policy_out["epsilon_by_family"] = dict(policy_out["epsilon"])
|
|
679
|
+
if isinstance(policy_out.get("margin"), int | float) and math.isfinite(
|
|
680
|
+
float(policy_out.get("margin"))
|
|
681
|
+
):
|
|
682
|
+
margin_used = float(policy_out.get("margin"))
|
|
683
|
+
if isinstance(
|
|
684
|
+
policy_out.get("deadband"), int | float
|
|
685
|
+
) and math.isfinite(float(policy_out.get("deadband"))):
|
|
686
|
+
deadband_used = float(policy_out.get("deadband"))
|
|
687
|
+
if isinstance(
|
|
688
|
+
policy_out.get("epsilon_default"), int | float
|
|
689
|
+
) and math.isfinite(float(policy_out.get("epsilon_default"))):
|
|
690
|
+
epsilon_default = float(policy_out.get("epsilon_default"))
|
|
691
|
+
if isinstance(
|
|
692
|
+
guard_metrics.get("epsilon_default"), int | float
|
|
693
|
+
) and math.isfinite(float(guard_metrics.get("epsilon_default"))):
|
|
694
|
+
epsilon_default = float(guard_metrics.get("epsilon_default"))
|
|
648
695
|
outliers_guarded = guard_metrics.get(
|
|
649
696
|
"rmt_outliers", guard_metrics.get("layers_flagged", outliers_guarded)
|
|
650
697
|
)
|
|
651
698
|
max_ratio = guard_metrics.get("max_ratio", 0.0)
|
|
652
|
-
epsilon_default = guard_policy.get(
|
|
653
|
-
"deadband", guard_metrics.get("deadband_used", epsilon_default)
|
|
654
|
-
)
|
|
655
699
|
epsilon_map = guard_metrics.get("epsilon_by_family", {}) or epsilon_map
|
|
700
|
+
if not epsilon_map and isinstance(guard_policy, dict):
|
|
701
|
+
eps_src = guard_policy.get("epsilon_by_family") or guard_policy.get(
|
|
702
|
+
"epsilon"
|
|
703
|
+
)
|
|
704
|
+
if isinstance(eps_src, dict):
|
|
705
|
+
try:
|
|
706
|
+
epsilon_map = {
|
|
707
|
+
str(k): float(v)
|
|
708
|
+
for k, v in eps_src.items()
|
|
709
|
+
if isinstance(v, int | float) and math.isfinite(float(v))
|
|
710
|
+
}
|
|
711
|
+
except Exception:
|
|
712
|
+
pass
|
|
656
713
|
baseline_outliers_per_family = (
|
|
657
714
|
guard_metrics.get("baseline_outliers_per_family", {})
|
|
658
715
|
or baseline_outliers_per_family
|
|
@@ -844,7 +901,7 @@ def _extract_rmt_analysis(
|
|
|
844
901
|
}
|
|
845
902
|
delta_per_family = {str(k): _to_int(v) for k, v in delta_per_family.items()}
|
|
846
903
|
|
|
847
|
-
|
|
904
|
+
result = {
|
|
848
905
|
"outliers_bare": outliers_bare,
|
|
849
906
|
"outliers_guarded": outliers_guarded,
|
|
850
907
|
"epsilon": epsilon_scalar,
|
|
@@ -862,6 +919,13 @@ def _extract_rmt_analysis(
|
|
|
862
919
|
"mean_deviation_ratio": mean_deviation_ratio,
|
|
863
920
|
"families": family_breakdown,
|
|
864
921
|
}
|
|
922
|
+
if margin_used is not None:
|
|
923
|
+
result["margin"] = float(margin_used)
|
|
924
|
+
if deadband_used is not None:
|
|
925
|
+
result["deadband"] = float(deadband_used)
|
|
926
|
+
if policy_out:
|
|
927
|
+
result["policy"] = policy_out
|
|
928
|
+
return result
|
|
865
929
|
|
|
866
930
|
|
|
867
931
|
@no_type_check
|
|
@@ -873,10 +937,14 @@ def _extract_variance_analysis(report: RunReport) -> dict[str, Any]:
|
|
|
873
937
|
ratio_ci = None
|
|
874
938
|
calibration = {}
|
|
875
939
|
guard_metrics: dict[str, Any] = {}
|
|
940
|
+
guard_policy: dict[str, Any] | None = None
|
|
876
941
|
for guard in report.get("guards", []) or []:
|
|
877
942
|
if "variance" in str(guard.get("name", "")).lower():
|
|
878
943
|
metrics = guard.get("metrics", {}) or {}
|
|
879
944
|
guard_metrics = metrics
|
|
945
|
+
gp = guard.get("policy", {}) or {}
|
|
946
|
+
if isinstance(gp, dict) and gp:
|
|
947
|
+
guard_policy = dict(gp)
|
|
880
948
|
ve_enabled = metrics.get("ve_enabled", bool(metrics))
|
|
881
949
|
gain = metrics.get("ab_gain", metrics.get("gain", None))
|
|
882
950
|
ppl_no_ve = metrics.get("ppl_no_ve", None)
|
|
@@ -932,11 +1000,41 @@ def _extract_variance_analysis(report: RunReport) -> dict[str, Any]:
|
|
|
932
1000
|
if guard_metrics.get("ab_windows_used") is not None:
|
|
933
1001
|
ab_section["windows_used"] = guard_metrics["ab_windows_used"]
|
|
934
1002
|
if guard_metrics.get("ab_provenance"):
|
|
935
|
-
|
|
1003
|
+
prov = guard_metrics["ab_provenance"]
|
|
1004
|
+
if isinstance(prov, dict):
|
|
1005
|
+
prov_out = dict(prov)
|
|
1006
|
+
|
|
1007
|
+
# Normalize a top-level `window_ids` list for docs + auditability.
|
|
1008
|
+
if "window_ids" not in prov_out:
|
|
1009
|
+
window_ids: set[int] = set()
|
|
1010
|
+
|
|
1011
|
+
def _collect(node: Any) -> None:
|
|
1012
|
+
if isinstance(node, dict):
|
|
1013
|
+
ids = node.get("window_ids")
|
|
1014
|
+
if isinstance(ids, list):
|
|
1015
|
+
for wid in ids:
|
|
1016
|
+
if isinstance(wid, int | float):
|
|
1017
|
+
window_ids.add(int(wid))
|
|
1018
|
+
for v in node.values():
|
|
1019
|
+
_collect(v)
|
|
1020
|
+
return
|
|
1021
|
+
if isinstance(node, list):
|
|
1022
|
+
for v in node:
|
|
1023
|
+
_collect(v)
|
|
1024
|
+
|
|
1025
|
+
_collect(prov_out)
|
|
1026
|
+
if window_ids:
|
|
1027
|
+
prov_out["window_ids"] = sorted(window_ids)
|
|
1028
|
+
|
|
1029
|
+
ab_section["provenance"] = prov_out
|
|
1030
|
+
else:
|
|
1031
|
+
ab_section["provenance"] = prov
|
|
936
1032
|
if guard_metrics.get("ab_point_estimates"):
|
|
937
1033
|
ab_section["point_estimates"] = guard_metrics["ab_point_estimates"]
|
|
938
1034
|
if ab_section:
|
|
939
1035
|
result["ab_test"] = ab_section
|
|
1036
|
+
if guard_policy:
|
|
1037
|
+
result["policy"] = guard_policy
|
|
940
1038
|
return result
|
|
941
1039
|
|
|
942
1040
|
|
|
@@ -40,15 +40,34 @@ def normalize_run_report(report: Mapping[str, Any] | RunReport) -> RunReport:
|
|
|
40
40
|
# ---- meta ----
|
|
41
41
|
meta_in = _as_mapping(src.get("meta"))
|
|
42
42
|
ts = _str(meta_in.get("ts") or datetime.now().isoformat())
|
|
43
|
+
try:
|
|
44
|
+
seed_value = int(meta_in.get("seed", 42))
|
|
45
|
+
except Exception:
|
|
46
|
+
seed_value = 42
|
|
43
47
|
meta_dict: dict[str, Any] = {
|
|
44
48
|
"model_id": _str(meta_in.get("model_id")),
|
|
45
49
|
"adapter": _str(meta_in.get("adapter")),
|
|
46
50
|
"commit": _str(meta_in.get("commit")),
|
|
47
|
-
"seed":
|
|
51
|
+
"seed": seed_value,
|
|
48
52
|
"device": _str(meta_in.get("device", "cpu")),
|
|
49
53
|
"ts": ts,
|
|
50
54
|
"auto": meta_in.get("auto") if isinstance(meta_in.get("auto"), dict) else None,
|
|
51
55
|
}
|
|
56
|
+
# Preserve additional provenance knobs used by certificate/digests.
|
|
57
|
+
for key in (
|
|
58
|
+
"policy_overrides",
|
|
59
|
+
"overrides",
|
|
60
|
+
"plugins",
|
|
61
|
+
"config",
|
|
62
|
+
"seeds",
|
|
63
|
+
"determinism",
|
|
64
|
+
"env_flags",
|
|
65
|
+
"cuda_flags",
|
|
66
|
+
"tokenizer_hash",
|
|
67
|
+
"model_profile",
|
|
68
|
+
):
|
|
69
|
+
if key in meta_in:
|
|
70
|
+
meta_dict[key] = meta_in.get(key)
|
|
52
71
|
meta = cast(MetaData, meta_dict)
|
|
53
72
|
|
|
54
73
|
# ---- data ----
|
|
@@ -164,6 +183,7 @@ def normalize_run_report(report: Mapping[str, Any] | RunReport) -> RunReport:
|
|
|
164
183
|
"spectral",
|
|
165
184
|
"rmt",
|
|
166
185
|
"invariants",
|
|
186
|
+
"logloss_delta_ci",
|
|
167
187
|
"bootstrap",
|
|
168
188
|
"reduction",
|
|
169
189
|
"moe",
|
|
@@ -174,6 +194,9 @@ def normalize_run_report(report: Mapping[str, Any] | RunReport) -> RunReport:
|
|
|
174
194
|
"window_pairing_reason",
|
|
175
195
|
"window_pairing_preview",
|
|
176
196
|
"window_pairing_final",
|
|
197
|
+
"window_plan",
|
|
198
|
+
"window_capacity",
|
|
199
|
+
"stats",
|
|
177
200
|
"total_tokens",
|
|
178
201
|
"preview_total_tokens",
|
|
179
202
|
"final_total_tokens",
|
|
@@ -6,7 +6,7 @@ import hashlib
|
|
|
6
6
|
import json
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
-
from invarlock.core.auto_tuning import TIER_POLICIES
|
|
9
|
+
from invarlock.core.auto_tuning import get_tier_policies, resolve_tier_policies
|
|
10
10
|
|
|
11
11
|
from .report_types import RunReport
|
|
12
12
|
|
|
@@ -38,17 +38,40 @@ def _compute_thresholds_payload(
|
|
|
38
38
|
from .certificate import TIER_RATIO_LIMITS # local to avoid cycles
|
|
39
39
|
|
|
40
40
|
tier_lc = (tier or "balanced").lower()
|
|
41
|
-
ratio_limit_base = float(TIER_RATIO_LIMITS.get(tier_lc, 1.10))
|
|
42
|
-
tier_policy = TIER_POLICIES.get(tier_lc, {}) if isinstance(tier_lc, str) else {}
|
|
43
41
|
metrics_policy = (
|
|
44
|
-
|
|
45
|
-
)
|
|
46
|
-
pm_policy = (
|
|
47
|
-
metrics_policy.get("pm_ratio", {}) if isinstance(metrics_policy, dict) else {}
|
|
48
|
-
)
|
|
49
|
-
acc_policy = (
|
|
50
|
-
metrics_policy.get("accuracy", {}) if isinstance(metrics_policy, dict) else {}
|
|
42
|
+
resolved_policy.get("metrics", {}) if isinstance(resolved_policy, dict) else {}
|
|
51
43
|
)
|
|
44
|
+
if not isinstance(metrics_policy, dict):
|
|
45
|
+
metrics_policy = {}
|
|
46
|
+
|
|
47
|
+
pm_policy = metrics_policy.get("pm_ratio", {})
|
|
48
|
+
if not isinstance(pm_policy, dict):
|
|
49
|
+
pm_policy = {}
|
|
50
|
+
|
|
51
|
+
acc_policy = metrics_policy.get("accuracy", {})
|
|
52
|
+
if not isinstance(acc_policy, dict):
|
|
53
|
+
acc_policy = {}
|
|
54
|
+
|
|
55
|
+
ratio_limit_base = pm_policy.get("ratio_limit_base")
|
|
56
|
+
try:
|
|
57
|
+
if ratio_limit_base is not None:
|
|
58
|
+
ratio_limit_base = float(ratio_limit_base)
|
|
59
|
+
except Exception:
|
|
60
|
+
ratio_limit_base = None
|
|
61
|
+
if ratio_limit_base is None:
|
|
62
|
+
tier_defaults = get_tier_policies().get(tier_lc, {})
|
|
63
|
+
fallback_pm = (
|
|
64
|
+
(tier_defaults.get("metrics") or {}).get("pm_ratio")
|
|
65
|
+
if isinstance(tier_defaults, dict)
|
|
66
|
+
else {}
|
|
67
|
+
)
|
|
68
|
+
ratio_limit_base = float(
|
|
69
|
+
(fallback_pm or {}).get(
|
|
70
|
+
"ratio_limit_base", TIER_RATIO_LIMITS.get(tier_lc, 1.10)
|
|
71
|
+
)
|
|
72
|
+
if isinstance(fallback_pm, dict)
|
|
73
|
+
else TIER_RATIO_LIMITS.get(tier_lc, 1.10)
|
|
74
|
+
)
|
|
52
75
|
variance_policy = (
|
|
53
76
|
resolved_policy.get("variance", {}) if isinstance(resolved_policy, dict) else {}
|
|
54
77
|
)
|
|
@@ -154,11 +177,21 @@ def _format_epsilon_map(epsilon_map: Any) -> dict[str, float]:
|
|
|
154
177
|
|
|
155
178
|
|
|
156
179
|
def _build_resolved_policies(
|
|
157
|
-
tier: str,
|
|
180
|
+
tier: str,
|
|
181
|
+
spectral: dict[str, Any],
|
|
182
|
+
rmt: dict[str, Any],
|
|
183
|
+
variance: dict[str, Any],
|
|
184
|
+
*,
|
|
185
|
+
profile: str | None = None,
|
|
186
|
+
explicit_overrides: dict[str, dict[str, Any]] | None = None,
|
|
158
187
|
) -> dict[str, Any]:
|
|
159
188
|
"""Merge tier defaults with observed policies to surface the resolved configuration."""
|
|
160
189
|
tier_key = (tier or "balanced").lower()
|
|
161
|
-
|
|
190
|
+
if tier_key == "none":
|
|
191
|
+
tier_key = "balanced"
|
|
192
|
+
base = resolve_tier_policies(
|
|
193
|
+
tier_key, edit_name=None, explicit_overrides=explicit_overrides, profile=profile
|
|
194
|
+
)
|
|
162
195
|
|
|
163
196
|
resolved: dict[str, Any] = {}
|
|
164
197
|
|
|
@@ -280,6 +313,37 @@ def _build_resolved_policies(
|
|
|
280
313
|
variance_resolved: dict[str, Any] = {}
|
|
281
314
|
if isinstance(base_variance, dict):
|
|
282
315
|
variance_resolved.update(base_variance)
|
|
316
|
+
|
|
317
|
+
observed_variance_policy = (
|
|
318
|
+
variance.get("policy") if isinstance(variance, dict) else None
|
|
319
|
+
)
|
|
320
|
+
if isinstance(observed_variance_policy, dict) and observed_variance_policy:
|
|
321
|
+
for key in (
|
|
322
|
+
"deadband",
|
|
323
|
+
"min_abs_adjust",
|
|
324
|
+
"max_scale_step",
|
|
325
|
+
"min_effect_lognll",
|
|
326
|
+
"predictive_one_sided",
|
|
327
|
+
"topk_backstop",
|
|
328
|
+
"max_adjusted_modules",
|
|
329
|
+
"tap",
|
|
330
|
+
"predictive_gate",
|
|
331
|
+
"scope",
|
|
332
|
+
"clamp",
|
|
333
|
+
"min_gain",
|
|
334
|
+
"min_rel_gain",
|
|
335
|
+
"max_calib",
|
|
336
|
+
"seed",
|
|
337
|
+
"mode",
|
|
338
|
+
"alpha",
|
|
339
|
+
"tie_breaker_deadband",
|
|
340
|
+
"calibration",
|
|
341
|
+
):
|
|
342
|
+
if (
|
|
343
|
+
key in observed_variance_policy
|
|
344
|
+
and observed_variance_policy.get(key) is not None
|
|
345
|
+
):
|
|
346
|
+
variance_resolved[key] = observed_variance_policy.get(key)
|
|
283
347
|
predictive_gate = variance.get("predictive_gate", {})
|
|
284
348
|
predictive_one_sided = variance_resolved.get("predictive_one_sided")
|
|
285
349
|
if isinstance(predictive_gate, dict) and "sided" in predictive_gate:
|
|
@@ -290,6 +354,10 @@ def _build_resolved_policies(
|
|
|
290
354
|
variance_resolved["min_effect_lognll"] = _safe_float(
|
|
291
355
|
variance_resolved.get("min_effect_lognll", 0.0), 0.0
|
|
292
356
|
)
|
|
357
|
+
if "topk_backstop" in variance_resolved:
|
|
358
|
+
variance_resolved["topk_backstop"] = _safe_int(
|
|
359
|
+
variance_resolved.get("topk_backstop", 0), 0
|
|
360
|
+
)
|
|
293
361
|
variance_resolved["max_adjusted_modules"] = _safe_int(
|
|
294
362
|
variance_resolved.get("max_adjusted_modules", 0), 0
|
|
295
363
|
)
|
|
@@ -307,10 +375,24 @@ def _build_resolved_policies(
|
|
|
307
375
|
)
|
|
308
376
|
resolved["variance"] = variance_resolved
|
|
309
377
|
|
|
310
|
-
#
|
|
378
|
+
# Metric gates (PM ratio, accuracy, confidence, etc.)
|
|
311
379
|
try:
|
|
312
380
|
metrics = base.get("metrics", {}) if isinstance(base, dict) else {}
|
|
313
|
-
|
|
381
|
+
if isinstance(metrics, dict) and metrics:
|
|
382
|
+
resolved["metrics"] = copy.deepcopy(metrics)
|
|
383
|
+
except Exception:
|
|
384
|
+
pass
|
|
385
|
+
|
|
386
|
+
# Confidence thresholds (optional policy knobs)
|
|
387
|
+
try:
|
|
388
|
+
conf = None
|
|
389
|
+
metrics = (
|
|
390
|
+
resolved.get("metrics")
|
|
391
|
+
if isinstance(resolved.get("metrics"), dict)
|
|
392
|
+
else None
|
|
393
|
+
)
|
|
394
|
+
if isinstance(metrics, dict):
|
|
395
|
+
conf = metrics.get("confidence")
|
|
314
396
|
if isinstance(conf, dict) and conf:
|
|
315
397
|
resolved["confidence"] = {}
|
|
316
398
|
if "ppl_ratio_width_max" in conf:
|
|
@@ -428,7 +510,7 @@ def _extract_effective_policies(report: RunReport) -> dict[str, Any]:
|
|
|
428
510
|
guard_policy[key] = original_policy[key]
|
|
429
511
|
policies[guard_name] = dict(guard_policy)
|
|
430
512
|
|
|
431
|
-
tier_defaults =
|
|
513
|
+
tier_defaults = get_tier_policies().get(_resolve_policy_tier(report), {})
|
|
432
514
|
|
|
433
515
|
def _merge_defaults(target: dict[str, Any], defaults: dict[str, Any]) -> None:
|
|
434
516
|
for key, value in defaults.items():
|
|
@@ -102,6 +102,23 @@ def attach_primary_metric(
|
|
|
102
102
|
and float(base_final) > 0
|
|
103
103
|
):
|
|
104
104
|
pm_copy["ratio_vs_baseline"] = float(fin) / float(base_final)
|
|
105
|
+
# Ensure display_ci aligns with log-space CI for ppl-like metrics
|
|
106
|
+
try:
|
|
107
|
+
kind = str(pm_copy.get("kind", "")).lower()
|
|
108
|
+
except Exception:
|
|
109
|
+
kind = ""
|
|
110
|
+
ci = pm_copy.get("ci")
|
|
111
|
+
if (
|
|
112
|
+
kind.startswith("ppl")
|
|
113
|
+
and isinstance(ci, list | tuple)
|
|
114
|
+
and len(ci) == 2
|
|
115
|
+
):
|
|
116
|
+
try:
|
|
117
|
+
lo, hi = float(ci[0]), float(ci[1])
|
|
118
|
+
if math.isfinite(lo) and math.isfinite(hi):
|
|
119
|
+
pm_copy["display_ci"] = [math.exp(lo), math.exp(hi)]
|
|
120
|
+
except Exception:
|
|
121
|
+
pass
|
|
105
122
|
# Provide a degenerate display CI if missing
|
|
106
123
|
if not isinstance(
|
|
107
124
|
pm_copy.get("display_ci"), list | tuple
|
invarlock/reporting/validate.py
CHANGED
|
@@ -407,25 +407,25 @@ def validate_guard_overhead(
|
|
|
407
407
|
else None
|
|
408
408
|
)
|
|
409
409
|
|
|
410
|
-
|
|
411
|
-
|
|
410
|
+
bare_ppl = None
|
|
411
|
+
guarded_ppl = None
|
|
412
412
|
if isinstance(bare_pm, dict):
|
|
413
|
-
|
|
413
|
+
bare_ppl = bare_pm.get("final")
|
|
414
414
|
if isinstance(guarded_pm, dict):
|
|
415
|
-
|
|
415
|
+
guarded_ppl = guarded_pm.get("final")
|
|
416
416
|
|
|
417
417
|
if (
|
|
418
|
-
isinstance(
|
|
419
|
-
and
|
|
420
|
-
and isinstance(
|
|
418
|
+
isinstance(bare_ppl, (int | float))
|
|
419
|
+
and bare_ppl > 0
|
|
420
|
+
and isinstance(guarded_ppl, (int | float))
|
|
421
421
|
):
|
|
422
|
-
overhead_ratio = float(
|
|
422
|
+
overhead_ratio = float(guarded_ppl) / float(bare_ppl)
|
|
423
423
|
overhead_percent = (overhead_ratio - 1.0) * 100
|
|
424
424
|
|
|
425
425
|
metrics["overhead_ratio"] = overhead_ratio
|
|
426
426
|
metrics["overhead_percent"] = overhead_percent
|
|
427
|
-
metrics["
|
|
428
|
-
metrics["
|
|
427
|
+
metrics["bare_ppl"] = float(bare_ppl)
|
|
428
|
+
metrics["guarded_ppl"] = float(guarded_ppl)
|
|
429
429
|
|
|
430
430
|
# Apply overhead gate
|
|
431
431
|
checks["guard_overhead"] = overhead_ratio <= (1.0 + overhead_threshold)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: invarlock
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.3.3
|
|
4
4
|
Summary: Edit‑agnostic robustness certificates for weight edits (InvarLock framework)
|
|
5
5
|
Author-email: InvarLock Team <oss@invarlock.dev>
|
|
6
6
|
Maintainer-email: InvarLock Maintainers <support@invarlock.dev>
|
|
@@ -112,7 +112,7 @@ they don’t, roll back safely.
|
|
|
112
112
|
Technical: edit‑agnostic guard pipeline (invariants → spectral → RMT →
|
|
113
113
|
variance) producing a machine‑readable Safety Certificate.
|
|
114
114
|
|
|
115
|
-
> **Status:** 0.3.1 (pre‑1.0). Until 1.0, **minor** releases may be
|
|
115
|
+
> **Status:** 0.3.3 (pre‑1.0). Until 1.0, **minor** releases may be
|
|
116
116
|
> breaking. See CLI help and the CHANGELOG for updates.
|
|
117
117
|
|
|
118
118
|
[![CI](https://github.com/invarlock/invarlock/actions/workflows/ci.yml/badge.svg)](https://github.com/invarlock/invarlock/actions/workflows/ci.yml)
|
|
@@ -289,14 +289,16 @@ pip install "invarlock[hf]"
|
|
|
289
289
|
|
|
290
290
|
## 💻 Support Matrix
|
|
291
291
|
|
|
292
|
-
|
|
293
|
-
|
|
|
294
|
-
|
|
|
295
|
-
|
|
|
296
|
-
|
|
|
297
|
-
|
|
|
298
|
-
|
|
|
299
|
-
|
|
|
292
|
+
<!-- markdownlint-disable MD060 -->
|
|
293
|
+
| Platform | Status | Notes |
|
|
294
|
+
| ---------------------- | --------------- | ----------------------------------------- |
|
|
295
|
+
| Python 3.12+ | ✅ Required | |
|
|
296
|
+
| Linux | ✅ Full | Primary dev target |
|
|
297
|
+
| macOS (Intel/M-series) | ✅ Full | MPS supported (default on Apple Silicon) |
|
|
298
|
+
| Windows | ❌ Not supported | Use WSL2 or a Linux container if required |
|
|
299
|
+
| CUDA | ✅ Recommended | For larger models |
|
|
300
|
+
| CPU | ✅ Fallback | Slower but functional |
|
|
301
|
+
<!-- markdownlint-enable MD060 -->
|
|
300
302
|
|
|
301
303
|
**Device selection:** CUDA → MPS → CPU (auto). Override with torch env if
|
|
302
304
|
needed (e.g., `CUDA_VISIBLE_DEVICES`).
|