PyPI - invarlock - Versions diffs - 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl - Mend

invarlock 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

invarlock/__init__.py +1 -1
invarlock/_data/runtime/profiles/ci_cpu.yaml +5 -0
invarlock/_data/runtime/tiers.yaml +61 -0
invarlock/adapters/hf_loading.py +97 -0
invarlock/calibration/__init__.py +6 -0
invarlock/calibration/spectral_null.py +301 -0
invarlock/calibration/variance_ve.py +154 -0
invarlock/cli/app.py +15 -0
invarlock/cli/commands/calibrate.py +576 -0
invarlock/cli/commands/doctor.py +16 -4
invarlock/cli/commands/explain_gates.py +53 -9
invarlock/cli/commands/plugins.py +12 -2
invarlock/cli/commands/run.py +323 -81
invarlock/cli/commands/verify.py +40 -0
invarlock/cli/determinism.py +237 -0
invarlock/core/auto_tuning.py +215 -17
invarlock/core/registry.py +9 -4
invarlock/eval/bench.py +467 -141
invarlock/eval/bench_regression.py +12 -0
invarlock/eval/data.py +29 -7
invarlock/guards/spectral.py +216 -9
invarlock/guards/variance.py +6 -3
invarlock/reporting/certificate.py +403 -51
invarlock/reporting/certificate_schema.py +4 -1
invarlock/reporting/guards_analysis.py +108 -10
invarlock/reporting/normalizer.py +21 -1
invarlock/reporting/policy_utils.py +100 -16
{invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/METADATA +12 -10
{invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/RECORD +33 -26
{invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/WHEEL +0 -0
{invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/entry_points.txt +0 -0
{invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/licenses/LICENSE +0 -0
{invarlock-0.3.0.dist-info → invarlock-0.3.2.dist-info}/top_level.txt +0 -0

invarlock/eval/bench_regression.py ADDED Viewed

@@ -0,0 +1,12 @@
+from __future__ import annotations
+# Policy-change regression baseline identifiers.
+#
+# When the benchmark golden outputs are intentionally updated, bump
+# `BENCH_GOLDEN_ID` and update `BENCH_GOLDEN_SHA256` accordingly, then add a
+# matching entry to `CHANGELOG.md`.
+#
+# NOTE(review): the digest is presumably the SHA-256 of the golden benchmark
+# outputs named by `BENCH_GOLDEN_ID`; this module does not show what exactly
+# is hashed or where it is checked -- confirm against the consumer.
+BENCH_GOLDEN_ID = "bench-golden-2025-12-13"
+BENCH_GOLDEN_SHA256 = "0d9ff3274d29dad16ad580b4a0cf37b4f89e4f7c2e4345ce3d30a39f146ff5a7"
+# Both identifiers form the module's declared public API.
+__all__ = ["BENCH_GOLDEN_ID", "BENCH_GOLDEN_SHA256"]

invarlock/eval/data.py CHANGED Viewed

@@ -855,6 +855,13 @@ class WikiText2Provider:
             eval_device_override = os.environ.get("INVARLOCK_EVAL_DEVICE")
             device_hint = getattr(self, "_device_hint", None)
+            def _is_device_usable(device: torch.device) -> bool:
+                """Return True if a tiny tensor can be allocated on ``device``."""
+                try:
+                    # Probe with a 1x1 integer tensor; the value itself is discarded.
+                    _ = torch.zeros((1, 1), dtype=torch.long, device=device)
+                    return True
+                except Exception:
+                    # Any failure while allocating is reported as "not usable"
+                    # rather than propagated to the caller.
+                    return False
             if self._difficulty_model is None:
                 from transformers import GPT2LMHeadModel
@@ -874,6 +881,13 @@ class WikiText2Provider:
                 else:
                     device = self._pick_default_scorer_device()
+                if device.type != "cpu" and not _is_device_usable(device):
+                    warnings.warn(
+                        f"Difficulty scorer device {device} unavailable; falling back to CPU",
+                        stacklevel=2,
+                    )
+                    device = torch.device("cpu")
                 model.to(device)
                 self._difficulty_model = model
                 self._difficulty_device = device
@@ -898,16 +912,24 @@ class WikiText2Provider:
                     desired_device = device
             if desired_device != device:
-                try:
-                    model.to(desired_device)
-                    device = desired_device
-                    self._difficulty_device = desired_device
-                    self.__class__._MODEL_DEVICE = desired_device
-                except Exception as exc:
+                if desired_device.type != "cpu" and not _is_device_usable(
+                    desired_device
+                ):
                     warnings.warn(
-                        f"Failed to move GPT-2 difficulty scorer to {desired_device}: {exc}",
+                        f"Difficulty scorer device {desired_device} unavailable; keeping {device}",
                         stacklevel=2,
                     )
+                else:
+                    try:
+                        model.to(desired_device)
+                        device = desired_device
+                        self._difficulty_device = desired_device
+                        self.__class__._MODEL_DEVICE = desired_device
+                    except Exception as exc:
+                        warnings.warn(
+                            f"Failed to move GPT-2 difficulty scorer to {desired_device}: {exc}",
+                            stacklevel=2,
+                        )
             if not self._scorer_warmed:
                 with torch.no_grad():

invarlock/guards/spectral.py CHANGED Viewed

@@ -26,6 +26,80 @@ from invarlock.core.api import Guard
 from ._contracts import guard_assert
+def _z_to_two_sided_pvalue(z: Any) -> float:
+    """Convert a z-score into a two-sided p-value.
+    Computes ``erfc(|z| / sqrt(2))``, i.e. the two-sided tail probability of a
+    standard normal at ``|z|``.
+    Returns:
+        The p-value as a float, or ``1.0`` when ``z`` cannot be converted to a
+        float or is not finite (NaN / infinity).
+    """
+    try:
+        zf = float(z)
+        if not math.isfinite(zf):
+            # Non-finite scores carry no usable evidence: report p = 1.0.
+            return 1.0
+        return float(math.erfc(abs(zf) / math.sqrt(2.0)))
+    except Exception:
+        # Unconvertible input is treated the same way as a non-finite score.
+        return 1.0
+def _finite01(value: Any) -> bool:
+    """Return True if ``value`` converts to a finite float within ``[0, 1]``.
+    Anything that cannot be converted to a float yields ``False``.
+    """
+    try:
+        f = float(value)
+        # NaN fails both the finiteness check and the range comparison.
+        return math.isfinite(f) and 0.0 <= f <= 1.0
+    except Exception:
+        return False
+def _bh_reject_families(
+    family_pvals: dict[str, float], *, alpha: float, m: int
+) -> set[str]:
+    """BH family selection with denominator `m` (conservative if m >= #families).
+    Args:
+        family_pvals: Mapping of family name to its p-value. Entries whose
+            p-value is not a finite number in ``[0, 1]`` are never selected.
+        alpha: Target level; must lie in ``(0, 1]`` or nothing is selected.
+            A value that cannot be converted to float falls back to ``0.05``.
+        m: Number of hypotheses used as the denominator. The effective value
+            is never smaller than the number of families supplied (or 1).
+    Returns:
+        The set of family names rejected by the step-up rule; empty when the
+        input is empty, ``alpha`` is out of range, or no p-value qualifies.
+    """
+    if not family_pvals:
+        return set()
+    try:
+        alpha_f = float(alpha)
+    except Exception:
+        alpha_f = 0.05
+    # Also rejects NaN, since every comparison with NaN is False.
+    if not (0.0 < alpha_f <= 1.0):
+        return set()
+    names = list(family_pvals.keys())
+    pvals = [family_pvals[n] for n in names]
+    n = len(pvals)
+    # A non-int `m` counts as 0 here, so the denominator falls back to `n`.
+    m_eff = max(int(m) if isinstance(m, int) else 0, n, 1)
+    # Ascending p-value order; invalid p-values sort last (key = +inf) so they
+    # do not shift the ranks assigned to the valid ones.
+    order = sorted(
+        range(n),
+        key=lambda idx: (float("inf") if not _finite01(pvals[idx]) else pvals[idx]),
+    )
+    # Step-up search: keep the largest rank k whose p-value satisfies
+    # p_(k) <= alpha * k / m_eff.
+    max_k = 0
+    for rank, idx in enumerate(order, start=1):
+        p = pvals[idx]
+        if not _finite01(p):
+            continue
+        if p <= (alpha_f * rank) / m_eff:
+            max_k = rank
+    if max_k <= 0:
+        return set()
+    # Every valid p-value at or below the threshold for rank max_k is rejected.
+    cutoff = (alpha_f * max_k) / m_eff
+    selected: set[str] = set()
+    for idx in order:
+        p = pvals[idx]
+        if _finite01(p) and p <= cutoff:
+            selected.add(names[idx])
+    return selected
+def _bonferroni_reject_families(
+    family_pvals: dict[str, float], *, alpha: float, m: int
+) -> set[str]:
+    """Bonferroni family selection: reject families with ``p <= alpha / m``.
+    Args:
+        family_pvals: Mapping of family name to its p-value. Entries whose
+            p-value is not a finite number in ``[0, 1]`` are never selected.
+        alpha: Target level; must lie in ``(0, 1]`` or nothing is selected.
+            A value that cannot be converted to float falls back to ``0.05``.
+        m: Number of hypotheses used as the divisor. The effective value is
+            never smaller than the number of families supplied (or 1).
+    Returns:
+        The set of rejected family names (possibly empty).
+    """
+    if not family_pvals:
+        return set()
+    try:
+        alpha_f = float(alpha)
+    except Exception:
+        alpha_f = 0.05
+    # Also rejects NaN, since every comparison with NaN is False.
+    if not (0.0 < alpha_f <= 1.0):
+        return set()
+    # A non-int `m` counts as 0 here, so the divisor falls back to the family count.
+    m_eff = max(int(m) if isinstance(m, int) else 0, len(family_pvals), 1)
+    cutoff = alpha_f / m_eff
+    return {fam for fam, p in family_pvals.items() if _finite01(p) and p <= cutoff}
 class SpectralPolicy(TypedDict, total=False):
     """Type definition for spectral guard policy configuration."""
@@ -567,6 +641,121 @@ class SpectralGuard(Guard):
         return family_quantiles, top_z_scores
+    def _select_budgeted_violations(
+        self, budgeted_violations: list[dict[str, Any]]
+    ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
+        """Apply BH/Bonferroni selection at the family level.
+        Each family is represented by the smallest p-value among its violations
+        (derived from ``z_score``); a violation is kept when its family is
+        rejected by the configured procedure, or when no p-value can be
+        computed for it.
+        Side effects:
+            Every dict in ``budgeted_violations`` is mutated in place: a missing
+            ``family`` is filled in, and ``p_value`` / ``selected`` are set.
+            When ``self.multiple_testing`` is a dict without an ``"m"`` key, the
+            effective ``m`` is stored into it.
+        Args:
+            budgeted_violations: Violation records; the keys read here are
+                ``family``, ``module`` and ``z_score``.
+        Returns:
+            (selected_violations, selection_metrics)
+        """
+        # A non-dict policy is treated as empty so the defaults below apply.
+        mt = self.multiple_testing if isinstance(self.multiple_testing, dict) else {}
+        method = str(mt.get("method", "bh")).lower()
+        try:
+            # `or 0.05` makes a falsy alpha (None, 0) fall back to the default.
+            alpha = float(mt.get("alpha", 0.05) or 0.05)
+        except Exception:
+            alpha = 0.05
+        # Optional configured denominator; anything unparseable becomes None.
+        m_raw = mt.get("m")
+        m = None
+        try:
+            if m_raw is not None:
+                m = int(m_raw)
+        except Exception:
+            m = None
+        # Fill in missing family assignments deterministically.
+        for violation in budgeted_violations:
+            if violation.get("family"):
+                continue
+            module = violation.get("module")
+            if isinstance(module, str):
+                family = self.module_family_map.get(module)
+                if isinstance(family, str) and family:
+                    violation["family"] = family
+                    continue
+            # No usable mapping for this module: bucket it under "other".
+            violation["family"] = "other"
+        # Family p-values derived from the most significant (min p) module in each family.
+        family_pvals: dict[str, float] = {}
+        family_max_abs_z: dict[str, float] = {}
+        family_counts: dict[str, int] = {}
+        for violation in budgeted_violations:
+            fam = violation.get("family")
+            if fam is None:
+                continue
+            family = str(fam)
+            z_val = violation.get("z_score")
+            try:
+                zf = float(z_val)
+            except Exception:
+                continue
+            if not math.isfinite(zf):
+                continue
+            p = _z_to_two_sided_pvalue(zf)
+            # Only violations with a finite z-score are counted per family.
+            family_counts[family] = family_counts.get(family, 0) + 1
+            cur = family_pvals.get(family)
+            if cur is None or p < cur:
+                family_pvals[family] = p
+                # |z| of the violation that currently supplies the family's min p.
+                family_max_abs_z[family] = abs(zf)
+        families_tested = sorted(family_pvals.keys())
+        # Use the configured m when positive, but never fewer than the number
+        # of families actually tested (and at least 1).
+        m_eff = m if isinstance(m, int) and m > 0 else len(families_tested)
+        m_eff = max(m_eff, len(families_tested), 1)
+        if isinstance(self.multiple_testing, dict):
+            # setdefault: an existing "m" entry (even None) is left untouched.
+            self.multiple_testing.setdefault("m", m_eff)
+        if method in {"bh", "benjamini-hochberg", "benjamini_hochberg"}:
+            selected_families = _bh_reject_families(family_pvals, alpha=alpha, m=m_eff)
+            applied_method = "bh"
+        elif method in {"bonferroni", "bonf"}:
+            selected_families = _bonferroni_reject_families(
+                family_pvals, alpha=alpha, m=m_eff
+            )
+            applied_method = "bonferroni"
+        else:
+            # Unrecognized method names fall back to Bonferroni.
+            selected_families = _bonferroni_reject_families(
+                family_pvals, alpha=alpha, m=m_eff
+            )
+            applied_method = "bonferroni"
+        selected: list[dict[str, Any]] = []
+        default_selected_without_pvalue = 0
+        # Second pass: annotate every violation and collect the selected ones.
+        for violation in budgeted_violations:
+            fam = violation.get("family")
+            family = str(fam) if fam is not None else ""
+            z_val = violation.get("z_score")
+            p_val: float | None = None
+            try:
+                zf = float(z_val)
+            except Exception:
+                zf = None
+            if zf is not None and math.isfinite(zf):
+                p_val = _z_to_two_sided_pvalue(zf)
+                # Selection is per family: all scored violations of a rejected
+                # family are kept, not just the one with the smallest p-value.
+                is_selected = family in selected_families
+            else:
+                # If we cannot compute a p-value, fail closed: keep the violation.
+                is_selected = True
+                default_selected_without_pvalue += 1
+            violation["p_value"] = p_val
+            violation["selected"] = is_selected
+            if is_selected:
+                selected.append(violation)
+        # Summary of the procedure and its outcome, returned alongside the
+        # selected violations.
+        selection_metrics = {
+            "method": applied_method,
+            "alpha": alpha,
+            "m": int(m_eff),
+            "families_tested": families_tested,
+            "families_selected": sorted(selected_families),
+            "family_pvalues": {k: float(family_pvals[k]) for k in families_tested},
+            "family_max_abs_z": {
+                k: float(family_max_abs_z[k]) for k in families_tested
+            },
+            "family_violation_counts": dict(family_counts),
+            "default_selected_without_pvalue": int(default_selected_without_pvalue),
+        }
+        return selected, selection_metrics
     def validate(
         self, model: Any, adapter: Any, context: dict[str, Any]
     ) -> dict[str, Any]:
@@ -607,7 +796,13 @@ class SpectralGuard(Guard):
                 if violation.get("type") in fatal_violation_types
             ]
-            caps_applied = len(budgeted_violations)
+            selected_budgeted, mt_selection = self._select_budgeted_violations(
+                budgeted_violations
+            )
+            selected_violations = [*fatal_violations, *selected_budgeted]
+            candidate_budgeted = len(budgeted_violations)
+            caps_applied = len(selected_budgeted)
             caps_exceeded = caps_applied > int(self.max_caps)
             passed = not fatal_violations and not caps_exceeded
             if fatal_violations or caps_exceeded:
@@ -623,8 +818,9 @@ class SpectralGuard(Guard):
             )
             metrics = {
                 "modules_checked": len(current_metrics),
-                "violations_found": len(violations),
+                "violations_found": len(selected_violations),
                 "budgeted_violations": caps_applied,
+                "candidate_budgeted_violations": candidate_budgeted,
                 "fatal_violations": len(fatal_violations),
                 "max_spectral_norm": max(current_metrics.values())
                 if current_metrics
@@ -642,6 +838,7 @@ class SpectralGuard(Guard):
                 "caps_applied": caps_applied,
                 "caps_exceeded": caps_exceeded,
                 "multiple_testing": self.multiple_testing,
+                "multiple_testing_selection": mt_selection,
             }
             family_quantiles, top_z_scores = self._compute_family_observability()
@@ -653,7 +850,7 @@ class SpectralGuard(Guard):
             if passed:
                 message = (
                     "Spectral validation passed with "
-                    f"{len(violations)} violations "
+                    f"{len(selected_violations)} violations "
                     f"(caps_applied={caps_applied}, max_caps={self.max_caps})"
                 )
             else:
@@ -683,7 +880,7 @@ class SpectralGuard(Guard):
                 "passed": passed,
                 "action": action,
                 "metrics": metrics,
-                "violations": violations,
+                "violations": selected_violations,
                 "message": message,
                 "policy": self._serialize_policy(),
                 "final_z_scores": self.latest_z_scores.copy(),
@@ -743,15 +940,23 @@ class SpectralGuard(Guard):
             if violation.get("type") in fatal_violation_types
         ]
-        caps_applied = len(budgeted_violations)
+        selected_budgeted, mt_selection = self._select_budgeted_violations(
+            budgeted_violations
+        )
+        selected_final_violations = [*fatal_violations, *selected_budgeted]
+        candidate_budgeted = len(budgeted_violations)
+        caps_applied = len(selected_budgeted)
         caps_exceeded = caps_applied > int(self.max_caps)
         passed = not fatal_violations and not caps_exceeded
         # Compute comprehensive metrics
         metrics = {
             "modules_analyzed": len(final_metrics),
-            "violations_detected": len(final_violations),
+            "violations_detected": len(selected_final_violations),
             "budgeted_violations": caps_applied,
+            "candidate_violations_detected": len(final_violations),
+            "candidate_budgeted_violations": candidate_budgeted,
             "fatal_violations": len(fatal_violations),
             "baseline_modules": len(self.baseline_metrics),
             "scope": self.scope,
@@ -764,7 +969,8 @@ class SpectralGuard(Guard):
             "spectral_stability_score": 1.0
             - min(len(final_violations) / max(len(final_metrics), 1), 1.0),
             "target_sigma": self.target_sigma,
-            "correction_applied": len(final_violations) > 0 and self.correction_enabled,
+            "correction_applied": len(selected_final_violations) > 0
+            and self.correction_enabled,
             "family_caps": self.family_caps,
             "family_z_summary": final_z_summary,
             "family_stats": final_family_stats,
@@ -774,6 +980,7 @@ class SpectralGuard(Guard):
             "caps_applied": caps_applied,
             "caps_exceeded": caps_exceeded,
             "multiple_testing": self.multiple_testing,
+            "multiple_testing_selection": mt_selection,
             "family_z_quantiles": family_quantiles,
             "top_z_scores": top_z_scores,
         }
@@ -782,7 +989,7 @@ class SpectralGuard(Guard):
         warnings = []
         errors = []
-        for violation in final_violations:
+        for violation in selected_final_violations:
             if violation["type"] in ["max_spectral_norm", "ill_conditioned"]:
                 errors.append(violation["message"])
             else:
@@ -793,7 +1000,7 @@ class SpectralGuard(Guard):
             "metrics": metrics,
             "warnings": warnings,
             "errors": errors,
-            "violations": final_violations,
+            "violations": selected_final_violations,
             "events": self.events,
             "baseline_metrics": self.baseline_metrics,
             "final_metrics": final_metrics,

invarlock/guards/variance.py CHANGED Viewed

@@ -403,15 +403,18 @@ def _predictive_gate_outcome(
     ):
         return False, "ci_unavailable"
-    lower, upper = float(delta_ci[0]), float(delta_ci[1])
+    upper = float(delta_ci[1])
     min_effect = float(min_effect or 0.0)
     if one_sided:
-        if lower >= 0.0:
+        # One-sided improvement (ΔlogNLL < 0): certify a minimum effect by
+        # requiring the *upper* bound of the (two-sided) CI to clear -min_effect.
+        if upper >= 0.0:
             return False, "ci_contains_zero"
         if mean_delta >= 0.0:
             return False, "mean_not_negative"
-        if min_effect > 0.0 and (-mean_delta) < min_effect:
+        gain_lower_bound = -upper  # worst-case gain under CI
+        if gain_lower_bound < min_effect:
             return False, "gain_below_threshold"
         return True, "ci_gain_met"

invarlock 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

invarlock 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl