PyPI - invarlock - Versions diffs - 0.3.5__tar.gz → 0.3.6__tar.gz - Mend

invarlock 0.3.5__tar.gz → 0.3.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (149) hide show

{invarlock-0.3.5/src/invarlock.egg-info → invarlock-0.3.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: invarlock
-Version: 0.3.5
+Version: 0.3.6
 Summary: Edit‑agnostic robustness certificates for weight edits (InvarLock framework)
 Author-email: InvarLock Team <oss@invarlock.dev>
 Maintainer-email: InvarLock Maintainers <support@invarlock.dev>
@@ -112,7 +112,7 @@ they don’t, roll back safely.
 Technical: edit‑agnostic guard pipeline (invariants → spectral → RMT →
 variance) producing a machine‑readable Safety Certificate.
-> **Status:** 0.3.5 (pre‑1.0). Until 1.0, **minor** releases may be
+> **Status:** 0.3.6 (pre‑1.0). Until 1.0, **minor** releases may be
 > breaking. See CLI help and the CHANGELOG for updates.
 [![CI](https://img.shields.io/github/actions/workflow/status/invarlock/invarlock/ci.yml?branch=main&logo=github&label=CI)](https://github.com/invarlock/invarlock/actions/workflows/ci.yml)
@@ -170,7 +170,7 @@ Quick examples (repo presets, CPU; repo clone required for preset paths):
 pip install "invarlock[hf]"
 # Preflight a config (JSON diagnostics)
-invarlock doctor --config configs/tasks/causal_lm/ci_cpu.yaml --json
+invarlock doctor --config configs/presets/causal_lm/wikitext2_512.yaml --json
 # Calibrated GPT‑2 small (recommended starting point; repo preset)
 INVARLOCK_ALLOW_NETWORK=1 INVARLOCK_DEDUP_TEXTS=1 \
@@ -179,7 +179,7 @@ invarlock certify \
   --subject  gpt2 \
   --adapter auto \
   --profile release \
-  --preset configs/tasks/causal_lm/release_auto.yaml
+  --preset configs/presets/causal_lm/wikitext2_512.yaml
 # Tiny causal LM smoke (out‑of‑calibration, dev‑only)
 INVARLOCK_ALLOW_NETWORK=1 \
@@ -249,7 +249,7 @@ INVARLOCK_ALLOW_NETWORK=1 invarlock certify \
   --subject  gpt2 \
   --adapter auto \
   --profile ci \
-  --preset configs/tasks/causal_lm/ci_cpu.yaml
+  --preset configs/presets/causal_lm/wikitext2_512.yaml
 ```
 - Offline/air‑gapped usage: pre‑download to a cache, then run with network
@@ -488,7 +488,7 @@ output:
 Run preflight checks before a run to catch misconfigurations early:
 ```bash
-invarlock doctor --config configs/tasks/causal_lm/ci_cpu.yaml --json
+invarlock doctor --config configs/presets/causal_lm/wikitext2_512.yaml --json
 ```
 Text mode emits lines prefixed with `ERROR:`, `WARNING:`, or `NOTE:` and stable

{invarlock-0.3.5 → invarlock-0.3.6}/README.md RENAMED Viewed

@@ -6,7 +6,7 @@ they don’t, roll back safely.
 Technical: edit‑agnostic guard pipeline (invariants → spectral → RMT →
 variance) producing a machine‑readable Safety Certificate.
-> **Status:** 0.3.5 (pre‑1.0). Until 1.0, **minor** releases may be
+> **Status:** 0.3.6 (pre‑1.0). Until 1.0, **minor** releases may be
 > breaking. See CLI help and the CHANGELOG for updates.
 [![CI](https://img.shields.io/github/actions/workflow/status/invarlock/invarlock/ci.yml?branch=main&logo=github&label=CI)](https://github.com/invarlock/invarlock/actions/workflows/ci.yml)
@@ -64,7 +64,7 @@ Quick examples (repo presets, CPU; repo clone required for preset paths):
 pip install "invarlock[hf]"
 # Preflight a config (JSON diagnostics)
-invarlock doctor --config configs/tasks/causal_lm/ci_cpu.yaml --json
+invarlock doctor --config configs/presets/causal_lm/wikitext2_512.yaml --json
 # Calibrated GPT‑2 small (recommended starting point; repo preset)
 INVARLOCK_ALLOW_NETWORK=1 INVARLOCK_DEDUP_TEXTS=1 \
@@ -73,7 +73,7 @@ invarlock certify \
   --subject  gpt2 \
   --adapter auto \
   --profile release \
-  --preset configs/tasks/causal_lm/release_auto.yaml
+  --preset configs/presets/causal_lm/wikitext2_512.yaml
 # Tiny causal LM smoke (out‑of‑calibration, dev‑only)
 INVARLOCK_ALLOW_NETWORK=1 \
@@ -143,7 +143,7 @@ INVARLOCK_ALLOW_NETWORK=1 invarlock certify \
   --subject  gpt2 \
   --adapter auto \
   --profile ci \
-  --preset configs/tasks/causal_lm/ci_cpu.yaml
+  --preset configs/presets/causal_lm/wikitext2_512.yaml
 ```
 - Offline/air‑gapped usage: pre‑download to a cache, then run with network
@@ -382,7 +382,7 @@ output:
 Run preflight checks before a run to catch misconfigurations early:
 ```bash
-invarlock doctor --config configs/tasks/causal_lm/ci_cpu.yaml --json
+invarlock doctor --config configs/presets/causal_lm/wikitext2_512.yaml --json
 ```
 Text mode emits lines prefixed with `ERROR:`, `WARNING:`, or `NOTE:` and stable

{invarlock-0.3.5 → invarlock-0.3.6}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "invarlock"
-version = "0.3.5"
+version = "0.3.6"
 description = "Edit‑agnostic robustness certificates for weight edits (InvarLock framework)"
 authors = [{ name = "InvarLock Team", email = "oss@invarlock.dev" }]
 maintainers = [{ name = "InvarLock Maintainers", email = "support@invarlock.dev" }]

{invarlock-0.3.5 → invarlock-0.3.6}/src/invarlock/__init__.py RENAMED Viewed

@@ -12,7 +12,7 @@ For torch-dependent functionality, see subpackages under `invarlock.*`:
 - `invarlock.eval`: Metrics, guard-overhead checks, and certification
 """
-__version__ = "0.3.5"
+__version__ = "0.3.6"
 # Core exports - torch-independent
 from .config import CFG, Defaults, get_default_config

{invarlock-0.3.5 → invarlock-0.3.6}/src/invarlock/_data/runtime/tiers.yaml RENAMED Viewed

@@ -1,8 +1,12 @@
-# Tier guard policy knobs for variance correction (balanced vs conservative)
+# Tier policy defaults (metrics gates + guard knobs) used at runtime.
 #
-# These values mirror the settings validated during the December 2025
-# calibration runs. They should be kept in sync with the policy digest
-# embedded in certificates and referenced by automation documentation.
+# Balanced and Conservative values are calibrated/validated against pilot/null
+# runs (Nov/Dec 2025) where applicable; Aggressive is research-oriented (not in
+# the safety case).
+#
+# Rationale by key: docs/reference/tier-policy-catalog.md
+# Calibration method: docs/assurance/09-tier-v1-calibration.md
+# Provenance/digest: docs/assurance/11-policy-provenance.md
 balanced:
   metrics:
@@ -11,6 +15,13 @@ balanced:
       min_tokens: 50000
       hysteresis_ratio: 0.002
       min_token_fraction: 0.01
+    pm_tail:
+      mode: warn
+      min_windows: 50
+      quantile: 0.95
+      quantile_max: 0.20
+      epsilon: 0.0001
+      mass_max: 1.0
     accuracy:
       delta_min_pp: -1.0
       min_examples: 200
@@ -20,7 +31,7 @@ balanced:
     deadband: 0.02
     min_abs_adjust: 0.012
     max_scale_step: 0.03
-    min_effect_lognll: 0.0009
+    min_effect_lognll: 0.0
     predictive_one_sided: true
     topk_backstop: 1
     max_adjusted_modules: 1
@@ -33,10 +44,10 @@ balanced:
     max_caps: 5
     max_spectral_norm: null
     family_caps:
-      ffn: 3.834
-      attn: 3.423
-      embed: 3.1
-      other: 3.1
+      ffn: 3.849
+      attn: 3.018
+      embed: 1.05
+      other: 0.0
     multiple_testing:
       method: bh
       alpha: 0.05
@@ -44,12 +55,12 @@ balanced:
   rmt_guard:
     deadband: 0.10
     margin: 1.5
-    epsilon_default: 0.10
+    epsilon_default: 0.01
     epsilon_by_family:
-      ffn: 0.10
-      attn: 0.08
-      embed: 0.12
-      other: 0.12
+      ffn: 0.01
+      attn: 0.01
+      embed: 0.01
+      other: 0.01
 conservative:
   metrics:
@@ -58,6 +69,13 @@ conservative:
       min_tokens: 20000
       hysteresis_ratio: 0.002
       min_token_fraction: 0.01
+    pm_tail:
+      mode: warn
+      min_windows: 50
+      quantile: 0.95
+      quantile_max: 0.12
+      epsilon: 0.0001
+      mass_max: 1.0
     accuracy:
       delta_min_pp: -0.5
       min_examples: 200
@@ -67,7 +85,7 @@ conservative:
     deadband: 0.03
     min_abs_adjust: 0.02
     max_scale_step: 0.015
-    min_effect_lognll: 0.0018
+    min_effect_lognll: 0.016
     predictive_one_sided: false
     topk_backstop: 0
     max_adjusted_modules: 0
@@ -78,24 +96,25 @@ conservative:
     deadband: 0.05
     scope: ffn
     max_caps: 3
+    max_spectral_norm: null
     family_caps:
-      ffn: 2.3
+      ffn: 3.849
       attn: 2.6
       embed: 2.8
       other: 2.8
     multiple_testing:
       method: bonferroni
-      alpha: 0.02
+      alpha: 0.000625
       m: 4
   rmt_guard:
     deadband: 0.05
     margin: 1.3
-    epsilon_default: 0.06
+    epsilon_default: 0.01
     epsilon_by_family:
-      ffn: 0.06
-      attn: 0.05
-      embed: 0.07
-      other: 0.07
+      ffn: 0.01
+      attn: 0.01
+      embed: 0.01
+      other: 0.01
 aggressive:
   metrics:
@@ -104,6 +123,13 @@ aggressive:
       min_tokens: 50000
       hysteresis_ratio: 0.002
       min_token_fraction: 0.01
+    pm_tail:
+      mode: warn
+      min_windows: 50
+      quantile: 0.95
+      quantile_max: 0.30
+      epsilon: 0.0001
+      mass_max: 1.0
     accuracy:
       delta_min_pp: -2.0
       min_examples: 200
@@ -111,27 +137,28 @@ aggressive:
       min_examples_fraction: 0.01
   variance_guard:
     deadband: 0.12
-    min_effect_lognll: 0.0005
+    min_effect_lognll: 0.033
   spectral_guard:
     sigma_quantile: 0.98
     deadband: 0.15
     scope: ffn
     max_caps: 8
+    max_spectral_norm: null
     family_caps:
-      ffn: 3.0
+      ffn: 3.849
       attn: 3.5
       embed: 2.5
       other: 3.5
     multiple_testing:
       method: bh
-      alpha: 0.1
+      alpha: 0.00078125
       m: 4
   rmt_guard:
     deadband: 0.15
     margin: 1.8
-    epsilon_default: 0.15
+    epsilon_default: 0.01
     epsilon_by_family:
-      ffn: 0.15
-      attn: 0.15
-      embed: 0.15
-      other: 0.15
+      ffn: 0.01
+      attn: 0.01
+      embed: 0.01
+      other: 0.01

{invarlock-0.3.5 → invarlock-0.3.6}/src/invarlock/adapters/__init__.py RENAMED Viewed

@@ -76,7 +76,7 @@ class _RemovedComponent:
         return _RemovedComponent(self._name, self._replacement)
-# Placeholders for removed/legacy utilities referenced in tests
+# Placeholders for removed utilities referenced in tests
 HF_Pythia_Adapter = _RemovedComponent("HF_Pythia_Adapter")
 auto_tune_pruning_budget = _RemovedComponent("auto_tune_pruning_budget")
 run_auto_invarlock = _RemovedComponent("run_auto_invarlock")

{invarlock-0.3.5 → invarlock-0.3.6}/src/invarlock/calibration/spectral_null.py RENAMED Viewed

@@ -148,7 +148,7 @@ def _selected_families_for_alpha(
 def summarize_null_sweep_reports(
-    reports: list[dict[str, Any]],
+    reports: list[object],
     *,
     tier: str,
     safety_margin: float = 0.05,
@@ -186,20 +186,25 @@ def summarize_null_sweep_reports(
         mt = _extract_multiple_testing(metrics)
         if mt:
             mt_method = str(mt.get("method", mt_method))
-            if mt.get("alpha") is not None:
-                mt_alpha = float(mt.get("alpha"))
-            if mt.get("m") is not None:
-                mt_m = int(mt.get("m"))
+            alpha_value = mt.get("alpha")
+            if alpha_value is not None:
+                try:
+                    mt_alpha = float(alpha_value)
+                except Exception:
+                    pass
+            m_value = mt.get("m")
+            if m_value is not None:
+                try:
+                    mt_m = int(m_value)
+                except Exception:
+                    pass
         fam_z = _extract_family_max_z(metrics)
         for fam, z in fam_z.items():
             family_max_z[fam] = max(family_max_z[fam], float(z))
-        selection = (
-            metrics.get("multiple_testing_selection")
-            if isinstance(metrics.get("multiple_testing_selection"), dict)
-            else {}
-        )
+        raw_selection = metrics.get("multiple_testing_selection")
+        selection = raw_selection if isinstance(raw_selection, dict) else {}
         pvals = selection.get("family_pvalues")
         if not isinstance(pvals, dict):
             pvals = {}

{invarlock-0.3.5 → invarlock-0.3.6}/src/invarlock/calibration/variance_ve.py RENAMED Viewed

@@ -107,8 +107,6 @@ def summarize_ve_sweep_reports(
     evaluated = 0
     for report in reports:
-        if not isinstance(report, dict):
-            continue
         g = _extract_guard(report, "variance") or {}
         metrics = g.get("metrics", {}) if isinstance(g.get("metrics"), dict) else {}
         pg = metrics.get("predictive_gate")

{invarlock-0.3.5 → invarlock-0.3.6}/src/invarlock/cli/commands/calibrate.py RENAMED Viewed

@@ -144,7 +144,9 @@ def null_sweep(
     ),
     n_seeds: int = typer.Option(10, "--n-seeds", min=1, help="Number of seeds to run."),
     seed_start: int = typer.Option(42, "--seed-start", help="Starting seed."),
-    profile: str = typer.Option("ci", "--profile", help="Run profile (ci|release)."),
+    profile: str = typer.Option(
+        "ci", "--profile", help="Run profile (ci|release|ci_cpu|dev)."
+    ),
     device: str | None = typer.Option(None, "--device", help="Device override."),
     safety_margin: float = typer.Option(
         0.05, "--safety-margin", help="Safety margin applied to κ recommendations."
@@ -363,7 +365,9 @@ def ve_sweep(
         "--target-enable-rate",
         help="Target expected VE enable rate (predictive-gate lower bound).",
     ),
-    profile: str = typer.Option("ci", "--profile", help="Run profile (ci|release)."),
+    profile: str = typer.Option(
+        "ci", "--profile", help="Run profile (ci|release|ci_cpu|dev)."
+    ),
     device: str | None = typer.Option(None, "--device", help="Device override."),
     safety_margin: float = typer.Option(
         0.0,

{invarlock-0.3.5 → invarlock-0.3.6}/src/invarlock/cli/commands/certify.py RENAMED Viewed

@@ -22,9 +22,9 @@ from typing import Any
 import typer
 from rich.console import Console
+from ...core.exceptions import MetricsError
 from ..adapter_auto import resolve_auto_adapter
 from ..config import _deep_merge as _merge  # reuse helper
-from ..errors import InvarlockError
 # Use the report group's programmatic entry for report generation
 from .report import report_command as _report
@@ -98,7 +98,9 @@ def certify_command(
         "--device",
         help="Device override for runs (auto|cuda|mps|cpu)",
     ),
-    profile: str = typer.Option("ci", "--profile", help="Profile (ci|release)"),
+    profile: str = typer.Option(
+        "ci", "--profile", help="Profile (ci|release|ci_cpu|dev)"
+    ),
     tier: str = typer.Option("balanced", "--tier", help="Tier label for context"),
     preset: str | None = typer.Option(
         None,
@@ -152,9 +154,9 @@ def certify_command(
     # scenario), fall back to a minimal built-in universal preset so the
     # flag-only quick start works without cloning the repo.
     default_universal = (
-        Path("configs/tasks/masked_lm/ci_cpu.yaml")
+        Path("configs/presets/masked_lm/wikitext2_128.yaml")
         if eff_adapter == "hf_bert"
-        else Path("configs/tasks/causal_lm/ci_cpu.yaml")
+        else Path("configs/presets/causal_lm/wikitext2_512.yaml")
     )
     preset_path = Path(preset) if preset is not None else default_universal
@@ -185,6 +187,20 @@ def certify_command(
             model_block.pop("device", None)
             preset_data["model"] = model_block
+    default_guards_order = ["invariants", "spectral", "rmt", "variance", "invariants"]
+    guards_order = None
+    preset_guards = preset_data.get("guards")
+    if isinstance(preset_guards, dict):
+        preset_order = preset_guards.get("order")
+        if (
+            isinstance(preset_order, list)
+            and preset_order
+            and all(isinstance(item, str) for item in preset_order)
+        ):
+            guards_order = list(preset_order)
+    if guards_order is None:
+        guards_order = list(default_guards_order)
     # Create temp baseline config (no-op edit)
     # Normalize possible "hf:" prefixes for HF adapters
     norm_src_id = _normalize_model_id(src_id, eff_adapter)
@@ -199,9 +215,7 @@ def certify_command(
             },
             "edit": {"name": "noop", "plan": {}},
             "eval": {},
-            "guards": {
-                "order": ["invariants", "spectral", "rmt", "variance", "invariants"]
-            },
+            "guards": {"order": guards_order},
             "output": {"dir": str(Path(out) / "source")},
             "context": {"profile": profile, "tier": tier},
         },
@@ -292,15 +306,7 @@ def certify_command(
                 "model": {"id": norm_edt_id, "adapter": eff_adapter},
                 "edit": {"name": "noop", "plan": {}},
                 "eval": {},
-                "guards": {
-                    "order": [
-                        "invariants",
-                        "spectral",
-                        "rmt",
-                        "variance",
-                        "invariants",
-                    ]
-                },
+                "guards": {"order": guards_order},
                 "output": {"dir": str(Path(out) / "edited")},
                 "context": {"profile": profile, "tier": tier},
             },
@@ -325,12 +331,11 @@ def certify_command(
         raise typer.Exit(1)
     # CI/Release hard‑abort: fail fast when primary metric is not computable.
-    # Fall back to legacy ppl_* keys when primary_metric block is absent.
     try:
         prof = str(profile or "").strip().lower()
     except Exception:
         prof = ""
-    if prof in {"ci", "release"}:
+    if prof in {"ci", "ci_cpu", "release"}:
         try:
             with Path(edited_report).open("r", encoding="utf-8") as fh:
                 edited_payload = json.load(fh)
@@ -364,35 +369,49 @@ def certify_command(
             else None
         ) or "unknown"
-        # Enforce only when a metric block is present; skip for minimal stub reports
-        # Enforce only when a primary_metric block is present
+        # Enforce only when a primary_metric block is present; allow degraded-but-flagged metrics to emit certificates, but fail the task.
         has_metric_block = isinstance(pm, dict) and bool(pm)
         if has_metric_block:
-            # Treat non‑finite PM as hard error in CI/Release (after legacy fallback).
-            # Require a finite final value; preview is optional for legacy reports.
-            if not _finite(pm_final):
-                err = InvarlockError(
+            degraded = bool(pm.get("invalid") or pm.get("degraded"))
+            if degraded or not _finite(pm_final):
+                fallback = pm_prev if _finite(pm_prev) else pm_final
+                if not _finite(fallback) or fallback <= 0:
+                    fallback = 1.0
+                degraded_reason = pm.get("degraded_reason") or (
+                    "non_finite_pm"
+                    if (not _finite(pm_prev) or not _finite(pm_final))
+                    else "primary_metric_degraded"
+                )
+                console.print(
+                    "[yellow]⚠️  Primary metric degraded or non-finite; emitting certificate and marking task degraded. Primary metric computation failed.[/yellow]"
+                )
+                pm["degraded"] = True
+                pm["invalid"] = pm.get("invalid") or True
+                pm["preview"] = pm_prev if _finite(pm_prev) else fallback
+                pm["final"] = pm_final if _finite(pm_final) else fallback
+                pm["ratio_vs_baseline"] = pm_ratio if _finite(pm_ratio) else 1.0
+                pm["degraded_reason"] = degraded_reason
+                metrics["primary_metric"] = pm
+                edited_payload.setdefault("metrics", {}).update(metrics)
+                # Emit the certificate for inspection, then exit with a CI-visible error.
+                _report(
+                    run=str(edited_report),
+                    format="cert",
+                    baseline=str(baseline_report),
+                    output=cert_out,
+                )
+                err = MetricsError(
                     code="E111",
-                    message=(
-                        "Primary metric computation failed (NaN/inf). "
-                        f"Context: device={device}, adapter={adapter_name}, edit={edit_name}. "
-                        "Baseline ok; edited failed to compute ppl. "
-                        "Try: use an accelerator (mps/cuda), force float32, reduce max_modules, "
-                        "or lower the evaluation batch size."
-                    ),
+                    message=f"Primary metric degraded or non-finite ({degraded_reason}).",
                     details={
-                        "device": device,
+                        "reason": degraded_reason,
                         "adapter": adapter_name,
+                        "device": device,
                         "edit": edit_name,
-                        "pm_preview": pm_prev,
-                        "pm_final": pm_final,
-                        "pm_ratio": pm_ratio,
                     },
                 )
-                code = _resolve_exit_code(err, profile=prof)
-                console.print(f"[red]{err}[/red]")
-                # Do not emit a certificate
-                raise typer.Exit(code)
+                raise typer.Exit(_resolve_exit_code(err, profile=profile))
     console.print("📜 Emitting certificate")
     _report(

{invarlock-0.3.5 → invarlock-0.3.6}/src/invarlock/cli/commands/doctor.py RENAMED Viewed

@@ -188,7 +188,9 @@ def doctor_command(
         None, "--config", "-c", help="Path to YAML config for preflight lints"
     ),
     profile: str | None = typer.Option(
-        None, "--profile", help="Profile to apply for preflight (ci|release)"
+        None,
+        "--profile",
+        help="Profile to apply for preflight (e.g. ci, release, ci_cpu; dev is a no-op)",
     ),
     baseline: str | None = typer.Option(
         None, "--baseline", help="Optional baseline report to check pairing readiness"

{invarlock-0.3.5 → invarlock-0.3.6}/src/invarlock/cli/commands/explain_gates.py RENAMED Viewed

@@ -99,10 +99,6 @@ def explain_gates_command(
         pm = cert.get("primary_metric", {})
         ratio = pm.get("ratio_vs_baseline")
         ratio_ci = pm.get("display_ci")
-    elif isinstance(cert.get("ppl"), dict):  # legacy
-        ppl = cert.get("ppl", {})
-        ratio = ppl.get("ratio_vs_baseline")
-        ratio_ci = ppl.get("ratio_ci")
     hysteresis_applied = bool(validation.get("hysteresis_applied"))
     status = "PASS" if bool(validation.get("primary_metric_acceptable")) else "FAIL"
     console.print("[bold]Gate: Primary Metric vs Baseline[/bold]")
@@ -125,6 +121,63 @@ def explain_gates_command(
             f"  note: hysteresis applied → effective threshold = {limit_with_hyst:.3f}x"
         )
+    # Tail gate explanation (warn/fail; based on per-window Δlog-loss vs baseline)
+    pm_tail = (
+        cert.get("primary_metric_tail", {})
+        if isinstance(cert.get("primary_metric_tail"), dict)
+        else {}
+    )
+    if pm_tail:
+        mode = str(pm_tail.get("mode", "warn") or "warn").strip().lower()
+        evaluated = bool(pm_tail.get("evaluated", False))
+        passed = bool(pm_tail.get("passed", True))
+        policy = (
+            pm_tail.get("policy", {}) if isinstance(pm_tail.get("policy"), dict) else {}
+        )
+        stats = (
+            pm_tail.get("stats", {}) if isinstance(pm_tail.get("stats"), dict) else {}
+        )
+        q = policy.get("quantile", 0.95)
+        try:
+            qf = float(q)
+        except Exception:
+            qf = 0.95
+        qf = max(0.0, min(1.0, qf))
+        q_key = f"q{int(round(100.0 * qf))}"
+        q_name = f"P{int(round(100.0 * qf))}"
+        q_val = stats.get(q_key)
+        qmax = policy.get("quantile_max")
+        eps = policy.get("epsilon", stats.get("epsilon"))
+        mass = stats.get("tail_mass")
+        mmax = policy.get("mass_max")
+        if not evaluated:
+            status_tail = "INFO"
+        elif passed:
+            status_tail = "PASS"
+        elif mode == "fail":
+            status_tail = "FAIL"
+        else:
+            status_tail = "WARN"
+        console.print("\n[bold]Gate: Primary Metric Tail (ΔlogNLL)[/bold]")
+        console.print(f"  mode: {mode}")
+        console.print(f"  status: {status_tail}")
+        if isinstance(q_val, int | float):
+            console.print(f"  observed: {q_name}={float(q_val):.4f}")
+        if isinstance(mass, int | float):
+            console.print(f"  tail_mass: Pr[ΔlogNLL > ε]={float(mass):.4f}")
+        thr_parts: list[str] = []
+        if isinstance(qmax, int | float):
+            thr_parts.append(f"{q_name}≤{float(qmax):.4f}")
+        if isinstance(mmax, int | float):
+            thr_parts.append(f"mass≤{float(mmax):.4f}")
+        if isinstance(eps, int | float):
+            thr_parts.append(f"ε={float(eps):.1e}")
+        if thr_parts:
+            console.print("  threshold: " + "; ".join(thr_parts))
     # Dataset split visibility from report provenance
     try:
         split = (report_data.get("provenance", {}) or {}).get("dataset_split")
@@ -151,10 +204,6 @@ def explain_gates_command(
                     drift = float(final) / float(preview)
             except Exception:
                 drift = None
-    if isinstance(cert.get("ppl"), dict):  # legacy
-        ppl = cert.get("ppl", {})
-        drift = ppl.get("preview_final_ratio", drift)
-        drift_ci = ppl.get("drift_ci")
     drift_status = (
         "PASS" if bool(validation.get("preview_final_drift_acceptable")) else "FAIL"
     )

{invarlock-0.3.5 → invarlock-0.3.6}/src/invarlock/cli/commands/report.py RENAMED Viewed

@@ -120,7 +120,7 @@ def _generate_reports(
             else:
                 console.print(f"  📄 {fmt.upper()}: {file_path}")
-        # Show key metrics (PM-first). Avoid legacy PPL wording.
+        # Show key metrics (PM-first). Avoid PPL-first wording.
         console.print("\n📈 Key Metrics:")
         console.print(f"  Model: {primary_report['meta']['model_id']}")
         console.print(f"  Edit: {primary_report['edit']['name']}")

invarlock 0.3.5__tar.gz → 0.3.6__tar.gz

invarlock 0.3.5__tar.gz → 0.3.6__tar.gz