invarlock-0.3.6-py3-none-any.whl → invarlock-0.3.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. invarlock/__init__.py +2 -2
  2. invarlock/adapters/__init__.py +10 -14
  3. invarlock/adapters/auto.py +35 -40
  4. invarlock/adapters/capabilities.py +2 -2
  5. invarlock/adapters/hf_causal.py +418 -0
  6. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  7. invarlock/adapters/hf_mixin.py +25 -4
  8. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  9. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  10. invarlock/cli/adapter_auto.py +31 -21
  11. invarlock/cli/app.py +73 -2
  12. invarlock/cli/commands/certify.py +600 -59
  13. invarlock/cli/commands/doctor.py +8 -10
  14. invarlock/cli/commands/plugins.py +13 -9
  15. invarlock/cli/commands/report.py +233 -69
  16. invarlock/cli/commands/run.py +907 -183
  17. invarlock/cli/commands/verify.py +76 -11
  18. invarlock/cli/config.py +1 -1
  19. invarlock/cli/doctor_helpers.py +4 -5
  20. invarlock/cli/output.py +193 -0
  21. invarlock/cli/provenance.py +1 -1
  22. invarlock/core/bootstrap.py +1 -1
  23. invarlock/core/registry.py +9 -11
  24. invarlock/core/runner.py +111 -25
  25. invarlock/edits/quant_rtn.py +65 -37
  26. invarlock/eval/bench.py +3 -3
  27. invarlock/eval/data.py +68 -23
  28. invarlock/eval/metrics.py +59 -1
  29. invarlock/eval/tasks/__init__.py +12 -0
  30. invarlock/eval/tasks/classification.py +48 -0
  31. invarlock/eval/tasks/qa.py +36 -0
  32. invarlock/eval/tasks/text_generation.py +102 -0
  33. invarlock/guards/invariants.py +19 -10
  34. invarlock/guards/rmt.py +2 -2
  35. invarlock/guards/variance.py +2 -2
  36. invarlock/model_profile.py +48 -27
  37. invarlock/observability/health.py +6 -6
  38. invarlock/observability/metrics.py +108 -0
  39. invarlock/reporting/certificate.py +159 -9
  40. invarlock/reporting/certificate_schema.py +1 -1
  41. invarlock/reporting/guards_analysis.py +154 -4
  42. invarlock/reporting/html.py +55 -5
  43. invarlock/reporting/normalizer.py +7 -0
  44. invarlock/reporting/render.py +791 -431
  45. invarlock/reporting/report.py +39 -3
  46. invarlock/reporting/report_types.py +6 -1
  47. invarlock/reporting/telemetry.py +86 -0
  48. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/METADATA +23 -9
  49. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/RECORD +53 -48
  50. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
  51. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
  52. invarlock/adapters/hf_gpt2.py +0 -404
  53. invarlock/adapters/hf_llama.py +0 -487
  54. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
  55. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
invarlock/cli/commands/verify.py CHANGED
@@ -2,7 +2,7 @@
  invarlock verify command
  ====================

- Validates generated safety certificates for internal consistency. The command
+ Validates generated evaluation certificates for internal consistency. The command
  ensures schema compliance, checks that the primary metric ratio agrees with the
  baseline reference, and enforces paired-window guarantees (match=1.0,
  overlap=0.0).
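
For orientation, these checks operate on a certificate whose primary_metric block records the preview and final metric values plus the ratio against the stored baseline. A minimal illustrative fragment (values invented; only the field names come from the validator hunks below):

certificate = {
    "primary_metric": {
        "kind": "ppl",                       # perplexity-family metric
        "preview": 12.30,
        "final": 12.41,
        "ratio_vs_baseline": 12.41 / 12.40,  # must agree with final / baseline final
    },
    "baseline_ref": {
        "primary_metric": {"final": 12.40},
    },
}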
@@ -66,9 +66,25 @@ def _validate_primary_metric(certificate: dict[str, Any]) -> list[str]:
          errors.append("Certificate missing primary_metric block.")
          return errors

+     def _is_finite_number(value: Any) -> bool:
+         return isinstance(value, (int, float)) and math.isfinite(float(value))
+
+     def _declares_invalid_primary_metric(metric: dict[str, Any]) -> bool:
+         if bool(metric.get("invalid")):
+             return True
+         reason = metric.get("degraded_reason")
+         if isinstance(reason, str):
+             r = reason.strip().lower()
+             return r.startswith("non_finite") or r in {
+                 "primary_metric_invalid",
+                 "evaluation_error",
+             }
+         return False
+
      kind = str(pm.get("kind", "")).lower()
      ratio_vs_baseline = pm.get("ratio_vs_baseline")
      final = pm.get("final")
+     pm_invalid = _declares_invalid_primary_metric(pm)

      if kind.startswith("ppl"):
          baseline_ref = certificate.get("baseline_ref", {}) or {}
@@ -82,16 +98,14 @@ def _validate_primary_metric(certificate: dict[str, Any]) -> list[str]:
              bv = baseline_pm.get("final")
              if isinstance(bv, (int | float)):
                  baseline_final = float(bv)
-         if isinstance(final, int | float) and isinstance(baseline_final, int | float):
-             if baseline_final <= 0.0:
+         if _is_finite_number(final) and _is_finite_number(baseline_final):
+             if float(baseline_final) <= 0.0:
                  errors.append(
                      f"Baseline final must be > 0.0 to compute ratio (found {baseline_final})."
                  )
              else:
                  expected_ratio = float(final) / float(baseline_final)
-                 if not isinstance(ratio_vs_baseline, int | float) or not math.isfinite(
-                     float(ratio_vs_baseline)
-                 ):
+                 if not _is_finite_number(ratio_vs_baseline):
                      errors.append(
                          "Certificate is missing a finite primary_metric.ratio_vs_baseline value."
                      )
@@ -102,7 +116,18 @@ def _validate_primary_metric(certificate: dict[str, Any]) -> list[str]:
                          "Primary metric ratio mismatch: "
                          f"recorded={float(ratio_vs_baseline):.12f}, expected={expected_ratio:.12f}"
                      )
+         else:
+             # If the primary metric is non-finite, it must be explicitly marked invalid.
+             # This is expected for structural error-injection runs (NaN/Inf weights).
+             if (isinstance(final, (int | float)) and not _is_finite_number(final)) and (
+                 not pm_invalid
+             ):
+                 errors.append(
+                     "Primary metric final is non-finite but primary_metric.invalid is not set."
+                 )
      else:
+         if pm_invalid:
+             return errors
          if ratio_vs_baseline is None or not isinstance(ratio_vs_baseline, int | float):
              errors.append(
                  "Certificate missing primary_metric.ratio_vs_baseline for non-ppl metric."
@@ -194,14 +219,29 @@ def _validate_counts(certificate: dict[str, Any]) -> list[str]:


  def _validate_drift_band(certificate: dict[str, Any]) -> list[str]:
-     """Validate preview→final drift stays within the configured band (0.95–1.05)."""
+     """Validate preview→final drift stays within the configured band.
+
+     Defaults to 0.95–1.05 unless the certificate provides `primary_metric.drift_band`.
+     """
      errors: list[str] = []
      pm = certificate.get("primary_metric", {}) or {}
+     if not isinstance(pm, dict) or not pm:
+         errors.append("Certificate missing primary_metric block.")
+         return errors
+     if bool(pm.get("invalid")):
+         # Drift is undefined when the primary metric is invalid (e.g., NaN/Inf weights).
+         return errors
      drift_ratio = None
      try:
          prev = pm.get("preview")
          fin = pm.get("final")
-         if isinstance(prev, int | float) and isinstance(fin, int | float) and prev > 0:
+         if (
+             isinstance(prev, int | float)
+             and isinstance(fin, int | float)
+             and math.isfinite(float(prev))
+             and math.isfinite(float(fin))
+             and prev > 0
+         ):
              drift_ratio = float(fin) / float(prev)
      except Exception:
          drift_ratio = None
@@ -210,9 +250,33 @@ def _validate_drift_band(certificate: dict[str, Any]) -> list[str]:
          errors.append("Certificate missing preview/final to compute drift ratio.")
          return errors

-     if not 0.95 <= float(drift_ratio) <= 1.05:
+     drift_min = 0.95
+     drift_max = 1.05
+     band = pm.get("drift_band")
+     try:
+         if isinstance(band, dict):
+             lo = band.get("min")
+             hi = band.get("max")
+             if isinstance(lo, int | float) and isinstance(hi, int | float):
+                 lo_f = float(lo)
+                 hi_f = float(hi)
+                 if math.isfinite(lo_f) and math.isfinite(hi_f) and 0 < lo_f < hi_f:
+                     drift_min = lo_f
+                     drift_max = hi_f
+         elif isinstance(band, list | tuple) and len(band) == 2:
+             lo_raw, hi_raw = band[0], band[1]
+             if isinstance(lo_raw, int | float) and isinstance(hi_raw, int | float):
+                 lo_f = float(lo_raw)
+                 hi_f = float(hi_raw)
+                 if math.isfinite(lo_f) and math.isfinite(hi_f) and 0 < lo_f < hi_f:
+                     drift_min = lo_f
+                     drift_max = hi_f
+     except Exception:
+         pass
+
+     if not drift_min <= float(drift_ratio) <= drift_max:
          errors.append(
-             f"Preview→final drift ratio out of band (0.95–1.05): observed {drift_ratio:.6f}."
+             f"Preview→final drift ratio out of band ({drift_min:.2f}–{drift_max:.2f}): observed {drift_ratio:.6f}."
          )

      return errors
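
Both accepted shapes of primary_metric.drift_band resolve the same way, and malformed bands (non-numeric, non-finite, inverted, or non-positive bounds) fall back to the 0.95–1.05 default. A self-contained sketch of that resolution rule, mirroring the dict and two-element sequence handling above:

import math

def resolve_band(band, default=(0.95, 1.05)):
    # Accepts {"min": lo, "max": hi} or [lo, hi]; anything else keeps the default.
    lo = hi = None
    if isinstance(band, dict):
        lo, hi = band.get("min"), band.get("max")
    elif isinstance(band, (list, tuple)) and len(band) == 2:
        lo, hi = band
    if isinstance(lo, (int, float)) and isinstance(hi, (int, float)):
        lo_f, hi_f = float(lo), float(hi)
        if math.isfinite(lo_f) and math.isfinite(hi_f) and 0 < lo_f < hi_f:
            return lo_f, hi_f
    return default

assert resolve_band({"min": 0.90, "max": 1.10}) == (0.90, 1.10)
assert resolve_band([0.90, 1.10]) == (0.90, 1.10)
assert resolve_band({"min": 1.10, "max": 0.90}) == (0.95, 1.05)  # inverted -> default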
@@ -406,7 +470,8 @@ def _validate_certificate_payload(
          )
      except Exception:
          prof = "dev"
-     # Enforce drift band only for CI/Release; skip in dev profile
+     # Drift band is a CI/Release enforcement check; dev profile should not
+     # fail verification due to preview→final drift.
      if prof in {"ci", "release"}:
          errors.extend(_validate_drift_band(certificate))
      errors.extend(_apply_profile_lints(certificate))
invarlock/cli/config.py CHANGED
@@ -415,7 +415,7 @@ def _deep_merge_dicts(a: dict, b: dict) -> dict: # pragma: no cover - trivial a

  def create_example_config() -> InvarLockConfig:  # pragma: no cover - test helper
      return InvarLockConfig(
-         model={"id": "gpt2", "adapter": "hf_gpt2", "device": "auto"},
+         model={"id": "gpt2", "adapter": "hf_causal", "device": "auto"},
          edit={"name": "quant_rtn", "plan": {}},
          dataset={"provider": "wikitext2", "seq_len": 512, "stride": 512},
          output={"dir": "runs"},
invarlock/cli/doctor_helpers.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any
  def get_adapter_rows() -> list[dict[str, Any]]:
      """Build adapter rows similar to doctor output for testing.

-     Applies optional-extra detection for hf_onnx (optimum/onnxruntime) even if
+     Applies optional-extra detection for hf_causal_onnx (optimum/onnxruntime) even if
      registered as a core adapter, so missing extras are surfaced.
      """
      from invarlock.core.registry import get_registry
@@ -29,13 +29,12 @@ def get_adapter_rows() -> list[dict[str, Any]]:
          module = str(info.get("module") or "")
          support = (
              "auto"
-             if module.startswith("invarlock.adapters")
-             and name in {"hf_causal_auto", "hf_mlm_auto"}
+             if module.startswith("invarlock.adapters") and name in {"hf_auto"}
              else ("core" if module.startswith("invarlock.adapters") else "optional")
          )
          backend, status, enable = None, "ready", ""

-         if name in {"hf_gpt2", "hf_bert", "hf_llama", "hf_causal_auto", "hf_mlm_auto"}:
+         if name in {"hf_causal", "hf_mlm", "hf_seq2seq", "hf_auto"}:
              backend = "transformers"
          elif name == "hf_gptq":
              backend = "auto-gptq"
@@ -49,7 +48,7 @@ def get_adapter_rows() -> list[dict[str, Any]]:
              backend = "bitsandbytes"
              if not has_cuda:
                  status, enable = "unsupported", "Requires CUDA"
-         elif name == "hf_onnx":
+         elif name == "hf_causal_onnx":
              backend = "onnxruntime"
              present = (
                  importlib.util.find_spec("optimum.onnxruntime") is not None
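
Each row get_adapter_rows() yields now reports the renamed adapters. An invented sample row follows; the field names (name, support, backend, status, enable) come from the snippet above, but the status and enable values for the ONNX case are hypothetical:

example_row = {
    "name": "hf_causal_onnx",
    "support": "core",      # "auto" is now reserved for hf_auto
    "backend": "onnxruntime",
    "status": "missing",    # hypothetical: when optimum.onnxruntime is not importable
    "enable": "pip install 'invarlock[onnx]'",  # hypothetical hint; extras name from the _FAMILY_MAP comment
}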
invarlock/cli/output.py ADDED
@@ -0,0 +1,193 @@
+ from __future__ import annotations
+
+ import os
+ import time
+ from collections.abc import Iterator
+ from contextlib import contextmanager
+ from dataclasses import dataclass
+ from typing import TextIO
+
+ from rich.console import Console
+
+ _STYLE_AUDIT = "audit"
+ _STYLE_FRIENDLY = "friendly"
+ _VALID_STYLES = {_STYLE_AUDIT, _STYLE_FRIENDLY}
+
+
+ def _safe_console_print(console: Console, *args: object, **kwargs: object) -> None:
+     try:
+         console.print(*args, **kwargs)
+     except TypeError:
+         console.print(*args)
+
+
+ def env_no_color() -> bool:
+     """Return True when NO_COLOR is set (value-agnostic)."""
+     return bool(str(os.environ.get("NO_COLOR", "")).strip())
+
+
+ def perf_counter() -> float:
+     return time.perf_counter()
+
+
+ @dataclass(frozen=True, slots=True)
+ class OutputStyle:
+     name: str
+     progress: bool = False
+     timing: bool = False
+     color: bool = True
+
+     @property
+     def emojis(self) -> bool:
+         return self.name != _STYLE_AUDIT
+
+     @property
+     def audit(self) -> bool:
+         return self.name == _STYLE_AUDIT
+
+
+ def normalize_style(style: str | None) -> str | None:
+     if style is None:
+         return None
+     value = str(style).strip().lower()
+     if not value:
+         return None
+     return value if value in _VALID_STYLES else None
+
+
+ def resolve_style_name(style: str | None, profile: str | None) -> str:
+     normalized = normalize_style(style)
+     if normalized is not None:
+         return normalized
+     profile_norm = str(profile or "").strip().lower()
+     if profile_norm in {"ci", "ci_cpu", "release"}:
+         return _STYLE_AUDIT
+     return _STYLE_FRIENDLY
+
+
+ def resolve_output_style(
+     *,
+     style: str | None,
+     profile: str | None,
+     progress: bool = False,
+     timing: bool = False,
+     no_color: bool = False,
+ ) -> OutputStyle:
+     name = resolve_style_name(style, profile)
+     color_enabled = not (bool(no_color) or env_no_color())
+     return OutputStyle(
+         name=name,
+         progress=bool(progress),
+         timing=bool(timing),
+         color=color_enabled,
+     )
+
+
+ def make_console(
+     *,
+     file: TextIO | None = None,
+     force_terminal: bool | None = None,
+     no_color: bool | None = None,
+ ) -> Console:
+     if no_color is None:
+         no_color = env_no_color()
+     if no_color:
+         color_system = None
+     else:
+         color_system = "standard" if force_terminal else "auto"
+     return Console(
+         file=file,
+         force_terminal=force_terminal,
+         no_color=bool(no_color),
+         color_system=color_system,
+     )
+
+
+ def format_event_line(
+     tag: str,
+     message: str,
+     *,
+     style: OutputStyle,
+     emoji: str | None = None,
+ ) -> str:
+     tag_norm = str(tag or "").strip().upper() or "INFO"
+     if style.emojis and emoji:
+         prefix = emoji
+     else:
+         prefix = f"[{tag_norm}]"
+     msg = str(message or "").rstrip()
+     return f"{prefix} {msg}".rstrip()
+
+
+ def print_event(
+     console: Console,
+     tag: str,
+     message: str,
+     *,
+     style: OutputStyle,
+     emoji: str | None = None,
+     console_style: str | None = None,
+ ) -> None:
+     line = format_event_line(tag, message, style=style, emoji=emoji)
+     if console_style is None and style.color:
+         tag_norm = str(tag or "").strip().upper()
+         if tag_norm in {"PASS"}:
+             console_style = "green"
+         elif tag_norm in {"FAIL", "ERROR"}:
+             console_style = "red"
+         elif tag_norm in {"WARN", "WARNING"}:
+             console_style = "yellow"
+         elif tag_norm in {"METRIC"}:
+             console_style = "cyan"
+     _safe_console_print(console, line, style=console_style, markup=False)
+
+
+ @contextmanager
+ def timed_step(
+     *,
+     console: Console,
+     style: OutputStyle,
+     timings: dict[str, float] | None,
+     key: str,
+     tag: str,
+     message: str,
+     emoji: str | None = None,
+ ) -> Iterator[None]:
+     start = perf_counter()
+     try:
+         yield
+     finally:
+         elapsed = max(0.0, float(perf_counter() - start))
+         if timings is not None:
+             timings[key] = elapsed
+         if style.progress:
+             print_event(
+                 console,
+                 tag,
+                 f"{message} done ({elapsed:.2f}s)",
+                 style=style,
+                 emoji=emoji,
+             )
+
+
+ def print_timing_summary(
+     console: Console,
+     timings: dict[str, float],
+     *,
+     style: OutputStyle,
+     order: list[tuple[str, str]],
+     extra_lines: list[str] | None = None,
+ ) -> None:
+     if not style.timing:
+         return
+     _safe_console_print(console, "", markup=False)
+     _safe_console_print(console, "TIMING SUMMARY", markup=False)
+     for label, key in order:
+         if key not in timings:
+             continue
+         _safe_console_print(
+             console, f" {label:<11}: {timings[key]:.2f}s", markup=False
+         )
+     if extra_lines:
+         for line in extra_lines:
+             _safe_console_print(console, line, markup=False)
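
Taken together, the new module gives commands a small, testable output layer: style resolution, a console factory that honors NO_COLOR, tagged event lines, and opt-in step timing. A usage sketch built only from the functions defined above (the tag, key, and message strings are illustrative, and the work inside the context manager is a placeholder):

from invarlock.cli.output import (
    make_console,
    print_event,
    print_timing_summary,
    resolve_output_style,
    timed_step,
)

style = resolve_output_style(style=None, profile="ci", progress=True, timing=True)
console = make_console()  # color is disabled automatically when NO_COLOR is set

timings: dict[str, float] = {}
with timed_step(
    console=console, style=style, timings=timings,
    key="eval", tag="STEP", message="evaluating",
):
    pass  # the command's real work would run here

print_event(console, "PASS", "certificate verified", style=style)  # audit style prints "[PASS] ..."
print_timing_summary(console, timings, style=style, order=[("Evaluate", "eval")])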
@@ -31,7 +31,7 @@ _FAMILY_MAP: dict[str, tuple[str, str, list[str]]] = {
      "hf_awq": ("awq", "autoawq", []),
      "hf_bnb": ("bnb", "bitsandbytes", []),
      # ONNX stack (requires extras: invarlock[onnx])
-     "hf_onnx": ("onnx", "onnxruntime", []),
+     "hf_causal_onnx": ("onnx", "onnxruntime", []),
  }


invarlock/core/bootstrap.py CHANGED
@@ -6,7 +6,7 @@ Numerically stable bootstrap helpers for evaluation metrics.

  This module provides bias-corrected and accelerated (BCa) confidence
  intervals tailored for paired log-loss statistics used by the runner
- and safety certificate reports.
+ and evaluation certificate reports.
  """

  from __future__ import annotations
invarlock/core/registry.py CHANGED
@@ -140,23 +140,21 @@ class CoreRegistry:
          )

          # Register built-in adapters
-         _fallback(self._adapters, "hf_gpt2", "invarlock.adapters", "HF_GPT2_Adapter")
-         _fallback(self._adapters, "hf_bert", "invarlock.adapters", "HF_BERT_Adapter")
-         _fallback(self._adapters, "hf_llama", "invarlock.adapters", "HF_LLaMA_Adapter")
-         _fallback(self._adapters, "hf_t5", "invarlock.adapters", "HF_T5_Adapter")
          _fallback(
-             self._adapters, "hf_onnx", "invarlock.adapters", "HF_ORT_CausalLM_Adapter"
+             self._adapters, "hf_causal", "invarlock.adapters", "HF_Causal_Adapter"
          )
-         # Convenience auto adapters (delegate to built-ins)
+         _fallback(self._adapters, "hf_mlm", "invarlock.adapters", "HF_MLM_Adapter")
          _fallback(
-             self._adapters,
-             "hf_causal_auto",
-             "invarlock.adapters",
-             "HF_Causal_Auto_Adapter",
+             self._adapters, "hf_seq2seq", "invarlock.adapters", "HF_Seq2Seq_Adapter"
          )
          _fallback(
-             self._adapters, "hf_mlm_auto", "invarlock.adapters", "HF_MLM_Auto_Adapter"
+             self._adapters,
+             "hf_causal_onnx",
+             "invarlock.adapters",
+             "HF_Causal_ONNX_Adapter",
+             required_deps=["optimum"],
          )
+         _fallback(self._adapters, "hf_auto", "invarlock.adapters", "HF_Auto_Adapter")
          # Optional plugin adapters (verify runtime dependencies)
          _fallback(
              self._adapters,
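
Read alongside the file list (hf_gpt2.py and hf_llama.py deleted; hf_bert.py → hf_mlm.py, hf_t5.py → hf_seq2seq.py, hf_onnx.py → hf_causal_onnx.py), the new registrations imply roughly this rename map for configs that pin an adapter name. This mapping is inferred from the diff, not an official migration table:

# Inferred from the registrations and file renames above.
ADAPTER_RENAMES = {
    "hf_gpt2": "hf_causal",       # decoder-only families consolidate into hf_causal
    "hf_llama": "hf_causal",
    "hf_bert": "hf_mlm",
    "hf_t5": "hf_seq2seq",
    "hf_onnx": "hf_causal_onnx",
    "hf_causal_auto": "hf_auto",  # the two convenience auto adapters collapse into hf_auto
    "hf_mlm_auto": "hf_auto",
}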
invarlock/core/runner.py CHANGED
@@ -19,6 +19,11 @@ from typing import Any
  import numpy as np

  from invarlock.eval.tail_stats import evaluate_metric_tail
+ from invarlock.observability.metrics import (
+     capture_memory_snapshot,
+     reset_peak_memory_stats,
+     summarize_memory_snapshots,
+ )

  from .api import (
      EditLike,
@@ -190,6 +195,18 @@ class CoreRunner:
              pass

          report.status = RunStatus.RUNNING.value
+         timings: dict[str, float] = {}
+         guard_timings: dict[str, float] = {}
+         memory_snapshots: list[dict[str, Any]] = []
+         total_start = time.perf_counter()
+
+         def _record_timing(key: str, start: float) -> None:
+             timings[key] = max(0.0, float(time.perf_counter() - start))
+
+         def _capture_memory(phase: str) -> None:
+             snapshot = capture_memory_snapshot(phase)
+             if snapshot:
+                 memory_snapshots.append(snapshot)

          try:
              # Log start
@@ -205,40 +222,78 @@
              )

              # Phase 1: Prepare (describe model, create checkpoint)
-             model_desc = self._prepare_phase(model, adapter, report)
+             reset_peak_memory_stats()
+             phase_start = time.perf_counter()
+             try:
+                 model_desc = self._prepare_phase(model, adapter, report)
+             finally:
+                 _record_timing("prepare", phase_start)
+                 _capture_memory("prepare")

              # Phase 2: Prepare guards (must happen before edit)
-             self._prepare_guards_phase(
-                 model,
-                 adapter,
-                 guards,
-                 calibration_data,
-                 report,
-                 auto_config,
-                 config,
-             )
+             reset_peak_memory_stats()
+             phase_start = time.perf_counter()
+             try:
+                 self._prepare_guards_phase(
+                     model,
+                     adapter,
+                     guards,
+                     calibration_data,
+                     report,
+                     auto_config,
+                     config,
+                 )
+             finally:
+                 _record_timing("prepare_guards", phase_start)
+                 _capture_memory("prepare_guards")

              # Phase 3: Apply edit
-             self._edit_phase(model, adapter, edit, model_desc, report, edit_config)
+             reset_peak_memory_stats()
+             phase_start = time.perf_counter()
+             try:
+                 self._edit_phase(model, adapter, edit, model_desc, report, edit_config)
+             finally:
+                 _record_timing("edit", phase_start)
+                 _capture_memory("edit")

              # Phase 4: Run guards
-             guard_results = self._guard_phase(model, adapter, guards, report)
+             reset_peak_memory_stats()
+             phase_start = time.perf_counter()
+             try:
+                 guard_results = self._guard_phase(
+                     model, adapter, guards, report, guard_timings=guard_timings
+                 )
+             finally:
+                 _record_timing("guards", phase_start)
+                 _capture_memory("guards")

              # Phase 5: Evaluate final metrics
-             metrics = self._eval_phase(
-                 model,
-                 adapter,
-                 calibration_data,
-                 report,
-                 preview_n,
-                 final_n,
-                 config,
-             )
+             reset_peak_memory_stats()
+             phase_start = time.perf_counter()
+             try:
+                 metrics = self._eval_phase(
+                     model,
+                     adapter,
+                     calibration_data,
+                     report,
+                     preview_n,
+                     final_n,
+                     config,
+                 )
+             finally:
+                 _record_timing("eval", phase_start)
+                 _capture_memory("eval")

              # Phase 6: Finalize or rollback
-             final_status = self._finalize_phase(
-                 model, adapter, guard_results, metrics, config, report
-             )
+             reset_peak_memory_stats()
+             phase_start = time.perf_counter()
+             try:
+                 final_status = self._finalize_phase(
+                     model, adapter, guard_results, metrics, config, report
+                 )
+             finally:
+                 _record_timing("finalize", phase_start)
+                 _capture_memory("finalize")

              report.status = final_status
              report.meta["end_time"] = time.time()
@@ -260,6 +315,25 @@
              return report

          finally:
+             _record_timing("total", total_start)
+             if not isinstance(report.metrics, dict):
+                 report.metrics = {}
+             if timings:
+                 report.metrics.setdefault("timings", {}).update(timings)
+             if guard_timings:
+                 report.metrics["guard_timings"] = guard_timings
+             if memory_snapshots:
+                 report.metrics["memory_snapshots"] = memory_snapshots
+                 summary = summarize_memory_snapshots(memory_snapshots)
+                 if summary:
+                     mem_peak = summary.get("memory_mb_peak")
+                     if isinstance(mem_peak, (int | float)):
+                         existing = report.metrics.get("memory_mb_peak")
+                         if isinstance(existing, (int | float)):
+                             summary["memory_mb_peak"] = max(
+                                 float(existing), float(mem_peak)
+                             )
+                     report.metrics.update(summary)
              self._active_model = None
              self._active_adapter = None
              self._cleanup_services()
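
After the finally block runs, report.metrics carries the new observability payload. An illustrative shape (keys mirror the code above; every value here is invented):

# Illustrative only: keys from the finally block above, values made up.
example_metrics = {
    "timings": {
        "prepare": 0.42, "prepare_guards": 1.10, "edit": 3.75,
        "guards": 2.01, "eval": 8.64, "finalize": 0.12, "total": 16.10,
    },
    "guard_timings": {"invariants": 0.81, "rmt": 1.20},  # keyed by guard.name
    "memory_snapshots": [{"phase": "prepare"}],  # full shape comes from capture_memory_snapshot()
    "memory_mb_peak": 5120.0,  # merged as max(existing, summarized peak)
}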
@@ -455,7 +529,13 @@
          )

      def _guard_phase(
-         self, model: Any, adapter: ModelAdapter, guards: list[Guard], report: RunReport
+         self,
+         model: Any,
+         adapter: ModelAdapter,
+         guards: list[Guard],
+         report: RunReport,
+         *,
+         guard_timings: dict[str, float] | None = None,
      ) -> dict[str, dict[str, Any]]:
          """Phase 4: Run safety guards."""
          self._log_event("guards", "start", LogLevel.INFO, {"count": len(guards)})
@@ -464,6 +544,7 @@

          for guard in guards:
              self._log_event("guard", "start", LogLevel.INFO, {"guard": guard.name})
+             guard_start = time.perf_counter()

              if isinstance(guard, GuardWithContext):
                  try:
@@ -497,6 +578,11 @@
                      LogLevel.ERROR,
                      {"guard": guard.name, "error": str(e)},
                  )
+             finally:
+                 if guard_timings is not None:
+                     guard_timings[guard.name] = max(
+                         0.0, float(time.perf_counter() - guard_start)
+                     )

          report.guards = guard_results