invarlock 0.3.5-py3-none-any.whl → 0.3.7-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry, and is provided for informational purposes only.
Files changed (74)
  1. invarlock/__init__.py +2 -2
  2. invarlock/_data/runtime/tiers.yaml +57 -30
  3. invarlock/adapters/__init__.py +11 -15
  4. invarlock/adapters/auto.py +35 -40
  5. invarlock/adapters/capabilities.py +2 -2
  6. invarlock/adapters/hf_causal.py +418 -0
  7. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  8. invarlock/adapters/hf_mixin.py +25 -4
  9. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  10. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  11. invarlock/calibration/spectral_null.py +15 -10
  12. invarlock/calibration/variance_ve.py +0 -2
  13. invarlock/cli/adapter_auto.py +31 -21
  14. invarlock/cli/app.py +73 -2
  15. invarlock/cli/commands/calibrate.py +6 -2
  16. invarlock/cli/commands/certify.py +651 -91
  17. invarlock/cli/commands/doctor.py +11 -11
  18. invarlock/cli/commands/explain_gates.py +57 -8
  19. invarlock/cli/commands/plugins.py +13 -9
  20. invarlock/cli/commands/report.py +233 -69
  21. invarlock/cli/commands/run.py +1066 -244
  22. invarlock/cli/commands/verify.py +154 -15
  23. invarlock/cli/config.py +22 -6
  24. invarlock/cli/doctor_helpers.py +4 -5
  25. invarlock/cli/output.py +193 -0
  26. invarlock/cli/provenance.py +1 -1
  27. invarlock/core/api.py +45 -5
  28. invarlock/core/auto_tuning.py +65 -20
  29. invarlock/core/bootstrap.py +1 -1
  30. invarlock/core/contracts.py +7 -1
  31. invarlock/core/registry.py +11 -13
  32. invarlock/core/runner.py +425 -75
  33. invarlock/edits/quant_rtn.py +65 -37
  34. invarlock/eval/bench.py +3 -16
  35. invarlock/eval/data.py +82 -51
  36. invarlock/eval/metrics.py +63 -2
  37. invarlock/eval/primary_metric.py +23 -0
  38. invarlock/eval/tail_stats.py +230 -0
  39. invarlock/eval/tasks/__init__.py +12 -0
  40. invarlock/eval/tasks/classification.py +48 -0
  41. invarlock/eval/tasks/qa.py +36 -0
  42. invarlock/eval/tasks/text_generation.py +102 -0
  43. invarlock/guards/_estimators.py +154 -0
  44. invarlock/guards/invariants.py +19 -10
  45. invarlock/guards/policies.py +16 -6
  46. invarlock/guards/rmt.py +627 -546
  47. invarlock/guards/spectral.py +348 -110
  48. invarlock/guards/tier_config.py +32 -30
  49. invarlock/guards/variance.py +7 -31
  50. invarlock/guards_ref/rmt_ref.py +23 -23
  51. invarlock/model_profile.py +90 -42
  52. invarlock/observability/health.py +6 -6
  53. invarlock/observability/metrics.py +108 -0
  54. invarlock/reporting/certificate.py +384 -55
  55. invarlock/reporting/certificate_schema.py +3 -2
  56. invarlock/reporting/dataset_hashing.py +15 -2
  57. invarlock/reporting/guards_analysis.py +350 -277
  58. invarlock/reporting/html.py +55 -5
  59. invarlock/reporting/normalizer.py +13 -0
  60. invarlock/reporting/policy_utils.py +38 -36
  61. invarlock/reporting/primary_metric_utils.py +71 -17
  62. invarlock/reporting/render.py +852 -431
  63. invarlock/reporting/report.py +40 -4
  64. invarlock/reporting/report_types.py +11 -3
  65. invarlock/reporting/telemetry.py +86 -0
  66. invarlock/reporting/validate.py +1 -18
  67. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/METADATA +27 -13
  68. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/RECORD +72 -65
  69. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
  70. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
  71. invarlock/adapters/hf_gpt2.py +0 -404
  72. invarlock/adapters/hf_llama.py +0 -487
  73. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
  74. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
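The excerpt below is the diff of invarlock/cli/commands/certify.py (item 16), the largest CLI change in this release: `certify` gains baseline-report reuse (`--baseline-report`), an `--edit-label` for bring-your-own-edit subjects, and quiet/verbose, banner, timing, and color controls. Since the command supports programmatic calls as well as Typer invocation, a minimal sketch of a 0.3.7-style call follows; the model ID, paths, and flag values are illustrative assumptions — only the keyword names come from the signature shown in the diff:

```python
# Sketch only: the model ID and paths below are hypothetical; keyword names
# mirror the 0.3.7 certify_command signature shown in the diff.
from invarlock.cli.commands.certify import certify_command

certify_command(
    source="hf:gpt2",                # baseline checkpoint (illustrative)
    edited="./models/gpt2-rtn",      # pre-edited subject model (illustrative)
    adapter="auto",                  # auto-resolves, e.g. to hf_causal or hf_mlm
    profile="ci",
    tier="balanced",
    edit_label="custom",             # new in 0.3.7: label a pre-edited subject
    baseline_report="runs/source",   # new in 0.3.7: reuse a stored no-op baseline
    quiet=True,                      # new in 0.3.7: print only the final summary
)
```

Per the option help below, a reused baseline report must contain stored evaluation windows (run the baseline with `INVARLOCK_STORE_EVAL_WINDOWS=1`).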
@@ -14,17 +14,23 @@ Steps:
 
 from __future__ import annotations
 
+import inspect
+import io
 import json
 import math
+from collections.abc import Iterator
+from contextlib import contextmanager
 from pathlib import Path
-from typing import Any
+from typing import Any, NoReturn
 
 import typer
 from rich.console import Console
 
+from invarlock import __version__ as INVARLOCK_VERSION
+
+from ...core.exceptions import MetricsError
 from ..adapter_auto import resolve_auto_adapter
 from ..config import _deep_merge as _merge  # reuse helper
-from ..errors import InvarlockError
 
 # Use the report group's programmatic entry for report generation
 from .report import report_command as _report
@@ -32,9 +38,142 @@ from .run import _resolve_exit_code as _resolve_exit_code
 
 _LAZY_RUN_IMPORT = True
 
+PHASE_BAR_WIDTH = 67
+VERBOSITY_QUIET = 0
+VERBOSITY_DEFAULT = 1
+VERBOSITY_VERBOSE = 2
+
 console = Console()
 
 
+def _render_banner_lines(title: str, context: str) -> list[str]:
+    width = max(len(title), len(context))
+    border = "─" * (width + 2)
+    return [
+        f"┌{border}┐",
+        f"│ {title.ljust(width)} │",
+        f"│ {context.ljust(width)} │",
+        f"└{border}┘",
+    ]
+
+
+def _print_header_banner(
+    console: Console, *, version: str, profile: str, tier: str, adapter: str
+) -> None:
+    title = f"INVARLOCK v{version} · Certification Pipeline"
+    context = f"Profile: {profile} · Tier: {tier} · Adapter: {adapter}"
+    for line in _render_banner_lines(title, context):
+        console.print(line)
+
+
+def _phase_title(index: int, total: int, title: str) -> str:
+    return f"PHASE {index}/{total} · {title}"
+
+
+def _print_phase_header(console: Console, title: str) -> None:
+    bar_width = max(PHASE_BAR_WIDTH, len(title))
+    bar = "═" * bar_width
+    console.print(bar)
+    console.print(title)
+    console.print(bar)
+
+
+def _format_ratio(value: Any) -> str:
+    try:
+        val = float(value)
+    except (TypeError, ValueError):
+        return "N/A"
+    if not math.isfinite(val):
+        return "N/A"
+    return f"{val:.3f}"
+
+
+def _resolve_verbosity(quiet: bool, verbose: bool) -> int:
+    if quiet and verbose:
+        console.print("--quiet and --verbose are mutually exclusive")
+        raise typer.Exit(2)
+    if quiet:
+        return VERBOSITY_QUIET
+    if verbose:
+        return VERBOSITY_VERBOSE
+    return VERBOSITY_DEFAULT
+
+
+@contextmanager
+def _override_console(module: Any, new_console: Console) -> Iterator[None]:
+    original_console = getattr(module, "console", None)
+    module.console = new_console
+    try:
+        yield
+    finally:
+        module.console = original_console
+
+
+@contextmanager
+def _suppress_child_output(enabled: bool) -> Iterator[io.StringIO | None]:
+    if not enabled:
+        yield None
+        return
+    from . import report as report_mod
+    from . import run as run_mod
+
+    buffer = io.StringIO()
+    quiet_console = Console(file=buffer, force_terminal=False, color_system=None)
+    with (
+        _override_console(run_mod, quiet_console),
+        _override_console(report_mod, quiet_console),
+    ):
+        yield buffer
+
+
+def _print_quiet_summary(
+    *,
+    cert_out: Path,
+    source: str,
+    edited: str,
+    profile: str,
+) -> None:
+    cert_path = cert_out / "evaluation.cert.json"
+    console.print(f"INVARLOCK v{INVARLOCK_VERSION} · CERTIFY")
+    console.print(f"Baseline: {source} -> Subject: {edited} · Profile: {profile}")
+    if not cert_path.exists():
+        console.print(f"Output: {cert_out}")
+        return
+    try:
+        with cert_path.open("r", encoding="utf-8") as fh:
+            certificate = json.load(fh)
+    except Exception:
+        console.print(f"Output: {cert_path}")
+        return
+    if not isinstance(certificate, dict):
+        console.print(f"Output: {cert_path}")
+        return
+    try:
+        from invarlock.reporting.render import (
+            compute_console_validation_block as _console_block,
+        )
+
+        block = _console_block(certificate)
+        rows = block.get("rows", [])
+        total = len(rows) if isinstance(rows, list) else 0
+        passed = (
+            sum(1 for row in rows if row.get("ok")) if isinstance(rows, list) else 0
+        )
+        status = "PASS" if block.get("overall_pass") else "FAIL"
+    except Exception:
+        total = 0
+        passed = 0
+        status = "UNKNOWN"
+    pm_ratio = _format_ratio(
+        (certificate.get("primary_metric") or {}).get("ratio_vs_baseline")
+    )
+    gate_summary = f"{passed}/{total} passed" if total else "N/A"
+    console.print(f"Status: {status} · Gates: {gate_summary}")
+    if pm_ratio != "N/A":
+        console.print(f"Primary metric ratio: {pm_ratio}")
+    console.print(f"Output: {cert_path}")
+
+
 def _latest_run_report(run_root: Path) -> Path | None:
     if not run_root.exists():
         return None
@@ -90,6 +229,14 @@ def certify_command(
     edited: str = typer.Option(
         ..., "--edited", "--subject", help="Subject model dir or Hub ID"
     ),
+    baseline_report: str | None = typer.Option(
+        None,
+        "--baseline-report",
+        help=(
+            "Reuse an existing baseline run report.json (skips baseline evaluation). "
+            "Must include stored evaluation windows (e.g., set INVARLOCK_STORE_EVAL_WINDOWS=1)."
+        ),
+    ),
     adapter: str = typer.Option(
         "auto", "--adapter", help="Adapter name or 'auto' to resolve"
     ),
@@ -98,7 +245,9 @@ def certify_command(
         "--device",
         help="Device override for runs (auto|cuda|mps|cpu)",
     ),
-    profile: str = typer.Option("ci", "--profile", help="Profile (ci|release)"),
+    profile: str = typer.Option(
+        "ci", "--profile", help="Profile (ci|release|ci_cpu|dev)"
+    ),
     tier: str = typer.Option("balanced", "--tier", help="Tier label for context"),
     preset: str | None = typer.Option(
         None,
@@ -115,6 +264,31 @@ def certify_command(
     edit_config: str | None = typer.Option(
         None, "--edit-config", help="Edit preset to apply a demo edit (quant_rtn)"
     ),
+    edit_label: str | None = typer.Option(
+        None,
+        "--edit-label",
+        help=(
+            "Edit algorithm label for BYOE models. Use 'noop' for baseline, "
+            "'quant_rtn' etc. for built-in edits, 'custom' for pre-edited models."
+        ),
+    ),
+    quiet: bool = typer.Option(
+        False, "--quiet", "-q", help="Minimal output (suppress run/report detail)"
+    ),
+    verbose: bool = typer.Option(
+        False, "--verbose", "-v", help="Verbose output (include debug details)"
+    ),
+    banner: bool = typer.Option(
+        True, "--banner/--no-banner", help="Show header banner"
+    ),
+    style: str = typer.Option("audit", "--style", help="Output style (audit|friendly)"),
+    timing: bool = typer.Option(False, "--timing", help="Show timing summary"),
+    progress: bool = typer.Option(
+        True, "--progress/--no-progress", help="Show progress done messages"
+    ),
+    no_color: bool = typer.Option(
+        False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
+    ),
 ):
     """Certify two checkpoints (baseline vs subject) with pinned windows."""
     # Support programmatic calls and Typer-invoked calls uniformly
@@ -130,6 +304,7 @@ def certify_command(
 
     source = _coerce_option(source)
     edited = _coerce_option(edited)
+    baseline_report = _coerce_option(baseline_report)
     adapter = _coerce_option(adapter, "auto")
     device = _coerce_option(device)
     profile = _coerce_option(profile, "ci")
@@ -138,23 +313,89 @@ def certify_command(
     out = _coerce_option(out, "runs")
     cert_out = _coerce_option(cert_out, "reports/cert")
     edit_config = _coerce_option(edit_config)
+    edit_label = _coerce_option(edit_label)
+    quiet = _coerce_option(quiet, False)
+    verbose = _coerce_option(verbose, False)
+    banner = _coerce_option(banner, True)
+    style = _coerce_option(style, "audit")
+    timing = bool(_coerce_option(timing, False))
+    progress = bool(_coerce_option(progress, True))
+    no_color = bool(_coerce_option(no_color, False))
+
+    verbosity = _resolve_verbosity(bool(quiet), bool(verbose))
+
+    if verbosity == VERBOSITY_QUIET:
+        progress = False
+        timing = False
+
+    from invarlock.cli.output import (
+        make_console,
+        perf_counter,
+        print_event,
+        print_timing_summary,
+        resolve_output_style,
+        timed_step,
+    )
+
+    output_style = resolve_output_style(
+        style=str(style),
+        profile=str(profile),
+        progress=bool(progress),
+        timing=bool(timing),
+        no_color=bool(no_color),
+    )
+    console = make_console(no_color=not output_style.color)
+    timings: dict[str, float] = {}
+    total_start: float | None = perf_counter() if output_style.timing else None
+
+    def _info(message: str, *, tag: str = "INFO", emoji: str | None = None) -> None:
+        if verbosity >= VERBOSITY_DEFAULT:
+            print_event(console, tag, message, style=output_style, emoji=emoji)
+
+    def _debug(msg: str) -> None:
+        if verbosity >= VERBOSITY_VERBOSE:
+            console.print(msg, markup=False)
+
+    def _fail(message: str, *, exit_code: int = 2) -> NoReturn:
+        print_event(console, "FAIL", message, style=output_style, emoji="❌")
+        raise typer.Exit(exit_code)
+
+    def _phase(index: int, total: int, title: str) -> None:
+        if verbosity >= VERBOSITY_DEFAULT:
+            console.print("")
+            _print_phase_header(console, _phase_title(index, total, title))
 
     src_id = str(source)
     edt_id = str(edited)
 
     # Resolve adapter when requested
     eff_adapter = adapter
-    if str(adapter).strip().lower() in {"auto", "hf_auto", "auto_hf"}:
+    adapter_auto = False
+    if str(adapter).strip().lower() in {"auto", "auto_hf"}:
         eff_adapter = resolve_auto_adapter(src_id)
-        console.print(f"🔎 Adapter:auto → {eff_adapter}")
+        adapter_auto = True
+
+    show_banner = bool(banner) and verbosity >= VERBOSITY_DEFAULT
+    if show_banner:
+        _print_header_banner(
+            console,
+            version=INVARLOCK_VERSION,
+            profile=profile,
+            tier=tier,
+            adapter=str(eff_adapter),
+        )
+        console.print("")
+
+    if adapter_auto:
+        _debug(f"Adapter:auto -> {eff_adapter}")
 
     # Choose preset. If none provided and repo preset is missing (pip install
     # scenario), fall back to a minimal built-in universal preset so the
     # flag-only quick start works without cloning the repo.
     default_universal = (
-        Path("configs/tasks/masked_lm/ci_cpu.yaml")
-        if eff_adapter == "hf_bert"
-        else Path("configs/tasks/causal_lm/ci_cpu.yaml")
+        Path("configs/presets/masked_lm/wikitext2_128.yaml")
+        if eff_adapter == "hf_mlm"
+        else Path("configs/presets/causal_lm/wikitext2_512.yaml")
     )
     preset_path = Path(preset) if preset is not None else default_universal
 
@@ -174,7 +415,13 @@ def certify_command(
         }
     else:
         if not preset_path.exists():
-            console.print(f"[red]❌ Preset not found: {preset_path}")
+            print_event(
+                console,
+                "FAIL",
+                f"Preset not found: {preset_path}",
+                style=output_style,
+                emoji="❌",
+            )
             raise typer.Exit(1)
         preset_data = _load_yaml(preset_path)
         # Do not hard-code device from presets in auto-generated certify configs;
@@ -185,6 +432,122 @@ def certify_command(
         model_block.pop("device", None)
         preset_data["model"] = model_block
 
+    default_guards_order = ["invariants", "spectral", "rmt", "variance", "invariants"]
+    guards_order = None
+    preset_guards = preset_data.get("guards")
+    if isinstance(preset_guards, dict):
+        preset_order = preset_guards.get("order")
+        if (
+            isinstance(preset_order, list)
+            and preset_order
+            and all(isinstance(item, str) for item in preset_order)
+        ):
+            guards_order = list(preset_order)
+    if guards_order is None:
+        guards_order = list(default_guards_order)
+
+    def _load_and_validate_baseline_report(
+        report_path: Path,
+        *,
+        expected_profile: str,
+        expected_tier: str,
+        expected_adapter: str,
+    ) -> Path:
+        candidate = Path(report_path).expanduser()
+        if not candidate.exists():
+            _fail(f"Baseline report not found: {candidate}")
+        resolved_report: Path | None = None
+        if candidate.is_dir():
+            direct = candidate / "report.json"
+            if direct.is_file():
+                resolved_report = direct
+            else:
+                resolved_report = _latest_run_report(candidate)
+        elif candidate.is_file():
+            resolved_report = candidate
+        if resolved_report is None or not resolved_report.is_file():
+            _fail(f"Baseline report not found: {candidate}")
+        resolved_report = resolved_report.resolve()
+        try:
+            with resolved_report.open("r", encoding="utf-8") as fh:
+                payload = json.load(fh)
+        except Exception as exc:  # noqa: BLE001
+            _fail(f"Baseline report is not valid JSON: {resolved_report} ({exc})")
+        if not isinstance(payload, dict):
+            _fail(f"Baseline report must be a JSON object: {resolved_report}")
+
+        edit_block = payload.get("edit")
+        edit_name = edit_block.get("name") if isinstance(edit_block, dict) else None
+        if edit_name != "noop":
+            _fail(
+                "Baseline report must be a no-op run (edit.name == 'noop'). "
+                f"Got edit.name={edit_name!r} in {resolved_report}"
+            )
+
+        meta = payload.get("meta")
+        if isinstance(meta, dict):
+            baseline_adapter = meta.get("adapter")
+            if (
+                isinstance(baseline_adapter, str)
+                and baseline_adapter != expected_adapter
+            ):
+                _fail(
+                    "Baseline report adapter mismatch. "
+                    f"Expected {expected_adapter!r}, got {baseline_adapter!r} in {resolved_report}"
+                )
+
+        context = payload.get("context")
+        if isinstance(context, dict):
+            baseline_profile = context.get("profile")
+            if (
+                isinstance(baseline_profile, str)
+                and baseline_profile.strip().lower() != expected_profile.strip().lower()
+            ):
+                _fail(
+                    "Baseline report profile mismatch. "
+                    f"Expected {expected_profile!r}, got {baseline_profile!r} in {resolved_report}"
+                )
+            auto_ctx = context.get("auto")
+            if isinstance(auto_ctx, dict):
+                baseline_tier = auto_ctx.get("tier")
+                if isinstance(baseline_tier, str) and baseline_tier != expected_tier:
+                    _fail(
+                        "Baseline report tier mismatch. "
+                        f"Expected {expected_tier!r}, got {baseline_tier!r} in {resolved_report}"
+                    )
+
+        eval_windows = payload.get("evaluation_windows")
+        if not isinstance(eval_windows, dict):
+            _fail(
+                "Baseline report missing evaluation window payloads. "
+                "Re-run baseline with INVARLOCK_STORE_EVAL_WINDOWS=1."
+            )
+
+        for phase_name in ("preview", "final"):
+            phase = eval_windows.get(phase_name)
+            if not isinstance(phase, dict):
+                _fail(
+                    f"Baseline report missing evaluation_windows.{phase_name} payloads. "
+                    "Re-run baseline with INVARLOCK_STORE_EVAL_WINDOWS=1."
+                )
+            window_ids = phase.get("window_ids")
+            input_ids = phase.get("input_ids")
+            if not isinstance(window_ids, list) or not window_ids:
+                _fail(
+                    f"Baseline report missing evaluation_windows.{phase_name}.window_ids."
+                )
+            if not isinstance(input_ids, list) or not input_ids:
+                _fail(
+                    f"Baseline report missing evaluation_windows.{phase_name}.input_ids."
+                )
+            if len(input_ids) != len(window_ids):
+                _fail(
+                    "Baseline report has inconsistent evaluation window payloads "
+                    f"for {phase_name}: input_ids={len(input_ids)} window_ids={len(window_ids)}."
+                )
+
+        return resolved_report
+
     # Create temp baseline config (no-op edit)
     # Normalize possible "hf:" prefixes for HF adapters
     norm_src_id = _normalize_model_id(src_id, eff_adapter)
@@ -199,47 +562,104 @@ def certify_command(
             },
             "edit": {"name": "noop", "plan": {}},
             "eval": {},
-            "guards": {
-                "order": ["invariants", "spectral", "rmt", "variance", "invariants"]
-            },
+            "guards": {"order": guards_order},
             "output": {"dir": str(Path(out) / "source")},
             "context": {"profile": profile, "tier": tier},
         },
     )
 
+    baseline_label = "noop"
+    subject_label: str | None = None
+    if edit_label:
+        subject_label = edit_label
+    elif not edit_config:
+        subject_label = "custom" if norm_src_id != norm_edt_id else "noop"
+
     tmp_dir = Path(".certify_tmp")
     tmp_dir.mkdir(parents=True, exist_ok=True)
-    baseline_yaml = tmp_dir / "baseline_noop.yaml"
-    _dump_yaml(baseline_yaml, baseline_cfg)
-
-    console.print("🏁 Running baseline (no-op edit)")
-    from .run import run_command as _run
-
-    _run(
-        config=str(baseline_yaml),
-        profile=profile,
-        out=str(Path(out) / "source"),
-        tier=tier,
-        device=device,
-    )
 
-    baseline_report = _latest_run_report(Path(out) / "source")
-    if not baseline_report:
-        console.print("[red]❌ Could not locate baseline report after run")
-        raise typer.Exit(1)
+    baseline_report_path: Path
+    if baseline_report:
+        _info(
+            "Using provided baseline report (skipping baseline evaluation)",
+            tag="EXEC",
+            emoji="♻️",
+        )
+        baseline_report_path = _load_and_validate_baseline_report(
+            Path(baseline_report),
+            expected_profile=profile,
+            expected_tier=tier,
+            expected_adapter=str(eff_adapter),
+        )
+        _debug(f"Baseline report: {baseline_report_path}")
+    else:
+        baseline_yaml = tmp_dir / "baseline_noop.yaml"
+        _dump_yaml(baseline_yaml, baseline_cfg)
+
+        _phase(1, 3, "BASELINE EVALUATION")
+        _info("Running baseline (no-op edit)", tag="EXEC", emoji="🏁")
+        _debug(f"Baseline config: {baseline_yaml}")
+        from .run import run_command as _run
+
+        with _suppress_child_output(verbosity == VERBOSITY_QUIET) as quiet_buffer:
+            try:
+                with timed_step(
+                    console=console,
+                    style=output_style,
+                    timings=timings,
+                    key="baseline",
+                    tag="EXEC",
+                    message="Baseline",
+                    emoji="🏁",
+                ):
+                    _run(
+                        config=str(baseline_yaml),
+                        profile=profile,
+                        out=str(Path(out) / "source"),
+                        tier=tier,
+                        device=device,
+                        edit_label=baseline_label,
+                        style=output_style.name,
+                        progress=progress,
+                        timing=False,
+                        no_color=no_color,
+                    )
+            except Exception:
+                if quiet_buffer is not None:
+                    console.print(quiet_buffer.getvalue(), markup=False)
+                raise
+
+        baseline_report_path_candidate = _latest_run_report(Path(out) / "source")
+        if not baseline_report_path_candidate:
+            _fail("Could not locate baseline report after run", exit_code=1)
+        baseline_report_path = baseline_report_path_candidate
+        _debug(f"Baseline report: {baseline_report_path}")
 
     # Edited run: either no-op (Compare & Certify) or provided edit_config (demo edit)
+    _phase(2, 3, "SUBJECT EVALUATION")
     if edit_config:
         edited_yaml = Path(edit_config)
         if not edited_yaml.exists():
-            console.print(f"[red]❌ Edit config not found: {edited_yaml}")
+            print_event(
+                console,
+                "FAIL",
+                f"Edit config not found: {edited_yaml}",
+                style=output_style,
+                emoji="❌",
+            )
             raise typer.Exit(1)
-        console.print("✂️ Running edited (demo edit via --edit-config)")
+        _info("Running edited (demo edit via --edit-config)", tag="EXEC", emoji="✂️")
         # Overlay subject model id/adapter and output/context onto the provided edit config
         try:
             cfg_loaded: dict[str, Any] = _load_yaml(edited_yaml)
         except Exception as exc:  # noqa: BLE001
-            console.print(f"[red]❌ Failed to load edit config: {exc}")
+            print_event(
+                console,
+                "FAIL",
+                f"Failed to load edit config: {exc}",
+                style=output_style,
+                emoji="❌",
+            )
             raise typer.Exit(1) from exc
 
         # Ensure model.id/adapter point to the requested subject
@@ -268,23 +688,58 @@ def certify_command(
                 "context": {"profile": profile, "tier": tier},
             },
         )
+        # Ensure the edited run always has a guard chain. Presets/edit configs
+        # often omit it, but `invarlock run` expects guards.order.
+        guards_block = merged_edited_cfg.get("guards")
+        guards_order_cfg = (
+            guards_block.get("order") if isinstance(guards_block, dict) else None
+        )
+        if not (
+            isinstance(guards_order_cfg, list)
+            and guards_order_cfg
+            and all(isinstance(item, str) for item in guards_order_cfg)
+        ):
+            merged_edited_cfg = _merge(
+                merged_edited_cfg, {"guards": {"order": guards_order}}
+            )
 
         # Persist a temporary merged config for traceability
         tmp_dir = Path(".certify_tmp")
         tmp_dir.mkdir(parents=True, exist_ok=True)
         edited_merged_yaml = tmp_dir / "edited_merged.yaml"
         _dump_yaml(edited_merged_yaml, merged_edited_cfg)
+        _debug(f"Edited config (merged): {edited_merged_yaml}")
 
         from .run import run_command as _run
 
-        _run(
-            config=str(edited_merged_yaml),
-            profile=profile,
-            out=str(Path(out) / "edited"),
-            tier=tier,
-            baseline=str(baseline_report),
-            device=device,
-        )
+        with _suppress_child_output(verbosity == VERBOSITY_QUIET) as quiet_buffer:
+            try:
+                with timed_step(
+                    console=console,
+                    style=output_style,
+                    timings=timings,
+                    key="subject",
+                    tag="EXEC",
+                    message="Subject",
+                    emoji="✂️",
+                ):
+                    _run(
+                        config=str(edited_merged_yaml),
+                        profile=profile,
+                        out=str(Path(out) / "edited"),
+                        tier=tier,
+                        baseline=str(baseline_report_path),
+                        device=device,
+                        edit_label=subject_label if edit_label else None,
+                        style=output_style.name,
+                        progress=progress,
+                        timing=False,
+                        no_color=no_color,
+                    )
+            except Exception:
+                if quiet_buffer is not None:
+                    console.print(quiet_buffer.getvalue(), markup=False)
+                raise
     else:
         edited_cfg = _merge(
             preset_data,
@@ -292,50 +747,121 @@ def certify_command(
                 "model": {"id": norm_edt_id, "adapter": eff_adapter},
                 "edit": {"name": "noop", "plan": {}},
                 "eval": {},
-                "guards": {
-                    "order": [
-                        "invariants",
-                        "spectral",
-                        "rmt",
-                        "variance",
-                        "invariants",
-                    ]
-                },
+                "guards": {"order": guards_order},
                 "output": {"dir": str(Path(out) / "edited")},
                 "context": {"profile": profile, "tier": tier},
             },
         )
         edited_yaml = tmp_dir / "edited_noop.yaml"
         _dump_yaml(edited_yaml, edited_cfg)
-        console.print("🧪 Running edited (no-op, Compare & Certify)")
+        _info("Running edited (no-op, Compare & Certify)", tag="EXEC", emoji="🧪")
+        _debug(f"Edited config: {edited_yaml}")
         from .run import run_command as _run
 
-        _run(
-            config=str(edited_yaml),
-            profile=profile,
-            out=str(Path(out) / "edited"),
-            tier=tier,
-            baseline=str(baseline_report),
-            device=device,
-        )
+        with _suppress_child_output(verbosity == VERBOSITY_QUIET) as quiet_buffer:
+            try:
+                with timed_step(
+                    console=console,
+                    style=output_style,
+                    timings=timings,
+                    key="subject",
+                    tag="EXEC",
+                    message="Subject",
+                    emoji="🧪",
+                ):
+                    _run(
+                        config=str(edited_yaml),
+                        profile=profile,
+                        out=str(Path(out) / "edited"),
+                        tier=tier,
+                        baseline=str(baseline_report_path),
+                        device=device,
+                        edit_label=subject_label,
+                        style=output_style.name,
+                        progress=progress,
+                        timing=False,
+                        no_color=no_color,
+                    )
+            except Exception:
+                if quiet_buffer is not None:
+                    console.print(quiet_buffer.getvalue(), markup=False)
+                raise
 
     edited_report = _latest_run_report(Path(out) / "edited")
     if not edited_report:
-        console.print("[red]❌ Could not locate edited report after run")
+        print_event(
+            console,
+            "FAIL",
+            "Could not locate edited report after run",
+            style=output_style,
+            emoji="❌",
+        )
         raise typer.Exit(1)
+    _debug(f"Edited report: {edited_report}")
+
+    _phase(3, 3, "CERTIFICATE GENERATION")
+
+    def _emit_certificate() -> None:
+        _info("Emitting certificate", tag="EXEC", emoji="📜")
+        with _suppress_child_output(verbosity == VERBOSITY_QUIET) as quiet_buffer:
+            try:
+                with timed_step(
+                    console=console,
+                    style=output_style,
+                    timings=timings,
+                    key="certificate",
+                    tag="EXEC",
+                    message="Certificate",
+                    emoji="📜",
+                ):
+                    report_kwargs = {
+                        "run": str(edited_report),
+                        "format": "cert",
+                        "baseline": str(baseline_report_path),
+                        "output": cert_out,
+                        "style": output_style.name,
+                        "no_color": no_color,
+                    }
+                    try:
+                        sig = inspect.signature(_report)
+                    except (TypeError, ValueError):
+                        _report(**report_kwargs)
+                    else:
+                        if any(
+                            param.kind == inspect.Parameter.VAR_KEYWORD
+                            for param in sig.parameters.values()
+                        ):
+                            _report(**report_kwargs)
+                        else:
+                            _report(
+                                **{
+                                    key: value
+                                    for key, value in report_kwargs.items()
+                                    if key in sig.parameters
+                                }
+                            )
+            except Exception:
+                if quiet_buffer is not None:
+                    console.print(quiet_buffer.getvalue(), markup=False)
+                raise
 
     # CI/Release hard‑abort: fail fast when primary metric is not computable.
-    # Fall back to legacy ppl_* keys when primary_metric block is absent.
     try:
         prof = str(profile or "").strip().lower()
     except Exception:
         prof = ""
-    if prof in {"ci", "release"}:
+    if prof in {"ci", "ci_cpu", "release"}:
        try:
            with Path(edited_report).open("r", encoding="utf-8") as fh:
                edited_payload = json.load(fh)
        except Exception as exc:  # noqa: BLE001
-            console.print(f"[red]❌ Failed to read edited report: {exc}")
+            print_event(
+                console,
+                "FAIL",
+                f"Failed to read edited report: {exc}",
+                style=output_style,
+                emoji="❌",
+            )
             raise typer.Exit(1) from exc
 
         def _finite(x: Any) -> bool:
@@ -364,40 +890,74 @@ def certify_command(
             else None
         ) or "unknown"
 
-        # Enforce only when a metric block is present; skip for minimal stub reports
-        # Enforce only when a primary_metric block is present
+        # Enforce only when a primary_metric block is present; allow
+        # degraded-but-flagged metrics to emit certificates, but fail the task.
         has_metric_block = isinstance(pm, dict) and bool(pm)
         if has_metric_block:
-            # Treat non‑finite PM as hard error in CI/Release (after legacy fallback).
-            # Require a finite final value; preview is optional for legacy reports.
-            if not _finite(pm_final):
-                err = InvarlockError(
+            degraded = bool(pm.get("invalid") or pm.get("degraded"))
+            if degraded or not _finite(pm_final):
+                fallback = pm_prev if _finite(pm_prev) else pm_final
+                if not _finite(fallback) or fallback <= 0:
+                    fallback = 1.0
+                degraded_reason = pm.get("degraded_reason") or (
+                    "non_finite_pm"
+                    if (not _finite(pm_prev) or not _finite(pm_final))
+                    else "primary_metric_degraded"
+                )
+                print_event(
+                    console,
+                    "WARN",
+                    "Primary metric degraded or non-finite; emitting certificate "
+                    "and marking task degraded. Primary metric computation failed.",
+                    style=output_style,
+                    emoji="⚠️",
+                )
+                pm["degraded"] = True
+                pm["invalid"] = pm.get("invalid") or True
+                pm["preview"] = pm_prev if _finite(pm_prev) else fallback
+                pm["final"] = pm_final if _finite(pm_final) else fallback
+                pm["ratio_vs_baseline"] = pm_ratio if _finite(pm_ratio) else 1.0
+                pm["degraded_reason"] = degraded_reason
+                metrics["primary_metric"] = pm
+                edited_payload.setdefault("metrics", {}).update(metrics)
+
+                # Emit the certificate for inspection, then exit with a CI-visible error.
+                _emit_certificate()
+                err = MetricsError(
                     code="E111",
-                    message=(
-                        "Primary metric computation failed (NaN/inf). "
-                        f"Context: device={device}, adapter={adapter_name}, edit={edit_name}. "
-                        "Baseline ok; edited failed to compute ppl. "
-                        "Try: use an accelerator (mps/cuda), force float32, reduce max_modules, "
-                        "or lower the evaluation batch size."
-                    ),
+                    message=f"Primary metric degraded or non-finite ({degraded_reason}).",
                     details={
-                        "device": device,
+                        "reason": degraded_reason,
                         "adapter": adapter_name,
+                        "device": device,
                         "edit": edit_name,
-                        "pm_preview": pm_prev,
-                        "pm_final": pm_final,
-                        "pm_ratio": pm_ratio,
                     },
                 )
-                code = _resolve_exit_code(err, profile=prof)
-                console.print(f"[red]{err}[/red]")
-                # Do not emit a certificate
-                raise typer.Exit(code)
-
-    console.print("📜 Emitting certificate")
-    _report(
-        run=str(edited_report),
-        format="cert",
-        baseline=str(baseline_report),
-        output=cert_out,
-    )
+                raise typer.Exit(_resolve_exit_code(err, profile=profile))
+
+    _emit_certificate()
+    if timing:
+        if total_start is not None:
+            timings["total"] = max(0.0, float(perf_counter() - total_start))
+        else:
+            timings["total"] = (
+                float(timings.get("baseline", 0.0))
+                + float(timings.get("subject", 0.0))
+                + float(timings.get("certificate", 0.0))
+            )
+        print_timing_summary(
+            console,
+            timings,
+            style=output_style,
+            order=[
+                ("Baseline", "baseline"),
+                ("Subject", "subject"),
+                ("Certificate", "certificate"),
+                ("Total", "total"),
+            ],
+        )
+    if verbosity == VERBOSITY_QUIET:
+        _print_quiet_summary(
+            cert_out=Path(cert_out),
+            source=src_id,
+            edited=edt_id,
+            profile=profile,
+        )
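
A pattern worth calling out from the diff: quiet mode silences the child `run`/`report` commands by temporarily swapping each module's global Rich `console` for one backed by an in-memory buffer (`_override_console` / `_suppress_child_output`), and replays the buffer only when a child step raises. A self-contained sketch of the same technique, with a `SimpleNamespace` standing in for a child module (nothing below is an invarlock API):

```python
import io
from collections.abc import Iterator
from contextlib import contextmanager
from types import SimpleNamespace

from rich.console import Console

# Stand-in for a child command module that prints via a module-level console.
child = SimpleNamespace(console=Console())


@contextmanager
def override_console(module, new_console: Console) -> Iterator[None]:
    # Same shape as _override_console in the diff: swap the attribute, restore on exit.
    original = getattr(module, "console", None)
    module.console = new_console
    try:
        yield
    finally:
        module.console = original


buffer = io.StringIO()
quiet = Console(file=buffer, force_terminal=False, color_system=None)
try:
    with override_console(child, quiet):
        child.console.print("detailed child output")  # captured, not shown
except Exception:
    print(buffer.getvalue())  # replay captured output only on failure
    raise
```

Replaying the buffer in the `except` branch is what keeps `--quiet` runs terse without losing diagnostics: each `_run` call above prints `quiet_buffer.getvalue()` before re-raising.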