PyPI - invarlock - Versions diffs - 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl - Mend

invarlock 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

invarlock/__init__.py +4 -4
invarlock/adapters/__init__.py +10 -14
invarlock/adapters/auto.py +37 -50
invarlock/adapters/capabilities.py +2 -2
invarlock/adapters/hf_causal.py +418 -0
invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
invarlock/adapters/hf_loading.py +7 -7
invarlock/adapters/hf_mixin.py +53 -9
invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
invarlock/assurance/__init__.py +15 -23
invarlock/cli/adapter_auto.py +32 -26
invarlock/cli/app.py +128 -27
invarlock/cli/commands/__init__.py +2 -2
invarlock/cli/commands/calibrate.py +48 -4
invarlock/cli/commands/doctor.py +8 -10
invarlock/cli/commands/evaluate.py +986 -0
invarlock/cli/commands/explain_gates.py +25 -17
invarlock/cli/commands/export_html.py +11 -9
invarlock/cli/commands/plugins.py +13 -9
invarlock/cli/commands/report.py +326 -92
invarlock/cli/commands/run.py +1160 -228
invarlock/cli/commands/verify.py +157 -97
invarlock/cli/config.py +1 -1
invarlock/cli/determinism.py +1 -1
invarlock/cli/doctor_helpers.py +4 -5
invarlock/cli/output.py +193 -0
invarlock/cli/provenance.py +4 -4
invarlock/core/bootstrap.py +1 -1
invarlock/core/registry.py +9 -11
invarlock/core/retry.py +14 -14
invarlock/core/runner.py +112 -26
invarlock/edits/noop.py +2 -2
invarlock/edits/quant_rtn.py +67 -39
invarlock/eval/__init__.py +1 -1
invarlock/eval/bench.py +14 -10
invarlock/eval/data.py +68 -23
invarlock/eval/metrics.py +59 -1
invarlock/eval/primary_metric.py +1 -1
invarlock/eval/tasks/__init__.py +12 -0
invarlock/eval/tasks/classification.py +48 -0
invarlock/eval/tasks/qa.py +36 -0
invarlock/eval/tasks/text_generation.py +102 -0
invarlock/guards/invariants.py +19 -10
invarlock/guards/rmt.py +2 -2
invarlock/guards/spectral.py +1 -1
invarlock/guards/variance.py +2 -2
invarlock/model_profile.py +64 -62
invarlock/observability/health.py +6 -6
invarlock/observability/metrics.py +108 -0
invarlock/plugins/hf_bnb_adapter.py +32 -21
invarlock/reporting/__init__.py +18 -4
invarlock/reporting/guards_analysis.py +154 -4
invarlock/reporting/html.py +61 -11
invarlock/reporting/normalizer.py +9 -2
invarlock/reporting/policy_utils.py +1 -1
invarlock/reporting/primary_metric_utils.py +11 -11
invarlock/reporting/render.py +876 -510
invarlock/reporting/report.py +72 -30
invarlock/reporting/{certificate.py → report_builder.py} +252 -99
invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
invarlock/reporting/report_types.py +6 -1
invarlock/reporting/telemetry.py +86 -0
invarlock-0.3.8.dist-info/METADATA +283 -0
{invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
{invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
{invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
invarlock/adapters/hf_gpt2.py +0 -404
invarlock/adapters/hf_llama.py +0 -487
invarlock/cli/commands/certify.py +0 -422
invarlock-0.3.6.dist-info/METADATA +0 -588
{invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
{invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0

invarlock/cli/app.py CHANGED Viewed

@@ -17,7 +17,11 @@ import typer
 from rich.console import Console
 from typer.core import TyperGroup
-from invarlock.security import enforce_default_security
+from invarlock.security import (
+    enforce_default_security,
+    enforce_network_policy,
+    network_policy_allows,
+)
 # Lightweight import mode disables heavy side effects in some modules, but we no
 # longer force plugin discovery off globally here; individual commands may gate
@@ -33,7 +37,7 @@ LIGHT_IMPORT = os.getenv("INVARLOCK_LIGHT_IMPORT", "").strip().lower() in {
 class OrderedGroup(TyperGroup):
     def list_commands(self, ctx):  # type: ignore[override]
         return [
-            "certify",
+            "evaluate",
             "calibrate",
             "report",
             "verify",
@@ -48,8 +52,8 @@ class OrderedGroup(TyperGroup):
 app = typer.Typer(
     name="invarlock",
     help=(
-        "InvarLock — certify model changes with deterministic pairing and safety gates.\n"
-        "Quick path: invarlock certify --baseline <MODEL> --subject <MODEL>\n"
+        "InvarLock — evaluate model changes with deterministic pairing and safety gates.\n"
+        "Quick path: invarlock evaluate --baseline <MODEL> --subject <MODEL>\n"
         "Hint: use --edit-config to run the built-in quant_rtn demo.\n"
         "Tip: enable downloads with INVARLOCK_ALLOW_NETWORK=1 when fetching.\n"
         "Exit codes:\n"
@@ -65,17 +69,16 @@ app = typer.Typer(
 console = Console()
-@app.command()
-def version():
-    """Show InvarLock version."""
+def _emit_version() -> None:
+    """Emit the InvarLock version string."""
     # Prefer package metadata when available so CLI reflects wheel truth
     try:
         from importlib.metadata import version as _pkg_version
         schema = None
         try:
-            from invarlock.reporting.certificate import (
-                CERTIFICATE_SCHEMA_VERSION as _SCHEMA,
+            from invarlock.reporting.report_builder import (
+                REPORT_SCHEMA_VERSION as _SCHEMA,
             )
             schema = _SCHEMA
@@ -96,26 +99,59 @@ def version():
         console.print("InvarLock version unknown")
+@app.callback(invoke_without_command=True)
+def _root(
+    ctx: typer.Context,
+    show_version: bool = typer.Option(
+        False,
+        "--version",
+        "-V",
+        help="Show version and exit.",
+        is_eager=True,
+    ),
+) -> None:
+    was_allowed = network_policy_allows()
+    enforce_default_security()
+    ctx.call_on_close(lambda: enforce_network_policy(was_allowed))
+    if show_version:
+        _emit_version()
+        raise typer.Exit()
+@app.command()
+def version():
+    """Show InvarLock version."""
+    _emit_version()
 """Register command modules and groups in the desired help order.
-Order: certify → report → run → plugins → doctor → version
+Order: evaluate → report → run → plugins → doctor → version
 """
 @app.command(
-    name="certify",
+    name="evaluate",
     help=(
-        "Certify a subject model against a baseline and generate a safety certificate. "
+        "Evaluate a subject model against a baseline and generate an evaluation report. "
         "Use when you have two model snapshots and want pass/fail gating."
     ),
 )
-def _certify_lazy(
+def _evaluate_lazy(
     source: str = typer.Option(
         ..., "--source", "--baseline", help="Baseline model dir or Hub ID"
     ),
     edited: str = typer.Option(
         ..., "--edited", "--subject", help="Subject model dir or Hub ID"
     ),
+    baseline_report: str | None = typer.Option(
+        None,
+        "--baseline-report",
+        help=(
+            "Reuse an existing baseline run report.json (skips baseline evaluation). "
+            "Must include stored evaluation windows (e.g., set INVARLOCK_STORE_EVAL_WINDOWS=1)."
+        ),
+    ),
     adapter: str = typer.Option(
         "auto", "--adapter", help="Adapter name or 'auto' to resolve"
     ),
@@ -133,26 +169,60 @@ def _certify_lazy(
         ),
     ),
     out: str = typer.Option("runs", "--out", help="Base output directory"),
-    cert_out: str = typer.Option(
-        "reports/cert", "--cert-out", help="Certificate output directory"
+    report_out: str = typer.Option(
+        "reports/eval", "--report-out", help="Evaluation report output directory"
     ),
     edit_config: str | None = typer.Option(
         None, "--edit-config", help="Edit preset to apply a demo edit (quant_rtn)"
     ),
+    edit_label: str | None = typer.Option(
+        None,
+        "--edit-label",
+        help=(
+            "Edit algorithm label for BYOE models. Use 'noop' for baseline, "
+            "'quant_rtn' etc. for built-in edits, 'custom' for pre-edited models."
+        ),
+    ),
+    quiet: bool = typer.Option(
+        False, "--quiet", "-q", help="Minimal output (suppress run/report detail)"
+    ),
+    verbose: bool = typer.Option(
+        False, "--verbose", "-v", help="Verbose output (include debug details)"
+    ),
+    banner: bool = typer.Option(
+        True, "--banner/--no-banner", help="Show header banner"
+    ),
+    style: str = typer.Option("audit", "--style", help="Output style (audit|friendly)"),
+    timing: bool = typer.Option(False, "--timing", help="Show timing summary"),
+    progress: bool = typer.Option(
+        True, "--progress/--no-progress", help="Show progress done messages"
+    ),
+    no_color: bool = typer.Option(
+        False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
+    ),
 ):
-    from .commands.certify import certify_command as _cert
+    from .commands.evaluate import evaluate_command as _eval
-    return _cert(
+    return _eval(
         source=source,
         edited=edited,
+        baseline_report=baseline_report,
         adapter=adapter,
         device=device,
         profile=profile,
         tier=tier,
         preset=preset,
         out=out,
-        cert_out=cert_out,
+        report_out=report_out,
         edit_config=edit_config,
+        edit_label=edit_label,
+        quiet=quiet,
+        verbose=verbose,
+        banner=banner,
+        style=style,
+        timing=timing,
+        progress=progress,
+        no_color=no_color,
     )
@@ -184,18 +254,18 @@ def _register_subapps() -> None:
 @app.command(
     name="verify",
     help=(
-        "Verify certificate JSON(s) against schema, pairing math, and gates. "
+        "Verify evaluation report JSON(s) against schema, pairing math, and gates. "
         "Use --json for a single-line machine-readable envelope."
     ),
 )
 def _verify_typed(
-    certificates: list[str] = typer.Argument(
-        ..., help="One or more certificate JSON files to verify."
+    reports: list[str] = typer.Argument(
+        ..., help="One or more evaluation report JSON files to verify."
     ),
     baseline: str | None = typer.Option(
         None,
         "--baseline",
-        help="Optional baseline certificate/report JSON to enforce provider parity.",
+        help="Optional baseline evaluation report JSON to enforce provider parity.",
     ),
     tolerance: float = typer.Option(
         1e-9, "--tolerance", help="Tolerance for analysis-basis comparisons."
@@ -215,10 +285,10 @@ def _verify_typed(
     from .commands.verify import verify_command as _verify
-    cert_paths = [_Path(c) for c in certificates]
+    report_paths = [_Path(p) for p in reports]
     baseline_path = _Path(baseline) if isinstance(baseline, str) else None
     return _verify(
-        certificates=cert_paths,
+        reports=report_paths,
         baseline=baseline_path,
         tolerance=tolerance,
         profile=profile,
@@ -230,7 +300,7 @@ def _verify_typed(
     name="run",
     help=(
         "Execute an end-to-end run from a YAML config (edit + guards + reports). "
-        "Writes run artifacts and optionally a safety certificate."
+        "Writes run artifacts and optionally an evaluation report."
     ),
 )
 def _run_typed(
@@ -245,16 +315,31 @@ def _run_typed(
     ),
     out: str | None = typer.Option(None, "--out", help="Output directory override"),
     edit: str | None = typer.Option(None, "--edit", help="Edit kind (quant|mixed)"),
+    edit_label: str | None = typer.Option(
+        None,
+        "--edit-label",
+        help=(
+            "Edit algorithm label for BYOE models. Use 'noop' for baseline, "
+            "'quant_rtn' etc. for built-in edits, 'custom' for pre-edited models."
+        ),
+    ),
     tier: str | None = typer.Option(
         None,
         "--tier",
         help="Auto-tuning tier override (conservative|balanced|aggressive)",
     ),
+    metric_kind: str | None = typer.Option(
+        None,
+        "--metric-kind",
+        help="Primary metric kind override (ppl_causal|ppl_mlm|accuracy|etc.)",
+    ),
     probes: int | None = typer.Option(
         None, "--probes", help="Number of micro-probes (0=deterministic, >0=adaptive)"
     ),
     until_pass: bool = typer.Option(
-        False, "--until-pass", help="Retry until certificate passes (max 3 attempts)"
+        False,
+        "--until-pass",
+        help="Retry until evaluation report passes gates (max 3 attempts)",
     ),
     max_attempts: int = typer.Option(
         3, "--max-attempts", help="Maximum retry attempts for --until-pass mode"
@@ -265,11 +350,21 @@ def _run_typed(
     baseline: str | None = typer.Option(
         None,
         "--baseline",
-        help="Path to baseline report.json for certificate validation",
+        help="Path to baseline report.json for evaluation report validation",
     ),
     no_cleanup: bool = typer.Option(
         False, "--no-cleanup", help="Skip cleanup of temporary artifacts"
     ),
+    style: str | None = typer.Option(
+        None, "--style", help="Output style (audit|friendly)"
+    ),
+    progress: bool = typer.Option(
+        False, "--progress", help="Show progress done messages"
+    ),
+    timing: bool = typer.Option(False, "--timing", help="Show timing summary"),
+    no_color: bool = typer.Option(
+        False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
+    ),
 ):
     from .commands.run import run_command as _run
@@ -279,13 +374,19 @@ def _run_typed(
         profile=profile,
         out=out,
         edit=edit,
+        edit_label=edit_label,
         tier=tier,
+        metric_kind=metric_kind,
         probes=probes,
         until_pass=until_pass,
         max_attempts=max_attempts,
         timeout=timeout,
         baseline=baseline,
         no_cleanup=no_cleanup,
+        style=style,
+        progress=progress,
+        timing=timing,
+        no_color=no_color,
     )

invarlock/cli/commands/__init__.py CHANGED Viewed

@@ -5,8 +5,8 @@ Import-time work is minimal; subcommands themselves may perform heavier imports
 only when invoked.
 """
-from .certify import certify_command
 from .doctor import doctor_command
+from .evaluate import evaluate_command
 from .explain_gates import explain_gates_command
 from .export_html import export_html_command
 from .plugins import plugins_command
@@ -15,7 +15,7 @@ from .run import run_command
 from .verify import verify_command
 __all__ = [
-    "certify_command",
+    "evaluate_command",
     "doctor_command",
     "explain_gates_command",
     "export_html_command",

invarlock/cli/commands/calibrate.py CHANGED Viewed

@@ -20,10 +20,6 @@ import typer
 import yaml
 from rich.console import Console
-from invarlock.calibration.spectral_null import summarize_null_sweep_reports
-from invarlock.calibration.variance_ve import summarize_ve_sweep_reports
-from invarlock.guards.tier_config import get_tier_guard_config
 console = Console()
 calibrate_app = typer.Typer(
@@ -114,6 +110,26 @@ def _write_tiers_recommendation(
     )
+def get_tier_guard_config(tier: str, guard_key: str) -> dict[str, Any]:
+    """Lazy wrapper for tier config lookup.
+    This is intentionally a module-level symbol so tests can patch it without
+    importing torch/transformers at import time.
+    """
+    try:
+        from invarlock.guards.tier_config import get_tier_guard_config as _get_cfg
+    except ModuleNotFoundError as exc:
+        missing = getattr(exc, "name", "") or ""
+        if missing in {"torch", "transformers"}:
+            console.print(
+                "[red]Missing optional dependencies for calibration.[/red] "
+                "Install `invarlock[hf]` (or at least torch/transformers) to run sweeps."
+            )
+            raise typer.Exit(1) from exc
+        raise
+    return _get_cfg(tier, guard_key)
 @calibrate_app.command(
     name="null-sweep",
     help="Run a null (no-op edit) sweep and calibrate spectral κ/alpha empirically.",
@@ -160,6 +176,21 @@ def null_sweep(
     # Keep import light: only pull run machinery when invoked.
     from .run import run_command
+    # Optional deps: calibration sweeps require torch/guards, but docs/tests may
+    # import this module without heavy deps. Import lazily so CLI example
+    # validation can parse `invarlock calibrate ...` without installing torch.
+    try:
+        from invarlock.calibration.spectral_null import summarize_null_sweep_reports
+    except ModuleNotFoundError as exc:
+        missing = getattr(exc, "name", "") or ""
+        if missing in {"torch", "transformers"}:
+            console.print(
+                "[red]Missing optional dependencies for calibration.[/red] "
+                "Install `invarlock[hf]` (or at least torch/transformers) to run sweeps."
+            )
+            raise typer.Exit(1) from exc
+        raise
     base = _load_yaml(config)
     specs = _materialize_sweep_specs(
         tiers=tiers, seeds=seed, n_seeds=n_seeds, seed_start=seed_start
@@ -378,6 +409,19 @@ def ve_sweep(
     # Keep import light: only pull run machinery when invoked.
     from .run import run_command
+    # Optional deps: see null_sweep() note.
+    try:
+        from invarlock.calibration.variance_ve import summarize_ve_sweep_reports
+    except ModuleNotFoundError as exc:
+        missing = getattr(exc, "name", "") or ""
+        if missing in {"torch", "transformers"}:
+            console.print(
+                "[red]Missing optional dependencies for calibration.[/red] "
+                "Install `invarlock[hf]` (or at least torch/transformers) to run sweeps."
+            )
+            raise typer.Exit(1) from exc
+        raise
     base = _load_yaml(config)
     windows = [int(w) for w in (window or [])] or [6, 8, 12, 16]
     specs = _materialize_sweep_specs(

invarlock/cli/commands/doctor.py CHANGED Viewed

@@ -1042,8 +1042,7 @@ def doctor_command(
                 module = str(info.get("module") or "")
                 support = (
                     "auto"
-                    if module.startswith("invarlock.adapters")
-                    and n in {"hf_causal_auto", "hf_mlm_auto"}
+                    if module.startswith("invarlock.adapters") and n in {"hf_auto"}
                     else (
                         "core"
                         if module.startswith("invarlock.adapters")
@@ -1058,11 +1057,10 @@ def doctor_command(
                 # Heuristic backend mapping without heavy imports
                 if n in {
-                    "hf_gpt2",
-                    "hf_bert",
-                    "hf_llama",
-                    "hf_causal_auto",
-                    "hf_mlm_auto",
+                    "hf_causal",
+                    "hf_mlm",
+                    "hf_seq2seq",
+                    "hf_auto",
                 }:
                     # Transformers-based
                     backend = "transformers"
@@ -1097,8 +1095,8 @@ def doctor_command(
                         }.get(n)
                         if hint:
                             enable = f"pip install '{hint}'"
-                # Special-case: hf_onnx is a core adapter but requires Optimum/ONNXRuntime
-                if n == "hf_onnx":
+                # Special-case: ONNX causal adapter is core but requires Optimum/ONNXRuntime
+                if n == "hf_causal_onnx":
                     backend = backend or "onnxruntime"
                     present = (
                         importlib.util.find_spec("optimum.onnxruntime") is not None
@@ -1322,7 +1320,7 @@ def doctor_command(
         if "optimum" in str(e).lower():
             if not json_out:
                 console.print(
-                    "  [yellow]⚠️  Optional Optimum/ONNXRuntime missing; hf_onnx will be shown as needs_extra[/yellow]"
+                    "  [yellow]⚠️  Optional Optimum/ONNXRuntime missing; hf_causal_onnx will be shown as needs_extra[/yellow]"
                 )
             # Do not mark overall health as failed for optional extras
         else:

invarlock 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

invarlock 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl