invarlock-0.3.7-py3-none-any.whl → invarlock-0.3.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +3 -3
- invarlock/adapters/auto.py +2 -10
- invarlock/adapters/hf_loading.py +7 -7
- invarlock/adapters/hf_mixin.py +28 -5
- invarlock/assurance/__init__.py +15 -23
- invarlock/cli/adapter_auto.py +1 -5
- invarlock/cli/app.py +57 -27
- invarlock/cli/commands/__init__.py +2 -2
- invarlock/cli/commands/calibrate.py +48 -4
- invarlock/cli/commands/{certify.py → evaluate.py} +69 -46
- invarlock/cli/commands/explain_gates.py +25 -17
- invarlock/cli/commands/export_html.py +11 -9
- invarlock/cli/commands/report.py +116 -46
- invarlock/cli/commands/run.py +274 -66
- invarlock/cli/commands/verify.py +84 -89
- invarlock/cli/determinism.py +1 -1
- invarlock/cli/provenance.py +3 -3
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/retry.py +14 -14
- invarlock/core/runner.py +1 -1
- invarlock/edits/noop.py +2 -2
- invarlock/edits/quant_rtn.py +2 -2
- invarlock/eval/__init__.py +1 -1
- invarlock/eval/bench.py +11 -7
- invarlock/eval/primary_metric.py +1 -1
- invarlock/guards/spectral.py +1 -1
- invarlock/model_profile.py +16 -35
- invarlock/plugins/hf_bnb_adapter.py +32 -21
- invarlock/reporting/__init__.py +18 -4
- invarlock/reporting/html.py +7 -7
- invarlock/reporting/normalizer.py +2 -2
- invarlock/reporting/policy_utils.py +1 -1
- invarlock/reporting/primary_metric_utils.py +11 -11
- invarlock/reporting/render.py +126 -120
- invarlock/reporting/report.py +43 -37
- invarlock/reporting/{certificate.py → report_builder.py} +98 -95
- invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
- invarlock-0.3.8.dist-info/METADATA +283 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/RECORD +43 -43
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
- invarlock-0.3.7.dist-info/METADATA +0 -602
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +0 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
invarlock/core/runner.py
CHANGED
@@ -521,7 +521,7 @@ class CoreRunner:
                     f"Guard '{guard.name}' prepare failed: {e}"
                 ) from e

-        # Store resolved policies in report for …
+        # Store resolved policies in report for evaluation report generation
         report.meta["tier_policies"] = tier_policies

         self._log_event(
invarlock/edits/noop.py
CHANGED
@@ -1,7 +1,7 @@
-"""Built-in no-op edit used for baseline and Compare & …
+"""Built-in no-op edit used for baseline and Compare & Evaluate (BYOE).

 This edit does not modify the model and reports zero deltas. It exists to
-support baseline runs and Compare & …
+support baseline runs and Compare & Evaluate workflows where the subject
 checkpoint is produced outside of InvarLock.
 """

invarlock/edits/quant_rtn.py
CHANGED
@@ -406,7 +406,7 @@ class RTNQuantEdit(ModelEdit):
             if layer_name not in modified_layers:
                 modified_layers.append(layer_name)

-        # Store edit plan for …
+        # Store edit plan for evaluation report generation
         modules_quantized = [r["module_name"] for r in quantization_results]

         edit_plan = {
@@ -424,7 +424,7 @@ class RTNQuantEdit(ModelEdit):
         return {
             "name": self.name,
             "plan_digest": f"rtn_quantization_{bitwidth}bit_{scope}",
-            "plan": edit_plan,  # Include the plan for …
+            "plan": edit_plan,  # Include the plan for evaluation report generation
             "deltas": {
                 "params_changed": total_params_quantized,
                 "sparsity": None,  # Quantization doesn't create sparsity
invarlock/eval/__init__.py
CHANGED
@@ -1,7 +1,7 @@
 """Evaluation utilities (`invarlock.eval`).

 This package now focuses on evaluation helpers (metrics, data, probes).
-Reporting (report building …
+Reporting (evaluation report building and schema validation) has moved to
 `invarlock.reporting`.
 """

invarlock/eval/bench.py
CHANGED
@@ -976,17 +976,21 @@ def execute_scenario(
         except Exception:
             pass

-        # Generate …
+        # Generate evaluation report artifact when both runs produced reports
         try:
             if bare_result.success and guarded_result.success:
-                from invarlock.reporting. …
+                from invarlock.reporting.report_builder import make_report

-                …
-                …
-                …
-                …
+                evaluation_report = make_report(guarded_result.report, bare_result.report)
+                report_path = scenario_dir / "evaluation.report.json"
+                report_path.write_text(
+                    json.dumps(evaluation_report, indent=2), encoding="utf-8"
+                )
+                artifacts["evaluation_report"] = str(report_path)
         except Exception as exc:
-            logger.warning( …
+            logger.warning(
+                f"Evaluation report generation failed for {scenario_slug}: {exc}"
+            )

         # Resolve epsilon from runtime or use config
         epsilon_used = config.epsilon
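
Note: the bench flow above writes the new evaluation report artifact next to the scenario outputs. A minimal sketch of the same flow, assuming `guarded_report` and `bare_report` are the run-report dicts from two successful runs (both names are hypothetical stand-ins for this example):

    import json
    from pathlib import Path

    from invarlock.reporting.report_builder import make_report

    scenario_dir = Path("runs/demo-scenario")  # hypothetical output directory
    # Guarded run first, baseline (bare) run second, matching the diff above.
    evaluation_report = make_report(guarded_report, bare_report)
    report_path = scenario_dir / "evaluation.report.json"
    report_path.write_text(json.dumps(evaluation_report, indent=2), encoding="utf-8")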
invarlock/eval/primary_metric.py
CHANGED
@@ -2,7 +2,7 @@
 Primary metric abstraction and minimal ppl_causal implementation (Phase 1).

 This module introduces a light-weight, task-agnostic metric interface and a
-registry so the runner …
+registry so the runner and evaluation report builder can evolve beyond causal-LM perplexity.

 Phase 1 goal: provide a ppl_causal metric and a helper that can compute point
 estimates directly from evaluation window aggregates already present in run
invarlock/guards/spectral.py
CHANGED
@@ -344,7 +344,7 @@ class SpectralGuard(Guard):
         self.ignore_preview_inflation = bool(policy["ignore_preview_inflation"])
         self.config["ignore_preview_inflation"] = self.ignore_preview_inflation

-        # Optional hydration of baseline stats from policy (e.g., baseline …
+        # Optional hydration of baseline stats from policy (e.g., baseline evaluation report)
         if "baseline_family_stats" in policy and isinstance(
             policy["baseline_family_stats"], dict
         ):
invarlock/model_profile.py
CHANGED
@@ -6,11 +6,9 @@ from dataclasses import dataclass, field
 from typing import Any

 AutoTokenizer: Any | None = None
-GPT2Tokenizer: Any | None = None

 try:
     from transformers import AutoTokenizer as _AutoTokenizer
-    from transformers import GPT2Tokenizer as _GPT2Tokenizer
     from transformers.tokenization_utils_base import PreTrainedTokenizerBase
 except Exception:  # pragma: no cover - exercised only when transformers is absent

@@ -26,7 +24,6 @@ except Exception:  # pragma: no cover - exercised only when transformers is absent

 else:  # pragma: no cover - transformers optional
     AutoTokenizer = _AutoTokenizer
-    GPT2Tokenizer = _GPT2Tokenizer


 TokenizerFactory = Callable[[], tuple[PreTrainedTokenizerBase, str]]
@@ -177,12 +174,12 @@ def _make_bert_tokenizer(model_id: str):

 def _make_gpt2_tokenizer(model_id: str):
     def factory() -> tuple[PreTrainedTokenizerBase, str]:
-        if GPT2Tokenizer is None:
+        if AutoTokenizer is None:
             raise RuntimeError(
                 "GPT-2 tokenizers require the 'transformers' extra. "
                 "Install it with: pip install 'invarlock[adapters]'."
             )
-        tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+        tokenizer = AutoTokenizer.from_pretrained("gpt2")
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
         hash_value = _hash_tokenizer(tokenizer)
@@ -193,7 +190,7 @@ def _make_gpt2_tokenizer(model_id: str):

 def _make_causal_auto_tokenizer(model_id: str):
     def factory() -> tuple[PreTrainedTokenizerBase, str]:
-        if AutoTokenizer is None …
+        if AutoTokenizer is None:
             raise RuntimeError(
                 "Causal tokenizers require the 'transformers' extra. "
                 "Install it with: pip install 'invarlock[adapters]'."
@@ -202,23 +199,15 @@ def _make_causal_auto_tokenizer(model_id: str):
         # local GPT-2 tokenizer if the model assets are not cached or network
         # access is denied.
         tokenizer = None
-        …
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
+        except Exception:
             try:
-                tokenizer = AutoTokenizer.from_pretrained(
-                    model_id, local_files_only=True
-                )
+                tokenizer = AutoTokenizer.from_pretrained(model_id)
             except Exception:
-                …
-                tokenizer = AutoTokenizer.from_pretrained(model_id)
-            except Exception:
-                tokenizer = None
+                tokenizer = None
         if tokenizer is None:
-            …
-            raise RuntimeError(
-                "Tokenization requires the 'transformers' extra. "
-                "Install it with: pip install 'invarlock[adapters]'."
-            )
-            tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+            tokenizer = AutoTokenizer.from_pretrained("gpt2")
         # Ensure padding/bos tokens are configured so downstream encoding
         # yields stable non-zero ids and a valid attention mask regardless of
         # environment defaults or tokenizer variants.
@@ -246,30 +235,22 @@ def _make_causal_auto_tokenizer(model_id: str):

 def _make_unknown_tokenizer(model_id: str):
     def factory() -> tuple[PreTrainedTokenizerBase, str]:
-        if AutoTokenizer is None …
+        if AutoTokenizer is None:
             raise RuntimeError(
                 "Text tokenization requires the 'transformers' extra. "
                 "Install it with: pip install 'invarlock[adapters]'."
             )
         # Unknown families: try local-only first, then remote, then degrade to GPT-2
         tokenizer = None
-        …
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
+        except Exception:
             try:
-                tokenizer = AutoTokenizer.from_pretrained(
-                    model_id, local_files_only=True
-                )
+                tokenizer = AutoTokenizer.from_pretrained(model_id)
             except Exception:
-                …
-                tokenizer = AutoTokenizer.from_pretrained(model_id)
-            except Exception:
-                tokenizer = None
+                tokenizer = None
         if tokenizer is None:
-            …
-            raise RuntimeError(
-                "Text tokenization requires the 'transformers' extra. "
-                "Install it with: pip install 'invarlock[adapters]'."
-            )
-            tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+            tokenizer = AutoTokenizer.from_pretrained("gpt2")
         if getattr(tokenizer, "pad_token", None) is None:
             eos_token = getattr(tokenizer, "eos_token", None)
             if eos_token is not None:
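
Note: both tokenizer factories now share the same fallback order (local cache, then remote fetch, then the stock GPT-2 tokenizer) and no longer import GPT2Tokenizer. A sketch of that order using the public transformers API, assuming transformers is installed:

    from transformers import AutoTokenizer

    def load_tokenizer_with_fallback(model_id: str):
        # Prefer already-cached assets so offline runs never hit the network.
        try:
            return AutoTokenizer.from_pretrained(model_id, local_files_only=True)
        except Exception:
            pass
        # Fall back to a remote fetch, then degrade to GPT-2 as a last resort.
        try:
            return AutoTokenizer.from_pretrained(model_id)
        except Exception:
            return AutoTokenizer.from_pretrained("gpt2")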
invarlock/plugins/hf_bnb_adapter.py
CHANGED
@@ -7,7 +7,7 @@ through Transformers. Requires GPU for practical use.
 Install with the `gpu` extra on supported platforms.

 This adapter handles both:
-1. Fresh quantization of FP16 models (…
+1. Fresh quantization of FP16 models (via quantization_config)
 2. Loading pre-quantized BNB checkpoints (auto-detected via quantization_config)
 """

@@ -55,16 +55,17 @@ def _detect_pre_quantized_bnb(model_id: str) -> tuple[bool, int]:
         if not quant_cfg:
             return False, 0

-        # Check for BNB quantization …
-        quant_method = quant_cfg.get("quant_method", "").lower()
-        if …
+        # Check for BNB quantization. Prefer explicit bits, then legacy flags.
+        quant_method = str(quant_cfg.get("quant_method", "")).lower()
+        if "bitsandbytes" in quant_method or "bnb" in quant_method:
+            bits = quant_cfg.get("bits")
+            if isinstance(bits, int) and bits in {4, 8}:
+                return True, bits
         if quant_cfg.get("load_in_8bit"):
             return True, 8
         if quant_cfg.get("load_in_4bit"):
             return True, 4
-
-        bits = quant_cfg.get("bits", 8)
-        return True, bits
+        return True, 8

     except Exception:
         pass
@@ -82,11 +83,17 @@ class HF_BNB_Adapter(HFAdapterMixin, ModelAdapter):
             "DEPENDENCY-MISSING: transformers",
             lambda e: {"dependency": "transformers"},
         ):
-            from transformers import AutoModelForCausalLM
+            from transformers import AutoModelForCausalLM, BitsAndBytesConfig

         # Check if this is a pre-quantized checkpoint
         is_pre_quantized, pre_quant_bits = _detect_pre_quantized_bnb(model_id)

+        if "load_in_8bit" in kwargs or "load_in_4bit" in kwargs:
+            raise ValueError(
+                "hf_bnb adapter: load_in_8bit/load_in_4bit are not supported. "
+                "Use model.quantization_config instead."
+            )
+
         if is_pre_quantized:
             # Load pre-quantized checkpoint WITHOUT re-applying quantization
             with wrap_errors(
@@ -99,20 +106,25 @@ class HF_BNB_Adapter(HFAdapterMixin, ModelAdapter):
                     model_id,
                     device_map="auto",
                     trust_remote_code=True,
-                    …
-                    **{
-                        k: v
-                        for k, v in kwargs.items()
-                        if k not in ("load_in_8bit", "load_in_4bit")
-                    },
+                    **kwargs,
                 )
         else:
             # Fresh quantization of FP16 model
-            …
-            …
-            …
-            …
-            …
+            quantization_config = kwargs.pop("quantization_config", None)
+            if quantization_config is None:
+                quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+            elif isinstance(quantization_config, dict):
+                qdict = dict(quantization_config)
+                bits = qdict.pop("bits", None)
+                qdict.pop("quant_method", None)
+                if isinstance(bits, int):
+                    if bits == 4:
+                        qdict.setdefault("load_in_4bit", True)
+                        qdict.setdefault("load_in_8bit", False)
+                    elif bits == 8:
+                        qdict.setdefault("load_in_8bit", True)
+                        qdict.setdefault("load_in_4bit", False)
+                quantization_config = BitsAndBytesConfig(**qdict)

             with wrap_errors(
                 ModelLoadError,
@@ -123,9 +135,8 @@ class HF_BNB_Adapter(HFAdapterMixin, ModelAdapter):
                 model = AutoModelForCausalLM.from_pretrained(
                     model_id,
                     device_map="auto",
-                    load_in_8bit=load_in_8bit,
-                    load_in_4bit=load_in_4bit,
                     trust_remote_code=True,
+                    quantization_config=quantization_config,
                     **kwargs,
                 )
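
Note: with load_in_8bit/load_in_4bit now rejected by the adapter, callers express quantization through a quantization_config. A sketch using the real transformers/bitsandbytes API (the model id is a hypothetical example):

    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    config = BitsAndBytesConfig(load_in_8bit=True)  # or load_in_4bit=True
    model = AutoModelForCausalLM.from_pretrained(
        "facebook/opt-125m",  # hypothetical model id
        device_map="auto",
        quantization_config=config,
    )

The adapter also accepts the same thing as a plain dict (e.g. {"bits": 8}) and normalizes it into a BitsAndBytesConfig, as the diff above shows.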
invarlock/reporting/__init__.py
CHANGED
@@ -1,7 +1,21 @@
 """
-…
+Evaluation report tooling (`invarlock.reporting`).

-Provides …
-and certificate generation while keeping backward compatibility with
-`invarlock.eval.*` imports.
+Provides the evaluation report schema, builder, and renderers.
 """
+
+from __future__ import annotations
+
+from .html import render_report_html
+from .render import render_report_markdown
+from .report_builder import make_report, validate_report
+from .report_schema import REPORT_JSON_SCHEMA, REPORT_SCHEMA_VERSION
+
+__all__ = [
+    "REPORT_SCHEMA_VERSION",
+    "REPORT_JSON_SCHEMA",
+    "make_report",
+    "render_report_markdown",
+    "render_report_html",
+    "validate_report",
+]
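
Note: the package root now re-exports the report toolchain, so downstream code can import everything from one place. A sketch, assuming `guarded_report` and `baseline_report` are run-report dicts (hypothetical names) and that validate_report flags schema violations (assumed behavior):

    from invarlock.reporting import (
        make_report,
        render_report_markdown,
        validate_report,
    )

    evaluation_report = make_report(guarded_report, baseline_report)
    validate_report(evaluation_report)  # assumed to flag schema violations
    print(render_report_markdown(evaluation_report))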
invarlock/reporting/html.py
CHANGED
@@ -1,5 +1,5 @@
 """
-Minimal HTML exporter for …
+Minimal HTML exporter for reports.

 This implementation wraps the Markdown rendering in a simple HTML template so
 that the numbers and core content remain identical across formats.
@@ -10,7 +10,7 @@ from __future__ import annotations
 from html import escape
 from typing import Any

-from .render import …
+from .render import render_report_markdown

 markdown_module: Any | None = None
 try:
@@ -37,13 +37,13 @@ def _apply_status_badges(html_body: str) -> str:
     return updated


-def render_certificate_html(certificate: dict[str, Any]) -> str:
-    """Render …
+def render_report_html(evaluation_report: dict[str, Any]) -> str:
+    """Render an evaluation report as a simple HTML document.

     Uses the Markdown renderer and converts to HTML when available, falling back
     to a <pre> block when the markdown dependency is missing.
     """
-    md = …
+    md = render_report_markdown(evaluation_report)
     if markdown_module is None:
         body = f'<pre class="invarlock-md">{escape(md)}</pre>'
     else:
@@ -52,7 +52,7 @@ def render_certificate_html(certificate: dict[str, Any]) -> str:
     body = f'<div class="invarlock-md">{html_body}</div>'
     return (
         '<!DOCTYPE html><html><head><meta charset="utf-8">'
-        "<title>InvarLock Evaluation …
+        "<title>InvarLock Evaluation Report</title>"
         "<style>"
         ":root{--pass:#2da44e;--fail:#cf222e;--warn:#bf8700;--ink:#1f2328;"
         "--muted:#57606a;--panel:#f6f8fa;--border:#d0d7de}"
@@ -79,4 +79,4 @@ def render_certificate_html(certificate: dict[str, Any]) -> str:
     )


-__all__ = ["render_certificate_html"]
+__all__ = ["render_report_html"]
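
Note: render_report_html replaces the old certificate renderer. A sketch of writing an HTML artifact from an existing report dict (`evaluation_report` is a hypothetical variable holding a dict built by make_report):

    from pathlib import Path

    from invarlock.reporting import render_report_html

    html = render_report_html(evaluation_report)
    Path("evaluation.report.html").write_text(html, encoding="utf-8")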
invarlock/reporting/normalizer.py
CHANGED
@@ -33,7 +33,7 @@ def normalize_run_report(report: Mapping[str, Any] | RunReport) -> RunReport:
     """Coerce an arbitrary report-like mapping into a canonical RunReport.

     This is the single entry point for converting pre-canonical or loosely-typed
-    data into the strict PM-only RunReport shape used by …
+    data into the strict PM-only RunReport shape used by evaluation report building.
     """
     src = _as_mapping(report)

@@ -53,7 +53,7 @@ def normalize_run_report(report: Mapping[str, Any] | RunReport) -> RunReport:
         "ts": ts,
         "auto": meta_in.get("auto") if isinstance(meta_in.get("auto"), dict) else None,
     }
-    # Preserve additional provenance knobs used by …
+    # Preserve additional provenance knobs used by evaluation report digests.
     for key in (
         "pm_acceptance_range",
         "pm_drift_band",
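
Note: normalize_run_report stays the single coercion entry point for loosely-typed inputs. A sketch (the input dict is a hypothetical minimal example; real run reports carry more fields):

    from invarlock.reporting.normalizer import normalize_run_report

    raw = {"meta": {"ts": "2024-01-01T00:00:00Z"}, "metrics": {}}  # hypothetical input
    run_report = normalize_run_report(raw)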
invarlock/reporting/policy_utils.py
CHANGED
@@ -35,7 +35,7 @@ def _compute_thresholds_payload(
     tier: str, resolved_policy: dict[str, Any]
 ) -> dict[str, Any]:
     """Build canonical thresholds payload for digest stability."""
-    from . …
+    from .report_builder import TIER_RATIO_LIMITS  # local to avoid cycles

     tier_lc = (tier or "balanced").lower()
     metrics_policy = (
invarlock/reporting/primary_metric_utils.py
CHANGED
@@ -8,21 +8,21 @@ from .utils import _coerce_interval, _weighted_mean


 def attach_primary_metric(
-    certificate: dict[str, Any],
+    evaluation_report: dict[str, Any],
     report: dict[str, Any],
     baseline_raw: dict[str, Any] | None,
     baseline_ref: dict[str, Any] | None,
     ppl_analysis: dict[str, Any] | None,
 ) -> None:
-    """Attach/normalize the primary_metric block on the …
+    """Attach/normalize the primary_metric block on the evaluation report.

-    Behavior mirrors historical logic in …
+    Behavior mirrors historical logic in report_builder.py and preserves structure:
     - Prefer explicit metrics.primary_metric if present
     - Compute missing ratio_vs_baseline, degenerate display_ci
     - ppl window-based analysis info (mean logloss) added when available
     - Fallbacks for classification metrics and eval-window-derived ppl
     - Ensure display_ci always present for schema invariants
-    Mutates the …
+    Mutates the evaluation report in-place.
     """
     # Attach primary metric snapshot when provided in report
     try:
@@ -180,12 +180,12 @@ def attach_primary_metric(
             ]
         except Exception:
             pass
-        certificate["primary_metric"] = pm_copy
+        evaluation_report["primary_metric"] = pm_copy
     except Exception:
         pass

     def _attach_from_windows() -> None:
-        if isinstance(certificate.get("primary_metric"), dict):
+        if isinstance(evaluation_report.get("primary_metric"), dict):
             return
         try:
             m = (
@@ -212,7 +212,7 @@ def attach_primary_metric(
                 baseline=baseline_raw if isinstance(baseline_raw, dict) else None,
             )
             if isinstance(pm_block, dict) and pm_block:
-                certificate["primary_metric"] = pm_block
+                evaluation_report["primary_metric"] = pm_block
         except Exception:
             pass

@@ -220,7 +220,7 @@ def attach_primary_metric(
     _attach_from_windows()

     # Minimal fallback for classification-only reports without explicit primary_metric
-    if not isinstance(certificate.get("primary_metric"), dict):
+    if not isinstance(evaluation_report.get("primary_metric"), dict):
         try:
             metrics_map = report.get("metrics", {}) if isinstance(report, dict) else {}
             clf = (
@@ -298,7 +298,7 @@ def attach_primary_metric(
                 acc_pm["ratio_vs_baseline"] = delta_pp
             except Exception:
                 pass
-            certificate["primary_metric"] = acc_pm
+            evaluation_report["primary_metric"] = acc_pm
         except Exception:
             pass

@@ -308,8 +308,8 @@ def attach_primary_metric(
     # Ensure primary_metric has display_ci populated for schema invariants
     try:
         pm = (
-            certificate.get("primary_metric", {})
-            if isinstance(certificate.get("primary_metric"), dict)
+            evaluation_report.get("primary_metric", {})
+            if isinstance(evaluation_report.get("primary_metric"), dict)
             else None
         )
         if isinstance(pm, dict) and pm: