invarlock-0.3.6-py3-none-any.whl → invarlock-0.3.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. invarlock/__init__.py +4 -4
  2. invarlock/adapters/__init__.py +10 -14
  3. invarlock/adapters/auto.py +37 -50
  4. invarlock/adapters/capabilities.py +2 -2
  5. invarlock/adapters/hf_causal.py +418 -0
  6. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  7. invarlock/adapters/hf_loading.py +7 -7
  8. invarlock/adapters/hf_mixin.py +53 -9
  9. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  10. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  11. invarlock/assurance/__init__.py +15 -23
  12. invarlock/cli/adapter_auto.py +32 -26
  13. invarlock/cli/app.py +128 -27
  14. invarlock/cli/commands/__init__.py +2 -2
  15. invarlock/cli/commands/calibrate.py +48 -4
  16. invarlock/cli/commands/doctor.py +8 -10
  17. invarlock/cli/commands/evaluate.py +986 -0
  18. invarlock/cli/commands/explain_gates.py +25 -17
  19. invarlock/cli/commands/export_html.py +11 -9
  20. invarlock/cli/commands/plugins.py +13 -9
  21. invarlock/cli/commands/report.py +326 -92
  22. invarlock/cli/commands/run.py +1160 -228
  23. invarlock/cli/commands/verify.py +157 -97
  24. invarlock/cli/config.py +1 -1
  25. invarlock/cli/determinism.py +1 -1
  26. invarlock/cli/doctor_helpers.py +4 -5
  27. invarlock/cli/output.py +193 -0
  28. invarlock/cli/provenance.py +4 -4
  29. invarlock/core/bootstrap.py +1 -1
  30. invarlock/core/registry.py +9 -11
  31. invarlock/core/retry.py +14 -14
  32. invarlock/core/runner.py +112 -26
  33. invarlock/edits/noop.py +2 -2
  34. invarlock/edits/quant_rtn.py +67 -39
  35. invarlock/eval/__init__.py +1 -1
  36. invarlock/eval/bench.py +14 -10
  37. invarlock/eval/data.py +68 -23
  38. invarlock/eval/metrics.py +59 -1
  39. invarlock/eval/primary_metric.py +1 -1
  40. invarlock/eval/tasks/__init__.py +12 -0
  41. invarlock/eval/tasks/classification.py +48 -0
  42. invarlock/eval/tasks/qa.py +36 -0
  43. invarlock/eval/tasks/text_generation.py +102 -0
  44. invarlock/guards/invariants.py +19 -10
  45. invarlock/guards/rmt.py +2 -2
  46. invarlock/guards/spectral.py +1 -1
  47. invarlock/guards/variance.py +2 -2
  48. invarlock/model_profile.py +64 -62
  49. invarlock/observability/health.py +6 -6
  50. invarlock/observability/metrics.py +108 -0
  51. invarlock/plugins/hf_bnb_adapter.py +32 -21
  52. invarlock/reporting/__init__.py +18 -4
  53. invarlock/reporting/guards_analysis.py +154 -4
  54. invarlock/reporting/html.py +61 -11
  55. invarlock/reporting/normalizer.py +9 -2
  56. invarlock/reporting/policy_utils.py +1 -1
  57. invarlock/reporting/primary_metric_utils.py +11 -11
  58. invarlock/reporting/render.py +876 -510
  59. invarlock/reporting/report.py +72 -30
  60. invarlock/reporting/{certificate.py → report_builder.py} +252 -99
  61. invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
  62. invarlock/reporting/report_types.py +6 -1
  63. invarlock/reporting/telemetry.py +86 -0
  64. invarlock-0.3.8.dist-info/METADATA +283 -0
  65. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
  66. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
  67. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
  68. invarlock/adapters/hf_gpt2.py +0 -404
  69. invarlock/adapters/hf_llama.py +0 -487
  70. invarlock/cli/commands/certify.py +0 -422
  71. invarlock-0.3.6.dist-info/METADATA +0 -588
  72. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
  73. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,16 @@
  """
- InvarLock Safety Certificate Generation
- ==================================
+ InvarLock Evaluation Report Generation
+ =====================================

- Generate standardized safety certificates from RunReport and baseline comparison.
- Certificates are standalone, portable verification artifacts that can be used
- for CI/CD gates and regulatory compliance.
+ Generate standardized evaluation reports from RunReport and baseline
+ comparison.
+ Evaluation reports are standalone, portable artifacts that record statistical
+ gates and evidence for CI/CD checks and audits (not formal verification).
  """

  from __future__ import annotations

- ## Core certificate generation and analysis orchestration lives here.
+ ## Core evaluation report building and analysis orchestration lives here.
  # mypy: ignore-errors
  import copy
  import hashlib
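
The docstring change above captures the central rename in 0.3.8: safety certificates become evaluation reports. A minimal sketch of the renamed import surface, based on the __all__ list at the end of this diff (whether invarlock.reporting re-exports the same names is not shown here):

    from invarlock.reporting.report_builder import (
        REPORT_JSON_SCHEMA,     # structural JSON Schema, now provided by report_schema
        REPORT_SCHEMA_VERSION,  # schema version checked by validate_report
        make_report,            # formerly make_certificate
        validate_report,        # formerly validate_certificate
    )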
@@ -38,11 +39,7 @@ from invarlock.eval.primary_metric import compute_primary_metric_from_report, ge
  from invarlock.eval.tail_stats import evaluate_metric_tail
  from invarlock.utils.digest import hash_json

- from . import certificate_schema as _cert_schema
- from .certificate_schema import (
- CERTIFICATE_JSON_SCHEMA,
- CERTIFICATE_SCHEMA_VERSION,
- )
+ from . import report_schema as _report_schema
  from .dataset_hashing import (
  _extract_dataset_info,
  )
@@ -52,10 +49,15 @@ from .guards_analysis import (
  _extract_spectral_analysis,
  _extract_variance_analysis,
  )
- from .report_types import RunReport, validate_report
+ from .report_schema import (
+ REPORT_JSON_SCHEMA,
+ REPORT_SCHEMA_VERSION,
+ )
+ from .report_types import RunReport
+ from .report_types import validate_report as validate_run_report

  # Expose compute_window_hash for tests that monkeypatch it
- # compute_window_hash used to be exposed via certificate; tests now patch
+ # compute_window_hash used to be exposed via the evaluation report builder; tests now patch
  # dataset_hashing.compute_window_hash directly, so this import is no longer needed.
  from .utils import (
  _coerce_int,
@@ -102,7 +104,7 @@ def _is_ppl_kind(name: Any) -> bool:


  ## NOTE: Deprecated helper `_get_ppl_final` was removed; callers should
- ## use the normalized primary_metric block directly via make_certificate or
+ ## use the normalized primary_metric block directly via make_report or
  ## report processing utilities.


@@ -130,8 +132,8 @@ def _compute_edit_digest(report: dict) -> dict:
  return {"family": family, "impl_hash": impl_hash, "version": 1}


- def _compute_confidence_label(certificate: dict[str, Any]) -> dict[str, Any]:
- """Compute certificate confidence label based on stability and CI width.
+ def _compute_confidence_label(evaluation_report: dict[str, Any]) -> dict[str, Any]:
+ """Compute evaluation report confidence label based on stability and CI width.

  Heuristics:
  - High: ppl_acceptable=True, unstable=False, width <= 0.03 (ratio) or <= 1.0 pp for accuracy
@@ -139,7 +141,7 @@ def _compute_confidence_label(certificate: dict[str, Any]) -> dict[str, Any]:
  - Low: otherwise (floors unmet, failure, or missing bounds)
  Returns a dict with label, basis, width and threshold for transparency.
  """
- validation = certificate.get("validation", {}) or {}
+ validation = evaluation_report.get("validation", {}) or {}
  pm_ok = bool(validation.get("primary_metric_acceptable", False))
  # Basis label shown in confidence block:
  # - For ppl-like metrics, use 'ppl_ratio' to reflect ratio width threshold
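
The heuristic documented above keys the label off interval width. A paraphrased sketch of the High bar only (the Medium rule sits on lines that fall outside these hunks, so it is not reproduced, and the function name is illustrative):

    def passes_high_bar(pm_ok: bool, unstable: bool, width: float, kind: str) -> bool:
        # Width threshold depends on the metric kind: ratio-style metrics (ppl)
        # use a 3% interval width, accuracy-style metrics use 1.0 percentage
        # point; resolved_policy.confidence may override both thresholds.
        threshold = 0.03 if kind == "ppl" else 1.0
        return pm_ok and not unstable and width <= threshold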
@@ -148,7 +150,7 @@ def _compute_confidence_label(certificate: dict[str, Any]) -> dict[str, Any]:
  basis = "primary_metric"
  lo = hi = float("nan")
  try:
- pm = certificate.get("primary_metric", {}) or {}
+ pm = evaluation_report.get("primary_metric", {}) or {}
  kind = str(pm.get("kind", "") or "").lower()
  if isinstance(pm, dict) and pm and pm.get("display_ci"):
  dci = pm.get("display_ci")
@@ -169,7 +171,7 @@ def _compute_confidence_label(certificate: dict[str, Any]) -> dict[str, Any]:
  thr_ratio = 0.03 # 3% width for ratio
  thr_pp = 1.0 # 1.0 percentage point for accuracy kinds
  try:
- pol = certificate.get("resolved_policy")
+ pol = evaluation_report.get("resolved_policy")
  if isinstance(pol, dict):
  conf_pol = pol.get("confidence")
  if isinstance(conf_pol, dict):
@@ -186,7 +188,7 @@ def _compute_confidence_label(certificate: dict[str, Any]) -> dict[str, Any]:

  # Unstable hint from primary metric (if provided)
  try:
- unstable = bool((certificate.get("primary_metric") or {}).get("unstable"))
+ unstable = bool((evaluation_report.get("primary_metric") or {}).get("unstable"))
  except Exception: # pragma: no cover
  unstable = False

@@ -212,39 +214,39 @@ def _compute_confidence_label(certificate: dict[str, Any]) -> dict[str, Any]:
  }


- # Minimal JSON Schema describing the canonical shape of a certificate.
+ # Minimal JSON Schema describing the canonical shape of an evaluation report.
  # This focuses on structural validity; numerical thresholds are validated
  # separately in metric-specific logic.
- # JSON Schema is provided by certificate_schema; no duplication here.
+ # JSON Schema is provided by report_schema; no duplication here.


  # Mirror jsonschema and structural validator for test monkeypatching compatibility.
- jsonschema = getattr(_cert_schema, "jsonschema", None)
+ jsonschema = getattr(_report_schema, "jsonschema", None)


- def _validate_with_jsonschema(certificate: dict[str, Any]) -> bool:
+ def _validate_with_jsonschema(evaluation_report: dict[str, Any]) -> bool:
  if jsonschema is None:
  return True
  try:
- jsonschema.validate(instance=certificate, schema=CERTIFICATE_JSON_SCHEMA)
+ jsonschema.validate(instance=evaluation_report, schema=REPORT_JSON_SCHEMA)
  return True
  except Exception: # pragma: no cover
  return False


- def validate_certificate(certificate: dict[str, Any]) -> bool:
- """Validate that a certificate has all required fields and valid data."""
+ def validate_report(evaluation_report: dict[str, Any]) -> bool:
+ """Validate that an evaluation report has all required fields and valid data."""
  try:
- if certificate.get("schema_version") != CERTIFICATE_SCHEMA_VERSION:
+ if evaluation_report.get("schema_version") != REPORT_SCHEMA_VERSION:
  return False
  # Prefer JSON Schema structural validation; if unavailable or too strict,
  # fall back to a lenient minimal check used by unit tests.
- if not _validate_with_jsonschema(certificate):
+ if not _validate_with_jsonschema(evaluation_report):
  # Minimal fallback: require schema version + run_id + primary_metric
- run_id_ok = isinstance(certificate.get("run_id"), str) and bool(
- certificate.get("run_id")
+ run_id_ok = isinstance(evaluation_report.get("run_id"), str) and bool(
+ evaluation_report.get("run_id")
  )
- pm = certificate.get("primary_metric")
+ pm = evaluation_report.get("primary_metric")
  pm_ok = isinstance(pm, dict) and (
  isinstance(pm.get("final"), int | float)
  or (isinstance(pm.get("kind"), str) and bool(pm.get("kind")))
@@ -252,7 +254,7 @@ def validate_certificate(certificate: dict[str, Any]) -> bool:
  if not (run_id_ok and pm_ok):
  return False

- validation = certificate.get("validation", {})
+ validation = evaluation_report.get("validation", {})
  for flag in [
  "preview_final_drift_acceptable",
  "primary_metric_acceptable",
@@ -427,8 +429,8 @@ def _load_validation_allowlist() -> set[str]:
  # disallow unknown validation keys at schema level.
  try:
  _vkeys = _load_validation_allowlist()
- if isinstance(CERTIFICATE_JSON_SCHEMA.get("properties"), dict):
- vspec = CERTIFICATE_JSON_SCHEMA["properties"].get("validation")
+ if isinstance(REPORT_JSON_SCHEMA.get("properties"), dict):
+ vspec = REPORT_JSON_SCHEMA["properties"].get("validation")
  if isinstance(vspec, dict):
  vspec["properties"] = {k: {"type": "boolean"} for k in _vkeys}
  vspec["additionalProperties"] = False
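
The renamed validate_report and the allowlist patch above define the structural contract. A sketch of the smallest payload the lenient fallback accepts, with placeholder values (the validation-flag checks that continue beyond the shown hunk may still reject a report):

    from invarlock.reporting.report_builder import REPORT_SCHEMA_VERSION, validate_report

    minimal = {
        "schema_version": REPORT_SCHEMA_VERSION,           # must match exactly
        "run_id": "demo-run",                              # non-empty string
        "primary_metric": {"kind": "ppl", "final": 12.3},  # numeric final or non-empty kind
    }
    ok = validate_report(minimal)  # downstream flag checks still apply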
@@ -445,7 +447,7 @@ except Exception: # pragma: no cover
  def _normalize_and_validate_report(report: RunReport | dict[str, Any]) -> RunReport:
  """Normalize a possibly-minimal report and validate its structure.

- Uses the local normalizer when available, then checks `validate_report`.
+ Uses the local normalizer when available, then checks `validate_run_report`.
  Raises ValueError on invalid input. Returns the normalized RunReport.
  """
  try:
@@ -455,13 +457,13 @@ def _normalize_and_validate_report(report: RunReport | dict[str, Any]) -> RunRep
  report = _norm(report)
  except Exception: # pragma: no cover
  pass
- if not validate_report(report):
+ if not validate_run_report(report):
  raise ValueError("Invalid RunReport structure")
  return report


- def _extract_certificate_meta(report: RunReport) -> dict[str, Any]:
- """Extract the certificate metadata block with a full seed bundle."""
+ def _extract_report_meta(report: RunReport) -> dict[str, Any]:
+ """Extract the evaluation report metadata block with a full seed bundle."""
  meta_section = (
  report.get("meta", {}) if isinstance(report.get("meta"), dict) else {}
  )
@@ -738,22 +740,22 @@ def _fallback_paired_windows(
  return paired_windows


- def make_certificate(
+ def make_report(
  report: RunReport,
  baseline: RunReport | dict[str, Any],
  ) -> dict[str, Any]:
  """
- Generate a safety certificate from a RunReport and baseline comparison.
+ Generate an evaluation report from a RunReport and baseline comparison.

- The certificate is a standalone, portable artifact that contains all
- essential metrics and comparisons needed for safety verification.
+ The evaluation report is a standalone, portable artifact that contains all
+ essential paired metrics and comparisons used by InvarLock gates.

  Args:
- report: The guarded run report to certify
+ report: The guarded run report to evaluate
  baseline: Step-0 baseline RunReport or baseline metrics dict

  Returns:
- Certificate dictionary with all required fields
+ Evaluation report dictionary with all required fields

  Raises:
  ValueError: If inputs are invalid or required data is missing
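
A usage sketch for the renamed entry point; the file names are placeholders and error handling is minimal:

    import json
    from invarlock.reporting.report_builder import make_report

    with open("guarded_run.json") as fh:     # RunReport from the guarded run
        run_report = json.load(fh)
    with open("baseline_run.json") as fh:    # step-0 baseline RunReport or metrics dict
        baseline = json.load(fh)

    try:
        evaluation_report = make_report(run_report, baseline)
    except ValueError as exc:                # raised when inputs are invalid or incomplete
        raise SystemExit(f"report build failed: {exc}")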
@@ -764,13 +766,24 @@ def make_certificate(
  # Normalize baseline input
  baseline_raw = baseline
  baseline_normalized = _normalize_baseline(baseline_raw)
+ baseline_report: RunReport | None = None
+ try:
+ if (
+ isinstance(baseline_raw, dict)
+ and "meta" in baseline_raw
+ and "metrics" in baseline_raw
+ and "edit" in baseline_raw
+ ):
+ baseline_report = _normalize_and_validate_report(baseline_raw)
+ except Exception: # pragma: no cover - baseline compare is best-effort
+ baseline_report = None

  # Extract core metadata with full seed bundle
- meta = _extract_certificate_meta(report)
+ meta = _extract_report_meta(report)

  # Propagate environment flags captured in the RunReport (e.g., deterministic algos,
  # TF32 controls, MPS/CUDA availability). This is useful for auditability and
- # reproducibility of certification runs.
+ # reproducibility of evaluation runs.
  try:
  env_flags = (
  report.get("meta", {}).get("env_flags")
@@ -1440,7 +1453,7 @@ def make_certificate(
  ppl_analysis["window_plan"] = window_plan_ctx

  # Extract invariant status
- invariants = _extract_invariants(report)
+ invariants = _extract_invariants(report, baseline=baseline_report)

  # Extract spectral analysis
  spectral = _extract_spectral_analysis(report, baseline_normalized)
@@ -1558,7 +1571,13 @@ def make_certificate(
  telemetry: dict[str, Any] = {}
  metrics_section = report.get("metrics", {})
  if isinstance(metrics_section, dict):
- for key in ("latency_ms_per_tok", "memory_mb_peak", "throughput_tok_per_s"):
+ for key in (
+ "latency_ms_per_tok",
+ "memory_mb_peak",
+ "gpu_memory_mb_peak",
+ "gpu_memory_reserved_mb_peak",
+ "throughput_tok_per_s",
+ ):
  value = metrics_section.get(key)
  if isinstance(value, int | float) and math.isfinite(value):
  telemetry[key] = float(value)
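
The widened key tuple above also pulls GPU memory peaks into telemetry. A condensed sketch of the filter it applies (helper name is illustrative):

    import math
    from typing import Any

    TELEMETRY_KEYS = (
        "latency_ms_per_tok",
        "memory_mb_peak",
        "gpu_memory_mb_peak",            # new in 0.3.8
        "gpu_memory_reserved_mb_peak",   # new in 0.3.8
        "throughput_tok_per_s",
    )

    def finite_telemetry(metrics: dict[str, Any]) -> dict[str, float]:
        # Keep only finite numeric values, mirroring the loop above.
        out: dict[str, float] = {}
        for key in TELEMETRY_KEYS:
            value = metrics.get(key)
            if isinstance(value, (int, float)) and math.isfinite(value):
                out[key] = float(value)
        return out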
@@ -1584,7 +1603,7 @@ def make_certificate(
  if device_name:
  telemetry.setdefault("device", device_name)

- # Build the certificate
+ # Build the evaluation report
  window_capacity_ctx = (
  report.get("metrics", {}).get("window_capacity")
  if isinstance(report.get("metrics"), dict)
@@ -1755,6 +1774,7 @@ def make_certificate(
  capacity_examples = None

  pm_acceptance_range = _resolve_pm_acceptance_range_from_report(report)
+ pm_drift_band = _resolve_pm_drift_band_from_report(report)

  # Primary metric tail evidence and gate evaluation (ΔlogNLL vs baseline, per-window).
  pm_tail_result: dict[str, Any] = {}
@@ -1881,6 +1901,12 @@ def make_certificate(
  except Exception: # pragma: no cover - defensive against patched functions
  validation_kwargs["pm_acceptance_range"] = pm_acceptance_range

+ try:
+ if "pm_drift_band" in inspect.signature(_compute_validation_flags).parameters:
+ validation_kwargs["pm_drift_band"] = pm_drift_band
+ except Exception: # pragma: no cover - defensive against patched functions
+ validation_kwargs["pm_drift_band"] = pm_drift_band
+
  try:
  if "pm_tail" in inspect.signature(_compute_validation_flags).parameters:
  validation_kwargs["pm_tail"] = pm_tail_result
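
The try/except pairs above are a compatibility shim: tests monkeypatch _compute_validation_flags, so new keyword arguments are only forwarded when the (possibly patched) callable declares them, and are forwarded unconditionally if introspection fails. A condensed sketch of the pattern, with generic names:

    import inspect
    from typing import Any, Callable

    def forward_if_accepted(
        target: Callable[..., Any], kwargs: dict[str, Any], name: str, value: Any
    ) -> None:
        # Forward `name` only when the target's signature declares it;
        # fall back to forwarding it anyway if introspection raises.
        try:
            if name in inspect.signature(target).parameters:
                kwargs[name] = value
        except Exception:
            kwargs[name] = value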
@@ -1895,8 +1921,8 @@ def make_certificate(
  k: bool(v) for k, v in validation_flags.items() if k in _allowed_validation
  }

- certificate = {
- "schema_version": CERTIFICATE_SCHEMA_VERSION,
+ evaluation_report = {
+ "schema_version": REPORT_SCHEMA_VERSION,
  "run_id": current_run_id,
  "meta": meta,
  "auto": auto,
@@ -1939,8 +1965,8 @@ def make_certificate(
  _tiny_relax_env = False
  if _tiny_relax_env:
  try:
- certificate.setdefault("auto", {})["tiny_relax"] = True
- prov = certificate.setdefault("provenance", {})
+ evaluation_report.setdefault("auto", {})["tiny_relax"] = True
+ prov = evaluation_report.setdefault("provenance", {})
  flags = prov.setdefault("flags", [])
  if "tiny_relax" not in flags:
  flags.append("tiny_relax")
@@ -1966,12 +1992,12 @@ def make_certificate(
  and "value" in qo
  and math.isfinite(float(qo.get("value", float("nan"))))
  ):
- certificate["quality_overhead"] = qo
+ evaluation_report["quality_overhead"] = qo
  except Exception: # pragma: no cover
  pass

  try:
- _propagate_pairing_stats(certificate, ppl_analysis)
+ _propagate_pairing_stats(evaluation_report, ppl_analysis)
  except Exception: # pragma: no cover
  pass

@@ -2032,7 +2058,7 @@ def make_certificate(
  (resolved_policy.get("variance") or {}).get("min_effect_lognll", 0.0) or 0.0
  )

- certificate["policy_digest"] = {
+ evaluation_report["policy_digest"] = {
  "policy_version": POLICY_VERSION,
  "tier_policy_name": cur_tier,
  "thresholds_hash": thresholds_hash,
@@ -2063,7 +2089,7 @@ def make_certificate(
  payload[key] = item[key]
  sanitized.append(payload)
  if sanitized:
- certificate["secondary_metrics"] = sanitized
+ evaluation_report["secondary_metrics"] = sanitized
  except Exception: # pragma: no cover
  pass

@@ -2111,7 +2137,7 @@ def make_certificate(
  except Exception: # pragma: no cover
  continue
  if out:
- certificate["classification"] = {"subgroups": out}
+ evaluation_report["classification"] = {"subgroups": out}
  except Exception: # pragma: no cover
  pass

@@ -2127,7 +2153,7 @@ def make_certificate(
  if isinstance(container.get("metrics"), dict)
  else {}
  )
- # Edited report case: also check certificate telemetry keys
+ # Edited report case: also check evaluation_report telemetry keys
  telem = telemetry if isinstance(telemetry, dict) else {}
  # Prefer explicit p50/p95 throughput keys if present
  for key in ("latency_ms_p50", "latency_ms_p95", "throughput_sps"):
@@ -2168,17 +2194,24 @@ def make_certificate(
  entry["ratio"] = float("nan")
  system_overhead[metric_key] = entry
  if system_overhead:
- certificate["system_overhead"] = system_overhead
+ evaluation_report["system_overhead"] = system_overhead
  except Exception: # pragma: no cover
  pass

  # Attach/normalize primary metric block (moved to helper)
  from .primary_metric_utils import attach_primary_metric as _attach_pm

- _attach_pm(certificate, report, baseline_raw, baseline_ref, ppl_analysis)
+ _attach_pm(evaluation_report, report, baseline_raw, baseline_ref, ppl_analysis)
+ try:
+ if isinstance(pm_drift_band, dict) and pm_drift_band:
+ pm_block = evaluation_report.get("primary_metric")
+ if isinstance(pm_block, dict):
+ pm_block.setdefault("drift_band", dict(pm_drift_band))
+ except Exception: # pragma: no cover
+ pass
  _enforce_display_ci_alignment(
  ratio_ci_source,
- certificate.get("primary_metric"),
+ evaluation_report.get("primary_metric"),
  logloss_delta_ci,
  window_plan_profile,
  )
@@ -2186,8 +2219,8 @@ def make_certificate(
  # Ensure primary_metric has display_ci populated for schema invariants
  try:
  pm = (
- certificate.get("primary_metric", {})
- if isinstance(certificate.get("primary_metric"), dict)
+ evaluation_report.get("primary_metric", {})
+ if isinstance(evaluation_report.get("primary_metric"), dict)
  else None
  )
  if isinstance(pm, dict) and pm:
@@ -2227,8 +2260,8 @@ def make_certificate(
  if not kind:
  kind = "ppl"
  windows_cfg = (
- certificate.get("dataset", {}).get("windows", {})
- if isinstance(certificate.get("dataset"), dict)
+ evaluation_report.get("dataset", {}).get("windows", {})
+ if isinstance(evaluation_report.get("dataset"), dict)
  else {}
  )
  n_prev = windows_cfg.get("preview")
@@ -2236,7 +2269,7 @@ def make_certificate(
  tokens_total = None
  try:
  tokens_total = (
- certificate.get("dataset", {}).get("hash", {}).get("total_tokens")
+ evaluation_report.get("dataset", {}).get("hash", {}).get("total_tokens")
  )
  except Exception: # pragma: no cover
  tokens_total = None
@@ -2244,7 +2277,7 @@ def make_certificate(
  ci_lo = None
  ci_hi = None
  ratio = None
- pmc = certificate.get("primary_metric", {})
+ pmc = evaluation_report.get("primary_metric", {})
  rci = pmc.get("display_ci") or pmc.get("ci")
  if isinstance(rci, tuple | list) and len(rci) == 2:
  ci_lo, ci_hi = rci[0], rci[1]
@@ -2256,7 +2289,7 @@ def make_certificate(
  except Exception: # pragma: no cover
  ci_w = None
  # Gate outcome
- val = certificate.get("validation", {})
+ val = evaluation_report.get("validation", {})
  gate_ok = None
  try:
  gate_ok = bool(val.get("primary_metric_acceptable"))
@@ -2271,10 +2304,10 @@ def make_certificate(
  f"tokens={tokens_total}",
  ]
  try:
- split = (certificate.get("provenance", {}) or {}).get("dataset_split")
+ split = (evaluation_report.get("provenance", {}) or {}).get("dataset_split")
  if not split:
  split = (report.get("provenance", {}) or {}).get("dataset_split")
- sf = (certificate.get("provenance", {}) or {}).get("split_fallback")
+ sf = (evaluation_report.get("provenance", {}) or {}).get("split_fallback")
  if sf is None:
  sf = (report.get("provenance", {}) or {}).get("split_fallback")
  if split:
@@ -2290,7 +2323,7 @@ def make_certificate(
  if isinstance(gate_ok, bool):
  parts.append(f"gate={'pass' if gate_ok else 'fail'}")
  summary_line = "INVARLOCK_TELEMETRY " + " ".join(parts)
- certificate.setdefault("telemetry", {})["summary_line"] = summary_line
+ evaluation_report.setdefault("telemetry", {})["summary_line"] = summary_line
  if str(os.environ.get("INVARLOCK_TELEMETRY", "")).strip().lower() in {
  "1",
  "true",
@@ -2303,17 +2336,17 @@ def make_certificate(

  # Attach confidence label (non-gating)
  try:
- certificate["confidence"] = _compute_confidence_label(certificate)
+ evaluation_report["confidence"] = _compute_confidence_label(evaluation_report)
  except Exception: # pragma: no cover
  pass

- return certificate
+ return evaluation_report


  # Console Validation Block helpers have moved to invarlock.reporting.render.


- ## NOTE: render_certificate_markdown has been moved to invarlock.reporting.render.
+ ## NOTE: render_report_markdown has been moved to invarlock.reporting.render.
  ## It is re-exported at the bottom of this module to preserve the public API.
  ## Private helper functions

@@ -2591,7 +2624,7 @@ def _extract_structural_deltas(report: RunReport) -> dict[str, Any]:
  def _extract_edit_metadata(
  report: RunReport, plugin_provenance: dict[str, Any]
  ) -> dict[str, Any]:
- """Extract edit-level provenance and configuration metadata for the certificate."""
+ """Extract edit-level provenance and configuration metadata for the evaluation report."""

  edit_section = _get_mapping(report, "edit")
  if not edit_section:
@@ -2614,7 +2647,7 @@ def _extract_edit_metadata(
  alg_lower = str(algorithm).strip().lower()
  except Exception: # pragma: no cover
  alg_lower = ""
- allowed_algorithms = {"quant_rtn", "noop"}
+ allowed_algorithms = {"quant_rtn", "noop", "custom"}
  if alg_lower not in allowed_algorithms:
  algorithm = ""

@@ -2988,12 +3021,12 @@ def _compute_quality_overhead_from_guard(


  def _propagate_pairing_stats(
- certificate: dict[str, Any], ppl_analysis: dict[str, Any] | None
+ evaluation_report: dict[str, Any], ppl_analysis: dict[str, Any] | None
  ) -> None:
- """Surface pairing statistics inside certificate.dataset.windows.stats."""
- if not isinstance(certificate, dict):
+ """Surface pairing statistics inside evaluation_report.dataset.windows.stats."""
+ if not isinstance(evaluation_report, dict):
  return
- ds = certificate.get("dataset", {})
+ ds = evaluation_report.get("dataset", {})
  if not isinstance(ds, dict):
  return
  windows = ds.get("windows", {})
@@ -3047,7 +3080,7 @@ def _propagate_pairing_stats(
  windows["stats"] = stats
  if windows is not ds.get("windows"):
  ds["windows"] = windows
- certificate["dataset"] = ds
+ evaluation_report["dataset"] = ds


  def _build_provenance_block(
@@ -3225,6 +3258,105 @@ def _resolve_pm_acceptance_range_from_report(
  return {"min": float(min_val), "max": float(max_val)}


+ def _resolve_pm_drift_band_from_report(
+ report: dict[str, Any] | None,
+ ) -> dict[str, float]:
+ """Resolve preview→final drift band from report context/meta/env."""
+
+ base_min = 0.95
+ base_max = 1.05
+
+ def _safe_float(val: Any) -> float | None:
+ try:
+ if val is None:
+ return None
+ out = float(val)
+ except Exception:
+ return None
+ return out if math.isfinite(out) else None
+
+ cfg_min = None
+ cfg_max = None
+
+ ctx = report.get("context") if isinstance(report, dict) else None
+ if isinstance(ctx, dict):
+ pm_ctx = ctx.get("primary_metric")
+ if isinstance(pm_ctx, dict):
+ band = pm_ctx.get("drift_band")
+ if isinstance(band, dict):
+ cfg_min = _safe_float(band.get("min"))
+ cfg_max = _safe_float(band.get("max"))
+ elif isinstance(band, list | tuple) and len(band) == 2:
+ cfg_min = _safe_float(band[0])
+ cfg_max = _safe_float(band[1])
+ if cfg_min is None or cfg_max is None:
+ alt = ctx.get("pm_drift_band")
+ if isinstance(alt, dict):
+ cfg_min = (
+ cfg_min if cfg_min is not None else _safe_float(alt.get("min"))
+ )
+ cfg_max = (
+ cfg_max if cfg_max is not None else _safe_float(alt.get("max"))
+ )
+
+ if (cfg_min is None or cfg_max is None) and isinstance(report, dict):
+ meta = report.get("meta")
+ if isinstance(meta, dict):
+ meta_band = meta.get("pm_drift_band")
+ if isinstance(meta_band, dict):
+ cfg_min = (
+ cfg_min
+ if cfg_min is not None
+ else _safe_float(meta_band.get("min"))
+ )
+ cfg_max = (
+ cfg_max
+ if cfg_max is not None
+ else _safe_float(meta_band.get("max"))
+ )
+
+ def _parse_env(name: str) -> float | None:
+ try:
+ raw = os.environ.get(name, "")
+ if raw is None or str(raw).strip() == "":
+ return None
+ return float(raw)
+ except Exception:
+ return None
+
+ env_min = _parse_env("INVARLOCK_PM_DRIFT_MIN")
+ env_max = _parse_env("INVARLOCK_PM_DRIFT_MAX")
+
+ has_explicit = any(v is not None for v in (cfg_min, cfg_max, env_min, env_max))
+ if not has_explicit:
+ return {}
+
+ min_val = (
+ env_min if env_min is not None else cfg_min if cfg_min is not None else base_min
+ )
+ max_val = (
+ env_max if env_max is not None else cfg_max if cfg_max is not None else base_max
+ )
+
+ try:
+ if min_val is not None and min_val <= 0:
+ min_val = base_min
+ except Exception:
+ min_val = base_min
+ try:
+ if max_val is not None and max_val <= 0:
+ max_val = base_max
+ except Exception:
+ max_val = base_max
+ try:
+ if min_val is not None and max_val is not None and min_val >= max_val:
+ min_val, max_val = base_min, base_max
+ except Exception:
+ min_val, max_val = base_min, base_max
+
+ return {"min": float(min_val), "max": float(max_val)}
+
+
  def _compute_validation_flags(
  ppl: dict[str, Any],
  spectral: dict[str, Any],
@@ -3238,9 +3370,10 @@ def _compute_validation_flags(
  moe: dict[str, Any] | None = None,
  dataset_capacity: dict[str, Any] | None = None,
  pm_acceptance_range: dict[str, float] | None = None,
+ pm_drift_band: dict[str, float] | None = None,
  pm_tail: dict[str, Any] | None = None,
  ) -> dict[str, bool]:
- """Compute validation flags for the certificate including canonical gates."""
+ """Compute validation flags for the evaluation report including canonical gates."""
  tier = (tier or "balanced").lower()
  # Dev-only tiny relax: widen gates and lower floors when explicitly requested
  import os as _os
@@ -3301,9 +3434,27 @@ def _compute_validation_flags(
  ratio_limit = min(ratio_limit, float(target_ratio))

  # Canonical Gates
- # 1. Drift gate: 0.95 ≤ final/preview ≤ 1.05
+ # 1. Drift gate: by default 0.95 ≤ final/preview ≤ 1.05 (configurable)
  drift_ratio = ppl.get("preview_final_ratio", 1.0)
- preview_final_drift_acceptable = 0.95 <= drift_ratio <= 1.05
+ drift_min = 0.95
+ drift_max = 1.05
+ if isinstance(pm_drift_band, dict):
+ try:
+ cand_min = pm_drift_band.get("min")
+ cand_max = pm_drift_band.get("max")
+ if isinstance(cand_min, int | float) and isinstance(cand_max, int | float):
+ cand_min_f = float(cand_min)
+ cand_max_f = float(cand_max)
+ if (
+ math.isfinite(cand_min_f)
+ and math.isfinite(cand_max_f)
+ and 0 < cand_min_f < cand_max_f
+ ):
+ drift_min = cand_min_f
+ drift_max = cand_max_f
+ except Exception: # pragma: no cover
+ pass
+ preview_final_drift_acceptable = drift_min <= drift_ratio <= drift_max
  if _tiny_relax:
  # Treat drift identity as informational in tiny dev demos
  preview_final_drift_acceptable = True
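
Read together with _resolve_pm_drift_band_from_report above, the drift gate's 0.95–1.05 band is now configurable. A sketch of the override precedence (environment beats report context, which beats the defaults), using only names that appear in this diff:

    import os

    # Environment override, parsed by _resolve_pm_drift_band_from_report:
    os.environ["INVARLOCK_PM_DRIFT_MIN"] = "0.90"
    os.environ["INVARLOCK_PM_DRIFT_MAX"] = "1.10"

    # Equivalent report-level configuration (context.primary_metric.drift_band);
    # meta.pm_drift_band is consulted as a further fallback:
    context = {"primary_metric": {"drift_band": {"min": 0.90, "max": 1.10}}}

    # With neither set, the helper returns {} and the gate keeps 0.95 <= ratio <= 1.05.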
@@ -3463,7 +3614,7 @@ def _compute_validation_flags(
  if _tiny_relax and threshold_val < 0.10:
  threshold_val = 0.10
  if not math.isfinite(ratio_val):
- # In dev/Compare-&-Certify flows we often lack a bare run; treat missing metric as pass
+ # In dev/Compare-&-Evaluate flows we often lack a bare run; treat missing metric as pass
  guard_overhead_pass = True
  else:
  guard_overhead_pass = ratio_val <= (1.0 + max(0.0, threshold_val))
@@ -3619,7 +3770,7 @@ def _generate_run_id(report: RunReport) -> str:
  return hashlib.sha256(base_str.encode()).hexdigest()[:16]


- ## NOTE: _compute_certificate_hash moved to invarlock.reporting.render and is re-exported below.
+ ## NOTE: _compute_report_hash moved to invarlock.reporting.render and is re-exported below.


  def _analyze_bitwidth_map(bitwidth_map: dict[str, Any]) -> dict[str, Any]:
@@ -3964,22 +4115,24 @@ def _extract_compression_diagnostics(

  # Re-export rendering API from dedicated module to avoid bloat/cycles
  # Rendering helpers live in invarlock.reporting.render; internal code should import there directly.
- # Tests and public API expect render_certificate_markdown to be available from
- # invarlock.reporting.certificate. Import lazily at module end to avoid cycles with
+ # Tests and public API expect render_report_markdown to be available from
+ # invarlock.reporting.report_builder. Import lazily at module end to avoid cycles with
  # invarlock.reporting.render which imports this module as a namespace.
  try: # pragma: no cover - simple re-export
  from .render import (
  compute_console_validation_block, # type: ignore
- render_certificate_markdown, # type: ignore
+ render_report_markdown, # type: ignore
  )
  except Exception: # pragma: no cover - defensive fallback

- def render_certificate_markdown(certificate: dict[str, Any]) -> str: # type: ignore
+ def render_report_markdown(evaluation_report: dict[str, Any]) -> str: # type: ignore
  raise ImportError(
- "render_certificate_markdown is unavailable; rendering dependencies missing"
+ "render_report_markdown is unavailable; rendering dependencies missing"
  )

- def compute_console_validation_block(certificate: dict[str, Any]) -> dict[str, Any]: # type: ignore
+ def compute_console_validation_block(
+ evaluation_report: dict[str, Any],
+ ) -> dict[str, Any]: # type: ignore
  raise ImportError(
  "compute_console_validation_block is unavailable; rendering dependencies missing"
  )
@@ -3987,12 +4140,12 @@ except Exception: # pragma: no cover - defensive fallback

  # Export public API
  __all__ = [
- "make_certificate",
- "validate_certificate",
+ "make_report",
+ "validate_report",
  "_validate_with_jsonschema",
  "jsonschema",
- "render_certificate_markdown",
+ "render_report_markdown",
  "compute_console_validation_block",
- "CERTIFICATE_SCHEMA_VERSION",
- "CERTIFICATE_JSON_SCHEMA",
+ "REPORT_SCHEMA_VERSION",
+ "REPORT_JSON_SCHEMA",
  ]