PyPI - invarlock - Versions diffs - 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl - Mend

invarlock 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

invarlock/__init__.py +2 -2
invarlock/_data/runtime/tiers.yaml +57 -30
invarlock/adapters/__init__.py +11 -15
invarlock/adapters/auto.py +35 -40
invarlock/adapters/capabilities.py +2 -2
invarlock/adapters/hf_causal.py +418 -0
invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
invarlock/adapters/hf_mixin.py +25 -4
invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
invarlock/calibration/spectral_null.py +15 -10
invarlock/calibration/variance_ve.py +0 -2
invarlock/cli/adapter_auto.py +31 -21
invarlock/cli/app.py +73 -2
invarlock/cli/commands/calibrate.py +6 -2
invarlock/cli/commands/certify.py +651 -91
invarlock/cli/commands/doctor.py +11 -11
invarlock/cli/commands/explain_gates.py +57 -8
invarlock/cli/commands/plugins.py +13 -9
invarlock/cli/commands/report.py +233 -69
invarlock/cli/commands/run.py +1066 -244
invarlock/cli/commands/verify.py +154 -15
invarlock/cli/config.py +22 -6
invarlock/cli/doctor_helpers.py +4 -5
invarlock/cli/output.py +193 -0
invarlock/cli/provenance.py +1 -1
invarlock/core/api.py +45 -5
invarlock/core/auto_tuning.py +65 -20
invarlock/core/bootstrap.py +1 -1
invarlock/core/contracts.py +7 -1
invarlock/core/registry.py +11 -13
invarlock/core/runner.py +425 -75
invarlock/edits/quant_rtn.py +65 -37
invarlock/eval/bench.py +3 -16
invarlock/eval/data.py +82 -51
invarlock/eval/metrics.py +63 -2
invarlock/eval/primary_metric.py +23 -0
invarlock/eval/tail_stats.py +230 -0
invarlock/eval/tasks/__init__.py +12 -0
invarlock/eval/tasks/classification.py +48 -0
invarlock/eval/tasks/qa.py +36 -0
invarlock/eval/tasks/text_generation.py +102 -0
invarlock/guards/_estimators.py +154 -0
invarlock/guards/invariants.py +19 -10
invarlock/guards/policies.py +16 -6
invarlock/guards/rmt.py +627 -546
invarlock/guards/spectral.py +348 -110
invarlock/guards/tier_config.py +32 -30
invarlock/guards/variance.py +7 -31
invarlock/guards_ref/rmt_ref.py +23 -23
invarlock/model_profile.py +90 -42
invarlock/observability/health.py +6 -6
invarlock/observability/metrics.py +108 -0
invarlock/reporting/certificate.py +384 -55
invarlock/reporting/certificate_schema.py +3 -2
invarlock/reporting/dataset_hashing.py +15 -2
invarlock/reporting/guards_analysis.py +350 -277
invarlock/reporting/html.py +55 -5
invarlock/reporting/normalizer.py +13 -0
invarlock/reporting/policy_utils.py +38 -36
invarlock/reporting/primary_metric_utils.py +71 -17
invarlock/reporting/render.py +852 -431
invarlock/reporting/report.py +40 -4
invarlock/reporting/report_types.py +11 -3
invarlock/reporting/telemetry.py +86 -0
invarlock/reporting/validate.py +1 -18
{invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/METADATA +27 -13
{invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/RECORD +72 -65
{invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
{invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
invarlock/adapters/hf_gpt2.py +0 -404
invarlock/adapters/hf_llama.py +0 -487
{invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
{invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0

invarlock/cli/adapter_auto.py CHANGED Viewed

@@ -2,7 +2,8 @@
 Auto adapter resolution utilities.
 These helpers map a model identifier (HF directory or Hub ID) to a
-concrete built-in adapter name (hf_gpt2, hf_llama, hf_bert) without
+concrete built-in adapter name (hf_causal, hf_mlm, hf_seq2seq, hf_causal_onnx)
+without
 adding a hard dependency on Transformers.
 """
@@ -58,15 +59,15 @@ def _detect_quant_family_from_cfg(cfg: dict[str, Any]) -> str | None:
 def resolve_auto_adapter(
-    model_id: str | os.PathLike[str], default: str = "hf_gpt2"
+    model_id: str | os.PathLike[str], default: str = "hf_causal"
 ) -> str:
     """Resolve an appropriate built-in adapter name for a model.
     Heuristics:
       - Prefer local config.json (no network). Inspect `model_type` and
-        `architectures` to classify LLaMA/Mistral vs BERT vs GPT-like.
+        `architectures` to classify causal vs masked-LM vs seq2seq.
       - Fallback to simple name heuristics on the model_id string.
-      - Default to `hf_gpt2` when unsure.
+      - Default to `hf_causal` when unsure.
     """
     cfg = _read_local_hf_config(model_id)
     model_id_str = str(model_id)
@@ -77,32 +78,41 @@ def resolve_auto_adapter(
         if fam:
             return fam
         mt = str(c.get("model_type", "")).lower()
+        if bool(c.get("is_encoder_decoder", False)):
+            return "hf_seq2seq"
         archs = [str(a) for a in c.get("architectures", []) if isinstance(a, str)]
         arch_blob = " ".join(archs)
-        if (
-            mt in {"llama", "mistral", "qwen", "yi"}
-            or "Llama" in arch_blob
-            or "Mistral" in arch_blob
-        ):
-            return "hf_llama"
+        if "ConditionalGeneration" in arch_blob or "Seq2SeqLM" in arch_blob:
+            return "hf_seq2seq"
         # Treat masked-LM families as BERT-like
         if (
             mt in {"bert", "roberta", "distilbert", "albert", "deberta", "deberta-v2"}
             or "MaskedLM" in arch_blob
         ):
-            return "hf_bert"
-        # Generic causal LM
-        if "CausalLM" in arch_blob or mt in {
+            return "hf_mlm"
+        # Causal LM families (best-effort; structural validation happens in the adapter).
+        if "CausalLM" in arch_blob or "ForCausalLM" in arch_blob:
+            return "hf_causal"
+        if mt in {
+            "mistral",
+            "mixtral",
+            "qwen",
+            "qwen2",
+            "qwen2_moe",
+            "yi",
             "gpt2",
             "gpt_neox",
             "opt",
             "gptj",
-            "gptj8bit",
+            "phi",
+            "falcon",
+            "glm",
+            "deepseek",
         }:
-            return "hf_gpt2"
+            return "hf_causal"
         return None
-    # If local directory contains ONNX model files, prefer hf_onnx
+    # If local directory contains ONNX model files, prefer the ONNX causal adapter.
     try:
         p = Path(model_id)
         if p.exists() and p.is_dir():
@@ -114,7 +124,7 @@ def resolve_auto_adapter(
                 "encoder_model.onnx",
             ]
             if any((p / fname).exists() for fname in onnx_files):
-                return "hf_onnx"
+                return "hf_causal_onnx"
     except Exception:
         pass
@@ -134,10 +144,10 @@ def resolve_auto_adapter(
         k in lower_id for k in ["bnb", "bitsandbytes", "-4bit", "-8bit", "4bit", "8bit"]
     ):
         return "hf_bnb"
-    if any(k in lower_id for k in ["llama", "mistral", "qwen", "yi"]):
-        return "hf_llama"
+    if any(k in lower_id for k in ["t5", "bart"]):
+        return "hf_seq2seq"
     if any(k in lower_id for k in ["bert", "roberta", "albert", "deberta"]):
-        return "hf_bert"
+        return "hf_mlm"
     return default
@@ -148,7 +158,7 @@ def apply_auto_adapter_if_needed(cfg: Any) -> Any:
     """
     try:
         adapter = str(getattr(cfg.model, "adapter", ""))
-        if adapter.strip().lower() not in {"auto", "hf_auto", "auto_hf"}:
+        if adapter.strip().lower() not in {"auto", "auto_hf"}:
             return cfg
         model_id = str(getattr(cfg.model, "id", ""))
         resolved = resolve_auto_adapter(model_id)

invarlock/cli/app.py CHANGED Viewed

@@ -105,7 +105,7 @@ Order: certify → report → run → plugins → doctor → version
 @app.command(
     name="certify",
     help=(
-        "Certify a subject model against a baseline and generate a safety certificate. "
+        "Certify a subject model against a baseline and generate an evaluation certificate. "
         "Use when you have two model snapshots and want pass/fail gating."
     ),
 )
@@ -116,6 +116,14 @@ def _certify_lazy(
     edited: str = typer.Option(
         ..., "--edited", "--subject", help="Subject model dir or Hub ID"
     ),
+    baseline_report: str | None = typer.Option(
+        None,
+        "--baseline-report",
+        help=(
+            "Reuse an existing baseline run report.json (skips baseline evaluation). "
+            "Must include stored evaluation windows (e.g., set INVARLOCK_STORE_EVAL_WINDOWS=1)."
+        ),
+    ),
     adapter: str = typer.Option(
         "auto", "--adapter", help="Adapter name or 'auto' to resolve"
     ),
@@ -139,12 +147,38 @@ def _certify_lazy(
     edit_config: str | None = typer.Option(
         None, "--edit-config", help="Edit preset to apply a demo edit (quant_rtn)"
     ),
+    edit_label: str | None = typer.Option(
+        None,
+        "--edit-label",
+        help=(
+            "Edit algorithm label for BYOE models. Use 'noop' for baseline, "
+            "'quant_rtn' etc. for built-in edits, 'custom' for pre-edited models."
+        ),
+    ),
+    quiet: bool = typer.Option(
+        False, "--quiet", "-q", help="Minimal output (suppress run/report detail)"
+    ),
+    verbose: bool = typer.Option(
+        False, "--verbose", "-v", help="Verbose output (include debug details)"
+    ),
+    banner: bool = typer.Option(
+        True, "--banner/--no-banner", help="Show header banner"
+    ),
+    style: str = typer.Option("audit", "--style", help="Output style (audit|friendly)"),
+    timing: bool = typer.Option(False, "--timing", help="Show timing summary"),
+    progress: bool = typer.Option(
+        True, "--progress/--no-progress", help="Show progress done messages"
+    ),
+    no_color: bool = typer.Option(
+        False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
+    ),
 ):
     from .commands.certify import certify_command as _cert
     return _cert(
         source=source,
         edited=edited,
+        baseline_report=baseline_report,
         adapter=adapter,
         device=device,
         profile=profile,
@@ -153,6 +187,14 @@ def _certify_lazy(
         out=out,
         cert_out=cert_out,
         edit_config=edit_config,
+        edit_label=edit_label,
+        quiet=quiet,
+        verbose=verbose,
+        banner=banner,
+        style=style,
+        timing=timing,
+        progress=progress,
+        no_color=no_color,
     )
@@ -230,7 +272,7 @@ def _verify_typed(
     name="run",
     help=(
         "Execute an end-to-end run from a YAML config (edit + guards + reports). "
-        "Writes run artifacts and optionally a safety certificate."
+        "Writes run artifacts and optionally an evaluation certificate."
     ),
 )
 def _run_typed(
@@ -245,11 +287,24 @@ def _run_typed(
     ),
     out: str | None = typer.Option(None, "--out", help="Output directory override"),
     edit: str | None = typer.Option(None, "--edit", help="Edit kind (quant|mixed)"),
+    edit_label: str | None = typer.Option(
+        None,
+        "--edit-label",
+        help=(
+            "Edit algorithm label for BYOE models. Use 'noop' for baseline, "
+            "'quant_rtn' etc. for built-in edits, 'custom' for pre-edited models."
+        ),
+    ),
     tier: str | None = typer.Option(
         None,
         "--tier",
         help="Auto-tuning tier override (conservative|balanced|aggressive)",
     ),
+    metric_kind: str | None = typer.Option(
+        None,
+        "--metric-kind",
+        help="Primary metric kind override (ppl_causal|ppl_mlm|accuracy|etc.)",
+    ),
     probes: int | None = typer.Option(
         None, "--probes", help="Number of micro-probes (0=deterministic, >0=adaptive)"
     ),
@@ -270,6 +325,16 @@ def _run_typed(
     no_cleanup: bool = typer.Option(
         False, "--no-cleanup", help="Skip cleanup of temporary artifacts"
     ),
+    style: str | None = typer.Option(
+        None, "--style", help="Output style (audit|friendly)"
+    ),
+    progress: bool = typer.Option(
+        False, "--progress", help="Show progress done messages"
+    ),
+    timing: bool = typer.Option(False, "--timing", help="Show timing summary"),
+    no_color: bool = typer.Option(
+        False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
+    ),
 ):
     from .commands.run import run_command as _run
@@ -279,13 +344,19 @@ def _run_typed(
         profile=profile,
         out=out,
         edit=edit,
+        edit_label=edit_label,
         tier=tier,
+        metric_kind=metric_kind,
         probes=probes,
         until_pass=until_pass,
         max_attempts=max_attempts,
         timeout=timeout,
         baseline=baseline,
         no_cleanup=no_cleanup,
+        style=style,
+        progress=progress,
+        timing=timing,
+        no_color=no_color,
     )

invarlock/cli/commands/calibrate.py CHANGED Viewed

@@ -144,7 +144,9 @@ def null_sweep(
     ),
     n_seeds: int = typer.Option(10, "--n-seeds", min=1, help="Number of seeds to run."),
     seed_start: int = typer.Option(42, "--seed-start", help="Starting seed."),
-    profile: str = typer.Option("ci", "--profile", help="Run profile (ci|release)."),
+    profile: str = typer.Option(
+        "ci", "--profile", help="Run profile (ci|release|ci_cpu|dev)."
+    ),
     device: str | None = typer.Option(None, "--device", help="Device override."),
     safety_margin: float = typer.Option(
         0.05, "--safety-margin", help="Safety margin applied to κ recommendations."
@@ -363,7 +365,9 @@ def ve_sweep(
         "--target-enable-rate",
         help="Target expected VE enable rate (predictive-gate lower bound).",
     ),
-    profile: str = typer.Option("ci", "--profile", help="Run profile (ci|release)."),
+    profile: str = typer.Option(
+        "ci", "--profile", help="Run profile (ci|release|ci_cpu|dev)."
+    ),
     device: str | None = typer.Option(None, "--device", help="Device override."),
     safety_margin: float = typer.Option(
         0.0,

invarlock 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

invarlock 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl