invarlock-0.3.6-py3-none-any.whl → invarlock-0.3.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +2 -2
- invarlock/adapters/__init__.py +10 -14
- invarlock/adapters/auto.py +35 -40
- invarlock/adapters/capabilities.py +2 -2
- invarlock/adapters/hf_causal.py +418 -0
- invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
- invarlock/adapters/hf_mixin.py +25 -4
- invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
- invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
- invarlock/cli/adapter_auto.py +31 -21
- invarlock/cli/app.py +73 -2
- invarlock/cli/commands/certify.py +600 -59
- invarlock/cli/commands/doctor.py +8 -10
- invarlock/cli/commands/plugins.py +13 -9
- invarlock/cli/commands/report.py +233 -69
- invarlock/cli/commands/run.py +907 -183
- invarlock/cli/commands/verify.py +76 -11
- invarlock/cli/config.py +1 -1
- invarlock/cli/doctor_helpers.py +4 -5
- invarlock/cli/output.py +193 -0
- invarlock/cli/provenance.py +1 -1
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/registry.py +9 -11
- invarlock/core/runner.py +111 -25
- invarlock/edits/quant_rtn.py +65 -37
- invarlock/eval/bench.py +3 -3
- invarlock/eval/data.py +68 -23
- invarlock/eval/metrics.py +59 -1
- invarlock/eval/tasks/__init__.py +12 -0
- invarlock/eval/tasks/classification.py +48 -0
- invarlock/eval/tasks/qa.py +36 -0
- invarlock/eval/tasks/text_generation.py +102 -0
- invarlock/guards/invariants.py +19 -10
- invarlock/guards/rmt.py +2 -2
- invarlock/guards/variance.py +2 -2
- invarlock/model_profile.py +48 -27
- invarlock/observability/health.py +6 -6
- invarlock/observability/metrics.py +108 -0
- invarlock/reporting/certificate.py +159 -9
- invarlock/reporting/certificate_schema.py +1 -1
- invarlock/reporting/guards_analysis.py +154 -4
- invarlock/reporting/html.py +55 -5
- invarlock/reporting/normalizer.py +7 -0
- invarlock/reporting/render.py +791 -431
- invarlock/reporting/report.py +39 -3
- invarlock/reporting/report_types.py +6 -1
- invarlock/reporting/telemetry.py +86 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/METADATA +23 -9
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/RECORD +53 -48
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
- invarlock/adapters/hf_gpt2.py +0 -404
- invarlock/adapters/hf_llama.py +0 -487
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
invarlock/eval/tasks/text_generation.py
ADDED

@@ -0,0 +1,102 @@
+from __future__ import annotations
+
+from collections import Counter
+from collections.abc import Iterable
+from typing import Any
+
+
+def _tokenize(text: str) -> list[str]:
+    return [tok for tok in str(text).strip().lower().split() if tok]
+
+
+def _bleu1(pred: str, ref: str) -> float:
+    pred_tokens = _tokenize(pred)
+    ref_tokens = _tokenize(ref)
+    if not pred_tokens or not ref_tokens:
+        return 0.0
+    pred_counts = Counter(pred_tokens)
+    ref_counts = Counter(ref_tokens)
+    overlap = sum(min(pred_counts[tok], ref_counts.get(tok, 0)) for tok in pred_counts)
+    precision = overlap / float(len(pred_tokens))
+    bp = 1.0
+    if len(pred_tokens) < len(ref_tokens):
+        bp = pow(2.718281828, 1.0 - (len(ref_tokens) / float(len(pred_tokens))))
+    return float(precision * bp)
+
+
+def bleu1_from_records(records: Iterable[dict[str, Any]]) -> float:
+    """Compute BLEU-1 from records with predictions and references."""
+    scores: list[float] = []
+    for record in records:
+        if not isinstance(record, dict):
+            continue
+        pred = record.get("prediction")
+        refs = record.get("references")
+        if pred is None:
+            continue
+        if refs is None and "reference" in record:
+            refs = [record.get("reference")]
+        if refs is None:
+            continue
+        ref_list = refs if isinstance(refs, list) else [refs]
+        best = 0.0
+        for ref in ref_list:
+            if ref is None:
+                continue
+            best = max(best, _bleu1(str(pred), str(ref)))
+        scores.append(best)
+    if not scores:
+        return float("nan")
+    return float(sum(scores) / float(len(scores)))
+
+
+def _lcs_len(a: list[str], b: list[str]) -> int:
+    if not a or not b:
+        return 0
+    dp = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]
+    for i, tok_a in enumerate(a, start=1):
+        for j, tok_b in enumerate(b, start=1):
+            if tok_a == tok_b:
+                dp[i][j] = dp[i - 1][j - 1] + 1
+            else:
+                dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
+    return dp[-1][-1]
+
+
+def _rouge_l(pred: str, ref: str) -> float:
+    pred_tokens = _tokenize(pred)
+    ref_tokens = _tokenize(ref)
+    if not pred_tokens or not ref_tokens:
+        return 0.0
+    lcs = _lcs_len(pred_tokens, ref_tokens)
+    prec = lcs / float(len(pred_tokens))
+    rec = lcs / float(len(ref_tokens))
+    if prec + rec == 0:
+        return 0.0
+    return float(2 * prec * rec / (prec + rec))
+
+
+def rouge_l_from_records(records: Iterable[dict[str, Any]]) -> float:
+    """Compute ROUGE-L (F1) from records with predictions and references."""
+    scores: list[float] = []
+    for record in records:
+        if not isinstance(record, dict):
+            continue
+        pred = record.get("prediction")
+        refs = record.get("references")
+        if pred is None:
+            continue
+        if refs is None and "reference" in record:
+            refs = [record.get("reference")]
+        if refs is None:
+            continue
+        ref_list = refs if isinstance(refs, list) else [refs]
+        best = 0.0
+        for ref in ref_list:
+            if ref is None:
+                continue
+            best = max(best, _rouge_l(str(pred), str(ref)))
+        scores.append(best)
+    if not scores:
+        return float("nan")
+    return float(sum(scores) / float(len(scores)))
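As a usage sketch of the two record-level helpers: the sample records are hypothetical, and the import path assumes the module location given in the file list above.

# Hypothetical records; "reference" (singular) and "references" (list) both work.
from invarlock.eval.tasks.text_generation import bleu1_from_records, rouge_l_from_records

records = [
    {"prediction": "the cat sat", "reference": "the cat sat on the mat"},
    {"prediction": "hello world", "references": ["hi world", "hello there"]},
]

# Each record scores against its best-matching reference; results are averaged.
print(bleu1_from_records(records))    # BLEU-1: unigram precision x brevity penalty
print(rouge_l_from_records(records))  # ROUGE-L: F1 over the longest common subsequence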
invarlock/guards/invariants.py
CHANGED
@@ -5,6 +5,7 @@ InvarLock Guards - Invariants
 Invariant checking for model edits to ensure structural integrity.
 """
 
+import hashlib
 from typing import Any
 
 import torch
@@ -33,6 +34,7 @@ class InvariantsGuard(Guard):
         self.on_fail = on_fail
         self.prepared = False
         self.baseline_checks: dict[str, Any] = {}
+        self.last_current_checks: dict[str, Any] = {}
         self.profile_checks: tuple[str, ...] = ()
 
     def prepare(
@@ -102,6 +104,10 @@ class InvariantsGuard(Guard):
             "action": outcome.action,
             "violations": outcome.violations,
             "metrics": outcome.metrics,
+            "details": {
+                "baseline_checks": self.baseline_checks,
+                "current_checks": self.last_current_checks,
+            },
         }
 
     def finalize(self, model: Any) -> GuardOutcome:
@@ -125,6 +131,7 @@ class InvariantsGuard(Guard):
 
         # Check current invariants
         current_checks = self._capture_invariants(model, None)
+        self.last_current_checks = current_checks
         violations: list[dict[str, Any]] = []
         tokenizer_mismatches: list[dict[str, Any]] = []
 
@@ -354,14 +361,14 @@ class InvariantsGuard(Guard):
         except Exception:
             pass
 
-        #
+        # Decoder embed_tokens style (model.embed_tokens <-> lm_head)
         try:
-
-            embed_tokens = getattr(
+            decoder_model = getattr(model, "model", None)
+            embed_tokens = getattr(decoder_model, "embed_tokens", None)
             embed_weight = getattr(embed_tokens, "weight", None)
-
-            if embed_weight is not None and
-            weight_tying_flags["
+            head_weight = getattr(getattr(model, "lm_head", None), "weight", None)
+            if embed_weight is not None and head_weight is not None:
+                weight_tying_flags["embed_tokens"] = _is_tied(embed_weight, head_weight)
         except Exception:
             pass
 
@@ -376,8 +383,10 @@ class InvariantsGuard(Guard):
             structure_items = []
             for name, module in model.named_modules():
                 structure_items.append(f"{name}:{type(module).__name__}")
-
-            checks["structure_hash"] =
+            canonical = "\n".join(sorted(structure_items))
+            checks["structure_hash"] = hashlib.sha256(
+                canonical.encode("utf-8")
+            ).hexdigest()[:16]
         except Exception:
             checks["structure_hash"] = 0
 
@@ -424,7 +433,7 @@ class InvariantsGuard(Guard):
             return "bert" in model_type or has_cls_decoder
 
         if name in {"rope_rotary_embedding", "rotary_embedding"}:
-            # Detect rotary embeddings used by
+            # Detect rotary embeddings used by RoPE-style models
            if hasattr(model, "model") and hasattr(model.model, "layers"):
                first_layer = model.model.layers[0] if model.model.layers else None
            else:
@@ -443,7 +452,7 @@ class InvariantsGuard(Guard):
         model_type = getattr(config, "model_type", "") if config else ""
         return any(
             keyword in model_type
-            for keyword in ("gpt", "
+            for keyword in ("gpt", "mistral", "mixtral", "qwen", "opt", "phi")
         )
 
         return True
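A standalone sketch of the structure-hash recipe from the hunk above, in plain PyTorch with an illustrative model:

import hashlib

import torch.nn as nn

model = nn.Sequential(nn.Linear(4, 4), nn.ReLU())
# Same recipe as the diff: sort "name:ClassName" entries so the hash is
# order-independent, then keep a short sha256 prefix as the fingerprint.
items = [f"{name}:{type(m).__name__}" for name, m in model.named_modules()]
digest = hashlib.sha256("\n".join(sorted(items)).encode("utf-8")).hexdigest()[:16]
print(digest)  # identical architectures produce identical fingerprints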
invarlock/guards/rmt.py
CHANGED
@@ -387,7 +387,7 @@ def _iter_transformer_layers(model: nn.Module):
         except (TypeError, AttributeError):
             pass
     elif hasattr(model, "model") and hasattr(model.model, "layers"):
-        #
+        # RoPE decoder style
         layers = model.model.layers
         if hasattr(layers, "__iter__") and hasattr(layers, "__len__"):
             try:
@@ -746,7 +746,7 @@ def rmt_detect_with_names(
         for idx, layer in enumerate(h_layers):
             layer_modules.append((f"transformer.h.{idx}", layer))
     elif hasattr(model, "model") and hasattr(model.model, "layers"):
-        #
+        # RoPE decoder style
         layers = model.model.layers
         if hasattr(layers, "__iter__"):
             for idx, layer in enumerate(layers):
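Both hunks key the traversal off the model's attribute layout; a condensed sketch of that dispatch, using the attribute names from the code above:

def iter_transformer_layers(model):
    # GPT-2 style: blocks live at model.transformer.h
    if hasattr(model, "transformer") and hasattr(model.transformer, "h"):
        yield from model.transformer.h
    # RoPE decoder style (LLaMA-like): blocks live at model.model.layers
    elif hasattr(model, "model") and hasattr(model.model, "layers"):
        yield from model.model.layers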
invarlock/guards/variance.py
CHANGED
@@ -121,7 +121,7 @@ def _iter_transformer_layers(model: nn.Module):
         # GPT-2 style
         yield from model.transformer.h
     elif hasattr(model, "model") and hasattr(model.model, "layers"):
-        #
+        # RoPE decoder style
         yield from model.model.layers
     elif hasattr(model, "encoder") and hasattr(model.encoder, "layer"):
         # BERT style
@@ -214,7 +214,7 @@ def equalise_residual_variance(
             hooks[name] = attn_proj.register_forward_hook(_branch_hook(name))
 
         if hasattr(blk, "mlp"):
-            # Check for c_proj (GPT-2) or down_proj (
+            # Check for c_proj (GPT-2) or down_proj (RoPE decoder) or fc2 (generic)
             mlp_proj = (
                 getattr(blk.mlp, "c_proj", None)
                 or getattr(blk.mlp, "down_proj", None)
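For context, a self-contained sketch of the branch-tap pattern implied by register_forward_hook above; the module and hook name are illustrative, not the guard's actual internals:

import torch
import torch.nn as nn

captured: dict[str, float] = {}

def _branch_hook(name: str):
    def hook(module, inputs, output):
        # Record the branch output variance, as the variance guard does.
        captured[name] = float(output.detach().var())
    return hook

proj = nn.Linear(8, 8)  # stands in for c_proj / down_proj / fc2
handle = proj.register_forward_hook(_branch_hook("blk0.mlp"))
proj(torch.randn(2, 8))
handle.remove()
print(captured)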
invarlock/model_profile.py
CHANGED
@@ -106,7 +106,7 @@ def _gpt2_selectors() -> dict[str, list[str]]:
     }
 
 
-def
+def _rope_decoder_selectors() -> dict[str, list[str]]:
     return {
         "attention": [
             "self_attn.q_proj",
@@ -191,11 +191,11 @@ def _make_gpt2_tokenizer(model_id: str):
     return factory
 
 
-def
+def _make_causal_auto_tokenizer(model_id: str):
     def factory() -> tuple[PreTrainedTokenizerBase, str]:
         if AutoTokenizer is None and GPT2Tokenizer is None:
             raise RuntimeError(
-                "
+                "Causal tokenizers require the 'transformers' extra. "
                 "Install it with: pip install 'invarlock[adapters]'."
             )
         # Try offline-first to respect InvarLock network guard; fall back to a
@@ -227,7 +227,7 @@ def _make_llama_tokenizer(model_id: str):
         eos_token = getattr(tokenizer, "eos_token", None)
         if eos_token is not None:
             tokenizer.pad_token = eos_token
-        # Some
+        # Some causal tokenizers default to not adding a BOS token on encode;
         # enable it to guarantee at least one non-pad, non-zero token id.
         if hasattr(tokenizer, "add_bos_token"):
             try:
@@ -289,7 +289,7 @@ def detect_model_profile(model_id: str, adapter: str | None = None) -> ModelProfile:
     model_lower = (model_id or "").lower()
 
     if any(
-        keyword in adapter_lower for keyword in ("bert", "roberta", "deberta")
+        keyword in adapter_lower for keyword in ("hf_mlm", "bert", "roberta", "deberta")
     ) or any(keyword in model_lower for keyword in ("bert", "roberta", "deberta")):
         return ModelProfile(
             family="bert",
@@ -302,57 +302,78 @@ def detect_model_profile(model_id: str, adapter: str | None = None) -> ModelProfile:
         cert_lints=(
             {
                 "type": "equals",
-                "path": "
-                "value": "
-                "message": "BERT cert must
+                "path": "primary_metric.kind",
+                "value": "ppl_mlm",
+                "message": "BERT cert must use MLM metric.",
             },
             {
                 "type": "gte",
-                "path": "
+                "path": "telemetry.masked_tokens_total",
                 "value": "1",
                 "message": "BERT cert must report masked tokens.",
             },
         ),
     )
 
-    if any(keyword in adapter_lower for keyword in ("
-        keyword in model_lower for keyword in ("
+    if any(keyword in adapter_lower for keyword in ("hf_seq2seq", "t5", "bart")) or any(
+        keyword in model_lower for keyword in ("t5", "bart")
     ):
         return ModelProfile(
-            family="
+            family="seq2seq",
+            default_loss="seq2seq",
+            make_tokenizer=_make_unknown_tokenizer(model_id),
+            default_metric="ppl_seq2seq",
+            default_provider="wikitext2",
+            module_selectors=_unknown_selectors(),
+            invariants=(),
+            cert_lints=(),
+        )
+
+    if any(
+        keyword in adapter_lower for keyword in ("gpt", "neox", "opt", "phi")
+    ) or any(keyword in model_lower for keyword in ("gpt", "neox", "opt", "phi")):
+        return ModelProfile(
+            family="gpt2",
             default_loss="causal",
-            make_tokenizer=
+            make_tokenizer=_make_gpt2_tokenizer(model_id),
             default_metric="ppl_causal",
             default_provider="wikitext2",
-            module_selectors=
-            invariants=("
+            module_selectors=_gpt2_selectors(),
+            invariants=("causal_masking",),
             cert_lints=(
                 {
                     "type": "equals",
-                    "path": "
-                    "value": "
-                    "message": "
+                    "path": "primary_metric.kind",
+                    "value": "ppl_causal",
+                    "message": "GPT-style cert must use causal ppl metric.",
                 },
             ),
         )
 
     if any(
-        keyword in adapter_lower for keyword in ("
-    ) or any(
+        keyword in adapter_lower for keyword in ("mistral", "mixtral", "qwen", "yi")
+    ) or any(
+        keyword in model_lower for keyword in ("mistral", "mixtral", "qwen", "yi")
+    ):
+        family = "causal"
+        for keyword in ("mixtral", "mistral", "qwen", "yi"):
+            if keyword in adapter_lower or keyword in model_lower:
+                family = keyword
+                break
         return ModelProfile(
-            family=
+            family=family,
             default_loss="causal",
-            make_tokenizer=
+            make_tokenizer=_make_causal_auto_tokenizer(model_id),
             default_metric="ppl_causal",
             default_provider="wikitext2",
-            module_selectors=
-            invariants=("
+            module_selectors=_rope_decoder_selectors(),
+            invariants=("rope_rotary_embedding",),
             cert_lints=(
                 {
                     "type": "equals",
-                    "path": "
-                    "value": "
-                    "message": "
+                    "path": "primary_metric.kind",
+                    "value": "ppl_causal",
+                    "message": "Causal cert must use causal ppl metric.",
                 },
             ),
        )
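A sketch of how the reworked keyword routing resolves, assuming the logic in the hunk above; the model id is illustrative:

from invarlock.model_profile import detect_model_profile

profile = detect_model_profile("mistralai/Mistral-7B-v0.1")
# "mistral" hits the new causal branch: family narrows to the matched keyword,
# and the profile carries the RoPE invariant and the causal ppl cert lint.
print(profile.family)           # "mistral"
print(profile.default_metric)   # "ppl_causal"
print(profile.invariants)       # ("rope_rotary_embedding",)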
invarlock/observability/health.py
CHANGED

@@ -374,15 +374,15 @@ class InvarLockHealthChecker(HealthChecker):
         """Check adapter availability."""
         try:
             from invarlock.adapters import (
-
-
-
+                HF_Causal_Adapter,
+                HF_MLM_Adapter,
+                HF_Seq2Seq_Adapter,
             )
 
             adapters = {
-                "
-                "
-                "
+                "hf_causal": HF_Causal_Adapter,
+                "hf_mlm": HF_MLM_Adapter,
+                "hf_seq2seq": HF_Seq2Seq_Adapter,
             }
 
             available_adapters = []
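The availability check reduces to a guarded import; a minimal sketch using the renamed adapter classes from this diff (not the checker's actual implementation):

def probe_adapters() -> list[str]:
    # An ImportError simply means the optional adapters extra is absent.
    try:
        from invarlock.adapters import (  # noqa: F401
            HF_Causal_Adapter,
            HF_MLM_Adapter,
            HF_Seq2Seq_Adapter,
        )
    except ImportError:
        return []
    return ["hf_causal", "hf_mlm", "hf_seq2seq"]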
invarlock/observability/metrics.py
CHANGED

@@ -455,3 +455,111 @@ def create_resource_metrics(registry: MetricsRegistry) -> dict[str, Any]:
         "gpu_memory": registry.register_gauge("invarlock.resource.gpu_memory_percent"),
         "disk_usage": registry.register_gauge("invarlock.resource.disk_percent"),
     }
+
+
+def reset_peak_memory_stats() -> None:
+    """Reset GPU peak memory stats when available."""
+    try:
+        import torch
+
+        if torch.cuda.is_available():
+            torch.cuda.reset_peak_memory_stats()
+        mps = getattr(torch, "mps", None)
+        if mps is not None and hasattr(mps, "reset_peak_memory_stats"):
+            mps.reset_peak_memory_stats()
+    except Exception:
+        pass
+
+
+def capture_memory_snapshot(
+    phase: str, *, timestamp: float | None = None
+) -> dict[str, Any]:
+    """Capture a point-in-time memory snapshot for the current process."""
+    snapshot: dict[str, Any] = {"phase": str(phase)}
+    if timestamp is None:
+        timestamp = time.time()
+    snapshot["ts"] = float(timestamp)
+
+    try:
+        import os
+
+        import psutil
+
+        process = psutil.Process(os.getpid())
+        rss_mb = process.memory_info().rss / 1024 / 1024
+        snapshot["rss_mb"] = float(rss_mb)
+    except Exception:
+        pass
+
+    try:
+        import torch
+
+        if torch.cuda.is_available():
+            device_index = torch.cuda.current_device()
+            snapshot["gpu_device"] = f"cuda:{device_index}"
+            snapshot["gpu_mb"] = float(
+                torch.cuda.memory_allocated(device_index) / 1024 / 1024
+            )
+            snapshot["gpu_reserved_mb"] = float(
+                torch.cuda.memory_reserved(device_index) / 1024 / 1024
+            )
+            snapshot["gpu_peak_mb"] = float(
+                torch.cuda.max_memory_allocated(device_index) / 1024 / 1024
+            )
+            snapshot["gpu_peak_reserved_mb"] = float(
+                torch.cuda.max_memory_reserved(device_index) / 1024 / 1024
+            )
+        else:
+            mps = getattr(torch, "mps", None)
+            if mps is not None and hasattr(torch.backends, "mps"):
+                if torch.backends.mps.is_available():
+                    snapshot["gpu_device"] = "mps"
+                    if hasattr(mps, "current_allocated_memory"):
+                        snapshot["gpu_mb"] = float(
+                            mps.current_allocated_memory() / 1024 / 1024
+                        )
+                    if hasattr(mps, "driver_allocated_memory"):
+                        snapshot["gpu_reserved_mb"] = float(
+                            mps.driver_allocated_memory() / 1024 / 1024
+                        )
+    except Exception:
+        pass
+
+    if len(snapshot) <= 2:
+        return {}
+    return snapshot
+
+
+def summarize_memory_snapshots(
+    snapshots: list[dict[str, Any]],
+) -> dict[str, float]:
+    """Summarize memory snapshots into peak metrics."""
+
+    def _peak(key: str) -> float | None:
+        values: list[float] = []
+        for entry in snapshots:
+            if not isinstance(entry, dict):
+                continue
+            value = entry.get(key)
+            if isinstance(value, int | float):
+                values.append(float(value))
+        return max(values) if values else None
+
+    summary: dict[str, float] = {}
+    rss_peak = _peak("rss_mb")
+    if rss_peak is not None:
+        summary["memory_mb_peak"] = rss_peak
+
+    gpu_peak = _peak("gpu_peak_mb")
+    if gpu_peak is None:
+        gpu_peak = _peak("gpu_mb")
+    if gpu_peak is not None:
+        summary["gpu_memory_mb_peak"] = gpu_peak
+
+    gpu_reserved_peak = _peak("gpu_peak_reserved_mb")
+    if gpu_reserved_peak is None:
+        gpu_reserved_peak = _peak("gpu_reserved_mb")
+    if gpu_reserved_peak is not None:
+        summary["gpu_memory_reserved_mb_peak"] = gpu_reserved_peak
+
+    return summary
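A sketch of how the three new helpers compose; phase labels are illustrative, and the import path assumes invarlock/observability/metrics.py as inferred above:

from invarlock.observability.metrics import (
    capture_memory_snapshot,
    reset_peak_memory_stats,
    summarize_memory_snapshots,
)

reset_peak_memory_stats()  # clear CUDA/MPS peak counters before the phases of interest
snapshots = [capture_memory_snapshot("load"), capture_memory_snapshot("edit")]
# capture_memory_snapshot returns {} when neither psutil nor torch is usable,
# and summarize_memory_snapshots simply skips entries without numeric values.
print(summarize_memory_snapshots(snapshots))
# e.g. {'memory_mb_peak': ..., 'gpu_memory_mb_peak': ...}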