invarlock 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. invarlock/__init__.py +33 -0
  2. invarlock/__main__.py +10 -0
  3. invarlock/_data/runtime/profiles/ci_cpu.yaml +15 -0
  4. invarlock/_data/runtime/profiles/release.yaml +23 -0
  5. invarlock/_data/runtime/tiers.yaml +76 -0
  6. invarlock/adapters/__init__.py +102 -0
  7. invarlock/adapters/_capabilities.py +45 -0
  8. invarlock/adapters/auto.py +99 -0
  9. invarlock/adapters/base.py +530 -0
  10. invarlock/adapters/base_types.py +85 -0
  11. invarlock/adapters/hf_bert.py +852 -0
  12. invarlock/adapters/hf_gpt2.py +403 -0
  13. invarlock/adapters/hf_llama.py +485 -0
  14. invarlock/adapters/hf_mixin.py +383 -0
  15. invarlock/adapters/hf_onnx.py +112 -0
  16. invarlock/adapters/hf_t5.py +137 -0
  17. invarlock/adapters/py.typed +1 -0
  18. invarlock/assurance/__init__.py +43 -0
  19. invarlock/cli/__init__.py +8 -0
  20. invarlock/cli/__main__.py +8 -0
  21. invarlock/cli/_evidence.py +25 -0
  22. invarlock/cli/_json.py +75 -0
  23. invarlock/cli/adapter_auto.py +162 -0
  24. invarlock/cli/app.py +287 -0
  25. invarlock/cli/commands/__init__.py +26 -0
  26. invarlock/cli/commands/certify.py +403 -0
  27. invarlock/cli/commands/doctor.py +1358 -0
  28. invarlock/cli/commands/explain_gates.py +151 -0
  29. invarlock/cli/commands/export_html.py +100 -0
  30. invarlock/cli/commands/plugins.py +1331 -0
  31. invarlock/cli/commands/report.py +354 -0
  32. invarlock/cli/commands/run.py +4146 -0
  33. invarlock/cli/commands/verify.py +1040 -0
  34. invarlock/cli/config.py +396 -0
  35. invarlock/cli/constants.py +68 -0
  36. invarlock/cli/device.py +92 -0
  37. invarlock/cli/doctor_helpers.py +74 -0
  38. invarlock/cli/errors.py +6 -0
  39. invarlock/cli/overhead_utils.py +60 -0
  40. invarlock/cli/provenance.py +66 -0
  41. invarlock/cli/utils.py +41 -0
  42. invarlock/config.py +56 -0
  43. invarlock/core/__init__.py +62 -0
  44. invarlock/core/abi.py +15 -0
  45. invarlock/core/api.py +274 -0
  46. invarlock/core/auto_tuning.py +317 -0
  47. invarlock/core/bootstrap.py +226 -0
  48. invarlock/core/checkpoint.py +221 -0
  49. invarlock/core/contracts.py +73 -0
  50. invarlock/core/error_utils.py +64 -0
  51. invarlock/core/events.py +298 -0
  52. invarlock/core/exceptions.py +95 -0
  53. invarlock/core/registry.py +481 -0
  54. invarlock/core/retry.py +146 -0
  55. invarlock/core/runner.py +2041 -0
  56. invarlock/core/types.py +154 -0
  57. invarlock/edits/__init__.py +12 -0
  58. invarlock/edits/_edit_utils.py +249 -0
  59. invarlock/edits/_external_utils.py +268 -0
  60. invarlock/edits/noop.py +47 -0
  61. invarlock/edits/py.typed +1 -0
  62. invarlock/edits/quant_rtn.py +801 -0
  63. invarlock/edits/registry.py +166 -0
  64. invarlock/eval/__init__.py +23 -0
  65. invarlock/eval/bench.py +1207 -0
  66. invarlock/eval/bootstrap.py +50 -0
  67. invarlock/eval/data.py +2052 -0
  68. invarlock/eval/metrics.py +2167 -0
  69. invarlock/eval/primary_metric.py +767 -0
  70. invarlock/eval/probes/__init__.py +24 -0
  71. invarlock/eval/probes/fft.py +139 -0
  72. invarlock/eval/probes/mi.py +213 -0
  73. invarlock/eval/probes/post_attention.py +323 -0
  74. invarlock/eval/providers/base.py +67 -0
  75. invarlock/eval/providers/seq2seq.py +111 -0
  76. invarlock/eval/providers/text_lm.py +113 -0
  77. invarlock/eval/providers/vision_text.py +93 -0
  78. invarlock/eval/py.typed +1 -0
  79. invarlock/guards/__init__.py +18 -0
  80. invarlock/guards/_contracts.py +9 -0
  81. invarlock/guards/invariants.py +640 -0
  82. invarlock/guards/policies.py +805 -0
  83. invarlock/guards/py.typed +1 -0
  84. invarlock/guards/rmt.py +2097 -0
  85. invarlock/guards/spectral.py +1419 -0
  86. invarlock/guards/tier_config.py +354 -0
  87. invarlock/guards/variance.py +3298 -0
  88. invarlock/guards_ref/__init__.py +15 -0
  89. invarlock/guards_ref/rmt_ref.py +40 -0
  90. invarlock/guards_ref/spectral_ref.py +135 -0
  91. invarlock/guards_ref/variance_ref.py +60 -0
  92. invarlock/model_profile.py +353 -0
  93. invarlock/model_utils.py +221 -0
  94. invarlock/observability/__init__.py +10 -0
  95. invarlock/observability/alerting.py +535 -0
  96. invarlock/observability/core.py +546 -0
  97. invarlock/observability/exporters.py +565 -0
  98. invarlock/observability/health.py +588 -0
  99. invarlock/observability/metrics.py +457 -0
  100. invarlock/observability/py.typed +1 -0
  101. invarlock/observability/utils.py +553 -0
  102. invarlock/plugins/__init__.py +12 -0
  103. invarlock/plugins/hello_guard.py +33 -0
  104. invarlock/plugins/hf_awq_adapter.py +82 -0
  105. invarlock/plugins/hf_bnb_adapter.py +79 -0
  106. invarlock/plugins/hf_gptq_adapter.py +78 -0
  107. invarlock/plugins/py.typed +1 -0
  108. invarlock/py.typed +1 -0
  109. invarlock/reporting/__init__.py +7 -0
  110. invarlock/reporting/certificate.py +3221 -0
  111. invarlock/reporting/certificate_schema.py +244 -0
  112. invarlock/reporting/dataset_hashing.py +215 -0
  113. invarlock/reporting/guards_analysis.py +948 -0
  114. invarlock/reporting/html.py +32 -0
  115. invarlock/reporting/normalizer.py +235 -0
  116. invarlock/reporting/policy_utils.py +517 -0
  117. invarlock/reporting/primary_metric_utils.py +265 -0
  118. invarlock/reporting/render.py +1442 -0
  119. invarlock/reporting/report.py +903 -0
  120. invarlock/reporting/report_types.py +278 -0
  121. invarlock/reporting/utils.py +175 -0
  122. invarlock/reporting/validate.py +631 -0
  123. invarlock/security.py +176 -0
  124. invarlock/sparsity_utils.py +323 -0
  125. invarlock/utils/__init__.py +150 -0
  126. invarlock/utils/digest.py +45 -0
  127. invarlock-0.2.0.dist-info/METADATA +586 -0
  128. invarlock-0.2.0.dist-info/RECORD +132 -0
  129. invarlock-0.2.0.dist-info/WHEEL +5 -0
  130. invarlock-0.2.0.dist-info/entry_points.txt +20 -0
  131. invarlock-0.2.0.dist-info/licenses/LICENSE +201 -0
  132. invarlock-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,67 @@
1
+ """
2
+ EvaluationProvider base Protocol (Phase 1 scaffold).
3
+
4
+ Providers encapsulate dataset/task specifics (pairing, masking, transforms),
5
+ exposing a stable schedule and a reproducibility digest so metrics can be
6
+ computed in a task-agnostic way.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from collections.abc import Iterable
12
+ from typing import Any, Protocol
13
+
14
+
15
def deterministic_worker_init_fn(worker_id: int, *, base_seed: int = 0) -> None:
    """Seed the global RNGs for one worker on a best-effort basis.

    `random`, `numpy` and `torch` are each seeded with a 31-bit value derived
    from `base_seed` and `worker_id`. A backend that cannot be imported, or
    whose seeding raises for any reason, is skipped silently so that the
    remaining backends are still seeded.

    Args:
        worker_id: Identifier of the worker; mixed into every derived seed.
        base_seed: Base seed shared by all workers (default: 0).
    """
    mask = 0x7FFFFFFF  # clamp each derived seed to a non-negative 31-bit int

    # Every backend has its own try block, and the seed arithmetic lives
    # inside it, so a failure in one backend never affects the others.
    try:
        import random

        stdlib_seed = base_seed ^ (worker_id + 17)
        random.seed(stdlib_seed & mask)
    except Exception:
        pass

    try:
        import numpy as _np

        numpy_seed = (base_seed + 97) ^ (worker_id * 131)
        _np.random.seed(numpy_seed & mask)
    except Exception:
        pass

    try:  # pragma: no cover - torch may be unavailable in CI
        import torch as _torch

        cpu_seed = base_seed * 1009 + worker_id * 7919
        _torch.manual_seed(cpu_seed & mask)
        if hasattr(_torch.cuda, "manual_seed_all"):
            cuda_seed = base_seed * 1013 + worker_id * 7951
            _torch.cuda.manual_seed_all(cuda_seed & mask)
    except Exception:
        pass
43
+
44
+
45
def deterministic_shards(n: int, *, num_workers: int) -> list[list[int]]:
    """Return a deterministic partition of `range(n)` across `num_workers` workers.

    Item `i` is assigned to shard `i % num_workers`, so the result depends only
    on the arguments and never on process scheduling.

    Args:
        n: Number of items to partition; coerced with `int()` on every path.
        num_workers: Number of shards. `None` or any value `<= 1` yields a
            single shard holding all items.

    Returns:
        A list of shards, each an ascending list of item indices. Shards may be
        empty when there are more workers than items.
    """
    # Coerce once so the single-shard and multi-shard paths accept the same
    # inputs (previously only the multi-shard path applied int() to `n`).
    count = int(n)
    if num_workers is None or num_workers <= 1:
        return [list(range(count))]
    workers = int(num_workers)
    # A stride of `workers` starting at `w` selects exactly the i with i % workers == w.
    return [list(range(w, count, workers)) for w in range(workers)]
57
+
58
+
59
class EvaluationProvider(Protocol):
    """Structural interface shared by the evaluation providers."""

    def pairing_schedule(self) -> list[str]:
        """Return the stable, sorted list of example IDs used for pairing."""
        ...

    def digest(self) -> dict[str, Any]:
        """Return the reproducibility digest (tokenizer/masking/transform hashes)."""
        ...

    def batches(self, *, seed: int, batch_size: int) -> Iterable[dict[str, Any]]:
        """Yield task-appropriate batches (input tensors/labels/masks)."""
        ...
@@ -0,0 +1,111 @@
1
+ """
2
+ Seq2Seq provider (Phase 2 scaffold).
3
+
4
+ Future implementation will stream paired (encoder_inputs, decoder_labels) with
5
+ stable example IDs and a digest of tokenization/EOS policies.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections.abc import Iterable
11
+ from typing import Any
12
+
13
+ from .base import EvaluationProvider
14
+
15
+
16
class Seq2SeqProvider(EvaluationProvider):
    """Synthetic, seed-deterministic seq2seq provider for tests and smokes.

    Keyword options (all optional, each coerced with ``int``):
        n: number of examples (default: 12)
        src_len: source length (default: 6)
        tgt_len: target length (default: 7)
        pad_id: pad token id (default: 0)
        bos_id: BOS id (default: 1)
        eos_id: EOS id (default: 2)
    """

    def __init__(self, **kwargs: Any) -> None:
        option = kwargs.get
        self._n = int(option("n", 12))
        self._src_len = int(option("src_len", 6))
        self._tgt_len = int(option("tgt_len", 7))
        self._pad_id = int(option("pad_id", 0))
        self._bos_id = int(option("bos_id", 1))
        self._eos_id = int(option("eos_id", 2))
        # IDs emitted by the most recent `batches()` iteration, in emission order.
        self._ids: list[str] = []

    def pairing_schedule(self) -> list[str]:
        """Return the sorted emitted IDs, or the default ``ex0000..`` IDs if none were emitted yet."""
        if self._ids:
            return sorted(self._ids)
        return [f"ex{i:04d}" for i in range(self._n)]

    def digest(self) -> dict[str, Any]:
        """Return the provider name, version and special-token ids."""
        summary: dict[str, Any] = {"provider": "seq2seq", "version": 1}
        summary["pad_id"] = self._pad_id
        summary["eos_id"] = self._eos_id
        summary["bos_id"] = self._bos_id
        return summary

    def _gen_example(self, idx: int, *, seed: int) -> dict[str, Any]:
        """Build example `idx`; the output depends only on `idx`, `seed` and the options."""
        import random

        pad, bos, eos = self._pad_id, self._bos_id, self._eos_id
        rng = random.Random((seed + 17) ^ (idx * 97))

        def draw(count: int) -> list[int]:
            # Content tokens are drawn from 1..20.
            return [1 + rng.randint(0, 19) for _ in range(count)]

        # Source side: BOS, random tokens, EOS, then right-padding. Odd indices
        # are one token shorter, but never shorter than three tokens.
        src_real = max(3, self._src_len - (idx % 2))
        src_ids = [bos, *draw(src_real - 2), eos]
        src_ids += [pad] * (self._src_len - src_real)  # adds nothing when <= 0
        src_mask = [int(tok != pad) for tok in src_ids]

        # Target side: random tokens, EOS, then right-padding. Drawn after the
        # source so the order of RNG draws stays fixed.
        tgt_real = max(3, self._tgt_len - (idx % 3))
        tgt_ids = [*draw(tgt_real - 1), eos]
        tgt_ids += [pad] * (self._tgt_len - tgt_real)
        tgt_mask = [int(tok != pad) for tok in tgt_ids]

        return {
            "ids": f"ex{idx:04d}",
            "src_ids": src_ids,
            "src_mask": src_mask,
            "tgt_ids": tgt_ids,
            "tgt_mask": tgt_mask,
            # Number of unmasked target positions.
            "weights": int(sum(tgt_mask)),
        }

    def batches(self, *, seed: int, batch_size: int) -> Iterable[dict[str, Any]]:
        """Yield column-wise batches of at most `batch_size` examples.

        Every batch maps each field name to a list with one entry per example.
        The final batch may be smaller. `self._ids` is reset when iteration
        starts and grows as examples are generated.
        """
        assert batch_size > 0
        fields = ("ids", "src_ids", "src_mask", "tgt_ids", "tgt_mask", "weights")
        self._ids = []
        pending: dict[str, list[Any]] = {name: [] for name in fields}
        for idx in range(self._n):
            example = self._gen_example(idx, seed=seed)
            self._ids.append(example["ids"])
            for name in fields:
                pending[name].append(example[name])
            if len(pending["ids"]) >= batch_size:
                yield pending
                # Start a fresh dict so an already-yielded batch is never mutated.
                pending = {name: [] for name in fields}
        if pending["ids"]:
            yield pending
@@ -0,0 +1,113 @@
1
+ """
2
+ Text LM provider (Phase 2 scaffold).
3
+
4
+ This module will house a provider that encapsulates dataset/tokenizer logic for
5
+ language modeling tasks. For now, it serves as a placeholder aligning with the
6
+ EvaluationProvider protocol.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from collections.abc import Iterable
12
+ from typing import Any
13
+
14
+ from .base import EvaluationProvider
15
+
16
+
17
class TextLMProvider(EvaluationProvider):
    """Synthetic, seed-deterministic text LM provider for tests and smokes.

    Keyword options (all optional):
        task: 'causal' or 'mlm' (default: 'causal'); lower-cased on input
        n: number of examples (default: 16)
        seq_len: length of each sequence (default: 8)
        mask_prob: MLM mask probability (default: 0.15)
        pad_id: padding token id (default: 0)
        eos_id: end-of-sequence id (default: 2)
    """

    def __init__(self, **kwargs: Any) -> None:
        option = kwargs.get
        self._task = str(option("task", "causal")).lower()
        self._n = int(option("n", 16))
        self._seq_len = int(option("seq_len", 8))
        self._mask_prob = float(option("mask_prob", 0.15))
        self._pad_id = int(option("pad_id", 0))
        self._eos_id = int(option("eos_id", 2))
        # IDs emitted by the most recent `batches()` iteration, in emission order.
        self._ids: list[str] = []

    def pairing_schedule(self) -> list[str]:
        """Return the sorted emitted IDs, or the default ``ex0000..`` IDs if none were emitted yet."""
        if self._ids:
            return sorted(self._ids)
        return [f"ex{i:04d}" for i in range(self._n)]

    def digest(self) -> dict[str, Any]:
        """Return the provider name, version and configured task."""
        return {"provider": "text_lm", "version": 1, "task": self._task}

    def _gen_example(self, idx: int, *, seed: int) -> dict[str, Any]:
        """Build example `idx`; the output depends only on `idx`, `seed` and the options."""
        import random

        pad, eos = self._pad_id, self._eos_id
        rng = random.Random((seed + 31) ^ (idx * 131))

        # Random tokens from 1..20, a closing EOS, then right-padding. The real
        # length shrinks by idx % 3 but never drops below three tokens.
        real_len = max(3, self._seq_len - (idx % 3))
        ids = [*(1 + rng.randint(0, 19) for _ in range(real_len - 1)), eos]
        ids = ids + [pad] * (self._seq_len - real_len)  # adds nothing when <= 0
        attn = [int(tok != pad) for tok in ids]

        labels: list[int] = []  # stays empty for non-MLM tasks
        weights = sum(attn)
        if self._task == "mlm":
            special = (pad, eos)
            # Attended positions that hold neither PAD nor EOS may be masked.
            candidates = [
                pos
                for pos, (tok, keep) in enumerate(zip(ids, attn))
                if keep and tok not in special
            ]
            # Each position is decided by its own RNG seeded from (seed, idx, pos).
            picked = [
                pos
                for pos in candidates
                if random.Random((seed + idx * 17 + pos * 13) & 0x7FFFFFFF).random()
                < self._mask_prob
            ]
            if not picked:
                # Ensure at least one label to avoid degenerate windows.
                picked = candidates[:1]
            labels = [-100] * len(ids)
            for pos in picked:
                labels[pos] = ids[pos]
            weights = len(picked)

        return {
            "ids": f"ex{idx:04d}",
            "input_ids": ids,
            "attention_mask": attn,
            "labels": labels,
            "weights": int(weights),
        }

    def batches(self, *, seed: int, batch_size: int) -> Iterable[dict[str, Any]]:
        """Yield column-wise batches of at most `batch_size` examples.

        Every batch maps each field name to a list with one entry per example.
        The final batch may be smaller. `self._ids` is reset when iteration
        starts and grows as examples are generated.
        """
        assert batch_size > 0
        fields = ("ids", "input_ids", "attention_mask", "labels", "weights")
        self._ids = []
        pending: dict[str, Any] = {name: [] for name in fields}
        for idx in range(self._n):
            example = self._gen_example(idx, seed=seed)
            self._ids.append(example["ids"])
            for name in fields:
                pending[name].append(example[name])
            if len(pending["ids"]) >= batch_size:
                yield pending
                # Start a fresh dict so an already-yielded batch is never mutated.
                pending = {name: [] for name in fields}
        if pending["ids"]:
            yield pending
@@ -0,0 +1,93 @@
1
+ """
2
+ Vision-Text provider (Phase 4): deterministic pairing and reproducibility digest.
3
+
4
+ This lightweight provider focuses on stable IDs and a reproducibility digest for
5
+ multimodal (image+text) tasks such as image captioning and VQA. It does not
6
+ perform actual batching or transforms; those belong to adapters/inference.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+ from collections.abc import Iterable, Sequence
13
+ from typing import Any
14
+
15
+ from .base import EvaluationProvider
16
+
17
+
18
def _sha256_hex(data: bytes) -> str:
    """Return the SHA-256 digest of `data` as a lowercase hexadecimal string."""
    return hashlib.sha256(data).hexdigest()
22
+
23
+
24
class VisionTextProvider(EvaluationProvider):
    """
    Minimal provider that exposes:
    - a stable pairing schedule (sorted example IDs), and
    - a digest with ids and image hashes plus transform pipeline string.

    Args:
        items: sequence of records with at least keys {"id": str, "image_bytes": bytes}
        transform_pipeline: human-readable transform pipeline description
        seed: integer seed recorded for determinism breadcrumbs
    """

    def __init__(
        self,
        *,
        items: Sequence[dict[str, Any]],
        transform_pipeline: str = "",
        seed: int | None = None,
    ) -> None:
        self._items = list(items)
        self._transform_pipeline = str(transform_pipeline)
        self._seed = int(seed) if seed is not None else None

        # Build the sorted schedule once. If any record cannot supply an "id",
        # the schedule falls back to empty rather than raising.
        try:
            self._schedule: list[str] = sorted(str(x["id"]) for x in self._items)
        except Exception:  # pragma: no cover - defensive
            self._schedule = []

        # NOTE(review): ids are joined without a separator, so different id
        # lists can produce the same ids_sha256 (e.g. ["ab", "c"] and
        # ["a", "bc"]). Left unchanged because altering it would change the
        # digests this provider already emits.
        self._ids_sha256 = _sha256_hex("".join(self._schedule).encode())

        # Index image bytes by id in a single pass instead of rescanning every
        # record for each scheduled id. The first record seen for an id wins,
        # and image bytes that are missing or not bytes-like count as empty.
        first_image: dict[str, bytes] = {}
        if self._schedule:
            for rec in self._items:
                rec_id = str(rec.get("id"))
                if rec_id in first_image:
                    continue
                raw = rec.get("image_bytes")
                first_image[rec_id] = (
                    bytes(raw) if isinstance(raw, (bytes, bytearray)) else b""
                )

        # Hash each image in schedule order, then hash the concatenated hex digests.
        per_img_hashes = [
            _sha256_hex(first_image.get(rec_id, b"")).encode()
            for rec_id in self._schedule
        ]
        self._images_sha256 = _sha256_hex(b"".join(per_img_hashes))

    def pairing_schedule(self) -> list[str]:
        """Return a copy of the sorted example IDs."""
        return list(self._schedule)

    def digest(self) -> dict[str, Any]:
        """Return the reproducibility digest; "seed" is included only when one was given."""
        d: dict[str, Any] = {
            "provider": "vision_text",
            "version": 1,
            "ids_sha256": self._ids_sha256,
            "images_sha256": self._images_sha256,
            "transform_pipeline": self._transform_pipeline,
        }
        if self._seed is not None:
            d["seed"] = int(self._seed)
        return d

    def batches(
        self, *, seed: int, batch_size: int
    ) -> Iterable[dict[str, Any]]:  # pragma: no cover - not used in Phase 4 tests
        """Always raise NotImplementedError; this provider does not produce batches."""
        raise NotImplementedError(
            "VisionTextProvider batches are adapter/task-specific and not implemented here"
        )
91
+
92
+
93
+ __all__ = ["VisionTextProvider"]
@@ -0,0 +1 @@
1
+ # Marker file for PEP 561 type hints
@@ -0,0 +1,18 @@
1
+ """Guard namespace (`invarlock.guards`) re-exporting built-in guards."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from invarlock.core.abi import INVARLOCK_CORE_ABI as INVARLOCK_CORE_ABI
6
+
7
+ from .invariants import InvariantsGuard
8
+ from .rmt import RMTGuard
9
+ from .spectral import SpectralGuard
10
+ from .variance import VarianceGuard
11
+
12
+ __all__ = [
13
+ "InvariantsGuard",
14
+ "SpectralGuard",
15
+ "VarianceGuard",
16
+ "RMTGuard",
17
+ "INVARLOCK_CORE_ABI",
18
+ ]
@@ -0,0 +1,9 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+
5
+
6
def guard_assert(cond: bool, msg: str) -> None:
    """Enable lightweight runtime contracts when INVARLOCK_ASSERT_GUARDS=1.

    Args:
        cond: Condition that must be truthy for the contract to hold.
        msg: Message carried by the raised error.

    Raises:
        AssertionError: If the environment variable INVARLOCK_ASSERT_GUARDS is
            exactly "1" and `cond` is falsy.
    """
    if os.getenv("INVARLOCK_ASSERT_GUARDS", "0") != "1":
        return
    # Raise explicitly rather than via an `assert` statement: asserts are
    # removed when Python runs with -O, which would disable the contract even
    # though it was switched on through the environment.
    if not cond:
        raise AssertionError(msg)