invarlock 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +33 -0
- invarlock/__main__.py +10 -0
- invarlock/_data/runtime/profiles/ci_cpu.yaml +15 -0
- invarlock/_data/runtime/profiles/release.yaml +23 -0
- invarlock/_data/runtime/tiers.yaml +76 -0
- invarlock/adapters/__init__.py +102 -0
- invarlock/adapters/_capabilities.py +45 -0
- invarlock/adapters/auto.py +99 -0
- invarlock/adapters/base.py +530 -0
- invarlock/adapters/base_types.py +85 -0
- invarlock/adapters/hf_bert.py +852 -0
- invarlock/adapters/hf_gpt2.py +403 -0
- invarlock/adapters/hf_llama.py +485 -0
- invarlock/adapters/hf_mixin.py +383 -0
- invarlock/adapters/hf_onnx.py +112 -0
- invarlock/adapters/hf_t5.py +137 -0
- invarlock/adapters/py.typed +1 -0
- invarlock/assurance/__init__.py +43 -0
- invarlock/cli/__init__.py +8 -0
- invarlock/cli/__main__.py +8 -0
- invarlock/cli/_evidence.py +25 -0
- invarlock/cli/_json.py +75 -0
- invarlock/cli/adapter_auto.py +162 -0
- invarlock/cli/app.py +287 -0
- invarlock/cli/commands/__init__.py +26 -0
- invarlock/cli/commands/certify.py +403 -0
- invarlock/cli/commands/doctor.py +1358 -0
- invarlock/cli/commands/explain_gates.py +151 -0
- invarlock/cli/commands/export_html.py +100 -0
- invarlock/cli/commands/plugins.py +1331 -0
- invarlock/cli/commands/report.py +354 -0
- invarlock/cli/commands/run.py +4146 -0
- invarlock/cli/commands/verify.py +1040 -0
- invarlock/cli/config.py +396 -0
- invarlock/cli/constants.py +68 -0
- invarlock/cli/device.py +92 -0
- invarlock/cli/doctor_helpers.py +74 -0
- invarlock/cli/errors.py +6 -0
- invarlock/cli/overhead_utils.py +60 -0
- invarlock/cli/provenance.py +66 -0
- invarlock/cli/utils.py +41 -0
- invarlock/config.py +56 -0
- invarlock/core/__init__.py +62 -0
- invarlock/core/abi.py +15 -0
- invarlock/core/api.py +274 -0
- invarlock/core/auto_tuning.py +317 -0
- invarlock/core/bootstrap.py +226 -0
- invarlock/core/checkpoint.py +221 -0
- invarlock/core/contracts.py +73 -0
- invarlock/core/error_utils.py +64 -0
- invarlock/core/events.py +298 -0
- invarlock/core/exceptions.py +95 -0
- invarlock/core/registry.py +481 -0
- invarlock/core/retry.py +146 -0
- invarlock/core/runner.py +2041 -0
- invarlock/core/types.py +154 -0
- invarlock/edits/__init__.py +12 -0
- invarlock/edits/_edit_utils.py +249 -0
- invarlock/edits/_external_utils.py +268 -0
- invarlock/edits/noop.py +47 -0
- invarlock/edits/py.typed +1 -0
- invarlock/edits/quant_rtn.py +801 -0
- invarlock/edits/registry.py +166 -0
- invarlock/eval/__init__.py +23 -0
- invarlock/eval/bench.py +1207 -0
- invarlock/eval/bootstrap.py +50 -0
- invarlock/eval/data.py +2052 -0
- invarlock/eval/metrics.py +2167 -0
- invarlock/eval/primary_metric.py +767 -0
- invarlock/eval/probes/__init__.py +24 -0
- invarlock/eval/probes/fft.py +139 -0
- invarlock/eval/probes/mi.py +213 -0
- invarlock/eval/probes/post_attention.py +323 -0
- invarlock/eval/providers/base.py +67 -0
- invarlock/eval/providers/seq2seq.py +111 -0
- invarlock/eval/providers/text_lm.py +113 -0
- invarlock/eval/providers/vision_text.py +93 -0
- invarlock/eval/py.typed +1 -0
- invarlock/guards/__init__.py +18 -0
- invarlock/guards/_contracts.py +9 -0
- invarlock/guards/invariants.py +640 -0
- invarlock/guards/policies.py +805 -0
- invarlock/guards/py.typed +1 -0
- invarlock/guards/rmt.py +2097 -0
- invarlock/guards/spectral.py +1419 -0
- invarlock/guards/tier_config.py +354 -0
- invarlock/guards/variance.py +3298 -0
- invarlock/guards_ref/__init__.py +15 -0
- invarlock/guards_ref/rmt_ref.py +40 -0
- invarlock/guards_ref/spectral_ref.py +135 -0
- invarlock/guards_ref/variance_ref.py +60 -0
- invarlock/model_profile.py +353 -0
- invarlock/model_utils.py +221 -0
- invarlock/observability/__init__.py +10 -0
- invarlock/observability/alerting.py +535 -0
- invarlock/observability/core.py +546 -0
- invarlock/observability/exporters.py +565 -0
- invarlock/observability/health.py +588 -0
- invarlock/observability/metrics.py +457 -0
- invarlock/observability/py.typed +1 -0
- invarlock/observability/utils.py +553 -0
- invarlock/plugins/__init__.py +12 -0
- invarlock/plugins/hello_guard.py +33 -0
- invarlock/plugins/hf_awq_adapter.py +82 -0
- invarlock/plugins/hf_bnb_adapter.py +79 -0
- invarlock/plugins/hf_gptq_adapter.py +78 -0
- invarlock/plugins/py.typed +1 -0
- invarlock/py.typed +1 -0
- invarlock/reporting/__init__.py +7 -0
- invarlock/reporting/certificate.py +3221 -0
- invarlock/reporting/certificate_schema.py +244 -0
- invarlock/reporting/dataset_hashing.py +215 -0
- invarlock/reporting/guards_analysis.py +948 -0
- invarlock/reporting/html.py +32 -0
- invarlock/reporting/normalizer.py +235 -0
- invarlock/reporting/policy_utils.py +517 -0
- invarlock/reporting/primary_metric_utils.py +265 -0
- invarlock/reporting/render.py +1442 -0
- invarlock/reporting/report.py +903 -0
- invarlock/reporting/report_types.py +278 -0
- invarlock/reporting/utils.py +175 -0
- invarlock/reporting/validate.py +631 -0
- invarlock/security.py +176 -0
- invarlock/sparsity_utils.py +323 -0
- invarlock/utils/__init__.py +150 -0
- invarlock/utils/digest.py +45 -0
- invarlock-0.2.0.dist-info/METADATA +586 -0
- invarlock-0.2.0.dist-info/RECORD +132 -0
- invarlock-0.2.0.dist-info/WHEEL +5 -0
- invarlock-0.2.0.dist-info/entry_points.txt +20 -0
- invarlock-0.2.0.dist-info/licenses/LICENSE +201 -0
- invarlock-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""
|
|
2
|
+
EvaluationProvider base Protocol (Phase 1 scaffold).
|
|
3
|
+
|
|
4
|
+
Providers encapsulate dataset/task specifics (pairing, masking, transforms),
|
|
5
|
+
exposing a stable schedule and a reproducibility digest so metrics can be
|
|
6
|
+
computed in a task-agnostic way.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from collections.abc import Iterable
|
|
12
|
+
from typing import Any, Protocol
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def deterministic_worker_init_fn(worker_id: int, *, base_seed: int = 0) -> None:
    """Seed the per-worker RNGs from ``base_seed`` and ``worker_id``.

    Seeds, in order, the stdlib ``random`` module, ``numpy.random`` and
    ``torch`` (CPU, then all CUDA devices when ``torch.cuda`` exposes
    ``manual_seed_all``). Each library gets its own derivation of the two
    inputs, truncated to 31 bits.

    Every step is best-effort: a missing library or any error raised while
    seeding is swallowed so that one failure never prevents the remaining
    libraries from being seeded.

    Args:
        worker_id: Index of the worker being initialised.
        base_seed: Seed shared by all workers; defaults to ``0``.
    """
    low31 = 0x7FFFFFFF  # every derived seed is truncated to 31 bits

    # --- stdlib random ---------------------------------------------------
    try:
        import random as _random

        py_seed = (base_seed ^ (worker_id + 17)) & low31
        _random.seed(py_seed)
    except Exception:
        pass

    # --- numpy (optional) ------------------------------------------------
    try:
        import numpy as _np

        np_seed = ((base_seed + 97) ^ (worker_id * 131)) & low31
        _np.random.seed(np_seed)
    except Exception:
        pass

    # --- torch (optional) ------------------------------------------------
    try:  # pragma: no cover - torch may be unavailable in CI
        import torch as _torch

        cpu_seed = (base_seed * 1009 + worker_id * 7919) & low31
        _torch.manual_seed(cpu_seed)

        seed_all_devices = getattr(_torch.cuda, "manual_seed_all", None)
        if seed_all_devices is not None:
            cuda_seed = (base_seed * 1013 + worker_id * 7951) & low31
            seed_all_devices(cuda_seed)
    except Exception:
        pass
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def deterministic_shards(n: int, *, num_workers: int) -> list[list[int]]:
    """Return a deterministic partition of ``range(n)`` across workers.

    Index ``i`` is assigned to shard ``i % num_workers``, so the result
    depends only on the arguments and never on process scheduling.

    Args:
        n: Number of items to partition. Coerced with ``int()`` on every
            path; a non-positive value yields empty shards.
        num_workers: Number of shards to produce. ``None`` or any value
            ``<= 1`` yields a single shard holding every index.

    Returns:
        A list of ``num_workers`` index lists (one list in the single-shard
        case), each in ascending order.
    """
    # Coerce once up front so both paths accept exactly the same inputs.
    total = int(n)
    if num_workers is None or num_workers <= 1:
        return [list(range(total))]

    workers = int(num_workers)
    # range(start, total, workers) enumerates the i with i % workers == start.
    return [list(range(start, total, workers)) for start in range(workers)]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class EvaluationProvider(Protocol):
    """Structural interface implemented by evaluation data providers.

    A provider exposes three things: the ordered example IDs used for
    pairing, a reproducibility digest, and an iterator of batches.
    """

    def pairing_schedule(self) -> list[str]:
        """Return the stable, sorted example IDs used for pairing."""
        ...

    def digest(self) -> dict[str, Any]:
        """Return the reproducibility digest (tokenizer/masking/transform hashes)."""
        ...

    def batches(self, *, seed: int, batch_size: int) -> Iterable[dict[str, Any]]:
        """Yield task-appropriate batches (input tensors/labels/masks).

        Args:
            seed: Seed controlling any randomness in batch generation.
            batch_size: Maximum number of examples per yielded batch.
        """
        ...
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Seq2Seq provider (Phase 2 scaffold).
|
|
3
|
+
|
|
4
|
+
Future implementation will stream paired (encoder_inputs, decoder_labels) with
|
|
5
|
+
stable example IDs and a digest of tokenization/EOS policies.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections.abc import Iterable
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from .base import EvaluationProvider
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Seq2SeqProvider(EvaluationProvider):
    """Deterministic synthetic seq2seq provider for tests and smokes.

    Args (kwargs):
        n: number of examples (default: 12)
        src_len: source length (default: 6)
        tgt_len: target length (default: 7)
        pad_id: pad token id (default: 0)
        bos_id: BOS id (default: 1)
        eos_id: EOS id (default: 2)
    """

    # Keys shared by every example dict and every batch dict.
    _FIELDS = ("ids", "src_ids", "src_mask", "tgt_ids", "tgt_mask", "weights")

    def __init__(self, **kwargs: Any) -> None:
        self._n = int(kwargs.get("n", 12))
        self._src_len = int(kwargs.get("src_len", 6))
        self._tgt_len = int(kwargs.get("tgt_len", 7))
        self._pad_id = int(kwargs.get("pad_id", 0))
        self._bos_id = int(kwargs.get("bos_id", 1))
        self._eos_id = int(kwargs.get("eos_id", 2))
        # Filled by batches(); empty until the generator has started running.
        self._ids: list[str] = []

    def pairing_schedule(self) -> list[str]:
        """Return the sorted example IDs.

        Uses the IDs recorded by the most recent ``batches()`` run when any
        exist; otherwise synthesises ``ex0000`` .. ``ex{n-1}`` directly.
        """
        if self._ids:
            return sorted(self._ids)
        return [f"ex{i:04d}" for i in range(self._n)]

    def digest(self) -> dict[str, Any]:
        """Return the provider name, version and special-token ids."""
        return {
            "provider": "seq2seq",
            "version": 1,
            "pad_id": self._pad_id,
            "eos_id": self._eos_id,
            "bos_id": self._bos_id,
        }

    @classmethod
    def _empty_batch(cls) -> dict[str, list[Any]]:
        """Return a fresh batch dict with an empty list for every field."""
        return {key: [] for key in cls._FIELDS}

    def _gen_example(self, idx: int, *, seed: int) -> dict[str, Any]:
        """Build the synthetic example at position ``idx`` for ``seed``.

        Returns a dict with the example id, padded source/target token
        lists, their 0/1 masks, and ``weights`` (count of unpadded target
        tokens).
        """
        import random

        rng = random.Random((seed + 17) ^ (idx * 97))

        # Source: BOS + random tokens + EOS, then right-padding.
        src_real = max(3, self._src_len - (idx % 2))
        src_pad = max(0, self._src_len - src_real)
        src_ids = (
            [self._bos_id]
            + [1 + rng.randint(0, 19) for _ in range(src_real - 2)]
            + [self._eos_id]
            + [self._pad_id] * src_pad
        )
        # Masks are positional: random tokens span 1..20, so comparing token
        # values against pad_id would mis-mask real tokens whenever pad_id
        # falls inside that range.
        src_mask = [1] * src_real + [0] * src_pad

        # Target: random tokens ending with EOS, then right-padding.
        tgt_real = max(3, self._tgt_len - (idx % 3))
        tgt_pad = max(0, self._tgt_len - tgt_real)
        tgt_ids = (
            [1 + rng.randint(0, 19) for _ in range(tgt_real - 1)]
            + [self._eos_id]
            + [self._pad_id] * tgt_pad
        )
        tgt_mask = [1] * tgt_real + [0] * tgt_pad

        return {
            "ids": f"ex{idx:04d}",
            "src_ids": src_ids,
            "src_mask": src_mask,
            "tgt_ids": tgt_ids,
            "tgt_mask": tgt_mask,
            "weights": sum(tgt_mask),
        }

    def batches(self, *, seed: int, batch_size: int) -> Iterable[dict[str, Any]]:
        """Yield column-oriented batches of at most ``batch_size`` examples.

        Each batch maps every field name to a list with one entry per
        example. The final batch may be shorter. Running the generator also
        resets and repopulates the IDs returned by ``pairing_schedule()``.
        """
        # Kept as an assert so callers still see AssertionError, as before.
        assert batch_size > 0
        batch = self._empty_batch()
        self._ids = []
        for i in range(self._n):
            ex = self._gen_example(i, seed=seed)
            self._ids.append(ex["ids"])
            for key in self._FIELDS:
                batch[key].append(ex[key])
            if len(batch["ids"]) >= batch_size:
                yield batch
                batch = self._empty_batch()
        if batch["ids"]:
            yield batch
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Text LM provider (Phase 2 scaffold).
|
|
3
|
+
|
|
4
|
+
This module will house a provider that encapsulates dataset/tokenizer logic for
|
|
5
|
+
language modeling tasks. For now, it serves as a placeholder aligning with the
|
|
6
|
+
EvaluationProvider protocol.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from collections.abc import Iterable
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from .base import EvaluationProvider
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TextLMProvider(EvaluationProvider):
    """Deterministic synthetic text LM provider for tests and smokes.

    Args (kwargs):
        task: 'causal' or 'mlm' (default: 'causal')
        n: number of examples (default: 16)
        seq_len: length of each sequence (default: 8)
        mask_prob: MLM mask probability (default: 0.15)
        pad_id: padding token id (default: 0)
        eos_id: end-of-sequence id (default: 2)
    """

    # Keys shared by every example dict and every batch dict.
    _FIELDS = ("ids", "input_ids", "attention_mask", "labels", "weights")

    def __init__(self, **kwargs: Any) -> None:
        self._task = str(kwargs.get("task", "causal")).lower()
        self._n = int(kwargs.get("n", 16))
        self._seq_len = int(kwargs.get("seq_len", 8))
        self._mask_prob = float(kwargs.get("mask_prob", 0.15))
        self._pad_id = int(kwargs.get("pad_id", 0))
        self._eos_id = int(kwargs.get("eos_id", 2))
        # Filled by batches(); empty until the generator has started running.
        self._ids: list[str] = []

    def pairing_schedule(self) -> list[str]:
        """Return the sorted example IDs.

        Uses the IDs recorded by the most recent ``batches()`` run when any
        exist; otherwise synthesises ``ex0000`` .. ``ex{n-1}`` directly.
        """
        if self._ids:
            return sorted(self._ids)
        return [f"ex{i:04d}" for i in range(self._n)]

    def digest(self) -> dict[str, Any]:
        """Return the provider name, version and configured task."""
        return {"provider": "text_lm", "version": 1, "task": self._task}

    @classmethod
    def _empty_batch(cls) -> dict[str, list[Any]]:
        """Return a fresh batch dict with an empty list for every field."""
        return {key: [] for key in cls._FIELDS}

    def _gen_example(self, idx: int, *, seed: int) -> dict[str, Any]:
        """Build the synthetic example at position ``idx`` for ``seed``.

        For any task other than ``"mlm"`` the example has empty ``labels``
        and ``weights`` equal to the number of unpadded tokens. For
        ``"mlm"``, ``labels`` is ``-100`` everywhere except the selected
        positions, and ``weights`` is the number of selected positions.
        """
        import random

        rng = random.Random((seed + 31) ^ (idx * 131))

        # Random tokens ending with EOS, then right-padding.
        real_len = max(3, self._seq_len - (idx % 3))
        pad_len = max(0, self._seq_len - real_len)
        ids = (
            [1 + rng.randint(0, 19) for _ in range(real_len - 1)]
            + [self._eos_id]
            + [self._pad_id] * pad_len
        )
        # The mask is positional: random tokens span 1..20, so comparing
        # token values against pad_id would mis-mask real tokens whenever
        # pad_id falls inside that range.
        attn = [1] * real_len + [0] * pad_len

        labels: list[int] | None = None
        weights = sum(attn)
        if self._task == "mlm":
            labels = [-100] * len(ids)
            special = (self._pad_id, self._eos_id)
            # Only attended positions whose token is not a special id may be
            # selected as MLM targets.
            candidates = [
                pos
                for pos, (tok, keep) in enumerate(zip(ids, attn))
                if keep and tok not in special
            ]
            masked = 0
            for pos in candidates:
                # Each position draws from its own RNG so the decision
                # depends only on (seed, idx, pos).
                rng2 = random.Random((seed + idx * 17 + pos * 13) & 0x7FFFFFFF)
                if rng2.random() < self._mask_prob:
                    labels[pos] = ids[pos]
                    masked += 1
            if masked == 0 and candidates:
                # Ensure at least one label to avoid degenerate windows.
                first = candidates[0]
                labels[first] = ids[first]
                masked = 1
            weights = masked

        return {
            "ids": f"ex{idx:04d}",
            "input_ids": ids,
            "attention_mask": attn,
            "labels": labels if labels is not None else [],
            "weights": int(weights),
        }

    def batches(self, *, seed: int, batch_size: int) -> Iterable[dict[str, Any]]:
        """Yield column-oriented batches of at most ``batch_size`` examples.

        Each batch maps every field name to a list with one entry per
        example. The final batch may be shorter. Running the generator also
        resets and repopulates the IDs returned by ``pairing_schedule()``.
        """
        # Kept as an assert so callers still see AssertionError, as before.
        assert batch_size > 0
        batch = self._empty_batch()
        self._ids = []
        for i in range(self._n):
            ex = self._gen_example(i, seed=seed)
            self._ids.append(ex["ids"])
            for key in self._FIELDS:
                batch[key].append(ex[key])
            if len(batch["ids"]) >= batch_size:
                yield batch
                batch = self._empty_batch()
        if batch["ids"]:
            yield batch
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Vision-Text provider (Phase 4): deterministic pairing and reproducibility digest.
|
|
3
|
+
|
|
4
|
+
This lightweight provider focuses on stable IDs and a reproducibility digest for
|
|
5
|
+
multimodal (image+text) tasks such as image captioning and VQA. It does not
|
|
6
|
+
perform actual batching or transforms; those belong to adapters/inference.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import hashlib
|
|
12
|
+
from collections.abc import Iterable, Sequence
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from .base import EvaluationProvider
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _sha256_hex(data: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of ``data``."""
    return hashlib.sha256(data).hexdigest()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class VisionTextProvider(EvaluationProvider):
    """
    Minimal provider that exposes:
    - a stable pairing schedule (sorted example IDs), and
    - a digest with ids and image hashes plus transform pipeline string.

    Args:
        items: sequence of records with at least keys {"id": str, "image_bytes": bytes}
        transform_pipeline: human-readable transform pipeline description
        seed: integer seed recorded for determinism breadcrumbs
    """

    def __init__(
        self,
        *,
        items: Sequence[dict[str, Any]],
        transform_pipeline: str = "",
        seed: int | None = None,
    ) -> None:
        self._items = list(items)
        self._transform_pipeline = str(transform_pipeline)
        self._seed = int(seed) if seed is not None else None

        # Build the sorted schedule once. Any failure while reading ids
        # (for example a record without an "id" key) yields an empty schedule.
        try:
            self._schedule: list[str] = sorted(str(x["id"]) for x in self._items)
        except Exception:  # pragma: no cover - defensive
            self._schedule = []

        # NOTE(review): ids are concatenated without a separator, so
        # ["ab", "c"] and ["a", "bc"] hash identically. Left unchanged so
        # previously recorded digests stay comparable.
        self._ids_sha256 = _sha256_hex("".join(self._schedule).encode())

        # Index the first record seen for each id, so the loop below is one
        # dict lookup per id instead of a rescan of every item. Skipped when
        # the schedule is empty, in which case no record is inspected.
        first_by_id: dict[str, Any] = {}
        if self._schedule:
            for rec in self._items:
                first_by_id.setdefault(str(rec.get("id")), rec)

        # Hash images in schedule order; missing or non-bytes payloads are
        # treated as empty.
        per_img_hashes = []
        for rec_id in self._schedule:
            rec = first_by_id.get(rec_id)
            payload = rec.get("image_bytes") if rec is not None else None
            img_bytes = bytes(payload) if isinstance(payload, (bytes, bytearray)) else b""
            per_img_hashes.append(_sha256_hex(img_bytes).encode())
        self._images_sha256 = _sha256_hex(b"".join(per_img_hashes))

    def pairing_schedule(self) -> list[str]:
        """Return a copy of the sorted example IDs."""
        return list(self._schedule)

    def digest(self) -> dict[str, Any]:
        """Return the reproducibility digest.

        Contains the provider name and version, the id and image hashes
        computed at construction, and the transform pipeline string.
        ``seed`` is included only when one was supplied.
        """
        d: dict[str, Any] = {
            "provider": "vision_text",
            "version": 1,
            "ids_sha256": self._ids_sha256,
            "images_sha256": self._images_sha256,
            "transform_pipeline": self._transform_pipeline,
        }
        if self._seed is not None:
            d["seed"] = self._seed
        return d

    def batches(
        self, *, seed: int, batch_size: int
    ) -> Iterable[dict[str, Any]]:  # pragma: no cover - not used in Phase 4 tests
        """Always raise ``NotImplementedError``; this provider does no batching."""
        raise NotImplementedError(
            "VisionTextProvider batches are adapter/task-specific and not implemented here"
        )
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
__all__ = ["VisionTextProvider"]
|
invarlock/eval/py.typed
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Marker file for PEP 561 type hints
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Guard namespace (`invarlock.guards`) re-exporting built-in guards."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from invarlock.core.abi import INVARLOCK_CORE_ABI as INVARLOCK_CORE_ABI
|
|
6
|
+
|
|
7
|
+
from .invariants import InvariantsGuard
|
|
8
|
+
from .rmt import RMTGuard
|
|
9
|
+
from .spectral import SpectralGuard
|
|
10
|
+
from .variance import VarianceGuard
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"InvariantsGuard",
|
|
14
|
+
"SpectralGuard",
|
|
15
|
+
"VarianceGuard",
|
|
16
|
+
"RMTGuard",
|
|
17
|
+
"INVARLOCK_CORE_ABI",
|
|
18
|
+
]
|