invarlock-0.2.0-py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries. Every file listed below is an addition (+N -0); the hunks that follow cover the invarlock/guards_ref reference kernels and invarlock/model_profile.py.
- invarlock/__init__.py +33 -0
- invarlock/__main__.py +10 -0
- invarlock/_data/runtime/profiles/ci_cpu.yaml +15 -0
- invarlock/_data/runtime/profiles/release.yaml +23 -0
- invarlock/_data/runtime/tiers.yaml +76 -0
- invarlock/adapters/__init__.py +102 -0
- invarlock/adapters/_capabilities.py +45 -0
- invarlock/adapters/auto.py +99 -0
- invarlock/adapters/base.py +530 -0
- invarlock/adapters/base_types.py +85 -0
- invarlock/adapters/hf_bert.py +852 -0
- invarlock/adapters/hf_gpt2.py +403 -0
- invarlock/adapters/hf_llama.py +485 -0
- invarlock/adapters/hf_mixin.py +383 -0
- invarlock/adapters/hf_onnx.py +112 -0
- invarlock/adapters/hf_t5.py +137 -0
- invarlock/adapters/py.typed +1 -0
- invarlock/assurance/__init__.py +43 -0
- invarlock/cli/__init__.py +8 -0
- invarlock/cli/__main__.py +8 -0
- invarlock/cli/_evidence.py +25 -0
- invarlock/cli/_json.py +75 -0
- invarlock/cli/adapter_auto.py +162 -0
- invarlock/cli/app.py +287 -0
- invarlock/cli/commands/__init__.py +26 -0
- invarlock/cli/commands/certify.py +403 -0
- invarlock/cli/commands/doctor.py +1358 -0
- invarlock/cli/commands/explain_gates.py +151 -0
- invarlock/cli/commands/export_html.py +100 -0
- invarlock/cli/commands/plugins.py +1331 -0
- invarlock/cli/commands/report.py +354 -0
- invarlock/cli/commands/run.py +4146 -0
- invarlock/cli/commands/verify.py +1040 -0
- invarlock/cli/config.py +396 -0
- invarlock/cli/constants.py +68 -0
- invarlock/cli/device.py +92 -0
- invarlock/cli/doctor_helpers.py +74 -0
- invarlock/cli/errors.py +6 -0
- invarlock/cli/overhead_utils.py +60 -0
- invarlock/cli/provenance.py +66 -0
- invarlock/cli/utils.py +41 -0
- invarlock/config.py +56 -0
- invarlock/core/__init__.py +62 -0
- invarlock/core/abi.py +15 -0
- invarlock/core/api.py +274 -0
- invarlock/core/auto_tuning.py +317 -0
- invarlock/core/bootstrap.py +226 -0
- invarlock/core/checkpoint.py +221 -0
- invarlock/core/contracts.py +73 -0
- invarlock/core/error_utils.py +64 -0
- invarlock/core/events.py +298 -0
- invarlock/core/exceptions.py +95 -0
- invarlock/core/registry.py +481 -0
- invarlock/core/retry.py +146 -0
- invarlock/core/runner.py +2041 -0
- invarlock/core/types.py +154 -0
- invarlock/edits/__init__.py +12 -0
- invarlock/edits/_edit_utils.py +249 -0
- invarlock/edits/_external_utils.py +268 -0
- invarlock/edits/noop.py +47 -0
- invarlock/edits/py.typed +1 -0
- invarlock/edits/quant_rtn.py +801 -0
- invarlock/edits/registry.py +166 -0
- invarlock/eval/__init__.py +23 -0
- invarlock/eval/bench.py +1207 -0
- invarlock/eval/bootstrap.py +50 -0
- invarlock/eval/data.py +2052 -0
- invarlock/eval/metrics.py +2167 -0
- invarlock/eval/primary_metric.py +767 -0
- invarlock/eval/probes/__init__.py +24 -0
- invarlock/eval/probes/fft.py +139 -0
- invarlock/eval/probes/mi.py +213 -0
- invarlock/eval/probes/post_attention.py +323 -0
- invarlock/eval/providers/base.py +67 -0
- invarlock/eval/providers/seq2seq.py +111 -0
- invarlock/eval/providers/text_lm.py +113 -0
- invarlock/eval/providers/vision_text.py +93 -0
- invarlock/eval/py.typed +1 -0
- invarlock/guards/__init__.py +18 -0
- invarlock/guards/_contracts.py +9 -0
- invarlock/guards/invariants.py +640 -0
- invarlock/guards/policies.py +805 -0
- invarlock/guards/py.typed +1 -0
- invarlock/guards/rmt.py +2097 -0
- invarlock/guards/spectral.py +1419 -0
- invarlock/guards/tier_config.py +354 -0
- invarlock/guards/variance.py +3298 -0
- invarlock/guards_ref/__init__.py +15 -0
- invarlock/guards_ref/rmt_ref.py +40 -0
- invarlock/guards_ref/spectral_ref.py +135 -0
- invarlock/guards_ref/variance_ref.py +60 -0
- invarlock/model_profile.py +353 -0
- invarlock/model_utils.py +221 -0
- invarlock/observability/__init__.py +10 -0
- invarlock/observability/alerting.py +535 -0
- invarlock/observability/core.py +546 -0
- invarlock/observability/exporters.py +565 -0
- invarlock/observability/health.py +588 -0
- invarlock/observability/metrics.py +457 -0
- invarlock/observability/py.typed +1 -0
- invarlock/observability/utils.py +553 -0
- invarlock/plugins/__init__.py +12 -0
- invarlock/plugins/hello_guard.py +33 -0
- invarlock/plugins/hf_awq_adapter.py +82 -0
- invarlock/plugins/hf_bnb_adapter.py +79 -0
- invarlock/plugins/hf_gptq_adapter.py +78 -0
- invarlock/plugins/py.typed +1 -0
- invarlock/py.typed +1 -0
- invarlock/reporting/__init__.py +7 -0
- invarlock/reporting/certificate.py +3221 -0
- invarlock/reporting/certificate_schema.py +244 -0
- invarlock/reporting/dataset_hashing.py +215 -0
- invarlock/reporting/guards_analysis.py +948 -0
- invarlock/reporting/html.py +32 -0
- invarlock/reporting/normalizer.py +235 -0
- invarlock/reporting/policy_utils.py +517 -0
- invarlock/reporting/primary_metric_utils.py +265 -0
- invarlock/reporting/render.py +1442 -0
- invarlock/reporting/report.py +903 -0
- invarlock/reporting/report_types.py +278 -0
- invarlock/reporting/utils.py +175 -0
- invarlock/reporting/validate.py +631 -0
- invarlock/security.py +176 -0
- invarlock/sparsity_utils.py +323 -0
- invarlock/utils/__init__.py +150 -0
- invarlock/utils/digest.py +45 -0
- invarlock-0.2.0.dist-info/METADATA +586 -0
- invarlock-0.2.0.dist-info/RECORD +132 -0
- invarlock-0.2.0.dist-info/WHEEL +5 -0
- invarlock-0.2.0.dist-info/entry_points.txt +20 -0
- invarlock-0.2.0.dist-info/licenses/LICENSE +201 -0
- invarlock-0.2.0.dist-info/top_level.txt +1 -0
invarlock/guards_ref/__init__.py
@@ -0,0 +1,15 @@
+"""Reference decision kernels for guards (pure, side-effect-free).
+
+Exposes small math-first helpers used by property/differential tests.
+"""
+
+from .rmt_ref import rmt_decide
+from .spectral_ref import bh_select, spectral_decide
+from .variance_ref import variance_decide
+
+__all__ = [
+    "bh_select",
+    "spectral_decide",
+    "rmt_decide",
+    "variance_decide",
+]
invarlock/guards_ref/rmt_ref.py
@@ -0,0 +1,40 @@
+from __future__ import annotations
+
+import math
+from collections.abc import Mapping
+
+
+def rmt_decide(
+    bare_by_family: Mapping[str, int],
+    guarded_by_family: Mapping[str, int],
+    epsilon_by_family: Mapping[str, float],
+) -> dict[str, object]:
+    """
+    Reference epsilon-rule decision for RMT.
+
+    Allowed excess A_f = ceil(epsilon_f * max(1, b_f)).
+    PASS iff for all families Δ_f <= A_f and sum Δ_f <= sum A_f.
+    """
+    families = set(bare_by_family) | set(guarded_by_family) | set(epsilon_by_family)
+    delta_by_family: dict[str, int] = {}
+    allowed_by_family: dict[str, int] = {}
+    sum_delta = 0
+    sum_allowed = 0
+    for f in families:
+        b = int(bare_by_family.get(f, 0) or 0)
+        g = int(guarded_by_family.get(f, 0) or 0)
+        eps = float(epsilon_by_family.get(f, 0.0) or 0.0)
+        d = g - b
+        a = int(math.ceil(eps * max(1, b)))
+        delta_by_family[f] = d
+        allowed_by_family[f] = a
+        sum_delta += d
+        sum_allowed += a
+    ok = all(delta_by_family[f] <= allowed_by_family[f] for f in families) and (
+        sum_delta <= sum_allowed
+    )
+    return {
+        "pass": ok,
+        "delta_by_family": delta_by_family,
+        "allowed_by_family": allowed_by_family,
+    }
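To make the epsilon rule concrete, here is a small worked call against the reference kernel above; the family names and counts are illustrative:

```python
from invarlock.guards_ref import rmt_decide

# Bare run: 10 "attention" outliers; guarded run: 11. With epsilon = 0.10 the
# allowed excess is ceil(0.10 * max(1, 10)) = 1, so the +1 delta passes, and
# the total delta (1) stays within the total allowance (1 + 1 = 2).
result = rmt_decide(
    bare_by_family={"attention": 10, "ffn": 4},
    guarded_by_family={"attention": 11, "ffn": 4},
    epsilon_by_family={"attention": 0.10, "ffn": 0.10},
)
assert result["pass"] is True
assert result["allowed_by_family"] == {"attention": 1, "ffn": 1}
```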
invarlock/guards_ref/spectral_ref.py
@@ -0,0 +1,135 @@
+from __future__ import annotations
+
+import math
+from collections.abc import Mapping
+
+
+def bh_select(pvals: list[float], alpha: float) -> list[bool]:
+    """
+    Benjamini–Hochberg procedure. Returns boolean mask of rejections in input order.
+
+    Preconditions: 0 < alpha <= 1; p in [0,1] or NaN (NaN => reject=False).
+    """
+    n = len(pvals)
+    if n == 0:
+        return []
+    alpha = float(alpha)
+    if not (0.0 < alpha <= 1.0):
+        # Treat invalid alpha as no rejections to be conservative
+        return [False] * n
+
+    # Sort by p-value ascending while remembering original indices
+    order = sorted(
+        range(n), key=lambda i: (float("inf") if not _finite01(pvals[i]) else pvals[i])
+    )
+    rejs_sorted = [False] * n
+    max_k = 0
+    for rank, idx in enumerate(order, start=1):
+        p = pvals[idx]
+        if not _finite01(p):
+            continue
+        threshold = (alpha * rank) / n
+        if p <= threshold:
+            max_k = rank
+    # Mark as rejected those with p <= (alpha * max_k / n)
+    if max_k > 0:
+        cutoff = (alpha * max_k) / n
+        for idx in order:
+            p = pvals[idx]
+            if _finite01(p) and p <= cutoff:
+                rejs_sorted[idx] = True
+    # Return in original order
+    return rejs_sorted
+
+
+def spectral_decide(
+    sigma_by_name: Mapping[str, float],
+    default_denom_by_name: Mapping[str, float],
+    family_of_name: Mapping[str, str],
+    deadband: float,
+    caps_by_family: Mapping[str, float],
+    mtest: Mapping[str, object] | None = None,
+) -> dict[str, object]:
+    """
+    Pure decision kernel for spectral guard.
+
+    - z_i = ((sigma_i / denom_i) - 1) / max(deadband, eps)
+    - p_i = 2 * (1 - Phi(|z_i|)) = erfc(|z_i| / sqrt(2)) (two-sided normal tail)
+    - Multiple testing per method; then cap by family kappa.
+    """
+    eps = 1e-12
+    dead = max(float(deadband or 0.0), 0.0)
+
+    names = list(sigma_by_name.keys())
+    z_by_name: dict[str, float] = {}
+    for name in names:
+        s = float(sigma_by_name.get(name, 0.0) or 0.0)
+        d = float(default_denom_by_name.get(name, 1.0) or 1.0)
+        d = d if d > 0.0 else 1.0
+        rel = (s / d) - 1.0
+        z = 0.0
+        if abs(rel) > dead:
+            z = rel / max(dead, eps)
+        z_by_name[name] = z
+
+    # Map to two-sided p-values via complementary error function (normal)
+    # p = 2 * (1 - Phi(|z|)) = erfc(|z| / sqrt(2))
+    try:
+        import math as _m
+
+        def _p(z: float) -> float:
+            return float(_m.erfc(abs(z) / math.sqrt(2.0)))
+    except Exception:
+
+        def _p(z: float) -> float:  # pragma: no cover
+            return 1.0
+
+    pvals = [_p(z_by_name[n]) for n in names]
+    method_obj = (mtest or {}).get("method", "bh")
+    method = str(method_obj).lower()
+    alpha_obj = (mtest or {}).get("alpha", 0.05)
+    try:
+        alpha = float(alpha_obj)  # type: ignore[arg-type]
+    except Exception:
+        alpha = 0.05
+    if method in {"bh", "benjamini-hochberg", "benjamini_hochberg"}:
+        rejects = bh_select(pvals, alpha)
+    elif method in {"bonferroni"}:
+        cutoff = alpha / max(1, len(pvals))
+        rejects = [bool(p <= cutoff) if _finite01(p) else False for p in pvals]
+    else:
+        # Unknown method: conservative
+        rejects = [False] * len(pvals)
+
+    # Apply per-family caps (kappa) after selection: greedily keep top-|z| per family up to kappa
+    fam_map = {n: str(family_of_name.get(n, "other")) for n in names}
+    selected: list[str] = []
+    per_family_counts: dict[str, int] = {}
+    # Sort by |z| descending; pick among rejected set
+    for name in sorted(names, key=lambda n: abs(z_by_name[n]), reverse=True):
+        if not rejects[names.index(name)]:
+            continue
+        fam = fam_map[name]
+        kappa = float(caps_by_family.get(fam, float("inf")) or float("inf"))
+        curr = per_family_counts.get(fam, 0)
+        if math.isinf(kappa) or curr < int(math.ceil(kappa)):  # ceil(inf) raises OverflowError
+            per_family_counts[fam] = curr + 1
+            selected.append(name)
+
+    return {
+        "pass": len(selected) == 0,
+        "selected": selected,
+        "z_by_name": z_by_name,
+        "per_family_counts": per_family_counts,
+    }
+
+
+def _finite01(p: float) -> bool:
+    try:
+        return (
+            (isinstance(p, int | float))
+            and math.isfinite(p)
+            and (0.0 <= float(p) <= 1.0)
+        )
+    except Exception:
+        return False
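The BH selection above compares the k-th smallest p-value against alpha*k/n and rejects everything at or below the largest passing rank's threshold. A quick illustrative check:

```python
from invarlock.guards_ref import bh_select

# n = 4, alpha = 0.05 => rank thresholds 0.0125, 0.025, 0.0375, 0.05.
# 0.01 and 0.02 pass at ranks 1 and 2, so the cutoff is 0.05 * 2 / 4 = 0.025.
print(bh_select([0.01, 0.20, 0.02, 0.60], alpha=0.05))
# [True, False, True, False]
```

And a minimal spectral_decide call, with made-up module names and sigma ratios, showing one module escaping a 5% deadband and failing the gate:

```python
from invarlock.guards_ref import spectral_decide

# sigma/denom deviates +30% for the attention module (z = 0.30 / 0.05 = 6,
# p ~ 2e-9, rejected by BH) and +1% for the FFN module (inside the deadband).
out = spectral_decide(
    sigma_by_name={"h.0.attn.c_attn": 1.30, "h.0.mlp.c_fc": 1.01},
    default_denom_by_name={"h.0.attn.c_attn": 1.0, "h.0.mlp.c_fc": 1.0},
    family_of_name={"h.0.attn.c_attn": "attention", "h.0.mlp.c_fc": "ffn"},
    deadband=0.05,
    caps_by_family={"attention": 2, "ffn": 2},
    mtest={"method": "bh", "alpha": 0.05},
)
print(out["pass"], out["selected"])  # False ['h.0.attn.c_attn']
```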
invarlock/guards_ref/variance_ref.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+
+def variance_decide(
+    mean_delta: float,
+    ci: tuple[float, float] | list[float],
+    direction: str,  # "lower" or "higher" is better
+    min_effect: float,
+    predictive_one_sided: bool,
+) -> dict[str, object]:
+    """
+    Reference predictive gate decision.
+
+    For direction=="lower", negative deltas are improvements (Δ<0 better).
+    For direction=="higher", flip sign so that improvements are treated consistently.
+    """
+    if not (isinstance(ci, tuple | list) and len(ci) == 2):
+        return {"evaluated": False, "pass": True, "reason": "ci_unavailable"}
+    lo, hi = float(ci[0]), float(ci[1])
+    mu = float(mean_delta)
+    me = float(min_effect or 0.0)
+
+    dir_norm = (direction or "lower").strip().lower()
+    # Normalize to "lower is better" frame
+    if dir_norm == "higher":
+        mu = -mu
+        lo, hi = -hi, -lo
+
+    # One-sided vs two-sided enablement semantics
+    if predictive_one_sided:
+        # Production parity: evaluate with one-sided criteria (no strict 0-exclusion required)
+        evaluated = True
+        if mu >= 0.0:
+            return {
+                "evaluated": evaluated,
+                "pass": False,
+                "reason": "mean_not_negative",
+            }
+        if me > 0.0 and (-mu) < me:
+            return {
+                "evaluated": evaluated,
+                "pass": False,
+                "reason": "gain_below_threshold",
+            }
+        if lo >= 0.0:
+            return {"evaluated": evaluated, "pass": False, "reason": "ci_contains_zero"}
+        return {"evaluated": evaluated, "pass": True, "reason": "ci_gain_met"}
+
+    # Two-sided enablement requires strict exclusion of 0 and sufficient effect
+    evaluated = (lo <= hi) and (abs(mu) >= me) and not (lo <= 0.0 <= hi)
+    if not evaluated:
+        return {"evaluated": False, "pass": True, "reason": "not_evaluated"}
+
+    # Two-sided: require CI strictly below zero with gain >= min_effect
+    if hi >= 0.0:
+        return {"evaluated": True, "pass": False, "reason": "ci_contains_zero"}
+    gain_lower = -hi
+    if gain_lower < me:
+        return {"evaluated": True, "pass": False, "reason": "gain_below_threshold"}
+    return {"evaluated": True, "pass": True, "reason": "ci_gain_met"}
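A worked call of the one-sided predictive gate, with illustrative numbers: a mean delta of -0.08 with CI (-0.12, -0.04) clears min_effect = 0.05 because the mean is negative, the gain exceeds the threshold, and the CI lower bound is below zero.

```python
from invarlock.guards_ref import variance_decide

decision = variance_decide(
    mean_delta=-0.08,
    ci=(-0.12, -0.04),
    direction="lower",
    min_effect=0.05,
    predictive_one_sided=True,
)
print(decision)  # {'evaluated': True, 'pass': True, 'reason': 'ci_gain_met'}
```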
invarlock/model_profile.py
@@ -0,0 +1,353 @@
+from __future__ import annotations
+
+import hashlib
+from collections.abc import Callable
+from dataclasses import dataclass, field
+from typing import Any
+
+try:
+    from transformers import AutoTokenizer, GPT2Tokenizer
+    from transformers.tokenization_utils_base import PreTrainedTokenizerBase
+except Exception:  # pragma: no cover - exercised only when transformers is absent
+    AutoTokenizer = None  # type: ignore[assignment]
+    GPT2Tokenizer = None  # type: ignore[assignment]
+
+    class PreTrainedTokenizerBase:  # type: ignore[no-redef]
+        """Lightweight stub used when transformers is not installed."""
+
+        def __call__(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
+            raise RuntimeError(
+                "Tokenization requires the 'transformers' extra. "
+                "Install it with: pip install 'invarlock[adapters]'."
+            )
+
+
+TokenizerFactory = Callable[[], tuple[PreTrainedTokenizerBase, str]]
+
+
+def _hash_tokenizer(tokenizer: PreTrainedTokenizerBase) -> str:
+    try:
+        if hasattr(tokenizer, "get_vocab"):
+            vocab_mapping = tokenizer.get_vocab()
+        else:
+            vocab_mapping = getattr(tokenizer, "vocab", {})
+        if hasattr(vocab_mapping, "items"):
+            vocab_items = list(vocab_mapping.items())
+        else:
+            vocab_items = []
+    except Exception:
+        vocab_items = []
+
+    hasher = hashlib.blake2s(digest_size=16)
+    try:
+        for token, idx in sorted(vocab_items, key=lambda x: x[0]):
+            token_str = token if isinstance(token, str) else str(token)
+            hasher.update(token_str.encode("utf-8", "ignore"))
+            try:
+                hasher.update(int(idx).to_bytes(4, "little", signed=False))
+            except Exception:
+                hasher.update(str(idx).encode("utf-8", "ignore"))
+    except Exception:
+        return "unknown"
+
+    hasher.update(tokenizer.__class__.__name__.encode("utf-8", "ignore"))
+    name_path = getattr(tokenizer, "name_or_path", "")
+    hasher.update(str(name_path).encode("utf-8", "ignore"))
+    return hasher.hexdigest()
+
+
+@dataclass(frozen=True)
+class ModelProfile:
+    """Captured capabilities for a recognised model family."""
+
+    family: str
+    default_loss: str
+    make_tokenizer: TokenizerFactory
+    default_metric: str = "ppl_causal"
+    # Must correspond to a registered provider in invarlock.eval.data.get_provider
+    default_provider: str = "wikitext2"
+    module_selectors: dict[str, list[str]] = field(default_factory=dict)
+    invariants: tuple[str, ...] = ()
+    cert_lints: tuple[dict[str, str], ...] = ()
+
+
+def _bert_selectors() -> dict[str, list[str]]:
+    return {
+        "attention": [
+            "attention.self.query",
+            "attention.self.key",
+            "attention.self.value",
+            "attention.output.dense",
+        ],
+        "ffn": [
+            "intermediate.dense",
+            "output.dense",
+        ],
+    }
+
+
+def _gpt2_selectors() -> dict[str, list[str]]:
+    return {
+        "attention": [
+            "attn.c_attn",
+            "attn.c_proj",
+        ],
+        "ffn": [
+            "mlp.c_fc",
+            "mlp.c_proj",
+        ],
+    }
+
+
+def _llama_selectors() -> dict[str, list[str]]:
+    return {
+        "attention": [
+            "self_attn.q_proj",
+            "self_attn.k_proj",
+            "self_attn.v_proj",
+            "self_attn.o_proj",
+        ],
+        "ffn": [
+            "mlp.up_proj",
+            "mlp.down_proj",
+            "mlp.gate_proj",
+        ],
+    }
+
+
+def _unknown_selectors() -> dict[str, list[str]]:
+    return {
+        "attention": ["attention"],
+        "ffn": [],
+    }
+
+
+def _make_bert_tokenizer(model_id: str):
+    def factory() -> tuple[PreTrainedTokenizerBase, str]:
+        if AutoTokenizer is None:
+            raise RuntimeError(
+                "BERT tokenizers require the 'transformers' extra. "
+                "Install it with: pip install 'invarlock[adapters]'."
+            )
+        # Prefer offline/local cache first to respect network guard
+        tokenizer: PreTrainedTokenizerBase | None = None
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
+        except Exception:
+            # Try a common local BERT if specific model is not cached
+            try:
+                tokenizer = AutoTokenizer.from_pretrained(
+                    "bert-base-uncased", local_files_only=True
+                )
+            except Exception:
+                # If network is permitted, attempt remote fetch; otherwise propagate
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained(model_id)
+                except Exception:
+                    tokenizer = None
+        if tokenizer is None:
+            raise RuntimeError(
+                "Unable to load a BERT tokenizer locally. Set INVARLOCK_ALLOW_NETWORK=1 "
+                "to allow fetching from the Hugging Face Hub, or pre-cache a BERT tokenizer."
+            )
+        if getattr(tokenizer, "mask_token", None) is None:
+            raise ValueError(
+                f"Tokenizer for '{model_id}' does not expose [MASK]; cannot run MLM evaluation."
+            )
+        if getattr(tokenizer, "pad_token", None) is None:
+            for candidate in (
+                getattr(tokenizer, "sep_token", None),
+                getattr(tokenizer, "cls_token", None),
+            ):
+                if candidate is not None:
+                    tokenizer.pad_token = candidate
+                    break
+        hash_value = _hash_tokenizer(tokenizer)
+        return tokenizer, hash_value
+
+    return factory
+
+
+def _make_gpt2_tokenizer(model_id: str):
+    def factory() -> tuple[PreTrainedTokenizerBase, str]:
+        if GPT2Tokenizer is None:
+            raise RuntimeError(
+                "GPT-2 tokenizers require the 'transformers' extra. "
+                "Install it with: pip install 'invarlock[adapters]'."
+            )
+        tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+        hash_value = _hash_tokenizer(tokenizer)
+        return tokenizer, hash_value
+
+    return factory
+
+
+def _make_llama_tokenizer(model_id: str):
+    def factory() -> tuple[PreTrainedTokenizerBase, str]:
+        if AutoTokenizer is None and GPT2Tokenizer is None:
+            raise RuntimeError(
+                "LLaMA-style tokenizers require the 'transformers' extra. "
+                "Install it with: pip install 'invarlock[adapters]'."
+            )
+        # Try offline-first to respect InvarLock network guard; fall back to a
+        # local GPT-2 tokenizer if the model assets are not cached or network
+        # access is denied.
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
+        except Exception:
+            try:
+                tokenizer = AutoTokenizer.from_pretrained(model_id)
+            except Exception:
+                tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+        # Ensure padding/bos tokens are configured so downstream encoding
+        # yields stable non-zero ids and a valid attention mask regardless of
+        # environment defaults or tokenizer variants.
+        # Prefer EOS as pad token when no explicit pad token is defined.
+        if getattr(tokenizer, "pad_token", None) is None:
+            eos_token = getattr(tokenizer, "eos_token", None)
+            if eos_token is not None:
+                tokenizer.pad_token = eos_token
+        # Some LLaMA tokenizers default to not adding a BOS token on encode;
+        # enable it to guarantee at least one non-pad, non-zero token id.
+        if hasattr(tokenizer, "add_bos_token"):
+            try:
+                tokenizer.add_bos_token = True
+            except Exception:
+                pass
+        if getattr(tokenizer, "pad_token", None) is None:
+            raise ValueError(
+                f"Tokenizer for '{model_id}' does not define a pad token and no EOS fallback is available."
+            )
+        hash_value = _hash_tokenizer(tokenizer)
+        return tokenizer, hash_value
+
+    return factory
+
+
+def _make_unknown_tokenizer(model_id: str):
+    def factory() -> tuple[PreTrainedTokenizerBase, str]:
+        if AutoTokenizer is None and GPT2Tokenizer is None:
+            raise RuntimeError(
+                "Text tokenization requires the 'transformers' extra. "
+                "Install it with: pip install 'invarlock[adapters]'."
+            )
+        # Unknown families: try local-only first, then remote, then degrade to GPT-2
+        try:
+            tokenizer: PreTrainedTokenizerBase = AutoTokenizer.from_pretrained(
+                model_id, local_files_only=True
+            )
+        except Exception:
+            try:
+                tokenizer = AutoTokenizer.from_pretrained(model_id)
+            except Exception:
+                tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+        if getattr(tokenizer, "pad_token", None) is None:
+            eos_token = getattr(tokenizer, "eos_token", None)
+            if eos_token is not None:
+                tokenizer.pad_token = eos_token
+        hash_value = _hash_tokenizer(tokenizer)
+        return tokenizer, hash_value
+
+    return factory
+
+
+def detect_model_profile(model_id: str, adapter: str | None = None) -> ModelProfile:
+    """
+    Infer the model family and provide profile metadata used for evaluation.
+    """
+
+    adapter_lower = (adapter or "").lower()
+    model_lower = (model_id or "").lower()
+
+    if any(
+        keyword in adapter_lower for keyword in ("bert", "roberta", "deberta")
+    ) or any(keyword in model_lower for keyword in ("bert", "roberta", "deberta")):
+        return ModelProfile(
+            family="bert",
+            default_loss="mlm",
+            make_tokenizer=_make_bert_tokenizer(model_id),
+            default_metric="ppl_mlm",
+            default_provider="hf_text",
+            module_selectors=_bert_selectors(),
+            invariants=("mlm_mask_alignment",),
+            cert_lints=(
+                {
+                    "type": "equals",
+                    "path": "metrics.loss_type",
+                    "value": "mlm",
+                    "message": "BERT cert must record MLM loss type.",
+                },
+                {
+                    "type": "gte",
+                    "path": "metrics.masked_tokens_total",
+                    "value": "1",
+                    "message": "BERT cert must report masked tokens.",
+                },
+            ),
+        )
+
+    if any(keyword in adapter_lower for keyword in ("llama", "mistral", "qwen")) or any(
+        keyword in model_lower for keyword in ("llama", "mistral", "qwen")
+    ):
+        return ModelProfile(
+            family="llama",
+            default_loss="causal",
+            make_tokenizer=_make_llama_tokenizer(model_id),
+            default_metric="ppl_causal",
+            default_provider="wikitext2",
+            module_selectors=_llama_selectors(),
+            invariants=("rope_rotary_embedding",),
+            cert_lints=(
+                {
+                    "type": "equals",
+                    "path": "metrics.loss_type",
+                    "value": "causal",
+                    "message": "LLaMA cert should report causal loss.",
+                },
+            ),
+        )
+
+    if any(
+        keyword in adapter_lower for keyword in ("gpt", "neox", "opt", "phi")
+    ) or any(keyword in model_lower for keyword in ("gpt", "neox", "opt", "phi")):
+        return ModelProfile(
+            family="gpt2",
+            default_loss="causal",
+            make_tokenizer=_make_gpt2_tokenizer(model_id),
+            default_metric="ppl_causal",
+            default_provider="wikitext2",
+            module_selectors=_gpt2_selectors(),
+            invariants=("causal_masking",),
+            cert_lints=(
+                {
+                    "type": "equals",
+                    "path": "metrics.loss_type",
+                    "value": "causal",
+                    "message": "GPT-style cert should record causal loss.",
+                },
+            ),
+        )
+
+    return ModelProfile(
+        family="unknown",
+        default_loss="causal",
+        make_tokenizer=_make_unknown_tokenizer(model_id),
+        default_metric="ppl_causal",
+        default_provider="wikitext2",
+        module_selectors=_unknown_selectors(),
+        invariants=(),
+        cert_lints=(),
+    )
+
+
+def resolve_tokenizer(profile: ModelProfile) -> tuple[PreTrainedTokenizerBase, str]:
+    """
+    Instantiate a tokenizer for the given profile and return it with its hash.
+    """
+
+    tokenizer, hash_value = profile.make_tokenizer()
+    if not isinstance(hash_value, str) or not hash_value:
+        hash_value = _hash_tokenizer(tokenizer)
+    return tokenizer, hash_value
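End to end, the profile machinery above is used roughly like this; the call requires the 'transformers' extra and a cached GPT-2 tokenizer, and the printed values follow from the GPT-style branch of detect_model_profile:

```python
from invarlock.model_profile import detect_model_profile, resolve_tokenizer

# Keyword-based family detection: "gpt" in the model id selects the GPT-2
# profile with causal-loss defaults and the attn/mlp module selectors.
profile = detect_model_profile("gpt2")
print(profile.family, profile.default_loss, profile.default_metric)
# gpt2 causal ppl_causal

# Invokes the profile's tokenizer factory and returns the tokenizer plus a
# blake2s hash over its sorted vocab, class name, and name_or_path, which
# the reports use as a provenance fingerprint.
tokenizer, vocab_hash = resolve_tokenizer(profile)
```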