invarlock-0.3.6-py3-none-any.whl → invarlock-0.3.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +4 -4
- invarlock/adapters/__init__.py +10 -14
- invarlock/adapters/auto.py +37 -50
- invarlock/adapters/capabilities.py +2 -2
- invarlock/adapters/hf_causal.py +418 -0
- invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
- invarlock/adapters/hf_loading.py +7 -7
- invarlock/adapters/hf_mixin.py +53 -9
- invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
- invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
- invarlock/assurance/__init__.py +15 -23
- invarlock/cli/adapter_auto.py +32 -26
- invarlock/cli/app.py +128 -27
- invarlock/cli/commands/__init__.py +2 -2
- invarlock/cli/commands/calibrate.py +48 -4
- invarlock/cli/commands/doctor.py +8 -10
- invarlock/cli/commands/evaluate.py +986 -0
- invarlock/cli/commands/explain_gates.py +25 -17
- invarlock/cli/commands/export_html.py +11 -9
- invarlock/cli/commands/plugins.py +13 -9
- invarlock/cli/commands/report.py +326 -92
- invarlock/cli/commands/run.py +1160 -228
- invarlock/cli/commands/verify.py +157 -97
- invarlock/cli/config.py +1 -1
- invarlock/cli/determinism.py +1 -1
- invarlock/cli/doctor_helpers.py +4 -5
- invarlock/cli/output.py +193 -0
- invarlock/cli/provenance.py +4 -4
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/registry.py +9 -11
- invarlock/core/retry.py +14 -14
- invarlock/core/runner.py +112 -26
- invarlock/edits/noop.py +2 -2
- invarlock/edits/quant_rtn.py +67 -39
- invarlock/eval/__init__.py +1 -1
- invarlock/eval/bench.py +14 -10
- invarlock/eval/data.py +68 -23
- invarlock/eval/metrics.py +59 -1
- invarlock/eval/primary_metric.py +1 -1
- invarlock/eval/tasks/__init__.py +12 -0
- invarlock/eval/tasks/classification.py +48 -0
- invarlock/eval/tasks/qa.py +36 -0
- invarlock/eval/tasks/text_generation.py +102 -0
- invarlock/guards/invariants.py +19 -10
- invarlock/guards/rmt.py +2 -2
- invarlock/guards/spectral.py +1 -1
- invarlock/guards/variance.py +2 -2
- invarlock/model_profile.py +64 -62
- invarlock/observability/health.py +6 -6
- invarlock/observability/metrics.py +108 -0
- invarlock/plugins/hf_bnb_adapter.py +32 -21
- invarlock/reporting/__init__.py +18 -4
- invarlock/reporting/guards_analysis.py +154 -4
- invarlock/reporting/html.py +61 -11
- invarlock/reporting/normalizer.py +9 -2
- invarlock/reporting/policy_utils.py +1 -1
- invarlock/reporting/primary_metric_utils.py +11 -11
- invarlock/reporting/render.py +876 -510
- invarlock/reporting/report.py +72 -30
- invarlock/reporting/{certificate.py → report_builder.py} +252 -99
- invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
- invarlock/reporting/report_types.py +6 -1
- invarlock/reporting/telemetry.py +86 -0
- invarlock-0.3.8.dist-info/METADATA +283 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
- invarlock/adapters/hf_gpt2.py +0 -404
- invarlock/adapters/hf_llama.py +0 -487
- invarlock/cli/commands/certify.py +0 -422
- invarlock-0.3.6.dist-info/METADATA +0 -588
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
invarlock/eval/tasks/qa.py
ADDED

@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+from collections.abc import Iterable
+from typing import Any
+
+
+def _normalize(text: str) -> str:
+    return " ".join(str(text).strip().lower().split())
+
+
+def exact_match_from_records(records: Iterable[dict[str, Any]]) -> float:
+    """Compute exact-match accuracy for QA-style records.
+
+    Accepted record shapes:
+    - {"prediction": "...", "answer": "..."}
+    - {"prediction": "...", "answers": ["...", ...]}
+    """
+    total = 0
+    correct = 0
+    for record in records:
+        if not isinstance(record, dict):
+            continue
+        pred = record.get("prediction")
+        answers = record.get("answers")
+        if answers is None and "answer" in record:
+            answers = [record.get("answer")]
+        if pred is None or answers is None:
+            continue
+        pred_norm = _normalize(pred)
+        answer_list = answers if isinstance(answers, list) else [answers]
+        total += 1
+        if any(_normalize(a) == pred_norm for a in answer_list if a is not None):
+            correct += 1
+    if total == 0:
+        return float("nan")
+    return float(correct / total)
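For reference, a minimal usage sketch of the new exact-match helper. The import path is assumed from the file listing above, and the record shapes follow the docstring.

```python
# Hypothetical usage sketch; module path assumed from the file listing above.
from invarlock.eval.tasks.qa import exact_match_from_records

records = [
    {"prediction": "Paris", "answer": "paris"},                 # normalized match -> correct
    {"prediction": "Lyon", "answers": ["Paris", "Marseille"]},  # no match -> incorrect
]
print(exact_match_from_records(records))  # 0.5; returns NaN when no record is scorable
```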
invarlock/eval/tasks/text_generation.py
ADDED

@@ -0,0 +1,102 @@
+from __future__ import annotations
+
+from collections import Counter
+from collections.abc import Iterable
+from typing import Any
+
+
+def _tokenize(text: str) -> list[str]:
+    return [tok for tok in str(text).strip().lower().split() if tok]
+
+
+def _bleu1(pred: str, ref: str) -> float:
+    pred_tokens = _tokenize(pred)
+    ref_tokens = _tokenize(ref)
+    if not pred_tokens or not ref_tokens:
+        return 0.0
+    pred_counts = Counter(pred_tokens)
+    ref_counts = Counter(ref_tokens)
+    overlap = sum(min(pred_counts[tok], ref_counts.get(tok, 0)) for tok in pred_counts)
+    precision = overlap / float(len(pred_tokens))
+    bp = 1.0
+    if len(pred_tokens) < len(ref_tokens):
+        bp = pow(2.718281828, 1.0 - (len(ref_tokens) / float(len(pred_tokens))))
+    return float(precision * bp)
+
+
+def bleu1_from_records(records: Iterable[dict[str, Any]]) -> float:
+    """Compute BLEU-1 from records with predictions and references."""
+    scores: list[float] = []
+    for record in records:
+        if not isinstance(record, dict):
+            continue
+        pred = record.get("prediction")
+        refs = record.get("references")
+        if pred is None:
+            continue
+        if refs is None and "reference" in record:
+            refs = [record.get("reference")]
+        if refs is None:
+            continue
+        ref_list = refs if isinstance(refs, list) else [refs]
+        best = 0.0
+        for ref in ref_list:
+            if ref is None:
+                continue
+            best = max(best, _bleu1(str(pred), str(ref)))
+        scores.append(best)
+    if not scores:
+        return float("nan")
+    return float(sum(scores) / float(len(scores)))
+
+
+def _lcs_len(a: list[str], b: list[str]) -> int:
+    if not a or not b:
+        return 0
+    dp = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]
+    for i, tok_a in enumerate(a, start=1):
+        for j, tok_b in enumerate(b, start=1):
+            if tok_a == tok_b:
+                dp[i][j] = dp[i - 1][j - 1] + 1
+            else:
+                dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
+    return dp[-1][-1]
+
+
+def _rouge_l(pred: str, ref: str) -> float:
+    pred_tokens = _tokenize(pred)
+    ref_tokens = _tokenize(ref)
+    if not pred_tokens or not ref_tokens:
+        return 0.0
+    lcs = _lcs_len(pred_tokens, ref_tokens)
+    prec = lcs / float(len(pred_tokens))
+    rec = lcs / float(len(ref_tokens))
+    if prec + rec == 0:
+        return 0.0
+    return float(2 * prec * rec / (prec + rec))
+
+
+def rouge_l_from_records(records: Iterable[dict[str, Any]]) -> float:
+    """Compute ROUGE-L (F1) from records with predictions and references."""
+    scores: list[float] = []
+    for record in records:
+        if not isinstance(record, dict):
+            continue
+        pred = record.get("prediction")
+        refs = record.get("references")
+        if pred is None:
+            continue
+        if refs is None and "reference" in record:
+            refs = [record.get("reference")]
+        if refs is None:
+            continue
+        ref_list = refs if isinstance(refs, list) else [refs]
+        best = 0.0
+        for ref in ref_list:
+            if ref is None:
+                continue
+            best = max(best, _rouge_l(str(pred), str(ref)))
+        scores.append(best)
+    if not scores:
+        return float("nan")
+    return float(sum(scores) / float(len(scores)))
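Similarly, a short sketch of how the BLEU-1 and ROUGE-L helpers above might be called. The import path is assumed from the file listing; both take the best score over the available references per record and return NaN when nothing is scorable.

```python
# Hypothetical usage sketch; module path assumed from the file listing above.
from invarlock.eval.tasks.text_generation import bleu1_from_records, rouge_l_from_records

records = [
    {"prediction": "the cat sat", "reference": "the cat sat on the mat"},
    {"prediction": "a dog", "references": ["the dog barked", "a dog barked"]},
]
print(bleu1_from_records(records))    # mean per-record best BLEU-1 (unigram precision x brevity penalty)
print(rouge_l_from_records(records))  # mean per-record best ROUGE-L F1 (LCS-based)
```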
invarlock/guards/invariants.py
CHANGED

@@ -5,6 +5,7 @@ InvarLock Guards - Invariants
 Invariant checking for model edits to ensure structural integrity.
 """
 
+import hashlib
 from typing import Any
 
 import torch

@@ -33,6 +34,7 @@ class InvariantsGuard(Guard):
         self.on_fail = on_fail
         self.prepared = False
         self.baseline_checks: dict[str, Any] = {}
+        self.last_current_checks: dict[str, Any] = {}
         self.profile_checks: tuple[str, ...] = ()
 
     def prepare(

@@ -102,6 +104,10 @@
             "action": outcome.action,
             "violations": outcome.violations,
             "metrics": outcome.metrics,
+            "details": {
+                "baseline_checks": self.baseline_checks,
+                "current_checks": self.last_current_checks,
+            },
         }
 
     def finalize(self, model: Any) -> GuardOutcome:

@@ -125,6 +131,7 @@
 
         # Check current invariants
         current_checks = self._capture_invariants(model, None)
+        self.last_current_checks = current_checks
        violations: list[dict[str, Any]] = []
        tokenizer_mismatches: list[dict[str, Any]] = []
 

@@ -354,14 +361,14 @@
         except Exception:
             pass
 
-        #
+        # Decoder embed_tokens style (model.embed_tokens <-> lm_head)
         try:
-
-            embed_tokens = getattr(
+            decoder_model = getattr(model, "model", None)
+            embed_tokens = getattr(decoder_model, "embed_tokens", None)
             embed_weight = getattr(embed_tokens, "weight", None)
-
-            if embed_weight is not None and
-            weight_tying_flags["
+            head_weight = getattr(getattr(model, "lm_head", None), "weight", None)
+            if embed_weight is not None and head_weight is not None:
+                weight_tying_flags["embed_tokens"] = _is_tied(embed_weight, head_weight)
         except Exception:
             pass
 

@@ -376,8 +383,10 @@
             structure_items = []
             for name, module in model.named_modules():
                 structure_items.append(f"{name}:{type(module).__name__}")
-
-            checks["structure_hash"] =
+            canonical = "\n".join(sorted(structure_items))
+            checks["structure_hash"] = hashlib.sha256(
+                canonical.encode("utf-8")
+            ).hexdigest()[:16]
         except Exception:
             checks["structure_hash"] = 0
 

@@ -424,7 +433,7 @@
             return "bert" in model_type or has_cls_decoder
 
         if name in {"rope_rotary_embedding", "rotary_embedding"}:
-            # Detect rotary embeddings used by
+            # Detect rotary embeddings used by RoPE-style models
             if hasattr(model, "model") and hasattr(model.model, "layers"):
                 first_layer = model.model.layers[0] if model.model.layers else None
             else:

@@ -443,7 +452,7 @@
             model_type = getattr(config, "model_type", "") if config else ""
             return any(
                 keyword in model_type
-                for keyword in ("gpt", "
+                for keyword in ("gpt", "mistral", "mixtral", "qwen", "opt", "phi")
             )
 
         return True
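The structure_hash change above replaces the previous placeholder with a deterministic fingerprint: module names and types are sorted, joined, and hashed with SHA-256, then truncated to 16 hex characters. A standalone sketch of the same idea (illustrative only, not the guard's actual entry point):

```python
import hashlib

import torch.nn as nn

# Illustrative sketch of the structure-hash scheme used in the hunk above.
model = nn.Sequential(nn.Linear(4, 4), nn.ReLU(), nn.Linear(4, 2))
items = [f"{name}:{type(module).__name__}" for name, module in model.named_modules()]
canonical = "\n".join(sorted(items))
structure_hash = hashlib.sha256(canonical.encode("utf-8")).hexdigest()[:16]
print(structure_hash)  # stable 16-hex-char fingerprint; changes if the module tree changes
```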
invarlock/guards/rmt.py
CHANGED

@@ -387,7 +387,7 @@ def _iter_transformer_layers(model: nn.Module):
         except (TypeError, AttributeError):
             pass
     elif hasattr(model, "model") and hasattr(model.model, "layers"):
-        #
+        # RoPE decoder style
         layers = model.model.layers
         if hasattr(layers, "__iter__") and hasattr(layers, "__len__"):
             try:

@@ -746,7 +746,7 @@ def rmt_detect_with_names(
         for idx, layer in enumerate(h_layers):
             layer_modules.append((f"transformer.h.{idx}", layer))
     elif hasattr(model, "model") and hasattr(model.model, "layers"):
-        #
+        # RoPE decoder style
         layers = model.model.layers
         if hasattr(layers, "__iter__"):
             for idx, layer in enumerate(layers):
invarlock/guards/spectral.py
CHANGED

@@ -344,7 +344,7 @@ class SpectralGuard(Guard):
             self.ignore_preview_inflation = bool(policy["ignore_preview_inflation"])
             self.config["ignore_preview_inflation"] = self.ignore_preview_inflation
 
-        # Optional hydration of baseline stats from policy (e.g., baseline
+        # Optional hydration of baseline stats from policy (e.g., baseline evaluation report)
         if "baseline_family_stats" in policy and isinstance(
             policy["baseline_family_stats"], dict
         ):
invarlock/guards/variance.py
CHANGED

@@ -121,7 +121,7 @@ def _iter_transformer_layers(model: nn.Module):
         # GPT-2 style
         yield from model.transformer.h
     elif hasattr(model, "model") and hasattr(model.model, "layers"):
-        #
+        # RoPE decoder style
         yield from model.model.layers
     elif hasattr(model, "encoder") and hasattr(model.encoder, "layer"):
         # BERT style

@@ -214,7 +214,7 @@ def equalise_residual_variance(
             hooks[name] = attn_proj.register_forward_hook(_branch_hook(name))
 
         if hasattr(blk, "mlp"):
-            # Check for c_proj (GPT-2) or down_proj (
+            # Check for c_proj (GPT-2) or down_proj (RoPE decoder) or fc2 (generic)
             mlp_proj = (
                 getattr(blk.mlp, "c_proj", None)
                 or getattr(blk.mlp, "down_proj", None)
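The comments added in rmt.py and variance.py all refer to the same layer-discovery order used when walking transformer blocks. A condensed sketch of that probing order (simplified from the hunks above, not the exact guard code):

```python
import torch.nn as nn


def iter_transformer_layers(model: nn.Module):
    """Simplified sketch of the layer-discovery order referenced in the hunks above."""
    if hasattr(model, "transformer") and hasattr(model.transformer, "h"):
        yield from model.transformer.h   # GPT-2 style: model.transformer.h
    elif hasattr(model, "model") and hasattr(model.model, "layers"):
        yield from model.model.layers    # RoPE decoder style (Llama/Mistral-like): model.model.layers
    elif hasattr(model, "encoder") and hasattr(model.encoder, "layer"):
        yield from model.encoder.layer   # BERT style: model.encoder.layer
```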
invarlock/model_profile.py
CHANGED

@@ -6,11 +6,9 @@ from dataclasses import dataclass, field
 from typing import Any
 
 AutoTokenizer: Any | None = None
-GPT2Tokenizer: Any | None = None
 
 try:
     from transformers import AutoTokenizer as _AutoTokenizer
-    from transformers import GPT2Tokenizer as _GPT2Tokenizer
     from transformers.tokenization_utils_base import PreTrainedTokenizerBase
 except Exception:  # pragma: no cover - exercised only when transformers is absent
 

@@ -26,7 +24,6 @@ except Exception:  # pragma: no cover - exercised only when transformers is abse
 
 else:  # pragma: no cover - transformers optional
     AutoTokenizer = _AutoTokenizer
-    GPT2Tokenizer = _GPT2Tokenizer
 
 
 TokenizerFactory = Callable[[], tuple[PreTrainedTokenizerBase, str]]

@@ -106,7 +103,7 @@ def _gpt2_selectors() -> dict[str, list[str]]:
     }
 
 
-def
+def _rope_decoder_selectors() -> dict[str, list[str]]:
     return {
         "attention": [
             "self_attn.q_proj",

@@ -177,12 +174,12 @@ def _make_bert_tokenizer(model_id: str):
 
 def _make_gpt2_tokenizer(model_id: str):
     def factory() -> tuple[PreTrainedTokenizerBase, str]:
-        if
+        if AutoTokenizer is None:
             raise RuntimeError(
                 "GPT-2 tokenizers require the 'transformers' extra. "
                 "Install it with: pip install 'invarlock[adapters]'."
             )
-        tokenizer =
+        tokenizer = AutoTokenizer.from_pretrained("gpt2")
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
         hash_value = _hash_tokenizer(tokenizer)

@@ -191,34 +188,26 @@ def _make_gpt2_tokenizer(model_id: str):
     return factory
 
 
-def
+def _make_causal_auto_tokenizer(model_id: str):
     def factory() -> tuple[PreTrainedTokenizerBase, str]:
-        if AutoTokenizer is None
+        if AutoTokenizer is None:
             raise RuntimeError(
-                "
+                "Causal tokenizers require the 'transformers' extra. "
                 "Install it with: pip install 'invarlock[adapters]'."
             )
         # Try offline-first to respect InvarLock network guard; fall back to a
         # local GPT-2 tokenizer if the model assets are not cached or network
         # access is denied.
         tokenizer = None
-
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
+        except Exception:
             try:
-                tokenizer = AutoTokenizer.from_pretrained(
-                    model_id, local_files_only=True
-                )
+                tokenizer = AutoTokenizer.from_pretrained(model_id)
             except Exception:
-
-                tokenizer = AutoTokenizer.from_pretrained(model_id)
-            except Exception:
-                tokenizer = None
+                tokenizer = None
         if tokenizer is None:
-
-            raise RuntimeError(
-                "Tokenization requires the 'transformers' extra. "
-                "Install it with: pip install 'invarlock[adapters]'."
-            )
-            tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+            tokenizer = AutoTokenizer.from_pretrained("gpt2")
         # Ensure padding/bos tokens are configured so downstream encoding
         # yields stable non-zero ids and a valid attention mask regardless of
         # environment defaults or tokenizer variants.

@@ -227,7 +216,7 @@ def _make_llama_tokenizer(model_id: str):
             eos_token = getattr(tokenizer, "eos_token", None)
             if eos_token is not None:
                 tokenizer.pad_token = eos_token
-        # Some
+        # Some causal tokenizers default to not adding a BOS token on encode;
         # enable it to guarantee at least one non-pad, non-zero token id.
         if hasattr(tokenizer, "add_bos_token"):
             try:

@@ -246,30 +235,22 @@ def _make_llama_tokenizer(model_id: str):
 
 def _make_unknown_tokenizer(model_id: str):
     def factory() -> tuple[PreTrainedTokenizerBase, str]:
-        if AutoTokenizer is None
+        if AutoTokenizer is None:
             raise RuntimeError(
                 "Text tokenization requires the 'transformers' extra. "
                 "Install it with: pip install 'invarlock[adapters]'."
             )
         # Unknown families: try local-only first, then remote, then degrade to GPT-2
         tokenizer = None
-
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
+        except Exception:
             try:
-                tokenizer = AutoTokenizer.from_pretrained(
-                    model_id, local_files_only=True
-                )
+                tokenizer = AutoTokenizer.from_pretrained(model_id)
             except Exception:
-
-                tokenizer = AutoTokenizer.from_pretrained(model_id)
-            except Exception:
-                tokenizer = None
+                tokenizer = None
         if tokenizer is None:
-
-            raise RuntimeError(
-                "Text tokenization requires the 'transformers' extra. "
-                "Install it with: pip install 'invarlock[adapters]'."
-            )
-            tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+            tokenizer = AutoTokenizer.from_pretrained("gpt2")
         if getattr(tokenizer, "pad_token", None) is None:
             eos_token = getattr(tokenizer, "eos_token", None)
             if eos_token is not None:

@@ -289,7 +270,7 @@ def detect_model_profile(model_id: str, adapter: str | None = None) -> ModelProf
     model_lower = (model_id or "").lower()
 
     if any(
-        keyword in adapter_lower for keyword in ("bert", "roberta", "deberta")
+        keyword in adapter_lower for keyword in ("hf_mlm", "bert", "roberta", "deberta")
     ) or any(keyword in model_lower for keyword in ("bert", "roberta", "deberta")):
         return ModelProfile(
             family="bert",

@@ -302,57 +283,78 @@ def detect_model_profile(model_id: str, adapter: str | None = None) -> ModelProf
             cert_lints=(
                 {
                     "type": "equals",
-                    "path": "
-                    "value": "
-                    "message": "BERT cert must
+                    "path": "primary_metric.kind",
+                    "value": "ppl_mlm",
+                    "message": "BERT cert must use MLM metric.",
                 },
                 {
                     "type": "gte",
-                    "path": "
+                    "path": "telemetry.masked_tokens_total",
                     "value": "1",
                     "message": "BERT cert must report masked tokens.",
                 },
             ),
         )
 
-    if any(keyword in adapter_lower for keyword in ("
-        keyword in model_lower for keyword in ("
+    if any(keyword in adapter_lower for keyword in ("hf_seq2seq", "t5", "bart")) or any(
+        keyword in model_lower for keyword in ("t5", "bart")
     ):
         return ModelProfile(
-            family="
+            family="seq2seq",
+            default_loss="seq2seq",
+            make_tokenizer=_make_unknown_tokenizer(model_id),
+            default_metric="ppl_seq2seq",
+            default_provider="wikitext2",
+            module_selectors=_unknown_selectors(),
+            invariants=(),
+            cert_lints=(),
+        )
+
+    if any(
+        keyword in adapter_lower for keyword in ("gpt", "neox", "opt", "phi")
+    ) or any(keyword in model_lower for keyword in ("gpt", "neox", "opt", "phi")):
+        return ModelProfile(
+            family="gpt2",
             default_loss="causal",
-            make_tokenizer=
+            make_tokenizer=_make_gpt2_tokenizer(model_id),
             default_metric="ppl_causal",
             default_provider="wikitext2",
-            module_selectors=
-            invariants=("
+            module_selectors=_gpt2_selectors(),
+            invariants=("causal_masking",),
             cert_lints=(
                 {
                     "type": "equals",
-                    "path": "
-                    "value": "
-                    "message": "
+                    "path": "primary_metric.kind",
+                    "value": "ppl_causal",
+                    "message": "GPT-style cert must use causal ppl metric.",
                 },
             ),
         )
 
     if any(
-        keyword in adapter_lower for keyword in ("
-    ) or any(
+        keyword in adapter_lower for keyword in ("mistral", "mixtral", "qwen", "yi")
+    ) or any(
+        keyword in model_lower for keyword in ("mistral", "mixtral", "qwen", "yi")
+    ):
+        family = "causal"
+        for keyword in ("mixtral", "mistral", "qwen", "yi"):
+            if keyword in adapter_lower or keyword in model_lower:
+                family = keyword
+                break
         return ModelProfile(
-            family=
+            family=family,
             default_loss="causal",
-            make_tokenizer=
+            make_tokenizer=_make_causal_auto_tokenizer(model_id),
             default_metric="ppl_causal",
             default_provider="wikitext2",
-            module_selectors=
-            invariants=("
+            module_selectors=_rope_decoder_selectors(),
+            invariants=("rope_rotary_embedding",),
             cert_lints=(
                 {
                     "type": "equals",
-                    "path": "
-                    "value": "
-                    "message": "
+                    "path": "primary_metric.kind",
+                    "value": "ppl_causal",
+                    "message": "Causal cert must use causal ppl metric.",
                 },
             ),
         )
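The tokenizer factories above share one offline-first pattern: try the locally cached tokenizer, then a network fetch, then degrade to a GPT-2 tokenizer. A standalone sketch of that fallback (the function name is illustrative; the real factories also normalize pad/bos tokens and return a tokenizer hash):

```python
from transformers import AutoTokenizer


def load_tokenizer_offline_first(model_id: str):
    """Illustrative sketch of the offline-first fallback used by the factories above."""
    try:
        # Respect the network guard: prefer locally cached assets.
        return AutoTokenizer.from_pretrained(model_id, local_files_only=True)
    except Exception:
        try:
            # Fall back to a network fetch when allowed.
            return AutoTokenizer.from_pretrained(model_id)
        except Exception:
            # Last resort: degrade to the bundled GPT-2 tokenizer.
            return AutoTokenizer.from_pretrained("gpt2")
```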
invarlock/observability/health.py
CHANGED

@@ -374,15 +374,15 @@ class InvarLockHealthChecker(HealthChecker):
         """Check adapter availability."""
         try:
             from invarlock.adapters import (
-
-
-
+                HF_Causal_Adapter,
+                HF_MLM_Adapter,
+                HF_Seq2Seq_Adapter,
             )
 
             adapters = {
-                "
-                "
-                "
+                "hf_causal": HF_Causal_Adapter,
+                "hf_mlm": HF_MLM_Adapter,
+                "hf_seq2seq": HF_Seq2Seq_Adapter,
             }
 
             available_adapters = []
invarlock/observability/metrics.py
CHANGED

@@ -455,3 +455,111 @@ def create_resource_metrics(registry: MetricsRegistry) -> dict[str, Any]:
         "gpu_memory": registry.register_gauge("invarlock.resource.gpu_memory_percent"),
         "disk_usage": registry.register_gauge("invarlock.resource.disk_percent"),
     }
+
+
+def reset_peak_memory_stats() -> None:
+    """Reset GPU peak memory stats when available."""
+    try:
+        import torch
+
+        if torch.cuda.is_available():
+            torch.cuda.reset_peak_memory_stats()
+        mps = getattr(torch, "mps", None)
+        if mps is not None and hasattr(mps, "reset_peak_memory_stats"):
+            mps.reset_peak_memory_stats()
+    except Exception:
+        pass
+
+
+def capture_memory_snapshot(
+    phase: str, *, timestamp: float | None = None
+) -> dict[str, Any]:
+    """Capture a point-in-time memory snapshot for the current process."""
+    snapshot: dict[str, Any] = {"phase": str(phase)}
+    if timestamp is None:
+        timestamp = time.time()
+    snapshot["ts"] = float(timestamp)
+
+    try:
+        import os
+
+        import psutil
+
+        process = psutil.Process(os.getpid())
+        rss_mb = process.memory_info().rss / 1024 / 1024
+        snapshot["rss_mb"] = float(rss_mb)
+    except Exception:
+        pass
+
+    try:
+        import torch
+
+        if torch.cuda.is_available():
+            device_index = torch.cuda.current_device()
+            snapshot["gpu_device"] = f"cuda:{device_index}"
+            snapshot["gpu_mb"] = float(
+                torch.cuda.memory_allocated(device_index) / 1024 / 1024
+            )
+            snapshot["gpu_reserved_mb"] = float(
+                torch.cuda.memory_reserved(device_index) / 1024 / 1024
+            )
+            snapshot["gpu_peak_mb"] = float(
+                torch.cuda.max_memory_allocated(device_index) / 1024 / 1024
+            )
+            snapshot["gpu_peak_reserved_mb"] = float(
+                torch.cuda.max_memory_reserved(device_index) / 1024 / 1024
+            )
+        else:
+            mps = getattr(torch, "mps", None)
+            if mps is not None and hasattr(torch.backends, "mps"):
+                if torch.backends.mps.is_available():
+                    snapshot["gpu_device"] = "mps"
+                    if hasattr(mps, "current_allocated_memory"):
+                        snapshot["gpu_mb"] = float(
+                            mps.current_allocated_memory() / 1024 / 1024
+                        )
+                    if hasattr(mps, "driver_allocated_memory"):
+                        snapshot["gpu_reserved_mb"] = float(
+                            mps.driver_allocated_memory() / 1024 / 1024
+                        )
+    except Exception:
+        pass
+
+    if len(snapshot) <= 2:
+        return {}
+    return snapshot
+
+
+def summarize_memory_snapshots(
+    snapshots: list[dict[str, Any]],
+) -> dict[str, float]:
+    """Summarize memory snapshots into peak metrics."""
+
+    def _peak(key: str) -> float | None:
+        values: list[float] = []
+        for entry in snapshots:
+            if not isinstance(entry, dict):
+                continue
+            value = entry.get(key)
+            if isinstance(value, int | float):
+                values.append(float(value))
+        return max(values) if values else None
+
+    summary: dict[str, float] = {}
+    rss_peak = _peak("rss_mb")
+    if rss_peak is not None:
+        summary["memory_mb_peak"] = rss_peak
+
+    gpu_peak = _peak("gpu_peak_mb")
+    if gpu_peak is None:
+        gpu_peak = _peak("gpu_mb")
+    if gpu_peak is not None:
+        summary["gpu_memory_mb_peak"] = gpu_peak
+
+    gpu_reserved_peak = _peak("gpu_peak_reserved_mb")
+    if gpu_reserved_peak is None:
+        gpu_reserved_peak = _peak("gpu_reserved_mb")
+    if gpu_reserved_peak is not None:
+        summary["gpu_memory_reserved_mb_peak"] = gpu_reserved_peak
+
+    return summary
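A sketch of how the new memory helpers compose. The import path is assumed from the file listing above; empty snapshots are filtered out because capture_memory_snapshot returns {} when nothing could be measured.

```python
# Hypothetical usage sketch; module path assumed from the file listing above.
from invarlock.observability.metrics import (
    capture_memory_snapshot,
    reset_peak_memory_stats,
    summarize_memory_snapshots,
)

reset_peak_memory_stats()
snapshots = []
snapshots.append(capture_memory_snapshot("load"))
# ... load the model, apply the edit, run evaluation ...
snapshots.append(capture_memory_snapshot("evaluate"))

summary = summarize_memory_snapshots([s for s in snapshots if s])
# e.g. {"memory_mb_peak": ..., "gpu_memory_mb_peak": ..., "gpu_memory_reserved_mb_peak": ...}
```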