invarlock 0.3.6-py3-none-any.whl → 0.3.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. invarlock/__init__.py +4 -4
  2. invarlock/adapters/__init__.py +10 -14
  3. invarlock/adapters/auto.py +37 -50
  4. invarlock/adapters/capabilities.py +2 -2
  5. invarlock/adapters/hf_causal.py +418 -0
  6. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  7. invarlock/adapters/hf_loading.py +7 -7
  8. invarlock/adapters/hf_mixin.py +53 -9
  9. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  10. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  11. invarlock/assurance/__init__.py +15 -23
  12. invarlock/cli/adapter_auto.py +32 -26
  13. invarlock/cli/app.py +128 -27
  14. invarlock/cli/commands/__init__.py +2 -2
  15. invarlock/cli/commands/calibrate.py +48 -4
  16. invarlock/cli/commands/doctor.py +8 -10
  17. invarlock/cli/commands/evaluate.py +986 -0
  18. invarlock/cli/commands/explain_gates.py +25 -17
  19. invarlock/cli/commands/export_html.py +11 -9
  20. invarlock/cli/commands/plugins.py +13 -9
  21. invarlock/cli/commands/report.py +326 -92
  22. invarlock/cli/commands/run.py +1160 -228
  23. invarlock/cli/commands/verify.py +157 -97
  24. invarlock/cli/config.py +1 -1
  25. invarlock/cli/determinism.py +1 -1
  26. invarlock/cli/doctor_helpers.py +4 -5
  27. invarlock/cli/output.py +193 -0
  28. invarlock/cli/provenance.py +4 -4
  29. invarlock/core/bootstrap.py +1 -1
  30. invarlock/core/registry.py +9 -11
  31. invarlock/core/retry.py +14 -14
  32. invarlock/core/runner.py +112 -26
  33. invarlock/edits/noop.py +2 -2
  34. invarlock/edits/quant_rtn.py +67 -39
  35. invarlock/eval/__init__.py +1 -1
  36. invarlock/eval/bench.py +14 -10
  37. invarlock/eval/data.py +68 -23
  38. invarlock/eval/metrics.py +59 -1
  39. invarlock/eval/primary_metric.py +1 -1
  40. invarlock/eval/tasks/__init__.py +12 -0
  41. invarlock/eval/tasks/classification.py +48 -0
  42. invarlock/eval/tasks/qa.py +36 -0
  43. invarlock/eval/tasks/text_generation.py +102 -0
  44. invarlock/guards/invariants.py +19 -10
  45. invarlock/guards/rmt.py +2 -2
  46. invarlock/guards/spectral.py +1 -1
  47. invarlock/guards/variance.py +2 -2
  48. invarlock/model_profile.py +64 -62
  49. invarlock/observability/health.py +6 -6
  50. invarlock/observability/metrics.py +108 -0
  51. invarlock/plugins/hf_bnb_adapter.py +32 -21
  52. invarlock/reporting/__init__.py +18 -4
  53. invarlock/reporting/guards_analysis.py +154 -4
  54. invarlock/reporting/html.py +61 -11
  55. invarlock/reporting/normalizer.py +9 -2
  56. invarlock/reporting/policy_utils.py +1 -1
  57. invarlock/reporting/primary_metric_utils.py +11 -11
  58. invarlock/reporting/render.py +876 -510
  59. invarlock/reporting/report.py +72 -30
  60. invarlock/reporting/{certificate.py → report_builder.py} +252 -99
  61. invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
  62. invarlock/reporting/report_types.py +6 -1
  63. invarlock/reporting/telemetry.py +86 -0
  64. invarlock-0.3.8.dist-info/METADATA +283 -0
  65. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
  66. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
  67. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
  68. invarlock/adapters/hf_gpt2.py +0 -404
  69. invarlock/adapters/hf_llama.py +0 -487
  70. invarlock/cli/commands/certify.py +0 -422
  71. invarlock-0.3.6.dist-info/METADATA +0 -588
  72. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
  73. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
invarlock/eval/tasks/qa.py ADDED
@@ -0,0 +1,36 @@
+ from __future__ import annotations
+
+ from collections.abc import Iterable
+ from typing import Any
+
+
+ def _normalize(text: str) -> str:
+     return " ".join(str(text).strip().lower().split())
+
+
+ def exact_match_from_records(records: Iterable[dict[str, Any]]) -> float:
+     """Compute exact-match accuracy for QA-style records.
+
+     Accepted record shapes:
+     - {"prediction": "...", "answer": "..."}
+     - {"prediction": "...", "answers": ["...", ...]}
+     """
+     total = 0
+     correct = 0
+     for record in records:
+         if not isinstance(record, dict):
+             continue
+         pred = record.get("prediction")
+         answers = record.get("answers")
+         if answers is None and "answer" in record:
+             answers = [record.get("answer")]
+         if pred is None or answers is None:
+             continue
+         pred_norm = _normalize(pred)
+         answer_list = answers if isinstance(answers, list) else [answers]
+         total += 1
+         if any(_normalize(a) == pred_norm for a in answer_list if a is not None):
+             correct += 1
+     if total == 0:
+         return float("nan")
+     return float(correct / total)
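The new QA task scores exact match over normalized strings (lowercased, whitespace-collapsed) and returns NaN when no record is scorable. A small usage sketch with made-up records shaped as in the docstring above:

    from invarlock.eval.tasks.qa import exact_match_from_records

    records = [
        {"prediction": "Paris", "answer": "paris"},                   # normalization makes these equal
        {"prediction": " Berlin ", "answers": ["Munich", "Berlin"]},  # any matching answer counts
    ]
    print(exact_match_from_records(records))  # 1.0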
invarlock/eval/tasks/text_generation.py ADDED
@@ -0,0 +1,102 @@
+ from __future__ import annotations
+
+ from collections import Counter
+ from collections.abc import Iterable
+ from typing import Any
+
+
+ def _tokenize(text: str) -> list[str]:
+     return [tok for tok in str(text).strip().lower().split() if tok]
+
+
+ def _bleu1(pred: str, ref: str) -> float:
+     pred_tokens = _tokenize(pred)
+     ref_tokens = _tokenize(ref)
+     if not pred_tokens or not ref_tokens:
+         return 0.0
+     pred_counts = Counter(pred_tokens)
+     ref_counts = Counter(ref_tokens)
+     overlap = sum(min(pred_counts[tok], ref_counts.get(tok, 0)) for tok in pred_counts)
+     precision = overlap / float(len(pred_tokens))
+     bp = 1.0
+     if len(pred_tokens) < len(ref_tokens):
+         bp = pow(2.718281828, 1.0 - (len(ref_tokens) / float(len(pred_tokens))))
+     return float(precision * bp)
+
+
+ def bleu1_from_records(records: Iterable[dict[str, Any]]) -> float:
+     """Compute BLEU-1 from records with predictions and references."""
+     scores: list[float] = []
+     for record in records:
+         if not isinstance(record, dict):
+             continue
+         pred = record.get("prediction")
+         refs = record.get("references")
+         if pred is None:
+             continue
+         if refs is None and "reference" in record:
+             refs = [record.get("reference")]
+         if refs is None:
+             continue
+         ref_list = refs if isinstance(refs, list) else [refs]
+         best = 0.0
+         for ref in ref_list:
+             if ref is None:
+                 continue
+             best = max(best, _bleu1(str(pred), str(ref)))
+         scores.append(best)
+     if not scores:
+         return float("nan")
+     return float(sum(scores) / float(len(scores)))
+
+
+ def _lcs_len(a: list[str], b: list[str]) -> int:
+     if not a or not b:
+         return 0
+     dp = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]
+     for i, tok_a in enumerate(a, start=1):
+         for j, tok_b in enumerate(b, start=1):
+             if tok_a == tok_b:
+                 dp[i][j] = dp[i - 1][j - 1] + 1
+             else:
+                 dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
+     return dp[-1][-1]
+
+
+ def _rouge_l(pred: str, ref: str) -> float:
+     pred_tokens = _tokenize(pred)
+     ref_tokens = _tokenize(ref)
+     if not pred_tokens or not ref_tokens:
+         return 0.0
+     lcs = _lcs_len(pred_tokens, ref_tokens)
+     prec = lcs / float(len(pred_tokens))
+     rec = lcs / float(len(ref_tokens))
+     if prec + rec == 0:
+         return 0.0
+     return float(2 * prec * rec / (prec + rec))
+
+
+ def rouge_l_from_records(records: Iterable[dict[str, Any]]) -> float:
+     """Compute ROUGE-L (F1) from records with predictions and references."""
+     scores: list[float] = []
+     for record in records:
+         if not isinstance(record, dict):
+             continue
+         pred = record.get("prediction")
+         refs = record.get("references")
+         if pred is None:
+             continue
+         if refs is None and "reference" in record:
+             refs = [record.get("reference")]
+         if refs is None:
+             continue
+         ref_list = refs if isinstance(refs, list) else [refs]
+         best = 0.0
+         for ref in ref_list:
+             if ref is None:
+                 continue
+             best = max(best, _rouge_l(str(pred), str(ref)))
+         scores.append(best)
+     if not scores:
+         return float("nan")
+     return float(sum(scores) / float(len(scores)))
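Both generation metrics take the same record shape and average each record's best score against one or more references: BLEU-1 is unigram precision with a brevity penalty, ROUGE-L is an LCS-based F1. A short sketch with a hypothetical record:

    from invarlock.eval.tasks.text_generation import bleu1_from_records, rouge_l_from_records

    records = [{"prediction": "the cat sat", "reference": "the cat sat on the mat"}]
    print(bleu1_from_records(records))    # precision 1.0 x brevity penalty exp(1 - 6/3) ≈ 0.368
    print(rouge_l_from_records(records))  # LCS = 3 -> precision 1.0, recall 0.5, F1 ≈ 0.667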
invarlock/guards/invariants.py CHANGED
@@ -5,6 +5,7 @@ InvarLock Guards - Invariants
  Invariant checking for model edits to ensure structural integrity.
  """

+ import hashlib
  from typing import Any

  import torch
@@ -33,6 +34,7 @@ class InvariantsGuard(Guard):
          self.on_fail = on_fail
          self.prepared = False
          self.baseline_checks: dict[str, Any] = {}
+         self.last_current_checks: dict[str, Any] = {}
          self.profile_checks: tuple[str, ...] = ()

      def prepare(
@@ -102,6 +104,10 @@
              "action": outcome.action,
              "violations": outcome.violations,
              "metrics": outcome.metrics,
+             "details": {
+                 "baseline_checks": self.baseline_checks,
+                 "current_checks": self.last_current_checks,
+             },
          }

      def finalize(self, model: Any) -> GuardOutcome:
@@ -125,6 +131,7 @@

          # Check current invariants
          current_checks = self._capture_invariants(model, None)
+         self.last_current_checks = current_checks
          violations: list[dict[str, Any]] = []
          tokenizer_mismatches: list[dict[str, Any]] = []

@@ -354,14 +361,14 @@
          except Exception:
              pass

-         # LLaMA style (model.embed_tokens <-> lm_head)
+         # Decoder embed_tokens style (model.embed_tokens <-> lm_head)
          try:
-             llama_model = getattr(model, "model", None)
-             embed_tokens = getattr(llama_model, "embed_tokens", None)
+             decoder_model = getattr(model, "model", None)
+             embed_tokens = getattr(decoder_model, "embed_tokens", None)
              embed_weight = getattr(embed_tokens, "weight", None)
-             llama_head_weight = getattr(getattr(model, "lm_head", None), "weight", None)
-             if embed_weight is not None and llama_head_weight is not None:
-                 weight_tying_flags["llama"] = _is_tied(embed_weight, llama_head_weight)
+             head_weight = getattr(getattr(model, "lm_head", None), "weight", None)
+             if embed_weight is not None and head_weight is not None:
+                 weight_tying_flags["embed_tokens"] = _is_tied(embed_weight, head_weight)
          except Exception:
              pass

@@ -376,8 +383,10 @@
              structure_items = []
              for name, module in model.named_modules():
                  structure_items.append(f"{name}:{type(module).__name__}")
-             structure_hash = hash(tuple(structure_items))
-             checks["structure_hash"] = structure_hash
+             canonical = "\n".join(sorted(structure_items))
+             checks["structure_hash"] = hashlib.sha256(
+                 canonical.encode("utf-8")
+             ).hexdigest()[:16]
          except Exception:
              checks["structure_hash"] = 0

@@ -424,7 +433,7 @@
              return "bert" in model_type or has_cls_decoder

          if name in {"rope_rotary_embedding", "rotary_embedding"}:
-             # Detect rotary embeddings used by LLaMA-style models
+             # Detect rotary embeddings used by RoPE-style models
              if hasattr(model, "model") and hasattr(model.model, "layers"):
                  first_layer = model.model.layers[0] if model.model.layers else None
              else:
@@ -443,7 +452,7 @@
              model_type = getattr(config, "model_type", "") if config else ""
              return any(
                  keyword in model_type
-                 for keyword in ("gpt", "llama", "mistral", "opt", "phi")
+                 for keyword in ("gpt", "mistral", "mixtral", "qwen", "opt", "phi")
              )

          return True
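The structure hash above now comes from a canonical, sorted module listing and a truncated SHA-256 digest instead of Python's built-in hash(), so the value is stable across interpreter runs (built-in string hashing is randomized per process unless PYTHONHASHSEED is pinned). A standalone sketch of the new scheme, with hypothetical module names:

    import hashlib

    structure_items = ["transformer.h.0:GPT2Block", "lm_head:Linear"]  # stand-in for named_modules() output
    canonical = "\n".join(sorted(structure_items))
    print(hashlib.sha256(canonical.encode("utf-8")).hexdigest()[:16])  # same 16-hex value on every run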
invarlock/guards/rmt.py CHANGED
@@ -387,7 +387,7 @@ def _iter_transformer_layers(model: nn.Module):
          except (TypeError, AttributeError):
              pass
      elif hasattr(model, "model") and hasattr(model.model, "layers"):
-         # LLaMA style
+         # RoPE decoder style
          layers = model.model.layers
          if hasattr(layers, "__iter__") and hasattr(layers, "__len__"):
              try:
@@ -746,7 +746,7 @@ def rmt_detect_with_names(
          for idx, layer in enumerate(h_layers):
              layer_modules.append((f"transformer.h.{idx}", layer))
      elif hasattr(model, "model") and hasattr(model.model, "layers"):
-         # LLaMA style
+         # RoPE decoder style
          layers = model.model.layers
          if hasattr(layers, "__iter__"):
              for idx, layer in enumerate(layers):
invarlock/guards/spectral.py CHANGED
@@ -344,7 +344,7 @@ class SpectralGuard(Guard):
          self.ignore_preview_inflation = bool(policy["ignore_preview_inflation"])
          self.config["ignore_preview_inflation"] = self.ignore_preview_inflation

-         # Optional hydration of baseline stats from policy (e.g., baseline certificate)
+         # Optional hydration of baseline stats from policy (e.g., baseline evaluation report)
          if "baseline_family_stats" in policy and isinstance(
              policy["baseline_family_stats"], dict
          ):
invarlock/guards/variance.py CHANGED
@@ -121,7 +121,7 @@ def _iter_transformer_layers(model: nn.Module):
          # GPT-2 style
          yield from model.transformer.h
      elif hasattr(model, "model") and hasattr(model.model, "layers"):
-         # LLaMA style
+         # RoPE decoder style
          yield from model.model.layers
      elif hasattr(model, "encoder") and hasattr(model.encoder, "layer"):
          # BERT style
@@ -214,7 +214,7 @@ def equalise_residual_variance(
              hooks[name] = attn_proj.register_forward_hook(_branch_hook(name))

          if hasattr(blk, "mlp"):
-             # Check for c_proj (GPT-2) or down_proj (LLaMA) or fc2 (generic)
+             # Check for c_proj (GPT-2) or down_proj (RoPE decoder) or fc2 (generic)
              mlp_proj = (
                  getattr(blk.mlp, "c_proj", None)
                  or getattr(blk.mlp, "down_proj", None)
invarlock/model_profile.py CHANGED
@@ -6,11 +6,9 @@ from dataclasses import dataclass, field
  from typing import Any

  AutoTokenizer: Any | None = None
- GPT2Tokenizer: Any | None = None

  try:
      from transformers import AutoTokenizer as _AutoTokenizer
-     from transformers import GPT2Tokenizer as _GPT2Tokenizer
      from transformers.tokenization_utils_base import PreTrainedTokenizerBase
  except Exception:  # pragma: no cover - exercised only when transformers is absent

@@ -26,7 +24,6 @@ except Exception: # pragma: no cover - exercised only when transformers is abse

  else:  # pragma: no cover - transformers optional
      AutoTokenizer = _AutoTokenizer
-     GPT2Tokenizer = _GPT2Tokenizer


  TokenizerFactory = Callable[[], tuple[PreTrainedTokenizerBase, str]]
@@ -106,7 +103,7 @@ def _gpt2_selectors() -> dict[str, list[str]]:
      }


- def _llama_selectors() -> dict[str, list[str]]:
+ def _rope_decoder_selectors() -> dict[str, list[str]]:
      return {
          "attention": [
              "self_attn.q_proj",
@@ -177,12 +174,12 @@ def _make_bert_tokenizer(model_id: str):

  def _make_gpt2_tokenizer(model_id: str):
      def factory() -> tuple[PreTrainedTokenizerBase, str]:
-         if GPT2Tokenizer is None:
+         if AutoTokenizer is None:
              raise RuntimeError(
                  "GPT-2 tokenizers require the 'transformers' extra. "
                  "Install it with: pip install 'invarlock[adapters]'."
              )
-         tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+         tokenizer = AutoTokenizer.from_pretrained("gpt2")
          if tokenizer.pad_token is None:
              tokenizer.pad_token = tokenizer.eos_token
          hash_value = _hash_tokenizer(tokenizer)
@@ -191,34 +188,26 @@ def _make_gpt2_tokenizer(model_id: str):
      return factory


- def _make_llama_tokenizer(model_id: str):
+ def _make_causal_auto_tokenizer(model_id: str):
      def factory() -> tuple[PreTrainedTokenizerBase, str]:
-         if AutoTokenizer is None and GPT2Tokenizer is None:
+         if AutoTokenizer is None:
              raise RuntimeError(
-                 "LLaMA-style tokenizers require the 'transformers' extra. "
+                 "Causal tokenizers require the 'transformers' extra. "
                  "Install it with: pip install 'invarlock[adapters]'."
              )
          # Try offline-first to respect InvarLock network guard; fall back to a
          # local GPT-2 tokenizer if the model assets are not cached or network
          # access is denied.
          tokenizer = None
-         if AutoTokenizer is not None:
+         try:
+             tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
+         except Exception:
              try:
-                 tokenizer = AutoTokenizer.from_pretrained(
-                     model_id, local_files_only=True
-                 )
+                 tokenizer = AutoTokenizer.from_pretrained(model_id)
              except Exception:
-                 try:
-                     tokenizer = AutoTokenizer.from_pretrained(model_id)
-                 except Exception:
-                     tokenizer = None
+                 tokenizer = None
          if tokenizer is None:
-             if GPT2Tokenizer is None:
-                 raise RuntimeError(
-                     "Tokenization requires the 'transformers' extra. "
-                     "Install it with: pip install 'invarlock[adapters]'."
-                 )
-             tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+             tokenizer = AutoTokenizer.from_pretrained("gpt2")
          # Ensure padding/bos tokens are configured so downstream encoding
          # yields stable non-zero ids and a valid attention mask regardless of
          # environment defaults or tokenizer variants.
@@ -227,7 +216,7 @@ def _make_llama_tokenizer(model_id: str):
              eos_token = getattr(tokenizer, "eos_token", None)
              if eos_token is not None:
                  tokenizer.pad_token = eos_token
-         # Some LLaMA tokenizers default to not adding a BOS token on encode;
+         # Some causal tokenizers default to not adding a BOS token on encode;
          # enable it to guarantee at least one non-pad, non-zero token id.
          if hasattr(tokenizer, "add_bos_token"):
              try:
@@ -246,30 +235,22 @@

  def _make_unknown_tokenizer(model_id: str):
      def factory() -> tuple[PreTrainedTokenizerBase, str]:
-         if AutoTokenizer is None and GPT2Tokenizer is None:
+         if AutoTokenizer is None:
              raise RuntimeError(
                  "Text tokenization requires the 'transformers' extra. "
                  "Install it with: pip install 'invarlock[adapters]'."
              )
          # Unknown families: try local-only first, then remote, then degrade to GPT-2
          tokenizer = None
-         if AutoTokenizer is not None:
+         try:
+             tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
+         except Exception:
              try:
-                 tokenizer = AutoTokenizer.from_pretrained(
-                     model_id, local_files_only=True
-                 )
+                 tokenizer = AutoTokenizer.from_pretrained(model_id)
              except Exception:
-                 try:
-                     tokenizer = AutoTokenizer.from_pretrained(model_id)
-                 except Exception:
-                     tokenizer = None
+                 tokenizer = None
          if tokenizer is None:
-             if GPT2Tokenizer is None:
-                 raise RuntimeError(
-                     "Text tokenization requires the 'transformers' extra. "
-                     "Install it with: pip install 'invarlock[adapters]'."
-                 )
-             tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+             tokenizer = AutoTokenizer.from_pretrained("gpt2")
          if getattr(tokenizer, "pad_token", None) is None:
              eos_token = getattr(tokenizer, "eos_token", None)
              if eos_token is not None:
@@ -289,7 +270,7 @@ def detect_model_profile(model_id: str, adapter: str | None = None) -> ModelProf
      model_lower = (model_id or "").lower()

      if any(
-         keyword in adapter_lower for keyword in ("bert", "roberta", "deberta")
+         keyword in adapter_lower for keyword in ("hf_mlm", "bert", "roberta", "deberta")
      ) or any(keyword in model_lower for keyword in ("bert", "roberta", "deberta")):
          return ModelProfile(
              family="bert",
@@ -302,57 +283,78 @@
              cert_lints=(
                  {
                      "type": "equals",
-                     "path": "metrics.loss_type",
-                     "value": "mlm",
-                     "message": "BERT cert must record MLM loss type.",
+                     "path": "primary_metric.kind",
+                     "value": "ppl_mlm",
+                     "message": "BERT cert must use MLM metric.",
                  },
                  {
                      "type": "gte",
-                     "path": "metrics.masked_tokens_total",
+                     "path": "telemetry.masked_tokens_total",
                      "value": "1",
                      "message": "BERT cert must report masked tokens.",
                  },
              ),
          )

-     if any(keyword in adapter_lower for keyword in ("llama", "mistral", "qwen")) or any(
-         keyword in model_lower for keyword in ("llama", "mistral", "qwen")
+     if any(keyword in adapter_lower for keyword in ("hf_seq2seq", "t5", "bart")) or any(
+         keyword in model_lower for keyword in ("t5", "bart")
      ):
          return ModelProfile(
-             family="llama",
+             family="seq2seq",
+             default_loss="seq2seq",
+             make_tokenizer=_make_unknown_tokenizer(model_id),
+             default_metric="ppl_seq2seq",
+             default_provider="wikitext2",
+             module_selectors=_unknown_selectors(),
+             invariants=(),
+             cert_lints=(),
+         )
+
+     if any(
+         keyword in adapter_lower for keyword in ("gpt", "neox", "opt", "phi")
+     ) or any(keyword in model_lower for keyword in ("gpt", "neox", "opt", "phi")):
+         return ModelProfile(
+             family="gpt2",
              default_loss="causal",
-             make_tokenizer=_make_llama_tokenizer(model_id),
+             make_tokenizer=_make_gpt2_tokenizer(model_id),
              default_metric="ppl_causal",
              default_provider="wikitext2",
-             module_selectors=_llama_selectors(),
-             invariants=("rope_rotary_embedding",),
+             module_selectors=_gpt2_selectors(),
+             invariants=("causal_masking",),
              cert_lints=(
                  {
                      "type": "equals",
-                     "path": "metrics.loss_type",
-                     "value": "causal",
-                     "message": "LLaMA cert should report causal loss.",
+                     "path": "primary_metric.kind",
+                     "value": "ppl_causal",
+                     "message": "GPT-style cert must use causal ppl metric.",
                  },
              ),
          )

      if any(
-         keyword in adapter_lower for keyword in ("gpt", "neox", "opt", "phi")
-     ) or any(keyword in model_lower for keyword in ("gpt", "neox", "opt", "phi")):
+         keyword in adapter_lower for keyword in ("mistral", "mixtral", "qwen", "yi")
+     ) or any(
+         keyword in model_lower for keyword in ("mistral", "mixtral", "qwen", "yi")
+     ):
+         family = "causal"
+         for keyword in ("mixtral", "mistral", "qwen", "yi"):
+             if keyword in adapter_lower or keyword in model_lower:
+                 family = keyword
+                 break
          return ModelProfile(
-             family="gpt2",
+             family=family,
              default_loss="causal",
-             make_tokenizer=_make_gpt2_tokenizer(model_id),
+             make_tokenizer=_make_causal_auto_tokenizer(model_id),
              default_metric="ppl_causal",
              default_provider="wikitext2",
-             module_selectors=_gpt2_selectors(),
-             invariants=("causal_masking",),
+             module_selectors=_rope_decoder_selectors(),
+             invariants=("rope_rotary_embedding",),
              cert_lints=(
                  {
                      "type": "equals",
-                     "path": "metrics.loss_type",
-                     "value": "causal",
-                     "message": "GPT-style cert should record causal loss.",
+                     "path": "primary_metric.kind",
+                     "value": "ppl_causal",
+                     "message": "Causal cert must use causal ppl metric.",
                  },
              ),
          )
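With the family split above, Mistral/Mixtral/Qwen/Yi ids resolve to a generic RoPE-decoder causal profile rather than a LLaMA-specific one, while GPT/NeoX/OPT/Phi keep the gpt2 profile. A hedged sketch of what profile detection should yield for a Mistral checkpoint (model id chosen purely for illustration):

    from invarlock.model_profile import detect_model_profile

    profile = detect_model_profile("mistralai/Mistral-7B-v0.1")
    print(profile.family, profile.default_metric)  # expected: "mistral" "ppl_causal"
    print(profile.invariants)                      # expected: ("rope_rotary_embedding",)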
invarlock/observability/health.py CHANGED
@@ -374,15 +374,15 @@ class InvarLockHealthChecker(HealthChecker):
          """Check adapter availability."""
          try:
              from invarlock.adapters import (
-                 HF_BERT_Adapter,
-                 HF_GPT2_Adapter,
-                 HF_LLaMA_Adapter,
+                 HF_Causal_Adapter,
+                 HF_MLM_Adapter,
+                 HF_Seq2Seq_Adapter,
              )

              adapters = {
-                 "hf_gpt2": HF_GPT2_Adapter,
-                 "hf_llama": HF_LLaMA_Adapter,
-                 "hf_bert": HF_BERT_Adapter,
+                 "hf_causal": HF_Causal_Adapter,
+                 "hf_mlm": HF_MLM_Adapter,
+                 "hf_seq2seq": HF_Seq2Seq_Adapter,
              }

              available_adapters = []
invarlock/observability/metrics.py CHANGED
@@ -455,3 +455,111 @@ def create_resource_metrics(registry: MetricsRegistry) -> dict[str, Any]:
          "gpu_memory": registry.register_gauge("invarlock.resource.gpu_memory_percent"),
          "disk_usage": registry.register_gauge("invarlock.resource.disk_percent"),
      }
+
+
+ def reset_peak_memory_stats() -> None:
+     """Reset GPU peak memory stats when available."""
+     try:
+         import torch
+
+         if torch.cuda.is_available():
+             torch.cuda.reset_peak_memory_stats()
+         mps = getattr(torch, "mps", None)
+         if mps is not None and hasattr(mps, "reset_peak_memory_stats"):
+             mps.reset_peak_memory_stats()
+     except Exception:
+         pass
+
+
+ def capture_memory_snapshot(
+     phase: str, *, timestamp: float | None = None
+ ) -> dict[str, Any]:
+     """Capture a point-in-time memory snapshot for the current process."""
+     snapshot: dict[str, Any] = {"phase": str(phase)}
+     if timestamp is None:
+         timestamp = time.time()
+     snapshot["ts"] = float(timestamp)
+
+     try:
+         import os
+
+         import psutil
+
+         process = psutil.Process(os.getpid())
+         rss_mb = process.memory_info().rss / 1024 / 1024
+         snapshot["rss_mb"] = float(rss_mb)
+     except Exception:
+         pass
+
+     try:
+         import torch
+
+         if torch.cuda.is_available():
+             device_index = torch.cuda.current_device()
+             snapshot["gpu_device"] = f"cuda:{device_index}"
+             snapshot["gpu_mb"] = float(
+                 torch.cuda.memory_allocated(device_index) / 1024 / 1024
+             )
+             snapshot["gpu_reserved_mb"] = float(
+                 torch.cuda.memory_reserved(device_index) / 1024 / 1024
+             )
+             snapshot["gpu_peak_mb"] = float(
+                 torch.cuda.max_memory_allocated(device_index) / 1024 / 1024
+             )
+             snapshot["gpu_peak_reserved_mb"] = float(
+                 torch.cuda.max_memory_reserved(device_index) / 1024 / 1024
+             )
+         else:
+             mps = getattr(torch, "mps", None)
+             if mps is not None and hasattr(torch.backends, "mps"):
+                 if torch.backends.mps.is_available():
+                     snapshot["gpu_device"] = "mps"
+                     if hasattr(mps, "current_allocated_memory"):
+                         snapshot["gpu_mb"] = float(
+                             mps.current_allocated_memory() / 1024 / 1024
+                         )
+                     if hasattr(mps, "driver_allocated_memory"):
+                         snapshot["gpu_reserved_mb"] = float(
+                             mps.driver_allocated_memory() / 1024 / 1024
+                         )
+     except Exception:
+         pass
+
+     if len(snapshot) <= 2:
+         return {}
+     return snapshot
+
+
+ def summarize_memory_snapshots(
+     snapshots: list[dict[str, Any]],
+ ) -> dict[str, float]:
+     """Summarize memory snapshots into peak metrics."""
+
+     def _peak(key: str) -> float | None:
+         values: list[float] = []
+         for entry in snapshots:
+             if not isinstance(entry, dict):
+                 continue
+             value = entry.get(key)
+             if isinstance(value, int | float):
+                 values.append(float(value))
+         return max(values) if values else None
+
+     summary: dict[str, float] = {}
+     rss_peak = _peak("rss_mb")
+     if rss_peak is not None:
+         summary["memory_mb_peak"] = rss_peak
+
+     gpu_peak = _peak("gpu_peak_mb")
+     if gpu_peak is None:
+         gpu_peak = _peak("gpu_mb")
+     if gpu_peak is not None:
+         summary["gpu_memory_mb_peak"] = gpu_peak
+
+     gpu_reserved_peak = _peak("gpu_peak_reserved_mb")
+     if gpu_reserved_peak is None:
+         gpu_reserved_peak = _peak("gpu_reserved_mb")
+     if gpu_reserved_peak is not None:
+         summary["gpu_memory_reserved_mb_peak"] = gpu_reserved_peak
+
+     return summary
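The new telemetry helpers are written to degrade gracefully: each snapshot records RSS via psutil and GPU usage via torch only when those packages are importable, and summarize_memory_snapshots reduces a list of snapshots to peak values. A usage sketch (keys appear only when the corresponding backend reports data):

    from invarlock.observability.metrics import (
        capture_memory_snapshot,
        reset_peak_memory_stats,
        summarize_memory_snapshots,
    )

    reset_peak_memory_stats()                      # clears CUDA/MPS peak counters if torch is installed
    snapshots = [capture_memory_snapshot("load")]
    # ... run the edit and evaluation ...
    snapshots.append(capture_memory_snapshot("evaluate"))
    print(summarize_memory_snapshots(snapshots))   # e.g. {"memory_mb_peak": ..., "gpu_memory_mb_peak": ...}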