invarlock 0.3.5-py3-none-any.whl → 0.3.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. invarlock/__init__.py +2 -2
  2. invarlock/_data/runtime/tiers.yaml +57 -30
  3. invarlock/adapters/__init__.py +11 -15
  4. invarlock/adapters/auto.py +35 -40
  5. invarlock/adapters/capabilities.py +2 -2
  6. invarlock/adapters/hf_causal.py +418 -0
  7. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  8. invarlock/adapters/hf_mixin.py +25 -4
  9. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  10. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  11. invarlock/calibration/spectral_null.py +15 -10
  12. invarlock/calibration/variance_ve.py +0 -2
  13. invarlock/cli/adapter_auto.py +31 -21
  14. invarlock/cli/app.py +73 -2
  15. invarlock/cli/commands/calibrate.py +6 -2
  16. invarlock/cli/commands/certify.py +651 -91
  17. invarlock/cli/commands/doctor.py +11 -11
  18. invarlock/cli/commands/explain_gates.py +57 -8
  19. invarlock/cli/commands/plugins.py +13 -9
  20. invarlock/cli/commands/report.py +233 -69
  21. invarlock/cli/commands/run.py +1066 -244
  22. invarlock/cli/commands/verify.py +154 -15
  23. invarlock/cli/config.py +22 -6
  24. invarlock/cli/doctor_helpers.py +4 -5
  25. invarlock/cli/output.py +193 -0
  26. invarlock/cli/provenance.py +1 -1
  27. invarlock/core/api.py +45 -5
  28. invarlock/core/auto_tuning.py +65 -20
  29. invarlock/core/bootstrap.py +1 -1
  30. invarlock/core/contracts.py +7 -1
  31. invarlock/core/registry.py +11 -13
  32. invarlock/core/runner.py +425 -75
  33. invarlock/edits/quant_rtn.py +65 -37
  34. invarlock/eval/bench.py +3 -16
  35. invarlock/eval/data.py +82 -51
  36. invarlock/eval/metrics.py +63 -2
  37. invarlock/eval/primary_metric.py +23 -0
  38. invarlock/eval/tail_stats.py +230 -0
  39. invarlock/eval/tasks/__init__.py +12 -0
  40. invarlock/eval/tasks/classification.py +48 -0
  41. invarlock/eval/tasks/qa.py +36 -0
  42. invarlock/eval/tasks/text_generation.py +102 -0
  43. invarlock/guards/_estimators.py +154 -0
  44. invarlock/guards/invariants.py +19 -10
  45. invarlock/guards/policies.py +16 -6
  46. invarlock/guards/rmt.py +627 -546
  47. invarlock/guards/spectral.py +348 -110
  48. invarlock/guards/tier_config.py +32 -30
  49. invarlock/guards/variance.py +7 -31
  50. invarlock/guards_ref/rmt_ref.py +23 -23
  51. invarlock/model_profile.py +90 -42
  52. invarlock/observability/health.py +6 -6
  53. invarlock/observability/metrics.py +108 -0
  54. invarlock/reporting/certificate.py +384 -55
  55. invarlock/reporting/certificate_schema.py +3 -2
  56. invarlock/reporting/dataset_hashing.py +15 -2
  57. invarlock/reporting/guards_analysis.py +350 -277
  58. invarlock/reporting/html.py +55 -5
  59. invarlock/reporting/normalizer.py +13 -0
  60. invarlock/reporting/policy_utils.py +38 -36
  61. invarlock/reporting/primary_metric_utils.py +71 -17
  62. invarlock/reporting/render.py +852 -431
  63. invarlock/reporting/report.py +40 -4
  64. invarlock/reporting/report_types.py +11 -3
  65. invarlock/reporting/telemetry.py +86 -0
  66. invarlock/reporting/validate.py +1 -18
  67. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/METADATA +27 -13
  68. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/RECORD +72 -65
  69. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
  70. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
  71. invarlock/adapters/hf_gpt2.py +0 -404
  72. invarlock/adapters/hf_llama.py +0 -487
  73. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
  74. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
@@ -31,7 +31,7 @@ _FALLBACK_CONFIG: dict[str, dict[str, Any]] = {
             "deadband": 0.02,
             "min_abs_adjust": 0.012,
             "max_scale_step": 0.03,
-            "min_effect_lognll": 0.0009,
+            "min_effect_lognll": 0.0,
             "predictive_one_sided": True,
             "topk_backstop": 1,
             "max_adjusted_modules": 1,
@@ -43,10 +43,10 @@ _FALLBACK_CONFIG: dict[str, dict[str, Any]] = {
             "max_caps": 5,
             "max_spectral_norm": None,
             "family_caps": {
-                "ffn": 2.5,
-                "attn": 2.8,
-                "embed": 3.0,
-                "other": 3.0,
+                "ffn": 3.849,
+                "attn": 3.018,
+                "embed": 1.05,
+                "other": 0.0,
             },
             "multiple_testing": {
                 "method": "bh",
@@ -57,12 +57,12 @@ _FALLBACK_CONFIG: dict[str, dict[str, Any]] = {
         "rmt_guard": {
             "deadband": 0.10,
             "margin": 1.5,
-            "epsilon_default": 0.10,
+            "epsilon_default": 0.01,
             "epsilon_by_family": {
-                "ffn": 0.10,
-                "attn": 0.08,
-                "embed": 0.12,
-                "other": 0.12,
+                "ffn": 0.01,
+                "attn": 0.01,
+                "embed": 0.01,
+                "other": 0.01,
             },
         },
     },
@@ -71,7 +71,7 @@ _FALLBACK_CONFIG: dict[str, dict[str, Any]] = {
             "deadband": 0.03,
             "min_abs_adjust": 0.02,
             "max_scale_step": 0.015,
-            "min_effect_lognll": 0.0018,
+            "min_effect_lognll": 0.016,
             "predictive_one_sided": False,
             "topk_backstop": 0,
             "max_adjusted_modules": 0,
@@ -81,61 +81,63 @@ _FALLBACK_CONFIG: dict[str, dict[str, Any]] = {
             "deadband": 0.05,
             "scope": "ffn",
             "max_caps": 3,
+            "max_spectral_norm": None,
             "family_caps": {
-                "ffn": 2.3,
+                "ffn": 3.849,
                 "attn": 2.6,
                 "embed": 2.8,
                 "other": 2.8,
             },
             "multiple_testing": {
                 "method": "bonferroni",
-                "alpha": 0.02,
+                "alpha": 0.000625,
                 "m": 4,
             },
         },
         "rmt_guard": {
             "deadband": 0.05,
             "margin": 1.3,
-            "epsilon_default": 0.06,
+            "epsilon_default": 0.01,
             "epsilon_by_family": {
-                "ffn": 0.06,
-                "attn": 0.05,
-                "embed": 0.07,
-                "other": 0.07,
+                "ffn": 0.01,
+                "attn": 0.01,
+                "embed": 0.01,
+                "other": 0.01,
             },
         },
     },
     "aggressive": {
         "variance_guard": {
             "deadband": 0.12,
-            "min_effect_lognll": 0.0005,
+            "min_effect_lognll": 0.033,
         },
         "spectral_guard": {
             "sigma_quantile": 0.98,
             "deadband": 0.15,
-            "scope": "all",
+            "scope": "ffn",
             "max_caps": 8,
+            "max_spectral_norm": None,
             "family_caps": {
-                "ffn": 3.0,
-                "attn": 3.2,
-                "embed": 3.5,
+                "ffn": 3.849,
+                "attn": 3.5,
+                "embed": 2.5,
                 "other": 3.5,
             },
             "multiple_testing": {
                 "method": "bh",
-                "alpha": 0.1,
+                "alpha": 0.00078125,
                 "m": 4,
             },
         },
         "rmt_guard": {
             "deadband": 0.15,
             "margin": 1.8,
-            "epsilon_default": 0.15,
+            "epsilon_default": 0.01,
             "epsilon_by_family": {
-                "ffn": 0.15,
-                "attn": 0.15,
-                "embed": 0.15,
-                "other": 0.15,
+                "ffn": 0.01,
+                "attn": 0.01,
+                "embed": 0.01,
+                "other": 0.01,
             },
         },
     },
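A note on the `multiple_testing` blocks above: `method` selects the correction while `alpha` and `m` parameterize it. The sketch below is illustrative only, applying the textbook Bonferroni and Benjamini-Hochberg arithmetic those method names usually denote to the new conservative-tier values; invarlock's actual gating code is not shown in this diff.

    # Illustrative only: textbook thresholds computed from the values above.
    alpha, m = 0.000625, 4

    # Bonferroni ("bonferroni"): every one of the m tests must clear alpha / m.
    bonferroni_threshold = alpha / m                          # 0.00015625

    # Benjamini-Hochberg ("bh"): the k-th smallest p-value is compared
    # against k * alpha / m, for k = 1..m.
    bh_thresholds = [k * alpha / m for k in range(1, m + 1)]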
@@ -257,7 +259,7 @@ def get_rmt_epsilon(tier: TierName = "balanced") -> dict[str, float]:
 def get_variance_min_effect(tier: TierName = "balanced") -> float:
     """Get VE min_effect_lognll for a tier."""
     config = get_tier_guard_config(tier, "variance_guard")
-    return config.get("min_effect_lognll", 0.0009)
+    return config.get("min_effect_lognll", 0.0)


 def check_drift(
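A minimal usage sketch of the helper above; the module path is an assumption based on the file list (invarlock/guards/tier_config.py), not something shown in this hunk.

    # Sketch only; import path is assumed, not confirmed by this diff.
    from invarlock.guards.tier_config import get_variance_min_effect

    floor = get_variance_min_effect("balanced")
    # With the new fallback of 0.0, a tier that omits min_effect_lognll
    # no longer imposes a minimum-effect floor on the variance guard.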
@@ -36,7 +36,7 @@ from ._contracts import guard_assert
 # Import the policy type and Guard interface
 from .policies import VariancePolicyDict

-__all__ = ["equalise_residual_variance", "equalise_branch_variance", "VarianceGuard"]
+__all__ = ["equalise_residual_variance", "VarianceGuard"]


 def _safe_mean(
@@ -121,7 +121,7 @@ def _iter_transformer_layers(model: nn.Module):
         # GPT-2 style
         yield from model.transformer.h
     elif hasattr(model, "model") and hasattr(model.model, "layers"):
-        # LLaMA style
+        # RoPE decoder style
         yield from model.model.layers
     elif hasattr(model, "encoder") and hasattr(model.encoder, "layer"):
         # BERT style
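The comment rename reflects that `model.model.layers` is the layout shared by LLaMA-family and other RoPE decoders, not LLaMA alone. Below is a standalone sketch of the same duck-typing order; `find_layers` is a hypothetical stand-in for the module-private generator.

    # Hypothetical helper mirroring _iter_transformer_layers' branch order.
    import torch.nn as nn

    def find_layers(model: nn.Module) -> list[nn.Module]:
        if hasattr(model, "transformer") and hasattr(model.transformer, "h"):
            return list(model.transformer.h)   # GPT-2 style
        if hasattr(model, "model") and hasattr(model.model, "layers"):
            return list(model.model.layers)    # RoPE decoder style (LLaMA, Mistral, ...)
        if hasattr(model, "encoder") and hasattr(model.encoder, "layer"):
            return list(model.encoder.layer)   # BERT style
        return []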
@@ -214,7 +214,7 @@ def equalise_residual_variance(
             hooks[name] = attn_proj.register_forward_hook(_branch_hook(name))

         if hasattr(blk, "mlp"):
-            # Check for c_proj (GPT-2) or down_proj (LLaMA) or fc2 (generic)
+            # Check for c_proj (GPT-2) or down_proj (RoPE decoder) or fc2 (generic)
             mlp_proj = (
                 getattr(blk.mlp, "c_proj", None)
                 or getattr(blk.mlp, "down_proj", None)
@@ -348,33 +348,6 @@ def equalise_residual_variance(
     return applied_scales


-def equalise_branch_variance(
-    model: nn.Module,
-    dataloader,
-    windows: int = 32,
-    tol: float = 0.02,
-    scale_bias: bool = True,
-    seed: int = 42,
-    device: str | None = None,
-    allow_empty: bool = False,
-) -> dict[str, float]:
-    """
-    Legacy alias for equalise_residual_variance.
-
-    Maintained for backward compatibility.
-    """
-    return equalise_residual_variance(
-        model=model,
-        dataloader=dataloader,
-        windows=windows,
-        tol=tol,
-        scale_bias=scale_bias,
-        seed=seed,
-        device=device,
-        allow_empty=allow_empty,
-    )
-
-
 def _predictive_gate_outcome(
     mean_delta: float,
     delta_ci: tuple[float, float] | None,
@@ -1328,7 +1301,10 @@ class VarianceGuard(Guard):
         if not filtered_scales and topk > 0 and best_candidate:
             name, scale = best_candidate
             deadband = float(self._policy.get("deadband", 0.0) or 0.0)
-            threshold = max(deadband * 0.5, min_abs)
+            # Backstop should remain below the main min_abs filter; clamp if deadband is large.
+            threshold = max(deadband * 0.5, min_abs * 0.5)
+            if min_abs > 0 and threshold >= min_abs:
+                threshold = min_abs * 0.5
             if best_delta >= threshold:
                 if max_step > 0.0:
                     limited_delta = min(best_delta, max_step)
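A worked example with invented numbers of why the clamp matters: the old expression could never fall below `min_abs`, so a candidate that had already failed the main filter could not be rescued by the top-k backstop whenever the deadband was large.

    # Invented values illustrating the clamped backstop threshold above.
    deadband, min_abs = 0.30, 0.02

    old_threshold = max(deadband * 0.5, min_abs)    # 0.15, never below min_abs
    threshold = max(deadband * 0.5, min_abs * 0.5)  # 0.15
    if min_abs > 0 and threshold >= min_abs:
        threshold = min_abs * 0.5                   # clamped down to 0.01
    assert threshold < min_abs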
@@ -1,37 +1,37 @@
 from __future__ import annotations

-import math
 from collections.abc import Mapping


 def rmt_decide(
-    bare_by_family: Mapping[str, int],
-    guarded_by_family: Mapping[str, int],
+    baseline_by_family: Mapping[str, float],
+    current_by_family: Mapping[str, float],
     epsilon_by_family: Mapping[str, float],
 ) -> dict[str, object]:
     """
-    Reference epsilon-rule decision for RMT.
+    Reference epsilon-rule decision for RMT activation edge-risk drift.

-    Allowed excess A_f = ceil(epsilon_f * max(1, b_f)).
-    PASS iff for all families Δ_f <= A_f and sum Δ_f <= sum A_f.
+    For each family with baseline edge-risk > 0:
+        PASS iff current_edge <= (1 + epsilon) * baseline_edge
     """
-    families = set(bare_by_family) | set(guarded_by_family) | set(epsilon_by_family)
-    delta_by_family: dict[str, int] = {}
-    allowed_by_family: dict[str, int] = {}
-    sum_delta = 0
-    sum_allowed = 0
-    for f in families:
-        b = int(bare_by_family.get(f, 0) or 0)
-        g = int(guarded_by_family.get(f, 0) or 0)
-        eps = float(epsilon_by_family.get(f, 0.0) or 0.0)
-        d = g - b
-        a = int(math.ceil(eps * max(1, b)))
-        delta_by_family[f] = d
-        allowed_by_family[f] = a
-        sum_delta += d
-        sum_allowed += a
-    ok = all(delta_by_family[f] <= allowed_by_family[f] for f in families) and (
-        sum_delta <= sum_allowed
+    families = set(baseline_by_family) | set(current_by_family) | set(epsilon_by_family)
+    delta_by_family: dict[str, float] = {}
+    allowed_by_family: dict[str, float] = {}
+    for family in families:
+        baseline = float(baseline_by_family.get(family, 0.0) or 0.0)
+        current = float(current_by_family.get(family, 0.0) or 0.0)
+        if baseline <= 0.0:
+            continue
+        epsilon_val = float(epsilon_by_family.get(family, 0.0) or 0.0)
+        allowed = (1.0 + epsilon_val) * baseline
+        allowed_by_family[family] = allowed
+        delta_by_family[family] = (
+            (current / baseline) - 1.0 if baseline > 0 else float("inf")
+        )
+
+    ok = all(
+        float(current_by_family.get(family, 0.0) or 0.0) <= allowed_by_family[family]
+        for family in allowed_by_family
     )
     return {
         "pass": ok,
@@ -5,12 +5,14 @@ from collections.abc import Callable
 from dataclasses import dataclass, field
 from typing import Any

+AutoTokenizer: Any | None = None
+GPT2Tokenizer: Any | None = None
+
 try:
-    from transformers import AutoTokenizer, GPT2Tokenizer
+    from transformers import AutoTokenizer as _AutoTokenizer
+    from transformers import GPT2Tokenizer as _GPT2Tokenizer
     from transformers.tokenization_utils_base import PreTrainedTokenizerBase
 except Exception:  # pragma: no cover - exercised only when transformers is absent
-    AutoTokenizer = None  # type: ignore[assignment]
-    GPT2Tokenizer = None  # type: ignore[assignment]

     class PreTrainedTokenizerBase:  # type: ignore[no-redef]
         """Lightweight stub used when transformers is not installed."""
@@ -22,6 +24,11 @@ except Exception: # pragma: no cover - exercised only when transformers is abse
         )


+else:  # pragma: no cover - transformers optional
+    AutoTokenizer = _AutoTokenizer
+    GPT2Tokenizer = _GPT2Tokenizer
+
+
 TokenizerFactory = Callable[[], tuple[PreTrainedTokenizerBase, str]]

@@ -99,7 +106,7 @@ def _gpt2_selectors() -> dict[str, list[str]]:
     }


-def _llama_selectors() -> dict[str, list[str]]:
+def _rope_decoder_selectors() -> dict[str, list[str]]:
     return {
         "attention": [
             "self_attn.q_proj",
@@ -184,23 +191,34 @@ def _make_gpt2_tokenizer(model_id: str):
     return factory


-def _make_llama_tokenizer(model_id: str):
+def _make_causal_auto_tokenizer(model_id: str):
     def factory() -> tuple[PreTrainedTokenizerBase, str]:
         if AutoTokenizer is None and GPT2Tokenizer is None:
             raise RuntimeError(
-                "LLaMA-style tokenizers require the 'transformers' extra. "
+                "Causal tokenizers require the 'transformers' extra. "
                 "Install it with: pip install 'invarlock[adapters]'."
             )
         # Try offline-first to respect InvarLock network guard; fall back to a
         # local GPT-2 tokenizer if the model assets are not cached or network
         # access is denied.
-        try:
-            tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
-        except Exception:
+        tokenizer = None
+        if AutoTokenizer is not None:
             try:
-                tokenizer = AutoTokenizer.from_pretrained(model_id)
+                tokenizer = AutoTokenizer.from_pretrained(
+                    model_id, local_files_only=True
+                )
             except Exception:
-                tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained(model_id)
+                except Exception:
+                    tokenizer = None
+        if tokenizer is None:
+            if GPT2Tokenizer is None:
+                raise RuntimeError(
+                    "Tokenization requires the 'transformers' extra. "
+                    "Install it with: pip install 'invarlock[adapters]'."
+                )
+            tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
         # Ensure padding/bos tokens are configured so downstream encoding
         # yields stable non-zero ids and a valid attention mask regardless of
         # environment defaults or tokenizer variants.
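The factory above encodes a three-step resolution: cached-local AutoTokenizer, then remote AutoTokenizer, then a GPT-2 tokenizer as the last resort. A condensed sketch of that order follows; `resolve_tokenizer` is hypothetical, and the `AutoTokenizer`/`GPT2Tokenizer` names are the module-level globals set up earlier in this diff.

    # Condensed, hypothetical restatement of the fallback chain above.
    def resolve_tokenizer(model_id: str):
        if AutoTokenizer is not None:
            # Offline-first, then remote, mirroring the network-guard behaviour.
            for kwargs in ({"local_files_only": True}, {}):
                try:
                    return AutoTokenizer.from_pretrained(model_id, **kwargs)
                except Exception:
                    continue
        if GPT2Tokenizer is None:
            raise RuntimeError("Tokenization requires the 'transformers' extra.")
        return GPT2Tokenizer.from_pretrained("gpt2")  # last-resort degrade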
@@ -209,7 +227,7 @@ def _make_llama_tokenizer(model_id: str):
         eos_token = getattr(tokenizer, "eos_token", None)
         if eos_token is not None:
             tokenizer.pad_token = eos_token
-        # Some LLaMA tokenizers default to not adding a BOS token on encode;
+        # Some causal tokenizers default to not adding a BOS token on encode;
         # enable it to guarantee at least one non-pad, non-zero token id.
         if hasattr(tokenizer, "add_bos_token"):
             try:
@@ -234,15 +252,24 @@ def _make_unknown_tokenizer(model_id: str):
             "Install it with: pip install 'invarlock[adapters]'."
         )
         # Unknown families: try local-only first, then remote, then degrade to GPT-2
-        try:
-            tokenizer: PreTrainedTokenizerBase = AutoTokenizer.from_pretrained(
-                model_id, local_files_only=True
-            )
-        except Exception:
+        tokenizer = None
+        if AutoTokenizer is not None:
             try:
-                tokenizer = AutoTokenizer.from_pretrained(model_id)
+                tokenizer = AutoTokenizer.from_pretrained(
+                    model_id, local_files_only=True
+                )
             except Exception:
-                tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained(model_id)
+                except Exception:
+                    tokenizer = None
+        if tokenizer is None:
+            if GPT2Tokenizer is None:
+                raise RuntimeError(
+                    "Text tokenization requires the 'transformers' extra. "
+                    "Install it with: pip install 'invarlock[adapters]'."
+                )
+            tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
         if getattr(tokenizer, "pad_token", None) is None:
             eos_token = getattr(tokenizer, "eos_token", None)
             if eos_token is not None:
@@ -262,7 +289,7 @@ def detect_model_profile(model_id: str, adapter: str | None = None) -> ModelProf
     model_lower = (model_id or "").lower()

     if any(
-        keyword in adapter_lower for keyword in ("bert", "roberta", "deberta")
+        keyword in adapter_lower for keyword in ("hf_mlm", "bert", "roberta", "deberta")
     ) or any(keyword in model_lower for keyword in ("bert", "roberta", "deberta")):
         return ModelProfile(
             family="bert",
@@ -275,57 +302,78 @@ def detect_model_profile(model_id: str, adapter: str | None = None) -> ModelProf
             cert_lints=(
                 {
                     "type": "equals",
-                    "path": "metrics.loss_type",
-                    "value": "mlm",
-                    "message": "BERT cert must record MLM loss type.",
+                    "path": "primary_metric.kind",
+                    "value": "ppl_mlm",
+                    "message": "BERT cert must use MLM metric.",
                 },
                 {
                     "type": "gte",
-                    "path": "metrics.masked_tokens_total",
+                    "path": "telemetry.masked_tokens_total",
                     "value": "1",
                     "message": "BERT cert must report masked tokens.",
                 },
             ),
         )

-    if any(keyword in adapter_lower for keyword in ("llama", "mistral", "qwen")) or any(
-        keyword in model_lower for keyword in ("llama", "mistral", "qwen")
+    if any(keyword in adapter_lower for keyword in ("hf_seq2seq", "t5", "bart")) or any(
+        keyword in model_lower for keyword in ("t5", "bart")
     ):
         return ModelProfile(
-            family="llama",
+            family="seq2seq",
+            default_loss="seq2seq",
+            make_tokenizer=_make_unknown_tokenizer(model_id),
+            default_metric="ppl_seq2seq",
+            default_provider="wikitext2",
+            module_selectors=_unknown_selectors(),
+            invariants=(),
+            cert_lints=(),
+        )
+
+    if any(
+        keyword in adapter_lower for keyword in ("gpt", "neox", "opt", "phi")
+    ) or any(keyword in model_lower for keyword in ("gpt", "neox", "opt", "phi")):
+        return ModelProfile(
+            family="gpt2",
             default_loss="causal",
-            make_tokenizer=_make_llama_tokenizer(model_id),
+            make_tokenizer=_make_gpt2_tokenizer(model_id),
             default_metric="ppl_causal",
             default_provider="wikitext2",
-            module_selectors=_llama_selectors(),
-            invariants=("rope_rotary_embedding",),
+            module_selectors=_gpt2_selectors(),
+            invariants=("causal_masking",),
             cert_lints=(
                 {
                     "type": "equals",
-                    "path": "metrics.loss_type",
-                    "value": "causal",
-                    "message": "LLaMA cert should report causal loss.",
+                    "path": "primary_metric.kind",
+                    "value": "ppl_causal",
+                    "message": "GPT-style cert must use causal ppl metric.",
                 },
             ),
         )

     if any(
-        keyword in adapter_lower for keyword in ("gpt", "neox", "opt", "phi")
-    ) or any(keyword in model_lower for keyword in ("gpt", "neox", "opt", "phi")):
+        keyword in adapter_lower for keyword in ("mistral", "mixtral", "qwen", "yi")
+    ) or any(
+        keyword in model_lower for keyword in ("mistral", "mixtral", "qwen", "yi")
+    ):
+        family = "causal"
+        for keyword in ("mixtral", "mistral", "qwen", "yi"):
+            if keyword in adapter_lower or keyword in model_lower:
+                family = keyword
+                break
         return ModelProfile(
-            family="gpt2",
+            family=family,
             default_loss="causal",
-            make_tokenizer=_make_gpt2_tokenizer(model_id),
+            make_tokenizer=_make_causal_auto_tokenizer(model_id),
             default_metric="ppl_causal",
             default_provider="wikitext2",
-            module_selectors=_gpt2_selectors(),
-            invariants=("causal_masking",),
+            module_selectors=_rope_decoder_selectors(),
+            invariants=("rope_rotary_embedding",),
             cert_lints=(
                 {
                     "type": "equals",
-                    "path": "metrics.loss_type",
-                    "value": "causal",
-                    "message": "GPT-style cert should record causal loss.",
+                    "path": "primary_metric.kind",
+                    "value": "ppl_causal",
+                    "message": "Causal cert must use causal ppl metric.",
                 },
             ),
         )
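Taken together, the reordered branches route roughly as follows; model ids are illustrative and the import path is assumed from the file list's invarlock/model_profile.py.

    # Illustrative expectations derived from the keyword tables above.
    from invarlock.model_profile import detect_model_profile

    profile = detect_model_profile("mistralai/Mistral-7B-v0.1")
    assert profile.family == "mistral"             # matched in the RoPE-decoder branch
    assert profile.default_metric == "ppl_causal"
    assert "rope_rotary_embedding" in profile.invariants

    profile = detect_model_profile("t5-small")
    assert profile.family == "seq2seq"             # "t5" keyword, seq2seq branch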
@@ -374,15 +374,15 @@ class InvarLockHealthChecker(HealthChecker):
         """Check adapter availability."""
         try:
             from invarlock.adapters import (
-                HF_BERT_Adapter,
-                HF_GPT2_Adapter,
-                HF_LLaMA_Adapter,
+                HF_Causal_Adapter,
+                HF_MLM_Adapter,
+                HF_Seq2Seq_Adapter,
             )

             adapters = {
-                "hf_gpt2": HF_GPT2_Adapter,
-                "hf_llama": HF_LLaMA_Adapter,
-                "hf_bert": HF_BERT_Adapter,
+                "hf_causal": HF_Causal_Adapter,
+                "hf_mlm": HF_MLM_Adapter,
+                "hf_seq2seq": HF_Seq2Seq_Adapter,
             }

             available_adapters = []
@@ -455,3 +455,111 @@ def create_resource_metrics(registry: MetricsRegistry) -> dict[str, Any]:
         "gpu_memory": registry.register_gauge("invarlock.resource.gpu_memory_percent"),
         "disk_usage": registry.register_gauge("invarlock.resource.disk_percent"),
     }
+
+
+def reset_peak_memory_stats() -> None:
+    """Reset GPU peak memory stats when available."""
+    try:
+        import torch
+
+        if torch.cuda.is_available():
+            torch.cuda.reset_peak_memory_stats()
+        mps = getattr(torch, "mps", None)
+        if mps is not None and hasattr(mps, "reset_peak_memory_stats"):
+            mps.reset_peak_memory_stats()
+    except Exception:
+        pass
+
+
+def capture_memory_snapshot(
+    phase: str, *, timestamp: float | None = None
+) -> dict[str, Any]:
+    """Capture a point-in-time memory snapshot for the current process."""
+    snapshot: dict[str, Any] = {"phase": str(phase)}
+    if timestamp is None:
+        timestamp = time.time()
+    snapshot["ts"] = float(timestamp)
+
+    try:
+        import os
+
+        import psutil
+
+        process = psutil.Process(os.getpid())
+        rss_mb = process.memory_info().rss / 1024 / 1024
+        snapshot["rss_mb"] = float(rss_mb)
+    except Exception:
+        pass
+
+    try:
+        import torch
+
+        if torch.cuda.is_available():
+            device_index = torch.cuda.current_device()
+            snapshot["gpu_device"] = f"cuda:{device_index}"
+            snapshot["gpu_mb"] = float(
+                torch.cuda.memory_allocated(device_index) / 1024 / 1024
+            )
+            snapshot["gpu_reserved_mb"] = float(
+                torch.cuda.memory_reserved(device_index) / 1024 / 1024
+            )
+            snapshot["gpu_peak_mb"] = float(
+                torch.cuda.max_memory_allocated(device_index) / 1024 / 1024
+            )
+            snapshot["gpu_peak_reserved_mb"] = float(
+                torch.cuda.max_memory_reserved(device_index) / 1024 / 1024
+            )
+        else:
+            mps = getattr(torch, "mps", None)
+            if mps is not None and hasattr(torch.backends, "mps"):
+                if torch.backends.mps.is_available():
+                    snapshot["gpu_device"] = "mps"
+                    if hasattr(mps, "current_allocated_memory"):
+                        snapshot["gpu_mb"] = float(
+                            mps.current_allocated_memory() / 1024 / 1024
+                        )
+                    if hasattr(mps, "driver_allocated_memory"):
+                        snapshot["gpu_reserved_mb"] = float(
+                            mps.driver_allocated_memory() / 1024 / 1024
+                        )
+    except Exception:
+        pass
+
+    if len(snapshot) <= 2:
+        return {}
+    return snapshot
+
+
+def summarize_memory_snapshots(
+    snapshots: list[dict[str, Any]],
+) -> dict[str, float]:
+    """Summarize memory snapshots into peak metrics."""
+
+    def _peak(key: str) -> float | None:
+        values: list[float] = []
+        for entry in snapshots:
+            if not isinstance(entry, dict):
+                continue
+            value = entry.get(key)
+            if isinstance(value, int | float):
+                values.append(float(value))
+        return max(values) if values else None
+
+    summary: dict[str, float] = {}
+    rss_peak = _peak("rss_mb")
+    if rss_peak is not None:
+        summary["memory_mb_peak"] = rss_peak
+
+    gpu_peak = _peak("gpu_peak_mb")
+    if gpu_peak is None:
+        gpu_peak = _peak("gpu_mb")
+    if gpu_peak is not None:
+        summary["gpu_memory_mb_peak"] = gpu_peak
+
+    gpu_reserved_peak = _peak("gpu_peak_reserved_mb")
+    if gpu_reserved_peak is None:
+        gpu_reserved_peak = _peak("gpu_reserved_mb")
+    if gpu_reserved_peak is not None:
+        summary["gpu_memory_reserved_mb_peak"] = gpu_reserved_peak
+
+    return summary
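These three helpers compose into a reset / sample / summarize pattern around a measured phase. A minimal usage sketch follows; the phase names are invented and the import path is assumed from the file list's invarlock/observability/metrics.py.

    # Minimal usage sketch; import path assumed from the file list.
    from invarlock.observability.metrics import (
        capture_memory_snapshot,
        reset_peak_memory_stats,
        summarize_memory_snapshots,
    )

    reset_peak_memory_stats()
    snapshots = [capture_memory_snapshot("before_edit")]
    # ... run the edit / evaluation phase here ...
    snapshots.append(capture_memory_snapshot("after_edit"))

    # capture_memory_snapshot returns {} when nothing could be sampled and
    # summarize skips non-dict entries, so the filter below is optional.
    summary = summarize_memory_snapshots([s for s in snapshots if s])
    # e.g. {"memory_mb_peak": 512.3, "gpu_memory_mb_peak": 1024.0}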