invarlock 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. invarlock/__init__.py +2 -2
  2. invarlock/adapters/__init__.py +10 -14
  3. invarlock/adapters/auto.py +35 -40
  4. invarlock/adapters/capabilities.py +2 -2
  5. invarlock/adapters/hf_causal.py +418 -0
  6. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  7. invarlock/adapters/hf_mixin.py +25 -4
  8. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  9. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  10. invarlock/cli/adapter_auto.py +31 -21
  11. invarlock/cli/app.py +73 -2
  12. invarlock/cli/commands/certify.py +600 -59
  13. invarlock/cli/commands/doctor.py +8 -10
  14. invarlock/cli/commands/plugins.py +13 -9
  15. invarlock/cli/commands/report.py +233 -69
  16. invarlock/cli/commands/run.py +907 -183
  17. invarlock/cli/commands/verify.py +76 -11
  18. invarlock/cli/config.py +1 -1
  19. invarlock/cli/doctor_helpers.py +4 -5
  20. invarlock/cli/output.py +193 -0
  21. invarlock/cli/provenance.py +1 -1
  22. invarlock/core/bootstrap.py +1 -1
  23. invarlock/core/registry.py +9 -11
  24. invarlock/core/runner.py +111 -25
  25. invarlock/edits/quant_rtn.py +65 -37
  26. invarlock/eval/bench.py +3 -3
  27. invarlock/eval/data.py +68 -23
  28. invarlock/eval/metrics.py +59 -1
  29. invarlock/eval/tasks/__init__.py +12 -0
  30. invarlock/eval/tasks/classification.py +48 -0
  31. invarlock/eval/tasks/qa.py +36 -0
  32. invarlock/eval/tasks/text_generation.py +102 -0
  33. invarlock/guards/invariants.py +19 -10
  34. invarlock/guards/rmt.py +2 -2
  35. invarlock/guards/variance.py +2 -2
  36. invarlock/model_profile.py +48 -27
  37. invarlock/observability/health.py +6 -6
  38. invarlock/observability/metrics.py +108 -0
  39. invarlock/reporting/certificate.py +159 -9
  40. invarlock/reporting/certificate_schema.py +1 -1
  41. invarlock/reporting/guards_analysis.py +154 -4
  42. invarlock/reporting/html.py +55 -5
  43. invarlock/reporting/normalizer.py +7 -0
  44. invarlock/reporting/render.py +791 -431
  45. invarlock/reporting/report.py +39 -3
  46. invarlock/reporting/report_types.py +6 -1
  47. invarlock/reporting/telemetry.py +86 -0
  48. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/METADATA +23 -9
  49. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/RECORD +53 -48
  50. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
  51. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
  52. invarlock/adapters/hf_gpt2.py +0 -404
  53. invarlock/adapters/hf_llama.py +0 -487
  54. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
  55. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,8 @@
1
1
  """
2
- HuggingFace BERT Model Adapter
2
+ HuggingFace masked LM adapter.
3
3
  ==============================
4
4
 
5
- ModelAdapter implementation for HuggingFace BERT architecture models.
6
-
7
- This adapter provides BERT-specific integration including:
8
- - Support for BERT, RoBERTa, DistilBERT, and other BERT variants
9
- - Proper handling of bidirectional attention layers
10
- - Support for classification heads and pooling layers
11
- - Token type embeddings and position embeddings handling
12
- - Proper device-aware state serialization
5
+ ModelAdapter implementation for HuggingFace masked language models.
13
6
  """
14
7
 
15
8
  from typing import Any
@@ -27,7 +20,7 @@ TensorType = torch.Tensor
27
20
  ModuleType = nn.Module
28
21
 
29
22
 
30
- class HF_BERT_Adapter(HFAdapterMixin, ModelAdapter):
23
+ class HF_MLM_Adapter(HFAdapterMixin, ModelAdapter):
31
24
  """
32
25
  HuggingFace-specific ModelAdapter implementation for BERT models.
33
26
 
@@ -39,7 +32,7 @@ class HF_BERT_Adapter(HFAdapterMixin, ModelAdapter):
39
32
  - Device-aware state serialization
40
33
  """
41
34
 
42
- name = "hf_bert"
35
+ name = "hf_mlm"
43
36
 
44
37
  def load_model(
45
38
  self, model_id: str, device: str = "auto", **kwargs: Any
@@ -1,11 +1,11 @@
1
1
  """
2
- HuggingFace T5 Model Adapter
3
- ============================
2
+ HuggingFace encoder-decoder adapter.
3
+ ===================================
4
4
 
5
- ModelAdapter implementation for HuggingFace T5 encoder-decoder models.
5
+ ModelAdapter implementation for HuggingFace encoder-decoder (seq2seq) models.
6
6
 
7
- Loads AutoModelForSeq2SeqLM (e.g., t5-small/base/large) and exposes a minimal
8
- describe() sufficient for guard policies and reporting.
7
+ Loads AutoModelForSeq2SeqLM and exposes a minimal describe() sufficient for
8
+ guard policies and reporting.
9
9
  """
10
10
 
11
11
  from __future__ import annotations
@@ -25,10 +25,10 @@ TensorType = torch.Tensor
25
25
  ModuleType = nn.Module
26
26
 
27
27
 
28
- class HF_T5_Adapter(HFAdapterMixin, ModelAdapter):
29
- """HuggingFace T5 adapter using AutoModelForSeq2SeqLM."""
28
+ class HF_Seq2Seq_Adapter(HFAdapterMixin, ModelAdapter):
29
+ """HuggingFace encoder-decoder adapter using AutoModelForSeq2SeqLM."""
30
30
 
31
- name = "hf_t5"
31
+ name = "hf_seq2seq"
32
32
 
33
33
  def load_model( # type: ignore[override]
34
34
  self, model_id: str, device: str = "auto", **kwargs: Any
@@ -136,4 +136,4 @@ class HF_T5_Adapter(HFAdapterMixin, ModelAdapter):
136
136
  return super().restore(model, blob)
137
137
 
138
138
 
139
- __all__ = ["HF_T5_Adapter"]
139
+ __all__ = ["HF_Seq2Seq_Adapter"]
@@ -2,7 +2,8 @@
2
2
  Auto adapter resolution utilities.
3
3
 
4
4
  These helpers map a model identifier (HF directory or Hub ID) to a
5
- concrete built-in adapter name (hf_gpt2, hf_llama, hf_bert) without
5
+ concrete built-in adapter name (hf_causal, hf_mlm, hf_seq2seq, hf_causal_onnx)
6
+ without
6
7
  adding a hard dependency on Transformers.
7
8
  """
8
9
 
@@ -58,15 +59,15 @@ def _detect_quant_family_from_cfg(cfg: dict[str, Any]) -> str | None:
58
59
 
59
60
 
60
61
  def resolve_auto_adapter(
61
- model_id: str | os.PathLike[str], default: str = "hf_gpt2"
62
+ model_id: str | os.PathLike[str], default: str = "hf_causal"
62
63
  ) -> str:
63
64
  """Resolve an appropriate built-in adapter name for a model.
64
65
 
65
66
  Heuristics:
66
67
  - Prefer local config.json (no network). Inspect `model_type` and
67
- `architectures` to classify LLaMA/Mistral vs BERT vs GPT-like.
68
+ `architectures` to classify causal vs masked-LM vs seq2seq.
68
69
  - Fallback to simple name heuristics on the model_id string.
69
- - Default to `hf_gpt2` when unsure.
70
+ - Default to `hf_causal` when unsure.
70
71
  """
71
72
  cfg = _read_local_hf_config(model_id)
72
73
  model_id_str = str(model_id)
@@ -77,32 +78,41 @@ def resolve_auto_adapter(
77
78
  if fam:
78
79
  return fam
79
80
  mt = str(c.get("model_type", "")).lower()
81
+ if bool(c.get("is_encoder_decoder", False)):
82
+ return "hf_seq2seq"
80
83
  archs = [str(a) for a in c.get("architectures", []) if isinstance(a, str)]
81
84
  arch_blob = " ".join(archs)
82
- if (
83
- mt in {"llama", "mistral", "qwen", "yi"}
84
- or "Llama" in arch_blob
85
- or "Mistral" in arch_blob
86
- ):
87
- return "hf_llama"
85
+ if "ConditionalGeneration" in arch_blob or "Seq2SeqLM" in arch_blob:
86
+ return "hf_seq2seq"
88
87
  # Treat masked-LM families as BERT-like
89
88
  if (
90
89
  mt in {"bert", "roberta", "distilbert", "albert", "deberta", "deberta-v2"}
91
90
  or "MaskedLM" in arch_blob
92
91
  ):
93
- return "hf_bert"
94
- # Generic causal LM
95
- if "CausalLM" in arch_blob or mt in {
92
+ return "hf_mlm"
93
+ # Causal LM families (best-effort; structural validation happens in the adapter).
94
+ if "CausalLM" in arch_blob or "ForCausalLM" in arch_blob:
95
+ return "hf_causal"
96
+ if mt in {
97
+ "mistral",
98
+ "mixtral",
99
+ "qwen",
100
+ "qwen2",
101
+ "qwen2_moe",
102
+ "yi",
96
103
  "gpt2",
97
104
  "gpt_neox",
98
105
  "opt",
99
106
  "gptj",
100
- "gptj8bit",
107
+ "phi",
108
+ "falcon",
109
+ "glm",
110
+ "deepseek",
101
111
  }:
102
- return "hf_gpt2"
112
+ return "hf_causal"
103
113
  return None
104
114
 
105
- # If local directory contains ONNX model files, prefer hf_onnx
115
+ # If local directory contains ONNX model files, prefer the ONNX causal adapter.
106
116
  try:
107
117
  p = Path(model_id)
108
118
  if p.exists() and p.is_dir():
@@ -114,7 +124,7 @@ def resolve_auto_adapter(
114
124
  "encoder_model.onnx",
115
125
  ]
116
126
  if any((p / fname).exists() for fname in onnx_files):
117
- return "hf_onnx"
127
+ return "hf_causal_onnx"
118
128
  except Exception:
119
129
  pass
120
130
 
@@ -134,10 +144,10 @@ def resolve_auto_adapter(
134
144
  k in lower_id for k in ["bnb", "bitsandbytes", "-4bit", "-8bit", "4bit", "8bit"]
135
145
  ):
136
146
  return "hf_bnb"
137
- if any(k in lower_id for k in ["llama", "mistral", "qwen", "yi"]):
138
- return "hf_llama"
147
+ if any(k in lower_id for k in ["t5", "bart"]):
148
+ return "hf_seq2seq"
139
149
  if any(k in lower_id for k in ["bert", "roberta", "albert", "deberta"]):
140
- return "hf_bert"
150
+ return "hf_mlm"
141
151
  return default
142
152
 
143
153
 
@@ -148,7 +158,7 @@ def apply_auto_adapter_if_needed(cfg: Any) -> Any:
148
158
  """
149
159
  try:
150
160
  adapter = str(getattr(cfg.model, "adapter", ""))
151
- if adapter.strip().lower() not in {"auto", "hf_auto", "auto_hf"}:
161
+ if adapter.strip().lower() not in {"auto", "auto_hf"}:
152
162
  return cfg
153
163
  model_id = str(getattr(cfg.model, "id", ""))
154
164
  resolved = resolve_auto_adapter(model_id)
invarlock/cli/app.py CHANGED
@@ -105,7 +105,7 @@ Order: certify → report → run → plugins → doctor → version
105
105
  @app.command(
106
106
  name="certify",
107
107
  help=(
108
- "Certify a subject model against a baseline and generate a safety certificate. "
108
+ "Certify a subject model against a baseline and generate an evaluation certificate. "
109
109
  "Use when you have two model snapshots and want pass/fail gating."
110
110
  ),
111
111
  )
@@ -116,6 +116,14 @@ def _certify_lazy(
116
116
  edited: str = typer.Option(
117
117
  ..., "--edited", "--subject", help="Subject model dir or Hub ID"
118
118
  ),
119
+ baseline_report: str | None = typer.Option(
120
+ None,
121
+ "--baseline-report",
122
+ help=(
123
+ "Reuse an existing baseline run report.json (skips baseline evaluation). "
124
+ "Must include stored evaluation windows (e.g., set INVARLOCK_STORE_EVAL_WINDOWS=1)."
125
+ ),
126
+ ),
119
127
  adapter: str = typer.Option(
120
128
  "auto", "--adapter", help="Adapter name or 'auto' to resolve"
121
129
  ),
@@ -139,12 +147,38 @@ def _certify_lazy(
139
147
  edit_config: str | None = typer.Option(
140
148
  None, "--edit-config", help="Edit preset to apply a demo edit (quant_rtn)"
141
149
  ),
150
+ edit_label: str | None = typer.Option(
151
+ None,
152
+ "--edit-label",
153
+ help=(
154
+ "Edit algorithm label for BYOE models. Use 'noop' for baseline, "
155
+ "'quant_rtn' etc. for built-in edits, 'custom' for pre-edited models."
156
+ ),
157
+ ),
158
+ quiet: bool = typer.Option(
159
+ False, "--quiet", "-q", help="Minimal output (suppress run/report detail)"
160
+ ),
161
+ verbose: bool = typer.Option(
162
+ False, "--verbose", "-v", help="Verbose output (include debug details)"
163
+ ),
164
+ banner: bool = typer.Option(
165
+ True, "--banner/--no-banner", help="Show header banner"
166
+ ),
167
+ style: str = typer.Option("audit", "--style", help="Output style (audit|friendly)"),
168
+ timing: bool = typer.Option(False, "--timing", help="Show timing summary"),
169
+ progress: bool = typer.Option(
170
+ True, "--progress/--no-progress", help="Show progress done messages"
171
+ ),
172
+ no_color: bool = typer.Option(
173
+ False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
174
+ ),
142
175
  ):
143
176
  from .commands.certify import certify_command as _cert
144
177
 
145
178
  return _cert(
146
179
  source=source,
147
180
  edited=edited,
181
+ baseline_report=baseline_report,
148
182
  adapter=adapter,
149
183
  device=device,
150
184
  profile=profile,
@@ -153,6 +187,14 @@ def _certify_lazy(
153
187
  out=out,
154
188
  cert_out=cert_out,
155
189
  edit_config=edit_config,
190
+ edit_label=edit_label,
191
+ quiet=quiet,
192
+ verbose=verbose,
193
+ banner=banner,
194
+ style=style,
195
+ timing=timing,
196
+ progress=progress,
197
+ no_color=no_color,
156
198
  )
157
199
 
158
200
 
@@ -230,7 +272,7 @@ def _verify_typed(
230
272
  name="run",
231
273
  help=(
232
274
  "Execute an end-to-end run from a YAML config (edit + guards + reports). "
233
- "Writes run artifacts and optionally a safety certificate."
275
+ "Writes run artifacts and optionally an evaluation certificate."
234
276
  ),
235
277
  )
236
278
  def _run_typed(
@@ -245,11 +287,24 @@ def _run_typed(
245
287
  ),
246
288
  out: str | None = typer.Option(None, "--out", help="Output directory override"),
247
289
  edit: str | None = typer.Option(None, "--edit", help="Edit kind (quant|mixed)"),
290
+ edit_label: str | None = typer.Option(
291
+ None,
292
+ "--edit-label",
293
+ help=(
294
+ "Edit algorithm label for BYOE models. Use 'noop' for baseline, "
295
+ "'quant_rtn' etc. for built-in edits, 'custom' for pre-edited models."
296
+ ),
297
+ ),
248
298
  tier: str | None = typer.Option(
249
299
  None,
250
300
  "--tier",
251
301
  help="Auto-tuning tier override (conservative|balanced|aggressive)",
252
302
  ),
303
+ metric_kind: str | None = typer.Option(
304
+ None,
305
+ "--metric-kind",
306
+ help="Primary metric kind override (ppl_causal|ppl_mlm|accuracy|etc.)",
307
+ ),
253
308
  probes: int | None = typer.Option(
254
309
  None, "--probes", help="Number of micro-probes (0=deterministic, >0=adaptive)"
255
310
  ),
@@ -270,6 +325,16 @@ def _run_typed(
270
325
  no_cleanup: bool = typer.Option(
271
326
  False, "--no-cleanup", help="Skip cleanup of temporary artifacts"
272
327
  ),
328
+ style: str | None = typer.Option(
329
+ None, "--style", help="Output style (audit|friendly)"
330
+ ),
331
+ progress: bool = typer.Option(
332
+ False, "--progress", help="Show progress done messages"
333
+ ),
334
+ timing: bool = typer.Option(False, "--timing", help="Show timing summary"),
335
+ no_color: bool = typer.Option(
336
+ False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
337
+ ),
273
338
  ):
274
339
  from .commands.run import run_command as _run
275
340
 
@@ -279,13 +344,19 @@ def _run_typed(
279
344
  profile=profile,
280
345
  out=out,
281
346
  edit=edit,
347
+ edit_label=edit_label,
282
348
  tier=tier,
349
+ metric_kind=metric_kind,
283
350
  probes=probes,
284
351
  until_pass=until_pass,
285
352
  max_attempts=max_attempts,
286
353
  timeout=timeout,
287
354
  baseline=baseline,
288
355
  no_cleanup=no_cleanup,
356
+ style=style,
357
+ progress=progress,
358
+ timing=timing,
359
+ no_color=no_color,
289
360
  )
290
361
 
291
362