invarlock 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +2 -2
- invarlock/adapters/__init__.py +10 -14
- invarlock/adapters/auto.py +35 -40
- invarlock/adapters/capabilities.py +2 -2
- invarlock/adapters/hf_causal.py +418 -0
- invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
- invarlock/adapters/hf_mixin.py +25 -4
- invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
- invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
- invarlock/cli/adapter_auto.py +31 -21
- invarlock/cli/app.py +73 -2
- invarlock/cli/commands/certify.py +600 -59
- invarlock/cli/commands/doctor.py +8 -10
- invarlock/cli/commands/plugins.py +13 -9
- invarlock/cli/commands/report.py +233 -69
- invarlock/cli/commands/run.py +907 -183
- invarlock/cli/commands/verify.py +76 -11
- invarlock/cli/config.py +1 -1
- invarlock/cli/doctor_helpers.py +4 -5
- invarlock/cli/output.py +193 -0
- invarlock/cli/provenance.py +1 -1
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/registry.py +9 -11
- invarlock/core/runner.py +111 -25
- invarlock/edits/quant_rtn.py +65 -37
- invarlock/eval/bench.py +3 -3
- invarlock/eval/data.py +68 -23
- invarlock/eval/metrics.py +59 -1
- invarlock/eval/tasks/__init__.py +12 -0
- invarlock/eval/tasks/classification.py +48 -0
- invarlock/eval/tasks/qa.py +36 -0
- invarlock/eval/tasks/text_generation.py +102 -0
- invarlock/guards/invariants.py +19 -10
- invarlock/guards/rmt.py +2 -2
- invarlock/guards/variance.py +2 -2
- invarlock/model_profile.py +48 -27
- invarlock/observability/health.py +6 -6
- invarlock/observability/metrics.py +108 -0
- invarlock/reporting/certificate.py +159 -9
- invarlock/reporting/certificate_schema.py +1 -1
- invarlock/reporting/guards_analysis.py +154 -4
- invarlock/reporting/html.py +55 -5
- invarlock/reporting/normalizer.py +7 -0
- invarlock/reporting/render.py +791 -431
- invarlock/reporting/report.py +39 -3
- invarlock/reporting/report_types.py +6 -1
- invarlock/reporting/telemetry.py +86 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/METADATA +23 -9
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/RECORD +53 -48
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
- invarlock/adapters/hf_gpt2.py +0 -404
- invarlock/adapters/hf_llama.py +0 -487
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
|
@@ -1,15 +1,8 @@
|
|
|
1
1
|
"""
|
|
2
|
-
HuggingFace
|
|
2
|
+
HuggingFace masked LM adapter.
|
|
3
3
|
==============================
|
|
4
4
|
|
|
5
|
-
ModelAdapter implementation for HuggingFace
|
|
6
|
-
|
|
7
|
-
This adapter provides BERT-specific integration including:
|
|
8
|
-
- Support for BERT, RoBERTa, DistilBERT, and other BERT variants
|
|
9
|
-
- Proper handling of bidirectional attention layers
|
|
10
|
-
- Support for classification heads and pooling layers
|
|
11
|
-
- Token type embeddings and position embeddings handling
|
|
12
|
-
- Proper device-aware state serialization
|
|
5
|
+
ModelAdapter implementation for HuggingFace masked language models.
|
|
13
6
|
"""
|
|
14
7
|
|
|
15
8
|
from typing import Any
|
|
@@ -27,7 +20,7 @@ TensorType = torch.Tensor
|
|
|
27
20
|
ModuleType = nn.Module
|
|
28
21
|
|
|
29
22
|
|
|
30
|
-
class HF_BERT_Adapter(HFAdapterMixin, ModelAdapter):
|
|
23
|
+
class HF_MLM_Adapter(HFAdapterMixin, ModelAdapter):
|
|
31
24
|
"""
|
|
32
25
|
HuggingFace-specific ModelAdapter implementation for BERT models.
|
|
33
26
|
|
|
@@ -39,7 +32,7 @@ class HF_BERT_Adapter(HFAdapterMixin, ModelAdapter):
|
|
|
39
32
|
- Device-aware state serialization
|
|
40
33
|
"""
|
|
41
34
|
|
|
42
|
-
name = "
|
|
35
|
+
name = "hf_mlm"
|
|
43
36
|
|
|
44
37
|
def load_model(
|
|
45
38
|
self, model_id: str, device: str = "auto", **kwargs: Any
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
"""
|
|
2
|
-
HuggingFace
|
|
3
|
-
|
|
2
|
+
HuggingFace encoder-decoder adapter.
|
|
3
|
+
===================================
|
|
4
4
|
|
|
5
|
-
ModelAdapter implementation for HuggingFace
|
|
5
|
+
ModelAdapter implementation for HuggingFace encoder-decoder (seq2seq) models.
|
|
6
6
|
|
|
7
|
-
Loads AutoModelForSeq2SeqLM
|
|
8
|
-
|
|
7
|
+
Loads AutoModelForSeq2SeqLM and exposes a minimal describe() sufficient for
|
|
8
|
+
guard policies and reporting.
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
11
|
from __future__ import annotations
|
|
@@ -25,10 +25,10 @@ TensorType = torch.Tensor
|
|
|
25
25
|
ModuleType = nn.Module
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
class HF_T5_Adapter(HFAdapterMixin, ModelAdapter):
|
|
29
|
-
"""HuggingFace
|
|
28
|
+
class HF_Seq2Seq_Adapter(HFAdapterMixin, ModelAdapter):
|
|
29
|
+
"""HuggingFace encoder-decoder adapter using AutoModelForSeq2SeqLM."""
|
|
30
30
|
|
|
31
|
-
name = "
|
|
31
|
+
name = "hf_seq2seq"
|
|
32
32
|
|
|
33
33
|
def load_model( # type: ignore[override]
|
|
34
34
|
self, model_id: str, device: str = "auto", **kwargs: Any
|
|
@@ -136,4 +136,4 @@ class HF_T5_Adapter(HFAdapterMixin, ModelAdapter):
|
|
|
136
136
|
return super().restore(model, blob)
|
|
137
137
|
|
|
138
138
|
|
|
139
|
-
__all__ = ["HF_T5_Adapter"]
|
|
139
|
+
__all__ = ["HF_Seq2Seq_Adapter"]
|
invarlock/cli/adapter_auto.py
CHANGED
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
Auto adapter resolution utilities.
|
|
3
3
|
|
|
4
4
|
These helpers map a model identifier (HF directory or Hub ID) to a
|
|
5
|
-
concrete built-in adapter name (
|
|
5
|
+
concrete built-in adapter name (hf_causal, hf_mlm, hf_seq2seq, hf_causal_onnx)
|
|
6
|
+
without
|
|
6
7
|
adding a hard dependency on Transformers.
|
|
7
8
|
"""
|
|
8
9
|
|
|
@@ -58,15 +59,15 @@ def _detect_quant_family_from_cfg(cfg: dict[str, Any]) -> str | None:
|
|
|
58
59
|
|
|
59
60
|
|
|
60
61
|
def resolve_auto_adapter(
|
|
61
|
-
model_id: str | os.PathLike[str], default: str = "
|
|
62
|
+
model_id: str | os.PathLike[str], default: str = "hf_causal"
|
|
62
63
|
) -> str:
|
|
63
64
|
"""Resolve an appropriate built-in adapter name for a model.
|
|
64
65
|
|
|
65
66
|
Heuristics:
|
|
66
67
|
- Prefer local config.json (no network). Inspect `model_type` and
|
|
67
|
-
`architectures` to classify
|
|
68
|
+
`architectures` to classify causal vs masked-LM vs seq2seq.
|
|
68
69
|
- Fallback to simple name heuristics on the model_id string.
|
|
69
|
-
- Default to `
|
|
70
|
+
- Default to `hf_causal` when unsure.
|
|
70
71
|
"""
|
|
71
72
|
cfg = _read_local_hf_config(model_id)
|
|
72
73
|
model_id_str = str(model_id)
|
|
@@ -77,32 +78,41 @@ def resolve_auto_adapter(
|
|
|
77
78
|
if fam:
|
|
78
79
|
return fam
|
|
79
80
|
mt = str(c.get("model_type", "")).lower()
|
|
81
|
+
if bool(c.get("is_encoder_decoder", False)):
|
|
82
|
+
return "hf_seq2seq"
|
|
80
83
|
archs = [str(a) for a in c.get("architectures", []) if isinstance(a, str)]
|
|
81
84
|
arch_blob = " ".join(archs)
|
|
82
|
-
if
|
|
83
|
-
|
|
84
|
-
or "Llama" in arch_blob
|
|
85
|
-
or "Mistral" in arch_blob
|
|
86
|
-
):
|
|
87
|
-
return "hf_llama"
|
|
85
|
+
if "ConditionalGeneration" in arch_blob or "Seq2SeqLM" in arch_blob:
|
|
86
|
+
return "hf_seq2seq"
|
|
88
87
|
# Treat masked-LM families as BERT-like
|
|
89
88
|
if (
|
|
90
89
|
mt in {"bert", "roberta", "distilbert", "albert", "deberta", "deberta-v2"}
|
|
91
90
|
or "MaskedLM" in arch_blob
|
|
92
91
|
):
|
|
93
|
-
return "
|
|
94
|
-
#
|
|
95
|
-
if "CausalLM" in arch_blob or
|
|
92
|
+
return "hf_mlm"
|
|
93
|
+
# Causal LM families (best-effort; structural validation happens in the adapter).
|
|
94
|
+
if "CausalLM" in arch_blob or "ForCausalLM" in arch_blob:
|
|
95
|
+
return "hf_causal"
|
|
96
|
+
if mt in {
|
|
97
|
+
"mistral",
|
|
98
|
+
"mixtral",
|
|
99
|
+
"qwen",
|
|
100
|
+
"qwen2",
|
|
101
|
+
"qwen2_moe",
|
|
102
|
+
"yi",
|
|
96
103
|
"gpt2",
|
|
97
104
|
"gpt_neox",
|
|
98
105
|
"opt",
|
|
99
106
|
"gptj",
|
|
100
|
-
"
|
|
107
|
+
"phi",
|
|
108
|
+
"falcon",
|
|
109
|
+
"glm",
|
|
110
|
+
"deepseek",
|
|
101
111
|
}:
|
|
102
|
-
return "
|
|
112
|
+
return "hf_causal"
|
|
103
113
|
return None
|
|
104
114
|
|
|
105
|
-
# If local directory contains ONNX model files, prefer
|
|
115
|
+
# If local directory contains ONNX model files, prefer the ONNX causal adapter.
|
|
106
116
|
try:
|
|
107
117
|
p = Path(model_id)
|
|
108
118
|
if p.exists() and p.is_dir():
|
|
@@ -114,7 +124,7 @@ def resolve_auto_adapter(
|
|
|
114
124
|
"encoder_model.onnx",
|
|
115
125
|
]
|
|
116
126
|
if any((p / fname).exists() for fname in onnx_files):
|
|
117
|
-
return "
|
|
127
|
+
return "hf_causal_onnx"
|
|
118
128
|
except Exception:
|
|
119
129
|
pass
|
|
120
130
|
|
|
@@ -134,10 +144,10 @@ def resolve_auto_adapter(
|
|
|
134
144
|
k in lower_id for k in ["bnb", "bitsandbytes", "-4bit", "-8bit", "4bit", "8bit"]
|
|
135
145
|
):
|
|
136
146
|
return "hf_bnb"
|
|
137
|
-
if any(k in lower_id for k in ["
|
|
138
|
-
return "
|
|
147
|
+
if any(k in lower_id for k in ["t5", "bart"]):
|
|
148
|
+
return "hf_seq2seq"
|
|
139
149
|
if any(k in lower_id for k in ["bert", "roberta", "albert", "deberta"]):
|
|
140
|
-
return "
|
|
150
|
+
return "hf_mlm"
|
|
141
151
|
return default
|
|
142
152
|
|
|
143
153
|
|
|
@@ -148,7 +158,7 @@ def apply_auto_adapter_if_needed(cfg: Any) -> Any:
|
|
|
148
158
|
"""
|
|
149
159
|
try:
|
|
150
160
|
adapter = str(getattr(cfg.model, "adapter", ""))
|
|
151
|
-
if adapter.strip().lower() not in {"auto", "
|
|
161
|
+
if adapter.strip().lower() not in {"auto", "auto_hf"}:
|
|
152
162
|
return cfg
|
|
153
163
|
model_id = str(getattr(cfg.model, "id", ""))
|
|
154
164
|
resolved = resolve_auto_adapter(model_id)
|
invarlock/cli/app.py
CHANGED
|
@@ -105,7 +105,7 @@ Order: certify → report → run → plugins → doctor → version
|
|
|
105
105
|
@app.command(
|
|
106
106
|
name="certify",
|
|
107
107
|
help=(
|
|
108
|
-
"Certify a subject model against a baseline and generate
|
|
108
|
+
"Certify a subject model against a baseline and generate an evaluation certificate. "
|
|
109
109
|
"Use when you have two model snapshots and want pass/fail gating."
|
|
110
110
|
),
|
|
111
111
|
)
|
|
@@ -116,6 +116,14 @@ def _certify_lazy(
|
|
|
116
116
|
edited: str = typer.Option(
|
|
117
117
|
..., "--edited", "--subject", help="Subject model dir or Hub ID"
|
|
118
118
|
),
|
|
119
|
+
baseline_report: str | None = typer.Option(
|
|
120
|
+
None,
|
|
121
|
+
"--baseline-report",
|
|
122
|
+
help=(
|
|
123
|
+
"Reuse an existing baseline run report.json (skips baseline evaluation). "
|
|
124
|
+
"Must include stored evaluation windows (e.g., set INVARLOCK_STORE_EVAL_WINDOWS=1)."
|
|
125
|
+
),
|
|
126
|
+
),
|
|
119
127
|
adapter: str = typer.Option(
|
|
120
128
|
"auto", "--adapter", help="Adapter name or 'auto' to resolve"
|
|
121
129
|
),
|
|
@@ -139,12 +147,38 @@ def _certify_lazy(
|
|
|
139
147
|
edit_config: str | None = typer.Option(
|
|
140
148
|
None, "--edit-config", help="Edit preset to apply a demo edit (quant_rtn)"
|
|
141
149
|
),
|
|
150
|
+
edit_label: str | None = typer.Option(
|
|
151
|
+
None,
|
|
152
|
+
"--edit-label",
|
|
153
|
+
help=(
|
|
154
|
+
"Edit algorithm label for BYOE models. Use 'noop' for baseline, "
|
|
155
|
+
"'quant_rtn' etc. for built-in edits, 'custom' for pre-edited models."
|
|
156
|
+
),
|
|
157
|
+
),
|
|
158
|
+
quiet: bool = typer.Option(
|
|
159
|
+
False, "--quiet", "-q", help="Minimal output (suppress run/report detail)"
|
|
160
|
+
),
|
|
161
|
+
verbose: bool = typer.Option(
|
|
162
|
+
False, "--verbose", "-v", help="Verbose output (include debug details)"
|
|
163
|
+
),
|
|
164
|
+
banner: bool = typer.Option(
|
|
165
|
+
True, "--banner/--no-banner", help="Show header banner"
|
|
166
|
+
),
|
|
167
|
+
style: str = typer.Option("audit", "--style", help="Output style (audit|friendly)"),
|
|
168
|
+
timing: bool = typer.Option(False, "--timing", help="Show timing summary"),
|
|
169
|
+
progress: bool = typer.Option(
|
|
170
|
+
True, "--progress/--no-progress", help="Show progress done messages"
|
|
171
|
+
),
|
|
172
|
+
no_color: bool = typer.Option(
|
|
173
|
+
False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
|
|
174
|
+
),
|
|
142
175
|
):
|
|
143
176
|
from .commands.certify import certify_command as _cert
|
|
144
177
|
|
|
145
178
|
return _cert(
|
|
146
179
|
source=source,
|
|
147
180
|
edited=edited,
|
|
181
|
+
baseline_report=baseline_report,
|
|
148
182
|
adapter=adapter,
|
|
149
183
|
device=device,
|
|
150
184
|
profile=profile,
|
|
@@ -153,6 +187,14 @@ def _certify_lazy(
|
|
|
153
187
|
out=out,
|
|
154
188
|
cert_out=cert_out,
|
|
155
189
|
edit_config=edit_config,
|
|
190
|
+
edit_label=edit_label,
|
|
191
|
+
quiet=quiet,
|
|
192
|
+
verbose=verbose,
|
|
193
|
+
banner=banner,
|
|
194
|
+
style=style,
|
|
195
|
+
timing=timing,
|
|
196
|
+
progress=progress,
|
|
197
|
+
no_color=no_color,
|
|
156
198
|
)
|
|
157
199
|
|
|
158
200
|
|
|
@@ -230,7 +272,7 @@ def _verify_typed(
|
|
|
230
272
|
name="run",
|
|
231
273
|
help=(
|
|
232
274
|
"Execute an end-to-end run from a YAML config (edit + guards + reports). "
|
|
233
|
-
"Writes run artifacts and optionally
|
|
275
|
+
"Writes run artifacts and optionally an evaluation certificate."
|
|
234
276
|
),
|
|
235
277
|
)
|
|
236
278
|
def _run_typed(
|
|
@@ -245,11 +287,24 @@ def _run_typed(
|
|
|
245
287
|
),
|
|
246
288
|
out: str | None = typer.Option(None, "--out", help="Output directory override"),
|
|
247
289
|
edit: str | None = typer.Option(None, "--edit", help="Edit kind (quant|mixed)"),
|
|
290
|
+
edit_label: str | None = typer.Option(
|
|
291
|
+
None,
|
|
292
|
+
"--edit-label",
|
|
293
|
+
help=(
|
|
294
|
+
"Edit algorithm label for BYOE models. Use 'noop' for baseline, "
|
|
295
|
+
"'quant_rtn' etc. for built-in edits, 'custom' for pre-edited models."
|
|
296
|
+
),
|
|
297
|
+
),
|
|
248
298
|
tier: str | None = typer.Option(
|
|
249
299
|
None,
|
|
250
300
|
"--tier",
|
|
251
301
|
help="Auto-tuning tier override (conservative|balanced|aggressive)",
|
|
252
302
|
),
|
|
303
|
+
metric_kind: str | None = typer.Option(
|
|
304
|
+
None,
|
|
305
|
+
"--metric-kind",
|
|
306
|
+
help="Primary metric kind override (ppl_causal|ppl_mlm|accuracy|etc.)",
|
|
307
|
+
),
|
|
253
308
|
probes: int | None = typer.Option(
|
|
254
309
|
None, "--probes", help="Number of micro-probes (0=deterministic, >0=adaptive)"
|
|
255
310
|
),
|
|
@@ -270,6 +325,16 @@ def _run_typed(
|
|
|
270
325
|
no_cleanup: bool = typer.Option(
|
|
271
326
|
False, "--no-cleanup", help="Skip cleanup of temporary artifacts"
|
|
272
327
|
),
|
|
328
|
+
style: str | None = typer.Option(
|
|
329
|
+
None, "--style", help="Output style (audit|friendly)"
|
|
330
|
+
),
|
|
331
|
+
progress: bool = typer.Option(
|
|
332
|
+
False, "--progress", help="Show progress done messages"
|
|
333
|
+
),
|
|
334
|
+
timing: bool = typer.Option(False, "--timing", help="Show timing summary"),
|
|
335
|
+
no_color: bool = typer.Option(
|
|
336
|
+
False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
|
|
337
|
+
),
|
|
273
338
|
):
|
|
274
339
|
from .commands.run import run_command as _run
|
|
275
340
|
|
|
@@ -279,13 +344,19 @@ def _run_typed(
|
|
|
279
344
|
profile=profile,
|
|
280
345
|
out=out,
|
|
281
346
|
edit=edit,
|
|
347
|
+
edit_label=edit_label,
|
|
282
348
|
tier=tier,
|
|
349
|
+
metric_kind=metric_kind,
|
|
283
350
|
probes=probes,
|
|
284
351
|
until_pass=until_pass,
|
|
285
352
|
max_attempts=max_attempts,
|
|
286
353
|
timeout=timeout,
|
|
287
354
|
baseline=baseline,
|
|
288
355
|
no_cleanup=no_cleanup,
|
|
356
|
+
style=style,
|
|
357
|
+
progress=progress,
|
|
358
|
+
timing=timing,
|
|
359
|
+
no_color=no_color,
|
|
289
360
|
)
|
|
290
361
|
|
|
291
362
|
|