invarlock 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +3 -3
- invarlock/adapters/auto.py +2 -10
- invarlock/adapters/hf_loading.py +7 -7
- invarlock/adapters/hf_mixin.py +28 -5
- invarlock/assurance/__init__.py +15 -23
- invarlock/calibration/spectral_null.py +1 -1
- invarlock/cli/adapter_auto.py +1 -5
- invarlock/cli/app.py +57 -27
- invarlock/cli/commands/__init__.py +2 -2
- invarlock/cli/commands/calibrate.py +48 -4
- invarlock/cli/commands/{certify.py → evaluate.py} +69 -46
- invarlock/cli/commands/explain_gates.py +94 -51
- invarlock/cli/commands/export_html.py +11 -9
- invarlock/cli/commands/report.py +121 -47
- invarlock/cli/commands/run.py +274 -66
- invarlock/cli/commands/verify.py +84 -89
- invarlock/cli/determinism.py +1 -1
- invarlock/cli/provenance.py +3 -3
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/retry.py +14 -14
- invarlock/core/runner.py +1 -1
- invarlock/edits/noop.py +2 -2
- invarlock/edits/quant_rtn.py +2 -2
- invarlock/eval/__init__.py +1 -1
- invarlock/eval/bench.py +11 -7
- invarlock/eval/primary_metric.py +1 -1
- invarlock/guards/spectral.py +2 -2
- invarlock/guards_ref/spectral_ref.py +1 -1
- invarlock/model_profile.py +16 -35
- invarlock/observability/health.py +38 -20
- invarlock/plugins/hf_bnb_adapter.py +32 -21
- invarlock/reporting/__init__.py +18 -4
- invarlock/reporting/html.py +7 -7
- invarlock/reporting/normalizer.py +2 -2
- invarlock/reporting/policy_utils.py +1 -1
- invarlock/reporting/primary_metric_utils.py +11 -11
- invarlock/reporting/render.py +126 -120
- invarlock/reporting/report.py +43 -37
- invarlock/reporting/{certificate.py → report_builder.py} +103 -99
- invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
- invarlock-0.3.9.dist-info/METADATA +303 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/RECORD +46 -46
- {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/WHEEL +1 -1
- invarlock-0.3.7.dist-info/METADATA +0 -602
- {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/entry_points.txt +0 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/top_level.txt +0 -0
invarlock/__init__.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
InvarLock: Edit‑agnostic
|
|
2
|
+
InvarLock: Edit‑agnostic evaluation reports for weight edits
|
|
3
3
|
=============================================================
|
|
4
4
|
|
|
5
5
|
Core runtime package — torch-independent utilities, configuration, and interfaces.
|
|
@@ -9,10 +9,10 @@ For torch-dependent functionality, see subpackages under `invarlock.*`:
|
|
|
9
9
|
- `invarlock.adapters`: Model adapters (HF causal/MLM/seq2seq + auto)
|
|
10
10
|
- `invarlock.guards`: Safety mechanisms (invariants, spectral, RMT, variance)
|
|
11
11
|
- `invarlock.edits`: Built-in quantization and edit interfaces
|
|
12
|
-
- `invarlock.eval`: Metrics, guard-overhead checks, and
|
|
12
|
+
- `invarlock.eval`: Metrics, guard-overhead checks, and evaluation reporting
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
|
-
__version__ = "0.3.7"
|
|
15
|
+
__version__ = "0.3.9"
|
|
16
16
|
|
|
17
17
|
# Core exports - torch-independent
|
|
18
18
|
from .config import CFG, Defaults, get_default_config
|
invarlock/adapters/auto.py
CHANGED
|
@@ -38,11 +38,7 @@ def _detect_quantization_from_path(model_id: str) -> str | None:
|
|
|
38
38
|
return "hf_awq"
|
|
39
39
|
elif quant_method == "gptq":
|
|
40
40
|
return "hf_gptq"
|
|
41
|
-
elif (
|
|
42
|
-
quant_method == "bitsandbytes"
|
|
43
|
-
or quant_cfg.get("load_in_8bit")
|
|
44
|
-
or quant_cfg.get("load_in_4bit")
|
|
45
|
-
):
|
|
41
|
+
elif "bitsandbytes" in quant_method or "bnb" in quant_method:
|
|
46
42
|
return "hf_bnb"
|
|
47
43
|
|
|
48
44
|
except Exception:
|
|
@@ -78,11 +74,7 @@ def _detect_quantization_from_model(model: Any) -> str | None:
|
|
|
78
74
|
return "hf_awq"
|
|
79
75
|
elif quant_method == "gptq":
|
|
80
76
|
return "hf_gptq"
|
|
81
|
-
elif (
|
|
82
|
-
quant_method == "bitsandbytes"
|
|
83
|
-
or quant_cfg.get("load_in_8bit")
|
|
84
|
-
or quant_cfg.get("load_in_4bit")
|
|
85
|
-
):
|
|
77
|
+
elif "bitsandbytes" in quant_method or "bnb" in quant_method:
|
|
86
78
|
return "hf_bnb"
|
|
87
79
|
else:
|
|
88
80
|
# Object-style config
|
invarlock/adapters/hf_loading.py
CHANGED
|
@@ -50,7 +50,7 @@ def resolve_trust_remote_code(
|
|
|
50
50
|
return default
|
|
51
51
|
|
|
52
52
|
|
|
53
|
-
def default_torch_dtype() -> torch.dtype:
|
|
53
|
+
def default_dtype() -> torch.dtype:
|
|
54
54
|
"""Pick a safe default dtype for HF loads based on hardware."""
|
|
55
55
|
if torch.cuda.is_available():
|
|
56
56
|
try:
|
|
@@ -69,10 +69,10 @@ def default_torch_dtype() -> torch.dtype:
|
|
|
69
69
|
return torch.float32
|
|
70
70
|
|
|
71
71
|
|
|
72
|
-
def resolve_torch_dtype(kwargs: dict[str, Any] | None = None) -> torch.dtype | str:
|
|
73
|
-
"""Resolve
|
|
74
|
-
if kwargs and "torch_dtype" in kwargs:
|
|
75
|
-
val = kwargs.get("torch_dtype")
|
|
72
|
+
def resolve_dtype(kwargs: dict[str, Any] | None = None) -> torch.dtype | str:
|
|
73
|
+
"""Resolve dtype from kwargs or choose a hardware-aware default."""
|
|
74
|
+
if kwargs and "dtype" in kwargs:
|
|
75
|
+
val = kwargs.get("dtype")
|
|
76
76
|
if isinstance(val, torch.dtype):
|
|
77
77
|
return val
|
|
78
78
|
if isinstance(val, str):
|
|
@@ -91,7 +91,7 @@ def resolve_torch_dtype(kwargs: dict[str, Any] | None = None) -> torch.dtype | s
|
|
|
91
91
|
if s in mapping:
|
|
92
92
|
return mapping[s]
|
|
93
93
|
|
|
94
|
-
return default_torch_dtype()
|
|
94
|
+
return default_dtype()
|
|
95
95
|
|
|
96
96
|
|
|
97
|
-
__all__ = ["resolve_trust_remote_code", "default_torch_dtype", "resolve_torch_dtype"]
|
|
97
|
+
__all__ = ["resolve_trust_remote_code", "default_dtype", "resolve_dtype"]
|
invarlock/adapters/hf_mixin.py
CHANGED
|
@@ -583,22 +583,45 @@ class HFAdapterMixin:
|
|
|
583
583
|
def _serialize_config(self, config: Any) -> dict[str, Any]:
|
|
584
584
|
"""Serialize HuggingFace config fields into simple Python types."""
|
|
585
585
|
|
|
586
|
+
def _collect(data: dict[str, Any]) -> dict[str, Any]:
|
|
587
|
+
out: dict[str, Any] = {}
|
|
588
|
+
for key, value in data.items():
|
|
589
|
+
if key.startswith("_") or key in {"method_calls"}:
|
|
590
|
+
continue
|
|
591
|
+
if value is None or isinstance(value, SCALAR_TYPES):
|
|
592
|
+
out[key] = value
|
|
593
|
+
elif isinstance(value, list | dict):
|
|
594
|
+
out[key] = value
|
|
595
|
+
return out
|
|
596
|
+
|
|
597
|
+
to_dict = getattr(config, "to_dict", None)
|
|
598
|
+
if callable(to_dict):
|
|
599
|
+
try:
|
|
600
|
+
data = to_dict()
|
|
601
|
+
except Exception:
|
|
602
|
+
data = None
|
|
603
|
+
if isinstance(data, dict):
|
|
604
|
+
return _collect(data)
|
|
605
|
+
|
|
606
|
+
try:
|
|
607
|
+
data = vars(config)
|
|
608
|
+
except TypeError:
|
|
609
|
+
data = None
|
|
610
|
+
if isinstance(data, dict):
|
|
611
|
+
return _collect(data)
|
|
612
|
+
|
|
586
613
|
result: dict[str, Any] = {}
|
|
587
614
|
for key in dir(config):
|
|
588
|
-
if key.startswith("_"):
|
|
615
|
+
if key.startswith("_") or key in {"torch_dtype"}:
|
|
589
616
|
continue
|
|
590
|
-
|
|
591
617
|
try:
|
|
592
618
|
value = getattr(config, key)
|
|
593
619
|
except AttributeError:
|
|
594
620
|
continue
|
|
595
|
-
|
|
596
621
|
if callable(value):
|
|
597
622
|
continue
|
|
598
|
-
|
|
599
623
|
if value is None or isinstance(value, SCALAR_TYPES):
|
|
600
624
|
result[key] = value
|
|
601
625
|
elif isinstance(value, list | dict):
|
|
602
626
|
result[key] = value
|
|
603
|
-
|
|
604
627
|
return result
|
invarlock/assurance/__init__.py
CHANGED
|
@@ -1,8 +1,4 @@
|
|
|
1
|
-
"""Assurance namespace (`invarlock.assurance`).
|
|
2
|
-
|
|
3
|
-
This namespace groups safety-certificate related surfaces. For now it forwards
|
|
4
|
-
to `invarlock.eval` and guard modules; future work may move implementations here.
|
|
5
|
-
"""
|
|
1
|
+
"""Assurance namespace (`invarlock.assurance`)."""
|
|
6
2
|
|
|
7
3
|
from __future__ import annotations
|
|
8
4
|
|
|
@@ -11,33 +7,29 @@ from typing import Any
|
|
|
11
7
|
from invarlock.reporting.report_types import RunReport
|
|
12
8
|
|
|
13
9
|
try: # pragma: no cover - shim to reporting modules
|
|
14
|
-
from invarlock.reporting.certificate import (
|
|
15
|
-
CERTIFICATE_SCHEMA_VERSION,
|
|
16
|
-
make_certificate,
|
|
17
|
-
validate_certificate,
|
|
18
|
-
)
|
|
19
|
-
|
|
20
10
|
# Prefer direct import from render for rendering APIs
|
|
21
|
-
from invarlock.reporting.render import
|
|
11
|
+
from invarlock.reporting.render import render_report_markdown
|
|
12
|
+
from invarlock.reporting.report_builder import make_report
|
|
13
|
+
from invarlock.reporting.report_schema import REPORT_SCHEMA_VERSION, validate_report
|
|
22
14
|
except Exception: # pragma: no cover - provide soft stubs
|
|
23
|
-
|
|
15
|
+
REPORT_SCHEMA_VERSION = "v1"
|
|
24
16
|
|
|
25
|
-
def make_certificate(
|
|
17
|
+
def make_report(
|
|
26
18
|
report: RunReport,
|
|
27
19
|
baseline: RunReport | dict[str, Any],
|
|
28
20
|
) -> dict[str, Any]:
|
|
29
|
-
raise ImportError("invarlock.reporting.certificate not available")
|
|
21
|
+
raise ImportError("invarlock.reporting.report_builder not available")
|
|
30
22
|
|
|
31
|
-
def
|
|
32
|
-
raise ImportError("invarlock.reporting.
|
|
23
|
+
def render_report_markdown(evaluation_report: dict[str, Any]) -> str:
|
|
24
|
+
raise ImportError("invarlock.reporting.report_builder not available")
|
|
33
25
|
|
|
34
|
-
def
|
|
35
|
-
raise ImportError("invarlock.reporting.
|
|
26
|
+
def validate_report(report: dict[str, Any]) -> bool:
|
|
27
|
+
raise ImportError("invarlock.reporting.report_schema not available")
|
|
36
28
|
|
|
37
29
|
|
|
38
30
|
__all__ = [
|
|
39
|
-
"
|
|
40
|
-
"
|
|
41
|
-
"
|
|
42
|
-
"
|
|
31
|
+
"REPORT_SCHEMA_VERSION",
|
|
32
|
+
"make_report",
|
|
33
|
+
"render_report_markdown",
|
|
34
|
+
"validate_report",
|
|
43
35
|
]
|
invarlock/calibration/spectral_null.py
CHANGED
|
@@ -35,7 +35,7 @@ def _bh_reject_families(
|
|
|
35
35
|
|
|
36
36
|
order = sorted(
|
|
37
37
|
range(n),
|
|
38
|
-
key=lambda idx:
|
|
38
|
+
key=lambda idx: float("inf") if not _finite01(pvals[idx]) else pvals[idx],
|
|
39
39
|
)
|
|
40
40
|
max_k = 0
|
|
41
41
|
for rank, idx in enumerate(order, start=1):
|
invarlock/cli/adapter_auto.py
CHANGED
|
@@ -47,11 +47,7 @@ def _detect_quant_family_from_cfg(cfg: dict[str, Any]) -> str | None:
|
|
|
47
47
|
return "hf_gptq"
|
|
48
48
|
if any(tok in method for tok in ("awq",)):
|
|
49
49
|
return "hf_awq"
|
|
50
|
-
|
|
51
|
-
if any(
|
|
52
|
-
str(q.get(k, "")).lower() in {"true", "1"}
|
|
53
|
-
for k in ("load_in_4bit", "load_in_8bit")
|
|
54
|
-
) or any("bitsandbytes" in str(v).lower() for v in q.values()):
|
|
50
|
+
if "bitsandbytes" in method or "bnb" in method:
|
|
55
51
|
return "hf_bnb"
|
|
56
52
|
except Exception:
|
|
57
53
|
return None
|
invarlock/cli/app.py
CHANGED
|
@@ -17,7 +17,11 @@ import typer
|
|
|
17
17
|
from rich.console import Console
|
|
18
18
|
from typer.core import TyperGroup
|
|
19
19
|
|
|
20
|
-
from invarlock.security import
|
|
20
|
+
from invarlock.security import (
|
|
21
|
+
enforce_default_security,
|
|
22
|
+
enforce_network_policy,
|
|
23
|
+
network_policy_allows,
|
|
24
|
+
)
|
|
21
25
|
|
|
22
26
|
# Lightweight import mode disables heavy side effects in some modules, but we no
|
|
23
27
|
# longer force plugin discovery off globally here; individual commands may gate
|
|
@@ -33,7 +37,7 @@ LIGHT_IMPORT = os.getenv("INVARLOCK_LIGHT_IMPORT", "").strip().lower() in {
|
|
|
33
37
|
class OrderedGroup(TyperGroup):
|
|
34
38
|
def list_commands(self, ctx): # type: ignore[override]
|
|
35
39
|
return [
|
|
36
|
-
"certify",
|
|
40
|
+
"evaluate",
|
|
37
41
|
"calibrate",
|
|
38
42
|
"report",
|
|
39
43
|
"verify",
|
|
@@ -48,8 +52,8 @@ class OrderedGroup(TyperGroup):
|
|
|
48
52
|
app = typer.Typer(
|
|
49
53
|
name="invarlock",
|
|
50
54
|
help=(
|
|
51
|
-
"InvarLock —
|
|
52
|
-
"Quick path: invarlock
|
|
55
|
+
"InvarLock — evaluate model changes with deterministic pairing and safety gates.\n"
|
|
56
|
+
"Quick path: invarlock evaluate --baseline <MODEL> --subject <MODEL>\n"
|
|
53
57
|
"Hint: use --edit-config to run the built-in quant_rtn demo.\n"
|
|
54
58
|
"Tip: enable downloads with INVARLOCK_ALLOW_NETWORK=1 when fetching.\n"
|
|
55
59
|
"Exit codes:\n"
|
|
@@ -65,17 +69,16 @@ app = typer.Typer(
|
|
|
65
69
|
console = Console()
|
|
66
70
|
|
|
67
71
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
"""Show InvarLock version."""
|
|
72
|
+
def _emit_version() -> None:
|
|
73
|
+
"""Emit the InvarLock version string."""
|
|
71
74
|
# Prefer package metadata when available so CLI reflects wheel truth
|
|
72
75
|
try:
|
|
73
76
|
from importlib.metadata import version as _pkg_version
|
|
74
77
|
|
|
75
78
|
schema = None
|
|
76
79
|
try:
|
|
77
|
-
from invarlock.reporting.
|
|
78
|
-
|
|
80
|
+
from invarlock.reporting.report_builder import (
|
|
81
|
+
REPORT_SCHEMA_VERSION as _SCHEMA,
|
|
79
82
|
)
|
|
80
83
|
|
|
81
84
|
schema = _SCHEMA
|
|
@@ -96,20 +99,45 @@ def version():
|
|
|
96
99
|
console.print("InvarLock version unknown")
|
|
97
100
|
|
|
98
101
|
|
|
102
|
+
@app.callback(invoke_without_command=True)
|
|
103
|
+
def _root(
|
|
104
|
+
ctx: typer.Context,
|
|
105
|
+
show_version: bool = typer.Option(
|
|
106
|
+
False,
|
|
107
|
+
"--version",
|
|
108
|
+
"-V",
|
|
109
|
+
help="Show version and exit.",
|
|
110
|
+
is_eager=True,
|
|
111
|
+
),
|
|
112
|
+
) -> None:
|
|
113
|
+
was_allowed = network_policy_allows()
|
|
114
|
+
enforce_default_security()
|
|
115
|
+
ctx.call_on_close(lambda: enforce_network_policy(was_allowed))
|
|
116
|
+
if show_version:
|
|
117
|
+
_emit_version()
|
|
118
|
+
raise typer.Exit()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@app.command()
|
|
122
|
+
def version():
|
|
123
|
+
"""Show InvarLock version."""
|
|
124
|
+
_emit_version()
|
|
125
|
+
|
|
126
|
+
|
|
99
127
|
"""Register command modules and groups in the desired help order.
|
|
100
128
|
|
|
101
|
-
Order:
|
|
129
|
+
Order: evaluate → report → run → plugins → doctor → version
|
|
102
130
|
"""
|
|
103
131
|
|
|
104
132
|
|
|
105
133
|
@app.command(
|
|
106
|
-
name="certify",
|
|
134
|
+
name="evaluate",
|
|
107
135
|
help=(
|
|
108
|
-
"
|
|
136
|
+
"Evaluate a subject model against a baseline and generate an evaluation report. "
|
|
109
137
|
"Use when you have two model snapshots and want pass/fail gating."
|
|
110
138
|
),
|
|
111
139
|
)
|
|
112
|
-
def _certify_lazy(
|
|
140
|
+
def _evaluate_lazy(
|
|
113
141
|
source: str = typer.Option(
|
|
114
142
|
..., "--source", "--baseline", help="Baseline model dir or Hub ID"
|
|
115
143
|
),
|
|
@@ -141,8 +169,8 @@ def _certify_lazy(
|
|
|
141
169
|
),
|
|
142
170
|
),
|
|
143
171
|
out: str = typer.Option("runs", "--out", help="Base output directory"),
|
|
144
|
-
|
|
145
|
-
"reports/
|
|
172
|
+
report_out: str = typer.Option(
|
|
173
|
+
"reports/eval", "--report-out", help="Evaluation report output directory"
|
|
146
174
|
),
|
|
147
175
|
edit_config: str | None = typer.Option(
|
|
148
176
|
None, "--edit-config", help="Edit preset to apply a demo edit (quant_rtn)"
|
|
@@ -173,9 +201,9 @@ def _certify_lazy(
|
|
|
173
201
|
False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
|
|
174
202
|
),
|
|
175
203
|
):
|
|
176
|
-
from .commands.
|
|
204
|
+
from .commands.evaluate import evaluate_command as _eval
|
|
177
205
|
|
|
178
|
-
return
|
|
206
|
+
return _eval(
|
|
179
207
|
source=source,
|
|
180
208
|
edited=edited,
|
|
181
209
|
baseline_report=baseline_report,
|
|
@@ -185,7 +213,7 @@ def _certify_lazy(
|
|
|
185
213
|
tier=tier,
|
|
186
214
|
preset=preset,
|
|
187
215
|
out=out,
|
|
188
|
-
|
|
216
|
+
report_out=report_out,
|
|
189
217
|
edit_config=edit_config,
|
|
190
218
|
edit_label=edit_label,
|
|
191
219
|
quiet=quiet,
|
|
@@ -226,18 +254,18 @@ def _register_subapps() -> None:
|
|
|
226
254
|
@app.command(
|
|
227
255
|
name="verify",
|
|
228
256
|
help=(
|
|
229
|
-
"Verify
|
|
257
|
+
"Verify evaluation report JSON(s) against schema, pairing math, and gates. "
|
|
230
258
|
"Use --json for a single-line machine-readable envelope."
|
|
231
259
|
),
|
|
232
260
|
)
|
|
233
261
|
def _verify_typed(
|
|
234
|
-
|
|
235
|
-
..., help="One or more
|
|
262
|
+
reports: list[str] = typer.Argument(
|
|
263
|
+
..., help="One or more evaluation report JSON files to verify."
|
|
236
264
|
),
|
|
237
265
|
baseline: str | None = typer.Option(
|
|
238
266
|
None,
|
|
239
267
|
"--baseline",
|
|
240
|
-
help="Optional baseline
|
|
268
|
+
help="Optional baseline evaluation report JSON to enforce provider parity.",
|
|
241
269
|
),
|
|
242
270
|
tolerance: float = typer.Option(
|
|
243
271
|
1e-9, "--tolerance", help="Tolerance for analysis-basis comparisons."
|
|
@@ -257,10 +285,10 @@ def _verify_typed(
|
|
|
257
285
|
|
|
258
286
|
from .commands.verify import verify_command as _verify
|
|
259
287
|
|
|
260
|
-
|
|
288
|
+
report_paths = [_Path(p) for p in reports]
|
|
261
289
|
baseline_path = _Path(baseline) if isinstance(baseline, str) else None
|
|
262
290
|
return _verify(
|
|
263
|
-
|
|
291
|
+
reports=report_paths,
|
|
264
292
|
baseline=baseline_path,
|
|
265
293
|
tolerance=tolerance,
|
|
266
294
|
profile=profile,
|
|
@@ -272,7 +300,7 @@ def _verify_typed(
|
|
|
272
300
|
name="run",
|
|
273
301
|
help=(
|
|
274
302
|
"Execute an end-to-end run from a YAML config (edit + guards + reports). "
|
|
275
|
-
"Writes run artifacts and optionally an evaluation
|
|
303
|
+
"Writes run artifacts and optionally an evaluation report."
|
|
276
304
|
),
|
|
277
305
|
)
|
|
278
306
|
def _run_typed(
|
|
@@ -309,7 +337,9 @@ def _run_typed(
|
|
|
309
337
|
None, "--probes", help="Number of micro-probes (0=deterministic, >0=adaptive)"
|
|
310
338
|
),
|
|
311
339
|
until_pass: bool = typer.Option(
|
|
312
|
-
False,
|
|
340
|
+
False,
|
|
341
|
+
"--until-pass",
|
|
342
|
+
help="Retry until evaluation report passes gates (max 3 attempts)",
|
|
313
343
|
),
|
|
314
344
|
max_attempts: int = typer.Option(
|
|
315
345
|
3, "--max-attempts", help="Maximum retry attempts for --until-pass mode"
|
|
@@ -320,7 +350,7 @@ def _run_typed(
|
|
|
320
350
|
baseline: str | None = typer.Option(
|
|
321
351
|
None,
|
|
322
352
|
"--baseline",
|
|
323
|
-
help="Path to baseline report.json for
|
|
353
|
+
help="Path to baseline report.json for evaluation report validation",
|
|
324
354
|
),
|
|
325
355
|
no_cleanup: bool = typer.Option(
|
|
326
356
|
False, "--no-cleanup", help="Skip cleanup of temporary artifacts"
|
invarlock/cli/commands/__init__.py
CHANGED
|
@@ -5,8 +5,8 @@ Import-time work is minimal; subcommands themselves may perform heavier imports
|
|
|
5
5
|
only when invoked.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from .certify import certify_command
|
|
9
8
|
from .doctor import doctor_command
|
|
9
|
+
from .evaluate import evaluate_command
|
|
10
10
|
from .explain_gates import explain_gates_command
|
|
11
11
|
from .export_html import export_html_command
|
|
12
12
|
from .plugins import plugins_command
|
|
@@ -15,7 +15,7 @@ from .run import run_command
|
|
|
15
15
|
from .verify import verify_command
|
|
16
16
|
|
|
17
17
|
__all__ = [
|
|
18
|
-
"certify_command",
|
|
18
|
+
"evaluate_command",
|
|
19
19
|
"doctor_command",
|
|
20
20
|
"explain_gates_command",
|
|
21
21
|
"export_html_command",
|
invarlock/cli/commands/calibrate.py
CHANGED
|
@@ -20,10 +20,6 @@ import typer
|
|
|
20
20
|
import yaml
|
|
21
21
|
from rich.console import Console
|
|
22
22
|
|
|
23
|
-
from invarlock.calibration.spectral_null import summarize_null_sweep_reports
|
|
24
|
-
from invarlock.calibration.variance_ve import summarize_ve_sweep_reports
|
|
25
|
-
from invarlock.guards.tier_config import get_tier_guard_config
|
|
26
|
-
|
|
27
23
|
console = Console()
|
|
28
24
|
|
|
29
25
|
calibrate_app = typer.Typer(
|
|
@@ -114,6 +110,26 @@ def _write_tiers_recommendation(
|
|
|
114
110
|
)
|
|
115
111
|
|
|
116
112
|
|
|
113
|
+
def get_tier_guard_config(tier: str, guard_key: str) -> dict[str, Any]:
|
|
114
|
+
"""Lazy wrapper for tier config lookup.
|
|
115
|
+
|
|
116
|
+
This is intentionally a module-level symbol so tests can patch it without
|
|
117
|
+
importing torch/transformers at import time.
|
|
118
|
+
"""
|
|
119
|
+
try:
|
|
120
|
+
from invarlock.guards.tier_config import get_tier_guard_config as _get_cfg
|
|
121
|
+
except ModuleNotFoundError as exc:
|
|
122
|
+
missing = getattr(exc, "name", "") or ""
|
|
123
|
+
if missing in {"torch", "transformers"}:
|
|
124
|
+
console.print(
|
|
125
|
+
"[red]Missing optional dependencies for calibration.[/red] "
|
|
126
|
+
"Install `invarlock[hf]` (or at least torch/transformers) to run sweeps."
|
|
127
|
+
)
|
|
128
|
+
raise typer.Exit(1) from exc
|
|
129
|
+
raise
|
|
130
|
+
return _get_cfg(tier, guard_key)
|
|
131
|
+
|
|
132
|
+
|
|
117
133
|
@calibrate_app.command(
|
|
118
134
|
name="null-sweep",
|
|
119
135
|
help="Run a null (no-op edit) sweep and calibrate spectral κ/alpha empirically.",
|
|
@@ -160,6 +176,21 @@ def null_sweep(
|
|
|
160
176
|
# Keep import light: only pull run machinery when invoked.
|
|
161
177
|
from .run import run_command
|
|
162
178
|
|
|
179
|
+
# Optional deps: calibration sweeps require torch/guards, but docs/tests may
|
|
180
|
+
# import this module without heavy deps. Import lazily so CLI example
|
|
181
|
+
# validation can parse `invarlock calibrate ...` without installing torch.
|
|
182
|
+
try:
|
|
183
|
+
from invarlock.calibration.spectral_null import summarize_null_sweep_reports
|
|
184
|
+
except ModuleNotFoundError as exc:
|
|
185
|
+
missing = getattr(exc, "name", "") or ""
|
|
186
|
+
if missing in {"torch", "transformers"}:
|
|
187
|
+
console.print(
|
|
188
|
+
"[red]Missing optional dependencies for calibration.[/red] "
|
|
189
|
+
"Install `invarlock[hf]` (or at least torch/transformers) to run sweeps."
|
|
190
|
+
)
|
|
191
|
+
raise typer.Exit(1) from exc
|
|
192
|
+
raise
|
|
193
|
+
|
|
163
194
|
base = _load_yaml(config)
|
|
164
195
|
specs = _materialize_sweep_specs(
|
|
165
196
|
tiers=tiers, seeds=seed, n_seeds=n_seeds, seed_start=seed_start
|
|
@@ -378,6 +409,19 @@ def ve_sweep(
|
|
|
378
409
|
# Keep import light: only pull run machinery when invoked.
|
|
379
410
|
from .run import run_command
|
|
380
411
|
|
|
412
|
+
# Optional deps: see null_sweep() note.
|
|
413
|
+
try:
|
|
414
|
+
from invarlock.calibration.variance_ve import summarize_ve_sweep_reports
|
|
415
|
+
except ModuleNotFoundError as exc:
|
|
416
|
+
missing = getattr(exc, "name", "") or ""
|
|
417
|
+
if missing in {"torch", "transformers"}:
|
|
418
|
+
console.print(
|
|
419
|
+
"[red]Missing optional dependencies for calibration.[/red] "
|
|
420
|
+
"Install `invarlock[hf]` (or at least torch/transformers) to run sweeps."
|
|
421
|
+
)
|
|
422
|
+
raise typer.Exit(1) from exc
|
|
423
|
+
raise
|
|
424
|
+
|
|
381
425
|
base = _load_yaml(config)
|
|
382
426
|
windows = [int(w) for w in (window or [])] or [6, 8, 12, 16]
|
|
383
427
|
specs = _materialize_sweep_specs(
|