invarlock 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +4 -4
- invarlock/adapters/__init__.py +10 -14
- invarlock/adapters/auto.py +37 -50
- invarlock/adapters/capabilities.py +2 -2
- invarlock/adapters/hf_causal.py +418 -0
- invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
- invarlock/adapters/hf_loading.py +7 -7
- invarlock/adapters/hf_mixin.py +53 -9
- invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
- invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
- invarlock/assurance/__init__.py +15 -23
- invarlock/cli/adapter_auto.py +32 -26
- invarlock/cli/app.py +128 -27
- invarlock/cli/commands/__init__.py +2 -2
- invarlock/cli/commands/calibrate.py +48 -4
- invarlock/cli/commands/doctor.py +8 -10
- invarlock/cli/commands/evaluate.py +986 -0
- invarlock/cli/commands/explain_gates.py +25 -17
- invarlock/cli/commands/export_html.py +11 -9
- invarlock/cli/commands/plugins.py +13 -9
- invarlock/cli/commands/report.py +326 -92
- invarlock/cli/commands/run.py +1160 -228
- invarlock/cli/commands/verify.py +157 -97
- invarlock/cli/config.py +1 -1
- invarlock/cli/determinism.py +1 -1
- invarlock/cli/doctor_helpers.py +4 -5
- invarlock/cli/output.py +193 -0
- invarlock/cli/provenance.py +4 -4
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/registry.py +9 -11
- invarlock/core/retry.py +14 -14
- invarlock/core/runner.py +112 -26
- invarlock/edits/noop.py +2 -2
- invarlock/edits/quant_rtn.py +67 -39
- invarlock/eval/__init__.py +1 -1
- invarlock/eval/bench.py +14 -10
- invarlock/eval/data.py +68 -23
- invarlock/eval/metrics.py +59 -1
- invarlock/eval/primary_metric.py +1 -1
- invarlock/eval/tasks/__init__.py +12 -0
- invarlock/eval/tasks/classification.py +48 -0
- invarlock/eval/tasks/qa.py +36 -0
- invarlock/eval/tasks/text_generation.py +102 -0
- invarlock/guards/invariants.py +19 -10
- invarlock/guards/rmt.py +2 -2
- invarlock/guards/spectral.py +1 -1
- invarlock/guards/variance.py +2 -2
- invarlock/model_profile.py +64 -62
- invarlock/observability/health.py +6 -6
- invarlock/observability/metrics.py +108 -0
- invarlock/plugins/hf_bnb_adapter.py +32 -21
- invarlock/reporting/__init__.py +18 -4
- invarlock/reporting/guards_analysis.py +154 -4
- invarlock/reporting/html.py +61 -11
- invarlock/reporting/normalizer.py +9 -2
- invarlock/reporting/policy_utils.py +1 -1
- invarlock/reporting/primary_metric_utils.py +11 -11
- invarlock/reporting/render.py +876 -510
- invarlock/reporting/report.py +72 -30
- invarlock/reporting/{certificate.py → report_builder.py} +252 -99
- invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
- invarlock/reporting/report_types.py +6 -1
- invarlock/reporting/telemetry.py +86 -0
- invarlock-0.3.8.dist-info/METADATA +283 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
- invarlock/adapters/hf_gpt2.py +0 -404
- invarlock/adapters/hf_llama.py +0 -487
- invarlock/cli/commands/certify.py +0 -422
- invarlock-0.3.6.dist-info/METADATA +0 -588
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
|
@@ -7,7 +7,7 @@ import typer
|
|
|
7
7
|
from rich.console import Console
|
|
8
8
|
|
|
9
9
|
from invarlock.core.auto_tuning import get_tier_policies
|
|
10
|
-
from invarlock.reporting.
|
|
10
|
+
from invarlock.reporting.report_builder import make_report
|
|
11
11
|
|
|
12
12
|
console = Console()
|
|
13
13
|
|
|
@@ -18,9 +18,9 @@ def explain_gates_command(
|
|
|
18
18
|
..., "--baseline", help="Path to baseline report.json"
|
|
19
19
|
),
|
|
20
20
|
) -> None:
|
|
21
|
-
"""Explain
|
|
21
|
+
"""Explain evaluation report gates for a report vs baseline.
|
|
22
22
|
|
|
23
|
-
Loads the reports, builds
|
|
23
|
+
Loads the reports, builds an evaluation report, and prints gate thresholds,
|
|
24
24
|
observed statistics, and pass/fail reasons in a compact, readable form.
|
|
25
25
|
"""
|
|
26
26
|
report_path = Path(report)
|
|
@@ -36,13 +36,17 @@ def explain_gates_command(
|
|
|
36
36
|
console.print(f"[red]Failed to load inputs: {exc}[/red]")
|
|
37
37
|
raise typer.Exit(1) from exc
|
|
38
38
|
|
|
39
|
-
|
|
39
|
+
evaluation_report = make_report(report_data, baseline_data)
|
|
40
40
|
validation = (
|
|
41
|
-
|
|
41
|
+
evaluation_report.get("validation", {})
|
|
42
|
+
if isinstance(evaluation_report.get("validation"), dict)
|
|
43
|
+
else {}
|
|
42
44
|
)
|
|
43
45
|
|
|
44
46
|
# Extract tier + metric policy (floors/hysteresis)
|
|
45
|
-
tier = str(
|
|
47
|
+
tier = str(
|
|
48
|
+
(evaluation_report.get("auto", {}) or {}).get("tier", "balanced")
|
|
49
|
+
).lower()
|
|
46
50
|
tier_thresholds = {
|
|
47
51
|
"conservative": 1.05,
|
|
48
52
|
"balanced": 1.10,
|
|
@@ -50,8 +54,8 @@ def explain_gates_command(
|
|
|
50
54
|
"none": 1.10,
|
|
51
55
|
}
|
|
52
56
|
resolved_policy = (
|
|
53
|
-
|
|
54
|
-
if isinstance(
|
|
57
|
+
evaluation_report.get("resolved_policy", {})
|
|
58
|
+
if isinstance(evaluation_report.get("resolved_policy"), dict)
|
|
55
59
|
else {}
|
|
56
60
|
)
|
|
57
61
|
metrics_policy = (
|
|
@@ -83,7 +87,11 @@ def explain_gates_command(
|
|
|
83
87
|
limit_base = tier_thresholds.get(tier, 1.10)
|
|
84
88
|
limit_with_hyst = limit_base + max(0.0, hysteresis_ratio)
|
|
85
89
|
tokens_ok = True
|
|
86
|
-
telem =
|
|
90
|
+
telem = (
|
|
91
|
+
evaluation_report.get("telemetry", {})
|
|
92
|
+
if isinstance(evaluation_report.get("telemetry"), dict)
|
|
93
|
+
else {}
|
|
94
|
+
)
|
|
87
95
|
try:
|
|
88
96
|
total_tokens = int(telem.get("preview_total_tokens", 0)) + int(
|
|
89
97
|
telem.get("final_total_tokens", 0)
|
|
@@ -95,8 +103,8 @@ def explain_gates_command(
|
|
|
95
103
|
# Primary-metric ratio gate explanation (ppl-like kinds shown as ratios)
|
|
96
104
|
ratio = None
|
|
97
105
|
ratio_ci = None
|
|
98
|
-
if isinstance(
|
|
99
|
-
pm =
|
|
106
|
+
if isinstance(evaluation_report.get("primary_metric"), dict):
|
|
107
|
+
pm = evaluation_report.get("primary_metric", {})
|
|
100
108
|
ratio = pm.get("ratio_vs_baseline")
|
|
101
109
|
ratio_ci = pm.get("display_ci")
|
|
102
110
|
hysteresis_applied = bool(validation.get("hysteresis_applied"))
|
|
@@ -123,8 +131,8 @@ def explain_gates_command(
|
|
|
123
131
|
|
|
124
132
|
# Tail gate explanation (warn/fail; based on per-window Δlog-loss vs baseline)
|
|
125
133
|
pm_tail = (
|
|
126
|
-
|
|
127
|
-
if isinstance(
|
|
134
|
+
evaluation_report.get("primary_metric_tail", {})
|
|
135
|
+
if isinstance(evaluation_report.get("primary_metric_tail"), dict)
|
|
128
136
|
else {}
|
|
129
137
|
)
|
|
130
138
|
if pm_tail:
|
|
@@ -194,8 +202,8 @@ def explain_gates_command(
|
|
|
194
202
|
# Drift gate explanation
|
|
195
203
|
drift = None
|
|
196
204
|
drift_ci = None
|
|
197
|
-
if isinstance(
|
|
198
|
-
pm =
|
|
205
|
+
if isinstance(evaluation_report.get("primary_metric"), dict):
|
|
206
|
+
pm = evaluation_report.get("primary_metric", {})
|
|
199
207
|
preview = pm.get("preview")
|
|
200
208
|
final = pm.get("final")
|
|
201
209
|
if isinstance(preview, int | float) and isinstance(final, int | float):
|
|
@@ -220,8 +228,8 @@ def explain_gates_command(
|
|
|
220
228
|
|
|
221
229
|
# Guard Overhead explanation (if present)
|
|
222
230
|
overhead = (
|
|
223
|
-
|
|
224
|
-
if isinstance(
|
|
231
|
+
evaluation_report.get("guard_overhead", {})
|
|
232
|
+
if isinstance(evaluation_report.get("guard_overhead"), dict)
|
|
225
233
|
else {}
|
|
226
234
|
)
|
|
227
235
|
if overhead:
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
InvarLock HTML Export
|
|
3
3
|
=================
|
|
4
4
|
|
|
5
|
-
Thin wrapper over the HTML
|
|
5
|
+
Thin wrapper over the HTML evaluation report renderer to make exporting
|
|
6
6
|
discoverable and scriptable.
|
|
7
7
|
"""
|
|
8
8
|
|
|
@@ -20,7 +20,9 @@ console = Console()
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def export_html_command(
|
|
23
|
-
input: str = typer.Option(
|
|
23
|
+
input: str = typer.Option(
|
|
24
|
+
..., "--input", "-i", help="Path to evaluation report JSON"
|
|
25
|
+
),
|
|
24
26
|
output: str = typer.Option(..., "--output", "-o", help="Path to output HTML file"),
|
|
25
27
|
embed_css: bool = typer.Option(
|
|
26
28
|
True,
|
|
@@ -31,12 +33,12 @@ def export_html_command(
|
|
|
31
33
|
False, "--force", help="Overwrite output file if it already exists"
|
|
32
34
|
),
|
|
33
35
|
) -> None:
|
|
34
|
-
"""Render
|
|
36
|
+
"""Render an evaluation report JSON to HTML.
|
|
35
37
|
|
|
36
38
|
Exit codes:
|
|
37
39
|
- 0: success
|
|
38
40
|
- 1: generic failure (IO or overwrite refusal)
|
|
39
|
-
- 2: validation failure (invalid
|
|
41
|
+
- 2: validation failure (invalid evaluation report schema)
|
|
40
42
|
"""
|
|
41
43
|
# When called programmatically, Typer's Option defaults can be OptionInfo
|
|
42
44
|
try: # pragma: no cover - defensive, matches other commands' pattern
|
|
@@ -70,12 +72,12 @@ def export_html_command(
|
|
|
70
72
|
raise typer.Exit(1) from exc
|
|
71
73
|
|
|
72
74
|
try:
|
|
73
|
-
from invarlock.reporting.html import
|
|
75
|
+
from invarlock.reporting.html import render_report_html
|
|
74
76
|
|
|
75
|
-
html =
|
|
77
|
+
html = render_report_html(payload)
|
|
76
78
|
except ValueError as exc:
|
|
77
|
-
#
|
|
78
|
-
console.print(f"[red]❌
|
|
79
|
+
# Evaluation report validation failed upstream
|
|
80
|
+
console.print(f"[red]❌ Evaluation report validation failed: {exc}[/red]")
|
|
79
81
|
raise typer.Exit(2) from exc
|
|
80
82
|
except Exception as exc:
|
|
81
83
|
console.print(f"[red]❌ Failed to render HTML: {exc}[/red]")
|
|
@@ -94,7 +96,7 @@ def export_html_command(
|
|
|
94
96
|
console.print(f"[red]❌ Failed to write output file: {exc}[/red]")
|
|
95
97
|
raise typer.Exit(1) from exc
|
|
96
98
|
|
|
97
|
-
console.print(f"✅ Exported
|
|
99
|
+
console.print(f"✅ Exported evaluation report HTML → {out_path}")
|
|
98
100
|
|
|
99
101
|
|
|
100
102
|
__all__ = ["export_html_command"]
|
|
@@ -201,9 +201,9 @@ def plugins_command(
|
|
|
201
201
|
entry = info.get("entry_point")
|
|
202
202
|
# Classify support level independent of origin
|
|
203
203
|
if module.startswith("invarlock.adapters"):
|
|
204
|
-
if n in {"
|
|
204
|
+
if n in {"hf_auto"}:
|
|
205
205
|
support = "auto"
|
|
206
|
-
elif n in {"
|
|
206
|
+
elif n in {"hf_causal_onnx"}:
|
|
207
207
|
# ONNX relies on optional extras (optimum + onnxruntime)
|
|
208
208
|
support = "optional"
|
|
209
209
|
else:
|
|
@@ -236,7 +236,7 @@ def plugins_command(
|
|
|
236
236
|
if backend_name in {"auto-gptq", "autoawq"} and not is_linux:
|
|
237
237
|
status = "unsupported"
|
|
238
238
|
enable = "Linux-only"
|
|
239
|
-
# Extras completeness for certain adapters (e.g.,
|
|
239
|
+
# Extras completeness for certain adapters (e.g., hf_causal_onnx needs optimum + onnxruntime)
|
|
240
240
|
try:
|
|
241
241
|
extras_status = _check_plugin_extras(n, "adapters")
|
|
242
242
|
except Exception:
|
|
@@ -883,10 +883,14 @@ def _check_plugin_extras(plugin_name: str, plugin_type: str) -> str:
|
|
|
883
883
|
"variance": {"packages": [], "extra": ""},
|
|
884
884
|
"rmt": {"packages": [], "extra": ""},
|
|
885
885
|
# Adapter plugins (baked-in only)
|
|
886
|
-
"
|
|
887
|
-
"
|
|
888
|
-
"
|
|
889
|
-
"
|
|
886
|
+
"hf_causal": {"packages": ["transformers"], "extra": "invarlock[adapters]"},
|
|
887
|
+
"hf_mlm": {"packages": ["transformers"], "extra": "invarlock[adapters]"},
|
|
888
|
+
"hf_seq2seq": {"packages": ["transformers"], "extra": "invarlock[adapters]"},
|
|
889
|
+
"hf_auto": {"packages": ["transformers"], "extra": "invarlock[adapters]"},
|
|
890
|
+
"hf_causal_onnx": {
|
|
891
|
+
"packages": ["optimum", "onnxruntime"],
|
|
892
|
+
"extra": "invarlock[onnx]",
|
|
893
|
+
},
|
|
890
894
|
# Optional adapter plugins
|
|
891
895
|
"hf_gptq": {"packages": ["auto_gptq"], "extra": "invarlock[gptq]"},
|
|
892
896
|
"hf_awq": {"packages": ["autoawq"], "extra": "invarlock[awq]"},
|
|
@@ -971,7 +975,7 @@ def _resolve_uninstall_targets(target: str) -> list[str]:
|
|
|
971
975
|
"bitsandbytes": ["bitsandbytes"],
|
|
972
976
|
# ONNX/Optimum family
|
|
973
977
|
"onnx": ["onnxruntime"],
|
|
974
|
-
"
|
|
978
|
+
"hf_causal_onnx": ["onnxruntime"],
|
|
975
979
|
"optimum": ["optimum"],
|
|
976
980
|
}
|
|
977
981
|
return mapping.get(name, [])
|
|
@@ -1010,7 +1014,7 @@ def _resolve_install_targets(target: str) -> list[str]:
|
|
|
1010
1014
|
"transformers": ["invarlock[adapters]"],
|
|
1011
1015
|
# ONNX/Optimum
|
|
1012
1016
|
"onnx": ["invarlock[onnx]"],
|
|
1013
|
-
"
|
|
1017
|
+
"hf_causal_onnx": ["invarlock[onnx]"],
|
|
1014
1018
|
"optimum": ["invarlock[onnx]"],
|
|
1015
1019
|
# Direct packages passthrough
|
|
1016
1020
|
"bitsandbytes": ["bitsandbytes"],
|