invarlock 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +4 -4
- invarlock/adapters/__init__.py +10 -14
- invarlock/adapters/auto.py +37 -50
- invarlock/adapters/capabilities.py +2 -2
- invarlock/adapters/hf_causal.py +418 -0
- invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
- invarlock/adapters/hf_loading.py +7 -7
- invarlock/adapters/hf_mixin.py +53 -9
- invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
- invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
- invarlock/assurance/__init__.py +15 -23
- invarlock/cli/adapter_auto.py +32 -26
- invarlock/cli/app.py +128 -27
- invarlock/cli/commands/__init__.py +2 -2
- invarlock/cli/commands/calibrate.py +48 -4
- invarlock/cli/commands/doctor.py +8 -10
- invarlock/cli/commands/evaluate.py +986 -0
- invarlock/cli/commands/explain_gates.py +25 -17
- invarlock/cli/commands/export_html.py +11 -9
- invarlock/cli/commands/plugins.py +13 -9
- invarlock/cli/commands/report.py +326 -92
- invarlock/cli/commands/run.py +1160 -228
- invarlock/cli/commands/verify.py +157 -97
- invarlock/cli/config.py +1 -1
- invarlock/cli/determinism.py +1 -1
- invarlock/cli/doctor_helpers.py +4 -5
- invarlock/cli/output.py +193 -0
- invarlock/cli/provenance.py +4 -4
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/registry.py +9 -11
- invarlock/core/retry.py +14 -14
- invarlock/core/runner.py +112 -26
- invarlock/edits/noop.py +2 -2
- invarlock/edits/quant_rtn.py +67 -39
- invarlock/eval/__init__.py +1 -1
- invarlock/eval/bench.py +14 -10
- invarlock/eval/data.py +68 -23
- invarlock/eval/metrics.py +59 -1
- invarlock/eval/primary_metric.py +1 -1
- invarlock/eval/tasks/__init__.py +12 -0
- invarlock/eval/tasks/classification.py +48 -0
- invarlock/eval/tasks/qa.py +36 -0
- invarlock/eval/tasks/text_generation.py +102 -0
- invarlock/guards/invariants.py +19 -10
- invarlock/guards/rmt.py +2 -2
- invarlock/guards/spectral.py +1 -1
- invarlock/guards/variance.py +2 -2
- invarlock/model_profile.py +64 -62
- invarlock/observability/health.py +6 -6
- invarlock/observability/metrics.py +108 -0
- invarlock/plugins/hf_bnb_adapter.py +32 -21
- invarlock/reporting/__init__.py +18 -4
- invarlock/reporting/guards_analysis.py +154 -4
- invarlock/reporting/html.py +61 -11
- invarlock/reporting/normalizer.py +9 -2
- invarlock/reporting/policy_utils.py +1 -1
- invarlock/reporting/primary_metric_utils.py +11 -11
- invarlock/reporting/render.py +876 -510
- invarlock/reporting/report.py +72 -30
- invarlock/reporting/{certificate.py → report_builder.py} +252 -99
- invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
- invarlock/reporting/report_types.py +6 -1
- invarlock/reporting/telemetry.py +86 -0
- invarlock-0.3.8.dist-info/METADATA +283 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
- invarlock/adapters/hf_gpt2.py +0 -404
- invarlock/adapters/hf_llama.py +0 -487
- invarlock/cli/commands/certify.py +0 -422
- invarlock-0.3.6.dist-info/METADATA +0 -588
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
|
@@ -11,16 +11,16 @@ except Exception: # pragma: no cover
|
|
|
11
11
|
jsonschema = None
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
#
|
|
15
|
-
|
|
14
|
+
# Evaluation report schema version (PM-first canonical)
|
|
15
|
+
REPORT_SCHEMA_VERSION = "v1"
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
# Minimal JSON Schema describing the canonical shape of
|
|
18
|
+
# Minimal JSON Schema describing the canonical shape of an evaluation report.
|
|
19
19
|
# This focuses on structural validity; numerical thresholds are validated
|
|
20
20
|
# separately in metric-specific logic.
|
|
21
|
-
|
|
21
|
+
REPORT_JSON_SCHEMA: dict[str, Any] = {
|
|
22
22
|
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
23
|
-
"title": "InvarLock
|
|
23
|
+
"title": "InvarLock Evaluation Report",
|
|
24
24
|
"type": "object",
|
|
25
25
|
"required": [
|
|
26
26
|
"schema_version",
|
|
@@ -32,7 +32,7 @@ CERTIFICATE_JSON_SCHEMA: dict[str, Any] = {
|
|
|
32
32
|
"primary_metric",
|
|
33
33
|
],
|
|
34
34
|
"properties": {
|
|
35
|
-
"schema_version": {"const":
|
|
35
|
+
"schema_version": {"const": REPORT_SCHEMA_VERSION},
|
|
36
36
|
"run_id": {"type": "string", "minLength": 4},
|
|
37
37
|
"edit_name": {"type": "string"},
|
|
38
38
|
"policy_digest": {
|
|
@@ -179,21 +179,21 @@ def _load_validation_allowlist() -> set[str]:
|
|
|
179
179
|
return set(_VALIDATION_ALLOWLIST_DEFAULT)
|
|
180
180
|
|
|
181
181
|
|
|
182
|
-
def _validate_with_jsonschema(
|
|
183
|
-
"""Validate
|
|
182
|
+
def _validate_with_jsonschema(report: dict[str, Any]) -> bool:
|
|
183
|
+
"""Validate evaluation report with JSON Schema when available."""
|
|
184
184
|
if jsonschema is None:
|
|
185
185
|
return True # Schema library unavailable; fall back to minimal checks
|
|
186
186
|
try:
|
|
187
|
-
jsonschema.validate(instance=
|
|
187
|
+
jsonschema.validate(instance=report, schema=REPORT_JSON_SCHEMA)
|
|
188
188
|
return True
|
|
189
189
|
except Exception:
|
|
190
190
|
return False
|
|
191
191
|
|
|
192
192
|
|
|
193
|
-
def
|
|
194
|
-
"""Validate
|
|
193
|
+
def validate_report(report: dict[str, Any]) -> bool:
|
|
194
|
+
"""Validate evaluation report structure and essential flags."""
|
|
195
195
|
try:
|
|
196
|
-
if
|
|
196
|
+
if report.get("schema_version") != REPORT_SCHEMA_VERSION:
|
|
197
197
|
return False
|
|
198
198
|
|
|
199
199
|
# Prefer JSON Schema structural validation; if unavailable or too strict,
|
|
@@ -202,20 +202,20 @@ def validate_certificate(certificate: dict[str, Any]) -> bool:
|
|
|
202
202
|
# disallow unknown validation keys at schema level.
|
|
203
203
|
try:
|
|
204
204
|
vkeys = _load_validation_allowlist()
|
|
205
|
-
if isinstance(
|
|
206
|
-
vspec =
|
|
205
|
+
if isinstance(REPORT_JSON_SCHEMA.get("properties"), dict):
|
|
206
|
+
vspec = REPORT_JSON_SCHEMA["properties"].get("validation")
|
|
207
207
|
if isinstance(vspec, dict):
|
|
208
208
|
vspec["properties"] = {k: {"type": "boolean"} for k in vkeys}
|
|
209
209
|
vspec["additionalProperties"] = False
|
|
210
210
|
except Exception:
|
|
211
211
|
pass
|
|
212
212
|
|
|
213
|
-
if not _validate_with_jsonschema(
|
|
213
|
+
if not _validate_with_jsonschema(report):
|
|
214
214
|
# Minimal fallback: require schema version + run_id + primary_metric
|
|
215
|
-
run_id_ok = isinstance(
|
|
216
|
-
|
|
215
|
+
run_id_ok = isinstance(report.get("run_id"), str) and bool(
|
|
216
|
+
report.get("run_id")
|
|
217
217
|
)
|
|
218
|
-
pm =
|
|
218
|
+
pm = report.get("primary_metric")
|
|
219
219
|
pm_ok = isinstance(pm, dict) and (
|
|
220
220
|
isinstance(pm.get("final"), int | float)
|
|
221
221
|
or (isinstance(pm.get("kind"), str) and bool(pm.get("kind")))
|
|
@@ -223,7 +223,7 @@ def validate_certificate(certificate: dict[str, Any]) -> bool:
|
|
|
223
223
|
if not (run_id_ok and pm_ok):
|
|
224
224
|
return False
|
|
225
225
|
|
|
226
|
-
validation =
|
|
226
|
+
validation = report.get("validation", {})
|
|
227
227
|
for flag in [
|
|
228
228
|
"preview_final_drift_acceptable",
|
|
229
229
|
"primary_metric_acceptable",
|
|
@@ -242,7 +242,7 @@ def validate_certificate(certificate: dict[str, Any]) -> bool:
|
|
|
242
242
|
|
|
243
243
|
|
|
244
244
|
__all__ = [
|
|
245
|
-
"
|
|
246
|
-
"
|
|
247
|
-
"
|
|
245
|
+
"REPORT_SCHEMA_VERSION",
|
|
246
|
+
"REPORT_JSON_SCHEMA",
|
|
247
|
+
"validate_report",
|
|
248
248
|
]
|
|
@@ -34,7 +34,7 @@ class MetaData(TypedDict):
|
|
|
34
34
|
"""Metadata about the model and execution environment."""
|
|
35
35
|
|
|
36
36
|
model_id: str # Model identifier (e.g., "gpt2", "path/to/model")
|
|
37
|
-
adapter: str # Adapter name (e.g., "
|
|
37
|
+
adapter: str # Adapter name (e.g., "hf_causal")
|
|
38
38
|
commit: str # Git commit SHA
|
|
39
39
|
seed: int # Random seed used for evaluation
|
|
40
40
|
device: str # Device used ("cpu", "cuda", "mps")
|
|
@@ -107,6 +107,11 @@ class EvalMetrics(TypedDict, total=False):
|
|
|
107
107
|
# Optional aux fields retained for guard telemetry and debug
|
|
108
108
|
latency_ms_per_tok: float # Average latency per token in milliseconds
|
|
109
109
|
memory_mb_peak: float # Peak memory usage in MB
|
|
110
|
+
gpu_memory_mb_peak: float # Peak GPU memory usage in MB
|
|
111
|
+
gpu_memory_reserved_mb_peak: float # Peak GPU reserved memory in MB
|
|
112
|
+
timings: dict[str, float] # Phase timing breakdown (seconds)
|
|
113
|
+
guard_timings: dict[str, float] # Per-guard timings (seconds)
|
|
114
|
+
memory_snapshots: list[dict[str, Any]] # Phase memory snapshots
|
|
110
115
|
spectral: dict[str, Any] # Spectral norm summaries
|
|
111
116
|
rmt: dict[str, Any] # RMT statistics
|
|
112
117
|
invariants: dict[str, Any] # Model invariant check results
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Telemetry report utilities.
|
|
3
|
+
|
|
4
|
+
Produces a compact JSON summary for performance analysis.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def build_telemetry_payload(report: dict[str, Any]) -> dict[str, Any]:
|
|
16
|
+
"""Build a structured telemetry payload from a run report."""
|
|
17
|
+
meta_in = report.get("meta", {}) if isinstance(report, dict) else {}
|
|
18
|
+
metrics_in = report.get("metrics", {}) if isinstance(report, dict) else {}
|
|
19
|
+
|
|
20
|
+
payload: dict[str, Any] = {"generated_at": datetime.now().isoformat()}
|
|
21
|
+
|
|
22
|
+
if isinstance(meta_in, dict):
|
|
23
|
+
payload["meta"] = {
|
|
24
|
+
"model_id": meta_in.get("model_id"),
|
|
25
|
+
"adapter": meta_in.get("adapter"),
|
|
26
|
+
"device": meta_in.get("device"),
|
|
27
|
+
"run_id": meta_in.get("run_id"),
|
|
28
|
+
"profile": meta_in.get("profile"),
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
if isinstance(metrics_in, dict):
|
|
32
|
+
timings = metrics_in.get("timings")
|
|
33
|
+
if isinstance(timings, dict):
|
|
34
|
+
payload["timings"] = timings
|
|
35
|
+
|
|
36
|
+
guard_timings = metrics_in.get("guard_timings")
|
|
37
|
+
if isinstance(guard_timings, dict):
|
|
38
|
+
payload["guard_timings"] = guard_timings
|
|
39
|
+
|
|
40
|
+
memory_snapshots = metrics_in.get("memory_snapshots")
|
|
41
|
+
if isinstance(memory_snapshots, list):
|
|
42
|
+
payload["memory_snapshots"] = memory_snapshots
|
|
43
|
+
|
|
44
|
+
memory_summary: dict[str, Any] = {}
|
|
45
|
+
for key in (
|
|
46
|
+
"memory_mb_peak",
|
|
47
|
+
"gpu_memory_mb_peak",
|
|
48
|
+
"gpu_memory_reserved_mb_peak",
|
|
49
|
+
):
|
|
50
|
+
value = metrics_in.get(key)
|
|
51
|
+
if isinstance(value, int | float):
|
|
52
|
+
memory_summary[key] = float(value)
|
|
53
|
+
if memory_summary:
|
|
54
|
+
payload["memory"] = memory_summary
|
|
55
|
+
|
|
56
|
+
perf_metrics: dict[str, Any] = {}
|
|
57
|
+
for key in (
|
|
58
|
+
"latency_ms_per_tok",
|
|
59
|
+
"throughput_tok_per_s",
|
|
60
|
+
"eval_samples",
|
|
61
|
+
"total_tokens",
|
|
62
|
+
):
|
|
63
|
+
value = metrics_in.get(key)
|
|
64
|
+
if isinstance(value, int | float):
|
|
65
|
+
perf_metrics[key] = float(value)
|
|
66
|
+
if perf_metrics:
|
|
67
|
+
payload["performance"] = perf_metrics
|
|
68
|
+
|
|
69
|
+
return payload
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def save_telemetry_report(
    report: dict[str, Any],
    output_dir: Path,
    *,
    filename: str = "telemetry.json",
) -> Path:
    """Serialize the telemetry summary of *report* into ``output_dir``.

    The summary is produced by ``build_telemetry_payload``. ``output_dir`` is
    created (including missing parents) when it does not exist yet, and the
    payload is written as UTF-8 JSON with two-space indentation.

    Returns:
        The path of the file that was written (``output_dir / filename``).
    """
    telemetry = build_telemetry_payload(report)
    output_dir.mkdir(parents=True, exist_ok=True)
    destination = output_dir / filename
    serialized = json.dumps(telemetry, indent=2, ensure_ascii=False)
    destination.write_text(serialized, encoding="utf-8")
    return destination
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
__all__ = ["build_telemetry_payload", "save_telemetry_report"]
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: invarlock
|
|
3
|
+
Version: 0.3.8
|
|
4
|
+
Summary: Edit‑agnostic robustness evaluation reports for weight edits (InvarLock framework)
|
|
5
|
+
Author-email: InvarLock Team <oss@invarlock.dev>
|
|
6
|
+
Maintainer-email: InvarLock Maintainers <support@invarlock.dev>
|
|
7
|
+
License-Expression: Apache-2.0
|
|
8
|
+
Project-URL: Homepage, https://github.com/invarlock/invarlock
|
|
9
|
+
Project-URL: Repository, https://github.com/invarlock/invarlock
|
|
10
|
+
Project-URL: Documentation, https://github.com/invarlock/invarlock/tree/main/docs
|
|
11
|
+
Project-URL: Issues, https://github.com/invarlock/invarlock/issues
|
|
12
|
+
Project-URL: Changelog, https://github.com/invarlock/invarlock/blob/main/CHANGELOG.md
|
|
13
|
+
Keywords: machine-learning,deep-learning,transformers,pytorch,llm,quantization,safety,evaluation,certification
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Intended Audience :: Science/Research
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Operating System :: OS Independent
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Python: >=3.12
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: typer>=0.15
|
|
27
|
+
Requires-Dist: click>=8.1
|
|
28
|
+
Requires-Dist: shellingham>=1.5.0
|
|
29
|
+
Requires-Dist: pandas>=2.2
|
|
30
|
+
Requires-Dist: scikit-learn>=1.4
|
|
31
|
+
Requires-Dist: pydantic>=2.0
|
|
32
|
+
Requires-Dist: rich>=13.0
|
|
33
|
+
Requires-Dist: pyyaml>=6.0
|
|
34
|
+
Requires-Dist: markdown>=3.5
|
|
35
|
+
Requires-Dist: psutil>=5.9
|
|
36
|
+
Requires-Dist: hypothesis>=6.98
|
|
37
|
+
Requires-Dist: typing_extensions>=4.7
|
|
38
|
+
Requires-Dist: jsonschema>=4.0
|
|
39
|
+
Provides-Extra: adapters
|
|
40
|
+
Requires-Dist: torch>=2.1.0; extra == "adapters"
|
|
41
|
+
Requires-Dist: transformers>=5.0.0; extra == "adapters"
|
|
42
|
+
Provides-Extra: hf
|
|
43
|
+
Requires-Dist: torch>=2.1.0; extra == "hf"
|
|
44
|
+
Requires-Dist: transformers>=5.0.0; extra == "hf"
|
|
45
|
+
Requires-Dist: datasets>=3.0; extra == "hf"
|
|
46
|
+
Requires-Dist: numpy>=1.24; extra == "hf"
|
|
47
|
+
Requires-Dist: huggingface_hub>=1.0.0; extra == "hf"
|
|
48
|
+
Requires-Dist: aiohttp>=3.12.14; extra == "hf"
|
|
49
|
+
Requires-Dist: h2>=4.3.0; extra == "hf"
|
|
50
|
+
Requires-Dist: pillow>=11.3.0; extra == "hf"
|
|
51
|
+
Provides-Extra: guards
|
|
52
|
+
Requires-Dist: torch>=2.1.0; extra == "guards"
|
|
53
|
+
Requires-Dist: numpy>=1.24; extra == "guards"
|
|
54
|
+
Provides-Extra: edits
|
|
55
|
+
Requires-Dist: torch>=2.1.0; extra == "edits"
|
|
56
|
+
Provides-Extra: eval
|
|
57
|
+
Requires-Dist: torch>=2.1.0; extra == "eval"
|
|
58
|
+
Requires-Dist: datasets>=3.0; extra == "eval"
|
|
59
|
+
Provides-Extra: gptq
|
|
60
|
+
Requires-Dist: torch>=2.1.0; extra == "gptq"
|
|
61
|
+
Requires-Dist: auto-gptq>=0.7.0; platform_system == "Linux" and extra == "gptq"
|
|
62
|
+
Requires-Dist: triton>=2.3.0; platform_system == "Linux" and extra == "gptq"
|
|
63
|
+
Requires-Dist: transformers>=5.0.0; extra == "gptq"
|
|
64
|
+
Provides-Extra: awq
|
|
65
|
+
Requires-Dist: torch>=2.1.0; extra == "awq"
|
|
66
|
+
Requires-Dist: autoawq>=0.2.0; platform_system == "Linux" and extra == "awq"
|
|
67
|
+
Requires-Dist: transformers>=5.0.0; extra == "awq"
|
|
68
|
+
Requires-Dist: triton>=2.3.0; platform_system == "Linux" and extra == "awq"
|
|
69
|
+
Provides-Extra: gpu
|
|
70
|
+
Requires-Dist: torch>=2.1.0; extra == "gpu"
|
|
71
|
+
Requires-Dist: accelerate>=0.27; extra == "gpu"
|
|
72
|
+
Requires-Dist: bitsandbytes>=0.41; platform_system == "Linux" and extra == "gpu"
|
|
73
|
+
Provides-Extra: all
|
|
74
|
+
Requires-Dist: torch>=2.1.0; extra == "all"
|
|
75
|
+
Requires-Dist: transformers>=5.0.0; extra == "all"
|
|
76
|
+
Requires-Dist: datasets>=3.0; extra == "all"
|
|
77
|
+
Requires-Dist: numpy>=1.24; extra == "all"
|
|
78
|
+
Requires-Dist: huggingface_hub>=1.0.0; extra == "all"
|
|
79
|
+
Requires-Dist: accelerate>=0.27; extra == "all"
|
|
80
|
+
Requires-Dist: bitsandbytes>=0.41; platform_system == "Linux" and extra == "all"
|
|
81
|
+
Requires-Dist: auto-gptq>=0.7.0; platform_system == "Linux" and extra == "all"
|
|
82
|
+
Requires-Dist: autoawq>=0.2.0; platform_system == "Linux" and extra == "all"
|
|
83
|
+
Requires-Dist: triton>=2.3.0; platform_system == "Linux" and extra == "all"
|
|
84
|
+
Requires-Dist: aiohttp>=3.12.14; extra == "all"
|
|
85
|
+
Requires-Dist: h2>=4.3.0; extra == "all"
|
|
86
|
+
Requires-Dist: pillow>=11.3.0; extra == "all"
|
|
87
|
+
Provides-Extra: onnx
|
|
88
|
+
Requires-Dist: optimum>=1.17.0; extra == "onnx"
|
|
89
|
+
Requires-Dist: onnxruntime>=1.17.0; extra == "onnx"
|
|
90
|
+
Provides-Extra: dev
|
|
91
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
92
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
93
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
94
|
+
Requires-Dist: black>=23.0; extra == "dev"
|
|
95
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
96
|
+
Requires-Dist: hypothesis>=6.98; extra == "dev"
|
|
97
|
+
Requires-Dist: pre-commit>=3.0; extra == "dev"
|
|
98
|
+
Requires-Dist: mkdocs>=1.5; extra == "dev"
|
|
99
|
+
Requires-Dist: mkdocs-material>=9.5; extra == "dev"
|
|
100
|
+
Requires-Dist: mkdocs-mermaid2-plugin>=1.1; extra == "dev"
|
|
101
|
+
Requires-Dist: sphinx>=7.0; extra == "dev"
|
|
102
|
+
Requires-Dist: matplotlib>=3.7; extra == "dev"
|
|
103
|
+
Requires-Dist: bitsandbytes>=0.41; extra == "dev"
|
|
104
|
+
Requires-Dist: build>=0.10.0; extra == "dev"
|
|
105
|
+
Requires-Dist: twine>=4.0.0; extra == "dev"
|
|
106
|
+
Dynamic: license-file
|
|
107
|
+
|
|
108
|
+
<p align="center">
|
|
109
|
+
<img
|
|
110
|
+
src="docs/assets/invarlock-logo.svg"
|
|
111
|
+
alt="InvarLock"
|
|
112
|
+
width="420"
|
|
113
|
+
/>
|
|
114
|
+
</p>
|
|
115
|
+
|
|
116
|
+
# InvarLock — Edit‑agnostic robustness reports for weight edits
|
|
117
|
+
|
|
118
|
+
[CI status](https://github.com/invarlock/invarlock/actions/workflows/ci.yml)
|
|
119
|
+
[PyPI package](https://pypi.org/project/invarlock/)
|
|
120
|
+
[Quickstart documentation](https://github.com/invarlock/invarlock/blob/main/docs/user-guide/quickstart.md)
|
|
121
|
+
[License: Apache-2.0](LICENSE)
|
|
122
|
+
[Python 3.12+](https://www.python.org/downloads/release/python-3120/)
|
|
123
|
+
|
|
124
|
+
Quantizing, pruning, or otherwise editing a model’s weights can silently degrade quality.
|
|
125
|
+
InvarLock compares an edited **subject** checkpoint against a fixed **baseline** with paired
|
|
126
|
+
evaluation windows, enforces a guard pipeline (invariants → spectral → RMT → variance), and
|
|
127
|
+
produces a machine‑readable Evaluation Report you can gate in CI.
|
|
128
|
+
|
|
129
|
+
> Status: pre‑1.0. Until 1.0, minor releases may be breaking. See `CHANGELOG.md`.
|
|
130
|
+
|
|
131
|
+
For guidance on where to ask questions, how to report bugs, and what to expect in terms of response times, see
|
|
132
|
+
[SUPPORT.md](https://github.com/invarlock/invarlock/blob/main/SUPPORT.md).
|
|
133
|
+
|
|
134
|
+
## Why InvarLock?
|
|
135
|
+
|
|
136
|
+
- **Quality gates for weight edits**: catch regressions before deployment.
|
|
137
|
+
- **Statistical guarantees**: paired primary metrics with confidence intervals.
|
|
138
|
+
- **Auditable evidence**: deterministic pairing metadata + policy digests in `evaluation.report.json`.
|
|
139
|
+
- **CI/CD-friendly**: stable exit codes, `--json` outputs, and portable “proof packs”.
|
|
140
|
+
- **Offline-first**: network is disabled by default; enable downloads per command.
|
|
141
|
+
|
|
142
|
+
## Who is this for?
|
|
143
|
+
|
|
144
|
+
- ML engineers shipping quantized/pruned checkpoints.
|
|
145
|
+
- MLOps teams building CI quality gates and reviewable artifacts.
|
|
146
|
+
- Researchers validating compression/edit methods with reproducible, paired eval.
|
|
147
|
+
|
|
148
|
+
## How it works
|
|
149
|
+
|
|
150
|
+
```text
|
|
151
|
+
┌───────────────────────┐ ┌────────────────────────────────────────────┐
|
|
152
|
+
│ Baseline (checkpoint) │────►│ │
|
|
153
|
+
└───────────────────────┘ │ invarlock evaluate │
|
|
154
|
+
│ ├─► Paired windows (deterministic) │
|
|
155
|
+
┌───────────────────────┐ │ ├─► GuardChain pipeline │
|
|
156
|
+
│ Subject (checkpoint) │────►│ │ └─► invariants → spectral → RMT → VE │
|
|
157
|
+
└───────────────────────┘ │ └─► Emit: evaluation.report.json │
|
|
158
|
+
│ │
|
|
159
|
+
└────────────────────────────────────────────┘
|
|
160
|
+
│
|
|
161
|
+
┌───────────────┴───────────────┐
|
|
162
|
+
▼ ▼
|
|
163
|
+
✅ PASS ❌ FAIL
|
|
164
|
+
(ship) (rollback)
|
|
165
|
+
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
## Quick start
|
|
169
|
+
|
|
170
|
+
Colab (CPU-friendly):
|
|
171
|
+
[Open in Colab](https://colab.research.google.com/github/invarlock/invarlock/blob/main/notebooks/invarlock_quickstart_cpu.ipynb)
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
# HF adapter stack (torch/transformers)
|
|
175
|
+
pip install "invarlock[hf]"
|
|
176
|
+
|
|
177
|
+
# Version + report schema (when available)
|
|
178
|
+
invarlock --version
|
|
179
|
+
|
|
180
|
+
# Compare baseline vs subject (downloads require explicit network enable)
|
|
181
|
+
INVARLOCK_ALLOW_NETWORK=1 invarlock evaluate \
|
|
182
|
+
--baseline gpt2 \
|
|
183
|
+
--subject gpt2 \
|
|
184
|
+
--adapter auto \
|
|
185
|
+
--profile dev \
|
|
186
|
+
--quiet
|
|
187
|
+
|
|
188
|
+
# Validate the evaluation report
|
|
189
|
+
invarlock verify reports/eval/evaluation.report.json
|
|
190
|
+
|
|
191
|
+
# Render HTML for sharing
|
|
192
|
+
invarlock report html -i reports/eval/evaluation.report.json -o reports/eval/evaluation.html
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
Example output (abridged; counts vary by profile/config):
|
|
196
|
+
|
|
197
|
+
```text
|
|
198
|
+
INVARLOCK v<version> · EVALUATE
|
|
199
|
+
Baseline: gpt2 -> Subject: gpt2 · Profile: dev
|
|
200
|
+
Status: PASS · Gates: <passed>/<total> passed
|
|
201
|
+
Primary metric ratio: <ratio>
|
|
202
|
+
Output: reports/eval/evaluation.report.json
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
## Proof packs (portable evidence bundles)
|
|
206
|
+
|
|
207
|
+
Proof packs bundle reports + verification metadata into a distributable artifact.
|
|
208
|
+
|
|
209
|
+
- Guide: <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/proof-packs.md>
|
|
210
|
+
- Verify: `scripts/proof_packs/verify_pack.sh --pack <dir> --strict` (or `PACK_STRICT_MODE=1 ...`)
|
|
211
|
+
|
|
212
|
+
Note: `configs/` and `scripts/` are repo resources and are not shipped in wheels; clone the repo to use
|
|
213
|
+
presets and proof-pack helpers.
|
|
214
|
+
|
|
215
|
+
## Installation
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
# Minimal CLI (no torch/transformers)
|
|
219
|
+
pip install invarlock
|
|
220
|
+
|
|
221
|
+
# HF workflows (torch/transformers)
|
|
222
|
+
pip install "invarlock[hf]"
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
Optional extras: `invarlock[gpu]`, `invarlock[awq,gptq]`. Full setup: <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/getting-started.md>.
|
|
226
|
+
|
|
227
|
+
## Documentation
|
|
228
|
+
|
|
229
|
+
- Quickstart: <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/quickstart.md>
|
|
230
|
+
- Compare & evaluate (BYOE): <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/compare-and-evaluate.md>
|
|
231
|
+
- Reading a report: <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/reading-report.md>
|
|
232
|
+
- CLI reference: <https://github.com/invarlock/invarlock/blob/main/docs/reference/cli.md>
|
|
233
|
+
- Assurance case: <https://github.com/invarlock/invarlock/blob/main/docs/assurance/00-safety-case.md>
|
|
234
|
+
- Threat model: <https://github.com/invarlock/invarlock/blob/main/docs/security/threat-model.md>
|
|
235
|
+
|
|
236
|
+
## Community
|
|
237
|
+
|
|
238
|
+
- Questions/ideas: <https://github.com/invarlock/invarlock/discussions>
|
|
239
|
+
- Bug reports: <https://github.com/invarlock/invarlock/issues>
|
|
240
|
+
- Contact: <mailto:support@invarlock.dev>
|
|
241
|
+
|
|
242
|
+
## Citation
|
|
243
|
+
|
|
244
|
+
If you use InvarLock in scientific work, please cite it (canonical metadata is in `CITATION.cff`):
|
|
245
|
+
|
|
246
|
+
```bibtex
|
|
247
|
+
@software{invarlock,
|
|
248
|
+
title = {InvarLock: Edit-agnostic robustness evaluation reports for weight edits},
|
|
249
|
+
author = {{InvarLock Maintainers}},
|
|
250
|
+
url = {https://github.com/invarlock/invarlock},
|
|
251
|
+
}
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
## Limitations
|
|
255
|
+
|
|
256
|
+
- InvarLock evaluates an edited model relative to a baseline under a specific configuration; results are not “global” guarantees.
|
|
257
|
+
- Not a content-safety/alignment tool.
|
|
258
|
+
- Native Windows is not supported (use WSL2 or Linux).
|
|
259
|
+
|
|
260
|
+
## Support matrix
|
|
261
|
+
|
|
262
|
+
<!-- markdownlint-disable MD060 -->
|
|
263
|
+
| Platform | Status | Notes |
|
|
264
|
+
| ---------------------- | --------------- | ----------------------------------------- |
|
|
265
|
+
| Python 3.12+ | ✅ Required | |
|
|
266
|
+
| Linux | ✅ Full | Primary dev target |
|
|
267
|
+
| macOS (Intel/M-series) | ✅ Full | MPS supported (default on Apple Silicon) |
|
|
268
|
+
| Windows | ❌ Not supported | Use WSL2 or a Linux container if required |
|
|
269
|
+
| CUDA | ✅ Recommended | For larger models |
|
|
270
|
+
| CPU | ✅ Fallback | Slower but functional |
|
|
271
|
+
<!-- markdownlint-enable MD060 -->
|
|
272
|
+
|
|
273
|
+
## Contributing
|
|
274
|
+
|
|
275
|
+
- Contributing guide: <https://github.com/invarlock/invarlock/blob/main/CONTRIBUTING.md>
|
|
276
|
+
- Fast local checks (repo clone):
|
|
277
|
+
- `make dev-install`
|
|
278
|
+
- `make test`
|
|
279
|
+
- `make lint`
|
|
280
|
+
|
|
281
|
+
## License
|
|
282
|
+
|
|
283
|
+
Apache-2.0 — see `LICENSE`.
|