invarlock 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. invarlock/__init__.py +4 -4
  2. invarlock/adapters/__init__.py +10 -14
  3. invarlock/adapters/auto.py +37 -50
  4. invarlock/adapters/capabilities.py +2 -2
  5. invarlock/adapters/hf_causal.py +418 -0
  6. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  7. invarlock/adapters/hf_loading.py +7 -7
  8. invarlock/adapters/hf_mixin.py +53 -9
  9. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  10. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  11. invarlock/assurance/__init__.py +15 -23
  12. invarlock/cli/adapter_auto.py +32 -26
  13. invarlock/cli/app.py +128 -27
  14. invarlock/cli/commands/__init__.py +2 -2
  15. invarlock/cli/commands/calibrate.py +48 -4
  16. invarlock/cli/commands/doctor.py +8 -10
  17. invarlock/cli/commands/evaluate.py +986 -0
  18. invarlock/cli/commands/explain_gates.py +25 -17
  19. invarlock/cli/commands/export_html.py +11 -9
  20. invarlock/cli/commands/plugins.py +13 -9
  21. invarlock/cli/commands/report.py +326 -92
  22. invarlock/cli/commands/run.py +1160 -228
  23. invarlock/cli/commands/verify.py +157 -97
  24. invarlock/cli/config.py +1 -1
  25. invarlock/cli/determinism.py +1 -1
  26. invarlock/cli/doctor_helpers.py +4 -5
  27. invarlock/cli/output.py +193 -0
  28. invarlock/cli/provenance.py +4 -4
  29. invarlock/core/bootstrap.py +1 -1
  30. invarlock/core/registry.py +9 -11
  31. invarlock/core/retry.py +14 -14
  32. invarlock/core/runner.py +112 -26
  33. invarlock/edits/noop.py +2 -2
  34. invarlock/edits/quant_rtn.py +67 -39
  35. invarlock/eval/__init__.py +1 -1
  36. invarlock/eval/bench.py +14 -10
  37. invarlock/eval/data.py +68 -23
  38. invarlock/eval/metrics.py +59 -1
  39. invarlock/eval/primary_metric.py +1 -1
  40. invarlock/eval/tasks/__init__.py +12 -0
  41. invarlock/eval/tasks/classification.py +48 -0
  42. invarlock/eval/tasks/qa.py +36 -0
  43. invarlock/eval/tasks/text_generation.py +102 -0
  44. invarlock/guards/invariants.py +19 -10
  45. invarlock/guards/rmt.py +2 -2
  46. invarlock/guards/spectral.py +1 -1
  47. invarlock/guards/variance.py +2 -2
  48. invarlock/model_profile.py +64 -62
  49. invarlock/observability/health.py +6 -6
  50. invarlock/observability/metrics.py +108 -0
  51. invarlock/plugins/hf_bnb_adapter.py +32 -21
  52. invarlock/reporting/__init__.py +18 -4
  53. invarlock/reporting/guards_analysis.py +154 -4
  54. invarlock/reporting/html.py +61 -11
  55. invarlock/reporting/normalizer.py +9 -2
  56. invarlock/reporting/policy_utils.py +1 -1
  57. invarlock/reporting/primary_metric_utils.py +11 -11
  58. invarlock/reporting/render.py +876 -510
  59. invarlock/reporting/report.py +72 -30
  60. invarlock/reporting/{certificate.py → report_builder.py} +252 -99
  61. invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
  62. invarlock/reporting/report_types.py +6 -1
  63. invarlock/reporting/telemetry.py +86 -0
  64. invarlock-0.3.8.dist-info/METADATA +283 -0
  65. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
  66. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
  67. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
  68. invarlock/adapters/hf_gpt2.py +0 -404
  69. invarlock/adapters/hf_llama.py +0 -487
  70. invarlock/cli/commands/certify.py +0 -422
  71. invarlock-0.3.6.dist-info/METADATA +0 -588
  72. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
  73. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
@@ -11,16 +11,16 @@ except Exception: # pragma: no cover
11
11
  jsonschema = None
12
12
 
13
13
 
14
- # Certificate schema version (PM-first canonical)
15
- CERTIFICATE_SCHEMA_VERSION = "v1"
14
+ # Evaluation report schema version (PM-first canonical)
15
+ REPORT_SCHEMA_VERSION = "v1"
16
16
 
17
17
 
18
- # Minimal JSON Schema describing the canonical shape of a certificate.
18
+ # Minimal JSON Schema describing the canonical shape of an evaluation report.
19
19
  # This focuses on structural validity; numerical thresholds are validated
20
20
  # separately in metric-specific logic.
21
- CERTIFICATE_JSON_SCHEMA: dict[str, Any] = {
21
+ REPORT_JSON_SCHEMA: dict[str, Any] = {
22
22
  "$schema": "https://json-schema.org/draft/2020-12/schema",
23
- "title": "InvarLock Safety Certificate",
23
+ "title": "InvarLock Evaluation Report",
24
24
  "type": "object",
25
25
  "required": [
26
26
  "schema_version",
@@ -32,7 +32,7 @@ CERTIFICATE_JSON_SCHEMA: dict[str, Any] = {
32
32
  "primary_metric",
33
33
  ],
34
34
  "properties": {
35
- "schema_version": {"const": CERTIFICATE_SCHEMA_VERSION},
35
+ "schema_version": {"const": REPORT_SCHEMA_VERSION},
36
36
  "run_id": {"type": "string", "minLength": 4},
37
37
  "edit_name": {"type": "string"},
38
38
  "policy_digest": {
@@ -179,21 +179,21 @@ def _load_validation_allowlist() -> set[str]:
179
179
  return set(_VALIDATION_ALLOWLIST_DEFAULT)
180
180
 
181
181
 
182
- def _validate_with_jsonschema(certificate: dict[str, Any]) -> bool:
183
- """Validate certificate with JSON Schema when available."""
182
+ def _validate_with_jsonschema(report: dict[str, Any]) -> bool:
183
+ """Validate evaluation report with JSON Schema when available."""
184
184
  if jsonschema is None:
185
185
  return True # Schema library unavailable; fall back to minimal checks
186
186
  try:
187
- jsonschema.validate(instance=certificate, schema=CERTIFICATE_JSON_SCHEMA)
187
+ jsonschema.validate(instance=report, schema=REPORT_JSON_SCHEMA)
188
188
  return True
189
189
  except Exception:
190
190
  return False
191
191
 
192
192
 
193
- def validate_certificate(certificate: dict[str, Any]) -> bool:
194
- """Validate certificate structure and essential flags."""
193
+ def validate_report(report: dict[str, Any]) -> bool:
194
+ """Validate evaluation report structure and essential flags."""
195
195
  try:
196
- if certificate.get("schema_version") != CERTIFICATE_SCHEMA_VERSION:
196
+ if report.get("schema_version") != REPORT_SCHEMA_VERSION:
197
197
  return False
198
198
 
199
199
  # Prefer JSON Schema structural validation; if unavailable or too strict,
@@ -202,20 +202,20 @@ def validate_certificate(certificate: dict[str, Any]) -> bool:
202
202
  # disallow unknown validation keys at schema level.
203
203
  try:
204
204
  vkeys = _load_validation_allowlist()
205
- if isinstance(CERTIFICATE_JSON_SCHEMA.get("properties"), dict):
206
- vspec = CERTIFICATE_JSON_SCHEMA["properties"].get("validation")
205
+ if isinstance(REPORT_JSON_SCHEMA.get("properties"), dict):
206
+ vspec = REPORT_JSON_SCHEMA["properties"].get("validation")
207
207
  if isinstance(vspec, dict):
208
208
  vspec["properties"] = {k: {"type": "boolean"} for k in vkeys}
209
209
  vspec["additionalProperties"] = False
210
210
  except Exception:
211
211
  pass
212
212
 
213
- if not _validate_with_jsonschema(certificate):
213
+ if not _validate_with_jsonschema(report):
214
214
  # Minimal fallback: require schema version + run_id + primary_metric
215
- run_id_ok = isinstance(certificate.get("run_id"), str) and bool(
216
- certificate.get("run_id")
215
+ run_id_ok = isinstance(report.get("run_id"), str) and bool(
216
+ report.get("run_id")
217
217
  )
218
- pm = certificate.get("primary_metric")
218
+ pm = report.get("primary_metric")
219
219
  pm_ok = isinstance(pm, dict) and (
220
220
  isinstance(pm.get("final"), int | float)
221
221
  or (isinstance(pm.get("kind"), str) and bool(pm.get("kind")))
@@ -223,7 +223,7 @@ def validate_certificate(certificate: dict[str, Any]) -> bool:
223
223
  if not (run_id_ok and pm_ok):
224
224
  return False
225
225
 
226
- validation = certificate.get("validation", {})
226
+ validation = report.get("validation", {})
227
227
  for flag in [
228
228
  "preview_final_drift_acceptable",
229
229
  "primary_metric_acceptable",
@@ -242,7 +242,7 @@ def validate_certificate(certificate: dict[str, Any]) -> bool:
242
242
 
243
243
 
244
244
  __all__ = [
245
- "CERTIFICATE_SCHEMA_VERSION",
246
- "CERTIFICATE_JSON_SCHEMA",
247
- "validate_certificate",
245
+ "REPORT_SCHEMA_VERSION",
246
+ "REPORT_JSON_SCHEMA",
247
+ "validate_report",
248
248
  ]
@@ -34,7 +34,7 @@ class MetaData(TypedDict):
34
34
  """Metadata about the model and execution environment."""
35
35
 
36
36
  model_id: str # Model identifier (e.g., "gpt2", "path/to/model")
37
- adapter: str # Adapter name (e.g., "hf_gpt2")
37
+ adapter: str # Adapter name (e.g., "hf_causal")
38
38
  commit: str # Git commit SHA
39
39
  seed: int # Random seed used for evaluation
40
40
  device: str # Device used ("cpu", "cuda", "mps")
@@ -107,6 +107,11 @@ class EvalMetrics(TypedDict, total=False):
107
107
  # Optional aux fields retained for guard telemetry and debug
108
108
  latency_ms_per_tok: float # Average latency per token in milliseconds
109
109
  memory_mb_peak: float # Peak memory usage in MB
110
+ gpu_memory_mb_peak: float # Peak GPU memory usage in MB
111
+ gpu_memory_reserved_mb_peak: float # Peak GPU reserved memory in MB
112
+ timings: dict[str, float] # Phase timing breakdown (seconds)
113
+ guard_timings: dict[str, float] # Per-guard timings (seconds)
114
+ memory_snapshots: list[dict[str, Any]] # Phase memory snapshots
110
115
  spectral: dict[str, Any] # Spectral norm summaries
111
116
  rmt: dict[str, Any] # RMT statistics
112
117
  invariants: dict[str, Any] # Model invariant check results
@@ -0,0 +1,86 @@
1
+ """
2
+ Telemetry report utilities.
3
+
4
+ Produces a compact JSON summary for performance analysis.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+
15
+ def build_telemetry_payload(report: dict[str, Any]) -> dict[str, Any]:
16
+ """Build a structured telemetry payload from a run report."""
17
+ meta_in = report.get("meta", {}) if isinstance(report, dict) else {}
18
+ metrics_in = report.get("metrics", {}) if isinstance(report, dict) else {}
19
+
20
+ payload: dict[str, Any] = {"generated_at": datetime.now().isoformat()}
21
+
22
+ if isinstance(meta_in, dict):
23
+ payload["meta"] = {
24
+ "model_id": meta_in.get("model_id"),
25
+ "adapter": meta_in.get("adapter"),
26
+ "device": meta_in.get("device"),
27
+ "run_id": meta_in.get("run_id"),
28
+ "profile": meta_in.get("profile"),
29
+ }
30
+
31
+ if isinstance(metrics_in, dict):
32
+ timings = metrics_in.get("timings")
33
+ if isinstance(timings, dict):
34
+ payload["timings"] = timings
35
+
36
+ guard_timings = metrics_in.get("guard_timings")
37
+ if isinstance(guard_timings, dict):
38
+ payload["guard_timings"] = guard_timings
39
+
40
+ memory_snapshots = metrics_in.get("memory_snapshots")
41
+ if isinstance(memory_snapshots, list):
42
+ payload["memory_snapshots"] = memory_snapshots
43
+
44
+ memory_summary: dict[str, Any] = {}
45
+ for key in (
46
+ "memory_mb_peak",
47
+ "gpu_memory_mb_peak",
48
+ "gpu_memory_reserved_mb_peak",
49
+ ):
50
+ value = metrics_in.get(key)
51
+ if isinstance(value, int | float):
52
+ memory_summary[key] = float(value)
53
+ if memory_summary:
54
+ payload["memory"] = memory_summary
55
+
56
+ perf_metrics: dict[str, Any] = {}
57
+ for key in (
58
+ "latency_ms_per_tok",
59
+ "throughput_tok_per_s",
60
+ "eval_samples",
61
+ "total_tokens",
62
+ ):
63
+ value = metrics_in.get(key)
64
+ if isinstance(value, int | float):
65
+ perf_metrics[key] = float(value)
66
+ if perf_metrics:
67
+ payload["performance"] = perf_metrics
68
+
69
+ return payload
70
+
71
+
72
def save_telemetry_report(
    report: dict[str, Any],
    output_dir: Path,
    *,
    filename: str = "telemetry.json",
) -> Path:
    """Serialize the telemetry payload for ``report`` into ``output_dir``.

    Args:
        report: Run report handed to ``build_telemetry_payload``.
        output_dir: Destination directory; created together with any missing
            parents when it does not exist yet.
        filename: Name of the JSON file written inside ``output_dir``.

    Returns:
        Path of the written file (``output_dir / filename``).
    """
    telemetry = build_telemetry_payload(report)

    # Make sure the destination exists before anything is written to it.
    output_dir.mkdir(parents=True, exist_ok=True)
    target = output_dir / filename

    # Two-space indentation, with non-ASCII characters kept verbatim.
    serialized = json.dumps(telemetry, indent=2, ensure_ascii=False)
    target.write_text(serialized, encoding="utf-8")
    return target
84
+
85
+
86
+ __all__ = ["build_telemetry_payload", "save_telemetry_report"]
@@ -0,0 +1,283 @@
1
+ Metadata-Version: 2.4
2
+ Name: invarlock
3
+ Version: 0.3.8
4
+ Summary: Edit‑agnostic robustness evaluation reports for weight edits (InvarLock framework)
5
+ Author-email: InvarLock Team <oss@invarlock.dev>
6
+ Maintainer-email: InvarLock Maintainers <support@invarlock.dev>
7
+ License-Expression: Apache-2.0
8
+ Project-URL: Homepage, https://github.com/invarlock/invarlock
9
+ Project-URL: Repository, https://github.com/invarlock/invarlock
10
+ Project-URL: Documentation, https://github.com/invarlock/invarlock/tree/main/docs
11
+ Project-URL: Issues, https://github.com/invarlock/invarlock/issues
12
+ Project-URL: Changelog, https://github.com/invarlock/invarlock/blob/main/CHANGELOG.md
13
+ Keywords: machine-learning,deep-learning,transformers,pytorch,llm,quantization,safety,evaluation,certification
14
+ Classifier: Development Status :: 4 - Beta
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Intended Audience :: Science/Research
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Operating System :: OS Independent
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.12
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: typer>=0.15
27
+ Requires-Dist: click>=8.1
28
+ Requires-Dist: shellingham>=1.5.0
29
+ Requires-Dist: pandas>=2.2
30
+ Requires-Dist: scikit-learn>=1.4
31
+ Requires-Dist: pydantic>=2.0
32
+ Requires-Dist: rich>=13.0
33
+ Requires-Dist: pyyaml>=6.0
34
+ Requires-Dist: markdown>=3.5
35
+ Requires-Dist: psutil>=5.9
36
+ Requires-Dist: hypothesis>=6.98
37
+ Requires-Dist: typing_extensions>=4.7
38
+ Requires-Dist: jsonschema>=4.0
39
+ Provides-Extra: adapters
40
+ Requires-Dist: torch>=2.1.0; extra == "adapters"
41
+ Requires-Dist: transformers>=5.0.0; extra == "adapters"
42
+ Provides-Extra: hf
43
+ Requires-Dist: torch>=2.1.0; extra == "hf"
44
+ Requires-Dist: transformers>=5.0.0; extra == "hf"
45
+ Requires-Dist: datasets>=3.0; extra == "hf"
46
+ Requires-Dist: numpy>=1.24; extra == "hf"
47
+ Requires-Dist: huggingface_hub>=1.0.0; extra == "hf"
48
+ Requires-Dist: aiohttp>=3.12.14; extra == "hf"
49
+ Requires-Dist: h2>=4.3.0; extra == "hf"
50
+ Requires-Dist: pillow>=11.3.0; extra == "hf"
51
+ Provides-Extra: guards
52
+ Requires-Dist: torch>=2.1.0; extra == "guards"
53
+ Requires-Dist: numpy>=1.24; extra == "guards"
54
+ Provides-Extra: edits
55
+ Requires-Dist: torch>=2.1.0; extra == "edits"
56
+ Provides-Extra: eval
57
+ Requires-Dist: torch>=2.1.0; extra == "eval"
58
+ Requires-Dist: datasets>=3.0; extra == "eval"
59
+ Provides-Extra: gptq
60
+ Requires-Dist: torch>=2.1.0; extra == "gptq"
61
+ Requires-Dist: auto-gptq>=0.7.0; platform_system == "Linux" and extra == "gptq"
62
+ Requires-Dist: triton>=2.3.0; platform_system == "Linux" and extra == "gptq"
63
+ Requires-Dist: transformers>=5.0.0; extra == "gptq"
64
+ Provides-Extra: awq
65
+ Requires-Dist: torch>=2.1.0; extra == "awq"
66
+ Requires-Dist: autoawq>=0.2.0; platform_system == "Linux" and extra == "awq"
67
+ Requires-Dist: transformers>=5.0.0; extra == "awq"
68
+ Requires-Dist: triton>=2.3.0; platform_system == "Linux" and extra == "awq"
69
+ Provides-Extra: gpu
70
+ Requires-Dist: torch>=2.1.0; extra == "gpu"
71
+ Requires-Dist: accelerate>=0.27; extra == "gpu"
72
+ Requires-Dist: bitsandbytes>=0.41; platform_system == "Linux" and extra == "gpu"
73
+ Provides-Extra: all
74
+ Requires-Dist: torch>=2.1.0; extra == "all"
75
+ Requires-Dist: transformers>=5.0.0; extra == "all"
76
+ Requires-Dist: datasets>=3.0; extra == "all"
77
+ Requires-Dist: numpy>=1.24; extra == "all"
78
+ Requires-Dist: huggingface_hub>=1.0.0; extra == "all"
79
+ Requires-Dist: accelerate>=0.27; extra == "all"
80
+ Requires-Dist: bitsandbytes>=0.41; platform_system == "Linux" and extra == "all"
81
+ Requires-Dist: auto-gptq>=0.7.0; platform_system == "Linux" and extra == "all"
82
+ Requires-Dist: autoawq>=0.2.0; platform_system == "Linux" and extra == "all"
83
+ Requires-Dist: triton>=2.3.0; platform_system == "Linux" and extra == "all"
84
+ Requires-Dist: aiohttp>=3.12.14; extra == "all"
85
+ Requires-Dist: h2>=4.3.0; extra == "all"
86
+ Requires-Dist: pillow>=11.3.0; extra == "all"
87
+ Provides-Extra: onnx
88
+ Requires-Dist: optimum>=1.17.0; extra == "onnx"
89
+ Requires-Dist: onnxruntime>=1.17.0; extra == "onnx"
90
+ Provides-Extra: dev
91
+ Requires-Dist: pytest>=7.0; extra == "dev"
92
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
93
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
94
+ Requires-Dist: black>=23.0; extra == "dev"
95
+ Requires-Dist: mypy>=1.0; extra == "dev"
96
+ Requires-Dist: hypothesis>=6.98; extra == "dev"
97
+ Requires-Dist: pre-commit>=3.0; extra == "dev"
98
+ Requires-Dist: mkdocs>=1.5; extra == "dev"
99
+ Requires-Dist: mkdocs-material>=9.5; extra == "dev"
100
+ Requires-Dist: mkdocs-mermaid2-plugin>=1.1; extra == "dev"
101
+ Requires-Dist: sphinx>=7.0; extra == "dev"
102
+ Requires-Dist: matplotlib>=3.7; extra == "dev"
103
+ Requires-Dist: bitsandbytes>=0.41; extra == "dev"
104
+ Requires-Dist: build>=0.10.0; extra == "dev"
105
+ Requires-Dist: twine>=4.0.0; extra == "dev"
106
+ Dynamic: license-file
107
+
108
+ <p align="center">
109
+ <img
110
+ src="https://raw.githubusercontent.com/invarlock/invarlock/main/docs/assets/invarlock-logo.svg"
111
+ alt="InvarLock"
112
+ width="420"
113
+ />
114
+ </p>
115
+
116
+ # InvarLock — Edit‑agnostic robustness reports for weight edits
117
+
118
+ [![CI](https://img.shields.io/github/actions/workflow/status/invarlock/invarlock/ci.yml?branch=main&logo=github&label=CI)](https://github.com/invarlock/invarlock/actions/workflows/ci.yml)
119
+ [![PyPI](https://badge.fury.io/py/invarlock.svg)](https://pypi.org/project/invarlock/)
120
+ [![Docs](https://img.shields.io/badge/docs-quickstart-blue.svg)](https://github.com/invarlock/invarlock/blob/main/docs/user-guide/quickstart.md)
121
+ [![License: Apache-2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://github.com/invarlock/invarlock/blob/main/LICENSE)
122
+ [![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/release/python-3120/)
123
+
124
+ Quantizing, pruning, or otherwise editing a model’s weights can silently degrade quality.
125
+ InvarLock compares an edited **subject** checkpoint against a fixed **baseline** with paired
126
+ evaluation windows, enforces a guard pipeline (invariants → spectral → RMT → variance), and
127
+ produces a machine‑readable Evaluation Report you can gate in CI.
128
+
129
+ > Status: pre‑1.0. Until 1.0, minor releases may be breaking. See `CHANGELOG.md`.
130
+
131
+ For guidance on where to ask questions, how to report bugs, and what to expect in terms of response times, see
132
+ [SUPPORT.md](https://github.com/invarlock/invarlock/blob/main/SUPPORT.md).
133
+
134
+ ## Why InvarLock?
135
+
136
+ - **Quality gates for weight edits**: catch regressions before deployment.
137
+ - **Statistical guarantees**: paired primary metrics with confidence intervals.
138
+ - **Auditable evidence**: deterministic pairing metadata + policy digests in `evaluation.report.json`.
139
+ - **CI/CD-friendly**: stable exit codes, `--json` outputs, and portable “proof packs”.
140
+ - **Offline-first**: network is disabled by default; enable downloads per command.
141
+
142
+ ## Who is this for?
143
+
144
+ - ML engineers shipping quantized/pruned checkpoints.
145
+ - MLOps teams building CI quality gates and reviewable artifacts.
146
+ - Researchers validating compression/edit methods with reproducible, paired eval.
147
+
148
+ ## How it works
149
+
150
+ ```text
151
+ ┌───────────────────────┐ ┌────────────────────────────────────────────┐
152
+ │ Baseline (checkpoint) │────►│ │
153
+ └───────────────────────┘ │ invarlock evaluate │
154
+ │ ├─► Paired windows (deterministic) │
155
+ ┌───────────────────────┐ │ ├─► GuardChain pipeline │
156
+ │ Subject (checkpoint) │────►│ │ └─► invariants → spectral → RMT → VE │
157
+ └───────────────────────┘ │ └─► Emit: evaluation.report.json │
158
+ │ │
159
+ └────────────────────────────────────────────┘
160
+
161
+ ┌───────────────┴───────────────┐
162
+ ▼ ▼
163
+ ✅ PASS ❌ FAIL
164
+ (ship) (rollback)
165
+
166
+ ```
167
+
168
+ ## Quick start
169
+
170
+ Colab (CPU-friendly):
171
+ [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/invarlock/invarlock/blob/main/notebooks/invarlock_quickstart_cpu.ipynb)
172
+
173
+ ```bash
174
+ # HF adapter stack (torch/transformers)
175
+ pip install "invarlock[hf]"
176
+
177
+ # Version + report schema (when available)
178
+ invarlock --version
179
+
180
+ # Compare baseline vs subject (downloads require explicit network enable)
181
+ INVARLOCK_ALLOW_NETWORK=1 invarlock evaluate \
182
+ --baseline gpt2 \
183
+ --subject gpt2 \
184
+ --adapter auto \
185
+ --profile dev \
186
+ --quiet
187
+
188
+ # Validate the evaluation report
189
+ invarlock verify reports/eval/evaluation.report.json
190
+
191
+ # Render HTML for sharing
192
+ invarlock report html -i reports/eval/evaluation.report.json -o reports/eval/evaluation.html
193
+ ```
194
+
195
+ Example output (abridged; counts vary by profile/config):
196
+
197
+ ```text
198
+ INVARLOCK v<version> · EVALUATE
199
+ Baseline: gpt2 -> Subject: gpt2 · Profile: dev
200
+ Status: PASS · Gates: <passed>/<total> passed
201
+ Primary metric ratio: <ratio>
202
+ Output: reports/eval/evaluation.report.json
203
+ ```
204
+
205
+ ## Proof packs (portable evidence bundles)
206
+
207
+ Proof packs bundle reports + verification metadata into a distributable artifact.
208
+
209
+ - Guide: <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/proof-packs.md>
210
+ - Verify: `scripts/proof_packs/verify_pack.sh --pack <dir> --strict` (or `PACK_STRICT_MODE=1 ...`)
211
+
212
+ Note: `configs/` and `scripts/` are repo resources and are not shipped in wheels; clone the repo to use
213
+ presets and proof-pack helpers.
214
+
215
+ ## Installation
216
+
217
+ ```bash
218
+ # Minimal CLI (no torch/transformers)
219
+ pip install invarlock
220
+
221
+ # HF workflows (torch/transformers)
222
+ pip install "invarlock[hf]"
223
+ ```
224
+
225
+ Optional extras: `invarlock[gpu]`, `invarlock[awq,gptq]`. Full setup: <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/getting-started.md>.
226
+
227
+ ## Documentation
228
+
229
+ - Quickstart: <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/quickstart.md>
230
+ - Compare & evaluate (BYOE): <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/compare-and-evaluate.md>
231
+ - Reading a report: <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/reading-report.md>
232
+ - CLI reference: <https://github.com/invarlock/invarlock/blob/main/docs/reference/cli.md>
233
+ - Assurance case: <https://github.com/invarlock/invarlock/blob/main/docs/assurance/00-safety-case.md>
234
+ - Threat model: <https://github.com/invarlock/invarlock/blob/main/docs/security/threat-model.md>
235
+
236
+ ## Community
237
+
238
+ - Questions/ideas: <https://github.com/invarlock/invarlock/discussions>
239
+ - Bug reports: <https://github.com/invarlock/invarlock/issues>
240
+ - Contact: <mailto:support@invarlock.dev>
241
+
242
+ ## Citation
243
+
244
+ If you use InvarLock in scientific work, please cite it (canonical metadata is in `CITATION.cff`):
245
+
246
+ ```bibtex
247
+ @software{invarlock,
248
+ title = {InvarLock: Edit-agnostic robustness evaluation reports for weight edits},
249
+ author = {{InvarLock Maintainers}},
250
+ url = {https://github.com/invarlock/invarlock},
251
+ }
252
+ ```
253
+
254
+ ## Limitations
255
+
256
+ - InvarLock evaluates an edited model relative to a baseline under a specific configuration; results are not “global” guarantees.
257
+ - Not a content-safety/alignment tool.
258
+ - Native Windows is not supported (use WSL2 or Linux).
259
+
260
+ ## Support matrix
261
+
262
+ <!-- markdownlint-disable MD060 -->
263
+ | Platform | Status | Notes |
264
+ | ---------------------- | --------------- | ----------------------------------------- |
265
+ | Python 3.12+ | ✅ Required | |
266
+ | Linux | ✅ Full | Primary dev target |
267
+ | macOS (Intel/M-series) | ✅ Full | MPS supported (default on Apple Silicon) |
268
+ | Windows | ❌ Not supported | Use WSL2 or a Linux container if required |
269
+ | CUDA | ✅ Recommended | For larger models |
270
+ | CPU | ✅ Fallback | Slower but functional |
271
+ <!-- markdownlint-enable MD060 -->
272
+
273
+ ## Contributing
274
+
275
+ - Contributing guide: <https://github.com/invarlock/invarlock/blob/main/CONTRIBUTING.md>
276
+ - Fast local checks (repo clone):
277
+ - `make dev-install`
278
+ - `make test`
279
+ - `make lint`
280
+
281
+ ## License
282
+
283
+ Apache-2.0 — see `LICENSE`.