invarlock 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. invarlock/__init__.py +4 -4
  2. invarlock/adapters/__init__.py +10 -14
  3. invarlock/adapters/auto.py +37 -50
  4. invarlock/adapters/capabilities.py +2 -2
  5. invarlock/adapters/hf_causal.py +418 -0
  6. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  7. invarlock/adapters/hf_loading.py +7 -7
  8. invarlock/adapters/hf_mixin.py +53 -9
  9. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  10. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  11. invarlock/assurance/__init__.py +15 -23
  12. invarlock/cli/adapter_auto.py +32 -26
  13. invarlock/cli/app.py +128 -27
  14. invarlock/cli/commands/__init__.py +2 -2
  15. invarlock/cli/commands/calibrate.py +48 -4
  16. invarlock/cli/commands/doctor.py +8 -10
  17. invarlock/cli/commands/evaluate.py +986 -0
  18. invarlock/cli/commands/explain_gates.py +25 -17
  19. invarlock/cli/commands/export_html.py +11 -9
  20. invarlock/cli/commands/plugins.py +13 -9
  21. invarlock/cli/commands/report.py +326 -92
  22. invarlock/cli/commands/run.py +1160 -228
  23. invarlock/cli/commands/verify.py +157 -97
  24. invarlock/cli/config.py +1 -1
  25. invarlock/cli/determinism.py +1 -1
  26. invarlock/cli/doctor_helpers.py +4 -5
  27. invarlock/cli/output.py +193 -0
  28. invarlock/cli/provenance.py +4 -4
  29. invarlock/core/bootstrap.py +1 -1
  30. invarlock/core/registry.py +9 -11
  31. invarlock/core/retry.py +14 -14
  32. invarlock/core/runner.py +112 -26
  33. invarlock/edits/noop.py +2 -2
  34. invarlock/edits/quant_rtn.py +67 -39
  35. invarlock/eval/__init__.py +1 -1
  36. invarlock/eval/bench.py +14 -10
  37. invarlock/eval/data.py +68 -23
  38. invarlock/eval/metrics.py +59 -1
  39. invarlock/eval/primary_metric.py +1 -1
  40. invarlock/eval/tasks/__init__.py +12 -0
  41. invarlock/eval/tasks/classification.py +48 -0
  42. invarlock/eval/tasks/qa.py +36 -0
  43. invarlock/eval/tasks/text_generation.py +102 -0
  44. invarlock/guards/invariants.py +19 -10
  45. invarlock/guards/rmt.py +2 -2
  46. invarlock/guards/spectral.py +1 -1
  47. invarlock/guards/variance.py +2 -2
  48. invarlock/model_profile.py +64 -62
  49. invarlock/observability/health.py +6 -6
  50. invarlock/observability/metrics.py +108 -0
  51. invarlock/plugins/hf_bnb_adapter.py +32 -21
  52. invarlock/reporting/__init__.py +18 -4
  53. invarlock/reporting/guards_analysis.py +154 -4
  54. invarlock/reporting/html.py +61 -11
  55. invarlock/reporting/normalizer.py +9 -2
  56. invarlock/reporting/policy_utils.py +1 -1
  57. invarlock/reporting/primary_metric_utils.py +11 -11
  58. invarlock/reporting/render.py +876 -510
  59. invarlock/reporting/report.py +72 -30
  60. invarlock/reporting/{certificate.py → report_builder.py} +252 -99
  61. invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
  62. invarlock/reporting/report_types.py +6 -1
  63. invarlock/reporting/telemetry.py +86 -0
  64. invarlock-0.3.8.dist-info/METADATA +283 -0
  65. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
  66. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
  67. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
  68. invarlock/adapters/hf_gpt2.py +0 -404
  69. invarlock/adapters/hf_llama.py +0 -487
  70. invarlock/cli/commands/certify.py +0 -422
  71. invarlock-0.3.6.dist-info/METADATA +0 -588
  72. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
  73. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
@@ -1,588 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: invarlock
3
- Version: 0.3.6
4
- Summary: Edit‑agnostic robustness certificates for weight edits (InvarLock framework)
5
- Author-email: InvarLock Team <oss@invarlock.dev>
6
- Maintainer-email: InvarLock Maintainers <support@invarlock.dev>
7
- License-Expression: Apache-2.0
8
- Project-URL: Homepage, https://github.com/invarlock/invarlock
9
- Project-URL: Repository, https://github.com/invarlock/invarlock
10
- Project-URL: Documentation, https://github.com/invarlock/invarlock/tree/main/docs
11
- Project-URL: Issues, https://github.com/invarlock/invarlock/issues
12
- Project-URL: Changelog, https://github.com/invarlock/invarlock/blob/main/CHANGELOG.md
13
- Keywords: machine-learning,deep-learning,transformers,pytorch,llm,quantization,safety,evaluation,certification
14
- Classifier: Development Status :: 4 - Beta
15
- Classifier: Intended Audience :: Developers
16
- Classifier: Intended Audience :: Science/Research
17
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
- Classifier: Programming Language :: Python :: 3
19
- Classifier: Programming Language :: Python :: 3.12
20
- Classifier: Programming Language :: Python :: 3.13
21
- Classifier: Operating System :: OS Independent
22
- Classifier: Typing :: Typed
23
- Requires-Python: >=3.12
24
- Description-Content-Type: text/markdown
25
- License-File: LICENSE
26
- Requires-Dist: typer>=0.15
27
- Requires-Dist: click>=8.1
28
- Requires-Dist: shellingham>=1.5.0
29
- Requires-Dist: pandas>=2.2
30
- Requires-Dist: scikit-learn>=1.4
31
- Requires-Dist: pydantic>=2.0
32
- Requires-Dist: rich>=13.0
33
- Requires-Dist: pyyaml>=6.0
34
- Requires-Dist: psutil>=5.9
35
- Requires-Dist: hypothesis>=6.98
36
- Requires-Dist: typing_extensions>=4.7
37
- Requires-Dist: jsonschema>=4.0
38
- Provides-Extra: adapters
39
- Requires-Dist: torch>=2.1.0; extra == "adapters"
40
- Requires-Dist: transformers>=4.53.0; extra == "adapters"
41
- Provides-Extra: hf
42
- Requires-Dist: torch>=2.1.0; extra == "hf"
43
- Requires-Dist: transformers>=4.53.0; extra == "hf"
44
- Requires-Dist: datasets>=3.0; extra == "hf"
45
- Requires-Dist: numpy>=1.24; extra == "hf"
46
- Requires-Dist: huggingface_hub>=0.23; extra == "hf"
47
- Requires-Dist: aiohttp>=3.12.14; extra == "hf"
48
- Requires-Dist: h2>=4.3.0; extra == "hf"
49
- Requires-Dist: pillow>=11.3.0; extra == "hf"
50
- Provides-Extra: guards
51
- Requires-Dist: torch>=2.1.0; extra == "guards"
52
- Requires-Dist: numpy>=1.24; extra == "guards"
53
- Provides-Extra: edits
54
- Requires-Dist: torch>=2.1.0; extra == "edits"
55
- Provides-Extra: eval
56
- Requires-Dist: torch>=2.1.0; extra == "eval"
57
- Requires-Dist: datasets>=3.0; extra == "eval"
58
- Provides-Extra: gptq
59
- Requires-Dist: torch>=2.1.0; extra == "gptq"
60
- Requires-Dist: auto-gptq>=0.7.0; platform_system == "Linux" and extra == "gptq"
61
- Requires-Dist: triton>=2.3.0; platform_system == "Linux" and extra == "gptq"
62
- Requires-Dist: transformers>=4.53.0; extra == "gptq"
63
- Provides-Extra: awq
64
- Requires-Dist: torch>=2.1.0; extra == "awq"
65
- Requires-Dist: autoawq>=0.2.0; platform_system == "Linux" and extra == "awq"
66
- Requires-Dist: transformers>=4.53.0; extra == "awq"
67
- Requires-Dist: triton>=2.3.0; platform_system == "Linux" and extra == "awq"
68
- Provides-Extra: gpu
69
- Requires-Dist: torch>=2.1.0; extra == "gpu"
70
- Requires-Dist: accelerate>=0.27; extra == "gpu"
71
- Requires-Dist: bitsandbytes>=0.41; platform_system == "Linux" and extra == "gpu"
72
- Provides-Extra: all
73
- Requires-Dist: torch>=2.1.0; extra == "all"
74
- Requires-Dist: transformers>=4.53.0; extra == "all"
75
- Requires-Dist: datasets>=3.0; extra == "all"
76
- Requires-Dist: numpy>=1.24; extra == "all"
77
- Requires-Dist: huggingface_hub>=0.23; extra == "all"
78
- Requires-Dist: accelerate>=0.27; extra == "all"
79
- Requires-Dist: bitsandbytes>=0.41; platform_system == "Linux" and extra == "all"
80
- Requires-Dist: auto-gptq>=0.7.0; platform_system == "Linux" and extra == "all"
81
- Requires-Dist: autoawq>=0.2.0; platform_system == "Linux" and extra == "all"
82
- Requires-Dist: triton>=2.3.0; platform_system == "Linux" and extra == "all"
83
- Requires-Dist: aiohttp>=3.12.14; extra == "all"
84
- Requires-Dist: h2>=4.3.0; extra == "all"
85
- Requires-Dist: pillow>=11.3.0; extra == "all"
86
- Provides-Extra: onnx
87
- Requires-Dist: optimum>=1.17.0; extra == "onnx"
88
- Requires-Dist: onnxruntime>=1.17.0; extra == "onnx"
89
- Provides-Extra: dev
90
- Requires-Dist: pytest>=7.0; extra == "dev"
91
- Requires-Dist: pytest-cov>=4.0; extra == "dev"
92
- Requires-Dist: ruff>=0.1.0; extra == "dev"
93
- Requires-Dist: black>=23.0; extra == "dev"
94
- Requires-Dist: mypy>=1.0; extra == "dev"
95
- Requires-Dist: hypothesis>=6.98; extra == "dev"
96
- Requires-Dist: pre-commit>=3.0; extra == "dev"
97
- Requires-Dist: mkdocs>=1.5; extra == "dev"
98
- Requires-Dist: mkdocs-material>=9.5; extra == "dev"
99
- Requires-Dist: mkdocs-mermaid2-plugin>=1.1; extra == "dev"
100
- Requires-Dist: sphinx>=7.0; extra == "dev"
101
- Requires-Dist: matplotlib>=3.7; extra == "dev"
102
- Requires-Dist: bitsandbytes>=0.41; extra == "dev"
103
- Requires-Dist: build>=0.10.0; extra == "dev"
104
- Requires-Dist: twine>=4.0.0; extra == "dev"
105
- Dynamic: license-file
106
-
107
- # InvarLock — Edit‑agnostic robustness certificates for weight edits
108
-
109
- In short: certify that weight edits (e.g., quantization) preserve quality. If
110
- they don’t, roll back safely.
111
-
112
- Technical: edit‑agnostic guard pipeline (invariants → spectral → RMT →
113
- variance) producing a machine‑readable Safety Certificate.
114
-
115
- > **Status:** 0.3.6 (pre‑1.0). Until 1.0, **minor** releases may be
116
- > breaking. See CLI help and the CHANGELOG for updates.
117
-
118
- [![CI](https://img.shields.io/github/actions/workflow/status/invarlock/invarlock/ci.yml?branch=main&logo=github&label=CI)](https://github.com/invarlock/invarlock/actions/workflows/ci.yml)
119
- [![PyPI](https://badge.fury.io/py/invarlock.svg)](https://pypi.org/project/invarlock/)
120
- [![Docs](https://img.shields.io/badge/docs-quickstart-blue.svg)](https://github.com/invarlock/invarlock/blob/main/docs/user-guide/quickstart.md)
121
- [![License: Apache-2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](LICENSE)
122
- [![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/release/python-3120/)
123
- ---
124
-
125
- For guidance on where to ask questions, how to report bugs, and what to expect in terms of response times, see
126
- [SUPPORT.md](https://github.com/invarlock/invarlock/blob/main/SUPPORT.md).
127
-
128
- ## 🚀 Quick start (no repo clone)
129
-
130
- [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/invarlock/invarlock/blob/main/notebooks/invarlock_quickstart_cpu.ipynb)
131
-
132
- ```bash
133
- # Install with HF adapters
134
- pip install "invarlock[hf]"
135
-
136
- # Fast dev self‑cert on GPT‑2 small (tiny‑relax; downloads require explicit network)
137
- INVARLOCK_ALLOW_NETWORK=1 INVARLOCK_DEDUP_TEXTS=1 INVARLOCK_TINY_RELAX=1 \
138
- invarlock certify \
139
- --baseline gpt2 \
140
- --subject gpt2 \
141
- --adapter auto \
142
- --profile dev
143
- ```
144
-
145
- This produces `reports/.../evaluation.cert.json` with paired metrics
146
- (ppl/accuracy), structural deltas, spectral/RMT stats, variance‑estimator
147
- provenance, seeds/hashes, pairing metrics, and a policy digest.
148
-
149
- > **Calibration note:** tier thresholds and window sizes are piloted on GPT‑2 small,
150
- > BERT base, and TinyLLaMA (see `docs/assurance/09-tier-v1-calibration.md`). For
151
- > calibrated Balanced/Conservative certs, use the preset‑based CI/Release examples
152
- > below. `INVARLOCK_TINY_RELAX` dev runs relax sample‑size floors and are intended
153
- > only for small smoke tests (not release evidence).
154
-
155
- > Need presets or matrix scripts? Clone this repo and see Presets & Demos below.
156
-
157
- ---
158
-
159
- ## 📚 Docs & Guides
160
-
161
- - Quickstart: <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/quickstart.md>
162
- - Compare & Certify (BYOE): <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/compare-and-certify.md>
163
- - Reading a Certificate: <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/reading-certificate.md>
164
- - CLI reference: <https://github.com/invarlock/invarlock/blob/main/docs/reference/cli.md>
165
-
166
- Quick examples (repo presets, CPU; repo clone required for preset paths):
167
-
168
- ```bash
169
- # Install with HF adapters
170
- pip install "invarlock[hf]"
171
-
172
- # Preflight a config (JSON diagnostics)
173
- invarlock doctor --config configs/presets/causal_lm/wikitext2_512.yaml --json
174
-
175
- # Calibrated GPT‑2 small (recommended starting point; repo preset)
176
- INVARLOCK_ALLOW_NETWORK=1 INVARLOCK_DEDUP_TEXTS=1 \
177
- invarlock certify \
178
- --baseline gpt2 \
179
- --subject gpt2 \
180
- --adapter auto \
181
- --profile release \
182
- --preset configs/presets/causal_lm/wikitext2_512.yaml
183
-
184
- # Tiny causal LM smoke (out‑of‑calibration, dev‑only)
185
- INVARLOCK_ALLOW_NETWORK=1 \
186
- invarlock certify \
187
- --baseline hf:sshleifer/tiny-gpt2 \
188
- --subject hf:sshleifer/tiny-gpt2 \
189
- --profile dev
190
- ```
191
-
192
- Notes:
193
-
194
- - Presets and scripts live in this repo (`configs/`, `scripts/`) and are not
195
- shipped in wheels. Use flag‑only `certify` when installing from PyPI, or clone
196
- this repo to use presets and the matrix script.
197
- - `python -m invarlock` works the same as `invarlock`.
198
- - InvarLock runs offline by default; enable network per command with `INVARLOCK_ALLOW_NETWORK=1` when fetching.
199
-
200
- ---
201
-
202
- ## 🔧 Installation
203
-
204
- ```bash
205
- # Core + HF adapter
206
- pip install "invarlock[hf]"
207
-
208
- # GPU extras (CUDA wheels if available)
209
- pip install "invarlock[gpu]"
210
-
211
- # Optional edit backends
212
- pip install "invarlock[awq,gptq]" # AWQ/GPTQ PTQ stacks
213
- pip install "invarlock[dev]" # dev tooling (ruff, pytest, mkdocs)
214
- ```
215
-
216
- > The minimal core installs with `pip install invarlock`. The OSS core is edit‑agnostic
217
- > (BYOE): supply baseline and subject checkpoints and run Compare & Certify. A
218
- > small built‑in edit, `quant_rtn`, is provided for CI/quickstart demos only;
219
- > optional extras (e.g., `gptq`, `awq`, `gpu`) are loaders/runtimes, not edit
220
- > pipelines. Core installs do not pull in torch/transformers; those are only
221
- > installed when you opt into extras such as `"invarlock[hf]"` or
222
- > `"invarlock[adapters]"`.
223
-
224
- Run either entry point:
225
-
226
- ```bash
227
- invarlock --help
228
- python -m invarlock --help
229
- ```
230
-
231
- Common error (missing torch on adapter-based commands):
232
-
233
- ```text
234
- ❌ Torch is required for this command.
235
- Install extras with: pip install "invarlock[hf]" or "invarlock[adapters]".
236
- ```
237
-
238
- If you see this, install an appropriate extra (for example, `pip install "invarlock[hf]"`)
239
- before running `invarlock run` or `invarlock certify` with HF adapters.
240
-
241
- ### Network Access
242
-
243
- - Outbound network is disabled by default for safety. Enable it explicitly (per
244
- command) when you need to download models or datasets:
245
-
246
- ```bash
247
- INVARLOCK_ALLOW_NETWORK=1 invarlock certify \
248
- --baseline gpt2 \
249
- --subject gpt2 \
250
- --adapter auto \
251
- --profile ci \
252
- --preset configs/presets/causal_lm/wikitext2_512.yaml
253
- ```
254
-
255
- - Offline/air‑gapped usage: pre‑download to a cache, then run with network
256
- disabled. You can enforce offline reads with `HF_DATASETS_OFFLINE=1` (and
257
- optionally set `HF_HOME`/`HF_DATASETS_CACHE` to your cache location).
258
-
259
- See the CLI reference and datasets guide for details:
260
-
261
- - <https://github.com/invarlock/invarlock/blob/main/docs/reference/cli.md>
262
- - <https://github.com/invarlock/invarlock/blob/main/docs/reference/datasets.md>
263
-
264
- ### Install via pipx (isolated)
265
-
266
- ```bash
267
- # Ensure pipx uses Python 3.12+
268
- pipx install --python python3.12 "invarlock[hf]" # Python 3.12+ recommended
269
-
270
- # With GPU extras (if supported on your platform)
271
- pipx install --python python3.12 "invarlock[hf,gpu]"
272
- ```
273
-
274
- ### Conda environment recipe
275
-
276
- ```bash
277
- conda create -n invarlock python=3.12 -y
278
- conda activate invarlock
279
-
280
- # Core + HF stack
281
- pip install "invarlock[hf]"
282
-
283
- # Optional extras
284
- # pip install "invarlock[gpu]"
285
- # pip install "invarlock[awq,gptq]"
286
- ```
287
-
288
- ---
289
-
290
- ## 💻 Support Matrix
291
-
292
- <!-- markdownlint-disable MD060 -->
293
- | Platform | Status | Notes |
294
- | ---------------------- | --------------- | ----------------------------------------- |
295
- | Python 3.12+ | ✅ Required | |
296
- | Linux | ✅ Full | Primary dev target |
297
- | macOS (Intel/M-series) | ✅ Full | MPS supported (default on Apple Silicon) |
298
- | Windows | ❌ Not supported | Use WSL2 or a Linux container if required |
299
- | CUDA | ✅ Recommended | For larger models |
300
- | CPU | ✅ Fallback | Slower but functional |
301
- <!-- markdownlint-enable MD060 -->
302
-
303
- **Device selection:** CUDA → MPS → CPU (auto). Override with environment variables if
304
- needed (e.g., `CUDA_VISIBLE_DEVICES`).
305
-
306
- ---
307
-
308
- ## 🧱 What InvarLock Provides
309
-
310
- - **Runner** (torch-agnostic core): `prepare → preview → apply → guards → evaluate → report/rollback`
311
-
312
- - **Built-in edit**:
313
- - `quant_rtn` (INT8 RTN, per‑channel, clamp/group size)
314
-
315
- - **Guards** (policy-tiered; “GuardChain” = ordered guard pipeline):
316
-
317
- 1. **Invariants** (pre/post: shapes/finite/tying)
318
- 2. **Spectral** (per-family z-caps; monitor or gate per tier)
319
- 3. **RMT** (ε-band on outliers; monitor or gate per tier)
320
- 4. **Variance (VE)** (predictive paired ΔlogNLL gate; tiered sidedness)
321
-
322
- - **Safety Certificate (schema v1, PM‑only)**: Primary Metric (ppl or
323
- accuracy) with paired statistics, structural deltas, spectral/RMT stats, VE
324
- provenance, seeds/hashes, pairing metrics, and **policy digest**. Canonical
325
- artifact: `reports/.../evaluation.cert.json`.
326
-
327
- **Scope (what InvarLock does / does not do):**
328
-
329
- - InvarLock certifies **regression risk from weight edits** (e.g., quantization or
330
- pruning) relative to a fixed baseline under a specific configuration.
331
- - It focuses on **paired primary metrics** (ppl/accuracy) plus structural and
332
- guard telemetry (invariants, spectral, RMT, variance) for those edits.
333
- - It **does not** claim to solve content‑safety problems (toxicity, bias,
334
- jailbreaks) or alignment in general, and it does not certify arbitrary
335
- training changes or new datasets.
336
- - It is calibrated and tested on Linux/macOS environments using the HF/PyTorch
337
- stack described in the docs; native Windows is not supported.
338
- - For the detailed assurance case and threat model, see
339
- `docs/assurance/00-safety-case.md` and `docs/security/threat-model.md`.
340
-
341
- Minimal excerpt (redacted):
342
-
343
- ```json
344
- {
345
- "schema_version": "v1",
346
- "run_id": "...",
347
- "validation": {
348
- "primary_metric_acceptable": true,
349
- "guard_overhead_acceptable": true
350
- },
351
- "primary_metric": {
352
- "kind": "ppl_causal",
353
- "preview": 12.3,
354
- "final": 12.1,
355
- "ratio_vs_baseline": 0.98,
356
- "display_ci": [0.97, 0.99]
357
- },
358
- "structure": {"layers_modified": 0, "params_changed": 0},
359
- "spectral": {"caps_applied": 0},
360
- "rmt": {"stable": true},
361
- "auto": {"tier": "balanced"}
362
- }
363
- ```
364
-
365
- ---
366
-
367
- ## 🛡️ Guard Order & Balanced Defaults
368
-
369
- **Canonical order**: `["invariants", "spectral", "rmt", "variance", "invariants"]`
370
-
371
- **Balanced profile (example)**
372
-
373
- ```yaml
374
- guards:
375
- spectral:
376
- mode: monitor
377
- sigma_quantile: 0.95
378
- deadband: 0.10
379
- scope: all
380
- max_caps: 5
381
- max_spectral_norm: null # disable absolute clamp; rely on calibrated κ_f
382
- multiple_testing: { method: bh, alpha: 0.05, m: 4 }
383
- family_caps: { ffn: 2.5, attn: 2.8, embed: 3.0, other: 3.0 } # z-caps (FPR-derived)
384
- rmt:
385
- mode: monitor
386
- epsilon_by_family: { ffn: 0.10, attn: 0.08, embed: 0.12, other: 0.12 }
387
- variance:
388
- tap: "post mlp.c_proj (pre-residual)"
389
- targets: "edited_modules_only"
390
- discovery:
391
- deadband: 0.02
392
- min_abs_adjust: 0.012
393
- max_scale_step: 0.03
394
- gating:
395
- sided: "one-sided" # improvement-only
396
- min_effect_lognll: 9e-4 # pilot-derived power threshold
397
- ```
398
-
399
- > **Conservative** raises z-caps/ε/deadband/min-effect and uses **two-sided** VE; **Aggressive** relaxes accordingly.
400
-
401
- ---
402
-
403
- > 🔍 For development and CI commands (pytest, mkdocs, generators), see CONTRIBUTING.md.
404
-
405
- ---
406
-
407
- ## ✂️ Edits & Plugins
408
-
409
- - **Quant RTN** (built‑in): INT8 RTN, per‑channel, group size, percentile clamp
410
- - **Compare & Certify (BYOE, recommended)**: Bring your baseline + subject checkpoints and certify with InvarLock
411
- - **Plugins (optional)**: Adapters and guards via entry points. Adapters extend
412
- model loading/inference (e.g., GPTQ/AWQ formats); plugins do not add edit
413
- algorithms beyond RTN. List components with:
414
-
415
- ```bash
416
- invarlock plugins --help # summary
417
- invarlock plugins guards # guard plugins
418
- invarlock plugins edits # edit plugins
419
- invarlock plugins adapters # adapters and backend hints
420
- ```
421
-
422
- ---
423
-
424
- ## 🔁 Certification Criteria (balanced profile)
425
-
426
- Key checks enforced by balanced policy (summary):
427
-
428
- - **Pairing invariants**: preview = final counts; `match=1.00`, `overlap=0.00` (fail-fast in CI/Release)
429
- - **PM ratio gate** (ppl or accuracy): upper CI ≤ **1.10**
430
- - **Drift**: 0.95–1.05 (paired log-space)
431
- - **Spectral/RMT**: within tier FPR/ε band
432
- - **Catastrophe rollback**: automatic revert if PPL > **2.0×**
433
- - **Guard overhead**: a bare-vs-guarded comparison records `validation.guard_overhead_acceptable=true` when PPL overhead is ≤ 1%
434
-
435
-
436
- ---
437
-
438
- ## 🧾 Minimal Config (balanced GPT-2, CI profile)
439
-
440
- ```yaml
441
- model:
442
- id: "<set-your-model-id>" # e.g., gpt2
443
- adapter: "hf_gpt2"
444
- device: "cpu"
445
- dataset:
446
- provider: "wikitext2"
447
- split: "validation"
448
- seq_len: 512
449
- stride: 512
450
- preview_n: 64
451
- final_n: 64
452
- seed: 42
453
- edit:
454
- # Optional: built-in quant demo. Omit for Compare & Certify/BYOE.
455
- name: quant_rtn
456
- plan:
457
- bitwidth: 8
458
- per_channel: true
459
- scope: attn
460
- eval:
461
- metric:
462
- kind: ppl_causal
463
- loss:
464
- type: causal
465
- guards:
466
- order: [invariants, spectral, rmt, variance, invariants]
467
- spectral: { mode: monitor }
468
- rmt: { mode: monitor }
469
- variance:
470
- tap: "post mlp.c_proj (pre-residual)"
471
- targets: "edited_modules_only"
472
- discovery: { deadband: 0.02, min_abs_adjust: 0.012, max_scale_step: 0.03 }
473
- gating: { sided: one-sided, min_effect_lognll: 9e-4 }
474
- auto:
475
- enabled: true
476
- tier: balanced
477
- probes: 0
478
- output:
479
- dir: runs
480
- save_model: false
481
- save_report: true
482
- ```
483
-
484
- ---
485
-
486
- ## 🩺 Doctor (preflight)
487
-
488
- Run preflight checks before a run to catch misconfigurations early:
489
-
490
- ```bash
491
- invarlock doctor --config configs/presets/causal_lm/wikitext2_512.yaml --json
492
- ```
493
-
494
- Text mode emits lines prefixed with `ERROR:`, `WARNING:`, or `NOTE:` and stable
495
- codes like `[INVARLOCK:D001]`. JSON mode includes `summary`, `policy`,
496
- `findings[]`, `resolution`, and `format_version`.
497
-
498
- ---
499
-
500
- ## 🏗️ Source Layout (Single Distribution)
501
-
502
- ```text
503
- invarlock/
504
- ├─ src/
505
- │ ├─ invarlock/ # core + unified namespace
506
- │ │ ├─ core/ # runner, registry, contracts, events, ABI
507
- │ │ ├─ cli/ # console app + command wrappers (unified import path)
508
- │ │ ├─ adapters/ # adapter wrappers (HF GPT‑2/BERT/LLaMA)
509
- │ │ ├─ edits/ # quant_rtn
510
- │ │ ├─ guards/ # invariants, spectral, rmt, variance
511
- │ │ ├─ eval/ # evaluation metrics and helpers
512
- │ │ ├─ reporting/ # report assembly, certificate generation/validation
513
- │ │ ├─ assurance/ # assurance surface aggregating cert helpers
514
- │ │ ├─ plugins/ # built-in example plugins
515
- │ │ └─ observability/ # monitoring/metrics/tracing wrappers
516
- ├─ configs/ # presets (repo‑only; clone to use)
517
- ├─ docs/ # user guides, reference, assurance notes
518
- ├─ scripts/ # automation / QA helpers
519
- └─ tests/ # unit/integration/property tests
520
-
521
- Note: The package exposes a single import namespace (`invarlock.*`). Presets/scripts are repo resources and not packaged in wheels.
522
- ```
523
-
524
- ---
525
-
526
- ## 📚 Documentation
527
-
528
- - User Guide: <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/getting-started.md>
529
- - Quickstart: <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/quickstart.md>
530
- - Compare & Certify (BYOE): <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/compare-and-certify.md>
531
- - Reading a Certificate: <https://github.com/invarlock/invarlock/blob/main/docs/user-guide/reading-certificate.md>
532
- - Assurance (proof notes): <https://github.com/invarlock/invarlock/tree/main/docs/assurance>
533
- - eval math, spectral FPR, RMT ε, VE gate power, determinism
534
- - Config Schema: <https://github.com/invarlock/invarlock/blob/main/docs/reference/config-schema.md>
535
- - Guard Reference: <https://github.com/invarlock/invarlock/blob/main/docs/reference/guards.md>
536
-
537
- ---
538
-
539
- ## ⚡ Quick CPU Demos (dev)
540
-
541
- For tiny, CPU‑only demos that produce readable PASS banners in dev, enable
542
- tiny‑relax and run the matrix script (repo clone required). This mode relaxes
543
- primary‑metric token floors and is intended for smoke testing only (not release
544
- evidence):
545
-
546
- ```bash
547
- export INVARLOCK_TINY_RELAX=1 INVARLOCK_ALLOW_NETWORK=1 INVARLOCK_DEDUP_TEXTS=1 \
548
- TRANSFORMERS_NO_TORCHVISION=1 TOKENIZERS_PARALLELISM=false
549
- RUN=1 NET=1 bash scripts/run_tiny_all_matrix.sh
550
- ```
551
-
552
- Add `INCLUDE_MEASURED_CLS=1` to include a measured classification step (requires warmed HF caches/network).
553
-
554
- ---
555
-
556
- ## 🧪 Determinism & Provenance
557
-
558
- - Seeds: `{python, numpy, torch}` recorded in certs
559
- - Dataset/tokenizer hashes recorded
560
- - Paired non-overlapping windows (fail-fast if counts mismatch or pairing < 1.0)
561
- - Cert math checks: `ppl_ratio.point == exp(mean ΔlogNLL)` and CI from the **same** paired Δ array
562
-
563
- ---
564
-
565
- ## 🤝 Contributing
566
-
567
- ```bash
568
- make dev-install # editable + dev tools (pytest, ruff, mypy, mkdocs, etc.)
569
- make test # run tests
570
- make lint # ruff + mypy
571
- make format # ruff format/fix
572
- make docs # build docs (mkdocs)
573
- make verify # tests, lint, format, markdownlint
574
- ```
575
-
576
- Please see `CONTRIBUTING.md` for guidelines and `Makefile` for more targets.
577
-
578
- ---
579
-
580
- ## 📄 License
581
-
582
- Apache-2.0 — see `LICENSE`.
583
-
584
- ---
585
-
586
- ### Notes
587
-
588
- - PPL levels depend on `seq_len` (e.g., 768-token windows typically reduce PPL vs shorter contexts).