invarlock 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. invarlock/__init__.py +33 -0
  2. invarlock/__main__.py +10 -0
  3. invarlock/_data/runtime/profiles/ci_cpu.yaml +15 -0
  4. invarlock/_data/runtime/profiles/release.yaml +23 -0
  5. invarlock/_data/runtime/tiers.yaml +76 -0
  6. invarlock/adapters/__init__.py +102 -0
  7. invarlock/adapters/_capabilities.py +45 -0
  8. invarlock/adapters/auto.py +99 -0
  9. invarlock/adapters/base.py +530 -0
  10. invarlock/adapters/base_types.py +85 -0
  11. invarlock/adapters/hf_bert.py +852 -0
  12. invarlock/adapters/hf_gpt2.py +403 -0
  13. invarlock/adapters/hf_llama.py +485 -0
  14. invarlock/adapters/hf_mixin.py +383 -0
  15. invarlock/adapters/hf_onnx.py +112 -0
  16. invarlock/adapters/hf_t5.py +137 -0
  17. invarlock/adapters/py.typed +1 -0
  18. invarlock/assurance/__init__.py +43 -0
  19. invarlock/cli/__init__.py +8 -0
  20. invarlock/cli/__main__.py +8 -0
  21. invarlock/cli/_evidence.py +25 -0
  22. invarlock/cli/_json.py +75 -0
  23. invarlock/cli/adapter_auto.py +162 -0
  24. invarlock/cli/app.py +287 -0
  25. invarlock/cli/commands/__init__.py +26 -0
  26. invarlock/cli/commands/certify.py +403 -0
  27. invarlock/cli/commands/doctor.py +1358 -0
  28. invarlock/cli/commands/explain_gates.py +151 -0
  29. invarlock/cli/commands/export_html.py +100 -0
  30. invarlock/cli/commands/plugins.py +1331 -0
  31. invarlock/cli/commands/report.py +354 -0
  32. invarlock/cli/commands/run.py +4146 -0
  33. invarlock/cli/commands/verify.py +1040 -0
  34. invarlock/cli/config.py +396 -0
  35. invarlock/cli/constants.py +68 -0
  36. invarlock/cli/device.py +92 -0
  37. invarlock/cli/doctor_helpers.py +74 -0
  38. invarlock/cli/errors.py +6 -0
  39. invarlock/cli/overhead_utils.py +60 -0
  40. invarlock/cli/provenance.py +66 -0
  41. invarlock/cli/utils.py +41 -0
  42. invarlock/config.py +56 -0
  43. invarlock/core/__init__.py +62 -0
  44. invarlock/core/abi.py +15 -0
  45. invarlock/core/api.py +274 -0
  46. invarlock/core/auto_tuning.py +317 -0
  47. invarlock/core/bootstrap.py +226 -0
  48. invarlock/core/checkpoint.py +221 -0
  49. invarlock/core/contracts.py +73 -0
  50. invarlock/core/error_utils.py +64 -0
  51. invarlock/core/events.py +298 -0
  52. invarlock/core/exceptions.py +95 -0
  53. invarlock/core/registry.py +481 -0
  54. invarlock/core/retry.py +146 -0
  55. invarlock/core/runner.py +2041 -0
  56. invarlock/core/types.py +154 -0
  57. invarlock/edits/__init__.py +12 -0
  58. invarlock/edits/_edit_utils.py +249 -0
  59. invarlock/edits/_external_utils.py +268 -0
  60. invarlock/edits/noop.py +47 -0
  61. invarlock/edits/py.typed +1 -0
  62. invarlock/edits/quant_rtn.py +801 -0
  63. invarlock/edits/registry.py +166 -0
  64. invarlock/eval/__init__.py +23 -0
  65. invarlock/eval/bench.py +1207 -0
  66. invarlock/eval/bootstrap.py +50 -0
  67. invarlock/eval/data.py +2052 -0
  68. invarlock/eval/metrics.py +2167 -0
  69. invarlock/eval/primary_metric.py +767 -0
  70. invarlock/eval/probes/__init__.py +24 -0
  71. invarlock/eval/probes/fft.py +139 -0
  72. invarlock/eval/probes/mi.py +213 -0
  73. invarlock/eval/probes/post_attention.py +323 -0
  74. invarlock/eval/providers/base.py +67 -0
  75. invarlock/eval/providers/seq2seq.py +111 -0
  76. invarlock/eval/providers/text_lm.py +113 -0
  77. invarlock/eval/providers/vision_text.py +93 -0
  78. invarlock/eval/py.typed +1 -0
  79. invarlock/guards/__init__.py +18 -0
  80. invarlock/guards/_contracts.py +9 -0
  81. invarlock/guards/invariants.py +640 -0
  82. invarlock/guards/policies.py +805 -0
  83. invarlock/guards/py.typed +1 -0
  84. invarlock/guards/rmt.py +2097 -0
  85. invarlock/guards/spectral.py +1419 -0
  86. invarlock/guards/tier_config.py +354 -0
  87. invarlock/guards/variance.py +3298 -0
  88. invarlock/guards_ref/__init__.py +15 -0
  89. invarlock/guards_ref/rmt_ref.py +40 -0
  90. invarlock/guards_ref/spectral_ref.py +135 -0
  91. invarlock/guards_ref/variance_ref.py +60 -0
  92. invarlock/model_profile.py +353 -0
  93. invarlock/model_utils.py +221 -0
  94. invarlock/observability/__init__.py +10 -0
  95. invarlock/observability/alerting.py +535 -0
  96. invarlock/observability/core.py +546 -0
  97. invarlock/observability/exporters.py +565 -0
  98. invarlock/observability/health.py +588 -0
  99. invarlock/observability/metrics.py +457 -0
  100. invarlock/observability/py.typed +1 -0
  101. invarlock/observability/utils.py +553 -0
  102. invarlock/plugins/__init__.py +12 -0
  103. invarlock/plugins/hello_guard.py +33 -0
  104. invarlock/plugins/hf_awq_adapter.py +82 -0
  105. invarlock/plugins/hf_bnb_adapter.py +79 -0
  106. invarlock/plugins/hf_gptq_adapter.py +78 -0
  107. invarlock/plugins/py.typed +1 -0
  108. invarlock/py.typed +1 -0
  109. invarlock/reporting/__init__.py +7 -0
  110. invarlock/reporting/certificate.py +3221 -0
  111. invarlock/reporting/certificate_schema.py +244 -0
  112. invarlock/reporting/dataset_hashing.py +215 -0
  113. invarlock/reporting/guards_analysis.py +948 -0
  114. invarlock/reporting/html.py +32 -0
  115. invarlock/reporting/normalizer.py +235 -0
  116. invarlock/reporting/policy_utils.py +517 -0
  117. invarlock/reporting/primary_metric_utils.py +265 -0
  118. invarlock/reporting/render.py +1442 -0
  119. invarlock/reporting/report.py +903 -0
  120. invarlock/reporting/report_types.py +278 -0
  121. invarlock/reporting/utils.py +175 -0
  122. invarlock/reporting/validate.py +631 -0
  123. invarlock/security.py +176 -0
  124. invarlock/sparsity_utils.py +323 -0
  125. invarlock/utils/__init__.py +150 -0
  126. invarlock/utils/digest.py +45 -0
  127. invarlock-0.2.0.dist-info/METADATA +586 -0
  128. invarlock-0.2.0.dist-info/RECORD +132 -0
  129. invarlock-0.2.0.dist-info/WHEEL +5 -0
  130. invarlock-0.2.0.dist-info/entry_points.txt +20 -0
  131. invarlock-0.2.0.dist-info/licenses/LICENSE +201 -0
  132. invarlock-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,403 @@
1
+ """
2
+ InvarLock CLI Certify Command
3
+ =========================
4
+
5
+ Hero path: Compare & Certify (BYOE). Provide baseline (`--baseline`) and
6
+ subject (`--subject`) checkpoints and InvarLock will run paired windows and emit a
7
+ certificate. Optionally, pass `--edit-config` to run the built‑in quant_rtn demo.
8
+
9
+ Steps:
10
+ 1) Baseline (no-op edit) on baseline model
11
+ 2) Subject (no-op or provided edit config) on subject model with --baseline pairing
12
+ 3) Emit certificate via `invarlock report --format cert`
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ import math
19
+ from pathlib import Path
20
+ from typing import Any
21
+
22
+ import typer
23
+ from rich.console import Console
24
+
25
+ from ..adapter_auto import resolve_auto_adapter
26
+ from ..config import _deep_merge as _merge # reuse helper
27
+ from ..errors import InvarlockError
28
+
29
+ # Use the report group's programmatic entry for report generation
30
+ from .report import report_command as _report
31
+ from .run import _resolve_exit_code as _resolve_exit_code
32
+
33
# Marker flag. It is not read anywhere in the visible code; presumably it
# records that `run_command` is imported lazily inside `certify_command`
# rather than at module import time — TODO confirm against other users.
_LAZY_RUN_IMPORT = True

# Shared Rich console used by this module for status and error output.
console = Console()
36
+
37
+
38
def _latest_run_report(run_root: Path) -> Path | None:
    """Locate the report JSON of the most recent run under ``run_root``.

    The "most recent" run is the sub-directory whose path sorts last, so run
    directory names are expected to sort chronologically (e.g. timestamps).

    Lookup order inside that directory:
      1. ``report.json``
      2. ``<run-dir-name>.json``
      3. the lexicographically first ``*.json`` file (deterministic fallback)

    Args:
        run_root: Directory that contains one sub-directory per run.

    Returns:
        Path to the report file, or ``None`` when ``run_root`` is missing or
        not a directory, contains no run directories, or the latest run
        directory holds no JSON file.
    """
    # is_dir() (rather than exists()) so that a stray *file* at run_root yields
    # None instead of NotADirectoryError from iterdir().
    if not run_root.is_dir():
        return None

    run_dirs = sorted(p for p in run_root.iterdir() if p.is_dir())
    if not run_dirs:
        return None
    latest = run_dirs[-1]

    for candidate in (latest / "report.json", latest / f"{latest.name}.json"):
        if candidate.exists():
            return candidate

    # glob() yields entries in arbitrary (filesystem) order; take the smallest
    # path so the fallback picks the same file on every platform.
    return min(latest.glob("*.json"), default=None)
51
+
52
+
53
def _load_yaml(path: Path) -> dict[str, Any]:
    """Read a YAML file and return its top-level mapping.

    A document that parses to a falsy value (for example an empty file) is
    treated as an empty mapping.

    Args:
        path: Location of the YAML file, read as UTF-8.

    Returns:
        The parsed mapping.

    Raises:
        ValueError: If the document's top-level value is not a mapping.
    """
    import yaml

    with path.open("r", encoding="utf-8") as stream:
        loaded = yaml.safe_load(stream)

    if not loaded:
        loaded = {}
    if isinstance(loaded, dict):
        return loaded
    raise ValueError("Preset must be a mapping")
61
+
62
+
63
def _dump_yaml(path: Path, data: dict[str, Any]) -> None:
    """Serialize ``data`` as YAML into ``path`` (UTF-8, overwriting).

    Keys are written in the mapping's own order rather than sorted.

    Args:
        path: Destination file; truncated if it already exists.
        data: Mapping to serialize with ``yaml.safe_dump``.
    """
    import yaml

    with path.open("w", encoding="utf-8") as sink:
        yaml.safe_dump(data, sink, sort_keys=False)
68
+
69
+
70
def _normalize_model_id(model_id: str, adapter_name: str) -> str:
    """Normalize a model identifier before handing it to an adapter.

    Surrounding whitespace is always removed. When the adapter name starts
    with ``"hf_"`` an optional leading ``"hf:"`` prefix is dropped as well,
    together with any whitespace that followed it.

    Args:
        model_id: Raw identifier (local directory or Hub ID); a falsy value
            is treated as the empty string.
        adapter_name: Name of the adapter that will consume the identifier.

    Returns:
        The cleaned identifier.
    """
    mid = str(model_id or "").strip()
    if str(adapter_name).startswith("hf_") and mid.startswith("hf:"):
        # Strip again so "hf: org/model" does not keep a leading space.
        return mid.removeprefix("hf:").strip()
    return mid
83
+
84
+
85
def certify_command(
    # Primary names for programmatic/test compatibility
    source: str = typer.Option(
        ..., "--source", "--baseline", help="Baseline model dir or Hub ID"
    ),
    edited: str = typer.Option(
        ..., "--edited", "--subject", help="Subject model dir or Hub ID"
    ),
    adapter: str = typer.Option(
        "auto", "--adapter", help="Adapter name or 'auto' to resolve"
    ),
    device: str | None = typer.Option(
        None,
        "--device",
        help="Device override for runs (auto|cuda|mps|cpu)",
    ),
    profile: str = typer.Option("ci", "--profile", help="Profile (ci|release)"),
    tier: str = typer.Option("balanced", "--tier", help="Tier label for context"),
    preset: str | None = typer.Option(
        None,
        "--preset",
        help=(
            "Universal preset path to use (defaults to causal or masked preset"
            " based on adapter)"
        ),
    ),
    out: str = typer.Option("runs", "--out", help="Base output directory"),
    cert_out: str = typer.Option(
        "reports/cert", "--cert-out", help="Certificate output directory"
    ),
    edit_config: str | None = typer.Option(
        None, "--edit-config", help="Edit preset to apply a demo edit (quant_rtn)"
    ),
) -> None:
    """Certify two checkpoints (baseline vs subject) with pinned windows."""
    # NOTE: the docstring is intentionally left as a single line because Typer
    # may surface it as the command's help text; details live in comments.
    #
    # Flow:
    #   1. Resolve the adapter and the preset (file or inline fallback).
    #   2. Run the baseline model with a no-op edit into <out>/source.
    #   3. Run the subject model into <out>/edited, paired with the baseline
    #      report, using either a no-op edit or the supplied --edit-config.
    #   4. For the ci/release profiles, abort when the subject report carries
    #      a primary_metric block whose `final` value is not a finite number.
    #   5. Emit the certificate into <cert_out>.
    #
    # Raises typer.Exit(1) when a preset, edit config or run report is missing
    # or unreadable, and typer.Exit(<code from _resolve_exit_code>) when the
    # primary-metric check fails.

    # Support programmatic calls and Typer-invoked calls uniformly: a direct
    # Python call that omits an argument receives the OptionInfo placeholder.
    try:
        from typer.models import OptionInfo as _TyperOptionInfo
    except Exception:  # pragma: no cover - typer internals may change
        _TyperOptionInfo = ()  # type: ignore[assignment]

    def _coerce_option(value, fallback=None):
        if isinstance(value, _TyperOptionInfo):
            return getattr(value, "default", fallback)
        return value if value is not None else fallback

    source = _coerce_option(source)
    edited = _coerce_option(edited)
    adapter = _coerce_option(adapter, "auto")
    device = _coerce_option(device)
    profile = _coerce_option(profile, "ci")
    tier = _coerce_option(tier, "balanced")
    preset = _coerce_option(preset)
    out = _coerce_option(out, "runs")
    cert_out = _coerce_option(cert_out, "reports/cert")
    edit_config = _coerce_option(edit_config)

    src_id = str(source)
    edt_id = str(edited)

    # Resolve adapter when requested
    eff_adapter = adapter
    if str(adapter).strip().lower() in {"auto", "hf_auto", "auto_hf"}:
        eff_adapter = resolve_auto_adapter(src_id)
        console.print(f"🔎 Adapter:auto → {eff_adapter}")

    # Choose preset. If none provided and repo preset is missing (pip install
    # scenario), fall back to a minimal built-in universal preset so the
    # flag-only quick start works without cloning the repo.
    default_universal = (
        Path("configs/tasks/masked_lm/ci_cpu.yaml")
        if eff_adapter == "hf_bert"
        else Path("configs/tasks/causal_lm/ci_cpu.yaml")
    )
    preset_path = Path(preset) if preset is not None else default_universal

    preset_data: dict[str, Any]
    if preset is None and not preset_path.exists():
        # Inline minimal preset (wikitext2 universal) for pip installs
        preset_data = {
            "dataset": {
                "provider": "wikitext2",
                "split": "validation",
                "seq_len": 512,
                "stride": 512,
                "preview_n": 64,
                "final_n": 64,
                "seed": 42,
            }
        }
    else:
        if not preset_path.exists():
            console.print(f"[red]❌ Preset not found: {preset_path}")
            raise typer.Exit(1)
        preset_data = _load_yaml(preset_path)
        # Do not hard-code device from presets in auto-generated certify
        # configs; allow device resolution to pick CUDA/MPS/CPU via 'auto' or
        # CLI overrides.
        preset_model = preset_data.get("model")
        if isinstance(preset_model, dict) and "device" in preset_model:
            preset_model = dict(preset_model)
            preset_model.pop("device", None)
            preset_data["model"] = preset_model

    # Normalize possible "hf:" prefixes for HF adapters
    norm_src_id = _normalize_model_id(src_id, eff_adapter)
    norm_edt_id = _normalize_model_id(edt_id, eff_adapter)

    source_out = Path(out) / "source"
    edited_out = Path(out) / "edited"
    context_block = {"profile": profile, "tier": tier}

    def _noop_config(model_id: str, out_dir: Path) -> dict[str, Any]:
        """Overlay a no-op edit run for ``model_id`` onto the preset."""
        return _merge(
            preset_data,
            {
                "model": {"id": model_id, "adapter": eff_adapter},
                "edit": {"name": "noop", "plan": {}},
                "eval": {},
                "guards": {
                    "order": [
                        "invariants",
                        "spectral",
                        "rmt",
                        "variance",
                        "invariants",
                    ]
                },
                "output": {"dir": str(out_dir)},
                "context": dict(context_block),
            },
        )

    # Generated configs are persisted under a scratch directory for
    # traceability.
    tmp_dir = Path(".certify_tmp")
    tmp_dir.mkdir(parents=True, exist_ok=True)

    # --- Baseline run (no-op edit) ------------------------------------------
    baseline_yaml = tmp_dir / "baseline_noop.yaml"
    _dump_yaml(baseline_yaml, _noop_config(norm_src_id, source_out))

    console.print("🏁 Running baseline (no-op edit)")
    # Imported at call time (not module import time); one import serves both
    # the baseline and the subject run.
    from .run import run_command as _run

    _run(
        config=str(baseline_yaml),
        profile=profile,
        out=str(source_out),
        tier=tier,
        device=device,
    )

    baseline_report = _latest_run_report(source_out)
    if not baseline_report:
        console.print("[red]❌ Could not locate baseline report after run")
        raise typer.Exit(1)

    # --- Subject run ----------------------------------------------------------
    # Either a no-op (Compare & Certify) or the provided edit_config (demo edit).
    if edit_config:
        edit_config_path = Path(edit_config)
        if not edit_config_path.exists():
            console.print(f"[red]❌ Edit config not found: {edit_config_path}")
            raise typer.Exit(1)
        console.print("✂️ Running edited (demo edit via --edit-config)")
        try:
            cfg_loaded: dict[str, Any] = _load_yaml(edit_config_path)
        except Exception as exc:  # noqa: BLE001
            console.print(f"[red]❌ Failed to load edit config: {exc}")
            raise typer.Exit(1) from exc

        # Ensure model.id/adapter point to the requested subject.
        subject_model = dict(cfg_loaded.get("model") or {})
        configured_id = subject_model.get("id")
        if not isinstance(configured_id, str) or configured_id.startswith("<"):
            # Missing id or a placeholder such as "<MODEL_ID>" /
            # "<set-your-model-id>": use the subject given on the command line.
            subject_model["id"] = norm_edt_id
        else:
            # An explicit id in the edit config wins; it is only normalized.
            subject_model["id"] = _normalize_model_id(configured_id, eff_adapter)
        # Respect an explicit adapter from the edit config; only fill it in
        # when missing or empty.
        configured_adapter = subject_model.get("adapter")
        if not (isinstance(configured_adapter, str) and configured_adapter):
            subject_model["adapter"] = eff_adapter
        cfg_loaded["model"] = subject_model

        # Apply the same preset to the edited run to avoid duplicating
        # dataset/task settings in edit configs; then overlay the edit,
        # output, and context.
        subject_cfg = _merge(
            _merge(preset_data, cfg_loaded),
            {
                "output": {"dir": str(edited_out)},
                "context": dict(context_block),
            },
        )

        # Re-ensure the scratch directory in case the baseline run removed it.
        tmp_dir.mkdir(parents=True, exist_ok=True)
        subject_yaml = tmp_dir / "edited_merged.yaml"
        _dump_yaml(subject_yaml, subject_cfg)
    else:
        subject_yaml = tmp_dir / "edited_noop.yaml"
        _dump_yaml(subject_yaml, _noop_config(norm_edt_id, edited_out))
        console.print("🧪 Running edited (no-op, Compare & Certify)")

    _run(
        config=str(subject_yaml),
        profile=profile,
        out=str(edited_out),
        tier=tier,
        baseline=str(baseline_report),
        device=device,
    )

    edited_report = _latest_run_report(edited_out)
    if not edited_report:
        console.print("[red]❌ Could not locate edited report after run")
        raise typer.Exit(1)

    # --- CI/Release hard-abort ------------------------------------------------
    # Fail fast when the primary metric is not computable. Only the
    # `metrics.primary_metric` block is inspected; no legacy key fallback is
    # applied here.
    prof = str(profile or "").strip().lower()
    if prof in {"ci", "release"}:
        try:
            with Path(edited_report).open("r", encoding="utf-8") as fh:
                edited_payload = json.load(fh)
        except Exception as exc:  # noqa: BLE001
            console.print(f"[red]❌ Failed to read edited report: {exc}")
            raise typer.Exit(1) from exc

        def _as_dict(value: Any) -> dict[str, Any]:
            """Return ``value`` when it is a mapping, otherwise ``{}``."""
            return value if isinstance(value, dict) else {}

        def _finite(x: Any) -> bool:
            try:
                return isinstance(x, (int, float)) and math.isfinite(float(x))
            except (OverflowError, TypeError, ValueError):
                return False

        # Every level is coerced to a dict so that malformed or null sections
        # ("meta": null, "primary_metric": null, ...) are treated as absent
        # instead of raising AttributeError on `.get`.
        payload = _as_dict(edited_payload)
        meta = _as_dict(payload.get("meta"))
        pm = _as_dict(_as_dict(payload.get("metrics")).get("primary_metric"))
        pm_prev = pm.get("preview")
        pm_final = pm.get("final")
        pm_ratio = pm.get("ratio_vs_baseline")
        # Separate name so the `device` argument is not overwritten by the
        # device recorded in the report.
        report_device = meta.get("device") or "unknown"
        adapter_name = meta.get("adapter") or "unknown"
        edit_name = _as_dict(payload.get("edit")).get("name") or "unknown"

        # Enforce only when a primary_metric block is present; minimal stub
        # reports without one are skipped. A finite `final` value is required;
        # `preview` is only reported in the error details.
        if pm and not _finite(pm_final):
            err = InvarlockError(
                code="E111",
                message=(
                    "Primary metric computation failed (NaN/inf). "
                    f"Context: device={report_device}, adapter={adapter_name}, edit={edit_name}. "
                    "Baseline ok; edited failed to compute ppl. "
                    "Try: use an accelerator (mps/cuda), force float32, reduce max_modules, "
                    "or lower batch size (INVARLOCK_SCORES_BATCH_SIZE)."
                ),
                details={
                    "device": report_device,
                    "adapter": adapter_name,
                    "edit": edit_name,
                    "pm_preview": pm_prev,
                    "pm_final": pm_final,
                    "pm_ratio": pm_ratio,
                },
            )
            code = _resolve_exit_code(err, profile=prof)
            console.print(f"[red]{err}[/red]")
            # Do not emit a certificate
            raise typer.Exit(code)

    console.print("📜 Emitting certificate")
    _report(
        run=str(edited_report),
        format="cert",
        baseline=str(baseline_report),
        output=cert_out,
    )