invarlock 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. invarlock/__init__.py +4 -4
  2. invarlock/adapters/__init__.py +10 -14
  3. invarlock/adapters/auto.py +37 -50
  4. invarlock/adapters/capabilities.py +2 -2
  5. invarlock/adapters/hf_causal.py +418 -0
  6. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  7. invarlock/adapters/hf_loading.py +7 -7
  8. invarlock/adapters/hf_mixin.py +53 -9
  9. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  10. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  11. invarlock/assurance/__init__.py +15 -23
  12. invarlock/cli/adapter_auto.py +32 -26
  13. invarlock/cli/app.py +128 -27
  14. invarlock/cli/commands/__init__.py +2 -2
  15. invarlock/cli/commands/calibrate.py +48 -4
  16. invarlock/cli/commands/doctor.py +8 -10
  17. invarlock/cli/commands/evaluate.py +986 -0
  18. invarlock/cli/commands/explain_gates.py +25 -17
  19. invarlock/cli/commands/export_html.py +11 -9
  20. invarlock/cli/commands/plugins.py +13 -9
  21. invarlock/cli/commands/report.py +326 -92
  22. invarlock/cli/commands/run.py +1160 -228
  23. invarlock/cli/commands/verify.py +157 -97
  24. invarlock/cli/config.py +1 -1
  25. invarlock/cli/determinism.py +1 -1
  26. invarlock/cli/doctor_helpers.py +4 -5
  27. invarlock/cli/output.py +193 -0
  28. invarlock/cli/provenance.py +4 -4
  29. invarlock/core/bootstrap.py +1 -1
  30. invarlock/core/registry.py +9 -11
  31. invarlock/core/retry.py +14 -14
  32. invarlock/core/runner.py +112 -26
  33. invarlock/edits/noop.py +2 -2
  34. invarlock/edits/quant_rtn.py +67 -39
  35. invarlock/eval/__init__.py +1 -1
  36. invarlock/eval/bench.py +14 -10
  37. invarlock/eval/data.py +68 -23
  38. invarlock/eval/metrics.py +59 -1
  39. invarlock/eval/primary_metric.py +1 -1
  40. invarlock/eval/tasks/__init__.py +12 -0
  41. invarlock/eval/tasks/classification.py +48 -0
  42. invarlock/eval/tasks/qa.py +36 -0
  43. invarlock/eval/tasks/text_generation.py +102 -0
  44. invarlock/guards/invariants.py +19 -10
  45. invarlock/guards/rmt.py +2 -2
  46. invarlock/guards/spectral.py +1 -1
  47. invarlock/guards/variance.py +2 -2
  48. invarlock/model_profile.py +64 -62
  49. invarlock/observability/health.py +6 -6
  50. invarlock/observability/metrics.py +108 -0
  51. invarlock/plugins/hf_bnb_adapter.py +32 -21
  52. invarlock/reporting/__init__.py +18 -4
  53. invarlock/reporting/guards_analysis.py +154 -4
  54. invarlock/reporting/html.py +61 -11
  55. invarlock/reporting/normalizer.py +9 -2
  56. invarlock/reporting/policy_utils.py +1 -1
  57. invarlock/reporting/primary_metric_utils.py +11 -11
  58. invarlock/reporting/render.py +876 -510
  59. invarlock/reporting/report.py +72 -30
  60. invarlock/reporting/{certificate.py → report_builder.py} +252 -99
  61. invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
  62. invarlock/reporting/report_types.py +6 -1
  63. invarlock/reporting/telemetry.py +86 -0
  64. invarlock-0.3.8.dist-info/METADATA +283 -0
  65. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
  66. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
  67. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
  68. invarlock/adapters/hf_gpt2.py +0 -404
  69. invarlock/adapters/hf_llama.py +0 -487
  70. invarlock/cli/commands/certify.py +0 -422
  71. invarlock-0.3.6.dist-info/METADATA +0 -588
  72. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
  73. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
@@ -1,422 +0,0 @@
1
- """
2
- InvarLock CLI Certify Command
3
- =========================
4
-
5
- Hero path: Compare & Certify (BYOE). Provide baseline (`--baseline`) and
6
- subject (`--subject`) checkpoints and InvarLock will run paired windows and emit a
7
- certificate. Optionally, pass `--edit-config` to run the built‑in quant_rtn demo.
8
-
9
- Steps:
10
- 1) Baseline (no-op edit) on baseline model
11
- 2) Subject (no-op or provided edit config) on subject model with --baseline pairing
12
- 3) Emit certificate via `invarlock report --format cert`
13
- """
14
-
15
- from __future__ import annotations
16
-
17
- import json
18
- import math
19
- from pathlib import Path
20
- from typing import Any
21
-
22
- import typer
23
- from rich.console import Console
24
-
25
- from ...core.exceptions import MetricsError
26
- from ..adapter_auto import resolve_auto_adapter
27
- from ..config import _deep_merge as _merge # reuse helper
28
-
29
- # Use the report group's programmatic entry for report generation
30
- from .report import report_command as _report
31
- from .run import _resolve_exit_code as _resolve_exit_code
32
-
33
# Marker constant. Nothing in this module reads it; presumably it is consulted
# by tests or tooling to signal that `run` is imported lazily (TODO confirm).
_LAZY_RUN_IMPORT = True

# Module-level Rich console used for all status and error output below.
console = Console()
36
-
37
-
38
- def _latest_run_report(run_root: Path) -> Path | None:
39
- if not run_root.exists():
40
- return None
41
- candidates = sorted([p for p in run_root.iterdir() if p.is_dir()])
42
- if not candidates:
43
- return None
44
- latest = candidates[-1]
45
- for f in [latest / "report.json", latest / f"{latest.name}.json"]:
46
- if f.exists():
47
- return f
48
- # Fallback: first JSON in the directory
49
- jsons = list(latest.glob("*.json"))
50
- return jsons[0] if jsons else None
51
-
52
-
53
def _load_yaml(path: Path) -> dict[str, Any]:
    """Parse the YAML file at *path* and return its top-level mapping.

    An empty (or otherwise falsy) document is treated as an empty mapping.

    Raises:
        ValueError: If the document's top-level value is not a mapping.
    """
    import yaml

    with path.open("r", encoding="utf-8") as stream:
        parsed = yaml.safe_load(stream)

    # Falsy documents (empty file, `null`, `[]`, ...) collapse to `{}`.
    if not parsed:
        parsed = {}
    if isinstance(parsed, dict):
        return parsed
    raise ValueError("Preset must be a mapping")
61
-
62
-
63
def _dump_yaml(path: Path, data: dict[str, Any]) -> None:
    """Serialize *data* as YAML into *path* (UTF-8), keeping key order."""
    import yaml

    with path.open("w", encoding="utf-8") as stream:
        # sort_keys=False preserves the insertion order of the config dict.
        yaml.safe_dump(data, stream, sort_keys=False)
68
-
69
-
70
def _normalize_model_id(model_id: str, adapter_name: str) -> str:
    """Return *model_id* in the form expected by the given adapter.

    The identifier is stringified and stripped of surrounding whitespace.
    When the adapter name starts with ``hf_`` and the identifier carries an
    ``hf:`` prefix, that prefix is removed; otherwise the stripped identifier
    is returned as-is. A falsy *model_id* yields an empty string.
    """
    cleaned = str(model_id or "").strip()
    try:
        is_hf_adapter = str(adapter_name).startswith("hf_")
        if is_hf_adapter and cleaned.startswith("hf:"):
            # Drop everything up to and including the first colon.
            _, _, remainder = cleaned.partition(":")
            return remainder
    except Exception:
        # Best effort: fall back to the stripped identifier on any failure.
        return cleaned
    return cleaned
83
-
84
-
85
def certify_command(
    # Primary names for programmatic/test compatibility
    source: str = typer.Option(
        ..., "--source", "--baseline", help="Baseline model dir or Hub ID"
    ),
    edited: str = typer.Option(
        ..., "--edited", "--subject", help="Subject model dir or Hub ID"
    ),
    adapter: str = typer.Option(
        "auto", "--adapter", help="Adapter name or 'auto' to resolve"
    ),
    device: str | None = typer.Option(
        None,
        "--device",
        help="Device override for runs (auto|cuda|mps|cpu)",
    ),
    profile: str = typer.Option(
        "ci", "--profile", help="Profile (ci|release|ci_cpu|dev)"
    ),
    tier: str = typer.Option("balanced", "--tier", help="Tier label for context"),
    preset: str | None = typer.Option(
        None,
        "--preset",
        help=(
            "Universal preset path to use (defaults to causal or masked preset"
            " based on adapter)"
        ),
    ),
    out: str = typer.Option("runs", "--out", help="Base output directory"),
    cert_out: str = typer.Option(
        "reports/cert", "--cert-out", help="Certificate output directory"
    ),
    edit_config: str | None = typer.Option(
        None, "--edit-config", help="Edit preset to apply a demo edit (quant_rtn)"
    ),
):
    """Certify two checkpoints (baseline vs subject) with pinned windows."""
    # NOTE: the docstring above is kept to a single line on purpose: Typer
    # surfaces it as the command's help text. Extended documentation follows.
    #
    # Flow:
    #   1. Resolve the adapter ("auto" variants go through resolve_auto_adapter)
    #      and pick a preset (explicit path, repo default, or inline fallback).
    #   2. Write a no-op baseline config to .certify_tmp/ and run it into
    #      <out>/source.
    #   3. Run the subject into <out>/edited, paired with the baseline report.
    #      With --edit-config the given config is overlaid on the preset;
    #      otherwise a no-op edit is used.
    #   4. For ci / ci_cpu / release profiles, inspect the edited report's
    #      primary metric. If it is degraded or non-finite, emit the
    #      certificate and exit with the code resolved for a MetricsError.
    #   5. Otherwise emit the certificate into <cert_out>.
    #
    # Raises typer.Exit on a missing preset/edit config, a missing or
    # unreadable run report, or a degraded primary metric.

    # Support programmatic calls and Typer-invoked calls uniformly: when the
    # function is called directly, omitted arguments arrive as OptionInfo
    # objects instead of plain values.
    try:
        from typer.models import OptionInfo as _TyperOptionInfo
    except Exception:  # pragma: no cover - typer internals may change
        _TyperOptionInfo = ()  # type: ignore[assignment]

    def _coerce_option(value, fallback=None):
        if isinstance(value, _TyperOptionInfo):
            return getattr(value, "default", fallback)
        return value if value is not None else fallback

    source = _coerce_option(source)
    edited = _coerce_option(edited)
    adapter = _coerce_option(adapter, "auto")
    device = _coerce_option(device)
    profile = _coerce_option(profile, "ci")
    tier = _coerce_option(tier, "balanced")
    preset = _coerce_option(preset)
    out = _coerce_option(out, "runs")
    cert_out = _coerce_option(cert_out, "reports/cert")
    edit_config = _coerce_option(edit_config)

    src_id = str(source)
    edt_id = str(edited)
    source_out = Path(out) / "source"
    edited_out = Path(out) / "edited"

    # Resolve adapter when requested
    eff_adapter = adapter
    if str(adapter).strip().lower() in {"auto", "hf_auto", "auto_hf"}:
        eff_adapter = resolve_auto_adapter(src_id)
        console.print(f"🔎 Adapter:auto → {eff_adapter}")

    # Choose preset. If none provided and repo preset is missing (pip install
    # scenario), fall back to a minimal built-in universal preset so the
    # flag-only quick start works without cloning the repo.
    default_universal = (
        Path("configs/presets/masked_lm/wikitext2_128.yaml")
        if eff_adapter == "hf_bert"
        else Path("configs/presets/causal_lm/wikitext2_512.yaml")
    )
    preset_path = Path(preset) if preset is not None else default_universal

    preset_data: dict[str, Any]
    if preset is None and not preset_path.exists():
        # Inline minimal preset (wikitext2 universal) for pip installs
        preset_data = {
            "dataset": {
                "provider": "wikitext2",
                "split": "validation",
                "seq_len": 512,
                "stride": 512,
                "preview_n": 64,
                "final_n": 64,
                "seed": 42,
            }
        }
    else:
        if not preset_path.exists():
            console.print(f"[red]❌ Preset not found: {preset_path}")
            raise typer.Exit(1)
        preset_data = _load_yaml(preset_path)
        # Do not hard-code device from presets in auto-generated certify
        # configs; allow device resolution to pick CUDA/MPS/CPU via 'auto' or
        # CLI overrides.
        model_block = preset_data.get("model")
        if isinstance(model_block, dict) and "device" in model_block:
            model_block = dict(model_block)
            model_block.pop("device", None)
            preset_data["model"] = model_block

    # Guard order: honour a non-empty list of strings from the preset,
    # otherwise use the default chain.
    default_guards_order = ["invariants", "spectral", "rmt", "variance", "invariants"]
    guards_order = None
    preset_guards = preset_data.get("guards")
    if isinstance(preset_guards, dict):
        preset_order = preset_guards.get("order")
        if (
            isinstance(preset_order, list)
            and preset_order
            and all(isinstance(item, str) for item in preset_order)
        ):
            guards_order = list(preset_order)
    if guards_order is None:
        guards_order = list(default_guards_order)

    # Normalize possible "hf:" prefixes for HF adapters
    norm_src_id = _normalize_model_id(src_id, eff_adapter)
    norm_edt_id = _normalize_model_id(edt_id, eff_adapter)

    # Create temp baseline config (no-op edit)
    baseline_cfg = _merge(
        preset_data,
        {
            "model": {
                "id": norm_src_id,
                "adapter": eff_adapter,
            },
            "edit": {"name": "noop", "plan": {}},
            "eval": {},
            "guards": {"order": guards_order},
            "output": {"dir": str(source_out)},
            "context": {"profile": profile, "tier": tier},
        },
    )

    # Generated configs are kept on disk for traceability. The directory is
    # created once here and reused by the edited run below.
    tmp_dir = Path(".certify_tmp")
    tmp_dir.mkdir(parents=True, exist_ok=True)
    baseline_yaml = tmp_dir / "baseline_noop.yaml"
    _dump_yaml(baseline_yaml, baseline_cfg)

    console.print("🏁 Running baseline (no-op edit)")
    # Imported lazily (and only once) so importing this module stays cheap.
    from .run import run_command as _run

    _run(
        config=str(baseline_yaml),
        profile=profile,
        out=str(source_out),
        tier=tier,
        device=device,
    )

    baseline_report = _latest_run_report(source_out)
    if not baseline_report:
        console.print("[red]❌ Could not locate baseline report after run")
        raise typer.Exit(1)

    # Edited run: either no-op (Compare & Certify) or provided edit_config
    # (demo edit).
    if edit_config:
        edited_yaml = Path(edit_config)
        if not edited_yaml.exists():
            console.print(f"[red]❌ Edit config not found: {edited_yaml}")
            raise typer.Exit(1)
        console.print("✂️ Running edited (demo edit via --edit-config)")
        # Overlay subject model id/adapter and output/context onto the
        # provided edit config.
        try:
            cfg_loaded: dict[str, Any] = _load_yaml(edited_yaml)
        except Exception as exc:  # noqa: BLE001
            console.print(f"[red]❌ Failed to load edit config: {exc}")
            raise typer.Exit(1) from exc

        # Ensure model.id/adapter point to the requested subject
        model_block = dict(cfg_loaded.get("model") or {})
        configured_id = model_block.get("id")
        # Replace placeholder IDs like "<MODEL_ID>" or "<set-your-model-id>"
        if not isinstance(configured_id, str) or configured_id.startswith("<"):
            model_block["id"] = norm_edt_id
        else:
            # Always normalize when adapter is HF family
            model_block["id"] = _normalize_model_id(configured_id, eff_adapter)
        # Respect explicit device from edit config; only set adapter if missing
        configured_adapter = model_block.get("adapter")
        if not isinstance(configured_adapter, str) or not configured_adapter:
            model_block["adapter"] = eff_adapter
        cfg_loaded["model"] = model_block

        # Apply the same preset to the edited run to avoid duplicating
        # dataset/task settings in edit configs; then overlay the edit,
        # output, and context.
        merged_edited_cfg = _merge(
            _merge(preset_data, cfg_loaded),
            {
                "output": {"dir": str(edited_out)},
                "context": {"profile": profile, "tier": tier},
            },
        )

        # Persist a temporary merged config for traceability
        edited_run_yaml = tmp_dir / "edited_merged.yaml"
        _dump_yaml(edited_run_yaml, merged_edited_cfg)
    else:
        edited_cfg = _merge(
            preset_data,
            {
                "model": {"id": norm_edt_id, "adapter": eff_adapter},
                "edit": {"name": "noop", "plan": {}},
                "eval": {},
                "guards": {"order": guards_order},
                "output": {"dir": str(edited_out)},
                "context": {"profile": profile, "tier": tier},
            },
        )
        edited_run_yaml = tmp_dir / "edited_noop.yaml"
        _dump_yaml(edited_run_yaml, edited_cfg)
        console.print("🧪 Running edited (no-op, Compare & Certify)")

    _run(
        config=str(edited_run_yaml),
        profile=profile,
        out=str(edited_out),
        tier=tier,
        baseline=str(baseline_report),
        device=device,
    )

    edited_report = _latest_run_report(edited_out)
    if not edited_report:
        console.print("[red]❌ Could not locate edited report after run")
        raise typer.Exit(1)

    # CI/Release hard-abort: fail fast when primary metric is not computable.
    try:
        prof = str(profile or "").strip().lower()
    except Exception:
        prof = ""
    if prof in {"ci", "ci_cpu", "release"}:
        try:
            with Path(edited_report).open("r", encoding="utf-8") as fh:
                edited_payload = json.load(fh)
        except Exception as exc:  # noqa: BLE001
            console.print(f"[red]❌ Failed to read edited report: {exc}")
            raise typer.Exit(1) from exc

        def _finite(x: Any) -> bool:
            try:
                return isinstance(x, int | float) and math.isfinite(float(x))
            except Exception:
                return False

        def _as_dict(value: Any) -> dict[str, Any]:
            # JSON `null` or an unexpected type must not crash the `.get`
            # lookups below; treat anything that is not a mapping as empty.
            return value if isinstance(value, dict) else {}

        payload = _as_dict(edited_payload)
        meta = _as_dict(payload.get("meta"))
        metrics = _as_dict(payload.get("metrics"))
        pm = _as_dict(metrics.get("primary_metric"))
        pm_prev = pm.get("preview")
        pm_final = pm.get("final")
        pm_ratio = pm.get("ratio_vs_baseline")
        # Named `report_device` so the `device` CLI argument is not shadowed.
        report_device = meta.get("device") or "unknown"
        adapter_name = meta.get("adapter") or "unknown"
        edit_name = _as_dict(payload.get("edit")).get("name") or "unknown"

        # Enforce only when a primary_metric block is present; allow
        # degraded-but-flagged metrics to emit certificates, but fail the task.
        if pm:
            degraded = bool(pm.get("invalid") or pm.get("degraded"))
            if degraded or not _finite(pm_final):
                fallback = pm_prev if _finite(pm_prev) else pm_final
                if not _finite(fallback) or fallback <= 0:
                    fallback = 1.0
                degraded_reason = pm.get("degraded_reason") or (
                    "non_finite_pm"
                    if (not _finite(pm_prev) or not _finite(pm_final))
                    else "primary_metric_degraded"
                )
                console.print(
                    "[yellow]⚠️ Primary metric degraded or non-finite; emitting certificate and marking task degraded. Primary metric computation failed.[/yellow]"
                )
                # NOTE(review): these updates only touch the in-memory
                # payload. Nothing in this function writes it back to disk,
                # and the report step below receives the report path, not
                # this object. Confirm whether persisting was intended.
                pm["degraded"] = True
                pm["invalid"] = pm.get("invalid") or True
                pm["preview"] = pm_prev if _finite(pm_prev) else fallback
                pm["final"] = pm_final if _finite(pm_final) else fallback
                pm["ratio_vs_baseline"] = pm_ratio if _finite(pm_ratio) else 1.0
                pm["degraded_reason"] = degraded_reason
                metrics["primary_metric"] = pm
                payload.setdefault("metrics", {}).update(metrics)

                # Emit the certificate for inspection, then exit with a
                # CI-visible error.
                _report(
                    run=str(edited_report),
                    format="cert",
                    baseline=str(baseline_report),
                    output=cert_out,
                )
                err = MetricsError(
                    code="E111",
                    message=f"Primary metric degraded or non-finite ({degraded_reason}).",
                    details={
                        "reason": degraded_reason,
                        "adapter": adapter_name,
                        "device": report_device,
                        "edit": edit_name,
                    },
                )
                raise typer.Exit(_resolve_exit_code(err, profile=profile))

    console.print("📜 Emitting certificate")
    _report(
        run=str(edited_report),
        format="cert",
        baseline=str(baseline_report),
        output=cert_out,
    )