invarlock 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40):
  1. invarlock/__init__.py +1 -1
  2. invarlock/_data/runtime/tiers.yaml +61 -0
  3. invarlock/adapters/hf_loading.py +97 -0
  4. invarlock/calibration/__init__.py +6 -0
  5. invarlock/calibration/spectral_null.py +301 -0
  6. invarlock/calibration/variance_ve.py +154 -0
  7. invarlock/cli/app.py +15 -0
  8. invarlock/cli/commands/calibrate.py +576 -0
  9. invarlock/cli/commands/doctor.py +9 -3
  10. invarlock/cli/commands/explain_gates.py +53 -9
  11. invarlock/cli/commands/plugins.py +12 -2
  12. invarlock/cli/commands/run.py +181 -79
  13. invarlock/cli/commands/verify.py +40 -0
  14. invarlock/cli/config.py +11 -1
  15. invarlock/cli/determinism.py +252 -0
  16. invarlock/core/auto_tuning.py +215 -17
  17. invarlock/core/bootstrap.py +137 -5
  18. invarlock/core/registry.py +9 -4
  19. invarlock/core/runner.py +305 -35
  20. invarlock/eval/bench.py +467 -141
  21. invarlock/eval/bench_regression.py +12 -0
  22. invarlock/eval/bootstrap.py +3 -1
  23. invarlock/eval/data.py +29 -7
  24. invarlock/eval/primary_metric.py +20 -5
  25. invarlock/guards/rmt.py +536 -46
  26. invarlock/guards/spectral.py +217 -10
  27. invarlock/guards/variance.py +124 -42
  28. invarlock/reporting/certificate.py +476 -45
  29. invarlock/reporting/certificate_schema.py +4 -1
  30. invarlock/reporting/guards_analysis.py +108 -10
  31. invarlock/reporting/normalizer.py +24 -1
  32. invarlock/reporting/policy_utils.py +97 -15
  33. invarlock/reporting/primary_metric_utils.py +17 -0
  34. invarlock/reporting/validate.py +10 -10
  35. {invarlock-0.3.1.dist-info → invarlock-0.3.3.dist-info}/METADATA +12 -10
  36. {invarlock-0.3.1.dist-info → invarlock-0.3.3.dist-info}/RECORD +40 -33
  37. {invarlock-0.3.1.dist-info → invarlock-0.3.3.dist-info}/WHEEL +0 -0
  38. {invarlock-0.3.1.dist-info → invarlock-0.3.3.dist-info}/entry_points.txt +0 -0
  39. {invarlock-0.3.1.dist-info → invarlock-0.3.3.dist-info}/licenses/LICENSE +0 -0
  40. {invarlock-0.3.1.dist-info → invarlock-0.3.3.dist-info}/top_level.txt +0 -0
@@ -897,11 +897,21 @@ def _check_plugin_extras(plugin_name: str, plugin_type: str) -> str:
897
897
  if not plugin_info or not plugin_info["packages"]:
898
898
  return "" # No extra dependencies needed
899
899
 
900
- # Check each required package using import to play nice with tests that mock __import__
900
+ # Check each required package. For most packages we use a light import so
901
+ # tests can monkeypatch __import__; for GPU-only stacks like bitsandbytes
902
+ # we only probe presence via importlib.util.find_spec to avoid crashing on
903
+ # CPU-only builds during simple listing.
901
904
  missing_packages: list[str] = []
902
905
  for pkg in plugin_info["packages"]:
903
906
  try:
904
- __import__(pkg)
907
+ if pkg == "bitsandbytes":
908
+ import importlib.util as _util
909
+
910
+ spec = _util.find_spec(pkg)
911
+ if spec is None:
912
+ raise ImportError("bitsandbytes not importable")
913
+ else:
914
+ __import__(pkg)
905
915
  except Exception:
906
916
  missing_packages.append(pkg)
907
917
 
@@ -9,6 +9,7 @@ prefer Compare & Certify via `invarlock certify --baseline ... --subject ...`.
9
9
 
10
10
  import copy
11
11
  import hashlib
12
+ import inspect
12
13
  import json
13
14
  import math
14
15
  import os
@@ -300,6 +301,12 @@ def _hash_sequences(seqs: Sequence[Sequence[int]] | Iterable[Sequence[int]]) ->
300
301
  """Compute a stable digest for a sequence of integer token sequences."""
301
302
  hasher = hashlib.blake2s(digest_size=16)
302
303
  for seq in seqs:
304
+ try:
305
+ seq_len = len(seq)
306
+ except TypeError:
307
+ seq = list(seq)
308
+ seq_len = len(seq)
309
+ hasher.update(seq_len.to_bytes(4, "little", signed=False))
303
310
  arr = array("I", (int(token) & 0xFFFFFFFF for token in seq))
304
311
  hasher.update(arr.tobytes())
305
312
  return hasher.hexdigest()
@@ -818,6 +825,51 @@ def _resolve_provider_and_split(
818
825
  return data_provider, resolved_split, used_fallback_split
819
826
 
820
827
 
828
+ def _extract_model_load_kwargs(cfg: InvarLockConfig) -> dict[str, Any]:
829
+ """Return adapter.load_model kwargs from config (excluding core fields)."""
830
+ try:
831
+ data = cfg.model_dump()
832
+ except Exception:
833
+ data = {}
834
+ model = data.get("model") if isinstance(data, dict) else None
835
+ if not isinstance(model, dict):
836
+ return {}
837
+ return {
838
+ key: value
839
+ for key, value in model.items()
840
+ if key not in {"id", "adapter", "device"} and value is not None
841
+ }
842
+
843
+
844
+ def _load_model_with_cfg(adapter: Any, cfg: InvarLockConfig, device: str) -> Any:
845
+ """Load a model with config-provided kwargs, filtering for strict adapters."""
846
+ try:
847
+ model_id = cfg.model.id
848
+ except Exception:
849
+ try:
850
+ model_id = (cfg.model_dump().get("model") or {}).get("id")
851
+ except Exception:
852
+ model_id = None
853
+ if not isinstance(model_id, str) or not model_id:
854
+ raise ValueError("Missing model.id in config")
855
+
856
+ extra = _extract_model_load_kwargs(cfg)
857
+ try:
858
+ sig = inspect.signature(adapter.load_model)
859
+ accepts_var_kw = any(
860
+ p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()
861
+ )
862
+ if accepts_var_kw:
863
+ return adapter.load_model(model_id, device=device, **extra)
864
+ allowed = {k: v for k, v in extra.items() if k in sig.parameters}
865
+ if allowed:
866
+ return adapter.load_model(model_id, device=device, **allowed)
867
+ except Exception:
868
+ # Fall back to the strictest call shape.
869
+ pass
870
+ return adapter.load_model(model_id, device=device)
871
+
872
+
821
873
  def _run_bare_control(
822
874
  *,
823
875
  adapter: Any,
@@ -899,6 +951,7 @@ def _run_bare_control(
899
951
  "errors": [],
900
952
  "checks": {},
901
953
  "source": f"{profile_normalized or 'ci'}_profile",
954
+ "mode": "bare",
902
955
  }
903
956
 
904
957
  if getattr(bare_report, "status", "").lower() not in {"success", "completed", "ok"}:
@@ -977,7 +1030,7 @@ def _postprocess_and_summarize(
977
1030
  match_fraction: float | None,
978
1031
  overlap_fraction: float | None,
979
1032
  console: Console,
980
- ) -> None:
1033
+ ) -> dict[str, str]:
981
1034
  """Finalize report windows stats and print/save summary artifacts."""
982
1035
  try:
983
1036
  ds = report.setdefault("dataset", {}).setdefault("windows", {})
@@ -1001,6 +1054,7 @@ def _postprocess_and_summarize(
1001
1054
  console.print(f"📄 Report: {saved_files['json']}")
1002
1055
  if run_config.event_path:
1003
1056
  console.print(f"📝 Events: {run_config.event_path}")
1057
+ return saved_files
1004
1058
 
1005
1059
 
1006
1060
  def _compute_provider_digest(report: dict[str, Any]) -> dict[str, str] | None:
@@ -1537,6 +1591,7 @@ def run_command(
1537
1591
  no_cleanup = bool(_coerce_option(no_cleanup, False))
1538
1592
 
1539
1593
  # Use shared CLI coercers from invarlock.cli.utils
1594
+ report_path_out: str | None = None
1540
1595
 
1541
1596
  def _fail_run(message: str) -> None:
1542
1597
  console.print(f"[red]❌ {message}[/red]")
@@ -1673,6 +1728,26 @@ def run_command(
1673
1728
  cfg, device=device, out=out, console=console
1674
1729
  )
1675
1730
 
1731
+ determinism_meta: dict[str, Any] | None = None
1732
+ try:
1733
+ from invarlock.cli.determinism import apply_determinism_preset
1734
+
1735
+ preset = apply_determinism_preset(
1736
+ profile=profile_label,
1737
+ device=resolved_device,
1738
+ seed=int(seed_bundle.get("python") or seed_value),
1739
+ threads=int(os.environ.get("INVARLOCK_OMP_THREADS", 1) or 1),
1740
+ )
1741
+ if isinstance(preset, dict) and preset:
1742
+ determinism_meta = preset
1743
+ preset_seeds = preset.get("seeds")
1744
+ if isinstance(preset_seeds, dict) and preset_seeds:
1745
+ for key in ("python", "numpy", "torch"):
1746
+ if key in preset_seeds:
1747
+ seed_bundle[key] = preset_seeds.get(key)
1748
+ except Exception:
1749
+ determinism_meta = None
1750
+
1676
1751
  # Create run directory with timestamp
1677
1752
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1678
1753
  run_dir = output_dir / timestamp
@@ -2934,7 +3009,23 @@ def run_command(
2934
3009
  )
2935
3010
 
2936
3011
  guard_overhead_payload: dict[str, Any] | None = None
2937
- if measure_guard_overhead:
3012
+ if skip_overhead and profile_normalized in {"ci", "release"}:
3013
+ guard_overhead_payload = {
3014
+ "overhead_threshold": GUARD_OVERHEAD_THRESHOLD,
3015
+ "evaluated": False,
3016
+ "passed": True,
3017
+ "skipped": True,
3018
+ "skip_reason": "INVARLOCK_SKIP_OVERHEAD_CHECK",
3019
+ "mode": "skipped",
3020
+ "source": "env:INVARLOCK_SKIP_OVERHEAD_CHECK",
3021
+ "messages": [
3022
+ "Overhead check skipped via INVARLOCK_SKIP_OVERHEAD_CHECK"
3023
+ ],
3024
+ "warnings": [],
3025
+ "errors": [],
3026
+ "checks": {},
3027
+ }
3028
+ elif measure_guard_overhead:
2938
3029
  guard_overhead_payload = _run_bare_control(
2939
3030
  adapter=adapter,
2940
3031
  edit_op=edit_op,
@@ -3076,6 +3167,8 @@ def run_command(
3076
3167
  meta_payload["invarlock_version"] = invarlock_version
3077
3168
  if env_flags:
3078
3169
  meta_payload["env_flags"] = env_flags
3170
+ if determinism_meta:
3171
+ meta_payload["determinism"] = determinism_meta
3079
3172
  report["meta"].update(meta_payload)
3080
3173
  if pm_acceptance_range:
3081
3174
  report["meta"]["pm_acceptance_range"] = pm_acceptance_range
@@ -3235,87 +3328,90 @@ def run_command(
3235
3328
  report["metrics"].update(metrics_payload)
3236
3329
 
3237
3330
  if guard_overhead_payload is not None:
3238
- # Compute guarded primary-metric snapshot; pass structured reports into validator
3239
- try:
3240
- # Map loss type to ppl family kind
3241
- lk = str(resolved_loss_type or "causal").lower()
3242
- if lk == "mlm":
3243
- pm_kind_for_overhead = "ppl_mlm"
3244
- elif lk in {"seq2seq", "s2s", "t5"}:
3245
- pm_kind_for_overhead = "ppl_seq2seq"
3246
- else:
3247
- pm_kind_for_overhead = "ppl_causal"
3331
+ if bool(guard_overhead_payload.get("skipped", False)):
3332
+ report["guard_overhead"] = guard_overhead_payload
3333
+ else:
3334
+ # Compute guarded primary-metric snapshot; pass structured reports into validator
3335
+ try:
3336
+ # Map loss type to ppl family kind
3337
+ lk = str(resolved_loss_type or "causal").lower()
3338
+ if lk == "mlm":
3339
+ pm_kind_for_overhead = "ppl_mlm"
3340
+ elif lk in {"seq2seq", "s2s", "t5"}:
3341
+ pm_kind_for_overhead = "ppl_seq2seq"
3342
+ else:
3343
+ pm_kind_for_overhead = "ppl_causal"
3248
3344
 
3249
- # Prefer computing from the in-memory core_report windows to avoid ordering issues
3250
- pm_guarded = _extract_pm_snapshot_for_overhead(
3251
- core_report, kind=pm_kind_for_overhead
3252
- )
3253
- if not isinstance(pm_guarded, dict) or not pm_guarded:
3345
+ # Prefer computing from the in-memory core_report windows to avoid ordering issues
3254
3346
  pm_guarded = _extract_pm_snapshot_for_overhead(
3255
- report, kind=pm_kind_for_overhead
3347
+ core_report, kind=pm_kind_for_overhead
3256
3348
  )
3349
+ if not isinstance(pm_guarded, dict) or not pm_guarded:
3350
+ pm_guarded = _extract_pm_snapshot_for_overhead(
3351
+ report, kind=pm_kind_for_overhead
3352
+ )
3257
3353
 
3258
- guard_overhead_payload["guarded_report"] = (
3259
- {"metrics": {"primary_metric": pm_guarded}}
3260
- if isinstance(pm_guarded, dict) and pm_guarded
3261
- else None
3354
+ guard_overhead_payload["guarded_report"] = (
3355
+ {"metrics": {"primary_metric": pm_guarded}}
3356
+ if isinstance(pm_guarded, dict) and pm_guarded
3357
+ else None
3358
+ )
3359
+ except Exception:
3360
+ guard_overhead_payload["guarded_report"] = None
3361
+ bare_struct = guard_overhead_payload.get("bare_report") or {}
3362
+ guarded_struct = guard_overhead_payload.get("guarded_report") or {}
3363
+ # Be robust to mocks or minimal objects returned by validators
3364
+ result = validate_guard_overhead(
3365
+ bare_struct,
3366
+ guarded_struct,
3367
+ overhead_threshold=guard_overhead_payload.get(
3368
+ "overhead_threshold", GUARD_OVERHEAD_THRESHOLD
3369
+ ),
3262
3370
  )
3263
- except Exception:
3264
- guard_overhead_payload["guarded_report"] = None
3265
- bare_struct = guard_overhead_payload.get("bare_report") or {}
3266
- guarded_struct = guard_overhead_payload.get("guarded_report") or {}
3267
- # Be robust to mocks or minimal objects returned by validators
3268
- result = validate_guard_overhead(
3269
- bare_struct,
3270
- guarded_struct,
3271
- overhead_threshold=guard_overhead_payload.get(
3272
- "overhead_threshold", GUARD_OVERHEAD_THRESHOLD
3273
- ),
3274
- )
3275
- try:
3276
- messages = list(getattr(result, "messages", []))
3277
- except Exception: # pragma: no cover - defensive
3278
- messages = []
3279
- try:
3280
- warnings = list(getattr(result, "warnings", []))
3281
- except Exception: # pragma: no cover - defensive
3282
- warnings = []
3283
- try:
3284
- errors = list(getattr(result, "errors", []))
3285
- except Exception: # pragma: no cover - defensive
3286
- errors = []
3287
- try:
3288
- checks = dict(getattr(result, "checks", {}))
3289
- except Exception: # pragma: no cover - defensive
3290
- checks = {}
3291
- metrics_obj = getattr(result, "metrics", {})
3292
- if not isinstance(metrics_obj, dict):
3293
- metrics_obj = {}
3294
- overhead_ratio = metrics_obj.get("overhead_ratio")
3295
- if overhead_ratio is None:
3296
- overhead_ratio = getattr(result, "overhead_ratio", None)
3297
- overhead_percent = metrics_obj.get("overhead_percent")
3298
- if overhead_percent is None:
3299
- overhead_percent = getattr(result, "overhead_percent", None)
3300
- passed_flag = bool(getattr(result, "passed", False))
3301
-
3302
- guard_overhead_payload.update(
3303
- {
3304
- "messages": messages,
3305
- "warnings": warnings,
3306
- "errors": errors,
3307
- "checks": checks,
3308
- "overhead_ratio": overhead_ratio,
3309
- "overhead_percent": overhead_percent,
3310
- "passed": passed_flag,
3311
- "evaluated": True,
3312
- }
3313
- )
3314
- # Normalize for non-finite/degenerate cases
3315
- guard_overhead_payload = _normalize_overhead_result(
3316
- guard_overhead_payload, profile=profile_normalized
3317
- )
3318
- report["guard_overhead"] = guard_overhead_payload
3371
+ try:
3372
+ messages = list(getattr(result, "messages", []))
3373
+ except Exception: # pragma: no cover - defensive
3374
+ messages = []
3375
+ try:
3376
+ warnings = list(getattr(result, "warnings", []))
3377
+ except Exception: # pragma: no cover - defensive
3378
+ warnings = []
3379
+ try:
3380
+ errors = list(getattr(result, "errors", []))
3381
+ except Exception: # pragma: no cover - defensive
3382
+ errors = []
3383
+ try:
3384
+ checks = dict(getattr(result, "checks", {}))
3385
+ except Exception: # pragma: no cover - defensive
3386
+ checks = {}
3387
+ metrics_obj = getattr(result, "metrics", {})
3388
+ if not isinstance(metrics_obj, dict):
3389
+ metrics_obj = {}
3390
+ overhead_ratio = metrics_obj.get("overhead_ratio")
3391
+ if overhead_ratio is None:
3392
+ overhead_ratio = getattr(result, "overhead_ratio", None)
3393
+ overhead_percent = metrics_obj.get("overhead_percent")
3394
+ if overhead_percent is None:
3395
+ overhead_percent = getattr(result, "overhead_percent", None)
3396
+ passed_flag = bool(getattr(result, "passed", False))
3397
+
3398
+ guard_overhead_payload.update(
3399
+ {
3400
+ "messages": messages,
3401
+ "warnings": warnings,
3402
+ "errors": errors,
3403
+ "checks": checks,
3404
+ "overhead_ratio": overhead_ratio,
3405
+ "overhead_percent": overhead_percent,
3406
+ "passed": passed_flag,
3407
+ "evaluated": True,
3408
+ }
3409
+ )
3410
+ # Normalize for non-finite/degenerate cases
3411
+ guard_overhead_payload = _normalize_overhead_result(
3412
+ guard_overhead_payload, profile=profile_normalized
3413
+ )
3414
+ report["guard_overhead"] = guard_overhead_payload
3319
3415
 
3320
3416
  had_baseline = bool(baseline and Path(baseline).exists())
3321
3417
  if (
@@ -3860,7 +3956,7 @@ def run_command(
3860
3956
  except Exception:
3861
3957
  pass
3862
3958
 
3863
- _postprocess_and_summarize(
3959
+ saved_files = _postprocess_and_summarize(
3864
3960
  report=report,
3865
3961
  run_dir=run_dir,
3866
3962
  run_config=run_config,
@@ -3870,6 +3966,11 @@ def run_command(
3870
3966
  overlap_fraction=overlap_fraction,
3871
3967
  console=console,
3872
3968
  )
3969
+ try:
3970
+ if isinstance(saved_files, dict) and saved_files.get("json"):
3971
+ report_path_out = str(saved_files["json"])
3972
+ except Exception:
3973
+ pass
3873
3974
 
3874
3975
  # Metrics display
3875
3976
  pm_obj = None
@@ -4060,6 +4161,7 @@ def run_command(
4060
4161
  pass
4061
4162
 
4062
4163
  # Normal path falls through; cleanup handled below in finally
4164
+ return report_path_out
4063
4165
 
4064
4166
  except FileNotFoundError as e:
4065
4167
  console.print(f"[red]❌ Configuration file not found: {e}[/red]")
@@ -35,6 +35,22 @@ from .run import _enforce_provider_parity, _resolve_exit_code
35
35
  console = Console()
36
36
 
37
37
 
38
+ def _coerce_float(value: Any) -> float | None:
39
+ try:
40
+ out = float(value)
41
+ except (TypeError, ValueError):
42
+ return None
43
+ return out if math.isfinite(out) else None
44
+
45
+
46
+ def _coerce_int(value: Any) -> int | None:
47
+ try:
48
+ out = int(value)
49
+ except (TypeError, ValueError):
50
+ return None
51
+ return out if out >= 0 else None
52
+
53
+
38
54
  def _load_certificate(path: Path) -> dict[str, Any]:
39
55
  """Load certificate JSON from disk."""
40
56
  with path.open("r", encoding="utf-8") as handle:
@@ -315,6 +331,30 @@ def _validate_certificate_payload(
315
331
  errors.extend(_validate_drift_band(certificate))
316
332
  errors.extend(_apply_profile_lints(certificate))
317
333
  errors.extend(_validate_tokenizer_hash(certificate))
334
+ # Release-only enforcement: guard overhead must be measured or explicitly skipped.
335
+ if prof == "release":
336
+ go = certificate.get("guard_overhead")
337
+ if not isinstance(go, dict) or not go:
338
+ errors.append(
339
+ "Release verification requires guard_overhead (missing). "
340
+ "Set INVARLOCK_SKIP_OVERHEAD_CHECK=1 to explicitly skip during certification."
341
+ )
342
+ else:
343
+ skipped = bool(go.get("skipped", False)) or (
344
+ str(go.get("mode", "")).strip().lower() == "skipped"
345
+ )
346
+ if not skipped:
347
+ evaluated = go.get("evaluated")
348
+ if evaluated is not True:
349
+ errors.append(
350
+ "Release verification requires evaluated guard_overhead (not evaluated). "
351
+ "Set INVARLOCK_SKIP_OVERHEAD_CHECK=1 to explicitly skip during certification."
352
+ )
353
+ ratio = go.get("overhead_ratio")
354
+ if ratio is None:
355
+ errors.append(
356
+ "Release verification requires guard_overhead.overhead_ratio (missing)."
357
+ )
318
358
  # Legacy cross-checks removed; primary_metric is canonical
319
359
 
320
360
  return errors
invarlock/cli/config.py CHANGED
@@ -207,11 +207,21 @@ def _create_loader(base_dir: Path):
207
207
  class Loader(yaml.SafeLoader):
208
208
  pass
209
209
 
210
- Loader._base_dir = Path(base_dir)
210
+ Loader._base_dir = Path(base_dir).resolve()
211
211
 
212
212
  def _construct_include(loader: yaml.SafeLoader, node: yaml.Node):
213
213
  rel = loader.construct_scalar(node)
214
214
  path = (loader._base_dir / rel).resolve()
215
+ allow_outside = os.environ.get("INVARLOCK_ALLOW_CONFIG_INCLUDE_OUTSIDE", "")
216
+ allow_outside = allow_outside.strip().lower() in {"1", "true", "yes", "on"}
217
+ if not allow_outside:
218
+ try:
219
+ path.relative_to(loader._base_dir)
220
+ except ValueError as exc:
221
+ raise ValueError(
222
+ "Config !include must stay within the config directory. "
223
+ "Set INVARLOCK_ALLOW_CONFIG_INCLUDE_OUTSIDE=1 to override."
224
+ ) from exc
215
225
  with path.open(encoding="utf-8") as fh:
216
226
  inc_loader = _create_loader(path.parent)
217
227
  return yaml.load(fh, Loader=inc_loader)