invarlock-0.3.7-py3-none-any.whl → invarlock-0.3.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. invarlock/__init__.py +3 -3
  2. invarlock/adapters/auto.py +2 -10
  3. invarlock/adapters/hf_loading.py +7 -7
  4. invarlock/adapters/hf_mixin.py +28 -5
  5. invarlock/assurance/__init__.py +15 -23
  6. invarlock/calibration/spectral_null.py +1 -1
  7. invarlock/cli/adapter_auto.py +1 -5
  8. invarlock/cli/app.py +57 -27
  9. invarlock/cli/commands/__init__.py +2 -2
  10. invarlock/cli/commands/calibrate.py +48 -4
  11. invarlock/cli/commands/{certify.py → evaluate.py} +69 -46
  12. invarlock/cli/commands/explain_gates.py +94 -51
  13. invarlock/cli/commands/export_html.py +11 -9
  14. invarlock/cli/commands/report.py +121 -47
  15. invarlock/cli/commands/run.py +274 -66
  16. invarlock/cli/commands/verify.py +84 -89
  17. invarlock/cli/determinism.py +1 -1
  18. invarlock/cli/provenance.py +3 -3
  19. invarlock/core/bootstrap.py +1 -1
  20. invarlock/core/retry.py +14 -14
  21. invarlock/core/runner.py +1 -1
  22. invarlock/edits/noop.py +2 -2
  23. invarlock/edits/quant_rtn.py +2 -2
  24. invarlock/eval/__init__.py +1 -1
  25. invarlock/eval/bench.py +11 -7
  26. invarlock/eval/primary_metric.py +1 -1
  27. invarlock/guards/spectral.py +2 -2
  28. invarlock/guards_ref/spectral_ref.py +1 -1
  29. invarlock/model_profile.py +16 -35
  30. invarlock/observability/health.py +38 -20
  31. invarlock/plugins/hf_bnb_adapter.py +32 -21
  32. invarlock/reporting/__init__.py +18 -4
  33. invarlock/reporting/html.py +7 -7
  34. invarlock/reporting/normalizer.py +2 -2
  35. invarlock/reporting/policy_utils.py +1 -1
  36. invarlock/reporting/primary_metric_utils.py +11 -11
  37. invarlock/reporting/render.py +126 -120
  38. invarlock/reporting/report.py +43 -37
  39. invarlock/reporting/{certificate.py → report_builder.py} +103 -99
  40. invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
  41. invarlock-0.3.9.dist-info/METADATA +303 -0
  42. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/RECORD +46 -46
  43. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/WHEEL +1 -1
  44. invarlock-0.3.7.dist-info/METADATA +0 -602
  45. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/entry_points.txt +0 -0
  46. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/licenses/LICENSE +0 -0
  47. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/top_level.txt +0 -0
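The thread running through this release is a rename: "certificate" becomes "evaluation report" across the CLI, reporting, and retry modules (`certify.py` → `evaluate.py`, `certificate.py` → `report_builder.py`, `certificate_schema.py` → `report_schema.py`). For downstream callers, a minimal sketch of the import-level impact, based on the verify.py hunks below (the payload variable name is illustrative):

```python
# 0.3.7:
#   from invarlock.reporting.certificate import validate_certificate
# 0.3.9:
from invarlock.reporting.report_builder import validate_report

# Calling convention appears unchanged: pass the parsed report JSON (a dict);
# a falsy result is treated as "schema validation failed" by verify.
report_payload = {}  # illustrative: load this from a report JSON file
ok = validate_report(report_payload)
```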
invarlock/cli/commands/verify.py CHANGED
@@ -2,7 +2,7 @@
 invarlock verify command
 ====================
 
-Validates generated evaluation certificates for internal consistency. The command
+Validates generated evaluation reports for internal consistency. The command
 ensures schema compliance, checks that the primary metric ratio agrees with the
 baseline reference, and enforces paired-window guarantees (match=1.0,
 overlap=0.0).
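The paired-window guarantees named in this docstring are enforced by `_validate_pairing` further down against `dataset.windows.stats`. A minimal fragment that would satisfy those checks, as a sketch (the `paired_windows` and `window_pairing_reason` lookups are cut from the hunks shown here but appear to live in the same stats block):

```python
# Values chosen to satisfy the thresholds visible in _validate_pairing below:
stats = {
    "window_match_fraction": 1.0,    # must be >= 0.999999, i.e. exactly paired
    "window_overlap_fraction": 0.0,  # must be <= 1e-9
    "paired_windows": 128,           # any count > 0; 128 is illustrative
    "window_pairing_reason": None,   # must be null/None for paired reports
}
report_fragment = {"dataset": {"windows": {"stats": stats}}}
```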
@@ -26,7 +26,7 @@ from invarlock.core.exceptions import (
 from invarlock.core.exceptions import (
     ValidationError as _ValidationError,
 )
-from invarlock.reporting.certificate import validate_certificate
+from invarlock.reporting.report_builder import validate_report
 
 from .._json import emit as _emit_json
 from .._json import encode_error as _encode_error
@@ -52,18 +52,18 @@ def _coerce_int(value: Any) -> int | None:
     return out if out >= 0 else None
 
 
-def _load_certificate(path: Path) -> dict[str, Any]:
-    """Load certificate JSON from disk."""
+def _load_evaluation_report(path: Path) -> dict[str, Any]:
+    """Load an evaluation report JSON from disk."""
     with path.open("r", encoding="utf-8") as handle:
         return json.load(handle)
 
 
-def _validate_primary_metric(certificate: dict[str, Any]) -> list[str]:
+def _validate_primary_metric(report: dict[str, Any]) -> list[str]:
     """Validate primary metric ratio consistency with baseline reference."""
     errors: list[str] = []
-    pm = certificate.get("primary_metric", {}) or {}
+    pm = report.get("primary_metric", {}) or {}
     if not isinstance(pm, dict) or not pm:
-        errors.append("Certificate missing primary_metric block.")
+        errors.append("report missing primary_metric block.")
         return errors
 
     def _is_finite_number(value: Any) -> bool:
@@ -87,7 +87,7 @@ def _validate_primary_metric(certificate: dict[str, Any]) -> list[str]:
     pm_invalid = _declares_invalid_primary_metric(pm)
 
     if kind.startswith("ppl"):
-        baseline_ref = certificate.get("baseline_ref", {}) or {}
+        baseline_ref = report.get("baseline_ref", {}) or {}
         baseline_pm = (
             baseline_ref.get("primary_metric")
             if isinstance(baseline_ref, dict)
@@ -107,7 +107,7 @@ def _validate_primary_metric(certificate: dict[str, Any]) -> list[str]:
         expected_ratio = float(final) / float(baseline_final)
         if not _is_finite_number(ratio_vs_baseline):
             errors.append(
-                "Certificate is missing a finite primary_metric.ratio_vs_baseline value."
+                "report is missing a finite primary_metric.ratio_vs_baseline value."
             )
         elif not math.isclose(
             float(ratio_vs_baseline), expected_ratio, rel_tol=1e-6, abs_tol=1e-6
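The ratio check above recomputes the expected value from the report's final and baseline-final metrics and compares with `math.isclose` at 1e-6 tolerances. A worked instance with illustrative numbers:

```python
import math

final, baseline_final = 12.40, 12.34     # illustrative ppl values
ratio_vs_baseline = 1.004862             # ratio recorded in the report
expected_ratio = final / baseline_final  # 1.0048622...

# Mirrors the check above; the ~2.4e-7 difference is inside abs_tol=1e-6.
assert math.isclose(ratio_vs_baseline, expected_ratio, rel_tol=1e-6, abs_tol=1e-6)
```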
@@ -130,16 +130,16 @@ def _validate_primary_metric(certificate: dict[str, Any]) -> list[str]:
         return errors
     if ratio_vs_baseline is None or not isinstance(ratio_vs_baseline, int | float):
         errors.append(
-            "Certificate missing primary_metric.ratio_vs_baseline for non-ppl metric."
+            "report missing primary_metric.ratio_vs_baseline for non-ppl metric."
         )
 
     return errors
 
 
-def _validate_pairing(certificate: dict[str, Any]) -> list[str]:
+def _validate_pairing(report: dict[str, Any]) -> list[str]:
     """Validate window pairing metrics (PM-only location)."""
     errors: list[str] = []
-    stats = certificate.get("dataset", {}).get("windows", {}).get("stats", {})
+    stats = report.get("dataset", {}).get("windows", {}).get("stats", {})
 
     match_fraction = stats.get("window_match_fraction")
     overlap_fraction = stats.get("window_overlap_fraction")
@@ -148,23 +148,23 @@ def _validate_pairing(certificate: dict[str, Any]) -> list[str]:
 
     if pairing_reason is not None:
         errors.append(
-            "window_pairing_reason must be null/None for paired certificates "
+            "window_pairing_reason must be null/None for paired reports "
             f"(found {pairing_reason!r})."
         )
     if paired_windows is None:
-        errors.append("Certificate missing paired_windows metric.")
+        errors.append("report missing paired_windows metric.")
     elif paired_windows == 0:
-        errors.append("paired_windows must be > 0 for paired certificates (found 0).")
+        errors.append("paired_windows must be > 0 for paired reports (found 0).")
 
     if match_fraction is None:
-        errors.append("Certificate missing window_match_fraction metric.")
+        errors.append("report missing window_match_fraction metric.")
     elif match_fraction < 0.999999:
         errors.append(
             f"window_match_fraction must be 1.0 for paired runs (found {match_fraction:.6f})."
         )
 
     if overlap_fraction is None:
-        errors.append("Certificate missing window_overlap_fraction metric.")
+        errors.append("report missing window_overlap_fraction metric.")
     elif overlap_fraction > 1e-9:
         errors.append(
             f"window_overlap_fraction must be 0.0 (found {overlap_fraction:.6f})."
@@ -173,10 +173,10 @@ def _validate_pairing(certificate: dict[str, Any]) -> list[str]:
     return errors
 
 
-def _validate_counts(certificate: dict[str, Any]) -> list[str]:
+def _validate_counts(report: dict[str, Any]) -> list[str]:
     """Validate preview/final window counts align with dataset configuration."""
     errors: list[str] = []
-    dataset = certificate.get("dataset", {})
+    dataset = report.get("dataset", {})
     dataset_windows = dataset.get("windows", {})
     expected_preview = dataset_windows.get("preview")
     expected_final = dataset_windows.get("final")
@@ -190,9 +190,7 @@ def _validate_counts(certificate: dict[str, Any]) -> list[str]:
 
     if expected_preview is not None:
         if preview_used is None:
-            errors.append(
-                "Certificate missing coverage.preview.used for preview windows."
-            )
+            errors.append("report missing coverage.preview.used for preview windows.")
         elif int(preview_used) != int(expected_preview):
             errors.append(
                 f"Preview window count mismatch: expected {expected_preview}, observed {preview_used}."
@@ -200,7 +198,7 @@ def _validate_counts(certificate: dict[str, Any]) -> list[str]:
 
     if expected_final is not None:
         if final_used is None:
-            errors.append("Certificate missing coverage.final.used for final windows.")
+            errors.append("report missing coverage.final.used for final windows.")
         elif int(final_used) != int(expected_final):
             errors.append(
                 f"Final window count mismatch: expected {expected_final}, observed {final_used}."
@@ -218,15 +216,15 @@ def _validate_counts(certificate: dict[str, Any]) -> list[str]:
     return errors
 
 
-def _validate_drift_band(certificate: dict[str, Any]) -> list[str]:
+def _validate_drift_band(report: dict[str, Any]) -> list[str]:
     """Validate preview→final drift stays within the configured band.
 
-    Defaults to 0.95–1.05 unless the certificate provides `primary_metric.drift_band`.
+    Defaults to 0.95–1.05 unless the report provides `primary_metric.drift_band`.
     """
     errors: list[str] = []
-    pm = certificate.get("primary_metric", {}) or {}
+    pm = report.get("primary_metric", {}) or {}
     if not isinstance(pm, dict) or not pm:
-        errors.append("Certificate missing primary_metric block.")
+        errors.append("report missing primary_metric block.")
         return errors
     if bool(pm.get("invalid")):
         # Drift is undefined when the primary metric is invalid (e.g., NaN/Inf weights).
@@ -247,7 +245,7 @@ def _validate_drift_band(certificate: dict[str, Any]) -> list[str]:
         drift_ratio = None
 
     if not isinstance(drift_ratio, int | float):
-        errors.append("Certificate missing preview/final to compute drift ratio.")
+        errors.append("report missing preview/final to compute drift ratio.")
         return errors
 
     drift_min = 0.95
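Drift, then, is the preview-to-final ratio of the primary metric and must stay within 0.95–1.05 unless `primary_metric.drift_band` overrides the band. A sketch under two assumptions not visible in this hunk: that the ratio is computed as final over preview, and that `drift_band` is a min/max mapping:

```python
report = {"primary_metric": {}}  # illustrative fragment; no drift_band override
preview, final = 12.31, 12.40    # illustrative preview/final metric values
drift_ratio = final / preview    # ~1.0073 (assumed direction of the ratio)

band = report.get("primary_metric", {}).get("drift_band") or {}  # assumed shape
drift_min = float(band.get("min", 0.95))
drift_max = float(band.get("max", 1.05))
within_band = drift_min <= drift_ratio <= drift_max  # True here
```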
@@ -282,15 +280,15 @@ def _validate_drift_band(certificate: dict[str, Any]) -> list[str]:
     return errors
 
 
-def _validate_tokenizer_hash(certificate: dict[str, Any]) -> list[str]:
+def _validate_tokenizer_hash(report: dict[str, Any]) -> list[str]:
     """Validate tokenizer hash consistency between baseline and edited runs.
 
     The check is enforced only when both hashes are present. When present and
     different, the verification fails.
     """
     errors: list[str] = []
-    meta = certificate.get("meta", {}) or {}
-    dataset = certificate.get("dataset", {}) or {}
+    meta = report.get("meta", {}) or {}
+    dataset = report.get("dataset", {}) or {}
     edited_hash = None
     try:
         # Prefer meta.tokenizer_hash; fall back to dataset.tokenizer.hash
@@ -302,7 +300,7 @@ def _validate_tokenizer_hash(certificate: dict[str, Any]) -> list[str]:
     except Exception:
         edited_hash = None
 
-    baseline_ref = certificate.get("baseline_ref", {}) or {}
+    baseline_ref = report.get("baseline_ref", {}) or {}
     baseline_hash = baseline_ref.get("tokenizer_hash")
 
     if isinstance(edited_hash, str) and isinstance(baseline_hash, str):
@@ -334,15 +332,15 @@ def _measurement_contract_digest(contract: Any) -> str | None:
 
 
 def _validate_measurement_contracts(
-    certificate: dict[str, Any], *, profile: str
+    report: dict[str, Any], *, profile: str
 ) -> list[str]:
     """Enforce measurement-contract presence and baseline pairing for guards."""
     errors: list[str] = []
     prof = (profile or "").strip().lower()
-    resolved_policy = certificate.get("resolved_policy") or {}
+    resolved_policy = report.get("resolved_policy") or {}
 
     for guard_key in ("spectral", "rmt"):
-        block = certificate.get(guard_key) or {}
+        block = report.get(guard_key) or {}
         if not isinstance(block, dict):
             continue
         evaluated = bool(block.get("evaluated", True))
@@ -353,14 +351,14 @@ def _validate_measurement_contracts(
         mc_hash = _measurement_contract_digest(mc)
         expected_hash = block.get("measurement_contract_hash")
         if not isinstance(mc, dict) or not mc:
-            errors.append(f"Certificate missing {guard_key}.measurement_contract.")
+            errors.append(f"report missing {guard_key}.measurement_contract.")
         elif isinstance(expected_hash, str) and expected_hash:
             if mc_hash and mc_hash != expected_hash:
                 errors.append(
                     f"{guard_key}.measurement_contract_hash mismatch: expected={expected_hash}, computed={mc_hash}."
                 )
         else:
-            errors.append(f"Certificate missing {guard_key}.measurement_contract_hash.")
+            errors.append(f"report missing {guard_key}.measurement_contract_hash.")
 
         rp_guard = (
             resolved_policy.get(guard_key)
@@ -373,7 +371,7 @@ def _validate_measurement_contracts(
         rp_hash = _measurement_contract_digest(rp_mc)
         if not isinstance(rp_mc, dict) or not rp_mc:
             errors.append(
-                f"Certificate missing resolved_policy.{guard_key}.measurement_contract."
+                f"report missing resolved_policy.{guard_key}.measurement_contract."
             )
         elif mc_hash and rp_hash and mc_hash != rp_hash:
            errors.append(
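`_measurement_contract_digest` itself is not part of this diff; the checks only require that `measurement_contract_hash` matches a digest recomputed from the contract dict and that the `resolved_policy` copy digests identically. One plausible shape for such a digest, purely as an assumption:

```python
import hashlib
import json
from typing import Any


def measurement_contract_digest(contract: Any) -> str | None:
    """Hypothetical stand-in for _measurement_contract_digest: hash the
    contract over a canonical JSON encoding. The real implementation is not
    shown in this diff and may differ (algorithm, encoding, key order)."""
    if not isinstance(contract, dict) or not contract:
        return None
    canonical = json.dumps(contract, sort_keys=True, separators=(",", ":"))
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()
```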
@@ -391,10 +389,10 @@ def _validate_measurement_contracts(
     return errors
 
 
-def _apply_profile_lints(certificate: dict[str, Any]) -> list[str]:
-    """Apply model-profile specific lint rules embedded in the certificate."""
+def _apply_profile_lints(report: dict[str, Any]) -> list[str]:
+    """Apply model-profile specific lint rules embedded in the report."""
     errors: list[str] = []
-    meta = certificate.get("meta", {})
+    meta = report.get("meta", {})
     profile = meta.get("model_profile") if isinstance(meta, dict) else None
     if not isinstance(profile, dict):
         return errors
@@ -410,7 +408,7 @@ def _apply_profile_lints(certificate: dict[str, Any]) -> list[str]:
         path = lint.get("path")
         expected = lint.get("value")
         message = lint.get("message") or "Model profile lint failed."
-        actual = _resolve_path(certificate, path) if isinstance(path, str) else None
+        actual = _resolve_path(report, path) if isinstance(path, str) else None
 
         if lint_type == "equals":
             if actual != expected:
@@ -447,21 +445,21 @@ def _apply_profile_lints(certificate: dict[str, Any]) -> list[str]:
     return errors
 
 
-def _validate_certificate_payload(
+def _validate_evaluation_report_payload(
     path: Path, *, profile: str | None = None
 ) -> list[str]:
-    """Run all verification checks for a single certificate."""
+    """Run all verification checks for a single evaluation report."""
     errors: list[str] = []
-    certificate = _load_certificate(path)
+    report = _load_evaluation_report(path)
 
     # Always surface schema validation failures for this payload
-    if not validate_certificate(certificate):
-        errors.append("Certificate schema validation failed.")
+    if not validate_report(report):
+        errors.append("report schema validation failed.")
         return errors
 
-    errors.extend(_validate_primary_metric(certificate))
-    errors.extend(_validate_pairing(certificate))
-    errors.extend(_validate_counts(certificate))
+    errors.extend(_validate_primary_metric(report))
+    errors.extend(_validate_pairing(report))
+    errors.extend(_validate_counts(report))
     try:
         prof = (
             (profile or "").strip().lower()
@@ -473,22 +471,22 @@ def _validate_certificate_payload(
     # Drift band is a CI/Release enforcement check; dev profile should not
     # fail verification due to preview→final drift.
     if prof in {"ci", "release"}:
-        errors.extend(_validate_drift_band(certificate))
-    errors.extend(_apply_profile_lints(certificate))
-    errors.extend(_validate_tokenizer_hash(certificate))
+        errors.extend(_validate_drift_band(report))
+    errors.extend(_apply_profile_lints(report))
+    errors.extend(_validate_tokenizer_hash(report))
     if prof in {"ci", "release"}:
-        errors.extend(_validate_measurement_contracts(certificate, profile=prof))
+        errors.extend(_validate_measurement_contracts(report, profile=prof))
 
     # strict/fast assurance mode checks were removed; verification gates rely on
     # structural schema + guard metric contracts instead.
 
     # Release-only enforcement: guard overhead must be measured or explicitly skipped.
     if prof == "release":
-        go = certificate.get("guard_overhead")
+        go = report.get("guard_overhead")
         if not isinstance(go, dict) or not go:
             errors.append(
                 "Release verification requires guard_overhead (missing). "
-                "Set INVARLOCK_SKIP_OVERHEAD_CHECK=1 to explicitly skip during certification."
+                "Set INVARLOCK_SKIP_OVERHEAD_CHECK=1 to explicitly skip during evaluation."
             )
         else:
             skipped = bool(go.get("skipped", False)) or (
@@ -499,7 +497,7 @@ def _validate_certificate_payload(
             if evaluated is not True:
                 errors.append(
                     "Release verification requires evaluated guard_overhead (not evaluated). "
-                    "Set INVARLOCK_SKIP_OVERHEAD_CHECK=1 to explicitly skip during certification."
+                    "Set INVARLOCK_SKIP_OVERHEAD_CHECK=1 to explicitly skip during evaluation."
                 )
             ratio = go.get("overhead_ratio")
             if ratio is None:
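Under the release profile, a report clears the overhead gate only when `guard_overhead` is present and either explicitly skipped or actually evaluated with an `overhead_ratio`. Two illustrative blocks (the full skip condition is truncated in this hunk, so the skipped form is an assumption):

```python
# Evaluated guard overhead, as the release checks above expect:
guard_overhead_ok = {"evaluated": True, "overhead_ratio": 1.02}  # ratio illustrative

# Explicit skip, mirroring the INVARLOCK_SKIP_OVERHEAD_CHECK escape hatch
# referenced in the error messages (exact accepted shape is assumed):
guard_overhead_skipped = {"skipped": True}
```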
@@ -511,14 +509,14 @@ def _validate_certificate_payload(
     return errors
 
 
-def _warn_adapter_family_mismatch(cert_path: Path, certificate: dict[str, Any]) -> None:
+def _warn_adapter_family_mismatch(cert_path: Path, report: dict[str, Any]) -> None:
     """Emit a soft warning if adapter families differ between baseline and edited.
 
     This is a non-fatal hint to catch inadvertent cross-family comparisons.
-    Tries to load the baseline report referenced in the certificate provenance.
+    Tries to load the baseline report referenced in the report provenance.
     """
     try:
-        plugins = certificate.get("plugins") or {}
+        plugins = report.get("plugins") or {}
         adapter_meta = plugins.get("adapter") if isinstance(plugins, dict) else None
         edited_family = None
         edited_lib = None
@@ -531,8 +529,8 @@ def _warn_adapter_family_mismatch(cert_path: Path, certificate: dict[str, Any])
             edited_ver = prov.get("version") or None
 
         baseline_prov = (
-            certificate.get("provenance")
-            if isinstance(certificate.get("provenance"), dict)
+            report.get("provenance")
+            if isinstance(report.get("provenance"), dict)
             else {}
         )
         baseline_report_path = None
@@ -582,7 +580,7 @@ def _warn_adapter_family_mismatch(cert_path: Path, certificate: dict[str, Any])
                 f"[yellow] • edited : family={edited_family}, backend={edited_backend} {edited_version}[/yellow]"
             )
             console.print(
-                "[yellow] Ensure this cross-family comparison is intentional (Compare & Certify flows should normally match families).[/yellow]"
+                "[yellow] Ensure this cross-family comparison is intentional (Compare & Evaluate flows should normally match families).[/yellow]"
             )
     except Exception:
         # Non-fatal and best-effort; suppress errors
@@ -590,18 +588,18 @@ def _warn_adapter_family_mismatch(cert_path: Path, certificate: dict[str, Any])
 
 
 def verify_command(
-    certificates: list[Path] = typer.Argument(
+    reports: list[Path] = typer.Argument(
         ...,
         exists=True,
         dir_okay=False,
         readable=True,
         resolve_path=True,
-        help="One or more certificate JSON files to verify.",
+        help="One or more evaluation report JSON files to verify.",
     ),
     baseline: Path | None = typer.Option(
         None,
         "--baseline",
-        help="Optional baseline certificate/report JSON to enforce provider parity.",
+        help="Optional baseline evaluation report (or run report) JSON to enforce provider parity.",
     ),
     tolerance: float = typer.Option(
         1e-9,
@@ -620,9 +618,9 @@ def verify_command(
     ),
 ) -> None:
     """
-    Verify certificate integrity.
+    Verify evaluation report integrity.
 
-    Ensures each certificate passes schema validation, ratio consistency checks,
+    Ensures each evaluation report passes schema validation, ratio consistency checks,
     and strict pairing requirements (match=1.0, overlap=0.0).
     """
 
@@ -638,7 +636,7 @@ def verify_command(
     try:
         if baseline is not None:
             bdata = json.loads(baseline.read_text(encoding="utf-8"))
-            # Accept either a certificate or a raw report; look under provenance when present
+            # Accept either an evaluation report or a run report (report.json); look under provenance when present.
             prov = bdata.get("provenance") if isinstance(bdata, dict) else None
             if isinstance(prov, dict):
                 pd = prov.get("provider_digest")
@@ -650,8 +648,8 @@ def verify_command(
 
     malformed_any = False
     try:
-        for cert_path in certificates:
-            cert_obj = _load_certificate(cert_path)
+        for cert_path in reports:
+            cert_obj = _load_evaluation_report(cert_path)
 
             # Enforce provider digest presence in CI/Release profiles
             try:
@@ -679,24 +677,21 @@ def verify_command(
                 )
 
             # Structural checks
-            errors = _validate_certificate_payload(cert_path, profile=profile)
+            errors = _validate_evaluation_report_payload(cert_path, profile=profile)
             # JSON path: emit a typed ValidationError for schema failures to include error.code
             if json_out and any(
                 "schema validation failed" in str(e).lower() for e in errors
             ):
                 raise _ValidationError(
                     code="E601",
-                    message="CERTIFICATE-SCHEMA-INVALID: schema validation failed",
+                    message="REPORT-SCHEMA-INVALID: schema validation failed",
                     details={"path": str(cert_path)},
                 )
             # Determine malformed vs policy-fail for this cert
             is_malformed = any(
                 ("schema validation failed" in e.lower())
                 or ("missing primary_metric.ratio_vs_baseline" in e)
-                or (
-                    "Certificate is missing a finite primary_metric.ratio_vs_baseline"
-                    in e
-                )
+                or ("report is missing a finite primary_metric.ratio_vs_baseline" in e)
                 for e in errors
             )
             malformed_any = malformed_any or is_malformed
@@ -813,7 +808,7 @@ def verify_command(
                 )
                 raise _MetricsError(
                     code="E602",
-                    message="RECOMPUTE-MISMATCH: certificate values disagree with recomputation",
+                    message="RECOMPUTE-MISMATCH: report values disagree with recomputation",
                     details={"example": str(first)},
                 )
 
@@ -835,11 +830,11 @@ def verify_command(
     if not overall_ok:
         code = 2 if malformed_any else 1
         if json_out:
-            # Build per-certificate results payload
+            # Build per-report results payload
             results: list[dict[str, Any]] = []
-            for cert_path in certificates:
+            for cert_path in reports:
                 try:
-                    cert_obj = _load_certificate(cert_path)
+                    cert_obj = _load_evaluation_report(cert_path)
                 except Exception:
                     cert_obj = {}
                 pm = (
@@ -980,7 +975,7 @@ def verify_command(
                     "ok": False,
                     "reason": "malformed" if malformed_any else "policy_fail",
                 },
-                "certificate": {"count": len(certificates)},
+                "evaluation_report": {"count": len(reports)},
                 "results": results,
                 "resolution": {"exit_code": code},
             }
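Schematically, the failure payload assembled here looks as follows; values are illustrative, and `FORMAT_VERIFY` is a constant defined elsewhere in the module:

```python
FORMAT_VERIFY = "verify/v1"  # illustrative stand-in for the module's constant
payload = {
    "format_version": FORMAT_VERIFY,
    "summary": {"ok": False, "reason": "policy_fail"},  # "malformed" for schema breakage
    "evaluation_report": {"count": 1},                  # number of reports checked
    "results": [],                                      # per-report entries built above
    "resolution": {"exit_code": 1},                     # 2 when any report is malformed
}
```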
@@ -989,11 +984,11 @@ def verify_command(
 
     # Success emission
     if json_out:
-        # Build per-certificate success results payload
+        # Build per-report success results payload
         results: list[dict[str, Any]] = []
-        for cert_path in certificates:
+        for cert_path in reports:
             try:
-                cert_obj = _load_certificate(cert_path)
+                cert_obj = _load_evaluation_report(cert_path)
             except Exception:
                 cert_obj = {}
             pm = (
@@ -1122,7 +1117,7 @@ def verify_command(
         payload = {
             "format_version": FORMAT_VERIFY,
             "summary": {"ok": True, "reason": "ok"},
-            "certificate": {"count": len(certificates)},
+            "evaluation_report": {"count": len(reports)},
             "results": results,
             "resolution": {"exit_code": 0},
         }
@@ -1130,7 +1125,7 @@ def verify_command(
     else:
         # Human-friendly success line
         try:
-            last = _load_certificate(certificates[-1]) if certificates else {}
+            last = _load_evaluation_report(reports[-1]) if reports else {}
             pm = last.get("primary_metric", {}) if isinstance(last, dict) else {}
             kind = str(pm.get("kind") or "").strip()
             ppl = last.get("ppl", {}) if isinstance(last, dict) else {}
@@ -1181,7 +1176,7 @@ def verify_command(
             "summary": {"ok": False, "reason": reason},
             "results": [
                 {
-                    "id": str(certificates[0]) if certificates else "",
+                    "id": str(reports[0]) if reports else "",
                     "schema_version": "v1",
                     "kind": "",
                     "ok": False,
@@ -1213,7 +1208,7 @@ def verify_command(
             "summary": {"ok": False, "reason": reason},
             "results": [
                 {
-                    "id": str(certificates[0]) if certificates else "",
+                    "id": str(reports[0]) if reports else "",
                     "schema_version": "v1",
                     "kind": "",
                     "ok": False,
invarlock/cli/determinism.py CHANGED
@@ -5,7 +5,7 @@ Centralizes:
 - Thread caps (OMP/MKL/etc + torch threads)
 - TF32 policy
 - torch deterministic algorithms
-- A structured "determinism level" for certificate provenance
+- A structured "determinism level" for evaluation report provenance
 """
 
 from __future__ import annotations
invarlock/cli/provenance.py CHANGED
@@ -2,7 +2,7 @@
 
 Provides a tiny, versioned schema describing the adapter family and the
 underlying library versions. This does not perform any edits; it only reads
-environment and import metadata to annotate reports/certificates.
+environment and import metadata to annotate evaluation artifacts.
 """
 
 from __future__ import annotations
@@ -46,12 +46,12 @@ def extract_adapter_provenance(adapter_name: str) -> AdapterProvenance:
         msg = (
             None
             if supported
-            else f"Use Compare & Certify (BYOE); {library} version unsupported (tested: {tested})"
+            else f"Use Compare & Evaluate (BYOE); {library} version unsupported (tested: {tested})"
         )
     except Exception:  # Package not installed or version unknown
         ver = None
         supported = False
-        msg = f"{library} not available; prefer Compare & Certify (BYOE) or install extras."
+        msg = f"{library} not available; prefer Compare & Evaluate (BYOE) or install extras."
 
     return AdapterProvenance(
         family=family,
invarlock/core/bootstrap.py CHANGED
@@ -6,7 +6,7 @@ Numerically stable bootstrap helpers for evaluation metrics.
 
 This module provides bias-corrected and accelerated (BCa) confidence
 intervals tailored for paired log-loss statistics used by the runner
-and evaluation certificate reports.
+and evaluation reports.
 """
 
 from __future__ import annotations
invarlock/core/retry.py CHANGED
@@ -2,11 +2,11 @@
 InvarLock Retry Controller
 =====================
 
-Manages retry logic for automated certification workflows with:
+Manages retry logic for automated evaluation workflows with:
 - Attempt budgets (max 3 attempts default)
 - Time budgets (optional timeout)
 - Parameter adjustment strategies per edit type
-- Certificate-driven retry decisions
+- Gate-driven retry decisions
 """
 
 from __future__ import annotations
@@ -19,7 +19,7 @@ __all__ = ["RetryController", "adjust_edit_params"]
 
 class RetryController:
     """
-    Controls retry logic for certificate-driven automation.
+    Controls retry logic for evaluation-report-driven automation.
 
     Features:
     - Attempt budget enforcement (default 3 max)
@@ -45,18 +45,18 @@ class RetryController:
         self.start_time = time.time()
         self.attempt_history: list[dict[str, Any]] = []
 
-    def should_retry(self, certificate_passed: bool) -> bool:
+    def should_retry(self, report_passed: bool) -> bool:
         """
         Determine if retry should be attempted.
 
         Args:
-            certificate_passed: Whether certificate validation passed
+            report_passed: Whether evaluation report gates passed
 
         Returns:
             True if retry should be attempted, False otherwise
         """
-        # If certificate passed, no retry needed
-        if certificate_passed:
+        # If report passed, no retry needed
+        if report_passed:
             return False
 
         # Check attempt budget (attempt count equals history length)
@@ -81,21 +81,21 @@ class RetryController:
     def record_attempt(
         self,
         attempt_num: int,
-        certificate_result: dict[str, Any],
+        report_result: dict[str, Any],
         edit_params: dict[str, Any],
     ) -> None:
         """Record details of an attempt for tracking."""
-        certificate_result = certificate_result or {}
+        report_result = report_result or {}
         edit_params = edit_params or {}
 
         self.attempt_history.append(
             {
                 "attempt": attempt_num,
                 "timestamp": time.time(),
-                "certificate_passed": certificate_result.get("passed", False),
+                "report_passed": report_result.get("passed", False),
                 "edit_params": edit_params.copy(),
-                "failures": certificate_result.get("failures", []),
-                "validation": certificate_result.get("validation", {}),
+                "failures": report_result.get("failures", []),
+                "validation": report_result.get("validation", {}),
             }
         )
@@ -114,7 +114,7 @@ def adjust_edit_params(
     edit_name: str,
     edit_params: dict[str, Any],
     attempt: int,
-    certificate_result: dict[str, Any] | None = None,
+    report_result: dict[str, Any] | None = None,
 ) -> dict[str, Any]:
     """
     Adjust edit parameters for retry attempt based on edit type and failure mode.
@@ -126,7 +126,7 @@ def adjust_edit_params(
        edit_name: Name of the edit operation
        edit_params: Current edit parameters
        attempt: Attempt number (1-indexed)
-        certificate_result: Optional certificate result for failure analysis
+        report_result: Optional evaluation report result for failure analysis
 
    Returns:
        Adjusted parameters for next attempt
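Taken together, the renamed retry API reads as below; a usage sketch assuming default construction (the constructor's parameters are not shown in this diff, and the `quant_rtn` parameters are illustrative):

```python
from invarlock.core.retry import RetryController, adjust_edit_params

controller = RetryController()  # assumption: default budgets (3-attempt max)

attempt = 1
edit_params = {"bits": 8}  # illustrative parameters for a quant_rtn edit
report_result = {"passed": False, "failures": ["drift band"], "validation": {}}

controller.record_attempt(attempt, report_result, edit_params)
if controller.should_retry(report_result["passed"]):
    edit_params = adjust_edit_params(
        "quant_rtn", edit_params, attempt + 1, report_result=report_result
    )
```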