invarlock-0.3.7-py3-none-any.whl → invarlock-0.3.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. invarlock/__init__.py +3 -3
  2. invarlock/adapters/auto.py +2 -10
  3. invarlock/adapters/hf_loading.py +7 -7
  4. invarlock/adapters/hf_mixin.py +28 -5
  5. invarlock/assurance/__init__.py +15 -23
  6. invarlock/cli/adapter_auto.py +1 -5
  7. invarlock/cli/app.py +57 -27
  8. invarlock/cli/commands/__init__.py +2 -2
  9. invarlock/cli/commands/calibrate.py +48 -4
  10. invarlock/cli/commands/{certify.py → evaluate.py} +69 -46
  11. invarlock/cli/commands/explain_gates.py +25 -17
  12. invarlock/cli/commands/export_html.py +11 -9
  13. invarlock/cli/commands/report.py +116 -46
  14. invarlock/cli/commands/run.py +274 -66
  15. invarlock/cli/commands/verify.py +84 -89
  16. invarlock/cli/determinism.py +1 -1
  17. invarlock/cli/provenance.py +3 -3
  18. invarlock/core/bootstrap.py +1 -1
  19. invarlock/core/retry.py +14 -14
  20. invarlock/core/runner.py +1 -1
  21. invarlock/edits/noop.py +2 -2
  22. invarlock/edits/quant_rtn.py +2 -2
  23. invarlock/eval/__init__.py +1 -1
  24. invarlock/eval/bench.py +11 -7
  25. invarlock/eval/primary_metric.py +1 -1
  26. invarlock/guards/spectral.py +1 -1
  27. invarlock/model_profile.py +16 -35
  28. invarlock/plugins/hf_bnb_adapter.py +32 -21
  29. invarlock/reporting/__init__.py +18 -4
  30. invarlock/reporting/html.py +7 -7
  31. invarlock/reporting/normalizer.py +2 -2
  32. invarlock/reporting/policy_utils.py +1 -1
  33. invarlock/reporting/primary_metric_utils.py +11 -11
  34. invarlock/reporting/render.py +126 -120
  35. invarlock/reporting/report.py +43 -37
  36. invarlock/reporting/{certificate.py → report_builder.py} +98 -95
  37. invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
  38. invarlock-0.3.8.dist-info/METADATA +283 -0
  39. {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/RECORD +43 -43
  40. {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
  41. invarlock-0.3.7.dist-info/METADATA +0 -602
  42. {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +0 -0
  43. {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
  44. {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
invarlock/reporting/{certificate.py → report_builder.py}

@@ -1,16 +1,16 @@
 """
-InvarLock Evaluation Certificate Generation
-==========================================
+InvarLock Evaluation Report Generation
+=====================================

-Generate standardized evaluation certificates from RunReport and baseline
+Generate standardized evaluation reports from RunReport and baseline
 comparison.
-Certificates are standalone, portable verification artifacts that can be used
-for CI/CD gates and regulatory compliance.
+Evaluation reports are standalone, portable artifacts that record statistical
+gates and evidence for CI/CD checks and audits (not formal verification).
 """

 from __future__ import annotations

-## Core certificate generation and analysis orchestration lives here.
+## Core evaluation report building and analysis orchestration lives here.
 # mypy: ignore-errors
 import copy
 import hashlib
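Reviewer note: with the docstring now framing these artifacts as CI/CD evidence rather than certificates, the renamed public surface composes roughly as below. A minimal sketch, assuming JSON-serialized run reports on disk; the file names and gate wiring are illustrative, not from the package:

```python
import json
import sys

from invarlock.reporting.report_builder import make_report, validate_report

# Hypothetical inputs: a guarded RunReport and its step-0 baseline.
with open("guarded_run.json") as fh:
    run_report = json.load(fh)
with open("baseline_run.json") as fh:
    baseline = json.load(fh)

evaluation_report = make_report(run_report, baseline)
if not validate_report(evaluation_report):
    sys.exit("evaluation report failed structural validation")

# The `validation` block holds the boolean gates surfaced elsewhere in this diff.
gates = evaluation_report.get("validation", {})
sys.exit(0 if gates.get("primary_metric_acceptable", False) else 1)
```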
@@ -39,11 +39,7 @@ from invarlock.eval.primary_metric import compute_primary_metric_from_report, ge
 from invarlock.eval.tail_stats import evaluate_metric_tail
 from invarlock.utils.digest import hash_json

-from . import certificate_schema as _cert_schema
-from .certificate_schema import (
-    CERTIFICATE_JSON_SCHEMA,
-    CERTIFICATE_SCHEMA_VERSION,
-)
+from . import report_schema as _report_schema
 from .dataset_hashing import (
     _extract_dataset_info,
 )
@@ -53,10 +49,15 @@ from .guards_analysis import (
     _extract_spectral_analysis,
     _extract_variance_analysis,
 )
-from .report_types import RunReport, validate_report
+from .report_schema import (
+    REPORT_JSON_SCHEMA,
+    REPORT_SCHEMA_VERSION,
+)
+from .report_types import RunReport
+from .report_types import validate_report as validate_run_report

 # Expose compute_window_hash for tests that monkeypatch it
-# compute_window_hash used to be exposed via certificate; tests now patch
+# compute_window_hash used to be exposed via the evaluation report builder; tests now patch
 # dataset_hashing.compute_window_hash directly, so this import is no longer needed.
 from .utils import (
     _coerce_int,
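Reviewer note: the alias on the new `report_types` import is load-bearing. This module now defines its own `validate_report` for the built evaluation report (the rename of `validate_certificate`), so the RunReport-level validator must be re-bound as `validate_run_report` to avoid being shadowed. A toy illustration of the collision, with stand-in functions rather than the package's own:

```python
from typing import Any


def validate_run_report(report: dict[str, Any]) -> bool:
    """Stand-in for report_types.validate_report, imported under an alias."""
    return isinstance(report.get("meta"), dict)


def validate_report(evaluation_report: dict[str, Any]) -> bool:
    """Stand-in for the evaluation-report validator defined later in the module."""
    return evaluation_report.get("schema_version") == "v1"


# Without the alias, the second definition would shadow the first, and RunReport
# inputs would be checked against the wrong structure.
assert validate_run_report({"meta": {}}) and validate_report({"schema_version": "v1"})
```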
@@ -103,7 +104,7 @@ def _is_ppl_kind(name: Any) -> bool:


 ## NOTE: Deprecated helper `_get_ppl_final` was removed; callers should
-## use the normalized primary_metric block directly via make_certificate or
+## use the normalized primary_metric block directly via make_report or
 ## report processing utilities.


@@ -131,8 +132,8 @@ def _compute_edit_digest(report: dict) -> dict:
     return {"family": family, "impl_hash": impl_hash, "version": 1}


-def _compute_confidence_label(certificate: dict[str, Any]) -> dict[str, Any]:
-    """Compute certificate confidence label based on stability and CI width.
+def _compute_confidence_label(evaluation_report: dict[str, Any]) -> dict[str, Any]:
+    """Compute evaluation report confidence label based on stability and CI width.

     Heuristics:
     - High: ppl_acceptable=True, unstable=False, width <= 0.03 (ratio) or <= 1.0 pp for accuracy
@@ -140,7 +141,7 @@ def _compute_confidence_label(certificate: dict[str, Any]) -> dict[str, Any]:
     - Low: otherwise (floors unmet, failure, or missing bounds)
     Returns a dict with label, basis, width and threshold for transparency.
     """
-    validation = certificate.get("validation", {}) or {}
+    validation = evaluation_report.get("validation", {}) or {}
     pm_ok = bool(validation.get("primary_metric_acceptable", False))
     # Basis label shown in confidence block:
     # - For ppl-like metrics, use 'ppl_ratio' to reflect ratio width threshold
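Reviewer note: a hand-worked instance of the "High" criterion quoted in the docstring (the middle tier's exact wording falls between these hunks and is elided here):

```python
# Illustrative values only; the thresholds (0.03 ratio width, 1.0 pp for
# accuracy kinds) are the docstring's defaults and, per the hunks below, can be
# overridden via resolved_policy["confidence"].
pm_ok, unstable = True, False
lo, hi = 0.992, 1.016  # ratio-basis display CI
width = hi - lo        # 0.024 <= 0.03, so the label is "High"
print("High" if pm_ok and not unstable and width <= 0.03 else "not High")
```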
@@ -149,7 +150,7 @@ def _compute_confidence_label(certificate: dict[str, Any]) -> dict[str, Any]:
     basis = "primary_metric"
     lo = hi = float("nan")
     try:
-        pm = certificate.get("primary_metric", {}) or {}
+        pm = evaluation_report.get("primary_metric", {}) or {}
         kind = str(pm.get("kind", "") or "").lower()
         if isinstance(pm, dict) and pm and pm.get("display_ci"):
             dci = pm.get("display_ci")
@@ -170,7 +171,7 @@ def _compute_confidence_label(certificate: dict[str, Any]) -> dict[str, Any]:
     thr_ratio = 0.03  # 3% width for ratio
     thr_pp = 1.0  # 1.0 percentage point for accuracy kinds
     try:
-        pol = certificate.get("resolved_policy")
+        pol = evaluation_report.get("resolved_policy")
         if isinstance(pol, dict):
             conf_pol = pol.get("confidence")
             if isinstance(conf_pol, dict):
@@ -187,7 +188,7 @@ def _compute_confidence_label(certificate: dict[str, Any]) -> dict[str, Any]:

     # Unstable hint from primary metric (if provided)
     try:
-        unstable = bool((certificate.get("primary_metric") or {}).get("unstable"))
+        unstable = bool((evaluation_report.get("primary_metric") or {}).get("unstable"))
     except Exception:  # pragma: no cover
         unstable = False

@@ -213,39 +214,39 @@ def _compute_confidence_label(certificate: dict[str, Any]) -> dict[str, Any]:
     }


-# Minimal JSON Schema describing the canonical shape of a certificate.
+# Minimal JSON Schema describing the canonical shape of an evaluation report.
 # This focuses on structural validity; numerical thresholds are validated
 # separately in metric-specific logic.
-# JSON Schema is provided by certificate_schema; no duplication here.
+# JSON Schema is provided by report_schema; no duplication here.


 # Mirror jsonschema and structural validator for test monkeypatching compatibility.
-jsonschema = getattr(_cert_schema, "jsonschema", None)
+jsonschema = getattr(_report_schema, "jsonschema", None)


-def _validate_with_jsonschema(certificate: dict[str, Any]) -> bool:
+def _validate_with_jsonschema(evaluation_report: dict[str, Any]) -> bool:
     if jsonschema is None:
         return True
     try:
-        jsonschema.validate(instance=certificate, schema=CERTIFICATE_JSON_SCHEMA)
+        jsonschema.validate(instance=evaluation_report, schema=REPORT_JSON_SCHEMA)
         return True
     except Exception:  # pragma: no cover
         return False


-def validate_certificate(certificate: dict[str, Any]) -> bool:
-    """Validate that a certificate has all required fields and valid data."""
+def validate_report(evaluation_report: dict[str, Any]) -> bool:
+    """Validate that an evaluation report has all required fields and valid data."""
     try:
-        if certificate.get("schema_version") != CERTIFICATE_SCHEMA_VERSION:
+        if evaluation_report.get("schema_version") != REPORT_SCHEMA_VERSION:
             return False
         # Prefer JSON Schema structural validation; if unavailable or too strict,
         # fall back to a lenient minimal check used by unit tests.
-        if not _validate_with_jsonschema(certificate):
+        if not _validate_with_jsonschema(evaluation_report):
             # Minimal fallback: require schema version + run_id + primary_metric
-            run_id_ok = isinstance(certificate.get("run_id"), str) and bool(
-                certificate.get("run_id")
+            run_id_ok = isinstance(evaluation_report.get("run_id"), str) and bool(
+                evaluation_report.get("run_id")
             )
-            pm = certificate.get("primary_metric")
+            pm = evaluation_report.get("primary_metric")
             pm_ok = isinstance(pm, dict) and (
                 isinstance(pm.get("final"), int | float)
                 or (isinstance(pm.get("kind"), str) and bool(pm.get("kind")))
@@ -253,7 +254,7 @@ def validate_certificate(certificate: dict[str, Any]) -> bool:
             if not (run_id_ok and pm_ok):
                 return False

-        validation = certificate.get("validation", {})
+        validation = evaluation_report.get("validation", {})
         for flag in [
             "preview_final_drift_acceptable",
             "primary_metric_acceptable",
@@ -428,8 +429,8 @@ def _load_validation_allowlist() -> set[str]:
 # disallow unknown validation keys at schema level.
 try:
     _vkeys = _load_validation_allowlist()
-    if isinstance(CERTIFICATE_JSON_SCHEMA.get("properties"), dict):
-        vspec = CERTIFICATE_JSON_SCHEMA["properties"].get("validation")
+    if isinstance(REPORT_JSON_SCHEMA.get("properties"), dict):
+        vspec = REPORT_JSON_SCHEMA["properties"].get("validation")
         if isinstance(vspec, dict):
             vspec["properties"] = {k: {"type": "boolean"} for k in _vkeys}
             vspec["additionalProperties"] = False
@@ -446,7 +447,7 @@ except Exception:  # pragma: no cover
 def _normalize_and_validate_report(report: RunReport | dict[str, Any]) -> RunReport:
     """Normalize a possibly-minimal report and validate its structure.

-    Uses the local normalizer when available, then checks `validate_report`.
+    Uses the local normalizer when available, then checks `validate_run_report`.
     Raises ValueError on invalid input. Returns the normalized RunReport.
     """
     try:
@@ -456,13 +457,13 @@ def _normalize_and_validate_report(report: RunReport | dict[str, Any]) -> RunRep
         report = _norm(report)
     except Exception:  # pragma: no cover
         pass
-    if not validate_report(report):
+    if not validate_run_report(report):
         raise ValueError("Invalid RunReport structure")
     return report


-def _extract_certificate_meta(report: RunReport) -> dict[str, Any]:
-    """Extract the certificate metadata block with a full seed bundle."""
+def _extract_report_meta(report: RunReport) -> dict[str, Any]:
+    """Extract the evaluation report metadata block with a full seed bundle."""
     meta_section = (
         report.get("meta", {}) if isinstance(report.get("meta"), dict) else {}
     )
@@ -739,22 +740,22 @@ def _fallback_paired_windows(
     return paired_windows


-def make_certificate(
+def make_report(
     report: RunReport,
     baseline: RunReport | dict[str, Any],
 ) -> dict[str, Any]:
     """
-    Generate an evaluation certificate from a RunReport and baseline comparison.
+    Generate an evaluation report from a RunReport and baseline comparison.

-    The certificate is a standalone, portable artifact that contains all
-    essential metrics and comparisons needed for safety verification.
+    The evaluation report is a standalone, portable artifact that contains all
+    essential paired metrics and comparisons used by InvarLock gates.

     Args:
-        report: The guarded run report to certify
+        report: The guarded run report to evaluate
         baseline: Step-0 baseline RunReport or baseline metrics dict

     Returns:
-        Certificate dictionary with all required fields
+        Evaluation report dictionary with all required fields

     Raises:
         ValueError: If inputs are invalid or required data is missing
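Reviewer note: per the Args/Raises contract above, callers are expected to guard the build step. A hedged sketch; `run_report` and `baseline` are placeholders for a guarded RunReport and its step-0 baseline:

```python
from invarlock.reporting.report_builder import make_report

run_report: dict = {}  # placeholder; a real guarded RunReport goes here
baseline: dict = {}    # placeholder; step-0 baseline RunReport or metrics dict

try:
    evaluation_report = make_report(run_report, baseline)
except ValueError as exc:
    # Raised when inputs are invalid or required data is missing
    # (empty placeholders like the ones above take this path).
    raise SystemExit(f"cannot build evaluation report: {exc}")
```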
@@ -778,11 +779,11 @@ def make_certificate(
         baseline_report = None

     # Extract core metadata with full seed bundle
-    meta = _extract_certificate_meta(report)
+    meta = _extract_report_meta(report)

     # Propagate environment flags captured in the RunReport (e.g., deterministic algos,
     # TF32 controls, MPS/CUDA availability). This is useful for auditability and
-    # reproducibility of certification runs.
+    # reproducibility of evaluation runs.
     try:
         env_flags = (
             report.get("meta", {}).get("env_flags")
@@ -1602,7 +1603,7 @@ def make_certificate(
     if device_name:
         telemetry.setdefault("device", device_name)

-    # Build the certificate
+    # Build the evaluation report
     window_capacity_ctx = (
         report.get("metrics", {}).get("window_capacity")
         if isinstance(report.get("metrics"), dict)
@@ -1920,8 +1921,8 @@ def make_certificate(
         k: bool(v) for k, v in validation_flags.items() if k in _allowed_validation
     }

-    certificate = {
-        "schema_version": CERTIFICATE_SCHEMA_VERSION,
+    evaluation_report = {
+        "schema_version": REPORT_SCHEMA_VERSION,
         "run_id": current_run_id,
         "meta": meta,
         "auto": auto,
@@ -1964,8 +1965,8 @@ def make_certificate(
     _tiny_relax_env = False
     if _tiny_relax_env:
         try:
-            certificate.setdefault("auto", {})["tiny_relax"] = True
-            prov = certificate.setdefault("provenance", {})
+            evaluation_report.setdefault("auto", {})["tiny_relax"] = True
+            prov = evaluation_report.setdefault("provenance", {})
             flags = prov.setdefault("flags", [])
             if "tiny_relax" not in flags:
                 flags.append("tiny_relax")
@@ -1991,12 +1992,12 @@ def make_certificate(
             and "value" in qo
             and math.isfinite(float(qo.get("value", float("nan"))))
         ):
-            certificate["quality_overhead"] = qo
+            evaluation_report["quality_overhead"] = qo
     except Exception:  # pragma: no cover
         pass

     try:
-        _propagate_pairing_stats(certificate, ppl_analysis)
+        _propagate_pairing_stats(evaluation_report, ppl_analysis)
     except Exception:  # pragma: no cover
         pass

@@ -2057,7 +2058,7 @@ def make_certificate(
         (resolved_policy.get("variance") or {}).get("min_effect_lognll", 0.0) or 0.0
     )

-    certificate["policy_digest"] = {
+    evaluation_report["policy_digest"] = {
         "policy_version": POLICY_VERSION,
         "tier_policy_name": cur_tier,
         "thresholds_hash": thresholds_hash,
@@ -2088,7 +2089,7 @@ def make_certificate(
                     payload[key] = item[key]
                 sanitized.append(payload)
             if sanitized:
-                certificate["secondary_metrics"] = sanitized
+                evaluation_report["secondary_metrics"] = sanitized
     except Exception:  # pragma: no cover
         pass

@@ -2136,7 +2137,7 @@ def make_certificate(
             except Exception:  # pragma: no cover
                 continue
         if out:
-            certificate["classification"] = {"subgroups": out}
+            evaluation_report["classification"] = {"subgroups": out}
     except Exception:  # pragma: no cover
         pass

@@ -2152,7 +2153,7 @@ def make_certificate(
             if isinstance(container.get("metrics"), dict)
             else {}
         )
-        # Edited report case: also check certificate telemetry keys
+        # Edited report case: also check evaluation_report telemetry keys
        telem = telemetry if isinstance(telemetry, dict) else {}
        # Prefer explicit p50/p95 throughput keys if present
        for key in ("latency_ms_p50", "latency_ms_p95", "throughput_sps"):
@@ -2193,24 +2194,24 @@ def make_certificate(
                 entry["ratio"] = float("nan")
             system_overhead[metric_key] = entry
         if system_overhead:
-            certificate["system_overhead"] = system_overhead
+            evaluation_report["system_overhead"] = system_overhead
     except Exception:  # pragma: no cover
         pass

     # Attach/normalize primary metric block (moved to helper)
     from .primary_metric_utils import attach_primary_metric as _attach_pm

-    _attach_pm(certificate, report, baseline_raw, baseline_ref, ppl_analysis)
+    _attach_pm(evaluation_report, report, baseline_raw, baseline_ref, ppl_analysis)
     try:
         if isinstance(pm_drift_band, dict) and pm_drift_band:
-            pm_block = certificate.get("primary_metric")
+            pm_block = evaluation_report.get("primary_metric")
             if isinstance(pm_block, dict):
                 pm_block.setdefault("drift_band", dict(pm_drift_band))
     except Exception:  # pragma: no cover
         pass
     _enforce_display_ci_alignment(
         ratio_ci_source,
-        certificate.get("primary_metric"),
+        evaluation_report.get("primary_metric"),
         logloss_delta_ci,
         window_plan_profile,
     )
@@ -2218,8 +2219,8 @@ def make_certificate(
     # Ensure primary_metric has display_ci populated for schema invariants
     try:
         pm = (
-            certificate.get("primary_metric", {})
-            if isinstance(certificate.get("primary_metric"), dict)
+            evaluation_report.get("primary_metric", {})
+            if isinstance(evaluation_report.get("primary_metric"), dict)
             else None
         )
         if isinstance(pm, dict) and pm:
@@ -2259,8 +2260,8 @@ def make_certificate(
         if not kind:
             kind = "ppl"
         windows_cfg = (
-            certificate.get("dataset", {}).get("windows", {})
-            if isinstance(certificate.get("dataset"), dict)
+            evaluation_report.get("dataset", {}).get("windows", {})
+            if isinstance(evaluation_report.get("dataset"), dict)
             else {}
         )
         n_prev = windows_cfg.get("preview")
@@ -2268,7 +2269,7 @@ def make_certificate(
         tokens_total = None
         try:
             tokens_total = (
-                certificate.get("dataset", {}).get("hash", {}).get("total_tokens")
+                evaluation_report.get("dataset", {}).get("hash", {}).get("total_tokens")
             )
         except Exception:  # pragma: no cover
             tokens_total = None
@@ -2276,7 +2277,7 @@ def make_certificate(
         ci_lo = None
         ci_hi = None
         ratio = None
-        pmc = certificate.get("primary_metric", {})
+        pmc = evaluation_report.get("primary_metric", {})
         rci = pmc.get("display_ci") or pmc.get("ci")
         if isinstance(rci, tuple | list) and len(rci) == 2:
             ci_lo, ci_hi = rci[0], rci[1]
@@ -2288,7 +2289,7 @@ def make_certificate(
         except Exception:  # pragma: no cover
             ci_w = None
         # Gate outcome
-        val = certificate.get("validation", {})
+        val = evaluation_report.get("validation", {})
         gate_ok = None
         try:
             gate_ok = bool(val.get("primary_metric_acceptable"))
@@ -2303,10 +2304,10 @@ def make_certificate(
             f"tokens={tokens_total}",
         ]
         try:
-            split = (certificate.get("provenance", {}) or {}).get("dataset_split")
+            split = (evaluation_report.get("provenance", {}) or {}).get("dataset_split")
             if not split:
                 split = (report.get("provenance", {}) or {}).get("dataset_split")
-            sf = (certificate.get("provenance", {}) or {}).get("split_fallback")
+            sf = (evaluation_report.get("provenance", {}) or {}).get("split_fallback")
             if sf is None:
                 sf = (report.get("provenance", {}) or {}).get("split_fallback")
             if split:
@@ -2322,7 +2323,7 @@ def make_certificate(
         if isinstance(gate_ok, bool):
             parts.append(f"gate={'pass' if gate_ok else 'fail'}")
         summary_line = "INVARLOCK_TELEMETRY " + " ".join(parts)
-        certificate.setdefault("telemetry", {})["summary_line"] = summary_line
+        evaluation_report.setdefault("telemetry", {})["summary_line"] = summary_line
         if str(os.environ.get("INVARLOCK_TELEMETRY", "")).strip().lower() in {
             "1",
             "true",
@@ -2335,17 +2336,17 @@ def make_certificate(

     # Attach confidence label (non-gating)
     try:
-        certificate["confidence"] = _compute_confidence_label(certificate)
+        evaluation_report["confidence"] = _compute_confidence_label(evaluation_report)
     except Exception:  # pragma: no cover
         pass

-    return certificate
+    return evaluation_report


 # Console Validation Block helpers have moved to invarlock.reporting.render.


-## NOTE: render_certificate_markdown has been moved to invarlock.reporting.render.
+## NOTE: render_report_markdown has been moved to invarlock.reporting.render.
 ## It is re-exported at the bottom of this module to preserve the public API.
 ## Private helper functions

@@ -2623,7 +2624,7 @@ def _extract_structural_deltas(report: RunReport) -> dict[str, Any]:
 def _extract_edit_metadata(
     report: RunReport, plugin_provenance: dict[str, Any]
 ) -> dict[str, Any]:
-    """Extract edit-level provenance and configuration metadata for the certificate."""
+    """Extract edit-level provenance and configuration metadata for the evaluation report."""

     edit_section = _get_mapping(report, "edit")
     if not edit_section:
@@ -3020,12 +3021,12 @@ def _compute_quality_overhead_from_guard(


 def _propagate_pairing_stats(
-    certificate: dict[str, Any], ppl_analysis: dict[str, Any] | None
+    evaluation_report: dict[str, Any], ppl_analysis: dict[str, Any] | None
 ) -> None:
-    """Surface pairing statistics inside certificate.dataset.windows.stats."""
-    if not isinstance(certificate, dict):
+    """Surface pairing statistics inside evaluation_report.dataset.windows.stats."""
+    if not isinstance(evaluation_report, dict):
         return
-    ds = certificate.get("dataset", {})
+    ds = evaluation_report.get("dataset", {})
     if not isinstance(ds, dict):
         return
     windows = ds.get("windows", {})
@@ -3079,7 +3080,7 @@ def _propagate_pairing_stats(
     windows["stats"] = stats
     if windows is not ds.get("windows"):
         ds["windows"] = windows
-    certificate["dataset"] = ds
+    evaluation_report["dataset"] = ds


 def _build_provenance_block(
@@ -3372,7 +3373,7 @@ def _compute_validation_flags(
     pm_drift_band: dict[str, float] | None = None,
     pm_tail: dict[str, Any] | None = None,
 ) -> dict[str, bool]:
-    """Compute validation flags for the certificate including canonical gates."""
+    """Compute validation flags for the evaluation report including canonical gates."""
     tier = (tier or "balanced").lower()
     # Dev-only tiny relax: widen gates and lower floors when explicitly requested
     import os as _os
@@ -3613,7 +3614,7 @@ def _compute_validation_flags(
         if _tiny_relax and threshold_val < 0.10:
             threshold_val = 0.10
         if not math.isfinite(ratio_val):
-            # In dev/Compare-&-Certify flows we often lack a bare run; treat missing metric as pass
+            # In dev/Compare-&-Evaluate flows we often lack a bare run; treat missing metric as pass
             guard_overhead_pass = True
         else:
             guard_overhead_pass = ratio_val <= (1.0 + max(0.0, threshold_val))
@@ -3769,7 +3770,7 @@ def _generate_run_id(report: RunReport) -> str:
     return hashlib.sha256(base_str.encode()).hexdigest()[:16]


-## NOTE: _compute_certificate_hash moved to invarlock.reporting.render and is re-exported below.
+## NOTE: _compute_report_hash moved to invarlock.reporting.render and is re-exported below.


 def _analyze_bitwidth_map(bitwidth_map: dict[str, Any]) -> dict[str, Any]:
@@ -4114,22 +4115,24 @@ def _extract_compression_diagnostics(

 # Re-export rendering API from dedicated module to avoid bloat/cycles
 # Rendering helpers live in invarlock.reporting.render; internal code should import there directly.
-# Tests and public API expect render_certificate_markdown to be available from
-# invarlock.reporting.certificate. Import lazily at module end to avoid cycles with
+# Tests and public API expect render_report_markdown to be available from
+# invarlock.reporting.report_builder. Import lazily at module end to avoid cycles with
 # invarlock.reporting.render which imports this module as a namespace.
 try:  # pragma: no cover - simple re-export
     from .render import (
         compute_console_validation_block,  # type: ignore
-        render_certificate_markdown,  # type: ignore
+        render_report_markdown,  # type: ignore
     )
 except Exception:  # pragma: no cover - defensive fallback

-    def render_certificate_markdown(certificate: dict[str, Any]) -> str:  # type: ignore
+    def render_report_markdown(evaluation_report: dict[str, Any]) -> str:  # type: ignore
         raise ImportError(
-            "render_certificate_markdown is unavailable; rendering dependencies missing"
+            "render_report_markdown is unavailable; rendering dependencies missing"
         )

-    def compute_console_validation_block(certificate: dict[str, Any]) -> dict[str, Any]:  # type: ignore
+    def compute_console_validation_block(
+        evaluation_report: dict[str, Any],
+    ) -> dict[str, Any]:  # type: ignore
         raise ImportError(
             "compute_console_validation_block is unavailable; rendering dependencies missing"
         )
@@ -4137,12 +4140,12 @@ except Exception:  # pragma: no cover - defensive fallback

 # Export public API
 __all__ = [
-    "make_certificate",
-    "validate_certificate",
+    "make_report",
+    "validate_report",
     "_validate_with_jsonschema",
     "jsonschema",
-    "render_certificate_markdown",
+    "render_report_markdown",
     "compute_console_validation_block",
-    "CERTIFICATE_SCHEMA_VERSION",
-    "CERTIFICATE_JSON_SCHEMA",
+    "REPORT_SCHEMA_VERSION",
+    "REPORT_JSON_SCHEMA",
 ]

invarlock/reporting/{certificate_schema.py → report_schema.py}

@@ -11,16 +11,16 @@ except Exception:  # pragma: no cover
     jsonschema = None


-# Certificate schema version (PM-first canonical)
-CERTIFICATE_SCHEMA_VERSION = "v1"
+# Evaluation report schema version (PM-first canonical)
+REPORT_SCHEMA_VERSION = "v1"


-# Minimal JSON Schema describing the canonical shape of a certificate.
+# Minimal JSON Schema describing the canonical shape of an evaluation report.
 # This focuses on structural validity; numerical thresholds are validated
 # separately in metric-specific logic.
-CERTIFICATE_JSON_SCHEMA: dict[str, Any] = {
+REPORT_JSON_SCHEMA: dict[str, Any] = {
     "$schema": "https://json-schema.org/draft/2020-12/schema",
-    "title": "InvarLock Evaluation Certificate",
+    "title": "InvarLock Evaluation Report",
     "type": "object",
     "required": [
         "schema_version",
@@ -32,7 +32,7 @@ CERTIFICATE_JSON_SCHEMA: dict[str, Any] = {
         "primary_metric",
     ],
     "properties": {
-        "schema_version": {"const": CERTIFICATE_SCHEMA_VERSION},
+        "schema_version": {"const": REPORT_SCHEMA_VERSION},
         "run_id": {"type": "string", "minLength": 4},
         "edit_name": {"type": "string"},
         "policy_digest": {
@@ -179,21 +179,21 @@ def _load_validation_allowlist() -> set[str]:
     return set(_VALIDATION_ALLOWLIST_DEFAULT)


-def _validate_with_jsonschema(certificate: dict[str, Any]) -> bool:
-    """Validate certificate with JSON Schema when available."""
+def _validate_with_jsonschema(report: dict[str, Any]) -> bool:
+    """Validate evaluation report with JSON Schema when available."""
     if jsonschema is None:
         return True  # Schema library unavailable; fall back to minimal checks
     try:
-        jsonschema.validate(instance=certificate, schema=CERTIFICATE_JSON_SCHEMA)
+        jsonschema.validate(instance=report, schema=REPORT_JSON_SCHEMA)
         return True
     except Exception:
         return False


-def validate_certificate(certificate: dict[str, Any]) -> bool:
-    """Validate certificate structure and essential flags."""
+def validate_report(report: dict[str, Any]) -> bool:
+    """Validate evaluation report structure and essential flags."""
     try:
-        if certificate.get("schema_version") != CERTIFICATE_SCHEMA_VERSION:
+        if report.get("schema_version") != REPORT_SCHEMA_VERSION:
             return False

         # Prefer JSON Schema structural validation; if unavailable or too strict,
@@ -202,20 +202,20 @@ def validate_certificate(certificate: dict[str, Any]) -> bool:
         # disallow unknown validation keys at schema level.
         try:
             vkeys = _load_validation_allowlist()
-            if isinstance(CERTIFICATE_JSON_SCHEMA.get("properties"), dict):
-                vspec = CERTIFICATE_JSON_SCHEMA["properties"].get("validation")
+            if isinstance(REPORT_JSON_SCHEMA.get("properties"), dict):
+                vspec = REPORT_JSON_SCHEMA["properties"].get("validation")
                 if isinstance(vspec, dict):
                     vspec["properties"] = {k: {"type": "boolean"} for k in vkeys}
                     vspec["additionalProperties"] = False
         except Exception:
             pass

-        if not _validate_with_jsonschema(certificate):
+        if not _validate_with_jsonschema(report):
             # Minimal fallback: require schema version + run_id + primary_metric
-            run_id_ok = isinstance(certificate.get("run_id"), str) and bool(
-                certificate.get("run_id")
+            run_id_ok = isinstance(report.get("run_id"), str) and bool(
+                report.get("run_id")
             )
-            pm = certificate.get("primary_metric")
+            pm = report.get("primary_metric")
             pm_ok = isinstance(pm, dict) and (
                 isinstance(pm.get("final"), int | float)
                 or (isinstance(pm.get("kind"), str) and bool(pm.get("kind")))
@@ -223,7 +223,7 @@ def validate_certificate(certificate: dict[str, Any]) -> bool:
             if not (run_id_ok and pm_ok):
                 return False

-        validation = certificate.get("validation", {})
+        validation = report.get("validation", {})
         for flag in [
             "preview_final_drift_acceptable",
             "primary_metric_acceptable",
@@ -242,7 +242,7 @@ def validate_certificate(certificate: dict[str, Any]) -> bool:


 __all__ = [
-    "CERTIFICATE_SCHEMA_VERSION",
-    "CERTIFICATE_JSON_SCHEMA",
-    "validate_certificate",
+    "REPORT_SCHEMA_VERSION",
+    "REPORT_JSON_SCHEMA",
+    "validate_report",
 ]