invarlock 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. invarlock/__init__.py +3 -3
  2. invarlock/adapters/auto.py +2 -10
  3. invarlock/adapters/hf_loading.py +7 -7
  4. invarlock/adapters/hf_mixin.py +28 -5
  5. invarlock/assurance/__init__.py +15 -23
  6. invarlock/calibration/spectral_null.py +1 -1
  7. invarlock/cli/adapter_auto.py +1 -5
  8. invarlock/cli/app.py +57 -27
  9. invarlock/cli/commands/__init__.py +2 -2
  10. invarlock/cli/commands/calibrate.py +48 -4
  11. invarlock/cli/commands/{certify.py → evaluate.py} +69 -46
  12. invarlock/cli/commands/explain_gates.py +94 -51
  13. invarlock/cli/commands/export_html.py +11 -9
  14. invarlock/cli/commands/report.py +121 -47
  15. invarlock/cli/commands/run.py +274 -66
  16. invarlock/cli/commands/verify.py +84 -89
  17. invarlock/cli/determinism.py +1 -1
  18. invarlock/cli/provenance.py +3 -3
  19. invarlock/core/bootstrap.py +1 -1
  20. invarlock/core/retry.py +14 -14
  21. invarlock/core/runner.py +1 -1
  22. invarlock/edits/noop.py +2 -2
  23. invarlock/edits/quant_rtn.py +2 -2
  24. invarlock/eval/__init__.py +1 -1
  25. invarlock/eval/bench.py +11 -7
  26. invarlock/eval/primary_metric.py +1 -1
  27. invarlock/guards/spectral.py +2 -2
  28. invarlock/guards_ref/spectral_ref.py +1 -1
  29. invarlock/model_profile.py +16 -35
  30. invarlock/observability/health.py +38 -20
  31. invarlock/plugins/hf_bnb_adapter.py +32 -21
  32. invarlock/reporting/__init__.py +18 -4
  33. invarlock/reporting/html.py +7 -7
  34. invarlock/reporting/normalizer.py +2 -2
  35. invarlock/reporting/policy_utils.py +1 -1
  36. invarlock/reporting/primary_metric_utils.py +11 -11
  37. invarlock/reporting/render.py +126 -120
  38. invarlock/reporting/report.py +43 -37
  39. invarlock/reporting/{certificate.py → report_builder.py} +103 -99
  40. invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
  41. invarlock-0.3.9.dist-info/METADATA +303 -0
  42. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/RECORD +46 -46
  43. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/WHEEL +1 -1
  44. invarlock-0.3.7.dist-info/METADATA +0 -602
  45. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/entry_points.txt +0 -0
  46. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/licenses/LICENSE +0 -0
  47. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/top_level.txt +0 -0
invarlock/core/runner.py CHANGED
@@ -521,7 +521,7 @@ class CoreRunner:
                    f"Guard '{guard.name}' prepare failed: {e}"
                ) from e

-        # Store resolved policies in report for certificate
+        # Store resolved policies in report for evaluation report generation
         report.meta["tier_policies"] = tier_policies

         self._log_event(
invarlock/edits/noop.py CHANGED
@@ -1,7 +1,7 @@
-"""Built-in no-op edit used for baseline and Compare & Certify (BYOE).
+"""Built-in no-op edit used for baseline and Compare & Evaluate (BYOE).
 
 This edit does not modify the model and reports zero deltas. It exists to
-support baseline runs and Compare & Certify certification where the subject
+support baseline runs and Compare & Evaluate workflows where the subject
 checkpoint is produced outside of InvarLock.
 """
 
invarlock/edits/quant_rtn.py CHANGED
@@ -406,7 +406,7 @@ class RTNQuantEdit(ModelEdit):
             if layer_name not in modified_layers:
                 modified_layers.append(layer_name)
 
-        # Store edit plan for certificate generation
+        # Store edit plan for evaluation report generation
         modules_quantized = [r["module_name"] for r in quantization_results]
 
         edit_plan = {
@@ -424,7 +424,7 @@ class RTNQuantEdit(ModelEdit):
         return {
             "name": self.name,
             "plan_digest": f"rtn_quantization_{bitwidth}bit_{scope}",
-            "plan": edit_plan,  # Include the plan for certificate generation
+            "plan": edit_plan,  # Include the plan for evaluation report generation
             "deltas": {
                 "params_changed": total_params_quantized,
                 "sparsity": None,  # Quantization doesn't create sparsity
invarlock/eval/__init__.py CHANGED
@@ -1,7 +1,7 @@
 """Evaluation utilities (`invarlock.eval`).
 
 This package now focuses on evaluation helpers (metrics, data, probes).
-Reporting (report building, certificate, schema validation) has moved to
+Reporting (evaluation report building and schema validation) has moved to
 `invarlock.reporting`.
 """
 
invarlock/eval/bench.py CHANGED
@@ -976,17 +976,21 @@ def execute_scenario(
     except Exception:
         pass
 
-    # Generate certificate artifact when both runs produced reports
+    # Generate evaluation report artifact when both runs produced reports
     try:
         if bare_result.success and guarded_result.success:
-            from invarlock.reporting.certificate import make_certificate
+            from invarlock.reporting.report_builder import make_report
 
-            cert = make_certificate(guarded_result.report, bare_result.report)
-            cert_path = scenario_dir / "certificate.json"
-            cert_path.write_text(json.dumps(cert, indent=2), encoding="utf-8")
-            artifacts["certificate"] = str(cert_path)
+            evaluation_report = make_report(guarded_result.report, bare_result.report)
+            report_path = scenario_dir / "evaluation.report.json"
+            report_path.write_text(
+                json.dumps(evaluation_report, indent=2), encoding="utf-8"
+            )
+            artifacts["evaluation_report"] = str(report_path)
     except Exception as exc:
-        logger.warning(f"Certificate generation failed for {scenario_slug}: {exc}")
+        logger.warning(
+            f"Evaluation report generation failed for {scenario_slug}: {exc}"
+        )
 
     # Resolve epsilon from runtime or use config
     epsilon_used = config.epsilon
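Taken together, this hunk swaps 0.3.7's make_certificate/certificate.json pair for make_report and evaluation.report.json. A minimal sketch of the new call shape, assuming two successful run reports are available as dicts; the wrapper function below is illustrative rather than part of the package:

    import json
    from pathlib import Path

    from invarlock.reporting.report_builder import make_report

    def write_evaluation_report(guarded_report, bare_report, scenario_dir: Path) -> Path:
        # Build the evaluation report from the guarded and bare run reports,
        # then persist it next to the other scenario artifacts.
        evaluation_report = make_report(guarded_report, bare_report)
        report_path = scenario_dir / "evaluation.report.json"
        report_path.write_text(json.dumps(evaluation_report, indent=2), encoding="utf-8")
        return report_path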
invarlock/eval/primary_metric.py CHANGED
@@ -2,7 +2,7 @@
 Primary metric abstraction and minimal ppl_causal implementation (Phase 1).
 
 This module introduces a light-weight, task-agnostic metric interface and a
-registry so the runner/certificate can evolve beyond causal-LM perplexity.
+registry so the runner and evaluation report builder can evolve beyond causal-LM perplexity.
 
 Phase 1 goal: provide a ppl_causal metric and a helper that can compute point
 estimates directly from evaluation window aggregates already present in run
invarlock/guards/spectral.py CHANGED
@@ -61,7 +61,7 @@ def _bh_reject_families(
 
     order = sorted(
         range(n),
-        key=lambda idx: (float("inf") if not _finite01(pvals[idx]) else pvals[idx]),
+        key=lambda idx: float("inf") if not _finite01(pvals[idx]) else pvals[idx],
     )
     max_k = 0
     for rank, idx in enumerate(order, start=1):
@@ -344,7 +344,7 @@ class SpectralGuard(Guard):
         self.ignore_preview_inflation = bool(policy["ignore_preview_inflation"])
         self.config["ignore_preview_inflation"] = self.ignore_preview_inflation
 
-        # Optional hydration of baseline stats from policy (e.g., baseline certificate)
+        # Optional hydration of baseline stats from policy (e.g., baseline evaluation report)
         if "baseline_family_stats" in policy and isinstance(
             policy["baseline_family_stats"], dict
         ):
@@ -20,7 +20,7 @@ def bh_select(pvals: list[float], alpha: float) -> list[bool]:
 
     # Sort by p-value ascending while remembering original indices
     order = sorted(
-        range(n), key=lambda i: (float("inf") if not _finite01(pvals[i]) else pvals[i])
+        range(n), key=lambda i: float("inf") if not _finite01(pvals[i]) else pvals[i]
     )
     rejs_sorted = [False] * n
     max_k = 0
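Both this hunk and the _bh_reject_families hunk above only drop redundant parentheses around the sort key; the selection rule is unchanged. For reference, a self-contained sketch of that rule, assuming the standard Benjamini-Hochberg procedure with non-finite or out-of-range p-values ordered last and never selected (finite01 below stands in for the module's _finite01 helper):

    import math

    def bh_select_sketch(pvals: list[float], alpha: float) -> list[bool]:
        # Benjamini-Hochberg: reject the k smallest p-values, where k is the
        # largest rank with p_(k) <= (rank / n) * alpha.
        def finite01(p) -> bool:
            return isinstance(p, (int, float)) and math.isfinite(p) and 0.0 <= p <= 1.0

        n = len(pvals)
        order = sorted(range(n), key=lambda i: pvals[i] if finite01(pvals[i]) else float("inf"))
        max_k = 0
        for rank, idx in enumerate(order, start=1):
            if finite01(pvals[idx]) and pvals[idx] <= (rank / n) * alpha:
                max_k = rank
        selected = [False] * n
        for idx in order[:max_k]:
            selected[idx] = True
        return selected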
invarlock/model_profile.py CHANGED
@@ -6,11 +6,9 @@ from dataclasses import dataclass, field
 from typing import Any
 
 AutoTokenizer: Any | None = None
-GPT2Tokenizer: Any | None = None
 
 try:
     from transformers import AutoTokenizer as _AutoTokenizer
-    from transformers import GPT2Tokenizer as _GPT2Tokenizer
     from transformers.tokenization_utils_base import PreTrainedTokenizerBase
 except Exception:  # pragma: no cover - exercised only when transformers is absent
 
@@ -26,7 +24,6 @@ except Exception:  # pragma: no cover - exercised only when transformers is abse
 
 else:  # pragma: no cover - transformers optional
     AutoTokenizer = _AutoTokenizer
-    GPT2Tokenizer = _GPT2Tokenizer
 
 
 TokenizerFactory = Callable[[], tuple[PreTrainedTokenizerBase, str]]
@@ -177,12 +174,12 @@ def _make_bert_tokenizer(model_id: str):
 
 def _make_gpt2_tokenizer(model_id: str):
     def factory() -> tuple[PreTrainedTokenizerBase, str]:
-        if GPT2Tokenizer is None:
+        if AutoTokenizer is None:
            raise RuntimeError(
                "GPT-2 tokenizers require the 'transformers' extra. "
                "Install it with: pip install 'invarlock[adapters]'."
            )
-        tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+        tokenizer = AutoTokenizer.from_pretrained("gpt2")
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
         hash_value = _hash_tokenizer(tokenizer)
@@ -193,7 +190,7 @@ def _make_gpt2_tokenizer(model_id: str):
 
 def _make_causal_auto_tokenizer(model_id: str):
     def factory() -> tuple[PreTrainedTokenizerBase, str]:
-        if AutoTokenizer is None and GPT2Tokenizer is None:
+        if AutoTokenizer is None:
            raise RuntimeError(
                "Causal tokenizers require the 'transformers' extra. "
                "Install it with: pip install 'invarlock[adapters]'."
@@ -202,23 +199,15 @@ def _make_causal_auto_tokenizer(model_id: str):
         # local GPT-2 tokenizer if the model assets are not cached or network
         # access is denied.
         tokenizer = None
-        if AutoTokenizer is not None:
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
+        except Exception:
             try:
-                tokenizer = AutoTokenizer.from_pretrained(
-                    model_id, local_files_only=True
-                )
+                tokenizer = AutoTokenizer.from_pretrained(model_id)
             except Exception:
-                try:
-                    tokenizer = AutoTokenizer.from_pretrained(model_id)
-                except Exception:
-                    tokenizer = None
+                tokenizer = None
         if tokenizer is None:
-            if GPT2Tokenizer is None:
-                raise RuntimeError(
-                    "Tokenization requires the 'transformers' extra. "
-                    "Install it with: pip install 'invarlock[adapters]'."
-                )
-            tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+            tokenizer = AutoTokenizer.from_pretrained("gpt2")
         # Ensure padding/bos tokens are configured so downstream encoding
         # yields stable non-zero ids and a valid attention mask regardless of
         # environment defaults or tokenizer variants.
@@ -246,30 +235,22 @@ def _make_causal_auto_tokenizer(model_id: str):
 
 def _make_unknown_tokenizer(model_id: str):
     def factory() -> tuple[PreTrainedTokenizerBase, str]:
-        if AutoTokenizer is None and GPT2Tokenizer is None:
+        if AutoTokenizer is None:
            raise RuntimeError(
                "Text tokenization requires the 'transformers' extra. "
                "Install it with: pip install 'invarlock[adapters]'."
            )
         # Unknown families: try local-only first, then remote, then degrade to GPT-2
         tokenizer = None
-        if AutoTokenizer is not None:
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
+        except Exception:
             try:
-                tokenizer = AutoTokenizer.from_pretrained(
-                    model_id, local_files_only=True
-                )
+                tokenizer = AutoTokenizer.from_pretrained(model_id)
             except Exception:
-                try:
-                    tokenizer = AutoTokenizer.from_pretrained(model_id)
-                except Exception:
-                    tokenizer = None
+                tokenizer = None
         if tokenizer is None:
-            if GPT2Tokenizer is None:
-                raise RuntimeError(
-                    "Text tokenization requires the 'transformers' extra. "
-                    "Install it with: pip install 'invarlock[adapters]'."
-                )
-            tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+            tokenizer = AutoTokenizer.from_pretrained("gpt2")
         if getattr(tokenizer, "pad_token", None) is None:
             eos_token = getattr(tokenizer, "eos_token", None)
             if eos_token is not None:
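Across these hunks the GPT2Tokenizer import disappears and every factory funnels through AutoTokenizer with the same three-step fallback. A condensed sketch of that order, assuming the transformers extra is installed; the function name is illustrative:

    from transformers import AutoTokenizer

    def load_tokenizer_with_fallback(model_id: str):
        # 1) Prefer locally cached assets so offline runs stay deterministic.
        # 2) Fall back to a network fetch when the cache misses.
        # 3) Degrade to the stock GPT-2 tokenizer as a last resort.
        tokenizer = None
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
        except Exception:
            try:
                tokenizer = AutoTokenizer.from_pretrained(model_id)
            except Exception:
                tokenizer = None
        if tokenizer is None:
            tokenizer = AutoTokenizer.from_pretrained("gpt2")
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        return tokenizer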
invarlock/observability/health.py CHANGED
@@ -3,6 +3,7 @@ Health checking and status monitoring.
 """
 
 import logging
+import os
 import time
 import traceback
 from collections.abc import Callable
@@ -177,8 +178,12 @@ class HealthChecker:
 
         def check_cpu():
             """Check CPU usage."""
+            cpu_details: dict[str, Any] = {"percent": None, "core_count": None}
+            warnings: list[str] = []
+
             try:
                 cpu_percent = psutil.cpu_percent(interval=1)
+                cpu_details["percent"] = cpu_percent
 
                 if cpu_percent > 95:
                     status = HealthStatus.CRITICAL
@@ -189,28 +194,41 @@ class HealthChecker:
                 else:
                     status = HealthStatus.HEALTHY
                     message = f"CPU usage normal: {cpu_percent:.1f}%"
+            except Exception as e:
+                status = HealthStatus.CRITICAL
+                message = f"Failed to measure CPU usage: {e}"
+                cpu_details["error"] = str(e)
 
-                return ComponentHealth(
-                    name="cpu",
-                    status=status,
-                    message=message,
-                    details={
-                        "percent": cpu_percent,
-                        "core_count": psutil.cpu_count(),
-                        "load_avg": psutil.getloadavg()
-                        if hasattr(psutil, "getloadavg")
-                        else None,
-                    },
-                    timestamp=time.time(),
-                )
+            try:
+                core_count = psutil.cpu_count()
+                if core_count is None:
+                    core_count = os.cpu_count()
+                cpu_details["core_count"] = core_count
             except Exception as e:
-                return ComponentHealth(
-                    name="cpu",
-                    status=HealthStatus.CRITICAL,
-                    message=f"Failed to check CPU: {e}",
-                    details={"error": str(e)},
-                    timestamp=time.time(),
-                )
+                cpu_details["core_count"] = os.cpu_count()
+                warnings.append(f"cpu_count_unavailable: {e}")
+
+            try:
+                load_avg: Any | None = None
+                if hasattr(psutil, "getloadavg"):
+                    load_avg = psutil.getloadavg()
+                elif hasattr(os, "getloadavg"):
+                    load_avg = os.getloadavg()  # type: ignore[attr-defined]
+                cpu_details["load_avg"] = load_avg
+            except Exception as e:
+                cpu_details["load_avg"] = None
+                warnings.append(f"load_avg_unavailable: {e}")
+
+            if warnings:
+                cpu_details["warnings"] = warnings
+
+            return ComponentHealth(
+                name="cpu",
+                status=status,
+                message=message,
+                details=cpu_details,
+                timestamp=time.time(),
+            )
 
         def check_disk():
             """Check disk space."""
invarlock/plugins/hf_bnb_adapter.py CHANGED
@@ -7,7 +7,7 @@ through Transformers. Requires GPU for practical use.
 Install with the `gpu` extra on supported platforms.
 
 This adapter handles both:
-1. Fresh quantization of FP16 models (load_in_8bit/load_in_4bit)
+1. Fresh quantization of FP16 models (via quantization_config)
 2. Loading pre-quantized BNB checkpoints (auto-detected via quantization_config)
 """
 
@@ -55,16 +55,17 @@ def _detect_pre_quantized_bnb(model_id: str) -> tuple[bool, int]:
         if not quant_cfg:
             return False, 0
 
-        # Check for BNB quantization
-        quant_method = quant_cfg.get("quant_method", "").lower()
-        if quant_method == "bitsandbytes" or "load_in_8bit" in quant_cfg:
+        # Check for BNB quantization. Prefer explicit bits, then legacy flags.
+        quant_method = str(quant_cfg.get("quant_method", "")).lower()
+        if "bitsandbytes" in quant_method or "bnb" in quant_method:
+            bits = quant_cfg.get("bits")
+            if isinstance(bits, int) and bits in {4, 8}:
+                return True, bits
            if quant_cfg.get("load_in_8bit"):
                return True, 8
            if quant_cfg.get("load_in_4bit"):
                return True, 4
-            # Fallback to bits field
-            bits = quant_cfg.get("bits", 8)
-            return True, bits
+            return True, 8
 
     except Exception:
         pass
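The revised detector keys off quant_method and prefers an explicit bits value over the legacy flags. A few illustrative quantization_config payloads and the (is_pre_quantized, bits) results they would yield under the logic shown above (made-up values, not taken from real checkpoints):

    # (config payload, expected result) pairs under the detection logic above
    examples = [
        ({"quant_method": "bitsandbytes", "bits": 4}, (True, 4)),              # explicit bits wins
        ({"quant_method": "bitsandbytes", "load_in_8bit": True}, (True, 8)),   # legacy flag still honoured
        ({"quant_method": "bnb"}, (True, 8)),                                  # no hint -> 8-bit default
    ]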
@@ -82,11 +83,17 @@ class HF_BNB_Adapter(HFAdapterMixin, ModelAdapter):
             "DEPENDENCY-MISSING: transformers",
             lambda e: {"dependency": "transformers"},
         ):
-            from transformers import AutoModelForCausalLM
+            from transformers import AutoModelForCausalLM, BitsAndBytesConfig
 
         # Check if this is a pre-quantized checkpoint
         is_pre_quantized, pre_quant_bits = _detect_pre_quantized_bnb(model_id)
 
+        if "load_in_8bit" in kwargs or "load_in_4bit" in kwargs:
+            raise ValueError(
+                "hf_bnb adapter: load_in_8bit/load_in_4bit are not supported. "
+                "Use model.quantization_config instead."
+            )
+
         if is_pre_quantized:
             # Load pre-quantized checkpoint WITHOUT re-applying quantization
             with wrap_errors(
@@ -99,20 +106,25 @@ class HF_BNB_Adapter(HFAdapterMixin, ModelAdapter):
                     model_id,
                     device_map="auto",
                     trust_remote_code=True,
-                    # Do NOT pass load_in_8bit/load_in_4bit for pre-quantized
-                    **{
-                        k: v
-                        for k, v in kwargs.items()
-                        if k not in ("load_in_8bit", "load_in_4bit")
-                    },
+                    **kwargs,
                 )
         else:
             # Fresh quantization of FP16 model
-            load_in_8bit = bool(kwargs.pop("load_in_8bit", True))
-            load_in_4bit = bool(kwargs.pop("load_in_4bit", False))
-
-            if load_in_4bit:
-                load_in_8bit = False
+            quantization_config = kwargs.pop("quantization_config", None)
+            if quantization_config is None:
+                quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+            elif isinstance(quantization_config, dict):
+                qdict = dict(quantization_config)
+                bits = qdict.pop("bits", None)
+                qdict.pop("quant_method", None)
+                if isinstance(bits, int):
+                    if bits == 4:
+                        qdict.setdefault("load_in_4bit", True)
+                        qdict.setdefault("load_in_8bit", False)
+                    elif bits == 8:
+                        qdict.setdefault("load_in_8bit", True)
+                        qdict.setdefault("load_in_4bit", False)
+                quantization_config = BitsAndBytesConfig(**qdict)
 
             with wrap_errors(
@@ -123,9 +135,8 @@ class HF_BNB_Adapter(HFAdapterMixin, ModelAdapter):
                 model = AutoModelForCausalLM.from_pretrained(
                     model_id,
                     device_map="auto",
-                    load_in_8bit=load_in_8bit,
-                    load_in_4bit=load_in_4bit,
                     trust_remote_code=True,
+                    quantization_config=quantization_config,
                     **kwargs,
                 )
 
invarlock/reporting/__init__.py CHANGED
@@ -1,7 +1,21 @@
 """
-Reporting package (aliasing evaluation reporting modules).
+Evaluation report tooling (`invarlock.reporting`).
 
-Provides a stable unified import path for report schema, builders,
-and certificate generation while keeping backward compatibility with
-`invarlock.eval.*` imports.
+Provides the evaluation report schema, builder, and renderers.
 """
+
+from __future__ import annotations
+
+from .html import render_report_html
+from .render import render_report_markdown
+from .report_builder import make_report, validate_report
+from .report_schema import REPORT_JSON_SCHEMA, REPORT_SCHEMA_VERSION
+
+__all__ = [
+    "REPORT_SCHEMA_VERSION",
+    "REPORT_JSON_SCHEMA",
+    "make_report",
+    "render_report_markdown",
+    "render_report_html",
+    "validate_report",
+]
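With the package __init__ now re-exporting the builder and renderers, the public entry points live directly under invarlock.reporting. A minimal usage sketch, assuming guarded and bare run-report dicts from two runs and assuming validate_report accepts the built report; output file names are illustrative:

    from pathlib import Path

    from invarlock.reporting import (
        make_report,
        render_report_html,
        render_report_markdown,
        validate_report,
    )

    # Placeholders: run-report dicts produced by a guarded and a bare run.
    guarded_run: dict = {}
    bare_run: dict = {}

    evaluation_report = make_report(guarded_run, bare_run)
    validate_report(evaluation_report)
    Path("evaluation.report.md").write_text(render_report_markdown(evaluation_report), encoding="utf-8")
    Path("evaluation.report.html").write_text(render_report_html(evaluation_report), encoding="utf-8")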
invarlock/reporting/html.py CHANGED
@@ -1,5 +1,5 @@
 """
-Minimal HTML exporter for certificates.
+Minimal HTML exporter for reports.
 
 This implementation wraps the Markdown rendering in a simple HTML template so
 that the numbers and core content remain identical across formats.
@@ -10,7 +10,7 @@ from __future__ import annotations
 from html import escape
 from typing import Any
 
-from .render import render_certificate_markdown
+from .render import render_report_markdown
 
 markdown_module: Any | None = None
 try:
@@ -37,13 +37,13 @@ def _apply_status_badges(html_body: str) -> str:
     return updated
 
 
-def render_certificate_html(certificate: dict[str, Any]) -> str:
-    """Render a certificate as a simple HTML document.
+def render_report_html(evaluation_report: dict[str, Any]) -> str:
+    """Render an evaluation report as a simple HTML document.
 
     Uses the Markdown renderer and converts to HTML when available, falling back
     to a <pre> block when the markdown dependency is missing.
     """
-    md = render_certificate_markdown(certificate)
+    md = render_report_markdown(evaluation_report)
     if markdown_module is None:
         body = f'<pre class="invarlock-md">{escape(md)}</pre>'
     else:
@@ -52,7 +52,7 @@ def render_certificate_html(certificate: dict[str, Any]) -> str:
         body = f'<div class="invarlock-md">{html_body}</div>'
     return (
         '<!DOCTYPE html><html><head><meta charset="utf-8">'
-        "<title>InvarLock Evaluation Certificate</title>"
+        "<title>InvarLock Evaluation Report</title>"
         "<style>"
         ":root{--pass:#2da44e;--fail:#cf222e;--warn:#bf8700;--ink:#1f2328;"
         "--muted:#57606a;--panel:#f6f8fa;--border:#d0d7de}"
@@ -79,4 +79,4 @@ def render_certificate_html(certificate: dict[str, Any]) -> str:
     )
 
 
-__all__ = ["render_certificate_html"]
+__all__ = ["render_report_html"]
invarlock/reporting/normalizer.py CHANGED
@@ -33,7 +33,7 @@ def normalize_run_report(report: Mapping[str, Any] | RunReport) -> RunReport:
     """Coerce an arbitrary report-like mapping into a canonical RunReport.
 
     This is the single entry point for converting pre-canonical or loosely-typed
-    data into the strict PM-only RunReport shape used by certificate/report.
+    data into the strict PM-only RunReport shape used by evaluation report building.
     """
     src = _as_mapping(report)
 
@@ -53,7 +53,7 @@ def normalize_run_report(report: Mapping[str, Any] | RunReport) -> RunReport:
         "ts": ts,
         "auto": meta_in.get("auto") if isinstance(meta_in.get("auto"), dict) else None,
     }
-    # Preserve additional provenance knobs used by certificate/digests.
+    # Preserve additional provenance knobs used by evaluation report digests.
     for key in (
         "pm_acceptance_range",
         "pm_drift_band",
invarlock/reporting/policy_utils.py CHANGED
@@ -35,7 +35,7 @@ def _compute_thresholds_payload(
     tier: str, resolved_policy: dict[str, Any]
 ) -> dict[str, Any]:
     """Build canonical thresholds payload for digest stability."""
-    from .certificate import TIER_RATIO_LIMITS  # local to avoid cycles
+    from .report_builder import TIER_RATIO_LIMITS  # local to avoid cycles
 
     tier_lc = (tier or "balanced").lower()
     metrics_policy = (
invarlock/reporting/primary_metric_utils.py CHANGED
@@ -8,21 +8,21 @@ from .utils import _coerce_interval, _weighted_mean
 
 
 def attach_primary_metric(
-    certificate: dict[str, Any],
+    evaluation_report: dict[str, Any],
     report: dict[str, Any],
     baseline_raw: dict[str, Any] | None,
     baseline_ref: dict[str, Any] | None,
     ppl_analysis: dict[str, Any] | None,
 ) -> None:
-    """Attach/normalize the primary_metric block on the certificate.
+    """Attach/normalize the primary_metric block on the evaluation report.
 
-    Behavior mirrors historical logic in certificate.py and preserves structure:
+    Behavior mirrors historical logic in report_builder.py and preserves structure:
     - Prefer explicit metrics.primary_metric if present
     - Compute missing ratio_vs_baseline, degenerate display_ci
     - ppl window-based analysis info (mean logloss) added when available
     - Fallbacks for classification metrics and eval-window-derived ppl
     - Ensure display_ci always present for schema invariants
-    Mutates the certificate in-place.
+    Mutates the evaluation report in-place.
     """
     # Attach primary metric snapshot when provided in report
     try:
@@ -180,12 +180,12 @@ def attach_primary_metric(
                 ]
             except Exception:
                 pass
-        certificate["primary_metric"] = pm_copy
+        evaluation_report["primary_metric"] = pm_copy
     except Exception:
         pass
 
     def _attach_from_windows() -> None:
-        if isinstance(certificate.get("primary_metric"), dict):
+        if isinstance(evaluation_report.get("primary_metric"), dict):
             return
         try:
             m = (
@@ -212,7 +212,7 @@ def attach_primary_metric(
                 baseline=baseline_raw if isinstance(baseline_raw, dict) else None,
             )
             if isinstance(pm_block, dict) and pm_block:
-                certificate["primary_metric"] = pm_block
+                evaluation_report["primary_metric"] = pm_block
         except Exception:
             pass
 
@@ -220,7 +220,7 @@ def attach_primary_metric(
     _attach_from_windows()
 
     # Minimal fallback for classification-only reports without explicit primary_metric
-    if not isinstance(certificate.get("primary_metric"), dict):
+    if not isinstance(evaluation_report.get("primary_metric"), dict):
         try:
             metrics_map = report.get("metrics", {}) if isinstance(report, dict) else {}
             clf = (
@@ -298,7 +298,7 @@ def attach_primary_metric(
                     acc_pm["ratio_vs_baseline"] = delta_pp
                 except Exception:
                     pass
-            certificate["primary_metric"] = acc_pm
+            evaluation_report["primary_metric"] = acc_pm
         except Exception:
             pass
 
@@ -308,8 +308,8 @@ def attach_primary_metric(
     # Ensure primary_metric has display_ci populated for schema invariants
     try:
        pm = (
-            certificate.get("primary_metric", {})
-            if isinstance(certificate.get("primary_metric"), dict)
+            evaluation_report.get("primary_metric", {})
+            if isinstance(evaluation_report.get("primary_metric"), dict)
            else None
        )
        if isinstance(pm, dict) and pm:
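Apart from renaming the first parameter, the signature is unchanged: callers still pass the evaluation report first and the function mutates it in place. A hedged sketch of the call shape, with placeholder values standing in for real run data:

    from invarlock.reporting.primary_metric_utils import attach_primary_metric

    evaluation_report: dict = {}
    attach_primary_metric(
        evaluation_report,      # mutated in place when metric data is present
        report={},              # run report mapping (placeholder)
        baseline_raw=None,      # optional raw baseline run report
        baseline_ref=None,      # optional baseline reference block
        ppl_analysis=None,      # optional window-based ppl analysis
    )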