invarlock-0.3.6-py3-none-any.whl → invarlock-0.3.8-py3-none-any.whl

This diff compares the published contents of two package versions released to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
Files changed (73)
  1. invarlock/__init__.py +4 -4
  2. invarlock/adapters/__init__.py +10 -14
  3. invarlock/adapters/auto.py +37 -50
  4. invarlock/adapters/capabilities.py +2 -2
  5. invarlock/adapters/hf_causal.py +418 -0
  6. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  7. invarlock/adapters/hf_loading.py +7 -7
  8. invarlock/adapters/hf_mixin.py +53 -9
  9. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  10. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  11. invarlock/assurance/__init__.py +15 -23
  12. invarlock/cli/adapter_auto.py +32 -26
  13. invarlock/cli/app.py +128 -27
  14. invarlock/cli/commands/__init__.py +2 -2
  15. invarlock/cli/commands/calibrate.py +48 -4
  16. invarlock/cli/commands/doctor.py +8 -10
  17. invarlock/cli/commands/evaluate.py +986 -0
  18. invarlock/cli/commands/explain_gates.py +25 -17
  19. invarlock/cli/commands/export_html.py +11 -9
  20. invarlock/cli/commands/plugins.py +13 -9
  21. invarlock/cli/commands/report.py +326 -92
  22. invarlock/cli/commands/run.py +1160 -228
  23. invarlock/cli/commands/verify.py +157 -97
  24. invarlock/cli/config.py +1 -1
  25. invarlock/cli/determinism.py +1 -1
  26. invarlock/cli/doctor_helpers.py +4 -5
  27. invarlock/cli/output.py +193 -0
  28. invarlock/cli/provenance.py +4 -4
  29. invarlock/core/bootstrap.py +1 -1
  30. invarlock/core/registry.py +9 -11
  31. invarlock/core/retry.py +14 -14
  32. invarlock/core/runner.py +112 -26
  33. invarlock/edits/noop.py +2 -2
  34. invarlock/edits/quant_rtn.py +67 -39
  35. invarlock/eval/__init__.py +1 -1
  36. invarlock/eval/bench.py +14 -10
  37. invarlock/eval/data.py +68 -23
  38. invarlock/eval/metrics.py +59 -1
  39. invarlock/eval/primary_metric.py +1 -1
  40. invarlock/eval/tasks/__init__.py +12 -0
  41. invarlock/eval/tasks/classification.py +48 -0
  42. invarlock/eval/tasks/qa.py +36 -0
  43. invarlock/eval/tasks/text_generation.py +102 -0
  44. invarlock/guards/invariants.py +19 -10
  45. invarlock/guards/rmt.py +2 -2
  46. invarlock/guards/spectral.py +1 -1
  47. invarlock/guards/variance.py +2 -2
  48. invarlock/model_profile.py +64 -62
  49. invarlock/observability/health.py +6 -6
  50. invarlock/observability/metrics.py +108 -0
  51. invarlock/plugins/hf_bnb_adapter.py +32 -21
  52. invarlock/reporting/__init__.py +18 -4
  53. invarlock/reporting/guards_analysis.py +154 -4
  54. invarlock/reporting/html.py +61 -11
  55. invarlock/reporting/normalizer.py +9 -2
  56. invarlock/reporting/policy_utils.py +1 -1
  57. invarlock/reporting/primary_metric_utils.py +11 -11
  58. invarlock/reporting/render.py +876 -510
  59. invarlock/reporting/report.py +72 -30
  60. invarlock/reporting/{certificate.py → report_builder.py} +252 -99
  61. invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
  62. invarlock/reporting/report_types.py +6 -1
  63. invarlock/reporting/telemetry.py +86 -0
  64. invarlock-0.3.8.dist-info/METADATA +283 -0
  65. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
  66. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
  67. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
  68. invarlock/adapters/hf_gpt2.py +0 -404
  69. invarlock/adapters/hf_llama.py +0 -487
  70. invarlock/cli/commands/certify.py +0 -422
  71. invarlock-0.3.6.dist-info/METADATA +0 -588
  72. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
  73. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
```diff
@@ -8,7 +8,7 @@ from typing import Any
 def get_adapter_rows() -> list[dict[str, Any]]:
     """Build adapter rows similar to doctor output for testing.
 
-    Applies optional-extra detection for hf_onnx (optimum/onnxruntime) even if
+    Applies optional-extra detection for hf_causal_onnx (optimum/onnxruntime) even if
     registered as a core adapter, so missing extras are surfaced.
     """
     from invarlock.core.registry import get_registry
@@ -29,13 +29,12 @@ def get_adapter_rows() -> list[dict[str, Any]]:
         module = str(info.get("module") or "")
         support = (
             "auto"
-            if module.startswith("invarlock.adapters")
-            and name in {"hf_causal_auto", "hf_mlm_auto"}
+            if module.startswith("invarlock.adapters") and name in {"hf_auto"}
             else ("core" if module.startswith("invarlock.adapters") else "optional")
         )
         backend, status, enable = None, "ready", ""
 
-        if name in {"hf_gpt2", "hf_bert", "hf_llama", "hf_causal_auto", "hf_mlm_auto"}:
+        if name in {"hf_causal", "hf_mlm", "hf_seq2seq", "hf_auto"}:
             backend = "transformers"
         elif name == "hf_gptq":
             backend = "auto-gptq"
@@ -49,7 +48,7 @@ def get_adapter_rows() -> list[dict[str, Any]]:
             backend = "bitsandbytes"
             if not has_cuda:
                 status, enable = "unsupported", "Requires CUDA"
-        elif name == "hf_onnx":
+        elif name == "hf_causal_onnx":
             backend = "onnxruntime"
             present = (
                 importlib.util.find_spec("optimum.onnxruntime") is not None
```
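The headline change in 0.3.8 is the consolidation of per-architecture adapters into task-family adapters. A minimal migration map for configs that still reference 0.3.6 names, inferred from the file renames and registry changes in this diff (not an official API of the package):

```python
# Inferred from the renames in this diff; illustrative only, not invarlock API.
ADAPTER_RENAMES = {
    "hf_gpt2": "hf_causal",       # hf_gpt2.py removed; hf_causal.py added
    "hf_llama": "hf_causal",      # hf_llama.py removed
    "hf_bert": "hf_mlm",          # hf_bert.py -> hf_mlm.py
    "hf_t5": "hf_seq2seq",        # hf_t5.py -> hf_seq2seq.py
    "hf_onnx": "hf_causal_onnx",  # hf_onnx.py -> hf_causal_onnx.py
    "hf_causal_auto": "hf_auto",  # both auto adapters merged into hf_auto
    "hf_mlm_auto": "hf_auto",
}
```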
```diff
@@ -0,0 +1,193 @@
+from __future__ import annotations
+
+import os
+import time
+from collections.abc import Iterator
+from contextlib import contextmanager
+from dataclasses import dataclass
+from typing import TextIO
+
+from rich.console import Console
+
+_STYLE_AUDIT = "audit"
+_STYLE_FRIENDLY = "friendly"
+_VALID_STYLES = {_STYLE_AUDIT, _STYLE_FRIENDLY}
+
+
+def _safe_console_print(console: Console, *args: object, **kwargs: object) -> None:
+    try:
+        console.print(*args, **kwargs)
+    except TypeError:
+        console.print(*args)
+
+
+def env_no_color() -> bool:
+    """Return True when NO_COLOR is set (value-agnostic)."""
+    return bool(str(os.environ.get("NO_COLOR", "")).strip())
+
+
+def perf_counter() -> float:
+    return time.perf_counter()
+
+
+@dataclass(frozen=True, slots=True)
+class OutputStyle:
+    name: str
+    progress: bool = False
+    timing: bool = False
+    color: bool = True
+
+    @property
+    def emojis(self) -> bool:
+        return self.name != _STYLE_AUDIT
+
+    @property
+    def audit(self) -> bool:
+        return self.name == _STYLE_AUDIT
+
+
+def normalize_style(style: str | None) -> str | None:
+    if style is None:
+        return None
+    value = str(style).strip().lower()
+    if not value:
+        return None
+    return value if value in _VALID_STYLES else None
+
+
+def resolve_style_name(style: str | None, profile: str | None) -> str:
+    normalized = normalize_style(style)
+    if normalized is not None:
+        return normalized
+    profile_norm = str(profile or "").strip().lower()
+    if profile_norm in {"ci", "ci_cpu", "release"}:
+        return _STYLE_AUDIT
+    return _STYLE_FRIENDLY
+
+
+def resolve_output_style(
+    *,
+    style: str | None,
+    profile: str | None,
+    progress: bool = False,
+    timing: bool = False,
+    no_color: bool = False,
+) -> OutputStyle:
+    name = resolve_style_name(style, profile)
+    color_enabled = not (bool(no_color) or env_no_color())
+    return OutputStyle(
+        name=name,
+        progress=bool(progress),
+        timing=bool(timing),
+        color=color_enabled,
+    )
+
+
+def make_console(
+    *,
+    file: TextIO | None = None,
+    force_terminal: bool | None = None,
+    no_color: bool | None = None,
+) -> Console:
+    if no_color is None:
+        no_color = env_no_color()
+    if no_color:
+        color_system = None
+    else:
+        color_system = "standard" if force_terminal else "auto"
+    return Console(
+        file=file,
+        force_terminal=force_terminal,
+        no_color=bool(no_color),
+        color_system=color_system,
+    )
+
+
+def format_event_line(
+    tag: str,
+    message: str,
+    *,
+    style: OutputStyle,
+    emoji: str | None = None,
+) -> str:
+    tag_norm = str(tag or "").strip().upper() or "INFO"
+    if style.emojis and emoji:
+        prefix = emoji
+    else:
+        prefix = f"[{tag_norm}]"
+    msg = str(message or "").rstrip()
+    return f"{prefix} {msg}".rstrip()
+
+
+def print_event(
+    console: Console,
+    tag: str,
+    message: str,
+    *,
+    style: OutputStyle,
+    emoji: str | None = None,
+    console_style: str | None = None,
+) -> None:
+    line = format_event_line(tag, message, style=style, emoji=emoji)
+    if console_style is None and style.color:
+        tag_norm = str(tag or "").strip().upper()
+        if tag_norm in {"PASS"}:
+            console_style = "green"
+        elif tag_norm in {"FAIL", "ERROR"}:
+            console_style = "red"
+        elif tag_norm in {"WARN", "WARNING"}:
+            console_style = "yellow"
+        elif tag_norm in {"METRIC"}:
+            console_style = "cyan"
+    _safe_console_print(console, line, style=console_style, markup=False)
+
+
+@contextmanager
+def timed_step(
+    *,
+    console: Console,
+    style: OutputStyle,
+    timings: dict[str, float] | None,
+    key: str,
+    tag: str,
+    message: str,
+    emoji: str | None = None,
+) -> Iterator[None]:
+    start = perf_counter()
+    try:
+        yield
+    finally:
+        elapsed = max(0.0, float(perf_counter() - start))
+        if timings is not None:
+            timings[key] = elapsed
+        if style.progress:
+            print_event(
+                console,
+                tag,
+                f"{message} done ({elapsed:.2f}s)",
+                style=style,
+                emoji=emoji,
+            )
+
+
+def print_timing_summary(
+    console: Console,
+    timings: dict[str, float],
+    *,
+    style: OutputStyle,
+    order: list[tuple[str, str]],
+    extra_lines: list[str] | None = None,
+) -> None:
+    if not style.timing:
+        return
+    _safe_console_print(console, "", markup=False)
+    _safe_console_print(console, "TIMING SUMMARY", markup=False)
+    for label, key in order:
+        if key not in timings:
+            continue
+        _safe_console_print(
+            console, f" {label:<11}: {timings[key]:.2f}s", markup=False
+        )
+    if extra_lines:
+        for line in extra_lines:
+            _safe_console_print(console, line, markup=False)
```
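Together these helpers give every command one path for picking a style, emitting tagged events, and reporting timings. A minimal usage sketch, assuming a caller wires them up roughly as follows (the composition is illustrative, not lifted from the package):

```python
from invarlock.cli.output import (
    make_console,
    print_event,
    print_timing_summary,
    resolve_output_style,
    timed_step,
)

# A "ci" profile resolves to the plain [TAG]-prefixed audit style.
style = resolve_output_style(style=None, profile="ci", progress=True, timing=True)
console = make_console()
timings: dict[str, float] = {}

with timed_step(console=console, style=style, timings=timings,
                key="eval", tag="INFO", message="Evaluate"):
    pass  # ... run the evaluation step here ...

print_event(console, "PASS", "all gates satisfied", style=style)
print_timing_summary(console, timings, style=style, order=[("Evaluate", "eval")])
```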
```diff
@@ -2,7 +2,7 @@
 
 Provides a tiny, versioned schema describing the adapter family and the
 underlying library versions. This does not perform any edits; it only reads
-environment and import metadata to annotate reports/certificates.
+environment and import metadata to annotate evaluation artifacts.
 """
 
 from __future__ import annotations
@@ -31,7 +31,7 @@ _FAMILY_MAP: dict[str, tuple[str, str, list[str]]] = {
     "hf_awq": ("awq", "autoawq", []),
     "hf_bnb": ("bnb", "bitsandbytes", []),
     # ONNX stack (requires extras: invarlock[onnx])
-    "hf_onnx": ("onnx", "onnxruntime", []),
+    "hf_causal_onnx": ("onnx", "onnxruntime", []),
 }
 
 
@@ -46,12 +46,12 @@ def extract_adapter_provenance(adapter_name: str) -> AdapterProvenance:
         msg = (
             None
             if supported
-            else f"Use Compare & Certify (BYOE); {library} version unsupported (tested: {tested})"
+            else f"Use Compare & Evaluate (BYOE); {library} version unsupported (tested: {tested})"
         )
     except Exception:  # Package not installed or version unknown
         ver = None
         supported = False
-        msg = f"{library} not available; prefer Compare & Certify (BYOE) or install extras."
+        msg = f"{library} not available; prefer Compare & Evaluate (BYOE) or install extras."
 
     return AdapterProvenance(
         family=family,
```
```diff
@@ -6,7 +6,7 @@ Numerically stable bootstrap helpers for evaluation metrics.
 
 This module provides bias-corrected and accelerated (BCa) confidence
 intervals tailored for paired log-loss statistics used by the runner
-and safety certificate reports.
+and evaluation reports.
 """
 
 from __future__ import annotations
```
```diff
@@ -140,23 +140,21 @@ class CoreRegistry:
         )
 
         # Register built-in adapters
-        _fallback(self._adapters, "hf_gpt2", "invarlock.adapters", "HF_GPT2_Adapter")
-        _fallback(self._adapters, "hf_bert", "invarlock.adapters", "HF_BERT_Adapter")
-        _fallback(self._adapters, "hf_llama", "invarlock.adapters", "HF_LLaMA_Adapter")
-        _fallback(self._adapters, "hf_t5", "invarlock.adapters", "HF_T5_Adapter")
         _fallback(
-            self._adapters, "hf_onnx", "invarlock.adapters", "HF_ORT_CausalLM_Adapter"
+            self._adapters, "hf_causal", "invarlock.adapters", "HF_Causal_Adapter"
         )
-        # Convenience auto adapters (delegate to built-ins)
+        _fallback(self._adapters, "hf_mlm", "invarlock.adapters", "HF_MLM_Adapter")
         _fallback(
-            self._adapters,
-            "hf_causal_auto",
-            "invarlock.adapters",
-            "HF_Causal_Auto_Adapter",
+            self._adapters, "hf_seq2seq", "invarlock.adapters", "HF_Seq2Seq_Adapter"
         )
         _fallback(
-            self._adapters, "hf_mlm_auto", "invarlock.adapters", "HF_MLM_Auto_Adapter"
+            self._adapters,
+            "hf_causal_onnx",
+            "invarlock.adapters",
+            "HF_Causal_ONNX_Adapter",
+            required_deps=["optimum"],
         )
+        _fallback(self._adapters, "hf_auto", "invarlock.adapters", "HF_Auto_Adapter")
         # Optional plugin adapters (verify runtime dependencies)
         _fallback(
             self._adapters,
```
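`_fallback` itself is not part of this diff; judging by the call sites, it registers a lazily imported adapter class and skips registration when a listed dependency is missing (note the new `required_deps=["optimum"]` for the ONNX adapter). A rough sketch of that pattern, with the body entirely assumed:

```python
# Rough sketch only: the real _fallback lives in invarlock/core/registry.py
# and its body is not shown in this diff. Signature mirrors the calls above.
import importlib
import importlib.util


def _fallback(registry: dict, name: str, module: str, attr: str,
              required_deps: list[str] | None = None) -> None:
    # Skip registration when a required runtime dependency is absent.
    for dep in required_deps or []:
        if importlib.util.find_spec(dep) is None:
            return
    # Defer the actual import until the adapter is requested.
    registry[name] = lambda: getattr(importlib.import_module(module), attr)
```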
invarlock/core/retry.py CHANGED
```diff
@@ -2,11 +2,11 @@
 InvarLock Retry Controller
 =====================
 
-Manages retry logic for automated certification workflows with:
+Manages retry logic for automated evaluation workflows with:
 - Attempt budgets (max 3 attempts default)
 - Time budgets (optional timeout)
 - Parameter adjustment strategies per edit type
-- Certificate-driven retry decisions
+- Gate-driven retry decisions
 """
 
 from __future__ import annotations
@@ -19,7 +19,7 @@ __all__ = ["RetryController", "adjust_edit_params"]
 
 class RetryController:
     """
-    Controls retry logic for certificate-driven automation.
+    Controls retry logic for evaluation-report-driven automation.
 
     Features:
     - Attempt budget enforcement (default 3 max)
@@ -45,18 +45,18 @@ class RetryController:
         self.start_time = time.time()
         self.attempt_history: list[dict[str, Any]] = []
 
-    def should_retry(self, certificate_passed: bool) -> bool:
+    def should_retry(self, report_passed: bool) -> bool:
         """
         Determine if retry should be attempted.
 
         Args:
-            certificate_passed: Whether certificate validation passed
+            report_passed: Whether evaluation report gates passed
 
         Returns:
             True if retry should be attempted, False otherwise
         """
-        # If certificate passed, no retry needed
-        if certificate_passed:
+        # If report passed, no retry needed
+        if report_passed:
             return False
 
         # Check attempt budget (attempt count equals history length)
@@ -81,21 +81,21 @@ class RetryController:
     def record_attempt(
         self,
         attempt_num: int,
-        certificate_result: dict[str, Any],
+        report_result: dict[str, Any],
         edit_params: dict[str, Any],
     ) -> None:
         """Record details of an attempt for tracking."""
-        certificate_result = certificate_result or {}
+        report_result = report_result or {}
         edit_params = edit_params or {}
 
         self.attempt_history.append(
             {
                 "attempt": attempt_num,
                 "timestamp": time.time(),
-                "certificate_passed": certificate_result.get("passed", False),
+                "report_passed": report_result.get("passed", False),
                 "edit_params": edit_params.copy(),
-                "failures": certificate_result.get("failures", []),
-                "validation": certificate_result.get("validation", {}),
+                "failures": report_result.get("failures", []),
+                "validation": report_result.get("validation", {}),
             }
         )
 
@@ -114,7 +114,7 @@ def adjust_edit_params(
     edit_name: str,
     edit_params: dict[str, Any],
     attempt: int,
-    certificate_result: dict[str, Any] | None = None,
+    report_result: dict[str, Any] | None = None,
 ) -> dict[str, Any]:
     """
     Adjust edit parameters for retry attempt based on edit type and failure mode.
@@ -126,7 +126,7 @@ def adjust_edit_params(
         edit_name: Name of the edit operation
         edit_params: Current edit parameters
         attempt: Attempt number (1-indexed)
-        certificate_result: Optional certificate result for failure analysis
+        report_result: Optional evaluation report result for failure analysis
 
     Returns:
         Adjusted parameters for next attempt
```
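The rename is mechanical: code that previously consumed a certificate dict now consumes the evaluation-report dict, with the same `passed`/`failures`/`validation` keys. A minimal driver loop, assuming a default-constructed controller and a hypothetical `run_edit` helper (neither assumption is confirmed package API):

```python
from invarlock.core.retry import RetryController, adjust_edit_params


def run_edit(params: dict) -> dict:
    """Hypothetical stand-in for applying an edit and gating the result."""
    return {"passed": False, "failures": [], "validation": {}}


controller = RetryController()  # assumed default budget of 3 attempts
params = {"bits": 4}

for attempt in range(1, 4):
    result = run_edit(params)
    controller.record_attempt(attempt, result, params)
    if not controller.should_retry(result.get("passed", False)):
        break
    params = adjust_edit_params("quant_rtn", params, attempt, result)
```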
invarlock/core/runner.py CHANGED
```diff
@@ -19,6 +19,11 @@ from typing import Any
 import numpy as np
 
 from invarlock.eval.tail_stats import evaluate_metric_tail
+from invarlock.observability.metrics import (
+    capture_memory_snapshot,
+    reset_peak_memory_stats,
+    summarize_memory_snapshots,
+)
 
 from .api import (
     EditLike,
@@ -190,6 +195,18 @@ class CoreRunner:
             pass
 
         report.status = RunStatus.RUNNING.value
+        timings: dict[str, float] = {}
+        guard_timings: dict[str, float] = {}
+        memory_snapshots: list[dict[str, Any]] = []
+        total_start = time.perf_counter()
+
+        def _record_timing(key: str, start: float) -> None:
+            timings[key] = max(0.0, float(time.perf_counter() - start))
+
+        def _capture_memory(phase: str) -> None:
+            snapshot = capture_memory_snapshot(phase)
+            if snapshot:
+                memory_snapshots.append(snapshot)
 
         try:
             # Log start
@@ -205,40 +222,78 @@
             )
 
             # Phase 1: Prepare (describe model, create checkpoint)
-            model_desc = self._prepare_phase(model, adapter, report)
+            reset_peak_memory_stats()
+            phase_start = time.perf_counter()
+            try:
+                model_desc = self._prepare_phase(model, adapter, report)
+            finally:
+                _record_timing("prepare", phase_start)
+                _capture_memory("prepare")
 
             # Phase 2: Prepare guards (must happen before edit)
-            self._prepare_guards_phase(
-                model,
-                adapter,
-                guards,
-                calibration_data,
-                report,
-                auto_config,
-                config,
-            )
+            reset_peak_memory_stats()
+            phase_start = time.perf_counter()
+            try:
+                self._prepare_guards_phase(
+                    model,
+                    adapter,
+                    guards,
+                    calibration_data,
+                    report,
+                    auto_config,
+                    config,
+                )
+            finally:
+                _record_timing("prepare_guards", phase_start)
+                _capture_memory("prepare_guards")
 
             # Phase 3: Apply edit
-            self._edit_phase(model, adapter, edit, model_desc, report, edit_config)
+            reset_peak_memory_stats()
+            phase_start = time.perf_counter()
+            try:
+                self._edit_phase(model, adapter, edit, model_desc, report, edit_config)
+            finally:
+                _record_timing("edit", phase_start)
+                _capture_memory("edit")
 
             # Phase 4: Run guards
-            guard_results = self._guard_phase(model, adapter, guards, report)
+            reset_peak_memory_stats()
+            phase_start = time.perf_counter()
+            try:
+                guard_results = self._guard_phase(
+                    model, adapter, guards, report, guard_timings=guard_timings
+                )
+            finally:
+                _record_timing("guards", phase_start)
+                _capture_memory("guards")
 
             # Phase 5: Evaluate final metrics
-            metrics = self._eval_phase(
-                model,
-                adapter,
-                calibration_data,
-                report,
-                preview_n,
-                final_n,
-                config,
-            )
+            reset_peak_memory_stats()
+            phase_start = time.perf_counter()
+            try:
+                metrics = self._eval_phase(
+                    model,
+                    adapter,
+                    calibration_data,
+                    report,
+                    preview_n,
+                    final_n,
+                    config,
+                )
+            finally:
+                _record_timing("eval", phase_start)
+                _capture_memory("eval")
 
             # Phase 6: Finalize or rollback
-            final_status = self._finalize_phase(
-                model, adapter, guard_results, metrics, config, report
-            )
+            reset_peak_memory_stats()
+            phase_start = time.perf_counter()
+            try:
+                final_status = self._finalize_phase(
+                    model, adapter, guard_results, metrics, config, report
+                )
+            finally:
+                _record_timing("finalize", phase_start)
+                _capture_memory("finalize")
 
             report.status = final_status
             report.meta["end_time"] = time.time()
```
```diff
@@ -260,6 +315,25 @@
             return report
 
         finally:
+            _record_timing("total", total_start)
+            if not isinstance(report.metrics, dict):
+                report.metrics = {}
+            if timings:
+                report.metrics.setdefault("timings", {}).update(timings)
+            if guard_timings:
+                report.metrics["guard_timings"] = guard_timings
+            if memory_snapshots:
+                report.metrics["memory_snapshots"] = memory_snapshots
+                summary = summarize_memory_snapshots(memory_snapshots)
+                if summary:
+                    mem_peak = summary.get("memory_mb_peak")
+                    if isinstance(mem_peak, (int | float)):
+                        existing = report.metrics.get("memory_mb_peak")
+                        if isinstance(existing, (int | float)):
+                            summary["memory_mb_peak"] = max(
+                                float(existing), float(mem_peak)
+                            )
+                    report.metrics.update(summary)
             self._active_model = None
             self._active_adapter = None
             self._cleanup_services()
```
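The net effect is that every run report now carries phase-level telemetry. The keys below are taken from the diff; the values are illustrative:

```python
# Illustrative shape of the telemetry merged into report.metrics.
report_metrics = {
    "timings": {  # seconds per phase, plus overall wall time
        "prepare": 0.4,
        "prepare_guards": 1.1,
        "edit": 3.6,
        "guards": 2.0,
        "eval": 8.9,
        "finalize": 0.1,
        "total": 16.3,
    },
    "guard_timings": {"spectral": 0.8, "rmt": 1.2},  # per-guard seconds
    "memory_snapshots": [{"phase": "edit"}],  # one snapshot dict per phase
    "memory_mb_peak": 5123.0,  # folded in via summarize_memory_snapshots
}
```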
```diff
@@ -447,7 +521,7 @@
                     f"Guard '{guard.name}' prepare failed: {e}"
                 ) from e
 
-        # Store resolved policies in report for certificate
+        # Store resolved policies in report for evaluation report generation
         report.meta["tier_policies"] = tier_policies
 
         self._log_event(
@@ -455,7 +529,13 @@
         )
 
     def _guard_phase(
-        self, model: Any, adapter: ModelAdapter, guards: list[Guard], report: RunReport
+        self,
+        model: Any,
+        adapter: ModelAdapter,
+        guards: list[Guard],
+        report: RunReport,
+        *,
+        guard_timings: dict[str, float] | None = None,
     ) -> dict[str, dict[str, Any]]:
         """Phase 4: Run safety guards."""
         self._log_event("guards", "start", LogLevel.INFO, {"count": len(guards)})
@@ -464,6 +544,7 @@
 
         for guard in guards:
             self._log_event("guard", "start", LogLevel.INFO, {"guard": guard.name})
+            guard_start = time.perf_counter()
 
             if isinstance(guard, GuardWithContext):
                 try:
@@ -497,6 +578,11 @@
                     LogLevel.ERROR,
                     {"guard": guard.name, "error": str(e)},
                 )
+            finally:
+                if guard_timings is not None:
+                    guard_timings[guard.name] = max(
+                        0.0, float(time.perf_counter() - guard_start)
+                    )
 
         report.guards = guard_results
 
```
invarlock/edits/noop.py CHANGED
```diff
@@ -1,7 +1,7 @@
-"""Built-in no-op edit used for baseline and Compare & Certify (BYOE).
+"""Built-in no-op edit used for baseline and Compare & Evaluate (BYOE).
 
 This edit does not modify the model and reports zero deltas. It exists to
-support baseline runs and Compare & Certify certification where the subject
+support baseline runs and Compare & Evaluate workflows where the subject
 checkpoint is produced outside of InvarLock.
 """
```