invarlock-0.3.7-py3-none-any.whl → invarlock-0.3.8-py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.
- invarlock/__init__.py +3 -3
- invarlock/adapters/auto.py +2 -10
- invarlock/adapters/hf_loading.py +7 -7
- invarlock/adapters/hf_mixin.py +28 -5
- invarlock/assurance/__init__.py +15 -23
- invarlock/cli/adapter_auto.py +1 -5
- invarlock/cli/app.py +57 -27
- invarlock/cli/commands/__init__.py +2 -2
- invarlock/cli/commands/calibrate.py +48 -4
- invarlock/cli/commands/{certify.py → evaluate.py} +69 -46
- invarlock/cli/commands/explain_gates.py +25 -17
- invarlock/cli/commands/export_html.py +11 -9
- invarlock/cli/commands/report.py +116 -46
- invarlock/cli/commands/run.py +274 -66
- invarlock/cli/commands/verify.py +84 -89
- invarlock/cli/determinism.py +1 -1
- invarlock/cli/provenance.py +3 -3
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/retry.py +14 -14
- invarlock/core/runner.py +1 -1
- invarlock/edits/noop.py +2 -2
- invarlock/edits/quant_rtn.py +2 -2
- invarlock/eval/__init__.py +1 -1
- invarlock/eval/bench.py +11 -7
- invarlock/eval/primary_metric.py +1 -1
- invarlock/guards/spectral.py +1 -1
- invarlock/model_profile.py +16 -35
- invarlock/plugins/hf_bnb_adapter.py +32 -21
- invarlock/reporting/__init__.py +18 -4
- invarlock/reporting/html.py +7 -7
- invarlock/reporting/normalizer.py +2 -2
- invarlock/reporting/policy_utils.py +1 -1
- invarlock/reporting/primary_metric_utils.py +11 -11
- invarlock/reporting/render.py +126 -120
- invarlock/reporting/report.py +43 -37
- invarlock/reporting/{certificate.py → report_builder.py} +98 -95
- invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
- invarlock-0.3.8.dist-info/METADATA +283 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/RECORD +43 -43
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
- invarlock-0.3.7.dist-info/METADATA +0 -602
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +0 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
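Two renames in this release affect public import paths: the `certify` CLI command becomes `evaluate`, and `invarlock/reporting/certificate.py` / `certificate_schema.py` become `report_builder.py` / `report_schema.py`. Callers that imported report-building helpers from the old module need to follow the rename; a minimal sketch of the updated import, based on the usage visible later in this diff:

    # New import path in 0.3.8; the old invarlock.reporting.certificate module no longer exists.
    from invarlock.reporting.report_builder import make_report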
invarlock/cli/commands/run.py
CHANGED
@@ -4,16 +4,18 @@ InvarLock CLI Run Command

 Run a guarded pipeline from a YAML config. Intended for local smokes,
 plugin demos, and development. Advanced: for pairwise certification,
-prefer Compare &
+prefer Compare & Evaluate via `invarlock evaluate --baseline ... --subject ...`.
 """

 import copy
 import hashlib
 import inspect
 import json
+import logging
 import math
 import os
 import random
+import re
 import shutil
 import sys as _sys
 import types as _types
@@ -125,10 +127,7 @@ RELEASE_CALIBRATION_MAX = 24
 GUARD_OVERHEAD_THRESHOLD = 0.01
 KV_LABEL_WIDTH = 10

-_NOISY_WARNING_PATTERNS = (
-    r".*`torch_dtype` is deprecated.*",
-    r".*loss_type=None.*unrecognized.*",
-)
+_NOISY_WARNING_PATTERNS = (r".*loss_type=None.*unrecognized.*",)


 def _resolve_warning_suppression(profile: str | None) -> tuple[bool, bool]:
@@ -139,7 +138,7 @@ def _resolve_warning_suppression(profile: str | None) -> tuple[bool, bool]:
         "on",
     }
     profile_norm = (profile or "").strip().lower()
-    enabled = bool(suppress_all) or profile_norm in {"ci", "ci_cpu", "release"
+    enabled = bool(suppress_all) or profile_norm in {"ci", "ci_cpu", "release"}
     return enabled, suppress_all


@@ -156,14 +155,176 @@ def _apply_warning_filters(profile: str | None) -> bool:


 @contextmanager
-def _suppress_noisy_warnings(
-
+def _suppress_noisy_warnings(
+    profile: str | None,
+    *,
+    event_path: Path | None = None,
+    context: dict[str, Any] | None = None,
+) -> Iterator[None]:
+    enabled, suppress_all = _resolve_warning_suppression(profile)
     if not enabled:
         yield
         return
-
-
-
+
+    prev_tf_verbosity = os.environ.get("TRANSFORMERS_VERBOSITY")
+    os.environ["TRANSFORMERS_VERBOSITY"] = "error"
+    transformers_logger = logging.getLogger("transformers")
+    prev_tf_level = transformers_logger.level
+    transformers_logger.setLevel(logging.ERROR)
+
+    patterns = [re.compile(p) for p in _NOISY_WARNING_PATTERNS]
+    suppressed: list[str] = []
+
+    class _NoisyLogFilter(logging.Filter):
+        def filter(self, record: logging.LogRecord) -> bool:  # noqa: A003
+            try:
+                message = record.getMessage()
+            except Exception:
+                return True
+            if any(p.search(message) for p in patterns):
+                suppressed.append(message)
+                return False
+            return True
+
+    def _iter_handlers() -> list[logging.Handler]:
+        handlers: list[logging.Handler] = []
+        seen: set[int] = set()
+        for logger in (
+            logging.getLogger(),
+            logging.getLogger("transformers"),
+            logging.getLogger("huggingface_hub"),
+            logging.getLogger("datasets"),
+        ):
+            for handler in getattr(logger, "handlers", []) or []:
+                if id(handler) in seen:
+                    continue
+                seen.add(id(handler))
+                handlers.append(handler)
+        return handlers
+
+    log_filter = _NoisyLogFilter()
+    handlers = _iter_handlers()
+
+    def _append_suppressed_warnings() -> None:
+        if not suppressed or event_path is None:
+            return
+        try:
+            path = Path(event_path)
+            path.parent.mkdir(parents=True, exist_ok=True)
+            payload = {
+                "timestamp": datetime.now().isoformat(),
+                "component": "warnings",
+                "operation": "suppressed",
+                "level": "WARNING",
+                "data": {
+                    "count": len(suppressed),
+                    "messages": suppressed[:50],
+                    "profile": profile or "",
+                    **(context or {}),
+                },
+            }
+            with path.open("a", encoding="utf-8") as fh:
+                fh.write(json.dumps(payload) + "\n")
+        except Exception:
+            # Best-effort: suppressed warnings are non-fatal and logging must not
+            # impact model loading.
+            return
+
+    for handler in handlers:
+        handler.addFilter(log_filter)
+
+    try:
+        with warnings.catch_warnings():
+            from contextlib import redirect_stderr, redirect_stdout
+
+            class _FilteredStream:
+                def __init__(self, raw: Any) -> None:
+                    self._raw = raw
+
+                def __getattr__(self, name: str) -> object:
+                    return getattr(self._raw, name)
+
+                def write(self, s: object) -> int:
+                    try:
+                        if isinstance(s, bytes):
+                            text = s.decode("utf-8", errors="replace")
+                        else:
+                            text = str(s)
+                    except Exception:
+                        return int(self._raw.write(s))
+
+                    # Preserve progress bars (carriage returns) by passing through
+                    # all non-matching chunks immediately.
+                    pieces = text.splitlines(keepends=True)
+                    for piece in pieces:
+                        if any(p.search(piece) for p in patterns):
+                            suppressed.append(piece.rstrip("\n"))
+                            continue
+                        self._raw.write(piece)
+                    return len(text)
+
+                def flush(self) -> None:
+                    try:
+                        self._raw.flush()
+                    except Exception:
+                        pass
+
+            stdout_proxy = _FilteredStream(_sys.stdout)
+            stderr_proxy = _FilteredStream(_sys.stderr)
+
+            with redirect_stdout(stdout_proxy), redirect_stderr(stderr_proxy):
+                if suppress_all:
+                    warnings.simplefilter("ignore")
+                    yield
+                else:
+                    original_showwarning = warnings.showwarning
+
+                    def _showwarning(
+                        message: Warning | str,
+                        category: type[Warning],
+                        filename: str,
+                        lineno: int,
+                        file: object | None = None,
+                        line: str | None = None,
+                    ) -> None:
+                        try:
+                            rendered = warnings.formatwarning(
+                                message, category, filename, lineno, line
+                            )
+                        except Exception:
+                            rendered = str(message)
+                        if any(p.search(rendered) for p in patterns):
+                            suppressed.append(str(message))
+                            return
+                        original_showwarning(
+                            message,
+                            category,
+                            filename,
+                            lineno,
+                            file=file,
+                            line=line,
+                        )
+
+                    warnings.showwarning = _showwarning  # type: ignore[assignment]
+                    try:
+                        yield
+                    finally:
+                        warnings.showwarning = original_showwarning  # type: ignore[assignment]
+    finally:
+        for handler in handlers:
+            try:
+                handler.removeFilter(log_filter)
+            except Exception:
+                pass
+        try:
+            transformers_logger.setLevel(prev_tf_level)
+        except Exception:
+            pass
+        if prev_tf_verbosity is None:
+            os.environ.pop("TRANSFORMERS_VERBOSITY", None)
+        else:
+            os.environ["TRANSFORMERS_VERBOSITY"] = prev_tf_verbosity
+        _append_suppressed_warnings()


 def _format_kv_line(label: str, value: str, *, width: int = KV_LABEL_WIDTH) -> str:
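The rewritten `_suppress_noisy_warnings` context manager now takes the run profile plus keyword-only `event_path` and `context` arguments, so suppressed messages are appended to the run's `events.jsonl` instead of being dropped silently. A minimal sketch of the call shape, mirroring the call sites further down in this diff (the helper is private to `invarlock.cli.commands.run`, and the path and labels here are illustrative):

    from pathlib import Path

    from invarlock.cli.commands.run import _suppress_noisy_warnings  # private helper

    # Matching noisy warnings/log lines are filtered and recorded on exit as a single
    # "warnings"/"suppressed" JSONL event containing the count and first messages.
    with _suppress_noisy_warnings(
        "ci",
        event_path=Path("runs/demo/events.jsonl"),
        context={"phase": "load_model", "run_id": "demo-run"},
    ):
        ...  # model loading / runner execution happens here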
@@ -361,7 +522,7 @@ def _resolve_pm_drift_band(
     """Resolve preview→final drift band from config/env with safe defaults.

     The drift band governs the Preview Final Drift Acceptable gate. By default,
-
+    evaluation reports enforce 0.95–1.05 unless an explicit band is provided.
     """

     base_min = 0.95
@@ -1185,13 +1346,24 @@ def _extract_model_load_kwargs(cfg: InvarLockConfig) -> dict[str, Any]:
         for key, value in model.items()
         if key not in {"id", "adapter", "device"} and value is not None
     }
-
-
-
+    removed_keys: list[str] = []
+    for key in ("torch_dtype", "load_in_8bit", "load_in_4bit"):
+        if key in extra:
+            removed_keys.append(key)
+    if removed_keys:
+        raise InvarlockError(
+            code="E007",
+            message=(
+                "CONFIG-KEY-REMOVED: "
+                + ", ".join(removed_keys)
+                + ". Use model.dtype and/or model.quantization_config."
+            ),
+            details={"removed_keys": removed_keys},
+        )

-    # Normalize
-    if "
-    dtype_str = str(extra.get("
+    # Normalize dtype when present (keep as string for JSON-ability).
+    if "dtype" in extra and isinstance(extra.get("dtype"), str):
+        dtype_str = str(extra.get("dtype") or "").strip().lower()
         aliases = {
             "fp16": "float16",
             "half": "float16",
@@ -1199,9 +1371,9 @@ def _extract_model_load_kwargs(cfg: InvarLockConfig) -> dict[str, Any]:
             "fp32": "float32",
         }
         if dtype_str in aliases:
-            extra["
+            extra["dtype"] = aliases[dtype_str]
         elif dtype_str:
-            extra["
+            extra["dtype"] = dtype_str

     return extra

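In practice this means the Hugging Face-style keys `torch_dtype`, `load_in_8bit`, and `load_in_4bit` are now rejected under `model:` with error code E007 rather than forwarded, and shorthand dtypes are normalized. A hedged before/after sketch of the `model` section written as Python dicts (the model id is a placeholder, and the exact shape of `quantization_config` is not shown in this diff):

    # Rejected in 0.3.8: raises InvarlockError E007 ("CONFIG-KEY-REMOVED: torch_dtype, load_in_8bit. ...")
    model_old = {
        "id": "org/model",       # placeholder model id
        "torch_dtype": "fp16",   # removed key
        "load_in_8bit": True,    # removed key
    }

    # Accepted: "fp16" is normalized to "float16"; quantization moves under quantization_config.
    model_new = {
        "id": "org/model",
        "dtype": "fp16",
        "quantization_config": {},  # adapter-specific settings, structure not shown here
    }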
@@ -1212,6 +1384,8 @@ def _load_model_with_cfg(
     device: str,
     *,
     profile: str | None = None,
+    event_path: Path | None = None,
+    warning_context: dict[str, Any] | None = None,
 ) -> Any:
     """Load a model with config-provided kwargs, filtering for strict adapters."""
     try:
@@ -1225,7 +1399,11 @@
         raise ValueError("Missing model.id in config")

     extra = _extract_model_load_kwargs(cfg)
-    with _suppress_noisy_warnings(
+    with _suppress_noisy_warnings(
+        profile,
+        event_path=event_path,
+        context=warning_context,
+    ):
         try:
             sig = inspect.signature(adapter.load_model)
             accepts_var_kw = any(
@@ -1307,18 +1485,23 @@ def _run_bare_control(
         if snapshot_provenance is not None:
             snapshot_provenance["reload_path_used"] = True

-
-
-
-
-
-
-
-
-
-
-
+        with _suppress_noisy_warnings(
+            profile_normalized,
+            event_path=getattr(run_config, "event_path", None),
+            context={"phase": "guard_overhead_bare"},
+        ):
+            bare_report = bare_runner.execute(
+                model=bare_target_model,
+                adapter=adapter,
+                edit=edit_op,
+                guards=[],
+                config=bare_config,
+                calibration_data=calibration_data,
+                auto_config=auto_config,
+                edit_config=runtime_edit_config,
+                preview_n=preview_count,
+                final_n=final_count,
+            )
     finally:
         if private_model_loaded:
             _free_model_memory(bare_target_model)
@@ -1417,8 +1600,21 @@ def _execute_guarded_run(
         emoji="🔧",
         profile=profile_normalized,
     )
+    warning_context: dict[str, Any] = {"phase": "load_model"}
+    try:
+        if hasattr(run_config, "context") and isinstance(run_config.context, dict):
+            rid = run_config.context.get("run_id")
+            if isinstance(rid, str) and rid:
+                warning_context["run_id"] = rid
+    except Exception:
+        pass
     model = _load_model_with_cfg(
-        adapter,
+        adapter,
+        cfg,
+        resolved_device,
+        profile=profile_normalized,
+        event_path=getattr(run_config, "event_path", None),
+        warning_context=warning_context,
     )
     if snapshot_provenance is not None:
         snapshot_provenance["reload_path_used"] = True
@@ -1430,18 +1626,23 @@ def _execute_guarded_run(
     )
     runtime_edit_config.setdefault("emit", True)

-
-
-
-
-
-
-
-
-
-
-
+    with _suppress_noisy_warnings(
+        profile_normalized,
+        event_path=getattr(run_config, "event_path", None),
+        context={"phase": "core_runner_execute"},
+    ):
+        core_report = runner.execute(
+            model=model,
+            adapter=adapter,
+            edit=edit_op,
+            guards=guards,
+            config=run_config,
+            calibration_data=calibration_data,
+            auto_config=auto_config,
+            edit_config=runtime_edit_config,
+            preview_n=preview_count,
+            final_n=final_count,
+        )
     return core_report, model


@@ -2226,7 +2427,9 @@ def run_command(
         None, "--probes", help="Number of micro-probes (0=deterministic, >0=adaptive)"
     ),
     until_pass: bool = typer.Option(
-        False,
+        False,
+        "--until-pass",
+        help="Retry until evaluation report passes gates (max 3 attempts)",
     ),
     max_attempts: int = typer.Option(
         3, "--max-attempts", help="Maximum retry attempts for --until-pass mode"
@@ -2237,7 +2440,7 @@ def run_command(
     baseline: str | None = typer.Option(
         None,
         "--baseline",
-        help="Path to baseline report.json for
+        help="Path to baseline report.json for evaluation report validation",
     ),
     no_cleanup: bool = typer.Option(
         False, "--no-cleanup", help="Skip cleanup of temporary artifacts"
@@ -2262,7 +2465,7 @@ def run_command(
     The command assembles non-overlapping preview/final windows, executes the
     GuardChain (invariants → spectral → RMT → variance), checks pairing/overlap
     invariants, enforces guard-overhead ≤1 %, and emits a run report plus JSONL
-    events suitable for
+    events suitable for evaluation report generation.
     """

     try:
@@ -3655,7 +3858,12 @@ def run_command(
         emoji="🔧",
     ):
         model = _load_model_with_cfg(
-            adapter,
+            adapter,
+            cfg,
+            resolved_device,
+            profile=profile_normalized,
+            event_path=run_dir / "events.jsonl",
+            warning_context={"phase": "load_model", "run_id": run_id},
         )

         # No edit-specific bootstrap logic
@@ -4024,7 +4232,7 @@ def run_command(
         # Convert CoreRunner report to evaluation report
         report = create_empty_report()

-        # Persist minimal run context for
+        # Persist minimal run context for evaluation report provenance.
         try:
             report["context"] = {
                 "profile": profile_normalized,
@@ -5121,11 +5329,11 @@ def run_command(
                 f"(>{threshold_fraction * 100:.1f}% increase)"
             )

-        # Drift gate status is no longer surfaced in console; rely on
+        # Drift gate status is no longer surfaced in console; rely on evaluation report gates

-        #
+        # Evaluation report validation for --until-pass mode
         if retry_controller and baseline:
-            from invarlock.reporting.
+            from invarlock.reporting.report_builder import make_report

             try:
                 baseline_report = baseline_report_data
@@ -5140,18 +5348,18 @@
                 _event(
                     console,
                     "EXEC",
-                    "Generating evaluation
+                    "Generating evaluation report...",
                     emoji="📜",
                     profile=profile_normalized,
                 )
-
+                evaluation_report = make_report(report, baseline_report)

-                validation =
-
+                validation = evaluation_report.get("validation", {})
+                report_passed = all(validation.values())

                 failed_gates = [k for k, v in validation.items() if not v]
                 result_summary = {
-                    "passed":
+                    "passed": report_passed,
                     "failures": failed_gates,
                     "validation": validation,
                 }
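For `--until-pass`, the gating decision is now read straight off the evaluation report's `validation` map produced by `make_report`. A condensed sketch of the check (the surrounding retry bookkeeping and console events are omitted; `report`, `baseline_report`, and `retry_controller` come from the run loop):

    from invarlock.reporting.report_builder import make_report

    evaluation_report = make_report(report, baseline_report)
    validation = evaluation_report.get("validation", {})
    report_passed = all(validation.values())          # every gate must pass
    failed_gates = [gate for gate, ok in validation.items() if not ok]
    if retry_controller.should_retry(report_passed):
        ...  # bump the attempt counter and re-run with the next edit config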
@@ -5159,11 +5367,11 @@
                     attempt, result_summary, edit_config
                 )

-                if
+                if report_passed:
                     _event(
                         console,
                         "PASS",
-                        "
+                        "Evaluation report PASSED all gates!",
                         emoji="✅",
                         profile=profile_normalized,
                     )
@@ -5172,7 +5380,7 @@
                     _event(
                         console,
                         "FAIL",
-                        f"
+                        f"Evaluation report FAILED gates: {', '.join(failed_gates)}",
                         emoji="⚠️",
                         profile=profile_normalized,
                     )
@@ -5229,7 +5437,7 @@
                 except Exception:
                     pass

-                if retry_controller.should_retry(
+                if retry_controller.should_retry(report_passed):
                     attempt += 1
                     continue
                 else:
@@ -5242,11 +5450,11 @@
                 )
                 break

-            except Exception as
+            except Exception as report_error:
                 _event(
                     console,
                     "WARN",
-                    f"
+                    f"Evaluation report validation failed: {report_error}",
                     emoji="⚠️",
                     profile=profile_normalized,
                 )
@@ -5255,7 +5463,7 @@
                     attempt,
                     {
                         "passed": False,
-                        "failures": ["
+                        "failures": ["report_error"],
                         "validation": {},
                     },
                     edit_config,