invarlock-0.3.7-py3-none-any.whl → invarlock-0.3.8-py3-none-any.whl
This diff compares the contents of publicly released package versions as they appear in their respective public registries and is provided for informational purposes only.
- invarlock/__init__.py +3 -3
- invarlock/adapters/auto.py +2 -10
- invarlock/adapters/hf_loading.py +7 -7
- invarlock/adapters/hf_mixin.py +28 -5
- invarlock/assurance/__init__.py +15 -23
- invarlock/cli/adapter_auto.py +1 -5
- invarlock/cli/app.py +57 -27
- invarlock/cli/commands/__init__.py +2 -2
- invarlock/cli/commands/calibrate.py +48 -4
- invarlock/cli/commands/{certify.py → evaluate.py} +69 -46
- invarlock/cli/commands/explain_gates.py +25 -17
- invarlock/cli/commands/export_html.py +11 -9
- invarlock/cli/commands/report.py +116 -46
- invarlock/cli/commands/run.py +274 -66
- invarlock/cli/commands/verify.py +84 -89
- invarlock/cli/determinism.py +1 -1
- invarlock/cli/provenance.py +3 -3
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/retry.py +14 -14
- invarlock/core/runner.py +1 -1
- invarlock/edits/noop.py +2 -2
- invarlock/edits/quant_rtn.py +2 -2
- invarlock/eval/__init__.py +1 -1
- invarlock/eval/bench.py +11 -7
- invarlock/eval/primary_metric.py +1 -1
- invarlock/guards/spectral.py +1 -1
- invarlock/model_profile.py +16 -35
- invarlock/plugins/hf_bnb_adapter.py +32 -21
- invarlock/reporting/__init__.py +18 -4
- invarlock/reporting/html.py +7 -7
- invarlock/reporting/normalizer.py +2 -2
- invarlock/reporting/policy_utils.py +1 -1
- invarlock/reporting/primary_metric_utils.py +11 -11
- invarlock/reporting/render.py +126 -120
- invarlock/reporting/report.py +43 -37
- invarlock/reporting/{certificate.py → report_builder.py} +98 -95
- invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
- invarlock-0.3.8.dist-info/METADATA +283 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/RECORD +43 -43
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
- invarlock-0.3.7.dist-info/METADATA +0 -602
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +0 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.7.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
invarlock/reporting/report.py
CHANGED
```diff
@@ -16,11 +16,9 @@ from typing import Any, cast
 
 from invarlock.cli._evidence import maybe_dump_guard_evidence
 
-from .certificate import (
-    make_certificate,
-)
 from .normalizer import normalize_run_report
-from .render import
+from .render import render_report_markdown
+from .report_builder import make_report
 from .report_types import RunReport, validate_report
 
 
@@ -160,17 +158,19 @@ def to_html(
     return "\n".join(html_parts)
 
 
-def
+def to_evaluation_report(
+    report: RunReport, baseline: RunReport, format: str = "json"
+) -> str:
     """
-    Convert RunReport to
+    Convert RunReport to evaluation report format.
 
     Args:
-        report: Primary RunReport to
+        report: Primary RunReport to evaluate
         baseline: Baseline RunReport for comparison
         format: Output format ("json" or "markdown")
 
     Returns:
-        Formatted
+        Formatted evaluation report string
     """
     if not validate_report(report):
         raise ValueError("Invalid primary RunReport structure")
@@ -178,15 +178,14 @@ def to_certificate(report: RunReport, baseline: RunReport, format: str = "json")
     if not _validate_baseline_or_report(baseline):
         raise ValueError("Invalid baseline RunReport structure")
 
-
-    certificate = make_certificate(report, baseline)
+    evaluation_report = make_report(report, baseline)
 
     if format == "json":
-        return json.dumps(
+        return json.dumps(evaluation_report, indent=2, ensure_ascii=False)
     elif format == "markdown":
-        return
+        return render_report_markdown(evaluation_report)
     else:
-        raise ValueError(f"Unsupported
+        raise ValueError(f"Unsupported evaluation report format: {format}")
 
 
 def save_report(
```
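For downstream callers, the practical effect of the hunks above is a rename of the certificate API: `make_certificate`/`to_certificate` become `make_report` (now imported from `report_builder`) and `to_evaluation_report`, keeping the `(report, baseline, format)` shape. A minimal usage sketch follows; the file names and the assumption that the inputs are `RunReport` dicts loaded from JSON are illustrative only, not taken from the package:

```python
# Hypothetical migration sketch for invarlock 0.3.8 (not from the package docs).
import json

from invarlock.reporting.report import to_evaluation_report

# Assumption: run.json / baseline.json hold serialized RunReport dicts that
# pass validate_report(); otherwise to_evaluation_report raises ValueError.
with open("run.json", encoding="utf-8") as f:
    report = json.load(f)
with open("baseline.json", encoding="utf-8") as f:
    baseline = json.load(f)

# JSON string, produced internally via json.dumps(..., indent=2)
evaluation_json = to_evaluation_report(report, baseline, format="json")

# Markdown string, rendered internally via render_report_markdown(...)
evaluation_md = to_evaluation_report(report, baseline, format="markdown")
```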
```diff
@@ -203,9 +202,9 @@ def save_report(
     Args:
         report: RunReport to save
         output_dir: Directory to save reports in
-        formats: List of formats to generate ("json", "markdown", "html", "
+        formats: List of formats to generate ("json", "markdown", "html", "report")
         compare: Optional comparison report
-        baseline: Optional baseline report for
+        baseline: Optional baseline report for evaluation report generation
         filename_prefix: Prefix for generated filenames
 
     Returns:
@@ -238,23 +237,24 @@ def save_report(
             f.write(to_html(report, compare))
         saved_files["html"] = html_path
 
-    if "
+    if "report" in formats:
         if baseline is None:
-            raise ValueError(
+            raise ValueError(
+                "Baseline report required for evaluation report generation"
+            )
 
-
-
-
-
-
-        saved_files["cert"] = cert_json_path
+        report_json = to_evaluation_report(report, baseline, format="json")
+        report_json_path = output_path / "evaluation.report.json"
+        with open(report_json_path, "w", encoding="utf-8") as f:
+            f.write(report_json)
+        saved_files["report"] = report_json_path
 
         # Also emit a markdown variant for human consumption
-
-
-        with open(
-            f.write(
-        saved_files["
+        report_md = to_evaluation_report(report, baseline, format="markdown")
+        report_md_path = output_path / "evaluation_report.md"
+        with open(report_md_path, "w", encoding="utf-8") as f:
+            f.write(report_md)
+        saved_files["report_md"] = report_md_path
 
     # Emit a lightweight manifest to serve as an evidence bundle index
     try:
@@ -263,8 +263,8 @@ def save_report(
         manifest: dict[str, Any] = {
             "generated_at": _dt.now().isoformat(),
             "files": {
-                "
-                "
+                "evaluation_report_json": str(report_json_path),
+                "evaluation_report_markdown": str(report_md_path),
             },
             "summary": {
                 "run_model": (report.get("meta", {}) or {}).get("model_id"),
@@ -273,15 +273,15 @@ def save_report(
             },
         }
 
-        # Surface quick triage fields without opening the
+        # Surface quick triage fields without opening the evaluation report.
         try:
             from .render import compute_console_validation_block
 
-
-            if not isinstance(
-                raise TypeError("
+            evaluation_report_obj = json.loads(report_json)
+            if not isinstance(evaluation_report_obj, dict):
+                raise TypeError("evaluation report JSON did not decode to a dict")
 
-            block = compute_console_validation_block(
+            block = compute_console_validation_block(evaluation_report_obj)
             rows = block.get("rows", []) or []
             gates_total = len(rows)
             gates_passed = sum(
@@ -290,7 +290,7 @@ def save_report(
             overall_status = "PASS" if block.get("overall_pass") else "FAIL"
 
             pm_ratio = None
-            pm =
+            pm = evaluation_report_obj.get("primary_metric", {}) or {}
             if isinstance(pm, dict):
                 ratio = pm.get("ratio_vs_baseline")
                 if isinstance(ratio, int | float):
@@ -936,4 +936,10 @@ def _get_default_css() -> str:
 
 
 # Export public API
-__all__ = [
+__all__ = [
+    "to_json",
+    "to_markdown",
+    "to_html",
+    "to_evaluation_report",
+    "save_report",
+]
```
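The remaining hunks rename the `"cert"` save format to `"report"`, write `evaluation.report.json` plus `evaluation_report.md`, and index them under the `"report"` / `"report_md"` keys. A hedged sketch of the new call is below; the keyword names come from the docstring hunk, while the concrete argument values and the assumption that `save_report` returns the `saved_files` mapping are illustrative:

```python
# Hypothetical usage sketch for save_report in invarlock 0.3.8.
import json

from invarlock.reporting.report import save_report

with open("run.json", encoding="utf-8") as f:        # illustrative paths
    report = json.load(f)
with open("baseline.json", encoding="utf-8") as f:
    baseline = json.load(f)

saved = save_report(
    report=report,
    output_dir="out",              # directory to save reports in
    formats=["json", "report"],    # "report" replaces the former "cert" format
    baseline=baseline,             # required whenever "report" is requested
)

# Assumption: the return value is the saved_files mapping built in the diff,
# so the evaluation report paths appear under "report" and "report_md".
print(saved.get("report"), saved.get("report_md"))
```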