invarlock 0.3.6-py3-none-any.whl → 0.3.8-py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their public registries. It is provided for informational purposes only.
- invarlock/__init__.py +4 -4
- invarlock/adapters/__init__.py +10 -14
- invarlock/adapters/auto.py +37 -50
- invarlock/adapters/capabilities.py +2 -2
- invarlock/adapters/hf_causal.py +418 -0
- invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
- invarlock/adapters/hf_loading.py +7 -7
- invarlock/adapters/hf_mixin.py +53 -9
- invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
- invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
- invarlock/assurance/__init__.py +15 -23
- invarlock/cli/adapter_auto.py +32 -26
- invarlock/cli/app.py +128 -27
- invarlock/cli/commands/__init__.py +2 -2
- invarlock/cli/commands/calibrate.py +48 -4
- invarlock/cli/commands/doctor.py +8 -10
- invarlock/cli/commands/evaluate.py +986 -0
- invarlock/cli/commands/explain_gates.py +25 -17
- invarlock/cli/commands/export_html.py +11 -9
- invarlock/cli/commands/plugins.py +13 -9
- invarlock/cli/commands/report.py +326 -92
- invarlock/cli/commands/run.py +1160 -228
- invarlock/cli/commands/verify.py +157 -97
- invarlock/cli/config.py +1 -1
- invarlock/cli/determinism.py +1 -1
- invarlock/cli/doctor_helpers.py +4 -5
- invarlock/cli/output.py +193 -0
- invarlock/cli/provenance.py +4 -4
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/registry.py +9 -11
- invarlock/core/retry.py +14 -14
- invarlock/core/runner.py +112 -26
- invarlock/edits/noop.py +2 -2
- invarlock/edits/quant_rtn.py +67 -39
- invarlock/eval/__init__.py +1 -1
- invarlock/eval/bench.py +14 -10
- invarlock/eval/data.py +68 -23
- invarlock/eval/metrics.py +59 -1
- invarlock/eval/primary_metric.py +1 -1
- invarlock/eval/tasks/__init__.py +12 -0
- invarlock/eval/tasks/classification.py +48 -0
- invarlock/eval/tasks/qa.py +36 -0
- invarlock/eval/tasks/text_generation.py +102 -0
- invarlock/guards/invariants.py +19 -10
- invarlock/guards/rmt.py +2 -2
- invarlock/guards/spectral.py +1 -1
- invarlock/guards/variance.py +2 -2
- invarlock/model_profile.py +64 -62
- invarlock/observability/health.py +6 -6
- invarlock/observability/metrics.py +108 -0
- invarlock/plugins/hf_bnb_adapter.py +32 -21
- invarlock/reporting/__init__.py +18 -4
- invarlock/reporting/guards_analysis.py +154 -4
- invarlock/reporting/html.py +61 -11
- invarlock/reporting/normalizer.py +9 -2
- invarlock/reporting/policy_utils.py +1 -1
- invarlock/reporting/primary_metric_utils.py +11 -11
- invarlock/reporting/render.py +876 -510
- invarlock/reporting/report.py +72 -30
- invarlock/reporting/{certificate.py → report_builder.py} +252 -99
- invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
- invarlock/reporting/report_types.py +6 -1
- invarlock/reporting/telemetry.py +86 -0
- invarlock-0.3.8.dist-info/METADATA +283 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
- invarlock/adapters/hf_gpt2.py +0 -404
- invarlock/adapters/hf_llama.py +0 -487
- invarlock/cli/commands/certify.py +0 -422
- invarlock-0.3.6.dist-info/METADATA +0 -588
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
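
The listing above reflects a module reorganization: the per-architecture adapters (hf_gpt2, hf_llama, hf_bert, hf_t5) are replaced by task-oriented modules (hf_causal, hf_mlm, hf_seq2seq), and the certificate reporting modules are renamed to report_builder/report_schema. Below is a minimal import-path check against the 0.3.8 layout; only the module paths come from this listing, and the classes or functions inside them are not shown in this diff.

```python
import importlib

# Module paths taken from the 0.3.8 file listing above; the symbols inside
# these modules are not visible in this diff, so only imports are attempted.
NEW_MODULES = [
    "invarlock.adapters.hf_causal",        # new; hf_gpt2 / hf_llama were removed
    "invarlock.adapters.hf_mlm",           # renamed from hf_bert
    "invarlock.adapters.hf_seq2seq",       # renamed from hf_t5
    "invarlock.adapters.hf_causal_onnx",   # renamed from hf_onnx
    "invarlock.reporting.report_builder",  # renamed from certificate
    "invarlock.reporting.report_schema",   # renamed from certificate_schema
]

for name in NEW_MODULES:
    importlib.import_module(name)  # raises ModuleNotFoundError on the 0.3.6 layout
```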
invarlock/reporting/report.py
CHANGED
(Removed lines cut off in the source diff view are shown with a trailing "…".)

@@ -16,11 +16,9 @@ from typing import Any, cast
 
 from invarlock.cli._evidence import maybe_dump_guard_evidence
 
-from .certificate import (
-    make_certificate,
-)
 from .normalizer import normalize_run_report
-from .render import …
+from .render import render_report_markdown
+from .report_builder import make_report
 from .report_types import RunReport, validate_report
 
 
@@ -160,17 +158,19 @@ def to_html(
     return "\n".join(html_parts)
 
 
-def …
+def to_evaluation_report(
+    report: RunReport, baseline: RunReport, format: str = "json"
+) -> str:
     """
-    Convert RunReport to …
+    Convert RunReport to evaluation report format.
 
     Args:
-        report: Primary RunReport to …
+        report: Primary RunReport to evaluate
         baseline: Baseline RunReport for comparison
         format: Output format ("json" or "markdown")
 
     Returns:
-        Formatted …
+        Formatted evaluation report string
     """
     if not validate_report(report):
         raise ValueError("Invalid primary RunReport structure")
@@ -178,15 +178,14 @@ def to_certificate(report: RunReport, baseline: RunReport, format: str = "json")
     if not _validate_baseline_or_report(baseline):
         raise ValueError("Invalid baseline RunReport structure")
 
-
-    certificate = make_certificate(report, baseline)
+    evaluation_report = make_report(report, baseline)
 
     if format == "json":
-        return json.dumps(…
+        return json.dumps(evaluation_report, indent=2, ensure_ascii=False)
     elif format == "markdown":
-        return …
+        return render_report_markdown(evaluation_report)
     else:
-        raise ValueError(f"Unsupported …
+        raise ValueError(f"Unsupported evaluation report format: {format}")
 
 
 def save_report(
@@ -203,9 +202,9 @@ def save_report(
     Args:
         report: RunReport to save
         output_dir: Directory to save reports in
-        formats: List of formats to generate ("json", "markdown", "html", "…
+        formats: List of formats to generate ("json", "markdown", "html", "report")
         compare: Optional comparison report
-        baseline: Optional baseline report for …
+        baseline: Optional baseline report for evaluation report generation
         filename_prefix: Prefix for generated filenames
 
     Returns:
@@ -238,31 +237,34 @@ def save_report(
             f.write(to_html(report, compare))
         saved_files["html"] = html_path
 
-    if "…
+    if "report" in formats:
         if baseline is None:
-            raise ValueError(…
+            raise ValueError(
+                "Baseline report required for evaluation report generation"
+            )
 
-…
-…
-        with open(…
-            f.write(…
-        saved_files["…
+        report_json = to_evaluation_report(report, baseline, format="json")
+        report_json_path = output_path / "evaluation.report.json"
+        with open(report_json_path, "w", encoding="utf-8") as f:
+            f.write(report_json)
+        saved_files["report"] = report_json_path
 
         # Also emit a markdown variant for human consumption
-…
-…
-…
-…
+        report_md = to_evaluation_report(report, baseline, format="markdown")
+        report_md_path = output_path / "evaluation_report.md"
+        with open(report_md_path, "w", encoding="utf-8") as f:
+            f.write(report_md)
+        saved_files["report_md"] = report_md_path
 
         # Emit a lightweight manifest to serve as an evidence bundle index
         try:
            from datetime import datetime as _dt
 
-            manifest = {
+            manifest: dict[str, Any] = {
                "generated_at": _dt.now().isoformat(),
                "files": {
-                    "…
-                    "…
+                    "evaluation_report_json": str(report_json_path),
+                    "evaluation_report_markdown": str(report_md_path),
                },
                "summary": {
                    "run_model": (report.get("meta", {}) or {}).get("model_id"),
@@ -270,6 +272,40 @@ def save_report(
                    "seed": (report.get("meta", {}) or {}).get("seed"),
                },
            }
+
+            # Surface quick triage fields without opening the evaluation report.
+            try:
+                from .render import compute_console_validation_block
+
+                evaluation_report_obj = json.loads(report_json)
+                if not isinstance(evaluation_report_obj, dict):
+                    raise TypeError("evaluation report JSON did not decode to a dict")
+
+                block = compute_console_validation_block(evaluation_report_obj)
+                rows = block.get("rows", []) or []
+                gates_total = len(rows)
+                gates_passed = sum(
+                    1 for r in rows if isinstance(r, dict) and bool(r.get("ok"))
+                )
+                overall_status = "PASS" if block.get("overall_pass") else "FAIL"
+
+                pm_ratio = None
+                pm = evaluation_report_obj.get("primary_metric", {}) or {}
+                if isinstance(pm, dict):
+                    ratio = pm.get("ratio_vs_baseline")
+                    if isinstance(ratio, int | float):
+                        pm_ratio = float(ratio)
+
+                manifest["summary"].update(
+                    {
+                        "overall_status": overall_status,
+                        "primary_metric_ratio": pm_ratio,
+                        "gates_passed": gates_passed,
+                        "gates_total": gates_total,
+                    }
+                )
+            except Exception:
+                pass
            # Write debug evidence (tiny) when requested via env
            guard_payload = {}
            try:
@@ -900,4 +936,10 @@ def _get_default_css() -> str:
 
 
 # Export public API
-__all__ = […
+__all__ = [
+    "to_json",
+    "to_markdown",
+    "to_html",
+    "to_evaluation_report",
+    "save_report",
+]
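
A minimal usage sketch of the renamed reporting API, based only on the hunks above. The input reports are hypothetical (loaded here from placeholder files run.json and baseline.json), and save_report is called with keyword arguments because its full signature is not visible in this diff.

```python
import json
from pathlib import Path

from invarlock.reporting.report import save_report, to_evaluation_report

# Hypothetical inputs: two previously saved RunReport dicts. The file names
# are placeholders, not paths produced by invarlock itself.
run_report = json.loads(Path("run.json").read_text())
baseline_report = json.loads(Path("baseline.json").read_text())

# Replaces 0.3.6's to_certificate(); format may be "json" or "markdown".
report_json = to_evaluation_report(run_report, baseline_report, format="json")

# Including "report" in formats requires a baseline, otherwise save_report
# raises ValueError (see the hunk around new line 240 above). Keyword
# arguments are used because the positional order is not shown in this diff.
saved_files = save_report(
    run_report,
    output_dir="out",
    formats=["json", "markdown", "html", "report"],
    baseline=baseline_report,
)
# Per the hunks above, the "report" entry points at evaluation.report.json and
# "report_md" at evaluation_report.md (assuming save_report returns its
# saved_files mapping).
```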