invarlock 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (73)
  1. invarlock/__init__.py +4 -4
  2. invarlock/adapters/__init__.py +10 -14
  3. invarlock/adapters/auto.py +37 -50
  4. invarlock/adapters/capabilities.py +2 -2
  5. invarlock/adapters/hf_causal.py +418 -0
  6. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  7. invarlock/adapters/hf_loading.py +7 -7
  8. invarlock/adapters/hf_mixin.py +53 -9
  9. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  10. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  11. invarlock/assurance/__init__.py +15 -23
  12. invarlock/cli/adapter_auto.py +32 -26
  13. invarlock/cli/app.py +128 -27
  14. invarlock/cli/commands/__init__.py +2 -2
  15. invarlock/cli/commands/calibrate.py +48 -4
  16. invarlock/cli/commands/doctor.py +8 -10
  17. invarlock/cli/commands/evaluate.py +986 -0
  18. invarlock/cli/commands/explain_gates.py +25 -17
  19. invarlock/cli/commands/export_html.py +11 -9
  20. invarlock/cli/commands/plugins.py +13 -9
  21. invarlock/cli/commands/report.py +326 -92
  22. invarlock/cli/commands/run.py +1160 -228
  23. invarlock/cli/commands/verify.py +157 -97
  24. invarlock/cli/config.py +1 -1
  25. invarlock/cli/determinism.py +1 -1
  26. invarlock/cli/doctor_helpers.py +4 -5
  27. invarlock/cli/output.py +193 -0
  28. invarlock/cli/provenance.py +4 -4
  29. invarlock/core/bootstrap.py +1 -1
  30. invarlock/core/registry.py +9 -11
  31. invarlock/core/retry.py +14 -14
  32. invarlock/core/runner.py +112 -26
  33. invarlock/edits/noop.py +2 -2
  34. invarlock/edits/quant_rtn.py +67 -39
  35. invarlock/eval/__init__.py +1 -1
  36. invarlock/eval/bench.py +14 -10
  37. invarlock/eval/data.py +68 -23
  38. invarlock/eval/metrics.py +59 -1
  39. invarlock/eval/primary_metric.py +1 -1
  40. invarlock/eval/tasks/__init__.py +12 -0
  41. invarlock/eval/tasks/classification.py +48 -0
  42. invarlock/eval/tasks/qa.py +36 -0
  43. invarlock/eval/tasks/text_generation.py +102 -0
  44. invarlock/guards/invariants.py +19 -10
  45. invarlock/guards/rmt.py +2 -2
  46. invarlock/guards/spectral.py +1 -1
  47. invarlock/guards/variance.py +2 -2
  48. invarlock/model_profile.py +64 -62
  49. invarlock/observability/health.py +6 -6
  50. invarlock/observability/metrics.py +108 -0
  51. invarlock/plugins/hf_bnb_adapter.py +32 -21
  52. invarlock/reporting/__init__.py +18 -4
  53. invarlock/reporting/guards_analysis.py +154 -4
  54. invarlock/reporting/html.py +61 -11
  55. invarlock/reporting/normalizer.py +9 -2
  56. invarlock/reporting/policy_utils.py +1 -1
  57. invarlock/reporting/primary_metric_utils.py +11 -11
  58. invarlock/reporting/render.py +876 -510
  59. invarlock/reporting/report.py +72 -30
  60. invarlock/reporting/{certificate.py → report_builder.py} +252 -99
  61. invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
  62. invarlock/reporting/report_types.py +6 -1
  63. invarlock/reporting/telemetry.py +86 -0
  64. invarlock-0.3.8.dist-info/METADATA +283 -0
  65. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
  66. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
  67. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
  68. invarlock/adapters/hf_gpt2.py +0 -404
  69. invarlock/adapters/hf_llama.py +0 -487
  70. invarlock/cli/commands/certify.py +0 -422
  71. invarlock-0.3.6.dist-info/METADATA +0 -588
  72. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
  73. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
@@ -16,11 +16,9 @@ from typing import Any, cast
16
16
 
17
17
  from invarlock.cli._evidence import maybe_dump_guard_evidence
18
18
 
19
- from .certificate import (
20
- make_certificate,
21
- )
22
19
  from .normalizer import normalize_run_report
23
- from .render import render_certificate_markdown
20
+ from .render import render_report_markdown
21
+ from .report_builder import make_report
24
22
  from .report_types import RunReport, validate_report
25
23
 
26
24
 
@@ -160,17 +158,19 @@ def to_html(
160
158
  return "\n".join(html_parts)
161
159
 
162
160
 
163
- def to_certificate(report: RunReport, baseline: RunReport, format: str = "json") -> str:
161
+ def to_evaluation_report(
162
+ report: RunReport, baseline: RunReport, format: str = "json"
163
+ ) -> str:
164
164
  """
165
- Convert RunReport to certificate format.
165
+ Convert RunReport to evaluation report format.
166
166
 
167
167
  Args:
168
- report: Primary RunReport to certify
168
+ report: Primary RunReport to evaluate
169
169
  baseline: Baseline RunReport for comparison
170
170
  format: Output format ("json" or "markdown")
171
171
 
172
172
  Returns:
173
- Formatted certificate string
173
+ Formatted evaluation report string
174
174
  """
175
175
  if not validate_report(report):
176
176
  raise ValueError("Invalid primary RunReport structure")
@@ -178,15 +178,14 @@ def to_certificate(report: RunReport, baseline: RunReport, format: str = "json")
178
178
  if not _validate_baseline_or_report(baseline):
179
179
  raise ValueError("Invalid baseline RunReport structure")
180
180
 
181
- # Generate certificate
182
- certificate = make_certificate(report, baseline)
181
+ evaluation_report = make_report(report, baseline)
183
182
 
184
183
  if format == "json":
185
- return json.dumps(certificate, indent=2, ensure_ascii=False)
184
+ return json.dumps(evaluation_report, indent=2, ensure_ascii=False)
186
185
  elif format == "markdown":
187
- return render_certificate_markdown(certificate)
186
+ return render_report_markdown(evaluation_report)
188
187
  else:
189
- raise ValueError(f"Unsupported certificate format: {format}")
188
+ raise ValueError(f"Unsupported evaluation report format: {format}")
190
189
 
191
190
 
192
191
  def save_report(
@@ -203,9 +202,9 @@ def save_report(
203
202
  Args:
204
203
  report: RunReport to save
205
204
  output_dir: Directory to save reports in
206
- formats: List of formats to generate ("json", "markdown", "html", "cert")
205
+ formats: List of formats to generate ("json", "markdown", "html", "report")
207
206
  compare: Optional comparison report
208
- baseline: Optional baseline report for certificate generation
207
+ baseline: Optional baseline report for evaluation report generation
209
208
  filename_prefix: Prefix for generated filenames
210
209
 
211
210
  Returns:
@@ -238,31 +237,34 @@ def save_report(
238
237
  f.write(to_html(report, compare))
239
238
  saved_files["html"] = html_path
240
239
 
241
- if "cert" in formats:
240
+ if "report" in formats:
242
241
  if baseline is None:
243
- raise ValueError("Baseline report required for certificate generation")
242
+ raise ValueError(
243
+ "Baseline report required for evaluation report generation"
244
+ )
244
245
 
245
- # Generate certificate JSON in canonical path/name
246
- cert_json_path = output_path / "evaluation.cert.json"
247
- with open(cert_json_path, "w", encoding="utf-8") as f:
248
- f.write(to_certificate(report, baseline, format="json"))
249
- saved_files["cert"] = cert_json_path
246
+ report_json = to_evaluation_report(report, baseline, format="json")
247
+ report_json_path = output_path / "evaluation.report.json"
248
+ with open(report_json_path, "w", encoding="utf-8") as f:
249
+ f.write(report_json)
250
+ saved_files["report"] = report_json_path
250
251
 
251
252
  # Also emit a markdown variant for human consumption
252
- cert_md_path = output_path / f"{filename_prefix}_certificate.md"
253
- with open(cert_md_path, "w", encoding="utf-8") as f:
254
- f.write(to_certificate(report, baseline, format="markdown"))
255
- saved_files["cert_md"] = cert_md_path
253
+ report_md = to_evaluation_report(report, baseline, format="markdown")
254
+ report_md_path = output_path / "evaluation_report.md"
255
+ with open(report_md_path, "w", encoding="utf-8") as f:
256
+ f.write(report_md)
257
+ saved_files["report_md"] = report_md_path
256
258
 
257
259
  # Emit a lightweight manifest to serve as an evidence bundle index
258
260
  try:
259
261
  from datetime import datetime as _dt
260
262
 
261
- manifest = {
263
+ manifest: dict[str, Any] = {
262
264
  "generated_at": _dt.now().isoformat(),
263
265
  "files": {
264
- "certificate_json": str(cert_json_path),
265
- "certificate_markdown": str(cert_md_path),
266
+ "evaluation_report_json": str(report_json_path),
267
+ "evaluation_report_markdown": str(report_md_path),
266
268
  },
267
269
  "summary": {
268
270
  "run_model": (report.get("meta", {}) or {}).get("model_id"),
@@ -270,6 +272,40 @@ def save_report(
270
272
  "seed": (report.get("meta", {}) or {}).get("seed"),
271
273
  },
272
274
  }
275
+
276
+ # Surface quick triage fields without opening the evaluation report.
277
+ try:
278
+ from .render import compute_console_validation_block
279
+
280
+ evaluation_report_obj = json.loads(report_json)
281
+ if not isinstance(evaluation_report_obj, dict):
282
+ raise TypeError("evaluation report JSON did not decode to a dict")
283
+
284
+ block = compute_console_validation_block(evaluation_report_obj)
285
+ rows = block.get("rows", []) or []
286
+ gates_total = len(rows)
287
+ gates_passed = sum(
288
+ 1 for r in rows if isinstance(r, dict) and bool(r.get("ok"))
289
+ )
290
+ overall_status = "PASS" if block.get("overall_pass") else "FAIL"
291
+
292
+ pm_ratio = None
293
+ pm = evaluation_report_obj.get("primary_metric", {}) or {}
294
+ if isinstance(pm, dict):
295
+ ratio = pm.get("ratio_vs_baseline")
296
+ if isinstance(ratio, int | float):
297
+ pm_ratio = float(ratio)
298
+
299
+ manifest["summary"].update(
300
+ {
301
+ "overall_status": overall_status,
302
+ "primary_metric_ratio": pm_ratio,
303
+ "gates_passed": gates_passed,
304
+ "gates_total": gates_total,
305
+ }
306
+ )
307
+ except Exception:
308
+ pass
273
309
  # Write debug evidence (tiny) when requested via env
274
310
  guard_payload = {}
275
311
  try:
@@ -900,4 +936,10 @@ def _get_default_css() -> str:
900
936
 
901
937
 
902
938
  # Export public API
903
- __all__ = ["to_json", "to_markdown", "to_html", "to_certificate", "save_report"]
939
+ __all__ = [
940
+ "to_json",
941
+ "to_markdown",
942
+ "to_html",
943
+ "to_evaluation_report",
944
+ "save_report",
945
+ ]