invarlock-0.3.6-py3-none-any.whl → invarlock-0.3.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73):
  1. invarlock/__init__.py +4 -4
  2. invarlock/adapters/__init__.py +10 -14
  3. invarlock/adapters/auto.py +37 -50
  4. invarlock/adapters/capabilities.py +2 -2
  5. invarlock/adapters/hf_causal.py +418 -0
  6. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  7. invarlock/adapters/hf_loading.py +7 -7
  8. invarlock/adapters/hf_mixin.py +53 -9
  9. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  10. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  11. invarlock/assurance/__init__.py +15 -23
  12. invarlock/cli/adapter_auto.py +32 -26
  13. invarlock/cli/app.py +128 -27
  14. invarlock/cli/commands/__init__.py +2 -2
  15. invarlock/cli/commands/calibrate.py +48 -4
  16. invarlock/cli/commands/doctor.py +8 -10
  17. invarlock/cli/commands/evaluate.py +986 -0
  18. invarlock/cli/commands/explain_gates.py +25 -17
  19. invarlock/cli/commands/export_html.py +11 -9
  20. invarlock/cli/commands/plugins.py +13 -9
  21. invarlock/cli/commands/report.py +326 -92
  22. invarlock/cli/commands/run.py +1160 -228
  23. invarlock/cli/commands/verify.py +157 -97
  24. invarlock/cli/config.py +1 -1
  25. invarlock/cli/determinism.py +1 -1
  26. invarlock/cli/doctor_helpers.py +4 -5
  27. invarlock/cli/output.py +193 -0
  28. invarlock/cli/provenance.py +4 -4
  29. invarlock/core/bootstrap.py +1 -1
  30. invarlock/core/registry.py +9 -11
  31. invarlock/core/retry.py +14 -14
  32. invarlock/core/runner.py +112 -26
  33. invarlock/edits/noop.py +2 -2
  34. invarlock/edits/quant_rtn.py +67 -39
  35. invarlock/eval/__init__.py +1 -1
  36. invarlock/eval/bench.py +14 -10
  37. invarlock/eval/data.py +68 -23
  38. invarlock/eval/metrics.py +59 -1
  39. invarlock/eval/primary_metric.py +1 -1
  40. invarlock/eval/tasks/__init__.py +12 -0
  41. invarlock/eval/tasks/classification.py +48 -0
  42. invarlock/eval/tasks/qa.py +36 -0
  43. invarlock/eval/tasks/text_generation.py +102 -0
  44. invarlock/guards/invariants.py +19 -10
  45. invarlock/guards/rmt.py +2 -2
  46. invarlock/guards/spectral.py +1 -1
  47. invarlock/guards/variance.py +2 -2
  48. invarlock/model_profile.py +64 -62
  49. invarlock/observability/health.py +6 -6
  50. invarlock/observability/metrics.py +108 -0
  51. invarlock/plugins/hf_bnb_adapter.py +32 -21
  52. invarlock/reporting/__init__.py +18 -4
  53. invarlock/reporting/guards_analysis.py +154 -4
  54. invarlock/reporting/html.py +61 -11
  55. invarlock/reporting/normalizer.py +9 -2
  56. invarlock/reporting/policy_utils.py +1 -1
  57. invarlock/reporting/primary_metric_utils.py +11 -11
  58. invarlock/reporting/render.py +876 -510
  59. invarlock/reporting/report.py +72 -30
  60. invarlock/reporting/{certificate.py → report_builder.py} +252 -99
  61. invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
  62. invarlock/reporting/report_types.py +6 -1
  63. invarlock/reporting/telemetry.py +86 -0
  64. invarlock-0.3.8.dist-info/METADATA +283 -0
  65. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
  66. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
  67. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
  68. invarlock/adapters/hf_gpt2.py +0 -404
  69. invarlock/adapters/hf_llama.py +0 -487
  70. invarlock/cli/commands/certify.py +0 -422
  71. invarlock-0.3.6.dist-info/METADATA +0 -588
  72. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
  73. {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,7 @@ import typer
7
7
  from rich.console import Console
8
8
 
9
9
  from invarlock.core.auto_tuning import get_tier_policies
10
- from invarlock.reporting.certificate import make_certificate
10
+ from invarlock.reporting.report_builder import make_report
11
11
 
12
12
  console = Console()
13
13
 
@@ -18,9 +18,9 @@ def explain_gates_command(
18
18
  ..., "--baseline", help="Path to baseline report.json"
19
19
  ),
20
20
  ) -> None:
21
- """Explain certificate gates for a report vs baseline.
21
+ """Explain evaluation report gates for a report vs baseline.
22
22
 
23
- Loads the reports, builds a certificate, and prints gate thresholds,
23
+ Loads the reports, builds an evaluation report, and prints gate thresholds,
24
24
  observed statistics, and pass/fail reasons in a compact, readable form.
25
25
  """
26
26
  report_path = Path(report)
@@ -36,13 +36,17 @@ def explain_gates_command(
36
36
  console.print(f"[red]Failed to load inputs: {exc}[/red]")
37
37
  raise typer.Exit(1) from exc
38
38
 
39
- cert = make_certificate(report_data, baseline_data)
39
+ evaluation_report = make_report(report_data, baseline_data)
40
40
  validation = (
41
- cert.get("validation", {}) if isinstance(cert.get("validation"), dict) else {}
41
+ evaluation_report.get("validation", {})
42
+ if isinstance(evaluation_report.get("validation"), dict)
43
+ else {}
42
44
  )
43
45
 
44
46
  # Extract tier + metric policy (floors/hysteresis)
45
- tier = str((cert.get("auto", {}) or {}).get("tier", "balanced")).lower()
47
+ tier = str(
48
+ (evaluation_report.get("auto", {}) or {}).get("tier", "balanced")
49
+ ).lower()
46
50
  tier_thresholds = {
47
51
  "conservative": 1.05,
48
52
  "balanced": 1.10,
@@ -50,8 +54,8 @@ def explain_gates_command(
50
54
  "none": 1.10,
51
55
  }
52
56
  resolved_policy = (
53
- cert.get("resolved_policy", {})
54
- if isinstance(cert.get("resolved_policy"), dict)
57
+ evaluation_report.get("resolved_policy", {})
58
+ if isinstance(evaluation_report.get("resolved_policy"), dict)
55
59
  else {}
56
60
  )
57
61
  metrics_policy = (
@@ -83,7 +87,11 @@ def explain_gates_command(
83
87
  limit_base = tier_thresholds.get(tier, 1.10)
84
88
  limit_with_hyst = limit_base + max(0.0, hysteresis_ratio)
85
89
  tokens_ok = True
86
- telem = cert.get("telemetry", {}) if isinstance(cert.get("telemetry"), dict) else {}
90
+ telem = (
91
+ evaluation_report.get("telemetry", {})
92
+ if isinstance(evaluation_report.get("telemetry"), dict)
93
+ else {}
94
+ )
87
95
  try:
88
96
  total_tokens = int(telem.get("preview_total_tokens", 0)) + int(
89
97
  telem.get("final_total_tokens", 0)
@@ -95,8 +103,8 @@ def explain_gates_command(
95
103
  # Primary-metric ratio gate explanation (ppl-like kinds shown as ratios)
96
104
  ratio = None
97
105
  ratio_ci = None
98
- if isinstance(cert.get("primary_metric"), dict):
99
- pm = cert.get("primary_metric", {})
106
+ if isinstance(evaluation_report.get("primary_metric"), dict):
107
+ pm = evaluation_report.get("primary_metric", {})
100
108
  ratio = pm.get("ratio_vs_baseline")
101
109
  ratio_ci = pm.get("display_ci")
102
110
  hysteresis_applied = bool(validation.get("hysteresis_applied"))
@@ -123,8 +131,8 @@ def explain_gates_command(
123
131
 
124
132
  # Tail gate explanation (warn/fail; based on per-window Δlog-loss vs baseline)
125
133
  pm_tail = (
126
- cert.get("primary_metric_tail", {})
127
- if isinstance(cert.get("primary_metric_tail"), dict)
134
+ evaluation_report.get("primary_metric_tail", {})
135
+ if isinstance(evaluation_report.get("primary_metric_tail"), dict)
128
136
  else {}
129
137
  )
130
138
  if pm_tail:
@@ -194,8 +202,8 @@ def explain_gates_command(
194
202
  # Drift gate explanation
195
203
  drift = None
196
204
  drift_ci = None
197
- if isinstance(cert.get("primary_metric"), dict):
198
- pm = cert.get("primary_metric", {})
205
+ if isinstance(evaluation_report.get("primary_metric"), dict):
206
+ pm = evaluation_report.get("primary_metric", {})
199
207
  preview = pm.get("preview")
200
208
  final = pm.get("final")
201
209
  if isinstance(preview, int | float) and isinstance(final, int | float):
@@ -220,8 +228,8 @@ def explain_gates_command(
220
228
 
221
229
  # Guard Overhead explanation (if present)
222
230
  overhead = (
223
- cert.get("guard_overhead", {})
224
- if isinstance(cert.get("guard_overhead"), dict)
231
+ evaluation_report.get("guard_overhead", {})
232
+ if isinstance(evaluation_report.get("guard_overhead"), dict)
225
233
  else {}
226
234
  )
227
235
  if overhead:
@@ -2,7 +2,7 @@
2
2
  InvarLock HTML Export
3
3
  =================
4
4
 
5
- Thin wrapper over the HTML certificate renderer to make exporting
5
+ Thin wrapper over the HTML evaluation report renderer to make exporting
6
6
  discoverable and scriptable.
7
7
  """
8
8
 
@@ -20,7 +20,9 @@ console = Console()
20
20
 
21
21
 
22
22
  def export_html_command(
23
- input: str = typer.Option(..., "--input", "-i", help="Path to certificate JSON"),
23
+ input: str = typer.Option(
24
+ ..., "--input", "-i", help="Path to evaluation report JSON"
25
+ ),
24
26
  output: str = typer.Option(..., "--output", "-o", help="Path to output HTML file"),
25
27
  embed_css: bool = typer.Option(
26
28
  True,
@@ -31,12 +33,12 @@ def export_html_command(
31
33
  False, "--force", help="Overwrite output file if it already exists"
32
34
  ),
33
35
  ) -> None:
34
- """Render a certificate JSON to HTML.
36
+ """Render an evaluation report JSON to HTML.
35
37
 
36
38
  Exit codes:
37
39
  - 0: success
38
40
  - 1: generic failure (IO or overwrite refusal)
39
- - 2: validation failure (invalid certificate schema)
41
+ - 2: validation failure (invalid evaluation report schema)
40
42
  """
41
43
  # When called programmatically, Typer's Option defaults can be OptionInfo
42
44
  try: # pragma: no cover - defensive, matches other commands' pattern
@@ -70,12 +72,12 @@ def export_html_command(
70
72
  raise typer.Exit(1) from exc
71
73
 
72
74
  try:
73
- from invarlock.reporting.html import render_certificate_html
75
+ from invarlock.reporting.html import render_report_html
74
76
 
75
- html = render_certificate_html(payload)
77
+ html = render_report_html(payload)
76
78
  except ValueError as exc:
77
- # Certificate validation failed upstream
78
- console.print(f"[red]❌ Certificate validation failed: {exc}[/red]")
79
+ # Evaluation report validation failed upstream
80
+ console.print(f"[red]❌ Evaluation report validation failed: {exc}[/red]")
79
81
  raise typer.Exit(2) from exc
80
82
  except Exception as exc:
81
83
  console.print(f"[red]❌ Failed to render HTML: {exc}[/red]")
@@ -94,7 +96,7 @@ def export_html_command(
94
96
  console.print(f"[red]❌ Failed to write output file: {exc}[/red]")
95
97
  raise typer.Exit(1) from exc
96
98
 
97
- console.print(f"✅ Exported certificate HTML → {out_path}")
99
+ console.print(f"✅ Exported evaluation report HTML → {out_path}")
98
100
 
99
101
 
100
102
  __all__ = ["export_html_command"]
@@ -201,9 +201,9 @@ def plugins_command(
201
201
  entry = info.get("entry_point")
202
202
  # Classify support level independent of origin
203
203
  if module.startswith("invarlock.adapters"):
204
- if n in {"hf_causal_auto", "hf_mlm_auto"}:
204
+ if n in {"hf_auto"}:
205
205
  support = "auto"
206
- elif n in {"hf_onnx"}:
206
+ elif n in {"hf_causal_onnx"}:
207
207
  # ONNX relies on optional extras (optimum + onnxruntime)
208
208
  support = "optional"
209
209
  else:
@@ -236,7 +236,7 @@ def plugins_command(
236
236
  if backend_name in {"auto-gptq", "autoawq"} and not is_linux:
237
237
  status = "unsupported"
238
238
  enable = "Linux-only"
239
- # Extras completeness for certain adapters (e.g., hf_onnx needs optimum + onnxruntime)
239
+ # Extras completeness for certain adapters (e.g., hf_causal_onnx needs optimum + onnxruntime)
240
240
  try:
241
241
  extras_status = _check_plugin_extras(n, "adapters")
242
242
  except Exception:
@@ -883,10 +883,14 @@ def _check_plugin_extras(plugin_name: str, plugin_type: str) -> str:
883
883
  "variance": {"packages": [], "extra": ""},
884
884
  "rmt": {"packages": [], "extra": ""},
885
885
  # Adapter plugins (baked-in only)
886
- "hf_gpt2": {"packages": ["transformers"], "extra": "invarlock[adapters]"},
887
- "hf_bert": {"packages": ["transformers"], "extra": "invarlock[adapters]"},
888
- "hf_llama": {"packages": ["transformers"], "extra": "invarlock[adapters]"},
889
- "hf_onnx": {"packages": ["optimum", "onnxruntime"], "extra": "invarlock[onnx]"},
886
+ "hf_causal": {"packages": ["transformers"], "extra": "invarlock[adapters]"},
887
+ "hf_mlm": {"packages": ["transformers"], "extra": "invarlock[adapters]"},
888
+ "hf_seq2seq": {"packages": ["transformers"], "extra": "invarlock[adapters]"},
889
+ "hf_auto": {"packages": ["transformers"], "extra": "invarlock[adapters]"},
890
+ "hf_causal_onnx": {
891
+ "packages": ["optimum", "onnxruntime"],
892
+ "extra": "invarlock[onnx]",
893
+ },
890
894
  # Optional adapter plugins
891
895
  "hf_gptq": {"packages": ["auto_gptq"], "extra": "invarlock[gptq]"},
892
896
  "hf_awq": {"packages": ["autoawq"], "extra": "invarlock[awq]"},
@@ -971,7 +975,7 @@ def _resolve_uninstall_targets(target: str) -> list[str]:
971
975
  "bitsandbytes": ["bitsandbytes"],
972
976
  # ONNX/Optimum family
973
977
  "onnx": ["onnxruntime"],
974
- "hf_onnx": ["onnxruntime"],
978
+ "hf_causal_onnx": ["onnxruntime"],
975
979
  "optimum": ["optimum"],
976
980
  }
977
981
  return mapping.get(name, [])
@@ -1010,7 +1014,7 @@ def _resolve_install_targets(target: str) -> list[str]:
1010
1014
  "transformers": ["invarlock[adapters]"],
1011
1015
  # ONNX/Optimum
1012
1016
  "onnx": ["invarlock[onnx]"],
1013
- "hf_onnx": ["invarlock[onnx]"],
1017
+ "hf_causal_onnx": ["invarlock[onnx]"],
1014
1018
  "optimum": ["invarlock[onnx]"],
1015
1019
  # Direct packages passthrough
1016
1020
  "bitsandbytes": ["bitsandbytes"],