invarlock 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. invarlock/__init__.py +2 -2
  2. invarlock/_data/runtime/tiers.yaml +57 -30
  3. invarlock/adapters/__init__.py +11 -15
  4. invarlock/adapters/auto.py +35 -40
  5. invarlock/adapters/capabilities.py +2 -2
  6. invarlock/adapters/hf_causal.py +418 -0
  7. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  8. invarlock/adapters/hf_mixin.py +25 -4
  9. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  10. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  11. invarlock/calibration/spectral_null.py +15 -10
  12. invarlock/calibration/variance_ve.py +0 -2
  13. invarlock/cli/adapter_auto.py +31 -21
  14. invarlock/cli/app.py +73 -2
  15. invarlock/cli/commands/calibrate.py +6 -2
  16. invarlock/cli/commands/certify.py +651 -91
  17. invarlock/cli/commands/doctor.py +11 -11
  18. invarlock/cli/commands/explain_gates.py +57 -8
  19. invarlock/cli/commands/plugins.py +13 -9
  20. invarlock/cli/commands/report.py +233 -69
  21. invarlock/cli/commands/run.py +1066 -244
  22. invarlock/cli/commands/verify.py +154 -15
  23. invarlock/cli/config.py +22 -6
  24. invarlock/cli/doctor_helpers.py +4 -5
  25. invarlock/cli/output.py +193 -0
  26. invarlock/cli/provenance.py +1 -1
  27. invarlock/core/api.py +45 -5
  28. invarlock/core/auto_tuning.py +65 -20
  29. invarlock/core/bootstrap.py +1 -1
  30. invarlock/core/contracts.py +7 -1
  31. invarlock/core/registry.py +11 -13
  32. invarlock/core/runner.py +425 -75
  33. invarlock/edits/quant_rtn.py +65 -37
  34. invarlock/eval/bench.py +3 -16
  35. invarlock/eval/data.py +82 -51
  36. invarlock/eval/metrics.py +63 -2
  37. invarlock/eval/primary_metric.py +23 -0
  38. invarlock/eval/tail_stats.py +230 -0
  39. invarlock/eval/tasks/__init__.py +12 -0
  40. invarlock/eval/tasks/classification.py +48 -0
  41. invarlock/eval/tasks/qa.py +36 -0
  42. invarlock/eval/tasks/text_generation.py +102 -0
  43. invarlock/guards/_estimators.py +154 -0
  44. invarlock/guards/invariants.py +19 -10
  45. invarlock/guards/policies.py +16 -6
  46. invarlock/guards/rmt.py +627 -546
  47. invarlock/guards/spectral.py +348 -110
  48. invarlock/guards/tier_config.py +32 -30
  49. invarlock/guards/variance.py +7 -31
  50. invarlock/guards_ref/rmt_ref.py +23 -23
  51. invarlock/model_profile.py +90 -42
  52. invarlock/observability/health.py +6 -6
  53. invarlock/observability/metrics.py +108 -0
  54. invarlock/reporting/certificate.py +384 -55
  55. invarlock/reporting/certificate_schema.py +3 -2
  56. invarlock/reporting/dataset_hashing.py +15 -2
  57. invarlock/reporting/guards_analysis.py +350 -277
  58. invarlock/reporting/html.py +55 -5
  59. invarlock/reporting/normalizer.py +13 -0
  60. invarlock/reporting/policy_utils.py +38 -36
  61. invarlock/reporting/primary_metric_utils.py +71 -17
  62. invarlock/reporting/render.py +852 -431
  63. invarlock/reporting/report.py +40 -4
  64. invarlock/reporting/report_types.py +11 -3
  65. invarlock/reporting/telemetry.py +86 -0
  66. invarlock/reporting/validate.py +1 -18
  67. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/METADATA +27 -13
  68. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/RECORD +72 -65
  69. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
  70. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
  71. invarlock/adapters/hf_gpt2.py +0 -404
  72. invarlock/adapters/hf_llama.py +0 -487
  73. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
  74. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
invarlock/cli/adapter_auto.py CHANGED
@@ -2,7 +2,8 @@
2
2
  Auto adapter resolution utilities.
3
3
 
4
4
  These helpers map a model identifier (HF directory or Hub ID) to a
5
- concrete built-in adapter name (hf_gpt2, hf_llama, hf_bert) without
5
+ concrete built-in adapter name (hf_causal, hf_mlm, hf_seq2seq, hf_causal_onnx)
6
+ without
6
7
  adding a hard dependency on Transformers.
7
8
  """
8
9
 
@@ -58,15 +59,15 @@ def _detect_quant_family_from_cfg(cfg: dict[str, Any]) -> str | None:
58
59
 
59
60
 
60
61
  def resolve_auto_adapter(
61
- model_id: str | os.PathLike[str], default: str = "hf_gpt2"
62
+ model_id: str | os.PathLike[str], default: str = "hf_causal"
62
63
  ) -> str:
63
64
  """Resolve an appropriate built-in adapter name for a model.
64
65
 
65
66
  Heuristics:
66
67
  - Prefer local config.json (no network). Inspect `model_type` and
67
- `architectures` to classify LLaMA/Mistral vs BERT vs GPT-like.
68
+ `architectures` to classify causal vs masked-LM vs seq2seq.
68
69
  - Fallback to simple name heuristics on the model_id string.
69
- - Default to `hf_gpt2` when unsure.
70
+ - Default to `hf_causal` when unsure.
70
71
  """
71
72
  cfg = _read_local_hf_config(model_id)
72
73
  model_id_str = str(model_id)
@@ -77,32 +78,41 @@ def resolve_auto_adapter(
77
78
  if fam:
78
79
  return fam
79
80
  mt = str(c.get("model_type", "")).lower()
81
+ if bool(c.get("is_encoder_decoder", False)):
82
+ return "hf_seq2seq"
80
83
  archs = [str(a) for a in c.get("architectures", []) if isinstance(a, str)]
81
84
  arch_blob = " ".join(archs)
82
- if (
83
- mt in {"llama", "mistral", "qwen", "yi"}
84
- or "Llama" in arch_blob
85
- or "Mistral" in arch_blob
86
- ):
87
- return "hf_llama"
85
+ if "ConditionalGeneration" in arch_blob or "Seq2SeqLM" in arch_blob:
86
+ return "hf_seq2seq"
88
87
  # Treat masked-LM families as BERT-like
89
88
  if (
90
89
  mt in {"bert", "roberta", "distilbert", "albert", "deberta", "deberta-v2"}
91
90
  or "MaskedLM" in arch_blob
92
91
  ):
93
- return "hf_bert"
94
- # Generic causal LM
95
- if "CausalLM" in arch_blob or mt in {
92
+ return "hf_mlm"
93
+ # Causal LM families (best-effort; structural validation happens in the adapter).
94
+ if "CausalLM" in arch_blob or "ForCausalLM" in arch_blob:
95
+ return "hf_causal"
96
+ if mt in {
97
+ "mistral",
98
+ "mixtral",
99
+ "qwen",
100
+ "qwen2",
101
+ "qwen2_moe",
102
+ "yi",
96
103
  "gpt2",
97
104
  "gpt_neox",
98
105
  "opt",
99
106
  "gptj",
100
- "gptj8bit",
107
+ "phi",
108
+ "falcon",
109
+ "glm",
110
+ "deepseek",
101
111
  }:
102
- return "hf_gpt2"
112
+ return "hf_causal"
103
113
  return None
104
114
 
105
- # If local directory contains ONNX model files, prefer hf_onnx
115
+ # If local directory contains ONNX model files, prefer the ONNX causal adapter.
106
116
  try:
107
117
  p = Path(model_id)
108
118
  if p.exists() and p.is_dir():
@@ -114,7 +124,7 @@ def resolve_auto_adapter(
114
124
  "encoder_model.onnx",
115
125
  ]
116
126
  if any((p / fname).exists() for fname in onnx_files):
117
- return "hf_onnx"
127
+ return "hf_causal_onnx"
118
128
  except Exception:
119
129
  pass
120
130
 
@@ -134,10 +144,10 @@ def resolve_auto_adapter(
134
144
  k in lower_id for k in ["bnb", "bitsandbytes", "-4bit", "-8bit", "4bit", "8bit"]
135
145
  ):
136
146
  return "hf_bnb"
137
- if any(k in lower_id for k in ["llama", "mistral", "qwen", "yi"]):
138
- return "hf_llama"
147
+ if any(k in lower_id for k in ["t5", "bart"]):
148
+ return "hf_seq2seq"
139
149
  if any(k in lower_id for k in ["bert", "roberta", "albert", "deberta"]):
140
- return "hf_bert"
150
+ return "hf_mlm"
141
151
  return default
142
152
 
143
153
 
@@ -148,7 +158,7 @@ def apply_auto_adapter_if_needed(cfg: Any) -> Any:
148
158
  """
149
159
  try:
150
160
  adapter = str(getattr(cfg.model, "adapter", ""))
151
- if adapter.strip().lower() not in {"auto", "hf_auto", "auto_hf"}:
161
+ if adapter.strip().lower() not in {"auto", "auto_hf"}:
152
162
  return cfg
153
163
  model_id = str(getattr(cfg.model, "id", ""))
154
164
  resolved = resolve_auto_adapter(model_id)
invarlock/cli/app.py CHANGED
@@ -105,7 +105,7 @@ Order: certify → report → run → plugins → doctor → version
105
105
  @app.command(
106
106
  name="certify",
107
107
  help=(
108
- "Certify a subject model against a baseline and generate a safety certificate. "
108
+ "Certify a subject model against a baseline and generate an evaluation certificate. "
109
109
  "Use when you have two model snapshots and want pass/fail gating."
110
110
  ),
111
111
  )
@@ -116,6 +116,14 @@ def _certify_lazy(
116
116
  edited: str = typer.Option(
117
117
  ..., "--edited", "--subject", help="Subject model dir or Hub ID"
118
118
  ),
119
+ baseline_report: str | None = typer.Option(
120
+ None,
121
+ "--baseline-report",
122
+ help=(
123
+ "Reuse an existing baseline run report.json (skips baseline evaluation). "
124
+ "Must include stored evaluation windows (e.g., set INVARLOCK_STORE_EVAL_WINDOWS=1)."
125
+ ),
126
+ ),
119
127
  adapter: str = typer.Option(
120
128
  "auto", "--adapter", help="Adapter name or 'auto' to resolve"
121
129
  ),
@@ -139,12 +147,38 @@ def _certify_lazy(
139
147
  edit_config: str | None = typer.Option(
140
148
  None, "--edit-config", help="Edit preset to apply a demo edit (quant_rtn)"
141
149
  ),
150
+ edit_label: str | None = typer.Option(
151
+ None,
152
+ "--edit-label",
153
+ help=(
154
+ "Edit algorithm label for BYOE models. Use 'noop' for baseline, "
155
+ "'quant_rtn' etc. for built-in edits, 'custom' for pre-edited models."
156
+ ),
157
+ ),
158
+ quiet: bool = typer.Option(
159
+ False, "--quiet", "-q", help="Minimal output (suppress run/report detail)"
160
+ ),
161
+ verbose: bool = typer.Option(
162
+ False, "--verbose", "-v", help="Verbose output (include debug details)"
163
+ ),
164
+ banner: bool = typer.Option(
165
+ True, "--banner/--no-banner", help="Show header banner"
166
+ ),
167
+ style: str = typer.Option("audit", "--style", help="Output style (audit|friendly)"),
168
+ timing: bool = typer.Option(False, "--timing", help="Show timing summary"),
169
+ progress: bool = typer.Option(
170
+ True, "--progress/--no-progress", help="Show progress done messages"
171
+ ),
172
+ no_color: bool = typer.Option(
173
+ False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
174
+ ),
142
175
  ):
143
176
  from .commands.certify import certify_command as _cert
144
177
 
145
178
  return _cert(
146
179
  source=source,
147
180
  edited=edited,
181
+ baseline_report=baseline_report,
148
182
  adapter=adapter,
149
183
  device=device,
150
184
  profile=profile,
@@ -153,6 +187,14 @@ def _certify_lazy(
153
187
  out=out,
154
188
  cert_out=cert_out,
155
189
  edit_config=edit_config,
190
+ edit_label=edit_label,
191
+ quiet=quiet,
192
+ verbose=verbose,
193
+ banner=banner,
194
+ style=style,
195
+ timing=timing,
196
+ progress=progress,
197
+ no_color=no_color,
156
198
  )
157
199
 
158
200
 
@@ -230,7 +272,7 @@ def _verify_typed(
230
272
  name="run",
231
273
  help=(
232
274
  "Execute an end-to-end run from a YAML config (edit + guards + reports). "
233
- "Writes run artifacts and optionally a safety certificate."
275
+ "Writes run artifacts and optionally an evaluation certificate."
234
276
  ),
235
277
  )
236
278
  def _run_typed(
@@ -245,11 +287,24 @@ def _run_typed(
245
287
  ),
246
288
  out: str | None = typer.Option(None, "--out", help="Output directory override"),
247
289
  edit: str | None = typer.Option(None, "--edit", help="Edit kind (quant|mixed)"),
290
+ edit_label: str | None = typer.Option(
291
+ None,
292
+ "--edit-label",
293
+ help=(
294
+ "Edit algorithm label for BYOE models. Use 'noop' for baseline, "
295
+ "'quant_rtn' etc. for built-in edits, 'custom' for pre-edited models."
296
+ ),
297
+ ),
248
298
  tier: str | None = typer.Option(
249
299
  None,
250
300
  "--tier",
251
301
  help="Auto-tuning tier override (conservative|balanced|aggressive)",
252
302
  ),
303
+ metric_kind: str | None = typer.Option(
304
+ None,
305
+ "--metric-kind",
306
+ help="Primary metric kind override (ppl_causal|ppl_mlm|accuracy|etc.)",
307
+ ),
253
308
  probes: int | None = typer.Option(
254
309
  None, "--probes", help="Number of micro-probes (0=deterministic, >0=adaptive)"
255
310
  ),
@@ -270,6 +325,16 @@ def _run_typed(
270
325
  no_cleanup: bool = typer.Option(
271
326
  False, "--no-cleanup", help="Skip cleanup of temporary artifacts"
272
327
  ),
328
+ style: str | None = typer.Option(
329
+ None, "--style", help="Output style (audit|friendly)"
330
+ ),
331
+ progress: bool = typer.Option(
332
+ False, "--progress", help="Show progress done messages"
333
+ ),
334
+ timing: bool = typer.Option(False, "--timing", help="Show timing summary"),
335
+ no_color: bool = typer.Option(
336
+ False, "--no-color", help="Disable ANSI colors (respects NO_COLOR=1)"
337
+ ),
273
338
  ):
274
339
  from .commands.run import run_command as _run
275
340
 
@@ -279,13 +344,19 @@ def _run_typed(
279
344
  profile=profile,
280
345
  out=out,
281
346
  edit=edit,
347
+ edit_label=edit_label,
282
348
  tier=tier,
349
+ metric_kind=metric_kind,
283
350
  probes=probes,
284
351
  until_pass=until_pass,
285
352
  max_attempts=max_attempts,
286
353
  timeout=timeout,
287
354
  baseline=baseline,
288
355
  no_cleanup=no_cleanup,
356
+ style=style,
357
+ progress=progress,
358
+ timing=timing,
359
+ no_color=no_color,
289
360
  )
290
361
 
291
362
 
invarlock/cli/commands/calibrate.py CHANGED
@@ -144,7 +144,9 @@ def null_sweep(
144
144
  ),
145
145
  n_seeds: int = typer.Option(10, "--n-seeds", min=1, help="Number of seeds to run."),
146
146
  seed_start: int = typer.Option(42, "--seed-start", help="Starting seed."),
147
- profile: str = typer.Option("ci", "--profile", help="Run profile (ci|release)."),
147
+ profile: str = typer.Option(
148
+ "ci", "--profile", help="Run profile (ci|release|ci_cpu|dev)."
149
+ ),
148
150
  device: str | None = typer.Option(None, "--device", help="Device override."),
149
151
  safety_margin: float = typer.Option(
150
152
  0.05, "--safety-margin", help="Safety margin applied to κ recommendations."
@@ -363,7 +365,9 @@ def ve_sweep(
363
365
  "--target-enable-rate",
364
366
  help="Target expected VE enable rate (predictive-gate lower bound).",
365
367
  ),
366
- profile: str = typer.Option("ci", "--profile", help="Run profile (ci|release)."),
368
+ profile: str = typer.Option(
369
+ "ci", "--profile", help="Run profile (ci|release|ci_cpu|dev)."
370
+ ),
367
371
  device: str | None = typer.Option(None, "--device", help="Device override."),
368
372
  safety_margin: float = typer.Option(
369
373
  0.0,