invarlock 0.3.4__tar.gz → 0.3.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {invarlock-0.3.4/src/invarlock.egg-info → invarlock-0.3.6}/PKG-INFO +6 -6
- {invarlock-0.3.4 → invarlock-0.3.6}/README.md +5 -5
- {invarlock-0.3.4 → invarlock-0.3.6}/pyproject.toml +1 -1
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/__init__.py +1 -1
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/_data/runtime/tiers.yaml +57 -30
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/adapters/__init__.py +1 -1
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/calibration/spectral_null.py +15 -10
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/calibration/variance_ve.py +0 -2
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/commands/calibrate.py +6 -2
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/commands/certify.py +58 -39
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/commands/doctor.py +3 -1
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/commands/explain_gates.py +57 -8
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/commands/report.py +1 -1
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/commands/run.py +159 -61
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/commands/verify.py +78 -4
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/config.py +21 -5
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/core/api.py +45 -5
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/core/auto_tuning.py +65 -20
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/core/contracts.py +7 -1
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/core/registry.py +2 -2
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/core/runner.py +314 -50
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/eval/bench.py +0 -13
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/eval/data.py +73 -283
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/eval/metrics.py +134 -4
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/eval/primary_metric.py +23 -0
- invarlock-0.3.6/src/invarlock/eval/tail_stats.py +230 -0
- invarlock-0.3.6/src/invarlock/guards/_estimators.py +154 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/guards/policies.py +16 -6
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/guards/rmt.py +625 -544
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/guards/spectral.py +348 -110
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/guards/tier_config.py +32 -30
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/guards/variance.py +5 -29
- invarlock-0.3.6/src/invarlock/guards_ref/rmt_ref.py +40 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/model_profile.py +42 -15
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/reporting/certificate.py +225 -46
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/reporting/certificate_schema.py +2 -1
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/reporting/dataset_hashing.py +15 -2
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/reporting/guards_analysis.py +197 -274
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/reporting/normalizer.py +6 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/reporting/policy_utils.py +38 -36
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/reporting/primary_metric_utils.py +71 -17
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/reporting/render.py +61 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/reporting/report.py +1 -1
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/reporting/report_types.py +5 -2
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/reporting/validate.py +1 -18
- {invarlock-0.3.4 → invarlock-0.3.6/src/invarlock.egg-info}/PKG-INFO +6 -6
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock.egg-info/SOURCES.txt +2 -0
- invarlock-0.3.4/src/invarlock/guards_ref/rmt_ref.py +0 -40
- {invarlock-0.3.4 → invarlock-0.3.6}/LICENSE +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/MANIFEST.in +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/setup.cfg +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/__main__.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/_data/runtime/profiles/ci_cpu.yaml +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/_data/runtime/profiles/release.yaml +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/adapters/_capabilities.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/adapters/auto.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/adapters/base.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/adapters/base_types.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/adapters/capabilities.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/adapters/hf_bert.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/adapters/hf_gpt2.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/adapters/hf_llama.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/adapters/hf_loading.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/adapters/hf_mixin.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/adapters/hf_onnx.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/adapters/hf_t5.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/adapters/py.typed +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/assurance/__init__.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/calibration/__init__.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/__init__.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/__main__.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/_evidence.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/_json.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/adapter_auto.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/app.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/commands/__init__.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/commands/export_html.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/commands/plugins.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/constants.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/determinism.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/device.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/doctor_helpers.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/errors.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/overhead_utils.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/provenance.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/cli/utils.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/config.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/core/__init__.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/core/abi.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/core/bootstrap.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/core/checkpoint.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/core/error_utils.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/core/events.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/core/exceptions.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/core/retry.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/core/types.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/edits/__init__.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/edits/_edit_utils.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/edits/_external_utils.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/edits/noop.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/edits/py.typed +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/edits/quant_rtn.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/edits/registry.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/eval/__init__.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/eval/bench_regression.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/eval/bootstrap.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/eval/probes/__init__.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/eval/probes/fft.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/eval/probes/mi.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/eval/probes/post_attention.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/eval/providers/base.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/eval/providers/seq2seq.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/eval/providers/text_lm.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/eval/providers/vision_text.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/eval/py.typed +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/guards/__init__.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/guards/_contracts.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/guards/invariants.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/guards/py.typed +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/guards_ref/__init__.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/guards_ref/spectral_ref.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/guards_ref/variance_ref.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/model_utils.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/observability/__init__.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/observability/alerting.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/observability/core.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/observability/exporters.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/observability/health.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/observability/metrics.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/observability/py.typed +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/observability/utils.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/plugins/__init__.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/plugins/hello_guard.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/plugins/hf_awq_adapter.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/plugins/hf_bnb_adapter.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/plugins/hf_gptq_adapter.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/plugins/py.typed +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/py.typed +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/reporting/__init__.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/reporting/html.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/reporting/utils.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/security.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/sparsity_utils.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/utils/__init__.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock/utils/digest.py +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock.egg-info/dependency_links.txt +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock.egg-info/entry_points.txt +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock.egg-info/requires.txt +0 -0
- {invarlock-0.3.4 → invarlock-0.3.6}/src/invarlock.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: invarlock
|
|
3
|
-
Version: 0.3.4
|
|
3
|
+
Version: 0.3.6
|
|
4
4
|
Summary: Edit‑agnostic robustness certificates for weight edits (InvarLock framework)
|
|
5
5
|
Author-email: InvarLock Team <oss@invarlock.dev>
|
|
6
6
|
Maintainer-email: InvarLock Maintainers <support@invarlock.dev>
|
|
@@ -112,7 +112,7 @@ they don’t, roll back safely.
|
|
|
112
112
|
Technical: edit‑agnostic guard pipeline (invariants → spectral → RMT →
|
|
113
113
|
variance) producing a machine‑readable Safety Certificate.
|
|
114
114
|
|
|
115
|
-
> **Status:** 0.3.4 (pre‑1.0). Until 1.0, **minor** releases may be
|
|
115
|
+
> **Status:** 0.3.6 (pre‑1.0). Until 1.0, **minor** releases may be
|
|
116
116
|
> breaking. See CLI help and the CHANGELOG for updates.
|
|
117
117
|
|
|
118
118
|
[](https://github.com/invarlock/invarlock/actions/workflows/ci.yml)
|
|
@@ -170,7 +170,7 @@ Quick examples (repo presets, CPU; repo clone required for preset paths):
|
|
|
170
170
|
pip install "invarlock[hf]"
|
|
171
171
|
|
|
172
172
|
# Preflight a config (JSON diagnostics)
|
|
173
|
-
invarlock doctor --config configs/
|
|
173
|
+
invarlock doctor --config configs/presets/causal_lm/wikitext2_512.yaml --json
|
|
174
174
|
|
|
175
175
|
# Calibrated GPT‑2 small (recommended starting point; repo preset)
|
|
176
176
|
INVARLOCK_ALLOW_NETWORK=1 INVARLOCK_DEDUP_TEXTS=1 \
|
|
@@ -179,7 +179,7 @@ invarlock certify \
|
|
|
179
179
|
--subject gpt2 \
|
|
180
180
|
--adapter auto \
|
|
181
181
|
--profile release \
|
|
182
|
-
--preset configs/
|
|
182
|
+
--preset configs/presets/causal_lm/wikitext2_512.yaml
|
|
183
183
|
|
|
184
184
|
# Tiny causal LM smoke (out‑of‑calibration, dev‑only)
|
|
185
185
|
INVARLOCK_ALLOW_NETWORK=1 \
|
|
@@ -249,7 +249,7 @@ INVARLOCK_ALLOW_NETWORK=1 invarlock certify \
|
|
|
249
249
|
--subject gpt2 \
|
|
250
250
|
--adapter auto \
|
|
251
251
|
--profile ci \
|
|
252
|
-
--preset configs/
|
|
252
|
+
--preset configs/presets/causal_lm/wikitext2_512.yaml
|
|
253
253
|
```
|
|
254
254
|
|
|
255
255
|
- Offline/air‑gapped usage: pre‑download to a cache, then run with network
|
|
@@ -488,7 +488,7 @@ output:
|
|
|
488
488
|
Run preflight checks before a run to catch misconfigurations early:
|
|
489
489
|
|
|
490
490
|
```bash
|
|
491
|
-
invarlock doctor --config configs/
|
|
491
|
+
invarlock doctor --config configs/presets/causal_lm/wikitext2_512.yaml --json
|
|
492
492
|
```
|
|
493
493
|
|
|
494
494
|
Text mode emits lines prefixed with `ERROR:`, `WARNING:`, or `NOTE:` and stable
|
|
@@ -6,7 +6,7 @@ they don’t, roll back safely.
|
|
|
6
6
|
Technical: edit‑agnostic guard pipeline (invariants → spectral → RMT →
|
|
7
7
|
variance) producing a machine‑readable Safety Certificate.
|
|
8
8
|
|
|
9
|
-
> **Status:** 0.3.4 (pre‑1.0). Until 1.0, **minor** releases may be
|
|
9
|
+
> **Status:** 0.3.6 (pre‑1.0). Until 1.0, **minor** releases may be
|
|
10
10
|
> breaking. See CLI help and the CHANGELOG for updates.
|
|
11
11
|
|
|
12
12
|
[](https://github.com/invarlock/invarlock/actions/workflows/ci.yml)
|
|
@@ -64,7 +64,7 @@ Quick examples (repo presets, CPU; repo clone required for preset paths):
|
|
|
64
64
|
pip install "invarlock[hf]"
|
|
65
65
|
|
|
66
66
|
# Preflight a config (JSON diagnostics)
|
|
67
|
-
invarlock doctor --config configs/
|
|
67
|
+
invarlock doctor --config configs/presets/causal_lm/wikitext2_512.yaml --json
|
|
68
68
|
|
|
69
69
|
# Calibrated GPT‑2 small (recommended starting point; repo preset)
|
|
70
70
|
INVARLOCK_ALLOW_NETWORK=1 INVARLOCK_DEDUP_TEXTS=1 \
|
|
@@ -73,7 +73,7 @@ invarlock certify \
|
|
|
73
73
|
--subject gpt2 \
|
|
74
74
|
--adapter auto \
|
|
75
75
|
--profile release \
|
|
76
|
-
--preset configs/
|
|
76
|
+
--preset configs/presets/causal_lm/wikitext2_512.yaml
|
|
77
77
|
|
|
78
78
|
# Tiny causal LM smoke (out‑of‑calibration, dev‑only)
|
|
79
79
|
INVARLOCK_ALLOW_NETWORK=1 \
|
|
@@ -143,7 +143,7 @@ INVARLOCK_ALLOW_NETWORK=1 invarlock certify \
|
|
|
143
143
|
--subject gpt2 \
|
|
144
144
|
--adapter auto \
|
|
145
145
|
--profile ci \
|
|
146
|
-
--preset configs/
|
|
146
|
+
--preset configs/presets/causal_lm/wikitext2_512.yaml
|
|
147
147
|
```
|
|
148
148
|
|
|
149
149
|
- Offline/air‑gapped usage: pre‑download to a cache, then run with network
|
|
@@ -382,7 +382,7 @@ output:
|
|
|
382
382
|
Run preflight checks before a run to catch misconfigurations early:
|
|
383
383
|
|
|
384
384
|
```bash
|
|
385
|
-
invarlock doctor --config configs/
|
|
385
|
+
invarlock doctor --config configs/presets/causal_lm/wikitext2_512.yaml --json
|
|
386
386
|
```
|
|
387
387
|
|
|
388
388
|
Text mode emits lines prefixed with `ERROR:`, `WARNING:`, or `NOTE:` and stable
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "invarlock"
|
|
7
|
-
version = "0.3.4"
|
|
7
|
+
version = "0.3.6"
|
|
8
8
|
description = "Edit‑agnostic robustness certificates for weight edits (InvarLock framework)"
|
|
9
9
|
authors = [{ name = "InvarLock Team", email = "oss@invarlock.dev" }]
|
|
10
10
|
maintainers = [{ name = "InvarLock Maintainers", email = "support@invarlock.dev" }]
|
|
@@ -12,7 +12,7 @@ For torch-dependent functionality, see subpackages under `invarlock.*`:
|
|
|
12
12
|
- `invarlock.eval`: Metrics, guard-overhead checks, and certification
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
|
-
__version__ = "0.3.4"
|
|
15
|
+
__version__ = "0.3.6"
|
|
16
16
|
|
|
17
17
|
# Core exports - torch-independent
|
|
18
18
|
from .config import CFG, Defaults, get_default_config
|
|
@@ -1,8 +1,12 @@
|
|
|
1
|
-
# Tier
|
|
1
|
+
# Tier policy defaults (metrics gates + guard knobs) used at runtime.
|
|
2
2
|
#
|
|
3
|
-
#
|
|
4
|
-
#
|
|
5
|
-
#
|
|
3
|
+
# Balanced and Conservative values are calibrated/validated against pilot/null
|
|
4
|
+
# runs (Nov/Dec 2025) where applicable; Aggressive is research-oriented (not in
|
|
5
|
+
# the safety case).
|
|
6
|
+
#
|
|
7
|
+
# Rationale by key: docs/reference/tier-policy-catalog.md
|
|
8
|
+
# Calibration method: docs/assurance/09-tier-v1-calibration.md
|
|
9
|
+
# Provenance/digest: docs/assurance/11-policy-provenance.md
|
|
6
10
|
|
|
7
11
|
balanced:
|
|
8
12
|
metrics:
|
|
@@ -11,6 +15,13 @@ balanced:
|
|
|
11
15
|
min_tokens: 50000
|
|
12
16
|
hysteresis_ratio: 0.002
|
|
13
17
|
min_token_fraction: 0.01
|
|
18
|
+
pm_tail:
|
|
19
|
+
mode: warn
|
|
20
|
+
min_windows: 50
|
|
21
|
+
quantile: 0.95
|
|
22
|
+
quantile_max: 0.20
|
|
23
|
+
epsilon: 0.0001
|
|
24
|
+
mass_max: 1.0
|
|
14
25
|
accuracy:
|
|
15
26
|
delta_min_pp: -1.0
|
|
16
27
|
min_examples: 200
|
|
@@ -20,7 +31,7 @@ balanced:
|
|
|
20
31
|
deadband: 0.02
|
|
21
32
|
min_abs_adjust: 0.012
|
|
22
33
|
max_scale_step: 0.03
|
|
23
|
-
min_effect_lognll: 0.
|
|
34
|
+
min_effect_lognll: 0.0
|
|
24
35
|
predictive_one_sided: true
|
|
25
36
|
topk_backstop: 1
|
|
26
37
|
max_adjusted_modules: 1
|
|
@@ -33,10 +44,10 @@ balanced:
|
|
|
33
44
|
max_caps: 5
|
|
34
45
|
max_spectral_norm: null
|
|
35
46
|
family_caps:
|
|
36
|
-
ffn: 3.
|
|
37
|
-
attn: 3.
|
|
38
|
-
embed:
|
|
39
|
-
other:
|
|
47
|
+
ffn: 3.849
|
|
48
|
+
attn: 3.018
|
|
49
|
+
embed: 1.05
|
|
50
|
+
other: 0.0
|
|
40
51
|
multiple_testing:
|
|
41
52
|
method: bh
|
|
42
53
|
alpha: 0.05
|
|
@@ -44,12 +55,12 @@ balanced:
|
|
|
44
55
|
rmt_guard:
|
|
45
56
|
deadband: 0.10
|
|
46
57
|
margin: 1.5
|
|
47
|
-
epsilon_default: 0.
|
|
58
|
+
epsilon_default: 0.01
|
|
48
59
|
epsilon_by_family:
|
|
49
|
-
ffn: 0.
|
|
50
|
-
attn: 0.
|
|
51
|
-
embed: 0.
|
|
52
|
-
other: 0.
|
|
60
|
+
ffn: 0.01
|
|
61
|
+
attn: 0.01
|
|
62
|
+
embed: 0.01
|
|
63
|
+
other: 0.01
|
|
53
64
|
|
|
54
65
|
conservative:
|
|
55
66
|
metrics:
|
|
@@ -58,6 +69,13 @@ conservative:
|
|
|
58
69
|
min_tokens: 20000
|
|
59
70
|
hysteresis_ratio: 0.002
|
|
60
71
|
min_token_fraction: 0.01
|
|
72
|
+
pm_tail:
|
|
73
|
+
mode: warn
|
|
74
|
+
min_windows: 50
|
|
75
|
+
quantile: 0.95
|
|
76
|
+
quantile_max: 0.12
|
|
77
|
+
epsilon: 0.0001
|
|
78
|
+
mass_max: 1.0
|
|
61
79
|
accuracy:
|
|
62
80
|
delta_min_pp: -0.5
|
|
63
81
|
min_examples: 200
|
|
@@ -67,7 +85,7 @@ conservative:
|
|
|
67
85
|
deadband: 0.03
|
|
68
86
|
min_abs_adjust: 0.02
|
|
69
87
|
max_scale_step: 0.015
|
|
70
|
-
min_effect_lognll: 0.
|
|
88
|
+
min_effect_lognll: 0.016
|
|
71
89
|
predictive_one_sided: false
|
|
72
90
|
topk_backstop: 0
|
|
73
91
|
max_adjusted_modules: 0
|
|
@@ -78,24 +96,25 @@ conservative:
|
|
|
78
96
|
deadband: 0.05
|
|
79
97
|
scope: ffn
|
|
80
98
|
max_caps: 3
|
|
99
|
+
max_spectral_norm: null
|
|
81
100
|
family_caps:
|
|
82
|
-
ffn:
|
|
101
|
+
ffn: 3.849
|
|
83
102
|
attn: 2.6
|
|
84
103
|
embed: 2.8
|
|
85
104
|
other: 2.8
|
|
86
105
|
multiple_testing:
|
|
87
106
|
method: bonferroni
|
|
88
|
-
alpha: 0.
|
|
107
|
+
alpha: 0.000625
|
|
89
108
|
m: 4
|
|
90
109
|
rmt_guard:
|
|
91
110
|
deadband: 0.05
|
|
92
111
|
margin: 1.3
|
|
93
|
-
epsilon_default: 0.
|
|
112
|
+
epsilon_default: 0.01
|
|
94
113
|
epsilon_by_family:
|
|
95
|
-
ffn: 0.
|
|
96
|
-
attn: 0.
|
|
97
|
-
embed: 0.
|
|
98
|
-
other: 0.
|
|
114
|
+
ffn: 0.01
|
|
115
|
+
attn: 0.01
|
|
116
|
+
embed: 0.01
|
|
117
|
+
other: 0.01
|
|
99
118
|
|
|
100
119
|
aggressive:
|
|
101
120
|
metrics:
|
|
@@ -104,6 +123,13 @@ aggressive:
|
|
|
104
123
|
min_tokens: 50000
|
|
105
124
|
hysteresis_ratio: 0.002
|
|
106
125
|
min_token_fraction: 0.01
|
|
126
|
+
pm_tail:
|
|
127
|
+
mode: warn
|
|
128
|
+
min_windows: 50
|
|
129
|
+
quantile: 0.95
|
|
130
|
+
quantile_max: 0.30
|
|
131
|
+
epsilon: 0.0001
|
|
132
|
+
mass_max: 1.0
|
|
107
133
|
accuracy:
|
|
108
134
|
delta_min_pp: -2.0
|
|
109
135
|
min_examples: 200
|
|
@@ -111,27 +137,28 @@ aggressive:
|
|
|
111
137
|
min_examples_fraction: 0.01
|
|
112
138
|
variance_guard:
|
|
113
139
|
deadband: 0.12
|
|
114
|
-
min_effect_lognll: 0.
|
|
140
|
+
min_effect_lognll: 0.033
|
|
115
141
|
spectral_guard:
|
|
116
142
|
sigma_quantile: 0.98
|
|
117
143
|
deadband: 0.15
|
|
118
144
|
scope: ffn
|
|
119
145
|
max_caps: 8
|
|
146
|
+
max_spectral_norm: null
|
|
120
147
|
family_caps:
|
|
121
|
-
ffn: 3.
|
|
148
|
+
ffn: 3.849
|
|
122
149
|
attn: 3.5
|
|
123
150
|
embed: 2.5
|
|
124
151
|
other: 3.5
|
|
125
152
|
multiple_testing:
|
|
126
153
|
method: bh
|
|
127
|
-
alpha: 0.
|
|
154
|
+
alpha: 0.00078125
|
|
128
155
|
m: 4
|
|
129
156
|
rmt_guard:
|
|
130
157
|
deadband: 0.15
|
|
131
158
|
margin: 1.8
|
|
132
|
-
epsilon_default: 0.
|
|
159
|
+
epsilon_default: 0.01
|
|
133
160
|
epsilon_by_family:
|
|
134
|
-
ffn: 0.
|
|
135
|
-
attn: 0.
|
|
136
|
-
embed: 0.
|
|
137
|
-
other: 0.
|
|
161
|
+
ffn: 0.01
|
|
162
|
+
attn: 0.01
|
|
163
|
+
embed: 0.01
|
|
164
|
+
other: 0.01
|
|
@@ -76,7 +76,7 @@ class _RemovedComponent:
|
|
|
76
76
|
return _RemovedComponent(self._name, self._replacement)
|
|
77
77
|
|
|
78
78
|
|
|
79
|
-
# Placeholders for removed
|
|
79
|
+
# Placeholders for removed utilities referenced in tests
|
|
80
80
|
HF_Pythia_Adapter = _RemovedComponent("HF_Pythia_Adapter")
|
|
81
81
|
auto_tune_pruning_budget = _RemovedComponent("auto_tune_pruning_budget")
|
|
82
82
|
run_auto_invarlock = _RemovedComponent("run_auto_invarlock")
|
|
@@ -148,7 +148,7 @@ def _selected_families_for_alpha(
|
|
|
148
148
|
|
|
149
149
|
|
|
150
150
|
def summarize_null_sweep_reports(
|
|
151
|
-
reports: list[
|
|
151
|
+
reports: list[object],
|
|
152
152
|
*,
|
|
153
153
|
tier: str,
|
|
154
154
|
safety_margin: float = 0.05,
|
|
@@ -186,20 +186,25 @@ def summarize_null_sweep_reports(
|
|
|
186
186
|
mt = _extract_multiple_testing(metrics)
|
|
187
187
|
if mt:
|
|
188
188
|
mt_method = str(mt.get("method", mt_method))
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
189
|
+
alpha_value = mt.get("alpha")
|
|
190
|
+
if alpha_value is not None:
|
|
191
|
+
try:
|
|
192
|
+
mt_alpha = float(alpha_value)
|
|
193
|
+
except Exception:
|
|
194
|
+
pass
|
|
195
|
+
m_value = mt.get("m")
|
|
196
|
+
if m_value is not None:
|
|
197
|
+
try:
|
|
198
|
+
mt_m = int(m_value)
|
|
199
|
+
except Exception:
|
|
200
|
+
pass
|
|
193
201
|
|
|
194
202
|
fam_z = _extract_family_max_z(metrics)
|
|
195
203
|
for fam, z in fam_z.items():
|
|
196
204
|
family_max_z[fam] = max(family_max_z[fam], float(z))
|
|
197
205
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
if isinstance(metrics.get("multiple_testing_selection"), dict)
|
|
201
|
-
else {}
|
|
202
|
-
)
|
|
206
|
+
raw_selection = metrics.get("multiple_testing_selection")
|
|
207
|
+
selection = raw_selection if isinstance(raw_selection, dict) else {}
|
|
203
208
|
pvals = selection.get("family_pvalues")
|
|
204
209
|
if not isinstance(pvals, dict):
|
|
205
210
|
pvals = {}
|
|
@@ -107,8 +107,6 @@ def summarize_ve_sweep_reports(
|
|
|
107
107
|
evaluated = 0
|
|
108
108
|
|
|
109
109
|
for report in reports:
|
|
110
|
-
if not isinstance(report, dict):
|
|
111
|
-
continue
|
|
112
110
|
g = _extract_guard(report, "variance") or {}
|
|
113
111
|
metrics = g.get("metrics", {}) if isinstance(g.get("metrics"), dict) else {}
|
|
114
112
|
pg = metrics.get("predictive_gate")
|
|
@@ -144,7 +144,9 @@ def null_sweep(
|
|
|
144
144
|
),
|
|
145
145
|
n_seeds: int = typer.Option(10, "--n-seeds", min=1, help="Number of seeds to run."),
|
|
146
146
|
seed_start: int = typer.Option(42, "--seed-start", help="Starting seed."),
|
|
147
|
-
profile: str = typer.Option(
|
|
147
|
+
profile: str = typer.Option(
|
|
148
|
+
"ci", "--profile", help="Run profile (ci|release|ci_cpu|dev)."
|
|
149
|
+
),
|
|
148
150
|
device: str | None = typer.Option(None, "--device", help="Device override."),
|
|
149
151
|
safety_margin: float = typer.Option(
|
|
150
152
|
0.05, "--safety-margin", help="Safety margin applied to κ recommendations."
|
|
@@ -363,7 +365,9 @@ def ve_sweep(
|
|
|
363
365
|
"--target-enable-rate",
|
|
364
366
|
help="Target expected VE enable rate (predictive-gate lower bound).",
|
|
365
367
|
),
|
|
366
|
-
profile: str = typer.Option(
|
|
368
|
+
profile: str = typer.Option(
|
|
369
|
+
"ci", "--profile", help="Run profile (ci|release|ci_cpu|dev)."
|
|
370
|
+
),
|
|
367
371
|
device: str | None = typer.Option(None, "--device", help="Device override."),
|
|
368
372
|
safety_margin: float = typer.Option(
|
|
369
373
|
0.0,
|
|
@@ -22,9 +22,9 @@ from typing import Any
|
|
|
22
22
|
import typer
|
|
23
23
|
from rich.console import Console
|
|
24
24
|
|
|
25
|
+
from ...core.exceptions import MetricsError
|
|
25
26
|
from ..adapter_auto import resolve_auto_adapter
|
|
26
27
|
from ..config import _deep_merge as _merge # reuse helper
|
|
27
|
-
from ..errors import InvarlockError
|
|
28
28
|
|
|
29
29
|
# Use the report group's programmatic entry for report generation
|
|
30
30
|
from .report import report_command as _report
|
|
@@ -98,7 +98,9 @@ def certify_command(
|
|
|
98
98
|
"--device",
|
|
99
99
|
help="Device override for runs (auto|cuda|mps|cpu)",
|
|
100
100
|
),
|
|
101
|
-
profile: str = typer.Option(
|
|
101
|
+
profile: str = typer.Option(
|
|
102
|
+
"ci", "--profile", help="Profile (ci|release|ci_cpu|dev)"
|
|
103
|
+
),
|
|
102
104
|
tier: str = typer.Option("balanced", "--tier", help="Tier label for context"),
|
|
103
105
|
preset: str | None = typer.Option(
|
|
104
106
|
None,
|
|
@@ -152,9 +154,9 @@ def certify_command(
|
|
|
152
154
|
# scenario), fall back to a minimal built-in universal preset so the
|
|
153
155
|
# flag-only quick start works without cloning the repo.
|
|
154
156
|
default_universal = (
|
|
155
|
-
Path("configs/
|
|
157
|
+
Path("configs/presets/masked_lm/wikitext2_128.yaml")
|
|
156
158
|
if eff_adapter == "hf_bert"
|
|
157
|
-
else Path("configs/
|
|
159
|
+
else Path("configs/presets/causal_lm/wikitext2_512.yaml")
|
|
158
160
|
)
|
|
159
161
|
preset_path = Path(preset) if preset is not None else default_universal
|
|
160
162
|
|
|
@@ -185,6 +187,20 @@ def certify_command(
|
|
|
185
187
|
model_block.pop("device", None)
|
|
186
188
|
preset_data["model"] = model_block
|
|
187
189
|
|
|
190
|
+
default_guards_order = ["invariants", "spectral", "rmt", "variance", "invariants"]
|
|
191
|
+
guards_order = None
|
|
192
|
+
preset_guards = preset_data.get("guards")
|
|
193
|
+
if isinstance(preset_guards, dict):
|
|
194
|
+
preset_order = preset_guards.get("order")
|
|
195
|
+
if (
|
|
196
|
+
isinstance(preset_order, list)
|
|
197
|
+
and preset_order
|
|
198
|
+
and all(isinstance(item, str) for item in preset_order)
|
|
199
|
+
):
|
|
200
|
+
guards_order = list(preset_order)
|
|
201
|
+
if guards_order is None:
|
|
202
|
+
guards_order = list(default_guards_order)
|
|
203
|
+
|
|
188
204
|
# Create temp baseline config (no-op edit)
|
|
189
205
|
# Normalize possible "hf:" prefixes for HF adapters
|
|
190
206
|
norm_src_id = _normalize_model_id(src_id, eff_adapter)
|
|
@@ -199,9 +215,7 @@ def certify_command(
|
|
|
199
215
|
},
|
|
200
216
|
"edit": {"name": "noop", "plan": {}},
|
|
201
217
|
"eval": {},
|
|
202
|
-
"guards": {
|
|
203
|
-
"order": ["invariants", "spectral", "rmt", "variance", "invariants"]
|
|
204
|
-
},
|
|
218
|
+
"guards": {"order": guards_order},
|
|
205
219
|
"output": {"dir": str(Path(out) / "source")},
|
|
206
220
|
"context": {"profile": profile, "tier": tier},
|
|
207
221
|
},
|
|
@@ -292,15 +306,7 @@ def certify_command(
|
|
|
292
306
|
"model": {"id": norm_edt_id, "adapter": eff_adapter},
|
|
293
307
|
"edit": {"name": "noop", "plan": {}},
|
|
294
308
|
"eval": {},
|
|
295
|
-
"guards": {
|
|
296
|
-
"order": [
|
|
297
|
-
"invariants",
|
|
298
|
-
"spectral",
|
|
299
|
-
"rmt",
|
|
300
|
-
"variance",
|
|
301
|
-
"invariants",
|
|
302
|
-
]
|
|
303
|
-
},
|
|
309
|
+
"guards": {"order": guards_order},
|
|
304
310
|
"output": {"dir": str(Path(out) / "edited")},
|
|
305
311
|
"context": {"profile": profile, "tier": tier},
|
|
306
312
|
},
|
|
@@ -325,12 +331,11 @@ def certify_command(
|
|
|
325
331
|
raise typer.Exit(1)
|
|
326
332
|
|
|
327
333
|
# CI/Release hard‑abort: fail fast when primary metric is not computable.
|
|
328
|
-
# Fall back to legacy ppl_* keys when primary_metric block is absent.
|
|
329
334
|
try:
|
|
330
335
|
prof = str(profile or "").strip().lower()
|
|
331
336
|
except Exception:
|
|
332
337
|
prof = ""
|
|
333
|
-
if prof in {"ci", "release"}:
|
|
338
|
+
if prof in {"ci", "ci_cpu", "release"}:
|
|
334
339
|
try:
|
|
335
340
|
with Path(edited_report).open("r", encoding="utf-8") as fh:
|
|
336
341
|
edited_payload = json.load(fh)
|
|
@@ -364,35 +369,49 @@ def certify_command(
|
|
|
364
369
|
else None
|
|
365
370
|
) or "unknown"
|
|
366
371
|
|
|
367
|
-
# Enforce only when a
|
|
368
|
-
# Enforce only when a primary_metric block is present
|
|
372
|
+
# Enforce only when a primary_metric block is present; allow degraded-but-flagged metrics to emit certificates, but fail the task.
|
|
369
373
|
has_metric_block = isinstance(pm, dict) and bool(pm)
|
|
370
374
|
if has_metric_block:
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
+
degraded = bool(pm.get("invalid") or pm.get("degraded"))
|
|
376
|
+
if degraded or not _finite(pm_final):
|
|
377
|
+
fallback = pm_prev if _finite(pm_prev) else pm_final
|
|
378
|
+
if not _finite(fallback) or fallback <= 0:
|
|
379
|
+
fallback = 1.0
|
|
380
|
+
degraded_reason = pm.get("degraded_reason") or (
|
|
381
|
+
"non_finite_pm"
|
|
382
|
+
if (not _finite(pm_prev) or not _finite(pm_final))
|
|
383
|
+
else "primary_metric_degraded"
|
|
384
|
+
)
|
|
385
|
+
console.print(
|
|
386
|
+
"[yellow]⚠️ Primary metric degraded or non-finite; emitting certificate and marking task degraded. Primary metric computation failed.[/yellow]"
|
|
387
|
+
)
|
|
388
|
+
pm["degraded"] = True
|
|
389
|
+
pm["invalid"] = pm.get("invalid") or True
|
|
390
|
+
pm["preview"] = pm_prev if _finite(pm_prev) else fallback
|
|
391
|
+
pm["final"] = pm_final if _finite(pm_final) else fallback
|
|
392
|
+
pm["ratio_vs_baseline"] = pm_ratio if _finite(pm_ratio) else 1.0
|
|
393
|
+
pm["degraded_reason"] = degraded_reason
|
|
394
|
+
metrics["primary_metric"] = pm
|
|
395
|
+
edited_payload.setdefault("metrics", {}).update(metrics)
|
|
396
|
+
|
|
397
|
+
# Emit the certificate for inspection, then exit with a CI-visible error.
|
|
398
|
+
_report(
|
|
399
|
+
run=str(edited_report),
|
|
400
|
+
format="cert",
|
|
401
|
+
baseline=str(baseline_report),
|
|
402
|
+
output=cert_out,
|
|
403
|
+
)
|
|
404
|
+
err = MetricsError(
|
|
375
405
|
code="E111",
|
|
376
|
-
message=(
|
|
377
|
-
"Primary metric computation failed (NaN/inf). "
|
|
378
|
-
f"Context: device={device}, adapter={adapter_name}, edit={edit_name}. "
|
|
379
|
-
"Baseline ok; edited failed to compute ppl. "
|
|
380
|
-
"Try: use an accelerator (mps/cuda), force float32, reduce max_modules, "
|
|
381
|
-
"or lower batch size (INVARLOCK_SCORES_BATCH_SIZE)."
|
|
382
|
-
),
|
|
406
|
+
message=f"Primary metric degraded or non-finite ({degraded_reason}).",
|
|
383
407
|
details={
|
|
384
|
-
"
|
|
408
|
+
"reason": degraded_reason,
|
|
385
409
|
"adapter": adapter_name,
|
|
410
|
+
"device": device,
|
|
386
411
|
"edit": edit_name,
|
|
387
|
-
"pm_preview": pm_prev,
|
|
388
|
-
"pm_final": pm_final,
|
|
389
|
-
"pm_ratio": pm_ratio,
|
|
390
412
|
},
|
|
391
413
|
)
|
|
392
|
-
|
|
393
|
-
console.print(f"[red]{err}[/red]")
|
|
394
|
-
# Do not emit a certificate
|
|
395
|
-
raise typer.Exit(code)
|
|
414
|
+
raise typer.Exit(_resolve_exit_code(err, profile=profile))
|
|
396
415
|
|
|
397
416
|
console.print("📜 Emitting certificate")
|
|
398
417
|
_report(
|
|
@@ -188,7 +188,9 @@ def doctor_command(
|
|
|
188
188
|
None, "--config", "-c", help="Path to YAML config for preflight lints"
|
|
189
189
|
),
|
|
190
190
|
profile: str | None = typer.Option(
|
|
191
|
-
None,
|
|
191
|
+
None,
|
|
192
|
+
"--profile",
|
|
193
|
+
help="Profile to apply for preflight (e.g. ci, release, ci_cpu; dev is a no-op)",
|
|
192
194
|
),
|
|
193
195
|
baseline: str | None = typer.Option(
|
|
194
196
|
None, "--baseline", help="Optional baseline report to check pairing readiness"
|
|
@@ -99,10 +99,6 @@ def explain_gates_command(
|
|
|
99
99
|
pm = cert.get("primary_metric", {})
|
|
100
100
|
ratio = pm.get("ratio_vs_baseline")
|
|
101
101
|
ratio_ci = pm.get("display_ci")
|
|
102
|
-
elif isinstance(cert.get("ppl"), dict): # legacy
|
|
103
|
-
ppl = cert.get("ppl", {})
|
|
104
|
-
ratio = ppl.get("ratio_vs_baseline")
|
|
105
|
-
ratio_ci = ppl.get("ratio_ci")
|
|
106
102
|
hysteresis_applied = bool(validation.get("hysteresis_applied"))
|
|
107
103
|
status = "PASS" if bool(validation.get("primary_metric_acceptable")) else "FAIL"
|
|
108
104
|
console.print("[bold]Gate: Primary Metric vs Baseline[/bold]")
|
|
@@ -125,6 +121,63 @@ def explain_gates_command(
|
|
|
125
121
|
f" note: hysteresis applied → effective threshold = {limit_with_hyst:.3f}x"
|
|
126
122
|
)
|
|
127
123
|
|
|
124
|
+
# Tail gate explanation (warn/fail; based on per-window Δlog-loss vs baseline)
|
|
125
|
+
pm_tail = (
|
|
126
|
+
cert.get("primary_metric_tail", {})
|
|
127
|
+
if isinstance(cert.get("primary_metric_tail"), dict)
|
|
128
|
+
else {}
|
|
129
|
+
)
|
|
130
|
+
if pm_tail:
|
|
131
|
+
mode = str(pm_tail.get("mode", "warn") or "warn").strip().lower()
|
|
132
|
+
evaluated = bool(pm_tail.get("evaluated", False))
|
|
133
|
+
passed = bool(pm_tail.get("passed", True))
|
|
134
|
+
policy = (
|
|
135
|
+
pm_tail.get("policy", {}) if isinstance(pm_tail.get("policy"), dict) else {}
|
|
136
|
+
)
|
|
137
|
+
stats = (
|
|
138
|
+
pm_tail.get("stats", {}) if isinstance(pm_tail.get("stats"), dict) else {}
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
q = policy.get("quantile", 0.95)
|
|
142
|
+
try:
|
|
143
|
+
qf = float(q)
|
|
144
|
+
except Exception:
|
|
145
|
+
qf = 0.95
|
|
146
|
+
qf = max(0.0, min(1.0, qf))
|
|
147
|
+
q_key = f"q{int(round(100.0 * qf))}"
|
|
148
|
+
q_name = f"P{int(round(100.0 * qf))}"
|
|
149
|
+
q_val = stats.get(q_key)
|
|
150
|
+
qmax = policy.get("quantile_max")
|
|
151
|
+
eps = policy.get("epsilon", stats.get("epsilon"))
|
|
152
|
+
mass = stats.get("tail_mass")
|
|
153
|
+
mmax = policy.get("mass_max")
|
|
154
|
+
|
|
155
|
+
if not evaluated:
|
|
156
|
+
status_tail = "INFO"
|
|
157
|
+
elif passed:
|
|
158
|
+
status_tail = "PASS"
|
|
159
|
+
elif mode == "fail":
|
|
160
|
+
status_tail = "FAIL"
|
|
161
|
+
else:
|
|
162
|
+
status_tail = "WARN"
|
|
163
|
+
|
|
164
|
+
console.print("\n[bold]Gate: Primary Metric Tail (ΔlogNLL)[/bold]")
|
|
165
|
+
console.print(f" mode: {mode}")
|
|
166
|
+
console.print(f" status: {status_tail}")
|
|
167
|
+
if isinstance(q_val, int | float):
|
|
168
|
+
console.print(f" observed: {q_name}={float(q_val):.4f}")
|
|
169
|
+
if isinstance(mass, int | float):
|
|
170
|
+
console.print(f" tail_mass: Pr[ΔlogNLL > ε]={float(mass):.4f}")
|
|
171
|
+
thr_parts: list[str] = []
|
|
172
|
+
if isinstance(qmax, int | float):
|
|
173
|
+
thr_parts.append(f"{q_name}≤{float(qmax):.4f}")
|
|
174
|
+
if isinstance(mmax, int | float):
|
|
175
|
+
thr_parts.append(f"mass≤{float(mmax):.4f}")
|
|
176
|
+
if isinstance(eps, int | float):
|
|
177
|
+
thr_parts.append(f"ε={float(eps):.1e}")
|
|
178
|
+
if thr_parts:
|
|
179
|
+
console.print(" threshold: " + "; ".join(thr_parts))
|
|
180
|
+
|
|
128
181
|
# Dataset split visibility from report provenance
|
|
129
182
|
try:
|
|
130
183
|
split = (report_data.get("provenance", {}) or {}).get("dataset_split")
|
|
@@ -151,10 +204,6 @@ def explain_gates_command(
|
|
|
151
204
|
drift = float(final) / float(preview)
|
|
152
205
|
except Exception:
|
|
153
206
|
drift = None
|
|
154
|
-
if isinstance(cert.get("ppl"), dict): # legacy
|
|
155
|
-
ppl = cert.get("ppl", {})
|
|
156
|
-
drift = ppl.get("preview_final_ratio", drift)
|
|
157
|
-
drift_ci = ppl.get("drift_ci")
|
|
158
207
|
drift_status = (
|
|
159
208
|
"PASS" if bool(validation.get("preview_final_drift_acceptable")) else "FAIL"
|
|
160
209
|
)
|
|
@@ -120,7 +120,7 @@ def _generate_reports(
|
|
|
120
120
|
else:
|
|
121
121
|
console.print(f" 📄 {fmt.upper()}: {file_path}")
|
|
122
122
|
|
|
123
|
-
# Show key metrics (PM-first). Avoid
|
|
123
|
+
# Show key metrics (PM-first). Avoid PPL-first wording.
|
|
124
124
|
console.print("\n📈 Key Metrics:")
|
|
125
125
|
console.print(f" Model: {primary_report['meta']['model_id']}")
|
|
126
126
|
console.print(f" Edit: {primary_report['edit']['name']}")
|