invarlock 0.3.2__tar.gz → 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {invarlock-0.3.2/src/invarlock.egg-info → invarlock-0.3.3}/PKG-INFO +2 -2
- {invarlock-0.3.2 → invarlock-0.3.3}/README.md +1 -1
- {invarlock-0.3.2 → invarlock-0.3.3}/pyproject.toml +1 -1
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/__init__.py +1 -1
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/commands/run.py +6 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/config.py +11 -1
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/determinism.py +16 -1
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/core/bootstrap.py +137 -5
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/core/runner.py +305 -35
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/eval/bootstrap.py +3 -1
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/eval/primary_metric.py +20 -5
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/guards/rmt.py +536 -46
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/guards/spectral.py +1 -1
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/guards/variance.py +122 -43
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/reporting/certificate.py +231 -12
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/reporting/normalizer.py +3 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/reporting/policy_utils.py +1 -3
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/reporting/primary_metric_utils.py +17 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/reporting/validate.py +10 -10
- {invarlock-0.3.2 → invarlock-0.3.3/src/invarlock.egg-info}/PKG-INFO +2 -2
- {invarlock-0.3.2 → invarlock-0.3.3}/LICENSE +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/MANIFEST.in +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/setup.cfg +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/__main__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/_data/runtime/profiles/ci_cpu.yaml +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/_data/runtime/profiles/release.yaml +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/_data/runtime/tiers.yaml +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/adapters/__init__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/adapters/_capabilities.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/adapters/auto.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/adapters/base.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/adapters/base_types.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/adapters/capabilities.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/adapters/hf_bert.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/adapters/hf_gpt2.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/adapters/hf_llama.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/adapters/hf_loading.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/adapters/hf_mixin.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/adapters/hf_onnx.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/adapters/hf_t5.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/adapters/py.typed +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/assurance/__init__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/calibration/__init__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/calibration/spectral_null.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/calibration/variance_ve.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/__init__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/__main__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/_evidence.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/_json.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/adapter_auto.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/app.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/commands/__init__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/commands/calibrate.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/commands/certify.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/commands/doctor.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/commands/explain_gates.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/commands/export_html.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/commands/plugins.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/commands/report.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/commands/verify.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/constants.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/device.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/doctor_helpers.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/errors.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/overhead_utils.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/provenance.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/utils.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/config.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/core/__init__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/core/abi.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/core/api.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/core/auto_tuning.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/core/checkpoint.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/core/contracts.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/core/error_utils.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/core/events.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/core/exceptions.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/core/registry.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/core/retry.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/core/types.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/edits/__init__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/edits/_edit_utils.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/edits/_external_utils.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/edits/noop.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/edits/py.typed +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/edits/quant_rtn.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/edits/registry.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/eval/__init__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/eval/bench.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/eval/bench_regression.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/eval/data.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/eval/metrics.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/eval/probes/__init__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/eval/probes/fft.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/eval/probes/mi.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/eval/probes/post_attention.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/eval/providers/base.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/eval/providers/seq2seq.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/eval/providers/text_lm.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/eval/providers/vision_text.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/eval/py.typed +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/guards/__init__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/guards/_contracts.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/guards/invariants.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/guards/policies.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/guards/py.typed +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/guards/tier_config.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/guards_ref/__init__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/guards_ref/rmt_ref.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/guards_ref/spectral_ref.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/guards_ref/variance_ref.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/model_profile.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/model_utils.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/observability/__init__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/observability/alerting.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/observability/core.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/observability/exporters.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/observability/health.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/observability/metrics.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/observability/py.typed +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/observability/utils.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/plugins/__init__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/plugins/hello_guard.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/plugins/hf_awq_adapter.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/plugins/hf_bnb_adapter.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/plugins/hf_gptq_adapter.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/plugins/py.typed +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/py.typed +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/reporting/__init__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/reporting/certificate_schema.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/reporting/dataset_hashing.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/reporting/guards_analysis.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/reporting/html.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/reporting/render.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/reporting/report.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/reporting/report_types.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/reporting/utils.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/security.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/sparsity_utils.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/utils/__init__.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/utils/digest.py +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock.egg-info/SOURCES.txt +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock.egg-info/dependency_links.txt +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock.egg-info/entry_points.txt +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock.egg-info/requires.txt +0 -0
- {invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: invarlock
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.3.3
|
|
4
4
|
Summary: Edit‑agnostic robustness certificates for weight edits (InvarLock framework)
|
|
5
5
|
Author-email: InvarLock Team <oss@invarlock.dev>
|
|
6
6
|
Maintainer-email: InvarLock Maintainers <support@invarlock.dev>
|
|
@@ -112,7 +112,7 @@ they don’t, roll back safely.
|
|
|
112
112
|
Technical: edit‑agnostic guard pipeline (invariants → spectral → RMT →
|
|
113
113
|
variance) producing a machine‑readable Safety Certificate.
|
|
114
114
|
|
|
115
|
-
> **Status:** 0.3.2 (pre‑1.0). Until 1.0, **minor** releases may be
|
|
115
|
+
> **Status:** 0.3.3 (pre‑1.0). Until 1.0, **minor** releases may be
|
|
116
116
|
> breaking. See CLI help and the CHANGELOG for updates.
|
|
117
117
|
|
|
118
118
|
[](https://github.com/invarlock/invarlock/actions/workflows/ci.yml)
|
|
@@ -6,7 +6,7 @@ they don’t, roll back safely.
|
|
|
6
6
|
Technical: edit‑agnostic guard pipeline (invariants → spectral → RMT →
|
|
7
7
|
variance) producing a machine‑readable Safety Certificate.
|
|
8
8
|
|
|
9
|
-
> **Status:** 0.3.2 (pre‑1.0). Until 1.0, **minor** releases may be
|
|
9
|
+
> **Status:** 0.3.3 (pre‑1.0). Until 1.0, **minor** releases may be
|
|
10
10
|
> breaking. See CLI help and the CHANGELOG for updates.
|
|
11
11
|
|
|
12
12
|
[](https://github.com/invarlock/invarlock/actions/workflows/ci.yml)
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "invarlock"
|
|
7
|
-
version = "0.3.2"
|
|
7
|
+
version = "0.3.3"
|
|
8
8
|
description = "Edit‑agnostic robustness certificates for weight edits (InvarLock framework)"
|
|
9
9
|
authors = [{ name = "InvarLock Team", email = "oss@invarlock.dev" }]
|
|
10
10
|
maintainers = [{ name = "InvarLock Maintainers", email = "support@invarlock.dev" }]
|
|
@@ -12,7 +12,7 @@ For torch-dependent functionality, see subpackages under `invarlock.*`:
|
|
|
12
12
|
- `invarlock.eval`: Metrics, guard-overhead checks, and certification
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
|
-
__version__ = "0.3.2"
|
|
15
|
+
__version__ = "0.3.3"
|
|
16
16
|
|
|
17
17
|
# Core exports - torch-independent
|
|
18
18
|
from .config import CFG, Defaults, get_default_config
|
|
@@ -301,6 +301,12 @@ def _hash_sequences(seqs: Sequence[Sequence[int]] | Iterable[Sequence[int]]) ->
|
|
|
301
301
|
"""Compute a stable digest for a sequence of integer token sequences."""
|
|
302
302
|
hasher = hashlib.blake2s(digest_size=16)
|
|
303
303
|
for seq in seqs:
|
|
304
|
+
try:
|
|
305
|
+
seq_len = len(seq)
|
|
306
|
+
except TypeError:
|
|
307
|
+
seq = list(seq)
|
|
308
|
+
seq_len = len(seq)
|
|
309
|
+
hasher.update(seq_len.to_bytes(4, "little", signed=False))
|
|
304
310
|
arr = array("I", (int(token) & 0xFFFFFFFF for token in seq))
|
|
305
311
|
hasher.update(arr.tobytes())
|
|
306
312
|
return hasher.hexdigest()
|
|
@@ -207,11 +207,21 @@ def _create_loader(base_dir: Path):
|
|
|
207
207
|
class Loader(yaml.SafeLoader):
|
|
208
208
|
pass
|
|
209
209
|
|
|
210
|
-
Loader._base_dir = Path(base_dir)
|
|
210
|
+
Loader._base_dir = Path(base_dir).resolve()
|
|
211
211
|
|
|
212
212
|
def _construct_include(loader: yaml.SafeLoader, node: yaml.Node):
|
|
213
213
|
rel = loader.construct_scalar(node)
|
|
214
214
|
path = (loader._base_dir / rel).resolve()
|
|
215
|
+
allow_outside = os.environ.get("INVARLOCK_ALLOW_CONFIG_INCLUDE_OUTSIDE", "")
|
|
216
|
+
allow_outside = allow_outside.strip().lower() in {"1", "true", "yes", "on"}
|
|
217
|
+
if not allow_outside:
|
|
218
|
+
try:
|
|
219
|
+
path.relative_to(loader._base_dir)
|
|
220
|
+
except ValueError as exc:
|
|
221
|
+
raise ValueError(
|
|
222
|
+
"Config !include must stay within the config directory. "
|
|
223
|
+
"Set INVARLOCK_ALLOW_CONFIG_INCLUDE_OUTSIDE=1 to override."
|
|
224
|
+
) from exc
|
|
215
225
|
with path.open(encoding="utf-8") as fh:
|
|
216
226
|
inc_loader = _create_loader(path.parent)
|
|
217
227
|
return yaml.load(fh, Loader=inc_loader)
|
|
@@ -83,9 +83,24 @@ def apply_determinism_preset(
|
|
|
83
83
|
|
|
84
84
|
# CUDA determinism: cuBLAS workspace config.
|
|
85
85
|
if requested == "strict" and dev.startswith("cuda"):
|
|
86
|
-
|
|
86
|
+
preferred = ":4096:8"
|
|
87
|
+
fallback = ":16:8"
|
|
88
|
+
if "CUBLAS_WORKSPACE_CONFIG" not in os.environ:
|
|
89
|
+
selected = preferred
|
|
90
|
+
if torch is not None:
|
|
91
|
+
try:
|
|
92
|
+
mem_bytes = int(torch.cuda.get_device_properties(0).total_memory)
|
|
93
|
+
if mem_bytes and mem_bytes < 8 * 1024**3:
|
|
94
|
+
selected = fallback
|
|
95
|
+
except Exception:
|
|
96
|
+
selected = preferred
|
|
97
|
+
os.environ["CUBLAS_WORKSPACE_CONFIG"] = selected
|
|
87
98
|
env_set["CUBLAS_WORKSPACE_CONFIG"] = os.environ.get("CUBLAS_WORKSPACE_CONFIG")
|
|
88
99
|
|
|
100
|
+
if requested == "strict":
|
|
101
|
+
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
|
|
102
|
+
env_set["TOKENIZERS_PARALLELISM"] = os.environ.get("TOKENIZERS_PARALLELISM")
|
|
103
|
+
|
|
89
104
|
# Seed all RNGs (python/numpy/torch) using the existing helper for parity.
|
|
90
105
|
set_seed(int(seed))
|
|
91
106
|
|
|
@@ -39,6 +39,31 @@ def _ensure_array(samples: Iterable[float]) -> np.ndarray:
|
|
|
39
39
|
return arr
|
|
40
40
|
|
|
41
41
|
|
|
42
|
+
def _normalize_weights(weights: Iterable[float] | None, n: int) -> np.ndarray | None:
|
|
43
|
+
if weights is None:
|
|
44
|
+
return None
|
|
45
|
+
arr = np.asarray(list(weights), dtype=float)
|
|
46
|
+
if arr.ndim != 1 or arr.size != n:
|
|
47
|
+
return None
|
|
48
|
+
if not np.all(np.isfinite(arr)):
|
|
49
|
+
return None
|
|
50
|
+
if np.any(arr < 0):
|
|
51
|
+
return None
|
|
52
|
+
total = float(arr.sum())
|
|
53
|
+
if total <= 0.0:
|
|
54
|
+
return None
|
|
55
|
+
if np.allclose(arr, arr[0]):
|
|
56
|
+
return None
|
|
57
|
+
return arr / total
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _weighted_mean(samples: np.ndarray, weights: np.ndarray) -> float:
|
|
61
|
+
total = float(weights.sum())
|
|
62
|
+
if total <= 0.0:
|
|
63
|
+
return float(np.mean(samples))
|
|
64
|
+
return float(np.dot(samples, weights) / total)
|
|
65
|
+
|
|
66
|
+
|
|
42
67
|
def _percentile_interval(stats: np.ndarray, alpha: float) -> tuple[float, float]:
|
|
43
68
|
"""Return lower/upper bounds from an array of bootstrap statistics."""
|
|
44
69
|
lower_q = 100.0 * (alpha / 2.0)
|
|
@@ -46,6 +71,61 @@ def _percentile_interval(stats: np.ndarray, alpha: float) -> tuple[float, float]
|
|
|
46
71
|
return float(np.percentile(stats, lower_q)), float(np.percentile(stats, upper_q))
|
|
47
72
|
|
|
48
73
|
|
|
74
|
+
def _bca_interval_weighted(
    samples: np.ndarray,
    *,
    weights: np.ndarray,
    replicates: int,
    alpha: float,
    rng: np.random.Generator,
) -> tuple[float, float]:
    """Compute a BCa interval for the mean under weighted resampling.

    Args:
        samples: Observations; indexed as a 1-D array.
        weights: Per-observation weights. They are normalised here, so they
            do not need to sum to one.
        replicates: Number of bootstrap resamples to draw.
        alpha: Two-sided miscoverage level (e.g. 0.05 for a 95% interval).
        rng: Generator used for resampling; the caller controls seeding.

    Returns:
        ``(lower, upper)`` bounds. With fewer than two samples both bounds
        equal the weighted mean. If the jackknife values are all identical
        (acceleration undefined) the plain percentile interval is returned.
    """
    n = samples.size
    stat_hat = _weighted_mean(samples, weights)
    if n < 2:
        return float(stat_hat), float(stat_hat)

    sum_w = float(weights.sum())
    if sum_w <= 0.0:
        # Mirror _weighted_mean: without a positive total the weights are
        # unusable, so resample uniformly instead of dividing by zero.
        prob = np.full(n, 1.0 / n)
    else:
        prob = weights / sum_w

    # Indices are drawn with probability proportional to weight, so the plain
    # mean of each resample estimates the weighted mean.
    stats = np.empty(replicates, dtype=float)
    for i in range(replicates):
        idx = rng.choice(n, size=n, replace=True, p=prob)
        stats[i] = float(np.mean(samples[idx]))

    stats.sort()

    # Bias correction: share of replicates below the point estimate, clipped
    # so the inverse normal CDF stays finite.
    prop = np.clip((stats < stat_hat).mean(), 1e-6, 1.0 - 1e-6)
    z0 = Normal.inv_cdf(prop)

    # Leave-one-out weighted means. Where removing an observation leaves no
    # positive weight, keep the full-sample estimate.
    sum_wx = float(np.dot(samples, weights))
    denom = sum_w - weights
    usable = denom > 0.0
    jack = np.full(n, stat_hat, dtype=float)
    jack[usable] = (sum_wx - weights[usable] * samples[usable]) / denom[usable]

    jack_mean = jack.mean()
    numerator = np.sum((jack_mean - jack) ** 3)
    denominator = 6.0 * (np.sum((jack_mean - jack) ** 2) ** 1.5)
    if denominator == 0.0:
        return _percentile_interval(stats, alpha)

    acc = numerator / denominator

    def _adjust_quantile(z_alpha: float) -> float:
        # The denominator is floored at a tiny positive value to avoid
        # dividing by zero or flipping sign.
        adj = z0 + (z0 + z_alpha) / max(1.0 - acc * (z0 + z_alpha), 1e-12)
        return float(Normal.cdf(adj))

    lower_pct = _adjust_quantile(Normal.inv_cdf(alpha / 2.0))
    upper_pct = _adjust_quantile(Normal.inv_cdf(1.0 - alpha / 2.0))

    return float(np.quantile(stats, lower_pct)), float(np.quantile(stats, upper_pct))
|
|
127
|
+
|
|
128
|
+
|
|
49
129
|
def _bca_interval(
|
|
50
130
|
samples: np.ndarray,
|
|
51
131
|
*,
|
|
@@ -104,6 +184,42 @@ def _bca_interval(
|
|
|
104
184
|
return float(np.quantile(stats, lower_pct)), float(np.quantile(stats, upper_pct))
|
|
105
185
|
|
|
106
186
|
|
|
187
|
+
def _bootstrap_mean_ci_weighted(
|
|
188
|
+
samples: np.ndarray,
|
|
189
|
+
weights: np.ndarray,
|
|
190
|
+
*,
|
|
191
|
+
method: str,
|
|
192
|
+
replicates: int,
|
|
193
|
+
alpha: float,
|
|
194
|
+
seed: int,
|
|
195
|
+
) -> tuple[float, float]:
|
|
196
|
+
if replicates <= 0:
|
|
197
|
+
raise ValueError("replicates must be positive")
|
|
198
|
+
if not 0.0 < alpha < 1.0:
|
|
199
|
+
raise ValueError("alpha must be between 0 and 1")
|
|
200
|
+
|
|
201
|
+
rng = np.random.default_rng(seed)
|
|
202
|
+
if method == "percentile":
|
|
203
|
+
stats = np.empty(replicates, dtype=float)
|
|
204
|
+
n = samples.size
|
|
205
|
+
prob = weights / float(weights.sum())
|
|
206
|
+
for i in range(replicates):
|
|
207
|
+
idx = rng.choice(n, size=n, replace=True, p=prob)
|
|
208
|
+
stats[i] = float(np.mean(samples[idx]))
|
|
209
|
+
stats.sort()
|
|
210
|
+
return _percentile_interval(stats, alpha)
|
|
211
|
+
if method == "bca":
|
|
212
|
+
return _bca_interval_weighted(
|
|
213
|
+
samples,
|
|
214
|
+
weights=weights,
|
|
215
|
+
replicates=replicates,
|
|
216
|
+
alpha=alpha,
|
|
217
|
+
rng=rng,
|
|
218
|
+
)
|
|
219
|
+
|
|
220
|
+
raise ValueError(f"Unsupported bootstrap method '{method}'")
|
|
221
|
+
|
|
222
|
+
|
|
107
223
|
def _bootstrap_interval(
|
|
108
224
|
samples: np.ndarray,
|
|
109
225
|
*,
|
|
@@ -171,6 +287,7 @@ def compute_logloss_ci(
|
|
|
171
287
|
def compute_paired_delta_log_ci(
|
|
172
288
|
final_logloss: Iterable[float],
|
|
173
289
|
baseline_logloss: Iterable[float],
|
|
290
|
+
weights: Iterable[float] | None = None,
|
|
174
291
|
*,
|
|
175
292
|
method: str = "bca",
|
|
176
293
|
replicates: int = 1000,
|
|
@@ -180,15 +297,14 @@ def compute_paired_delta_log_ci(
|
|
|
180
297
|
"""
|
|
181
298
|
Compute a confidence interval over the paired mean delta of log-loss.
|
|
182
299
|
|
|
183
|
-
This implementation uses
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
(perfect pairing), so the equal-weight simplification applies. See
|
|
187
|
-
docs/assurance/01-eval-math-proof.md for the full derivation.
|
|
300
|
+
This implementation uses token-weighted resampling when window weights are
|
|
301
|
+
provided. When all weights are equal, the weighted bootstrap reduces to the
|
|
302
|
+
simple mean. See docs/assurance/01-eval-math-proof.md for the derivation.
|
|
188
303
|
|
|
189
304
|
Args:
|
|
190
305
|
final_logloss: Iterable of per-window log-loss values after the edit/guard.
|
|
191
306
|
baseline_logloss: Iterable of paired per-window log-loss values (before edit).
|
|
307
|
+
weights: Optional token counts per window; used for weighted resampling.
|
|
192
308
|
|
|
193
309
|
Returns:
|
|
194
310
|
(lo, hi) bounds of Δlog-loss such that ratio CI = exp(bounds).
|
|
@@ -199,6 +315,12 @@ def compute_paired_delta_log_ci(
|
|
|
199
315
|
size = min(final_arr.size, base_arr.size)
|
|
200
316
|
final_arr = final_arr[:size]
|
|
201
317
|
base_arr = base_arr[:size]
|
|
318
|
+
weight_arr = None
|
|
319
|
+
if weights is not None:
|
|
320
|
+
weight_list = list(weights)
|
|
321
|
+
if len(weight_list) >= final_arr.size:
|
|
322
|
+
weight_list = weight_list[: final_arr.size]
|
|
323
|
+
weight_arr = _normalize_weights(weight_list, final_arr.size)
|
|
202
324
|
if final_arr.size == 0:
|
|
203
325
|
return 0.0, 0.0
|
|
204
326
|
|
|
@@ -207,6 +329,16 @@ def compute_paired_delta_log_ci(
|
|
|
207
329
|
mean_delta = float(delta.mean())
|
|
208
330
|
return mean_delta, mean_delta
|
|
209
331
|
|
|
332
|
+
if weight_arr is not None:
|
|
333
|
+
return _bootstrap_mean_ci_weighted(
|
|
334
|
+
delta,
|
|
335
|
+
weight_arr,
|
|
336
|
+
method=method,
|
|
337
|
+
replicates=replicates,
|
|
338
|
+
alpha=alpha,
|
|
339
|
+
seed=seed,
|
|
340
|
+
)
|
|
341
|
+
|
|
210
342
|
def stat_fn(data: np.ndarray) -> float:
|
|
211
343
|
return float(np.mean(data))
|
|
212
344
|
|