PyPI - invarlock - Versions diffs - 0.3.2__tar.gz → 0.3.3__tar.gz - Mend

invarlock 0.3.2__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (146)

{invarlock-0.3.2/src/invarlock.egg-info → invarlock-0.3.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: invarlock
-Version: 0.3.2
+Version: 0.3.3
 Summary: Edit‑agnostic robustness certificates for weight edits (InvarLock framework)
 Author-email: InvarLock Team <oss@invarlock.dev>
 Maintainer-email: InvarLock Maintainers <support@invarlock.dev>
@@ -112,7 +112,7 @@ they don’t, roll back safely.
 Technical: edit‑agnostic guard pipeline (invariants → spectral → RMT →
 variance) producing a machine‑readable Safety Certificate.
-> **Status:** 0.3.2 (pre‑1.0). Until 1.0, **minor** releases may be
+> **Status:** 0.3.3 (pre‑1.0). Until 1.0, **minor** releases may be
 > breaking. See CLI help and the CHANGELOG for updates.
 [![CI](https://img.shields.io/github/actions/workflow/status/invarlock/invarlock/ci.yml?branch=main&logo=github&label=CI)](https://github.com/invarlock/invarlock/actions/workflows/ci.yml)

{invarlock-0.3.2 → invarlock-0.3.3}/README.md RENAMED Viewed

@@ -6,7 +6,7 @@ they don’t, roll back safely.
 Technical: edit‑agnostic guard pipeline (invariants → spectral → RMT →
 variance) producing a machine‑readable Safety Certificate.
-> **Status:** 0.3.2 (pre‑1.0). Until 1.0, **minor** releases may be
+> **Status:** 0.3.3 (pre‑1.0). Until 1.0, **minor** releases may be
 > breaking. See CLI help and the CHANGELOG for updates.
 [![CI](https://img.shields.io/github/actions/workflow/status/invarlock/invarlock/ci.yml?branch=main&logo=github&label=CI)](https://github.com/invarlock/invarlock/actions/workflows/ci.yml)

{invarlock-0.3.2 → invarlock-0.3.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "invarlock"
-version = "0.3.2"
+version = "0.3.3"
 description = "Edit‑agnostic robustness certificates for weight edits (InvarLock framework)"
 authors = [{ name = "InvarLock Team", email = "oss@invarlock.dev" }]
 maintainers = [{ name = "InvarLock Maintainers", email = "support@invarlock.dev" }]

{invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/__init__.py RENAMED Viewed

@@ -12,7 +12,7 @@ For torch-dependent functionality, see subpackages under `invarlock.*`:
 - `invarlock.eval`: Metrics, guard-overhead checks, and certification
 """
-__version__ = "0.3.2"
+__version__ = "0.3.3"
 # Core exports - torch-independent
 from .config import CFG, Defaults, get_default_config

{invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/commands/run.py RENAMED Viewed

@@ -301,6 +301,12 @@ def _hash_sequences(seqs: Sequence[Sequence[int]] | Iterable[Sequence[int]]) ->
     """Compute a stable digest for a sequence of integer token sequences."""
     hasher = hashlib.blake2s(digest_size=16)
     for seq in seqs:
+        try:
+            seq_len = len(seq)
+        except TypeError:
+            seq = list(seq)
+            seq_len = len(seq)
+        hasher.update(seq_len.to_bytes(4, "little", signed=False))
         arr = array("I", (int(token) & 0xFFFFFFFF for token in seq))
         hasher.update(arr.tobytes())
     return hasher.hexdigest()

{invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/config.py RENAMED Viewed

@@ -207,11 +207,21 @@ def _create_loader(base_dir: Path):
     class Loader(yaml.SafeLoader):
         pass
-    Loader._base_dir = Path(base_dir)
+    Loader._base_dir = Path(base_dir).resolve()
     def _construct_include(loader: yaml.SafeLoader, node: yaml.Node):
         rel = loader.construct_scalar(node)
         path = (loader._base_dir / rel).resolve()
+        allow_outside = os.environ.get("INVARLOCK_ALLOW_CONFIG_INCLUDE_OUTSIDE", "")
+        allow_outside = allow_outside.strip().lower() in {"1", "true", "yes", "on"}
+        if not allow_outside:
+            try:
+                path.relative_to(loader._base_dir)
+            except ValueError as exc:
+                raise ValueError(
+                    "Config !include must stay within the config directory. "
+                    "Set INVARLOCK_ALLOW_CONFIG_INCLUDE_OUTSIDE=1 to override."
+                ) from exc
         with path.open(encoding="utf-8") as fh:
             inc_loader = _create_loader(path.parent)
             return yaml.load(fh, Loader=inc_loader)

{invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/cli/determinism.py RENAMED Viewed

@@ -83,9 +83,24 @@ def apply_determinism_preset(
     # CUDA determinism: cuBLAS workspace config.
     if requested == "strict" and dev.startswith("cuda"):
-        os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":16:8")
+        preferred = ":4096:8"
+        fallback = ":16:8"
+        if "CUBLAS_WORKSPACE_CONFIG" not in os.environ:
+            selected = preferred
+            if torch is not None:
+                try:
+                    mem_bytes = int(torch.cuda.get_device_properties(0).total_memory)
+                    if mem_bytes and mem_bytes < 8 * 1024**3:
+                        selected = fallback
+                except Exception:
+                    selected = preferred
+            os.environ["CUBLAS_WORKSPACE_CONFIG"] = selected
         env_set["CUBLAS_WORKSPACE_CONFIG"] = os.environ.get("CUBLAS_WORKSPACE_CONFIG")
+    if requested == "strict":
+        os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
+        env_set["TOKENIZERS_PARALLELISM"] = os.environ.get("TOKENIZERS_PARALLELISM")
     # Seed all RNGs (python/numpy/torch) using the existing helper for parity.
     set_seed(int(seed))

{invarlock-0.3.2 → invarlock-0.3.3}/src/invarlock/core/bootstrap.py RENAMED Viewed

@@ -39,6 +39,31 @@ def _ensure_array(samples: Iterable[float]) -> np.ndarray:
     return arr
+def _normalize_weights(weights: Iterable[float] | None, n: int) -> np.ndarray | None:
+    if weights is None:
+        return None
+    arr = np.asarray(list(weights), dtype=float)
+    if arr.ndim != 1 or arr.size != n:
+        return None
+    if not np.all(np.isfinite(arr)):
+        return None
+    if np.any(arr < 0):
+        return None
+    total = float(arr.sum())
+    if total <= 0.0:
+        return None
+    if np.allclose(arr, arr[0]):
+        return None
+    return arr / total
+def _weighted_mean(samples: np.ndarray, weights: np.ndarray) -> float:
+    total = float(weights.sum())
+    if total <= 0.0:
+        return float(np.mean(samples))
+    return float(np.dot(samples, weights) / total)
 def _percentile_interval(stats: np.ndarray, alpha: float) -> tuple[float, float]:
     """Return lower/upper bounds from an array of bootstrap statistics."""
     lower_q = 100.0 * (alpha / 2.0)
@@ -46,6 +71,61 @@ def _percentile_interval(stats: np.ndarray, alpha: float) -> tuple[float, float]
     return float(np.percentile(stats, lower_q)), float(np.percentile(stats, upper_q))
+def _bca_interval_weighted(
+    samples: np.ndarray,
+    *,
+    weights: np.ndarray,
+    replicates: int,
+    alpha: float,
+    rng: np.random.Generator,
+) -> tuple[float, float]:
+    """Compute a BCa interval for the mean under weighted resampling."""
+    n = samples.size
+    if n < 2:
+        stat = _weighted_mean(samples, weights)
+        return float(stat), float(stat)
+    prob = weights / float(weights.sum())
+    stats = np.empty(replicates, dtype=float)
+    for i in range(replicates):
+        idx = rng.choice(n, size=n, replace=True, p=prob)
+        stats[i] = float(np.mean(samples[idx]))
+    stats.sort()
+    stat_hat = _weighted_mean(samples, weights)
+    prop = np.clip((stats < stat_hat).mean(), 1e-6, 1.0 - 1e-6)
+    z0 = Normal.inv_cdf(prop)
+    sum_w = float(weights.sum())
+    sum_wx = float(np.dot(samples, weights))
+    jack = np.empty(n, dtype=float)
+    for i in range(n):
+        w_i = float(weights[i])
+        denom = sum_w - w_i
+        if denom <= 0.0:
+            jack[i] = stat_hat
+        else:
+            jack[i] = (sum_wx - w_i * float(samples[i])) / denom
+    jack_mean = jack.mean()
+    numerator = np.sum((jack_mean - jack) ** 3)
+    denominator = 6.0 * (np.sum((jack_mean - jack) ** 2) ** 1.5)
+    if denominator == 0.0:
+        return _percentile_interval(stats, alpha)
+    acc = numerator / denominator
+    def _adjust_quantile(z_alpha: float) -> float:
+        adj = z0 + (z0 + z_alpha) / max(1.0 - acc * (z0 + z_alpha), 1e-12)
+        return float(Normal.cdf(adj))
+    lower_pct = _adjust_quantile(Normal.inv_cdf(alpha / 2.0))
+    upper_pct = _adjust_quantile(Normal.inv_cdf(1.0 - alpha / 2.0))
+    return float(np.quantile(stats, lower_pct)), float(np.quantile(stats, upper_pct))
 def _bca_interval(
     samples: np.ndarray,
     *,
@@ -104,6 +184,42 @@ def _bca_interval(
     return float(np.quantile(stats, lower_pct)), float(np.quantile(stats, upper_pct))
+def _bootstrap_mean_ci_weighted(
+    samples: np.ndarray,
+    weights: np.ndarray,
+    *,
+    method: str,
+    replicates: int,
+    alpha: float,
+    seed: int,
+) -> tuple[float, float]:
+    if replicates <= 0:
+        raise ValueError("replicates must be positive")
+    if not 0.0 < alpha < 1.0:
+        raise ValueError("alpha must be between 0 and 1")
+    rng = np.random.default_rng(seed)
+    if method == "percentile":
+        stats = np.empty(replicates, dtype=float)
+        n = samples.size
+        prob = weights / float(weights.sum())
+        for i in range(replicates):
+            idx = rng.choice(n, size=n, replace=True, p=prob)
+            stats[i] = float(np.mean(samples[idx]))
+        stats.sort()
+        return _percentile_interval(stats, alpha)
+    if method == "bca":
+        return _bca_interval_weighted(
+            samples,
+            weights=weights,
+            replicates=replicates,
+            alpha=alpha,
+            rng=rng,
+        )
+    raise ValueError(f"Unsupported bootstrap method '{method}'")
 def _bootstrap_interval(
     samples: np.ndarray,
     *,
@@ -171,6 +287,7 @@ def compute_logloss_ci(
 def compute_paired_delta_log_ci(
     final_logloss: Iterable[float],
     baseline_logloss: Iterable[float],
+    weights: Iterable[float] | None = None,
     *,
     method: str = "bca",
     replicates: int = 1000,
@@ -180,15 +297,14 @@ def compute_paired_delta_log_ci(
     """
     Compute a confidence interval over the paired mean delta of log-loss.
-    This implementation uses simple mean, which equals the token-weighted mean
-    when all evaluation windows have equal token counts. The runner enforces
-    `seq_len == stride` (non-overlapping windows) and `window_match_fraction == 1.0`
-    (perfect pairing), so the equal-weight simplification applies. See
-    docs/assurance/01-eval-math-proof.md for the full derivation.
+    This implementation uses token-weighted resampling when window weights are
+    provided. When all weights are equal, the weighted bootstrap reduces to the
+    simple mean. See docs/assurance/01-eval-math-proof.md for the derivation.
     Args:
         final_logloss: Iterable of per-window log-loss values after the edit/guard.
         baseline_logloss: Iterable of paired per-window log-loss values (before edit).
+        weights: Optional token counts per window; used for weighted resampling.
     Returns:
         (lo, hi) bounds of Δlog-loss such that ratio CI = exp(bounds).
@@ -199,6 +315,12 @@ def compute_paired_delta_log_ci(
         size = min(final_arr.size, base_arr.size)
         final_arr = final_arr[:size]
         base_arr = base_arr[:size]
+    weight_arr = None
+    if weights is not None:
+        weight_list = list(weights)
+        if len(weight_list) >= final_arr.size:
+            weight_list = weight_list[: final_arr.size]
+        weight_arr = _normalize_weights(weight_list, final_arr.size)
     if final_arr.size == 0:
         return 0.0, 0.0
@@ -207,6 +329,16 @@ def compute_paired_delta_log_ci(
         mean_delta = float(delta.mean())
         return mean_delta, mean_delta
+    if weight_arr is not None:
+        return _bootstrap_mean_ci_weighted(
+            delta,
+            weight_arr,
+            method=method,
+            replicates=replicates,
+            alpha=alpha,
+            seed=seed,
+        )
     def stat_fn(data: np.ndarray) -> float:
         return float(np.mean(data))

invarlock 0.3.2__tar.gz → 0.3.3__tar.gz

invarlock 0.3.2__tar.gz → 0.3.3__tar.gz