invarlock 0.3.4__tar.gz → 0.3.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. {invarlock-0.3.4/src/invarlock.egg-info → invarlock-0.3.5}/PKG-INFO +2 -2
  2. {invarlock-0.3.4 → invarlock-0.3.5}/README.md +1 -1
  3. {invarlock-0.3.4 → invarlock-0.3.5}/pyproject.toml +1 -1
  4. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/__init__.py +1 -1
  5. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/commands/certify.py +1 -1
  6. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/data.py +59 -255
  7. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/metrics.py +130 -3
  8. {invarlock-0.3.4 → invarlock-0.3.5/src/invarlock.egg-info}/PKG-INFO +2 -2
  9. {invarlock-0.3.4 → invarlock-0.3.5}/LICENSE +0 -0
  10. {invarlock-0.3.4 → invarlock-0.3.5}/MANIFEST.in +0 -0
  11. {invarlock-0.3.4 → invarlock-0.3.5}/setup.cfg +0 -0
  12. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/__main__.py +0 -0
  13. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/_data/runtime/profiles/ci_cpu.yaml +0 -0
  14. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/_data/runtime/profiles/release.yaml +0 -0
  15. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/_data/runtime/tiers.yaml +0 -0
  16. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/adapters/__init__.py +0 -0
  17. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/adapters/_capabilities.py +0 -0
  18. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/adapters/auto.py +0 -0
  19. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/adapters/base.py +0 -0
  20. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/adapters/base_types.py +0 -0
  21. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/adapters/capabilities.py +0 -0
  22. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/adapters/hf_bert.py +0 -0
  23. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/adapters/hf_gpt2.py +0 -0
  24. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/adapters/hf_llama.py +0 -0
  25. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/adapters/hf_loading.py +0 -0
  26. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/adapters/hf_mixin.py +0 -0
  27. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/adapters/hf_onnx.py +0 -0
  28. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/adapters/hf_t5.py +0 -0
  29. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/adapters/py.typed +0 -0
  30. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/assurance/__init__.py +0 -0
  31. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/calibration/__init__.py +0 -0
  32. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/calibration/spectral_null.py +0 -0
  33. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/calibration/variance_ve.py +0 -0
  34. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/__init__.py +0 -0
  35. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/__main__.py +0 -0
  36. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/_evidence.py +0 -0
  37. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/_json.py +0 -0
  38. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/adapter_auto.py +0 -0
  39. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/app.py +0 -0
  40. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/commands/__init__.py +0 -0
  41. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/commands/calibrate.py +0 -0
  42. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/commands/doctor.py +0 -0
  43. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/commands/explain_gates.py +0 -0
  44. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/commands/export_html.py +0 -0
  45. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/commands/plugins.py +0 -0
  46. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/commands/report.py +0 -0
  47. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/commands/run.py +0 -0
  48. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/commands/verify.py +0 -0
  49. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/config.py +0 -0
  50. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/constants.py +0 -0
  51. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/determinism.py +0 -0
  52. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/device.py +0 -0
  53. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/doctor_helpers.py +0 -0
  54. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/errors.py +0 -0
  55. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/overhead_utils.py +0 -0
  56. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/provenance.py +0 -0
  57. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/utils.py +0 -0
  58. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/config.py +0 -0
  59. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/core/__init__.py +0 -0
  60. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/core/abi.py +0 -0
  61. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/core/api.py +0 -0
  62. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/core/auto_tuning.py +0 -0
  63. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/core/bootstrap.py +0 -0
  64. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/core/checkpoint.py +0 -0
  65. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/core/contracts.py +0 -0
  66. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/core/error_utils.py +0 -0
  67. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/core/events.py +0 -0
  68. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/core/exceptions.py +0 -0
  69. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/core/registry.py +0 -0
  70. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/core/retry.py +0 -0
  71. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/core/runner.py +0 -0
  72. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/core/types.py +0 -0
  73. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/edits/__init__.py +0 -0
  74. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/edits/_edit_utils.py +0 -0
  75. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/edits/_external_utils.py +0 -0
  76. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/edits/noop.py +0 -0
  77. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/edits/py.typed +0 -0
  78. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/edits/quant_rtn.py +0 -0
  79. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/edits/registry.py +0 -0
  80. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/__init__.py +0 -0
  81. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/bench.py +0 -0
  82. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/bench_regression.py +0 -0
  83. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/bootstrap.py +0 -0
  84. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/primary_metric.py +0 -0
  85. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/probes/__init__.py +0 -0
  86. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/probes/fft.py +0 -0
  87. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/probes/mi.py +0 -0
  88. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/probes/post_attention.py +0 -0
  89. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/providers/base.py +0 -0
  90. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/providers/seq2seq.py +0 -0
  91. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/providers/text_lm.py +0 -0
  92. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/providers/vision_text.py +0 -0
  93. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/py.typed +0 -0
  94. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/guards/__init__.py +0 -0
  95. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/guards/_contracts.py +0 -0
  96. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/guards/invariants.py +0 -0
  97. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/guards/policies.py +0 -0
  98. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/guards/py.typed +0 -0
  99. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/guards/rmt.py +0 -0
  100. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/guards/spectral.py +0 -0
  101. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/guards/tier_config.py +0 -0
  102. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/guards/variance.py +0 -0
  103. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/guards_ref/__init__.py +0 -0
  104. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/guards_ref/rmt_ref.py +0 -0
  105. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/guards_ref/spectral_ref.py +0 -0
  106. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/guards_ref/variance_ref.py +0 -0
  107. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/model_profile.py +0 -0
  108. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/model_utils.py +0 -0
  109. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/observability/__init__.py +0 -0
  110. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/observability/alerting.py +0 -0
  111. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/observability/core.py +0 -0
  112. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/observability/exporters.py +0 -0
  113. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/observability/health.py +0 -0
  114. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/observability/metrics.py +0 -0
  115. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/observability/py.typed +0 -0
  116. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/observability/utils.py +0 -0
  117. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/plugins/__init__.py +0 -0
  118. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/plugins/hello_guard.py +0 -0
  119. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/plugins/hf_awq_adapter.py +0 -0
  120. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/plugins/hf_bnb_adapter.py +0 -0
  121. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/plugins/hf_gptq_adapter.py +0 -0
  122. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/plugins/py.typed +0 -0
  123. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/py.typed +0 -0
  124. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/reporting/__init__.py +0 -0
  125. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/reporting/certificate.py +0 -0
  126. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/reporting/certificate_schema.py +0 -0
  127. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/reporting/dataset_hashing.py +0 -0
  128. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/reporting/guards_analysis.py +0 -0
  129. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/reporting/html.py +0 -0
  130. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/reporting/normalizer.py +0 -0
  131. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/reporting/policy_utils.py +0 -0
  132. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/reporting/primary_metric_utils.py +0 -0
  133. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/reporting/render.py +0 -0
  134. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/reporting/report.py +0 -0
  135. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/reporting/report_types.py +0 -0
  136. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/reporting/utils.py +0 -0
  137. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/reporting/validate.py +0 -0
  138. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/security.py +0 -0
  139. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/sparsity_utils.py +0 -0
  140. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/utils/__init__.py +0 -0
  141. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/utils/digest.py +0 -0
  142. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock.egg-info/SOURCES.txt +0 -0
  143. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock.egg-info/dependency_links.txt +0 -0
  144. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock.egg-info/entry_points.txt +0 -0
  145. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock.egg-info/requires.txt +0 -0
  146. {invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock.egg-info/top_level.txt +0 -0
{invarlock-0.3.4/src/invarlock.egg-info → invarlock-0.3.5}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: invarlock
- Version: 0.3.4
+ Version: 0.3.5
  Summary: Edit‑agnostic robustness certificates for weight edits (InvarLock framework)
  Author-email: InvarLock Team <oss@invarlock.dev>
  Maintainer-email: InvarLock Maintainers <support@invarlock.dev>
@@ -112,7 +112,7 @@ they don’t, roll back safely.
  Technical: edit‑agnostic guard pipeline (invariants → spectral → RMT →
  variance) producing a machine‑readable Safety Certificate.

- > **Status:** 0.3.4 (pre‑1.0). Until 1.0, **minor** releases may be
+ > **Status:** 0.3.5 (pre‑1.0). Until 1.0, **minor** releases may be
  > breaking. See CLI help and the CHANGELOG for updates.

  [![CI](https://img.shields.io/github/actions/workflow/status/invarlock/invarlock/ci.yml?branch=main&logo=github&label=CI)](https://github.com/invarlock/invarlock/actions/workflows/ci.yml)
{invarlock-0.3.4 → invarlock-0.3.5}/README.md
@@ -6,7 +6,7 @@ they don’t, roll back safely.
  Technical: edit‑agnostic guard pipeline (invariants → spectral → RMT →
  variance) producing a machine‑readable Safety Certificate.

- > **Status:** 0.3.4 (pre‑1.0). Until 1.0, **minor** releases may be
+ > **Status:** 0.3.5 (pre‑1.0). Until 1.0, **minor** releases may be
  > breaking. See CLI help and the CHANGELOG for updates.

  [![CI](https://img.shields.io/github/actions/workflow/status/invarlock/invarlock/ci.yml?branch=main&logo=github&label=CI)](https://github.com/invarlock/invarlock/actions/workflows/ci.yml)
{invarlock-0.3.4 → invarlock-0.3.5}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "invarlock"
- version = "0.3.4"
+ version = "0.3.5"
  description = "Edit‑agnostic robustness certificates for weight edits (InvarLock framework)"
  authors = [{ name = "InvarLock Team", email = "oss@invarlock.dev" }]
  maintainers = [{ name = "InvarLock Maintainers", email = "support@invarlock.dev" }]
{invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/__init__.py
@@ -12,7 +12,7 @@ For torch-dependent functionality, see subpackages under `invarlock.*`:
  - `invarlock.eval`: Metrics, guard-overhead checks, and certification
  """

- __version__ = "0.3.4"
+ __version__ = "0.3.5"

  # Core exports - torch-independent
  from .config import CFG, Defaults, get_default_config
{invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/cli/commands/certify.py
@@ -378,7 +378,7 @@ def certify_command(
  f"Context: device={device}, adapter={adapter_name}, edit={edit_name}. "
  "Baseline ok; edited failed to compute ppl. "
  "Try: use an accelerator (mps/cuda), force float32, reduce max_modules, "
- "or lower batch size (INVARLOCK_SCORES_BATCH_SIZE)."
+ "or lower the evaluation batch size."
  ),
  details={
  "device": device,
{invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/data.py
@@ -7,7 +7,6 @@ Pluggable data loading system with deterministic windowing for reproducible eval

  from __future__ import annotations

- import atexit
  import hashlib
  import json
  import math
@@ -51,7 +50,6 @@ except ImportError:

  try:
  import torch
- import torch.nn.functional as F

  HAS_TORCH = True
  except ImportError:
@@ -160,9 +158,9 @@ class WikiText2Provider:
  """

  name = "wikitext2"
- _MODEL_CACHE: Any | None | bool = None
- _MODEL_DEVICE: Any | None = None
- _CLEANUP_REGISTERED: bool = False
+ _BYTE_NGRAM_ORDER = 4
+ _BYTE_NGRAM_PAD = 256
+ _BYTE_NGRAM_ALPHA = 1.0

  def __init__(
  self,
@@ -178,13 +176,9 @@ class WikiText2Provider:
  """
  self.cache_dir = cache_dir
  self._validate_dependencies()
- self._register_cleanup()
- self._difficulty_model = self.__class__._MODEL_CACHE
- self._difficulty_device = self.__class__._MODEL_DEVICE
  self._last_stratification_stats: dict[str, Any] | None = None
  self._last_batch_size_used: int = 0
  self._last_scorer_profile: dict[str, Any] | None = None
- self._scorer_warmed: bool = False
  # In-process cache for loaded/filtered texts to avoid repeated
  # load_dataset() calls across stratification retries.
  self._texts_cache: dict[str, list[str]] = {}
@@ -192,43 +186,6 @@ class WikiText2Provider:
  normalized_hint = (device_hint or "").strip().lower()
  self._device_hint: str | None = normalized_hint or None

- @classmethod
- def _register_cleanup(cls) -> None:
- """Register an atexit hook once per process to release cached models."""
- if cls._CLEANUP_REGISTERED or not HAS_TORCH:
- return
-
- def _cleanup() -> None:
- cls._cleanup_model_cache()
-
- atexit.register(_cleanup)
- cls._CLEANUP_REGISTERED = True
-
- @classmethod
- def _cleanup_model_cache(cls) -> None:
- """Release cached models to avoid leaking multiprocessing semaphores."""
- cache = cls._MODEL_CACHE
- if cache is not None and cache is not False and HAS_TORCH:
- try:
- cache.to("cpu")
- except Exception:
- pass
- cls._MODEL_CACHE = None
- cls._MODEL_DEVICE = None
-
- @staticmethod
- def _pick_default_scorer_device() -> torch.device:
- """
- Choose a default device for the difficulty scorer model.
-
- Prefers CUDA → MPS → CPU when available.
- """
- if torch.cuda.is_available():
- return torch.device("cuda")
- if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
- return torch.device("mps")
- return torch.device("cpu")
-
  def _validate_dependencies(self) -> None:
  """Check that required dependencies are available."""
  if not HAS_DATASETS:
@@ -513,9 +470,11 @@ class WikiText2Provider:
  candidates.append(
  {
  "dataset_index": idx,
+ "text": texts[idx],
  "input_ids": input_ids_list,
  "attention_mask": attention_mask_list,
  "token_count": real_tokens,
+ "seq_len": len(input_ids_list),
  }
  )

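Note: the two added fields mean each candidate window now carries its raw text and its padded token length alongside the token IDs; the raw "text" is what the new byte-level scorer introduced later in this file consumes, so difficulty scoring no longer depends on any tokenizer vocabulary. A minimal sketch of the record shape; the `make_candidate` helper is hypothetical and the values are illustrative:

    from typing import Any

    def make_candidate(idx: int, text: str, input_ids: list[int],
                       attention_mask: list[int]) -> dict[str, Any]:
        # Mirrors the fields appended in the hunk above: "text" feeds the
        # byte-level difficulty scorer, "seq_len" is the padded window length,
        # and "token_count" counts only real (unmasked) tokens.
        return {
            "dataset_index": idx,
            "text": text,
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "token_count": sum(attention_mask),
            "seq_len": len(input_ids),
        }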
@@ -531,32 +490,7 @@ class WikiText2Provider:
  details={"needed": int(total_required), "got": int(len(candidates))},
  )

- if not self._score_candidates_with_model(candidates):
- token_counter: Counter[int] = Counter()
- for candidate in candidates:
- for token_id, mask in zip(
- candidate["input_ids"], candidate["attention_mask"], strict=False
- ):
- if mask:
- token_counter[int(token_id)] += 1
-
- total_tokens = sum(token_counter.values()) or 1
- vocab_size = max(len(token_counter), 1)
-
- for candidate in candidates:
- difficulty = 0.0
- real_tokens = 0
- for token_id, mask in zip(
- candidate["input_ids"], candidate["attention_mask"], strict=False
- ):
- if not mask:
- continue
- freq = (token_counter[int(token_id)] + 1.0) / (
- total_tokens + vocab_size
- )
- difficulty -= math.log(freq)
- real_tokens += 1
- candidate["difficulty"] = difficulty / max(real_tokens, 1)
+ self._score_candidates_byte_ngram(candidates)

  sorted_candidates = sorted(
  candidates, key=lambda item: (item["difficulty"], item["dataset_index"])
@@ -843,193 +777,63 @@ class WikiText2Provider:

  return results

- def _score_candidates_with_model(self, candidates: list[dict[str, Any]]) -> bool:
- """Score candidate windows using a pretrained GPT-2 model if available."""
- if not HAS_TORCH:
- return False
-
- if self._difficulty_model is False:
- return False
-
- try:
- eval_device_override = os.environ.get("INVARLOCK_EVAL_DEVICE")
- device_hint = getattr(self, "_device_hint", None)
-
- def _is_device_usable(device: torch.device) -> bool:
- try:
- _ = torch.zeros((1, 1), dtype=torch.long, device=device)
- return True
- except Exception:
- return False
-
- if self._difficulty_model is None:
- from transformers import GPT2LMHeadModel
-
- model = GPT2LMHeadModel.from_pretrained("gpt2")
- model.eval()
- # Decide initial scorer device: env override → provider hint → heuristic
- if eval_device_override:
- try:
- device = torch.device(eval_device_override)
- except Exception:
- device = self._pick_default_scorer_device()
- elif device_hint and device_hint != "auto":
- try:
- device = torch.device(device_hint)
- except Exception:
- device = self._pick_default_scorer_device()
- else:
- device = self._pick_default_scorer_device()
-
- if device.type != "cpu" and not _is_device_usable(device):
- warnings.warn(
- f"Difficulty scorer device {device} unavailable; falling back to CPU",
- stacklevel=2,
- )
- device = torch.device("cpu")
-
- model.to(device)
- self._difficulty_model = model
- self._difficulty_device = device
- self.__class__._MODEL_CACHE = model
- self.__class__._MODEL_DEVICE = device
-
- assert self._difficulty_model is not None
- model = self._difficulty_model
- device = self._difficulty_device or torch.device("cpu")
-
- # If a new override/hint is provided, move the cached model if needed.
- desired_device = device
- if eval_device_override:
- try:
- desired_device = torch.device(eval_device_override)
- except Exception:
- desired_device = device
- elif device_hint and device_hint != "auto":
- try:
- desired_device = torch.device(device_hint)
- except Exception:
- desired_device = device
-
- if desired_device != device:
- if desired_device.type != "cpu" and not _is_device_usable(
- desired_device
- ):
- warnings.warn(
- f"Difficulty scorer device {desired_device} unavailable; keeping {device}",
- stacklevel=2,
- )
- else:
- try:
- model.to(desired_device)
- device = desired_device
- self._difficulty_device = desired_device
- self.__class__._MODEL_DEVICE = desired_device
- except Exception as exc:
- warnings.warn(
- f"Failed to move GPT-2 difficulty scorer to {desired_device}: {exc}",
- stacklevel=2,
- )
-
- if not self._scorer_warmed:
- with torch.no_grad():
- dummy_input = torch.zeros((1, 8), dtype=torch.long, device=device)
- dummy_attention = torch.ones_like(dummy_input)
- model(dummy_input, attention_mask=dummy_attention)
- self._scorer_warmed = True
-
- batch_override = os.environ.get("INVARLOCK_SCORES_BATCH_SIZE")
- override_size = None
- if batch_override:
- try:
- override_size = max(1, int(batch_override))
- except ValueError:
- override_size = None
-
- batch_size = min(32, max(4, len(candidates)))
- if override_size is not None:
- batch_size = max(1, min(override_size, len(candidates)))
-
- config = getattr(model, "config", None)
- scorer_vocab_size = getattr(config, "vocab_size", None)
-
- input_batch: list[list[int]] = []
- attention_batch: list[list[int]] = []
- candidate_batch: list[dict[str, Any]] = []
- total_tokens = 0
- start_time = time.perf_counter()
-
- with torch.no_grad():
- for candidate in candidates:
- input_batch.append(candidate["input_ids"])
- attention_batch.append(candidate["attention_mask"])
- candidate_batch.append(candidate)
-
- if len(input_batch) == batch_size or candidate is candidates[-1]:
- input_tensor = torch.tensor(
- input_batch, dtype=torch.long, device=device
- )
- attention_tensor = torch.tensor(
- attention_batch, dtype=torch.long, device=device
- )
-
- # Guard against out-of-range token IDs when scoring with GPT-2.
- # Some model tokenizers emit IDs beyond GPT-2 vocab, which can
- # trigger device-side asserts in embedding/gather kernels.
- if scorer_vocab_size and scorer_vocab_size > 0:
- input_tensor = input_tensor.clamp(
- min=0, max=scorer_vocab_size - 1
- )
-
- outputs = model(input_tensor, attention_mask=attention_tensor)
- shift_logits = outputs.logits[:, :-1, :].contiguous()
- shift_labels = input_tensor[:, 1:].contiguous()
- shift_mask = attention_tensor[:, 1:].contiguous()
- shift_labels = shift_labels.masked_fill(shift_mask == 0, 0)
-
- vocab_size = shift_logits.size(-1)
- losses = F.cross_entropy(
- shift_logits.view(-1, vocab_size),
- shift_labels.view(-1),
- reduction="none",
- )
- losses = losses.view(shift_labels.size()) * shift_mask
- token_counts = shift_mask.sum(dim=1).clamp(min=1)
- loss_per_example = (
- (losses.sum(dim=1) / token_counts).cpu().tolist()
- )
-
- for cand_obj, loss_value in zip(
- candidate_batch, loss_per_example, strict=False
- ):
- cand_obj["difficulty"] = float(loss_value)
- total_tokens += int(token_counts.sum().item())
-
- input_batch.clear()
- attention_batch.clear()
- candidate_batch.clear()
- self._last_batch_size_used = batch_size
- elapsed = max(time.perf_counter() - start_time, 1e-9)
- tokens_per_sec = total_tokens / elapsed if total_tokens else 0.0
- self._last_scorer_profile = {
- "batch_size": batch_size,
- "tokens_processed": total_tokens,
- "elapsed_seconds": elapsed,
- "tokens_per_second": tokens_per_sec,
- }
- return True
- except Exception as exc: # pragma: no cover - defensive
- warnings.warn(
- f"Failed to compute GPT-2 difficulty scores: {exc}", stacklevel=2
- )
- self._difficulty_model = False
- self._difficulty_device = None
- self.__class__._MODEL_CACHE = False
- self.__class__._MODEL_DEVICE = None
+ def _score_candidates_byte_ngram(self, candidates: list[dict[str, Any]]) -> bool:
+ if not candidates:
  self._last_batch_size_used = 0
  self._last_scorer_profile = None
  return False

+ order = max(1, int(self._BYTE_NGRAM_ORDER))
+ pad_token = int(self._BYTE_NGRAM_PAD)
+ alpha = float(self._BYTE_NGRAM_ALPHA)
+ vocab_size = pad_token + 1
+
+ context_counts: Counter[tuple[int, ...]] = Counter()
+ ngram_counts: Counter[tuple[int, ...]] = Counter()
+ sequences: list[list[int]] = []
+ start_time = time.perf_counter()
+
+ for candidate in candidates:
+ text = candidate.get("text")
+ if not isinstance(text, str):
+ text = ""
+ byte_values = list(text.encode("utf-8", errors="replace"))
+ tokens = ([pad_token] * (order - 1)) + byte_values
+ sequences.append(tokens)
+ for idx in range(order - 1, len(tokens)):
+ context = tuple(tokens[idx - order + 1 : idx])
+ ngram = context + (tokens[idx],)
+ context_counts[context] += 1
+ ngram_counts[ngram] += 1
+
+ total_tokens = 0
+ for candidate, tokens in zip(candidates, sequences, strict=False):
+ loss_sum = 0.0
+ token_count = 0
+ for idx in range(order - 1, len(tokens)):
+ context = tuple(tokens[idx - order + 1 : idx])
+ ngram = context + (tokens[idx],)
+ context_count = context_counts.get(context, 0)
+ ngram_count = ngram_counts.get(ngram, 0)
+ prob = (ngram_count + alpha) / (context_count + alpha * vocab_size)
+ loss_sum += -math.log(prob)
+ token_count += 1
+ candidate["difficulty"] = loss_sum / max(token_count, 1)
+ total_tokens += token_count
+
+ self._last_batch_size_used = len(candidates)
+ elapsed = max(time.perf_counter() - start_time, 1e-9)
+ tokens_per_sec = total_tokens / elapsed if total_tokens else 0.0
+ self._last_scorer_profile = {
+ "mode": "byte_ngram",
+ "order": order,
+ "vocab_size": vocab_size,
+ "tokens_processed": total_tokens,
+ "elapsed_seconds": elapsed,
+ "tokens_per_second": tokens_per_sec,
+ }
+ return True
+
  def _tokenize_samples(
  self,
  texts: list[str],
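Note: the deleted `_score_candidates_with_model` ranked windows by GPT-2 loss, which required torch, transformers, device probing, and the atexit cleanup removed earlier in this file. Its replacement is a plain add-α (Laplace) smoothed byte n-gram model: counts are pooled over all candidate texts, and each window's difficulty is its mean negative log-probability under those counts. A self-contained sketch of the same idea, with the provider class and its constants simplified away:

    import math
    from collections import Counter

    def byte_ngram_difficulty(texts: list[str], order: int = 4,
                              alpha: float = 1.0) -> list[float]:
        # Pad symbol 256 sits just outside the byte range 0..255, so the
        # effective vocabulary has 257 symbols (matching pad_token + 1 above).
        pad, vocab = 256, 257
        ctx_counts: Counter[tuple[int, ...]] = Counter()
        ngram_counts: Counter[tuple[int, ...]] = Counter()
        seqs = [[pad] * (order - 1) + list(t.encode("utf-8", "replace"))
                for t in texts]
        for toks in seqs:  # first pass: pooled counts over all windows
            for i in range(order - 1, len(toks)):
                ctx = tuple(toks[i - order + 1 : i])
                ctx_counts[ctx] += 1
                ngram_counts[ctx + (toks[i],)] += 1
        scores = []
        for toks in seqs:  # second pass: mean NLL per window
            nll, n = 0.0, 0
            for i in range(order - 1, len(toks)):
                ctx = tuple(toks[i - order + 1 : i])
                p = (ngram_counts[ctx + (toks[i],)] + alpha) / (
                    ctx_counts[ctx] + alpha * vocab)
                nll -= math.log(p)
                n += 1
            scores.append(nll / max(n, 1))
        return scores

    # Rarer byte sequences score as harder:
    print(byte_ngram_difficulty(["the cat sat on the mat", "zqxj vwpk qzzt"]))

Because every window is scored against counts from the same candidate pool, the ranking is deterministic and needs no model download, which is presumably why the GPT-2 path (and the INVARLOCK_SCORES_BATCH_SIZE knob dropped from the certify hint above) could be retired.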
{invarlock-0.3.4 → invarlock-0.3.5}/src/invarlock/eval/metrics.py
@@ -1379,6 +1379,88 @@ def _resolve_eval_device(
  return resolved


+ def _infer_model_vocab_size(model: nn.Module) -> int | None:
+ """Best-effort vocab size for guarding against invalid token IDs.
+
+ Prefer the actual embedding size (more reliable than config.vocab_size when
+ tokenizers have added tokens), and fall back to config when embeddings are
+ unavailable (e.g., stub models in tests).
+ """
+ try:
+ get_emb = getattr(model, "get_input_embeddings", None)
+ if callable(get_emb):
+ emb = get_emb()
+ weight = getattr(emb, "weight", None)
+ if weight is not None and hasattr(weight, "shape"):
+ size = int(weight.shape[0])
+ if size > 0:
+ return size
+ except Exception:
+ pass
+
+ # Fallback for lightweight/stub models: pick the largest nn.Embedding module.
+ # This is not guaranteed to be the token embedding, but is a good heuristic
+ # when get_input_embeddings/config.vocab_size are unavailable.
+ try:
+ max_embeddings = 0
+ for module in model.modules():
+ if isinstance(module, nn.Embedding):
+ max_embeddings = max(max_embeddings, int(module.num_embeddings))
+ if max_embeddings > 0:
+ return max_embeddings
+ except Exception:
+ pass
+
+ config = getattr(model, "config", None)
+ vocab_size = getattr(config, "vocab_size", None)
+ if isinstance(vocab_size, int) and vocab_size > 0:
+ return vocab_size
+ return None
+
+
+ def _resolve_pad_token_id(model: nn.Module, vocab_size: int | None) -> int:
+ """Pick a safe pad token id for sanitizing invalid token IDs."""
+ config = getattr(model, "config", None)
+ pad_token_id = getattr(config, "pad_token_id", None)
+ if isinstance(pad_token_id, int) and pad_token_id >= 0:
+ if vocab_size is None or pad_token_id < vocab_size:
+ return pad_token_id
+ return 0
+
+
+ def _sanitize_token_ids_for_model(
+ input_ids: torch.Tensor,
+ attention_mask: torch.Tensor | None,
+ labels: torch.Tensor | None,
+ *,
+ vocab_size: int,
+ pad_token_id: int,
+ ) -> tuple[torch.Tensor, torch.Tensor | None, torch.Tensor | None]:
+ """Prevent device-side asserts from out-of-range token IDs.
+
+ Out-of-range token IDs can trigger CUDA device-side asserts in embedding and
+ gather kernels, poisoning the CUDA context for the entire process. Instead,
+ mask them out as padding and ignore them in labels.
+ """
+ if vocab_size <= 0:
+ return input_ids, attention_mask, labels
+
+ invalid_inputs = (input_ids < 0) | (input_ids >= vocab_size)
+ if invalid_inputs.any():
+ input_ids = input_ids.masked_fill(invalid_inputs, pad_token_id)
+ if attention_mask is not None:
+ attention_mask = attention_mask.masked_fill(invalid_inputs, 0)
+ if labels is not None:
+ labels = labels.masked_fill(invalid_inputs, -100)
+
+ if labels is not None:
+ invalid_labels = (labels != -100) & ((labels < 0) | (labels >= vocab_size))
+ if invalid_labels.any():
+ labels = labels.masked_fill(invalid_labels, -100)
+
+ return input_ids, attention_mask, labels
+
+
  # ── Perplexity calculation ─────────────────────────────────────────────────
  @torch.no_grad()
  def calculate_perplexity(
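Note: the three new helpers work together: `_infer_model_vocab_size` prefers the real embedding row count over `config.vocab_size`, `_resolve_pad_token_id` picks an in-range pad id (defaulting to 0), and `_sanitize_token_ids_for_model` rewrites anything outside `[0, vocab_size)` before it can reach an embedding or gather kernel and trigger a device-side assert. A toy illustration of the masking behavior with made-up tensors (the masking logic is copied from the hunk above, not imported):

    import torch

    vocab_size, pad_token_id = 10, 0          # pretend 10-token vocabulary
    input_ids = torch.tensor([[1, 2, 99]])    # token 99 is out of range
    attention_mask = torch.ones_like(input_ids)
    labels = input_ids.clone()

    invalid = (input_ids < 0) | (input_ids >= vocab_size)
    input_ids = input_ids.masked_fill(invalid, pad_token_id)
    attention_mask = attention_mask.masked_fill(invalid, 0)
    labels = labels.masked_fill(invalid, -100)  # -100 = loss ignore index

    print(input_ids.tolist())       # [[1, 2, 0]]
    print(attention_mask.tolist())  # [[1, 1, 0]]
    print(labels.tolist())          # [[1, 2, -100]]

The invalid position is converted to padding everywhere at once, so the model never embeds it, attention never attends to it, and the loss never counts it.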
@@ -1415,6 +1497,8 @@ def compute_perplexity_strict(
  device = _resolve_eval_device(model, device)

  model.eval()
+ model_vocab_size = _infer_model_vocab_size(model)
+ pad_token_id = _resolve_pad_token_id(model, model_vocab_size)
  nll_sum = 0.0
  tok_count = 0

@@ -1453,6 +1537,15 @@ def compute_perplexity_strict(
  else:
  labels = labels.to(device)

+ if model_vocab_size is not None:
+ input_ids, attn, labels = _sanitize_token_ids_for_model(
+ input_ids,
+ attn,
+ labels,
+ vocab_size=model_vocab_size,
+ pad_token_id=pad_token_id,
+ )
+
  # Skip if sequence too short
  if input_ids.size(1) < 2:
  continue
@@ -1507,7 +1600,11 @@ def compute_perplexity_strict(
  continue

  log_probs = shift_logits.log_softmax(dim=-1) # [B,T-1,V]
- tgt = shift_labels.clamp_min(0).unsqueeze(-1) # [B,T-1,1]
+ vocab_size = int(shift_logits.size(-1))
+ valid = valid & (shift_labels >= 0) & (shift_labels < vocab_size)
+ if not valid.any():
+ continue
+ tgt = shift_labels.clamp(min=0, max=vocab_size - 1).unsqueeze(-1) # [B,T-1,1]
  nll = -log_probs.gather(-1, tgt).squeeze(-1) # [B,T-1]

  nll_sum += nll[valid].sum().item()
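Note: the old `clamp_min(0)` kept the gather index non-negative but could still index past the top of the vocabulary when a label id exceeded V; the new code first drops out-of-range labels from `valid`, then clamps into `[0, V)` purely to keep the gather index legal. The same four-line pattern recurs in `compute_perplexity` and `compute_ppl` below. A small sketch of the masked-NLL step with illustrative shapes (here `valid` starts from the ignore index alone; in the real code it also reflects the attention mask):

    import torch

    B, T, V = 1, 4, 10
    shift_logits = torch.randn(B, T - 1, V)
    shift_labels = torch.tensor([[3, -100, 12]])  # one ignored, one out of range
    valid = shift_labels != -100

    log_probs = shift_logits.log_softmax(dim=-1)             # [B,T-1,V]
    vocab_size = int(log_probs.size(-1))
    valid = valid & (shift_labels >= 0) & (shift_labels < vocab_size)
    tgt = shift_labels.clamp(min=0, max=vocab_size - 1).unsqueeze(-1)
    nll = -log_probs.gather(-1, tgt).squeeze(-1)             # [B,T-1]
    nll_sum = nll[valid].sum().item()  # only the in-range label contributes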
@@ -1552,6 +1649,8 @@ def compute_perplexity(
  device = _resolve_eval_device(model, device)

  model.eval()
+ model_vocab_size = _infer_model_vocab_size(model)
+ pad_token_id = _resolve_pad_token_id(model, model_vocab_size)
  nll_sum = 0.0
  tok_count = 0
  batch_count = 0
@@ -1589,6 +1688,15 @@ def compute_perplexity(
  else:
  labels = labels.to(device)

+ if model_vocab_size is not None:
+ input_ids, attn, labels = _sanitize_token_ids_for_model(
+ input_ids,
+ attn,
+ labels,
+ vocab_size=model_vocab_size,
+ pad_token_id=pad_token_id,
+ )
+
  # Skip if sequence too short
  if input_ids.size(1) < 2:
  continue
@@ -1620,7 +1728,11 @@ def compute_perplexity(

  # Compute negative log-likelihood
  log_probs = shift_logits.log_softmax(dim=-1) # [B,T-1,V]
- tgt = shift_labels.clamp_min(0).unsqueeze(-1) # [B,T-1,1]
+ vocab_size = int(shift_logits.size(-1))
+ valid = valid & (shift_labels >= 0) & (shift_labels < vocab_size)
+ if not valid.any():
+ continue
+ tgt = shift_labels.clamp(min=0, max=vocab_size - 1).unsqueeze(-1) # [B,T-1,1]

  # MPS workaround: gather operation can fail on MPS, use CPU fallback
  if str(device).startswith("mps"):
@@ -1694,6 +1806,8 @@ def compute_ppl(
  device = _resolve_eval_device(model, device)

  model.eval()
+ model_vocab_size = _infer_model_vocab_size(model)
+ pad_token_id = _resolve_pad_token_id(model, model_vocab_size)
  nll_sum = 0.0
  tok_count = 0

@@ -1712,6 +1826,15 @@ def compute_ppl(
  torch.tensor(attention_mask, dtype=torch.long).unsqueeze(0).to(device)
  )

+ if model_vocab_size is not None:
+ input_ids_tensor, attention_mask_tensor, _ = _sanitize_token_ids_for_model(
+ input_ids_tensor,
+ attention_mask_tensor,
+ labels=None,
+ vocab_size=model_vocab_size,
+ pad_token_id=pad_token_id,
+ )
+
  # Skip sequences that are too short
  if input_ids_tensor.size(1) < 2:
  continue
@@ -1747,7 +1870,11 @@ def compute_ppl(

  # Compute negative log-likelihood
  log_probs = shift_logits.log_softmax(dim=-1) # [B,T-1,V]
- tgt = shift_labels.clamp_min(0).unsqueeze(-1) # [B,T-1,1]
+ vocab_size = int(shift_logits.size(-1))
+ valid = valid & (shift_labels >= 0) & (shift_labels < vocab_size)
+ if not valid.any():
+ continue
+ tgt = shift_labels.clamp(min=0, max=vocab_size - 1).unsqueeze(-1) # [B,T-1,1]

  # Handle MPS device issues with gather
  if str(device).startswith("mps"):
{invarlock-0.3.4 → invarlock-0.3.5/src/invarlock.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: invarlock
- Version: 0.3.4
+ Version: 0.3.5
  Summary: Edit‑agnostic robustness certificates for weight edits (InvarLock framework)
  Author-email: InvarLock Team <oss@invarlock.dev>
  Maintainer-email: InvarLock Maintainers <support@invarlock.dev>
@@ -112,7 +112,7 @@ they don’t, roll back safely.
  Technical: edit‑agnostic guard pipeline (invariants → spectral → RMT →
  variance) producing a machine‑readable Safety Certificate.

- > **Status:** 0.3.4 (pre‑1.0). Until 1.0, **minor** releases may be
+ > **Status:** 0.3.5 (pre‑1.0). Until 1.0, **minor** releases may be
  > breaking. See CLI help and the CHANGELOG for updates.

  [![CI](https://img.shields.io/github/actions/workflow/status/invarlock/invarlock/ci.yml?branch=main&logo=github&label=CI)](https://github.com/invarlock/invarlock/actions/workflows/ci.yml)