nervecode 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. nervecode-0.1.0/.gitignore +10 -0
  2. nervecode-0.1.0/CHANGELOG.md +181 -0
  3. nervecode-0.1.0/LICENSE +22 -0
  4. nervecode-0.1.0/PKG-INFO +83 -0
  5. nervecode-0.1.0/README.md +135 -0
  6. nervecode-0.1.0/benchmarks/README.md +17 -0
  7. nervecode-0.1.0/benchmarks/__init__.py +4 -0
  8. nervecode-0.1.0/benchmarks/ood/__init__.py +8 -0
  9. nervecode-0.1.0/benchmarks/ood/cifar_resnet18.py +1332 -0
  10. nervecode-0.1.0/benchmarks/ood/mnist_cnn.py +458 -0
  11. nervecode-0.1.0/benchmarks/ood/simple.py +331 -0
  12. nervecode-0.1.0/benchmarks/overhead/__init__.py +4 -0
  13. nervecode-0.1.0/benchmarks/overhead/conv_overhead.py +255 -0
  14. nervecode-0.1.0/benchmarks/overhead/overhead.py +233 -0
  15. nervecode-0.1.0/benchmarks/scaling/__init__.py +13 -0
  16. nervecode-0.1.0/benchmarks/scaling/study.py +418 -0
  17. nervecode-0.1.0/docs/README.md +34 -0
  18. nervecode-0.1.0/docs/api.md +290 -0
  19. nervecode-0.1.0/docs/architecture.md +60 -0
  20. nervecode-0.1.0/docs/benchmarks/cifar10-ood-template.md +41 -0
  21. nervecode-0.1.0/docs/benchmarks/intro-20260402-105142.md +46 -0
  22. nervecode-0.1.0/docs/benchmarks/intro-20260402-235707.md +50 -0
  23. nervecode-0.1.0/docs/benchmarks/intro-20260403-102339.md +57 -0
  24. nervecode-0.1.0/docs/benchmarks/ood-synthetic-20260325-145820.md +16 -0
  25. nervecode-0.1.0/docs/benchmarks/ood-synthetic-20260325-153136.md +16 -0
  26. nervecode-0.1.0/docs/benchmarks/ood-synthetic-20260325-170515.md +16 -0
  27. nervecode-0.1.0/docs/benchmarks.md +158 -0
  28. nervecode-0.1.0/docs/diagnostics.md +116 -0
  29. nervecode-0.1.0/docs/overhead.md +52 -0
  30. nervecode-0.1.0/docs/quickstart.md +152 -0
  31. nervecode-0.1.0/docs/scaling.md +99 -0
  32. nervecode-0.1.0/examples/README.md +6 -0
  33. nervecode-0.1.0/examples/ood_smoke_test.py +88 -0
  34. nervecode-0.1.0/examples/quickstart_cnn.py +75 -0
  35. nervecode-0.1.0/examples/quickstart_mlp.py +210 -0
  36. nervecode-0.1.0/nervecode/__init__.py +415 -0
  37. nervecode-0.1.0/nervecode/_version.py +10 -0
  38. nervecode-0.1.0/nervecode/core/__init__.py +19 -0
  39. nervecode-0.1.0/nervecode/core/assignment.py +165 -0
  40. nervecode-0.1.0/nervecode/core/codebook.py +182 -0
  41. nervecode-0.1.0/nervecode/core/shapes.py +107 -0
  42. nervecode-0.1.0/nervecode/core/temperature.py +227 -0
  43. nervecode-0.1.0/nervecode/core/trace.py +166 -0
  44. nervecode-0.1.0/nervecode/core/types.py +116 -0
  45. nervecode-0.1.0/nervecode/integration/__init__.py +9 -0
  46. nervecode-0.1.0/nervecode/layers/__init__.py +15 -0
  47. nervecode-0.1.0/nervecode/layers/base.py +333 -0
  48. nervecode-0.1.0/nervecode/layers/conv.py +174 -0
  49. nervecode-0.1.0/nervecode/layers/linear.py +176 -0
  50. nervecode-0.1.0/nervecode/layers/reducers.py +80 -0
  51. nervecode-0.1.0/nervecode/layers/wrap.py +223 -0
  52. nervecode-0.1.0/nervecode/scoring/__init__.py +20 -0
  53. nervecode-0.1.0/nervecode/scoring/aggregator.py +369 -0
  54. nervecode-0.1.0/nervecode/scoring/calibrator.py +396 -0
  55. nervecode-0.1.0/nervecode/scoring/types.py +33 -0
  56. nervecode-0.1.0/nervecode/training/__init__.py +25 -0
  57. nervecode-0.1.0/nervecode/training/diagnostics.py +194 -0
  58. nervecode-0.1.0/nervecode/training/loss.py +188 -0
  59. nervecode-0.1.0/nervecode/training/updaters.py +168 -0
  60. nervecode-0.1.0/nervecode/utils/__init__.py +14 -0
  61. nervecode-0.1.0/nervecode/utils/overhead.py +177 -0
  62. nervecode-0.1.0/nervecode/utils/seed.py +161 -0
  63. nervecode-0.1.0/pyproject.toml +135 -0
  64. nervecode-0.1.0/scripts/__init__.py +6 -0
  65. nervecode-0.1.0/scripts/ablate_grid.py +489 -0
  66. nervecode-0.1.0/scripts/dev_smoke.py +58 -0
  67. nervecode-0.1.0/scripts/experiment_intro.sh +240 -0
  68. nervecode-0.1.0/scripts/run_benchmarks_cpu.sh +285 -0
  69. nervecode-0.1.0/scripts/run_cifar_ood.sh +130 -0
  70. nervecode-0.1.0/scripts/run_cifar_ood_ablate.sh +117 -0
  71. nervecode-0.1.0/scripts/run_cpu_suite.sh +198 -0
  72. nervecode-0.1.0/scripts/run_ood_matrix.sh +223 -0
  73. nervecode-0.1.0/scripts/train_minimal.py +356 -0
  74. nervecode-0.1.0/tests/docs/test_benchmarks_doc.py +26 -0
  75. nervecode-0.1.0/tests/docs/test_quickstart_doc.py +12 -0
  76. nervecode-0.1.0/tests/integration/README.md +3 -0
@@ -0,0 +1,10 @@
1
+ .venv/
2
+ plan.md
3
+ todo.md
4
+ runs/
5
+ scripts/rundev.py
6
+ .claude
7
+ .mypy_cache
8
+ .pytest_cache
9
+ .ruff_cache
10
+ data
@@ -0,0 +1,181 @@
1
+ # Changelog
2
+
3
+
4
+ ## v0.1.0 — 2026-04-07
5
+
6
+ - Initial preview release. Establishes PyTorch-native coding wrappers (Linear/Conv2d),
7
+ layerwise surprise aggregation (mean/max/weighted), calibration utilities,
8
+ a `WrappedModel` container, training utilities, benchmarks (CIFAR-10 OOD, MNIST OOD),
9
+ and a CPU-friendly experiment suite and scripts. Includes CI (lint/typecheck/tests),
10
+ docs scaffolding, and example scripts.
11
+
12
+ - Wrappers and model container: Disabling coding now clears any cached
13
+ per-layer traces immediately (fail-open) so that model-level behavior (e.g.,
14
+ `WrappedModel.surprise()`) reflects the disabled state without requiring an
15
+ extra forward pass.
16
+
17
+ - API stability: Locked the minimal public surface via explicit __all__ in
18
+ packages and added tests (including star-import check) to prevent accidental
19
+ API creep until the MVP is validated on real examples/benchmarks.
20
+
21
+ - Tests: Separated fast correctness tests from heavy runs by introducing
22
+ `benchmark` and `slow` markers and deselecting them by default; CI runs only
23
+ the fast suite to keep pipelines reliable and quick. See CONTRIBUTING and
24
+ `docs/benchmarks.md` for how to include them locally.
25
+
26
+ - Docs: Added `docs/benchmarks.md` template for experiment/benchmark reports to
27
+ standardize documentation and avoid one‑off notebooks.
28
+
29
+ - Docs: Added `docs/scaling.md` covering selected-layer instrumentation, reduced
30
+ coding spaces, and practical tradeoffs between expressivity and overhead.
31
+
32
+ - Layers: Added experimental convolutional reducers in `nervecode.layers.reducers`.
33
+ Includes a token-like spatial view (BHWC) for Conv2d outputs and a global
34
+ max-pooling reducer. These are opt-in via the existing `reducer=` parameter on
35
+ `CodingConv2d` and do not change defaults. Added a unit test exercising the
36
+ token-like path.
37
+
38
+ - Training: Added experimental `EmaCodebookUpdater` (off by default) to update
39
+ codebook centers via EMA of batch-weighted means from a `CodingTrace`. Keeps
40
+ gradient-updated baseline unchanged; enables hybrid exploration when needed.
41
+
42
+ - Docs: Added `docs/quickstart.md` explaining the end-to-end user flow and
43
+ mirroring the `examples/quickstart_mlp.py` script.
44
+
45
+ - Benchmarks: Added `benchmarks/scaling/study.py` with `run_scaling_study` and a CLI
46
+ to measure compute (per-iteration time) and memory proxies (parameter and
47
+ activation overhead) as a function of layer width, coding dimension, and the
48
+ number of instrumented layers. Includes a unit test.
49
+
50
+ - Scripts: Added `scripts/ablate_grid.py` to run ablations varying codebook size (K),
51
+ coding dimension (D), temperature (T), and layer selection strategy. Records
52
+ minimal quality (test accuracy) and approximate overhead proxies, and can write
53
+ results to CSV for quick analysis.
54
+
55
+ - Tests: Added deterministic, explainability-focused unit tests for richer
56
+ aggregation modes (fixed weighted and max) on hand-constructed examples to
57
+ ensure stable outputs and clear aggregation metadata.
58
+
59
+ - Tests: Added checkpoint-compatibility tests covering two common flows —
60
+ loading base-model weights before instrumentation and loading wrapped-model
61
+ checkpoints together with calibrator state after calibration.
62
+
63
+ - Scoring: Implemented `weighted_surprise(...)` for fixed weighted aggregation
64
+ across layers with a simple configuration interface (layer-name or index-aligned
65
+ weights; optional normalization). Learnable aggregation is intentionally
66
+ deferred pending evidence of real-world benefit.
67
+
68
+ - Scoring: Added `max_surprise(...)` as a supported alternative to
69
+ `mean_surprise(...)`, returning the same `AggregatedSurprise` result type and
70
+ plugging into existing calibrator flows.
71
+
72
+ - Docs: Added rationale explaining why the first Conv2d wrapper uses pooled
73
+ coding and why richer spatial modes are deferred (see `docs/architecture.md`).
74
+
75
+ - Wrap: `nervecode.layers.wrap.wrap` can now instrument convolutional layers explicitly
76
+ and via the shortcut `layers="all_conv"` (currently wraps `nn.Conv2d` with
77
+ `CodingConv2d`).
78
+
79
+ - Examples: Added `examples/quickstart_cnn.py` demonstrating a small CNN where
80
+ pooled Conv2d traces contribute to the aggregated per-sample surprise.
81
+
82
+ - Tests: Added unit tests verifying that `CodingConv2d` preserves the exact
83
+ forward output while producing valid pooled traces with correct metadata.
84
+
85
+ - Integration: Added end-to-end tests exercising a small CNN with pooled
86
+ convolutional coding — covering training with a joint objective, surprise
87
+ retrieval via `WrappedModel.surprise()`, and empirical-percentile calibration
88
+ with basic ID/OOD separation.
89
+
90
+ - Conv2d pooled coding metadata: pooled representations now record explicit
91
+ spatial-reduction metadata in ``reduction_meta`` (e.g., ``spatial_reduction``
92
+ and ``reduction_axes``) to make it clear that the coding view comes from a
93
+ spatial reduction rather than the raw activation tensor.
94
+
95
+ - Layers: Added `CodingConv2d` with observe-only pooled coding (global average pooling over H×W by default) and set pooled coding as the default sample-level reducer for the first convolutional wrapper.
96
+
97
+ - Layers: Convolutional wrappers now reuse the shared reduction and trace pipeline, so their pooled outputs feed the same assignment, loss, and calibration code paths as linear wrappers.
98
+
99
+ - Docs: Added `CONTRIBUTING.md` and linked the changelog from README to prepare the repo for early external use.
100
+
101
+ - Integration tests: added tests that execute README and API docs example code paths to ensure they remain valid.
102
+
103
+ - Docs: corrected calibrator usage to `threshold_for(...)` in README and API reference examples.
104
+
105
+ - Overhead (Conv2d pooled coding): added a deterministic estimator
106
+ (`nervecode.utils.overhead.estimate_pooled_conv_overhead`) and a timing
107
+ harness (`benchmarks/overhead/conv_overhead.py`). Documented expected
108
+ operating limits and typical compute/memory overhead in `docs/overhead.md`.
109
+
110
+ - README: added installation instructions, an MVP quickstart example, and an explicit scope note (what Nervecode does/does not do).
111
+
112
+ - Added a release smoke test that builds the package, installs the built wheel into a clean venv, and runs the quickstart example (opt-in via RUN_RELEASE_SMOKE=1).
113
+
114
+ - Added `docs/api.md` documenting `wrap()`, `WrappedModel`, `CodingLinear`,
115
+ `CodingLoss`, the calibrator, and the aggregated surprise result.
116
+
117
+ - Added `docs/diagnostics.md` explaining code utilization, entropy, code length,
118
+ commitment distance, empirical percentiles, and threshold-based OOD flags.
119
+
120
+ - Repository structure scaffolded (`nervecode/`, `tests/`, `examples/`, `benchmarks/`, `docs/`, `scripts/`) and package importable from a clean checkout.
121
+ - CodingLoss: add commitment term that reads per-trace nearest-center distances (no recomputation); default weight 0.0 to preserve behavior and tests.
122
+ - Added `pyproject.toml` with core metadata, Python 3.10 requirement, PyTorch dependency, and extras for `dev`, `docs`, `viz`, and `logging`.
123
+ - Configured `ruff`, `mypy`, and `pytest` with a strict-enough baseline to catch shape, typing, and API regressions early.
124
+ - Added `pre-commit` hooks for formatting, linting, type-checking, and a fast smoke test suite.
125
+ - Added a minimal `README.md` explaining the product boundary, first public API shape, and MVP scope.
126
+ - Added top-level API stubs in `nervecode/__init__.py` exposing `wrap` and `WrappedModel`.
127
+ - Created package layout mirroring conceptual architecture: `nervecode/core/`, `nervecode/layers/`, `nervecode/scoring/`, `nervecode/training/`, `nervecode/integration/`, and `nervecode/utils/`.
128
+ - Split tests into `tests/unit/`, `tests/integration/`, and `tests/smoke/`; added a minimal unit test and updated docs.
129
+ - Added CI workflow to run install, ruff lint, mypy type-checks, and fast tests (unit + smoke) on push and pull requests.
130
+ - Added `scripts/dev_smoke.py` to quickly verify imports and placeholder instantiation locally.
131
+ - Added `scripts/train_minimal.py`: a dataset-agnostic training script for quick local validation and CI smoke runs.
132
+ - Added `benchmarks/overhead/overhead.py`: a simple timing benchmark that compares a base MLP against the instrumented model on a fixed workload.
133
+ - Added `docs/architecture.md` summarizing non-negotiable design rules (observe-only wrappers, fail-open, selected-layer instrumentation, explicit trace support, MVP-first scope).
134
+ - Added deterministic utilities: `nervecode.utils.seed` with `seed_everything`, `temp_seed`, and a per-test auto-seeding fixture honoring `NERVECODE_SEED` for reproducible runs.
135
+ - Implemented `nervecode/core/types.py` with a `SoftCode` dataclass whose `probs` tensor supports arbitrary leading dimensions with a final code dimension `(..., K)`.
136
+ - Extended `SoftCode` with optional fields `best_length`, `entropy`, `best_indices`, and `combined_surprise`, and added validation that all scalar-like fields match the leading shape of `probs`.
137
+ - Added `nervecode/core/trace.py` with a `CodingTrace` dataclass carrying reduced activations, reduction metadata, nearest-center distances, chosen center indices, commitment distances, and the associated `SoftCode`.
138
+ - Added `nervecode/core/shapes.py` with `flatten_leading`/`unflatten_leading` helpers to work uniformly across batch-only, token-like, and pooled-convolution layouts.
139
+ - Implemented `nervecode/core/codebook.py` providing a gradient-updated `Codebook` module with centers of shape `(K, code_dim)` and initialization scaled by the coding-space dimension.
140
+ - Codebook now follows PyTorch conventions: `reset_parameters()` without args using stored init strategy, improved `extra_repr()`, and a serialization contract via `get_extra_state`/`set_extra_state`.
141
+ - Packaging: added dynamic versioning via Hatch (single-source `nervecode/_version.py`) and build targets for wheel and sdist with appropriate includes.
142
+ - Marked Phase 1 TODO "Clamp all log and division operations" as NE locally pending the assignment engine implementation in `nervecode/core/`.
143
+ - Added `nervecode/core/temperature.py` with `FixedTemperature` and `CosineTemperature` schedules and a small `TemperatureSchedule` interface, and exported them via `nervecode.core`.
144
+ - Implemented `nervecode/core/assignment.py` with a `SoftAssignment` engine that returns a `SoftCode` and trace-ready intermediates (nearest distances, chosen indices, commitment distances) to avoid recomputing distances in loss.
145
+ - Added unit tests for `SoftCode`, `CodingTrace`, shape helpers, temperature schedules, and the CPU assignment engine; adjusted typing in `temperature.py` to satisfy mypy without a hard torch dependency.
146
+ - Added unit tests that verify gradient flow from the surprise signal back to reduced activations and codebook centers.
147
+ - Added edge-case unit tests for the soft assignment engine covering exact center matches, ties between centers, uniform assignments, large input magnitudes, and autocast mixed precision where supported.
148
+ - Added a synthetic convergence integration test that trains a codebook on a simple 2D Gaussian mixture and verifies it learns at least the expected number of active regions.
149
+ - Implemented `nervecode/layers/base.py` with a shared base class and protocol that standardize bypass behavior, trace caching, reduction setup, and diagnostics hooks.
150
+ - Added `nervecode/layers/linear.py` with `CodingLinear`, the first production wrapper around `nn.Linear` implementing observe-only semantics and trace caching (identity reduction in MVP).
151
+ - CodingLinear now accepts an optional `coding_dim` and uses identity reduction when `out_features == coding_dim`, or a learned linear projection reducer when `out_features > coding_dim`.
152
+ - CodingLinear now exposes an explicit `forward_with_trace(x)` method returning `(y, trace)` while caching the latest `CodingTrace` for convenience via `last_trace`.
153
+ - Verified `CodingLinear` across training/eval modes, mixed precision (autocast + GradScaler), and CPU/CUDA; added targeted unit tests.
154
+ - CodingLinear preserves the original layer output bit-for-bit when coding is disabled or bypassed; added unit tests asserting `torch.equal` under disabled and bypass contexts.
155
+ - Added layer-level diagnostics helpers on wrappers (utilization, mean entropy, mean code length, mean commitment distance) derived from the latest cached trace; added a unit test for None-on-unavailable behavior.
156
+ - Improved `CodingLinear` representation: `extra_repr()` now shows `code_dim`, codebook size `K`, reducer type, and coding enabled state; added unit tests for the printed summary.
157
+ - Added unit test comparing an unwrapped `nn.Linear` and a wrapped twin with identical parameters to verify that the visible forward output is unchanged.
158
+ - Added unit tests verifying trace caching after plain forwards and that the explicit trace-return path is independent of hidden mutable state for `CodingLinear`.
159
+ - Added multi-batch toggle tests for `CodingLinear`: verify disabling before forward yields no trace, re-enabling restores tracing, and cache updates across batches.
160
+ - Added integration tests for a tiny model containing a `CodingLinear`, verifying device transfers, state_dict serialization round-trip, and optimizer integration.
161
+ - Added a synthetic training integration test that uses `CodingLinear` inside a toy classifier and verifies that task loss and a coding-derived loss can be optimized together.
162
+ - Added `nervecode/layers/wrap.py` with a `wrap()` helper to instrument models by explicit module names or via the `layers="all_linear"` shortcut; added unit tests for selection and output preservation.
163
+ - Refined layer selection in `wrap()`: introduced a tiny, extensible selector registry. MVP supports `layers="all_linear"` and explicit module-name lists; unrecognized shortcuts fail open.
164
+ - Implemented a thin `WrappedModel` container that preserves the wrapped model's API (attribute access and `__call__`) while tracking inserted coding layers for later aggregation; added a unit test.
165
+ - Implemented `WrappedModel.forward()` to delegate to the wrapped model's normal call and populate convenience caches with the latest per-layer traces when coding is enabled; added unit tests.
166
+ - Added model-level fail-open controls on `WrappedModel`: `enable_coding()`, `disable_coding()`, and a nestable `bypass()` context manager that delegate to all wrapped layers; added unit tests.
167
+ - Added `nervecode/scoring/aggregator.py` with a `mean_surprise(...)` function that computes per-sample mean aggregation across layer surprise signals; added unit tests.
168
+ - Structured the aggregator API to return an `AggregatedSurprise` result object, enabling future `max` and `weighted` strategies without changing the user-facing result type; updated unit tests accordingly.
169
+ - Aggregator now supports mixing wrappers with different leading dimensions by reducing each per-layer surprise to a per-sample view before combining. Added `CodingTrace.sample_reduced_surprise()` to expose the sample-level reduction for layers.
170
+ - Implemented `WrappedModel.surprise()` to return the latest mean-aggregated surprise across wrapped layers after a standard forward pass; added a unit test.
171
+ - Added integration test verifying that an explicit layer `forward_with_trace(...)` and model-level `WrappedModel.surprise()` agree on the same batch when called in a consistent order.
172
+ - Added integration tests clarifying that explicit traces should be preferred in concurrent-looking usage and that the model-level convenience cache reflects last-forward state; updated docs to state this explicitly.
173
+ - Added `nervecode/training/loss.py` with a `CodingLoss` module that consumes `CodingTrace` objects (or `SoftCode`/tensors) and computes a scalar loss by aggregating per-layer surprise via `mean_surprise` and averaging over samples; avoids recomputing distances from raw outputs.
174
+ - Implemented `WrappedModel.coding_loss()` to compute loss from the latest per-layer traces and raise a helpful error when traces are unavailable; added unit tests.
175
+ - Added `nervecode/scoring/calibrator.py` with an empirical percentile calibrator that stores the sorted surprise distribution, chosen threshold quantiles, and minimal metadata to reproduce calibration; exported via `nervecode.scoring` and added a unit test for stored state.
176
+ - Implemented percentile lookup, threshold comparison, and boolean OOD decisions for scalar and batched surprise values in the calibrator; added unit tests.
177
+ - Added synthetic calibration tests validating percentile ordering and threshold behavior on in-distribution vs shifted distributions.
178
+ - Added end-to-end diagnostics tests ensuring finiteness and correct shapes across training, evaluation, and calibration passes.
179
+ - Added bypass-consistency tests verifying that, under a temporary coding bypass, surprise aggregation returns None, `CodingLoss` raises on missing signals, and the calibrator rejects None inputs.
180
+ - Added `examples/quickstart_mlp.py` demonstrating a tiny MLP wrapped with coding layers, training with `CodingLoss`, calibrating empirical percentiles on held-out in-distribution data, and reading surprise + percentiles at inference.
181
+ - Added `examples/ood_smoke_test.py` demonstrating an obvious in-distribution vs out-of-distribution comparison and printing both raw scores and calibrated percentiles.
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Nervecode Maintainers
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1,83 @@
1
+ Metadata-Version: 2.4
2
+ Name: nervecode
3
+ Version: 0.1.0
4
+ Summary: Intrinsic surprise scoring for PyTorch via statistical coding.
5
+ Project-URL: Homepage, https://gitlab.com/domezsolt/nervecode
6
+ Project-URL: Repository, https://gitlab.com/domezsolt/nervecode
7
+ Author: Zsolt Döme
8
+ License: MIT License
9
+
10
+ Copyright (c) 2026 Nervecode Maintainers
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ SOFTWARE.
29
+
30
+ License-File: LICENSE
31
+ Keywords: coding,ml,pytorch,research,surprise
32
+ Requires-Python: >=3.10
33
+ Requires-Dist: torch>=2.0
34
+ Provides-Extra: dev
35
+ Requires-Dist: mypy>=1.8; extra == 'dev'
36
+ Requires-Dist: pre-commit>=3.6; extra == 'dev'
37
+ Requires-Dist: pytest>=7; extra == 'dev'
38
+ Requires-Dist: ruff>=0.3; extra == 'dev'
39
+ Provides-Extra: docs
40
+ Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
41
+ Requires-Dist: mkdocs>=1.5; extra == 'docs'
42
+ Provides-Extra: logging
43
+ Requires-Dist: loguru>=0.7; extra == 'logging'
44
+ Requires-Dist: rich>=13; extra == 'logging'
45
+ Provides-Extra: viz
46
+ Requires-Dist: matplotlib>=3.7; extra == 'viz'
47
+ Requires-Dist: seaborn>=0.13; extra == 'viz'
48
+ Description-Content-Type: text/markdown
49
+
50
+ # Nervecode Docs
51
+
52
+ This directory will contain documentation for the Nervecode project.
53
+
54
+ ## Developer Setup
55
+
56
+ - Install dev tools (in your virtualenv): `pip install -e .[dev]`
57
+ - Install git hooks: `pre-commit install`
58
+ - Run on all files once: `pre-commit run --all-files`
59
+
60
+ Included hooks:
61
+ - Formatting: `ruff-format`
62
+ - Linting and import sorting: `ruff` (with `--fix`)
63
+ - Type-checking: `mypy`
64
+
65
+ ## Tests
66
+
67
+ - Unit tests: `pytest tests/unit` (fast correctness checks)
68
+ - Integration tests: `pytest tests/integration` (heavier, cross-module)
69
+ - Smoke tests: `pytest tests/smoke` (repo wiring and API surface)
70
+
71
+ ### Deterministic runs
72
+
73
+ - Tests auto-seed RNGs per test using `NERVECODE_SEED` (default: `1234`).
74
+ - You can control reproducibility in your own scripts via:
75
+ ```python
76
+ from nervecode.utils.seed import seed_everything, seed_from_env
77
+ seed_everything(seed_from_env())
78
+ ```
79
+
80
+ ## Performance Notes
81
+
82
+ - Overhead estimates and guidance for pooled Conv2d coding: see `docs/overhead.md`.
83
+ - Scaling and tradeoffs for layer selection and coding dimension: see `docs/scaling.md`.
@@ -0,0 +1,135 @@
1
+ # Nervecode
2
+
3
+ Nervecode is a PyTorch library that adds an intrinsic uncertainty signal to neural networks by scoring how compressible internal activations are under learned codebooks. The goal is a practical, observe-only wrapper that preserves model outputs while exposing a calibrated surprise score for OOD detection, guardrails, and monitoring.
4
+
5
+ ## Installation
6
+ - Prerequisites: Python 3.10+, PyTorch 2.0+ (install a build matching your platform from pytorch.org).
7
+ - From a checkout for local use: `pip install -e .`
8
+ - For development with tooling: `pip install -e .[dev]` then `pre-commit install`.
9
+ - Optional extras: `.[viz]` for plotting, `.[logging]` for richer logs.
10
+
11
+ Note: You can use the top-level convenience `nervecode.wrap(...)` which instruments your model in-place and returns a `WrappedModel` container.
12
+
13
+ ## Quickstart
14
+ Minimal end-to-end flow using the current public surface:
15
+
16
+ ```python
17
+ import torch
18
+ from torch import nn
19
+ from nervecode.layers.wrap import wrap as wrap_layers
20
+ import nervecode as nvc
21
+ from nervecode.scoring import EmpiricalPercentileCalibrator, mean_surprise
22
+
23
+ # 1) Build and instrument a tiny model
24
+ model = nn.Sequential(nn.Linear(2, 32), nn.ReLU(), nn.Linear(32, 2))
25
+ wrap_layers(model, layers="all_linear") # in-place Linear wrappers
26
+ wrapped = nvc.WrappedModel(model)
27
+
28
+ # 2) Train with task loss + coding loss
29
+ x = torch.randn(64, 2)
30
+ y = torch.randint(0, 2, (64,))
31
+ logits = wrapped.forward(x)
32
+ loss = nn.CrossEntropyLoss()(logits, y) + 0.1 * wrapped.coding_loss()
33
+ loss.backward()
34
+
35
+ # 3) Calibrate empirical percentiles on in-distribution scores
36
+ with torch.no_grad():
37
+ _ = wrapped.forward(torch.randn(64, 2))
38
+ agg = wrapped.surprise() or mean_surprise(getattr(wrapped, "_last_layer_traces", {}))
39
+ scores = agg.surprise if agg is not None else torch.empty(0)
40
+ calib = EmpiricalPercentileCalibrator(threshold_quantiles=(0.95,))
41
+ state = calib.fit(scores, aggregation="mean")
42
+ thr = calib.threshold_for() # threshold at 95th percentile
43
+ ```
44
+
45
+ ## Product Boundary
46
+ - Is: a lightweight PyTorch library that wraps selected layers (start with Linear), learns codebooks over reduced activations, and emits layer-wise and aggregated surprise scores.
47
+ - Is not: a hardware project, a full observability platform, or a framework-agnostic toolkit; MVP targets PyTorch only and focuses on observe-only wrappers with modest overhead.
48
+
49
+ ## First Public API Shape (MVP)
50
+ The initial public surface is intentionally small and convenient:
51
+
52
+ ```python
53
+ import nervecode
54
+
55
+ model = MyModel()
56
+ wrapped = nervecode.wrap(model, layers="all_linear")
57
+
58
+ for x, y in train_loader:
59
+ logits = wrapped(x)
60
+ loss = task_loss_fn(logits, y) + wrapped.coding_loss()
61
+ loss.backward()
62
+ optimizer.step(); optimizer.zero_grad()
63
+
64
+ wrapped.calibrate(calib_loader)
65
+
66
+ logits = wrapped(x_test)
67
+ surprise = wrapped.surprise() # includes score and percentile
68
+
69
+ # Optional explicit trace path for robust integrations
70
+ logits, trace = wrapped.forward_with_trace(x_test)
71
+ ```
72
+
73
+ Provisional API entries:
74
+ - `wrap(...)`
75
+ - `WrappedModel.coding_loss()`
76
+ - `WrappedModel.calibrate(...)`
77
+ - `WrappedModel.surprise()`
78
+ - `WrappedModel.forward_with_trace(...)`
79
+
80
+ ## MVP Scope
81
+ The MVP is a narrow, end-to-end vertical slice:
82
+ - Gradient-updated codebooks and differentiable soft assignment.
83
+ - `SoftCode` and `CodingTrace` data structures.
84
+ - `CodingLinear` wrapper and `wrap(..., layers="all_linear")` convenience. The wrapper supports an optional `coding_dim` to project wide layer outputs down to a coding space via a learned linear reducer while preserving the layer's visible output.
85
+ - Mean and max aggregation for a per-input surprise score.
86
+ - Empirical percentile calibration on in-distribution data.
87
+ - Lightweight coding loss and basic diagnostics (CSV/JSONL).
88
+ - One small end-to-end example (MLP or simple CNN).
89
+
90
+ Distance-augmented surprise:
91
+ - The combined per-position surprise can include a distance component to lift
92
+ OOD scores above ID across the bulk, improving percentile thresholding. Set
93
+ `assignment.beta_distance > 0` (e.g., 0.2–1.0) to enable `S = βL·L + βH·H + βD·D`
94
+ where `D ≈ log1p(nearest-center squared distance)`.
95
+
96
+ Quickstart: see `examples/quickstart_mlp.py` for a tiny end-to-end MLP training + calibration + inference script. For pooled Conv2d coding contributing to aggregated surprise, see `examples/quickstart_cnn.py`. For a plain‑language walkthrough of the expected user flow, read `docs/quickstart.md`.
97
+
98
+ For a fast, dataset-agnostic smoke run suitable for CI or local validation, use `scripts/train_minimal.py` which trains a tiny model on a synthetic dataset and calibrates an empirical percentile threshold.
99
+
100
+ For a minimal OOD benchmark harness, see `benchmarks/ood/simple.py` which trains an MLP, calibrates percentiles on in-distribution data, and reports AUROC versus a synthetic OOD split.
101
+
102
+ For quick ablations over codebook/coding hyperparameters and layer selection, use `scripts/ablate_grid.py` which sweeps small grids of K (codebook size), D (coding dimension), T (temperature), and selection strategies, then logs a minimal quality metric and overhead proxies to CSV.
103
+
104
+ For a minimal OOD comparison using synthetic scores and the empirical percentile calibrator, see `examples/ood_smoke_test.py`.
105
+
106
+ Performance notes: see `docs/overhead.md` for pooled Conv2d coding overhead estimates, timing harness, and operating guidance.
107
+
108
+ ## Recommended OOD Settings (quick start)
109
+ - Selection: `layers=first_linear`
110
+ - Aggregation: `agg=max`
111
+ - Coding: `coding_dim D=8`
112
+ - Codebook: `K=16`
113
+ - Weights: `βL=1.0`, `βH=1.0`, `βD=1.0` (distance-augmented surprise)
114
+ - Calibration: `quantile q=0.90` (use `0.95` for stricter ID control)
115
+
116
+ Run the bundled OOD benchmark with these settings:
117
+
118
+ ```
119
+ python -m benchmarks.ood.simple --epochs 20 --device cpu \
120
+ --agg max --layers first_linear --K 16 --coding-dim 8 \
121
+ --beta-length 1.0 --beta-entropy 1.0 --beta-distance 1.0 \
122
+ --quantile 0.90 --json
123
+ ```
124
+
125
+ Or sweep a narrow fast grid:
126
+
127
+ ```
128
+ FAST=1 bash scripts/run_ood_matrix.sh
129
+ ```
130
+
131
+ ## Contributing
132
+ Contributions are welcome. Please see `CONTRIBUTING.md` for a quick start, coding guidelines, and how to run tests locally.
133
+
134
+ ## Changelog
135
+ User-facing changes are tracked in `CHANGELOG.md` under the Unreleased section and versioned entries.
@@ -0,0 +1,17 @@
1
+ # Benchmarks
2
+
3
+ This directory contains performance benchmarks and profiling harnesses.
4
+
5
+ - Overhead: `benchmarks/overhead/overhead.py` compares a base MLP against the
6
+ instrumented version on a fixed workload and reports per‑iteration timings.
7
+ Usage:
8
+ - `python -m benchmarks.overhead.overhead --iters 100 --device cpu`
9
+ - `python benchmarks/overhead/overhead.py --json`
10
+
11
+ - Conv Overhead: `benchmarks/overhead/conv_overhead.py` runs a tiny CNN with
12
+ and without pooled Conv2d coding and reports per‑iteration timings. On CUDA
13
+ it also prints a rough GPU memory delta after instrumentation. Usage:
14
+ - `python -m benchmarks.overhead.conv_overhead --iters 100 --device cpu`
15
+ - `python benchmarks/overhead/conv_overhead.py --json`
16
+
17
+ Benchmarks are not part of strict CI and are intended for manual runs.
@@ -0,0 +1,4 @@
1
+ """Benchmarks and profiling harnesses for Nervecode.
2
+
3
+ Not part of strict CI; intended for manual runs.
4
+ """
@@ -0,0 +1,8 @@
1
+ """Simple OOD benchmark harness.
2
+
3
+ This package contains a minimal, dependency-light benchmark that trains a small
4
+ model, calibrates aggregated surprise on in-distribution data, and measures
5
+ separation between in-distribution and out-of-distribution samples via AUROC.
6
+ """
7
+
8
+ __all__: list[str] = []