neuroquant 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. neuroquant-2.0.0/LICENSE +21 -0
  2. neuroquant-2.0.0/PKG-INFO +248 -0
  3. neuroquant-2.0.0/README.md +196 -0
  4. neuroquant-2.0.0/neuroquant/__init__.py +95 -0
  5. neuroquant-2.0.0/neuroquant/cli.py +3471 -0
  6. neuroquant-2.0.0/neuroquant/config.py +1050 -0
  7. neuroquant-2.0.0/neuroquant/data/__init__.py +1 -0
  8. neuroquant-2.0.0/neuroquant/data/data_loader.py +764 -0
  9. neuroquant-2.0.0/neuroquant/models/__init__.py +1 -0
  10. neuroquant-2.0.0/neuroquant/models/model_loader.py +602 -0
  11. neuroquant-2.0.0/neuroquant/py.typed +0 -0
  12. neuroquant-2.0.0/neuroquant/quantization/__init__.py +57 -0
  13. neuroquant-2.0.0/neuroquant/quantization/_default_config.yaml +206 -0
  14. neuroquant-2.0.0/neuroquant/quantization/adaround.py +1075 -0
  15. neuroquant-2.0.0/neuroquant/quantization/alpha_search.py +312 -0
  16. neuroquant-2.0.0/neuroquant/quantization/awq.py +646 -0
  17. neuroquant-2.0.0/neuroquant/quantization/base.py +289 -0
  18. neuroquant-2.0.0/neuroquant/quantization/bn_folding.py +194 -0
  19. neuroquant-2.0.0/neuroquant/quantization/gptq.py +261 -0
  20. neuroquant-2.0.0/neuroquant/quantization/hessian_clustering.py +731 -0
  21. neuroquant-2.0.0/neuroquant/quantization/latency_lut.py +368 -0
  22. neuroquant-2.0.0/neuroquant/quantization/nsga_ii_search.py +1255 -0
  23. neuroquant-2.0.0/neuroquant/quantization/ptq.py +684 -0
  24. neuroquant-2.0.0/neuroquant/quantization/qat.py +739 -0
  25. neuroquant-2.0.0/neuroquant/quantization/smoothquant.py +698 -0
  26. neuroquant-2.0.0/neuroquant/quantization/smoothquant_gptq.py +142 -0
  27. neuroquant-2.0.0/neuroquant/quantization/surrogate.py +291 -0
  28. neuroquant-2.0.0/neuroquant/tracking/__init__.py +1 -0
  29. neuroquant-2.0.0/neuroquant/tracking/mlflow_logger.py +267 -0
  30. neuroquant-2.0.0/neuroquant/utils/__init__.py +1 -0
  31. neuroquant-2.0.0/neuroquant/utils/checkpointing.py +469 -0
  32. neuroquant-2.0.0/neuroquant/utils/common.py +227 -0
  33. neuroquant-2.0.0/neuroquant/utils/deployment_export.py +322 -0
  34. neuroquant-2.0.0/neuroquant/utils/metrics.py +295 -0
  35. neuroquant-2.0.0/neuroquant/utils/numerics.py +88 -0
  36. neuroquant-2.0.0/neuroquant/utils/onnx_export.py +522 -0
  37. neuroquant-2.0.0/neuroquant/visualization/__init__.py +61 -0
  38. neuroquant-2.0.0/neuroquant/visualization/error_attribution.py +403 -0
  39. neuroquant-2.0.0/neuroquant/visualization/pareto_analysis.py +1158 -0
  40. neuroquant-2.0.0/neuroquant/visualization/report.py +452 -0
  41. neuroquant-2.0.0/neuroquant/visualization/sensitivity.py +244 -0
  42. neuroquant-2.0.0/neuroquant/visualization/style.py +144 -0
  43. neuroquant-2.0.0/neuroquant/xai/__init__.py +1 -0
  44. neuroquant-2.0.0/neuroquant/xai/explainability.py +1299 -0
  45. neuroquant-2.0.0/neuroquant.egg-info/PKG-INFO +248 -0
  46. neuroquant-2.0.0/neuroquant.egg-info/SOURCES.txt +53 -0
  47. neuroquant-2.0.0/neuroquant.egg-info/dependency_links.txt +1 -0
  48. neuroquant-2.0.0/neuroquant.egg-info/entry_points.txt +2 -0
  49. neuroquant-2.0.0/neuroquant.egg-info/requires.txt +28 -0
  50. neuroquant-2.0.0/neuroquant.egg-info/top_level.txt +1 -0
  51. neuroquant-2.0.0/pyproject.toml +141 -0
  52. neuroquant-2.0.0/setup.cfg +4 -0
  53. neuroquant-2.0.0/tests/test_config.py +31 -0
  54. neuroquant-2.0.0/tests/test_imports.py +35 -0
  55. neuroquant-2.0.0/tests/test_quantizers.py +58 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 NeuroQuant Authors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,248 @@
1
+ Metadata-Version: 2.4
2
+ Name: neuroquant
3
+ Version: 2.0.0
4
+ Summary: Production-grade neural-network quantization framework with NSGA + ONNX + hardware-aware search
5
+ Author: NeuroQuant Authors
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/AbdelazizElHelaly11/NeuroQuant
8
+ Project-URL: Documentation, https://github.com/AbdelazizElHelaly11/NeuroQuant/blob/main/README.md
9
+ Project-URL: Repository, https://github.com/AbdelazizElHelaly11/NeuroQuant
10
+ Project-URL: Issues, https://github.com/AbdelazizElHelaly11/NeuroQuant/issues
11
+ Keywords: quantization,deep-learning,pytorch,onnx,ptq,qat,gptq,smoothquant,awq,nsga,neural-network-compression,edge-ai
12
+ Classifier: Development Status :: 5 - Production/Stable
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: POSIX :: Linux
16
+ Classifier: Operating System :: Microsoft :: Windows
17
+ Classifier: Operating System :: MacOS
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Requires-Python: >=3.10
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: torch>=2.4
27
+ Requires-Dist: torchvision>=0.19
28
+ Requires-Dist: numpy>=1.26
29
+ Requires-Dist: pandas>=2.0
30
+ Requires-Dist: matplotlib>=3.7
31
+ Requires-Dist: seaborn>=0.13
32
+ Requires-Dist: pyyaml>=6.0
33
+ Requires-Dist: pydantic<3.0,>=2.5
34
+ Requires-Dist: mlflow>=2.10
35
+ Requires-Dist: pymoo>=0.6.0
36
+ Requires-Dist: onnx>=1.14
37
+ Requires-Dist: onnxruntime>=1.16
38
+ Requires-Dist: onnxscript>=0.1
39
+ Requires-Dist: scikit-learn>=1.3
40
+ Provides-Extra: dev
41
+ Requires-Dist: ruff>=0.5; extra == "dev"
42
+ Requires-Dist: build>=1.2; extra == "dev"
43
+ Requires-Dist: pytest>=7.0; extra == "dev"
44
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
45
+ Provides-Extra: xai
46
+ Requires-Dist: shap>=0.42.0; extra == "xai"
47
+ Provides-Extra: docs
48
+ Requires-Dist: mkdocs-material>=9.5; extra == "docs"
49
+ Requires-Dist: mkdocstrings[python]>=0.25; extra == "docs"
50
+ Requires-Dist: pymdown-extensions>=10.0; extra == "docs"
51
+ Dynamic: license-file
52
+
53
+ # NeuroQuant v2.0
54
+
55
+ [![python](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12-blue)]()
56
+ [![license](https://img.shields.io/badge/license-MIT-green)]()
57
+
58
+ **Production-grade neural-network quantization framework with multi-objective NSGA search, ONNX deployment, and hardware-aware optimisation.**
59
+
60
+ NeuroQuant takes a pre-trained PyTorch model and produces deployable INT8 / mixed-precision artefacts that have been measured (not estimated) on the same runtime that ships in production. Every public number is the result of running a real quantized graph through ONNX Runtime — no synthetic shortcuts.
61
+
62
+ ---
63
+
64
+ ## What it does
65
+
66
+ ```
67
+ ┌────────────────────────────────────────────────────────────────────────┐
68
+ │ │
69
+ │ FP32 PyTorch model ─────► 10-phase pipeline ─────► INT8 .onnx │
70
+ │ + metrics │
71
+ │ ┌──────────────────────────────────────────────────────────────┐ │
72
+ │ │ P0 Prepare model + dataset, FP32 baseline │ │
73
+ │ │ P1a Hessian / Fisher per-layer sensitivity │ │
74
+ │ │ P1b FITCompress warm-start seed │ │
75
+ │ │ P1c NSGA multi-objective search (2- or 3-obj) │ │
76
+ │ │ P1d AdaRound canonical-order weight rounding │ │
77
+ │ │ P1e Real W+A QAT with FP32 teacher distillation │ │
78
+ │ │ P1f GPTQ + SmoothQuant + AWQ + SmoothQuant→GPTQ │ │
79
+ │ │ P2 Pareto analysis + plots │ │
80
+ │ │ P3 Grad-CAM + SHAP explainability │ │
81
+ │ │ P4 MLflow finalisation + reproducibility manifest │ │
82
+ │ └──────────────────────────────────────────────────────────────┘ │
83
+ │ │
84
+ └────────────────────────────────────────────────────────────────────────┘
85
+ ```
86
+
87
+ The pipeline runs to completion in **~60 seconds** on CPU for a CIFAR-class model.
88
+
89
+ ---
90
+
91
+ ## Why it is production-grade
92
+
93
+ This framework was built deliberately to avoid the "research prototype" failure modes that disqualify most academic quantization tooling from real deployment:
94
+
95
+ | Concern | What NeuroQuant does |
96
+ | ------------------------------- | ----------------------------------------------------------------------------------------------------------------- |
97
+ | **Real INT inference** | Wave 4 emits true static-INT8 ONNX graphs via `onnxruntime.quantization.quantize_static`, not FP32 simulation. |
98
+ | **Real on-disk size** | `model_size_mb` is the literal `.onnx` filesystem size, not `numel × bw / 8`. The synthetic estimate is kept as `theoretical_size_mb` for ablation. |
99
+ | **Real latency** | `latency_ms` is measured under ONNX Runtime on the same machine that will deploy the artefact. |
100
+ | **Hardware-aware search**       | The NSGA third objective sums a per-layer ORT latency LUT (Wave 4 C2). Every gene's latency cost is a real timing. |
101
+ | **No leakage between splits**   | Train / search / val splits are 80/10/10 of the training data, with the official test set held out; NSGA fitness reads search, QAT early-stop reads val, headline numbers read test. |
102
+ | **Strict determinism** | `set_seed(strict=True)` enforces `CUBLAS_WORKSPACE_CONFIG`, `use_deterministic_algorithms`, `cudnn.deterministic`. |
103
+ | **Safe checkpoints** | All `torch.load(weights_only=True)`; pickle path is closed. Architectural wrappers persist as JSON manifests. |
104
+ | **Real W+A QAT** | INT8 activations always; weight parametrisation via `torch.nn.utils.parametrize` (autograd-aware STE). |
105
+ | **Validated config** | Pydantic v2 dataclasses with field validators — bad values fail at load, not deep in a phase. |
106
+
107
+ ---
108
+
109
+ ## Install
110
+
111
+ ### From the wheel
112
+
113
+ ```bash
114
+ pip install neuroquant-2.0.0-py3-none-any.whl
115
+ neuroquant --help
116
+ ```
117
+
118
+ ### From source
119
+
120
+ ```bash
121
+ git clone https://github.com/AbdelazizElHelaly11/NeuroQuant
122
+ cd NeuroQuant
123
+ pip install -e ".[dev]" # editable + dev extras
124
+ ```
125
+
126
+ GPU users:
127
+
128
+ ```bash
129
+ pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121
130
+ pip install -e ".[dev]"
131
+ ```
132
+
133
+ ---
134
+
135
+ ## Run
136
+
137
+ The console-script `neuroquant` is installed by the wheel; it accepts the same flags as `python main.py`.
138
+
139
+ ```bash
140
+ # Full pipeline on the bundled config (CIFAR-10 + MobileNetV2)
141
+ neuroquant --config config.yaml --epochs 20
142
+
143
+ # Fast smoke (CPU, no training, first three phases)
144
+ neuroquant --config config.yaml --epochs 0 --device cpu \
145
+ --phases phase_0_preparation phase_1a_hessian_clustering phase_1b_fitcompress
146
+
147
+ # Resume after interruption
148
+ neuroquant --config config.yaml --epochs 20 --resume
149
+
150
+ # Hardware-aware mode (3-objective NSGA + ORT latency LUT)
151
+ # Set hardware_aware_search: true in config.yaml, then:
152
+ neuroquant --config config.yaml --epochs 20
153
+ ```
154
+
155
+ The pipeline writes everything to `output_dir` (default `./artifacts/`):
156
+
157
+ ```
158
+ artifacts/
159
+ ├── checkpoints/ # per-phase resume points
160
+ ├── onnx/ # FP32 + per-method INT8 .onnx files
161
+ ├── pareto/ # Pareto plots + JSON
162
+ ├── reports/ # pipeline_report.txt, pareto_summary.json
163
+ ├── reproducibility_manifest.json
164
+ ├── latency_lut.json # only when hardware_aware_search=true
165
+ └── pipeline_report.txt
166
+ ```
167
+
168
+ ---
169
+
170
+ ## Configuration
171
+
172
+ All knobs live in [`config.yaml`](config.yaml). Common overrides:
173
+
174
+ ```yaml
175
+ model:
176
+ name: resnet18 # any torchvision name
177
+ num_classes: 10
178
+ input_shape: [3, 32, 32]
179
+
180
+ dataset:
181
+ name: cifar10 # cifar10 | cifar100 | imagefolder | synthetic | custom
182
+ class: null # optional "pkg.module.MyDataset"
183
+ train_dir: null # optional ImageFolder split dirs
184
+ val_dir: null
185
+ test_dir: null
186
+ batch_size: 128
187
+
188
+ methods: [ptq, qat, gptq, smoothquant, awq]
189
+ bitwidths:
190
+ supported: [4, 8]
191
+ io_layer: 8 # force first/last layers to INT8
192
+
193
+ hyperparams:
194
+ hardware_aware_search: true # Wave 4 J4: 3-obj NSGA
195
+ onnx_export_enabled: true # Wave 4 J1/J2/J3
196
+ qat_distill_alpha: 0.5 # Wave 2 E5: KD with FP32 teacher
197
+ smoothquant_per_layer_alpha: true # Wave 3 F3
198
+ hessian_estimator: fisher # Wave 3 B2: 3× faster than diag
199
+ ```
200
+
201
+ Pydantic field validators run at load time — invalid values surface immediately with the offending field path:
202
+
203
+ ```text
204
+ ValueError: Configuration validation failed:
205
+ num_classes must be >= 2.
206
+ ```
207
+
208
+ ---
209
+
210
+ ## Architecture
211
+
212
+ The framework was built in seven waves, each ending with a strict-format report. Per-wave architecture notes live in [`docs/architecture/`](docs/architecture/):
213
+
214
+ | Wave | Theme | Notes |
215
+ | ---- | ------------------------------ | ------------------------------------------ |
216
+ | 1 | Foundation (security + leakage) | [wave1.md](docs/architecture/wave1.md) |
217
+ | 2 | Real W+A QAT pipeline | [wave2.md](docs/architecture/wave2.md) |
218
+ | 3 | Method audits + Fisher | [wave3.md](docs/architecture/wave3.md) |
219
+ | 4 | ONNX + hardware-aware search | [wave4.md](docs/architecture/wave4.md) |
220
+ | 5 | Reporting + MLflow | [wave5.md](docs/architecture/wave5.md) |
221
+ | 6 | Config validation (Pydantic) | [wave6.md](docs/architecture/wave6.md) |
222
+ | 7 | Packaging + docs | [wave7.md](docs/architecture/wave7.md) |
223
+
224
+ ---
225
+
226
+ ## Quantization methods
227
+
228
+ | Method | When to use | Module |
229
+ | --------------------- | --------------------------------------------------------------------- | ----------------------------------------------------- |
230
+ | **PTQ** | Fast baseline; INT8 with bitwidth-aware calibration. | [`quantization/ptq.py`](quantization/ptq.py) |
231
+ | **QAT** | Best accuracy at INT8; requires fine-tuning data. | [`quantization/qat.py`](quantization/qat.py) |
232
+ | **GPTQ** | Best accuracy at INT4 weights; data-aware optimal rounding. | [`quantization/gptq.py`](quantization/gptq.py) |
233
+ | **SmoothQuant** | Activation-friendly INT8; per-layer α grid search. | [`quantization/smoothquant.py`](quantization/smoothquant.py) |
234
+ | **AWQ** | INT4 with salient-channel preservation; per-layer α + FP16 carve-out. | [`quantization/awq.py`](quantization/awq.py) |
235
+ | **SmoothQuant→GPTQ** | Production recipe — strict-Pareto improvement over either method alone. | [`quantization/smoothquant_gptq.py`](quantization/smoothquant_gptq.py) |
236
+ | **AdaRound** | Post-PTQ refinement; canonical input→output traversal. | [`quantization/adaround.py`](quantization/adaround.py) |
237
+
238
+ ---
239
+
240
+ ## License
241
+
242
+ MIT. See [LICENSE](LICENSE) for the full text.
243
+
244
+ ---
245
+
246
+ ## Acknowledgements
247
+
248
+ The seven-wave production hardening was specified, implemented, and refined in collaboration with **Claude Opus 4.7 (1M context)**. Per-wave architecture notes live under [`docs/architecture/`](docs/architecture/).
@@ -0,0 +1,196 @@
1
+ # NeuroQuant v2.0
2
+
3
+ [![python](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12-blue)]()
4
+ [![license](https://img.shields.io/badge/license-MIT-green)]()
5
+
6
+ **Production-grade neural-network quantization framework with multi-objective NSGA search, ONNX deployment, and hardware-aware optimisation.**
7
+
8
+ NeuroQuant takes a pre-trained PyTorch model and produces deployable INT8 / mixed-precision artefacts that have been measured (not estimated) on the same runtime that ships in production. Every public number is the result of running a real quantized graph through ONNX Runtime — no synthetic shortcuts.
9
+
10
+ ---
11
+
12
+ ## What it does
13
+
14
+ ```
15
+ ┌────────────────────────────────────────────────────────────────────────┐
16
+ │ │
17
+ │ FP32 PyTorch model ─────► 10-phase pipeline ─────► INT8 .onnx │
18
+ │ + metrics │
19
+ │ ┌──────────────────────────────────────────────────────────────┐ │
20
+ │ │ P0 Prepare model + dataset, FP32 baseline │ │
21
+ │ │ P1a Hessian / Fisher per-layer sensitivity │ │
22
+ │ │ P1b FITCompress warm-start seed │ │
23
+ │ │ P1c NSGA multi-objective search (2- or 3-obj) │ │
24
+ │ │ P1d AdaRound canonical-order weight rounding │ │
25
+ │ │ P1e Real W+A QAT with FP32 teacher distillation │ │
26
+ │ │ P1f GPTQ + SmoothQuant + AWQ + SmoothQuant→GPTQ │ │
27
+ │ │ P2 Pareto analysis + plots │ │
28
+ │ │ P3 Grad-CAM + SHAP explainability │ │
29
+ │ │ P4 MLflow finalisation + reproducibility manifest │ │
30
+ │ └──────────────────────────────────────────────────────────────┘ │
31
+ │ │
32
+ └────────────────────────────────────────────────────────────────────────┘
33
+ ```
34
+
35
+ The pipeline runs to completion in **~60 seconds** on CPU for a CIFAR-class model.
36
+
37
+ ---
38
+
39
+ ## Why it is production-grade
40
+
41
+ This framework was built deliberately to avoid the "research prototype" failure modes that disqualify most academic quantization tooling from real deployment:
42
+
43
+ | Concern | What NeuroQuant does |
44
+ | ------------------------------- | ----------------------------------------------------------------------------------------------------------------- |
45
+ | **Real INT inference** | Wave 4 emits true static-INT8 ONNX graphs via `onnxruntime.quantization.quantize_static`, not FP32 simulation. |
46
+ | **Real on-disk size** | `model_size_mb` is the literal `.onnx` filesystem size, not `numel × bw / 8`. The synthetic estimate is kept as `theoretical_size_mb` for ablation. |
47
+ | **Real latency** | `latency_ms` is measured under ONNX Runtime on the same machine that will deploy the artefact. |
48
+ | **Hardware-aware search** | The NSGA third objective sums a per-layer ORT latency LUT (Wave 4 C2). Every gene's latency cost is a real timing.|
49
+ | **No leakage between splits** | Train / search / val / test are 80/10/10/test-set; NSGA fitness reads search, QAT early-stop reads val, headline reads test. |
50
+ | **Strict determinism** | `set_seed(strict=True)` enforces `CUBLAS_WORKSPACE_CONFIG`, `use_deterministic_algorithms`, `cudnn.deterministic`. |
51
+ | **Safe checkpoints** | All `torch.load(weights_only=True)`; pickle path is closed. Architectural wrappers persist as JSON manifests. |
52
+ | **Real W+A QAT** | INT8 activations always; weight parametrisation via `torch.nn.utils.parametrize` (autograd-aware STE). |
53
+ | **Validated config** | Pydantic v2 dataclasses with field validators — bad values fail at load, not deep in a phase. |
54
+
55
+ ---
56
+
57
+ ## Install
58
+
59
+ ### From the wheel
60
+
61
+ ```bash
62
+ pip install neuroquant-2.0.0-py3-none-any.whl
63
+ neuroquant --help
64
+ ```
65
+
66
+ ### From source
67
+
68
+ ```bash
69
+ git clone https://github.com/AbdelazizElHelaly11/NeuroQuant
70
+ cd NeuroQuant
71
+ pip install -e ".[dev]" # editable + dev extras
72
+ ```
73
+
74
+ GPU users:
75
+
76
+ ```bash
77
+ pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121
78
+ pip install -e ".[dev]"
79
+ ```
80
+
81
+ ---
82
+
83
+ ## Run
84
+
85
+ The console-script `neuroquant` is installed by the wheel; it accepts the same flags as `python main.py`.
86
+
87
+ ```bash
88
+ # Full pipeline on the bundled config (CIFAR-10 + MobileNetV2)
89
+ neuroquant --config config.yaml --epochs 20
90
+
91
+ # Fast smoke (CPU, no training, first three phases)
92
+ neuroquant --config config.yaml --epochs 0 --device cpu \
93
+ --phases phase_0_preparation phase_1a_hessian_clustering phase_1b_fitcompress
94
+
95
+ # Resume after interruption
96
+ neuroquant --config config.yaml --epochs 20 --resume
97
+
98
+ # Hardware-aware mode (3-objective NSGA + ORT latency LUT)
99
+ # Set hardware_aware_search: true in config.yaml, then:
100
+ neuroquant --config config.yaml --epochs 20
101
+ ```
102
+
103
+ The pipeline writes everything to `output_dir` (default `./artifacts/`):
104
+
105
+ ```
106
+ artifacts/
107
+ ├── checkpoints/ # per-phase resume points
108
+ ├── onnx/ # FP32 + per-method INT8 .onnx files
109
+ ├── pareto/ # Pareto plots + JSON
110
+ ├── reports/ # pipeline_report.txt, pareto_summary.json
111
+ ├── reproducibility_manifest.json
112
+ ├── latency_lut.json # only when hardware_aware_search=true
113
+ └── pipeline_report.txt
114
+ ```
115
+
116
+ ---
117
+
118
+ ## Configuration
119
+
120
+ All knobs live in [`config.yaml`](config.yaml). Common overrides:
121
+
122
+ ```yaml
123
+ model:
124
+ name: resnet18 # any torchvision name
125
+ num_classes: 10
126
+ input_shape: [3, 32, 32]
127
+
128
+ dataset:
129
+ name: cifar10 # cifar10 | cifar100 | imagefolder | synthetic | custom
130
+ class: null # optional "pkg.module.MyDataset"
131
+ train_dir: null # optional ImageFolder split dirs
132
+ val_dir: null
133
+ test_dir: null
134
+ batch_size: 128
135
+
136
+ methods: [ptq, qat, gptq, smoothquant, awq]
137
+ bitwidths:
138
+ supported: [4, 8]
139
+ io_layer: 8 # force first/last layers to INT8
140
+
141
+ hyperparams:
142
+ hardware_aware_search: true # Wave 4 J4: 3-obj NSGA
143
+ onnx_export_enabled: true # Wave 4 J1/J2/J3
144
+ qat_distill_alpha: 0.5 # Wave 2 E5: KD with FP32 teacher
145
+ smoothquant_per_layer_alpha: true # Wave 3 F3
146
+ hessian_estimator: fisher # Wave 3 B2: 3× faster than diag
147
+ ```
148
+
149
+ Pydantic field validators run at load time — invalid values surface immediately with the offending field path:
150
+
151
+ ```text
152
+ ValueError: Configuration validation failed:
153
+ num_classes must be >= 2.
154
+ ```
155
+
156
+ ---
157
+
158
+ ## Architecture
159
+
160
+ The framework was built in seven waves, each ending with a strict-format report. Per-wave architecture notes live in [`docs/architecture/`](docs/architecture/):
161
+
162
+ | Wave | Theme | Notes |
163
+ | ---- | ------------------------------ | ------------------------------------------ |
164
+ | 1 | Foundation (security + leakage) | [wave1.md](docs/architecture/wave1.md) |
165
+ | 2 | Real W+A QAT pipeline | [wave2.md](docs/architecture/wave2.md) |
166
+ | 3 | Method audits + Fisher | [wave3.md](docs/architecture/wave3.md) |
167
+ | 4 | ONNX + hardware-aware search | [wave4.md](docs/architecture/wave4.md) |
168
+ | 5 | Reporting + MLflow | [wave5.md](docs/architecture/wave5.md) |
169
+ | 6 | Config validation (Pydantic) | [wave6.md](docs/architecture/wave6.md) |
170
+ | 7 | Packaging + docs | [wave7.md](docs/architecture/wave7.md) |
171
+
172
+ ---
173
+
174
+ ## Quantization methods
175
+
176
+ | Method | When to use | Module |
177
+ | --------------------- | --------------------------------------------------------------------- | ----------------------------------------------------- |
178
+ | **PTQ** | Fast baseline; INT8 with bitwidth-aware calibration. | [`quantization/ptq.py`](quantization/ptq.py) |
179
+ | **QAT** | Best accuracy at INT8; requires fine-tuning data. | [`quantization/qat.py`](quantization/qat.py) |
180
+ | **GPTQ** | Best accuracy at INT4 weights; data-aware optimal rounding. | [`quantization/gptq.py`](quantization/gptq.py) |
181
+ | **SmoothQuant** | Activation-friendly INT8; per-layer α grid search. | [`quantization/smoothquant.py`](quantization/smoothquant.py) |
182
+ | **AWQ** | INT4 with salient-channel preservation; per-layer α + FP16 carve-out. | [`quantization/awq.py`](quantization/awq.py) |
183
+ | **SmoothQuant→GPTQ** | Production recipe — strict-Pareto improvement over either method alone. | [`quantization/smoothquant_gptq.py`](quantization/smoothquant_gptq.py) |
184
+ | **AdaRound** | Post-PTQ refinement; canonical input→output traversal. | [`quantization/adaround.py`](quantization/adaround.py) |
185
+
186
+ ---
187
+
188
+ ## License
189
+
190
+ MIT. See [LICENSE](LICENSE) for the full text.
191
+
192
+ ---
193
+
194
+ ## Acknowledgements
195
+
196
+ The seven-wave production hardening was specified, implemented, and refined in collaboration with **Claude Opus 4.7 (1M context)**. Per-wave architecture notes live under [`docs/architecture/`](docs/architecture/).
@@ -0,0 +1,95 @@
1
+ """
2
+ NeuroQuant — production-grade neural-network quantization framework.
3
+
4
+ Public API for library users::
5
+
6
+ from neuroquant import (
7
+ # Quantizers (notebook / library use)
8
+ PTQQuantizer, AWQQuantizer, GPTQQuantizer,
9
+ SmoothQuantQuantizer, SmoothQuantGPTQQuantizer,
10
+ QATTrainer, AdaroundOptimizer,
11
+ # Multi-objective search + clustering + surrogate
12
+ NSGAIIClusterSearch, LayerClusterer, AccuracySurrogate,
13
+ # Configuration object (every quantizer accepts ``config=None``
14
+ # and falls back to ``QuantizationConfig()`` defaults)
15
+ QuantizationConfig,
16
+ # Explainability + Pareto visualization
17
+ XAIGenerator, ParetoAnalyzer, ParetoVisualizer,
18
+ )
19
+
20
+ The ``neuroquant`` command-line entry point lives in
21
+ :mod:`neuroquant.cli` and is exposed via ``[project.scripts]`` in
22
+ ``pyproject.toml``. Library users normally do not need to import it
23
+ directly — instantiate :class:`PTQQuantizer` (etc.) and drive the
24
+ pipeline themselves.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ __version__ = "2.0.0"
30
+
31
+ # Re-export the configuration dataclass first because every other
32
+ # public symbol depends on it (directly or transitively).
33
+ from neuroquant.config import QuantizationConfig
34
+
35
+ # Quantizers and the search / clustering / surrogate trio. The
36
+ # subpackage __init__ already curates these — re-export from there so
37
+ # any future additions land in one place.
38
+ from neuroquant.quantization import (
39
+ BaseQuantizer,
40
+ PTQQuantizer,
41
+ AWQQuantizer,
42
+ GPTQQuantizer,
43
+ SmoothQuantQuantizer,
44
+ SmoothQuantGPTQQuantizer,
45
+ AdaroundOptimizer,
46
+ QATTrainer,
47
+ NSGAIIClusterSearch,
48
+ LayerClusterer,
49
+ AccuracySurrogate,
50
+ )
51
+
52
+ # Visualization surface (Pareto + plot helpers). ``XAIGenerator`` is
53
+ # re-exported from the visualization package via a guarded import so
54
+ # users who skipped the optional ``xai`` extras (shap / captum) don't
55
+ # crash at ``import neuroquant``.
56
+ from neuroquant.visualization import (
57
+ ParetoAnalyzer,
58
+ ParetoVisualizer,
59
+ XAIGenerator,
60
+ compute_layer_errors,
61
+ plot_error_attribution,
62
+ plot_error_comparison,
63
+ plot_sensitivity_heatmap,
64
+ plot_tier_distribution,
65
+ generate_html_report,
66
+ )
67
+
68
+ __all__ = [
69
+ "__version__",
70
+ # Configuration
71
+ "QuantizationConfig",
72
+ # Quantizers
73
+ "BaseQuantizer",
74
+ "PTQQuantizer",
75
+ "AWQQuantizer",
76
+ "GPTQQuantizer",
77
+ "SmoothQuantQuantizer",
78
+ "SmoothQuantGPTQQuantizer",
79
+ "AdaroundOptimizer",
80
+ "QATTrainer",
81
+ # Search / clustering / surrogate
82
+ "NSGAIIClusterSearch",
83
+ "LayerClusterer",
84
+ "AccuracySurrogate",
85
+ # Visualization
86
+ "ParetoAnalyzer",
87
+ "ParetoVisualizer",
88
+ "XAIGenerator",
89
+ "compute_layer_errors",
90
+ "plot_error_attribution",
91
+ "plot_error_comparison",
92
+ "plot_sensitivity_heatmap",
93
+ "plot_tier_distribution",
94
+ "generate_html_report",
95
+ ]