quantile-guard 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quantile_guard-0.5.0/LICENSE +21 -0
- quantile_guard-0.5.0/PKG-INFO +264 -0
- quantile_guard-0.5.0/README.md +223 -0
- quantile_guard-0.5.0/pyproject.toml +65 -0
- quantile_guard-0.5.0/setup.cfg +4 -0
- quantile_guard-0.5.0/src/quantile_guard/__init__.py +11 -0
- quantile_guard-0.5.0/src/quantile_guard/calibration.py +319 -0
- quantile_guard-0.5.0/src/quantile_guard/conformal.py +236 -0
- quantile_guard-0.5.0/src/quantile_guard/metrics.py +347 -0
- quantile_guard-0.5.0/src/quantile_guard/postprocess.py +147 -0
- quantile_guard-0.5.0/src/quantile_guard/quantile_regression.py +1296 -0
- quantile_guard-0.5.0/src/quantile_guard.egg-info/PKG-INFO +264 -0
- quantile_guard-0.5.0/src/quantile_guard.egg-info/SOURCES.txt +20 -0
- quantile_guard-0.5.0/src/quantile_guard.egg-info/dependency_links.txt +1 -0
- quantile_guard-0.5.0/src/quantile_guard.egg-info/requires.txt +24 -0
- quantile_guard-0.5.0/src/quantile_guard.egg-info/top_level.txt +1 -0
- quantile_guard-0.5.0/tests/test_advanced_features.py +679 -0
- quantile_guard-0.5.0/tests/test_calibration.py +230 -0
- quantile_guard-0.5.0/tests/test_conformal.py +199 -0
- quantile_guard-0.5.0/tests/test_metrics.py +265 -0
- quantile_guard-0.5.0/tests/test_postprocess.py +138 -0
- quantile_guard-0.5.0/tests/test_quantile_regression.py +409 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Joshua Vernazza
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: quantile-guard
|
|
3
|
+
Version: 0.5.0
|
|
4
|
+
Summary: Non-crossing quantile regression toolkit with joint multi-quantile fitting, inference, conformal calibration, and evaluation. Scikit-learn compatible.
|
|
5
|
+
Author: Joshua Vernazza
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/joshvern/quantile-guard
|
|
8
|
+
Project-URL: Issues, https://github.com/joshvern/quantile-guard/issues
|
|
9
|
+
Project-URL: Documentation, https://joshvern.github.io/quantile-guard/
|
|
10
|
+
Keywords: quantile-regression,non-crossing,prediction-intervals,conformal,calibration,statistics,machine-learning,scikit-learn
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Requires-Python: >=3.9
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: ortools>=9.0.9047
|
|
22
|
+
Requires-Dist: numpy>=1.18.0
|
|
23
|
+
Requires-Dist: pandas>=1.0.0
|
|
24
|
+
Requires-Dist: scipy>=1.4.0
|
|
25
|
+
Requires-Dist: tqdm>=4.50.0
|
|
26
|
+
Requires-Dist: joblib>=1.0.0
|
|
27
|
+
Requires-Dist: scikit-learn>=0.22.0
|
|
28
|
+
Provides-Extra: test
|
|
29
|
+
Requires-Dist: pytest>=7; extra == "test"
|
|
30
|
+
Provides-Extra: formula
|
|
31
|
+
Requires-Dist: patsy>=0.5.0; extra == "formula"
|
|
32
|
+
Provides-Extra: plot
|
|
33
|
+
Requires-Dist: matplotlib>=3.1.0; extra == "plot"
|
|
34
|
+
Provides-Extra: benchmark
|
|
35
|
+
Requires-Dist: statsmodels>=0.13.0; extra == "benchmark"
|
|
36
|
+
Requires-Dist: matplotlib>=3.1.0; extra == "benchmark"
|
|
37
|
+
Provides-Extra: all
|
|
38
|
+
Requires-Dist: patsy>=0.5.0; extra == "all"
|
|
39
|
+
Requires-Dist: matplotlib>=3.1.0; extra == "all"
|
|
40
|
+
Dynamic: license-file
|
|
41
|
+
|
|
42
|
+
[![PyPI][pypi-badge]][pypi-link]
|
|
43
|
+
[![Python Versions][py-badge]][pypi-link]
|
|
44
|
+
[![CI][ci-badge]][ci-link]
|
|
45
|
+
[![Docs][docs-badge]][docs-link]
|
|
46
|
+
|
|
47
|
+
[pypi-badge]: https://img.shields.io/pypi/v/quantile-guard.svg
|
|
48
|
+
[py-badge]: https://img.shields.io/pypi/pyversions/quantile-guard.svg
|
|
49
|
+
[ci-badge]: https://github.com/joshvern/quantile-guard/actions/workflows/ci.yml/badge.svg
|
|
50
|
+
[docs-badge]: https://github.com/joshvern/quantile-guard/actions/workflows/docs.yml/badge.svg
|
|
51
|
+
|
|
52
|
+
[pypi-link]: https://pypi.org/project/quantile-guard/
|
|
53
|
+
[ci-link]: https://github.com/joshvern/quantile-guard/actions/workflows/ci.yml
|
|
54
|
+
[docs-link]: https://joshvern.github.io/quantile-guard/
|
|
55
|
+
|
|
56
|
+
# quantile-guard
|
|
57
|
+
|
|
58
|
+
**Non-crossing quantile models with built-in inference, calibration, and evaluation.**
|
|
59
|
+
|
|
60
|
+
A quantile modeling toolkit — not just a quantile regressor. Fits multiple quantiles jointly with monotonicity constraints that guarantee predictions never cross. Wraps the result in inference, conformal calibration, evaluation metrics, and crossing diagnostics.
|
|
61
|
+
|
|
62
|
+
Scikit-learn compatible. Validated against sklearn, statsmodels, and R's `quantreg`.
|
|
63
|
+
|
|
64
|
+
## Why Not Just Fit Quantiles Independently?
|
|
65
|
+
|
|
66
|
+
When you fit quantiles one at a time (as sklearn and statsmodels do), nothing prevents the 90th percentile prediction from falling *below* the 10th. On real-world data with heavy tails, noise, or many quantile levels, **this happens frequently**:
|
|
67
|
+
|
|
68
|
+
| n | features | quantiles | Crossing rate (independent) | Crossing rate (this package) |
|
|
69
|
+
|---:|---:|---:|---:|---:|
|
|
70
|
+
| 500 | 10 | 13 | **30.0%** | **0%** |
|
|
71
|
+
| 1,000 | 10 | 13 | **16.5%** | **0%** |
|
|
72
|
+
| 2,000 | 20 | 13 | **11.0%** | **0%** |
|
|
73
|
+
| 2,000 | 20 | 7 | **4.5%** | **0%** |
|
|
74
|
+
|
|
75
|
+
This package eliminates crossings by construction. The joint formulation also acts as beneficial regularization — achieving **equal or better pinball loss** than independent fitting.
|
|
76
|
+
|
|
77
|
+
Full benchmark methodology and results: [Benchmarks](https://joshvern.github.io/quantile-guard/benchmarks/)
|
|
78
|
+
|
|
79
|
+
## What You Get
|
|
80
|
+
|
|
81
|
+
This is a **toolkit**, not a single estimator. It covers the workflow from raw quantile regression through calibrated prediction intervals:
|
|
82
|
+
|
|
83
|
+
| Workflow | What it does |
|
|
84
|
+
|----------|-------------|
|
|
85
|
+
| **Joint Quantile Regression** | Fit multiple quantiles in one call with non-crossing guarantees |
|
|
86
|
+
| **Conformalized Quantile Regression** | Calibrate intervals for finite-sample coverage guarantees |
|
|
87
|
+
| **Censored Quantile Regression** | Handle right- or left-censored (survival) data |
|
|
88
|
+
| **Evaluation & Metrics** | Pinball loss, coverage, interval score, crossing diagnostics |
|
|
89
|
+
| **Calibration Diagnostics** | Coverage by group/bin, nominal vs empirical, sharpness analysis |
|
|
90
|
+
| **Crossing Detection & Repair** | Diagnose and fix crossings from any quantile model |
|
|
91
|
+
|
|
92
|
+
### Feature Comparison
|
|
93
|
+
|
|
94
|
+
| Feature | This package | sklearn | statsmodels |
|
|
95
|
+
|---------|:---:|:---:|:---:|
|
|
96
|
+
| Multiple quantiles (joint fit) | Yes | No | No |
|
|
97
|
+
| Non-crossing guarantee | Yes | No | No |
|
|
98
|
+
| Multi-output regression | Yes | No | No |
|
|
99
|
+
| Analytical / kernel / cluster / bootstrap SEs | Yes | No | Partial |
|
|
100
|
+
| L1 / Elastic Net / SCAD / MCP | Yes | L1 only | No |
|
|
101
|
+
| Conformal calibration (CQR) | Yes | No | No |
|
|
102
|
+
| Calibration diagnostics | Yes | No | No |
|
|
103
|
+
| Evaluation metrics suite | Yes | Partial | No |
|
|
104
|
+
| Crossing detection + fix | Yes | No | No |
|
|
105
|
+
| Censored QR | Yes | No | No |
|
|
106
|
+
| Prediction intervals | Yes | No | No |
|
|
107
|
+
| Pseudo R² | Yes | No | Yes |
|
|
108
|
+
| Formula interface | Yes | No | Yes |
|
|
109
|
+
| Sklearn pipeline compatible | Yes | Yes | No |
|
|
110
|
+
|
|
111
|
+
## Installation
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
pip install quantile-guard
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Optional extras:
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
pip install quantile-guard[all] # formula interface + plots
|
|
121
|
+
pip install quantile-guard[plot] # matplotlib only
|
|
122
|
+
pip install quantile-guard[formula] # patsy only
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Quick Start
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
import numpy as np
|
|
129
|
+
from quantile_guard import QuantileRegression
|
|
130
|
+
|
|
131
|
+
X = np.random.default_rng(0).normal(size=(200, 3))
|
|
132
|
+
y = X @ [2.0, -1.5, 0.8] + np.random.default_rng(1).normal(scale=0.5, size=200)
|
|
133
|
+
|
|
134
|
+
# Fit 3 quantiles jointly — guaranteed non-crossing
|
|
135
|
+
model = QuantileRegression(tau=[0.1, 0.5, 0.9], se_method='analytical')
|
|
136
|
+
model.fit(X, y)
|
|
137
|
+
|
|
138
|
+
# Summaries with coefficients, SEs, p-values, and 95% CIs
|
|
139
|
+
print(model.summary()[0.5]['y'])
|
|
140
|
+
|
|
141
|
+
# Prediction intervals (guaranteed monotone: lower <= median <= upper)
|
|
142
|
+
interval = model.predict_interval(X[:5], coverage=0.80)
|
|
143
|
+
print(interval['y']['lower'], interval['y']['upper'])
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Conformal Calibration
|
|
147
|
+
|
|
148
|
+
Turn raw quantile predictions into intervals with coverage guarantees:
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
from quantile_guard.conformal import ConformalQuantileRegression
|
|
152
|
+
|
|
153
|
+
base = QuantileRegression(tau=[0.05, 0.5, 0.95], se_method='analytical')
|
|
154
|
+
cqr = ConformalQuantileRegression(base_estimator=base, coverage=0.90)
|
|
155
|
+
cqr.fit(X_train, y_train)
|
|
156
|
+
|
|
157
|
+
intervals = cqr.predict_interval(X_test)
|
|
158
|
+
print(cqr.empirical_coverage(X_test, y_test)) # approximately 0.90 or higher (marginal coverage guarantee)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### Censored Quantile Regression
|
|
162
|
+
|
|
163
|
+
For survival data with right- or left-censoring:
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
from quantile_guard import CensoredQuantileRegression
|
|
167
|
+
|
|
168
|
+
model = CensoredQuantileRegression(tau=0.5, censoring='right', se_method='analytical')
|
|
169
|
+
model.fit(X, observed_time, event_indicator=delta)
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Evaluate Any Quantile Model
|
|
173
|
+
|
|
174
|
+
The metrics and diagnostics modules work with predictions from any source — not just this package:
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
from quantile_guard.metrics import quantile_evaluation_report
|
|
178
|
+
from quantile_guard.postprocess import crossing_summary
|
|
179
|
+
|
|
180
|
+
# Evaluate predictions from XGBoost, LightGBM, or any other model
|
|
181
|
+
report = quantile_evaluation_report(y_true, predictions, taus)
|
|
182
|
+
crossings = crossing_summary(predictions, taus)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Regularization
|
|
186
|
+
|
|
187
|
+
```python
|
|
188
|
+
QuantileRegression(tau=0.5, regularization='l1', alpha=0.1) # Lasso
|
|
189
|
+
QuantileRegression(tau=0.5, regularization='elasticnet', alpha=0.1, l1_ratio=0.5)
|
|
190
|
+
QuantileRegression(tau=0.5, regularization='scad', alpha=0.3) # Less bias on large coefficients
|
|
191
|
+
QuantileRegression(tau=0.5, regularization='mcp', alpha=0.3)
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### Inference Options
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
QuantileRegression(tau=0.5, se_method='analytical') # Fast asymptotic SEs
|
|
198
|
+
QuantileRegression(tau=0.5, se_method='kernel') # Heteroscedasticity-robust
|
|
199
|
+
QuantileRegression(tau=0.5, se_method='bootstrap', n_bootstrap=500)
|
|
200
|
+
# Cluster-robust SEs
|
|
201
|
+
model.fit(X, y, clusters=group_labels)
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
## Benchmarks
|
|
205
|
+
|
|
206
|
+
Tested on heavy-tailed heteroscedastic data (Student-t noise, 10-20 features, up to 13 quantiles):
|
|
207
|
+
|
|
208
|
+
| n | features | quantiles | Crossing (this) | Crossing (sklearn) | Pinball (this) | Pinball (sklearn) |
|
|
209
|
+
|---:|---:|---:|---:|---:|---:|---:|
|
|
210
|
+
| 500 | 10 | 7 | **0%** | 11.0% | **0.5148** | 0.5166 |
|
|
211
|
+
| 500 | 10 | 13 | **0%** | 30.0% | **0.5095** | 0.5240 |
|
|
212
|
+
| 1,000 | 10 | 13 | **0%** | 16.5% | **0.5048** | 0.5071 |
|
|
213
|
+
| 2,000 | 20 | 13 | **0%** | 11.0% | **0.5599** | 0.5611 |
|
|
214
|
+
|
|
215
|
+
The joint formulation also achieves slightly better pinball loss — the non-crossing constraints act as beneficial regularization.
|
|
216
|
+
|
|
217
|
+
**Speed tradeoff:** This package solves a single joint LP with non-crossing constraints, which is slower than fitting each quantile independently. The value is in the guarantee and the richer downstream workflows. For single-quantile fits where speed matters most, sklearn or statsmodels may be more appropriate.
|
|
218
|
+
|
|
219
|
+
Full results: [Benchmarks](https://joshvern.github.io/quantile-guard/benchmarks/) | [Reproduce locally](https://joshvern.github.io/quantile-guard/benchmarks/#reproducing-these-results)
|
|
220
|
+
|
|
221
|
+
## When to Use This Package
|
|
222
|
+
|
|
223
|
+
**Use this when you need:**
|
|
224
|
+
- Multiple quantile predictions that must not cross (production pipelines, interval forecasts)
|
|
225
|
+
- Statistical inference on quantile coefficients (SEs, p-values, confidence intervals)
|
|
226
|
+
- Calibrated prediction intervals (conformal quantile regression)
|
|
227
|
+
- Censored/survival quantile models
|
|
228
|
+
- A complete evaluation workflow for any quantile model's predictions
|
|
229
|
+
|
|
230
|
+
**Use sklearn or statsmodels when:**
|
|
231
|
+
- You only need a single quantile (e.g., median regression)
|
|
232
|
+
- Raw speed matters more than crossing guarantees
|
|
233
|
+
- You don't need inference, calibration, or evaluation tooling
|
|
234
|
+
|
|
235
|
+
## Documentation
|
|
236
|
+
|
|
237
|
+
Full docs: [joshvern.github.io/quantile-guard](https://joshvern.github.io/quantile-guard/)
|
|
238
|
+
|
|
239
|
+
## Implementation
|
|
240
|
+
|
|
241
|
+
Quantile regression is naturally a linear program. This package solves joint multi-quantile LPs with non-crossing constraints using:
|
|
242
|
+
|
|
243
|
+
- **PDLP** — first-order primal-dual solver (default, from Google OR-Tools)
|
|
244
|
+
- **GLOP** — revised simplex (faster on small/medium problems)
|
|
245
|
+
- **HiGHS** — via scipy's sparse LP interface (memory-efficient)
|
|
246
|
+
|
|
247
|
+
```python
|
|
248
|
+
QuantileRegression(tau=0.5, solver_backend='GLOP') # simplex
|
|
249
|
+
QuantileRegression(tau=0.5, use_sparse=True) # scipy sparse
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
## Dependencies
|
|
253
|
+
|
|
254
|
+
**Required:** numpy, pandas, scipy, scikit-learn, ortools, tqdm, joblib
|
|
255
|
+
|
|
256
|
+
**Optional:** matplotlib (plots), patsy (formulas), statsmodels (benchmarks)
|
|
257
|
+
|
|
258
|
+
## Contributing
|
|
259
|
+
|
|
260
|
+
Contributions welcome! Open an issue or submit a pull request on [GitHub](https://github.com/joshvern/quantile-guard).
|
|
261
|
+
|
|
262
|
+
## License
|
|
263
|
+
|
|
264
|
+
MIT
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
[![PyPI][pypi-badge]][pypi-link]
|
|
2
|
+
[![Python Versions][py-badge]][pypi-link]
|
|
3
|
+
[![CI][ci-badge]][ci-link]
|
|
4
|
+
[![Docs][docs-badge]][docs-link]
|
|
5
|
+
|
|
6
|
+
[pypi-badge]: https://img.shields.io/pypi/v/quantile-guard.svg
|
|
7
|
+
[py-badge]: https://img.shields.io/pypi/pyversions/quantile-guard.svg
|
|
8
|
+
[ci-badge]: https://github.com/joshvern/quantile-guard/actions/workflows/ci.yml/badge.svg
|
|
9
|
+
[docs-badge]: https://github.com/joshvern/quantile-guard/actions/workflows/docs.yml/badge.svg
|
|
10
|
+
|
|
11
|
+
[pypi-link]: https://pypi.org/project/quantile-guard/
|
|
12
|
+
[ci-link]: https://github.com/joshvern/quantile-guard/actions/workflows/ci.yml
|
|
13
|
+
[docs-link]: https://joshvern.github.io/quantile-guard/
|
|
14
|
+
|
|
15
|
+
# quantile-guard
|
|
16
|
+
|
|
17
|
+
**Non-crossing quantile models with built-in inference, calibration, and evaluation.**
|
|
18
|
+
|
|
19
|
+
A quantile modeling toolkit — not just a quantile regressor. Fits multiple quantiles jointly with monotonicity constraints that guarantee predictions never cross. Wraps the result in inference, conformal calibration, evaluation metrics, and crossing diagnostics.
|
|
20
|
+
|
|
21
|
+
Scikit-learn compatible. Validated against sklearn, statsmodels, and R's `quantreg`.
|
|
22
|
+
|
|
23
|
+
## Why Not Just Fit Quantiles Independently?
|
|
24
|
+
|
|
25
|
+
When you fit quantiles one at a time (as sklearn and statsmodels do), nothing prevents the 90th percentile prediction from falling *below* the 10th. On real-world data with heavy tails, noise, or many quantile levels, **this happens frequently**:
|
|
26
|
+
|
|
27
|
+
| n | features | quantiles | Crossing rate (independent) | Crossing rate (this package) |
|
|
28
|
+
|---:|---:|---:|---:|---:|
|
|
29
|
+
| 500 | 10 | 13 | **30.0%** | **0%** |
|
|
30
|
+
| 1,000 | 10 | 13 | **16.5%** | **0%** |
|
|
31
|
+
| 2,000 | 20 | 13 | **11.0%** | **0%** |
|
|
32
|
+
| 2,000 | 20 | 7 | **4.5%** | **0%** |
|
|
33
|
+
|
|
34
|
+
This package eliminates crossings by construction. The joint formulation also acts as beneficial regularization — achieving **equal or better pinball loss** than independent fitting.
|
|
35
|
+
|
|
36
|
+
Full benchmark methodology and results: [Benchmarks](https://joshvern.github.io/quantile-guard/benchmarks/)
|
|
37
|
+
|
|
38
|
+
## What You Get
|
|
39
|
+
|
|
40
|
+
This is a **toolkit**, not a single estimator. It covers the workflow from raw quantile regression through calibrated prediction intervals:
|
|
41
|
+
|
|
42
|
+
| Workflow | What it does |
|
|
43
|
+
|----------|-------------|
|
|
44
|
+
| **Joint Quantile Regression** | Fit multiple quantiles in one call with non-crossing guarantees |
|
|
45
|
+
| **Conformalized Quantile Regression** | Calibrate intervals for finite-sample coverage guarantees |
|
|
46
|
+
| **Censored Quantile Regression** | Handle right- or left-censored (survival) data |
|
|
47
|
+
| **Evaluation & Metrics** | Pinball loss, coverage, interval score, crossing diagnostics |
|
|
48
|
+
| **Calibration Diagnostics** | Coverage by group/bin, nominal vs empirical, sharpness analysis |
|
|
49
|
+
| **Crossing Detection & Repair** | Diagnose and fix crossings from any quantile model |
|
|
50
|
+
|
|
51
|
+
### Feature Comparison
|
|
52
|
+
|
|
53
|
+
| Feature | This package | sklearn | statsmodels |
|
|
54
|
+
|---------|:---:|:---:|:---:|
|
|
55
|
+
| Multiple quantiles (joint fit) | Yes | No | No |
|
|
56
|
+
| Non-crossing guarantee | Yes | No | No |
|
|
57
|
+
| Multi-output regression | Yes | No | No |
|
|
58
|
+
| Analytical / kernel / cluster / bootstrap SEs | Yes | No | Partial |
|
|
59
|
+
| L1 / Elastic Net / SCAD / MCP | Yes | L1 only | No |
|
|
60
|
+
| Conformal calibration (CQR) | Yes | No | No |
|
|
61
|
+
| Calibration diagnostics | Yes | No | No |
|
|
62
|
+
| Evaluation metrics suite | Yes | Partial | No |
|
|
63
|
+
| Crossing detection + fix | Yes | No | No |
|
|
64
|
+
| Censored QR | Yes | No | No |
|
|
65
|
+
| Prediction intervals | Yes | No | No |
|
|
66
|
+
| Pseudo R² | Yes | No | Yes |
|
|
67
|
+
| Formula interface | Yes | No | Yes |
|
|
68
|
+
| Sklearn pipeline compatible | Yes | Yes | No |
|
|
69
|
+
|
|
70
|
+
## Installation
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install quantile-guard
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Optional extras:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
pip install quantile-guard[all] # formula interface + plots
|
|
80
|
+
pip install quantile-guard[plot] # matplotlib only
|
|
81
|
+
pip install quantile-guard[formula] # patsy only
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Quick Start
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
import numpy as np
|
|
88
|
+
from quantile_guard import QuantileRegression
|
|
89
|
+
|
|
90
|
+
X = np.random.default_rng(0).normal(size=(200, 3))
|
|
91
|
+
y = X @ [2.0, -1.5, 0.8] + np.random.default_rng(1).normal(scale=0.5, size=200)
|
|
92
|
+
|
|
93
|
+
# Fit 3 quantiles jointly — guaranteed non-crossing
|
|
94
|
+
model = QuantileRegression(tau=[0.1, 0.5, 0.9], se_method='analytical')
|
|
95
|
+
model.fit(X, y)
|
|
96
|
+
|
|
97
|
+
# Summaries with coefficients, SEs, p-values, and 95% CIs
|
|
98
|
+
print(model.summary()[0.5]['y'])
|
|
99
|
+
|
|
100
|
+
# Prediction intervals (guaranteed monotone: lower <= median <= upper)
|
|
101
|
+
interval = model.predict_interval(X[:5], coverage=0.80)
|
|
102
|
+
print(interval['y']['lower'], interval['y']['upper'])
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Conformal Calibration
|
|
106
|
+
|
|
107
|
+
Turn raw quantile predictions into intervals with coverage guarantees:
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
from quantile_guard.conformal import ConformalQuantileRegression
|
|
111
|
+
|
|
112
|
+
base = QuantileRegression(tau=[0.05, 0.5, 0.95], se_method='analytical')
|
|
113
|
+
cqr = ConformalQuantileRegression(base_estimator=base, coverage=0.90)
|
|
114
|
+
cqr.fit(X_train, y_train)
|
|
115
|
+
|
|
116
|
+
intervals = cqr.predict_interval(X_test)
|
|
117
|
+
print(cqr.empirical_coverage(X_test, y_test)) # approximately 0.90 or higher (marginal coverage guarantee)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Censored Quantile Regression
|
|
121
|
+
|
|
122
|
+
For survival data with right- or left-censoring:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from quantile_guard import CensoredQuantileRegression
|
|
126
|
+
|
|
127
|
+
model = CensoredQuantileRegression(tau=0.5, censoring='right', se_method='analytical')
|
|
128
|
+
model.fit(X, observed_time, event_indicator=delta)
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Evaluate Any Quantile Model
|
|
132
|
+
|
|
133
|
+
The metrics and diagnostics modules work with predictions from any source — not just this package:
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
from quantile_guard.metrics import quantile_evaluation_report
|
|
137
|
+
from quantile_guard.postprocess import crossing_summary
|
|
138
|
+
|
|
139
|
+
# Evaluate predictions from XGBoost, LightGBM, or any other model
|
|
140
|
+
report = quantile_evaluation_report(y_true, predictions, taus)
|
|
141
|
+
crossings = crossing_summary(predictions, taus)
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Regularization
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
QuantileRegression(tau=0.5, regularization='l1', alpha=0.1) # Lasso
|
|
148
|
+
QuantileRegression(tau=0.5, regularization='elasticnet', alpha=0.1, l1_ratio=0.5)
|
|
149
|
+
QuantileRegression(tau=0.5, regularization='scad', alpha=0.3) # Less bias on large coefficients
|
|
150
|
+
QuantileRegression(tau=0.5, regularization='mcp', alpha=0.3)
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### Inference Options
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
QuantileRegression(tau=0.5, se_method='analytical') # Fast asymptotic SEs
|
|
157
|
+
QuantileRegression(tau=0.5, se_method='kernel') # Heteroscedasticity-robust
|
|
158
|
+
QuantileRegression(tau=0.5, se_method='bootstrap', n_bootstrap=500)
|
|
159
|
+
# Cluster-robust SEs
|
|
160
|
+
model.fit(X, y, clusters=group_labels)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Benchmarks
|
|
164
|
+
|
|
165
|
+
Tested on heavy-tailed heteroscedastic data (Student-t noise, 10-20 features, up to 13 quantiles):
|
|
166
|
+
|
|
167
|
+
| n | features | quantiles | Crossing (this) | Crossing (sklearn) | Pinball (this) | Pinball (sklearn) |
|
|
168
|
+
|---:|---:|---:|---:|---:|---:|---:|
|
|
169
|
+
| 500 | 10 | 7 | **0%** | 11.0% | **0.5148** | 0.5166 |
|
|
170
|
+
| 500 | 10 | 13 | **0%** | 30.0% | **0.5095** | 0.5240 |
|
|
171
|
+
| 1,000 | 10 | 13 | **0%** | 16.5% | **0.5048** | 0.5071 |
|
|
172
|
+
| 2,000 | 20 | 13 | **0%** | 11.0% | **0.5599** | 0.5611 |
|
|
173
|
+
|
|
174
|
+
The joint formulation also achieves slightly better pinball loss — the non-crossing constraints act as beneficial regularization.
|
|
175
|
+
|
|
176
|
+
**Speed tradeoff:** This package solves a single joint LP with non-crossing constraints, which is slower than fitting each quantile independently. The value is in the guarantee and the richer downstream workflows. For single-quantile fits where speed matters most, sklearn or statsmodels may be more appropriate.
|
|
177
|
+
|
|
178
|
+
Full results: [Benchmarks](https://joshvern.github.io/quantile-guard/benchmarks/) | [Reproduce locally](https://joshvern.github.io/quantile-guard/benchmarks/#reproducing-these-results)
|
|
179
|
+
|
|
180
|
+
## When to Use This Package
|
|
181
|
+
|
|
182
|
+
**Use this when you need:**
|
|
183
|
+
- Multiple quantile predictions that must not cross (production pipelines, interval forecasts)
|
|
184
|
+
- Statistical inference on quantile coefficients (SEs, p-values, confidence intervals)
|
|
185
|
+
- Calibrated prediction intervals (conformal quantile regression)
|
|
186
|
+
- Censored/survival quantile models
|
|
187
|
+
- A complete evaluation workflow for any quantile model's predictions
|
|
188
|
+
|
|
189
|
+
**Use sklearn or statsmodels when:**
|
|
190
|
+
- You only need a single quantile (e.g., median regression)
|
|
191
|
+
- Raw speed matters more than crossing guarantees
|
|
192
|
+
- You don't need inference, calibration, or evaluation tooling
|
|
193
|
+
|
|
194
|
+
## Documentation
|
|
195
|
+
|
|
196
|
+
Full docs: [joshvern.github.io/quantile-guard](https://joshvern.github.io/quantile-guard/)
|
|
197
|
+
|
|
198
|
+
## Implementation
|
|
199
|
+
|
|
200
|
+
Quantile regression is naturally a linear program. This package solves joint multi-quantile LPs with non-crossing constraints using:
|
|
201
|
+
|
|
202
|
+
- **PDLP** — first-order primal-dual solver (default, from Google OR-Tools)
|
|
203
|
+
- **GLOP** — revised simplex (faster on small/medium problems)
|
|
204
|
+
- **HiGHS** — via scipy's sparse LP interface (memory-efficient)
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
QuantileRegression(tau=0.5, solver_backend='GLOP') # simplex
|
|
208
|
+
QuantileRegression(tau=0.5, use_sparse=True) # scipy sparse
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## Dependencies
|
|
212
|
+
|
|
213
|
+
**Required:** numpy, pandas, scipy, scikit-learn, ortools, tqdm, joblib
|
|
214
|
+
|
|
215
|
+
**Optional:** matplotlib (plots), patsy (formulas), statsmodels (benchmarks)
|
|
216
|
+
|
|
217
|
+
## Contributing
|
|
218
|
+
|
|
219
|
+
Contributions welcome! Open an issue or submit a pull request on [GitHub](https://github.com/joshvern/quantile-guard).
|
|
220
|
+
|
|
221
|
+
## License
|
|
222
|
+
|
|
223
|
+
MIT
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "quantile-guard"
|
|
7
|
+
version = "0.5.0"
|
|
8
|
+
description = "Non-crossing quantile regression toolkit with joint multi-quantile fitting, inference, conformal calibration, and evaluation. Scikit-learn compatible."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
authors = [{ name = "Joshua Vernazza" }]
|
|
12
|
+
requires-python = ">=3.9"
|
|
13
|
+
keywords = ["quantile-regression", "non-crossing", "prediction-intervals", "conformal", "calibration", "statistics", "machine-learning", "scikit-learn"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
18
|
+
"Programming Language :: Python :: 3.9",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Operating System :: OS Independent",
|
|
22
|
+
]
|
|
23
|
+
dependencies = [
|
|
24
|
+
"ortools>=9.0.9047",
|
|
25
|
+
"numpy>=1.18.0",
|
|
26
|
+
"pandas>=1.0.0",
|
|
27
|
+
"scipy>=1.4.0",
|
|
28
|
+
"tqdm>=4.50.0",
|
|
29
|
+
"joblib>=1.0.0",
|
|
30
|
+
"scikit-learn>=0.22.0",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.urls]
|
|
34
|
+
Repository = "https://github.com/joshvern/quantile-guard"
|
|
35
|
+
Issues = "https://github.com/joshvern/quantile-guard/issues"
|
|
36
|
+
Documentation = "https://joshvern.github.io/quantile-guard/"
|
|
37
|
+
|
|
38
|
+
[project.optional-dependencies]
|
|
39
|
+
test = [
|
|
40
|
+
"pytest>=7",
|
|
41
|
+
]
|
|
42
|
+
formula = [
|
|
43
|
+
"patsy>=0.5.0",
|
|
44
|
+
]
|
|
45
|
+
plot = [
|
|
46
|
+
"matplotlib>=3.1.0",
|
|
47
|
+
]
|
|
48
|
+
benchmark = [
|
|
49
|
+
"statsmodels>=0.13.0",
|
|
50
|
+
"matplotlib>=3.1.0",
|
|
51
|
+
]
|
|
52
|
+
all = [
|
|
53
|
+
"patsy>=0.5.0",
|
|
54
|
+
"matplotlib>=3.1.0",
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
[tool.setuptools]
|
|
58
|
+
include-package-data = true
|
|
59
|
+
|
|
60
|
+
[tool.setuptools.packages.find]
|
|
61
|
+
where = ["src"]
|
|
62
|
+
|
|
63
|
+
[tool.pytest.ini_options]
|
|
64
|
+
addopts = "-q --ignore=tests/bench_vs_sklearn.py --ignore=tests/bench_vs_r.py"
|
|
65
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# quantile_guard/__init__.py
|
|
2
|
+
|
|
3
|
+
from .quantile_regression import QuantileRegression, CensoredQuantileRegression
|
|
4
|
+
|
|
5
|
+
__all__ = ['QuantileRegression', 'CensoredQuantileRegression']
|
|
6
|
+
|
|
7
|
+
# Submodules available via:
|
|
8
|
+
# from quantile_guard.metrics import pinball_loss, ...
|
|
9
|
+
# from quantile_guard.postprocess import rearrange_quantiles, ...
|
|
10
|
+
# from quantile_guard.conformal import ConformalQuantileRegression
|
|
11
|
+
# from quantile_guard.calibration import calibration_summary, ...
|