deup 0.1.1__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deup-0.1.1 → deup-0.3.0}/CHANGELOG.md +32 -0
- {deup-0.1.1 → deup-0.3.0}/PKG-INFO +9 -8
- {deup-0.1.1 → deup-0.3.0}/README.md +8 -7
- deup-0.3.0/docs/api/calibration.md +7 -0
- deup-0.3.0/docs/api/decomposition.md +17 -0
- deup-0.3.0/docs/api/diagnostics.md +23 -0
- deup-0.3.0/docs/api/domains.md +23 -0
- deup-0.3.0/docs/api/estimators.md +7 -0
- deup-0.3.0/docs/api/features.md +13 -0
- deup-0.3.0/docs/calibration.md +89 -0
- deup-0.3.0/docs/decomposition.md +98 -0
- deup-0.3.0/docs/domains.md +57 -0
- deup-0.3.0/docs/features.md +119 -0
- {deup-0.1.1 → deup-0.3.0}/docs/getting-started.md +100 -11
- {deup-0.1.1 → deup-0.3.0}/docs/index.md +14 -0
- deup-0.3.0/docs/javascripts/mathjax.js +16 -0
- deup-0.3.0/docs/reliability.md +96 -0
- deup-0.3.0/docs/theory.md +197 -0
- deup-0.3.0/examples/mapie_interop.py +65 -0
- {deup-0.1.1 → deup-0.3.0}/mkdocs.yml +23 -1
- {deup-0.1.1 → deup-0.3.0}/pyproject.toml +2 -2
- {deup-0.1.1 → deup-0.3.0}/src/deup/__init__.py +3 -3
- deup-0.3.0/src/deup/calibration/__init__.py +21 -0
- deup-0.3.0/src/deup/calibration/conformal.py +215 -0
- {deup-0.1.1 → deup-0.3.0}/src/deup/core/__init__.py +22 -0
- deup-0.3.0/src/deup/core/aleatoric.py +165 -0
- deup-0.3.0/src/deup/core/decompose.py +266 -0
- deup-0.3.0/src/deup/core/error_estimator.py +136 -0
- deup-0.3.0/src/deup/core/features/__init__.py +28 -0
- deup-0.3.0/src/deup/core/features/density.py +123 -0
- deup-0.3.0/src/deup/core/features/distance.py +47 -0
- deup-0.3.0/src/deup/core/features/pipeline.py +61 -0
- deup-0.3.0/src/deup/core/features/raw.py +36 -0
- deup-0.3.0/src/deup/core/features/residual.py +62 -0
- deup-0.3.0/src/deup/core/features/seen_bit.py +55 -0
- deup-0.3.0/src/deup/core/features/variance.py +95 -0
- deup-0.3.0/src/deup/diagnostics/__init__.py +43 -0
- deup-0.3.0/src/deup/diagnostics/aggregation.py +212 -0
- deup-0.3.0/src/deup/diagnostics/health.py +195 -0
- deup-0.3.0/src/deup/domains/__init__.py +9 -0
- deup-0.3.0/src/deup/domains/finance.py +256 -0
- deup-0.3.0/src/deup/domains/tabular.py +79 -0
- deup-0.3.0/src/deup/domains/vision.py +167 -0
- deup-0.3.0/src/deup/estimators.py +429 -0
- deup-0.3.0/tests/test_calibration.py +165 -0
- deup-0.3.0/tests/test_decompose.py +223 -0
- deup-0.3.0/tests/test_diagnostics.py +198 -0
- deup-0.3.0/tests/test_domains.py +147 -0
- deup-0.3.0/tests/test_estimators.py +239 -0
- deup-0.3.0/tests/test_features.py +143 -0
- deup-0.1.1/docs/api/estimators.md +0 -3
- deup-0.1.1/src/deup/estimators.py +0 -140
- deup-0.1.1/tests/test_estimators.py +0 -108
- {deup-0.1.1 → deup-0.3.0}/.github/workflows/ci.yml +0 -0
- {deup-0.1.1 → deup-0.3.0}/.github/workflows/docs.yml +0 -0
- {deup-0.1.1 → deup-0.3.0}/.github/workflows/release.yml +0 -0
- {deup-0.1.1 → deup-0.3.0}/.gitignore +0 -0
- {deup-0.1.1 → deup-0.3.0}/.pre-commit-config.yaml +0 -0
- {deup-0.1.1 → deup-0.3.0}/ARCHITECTURE.md +0 -0
- {deup-0.1.1 → deup-0.3.0}/BENCHMARKS.md +0 -0
- {deup-0.1.1 → deup-0.3.0}/CITATION.cff +0 -0
- {deup-0.1.1 → deup-0.3.0}/LICENSE +0 -0
- {deup-0.1.1 → deup-0.3.0}/RELEASING.md +0 -0
- {deup-0.1.1 → deup-0.3.0}/benchmarks/__init__.py +0 -0
- {deup-0.1.1 → deup-0.3.0}/benchmarks/results/regression_benchmark.json +0 -0
- {deup-0.1.1 → deup-0.3.0}/benchmarks/run_regression_benchmark.py +0 -0
- {deup-0.1.1 → deup-0.3.0}/docs/api/core.md +0 -0
- {deup-0.1.1 → deup-0.3.0}/docs/api/splitters.md +0 -0
- {deup-0.1.1 → deup-0.3.0}/docs/benchmarks.md +0 -0
- {deup-0.1.1 → deup-0.3.0}/docs/losses.md +0 -0
- {deup-0.1.1 → deup-0.3.0}/src/deup/core/grouping.py +0 -0
- {deup-0.1.1 → deup-0.3.0}/src/deup/core/losses.py +0 -0
- {deup-0.1.1 → deup-0.3.0}/src/deup/core/oof.py +0 -0
- {deup-0.1.1 → deup-0.3.0}/src/deup/core/protocols.py +0 -0
- {deup-0.1.1 → deup-0.3.0}/src/deup/core/types.py +0 -0
- {deup-0.1.1 → deup-0.3.0}/src/deup/py.typed +0 -0
- {deup-0.1.1 → deup-0.3.0}/src/deup/splitters.py +0 -0
- {deup-0.1.1 → deup-0.3.0}/tests/test_benchmark_smoke.py +0 -0
- {deup-0.1.1 → deup-0.3.0}/tests/test_grouping.py +0 -0
- {deup-0.1.1 → deup-0.3.0}/tests/test_losses.py +0 -0
- {deup-0.1.1 → deup-0.3.0}/tests/test_oof.py +0 -0
- {deup-0.1.1 → deup-0.3.0}/tests/test_protocols.py +0 -0
- {deup-0.1.1 → deup-0.3.0}/tests/test_smoke.py +0 -0
- {deup-0.1.1 → deup-0.3.0}/tests/test_splitters.py +0 -0
- {deup-0.1.1 → deup-0.3.0}/tests/test_types.py +0 -0
|
@@ -1,5 +1,37 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [Unreleased]
|
|
4
|
+
|
|
5
|
+
## [0.3.0] — 2026-06-05
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- **Reliability diagnostics** (`deup.diagnostics`): `AggregationReliability` /
|
|
10
|
+
`should_trust_aggregate` (Finding 1) and pluggable `HealthIndex` (Finding 2).
|
|
11
|
+
- **Domain presets** (`deup.domains`):
|
|
12
|
+
- `CrossSectionalDEUP` — finance flagship: `PurgedWalkForward`, rank
|
|
13
|
+
residualization, vol/breadth/regime g-features, `HealthIndex`, multi-horizon
|
|
14
|
+
targets, panel DataFrame API.
|
|
15
|
+
- `TabularDEUP` — KFold + raw X + Mahalanobis density.
|
|
16
|
+
- `VisionDEUP` — embedding → density + variance for OOD classification.
|
|
17
|
+
- Docs: `reliability.md`, `domains.md`, API pages.
|
|
18
|
+
|
|
19
|
+
### Changed
|
|
20
|
+
|
|
21
|
+
- PyPI release bundles P5–P10 features (feature builders through domain presets).
|
|
22
|
+
|
|
23
|
+
## [0.2.0] — 2026-06-05
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
|
|
27
|
+
- **`DEUPClassifier`** — classification with log-loss / Brier OOF errors + `predict_proba`
|
|
28
|
+
- **`DEUPRanker`** — cross-sectional ranking; `loss="rank"`, `PurgedWalkForward` default,
|
|
29
|
+
rank-geometry residualization ON by default (Finding 3)
|
|
30
|
+
- **`acquire(pool, k)`** — active-learning hook (top-k by epistemic uncertainty)
|
|
31
|
+
- Refactored **`DEUPRegressor`** onto `ErrorEstimator` + optional `features` /
|
|
32
|
+
`aleatoric` / `decompose`
|
|
33
|
+
- Docs updated for all three estimators and `acquire`
|
|
34
|
+
|
|
3
35
|
## [0.1.1] — 2026-06-04
|
|
4
36
|
|
|
5
37
|
First release published to PyPI.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deup
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Direct Epistemic Uncertainty Prediction (DEUP) for any scikit-learn model, with first-class time-series support.
|
|
5
5
|
Project-URL: Homepage, https://github.com/ursinasanderink/deup
|
|
6
6
|
Project-URL: Repository, https://github.com/ursinasanderink/deup
|
|
@@ -67,12 +67,13 @@ model.fit(X_train, y_train)
|
|
|
67
67
|
pred, unc = model.predict(X_test, return_uncertainty=True)
|
|
68
68
|
```
|
|
69
69
|
|
|
70
|
-
For time-series / cross-sectional
|
|
70
|
+
For time-series / cross-sectional finance panels:
|
|
71
71
|
|
|
72
72
|
```python
|
|
73
|
-
from deup.
|
|
73
|
+
from deup.domains.finance import CrossSectionalDEUP
|
|
74
74
|
|
|
75
|
-
model = DEUPRegressor(base_model=my_model, cv=PurgedWalkForward(embargo=5))
|
|
75
|
+
model = CrossSectionalDEUP(horizon=20).fit(panel_df)
|
|
76
|
+
pred, unc = model.predict(test_df, return_uncertainty=True)
|
|
76
77
|
```
|
|
77
78
|
|
|
78
79
|
## Install
|
|
@@ -80,6 +81,7 @@ model = DEUPRegressor(base_model=my_model, cv=PurgedWalkForward(embargo=5))
|
|
|
80
81
|
```bash
|
|
81
82
|
pip install deup # core (numpy + scikit-learn)
|
|
82
83
|
pip install "deup[gbm]" # + LightGBM error predictor
|
|
84
|
+
pip install "deup[finance]" # + pandas (CrossSectionalDEUP)
|
|
83
85
|
pip install "deup[docs]" # + MkDocs site locally
|
|
84
86
|
```
|
|
85
87
|
|
|
@@ -96,11 +98,10 @@ than ensemble disagreement or a conformal residual baseline — see [BENCHMARKS.
|
|
|
96
98
|
|
|
97
99
|
## Status / roadmap
|
|
98
100
|
|
|
99
|
-
**v0.
|
|
100
|
-
(
|
|
101
|
+
**v0.3 (current):** everything in v0.2 plus aggregation-reliability diagnostics
|
|
102
|
+
(Findings 1–2), domain presets (`CrossSectionalDEUP`, `TabularDEUP`, `VisionDEUP`).
|
|
101
103
|
|
|
102
|
-
**
|
|
103
|
-
density/GP features, aggregation-reliability diagnostics.
|
|
104
|
+
**Next:** thesis parity migration (P11), full benchmark suite with N-sweep (P12).
|
|
104
105
|
|
|
105
106
|
## Citing
|
|
106
107
|
|
|
@@ -26,12 +26,13 @@ model.fit(X_train, y_train)
|
|
|
26
26
|
pred, unc = model.predict(X_test, return_uncertainty=True)
|
|
27
27
|
```
|
|
28
28
|
|
|
29
|
-
For time-series / cross-sectional
|
|
29
|
+
For time-series / cross-sectional finance panels:
|
|
30
30
|
|
|
31
31
|
```python
|
|
32
|
-
from deup.
|
|
32
|
+
from deup.domains.finance import CrossSectionalDEUP
|
|
33
33
|
|
|
34
|
-
model = DEUPRegressor(base_model=my_model, cv=PurgedWalkForward(embargo=5))
|
|
34
|
+
model = CrossSectionalDEUP(horizon=20).fit(panel_df)
|
|
35
|
+
pred, unc = model.predict(test_df, return_uncertainty=True)
|
|
35
36
|
```
|
|
36
37
|
|
|
37
38
|
## Install
|
|
@@ -39,6 +40,7 @@ model = DEUPRegressor(base_model=my_model, cv=PurgedWalkForward(embargo=5))
|
|
|
39
40
|
```bash
|
|
40
41
|
pip install deup # core (numpy + scikit-learn)
|
|
41
42
|
pip install "deup[gbm]" # + LightGBM error predictor
|
|
43
|
+
pip install "deup[finance]" # + pandas (CrossSectionalDEUP)
|
|
42
44
|
pip install "deup[docs]" # + MkDocs site locally
|
|
43
45
|
```
|
|
44
46
|
|
|
@@ -55,11 +57,10 @@ than ensemble disagreement or a conformal residual baseline — see [BENCHMARKS.
|
|
|
55
57
|
|
|
56
58
|
## Status / roadmap
|
|
57
59
|
|
|
58
|
-
**v0.
|
|
59
|
-
(
|
|
60
|
+
**v0.3 (current):** everything in v0.2 plus aggregation-reliability diagnostics
|
|
61
|
+
(Findings 1–2), domain presets (`CrossSectionalDEUP`, `TabularDEUP`, `VisionDEUP`).
|
|
60
62
|
|
|
61
|
-
**
|
|
62
|
-
density/GP features, aggregation-reliability diagnostics.
|
|
63
|
+
**Next:** thesis parity migration (P11), full benchmark suite with N-sweep (P12).
|
|
63
64
|
|
|
64
65
|
## Citing
|
|
65
66
|
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
::: deup.core.error_estimator.ErrorEstimator
|
|
2
|
+
|
|
3
|
+
::: deup.core.aleatoric.Homoscedastic
|
|
4
|
+
|
|
5
|
+
::: deup.core.aleatoric.Heteroscedastic
|
|
6
|
+
|
|
7
|
+
::: deup.core.aleatoric.Quantile
|
|
8
|
+
|
|
9
|
+
::: deup.core.decompose.decompose_epistemic
|
|
10
|
+
|
|
11
|
+
::: deup.core.decompose.RankResidualizer
|
|
12
|
+
|
|
13
|
+
::: deup.core.decompose.coupling_retention_report
|
|
14
|
+
|
|
15
|
+
::: deup.core.decompose.density_kill_criterion
|
|
16
|
+
|
|
17
|
+
::: deup.core.decompose.partial_correlation
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# API: Diagnostics
|
|
2
|
+
|
|
3
|
+
## Aggregation reliability
|
|
4
|
+
|
|
5
|
+
::: deup.diagnostics.aggregation.AggregationReliability
|
|
6
|
+
|
|
7
|
+
::: deup.diagnostics.aggregation.AggregationVerdict
|
|
8
|
+
|
|
9
|
+
::: deup.diagnostics.aggregation.effective_sample_size
|
|
10
|
+
|
|
11
|
+
::: deup.diagnostics.aggregation.should_trust_aggregate
|
|
12
|
+
|
|
13
|
+
## Composite health index
|
|
14
|
+
|
|
15
|
+
::: deup.diagnostics.health.HealthIndex
|
|
16
|
+
|
|
17
|
+
::: deup.diagnostics.health.HealthReport
|
|
18
|
+
|
|
19
|
+
::: deup.diagnostics.health.realized_efficacy
|
|
20
|
+
|
|
21
|
+
::: deup.diagnostics.health.drift_psi
|
|
22
|
+
|
|
23
|
+
::: deup.diagnostics.health.model_disagreement
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# API: Domain presets
|
|
2
|
+
|
|
3
|
+
## Finance
|
|
4
|
+
|
|
5
|
+
::: deup.domains.finance.CrossSectionalDEUP
|
|
6
|
+
|
|
7
|
+
::: deup.domains.finance.enrich_panel
|
|
8
|
+
|
|
9
|
+
::: deup.domains.finance.FINANCE_G_FEATURES
|
|
10
|
+
|
|
11
|
+
## Tabular
|
|
12
|
+
|
|
13
|
+
::: deup.domains.tabular.TabularDEUP
|
|
14
|
+
|
|
15
|
+
::: deup.domains.tabular.tabular_feature_pipeline
|
|
16
|
+
|
|
17
|
+
## Vision
|
|
18
|
+
|
|
19
|
+
::: deup.domains.vision.VisionDEUP
|
|
20
|
+
|
|
21
|
+
::: deup.domains.vision.EmbeddingUncertaintyFeatures
|
|
22
|
+
|
|
23
|
+
::: deup.domains.vision.IdentityEmbedding
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
::: deup.core.features.FeaturePipeline
|
|
2
|
+
|
|
3
|
+
::: deup.core.features.RawFeatures
|
|
4
|
+
|
|
5
|
+
::: deup.core.features.DensityFeature
|
|
6
|
+
|
|
7
|
+
::: deup.core.features.VarianceFeature
|
|
8
|
+
|
|
9
|
+
::: deup.core.features.DistanceToTrain
|
|
10
|
+
|
|
11
|
+
::: deup.core.features.SeenBit
|
|
12
|
+
|
|
13
|
+
::: deup.core.features.ResidualMagnitude
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# Conformal calibration
|
|
2
|
+
|
|
3
|
+
DEUP's `predict_epistemic` returns an *uncalibrated* score: higher means "less
|
|
4
|
+
trustworthy", but not a probability. **Split-conformal calibration** turns it into
|
|
5
|
+
prediction intervals with finite-sample, distribution-free marginal coverage
|
|
6
|
+
$P(y \in [\hat{y}^-, \hat{y}^+]) \ge 1 - \alpha$ — using the DEUP signal as the
|
|
7
|
+
interval's *width*.
|
|
8
|
+
|
|
9
|
+
## How it works
|
|
10
|
+
|
|
11
|
+
On a **held-out** calibration set, compute normalized residuals
|
|
12
|
+
$r_i = |y_i - f(x_i)| / g(x_i)$ and take their $(1-\alpha)$ empirical quantile $q$.
|
|
13
|
+
The interval at a new point is
|
|
14
|
+
|
|
15
|
+
$$
|
|
16
|
+
[\,f(x) - q\,g(x),\;\; f(x) + q\,g(x)\,].
|
|
17
|
+
$$
|
|
18
|
+
|
|
19
|
+
Intervals are **narrow where $g$ is small** (confident) and wide where $g$ is large —
|
|
20
|
+
locally adaptive coverage, unlike a constant-width baseline.
|
|
21
|
+
|
|
22
|
+
## Usage
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
from deup import DEUPRegressor
|
|
26
|
+
|
|
27
|
+
model = DEUPRegressor(base_model=my_model).fit(X_train, y_train)
|
|
28
|
+
|
|
29
|
+
# calibrate on a separate held-out split (NOT the training data)
|
|
30
|
+
model.calibrate(X_cal, y_cal, method="normalized", alpha=0.1)
|
|
31
|
+
|
|
32
|
+
interval = model.predict_interval(X_test)
|
|
33
|
+
interval.lower, interval.upper, interval.width
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
!!! warning "Use held-out data"
|
|
37
|
+
Coverage guarantees require the calibration set to be unseen by both the base model
|
|
38
|
+
$f$ and the error model $g$. Don't calibrate on training rows.
|
|
39
|
+
|
|
40
|
+
## Methods
|
|
41
|
+
|
|
42
|
+
| `method` | Score | Use when |
|
|
43
|
+
|---|---|---|
|
|
44
|
+
| `normalized` (default) | $\lvert y-f(x)\rvert / g(x)$ | locally adaptive intervals |
|
|
45
|
+
| `mondrian` | per-group quantile | group/regime-conditional coverage |
|
|
46
|
+
| `cqr` | conformalized quantile regression | you already have quantile models |
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
# Mondrian: group-conditional coverage (e.g. per regime)
|
|
50
|
+
model.calibrate(X_cal, y_cal, method="mondrian", alpha=0.1, groups=regime_cal)
|
|
51
|
+
interval = model.predict_interval(X_test, groups=regime_test)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
The standalone `UncertaintyCalibrator` works with raw arrays (any base model):
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
from deup.calibration import UncertaintyCalibrator
|
|
58
|
+
|
|
59
|
+
cal = UncertaintyCalibrator(method="normalized", alpha=0.1)
|
|
60
|
+
cal.fit(y_cal, y_pred_cal, uncertainty_cal)
|
|
61
|
+
interval = cal.predict_interval(y_pred_test, uncertainty_test)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## MAPIE interop
|
|
65
|
+
|
|
66
|
+
`deup` is **complementary** to [MAPIE](https://mapie.readthedocs.io/): MAPIE supplies
|
|
67
|
+
mature conformal machinery, DEUP supplies a high-quality per-point scale $g(x)$. Expose
|
|
68
|
+
the DEUP scale as a normalizer:
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
from deup.calibration import deup_normalizer
|
|
72
|
+
|
|
73
|
+
normalizer = deup_normalizer(model) # .predict(X) == model.predict_epistemic(X)
|
|
74
|
+
scale = normalizer.predict(X_cal) # feed into MAPIE as a residual scale
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
See [`examples/mapie_interop.py`](https://github.com/ursinasanderink/deup/blob/main/examples/mapie_interop.py)
|
|
78
|
+
for a runnable script.
|
|
79
|
+
|
|
80
|
+
## Coverage guarantee
|
|
81
|
+
|
|
82
|
+
Split conformal gives the finite-sample bound (Lei et al., 2018; the upper bound assumes the conformity scores are almost surely distinct)
|
|
83
|
+
|
|
84
|
+
$$
|
|
85
|
+
1 - \alpha \;\le\; P(y \in \hat{C}(x)) \;\le\; 1 - \alpha + \tfrac{1}{n_{\text{cal}}+1},
|
|
86
|
+
$$
|
|
87
|
+
|
|
88
|
+
so intervals may *slightly over-cover*; this is correct, not a bug. `deup`'s test suite
|
|
89
|
+
checks empirical coverage within tolerance on i.i.d. and purged time-split fixtures.
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# Decomposition & rank residualization
|
|
2
|
+
|
|
3
|
+
This page covers the v0.2 components that turn the raw error estimate $g(x)$ into a
|
|
4
|
+
reported epistemic signal: the error estimator, aleatoric estimators, the
|
|
5
|
+
$\hat{e} = \max(0, g - a)$ decomposition, and cross-sectional rank-geometry
|
|
6
|
+
residualization. See [Theory](theory.md) for the underlying math.
|
|
7
|
+
|
|
8
|
+
## ErrorEstimator
|
|
9
|
+
|
|
10
|
+
`ErrorEstimator` is the reusable DEUP error model $g$ — feature pipeline +
|
|
11
|
+
target transform + non-negativity, fit on out-of-fold errors.
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from deup.core import ErrorEstimator
|
|
15
|
+
from deup.core.features import DensityFeature, FeaturePipeline, RawFeatures
|
|
16
|
+
from deup.core.oof import OOFErrorCollector
|
|
17
|
+
from sklearn.ensemble import RandomForestRegressor
|
|
18
|
+
from sklearn.model_selection import KFold
|
|
19
|
+
|
|
20
|
+
oof = OOFErrorCollector(
|
|
21
|
+
RandomForestRegressor(), cv=KFold(5), loss="squared"
|
|
22
|
+
).fit_collect(X, y)
|
|
23
|
+
|
|
24
|
+
g = ErrorEstimator(
|
|
25
|
+
features=FeaturePipeline([("raw", RawFeatures()), ("density", DensityFeature())]),
|
|
26
|
+
target_transform="log",
|
|
27
|
+
).fit(X[oof.indices], oof.errors)
|
|
28
|
+
|
|
29
|
+
error_estimate = g.predict(X_new) # >= 0
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Aleatoric estimators $a(x)$
|
|
33
|
+
|
|
34
|
+
Model-agnostic estimates of the irreducible noise floor $A(x) = \mathrm{Var}(Y\mid X=x)$
|
|
35
|
+
(variance scale, matching a squared-error target).
|
|
36
|
+
|
|
37
|
+
| Estimator | $a(x)$ | When |
|
|
38
|
+
|---|---|---|
|
|
39
|
+
| `Homoscedastic` | constant $\sigma^2$ | noise ~ constant across $\mathcal{X}$ |
|
|
40
|
+
| `Heteroscedastic` | local k-NN label variance | input-dependent noise |
|
|
41
|
+
| `Quantile` | $((q_{hi}-q_{lo})/z)^2$ from quantile regression | skewed / tail noise |
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from deup.core import Heteroscedastic
|
|
45
|
+
|
|
46
|
+
a = Heteroscedastic(k=20).fit(X, y).predict(X_new)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Decomposition
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from deup.core import decompose_epistemic
|
|
53
|
+
|
|
54
|
+
e_hat = decompose_epistemic(error_estimate, a) # max(0, g - a)
|
|
55
|
+
# a=None -> conservative proxy e_hat = g (the v0.1 default)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
$\hat{e}$ is always non-negative.
|
|
59
|
+
|
|
60
|
+
## Rank-geometry residualization (Finding 3)
|
|
61
|
+
|
|
62
|
+
For cross-sectional rankers, $g$ and the loss target can be partly **mechanical rank
|
|
63
|
+
geometry** rather than genuine error. `RankResidualizer` fits an isotonic map from the
|
|
64
|
+
within-group rank of $|score|$ to the signal and subtracts it, leaving the part *not*
|
|
65
|
+
explained by rank geometry.
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from deup.core import RankResidualizer, coupling_retention_report
|
|
69
|
+
|
|
70
|
+
# decouple g from rank geometry, per date
|
|
71
|
+
res = RankResidualizer().fit(g_values, abs_score, groups=dates)
|
|
72
|
+
g_decoupled = res.transform(g_values, abs_score, groups=dates)
|
|
73
|
+
|
|
74
|
+
# diagnostics: coupling before/after + loss-association retention
|
|
75
|
+
report = coupling_retention_report(g_values, score, loss, groups=dates)
|
|
76
|
+
print(report.coupling_before, report.coupling_after, report.retention)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
!!! note "Thesis finding"
|
|
80
|
+
Residualization decoupled the signal (per-date $\rho(\hat{e}, |score|)$:
|
|
81
|
+
$0.616 \to 0.317$) while **retaining ~92.5%** of the loss association. This is
|
|
82
|
+
**off by default** elsewhere but **on by default in `DEUPRanker`** (P7).
|
|
83
|
+
|
|
84
|
+
## Density kill criterion (Finding 3 corollary)
|
|
85
|
+
|
|
86
|
+
Density features can be an **informative null** in homogeneous universes. The kill
|
|
87
|
+
criterion drops them when their gain importance is negligible **and** they barely move
|
|
88
|
+
the loss partial-correlation.
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from deup.core import density_kill_criterion
|
|
92
|
+
|
|
93
|
+
decision = density_kill_criterion(gain_importance=1e-5, delta_partial_corr=0.001)
|
|
94
|
+
print(decision.keep, decision.reason) # False, "killed: ..."
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Use `partial_correlation(a, b, control)` to compute the change $\Delta$ in partial correlation
|
|
98
|
+
with vs without the density feature.
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Domain presets
|
|
2
|
+
|
|
3
|
+
The core library is domain-agnostic; these modules are **thin presets** that wire the
|
|
4
|
+
right splitter, features, and diagnostics for common workflows. They do not duplicate
|
|
5
|
+
OOF collection or error-estimator logic — see ``ARCHITECTURE.md``.
|
|
6
|
+
|
|
7
|
+
## Cross-sectional finance (flagship)
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
import pandas as pd
|
|
11
|
+
from deup.domains.finance import CrossSectionalDEUP
|
|
12
|
+
|
|
13
|
+
# long-format panel: one row per (date, asset)
|
|
14
|
+
panel = pd.read_parquet("signals.parquet") # columns: date, score, vol_20d, ...
|
|
15
|
+
|
|
16
|
+
model = CrossSectionalDEUP(horizon=20, cv=5, embargo=1).fit(panel)
|
|
17
|
+
model.calibrate(cal_panel, alpha=0.1)
|
|
18
|
+
|
|
19
|
+
pred, unc = model.predict(test_panel, return_uncertainty=True)
|
|
20
|
+
health = model.health_report(test_panel) # per-date context gating (Finding 2)
|
|
21
|
+
health.gate # bool per date: trust / trade?
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
Defaults wired in:
|
|
25
|
+
|
|
26
|
+
| Setting | Value |
|
|
27
|
+
|---|---|
|
|
28
|
+
| Estimator | `deup.estimators.DEUPRanker` |
|
|
29
|
+
| CV | `deup.splitters.PurgedWalkForward` + embargo |
|
|
30
|
+
| Rank geometry | residualization **ON** (Finding 3) |
|
|
31
|
+
| g-features | vol / breadth / regime preset columns when present |
|
|
32
|
+
| Context health | `deup.diagnostics.HealthIndex` |
|
|
33
|
+
|
|
34
|
+
Requires ``pip install "deup[finance]"`` (pandas).
|
|
35
|
+
|
|
36
|
+
## Generic tabular
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
from deup.domains.tabular import TabularDEUP
|
|
40
|
+
|
|
41
|
+
model = TabularDEUP(task="regression", cv=5).fit(X, y)
|
|
42
|
+
unc = model.predict_epistemic(X_test)
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Wires ``KFold`` + raw ``X`` + Mahalanobis density features for ``g``.
|
|
46
|
+
|
|
47
|
+
## Vision / OOD classification
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from deup.domains.vision import VisionDEUP
|
|
51
|
+
|
|
52
|
+
model = VisionDEUP(cv=5).fit(images, labels) # tensors OK — auto-flattened
|
|
53
|
+
unc = model.predict_epistemic(images)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Wires embedding → density + variance features for ``g`` (CIFAR-style high-N path).
|
|
57
|
+
Pass a custom ``embedding=`` transformer or callable for CNN embeddings.
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# Feature builders for $g(x)$
|
|
2
|
+
|
|
3
|
+
The error predictor $g$ in DEUP can use **stationarizing features**
|
|
4
|
+
$\phi_{z^N}(x)$ beyond raw inputs (Lahlou *et al.*, 2023, Sec. 3.2). Each builder
|
|
5
|
+
is a scikit-learn `TransformerMixin` that **fits on training data only** — the same
|
|
6
|
+
leakage discipline as `OOFErrorCollector` (Finding 4).
|
|
7
|
+
|
|
8
|
+
See [Theory](theory.md) for the mathematical definitions.
|
|
9
|
+
|
|
10
|
+
## Quick example
|
|
11
|
+
|
|
12
|
+
```python
|
|
13
|
+
import numpy as np
|
|
14
|
+
from sklearn.ensemble import RandomForestRegressor
|
|
15
|
+
|
|
16
|
+
from deup.core.features import (
|
|
17
|
+
DensityFeature,
|
|
18
|
+
DistanceToTrain,
|
|
19
|
+
FeaturePipeline,
|
|
20
|
+
RawFeatures,
|
|
21
|
+
SeenBit,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
pipe = FeaturePipeline([
|
|
25
|
+
("raw", RawFeatures()),
|
|
26
|
+
("density", DensityFeature(method="mahalanobis")),
|
|
27
|
+
("dist", DistanceToTrain(k=5)),
|
|
28
|
+
("seen", SeenBit(atol=1e-8)),
|
|
29
|
+
])
|
|
30
|
+
|
|
31
|
+
X_train = np.random.default_rng(0).normal(size=(500, 8))
|
|
32
|
+
X_test = np.random.default_rng(1).normal(size=(50, 8))
|
|
33
|
+
|
|
34
|
+
phi_train = pipe.fit_transform(X_train)
|
|
35
|
+
phi_test = pipe.transform(X_test)
|
|
36
|
+
print(phi_train.shape, phi_test.shape) # (500, 8+1+1+1), (50, ...)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Builders
|
|
40
|
+
|
|
41
|
+
| Class | Output | Methods / notes |
|
|
42
|
+
|---|---|---|
|
|
43
|
+
| `RawFeatures` | $x$ | passthrough |
|
|
44
|
+
| `DensityFeature` | $\log \hat{q}(x)$ column | `mahalanobis`, `knn`, `kde`; `flow` requires `[torch]` |
|
|
45
|
+
| `VarianceFeature` | $\log \hat{V}(x)$ column | `ensemble` (bootstrap); `gp` requires `[torch]` |
|
|
46
|
+
| `DistanceToTrain` | $k$-th NN distance | default `k=5` |
|
|
47
|
+
| `SeenBit` | $s \in \{0,1\}$ | exact / `atol` duplicate detection |
|
|
48
|
+
| `ResidualMagnitude` | kNN-smoothed $\lvert y-f(x)\rvert$ | needs `estimator` + `y` at `fit` |
|
|
49
|
+
|
|
50
|
+
### DensityFeature
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
# Diagonal Gaussian — matches thesis GaussianDensity.log_prob (Lee et al. 2018)
|
|
54
|
+
DensityFeature(method="mahalanobis")
|
|
55
|
+
|
|
56
|
+
# k-NN distance proxy: log q ≈ -log(d_k + ε)
|
|
57
|
+
DensityFeature(method="knn", k=5)
|
|
58
|
+
|
|
59
|
+
# sklearn KernelDensity
|
|
60
|
+
DensityFeature(method="kde", bandwidth=1.0)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
!!! warning "Finding 3"
|
|
64
|
+
Density can be an **informative null** in homogeneous tabular panels. Ablate with
|
|
65
|
+
`FeaturePipeline` column importances or drop if $\Delta\rho < 0.005$.
|
|
66
|
+
|
|
67
|
+
### VarianceFeature (ensemble)
|
|
68
|
+
|
|
69
|
+
Fits `n_estimators` bootstrap replicas of a base model and returns
|
|
70
|
+
$\log(\mathrm{Var}_j f_j(x) + \varepsilon)$.
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
VarianceFeature(
|
|
74
|
+
method="ensemble",
|
|
75
|
+
estimator=RandomForestRegressor(n_estimators=50, random_state=0),
|
|
76
|
+
n_estimators=10,
|
|
77
|
+
)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### ResidualMagnitude
|
|
81
|
+
|
|
82
|
+
At `fit(X, y)` stores training residuals $|y - f(x)|$. At `transform(X)` returns
|
|
83
|
+
the mean residual magnitude among $k$ nearest training neighbors — a local error prior
|
|
84
|
+
when $y$ is unavailable at inference.
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
ResidualMagnitude(
|
|
88
|
+
estimator=RandomForestRegressor(),
|
|
89
|
+
k=5,
|
|
90
|
+
).fit(X_train, y_train)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## FeaturePipeline
|
|
94
|
+
|
|
95
|
+
`FeaturePipeline` horizontally stacks named builders (FeatureUnion-style). Names appear
|
|
96
|
+
in `get_feature_names_out()`.
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from deup.core.features import FeaturePipeline, VarianceFeature, SeenBit
|
|
100
|
+
|
|
101
|
+
pipe = FeaturePipeline([
|
|
102
|
+
("var", VarianceFeature(method="ensemble")),
|
|
103
|
+
("seen", SeenBit()),
|
|
104
|
+
])
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Torch-dependent methods
|
|
108
|
+
|
|
109
|
+
`DensityFeature(method="flow")` and `VarianceFeature(method="gp")` require
|
|
110
|
+
`pip install "deup[torch]"`. Without torch, construction raises `ImportError` with an
|
|
111
|
+
install hint; the module still imports cleanly on a torch-free install.
|
|
112
|
+
|
|
113
|
+
## v0.1 vs v0.2
|
|
114
|
+
|
|
115
|
+
**v0.1 (P5):** feature builders + pipeline are available as primitives.
|
|
116
|
+
`DEUPRegressor` still trains $g$ on raw $X$ by default.
|
|
117
|
+
|
|
118
|
+
**v0.2 (P6):** `ErrorEstimator` wires `FeaturePipeline` into the DEUP training loop
|
|
119
|
+
with target transforms and non-negativity clipping.
|