deup 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deup-0.1.1/.github/workflows/ci.yml +36 -0
- deup-0.1.1/.github/workflows/docs.yml +41 -0
- deup-0.1.1/.github/workflows/release.yml +36 -0
- deup-0.1.1/.gitignore +218 -0
- deup-0.1.1/.pre-commit-config.yaml +13 -0
- deup-0.1.1/ARCHITECTURE.md +72 -0
- deup-0.1.1/BENCHMARKS.md +54 -0
- deup-0.1.1/CHANGELOG.md +41 -0
- deup-0.1.1/CITATION.cff +42 -0
- deup-0.1.1/LICENSE +201 -0
- deup-0.1.1/PKG-INFO +112 -0
- deup-0.1.1/README.md +71 -0
- deup-0.1.1/RELEASING.md +38 -0
- deup-0.1.1/benchmarks/__init__.py +0 -0
- deup-0.1.1/benchmarks/results/regression_benchmark.json +40 -0
- deup-0.1.1/benchmarks/run_regression_benchmark.py +194 -0
- deup-0.1.1/docs/api/core.md +15 -0
- deup-0.1.1/docs/api/estimators.md +3 -0
- deup-0.1.1/docs/api/splitters.md +5 -0
- deup-0.1.1/docs/benchmarks.md +52 -0
- deup-0.1.1/docs/getting-started.md +136 -0
- deup-0.1.1/docs/index.md +39 -0
- deup-0.1.1/docs/losses.md +53 -0
- deup-0.1.1/mkdocs.yml +49 -0
- deup-0.1.1/pyproject.toml +83 -0
- deup-0.1.1/src/deup/__init__.py +19 -0
- deup-0.1.1/src/deup/core/__init__.py +32 -0
- deup-0.1.1/src/deup/core/grouping.py +104 -0
- deup-0.1.1/src/deup/core/losses.py +213 -0
- deup-0.1.1/src/deup/core/oof.py +183 -0
- deup-0.1.1/src/deup/core/protocols.py +41 -0
- deup-0.1.1/src/deup/core/types.py +124 -0
- deup-0.1.1/src/deup/estimators.py +140 -0
- deup-0.1.1/src/deup/py.typed +0 -0
- deup-0.1.1/src/deup/splitters.py +117 -0
- deup-0.1.1/tests/test_benchmark_smoke.py +29 -0
- deup-0.1.1/tests/test_estimators.py +108 -0
- deup-0.1.1/tests/test_grouping.py +58 -0
- deup-0.1.1/tests/test_losses.py +104 -0
- deup-0.1.1/tests/test_oof.py +157 -0
- deup-0.1.1/tests/test_protocols.py +41 -0
- deup-0.1.1/tests/test_smoke.py +10 -0
- deup-0.1.1/tests/test_splitters.py +56 -0
- deup-0.1.1/tests/test_types.py +81 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
concurrency:
|
|
9
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
|
10
|
+
cancel-in-progress: true
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
test:
|
|
14
|
+
runs-on: ${{ matrix.os }}
|
|
15
|
+
strategy:
|
|
16
|
+
fail-fast: false
|
|
17
|
+
matrix:
|
|
18
|
+
os: [ubuntu-latest, macos-latest]
|
|
19
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
20
|
+
steps:
|
|
21
|
+
- uses: actions/checkout@v4
|
|
22
|
+
- uses: actions/setup-python@v5
|
|
23
|
+
with:
|
|
24
|
+
python-version: ${{ matrix.python-version }}
|
|
25
|
+
- name: Install
|
|
26
|
+
run: |
|
|
27
|
+
python -m pip install --upgrade pip
|
|
28
|
+
pip install -e ".[dev,gbm]"
|
|
29
|
+
- name: Lint (ruff)
|
|
30
|
+
run: |
|
|
31
|
+
ruff check .
|
|
32
|
+
ruff format --check .
|
|
33
|
+
- name: Type-check (mypy)
|
|
34
|
+
run: mypy
|
|
35
|
+
- name: Test
|
|
36
|
+
run: pytest --cov=deup --cov-report=term-missing
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
name: Docs
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: read
|
|
10
|
+
pages: write
|
|
11
|
+
id-token: write
|
|
12
|
+
|
|
13
|
+
concurrency:
|
|
14
|
+
group: pages
|
|
15
|
+
cancel-in-progress: false
|
|
16
|
+
|
|
17
|
+
jobs:
|
|
18
|
+
build:
|
|
19
|
+
runs-on: ubuntu-latest
|
|
20
|
+
steps:
|
|
21
|
+
- uses: actions/checkout@v4
|
|
22
|
+
- uses: actions/setup-python@v5
|
|
23
|
+
with:
|
|
24
|
+
python-version: "3.12"
|
|
25
|
+
- name: Install
|
|
26
|
+
run: pip install -e ".[docs]"
|
|
27
|
+
- name: Build MkDocs
|
|
28
|
+
run: mkdocs build --strict
|
|
29
|
+
- uses: actions/upload-pages-artifact@v3
|
|
30
|
+
with:
|
|
31
|
+
path: site
|
|
32
|
+
|
|
33
|
+
deploy:
|
|
34
|
+
needs: build
|
|
35
|
+
runs-on: ubuntu-latest
|
|
36
|
+
environment:
|
|
37
|
+
name: github-pages
|
|
38
|
+
url: ${{ steps.deployment.outputs.page_url }}
|
|
39
|
+
steps:
|
|
40
|
+
- id: deployment
|
|
41
|
+
uses: actions/deploy-pages@v4
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
id-token: write
|
|
10
|
+
contents: read
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
pypi:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
- uses: actions/setup-python@v5
|
|
18
|
+
with:
|
|
19
|
+
python-version: "3.12"
|
|
20
|
+
- name: Install build tools
|
|
21
|
+
run: python -m pip install --upgrade pip build
|
|
22
|
+
- name: Build sdist/wheel
|
|
23
|
+
run: python -m build
|
|
24
|
+
- name: Publish to PyPI (trusted publishing)
|
|
25
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
26
|
+
|
|
27
|
+
github-release:
|
|
28
|
+
runs-on: ubuntu-latest
|
|
29
|
+
permissions:
|
|
30
|
+
contents: write
|
|
31
|
+
steps:
|
|
32
|
+
- uses: actions/checkout@v4
|
|
33
|
+
- name: Create GitHub Release
|
|
34
|
+
uses: softprops/action-gh-release@v2
|
|
35
|
+
with:
|
|
36
|
+
generate_release_notes: true
|
deup-0.1.1/.gitignore
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[codz]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py.cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
# Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
# uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
# poetry.lock
|
|
109
|
+
# poetry.toml
|
|
110
|
+
|
|
111
|
+
# pdm
|
|
112
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
113
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
114
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
115
|
+
# pdm.lock
|
|
116
|
+
# pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# pixi
|
|
121
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
122
|
+
# pixi.lock
|
|
123
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
124
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
125
|
+
.pixi
|
|
126
|
+
|
|
127
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
128
|
+
__pypackages__/
|
|
129
|
+
|
|
130
|
+
# Celery stuff
|
|
131
|
+
celerybeat-schedule
|
|
132
|
+
celerybeat.pid
|
|
133
|
+
|
|
134
|
+
# Redis
|
|
135
|
+
*.rdb
|
|
136
|
+
*.aof
|
|
137
|
+
*.pid
|
|
138
|
+
|
|
139
|
+
# RabbitMQ
|
|
140
|
+
mnesia/
|
|
141
|
+
rabbitmq/
|
|
142
|
+
rabbitmq-data/
|
|
143
|
+
|
|
144
|
+
# ActiveMQ
|
|
145
|
+
activemq-data/
|
|
146
|
+
|
|
147
|
+
# SageMath parsed files
|
|
148
|
+
*.sage.py
|
|
149
|
+
|
|
150
|
+
# Environments
|
|
151
|
+
.env
|
|
152
|
+
.envrc
|
|
153
|
+
.venv
|
|
154
|
+
env/
|
|
155
|
+
venv/
|
|
156
|
+
ENV/
|
|
157
|
+
env.bak/
|
|
158
|
+
venv.bak/
|
|
159
|
+
|
|
160
|
+
# Spyder project settings
|
|
161
|
+
.spyderproject
|
|
162
|
+
.spyproject
|
|
163
|
+
|
|
164
|
+
# Rope project settings
|
|
165
|
+
.ropeproject
|
|
166
|
+
|
|
167
|
+
# mkdocs documentation
|
|
168
|
+
/site
|
|
169
|
+
|
|
170
|
+
# mypy
|
|
171
|
+
.mypy_cache/
|
|
172
|
+
.dmypy.json
|
|
173
|
+
dmypy.json
|
|
174
|
+
|
|
175
|
+
# Pyre type checker
|
|
176
|
+
.pyre/
|
|
177
|
+
|
|
178
|
+
# pytype static type analyzer
|
|
179
|
+
.pytype/
|
|
180
|
+
|
|
181
|
+
# Cython debug symbols
|
|
182
|
+
cython_debug/
|
|
183
|
+
|
|
184
|
+
# PyCharm
|
|
185
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
186
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
187
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
188
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
189
|
+
# .idea/
|
|
190
|
+
|
|
191
|
+
# Abstra
|
|
192
|
+
# Abstra is an AI-powered process automation framework.
|
|
193
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
194
|
+
# Learn more at https://abstra.io/docs
|
|
195
|
+
.abstra/
|
|
196
|
+
|
|
197
|
+
# Visual Studio Code
|
|
198
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
199
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
200
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
201
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
202
|
+
# .vscode/
|
|
203
|
+
# Temporary file for partial code execution
|
|
204
|
+
tempCodeRunnerFile.py
|
|
205
|
+
|
|
206
|
+
# Ruff stuff:
|
|
207
|
+
.ruff_cache/
|
|
208
|
+
|
|
209
|
+
# PyPI configuration file
|
|
210
|
+
.pypirc
|
|
211
|
+
|
|
212
|
+
# Marimo
|
|
213
|
+
marimo/_static/
|
|
214
|
+
marimo/_lsp/
|
|
215
|
+
__marimo__/
|
|
216
|
+
|
|
217
|
+
# Streamlit
|
|
218
|
+
.streamlit/secrets.toml
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
3
|
+
rev: v0.6.9
|
|
4
|
+
hooks:
|
|
5
|
+
- id: ruff
|
|
6
|
+
args: [--fix]
|
|
7
|
+
- id: ruff-format
|
|
8
|
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
|
9
|
+
rev: v1.11.2
|
|
10
|
+
hooks:
|
|
11
|
+
- id: mypy
|
|
12
|
+
additional_dependencies: [numpy, scikit-learn]
|
|
13
|
+
files: ^src/
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Architecture
|
|
2
|
+
|
|
3
|
+
This document captures the load-bearing design decisions for `deup`. It is the
|
|
4
|
+
contract that keeps the library general without becoming a god-object, and honest
|
|
5
|
+
about time-series correctness.
|
|
6
|
+
|
|
7
|
+
## 1. DEUP is a meta-algorithm, not a model
|
|
8
|
+
|
|
9
|
+
DEUP wraps *any* predictor: train `f`, collect `f`'s out-of-sample errors, train a
|
|
10
|
+
secondary predictor `g` to estimate those errors, expose `g(x)` as epistemic
|
|
11
|
+
uncertainty (optionally minus an aleatoric estimate `a(x)`). We therefore do **not**
|
|
12
|
+
extend PyTorch or any framework — we orchestrate models behind a small,
|
|
13
|
+
scikit-learn-style protocol (`fit` / `predict` / `predict_proba`). PyTorch is an
|
|
14
|
+
**optional backend** (`deup[torch]`), never the foundation.
|
|
15
|
+
|
|
16
|
+
## 2. The five axes (every use case is a configuration)
|
|
17
|
+
|
|
18
|
+
All supported use cases differ only along five pluggable axes; the core orchestration
|
|
19
|
+
is identical:
|
|
20
|
+
|
|
21
|
+
| Axis | Strategy object | Examples |
|
|
22
|
+
|---|---|---|
|
|
23
|
+
| 1. Task | estimator class | regression, classification, ranking, quantile |
|
|
24
|
+
| 2. Loss / error target | `Loss` | squared, log-loss, pinball, rank-loss, callable |
|
|
25
|
+
| 3. Grouping | `group_by` | i.i.d. rows, panel-by-entity, cross-section-by-date |
|
|
26
|
+
| 4. Out-of-sample scheme | `cv` splitter | KFold, GroupKFold, TimeSeriesSplit, PurgedWalkForward |
|
|
27
|
+
| 5. `g`-features | feature pipeline | raw X, density, variance, distance-to-train |
|
|
28
|
+
|
|
29
|
+
Use-case map:
|
|
30
|
+
|
|
31
|
+
| Use case | task | loss | group | cv | g-features |
|
|
32
|
+
|---|---|---|---|---|---|
|
|
33
|
+
| Cross-sectional ranker | ranking | rank-loss | by-date | PurgedWalkForward | score, vol, regime |
|
|
34
|
+
| Mean-reversion forecast | regression | squared | time | TimeSeriesSplit | residual, vol |
|
|
35
|
+
| Direction / credit | classification | log-loss | time / iid | walk-forward / Stratified | density, margin |
|
|
36
|
+
| Quantile / vol | quantile | pinball | time | walk-forward | realized-vol |
|
|
37
|
+
| OOD / vision | classification | per-sample loss | iid | holdout + seen-bit | embedding density, GP var |
|
|
38
|
+
| Active learning / BO | any | predicted error | iid | KFold | density, distance |
|
|
39
|
+
| Generic tabular | reg / clf | squared / log-loss | iid | KFold | raw X, density |
|
|
40
|
+
|
|
41
|
+
## 3. Layered primitives + thin wrappers
|
|
42
|
+
|
|
43
|
+
Build the primitives, then ship convenience estimators over them:
|
|
44
|
+
|
|
45
|
+
- `OOFErrorCollector(estimator, cv, loss, group_by)` — leakage-correct out-of-fold
|
|
46
|
+
errors (the crux).
|
|
47
|
+
- feature builders + pipeline — what `g` sees.
|
|
48
|
+
- `ErrorEstimator(model, features)` — fits `g`.
|
|
49
|
+
- `UncertaintyCalibrator` — turns relative `g(x)` into calibrated intervals (v0.2+).
|
|
50
|
+
- `DEUPRegressor` / `DEUPClassifier` / `DEUPRanker` — ~20–40 line wrappers composing
|
|
51
|
+
the above, with the ergonomic `predict(X, return_uncertainty=True)` API.
|
|
52
|
+
|
|
53
|
+
## 4. General core, time-series flagship
|
|
54
|
+
|
|
55
|
+
The core is splitter-agnostic and i.i.d.-clean, so the general crowd gets a simple,
|
|
56
|
+
correct API. But leakage-control is **first-class**: `PurgedWalkForward` /
|
|
57
|
+
`EmbargoedKFold` ship in the core with dedicated leakage tests, because correct
|
|
58
|
+
out-of-fold error construction for sequential / cross-sectional data is the
|
|
59
|
+
differentiator versus vision-centric UQ frameworks. Marketing leads with time-series;
|
|
60
|
+
the abstractions stay general.
|
|
61
|
+
|
|
62
|
+
## 5. Non-negotiable: no leakage
|
|
63
|
+
|
|
64
|
+
Every fold-local quantity (the error targets, scalers, density references, aleatoric
|
|
65
|
+
estimates) is fit on training folds only, inside the CV loop. A future-peeking
|
|
66
|
+
splitter must cause a dedicated leakage test to fail. This is enforced in code, not assumed.
|
|
67
|
+
|
|
68
|
+
## 6. Attribution
|
|
69
|
+
|
|
70
|
+
DEUP the *method* is due to Lahlou, Jain, Nekoei, Butoi, Bertin, Rector-Brooks, Korablyov,
|
|
71
|
+
and Bengio (2023, TMLR). This repository is an independent library implementation;
|
|
72
|
+
it credits the method and does not claim it.
|
deup-0.1.1/BENCHMARKS.md
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# Benchmarks
|
|
2
|
+
|
|
3
|
+
Reproducible uncertainty-quality comparisons for `deup`.
|
|
4
|
+
|
|
5
|
+
## Quick run
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install -e ".[dev]"
|
|
9
|
+
python benchmarks/run_regression_benchmark.py
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
Results are written to `benchmarks/results/regression_benchmark.json`.
|
|
13
|
+
|
|
14
|
+
## Regression benchmark (California housing)
|
|
15
|
+
|
|
16
|
+
**Question:** which method best *ranks* test points by realized squared error?
|
|
17
|
+
|
|
18
|
+
**Metric:** Spearman correlation between each method's uncertainty score and
|
|
19
|
+
`(y - ŷ)²` on a held-out test set (n=4,128). Higher is better.
|
|
20
|
+
|
|
21
|
+
| Method | Spearman | Notes |
|
|
22
|
+
|---|---:|---|
|
|
23
|
+
| **DEUP** | **0.510** | `DEUPRegressor` + RF base |
|
|
24
|
+
| Ensemble disagreement | 0.460 | 5 bootstrap RF members, prediction variance |
|
|
25
|
+
| Conformal residual | 0.447 | Cal-set model for `\|residual\|` magnitude |
|
|
26
|
+
|
|
27
|
+
*Last run: local dev checkout, seed=0, commit `P-min-bench`.*
|
|
28
|
+
|
|
29
|
+
DEUP wins on this tabular regression task — the uncertainty score tracks which
|
|
30
|
+
predictions are likely to be wrong better than the two sklearn-only baselines.
|
|
31
|
+
|
|
32
|
+
### N-sweep teaser (context-level aggregation)
|
|
33
|
+
|
|
34
|
+
Synthetic heteroscedastic panels; for each context size N we report Spearman
|
|
35
|
+
between **mean g(x)** per context and **mean realized squared error** per context.
|
|
36
|
+
|
|
37
|
+
| N / context | # contexts | agg Spearman |
|
|
38
|
+
|---:|---:|---:|
|
|
39
|
+
| 10 | 800 | 0.611 |
|
|
40
|
+
| 50 | 160 | 0.577 |
|
|
41
|
+
| 200 | 40 | 0.664 |
|
|
42
|
+
| 1000 | 20 | 0.498 |
|
|
43
|
+
|
|
44
|
+
This is a **teaser**, not the full finance/CIFAR cross-domain study from the thesis.
|
|
45
|
+
With very few contexts (N=1000 → only 20 contexts), the aggregate
|
|
46
|
+
estimate is noisy. The full `AggregationReliability` diagnostic (v0.2) will formalize
|
|
47
|
+
when aggregated DEUP is trustworthy.
|
|
48
|
+
|
|
49
|
+
## Not yet benchmarked (v0.2+)
|
|
50
|
+
|
|
51
|
+
- MC-Dropout (requires `[torch]`)
|
|
52
|
+
- MAPIE interop
|
|
53
|
+
- Time-series / purged walk-forward on real finance panel
|
|
54
|
+
- CIFAR-10-C OOD reproduction
|
deup-0.1.1/CHANGELOG.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [0.1.1] — 2026-06-04
|
|
4
|
+
|
|
5
|
+
First release published to PyPI.
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
|
|
9
|
+
- `OOFErrorCollector` now supports multiclass `predict_proba` targets (previously
|
|
10
|
+
only binary worked; multiclass stored 2-D probabilities and crashed).
|
|
11
|
+
- Guard against rows assigned to multiple test folds (e.g. repeated CV): a warning
|
|
12
|
+
is issued and one error per row is kept, preserving honest OOF targets.
|
|
13
|
+
- Validate `groups` length against `n_rows` and the loss output length.
|
|
14
|
+
|
|
15
|
+
### Added
|
|
16
|
+
|
|
17
|
+
- Research-grade docstrings documenting the "g trained on a slightly smaller f"
|
|
18
|
+
refit assumption (DEUP Algorithm 2) plus a "How it works" docs section.
|
|
19
|
+
|
|
20
|
+
## [0.1.0] — 2026-06-04
|
|
21
|
+
|
|
22
|
+
First public release.
|
|
23
|
+
|
|
24
|
+
### Added
|
|
25
|
+
|
|
26
|
+
- `DEUPRegressor` — sklearn-compatible wrapper with `predict(..., return_uncertainty=True)`
|
|
27
|
+
- Leakage-correct `OOFErrorCollector` (DEUP Algorithm 2 / K-fold OOF errors)
|
|
28
|
+
- Splitters: `PurgedWalkForward`, re-export `KFold` / `TimeSeriesSplit`
|
|
29
|
+
- Loss registry: `squared`, `absolute`, `logloss`, `brier`, `pinball`, `rank`
|
|
30
|
+
- Target transforms: `log`, `asinh`, `none` for error-predictor training
|
|
31
|
+
- Benchmark: DEUP vs ensemble vs conformal on California housing
|
|
32
|
+
- MkDocs documentation site
|
|
33
|
+
- 54+ unit tests including parity-exact OOF and leakage gate
|
|
34
|
+
|
|
35
|
+
### Notes
|
|
36
|
+
|
|
37
|
+
- Aleatoric decomposition (`ê = max(0, g - a)`), conformal intervals, and
|
|
38
|
+
`DEUPClassifier` / `DEUPRanker` are planned for v0.2.
|
|
39
|
+
|
|
40
|
+
[0.1.1]: https://github.com/ursinasanderink/deup/releases/tag/v0.1.1
|
|
41
|
+
[0.1.0]: https://github.com/ursinasanderink/deup/releases/tag/v0.1.0
|
deup-0.1.1/CITATION.cff
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
cff-version: 1.2.0
|
|
2
|
+
message: "If you use this software, please cite both the software and the original DEUP paper."
|
|
3
|
+
title: "deup: Direct Epistemic Uncertainty Prediction"
|
|
4
|
+
abstract: >-
|
|
5
|
+
A scikit-learn-compatible implementation of Direct Epistemic Uncertainty
|
|
6
|
+
Prediction (DEUP) with first-class, leakage-correct support for time-series and
|
|
7
|
+
cross-sectional workflows.
|
|
8
|
+
type: software
|
|
9
|
+
authors:
|
|
10
|
+
- family-names: Sanderink
|
|
11
|
+
given-names: Ursina
|
|
12
|
+
repository-code: "https://github.com/ursinasanderink/deup"
|
|
13
|
+
license: Apache-2.0
|
|
14
|
+
keywords:
|
|
15
|
+
- epistemic uncertainty
|
|
16
|
+
- DEUP
|
|
17
|
+
- uncertainty quantification
|
|
18
|
+
- scikit-learn
|
|
19
|
+
- time-series
|
|
20
|
+
references:
|
|
21
|
+
- type: article
|
|
22
|
+
title: "DEUP: Direct Epistemic Uncertainty Prediction"
|
|
23
|
+
authors:
|
|
24
|
+
- family-names: Lahlou
|
|
25
|
+
given-names: Salem
|
|
26
|
+
- family-names: Jain
|
|
27
|
+
given-names: Moksh
|
|
28
|
+
- family-names: Nekoei
|
|
29
|
+
given-names: Hadi
|
|
30
|
+
- family-names: Butoi
|
|
31
|
+
given-names: Victor Ion
|
|
32
|
+
- family-names: Bertin
|
|
33
|
+
given-names: Paul
|
|
34
|
+
- family-names: Rector-Brooks
|
|
35
|
+
given-names: Jarrid
|
|
36
|
+
- family-names: Korablyov
|
|
37
|
+
given-names: Maksym
|
|
38
|
+
- family-names: Bengio
|
|
39
|
+
given-names: Yoshua
|
|
40
|
+
journal: "Transactions on Machine Learning Research"
|
|
41
|
+
year: 2023
|
|
42
|
+
url: "https://openreview.net/forum?id=eGLdVRvvfQ"
|