deup 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. deup-0.1.1/.github/workflows/ci.yml +36 -0
  2. deup-0.1.1/.github/workflows/docs.yml +41 -0
  3. deup-0.1.1/.github/workflows/release.yml +36 -0
  4. deup-0.1.1/.gitignore +218 -0
  5. deup-0.1.1/.pre-commit-config.yaml +13 -0
  6. deup-0.1.1/ARCHITECTURE.md +72 -0
  7. deup-0.1.1/BENCHMARKS.md +54 -0
  8. deup-0.1.1/CHANGELOG.md +41 -0
  9. deup-0.1.1/CITATION.cff +42 -0
  10. deup-0.1.1/LICENSE +201 -0
  11. deup-0.1.1/PKG-INFO +112 -0
  12. deup-0.1.1/README.md +71 -0
  13. deup-0.1.1/RELEASING.md +38 -0
  14. deup-0.1.1/benchmarks/__init__.py +0 -0
  15. deup-0.1.1/benchmarks/results/regression_benchmark.json +40 -0
  16. deup-0.1.1/benchmarks/run_regression_benchmark.py +194 -0
  17. deup-0.1.1/docs/api/core.md +15 -0
  18. deup-0.1.1/docs/api/estimators.md +3 -0
  19. deup-0.1.1/docs/api/splitters.md +5 -0
  20. deup-0.1.1/docs/benchmarks.md +52 -0
  21. deup-0.1.1/docs/getting-started.md +136 -0
  22. deup-0.1.1/docs/index.md +39 -0
  23. deup-0.1.1/docs/losses.md +53 -0
  24. deup-0.1.1/mkdocs.yml +49 -0
  25. deup-0.1.1/pyproject.toml +83 -0
  26. deup-0.1.1/src/deup/__init__.py +19 -0
  27. deup-0.1.1/src/deup/core/__init__.py +32 -0
  28. deup-0.1.1/src/deup/core/grouping.py +104 -0
  29. deup-0.1.1/src/deup/core/losses.py +213 -0
  30. deup-0.1.1/src/deup/core/oof.py +183 -0
  31. deup-0.1.1/src/deup/core/protocols.py +41 -0
  32. deup-0.1.1/src/deup/core/types.py +124 -0
  33. deup-0.1.1/src/deup/estimators.py +140 -0
  34. deup-0.1.1/src/deup/py.typed +0 -0
  35. deup-0.1.1/src/deup/splitters.py +117 -0
  36. deup-0.1.1/tests/test_benchmark_smoke.py +29 -0
  37. deup-0.1.1/tests/test_estimators.py +108 -0
  38. deup-0.1.1/tests/test_grouping.py +58 -0
  39. deup-0.1.1/tests/test_losses.py +104 -0
  40. deup-0.1.1/tests/test_oof.py +157 -0
  41. deup-0.1.1/tests/test_protocols.py +41 -0
  42. deup-0.1.1/tests/test_smoke.py +10 -0
  43. deup-0.1.1/tests/test_splitters.py +56 -0
  44. deup-0.1.1/tests/test_types.py +81 -0
@@ -0,0 +1,36 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+
8
+ concurrency:
9
+ group: ${{ github.workflow }}-${{ github.ref }}
10
+ cancel-in-progress: true
11
+
12
+ jobs:
13
+ test:
14
+ runs-on: ${{ matrix.os }}
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ os: [ubuntu-latest, macos-latest]
19
+ python-version: ["3.10", "3.11", "3.12"]
20
+ steps:
21
+ - uses: actions/checkout@v4
22
+ - uses: actions/setup-python@v5
23
+ with:
24
+ python-version: ${{ matrix.python-version }}
25
+ - name: Install
26
+ run: |
27
+ python -m pip install --upgrade pip
28
+ pip install -e ".[dev,gbm]"
29
+ - name: Lint (ruff)
30
+ run: |
31
+ ruff check .
32
+ ruff format --check .
33
+ - name: Type-check (mypy)
34
+ run: mypy
35
+ - name: Test
36
+ run: pytest --cov=deup --cov-report=term-missing
@@ -0,0 +1,41 @@
1
+ name: Docs
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ workflow_dispatch:
7
+
8
+ permissions:
9
+ contents: read
10
+ pages: write
11
+ id-token: write
12
+
13
+ concurrency:
14
+ group: pages
15
+ cancel-in-progress: false
16
+
17
+ jobs:
18
+ build:
19
+ runs-on: ubuntu-latest
20
+ steps:
21
+ - uses: actions/checkout@v4
22
+ - uses: actions/setup-python@v5
23
+ with:
24
+ python-version: "3.12"
25
+ - name: Install
26
+ run: pip install -e ".[docs]"
27
+ - name: Build MkDocs
28
+ run: mkdocs build --strict
29
+ - uses: actions/upload-pages-artifact@v3
30
+ with:
31
+ path: site
32
+
33
+ deploy:
34
+ needs: build
35
+ runs-on: ubuntu-latest
36
+ environment:
37
+ name: github-pages
38
+ url: ${{ steps.deployment.outputs.page_url }}
39
+ steps:
40
+ - id: deployment
41
+ uses: actions/deploy-pages@v4
@@ -0,0 +1,36 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ permissions:
9
+ id-token: write
10
+ contents: read
11
+
12
+ jobs:
13
+ pypi:
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.12"
20
+ - name: Install build tools
21
+ run: python -m pip install --upgrade pip build
22
+ - name: Build sdist/wheel
23
+ run: python -m build
24
+ - name: Publish to PyPI (trusted publishing)
25
+ uses: pypa/gh-action-pypi-publish@release/v1
26
+
27
+ github-release:
28
+ runs-on: ubuntu-latest
29
+ permissions:
30
+ contents: write
31
+ steps:
32
+ - uses: actions/checkout@v4
33
+ - name: Create GitHub Release
34
+ uses: softprops/action-gh-release@v2
35
+ with:
36
+ generate_release_notes: true
deup-0.1.1/.gitignore ADDED
@@ -0,0 +1,218 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ # Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ # poetry.lock
109
+ # poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ # pdm.lock
116
+ # pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ # pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # Redis
135
+ *.rdb
136
+ *.aof
137
+ *.pid
138
+
139
+ # RabbitMQ
140
+ mnesia/
141
+ rabbitmq/
142
+ rabbitmq-data/
143
+
144
+ # ActiveMQ
145
+ activemq-data/
146
+
147
+ # SageMath parsed files
148
+ *.sage.py
149
+
150
+ # Environments
151
+ .env
152
+ .envrc
153
+ .venv
154
+ env/
155
+ venv/
156
+ ENV/
157
+ env.bak/
158
+ venv.bak/
159
+
160
+ # Spyder project settings
161
+ .spyderproject
162
+ .spyproject
163
+
164
+ # Rope project settings
165
+ .ropeproject
166
+
167
+ # mkdocs documentation
168
+ /site
169
+
170
+ # mypy
171
+ .mypy_cache/
172
+ .dmypy.json
173
+ dmypy.json
174
+
175
+ # Pyre type checker
176
+ .pyre/
177
+
178
+ # pytype static type analyzer
179
+ .pytype/
180
+
181
+ # Cython debug symbols
182
+ cython_debug/
183
+
184
+ # PyCharm
185
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
186
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
187
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
188
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
189
+ # .idea/
190
+
191
+ # Abstra
192
+ # Abstra is an AI-powered process automation framework.
193
+ # Ignore directories containing user credentials, local state, and settings.
194
+ # Learn more at https://abstra.io/docs
195
+ .abstra/
196
+
197
+ # Visual Studio Code
198
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
199
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
200
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
201
+ # you could uncomment the following to ignore the entire vscode folder
202
+ # .vscode/
203
+ # Temporary file for partial code execution
204
+ tempCodeRunnerFile.py
205
+
206
+ # Ruff stuff:
207
+ .ruff_cache/
208
+
209
+ # PyPI configuration file
210
+ .pypirc
211
+
212
+ # Marimo
213
+ marimo/_static/
214
+ marimo/_lsp/
215
+ __marimo__/
216
+
217
+ # Streamlit
218
+ .streamlit/secrets.toml
@@ -0,0 +1,13 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.6.9
4
+ hooks:
5
+ - id: ruff
6
+ args: [--fix]
7
+ - id: ruff-format
8
+ - repo: https://github.com/pre-commit/mirrors-mypy
9
+ rev: v1.11.2
10
+ hooks:
11
+ - id: mypy
12
+ additional_dependencies: [numpy, scikit-learn]
13
+ files: ^src/
@@ -0,0 +1,72 @@
1
+ # Architecture
2
+
3
+ This document captures the load-bearing design decisions for `deup`. It is the
4
+ contract that keeps the library general without turning it into a god-object, and
5
+ keeps it honest about time-series correctness.
6
+
7
+ ## 1. DEUP is a meta-algorithm, not a model
8
+
9
+ DEUP wraps *any* predictor: train `f`, collect `f`'s out-of-sample errors, train a
10
+ secondary predictor `g` to estimate those errors, expose `g(x)` as epistemic
11
+ uncertainty (optionally minus an aleatoric estimate `a(x)`). We therefore do **not**
12
+ extend PyTorch or any framework — we orchestrate models behind a small,
13
+ scikit-learn-style protocol (`fit` / `predict` / `predict_proba`). PyTorch is an
14
+ **optional backend** (`deup[torch]`), never the foundation.
15
+
16
+ ## 2. The five axes (every use case is a configuration)
17
+
18
+ All supported use cases differ only along five pluggable axes; the core orchestration
19
+ is identical:
20
+
21
+ | Axis | Strategy object | Examples |
22
+ |---|---|---|
23
+ | 1. Task | estimator class | regression, classification, ranking, quantile |
24
+ | 2. Loss / error target | `Loss` | squared, log-loss, pinball, rank-loss, callable |
25
+ | 3. Grouping | `group_by` | i.i.d. rows, panel-by-entity, cross-section-by-date |
26
+ | 4. Out-of-sample scheme | `cv` splitter | KFold, GroupKFold, TimeSeriesSplit, PurgedWalkForward |
27
+ | 5. `g`-features | feature pipeline | raw X, density, variance, distance-to-train |
28
+
29
+ Use-case map:
30
+
31
+ | Use case | task | loss | group | cv | g-features |
32
+ |---|---|---|---|---|---|
33
+ | Cross-sectional ranker | ranking | rank-loss | by-date | PurgedWalkForward | score, vol, regime |
34
+ | Mean-reversion forecast | regression | squared | time | TimeSeriesSplit | residual, vol |
35
+ | Direction / credit | classification | log-loss | time / iid | walk-forward / Stratified | density, margin |
36
+ | Quantile / vol | quantile | pinball | time | walk-forward | realized-vol |
37
+ | OOD / vision | classification | per-sample loss | iid | holdout + seen-bit | embedding density, GP var |
38
+ | Active learning / BO | any | predicted error | iid | KFold | density, distance |
39
+ | Generic tabular | reg / clf | squared / log-loss | iid | KFold | raw X, density |
40
+
41
+ ## 3. Layered primitives + thin wrappers
42
+
43
+ Build the primitives, then ship convenience estimators over them:
44
+
45
+ - `OOFErrorCollector(estimator, cv, loss, group_by)` — leakage-correct out-of-fold
46
+ errors (the crux).
47
+ - feature builders + pipeline — what `g` sees.
48
+ - `ErrorEstimator(model, features)` — fits `g`.
49
+ - `UncertaintyCalibrator` — turns relative `g(x)` into calibrated intervals (v0.2+).
50
+ - `DEUPRegressor` / `DEUPClassifier` / `DEUPRanker` — ~20–40 line wrappers composing
51
+ the above, with the ergonomic `predict(X, return_uncertainty=True)` API.
52
+
53
+ ## 4. General core, time-series flagship
54
+
55
+ The core is splitter-agnostic and i.i.d.-clean, so the general crowd gets a simple,
56
+ correct API. But leakage-control is **first-class**: `PurgedWalkForward` /
57
+ `EmbargoedKFold` ship in the core with dedicated leakage tests, because correct
58
+ out-of-fold error construction for sequential / cross-sectional data is the
59
+ differentiator versus vision-centric UQ frameworks. Marketing leads with time-series;
60
+ the abstractions stay general.
61
+
62
+ ## 5. Non-negotiable: no leakage
63
+
64
+ Every fold-local quantity (the error targets, scalers, density references, aleatoric
65
+ estimates) is fit on training folds only, inside the CV loop. A future-peeking
66
+ splitter must make a purpose-built leakage test fail. This is enforced in code, not assumed.
67
+
68
+ ## 6. Attribution
69
+
70
+ DEUP the *method* is due to Lahlou, Jain, Nekoei, Butoi, Bertin, Rector-Brooks, Korablyov,
71
+ and Bengio (2023, TMLR). This repository is an independent library implementation;
72
+ it credits the method and does not claim it.
@@ -0,0 +1,54 @@
1
+ # Benchmarks
2
+
3
+ Reproducible uncertainty-quality comparisons for `deup`.
4
+
5
+ ## Quick run
6
+
7
+ ```bash
8
+ pip install -e ".[dev]"
9
+ python benchmarks/run_regression_benchmark.py
10
+ ```
11
+
12
+ Results are written to `benchmarks/results/regression_benchmark.json`.
13
+
14
+ ## Regression benchmark (California housing)
15
+
16
+ **Question:** which method best *ranks* test points by realized squared error?
17
+
18
+ **Metric:** Spearman correlation between each method's uncertainty score and
19
+ `(y - ŷ)²` on a held-out test set (n=4,128). Higher is better.
20
+
21
+ | Method | Spearman | Notes |
22
+ |---|---:|---|
23
+ | **DEUP** | **0.510** | `DEUPRegressor` + RF base |
24
+ | Ensemble disagreement | 0.460 | 5 bootstrap RF members, prediction variance |
25
+ | Conformal residual | 0.447 | Cal-set model for `\|residual\|` magnitude |
26
+
27
+ *Last run: local dev checkout, seed=0, commit `P-min-bench`.*
28
+
29
+ DEUP wins on this tabular regression task — its uncertainty score is better than
30
+ the two sklearn-only baselines at identifying which predictions are likely to be wrong.
31
+
32
+ ### N-sweep teaser (context-level aggregation)
33
+
34
+ Synthetic heteroscedastic panels; for each context size N we report the Spearman
35
+ correlation between per-context **mean g(x)** and per-context **mean realized squared error**.
36
+
37
+ | N / context | # contexts | agg Spearman |
38
+ |---:|---:|---:|
39
+ | 10 | 800 | 0.611 |
40
+ | 50 | 160 | 0.577 |
41
+ | 200 | 40 | 0.664 |
42
+ | 1000 | 20 | 0.498 |
43
+
44
+ This is a **teaser**, not the full finance/CIFAR cross-domain study from the thesis.
45
+ When there are very few contexts (N=1000 → only 20 contexts), the aggregate
46
+ estimate is noisy. The full `AggregationReliability` diagnostic (v0.2) will formalize
47
+ when aggregated DEUP is trustworthy.
48
+
49
+ ## Not yet benchmarked (v0.2+)
50
+
51
+ - MC-Dropout (requires `[torch]`)
52
+ - MAPIE interop
53
+ - Time-series / purged walk-forward on real finance panel
54
+ - CIFAR-10-C OOD reproduction
@@ -0,0 +1,41 @@
1
+ # Changelog
2
+
3
+ ## [0.1.1] — 2026-06-04
4
+
5
+ First release published to PyPI.
6
+
7
+ ### Fixed
8
+
9
+ - `OOFErrorCollector` now supports multiclass `predict_proba` targets (previously
10
+ only binary worked; multiclass stored 2-D probabilities and crashed).
11
+ - Guard against rows assigned to multiple test folds (e.g. repeated CV): a warning
12
+ is issued and one error per row is kept, preserving honest OOF targets.
13
+ - Validate `groups` length against `n_rows` and the loss output length.
14
+
15
+ ### Added
16
+
17
+ - Research-grade docstrings documenting the "g trained on a slightly smaller f"
18
+ refit assumption (DEUP Algorithm 2) plus a "How it works" docs section.
19
+
20
+ ## [0.1.0] — 2026-06-04
21
+
22
+ First public release.
23
+
24
+ ### Added
25
+
26
+ - `DEUPRegressor` — sklearn-compatible wrapper with `predict(..., return_uncertainty=True)`
27
+ - Leakage-correct `OOFErrorCollector` (DEUP Algorithm 2 / K-fold OOF errors)
28
+ - Splitters: `PurgedWalkForward`, re-export `KFold` / `TimeSeriesSplit`
29
+ - Loss registry: `squared`, `absolute`, `logloss`, `brier`, `pinball`, `rank`
30
+ - Target transforms: `log`, `asinh`, `none` for error-predictor training
31
+ - Benchmark: DEUP vs ensemble vs conformal on California housing
32
+ - MkDocs documentation site
33
+ - 54+ unit tests including parity-exact OOF and leakage gate
34
+
35
+ ### Notes
36
+
37
+ - Aleatoric decomposition (`ê = max(0, g - a)`), conformal intervals, and
38
+ `DEUPClassifier` / `DEUPRanker` are planned for v0.2.
39
+
40
+ [0.1.1]: https://github.com/ursinasanderink/deup/releases/tag/v0.1.1
41
+ [0.1.0]: https://github.com/ursinasanderink/deup/releases/tag/v0.1.0
@@ -0,0 +1,42 @@
1
+ cff-version: 1.2.0
2
+ message: "If you use this software, please cite both the software and the original DEUP paper."
3
+ title: "deup: Direct Epistemic Uncertainty Prediction"
4
+ abstract: >-
5
+ A scikit-learn-compatible implementation of Direct Epistemic Uncertainty
6
+ Prediction (DEUP) with first-class, leakage-correct support for time-series and
7
+ cross-sectional workflows.
8
+ type: software
9
+ authors:
10
+ - family-names: Sanderink
11
+ given-names: Ursina
12
+ repository-code: "https://github.com/ursinasanderink/deup"
13
+ license: Apache-2.0
14
+ keywords:
15
+ - epistemic uncertainty
16
+ - DEUP
17
+ - uncertainty quantification
18
+ - scikit-learn
19
+ - time-series
20
+ references:
21
+ - type: article
22
+ title: "DEUP: Direct Epistemic Uncertainty Prediction"
23
+ authors:
24
+ - family-names: Lahlou
25
+ given-names: Salem
26
+ - family-names: Jain
27
+ given-names: Moksh
28
+ - family-names: Nekoei
29
+ given-names: Hadi
30
+ - family-names: Butoi
31
+ given-names: Victor Ion
32
+ - family-names: Bertin
33
+ given-names: Paul
34
+ - family-names: Rector-Brooks
35
+ given-names: Jarrid
36
+ - family-names: Korablyov
37
+ given-names: Maksym
38
+ - family-names: Bengio
39
+ given-names: Yoshua
40
+ journal: "Transactions on Machine Learning Research"
41
+ year: 2023
42
+ url: "https://openreview.net/forum?id=eGLdVRvvfQ"