coppuccino 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coppuccino-1.0.0/.gitattributes +2 -0
- coppuccino-1.0.0/.github/workflows/publish.yml +48 -0
- coppuccino-1.0.0/.github/workflows/test.yml +53 -0
- coppuccino-1.0.0/.gitignore +188 -0
- coppuccino-1.0.0/LICENSE +21 -0
- coppuccino-1.0.0/PKG-INFO +328 -0
- coppuccino-1.0.0/README.md +296 -0
- coppuccino-1.0.0/examples/data/LDC2A_00000007_posterior.feather +0 -0
- coppuccino-1.0.0/examples/galactic_binary_example.ipynb +349 -0
- coppuccino-1.0.0/examples/hdr_credibility.ipynb +206 -0
- coppuccino-1.0.0/examples/requirements.txt +9 -0
- coppuccino-1.0.0/image.png +0 -0
- coppuccino-1.0.0/pyproject.toml +56 -0
- coppuccino-1.0.0/src/coppuccino/__init__.py +31 -0
- coppuccino-1.0.0/src/coppuccino/bijections.py +724 -0
- coppuccino-1.0.0/src/coppuccino/copula_flows.py +427 -0
- coppuccino-1.0.0/src/coppuccino/hdr.py +191 -0
- coppuccino-1.0.0/src/coppuccino/model_io.py +242 -0
- coppuccino-1.0.0/src/coppuccino/py.typed +0 -0
- coppuccino-1.0.0/tests/__init__.py +0 -0
- coppuccino-1.0.0/tests/test_bijections.py +512 -0
- coppuccino-1.0.0/tests/test_copula_flows.py +363 -0
- coppuccino-1.0.0/tests/test_hdr.py +308 -0
- coppuccino-1.0.0/tests/test_model_io.py +544 -0
- coppuccino-1.0.0/uv.lock +511 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
# Publishes the package whenever a GitHub Release is published.
|
|
4
|
+
# Uses PyPI Trusted Publishing (OIDC) — no API tokens are stored in the repo.
|
|
5
|
+
on:
|
|
6
|
+
release:
|
|
7
|
+
types: [published]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
build:
|
|
11
|
+
name: Build distributions
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
|
|
16
|
+
- name: Install uv
|
|
17
|
+
uses: astral-sh/setup-uv@v6
|
|
18
|
+
|
|
19
|
+
- name: Build sdist and wheel
|
|
20
|
+
run: uv build
|
|
21
|
+
|
|
22
|
+
- name: Check package metadata
|
|
23
|
+
run: uvx twine check dist/*
|
|
24
|
+
|
|
25
|
+
- uses: actions/upload-artifact@v4
|
|
26
|
+
with:
|
|
27
|
+
name: dist
|
|
28
|
+
path: dist/
|
|
29
|
+
|
|
30
|
+
publish:
|
|
31
|
+
name: Publish to PyPI
|
|
32
|
+
needs: build
|
|
33
|
+
runs-on: ubuntu-latest
|
|
34
|
+
# Scopes the trusted-publisher grant; add required reviewers in the repo's
|
|
35
|
+
# Settings > Environments > pypi for an extra manual approval gate.
|
|
36
|
+
environment:
|
|
37
|
+
name: pypi
|
|
38
|
+
url: https://pypi.org/p/coppuccino
|
|
39
|
+
permissions:
|
|
40
|
+
id-token: write # required for Trusted Publishing (OIDC)
|
|
41
|
+
steps:
|
|
42
|
+
- uses: actions/download-artifact@v4
|
|
43
|
+
with:
|
|
44
|
+
name: dist
|
|
45
|
+
path: dist/
|
|
46
|
+
|
|
47
|
+
- name: Publish to PyPI
|
|
48
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
name: Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ${{ matrix.os }}
|
|
12
|
+
strategy:
|
|
13
|
+
fail-fast: false
|
|
14
|
+
matrix:
|
|
15
|
+
os: [ubuntu-latest]
|
|
16
|
+
python-version: ["3.11", "3.12", "3.13", "3.14"]
|
|
17
|
+
# "highest" uses the committed uv.lock; "lowest-direct" exercises the
|
|
18
|
+
# oldest supported dependency versions declared in pyproject.toml.
|
|
19
|
+
resolution: ["highest", "lowest-direct"]
|
|
20
|
+
exclude:
|
|
21
|
+
# The oldest pinned numpy/scipy predate cp313/cp314 wheels, so the
|
|
22
|
+
# lowest-direct floors aren't installable on 3.13+ (a normal install
|
|
23
|
+
# there resolves newer versions, which the "highest" job covers).
|
|
24
|
+
- python-version: "3.13"
|
|
25
|
+
resolution: "lowest-direct"
|
|
26
|
+
- python-version: "3.14"
|
|
27
|
+
resolution: "lowest-direct"
|
|
28
|
+
include:
|
|
29
|
+
# Thin macOS slice: confirm runtime on the oldest and newest Python
|
|
30
|
+
# (full Python x resolution sweep stays on the faster Ubuntu runners).
|
|
31
|
+
- os: macos-latest
|
|
32
|
+
python-version: "3.11"
|
|
33
|
+
resolution: "highest"
|
|
34
|
+
- os: macos-latest
|
|
35
|
+
python-version: "3.14"
|
|
36
|
+
resolution: "highest"
|
|
37
|
+
|
|
38
|
+
steps:
|
|
39
|
+
- uses: actions/checkout@v4
|
|
40
|
+
|
|
41
|
+
- name: Install uv
|
|
42
|
+
uses: astral-sh/setup-uv@v6
|
|
43
|
+
with:
|
|
44
|
+
enable-cache: true
|
|
45
|
+
|
|
46
|
+
- name: Verify lockfile is current
|
|
47
|
+
run: uv lock --check
|
|
48
|
+
|
|
49
|
+
- name: Run tests (${{ matrix.resolution }} resolution)
|
|
50
|
+
run: >
|
|
51
|
+
uv run --python ${{ matrix.python-version }}
|
|
52
|
+
--resolution ${{ matrix.resolution }}
|
|
53
|
+
pytest tests/ -x -q
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py,cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
#Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
#uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
#poetry.lock
|
|
109
|
+
|
|
110
|
+
# pdm
|
|
111
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
112
|
+
#pdm.lock
|
|
113
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
114
|
+
# in version control.
|
|
115
|
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
|
116
|
+
.pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
121
|
+
__pypackages__/
|
|
122
|
+
|
|
123
|
+
# Celery stuff
|
|
124
|
+
celerybeat-schedule
|
|
125
|
+
celerybeat.pid
|
|
126
|
+
|
|
127
|
+
# SageMath parsed files
|
|
128
|
+
*.sage.py
|
|
129
|
+
|
|
130
|
+
# Environments
|
|
131
|
+
.env
|
|
132
|
+
.venv
|
|
133
|
+
env/
|
|
134
|
+
venv/
|
|
135
|
+
ENV/
|
|
136
|
+
env.bak/
|
|
137
|
+
venv.bak/
|
|
138
|
+
|
|
139
|
+
# Spyder project settings
|
|
140
|
+
.spyderproject
|
|
141
|
+
.spyproject
|
|
142
|
+
|
|
143
|
+
# Rope project settings
|
|
144
|
+
.ropeproject
|
|
145
|
+
|
|
146
|
+
# mkdocs documentation
|
|
147
|
+
/site
|
|
148
|
+
|
|
149
|
+
# mypy
|
|
150
|
+
.mypy_cache/
|
|
151
|
+
.dmypy.json
|
|
152
|
+
dmypy.json
|
|
153
|
+
|
|
154
|
+
# Pyre type checker
|
|
155
|
+
.pyre/
|
|
156
|
+
|
|
157
|
+
# pytype static type analyzer
|
|
158
|
+
.pytype/
|
|
159
|
+
|
|
160
|
+
# Cython debug symbols
|
|
161
|
+
cython_debug/
|
|
162
|
+
|
|
163
|
+
# PyCharm
|
|
164
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
165
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
166
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
167
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
168
|
+
#.idea/
|
|
169
|
+
|
|
170
|
+
# Ruff stuff:
|
|
171
|
+
.ruff_cache/
|
|
172
|
+
|
|
173
|
+
# PyPI configuration file
|
|
174
|
+
.pypirc
|
|
175
|
+
|
|
176
|
+
# Cursor
|
|
177
|
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
|
178
|
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data;
|
|
179
|
+
# refer to https://docs.cursor.com/context/ignore-files
|
|
180
|
+
.cursorignore
|
|
181
|
+
.cursorindexingignore
|
|
182
|
+
|
|
183
|
+
.DS_Store
|
|
184
|
+
|
|
185
|
+
# Project-specific
|
|
186
|
+
diagnostics/
|
|
187
|
+
*.pkl
|
|
188
|
+
src/.DS_Store
|
coppuccino-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Aaron D. Johnson
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: coppuccino
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Fit distributions with normalizing flows + copulas with JAX
|
|
5
|
+
Project-URL: Homepage, https://github.com/AaronDJohnson/coppuccino
|
|
6
|
+
Project-URL: Repository, https://github.com/AaronDJohnson/coppuccino
|
|
7
|
+
Author-email: "Aaron D. Johnson" <aaron9035@gmail.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Physics
|
|
19
|
+
Requires-Python: <4.0,>=3.11
|
|
20
|
+
Requires-Dist: cloudpickle<4.0.0,>=2.2.1
|
|
21
|
+
Requires-Dist: equinox<0.14.0,>=0.13.2
|
|
22
|
+
Requires-Dist: flowjax<18.0.0,>=17.2.1
|
|
23
|
+
Requires-Dist: interpax<0.4.0,>=0.3.11
|
|
24
|
+
Requires-Dist: jax<0.8,>=0.4.38
|
|
25
|
+
Requires-Dist: jaxlib<0.8,>=0.4.38
|
|
26
|
+
Requires-Dist: jaxtyping<0.4.0,>=0.3.6
|
|
27
|
+
Requires-Dist: numpy<3.0.0,>=1.26
|
|
28
|
+
Requires-Dist: paramax>=0.0.3
|
|
29
|
+
Requires-Dist: scipy>=1.10; python_version < '3.12'
|
|
30
|
+
Requires-Dist: scipy>=1.11.3; python_version >= '3.12'
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
|
|
33
|
+
<p align="center">
|
|
34
|
+
<img src="image.png" alt="coppuccino" width="320">
|
|
35
|
+
</p>
|
|
36
|
+
|
|
37
|
+
<h1 align="center">coppuccino</h1>
|
|
38
|
+
|
|
39
|
+
<p align="center">
|
|
40
|
+
<em>Density estimation for multivariate data with copula normalizing flows, in JAX.</em>
|
|
41
|
+
</p>
|
|
42
|
+
|
|
43
|
+
<p align="center">
|
|
44
|
+
<a href="https://github.com/AaronDJohnson/coppuccino/actions/workflows/test.yml"><img src="https://github.com/AaronDJohnson/coppuccino/actions/workflows/test.yml/badge.svg" alt="Tests"></a>
|
|
45
|
+
<img src="https://img.shields.io/badge/python-3.11%20%7C%203.12%20%7C%203.13%20%7C%203.14-blue.svg" alt="Python 3.11 | 3.12 | 3.13 | 3.14">
|
|
46
|
+
<img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="MIT License">
|
|
47
|
+
</p>
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
**coppuccino** fits and samples from complex multivariate probability
|
|
52
|
+
distributions by combining two classical ideas: *copulas*, which separate a
|
|
53
|
+
joint distribution into its one-dimensional marginals and a dependency
|
|
54
|
+
structure, and *normalizing flows*, which learn that dependency structure
|
|
55
|
+
flexibly. It is designed for — though not limited to — density estimation on
|
|
56
|
+
**MCMC posterior samples**, where it enables fast resampling, density
|
|
57
|
+
evaluation, importance weighting, and calibration diagnostics.
|
|
58
|
+
|
|
59
|
+
## The idea
|
|
60
|
+
|
|
61
|
+
Any continuous joint distribution can be decomposed into its marginals and a
|
|
62
|
+
*copula* that couples them. This is **Sklar's theorem**: for a joint density
|
|
63
|
+
$p(\mathbf{x})$ on $\mathbb{R}^d$ with marginal CDFs $F_i$ and marginal
|
|
64
|
+
densities $p_i$,
|
|
65
|
+
|
|
66
|
+
$$
|
|
67
|
+
p(\mathbf{x}) = \underbrace{c\big(F_1(x_1), \dots, F_d(x_d)\big)}_{\text{copula density}} \prod_{i=1}^{d} \underbrace{p_i(x_i)}_{\text{marginals}}.
|
|
68
|
+
$$
|
|
69
|
+
|
|
70
|
+
The copula density $c$ captures *all* of the dependence between dimensions; the
|
|
71
|
+
$p_i$ capture the shape of each dimension on its own. Estimating these two
|
|
72
|
+
pieces separately is often much easier than estimating $p(\mathbf{x})$
|
|
73
|
+
directly — especially when the marginals are sharp, bounded, skewed, or
|
|
74
|
+
multimodal, which tends to defeat a plain normalizing flow.
|
|
75
|
+
|
|
76
|
+
coppuccino estimates the marginals **nonparametrically** and the copula with a
|
|
77
|
+
**normalizing flow**.
|
|
78
|
+
|
|
79
|
+
## How it works
|
|
80
|
+
|
|
81
|
+
Fitting proceeds in two stages.
|
|
82
|
+
|
|
83
|
+
```mermaid
|
|
84
|
+
flowchart LR
|
|
85
|
+
X["data x in R^d<br/>(e.g. MCMC samples)"]
|
|
86
|
+
Z["Gaussianized z<br/>standard-normal marginals,<br/>dependency retained"]
|
|
87
|
+
E["base distribution<br/>N(0, I)"]
|
|
88
|
+
X -->|"empirical CDF F_i, then inverse-normal Phi^-1<br/>(per dimension, frozen)"| Z
|
|
89
|
+
Z -->|"triangular spline flow<br/>(learns the copula)"| E
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
**1. Empirical marginal transforms (Gaussianization).** Each dimension is
|
|
93
|
+
mapped to a standard normal through the probability integral transform followed
|
|
94
|
+
by the inverse-normal CDF,
|
|
95
|
+
|
|
96
|
+
$$
|
|
97
|
+
z_i = \Phi^{-1}\big(F_i(x_i)\big), \qquad i = 1, \dots, d,
|
|
98
|
+
$$
|
|
99
|
+
|
|
100
|
+
where $\Phi$ is the standard normal CDF and each $F_i$ is an empirical CDF built
|
|
101
|
+
from the data quantiles as a **monotone spline** (a rational-quadratic spline by
|
|
102
|
+
default; see [Marginal transforms](#marginal-transforms)). After this step the
|
|
103
|
+
transformed variable $\mathbf{z}$ has exactly standard-normal marginals while
|
|
104
|
+
retaining the full dependency structure — so the copula is all that remains to
|
|
105
|
+
be modeled. These transforms are estimated once and then frozen.
|
|
106
|
+
|
|
107
|
+
**2. The copula flow.** A [triangular spline flow](https://github.com/danielward27/flowjax)
|
|
108
|
+
$T$ models the joint density of $\mathbf{z}$ against a standard-normal base
|
|
109
|
+
$\boldsymbol{\varepsilon} \sim \mathcal{N}(\mathbf{0}, \mathbf{I})$. If the
|
|
110
|
+
dependence were exactly Gaussian, $T$ would only need to learn a correlation —
|
|
111
|
+
the classical Gaussian copula; the flow generalizes this to arbitrary
|
|
112
|
+
dependence. Only this stage is trained.
|
|
113
|
+
|
|
114
|
+
Putting the stages together, the log-density of a point is available in closed
|
|
115
|
+
form via the change-of-variables formula,
|
|
116
|
+
|
|
117
|
+
$$
|
|
118
|
+
\log p(\mathbf{x}) = \log p_Z(\mathbf{z}) + \sum_{i=1}^{d}\Big( \log p_i(x_i) - \log \phi(z_i) \Big),
|
|
119
|
+
$$
|
|
120
|
+
|
|
121
|
+
where $\log p_Z$ is supplied by the flow, $\phi$ is the standard normal density,
|
|
122
|
+
and the sum is the log-Jacobian of the marginal Gaussianization. **Sampling**
|
|
123
|
+
runs the pipeline in reverse: draw $\boldsymbol\varepsilon$ from the base,
|
|
124
|
+
push it through the flow to obtain $\mathbf{z}$, then invert the marginal
|
|
125
|
+
transforms with $x_i = F_i^{-1}\big(\Phi(z_i)\big)$.
|
|
126
|
+
|
|
127
|
+
### Why this design
|
|
128
|
+
|
|
129
|
+
- **Separation of concerns.** The flow spends all of its capacity on
|
|
130
|
+
dependency, because the marginals are already standardized. The empirical
|
|
131
|
+
CDFs, in turn, reproduce difficult marginal shapes (bounded, spiky,
|
|
132
|
+
heavy-tailed, multimodal) that flows struggle to fit on their own.
|
|
133
|
+
- **Honest density and importance weights.** With the default
|
|
134
|
+
rational-quadratic marginals, the forward transform, its inverse, and its
|
|
135
|
+
derivative all come from a *single* parameterization and are mutually exact to
|
|
136
|
+
machine precision, so `sample_and_log_prob` returns self-consistent
|
|
137
|
+
densities.
|
|
138
|
+
- **Built for bounded posteriors.** By default the marginals clip to the
|
|
139
|
+
observed data range; `prior_bounds` extends them cleanly to a known prior
|
|
140
|
+
support, which is the common situation for MCMC chains.
|
|
141
|
+
|
|
142
|
+
## Installation
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
pip install coppuccino
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
From source:
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
git clone https://github.com/AaronDJohnson/coppuccino.git
|
|
152
|
+
cd coppuccino
|
|
153
|
+
pip install .
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### Development
|
|
157
|
+
|
|
158
|
+
This project is managed with [uv](https://docs.astral.sh/uv/):
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
uv sync # create the environment and install coppuccino + dev deps
|
|
162
|
+
uv run pytest # run the test suite
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
## Quick start
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
import numpy as np
|
|
169
|
+
from coppuccino import normalizing_flows_fit, sample, log_prob, save_flow, load_flow
|
|
170
|
+
|
|
171
|
+
# Fit a copula flow to multivariate data (e.g. MCMC posterior samples)
|
|
172
|
+
data = np.random.randn(5000, 3)
|
|
173
|
+
flow = normalizing_flows_fit(data, max_epochs=200)
|
|
174
|
+
|
|
175
|
+
# Draw new samples from the fitted distribution
|
|
176
|
+
new_samples = sample(flow, n_samples=1000, rng_seed=42)
|
|
177
|
+
|
|
178
|
+
# Evaluate the log probability density
|
|
179
|
+
log_probs = log_prob(flow, new_samples)
|
|
180
|
+
|
|
181
|
+
# Persist and reload the model
|
|
182
|
+
save_flow(flow, "my_flow.pkl")
|
|
183
|
+
loaded_flow = load_flow("my_flow.pkl")
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
The [`examples/`](examples/) directory contains a worked, end-to-end notebook —
|
|
187
|
+
[`galactic_binary_example.ipynb`](examples/galactic_binary_example.ipynb) — that
|
|
188
|
+
fits a flow to a real gravitational-wave posterior and compares the samples to
|
|
189
|
+
the original chain, plus an HDR calibration walkthrough in
|
|
190
|
+
[`hdr_credibility.ipynb`](examples/hdr_credibility.ipynb).
|
|
191
|
+
|
|
192
|
+
## API
|
|
193
|
+
|
|
194
|
+
| Function | Purpose |
|
|
195
|
+
| --- | --- |
|
|
196
|
+
| `normalizing_flows_fit(chain, ...)` | Fit a copula flow to data; returns the fitted model. |
|
|
197
|
+
| `sample(flow, n_samples, rng_seed=...)` | Draw samples from a fitted flow. |
|
|
198
|
+
| `log_prob(flow, samples)` | Evaluate the log density at given points. |
|
|
199
|
+
| `sample_and_log_prob(flow, n_samples, ...)` | Draw samples and their log densities together (for importance sampling). |
|
|
200
|
+
| `save_flow(flow, path)` / `load_flow(path)` | Serialize / deserialize a fitted flow. |
|
|
201
|
+
| `compute_injection_hdr(samples, injection_params, ...)` | HDR credibility of injected/true parameters (calibration). |
|
|
202
|
+
| `check_in_support(samples, injection_params)` | Whether a point lies within the sample support. |
|
|
203
|
+
|
|
204
|
+
Key options to `normalizing_flows_fit` include `flow_layers` and `knots` (flow
|
|
205
|
+
capacity), `max_epochs`, `learning_rate`, `patience` (training), `prior_bounds`,
|
|
206
|
+
`tail_model`, and `marginal` (the marginal model, below). See the docstrings for
|
|
207
|
+
the full list.
|
|
208
|
+
|
|
209
|
+
### HDR credibility (inference validation)
|
|
210
|
+
|
|
211
|
+
The highest-density-region (HDR) credibility of a known/injected parameter is
|
|
212
|
+
the fraction of the fitted distribution that is *more probable* than that point
|
|
213
|
+
— equivalently, the smallest HDR credible level whose region contains it. A
|
|
214
|
+
point at the mode scores near 0; a point far in the tails scores near 1. For
|
|
215
|
+
well-calibrated Bayesian inference these values are uniform on $[0, 1]$ across
|
|
216
|
+
many events — a standard probability–probability check.
|
|
217
|
+
|
|
218
|
+
```python
|
|
219
|
+
from coppuccino import compute_injection_hdr
|
|
220
|
+
|
|
221
|
+
posterior_samples = ... # shape (n_samples, n_params)
|
|
222
|
+
true_params = np.array([1.0, 2.0, 3.0])
|
|
223
|
+
|
|
224
|
+
hdr = compute_injection_hdr(posterior_samples, true_params)
|
|
225
|
+
# `hdr` is an array: hdr[0] for a single injection, or one value per row for a
|
|
226
|
+
# 2D batch of injections. Injections outside the sample support return 1.0.
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
## Marginal transforms
|
|
230
|
+
|
|
231
|
+
The empirical marginals are the heart of the method, and a few options control
|
|
232
|
+
how they behave at and beyond the edges of the data.
|
|
233
|
+
|
|
234
|
+
### Interpolant
|
|
235
|
+
|
|
236
|
+
```python
|
|
237
|
+
flow = normalizing_flows_fit(data) # marginal="rqs" (default)
|
|
238
|
+
flow = normalizing_flows_fit(data, marginal="pchip") # original PCHIP interpolant
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
Both families use the same empirical-quantile knots and the same tail and
|
|
242
|
+
prior-bound handling, so they produce nearly identical fits. `"rqs"` (a monotone
|
|
243
|
+
rational-quadratic spline) is preferred because its forward map, inverse, and
|
|
244
|
+
derivative share a single parameterization and are mutually exact to machine
|
|
245
|
+
precision, which keeps `sample_and_log_prob` weights honest. The `"pchip"` path
|
|
246
|
+
builds the CDF and its inverse as two independent splines that are only
|
|
247
|
+
approximate inverses of each other; it is retained for reproducing older fits.
|
|
248
|
+
|
|
249
|
+
### Prior bounds (recommended for MCMC chains)
|
|
250
|
+
|
|
251
|
+
```python
|
|
252
|
+
# Extend the empirical CDF out to a known prior support
|
|
253
|
+
bounds = np.array([[-10, 10], [-5, 5], [0, 100]])
|
|
254
|
+
flow = normalizing_flows_fit(data, prior_bounds=bounds)
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
By default the marginals clip to the observed data range. Supplying
|
|
258
|
+
`prior_bounds` extends each marginal CDF to the prior edges, so the flow can
|
|
259
|
+
generate samples across the full prior support rather than being capped at the
|
|
260
|
+
most extreme training sample.
|
|
261
|
+
|
|
262
|
+
### Heavy-tailed marginals (experimental)
|
|
263
|
+
|
|
264
|
+
```python
|
|
265
|
+
# EXPERIMENTAL: peaks-over-threshold Generalized Pareto tails
|
|
266
|
+
flow = normalizing_flows_fit(data, tail_model="gpd", tail_quantile=0.05)
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
For heavy-tailed marginals, `tail_model="gpd"` fits a Generalized Pareto
|
|
270
|
+
Distribution to each tail via peaks-over-threshold, modeling the extremes more
|
|
271
|
+
faithfully than the default Gaussian tail. `tail_quantile` sets the fraction of
|
|
272
|
+
samples in each tail (default 0.05). This feature is **experimental** — its API
|
|
273
|
+
may change — and it ignores `tail_extension` and `prior_bounds`.
|
|
274
|
+
|
|
275
|
+
## Requirements
|
|
276
|
+
|
|
277
|
+
- Python 3.11–3.14
|
|
278
|
+
- JAX / jaxlib >=0.4.38,<0.8
|
|
279
|
+
- NumPy >=1.26,<3
|
|
280
|
+
- SciPy >=1.10 (>=1.11.3 on Python 3.12 and newer; 1.11.3 is the oldest release with cp312 wheels)
|
|
281
|
+
- Equinox >=0.13.2,<0.14
|
|
282
|
+
- jaxtyping >=0.3.6,<0.4
|
|
283
|
+
- interpax >=0.3.11,<0.4
|
|
284
|
+
- FlowJAX >=17.2.1,<18
|
|
285
|
+
- paramax >=0.0.3
|
|
286
|
+
- cloudpickle >=2.2.1,<4 (used by `save_flow` / `load_flow`)
|
|
287
|
+
|
|
288
|
+
Python **3.11 through 3.14** are supported and tested. The lower bounds are the
|
|
289
|
+
oldest versions that pass the test suite on 3.11/3.12; on 3.13 and 3.14 a normal
|
|
290
|
+
install resolves newer versions (the oldest floors predate cp313/cp314 wheels),
|
|
291
|
+
which is what the CI "highest" jobs exercise there. The JAX upper bound is set by
|
|
292
|
+
the flowjax 17.2.1 pairing, not by choice — jax 0.7.x is the newest series that
|
|
293
|
+
runs cleanly with it while still shipping cp314 wheels. The exact ranges
|
|
294
|
+
(including per-Python markers) live in `pyproject.toml`, and `pip`/`uv` resolve
|
|
295
|
+
them automatically. The example notebooks need a few extra packages — see
|
|
296
|
+
[`examples/requirements.txt`](examples/requirements.txt).
|
|
297
|
+
|
|
298
|
+
## Citation
|
|
299
|
+
|
|
300
|
+
If you use coppuccino in your research, please cite it:
|
|
301
|
+
|
|
302
|
+
```bibtex
|
|
303
|
+
@software{coppuccino,
|
|
304
|
+
author = {Johnson, Aaron D.},
|
|
305
|
+
title = {coppuccino: copula normalizing flows in JAX},
|
|
306
|
+
year = {2026},
|
|
307
|
+
version = {1.0.0},
|
|
308
|
+
url = {https://github.com/AaronDJohnson/coppuccino}
|
|
309
|
+
}
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
## References
|
|
313
|
+
|
|
314
|
+
- A. Sklar (1959). *Fonctions de répartition à n dimensions et leurs marges.*
|
|
315
|
+
Publications de l'Institut de Statistique de l'Université de Paris. — Sklar's
|
|
316
|
+
theorem.
|
|
317
|
+
- C. Durkan, A. Bekasov, I. Murray, G. Papamakarios (2019).
|
|
318
|
+
*[Neural Spline Flows](https://arxiv.org/abs/1906.04032).* NeurIPS. — The
|
|
319
|
+
rational-quadratic spline transforms used here.
|
|
320
|
+
- G. Papamakarios, E. Nalisnick, D. J. Rezende, S. Mohamed, B. Lakshminarayanan
|
|
321
|
+
(2021). *[Normalizing Flows for Probabilistic Modeling and Inference](https://arxiv.org/abs/1912.02762).*
|
|
322
|
+
JMLR. — Review of normalizing flows.
|
|
323
|
+
- The flow implementation builds on
|
|
324
|
+
[FlowJAX](https://github.com/danielward27/flowjax).
|
|
325
|
+
|
|
326
|
+
## License
|
|
327
|
+
|
|
328
|
+
[MIT](LICENSE) © Aaron D. Johnson
|