PyPI - mlxmc - Versions diffs - 0.1.0__tar.gz - Mend

mlxmc 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

mlxmc-0.1.0/.gitattributes +2 -0
mlxmc-0.1.0/.github/workflows/tests.yml +52 -0
mlxmc-0.1.0/.gitignore +21 -0
mlxmc-0.1.0/CHANGELOG.md +18 -0
mlxmc-0.1.0/LICENSE +28 -0
mlxmc-0.1.0/PKG-INFO +200 -0
mlxmc-0.1.0/README.md +174 -0
mlxmc-0.1.0/examples/affine_invariance.py +56 -0
mlxmc-0.1.0/examples/gaussian_ess.py +88 -0
mlxmc-0.1.0/examples/hard_targets.py +235 -0
mlxmc-0.1.0/examples/nuts_funnel.py +126 -0
mlxmc-0.1.0/examples/plot_hard_targets.py +159 -0
mlxmc-0.1.0/examples/warmup_validation.py +54 -0
mlxmc-0.1.0/hard_targets_figure.png +0 -0
mlxmc-0.1.0/pixi.lock +1040 -0
mlxmc-0.1.0/pyproject.toml +62 -0
mlxmc-0.1.0/src/mlxmc/__init__.py +26 -0
mlxmc-0.1.0/src/mlxmc/diagnostics.py +49 -0
mlxmc-0.1.0/src/mlxmc/ensemble.py +60 -0
mlxmc-0.1.0/src/mlxmc/hmc.py +59 -0
mlxmc-0.1.0/src/mlxmc/nuts.py +211 -0
mlxmc-0.1.0/src/mlxmc/preconditioned.py +56 -0
mlxmc-0.1.0/src/mlxmc/targets.py +70 -0
mlxmc-0.1.0/src/mlxmc/warmup.py +178 -0
mlxmc-0.1.0/tests/conftest.py +21 -0
mlxmc-0.1.0/tests/test_affine_invariance.py +57 -0
mlxmc-0.1.0/tests/test_diagnostics.py +36 -0
mlxmc-0.1.0/tests/test_samplers_gaussian.py +98 -0
mlxmc-0.1.0/tests/test_warmup.py +37 -0
mlxmc-0.1.0/tests/util.py +29 -0

mlxmc-0.1.0/.gitattributes ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # SCM syntax highlighting & preventing 3-way merges
2	+ pixi.lock merge=binary linguist-language=YAML linguist-generated=true -diff

mlxmc-0.1.0/.github/workflows/tests.yml ADDED Viewed

@@ -0,0 +1,52 @@
+name: tests
+# CI runs only on pull requests to main (plus manual dispatch from the Actions tab) to
+# limit GitHub's macOS runner minutes (billed at 10x Linux). This is a Mac-only package,
+# so every job needs a macOS runner; PRs are deliberate and infrequent, while direct
+# pushes to main do NOT trigger CI. Doc-only changes are skipped via paths-ignore. To
+# also run on pushes, add a `push: { branches: [main] }` key here.
+on:
+  workflow_dispatch:
+  pull_request:
+    branches: [main]
+    paths-ignore:
+      - '**.md'
+      - 'LICENSE'
+      - '**.png'
+# Cancel an in-progress run when the same PR gets a new push -- only the latest commit is
+# worth testing, and it stops macOS jobs from stacking up.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+jobs:
+  test:
+    # Apple-silicon runner: MLX needs arm64 macOS. The CPU leg is required; the GPU
+    # leg is allowed to fail because GitHub's virtualized macOS runners may not expose
+    # a usable Metal device (mlxmc is fp32 on both backends, so coverage is equivalent).
+    runs-on: macos-14
+    strategy:
+      fail-fast: false
+      matrix:
+        device: [cpu, gpu]
+    continue-on-error: ${{ matrix.device == 'gpu' }}
+    env:
+      MLXMC_TEST_DEVICE: ${{ matrix.device }}
+    name: test (${{ matrix.device }})
+    steps:
+      - uses: actions/checkout@v4
+      - uses: prefix-dev/setup-pixi@v0.8.1
+        with:
+          manifest-path: pyproject.toml
+          # Tests run in the default env; the optional `viz` env (matplotlib) isn't needed.
+          environments: default
+          # Enforce the pixi.lock co-commit invariant: fail if the lock is stale.
+          locked: true
+      - name: MLX device info
+        run: pixi run python -c "import mlx.core as mx; print('MLX default device:', mx.default_device()); print('requested:', '${{ matrix.device }}')"
+      - name: Run tests
+        run: pixi run test

mlxmc-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,21 @@
+# pixi environments
+.pixi/*
+!.pixi/config.toml
+# Python
+__pycache__/
+*.py[cod]
+*.egg-info/
+.ipynb_checkpoints/
+# Build / test artifacts
+build/
+dist/
+.pytest_cache/
+# macOS
+.DS_Store
+# Local project notes / Claude Code instructions — kept on disk, not part of the
+# public package (its findings live in the README instead).
+CLAUDE.md

mlxmc-0.1.0/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,18 @@
+# Changelog
+All notable changes to `mlxmc` are documented here. The format follows
+[Keep a Changelog](https://keepachangelog.com/), and the project aims to follow
+[Semantic Versioning](https://semver.org/).
+## [0.1.0] — 2026-06-03
+Initial public release.
+- Affine-invariant ensemble sampler (Goodman & Weare 2010).
+- Hamiltonian Monte Carlo: identity-mass (`hmc`) and preconditioned (`preconditioned`).
+- Stan-style warmup: dual-averaging step size + windowed dense mass-matrix estimation (`warmup`).
+- NUTS (multinomial; Hoffman & Gelman 2014), vectorized over chains, with a NUTS-specific warmup (`nuts`).
+- ESS / integrated-autocorrelation diagnostics (`diagnostics`).
+- Example targets with known moments: correlated Gaussian, banana, centered / non-centered funnel (`targets`).
+[0.1.0]: https://github.com/jrcheshire/mlxmc/releases/tag/v0.1.0

mlxmc-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,28 @@
+BSD 3-Clause License
+Copyright (c) 2026, Jamie Cheshire
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

mlxmc-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,200 @@
+Metadata-Version: 2.4
+Name: mlxmc
+Version: 0.1.0
+Summary: MCMC samplers in Apple MLX
+Project-URL: Homepage, https://github.com/jrcheshire/mlxmc
+Project-URL: Repository, https://github.com/jrcheshire/mlxmc
+Project-URL: Issues, https://github.com/jrcheshire/mlxmc/issues
+Author-email: Jamie Cheshire <cheshire@caltech.edu>
+License-Expression: BSD-3-Clause
+License-File: LICENSE
+Keywords: apple-silicon,bayesian,ensemble-sampler,hmc,mcmc,mlx,nuts,sampling
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Science/Research
+Classifier: Operating System :: MacOS
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering
+Classifier: Topic :: Scientific/Engineering :: Mathematics
+Requires-Python: >=3.11
+Requires-Dist: mlx<0.30,>=0.29.3
+Requires-Dist: numpy<3,>=2
+Provides-Extra: viz
+Requires-Dist: matplotlib<4,>=3.10; extra == 'viz'
+Description-Content-Type: text/markdown
+# mlxmc
+MCMC samplers written in Apple [MLX](https://github.com/ml-explore/mlx), using its
+`grad` / `vmap` / `compile` transforms. MLX has no probabilistic-programming library
+yet (nothing like BlackJAX or NumPyro), so this is a first pass at one.
+> **Status: research code.** The samplers are tested (moment recovery, Σ-estimation,
+> affine invariance, and the autocorrelation diagnostics, on both the CPU and Metal
+> backends), but the API is young and likely to change.
+## What's here
+The package lives under `src/mlxmc/`; runnable demos and the benchmark study are in
+`examples/`.
+| Module (`mlxmc.`) | Sampler / tool |
+|---|---|
+| `ensemble` | Affine-invariant ensemble (Goodman & Weare 2010 — the `emcee` stretch move). Gradient-free, tuning-free. `make_sampler`, `run_ensemble`. |
+| `hmc` | Hamiltonian Monte Carlo, identity mass. `grad ∘ vmap` batched over chains. `make_hmc`, `run_hmc`. |
+| `preconditioned` | Mass-matrix HMC (M = Σ⁻¹). `make_phmc`, `run_phmc`. |
+| `warmup` | Stan-style warmup: dual-averaging step size + windowed **dense** mass-matrix estimation. `warmup`, `run_chain`. |
+| `nuts` | NUTS (multinomial; Hoffman & Gelman 2014), vectorized over chains. `make_nuts`, `run_nuts`. |
+| `diagnostics` | Effective sample size / integrated autocorrelation time (FFT + Sokal window); the cross-sampler **ESS/sec** metric. |
+| `targets` | Example log-densities: correlated Gaussian, banana, centered / non-centered funnel, with known moments. |
+| Example (`examples/`) | What it shows |
+|---|---|
+| `gaussian_ess.py` | Ensemble vs identity-mass HMC vs preconditioned HMC by ESS/sec on the Gaussian. |
+| `warmup_validation.py` | Warmup recovers the true Σ and matches oracle ESS/sec. |
+| `hard_targets.py` | Banana + funnel benchmark (`lscan` / `dscan` modes). |
+| `nuts_funnel.py` | NUTS correctness on the Gaussian; `funnel` mode for the masking-overhead study. |
+| `affine_invariance.py` | Empirical proof of affine invariance (same RNG → bit-identical acceptance under an affine map). |
+| `plot_hard_targets.py` | Renders `hard_targets_figure.png` (needs the optional `viz` env). |
+## Why MLX
+`grad`, `vmap`, `jvp`/`vjp`, and `compile` transfer almost directly from JAX,
+with JAX-style functional RNG keys (`mx.random.split`). The wrinkles that shape
+this code:
+- **No traced control-flow primitives** (no `while_loop` / `scan` / `cond`). MLX
+  is eager execution plus `compile` of *static* graphs. Fixed-length unrolled
+  loops (leapfrog, fixed-`L` HMC) compile fine; data-dependent trajectory length
+  (NUTS) is the hard case — `mlxmc.nuts` runs every chain to a fixed `max_tree_depth`
+  and **masks** finished chains.
+- **fp32 on the GPU.** Apple Metal has no fp64 in hardware (MLX has fp64 only on
+  the CPU backend). This is fine for sampling — Monte Carlo error (~1/√ESS) swamps
+  fp32 roundoff (~1e-6) — but ill-conditioned linear algebra (covariance, Cholesky
+  in warmup) is kept host-side in numpy fp64; only the leapfrog runs on the GPU.
+## Install
+This is a [pixi](https://pixi.sh) project; `pixi install` installs the package in editable mode:
+```bash
+pixi install
+pixi run python examples/gaussian_ess.py             # ensemble vs HMC vs preconditioned
+pixi run python examples/nuts_funnel.py funnel        # several examples have demo modes
+pixi run -e viz python examples/plot_hard_targets.py  # plotting needs the optional viz env
+```
+Or install into any environment with pip: `pip install -e .` (needs `mlx`, so arm64
+macOS). Add the plotting extra with `pip install -e ".[viz]"` (matplotlib).
+## Usage
+Every sampler takes a single-point log-density `logp(x) -> scalar` for `x` of
+shape `(D,)`; batching over walkers/chains is handled internally with `vmap`.
+Positions are MLX arrays of shape `(n_chains, D)`.
+```python
+import mlx.core as mx
+import numpy as np
+# Target: a strongly correlated 2-D Gaussian (corr 0.9, 25:1 variance ratio).
+# mlxmc.targets ships this one (as `gaussian_logp`) plus banana / funnel.
+mu = mx.array([1.0, -2.0])
+Sig_inv = mx.array(np.linalg.inv([[25.0, 4.5], [4.5, 1.0]]))
+def logp(x):                              # x: (D,) -> scalar
+    d = x - mu
+    return -0.5 * (d @ Sig_inv @ d)
+key = mx.random.key(0)
+```
+**Gradient-free ensemble** — no tuning, handles the ill-conditioning for free:
+```python
+from mlxmc import run_ensemble
+key, k = mx.random.split(key)
+ensemble = mx.random.normal(shape=(2000, 2), key=k) * 5.0     # (n_walkers, D)
+samples, accept_frac = run_ensemble(logp, ensemble, n_steps=3000, burn=1000, key=key)
+```
+**HMC, hand-tuned**, and **NUTS after Stan-style warmup** (same `logp`):
+```python
+from mlxmc import run_hmc, warmup, run_nuts
+key, k = mx.random.split(key)
+q0 = mx.random.normal(shape=(1000, 2), key=k) * 5.0           # (n_chains, D)
+samples, acc = run_hmc(logp, q0, n_steps=1500, burn=500,
+                       eps=0.15, n_leap=40, key=key)
+# Warmup adapts (eps, dense M); NUTS then adapts trajectory length itself.
+q_last, eps, Minv = warmup(logp, q0, n_warmup=600, n_leap=8, key=key)
+chain, mean_depth, max_depth = run_nuts(logp, q_last, n_samples=1500,
+                                        eps=eps, Minv_np=Minv, key=key)
+```
+> **Return shapes differ by sampler.** `run_ensemble` and `run_hmc` return
+> `(samples, accept_frac)` with `samples` flattened to `(n_draws, D)`.
+> `run_phmc`, `run_chain` (post-warmup HMC), and `run_nuts` return a structured
+> `(steps, chains, D)` chain — the layout `mlxmc.diagnostics` expects for ESS —
+> and `run_nuts` additionally returns the mean/max tree depth.
+## Findings
+![Sampler benchmarks on the banana and funnel targets](https://raw.githubusercontent.com/jrcheshire/mlxmc/main/hard_targets_figure.png)
+Validated on a corr-0.9, 25:1-variance Gaussian and on banana / funnel targets;
+every number below is reproducible with the scripts in
+[`examples/`](https://github.com/jrcheshire/mlxmc/tree/main/examples):
+- **Affine-invariant ensemble** is the robust low-D default: gradient-free,
+  tuning-free, handles ill-conditioning for free (acceptance is bit-identical
+  under an affine map). However, its per-step mixing is weaker and it degrades with dimension.
+- **HMC** needs gradients and a tuned `eps`/`L`, but mixes far better
+  (autocorrelation time τ≈2 vs ≈26 for the ensemble). A **warmup-adapted dense mass matrix** recovers the true Σ to
+  <1% Frobenius error and buys ~7–11× the ESS/sec — HMC's version of affine
+  invariance, earned rather than supplied.
+- **Fixed-`L` HMC has a trajectory resonance:** on near-Gaussian targets, when
+  `eps·L` lands near a multiple of 2π the trajectory returns to its start and
+  mixing collapses. Jittering `eps` per trajectory cures it; NUTS's adaptive
+  trajectory length is the principled fix.
+- **NUTS** is validated exact on the Gaussian (recovered covariance 24.97 vs 25)
+  and auto-tunes trajectory length, but vectorized NUTS pays a real masking cost
+  when trajectory lengths are heterogeneous — with no `while_loop`, every chain
+  runs to the deepest chain's tree depth, up to a ~30× wall-time penalty at the
+  funnel mouth versus the same target reparametrized.
+- **Geometry matters more than the sampler:** on the *centered* funnel the
+  gradient-free ensemble beats a global-metric HMC, because a constant mass matrix
+  is wrong everywhere when the scale is position-dependent; a **non-centered
+  reparametrization** removes the geometry and makes HMC unbiased again.
+- **ESS/sec is the honest efficiency metric** — acceptance fraction is a
+  misleading proxy.
+## Development
+```bash
+pixi run test                                   # full suite on the default device
+MLXMC_TEST_DEVICE=cpu pixi run test             # force the CPU backend
+MLXMC_TEST_DEVICE=gpu pixi run test             # force the Metal GPU
+```
+The suite (`tests/`) checks moment recovery for every sampler, warmup's Σ
+estimate, the affine-invariance identity, and the autocorrelation-time
+diagnostics. A GitHub Actions workflow (`.github/workflows/tests.yml`) runs the
+CPU + GPU matrix on an Apple-silicon runner for pull requests to `main` (and on
+manual dispatch from the Actions tab). Direct pushes to `main` don't trigger it,
+which keeps the (10x-billed) macOS runner minutes down.
+## References
+- Goodman & Weare (2010), *Ensemble samplers with affine invariance.*
+- Hoffman & Gelman (2014), *The No-U-Turn Sampler.*
+- Betancourt (2017), *A Conceptual Introduction to Hamiltonian Monte Carlo.*
+## License
+[BSD-3-Clause](https://github.com/jrcheshire/mlxmc/blob/main/LICENSE).

mlxmc-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,174 @@
+# mlxmc
+MCMC samplers written in Apple [MLX](https://github.com/ml-explore/mlx), using its
+`grad` / `vmap` / `compile` transforms. MLX has no probabilistic-programming library
+yet (nothing like BlackJAX or NumPyro), so this is a first pass at one.
+> **Status: research code.** The samplers are tested (moment recovery, Σ-estimation,
+> affine invariance, and the autocorrelation diagnostics, on both the CPU and Metal
+> backends), but the API is young and likely to change.
+## What's here
+The package lives under `src/mlxmc/`; runnable demos and the benchmark study are in
+`examples/`.
+| Module (`mlxmc.`) | Sampler / tool |
+|---|---|
+| `ensemble` | Affine-invariant ensemble (Goodman & Weare 2010 — the `emcee` stretch move). Gradient-free, tuning-free. `make_sampler`, `run_ensemble`. |
+| `hmc` | Hamiltonian Monte Carlo, identity mass. `grad ∘ vmap` batched over chains. `make_hmc`, `run_hmc`. |
+| `preconditioned` | Mass-matrix HMC (M = Σ⁻¹). `make_phmc`, `run_phmc`. |
+| `warmup` | Stan-style warmup: dual-averaging step size + windowed **dense** mass-matrix estimation. `warmup`, `run_chain`. |
+| `nuts` | NUTS (multinomial; Hoffman & Gelman 2014), vectorized over chains. `make_nuts`, `run_nuts`. |
+| `diagnostics` | Effective sample size / integrated autocorrelation time (FFT + Sokal window); the cross-sampler **ESS/sec** metric. |
+| `targets` | Example log-densities: correlated Gaussian, banana, centered / non-centered funnel, with known moments. |
+| Example (`examples/`) | What it shows |
+|---|---|
+| `gaussian_ess.py` | Ensemble vs identity-mass HMC vs preconditioned HMC by ESS/sec on the Gaussian. |
+| `warmup_validation.py` | Warmup recovers the true Σ and matches oracle ESS/sec. |
+| `hard_targets.py` | Banana + funnel benchmark (`lscan` / `dscan` modes). |
+| `nuts_funnel.py` | NUTS correctness on the Gaussian; `funnel` mode for the masking-overhead study. |
+| `affine_invariance.py` | Empirical proof of affine invariance (same RNG → bit-identical acceptance under an affine map). |
+| `plot_hard_targets.py` | Renders `hard_targets_figure.png` (needs the optional `viz` env). |
+## Why MLX
+`grad`, `vmap`, `jvp`/`vjp`, and `compile` transfer almost directly from JAX,
+with JAX-style functional RNG keys (`mx.random.split`). The wrinkles that shape
+this code:
+- **No traced control-flow primitives** (no `while_loop` / `scan` / `cond`). MLX
+  is eager execution plus `compile` of *static* graphs. Fixed-length unrolled
+  loops (leapfrog, fixed-`L` HMC) compile fine; data-dependent trajectory length
+  (NUTS) is the hard case — `mlxmc.nuts` runs every chain to a fixed `max_tree_depth`
+  and **masks** finished chains.
+- **fp32 on the GPU.** Apple Metal has no fp64 in hardware (MLX has fp64 only on
+  the CPU backend). This is fine for sampling — Monte Carlo error (~1/√ESS) swamps
+  fp32 roundoff (~1e-6) — but ill-conditioned linear algebra (covariance, Cholesky
+  in warmup) is kept host-side in numpy fp64; only the leapfrog runs on the GPU.
+## Install
+This is a [pixi](https://pixi.sh) project; `pixi install` installs the package in editable mode:
+```bash
+pixi install
+pixi run python examples/gaussian_ess.py             # ensemble vs HMC vs preconditioned
+pixi run python examples/nuts_funnel.py funnel        # several examples have demo modes
+pixi run -e viz python examples/plot_hard_targets.py  # plotting needs the optional viz env
+```
+Or install into any environment with pip: `pip install -e .` (needs `mlx`, so arm64
+macOS). Add the plotting extra with `pip install -e ".[viz]"` (matplotlib).
+## Usage
+Every sampler takes a single-point log-density `logp(x) -> scalar` for `x` of
+shape `(D,)`; batching over walkers/chains is handled internally with `vmap`.
+Positions are MLX arrays of shape `(n_chains, D)`.
+```python
+import mlx.core as mx
+import numpy as np
+# Target: a strongly correlated 2-D Gaussian (corr 0.9, 25:1 variance ratio).
+# mlxmc.targets ships this one (as `gaussian_logp`) plus banana / funnel.
+mu = mx.array([1.0, -2.0])
+Sig_inv = mx.array(np.linalg.inv([[25.0, 4.5], [4.5, 1.0]]))
+def logp(x):                              # x: (D,) -> scalar
+    d = x - mu
+    return -0.5 * (d @ Sig_inv @ d)
+key = mx.random.key(0)
+```
+**Gradient-free ensemble** — no tuning, handles the ill-conditioning for free:
+```python
+from mlxmc import run_ensemble
+key, k = mx.random.split(key)
+ensemble = mx.random.normal(shape=(2000, 2), key=k) * 5.0     # (n_walkers, D)
+samples, accept_frac = run_ensemble(logp, ensemble, n_steps=3000, burn=1000, key=key)
+```
+**HMC, hand-tuned**, and **NUTS after Stan-style warmup** (same `logp`):
+```python
+from mlxmc import run_hmc, warmup, run_nuts
+key, k = mx.random.split(key)
+q0 = mx.random.normal(shape=(1000, 2), key=k) * 5.0           # (n_chains, D)
+samples, acc = run_hmc(logp, q0, n_steps=1500, burn=500,
+                       eps=0.15, n_leap=40, key=key)
+# Warmup adapts (eps, dense M); NUTS then adapts trajectory length itself.
+q_last, eps, Minv = warmup(logp, q0, n_warmup=600, n_leap=8, key=key)
+chain, mean_depth, max_depth = run_nuts(logp, q_last, n_samples=1500,
+                                        eps=eps, Minv_np=Minv, key=key)
+```
+> **Return shapes differ by sampler.** `run_ensemble` and `run_hmc` return
+> `(samples, accept_frac)` with `samples` flattened to `(n_draws, D)`.
+> `run_phmc`, `run_chain` (post-warmup HMC), and `run_nuts` return a structured
+> `(steps, chains, D)` chain — the layout `mlxmc.diagnostics` expects for ESS —
+> and `run_nuts` additionally returns the mean/max tree depth.
+## Findings
+![Sampler benchmarks on the banana and funnel targets](https://raw.githubusercontent.com/jrcheshire/mlxmc/main/hard_targets_figure.png)
+Validated on a corr-0.9, 25:1-variance Gaussian and on banana / funnel targets;
+every number below is reproducible with the scripts in
+[`examples/`](https://github.com/jrcheshire/mlxmc/tree/main/examples):
+- **Affine-invariant ensemble** is the robust low-D default: gradient-free,
+  tuning-free, handles ill-conditioning for free (acceptance is bit-identical
+  under an affine map). However, its per-step mixing is weaker and it degrades with dimension.
+- **HMC** needs gradients and a tuned `eps`/`L`, but mixes far better
+  (autocorrelation time τ≈2 vs ≈26 for the ensemble). A **warmup-adapted dense mass matrix** recovers the true Σ to
+  <1% Frobenius error and buys ~7–11× the ESS/sec — HMC's version of affine
+  invariance, earned rather than supplied.
+- **Fixed-`L` HMC has a trajectory resonance:** on near-Gaussian targets, when
+  `eps·L` lands near a multiple of 2π the trajectory returns to its start and
+  mixing collapses. Jittering `eps` per trajectory cures it; NUTS's adaptive
+  trajectory length is the principled fix.
+- **NUTS** is validated exact on the Gaussian (recovered covariance 24.97 vs 25)
+  and auto-tunes trajectory length, but vectorized NUTS pays a real masking cost
+  when trajectory lengths are heterogeneous — with no `while_loop`, every chain
+  runs to the deepest chain's tree depth, up to a ~30× wall-time penalty at the
+  funnel mouth versus the same target reparametrized.
+- **Geometry matters more than the sampler:** on the *centered* funnel the
+  gradient-free ensemble beats a global-metric HMC, because a constant mass matrix
+  is wrong everywhere when the scale is position-dependent; a **non-centered
+  reparametrization** removes the geometry and makes HMC unbiased again.
+- **ESS/sec is the honest efficiency metric** — acceptance fraction is a
+  misleading proxy.
+## Development
+```bash
+pixi run test                                   # full suite on the default device
+MLXMC_TEST_DEVICE=cpu pixi run test             # force the CPU backend
+MLXMC_TEST_DEVICE=gpu pixi run test             # force the Metal GPU
+```
+The suite (`tests/`) checks moment recovery for every sampler, warmup's Σ
+estimate, the affine-invariance identity, and the autocorrelation-time
+diagnostics. A GitHub Actions workflow (`.github/workflows/tests.yml`) runs the
+CPU + GPU matrix on an Apple-silicon runner for pull requests to `main` (and on
+manual dispatch from the Actions tab). Direct pushes to `main` don't trigger it,
+which keeps the (10x-billed) macOS runner minutes down.
+## References
+- Goodman & Weare (2010), *Ensemble samplers with affine invariance.*
+- Hoffman & Gelman (2014), *The No-U-Turn Sampler.*
+- Betancourt (2017), *A Conceptual Introduction to Hamiltonian Monte Carlo.*
+## License
+[BSD-3-Clause](https://github.com/jrcheshire/mlxmc/blob/main/LICENSE).

mlxmc-0.1.0/examples/affine_invariance.py ADDED Viewed

@@ -0,0 +1,56 @@
+"""Empirical proof of affine invariance for the G&W ensemble sampler.
+Map the base target p(x) through y = A x + b to q(y) = p(A^{-1}(y-b)). Running
+the sampler on q from the affine-mapped initial ensemble, with the SAME random
+stream, must reproduce the base run exactly mapped: y_t = A x_t + b for every
+walker and step. So acceptance and mixing are identical -- a 256x-worse-
+conditioned target costs nothing extra. (Exact to float32; a borderline accept
+can rarely flip, which would show up as a large deviation.)
+"""
+import mlx.core as mx
+import numpy as np
+from mlxmc.ensemble import run_ensemble
+# Seeded NumPy generator: only used to draw the random matrix behind the affine map,
+# so the map (and therefore the whole demo) is reproducible.
+rng = np.random.default_rng(0)
+D = 3
+# Base target: isotropic standard normal.
+def logp_base(x):
+    """Return the unnormalized log-density -0.5 * x.x of N(0, I) at a single point.
+
+    `x` is expected to be a 1-D position (the samplers batch over walkers
+    themselves), so `x @ x` is the squared Euclidean norm.
+    """
+    return -0.5 * (x @ x)
+# Ill-conditioned affine map: random orthogonal factor (from QR) times scales [8, 2, 0.5].
+Q, _ = np.linalg.qr(rng.standard_normal((D, D)))
+A_np = Q @ np.diag([8.0, 2.0, 0.5])          # cond(A)=16 -> cond(Sigma)=256
+b_np = np.array([3.0, -5.0, 1.0])
+# MLX copies of the map; A_T lets the map act on row-stacked points as X @ A^T + b.
+A = mx.array(A_np)
+A_T = mx.transpose(A)
+b = mx.array(b_np)
+Ainv = mx.array(np.linalg.inv(A_np))
+# Transformed target q(y) = N(b, A A^T): logq(y) = -0.5 |A^{-1}(y-b)|^2.
+def logq(y):
+    """Return the unnormalized log-density of the affine-mapped target at a single point.
+
+    Pulls `y` back through the map (r = A^{-1}(y - b), using the module-level
+    `Ainv` and `b`) and evaluates the base density there.
+    """
+    r = Ainv @ (y - b)
+    return -0.5 * (r @ r)
+n_walkers, n_steps, burn = 200, 100, 0       # init is in equilibrium, no burn needed
+key = mx.random.key(42)
+key, k_init = mx.random.split(key)
+E0 = mx.random.normal(shape=(n_walkers, D), key=k_init)   # matched to base N(0, I)
+E0_mapped = E0 @ A_T + b                                   # matched to q = N(b, A A^T)
+# SAME key for both runs -> identical random stream.
+xs, acc_base = run_ensemble(logp_base, E0, n_steps, burn, key)
+ys, acc_tr = run_ensemble(logq, E0_mapped, n_steps, burn, key)
+# Push the base-run samples through y = A x + b (row-wise) and compare them with the
+# transformed run; invariance predicts the two agree sample by sample.
+mapped = xs @ A_T + b
+max_dev = float(mx.max(mx.abs(ys - mapped)))
+print(f"condition number:   base target 1   |   transformed target {np.linalg.cond(A_np @ A_np.T):.0f}")
+print(f"acceptance:         base {acc_base:.6f}   transformed {acc_tr:.6f}   (identical => invariant)")
+print(f"max |y - (A x + b)| over {ys.shape[0]:,} samples:  {max_dev:.2e}   (=> exact affine image, to float32)")
+# Sanity check on the transformed run alone: its sample moments should match N(b, A A^T).
+y = np.array(ys)
+print("\ntransformed run recovers N(b, A A^T):")
+print(f"  mean recovered {np.round(y.mean(0), 2)}   vs true {b_np}")
+print(f"  cov diag recovered {np.round(np.cov(y.T).diagonal(), 1)}   vs true {np.round((A_np @ A_np.T).diagonal(), 1)}")

mlxmc-0.1.0/examples/gaussian_ess.py ADDED Viewed

@@ -0,0 +1,88 @@
+"""The Gaussian ESS story: affine-invariant ensemble vs identity-mass HMC vs
+preconditioned HMC (M = Sigma^{-1}), compared by ESS/sec on the canonical
+correlated 2-D Gaussian (corr 0.9, 25:1 variance ratio).
+The point: the ensemble handles the ill-conditioning for free (no tuning, no
+gradients); identity-mass HMC pays for the bad conditioning in mixing; supplying
+the right mass matrix (here the true Sigma) is HMC's affine invariance and recovers
+the gap with far fewer, cheaper leapfrog steps. `examples/warmup_validation.py`
+shows the same M *estimated* during warmup rather than supplied.
+ESS needs the per-chain structure, so these local runners retain the (T, N, D)
+chain -- unlike the library's run_ensemble/run_hmc, which flatten for moment recovery.
+Run:  python examples/gaussian_ess.py
+"""
+import time
+import mlx.core as mx
+import numpy as np
+from mlxmc.diagnostics import report
+from mlxmc.ensemble import make_sampler
+from mlxmc.hmc import make_hmc
+from mlxmc.preconditioned import run_phmc
+from mlxmc.targets import GAUSSIAN_SIGMA, gaussian_logp
+def run_ensemble_chain(e0, n_steps, burn, key, a=2.0):
+    n_walkers, n_dim = e0.shape
+    half = n_walkers // 2
+    update = make_sampler(gaussian_logp, n_dim, a)
+    chain, e = [], e0
+    for t in range(n_steps):
+        key, k0, k1 = mx.random.split(key, 3)
+        h0, h1 = e[:half], e[half:]
+        h0, _ = update(h0, h1, k0)
+        h1, _ = update(h1, h0, k1)
+        e = mx.concatenate([h0, h1], axis=0)
+        mx.eval(e)
+        if t >= burn:
+            chain.append(e)
+    return mx.stack(chain, axis=0)
+def run_hmc_chain(q0, n_steps, burn, eps, n_leap, key):
+    step = make_hmc(gaussian_logp, eps, n_leap)
+    chain, q = [], q0
+    for t in range(n_steps):
+        key, k = mx.random.split(key, 2)
+        q, _ = step(q, k)
+        mx.eval(q)
+        if t >= burn:
+            chain.append(q)
+    return mx.stack(chain, axis=0)
+# Benchmark driver: time each of the three samplers on the same Gaussian target and
+# print their ESS/sec side by side.
+if __name__ == "__main__":
+    Sigma = GAUSSIAN_SIGMA
+    Minv = Sigma                                       # M^{-1} = Sigma
+    Mhalf = np.linalg.cholesky(np.linalg.inv(Sigma))   # chol(M), M = Sigma^{-1}
+    key = mx.random.key(0)
+    # NOTE(review): below, `key` is handed to each runner and then split again for the
+    # next sampler's initial state -- confirm this gives the intended stream independence.
+    # --- 1. Affine-invariant ensemble: 2000 walkers, 2000 steps, 500 discarded. ---
+    key, ki = mx.random.split(key)
+    ens0 = mx.random.normal(shape=(2000, 2), key=ki) * 5.0
+    t0 = time.time()
+    ec = run_ensemble_chain(ens0, 2000, 500, key)
+    mx.eval(ec)                                        # evaluate before the clock is read
+    e_ess, e_dt = report(ec, "ensemble (no grad, no tuning)", time.time() - t0)
+    # --- 2. Identity-mass HMC: 1000 chains, 1500 steps, 500 discarded. ---
+    key, ki = mx.random.split(key)
+    q0 = mx.random.normal(shape=(1000, 2), key=ki) * 5.0
+    t0 = time.time()
+    hc = run_hmc_chain(q0, 1500, 500, 0.15, 40, key)
+    mx.eval(hc)                                        # evaluate before the clock is read
+    h_ess, h_dt = report(hc, "HMC identity mass (eps=0.15, L=40)", time.time() - t0)
+    # --- 3. Preconditioned HMC with the true Sigma supplied as M^{-1}. ---
+    key, ki = mx.random.split(key)
+    q0p = mx.random.normal(shape=(1000, 2), key=ki) * 5.0
+    t0 = time.time()
+    pc = run_phmc(gaussian_logp, q0p, 1500, 500, 0.7, 6, key, Minv, Mhalf)
+    mx.eval(pc)                                        # evaluate before the clock is read
+    p_ess, p_dt = report(pc, "HMC preconditioned M=Sigma^-1 (eps=0.7, L=6)", time.time() - t0)
+    # Summary: the two values returned by each report() call are divided to give ESS/sec;
+    # the last line also prints the preconditioned/identity HMC ratio.
+    print("\n=== ESS/sec ===")
+    print(f"  ensemble:       {e_ess / e_dt:>10,.0f}")
+    print(f"  HMC identity:   {h_ess / h_dt:>10,.0f}")
+    print(f"  HMC precond:    {p_ess / p_dt:>10,.0f}   "
+          f"({(p_ess / p_dt) / (h_ess / h_dt):.1f}x identity HMC, L=6 vs 40 -> 7/41 the gradients/step)")