PyPI - bartz - Versions diffs - 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

bartz 0.6.0-py3-none-any.whl → 0.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

bartz/.DS_Store +0 -0
bartz/BART/__init__.py +27 -0
bartz/BART/_gbart.py +522 -0
bartz/__init__.py +6 -4
bartz/_interface.py +937 -0
bartz/_profiler.py +318 -0
bartz/_version.py +1 -1
bartz/debug.py +1217 -82
bartz/grove.py +205 -103
bartz/jaxext/__init__.py +287 -0
bartz/jaxext/_autobatch.py +444 -0
bartz/jaxext/scipy/__init__.py +25 -0
bartz/jaxext/scipy/special.py +239 -0
bartz/jaxext/scipy/stats.py +36 -0
bartz/mcmcloop.py +662 -314
bartz/mcmcstep/__init__.py +35 -0
bartz/mcmcstep/_moves.py +904 -0
bartz/mcmcstep/_state.py +1114 -0
bartz/mcmcstep/_step.py +1603 -0
bartz/prepcovars.py +140 -44
bartz/testing/__init__.py +29 -0
bartz/testing/_dgp.py +442 -0
{bartz-0.6.0.dist-info → bartz-0.8.0.dist-info}/METADATA +18 -13
bartz-0.8.0.dist-info/RECORD +25 -0
{bartz-0.6.0.dist-info → bartz-0.8.0.dist-info}/WHEEL +1 -1
bartz/BART.py +0 -603
bartz/jaxext.py +0 -423
bartz/mcmcstep.py +0 -2335
bartz-0.6.0.dist-info/RECORD +0 -13

bartz/mcmcloop.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # bartz/src/bartz/mcmcloop.py
 #
-# Copyright (c) 2024-2025, Giacomo Petrillo
+# Copyright (c) 2024-2026, The Bartz Contributors
 #
 # This file is part of bartz.
 #
@@ -22,268 +22,416 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
-"""Functions that implement the full BART posterior MCMC loop."""
+"""Functions that implement the full BART posterior MCMC loop.
-import functools
+The entry points are `run_mcmc` and `make_default_callback`.
+"""
+from collections.abc import Callable
+from dataclasses import fields
+from functools import partial, wraps
+from math import floor
+from typing import Any, Protocol
 import jax
 import numpy
-from jax import debug, lax, tree
+from equinox import Module
+from jax import (
+    NamedSharding,
+    ShapeDtypeStruct,
+    debug,
+    device_put,
+    eval_shape,
+    jit,
+    tree,
+)
 from jax import numpy as jnp
-from jaxtyping import Array, Real
+from jax.nn import softmax
+from jax.sharding import Mesh, PartitionSpec
+from jaxtyping import Array, Bool, Float32, Int32, Integer, Key, PyTree, Shaped, UInt
+from bartz import jaxext, mcmcstep
+from bartz._profiler import (
+    cond_if_not_profiling,
+    get_profile_mode,
+    jit_if_not_profiling,
+    scan_if_not_profiling,
+)
+from bartz.grove import TreeHeaps, evaluate_forest, forest_fill, var_histogram
+from bartz.jaxext import autobatch
+from bartz.mcmcstep import State
+from bartz.mcmcstep._state import chain_vmap_axes, field, get_axis_size, get_num_chains
+class BurninTrace(Module):
+    """MCMC trace with only diagnostic values."""
+    error_cov_inv: (
+        Float32[Array, '*chains_and_samples']
+        | Float32[Array, '*chains_and_samples k k']
+        | None
+    ) = field(chains=True)
+    theta: Float32[Array, '*chains_and_samples'] | None = field(chains=True)
+    grow_prop_count: Int32[Array, '*chains_and_samples'] = field(chains=True)
+    grow_acc_count: Int32[Array, '*chains_and_samples'] = field(chains=True)
+    prune_prop_count: Int32[Array, '*chains_and_samples'] = field(chains=True)
+    prune_acc_count: Int32[Array, '*chains_and_samples'] = field(chains=True)
+    log_likelihood: Float32[Array, '*chains_and_samples'] | None = field(chains=True)
+    log_trans_prior: Float32[Array, '*chains_and_samples'] | None = field(chains=True)
+    @classmethod
+    def from_state(cls, state: State) -> 'BurninTrace':
+        """Create a single-item burn-in trace from a MCMC state."""
+        return cls(
+            error_cov_inv=state.error_cov_inv,
+            theta=state.forest.theta,
+            grow_prop_count=state.forest.grow_prop_count,
+            grow_acc_count=state.forest.grow_acc_count,
+            prune_prop_count=state.forest.prune_prop_count,
+            prune_acc_count=state.forest.prune_acc_count,
+            log_likelihood=state.forest.log_likelihood,
+            log_trans_prior=state.forest.log_trans_prior,
+        )
-from . import grove, jaxext, mcmcstep
-from .mcmcstep import State
+class MainTrace(BurninTrace):
+    """MCMC trace with trees and diagnostic values."""
+    leaf_tree: (
+        Float32[Array, '*chains_and_samples 2**d']
+        | Float32[Array, '*chains_and_samples k 2**d']
+    ) = field(chains=True)
+    var_tree: UInt[Array, '*chains_and_samples 2**(d-1)'] = field(chains=True)
+    split_tree: UInt[Array, '*chains_and_samples 2**(d-1)'] = field(chains=True)
+    offset: Float32[Array, '*samples'] | Float32[Array, '*samples k']
+    varprob: Float32[Array, '*chains_and_samples p'] | None = field(chains=True)
+    @classmethod
+    def from_state(cls, state: State) -> 'MainTrace':
+        """Create a single-item main trace from a MCMC state."""
+        # compute varprob
+        log_s = state.forest.log_s
+        if log_s is None:
+            varprob = None
+        else:
+            varprob = softmax(log_s, where=state.forest.max_split.astype(bool))
+        return cls(
+            leaf_tree=state.forest.leaf_tree,
+            var_tree=state.forest.var_tree,
+            split_tree=state.forest.split_tree,
+            offset=state.offset,
+            varprob=varprob,
+            **vars(BurninTrace.from_state(state)),
+        )
-def default_onlymain_extractor(state: State) -> dict[str, Real[Array, 'samples *']]:
-    """Extract variables for the main trace, to be used in `run_mcmc`."""
-    return dict(
-        leaf_trees=state.forest.leaf_trees,
-        var_trees=state.forest.var_trees,
-        split_trees=state.forest.split_trees,
-        offset=state.offset,
-    )
+CallbackState = PyTree[Any, 'T']
+class Callback(Protocol):
+    """Callback type for `run_mcmc`."""
+    def __call__(
+        self,
+        *,
+        key: Key[Array, ''],
+        bart: State,
+        burnin: Bool[Array, ''],
+        i_total: Int32[Array, ''],
+        i_skip: Int32[Array, ''],
+        callback_state: CallbackState,
+        n_burn: Int32[Array, ''],
+        n_save: Int32[Array, ''],
+        n_skip: Int32[Array, ''],
+        i_outer: Int32[Array, ''],
+        inner_loop_length: int,
+    ) -> tuple[State, CallbackState] | None:
+        """Do an arbitrary action after an iteration of the MCMC.
+        Parameters
+        ----------
+        key
+            A key for random number generation.
+        bart
+            The MCMC state just after updating it.
+        burnin
+            Whether the last iteration was in the burn-in phase.
+        i_total
+            The index of the last MCMC iteration (0-based).
+        i_skip
+            The number of MCMC updates from the last saved state. The initial
+            state counts as saved, even if it's not copied into the trace.
+        callback_state
+            The callback state, initially set to the argument passed to
+            `run_mcmc`, afterwards to the value returned by the last invocation
+            of the callback.
+        n_burn
+        n_save
+        n_skip
+            The corresponding `run_mcmc` arguments as-is.
+        i_outer
+            The index of the last outer loop iteration (0-based).
+        inner_loop_length
+            The number of MCMC iterations in the inner loop.
+        Returns
+        -------
+        bart : State
+            A possibly modified MCMC state. To avoid modifying the state,
+            return the `bart` argument passed to the callback as-is.
+        callback_state : CallbackState
+            The new state to be passed on the next callback invocation.
-def default_both_extractor(state: State) -> dict[str, Real[Array, 'samples *'] | None]:
-    """Extract variables for main & burn-in traces, to be used in `run_mcmc`."""
-    return dict(
-        sigma2=state.sigma2,
-        grow_prop_count=state.forest.grow_prop_count,
-        grow_acc_count=state.forest.grow_acc_count,
-        prune_prop_count=state.forest.prune_prop_count,
-        prune_acc_count=state.forest.prune_acc_count,
-        log_likelihood=state.forest.log_likelihood,
-        log_trans_prior=state.forest.log_trans_prior,
-    )
+        Notes
+        -----
+        For convenience, the callback may return `None`, and the states won't
+        be updated.
+        """
+        ...
+class _Carry(Module):
+    """Carry used in the loop in `run_mcmc`."""
+    bart: State
+    i_total: Int32[Array, '']
+    key: Key[Array, '']
+    burnin_trace: PyTree[
+        Shaped[Array, 'n_burn ...'] | Shaped[Array, 'num_chains n_burn ...']
+    ]
+    main_trace: PyTree[
+        Shaped[Array, 'n_save ...'] | Shaped[Array, 'num_chains n_save ...']
+    ]
+    callback_state: CallbackState
 def run_mcmc(
-    key,
-    bart,
-    n_save,
+    key: Key[Array, ''],
+    bart: State,
+    n_save: int,
     *,
-    n_burn=0,
-    n_skip=1,
-    inner_loop_length=None,
-    allow_overflow=False,
-    inner_callback=None,
-    outer_callback=None,
-    callback_state=None,
-    onlymain_extractor=default_onlymain_extractor,
-    both_extractor=default_both_extractor,
-):
+    n_burn: int = 0,
+    n_skip: int = 1,
+    inner_loop_length: int | None = None,
+    callback: Callback | None = None,
+    callback_state: CallbackState = None,
+    burnin_extractor: Callable[[State], PyTree] = BurninTrace.from_state,
+    main_extractor: Callable[[State], PyTree] = MainTrace.from_state,
+) -> tuple[
+    State,
+    PyTree[Shaped[Array, 'n_burn ...'] | Shaped[Array, 'num_chains n_burn ...']],
+    PyTree[Shaped[Array, 'n_save ...'] | Shaped[Array, 'num_chains n_save ...']],
+]:
     """
     Run the MCMC for the BART posterior.
     Parameters
     ----------
-    key : jax.dtypes.prng_key array
+    key
         A key for random number generation.
-    bart : dict
+    bart
         The initial MCMC state, as created and updated by the functions in
         `bartz.mcmcstep`. The MCMC loop uses buffer donation to avoid copies,
         so this variable is invalidated after running `run_mcmc`. Make a copy
         beforehand to use it again.
-    n_save : int
+    n_save
         The number of iterations to save.
-    n_burn : int, default 0
+    n_burn
         The number of initial iterations which are not saved.
-    n_skip : int, default 1
+    n_skip
         The number of iterations to skip between each saved iteration, plus 1.
         The effective burn-in is ``n_burn + n_skip - 1``.
-    inner_loop_length : int, optional
+    inner_loop_length
         The MCMC loop is split into an outer and an inner loop. The outer loop
         is in Python, while the inner loop is in JAX. `inner_loop_length` is the
         number of iterations of the inner loop to run for each iteration of the
         outer loop. If not specified, the outer loop will iterate just once,
         with all iterations done in a single inner loop run. The inner stride is
         unrelated to the stride used for saving the trace.
-    allow_overflow : bool, default False
-        If `False`, `inner_loop_length` must be a divisor of the total number of
-        iterations ``n_burn + n_skip * n_save``. If `True` and
-        `inner_loop_length` is not a divisor, some of the MCMC iterations in the
-        last outer loop iteration will not be saved to the trace.
-    inner_callback : callable, optional
-    outer_callback : callable, optional
-        Arbitrary functions run during the loop after updating the state.
-        `inner_callback` is called after each update, while `outer_callback` is
-        called after completing an inner loop. The callbacks are invoked with
-        the following arguments, passed by keyword:
-        bart : dict
-            The MCMC state just after updating it.
-        burnin : bool
-            Whether the last iteration was in the burn-in phase.
-        overflow : bool
-            Whether the last iteration was in the overflow phase (iterations
-            not saved due to `inner_loop_length` not being a divisor of the
-            total number of iterations).
-        i_total : int
-            The index of the last MCMC iteration (0-based).
-        i_skip : int
-            The number of MCMC updates from the last saved state. The initial
-            state counts as saved, even if it's not copied into the trace.
-        callback_state : jax pytree
-            The callback state, initially set to the argument passed to
-            `run_mcmc`, afterwards to the value returned by the last invocation
-            of `inner_callback` or `outer_callback`.
-        n_burn, n_save, n_skip : int
-            The corresponding arguments as-is.
-        i_outer : int
-            The index of the last outer loop iteration (0-based).
-        inner_loop_length : int
-            The number of MCMC iterations in the inner loop.
-        `inner_callback` is called under the jax jit, so the argument values are
-        not available at the time the Python code is executed. Use the utilities
-        in `jax.debug` to access the values at actual runtime.
-        The callbacks must return two values:
-        bart : dict
-            A possibly modified MCMC state. To avoid modifying the state,
-            return the `bart` argument passed to the callback as-is.
-        callback_state : jax pytree
-            The new state to be passed on the next callback invocation.
-        For convenience, if a callback returns `None`, the states are not
-        updated.
-    callback_state : jax pytree, optional
-        The initial state for the callbacks.
-    onlymain_extractor : callable, optional
-    both_extractor : callable, optional
-        Functions that extract the variables to be saved respectively only in
-        the main trace and in both traces, given the MCMC state as argument.
-        Must return a pytree, and must be vmappable.
+    callback
+        An arbitrary function run during the loop after updating the state. For
+        the signature, see `Callback`. The callback is called under the jax jit,
+        so the argument values are not available at the time the Python code is
+        executed. Use the utilities in `jax.debug` to access the values at
+        actual runtime. The callback may return new values for the MCMC state
+        and the callback state.
+    callback_state
+        The initial custom state for the callback.
+    burnin_extractor
+    main_extractor
+        Functions that extract the variables to be saved respectively in the
+        burn-in and main traces, given the MCMC state as argument. Must
+        return a pytree, and must be vmappable.
     Returns
     -------
-    bart : dict
+    bart : State
         The final MCMC state.
-    burnin_trace : dict of (n_burn, ...) arrays
-        The trace of the burn-in phase, containing the following subset of
-        fields from the `bart` dictionary, with an additional head index that
-        runs over MCMC iterations: 'sigma2', 'grow_prop_count',
-        'grow_acc_count', 'prune_prop_count', 'prune_acc_count' (or if specified
-        the fields in `tracevars_both`).
-    main_trace : dict of (n_save, ...) arrays
-        The trace of the main phase, containing the following subset of fields
-        from the `bart` dictionary, with an additional head index that runs over
-        MCMC iterations: 'leaf_trees', 'var_trees', 'split_trees' (or if
-        specified the fields in `tracevars_onlymain`), plus the fields in
-        `burnin_trace`.
+    burnin_trace : PyTree[Shaped[Array, 'n_burn *']]
+        The trace of the burn-in phase. For the default layout, see `BurninTrace`.
+    main_trace : PyTree[Shaped[Array, 'n_save *']]
+        The trace of the main phase. For the default layout, see `MainTrace`.
     Raises
     ------
-    ValueError
-        If `inner_loop_length` is not a divisor of the total number of
-        iterations and `allow_overflow` is `False`.
+    RuntimeError
+        If `run_mcmc` detects it's being invoked in a `jit`-wrapped context and
+        with settings that would create unrolled loops in the trace.
     Notes
     -----
     The number of MCMC updates is ``n_burn + n_skip * n_save``. The traces do
     not include the initial state, and include the final state.
     """
-    def empty_trace(length, bart, extractor):
-        return jax.vmap(extractor, in_axes=None, out_axes=0, axis_size=length)(bart)
-    trace_both = empty_trace(n_burn + n_save, bart, both_extractor)
-    trace_onlymain = empty_trace(n_save, bart, onlymain_extractor)
+    # create empty traces
+    burnin_trace = _empty_trace(n_burn, bart, burnin_extractor)
+    main_trace = _empty_trace(n_save, bart, main_extractor)
     # determine number of iterations for inner and outer loops
     n_iters = n_burn + n_skip * n_save
     if inner_loop_length is None:
         inner_loop_length = n_iters
-    n_outer = n_iters // inner_loop_length
-    if n_iters % inner_loop_length:
-        if allow_overflow:
-            n_outer += 1
-        else:
-            raise ValueError(f'{n_iters=} is not divisible by {inner_loop_length=}')
-    carry = (bart, 0, key, trace_both, trace_onlymain, callback_state)
+    if inner_loop_length:
+        n_outer = n_iters // inner_loop_length + bool(n_iters % inner_loop_length)
+    else:
+        n_outer = 1
+        # setting to 0 would make for a clean noop, but it's useful to keep the
+        # same code path for benchmarking and testing
+    # error if under jit and there are unrolled loops or profile mode is on
+    under_jit = not hasattr(jnp.empty(0), 'platform')
+    if under_jit and (n_outer > 1 or get_profile_mode()):
+        msg = (
+            '`run_mcmc` was called within a jit-compiled function and '
+            'there are either more than 1 outer loops or profile mode is active, '
+            'please either do not jit, set `inner_loop_length=None`, or disable '
+            'profile mode.'
+        )
+        raise RuntimeError(msg)
+    replicate = partial(_replicate, mesh=bart.config.mesh)
+    carry = _Carry(
+        bart,
+        replicate(jnp.int32(0)),
+        replicate(key),
+        burnin_trace,
+        main_trace,
+        callback_state,
+    )
+    _run_mcmc_inner_loop._fun.reset_call_counter()  # noqa: SLF001
     for i_outer in range(n_outer):
         carry = _run_mcmc_inner_loop(
             carry,
             inner_loop_length,
-            inner_callback,
-            onlymain_extractor,
-            both_extractor,
+            callback,
+            burnin_extractor,
+            main_extractor,
             n_burn,
             n_save,
             n_skip,
             i_outer,
+            n_iters,
         )
-        if outer_callback is not None:
-            bart, i_total, key, trace_both, trace_onlymain, callback_state = carry
-            i_total -= 1  # because i_total is updated at the end of the inner loop
-            i_skip = _compute_i_skip(i_total, n_burn, n_skip)
-            rt = outer_callback(
-                bart=bart,
-                burnin=i_total < n_burn,
-                overflow=i_total >= n_iters,
-                i_total=i_total,
-                i_skip=i_skip,
-                callback_state=callback_state,
-                n_burn=n_burn,
-                n_save=n_save,
-                n_skip=n_skip,
-                i_outer=i_outer,
-                inner_loop_length=inner_loop_length,
-            )
-            if rt is not None:
-                bart, callback_state = rt
-                i_total += 1
-                carry = (bart, i_total, key, trace_both, trace_onlymain, callback_state)
-    bart, _, _, trace_both, trace_onlymain, _ = carry
+    return carry.bart, carry.burnin_trace, carry.main_trace
-    burnin_trace = tree.map(lambda x: x[:n_burn, ...], trace_both)
-    main_trace = tree.map(lambda x: x[n_burn:, ...], trace_both)
-    main_trace.update(trace_onlymain)
-    return bart, burnin_trace, main_trace
+def _replicate(x: Array, mesh: Mesh | None) -> Array:
+    if mesh is None:
+        return x
+    else:
+        return device_put(x, NamedSharding(mesh, PartitionSpec()))
-def _compute_i_skip(i_total, n_burn, n_skip):
+@partial(jit, static_argnums=(0, 2))
+def _empty_trace(
+    length: int, bart: State, extractor: Callable[[State], PyTree]
+) -> PyTree:
+    num_chains = get_num_chains(bart)
+    if num_chains is None:
+        out_axes = 0
+    else:
+        example_output = eval_shape(extractor, bart)
+        chain_axes = chain_vmap_axes(example_output)
+        out_axes = tree.map(
+            lambda a: 0 if a is None else 1, chain_axes, is_leaf=lambda a: a is None
+        )
+    return jax.vmap(extractor, in_axes=None, out_axes=out_axes, axis_size=length)(bart)
+@jit
+def _compute_i_skip(
+    i_total: Int32[Array, ''], n_burn: Int32[Array, ''], n_skip: Int32[Array, '']
+) -> Int32[Array, '']:
+    """Compute the `i_skip` argument passed to `callback`."""
     burnin = i_total < n_burn
     return jnp.where(
         burnin,
         i_total + 1,
-        (i_total + 1) % n_skip + jnp.where(i_total + 1 < n_skip, n_burn, 0),
+        (i_total - n_burn + 1) % n_skip
+        + jnp.where(i_total - n_burn + 1 < n_skip, n_burn, 0),
     )
-@functools.partial(jax.jit, donate_argnums=(0,), static_argnums=(1, 2, 3, 4))
-def _run_mcmc_inner_loop(
-    carry,
-    inner_loop_length,
-    inner_callback,
-    onlymain_extractor,
-    both_extractor,
-    n_burn,
-    n_save,
-    n_skip,
-    i_outer,
-):
-    def loop(carry, _):
-        bart, i_total, key, trace_both, trace_onlymain, callback_state = carry
+class _CallCounter:
+    """Wrap a callable to check it's not called more than once."""
+    def __init__(self, func: Callable) -> None:
+        self.func = func
+        self.n_calls = 0
-        keys = jaxext.split(key)
+    def reset_call_counter(self) -> None:
+        """Reset the call counter."""
+        self.n_calls = 0
+    def __call__(self, *args: Any, **kwargs: Any) -> Any:
+        if self.n_calls and not get_profile_mode():
+            msg = (
+                'The inner loop of `run_mcmc` was traced more than once, '
+                'which indicates a double compilation of the MCMC code. This '
+                'probably depends on the input state having different type from the '
+                'output state. Check the input is in a format that is the '
+                'same jax would output, e.g., all arrays and scalars are jax '
+                'arrays, with the right shardings.'
+            )
+            raise RuntimeError(msg)
+        self.n_calls += 1
+        return self.func(*args, **kwargs)
+@partial(jit_if_not_profiling, donate_argnums=(0,), static_argnums=(1, 2, 3, 4))
+@_CallCounter
+def _run_mcmc_inner_loop(
+    carry: _Carry,
+    inner_loop_length: int,
+    callback: Callback | None,
+    burnin_extractor: Callable[[State], PyTree],
+    main_extractor: Callable[[State], PyTree],
+    n_burn: Int32[Array, ''],
+    n_save: Int32[Array, ''],
+    n_skip: Int32[Array, ''],
+    i_outer: Int32[Array, ''],
+    n_iters: Int32[Array, ''],
+) -> _Carry:
+    def loop_impl(carry: _Carry) -> _Carry:
+        """Loop body to run if i_total < n_iters."""
+        # split random key
+        keys = jaxext.split(carry.key, 3)
         key = keys.pop()
-        bart = mcmcstep.step(keys.pop(), bart)
-        burnin = i_total < n_burn
-        if inner_callback is not None:
-            i_skip = _compute_i_skip(i_total, n_burn, n_skip)
-            rt = inner_callback(
+        # update state
+        bart = mcmcstep.step(keys.pop(), carry.bart)
+        # invoke callback
+        callback_state = carry.callback_state
+        if callback is not None:
+            i_skip = _compute_i_skip(carry.i_total, n_burn, n_skip)
+            rt = callback(
+                key=keys.pop(),
                 bart=bart,
-                burnin=burnin,
-                overflow=i_total >= n_burn + n_save * n_skip,
-                i_total=i_total,
+                burnin=carry.i_total < n_burn,
+                i_total=carry.i_total,
                 i_skip=i_skip,
                 callback_state=callback_state,
                 n_burn=n_burn,
@@ -295,137 +443,240 @@ def _run_mcmc_inner_loop(
             if rt is not None:
                 bart, callback_state = rt
-        i_onlymain = jnp.where(burnin, 0, (i_total - n_burn) // n_skip)
-        i_both = jnp.where(burnin, i_total, n_burn + i_onlymain)
+        # save to trace
+        burnin_trace, main_trace = _save_state_to_trace(
+            carry.burnin_trace,
+            carry.main_trace,
+            burnin_extractor,
+            main_extractor,
+            bart,
+            carry.i_total,
+            n_burn,
+            n_skip,
+        )
-        def update_trace(index, trace, state):
-            def assign_at_index(trace_array, state_array):
-                if trace_array.size:
-                    return trace_array.at[index, ...].set(state_array)
-                else:
-                    # this handles the case where a trace is empty (e.g.,
-                    # no burn-in) because jax refuses to index into an array
-                    # of length 0
-                    return trace_array
+        return _Carry(
+            bart=bart,
+            i_total=carry.i_total + 1,
+            key=key,
+            burnin_trace=burnin_trace,
+            main_trace=main_trace,
+            callback_state=callback_state,
+        )
-            return tree.map(assign_at_index, trace, state)
+    def loop_noop(carry: _Carry) -> _Carry:
+        """Loop body to run if i_total >= n_iters; it does nothing."""
+        return carry
-        trace_onlymain = update_trace(
-            i_onlymain, trace_onlymain, onlymain_extractor(bart)
+    def loop(carry: _Carry, _) -> tuple[_Carry, None]:
+        carry = cond_if_not_profiling(
+            carry.i_total < n_iters, loop_impl, loop_noop, carry
         )
-        trace_both = update_trace(i_both, trace_both, both_extractor(bart))
-        i_total += 1
-        carry = (bart, i_total, key, trace_both, trace_onlymain, callback_state)
         return carry, None
-    carry, _ = lax.scan(loop, carry, None, inner_loop_length)
+    carry, _ = scan_if_not_profiling(loop, carry, None, inner_loop_length)
     return carry
-def make_print_callbacks(dot_every_inner=1, report_every_outer=1):
+@partial(jit, donate_argnums=(0, 1), static_argnums=(2, 3))
+# this is jitted because under profiling _run_mcmc_inner_loop and the loop
+# within it are not, so I need the donate_argnums feature of jit to avoid
+# creating copies of the traces
+def _save_state_to_trace(
+    burnin_trace: PyTree,
+    main_trace: PyTree,
+    burnin_extractor: Callable[[State], PyTree],
+    main_extractor: Callable[[State], PyTree],
+    bart: State,
+    i_total: Int32[Array, ''],
+    n_burn: Int32[Array, ''],
+    n_skip: Int32[Array, ''],
+) -> tuple[PyTree, PyTree]:
+    # trace index where to save during burnin; out-of-bounds => noop after
+    # burnin
+    burnin_idx = i_total
+    # trace index where to save during main phase; force it out-of-bounds
+    # during burnin
+    main_idx = (i_total - n_burn) // n_skip
+    noop_idx = jnp.iinfo(jnp.int32).max
+    noop_cond = i_total < n_burn
+    main_idx = jnp.where(noop_cond, noop_idx, main_idx)
+    # prepare array index
+    num_chains = get_num_chains(bart)
+    burnin_trace = _set(burnin_trace, burnin_idx, burnin_extractor(bart), num_chains)
+    main_trace = _set(main_trace, main_idx, main_extractor(bart), num_chains)
+    return burnin_trace, main_trace
+def _set(
+    trace: PyTree[Array, ' T'],
+    index: Int32[Array, ''],
+    val: PyTree[Array, ' T'],
+    num_chains: int | None,
+) -> PyTree[Array, ' T']:
+    """Do ``trace[index] = val`` but fancier."""
+    chain_axis = chain_vmap_axes(val)
+    def at_set(
+        trace: Shaped[Array, 'chains samples *shape']
+        | Shaped[Array, ' samples *shape']
+        | None,
+        val: Shaped[Array, ' chains *shape'] | Shaped[Array, '*shape'] | None,
+        chain_axis: int | None,
+    ):
+        if trace is None or trace.size == 0:
+            # this handles two cases: an empty array, because jax refuses to
+            # index into an axis of length 0, even if just in the abstract;
+            # and optional elements that are `None`, which are treated as
+            # leaves due to the `is_leaf` below, needed to traverse `chain_axis`.
+            return trace
+        if num_chains is None or chain_axis is None:
+            ndindex = (index, ...)
+        else:
+            ndindex = (slice(None), index, ...)
+        return trace.at[ndindex].set(val, mode='drop')
+    return tree.map(at_set, trace, val, chain_axis, is_leaf=lambda x: x is None)
+def make_default_callback(
+    state: State,
+    *,
+    dot_every: int | Integer[Array, ''] | None = 1,
+    report_every: int | Integer[Array, ''] | None = 100,
+) -> dict[str, Any]:
     """
-    Prepare logging callbacks for `run_mcmc`.
+    Prepare a default callback for `run_mcmc`.
-    Prepare callbacks which print a dot on every iteration, and a longer
-    report outer loop iteration.
+    The callback prints a dot every `dot_every` MCMC iterations and a longer
+    report every `report_every` MCMC iterations, and can do variable selection.
     Parameters
     ----------
-    dot_every_inner : int, default 1
-        A dot is printed every `dot_every_inner` MCMC iterations.
-    report_every_outer : int, default 1
-        A report is printed every `report_every_outer` outer loop
-        iterations.
+    state
+        The bart state to use the callback with, used to determine device
+        sharding.
+    dot_every
+        A dot is printed every `dot_every` MCMC iterations, `None` to disable.
+    report_every
+        A one line report is printed every `report_every` MCMC iterations,
+        `None` to disable.
     Returns
     -------
-    kwargs : dict
-        A dictionary with the arguments to pass to `run_mcmc` as keyword
-        arguments to set up the callbacks.
+    A dictionary with the arguments to pass to `run_mcmc` as keyword arguments to set up the callback.
     Examples
     --------
-    >>> run_mcmc(..., **make_print_callbacks())
+    >>> run_mcmc(key, state, ..., **make_default_callback(state, ...))
     """
+    def as_replicated_array_or_none(val: None | Any) -> None | Array:
+        return None if val is None else _replicate(jnp.asarray(val), state.config.mesh)
     return dict(
-        inner_callback=_print_callback_inner,
-        outer_callback=_print_callback_outer,
-        callback_state=dict(
-            dot_every_inner=dot_every_inner, report_every_outer=report_every_outer
+        callback=print_callback,
+        callback_state=PrintCallbackState(
+            as_replicated_array_or_none(dot_every),
+            as_replicated_array_or_none(report_every),
         ),
     )
-def _print_callback_inner(*, i_total, callback_state, **_):
-    dot_every_inner = callback_state['dot_every_inner']
-    if dot_every_inner is not None:
-        cond = (i_total + 1) % dot_every_inner == 0
-        debug.callback(_print_dot, cond)
+class PrintCallbackState(Module):
+    """State for `print_callback`."""
+    dot_every: Int32[Array, ''] | None
+    """A dot is printed every `dot_every` MCMC iterations, `None` to disable."""
-def _print_dot(cond):
-    if cond:
-        print('.', end='', flush=True)
+    report_every: Int32[Array, ''] | None
+    """A one line report is printed every `report_every` MCMC iterations,
+    `None` to disable."""
-def _print_callback_outer(
+def print_callback(
     *,
-    bart,
-    burnin,
-    overflow,
-    i_total,
-    n_burn,
-    n_save,
-    n_skip,
-    callback_state,
-    i_outer,
-    inner_loop_length,
+    bart: State,
+    burnin: Bool[Array, ''],
+    i_total: Int32[Array, ''],
+    n_burn: Int32[Array, ''],
+    n_save: Int32[Array, ''],
+    n_skip: Int32[Array, ''],
+    callback_state: PrintCallbackState,
     **_,
 ):
-    report_every_outer = callback_state['report_every_outer']
-    if report_every_outer is not None:
-        dot_every_inner = callback_state['dot_every_inner']
-        if dot_every_inner is None:
-            newline = False
+    """Print a dot and/or a report periodically during the MCMC."""
+    report_every = callback_state.report_every
+    dot_every = callback_state.dot_every
+    it = i_total + 1
+    def get_cond(every: Int32[Array, ''] | None) -> bool | Bool[Array, '']:
+        return False if every is None else it % every == 0
+    report_cond = get_cond(report_every)
+    dot_cond = get_cond(dot_every)
+    def line_report_branch():
+        if report_every is None:
+            return
+        if dot_every is None:
+            print_newline = False
         else:
-            newline = dot_every_inner < inner_loop_length
+            print_newline = it % report_every > it % dot_every
         debug.callback(
             _print_report,
-            cond=(i_outer + 1) % report_every_outer == 0,
-            newline=newline,
+            print_dot=dot_cond,
+            print_newline=print_newline,
             burnin=burnin,
-            overflow=overflow,
-            i_total=i_total,
+            it=it,
             n_iters=n_burn + n_save * n_skip,
-            grow_prop_count=bart.forest.grow_prop_count,
-            grow_acc_count=bart.forest.grow_acc_count,
-            prune_prop_count=bart.forest.prune_prop_count,
-            prune_acc_count=bart.forest.prune_acc_count,
-            prop_total=len(bart.forest.leaf_trees),
-            fill=grove.forest_fill(bart.forest.split_trees),
+            num_chains=bart.forest.num_chains(),
+            grow_prop_count=bart.forest.grow_prop_count.mean(),
+            grow_acc_count=bart.forest.grow_acc_count.mean(),
+            prune_acc_count=bart.forest.prune_acc_count.mean(),
+            prop_total=bart.forest.split_tree.shape[-2],
+            fill=forest_fill(bart.forest.split_tree),
         )
+    def just_dot_branch():
+        if dot_every is None:
+            return
+        debug.callback(
+            lambda: print('.', end='', flush=True)  # noqa: T201
+        )
+        # logging can't do in-line printing so we use print
+    cond_if_not_profiling(
+        report_cond,
+        line_report_branch,
+        lambda: cond_if_not_profiling(dot_cond, just_dot_branch, lambda: None),
+    )
-def _convert_jax_arrays_in_args(func):
+def _convert_jax_arrays_in_args(func: Callable) -> Callable:
     """Remove jax arrays from a function arguments.
-    Converts all jax.Array instances in the arguments to either Python scalars
+    Converts all `jax.Array` instances in the arguments to either Python scalars
     or numpy arrays.
     """
-    def convert_jax_arrays(pytree):
-        def convert_jax_arrays(val):
-            if not isinstance(val, jax.Array):
+    def convert_jax_arrays(pytree: PyTree) -> PyTree:
+        def convert_jax_array(val: Any) -> Any:
+            if not isinstance(val, Array):
                 return val
             elif val.shape:
                 return numpy.array(val)
             else:
                 return val.item()
-        return tree.map(convert_jax_arrays, pytree)
+        return tree.map(convert_jax_array, pytree)
-    @functools.wraps(func)
+    @wraps(func)
     def new_func(*args, **kw):
         args = convert_jax_arrays(args)
         kw = convert_jax_arrays(kw)
@@ -439,73 +690,170 @@ def _convert_jax_arrays_in_args(func):
 # deadlock with the main thread
 def _print_report(
     *,
-    cond,
-    newline,
-    burnin,
-    overflow,
-    i_total,
-    n_iters,
-    grow_prop_count,
-    grow_acc_count,
-    prune_prop_count,
-    prune_acc_count,
-    prop_total,
-    fill,
+    print_dot: bool,
+    print_newline: bool,
+    burnin: bool,
+    it: int,
+    n_iters: int,
+    num_chains: int | None,
+    grow_prop_count: float,
+    grow_acc_count: float,
+    prune_acc_count: float,
+    prop_total: int,
+    fill: float,
 ):
-    if cond:
-        newline = '\n' if newline else ''
+    """Print the report for `print_callback`."""
+    # compute fractions
+    grow_prop = grow_prop_count / prop_total
+    move_acc = (grow_acc_count + prune_acc_count) / prop_total
+    # determine prefix
+    if print_dot:
+        prefix = '.\n'
+    elif print_newline:
+        prefix = '\n'
+    else:
+        prefix = ''
+    # determine suffix in parentheses
+    msgs = []
+    if num_chains is not None:
+        msgs.append(f'avg. {num_chains} chains')
+    if burnin:
+        msgs.append('burnin')
+    suffix = f' ({", ".join(msgs)})' if msgs else ''
+    print(  # noqa: T201, see print_callback for why not logging
+        f'{prefix}Iteration {it}/{n_iters}, '
+        f'grow prob: {grow_prop:.0%}, '
+        f'move acc: {move_acc:.0%}, '
+        f'fill: {fill:.0%}{suffix}'
+    )
-        def acc_string(acc_count, prop_count):
-            if prop_count:
-                return f'{acc_count / prop_count:.0%}'
-            else:
-                return ' n/d'
-        grow_prop = grow_prop_count / prop_total
-        prune_prop = prune_prop_count / prop_total
-        grow_acc = acc_string(grow_acc_count, grow_prop_count)
-        prune_acc = acc_string(prune_acc_count, prune_prop_count)
+class Trace(TreeHeaps, Protocol):
+    """Protocol for a MCMC trace."""
-        if burnin:
-            flag = ' (burnin)'
-        elif overflow:
-            flag = ' (overflow)'
-        else:
-            flag = ''
+    offset: Float32[Array, '*trace_shape']
-        print(
-            f'{newline}It {i_total + 1}/{n_iters} '
-            f'grow P={grow_prop:.0%} A={grow_acc}, '
-            f'prune P={prune_prop:.0%} A={prune_acc}, '
-            f'fill={fill:.0%}{flag}'
-        )
+class TreesTrace(Module):
+    """Implementation of `bartz.grove.TreeHeaps` for an MCMC trace."""
+    leaf_tree: (
+        Float32[Array, '*trace_shape num_trees 2**d']
+        | Float32[Array, '*trace_shape num_trees k 2**d']
+    )
+    var_tree: UInt[Array, '*trace_shape num_trees 2**(d-1)']
+    split_tree: UInt[Array, '*trace_shape num_trees 2**(d-1)']
-@jax.jit
-def evaluate_trace(trace, X):
+    @classmethod
+    def from_dataclass(cls, obj: TreeHeaps):
+        """Create a `TreesTrace` from any `bartz.grove.TreeHeaps`."""
+        return cls(**{f.name: getattr(obj, f.name) for f in fields(cls)})
+@jit
+def evaluate_trace(
+    X: UInt[Array, 'p n'], trace: Trace
+) -> Float32[Array, '*trace_shape n'] | Float32[Array, '*trace_shape k n']:
     """
     Compute predictions for all iterations of the BART MCMC.
     Parameters
     ----------
-    trace : dict
-        A trace of the BART MCMC, as returned by `run_mcmc`.
-    X : array (p, n)
+    X
         The predictors matrix, with `p` predictors and `n` observations.
+    trace
+        A main trace of the BART MCMC, as returned by `run_mcmc`.
     Returns
     -------
-    y : array (n_trace, n)
-        The predictions for each iteration of the MCMC.
+    The predictions for each chain and iteration of the MCMC.
     """
-    evaluate_trees = functools.partial(grove.evaluate_forest, sum_trees=False)
-    evaluate_trees = jaxext.autobatch(evaluate_trees, 2**29, (None, 0, 0, 0))
+    # per-device memory limit
+    max_io_nbytes = 2**27  # 128 MiB
+    # adjust memory limit for number of devices
+    mesh = jax.typeof(trace.leaf_tree).sharding.mesh
+    num_devices = get_axis_size(mesh, 'chains') * get_axis_size(mesh, 'data')
+    max_io_nbytes *= num_devices
+    # determine batching axes
+    has_chains = trace.split_tree.ndim > 3  # chains, samples, trees, nodes
+    if has_chains:
+        sample_axis = 1
+        tree_axis = 2
+    else:
+        sample_axis = 0
+        tree_axis = 1
+    # batch and sum over trees
+    batched_eval = autobatch(
+        evaluate_forest,
+        max_io_nbytes,
+        (None, tree_axis),
+        tree_axis,
+        reduce_ufunc=jnp.add,
+    )
-    def loop(_, row):
-        values = evaluate_trees(
-            X, row['leaf_trees'], row['var_trees'], row['split_trees']
+    # determine output shape (to avoid autobatch tracing everything 4 times)
+    is_mv = trace.leaf_tree.ndim > trace.split_tree.ndim
+    k = trace.leaf_tree.shape[-2] if is_mv else 1
+    mv_shape = (k,) if is_mv else ()
+    _, n = X.shape
+    out_shape = (*trace.split_tree.shape[:-2], *mv_shape, n)
+    # adjust memory limit keeping into account that trees are summed over
+    num_trees, hts = trace.split_tree.shape[-2:]
+    out_size = k * n * jnp.float32.dtype.itemsize  # the value of the forest
+    core_io_size = (
+        num_trees
+        * hts
+        * (
+            2 * k * trace.leaf_tree.itemsize
+            + trace.var_tree.itemsize
+            + trace.split_tree.itemsize
         )
-        return None, row['offset'] + jnp.sum(values, axis=0, dtype=jnp.float32)
+        + out_size
+    )
+    core_int_size = (num_trees - 1) * out_size
+    max_io_nbytes = max(1, floor(max_io_nbytes / (1 + core_int_size / core_io_size)))
+    # batch over mcmc samples
+    batched_eval = autobatch(
+        batched_eval,
+        max_io_nbytes,
+        (None, sample_axis),
+        sample_axis,
+        warn_on_overflow=False,  # the inner autobatch will handle it
+        result_shape_dtype=ShapeDtypeStruct(out_shape, jnp.float32),
+    )
+    # extract only the trees from the trace
+    trees = TreesTrace.from_dataclass(trace)
+    # evaluate trees
+    y_centered: Float32[Array, '*trace_shape n'] | Float32[Array, '*trace_shape k n']
+    y_centered = batched_eval(X, trees)
+    return y_centered + trace.offset[..., None]
+@partial(jit, static_argnums=(0,))
+def compute_varcount(p: int, trace: TreeHeaps) -> Int32[Array, '*trace_shape {p}']:
+    """
+    Count how many times each predictor is used in each MCMC state.
-    _, y = lax.scan(loop, None, trace)
-    return y
+    Parameters
+    ----------
+    p
+        The number of predictors.
+    trace
+        A main trace of the BART MCMC, as returned by `run_mcmc`.
+    Returns
+    -------
+    Histogram of predictor usage in each MCMC state.
+    """
+    # var_tree has shape (chains? samples trees nodes)
+    return var_histogram(p, trace.var_tree, trace.split_tree, sum_batch_axis=-1)

bartz 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

bartz 0.6.0py3-none-any.whl → 0.8.0py3-none-any.whl