PyPI - bartz - Versions diffs - 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl - Mend

bartz 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

bartz/BART.py +464 -254
bartz/__init__.py +2 -2
bartz/_version.py +1 -1
bartz/debug.py +1259 -79
bartz/grove.py +139 -93
bartz/jaxext/__init__.py +213 -0
bartz/jaxext/_autobatch.py +238 -0
bartz/jaxext/scipy/__init__.py +25 -0
bartz/jaxext/scipy/special.py +240 -0
bartz/jaxext/scipy/stats.py +36 -0
bartz/mcmcloop.py +468 -311
bartz/mcmcstep.py +734 -453
bartz/prepcovars.py +139 -43
{bartz-0.6.0.dist-info → bartz-0.7.0.dist-info}/METADATA +2 -3
bartz-0.7.0.dist-info/RECORD +17 -0
{bartz-0.6.0.dist-info → bartz-0.7.0.dist-info}/WHEEL +1 -1
bartz/jaxext.py +0 -423
bartz-0.6.0.dist-info/RECORD +0 -13

bartz/mcmcloop.py CHANGED Viewed

@@ -22,164 +22,222 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
-"""Functions that implement the full BART posterior MCMC loop."""
+"""Functions that implement the full BART posterior MCMC loop.
-import functools
+The entry points are `run_mcmc` and `make_default_callback`.
+"""
+from collections.abc import Callable
+from dataclasses import fields, replace
+from functools import partial, wraps
+from typing import Any, Protocol
 import jax
 import numpy
+from equinox import Module
 from jax import debug, lax, tree
 from jax import numpy as jnp
-from jaxtyping import Array, Real
+from jax.nn import softmax
+from jaxtyping import Array, Bool, Float32, Int32, Integer, Key, PyTree, Shaped, UInt
+from bartz import grove, jaxext, mcmcstep
+from bartz.mcmcstep import State
+class BurninTrace(Module):
+    """MCMC trace with only diagnostic values."""
+    sigma2: Float32[Array, '*trace_length'] | None
+    theta: Float32[Array, '*trace_length'] | None
+    grow_prop_count: Int32[Array, '*trace_length']
+    grow_acc_count: Int32[Array, '*trace_length']
+    prune_prop_count: Int32[Array, '*trace_length']
+    prune_acc_count: Int32[Array, '*trace_length']
+    log_likelihood: Float32[Array, '*trace_length'] | None
+    log_trans_prior: Float32[Array, '*trace_length'] | None
+    @classmethod
+    def from_state(cls, state: State) -> 'BurninTrace':
+        """Create a single-item burn-in trace from a MCMC state."""
+        return cls(
+            sigma2=state.sigma2,
+            theta=state.forest.theta,
+            grow_prop_count=state.forest.grow_prop_count,
+            grow_acc_count=state.forest.grow_acc_count,
+            prune_prop_count=state.forest.prune_prop_count,
+            prune_acc_count=state.forest.prune_acc_count,
+            log_likelihood=state.forest.log_likelihood,
+            log_trans_prior=state.forest.log_trans_prior,
+        )
-from . import grove, jaxext, mcmcstep
-from .mcmcstep import State
+class MainTrace(BurninTrace):
+    """MCMC trace with trees and diagnostic values."""
-def default_onlymain_extractor(state: State) -> dict[str, Real[Array, 'samples *']]:
-    """Extract variables for the main trace, to be used in `run_mcmc`."""
-    return dict(
-        leaf_trees=state.forest.leaf_trees,
-        var_trees=state.forest.var_trees,
-        split_trees=state.forest.split_trees,
-        offset=state.offset,
-    )
+    leaf_tree: Float32[Array, '*trace_length 2**d']
+    var_tree: UInt[Array, '*trace_length 2**(d-1)']
+    split_tree: UInt[Array, '*trace_length 2**(d-1)']
+    offset: Float32[Array, '*trace_length']
+    varprob: Float32[Array, '*trace_length p'] | None
+    @classmethod
+    def from_state(cls, state: State) -> 'MainTrace':
+        """Create a single-item main trace from a MCMC state."""
+        # compute varprob
+        log_s = state.forest.log_s
+        if log_s is None:
+            varprob = None
+        else:
+            varprob = softmax(log_s, where=state.forest.max_split.astype(bool))
+        return cls(
+            leaf_tree=state.forest.leaf_tree,
+            var_tree=state.forest.var_tree,
+            split_tree=state.forest.split_tree,
+            offset=state.offset,
+            varprob=varprob,
+            **vars(BurninTrace.from_state(state)),
+        )
-def default_both_extractor(state: State) -> dict[str, Real[Array, 'samples *'] | None]:
-    """Extract variables for main & burn-in traces, to be used in `run_mcmc`."""
-    return dict(
-        sigma2=state.sigma2,
-        grow_prop_count=state.forest.grow_prop_count,
-        grow_acc_count=state.forest.grow_acc_count,
-        prune_prop_count=state.forest.prune_prop_count,
-        prune_acc_count=state.forest.prune_acc_count,
-        log_likelihood=state.forest.log_likelihood,
-        log_trans_prior=state.forest.log_trans_prior,
-    )
+CallbackState = PyTree[Any, 'T']
+class Callback(Protocol):
+    """Callback type for `run_mcmc`."""
+    def __call__(
+        self,
+        *,
+        key: Key[Array, ''],
+        bart: State,
+        burnin: Bool[Array, ''],
+        i_total: Int32[Array, ''],
+        i_skip: Int32[Array, ''],
+        callback_state: CallbackState,
+        n_burn: Int32[Array, ''],
+        n_save: Int32[Array, ''],
+        n_skip: Int32[Array, ''],
+        i_outer: Int32[Array, ''],
+        inner_loop_length: int,
+    ) -> tuple[State, CallbackState] | None:
+        """Do an arbitrary action after an iteration of the MCMC.
+        Parameters
+        ----------
+        key
+            A key for random number generation.
+        bart
+            The MCMC state just after updating it.
+        burnin
+            Whether the last iteration was in the burn-in phase.
+        i_total
+            The index of the last MCMC iteration (0-based).
+        i_skip
+            The number of MCMC updates from the last saved state. The initial
+            state counts as saved, even if it's not copied into the trace.
+        callback_state
+            The callback state, initially set to the argument passed to
+            `run_mcmc`, afterwards to the value returned by the last invocation
+            of the callback.
+        n_burn
+        n_save
+        n_skip
+            The corresponding `run_mcmc` arguments as-is.
+        i_outer
+            The index of the last outer loop iteration (0-based).
+        inner_loop_length
+            The number of MCMC iterations in the inner loop.
+        Returns
+        -------
+        bart : State
+            A possibly modified MCMC state. To avoid modifying the state,
+            return the `bart` argument passed to the callback as-is.
+        callback_state : CallbackState
+            The new state to be passed on the next callback invocation.
+        Notes
+        -----
+        For convenience, the callback may return `None`, and the states won't
+        be updated.
+        """
+        ...
+class _Carry(Module):
+    """Carry used in the loop in `run_mcmc`."""
+    bart: State
+    i_total: Int32[Array, '']
+    key: Key[Array, '']
+    burnin_trace: PyTree[Shaped[Array, 'n_burn *']]
+    main_trace: PyTree[Shaped[Array, 'n_save *']]
+    callback_state: CallbackState
 def run_mcmc(
-    key,
-    bart,
-    n_save,
+    key: Key[Array, ''],
+    bart: State,
+    n_save: int,
     *,
-    n_burn=0,
-    n_skip=1,
-    inner_loop_length=None,
-    allow_overflow=False,
-    inner_callback=None,
-    outer_callback=None,
-    callback_state=None,
-    onlymain_extractor=default_onlymain_extractor,
-    both_extractor=default_both_extractor,
-):
+    n_burn: int = 0,
+    n_skip: int = 1,
+    inner_loop_length: int | None = None,
+    callback: Callback | None = None,
+    callback_state: CallbackState = None,
+    burnin_extractor: Callable[[State], PyTree] = BurninTrace.from_state,
+    main_extractor: Callable[[State], PyTree] = MainTrace.from_state,
+) -> tuple[State, PyTree[Shaped[Array, 'n_burn *']], PyTree[Shaped[Array, 'n_save *']]]:
     """
     Run the MCMC for the BART posterior.
     Parameters
     ----------
-    key : jax.dtypes.prng_key array
+    key
         A key for random number generation.
-    bart : dict
+    bart
         The initial MCMC state, as created and updated by the functions in
         `bartz.mcmcstep`. The MCMC loop uses buffer donation to avoid copies,
         so this variable is invalidated after running `run_mcmc`. Make a copy
         beforehand to use it again.
-    n_save : int
+    n_save
         The number of iterations to save.
-    n_burn : int, default 0
+    n_burn
         The number of initial iterations which are not saved.
-    n_skip : int, default 1
+    n_skip
         The number of iterations to skip between each saved iteration, plus 1.
         The effective burn-in is ``n_burn + n_skip - 1``.
-    inner_loop_length : int, optional
+    inner_loop_length
         The MCMC loop is split into an outer and an inner loop. The outer loop
         is in Python, while the inner loop is in JAX. `inner_loop_length` is the
         number of iterations of the inner loop to run for each iteration of the
         outer loop. If not specified, the outer loop will iterate just once,
         with all iterations done in a single inner loop run. The inner stride is
         unrelated to the stride used for saving the trace.
-    allow_overflow : bool, default False
-        If `False`, `inner_loop_length` must be a divisor of the total number of
-        iterations ``n_burn + n_skip * n_save``. If `True` and
-        `inner_loop_length` is not a divisor, some of the MCMC iterations in the
-        last outer loop iteration will not be saved to the trace.
-    inner_callback : callable, optional
-    outer_callback : callable, optional
-        Arbitrary functions run during the loop after updating the state.
-        `inner_callback` is called after each update, while `outer_callback` is
-        called after completing an inner loop. The callbacks are invoked with
-        the following arguments, passed by keyword:
-        bart : dict
-            The MCMC state just after updating it.
-        burnin : bool
-            Whether the last iteration was in the burn-in phase.
-        overflow : bool
-            Whether the last iteration was in the overflow phase (iterations
-            not saved due to `inner_loop_length` not being a divisor of the
-            total number of iterations).
-        i_total : int
-            The index of the last MCMC iteration (0-based).
-        i_skip : int
-            The number of MCMC updates from the last saved state. The initial
-            state counts as saved, even if it's not copied into the trace.
-        callback_state : jax pytree
-            The callback state, initially set to the argument passed to
-            `run_mcmc`, afterwards to the value returned by the last invocation
-            of `inner_callback` or `outer_callback`.
-        n_burn, n_save, n_skip : int
-            The corresponding arguments as-is.
-        i_outer : int
-            The index of the last outer loop iteration (0-based).
-        inner_loop_length : int
-            The number of MCMC iterations in the inner loop.
-        `inner_callback` is called under the jax jit, so the argument values are
-        not available at the time the Python code is executed. Use the utilities
-        in `jax.debug` to access the values at actual runtime.
-        The callbacks must return two values:
-        bart : dict
-            A possibly modified MCMC state. To avoid modifying the state,
-            return the `bart` argument passed to the callback as-is.
-        callback_state : jax pytree
-            The new state to be passed on the next callback invocation.
-        For convenience, if a callback returns `None`, the states are not
-        updated.
-    callback_state : jax pytree, optional
-        The initial state for the callbacks.
-    onlymain_extractor : callable, optional
-    both_extractor : callable, optional
+    callback
+        An arbitrary function run during the loop after updating the state. For
+        the signature, see `Callback`. The callback is called under the jax jit,
+        so the argument values are not available at the time the Python code is
+        executed. Use the utilities in `jax.debug` to access the values at
+        actual runtime. The callback may return new values for the MCMC state
+        and the callback state.
+    callback_state
+        The initial custom state for the callback.
+    burnin_extractor
+    main_extractor
         Functions that extract the variables to be saved respectively only in
         the main trace and in both traces, given the MCMC state as argument.
         Must return a pytree, and must be vmappable.
     Returns
     -------
-    bart : dict
+    bart : State
         The final MCMC state.
-    burnin_trace : dict of (n_burn, ...) arrays
-        The trace of the burn-in phase, containing the following subset of
-        fields from the `bart` dictionary, with an additional head index that
-        runs over MCMC iterations: 'sigma2', 'grow_prop_count',
-        'grow_acc_count', 'prune_prop_count', 'prune_acc_count' (or if specified
-        the fields in `tracevars_both`).
-    main_trace : dict of (n_save, ...) arrays
-        The trace of the main phase, containing the following subset of fields
-        from the `bart` dictionary, with an additional head index that runs over
-        MCMC iterations: 'leaf_trees', 'var_trees', 'split_trees' (or if
-        specified the fields in `tracevars_onlymain`), plus the fields in
-        `burnin_trace`.
-    Raises
-    ------
-    ValueError
-        If `inner_loop_length` is not a divisor of the total number of
-        iterations and `allow_overflow` is `False`.
+    burnin_trace : PyTree[Shaped[Array, 'n_burn *']]
+        The trace of the burn-in phase. For the default layout, see `BurninTrace`.
+    main_trace : PyTree[Shaped[Array, 'n_save *']]
+        The trace of the main phase. For the default layout, see `MainTrace`.
     Notes
     -----
@@ -190,102 +248,85 @@ def run_mcmc(
     def empty_trace(length, bart, extractor):
         return jax.vmap(extractor, in_axes=None, out_axes=0, axis_size=length)(bart)
-    trace_both = empty_trace(n_burn + n_save, bart, both_extractor)
-    trace_onlymain = empty_trace(n_save, bart, onlymain_extractor)
+    burnin_trace = empty_trace(n_burn, bart, burnin_extractor)
+    main_trace = empty_trace(n_save, bart, main_extractor)
     # determine number of iterations for inner and outer loops
     n_iters = n_burn + n_skip * n_save
     if inner_loop_length is None:
         inner_loop_length = n_iters
-    n_outer = n_iters // inner_loop_length
-    if n_iters % inner_loop_length:
-        if allow_overflow:
-            n_outer += 1
-        else:
-            raise ValueError(f'{n_iters=} is not divisible by {inner_loop_length=}')
-    carry = (bart, 0, key, trace_both, trace_onlymain, callback_state)
+    if inner_loop_length:
+        n_outer = n_iters // inner_loop_length + bool(n_iters % inner_loop_length)
+    else:
+        n_outer = 1
+        # setting to 0 would make for a clean noop, but it's useful to keep the
+        # same code path for benchmarking and testing
+    carry = _Carry(bart, jnp.int32(0), key, burnin_trace, main_trace, callback_state)
     for i_outer in range(n_outer):
         carry = _run_mcmc_inner_loop(
             carry,
             inner_loop_length,
-            inner_callback,
-            onlymain_extractor,
-            both_extractor,
+            callback,
+            burnin_extractor,
+            main_extractor,
             n_burn,
             n_save,
             n_skip,
             i_outer,
+            n_iters,
         )
-        if outer_callback is not None:
-            bart, i_total, key, trace_both, trace_onlymain, callback_state = carry
-            i_total -= 1  # because i_total is updated at the end of the inner loop
-            i_skip = _compute_i_skip(i_total, n_burn, n_skip)
-            rt = outer_callback(
-                bart=bart,
-                burnin=i_total < n_burn,
-                overflow=i_total >= n_iters,
-                i_total=i_total,
-                i_skip=i_skip,
-                callback_state=callback_state,
-                n_burn=n_burn,
-                n_save=n_save,
-                n_skip=n_skip,
-                i_outer=i_outer,
-                inner_loop_length=inner_loop_length,
-            )
-            if rt is not None:
-                bart, callback_state = rt
-                i_total += 1
-                carry = (bart, i_total, key, trace_both, trace_onlymain, callback_state)
-    bart, _, _, trace_both, trace_onlymain, _ = carry
-    burnin_trace = tree.map(lambda x: x[:n_burn, ...], trace_both)
-    main_trace = tree.map(lambda x: x[n_burn:, ...], trace_both)
-    main_trace.update(trace_onlymain)
+    return carry.bart, carry.burnin_trace, carry.main_trace
-    return bart, burnin_trace, main_trace
-def _compute_i_skip(i_total, n_burn, n_skip):
+def _compute_i_skip(
+    i_total: Int32[Array, ''], n_burn: Int32[Array, ''], n_skip: Int32[Array, '']
+) -> Int32[Array, '']:
+    """Compute the `i_skip` argument passed to `callback`."""
     burnin = i_total < n_burn
     return jnp.where(
         burnin,
         i_total + 1,
-        (i_total + 1) % n_skip + jnp.where(i_total + 1 < n_skip, n_burn, 0),
+        (i_total - n_burn + 1) % n_skip
+        + jnp.where(i_total - n_burn + 1 < n_skip, n_burn, 0),
     )
-@functools.partial(jax.jit, donate_argnums=(0,), static_argnums=(1, 2, 3, 4))
+@partial(jax.jit, donate_argnums=(0,), static_argnums=(1, 2, 3, 4))
 def _run_mcmc_inner_loop(
-    carry,
-    inner_loop_length,
-    inner_callback,
-    onlymain_extractor,
-    both_extractor,
-    n_burn,
-    n_save,
-    n_skip,
-    i_outer,
+    carry: _Carry,
+    inner_loop_length: int,
+    callback: Callback | None,
+    burnin_extractor: Callable[[State], PyTree],
+    main_extractor: Callable[[State], PyTree],
+    n_burn: Int32[Array, ''],
+    n_save: Int32[Array, ''],
+    n_skip: Int32[Array, ''],
+    i_outer: Int32[Array, ''],
+    n_iters: Int32[Array, ''],
 ):
-    def loop(carry, _):
-        bart, i_total, key, trace_both, trace_onlymain, callback_state = carry
-        keys = jaxext.split(key)
-        key = keys.pop()
-        bart = mcmcstep.step(keys.pop(), bart)
-        burnin = i_total < n_burn
-        if inner_callback is not None:
-            i_skip = _compute_i_skip(i_total, n_burn, n_skip)
-            rt = inner_callback(
-                bart=bart,
+    def loop_impl(carry: _Carry) -> _Carry:
+        """Loop body to run if i_total < n_iters."""
+        # split random key
+        keys = jaxext.split(carry.key, 3)
+        carry = replace(carry, key=keys.pop())
+        # update state
+        carry = replace(carry, bart=mcmcstep.step(keys.pop(), carry.bart))
+        burnin = carry.i_total < n_burn
+        # invoke callback
+        if callback is not None:
+            i_skip = _compute_i_skip(carry.i_total, n_burn, n_skip)
+            rt = callback(
+                key=keys.pop(),
+                bart=carry.bart,
                 burnin=burnin,
-                overflow=i_total >= n_burn + n_save * n_skip,
-                i_total=i_total,
+                i_total=carry.i_total,
                 i_skip=i_skip,
-                callback_state=callback_state,
+                callback_state=carry.callback_state,
                 n_burn=n_burn,
                 n_save=n_save,
                 n_skip=n_skip,
@@ -294,128 +335,178 @@ def _run_mcmc_inner_loop(
             )
             if rt is not None:
                 bart, callback_state = rt
+                carry = replace(carry, bart=bart, callback_state=callback_state)
-        i_onlymain = jnp.where(burnin, 0, (i_total - n_burn) // n_skip)
-        i_both = jnp.where(burnin, i_total, n_burn + i_onlymain)
-        def update_trace(index, trace, state):
-            def assign_at_index(trace_array, state_array):
-                if trace_array.size:
-                    return trace_array.at[index, ...].set(state_array)
-                else:
-                    # this handles the case where a trace is empty (e.g.,
-                    # no burn-in) because jax refuses to index into an array
-                    # of length 0
-                    return trace_array
+        def save_to_burnin_trace() -> tuple[PyTree, PyTree]:
+            return _pytree_at_set(
+                carry.burnin_trace, carry.i_total, burnin_extractor(carry.bart)
+            ), carry.main_trace
-            return tree.map(assign_at_index, trace, state)
+        def save_to_main_trace() -> tuple[PyTree, PyTree]:
+            idx = (carry.i_total - n_burn) // n_skip
+            return carry.burnin_trace, _pytree_at_set(
+                carry.main_trace, idx, main_extractor(carry.bart)
+            )
-        trace_onlymain = update_trace(
-            i_onlymain, trace_onlymain, onlymain_extractor(bart)
+        # save state to trace
+        burnin_trace, main_trace = lax.cond(
+            burnin, save_to_burnin_trace, save_to_main_trace
+        )
+        return replace(
+            carry,
+            i_total=carry.i_total + 1,
+            burnin_trace=burnin_trace,
+            main_trace=main_trace,
         )
-        trace_both = update_trace(i_both, trace_both, both_extractor(bart))
-        i_total += 1
-        carry = (bart, i_total, key, trace_both, trace_onlymain, callback_state)
+    def loop_noop(carry: _Carry) -> _Carry:
+        """Loop body to run if i_total >= n_iters; it does nothing."""
+        return carry
+    def loop(carry: _Carry, _) -> tuple[_Carry, None]:
+        carry = lax.cond(carry.i_total < n_iters, loop_impl, loop_noop, carry)
         return carry, None
     carry, _ = lax.scan(loop, carry, None, inner_loop_length)
     return carry
-def make_print_callbacks(dot_every_inner=1, report_every_outer=1):
+def _pytree_at_set(
+    dest: PyTree[Array, ' T'], index: Int32[Array, ''], val: PyTree[Array]
+) -> PyTree[Array, ' T']:
+    """Map ``dest.at[index].set(val)`` over pytrees."""
+    def at_set(dest, val):
+        if dest.size:
+            return dest.at[index, ...].set(val)
+        else:
+            # this handles the case where an array is empty because jax refuses
+            # to index into an array of length 0, even if just in the abstract
+            return dest
+    return tree.map(at_set, dest, val)
+def make_default_callback(
+    *,
+    dot_every: int | Integer[Array, ''] | None = 1,
+    report_every: int | Integer[Array, ''] | None = 100,
+    sparse_on_at: int | Integer[Array, ''] | None = None,
+) -> dict[str, Any]:
     """
-    Prepare logging callbacks for `run_mcmc`.
+    Prepare a default callback for `run_mcmc`.
-    Prepare callbacks which print a dot on every iteration, and a longer
-    report outer loop iteration.
+    The callback prints a dot on every iteration, and a longer
+    report outer loop iteration, and can do variable selection.
     Parameters
     ----------
-    dot_every_inner : int, default 1
-        A dot is printed every `dot_every_inner` MCMC iterations.
-    report_every_outer : int, default 1
-        A report is printed every `report_every_outer` outer loop
-        iterations.
+    dot_every
+        A dot is printed every `dot_every` MCMC iterations, `None` to disable.
+    report_every
+        A one line report is printed every `report_every` MCMC iterations,
+        `None` to disable.
+    sparse_on_at
+        If specified, variable selection is activated starting from this
+        iteration. If `None`, variable selection is not used.
     Returns
     -------
-    kwargs : dict
-        A dictionary with the arguments to pass to `run_mcmc` as keyword
-        arguments to set up the callbacks.
+    A dictionary with the arguments to pass to `run_mcmc` as keyword arguments to set up the callback.
     Examples
     --------
-    >>> run_mcmc(..., **make_print_callbacks())
+    >>> run_mcmc(..., **make_default_callback())
     """
+    def asarray_or_none(val: None | Any) -> None | Array:
+        return None if val is None else jnp.asarray(val)
+    def callback(*, bart, callback_state, **kwargs):
+        print_state, sparse_state = callback_state
+        bart, _ = sparse_callback(callback_state=sparse_state, bart=bart, **kwargs)
+        print_callback(callback_state=print_state, bart=bart, **kwargs)
+        return bart, callback_state
+        # here I assume that the callbacks don't update their states
     return dict(
-        inner_callback=_print_callback_inner,
-        outer_callback=_print_callback_outer,
-        callback_state=dict(
-            dot_every_inner=dot_every_inner, report_every_outer=report_every_outer
+        callback=callback,
+        callback_state=(
+            PrintCallbackState(
+                asarray_or_none(dot_every), asarray_or_none(report_every)
+            ),
+            SparseCallbackState(asarray_or_none(sparse_on_at)),
         ),
     )
-def _print_callback_inner(*, i_total, callback_state, **_):
-    dot_every_inner = callback_state['dot_every_inner']
-    if dot_every_inner is not None:
-        cond = (i_total + 1) % dot_every_inner == 0
-        debug.callback(_print_dot, cond)
+class PrintCallbackState(Module):
+    """State for `print_callback`.
+    Parameters
+    ----------
+    dot_every
+        A dot is printed every `dot_every` MCMC iterations, `None` to disable.
+    report_every
+        A one line report is printed every `report_every` MCMC iterations,
+        `None` to disable.
+    """
-def _print_dot(cond):
-    if cond:
-        print('.', end='', flush=True)
+    dot_every: Int32[Array, ''] | None
+    report_every: Int32[Array, ''] | None
-def _print_callback_outer(
+def print_callback(
     *,
-    bart,
-    burnin,
-    overflow,
-    i_total,
-    n_burn,
-    n_save,
-    n_skip,
-    callback_state,
-    i_outer,
-    inner_loop_length,
+    bart: State,
+    burnin: Bool[Array, ''],
+    i_total: Int32[Array, ''],
+    n_burn: Int32[Array, ''],
+    n_save: Int32[Array, ''],
+    n_skip: Int32[Array, ''],
+    callback_state: PrintCallbackState,
     **_,
 ):
-    report_every_outer = callback_state['report_every_outer']
-    if report_every_outer is not None:
-        dot_every_inner = callback_state['dot_every_inner']
-        if dot_every_inner is None:
-            newline = False
-        else:
-            newline = dot_every_inner < inner_loop_length
-        debug.callback(
-            _print_report,
-            cond=(i_outer + 1) % report_every_outer == 0,
-            newline=newline,
-            burnin=burnin,
-            overflow=overflow,
-            i_total=i_total,
-            n_iters=n_burn + n_save * n_skip,
-            grow_prop_count=bart.forest.grow_prop_count,
-            grow_acc_count=bart.forest.grow_acc_count,
-            prune_prop_count=bart.forest.prune_prop_count,
-            prune_acc_count=bart.forest.prune_acc_count,
-            prop_total=len(bart.forest.leaf_trees),
-            fill=grove.forest_fill(bart.forest.split_trees),
+    """Print a dot and/or a report periodically during the MCMC."""
+    if callback_state.dot_every is not None:
+        cond = (i_total + 1) % callback_state.dot_every == 0
+        lax.cond(
+            cond,
+            lambda: debug.callback(lambda: print('.', end='', flush=True)),  # noqa: T201
+            # logging can't do in-line printing so I'll stick to print
+            lambda: None,
         )
+    if callback_state.report_every is not None:
+        def print_report():
+            debug.callback(
+                _print_report,
+                newline=callback_state.dot_every is not None,
+                burnin=burnin,
+                i_total=i_total,
+                n_iters=n_burn + n_save * n_skip,
+                grow_prop_count=bart.forest.grow_prop_count,
+                grow_acc_count=bart.forest.grow_acc_count,
+                prune_prop_count=bart.forest.prune_prop_count,
+                prune_acc_count=bart.forest.prune_acc_count,
+                prop_total=len(bart.forest.leaf_tree),
+                fill=grove.forest_fill(bart.forest.split_tree),
+            )
+        cond = (i_total + 1) % callback_state.report_every == 0
+        lax.cond(cond, print_report, lambda: None)
-def _convert_jax_arrays_in_args(func):
+def _convert_jax_arrays_in_args(func: Callable) -> Callable:
     """Remove jax arrays from a function arguments.
-    Converts all jax.Array instances in the arguments to either Python scalars
+    Converts all `jax.Array` instances in the arguments to either Python scalars
     or numpy arrays.
     """
-    def convert_jax_arrays(pytree):
-        def convert_jax_arrays(val):
+    def convert_jax_arrays(pytree: PyTree) -> PyTree:
+        def convert_jax_arrays(val: Any) -> Any:
             if not isinstance(val, jax.Array):
                 return val
             elif val.shape:
@@ -425,7 +516,7 @@ def _convert_jax_arrays_in_args(func):
         return tree.map(convert_jax_arrays, pytree)
-    @functools.wraps(func)
+    @wraps(func)
     def new_func(*args, **kw):
         args = convert_jax_arrays(args)
         kw = convert_jax_arrays(kw)
@@ -439,73 +530,139 @@ def _convert_jax_arrays_in_args(func):
 # deadlock with the main thread
 def _print_report(
     *,
-    cond,
-    newline,
-    burnin,
-    overflow,
-    i_total,
-    n_iters,
-    grow_prop_count,
-    grow_acc_count,
-    prune_prop_count,
-    prune_acc_count,
-    prop_total,
-    fill,
+    newline: bool,
+    burnin: bool,
+    i_total: int,
+    n_iters: int,
+    grow_prop_count: int,
+    grow_acc_count: int,
+    prune_prop_count: int,
+    prune_acc_count: int,
+    prop_total: int,
+    fill: float,
 ):
-    if cond:
-        newline = '\n' if newline else ''
+    """Print the report for `print_callback`."""
-        def acc_string(acc_count, prop_count):
-            if prop_count:
-                return f'{acc_count / prop_count:.0%}'
-            else:
-                return ' n/d'
+    def acc_string(acc_count, prop_count):
+        if prop_count:
+            return f'{acc_count / prop_count:.0%}'
+        else:
+            return 'n/d'
-        grow_prop = grow_prop_count / prop_total
-        prune_prop = prune_prop_count / prop_total
-        grow_acc = acc_string(grow_acc_count, grow_prop_count)
-        prune_acc = acc_string(prune_acc_count, prune_prop_count)
+    grow_prop = grow_prop_count / prop_total
+    prune_prop = prune_prop_count / prop_total
+    grow_acc = acc_string(grow_acc_count, grow_prop_count)
+    prune_acc = acc_string(prune_acc_count, prune_prop_count)
+    prefix = '\n' if newline else ''
+    suffix = ' (burnin)' if burnin else ''
+    print(  # noqa: T201, see print_callback for why not logging
+        f'{prefix}It {i_total + 1}/{n_iters} '
+        f'grow P={grow_prop:.0%} A={grow_acc}, '
+        f'prune P={prune_prop:.0%} A={prune_acc}, '
+        f'fill={fill:.0%}{suffix}'
+    )
-        if burnin:
-            flag = ' (burnin)'
-        elif overflow:
-            flag = ' (overflow)'
-        else:
-            flag = ''
-        print(
-            f'{newline}It {i_total + 1}/{n_iters} '
-            f'grow P={grow_prop:.0%} A={grow_acc}, '
-            f'prune P={prune_prop:.0%} A={prune_acc}, '
-            f'fill={fill:.0%}{flag}'
+class SparseCallbackState(Module):
+    """State for `sparse_callback`.
+    Parameters
+    ----------
+    sparse_on_at
+        If specified, variable selection is activated starting from this
+        iteration. If `None`, variable selection is not used.
+    """
+    sparse_on_at: Int32[Array, ''] | None
+def sparse_callback(
+    *,
+    key: Key[Array, ''],
+    bart: State,
+    i_total: Int32[Array, ''],
+    callback_state: SparseCallbackState,
+    **_,
+):
+    """Perform variable selection, see `mcmcstep.step_sparse`."""
+    if callback_state.sparse_on_at is not None:
+        bart = lax.cond(
+            i_total < callback_state.sparse_on_at,
+            lambda: bart,
+            lambda: mcmcstep.step_sparse(key, bart),
         )
+    return bart, callback_state
+class Trace(grove.TreeHeaps, Protocol):
+    """Protocol for a MCMC trace."""
+    offset: Float32[Array, ' trace_length']
+class TreesTrace(Module):
+    """Implementation of `bartz.grove.TreeHeaps` for an MCMC trace."""
+    leaf_tree: Float32[Array, 'trace_length num_trees 2**d']
+    var_tree: UInt[Array, 'trace_length num_trees 2**(d-1)']
+    split_tree: UInt[Array, 'trace_length num_trees 2**(d-1)']
+    @classmethod
+    def from_dataclass(cls, obj: grove.TreeHeaps):
+        """Create a `TreesTrace` from any `bartz.grove.TreeHeaps`."""
+        return cls(**{f.name: getattr(obj, f.name) for f in fields(cls)})
 @jax.jit
-def evaluate_trace(trace, X):
+def evaluate_trace(
+    trace: Trace, X: UInt[Array, 'p n']
+) -> Float32[Array, 'trace_length n']:
     """
     Compute predictions for all iterations of the BART MCMC.
     Parameters
     ----------
-    trace : dict
+    trace
         A trace of the BART MCMC, as returned by `run_mcmc`.
-    X : array (p, n)
+    X
         The predictors matrix, with `p` predictors and `n` observations.
     Returns
     -------
-    y : array (n_trace, n)
-        The predictions for each iteration of the MCMC.
+    The predictions for each iteration of the MCMC.
     """
-    evaluate_trees = functools.partial(grove.evaluate_forest, sum_trees=False)
-    evaluate_trees = jaxext.autobatch(evaluate_trees, 2**29, (None, 0, 0, 0))
+    evaluate_trees = partial(grove.evaluate_forest, sum_trees=False)
+    evaluate_trees = jaxext.autobatch(evaluate_trees, 2**29, (None, 0))
+    trees = TreesTrace.from_dataclass(trace)
-    def loop(_, row):
-        values = evaluate_trees(
-            X, row['leaf_trees'], row['var_trees'], row['split_trees']
-        )
-        return None, row['offset'] + jnp.sum(values, axis=0, dtype=jnp.float32)
+    def loop(_, item):
+        offset, trees = item
+        values = evaluate_trees(X, trees)
+        return None, offset + jnp.sum(values, axis=0, dtype=jnp.float32)
-    _, y = lax.scan(loop, None, trace)
+    _, y = lax.scan(loop, None, (trace.offset, trees))
     return y
+@partial(jax.jit, static_argnums=(0,))
+def compute_varcount(
+    p: int, trace: grove.TreeHeaps
+) -> Int32[Array, 'trace_length {p}']:
+    """
+    Count how many times each predictor is used in each MCMC state.
+    Parameters
+    ----------
+    p
+        The number of predictors.
+    trace
+        A trace of the BART MCMC, as returned by `run_mcmc`.
+    Returns
+    -------
+    Histogram of predictor usage in each MCMC state.
+    """
+    vmapped_var_histogram = jax.vmap(grove.var_histogram, in_axes=(None, 0, 0))
+    return vmapped_var_histogram(p, trace.var_tree, trace.split_tree)

bartz 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

bartz 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl