PyPI - bartz - Versions diffs - 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

bartz 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

bartz/.DS_Store +0 -0
bartz/BART/__init__.py +27 -0
bartz/BART/_gbart.py +522 -0
bartz/__init__.py +4 -2
bartz/{BART.py → _interface.py} +256 -132
bartz/_profiler.py +318 -0
bartz/_version.py +1 -1
bartz/debug.py +269 -314
bartz/grove.py +124 -68
bartz/jaxext/__init__.py +101 -27
bartz/jaxext/_autobatch.py +257 -51
bartz/jaxext/scipy/__init__.py +1 -1
bartz/jaxext/scipy/special.py +3 -4
bartz/jaxext/scipy/stats.py +1 -1
bartz/mcmcloop.py +399 -208
bartz/mcmcstep/__init__.py +35 -0
bartz/mcmcstep/_moves.py +904 -0
bartz/mcmcstep/_state.py +1114 -0
bartz/mcmcstep/_step.py +1603 -0
bartz/prepcovars.py +1 -1
bartz/testing/__init__.py +29 -0
bartz/testing/_dgp.py +442 -0
{bartz-0.7.0.dist-info → bartz-0.8.0.dist-info}/METADATA +17 -11
bartz-0.8.0.dist-info/RECORD +25 -0
{bartz-0.7.0.dist-info → bartz-0.8.0.dist-info}/WHEEL +1 -1
bartz/mcmcstep.py +0 -2616
bartz-0.7.0.dist-info/RECORD +0 -17

bartz/mcmcloop.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # bartz/src/bartz/mcmcloop.py
 #
-# Copyright (c) 2024-2025, Giacomo Petrillo
+# Copyright (c) 2024-2026, The Bartz Contributors
 #
 # This file is part of bartz.
 #
@@ -28,39 +28,62 @@ The entry points are `run_mcmc` and `make_default_callback`.
 """
 from collections.abc import Callable
-from dataclasses import fields, replace
+from dataclasses import fields
 from functools import partial, wraps
+from math import floor
 from typing import Any, Protocol
 import jax
 import numpy
 from equinox import Module
-from jax import debug, lax, tree
+from jax import (
+    NamedSharding,
+    ShapeDtypeStruct,
+    debug,
+    device_put,
+    eval_shape,
+    jit,
+    tree,
+)
 from jax import numpy as jnp
 from jax.nn import softmax
+from jax.sharding import Mesh, PartitionSpec
 from jaxtyping import Array, Bool, Float32, Int32, Integer, Key, PyTree, Shaped, UInt
-from bartz import grove, jaxext, mcmcstep
+from bartz import jaxext, mcmcstep
+from bartz._profiler import (
+    cond_if_not_profiling,
+    get_profile_mode,
+    jit_if_not_profiling,
+    scan_if_not_profiling,
+)
+from bartz.grove import TreeHeaps, evaluate_forest, forest_fill, var_histogram
+from bartz.jaxext import autobatch
 from bartz.mcmcstep import State
+from bartz.mcmcstep._state import chain_vmap_axes, field, get_axis_size, get_num_chains
 class BurninTrace(Module):
     """MCMC trace with only diagnostic values."""
-    sigma2: Float32[Array, '*trace_length'] | None
-    theta: Float32[Array, '*trace_length'] | None
-    grow_prop_count: Int32[Array, '*trace_length']
-    grow_acc_count: Int32[Array, '*trace_length']
-    prune_prop_count: Int32[Array, '*trace_length']
-    prune_acc_count: Int32[Array, '*trace_length']
-    log_likelihood: Float32[Array, '*trace_length'] | None
-    log_trans_prior: Float32[Array, '*trace_length'] | None
+    error_cov_inv: (
+        Float32[Array, '*chains_and_samples']
+        | Float32[Array, '*chains_and_samples k k']
+        | None
+    ) = field(chains=True)
+    theta: Float32[Array, '*chains_and_samples'] | None = field(chains=True)
+    grow_prop_count: Int32[Array, '*chains_and_samples'] = field(chains=True)
+    grow_acc_count: Int32[Array, '*chains_and_samples'] = field(chains=True)
+    prune_prop_count: Int32[Array, '*chains_and_samples'] = field(chains=True)
+    prune_acc_count: Int32[Array, '*chains_and_samples'] = field(chains=True)
+    log_likelihood: Float32[Array, '*chains_and_samples'] | None = field(chains=True)
+    log_trans_prior: Float32[Array, '*chains_and_samples'] | None = field(chains=True)
     @classmethod
     def from_state(cls, state: State) -> 'BurninTrace':
         """Create a single-item burn-in trace from a MCMC state."""
         return cls(
-            sigma2=state.sigma2,
+            error_cov_inv=state.error_cov_inv,
             theta=state.forest.theta,
             grow_prop_count=state.forest.grow_prop_count,
             grow_acc_count=state.forest.grow_acc_count,
@@ -74,11 +97,14 @@ class BurninTrace(Module):
 class MainTrace(BurninTrace):
     """MCMC trace with trees and diagnostic values."""
-    leaf_tree: Float32[Array, '*trace_length 2**d']
-    var_tree: UInt[Array, '*trace_length 2**(d-1)']
-    split_tree: UInt[Array, '*trace_length 2**(d-1)']
-    offset: Float32[Array, '*trace_length']
-    varprob: Float32[Array, '*trace_length p'] | None
+    leaf_tree: (
+        Float32[Array, '*chains_and_samples 2**d']
+        | Float32[Array, '*chains_and_samples k 2**d']
+    ) = field(chains=True)
+    var_tree: UInt[Array, '*chains_and_samples 2**(d-1)'] = field(chains=True)
+    split_tree: UInt[Array, '*chains_and_samples 2**(d-1)'] = field(chains=True)
+    offset: Float32[Array, '*samples'] | Float32[Array, '*samples k']
+    varprob: Float32[Array, '*chains_and_samples p'] | None = field(chains=True)
     @classmethod
     def from_state(cls, state: State) -> 'MainTrace':
@@ -171,8 +197,12 @@ class _Carry(Module):
     bart: State
     i_total: Int32[Array, '']
     key: Key[Array, '']
-    burnin_trace: PyTree[Shaped[Array, 'n_burn *']]
-    main_trace: PyTree[Shaped[Array, 'n_save *']]
+    burnin_trace: PyTree[
+        Shaped[Array, 'n_burn ...'] | Shaped[Array, 'num_chains n_burn ...']
+    ]
+    main_trace: PyTree[
+        Shaped[Array, 'n_save ...'] | Shaped[Array, 'num_chains n_save ...']
+    ]
     callback_state: CallbackState
@@ -188,7 +218,11 @@ def run_mcmc(
     callback_state: CallbackState = None,
     burnin_extractor: Callable[[State], PyTree] = BurninTrace.from_state,
     main_extractor: Callable[[State], PyTree] = MainTrace.from_state,
-) -> tuple[State, PyTree[Shaped[Array, 'n_burn *']], PyTree[Shaped[Array, 'n_save *']]]:
+) -> tuple[
+    State,
+    PyTree[Shaped[Array, 'n_burn ...'] | Shaped[Array, 'num_chains n_burn ...']],
+    PyTree[Shaped[Array, 'n_save ...'] | Shaped[Array, 'num_chains n_save ...']],
+]:
     """
     Run the MCMC for the BART posterior.
@@ -226,9 +260,9 @@ def run_mcmc(
         The initial custom state for the callback.
     burnin_extractor
     main_extractor
-        Functions that extract the variables to be saved respectively only in
-        the main trace and in both traces, given the MCMC state as argument.
-        Must return a pytree, and must be vmappable.
+        Functions that extract the variables to be saved respectively in the
+        burnin trace and main traces, given the MCMC state as argument. Must
+        return a pytree, and must be vmappable.
     Returns
     -------
@@ -239,17 +273,20 @@ def run_mcmc(
     main_trace : PyTree[Shaped[Array, 'n_save *']]
         The trace of the main phase. For the default layout, see `MainTrace`.
+    Raises
+    ------
+    RuntimeError
+        If `run_mcmc` detects it's being invoked in a `jit`-wrapped context and
+        with settings that would create unrolled loops in the trace.
     Notes
     -----
     The number of MCMC updates is ``n_burn + n_skip * n_save``. The traces do
     not include the initial state, and include the final state.
     """
-    def empty_trace(length, bart, extractor):
-        return jax.vmap(extractor, in_axes=None, out_axes=0, axis_size=length)(bart)
-    burnin_trace = empty_trace(n_burn, bart, burnin_extractor)
-    main_trace = empty_trace(n_save, bart, main_extractor)
+    # create empty traces
+    burnin_trace = _empty_trace(n_burn, bart, burnin_extractor)
+    main_trace = _empty_trace(n_save, bart, main_extractor)
     # determine number of iterations for inner and outer loops
     n_iters = n_burn + n_skip * n_save
@@ -262,7 +299,27 @@ def run_mcmc(
         # setting to 0 would make for a clean noop, but it's useful to keep the
         # same code path for benchmarking and testing
-    carry = _Carry(bart, jnp.int32(0), key, burnin_trace, main_trace, callback_state)
+    # error if under jit and there are unrolled loops or profile mode is on
+    under_jit = not hasattr(jnp.empty(0), 'platform')
+    if under_jit and (n_outer > 1 or get_profile_mode()):
+        msg = (
+            '`run_mcmc` was called within a jit-compiled function and '
+            'there are either more than 1 outer loops or profile mode is active, '
+            'please either do not jit, set `inner_loop_length=None`, or disable '
+            'profile mode.'
+        )
+        raise RuntimeError(msg)
+    replicate = partial(_replicate, mesh=bart.config.mesh)
+    carry = _Carry(
+        bart,
+        replicate(jnp.int32(0)),
+        replicate(key),
+        burnin_trace,
+        main_trace,
+        callback_state,
+    )
+    _run_mcmc_inner_loop._fun.reset_call_counter()  # noqa: SLF001
     for i_outer in range(n_outer):
         carry = _run_mcmc_inner_loop(
             carry,
@@ -280,6 +337,30 @@ def run_mcmc(
     return carry.bart, carry.burnin_trace, carry.main_trace
+def _replicate(x: Array, mesh: Mesh | None) -> Array:
+    if mesh is None:
+        return x
+    else:
+        return device_put(x, NamedSharding(mesh, PartitionSpec()))
+@partial(jit, static_argnums=(0, 2))
+def _empty_trace(
+    length: int, bart: State, extractor: Callable[[State], PyTree]
+) -> PyTree:
+    num_chains = get_num_chains(bart)
+    if num_chains is None:
+        out_axes = 0
+    else:
+        example_output = eval_shape(extractor, bart)
+        chain_axes = chain_vmap_axes(example_output)
+        out_axes = tree.map(
+            lambda a: 0 if a is None else 1, chain_axes, is_leaf=lambda a: a is None
+        )
+    return jax.vmap(extractor, in_axes=None, out_axes=out_axes, axis_size=length)(bart)
+@jit
 def _compute_i_skip(
     i_total: Int32[Array, ''], n_burn: Int32[Array, ''], n_skip: Int32[Array, '']
 ) -> Int32[Array, '']:
@@ -293,7 +374,34 @@ def _compute_i_skip(
     )
-@partial(jax.jit, donate_argnums=(0,), static_argnums=(1, 2, 3, 4))
+class _CallCounter:
+    """Wrap a callable to check it's not called more than once."""
+    def __init__(self, func: Callable) -> None:
+        self.func = func
+        self.n_calls = 0
+    def reset_call_counter(self) -> None:
+        """Reset the call counter."""
+        self.n_calls = 0
+    def __call__(self, *args: Any, **kwargs: Any) -> Any:
+        if self.n_calls and not get_profile_mode():
+            msg = (
+                'The inner loop of `run_mcmc` was traced more than once, '
+                'which indicates a double compilation of the MCMC code. This '
+                'probably depends on the input state having different type from the '
+                'output state. Check the input is in a format that is the '
+                'same jax would output, e.g., all arrays and scalars are jax '
+                'arrays, with the right shardings.'
+            )
+            raise RuntimeError(msg)
+        self.n_calls += 1
+        return self.func(*args, **kwargs)
+@partial(jit_if_not_profiling, donate_argnums=(0,), static_argnums=(1, 2, 3, 4))
+@_CallCounter
 def _run_mcmc_inner_loop(
     carry: _Carry,
     inner_loop_length: int,
@@ -305,28 +413,27 @@ def _run_mcmc_inner_loop(
     n_skip: Int32[Array, ''],
     i_outer: Int32[Array, ''],
     n_iters: Int32[Array, ''],
-):
+) -> _Carry:
     def loop_impl(carry: _Carry) -> _Carry:
         """Loop body to run if i_total < n_iters."""
         # split random key
         keys = jaxext.split(carry.key, 3)
-        carry = replace(carry, key=keys.pop())
+        key = keys.pop()
         # update state
-        carry = replace(carry, bart=mcmcstep.step(keys.pop(), carry.bart))
-        burnin = carry.i_total < n_burn
+        bart = mcmcstep.step(keys.pop(), carry.bart)
         # invoke callback
+        callback_state = carry.callback_state
         if callback is not None:
             i_skip = _compute_i_skip(carry.i_total, n_burn, n_skip)
             rt = callback(
                 key=keys.pop(),
-                bart=carry.bart,
-                burnin=burnin,
+                bart=bart,
+                burnin=carry.i_total < n_burn,
                 i_total=carry.i_total,
                 i_skip=i_skip,
-                callback_state=carry.callback_state,
+                callback_state=callback_state,
                 n_burn=n_burn,
                 n_save=n_save,
                 n_skip=n_skip,
@@ -335,28 +442,26 @@ def _run_mcmc_inner_loop(
             )
             if rt is not None:
                 bart, callback_state = rt
-                carry = replace(carry, bart=bart, callback_state=callback_state)
-        def save_to_burnin_trace() -> tuple[PyTree, PyTree]:
-            return _pytree_at_set(
-                carry.burnin_trace, carry.i_total, burnin_extractor(carry.bart)
-            ), carry.main_trace
-        def save_to_main_trace() -> tuple[PyTree, PyTree]:
-            idx = (carry.i_total - n_burn) // n_skip
-            return carry.burnin_trace, _pytree_at_set(
-                carry.main_trace, idx, main_extractor(carry.bart)
-            )
-        # save state to trace
-        burnin_trace, main_trace = lax.cond(
-            burnin, save_to_burnin_trace, save_to_main_trace
+        # save to trace
+        burnin_trace, main_trace = _save_state_to_trace(
+            carry.burnin_trace,
+            carry.main_trace,
+            burnin_extractor,
+            main_extractor,
+            bart,
+            carry.i_total,
+            n_burn,
+            n_skip,
         )
-        return replace(
-            carry,
+        return _Carry(
+            bart=bart,
             i_total=carry.i_total + 1,
+            key=key,
             burnin_trace=burnin_trace,
             main_trace=main_trace,
+            callback_state=callback_state,
         )
     def loop_noop(carry: _Carry) -> _Carry:
@@ -364,34 +469,86 @@ def _run_mcmc_inner_loop(
         return carry
     def loop(carry: _Carry, _) -> tuple[_Carry, None]:
-        carry = lax.cond(carry.i_total < n_iters, loop_impl, loop_noop, carry)
+        carry = cond_if_not_profiling(
+            carry.i_total < n_iters, loop_impl, loop_noop, carry
+        )
         return carry, None
-    carry, _ = lax.scan(loop, carry, None, inner_loop_length)
+    carry, _ = scan_if_not_profiling(loop, carry, None, inner_loop_length)
     return carry
-def _pytree_at_set(
-    dest: PyTree[Array, ' T'], index: Int32[Array, ''], val: PyTree[Array]
+@partial(jit, donate_argnums=(0, 1), static_argnums=(2, 3))
+# this is jitted because under profiling _run_mcmc_inner_loop and the loop
+# within it are not, so I need the donate_argnums feature of jit to avoid
+# creating copies of the traces
+def _save_state_to_trace(
+    burnin_trace: PyTree,
+    main_trace: PyTree,
+    burnin_extractor: Callable[[State], PyTree],
+    main_extractor: Callable[[State], PyTree],
+    bart: State,
+    i_total: Int32[Array, ''],
+    n_burn: Int32[Array, ''],
+    n_skip: Int32[Array, ''],
+) -> tuple[PyTree, PyTree]:
+    # trace index where to save during burnin; out-of-bounds => noop after
+    # burnin
+    burnin_idx = i_total
+    # trace index where to save during main phase; force it out-of-bounds
+    # during burnin
+    main_idx = (i_total - n_burn) // n_skip
+    noop_idx = jnp.iinfo(jnp.int32).max
+    noop_cond = i_total < n_burn
+    main_idx = jnp.where(noop_cond, noop_idx, main_idx)
+    # prepare array index
+    num_chains = get_num_chains(bart)
+    burnin_trace = _set(burnin_trace, burnin_idx, burnin_extractor(bart), num_chains)
+    main_trace = _set(main_trace, main_idx, main_extractor(bart), num_chains)
+    return burnin_trace, main_trace
+def _set(
+    trace: PyTree[Array, ' T'],
+    index: Int32[Array, ''],
+    val: PyTree[Array, ' T'],
+    num_chains: int | None,
 ) -> PyTree[Array, ' T']:
-    """Map ``dest.at[index].set(val)`` over pytrees."""
+    """Do ``trace[index] = val`` but fancier."""
+    chain_axis = chain_vmap_axes(val)
+    def at_set(
+        trace: Shaped[Array, 'chains samples *shape']
+        | Shaped[Array, ' samples *shape']
+        | None,
+        val: Shaped[Array, ' chains *shape'] | Shaped[Array, '*shape'] | None,
+        chain_axis: int | None,
+    ):
+        if trace is None or trace.size == 0:
+            # this handles the case where an array is empty because jax refuses
+            # to index into an axis of length 0, even if just in the abstract,
+            # and optional elements that are considered leaves due to `is_leaf`
+            # below needed to traverse `chain_axis`.
+            return trace
-    def at_set(dest, val):
-        if dest.size:
-            return dest.at[index, ...].set(val)
+        if num_chains is None or chain_axis is None:
+            ndindex = (index, ...)
         else:
-            # this handles the case where an array is empty because jax refuses
-            # to index into an array of length 0, even if just in the abstract
-            return dest
+            ndindex = (slice(None), index, ...)
+        return trace.at[ndindex].set(val, mode='drop')
-    return tree.map(at_set, dest, val)
+    return tree.map(at_set, trace, val, chain_axis, is_leaf=lambda x: x is None)
 def make_default_callback(
+    state: State,
     *,
     dot_every: int | Integer[Array, ''] | None = 1,
     report_every: int | Integer[Array, ''] | None = 100,
-    sparse_on_at: int | Integer[Array, ''] | None = None,
 ) -> dict[str, Any]:
     """
     Prepare a default callback for `run_mcmc`.
@@ -401,14 +558,14 @@ def make_default_callback(
     Parameters
     ----------
+    state
+        The bart state to use the callback with, used to determine device
+        sharding.
     dot_every
         A dot is printed every `dot_every` MCMC iterations, `None` to disable.
     report_every
         A one line report is printed every `report_every` MCMC iterations,
         `None` to disable.
-    sparse_on_at
-        If specified, variable selection is activated starting from this
-        iteration. If `None`, variable selection is not used.
     Returns
     -------
@@ -416,44 +573,30 @@ def make_default_callback(
     Examples
     --------
-    >>> run_mcmc(..., **make_default_callback())
+    >>> run_mcmc(key, state, ..., **make_default_callback(state, ...))
     """
-    def asarray_or_none(val: None | Any) -> None | Array:
-        return None if val is None else jnp.asarray(val)
-    def callback(*, bart, callback_state, **kwargs):
-        print_state, sparse_state = callback_state
-        bart, _ = sparse_callback(callback_state=sparse_state, bart=bart, **kwargs)
-        print_callback(callback_state=print_state, bart=bart, **kwargs)
-        return bart, callback_state
-        # here I assume that the callbacks don't update their states
+    def as_replicated_array_or_none(val: None | Any) -> None | Array:
+        return None if val is None else _replicate(jnp.asarray(val), state.config.mesh)
     return dict(
-        callback=callback,
-        callback_state=(
-            PrintCallbackState(
-                asarray_or_none(dot_every), asarray_or_none(report_every)
-            ),
-            SparseCallbackState(asarray_or_none(sparse_on_at)),
+        callback=print_callback,
+        callback_state=PrintCallbackState(
+            as_replicated_array_or_none(dot_every),
+            as_replicated_array_or_none(report_every),
         ),
     )
 class PrintCallbackState(Module):
-    """State for `print_callback`.
-    Parameters
-    ----------
-    dot_every
-        A dot is printed every `dot_every` MCMC iterations, `None` to disable.
-    report_every
-        A one line report is printed every `report_every` MCMC iterations,
-        `None` to disable.
-    """
+    """State for `print_callback`."""
     dot_every: Int32[Array, ''] | None
+    """A dot is printed every `dot_every` MCMC iterations, `None` to disable."""
     report_every: Int32[Array, ''] | None
+    """A one line report is printed every `report_every` MCMC iterations,
+    `None` to disable."""
 def print_callback(
@@ -468,34 +611,51 @@ def print_callback(
     **_,
 ):
     """Print a dot and/or a report periodically during the MCMC."""
-    if callback_state.dot_every is not None:
-        cond = (i_total + 1) % callback_state.dot_every == 0
-        lax.cond(
-            cond,
-            lambda: debug.callback(lambda: print('.', end='', flush=True)),  # noqa: T201
-            # logging can't do in-line printing so I'll stick to print
-            lambda: None,
+    report_every = callback_state.report_every
+    dot_every = callback_state.dot_every
+    it = i_total + 1
+    def get_cond(every: Int32[Array, ''] | None) -> bool | Bool[Array, '']:
+        return False if every is None else it % every == 0
+    report_cond = get_cond(report_every)
+    dot_cond = get_cond(dot_every)
+    def line_report_branch():
+        if report_every is None:
+            return
+        if dot_every is None:
+            print_newline = False
+        else:
+            print_newline = it % report_every > it % dot_every
+        debug.callback(
+            _print_report,
+            print_dot=dot_cond,
+            print_newline=print_newline,
+            burnin=burnin,
+            it=it,
+            n_iters=n_burn + n_save * n_skip,
+            num_chains=bart.forest.num_chains(),
+            grow_prop_count=bart.forest.grow_prop_count.mean(),
+            grow_acc_count=bart.forest.grow_acc_count.mean(),
+            prune_acc_count=bart.forest.prune_acc_count.mean(),
+            prop_total=bart.forest.split_tree.shape[-2],
+            fill=forest_fill(bart.forest.split_tree),
         )
-    if callback_state.report_every is not None:
-        def print_report():
-            debug.callback(
-                _print_report,
-                newline=callback_state.dot_every is not None,
-                burnin=burnin,
-                i_total=i_total,
-                n_iters=n_burn + n_save * n_skip,
-                grow_prop_count=bart.forest.grow_prop_count,
-                grow_acc_count=bart.forest.grow_acc_count,
-                prune_prop_count=bart.forest.prune_prop_count,
-                prune_acc_count=bart.forest.prune_acc_count,
-                prop_total=len(bart.forest.leaf_tree),
-                fill=grove.forest_fill(bart.forest.split_tree),
-            )
+    def just_dot_branch():
+        if dot_every is None:
+            return
+        debug.callback(
+            lambda: print('.', end='', flush=True)  # noqa: T201
+        )
+        # logging can't do in-line printing so we use print
-        cond = (i_total + 1) % callback_state.report_every == 0
-        lax.cond(cond, print_report, lambda: None)
+    cond_if_not_profiling(
+        report_cond,
+        line_report_branch,
+        lambda: cond_if_not_profiling(dot_cond, just_dot_branch, lambda: None),
+    )
 def _convert_jax_arrays_in_args(func: Callable) -> Callable:
@@ -506,15 +666,15 @@ def _convert_jax_arrays_in_args(func: Callable) -> Callable:
     """
     def convert_jax_arrays(pytree: PyTree) -> PyTree:
-        def convert_jax_arrays(val: Any) -> Any:
-            if not isinstance(val, jax.Array):
+        def convert_jax_array(val: Any) -> Any:
+            if not isinstance(val, Array):
                 return val
             elif val.shape:
                 return numpy.array(val)
             else:
                 return val.item()
-        return tree.map(convert_jax_arrays, pytree)
+        return tree.map(convert_jax_array, pytree)
     @wraps(func)
     def new_func(*args, **kw):
@@ -530,126 +690,157 @@ def _convert_jax_arrays_in_args(func: Callable) -> Callable:
 # deadlock with the main thread
 def _print_report(
     *,
-    newline: bool,
+    print_dot: bool,
+    print_newline: bool,
     burnin: bool,
-    i_total: int,
+    it: int,
     n_iters: int,
-    grow_prop_count: int,
-    grow_acc_count: int,
-    prune_prop_count: int,
-    prune_acc_count: int,
+    num_chains: int | None,
+    grow_prop_count: float,
+    grow_acc_count: float,
+    prune_acc_count: float,
     prop_total: int,
     fill: float,
 ):
     """Print the report for `print_callback`."""
-    def acc_string(acc_count, prop_count):
-        if prop_count:
-            return f'{acc_count / prop_count:.0%}'
-        else:
-            return 'n/d'
+    # compute fractions
     grow_prop = grow_prop_count / prop_total
-    prune_prop = prune_prop_count / prop_total
-    grow_acc = acc_string(grow_acc_count, grow_prop_count)
-    prune_acc = acc_string(prune_acc_count, prune_prop_count)
+    move_acc = (grow_acc_count + prune_acc_count) / prop_total
+    # determine prefix
+    if print_dot:
+        prefix = '.\n'
+    elif print_newline:
+        prefix = '\n'
+    else:
+        prefix = ''
-    prefix = '\n' if newline else ''
-    suffix = ' (burnin)' if burnin else ''
+    # determine suffix in parentheses
+    msgs = []
+    if num_chains is not None:
+        msgs.append(f'avg. {num_chains} chains')
+    if burnin:
+        msgs.append('burnin')
+    suffix = f' ({", ".join(msgs)})' if msgs else ''
     print(  # noqa: T201, see print_callback for why not logging
-        f'{prefix}It {i_total + 1}/{n_iters} '
-        f'grow P={grow_prop:.0%} A={grow_acc}, '
-        f'prune P={prune_prop:.0%} A={prune_acc}, '
-        f'fill={fill:.0%}{suffix}'
+        f'{prefix}Iteration {it}/{n_iters}, '
+        f'grow prob: {grow_prop:.0%}, '
+        f'move acc: {move_acc:.0%}, '
+        f'fill: {fill:.0%}{suffix}'
     )
-class SparseCallbackState(Module):
-    """State for `sparse_callback`.
-    Parameters
-    ----------
-    sparse_on_at
-        If specified, variable selection is activated starting from this
-        iteration. If `None`, variable selection is not used.
-    """
-    sparse_on_at: Int32[Array, ''] | None
-def sparse_callback(
-    *,
-    key: Key[Array, ''],
-    bart: State,
-    i_total: Int32[Array, ''],
-    callback_state: SparseCallbackState,
-    **_,
-):
-    """Perform variable selection, see `mcmcstep.step_sparse`."""
-    if callback_state.sparse_on_at is not None:
-        bart = lax.cond(
-            i_total < callback_state.sparse_on_at,
-            lambda: bart,
-            lambda: mcmcstep.step_sparse(key, bart),
-        )
-    return bart, callback_state
-class Trace(grove.TreeHeaps, Protocol):
+class Trace(TreeHeaps, Protocol):
     """Protocol for a MCMC trace."""
-    offset: Float32[Array, ' trace_length']
+    offset: Float32[Array, '*trace_shape']
 class TreesTrace(Module):
     """Implementation of `bartz.grove.TreeHeaps` for an MCMC trace."""
-    leaf_tree: Float32[Array, 'trace_length num_trees 2**d']
-    var_tree: UInt[Array, 'trace_length num_trees 2**(d-1)']
-    split_tree: UInt[Array, 'trace_length num_trees 2**(d-1)']
+    leaf_tree: (
+        Float32[Array, '*trace_shape num_trees 2**d']
+        | Float32[Array, '*trace_shape num_trees k 2**d']
+    )
+    var_tree: UInt[Array, '*trace_shape num_trees 2**(d-1)']
+    split_tree: UInt[Array, '*trace_shape num_trees 2**(d-1)']
     @classmethod
-    def from_dataclass(cls, obj: grove.TreeHeaps):
+    def from_dataclass(cls, obj: TreeHeaps):
         """Create a `TreesTrace` from any `bartz.grove.TreeHeaps`."""
         return cls(**{f.name: getattr(obj, f.name) for f in fields(cls)})
-@jax.jit
+@jit
 def evaluate_trace(
-    trace: Trace, X: UInt[Array, 'p n']
-) -> Float32[Array, 'trace_length n']:
+    X: UInt[Array, 'p n'], trace: Trace
+) -> Float32[Array, '*trace_shape n'] | Float32[Array, '*trace_shape k n']:
     """
     Compute predictions for all iterations of the BART MCMC.
     Parameters
     ----------
-    trace
-        A trace of the BART MCMC, as returned by `run_mcmc`.
     X
         The predictors matrix, with `p` predictors and `n` observations.
+    trace
+        A main trace of the BART MCMC, as returned by `run_mcmc`.
     Returns
     -------
-    The predictions for each iteration of the MCMC.
+    The predictions for each chain and iteration of the MCMC.
     """
-    evaluate_trees = partial(grove.evaluate_forest, sum_trees=False)
-    evaluate_trees = jaxext.autobatch(evaluate_trees, 2**29, (None, 0))
-    trees = TreesTrace.from_dataclass(trace)
+    # per-device memory limit
+    max_io_nbytes = 2**27  # 128 MiB
+    # adjust memory limit for number of devices
+    mesh = jax.typeof(trace.leaf_tree).sharding.mesh
+    num_devices = get_axis_size(mesh, 'chains') * get_axis_size(mesh, 'data')
+    max_io_nbytes *= num_devices
+    # determine batching axes
+    has_chains = trace.split_tree.ndim > 3  # chains, samples, trees, nodes
+    if has_chains:
+        sample_axis = 1
+        tree_axis = 2
+    else:
+        sample_axis = 0
+        tree_axis = 1
+    # batch and sum over trees
+    batched_eval = autobatch(
+        evaluate_forest,
+        max_io_nbytes,
+        (None, tree_axis),
+        tree_axis,
+        reduce_ufunc=jnp.add,
+    )
-    def loop(_, item):
-        offset, trees = item
-        values = evaluate_trees(X, trees)
-        return None, offset + jnp.sum(values, axis=0, dtype=jnp.float32)
+    # determine output shape (to avoid autobatch tracing everything 4 times)
+    is_mv = trace.leaf_tree.ndim > trace.split_tree.ndim
+    k = trace.leaf_tree.shape[-2] if is_mv else 1
+    mv_shape = (k,) if is_mv else ()
+    _, n = X.shape
+    out_shape = (*trace.split_tree.shape[:-2], *mv_shape, n)
+    # adjust memory limit keeping into account that trees are summed over
+    num_trees, hts = trace.split_tree.shape[-2:]
+    out_size = k * n * jnp.float32.dtype.itemsize  # the value of the forest
+    core_io_size = (
+        num_trees
+        * hts
+        * (
+            2 * k * trace.leaf_tree.itemsize
+            + trace.var_tree.itemsize
+            + trace.split_tree.itemsize
+        )
+        + out_size
+    )
+    core_int_size = (num_trees - 1) * out_size
+    max_io_nbytes = max(1, floor(max_io_nbytes / (1 + core_int_size / core_io_size)))
+    # batch over mcmc samples
+    batched_eval = autobatch(
+        batched_eval,
+        max_io_nbytes,
+        (None, sample_axis),
+        sample_axis,
+        warn_on_overflow=False,  # the inner autobatch will handle it
+        result_shape_dtype=ShapeDtypeStruct(out_shape, jnp.float32),
+    )
+    # extract only the trees from the trace
+    trees = TreesTrace.from_dataclass(trace)
-    _, y = lax.scan(loop, None, (trace.offset, trees))
-    return y
+    # evaluate trees
+    y_centered: Float32[Array, '*trace_shape n'] | Float32[Array, '*trace_shape k n']
+    y_centered = batched_eval(X, trees)
+    return y_centered + trace.offset[..., None]
-@partial(jax.jit, static_argnums=(0,))
-def compute_varcount(
-    p: int, trace: grove.TreeHeaps
-) -> Int32[Array, 'trace_length {p}']:
+@partial(jit, static_argnums=(0,))
+def compute_varcount(p: int, trace: TreeHeaps) -> Int32[Array, '*trace_shape {p}']:
     """
     Count how many times each predictor is used in each MCMC state.
@@ -658,11 +849,11 @@ def compute_varcount(
     p
         The number of predictors.
     trace
-        A trace of the BART MCMC, as returned by `run_mcmc`.
+        A main trace of the BART MCMC, as returned by `run_mcmc`.
     Returns
     -------
     Histogram of predictor usage in each MCMC state.
     """
-    vmapped_var_histogram = jax.vmap(grove.var_histogram, in_axes=(None, 0, 0))
-    return vmapped_var_histogram(p, trace.var_tree, trace.split_tree)
+    # var_tree has shape (chains? samples trees nodes)
+    return var_histogram(p, trace.var_tree, trace.split_tree, sum_batch_axis=-1)

bartz 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

bartz 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl