PyPI - bartz - Versions diffs - 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

bartz 0.2.0py3-none-any.whl → 0.3.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

bartz/BART.py +43 -18
bartz/_version.py +1 -1
bartz/grove.py +19 -14
bartz/jaxext.py +48 -21
bartz/mcmcloop.py +13 -15
bartz/mcmcstep.py +687 -297
bartz/prepcovars.py +43 -13
bartz-0.3.0.dist-info/METADATA +77 -0
bartz-0.3.0.dist-info/RECORD +13 -0
bartz-0.2.0.dist-info/METADATA +0 -32
bartz-0.2.0.dist-info/RECORD +0 -13
{bartz-0.2.0.dist-info → bartz-0.3.0.dist-info}/LICENSE +0 -0
{bartz-0.2.0.dist-info → bartz-0.3.0.dist-info}/WHEEL +0 -0

bartz/mcmcstep.py CHANGED Viewed

@@ -10,10 +10,10 @@
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
-#
+#
 # The above copyright notice and this permission notice shall be included in all
 # copies or substantial portions of the Software.
-#
+#
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -34,6 +34,7 @@ range of possible values.
 """
 import functools
+import math
 import jax
 from jax import random
@@ -54,7 +55,9 @@ def init(*,
     small_float=jnp.float32,
     large_float=jnp.float32,
     min_points_per_leaf=None,
-    suffstat_batch_size='auto',
+    resid_batch_size='auto',
+    count_batch_size='auto',
+    save_ratios=False,
     ):
     """
     Make a BART posterior sampling MCMC initial state.
@@ -82,9 +85,13 @@ def init(*,
         The dtype for scalars, small arrays, and arrays which require accuracy.
     min_points_per_leaf : int, optional
         The minimum number of data points in a leaf node. 0 if not specified.
-    suffstat_batch_size : int, None, str, default 'auto'
-        The batch size for computing sufficient statistics. `None` for no
-        batching. If 'auto', pick a value based on the device of `y`.
+    resid_batch_size, count_batch_size : int, None, str, default 'auto'
+        The batch sizes, along datapoints, for summing the residuals and
+        counting the number of datapoints in each leaf. `None` for no batching.
+        If 'auto', pick a value based on the device of `y`, or the default
+        device.
+    save_ratios : bool, default False
+        Whether to save the Metropolis-Hastings ratios.
     Returns
     -------
@@ -110,6 +117,8 @@ def init(*,
         'p_nonterminal' : large_float array (d,)
             The probability of a nonterminal node at each depth, padded with a
             zero.
+        'p_propose_grow' : large_float array (2 ** (d - 1),)
+            The unnormalized probability of picking a leaf for a grow proposal.
         'sigma2_alpha' : large_float
             The shape parameter of the inverse gamma prior on the noise variance.
         'sigma2_beta' : large_float
@@ -120,6 +129,8 @@ def init(*,
             The response.
         'X' : int array (p, n)
             The predictors.
+        'leaf_indices' : int array (num_trees, n)
+            The index of the leaf each datapoint falls into, for each tree.
         'min_points_per_leaf' : int or None
             The minimum number of data points in a leaf node.
         'affluence_trees' : bool array (num_trees, 2 ** (d - 1)) or None
@@ -128,8 +139,6 @@ def init(*,
         'opt' : LeafDict
             A dictionary with config values:
-            'suffstat_batch_size' : int or None
-                The batch size for computing sufficient statistics.
             'small_float' : dtype
                 The dtype for large arrays used in the algorithm.
             'large_float' : dtype
@@ -137,6 +146,8 @@ def init(*,
                 accuracy.
             'require_min_points' : bool
                 Whether the `min_points_per_leaf` parameter is specified.
+            'resid_batch_size', 'count_batch_size' : int or None
+                The data batch sizes for computing the sufficient statistics.
     """
     p_nonterminal = jnp.asarray(p_nonterminal, large_float)
@@ -150,24 +161,28 @@ def init(*,
     small_float = jnp.dtype(small_float)
     large_float = jnp.dtype(large_float)
     y = jnp.asarray(y, small_float)
-    suffstat_batch_size = _choose_suffstat_batch_size(suffstat_batch_size, y)
+    resid_batch_size, count_batch_size = _choose_suffstat_batch_size(resid_batch_size, count_batch_size, y)
+    sigma2 = jnp.array(sigma2_beta / sigma2_alpha, large_float)
+    sigma2 = jnp.where(jnp.isfinite(sigma2) & (sigma2 > 0), sigma2, 1)
     bart = dict(
         leaf_trees=make_forest(max_depth, small_float),
         var_trees=make_forest(max_depth - 1, jaxext.minimal_unsigned_dtype(X.shape[0] - 1)),
         split_trees=make_forest(max_depth - 1, max_split.dtype),
         resid=jnp.asarray(y, large_float),
-        sigma2=jnp.ones((), large_float),
+        sigma2=sigma2,
         grow_prop_count=jnp.zeros((), int),
         grow_acc_count=jnp.zeros((), int),
         prune_prop_count=jnp.zeros((), int),
         prune_acc_count=jnp.zeros((), int),
         p_nonterminal=p_nonterminal,
+        p_propose_grow=p_nonterminal[grove.tree_depths(2 ** (max_depth - 1))],
         sigma2_alpha=jnp.asarray(sigma2_alpha, large_float),
         sigma2_beta=jnp.asarray(sigma2_beta, large_float),
         max_split=jnp.asarray(max_split),
         y=y,
         X=jnp.asarray(X),
+        leaf_indices=jnp.ones((num_trees, y.size), jaxext.minimal_unsigned_dtype(2 ** max_depth - 1)),
         min_points_per_leaf=(
             None if min_points_per_leaf is None else
             jnp.asarray(min_points_per_leaf)
@@ -177,30 +192,61 @@ def init(*,
             make_forest(max_depth - 1, bool).at[:, 1].set(y.size >= 2 * min_points_per_leaf)
         ),
         opt=jaxext.LeafDict(
-            suffstat_batch_size=suffstat_batch_size,
             small_float=small_float,
             large_float=large_float,
             require_min_points=min_points_per_leaf is not None,
+            resid_batch_size=resid_batch_size,
+            count_batch_size=count_batch_size,
         ),
     )
+    if save_ratios:
+        bart['ratios'] = dict(
+            grow=dict(
+                trans_prior=jnp.full(num_trees, jnp.nan),
+                likelihood=jnp.full(num_trees, jnp.nan),
+            ),
+            prune=dict(
+                trans_prior=jnp.full(num_trees, jnp.nan),
+                likelihood=jnp.full(num_trees, jnp.nan),
+            ),
+        )
     return bart
-def _choose_suffstat_batch_size(size, y):
-    if size == 'auto':
-        platform = y.devices().pop().platform
+def _choose_suffstat_batch_size(resid_batch_size, count_batch_size, y):
+    @functools.cache
+    def get_platform():
+        try:
+            device = y.devices().pop()
+        except jax.errors.ConcretizationTypeError:
+            device = jax.devices()[0]
+        platform = device.platform
+        if platform not in ('cpu', 'gpu'):
+            raise KeyError(f'Unknown platform: {platform}')
+        return platform
+    if resid_batch_size == 'auto':
+        platform = get_platform()
+        n = max(1, y.size)
         if platform == 'cpu':
-            return None
-                # maybe I should batch residuals (not counts) for numerical
-                # accuracy, even if it's slower
+            resid_batch_size = 2 ** int(round(math.log2(n / 6))) # n/6
         elif platform == 'gpu':
-            return 128 # 128 is good on A100, and V100 at high n
-                       # 512 is good on T4, and V100 at low n
-        else:
-            raise KeyError(f'Unknown platform: {platform}')
-    elif size is not None:
-        return int(size)
-    return size
+            resid_batch_size = 2 ** int(round((1 + math.log2(n)) / 3)) # n^1/3
+        resid_batch_size = max(1, resid_batch_size)
+    if count_batch_size == 'auto':
+        platform = get_platform()
+        if platform == 'cpu':
+            count_batch_size = None
+        elif platform == 'gpu':
+            n = max(1, y.size)
+            count_batch_size = 2 ** int(round(math.log2(n) / 2 - 2)) # n^1/2
+                # /4 is good on V100, /2 on L4/T4, still haven't tried A100
+            count_batch_size = max(1, count_batch_size)
+    return resid_batch_size, count_batch_size
 def step(bart, key):
     """
@@ -240,14 +286,11 @@ def sample_trees(bart, key):
     Notes
     -----
-    This function zeroes the proposal counters before using them.
+    This function zeroes the proposal counters.
     """
-    bart = bart.copy()
     key, subkey = random.split(key)
     grow_moves, prune_moves = sample_moves(bart, subkey)
-    bart['var_trees'] = grow_moves['var_tree']
-    grow_leaf_indices = grove.traverse_forest(bart['X'], grow_moves['var_tree'], grow_moves['split_tree'])
-    return accept_moves_and_sample_leaves(bart, grow_moves, prune_moves, grow_leaf_indices, key)
+    return accept_moves_and_sample_leaves(bart, grow_moves, prune_moves, key)
 def sample_moves(bart, key):
     """
@@ -266,17 +309,17 @@ def sample_moves(bart, key):
         The proposals for grow and prune moves. See `grow_move` and `prune_move`.
     """
     key = random.split(key, bart['var_trees'].shape[0])
-    return sample_moves_vmap_trees(bart['var_trees'], bart['split_trees'], bart['affluence_trees'], bart['max_split'], bart['p_nonterminal'], key)
+    return _sample_moves_vmap_trees(bart['var_trees'], bart['split_trees'], bart['affluence_trees'], bart['max_split'], bart['p_nonterminal'], bart['p_propose_grow'], key)
-@functools.partial(jaxext.vmap_nodoc, in_axes=(0, 0, 0, None, None, 0))
-def sample_moves_vmap_trees(var_tree, split_tree, affluence_tree, max_split, p_nonterminal, key):
+@functools.partial(jaxext.vmap_nodoc, in_axes=(0, 0, 0, None, None, None, 0))
+def _sample_moves_vmap_trees(*args):
+    args, key = args[:-1], args[-1]
     key, key1 = random.split(key)
-    args = var_tree, split_tree, affluence_tree, max_split, p_nonterminal
     grow = grow_move(*args, key)
     prune = prune_move(*args, key1)
     return grow, prune
-def grow_move(var_tree, split_tree, affluence_tree, max_split, p_nonterminal, key):
+def grow_move(var_tree, split_tree, affluence_tree, max_split, p_nonterminal, p_propose_grow, key):
     """
     Tree structure grow move proposal of BART MCMC.
@@ -296,6 +339,8 @@ def grow_move(var_tree, split_tree, affluence_tree, max_split, p_nonterminal, ke
         The maximum split index for each variable.
     p_nonterminal : array (d,)
         The probability of a nonterminal node at each depth.
+    p_propose_grow : array (2 ** (d - 1),)
+        The unnormalized probability of choosing a leaf to grow.
     key : jax.dtypes.prng_key array
         A jax random key.
@@ -304,41 +349,49 @@ def grow_move(var_tree, split_tree, affluence_tree, max_split, p_nonterminal, ke
     grow_move : dict
         A dictionary with fields:
-        'allowed' : bool
-            Whether the move is possible.
+        'num_growable' : int
+            The number of growable leaves.
         'node' : int
-            The index of the leaf to grow.
-        'var_tree' : array (2 ** (d - 1),)
-            The new decision axes of the tree.
-        'split_tree' : array (2 ** (d - 1),)
-            The new decision boundaries of the tree.
+            The index of the leaf to grow. ``2 ** d`` if there are no growable
+            leaves.
+        'left', 'right' : int
+            The indices of the children of 'node'.
+        'var', 'split' : int
+            The decision axis and boundary of the new rule.
         'partial_ratio' : float
             A factor of the Metropolis-Hastings ratio of the move. It lacks
             the likelihood ratio and the probability of proposing the prune
             move.
+        'var_tree', 'split_tree' : array (2 ** (d - 1),)
+            The updated decision axes and boundaries of the tree.
     """
     key, key1, key2 = random.split(key, 3)
-    leaf_to_grow, num_growable, num_prunable, allowed = choose_leaf(split_tree, affluence_tree, key)
+    leaf_to_grow, num_growable, prob_choose, num_prunable = choose_leaf(split_tree, affluence_tree, p_propose_grow, key)
     var = choose_variable(var_tree, split_tree, max_split, leaf_to_grow, key1)
     var_tree = var_tree.at[leaf_to_grow].set(var.astype(var_tree.dtype))
     split = choose_split(var_tree, split_tree, max_split, leaf_to_grow, key2)
     split_tree = split_tree.at[leaf_to_grow].set(split.astype(split_tree.dtype))
-    ratio = compute_partial_ratio(num_growable, num_prunable, p_nonterminal, leaf_to_grow, split_tree)
+    ratio = compute_partial_ratio(prob_choose, num_prunable, p_nonterminal, leaf_to_grow, split_tree)
+    left = leaf_to_grow << 1
     return dict(
-        allowed=allowed,
+        num_growable=num_growable,
         node=leaf_to_grow,
+        left=left,
+        right=left + 1,
+        var=var,
+        split=split,
         partial_ratio=ratio,
         var_tree=var_tree,
         split_tree=split_tree,
     )
-def choose_leaf(split_tree, affluence_tree, key):
+def choose_leaf(split_tree, affluence_tree, p_propose_grow, key):
     """
     Choose a leaf node to grow in a tree.
@@ -348,6 +401,8 @@ def choose_leaf(split_tree, affluence_tree, key):
         The splitting points of the tree.
     affluence_tree : bool array (2 ** (d - 1),) or None
         Whether a leaf has enough points to be grown.
+    p_propose_grow : array (2 ** (d - 1),)
+        The unnormalized probability of choosing a leaf to grow.
     key : jax.dtypes.prng_key array
         A jax random key.
@@ -358,19 +413,21 @@ def choose_leaf(split_tree, affluence_tree, key):
         ``2 ** d``.
     num_growable : int
         The number of leaf nodes that can be grown.
+    prob_choose : float
+        The normalized probability of choosing the selected leaf.
     num_prunable : int
         The number of leaf parents that could be pruned, after converting the
         selected leaf to a non-terminal node.
-    allowed : bool
-        Whether the grow move is allowed.
     """
-    is_growable, allowed = growable_leaves(split_tree, affluence_tree)
-    leaf_to_grow = randint_masked(key, is_growable)
-    leaf_to_grow = jnp.where(allowed, leaf_to_grow, 2 * split_tree.size)
+    is_growable = growable_leaves(split_tree, affluence_tree)
     num_growable = jnp.count_nonzero(is_growable)
+    distr = jnp.where(is_growable, p_propose_grow, 0)
+    leaf_to_grow, distr_norm = categorical(key, distr)
+    leaf_to_grow = jnp.where(num_growable, leaf_to_grow, 2 * split_tree.size)
+    prob_choose = distr[leaf_to_grow] / distr_norm
     is_parent = grove.is_leaves_parent(split_tree.at[leaf_to_grow].set(1))
     num_prunable = jnp.count_nonzero(is_parent)
-    return leaf_to_grow, num_growable, num_prunable, allowed
+    return leaf_to_grow, num_growable, prob_choose, num_prunable
 def growable_leaves(split_tree, affluence_tree):
     """
@@ -389,34 +446,32 @@ def growable_leaves(split_tree, affluence_tree):
         The mask indicating the leaf nodes that can be proposed to grow, i.e.,
         that are not at the bottom level and have at least two times the number
         of minimum points per leaf.
-    allowed : bool
-        Whether the grow move is allowed, i.e., there are growable leaves.
     """
     is_growable = grove.is_actual_leaf(split_tree)
     if affluence_tree is not None:
         is_growable &= affluence_tree
-    return is_growable, jnp.any(is_growable)
+    return is_growable
-def randint_masked(key, mask):
+def categorical(key, distr):
     """
-    Return a random integer in a range, including only some values.
+    Return a random integer from an arbitrary distribution.
     Parameters
     ----------
     key : jax.dtypes.prng_key array
         A jax random key.
-    mask : bool array (n,)
-        The mask indicating the allowed values.
+    distr : float array (n,)
+        An unnormalized probability distribution.
     Returns
     -------
     u : int
-        A random integer in the range ``[0, n)``, and which satisfies
-        ``mask[u] == True``. If all values in the mask are `False`, return `n`.
+        A random integer in the range ``[0, n)``. If all probabilities are zero,
+        return ``n``.
     """
-    ecdf = jnp.cumsum(mask)
-    u = random.randint(key, (), 0, ecdf[-1])
-    return jnp.searchsorted(ecdf, u, 'right')
+    ecdf = jnp.cumsum(distr)
+    u = random.uniform(key, (), ecdf.dtype, 0, ecdf[-1])
+    return jnp.searchsorted(ecdf, u, 'right'), ecdf[-1]
 def choose_variable(var_tree, split_tree, max_split, leaf_index, key):
     """
@@ -471,7 +526,7 @@ def fully_used_variables(var_tree, split_tree, max_split, leaf_index):
         filled with `p`. The fill values are not guaranteed to be placed in any
         particular order. Variables may appear more than once.
     """
     var_to_ignore = ancestor_variables(var_tree, max_split, leaf_index)
     split_range_vec = jax.vmap(split_range, in_axes=(None, None, None, None, 0))
     l, r = split_range_vec(var_tree, split_tree, max_split, leaf_index, var_to_ignore)
@@ -603,7 +658,7 @@ def choose_split(var_tree, split_tree, max_split, leaf_index, key):
     l, r = split_range(var_tree, split_tree, max_split, leaf_index, var)
     return random.randint(key, (), l, r)
-def compute_partial_ratio(num_growable, num_prunable, p_nonterminal, leaf_to_grow, new_split_tree):
+def compute_partial_ratio(prob_choose, num_prunable, p_nonterminal, leaf_to_grow, new_split_tree):
     """
     Compute the product of the transition and prior ratios of a grow move.
@@ -632,6 +687,9 @@ def compute_partial_ratio(num_growable, num_prunable, p_nonterminal, leaf_to_gro
     # the two ratios also contain factors num_available_split *
     # num_available_var, but they cancel out
+    # p_prune can't be computed here because it needs the count trees, which are
+    # computed in the acceptance phase
     prune_allowed = leaf_to_grow != 1
         # prune allowed  <--->  the initial tree is not a root
         # leaf to grow is root  -->  the tree can only be a root
@@ -639,31 +697,33 @@ def compute_partial_ratio(num_growable, num_prunable, p_nonterminal, leaf_to_gro
     p_grow = jnp.where(prune_allowed, 0.5, 1)
-    trans_ratio = num_growable / (p_grow * num_prunable)
+    inv_trans_ratio = p_grow * prob_choose * num_prunable
     depth = grove.tree_depths(new_split_tree.size)[leaf_to_grow]
     p_parent = p_nonterminal[depth]
     cp_children = 1 - p_nonterminal[depth + 1]
     tree_ratio = cp_children * cp_children * p_parent / (1 - p_parent)
-    return trans_ratio * tree_ratio
+    return tree_ratio / inv_trans_ratio
-def prune_move(var_tree, split_tree, affluence_tree, max_split, p_nonterminal, key):
+def prune_move(var_tree, split_tree, affluence_tree, max_split, p_nonterminal, p_propose_grow, key):
     """
     Tree structure prune move proposal of BART MCMC.
     Parameters
     ----------
-    var_tree : array (2 ** (d - 1),)
+    var_tree : int array (2 ** (d - 1),)
         The variable indices of the tree.
-    split_tree : array (2 ** (d - 1),)
+    split_tree : int array (2 ** (d - 1),)
         The splitting points of the tree.
     affluence_tree : bool array (2 ** (d - 1),) or None
         Whether a leaf has enough points to be grown.
-    max_split : array (p,)
+    max_split : int array (p,)
         The maximum split index for each variable.
-    p_nonterminal : array (d,)
+    p_nonterminal : float array (d,)
         The probability of a nonterminal node at each depth.
+    p_propose_grow : float array (2 ** (d - 1),)
+        The unnormalized probability of choosing a leaf to grow.
     key : jax.dtypes.prng_key array
         A jax random key.
@@ -675,24 +735,29 @@ def prune_move(var_tree, split_tree, affluence_tree, max_split, p_nonterminal, k
         'allowed' : bool
             Whether the move is possible.
         'node' : int
-            The index of the node to prune.
+            The index of the node to prune. ``2 ** d`` if no node can be pruned.
+        'left', 'right' : int
+            The indices of the children of 'node'.
         'partial_ratio' : float
             A factor of the Metropolis-Hastings ratio of the move. It lacks
             the likelihood ratio and the probability of proposing the prune
             move. This ratio is inverted.
     """
-    node_to_prune, num_prunable, num_growable = choose_leaf_parent(split_tree, affluence_tree, key)
+    node_to_prune, num_prunable, prob_choose = choose_leaf_parent(split_tree, affluence_tree, p_propose_grow, key)
     allowed = split_tree[1].astype(bool) # allowed iff the tree is not a root
-    ratio = compute_partial_ratio(num_growable, num_prunable, p_nonterminal, node_to_prune, split_tree)
+    ratio = compute_partial_ratio(prob_choose, num_prunable, p_nonterminal, node_to_prune, split_tree)
+    left = node_to_prune << 1
     return dict(
         allowed=allowed,
         node=node_to_prune,
+        left=left,
+        right=left + 1,
         partial_ratio=ratio, # it is inverted in accept_move_and_sample_leaves
     )
-def choose_leaf_parent(split_tree, affluence_tree, key):
+def choose_leaf_parent(split_tree, affluence_tree, p_propose_grow, key):
     """
     Pick a non-terminal node with leaf children to prune in a tree.
@@ -702,6 +767,8 @@ def choose_leaf_parent(split_tree, affluence_tree, key):
         The splitting points of the tree.
     affluence_tree : bool array (2 ** (d - 1),) or None
         Whether a leaf has enough points to be grown.
+    p_propose_grow : array (2 ** (d - 1),)
+        The unnormalized probability of choosing a leaf to grow.
     key : jax.dtypes.prng_key array
         A jax random key.
@@ -709,28 +776,50 @@ def choose_leaf_parent(split_tree, affluence_tree, key):
     -------
     node_to_prune : int
         The index of the node to prune. If ``num_prunable == 0``, return
-        ``split_tree.size``.
+        ``2 ** d``.
     num_prunable : int
         The number of leaf parents that could be pruned.
-    num_growable : int
-        The number of leaf nodes that can be grown, after pruning the chosen
-        node.
+    prob_choose : float
+        The normalized probability of choosing the node to prune for growth.
     """
     is_prunable = grove.is_leaves_parent(split_tree)
-    node_to_prune = randint_masked(key, is_prunable)
     num_prunable = jnp.count_nonzero(is_prunable)
+    node_to_prune = randint_masked(key, is_prunable)
+    node_to_prune = jnp.where(num_prunable, node_to_prune, 2 * split_tree.size)
-    pruned_split_tree = split_tree.at[node_to_prune].set(0)
-    pruned_affluence_tree = (
+    split_tree = split_tree.at[node_to_prune].set(0)
+    affluence_tree = (
         None if affluence_tree is None else
         affluence_tree.at[node_to_prune].set(True)
     )
-    is_growable_leaf, _ = growable_leaves(pruned_split_tree, pruned_affluence_tree)
-    num_growable = jnp.count_nonzero(is_growable_leaf)
+    is_growable_leaf = growable_leaves(split_tree, affluence_tree)
+    prob_choose = p_propose_grow[node_to_prune]
+    prob_choose /= jnp.sum(p_propose_grow, where=is_growable_leaf)
+    return node_to_prune, num_prunable, prob_choose
+def randint_masked(key, mask):
+    """
+    Return a random integer in a range, including only some values.
-    return node_to_prune, num_prunable, num_growable
+    Parameters
+    ----------
+    key : jax.dtypes.prng_key array
+        A jax random key.
+    mask : bool array (n,)
+        The mask indicating the allowed values.
+    Returns
+    -------
+    u : int
+        A random integer in the range ``[0, n)``, and which satisfies
+        ``mask[u] == True``. If all values in the mask are `False`, return `n`.
+    """
+    ecdf = jnp.cumsum(mask)
+    u = random.randint(key, (), 0, ecdf[-1])
+    return jnp.searchsorted(ecdf, u, 'right')
-def accept_moves_and_sample_leaves(bart, grow_moves, prune_moves, grow_leaf_indices, key):
+def accept_moves_and_sample_leaves(bart, grow_moves, prune_moves, key):
     """
     Accept or reject the proposed moves and sample the new leaf values.
@@ -744,8 +833,6 @@ def accept_moves_and_sample_leaves(bart, grow_moves, prune_moves, grow_leaf_indi
     prune_moves : dict
         The proposals for prune moves, batched over the first axis. See
         `prune_move`.
-    grow_leaf_indices : int array (num_trees, n)
-        The leaf indices of the trees proposed by the grow move.
     key : jax.dtypes.prng_key array
         A jax random key.
@@ -754,41 +841,339 @@ def accept_moves_and_sample_leaves(bart, grow_moves, prune_moves, grow_leaf_indi
     bart : dict
         The new BART mcmc state.
     """
+    bart, grow_moves, prune_moves, count_trees, move_counts, u, z = accept_moves_parallel_stage(bart, grow_moves, prune_moves, key)
+    bart, counts = accept_moves_sequential_stage(bart, count_trees, grow_moves, prune_moves, move_counts, u, z)
+    return accept_moves_final_stage(bart, counts, grow_moves, prune_moves)
+def accept_moves_parallel_stage(bart, grow_moves, prune_moves, key):
+    """
+    Pre-computes quantities used to accept moves, in parallel across trees.
+    Parameters
+    ----------
+    bart : dict
+        A BART mcmc state.
+    grow_moves, prune_moves : dict
+        The proposals for the moves, batched over the first axis. See
+        `grow_move` and `prune_move`.
+    key : jax.dtypes.prng_key array
+        A jax random key.
+    Returns
+    -------
+    bart : dict
+        A partially updated BART mcmc state.
+    grow_moves, prune_moves : dict
+        The proposals for the moves, with the field 'partial_ratio' replaced
+        by 'trans_prior_ratio'.
+    count_trees : array (num_trees, 2 ** d)
+        The number of points in each potential or actual leaf node.
+    move_counts : dict
+        The counts of the number of points in the nodes modified by the
+        moves.
+    u : float array (num_trees, 2)
+        Random uniform values used to accept the moves.
+    z : float array (num_trees, 2 ** d)
+        Random standard normal values used to sample the new leaf values.
+    """
+    bart = bart.copy()
+    bart['var_trees'] = grow_moves['var_tree']
+        # Since var_tree can contain garbage, I can set the var of leaf to be
+        # grown irrespectively of what move I'm gonna accept in the end.
+    bart['leaf_indices'] = apply_grow_to_indices(grow_moves, bart['leaf_indices'], bart['X'])
+    count_trees, move_counts = compute_count_trees(bart['leaf_indices'], grow_moves, prune_moves, bart['opt']['count_batch_size'])
+    grow_moves, prune_moves = complete_ratio(grow_moves, prune_moves, move_counts, bart['min_points_per_leaf'])
+    if bart['opt']['require_min_points']:
+        count_half_trees = count_trees[:, :grow_moves['split_tree'].shape[1]]
+        bart['affluence_trees'] = count_half_trees >= 2 * bart['min_points_per_leaf']
+    bart['leaf_trees'] = adapt_leaf_trees_to_grow_indices(bart['leaf_trees'], grow_moves)
+    key, subkey = random.split(key)
+    u = random.uniform(subkey, (len(bart['leaf_trees']), 2), bart['opt']['large_float'])
+    z = random.normal(key, bart['leaf_trees'].shape, bart['opt']['large_float'])
+    return bart, grow_moves, prune_moves, count_trees, move_counts, u, z
+def apply_grow_to_indices(grow_moves, leaf_indices, X):
+    """
+    Update the leaf indices to apply a grow move.
+    Parameters
+    ----------
+    grow_moves : dict
+        The proposals for grow moves. See `grow_move`.
+    leaf_indices : array (num_trees, n)
+        The index of the leaf each datapoint falls into.
+    X : array (p, n)
+        The predictors matrix.
+    Returns
+    -------
+    grow_leaf_indices : array (num_trees, n)
+        The updated leaf indices.
+    """
+    left_child = grow_moves['node'].astype(leaf_indices.dtype) << 1
+    go_right = X[grow_moves['var'], :] >= grow_moves['split'][:, None]
+    tree_size = jnp.array(2 * grow_moves['split_tree'].shape[1])
+    node_to_update = jnp.where(grow_moves['num_growable'], grow_moves['node'], tree_size)
+    return jnp.where(
+        leaf_indices == node_to_update[:, None],
+        left_child[:, None] + go_right,
+        leaf_indices,
+    )
+def compute_count_trees(grow_leaf_indices, grow_moves, prune_moves, batch_size):
+    """
+    Count the number of datapoints in each leaf.
+    Parameters
+    ----------
+    grow_leaf_indices : int array (num_trees, n)
+        The index of the leaf each datapoint falls into, if the grow move is
+        accepted.
+    grow_moves, prune_moves : dict
+        The proposals for the moves. See `grow_move` and `prune_move`.
+    batch_size : int or None
+        The data batch size to use for the summation.
+    Returns
+    -------
+    count_trees : int array (num_trees, 2 ** d)
+        The number of points in each potential or actual leaf node.
+    counts : dict
+        The counts of the number of points in the nodes modified by the
+        moves, organized as two dictionaries 'grow' and 'prune', with subfields
+        'left', 'right', and 'total'.
+    """
+    ntree, tree_size = grow_moves['split_tree'].shape
+    tree_size *= 2
+    counts = dict(grow=dict(), prune=dict())
+    tree_indices = jnp.arange(ntree)
+    count_trees = count_datapoints_per_leaf(grow_leaf_indices, tree_size, batch_size)
+    # count datapoints in leaf to grow
+    counts['grow']['left'] = count_trees[tree_indices, grow_moves['left']]
+    counts['grow']['right'] = count_trees[tree_indices, grow_moves['right']]
+    counts['grow']['total'] = counts['grow']['left'] + counts['grow']['right']
+    count_trees = count_trees.at[tree_indices, grow_moves['node']].set(counts['grow']['total'])
+    # count datapoints in node to prune
+    counts['prune']['left'] = count_trees[tree_indices, prune_moves['left']]
+    counts['prune']['right'] = count_trees[tree_indices, prune_moves['right']]
+    counts['prune']['total'] = counts['prune']['left'] + counts['prune']['right']
+    count_trees = count_trees.at[tree_indices, prune_moves['node']].set(counts['prune']['total'])
+    return count_trees, counts
+def count_datapoints_per_leaf(leaf_indices, tree_size, batch_size):
+    """
+    Count the number of datapoints in each leaf.
+    Parameters
+    ----------
+    leaf_indices : int array (num_trees, n)
+        The index of the leaf each datapoint falls into.
+    tree_size : int
+        The size of the leaf tree array (2 ** d).
+    batch_size : int or None
+        The data batch size to use for the summation.
+    Returns
+    -------
+    count_trees : int array (num_trees, 2 ** d)
+        The number of points in each leaf node.
+    """
+    if batch_size is None:
+        return _count_scan(leaf_indices, tree_size)
+    else:
+        return _count_vec(leaf_indices, tree_size, batch_size)
+def _count_scan(leaf_indices, tree_size):
+    def loop(_, leaf_indices):
+        return None, _aggregate_scatter(1, leaf_indices, tree_size, jnp.uint32)
+    _, count_trees = lax.scan(loop, None, leaf_indices)
+    return count_trees
+def _aggregate_scatter(values, indices, size, dtype):
+    return (jnp
+        .zeros(size, dtype)
+        .at[indices]
+        .add(values)
+    )
+def _count_vec(leaf_indices, tree_size, batch_size):
+    return _aggregate_batched_alltrees(1, leaf_indices, tree_size, jnp.uint32, batch_size)
+        # uint16 is super-slow on gpu, don't use it even if n < 2^16
+def _aggregate_batched_alltrees(values, indices, size, dtype, batch_size):
+    ntree, n = indices.shape
+    tree_indices = jnp.arange(ntree)
+    nbatches = n // batch_size + bool(n % batch_size)
+    batch_indices = jnp.arange(n) % nbatches
+    return (jnp
+        .zeros((ntree, size, nbatches), dtype)
+        .at[tree_indices[:, None], indices, batch_indices]
+        .add(values)
+        .sum(axis=2)
+    )
+def complete_ratio(grow_moves, prune_moves, move_counts, min_points_per_leaf):
+    """
+    Complete non-likelihood MH ratio calculation.
+    This function adds the probability of choosing the prune move.
+    Parameters
+    ----------
+    grow_moves, prune_moves : dict
+        The proposals for the moves. See `grow_move` and `prune_move`.
+    move_counts : dict
+        The counts of the number of points in the nodes modified by the
+        moves.
+    min_points_per_leaf : int or None
+        The minimum number of data points in a leaf node.
+    Returns
+    -------
+    grow_moves, prune_moves : dict
+        The proposals for the moves, with the field 'partial_ratio' replaced
+        by 'trans_prior_ratio'.
+    """
+    grow_moves = grow_moves.copy()
+    prune_moves = prune_moves.copy()
+    compute_p_prune_vec = jax.vmap(compute_p_prune, in_axes=(0, 0, 0, None))
+    grow_p_prune, prune_p_prune = compute_p_prune_vec(grow_moves, move_counts['grow']['left'], move_counts['grow']['right'], min_points_per_leaf)
+    grow_moves['trans_prior_ratio'] = grow_moves.pop('partial_ratio') * grow_p_prune
+    prune_moves['trans_prior_ratio'] = prune_moves.pop('partial_ratio') * prune_p_prune
+    return grow_moves, prune_moves
+def compute_p_prune(grow_move, grow_left_count, grow_right_count, min_points_per_leaf):
+    """
+    Compute the probability of proposing a prune move.
+    Parameters
+    ----------
+    grow_move : dict
+        The proposal for the grow move, see `grow_move`.
+    grow_left_count, grow_right_count : int
+        The number of datapoints in the proposed children of the leaf to grow.
+    min_points_per_leaf : int or None
+        The minimum number of data points in a leaf node.
+    Returns
+    -------
+    grow_p_prune : float
+        The probability of proposing a prune move, after accepting the grow
+        move.
+    prune_p_prune : float
+        The probability of proposing the prune move.
+    """
+    other_growable_leaves = grow_move['num_growable'] >= 2
+    new_leaves_growable = grow_move['node'] < grow_move['split_tree'].size // 2
+    if min_points_per_leaf is not None:
+        any_above_threshold = grow_left_count >= 2 * min_points_per_leaf
+        any_above_threshold |= grow_right_count >= 2 * min_points_per_leaf
+        new_leaves_growable &= any_above_threshold
+    grow_again_allowed = other_growable_leaves | new_leaves_growable
+    grow_p_prune = jnp.where(grow_again_allowed, 0.5, 1)
+    prune_p_prune = jnp.where(grow_move['num_growable'], 0.5, 1)
+    return grow_p_prune, prune_p_prune
+def adapt_leaf_trees_to_grow_indices(leaf_trees, grow_moves):
+    """
+    Modify leaf values such that the indices of the grow move work on the
+    original tree.
+    Parameters
+    ----------
+    leaf_trees : float array (num_trees, 2 ** d)
+        The leaf values.
+    grow_moves : dict
+        The proposals for grow moves. See `grow_move`.
+    Returns
+    -------
+    leaf_trees : float array (num_trees, 2 ** d)
+        The modified leaf values. The value of the leaf to grow is copied to
+        what would be its children if the grow move was accepted.
+    """
+    ntree, _ = leaf_trees.shape
+    tree_indices = jnp.arange(ntree)
+    values_at_node = leaf_trees[tree_indices, grow_moves['node']]
+    return (leaf_trees
+        .at[tree_indices, grow_moves['left']]
+        .set(values_at_node)
+        .at[tree_indices, grow_moves['right']]
+        .set(values_at_node)
+    )
+def accept_moves_sequential_stage(bart, count_trees, grow_moves, prune_moves, move_counts, u, z):
+    """
+    The part of accepting the moves that has to be done one tree at a time.
+    Parameters
+    ----------
+    bart : dict
+        A partially updated BART mcmc state.
+    count_trees : array (num_trees, 2 ** d)
+        The number of points in each potential or actual leaf node.
+    grow_moves, prune_moves : dict
+        The proposals for the moves, with completed ratios. See `grow_move` and
+        `prune_move`.
+    move_counts : dict
+        The counts of the number of points in the nodes modified by the
+        moves.
+    u : float array (num_trees, 2)
+        Random uniform values used for proposal and accept decisions.
+    z : float array (num_trees, 2 ** d)
+        Random standard normal values used to sample the new leaf values.
+    Returns
+    -------
+    bart : dict
+        A partially updated BART mcmc state.
+    counts : dict
+        The indicators of proposals and acceptances for grow and prune moves.
+    """
     bart = bart.copy()
-    def loop(carry, item):
-        resid = carry.pop('resid')
-        resid, carry, trees = accept_move_and_sample_leaves(
+    def loop(resid, item):
+        resid, leaf_tree, split_tree, counts, ratios = accept_move_and_sample_leaves(
             bart['X'],
             len(bart['leaf_trees']),
-            bart['opt']['suffstat_batch_size'],
+            bart['opt']['resid_batch_size'],
             resid,
             bart['sigma2'],
             bart['min_points_per_leaf'],
-            carry,
+            'ratios' in bart,
             *item,
         )
-        carry['resid'] = resid
-        return carry, trees
-    carry = {
-        k: jnp.zeros_like(bart[k]) for k in
-        ['grow_prop_count', 'prune_prop_count', 'grow_acc_count', 'prune_acc_count']
-    }
-    carry['resid'] = bart['resid']
+        return resid, (leaf_tree, split_tree, counts, ratios)
     items = (
-        bart['leaf_trees'],
-        bart['split_trees'],
-        bart['affluence_trees'],
-        grow_moves,
-        prune_moves,
-        grow_leaf_indices,
-        random.split(key, len(bart['leaf_trees'])),
+        bart['leaf_trees'], count_trees,
+        grow_moves, prune_moves, move_counts,
+        bart['leaf_indices'],
+        u, z,
     )
-    carry, trees = lax.scan(loop, carry, items)
-    bart.update(carry)
-    bart.update(trees)
-    return bart
+    resid, (leaf_trees, split_trees, counts, ratios) = lax.scan(loop, bart['resid'], items)
+    bart['resid'] = resid
+    bart['leaf_trees'] = leaf_trees
+    bart['split_trees'] = split_trees
+    bart.get('ratios', {}).update(ratios)
-def accept_move_and_sample_leaves(X, ntree, suffstat_batch_size, resid, sigma2, min_points_per_leaf, counts, leaf_tree, split_tree, affluence_tree, grow_move, prune_move, grow_leaf_indices, key):
+    return bart, counts
+def accept_move_and_sample_leaves(X, ntree, resid_batch_size, resid, sigma2, min_points_per_leaf, save_ratios, leaf_tree, count_tree, grow_move, prune_move, move_counts, grow_leaf_indices, u, z):
     """
     Accept or reject a proposed move and sample the new leaf values.
@@ -798,158 +1183,157 @@ def accept_move_and_sample_leaves(X, ntree, suffstat_batch_size, resid, sigma2,
         The predictors.
     ntree : int
         The number of trees in the forest.
-    suffstat_batch_size : int, None
-        The batch size for computing sufficient statistics.
+    resid_batch_size : int, None
+        The batch size for computing the sum of residuals in each leaf.
     resid : float array (n,)
         The residuals (data minus forest value).
     sigma2 : float
         The noise variance.
     min_points_per_leaf : int or None
         The minimum number of data points in a leaf node.
-    counts : dict
-        The acceptance counts from the mcmc state dict.
+    save_ratios : bool
+        Whether to save the acceptance ratios.
     leaf_tree : float array (2 ** d,)
         The leaf values of the tree.
-    split_tree : int array (2 ** (d - 1),)
-        The decision boundaries of the tree.
-    affluence_tree : bool array (2 ** (d - 1),) or None
-        Whether a leaf has enough points to be grown.
-    grow_move : dict
-        The proposal for the grow move. See `grow_move`.
-    prune_move : dict
-        The proposal for the prune move. See `prune_move`.
+    count_tree : int array (2 ** d,)
+        The number of datapoints in each leaf.
+    grow_move, prune_move : dict
+        The proposals for the moves, with completed ratios. See `grow_move` and
+        `prune_move`.
     grow_leaf_indices : int array (n,)
         The leaf indices of the tree proposed by the grow move.
-    key : jax.dtypes.prng_key array
-        A jax random key.
+    u : float array (2,)
+        Two uniform random values in [0, 1).
+    z : float array (2 ** d,)
+        Standard normal random values.
     Returns
     -------
     resid : float array (n,)
         The updated residuals (data minus forest value).
+    leaf_tree : float array (2 ** d,)
+        The new leaf values of the tree.
+    split_tree : int array (2 ** (d - 1),)
+        The updated decision boundaries of the tree.
     counts : dict
-        The updated acceptance counts.
-    trees : dict
-        The updated tree arrays.
+        The indicators of proposals and acceptances for grow and prune moves.
+    ratios : dict
+        The acceptance ratios for the moves. Empty if not to be saved.
     """
-    # compute leaf indices in starting tree
-    grow_node = grow_move['node']
-    grow_left = grow_node << 1
-    grow_right = grow_left + 1
-    leaf_indices = jnp.where(
-        (grow_leaf_indices == grow_left) | (grow_leaf_indices == grow_right),
-        grow_node,
-        grow_leaf_indices,
-    )
-    # compute leaf indices in prune tree
-    prune_node = prune_move['node']
-    prune_left = prune_node << 1
-    prune_right = prune_left + 1
-    prune_leaf_indices = jnp.where(
-        (leaf_indices == prune_left) | (leaf_indices == prune_right),
-        prune_node,
-        leaf_indices,
-    )
+    # sum residuals per leaf, in tree proposed by grow move
+    resid_tree = sum_resid(resid, grow_leaf_indices, leaf_tree.size, resid_batch_size)
     # subtract starting tree from function
-    resid += leaf_tree[leaf_indices]
+    resid_tree += count_tree * leaf_tree
-    # aggregate residuals and count units per leaf
-    grow_resid_tree, grow_count_tree = sufficient_stat(resid, grow_leaf_indices, leaf_tree.size, suffstat_batch_size)
+    # get indices of grow move
+    grow_node = grow_move['node']
+    assert grow_node.dtype == jnp.int32
+    grow_left = grow_move['left']
+    grow_right = grow_move['right']
-    # compute aggregations in starting tree
-    # I do not zero the children because garbage there does not matter
-    resid_tree = (grow_resid_tree.at[grow_node]
-        .set(grow_resid_tree[grow_left] + grow_resid_tree[grow_right]))
-    count_tree = (grow_count_tree.at[grow_node]
-        .set(grow_count_tree[grow_left] + grow_count_tree[grow_right]))
+    # sum residuals in leaf to grow
+    grow_resid_left = resid_tree[grow_left]
+    grow_resid_right = resid_tree[grow_right]
+    grow_resid_total = grow_resid_left + grow_resid_right
+    resid_tree = resid_tree.at[grow_node].set(grow_resid_total)
-    # compute aggregations in prune tree
-    prune_resid_tree = (resid_tree.at[prune_node]
-        .set(resid_tree[prune_left] + resid_tree[prune_right]))
-    prune_count_tree = (count_tree.at[prune_node]
-        .set(count_tree[prune_left] + count_tree[prune_right]))
+    # get indices of prune move
+    prune_node = prune_move['node']
+    assert prune_node.dtype == jnp.int32
+    prune_left = prune_move['left']
+    prune_right = prune_move['right']
-    # compute affluence trees
-    if min_points_per_leaf is not None:
-        grow_affluence_tree = grow_count_tree[:grow_count_tree.size // 2] >= 2 * min_points_per_leaf
-        prune_affluence_tree = affluence_tree.at[prune_node].set(True)
+    # sum residuals in node to prune
+    prune_resid_left = resid_tree[prune_left]
+    prune_resid_right = resid_tree[prune_right]
+    prune_resid_total = prune_resid_left + prune_resid_right
+    resid_tree = resid_tree.at[prune_node].set(prune_resid_total)
-    # compute probability of proposing prune
-    grow_p_prune = compute_p_prune_back(grow_move['split_tree'], grow_affluence_tree)
-    prune_p_prune = compute_p_prune_back(split_tree, affluence_tree)
+    # Now resid_tree and count_tree contain correct values whatever move is
+    # accepted.
     # compute likelihood ratios
-    grow_lk_ratio = compute_likelihood_ratio(grow_resid_tree, grow_count_tree, sigma2, grow_node, ntree, min_points_per_leaf)
-    prune_lk_ratio = compute_likelihood_ratio(resid_tree, count_tree, sigma2, prune_node, ntree, min_points_per_leaf)
+    grow_lk_ratio = compute_likelihood_ratio(grow_resid_total, grow_resid_left, grow_resid_right, move_counts['grow']['total'], move_counts['grow']['left'], move_counts['grow']['right'], sigma2, ntree)
+    prune_lk_ratio = compute_likelihood_ratio(prune_resid_total, prune_resid_left, prune_resid_right, move_counts['prune']['total'], move_counts['prune']['left'], move_counts['prune']['right'], sigma2, ntree)
     # compute acceptance ratios
-    grow_ratio = grow_p_prune * grow_move['partial_ratio'] * grow_lk_ratio
-    prune_ratio = prune_p_prune * prune_move['partial_ratio'] * prune_lk_ratio
+    grow_ratio = grow_move['trans_prior_ratio'] * grow_lk_ratio
+    if min_points_per_leaf is not None:
+        grow_ratio = jnp.where(move_counts['grow']['left'] >= min_points_per_leaf, grow_ratio, 0)
+        grow_ratio = jnp.where(move_counts['grow']['right'] >= min_points_per_leaf, grow_ratio, 0)
+    prune_ratio = prune_move['trans_prior_ratio'] * prune_lk_ratio
     prune_ratio = lax.reciprocal(prune_ratio)
-    # random coins in [0, 1) for proposal and acceptance
-    key, subkey = random.split(key)
-    u0, u1 = random.uniform(subkey, (2,))
+    # save acceptance ratios
+    ratios = {}
+    if save_ratios:
+        ratios.update(
+            grow=dict(
+                trans_prior=grow_move['trans_prior_ratio'],
+                likelihood=grow_lk_ratio,
+            ),
+            prune=dict(
+                trans_prior=lax.reciprocal(prune_move['trans_prior_ratio']),
+                likelihood=lax.reciprocal(prune_lk_ratio),
+            ),
+        )
     # determine what move to propose (not proposing anything is an option)
-    p_grow = jnp.where(grow_move['allowed'] & prune_move['allowed'], 0.5, grow_move['allowed'])
-    try_grow = u0 < p_grow
+    grow_allowed = grow_move['num_growable'].astype(bool)
+    p_grow = jnp.where(grow_allowed & prune_move['allowed'], 0.5, grow_allowed)
+    try_grow = u[0] < p_grow # use < instead of <= because coins are in [0, 1)
     try_prune = prune_move['allowed'] & ~try_grow
     # determine whether to accept the move
-    do_grow = try_grow & (u1 < grow_ratio)
-    do_prune = try_prune & (u1 < prune_ratio)
-    # pick trees for chosen move
-    trees = {}
-    split_tree = jnp.where(do_grow, grow_move['split_tree'], split_tree)
-    # the prune var tree is equal to the initial one, because I leave garbage values behind
-    split_tree = split_tree.at[prune_node].set(
-        jnp.where(do_prune, 0, split_tree[prune_node]))
-    if min_points_per_leaf is not None:
-        affluence_tree = jnp.where(do_grow, grow_affluence_tree, affluence_tree)
-        affluence_tree = jnp.where(do_prune, prune_affluence_tree, affluence_tree)
-    resid_tree = jnp.where(do_grow, grow_resid_tree, resid_tree)
-    count_tree = jnp.where(do_grow, grow_count_tree, count_tree)
-    resid_tree = jnp.where(do_prune, prune_resid_tree, resid_tree)
-    count_tree = jnp.where(do_prune, prune_count_tree, count_tree)
-    # update acceptance counts
-    counts = counts.copy()
-    counts['grow_prop_count'] += try_grow
-    counts['grow_acc_count'] += do_grow
-    counts['prune_prop_count'] += try_prune
-    counts['prune_acc_count'] += do_prune
-    # compute leaves posterior
-    prec_lk = count_tree / sigma2
+    do_grow = try_grow & (u[1] < grow_ratio)
+    do_prune = try_prune & (u[1] < prune_ratio)
+    # pick split tree for chosen move
+    split_tree = grow_move['split_tree']
+    split_tree = split_tree.at[jnp.where(do_grow, split_tree.size, grow_node)].set(0)
+    split_tree = split_tree.at[jnp.where(do_prune, prune_node, split_tree.size)].set(0)
+    # I can leave garbage in var_tree, resid_tree, count_tree
+    # compute leaves posterior and sample leaves
+    inv_sigma2 = lax.reciprocal(sigma2)
+    prec_lk = count_tree * inv_sigma2
     var_post = lax.reciprocal(prec_lk + ntree) # = 1 / (prec_lk + prec_prior)
-    mean_post = resid_tree / sigma2 * var_post # = mean_lk * prec_lk * var_post
-    # sample leaves
-    z = random.normal(key, mean_post.shape, mean_post.dtype)
+    mean_post = resid_tree * inv_sigma2 * var_post # = mean_lk * prec_lk * var_post
+    initial_leaf_tree = leaf_tree
     leaf_tree = mean_post + z * jnp.sqrt(var_post)
-    # add new tree to function
-    leaf_indices = jnp.where(do_grow, grow_leaf_indices, leaf_indices)
-    leaf_indices = jnp.where(do_prune, prune_leaf_indices, leaf_indices)
-    resid -= leaf_tree[leaf_indices]
+    # copy leaves around such that the grow leaf indices select the right leaf
+    leaf_tree = (leaf_tree
+        .at[jnp.where(do_prune, prune_left, leaf_tree.size)]
+        .set(leaf_tree[prune_node])
+        .at[jnp.where(do_prune, prune_right, leaf_tree.size)]
+        .set(leaf_tree[prune_node])
+    )
+    leaf_tree = (leaf_tree
+        .at[jnp.where(do_grow, leaf_tree.size, grow_left)]
+        .set(leaf_tree[grow_node])
+        .at[jnp.where(do_grow, leaf_tree.size, grow_right)]
+        .set(leaf_tree[grow_node])
+    )
+    # replace old tree with new tree in function values
+    resid += (initial_leaf_tree - leaf_tree)[grow_leaf_indices]
-    # pack trees
-    trees = {
-        'leaf_trees': leaf_tree,
-        'split_trees': split_tree,
-        'affluence_trees': affluence_tree,
-    }
+    # pack proposal and acceptance indicators
+    counts = dict(
+        grow_prop_count=try_grow,
+        grow_acc_count=do_grow,
+        prune_prop_count=try_prune,
+        prune_acc_count=do_prune,
+    )
-    return resid, counts, trees
+    return resid, leaf_tree, split_tree, counts, ratios
-def sufficient_stat(resid, leaf_indices, tree_size, batch_size):
+def sum_resid(resid, leaf_indices, tree_size, batch_size):
     """
-    Compute the sufficient statistics for the likelihood ratio of a tree move.
+    Sum the residuals in each leaf.
     Parameters
     ----------
@@ -960,104 +1344,56 @@ def sufficient_stat(resid, leaf_indices, tree_size, batch_size):
     tree_size : int
         The size of the tree array (2 ** d).
     batch_size : int, None
-        The batch size for the aggregation. Batching increases numerical
+        The data batch size for the aggregation. Batching increases numerical
         accuracy and parallelism.
     Returns
     -------
     resid_tree : float array (2 ** d,)
         The sum of the residuals at data points in each leaf.
-    count_tree : int array (2 ** d,)
-        The number of data points in each leaf.
     """
     if batch_size is None:
         aggr_func = _aggregate_scatter
     else:
-        aggr_func = functools.partial(_aggregate_batched, batch_size=batch_size)
-    resid_tree = aggr_func(resid, leaf_indices, tree_size, jnp.float32)
-    count_tree = aggr_func(1, leaf_indices, tree_size, jnp.uint32)
-    return resid_tree, count_tree
-def _aggregate_scatter(values, indices, size, dtype):
-    return (jnp
-        .zeros(size, dtype)
-        .at[indices]
-        .add(values)
-    )
+        aggr_func = functools.partial(_aggregate_batched_onetree, batch_size=batch_size)
+    return aggr_func(resid, leaf_indices, tree_size, jnp.float32)
-def _aggregate_batched(values, indices, size, dtype, batch_size):
-    nbatches = indices.size // batch_size + bool(indices.size % batch_size)
-    batch_indices = jnp.arange(indices.size) // batch_size
+def _aggregate_batched_onetree(values, indices, size, dtype, batch_size):
+    n, = indices.shape
+    nbatches = n // batch_size + bool(n % batch_size)
+    batch_indices = jnp.arange(n) % nbatches
     return (jnp
-        .zeros((nbatches, size), dtype)
-        .at[batch_indices, indices]
+        .zeros((size, nbatches), dtype)
+        .at[indices, batch_indices]
         .add(values)
-        .sum(axis=0)
+        .sum(axis=1)
     )
-def compute_p_prune_back(new_split_tree, new_affluence_tree):
-    """
-    Compute the probability of proposing a prune move after doing a grow move.
-    Parameters
-    ----------
-    new_split_tree : int array (2 ** (d - 1),)
-        The decision boundaries of the tree, after the grow move.
-    new_affluence_tree : bool array (2 ** (d - 1),)
-        Which leaves have enough points to be grown, after the grow move.
-    Returns
-    -------
-    p_prune : float
-        The probability of proposing a prune move after the grow move. This is
-        0.5 if grow is possible again, and 1 if it isn't. It can't be 0 because
-        at least the node just grown can be pruned.
-    """
-    _, grow_again_allowed = growable_leaves(new_split_tree, new_affluence_tree)
-    return jnp.where(grow_again_allowed, 0.5, 1)
-def compute_likelihood_ratio(resid_tree, count_tree, sigma2, node, n_tree, min_points_per_leaf):
+def compute_likelihood_ratio(total_resid, left_resid, right_resid, total_count, left_count, right_count, sigma2, n_tree):
     """
     Compute the likelihood ratio of a grow move.
     Parameters
     ----------
-    resid_tree : float array (2 ** d,)
-        The sum of the residuals at data points in each leaf.
-    count_tree : int array (2 ** d,)
-        The number of data points in each leaf.
+    total_resid : float
+        The sum of the residuals in the leaf to grow.
+    left_resid, right_resid : float
+        The sum of the residuals in the left/right child of the leaf to grow.
+    total_count : int
+        The number of datapoints in the leaf to grow.
+    left_count, right_count : int
+        The number of datapoints in the left/right child of the leaf to grow.
     sigma2 : float
         The noise variance.
-    node : int
-        The index of the leaf that has been grown.
     n_tree : int
         The number of trees in the forest.
-    min_points_per_leaf : int or None
-        The minimum number of data points in a leaf node.
     Returns
     -------
     ratio : float
         The likelihood ratio P(data | new tree) / P(data | old tree).
-    Notes
-    -----
-    The ratio is set to 0 if the grow move would create leaves with not enough
-    datapoints per leaf, although this is part of the prior rather than the
-    likelihood.
     """
-    left_child = node << 1
-    right_child = left_child + 1
-    left_resid = resid_tree[left_child]
-    right_resid = resid_tree[right_child]
-    total_resid = left_resid + right_resid
-    left_count = count_tree[left_child]
-    right_count = count_tree[right_child]
-    total_count = left_count + right_count
     sigma_mu2 = 1 / n_tree
     sigma2_left = sigma2 + left_count * sigma_mu2
     sigma2_right = sigma2 + right_count * sigma_mu2
@@ -1071,13 +1407,67 @@ def compute_likelihood_ratio(resid_tree, count_tree, sigma2, node, n_tree, min_p
         total_resid * total_resid / sigma2_total
     )
-    ratio = jnp.sqrt(sqrt_term) * jnp.exp(exp_term)
+    return jnp.sqrt(sqrt_term) * jnp.exp(exp_term)
-    if min_points_per_leaf is not None:
-        ratio = jnp.where(right_count >= min_points_per_leaf, ratio, 0)
-        ratio = jnp.where(left_count >= min_points_per_leaf, ratio, 0)
+def accept_moves_final_stage(bart, counts, grow_moves, prune_moves):
+    """
+    The final part of accepting the moves, in parallel across trees.
+    Parameters
+    ----------
+    bart : dict
+        A partially updated BART mcmc state.
+    counts : dict
+        The indicators of proposals and acceptances for grow and prune moves.
+    grow_moves, prune_moves : dict
+        The proposals for the moves. See `grow_move` and `prune_move`.
+    Returns
+    -------
+    bart : dict
+        The fully updated BART mcmc state.
+    """
+    bart = bart.copy()
+    for k, v in counts.items():
+        bart[k] = jnp.sum(v, axis=0)
-    return ratio
+    bart['leaf_indices'] = apply_moves_to_indices(bart['leaf_indices'], counts, grow_moves, prune_moves)
+    return bart
+def apply_moves_to_indices(leaf_indices, counts, grow_moves, prune_moves):
+    """
+    Update the leaf indices to match the accepted move.
+    Parameters
+    ----------
+    leaf_indices : int array (num_trees, n)
+        The index of the leaf each datapoint falls into, if the grow move was
+        accepted.
+    counts : dict
+        The indicators of proposals and acceptances for grow and prune moves.
+    grow_moves, prune_moves : dict
+        The proposals for the moves. See `grow_move` and `prune_move`.
+    Returns
+    -------
+    leaf_indices : int array (num_trees, n)
+        The updated leaf indices.
+    """
+    mask = ~jnp.array(1, leaf_indices.dtype) # ...1111111110
+    cond = (leaf_indices & mask) == grow_moves['left'][:, None]
+    leaf_indices = jnp.where(
+        cond & ~counts['grow_acc_count'][:, None],
+        grow_moves['node'][:, None].astype(leaf_indices.dtype),
+        leaf_indices,
+    )
+    cond = (leaf_indices & mask) == prune_moves['left'][:, None]
+    return jnp.where(
+        cond & counts['prune_acc_count'][:, None],
+        prune_moves['node'][:, None].astype(leaf_indices.dtype),
+        leaf_indices,
+    )
 def sample_sigma(bart, key):
     """
@@ -1099,7 +1489,7 @@ def sample_sigma(bart, key):
     resid = bart['resid']
     alpha = bart['sigma2_alpha'] + resid.size / 2
-    norm2 = jnp.dot(resid, resid, preferred_element_type=bart['sigma2_beta'].dtype)
+    norm2 = jnp.dot(resid, resid, preferred_element_type=bart['opt']['large_float'])
     beta = bart['sigma2_beta'] + norm2 / 2
     sample = random.gamma(key, alpha)

bartz 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

bartz 0.2.0py3-none-any.whl → 0.3.0py3-none-any.whl