bartz-0.6.0-py3-none-any.whl → bartz-0.7.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bartz/BART.py +464 -254
- bartz/__init__.py +2 -2
- bartz/_version.py +1 -1
- bartz/debug.py +1259 -79
- bartz/grove.py +139 -93
- bartz/jaxext/__init__.py +213 -0
- bartz/jaxext/_autobatch.py +238 -0
- bartz/jaxext/scipy/__init__.py +25 -0
- bartz/jaxext/scipy/special.py +240 -0
- bartz/jaxext/scipy/stats.py +36 -0
- bartz/mcmcloop.py +468 -311
- bartz/mcmcstep.py +734 -453
- bartz/prepcovars.py +139 -43
- {bartz-0.6.0.dist-info → bartz-0.7.0.dist-info}/METADATA +2 -3
- bartz-0.7.0.dist-info/RECORD +17 -0
- {bartz-0.6.0.dist-info → bartz-0.7.0.dist-info}/WHEEL +1 -1
- bartz/jaxext.py +0 -423
- bartz-0.6.0.dist-info/RECORD +0 -13
bartz/mcmcstep.py
CHANGED
@@ -28,26 +28,33 @@ Functions that implement the BART posterior MCMC initialization and update step.
 Functions that do MCMC steps operate by taking as input a bart state, and
 outputting a new state. The inputs are not modified.

-The
+The entry points are:

 - `State`: The dataclass that represents a BART MCMC state.
 - `init`: Creates an initial `State` from data and configurations.
 - `step`: Performs one full MCMC step on a `State`, returning a new `State`.
+- `step_sparse`: Performs the MCMC update for variable selection, which is skipped in `step`.
 """

 import math
 from dataclasses import replace
 from functools import cache, partial
-from typing import Any
+from typing import Any, Literal

 import jax
-from equinox import Module, field
+from equinox import Module, field, tree_at
 from jax import lax, random
 from jax import numpy as jnp
+from jax.scipy.special import gammaln, logsumexp
 from jaxtyping import Array, Bool, Float32, Int32, Integer, Key, Shaped, UInt

-from
-from .jaxext import
+from bartz import grove
+from bartz.jaxext import (
+    minimal_unsigned_dtype,
+    split,
+    truncated_normal_onesided,
+    vmap_nodoc,
+)


 class Forest(Module):
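A minimal usage sketch of these entry points (data shapes and hyperparameter values are invented; `init`'s keywords follow the signature shown later in this diff, while `step` taking `(key, state)` is an assumption based on its description above):

    import jax
    from jax import numpy as jnp
    from bartz import mcmcstep

    p, n, num_trees = 2, 100, 50
    X = jnp.zeros((p, n), dtype=jnp.uint8)           # binned predictors
    y = jnp.zeros(n, dtype=jnp.float32)              # continuous response
    state = mcmcstep.init(
        X=X,
        y=y,
        max_split=jnp.ones(p, dtype=jnp.uint8),      # one available cutpoint per predictor
        num_trees=num_trees,
        p_nonterminal=jnp.array([0.95, 0.75, 0.5]),  # depth-wise nonterminal prior
        sigma_mu2=1 / num_trees,
        sigma2_alpha=2.0,
        sigma2_beta=2.0,
    )
    key = jax.random.PRNGKey(0)
    for subkey in jax.random.split(key, 10):
        state = mcmcstep.step(subkey, state)         # assumed call convention: (key, state) -> state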
@@ -56,24 +63,32 @@ class Forest(Module):

     Parameters
     ----------
-
+    leaf_tree
         The leaf values.
-
+    var_tree
         The decision axes.
-
+    split_tree
         The decision boundaries.
+    affluence_tree
+        Marks leaves that can be grown.
+    max_split
+        The maximum split index for each predictor.
+    blocked_vars
+        Indices of variables that are not used. This shall include at least
+        the `i` such that ``max_split[i] == 0``, otherwise behavior is
+        undefined.
     p_nonterminal
-        The probability of
-
+        The prior probability of each node being nonterminal, conditional on
+        its ancestors. Includes the nodes at maximum depth which should be set
+        to 0.
     p_propose_grow
         The unnormalized probability of picking a leaf for a grow proposal.
     leaf_indices
         The index of the leaf each datapoints falls into, for each tree.
+    min_points_per_decision_node
+        The minimum number of data points in a decision node.
     min_points_per_leaf
         The minimum number of data points in a leaf node.
-    affluence_trees
-        Whether a non-bottom leaf nodes contains twice `min_points_per_leaf`
-        datapoints. If `min_points_per_leaf` is not specified, this is None.
     resid_batch_size
     count_batch_size
         The data batch sizes for computing the sufficient statistics. If `None`,
@@ -91,25 +106,45 @@ class Forest(Module):
         The number of grow/prune moves accepted during one full MCMC cycle.
     sigma_mu2
         The prior variance of a leaf, conditional on the tree structure.
-
-
-
-
-
-
-
+    log_s
+        The logarithm of the prior probability for choosing a variable to split
+        along in a decision rule, conditional on the ancestors. Not normalized.
+        If `None`, use a uniform distribution.
+    theta
+        The concentration parameter for the Dirichlet prior on the variable
+        distribution `s`. Required only to update `s`.
+    a
+    b
+    rho
+        Parameters of the prior on `theta`. Required only to sample `theta`.
+        See `step_theta`.
+    """
+
+    leaf_tree: Float32[Array, 'num_trees 2**d']
+    var_tree: UInt[Array, 'num_trees 2**(d-1)']
+    split_tree: UInt[Array, 'num_trees 2**(d-1)']
+    affluence_tree: Bool[Array, 'num_trees 2**(d-1)']
+    max_split: UInt[Array, ' p']
+    blocked_vars: UInt[Array, ' k'] | None
+    p_nonterminal: Float32[Array, ' 2**d']
+    p_propose_grow: Float32[Array, ' 2**(d-1)']
     leaf_indices: UInt[Array, 'num_trees n']
+    min_points_per_decision_node: Int32[Array, ''] | None
     min_points_per_leaf: Int32[Array, ''] | None
-    affluence_trees: Bool[Array, 'num_trees 2**(d-1)'] | None
     resid_batch_size: int | None = field(static=True)
     count_batch_size: int | None = field(static=True)
-    log_trans_prior: Float32[Array, 'num_trees'] | None
-    log_likelihood: Float32[Array, 'num_trees'] | None
+    log_trans_prior: Float32[Array, ' num_trees'] | None
+    log_likelihood: Float32[Array, ' num_trees'] | None
     grow_prop_count: Int32[Array, '']
     prune_prop_count: Int32[Array, '']
     grow_acc_count: Int32[Array, '']
     prune_acc_count: Int32[Array, '']
     sigma_mu2: Float32[Array, '']
+    log_s: Float32[Array, ' p'] | None
+    theta: Float32[Array, ''] | None
+    a: Float32[Array, ''] | None
+    b: Float32[Array, ''] | None
+    rho: Float32[Array, ''] | None


 class State(Module):
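The `2**d` and `2**(d-1)` axis sizes above come from storing each tree as a heap: the root sits at index 1 and node `i` has children `2*i` and `2*i + 1`, which is why the GROW move later in this diff computes `left = leaf_to_grow << 1`. A toy illustration of that indexing (not taken from the package):

    import jax.numpy as jnp

    d = 3                                      # maximum depth -> 2**d slots per tree
    leaf_tree = jnp.zeros(2**d)                # index 0 unused, root stored at index 1
    node = 2
    left, right = node << 1, (node << 1) + 1   # children of node 2 live at indices 4 and 5
    depths = jnp.log2(jnp.arange(1, 2**d)).astype(int)  # depths of nodes 1..7: 0,1,1,2,2,2,2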
@@ -120,8 +155,6 @@ class State(Module):
     ----------
     X
         The predictors.
-    max_split
-        The maximum split index for each predictor.
     y
         The response. If the data type is `bool`, the model is binary regression.
     resid
@@ -145,13 +178,12 @@
     """

     X: UInt[Array, 'p n']
-
-
-    z: None | Float32[Array, 'n']
+    y: Float32[Array, ' n'] | Bool[Array, ' n']
+    z: None | Float32[Array, ' n']
     offset: Float32[Array, '']
-    resid: Float32[Array, 'n']
+    resid: Float32[Array, ' n']
     sigma2: Float32[Array, ''] | None
-    prec_scale: Float32[Array, 'n'] | None
+    prec_scale: Float32[Array, ' n'] | None
     sigma2_alpha: Float32[Array, ''] | None
     sigma2_beta: Float32[Array, ''] | None
     forest: Forest
@@ -160,19 +192,26 @@
 def init(
     *,
     X: UInt[Any, 'p n'],
-    y: Float32[Any, 'n'] | Bool[Any, 'n'],
+    y: Float32[Any, ' n'] | Bool[Any, ' n'],
     offset: float | Float32[Any, ''] = 0.0,
-    max_split: UInt[Any, 'p'],
+    max_split: UInt[Any, ' p'],
     num_trees: int,
-    p_nonterminal: Float32[Any, 'd-1'],
+    p_nonterminal: Float32[Any, ' d-1'],
     sigma_mu2: float | Float32[Any, ''],
     sigma2_alpha: float | Float32[Any, ''] | None = None,
     sigma2_beta: float | Float32[Any, ''] | None = None,
-    error_scale: Float32[Any, 'n'] | None = None,
-
-    resid_batch_size: int | None
-    count_batch_size: int | None
+    error_scale: Float32[Any, ' n'] | None = None,
+    min_points_per_decision_node: int | Integer[Any, ''] | None = None,
+    resid_batch_size: int | None | Literal['auto'] = 'auto',
+    count_batch_size: int | None | Literal['auto'] = 'auto',
     save_ratios: bool = False,
+    filter_splitless_vars: bool = True,
+    min_points_per_leaf: int | Integer[Any, ''] | None = None,
+    log_s: Float32[Any, ' p'] | None = None,
+    theta: float | Float32[Any, ''] | None = None,
+    a: float | Float32[Any, ''] | None = None,
+    b: float | Float32[Any, ''] | None = None,
+    rho: float | Float32[Any, ''] | None = None,
 ) -> State:
     """
     Make a BART posterior sampling MCMC initial state.
@@ -206,8 +245,9 @@ def init(
         the error variance for ``y[i]`` is ``sigma2 * error_scale[i] ** 2``.
         Not supported for binary regression. If not specified, defaults to 1 for
         all points, but potentially skipping calculations.
-
-        The minimum number of data points in a
+    min_points_per_decision_node
+        The minimum number of data points in a decision node. 0 if not
+        specified.
     resid_batch_size
     count_batch_size
         The batch sizes, along datapoints, for summing the residuals and
@@ -216,6 +256,33 @@ def init(
         device.
     save_ratios
         Whether to save the Metropolis-Hastings ratios.
+    filter_splitless_vars
+        Whether to check `max_split` for variables without available cutpoints.
+        If any are found, they are put into a list of variables to exclude from
+        the MCMC. If `False`, no check is performed, but the results may be
+        wrong if any variable is blocked. The function is jax-traceable only
+        if this is set to `False`.
+    min_points_per_leaf
+        The minimum number of datapoints in a leaf node. 0 if not specified.
+        Unlike `min_points_per_decision_node`, this constraint is not taken into
+        account in the Metropolis-Hastings ratio because it would be expensive
+        to compute. Grow moves that would violate this constraint are vetoed.
+        This parameter is independent of `min_points_per_decision_node` and
+        there is no check that they are coherent. It makes sense to set
+        ``min_points_per_decision_node >= 2 * min_points_per_leaf``.
+    log_s
+        The logarithm of the prior probability for choosing a variable to split
+        along in a decision rule, conditional on the ancestors. Not normalized.
+        If not specified, use a uniform distribution. If not specified and
+        `theta` or `rho`, `a`, `b` are, it's initialized automatically.
+    theta
+        The concentration parameter for the Dirichlet prior on `s`. Required
+        only to update `log_s`. If not specified, and `rho`, `a`, `b` are
+        specified, it's initialized automatically.
+    a
+    b
+    rho
+        Parameters of the prior on `theta`. Required only to sample `theta`.

     Returns
     -------
@@ -225,6 +292,13 @@ def init(
     ------
     ValueError
         If `y` is boolean and arguments unused in binary regression are set.
+
+    Notes
+    -----
+    In decision nodes, the values in ``X[i, :]`` are compared to a cutpoint out
+    of the range ``[1, 2, ..., max_split[i]]``. A point belongs to the left
+    child iff ``X[i, j] < cutpoint``. Thus it makes sense for ``X[i, :]`` to be
+    integers in the range ``[0, 1, ..., max_split[i]]``.
     """
     p_nonterminal = jnp.asarray(p_nonterminal)
     p_nonterminal = jnp.pad(p_nonterminal, (0, 1))
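A toy illustration of the cutpoint convention stated in these Notes (values invented for the example):

    import jax.numpy as jnp

    X = jnp.array([[0, 1, 2, 3]], dtype=jnp.uint8)  # p=1 predictor, n=4 points, max_split[0] == 3
    var, cutpoint = 0, 2                            # cutpoint drawn from [1, ..., max_split[var]]
    goes_left = X[var, :] < cutpoint                # -> [True, True, False, False]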
@@ -244,22 +318,37 @@ def init(
     is_binary = y.dtype == bool
     if is_binary:
         if (error_scale, sigma2_alpha, sigma2_beta) != 3 * (None,):
-
+            msg = (
                 'error_scale, sigma2_alpha, and sigma2_beta must be set '
                 ' to `None` for binary regression.'
             )
+            raise ValueError(msg)
         sigma2 = None
     else:
         sigma2_alpha = jnp.asarray(sigma2_alpha)
         sigma2_beta = jnp.asarray(sigma2_beta)
         sigma2 = sigma2_beta / sigma2_alpha
-        # sigma2 = jnp.where(jnp.isfinite(sigma2) & (sigma2 > 0), sigma2, 1)
-        # TODO: I don't like this isfinite check, these functions should be
-        # low-level and just do the thing. Why was it here?

-
+    max_split = jnp.asarray(max_split)
+
+    if filter_splitless_vars:
+        (blocked_vars,) = jnp.nonzero(max_split == 0)
+        blocked_vars = blocked_vars.astype(minimal_unsigned_dtype(max_split.size))
+        # see `fully_used_variables` for the type cast
+    else:
+        blocked_vars = None
+
+    # check and initialize sparsity parameters
+    if not _all_none_or_not_none(rho, a, b):
+        msg = 'rho, a, b are not either all `None` or all set'
+        raise ValueError(msg)
+    if theta is None and rho is not None:
+        theta = rho
+    if log_s is None and theta is not None:
+        log_s = jnp.zeros(max_split.size)
+
+    return State(
         X=jnp.asarray(X),
-        max_split=jnp.asarray(max_split),
         y=y,
         z=jnp.full(y.shape, offset) if is_binary else None,
         offset=offset,
@@ -271,41 +360,54 @@ def init(
         sigma2_alpha=sigma2_alpha,
         sigma2_beta=sigma2_beta,
         forest=Forest(
-
-
-
+            leaf_tree=make_forest(max_depth, jnp.float32),
+            var_tree=make_forest(max_depth - 1, minimal_unsigned_dtype(X.shape[0] - 1)),
+            split_tree=make_forest(max_depth - 1, max_split.dtype),
+            affluence_tree=(
+                make_forest(max_depth - 1, bool)
+                .at[:, 1]
+                .set(
+                    True
+                    if min_points_per_decision_node is None
+                    else y.size >= min_points_per_decision_node
+                )
             ),
-
+            blocked_vars=blocked_vars,
+            max_split=max_split,
             grow_prop_count=jnp.zeros((), int),
             grow_acc_count=jnp.zeros((), int),
             prune_prop_count=jnp.zeros((), int),
             prune_acc_count=jnp.zeros((), int),
-            p_nonterminal=p_nonterminal,
+            p_nonterminal=p_nonterminal[grove.tree_depths(2**max_depth)],
             p_propose_grow=p_nonterminal[grove.tree_depths(2 ** (max_depth - 1))],
             leaf_indices=jnp.ones(
                 (num_trees, y.size), minimal_unsigned_dtype(2**max_depth - 1)
            ),
-
-
-                if min_points_per_leaf is None
-                else jnp.asarray(min_points_per_leaf)
-            ),
-            affluence_trees=(
-                None
-                if min_points_per_leaf is None
-                else make_forest(max_depth - 1, bool)
-                .at[:, 1]
-                .set(y.size >= 2 * min_points_per_leaf)
-            ),
+            min_points_per_decision_node=_asarray_or_none(min_points_per_decision_node),
+            min_points_per_leaf=_asarray_or_none(min_points_per_leaf),
             resid_batch_size=resid_batch_size,
             count_batch_size=count_batch_size,
-            log_trans_prior=jnp.
-            log_likelihood=jnp.
+            log_trans_prior=jnp.zeros(num_trees) if save_ratios else None,
+            log_likelihood=jnp.zeros(num_trees) if save_ratios else None,
             sigma_mu2=jnp.asarray(sigma_mu2),
+            log_s=_asarray_or_none(log_s),
+            theta=_asarray_or_none(theta),
+            rho=_asarray_or_none(rho),
+            a=_asarray_or_none(a),
+            b=_asarray_or_none(b),
         ),
     )

-
+
+def _all_none_or_not_none(*args):
+    is_none = [x is None for x in args]
+    return all(is_none) or not any(is_none)
+
+
+def _asarray_or_none(x):
+    if x is None:
+        return None
+    return jnp.asarray(x)


 def _choose_suffstat_batch_size(
@@ -319,16 +421,17 @@ def _choose_suffstat_batch_size(
         device = jax.devices()[0]
         platform = device.platform
         if platform not in ('cpu', 'gpu'):
-
+            msg = f'Unknown platform: {platform}'
+            raise KeyError(msg)
         return platform

     if resid_batch_size == 'auto':
         platform = get_platform()
         n = max(1, y.size)
         if platform == 'cpu':
-            resid_batch_size = 2 **
+            resid_batch_size = 2 ** round(math.log2(n / 6))  # n/6
         elif platform == 'gpu':
-            resid_batch_size = 2 **
+            resid_batch_size = 2 ** round((1 + math.log2(n)) / 3)  # n^1/3
         resid_batch_size = max(1, resid_batch_size)

     if count_batch_size == 'auto':
@@ -337,11 +440,11 @@ def _choose_suffstat_batch_size(
             count_batch_size = None
         elif platform == 'gpu':
             n = max(1, y.size)
-            count_batch_size = 2 **
+            count_batch_size = 2 ** round(math.log2(n) / 2 - 2)  # n^1/2
             # /4 is good on V100, /2 on L4/T4, still haven't tried A100
             max_memory = 2**29
             itemsize = 4
-            min_batch_size =
+            min_batch_size = math.ceil(forest_size * itemsize * n / max_memory)
             count_batch_size = max(count_batch_size, min_batch_size)
             count_batch_size = max(1, count_batch_size)

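To make the batch-size heuristics concrete, here is the arithmetic for a hypothetical n = 100_000, worked by hand rather than taken from the package:

    import math

    n = 100_000
    cpu_resid_batch = 2 ** round(math.log2(n / 6))        # ~ n/6, power of two -> 16384
    gpu_resid_batch = 2 ** round((1 + math.log2(n)) / 3)  # ~ n**(1/3), power of two -> 64
    gpu_count_batch = 2 ** round(math.log2(n) / 2 - 2)    # ~ sqrt(n)/4, power of two -> 64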
@@ -397,7 +500,7 @@ def step_trees(key: Key[Array, ''], bart: State) -> State:
     This function zeroes the proposal counters.
     """
     keys = split(key)
-    moves = propose_moves(keys.pop(), bart.forest
+    moves = propose_moves(keys.pop(), bart.forest)
     return accept_moves_and_sample_leaves(keys.pop(), bart, moves)


@@ -408,9 +511,11 @@ class Moves(Module):
     Parameters
     ----------
     allowed
-        Whether
-
-
+        Whether there is a possible move. If `False`, the other values may not
+        make sense. The only case in which a move is marked as allowed but is
+        then vetoed is if it does not satisfy `min_points_per_leaf`, which for
+        efficiency is implemented post-hoc without changing the rest of the
+        MCMC logic.
     grow
         Whether the move is a grow move or a prune move.
     num_growable
@@ -421,20 +526,27 @@
     right
         The indices of the children of 'node'.
     partial_ratio
-        A factor of the Metropolis-Hastings ratio of the move. It lacks
-
-
+        A factor of the Metropolis-Hastings ratio of the move. It lacks the
+        likelihood ratio, the probability of proposing the prune move, and the
+        probability that the children of the modified node are terminal. If the
+        move is PRUNE, the ratio is inverted. `None` once
         `log_trans_prior_ratio` has been computed.
     log_trans_prior_ratio
         The logarithm of the product of the transition and prior terms of the
         Metropolis-Hastings ratio for the acceptance of the proposed move.
-        `None` if not yet computed.
+        `None` if not yet computed. If PRUNE, the log-ratio is negated.
     grow_var
         The decision axes of the new rules.
     grow_split
         The decision boundaries of the new rules.
-
+    var_tree
         The updated decision axes of the trees, valid whatever move.
+    affluence_tree
+        A partially updated `affluence_tree`, marking non-leaf nodes that would
+        become leaves if the move was accepted. This mark initially (out of
+        `propose_moves`) takes into account if there would be available decision
+        rules to grow the leaf, and whether there are enough datapoints in the
+        node is marked in `accept_moves_parallel_stage`.
     logu
         The logarithm of a uniform (0, 1] random variable to be used to
         accept the move. It's in (-oo, 0].
@@ -446,25 +558,24 @@
         computed.
     """

-    allowed: Bool[Array, 'num_trees']
-    grow: Bool[Array, 'num_trees']
-    num_growable: UInt[Array, 'num_trees']
-    node: UInt[Array, 'num_trees']
-    left: UInt[Array, 'num_trees']
-    right: UInt[Array, 'num_trees']
-    partial_ratio: Float32[Array, 'num_trees'] | None
-    log_trans_prior_ratio: None | Float32[Array, 'num_trees']
-    grow_var: UInt[Array, 'num_trees']
-    grow_split: UInt[Array, 'num_trees']
-
-
-
-
+    allowed: Bool[Array, ' num_trees']
+    grow: Bool[Array, ' num_trees']
+    num_growable: UInt[Array, ' num_trees']
+    node: UInt[Array, ' num_trees']
+    left: UInt[Array, ' num_trees']
+    right: UInt[Array, ' num_trees']
+    partial_ratio: Float32[Array, ' num_trees'] | None
+    log_trans_prior_ratio: None | Float32[Array, ' num_trees']
+    grow_var: UInt[Array, ' num_trees']
+    grow_split: UInt[Array, ' num_trees']
+    var_tree: UInt[Array, 'num_trees 2**(d-1)']
+    affluence_tree: Bool[Array, 'num_trees 2**(d-1)']
+    logu: Float32[Array, ' num_trees']
+    acc: None | Bool[Array, ' num_trees']
+    to_prune: None | Bool[Array, ' num_trees']


-def propose_moves(
-    key: Key[Array, ''], forest: Forest, max_split: UInt[Array, 'p']
-) -> Moves:
+def propose_moves(key: Key[Array, ''], forest: Forest) -> Moves:
     """
     Propose moves for all the trees.

@@ -478,39 +589,40 @@ def propose_moves(
         A jax random key.
     forest
         The `forest` field of a BART MCMC state.
-    max_split
-        The maximum split index for each variable, found in `State`.

     Returns
     -------
     The proposed move for each tree.
     """
-    num_trees, _ = forest.
+    num_trees, _ = forest.leaf_tree.shape
     keys = split(key, 1 + 2 * num_trees)

     # compute moves
     grow_moves = propose_grow_moves(
         keys.pop(num_trees),
-        forest.
-        forest.
-        forest.
-        max_split,
+        forest.var_tree,
+        forest.split_tree,
+        forest.affluence_tree,
+        forest.max_split,
+        forest.blocked_vars,
         forest.p_nonterminal,
         forest.p_propose_grow,
+        forest.log_s,
     )
     prune_moves = propose_prune_moves(
         keys.pop(num_trees),
-        forest.
-
+        forest.split_tree,
+        grow_moves.affluence_tree,
         forest.p_nonterminal,
         forest.p_propose_grow,
     )

-    u,
+    u, exp1mlogu = random.uniform(keys.pop(), (2, num_trees))

     # choose between grow or prune
-
-
+    p_grow = jnp.where(
+        grow_moves.allowed & prune_moves.allowed, 0.5, grow_moves.allowed
+    )
     grow = u < p_grow  # use < instead of <= because u is in [0, 1)

     # compute children indices
@@ -519,7 +631,7 @@ def propose_moves(
     right = left + 1

     return Moves(
-        allowed=
+        allowed=grow_moves.allowed | prune_moves.allowed,
         grow=grow,
         num_growable=grow_moves.num_growable,
         node=node,
@@ -531,8 +643,11 @@ def propose_moves(
         log_trans_prior_ratio=None,  # will be set in complete_ratio
         grow_var=grow_moves.var,
         grow_split=grow_moves.split,
-
-
+        # var_tree does not need to be updated if prune
+        var_tree=grow_moves.var_tree,
+        # affluence_tree is updated for both moves unconditionally, prune last
+        affluence_tree=prune_moves.affluence_tree,
+        logu=jnp.log1p(-exp1mlogu),
         acc=None,  # will be set in accept_moves_sequential_stage
         to_prune=None,  # will be set in accept_moves_sequential_stage
     )
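One detail worth unpacking from `propose_moves`: `random.uniform` samples in [0, 1), so `1 - exp1mlogu` lies in (0, 1] and `jnp.log1p(-exp1mlogu)` computes `log(1 - exp1mlogu)` without cancellation, giving the `logu` in (-oo, 0] that the `Moves` docstring promises. A standalone sketch of the same trick (not package code):

    import jax.numpy as jnp
    from jax import random

    key = random.PRNGKey(0)
    exp1mlogu = random.uniform(key, (5,))   # draws in [0, 1)
    logu = jnp.log1p(-exp1mlogu)            # log of a uniform variate in (0, 1], always <= 0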
@@ -544,8 +659,10 @@ class GrowMoves(Module):

     Parameters
     ----------
+    allowed
+        Whether the move is allowed for proposal.
     num_growable
-        The number of
+        The number of leaves that can be proposed for grow.
     node
         The index of the leaf to grow. ``2 ** d`` if there are no growable
         leaves.
@@ -558,25 +675,32 @@ class GrowMoves(Module):
         move.
     var_tree
         The updated decision axes of the tree.
+    affluence_tree
+        A partially updated `affluence_tree` that marks each new leaf that
+        would be produced as `True` if it would have available decision rules.
     """

-
-
-
-
-
+    allowed: Bool[Array, ' num_trees']
+    num_growable: UInt[Array, ' num_trees']
+    node: UInt[Array, ' num_trees']
+    var: UInt[Array, ' num_trees']
+    split: UInt[Array, ' num_trees']
+    partial_ratio: Float32[Array, ' num_trees']
     var_tree: UInt[Array, 'num_trees 2**(d-1)']
+    affluence_tree: Bool[Array, 'num_trees 2**(d-1)']


-@partial(vmap_nodoc, in_axes=(0, 0, 0, 0, None, None, None))
+@partial(vmap_nodoc, in_axes=(0, 0, 0, 0, None, None, None, None, None))
 def propose_grow_moves(
-    key: Key[Array, ''],
-    var_tree: UInt[Array, '2**(d-1)'],
-    split_tree: UInt[Array, '2**(d-1)'],
-    affluence_tree: Bool[Array, '2**(d-1)']
-    max_split: UInt[Array, 'p'],
-
-
+    key: Key[Array, ' num_trees'],
+    var_tree: UInt[Array, 'num_trees 2**(d-1)'],
+    split_tree: UInt[Array, 'num_trees 2**(d-1)'],
+    affluence_tree: Bool[Array, 'num_trees 2**(d-1)'],
+    max_split: UInt[Array, ' p'],
+    blocked_vars: Int32[Array, ' k'] | None,
+    p_nonterminal: Float32[Array, ' 2**d'],
+    p_propose_grow: Float32[Array, ' 2**(d-1)'],
+    log_s: Float32[Array, ' p'] | None,
 ) -> GrowMoves:
     """
     Propose a GROW move for each tree.
@@ -593,13 +717,19 @@ def propose_grow_moves(
     split_tree
         The splitting points of the tree.
     affluence_tree
-        Whether
+        Whether each leaf has enough points to be grown.
     max_split
         The maximum split index for each variable.
+    blocked_vars
+        The indices of the variables that have no available cutpoints.
     p_nonterminal
-        The probability of a nonterminal
+        The a priori probability of a node to be nonterminal conditional on the
+        ancestors, including at the maximum depth where it should be zero.
     p_propose_grow
         The unnormalized probability of choosing a leaf to grow.
+    log_s
+        Unnormalized log-probability used to choose a variable to split on
+        amongst the available ones.

     Returns
     -------
@@ -607,16 +737,10 @@ def propose_grow_moves(

     Notes
     -----
-    The move is not proposed if
-
-
-
-    The move is also not be possible if the ancestors of a leaf have
-    exhausted the possible decision rules that lead to a non-empty selection.
-    This is marked by returning `var` set to `p` and `split` set to 0. But this
-    does not block the move from counting as "proposed", even though it is
-    predictably going to be rejected. This simplifies the MCMC and should not
-    reduce efficiency if not in unrealistic corner cases.
+    The move is not proposed if each leaf is already at maximum depth, or has
+    less datapoints than the requested threshold `min_points_per_decision_node`,
+    or it does not have any available decision rules given its ancestors. This
+    is marked by setting `allowed` to `False` and `num_growable` to 0.
     """
     keys = split(key, 3)

@@ -624,36 +748,45 @@ def propose_grow_moves(
         keys.pop(), split_tree, affluence_tree, p_propose_grow
     )

-
-
+    # sample a decision rule
+    var, num_available_var = choose_variable(
+        keys.pop(), var_tree, split_tree, max_split, leaf_to_grow, blocked_vars, log_s
+    )
+    split_idx, l, r = choose_split(
+        keys.pop(), var, var_tree, split_tree, max_split, leaf_to_grow
+    )

-
+    # determine if the new leaves would have available decision rules; if the
+    # move is blocked, these values may not make sense
+    left_growable = right_growable = num_available_var > 1
+    left_growable |= l < split_idx
+    right_growable |= split_idx + 1 < r
+    left = leaf_to_grow << 1
+    right = left + 1
+    affluence_tree = affluence_tree.at[left].set(left_growable)
+    affluence_tree = affluence_tree.at[right].set(right_growable)

     ratio = compute_partial_ratio(
         prob_choose, num_prunable, p_nonterminal, leaf_to_grow
     )

     return GrowMoves(
+        allowed=num_growable > 0,
         num_growable=num_growable,
         node=leaf_to_grow,
         var=var,
         split=split_idx,
         partial_ratio=ratio,
-        var_tree=var_tree,
+        var_tree=var_tree.at[leaf_to_grow].set(var.astype(var_tree.dtype)),
+        affluence_tree=affluence_tree,
     )

-    # TODO it is not clear to me how var=p and split=0 when the move is not
-    # possible lead to corrent behavior downstream. Like, the move is proposed,
-    # but then it's a noop? And since it's a noop, it makes no difference if
-    # it's "accepted" or "rejected", it's like it's always rejected, so who
-    # cares if the likelihood ratio or a lot of other numbers are wrong? Uhm.
-

 def choose_leaf(
     key: Key[Array, ''],
-    split_tree: UInt[Array, '2**(d-1)'],
-    affluence_tree: Bool[Array, '2**(d-1)']
-    p_propose_grow: Float32[Array, '2**(d-1)'],
+    split_tree: UInt[Array, ' 2**(d-1)'],
+    affluence_tree: Bool[Array, ' 2**(d-1)'],
+    p_propose_grow: Float32[Array, ' 2**(d-1)'],
 ) -> tuple[Int32[Array, ''], Int32[Array, ''], Float32[Array, ''], Int32[Array, '']]:
     """
     Choose a leaf node to grow in a tree.
@@ -672,16 +805,16 @@ def choose_leaf(

     Returns
     -------
-    leaf_to_grow :
+    leaf_to_grow : Int32[Array, '']
         The index of the leaf to grow. If ``num_growable == 0``, return
         ``2 ** d``.
-    num_growable :
+    num_growable : Int32[Array, '']
         The number of leaf nodes that can be grown, i.e., are nonterminal
-        and have at least twice `min_points_per_leaf
-    prob_choose :
+        and have at least twice `min_points_per_leaf`.
+    prob_choose : Float32[Array, '']
         The (normalized) probability that this function had to choose that
         specific leaf, given the arguments.
-    num_prunable :
+    num_prunable : Int32[Array, '']
         The number of leaf parents that could be pruned, after converting the
         selected leaf to a non-terminal node.
     """
@@ -690,41 +823,43 @@ def choose_leaf(
     distr = jnp.where(is_growable, p_propose_grow, 0)
     leaf_to_grow, distr_norm = categorical(key, distr)
     leaf_to_grow = jnp.where(num_growable, leaf_to_grow, 2 * split_tree.size)
-    prob_choose = distr[leaf_to_grow] / distr_norm
+    prob_choose = distr[leaf_to_grow] / jnp.where(distr_norm, distr_norm, 1)
     is_parent = grove.is_leaves_parent(split_tree.at[leaf_to_grow].set(1))
     num_prunable = jnp.count_nonzero(is_parent)
     return leaf_to_grow, num_growable, prob_choose, num_prunable


 def growable_leaves(
-    split_tree: UInt[Array, '2**(d-1)'],
-
-) -> Bool[Array, '2**(d-1)']:
+    split_tree: UInt[Array, ' 2**(d-1)'], affluence_tree: Bool[Array, ' 2**(d-1)']
+) -> Bool[Array, ' 2**(d-1)']:
     """
     Return a mask indicating the leaf nodes that can be proposed for growth.

-    The condition is that a leaf is not at the bottom level
-
+    The condition is that a leaf is not at the bottom level, has available
+    decision rules given its ancestors, and has at least
+    `min_points_per_decision_node` points.

     Parameters
     ----------
     split_tree
         The splitting points of the tree.
     affluence_tree
-
+        Marks leaves that can be grown.

     Returns
     -------
     The mask indicating the leaf nodes that can be proposed to grow.
+
+    Notes
+    -----
+    This function needs `split_tree` and not just `affluence_tree` because
+    `affluence_tree` can be "dirty", i.e., mark unused nodes as `True`.
     """
-
-    if affluence_tree is not None:
-        is_growable &= affluence_tree
-    return is_growable
+    return grove.is_actual_leaf(split_tree) & affluence_tree


 def categorical(
-    key: Key[Array, ''], distr: Float32[Array, 'n']
+    key: Key[Array, ''], distr: Float32[Array, ' n']
 ) -> tuple[Int32[Array, ''], Float32[Array, '']]:
     """
     Return a random integer from an arbitrary distribution.
@@ -743,6 +878,11 @@ def categorical(
         return ``n``.
     norm : Float32[Array, '']
         The sum of `distr`.
+
+    Notes
+    -----
+    This function uses a cumsum instead of the Gumbel trick, so it's ok only
+    for small ranges with probabilities well greater than 0.
     """
     ecdf = jnp.cumsum(distr)
     u = random.uniform(key, (), ecdf.dtype, 0, ecdf[-1])
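The new Notes point at the design choice: `categorical` inverts a cumulative sum rather than using the Gumbel-max trick, which is fine for the short, well-conditioned distributions used here. A self-contained sketch of that technique (the final `searchsorted` step is an assumption, since the diff cuts off before the function's last lines):

    import jax.numpy as jnp
    from jax import random

    def categorical_cumsum(key, distr):
        """Draw an index ~ distr (unnormalized weights) via inverse-CDF on a cumsum."""
        ecdf = jnp.cumsum(distr)
        u = random.uniform(key, (), ecdf.dtype, 0, ecdf[-1])
        return jnp.searchsorted(ecdf, u, side='right'), ecdf[-1]

    idx, norm = categorical_cumsum(random.PRNGKey(0), jnp.array([0.2, 0.5, 0.3]))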
@@ -751,11 +891,13 @@

 def choose_variable(
     key: Key[Array, ''],
-    var_tree: UInt[Array, '2**(d-1)'],
-    split_tree: UInt[Array, '2**(d-1)'],
-    max_split: UInt[Array, 'p'],
+    var_tree: UInt[Array, ' 2**(d-1)'],
+    split_tree: UInt[Array, ' 2**(d-1)'],
+    max_split: UInt[Array, ' p'],
     leaf_index: Int32[Array, ''],
-
+    blocked_vars: Int32[Array, ' k'] | None,
+    log_s: Float32[Array, ' p'] | None,
+) -> tuple[Int32[Array, ''], Int32[Array, '']]:
     """
     Choose a variable to split on for a new non-terminal node.

@@ -771,28 +913,39 @@ def choose_variable(
         The maximum split index for each variable.
     leaf_index
         The index of the leaf to grow.
+    blocked_vars
+        The indices of the variables that have no available cutpoints. If
+        `None`, all variables are assumed unblocked.
+    log_s
+        The logarithm of the prior probability for choosing a variable. If
+        `None`, use a uniform distribution.

     Returns
     -------
-
-
-
-
-
-        allowed splits. If no variable has a non-empty range, return `p`.
+    var : Int32[Array, '']
+        The index of the variable to split on.
+    num_available_var : Int32[Array, '']
+        The number of variables with available decision rules `var` was chosen
+        from.
     """
     var_to_ignore = fully_used_variables(var_tree, split_tree, max_split, leaf_index)
-
+    if blocked_vars is not None:
+        var_to_ignore = jnp.concatenate([var_to_ignore, blocked_vars])
+
+    if log_s is None:
+        return randint_exclude(key, max_split.size, var_to_ignore)
+    else:
+        return categorical_exclude(key, log_s, var_to_ignore)


 def fully_used_variables(
-    var_tree: UInt[Array, '2**(d-1)'],
-    split_tree: UInt[Array, '2**(d-1)'],
-    max_split: UInt[Array, 'p'],
+    var_tree: UInt[Array, ' 2**(d-1)'],
+    split_tree: UInt[Array, ' 2**(d-1)'],
+    max_split: UInt[Array, ' p'],
     leaf_index: Int32[Array, ''],
-) -> UInt[Array, 'd-2']:
+) -> UInt[Array, ' d-2']:
     """
-
+    Find variables in the ancestors of a node that have an empty split range.

     Parameters
     ----------
@@ -820,23 +973,25 @@ def fully_used_variables(
     l, r = split_range_vec(var_tree, split_tree, max_split, leaf_index, var_to_ignore)
     num_split = r - l
     return jnp.where(num_split == 0, var_to_ignore, max_split.size)
+    # the type of var_to_ignore is already sufficient to hold max_split.size,
+    # see ancestor_variables()


 def ancestor_variables(
-    var_tree: UInt[Array, '2**(d-1)'],
-    max_split: UInt[Array, 'p'],
+    var_tree: UInt[Array, ' 2**(d-1)'],
+    max_split: UInt[Array, ' p'],
     node_index: Int32[Array, ''],
-) -> UInt[Array, 'd-2']:
+) -> UInt[Array, ' d-2']:
     """
     Return the list of variables in the ancestors of a node.

     Parameters
     ----------
-    var_tree
+    var_tree
         The variable indices of the tree.
-    max_split
+    max_split
         The maximum split index for each variable. Used only to get `p`.
-    node_index
+    node_index
         The index of the node, assumed to be valid for `var_tree`.

     Returns
@@ -866,9 +1021,9 @@ def ancestor_variables(


 def split_range(
-    var_tree: UInt[Array, '2**(d-1)'],
-    split_tree: UInt[Array, '2**(d-1)'],
-    max_split: UInt[Array, 'p'],
+    var_tree: UInt[Array, ' 2**(d-1)'],
+    split_tree: UInt[Array, ' 2**(d-1)'],
+    max_split: UInt[Array, ' p'],
     node_index: Int32[Array, ''],
     ref_var: Int32[Array, ''],
 ) -> tuple[Int32[Array, ''], Int32[Array, '']]:
@@ -890,13 +1045,13 @@ def split_range(

     Returns
     -------
-    The range of allowed splits as [l, r). If `ref_var` is out of bounds, l=r=
+    The range of allowed splits as [l, r). If `ref_var` is out of bounds, l=r=1.
     """
     max_num_ancestors = grove.tree_depth(var_tree) - 1
     initial_r = 1 + max_split.at[ref_var].get(mode='fill', fill_value=0).astype(
         jnp.int32
     )
-    carry = 0, initial_r, node_index
+    carry = jnp.int32(0), initial_r, node_index

     def loop(carry, _):
         l, r, index = carry
@@ -913,8 +1068,8 @@ def split_range(


 def randint_exclude(
-    key: Key[Array, ''], sup: int, exclude: Integer[Array, 'n']
-) -> Int32[Array, '']:
+    key: Key[Array, ''], sup: int | Integer[Array, ''], exclude: Integer[Array, ' n']
+) -> tuple[Int32[Array, ''], Int32[Array, '']]:
     """
     Return a random integer in a range, excluding some values.

@@ -930,30 +1085,74 @@ def randint_exclude(

     Returns
     -------
-
+    u : Int32[Array, '']
+        A random integer `u` in the range ``[0, sup)`` such that ``u not in
+        exclude``.
+    num_allowed : Int32[Array, '']
+        The number of integers in the range that were not excluded.

     Notes
     -----
     If all values in the range are excluded, return `sup`.
     """
-    exclude =
-    num_allowed = sup - jnp.count_nonzero(exclude < sup)
+    exclude, num_allowed = _process_exclude(sup, exclude)
     u = random.randint(key, (), 0, num_allowed)

-    def loop(u,
-        return jnp.where(
+    def loop(u, i_excluded):
+        return jnp.where(i_excluded <= u, u + 1, u), None

     u, _ = lax.scan(loop, u, exclude)
-    return u
+    return u, num_allowed
+
+
+def _process_exclude(sup, exclude):
+    exclude = jnp.unique(exclude, size=exclude.size, fill_value=sup)
+    num_allowed = sup - jnp.count_nonzero(exclude < sup)
+    return exclude, num_allowed
+
+
+def categorical_exclude(
+    key: Key[Array, ''], logits: Float32[Array, ' k'], exclude: Integer[Array, ' n']
+) -> tuple[Int32[Array, ''], Int32[Array, '']]:
+    """
+    Draw from a categorical distribution, excluding a set of values.
+
+    Parameters
+    ----------
+    key
+        A jax random key.
+    logits
+        The unnormalized log-probabilities of each category.
+    exclude
+        The values to exclude from the range [0, k). Values greater than or
+        equal to `logits.size` are ignored. Values can appear more than once.
+
+    Returns
+    -------
+    u : Int32[Array, '']
+        A random integer in the range ``[0, k)`` such that ``u not in exclude``.
+    num_allowed : Int32[Array, '']
+        The number of integers in the range that were not excluded.
+
+    Notes
+    -----
+    If all values in the range are excluded, the result is unspecified.
+    """
+    exclude, num_allowed = _process_exclude(logits.size, exclude)
+    kinda_neg_inf = jnp.finfo(logits.dtype).min
+    logits = logits.at[exclude].set(kinda_neg_inf)
+    u = random.categorical(key, logits)
+    return u, num_allowed


 def choose_split(
     key: Key[Array, ''],
-
-
-
+    var: Int32[Array, ''],
+    var_tree: UInt[Array, ' 2**(d-1)'],
+    split_tree: UInt[Array, ' 2**(d-1)'],
+    max_split: UInt[Array, ' p'],
     leaf_index: Int32[Array, ''],
-) -> Int32[Array, '']:
+) -> tuple[Int32[Array, ''], Int32[Array, ''], Int32[Array, '']]:
     """
     Choose a split point for a new non-terminal node.

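The `loop`/`lax.scan` pair in `randint_exclude` implements a standard shift trick: draw uniformly over the `num_allowed` surviving values, then bump the draw past each excluded value it has reached, mapping `[0, num_allowed)` onto the allowed subset of `[0, sup)`. A plain-NumPy illustration of the same idea (not package code):

    import numpy as np

    sup, exclude = 10, np.array([2, 5, 6])   # allowed values: 0, 1, 3, 4, 7, 8, 9
    num_allowed = sup - len(exclude)
    u = np.random.randint(num_allowed)       # uniform over the 7 allowed slots
    for e in np.sort(exclude):               # shift past each excluded value already passed
        if e <= u:
            u += 1
    # u is now uniform over the allowed values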
@@ -961,32 +1160,39 @@ def choose_split(
     ----------
     key
         A jax random key.
+    var
+        The variable to split on.
     var_tree
-        The splitting axes of the tree.
+        The splitting axes of the tree. Does not need to already contain `var`
+        at `leaf_index`.
     split_tree
         The splitting points of the tree.
     max_split
         The maximum split index for each variable.
     leaf_index
-        The index of the leaf to grow.
-        contains the target variable at this index.
+        The index of the leaf to grow.

     Returns
     -------
-
+    split : Int32[Array, '']
+        The cutpoint.
+    l : Int32[Array, '']
+    r : Int32[Array, '']
+        The integer range `split` was drawn from is [l, r).
+
+    Notes
+    -----
+    If `var` is out of bounds, or if the available split range on that variable
+    is empty, return 0.
     """
-    var = var_tree[leaf_index]
     l, r = split_range(var_tree, split_tree, max_split, leaf_index, var)
-    return random.randint(key, (), l, r)
-
-    # TODO what happens if leaf_index is out of bounds? And is the value used
-    # in that case?
+    return jnp.where(l < r, random.randint(key, (), l, r), 0), l, r


 def compute_partial_ratio(
     prob_choose: Float32[Array, ''],
     num_prunable: Int32[Array, ''],
-    p_nonterminal: Float32[Array, 'd'],
+    p_nonterminal: Float32[Array, ' 2**d'],
     leaf_to_grow: Int32[Array, ''],
 ) -> Float32[Array, '']:
     """
@@ -1001,7 +1207,8 @@ def compute_partial_ratio(
         The number of leaf parents that could be pruned, after converting the
         leaf to be grown to a non-terminal node.
     p_nonterminal
-        The probability of
+        The a priori probability of each node being nonterminal conditional on
+        its ancestors.
     leaf_to_grow
         The index of the leaf to grow.

@@ -1013,29 +1220,29 @@ def compute_partial_ratio(
     -----
     The transition ratio is P(new tree => old tree) / P(old tree => new tree).
     The "partial" transition ratio returned is missing the factor P(propose
-    prune) in the numerator. The prior ratio is P(new tree) / P(old tree).
+    prune) in the numerator. The prior ratio is P(new tree) / P(old tree). The
+    "partial" prior ratio is missing the factor P(children are leaves).
     """
     # the two ratios also contain factors num_available_split *
-    # num_available_var, but they cancel out
+    # num_available_var * s[var], but they cancel out

-    # p_prune
-    # computed
+    # p_prune and 1 - p_nonterminal[child] * I(is the child growable) can't be
+    # computed here because they need the count trees, which are computed in the
+    # acceptance phase

     prune_allowed = leaf_to_grow != 1
     # prune allowed <---> the initial tree is not a root
     # leaf to grow is root --> the tree can only be a root
     # tree is a root --> the only leaf I can grow is root
-
     p_grow = jnp.where(prune_allowed, 0.5, 1)
-
     inv_trans_ratio = p_grow * prob_choose * num_prunable

-
-
-
-    tree_ratio =
+    # .at.get because if leaf_to_grow is out of bounds (move not allowed), this
+    # would produce a 0 and then an inf when `complete_ratio` takes the log
+    pnt = p_nonterminal.at[leaf_to_grow].get(mode='fill', fill_value=0.5)
+    tree_ratio = pnt / (1 - pnt)

-    return tree_ratio / inv_trans_ratio
+    return tree_ratio / jnp.where(inv_trans_ratio, inv_trans_ratio, 1)


 class PruneMoves(Module):
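To see how the partial ratio is meant to be completed downstream, here is a toy assembly of the factors named in the docstring (variable names and numbers are illustrative, not identifiers from the package):

    # Toy numbers; shows how the docstring's missing factors would combine with
    # the partial value for a GROW move.
    pnt, pnt_left, pnt_right = 0.45, 0.25, 0.25    # prior nonterminal probabilities
    p_grow, prob_choose, num_prunable = 0.5, 0.2, 3
    p_prune, likelihood_ratio = 0.5, 1.1           # only known later, in the acceptance phase

    partial = (pnt / (1 - pnt)) / (p_grow * prob_choose * num_prunable)
    full_ratio = partial * p_prune * (1 - pnt_left) * (1 - pnt_right) * likelihood_ratio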
@@ -1049,24 +1256,26 @@ class PruneMoves(Module):
|
|
|
1049
1256
|
node
|
|
1050
1257
|
The index of the node to prune. ``2 ** d`` if no node can be pruned.
|
|
1051
1258
|
partial_ratio
|
|
1052
|
-
A factor of the Metropolis-Hastings ratio of the move. It lacks
|
|
1053
|
-
|
|
1054
|
-
|
|
1259
|
+
A factor of the Metropolis-Hastings ratio of the move. It lacks the
|
|
1260
|
+
likelihood ratio, the probability of proposing the prune move, and the
|
|
1261
|
+
prior probability that the children of the node to prune are leaves.
|
|
1262
|
+
This ratio is inverted, and is meant to be inverted back in
|
|
1055
1263
|
`accept_move_and_sample_leaves`.
|
|
1056
1264
|
"""
|
|
1057
1265
|
|
|
1058
|
-
allowed: Bool[Array, 'num_trees']
|
|
1059
|
-
node: UInt[Array, 'num_trees']
|
|
1060
|
-
partial_ratio: Float32[Array, 'num_trees']
|
|
1266
|
+
allowed: Bool[Array, ' num_trees']
|
|
1267
|
+
node: UInt[Array, ' num_trees']
|
|
1268
|
+
partial_ratio: Float32[Array, ' num_trees']
|
|
1269
|
+
affluence_tree: Bool[Array, 'num_trees 2**(d-1)']
|
|
1061
1270
|
|
|
1062
1271
|
|
|
1063
1272
|
@partial(vmap_nodoc, in_axes=(0, 0, 0, None, None))
|
|
1064
1273
|
def propose_prune_moves(
|
|
1065
1274
|
key: Key[Array, ''],
|
|
1066
|
-
split_tree: UInt[Array, '2**(d-1)'],
|
|
1067
|
-
affluence_tree: Bool[Array, '2**(d-1)']
|
|
1068
|
-
p_nonterminal: Float32[Array, 'd'],
|
|
1069
|
-
p_propose_grow: Float32[Array, '2**(d-1)'],
|
|
1275
|
+
split_tree: UInt[Array, ' 2**(d-1)'],
|
|
1276
|
+
affluence_tree: Bool[Array, ' 2**(d-1)'],
|
|
1277
|
+
p_nonterminal: Float32[Array, ' 2**d'],
|
|
1278
|
+
p_propose_grow: Float32[Array, ' 2**(d-1)'],
|
|
1070
1279
|
) -> PruneMoves:
|
|
1071
1280
|
"""
|
|
1072
1281
|
Tree structure prune move proposal of BART MCMC.
|
|
@@ -1078,9 +1287,10 @@ def propose_prune_moves(
|
|
|
1078
1287
|
split_tree
|
|
1079
1288
|
The splitting points of the tree.
|
|
1080
1289
|
affluence_tree
|
|
1081
|
-
Whether
|
|
1290
|
+
Whether each leaf can be grown.
|
|
1082
1291
|
p_nonterminal
|
|
1083
|
-
The probability of a
|
|
1292
|
+
The a priori probability of a node to be nonterminal conditional on
|
|
1293
|
+
the ancestors, including at the maximum depth where it should be zero.
|
|
1084
1294
|
p_propose_grow
|
|
1085
1295
|
The unnormalized probability of choosing a leaf to grow.
|
|
1086
1296
|
|
|
@@ -1088,28 +1298,33 @@ def propose_prune_moves(
|
|
|
1088
1298
|
-------
|
|
1089
1299
|
An object representing the proposed moves.
|
|
1090
1300
|
"""
|
|
1091
|
-
node_to_prune, num_prunable, prob_choose = choose_leaf_parent(
|
|
1301
|
+
node_to_prune, num_prunable, prob_choose, affluence_tree = choose_leaf_parent(
|
|
1092
1302
|
key, split_tree, affluence_tree, p_propose_grow
|
|
1093
1303
|
)
|
|
1094
|
-
allowed = split_tree[1].astype(bool) # allowed iff the tree is not a root
|
|
1095
1304
|
|
|
1096
1305
|
ratio = compute_partial_ratio(
|
|
1097
1306
|
prob_choose, num_prunable, p_nonterminal, node_to_prune
|
|
1098
1307
|
)
|
|
1099
1308
|
|
|
1100
1309
|
return PruneMoves(
|
|
1101
|
-
allowed=allowed
|
|
1310
|
+
allowed=split_tree[1].astype(bool), # allowed iff the tree is not a root
|
|
1102
1311
|
node=node_to_prune,
|
|
1103
1312
|
partial_ratio=ratio,
|
|
1313
|
+
affluence_tree=affluence_tree,
|
|
1104
1314
|
)
|
|
1105
1315
|
|
|
1106
1316
|
|
|
1107
1317
|
def choose_leaf_parent(
|
|
1108
1318
|
key: Key[Array, ''],
|
|
1109
|
-
split_tree: UInt[Array, '2**(d-1)'],
|
|
1110
|
-
affluence_tree: Bool[Array, '2**(d-1)']
|
|
1111
|
-
p_propose_grow: Float32[Array, '2**(d-1)'],
|
|
1112
|
-
) -> tuple[
|
|
1319
|
+
split_tree: UInt[Array, ' 2**(d-1)'],
|
|
1320
|
+
affluence_tree: Bool[Array, ' 2**(d-1)'],
|
|
1321
|
+
p_propose_grow: Float32[Array, ' 2**(d-1)'],
|
|
1322
|
+
) -> tuple[
|
|
1323
|
+
Int32[Array, ''],
|
|
1324
|
+
Int32[Array, ''],
|
|
1325
|
+
Float32[Array, ''],
|
|
1326
|
+
Bool[Array, 'num_trees 2**(d-1)'],
|
|
1327
|
+
]:
|
|
1113
1328
|
"""
|
|
1114
1329
|
Pick a non-terminal node with leaf children to prune in a tree.
|
|
1115
1330
|
|
|
@@ -1135,23 +1350,28 @@ def choose_leaf_parent(
|
|
|
1135
1350
|
The (normalized) probability that `choose_leaf` would chose
|
|
1136
1351
|
`node_to_prune` as leaf to grow, if passed the tree where
|
|
1137
1352
|
`node_to_prune` had been pruned.
|
|
1353
|
+
affluence_tree : Bool[Array, 'num_trees 2**(d-1)']
|
|
1354
|
+
A partially updated `affluence_tree`, marking the node to prune as
|
|
1355
|
+
growable.
|
|
1138
1356
|
"""
|
|
1357
|
+
# sample a node to prune
|
|
1139
1358
|
is_prunable = grove.is_leaves_parent(split_tree)
|
|
1140
1359
|
num_prunable = jnp.count_nonzero(is_prunable)
|
|
1141
1360
|
node_to_prune = randint_masked(key, is_prunable)
|
|
1142
1361
|
node_to_prune = jnp.where(num_prunable, node_to_prune, 2 * split_tree.size)
|
|
1143
1362
|
|
|
1363
|
+
# compute stuff for reverse move
|
|
1144
1364
|
split_tree = split_tree.at[node_to_prune].set(0)
|
|
1145
|
-
|
|
1146
|
-
affluence_tree = affluence_tree.at[node_to_prune].set(True)
|
|
1365
|
+
affluence_tree = affluence_tree.at[node_to_prune].set(True)
|
|
1147
1366
|
is_growable_leaf = growable_leaves(split_tree, affluence_tree)
|
|
1148
|
-
|
|
1149
|
-
prob_choose
|
|
1367
|
+
distr_norm = jnp.sum(p_propose_grow, where=is_growable_leaf)
|
|
1368
|
+
prob_choose = p_propose_grow.at[node_to_prune].get(mode='fill', fill_value=0)
|
|
1369
|
+
prob_choose = prob_choose / jnp.where(distr_norm, distr_norm, 1)
|
|
1150
1370
|
|
|
1151
|
-
return node_to_prune, num_prunable, prob_choose
|
|
1371
|
+
return node_to_prune, num_prunable, prob_choose, affluence_tree
|
|
1152
1372
|
|
|
1153
1373
|
|
|
1154
|
-
def randint_masked(key: Key[Array, ''], mask: Bool[Array, 'n']) -> Int32[Array, '']:
|
|
1374
|
+
def randint_masked(key: Key[Array, ''], mask: Bool[Array, ' n']) -> Int32[Array, '']:
|
|
1155
1375
|
"""
|
|
1156
1376
|
Return a random integer in a range, including only some values.
|
|
1157
1377
|
|
|
@@ -1213,9 +1433,9 @@ class Counts(Module):
         Number of datapoints in the parent (``= left + right``).
     """

-    left: UInt[Array, 'num_trees']
-    right: UInt[Array, 'num_trees']
-    total: UInt[Array, 'num_trees']
+    left: UInt[Array, ' num_trees']
+    right: UInt[Array, ' num_trees']
+    total: UInt[Array, ' num_trees']


 class Precs(Module):
@@ -1235,9 +1455,9 @@ class Precs(Module):
         Likelihood precision scale in the parent (``= left + right``).
     """

-    left: Float32[Array, 'num_trees']
-    right: Float32[Array, 'num_trees']
-    total: Float32[Array, 'num_trees']
+    left: Float32[Array, ' num_trees']
+    right: Float32[Array, ' num_trees']
+    total: Float32[Array, ' num_trees']


 class PreLkV(Module):
@@ -1261,10 +1481,10 @@ class PreLkV(Module):
         The **logarithm** of the square root term of the likelihood ratio.
     """

-    sigma2_left: Float32[Array, 'num_trees']
-    sigma2_right: Float32[Array, 'num_trees']
-    sigma2_total: Float32[Array, 'num_trees']
-    sqrt_term: Float32[Array, 'num_trees']
+    sigma2_left: Float32[Array, ' num_trees']
+    sigma2_right: Float32[Array, ' num_trees']
+    sigma2_total: Float32[Array, ' num_trees']
+    sqrt_term: Float32[Array, ' num_trees']


 class PreLk(Module):
@@ -1331,7 +1551,6 @@ class ParallelStageOut(Module):
     bart: State
     moves: Moves
     prec_trees: Float32[Array, 'num_trees 2**d'] | Int32[Array, 'num_trees 2**d']
-    move_counts: Counts | None
     move_precs: Precs | Counts
     prelkv: PreLkV
     prelk: PreLk
@@ -1342,7 +1561,7 @@ def accept_moves_parallel_stage(
     key: Key[Array, ''], bart: State, moves: Moves
 ) -> ParallelStageOut:
     """
-    Pre-
+    Pre-compute quantities used to accept moves, in parallel across trees.

     Parameters
     ----------
@@ -1362,33 +1581,41 @@ def accept_moves_parallel_stage(
         bart,
         forest=replace(
             bart.forest,
-
+            var_tree=moves.var_tree,
             leaf_indices=apply_grow_to_indices(moves, bart.forest.leaf_indices, bart.X),
-
+            leaf_tree=adapt_leaf_trees_to_grow_indices(bart.forest.leaf_tree, moves),
         ),
     )

     # count number of datapoints per leaf
-    if
+    if (
+        bart.forest.min_points_per_decision_node is not None
+        or bart.forest.min_points_per_leaf is not None
+        or bart.prec_scale is None
+    ):
         count_trees, move_counts = compute_count_trees(
             bart.forest.leaf_indices, moves, bart.forest.count_batch_size
         )
-    else:
-        # move_counts is passed later to a function, but then is unused under
-        # this condition
-        move_counts = None

-    #
-
-
+    # mark which leaves & potential leaves have enough points to be grown
+    if bart.forest.min_points_per_decision_node is not None:
+        count_half_trees = count_trees[:, : bart.forest.var_tree.shape[1]]
+        moves = replace(
+            moves,
+            affluence_tree=moves.affluence_tree
+            & (count_half_trees >= bart.forest.min_points_per_decision_node),
+        )
+
+        # copy updated affluence_tree to state
+        bart = tree_at(lambda bart: bart.forest.affluence_tree, bart, moves.affluence_tree)
+
+    # veto grove move if new leaves don't have enough datapoints
     if bart.forest.min_points_per_leaf is not None:
-
-
-
-        forest
-
-                affluence_trees=count_half_trees >= 2 * bart.forest.min_points_per_leaf,
-            ),
+        moves = replace(
+            moves,
+            allowed=moves.allowed
+            & (move_counts.left >= bart.forest.min_points_per_leaf)
+            & (move_counts.right >= bart.forest.min_points_per_leaf),
         )

     # count number of datapoints per leaf, weighted by error precision scale
@@ -1402,18 +1629,23 @@ def accept_moves_parallel_stage(
         moves,
         bart.forest.count_batch_size,
     )
+    assert move_precs is not None

     # compute some missing information about moves
-    moves = complete_ratio(moves,
+    moves = complete_ratio(moves, bart.forest.p_nonterminal)
+    save_ratios = bart.forest.log_likelihood is not None
     bart = replace(
         bart,
         forest=replace(
             bart.forest,
             grow_prop_count=jnp.sum(moves.grow),
             prune_prop_count=jnp.sum(moves.allowed & ~moves.grow),
+            log_trans_prior=moves.log_trans_prior_ratio if save_ratios else None,
         ),
     )

+    # pre-compute some likelihood ratio & posterior terms
+    assert bart.sigma2 is not None  # `step` shall temporarily set it to 1
     prelkv, prelk = precompute_likelihood_terms(
         bart.sigma2, bart.forest.sigma_mu2, move_precs
     )
@@ -1423,7 +1655,6 @@ def accept_moves_parallel_stage(
         bart=bart,
         moves=moves,
         prec_trees=prec_trees,
-        move_counts=move_counts,
         move_precs=move_precs,
         prelkv=prelkv,
         prelk=prelk,
@@ -1453,12 +1684,10 @@ def apply_grow_to_indices(
     """
     left_child = moves.node.astype(leaf_indices.dtype) << 1
     go_right = X[moves.grow_var, :] >= moves.grow_split
-    tree_size = jnp.array(2 * moves.
+    tree_size = jnp.array(2 * moves.var_tree.size)
     node_to_update = jnp.where(moves.grow, moves.node, tree_size)
     return jnp.where(
-        leaf_indices == node_to_update,
-        left_child + go_right,
-        leaf_indices,
+        leaf_indices == node_to_update, left_child + go_right, leaf_indices
     )

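For reference, `apply_grow_to_indices` relies on the heap indexing of tree nodes: the children of node `i` are `2*i` and `2*i + 1`, and datapoints sitting in a grown leaf are rerouted to one of its children by the decision rule. A small self-contained illustration with made-up data (not taken from the package):

```python
import jax.numpy as jnp

# toy data: 5 datapoints currently assigned to leaves 3 and 5 of a heap-indexed tree
leaf_indices = jnp.array([3, 5, 3, 3, 5], dtype=jnp.uint32)
node = 3                                                  # leaf being grown
go_right = jnp.array([False, False, True, False, True])   # X[grow_var, :] >= grow_split

left_child = node << 1                                    # 6; the right child is 7
updated = jnp.where(leaf_indices == node, left_child + go_right, leaf_indices)
print(updated)  # [6 5 7 6 5]: points in the grown leaf move to its children
```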
@@ -1486,7 +1715,7 @@ def compute_count_trees(
         The counts of the number of points in the leaves grown or pruned by the
         moves.
     """
-    num_trees, tree_size = moves.
+    num_trees, tree_size = moves.var_tree.shape
     tree_size *= 2
     tree_indices = jnp.arange(num_trees)

@@ -1543,7 +1772,7 @@ def _aggregate_scatter(
     indices: Integer[Array, '*'],
     size: int,
     dtype: jnp.dtype,
-) -> Shaped[Array, '{size}']:
+) -> Shaped[Array, ' {size}']:
     return jnp.zeros(size, dtype).at[indices].add(values)

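`_aggregate_scatter` above is the core of the per-leaf counting: a scatter-add into a zero-initialized vector. A tiny standalone example of the same pattern, with toy numbers rather than package data:

```python
import jax.numpy as jnp

leaf_indices = jnp.array([6, 5, 7, 6, 5])   # leaf of each datapoint, heap-indexed
tree_size = 8
counts = jnp.zeros(tree_size, jnp.uint32).at[leaf_indices].add(1)
print(counts)  # [0 0 0 0 0 2 2 1]
```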
@@ -1576,7 +1805,7 @@ def _aggregate_batched_alltrees(


 def compute_prec_trees(
-    prec_scale: Float32[Array, 'n'],
+    prec_scale: Float32[Array, ' n'],
     leaf_indices: UInt[Array, 'num_trees n'],
     moves: Moves,
     batch_size: int | None,
@@ -1603,7 +1832,7 @@ def compute_prec_trees(
     precs : Precs
         The likelihood precision scale in the nodes involved in the moves.
     """
-    num_trees, tree_size = moves.
+    num_trees, tree_size = moves.var_tree.shape
     tree_size *= 2
     tree_indices = jnp.arange(num_trees)

@@ -1621,7 +1850,7 @@ def compute_prec_trees(


 def prec_per_leaf(
-    prec_scale: Float32[Array, 'n'],
+    prec_scale: Float32[Array, ' n'],
     leaf_indices: UInt[Array, 'num_trees n'],
     tree_size: int,
     batch_size: int | None,
@@ -1651,7 +1880,7 @@ def prec_per_leaf(


 def _prec_scan(
-    prec_scale: Float32[Array, 'n'],
+    prec_scale: Float32[Array, ' n'],
     leaf_indices: UInt[Array, 'num_trees n'],
     tree_size: int,
 ) -> Float32[Array, 'num_trees {tree_size}']:
@@ -1665,7 +1894,7 @@ def _prec_scan(


 def _prec_vec(
-    prec_scale: Float32[Array, 'n'],
+    prec_scale: Float32[Array, ' n'],
     leaf_indices: UInt[Array, 'num_trees n'],
     tree_size: int,
     batch_size: int,
@@ -1675,77 +1904,59 @@ def _prec_vec(
     )


-def complete_ratio(
-    moves: Moves, move_counts: Counts | None, min_points_per_leaf: int | None
-) -> Moves:
+def complete_ratio(moves: Moves, p_nonterminal: Float32[Array, ' 2**d']) -> Moves:
     """
     Complete non-likelihood MH ratio calculation.

-    This function adds the probability of choosing
+    This function adds the probability of choosing a prune move over the grow
+    move in the inverse transition, and the a priori probability that the
+    children nodes are leaves.

     Parameters
     ----------
     moves
-        The proposed moves
-
-
-
-
-
+        The proposed moves. Must have already been updated to keep into account
+        the thresholds on the number of datapoints per node, this happens in
+        `accept_moves_parallel_stage`.
+    p_nonterminal
+        The a priori probability of each node being nonterminal conditional on
+        its ancestors, including at the maximum depth where it should be zero.

     Returns
     -------
     The updated moves, with `partial_ratio=None` and `log_trans_prior_ratio` set.
     """
-
-
-
-
-
+    # can the leaves can be grown?
+    num_trees, _ = moves.affluence_tree.shape
+    tree_indices = jnp.arange(num_trees)
+    left_growable = moves.affluence_tree.at[tree_indices, moves.left].get(
+        mode='fill', fill_value=False
+    )
+    right_growable = moves.affluence_tree.at[tree_indices, moves.right].get(
+        mode='fill', fill_value=False
     )

-
-def compute_p_prune(
-    moves: Moves, move_counts: Counts | None, min_points_per_leaf: int | None
-) -> Float32[Array, 'num_trees']:
-    """
-    Compute the probability of proposing a prune move for each tree.
-
-    Parameters
-    ----------
-    moves
-        The proposed moves, see `propose_moves`.
-    move_counts
-        The number of datapoints in the proposed children of the leaf to grow.
-        Not used if `min_points_per_leaf` is `None`.
-    min_points_per_leaf
-        The minimum number of data points in a leaf node.
-
-    Returns
-    -------
-    The probability of proposing a prune move.
-
-    Notes
-    -----
-    This probability is computed for going from the state with the deeper tree
-    to the one with the shallower one. This means, if grow: after accepting the
-    grow move, if prune: right away.
-    """
-    # calculation in case the move is grow
+    # p_prune if grow
     other_growable_leaves = moves.num_growable >= 2
-
-    if min_points_per_leaf is not None:
-        assert move_counts is not None
-        any_above_threshold = move_counts.left >= 2 * min_points_per_leaf
-        any_above_threshold |= move_counts.right >= 2 * min_points_per_leaf
-        new_leaves_growable &= any_above_threshold
-    grow_again_allowed = other_growable_leaves | new_leaves_growable
+    grow_again_allowed = other_growable_leaves | left_growable | right_growable
     grow_p_prune = jnp.where(grow_again_allowed, 0.5, 1)

-    #
+    # p_prune if prune
     prune_p_prune = jnp.where(moves.num_growable, 0.5, 1)

-
+    # select p_prune
+    p_prune = jnp.where(moves.grow, grow_p_prune, prune_p_prune)
+
+    # prior probability of both children being terminal
+    pt_left = 1 - p_nonterminal[moves.left] * left_growable
+    pt_right = 1 - p_nonterminal[moves.right] * right_growable
+    pt_children = pt_left * pt_right
+
+    return replace(
+        moves,
+        log_trans_prior_ratio=jnp.log(moves.partial_ratio * pt_children * p_prune),
+        partial_ratio=None,
+    )


 @vmap_nodoc
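The rewritten `complete_ratio` looks up the children's affluence with `.get(mode='fill', fill_value=False)`, so that out-of-range child indices (which occur for moves that are not grows) read back as `False` instead of being clamped. A minimal demonstration of that JAX indexing mode, with made-up values:

```python
import jax.numpy as jnp

affluence_tree = jnp.array([[False, True, True, False]])
rows = jnp.arange(1)

in_range = jnp.array([2])
out_of_range = jnp.array([17])
print(affluence_tree.at[rows, in_range].get(mode='fill', fill_value=False))      # [ True]
print(affluence_tree.at[rows, out_of_range].get(mode='fill', fill_value=False))  # [False]
```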
@@ -1815,9 +2026,7 @@ def precompute_likelihood_terms(
         sigma2_total=sigma2_total,
         sqrt_term=jnp.log(sigma2 * sigma2_total / (sigma2_left * sigma2_right)) / 2,
     )
-    return prelkv, PreLk(
-        exp_factor=sigma_mu2 / (2 * sigma2),
-    )
+    return prelkv, PreLk(exp_factor=sigma_mu2 / (2 * sigma2))


 def precompute_leaf_terms(
@@ -1851,14 +2060,14 @@ def precompute_leaf_terms(
     z = random.normal(key, prec_trees.shape, sigma2.dtype)
     return PreLf(
         mean_factor=var_post / sigma2,
-        # mean = mean_lk * prec_lk * var_post
-        # resid_tree = mean_lk * prec_tree -->
-        # --> mean_lk = resid_tree / prec_tree (kind of)
-        # mean_factor =
-        # = mean / resid_tree =
-        # = resid_tree / prec_tree * prec_lk * var_post / resid_tree =
-        # = 1 / prec_tree * prec_tree / sigma2 * var_post =
-        # = var_post / sigma2
+        # | mean = mean_lk * prec_lk * var_post
+        # | resid_tree = mean_lk * prec_tree -->
+        # | --> mean_lk = resid_tree / prec_tree (kind of)
+        # | mean_factor =
+        # | = mean / resid_tree =
+        # | = resid_tree / prec_tree * prec_lk * var_post / resid_tree =
+        # | = 1 / prec_tree * prec_tree / sigma2 * var_post =
+        # | = var_post / sigma2
         centered_leaves=z * jnp.sqrt(var_post),
     )

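The comment block above spells out why `mean_factor = var_post / sigma2`. The algebra is the usual conjugate normal update for a leaf with a zero-mean prior of variance `sigma_mu2`; the numerical check below assumes `var_post = 1 / (1/sigma_mu2 + prec_tree/sigma2)`, which is defined outside this hunk, and uses made-up numbers:

```python
import jax.numpy as jnp

sigma2, sigma_mu2 = 2.0, 0.5   # error variance and leaf prior variance (made up)
prec_tree = 7.0                # precision scale of the leaf (its datapoint count here)
resid_tree = 3.1               # sum of residuals falling in the leaf

var_post = 1 / (1 / sigma_mu2 + prec_tree / sigma2)
mean_via_factor = (var_post / sigma2) * resid_tree            # mean_factor * resid_tree
mean_direct = (resid_tree / sigma2) / (1 / sigma_mu2 + prec_tree / sigma2)
print(jnp.allclose(mean_via_factor, mean_direct))             # True
```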
@@ -1884,42 +2093,34 @@ def accept_moves_sequential_stage(pso: ParallelStageOut) -> tuple[State, Moves]:
     """

     def loop(resid, pt):
-        resid, leaf_tree, acc, to_prune,
+        resid, leaf_tree, acc, to_prune, lkratio = accept_move_and_sample_leaves(
             resid,
             SeqStageInAllTrees(
                 pso.bart.X,
                 pso.bart.forest.resid_batch_size,
                 pso.bart.prec_scale,
-                pso.bart.forest.min_points_per_leaf,
                 pso.bart.forest.log_likelihood is not None,
                 pso.prelk,
             ),
             pt,
         )
-        return resid, (leaf_tree, acc, to_prune,
+        return resid, (leaf_tree, acc, to_prune, lkratio)

     pts = SeqStageInPerTree(
-        pso.bart.forest.
+        pso.bart.forest.leaf_tree,
         pso.prec_trees,
         pso.moves,
-        pso.move_counts,
         pso.move_precs,
         pso.bart.forest.leaf_indices,
         pso.prelkv,
         pso.prelf,
     )
-    resid, (leaf_trees, acc, to_prune,
+    resid, (leaf_trees, acc, to_prune, lkratio) = lax.scan(loop, pso.bart.resid, pts)

-    save_ratios = pso.bart.forest.log_likelihood is not None
     bart = replace(
         pso.bart,
         resid=resid,
-        forest=replace(
-            pso.bart.forest,
-            leaf_trees=leaf_trees,
-            log_likelihood=ratios['log_likelihood'] if save_ratios else None,
-            log_trans_prior=ratios['log_trans_prior'] if save_ratios else None,
-        ),
+        forest=replace(pso.bart.forest, leaf_tree=leaf_trees, log_likelihood=lkratio),
     )
     moves = replace(pso.moves, acc=acc, to_prune=to_prune)

@@ -1928,7 +2129,7 @@ def accept_moves_sequential_stage(pso: ParallelStageOut) -> tuple[State, Moves]:


 class SeqStageInAllTrees(Module):
-    The inputs to `accept_move_and_sample_leaves` that are
+    The inputs to `accept_move_and_sample_leaves` that are shared by all trees.

     Parameters
     ----------
@@ -1939,8 +2140,6 @@ class SeqStageInAllTrees(Module):
     prec_scale
         The scale of the precision of the error on each datapoint. If None, it
         is assumed to be 1.
-    min_points_per_leaf
-        The minimum number of data points in a leaf node.
     save_ratios
         Whether to save the acceptance ratios.
     prelk
@@ -1949,10 +2148,9 @@ class SeqStageInAllTrees(Module):
     """

     X: UInt[Array, 'p n']
-    resid_batch_size: int | None
-    prec_scale: Float32[Array, 'n'] | None
-
-    save_ratios: bool
+    resid_batch_size: int | None = field(static=True)
+    prec_scale: Float32[Array, ' n'] | None
+    save_ratios: bool = field(static=True)
     prelk: PreLk


@@ -1968,9 +2166,6 @@ class SeqStageInPerTree(Module):
         The likelihood precision scale in each potential or actual leaf node.
     move
         The proposed move, see `propose_moves`.
-    move_counts
-        The counts of the number of points in the the nodes modified by the
-        moves.
     move_precs
         The likelihood precision scale in each node modified by the moves.
     leaf_indices
@@ -1982,26 +2177,23 @@ class SeqStageInPerTree(Module):
     are specific to the tree.
     """

-    leaf_tree: Float32[Array, '2**d']
-    prec_tree: Float32[Array, '2**d']
+    leaf_tree: Float32[Array, ' 2**d']
+    prec_tree: Float32[Array, ' 2**d']
     move: Moves
-    move_counts: Counts | None
     move_precs: Precs | Counts
-    leaf_indices: UInt[Array, 'n']
+    leaf_indices: UInt[Array, ' n']
     prelkv: PreLkV
     prelf: PreLf


 def accept_move_and_sample_leaves(
-    resid: Float32[Array, 'n'],
-    at: SeqStageInAllTrees,
-    pt: SeqStageInPerTree,
+    resid: Float32[Array, ' n'], at: SeqStageInAllTrees, pt: SeqStageInPerTree
 ) -> tuple[
-    Float32[Array, 'n'],
-    Float32[Array, '2**d'],
+    Float32[Array, ' n'],
+    Float32[Array, ' 2**d'],
     Bool[Array, ''],
     Bool[Array, ''],
-
+    Float32[Array, ''] | None,
 ]:
     """
     Accept or reject a proposed move and sample the new leaf values.
@@ -2026,8 +2218,9 @@ def accept_move_and_sample_leaves(
     to_prune : Bool[Array, '']
         Whether, to reflect the acceptance status of the move, the state should
         be updated by pruning the leaves involved in the move.
-
-        The
+    log_lk_ratio : Float32[Array, ''] | None
+        The logarithm of the likelihood ratio for the move. `None` if not to be
+        saved.
     """
     # sum residuals in each leaf, in tree proposed by grow move
     if at.prec_scale is None:
@@ -2041,17 +2234,12 @@ def accept_move_and_sample_leaves(
     # subtract starting tree from function
     resid_tree += pt.prec_tree * pt.leaf_tree

-    # get indices of move
-    node = pt.move.node
-    assert node.dtype == jnp.int32
-    left = pt.move.left
-    right = pt.move.right
-
     # sum residuals in parent node modified by move
-    resid_left = resid_tree[left]
-    resid_right = resid_tree[right]
+    resid_left = resid_tree[pt.move.left]
+    resid_right = resid_tree[pt.move.right]
     resid_total = resid_left + resid_right
-
+    assert pt.move.node.dtype == jnp.int32
+    resid_tree = resid_tree.at[pt.move.node].set(resid_total)

     # compute acceptance ratio
     log_lk_ratio = compute_likelihood_ratio(
@@ -2059,48 +2247,37 @@ def accept_move_and_sample_leaves(
     )
     log_ratio = pt.move.log_trans_prior_ratio + log_lk_ratio
     log_ratio = jnp.where(pt.move.grow, log_ratio, -log_ratio)
-
-
-    ratios.update(
-        log_trans_prior=pt.move.log_trans_prior_ratio,
-        # TODO save log_trans_prior_ratio as a vector outside of this loop,
-        # then change the option everywhere to `save_likelihood_ratio`.
-        log_likelihood=log_lk_ratio,
-    )
+    if not at.save_ratios:
+        log_lk_ratio = None

     # determine whether to accept the move
     acc = pt.move.allowed & (pt.move.logu <= log_ratio)
-    if at.min_points_per_leaf is not None:
-        assert pt.move_counts is not None
-        acc &= pt.move_counts.left >= at.min_points_per_leaf
-        acc &= pt.move_counts.right >= at.min_points_per_leaf

     # compute leaves posterior and sample leaves
-    initial_leaf_tree = pt.leaf_tree
     mean_post = resid_tree * pt.prelf.mean_factor
     leaf_tree = mean_post + pt.prelf.centered_leaves

     # copy leaves around such that the leaf indices point to the correct leaf
     to_prune = acc ^ pt.move.grow
     leaf_tree = (
-        leaf_tree.at[jnp.where(to_prune, left, leaf_tree.size)]
-        .set(leaf_tree[node])
-        .at[jnp.where(to_prune, right, leaf_tree.size)]
-        .set(leaf_tree[node])
+        leaf_tree.at[jnp.where(to_prune, pt.move.left, leaf_tree.size)]
+        .set(leaf_tree[pt.move.node])
+        .at[jnp.where(to_prune, pt.move.right, leaf_tree.size)]
+        .set(leaf_tree[pt.move.node])
     )

     # replace old tree with new tree in function values
-    resid += (
+    resid += (pt.leaf_tree - leaf_tree)[pt.leaf_indices]

-    return resid, leaf_tree, acc, to_prune,
+    return resid, leaf_tree, acc, to_prune, log_lk_ratio


 def sum_resid(
-    scaled_resid: Float32[Array, 'n'],
-    leaf_indices: UInt[Array, 'n'],
+    scaled_resid: Float32[Array, ' n'],
+    leaf_indices: UInt[Array, ' n'],
     tree_size: int,
     batch_size: int | None,
-) -> Float32[Array, '{tree_size}']:
+) -> Float32[Array, ' {tree_size}']:
     """
     Sum the residuals in each leaf.

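The acceptance test in `accept_move_and_sample_leaves` compares a pre-drawn `logu` with the log ratio, which is the standard Metropolis-Hastings rule written in log space (accept iff `log U <= log ratio`, with `U ~ Uniform(0, 1)`). A toy version with arbitrary numbers, not package state:

```python
import jax.numpy as jnp
from jax import random

logu = jnp.log(random.uniform(random.key(0), (5,)))     # log U, U ~ Uniform(0, 1)
log_ratio = jnp.array([0.3, -0.1, -1.0, -3.0, 2.0])     # made-up log MH ratios
allowed = jnp.array([True, True, True, False, True])    # e.g. vetoed proposals
acc = allowed & (logu <= log_ratio)
print(acc)  # entries with log_ratio >= 0 are always accepted when allowed
```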
@@ -2134,7 +2311,7 @@ def _aggregate_batched_onetree(
     size: int,
     dtype: jnp.dtype,
     batch_size: int,
-) -> Float32[Array, '{size}']:
+) -> Float32[Array, ' {size}']:
     (n,) = indices.shape
     nbatches = n // batch_size + bool(n % batch_size)
     batch_indices = jnp.arange(n) % nbatches
@@ -2206,7 +2383,7 @@ def accept_moves_final_stage(bart: State, moves: Moves) -> State:
             grow_acc_count=jnp.sum(moves.acc & moves.grow),
             prune_acc_count=jnp.sum(moves.acc & ~moves.grow),
             leaf_indices=apply_moves_to_leaf_indices(bart.forest.leaf_indices, moves),
-
+            split_tree=apply_moves_to_split_trees(bart.forest.split_tree, moves),
         ),
     )

@@ -2234,22 +2411,20 @@ def apply_moves_to_leaf_indices(
     mask = ~jnp.array(1, leaf_indices.dtype)  # ...1111111110
     is_child = (leaf_indices & mask) == moves.left
     return jnp.where(
-        is_child & moves.to_prune,
-        moves.node.astype(leaf_indices.dtype),
-        leaf_indices,
+        is_child & moves.to_prune, moves.node.astype(leaf_indices.dtype), leaf_indices
     )


 @vmap_nodoc
 def apply_moves_to_split_trees(
-
+    split_tree: UInt[Array, 'num_trees 2**(d-1)'], moves: Moves
 ) -> UInt[Array, 'num_trees 2**(d-1)']:
     """
     Update the split trees to match the accepted move.

     Parameters
     ----------
-
+    split_tree
         The cutpoints of the decision nodes in the initial trees.
     moves
         The proposed moves (see `propose_moves`), as updated by
@@ -2261,21 +2436,9 @@ def apply_moves_to_split_trees(
     """
     assert moves.to_prune is not None
     return (
-
-
-
-                moves.node,
-                split_trees.size,
-            )
-        ]
-        .set(moves.grow_split.astype(split_trees.dtype))
-        .at[
-            jnp.where(
-                moves.to_prune,
-                moves.node,
-                split_trees.size,
-            )
-        ]
+        split_tree.at[jnp.where(moves.grow, moves.node, split_tree.size)]
+        .set(moves.grow_split.astype(split_tree.dtype))
+        .at[jnp.where(moves.to_prune, moves.node, split_tree.size)]
         .set(0)
     )

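`apply_moves_to_split_trees` uses another JAX indexing idiom: scattering at index `split_tree.size` is out of bounds, and out-of-bounds updates are dropped by default, so the write becomes a no-op for trees whose move should not modify that node. A small demonstration with toy values:

```python
import jax.numpy as jnp

split_tree = jnp.array([0, 3, 0, 0], dtype=jnp.uint8)
node, grow_split = 2, 5

# grow accepted: write the new cutpoint at `node`
print(split_tree.at[jnp.where(True, node, split_tree.size)].set(grow_split))   # [0 3 5 0]
# grow rejected: the index is out of bounds, so the update is silently dropped
print(split_tree.at[jnp.where(False, node, split_tree.size)].set(grow_split))  # [0 3 0 0]
```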
@@ -2305,6 +2468,8 @@ def step_sigma(key: Key[Array, ''], bart: State) -> State:
     beta = bart.sigma2_beta + norm2 / 2

     sample = random.gamma(key, alpha)
+    # random.gamma seems to be slow at compiling, maybe cdf inversion would
+    # be better, but it's not implemented in jax
     return replace(bart, sigma2=beta / sample)

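The hunk above only adds a comment inside `step_sigma`, but the surrounding lines show the conjugate update pattern: sigma2 is drawn as an inverse gamma by sampling a Gamma variate and dividing. A standalone sketch with made-up `alpha` and `beta` (the actual state fields, such as `sigma2_beta` and `norm2`, are not reproduced here):

```python
from jax import random

key = random.key(0)
alpha = 2.0 + 50.0    # e.g. prior shape plus n / 2 (made-up numbers)
beta = 1.0 + 21.0     # e.g. prior scale plus sum of squared residuals / 2

# beta / Gamma(alpha, 1) is distributed as InvGamma(alpha, beta)
sigma2 = beta / random.gamma(key, alpha)
print(sigma2)
```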
@@ -2324,12 +2489,128 @@ def step_z(key: Key[Array, ''], bart: State) -> State:
     The updated BART MCMC state.
     """
     trees_plus_offset = bart.z - bart.resid
-
-
-    resid = random.truncated_normal(key, lower, upper)
-    # TODO jax's implementation of truncated_normal is not good, it just does
-    # cdf inversion with erf and erf_inv. I can do better, at least avoiding to
-    # compute one of the boundaries, and maybe also flipping and using ndtr
-    # instead of erf for numerical stability (open an issue in jax?)
+    assert bart.y.dtype == bool
+    resid = truncated_normal_onesided(key, (), ~bart.y, -trees_plus_offset)
     z = trees_plus_offset + resid
     return replace(bart, z=z, resid=resid)
+
+
+def step_s(key: Key[Array, ''], bart: State) -> State:
+    """
+    Update `log_s` using Dirichlet sampling.
+
+    The prior is s ~ Dirichlet(theta/p, ..., theta/p), and the posterior
+    is s ~ Dirichlet(theta/p + varcount, ..., theta/p + varcount), where
+    varcount is the count of how many times each variable is used in the
+    current forest.
+
+    Parameters
+    ----------
+    key
+        Random key for sampling.
+    bart
+        The current BART state.
+
+    Returns
+    -------
+    Updated BART state with re-sampled `log_s`.
+
+    """
+    assert bart.forest.theta is not None
+
+    # histogram current variable usage
+    p = bart.forest.max_split.size
+    varcount = grove.var_histogram(p, bart.forest.var_tree, bart.forest.split_tree)
+
+    # sample from Dirichlet posterior
+    alpha = bart.forest.theta / p + varcount
+    log_s = random.loggamma(key, alpha)
+
+    # update forest with new s
+    return replace(bart, forest=replace(bart.forest, log_s=log_s))
+
+
+def step_theta(key: Key[Array, ''], bart: State, *, num_grid: int = 1000) -> State:
+    """
+    Update `theta`.
+
+    The prior is theta / (theta + rho) ~ Beta(a, b).
+
+    Parameters
+    ----------
+    key
+        Random key for sampling.
+    bart
+        The current BART state.
+    num_grid
+        The number of points in the evenly-spaced grid used to sample
+        theta / (theta + rho).
+
+    Returns
+    -------
+    Updated BART state with re-sampled `theta`.
+    """
+    assert bart.forest.log_s is not None
+    assert bart.forest.rho is not None
+    assert bart.forest.a is not None
+    assert bart.forest.b is not None
+
+    # the grid points are the midpoints of num_grid bins in (0, 1)
+    padding = 1 / (2 * num_grid)
+    lamda_grid = jnp.linspace(padding, 1 - padding, num_grid)
+
+    # normalize s
+    log_s = bart.forest.log_s - logsumexp(bart.forest.log_s)
+
+    # sample lambda
+    logp, theta_grid = _log_p_lamda(
+        lamda_grid, log_s, bart.forest.rho, bart.forest.a, bart.forest.b
+    )
+    i = random.categorical(key, logp)
+    theta = theta_grid[i]
+
+    return replace(bart, forest=replace(bart.forest, theta=theta))
+
+
+def _log_p_lamda(
+    lamda: Float32[Array, ' num_grid'],
+    log_s: Float32[Array, ' p'],
+    rho: Float32[Array, ''],
+    a: Float32[Array, ''],
+    b: Float32[Array, ''],
+) -> tuple[Float32[Array, ' num_grid'], Float32[Array, ' num_grid']]:
+    # in the following I use lamda[::-1] == 1 - lamda
+    theta = rho * lamda / lamda[::-1]
+    p = log_s.size
+    return (
+        (a - 1) * jnp.log1p(-lamda[::-1])  # log(lambda)
+        + (b - 1) * jnp.log1p(-lamda)  # log(1 - lambda)
+        + gammaln(theta)
+        - p * gammaln(theta / p)
+        + theta / p * jnp.sum(log_s)
+    ), theta
+
+
+def step_sparse(key: Key[Array, ''], bart: State) -> State:
+    """
+    Update the sparsity parameters.
+
+    This invokes `step_s`, and then `step_theta` only if the parameters of
+    the theta prior are defined.
+
+    Parameters
+    ----------
+    key
+        Random key for sampling.
+    bart
+        The current BART state.
+
+    Returns
+    -------
+    Updated BART state with re-sampled `log_s` and `theta`.
+    """
+    keys = split(key)
+    bart = step_s(keys.pop(), bart)
+    if bart.forest.rho is not None:
+        bart = step_theta(keys.pop(), bart)
+    return bart