PyPI - bartz - Versions diffs - 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl - Mend

bartz 0.0.1py3-none-any.whl → 0.1.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

bartz/__init__.py +1 -1
bartz/_version.py +1 -0
bartz/debug.py +5 -19
bartz/grove.py +71 -118
bartz/interface.py +6 -15
bartz/mcmcloop.py +12 -6
bartz/mcmcstep.py +379 -427
{bartz-0.0.1.dist-info → bartz-0.1.0.dist-info}/METADATA +1 -1
bartz-0.1.0.dist-info/RECORD +13 -0
bartz-0.0.1.dist-info/RECORD +0 -12
{bartz-0.0.1.dist-info → bartz-0.1.0.dist-info}/LICENSE +0 -0
{bartz-0.0.1.dist-info → bartz-0.1.0.dist-info}/WHEEL +0 -0

bartz/mcmcstep.py CHANGED Viewed

@@ -41,9 +41,10 @@ from jax import random
 from jax import numpy as jnp
 from jax import lax
+from . import jaxext
 from . import grove
-def make_bart(*,
+def init(*,
     X,
     y,
     max_split,
@@ -51,8 +52,8 @@ def make_bart(*,
     p_nonterminal,
     sigma2_alpha,
     sigma2_beta,
-    small_float_dtype=jnp.float32,
-    large_float_dtype=jnp.float32,
+    small_float=jnp.float32,
+    large_float=jnp.float32,
     min_points_per_leaf=None,
     ):
     """
@@ -75,9 +76,9 @@ def make_bart(*,
         The shape parameter of the inverse gamma prior on the noise variance.
     sigma2_beta : float
         The scale parameter of the inverse gamma prior on the noise variance.
-    small_float_dtype : dtype, default float32
+    small_float : dtype, default float32
         The dtype for large arrays used in the algorithm.
-    large_float_dtype : dtype, default float32
+    large_float : dtype, default float32
         The dtype for scalars, small arrays, and arrays which require accuracy.
     min_points_per_leaf : int, optional
         The minimum number of data points in a leaf node. 0 if not specified.
@@ -88,31 +89,30 @@ def make_bart(*,
         A dictionary with array values, representing a BART mcmc state. The
         keys are:
-        'leaf_trees' : int array (num_trees, 2 ** d)
-            The leaf values of the trees.
+        'leaf_trees' : small_float array (num_trees, 2 ** d)
+            The leaf values.
         'var_trees' : int array (num_trees, 2 ** (d - 1))
-            The variable indices of the trees. The bottom level is missing since
-            it can only contain leaves.
+            The decision axes.
         'split_trees' : int array (num_trees, 2 ** (d - 1))
-            The splitting points.
-        'resid' : large_float_dtype array (n,)
+            The decision boundaries.
+        'resid' : large_float array (n,)
             The residuals (data minus forest value). Large float to avoid
             roundoff.
-        'sigma2' : large_float_dtype
+        'sigma2' : large_float
             The noise variance.
         'grow_prop_count', 'prune_prop_count' : int
             The number of grow/prune proposals made during one full MCMC cycle.
         'grow_acc_count', 'prune_acc_count' : int
             The number of grow/prune moves accepted during one full MCMC cycle.
-        'p_nonterminal' : large_float_dtype array (d - 1,)
+        'p_nonterminal' : large_float array (d - 1,)
             The probability of a nonterminal node at each depth.
-        'sigma2_alpha' : large_float_dtype
+        'sigma2_alpha' : large_float
             The shape parameter of the inverse gamma prior on the noise variance.
-        'sigma2_beta' : large_float_dtype
+        'sigma2_beta' : large_float
             The scale parameter of the inverse gamma prior on the noise variance.
         'max_split' : int array (p,)
             The maximum split index for each variable.
-        'y' : small_float_dtype array (n,)
+        'y' : small_float array (n,)
             The response.
         'X' : int array (p, n)
             The predictors.
@@ -123,7 +123,7 @@ def make_bart(*,
             datapoints. If `min_points_per_leaf` is not specified, this is None.
     """
-    p_nonterminal = jnp.asarray(p_nonterminal, large_float_dtype)
+    p_nonterminal = jnp.asarray(p_nonterminal, large_float)
     max_depth = p_nonterminal.size + 1
     @functools.partial(jax.vmap, in_axes=None, out_axes=0, axis_size=num_trees)
@@ -131,20 +131,20 @@ def make_bart(*,
         return grove.make_tree(max_depth, dtype)
     bart = dict(
-        leaf_trees=make_forest(max_depth, small_float_dtype),
+        leaf_trees=make_forest(max_depth, small_float),
         var_trees=make_forest(max_depth - 1, grove.minimal_unsigned_dtype(X.shape[0] - 1)),
         split_trees=make_forest(max_depth - 1, max_split.dtype),
-        resid=jnp.asarray(y, large_float_dtype),
-        sigma2=jnp.ones((), large_float_dtype),
+        resid=jnp.asarray(y, large_float),
+        sigma2=jnp.ones((), large_float),
         grow_prop_count=jnp.zeros((), int),
         grow_acc_count=jnp.zeros((), int),
         prune_prop_count=jnp.zeros((), int),
         prune_acc_count=jnp.zeros((), int),
         p_nonterminal=p_nonterminal,
-        sigma2_alpha=jnp.asarray(sigma2_alpha, large_float_dtype),
-        sigma2_beta=jnp.asarray(sigma2_beta, large_float_dtype),
+        sigma2_alpha=jnp.asarray(sigma2_alpha, large_float),
+        sigma2_beta=jnp.asarray(sigma2_beta, large_float),
         max_split=max_split,
-        y=jnp.asarray(y, small_float_dtype),
+        y=jnp.asarray(y, small_float),
         X=X,
         min_points_per_leaf=(
             None if min_points_per_leaf is None else
@@ -158,14 +158,14 @@ def make_bart(*,
     return bart
-def mcmc_step(bart, key):
+def step(bart, key):
     """
     Perform one full MCMC step on a BART state.
     Parameters
     ----------
     bart : dict
-        A BART mcmc state, as created by `make_bart`.
+        A BART mcmc state, as created by `init`.
     key : jax.dtypes.prng_key array
         A jax random key.
@@ -174,19 +174,18 @@ def mcmc_step(bart, key):
     bart : dict
         The new BART mcmc state.
     """
-    key1, key2 = random.split(key, 2)
-    bart = mcmc_sample_trees(bart, key1)
-    bart = mcmc_sample_sigma(bart, key2)
-    return bart
+    key, subkey = random.split(key)
+    bart = sample_trees(bart, subkey)
+    return sample_sigma(bart, key)
-def mcmc_sample_trees(bart, key):
+def sample_trees(bart, key):
     """
     Forest sampling step of BART MCMC.
     Parameters
     ----------
     bart : dict
-        A BART mcmc state, as created by `make_bart`.
+        A BART mcmc state, as created by `init`.
     key : jax.dtypes.prng_key array
         A jax random key.
@@ -199,148 +198,60 @@ def mcmc_sample_trees(bart, key):
     -----
     This function zeroes the proposal counters.
     """
-    bart = bart.copy()
-    for count_var in ['grow_prop_count', 'grow_acc_count', 'prune_prop_count', 'prune_acc_count']:
-        bart[count_var] = jnp.zeros_like(bart[count_var])
-    carry = 0, bart, key
-    def loop(carry, _):
-        i, bart, key = carry
-        key, subkey = random.split(key)
-        bart = mcmc_sample_tree(bart, subkey, i)
-        return (i + 1, bart, key), None
-    (_, bart, _), _ = lax.scan(loop, carry, None, len(bart['leaf_trees']))
-    return bart
-def mcmc_sample_tree(bart, key, i_tree):
-    """
-    Single tree sampling step of BART MCMC.
-    Parameters
-    ----------
-    bart : dict
-        A BART mcmc state, as created by `make_bart`.
-    key : jax.dtypes.prng_key array
-        A jax random key.
-    i_tree : int
-        The index of the tree to sample.
-    Returns
-    -------
-    bart : dict
-        The new BART mcmc state.
-    """
-    bart = bart.copy()
-    y_tree = grove.evaluate_tree_vmap_x(
-        bart['X'],
-        bart['leaf_trees'][i_tree],
-        bart['var_trees'][i_tree],
-        bart['split_trees'][i_tree],
-        bart['resid'].dtype,
-    )
-    bart['resid'] += y_tree
-    key1, key2 = random.split(key, 2)
-    bart = mcmc_sample_tree_structure(bart, key1, i_tree)
-    bart = mcmc_sample_tree_leaves(bart, key2, i_tree)
-    y_tree = grove.evaluate_tree_vmap_x(
-        bart['X'],
-        bart['leaf_trees'][i_tree],
-        bart['var_trees'][i_tree],
-        bart['split_trees'][i_tree],
-        bart['resid'].dtype,
-    )
-    bart['resid'] -= y_tree
-    return bart
+    key, subkey = random.split(key)
+    grow_moves, prune_moves = sample_moves(bart, subkey)
+    return accept_moves_and_sample_leaves(bart, grow_moves, prune_moves, key)
-def mcmc_sample_tree_structure(bart, key, i_tree):
+def sample_moves(bart, key):
     """
-    Single tree structure sampling step of BART MCMC.
+    Propose moves for all the trees.
     Parameters
     ----------
     bart : dict
-        A BART mcmc state, as created by `make_bart`. The ``'resid'`` field
-        shall contain only the residuals w.r.t. the other trees.
+        BART mcmc state.
     key : jax.dtypes.prng_key array
         A jax random key.
-    i_tree : int
-        The index of the tree to sample.
     Returns
     -------
-    bart : dict
-        The new BART mcmc state.
-    """
-    bart = bart.copy()
-    var_tree = bart['var_trees'][i_tree]
-    split_tree = bart['split_trees'][i_tree]
-    affluence_tree = (
-        None if bart['affluence_trees'] is None else
-        bart['affluence_trees'][i_tree]
-    )
-    key1, key2, key3 = random.split(key, 3)
-    args = [
-        bart['X'],
-        var_tree,
-        split_tree,
-        affluence_tree,
-        bart['max_split'],
-        bart['p_nonterminal'],
-        bart['sigma2'],
-        bart['resid'],
-        len(bart['var_trees']),
-        bart['min_points_per_leaf'],
-        key1,
-    ]
-    grow_var_tree, grow_split_tree, grow_affluence_tree, grow_allowed, grow_ratio = grow_move(*args)
-    args[-1] = key2
-    prune_var_tree, prune_split_tree, prune_affluence_tree, prune_allowed, prune_ratio = prune_move(*args)
-    u0, u1 = random.uniform(key3, (2,))
-    p_grow = jnp.where(grow_allowed & prune_allowed, 0.5, grow_allowed)
-    try_grow = u0 < p_grow
-    try_prune = prune_allowed & ~try_grow
-    do_grow = try_grow & (u1 < grow_ratio)
-    do_prune = try_prune & (u1 < prune_ratio)
-    var_tree = jnp.where(do_grow, grow_var_tree, var_tree)
-    split_tree = jnp.where(do_grow, grow_split_tree, split_tree)
-    var_tree = jnp.where(do_prune, prune_var_tree, var_tree)
-    split_tree = jnp.where(do_prune, prune_split_tree, split_tree)
-    bart['var_trees'] = bart['var_trees'].at[i_tree].set(var_tree)
-    bart['split_trees'] = bart['split_trees'].at[i_tree].set(split_tree)
-    if bart['min_points_per_leaf'] is not None:
-        affluence_tree = jnp.where(do_grow, grow_affluence_tree, affluence_tree)
-        affluence_tree = jnp.where(do_prune, prune_affluence_tree, affluence_tree)
-        bart['affluence_trees'] = bart['affluence_trees'].at[i_tree].set(affluence_tree)
-    bart['grow_prop_count'] += try_grow
-    bart['grow_acc_count'] += do_grow
-    bart['prune_prop_count'] += try_prune
-    bart['prune_acc_count'] += do_prune
-    return bart
-def grow_move(X, var_tree, split_tree, affluence_tree, max_split, p_nonterminal, sigma2, resid, n_tree, min_points_per_leaf, key):
+    grow_moves, prune_moves : dict
+        The proposals for grow and prune moves, with these fields:
+        'allowed' : bool array (num_trees,)
+            Whether the move is possible.
+        'node' : int array (num_trees,)
+            The index of the leaf to grow or node to prune.
+        'var_tree' : int array (num_trees, 2 ** (d - 1),)
+            The new decision axes of the tree.
+        'split_tree' : int array (num_trees, 2 ** (d - 1),)
+            The new decision boundaries of the tree.
+        'partial_ratio' : float array (num_trees,)
+            A factor of the Metropolis-Hastings ratio of the move. It lacks
+            the likelihood ratio, and the probability of proposing the prune
+            move. For the prune move, the ratio is inverted.
+    """
+    key = random.split(key, bart['var_trees'].shape[0])
+    return sample_moves_vmap_trees(bart['var_trees'], bart['split_trees'], bart['affluence_trees'], bart['max_split'], bart['p_nonterminal'], key)
+@functools.partial(jaxext.vmap_nodoc, in_axes=(0, 0, 0, None, None, 0))
+def sample_moves_vmap_trees(var_tree, split_tree, affluence_tree, max_split, p_nonterminal, key):
+    key, key1 = random.split(key)
+    args = var_tree, split_tree, affluence_tree, max_split, p_nonterminal
+    grow = grow_move(*args, key)
+    prune = prune_move(*args, key1)
+    return grow, prune
+def grow_move(var_tree, split_tree, affluence_tree, max_split, p_nonterminal, key):
     """
     Tree structure grow move proposal of BART MCMC.
+    This move picks a leaf node and converts it to a non-terminal node with
+    two leaf children. The move is not possible if all the leaves are already at
+    maximum depth.
     Parameters
     ----------
-    X : array (p, n)
-        The predictors.
     var_tree : array (2 ** (d - 1),)
         The variable indices of the tree.
     split_tree : array (2 ** (d - 1),)
@@ -351,80 +262,47 @@ def grow_move(X, var_tree, split_tree, affluence_tree, max_split, p_nonterminal,
         The maximum split index for each variable.
     p_nonterminal : array (d - 1,)
         The probability of a nonterminal node at each depth.
-    sigma2 : float
-        The noise variance.
-    resid : array (n,)
-        The residuals (data minus forest value), computed using all trees but
-        the tree under consideration.
-    n_tree : int
-        The number of trees in the forest.
-    min_points_per_leaf : int
-        The minimum number of data points in a leaf node.
     key : jax.dtypes.prng_key array
         A jax random key.
     Returns
     -------
-    var_tree : array (2 ** (d - 1),)
-        The new variable indices of the tree.
-    split_tree : array (2 ** (d - 1),)
-        The new splitting points of the tree.
-    affluence_tree : bool array (2 ** (d - 1),) or None
-        The new indicator whether a leaf has enough points to be grown.
-    allowed : bool
-        Whether the move is allowed.
-    ratio : float
-        The Metropolis-Hastings ratio.
-    Notes
-    -----
-    This moves picks a leaf node and converts it to a non-terminal node with
-    two leaf children. The move is not possible if all the leaves are already at
-    maximum depth.
-    """
-    key1, key2, key3 = random.split(key, 3)
+    grow_move : dict
+        A dictionary with fields:
+        'allowed' : bool
+            Whether the move is possible.
+        'node' : int
+            The index of the leaf to grow.
+        'var_tree' : array (2 ** (d - 1),)
+            The new decision axes of the tree.
+        'split_tree' : array (2 ** (d - 1),)
+            The new decision boundaries of the tree.
+        'partial_ratio' : float
+            A factor of the Metropolis-Hastings ratio of the move. It lacks
+            the likelihood ratio and the probability of proposing the prune
+            move.
+    """
+    key, key1, key2 = random.split(key, 3)
-    leaf_to_grow, num_growable, num_prunable, allowed = choose_leaf(split_tree, affluence_tree, key1)
-    var = choose_variable(var_tree, split_tree, max_split, leaf_to_grow, key2)
+    leaf_to_grow, num_growable, num_prunable, allowed = choose_leaf(split_tree, affluence_tree, key)
+    var = choose_variable(var_tree, split_tree, max_split, leaf_to_grow, key1)
     var_tree = var_tree.at[leaf_to_grow].set(var.astype(var_tree.dtype))
-    split = choose_split(var_tree, split_tree, max_split, leaf_to_grow, key3)
+    split = choose_split(var_tree, split_tree, max_split, leaf_to_grow, key2)
     new_split_tree = split_tree.at[leaf_to_grow].set(split.astype(split_tree.dtype))
-    likelihood_ratio, new_affluence_tree = compute_likelihood_ratio(X, var_tree, new_split_tree, resid, sigma2, leaf_to_grow, n_tree, min_points_per_leaf)
-    trans_tree_ratio = compute_trans_tree_ratio(num_growable, num_prunable, split_tree.size, p_nonterminal, leaf_to_grow, split_tree, new_split_tree, affluence_tree, new_affluence_tree)
-    ratio = trans_tree_ratio * likelihood_ratio
-    return var_tree, new_split_tree, new_affluence_tree, allowed, ratio
-def growable_leaves(split_tree, affluence_tree):
-    """
-    Return a mask indicating the leaf nodes that can be proposed for growth.
-    Parameters
-    ----------
-    split_tree : array (2 ** (d - 1),)
-        The splitting points of the tree.
-    affluence_tree : bool array (2 ** (d - 1),) or None
-        Whether a leaf has enough points to be grown.
+    ratio = compute_partial_ratio(num_growable, num_prunable, p_nonterminal, leaf_to_grow, split_tree, new_split_tree)
-    Returns
-    -------
-    is_growable : bool array (2 ** (d - 1),)
-        The mask indicating the leaf nodes that can be proposed to grow, i.e.,
-        that are not at the bottom level and have at least two times the number
-        of minimum points per leaf.
-    allowed : bool
-        Whether the grow move is allowed, i.e., there are growable leaves.
-    """
-    is_growable = grove.is_actual_leaf(split_tree)
-    if affluence_tree is not None:
-        is_growable &= affluence_tree
-    return is_growable, jnp.any(is_growable)
+    return dict(
+        allowed=allowed,
+        node=leaf_to_grow,
+        var_tree=var_tree,
+        split_tree=new_split_tree,
+        partial_ratio=ratio,
+    )
 def choose_leaf(split_tree, affluence_tree, key):
     """
@@ -443,7 +321,7 @@ def choose_leaf(split_tree, affluence_tree, key):
     -------
     leaf_to_grow : int
         The index of the leaf to grow. If ``num_growable == 0``, return
-        ``split_tree.size``.
+        ``2 ** d``.
     num_growable : int
         The number of leaf nodes that can be grown.
     num_prunable : int
@@ -454,11 +332,37 @@ def choose_leaf(split_tree, affluence_tree, key):
     """
     is_growable, allowed = growable_leaves(split_tree, affluence_tree)
     leaf_to_grow = randint_masked(key, is_growable)
+    leaf_to_grow = jnp.where(allowed, leaf_to_grow, 2 * split_tree.size)
     num_growable = jnp.count_nonzero(is_growable)
     is_parent = grove.is_leaves_parent(split_tree.at[leaf_to_grow].set(1))
     num_prunable = jnp.count_nonzero(is_parent)
     return leaf_to_grow, num_growable, num_prunable, allowed
+def growable_leaves(split_tree, affluence_tree):
+    """
+    Return a mask indicating the leaf nodes that can be proposed for growth.
+    Parameters
+    ----------
+    split_tree : array (2 ** (d - 1),)
+        The splitting points of the tree.
+    affluence_tree : bool array (2 ** (d - 1),) or None
+        Whether a leaf has enough points to be grown.
+    Returns
+    -------
+    is_growable : bool array (2 ** (d - 1),)
+        The mask indicating the leaf nodes that can be proposed to grow, i.e.,
+        that are not at the bottom level and have at least two times the number
+        of minimum points per leaf.
+    allowed : bool
+        Whether the grow move is allowed, i.e., there are growable leaves.
+    """
+    is_growable = grove.is_actual_leaf(split_tree)
+    if affluence_tree is not None:
+        is_growable &= affluence_tree
+    return is_growable, jnp.any(is_growable)
 def randint_masked(key, mask):
     """
     Return a random integer in a range, including only some values.
@@ -665,7 +569,7 @@ def choose_split(var_tree, split_tree, max_split, leaf_index, key):
     l, r = split_range(var_tree, split_tree, max_split, leaf_index, var)
     return random.randint(key, (), l, r)
-def compute_trans_tree_ratio(num_growable, num_prunable, tree_halfsize, p_nonterminal, leaf_to_grow, initial_split_tree, new_split_tree, initial_affluence_tree, new_affluence_tree):
+def compute_partial_ratio(num_growable, num_prunable, p_nonterminal, leaf_to_grow, initial_split_tree, new_split_tree):
     """
     Compute the product of the transition and prior ratios of a grow move.
@@ -676,8 +580,6 @@ def compute_trans_tree_ratio(num_growable, num_prunable, tree_halfsize, p_nonter
     num_prunable : int
         The number of leaf parents that could be pruned, after converting the
         leaf to be grown to a non-terminal node.
-    tree_halfsize : int
-        Half the length of the tree array, i.e., 2 ** (d - 1).
     p_nonterminal : array (d - 1,)
         The probability of a nonterminal node at each depth.
     leaf_to_grow : int
@@ -686,16 +588,13 @@ def compute_trans_tree_ratio(num_growable, num_prunable, tree_halfsize, p_nonter
         The splitting points of the tree, before the leaf is grown.
     new_split_tree : array (2 ** (d - 1),)
         The splitting points of the tree, after the leaf is grown.
-    initial_affluence_tree : bool array (2 ** (d - 1),) or None
-        Whether a leaf has enough points to be grown, before the leaf is grown.
-    new_affluence_tree : bool array (2 ** (d - 1),) or None
-        Whether a leaf has enough points to be grown, after the leaf is grown.
     Returns
     -------
     ratio : float
         The transition ratio P(new tree -> old tree) / P(old tree -> new tree)
-        times the prior ratio P(new tree) / P(old tree).
+        times the prior ratio P(new tree) / P(old tree), but the transition
+        ratio is missing the factor P(propose prune) in the numerator.
     """
     # the two ratios also contain factors num_available_split *
@@ -704,101 +603,21 @@ def compute_trans_tree_ratio(num_growable, num_prunable, tree_halfsize, p_nonter
     prune_was_allowed = prune_allowed(initial_split_tree)
     p_grow = jnp.where(prune_was_allowed, 0.5, 1)
-    _, grow_again_allowed = growable_leaves(new_split_tree, new_affluence_tree)
-    p_prune = jnp.where(grow_again_allowed, 0.5, 1)
-    trans_ratio = p_prune * num_growable / (p_grow * num_prunable)
+    trans_ratio = num_growable / (p_grow * num_prunable)
-    depth = grove.index_depth(leaf_to_grow, tree_halfsize)
+    depth = grove.tree_depths(initial_split_tree.size)[leaf_to_grow]
     p_parent = p_nonterminal[depth]
     cp_children = 1 - p_nonterminal.at[depth + 1].get(mode='fill', fill_value=0)
     tree_ratio = cp_children * cp_children * p_parent / (1 - p_parent)
     return trans_ratio * tree_ratio
-def compute_likelihood_ratio(X, var_tree, split_tree, resid, sigma2, new_node, n_tree, min_points_per_leaf):
-    """
-    Compute the likelihood ratio of a grow move.
-    Parameters
-    ----------
-    X : array (p, n)
-        The predictors.
-    var_tree : array (2 ** (d - 1),)
-        The variable indices of the tree, after the grow move.
-    split_tree : array (2 ** (d - 1),)
-        The splitting points of the tree, after the grow move.
-    resid : array (n,)
-        The residuals (data minus forest value), for all trees but the one
-        under consideration.
-    sigma2 : float
-        The noise variance.
-    new_node : int
-        The index of the leaf that has been grown.
-    n_tree : int
-        The number of trees in the forest.
-    min_points_per_leaf : int or None
-        The minimum number of data points in a leaf node.
-    Returns
-    -------
-    ratio : float
-        The likelihood ratio P(data | new tree) / P(data | old tree).
-    affluence_tree : bool array (2 ** (d - 1),) or None
-        Whether a leaf has enough points to be grown, after the grow move.
-    """
-    resid_tree, count_tree = agg_values(
-        X,
-        var_tree,
-        split_tree,
-        resid,
-        sigma2.dtype,
-    )
-    left_child = new_node << 1
-    right_child = left_child + 1
-    left_resid = resid_tree[left_child]
-    right_resid = resid_tree[right_child]
-    total_resid = left_resid + right_resid
-    left_count = count_tree[left_child]
-    right_count = count_tree[right_child]
-    total_count = left_count + right_count
-    sigma_mu2 = 1 / n_tree
-    sigma2_left = sigma2 + left_count * sigma_mu2
-    sigma2_right = sigma2 + right_count * sigma_mu2
-    sigma2_total = sigma2 + total_count * sigma_mu2
-    sqrt_term = sigma2 * sigma2_total / (sigma2_left * sigma2_right)
-    exp_term = sigma_mu2 / (2 * sigma2) * (
-        left_resid * left_resid / sigma2_left +
-        right_resid * right_resid / sigma2_right -
-        total_resid * total_resid / sigma2_total
-    )
-    ratio = jnp.sqrt(sqrt_term) * jnp.exp(exp_term)
-    if min_points_per_leaf is not None:
-        ratio = jnp.where(right_count >= min_points_per_leaf, ratio, 0)
-        ratio = jnp.where(left_count >= min_points_per_leaf, ratio, 0)
-        affluence_tree = count_tree[:count_tree.size // 2] >= 2 * min_points_per_leaf
-    else:
-        affluence_tree = None
-    return ratio, affluence_tree
-def prune_move(X, var_tree, split_tree, affluence_tree, max_split, p_nonterminal, sigma2, resid, n_tree, min_points_per_leaf, key):
+def prune_move(var_tree, split_tree, affluence_tree, max_split, p_nonterminal, key):
     """
     Tree structure prune move proposal of BART MCMC.
     Parameters
     ----------
-    X : array (p, n)
-        The predictors.
     var_tree : array (2 ** (d - 1),)
         The variable indices of the tree.
     split_tree : array (2 ** (d - 1),)
@@ -809,48 +628,41 @@ def prune_move(X, var_tree, split_tree, affluence_tree, max_split, p_nonterminal
         The maximum split index for each variable.
     p_nonterminal : array (d - 1,)
         The probability of a nonterminal node at each depth.
-    sigma2 : float
-        The noise variance.
-    resid : array (n,)
-        The residuals (data minus forest value), computed using all trees but
-        the tree under consideration.
-    n_tree : int
-        The number of trees in the forest.
-    min_points_per_leaf : int
-        The minimum number of data points in a leaf node.
     key : jax.dtypes.prng_key array
         A jax random key.
     Returns
     -------
-    var_tree : array (2 ** (d - 1),)
-        The new variable indices of the tree.
-    split_tree : array (2 ** (d - 1),)
-        The new splitting points of the tree.
-    affluence_tree : bool array (2 ** (d - 1),) or None
-        The new indicator whether a leaf has enough points to be grown.
-    allowed : bool
-        Whether the move is allowed.
-    ratio : float
-        The Metropolis-Hastings ratio.
+    prune_move : dict
+        A dictionary with fields:
+        'allowed' : bool
+            Whether the move is possible.
+        'node' : int
+            The index of the node to prune.
+        'var_tree' : array (2 ** (d - 1),)
+            The new decision axes of the tree.
+        'split_tree' : array (2 ** (d - 1),)
+            The new decision boundaries of the tree.
+        'partial_ratio' : float
+            A factor of the Metropolis-Hastings ratio of the move. It lacks
+            the likelihood ratio and the probability of proposing the prune
+            move. This ratio is inverted.
     """
     node_to_prune, num_prunable, num_growable = choose_leaf_parent(split_tree, affluence_tree, key)
     allowed = prune_allowed(split_tree)
     new_split_tree = split_tree.at[node_to_prune].set(0)
-    # should I clean up var_tree as well? just for debugging. it hasn't given me problems though
-    likelihood_ratio, _ = compute_likelihood_ratio(X, var_tree, split_tree, resid, sigma2, node_to_prune, n_tree, min_points_per_leaf)
-    new_affluence_tree = (
-        None if affluence_tree is None else
-        affluence_tree.at[node_to_prune].set(True)
-    )
-    trans_tree_ratio = compute_trans_tree_ratio(num_growable, num_prunable, split_tree.size, p_nonterminal, node_to_prune, new_split_tree, split_tree, new_affluence_tree, affluence_tree)
-    ratio = trans_tree_ratio * likelihood_ratio
-    ratio = 1 / ratio # Question: should I use lax.reciprocal for this?
+    ratio = compute_partial_ratio(num_growable, num_prunable, p_nonterminal, node_to_prune, new_split_tree, split_tree)
-    return var_tree, new_split_tree, new_affluence_tree, allowed, ratio
+    return dict(
+        allowed=allowed,
+        node=node_to_prune,
+        var_tree=var_tree,
+        split_tree=new_split_tree,
+        partial_ratio=ratio, # it is inverted in accept_move_and_sample_leaves
+    )
 def choose_leaf_parent(split_tree, affluence_tree, key):
     """
@@ -906,116 +718,256 @@ def prune_allowed(split_tree):
     """
     return split_tree.at[1].get(mode='fill', fill_value=0).astype(bool)
-def mcmc_sample_tree_leaves(bart, key, i_tree):
-    """
-    Single tree leaves sampling step of BART MCMC.
-    Parameters
-    ----------
-    bart : dict
-        A BART mcmc state, as created by `make_bart`. The ``'resid'`` field
-        shall contain the residuals only w.r.t. the other trees.
-    key : jax.dtypes.prng_key array
-        A jax random key.
-    i_tree : int
-        The index of the tree to sample.
-    Returns
-    -------
-    bart : dict
-        The new BART mcmc state.
-    """
+def accept_moves_and_sample_leaves(bart, grow_moves, prune_moves, key):
+    """
+    Accept or reject the proposed moves and sample the leaves, tree by tree.
+    Parameters
+    ----------
+    bart : dict
+        A BART mcmc state. The fields read here are 'X', 'sigma2',
+        'min_points_per_leaf', 'resid', the four move counters, and the
+        stacked trees 'leaf_trees', 'var_trees', 'split_trees',
+        'affluence_trees'.
+    grow_moves, prune_moves : dict
+        The proposed grow and prune moves. They are scanned along the leading
+        axis together with the trees, i.e., one move of each kind per tree.
+    key : jax.dtypes.prng_key array
+        A jax random key. It is split into one key per tree.
+    Returns
+    -------
+    bart : dict
+        A shallow copy of the input state with the trees, the residuals and
+        the move counters replaced by the updated ones.
+    Notes
+    -----
+    The counters 'grow_prop_count', 'prune_prop_count', 'grow_acc_count' and
+    'prune_acc_count' are restarted from zero at each call, so on output they
+    refer to this call only.
+    """
     bart = bart.copy()
+    def loop(carry, item):
+        # the residuals are taken out of the carry so that what is left is
+        # the dictionary of counters expected by the single-tree function
+        resid = carry.pop('resid')
+        resid, carry, trees = accept_move_and_sample_leaves(
+            bart['X'],
+            len(bart['leaf_trees']),
+            resid,
+            bart['sigma2'],
+            bart['min_points_per_leaf'],
+            carry,
+            *item,
+        )
+        carry['resid'] = resid
+        return carry, trees
+    # counters start from zero, with the same shape and dtype as in the state
+    carry = {
+        k: jnp.zeros_like(bart[k]) for k in
+        ['grow_prop_count', 'prune_prop_count', 'grow_acc_count', 'prune_acc_count']
+    }
+    carry['resid'] = bart['resid']
+    # per-tree inputs, in the positional order of the trailing parameters of
+    # accept_move_and_sample_leaves
+    items = (
+        bart['leaf_trees'],
+        bart['var_trees'],
+        bart['split_trees'],
+        bart['affluence_trees'],
+        grow_moves,
+        prune_moves,
+        random.split(key, len(bart['leaf_trees'])),
+    )
+    # the trees are processed sequentially because each update changes the
+    # residuals seen by the next tree
+    carry, trees = lax.scan(loop, carry, items)
+    bart.update(carry)
+    bart.update(trees)
+    return bart
+def accept_move_and_sample_leaves(X, ntree, resid, sigma2, min_points_per_leaf, counts, leaf_tree, var_tree, split_tree, affluence_tree, grow_move, prune_move, key):
+    """
+    Accept or reject a proposed move on a single tree and sample its leaves.
+    Parameters
+    ----------
+    X : array (p, n)
+        The predictors. The tree is traversed once per column.
+    ntree : int
+        The number of trees in the forest.
+    resid : array (n,)
+        The residuals computed with all the trees, including this one.
+    sigma2 : float
+        The noise variance.
+    min_points_per_leaf : int or None
+        The minimum number of data points in a leaf node. If None, no
+        affluence information is computed.
+    counts : dict
+        The counters 'grow_prop_count', 'grow_acc_count', 'prune_prop_count'
+        and 'prune_acc_count'. The input dictionary is not modified.
+    leaf_tree, var_tree, split_tree, affluence_tree : array
+        The current tree. `affluence_tree` is used only if
+        `min_points_per_leaf` is not None, apart from being passed to
+        `compute_p_prune_back`.
+    grow_move, prune_move : dict
+        The proposed moves, with fields 'allowed', 'node', 'var_tree',
+        'split_tree' and 'partial_ratio'.
+    key : jax.dtypes.prng_key array
+        A jax random key.
+    Returns
+    -------
+    resid : array (n,)
+        The residuals updated with the new tree.
+    counts : dict
+        A copy of the counters, incremented by the outcome of this step.
+    trees : dict
+        The new tree, under the keys 'leaf_trees', 'var_trees', 'split_trees'
+        and 'affluence_trees'.
+    Notes
+    -----
+    At most one move is attempted: grow with probability 0.5 if both moves
+    are allowed, otherwise the only allowed one, if any. The leaves are
+    sampled in any case.
+    """
+    # compute leaf indices according to grow move tree
+    traverse_tree = jax.vmap(grove.traverse_tree, in_axes=(1, None, None))
+    grow_leaf_indices = traverse_tree(X, grow_move['var_tree'], grow_move['split_tree'])
+    # compute leaf indices in starting tree
+    grow_node = grow_move['node']
+    grow_left = grow_node << 1
+    grow_right = grow_left + 1
+    leaf_indices = jnp.where(
+        (grow_leaf_indices == grow_left) | (grow_leaf_indices == grow_right),
+        grow_node,
+        grow_leaf_indices,
+    )
-    resid_tree, count_tree = agg_values(
-        bart['X'],
-        bart['var_trees'][i_tree],
-        bart['split_trees'][i_tree],
-        bart['resid'],
-        bart['sigma2'].dtype,
+    # compute leaf indices in prune tree
+    prune_node = prune_move['node']
+    prune_left = prune_node << 1
+    prune_right = prune_left + 1
+    prune_leaf_indices = jnp.where(
+        (leaf_indices == prune_left) | (leaf_indices == prune_right),
+        prune_node,
+        leaf_indices,
     )
-    prec_lk = count_tree / bart['sigma2']
-    prec_prior = len(bart['leaf_trees'])
-    var_post = 1 / (prec_lk + prec_prior) # lax.reciprocal?
-    mean_post = resid_tree / bart['sigma2'] * var_post # = mean_lk * prec_lk * var_post
+    # subtract starting tree from function
+    resid += leaf_tree[leaf_indices]
+    # aggregate residuals and count units per leaf
+    grow_resid_tree = jnp.zeros_like(leaf_tree, sigma2.dtype)
+    grow_resid_tree = grow_resid_tree.at[grow_leaf_indices].add(resid)
+    grow_count_tree = jnp.zeros_like(leaf_tree, grove.minimal_unsigned_dtype(resid.size))
+    grow_count_tree = grow_count_tree.at[grow_leaf_indices].add(1)
+    # compute aggregations in starting tree
+    # I do not zero the children because garbage there does not matter
+    resid_tree = (grow_resid_tree.at[grow_node]
+        .set(grow_resid_tree[grow_left] + grow_resid_tree[grow_right]))
+    count_tree = (grow_count_tree.at[grow_node]
+        .set(grow_count_tree[grow_left] + grow_count_tree[grow_right]))
+    # compute aggregations in prune tree
+    prune_resid_tree = (resid_tree.at[prune_node]
+        .set(resid_tree[prune_left] + resid_tree[prune_right]))
+    prune_count_tree = (count_tree.at[prune_node]
+        .set(count_tree[prune_left] + count_tree[prune_right]))
+    # compute affluence trees
+    if min_points_per_leaf is not None:
+        grow_affluence_tree = grow_count_tree[:grow_count_tree.size // 2] >= 2 * min_points_per_leaf
+        prune_affluence_tree = affluence_tree.at[prune_node].set(True)
+    else:
+        # without a minimum leaf size there is no affluence information; the
+        # names must be bound anyway because grow_affluence_tree is used
+        # below. NOTE(review): this relies on growable_leaves accepting None,
+        # as it already must for `affluence_tree` in this configuration
+        grow_affluence_tree = None
+        prune_affluence_tree = None
+    # compute probability of proposing prune
+    grow_p_prune = compute_p_prune_back(grow_move['split_tree'], grow_affluence_tree)
+    prune_p_prune = compute_p_prune_back(split_tree, affluence_tree)
+    # compute likelihood ratios
+    grow_lk_ratio = compute_likelihood_ratio(grow_resid_tree, grow_count_tree, sigma2, grow_node, ntree, min_points_per_leaf)
+    prune_lk_ratio = compute_likelihood_ratio(resid_tree, count_tree, sigma2, prune_node, ntree, min_points_per_leaf)
+    # compute acceptance ratios
+    grow_ratio = grow_p_prune * grow_move['partial_ratio'] * grow_lk_ratio
+    prune_ratio = prune_p_prune * prune_move['partial_ratio'] * prune_lk_ratio
+    # the prune factors are computed for the reverse (grow) direction
+    prune_ratio = lax.reciprocal(prune_ratio)
+    # random coins in [0, 1) for proposal and acceptance
+    key, subkey = random.split(key)
+    u0, u1 = random.uniform(subkey, (2,))
+    # determine what move to propose (not proposing anything is an option)
+    p_grow = jnp.where(grow_move['allowed'] & prune_move['allowed'], 0.5, grow_move['allowed'])
+    try_grow = u0 < p_grow
+    try_prune = prune_move['allowed'] & ~try_grow
+    # determine whether to accept the move
+    do_grow = try_grow & (u1 < grow_ratio)
+    do_prune = try_prune & (u1 < prune_ratio)
+    # pick trees for chosen move
+    var_tree = jnp.where(do_grow, grow_move['var_tree'], var_tree)
+    split_tree = jnp.where(do_grow, grow_move['split_tree'], split_tree)
+    var_tree = jnp.where(do_prune, prune_move['var_tree'], var_tree)
+    split_tree = jnp.where(do_prune, prune_move['split_tree'], split_tree)
+    if min_points_per_leaf is not None:
+        affluence_tree = jnp.where(do_grow, grow_affluence_tree, affluence_tree)
+        affluence_tree = jnp.where(do_prune, prune_affluence_tree, affluence_tree)
+    resid_tree = jnp.where(do_grow, grow_resid_tree, resid_tree)
+    count_tree = jnp.where(do_grow, grow_count_tree, count_tree)
+    resid_tree = jnp.where(do_prune, prune_resid_tree, resid_tree)
+    count_tree = jnp.where(do_prune, prune_count_tree, count_tree)
+    # update acceptance counts
+    counts = counts.copy()
+    counts['grow_prop_count'] += try_grow
+    counts['grow_acc_count'] += do_grow
+    counts['prune_prop_count'] += try_prune
+    counts['prune_acc_count'] += do_prune
+    # compute leaves posterior
+    prec_lk = count_tree / sigma2
+    var_post = lax.reciprocal(prec_lk + ntree) # = 1 / (prec_lk + prec_prior)
+    mean_post = resid_tree / sigma2 * var_post # = mean_lk * prec_lk * var_post
+    # sample leaves
     z = random.normal(key, mean_post.shape, mean_post.dtype)
-        # TODO maybe use long float here, I guess this part is not a bottleneck
     leaf_tree = mean_post + z * jnp.sqrt(var_post)
-    leaf_tree = leaf_tree.at[0].set(0) # this 0 is used by evaluate_tree
-    bart['leaf_trees'] = bart['leaf_trees'].at[i_tree].set(leaf_tree)
-    return bart
+    # add new tree to function
+    leaf_indices = jnp.where(do_grow, grow_leaf_indices, leaf_indices)
+    leaf_indices = jnp.where(do_prune, prune_leaf_indices, leaf_indices)
+    resid -= leaf_tree[leaf_indices]
-def agg_values(X, var_tree, split_tree, values, acc_dtype):
+    # pack trees
+    trees = {
+        'leaf_trees': leaf_tree,
+        'var_trees': var_tree,
+        'split_trees': split_tree,
+        'affluence_trees': affluence_tree,
+    }
+    return resid, counts, trees
+def compute_p_prune_back(new_split_tree, new_affluence_tree):
     """
-    Aggregate values at the leaves of a tree.
+    Compute the probability of proposing a prune move after doing a grow move.
     Parameters
     ----------
-    X : array (p, n)
-        The predictors.
-    var_tree : array (2 ** (d - 1),)
-        The variable indices of the tree.
-    split_tree : array (2 ** (d - 1),)
-        The splitting points of the tree.
-    values : array (n,)
-        The values to aggregate.
-    acc_dtype : dtype
-        The dtype of the output.
+    new_split_tree : int array (2 ** (d - 1),)
+        The decision boundaries of the tree, after the grow move.
+    new_affluence_tree : bool array (2 ** (d - 1),)
+        Which leaves have enough points to be grown, after the grow move.
     Returns
     -------
-    acc_tree : acc_dtype array (2 ** d,)
-        Tree leaves for the tree structure indicated by the arguments, where
-        each leaf contains the sum of the `values` whose corresponding `X` fall
-        into the leaf.
+    p_prune : float
+        The probability of proposing a prune move after the grow move. This is
+        0.5 if grow is possible again, and 1 if it isn't. It can't be 0 because
+        at least the node just grown can be pruned.
+    Notes
+    -----
+    `accept_move_and_sample_leaves` also calls this function on the starting
+    tree, to get the probability of proposing the prune move that is being
+    evaluated.
+    """
+    # only the flag saying whether some leaf can be grown is needed here
+    _, grow_again_allowed = growable_leaves(new_split_tree, new_affluence_tree)
+    return jnp.where(grow_again_allowed, 0.5, 1)
+def compute_likelihood_ratio(resid_tree, count_tree, sigma2, node, n_tree, min_points_per_leaf):
+    """
+    Compute the likelihood ratio of a grow move.
+    Parameters
+    ----------
+    resid_tree : float array (2 ** d,)
+        The sum of the residuals at data points in each leaf.
     count_tree : int array (2 ** d,)
-        Tree leaves containing the count of such values.
+        The number of data points in each leaf.
+    sigma2 : float
+        The noise variance.
+    node : int
+        The index of the leaf that has been grown.
+    n_tree : int
+        The number of trees in the forest.
+    min_points_per_leaf : int or None
+        The minimum number of data points in a leaf node.
+    Returns
+    -------
+    ratio : float
+        The likelihood ratio P(data | new tree) / P(data | old tree).
+    Notes
+    -----
+    The ratio is set to 0 if the grow move would create leaves with not enough
+    datapoints per leaf, although this is part of the prior rather than the
+    likelihood.
     """
-    depth = grove.tree_depth(var_tree) + 1
-    carry = (
-        jnp.zeros(values.size, bool),
-        jnp.ones(values.size, grove.minimal_unsigned_dtype(2 * var_tree.size - 1)),
-        grove.make_tree(depth, acc_dtype),
-        grove.make_tree(depth, grove.minimal_unsigned_dtype(values.size - 1)),
+    # the children of `node` are stored at indices 2 * node and 2 * node + 1
+    left_child = node << 1
+    right_child = left_child + 1
+    # only the entries of the two children are read; the totals for the
+    # parent are obtained by summing them
+    left_resid = resid_tree[left_child]
+    right_resid = resid_tree[right_child]
+    total_resid = left_resid + right_resid
+    left_count = count_tree[left_child]
+    right_count = count_tree[right_child]
+    total_count = left_count + right_count
+    # presumably the prior variance of a leaf value (sigma_mu^2 = 1 / n_tree)
+    sigma_mu2 = 1 / n_tree
+    sigma2_left = sigma2 + left_count * sigma_mu2
+    sigma2_right = sigma2 + right_count * sigma_mu2
+    sigma2_total = sigma2 + total_count * sigma_mu2
+    sqrt_term = sigma2 * sigma2_total / (sigma2_left * sigma2_right)
+    exp_term = sigma_mu2 / (2 * sigma2) * (
+        left_resid * left_resid / sigma2_left +
+        right_resid * right_resid / sigma2_right -
+        total_resid * total_resid / sigma2_total
     )
-    unit_index = jnp.arange(values.size, dtype=grove.minimal_unsigned_dtype(values.size - 1))
-    def loop(carry, _):
-        leaf_found, node_index, acc_tree, count_tree = carry
-        is_leaf = split_tree.at[node_index].get(mode='fill', fill_value=0) == 0
-        leaf_count = is_leaf & ~leaf_found
-        leaf_values = jnp.where(leaf_count, values, jnp.array(0, values.dtype))
-        acc_tree = acc_tree.at[node_index].add(leaf_values)
-        count_tree = count_tree.at[node_index].add(leaf_count)
-        leaf_found |= is_leaf
-        split = split_tree[node_index]
-        var = var_tree.at[node_index].get(mode='fill', fill_value=0)
-        x = X[var, unit_index]
-        node_index <<= 1
-        node_index += x >= split
-        node_index = jnp.where(leaf_found, 0, node_index)
-        carry = leaf_found, node_index, acc_tree, count_tree
-        return carry, None
-    (_, _, acc_tree, count_tree), _ = lax.scan(loop, carry, None, depth)
-    return acc_tree, count_tree
-def mcmc_sample_sigma(bart, key):
+    ratio = jnp.sqrt(sqrt_term) * jnp.exp(exp_term)
+    # veto the move if either child would have less than the minimum number
+    # of points
+    if min_points_per_leaf is not None:
+        ratio = jnp.where(right_count >= min_points_per_leaf, ratio, 0)
+        ratio = jnp.where(left_count >= min_points_per_leaf, ratio, 0)
+    return ratio
+def sample_sigma(bart, key):
     """
     Noise variance sampling step of BART MCMC.
     Parameters
     ----------
     bart : dict
-        A BART mcmc state, as created by `make_bart`.
+        A BART mcmc state, as created by `init`.
     key : jax.dtypes.prng_key array
         A jax random key.
@@ -1028,8 +980,8 @@ def mcmc_sample_sigma(bart, key):
     resid = bart['resid']
     alpha = bart['sigma2_alpha'] + resid.size / 2
-    norm = jnp.dot(resid, resid, preferred_element_type=bart['sigma2_beta'].dtype)
-    beta = bart['sigma2_beta'] + norm / 2
+    norm2 = jnp.dot(resid, resid, preferred_element_type=bart['sigma2_beta'].dtype)
+    beta = bart['sigma2_beta'] + norm2 / 2
     sample = random.gamma(key, alpha)
     bart['sigma2'] = beta / sample

bartz 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl

bartz 0.0.1py3-none-any.whl → 0.1.0py3-none-any.whl