bartz 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bartz/BART.py +43 -18
- bartz/_version.py +1 -1
- bartz/grove.py +19 -14
- bartz/jaxext.py +48 -21
- bartz/mcmcloop.py +13 -15
- bartz/mcmcstep.py +681 -299
- bartz/prepcovars.py +43 -13
- bartz-0.3.0.dist-info/METADATA +77 -0
- bartz-0.3.0.dist-info/RECORD +13 -0
- bartz-0.2.1.dist-info/METADATA +0 -32
- bartz-0.2.1.dist-info/RECORD +0 -13
- {bartz-0.2.1.dist-info → bartz-0.3.0.dist-info}/LICENSE +0 -0
- {bartz-0.2.1.dist-info → bartz-0.3.0.dist-info}/WHEEL +0 -0
bartz/BART.py
CHANGED
@@ -10,10 +10,10 @@
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
-#
+#
 # The above copyright notice and this permission notice shall be included in all
 # copies or substantial portions of the Software.
-#
+#
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -49,6 +49,9 @@ class gbart:
         The training responses.
     x_test : array (p, m) or DataFrame, optional
         The test predictors.
+    usequants : bool, default False
+        Whether to use predictors quantiles instead of a uniform grid to bin
+        predictors.
     sigest : float, optional
         An estimate of the residual standard deviation on `y_train`, used to
         set `lamda`. If not specified, it is estimated by linear regression.
@@ -82,10 +85,16 @@ class gbart:
     ntree : int, default 200
         The number of trees used to represent the latent mean function.
     numcut : int, default 255
-
-
-
-
+        If `usequants` is `False`: the exact number of cutpoints used to bin the
+        predictors, ranging between the minimum and maximum observed values
+        (excluded).
+
+        If `usequants` is `True`: the maximum number of cutpoints to use for
+        binning the predictors. Each predictor is binned such that its
+        distribution in `x_train` is approximately uniform across bins. The
+        number of bins is at most the number of unique values appearing in
+        `x_train`, or ``numcut + 1``.
+
         Before running the algorithm, the predictors are compressed to the
         smallest integer type that fits the bin indices, so `numcut` is best set
         to the maximum value of an unsigned integer type.
@@ -126,6 +135,8 @@ class gbart:
         The number of trees.
     maxdepth : int
         The maximum depth of the trees.
+    initkw : dict
+        Additional arguments passed to `mcmcstep.init`.
 
     Methods
     -------
@@ -133,21 +144,26 @@ class gbart:
 
     Notes
     -----
-    This interface imitates the function
+    This interface imitates the function ``gbart`` from the R package `BART
     <https://cran.r-project.org/package=BART>`_, but with these differences:
 
     - If `x_train` and `x_test` are matrices, they have one predictor per row
       instead of per column.
+    - If ``usequants=False``, R BART switches to quantiles anyway if there are
+      less predictor values than the required number of bins, while bartz
+      always follows the specification.
     - The error variance parameter is called `lamda` instead of `lambda`.
-    - `usequants` is always `True`.
     - `rm_const` is always `False`.
     - The default `numcut` is 255 instead of 100.
     - A lot of functionality is missing (variable selection, discrete response).
     - There are some additional attributes, and some missing.
+
+    The linear regression used to set `sigest` adds an intercept.
     """
 
     def __init__(self, x_train, y_train, *,
         x_test=None,
+        usequants=False,
         sigest=None,
         sigdf=3,
         sigquant=0.9,
@@ -164,24 +180,25 @@ class gbart:
         keepevery=1,
         printevery=100,
         seed=0,
+        initkw={},
     ):
 
         x_train, x_train_fmt = self._process_predictor_input(x_train)
-
+
         y_train, y_train_fmt = self._process_response_input(y_train)
         self._check_same_length(x_train, y_train)
-
+
         offset = self._process_offset_settings(y_train, offset)
         scale = self._process_scale_settings(y_train, k)
         lamda, sigest = self._process_noise_variance_settings(x_train, y_train, sigest, sigdf, sigquant, lamda, offset)
 
-        splits, max_split = self._determine_splits(x_train, numcut)
+        splits, max_split = self._determine_splits(x_train, usequants, numcut)
         x_train = self._bin_predictors(x_train, splits)
 
         y_train = self._transform_input(y_train, offset, scale)
         lamda_scaled = lamda / (scale * scale)
 
-        mcmc_state = self._setup_mcmc(x_train, y_train, max_split, lamda_scaled, sigdf, power, base, maxdepth, ntree)
+        mcmc_state = self._setup_mcmc(x_train, y_train, max_split, lamda_scaled, sigdf, power, base, maxdepth, ntree, initkw)
         final_state, burnin_trace, main_trace = self._run_mcmc(mcmc_state, ndpost, nskip, keepevery, printevery, seed)
 
         sigma = self._extract_sigma(main_trace, scale)
@@ -279,7 +296,10 @@ class gbart:
         elif y_train.size <= x_train.shape[0]:
             sigest2 = jnp.var(y_train - offset)
         else:
-
+            x_centered = x_train.T - x_train.mean(axis=1)
+            y_centered = y_train - y_train.mean()
+            # centering is equivalent to adding an intercept column
+            _, chisq, rank, _ = jnp.linalg.lstsq(x_centered, y_centered)
             chisq = chisq.squeeze(0)
             dof = len(y_train) - rank
             sigest2 = chisq / dof
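The new `else` branch above fits an ordinary least-squares regression on centered data; as its comment says, centering both `X` and `y` is equivalent to adding an intercept column. A minimal NumPy check of that equivalence on toy data (names and data here are illustrative, not part of bartz):

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(50, 3))                   # toy (n, p) design matrix
    y = X @ np.array([1.0, -2.0, 0.5]) + rng.normal(size=50)

    # fit with an explicit intercept column
    X_icpt = np.column_stack([np.ones(50), X])
    _, rss_icpt, _, _ = np.linalg.lstsq(X_icpt, y, rcond=None)

    # fit with both sides centered instead
    Xc = X - X.mean(axis=0)
    yc = y - y.mean()
    _, rss_centered, _, _ = np.linalg.lstsq(Xc, yc, rcond=None)

    # the residual sum of squares is the same either way
    assert np.allclose(rss_icpt, rss_centered)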
@@ -305,8 +325,11 @@ class gbart:
         return (y_train.max() - y_train.min()) / (2 * k)
 
     @staticmethod
-    def _determine_splits(x_train, numcut):
-
+    def _determine_splits(x_train, usequants, numcut):
+        if usequants:
+            return prepcovars.quantilized_splits_from_matrix(x_train, numcut + 1)
+        else:
+            return prepcovars.uniform_splits_from_matrix(x_train, numcut + 1)
 
     @staticmethod
     def _bin_predictors(x, splits):
@@ -317,12 +340,12 @@
         return (y - offset) / scale
 
     @staticmethod
-    def _setup_mcmc(x_train, y_train, max_split, lamda, sigdf, power, base, maxdepth, ntree):
+    def _setup_mcmc(x_train, y_train, max_split, lamda, sigdf, power, base, maxdepth, ntree, initkw):
         depth = jnp.arange(maxdepth - 1)
         p_nonterminal = base / (1 + depth).astype(float) ** power
         sigma2_alpha = sigdf / 2
         sigma2_beta = lamda * sigma2_alpha
-        return mcmcstep.init(
+        kw = dict(
             X=x_train,
             y=y_train,
             max_split=max_split,
@@ -332,6 +355,8 @@ class gbart:
             sigma2_beta=sigma2_beta,
             min_points_per_leaf=5,
         )
+        kw.update(initkw)
+        return mcmcstep.init(**kw)
 
     @staticmethod
     def _run_mcmc(mcmc_state, ndpost, nskip, keepevery, printevery, seed):
@@ -354,7 +379,7 @@
     def _extract_sigma(trace, scale):
         return scale * jnp.sqrt(trace['sigma2'])
 
-
+
     def _show_tree(self, i_sample, i_tree, print_all=False):
         from . import debug
         trace = self._main_trace
bartz/_version.py
CHANGED
@@ -1 +1 @@
-__version__ = '0.2.1'
+__version__ = '0.3.0'
bartz/grove.py
CHANGED
@@ -10,10 +10,10 @@
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
-#
+#
 # The above copyright notice and this permission notice shall be included in all
 # copies or substantial portions of the Software.
-#
+#
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -114,7 +114,7 @@ def traverse_tree(x, var_tree, split_tree):
 
         split = split_tree[index]
         var = var_tree[index]
-
+
         leaf_found |= split == 0
         child_index = (index << 1) + (x[var] >= split)
         index = jnp.where(leaf_found, index, child_index)
@@ -147,7 +147,7 @@ def traverse_forest(X, var_trees, split_trees):
     """
     return traverse_tree(X, var_trees, split_trees)
 
-def evaluate_forest(X, leaf_trees, var_trees, split_trees, dtype):
+def evaluate_forest(X, leaf_trees, var_trees, split_trees, dtype=None, sum_trees=True):
     """
     Evaluate a ensemble of trees at an array of points.
 
@@ -162,21 +162,26 @@ def evaluate_forest(X, leaf_trees, var_trees, split_trees, dtype):
         The decision axes of the trees.
     split_trees : array (m, 2 ** (d - 1))
         The decision boundaries of the trees.
-    dtype : dtype
-        The dtype of the output.
+    dtype : dtype, optional
+        The dtype of the output. Ignored if `sum_trees` is `False`.
+    sum_trees : bool, default True
+        Whether to sum the values across trees.
 
     Returns
     -------
-    out : array (n,)
-        The sum of the values of the trees at the points in `X`.
+    out : array (n,) or (m, n)
+        The (sum of) the values of the trees at the points in `X`.
     """
     indices = traverse_forest(X, var_trees, split_trees)
     ntree, _ = leaf_trees.shape
-    tree_index = jnp.arange(ntree, dtype=jaxext.minimal_unsigned_dtype(ntree - 1))
-    leaves = leaf_trees[tree_index, indices]
-
-
-
+    tree_index = jnp.arange(ntree, dtype=jaxext.minimal_unsigned_dtype(ntree - 1))
+    leaves = leaf_trees[tree_index[:, None], indices]
+    if sum_trees:
+        return jnp.sum(leaves, axis=0, dtype=dtype)
+        # this sum suggests to swap the vmaps, but I think it's better for X
+        # copying to keep it that way
+    else:
+        return leaves
 
 def is_actual_leaf(split_tree, *, add_bottom_level=False):
     """
@@ -238,7 +243,7 @@ def tree_depths(tree_length):
     tree_length : int
         The length of the tree array, i.e., 2 ** d.
 
-    Returns
+    Returns
     -------
     depth : array (tree_length,)
         The depth of each node. The root node (index 1) has depth 0. The depth
bartz/jaxext.py
CHANGED
@@ -196,13 +196,14 @@ def autobatch(func, max_io_nbytes, in_axes=0, out_axes=0, return_nbatches=False)
         A jittable function with positional arguments only, with inputs and
         outputs pytrees of arrays.
     max_io_nbytes : int
-        The maximum number of input + output bytes in each batch
-
+        The maximum number of input + output bytes in each batch (excluding
+        unbatched arguments.)
+    in_axes : pytree of int or None, default 0
         A tree matching the structure of the function input, indicating along
         which axes each array should be batched. If a single integer, it is
-        used for all arrays.
+        used for all arrays. A `None` axis indicates to not batch an argument.
     out_axes : pytree of ints, default 0
-        The same for outputs.
+        The same for outputs (but non-batching is not allowed).
     return_nbatches : bool, default False
         If True, the number of batches is returned as a second output.
 
@@ -218,8 +219,18 @@ def autobatch(func, max_io_nbytes, in_axes=0, out_axes=0, return_nbatches=False)
             return tree_util.tree_map(lambda _: axes, tree)
         return tree_util.tree_map(lambda _, axis: axis, tree, axes)
 
+    def check_no_nones(axes, tree):
+        def check_not_none(_, axis):
+            assert axis is not None
+        tree_util.tree_map(check_not_none, tree, axes)
+
     def extract_size(axes, tree):
-
+        def get_size(x, axis):
+            if axis is None:
+                return None
+            else:
+                return x.shape[axis]
+        sizes = tree_util.tree_map(get_size, tree, axes)
         sizes, _ = tree_util.tree_flatten(sizes)
         assert all(s == sizes[0] for s in sizes)
         return sizes[0]
@@ -243,23 +254,37 @@ def autobatch(func, max_io_nbytes, in_axes=0, out_axes=0, return_nbatches=False)
         return dividend
 
    def next_divisor(dividend, min_divisor):
+        if dividend == 0:
+            return min_divisor
        if min_divisor * min_divisor <= dividend:
            return next_divisor_small(dividend, min_divisor)
        return next_divisor_large(dividend, min_divisor)
 
+    def pull_nonbatched(axes, tree):
+        def pull_nonbatched(x, axis):
+            if axis is None:
+                return None
+            else:
+                return x
+        return tree_util.tree_map(pull_nonbatched, tree, axes), tree
+
+    def push_nonbatched(axes, tree, original_tree):
+        def push_nonbatched(original_x, x, axis):
+            if axis is None:
+                return original_x
+            else:
+                return x
+        return tree_util.tree_map(push_nonbatched, original_tree, tree, axes)
+
    def move_axes_out(axes, tree):
-        def move_axis_out(
-
-
-            return x
-        return tree_util.tree_map(move_axis_out, axes, tree)
+        def move_axis_out(x, axis):
+            return jnp.moveaxis(x, axis, 0)
+        return tree_util.tree_map(move_axis_out, tree, axes)
 
    def move_axes_in(axes, tree):
-        def move_axis_in(
-
-
-            return x
-        return tree_util.tree_map(move_axis_in, axes, tree)
+        def move_axis_in(x, axis):
+            return jnp.moveaxis(x, 0, axis)
+        return tree_util.tree_map(move_axis_in, tree, axes)
 
    def batch(tree, nbatches):
        def batch(x):
@@ -287,16 +312,17 @@ def autobatch(func, max_io_nbytes, in_axes=0, out_axes=0, return_nbatches=False)
 
         in_axes = expand_axes(initial_in_axes, args)
         out_axes = expand_axes(initial_out_axes, example_result)
+        check_no_nones(out_axes, example_result)
+
+        size = extract_size((in_axes, out_axes), (args, example_result))
 
-        in_size = extract_size(in_axes, args)
-        out_size = extract_size(out_axes, example_result)
-        assert in_size == out_size
-        size = in_size
+        args, nonbatched_args = pull_nonbatched(in_axes, args)
 
-        total_nbytes = sum_nbytes(args
+        total_nbytes = sum_nbytes((args, example_result))
         min_nbatches = total_nbytes // max_io_nbytes + bool(total_nbytes % max_io_nbytes)
+        min_nbatches = max(1, min_nbatches)
         nbatches = next_divisor(size, min_nbatches)
-        assert 1 <= nbatches <= size
+        assert 1 <= nbatches <= max(1, size)
         assert size % nbatches == 0
         assert total_nbytes % nbatches == 0
 
@@ -307,6 +333,7 @@ def autobatch(func, max_io_nbytes, in_axes=0, out_axes=0, return_nbatches=False)
 
         def loop(_, args):
             args = move_axes_in(in_axes, args)
+            args = push_nonbatched(in_axes, args, nonbatched_args)
             result = func(*args)
             result = move_axes_out(out_axes, result)
             return None, result
bartz/mcmcloop.py
CHANGED
@@ -10,10 +10,10 @@
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
-#
+#
 # The above copyright notice and this permission notice shall be included in all
 # copies or substantial portions of the Software.
-#
+#
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -34,8 +34,9 @@ from jax import debug
 from jax import numpy as jnp
 from jax import lax
 
-from . import
+from . import jaxext
 from . import grove
+from . import mcmcstep
 
 @functools.partial(jax.jit, static_argnums=(1, 2, 3, 4))
 def run_mcmc(bart, n_burn, n_save, n_skip, callback, key):
@@ -91,7 +92,7 @@ def run_mcmc(bart, n_burn, n_save, n_skip, callback, key):
         the fields in `burnin_trace`.
     """
 
-    tracelist_burnin = 'sigma2', 'grow_prop_count', 'grow_acc_count', 'prune_prop_count', 'prune_acc_count'
+    tracelist_burnin = 'sigma2', 'grow_prop_count', 'grow_acc_count', 'prune_prop_count', 'prune_acc_count', 'ratios'
 
     tracelist_main = tracelist_burnin + ('leaf_trees', 'var_trees', 'split_trees')
 
@@ -102,14 +103,11 @@ def run_mcmc(bart, n_burn, n_save, n_skip, callback, key):
         key, subkey = random.split(key)
         bart = mcmcstep.step(bart, subkey)
         callback(bart=bart, burnin=burnin, i_total=i_total, i_skip=i_skip, **callback_kw)
-        output = {key: bart[key] for key in tracelist}
+        output = {key: bart[key] for key in tracelist if key in bart}
         return (bart, i_total + 1, i_skip + 1, key), output
 
     def empty_trace(bart, tracelist):
-        return {
-            key: jnp.empty((0,) + bart[key].shape, bart[key].dtype)
-            for key in tracelist
-        }
+        return jax.vmap(lambda x: x, in_axes=None, out_axes=0, axis_size=0)(bart)
 
     if n_burn > 0:
         carry = bart, 0, 0, key
@@ -124,7 +122,7 @@ def run_mcmc(bart, n_burn, n_save, n_skip, callback, key):
         main_loop = functools.partial(inner_loop, tracelist=[], burnin=False)
         inner_carry = bart, i_total, 0, key
         (bart, i_total, _, key), _ = lax.scan(main_loop, inner_carry, None, n_skip)
-        output = {key: bart[key] for key in tracelist_main}
+        output = {key: bart[key] for key in tracelist_main if key in bart}
         return (bart, i_total, key), output
 
     if n_save > 0:
@@ -135,12 +133,9 @@ def run_mcmc(bart, n_burn, n_save, n_skip, callback, key):
 
     return bart, burnin_trace, main_trace
 
-# TODO I could add an argument callback_state to carry over state. This would allow e.g. accumulating counts. If I made the callback return the mcmc state, I could modify the mcmc from the callback.
-
 @functools.lru_cache
 # cache to make the callback function object unique, such that the jit
-# of run_mcmc recognizes it
-# printevery a runtime quantity
+# of run_mcmc recognizes it
 def make_simple_print_callback(printevery):
     """
     Create a logging callback function for MCMC iterations.
@@ -193,7 +188,10 @@ def evaluate_trace(trace, X):
     y : array (n_trace, n)
         The predictions for each iteration of the MCMC.
     """
+    evaluate_trees = functools.partial(grove.evaluate_forest, sum_trees=False)
+    evaluate_trees = jaxext.autobatch(evaluate_trees, 2 ** 29, (None, 0, 0, 0))
     def loop(_, state):
-
+        values = evaluate_trees(X, state['leaf_trees'], state['var_trees'], state['split_trees'])
+        return None, jnp.sum(values, axis=0, dtype=jnp.float32)
     _, y = lax.scan(loop, None, trace)
     return y