bartz 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  # bartz/src/bartz/jaxext/_autobatch.py
  #
- # Copyright (c) 2025, Giacomo Petrillo
+ # Copyright (c) 2025-2026, The Bartz Contributors
  #
  # This file is part of bartz.
  #
@@ -26,16 +26,23 @@
 
  import math
  from collections.abc import Callable
- from functools import wraps
+ from functools import partial, wraps
  from warnings import warn
 
- from jax import eval_shape, jit
+ from jax.typing import DTypeLike
+
+ try:
+     from numpy.lib.array_utils import normalize_axis_index  # numpy 2
+ except ImportError:
+     from numpy.core.numeric import normalize_axis_index  # numpy 1
+
+ from jax import ShapeDtypeStruct, eval_shape, jit
  from jax import numpy as jnp
  from jax.lax import scan
  from jax.tree import flatten as tree_flatten
  from jax.tree import map as tree_map
  from jax.tree import reduce as tree_reduce
- from jaxtyping import PyTree
+ from jaxtyping import Array, PyTree, Shaped
 
 
  def expand_axes(axes, tree):
@@ -47,14 +54,43 @@ def expand_axes(axes, tree):
      return tree_map(expand_axis, axes, tree, is_leaf=lambda x: x is None)
 
 
+ def normalize_axes(
+     axes: PyTree[int | None, ' T'], tree: PyTree[Array, ' T']
+ ) -> PyTree[int | None, ' T']:
+     """Normalize axes to be non-negative and valid for the corresponding arrays in the tree."""
+
+     def normalize_axis(axis: int | None, x: Array) -> int | None:
+         if axis is None:
+             return None
+         else:
+             return normalize_axis_index(axis, len(x.shape))
+
+     return tree_map(normalize_axis, axes, tree, is_leaf=lambda x: x is None)
+
+
  def check_no_nones(axes, tree):
      def check_not_none(_, axis):
          assert axis is not None
 
-     tree_map(check_not_none, tree, axes)
+     tree_map(check_not_none, tree, axes, is_leaf=lambda x: x is None)
+
+
+ def remove_axis(
+     x: PyTree[ShapeDtypeStruct, ' T'], axis: PyTree[int, ' T'], ufunc: jnp.ufunc
+ ) -> PyTree[ShapeDtypeStruct, ' T']:
+     """Remove an axis from dummy arrays and change the dtype to the reduction dtype."""
+
+     def remove_axis(x: ShapeDtypeStruct, axis: int) -> ShapeDtypeStruct:
+         new_shape = x.shape[:axis] + x.shape[axis + 1 :]
+         new_dtype = reduction_dtype(ufunc, x.dtype)
+         return ShapeDtypeStruct(new_shape, new_dtype)
+
+     return tree_map(remove_axis, x, axis)
 
 
  def extract_size(axes, tree):
+     """Get the size of each array in `tree` along the corresponding axis in `axes`, check that the sizes agree, and return the common size."""
+
      def get_size(x, axis):
          if axis is None:
              return None
@@ -90,6 +126,7 @@ def next_divisor_large(dividend, min_divisor):
 
 
  def next_divisor(dividend, min_divisor):
+     """Return a divisor >= min_divisor such that dividend % divisor == 0."""
      if dividend == 0:
          return min_divisor
      if min_divisor * min_divisor <= dividend:
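For reference, this divisor search is what picks the batch count in `autobatch` below. A quick sketch of the arithmetic (hypothetical numbers; `next_divisor` is module-private):

    # with 294_912 total I/O bytes and a 65_536-byte limit, the ceiling
    # division gives min_nbatches = 5; the smallest divisor of a batched
    # axis of size 1024 that is >= 5 is 8, so each of the 8 batches
    # processes 1024 // 8 == 128 elements
    assert next_divisor(1024, 5) == 8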
@@ -131,20 +168,73 @@ def move_axes_in(axes, tree):
      return tree_map(move_axis_in, tree, axes)
 
 
- def batch(tree, nbatches):
+ def batch(tree: PyTree[Array, ' T'], nbatches: int) -> PyTree[Array, ' T']:
+     """Split the first axis into two axes, the first of size `nbatches`."""
+
      def batch(x):
-         return x.reshape((nbatches, x.shape[0] // nbatches) + x.shape[1:])
+         return x.reshape(nbatches, x.shape[0] // nbatches, *x.shape[1:])
 
      return tree_map(batch, tree)
 
 
- def unbatch(tree):
+ def unbatch(tree: PyTree[Array, ' T']) -> PyTree[Array, ' T']:
+     """Merge the first two axes into a single axis."""
+
      def unbatch(x):
-         return x.reshape((x.shape[0] * x.shape[1],) + x.shape[2:])
+         return x.reshape(x.shape[0] * x.shape[1], *x.shape[2:])
 
      return tree_map(unbatch, tree)
 
 
+ def reduce(
+     ufunc: jnp.ufunc,
+     x: PyTree[Array, ' T'],
+     axes: PyTree[int, ' T'],
+     initial: PyTree[Array, ' T'] | None,
+ ) -> PyTree[Array, ' T']:
+     """Reduce each array in `x` along the axes in `axes`, starting from `initial`, using `ufunc.reduce`."""
+     if initial is None:
+
+         def reduce(x: Array, axis: int) -> Array:
+             return ufunc.reduce(x, axis=axis)
+
+         return tree_map(reduce, x, axes)
+
+     else:
+
+         def reduce(x: Array, initial: Array, axis: int) -> Array:
+             reduced = ufunc.reduce(x, axis=axis)
+             return ufunc(initial, reduced)
+
+         return tree_map(reduce, x, initial, axes)
+
+
+ def identity(
+     ufunc: jnp.ufunc, x: PyTree[ShapeDtypeStruct, ' T']
+ ) -> PyTree[Array, ' T']:
+     """Get the identity element of `ufunc` for each array in `x`."""
+
+     def identity(x: ShapeDtypeStruct) -> Array:
+         identity = identity_for(ufunc, x.dtype)
+         return jnp.broadcast_to(identity, x.shape)
+
+     return tree_map(identity, x)
+
+
+ def reduction_dtype(ufunc: jnp.ufunc, input_dtype: DTypeLike) -> DTypeLike:
+     """Return the output dtype of a reduction with `ufunc` on inputs of dtype `input_dtype`."""
+     return ufunc.reduce(jnp.empty(1, input_dtype)).dtype
+
+
+ def identity_for(ufunc: jnp.ufunc, input_dtype: DTypeLike) -> Shaped[Array, '']:
+     """Return the identity of `ufunc` as an array scalar with the right dtype."""
+     # get the output dtype from the input dtype, e.g., int8 is accumulated to int32
+     dtype = reduction_dtype(ufunc, input_dtype)
+
+     # return as an explicitly typed array
+     return jnp.array(ufunc.identity, dtype)
+
+
  def check_same(tree1, tree2):
      def check_same(x1, x2):
          assert x1.shape == x2.shape
@@ -153,12 +243,20 @@ def check_same(tree1, tree2):
      tree_map(check_same, tree1, tree2)
 
 
+ class NotDefined:
+     pass
+
+
  def autobatch(
      func: Callable,
      max_io_nbytes: int,
      in_axes: PyTree[int | None] = 0,
      out_axes: PyTree[int] = 0,
+     *,
      return_nbatches: bool = False,
+     reduce_ufunc: jnp.ufunc | None = None,
+     warn_on_overflow: bool = True,
+     result_shape_dtype: PyTree[ShapeDtypeStruct] = NotDefined,
  ) -> Callable:
      """
      Batch a function such that each batch is smaller than a threshold.
@@ -179,60 +277,168 @@ def autobatch(
          The same for outputs (but non-batching is not allowed).
      return_nbatches
          If True, the number of batches is returned as a second output.
+     reduce_ufunc
+         Function used to reduce the output along the batched axis (e.g.,
+         `jax.numpy.add`).
+     warn_on_overflow
+         If True, a warning is raised if the memory limit could not be
+         respected.
+     result_shape_dtype
+         A pytree of dummy arrays matching the expected output. If not
+         provided, the function is traced an additional time to determine the
+         output structure.
 
      Returns
      -------
      A function with the same signature as `func`, save for the return value if `return_nbatches`.
+
+     Notes
+     -----
+     Unless `return_nbatches` or `reduce_ufunc` are set, `autobatch` at given
+     arguments is idempotent. Furthermore, `autobatch` can be applied multiple
+     times with the same `max_io_nbytes` limit to batch over multiple axes; in
+     that case it won't unnecessarily loop over additional axes if one or more
+     outer `autobatch` applications are already sufficient.
+
+     To account for memory used by intermediate values: assuming all
+     intermediate values have a size that scales linearly with the batched
+     axis, if the total size of the batched inputs/outputs is ``batched_size *
+     core_io_size`` and the intermediate values have size ``batched_size *
+     core_int_size``, divide `max_io_nbytes` by ``(1 + core_int_size /
+     core_io_size)`` to take the intermediates into account.
      """
-     initial_in_axes = in_axes
-     initial_out_axes = out_axes
 
      @jit
      @wraps(func)
-     def batched_func(*args):
-         example_result = eval_shape(func, *args)
-
-         in_axes = expand_axes(initial_in_axes, args)
-         out_axes = expand_axes(initial_out_axes, example_result)
-         check_no_nones(out_axes, example_result)
-
-         size = extract_size((in_axes, out_axes), (args, example_result))
+     def autobatch_wrapper(*args):
+         return batched_func(
+             func,
+             max_io_nbytes,
+             in_axes,
+             out_axes,
+             return_nbatches,
+             reduce_ufunc,
+             warn_on_overflow,
+             result_shape_dtype,
+             args,
+         )
 
-         args, nonbatched_args = pull_nonbatched(in_axes, args)
+     return autobatch_wrapper
 
-         total_nbytes = sum_nbytes((args, example_result))
-         min_nbatches = total_nbytes // max_io_nbytes + bool(
-             total_nbytes % max_io_nbytes
-         )
-         min_nbatches = max(1, min_nbatches)
-         nbatches = next_divisor(size, min_nbatches)
-         assert 1 <= nbatches <= max(1, size)
-         assert size % nbatches == 0
-         assert total_nbytes % nbatches == 0
-
-         batch_nbytes = total_nbytes // nbatches
-         if batch_nbytes > max_io_nbytes:
-             assert size == nbatches
-             msg = f'batch_nbytes = {batch_nbytes} > max_io_nbytes = {max_io_nbytes}'
-             warn(msg)
-
-         def loop(_, args):
-             args = move_axes_in(in_axes, args)
-             args = push_nonbatched(in_axes, args, nonbatched_args)
-             result = func(*args)
-             result = move_axes_out(out_axes, result)
-             return None, result
 
+ def batched_func(
+     func: Callable,
+     max_io_nbytes: int,
+     in_axes: PyTree[int | None],
+     out_axes: PyTree[int],
+     return_nbatches: bool,
+     reduce_ufunc: jnp.ufunc | None,
+     warn_on_overflow: bool,
+     result_shape_dtype: PyTree[ShapeDtypeStruct] | NotDefined,
+     args: tuple[PyTree[Array], ...],
+ ) -> PyTree[Array]:
+     """Implement the wrapper used in `autobatch`."""
+     # determine the output structure of the function
+     if result_shape_dtype is NotDefined:
+         example_result = eval_shape(func, *args)
+     else:
+         example_result = result_shape_dtype
+
+     # expand the axes pytrees if they are prefixes
+     in_axes = expand_axes(in_axes, args)
+     out_axes = expand_axes(out_axes, example_result)
+     check_no_nones(out_axes, example_result)
+
+     # check that the axes are valid
+     in_axes = normalize_axes(in_axes, args)
+     out_axes = normalize_axes(out_axes, example_result)
+
+     # get the size of the batched axis
+     size = extract_size((in_axes, out_axes), (args, example_result))
+
+     # split the arguments into batched and non-batched
+     original_args = args
+     args, nonbatched_args = pull_nonbatched(in_axes, args)
+
+     # determine the number of batches needed to respect the memory limit
+     total_nbytes = sum_nbytes((args, example_result))
+     min_nbatches = total_nbytes // max_io_nbytes + bool(total_nbytes % max_io_nbytes)
+     min_nbatches = max(1, min_nbatches)
+     nbatches = next_divisor(size, min_nbatches)
+     assert 1 <= nbatches <= max(1, size)
+     assert size % nbatches == 0
+     assert total_nbytes % nbatches == 0
+
+     # warn if the memory limit could not be respected
+     batch_nbytes = total_nbytes // nbatches
+     if batch_nbytes > max_io_nbytes and warn_on_overflow:
+         assert size == nbatches
+         msg = f'batch_nbytes = {batch_nbytes:_} > max_io_nbytes = {max_io_nbytes:_}'
+         warn(msg)
+
+     # squeeze out the output dims that will be reduced
+     if reduce_ufunc is not None:
+         example_result = remove_axis(example_result, out_axes, reduce_ufunc)
+
+     if nbatches > 1:
+         # prepare the arguments for looping
          args = move_axes_out(in_axes, args)
          args = batch(args, nbatches)
-         _, result = scan(loop, None, args)
-         result = unbatch(result)
-         result = move_axes_in(out_axes, result)
-
-         check_same(example_result, result)
 
-         if return_nbatches:
-             return result, nbatches
-         return result
+         # prepare the carry for the reduction
+         if reduce_ufunc is None:
+             initial = None
+         else:
+             initial = identity(reduce_ufunc, example_result)
+
+         # loop and invoke the function in batches
+         loop = partial(
+             batching_loop,
+             func=func,
+             nonbatched_args=nonbatched_args,
+             in_axes=in_axes,
+             out_axes=out_axes,
+             reduce_ufunc=reduce_ufunc,
+         )
+         reduced_result, result = scan(loop, initial, args)
 
-     return batched_func
+         # remove the auxiliary batching axis and reverse the transposition
+         if reduce_ufunc is None:
+             assert reduced_result is None
+             result = unbatch(result)
+             result = move_axes_in(out_axes, result)
+         else:
+             assert result is None
+             result = reduced_result
+
+     # trivial case: no batching needed
+     else:
+         result = func(*original_args)
+         if reduce_ufunc is not None:
+             result = reduce(reduce_ufunc, result, out_axes, None)
+
+     check_same(example_result, result)
+
+     if return_nbatches:
+         return result, nbatches
+     return result
+
+
+ def batching_loop(
+     initial, args, *, func, nonbatched_args, in_axes, out_axes, reduce_ufunc
+ ):
+     """Implement the batching loop in `autobatch`."""
+     # evaluate the function
+     args = move_axes_in(in_axes, args)
+     args = push_nonbatched(in_axes, args, nonbatched_args)
+     result = func(*args)
+
+     # unreduced case: transpose for concatenation and return
+     if reduce_ufunc is None:
+         result = move_axes_out(out_axes, result)
+         return None, result
+
+     # reduced case: reduce starting from `initial`
+     else:
+         reduced_result = reduce(reduce_ufunc, result, out_axes, initial)
+         return reduced_result, None
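The new `reduce_ufunc`, `warn_on_overflow`, and `result_shape_dtype` options compose as follows. A minimal usage sketch (hypothetical function, shapes, and byte limit; assuming `autobatch` is re-exported from `bartz.jaxext`):

    import jax.numpy as jnp
    from bartz.jaxext import autobatch

    def outer(x):
        # (n, p) -> (n, p, p): one outer product per row
        return x[:, :, None] * x[:, None, :]

    x = jnp.ones((1024, 8))

    # Without reduce_ufunc, batch outputs are concatenated along axis 0.
    f = autobatch(outer, max_io_nbytes=2**16, return_nbatches=True)
    out, nbatches = f(x)  # out.shape == (1024, 8, 8)

    # With reduce_ufunc, each batch is folded into a running reduction as it
    # is produced, so the full (1024, 8, 8) output never materializes at once.
    g = autobatch(outer, max_io_nbytes=2**16, reduce_ufunc=jnp.add)
    total = g(x)  # total.shape == (8, 8), the sum over axis 0

Following the docstring's note on intermediate values: if a function's temporaries are about as large as its batched inputs and outputs (``core_int_size == core_io_size``), pass half the intended byte budget as `max_io_nbytes`.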
@@ -1,6 +1,6 @@
  # bartz/src/bartz/jaxext/scipy/__init__.py
  #
- # Copyright (c) 2025, Giacomo Petrillo
+ # Copyright (c) 2025, The Bartz Contributors
  #
  # This file is part of bartz.
  #
@@ -1,6 +1,6 @@
  # bartz/src/bartz/jaxext/scipy/special.py
  #
- # Copyright (c) 2025, Giacomo Petrillo
+ # Copyright (c) 2025, The Bartz Contributors
  #
  # This file is part of bartz.
  #
@@ -26,7 +26,7 @@
 
  from functools import wraps
 
- from jax import ShapeDtypeStruct, pure_callback
+ from jax import ShapeDtypeStruct, jit, pure_callback
  from jax import numpy as jnp
  from scipy.special import gammainccinv as scipy_gammainccinv
 
@@ -45,10 +45,9 @@ def _castto(func, dtype):
      return newfunc
 
 
+ @jit
  def gammainccinv(a, y):
      """Survival function inverse of the Gamma(a, 1) distribution."""
-     a = jnp.asarray(a)
-     y = jnp.asarray(y)
      shape = jnp.broadcast_shapes(a.shape, y.shape)
      dtype = _float_type(a.dtype, y.dtype)
      dummy = ShapeDtypeStruct(shape, dtype)
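Since `gammainccinv` is now wrapped in `jit`, its arguments are traced as JAX arrays before the body runs, which is why the explicit `jnp.asarray` coercions could be dropped: tracers already expose `.shape` and `.dtype`, and the `pure_callback` into SciPy still executes under `jit`. A minimal usage sketch (hypothetical values; module path taken from the file header above):

    import jax.numpy as jnp
    from bartz.jaxext.scipy.special import gammainccinv

    # x such that the Gamma(3, 1) survival function at x equals 0.05
    x = gammainccinv(jnp.float32(3.0), jnp.float32(0.05))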
@@ -1,6 +1,6 @@
  # bartz/src/bartz/jaxext/scipy/stats.py
  #
- # Copyright (c) 2025, Giacomo Petrillo
+ # Copyright (c) 2025, The Bartz Contributors
  #
  # This file is part of bartz.
  #