bartz-0.7.0-py3-none-any.whl → bartz-0.8.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bartz/.DS_Store +0 -0
- bartz/BART/__init__.py +27 -0
- bartz/BART/_gbart.py +522 -0
- bartz/__init__.py +4 -2
- bartz/{BART.py → _interface.py} +256 -132
- bartz/_profiler.py +318 -0
- bartz/_version.py +1 -1
- bartz/debug.py +269 -314
- bartz/grove.py +124 -68
- bartz/jaxext/__init__.py +101 -27
- bartz/jaxext/_autobatch.py +257 -51
- bartz/jaxext/scipy/__init__.py +1 -1
- bartz/jaxext/scipy/special.py +3 -4
- bartz/jaxext/scipy/stats.py +1 -1
- bartz/mcmcloop.py +399 -208
- bartz/mcmcstep/__init__.py +35 -0
- bartz/mcmcstep/_moves.py +904 -0
- bartz/mcmcstep/_state.py +1114 -0
- bartz/mcmcstep/_step.py +1603 -0
- bartz/prepcovars.py +1 -1
- bartz/testing/__init__.py +29 -0
- bartz/testing/_dgp.py +442 -0
- {bartz-0.7.0.dist-info → bartz-0.8.0.dist-info}/METADATA +17 -11
- bartz-0.8.0.dist-info/RECORD +25 -0
- {bartz-0.7.0.dist-info → bartz-0.8.0.dist-info}/WHEEL +1 -1
- bartz/mcmcstep.py +0 -2616
- bartz-0.7.0.dist-info/RECORD +0 -17
bartz/{BART.py → _interface.py}
RENAMED
@@ -1,6 +1,6 @@
-# bartz/src/bartz/BART.py
+# bartz/src/bartz/_interface.py
 #
-# Copyright (c)
+# Copyright (c) 2025-2026, The Bartz Contributors
 #
 # This file is part of bartz.
 #
@@ -22,17 +22,20 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-"""
+"""Main high-level interface of the package."""
 
 import math
 from collections.abc import Sequence
 from functools import cached_property
-from typing import Any, Literal, Protocol
+from typing import Any, Literal, Protocol, TypedDict
 
 import jax
 import jax.numpy as jnp
 from equinox import Module, field
+from jax import Device, device_put, jit, make_mesh
+from jax.lax import collapse
 from jax.scipy.special import ndtr
+from jax.sharding import AxisType, Mesh
 from jaxtyping import (
     Array,
     Bool,
@@ -48,22 +51,21 @@ from jaxtyping import (
 from numpy import ndarray
 
 from bartz import mcmcloop, mcmcstep, prepcovars
+from bartz.jaxext import is_key
 from bartz.jaxext.scipy.special import ndtri
 from bartz.jaxext.scipy.stats import invgamma
+from bartz.mcmcloop import compute_varcount, evaluate_trace, run_mcmc
+from bartz.mcmcstep import make_p_nonterminal
+from bartz.mcmcstep._state import get_num_chains
 
 FloatLike = float | Float[Any, '']
 
 
 class DataFrame(Protocol):
-    """DataFrame duck-type for `
-
-    Attributes
-    ----------
-    columns : Sequence[str]
-        The names of the columns.
-    """
+    """DataFrame duck-type for `Bart`."""
 
     columns: Sequence[str]
+    """The names of the columns."""
 
     def to_numpy(self) -> ndarray:
         """Convert the dataframe to a 2d numpy array with columns on the second axis."""
@@ -71,22 +73,17 @@ class DataFrame(Protocol):
 
 
 class Series(Protocol):
-    """Series duck-type for `
-
-    Attributes
-    ----------
-    name : str | None
-        The name of the series.
-    """
+    """Series duck-type for `Bart`."""
 
     name: str | None
+    """The name of the series."""
 
     def to_numpy(self) -> ndarray:
         """Convert the series to a 1d numpy array."""
         ...
 
 
-class gbart(Module):
+class Bart(Module):
     R"""
     Nonparametric regression with Bayesian Additive Regression Trees (BART) [2]_.
 
@@ -147,10 +144,7 @@ class gbart(Module):
     rm_const
         How to treat predictors with no associated decision rules (i.e., there
        are no available cutpoints for that predictor). If `True` (default),
-        they are ignored. If `False`, an error is raised if there are any.
-        `None`, no check is performed, and the output of the MCMC may not make
-        sense if there are predictors without cutpoints. The option `None` is
-        provided only to allow jax tracing.
+        they are ignored. If `False`, an error is raised if there are any.
    sigest
         An estimate of the residual standard deviation on `y_train`, used to set
         `lamda`. If not specified, it is estimated by linear regression (with
@@ -214,21 +208,40 @@
 
         Ignored if `xinfo` is specified.
     ndpost
-        The number of MCMC samples to save, after burn-in.
+        The number of MCMC samples to save, after burn-in. `ndpost` is the
+        total number of samples across all chains. `ndpost` is rounded up to the
+        first multiple of `mc_cores`.
     nskip
-        The number of initial MCMC samples to discard as burn-in.
+        The number of initial MCMC samples to discard as burn-in. This number
+        of samples is discarded from each chain.
     keepevery
         The thinning factor for the MCMC samples, after burn-in. By default, 1
         for continuous regression and 10 for binary regression.
     printevery
         The number of iterations (including thinned-away ones) between each log
-        line. Set to `None` to disable logging.
-
-
-
-
-
+        line. Set to `None` to disable logging. ^C interrupts the MCMC only
+        every `printevery` iterations, so with logging disabled it's impossible
+        to kill the MCMC conveniently.
+    num_chains
+        The number of independent Markov chains to run. By default only one
+        chain is run.
+
+        The difference between not specifying `num_chains` and setting it to 1
+        is that in the latter case in the object attributes and some methods
+        there will be an explicit chain axis of size 1.
+    num_chain_devices
+        The number of devices to spread the chains across. Must be a divisor of
+        `num_chains`. Each device will run a fraction of the chains.
+    num_data_devices
+        The number of devices to split datapoints across. Must be a divisor of
+        `n`. This is useful only with very high `n`, about > 1000_000.
+
+        If both num_chain_devices and num_data_devices are specified, the total
+        number of devices used is the product of the two.
+    devices
+        One or more devices used to run the MCMC on. If not specified, the
+        computation will follow the placement of the input arrays. If a list of
+        devices, this argument can be longer than the number of devices needed.
     seed
         The seed for the random number generator.
     maxdepth
@@ -239,34 +252,6 @@ class gbart(Module):
     run_mcmc_kw
         Additional arguments passed to `bartz.mcmcloop.run_mcmc`.
 
-    Attributes
-    ----------
-    offset : Float32[Array, '']
-        The prior mean of the latent mean function.
-    sigest : Float32[Array, ''] | None
-        The estimated standard deviation of the error used to set `lamda`.
-    yhat_test : Float32[Array, 'ndpost m'] | None
-        The conditional posterior mean at `x_test` for each MCMC iteration.
-
-    Notes
-    -----
-    This interface imitates the function ``gbart`` from the R package `BART
-    <https://cran.r-project.org/package=BART>`_, but with these differences:
-
-    - If `x_train` and `x_test` are matrices, they have one predictor per row
-      instead of per column.
-    - If ``usequants=False``, R BART switches to quantiles anyway if there are
-      less predictor values than the required number of bins, while bartz
-      always follows the specification.
-    - Some functionality is missing.
-    - The error variance parameter is called `lamda` instead of `lambda`.
-    - There are some additional attributes, and some missing.
-    - The trees have a maximum depth.
-    - `rm_const` refers to predictors without decision rules instead of
-      predictors that are constant in `x_train`.
-    - If `rm_const=True` and some variables are dropped, the predictors
-      matrix/dataframe passed to `predict` should still include them.
-
     References
     ----------
     .. [1] Linero, Antonio R. (2018). “Bayesian Regression Trees for
@@ -283,10 +268,14 @@ class gbart(Module):
     _splits: Real[Array, 'p max_num_splits']
     _x_train_fmt: Any = field(static=True)
 
-    ndpost: int = field(static=True)
     offset: Float32[Array, '']
+    """The prior mean of the latent mean function."""
+
     sigest: Float32[Array, ''] | None = None
+    """The estimated standard deviation of the error used to set `lamda`."""
+
     yhat_test: Float32[Array, 'ndpost m'] | None = None
+    """The conditional posterior mean at `x_test` for each MCMC iteration."""
 
     def __init__(
         self,
@@ -302,7 +291,7 @@ class gbart(Module):
         rho: FloatLike | None = None,
         xinfo: Float[Array, 'p n'] | None = None,
         usequants: bool = False,
-        rm_const: bool
+        rm_const: bool = True,
         sigest: FloatLike | None = None,
         sigdf: FloatLike = 3.0,
         sigquant: FloatLike = 0.9,
@@ -312,13 +301,17 @@ class gbart(Module):
         lamda: FloatLike | None = None,
         tau_num: FloatLike | None = None,
         offset: FloatLike | None = None,
-        w: Float[Array, ' n'] | None = None,
+        w: Float[Array, ' n'] | Series | None = None,
         ntree: int | None = None,
         numcut: int = 100,
         ndpost: int = 1000,
         nskip: int = 100,
         keepevery: int | None = None,
         printevery: int | None = 100,
+        num_chains: int | None = None,
+        num_chain_devices: int | None = None,
+        num_data_devices: int | None = None,
+        devices: Device | Sequence[Device] | None = None,
         seed: int | Key[Array, ''] = 0,
         maxdepth: int = 6,
         init_kw: dict | None = None,
@@ -378,21 +371,19 @@ class gbart(Module):
             a,
             b,
             rho,
+            num_chains,
+            num_chain_devices,
+            num_data_devices,
+            devices,
+            sparse,
+            nskip,
         )
         final_state, burnin_trace, main_trace = self._run_mcmc(
-            initial_state,
-            ndpost,
-            nskip,
-            keepevery,
-            printevery,
-            seed,
-            run_mcmc_kw,
-            sparse,
+            initial_state, ndpost, nskip, keepevery, printevery, seed, run_mcmc_kw
         )
 
         # set public attributes
         self.offset = final_state.offset  # from the state because of buffer donation
-        self.ndpost = ndpost
         self.sigest = sigest
 
         # set private attributes
@@ -406,6 +397,15 @@ class gbart(Module):
         if x_test is not None:
             self.yhat_test = self.predict(x_test)
 
+    @property
+    def ndpost(self):
+        """The total number of posterior samples after burn-in across all chains.
+
+        May be larger than the initialization argument `ndpost` if it was not
+        divisible by the number of chains.
+        """
+        return self._main_trace.grow_prop_count.size
+
     @cached_property
     def prob_test(self) -> Float32[Array, 'ndpost m'] | None:
         """The posterior probability of y being True at `x_test` for each MCMC iteration."""
@@ -439,30 +439,53 @@ class gbart(Module):
         return self.prob_train.mean(axis=0)
 
     @cached_property
-    def sigma(
+    def sigma(
+        self,
+    ) -> (
+        Float32[Array, ' nskip+ndpost']
+        | Float32[Array, 'nskip+ndpost/mc_cores mc_cores']
+        | None
+    ):
         """The standard deviation of the error, including burn-in samples."""
-        if self._burnin_trace.
+        if self._burnin_trace.error_cov_inv is None:
             return None
-
-
-
-            jnp.concatenate(
+        assert self._main_trace.error_cov_inv is not None
+        return jnp.sqrt(
+            jnp.reciprocal(
+                jnp.concatenate(
+                    [
+                        self._burnin_trace.error_cov_inv.T,
+                        self._main_trace.error_cov_inv.T,
+                    ],
+                    axis=0,
+                    # error_cov_inv has shape (chains? samples) in the trace
+                )
             )
+        )
+
+    @cached_property
+    def sigma_(self) -> Float32[Array, 'ndpost'] | None:
+        """The standard deviation of the error, only over the post-burnin samples and flattened."""
+        error_cov_inv = self._main_trace.error_cov_inv
+        if error_cov_inv is None:
+            return None
+        else:
+            return jnp.sqrt(jnp.reciprocal(error_cov_inv)).reshape(-1)
 
     @cached_property
     def sigma_mean(self) -> Float32[Array, ''] | None:
         """The mean of `sigma`, only over the post-burnin samples."""
-        if self.
+        if self.sigma_ is None:
             return None
-
-        return self.sigma[len(self.sigma) - self.ndpost :].mean(axis=0)
+        return self.sigma_.mean()
 
     @cached_property
     def varcount(self) -> Int32[Array, 'ndpost p']:
         """Histogram of predictor usage for decision rules in the trees."""
-
-
-        )
+        p = self._mcmc_state.forest.max_split.size
+        varcount: Int32[Array, '*chains samples p']
+        varcount = compute_varcount(p, self._main_trace)
+        return collapse(varcount, 0, -1)
 
     @cached_property
     def varcount_mean(self) -> Float32[Array, ' p']:
@@ -472,13 +495,15 @@ class gbart(Module):
     @cached_property
     def varprob(self) -> Float32[Array, 'ndpost p']:
         """Posterior samples of the probability of choosing each predictor for a decision rule."""
+        max_split = self._mcmc_state.forest.max_split
+        p = max_split.size
         varprob = self._main_trace.varprob
         if varprob is None:
-            max_split = self._mcmc_state.forest.max_split
-            p = max_split.size
             peff = jnp.count_nonzero(max_split)
             varprob = jnp.where(max_split, 1 / peff, 0)
             varprob = jnp.broadcast_to(varprob, (self.ndpost, p))
+        else:
+            varprob = varprob.reshape(-1, p)
         return varprob
 
     @cached_property
@@ -567,10 +592,11 @@ class gbart(Module):
         get_length = lambda x: x.shape[-1]
         assert get_length(x1) == get_length(x2)
 
-    @staticmethod
+    @classmethod
     def _process_error_variance_settings(
-        x_train, y_train, sigest, sigdf, sigquant, lamda
+        cls, x_train, y_train, sigest, sigdf, sigquant, lamda
     ) -> tuple[Float32[Array, ''] | None, ...]:
+        """Return (lamda, sigest)."""
         if y_train.dtype == bool:
             if sigest is not None:
                 msg = 'Let `sigest=None` for binary regression'
@@ -592,18 +618,26 @@ class gbart(Module):
         elif y_train.size <= x_train.shape[0]:
             sigest2 = jnp.var(y_train)
         else:
-
-            y_centered = y_train - y_train.mean()
-            # centering is equivalent to adding an intercept column
-            _, chisq, rank, _ = jnp.linalg.lstsq(x_centered, y_centered)
-            chisq = chisq.squeeze(0)
-            dof = len(y_train) - rank
-            sigest2 = chisq / dof
+            sigest2 = cls._linear_regression(x_train, y_train)
         alpha = sigdf / 2
         invchi2 = invgamma.ppf(sigquant, alpha) / 2
         invchi2rid = invchi2 * sigdf
         return sigest2 / invchi2rid, jnp.sqrt(sigest2)
 
+    @staticmethod
+    @jit
+    def _linear_regression(
+        x_train: Shaped[Array, 'p n'], y_train: Float32[Array, ' n']
+    ):
+        """Return the error variance estimated with OLS with intercept."""
+        x_centered = x_train.T - x_train.mean(axis=1)
+        y_centered = y_train - y_train.mean()
+        # centering is equivalent to adding an intercept column
+        _, chisq, rank, _ = jnp.linalg.lstsq(x_centered, y_centered)
+        chisq = chisq.squeeze(0)
+        dof = len(y_train) - rank
+        return chisq / dof
+
     @staticmethod
     def _check_type_settings(y_train, type, w):  # noqa: A002
         match type:
@@ -641,6 +675,7 @@ class gbart(Module):
         | tuple[FloatLike, None, None, None]
         | tuple[None, FloatLike, FloatLike, FloatLike]
     ):
+        """Return (theta, a, b, rho)."""
        if not sparse:
             return None, None, None, None
         elif theta is not None:
@@ -656,6 +691,7 @@ class gbart(Module):
         y_train: Float32[Array, ' n'] | Bool[Array, ' n'],
         offset: float | Float32[Any, ''] | None,
     ) -> Float32[Array, '']:
+        """Return offset."""
         if offset is not None:
             return jnp.asarray(offset)
         elif y_train.size < 1:
@@ -677,6 +713,7 @@ class gbart(Module):
         ntree: int,
         tau_num: FloatLike | None,
     ):
+        """Return sigma_mu."""
         if tau_num is None:
             if y_train.dtype == bool:
                 tau_num = 3.0
@@ -705,7 +742,9 @@ class gbart(Module):
             return prepcovars.uniform_splits_from_matrix(x_train, numcut + 1)
 
     @staticmethod
-    def _bin_predictors(
+    def _bin_predictors(
+        x: Real[Array, 'p n'], splits: Real[Array, 'p max_num_splits']
+    ) -> UInt[Array, 'p n']:
         return prepcovars.bin_predictors(x, splits)
 
     @staticmethod
@@ -723,23 +762,35 @@ class gbart(Module):
         maxdepth: int,
         ntree: int,
         init_kw: dict[str, Any] | None,
-        rm_const: bool
+        rm_const: bool,
         theta: FloatLike | None,
         a: FloatLike | None,
         b: FloatLike | None,
         rho: FloatLike | None,
+        num_chains: int | None,
+        num_chain_devices: int | None,
+        num_data_devices: int | None,
+        devices: Device | Sequence[Device] | None,
+        sparse: bool,
+        nskip: int,
     ):
-
-        p_nonterminal = base / (1 + depth).astype(float) ** power
+        p_nonterminal = make_p_nonterminal(maxdepth, base, power)
 
         if y_train.dtype == bool:
-
-
+            error_cov_df = None
+            error_cov_scale = None
         else:
-
-
+            assert lamda is not None
+            # inverse gamma prior: alpha = df / 2, beta = scale / 2
+            error_cov_df = sigdf
+            error_cov_scale = lamda * sigdf
+
+        # process device settings
+        device_kw, device = process_device_settings(
+            y_train, num_chains, num_chain_devices, num_data_devices, devices
+        )
 
-        kw = dict(
+        kw: dict = dict(
             X=x_train,
             # copy y_train because it's going to be donated in the mcmc loop
             y=jnp.array(y_train),
@@ -748,35 +799,37 @@ class gbart(Module):
             max_split=max_split,
             num_trees=ntree,
             p_nonterminal=p_nonterminal,
-
-
-
+            leaf_prior_cov_inv=jnp.reciprocal(jnp.square(sigma_mu)),
+            error_cov_df=error_cov_df,
+            error_cov_scale=error_cov_scale,
             min_points_per_decision_node=10,
             min_points_per_leaf=5,
             theta=theta,
             a=a,
             b=b,
             rho=rho,
+            sparse_on_at=nskip // 2 if sparse else None,
+            **device_kw,
         )
 
-        if rm_const
-
-
-            kw.update(filter_splitless_vars=True)
-        else:
-            n_empty = jnp.count_nonzero(max_split == 0)
-            if n_empty:
-                msg = f'There are {n_empty}/{max_split.size} predictors without decision rules'
-                raise ValueError(msg)
-            kw.update(filter_splitless_vars=False)
+        if rm_const:
+            n_empty = jnp.sum(max_split == 0).item()
+            kw.update(filter_splitless_vars=n_empty)
 
         if init_kw is not None:
             kw.update(init_kw)
 
-
+        state = mcmcstep.init(**kw)
 
-
+        # put state on device if requested explicitly by the user
+        if device is not None:
+            state = device_put(state, device, donate=True)
+
+        return state
+
+    @classmethod
     def _run_mcmc(
+        cls,
         mcmc_state: mcmcstep.State,
         ndpost: int,
         nskip: int,
@@ -784,30 +837,101 @@ class gbart(Module):
         printevery: int | None,
         seed: int | Integer[Array, ''] | Key[Array, ''],
         run_mcmc_kw: dict | None,
-
-    ):
+    ) -> tuple[mcmcstep.State, mcmcloop.BurninTrace, mcmcloop.MainTrace]:
         # prepare random generator seed
-        if
-
-        ):
-            key = seed.copy()
-            # copy because the inner loop in run_mcmc will donate the buffer
+        if is_key(seed):
+            key = jnp.copy(seed)
         else:
             key = jax.random.key(seed)
 
+        # round up ndpost
+        num_chains = get_num_chains(mcmc_state)
+        if num_chains is None:
+            num_chains = 1
+        n_save = ndpost // num_chains + bool(ndpost % num_chains)
+
         # prepare arguments
-        kw = dict(n_burn=nskip, n_skip=keepevery, inner_loop_length=printevery)
+        kw: dict = dict(n_burn=nskip, n_skip=keepevery, inner_loop_length=printevery)
         kw.update(
             mcmcloop.make_default_callback(
+                mcmc_state,
                 dot_every=None if printevery is None or printevery == 1 else 1,
                 report_every=printevery,
-                sparse_on_at=nskip // 2 if sparse else None,
             )
         )
        if run_mcmc_kw is not None:
             kw.update(run_mcmc_kw)
 
-        return
-
-    def _predict(self, x):
-
+        return run_mcmc(key, mcmc_state, n_save, **kw)
+
+    def _predict(self, x: UInt[Array, 'p m']) -> Float32[Array, 'ndpost m']:
+        """Evaluate trees on already quantized `x`."""
+        out = evaluate_trace(x, self._main_trace)
+        return collapse(out, 0, -1)
+
+
+class DeviceKwArgs(TypedDict):
+    num_chains: int | None
+    mesh: Mesh | None
+    target_platform: Literal['cpu', 'gpu'] | None
+
+
+def process_device_settings(
+    y_train: Array,
+    num_chains: int | None,
+    num_chain_devices: int | None,
+    num_data_devices: int | None,
+    devices: Device | Sequence[Device] | None,
+) -> tuple[DeviceKwArgs, Device | None]:
+    """Return the arguments for `mcmcstep.init` related to devices, and an optional device where to put the state."""
+    # determine devices
+    if devices is not None:
+        if not hasattr(devices, '__len__'):
+            devices = (devices,)
+        device = devices[0]
+        platform = device.platform
+    elif hasattr(y_train, 'platform'):
+        platform = y_train.platform()
+        device = None
+        # set device=None because if the devices were not specified explicitly
+        # we may be in the case where computation will follow data placement,
+        # do not disturb jax as the user may be playing with vmap, jit, reshard...
+        devices = jax.devices(platform)
+    else:
+        msg = 'not possible to infer device from `y_train`, please set `devices`'
+        raise ValueError(msg)
+
+    # create mesh
+    if num_chain_devices is None and num_data_devices is None:
+        mesh = None
+    else:
+        mesh = dict()
+        if num_chain_devices is not None:
+            mesh.update(chains=num_chain_devices)
+        if num_data_devices is not None:
+            mesh.update(data=num_data_devices)
+        mesh = make_mesh(
+            axis_shapes=tuple(mesh.values()),
+            axis_names=tuple(mesh),
+            axis_types=(AxisType.Auto,) * len(mesh),
+            devices=devices,
+        )
+        device = None
+        # set device=None because `mcmcstep.init` will `device_put` with the
+        # mesh already, we don't want to undo its work
+
+    # prepare arguments to `init`
+    settings = DeviceKwArgs(
+        num_chains=num_chains,
+        mesh=mesh,
+        target_platform=None
+        if mesh is not None or hasattr(y_train, 'platform')
+        else platform,
+        # here we don't take into account the case where the user has set both
+        # batch sizes; since the user has to be playing with `init_kw` to do
        # that, we'll let `init` throw the error and the user set
        # `target_platform` themselves so they have a clearer idea how the
        # thing works.
    )

    return settings, device