PyPI - bartz - Versions diffs - 0.0.1__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

bartz 0.0.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

bartz/{interface.py → BART.py} +10 -18
bartz/__init__.py +7 -2
bartz/_version.py +1 -0
bartz/debug.py +9 -22
bartz/grove.py +73 -120
bartz/jaxext.py +261 -5
bartz/mcmcloop.py +27 -13
bartz/mcmcstep.py +510 -439
bartz/prepcovars.py +25 -30
{bartz-0.0.1.dist-info → bartz-0.2.0.dist-info}/METADATA +7 -1
bartz-0.2.0.dist-info/RECORD +13 -0
bartz-0.0.1.dist-info/RECORD +0 -12
{bartz-0.0.1.dist-info → bartz-0.2.0.dist-info}/LICENSE +0 -0
{bartz-0.0.1.dist-info → bartz-0.2.0.dist-info}/WHEEL +0 -0

bartz/prepcovars.py CHANGED Viewed

@@ -27,8 +27,10 @@ import functools
 import jax
 from jax import numpy as jnp
+from . import jaxext
 from . import grove
+@functools.partial(jax.jit, static_argnums=(1,))
 def quantilized_splits_from_matrix(X, max_bins):
     """
     Determine bins that make the distribution of each predictor uniform.
@@ -52,48 +54,41 @@ def quantilized_splits_from_matrix(X, max_bins):
         The number of actually used values in each row of `splits`.
     """
     out_length = min(max_bins, X.shape[1]) - 1
-    return quantilized_splits_from_matrix_impl(X, out_length)
+    # return _quantilized_splits_from_matrix(X, out_length)
+    @functools.partial(jaxext.autobatch, max_io_nbytes=500_000_000)
+    def func(X):
+        return _quantilized_splits_from_matrix(X, out_length)
+    return func(X)
 @functools.partial(jax.vmap, in_axes=(0, None))
-def quantilized_splits_from_matrix_impl(x, out_length):
-    huge = huge_value(x)
-    u = jnp.unique(x, size=x.size, fill_value=huge)
-    actual_length = jnp.count_nonzero(u < huge) - 1
-    midpoints = (u[1:] + u[:-1]) / 2
+def _quantilized_splits_from_matrix(x, out_length):
+    huge = jaxext.huge_value(x)
+    u, actual_length = jaxext.unique(x, size=x.size, fill_value=huge)
+    actual_length -= 1
+    if jnp.issubdtype(x.dtype, jnp.integer):
+        midpoints = u[:-1] + jaxext.ensure_unsigned(u[1:] - u[:-1]) // 2
+        indices = jnp.arange(midpoints.size, dtype=jaxext.minimal_unsigned_dtype(midpoints.size - 1))
+        midpoints = jnp.where(indices < actual_length, midpoints, huge)
+    else:
+        midpoints = (u[1:] + u[:-1]) / 2
     indices = jnp.linspace(-1, actual_length, out_length + 2)[1:-1]
-    indices = jnp.around(indices).astype(grove.minimal_unsigned_dtype(midpoints.size - 1))
+    indices = jnp.around(indices).astype(jaxext.minimal_unsigned_dtype(midpoints.size - 1))
         # indices calculation with float rather than int to avoid potential
         # overflow with int32, and to round to nearest instead of rounding down
     decimated_midpoints = midpoints[indices]
     truncated_midpoints = midpoints[:out_length]
     splits = jnp.where(actual_length > out_length, decimated_midpoints, truncated_midpoints)
     max_split = jnp.minimum(actual_length, out_length)
-    max_split = max_split.astype(grove.minimal_unsigned_dtype(out_length))
+    max_split = max_split.astype(jaxext.minimal_unsigned_dtype(out_length))
     return splits, max_split
-def huge_value(x):
-    """
-    Return the maximum value that can be stored in `x`.
-    Parameters
-    ----------
-    x : array
-        A numerical numpy or jax array.
-    Returns
-    -------
-    maxval : scalar
-        The maximum value allowed by `x`'s type (+inf for floats).
-    """
-    if jnp.issubdtype(x.dtype, jnp.integer):
-        return jnp.iinfo(x.dtype).max
-    else:
-        return jnp.inf
+@jax.jit
 def bin_predictors(X, splits):
     """
     Bin the predictors according to the given splits.
+    A value ``x`` is mapped to bin ``i`` iff ``splits[i - 1] < x <= splits[i]``.
     Parameters
     ----------
     X : array (p, n)
@@ -110,9 +105,9 @@ def bin_predictors(X, splits):
         A matrix with `p` predictors and `n` observations, where each predictor
         has been replaced by the index of the bin it falls into.
     """
-    return bin_predictors_impl(X, splits)
+    return _bin_predictors(X, splits)
 @jax.vmap
-def bin_predictors_impl(x, splits):
-    dtype = grove.minimal_unsigned_dtype(splits.size)
+def _bin_predictors(x, splits):
+    dtype = jaxext.minimal_unsigned_dtype(splits.size)
     return jnp.searchsorted(splits, x).astype(dtype)

{bartz-0.0.1.dist-info → bartz-0.2.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bartz
-Version: 0.0.1
+Version: 0.2.0
 Summary: A JAX implementation of BART
 Home-page: https://github.com/Gattocrucco/bartz
 License: MIT
@@ -20,7 +20,13 @@ Project-URL: Bug Tracker, https://github.com/Gattocrucco/bartz/issues
 Project-URL: Repository, https://github.com/Gattocrucco/bartz
 Description-Content-Type: text/markdown
+[![PyPI](https://img.shields.io/pypi/v/bartz)](https://pypi.org/project/bartz/)
 # BART vectoriZed
 A branchless vectorized implementation of Bayesian Additive Regression Trees (BART) in JAX.
+BART is a nonparametric Bayesian regression technique. Given predictors $X$ and responses $y$, BART finds a function to predict $y$ given $X$. The result of the inference is a sample of possible functions, representing the uncertainty over the determination of the function.
+This Python module provides an implementation of BART that runs on GPU, to process large datasets faster. It is also good on CPU. Most other implementations of BART are for R, and run on CPU only.

bartz-0.2.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,13 @@
+bartz/BART.py,sha256=pRG7mALenknX2JHqY-VyhO9-evDgEC6hWBp4jpecBdM,15801
+bartz/__init__.py,sha256=E96vsP0bZ8brejpZmEmRoXuMsUdinO_B_SKUUl1rLsg,1448
+bartz/_version.py,sha256=FVHPBGkfhbQDi_z3v0PiKJrXXqXOx0vGW_1VaqNJi7U,22
+bartz/debug.py,sha256=9ZH-JfwZVu5OPhHBEyXQHAU5H9KIu1vxLK7yNv4m4Ew,5314
+bartz/grove.py,sha256=Wj_7jHl9w3uwuVdH4hoeXowimGpdRE2lGIzr4aDkzsI,8291
+bartz/jaxext.py,sha256=VYA41D5F7DYcAAVtkcZtEN927HxQGOOQM-uGsgr2CPc,10996
+bartz/mcmcloop.py,sha256=lheLrjVxmlyQzc_92zeNsFhdkrhEWQEjoAWFbVzknnw,7701
+bartz/mcmcstep.py,sha256=3ba94hXBW4UAZ11SFshnwJAgn6bpIqSZdRy_wQjEkrk,39278
+bartz/prepcovars.py,sha256=iiQ0WjSj4--l5DgPW626Qg2SSB6ljnaaUsBz_A8kFrI,4634
+bartz-0.2.0.dist-info/LICENSE,sha256=heuIJZQK9IexJYC-fYHoLUrgj8HG8yS3G072EvKh-94,1073
+bartz-0.2.0.dist-info/METADATA,sha256=LiYjTAzgoxUM2MAuaKtf0VW-_zciTKBkTX5B7HNvUbI,1490
+bartz-0.2.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+bartz-0.2.0.dist-info/RECORD,,

bartz-0.0.1.dist-info/RECORD DELETED Viewed

@@ -1,12 +0,0 @@
-bartz/__init__.py,sha256=PL-vhhEHoVMWOPLG_M45TIZVkbQia5riJbQboy-BNH8,1333
-bartz/debug.py,sha256=FHnCalpK1uO1CN9QQ5DPj70JKR4Thltzp9o0BeYthIo,5741
-bartz/grove.py,sha256=v2k10EBjgi2aLCsGvM01z0z--9Xv4ApBOxpke-6gIYM,10309
-bartz/interface.py,sha256=GBwLwqEF_6EmeteFtsPw6ANdisnvMoWi_fKBJiQq-Vc,16129
-bartz/jaxext.py,sha256=FK5j1zfW1yR4-yPKcD7ZvKSkVQ5--jHjQpVCl4n4gXY,2844
-bartz/mcmcloop.py,sha256=N815-eJxsS_X85okXRO2kSOlikw8dPN05_krm0iT9Sg,7321
-bartz/mcmcstep.py,sha256=acy_2rSIEXV5BzqLY96aQaqlsxtalxyO3Q4gPvUMRVU,35912
-bartz/prepcovars.py,sha256=3ddDOtNNop3Ba2Kgy_dZ6apFydtwaEXH3uXSmmKf9Fs,4421
-bartz-0.0.1.dist-info/LICENSE,sha256=heuIJZQK9IexJYC-fYHoLUrgj8HG8yS3G072EvKh-94,1073
-bartz-0.0.1.dist-info/METADATA,sha256=zDW1dM58gV7c_8ZTjEtTt_tcXabbz5roZBf36EdLxls,933
-bartz-0.0.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-bartz-0.0.1.dist-info/RECORD,,

{bartz-0.0.1.dist-info → bartz-0.2.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{bartz-0.0.1.dist-info → bartz-0.2.0.dist-info}/WHEEL RENAMED Viewed

File without changes

bartz 0.0.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

bartz 0.0.1__py3-none-any.whl → 0.2.0__py3-none-any.whl