PyPI - cellarr-array - Versions diffs - 0.1.0__tar.gz → 0.2.0__tar.gz - Mend

cellarr-array 0.1.0tar.gz → 0.2.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cellarr-array might be problematic. Click here for more details.

Files changed (55) hide show

{cellarr_array-0.1.0 → cellarr_array-0.2.0}/.gitignore RENAMED Viewed

@@ -52,3 +52,5 @@ MANIFEST
 .venv*/
 .conda*/
 .python-version
+*.tdb

{cellarr_array-0.1.0 → cellarr_array-0.2.0}/CHANGELOG.md RENAMED Viewed

@@ -1,5 +1,11 @@
 # Changelog
+## Version 0.2.0
+- Dataloaders for sparse and dense arrays. We provide templates for both map-style and iterable-style dataloaders. Users are expected to understand the caveats of both approaches.
+- Fixed a bug with slicing on 1D arrays and made many improvements to how slicing parameters are optimized.
+- Update documentation and tests.
 ## Version 0.1.0
 - Support cellarr-arrays on user provided tiledb array objects.

{cellarr_array-0.1.0 → cellarr_array-0.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cellarr-array
-Version: 0.1.0
+Version: 0.2.0
 Summary: Base class for handling TileDB backed arrays.
 Home-page: https://github.com/cellarr/cellarr-array
 Author: Jayaram Kancherla
@@ -16,10 +16,13 @@ Requires-Dist: importlib-metadata; python_version < "3.8"
 Requires-Dist: tiledb
 Requires-Dist: numpy
 Requires-Dist: scipy
+Provides-Extra: optional
+Requires-Dist: torch; extra == "optional"
 Provides-Extra: testing
 Requires-Dist: setuptools; extra == "testing"
 Requires-Dist: pytest; extra == "testing"
 Requires-Dist: pytest-cov; extra == "testing"
+Requires-Dist: torch; extra == "testing"
 Dynamic: license-file
 [![PyPI-Server](https://img.shields.io/pypi/v/cellarr-array.svg)](https://pypi.org/project/cellarr-array/)

{cellarr_array-0.1.0 → cellarr_array-0.2.0}/setup.cfg RENAMED Viewed

@@ -33,10 +33,13 @@ exclude =
 	tests
 [options.extras_require]
+optional =
+	torch
 testing =
 	setuptools
 	pytest
 	pytest-cov
+	%(optional)s
 [options.entry_points]

{cellarr_array-0.1.0 → cellarr_array-0.2.0}/src/cellarr_array/__init__.py RENAMED Viewed

@@ -15,7 +15,5 @@ except PackageNotFoundError:  # pragma: no cover
 finally:
     del version, PackageNotFoundError
-from .config import CellArrConfig, ConsolidationConfig
-from .cellarray_dense import DenseCellArray
-from .cellarray_sparse import SparseCellArray
-from .helpers import create_cellarray, SliceHelper
+from .core import DenseCellArray, SparseCellArray
+from .utils import CellArrConfig, ConsolidationConfig, create_cellarray

cellarr_array-0.2.0/src/cellarr_array/core/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .base import CellArray
+from .dense import DenseCellArray
+from .sparse import SparseCellArray

cellarr_array-0.1.0/src/cellarr_array/cellarray_base.py → cellarr_array-0.2.0/src/cellarr_array/core/base.py RENAMED Viewed

@@ -12,7 +12,7 @@ import numpy as np
 import tiledb
 from scipy import sparse
-from .config import ConsolidationConfig
+from ..utils.config import ConsolidationConfig
 from .helpers import SliceHelper
 __author__ = "Jayaram Kancherla"

cellarr_array-0.1.0/src/cellarr_array/cellarray_dense.py → cellarr_array-0.2.0/src/cellarr_array/core/dense.py RENAMED Viewed

@@ -7,7 +7,7 @@ from typing import List, Tuple, Union
 import numpy as np
-from .cellarray_base import CellArray
+from .base import CellArray
 from .helpers import SliceHelper
 __author__ = "Jayaram Kancherla"
@@ -92,7 +92,6 @@ class DenseCellArray(CellArray):
         if len(data.shape) != self.ndim:
             raise ValueError(f"Data dimensions {data.shape} don't match array dimensions {self.shape}.")
-        # Check bounds
         end_row = start_row + data.shape[0]
         if end_row > self.shape[0]:
             raise ValueError(
@@ -102,7 +101,6 @@ class DenseCellArray(CellArray):
         if self.ndim == 2 and data.shape[1] != self.shape[1]:
             raise ValueError(f"Data columns {data.shape[1]} don't match array columns {self.shape[1]}.")
-        # Construct write region
         if self.ndim == 1:
             write_region = slice(start_row, end_row)
         else:  # 2D
@@ -110,4 +108,5 @@ class DenseCellArray(CellArray):
         # write_data = {self._attr: data} if len(self.attr_names) > 1 else data
         with self.open_array(mode="w") as array:
+            print("write_region", write_region)
             array[write_region] = data

{cellarr_array-0.1.0/src/cellarr_array → cellarr_array-0.2.0/src/cellarr_array/core}/helpers.py RENAMED Viewed

@@ -8,7 +8,7 @@ from typing import List, Optional, Tuple, Union
 import numpy as np
 import tiledb
-from .config import CellArrConfig
+from ..utils.config import CellArrConfig
 __author__ = "Jayaram Kancherla"
 __copyright__ = "Jayaram Kancherla"
@@ -52,7 +52,7 @@ def create_cellarray(
             Optional list of dimension names.
         dim_dtypes:
-            Optional list of dimension dtypes.
+            Optional list of dimension dtypes. Defaults to numpy's uint32.
         attr_name:
             Name of the data attribute.
@@ -67,29 +67,28 @@ def create_cellarray(
         ValueError: If dimensions are invalid or inputs are inconsistent.
     """
     config = config or CellArrConfig()
+    tiledb_ctx = tiledb.Config(config.ctx_config) if config.ctx_config else None
     if attr_dtype is None:
         attr_dtype = np.float32
     if isinstance(attr_dtype, str):
         attr_dtype = np.dtype(attr_dtype)
-    # Require either shape or dim_dtypes
     if shape is None and dim_dtypes is None:
         raise ValueError("Either 'shape' or 'dim_dtypes' must be provided.")
     if shape is not None:
         if len(shape) not in (1, 2):
-            raise ValueError("Only 1D and 2D arrays are supported.")
+            raise ValueError("Shape must have 1 or 2 dimensions.")
     # Set dimension dtypes, defaults to numpy uint32
     if dim_dtypes is None:
         dim_dtypes = [np.uint32] * len(shape)
     else:
         if len(dim_dtypes) not in (1, 2):
-            raise ValueError("Only 1D and 2D arrays are supported.")
+            raise ValueError("Array must have 1 or 2 dimensions.")
         dim_dtypes = [np.dtype(dt) if isinstance(dt, str) else dt for dt in dim_dtypes]
-    # Calculate shape from dtypes if needed
     if shape is None:
         shape = tuple(np.iinfo(dt).max if np.issubdtype(dt, np.integer) else None for dt in dim_dtypes)
     if None in shape:
@@ -97,7 +96,6 @@ def create_cellarray(
             np.iinfo(dt).max if s is None and np.issubdtype(dt, np.integer) else s for s, dt in zip(shape, dim_dtypes)
         )
-    # Set dimension names
     if dim_names is None:
         dim_names = [f"dim_{i}" for i in range(len(shape))]
@@ -107,40 +105,43 @@ def create_cellarray(
     dom = tiledb.Domain(
         *[
-            tiledb.Dim(name=name, domain=(0, s - 1), tile=min(s, config.tile_capacity), dtype=dt)
+            tiledb.Dim(
+                name=name,
+                # supporting empty dimensions
+                domain=(0, 0 if s == 0 else s - 1),
+                tile=min(1 if s == 0 else s // 2, config.tile_capacity // 2),
+                dtype=dt,
+            )
             for name, s, dt in zip(dim_names, shape, dim_dtypes)
         ],
-        ctx=tiledb.Ctx(config.ctx_config),
+        ctx=tiledb_ctx,
     )
-    attr = tiledb.Attr(
+    attr_obj = tiledb.Attr(
         name=attr_name,
         dtype=attr_dtype,
         filters=config.attrs_filters.get(attr_name, config.attrs_filters.get("", None)),
+        ctx=tiledb_ctx,
     )
     schema = tiledb.ArraySchema(
         domain=dom,
-        attrs=[attr],
+        attrs=[attr_obj],
         cell_order=config.cell_order,
         tile_order=config.tile_order,
         sparse=sparse,
         coords_filters=config.coords_filters,
         offsets_filters=config.offsets_filters,
-        ctx=tiledb.Ctx(config.ctx_config),
+        ctx=tiledb_ctx,
     )
-    tiledb.Array.create(uri, schema)
+    tiledb.Array.create(uri, schema, ctx=tiledb_ctx)
     # Import here to avoid circular imports
-    from .cellarray_dense import DenseCellArray
-    from .cellarray_sparse import SparseCellArray
+    from .dense import DenseCellArray
+    from .sparse import SparseCellArray
-    # Return appropriate array type
     return (
-        SparseCellArray(uri=uri, attr=attr_name, mode=mode)
+        SparseCellArray(uri=uri, attr=attr_name, mode=mode, config_or_context=tiledb_ctx)
         if sparse
-        else DenseCellArray(uri=uri, attr=attr_name, mode=mode)
+        else DenseCellArray(uri=uri, attr=attr_name, mode=mode, config_or_context=tiledb_ctx)
     )
@@ -149,19 +150,27 @@ class SliceHelper:
     @staticmethod
     def is_contiguous_indices(indices: List[int]) -> Optional[slice]:
-        """Check if indices can be represented as a contiguous slice."""
         if not indices:
             return None
-        diffs = np.diff(indices)
+        sorted_indices = sorted(list(set(indices)))
+        if not sorted_indices:
+            return None
+        if len(sorted_indices) == 1:
+            return slice(sorted_indices[0], sorted_indices[0] + 1, None)
+        diffs = np.diff(sorted_indices)
         if np.all(diffs == 1):
-            return slice(indices[0], indices[-1] + 1, None)
+            return slice(sorted_indices[0], sorted_indices[-1] + 1, None)
         return None
     @staticmethod
-    def normalize_index(idx: Union[int, slice, List[int]], dim_size: int) -> Union[slice, List[int], EllipsisType]:
+    def normalize_index(
+        idx: Union[int, range, slice, List[int], EllipsisType], dim_size: int
+    ) -> Union[slice, List[int], EllipsisType]:
         """Normalize index to handle negative indices and ensure consistency."""
         if isinstance(idx, EllipsisType):
             return idx
@@ -170,36 +179,61 @@ class SliceHelper:
             idx = slice(idx.start, idx.stop, idx.step)
         if isinstance(idx, slice):
-            start = idx.start if idx.start is not None else 0
-            stop = idx.stop if idx.stop is not None else dim_size
+            start = idx.start
+            stop = idx.stop
             step = idx.step
+            # Resolve None to full dimension slice parts
+            if start is None:
+                start = 0
+            if stop is None:
+                stop = dim_size
             # Handle negative indices
             if start < 0:
-                start = dim_size + start
+                start += dim_size
             if stop < 0:
-                stop = dim_size + stop
+                stop += dim_size
-            if start < 0 or start > dim_size:
-                raise IndexError(f"Start index {start} out of bounds for dimension size {dim_size}")
-            if stop < 0 or stop > dim_size:
-                raise IndexError(f"Stop index {stop} out of bounds for dimension size {dim_size}")
+            # slice allows start > dim_size or stop < 0 to result in empty slices.
+            # Note: start == dim_size is OK for empty slice like arr[dim_size:]
+            if start < 0 or (start >= dim_size and dim_size > 0):
+                if not (start == dim_size and (step is None or step > 0)):
+                    if start >= dim_size:
+                        raise IndexError(
+                            f"Start index {idx.start if idx.start is not None else 'None'} results in {start}, which is out of bounds for dimension size {dim_size}."
+                        )
-            return slice(start, stop, step)
+            # Clamping slice arguments to dimensions
+            stop = min(stop, dim_size)
+            start = max(0, start)
+            return slice(start, stop, step)
         elif isinstance(idx, list):
+            if not idx:
+                return []
             norm_idx = [i if i >= 0 else dim_size + i for i in idx]
             if any(i < 0 or i >= dim_size for i in norm_idx):
-                raise IndexError(f"List indices {idx} out of bounds for dimension size {dim_size}")
-            return norm_idx
-        else:  # Single integer index
-            norm_idx = idx if idx >= 0 else dim_size + idx
-            if norm_idx < 0 or norm_idx >= dim_size:
+                oob_indices = [orig_i for orig_i, norm_i in zip(idx, norm_idx) if not (0 <= norm_i < dim_size)]
+                raise IndexError(
+                    f"List indices {oob_indices} (original values) are out of bounds for dimension size {dim_size}."
+                )
+            # TileDB multi_index usually returns data sorted by coordinates
+            return sorted(list(set(norm_idx)))
+        elif isinstance(idx, (int, np.integer)):
+            norm_idx = int(idx)
+            if norm_idx < 0:
+                norm_idx += dim_size
+            if not (0 <= norm_idx < dim_size):
                 raise IndexError(f"Index {idx} out of bounds for dimension size {dim_size}")
             return slice(norm_idx, norm_idx + 1, None)
+        else:
+            raise TypeError(f"Index type {type(idx)} not supported for normalization.")
 def create_group(output_path, group_name):

cellarr_array-0.1.0/src/cellarr_array/cellarray_sparse.py → cellarr_array-0.2.0/src/cellarr_array/core/sparse.py RENAMED Viewed

@@ -9,8 +9,8 @@ import numpy as np
 import tiledb
 from scipy import sparse
-from .cellarray_base import CellArray
 from .helpers import SliceHelper
+from .base import CellArray
 __author__ = "Jayaram Kancherla"
 __copyright__ = "Jayaram Kancherla"
@@ -28,7 +28,7 @@ class SparseCellArray(CellArray):
         mode: Optional[Literal["r", "w", "d", "m"]] = None,
         config_or_context: Optional[Union[tiledb.Config, tiledb.Ctx]] = None,
         return_sparse: bool = True,
-        sparse_coerce: Union[sparse.csr_matrix, sparse.csc_matrix] = sparse.csr_matrix,
+        sparse_format: Union[sparse.csr_matrix, sparse.csc_matrix] = sparse.csr_matrix,
         validate: bool = True,
         **kwargs,
     ):
@@ -66,7 +66,7 @@ class SparseCellArray(CellArray):
                 Whether to return a sparse representation of the data when object is sliced.
                 Default is to return a dictionary that contains coordinates and values.
-            sparse_coerce:
+            sparse_format:
                 Format to return, defaults to csr_matrix.
             validate:
@@ -86,7 +86,7 @@ class SparseCellArray(CellArray):
         )
         self.return_sparse = return_sparse
-        self.sparse_coerce = sparse.csr_matrix if sparse_coerce is None else sparse_coerce
+        self.sparse_format = sparse.csr_matrix if sparse_format is None else sparse_format
     def _validate_matrix_dims(self, data: sparse.spmatrix) -> Tuple[sparse.coo_matrix, bool]:
         """Validate and adjust matrix dimensions if needed.
@@ -126,7 +126,7 @@ class SparseCellArray(CellArray):
                 shape.append(idx.stop - (idx.start or 0))
             elif isinstance(idx, list):
                 shape.append(len(set(idx)))
-            else:  # single integer
+            else:
                 shape.append(1)
         # Always return (n,1) shape for CSR matrix
@@ -140,20 +140,17 @@ class SparseCellArray(CellArray):
         """Convert TileDB result to CSR format or dense array."""
         data = result[self._attr]
-        # empty result
         if len(data) == 0:
             print("is emoty")
             if not self.return_sparse:
                 return result
             else:
-                # For COO output, return empty sparse matrix
                 if self.ndim == 1:
-                    matrix = self.sparse_coerce((1, shape[0]))
+                    matrix = self.sparse_format((1, shape[0]))
                     return matrix[:, key[0]]
-                return self.sparse_coerce(shape)[key]
+                return self.sparse_format(shape)[key]
-        # Get coordinates
         coords = []
         for dim_name in self.dim_names:
             dim_coords = result[dim_name]
@@ -164,11 +161,12 @@ class SparseCellArray(CellArray):
             coords = [np.zeros_like(coords[0]), coords[0]]
             shape = (1, shape[0])
-        # Create sparse matrix
         matrix = sparse.coo_matrix((data, tuple(coords)), shape=shape)
-        if self.sparse_coerce in (sparse.csr_matrix, sparse.csr_array):
+        sliced = matrix
+        if self.sparse_format in (sparse.csr_matrix, sparse.csr_array):
             sliced = matrix.tocsr()
-        elif self.sparse_coerce in (sparse.csc_matrix, sparse.csc_array):
+        elif self.sparse_format in (sparse.csc_matrix, sparse.csc_array):
             sliced = matrix.tocsc()
         if self.ndim == 1:
@@ -200,7 +198,6 @@ class SparseCellArray(CellArray):
         if all(isinstance(idx, slice) for idx in optimized_key):
             return self._direct_slice(tuple(optimized_key))
-        # For mixed slice-list queries, adjust slice bounds
         tiledb_key = []
         for idx in key:
             if isinstance(idx, slice):
@@ -239,10 +236,8 @@ class SparseCellArray(CellArray):
         if not sparse.issparse(data):
             raise TypeError("Input must be a scipy sparse matrix.")
-        # Validate and adjust dimensions
         coo_data, is_1d = self._validate_matrix_dims(data)
-        # Check bounds
         end_row = start_row + coo_data.shape[0]
         if end_row > self.shape[0]:
             raise ValueError(

cellarr_array-0.2.0/src/cellarr_array/dataloaders/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .denseloader import DenseArrayDataset, construct_dense_array_dataloader
+from .iterabledataloader import CellArrayIterableDataset, construct_iterable_dataloader
+from .sparseloader import SparseArrayDataset, construct_sparse_array_dataloader

cellarr_array-0.2.0/src/cellarr_array/dataloaders/denseloader.py ADDED Viewed

@@ -0,0 +1,198 @@
+from typing import Optional
+from warnings import warn
+import numpy as np
+import tiledb
+import torch
+from torch.utils.data import DataLoader, Dataset
+from ..core.dense import DenseCellArray
+__author__ = "Jayaram Kancherla"
+__copyright__ = "Jayaram Kancherla"
+__license__ = "MIT"
+class DenseArrayDataset(Dataset):
+    """Map-style PyTorch Dataset that serves rows of a dense TileDB array.
+    Rows are read through ``DenseCellArray`` and returned as ``torch.Tensor``
+    objects. The array handle is not created in ``__init__``; it is opened on
+    the first item access (see ``_init_worker_state``), which is intended to
+    give each DataLoader worker its own handle.
+    """
+    def __init__(
+        self,
+        array_uri: str,
+        attribute_name: str = "data",
+        num_rows: Optional[int] = None,
+        num_columns: Optional[int] = None,
+        cellarr_ctx_config: Optional[dict] = None,
+        transform=None,
+    ):
+        """PyTorch Dataset for dense TileDB arrays accessed via DenseCellArray.
+        Args:
+            array_uri:
+                URI of the TileDB dense array.
+            attribute_name:
+                Name of the attribute to read from.
+            num_rows:
+                Total number of rows in the dataset.
+                If None, will infer from `array.shape[0]`.
+            num_columns:
+                The number of columns in the dataset.
+                If None, will attempt to infer from `array.shape[1]`.
+                Ignored (forced to 1) when the shape is probed and the
+                array turns out to be 1D.
+            cellarr_ctx_config:
+                Optional TileDB context configuration dict for CellArray.
+            transform:
+                Optional transform to be applied on a sample.
+        Raises:
+            ValueError:
+                If the shape has to be inferred and the array cannot be
+                opened, if the probed array is not 1D or 2D, or if the number
+                of columns is not positive for a non-empty dataset.
+        """
+        self.array_uri = array_uri
+        self.attribute_name = attribute_name
+        self.cellarr_ctx_config = cellarr_ctx_config
+        self.transform = transform
+        # Opened lazily by `_init_worker_state` on first item access.
+        self.cell_array_instance = None
+        if num_rows is not None and num_columns is not None:
+            self._len = num_rows
+            self.num_columns = num_columns
+        else:
+            # Infer whatever was not supplied from the array itself.
+            print(f"Dataset '{array_uri}': num_rows or num_columns not provided. Probing array...")
+            init_ctx_config = tiledb.Config(self.cellarr_ctx_config) if self.cellarr_ctx_config else None
+            try:
+                temp_arr = DenseCellArray(
+                    uri=self.array_uri, attr=self.attribute_name, config_or_context=init_ctx_config
+                )
+                probed_ndim = temp_arr.ndim
+                probed_shape = temp_arr.shape
+            except Exception as e:
+                # At least one of num_rows/num_columns is missing on this
+                # path, so there is no usable fallback: surface the failure.
+                raise ValueError(
+                    f"num_rows and num_columns must be provided if inferring array shape fails for '{array_uri}'."
+                ) from e
+            if probed_ndim not in (1, 2):
+                raise ValueError(f"Array ndim {probed_ndim} not supported.")
+            self._len = num_rows if num_rows is not None else probed_shape[0]
+            if probed_ndim == 1:
+                self.num_columns = 1
+            else:
+                self.num_columns = num_columns if num_columns is not None else probed_shape[1]
+            print(f"Dataset '{array_uri}': Inferred shape. Rows: {self._len}, Columns: {self.num_columns}")
+        # `feature_dim` is kept as an alias of `num_columns` so code that
+        # refers to either name sees the same value.
+        self.feature_dim = self.num_columns
+        # A non-positive column count is only an error when there are rows to serve.
+        if self.num_columns is None or (self.num_columns <= 0 and self._len > 0):
+            raise ValueError(
+                f"num_columns ({self.num_columns}) is invalid or could not be determined for array '{array_uri}'."
+            )
+        if self._len == 0:
+            warn(f"Dataset for '{array_uri}' has length 0.", RuntimeWarning)
+    def _init_worker_state(self):
+        """Initializes the DenseCellArray instance for the current worker."""
+        if self.cell_array_instance is None:
+            ctx = tiledb.Ctx(self.cellarr_ctx_config) if self.cellarr_ctx_config else None
+            self.cell_array_instance = DenseCellArray(
+                uri=self.array_uri, attr=self.attribute_name, mode="r", config_or_context=ctx
+            )
+    def __len__(self):
+        """Return the number of rows served by this dataset."""
+        return self._len
+    def __getitem__(self, idx):
+        """Return row ``idx`` of the array as a ``torch.Tensor``.
+        Args:
+            idx:
+                Row position, ``0 <= idx < len(self)``. Negative positions
+                are rejected.
+        Raises:
+            IndexError: If ``idx`` is outside ``[0, len(self))``.
+            ValueError: If the opened array is neither 1D nor 2D.
+        """
+        if not 0 <= idx < self._len:
+            raise IndexError(f"Index {idx} out of bounds for dataset of length {self._len}.")
+        self._init_worker_state()
+        if self.cell_array_instance.ndim == 2:
+            item_slice = (slice(idx, idx + 1), slice(None))
+        elif self.cell_array_instance.ndim == 1:
+            item_slice = slice(idx, idx + 1)
+        else:
+            raise ValueError(f"Array ndim {self.cell_array_instance.ndim} not supported in __getitem__.")
+        # Presumably a numpy array; it is handed to torch.from_numpy below.
+        sample_data_np = self.cell_array_instance[item_slice]
+        if sample_data_np.ndim == 2 and sample_data_np.shape[0] == 1:
+            # Drop the length-1 row axis so a sample is a flat vector.
+            sample_data_np = sample_data_np.squeeze(0)
+        elif sample_data_np.ndim == 0 and self.num_columns == 1:
+            # Scalar read from a single-column dataset: promote to shape (1,).
+            sample_data_np = np.array([sample_data_np])
+        if self.transform:
+            sample_data_np = self.transform(sample_data_np)
+        return torch.from_numpy(sample_data_np)
+def construct_dense_array_dataloader(
+    array_uri: str,
+    attribute_name: str = "data",
+    num_rows: Optional[int] = None,
+    num_columns: Optional[int] = None,
+    batch_size: int = 1000,
+    num_workers_dl: int = 2,
+) -> Optional[DataLoader]:
+    """Construct an instance of `DenseArrayDataset` with PyTorch DataLoader.
+    Args:
+        array_uri:
+            URI of the TileDB array.
+        attribute_name:
+            Name of the attribute to read from.
+        num_rows:
+            The total number of rows in the TileDB array.
+        num_columns:
+            The total number of columns in the TileDB array.
+        batch_size:
+            Number of random samples per batch generated by the dataset.
+        num_workers_dl:
+            Number of worker processes for the DataLoader.
+            Use 0 to load data in the calling process.
+    Returns:
+        A shuffling ``DataLoader`` over the dataset, or ``None`` when the
+        dataset is empty.
+    """
+    tiledb_ctx_config = {
+        "sm.tile_cache_size": 1000 * 1024**2,  # 1000 MB tile cache per worker
+        "sm.num_reader_threads": 4,
+    }
+    dataset = DenseArrayDataset(
+        array_uri=array_uri,
+        attribute_name=attribute_name,
+        num_rows=num_rows,
+        num_columns=num_columns,
+        cellarr_ctx_config=tiledb_ctx_config,
+    )
+    if len(dataset) == 0:
+        print("Dataset is empty, cannot create DataLoader.")
+        return None
+    use_workers = num_workers_dl > 0
+    # `prefetch_factor` is only accepted by DataLoader when worker processes
+    # are used; passing it with num_workers=0 raises ValueError, so it is
+    # supplied only in the multi-process case.
+    worker_options = {"prefetch_factor": 2} if use_workers else {}
+    dataloader = DataLoader(
+        dataset,
+        batch_size=batch_size,
+        shuffle=True,
+        num_workers=num_workers_dl,
+        pin_memory=True,
+        persistent_workers=use_workers,
+        **worker_options,
+    )
+    return dataloader

cellarr-array 0.1.0__tar.gz → 0.2.0__tar.gz

Potentially problematic release.

cellarr-array 0.1.0tar.gz → 0.2.0tar.gz