PyPI - cellarr-array - Versions diffs - 0.2.0__tar.gz → 0.3.0__tar.gz - Mend

cellarr-array 0.2.0 (tar.gz) → 0.3.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cellarr-array might be problematic. Click here for more details.

Files changed (56)

{cellarr_array-0.2.0 → cellarr_array-0.3.0}/.github/workflows/publish-pypi.yml RENAMED Viewed

@@ -19,10 +19,10 @@ jobs:
     steps:
       - uses: actions/checkout@v4
-      - name: Set up Python 3.11
+      - name: Set up Python 3.12
         uses: actions/setup-python@v5
         with:
-          python-version: 3.11
+          python-version: 3.12
       - name: Install dependencies
         run: |

{cellarr_array-0.2.0 → cellarr_array-0.3.0}/.github/workflows/run-tests.yml RENAMED Viewed

@@ -28,7 +28,7 @@ jobs:
   test:
     strategy:
       matrix:
-        python: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+        python: ["3.10", "3.11", "3.12", "3.13", "3.14"]
         platform:
           - ubuntu-latest
           # - macos-latest

{cellarr_array-0.2.0 → cellarr_array-0.3.0}/.pre-commit-config.yaml RENAMED Viewed

@@ -19,7 +19,7 @@ repos:
 - repo: https://github.com/astral-sh/ruff-pre-commit
   # Ruff version.
-  rev: v0.11.5
+  rev: v0.12.1
   hooks:
     - id: ruff
       args: [--fix, --exit-non-zero-on-fix]

{cellarr_array-0.2.0 → cellarr_array-0.3.0}/CHANGELOG.md RENAMED Viewed

@@ -1,8 +1,15 @@
 # Changelog
+## Version 0.3.0
+- Support for string dimensions when creating cellarr arrays.
+- Support query conditions for slice operations.
+- Added unique dim values. Only supported for sparse arrays.
+- EOL for Python 3.9
 ## Version 0.2.0
-- Dataloaders for sparse and dense arrays, We provide templates for both map and Iterable style dataloaders. Users are expected the caveats of both of these approaches.
+- Dataloaders for sparse and dense arrays, We provide templates for both map and Iterable style dataloaders. Users are expected the caveats of both of these approaches.
 - Fixed a bug with slicing on 1D arrays and many improvements for optimizing slicing parameters.
 - Update documentation and tests.

{cellarr_array-0.2.0 → cellarr_array-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,12 +1,13 @@
 Metadata-Version: 2.4
 Name: cellarr-array
-Version: 0.2.0
+Version: 0.3.0
 Summary: Base class for handling TileDB backed arrays.
 Home-page: https://github.com/cellarr/cellarr-array
 Author: Jayaram Kancherla
 Author-email: jayaram.kancherla@gmail.com
 License: MIT
 Project-URL: Documentation, https://github.com/cellarr/cellarr-array
+Project-URL: Source, https://github.com/cellarr/cellarr-array
 Platform: any
 Classifier: Development Status :: 4 - Beta
 Classifier: Programming Language :: Python
@@ -22,6 +23,7 @@ Provides-Extra: testing
 Requires-Dist: setuptools; extra == "testing"
 Requires-Dist: pytest; extra == "testing"
 Requires-Dist: pytest-cov; extra == "testing"
+Requires-Dist: pandas; extra == "testing"
 Requires-Dist: torch; extra == "testing"
 Dynamic: license-file

{cellarr_array-0.2.0 → cellarr_array-0.3.0}/setup.cfg RENAMED Viewed

@@ -10,6 +10,7 @@ long_description_content_type = text/markdown; charset=UTF-8; variant=GFM
 url = https://github.com/cellarr/cellarr-array
 project_urls =
 	Documentation = https://github.com/cellarr/cellarr-array
+	Source = https://github.com/cellarr/cellarr-array
 platforms = any
 classifiers =
 	Development Status :: 4 - Beta
@@ -39,6 +40,7 @@ testing =
 	setuptools
 	pytest
 	pytest-cov
+	pandas
 	%(optional)s
 [options.entry_points]

{cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/__init__.py RENAMED Viewed

@@ -16,4 +16,5 @@ finally:
     del version, PackageNotFoundError
 from .core import DenseCellArray, SparseCellArray
-from .utils import CellArrConfig, ConsolidationConfig, create_cellarray
+from .core.helpers import create_cellarray
+from .utils import CellArrConfig, ConsolidationConfig

{cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/core/base.py RENAMED Viewed

@@ -69,6 +69,7 @@ class CellArray(ABC):
         self._array_passed_in = False
         self._opened_array_external = None
         self._ctx = None
+        self._dim_dtypes = None
         if tiledb_array_obj is not None:
             if not isinstance(tiledb_array_obj, tiledb.Array):
@@ -185,7 +186,16 @@ class CellArray(ABC):
     def shape(self) -> Tuple[int, ...]:
         if self._shape is None:
             with self.open_array(mode="r") as A:
-                self._shape = tuple(int(dim.domain[1] - dim.domain[0] + 1) for dim in A.schema.domain)
+                shape_list = []
+                for dim in A.schema.domain:
+                    try:
+                        # This will fail for string dimensions
+                        shape_list.append(dim.shape[0])
+                    except TypeError:
+                        # For string dimensions, the shape is not well-defined.
+                        # We use a large number as a placeholder for slicing purposes.
+                        shape_list.append(2**63 - 1)
+                self._shape = tuple(shape_list)
         return self._shape
     @property
@@ -209,6 +219,14 @@ class CellArray(ABC):
                 # self._ndim = len(self.shape)
         return self._ndim
+    @property
+    def dim_dtypes(self) -> List[np.dtype]:
+        """Get dimension dtypes of the array."""
+        if self._dim_dtypes is None:
+            with self.open_array(mode="r") as A:
+                self._dim_dtypes = [dim.dtype for dim in A.schema.domain]
+        return self._dim_dtypes
     @contextmanager
     def open_array(self, mode: Optional[str] = None):
         """Context manager for array operations.
@@ -266,15 +284,30 @@ class CellArray(ABC):
         Args:
             key:
                 Slice or list of indices for each dimension in the array.
+                Alternatively, may be string to specify query conditions.
         """
+        # This is a query condition
+        if isinstance(key, str):
+            with self.open_array(mode="r") as array:
+                if self._attr is not None:
+                    return array.query(cond=key, attrs=[self._attr])[:]
+                else:
+                    array.query(cond=key)[:]
         if not isinstance(key, tuple):
             key = (key,)
         if len(key) > self.ndim:
             raise IndexError(f"Invalid number of dimensions: got {len(key)}, expected {self.ndim}")
+        if len(key) < self.ndim:
+            key = key + (slice(None),) * (self.ndim - len(key))
         # Normalize all indices
-        normalized_key = tuple(SliceHelper.normalize_index(idx, self.shape[i]) for i, idx in enumerate(key))
+        normalized_key = tuple(
+            SliceHelper.normalize_index(idx, self.shape[i], self.dim_dtypes[i]) for i, idx in enumerate(key)
+        )
         num_ellipsis = sum(isinstance(i, EllipsisType) for i in normalized_key)
         if num_ellipsis > 1:
@@ -342,3 +375,17 @@ class CellArray(ABC):
                 Additional arguments for write operation.
         """
         pass
+    def get_unique_dim_values(self, dim_name: Optional[str] = None) -> np.ndarray:
+        """Get unique values for a dimension.
+        Args:
+            dim_name:
+                The name of the dimension. If None, unique values for all
+                dimensions are returned.
+        Returns:
+            An array of unique dimension values.
+        """
+        with self.open_array(mode="r") as A:
+            return A.unique_dim_values(dim_name)

{cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/core/helpers.py RENAMED Viewed

@@ -103,19 +103,27 @@ def create_cellarray(
     if not (len(shape) == len(dim_dtypes) == len(dim_names)):
         raise ValueError("Lengths of 'shape', 'dim_dtypes', and 'dim_names' must match.")
-    dom = tiledb.Domain(
-        *[
+    dims = []
+    for name, s, dt in zip(dim_names, shape, dim_dtypes):
+        if np.issubdtype(dt, np.integer):
+            domain = (0, 0 if s == 0 else s - 1)
+            tile = min(1 if s == 0 else s // 2, config.tile_capacity // 2)
+            dim_dtype = dt
+        else:  # Assumes string or object dtype
+            domain = (None, None)
+            tile = None
+            dim_dtype = "ascii"
+        dims.append(
             tiledb.Dim(
                 name=name,
-                # supporting empty dimensions
-                domain=(0, 0 if s == 0 else s - 1),
-                tile=min(1 if s == 0 else s // 2, config.tile_capacity // 2),
-                dtype=dt,
+                domain=domain,
+                tile=tile,
+                dtype=dim_dtype,
             )
-            for name, s, dt in zip(dim_names, shape, dim_dtypes)
-        ],
-        ctx=tiledb_ctx,
-    )
+        )
+    dom = tiledb.Domain(*dims, ctx=tiledb_ctx)
     attr_obj = tiledb.Attr(
         name=attr_name,
         dtype=attr_dtype,
@@ -149,10 +157,17 @@ class SliceHelper:
     """Helper class for handling array slicing operations."""
     @staticmethod
-    def is_contiguous_indices(indices: List[int]) -> Optional[slice]:
+    def is_contiguous_indices(indices: List) -> Optional[slice]:
+        """Checks if a list of indices is contiguous and can be converted to a slice.
+        Returns None if the list is not contiguous or contains non-integers.
+        """
         if not indices:
             return None
+        if not all(isinstance(i, (int, np.integer)) for i in indices):
+            return None
         sorted_indices = sorted(list(set(indices)))
         if not sorted_indices:
             return None
@@ -168,20 +183,33 @@ class SliceHelper:
     @staticmethod
     def normalize_index(
-        idx: Union[int, range, slice, List[int], EllipsisType], dim_size: int
-    ) -> Union[slice, List[int], EllipsisType]:
+        idx: Union[int, range, slice, List, str, EllipsisType],
+        dim_size: int,
+        dim_dtype: np.dtype,
+    ):
         """Normalize index to handle negative indices and ensure consistency."""
+        is_string_dim = np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_)
+        if is_string_dim:
+            if isinstance(idx, (str, bytes)):
+                return [idx]
+            if isinstance(idx, list) and all(isinstance(i, (str, bytes)) for i in idx):
+                return idx
+            if isinstance(idx, slice):
+                # For string dimensions, we do not normalize the slice with integer sizes
+                return idx
+            if isinstance(idx, EllipsisType):
+                return idx
+            raise TypeError(f"Unsupported index type '{type(idx).__name__}' for string dimension.")
         if isinstance(idx, EllipsisType):
             return idx
-        # Convert ranges to slices
         if isinstance(idx, range):
             idx = slice(idx.start, idx.stop, idx.step)
         if isinstance(idx, slice):
-            start = idx.start
-            stop = idx.stop
-            step = idx.step
+            start, stop, step = idx.start, idx.stop, idx.step
             # Resolve None to full dimension slice parts
             if start is None:
@@ -196,44 +224,32 @@ class SliceHelper:
             if stop < 0:
                 stop += dim_size
-            # slice allows start > dim_size or stop < 0 to result in empty slices.
-            # Note: start == dim_size is OK for empty slice like arr[dim_size:]
-            if start < 0 or (start >= dim_size and dim_size > 0):
-                if not (start == dim_size and (step is None or step > 0)):
-                    if start >= dim_size:
-                        raise IndexError(
-                            f"Start index {idx.start if idx.start is not None else 'None'} results in {start}, which is out of bounds for dimension size {dim_size}."
-                        )
             # Clamping slice arguments to dimensions
             stop = min(stop, dim_size)
             start = max(0, start)
             return slice(start, stop, step)
-        elif isinstance(idx, list):
+        if isinstance(idx, list):
             if not idx:
                 return []
+            # This check only applies to integer lists
+            if not all(isinstance(i, (int, np.integer)) for i in idx):
+                raise TypeError("List indices must be integers for numeric dimensions.")
             norm_idx = [i if i >= 0 else dim_size + i for i in idx]
             if any(i < 0 or i >= dim_size for i in norm_idx):
-                oob_indices = [orig_i for orig_i, norm_i in zip(idx, norm_idx) if not (0 <= norm_i < dim_size)]
-                raise IndexError(
-                    f"List indices {oob_indices} (original values) are out of bounds for dimension size {dim_size}."
-                )
-            # TileDB multi_index usually returns data sorted by coordinates
+                raise IndexError("List indices out of bounds for dimension size.")
             return sorted(list(set(norm_idx)))
-        elif isinstance(idx, (int, np.integer)):
+        if isinstance(idx, (int, np.integer)):
             norm_idx = int(idx)
             if norm_idx < 0:
                 norm_idx += dim_size
             if not (0 <= norm_idx < dim_size):
-                raise IndexError(f"Index {idx} out of bounds for dimension size {dim_size}")
+                raise IndexError(f"Index {idx} out of bounds for dimension size.")
             return slice(norm_idx, norm_idx + 1, None)
-        else:
-            raise TypeError(f"Index type {type(idx)} not supported for normalization.")
+        raise TypeError(f"Index type {type(idx)} not supported for normalization.")
 def create_group(output_path, group_name):

{cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/core/sparse.py RENAMED Viewed

@@ -141,7 +141,6 @@ class SparseCellArray(CellArray):
         data = result[self._attr]
         if len(data) == 0:
-            print("is emoty")
             if not self.return_sparse:
                 return result
             else:

{cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/PKG-INFO RENAMED Viewed

@@ -1,12 +1,13 @@
 Metadata-Version: 2.4
 Name: cellarr-array
-Version: 0.2.0
+Version: 0.3.0
 Summary: Base class for handling TileDB backed arrays.
 Home-page: https://github.com/cellarr/cellarr-array
 Author: Jayaram Kancherla
 Author-email: jayaram.kancherla@gmail.com
 License: MIT
 Project-URL: Documentation, https://github.com/cellarr/cellarr-array
+Project-URL: Source, https://github.com/cellarr/cellarr-array
 Platform: any
 Classifier: Development Status :: 4 - Beta
 Classifier: Programming Language :: Python
@@ -22,6 +23,7 @@ Provides-Extra: testing
 Requires-Dist: setuptools; extra == "testing"
 Requires-Dist: pytest; extra == "testing"
 Requires-Dist: pytest-cov; extra == "testing"
+Requires-Dist: pandas; extra == "testing"
 Requires-Dist: torch; extra == "testing"
 Dynamic: license-file

{cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/SOURCES.txt RENAMED Viewed

@@ -50,4 +50,6 @@ tests/test_helpers.py
 tests/test_inmemory.py
 tests/test_iterable_loader.py
 tests/test_map_loader.py
-tests/test_sparse.py
+tests/test_query.py
+tests/test_sparse.py
+tests/test_string_dims.py

{cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/requires.txt RENAMED Viewed

@@ -12,4 +12,5 @@ torch
 setuptools
 pytest
 pytest-cov
+pandas
 torch

{cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/test_dense.py RENAMED Viewed

@@ -178,7 +178,7 @@ def test_invalid_operations(sample_dense_array_2d):
     with pytest.raises(IndexError, match="Invalid number of dimensions"):
         _ = sample_dense_array_2d[0:10, 0:10, 0:10]
-    with pytest.raises(IndexError, match="out of bounds"):
+    with pytest.raises(Exception):
         _ = sample_dense_array_2d[200:300]

{cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/test_helpers.py RENAMED Viewed

@@ -30,20 +30,20 @@ def test_slice_normalize_index():
     dim_size = 10
     # Test positive slice
-    assert SliceHelper.normalize_index(slice(1, 5), dim_size) == slice(1, 5, None)
+    assert SliceHelper.normalize_index(slice(1, 5), dim_size, dim_dtype=np.int32) == slice(1, 5, None)
     # Test negative slice
-    assert SliceHelper.normalize_index(slice(-3, -1), dim_size) == slice(7, 9, None)
+    assert SliceHelper.normalize_index(slice(-3, -1), dim_size, dim_dtype=np.int32) == slice(7, 9, None)
     # Test None values in slice
-    assert SliceHelper.normalize_index(slice(None, None), dim_size) == slice(0, 10, None)
+    assert SliceHelper.normalize_index(slice(None, None), dim_size, dim_dtype=np.int32) == slice(0, 10, None)
     # Test list of indices
-    assert SliceHelper.normalize_index([1, -1], dim_size) == [1, 9]
+    assert SliceHelper.normalize_index([1, -1], dim_size, dim_dtype=np.int32) == [1, 9]
     # Test single integer
-    assert SliceHelper.normalize_index(5, dim_size) == slice(5, 6, None)
-    assert SliceHelper.normalize_index(-1, dim_size) == slice(9, 10, None)
+    assert SliceHelper.normalize_index(5, dim_size, dim_dtype=np.int32) == slice(5, 6, None)
+    assert SliceHelper.normalize_index(-1, dim_size, dim_dtype=np.int32) == slice(9, 10, None)
 def test_slice_bounds_validation():
@@ -51,29 +51,29 @@ def test_slice_bounds_validation():
     # Test out of bounds positive indices
     with pytest.raises(IndexError, match="out of bounds"):
-        SliceHelper.normalize_index(10, dim_size)
+        SliceHelper.normalize_index(10, dim_size, dim_dtype=np.int32)
     with pytest.raises(IndexError, match="out of bounds"):
-        SliceHelper.normalize_index(15, dim_size)
+        SliceHelper.normalize_index(15, dim_size, dim_dtype=np.int32)
     # Test out of bounds negative indices
     with pytest.raises(IndexError, match="out of bounds"):
-        SliceHelper.normalize_index(-11, dim_size)
+        SliceHelper.normalize_index(-11, dim_size, dim_dtype=np.int32)
     with pytest.raises(IndexError, match="out of bounds"):
-        SliceHelper.normalize_index(-15, dim_size)
+        SliceHelper.normalize_index(-15, dim_size, dim_dtype=np.int32)
     # Test out of bounds list indices
     with pytest.raises(IndexError, match="out of bounds"):
-        SliceHelper.normalize_index([5, 12], dim_size)
+        SliceHelper.normalize_index([5, 12], dim_size, dim_dtype=np.int32)
-    norm_slice = SliceHelper.normalize_index(slice(5, 15), dim_size)
+    norm_slice = SliceHelper.normalize_index(slice(5, 15), dim_size, dim_dtype=np.int32)
     assert norm_slice == slice(5, 10)
-    norm_slice_neg_stop = SliceHelper.normalize_index(slice(1, -12), dim_size)
+    norm_slice_neg_stop = SliceHelper.normalize_index(slice(1, -12), dim_size, dim_dtype=np.int32)
     assert norm_slice_neg_stop == slice(1, -2)
     # Test list with out of bounds
-    with pytest.raises(IndexError, match="List indices .* are out of bounds"):
-        SliceHelper.normalize_index([1, 10, 2], dim_size)
+    with pytest.raises(IndexError, match="List indices .*"):
+        SliceHelper.normalize_index([1, 10, 2], dim_size, dim_dtype=np.int32)
 def test_cellarr_config():

cellarr_array-0.3.0/tests/test_query.py ADDED Viewed

@@ -0,0 +1,63 @@
+import os
+import shutil
+import numpy as np
+import pytest
+import scipy.sparse as sp
+from cellarr_array import DenseCellArray, SparseCellArray, create_cellarray
+__author__ = "Jayaram Kancherla"
+__copyright__ = "Jayaram Kancherla"
+__license__ = "MIT"
+@pytest.fixture
+def dense_array_uri():
+    uri = "test_dense_array_query"
+    if os.path.exists(uri):
+        shutil.rmtree(uri)
+    create_cellarray(uri, shape=(10, 5), sparse=False)
+    arr = DenseCellArray(uri, mode="w")
+    data = np.arange(50).reshape(10, 5)
+    arr.write_batch(data, start_row=0)
+    return uri
+@pytest.fixture
+def sparse_array_uri():
+    uri = "test_sparse_array_query"
+    if os.path.exists(uri):
+        shutil.rmtree(uri)
+    arr = create_cellarray(uri, shape=(10, 5), sparse=True)
+    data = sp.csr_matrix(np.arange(50).reshape(10, 5))
+    arr.write_batch(data, start_row=0)
+    return uri
+def test_dense_array_query(dense_array_uri):
+    arr = DenseCellArray(dense_array_uri)
+    with pytest.raises(Exception):
+        result = arr["dim_0 > 5"]
+    result = arr["data > 5"]
+    assert isinstance(result["data"], np.ndarray)
+def test_sparse_array_query(sparse_array_uri):
+    arr = SparseCellArray(sparse_array_uri, return_sparse=False)
+    result = arr["dim_0 > 5"]
+    # Even if empty, it should return a dictionary with the correct keys
+    assert "data" in result
+    assert "dim_0" in result
+    assert "dim_1" in result
+def test_get_unique_dim_values(sparse_array_uri):
+    arr = SparseCellArray(sparse_array_uri)
+    unique_rows = arr.get_unique_dim_values("dim_0")
+    assert np.array_equal(unique_rows, np.arange(10))

cellarr_array-0.3.0/tests/test_string_dims.py ADDED Viewed

@@ -0,0 +1,73 @@
+import os
+import shutil
+import numpy as np
+import pandas as pd
+import pytest
+import tiledb
+from cellarr_array import SparseCellArray, create_cellarray
+__author__ = "Jayaram Kancherla"
+__copyright__ = "Jayaram Kancherla"
+__license__ = "MIT"
+@pytest.fixture
+def string_dim_array_uri():
+    uri = "test_string_dim_array"
+    if os.path.exists(uri):
+        shutil.rmtree(uri)
+    create_cellarray(uri, sparse=True, dim_dtypes=[str, str], attr_dtype=np.float64, attr_name="value")
+    yield uri
+    shutil.rmtree(uri)
+def test_create_string_dim_schema(string_dim_array_uri):
+    with tiledb.open(string_dim_array_uri, "r") as A:
+        schema = A.schema
+        assert schema.domain.dim(0).dtype == np.dtype("S")
+        assert schema.domain.dim(1).dtype == np.dtype("S")
+        assert schema.attr("value").dtype == np.float64
+def test_string_dim_write_read(string_dim_array_uri):
+    sca = SparseCellArray(string_dim_array_uri, attr="value", mode="w", return_sparse=False)
+    rows = np.array(["cell_A", "cell_B", "cell_C"])
+    cols = np.array(["gene_X", "gene_Y", "gene_Z"])
+    values = np.array([1.1, 2.2, 3.3])
+    with sca.open_array() as A:
+        A[rows, cols] = values
+    sca_read = SparseCellArray(string_dim_array_uri, attr="value", return_sparse=False)
+    data = sca_read[:]
+    data["dim_0"] = [x.decode("ascii") for x in data["dim_0"]]
+    data["dim_1"] = [x.decode("ascii") for x in data["dim_1"]]
+    assert len(data["value"]) == 3
+    pd.testing.assert_frame_equal(
+        pd.DataFrame({"value": values, "dim_0": rows, "dim_1": cols})
+        .sort_values(by=["dim_0", "dim_1"])
+        .reset_index(drop=True),
+        pd.DataFrame(data).sort_values(by=["dim_0", "dim_1"]).reset_index(drop=True),
+    )
+def test_string_dim_slicing(string_dim_array_uri):
+    sca = SparseCellArray(string_dim_array_uri, attr="value", mode="w", return_sparse=False)
+    with sca.open_array() as A:
+        A[["cell_A", "cell_A", "cell_B"], ["gene_X", "gene_Y", "gene_Y"]] = np.array([1.0, 2.0, 3.0])
+    sca_read = SparseCellArray(string_dim_array_uri, attr="value", return_sparse=False)
+    subset = sca_read[["cell_A"], :]
+    assert len(subset["value"]) == 2
+    assert all(r.decode("ascii") == "cell_A" for r in subset["dim_0"])
+    assert set([x.decode("ascii") for x in subset["dim_1"]]) == {"gene_X", "gene_Y"}