cellarr-array 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cellarr-array might be problematic. Click here for more details.

Files changed (56)
  1. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/.github/workflows/publish-pypi.yml +2 -2
  2. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/.github/workflows/run-tests.yml +1 -1
  3. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/.pre-commit-config.yaml +1 -1
  4. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/CHANGELOG.md +8 -1
  5. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/PKG-INFO +3 -1
  6. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/setup.cfg +2 -0
  7. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/__init__.py +2 -1
  8. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/core/base.py +49 -2
  9. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/core/helpers.py +56 -40
  10. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/core/sparse.py +0 -1
  11. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/PKG-INFO +3 -1
  12. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/SOURCES.txt +3 -1
  13. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/requires.txt +1 -0
  14. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/test_dense.py +1 -1
  15. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/test_helpers.py +15 -15
  16. cellarr_array-0.3.0/tests/test_query.py +63 -0
  17. cellarr_array-0.3.0/tests/test_string_dims.py +73 -0
  18. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/.coveragerc +0 -0
  19. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/.gitignore +0 -0
  20. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/.readthedocs.yml +0 -0
  21. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/AUTHORS.md +0 -0
  22. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/CONTRIBUTING.md +0 -0
  23. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/LICENSE.txt +0 -0
  24. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/README.md +0 -0
  25. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/Makefile +0 -0
  26. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/_static/.gitignore +0 -0
  27. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/authors.md +0 -0
  28. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/changelog.md +0 -0
  29. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/conf.py +0 -0
  30. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/contributing.md +0 -0
  31. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/index.md +0 -0
  32. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/license.md +0 -0
  33. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/readme.md +0 -0
  34. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/requirements.txt +0 -0
  35. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/pyproject.toml +0 -0
  36. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/setup.py +0 -0
  37. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/core/__init__.py +0 -0
  38. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/core/dense.py +0 -0
  39. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/dataloaders/__init__.py +0 -0
  40. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/dataloaders/denseloader.py +0 -0
  41. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/dataloaders/iterabledataloader.py +0 -0
  42. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/dataloaders/sparseloader.py +0 -0
  43. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/dataloaders/utils.py +0 -0
  44. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/utils/__init__.py +0 -0
  45. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/utils/config.py +0 -0
  46. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/utils/mock.py +0 -0
  47. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/dependency_links.txt +0 -0
  48. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/not-zip-safe +0 -0
  49. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/top_level.txt +0 -0
  50. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/conftest.py +0 -0
  51. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/test_all.py +0 -0
  52. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/test_inmemory.py +0 -0
  53. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/test_iterable_loader.py +0 -0
  54. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/test_map_loader.py +0 -0
  55. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/test_sparse.py +0 -0
  56. {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tox.ini +0 -0
@@ -19,10 +19,10 @@ jobs:
19
19
  steps:
20
20
  - uses: actions/checkout@v4
21
21
 
22
- - name: Set up Python 3.11
22
+ - name: Set up Python 3.12
23
23
  uses: actions/setup-python@v5
24
24
  with:
25
- python-version: 3.11
25
+ python-version: 3.12
26
26
 
27
27
  - name: Install dependencies
28
28
  run: |
@@ -28,7 +28,7 @@ jobs:
28
28
  test:
29
29
  strategy:
30
30
  matrix:
31
- python: ["3.9", "3.10", "3.11", "3.12", "3.13"]
31
+ python: ["3.10", "3.11", "3.12", "3.13", "3.14"]
32
32
  platform:
33
33
  - ubuntu-latest
34
34
  # - macos-latest
@@ -19,7 +19,7 @@ repos:
19
19
 
20
20
  - repo: https://github.com/astral-sh/ruff-pre-commit
21
21
  # Ruff version.
22
- rev: v0.11.5
22
+ rev: v0.12.1
23
23
  hooks:
24
24
  - id: ruff
25
25
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,8 +1,15 @@
1
1
  # Changelog
2
2
 
3
+ ## Version 0.3.0
4
+
5
+ - Support for string dimensions when creating cellarr arrays.
6
+ - Support query conditions for slice operations.
7
+ - Added unique dim values. Only supported for sparse arrays.
8
+ - EOL for Python 3.9
9
+
3
10
  ## Version 0.2.0
4
11
 
5
- - Dataloaders for sparse and dense arrays, We provide templates for both map and Iterable style dataloaders. Users are expected the caveats of both of these approaches.
12
+ - Dataloaders for sparse and dense arrays, We provide templates for both map and Iterable style dataloaders. Users are expected the caveats of both of these approaches.
6
13
  - Fixed a bug with slicing on 1D arrays and many improvements for optimizing slicing parameters.
7
14
  - Update documentation and tests.
8
15
 
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cellarr-array
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Base class for handling TileDB backed arrays.
5
5
  Home-page: https://github.com/cellarr/cellarr-array
6
6
  Author: Jayaram Kancherla
7
7
  Author-email: jayaram.kancherla@gmail.com
8
8
  License: MIT
9
9
  Project-URL: Documentation, https://github.com/cellarr/cellarr-array
10
+ Project-URL: Source, https://github.com/cellarr/cellarr-array
10
11
  Platform: any
11
12
  Classifier: Development Status :: 4 - Beta
12
13
  Classifier: Programming Language :: Python
@@ -22,6 +23,7 @@ Provides-Extra: testing
22
23
  Requires-Dist: setuptools; extra == "testing"
23
24
  Requires-Dist: pytest; extra == "testing"
24
25
  Requires-Dist: pytest-cov; extra == "testing"
26
+ Requires-Dist: pandas; extra == "testing"
25
27
  Requires-Dist: torch; extra == "testing"
26
28
  Dynamic: license-file
27
29
 
@@ -10,6 +10,7 @@ long_description_content_type = text/markdown; charset=UTF-8; variant=GFM
10
10
  url = https://github.com/cellarr/cellarr-array
11
11
  project_urls =
12
12
  Documentation = https://github.com/cellarr/cellarr-array
13
+ Source = https://github.com/cellarr/cellarr-array
13
14
  platforms = any
14
15
  classifiers =
15
16
  Development Status :: 4 - Beta
@@ -39,6 +40,7 @@ testing =
39
40
  setuptools
40
41
  pytest
41
42
  pytest-cov
43
+ pandas
42
44
  %(optional)s
43
45
 
44
46
  [options.entry_points]
@@ -16,4 +16,5 @@ finally:
16
16
  del version, PackageNotFoundError
17
17
 
18
18
  from .core import DenseCellArray, SparseCellArray
19
- from .utils import CellArrConfig, ConsolidationConfig, create_cellarray
19
+ from .core.helpers import create_cellarray
20
+ from .utils import CellArrConfig, ConsolidationConfig
@@ -69,6 +69,7 @@ class CellArray(ABC):
69
69
  self._array_passed_in = False
70
70
  self._opened_array_external = None
71
71
  self._ctx = None
72
+ self._dim_dtypes = None
72
73
 
73
74
  if tiledb_array_obj is not None:
74
75
  if not isinstance(tiledb_array_obj, tiledb.Array):
@@ -185,7 +186,16 @@ class CellArray(ABC):
185
186
  def shape(self) -> Tuple[int, ...]:
186
187
  if self._shape is None:
187
188
  with self.open_array(mode="r") as A:
188
- self._shape = tuple(int(dim.domain[1] - dim.domain[0] + 1) for dim in A.schema.domain)
189
+ shape_list = []
190
+ for dim in A.schema.domain:
191
+ try:
192
+ # This will fail for string dimensions
193
+ shape_list.append(dim.shape[0])
194
+ except TypeError:
195
+ # For string dimensions, the shape is not well-defined.
196
+ # We use a large number as a placeholder for slicing purposes.
197
+ shape_list.append(2**63 - 1)
198
+ self._shape = tuple(shape_list)
189
199
  return self._shape
190
200
 
191
201
  @property
@@ -209,6 +219,14 @@ class CellArray(ABC):
209
219
  # self._ndim = len(self.shape)
210
220
  return self._ndim
211
221
 
222
+ @property
223
+ def dim_dtypes(self) -> List[np.dtype]:
224
+ """Get dimension dtypes of the array."""
225
+ if self._dim_dtypes is None:
226
+ with self.open_array(mode="r") as A:
227
+ self._dim_dtypes = [dim.dtype for dim in A.schema.domain]
228
+ return self._dim_dtypes
229
+
212
230
  @contextmanager
213
231
  def open_array(self, mode: Optional[str] = None):
214
232
  """Context manager for array operations.
@@ -266,15 +284,30 @@ class CellArray(ABC):
266
284
  Args:
267
285
  key:
268
286
  Slice or list of indices for each dimension in the array.
287
+
288
+ Alternatively, may be string to specify query conditions.
269
289
  """
290
+ # This is a query condition
291
+ if isinstance(key, str):
292
+ with self.open_array(mode="r") as array:
293
+ if self._attr is not None:
294
+ return array.query(cond=key, attrs=[self._attr])[:]
295
+ else:
296
+ array.query(cond=key)[:]
297
+
270
298
  if not isinstance(key, tuple):
271
299
  key = (key,)
272
300
 
273
301
  if len(key) > self.ndim:
274
302
  raise IndexError(f"Invalid number of dimensions: got {len(key)}, expected {self.ndim}")
275
303
 
304
+ if len(key) < self.ndim:
305
+ key = key + (slice(None),) * (self.ndim - len(key))
306
+
276
307
  # Normalize all indices
277
- normalized_key = tuple(SliceHelper.normalize_index(idx, self.shape[i]) for i, idx in enumerate(key))
308
+ normalized_key = tuple(
309
+ SliceHelper.normalize_index(idx, self.shape[i], self.dim_dtypes[i]) for i, idx in enumerate(key)
310
+ )
278
311
 
279
312
  num_ellipsis = sum(isinstance(i, EllipsisType) for i in normalized_key)
280
313
  if num_ellipsis > 1:
@@ -342,3 +375,17 @@ class CellArray(ABC):
342
375
  Additional arguments for write operation.
343
376
  """
344
377
  pass
378
+
379
+ def get_unique_dim_values(self, dim_name: Optional[str] = None) -> np.ndarray:
380
+ """Get unique values for a dimension.
381
+
382
+ Args:
383
+ dim_name:
384
+ The name of the dimension. If None, unique values for all
385
+ dimensions are returned.
386
+
387
+ Returns:
388
+ An array of unique dimension values.
389
+ """
390
+ with self.open_array(mode="r") as A:
391
+ return A.unique_dim_values(dim_name)
@@ -103,19 +103,27 @@ def create_cellarray(
103
103
  if not (len(shape) == len(dim_dtypes) == len(dim_names)):
104
104
  raise ValueError("Lengths of 'shape', 'dim_dtypes', and 'dim_names' must match.")
105
105
 
106
- dom = tiledb.Domain(
107
- *[
106
+ dims = []
107
+ for name, s, dt in zip(dim_names, shape, dim_dtypes):
108
+ if np.issubdtype(dt, np.integer):
109
+ domain = (0, 0 if s == 0 else s - 1)
110
+ tile = min(1 if s == 0 else s // 2, config.tile_capacity // 2)
111
+ dim_dtype = dt
112
+ else: # Assumes string or object dtype
113
+ domain = (None, None)
114
+ tile = None
115
+ dim_dtype = "ascii"
116
+
117
+ dims.append(
108
118
  tiledb.Dim(
109
119
  name=name,
110
- # supporting empty dimensions
111
- domain=(0, 0 if s == 0 else s - 1),
112
- tile=min(1 if s == 0 else s // 2, config.tile_capacity // 2),
113
- dtype=dt,
120
+ domain=domain,
121
+ tile=tile,
122
+ dtype=dim_dtype,
114
123
  )
115
- for name, s, dt in zip(dim_names, shape, dim_dtypes)
116
- ],
117
- ctx=tiledb_ctx,
118
- )
124
+ )
125
+
126
+ dom = tiledb.Domain(*dims, ctx=tiledb_ctx)
119
127
  attr_obj = tiledb.Attr(
120
128
  name=attr_name,
121
129
  dtype=attr_dtype,
@@ -149,10 +157,17 @@ class SliceHelper:
149
157
  """Helper class for handling array slicing operations."""
150
158
 
151
159
  @staticmethod
152
- def is_contiguous_indices(indices: List[int]) -> Optional[slice]:
160
+ def is_contiguous_indices(indices: List) -> Optional[slice]:
161
+ """Checks if a list of indices is contiguous and can be converted to a slice.
162
+
163
+ Returns None if the list is not contiguous or contains non-integers.
164
+ """
153
165
  if not indices:
154
166
  return None
155
167
 
168
+ if not all(isinstance(i, (int, np.integer)) for i in indices):
169
+ return None
170
+
156
171
  sorted_indices = sorted(list(set(indices)))
157
172
  if not sorted_indices:
158
173
  return None
@@ -168,20 +183,33 @@ class SliceHelper:
168
183
 
169
184
  @staticmethod
170
185
  def normalize_index(
171
- idx: Union[int, range, slice, List[int], EllipsisType], dim_size: int
172
- ) -> Union[slice, List[int], EllipsisType]:
186
+ idx: Union[int, range, slice, List, str, EllipsisType],
187
+ dim_size: int,
188
+ dim_dtype: np.dtype,
189
+ ):
173
190
  """Normalize index to handle negative indices and ensure consistency."""
191
+ is_string_dim = np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_)
192
+
193
+ if is_string_dim:
194
+ if isinstance(idx, (str, bytes)):
195
+ return [idx]
196
+ if isinstance(idx, list) and all(isinstance(i, (str, bytes)) for i in idx):
197
+ return idx
198
+ if isinstance(idx, slice):
199
+ # For string dimensions, we do not normalize the slice with integer sizes
200
+ return idx
201
+ if isinstance(idx, EllipsisType):
202
+ return idx
203
+ raise TypeError(f"Unsupported index type '{type(idx).__name__}' for string dimension.")
204
+
174
205
  if isinstance(idx, EllipsisType):
175
206
  return idx
176
207
 
177
- # Convert ranges to slices
178
208
  if isinstance(idx, range):
179
209
  idx = slice(idx.start, idx.stop, idx.step)
180
210
 
181
211
  if isinstance(idx, slice):
182
- start = idx.start
183
- stop = idx.stop
184
- step = idx.step
212
+ start, stop, step = idx.start, idx.stop, idx.step
185
213
 
186
214
  # Resolve None to full dimension slice parts
187
215
  if start is None:
@@ -196,44 +224,32 @@ class SliceHelper:
196
224
  if stop < 0:
197
225
  stop += dim_size
198
226
 
199
- # slice allows start > dim_size or stop < 0 to result in empty slices.
200
- # Note: start == dim_size is OK for empty slice like arr[dim_size:]
201
- if start < 0 or (start >= dim_size and dim_size > 0):
202
- if not (start == dim_size and (step is None or step > 0)):
203
- if start >= dim_size:
204
- raise IndexError(
205
- f"Start index {idx.start if idx.start is not None else 'None'} results in {start}, which is out of bounds for dimension size {dim_size}."
206
- )
207
-
208
227
  # Clamping slice arguments to dimensions
209
228
  stop = min(stop, dim_size)
210
229
  start = max(0, start)
211
-
212
230
  return slice(start, stop, step)
213
- elif isinstance(idx, list):
231
+
232
+ if isinstance(idx, list):
214
233
  if not idx:
215
234
  return []
235
+ # This check only applies to integer lists
236
+ if not all(isinstance(i, (int, np.integer)) for i in idx):
237
+ raise TypeError("List indices must be integers for numeric dimensions.")
216
238
 
217
239
  norm_idx = [i if i >= 0 else dim_size + i for i in idx]
218
240
  if any(i < 0 or i >= dim_size for i in norm_idx):
219
- oob_indices = [orig_i for orig_i, norm_i in zip(idx, norm_idx) if not (0 <= norm_i < dim_size)]
220
- raise IndexError(
221
- f"List indices {oob_indices} (original values) are out of bounds for dimension size {dim_size}."
222
- )
223
-
224
- # TileDB multi_index usually returns data sorted by coordinates
241
+ raise IndexError("List indices out of bounds for dimension size.")
225
242
  return sorted(list(set(norm_idx)))
226
- elif isinstance(idx, (int, np.integer)):
243
+
244
+ if isinstance(idx, (int, np.integer)):
227
245
  norm_idx = int(idx)
228
246
  if norm_idx < 0:
229
247
  norm_idx += dim_size
230
-
231
248
  if not (0 <= norm_idx < dim_size):
232
- raise IndexError(f"Index {idx} out of bounds for dimension size {dim_size}")
233
-
249
+ raise IndexError(f"Index {idx} out of bounds for dimension size.")
234
250
  return slice(norm_idx, norm_idx + 1, None)
235
- else:
236
- raise TypeError(f"Index type {type(idx)} not supported for normalization.")
251
+
252
+ raise TypeError(f"Index type {type(idx)} not supported for normalization.")
237
253
 
238
254
 
239
255
  def create_group(output_path, group_name):
@@ -141,7 +141,6 @@ class SparseCellArray(CellArray):
141
141
  data = result[self._attr]
142
142
 
143
143
  if len(data) == 0:
144
- print("is emoty")
145
144
  if not self.return_sparse:
146
145
  return result
147
146
  else:
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cellarr-array
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Base class for handling TileDB backed arrays.
5
5
  Home-page: https://github.com/cellarr/cellarr-array
6
6
  Author: Jayaram Kancherla
7
7
  Author-email: jayaram.kancherla@gmail.com
8
8
  License: MIT
9
9
  Project-URL: Documentation, https://github.com/cellarr/cellarr-array
10
+ Project-URL: Source, https://github.com/cellarr/cellarr-array
10
11
  Platform: any
11
12
  Classifier: Development Status :: 4 - Beta
12
13
  Classifier: Programming Language :: Python
@@ -22,6 +23,7 @@ Provides-Extra: testing
22
23
  Requires-Dist: setuptools; extra == "testing"
23
24
  Requires-Dist: pytest; extra == "testing"
24
25
  Requires-Dist: pytest-cov; extra == "testing"
26
+ Requires-Dist: pandas; extra == "testing"
25
27
  Requires-Dist: torch; extra == "testing"
26
28
  Dynamic: license-file
27
29
 
@@ -50,4 +50,6 @@ tests/test_helpers.py
50
50
  tests/test_inmemory.py
51
51
  tests/test_iterable_loader.py
52
52
  tests/test_map_loader.py
53
- tests/test_sparse.py
53
+ tests/test_query.py
54
+ tests/test_sparse.py
55
+ tests/test_string_dims.py
@@ -12,4 +12,5 @@ torch
12
12
  setuptools
13
13
  pytest
14
14
  pytest-cov
15
+ pandas
15
16
  torch
@@ -178,7 +178,7 @@ def test_invalid_operations(sample_dense_array_2d):
178
178
  with pytest.raises(IndexError, match="Invalid number of dimensions"):
179
179
  _ = sample_dense_array_2d[0:10, 0:10, 0:10]
180
180
 
181
- with pytest.raises(IndexError, match="out of bounds"):
181
+ with pytest.raises(Exception):
182
182
  _ = sample_dense_array_2d[200:300]
183
183
 
184
184
 
@@ -30,20 +30,20 @@ def test_slice_normalize_index():
30
30
  dim_size = 10
31
31
 
32
32
  # Test positive slice
33
- assert SliceHelper.normalize_index(slice(1, 5), dim_size) == slice(1, 5, None)
33
+ assert SliceHelper.normalize_index(slice(1, 5), dim_size, dim_dtype=np.int32) == slice(1, 5, None)
34
34
 
35
35
  # Test negative slice
36
- assert SliceHelper.normalize_index(slice(-3, -1), dim_size) == slice(7, 9, None)
36
+ assert SliceHelper.normalize_index(slice(-3, -1), dim_size, dim_dtype=np.int32) == slice(7, 9, None)
37
37
 
38
38
  # Test None values in slice
39
- assert SliceHelper.normalize_index(slice(None, None), dim_size) == slice(0, 10, None)
39
+ assert SliceHelper.normalize_index(slice(None, None), dim_size, dim_dtype=np.int32) == slice(0, 10, None)
40
40
 
41
41
  # Test list of indices
42
- assert SliceHelper.normalize_index([1, -1], dim_size) == [1, 9]
42
+ assert SliceHelper.normalize_index([1, -1], dim_size, dim_dtype=np.int32) == [1, 9]
43
43
 
44
44
  # Test single integer
45
- assert SliceHelper.normalize_index(5, dim_size) == slice(5, 6, None)
46
- assert SliceHelper.normalize_index(-1, dim_size) == slice(9, 10, None)
45
+ assert SliceHelper.normalize_index(5, dim_size, dim_dtype=np.int32) == slice(5, 6, None)
46
+ assert SliceHelper.normalize_index(-1, dim_size, dim_dtype=np.int32) == slice(9, 10, None)
47
47
 
48
48
 
49
49
  def test_slice_bounds_validation():
@@ -51,29 +51,29 @@ def test_slice_bounds_validation():
51
51
 
52
52
  # Test out of bounds positive indices
53
53
  with pytest.raises(IndexError, match="out of bounds"):
54
- SliceHelper.normalize_index(10, dim_size)
54
+ SliceHelper.normalize_index(10, dim_size, dim_dtype=np.int32)
55
55
  with pytest.raises(IndexError, match="out of bounds"):
56
- SliceHelper.normalize_index(15, dim_size)
56
+ SliceHelper.normalize_index(15, dim_size, dim_dtype=np.int32)
57
57
 
58
58
  # Test out of bounds negative indices
59
59
  with pytest.raises(IndexError, match="out of bounds"):
60
- SliceHelper.normalize_index(-11, dim_size)
60
+ SliceHelper.normalize_index(-11, dim_size, dim_dtype=np.int32)
61
61
  with pytest.raises(IndexError, match="out of bounds"):
62
- SliceHelper.normalize_index(-15, dim_size)
62
+ SliceHelper.normalize_index(-15, dim_size, dim_dtype=np.int32)
63
63
 
64
64
  # Test out of bounds list indices
65
65
  with pytest.raises(IndexError, match="out of bounds"):
66
- SliceHelper.normalize_index([5, 12], dim_size)
66
+ SliceHelper.normalize_index([5, 12], dim_size, dim_dtype=np.int32)
67
67
 
68
- norm_slice = SliceHelper.normalize_index(slice(5, 15), dim_size)
68
+ norm_slice = SliceHelper.normalize_index(slice(5, 15), dim_size, dim_dtype=np.int32)
69
69
  assert norm_slice == slice(5, 10)
70
70
 
71
- norm_slice_neg_stop = SliceHelper.normalize_index(slice(1, -12), dim_size)
71
+ norm_slice_neg_stop = SliceHelper.normalize_index(slice(1, -12), dim_size, dim_dtype=np.int32)
72
72
  assert norm_slice_neg_stop == slice(1, -2)
73
73
 
74
74
  # Test list with out of bounds
75
- with pytest.raises(IndexError, match="List indices .* are out of bounds"):
76
- SliceHelper.normalize_index([1, 10, 2], dim_size)
75
+ with pytest.raises(IndexError, match="List indices .*"):
76
+ SliceHelper.normalize_index([1, 10, 2], dim_size, dim_dtype=np.int32)
77
77
 
78
78
 
79
79
  def test_cellarr_config():
@@ -0,0 +1,63 @@
1
+ import os
2
+ import shutil
3
+
4
+ import numpy as np
5
+ import pytest
6
+ import scipy.sparse as sp
7
+
8
+ from cellarr_array import DenseCellArray, SparseCellArray, create_cellarray
9
+
10
+ __author__ = "Jayaram Kancherla"
11
+ __copyright__ = "Jayaram Kancherla"
12
+ __license__ = "MIT"
13
+
14
+
15
+ @pytest.fixture
16
+ def dense_array_uri():
17
+ uri = "test_dense_array_query"
18
+ if os.path.exists(uri):
19
+ shutil.rmtree(uri)
20
+
21
+ create_cellarray(uri, shape=(10, 5), sparse=False)
22
+
23
+ arr = DenseCellArray(uri, mode="w")
24
+ data = np.arange(50).reshape(10, 5)
25
+ arr.write_batch(data, start_row=0)
26
+
27
+ return uri
28
+
29
+
30
+ @pytest.fixture
31
+ def sparse_array_uri():
32
+ uri = "test_sparse_array_query"
33
+ if os.path.exists(uri):
34
+ shutil.rmtree(uri)
35
+
36
+ arr = create_cellarray(uri, shape=(10, 5), sparse=True)
37
+ data = sp.csr_matrix(np.arange(50).reshape(10, 5))
38
+ arr.write_batch(data, start_row=0)
39
+ return uri
40
+
41
+
42
+ def test_dense_array_query(dense_array_uri):
43
+ arr = DenseCellArray(dense_array_uri)
44
+ with pytest.raises(Exception):
45
+ result = arr["dim_0 > 5"]
46
+
47
+ result = arr["data > 5"]
48
+ assert isinstance(result["data"], np.ndarray)
49
+
50
+
51
+ def test_sparse_array_query(sparse_array_uri):
52
+ arr = SparseCellArray(sparse_array_uri, return_sparse=False)
53
+ result = arr["dim_0 > 5"]
54
+ # Even if empty, it should return a dictionary with the correct keys
55
+ assert "data" in result
56
+ assert "dim_0" in result
57
+ assert "dim_1" in result
58
+
59
+
60
+ def test_get_unique_dim_values(sparse_array_uri):
61
+ arr = SparseCellArray(sparse_array_uri)
62
+ unique_rows = arr.get_unique_dim_values("dim_0")
63
+ assert np.array_equal(unique_rows, np.arange(10))
@@ -0,0 +1,73 @@
1
+ import os
2
+ import shutil
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+ import pytest
7
+ import tiledb
8
+
9
+ from cellarr_array import SparseCellArray, create_cellarray
10
+
11
+ __author__ = "Jayaram Kancherla"
12
+ __copyright__ = "Jayaram Kancherla"
13
+ __license__ = "MIT"
14
+
15
+
16
+ @pytest.fixture
17
+ def string_dim_array_uri():
18
+ uri = "test_string_dim_array"
19
+ if os.path.exists(uri):
20
+ shutil.rmtree(uri)
21
+
22
+ create_cellarray(uri, sparse=True, dim_dtypes=[str, str], attr_dtype=np.float64, attr_name="value")
23
+
24
+ yield uri
25
+
26
+ shutil.rmtree(uri)
27
+
28
+
29
+ def test_create_string_dim_schema(string_dim_array_uri):
30
+ with tiledb.open(string_dim_array_uri, "r") as A:
31
+ schema = A.schema
32
+ assert schema.domain.dim(0).dtype == np.dtype("S")
33
+ assert schema.domain.dim(1).dtype == np.dtype("S")
34
+ assert schema.attr("value").dtype == np.float64
35
+
36
+
37
+ def test_string_dim_write_read(string_dim_array_uri):
38
+ sca = SparseCellArray(string_dim_array_uri, attr="value", mode="w", return_sparse=False)
39
+
40
+ rows = np.array(["cell_A", "cell_B", "cell_C"])
41
+ cols = np.array(["gene_X", "gene_Y", "gene_Z"])
42
+ values = np.array([1.1, 2.2, 3.3])
43
+
44
+ with sca.open_array() as A:
45
+ A[rows, cols] = values
46
+
47
+ sca_read = SparseCellArray(string_dim_array_uri, attr="value", return_sparse=False)
48
+ data = sca_read[:]
49
+ data["dim_0"] = [x.decode("ascii") for x in data["dim_0"]]
50
+ data["dim_1"] = [x.decode("ascii") for x in data["dim_1"]]
51
+
52
+ assert len(data["value"]) == 3
53
+ pd.testing.assert_frame_equal(
54
+ pd.DataFrame({"value": values, "dim_0": rows, "dim_1": cols})
55
+ .sort_values(by=["dim_0", "dim_1"])
56
+ .reset_index(drop=True),
57
+ pd.DataFrame(data).sort_values(by=["dim_0", "dim_1"]).reset_index(drop=True),
58
+ )
59
+
60
+
61
+ def test_string_dim_slicing(string_dim_array_uri):
62
+ sca = SparseCellArray(string_dim_array_uri, attr="value", mode="w", return_sparse=False)
63
+
64
+ with sca.open_array() as A:
65
+ A[["cell_A", "cell_A", "cell_B"], ["gene_X", "gene_Y", "gene_Y"]] = np.array([1.0, 2.0, 3.0])
66
+
67
+ sca_read = SparseCellArray(string_dim_array_uri, attr="value", return_sparse=False)
68
+
69
+ subset = sca_read[["cell_A"], :]
70
+
71
+ assert len(subset["value"]) == 2
72
+ assert all(r.decode("ascii") == "cell_A" for r in subset["dim_0"])
73
+ assert set([x.decode("ascii") for x in subset["dim_1"]]) == {"gene_X", "gene_Y"}
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes