cellarr-array 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cellarr-array might be problematic. Click here for more details.
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/.github/workflows/publish-pypi.yml +2 -2
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/.github/workflows/run-tests.yml +1 -1
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/.pre-commit-config.yaml +1 -1
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/CHANGELOG.md +8 -1
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/PKG-INFO +3 -1
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/setup.cfg +2 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/__init__.py +2 -1
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/core/base.py +49 -2
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/core/helpers.py +56 -40
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/core/sparse.py +0 -1
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/PKG-INFO +3 -1
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/SOURCES.txt +3 -1
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/requires.txt +1 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/test_dense.py +1 -1
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/test_helpers.py +15 -15
- cellarr_array-0.3.0/tests/test_query.py +63 -0
- cellarr_array-0.3.0/tests/test_string_dims.py +73 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/.coveragerc +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/.gitignore +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/.readthedocs.yml +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/AUTHORS.md +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/CONTRIBUTING.md +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/LICENSE.txt +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/README.md +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/Makefile +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/_static/.gitignore +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/authors.md +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/changelog.md +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/conf.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/contributing.md +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/index.md +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/license.md +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/readme.md +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/docs/requirements.txt +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/pyproject.toml +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/setup.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/core/__init__.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/core/dense.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/dataloaders/__init__.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/dataloaders/denseloader.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/dataloaders/iterabledataloader.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/dataloaders/sparseloader.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/dataloaders/utils.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/utils/__init__.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/utils/config.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/utils/mock.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/dependency_links.txt +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/not-zip-safe +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/top_level.txt +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/conftest.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/test_all.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/test_inmemory.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/test_iterable_loader.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/test_map_loader.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tests/test_sparse.py +0 -0
- {cellarr_array-0.2.0 → cellarr_array-0.3.0}/tox.ini +0 -0
|
@@ -1,8 +1,15 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## Version 0.3.0
|
|
4
|
+
|
|
5
|
+
- Support for string dimensions when creating cellarr arrays.
|
|
6
|
+
- Support query conditions for slice operations.
|
|
7
|
+
- Added unique dim values. Only supported for sparse arrays.
|
|
8
|
+
- EOL for Python 3.9
|
|
9
|
+
|
|
3
10
|
## Version 0.2.0
|
|
4
11
|
|
|
5
|
-
- Dataloaders for sparse and dense arrays. We provide templates for both map- and iterable-style dataloaders. Users are expected to understand the caveats of both of these approaches.
|
|
12
|
+
- Dataloaders for sparse and dense arrays. We provide templates for both map- and iterable-style dataloaders. Users are expected to understand the caveats of both of these approaches.
|
|
6
13
|
- Fixed a bug with slicing on 1D arrays and many improvements for optimizing slicing parameters.
|
|
7
14
|
- Update documentation and tests.
|
|
8
15
|
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cellarr-array
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Base class for handling TileDB backed arrays.
|
|
5
5
|
Home-page: https://github.com/cellarr/cellarr-array
|
|
6
6
|
Author: Jayaram Kancherla
|
|
7
7
|
Author-email: jayaram.kancherla@gmail.com
|
|
8
8
|
License: MIT
|
|
9
9
|
Project-URL: Documentation, https://github.com/cellarr/cellarr-array
|
|
10
|
+
Project-URL: Source, https://github.com/cellarr/cellarr-array
|
|
10
11
|
Platform: any
|
|
11
12
|
Classifier: Development Status :: 4 - Beta
|
|
12
13
|
Classifier: Programming Language :: Python
|
|
@@ -22,6 +23,7 @@ Provides-Extra: testing
|
|
|
22
23
|
Requires-Dist: setuptools; extra == "testing"
|
|
23
24
|
Requires-Dist: pytest; extra == "testing"
|
|
24
25
|
Requires-Dist: pytest-cov; extra == "testing"
|
|
26
|
+
Requires-Dist: pandas; extra == "testing"
|
|
25
27
|
Requires-Dist: torch; extra == "testing"
|
|
26
28
|
Dynamic: license-file
|
|
27
29
|
|
|
@@ -10,6 +10,7 @@ long_description_content_type = text/markdown; charset=UTF-8; variant=GFM
|
|
|
10
10
|
url = https://github.com/cellarr/cellarr-array
|
|
11
11
|
project_urls =
|
|
12
12
|
Documentation = https://github.com/cellarr/cellarr-array
|
|
13
|
+
Source = https://github.com/cellarr/cellarr-array
|
|
13
14
|
platforms = any
|
|
14
15
|
classifiers =
|
|
15
16
|
Development Status :: 4 - Beta
|
|
@@ -39,6 +40,7 @@ testing =
|
|
|
39
40
|
setuptools
|
|
40
41
|
pytest
|
|
41
42
|
pytest-cov
|
|
43
|
+
pandas
|
|
42
44
|
%(optional)s
|
|
43
45
|
|
|
44
46
|
[options.entry_points]
|
|
@@ -16,4 +16,5 @@ finally:
|
|
|
16
16
|
del version, PackageNotFoundError
|
|
17
17
|
|
|
18
18
|
from .core import DenseCellArray, SparseCellArray
|
|
19
|
-
from .
|
|
19
|
+
from .core.helpers import create_cellarray
|
|
20
|
+
from .utils import CellArrConfig, ConsolidationConfig
|
|
@@ -69,6 +69,7 @@ class CellArray(ABC):
|
|
|
69
69
|
self._array_passed_in = False
|
|
70
70
|
self._opened_array_external = None
|
|
71
71
|
self._ctx = None
|
|
72
|
+
self._dim_dtypes = None
|
|
72
73
|
|
|
73
74
|
if tiledb_array_obj is not None:
|
|
74
75
|
if not isinstance(tiledb_array_obj, tiledb.Array):
|
|
@@ -185,7 +186,16 @@ class CellArray(ABC):
|
|
|
185
186
|
def shape(self) -> Tuple[int, ...]:
|
|
186
187
|
if self._shape is None:
|
|
187
188
|
with self.open_array(mode="r") as A:
|
|
188
|
-
|
|
189
|
+
shape_list = []
|
|
190
|
+
for dim in A.schema.domain:
|
|
191
|
+
try:
|
|
192
|
+
# This will fail for string dimensions
|
|
193
|
+
shape_list.append(dim.shape[0])
|
|
194
|
+
except TypeError:
|
|
195
|
+
# For string dimensions, the shape is not well-defined.
|
|
196
|
+
# We use a large number as a placeholder for slicing purposes.
|
|
197
|
+
shape_list.append(2**63 - 1)
|
|
198
|
+
self._shape = tuple(shape_list)
|
|
189
199
|
return self._shape
|
|
190
200
|
|
|
191
201
|
@property
|
|
@@ -209,6 +219,14 @@ class CellArray(ABC):
|
|
|
209
219
|
# self._ndim = len(self.shape)
|
|
210
220
|
return self._ndim
|
|
211
221
|
|
|
222
|
+
@property
|
|
223
|
+
def dim_dtypes(self) -> List[np.dtype]:
|
|
224
|
+
"""Get dimension dtypes of the array."""
|
|
225
|
+
if self._dim_dtypes is None:
|
|
226
|
+
with self.open_array(mode="r") as A:
|
|
227
|
+
self._dim_dtypes = [dim.dtype for dim in A.schema.domain]
|
|
228
|
+
return self._dim_dtypes
|
|
229
|
+
|
|
212
230
|
@contextmanager
|
|
213
231
|
def open_array(self, mode: Optional[str] = None):
|
|
214
232
|
"""Context manager for array operations.
|
|
@@ -266,15 +284,30 @@ class CellArray(ABC):
|
|
|
266
284
|
Args:
|
|
267
285
|
key:
|
|
268
286
|
Slice or list of indices for each dimension in the array.
|
|
287
|
+
|
|
288
|
+
Alternatively, may be string to specify query conditions.
|
|
269
289
|
"""
|
|
290
|
+
# This is a query condition
|
|
291
|
+
if isinstance(key, str):
|
|
292
|
+
with self.open_array(mode="r") as array:
|
|
293
|
+
if self._attr is not None:
|
|
294
|
+
return array.query(cond=key, attrs=[self._attr])[:]
|
|
295
|
+
else:
|
|
296
|
+
array.query(cond=key)[:]
|
|
297
|
+
|
|
270
298
|
if not isinstance(key, tuple):
|
|
271
299
|
key = (key,)
|
|
272
300
|
|
|
273
301
|
if len(key) > self.ndim:
|
|
274
302
|
raise IndexError(f"Invalid number of dimensions: got {len(key)}, expected {self.ndim}")
|
|
275
303
|
|
|
304
|
+
if len(key) < self.ndim:
|
|
305
|
+
key = key + (slice(None),) * (self.ndim - len(key))
|
|
306
|
+
|
|
276
307
|
# Normalize all indices
|
|
277
|
-
normalized_key = tuple(
|
|
308
|
+
normalized_key = tuple(
|
|
309
|
+
SliceHelper.normalize_index(idx, self.shape[i], self.dim_dtypes[i]) for i, idx in enumerate(key)
|
|
310
|
+
)
|
|
278
311
|
|
|
279
312
|
num_ellipsis = sum(isinstance(i, EllipsisType) for i in normalized_key)
|
|
280
313
|
if num_ellipsis > 1:
|
|
@@ -342,3 +375,17 @@ class CellArray(ABC):
|
|
|
342
375
|
Additional arguments for write operation.
|
|
343
376
|
"""
|
|
344
377
|
pass
|
|
378
|
+
|
|
379
|
+
def get_unique_dim_values(self, dim_name: Optional[str] = None) -> np.ndarray:
|
|
380
|
+
"""Get unique values for a dimension.
|
|
381
|
+
|
|
382
|
+
Args:
|
|
383
|
+
dim_name:
|
|
384
|
+
The name of the dimension. If None, unique values for all
|
|
385
|
+
dimensions are returned.
|
|
386
|
+
|
|
387
|
+
Returns:
|
|
388
|
+
An array of unique dimension values.
|
|
389
|
+
"""
|
|
390
|
+
with self.open_array(mode="r") as A:
|
|
391
|
+
return A.unique_dim_values(dim_name)
|
|
@@ -103,19 +103,27 @@ def create_cellarray(
|
|
|
103
103
|
if not (len(shape) == len(dim_dtypes) == len(dim_names)):
|
|
104
104
|
raise ValueError("Lengths of 'shape', 'dim_dtypes', and 'dim_names' must match.")
|
|
105
105
|
|
|
106
|
-
|
|
107
|
-
|
|
106
|
+
dims = []
|
|
107
|
+
for name, s, dt in zip(dim_names, shape, dim_dtypes):
|
|
108
|
+
if np.issubdtype(dt, np.integer):
|
|
109
|
+
domain = (0, 0 if s == 0 else s - 1)
|
|
110
|
+
tile = min(1 if s == 0 else s // 2, config.tile_capacity // 2)
|
|
111
|
+
dim_dtype = dt
|
|
112
|
+
else: # Assumes string or object dtype
|
|
113
|
+
domain = (None, None)
|
|
114
|
+
tile = None
|
|
115
|
+
dim_dtype = "ascii"
|
|
116
|
+
|
|
117
|
+
dims.append(
|
|
108
118
|
tiledb.Dim(
|
|
109
119
|
name=name,
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
dtype=dt,
|
|
120
|
+
domain=domain,
|
|
121
|
+
tile=tile,
|
|
122
|
+
dtype=dim_dtype,
|
|
114
123
|
)
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
)
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
dom = tiledb.Domain(*dims, ctx=tiledb_ctx)
|
|
119
127
|
attr_obj = tiledb.Attr(
|
|
120
128
|
name=attr_name,
|
|
121
129
|
dtype=attr_dtype,
|
|
@@ -149,10 +157,17 @@ class SliceHelper:
|
|
|
149
157
|
"""Helper class for handling array slicing operations."""
|
|
150
158
|
|
|
151
159
|
@staticmethod
|
|
152
|
-
def is_contiguous_indices(indices: List
|
|
160
|
+
def is_contiguous_indices(indices: List) -> Optional[slice]:
|
|
161
|
+
"""Checks if a list of indices is contiguous and can be converted to a slice.
|
|
162
|
+
|
|
163
|
+
Returns None if the list is not contiguous or contains non-integers.
|
|
164
|
+
"""
|
|
153
165
|
if not indices:
|
|
154
166
|
return None
|
|
155
167
|
|
|
168
|
+
if not all(isinstance(i, (int, np.integer)) for i in indices):
|
|
169
|
+
return None
|
|
170
|
+
|
|
156
171
|
sorted_indices = sorted(list(set(indices)))
|
|
157
172
|
if not sorted_indices:
|
|
158
173
|
return None
|
|
@@ -168,20 +183,33 @@ class SliceHelper:
|
|
|
168
183
|
|
|
169
184
|
@staticmethod
|
|
170
185
|
def normalize_index(
|
|
171
|
-
idx: Union[int, range, slice, List
|
|
172
|
-
|
|
186
|
+
idx: Union[int, range, slice, List, str, EllipsisType],
|
|
187
|
+
dim_size: int,
|
|
188
|
+
dim_dtype: np.dtype,
|
|
189
|
+
):
|
|
173
190
|
"""Normalize index to handle negative indices and ensure consistency."""
|
|
191
|
+
is_string_dim = np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_)
|
|
192
|
+
|
|
193
|
+
if is_string_dim:
|
|
194
|
+
if isinstance(idx, (str, bytes)):
|
|
195
|
+
return [idx]
|
|
196
|
+
if isinstance(idx, list) and all(isinstance(i, (str, bytes)) for i in idx):
|
|
197
|
+
return idx
|
|
198
|
+
if isinstance(idx, slice):
|
|
199
|
+
# For string dimensions, we do not normalize the slice with integer sizes
|
|
200
|
+
return idx
|
|
201
|
+
if isinstance(idx, EllipsisType):
|
|
202
|
+
return idx
|
|
203
|
+
raise TypeError(f"Unsupported index type '{type(idx).__name__}' for string dimension.")
|
|
204
|
+
|
|
174
205
|
if isinstance(idx, EllipsisType):
|
|
175
206
|
return idx
|
|
176
207
|
|
|
177
|
-
# Convert ranges to slices
|
|
178
208
|
if isinstance(idx, range):
|
|
179
209
|
idx = slice(idx.start, idx.stop, idx.step)
|
|
180
210
|
|
|
181
211
|
if isinstance(idx, slice):
|
|
182
|
-
start = idx.start
|
|
183
|
-
stop = idx.stop
|
|
184
|
-
step = idx.step
|
|
212
|
+
start, stop, step = idx.start, idx.stop, idx.step
|
|
185
213
|
|
|
186
214
|
# Resolve None to full dimension slice parts
|
|
187
215
|
if start is None:
|
|
@@ -196,44 +224,32 @@ class SliceHelper:
|
|
|
196
224
|
if stop < 0:
|
|
197
225
|
stop += dim_size
|
|
198
226
|
|
|
199
|
-
# slice allows start > dim_size or stop < 0 to result in empty slices.
|
|
200
|
-
# Note: start == dim_size is OK for empty slice like arr[dim_size:]
|
|
201
|
-
if start < 0 or (start >= dim_size and dim_size > 0):
|
|
202
|
-
if not (start == dim_size and (step is None or step > 0)):
|
|
203
|
-
if start >= dim_size:
|
|
204
|
-
raise IndexError(
|
|
205
|
-
f"Start index {idx.start if idx.start is not None else 'None'} results in {start}, which is out of bounds for dimension size {dim_size}."
|
|
206
|
-
)
|
|
207
|
-
|
|
208
227
|
# Clamping slice arguments to dimensions
|
|
209
228
|
stop = min(stop, dim_size)
|
|
210
229
|
start = max(0, start)
|
|
211
|
-
|
|
212
230
|
return slice(start, stop, step)
|
|
213
|
-
|
|
231
|
+
|
|
232
|
+
if isinstance(idx, list):
|
|
214
233
|
if not idx:
|
|
215
234
|
return []
|
|
235
|
+
# This check only applies to integer lists
|
|
236
|
+
if not all(isinstance(i, (int, np.integer)) for i in idx):
|
|
237
|
+
raise TypeError("List indices must be integers for numeric dimensions.")
|
|
216
238
|
|
|
217
239
|
norm_idx = [i if i >= 0 else dim_size + i for i in idx]
|
|
218
240
|
if any(i < 0 or i >= dim_size for i in norm_idx):
|
|
219
|
-
|
|
220
|
-
raise IndexError(
|
|
221
|
-
f"List indices {oob_indices} (original values) are out of bounds for dimension size {dim_size}."
|
|
222
|
-
)
|
|
223
|
-
|
|
224
|
-
# TileDB multi_index usually returns data sorted by coordinates
|
|
241
|
+
raise IndexError("List indices out of bounds for dimension size.")
|
|
225
242
|
return sorted(list(set(norm_idx)))
|
|
226
|
-
|
|
243
|
+
|
|
244
|
+
if isinstance(idx, (int, np.integer)):
|
|
227
245
|
norm_idx = int(idx)
|
|
228
246
|
if norm_idx < 0:
|
|
229
247
|
norm_idx += dim_size
|
|
230
|
-
|
|
231
248
|
if not (0 <= norm_idx < dim_size):
|
|
232
|
-
raise IndexError(f"Index {idx} out of bounds for dimension size
|
|
233
|
-
|
|
249
|
+
raise IndexError(f"Index {idx} out of bounds for dimension size.")
|
|
234
250
|
return slice(norm_idx, norm_idx + 1, None)
|
|
235
|
-
|
|
236
|
-
|
|
251
|
+
|
|
252
|
+
raise TypeError(f"Index type {type(idx)} not supported for normalization.")
|
|
237
253
|
|
|
238
254
|
|
|
239
255
|
def create_group(output_path, group_name):
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cellarr-array
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Base class for handling TileDB backed arrays.
|
|
5
5
|
Home-page: https://github.com/cellarr/cellarr-array
|
|
6
6
|
Author: Jayaram Kancherla
|
|
7
7
|
Author-email: jayaram.kancherla@gmail.com
|
|
8
8
|
License: MIT
|
|
9
9
|
Project-URL: Documentation, https://github.com/cellarr/cellarr-array
|
|
10
|
+
Project-URL: Source, https://github.com/cellarr/cellarr-array
|
|
10
11
|
Platform: any
|
|
11
12
|
Classifier: Development Status :: 4 - Beta
|
|
12
13
|
Classifier: Programming Language :: Python
|
|
@@ -22,6 +23,7 @@ Provides-Extra: testing
|
|
|
22
23
|
Requires-Dist: setuptools; extra == "testing"
|
|
23
24
|
Requires-Dist: pytest; extra == "testing"
|
|
24
25
|
Requires-Dist: pytest-cov; extra == "testing"
|
|
26
|
+
Requires-Dist: pandas; extra == "testing"
|
|
25
27
|
Requires-Dist: torch; extra == "testing"
|
|
26
28
|
Dynamic: license-file
|
|
27
29
|
|
|
@@ -178,7 +178,7 @@ def test_invalid_operations(sample_dense_array_2d):
|
|
|
178
178
|
with pytest.raises(IndexError, match="Invalid number of dimensions"):
|
|
179
179
|
_ = sample_dense_array_2d[0:10, 0:10, 0:10]
|
|
180
180
|
|
|
181
|
-
with pytest.raises(
|
|
181
|
+
with pytest.raises(Exception):
|
|
182
182
|
_ = sample_dense_array_2d[200:300]
|
|
183
183
|
|
|
184
184
|
|
|
@@ -30,20 +30,20 @@ def test_slice_normalize_index():
|
|
|
30
30
|
dim_size = 10
|
|
31
31
|
|
|
32
32
|
# Test positive slice
|
|
33
|
-
assert SliceHelper.normalize_index(slice(1, 5), dim_size) == slice(1, 5, None)
|
|
33
|
+
assert SliceHelper.normalize_index(slice(1, 5), dim_size, dim_dtype=np.int32) == slice(1, 5, None)
|
|
34
34
|
|
|
35
35
|
# Test negative slice
|
|
36
|
-
assert SliceHelper.normalize_index(slice(-3, -1), dim_size) == slice(7, 9, None)
|
|
36
|
+
assert SliceHelper.normalize_index(slice(-3, -1), dim_size, dim_dtype=np.int32) == slice(7, 9, None)
|
|
37
37
|
|
|
38
38
|
# Test None values in slice
|
|
39
|
-
assert SliceHelper.normalize_index(slice(None, None), dim_size) == slice(0, 10, None)
|
|
39
|
+
assert SliceHelper.normalize_index(slice(None, None), dim_size, dim_dtype=np.int32) == slice(0, 10, None)
|
|
40
40
|
|
|
41
41
|
# Test list of indices
|
|
42
|
-
assert SliceHelper.normalize_index([1, -1], dim_size) == [1, 9]
|
|
42
|
+
assert SliceHelper.normalize_index([1, -1], dim_size, dim_dtype=np.int32) == [1, 9]
|
|
43
43
|
|
|
44
44
|
# Test single integer
|
|
45
|
-
assert SliceHelper.normalize_index(5, dim_size) == slice(5, 6, None)
|
|
46
|
-
assert SliceHelper.normalize_index(-1, dim_size) == slice(9, 10, None)
|
|
45
|
+
assert SliceHelper.normalize_index(5, dim_size, dim_dtype=np.int32) == slice(5, 6, None)
|
|
46
|
+
assert SliceHelper.normalize_index(-1, dim_size, dim_dtype=np.int32) == slice(9, 10, None)
|
|
47
47
|
|
|
48
48
|
|
|
49
49
|
def test_slice_bounds_validation():
|
|
@@ -51,29 +51,29 @@ def test_slice_bounds_validation():
|
|
|
51
51
|
|
|
52
52
|
# Test out of bounds positive indices
|
|
53
53
|
with pytest.raises(IndexError, match="out of bounds"):
|
|
54
|
-
SliceHelper.normalize_index(10, dim_size)
|
|
54
|
+
SliceHelper.normalize_index(10, dim_size, dim_dtype=np.int32)
|
|
55
55
|
with pytest.raises(IndexError, match="out of bounds"):
|
|
56
|
-
SliceHelper.normalize_index(15, dim_size)
|
|
56
|
+
SliceHelper.normalize_index(15, dim_size, dim_dtype=np.int32)
|
|
57
57
|
|
|
58
58
|
# Test out of bounds negative indices
|
|
59
59
|
with pytest.raises(IndexError, match="out of bounds"):
|
|
60
|
-
SliceHelper.normalize_index(-11, dim_size)
|
|
60
|
+
SliceHelper.normalize_index(-11, dim_size, dim_dtype=np.int32)
|
|
61
61
|
with pytest.raises(IndexError, match="out of bounds"):
|
|
62
|
-
SliceHelper.normalize_index(-15, dim_size)
|
|
62
|
+
SliceHelper.normalize_index(-15, dim_size, dim_dtype=np.int32)
|
|
63
63
|
|
|
64
64
|
# Test out of bounds list indices
|
|
65
65
|
with pytest.raises(IndexError, match="out of bounds"):
|
|
66
|
-
SliceHelper.normalize_index([5, 12], dim_size)
|
|
66
|
+
SliceHelper.normalize_index([5, 12], dim_size, dim_dtype=np.int32)
|
|
67
67
|
|
|
68
|
-
norm_slice = SliceHelper.normalize_index(slice(5, 15), dim_size)
|
|
68
|
+
norm_slice = SliceHelper.normalize_index(slice(5, 15), dim_size, dim_dtype=np.int32)
|
|
69
69
|
assert norm_slice == slice(5, 10)
|
|
70
70
|
|
|
71
|
-
norm_slice_neg_stop = SliceHelper.normalize_index(slice(1, -12), dim_size)
|
|
71
|
+
norm_slice_neg_stop = SliceHelper.normalize_index(slice(1, -12), dim_size, dim_dtype=np.int32)
|
|
72
72
|
assert norm_slice_neg_stop == slice(1, -2)
|
|
73
73
|
|
|
74
74
|
# Test list with out of bounds
|
|
75
|
-
with pytest.raises(IndexError, match="List indices .*
|
|
76
|
-
SliceHelper.normalize_index([1, 10, 2], dim_size)
|
|
75
|
+
with pytest.raises(IndexError, match="List indices .*"):
|
|
76
|
+
SliceHelper.normalize_index([1, 10, 2], dim_size, dim_dtype=np.int32)
|
|
77
77
|
|
|
78
78
|
|
|
79
79
|
def test_cellarr_config():
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pytest
|
|
6
|
+
import scipy.sparse as sp
|
|
7
|
+
|
|
8
|
+
from cellarr_array import DenseCellArray, SparseCellArray, create_cellarray
|
|
9
|
+
|
|
10
|
+
__author__ = "Jayaram Kancherla"
|
|
11
|
+
__copyright__ = "Jayaram Kancherla"
|
|
12
|
+
__license__ = "MIT"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@pytest.fixture
|
|
16
|
+
def dense_array_uri():
|
|
17
|
+
uri = "test_dense_array_query"
|
|
18
|
+
if os.path.exists(uri):
|
|
19
|
+
shutil.rmtree(uri)
|
|
20
|
+
|
|
21
|
+
create_cellarray(uri, shape=(10, 5), sparse=False)
|
|
22
|
+
|
|
23
|
+
arr = DenseCellArray(uri, mode="w")
|
|
24
|
+
data = np.arange(50).reshape(10, 5)
|
|
25
|
+
arr.write_batch(data, start_row=0)
|
|
26
|
+
|
|
27
|
+
return uri
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@pytest.fixture
|
|
31
|
+
def sparse_array_uri():
|
|
32
|
+
uri = "test_sparse_array_query"
|
|
33
|
+
if os.path.exists(uri):
|
|
34
|
+
shutil.rmtree(uri)
|
|
35
|
+
|
|
36
|
+
arr = create_cellarray(uri, shape=(10, 5), sparse=True)
|
|
37
|
+
data = sp.csr_matrix(np.arange(50).reshape(10, 5))
|
|
38
|
+
arr.write_batch(data, start_row=0)
|
|
39
|
+
return uri
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_dense_array_query(dense_array_uri):
|
|
43
|
+
arr = DenseCellArray(dense_array_uri)
|
|
44
|
+
with pytest.raises(Exception):
|
|
45
|
+
result = arr["dim_0 > 5"]
|
|
46
|
+
|
|
47
|
+
result = arr["data > 5"]
|
|
48
|
+
assert isinstance(result["data"], np.ndarray)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_sparse_array_query(sparse_array_uri):
|
|
52
|
+
arr = SparseCellArray(sparse_array_uri, return_sparse=False)
|
|
53
|
+
result = arr["dim_0 > 5"]
|
|
54
|
+
# Even if empty, it should return a dictionary with the correct keys
|
|
55
|
+
assert "data" in result
|
|
56
|
+
assert "dim_0" in result
|
|
57
|
+
assert "dim_1" in result
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_get_unique_dim_values(sparse_array_uri):
|
|
61
|
+
arr = SparseCellArray(sparse_array_uri)
|
|
62
|
+
unique_rows = arr.get_unique_dim_values("dim_0")
|
|
63
|
+
assert np.array_equal(unique_rows, np.arange(10))
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import pytest
|
|
7
|
+
import tiledb
|
|
8
|
+
|
|
9
|
+
from cellarr_array import SparseCellArray, create_cellarray
|
|
10
|
+
|
|
11
|
+
__author__ = "Jayaram Kancherla"
|
|
12
|
+
__copyright__ = "Jayaram Kancherla"
|
|
13
|
+
__license__ = "MIT"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@pytest.fixture
|
|
17
|
+
def string_dim_array_uri():
|
|
18
|
+
uri = "test_string_dim_array"
|
|
19
|
+
if os.path.exists(uri):
|
|
20
|
+
shutil.rmtree(uri)
|
|
21
|
+
|
|
22
|
+
create_cellarray(uri, sparse=True, dim_dtypes=[str, str], attr_dtype=np.float64, attr_name="value")
|
|
23
|
+
|
|
24
|
+
yield uri
|
|
25
|
+
|
|
26
|
+
shutil.rmtree(uri)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_create_string_dim_schema(string_dim_array_uri):
|
|
30
|
+
with tiledb.open(string_dim_array_uri, "r") as A:
|
|
31
|
+
schema = A.schema
|
|
32
|
+
assert schema.domain.dim(0).dtype == np.dtype("S")
|
|
33
|
+
assert schema.domain.dim(1).dtype == np.dtype("S")
|
|
34
|
+
assert schema.attr("value").dtype == np.float64
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_string_dim_write_read(string_dim_array_uri):
|
|
38
|
+
sca = SparseCellArray(string_dim_array_uri, attr="value", mode="w", return_sparse=False)
|
|
39
|
+
|
|
40
|
+
rows = np.array(["cell_A", "cell_B", "cell_C"])
|
|
41
|
+
cols = np.array(["gene_X", "gene_Y", "gene_Z"])
|
|
42
|
+
values = np.array([1.1, 2.2, 3.3])
|
|
43
|
+
|
|
44
|
+
with sca.open_array() as A:
|
|
45
|
+
A[rows, cols] = values
|
|
46
|
+
|
|
47
|
+
sca_read = SparseCellArray(string_dim_array_uri, attr="value", return_sparse=False)
|
|
48
|
+
data = sca_read[:]
|
|
49
|
+
data["dim_0"] = [x.decode("ascii") for x in data["dim_0"]]
|
|
50
|
+
data["dim_1"] = [x.decode("ascii") for x in data["dim_1"]]
|
|
51
|
+
|
|
52
|
+
assert len(data["value"]) == 3
|
|
53
|
+
pd.testing.assert_frame_equal(
|
|
54
|
+
pd.DataFrame({"value": values, "dim_0": rows, "dim_1": cols})
|
|
55
|
+
.sort_values(by=["dim_0", "dim_1"])
|
|
56
|
+
.reset_index(drop=True),
|
|
57
|
+
pd.DataFrame(data).sort_values(by=["dim_0", "dim_1"]).reset_index(drop=True),
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_string_dim_slicing(string_dim_array_uri):
|
|
62
|
+
sca = SparseCellArray(string_dim_array_uri, attr="value", mode="w", return_sparse=False)
|
|
63
|
+
|
|
64
|
+
with sca.open_array() as A:
|
|
65
|
+
A[["cell_A", "cell_A", "cell_B"], ["gene_X", "gene_Y", "gene_Y"]] = np.array([1.0, 2.0, 3.0])
|
|
66
|
+
|
|
67
|
+
sca_read = SparseCellArray(string_dim_array_uri, attr="value", return_sparse=False)
|
|
68
|
+
|
|
69
|
+
subset = sca_read[["cell_A"], :]
|
|
70
|
+
|
|
71
|
+
assert len(subset["value"]) == 2
|
|
72
|
+
assert all(r.decode("ascii") == "cell_A" for r in subset["dim_0"])
|
|
73
|
+
assert set([x.decode("ascii") for x in subset["dim_1"]]) == {"gene_X", "gene_Y"}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cellarr_array-0.2.0 → cellarr_array-0.3.0}/src/cellarr_array/dataloaders/iterabledataloader.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|