cellarr-array 0.1.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cellarr-array might be problematic. Click here for more details.
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/.github/workflows/publish-pypi.yml +2 -2
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/.github/workflows/run-tests.yml +1 -1
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/.gitignore +2 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/.pre-commit-config.yaml +1 -1
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/CHANGELOG.md +13 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/PKG-INFO +6 -1
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/setup.cfg +5 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/src/cellarr_array/__init__.py +3 -4
- cellarr_array-0.3.0/src/cellarr_array/core/__init__.py +3 -0
- cellarr_array-0.1.0/src/cellarr_array/cellarray_base.py → cellarr_array-0.3.0/src/cellarr_array/core/base.py +50 -3
- cellarr_array-0.1.0/src/cellarr_array/cellarray_dense.py → cellarr_array-0.3.0/src/cellarr_array/core/dense.py +2 -3
- {cellarr_array-0.1.0/src/cellarr_array → cellarr_array-0.3.0/src/cellarr_array/core}/helpers.py +101 -51
- cellarr_array-0.1.0/src/cellarr_array/cellarray_sparse.py → cellarr_array-0.3.0/src/cellarr_array/core/sparse.py +11 -17
- cellarr_array-0.3.0/src/cellarr_array/dataloaders/__init__.py +3 -0
- cellarr_array-0.3.0/src/cellarr_array/dataloaders/denseloader.py +198 -0
- cellarr_array-0.3.0/src/cellarr_array/dataloaders/iterabledataloader.py +320 -0
- cellarr_array-0.3.0/src/cellarr_array/dataloaders/sparseloader.py +230 -0
- cellarr_array-0.3.0/src/cellarr_array/dataloaders/utils.py +26 -0
- cellarr_array-0.3.0/src/cellarr_array/utils/__init__.py +3 -0
- cellarr_array-0.3.0/src/cellarr_array/utils/mock.py +167 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/PKG-INFO +6 -1
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/SOURCES.txt +18 -6
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/requires.txt +5 -0
- cellarr_array-0.3.0/tests/conftest.py +233 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/tests/test_all.py +1 -1
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/tests/test_dense.py +5 -5
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/tests/test_helpers.py +25 -13
- cellarr_array-0.3.0/tests/test_iterable_loader.py +288 -0
- cellarr_array-0.3.0/tests/test_map_loader.py +289 -0
- cellarr_array-0.3.0/tests/test_query.py +63 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/tests/test_sparse.py +1 -1
- cellarr_array-0.3.0/tests/test_string_dims.py +73 -0
- cellarr_array-0.1.0/tests/conftest.py +0 -91
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/.coveragerc +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/.readthedocs.yml +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/AUTHORS.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/CONTRIBUTING.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/LICENSE.txt +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/README.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/docs/Makefile +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/docs/_static/.gitignore +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/docs/authors.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/docs/changelog.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/docs/conf.py +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/docs/contributing.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/docs/index.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/docs/license.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/docs/readme.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/docs/requirements.txt +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/pyproject.toml +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/setup.py +0 -0
- {cellarr_array-0.1.0/src/cellarr_array → cellarr_array-0.3.0/src/cellarr_array/utils}/config.py +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/dependency_links.txt +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/not-zip-safe +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/src/cellarr_array.egg-info/top_level.txt +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/tests/test_inmemory.py +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.0}/tox.ini +0 -0
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## Version 0.3.0
|
|
4
|
+
|
|
5
|
+
- Support for string dimensions when creating cellarr arrays.
|
|
6
|
+
- Support query conditions for slice operations.
|
|
7
|
+
- Added support for retrieving unique dimension values. Only supported for sparse arrays.
|
|
8
|
+
- EOL for Python 3.9
|
|
9
|
+
|
|
10
|
+
## Version 0.2.0
|
|
11
|
+
|
|
12
|
+
- Dataloaders for sparse and dense arrays. We provide templates for both map-style and iterable-style dataloaders. Users are expected to understand the caveats of both of these approaches.
|
|
13
|
+
- Fixed a bug with slicing on 1D arrays and many improvements for optimizing slicing parameters.
|
|
14
|
+
- Update documentation and tests.
|
|
15
|
+
|
|
3
16
|
## Version 0.1.0
|
|
4
17
|
|
|
5
18
|
- Support cellarr-arrays on user provided tiledb array objects.
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cellarr-array
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Base class for handling TileDB backed arrays.
|
|
5
5
|
Home-page: https://github.com/cellarr/cellarr-array
|
|
6
6
|
Author: Jayaram Kancherla
|
|
7
7
|
Author-email: jayaram.kancherla@gmail.com
|
|
8
8
|
License: MIT
|
|
9
9
|
Project-URL: Documentation, https://github.com/cellarr/cellarr-array
|
|
10
|
+
Project-URL: Source, https://github.com/cellarr/cellarr-array
|
|
10
11
|
Platform: any
|
|
11
12
|
Classifier: Development Status :: 4 - Beta
|
|
12
13
|
Classifier: Programming Language :: Python
|
|
@@ -16,10 +17,14 @@ Requires-Dist: importlib-metadata; python_version < "3.8"
|
|
|
16
17
|
Requires-Dist: tiledb
|
|
17
18
|
Requires-Dist: numpy
|
|
18
19
|
Requires-Dist: scipy
|
|
20
|
+
Provides-Extra: optional
|
|
21
|
+
Requires-Dist: torch; extra == "optional"
|
|
19
22
|
Provides-Extra: testing
|
|
20
23
|
Requires-Dist: setuptools; extra == "testing"
|
|
21
24
|
Requires-Dist: pytest; extra == "testing"
|
|
22
25
|
Requires-Dist: pytest-cov; extra == "testing"
|
|
26
|
+
Requires-Dist: pandas; extra == "testing"
|
|
27
|
+
Requires-Dist: torch; extra == "testing"
|
|
23
28
|
Dynamic: license-file
|
|
24
29
|
|
|
25
30
|
[](https://pypi.org/project/cellarr-array/)
|
|
@@ -10,6 +10,7 @@ long_description_content_type = text/markdown; charset=UTF-8; variant=GFM
|
|
|
10
10
|
url = https://github.com/cellarr/cellarr-array
|
|
11
11
|
project_urls =
|
|
12
12
|
Documentation = https://github.com/cellarr/cellarr-array
|
|
13
|
+
Source = https://github.com/cellarr/cellarr-array
|
|
13
14
|
platforms = any
|
|
14
15
|
classifiers =
|
|
15
16
|
Development Status :: 4 - Beta
|
|
@@ -33,10 +34,14 @@ exclude =
|
|
|
33
34
|
tests
|
|
34
35
|
|
|
35
36
|
[options.extras_require]
|
|
37
|
+
optional =
|
|
38
|
+
torch
|
|
36
39
|
testing =
|
|
37
40
|
setuptools
|
|
38
41
|
pytest
|
|
39
42
|
pytest-cov
|
|
43
|
+
pandas
|
|
44
|
+
%(optional)s
|
|
40
45
|
|
|
41
46
|
[options.entry_points]
|
|
42
47
|
|
|
@@ -15,7 +15,6 @@ except PackageNotFoundError: # pragma: no cover
|
|
|
15
15
|
finally:
|
|
16
16
|
del version, PackageNotFoundError
|
|
17
17
|
|
|
18
|
-
from .
|
|
19
|
-
from .
|
|
20
|
-
from .
|
|
21
|
-
from .helpers import create_cellarray, SliceHelper
|
|
18
|
+
from .core import DenseCellArray, SparseCellArray
|
|
19
|
+
from .core.helpers import create_cellarray
|
|
20
|
+
from .utils import CellArrConfig, ConsolidationConfig
|
|
@@ -12,7 +12,7 @@ import numpy as np
|
|
|
12
12
|
import tiledb
|
|
13
13
|
from scipy import sparse
|
|
14
14
|
|
|
15
|
-
from .config import ConsolidationConfig
|
|
15
|
+
from ..utils.config import ConsolidationConfig
|
|
16
16
|
from .helpers import SliceHelper
|
|
17
17
|
|
|
18
18
|
__author__ = "Jayaram Kancherla"
|
|
@@ -69,6 +69,7 @@ class CellArray(ABC):
|
|
|
69
69
|
self._array_passed_in = False
|
|
70
70
|
self._opened_array_external = None
|
|
71
71
|
self._ctx = None
|
|
72
|
+
self._dim_dtypes = None
|
|
72
73
|
|
|
73
74
|
if tiledb_array_obj is not None:
|
|
74
75
|
if not isinstance(tiledb_array_obj, tiledb.Array):
|
|
@@ -185,7 +186,16 @@ class CellArray(ABC):
|
|
|
185
186
|
def shape(self) -> Tuple[int, ...]:
|
|
186
187
|
if self._shape is None:
|
|
187
188
|
with self.open_array(mode="r") as A:
|
|
188
|
-
|
|
189
|
+
shape_list = []
|
|
190
|
+
for dim in A.schema.domain:
|
|
191
|
+
try:
|
|
192
|
+
# This will fail for string dimensions
|
|
193
|
+
shape_list.append(dim.shape[0])
|
|
194
|
+
except TypeError:
|
|
195
|
+
# For string dimensions, the shape is not well-defined.
|
|
196
|
+
# We use a large number as a placeholder for slicing purposes.
|
|
197
|
+
shape_list.append(2**63 - 1)
|
|
198
|
+
self._shape = tuple(shape_list)
|
|
189
199
|
return self._shape
|
|
190
200
|
|
|
191
201
|
@property
|
|
@@ -209,6 +219,14 @@ class CellArray(ABC):
|
|
|
209
219
|
# self._ndim = len(self.shape)
|
|
210
220
|
return self._ndim
|
|
211
221
|
|
|
222
|
+
@property
|
|
223
|
+
def dim_dtypes(self) -> List[np.dtype]:
|
|
224
|
+
"""Get dimension dtypes of the array."""
|
|
225
|
+
if self._dim_dtypes is None:
|
|
226
|
+
with self.open_array(mode="r") as A:
|
|
227
|
+
self._dim_dtypes = [dim.dtype for dim in A.schema.domain]
|
|
228
|
+
return self._dim_dtypes
|
|
229
|
+
|
|
212
230
|
@contextmanager
|
|
213
231
|
def open_array(self, mode: Optional[str] = None):
|
|
214
232
|
"""Context manager for array operations.
|
|
@@ -266,15 +284,30 @@ class CellArray(ABC):
|
|
|
266
284
|
Args:
|
|
267
285
|
key:
|
|
268
286
|
Slice or list of indices for each dimension in the array.
|
|
287
|
+
|
|
288
|
+
Alternatively, may be string to specify query conditions.
|
|
269
289
|
"""
|
|
290
|
+
# This is a query condition
|
|
291
|
+
if isinstance(key, str):
|
|
292
|
+
with self.open_array(mode="r") as array:
|
|
293
|
+
if self._attr is not None:
|
|
294
|
+
return array.query(cond=key, attrs=[self._attr])[:]
|
|
295
|
+
else:
|
|
296
|
+
array.query(cond=key)[:]
|
|
297
|
+
|
|
270
298
|
if not isinstance(key, tuple):
|
|
271
299
|
key = (key,)
|
|
272
300
|
|
|
273
301
|
if len(key) > self.ndim:
|
|
274
302
|
raise IndexError(f"Invalid number of dimensions: got {len(key)}, expected {self.ndim}")
|
|
275
303
|
|
|
304
|
+
if len(key) < self.ndim:
|
|
305
|
+
key = key + (slice(None),) * (self.ndim - len(key))
|
|
306
|
+
|
|
276
307
|
# Normalize all indices
|
|
277
|
-
normalized_key = tuple(
|
|
308
|
+
normalized_key = tuple(
|
|
309
|
+
SliceHelper.normalize_index(idx, self.shape[i], self.dim_dtypes[i]) for i, idx in enumerate(key)
|
|
310
|
+
)
|
|
278
311
|
|
|
279
312
|
num_ellipsis = sum(isinstance(i, EllipsisType) for i in normalized_key)
|
|
280
313
|
if num_ellipsis > 1:
|
|
@@ -342,3 +375,17 @@ class CellArray(ABC):
|
|
|
342
375
|
Additional arguments for write operation.
|
|
343
376
|
"""
|
|
344
377
|
pass
|
|
378
|
+
|
|
379
|
+
def get_unique_dim_values(self, dim_name: Optional[str] = None) -> np.ndarray:
|
|
380
|
+
"""Get unique values for a dimension.
|
|
381
|
+
|
|
382
|
+
Args:
|
|
383
|
+
dim_name:
|
|
384
|
+
The name of the dimension. If None, unique values for all
|
|
385
|
+
dimensions are returned.
|
|
386
|
+
|
|
387
|
+
Returns:
|
|
388
|
+
An array of unique dimension values.
|
|
389
|
+
"""
|
|
390
|
+
with self.open_array(mode="r") as A:
|
|
391
|
+
return A.unique_dim_values(dim_name)
|
|
@@ -7,7 +7,7 @@ from typing import List, Tuple, Union
|
|
|
7
7
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
|
|
10
|
-
from .
|
|
10
|
+
from .base import CellArray
|
|
11
11
|
from .helpers import SliceHelper
|
|
12
12
|
|
|
13
13
|
__author__ = "Jayaram Kancherla"
|
|
@@ -92,7 +92,6 @@ class DenseCellArray(CellArray):
|
|
|
92
92
|
if len(data.shape) != self.ndim:
|
|
93
93
|
raise ValueError(f"Data dimensions {data.shape} don't match array dimensions {self.shape}.")
|
|
94
94
|
|
|
95
|
-
# Check bounds
|
|
96
95
|
end_row = start_row + data.shape[0]
|
|
97
96
|
if end_row > self.shape[0]:
|
|
98
97
|
raise ValueError(
|
|
@@ -102,7 +101,6 @@ class DenseCellArray(CellArray):
|
|
|
102
101
|
if self.ndim == 2 and data.shape[1] != self.shape[1]:
|
|
103
102
|
raise ValueError(f"Data columns {data.shape[1]} don't match array columns {self.shape[1]}.")
|
|
104
103
|
|
|
105
|
-
# Construct write region
|
|
106
104
|
if self.ndim == 1:
|
|
107
105
|
write_region = slice(start_row, end_row)
|
|
108
106
|
else: # 2D
|
|
@@ -110,4 +108,5 @@ class DenseCellArray(CellArray):
|
|
|
110
108
|
|
|
111
109
|
# write_data = {self._attr: data} if len(self.attr_names) > 1 else data
|
|
112
110
|
with self.open_array(mode="w") as array:
|
|
111
|
+
print("write_region", write_region)
|
|
113
112
|
array[write_region] = data
|
{cellarr_array-0.1.0/src/cellarr_array → cellarr_array-0.3.0/src/cellarr_array/core}/helpers.py
RENAMED
|
@@ -8,7 +8,7 @@ from typing import List, Optional, Tuple, Union
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
import tiledb
|
|
10
10
|
|
|
11
|
-
from .config import CellArrConfig
|
|
11
|
+
from ..utils.config import CellArrConfig
|
|
12
12
|
|
|
13
13
|
__author__ = "Jayaram Kancherla"
|
|
14
14
|
__copyright__ = "Jayaram Kancherla"
|
|
@@ -52,7 +52,7 @@ def create_cellarray(
|
|
|
52
52
|
Optional list of dimension names.
|
|
53
53
|
|
|
54
54
|
dim_dtypes:
|
|
55
|
-
Optional list of dimension dtypes.
|
|
55
|
+
Optional list of dimension dtypes. Defaults to numpy's uint32.
|
|
56
56
|
|
|
57
57
|
attr_name:
|
|
58
58
|
Name of the data attribute.
|
|
@@ -67,29 +67,28 @@ def create_cellarray(
|
|
|
67
67
|
ValueError: If dimensions are invalid or inputs are inconsistent.
|
|
68
68
|
"""
|
|
69
69
|
config = config or CellArrConfig()
|
|
70
|
+
tiledb_ctx = tiledb.Config(config.ctx_config) if config.ctx_config else None
|
|
70
71
|
|
|
71
72
|
if attr_dtype is None:
|
|
72
73
|
attr_dtype = np.float32
|
|
73
74
|
if isinstance(attr_dtype, str):
|
|
74
75
|
attr_dtype = np.dtype(attr_dtype)
|
|
75
76
|
|
|
76
|
-
# Require either shape or dim_dtypes
|
|
77
77
|
if shape is None and dim_dtypes is None:
|
|
78
78
|
raise ValueError("Either 'shape' or 'dim_dtypes' must be provided.")
|
|
79
79
|
|
|
80
80
|
if shape is not None:
|
|
81
81
|
if len(shape) not in (1, 2):
|
|
82
|
-
raise ValueError("
|
|
82
|
+
raise ValueError("Shape must have 1 or 2 dimensions.")
|
|
83
83
|
|
|
84
84
|
# Set dimension dtypes, defaults to numpy uint32
|
|
85
85
|
if dim_dtypes is None:
|
|
86
86
|
dim_dtypes = [np.uint32] * len(shape)
|
|
87
87
|
else:
|
|
88
88
|
if len(dim_dtypes) not in (1, 2):
|
|
89
|
-
raise ValueError("
|
|
89
|
+
raise ValueError("Array must have 1 or 2 dimensions.")
|
|
90
90
|
dim_dtypes = [np.dtype(dt) if isinstance(dt, str) else dt for dt in dim_dtypes]
|
|
91
91
|
|
|
92
|
-
# Calculate shape from dtypes if needed
|
|
93
92
|
if shape is None:
|
|
94
93
|
shape = tuple(np.iinfo(dt).max if np.issubdtype(dt, np.integer) else None for dt in dim_dtypes)
|
|
95
94
|
if None in shape:
|
|
@@ -97,7 +96,6 @@ def create_cellarray(
|
|
|
97
96
|
np.iinfo(dt).max if s is None and np.issubdtype(dt, np.integer) else s for s, dt in zip(shape, dim_dtypes)
|
|
98
97
|
)
|
|
99
98
|
|
|
100
|
-
# Set dimension names
|
|
101
99
|
if dim_names is None:
|
|
102
100
|
dim_names = [f"dim_{i}" for i in range(len(shape))]
|
|
103
101
|
|
|
@@ -105,42 +103,53 @@ def create_cellarray(
|
|
|
105
103
|
if not (len(shape) == len(dim_dtypes) == len(dim_names)):
|
|
106
104
|
raise ValueError("Lengths of 'shape', 'dim_dtypes', and 'dim_names' must match.")
|
|
107
105
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
106
|
+
dims = []
|
|
107
|
+
for name, s, dt in zip(dim_names, shape, dim_dtypes):
|
|
108
|
+
if np.issubdtype(dt, np.integer):
|
|
109
|
+
domain = (0, 0 if s == 0 else s - 1)
|
|
110
|
+
tile = min(1 if s == 0 else s // 2, config.tile_capacity // 2)
|
|
111
|
+
dim_dtype = dt
|
|
112
|
+
else: # Assumes string or object dtype
|
|
113
|
+
domain = (None, None)
|
|
114
|
+
tile = None
|
|
115
|
+
dim_dtype = "ascii"
|
|
116
|
+
|
|
117
|
+
dims.append(
|
|
118
|
+
tiledb.Dim(
|
|
119
|
+
name=name,
|
|
120
|
+
domain=domain,
|
|
121
|
+
tile=tile,
|
|
122
|
+
dtype=dim_dtype,
|
|
123
|
+
)
|
|
124
|
+
)
|
|
115
125
|
|
|
116
|
-
|
|
126
|
+
dom = tiledb.Domain(*dims, ctx=tiledb_ctx)
|
|
127
|
+
attr_obj = tiledb.Attr(
|
|
117
128
|
name=attr_name,
|
|
118
129
|
dtype=attr_dtype,
|
|
119
130
|
filters=config.attrs_filters.get(attr_name, config.attrs_filters.get("", None)),
|
|
131
|
+
ctx=tiledb_ctx,
|
|
120
132
|
)
|
|
121
|
-
|
|
122
133
|
schema = tiledb.ArraySchema(
|
|
123
134
|
domain=dom,
|
|
124
|
-
attrs=[
|
|
135
|
+
attrs=[attr_obj],
|
|
125
136
|
cell_order=config.cell_order,
|
|
126
137
|
tile_order=config.tile_order,
|
|
127
138
|
sparse=sparse,
|
|
128
139
|
coords_filters=config.coords_filters,
|
|
129
140
|
offsets_filters=config.offsets_filters,
|
|
130
|
-
ctx=
|
|
141
|
+
ctx=tiledb_ctx,
|
|
131
142
|
)
|
|
132
|
-
|
|
133
|
-
tiledb.Array.create(uri, schema)
|
|
143
|
+
tiledb.Array.create(uri, schema, ctx=tiledb_ctx)
|
|
134
144
|
|
|
135
145
|
# Import here to avoid circular imports
|
|
136
|
-
from .
|
|
137
|
-
from .
|
|
146
|
+
from .dense import DenseCellArray
|
|
147
|
+
from .sparse import SparseCellArray
|
|
138
148
|
|
|
139
|
-
# Return appropriate array type
|
|
140
149
|
return (
|
|
141
|
-
SparseCellArray(uri=uri, attr=attr_name, mode=mode)
|
|
150
|
+
SparseCellArray(uri=uri, attr=attr_name, mode=mode, config_or_context=tiledb_ctx)
|
|
142
151
|
if sparse
|
|
143
|
-
else DenseCellArray(uri=uri, attr=attr_name, mode=mode)
|
|
152
|
+
else DenseCellArray(uri=uri, attr=attr_name, mode=mode, config_or_context=tiledb_ctx)
|
|
144
153
|
)
|
|
145
154
|
|
|
146
155
|
|
|
@@ -148,59 +157,100 @@ class SliceHelper:
|
|
|
148
157
|
"""Helper class for handling array slicing operations."""
|
|
149
158
|
|
|
150
159
|
@staticmethod
|
|
151
|
-
def is_contiguous_indices(indices: List
|
|
152
|
-
"""
|
|
160
|
+
def is_contiguous_indices(indices: List) -> Optional[slice]:
|
|
161
|
+
"""Checks if a list of indices is contiguous and can be converted to a slice.
|
|
162
|
+
|
|
163
|
+
Returns None if the list is not contiguous or contains non-integers.
|
|
164
|
+
"""
|
|
153
165
|
if not indices:
|
|
154
166
|
return None
|
|
155
167
|
|
|
156
|
-
|
|
168
|
+
if not all(isinstance(i, (int, np.integer)) for i in indices):
|
|
169
|
+
return None
|
|
170
|
+
|
|
171
|
+
sorted_indices = sorted(list(set(indices)))
|
|
172
|
+
if not sorted_indices:
|
|
173
|
+
return None
|
|
174
|
+
|
|
175
|
+
if len(sorted_indices) == 1:
|
|
176
|
+
return slice(sorted_indices[0], sorted_indices[0] + 1, None)
|
|
177
|
+
|
|
178
|
+
diffs = np.diff(sorted_indices)
|
|
157
179
|
if np.all(diffs == 1):
|
|
158
|
-
return slice(
|
|
180
|
+
return slice(sorted_indices[0], sorted_indices[-1] + 1, None)
|
|
181
|
+
|
|
159
182
|
return None
|
|
160
183
|
|
|
161
184
|
@staticmethod
|
|
162
|
-
def normalize_index(
|
|
185
|
+
def normalize_index(
|
|
186
|
+
idx: Union[int, range, slice, List, str, EllipsisType],
|
|
187
|
+
dim_size: int,
|
|
188
|
+
dim_dtype: np.dtype,
|
|
189
|
+
):
|
|
163
190
|
"""Normalize index to handle negative indices and ensure consistency."""
|
|
191
|
+
is_string_dim = np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_)
|
|
192
|
+
|
|
193
|
+
if is_string_dim:
|
|
194
|
+
if isinstance(idx, (str, bytes)):
|
|
195
|
+
return [idx]
|
|
196
|
+
if isinstance(idx, list) and all(isinstance(i, (str, bytes)) for i in idx):
|
|
197
|
+
return idx
|
|
198
|
+
if isinstance(idx, slice):
|
|
199
|
+
# For string dimensions, we do not normalize the slice with integer sizes
|
|
200
|
+
return idx
|
|
201
|
+
if isinstance(idx, EllipsisType):
|
|
202
|
+
return idx
|
|
203
|
+
raise TypeError(f"Unsupported index type '{type(idx).__name__}' for string dimension.")
|
|
164
204
|
|
|
165
205
|
if isinstance(idx, EllipsisType):
|
|
166
206
|
return idx
|
|
167
207
|
|
|
168
|
-
# Convert ranges to slices
|
|
169
208
|
if isinstance(idx, range):
|
|
170
209
|
idx = slice(idx.start, idx.stop, idx.step)
|
|
171
210
|
|
|
172
211
|
if isinstance(idx, slice):
|
|
173
|
-
start = idx.start
|
|
174
|
-
|
|
175
|
-
|
|
212
|
+
start, stop, step = idx.start, idx.stop, idx.step
|
|
213
|
+
|
|
214
|
+
# Resolve None to full dimension slice parts
|
|
215
|
+
if start is None:
|
|
216
|
+
start = 0
|
|
217
|
+
|
|
218
|
+
if stop is None:
|
|
219
|
+
stop = dim_size
|
|
176
220
|
|
|
177
221
|
# Handle negative indices
|
|
178
222
|
if start < 0:
|
|
179
|
-
start
|
|
180
|
-
|
|
223
|
+
start += dim_size
|
|
181
224
|
if stop < 0:
|
|
182
|
-
stop
|
|
183
|
-
|
|
184
|
-
if start < 0 or start > dim_size:
|
|
185
|
-
raise IndexError(f"Start index {start} out of bounds for dimension size {dim_size}")
|
|
186
|
-
if stop < 0 or stop > dim_size:
|
|
187
|
-
raise IndexError(f"Stop index {stop} out of bounds for dimension size {dim_size}")
|
|
225
|
+
stop += dim_size
|
|
188
226
|
|
|
227
|
+
# Clamping slice arguments to dimensions
|
|
228
|
+
stop = min(stop, dim_size)
|
|
229
|
+
start = max(0, start)
|
|
189
230
|
return slice(start, stop, step)
|
|
190
231
|
|
|
191
|
-
|
|
232
|
+
if isinstance(idx, list):
|
|
233
|
+
if not idx:
|
|
234
|
+
return []
|
|
235
|
+
# This check only applies to integer lists
|
|
236
|
+
if not all(isinstance(i, (int, np.integer)) for i in idx):
|
|
237
|
+
raise TypeError("List indices must be integers for numeric dimensions.")
|
|
238
|
+
|
|
192
239
|
norm_idx = [i if i >= 0 else dim_size + i for i in idx]
|
|
193
240
|
if any(i < 0 or i >= dim_size for i in norm_idx):
|
|
194
|
-
raise IndexError(
|
|
195
|
-
return norm_idx
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
norm_idx = idx
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
241
|
+
raise IndexError("List indices out of bounds for dimension size.")
|
|
242
|
+
return sorted(list(set(norm_idx)))
|
|
243
|
+
|
|
244
|
+
if isinstance(idx, (int, np.integer)):
|
|
245
|
+
norm_idx = int(idx)
|
|
246
|
+
if norm_idx < 0:
|
|
247
|
+
norm_idx += dim_size
|
|
248
|
+
if not (0 <= norm_idx < dim_size):
|
|
249
|
+
raise IndexError(f"Index {idx} out of bounds for dimension size.")
|
|
202
250
|
return slice(norm_idx, norm_idx + 1, None)
|
|
203
251
|
|
|
252
|
+
raise TypeError(f"Index type {type(idx)} not supported for normalization.")
|
|
253
|
+
|
|
204
254
|
|
|
205
255
|
def create_group(output_path, group_name):
|
|
206
256
|
tiledb.group_create(f"{output_path}/{group_name}")
|
|
@@ -9,8 +9,8 @@ import numpy as np
|
|
|
9
9
|
import tiledb
|
|
10
10
|
from scipy import sparse
|
|
11
11
|
|
|
12
|
-
from .cellarray_base import CellArray
|
|
13
12
|
from .helpers import SliceHelper
|
|
13
|
+
from .base import CellArray
|
|
14
14
|
|
|
15
15
|
__author__ = "Jayaram Kancherla"
|
|
16
16
|
__copyright__ = "Jayaram Kancherla"
|
|
@@ -28,7 +28,7 @@ class SparseCellArray(CellArray):
|
|
|
28
28
|
mode: Optional[Literal["r", "w", "d", "m"]] = None,
|
|
29
29
|
config_or_context: Optional[Union[tiledb.Config, tiledb.Ctx]] = None,
|
|
30
30
|
return_sparse: bool = True,
|
|
31
|
-
|
|
31
|
+
sparse_format: Union[sparse.csr_matrix, sparse.csc_matrix] = sparse.csr_matrix,
|
|
32
32
|
validate: bool = True,
|
|
33
33
|
**kwargs,
|
|
34
34
|
):
|
|
@@ -66,7 +66,7 @@ class SparseCellArray(CellArray):
|
|
|
66
66
|
Whether to return a sparse representation of the data when object is sliced.
|
|
67
67
|
Default is to return a dictionary that contains coordinates and values.
|
|
68
68
|
|
|
69
|
-
|
|
69
|
+
sparse_format:
|
|
70
70
|
Format to return, defaults to csr_matrix.
|
|
71
71
|
|
|
72
72
|
validate:
|
|
@@ -86,7 +86,7 @@ class SparseCellArray(CellArray):
|
|
|
86
86
|
)
|
|
87
87
|
|
|
88
88
|
self.return_sparse = return_sparse
|
|
89
|
-
self.
|
|
89
|
+
self.sparse_format = sparse.csr_matrix if sparse_format is None else sparse_format
|
|
90
90
|
|
|
91
91
|
def _validate_matrix_dims(self, data: sparse.spmatrix) -> Tuple[sparse.coo_matrix, bool]:
|
|
92
92
|
"""Validate and adjust matrix dimensions if needed.
|
|
@@ -126,7 +126,7 @@ class SparseCellArray(CellArray):
|
|
|
126
126
|
shape.append(idx.stop - (idx.start or 0))
|
|
127
127
|
elif isinstance(idx, list):
|
|
128
128
|
shape.append(len(set(idx)))
|
|
129
|
-
else:
|
|
129
|
+
else:
|
|
130
130
|
shape.append(1)
|
|
131
131
|
|
|
132
132
|
# Always return (n,1) shape for CSR matrix
|
|
@@ -140,20 +140,16 @@ class SparseCellArray(CellArray):
|
|
|
140
140
|
"""Convert TileDB result to CSR format or dense array."""
|
|
141
141
|
data = result[self._attr]
|
|
142
142
|
|
|
143
|
-
# empty result
|
|
144
143
|
if len(data) == 0:
|
|
145
|
-
print("is emoty")
|
|
146
144
|
if not self.return_sparse:
|
|
147
145
|
return result
|
|
148
146
|
else:
|
|
149
|
-
# For COO output, return empty sparse matrix
|
|
150
147
|
if self.ndim == 1:
|
|
151
|
-
matrix = self.
|
|
148
|
+
matrix = self.sparse_format((1, shape[0]))
|
|
152
149
|
return matrix[:, key[0]]
|
|
153
150
|
|
|
154
|
-
return self.
|
|
151
|
+
return self.sparse_format(shape)[key]
|
|
155
152
|
|
|
156
|
-
# Get coordinates
|
|
157
153
|
coords = []
|
|
158
154
|
for dim_name in self.dim_names:
|
|
159
155
|
dim_coords = result[dim_name]
|
|
@@ -164,11 +160,12 @@ class SparseCellArray(CellArray):
|
|
|
164
160
|
coords = [np.zeros_like(coords[0]), coords[0]]
|
|
165
161
|
shape = (1, shape[0])
|
|
166
162
|
|
|
167
|
-
# Create sparse matrix
|
|
168
163
|
matrix = sparse.coo_matrix((data, tuple(coords)), shape=shape)
|
|
169
|
-
|
|
164
|
+
|
|
165
|
+
sliced = matrix
|
|
166
|
+
if self.sparse_format in (sparse.csr_matrix, sparse.csr_array):
|
|
170
167
|
sliced = matrix.tocsr()
|
|
171
|
-
elif self.
|
|
168
|
+
elif self.sparse_format in (sparse.csc_matrix, sparse.csc_array):
|
|
172
169
|
sliced = matrix.tocsc()
|
|
173
170
|
|
|
174
171
|
if self.ndim == 1:
|
|
@@ -200,7 +197,6 @@ class SparseCellArray(CellArray):
|
|
|
200
197
|
if all(isinstance(idx, slice) for idx in optimized_key):
|
|
201
198
|
return self._direct_slice(tuple(optimized_key))
|
|
202
199
|
|
|
203
|
-
# For mixed slice-list queries, adjust slice bounds
|
|
204
200
|
tiledb_key = []
|
|
205
201
|
for idx in key:
|
|
206
202
|
if isinstance(idx, slice):
|
|
@@ -239,10 +235,8 @@ class SparseCellArray(CellArray):
|
|
|
239
235
|
if not sparse.issparse(data):
|
|
240
236
|
raise TypeError("Input must be a scipy sparse matrix.")
|
|
241
237
|
|
|
242
|
-
# Validate and adjust dimensions
|
|
243
238
|
coo_data, is_1d = self._validate_matrix_dims(data)
|
|
244
239
|
|
|
245
|
-
# Check bounds
|
|
246
240
|
end_row = start_row + coo_data.shape[0]
|
|
247
241
|
if end_row > self.shape[0]:
|
|
248
242
|
raise ValueError(
|