cellarr-array 0.1.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/.github/workflows/publish-pypi.yml +5 -5
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/.github/workflows/run-tests.yml +1 -1
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/.gitignore +2 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/.pre-commit-config.yaml +2 -2
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/CHANGELOG.md +14 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/PKG-INFO +6 -1
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/setup.cfg +5 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/src/cellarr_array/__init__.py +3 -4
- cellarr_array-0.3.1/src/cellarr_array/core/__init__.py +3 -0
- cellarr_array-0.1.0/src/cellarr_array/cellarray_base.py → cellarr_array-0.3.1/src/cellarr_array/core/base.py +66 -15
- cellarr_array-0.1.0/src/cellarr_array/cellarray_dense.py → cellarr_array-0.3.1/src/cellarr_array/core/dense.py +2 -3
- {cellarr_array-0.1.0/src/cellarr_array → cellarr_array-0.3.1/src/cellarr_array/core}/helpers.py +103 -52
- cellarr_array-0.1.0/src/cellarr_array/cellarray_sparse.py → cellarr_array-0.3.1/src/cellarr_array/core/sparse.py +74 -52
- cellarr_array-0.3.1/src/cellarr_array/dataloaders/__init__.py +3 -0
- cellarr_array-0.3.1/src/cellarr_array/dataloaders/denseloader.py +198 -0
- cellarr_array-0.3.1/src/cellarr_array/dataloaders/iterabledataloader.py +320 -0
- cellarr_array-0.3.1/src/cellarr_array/dataloaders/sparseloader.py +230 -0
- cellarr_array-0.3.1/src/cellarr_array/dataloaders/utils.py +26 -0
- cellarr_array-0.3.1/src/cellarr_array/utils/__init__.py +3 -0
- cellarr_array-0.3.1/src/cellarr_array/utils/mock.py +167 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/src/cellarr_array.egg-info/PKG-INFO +6 -1
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/src/cellarr_array.egg-info/SOURCES.txt +18 -6
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/src/cellarr_array.egg-info/requires.txt +5 -0
- cellarr_array-0.3.1/tests/conftest.py +233 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/tests/test_all.py +1 -1
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/tests/test_dense.py +5 -7
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/tests/test_helpers.py +25 -13
- cellarr_array-0.3.1/tests/test_iterable_loader.py +288 -0
- cellarr_array-0.3.1/tests/test_map_loader.py +289 -0
- cellarr_array-0.3.1/tests/test_query.py +63 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/tests/test_sparse.py +4 -3
- cellarr_array-0.3.1/tests/test_string_dims.py +73 -0
- cellarr_array-0.1.0/tests/conftest.py +0 -91
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/.coveragerc +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/.readthedocs.yml +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/AUTHORS.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/CONTRIBUTING.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/LICENSE.txt +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/README.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/Makefile +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/_static/.gitignore +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/authors.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/changelog.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/conf.py +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/contributing.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/index.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/license.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/readme.md +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/requirements.txt +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/pyproject.toml +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/setup.py +0 -0
- {cellarr_array-0.1.0/src/cellarr_array → cellarr_array-0.3.1/src/cellarr_array/utils}/config.py +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/src/cellarr_array.egg-info/dependency_links.txt +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/src/cellarr_array.egg-info/not-zip-safe +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/src/cellarr_array.egg-info/top_level.txt +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/tests/test_inmemory.py +0 -0
- {cellarr_array-0.1.0 → cellarr_array-0.3.1}/tox.ini +0 -0
|
@@ -19,19 +19,19 @@ jobs:
|
|
|
19
19
|
steps:
|
|
20
20
|
- uses: actions/checkout@v4
|
|
21
21
|
|
|
22
|
-
- name: Set up Python 3.
|
|
22
|
+
- name: Set up Python 3.12
|
|
23
23
|
uses: actions/setup-python@v5
|
|
24
24
|
with:
|
|
25
|
-
python-version: 3.
|
|
25
|
+
python-version: 3.12
|
|
26
26
|
|
|
27
27
|
- name: Install dependencies
|
|
28
28
|
run: |
|
|
29
29
|
python -m pip install --upgrade pip
|
|
30
30
|
pip install tox
|
|
31
31
|
|
|
32
|
-
- name: Test with tox
|
|
33
|
-
|
|
34
|
-
|
|
32
|
+
# - name: Test with tox
|
|
33
|
+
# run: |
|
|
34
|
+
# tox
|
|
35
35
|
|
|
36
36
|
- name: Build docs
|
|
37
37
|
run: |
|
|
@@ -2,7 +2,7 @@ exclude: '^docs/conf.py'
|
|
|
2
2
|
|
|
3
3
|
repos:
|
|
4
4
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
5
|
-
rev:
|
|
5
|
+
rev: v6.0.0
|
|
6
6
|
hooks:
|
|
7
7
|
- id: trailing-whitespace
|
|
8
8
|
- id: check-added-large-files
|
|
@@ -19,7 +19,7 @@ repos:
|
|
|
19
19
|
|
|
20
20
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
21
21
|
# Ruff version.
|
|
22
|
-
rev: v0.
|
|
22
|
+
rev: v0.14.3
|
|
23
23
|
hooks:
|
|
24
24
|
- id: ruff
|
|
25
25
|
args: [--fix, --exit-non-zero-on-fix]
|
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## Version 0.3.0 - 0.3.1
|
|
4
|
+
|
|
5
|
+
- Support for string dimensions when creating cellarr arrays.
|
|
6
|
+
- Support query conditions for slice operations.
|
|
7
|
+
- Added support for retrieving unique dimension values (only supported for sparse arrays).
|
|
8
|
+
- Fixed a minor bug causing memory leaks on large sparse arrays.
|
|
9
|
+
- EOL for Python 3.9
|
|
10
|
+
|
|
11
|
+
## Version 0.2.0
|
|
12
|
+
|
|
13
|
+
- Dataloaders for sparse and dense arrays. We provide templates for both map-style and iterable-style dataloaders. Users are expected to understand the caveats of both of these approaches.
|
|
14
|
+
- Fixed a bug with slicing on 1D arrays and many improvements for optimizing slicing parameters.
|
|
15
|
+
- Update documentation and tests.
|
|
16
|
+
|
|
3
17
|
## Version 0.1.0
|
|
4
18
|
|
|
5
19
|
- Support cellarr-arrays on user provided tiledb array objects.
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cellarr-array
|
|
3
|
-
Version: 0.1
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Base class for handling TileDB backed arrays.
|
|
5
5
|
Home-page: https://github.com/cellarr/cellarr-array
|
|
6
6
|
Author: Jayaram Kancherla
|
|
7
7
|
Author-email: jayaram.kancherla@gmail.com
|
|
8
8
|
License: MIT
|
|
9
9
|
Project-URL: Documentation, https://github.com/cellarr/cellarr-array
|
|
10
|
+
Project-URL: Source, https://github.com/cellarr/cellarr-array
|
|
10
11
|
Platform: any
|
|
11
12
|
Classifier: Development Status :: 4 - Beta
|
|
12
13
|
Classifier: Programming Language :: Python
|
|
@@ -16,10 +17,14 @@ Requires-Dist: importlib-metadata; python_version < "3.8"
|
|
|
16
17
|
Requires-Dist: tiledb
|
|
17
18
|
Requires-Dist: numpy
|
|
18
19
|
Requires-Dist: scipy
|
|
20
|
+
Provides-Extra: optional
|
|
21
|
+
Requires-Dist: torch; extra == "optional"
|
|
19
22
|
Provides-Extra: testing
|
|
20
23
|
Requires-Dist: setuptools; extra == "testing"
|
|
21
24
|
Requires-Dist: pytest; extra == "testing"
|
|
22
25
|
Requires-Dist: pytest-cov; extra == "testing"
|
|
26
|
+
Requires-Dist: pandas; extra == "testing"
|
|
27
|
+
Requires-Dist: torch; extra == "testing"
|
|
23
28
|
Dynamic: license-file
|
|
24
29
|
|
|
25
30
|
[](https://pypi.org/project/cellarr-array/)
|
|
@@ -10,6 +10,7 @@ long_description_content_type = text/markdown; charset=UTF-8; variant=GFM
|
|
|
10
10
|
url = https://github.com/cellarr/cellarr-array
|
|
11
11
|
project_urls =
|
|
12
12
|
Documentation = https://github.com/cellarr/cellarr-array
|
|
13
|
+
Source = https://github.com/cellarr/cellarr-array
|
|
13
14
|
platforms = any
|
|
14
15
|
classifiers =
|
|
15
16
|
Development Status :: 4 - Beta
|
|
@@ -33,10 +34,14 @@ exclude =
|
|
|
33
34
|
tests
|
|
34
35
|
|
|
35
36
|
[options.extras_require]
|
|
37
|
+
optional =
|
|
38
|
+
torch
|
|
36
39
|
testing =
|
|
37
40
|
setuptools
|
|
38
41
|
pytest
|
|
39
42
|
pytest-cov
|
|
43
|
+
pandas
|
|
44
|
+
%(optional)s
|
|
40
45
|
|
|
41
46
|
[options.entry_points]
|
|
42
47
|
|
|
@@ -15,7 +15,6 @@ except PackageNotFoundError: # pragma: no cover
|
|
|
15
15
|
finally:
|
|
16
16
|
del version, PackageNotFoundError
|
|
17
17
|
|
|
18
|
-
from .
|
|
19
|
-
from .
|
|
20
|
-
from .
|
|
21
|
-
from .helpers import create_cellarray, SliceHelper
|
|
18
|
+
from .core import DenseCellArray, SparseCellArray
|
|
19
|
+
from .core.helpers import create_cellarray
|
|
20
|
+
from .utils import CellArrConfig, ConsolidationConfig
|
|
@@ -12,7 +12,7 @@ import numpy as np
|
|
|
12
12
|
import tiledb
|
|
13
13
|
from scipy import sparse
|
|
14
14
|
|
|
15
|
-
from .config import ConsolidationConfig
|
|
15
|
+
from ..utils.config import ConsolidationConfig
|
|
16
16
|
from .helpers import SliceHelper
|
|
17
17
|
|
|
18
18
|
__author__ = "Jayaram Kancherla"
|
|
@@ -119,6 +119,7 @@ class CellArray(ABC):
|
|
|
119
119
|
self._shape = None
|
|
120
120
|
self._ndim = None
|
|
121
121
|
self._dim_names = None
|
|
122
|
+
self._dim_dtypes = None
|
|
122
123
|
self._attr_names = None
|
|
123
124
|
self._nonempty_domain = None
|
|
124
125
|
|
|
@@ -185,7 +186,16 @@ class CellArray(ABC):
|
|
|
185
186
|
def shape(self) -> Tuple[int, ...]:
|
|
186
187
|
if self._shape is None:
|
|
187
188
|
with self.open_array(mode="r") as A:
|
|
188
|
-
|
|
189
|
+
shape_list = []
|
|
190
|
+
for dim in A.schema.domain:
|
|
191
|
+
try:
|
|
192
|
+
# This will fail for string dimensions
|
|
193
|
+
shape_list.append(dim.shape[0])
|
|
194
|
+
except TypeError:
|
|
195
|
+
# For string dimensions, the shape is not well-defined.
|
|
196
|
+
# We use a large number as a placeholder for slicing purposes.
|
|
197
|
+
shape_list.append(2**63 - 1)
|
|
198
|
+
self._shape = tuple(shape_list)
|
|
189
199
|
return self._shape
|
|
190
200
|
|
|
191
201
|
@property
|
|
@@ -209,6 +219,14 @@ class CellArray(ABC):
|
|
|
209
219
|
# self._ndim = len(self.shape)
|
|
210
220
|
return self._ndim
|
|
211
221
|
|
|
222
|
+
@property
|
|
223
|
+
def dim_dtypes(self) -> List[np.dtype]:
|
|
224
|
+
"""Get dimension dtypes of the array."""
|
|
225
|
+
if self._dim_dtypes is None:
|
|
226
|
+
with self.open_array(mode="r") as A:
|
|
227
|
+
self._dim_dtypes = [dim.dtype for dim in A.schema.domain]
|
|
228
|
+
return self._dim_dtypes
|
|
229
|
+
|
|
212
230
|
@contextmanager
|
|
213
231
|
def open_array(self, mode: Optional[str] = None):
|
|
214
232
|
"""Context manager for array operations.
|
|
@@ -235,8 +253,8 @@ class CellArray(ABC):
|
|
|
235
253
|
) from e
|
|
236
254
|
|
|
237
255
|
effective_mode = mode if mode is not None else self._opened_array_external.mode
|
|
238
|
-
|
|
239
256
|
current_external_mode = self._opened_array_external.mode
|
|
257
|
+
|
|
240
258
|
if effective_mode == "r" and current_external_mode not in ["r", "w", "m"]:
|
|
241
259
|
# Read ops ok on write/modify modes
|
|
242
260
|
pass
|
|
@@ -259,35 +277,54 @@ class CellArray(ABC):
|
|
|
259
277
|
finally:
|
|
260
278
|
array.close()
|
|
261
279
|
|
|
262
|
-
def __getitem__(self, key: Union[slice, EllipsisType, Tuple[Union[slice, List[int]], ...], EllipsisType]):
|
|
263
|
-
"""Get item implementation that routes to either direct slicing
|
|
264
|
-
based on the type of indices provided.
|
|
280
|
+
def __getitem__(self, key: Union[slice, EllipsisType, Tuple[Union[slice, List[int]], ...], EllipsisType, str]):
|
|
281
|
+
"""Get item implementation that routes to either direct slicing, multi_index,
|
|
282
|
+
or query based on the type of indices provided.
|
|
265
283
|
|
|
266
284
|
Args:
|
|
267
285
|
key:
|
|
268
286
|
Slice or list of indices for each dimension in the array.
|
|
287
|
+
|
|
288
|
+
Alternatively, may be string to specify query conditions.
|
|
269
289
|
"""
|
|
290
|
+
# This is a query condition
|
|
291
|
+
if isinstance(key, str):
|
|
292
|
+
with self.open_array(mode="r") as array:
|
|
293
|
+
if self._attr is not None:
|
|
294
|
+
return array.query(cond=key, attrs=[self._attr])[:]
|
|
295
|
+
else:
|
|
296
|
+
array.query(cond=key)[:]
|
|
297
|
+
|
|
270
298
|
if not isinstance(key, tuple):
|
|
271
299
|
key = (key,)
|
|
272
300
|
|
|
273
|
-
|
|
301
|
+
num_ellipsis = sum(isinstance(i, EllipsisType) for i in key)
|
|
302
|
+
if num_ellipsis > 1:
|
|
303
|
+
raise IndexError("an index can only have a single ellipsis ('...')")
|
|
304
|
+
|
|
305
|
+
if num_ellipsis == 1:
|
|
306
|
+
ellipsis_idx = key.index(Ellipsis)
|
|
307
|
+
num_other_indices = len(key) - 1
|
|
308
|
+
num_slices_to_add = self.ndim - num_other_indices
|
|
309
|
+
|
|
310
|
+
key = key[:ellipsis_idx] + (slice(None),) * num_slices_to_add + key[ellipsis_idx + 1 :]
|
|
311
|
+
|
|
312
|
+
if len(key) < self.ndim:
|
|
313
|
+
key = key + (slice(None),) * (self.ndim - len(key))
|
|
314
|
+
elif len(key) > self.ndim:
|
|
274
315
|
raise IndexError(f"Invalid number of dimensions: got {len(key)}, expected {self.ndim}")
|
|
275
316
|
|
|
276
317
|
# Normalize all indices
|
|
277
|
-
normalized_key = tuple(
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
if num_ellipsis > 1:
|
|
281
|
-
raise IndexError(f"Found more than 1 Ellipsis (...) in key: {normalized_key}")
|
|
318
|
+
normalized_key = tuple(
|
|
319
|
+
SliceHelper.normalize_index(idx, self.shape[i], self.dim_dtypes[i]) for i, idx in enumerate(key)
|
|
320
|
+
)
|
|
282
321
|
|
|
283
322
|
# Check if we can use direct slicing
|
|
284
|
-
use_direct = all(isinstance(idx,
|
|
323
|
+
use_direct = all(isinstance(idx, slice) for idx in normalized_key)
|
|
285
324
|
|
|
286
325
|
if use_direct:
|
|
287
326
|
return self._direct_slice(normalized_key)
|
|
288
327
|
else:
|
|
289
|
-
if num_ellipsis > 0:
|
|
290
|
-
raise IndexError(f"tiledb does not support ellipsis in multi-index access: {normalized_key}")
|
|
291
328
|
return self._multi_index(normalized_key)
|
|
292
329
|
|
|
293
330
|
@abstractmethod
|
|
@@ -342,3 +379,17 @@ class CellArray(ABC):
|
|
|
342
379
|
Additional arguments for write operation.
|
|
343
380
|
"""
|
|
344
381
|
pass
|
|
382
|
+
|
|
383
|
+
def get_unique_dim_values(self, dim_name: Optional[str] = None) -> np.ndarray:
|
|
384
|
+
"""Get unique values for a dimension.
|
|
385
|
+
|
|
386
|
+
Args:
|
|
387
|
+
dim_name:
|
|
388
|
+
The name of the dimension. If None, unique values for all
|
|
389
|
+
dimensions are returned.
|
|
390
|
+
|
|
391
|
+
Returns:
|
|
392
|
+
An array of unique dimension values.
|
|
393
|
+
"""
|
|
394
|
+
with self.open_array(mode="r") as A:
|
|
395
|
+
return A.unique_dim_values(dim_name)
|
|
@@ -7,7 +7,7 @@ from typing import List, Tuple, Union
|
|
|
7
7
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
|
|
10
|
-
from .
|
|
10
|
+
from .base import CellArray
|
|
11
11
|
from .helpers import SliceHelper
|
|
12
12
|
|
|
13
13
|
__author__ = "Jayaram Kancherla"
|
|
@@ -92,7 +92,6 @@ class DenseCellArray(CellArray):
|
|
|
92
92
|
if len(data.shape) != self.ndim:
|
|
93
93
|
raise ValueError(f"Data dimensions {data.shape} don't match array dimensions {self.shape}.")
|
|
94
94
|
|
|
95
|
-
# Check bounds
|
|
96
95
|
end_row = start_row + data.shape[0]
|
|
97
96
|
if end_row > self.shape[0]:
|
|
98
97
|
raise ValueError(
|
|
@@ -102,7 +101,6 @@ class DenseCellArray(CellArray):
|
|
|
102
101
|
if self.ndim == 2 and data.shape[1] != self.shape[1]:
|
|
103
102
|
raise ValueError(f"Data columns {data.shape[1]} don't match array columns {self.shape[1]}.")
|
|
104
103
|
|
|
105
|
-
# Construct write region
|
|
106
104
|
if self.ndim == 1:
|
|
107
105
|
write_region = slice(start_row, end_row)
|
|
108
106
|
else: # 2D
|
|
@@ -110,4 +108,5 @@ class DenseCellArray(CellArray):
|
|
|
110
108
|
|
|
111
109
|
# write_data = {self._attr: data} if len(self.attr_names) > 1 else data
|
|
112
110
|
with self.open_array(mode="w") as array:
|
|
111
|
+
print("write_region", write_region)
|
|
113
112
|
array[write_region] = data
|
{cellarr_array-0.1.0/src/cellarr_array → cellarr_array-0.3.1/src/cellarr_array/core}/helpers.py
RENAMED
|
@@ -8,7 +8,7 @@ from typing import List, Optional, Tuple, Union
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
import tiledb
|
|
10
10
|
|
|
11
|
-
from .config import CellArrConfig
|
|
11
|
+
from ..utils.config import CellArrConfig
|
|
12
12
|
|
|
13
13
|
__author__ = "Jayaram Kancherla"
|
|
14
14
|
__copyright__ = "Jayaram Kancherla"
|
|
@@ -52,7 +52,7 @@ def create_cellarray(
|
|
|
52
52
|
Optional list of dimension names.
|
|
53
53
|
|
|
54
54
|
dim_dtypes:
|
|
55
|
-
Optional list of dimension dtypes.
|
|
55
|
+
Optional list of dimension dtypes. Defaults to numpy's uint32.
|
|
56
56
|
|
|
57
57
|
attr_name:
|
|
58
58
|
Name of the data attribute.
|
|
@@ -67,29 +67,28 @@ def create_cellarray(
|
|
|
67
67
|
ValueError: If dimensions are invalid or inputs are inconsistent.
|
|
68
68
|
"""
|
|
69
69
|
config = config or CellArrConfig()
|
|
70
|
+
tiledb_ctx = tiledb.Config(config.ctx_config) if config.ctx_config else None
|
|
70
71
|
|
|
71
72
|
if attr_dtype is None:
|
|
72
73
|
attr_dtype = np.float32
|
|
73
74
|
if isinstance(attr_dtype, str):
|
|
74
75
|
attr_dtype = np.dtype(attr_dtype)
|
|
75
76
|
|
|
76
|
-
# Require either shape or dim_dtypes
|
|
77
77
|
if shape is None and dim_dtypes is None:
|
|
78
78
|
raise ValueError("Either 'shape' or 'dim_dtypes' must be provided.")
|
|
79
79
|
|
|
80
80
|
if shape is not None:
|
|
81
81
|
if len(shape) not in (1, 2):
|
|
82
|
-
raise ValueError("
|
|
82
|
+
raise ValueError("Shape must have 1 or 2 dimensions.")
|
|
83
83
|
|
|
84
84
|
# Set dimension dtypes, defaults to numpy uint32
|
|
85
85
|
if dim_dtypes is None:
|
|
86
86
|
dim_dtypes = [np.uint32] * len(shape)
|
|
87
87
|
else:
|
|
88
88
|
if len(dim_dtypes) not in (1, 2):
|
|
89
|
-
raise ValueError("
|
|
89
|
+
raise ValueError("Array must have 1 or 2 dimensions.")
|
|
90
90
|
dim_dtypes = [np.dtype(dt) if isinstance(dt, str) else dt for dt in dim_dtypes]
|
|
91
91
|
|
|
92
|
-
# Calculate shape from dtypes if needed
|
|
93
92
|
if shape is None:
|
|
94
93
|
shape = tuple(np.iinfo(dt).max if np.issubdtype(dt, np.integer) else None for dt in dim_dtypes)
|
|
95
94
|
if None in shape:
|
|
@@ -97,7 +96,6 @@ def create_cellarray(
|
|
|
97
96
|
np.iinfo(dt).max if s is None and np.issubdtype(dt, np.integer) else s for s, dt in zip(shape, dim_dtypes)
|
|
98
97
|
)
|
|
99
98
|
|
|
100
|
-
# Set dimension names
|
|
101
99
|
if dim_names is None:
|
|
102
100
|
dim_names = [f"dim_{i}" for i in range(len(shape))]
|
|
103
101
|
|
|
@@ -105,42 +103,52 @@ def create_cellarray(
|
|
|
105
103
|
if not (len(shape) == len(dim_dtypes) == len(dim_names)):
|
|
106
104
|
raise ValueError("Lengths of 'shape', 'dim_dtypes', and 'dim_names' must match.")
|
|
107
105
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
106
|
+
dims = []
|
|
107
|
+
for name, s, dt in zip(dim_names, shape, dim_dtypes):
|
|
108
|
+
if np.issubdtype(dt, np.integer):
|
|
109
|
+
domain = (0, 0 if s == 0 else s - 1)
|
|
110
|
+
tile = min(1 if s == 0 else s // 2, config.tile_capacity // 2)
|
|
111
|
+
dim_dtype = dt
|
|
112
|
+
else: # Assumes string or object dtype
|
|
113
|
+
domain = (None, None)
|
|
114
|
+
tile = None
|
|
115
|
+
dim_dtype = "ascii"
|
|
116
|
+
|
|
117
|
+
dims.append(
|
|
118
|
+
tiledb.Dim(
|
|
119
|
+
name=name,
|
|
120
|
+
domain=domain,
|
|
121
|
+
tile=tile,
|
|
122
|
+
dtype=dim_dtype,
|
|
123
|
+
filters=config.coords_filters,
|
|
124
|
+
)
|
|
125
|
+
)
|
|
115
126
|
|
|
116
|
-
|
|
127
|
+
dom = tiledb.Domain(*dims, ctx=tiledb_ctx)
|
|
128
|
+
attr_obj = tiledb.Attr(
|
|
117
129
|
name=attr_name,
|
|
118
130
|
dtype=attr_dtype,
|
|
119
131
|
filters=config.attrs_filters.get(attr_name, config.attrs_filters.get("", None)),
|
|
132
|
+
ctx=tiledb_ctx,
|
|
120
133
|
)
|
|
121
|
-
|
|
122
134
|
schema = tiledb.ArraySchema(
|
|
123
135
|
domain=dom,
|
|
124
|
-
attrs=[
|
|
136
|
+
attrs=[attr_obj],
|
|
125
137
|
cell_order=config.cell_order,
|
|
126
138
|
tile_order=config.tile_order,
|
|
127
139
|
sparse=sparse,
|
|
128
|
-
coords_filters=config.coords_filters,
|
|
129
140
|
offsets_filters=config.offsets_filters,
|
|
130
|
-
ctx=
|
|
141
|
+
ctx=tiledb_ctx,
|
|
131
142
|
)
|
|
143
|
+
tiledb.Array.create(uri, schema, ctx=tiledb_ctx)
|
|
132
144
|
|
|
133
|
-
|
|
145
|
+
from .dense import DenseCellArray
|
|
146
|
+
from .sparse import SparseCellArray
|
|
134
147
|
|
|
135
|
-
# Import here to avoid circular imports
|
|
136
|
-
from .cellarray_dense import DenseCellArray
|
|
137
|
-
from .cellarray_sparse import SparseCellArray
|
|
138
|
-
|
|
139
|
-
# Return appropriate array type
|
|
140
148
|
return (
|
|
141
|
-
SparseCellArray(uri=uri, attr=attr_name, mode=mode)
|
|
149
|
+
SparseCellArray(uri=uri, attr=attr_name, mode=mode, config_or_context=tiledb_ctx)
|
|
142
150
|
if sparse
|
|
143
|
-
else DenseCellArray(uri=uri, attr=attr_name, mode=mode)
|
|
151
|
+
else DenseCellArray(uri=uri, attr=attr_name, mode=mode, config_or_context=tiledb_ctx)
|
|
144
152
|
)
|
|
145
153
|
|
|
146
154
|
|
|
@@ -149,58 +157,101 @@ class SliceHelper:
|
|
|
149
157
|
|
|
150
158
|
@staticmethod
|
|
151
159
|
def is_contiguous_indices(indices: List[int]) -> Optional[slice]:
|
|
152
|
-
"""
|
|
160
|
+
"""Checks if a list of indices is contiguous and can be converted to a slice.
|
|
161
|
+
|
|
162
|
+
Returns None if the list is not contiguous or contains non-integers.
|
|
163
|
+
"""
|
|
153
164
|
if not indices:
|
|
154
165
|
return None
|
|
155
166
|
|
|
156
|
-
|
|
167
|
+
if not all(isinstance(i, (int, np.integer)) for i in indices):
|
|
168
|
+
return None
|
|
169
|
+
|
|
170
|
+
sorted_indices = sorted(list(set(indices)))
|
|
171
|
+
if not sorted_indices:
|
|
172
|
+
return None
|
|
173
|
+
|
|
174
|
+
if len(sorted_indices) == 1:
|
|
175
|
+
return slice(sorted_indices[0], sorted_indices[0] + 1, None)
|
|
176
|
+
|
|
177
|
+
diffs = np.diff(sorted_indices)
|
|
157
178
|
if np.all(diffs == 1):
|
|
158
|
-
return slice(
|
|
179
|
+
return slice(sorted_indices[0], sorted_indices[-1] + 1, None)
|
|
180
|
+
|
|
159
181
|
return None
|
|
160
182
|
|
|
161
183
|
@staticmethod
|
|
162
|
-
def normalize_index(
|
|
184
|
+
def normalize_index(
|
|
185
|
+
idx: Union[int, range, slice, List, str, EllipsisType],
|
|
186
|
+
dim_size: int,
|
|
187
|
+
dim_dtype: np.dtype,
|
|
188
|
+
) -> Union[slice, List, EllipsisType]:
|
|
163
189
|
"""Normalize index to handle negative indices and ensure consistency."""
|
|
164
190
|
|
|
191
|
+
is_string_dim = np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_)
|
|
192
|
+
|
|
193
|
+
if is_string_dim:
|
|
194
|
+
if isinstance(idx, (str, bytes)):
|
|
195
|
+
return [idx]
|
|
196
|
+
if isinstance(idx, list) and all(isinstance(i, (str, bytes)) for i in idx):
|
|
197
|
+
return idx
|
|
198
|
+
if isinstance(idx, slice):
|
|
199
|
+
# For string dimensions, we do not normalize the slice with integer sizes
|
|
200
|
+
return idx
|
|
201
|
+
raise TypeError(f"Unsupported index type '{type(idx).__name__}' for string dimension.")
|
|
202
|
+
|
|
165
203
|
if isinstance(idx, EllipsisType):
|
|
166
204
|
return idx
|
|
167
205
|
|
|
168
|
-
# Convert ranges to slices
|
|
169
206
|
if isinstance(idx, range):
|
|
170
207
|
idx = slice(idx.start, idx.stop, idx.step)
|
|
171
208
|
|
|
172
209
|
if isinstance(idx, slice):
|
|
173
|
-
start = idx.start
|
|
174
|
-
|
|
175
|
-
|
|
210
|
+
start, stop, step = idx.start, idx.stop, idx.step
|
|
211
|
+
|
|
212
|
+
# Resolve None to full dimension slice parts
|
|
213
|
+
if start is None:
|
|
214
|
+
start = 0
|
|
215
|
+
|
|
216
|
+
if stop is None:
|
|
217
|
+
stop = dim_size
|
|
176
218
|
|
|
177
219
|
# Handle negative indices
|
|
178
220
|
if start < 0:
|
|
179
|
-
start
|
|
180
|
-
|
|
221
|
+
start += dim_size
|
|
181
222
|
if stop < 0:
|
|
182
|
-
stop
|
|
183
|
-
|
|
184
|
-
if start < 0 or start > dim_size:
|
|
185
|
-
raise IndexError(f"Start index {start} out of bounds for dimension size {dim_size}")
|
|
186
|
-
if stop < 0 or stop > dim_size:
|
|
187
|
-
raise IndexError(f"Stop index {stop} out of bounds for dimension size {dim_size}")
|
|
223
|
+
stop += dim_size
|
|
188
224
|
|
|
225
|
+
# Clamping slice arguments to dimensions
|
|
226
|
+
stop = min(stop, dim_size)
|
|
227
|
+
start = max(0, start)
|
|
189
228
|
return slice(start, stop, step)
|
|
190
229
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
raise IndexError(f"List indices {idx} out of bounds for dimension size {dim_size}")
|
|
195
|
-
return norm_idx
|
|
230
|
+
if isinstance(idx, list):
|
|
231
|
+
if not idx:
|
|
232
|
+
return []
|
|
196
233
|
|
|
197
|
-
|
|
198
|
-
|
|
234
|
+
# This check only applies to integer lists
|
|
235
|
+
if not all(isinstance(i, (int, np.integer)) for i in idx):
|
|
236
|
+
raise TypeError(
|
|
237
|
+
"List indices must be all integers or all strings, but got mixed types or non-string/int types."
|
|
238
|
+
)
|
|
199
239
|
|
|
200
|
-
if
|
|
201
|
-
|
|
240
|
+
norm_idx = [i if i >= 0 else dim_size + i for i in idx]
|
|
241
|
+
if any(i < 0 or i >= dim_size for i in norm_idx):
|
|
242
|
+
raise IndexError("List indices out of bounds for dimension size.")
|
|
243
|
+
return sorted(list(set(norm_idx)))
|
|
244
|
+
|
|
245
|
+
if isinstance(idx, (int, np.integer)):
|
|
246
|
+
norm_idx = int(idx)
|
|
247
|
+
if norm_idx < 0:
|
|
248
|
+
norm_idx += dim_size
|
|
249
|
+
if not (0 <= norm_idx < dim_size):
|
|
250
|
+
raise IndexError(f"Index {idx} out of bounds for dimension size.")
|
|
202
251
|
return slice(norm_idx, norm_idx + 1, None)
|
|
203
252
|
|
|
253
|
+
raise TypeError(f"Index type {type(idx)} not supported for normalization.")
|
|
254
|
+
|
|
204
255
|
|
|
205
256
|
def create_group(output_path, group_name):
|
|
206
257
|
tiledb.group_create(f"{output_path}/{group_name}")
|