cellarr-array 0.1.0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/.github/workflows/publish-pypi.yml +5 -5
  2. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/.github/workflows/run-tests.yml +1 -1
  3. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/.gitignore +2 -0
  4. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/.pre-commit-config.yaml +2 -2
  5. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/CHANGELOG.md +14 -0
  6. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/PKG-INFO +6 -1
  7. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/setup.cfg +5 -0
  8. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/src/cellarr_array/__init__.py +3 -4
  9. cellarr_array-0.3.1/src/cellarr_array/core/__init__.py +3 -0
  10. cellarr_array-0.1.0/src/cellarr_array/cellarray_base.py → cellarr_array-0.3.1/src/cellarr_array/core/base.py +66 -15
  11. cellarr_array-0.1.0/src/cellarr_array/cellarray_dense.py → cellarr_array-0.3.1/src/cellarr_array/core/dense.py +2 -3
  12. {cellarr_array-0.1.0/src/cellarr_array → cellarr_array-0.3.1/src/cellarr_array/core}/helpers.py +103 -52
  13. cellarr_array-0.1.0/src/cellarr_array/cellarray_sparse.py → cellarr_array-0.3.1/src/cellarr_array/core/sparse.py +74 -52
  14. cellarr_array-0.3.1/src/cellarr_array/dataloaders/__init__.py +3 -0
  15. cellarr_array-0.3.1/src/cellarr_array/dataloaders/denseloader.py +198 -0
  16. cellarr_array-0.3.1/src/cellarr_array/dataloaders/iterabledataloader.py +320 -0
  17. cellarr_array-0.3.1/src/cellarr_array/dataloaders/sparseloader.py +230 -0
  18. cellarr_array-0.3.1/src/cellarr_array/dataloaders/utils.py +26 -0
  19. cellarr_array-0.3.1/src/cellarr_array/utils/__init__.py +3 -0
  20. cellarr_array-0.3.1/src/cellarr_array/utils/mock.py +167 -0
  21. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/src/cellarr_array.egg-info/PKG-INFO +6 -1
  22. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/src/cellarr_array.egg-info/SOURCES.txt +18 -6
  23. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/src/cellarr_array.egg-info/requires.txt +5 -0
  24. cellarr_array-0.3.1/tests/conftest.py +233 -0
  25. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/tests/test_all.py +1 -1
  26. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/tests/test_dense.py +5 -7
  27. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/tests/test_helpers.py +25 -13
  28. cellarr_array-0.3.1/tests/test_iterable_loader.py +288 -0
  29. cellarr_array-0.3.1/tests/test_map_loader.py +289 -0
  30. cellarr_array-0.3.1/tests/test_query.py +63 -0
  31. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/tests/test_sparse.py +4 -3
  32. cellarr_array-0.3.1/tests/test_string_dims.py +73 -0
  33. cellarr_array-0.1.0/tests/conftest.py +0 -91
  34. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/.coveragerc +0 -0
  35. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/.readthedocs.yml +0 -0
  36. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/AUTHORS.md +0 -0
  37. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/CONTRIBUTING.md +0 -0
  38. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/LICENSE.txt +0 -0
  39. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/README.md +0 -0
  40. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/Makefile +0 -0
  41. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/_static/.gitignore +0 -0
  42. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/authors.md +0 -0
  43. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/changelog.md +0 -0
  44. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/conf.py +0 -0
  45. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/contributing.md +0 -0
  46. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/index.md +0 -0
  47. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/license.md +0 -0
  48. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/readme.md +0 -0
  49. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/docs/requirements.txt +0 -0
  50. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/pyproject.toml +0 -0
  51. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/setup.py +0 -0
  52. {cellarr_array-0.1.0/src/cellarr_array → cellarr_array-0.3.1/src/cellarr_array/utils}/config.py +0 -0
  53. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/src/cellarr_array.egg-info/dependency_links.txt +0 -0
  54. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/src/cellarr_array.egg-info/not-zip-safe +0 -0
  55. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/src/cellarr_array.egg-info/top_level.txt +0 -0
  56. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/tests/test_inmemory.py +0 -0
  57. {cellarr_array-0.1.0 → cellarr_array-0.3.1}/tox.ini +0 -0
@@ -19,19 +19,19 @@ jobs:
19
19
  steps:
20
20
  - uses: actions/checkout@v4
21
21
 
22
- - name: Set up Python 3.11
22
+ - name: Set up Python 3.12
23
23
  uses: actions/setup-python@v5
24
24
  with:
25
- python-version: 3.11
25
+ python-version: 3.12
26
26
 
27
27
  - name: Install dependencies
28
28
  run: |
29
29
  python -m pip install --upgrade pip
30
30
  pip install tox
31
31
 
32
- - name: Test with tox
33
- run: |
34
- tox
32
+ # - name: Test with tox
33
+ # run: |
34
+ # tox
35
35
 
36
36
  - name: Build docs
37
37
  run: |
@@ -28,7 +28,7 @@ jobs:
28
28
  test:
29
29
  strategy:
30
30
  matrix:
31
- python: ["3.9", "3.10", "3.11", "3.12", "3.13"]
31
+ python: ["3.10", "3.11", "3.12", "3.13", "3.14"]
32
32
  platform:
33
33
  - ubuntu-latest
34
34
  # - macos-latest
@@ -52,3 +52,5 @@ MANIFEST
52
52
  .venv*/
53
53
  .conda*/
54
54
  .python-version
55
+
56
+ *.tdb
@@ -2,7 +2,7 @@ exclude: '^docs/conf.py'
2
2
 
3
3
  repos:
4
4
  - repo: https://github.com/pre-commit/pre-commit-hooks
5
- rev: v5.0.0
5
+ rev: v6.0.0
6
6
  hooks:
7
7
  - id: trailing-whitespace
8
8
  - id: check-added-large-files
@@ -19,7 +19,7 @@ repos:
19
19
 
20
20
  - repo: https://github.com/astral-sh/ruff-pre-commit
21
21
  # Ruff version.
22
- rev: v0.11.5
22
+ rev: v0.14.3
23
23
  hooks:
24
24
  - id: ruff
25
25
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,5 +1,19 @@
1
1
  # Changelog
2
2
 
3
+ ## Version 0.3.0 - 0.3.1
4
+
5
+ - Support for string dimensions when creating cellarr arrays.
6
+ - Support query conditions for slice operations.
7
+ - Added unique dim values. Only supported for sparse arrays.
8
+ - Fix a minor bug causing memory leaks on large sparse arrays.
9
+ - EOL for Python 3.9
10
+
11
+ ## Version 0.2.0
12
+
13
+ - Dataloaders for sparse and dense arrays. We provide templates for both map-style and iterable-style dataloaders. Users are expected to understand the caveats of both of these approaches.
14
+ - Fixed a bug with slicing on 1D arrays and many improvements for optimizing slicing parameters.
15
+ - Update documentation and tests.
16
+
3
17
  ## Version 0.1.0
4
18
 
5
19
  - Support cellarr-arrays on user provided tiledb array objects.
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cellarr-array
3
- Version: 0.1.0
3
+ Version: 0.3.1
4
4
  Summary: Base class for handling TileDB backed arrays.
5
5
  Home-page: https://github.com/cellarr/cellarr-array
6
6
  Author: Jayaram Kancherla
7
7
  Author-email: jayaram.kancherla@gmail.com
8
8
  License: MIT
9
9
  Project-URL: Documentation, https://github.com/cellarr/cellarr-array
10
+ Project-URL: Source, https://github.com/cellarr/cellarr-array
10
11
  Platform: any
11
12
  Classifier: Development Status :: 4 - Beta
12
13
  Classifier: Programming Language :: Python
@@ -16,10 +17,14 @@ Requires-Dist: importlib-metadata; python_version < "3.8"
16
17
  Requires-Dist: tiledb
17
18
  Requires-Dist: numpy
18
19
  Requires-Dist: scipy
20
+ Provides-Extra: optional
21
+ Requires-Dist: torch; extra == "optional"
19
22
  Provides-Extra: testing
20
23
  Requires-Dist: setuptools; extra == "testing"
21
24
  Requires-Dist: pytest; extra == "testing"
22
25
  Requires-Dist: pytest-cov; extra == "testing"
26
+ Requires-Dist: pandas; extra == "testing"
27
+ Requires-Dist: torch; extra == "testing"
23
28
  Dynamic: license-file
24
29
 
25
30
  [![PyPI-Server](https://img.shields.io/pypi/v/cellarr-array.svg)](https://pypi.org/project/cellarr-array/)
@@ -10,6 +10,7 @@ long_description_content_type = text/markdown; charset=UTF-8; variant=GFM
10
10
  url = https://github.com/cellarr/cellarr-array
11
11
  project_urls =
12
12
  Documentation = https://github.com/cellarr/cellarr-array
13
+ Source = https://github.com/cellarr/cellarr-array
13
14
  platforms = any
14
15
  classifiers =
15
16
  Development Status :: 4 - Beta
@@ -33,10 +34,14 @@ exclude =
33
34
  tests
34
35
 
35
36
  [options.extras_require]
37
+ optional =
38
+ torch
36
39
  testing =
37
40
  setuptools
38
41
  pytest
39
42
  pytest-cov
43
+ pandas
44
+ %(optional)s
40
45
 
41
46
  [options.entry_points]
42
47
 
@@ -15,7 +15,6 @@ except PackageNotFoundError: # pragma: no cover
15
15
  finally:
16
16
  del version, PackageNotFoundError
17
17
 
18
- from .config import CellArrConfig, ConsolidationConfig
19
- from .cellarray_dense import DenseCellArray
20
- from .cellarray_sparse import SparseCellArray
21
- from .helpers import create_cellarray, SliceHelper
18
+ from .core import DenseCellArray, SparseCellArray
19
+ from .core.helpers import create_cellarray
20
+ from .utils import CellArrConfig, ConsolidationConfig
@@ -0,0 +1,3 @@
1
+ from .base import CellArray
2
+ from .dense import DenseCellArray
3
+ from .sparse import SparseCellArray
@@ -12,7 +12,7 @@ import numpy as np
12
12
  import tiledb
13
13
  from scipy import sparse
14
14
 
15
- from .config import ConsolidationConfig
15
+ from ..utils.config import ConsolidationConfig
16
16
  from .helpers import SliceHelper
17
17
 
18
18
  __author__ = "Jayaram Kancherla"
@@ -119,6 +119,7 @@ class CellArray(ABC):
119
119
  self._shape = None
120
120
  self._ndim = None
121
121
  self._dim_names = None
122
+ self._dim_dtypes = None
122
123
  self._attr_names = None
123
124
  self._nonempty_domain = None
124
125
 
@@ -185,7 +186,16 @@ class CellArray(ABC):
185
186
  def shape(self) -> Tuple[int, ...]:
186
187
  if self._shape is None:
187
188
  with self.open_array(mode="r") as A:
188
- self._shape = tuple(int(dim.domain[1] - dim.domain[0] + 1) for dim in A.schema.domain)
189
+ shape_list = []
190
+ for dim in A.schema.domain:
191
+ try:
192
+ # This will fail for string dimensions
193
+ shape_list.append(dim.shape[0])
194
+ except TypeError:
195
+ # For string dimensions, the shape is not well-defined.
196
+ # We use a large number as a placeholder for slicing purposes.
197
+ shape_list.append(2**63 - 1)
198
+ self._shape = tuple(shape_list)
189
199
  return self._shape
190
200
 
191
201
  @property
@@ -209,6 +219,14 @@ class CellArray(ABC):
209
219
  # self._ndim = len(self.shape)
210
220
  return self._ndim
211
221
 
222
+ @property
223
+ def dim_dtypes(self) -> List[np.dtype]:
224
+ """Get dimension dtypes of the array."""
225
+ if self._dim_dtypes is None:
226
+ with self.open_array(mode="r") as A:
227
+ self._dim_dtypes = [dim.dtype for dim in A.schema.domain]
228
+ return self._dim_dtypes
229
+
212
230
  @contextmanager
213
231
  def open_array(self, mode: Optional[str] = None):
214
232
  """Context manager for array operations.
@@ -235,8 +253,8 @@ class CellArray(ABC):
235
253
  ) from e
236
254
 
237
255
  effective_mode = mode if mode is not None else self._opened_array_external.mode
238
-
239
256
  current_external_mode = self._opened_array_external.mode
257
+
240
258
  if effective_mode == "r" and current_external_mode not in ["r", "w", "m"]:
241
259
  # Read ops ok on write/modify modes
242
260
  pass
@@ -259,35 +277,54 @@ class CellArray(ABC):
259
277
  finally:
260
278
  array.close()
261
279
 
262
- def __getitem__(self, key: Union[slice, EllipsisType, Tuple[Union[slice, List[int]], ...], EllipsisType]):
263
- """Get item implementation that routes to either direct slicing or multi_index
264
- based on the type of indices provided.
280
+ def __getitem__(self, key: Union[slice, EllipsisType, Tuple[Union[slice, List[int]], ...], EllipsisType, str]):
281
+ """Get item implementation that routes to either direct slicing, multi_index,
282
+ or query based on the type of indices provided.
265
283
 
266
284
  Args:
267
285
  key:
268
286
  Slice or list of indices for each dimension in the array.
287
+
288
+ Alternatively, may be string to specify query conditions.
269
289
  """
290
+ # This is a query condition
291
+ if isinstance(key, str):
292
+ with self.open_array(mode="r") as array:
293
+ if self._attr is not None:
294
+ return array.query(cond=key, attrs=[self._attr])[:]
295
+ else:
296
+ array.query(cond=key)[:]
297
+
270
298
  if not isinstance(key, tuple):
271
299
  key = (key,)
272
300
 
273
- if len(key) > self.ndim:
301
+ num_ellipsis = sum(isinstance(i, EllipsisType) for i in key)
302
+ if num_ellipsis > 1:
303
+ raise IndexError("an index can only have a single ellipsis ('...')")
304
+
305
+ if num_ellipsis == 1:
306
+ ellipsis_idx = key.index(Ellipsis)
307
+ num_other_indices = len(key) - 1
308
+ num_slices_to_add = self.ndim - num_other_indices
309
+
310
+ key = key[:ellipsis_idx] + (slice(None),) * num_slices_to_add + key[ellipsis_idx + 1 :]
311
+
312
+ if len(key) < self.ndim:
313
+ key = key + (slice(None),) * (self.ndim - len(key))
314
+ elif len(key) > self.ndim:
274
315
  raise IndexError(f"Invalid number of dimensions: got {len(key)}, expected {self.ndim}")
275
316
 
276
317
  # Normalize all indices
277
- normalized_key = tuple(SliceHelper.normalize_index(idx, self.shape[i]) for i, idx in enumerate(key))
278
-
279
- num_ellipsis = sum(isinstance(i, EllipsisType) for i in normalized_key)
280
- if num_ellipsis > 1:
281
- raise IndexError(f"Found more than 1 Ellipsis (...) in key: {normalized_key}")
318
+ normalized_key = tuple(
319
+ SliceHelper.normalize_index(idx, self.shape[i], self.dim_dtypes[i]) for i, idx in enumerate(key)
320
+ )
282
321
 
283
322
  # Check if we can use direct slicing
284
- use_direct = all(isinstance(idx, (slice, EllipsisType)) for idx in normalized_key)
323
+ use_direct = all(isinstance(idx, slice) for idx in normalized_key)
285
324
 
286
325
  if use_direct:
287
326
  return self._direct_slice(normalized_key)
288
327
  else:
289
- if num_ellipsis > 0:
290
- raise IndexError(f"tiledb does not support ellipsis in multi-index access: {normalized_key}")
291
328
  return self._multi_index(normalized_key)
292
329
 
293
330
  @abstractmethod
@@ -342,3 +379,17 @@ class CellArray(ABC):
342
379
  Additional arguments for write operation.
343
380
  """
344
381
  pass
382
+
383
+ def get_unique_dim_values(self, dim_name: Optional[str] = None) -> np.ndarray:
384
+ """Get unique values for a dimension.
385
+
386
+ Args:
387
+ dim_name:
388
+ The name of the dimension. If None, unique values for all
389
+ dimensions are returned.
390
+
391
+ Returns:
392
+ An array of unique dimension values.
393
+ """
394
+ with self.open_array(mode="r") as A:
395
+ return A.unique_dim_values(dim_name)
@@ -7,7 +7,7 @@ from typing import List, Tuple, Union
7
7
 
8
8
  import numpy as np
9
9
 
10
- from .cellarray_base import CellArray
10
+ from .base import CellArray
11
11
  from .helpers import SliceHelper
12
12
 
13
13
  __author__ = "Jayaram Kancherla"
@@ -92,7 +92,6 @@ class DenseCellArray(CellArray):
92
92
  if len(data.shape) != self.ndim:
93
93
  raise ValueError(f"Data dimensions {data.shape} don't match array dimensions {self.shape}.")
94
94
 
95
- # Check bounds
96
95
  end_row = start_row + data.shape[0]
97
96
  if end_row > self.shape[0]:
98
97
  raise ValueError(
@@ -102,7 +101,6 @@ class DenseCellArray(CellArray):
102
101
  if self.ndim == 2 and data.shape[1] != self.shape[1]:
103
102
  raise ValueError(f"Data columns {data.shape[1]} don't match array columns {self.shape[1]}.")
104
103
 
105
- # Construct write region
106
104
  if self.ndim == 1:
107
105
  write_region = slice(start_row, end_row)
108
106
  else: # 2D
@@ -110,4 +108,5 @@ class DenseCellArray(CellArray):
110
108
 
111
109
  # write_data = {self._attr: data} if len(self.attr_names) > 1 else data
112
110
  with self.open_array(mode="w") as array:
111
+ print("write_region", write_region)
113
112
  array[write_region] = data
@@ -8,7 +8,7 @@ from typing import List, Optional, Tuple, Union
8
8
  import numpy as np
9
9
  import tiledb
10
10
 
11
- from .config import CellArrConfig
11
+ from ..utils.config import CellArrConfig
12
12
 
13
13
  __author__ = "Jayaram Kancherla"
14
14
  __copyright__ = "Jayaram Kancherla"
@@ -52,7 +52,7 @@ def create_cellarray(
52
52
  Optional list of dimension names.
53
53
 
54
54
  dim_dtypes:
55
- Optional list of dimension dtypes.
55
+ Optional list of dimension dtypes. Defaults to numpy's uint32.
56
56
 
57
57
  attr_name:
58
58
  Name of the data attribute.
@@ -67,29 +67,28 @@ def create_cellarray(
67
67
  ValueError: If dimensions are invalid or inputs are inconsistent.
68
68
  """
69
69
  config = config or CellArrConfig()
70
+ tiledb_ctx = tiledb.Config(config.ctx_config) if config.ctx_config else None
70
71
 
71
72
  if attr_dtype is None:
72
73
  attr_dtype = np.float32
73
74
  if isinstance(attr_dtype, str):
74
75
  attr_dtype = np.dtype(attr_dtype)
75
76
 
76
- # Require either shape or dim_dtypes
77
77
  if shape is None and dim_dtypes is None:
78
78
  raise ValueError("Either 'shape' or 'dim_dtypes' must be provided.")
79
79
 
80
80
  if shape is not None:
81
81
  if len(shape) not in (1, 2):
82
- raise ValueError("Only 1D and 2D arrays are supported.")
82
+ raise ValueError("Shape must have 1 or 2 dimensions.")
83
83
 
84
84
  # Set dimension dtypes, defaults to numpy uint32
85
85
  if dim_dtypes is None:
86
86
  dim_dtypes = [np.uint32] * len(shape)
87
87
  else:
88
88
  if len(dim_dtypes) not in (1, 2):
89
- raise ValueError("Only 1D and 2D arrays are supported.")
89
+ raise ValueError("Array must have 1 or 2 dimensions.")
90
90
  dim_dtypes = [np.dtype(dt) if isinstance(dt, str) else dt for dt in dim_dtypes]
91
91
 
92
- # Calculate shape from dtypes if needed
93
92
  if shape is None:
94
93
  shape = tuple(np.iinfo(dt).max if np.issubdtype(dt, np.integer) else None for dt in dim_dtypes)
95
94
  if None in shape:
@@ -97,7 +96,6 @@ def create_cellarray(
97
96
  np.iinfo(dt).max if s is None and np.issubdtype(dt, np.integer) else s for s, dt in zip(shape, dim_dtypes)
98
97
  )
99
98
 
100
- # Set dimension names
101
99
  if dim_names is None:
102
100
  dim_names = [f"dim_{i}" for i in range(len(shape))]
103
101
 
@@ -105,42 +103,52 @@ def create_cellarray(
105
103
  if not (len(shape) == len(dim_dtypes) == len(dim_names)):
106
104
  raise ValueError("Lengths of 'shape', 'dim_dtypes', and 'dim_names' must match.")
107
105
 
108
- dom = tiledb.Domain(
109
- *[
110
- tiledb.Dim(name=name, domain=(0, s - 1), tile=min(s, config.tile_capacity), dtype=dt)
111
- for name, s, dt in zip(dim_names, shape, dim_dtypes)
112
- ],
113
- ctx=tiledb.Ctx(config.ctx_config),
114
- )
106
+ dims = []
107
+ for name, s, dt in zip(dim_names, shape, dim_dtypes):
108
+ if np.issubdtype(dt, np.integer):
109
+ domain = (0, 0 if s == 0 else s - 1)
110
+ tile = min(1 if s == 0 else s // 2, config.tile_capacity // 2)
111
+ dim_dtype = dt
112
+ else: # Assumes string or object dtype
113
+ domain = (None, None)
114
+ tile = None
115
+ dim_dtype = "ascii"
116
+
117
+ dims.append(
118
+ tiledb.Dim(
119
+ name=name,
120
+ domain=domain,
121
+ tile=tile,
122
+ dtype=dim_dtype,
123
+ filters=config.coords_filters,
124
+ )
125
+ )
115
126
 
116
- attr = tiledb.Attr(
127
+ dom = tiledb.Domain(*dims, ctx=tiledb_ctx)
128
+ attr_obj = tiledb.Attr(
117
129
  name=attr_name,
118
130
  dtype=attr_dtype,
119
131
  filters=config.attrs_filters.get(attr_name, config.attrs_filters.get("", None)),
132
+ ctx=tiledb_ctx,
120
133
  )
121
-
122
134
  schema = tiledb.ArraySchema(
123
135
  domain=dom,
124
- attrs=[attr],
136
+ attrs=[attr_obj],
125
137
  cell_order=config.cell_order,
126
138
  tile_order=config.tile_order,
127
139
  sparse=sparse,
128
- coords_filters=config.coords_filters,
129
140
  offsets_filters=config.offsets_filters,
130
- ctx=tiledb.Ctx(config.ctx_config),
141
+ ctx=tiledb_ctx,
131
142
  )
143
+ tiledb.Array.create(uri, schema, ctx=tiledb_ctx)
132
144
 
133
- tiledb.Array.create(uri, schema)
145
+ from .dense import DenseCellArray
146
+ from .sparse import SparseCellArray
134
147
 
135
- # Import here to avoid circular imports
136
- from .cellarray_dense import DenseCellArray
137
- from .cellarray_sparse import SparseCellArray
138
-
139
- # Return appropriate array type
140
148
  return (
141
- SparseCellArray(uri=uri, attr=attr_name, mode=mode)
149
+ SparseCellArray(uri=uri, attr=attr_name, mode=mode, config_or_context=tiledb_ctx)
142
150
  if sparse
143
- else DenseCellArray(uri=uri, attr=attr_name, mode=mode)
151
+ else DenseCellArray(uri=uri, attr=attr_name, mode=mode, config_or_context=tiledb_ctx)
144
152
  )
145
153
 
146
154
 
@@ -149,58 +157,101 @@ class SliceHelper:
149
157
 
150
158
  @staticmethod
151
159
  def is_contiguous_indices(indices: List[int]) -> Optional[slice]:
152
- """Check if indices can be represented as a contiguous slice."""
160
+ """Checks if a list of indices is contiguous and can be converted to a slice.
161
+
162
+ Returns None if the list is not contiguous or contains non-integers.
163
+ """
153
164
  if not indices:
154
165
  return None
155
166
 
156
- diffs = np.diff(indices)
167
+ if not all(isinstance(i, (int, np.integer)) for i in indices):
168
+ return None
169
+
170
+ sorted_indices = sorted(list(set(indices)))
171
+ if not sorted_indices:
172
+ return None
173
+
174
+ if len(sorted_indices) == 1:
175
+ return slice(sorted_indices[0], sorted_indices[0] + 1, None)
176
+
177
+ diffs = np.diff(sorted_indices)
157
178
  if np.all(diffs == 1):
158
- return slice(indices[0], indices[-1] + 1, None)
179
+ return slice(sorted_indices[0], sorted_indices[-1] + 1, None)
180
+
159
181
  return None
160
182
 
161
183
  @staticmethod
162
- def normalize_index(idx: Union[int, slice, List[int]], dim_size: int) -> Union[slice, List[int], EllipsisType]:
184
+ def normalize_index(
185
+ idx: Union[int, range, slice, List, str, EllipsisType],
186
+ dim_size: int,
187
+ dim_dtype: np.dtype,
188
+ ) -> Union[slice, List, EllipsisType]:
163
189
  """Normalize index to handle negative indices and ensure consistency."""
164
190
 
191
+ is_string_dim = np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_)
192
+
193
+ if is_string_dim:
194
+ if isinstance(idx, (str, bytes)):
195
+ return [idx]
196
+ if isinstance(idx, list) and all(isinstance(i, (str, bytes)) for i in idx):
197
+ return idx
198
+ if isinstance(idx, slice):
199
+ # For string dimensions, we do not normalize the slice with integer sizes
200
+ return idx
201
+ raise TypeError(f"Unsupported index type '{type(idx).__name__}' for string dimension.")
202
+
165
203
  if isinstance(idx, EllipsisType):
166
204
  return idx
167
205
 
168
- # Convert ranges to slices
169
206
  if isinstance(idx, range):
170
207
  idx = slice(idx.start, idx.stop, idx.step)
171
208
 
172
209
  if isinstance(idx, slice):
173
- start = idx.start if idx.start is not None else 0
174
- stop = idx.stop if idx.stop is not None else dim_size
175
- step = idx.step
210
+ start, stop, step = idx.start, idx.stop, idx.step
211
+
212
+ # Resolve None to full dimension slice parts
213
+ if start is None:
214
+ start = 0
215
+
216
+ if stop is None:
217
+ stop = dim_size
176
218
 
177
219
  # Handle negative indices
178
220
  if start < 0:
179
- start = dim_size + start
180
-
221
+ start += dim_size
181
222
  if stop < 0:
182
- stop = dim_size + stop
183
-
184
- if start < 0 or start > dim_size:
185
- raise IndexError(f"Start index {start} out of bounds for dimension size {dim_size}")
186
- if stop < 0 or stop > dim_size:
187
- raise IndexError(f"Stop index {stop} out of bounds for dimension size {dim_size}")
223
+ stop += dim_size
188
224
 
225
+ # Clamping slice arguments to dimensions
226
+ stop = min(stop, dim_size)
227
+ start = max(0, start)
189
228
  return slice(start, stop, step)
190
229
 
191
- elif isinstance(idx, list):
192
- norm_idx = [i if i >= 0 else dim_size + i for i in idx]
193
- if any(i < 0 or i >= dim_size for i in norm_idx):
194
- raise IndexError(f"List indices {idx} out of bounds for dimension size {dim_size}")
195
- return norm_idx
230
+ if isinstance(idx, list):
231
+ if not idx:
232
+ return []
196
233
 
197
- else: # Single integer index
198
- norm_idx = idx if idx >= 0 else dim_size + idx
234
+ # This check only applies to integer lists
235
+ if not all(isinstance(i, (int, np.integer)) for i in idx):
236
+ raise TypeError(
237
+ "List indices must be all integers or all strings, but got mixed types or non-string/int types."
238
+ )
199
239
 
200
- if norm_idx < 0 or norm_idx >= dim_size:
201
- raise IndexError(f"Index {idx} out of bounds for dimension size {dim_size}")
240
+ norm_idx = [i if i >= 0 else dim_size + i for i in idx]
241
+ if any(i < 0 or i >= dim_size for i in norm_idx):
242
+ raise IndexError("List indices out of bounds for dimension size.")
243
+ return sorted(list(set(norm_idx)))
244
+
245
+ if isinstance(idx, (int, np.integer)):
246
+ norm_idx = int(idx)
247
+ if norm_idx < 0:
248
+ norm_idx += dim_size
249
+ if not (0 <= norm_idx < dim_size):
250
+ raise IndexError(f"Index {idx} out of bounds for dimension size.")
202
251
  return slice(norm_idx, norm_idx + 1, None)
203
252
 
253
+ raise TypeError(f"Index type {type(idx)} not supported for normalization.")
254
+
204
255
 
205
256
  def create_group(output_path, group_name):
206
257
  tiledb.group_create(f"{output_path}/{group_name}")