cellarr-array 0.0.3__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cellarr-array might be problematic. Click here for more details.

cellarr_array/__init__.py CHANGED
@@ -15,7 +15,5 @@ except PackageNotFoundError: # pragma: no cover
15
15
  finally:
16
16
  del version, PackageNotFoundError
17
17
 
18
- from .config import CellArrConfig, ConsolidationConfig
19
- from .DenseCellArray import DenseCellArray
20
- from .SparseCellArray import SparseCellArray
21
- from .helpers import create_cellarray, SliceHelper
18
+ from .core import DenseCellArray, SparseCellArray
19
+ from .utils import CellArrConfig, ConsolidationConfig, create_cellarray
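cellarr_array/core/__init__.py ADDED [file header missing in this rendering; path inferred from the imports]
@@ -0,0 +1,3 @@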
1
+ from .base import CellArray
2
+ from .dense import DenseCellArray
3
+ from .sparse import SparseCellArray
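cellarr_array/core/base.py ADDED [file header missing in this rendering; path inferred from the imports]
@@ -0,0 +1,344 @@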
1
+ from abc import ABC, abstractmethod
2
+ from contextlib import contextmanager
3
+
4
+ try:
5
+ from types import EllipsisType
6
+ except ImportError:
7
+ # TODO: This is required for Python <3.10. Remove once Python 3.9 reaches EOL in October 2025
8
+ EllipsisType = type(...)
9
+ from typing import Any, List, Literal, Optional, Tuple, Union
10
+
11
+ import numpy as np
12
+ import tiledb
13
+ from scipy import sparse
14
+
15
+ from ..utils.config import ConsolidationConfig
16
+ from .helpers import SliceHelper
17
+
18
+ __author__ = "Jayaram Kancherla"
19
+ __copyright__ = "Jayaram Kancherla"
20
+ __license__ = "MIT"
21
+
22
+
23
+ class CellArray(ABC):
24
+ """Abstract base class for TileDB array operations."""
25
+
26
+ def __init__(
27
+ self,
28
+ uri: Optional[str] = None,
29
+ tiledb_array_obj: Optional[tiledb.Array] = None,
30
+ attr: str = "data",
31
+ mode: Optional[Literal["r", "w", "d", "m"]] = None,
32
+ config_or_context: Optional[Union[tiledb.Config, tiledb.Ctx]] = None,
33
+ validate: bool = True,
34
+ ):
35
+ """Initialize the object.
36
+
37
+ Args:
38
+ uri:
39
+ URI to the array.
40
+ Required if 'tiledb_array_obj' is not provided.
41
+
42
+ tiledb_array_obj:
43
+ Optional, an already opened ``tiledb.Array`` instance.
44
+ If provided, 'uri' can be None, and 'config_or_context' is ignored.
45
+
46
+ attr:
47
+ Attribute to access.
48
+ Defaults to "data".
49
+
50
+ mode:
51
+ Open the array object in read 'r', write 'w', modify
52
+ 'm' mode, or delete 'd' mode.
53
+
54
+ Defaults to None for automatic mode switching.
55
+
56
+ If 'tiledb_array_obj' is provided, this mode should ideally match
57
+ the mode of the provided array or be None.
58
+
59
+ config_or_context:
60
+ Optional config or context object. Ignored if 'tiledb_array_obj' is provided,
61
+ as context will be derived from the object.
62
+
63
+ Defaults to None.
64
+
65
+ validate:
66
+ Whether to validate the attributes.
67
+ Defaults to True.
68
+ """
69
+ self._array_passed_in = False
70
+ self._opened_array_external = None
71
+ self._ctx = None
72
+
73
+ if tiledb_array_obj is not None:
74
+ if not isinstance(tiledb_array_obj, tiledb.Array):
75
+ raise ValueError("'tiledb_array_obj' must be a tiledb.Array instance.")
76
+
77
+ if not tiledb_array_obj.isopen:
78
+ # Option 1: Raise error
79
+ raise ValueError("If 'tiledb_array_obj' is provided, it must be an open tiledb.Array instance.")
80
+ # Option 2: Try to reopen (less safe as we don't know original intent)
81
+ # try:
82
+ # tiledb_array_obj.reopen()
83
+ # except tiledb.TileDBError as e:
84
+ # raise ValueError(
85
+ # f"Provided 'tiledb_array_obj' is closed and could not be reopened: {e}"
86
+ # )
87
+
88
+ self.uri = tiledb_array_obj.uri
89
+ self._array_passed_in = True
90
+ self._opened_array_external = tiledb_array_obj
91
+
92
+ # infer mode if possible, or require it matches
93
+ if mode is not None and tiledb_array_obj.mode != mode:
94
+ # we could try to reopen with the desired mode
95
+ raise ValueError(
96
+ f"Provided array mode '{tiledb_array_obj.mode}' does not match requested mode '{mode}'.",
97
+ "Re-open the external array with the desired mode or pass matching mode.",
98
+ )
99
+
100
+ self._mode = tiledb_array_obj.mode
101
+ self._ctx = tiledb_array_obj.ctx
102
+ elif uri is not None:
103
+ self.uri = uri
104
+ self._mode = mode
105
+ self._array_passed_in = False
106
+ self._opened_array_external = None
107
+
108
+ if config_or_context is None:
109
+ self._ctx = None
110
+ elif isinstance(config_or_context, tiledb.Config):
111
+ self._ctx = tiledb.Ctx(config_or_context)
112
+ elif isinstance(config_or_context, tiledb.Ctx):
113
+ self._ctx = config_or_context
114
+ else:
115
+ raise TypeError("'config_or_context' must be a TileDB Config or Ctx object.")
116
+ else:
117
+ raise ValueError("Either 'uri' or 'tiledb_array_obj' must be provided.")
118
+
119
+ self._shape = None
120
+ self._ndim = None
121
+ self._dim_names = None
122
+ self._attr_names = None
123
+ self._nonempty_domain = None
124
+
125
+ if validate:
126
+ self._validate(attr=attr)
127
+
128
+ self._attr = attr
129
+
130
+ def _validate(self, attr):
131
+ with self.open_array(mode="r") as A:
132
+ schema = A.schema
133
+ if schema.ndim > 2:
134
+ raise ValueError("Only 1D and 2D arrays are supported.")
135
+
136
+ current_attr_names = [schema.attr(i).name for i in range(schema.nattr)]
137
+ if attr not in current_attr_names:
138
+ raise ValueError(
139
+ f"Attribute '{attr}' does not exist in the array. Available attributes: {current_attr_names}."
140
+ )
141
+
142
+ @property
143
+ def mode(self) -> Optional[str]:
144
+ """Get current array mode. If an external array is used, this is its open mode."""
145
+ if self._array_passed_in and self._opened_array_external is not None:
146
+ return self._opened_array_external.mode
147
+ return self._mode
148
+
149
+ @mode.setter
150
+ def mode(self, value: Optional[str]):
151
+ """Set array mode for subsequent operations if not using an external array.
152
+
153
+ This action does not affect an already passed-in external array's mode.
154
+ """
155
+ if self._array_passed_in:
156
+ # To change mode of an external array, user must reopen it and pass it again.
157
+ current_ext_mode = self._opened_array_external.mode if self._opened_array_external else "unknown"
158
+ if value != current_ext_mode:
159
+ raise ValueError(
160
+ f"Cannot change mode of an externally managed array (current: {current_ext_mode}). "
161
+ "Re-open the external array with the new mode and re-initialize CellArray."
162
+ )
163
+ if value is not None and value not in ["r", "w", "m", "d"]:
164
+ raise ValueError("Mode must be one of: None, 'r', 'w', 'm', 'd'")
165
+
166
+ self._mode = value
167
+
168
+ @property
169
+ def dim_names(self) -> List[str]:
170
+ """Get dimension names of the array."""
171
+ if self._dim_names is None:
172
+ with self.open_array(mode="r") as A:
173
+ self._dim_names = [dim.name for dim in A.schema.domain]
174
+ return self._dim_names
175
+
176
+ @property
177
+ def attr_names(self) -> List[str]:
178
+ """Get attribute names of the array."""
179
+ if self._attr_names is None:
180
+ with self.open_array(mode="r") as A:
181
+ self._attr_names = [A.schema.attr(i).name for i in range(A.schema.nattr)]
182
+ return self._attr_names
183
+
184
+ @property
185
+ def shape(self) -> Tuple[int, ...]:
186
+ if self._shape is None:
187
+ with self.open_array(mode="r") as A:
188
+ self._shape = tuple(int(dim.domain[1] - dim.domain[0] + 1) for dim in A.schema.domain)
189
+ return self._shape
190
+
191
+ @property
192
+ def nonempty_domain(self) -> Optional[Tuple[Any, ...]]:
193
+ if self._nonempty_domain is None:
194
+ with self.open_array(mode="r") as A:
195
+ # nonempty_domain() can return None if the array is empty.
196
+ ned = A.nonempty_domain()
197
+ if ned is None:
198
+ self._nonempty_domain = None
199
+ else:
200
+ self._nonempty_domain = tuple(ned) if isinstance(ned[0], tuple) else (ned,)
201
+ return self._nonempty_domain
202
+
203
+ @property
204
+ def ndim(self) -> int:
205
+ """Get number of dimensions."""
206
+ if self._ndim is None:
207
+ with self.open_array(mode="r") as A:
208
+ self._ndim = A.schema.ndim
209
+ # self._ndim = len(self.shape)
210
+ return self._ndim
211
+
212
+ @contextmanager
213
+ def open_array(self, mode: Optional[str] = None):
214
+ """Context manager for array operations.
215
+
216
+ Uses the externally provided array if available, otherwise opens from URI.
217
+
218
+ Args:
219
+ mode:
220
+ Desired mode for the operation ('r', 'w', 'm', 'd').
221
+ If an external array is used, this mode must be compatible with
222
+ (or same as) the mode the external array was opened with.
223
+
224
+ If None, uses the CellArray's default mode.
225
+ """
226
+ if self._array_passed_in and self._opened_array_external is not None:
227
+ if not self._opened_array_external.isopen:
228
+ # Attempt to reopen if closed. This assumes the user might have closed it
229
+ # and expects CellArr to reopen it if still possible.
230
+ try:
231
+ self._opened_array_external.reopen()
232
+ except Exception as e:
233
+ raise tiledb.TileDBError(
234
+ f"Externally provided array is closed and could not be reopened: {e}"
235
+ ) from e
236
+
237
+ effective_mode = mode if mode is not None else self._opened_array_external.mode
238
+
239
+ current_external_mode = self._opened_array_external.mode
240
+ if effective_mode == "r" and current_external_mode not in ["r", "w", "m"]:
241
+ # Read ops ok on write/modify modes
242
+ pass
243
+ elif effective_mode in ["w", "d"] and current_external_mode != effective_mode:
244
+ raise tiledb.TileDBError(
245
+ f"Requested operation mode '{effective_mode}' is incompatible with the "
246
+ f"externally provided array's mode '{current_external_mode}'. "
247
+ "Ensure the external array is opened in a compatible mode."
248
+ )
249
+
250
+ # DO NOT close self._opened_array_external here; its lifecycle is managed by the user.
251
+ yield self._opened_array_external
252
+ else:
253
+ effective_mode = mode if mode is not None else self.mode
254
+ effective_mode = effective_mode if effective_mode is not None else "r"
255
+ array = tiledb.open(self.uri, mode=effective_mode, ctx=self._ctx)
256
+
257
+ try:
258
+ yield array
259
+ finally:
260
+ array.close()
261
+
262
+ def __getitem__(self, key: Union[slice, EllipsisType, Tuple[Union[slice, List[int]], ...], EllipsisType]):
263
+ """Get item implementation that routes to either direct slicing or multi_index
264
+ based on the type of indices provided.
265
+
266
+ Args:
267
+ key:
268
+ Slice or list of indices for each dimension in the array.
269
+ """
270
+ if not isinstance(key, tuple):
271
+ key = (key,)
272
+
273
+ if len(key) > self.ndim:
274
+ raise IndexError(f"Invalid number of dimensions: got {len(key)}, expected {self.ndim}")
275
+
276
+ # Normalize all indices
277
+ normalized_key = tuple(SliceHelper.normalize_index(idx, self.shape[i]) for i, idx in enumerate(key))
278
+
279
+ num_ellipsis = sum(isinstance(i, EllipsisType) for i in normalized_key)
280
+ if num_ellipsis > 1:
281
+ raise IndexError(f"Found more than 1 Ellipsis (...) in key: {normalized_key}")
282
+
283
+ # Check if we can use direct slicing
284
+ use_direct = all(isinstance(idx, (slice, EllipsisType)) for idx in normalized_key)
285
+
286
+ if use_direct:
287
+ return self._direct_slice(normalized_key)
288
+ else:
289
+ if num_ellipsis > 0:
290
+ raise IndexError(f"tiledb does not support ellipsis in multi-index access: {normalized_key}")
291
+ return self._multi_index(normalized_key)
292
+
293
+ @abstractmethod
294
+ def _direct_slice(self, key: Tuple[Union[slice, EllipsisType], ...]) -> np.ndarray:
295
+ """Implementation for direct slicing."""
296
+ pass
297
+
298
+ @abstractmethod
299
+ def _multi_index(self, key: Tuple[Union[slice, List[int]], ...]) -> np.ndarray:
300
+ """Implementation for multi-index access."""
301
+ pass
302
+
303
+ def vacuum(self) -> None:
304
+ """Remove deleted fragments from the array."""
305
+ tiledb.vacuum(self.uri)
306
+
307
+ def consolidate(self, config: Optional[ConsolidationConfig] = None) -> None:
308
+ """Consolidate array fragments.
309
+
310
+ Args:
311
+ config:
312
+ Optional consolidation configuration.
313
+ """
314
+ if config is None:
315
+ config = ConsolidationConfig()
316
+
317
+ consolidation_cfg = tiledb.Config()
318
+
319
+ consolidation_cfg["sm.consolidation.steps"] = config.steps
320
+ consolidation_cfg["sm.consolidation.step_min_frags"] = config.step_min_frags
321
+ consolidation_cfg["sm.consolidation.step_max_frags"] = config.step_max_frags
322
+ consolidation_cfg["sm.consolidation.buffer_size"] = config.buffer_size
323
+ consolidation_cfg["sm.mem.total_budget"] = config.total_budget
324
+
325
+ tiledb.consolidate(self.uri, config=consolidation_cfg)
326
+
327
+ if config.vacuum_after:
328
+ self.vacuum()
329
+
330
+ @abstractmethod
331
+ def write_batch(self, data: Union[np.ndarray, sparse.spmatrix], start_row: int, **kwargs) -> None:
332
+ """Write a batch of data to the array starting at the specified row.
333
+
334
+ Args:
335
+ data:
336
+ Data to write (numpy array for dense, scipy sparse matrix for sparse).
337
+
338
+ start_row:
339
+ Starting row index for writing.
340
+
341
+ **kwargs:
342
+ Additional arguments for write operation.
343
+ """
344
+ pass
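cellarr_array/core/dense.py CHANGED [file header missing in this rendering; path inferred from the imports, previously cellarr_array/DenseCellArray.py]
@@ -7,7 +7,7 @@ from typing import List, Tuple, Union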
7
7
 
8
8
  import numpy as np
9
9
 
10
- from .CellArray import CellArray
10
+ from .base import CellArray
11
11
  from .helpers import SliceHelper
12
12
 
13
13
  __author__ = "Jayaram Kancherla"
@@ -92,7 +92,6 @@ class DenseCellArray(CellArray):
92
92
  if len(data.shape) != self.ndim:
93
93
  raise ValueError(f"Data dimensions {data.shape} don't match array dimensions {self.shape}.")
94
94
 
95
- # Check bounds
96
95
  end_row = start_row + data.shape[0]
97
96
  if end_row > self.shape[0]:
98
97
  raise ValueError(
@@ -102,7 +101,6 @@ class DenseCellArray(CellArray):
102
101
  if self.ndim == 2 and data.shape[1] != self.shape[1]:
103
102
  raise ValueError(f"Data columns {data.shape[1]} don't match array columns {self.shape[1]}.")
104
103
 
105
- # Construct write region
106
104
  if self.ndim == 1:
107
105
  write_region = slice(start_row, end_row)
108
106
  else: # 2D
@@ -110,4 +108,5 @@ class DenseCellArray(CellArray):
110
108
 
111
109
  # write_data = {self._attr: data} if len(self.attr_names) > 1 else data
112
110
  with self.open_array(mode="w") as array:
111
+ print("write_region", write_region)
113
112
  array[write_region] = data
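cellarr_array/core/helpers.py CHANGED [file header missing in this rendering; path inferred from the imports, previously cellarr_array/helpers.py]
@@ -8,7 +8,7 @@ from typing import List, Optional, Tuple, Union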
8
8
  import numpy as np
9
9
  import tiledb
10
10
 
11
- from .config import CellArrConfig
11
+ from ..utils.config import CellArrConfig
12
12
 
13
13
  __author__ = "Jayaram Kancherla"
14
14
  __copyright__ = "Jayaram Kancherla"
@@ -52,7 +52,7 @@ def create_cellarray(
52
52
  Optional list of dimension names.
53
53
 
54
54
  dim_dtypes:
55
- Optional list of dimension dtypes.
55
+ Optional list of dimension dtypes. Defaults to numpy's uint32.
56
56
 
57
57
  attr_name:
58
58
  Name of the data attribute.
@@ -67,29 +67,28 @@ def create_cellarray(
67
67
  ValueError: If dimensions are invalid or inputs are inconsistent.
68
68
  """
69
69
  config = config or CellArrConfig()
70
+ tiledb_ctx = tiledb.Config(config.ctx_config) if config.ctx_config else None
70
71
 
71
72
  if attr_dtype is None:
72
73
  attr_dtype = np.float32
73
74
  if isinstance(attr_dtype, str):
74
75
  attr_dtype = np.dtype(attr_dtype)
75
76
 
76
- # Require either shape or dim_dtypes
77
77
  if shape is None and dim_dtypes is None:
78
78
  raise ValueError("Either 'shape' or 'dim_dtypes' must be provided.")
79
79
 
80
80
  if shape is not None:
81
81
  if len(shape) not in (1, 2):
82
- raise ValueError("Only 1D and 2D arrays are supported.")
82
+ raise ValueError("Shape must have 1 or 2 dimensions.")
83
83
 
84
84
  # Set dimension dtypes, defaults to numpy uint32
85
85
  if dim_dtypes is None:
86
86
  dim_dtypes = [np.uint32] * len(shape)
87
87
  else:
88
88
  if len(dim_dtypes) not in (1, 2):
89
- raise ValueError("Only 1D and 2D arrays are supported.")
89
+ raise ValueError("Array must have 1 or 2 dimensions.")
90
90
  dim_dtypes = [np.dtype(dt) if isinstance(dt, str) else dt for dt in dim_dtypes]
91
91
 
92
- # Calculate shape from dtypes if needed
93
92
  if shape is None:
94
93
  shape = tuple(np.iinfo(dt).max if np.issubdtype(dt, np.integer) else None for dt in dim_dtypes)
95
94
  if None in shape:
@@ -97,7 +96,6 @@ def create_cellarray(
97
96
  np.iinfo(dt).max if s is None and np.issubdtype(dt, np.integer) else s for s, dt in zip(shape, dim_dtypes)
98
97
  )
99
98
 
100
- # Set dimension names
101
99
  if dim_names is None:
102
100
  dim_names = [f"dim_{i}" for i in range(len(shape))]
103
101
 
@@ -107,37 +105,44 @@ def create_cellarray(
107
105
 
108
106
  dom = tiledb.Domain(
109
107
  *[
110
- tiledb.Dim(name=name, domain=(0, s - 1), tile=min(s, config.tile_capacity), dtype=dt)
108
+ tiledb.Dim(
109
+ name=name,
110
+ # supporting empty dimensions
111
+ domain=(0, 0 if s == 0 else s - 1),
112
+ tile=min(1 if s == 0 else s // 2, config.tile_capacity // 2),
113
+ dtype=dt,
114
+ )
111
115
  for name, s, dt in zip(dim_names, shape, dim_dtypes)
112
116
  ],
113
- ctx=tiledb.Ctx(config.ctx_config),
117
+ ctx=tiledb_ctx,
114
118
  )
115
-
116
- attr = tiledb.Attr(
119
+ attr_obj = tiledb.Attr(
117
120
  name=attr_name,
118
121
  dtype=attr_dtype,
119
122
  filters=config.attrs_filters.get(attr_name, config.attrs_filters.get("", None)),
123
+ ctx=tiledb_ctx,
120
124
  )
121
-
122
125
  schema = tiledb.ArraySchema(
123
126
  domain=dom,
124
- attrs=[attr],
127
+ attrs=[attr_obj],
125
128
  cell_order=config.cell_order,
126
129
  tile_order=config.tile_order,
127
130
  sparse=sparse,
128
131
  coords_filters=config.coords_filters,
129
132
  offsets_filters=config.offsets_filters,
130
- ctx=tiledb.Ctx(config.ctx_config),
133
+ ctx=tiledb_ctx,
131
134
  )
132
-
133
- tiledb.Array.create(uri, schema)
135
+ tiledb.Array.create(uri, schema, ctx=tiledb_ctx)
134
136
 
135
137
  # Import here to avoid circular imports
136
- from .DenseCellArray import DenseCellArray
137
- from .SparseCellArray import SparseCellArray
138
+ from .dense import DenseCellArray
139
+ from .sparse import SparseCellArray
138
140
 
139
- # Return appropriate array type
140
- return SparseCellArray(uri, attr=attr_name, mode=mode) if sparse else DenseCellArray(uri, attr=attr_name, mode=mode)
141
+ return (
142
+ SparseCellArray(uri=uri, attr=attr_name, mode=mode, config_or_context=tiledb_ctx)
143
+ if sparse
144
+ else DenseCellArray(uri=uri, attr=attr_name, mode=mode, config_or_context=tiledb_ctx)
145
+ )
141
146
 
142
147
 
143
148
  class SliceHelper:
@@ -145,19 +150,27 @@ class SliceHelper:
145
150
 
146
151
  @staticmethod
147
152
  def is_contiguous_indices(indices: List[int]) -> Optional[slice]:
148
- """Check if indices can be represented as a contiguous slice."""
149
153
  if not indices:
150
154
  return None
151
155
 
152
- diffs = np.diff(indices)
156
+ sorted_indices = sorted(list(set(indices)))
157
+ if not sorted_indices:
158
+ return None
159
+
160
+ if len(sorted_indices) == 1:
161
+ return slice(sorted_indices[0], sorted_indices[0] + 1, None)
162
+
163
+ diffs = np.diff(sorted_indices)
153
164
  if np.all(diffs == 1):
154
- return slice(indices[0], indices[-1] + 1, None)
165
+ return slice(sorted_indices[0], sorted_indices[-1] + 1, None)
166
+
155
167
  return None
156
168
 
157
169
  @staticmethod
158
- def normalize_index(idx: Union[int, slice, List[int]], dim_size: int) -> Union[slice, List[int], EllipsisType]:
170
+ def normalize_index(
171
+ idx: Union[int, range, slice, List[int], EllipsisType], dim_size: int
172
+ ) -> Union[slice, List[int], EllipsisType]:
159
173
  """Normalize index to handle negative indices and ensure consistency."""
160
-
161
174
  if isinstance(idx, EllipsisType):
162
175
  return idx
163
176
 
@@ -166,36 +179,61 @@ class SliceHelper:
166
179
  idx = slice(idx.start, idx.stop, idx.step)
167
180
 
168
181
  if isinstance(idx, slice):
169
- start = idx.start if idx.start is not None else 0
170
- stop = idx.stop if idx.stop is not None else dim_size
182
+ start = idx.start
183
+ stop = idx.stop
171
184
  step = idx.step
172
185
 
186
+ # Resolve None to full dimension slice parts
187
+ if start is None:
188
+ start = 0
189
+
190
+ if stop is None:
191
+ stop = dim_size
192
+
173
193
  # Handle negative indices
174
194
  if start < 0:
175
- start = dim_size + start
176
-
195
+ start += dim_size
177
196
  if stop < 0:
178
- stop = dim_size + stop
197
+ stop += dim_size
179
198
 
180
- if start < 0 or start > dim_size:
181
- raise IndexError(f"Start index {start} out of bounds for dimension size {dim_size}")
182
- if stop < 0 or stop > dim_size:
183
- raise IndexError(f"Stop index {stop} out of bounds for dimension size {dim_size}")
199
+ # slice allows start > dim_size or stop < 0 to result in empty slices.
200
+ # Note: start == dim_size is OK for empty slice like arr[dim_size:]
201
+ if start < 0 or (start >= dim_size and dim_size > 0):
202
+ if not (start == dim_size and (step is None or step > 0)):
203
+ if start >= dim_size:
204
+ raise IndexError(
205
+ f"Start index {idx.start if idx.start is not None else 'None'} results in {start}, which is out of bounds for dimension size {dim_size}."
206
+ )
184
207
 
185
- return slice(start, stop, step)
208
+ # Clamping slice arguments to dimensions
209
+ stop = min(stop, dim_size)
210
+ start = max(0, start)
186
211
 
212
+ return slice(start, stop, step)
187
213
  elif isinstance(idx, list):
214
+ if not idx:
215
+ return []
216
+
188
217
  norm_idx = [i if i >= 0 else dim_size + i for i in idx]
189
218
  if any(i < 0 or i >= dim_size for i in norm_idx):
190
- raise IndexError(f"List indices {idx} out of bounds for dimension size {dim_size}")
191
- return norm_idx
192
-
193
- else: # Single integer index
194
- norm_idx = idx if idx >= 0 else dim_size + idx
195
-
196
- if norm_idx < 0 or norm_idx >= dim_size:
219
+ oob_indices = [orig_i for orig_i, norm_i in zip(idx, norm_idx) if not (0 <= norm_i < dim_size)]
220
+ raise IndexError(
221
+ f"List indices {oob_indices} (original values) are out of bounds for dimension size {dim_size}."
222
+ )
223
+
224
+ # TileDB multi_index usually returns data sorted by coordinates
225
+ return sorted(list(set(norm_idx)))
226
+ elif isinstance(idx, (int, np.integer)):
227
+ norm_idx = int(idx)
228
+ if norm_idx < 0:
229
+ norm_idx += dim_size
230
+
231
+ if not (0 <= norm_idx < dim_size):
197
232
  raise IndexError(f"Index {idx} out of bounds for dimension size {dim_size}")
233
+
198
234
  return slice(norm_idx, norm_idx + 1, None)
235
+ else:
236
+ raise TypeError(f"Index type {type(idx)} not supported for normalization.")
199
237
 
200
238
 
201
239
  def create_group(output_path, group_name):