cellarr-array 0.0.2__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cellarr-array might be problematic. Click here for more details.

cellarr_array/__init__.py CHANGED
@@ -16,6 +16,6 @@ finally:
16
16
  del version, PackageNotFoundError
17
17
 
18
18
  from .config import CellArrConfig, ConsolidationConfig
19
- from .DenseCellArray import DenseCellArray
20
- from .SparseCellArray import SparseCellArray
21
- from .helpers import create_cellarray, SliceHelper
19
+ from .cellarray_dense import DenseCellArray
20
+ from .cellarray_sparse import SparseCellArray
21
+ from .helpers import create_cellarray, SliceHelper
@@ -0,0 +1,344 @@
1
+ from abc import ABC, abstractmethod
2
+ from contextlib import contextmanager
3
+
4
+ try:
5
+ from types import EllipsisType
6
+ except ImportError:
7
+ # TODO: This is required for Python <3.10. Remove once Python 3.9 reaches EOL in October 2025
8
+ EllipsisType = type(...)
9
+ from typing import Any, List, Literal, Optional, Tuple, Union
10
+
11
+ import numpy as np
12
+ import tiledb
13
+ from scipy import sparse
14
+
15
+ from .config import ConsolidationConfig
16
+ from .helpers import SliceHelper
17
+
18
+ __author__ = "Jayaram Kancherla"
19
+ __copyright__ = "Jayaram Kancherla"
20
+ __license__ = "MIT"
21
+
22
+
23
class CellArray(ABC):
    """Abstract base class for TileDB array operations.

    An instance is backed either by a URI (the array is opened and closed
    around every operation) or by an already opened ``tiledb.Array`` supplied
    by the caller (the array is reused and never closed by this class).

    Subclasses implement :py:meth:`_direct_slice`, :py:meth:`_multi_index` and
    :py:meth:`write_batch`.
    """

    def __init__(
        self,
        uri: Optional[str] = None,
        tiledb_array_obj: Optional[tiledb.Array] = None,
        attr: str = "data",
        mode: Optional[Literal["r", "w", "d", "m"]] = None,
        config_or_context: Optional[Union[tiledb.Config, tiledb.Ctx]] = None,
        validate: bool = True,
    ):
        """Initialize the object.

        Args:
            uri:
                URI to the array.
                Required if 'tiledb_array_obj' is not provided.

            tiledb_array_obj:
                Optional, an already opened ``tiledb.Array`` instance.
                If provided, 'uri' can be None, and 'config_or_context' is ignored.

            attr:
                Attribute to access.
                Defaults to "data".

            mode:
                Open the array object in read 'r', write 'w', modify
                'm' mode, or delete 'd' mode.

                Defaults to None for automatic mode switching.

                If 'tiledb_array_obj' is provided, this mode must match
                the mode of the provided array or be None.

            config_or_context:
                Optional config or context object. Ignored if 'tiledb_array_obj' is provided,
                as context will be derived from the object.

                Defaults to None.

            validate:
                Whether to validate the attributes.
                Defaults to True.

        Raises:
            ValueError:
                If neither 'uri' nor 'tiledb_array_obj' is provided, if
                'tiledb_array_obj' is not an open ``tiledb.Array``, if its mode
                differs from a non-None 'mode', or if validation fails.

            TypeError:
                If 'config_or_context' is neither a TileDB Config nor a Ctx.
        """
        # Defaults describe the URI-backed case; the external-array branch overrides them.
        self._array_passed_in = False
        self._opened_array_external = None
        self._ctx = None

        if tiledb_array_obj is not None:
            if not isinstance(tiledb_array_obj, tiledb.Array):
                raise ValueError("'tiledb_array_obj' must be a tiledb.Array instance.")

            # A closed array is rejected rather than reopened: the caller's
            # original intent (mode, timestamp, ...) is unknown at this point.
            if not tiledb_array_obj.isopen:
                raise ValueError("If 'tiledb_array_obj' is provided, it must be an open tiledb.Array instance.")

            if mode is not None and tiledb_array_obj.mode != mode:
                # The two literals are concatenated into a single message. Passing
                # them as separate arguments made the exception render as a tuple.
                raise ValueError(
                    f"Provided array mode '{tiledb_array_obj.mode}' does not match requested mode '{mode}'. "
                    "Re-open the external array with the desired mode or pass matching mode."
                )

            self.uri = tiledb_array_obj.uri
            self._array_passed_in = True
            self._opened_array_external = tiledb_array_obj
            self._mode = tiledb_array_obj.mode
            self._ctx = tiledb_array_obj.ctx
        elif uri is not None:
            self.uri = uri
            self._mode = mode

            if isinstance(config_or_context, tiledb.Config):
                self._ctx = tiledb.Ctx(config_or_context)
            elif isinstance(config_or_context, tiledb.Ctx):
                self._ctx = config_or_context
            elif config_or_context is not None:
                raise TypeError("'config_or_context' must be a TileDB Config or Ctx object.")
        else:
            raise ValueError("Either 'uri' or 'tiledb_array_obj' must be provided.")

        # Lazily populated caches for the schema-derived properties below.
        self._shape = None
        self._ndim = None
        self._dim_names = None
        self._attr_names = None
        self._nonempty_domain = None

        if validate:
            self._validate(attr=attr)

        self._attr = attr

    def _validate(self, attr):
        """Check that the array has at most two dimensions and contains ``attr``.

        Args:
            attr:
                Name of the attribute that must exist in the array schema.

        Raises:
            ValueError:
                If the array has more than 2 dimensions or ``attr`` is missing.
        """
        with self.open_array(mode="r") as A:
            schema = A.schema
            if schema.ndim > 2:
                raise ValueError("Only 1D and 2D arrays are supported.")

            current_attr_names = [schema.attr(i).name for i in range(schema.nattr)]
            if attr not in current_attr_names:
                raise ValueError(
                    f"Attribute '{attr}' does not exist in the array. Available attributes: {current_attr_names}."
                )

    @property
    def mode(self) -> Optional[str]:
        """Get current array mode. If an external array is used, this is its open mode."""
        if self._array_passed_in and self._opened_array_external is not None:
            return self._opened_array_external.mode
        return self._mode

    @mode.setter
    def mode(self, value: Optional[str]):
        """Set array mode for subsequent operations if not using an external array.

        This action does not affect an already passed-in external array's mode.

        Args:
            value:
                One of None, 'r', 'w', 'm', 'd'.

        Raises:
            ValueError:
                If ``value`` is not an accepted mode, or if an external array
                is in use and ``value`` differs from its open mode.
        """
        if self._array_passed_in:
            # To change mode of an external array, user must reopen it and pass it again.
            current_ext_mode = self._opened_array_external.mode if self._opened_array_external else "unknown"
            if value != current_ext_mode:
                raise ValueError(
                    f"Cannot change mode of an externally managed array (current: {current_ext_mode}). "
                    "Re-open the external array with the new mode and re-initialize CellArray."
                )
        if value is not None and value not in ["r", "w", "m", "d"]:
            raise ValueError("Mode must be one of: None, 'r', 'w', 'm', 'd'")

        self._mode = value

    @property
    def dim_names(self) -> List[str]:
        """Get dimension names of the array."""
        if self._dim_names is None:
            with self.open_array(mode="r") as A:
                self._dim_names = [dim.name for dim in A.schema.domain]
        return self._dim_names

    @property
    def attr_names(self) -> List[str]:
        """Get attribute names of the array."""
        if self._attr_names is None:
            with self.open_array(mode="r") as A:
                self._attr_names = [A.schema.attr(i).name for i in range(A.schema.nattr)]
        return self._attr_names

    @property
    def shape(self) -> Tuple[int, ...]:
        """Get array shape, computed from the inclusive bounds of each schema dimension."""
        if self._shape is None:
            with self.open_array(mode="r") as A:
                self._shape = tuple(int(dim.domain[1] - dim.domain[0] + 1) for dim in A.schema.domain)
        return self._shape

    @property
    def nonempty_domain(self) -> Optional[Tuple[Any, ...]]:
        """Get the array's non-empty domain, or None if TileDB reports none.

        A non-None result is cached on first access and is not refreshed afterwards.
        """
        if self._nonempty_domain is None:
            with self.open_array(mode="r") as A:
                # nonempty_domain() can return None if the array is empty.
                ned = A.nonempty_domain()
                if ned is None:
                    self._nonempty_domain = None
                else:
                    # Wrap a single (start, end) pair so the result is always a tuple of ranges.
                    self._nonempty_domain = tuple(ned) if isinstance(ned[0], tuple) else (ned,)
        return self._nonempty_domain

    @property
    def ndim(self) -> int:
        """Get number of dimensions."""
        if self._ndim is None:
            with self.open_array(mode="r") as A:
                self._ndim = A.schema.ndim
        return self._ndim

    @contextmanager
    def open_array(self, mode: Optional[str] = None):
        """Context manager for array operations.

        Uses the externally provided array if available, otherwise opens from URI.

        Args:
            mode:
                Desired mode for the operation ('r', 'w', 'm', 'd').
                If an external array is used, a requested 'w' or 'd' mode must
                be the same as the mode the external array was opened with.

                If None, uses the CellArray's default mode.

        Yields:
            The external ``tiledb.Array`` (left open on exit), or an array opened
            from the URI that is closed when the context exits.

        Raises:
            tiledb.TileDBError:
                If a closed external array cannot be reopened, or if a requested
                'w'/'d' mode differs from the external array's mode.
        """
        if self._array_passed_in and self._opened_array_external is not None:
            external = self._opened_array_external
            if not external.isopen:
                # Attempt to reopen if closed. This assumes the user might have closed it
                # and expects CellArr to reopen it if still possible.
                try:
                    external.reopen()
                except Exception as e:
                    raise tiledb.TileDBError(
                        f"Externally provided array is closed and could not be reopened: {e}"
                    ) from e

            external_mode = external.mode
            requested_mode = mode if mode is not None else external_mode

            # Only write/delete requests are checked against the external mode.
            # Read requests are always handed the external array as-is: the
            # schema-derived properties of this class request "r" even when the
            # external array was opened for writing.
            if requested_mode in ("w", "d") and requested_mode != external_mode:
                raise tiledb.TileDBError(
                    f"Requested operation mode '{requested_mode}' is incompatible with the "
                    f"externally provided array's mode '{external_mode}'. "
                    "Ensure the external array is opened in a compatible mode."
                )

            # DO NOT close the external array here; its lifecycle is managed by the user.
            yield external
        else:
            effective_mode = mode if mode is not None else self.mode
            effective_mode = effective_mode if effective_mode is not None else "r"
            array = tiledb.open(self.uri, mode=effective_mode, ctx=self._ctx)

            try:
                yield array
            finally:
                array.close()

    def __getitem__(self, key: Union[slice, EllipsisType, Tuple[Union[slice, List[int], EllipsisType], ...]]):
        """Get item implementation that routes to either direct slicing or multi_index
        based on the type of indices provided.

        Args:
            key:
                Slice or list of indices for each dimension in the array.

        Raises:
            IndexError:
                If more indices than dimensions are given, if more than one
                Ellipsis is present, or if an Ellipsis is combined with
                non-slice indices.
        """
        if not isinstance(key, tuple):
            key = (key,)

        if len(key) > self.ndim:
            raise IndexError(f"Invalid number of dimensions: got {len(key)}, expected {self.ndim}")

        # Normalize all indices
        normalized_key = tuple(SliceHelper.normalize_index(idx, self.shape[i]) for i, idx in enumerate(key))

        num_ellipsis = sum(isinstance(i, EllipsisType) for i in normalized_key)
        if num_ellipsis > 1:
            raise IndexError(f"Found more than 1 Ellipsis (...) in key: {normalized_key}")

        # Direct slicing is possible only when every index is a slice or an Ellipsis.
        use_direct = all(isinstance(idx, (slice, EllipsisType)) for idx in normalized_key)
        if use_direct:
            return self._direct_slice(normalized_key)

        if num_ellipsis > 0:
            raise IndexError(f"tiledb does not support ellipsis in multi-index access: {normalized_key}")
        return self._multi_index(normalized_key)

    @abstractmethod
    def _direct_slice(self, key: Tuple[Union[slice, EllipsisType], ...]) -> np.ndarray:
        """Implementation for direct slicing."""
        pass

    @abstractmethod
    def _multi_index(self, key: Tuple[Union[slice, List[int]], ...]) -> np.ndarray:
        """Implementation for multi-index access."""
        pass

    def vacuum(self) -> None:
        """Remove deleted fragments from the array."""
        # NOTE(review): self._ctx is not forwarded here -- confirm this is intended
        # for arrays that were opened with a custom config or context.
        tiledb.vacuum(self.uri)

    def consolidate(self, config: Optional[ConsolidationConfig] = None) -> None:
        """Consolidate array fragments.

        Args:
            config:
                Optional consolidation configuration.
                Defaults to a ``ConsolidationConfig()`` with its default settings.
        """
        if config is None:
            config = ConsolidationConfig()

        consolidation_cfg = tiledb.Config()

        consolidation_cfg["sm.consolidation.steps"] = config.steps
        consolidation_cfg["sm.consolidation.step_min_frags"] = config.step_min_frags
        consolidation_cfg["sm.consolidation.step_max_frags"] = config.step_max_frags
        consolidation_cfg["sm.consolidation.buffer_size"] = config.buffer_size
        consolidation_cfg["sm.mem.total_budget"] = config.total_budget

        # NOTE(review): as in vacuum(), self._ctx is not forwarded -- confirm intended.
        tiledb.consolidate(self.uri, config=consolidation_cfg)

        if config.vacuum_after:
            self.vacuum()

    @abstractmethod
    def write_batch(self, data: Union[np.ndarray, sparse.spmatrix], start_row: int, **kwargs) -> None:
        """Write a batch of data to the array starting at the specified row.

        Args:
            data:
                Data to write (numpy array for dense, scipy sparse matrix for sparse).

            start_row:
                Starting row index for writing.

            **kwargs:
                Additional arguments for write operation.
        """
        pass
@@ -1,8 +1,13 @@
1
+ try:
2
+ from types import EllipsisType
3
+ except ImportError:
4
+ # TODO: This is required for Python <3.10. Remove once Python 3.9 reaches EOL in October 2025
5
+ EllipsisType = type(...)
1
6
  from typing import List, Tuple, Union
2
7
 
3
8
  import numpy as np
4
9
 
5
- from .CellArray import CellArray
10
+ from .cellarray_base import CellArray
6
11
  from .helpers import SliceHelper
7
12
 
8
13
  __author__ = "Jayaram Kancherla"
@@ -13,7 +18,7 @@ __license__ = "MIT"
13
18
  class DenseCellArray(CellArray):
14
19
  """Implementation for dense TileDB arrays."""
15
20
 
16
- def _direct_slice(self, key: Tuple[slice, ...]) -> np.ndarray:
21
+ def _direct_slice(self, key: Tuple[Union[slice, EllipsisType], ...]) -> np.ndarray:
17
22
  """Implementation for direct slicing of dense arrays.
18
23
 
19
24
  Args:
@@ -1,10 +1,15 @@
1
- from typing import Dict, List, Optional, Tuple, Union
1
+ try:
2
+ from types import EllipsisType
3
+ except ImportError:
4
+ # TODO: This is required for Python <3.10. Remove once Python 3.9 reaches EOL in October 2025
5
+ EllipsisType = type(...)
6
+ from typing import Dict, List, Literal, Optional, Tuple, Union
2
7
 
3
8
  import numpy as np
4
9
  import tiledb
5
10
  from scipy import sparse
6
11
 
7
- from .CellArray import CellArray
12
+ from .cellarray_base import CellArray
8
13
  from .helpers import SliceHelper
9
14
 
10
15
  __author__ = "Jayaram Kancherla"
@@ -17,15 +22,68 @@ class SparseCellArray(CellArray):
17
22
 
18
23
  def __init__(
19
24
  self,
20
- uri: str,
25
+ uri: Optional[str] = None,
26
+ tiledb_array_obj: Optional[tiledb.Array] = None,
21
27
  attr: str = "data",
22
- mode: str = None,
28
+ mode: Optional[Literal["r", "w", "d", "m"]] = None,
23
29
  config_or_context: Optional[Union[tiledb.Config, tiledb.Ctx]] = None,
24
30
  return_sparse: bool = True,
25
31
  sparse_coerce: Union[sparse.csr_matrix, sparse.csc_matrix] = sparse.csr_matrix,
32
+ validate: bool = True,
33
+ **kwargs,
26
34
  ):
27
- """Initialize SparseCellArray."""
28
- super().__init__(uri, attr, mode, config_or_context)
35
+ """Initialize the object.
36
+
37
+ Args:
38
+ uri:
39
+ URI to the array.
40
+ Required if 'tiledb_array_obj' is not provided.
41
+
42
+ tiledb_array_obj:
43
+ Optional, an already opened ``tiledb.Array`` instance.
44
+ If provided, 'uri' can be None, and 'config_or_context' is ignored.
45
+
46
+ attr:
47
+ Attribute to access.
48
+ Defaults to "data".
49
+
50
+ mode:
51
+ Open the array object in read 'r', write 'w', modify
52
+ 'm' mode, or delete 'd' mode.
53
+
54
+ Defaults to None for automatic mode switching.
55
+
56
+ If 'tiledb_array_obj' is provided, this mode should ideally match
57
+ the mode of the provided array or be None.
58
+
59
+ config_or_context:
60
+ Optional config or context object. Ignored if 'tiledb_array_obj' is provided,
61
+ as context will be derived from the object.
62
+
63
+ Defaults to None.
64
+
65
+ return_sparse:
66
+ Whether to return a sparse representation of the data when object is sliced.
67
+ Default is to return a dictionary that contains coordinates and values.
68
+
69
+ sparse_coerce:
70
+ Format to return, defaults to csr_matrix.
71
+
72
+ validate:
73
+ Whether to validate the attributes.
74
+ Defaults to True.
75
+
76
+ kwargs:
77
+ Additional arguments.
78
+ """
79
+ super().__init__(
80
+ uri=uri,
81
+ tiledb_array_obj=tiledb_array_obj,
82
+ attr=attr,
83
+ mode=mode,
84
+ config_or_context=config_or_context,
85
+ validate=validate,
86
+ )
29
87
 
30
88
  self.return_sparse = return_sparse
31
89
  self.sparse_coerce = sparse.csr_matrix if sparse_coerce is None else sparse_coerce
@@ -118,7 +176,7 @@ class SparseCellArray(CellArray):
118
176
 
119
177
  return sliced[key]
120
178
 
121
- def _direct_slice(self, key: Tuple[slice, ...]) -> Union[np.ndarray, sparse.coo_matrix]:
179
+ def _direct_slice(self, key: Tuple[Union[slice, EllipsisType], ...]) -> Union[np.ndarray, sparse.coo_matrix]:
122
180
  """Implementation for direct slicing of sparse arrays."""
123
181
  with self.open_array(mode="r") as array:
124
182
  result = array[key]
@@ -182,21 +240,21 @@ class SparseCellArray(CellArray):
182
240
  raise TypeError("Input must be a scipy sparse matrix.")
183
241
 
184
242
  # Validate and adjust dimensions
185
- data, is_1d = self._validate_matrix_dims(data)
243
+ coo_data, is_1d = self._validate_matrix_dims(data)
186
244
 
187
245
  # Check bounds
188
- end_row = start_row + data.shape[0]
246
+ end_row = start_row + coo_data.shape[0]
189
247
  if end_row > self.shape[0]:
190
248
  raise ValueError(
191
249
  f"Write operation would exceed array bounds. End row {end_row} > array rows {self.shape[0]}."
192
250
  )
193
251
 
194
- if not is_1d and data.shape[1] != self.shape[1]:
195
- raise ValueError(f"Data columns {data.shape[1]} don't match array columns {self.shape[1]}.")
252
+ if not is_1d and coo_data.shape[1] != self.shape[1]:
253
+ raise ValueError(f"Data columns {coo_data.shape[1]} don't match array columns {self.shape[1]}.")
196
254
 
197
- adjusted_rows = data.row + start_row
255
+ adjusted_rows = coo_data.row + start_row
198
256
  with self.open_array(mode="w") as array:
199
257
  if is_1d:
200
- array[adjusted_rows] = data.data
258
+ array[adjusted_rows] = coo_data.data
201
259
  else:
202
- array[adjusted_rows, data.col] = data.data
260
+ array[adjusted_rows, coo_data.col] = coo_data.data
cellarr_array/helpers.py CHANGED
@@ -1,3 +1,8 @@
1
+ try:
2
+ from types import EllipsisType
3
+ except ImportError:
4
+ # TODO: This is required for Python <3.10. Remove once Python 3.9 reaches EOL in October 2025
5
+ EllipsisType = type(...)
1
6
  from typing import List, Optional, Tuple, Union
2
7
 
3
8
  import numpy as np
@@ -128,11 +133,15 @@ def create_cellarray(
128
133
  tiledb.Array.create(uri, schema)
129
134
 
130
135
  # Import here to avoid circular imports
131
- from .DenseCellArray import DenseCellArray
132
- from .SparseCellArray import SparseCellArray
136
+ from .cellarray_dense import DenseCellArray
137
+ from .cellarray_sparse import SparseCellArray
133
138
 
134
139
  # Return appropriate array type
135
- return SparseCellArray(uri, attr=attr_name, mode=mode) if sparse else DenseCellArray(uri, attr=attr_name, mode=mode)
140
+ return (
141
+ SparseCellArray(uri=uri, attr=attr_name, mode=mode)
142
+ if sparse
143
+ else DenseCellArray(uri=uri, attr=attr_name, mode=mode)
144
+ )
136
145
 
137
146
 
138
147
  class SliceHelper:
@@ -150,9 +159,12 @@ class SliceHelper:
150
159
  return None
151
160
 
152
161
  @staticmethod
153
- def normalize_index(idx: Union[int, slice, List[int]], dim_size: int) -> Union[slice, List[int]]:
162
+ def normalize_index(idx: Union[int, slice, List[int]], dim_size: int) -> Union[slice, List[int], EllipsisType]:
154
163
  """Normalize index to handle negative indices and ensure consistency."""
155
164
 
165
+ if isinstance(idx, EllipsisType):
166
+ return idx
167
+
156
168
  # Convert ranges to slices
157
169
  if isinstance(idx, range):
158
170
  idx = slice(idx.start, idx.stop, idx.step)
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: cellarr-array
3
- Version: 0.0.2
3
+ Version: 0.1.0
4
4
  Summary: Base class for handling TileDB backed arrays.
5
5
  Home-page: https://github.com/cellarr/cellarr-array
6
6
  Author: Jayaram Kancherla
@@ -20,6 +20,7 @@ Provides-Extra: testing
20
20
  Requires-Dist: setuptools; extra == "testing"
21
21
  Requires-Dist: pytest; extra == "testing"
22
22
  Requires-Dist: pytest-cov; extra == "testing"
23
+ Dynamic: license-file
23
24
 
24
25
  [![PyPI-Server](https://img.shields.io/pypi/v/cellarr-array.svg)](https://pypi.org/project/cellarr-array/)
25
26
  ![Unit tests](https://github.com/cellarr/cellarr-array/actions/workflows/run-tests.yml/badge.svg)
@@ -0,0 +1,11 @@
1
+ cellarr_array/__init__.py,sha256=iCU5zmXXmTwk-VuwrTdVl5STRAL2xeYpq05fL9_bW6w,781
2
+ cellarr_array/cellarray_base.py,sha256=CSYsA_Ra-RcwsyHzwayL-w10EhpbIC3u7ZAbyQMO6ks,13451
3
+ cellarr_array/cellarray_dense.py,sha256=skunPy_WyOMuS_3SxcAW_gm8d5FiWeV7ZCQp4HLRUUY,3958
4
+ cellarr_array/cellarray_sparse.py,sha256=YYZymvWGDG1c2EeOLMBPP5_u4qM8uhxyWJY6PnFWMVo,9112
5
+ cellarr_array/config.py,sha256=67zBxpYY9N_v6TMdyljUIZmckbwOBcuLC99aJooGmfA,2917
6
+ cellarr_array/helpers.py,sha256=eIeymmvY4KZ-cAiROo3DcYYzP39NQBj-4Nrba9rrEKQ,6491
7
+ cellarr_array-0.1.0.dist-info/licenses/LICENSE.txt,sha256=JUlHIfWcRe_MZop18pQvMIPLKSSPz3XQ06ASHuW5Wh8,1076
8
+ cellarr_array-0.1.0.dist-info/METADATA,sha256=ELBRCXkEyxhPeGHlA62i2QIzz7yYlLUSy7bfOe6aAdE,4120
9
+ cellarr_array-0.1.0.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
10
+ cellarr_array-0.1.0.dist-info/top_level.txt,sha256=oErp0D8ABZV-QPtTiXT8_F2z36Ic7ykuDg_1Y84HLZM,14
11
+ cellarr_array-0.1.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (80.7.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2025 Jayaram Kancherla
3
+ Copyright (c) 2025 Genentech
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,238 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from contextlib import contextmanager
3
- from typing import List, Literal, Optional, Tuple, Union
4
-
5
- import numpy as np
6
- import tiledb
7
- from scipy import sparse
8
-
9
- from .config import ConsolidationConfig
10
- from .helpers import SliceHelper
11
-
12
- __author__ = "Jayaram Kancherla"
13
- __copyright__ = "Jayaram Kancherla"
14
- __license__ = "MIT"
15
-
16
-
17
- class CellArray(ABC):
18
- """Abstract base class for TileDB array operations."""
19
-
20
- def __init__(
21
- self,
22
- uri: str,
23
- attr: str = "data",
24
- mode: Optional[Literal["r", "w", "n", "d"]] = None,
25
- config_or_context: Optional[Union[tiledb.Config, tiledb.Ctx]] = None,
26
- validate: bool = True,
27
- ):
28
- """Initialize the object.
29
-
30
- Args:
31
- uri:
32
- URI to the array.
33
-
34
- attr:
35
- Attribute to access.
36
- Defaults to "data".
37
-
38
- mode:
39
- Open the array object in read 'r', write 'w', modify
40
- exclusive 'm' mode, or delete 'd' mode.
41
-
42
- Defaults to None for automatic mode switching.
43
-
44
- config_or_context:
45
- Optional config or context object.
46
-
47
- Defaults to None.
48
-
49
- validate:
50
- Whether to validate the attributes.
51
- Defaults to True.
52
- """
53
- self.uri = uri
54
- self._mode = mode
55
-
56
- if config_or_context is None:
57
- # config_or_context = tiledb.Config()
58
- ctx = None
59
- else:
60
- if isinstance(config_or_context, tiledb.Config):
61
- ctx = tiledb.Ctx(config_or_context)
62
- elif isinstance(config_or_context, tiledb.Ctx):
63
- ctx = config_or_context
64
- else:
65
- raise TypeError("'config_or_context' must be either TileDB config or a context object.")
66
-
67
- self._ctx = ctx
68
- self._array = None
69
- self._shape = None
70
- self._ndim = None
71
- self._dim_names = None
72
- self._attr_names = None
73
-
74
- if validate:
75
- self._validate(attr=attr)
76
-
77
- self._attr = attr
78
-
79
- def _validate(self, attr):
80
- with self.open_array(mode="r") as A:
81
- if A.ndim > 2:
82
- raise ValueError("Only 1D and 2D arrays are supported.")
83
-
84
- if attr not in self.attr_names:
85
- raise ValueError(
86
- f"Attribute '{attr}' does not exist in the array. Available attributes: {self.attr_names}."
87
- )
88
-
89
- @property
90
- def mode(self) -> Optional[str]:
91
- """Get current array mode."""
92
- return self._mode
93
-
94
- @mode.setter
95
- def mode(self, value: Optional[str]):
96
- """Set array mode.
97
-
98
- Args:
99
- value:
100
- One of `None`, 'r', 'w', or 'm', 'd'.
101
- """
102
- if value is not None and value not in ["r", "w", "m", "d"]:
103
- raise ValueError("Mode must be one of: None, 'r', 'w', 'm', 'd'")
104
- self._mode = value
105
-
106
- @property
107
- def dim_names(self) -> List[str]:
108
- """Get dimension names of the array."""
109
- if self._dim_names is None:
110
- with self.open_array(mode="r") as A:
111
- self._dim_names = [dim.name for dim in A.schema.domain]
112
- return self._dim_names
113
-
114
- @property
115
- def attr_names(self) -> List[str]:
116
- """Get attribute names of the array."""
117
- if self._attr_names is None:
118
- with self.open_array(mode="r") as A:
119
- self._attr_names = [A.schema.attr(i).name for i in range(A.schema.nattr)]
120
- return self._attr_names
121
-
122
- @property
123
- def shape(self) -> Tuple[int, ...]:
124
- """Get array shape from schema domain."""
125
- if self._shape is None:
126
- with self.open_array(mode="r") as A:
127
- self._shape = tuple(int(dim.domain[1] - dim.domain[0] + 1) for dim in A.schema.domain)
128
- return self._shape
129
-
130
- @property
131
- def nonempty_domain(self) -> Tuple[int, ...]:
132
- """Get array non-empty domain."""
133
- if self._nonempty_domain is None:
134
- with self.open_array(mode="r") as A:
135
- self._nonempty_domain = A.nonempty_domain()
136
- return self._nonempty_domain
137
-
138
- @property
139
- def ndim(self) -> int:
140
- """Get number of dimensions."""
141
- if self._ndim is None:
142
- self._ndim = len(self.shape)
143
- return self._ndim
144
-
145
- @contextmanager
146
- def open_array(self, mode: Optional[str] = None):
147
- """Context manager for array operations.
148
-
149
- Args:
150
- mode:
151
- Override mode for this operation.
152
- """
153
- mode = mode if mode is not None else self.mode
154
- mode = mode if mode is not None else "r" # Default to read mode
155
-
156
- array = tiledb.open(self.uri, mode=mode, ctx=self._ctx)
157
- try:
158
- yield array
159
- finally:
160
- array.close()
161
-
162
- def __getitem__(self, key: Union[slice, Tuple[Union[slice, List[int]], ...]]):
163
- """Get item implementation that routes to either direct slicing or multi_index
164
- based on the type of indices provided.
165
-
166
- Args:
167
- key:
168
- Slice or list of indices for each dimension in the array.
169
- """
170
- if not isinstance(key, tuple):
171
- key = (key,)
172
-
173
- if len(key) > self.ndim:
174
- raise IndexError(f"Invalid number of dimensions: got {len(key)}, expected {self.ndim}")
175
-
176
- # Normalize all indices
177
- normalized_key = tuple(SliceHelper.normalize_index(idx, self.shape[i]) for i, idx in enumerate(key))
178
-
179
- # Check if we can use direct slicing
180
- use_direct = all(isinstance(idx, slice) for idx in normalized_key)
181
-
182
- if use_direct:
183
- return self._direct_slice(normalized_key)
184
- else:
185
- return self._multi_index(normalized_key)
186
-
187
- @abstractmethod
188
- def _direct_slice(self, key: Tuple[slice, ...]) -> np.ndarray:
189
- """Implementation for direct slicing."""
190
- pass
191
-
192
- @abstractmethod
193
- def _multi_index(self, key: Tuple[Union[slice, List[int]], ...]) -> np.ndarray:
194
- """Implementation for multi-index access."""
195
- pass
196
-
197
- def vacuum(self) -> None:
198
- """Remove deleted fragments from the array."""
199
- tiledb.vacuum(self.uri)
200
-
201
- def consolidate(self, config: Optional[ConsolidationConfig] = None) -> None:
202
- """Consolidate array fragments.
203
-
204
- Args:
205
- config:
206
- Optional consolidation configuration.
207
- """
208
- if config is None:
209
- config = ConsolidationConfig()
210
-
211
- consolidation_cfg = tiledb.Config()
212
-
213
- consolidation_cfg["sm.consolidation.steps"] = config.steps
214
- consolidation_cfg["sm.consolidation.step_min_frags"] = config.step_min_frags
215
- consolidation_cfg["sm.consolidation.step_max_frags"] = config.step_max_frags
216
- consolidation_cfg["sm.consolidation.buffer_size"] = config.buffer_size
217
- consolidation_cfg["sm.mem.total_budget"] = config.total_budget
218
-
219
- tiledb.consolidate(self.uri, config=consolidation_cfg)
220
-
221
- if config.vacuum_after:
222
- self.vacuum()
223
-
224
- @abstractmethod
225
- def write_batch(self, data: Union[np.ndarray, sparse.spmatrix], start_row: int, **kwargs) -> None:
226
- """Write a batch of data to the array starting at the specified row.
227
-
228
- Args:
229
- data:
230
- Data to write (numpy array for dense, scipy sparse matrix for sparse).
231
-
232
- start_row:
233
- Starting row index for writing.
234
-
235
- **kwargs:
236
- Additional arguments for write operation.
237
- """
238
- pass
@@ -1,11 +0,0 @@
1
- cellarr_array/CellArray.py,sha256=sFD258mPp4w-8-xmjAgoicKo0Nbu0GGa-1gMXxt5cZ0,7570
2
- cellarr_array/DenseCellArray.py,sha256=iPrjFtGolnHB0BTi4A8ncEpoFI9FWe6oZHhA1Men3Wo,3745
3
- cellarr_array/SparseCellArray.py,sha256=8bajVOvUMaQhWU-_pZY0Cg9sD6kWRAJCu2G45uY-W4Q,7096
4
- cellarr_array/__init__.py,sha256=8m0_shRPKNNaNab5tGBL2l0K5XgkKCFuLAh7QGogfYo,778
5
- cellarr_array/config.py,sha256=67zBxpYY9N_v6TMdyljUIZmckbwOBcuLC99aJooGmfA,2917
6
- cellarr_array/helpers.py,sha256=O0RgDLIdYbWc01yp2Cw0EmjJ3g_uzlz2JnYE8W7PZEE,6182
7
- cellarr_array-0.0.2.dist-info/LICENSE.txt,sha256=qI2hRZobcUlj8gqFqXwqt522HeYyWvHLF00zCSZofHA,1084
8
- cellarr_array-0.0.2.dist-info/METADATA,sha256=-VmLQZQmbUhNkD_Y9ZLeZkBgLf4H5YIXgO_rDj7zKmw,4098
9
- cellarr_array-0.0.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
10
- cellarr_array-0.0.2.dist-info/top_level.txt,sha256=oErp0D8ABZV-QPtTiXT8_F2z36Ic7ykuDg_1Y84HLZM,14
11
- cellarr_array-0.0.2.dist-info/RECORD,,