cellarr-array 0.0.3__tar.gz → 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. cellarr_array-0.1.0/.github/workflows/run-tests.yml +73 -0
  2. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/CHANGELOG.md +6 -0
  3. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/LICENSE.txt +1 -1
  4. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/PKG-INFO +1 -1
  5. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/src/cellarr_array/__init__.py +2 -2
  6. cellarr_array-0.1.0/src/cellarr_array/cellarray_base.py +344 -0
  7. cellarr_array-0.0.3/src/cellarr_array/DenseCellArray.py → cellarr_array-0.1.0/src/cellarr_array/cellarray_dense.py +1 -1
  8. cellarr_array-0.0.3/src/cellarr_array/SparseCellArray.py → cellarr_array-0.1.0/src/cellarr_array/cellarray_sparse.py +66 -13
  9. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/src/cellarr_array/helpers.py +7 -3
  10. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/src/cellarr_array.egg-info/PKG-INFO +1 -1
  11. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/src/cellarr_array.egg-info/SOURCES.txt +3 -3
  12. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/tests/test_all.py +1 -1
  13. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/tests/test_dense.py +16 -1
  14. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/tests/test_inmemory.py +11 -2
  15. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/tests/test_sparse.py +21 -6
  16. cellarr_array-0.0.3/.github/workflows/run-tests.yml +0 -33
  17. cellarr_array-0.0.3/src/cellarr_array/CellArray.py +0 -251
  18. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/.coveragerc +0 -0
  19. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/.github/workflows/publish-pypi.yml +0 -0
  20. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/.gitignore +0 -0
  21. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/.pre-commit-config.yaml +0 -0
  22. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/.readthedocs.yml +0 -0
  23. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/AUTHORS.md +0 -0
  24. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/CONTRIBUTING.md +0 -0
  25. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/README.md +0 -0
  26. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/docs/Makefile +0 -0
  27. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/docs/_static/.gitignore +0 -0
  28. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/docs/authors.md +0 -0
  29. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/docs/changelog.md +0 -0
  30. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/docs/conf.py +0 -0
  31. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/docs/contributing.md +0 -0
  32. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/docs/index.md +0 -0
  33. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/docs/license.md +0 -0
  34. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/docs/readme.md +0 -0
  35. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/docs/requirements.txt +0 -0
  36. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/pyproject.toml +0 -0
  37. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/setup.cfg +0 -0
  38. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/setup.py +0 -0
  39. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/src/cellarr_array/config.py +0 -0
  40. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/src/cellarr_array.egg-info/dependency_links.txt +0 -0
  41. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/src/cellarr_array.egg-info/not-zip-safe +0 -0
  42. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/src/cellarr_array.egg-info/requires.txt +0 -0
  43. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/src/cellarr_array.egg-info/top_level.txt +0 -0
  44. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/tests/conftest.py +0 -0
  45. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/tests/test_helpers.py +0 -0
  46. {cellarr_array-0.0.3 → cellarr_array-0.1.0}/tox.ini +0 -0
@@ -0,0 +1,73 @@
# Runs the test suite through tox on every supported Python version and,
# when a Codecov token is configured, uploads the coverage report.
name: Test the library

on:
  push:
    branches:
      - master # for legacy repos
      - main
  pull_request:
    branches:
      - master # for legacy repos
      - main
  workflow_dispatch: # Allow manually triggering the workflow
  schedule:
    # Run roughly every 15 days at 00:00 UTC
    # (useful to check if updates on dependencies break the package)
    - cron: "0 0 1,16 * *"

permissions:
  contents: read

# Cancel superseded runs of the same workflow for the same PR / commit.
concurrency:
  group: >-
    ${{ github.workflow }}-${{ github.ref_type }}-
    ${{ github.event.pull_request.number || github.sha }}
  cancel-in-progress: true

jobs:
  test:
    strategy:
      matrix:
        # Versions are quoted so "3.10" is not parsed as the float 3.1.
        python: ["3.9", "3.10", "3.11", "3.12", "3.13"]
        platform:
          - ubuntu-latest
          # - macos-latest
          # - windows-latest
    runs-on: ${{ matrix.platform }}
    name: Python ${{ matrix.python }}, ${{ matrix.platform }}
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        id: setup-python
        with:
          python-version: ${{ matrix.python }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install tox coverage

      # Everything after `--` is forwarded to pytest.
      - name: Run tests
        run: >-
          pipx run --python '${{ steps.setup-python.outputs.python-path }}'
          tox
          -- -rFEx --durations 10 --color yes --cov --cov-branch --cov-report=xml

      - name: Check for codecov token availability
        id: codecov-check
        shell: bash
        env:
          # Hand the secret over through the environment rather than
          # interpolating it into the script text: an empty secret would
          # otherwise produce a malformed `[ != '' ]` test, and a secret
          # containing shell metacharacters would be executed.
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
        run: |
          if [ -n "${CODECOV_TOKEN}" ]; then
            echo "codecov=true" >> "$GITHUB_OUTPUT"
          else
            echo "codecov=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Upload coverage reports to Codecov with GitHub Action
        uses: codecov/codecov-action@v5
        if: ${{ steps.codecov-check.outputs.codecov == 'true' }}
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
        # `slug` and `flags` are action inputs, so they belong under `with:`;
        # as plain environment variables the action never sees them.
        with:
          slug: ${{ github.repository }}
          # NOTE(review): spaces removed because Codecov flag names are
          # believed to allow only word characters, dots and dashes - confirm.
          flags: ${{ matrix.platform }}-py${{ matrix.python }}
@@ -1,5 +1,11 @@
1
1
  # Changelog
2
2
 
3
+ ## Version 0.1.0
4
+
5
+ - Support cellarr-arrays on user-provided TileDB array objects.
6
+ - Migrate GitHub Actions workflows to the newer version from biocsetup.
7
+ - Rename modules and update documentation and tests.
8
+
3
9
  ## Version 0.0.2
4
10
 
5
11
  - Support in-memory tiledb objects. Updated tests and documentation.
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2025 Jayaram Kancherla
3
+ Copyright (c) 2025 Genentech
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cellarr-array
3
- Version: 0.0.3
3
+ Version: 0.1.0
4
4
  Summary: Base class for handling TileDB backed arrays.
5
5
  Home-page: https://github.com/cellarr/cellarr-array
6
6
  Author: Jayaram Kancherla
@@ -16,6 +16,6 @@ finally:
16
16
  del version, PackageNotFoundError
17
17
 
18
18
  from .config import CellArrConfig, ConsolidationConfig
19
- from .DenseCellArray import DenseCellArray
20
- from .SparseCellArray import SparseCellArray
19
+ from .cellarray_dense import DenseCellArray
20
+ from .cellarray_sparse import SparseCellArray
21
21
  from .helpers import create_cellarray, SliceHelper
@@ -0,0 +1,344 @@
1
+ from abc import ABC, abstractmethod
2
+ from contextlib import contextmanager
3
+
4
+ try:
5
+ from types import EllipsisType
6
+ except ImportError:
7
+ # TODO: This is required for Python <3.10. Remove once Python 3.9 reaches EOL in October 2025
8
+ EllipsisType = type(...)
9
+ from typing import Any, List, Literal, Optional, Tuple, Union
10
+
11
+ import numpy as np
12
+ import tiledb
13
+ from scipy import sparse
14
+
15
+ from .config import ConsolidationConfig
16
+ from .helpers import SliceHelper
17
+
18
+ __author__ = "Jayaram Kancherla"
19
+ __copyright__ = "Jayaram Kancherla"
20
+ __license__ = "MIT"
21
+
22
+
class CellArray(ABC):
    """Abstract base class for TileDB array operations.

    An instance is backed either by a URI, in which case the array is opened
    and closed around every operation, or by an already opened
    ``tiledb.Array`` supplied by the caller, in which case this class never
    closes it.

    Subclasses must implement ``_direct_slice``, ``_multi_index`` and
    ``write_batch``.
    """

    def __init__(
        self,
        uri: Optional[str] = None,
        tiledb_array_obj: Optional[tiledb.Array] = None,
        attr: str = "data",
        mode: Optional[Literal["r", "w", "d", "m"]] = None,
        config_or_context: Optional[Union[tiledb.Config, tiledb.Ctx]] = None,
        validate: bool = True,
    ):
        """Initialize the object.

        Args:
            uri:
                URI to the array.
                Required if 'tiledb_array_obj' is not provided.

            tiledb_array_obj:
                Optional, an already opened ``tiledb.Array`` instance.
                If provided, 'uri' can be None, and 'config_or_context' is ignored.

            attr:
                Attribute to access.
                Defaults to "data".

            mode:
                Open the array object in read 'r', write 'w', modify
                'm' mode, or delete 'd' mode.

                Defaults to None for automatic mode switching.

                If 'tiledb_array_obj' is provided, this mode must match
                the mode of the provided array or be None.

            config_or_context:
                Optional config or context object. Ignored if 'tiledb_array_obj' is provided,
                as context will be derived from the object.

                Defaults to None.

            validate:
                Whether to validate the attributes.
                Defaults to True.

        Raises:
            ValueError:
                If neither 'uri' nor 'tiledb_array_obj' is given, if
                'tiledb_array_obj' is not an open ``tiledb.Array``, if 'mode'
                differs from the provided array's mode, or if validation fails.

            TypeError:
                If 'config_or_context' is neither a TileDB Config nor Ctx.
        """
        # Defaults for the URI-backed case; overwritten below for external arrays.
        self._array_passed_in = False
        self._opened_array_external = None
        self._ctx = None

        if tiledb_array_obj is not None:
            if not isinstance(tiledb_array_obj, tiledb.Array):
                raise ValueError("'tiledb_array_obj' must be a tiledb.Array instance.")

            # A closed handle is rejected rather than reopened here because the
            # mode the caller originally intended is unknown.
            if not tiledb_array_obj.isopen:
                raise ValueError("If 'tiledb_array_obj' is provided, it must be an open tiledb.Array instance.")

            self.uri = tiledb_array_obj.uri
            self._array_passed_in = True
            self._opened_array_external = tiledb_array_obj

            # An explicitly requested mode must agree with the external array's mode.
            if mode is not None and tiledb_array_obj.mode != mode:
                # The two literals are concatenated into ONE message; passing
                # them as separate arguments would make the message a tuple.
                raise ValueError(
                    f"Provided array mode '{tiledb_array_obj.mode}' does not match requested mode '{mode}'. "
                    "Re-open the external array with the desired mode or pass matching mode."
                )

            self._mode = tiledb_array_obj.mode
            self._ctx = tiledb_array_obj.ctx
        elif uri is not None:
            self.uri = uri
            self._mode = mode

            if config_or_context is None:
                self._ctx = None
            elif isinstance(config_or_context, tiledb.Config):
                self._ctx = tiledb.Ctx(config_or_context)
            elif isinstance(config_or_context, tiledb.Ctx):
                self._ctx = config_or_context
            else:
                raise TypeError("'config_or_context' must be a TileDB Config or Ctx object.")
        else:
            raise ValueError("Either 'uri' or 'tiledb_array_obj' must be provided.")

        # Lazily populated caches for the schema-derived properties below.
        self._shape = None
        self._ndim = None
        self._dim_names = None
        self._attr_names = None
        self._nonempty_domain = None

        if validate:
            self._validate(attr=attr)

        self._attr = attr

    def _validate(self, attr):
        """Check that the array has at most 2 dimensions and contains ``attr``.

        Args:
            attr:
                Name of the attribute that must exist in the array schema.

        Raises:
            ValueError:
                If the array has more than 2 dimensions or ``attr`` is missing.
        """
        with self.open_array(mode="r") as A:
            schema = A.schema
            if schema.ndim > 2:
                raise ValueError("Only 1D and 2D arrays are supported.")

            current_attr_names = [schema.attr(i).name for i in range(schema.nattr)]
            if attr not in current_attr_names:
                raise ValueError(
                    f"Attribute '{attr}' does not exist in the array. Available attributes: {current_attr_names}."
                )

    @property
    def mode(self) -> Optional[str]:
        """Get current array mode. If an external array is used, this is its open mode."""
        if self._array_passed_in and self._opened_array_external is not None:
            return self._opened_array_external.mode
        return self._mode

    @mode.setter
    def mode(self, value: Optional[str]):
        """Set array mode for subsequent operations if not using an external array.

        This action does not affect an already passed-in external array's mode.

        Raises:
            ValueError:
                If an external array is in use and ``value`` differs from its
                mode, or if ``value`` is not one of None, 'r', 'w', 'm', 'd'.
        """
        if self._array_passed_in:
            # To change mode of an external array, user must reopen it and pass it again.
            current_ext_mode = self._opened_array_external.mode if self._opened_array_external else "unknown"
            if value != current_ext_mode:
                raise ValueError(
                    f"Cannot change mode of an externally managed array (current: {current_ext_mode}). "
                    "Re-open the external array with the new mode and re-initialize CellArray."
                )
        if value is not None and value not in ["r", "w", "m", "d"]:
            raise ValueError("Mode must be one of: None, 'r', 'w', 'm', 'd'")

        self._mode = value

    @property
    def dim_names(self) -> List[str]:
        """Get dimension names of the array (read once from the schema, then cached)."""
        if self._dim_names is None:
            with self.open_array(mode="r") as A:
                self._dim_names = [dim.name for dim in A.schema.domain]
        return self._dim_names

    @property
    def attr_names(self) -> List[str]:
        """Get attribute names of the array (read once from the schema, then cached)."""
        if self._attr_names is None:
            with self.open_array(mode="r") as A:
                self._attr_names = [A.schema.attr(i).name for i in range(A.schema.nattr)]
        return self._attr_names

    @property
    def shape(self) -> Tuple[int, ...]:
        """Get the array shape, computed per dimension as ``upper - lower + 1`` of its domain (cached)."""
        if self._shape is None:
            with self.open_array(mode="r") as A:
                self._shape = tuple(int(dim.domain[1] - dim.domain[0] + 1) for dim in A.schema.domain)
        return self._shape

    @property
    def nonempty_domain(self) -> Optional[Tuple[Any, ...]]:
        """Get the non-empty domain of the array, or None if TileDB reports none.

        A non-None result is cached; a None result is looked up again on the
        next access.
        """
        if self._nonempty_domain is None:
            with self.open_array(mode="r") as A:
                # nonempty_domain() can return None if the array is empty.
                ned = A.nonempty_domain()
                if ned is None:
                    self._nonempty_domain = None
                else:
                    # Keep a tuple of per-dimension tuples; anything else is wrapped in a 1-tuple.
                    self._nonempty_domain = tuple(ned) if isinstance(ned[0], tuple) else (ned,)
        return self._nonempty_domain

    @property
    def ndim(self) -> int:
        """Get number of dimensions (read once from the schema, then cached)."""
        if self._ndim is None:
            with self.open_array(mode="r") as A:
                self._ndim = A.schema.ndim
        return self._ndim

    @contextmanager
    def open_array(self, mode: Optional[str] = None):
        """Context manager for array operations.

        Uses the externally provided array if available, otherwise opens from URI.

        Args:
            mode:
                Desired mode for the operation ('r', 'w', 'm', 'd').
                If an external array is used, a requested 'w' or 'd' mode
                must equal the mode the external array was opened with.

                If None, uses the CellArray's default mode, falling back to
                'r' for URI-backed arrays.

        Yields:
            The open ``tiledb.Array``. URI-backed arrays are closed on exit;
            an external array is left open.

        Raises:
            tiledb.TileDBError:
                If a closed external array cannot be reopened, or if 'w'/'d'
                is requested on an external array opened in another mode.
        """
        if self._array_passed_in and self._opened_array_external is not None:
            if not self._opened_array_external.isopen:
                # Attempt to reopen if closed. This assumes the user might have closed it
                # and expects CellArr to reopen it if still possible.
                try:
                    self._opened_array_external.reopen()
                except Exception as e:
                    raise tiledb.TileDBError(
                        f"Externally provided array is closed and could not be reopened: {e}"
                    ) from e

            current_external_mode = self._opened_array_external.mode
            effective_mode = mode if mode is not None else current_external_mode

            # Only write and delete requests are checked against the external
            # array's mode; 'r' and 'm' requests are served by whatever mode
            # the array is open in (schema access relies on this).
            if effective_mode in ["w", "d"] and current_external_mode != effective_mode:
                raise tiledb.TileDBError(
                    f"Requested operation mode '{effective_mode}' is incompatible with the "
                    f"externally provided array's mode '{current_external_mode}'. "
                    "Ensure the external array is opened in a compatible mode."
                )

            # DO NOT close self._opened_array_external here; its lifecycle is managed by the user.
            yield self._opened_array_external
        else:
            effective_mode = mode if mode is not None else self.mode
            effective_mode = effective_mode if effective_mode is not None else "r"
            array = tiledb.open(self.uri, mode=effective_mode, ctx=self._ctx)

            try:
                yield array
            finally:
                array.close()

    def __getitem__(self, key: Union[slice, EllipsisType, Tuple[Union[slice, List[int]], ...]]):
        """Get item implementation that routes to either direct slicing or multi_index
        based on the type of indices provided.

        Args:
            key:
                Slice or list of indices for each dimension in the array.

        Raises:
            IndexError:
                If more indices than dimensions are given, if more than one
                Ellipsis is present, or if an Ellipsis is combined with
                non-slice indices.
        """
        if not isinstance(key, tuple):
            key = (key,)

        if len(key) > self.ndim:
            raise IndexError(f"Invalid number of dimensions: got {len(key)}, expected {self.ndim}")

        # Normalize all indices
        normalized_key = tuple(SliceHelper.normalize_index(idx, self.shape[i]) for i, idx in enumerate(key))

        num_ellipsis = sum(isinstance(i, EllipsisType) for i in normalized_key)
        if num_ellipsis > 1:
            raise IndexError(f"Found more than 1 Ellipsis (...) in key: {normalized_key}")

        # Direct slicing is possible only when every index is a slice or an Ellipsis.
        use_direct = all(isinstance(idx, (slice, EllipsisType)) for idx in normalized_key)

        if use_direct:
            return self._direct_slice(normalized_key)

        if num_ellipsis > 0:
            raise IndexError(f"tiledb does not support ellipsis in multi-index access: {normalized_key}")
        return self._multi_index(normalized_key)

    @abstractmethod
    def _direct_slice(self, key: Tuple[Union[slice, EllipsisType], ...]) -> np.ndarray:
        """Implementation for direct slicing."""
        pass

    @abstractmethod
    def _multi_index(self, key: Tuple[Union[slice, List[int]], ...]) -> np.ndarray:
        """Implementation for multi-index access."""
        pass

    def vacuum(self) -> None:
        """Remove deleted fragments from the array."""
        # NOTE(review): called with the URI only, so self._ctx is not used here -
        # confirm this is intended for arrays opened with a custom config/context.
        tiledb.vacuum(self.uri)

    def consolidate(self, config: Optional[ConsolidationConfig] = None) -> None:
        """Consolidate array fragments.

        Args:
            config:
                Optional consolidation configuration.
                Defaults to a fresh ``ConsolidationConfig()``.
        """
        if config is None:
            config = ConsolidationConfig()

        consolidation_cfg = tiledb.Config()

        consolidation_cfg["sm.consolidation.steps"] = config.steps
        consolidation_cfg["sm.consolidation.step_min_frags"] = config.step_min_frags
        consolidation_cfg["sm.consolidation.step_max_frags"] = config.step_max_frags
        consolidation_cfg["sm.consolidation.buffer_size"] = config.buffer_size
        consolidation_cfg["sm.mem.total_budget"] = config.total_budget

        # NOTE(review): like vacuum(), this does not pass self._ctx - confirm.
        tiledb.consolidate(self.uri, config=consolidation_cfg)

        if config.vacuum_after:
            self.vacuum()

    @abstractmethod
    def write_batch(self, data: Union[np.ndarray, sparse.spmatrix], start_row: int, **kwargs) -> None:
        """Write a batch of data to the array starting at the specified row.

        Args:
            data:
                Data to write (numpy array for dense, scipy sparse matrix for sparse).

            start_row:
                Starting row index for writing.

            **kwargs:
                Additional arguments for write operation.
        """
        pass
@@ -7,7 +7,7 @@ from typing import List, Tuple, Union
7
7
 
8
8
  import numpy as np
9
9
 
10
- from .CellArray import CellArray
10
+ from .cellarray_base import CellArray
11
11
  from .helpers import SliceHelper
12
12
 
13
13
  __author__ = "Jayaram Kancherla"
@@ -3,13 +3,13 @@ try:
3
3
  except ImportError:
4
4
  # TODO: This is required for Python <3.10. Remove once Python 3.9 reaches EOL in October 2025
5
5
  EllipsisType = type(...)
6
- from typing import Dict, List, Optional, Tuple, Union
6
+ from typing import Dict, List, Literal, Optional, Tuple, Union
7
7
 
8
8
  import numpy as np
9
9
  import tiledb
10
10
  from scipy import sparse
11
11
 
12
- from .CellArray import CellArray
12
+ from .cellarray_base import CellArray
13
13
  from .helpers import SliceHelper
14
14
 
15
15
  __author__ = "Jayaram Kancherla"
@@ -22,15 +22,68 @@ class SparseCellArray(CellArray):
22
22
 
23
23
  def __init__(
24
24
  self,
25
- uri: str,
25
+ uri: Optional[str] = None,
26
+ tiledb_array_obj: Optional[tiledb.Array] = None,
26
27
  attr: str = "data",
27
- mode: str = None,
28
+ mode: Optional[Literal["r", "w", "d", "m"]] = None,
28
29
  config_or_context: Optional[Union[tiledb.Config, tiledb.Ctx]] = None,
29
30
  return_sparse: bool = True,
30
31
  sparse_coerce: Union[sparse.csr_matrix, sparse.csc_matrix] = sparse.csr_matrix,
32
+ validate: bool = True,
33
+ **kwargs,
31
34
  ):
32
- """Initialize SparseCellArray."""
33
- super().__init__(uri, attr, mode, config_or_context)
35
+ """Initialize the object.
36
+
37
+ Args:
38
+ uri:
39
+ URI to the array.
40
+ Required if 'tiledb_array_obj' is not provided.
41
+
42
+ tiledb_array_obj:
43
+ Optional, an already opened ``tiledb.Array`` instance.
44
+ If provided, 'uri' can be None, and 'config_or_context' is ignored.
45
+
46
+ attr:
47
+ Attribute to access.
48
+ Defaults to "data".
49
+
50
+ mode:
51
+ Open the array object in read 'r', write 'w', modify
52
+ 'm' mode, or delete 'd' mode.
53
+
54
+ Defaults to None for automatic mode switching.
55
+
56
+ If 'tiledb_array_obj' is provided, this mode should ideally match
57
+ the mode of the provided array or be None.
58
+
59
+ config_or_context:
60
+ Optional config or context object. Ignored if 'tiledb_array_obj' is provided,
61
+ as context will be derived from the object.
62
+
63
+ Defaults to None.
64
+
65
+ return_sparse:
66
+ Whether to return a sparse representation of the data when object is sliced.
67
+ Default is to return a dictionary that contains coordinates and values.
68
+
69
+ sparse_coerce:
70
+ Format to return, defaults to csr_matrix.
71
+
72
+ validate:
73
+ Whether to validate the attributes.
74
+ Defaults to True.
75
+
76
+ kwargs:
77
+ Additional arguments.
78
+ """
79
+ super().__init__(
80
+ uri=uri,
81
+ tiledb_array_obj=tiledb_array_obj,
82
+ attr=attr,
83
+ mode=mode,
84
+ config_or_context=config_or_context,
85
+ validate=validate,
86
+ )
34
87
 
35
88
  self.return_sparse = return_sparse
36
89
  self.sparse_coerce = sparse.csr_matrix if sparse_coerce is None else sparse_coerce
@@ -187,21 +240,21 @@ class SparseCellArray(CellArray):
187
240
  raise TypeError("Input must be a scipy sparse matrix.")
188
241
 
189
242
  # Validate and adjust dimensions
190
- data, is_1d = self._validate_matrix_dims(data)
243
+ coo_data, is_1d = self._validate_matrix_dims(data)
191
244
 
192
245
  # Check bounds
193
- end_row = start_row + data.shape[0]
246
+ end_row = start_row + coo_data.shape[0]
194
247
  if end_row > self.shape[0]:
195
248
  raise ValueError(
196
249
  f"Write operation would exceed array bounds. End row {end_row} > array rows {self.shape[0]}."
197
250
  )
198
251
 
199
- if not is_1d and data.shape[1] != self.shape[1]:
200
- raise ValueError(f"Data columns {data.shape[1]} don't match array columns {self.shape[1]}.")
252
+ if not is_1d and coo_data.shape[1] != self.shape[1]:
253
+ raise ValueError(f"Data columns {coo_data.shape[1]} don't match array columns {self.shape[1]}.")
201
254
 
202
- adjusted_rows = data.row + start_row
255
+ adjusted_rows = coo_data.row + start_row
203
256
  with self.open_array(mode="w") as array:
204
257
  if is_1d:
205
- array[adjusted_rows] = data.data
258
+ array[adjusted_rows] = coo_data.data
206
259
  else:
207
- array[adjusted_rows, data.col] = data.data
260
+ array[adjusted_rows, coo_data.col] = coo_data.data
@@ -133,11 +133,15 @@ def create_cellarray(
133
133
  tiledb.Array.create(uri, schema)
134
134
 
135
135
  # Import here to avoid circular imports
136
- from .DenseCellArray import DenseCellArray
137
- from .SparseCellArray import SparseCellArray
136
+ from .cellarray_dense import DenseCellArray
137
+ from .cellarray_sparse import SparseCellArray
138
138
 
139
139
  # Return appropriate array type
140
- return SparseCellArray(uri, attr=attr_name, mode=mode) if sparse else DenseCellArray(uri, attr=attr_name, mode=mode)
140
+ return (
141
+ SparseCellArray(uri=uri, attr=attr_name, mode=mode)
142
+ if sparse
143
+ else DenseCellArray(uri=uri, attr=attr_name, mode=mode)
144
+ )
141
145
 
142
146
 
143
147
  class SliceHelper:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cellarr-array
3
- Version: 0.0.3
3
+ Version: 0.1.0
4
4
  Summary: Base class for handling TileDB backed arrays.
5
5
  Home-page: https://github.com/cellarr/cellarr-array
6
6
  Author: Jayaram Kancherla
@@ -23,10 +23,10 @@ docs/license.md
23
23
  docs/readme.md
24
24
  docs/requirements.txt
25
25
  docs/_static/.gitignore
26
- src/cellarr_array/CellArray.py
27
- src/cellarr_array/DenseCellArray.py
28
- src/cellarr_array/SparseCellArray.py
29
26
  src/cellarr_array/__init__.py
27
+ src/cellarr_array/cellarray_base.py
28
+ src/cellarr_array/cellarray_dense.py
29
+ src/cellarr_array/cellarray_sparse.py
30
30
  src/cellarr_array/config.py
31
31
  src/cellarr_array/helpers.py
32
32
  src/cellarr_array.egg-info/PKG-INFO
@@ -24,7 +24,7 @@ def test_attribute_validation(temp_dir):
24
24
  create_cellarray(uri=uri, shape=(10, 10), attr_dtype=np.float32, attr_name="values")
25
25
 
26
26
  with pytest.raises(ValueError, match="Attribute 'invalid' does not exist"):
27
- DenseCellArray(uri, attr="invalid")
27
+ DenseCellArray(uri=uri, attr="invalid")
28
28
 
29
29
 
30
30
  def test_1d_integration(temp_dir):
@@ -2,6 +2,7 @@ from pathlib import Path
2
2
 
3
3
  import numpy as np
4
4
  import pytest
5
+ import tiledb
5
6
 
6
7
  from cellarr_array import DenseCellArray, create_cellarray
7
8
 
@@ -172,10 +173,24 @@ def test_invalid_operations(sample_dense_array_2d):
172
173
  sample_dense_array_2d.mode = "invalid"
173
174
 
174
175
  with pytest.raises(ValueError, match="Attribute .* does not exist"):
175
- DenseCellArray(sample_dense_array_2d.uri, attr="invalid_attr")
176
+ DenseCellArray(uri=sample_dense_array_2d.uri, attr="invalid_attr")
176
177
 
177
178
  with pytest.raises(IndexError, match="Invalid number of dimensions"):
178
179
  _ = sample_dense_array_2d[0:10, 0:10, 0:10]
179
180
 
180
181
  with pytest.raises(IndexError, match="out of bounds"):
181
182
  _ = sample_dense_array_2d[200:300]
183
+
184
+
def test_array_object(temp_dir):
    """A DenseCellArray built on an already opened tiledb array mirrors the URI-backed one."""
    uri = str(Path(temp_dir) / "test_dense_1d")
    array = create_cellarray(uri=uri, shape=(100,), attr_dtype=np.float32, sparse=False)

    # The wrapper never closes an array it did not open, so the test owns the
    # handle and releases it through the context manager. The assertions stay
    # inside the block because the properties read the schema lazily.
    with tiledb.open(uri, "r") as tdb_obj:
        alt_array = DenseCellArray(tiledb_array_obj=tdb_obj)

        assert isinstance(array, DenseCellArray)
        assert array.shape == alt_array.shape
        assert array.ndim == alt_array.ndim
        assert array.dim_names == alt_array.dim_names
        assert "data" in array.attr_names
        assert "data" in alt_array.attr_names
@@ -1,7 +1,8 @@
1
1
  import numpy as np
2
2
  import scipy as sp
3
+ import tiledb
3
4
 
4
- from cellarr_array import create_cellarray
5
+ from cellarr_array import DenseCellArray, SparseCellArray, create_cellarray
5
6
 
6
7
  __author__ = "Jayaram Kancherla"
7
8
  __copyright__ = "Jayaram Kancherla"
@@ -13,11 +14,15 @@ def test_inmem_uri():
13
14
  arr = np.arange(100_000_000).reshape(shape)
14
15
  uri = "mem://dense"
15
16
 
16
- dense_inmem = create_cellarray(uri, shape=(shape))
17
+ dense_inmem = create_cellarray(uri=uri, shape=(shape))
17
18
  dense_inmem.write_batch(arr, start_row=0)
18
19
 
19
20
  assert np.allclose(dense_inmem[:10, :10], arr[:10, :10])
20
21
 
22
+ tdb_obj = tiledb.open(uri, "r")
23
+ alt_array = DenseCellArray(tiledb_array_obj=tdb_obj)
24
+ assert np.allclose(alt_array[:10, :10], arr[:10, :10])
25
+
21
26
 
22
27
  def test_inmem_uri_sparse():
23
28
  shape = (1000, 1000)
@@ -29,3 +34,7 @@ def test_inmem_uri_sparse():
29
34
  dense_inmem.write_batch(s, start_row=0)
30
35
 
31
36
  assert np.allclose(dense_inmem[:10, :10].toarray(), s.tocsr()[:10, :10].toarray())
37
+
38
+ tdb_obj = tiledb.open(uri, "r")
39
+ alt_array = SparseCellArray(tiledb_array_obj=tdb_obj)
40
+ assert np.allclose(alt_array[:10, :10].toarray(), s.tocsr()[:10, :10].toarray())
@@ -2,6 +2,7 @@ from pathlib import Path
2
2
 
3
3
  import numpy as np
4
4
  import pytest
5
+ import tiledb
5
6
  from scipy import sparse
6
7
 
7
8
  from cellarr_array import SparseCellArray, create_cellarray
@@ -44,7 +45,7 @@ def test_1d_write_batch(sample_sparse_array_1d):
44
45
  expected = sparse_data.toarray().flatten()
45
46
  np.testing.assert_array_almost_equal(result.toarray().flatten(), expected)
46
47
 
47
- read_arr = SparseCellArray(sample_sparse_array_1d.uri, return_sparse=False)
48
+ read_arr = SparseCellArray(uri=sample_sparse_array_1d.uri, return_sparse=False)
48
49
 
49
50
  # Full slice
50
51
  result = read_arr[0:10]
@@ -68,7 +69,7 @@ def test_1d_empty_regions(sample_sparse_array_1d):
68
69
 
69
70
  sample_sparse_array_1d.write_batch(sparse_data, start_row=0)
70
71
 
71
- read_arr = SparseCellArray(sample_sparse_array_1d.uri, return_sparse=True)
72
+ read_arr = SparseCellArray(uri=sample_sparse_array_1d.uri, return_sparse=True)
72
73
 
73
74
  # Query empty region
74
75
  result = read_arr[7:10]
@@ -82,7 +83,7 @@ def test_2d_formats(sample_sparse_array_2d):
82
83
  data = sparse.random(10, 50, density=0.1, format="coo", dtype=np.float32)
83
84
 
84
85
  sample_sparse_array_2d.write_batch(data, start_row=0)
85
- array_coo = SparseCellArray(sample_sparse_array_2d.uri, return_sparse=True)
86
+ array_coo = SparseCellArray(uri=sample_sparse_array_2d.uri, return_sparse=True)
86
87
  result = array_coo[0:10, :]
87
88
  np.testing.assert_array_almost_equal(result.toarray(), data.toarray())
88
89
 
@@ -91,7 +92,7 @@ def test_coo_output(sample_sparse_array_2d):
91
92
  data = sparse.random(10, 50, density=0.1, format="coo", dtype=np.float32)
92
93
  sample_sparse_array_2d.write_batch(data, start_row=0)
93
94
 
94
- array_coo = SparseCellArray(sample_sparse_array_2d.uri, return_sparse=True)
95
+ array_coo = SparseCellArray(uri=sample_sparse_array_2d.uri, return_sparse=True)
95
96
 
96
97
  # Test full slice
97
98
  result = array_coo[0:10, :]
@@ -114,7 +115,7 @@ def test_mixed_slice_list_bounds(sample_sparse_array_2d):
114
115
  data = sparse.random(100, 50, density=0.2, format="csr", dtype=np.float32)
115
116
  sample_sparse_array_2d.write_batch(data, start_row=0)
116
117
 
117
- array_coo = SparseCellArray(sample_sparse_array_2d.uri, return_sparse=True)
118
+ array_coo = SparseCellArray(uri=sample_sparse_array_2d.uri, return_sparse=True)
118
119
 
119
120
  cols = [2, 4, 6]
120
121
 
@@ -142,7 +143,7 @@ def test_empty_regions(sample_sparse_array_2d):
142
143
  data = sparse.random(10, 50, density=0.1, format="coo", dtype=np.float32)
143
144
  sample_sparse_array_2d.write_batch(data, start_row=0)
144
145
 
145
- array_coo = SparseCellArray(sample_sparse_array_2d.uri, return_sparse=True)
146
+ array_coo = SparseCellArray(uri=sample_sparse_array_2d.uri, return_sparse=True)
146
147
 
147
148
  # Query empty region
148
149
  result = array_coo[50:60, :]
@@ -178,3 +179,17 @@ def test_invalid_inputs(sample_sparse_array_2d):
178
179
  attr_dtype=np.float32,
179
180
  sparse=True,
180
181
  )
182
+
183
+
def test_array_object(temp_dir):
    """A SparseCellArray built on an already opened tiledb array mirrors the URI-backed one."""
    uri = str(Path(temp_dir) / "test_sparse_1d")
    array = create_cellarray(uri=uri, shape=(100,), attr_dtype=np.float32, sparse=True)

    # The wrapper never closes an array it did not open, so the test owns the
    # handle and releases it through the context manager. The assertions stay
    # inside the block because the properties read the schema lazily.
    with tiledb.open(uri, "r") as tdb_obj:
        alt_array = SparseCellArray(tiledb_array_obj=tdb_obj)

        assert isinstance(array, SparseCellArray)
        assert array.shape == alt_array.shape
        assert array.ndim == alt_array.ndim
        assert array.dim_names == alt_array.dim_names
        assert "data" in array.attr_names
        assert "data" in alt_array.attr_names
@@ -1,33 +0,0 @@
1
- name: Run tests
2
-
3
- on:
4
- push:
5
- branches: [master]
6
- pull_request:
7
- branches: [master]
8
-
9
- jobs:
10
- build:
11
- runs-on: ubuntu-latest
12
- strategy:
13
- matrix:
14
- python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
15
-
16
- name: Python ${{ matrix.python-version }}
17
- steps:
18
- - uses: actions/checkout@v4
19
-
20
- - name: Setup Python
21
- uses: actions/setup-python@v5
22
- with:
23
- python-version: ${{ matrix.python-version }}
24
- cache: "pip"
25
-
26
- - name: Install dependencies
27
- run: |
28
- python -m pip install --upgrade pip
29
- pip install tox
30
-
31
- - name: Test with tox
32
- run: |
33
- tox
@@ -1,251 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from contextlib import contextmanager
3
-
4
- try:
5
- from types import EllipsisType
6
- except ImportError:
7
- # TODO: This is required for Python <3.10. Remove once Python 3.9 reaches EOL in October 2025
8
- EllipsisType = type(...)
9
- from typing import List, Literal, Optional, Tuple, Union
10
-
11
- import numpy as np
12
- import tiledb
13
- from scipy import sparse
14
-
15
- from .config import ConsolidationConfig
16
- from .helpers import SliceHelper
17
-
18
- __author__ = "Jayaram Kancherla"
19
- __copyright__ = "Jayaram Kancherla"
20
- __license__ = "MIT"
21
-
22
-
23
- class CellArray(ABC):
24
- """Abstract base class for TileDB array operations."""
25
-
26
- def __init__(
27
- self,
28
- uri: str,
29
- attr: str = "data",
30
- mode: Optional[Literal["r", "w", "n", "d"]] = None,
31
- config_or_context: Optional[Union[tiledb.Config, tiledb.Ctx]] = None,
32
- validate: bool = True,
33
- ):
34
- """Initialize the object.
35
-
36
- Args:
37
- uri:
38
- URI to the array.
39
-
40
- attr:
41
- Attribute to access.
42
- Defaults to "data".
43
-
44
- mode:
45
- Open the array object in read 'r', write 'w', modify
46
- exclusive 'm' mode, or delete 'd' mode.
47
-
48
- Defaults to None for automatic mode switching.
49
-
50
- config_or_context:
51
- Optional config or context object.
52
-
53
- Defaults to None.
54
-
55
- validate:
56
- Whether to validate the attributes.
57
- Defaults to True.
58
- """
59
- self.uri = uri
60
- self._mode = mode
61
-
62
- if config_or_context is None:
63
- # config_or_context = tiledb.Config()
64
- ctx = None
65
- else:
66
- if isinstance(config_or_context, tiledb.Config):
67
- ctx = tiledb.Ctx(config_or_context)
68
- elif isinstance(config_or_context, tiledb.Ctx):
69
- ctx = config_or_context
70
- else:
71
- raise TypeError("'config_or_context' must be either TileDB config or a context object.")
72
-
73
- self._ctx = ctx
74
- self._array = None
75
- self._shape = None
76
- self._ndim = None
77
- self._dim_names = None
78
- self._attr_names = None
79
- self._nonempty_domain = None
80
-
81
- if validate:
82
- self._validate(attr=attr)
83
-
84
- self._attr = attr
85
-
86
- def _validate(self, attr):
87
- with self.open_array(mode="r") as A:
88
- if A.ndim > 2:
89
- raise ValueError("Only 1D and 2D arrays are supported.")
90
-
91
- if attr not in self.attr_names:
92
- raise ValueError(
93
- f"Attribute '{attr}' does not exist in the array. Available attributes: {self.attr_names}."
94
- )
95
-
96
- @property
97
- def mode(self) -> Optional[str]:
98
- """Get current array mode."""
99
- return self._mode
100
-
101
- @mode.setter
102
- def mode(self, value: Optional[str]):
103
- """Set array mode.
104
-
105
- Args:
106
- value:
107
- One of `None`, 'r', 'w', or 'm', 'd'.
108
- """
109
- if value is not None and value not in ["r", "w", "m", "d"]:
110
- raise ValueError("Mode must be one of: None, 'r', 'w', 'm', 'd'")
111
- self._mode = value
112
-
113
- @property
114
- def dim_names(self) -> List[str]:
115
- """Get dimension names of the array."""
116
- if self._dim_names is None:
117
- with self.open_array(mode="r") as A:
118
- self._dim_names = [dim.name for dim in A.schema.domain]
119
- return self._dim_names
120
-
121
- @property
122
- def attr_names(self) -> List[str]:
123
- """Get attribute names of the array."""
124
- if self._attr_names is None:
125
- with self.open_array(mode="r") as A:
126
- self._attr_names = [A.schema.attr(i).name for i in range(A.schema.nattr)]
127
- return self._attr_names
128
-
129
- @property
130
- def shape(self) -> Tuple[int, ...]:
131
- """Get array shape from schema domain."""
132
- if self._shape is None:
133
- with self.open_array(mode="r") as A:
134
- self._shape = tuple(int(dim.domain[1] - dim.domain[0] + 1) for dim in A.schema.domain)
135
- return self._shape
136
-
137
- @property
138
- def nonempty_domain(self) -> Tuple[int, ...]:
139
- """Get array non-empty domain."""
140
- if self._nonempty_domain is None:
141
- with self.open_array(mode="r") as A:
142
- self._nonempty_domain = A.nonempty_domain()
143
- return self._nonempty_domain
144
-
145
- @property
146
- def ndim(self) -> int:
147
- """Get number of dimensions."""
148
- if self._ndim is None:
149
- self._ndim = len(self.shape)
150
- return self._ndim
151
-
152
- @contextmanager
153
- def open_array(self, mode: Optional[str] = None):
154
- """Context manager for array operations.
155
-
156
- Args:
157
- mode:
158
- Override mode for this operation.
159
- """
160
- mode = mode if mode is not None else self.mode
161
- mode = mode if mode is not None else "r" # Default to read mode
162
-
163
- array = tiledb.open(self.uri, mode=mode, ctx=self._ctx)
164
- try:
165
- yield array
166
- finally:
167
- array.close()
168
-
169
- def __getitem__(self, key: Union[slice, EllipsisType, Tuple[Union[slice, List[int]], ...], EllipsisType]):
170
- """Get item implementation that routes to either direct slicing or multi_index
171
- based on the type of indices provided.
172
-
173
- Args:
174
- key:
175
- Slice or list of indices for each dimension in the array.
176
- """
177
- if not isinstance(key, tuple):
178
- key = (key,)
179
-
180
- if len(key) > self.ndim:
181
- raise IndexError(f"Invalid number of dimensions: got {len(key)}, expected {self.ndim}")
182
-
183
- # Normalize all indices
184
- normalized_key = tuple(SliceHelper.normalize_index(idx, self.shape[i]) for i, idx in enumerate(key))
185
-
186
- num_ellipsis = sum(isinstance(i, EllipsisType) for i in normalized_key)
187
- if num_ellipsis > 1:
188
- raise IndexError(f"Found more than 1 Ellipsis (...) in key: {normalized_key}")
189
-
190
- # Check if we can use direct slicing
191
- use_direct = all(isinstance(idx, (slice, EllipsisType)) for idx in normalized_key)
192
-
193
- if use_direct:
194
- return self._direct_slice(normalized_key)
195
- else:
196
- if num_ellipsis > 0:
197
- raise IndexError(f"tiledb does not support ellipsis in multi-index access: {normalized_key}")
198
- return self._multi_index(normalized_key)
199
-
200
- @abstractmethod
201
- def _direct_slice(self, key: Tuple[Union[slice, EllipsisType], ...]) -> np.ndarray:
202
- """Implementation for direct slicing."""
203
- pass
204
-
205
- @abstractmethod
206
- def _multi_index(self, key: Tuple[Union[slice, List[int]], ...]) -> np.ndarray:
207
- """Implementation for multi-index access."""
208
- pass
209
-
210
- def vacuum(self) -> None:
211
- """Remove deleted fragments from the array."""
212
- tiledb.vacuum(self.uri)
213
-
214
- def consolidate(self, config: Optional[ConsolidationConfig] = None) -> None:
215
- """Consolidate array fragments.
216
-
217
- Args:
218
- config:
219
- Optional consolidation configuration.
220
- """
221
- if config is None:
222
- config = ConsolidationConfig()
223
-
224
- consolidation_cfg = tiledb.Config()
225
-
226
- consolidation_cfg["sm.consolidation.steps"] = config.steps
227
- consolidation_cfg["sm.consolidation.step_min_frags"] = config.step_min_frags
228
- consolidation_cfg["sm.consolidation.step_max_frags"] = config.step_max_frags
229
- consolidation_cfg["sm.consolidation.buffer_size"] = config.buffer_size
230
- consolidation_cfg["sm.mem.total_budget"] = config.total_budget
231
-
232
- tiledb.consolidate(self.uri, config=consolidation_cfg)
233
-
234
- if config.vacuum_after:
235
- self.vacuum()
236
-
237
- @abstractmethod
238
- def write_batch(self, data: Union[np.ndarray, sparse.spmatrix], start_row: int, **kwargs) -> None:
239
- """Write a batch of data to the array starting at the specified row.
240
-
241
- Args:
242
- data:
243
- Data to write (numpy array for dense, scipy sparse matrix for sparse).
244
-
245
- start_row:
246
- Starting row index for writing.
247
-
248
- **kwargs:
249
- Additional arguments for write operation.
250
- """
251
- pass
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes