cellarr-array 0.0.3__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cellarr-array might be problematic. Click here for more details.

Files changed (57) hide show
  1. cellarr_array-0.2.0/.github/workflows/run-tests.yml +73 -0
  2. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/.gitignore +2 -0
  3. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/CHANGELOG.md +12 -0
  4. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/LICENSE.txt +1 -1
  5. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/PKG-INFO +4 -1
  6. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/setup.cfg +3 -0
  7. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/src/cellarr_array/__init__.py +2 -4
  8. cellarr_array-0.2.0/src/cellarr_array/core/__init__.py +3 -0
  9. cellarr_array-0.2.0/src/cellarr_array/core/base.py +344 -0
  10. cellarr_array-0.0.3/src/cellarr_array/DenseCellArray.py → cellarr_array-0.2.0/src/cellarr_array/core/dense.py +2 -3
  11. {cellarr_array-0.0.3/src/cellarr_array → cellarr_array-0.2.0/src/cellarr_array/core}/helpers.py +80 -42
  12. cellarr_array-0.0.3/src/cellarr_array/SparseCellArray.py → cellarr_array-0.2.0/src/cellarr_array/core/sparse.py +75 -27
  13. cellarr_array-0.2.0/src/cellarr_array/dataloaders/__init__.py +3 -0
  14. cellarr_array-0.2.0/src/cellarr_array/dataloaders/denseloader.py +198 -0
  15. cellarr_array-0.2.0/src/cellarr_array/dataloaders/iterabledataloader.py +320 -0
  16. cellarr_array-0.2.0/src/cellarr_array/dataloaders/sparseloader.py +230 -0
  17. cellarr_array-0.2.0/src/cellarr_array/dataloaders/utils.py +26 -0
  18. cellarr_array-0.2.0/src/cellarr_array/utils/__init__.py +3 -0
  19. cellarr_array-0.2.0/src/cellarr_array/utils/mock.py +167 -0
  20. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/src/cellarr_array.egg-info/PKG-INFO +4 -1
  21. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/src/cellarr_array.egg-info/SOURCES.txt +15 -5
  22. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/src/cellarr_array.egg-info/requires.txt +4 -0
  23. cellarr_array-0.2.0/tests/conftest.py +233 -0
  24. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/tests/test_all.py +2 -2
  25. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/tests/test_dense.py +20 -5
  26. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/tests/test_helpers.py +17 -5
  27. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/tests/test_inmemory.py +11 -2
  28. cellarr_array-0.2.0/tests/test_iterable_loader.py +288 -0
  29. cellarr_array-0.2.0/tests/test_map_loader.py +289 -0
  30. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/tests/test_sparse.py +22 -7
  31. cellarr_array-0.0.3/.github/workflows/run-tests.yml +0 -33
  32. cellarr_array-0.0.3/src/cellarr_array/CellArray.py +0 -251
  33. cellarr_array-0.0.3/tests/conftest.py +0 -91
  34. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/.coveragerc +0 -0
  35. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/.github/workflows/publish-pypi.yml +0 -0
  36. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/.pre-commit-config.yaml +0 -0
  37. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/.readthedocs.yml +0 -0
  38. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/AUTHORS.md +0 -0
  39. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/CONTRIBUTING.md +0 -0
  40. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/README.md +0 -0
  41. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/docs/Makefile +0 -0
  42. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/docs/_static/.gitignore +0 -0
  43. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/docs/authors.md +0 -0
  44. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/docs/changelog.md +0 -0
  45. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/docs/conf.py +0 -0
  46. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/docs/contributing.md +0 -0
  47. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/docs/index.md +0 -0
  48. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/docs/license.md +0 -0
  49. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/docs/readme.md +0 -0
  50. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/docs/requirements.txt +0 -0
  51. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/pyproject.toml +0 -0
  52. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/setup.py +0 -0
  53. {cellarr_array-0.0.3/src/cellarr_array → cellarr_array-0.2.0/src/cellarr_array/utils}/config.py +0 -0
  54. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/src/cellarr_array.egg-info/dependency_links.txt +0 -0
  55. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/src/cellarr_array.egg-info/not-zip-safe +0 -0
  56. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/src/cellarr_array.egg-info/top_level.txt +0 -0
  57. {cellarr_array-0.0.3 → cellarr_array-0.2.0}/tox.ini +0 -0
@@ -0,0 +1,73 @@
1
+ name: Test the library
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - master # for legacy repos
7
+ - main
8
+ pull_request:
9
+ branches:
10
+ - master # for legacy repos
11
+ - main
12
+ workflow_dispatch: # Allow manually triggering the workflow
13
+ schedule:
14
+ # Run roughly every 15 days at 00:00 UTC
15
+ # (useful to check if updates on dependencies break the package)
16
+ - cron: "0 0 1,16 * *"
17
+
18
+ permissions:
19
+ contents: read
20
+
21
+ concurrency:
22
+ group: >-
23
+ ${{ github.workflow }}-${{ github.ref_type }}-
24
+ ${{ github.event.pull_request.number || github.sha }}
25
+ cancel-in-progress: true
26
+
27
+ jobs:
28
+ test:
29
+ strategy:
30
+ matrix:
31
+ python: ["3.9", "3.10", "3.11", "3.12", "3.13"]
32
+ platform:
33
+ - ubuntu-latest
34
+ # - macos-latest
35
+ # - windows-latest
36
+ runs-on: ${{ matrix.platform }}
37
+ name: Python ${{ matrix.python }}, ${{ matrix.platform }}
38
+ steps:
39
+ - uses: actions/checkout@v4
40
+
41
+ - uses: actions/setup-python@v5
42
+ id: setup-python
43
+ with:
44
+ python-version: ${{ matrix.python }}
45
+
46
+ - name: Install dependencies
47
+ run: |
48
+ python -m pip install --upgrade pip
49
+ pip install tox coverage
50
+
51
+ - name: Run tests
52
+ run: >-
53
+ pipx run --python '${{ steps.setup-python.outputs.python-path }}'
54
+ tox
55
+ -- -rFEx --durations 10 --color yes --cov --cov-branch --cov-report=xml # pytest args
56
+
57
+ - name: Check for codecov token availability
58
+ id: codecov-check
59
+ shell: bash
60
+ run: |
61
+ if [ ${{ secrets.CODECOV_TOKEN }} != '' ]; then
62
+ echo "codecov=true" >> $GITHUB_OUTPUT;
63
+ else
64
+ echo "codecov=false" >> $GITHUB_OUTPUT;
65
+ fi
66
+
67
+ - name: Upload coverage reports to Codecov with GitHub Action
68
+ uses: codecov/codecov-action@v5
69
+ if: ${{ steps.codecov-check.outputs.codecov == 'true' }}
70
+ env:
71
+ CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
72
+ slug: ${{ github.repository }}
73
+ flags: ${{ matrix.platform }} - py${{ matrix.python }}
@@ -52,3 +52,5 @@ MANIFEST
52
52
  .venv*/
53
53
  .conda*/
54
54
  .python-version
55
+
56
+ *.tdb
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## Version 0.2.0
4
+
5
+ - Dataloaders for sparse and dense arrays. We provide templates for both map-style and iterable-style dataloaders. Users are expected to understand the caveats of both of these approaches.
6
+ - Fixed a bug with slicing on 1D arrays and many improvements for optimizing slicing parameters.
7
+ - Update documentation and tests.
8
+
9
+ ## Version 0.1.0
10
+
11
+ - Support cellarr-arrays on user provided tiledb array objects.
12
+ - Migrate github actions to the newer version from biocsetup.
13
+ - Renaming module names, documentation and tests
14
+
3
15
  ## Version 0.0.2
4
16
 
5
17
  - Support in-memory tiledb objects. Updated tests and documentation.
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2025 Jayaram Kancherla
3
+ Copyright (c) 2025 Genentech
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cellarr-array
3
- Version: 0.0.3
3
+ Version: 0.2.0
4
4
  Summary: Base class for handling TileDB backed arrays.
5
5
  Home-page: https://github.com/cellarr/cellarr-array
6
6
  Author: Jayaram Kancherla
@@ -16,10 +16,13 @@ Requires-Dist: importlib-metadata; python_version < "3.8"
16
16
  Requires-Dist: tiledb
17
17
  Requires-Dist: numpy
18
18
  Requires-Dist: scipy
19
+ Provides-Extra: optional
20
+ Requires-Dist: torch; extra == "optional"
19
21
  Provides-Extra: testing
20
22
  Requires-Dist: setuptools; extra == "testing"
21
23
  Requires-Dist: pytest; extra == "testing"
22
24
  Requires-Dist: pytest-cov; extra == "testing"
25
+ Requires-Dist: torch; extra == "testing"
23
26
  Dynamic: license-file
24
27
 
25
28
  [![PyPI-Server](https://img.shields.io/pypi/v/cellarr-array.svg)](https://pypi.org/project/cellarr-array/)
@@ -33,10 +33,13 @@ exclude =
33
33
  tests
34
34
 
35
35
  [options.extras_require]
36
+ optional =
37
+ torch
36
38
  testing =
37
39
  setuptools
38
40
  pytest
39
41
  pytest-cov
42
+ %(optional)s
40
43
 
41
44
  [options.entry_points]
42
45
 
@@ -15,7 +15,5 @@ except PackageNotFoundError: # pragma: no cover
15
15
  finally:
16
16
  del version, PackageNotFoundError
17
17
 
18
- from .config import CellArrConfig, ConsolidationConfig
19
- from .DenseCellArray import DenseCellArray
20
- from .SparseCellArray import SparseCellArray
21
- from .helpers import create_cellarray, SliceHelper
18
+ from .core import DenseCellArray, SparseCellArray
19
+ from .utils import CellArrConfig, ConsolidationConfig, create_cellarray
@@ -0,0 +1,3 @@
1
+ from .base import CellArray
2
+ from .dense import DenseCellArray
3
+ from .sparse import SparseCellArray
@@ -0,0 +1,344 @@
1
+ from abc import ABC, abstractmethod
2
+ from contextlib import contextmanager
3
+
4
+ try:
5
+ from types import EllipsisType
6
+ except ImportError:
7
+ # TODO: This is required for Python <3.10. Remove once Python 3.9 reaches EOL in October 2025
8
+ EllipsisType = type(...)
9
+ from typing import Any, List, Literal, Optional, Tuple, Union
10
+
11
+ import numpy as np
12
+ import tiledb
13
+ from scipy import sparse
14
+
15
+ from ..utils.config import ConsolidationConfig
16
+ from .helpers import SliceHelper
17
+
18
+ __author__ = "Jayaram Kancherla"
19
+ __copyright__ = "Jayaram Kancherla"
20
+ __license__ = "MIT"
21
+
22
+
23
class CellArray(ABC):
    """Abstract base class for TileDB array operations.

    An instance is backed either by a URI, in which case the array is opened
    and closed around every operation, or by an already opened
    ``tiledb.Array`` supplied by the caller, in which case the array is used
    as-is and never closed by this class.

    Subclasses must implement :py:meth:`_direct_slice`,
    :py:meth:`_multi_index` and :py:meth:`write_batch`.
    """

    def __init__(
        self,
        uri: Optional[str] = None,
        tiledb_array_obj: Optional[tiledb.Array] = None,
        attr: str = "data",
        mode: Optional[Literal["r", "w", "d", "m"]] = None,
        config_or_context: Optional[Union[tiledb.Config, tiledb.Ctx]] = None,
        validate: bool = True,
    ):
        """Initialize the object.

        Args:
            uri:
                URI to the array.
                Required if 'tiledb_array_obj' is not provided.

            tiledb_array_obj:
                Optional, an already opened ``tiledb.Array`` instance.
                If provided, 'uri' can be None, and 'config_or_context' is ignored.

            attr:
                Attribute to access.
                Defaults to "data".

            mode:
                Open the array object in read 'r', write 'w', modify
                'm' mode, or delete 'd' mode.

                Defaults to None for automatic mode switching.

                If 'tiledb_array_obj' is provided, this mode must match
                the mode of the provided array or be None.

            config_or_context:
                Optional config or context object. Ignored if 'tiledb_array_obj' is provided,
                as context will be derived from the object.

                Defaults to None.

            validate:
                Whether to validate the attributes.
                Defaults to True.

        Raises:
            ValueError:
                If neither 'uri' nor 'tiledb_array_obj' is provided, if
                'tiledb_array_obj' is not an open ``tiledb.Array``, if 'mode'
                conflicts with the mode of 'tiledb_array_obj', or if
                validation fails.

            TypeError:
                If 'config_or_context' is neither a TileDB Config nor Ctx.
        """
        self._array_passed_in = False
        self._opened_array_external = None
        self._ctx = None

        if tiledb_array_obj is not None:
            if not isinstance(tiledb_array_obj, tiledb.Array):
                raise ValueError("'tiledb_array_obj' must be a tiledb.Array instance.")

            # A closed array is rejected rather than reopened: reopening is
            # less safe because the caller's original intent is unknown.
            if not tiledb_array_obj.isopen:
                raise ValueError("If 'tiledb_array_obj' is provided, it must be an open tiledb.Array instance.")

            self.uri = tiledb_array_obj.uri
            self._array_passed_in = True
            self._opened_array_external = tiledb_array_obj

            # An explicitly requested mode must agree with the external array.
            if mode is not None and tiledb_array_obj.mode != mode:
                # Build ONE message string; passing two positional strings
                # would make the exception carry a tuple instead of text.
                raise ValueError(
                    f"Provided array mode '{tiledb_array_obj.mode}' does not match requested mode '{mode}'. "
                    "Re-open the external array with the desired mode or pass matching mode."
                )

            self._mode = tiledb_array_obj.mode
            self._ctx = tiledb_array_obj.ctx
        elif uri is not None:
            self.uri = uri
            self._mode = mode
            self._array_passed_in = False
            self._opened_array_external = None

            if config_or_context is None:
                self._ctx = None
            elif isinstance(config_or_context, tiledb.Config):
                self._ctx = tiledb.Ctx(config_or_context)
            elif isinstance(config_or_context, tiledb.Ctx):
                self._ctx = config_or_context
            else:
                raise TypeError("'config_or_context' must be a TileDB Config or Ctx object.")
        else:
            raise ValueError("Either 'uri' or 'tiledb_array_obj' must be provided.")

        # Lazily populated caches for the schema-derived properties below.
        self._shape = None
        self._ndim = None
        self._dim_names = None
        self._attr_names = None
        self._nonempty_domain = None

        if validate:
            self._validate(attr=attr)

        self._attr = attr

    def _validate(self, attr):
        """Check that the array has at most 2 dimensions and contains 'attr'.

        Args:
            attr:
                Name of the attribute expected in the array schema.

        Raises:
            ValueError:
                If the array has more than 2 dimensions or 'attr' is not
                one of its attributes.
        """
        with self.open_array(mode="r") as A:
            schema = A.schema
            if schema.ndim > 2:
                raise ValueError("Only 1D and 2D arrays are supported.")

            current_attr_names = [schema.attr(i).name for i in range(schema.nattr)]
            if attr not in current_attr_names:
                raise ValueError(
                    f"Attribute '{attr}' does not exist in the array. Available attributes: {current_attr_names}."
                )

    @property
    def mode(self) -> Optional[str]:
        """Get current array mode. If an external array is used, this is its open mode."""
        if self._array_passed_in and self._opened_array_external is not None:
            return self._opened_array_external.mode
        return self._mode

    @mode.setter
    def mode(self, value: Optional[str]):
        """Set array mode for subsequent operations if not using an external array.

        This action does not affect an already passed-in external array's mode.

        Raises:
            ValueError:
                If an external array is in use and 'value' differs from its
                mode, or if 'value' is not one of None, 'r', 'w', 'm', 'd'.
        """
        if self._array_passed_in:
            # To change mode of an external array, user must reopen it and pass it again.
            current_ext_mode = self._opened_array_external.mode if self._opened_array_external else "unknown"
            if value != current_ext_mode:
                raise ValueError(
                    f"Cannot change mode of an externally managed array (current: {current_ext_mode}). "
                    "Re-open the external array with the new mode and re-initialize CellArray."
                )
        if value is not None and value not in ["r", "w", "m", "d"]:
            raise ValueError("Mode must be one of: None, 'r', 'w', 'm', 'd'")

        self._mode = value

    @property
    def dim_names(self) -> List[str]:
        """Get dimension names of the array."""
        if self._dim_names is None:
            with self.open_array(mode="r") as A:
                self._dim_names = [dim.name for dim in A.schema.domain]
        return self._dim_names

    @property
    def attr_names(self) -> List[str]:
        """Get attribute names of the array."""
        if self._attr_names is None:
            with self.open_array(mode="r") as A:
                self._attr_names = [A.schema.attr(i).name for i in range(A.schema.nattr)]
        return self._attr_names

    @property
    def shape(self) -> Tuple[int, ...]:
        """Get the array shape, one extent per dimension.

        Each extent is computed from the dimension's inclusive domain bounds
        as ``upper - lower + 1``.
        """
        if self._shape is None:
            with self.open_array(mode="r") as A:
                self._shape = tuple(int(dim.domain[1] - dim.domain[0] + 1) for dim in A.schema.domain)
        return self._shape

    @property
    def nonempty_domain(self) -> Optional[Tuple[Any, ...]]:
        """Get the non-empty domain of the array, or None if the array is empty.

        A None result is not cached, so it is queried again on the next access.
        """
        if self._nonempty_domain is None:
            with self.open_array(mode="r") as A:
                # nonempty_domain() can return None if the array is empty.
                ned = A.nonempty_domain()
                if ned is None:
                    self._nonempty_domain = None
                else:
                    # Always expose a tuple of per-dimension entries.
                    self._nonempty_domain = tuple(ned) if isinstance(ned[0], tuple) else (ned,)
        return self._nonempty_domain

    @property
    def ndim(self) -> int:
        """Get number of dimensions."""
        if self._ndim is None:
            with self.open_array(mode="r") as A:
                self._ndim = A.schema.ndim
        return self._ndim

    @contextmanager
    def open_array(self, mode: Optional[str] = None):
        """Context manager for array operations.

        Uses the externally provided array if available, otherwise opens from URI.

        Args:
            mode:
                Desired mode for the operation ('r', 'w', 'm', 'd').
                If an external array is used, a 'w' or 'd' request must be
                the same as the mode the external array was opened with;
                other requests are passed through unchecked.

                If None, uses the CellArray's default mode ('r' when no
                default is set and the array is opened from a URI).

        Yields:
            The open ``tiledb.Array``. An array opened from the URI is closed
            on exit; an externally provided array is left open.

        Raises:
            tiledb.TileDBError:
                If a closed external array cannot be reopened, or a 'w'/'d'
                request does not match the external array's mode.
        """
        if self._array_passed_in and self._opened_array_external is not None:
            external = self._opened_array_external

            if not external.isopen:
                # Attempt to reopen if closed. This assumes the user might have closed it
                # and expects CellArr to reopen it if still possible.
                try:
                    external.reopen()
                except Exception as e:
                    raise tiledb.TileDBError(
                        f"Externally provided array is closed and could not be reopened: {e}"
                    ) from e

            current_external_mode = external.mode
            effective_mode = mode if mode is not None else current_external_mode

            # Only write/delete requests are checked against the external
            # array's mode; read ('r') and modify ('m') requests are yielded
            # the external array as-is.
            if effective_mode in ("w", "d") and current_external_mode != effective_mode:
                raise tiledb.TileDBError(
                    f"Requested operation mode '{effective_mode}' is incompatible with the "
                    f"externally provided array's mode '{current_external_mode}'. "
                    "Ensure the external array is opened in a compatible mode."
                )

            # DO NOT close the external array here; its lifecycle is managed by the user.
            yield external
        else:
            effective_mode = mode if mode is not None else self.mode
            effective_mode = effective_mode if effective_mode is not None else "r"
            array = tiledb.open(self.uri, mode=effective_mode, ctx=self._ctx)

            try:
                yield array
            finally:
                array.close()

    def __getitem__(self, key: Union[slice, EllipsisType, Tuple[Union[slice, List[int]], ...]]):
        """Get item implementation that routes to either direct slicing or multi_index
        based on the type of indices provided.

        Args:
            key:
                Slice or list of indices for each dimension in the array.

        Raises:
            IndexError:
                If 'key' has more entries than the array has dimensions,
                contains more than one Ellipsis, or mixes an Ellipsis with
                non-slice indices.
        """
        if not isinstance(key, tuple):
            key = (key,)

        if len(key) > self.ndim:
            raise IndexError(f"Invalid number of dimensions: got {len(key)}, expected {self.ndim}")

        # Normalize all indices
        normalized_key = tuple(SliceHelper.normalize_index(idx, self.shape[i]) for i, idx in enumerate(key))

        num_ellipsis = sum(isinstance(i, EllipsisType) for i in normalized_key)
        if num_ellipsis > 1:
            raise IndexError(f"Found more than 1 Ellipsis (...) in key: {normalized_key}")

        # Direct slicing is only possible when every index is a slice or Ellipsis.
        use_direct = all(isinstance(idx, (slice, EllipsisType)) for idx in normalized_key)
        if use_direct:
            return self._direct_slice(normalized_key)

        if num_ellipsis > 0:
            raise IndexError(f"tiledb does not support ellipsis in multi-index access: {normalized_key}")
        return self._multi_index(normalized_key)

    @abstractmethod
    def _direct_slice(self, key: Tuple[Union[slice, EllipsisType], ...]) -> np.ndarray:
        """Implementation for direct slicing."""
        pass

    @abstractmethod
    def _multi_index(self, key: Tuple[Union[slice, List[int]], ...]) -> np.ndarray:
        """Implementation for multi-index access."""
        pass

    def vacuum(self) -> None:
        """Remove deleted fragments from the array."""
        tiledb.vacuum(self.uri)

    def consolidate(self, config: Optional[ConsolidationConfig] = None) -> None:
        """Consolidate array fragments.

        Args:
            config:
                Optional consolidation configuration.
                Defaults to a fresh ``ConsolidationConfig()`` when None.
        """
        if config is None:
            config = ConsolidationConfig()

        consolidation_cfg = tiledb.Config()

        consolidation_cfg["sm.consolidation.steps"] = config.steps
        consolidation_cfg["sm.consolidation.step_min_frags"] = config.step_min_frags
        consolidation_cfg["sm.consolidation.step_max_frags"] = config.step_max_frags
        consolidation_cfg["sm.consolidation.buffer_size"] = config.buffer_size
        consolidation_cfg["sm.mem.total_budget"] = config.total_budget

        tiledb.consolidate(self.uri, config=consolidation_cfg)

        if config.vacuum_after:
            self.vacuum()

    @abstractmethod
    def write_batch(self, data: Union[np.ndarray, sparse.spmatrix], start_row: int, **kwargs) -> None:
        """Write a batch of data to the array starting at the specified row.

        Args:
            data:
                Data to write (numpy array for dense, scipy sparse matrix for sparse).

            start_row:
                Starting row index for writing.

            **kwargs:
                Additional arguments for write operation.
        """
        pass
@@ -7,7 +7,7 @@ from typing import List, Tuple, Union
7
7
 
8
8
  import numpy as np
9
9
 
10
- from .CellArray import CellArray
10
+ from .base import CellArray
11
11
  from .helpers import SliceHelper
12
12
 
13
13
  __author__ = "Jayaram Kancherla"
@@ -92,7 +92,6 @@ class DenseCellArray(CellArray):
92
92
  if len(data.shape) != self.ndim:
93
93
  raise ValueError(f"Data dimensions {data.shape} don't match array dimensions {self.shape}.")
94
94
 
95
- # Check bounds
96
95
  end_row = start_row + data.shape[0]
97
96
  if end_row > self.shape[0]:
98
97
  raise ValueError(
@@ -102,7 +101,6 @@ class DenseCellArray(CellArray):
102
101
  if self.ndim == 2 and data.shape[1] != self.shape[1]:
103
102
  raise ValueError(f"Data columns {data.shape[1]} don't match array columns {self.shape[1]}.")
104
103
 
105
- # Construct write region
106
104
  if self.ndim == 1:
107
105
  write_region = slice(start_row, end_row)
108
106
  else: # 2D
@@ -110,4 +108,5 @@ class DenseCellArray(CellArray):
110
108
 
111
109
  # write_data = {self._attr: data} if len(self.attr_names) > 1 else data
112
110
  with self.open_array(mode="w") as array:
111
+ print("write_region", write_region)
113
112
  array[write_region] = data