cellarr-array 0.0.3__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cellarr-array might be problematic. Click here for more details.
- cellarr_array/__init__.py +2 -4
- cellarr_array/core/__init__.py +3 -0
- cellarr_array/core/base.py +344 -0
- cellarr_array/{DenseCellArray.py → core/dense.py} +2 -3
- cellarr_array/{helpers.py → core/helpers.py} +80 -42
- cellarr_array/{SparseCellArray.py → core/sparse.py} +75 -27
- cellarr_array/dataloaders/__init__.py +3 -0
- cellarr_array/dataloaders/denseloader.py +198 -0
- cellarr_array/dataloaders/iterabledataloader.py +320 -0
- cellarr_array/dataloaders/sparseloader.py +230 -0
- cellarr_array/dataloaders/utils.py +26 -0
- cellarr_array/utils/__init__.py +3 -0
- cellarr_array/utils/mock.py +167 -0
- {cellarr_array-0.0.3.dist-info → cellarr_array-0.2.0.dist-info}/METADATA +4 -1
- cellarr_array-0.2.0.dist-info/RECORD +19 -0
- {cellarr_array-0.0.3.dist-info → cellarr_array-0.2.0.dist-info}/WHEEL +1 -1
- {cellarr_array-0.0.3.dist-info → cellarr_array-0.2.0.dist-info}/licenses/LICENSE.txt +1 -1
- cellarr_array/CellArray.py +0 -251
- cellarr_array-0.0.3.dist-info/RECORD +0 -11
- /cellarr_array/{config.py → utils/config.py} +0 -0
- {cellarr_array-0.0.3.dist-info → cellarr_array-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
from typing import Dict, Optional
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import scipy.sparse as sp
|
|
6
|
+
import tiledb
|
|
7
|
+
|
|
8
|
+
from ..core import DenseCellArray, SparseCellArray
|
|
9
|
+
from ..core.helpers import CellArrConfig, create_cellarray
|
|
10
|
+
|
|
11
|
+
__author__ = "Jayaram Kancherla"
|
|
12
|
+
__copyright__ = "Jayaram Kancherla"
|
|
13
|
+
__license__ = "MIT"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def generate_tiledb_dense_array(
    uri: str,
    rows: int,
    cols: int,
    attr_name: str = "data",
    attr_dtype: np.dtype = np.float32,
    chunk_size: int = 1000,
    tiledb_config: Optional[Dict] = None,
):
    """Generate a dense TileDB array and fill it with random data.

    Any array that already exists at ``uri`` is removed first. The data is
    produced with ``np.random.rand`` and cast to ``attr_dtype``, then written
    in row batches of at most ``chunk_size`` rows.

    Args:
        uri:
            URI for the new TileDB array.

        rows:
            Number of rows.

        cols:
            Number of columns (features).

        attr_name:
            Name of the attribute.

        attr_dtype:
            Data type of the attribute.

        chunk_size:
            Number of rows to write per batch. Must be positive.

        tiledb_config:
            TileDB context configuration.

    Raises:
        ValueError:
            If ``chunk_size`` is not positive.
    """
    # Validate before the destructive removal below, so a bad argument never
    # costs the caller an existing array.
    if chunk_size <= 0:
        raise ValueError(f"'chunk_size' must be a positive integer, got {chunk_size}.")

    if tiledb.array_exists(uri):
        print(f"Array {uri} already exists. Removing.")
        # NOTE(review): shutil.rmtree only understands local filesystem paths;
        # confirm whether non-local URIs need to be supported here.
        shutil.rmtree(uri)

    print(f"Creating dense array at '{uri}' with shape ({rows}, {cols})")
    cfg = CellArrConfig(ctx_config=tiledb_config if tiledb_config else {})

    # NOTE(review): `cfg` is not forwarded to create_cellarray (the original
    # had `config=cfg` commented out); it is only used to build the context.
    create_cellarray(
        uri=uri,
        shape=(rows, cols),
        attr_dtype=attr_dtype,
        sparse=False,
        dim_names=["rows", "cols"],
        attr_name=attr_name,
    )

    ctx = tiledb.Ctx(cfg.ctx_config) if cfg.ctx_config else None
    arr_writer = DenseCellArray(uri=uri, attr=attr_name, mode="w", config_or_context=ctx)

    print(f"Writing data to dense array '{uri}'...")
    for start_row in range(0, rows, chunk_size):
        end_row = min(start_row + chunk_size, rows)
        data_chunk = np.random.rand(end_row - start_row, cols).astype(attr_dtype)
        arr_writer.write_batch(data_chunk, start_row=start_row)

        # Report progress on every tenth batch only.
        if (start_row // chunk_size) % 10 == 0:
            print(f"  Dense write: {end_row}/{rows} rows written.")

    print(f"Finished writing to dense array '{uri}'.")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def generate_tiledb_sparse_array(
    uri: str,
    rows: int,
    cols: int,
    density: float = 0.01,
    attr_name: str = "data",
    attr_dtype: np.dtype = np.float32,
    chunk_size: int = 1000,
    tiledb_config: Optional[Dict] = None,
    sparse_format_to_write: str = "coo",
):
    """Generate a sparse TileDB array and fill it with random data.

    Any array that already exists at ``uri`` is removed first. Each batch of
    at most ``chunk_size`` rows is generated with ``scipy.sparse.random`` and
    written only when it contains at least one stored value.

    Args:
        uri:
            URI for the new TileDB array.

        rows:
            Number of rows.

        cols:
            Number of columns (features).

        density:
            Density of the sparse matrix. Must be between 0 and 1.

        attr_name:
            Name of the attribute.

        attr_dtype:
            Data type of the attribute.

        chunk_size:
            Number of rows to generate and write per batch. Must be positive.

        tiledb_config:
            TileDB context configuration.

        sparse_format_to_write:
            Scipy sparse format to use for generating chunks ('coo', 'csr', 'csc').

    Raises:
        ValueError:
            If ``chunk_size`` is not positive or ``density`` is outside
            ``[0, 1]``.
    """
    # Validate before the destructive removal below, so a bad argument never
    # costs the caller an existing array.
    if chunk_size <= 0:
        raise ValueError(f"'chunk_size' must be a positive integer, got {chunk_size}.")

    # Written as a negated range check so that NaN is rejected as well.
    if not (0 <= density <= 1):
        raise ValueError(f"'density' must be between 0 and 1, got {density}.")

    if tiledb.array_exists(uri):
        print(f"Array {uri} already exists. Removing.")
        # NOTE(review): shutil.rmtree only understands local filesystem paths;
        # confirm whether non-local URIs need to be supported here.
        shutil.rmtree(uri)

    print(f"Creating sparse array at '{uri}' with shape ({rows}, {cols}), density ~{density}")
    cfg = CellArrConfig(ctx_config=tiledb_config if tiledb_config else {})

    # NOTE(review): `cfg` is not forwarded to create_cellarray (the original
    # had `config=cfg` commented out); it is only used to build the context.
    create_cellarray(
        uri=uri,
        shape=(rows, cols),
        attr_dtype=attr_dtype,
        sparse=True,
        dim_names=["rows", "cols"],
        attr_name=attr_name,
    )

    ctx = tiledb.Ctx(cfg.ctx_config) if cfg.ctx_config else None
    arr_writer = SparseCellArray(
        uri=uri,
        attr=attr_name,
        mode="w",
        config_or_context=ctx,
    )

    print(f"Writing data to sparse array '{uri}'...")
    for start_row in range(0, rows, chunk_size):
        end_row = min(start_row + chunk_size, rows)

        data_chunk_scipy = sp.random(
            end_row - start_row,
            cols,
            density=density,
            format=sparse_format_to_write,
            dtype=attr_dtype,
        )

        # A batch with no stored values has nothing to write.
        if data_chunk_scipy.nnz > 0:
            arr_writer.write_batch(data_chunk_scipy, start_row=start_row)

        # Report progress on every tenth batch only.
        if (start_row // chunk_size) % 10 == 0:
            print(f"  Sparse write: {end_row}/{rows} rows processed for writing.")

    print(f"Finished writing to sparse array '{uri}'.")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cellarr-array
|
|
3
|
-
Version: 0.0.3
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Base class for handling TileDB backed arrays.
|
|
5
5
|
Home-page: https://github.com/cellarr/cellarr-array
|
|
6
6
|
Author: Jayaram Kancherla
|
|
@@ -16,10 +16,13 @@ Requires-Dist: importlib-metadata; python_version < "3.8"
|
|
|
16
16
|
Requires-Dist: tiledb
|
|
17
17
|
Requires-Dist: numpy
|
|
18
18
|
Requires-Dist: scipy
|
|
19
|
+
Provides-Extra: optional
|
|
20
|
+
Requires-Dist: torch; extra == "optional"
|
|
19
21
|
Provides-Extra: testing
|
|
20
22
|
Requires-Dist: setuptools; extra == "testing"
|
|
21
23
|
Requires-Dist: pytest; extra == "testing"
|
|
22
24
|
Requires-Dist: pytest-cov; extra == "testing"
|
|
25
|
+
Requires-Dist: torch; extra == "testing"
|
|
23
26
|
Dynamic: license-file
|
|
24
27
|
|
|
25
28
|
[](https://pypi.org/project/cellarr-array/)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
cellarr_array/__init__.py,sha256=coBnoCq1_cv6FnnbowNt6wEIDfVl2GlGTkjnveP-8C4,707
|
|
2
|
+
cellarr_array/core/__init__.py,sha256=fvM-FEiDn8TKDbHxhhzp9FXZFNovFwvIUSY6SpLQRdk,98
|
|
3
|
+
cellarr_array/core/base.py,sha256=3FlhzZSh4ePz3Zm_dU8XNXJ6xgs7rKGi5HgCVWJLhXY,13458
|
|
4
|
+
cellarr_array/core/dense.py,sha256=LODRH4utpKs8xhT79Q2-nRiam_s68_a0qPj0unEM7rg,3940
|
|
5
|
+
cellarr_array/core/helpers.py,sha256=Z_2zRUULFTm7Lo9EpkGvIeRraP6XNDRB-o3rh9ChKQQ,7856
|
|
6
|
+
cellarr_array/core/sparse.py,sha256=XifIWhbTRAQ6qL096th-dCkqscNRwFZuTd7uaRf9aGM,8844
|
|
7
|
+
cellarr_array/dataloaders/__init__.py,sha256=U-MfwC2K84OIXT75in41fe_wvoxjUC5Krb5zICQn_O8,245
|
|
8
|
+
cellarr_array/dataloaders/denseloader.py,sha256=JYJlbuX5My64iIPW_-nlPFkNIezxL3Z3mkwInS3hH9M,7291
|
|
9
|
+
cellarr_array/dataloaders/iterabledataloader.py,sha256=lR2T1YatyBlDM5Sy_75B7_8ORiWfn3cp4q48Oujwf-c,11916
|
|
10
|
+
cellarr_array/dataloaders/sparseloader.py,sha256=V_eKw-Z_CNxHP8c2BN3sOuuv6RPiWBzRfW1BYLhNaQc,7962
|
|
11
|
+
cellarr_array/dataloaders/utils.py,sha256=buJ87x1YBTt5-nZoy_I5j6ko1lVlHdiGpQCusdLoRLI,600
|
|
12
|
+
cellarr_array/utils/__init__.py,sha256=DM5jeUMbxbRzTu2QCjpLlrTQ5uionF887S_7i6_952U,177
|
|
13
|
+
cellarr_array/utils/config.py,sha256=67zBxpYY9N_v6TMdyljUIZmckbwOBcuLC99aJooGmfA,2917
|
|
14
|
+
cellarr_array/utils/mock.py,sha256=7GyCbtM7u94pm7qhjsPRSO2IWYLmd4UrjyvLnQtMMkc,4579
|
|
15
|
+
cellarr_array-0.2.0.dist-info/licenses/LICENSE.txt,sha256=JUlHIfWcRe_MZop18pQvMIPLKSSPz3XQ06ASHuW5Wh8,1076
|
|
16
|
+
cellarr_array-0.2.0.dist-info/METADATA,sha256=NbNM3Gyu4t2f1odp26QiUutfic0IdnagSCnJUn9NLSs,4228
|
|
17
|
+
cellarr_array-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
18
|
+
cellarr_array-0.2.0.dist-info/top_level.txt,sha256=oErp0D8ABZV-QPtTiXT8_F2z36Ic7ykuDg_1Y84HLZM,14
|
|
19
|
+
cellarr_array-0.2.0.dist-info/RECORD,,
|
cellarr_array/CellArray.py
DELETED
|
@@ -1,251 +0,0 @@
|
|
|
1
|
-
from abc import ABC, abstractmethod
|
|
2
|
-
from contextlib import contextmanager
|
|
3
|
-
|
|
4
|
-
try:
|
|
5
|
-
from types import EllipsisType
|
|
6
|
-
except ImportError:
|
|
7
|
-
# TODO: This is required for Python <3.10. Remove once Python 3.9 reaches EOL in October 2025
|
|
8
|
-
EllipsisType = type(...)
|
|
9
|
-
from typing import List, Literal, Optional, Tuple, Union
|
|
10
|
-
|
|
11
|
-
import numpy as np
|
|
12
|
-
import tiledb
|
|
13
|
-
from scipy import sparse
|
|
14
|
-
|
|
15
|
-
from .config import ConsolidationConfig
|
|
16
|
-
from .helpers import SliceHelper
|
|
17
|
-
|
|
18
|
-
__author__ = "Jayaram Kancherla"
|
|
19
|
-
__copyright__ = "Jayaram Kancherla"
|
|
20
|
-
__license__ = "MIT"
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class CellArray(ABC):
    """Abstract base class for TileDB array operations.

    Schema-derived properties (``shape``, ``ndim``, ``dim_names``,
    ``attr_names``, ``nonempty_domain``) are read from the array on first
    access and cached on the instance afterwards.

    Subclasses implement ``_direct_slice``, ``_multi_index`` and
    ``write_batch``.
    """

    def __init__(
        self,
        uri: str,
        attr: str = "data",
        mode: Optional[Literal["r", "w", "m", "d"]] = None,
        config_or_context: Optional[Union[tiledb.Config, tiledb.Ctx]] = None,
        validate: bool = True,
    ):
        """Initialize the object.

        Args:
            uri:
                URI to the array.

            attr:
                Attribute to access.
                Defaults to "data".

            mode:
                Open the array object in read 'r', write 'w', modify
                exclusive 'm' mode, or delete 'd' mode.

                Defaults to None for automatic mode switching.

            config_or_context:
                Optional config or context object.

                Defaults to None.

            validate:
                Whether to validate the attributes.
                Defaults to True.

        Raises:
            ValueError:
                If ``mode`` is not one of the supported values, or if
                validation is enabled and the array has more than two
                dimensions or lacks ``attr``.

            TypeError:
                If ``config_or_context`` is neither a TileDB config nor a
                context object.
        """
        self.uri = uri
        # Go through the property setter so an unsupported mode is rejected
        # here instead of surfacing later when the array is opened.
        self.mode = mode

        if config_or_context is None:
            ctx = None
        elif isinstance(config_or_context, tiledb.Config):
            ctx = tiledb.Ctx(config_or_context)
        elif isinstance(config_or_context, tiledb.Ctx):
            ctx = config_or_context
        else:
            raise TypeError("'config_or_context' must be either TileDB config or a context object.")

        self._ctx = ctx
        self._array = None

        # Lazily populated caches for the schema-derived properties.
        self._shape = None
        self._ndim = None
        self._dim_names = None
        self._attr_names = None
        self._nonempty_domain = None

        if validate:
            self._validate(attr=attr)

        self._attr = attr

    def _validate(self, attr):
        """Check that the array is at most 2D and exposes ``attr``.

        Raises:
            ValueError:
                If the array has more than two dimensions or ``attr`` is not
                one of its attributes.
        """
        with self.open_array(mode="r") as A:
            if A.ndim > 2:
                raise ValueError("Only 1D and 2D arrays are supported.")

            if attr not in self.attr_names:
                raise ValueError(
                    f"Attribute '{attr}' does not exist in the array. Available attributes: {self.attr_names}."
                )

    @property
    def mode(self) -> Optional[str]:
        """Get current array mode."""
        return self._mode

    @mode.setter
    def mode(self, value: Optional[str]):
        """Set array mode.

        Args:
            value:
                One of `None`, 'r', 'w', or 'm', 'd'.

        Raises:
            ValueError:
                If ``value`` is not one of the supported modes.
        """
        if value is not None and value not in ["r", "w", "m", "d"]:
            raise ValueError("Mode must be one of: None, 'r', 'w', 'm', 'd'")
        self._mode = value

    @property
    def dim_names(self) -> List[str]:
        """Get dimension names of the array."""
        if self._dim_names is None:
            with self.open_array(mode="r") as A:
                self._dim_names = [dim.name for dim in A.schema.domain]
        return self._dim_names

    @property
    def attr_names(self) -> List[str]:
        """Get attribute names of the array."""
        if self._attr_names is None:
            with self.open_array(mode="r") as A:
                self._attr_names = [A.schema.attr(i).name for i in range(A.schema.nattr)]
        return self._attr_names

    @property
    def shape(self) -> Tuple[int, ...]:
        """Get array shape from schema domain."""
        if self._shape is None:
            with self.open_array(mode="r") as A:
                # Each dimension's domain is an inclusive (low, high) pair,
                # hence the +1 when converting to a length.
                self._shape = tuple(int(dim.domain[1] - dim.domain[0] + 1) for dim in A.schema.domain)
        return self._shape

    @property
    def nonempty_domain(self) -> Tuple[int, ...]:
        """Get array non-empty domain."""
        # NOTE(review): the value is cached on first access and nothing in
        # this class resets it, so it may be stale after later writes —
        # confirm whether subclasses invalidate `_nonempty_domain`.
        if self._nonempty_domain is None:
            with self.open_array(mode="r") as A:
                self._nonempty_domain = A.nonempty_domain()
        return self._nonempty_domain

    @property
    def ndim(self) -> int:
        """Get number of dimensions."""
        if self._ndim is None:
            self._ndim = len(self.shape)
        return self._ndim

    @contextmanager
    def open_array(self, mode: Optional[str] = None):
        """Context manager for array operations.

        The array is opened with this instance's context and is always
        closed when the ``with`` block exits.

        Args:
            mode:
                Override mode for this operation. Falls back to the
                instance mode, then to read mode.
        """
        mode = mode if mode is not None else self.mode
        mode = mode if mode is not None else "r"  # Default to read mode

        array = tiledb.open(self.uri, mode=mode, ctx=self._ctx)
        try:
            yield array
        finally:
            array.close()

    def __getitem__(self, key: Union[slice, EllipsisType, Tuple[Union[slice, List[int], EllipsisType], ...]]):
        """Get item implementation that routes to either direct slicing or multi_index
        based on the type of indices provided.

        Args:
            key:
                Slice or list of indices for each dimension in the array.

        Raises:
            IndexError:
                If ``key`` has more entries than the array has dimensions,
                contains more than one Ellipsis, or mixes an Ellipsis with
                multi-index access.
        """
        if not isinstance(key, tuple):
            key = (key,)

        if len(key) > self.ndim:
            raise IndexError(f"Invalid number of dimensions: got {len(key)}, expected {self.ndim}")

        # Normalize all indices
        normalized_key = tuple(SliceHelper.normalize_index(idx, self.shape[i]) for i, idx in enumerate(key))

        num_ellipsis = sum(isinstance(i, EllipsisType) for i in normalized_key)
        if num_ellipsis > 1:
            raise IndexError(f"Found more than 1 Ellipsis (...) in key: {normalized_key}")

        # Direct slicing is only possible when every index is a slice or an Ellipsis
        use_direct = all(isinstance(idx, (slice, EllipsisType)) for idx in normalized_key)
        if use_direct:
            return self._direct_slice(normalized_key)

        if num_ellipsis > 0:
            raise IndexError(f"tiledb does not support ellipsis in multi-index access: {normalized_key}")
        return self._multi_index(normalized_key)

    @abstractmethod
    def _direct_slice(self, key: Tuple[Union[slice, EllipsisType], ...]) -> np.ndarray:
        """Implementation for direct slicing."""
        pass

    @abstractmethod
    def _multi_index(self, key: Tuple[Union[slice, List[int]], ...]) -> np.ndarray:
        """Implementation for multi-index access."""
        pass

    def vacuum(self) -> None:
        """Remove deleted fragments from the array."""
        # Use the instance context so vacuuming runs with the same settings
        # the array is opened with.
        tiledb.vacuum(self.uri, ctx=self._ctx)

    def consolidate(self, config: Optional[ConsolidationConfig] = None) -> None:
        """Consolidate array fragments.

        Args:
            config:
                Optional consolidation configuration.
                Defaults to a ``ConsolidationConfig()`` built with no arguments.
        """
        if config is None:
            config = ConsolidationConfig()

        consolidation_cfg = tiledb.Config()

        consolidation_cfg["sm.consolidation.steps"] = config.steps
        consolidation_cfg["sm.consolidation.step_min_frags"] = config.step_min_frags
        consolidation_cfg["sm.consolidation.step_max_frags"] = config.step_max_frags
        consolidation_cfg["sm.consolidation.buffer_size"] = config.buffer_size
        consolidation_cfg["sm.mem.total_budget"] = config.total_budget

        # Use the instance context so consolidation runs with the same
        # settings the array is opened with.
        tiledb.consolidate(self.uri, config=consolidation_cfg, ctx=self._ctx)

        if config.vacuum_after:
            self.vacuum()

    @abstractmethod
    def write_batch(self, data: Union[np.ndarray, sparse.spmatrix], start_row: int, **kwargs) -> None:
        """Write a batch of data to the array starting at the specified row.

        Args:
            data:
                Data to write (numpy array for dense, scipy sparse matrix for sparse).

            start_row:
                Starting row index for writing.

            **kwargs:
                Additional arguments for write operation.
        """
        pass
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
cellarr_array/CellArray.py,sha256=vc_6oDLCpVgUaP8HsQz4vE0ZyJ1SPdX43s7VQyh7gF0,8204
|
|
2
|
-
cellarr_array/DenseCellArray.py,sha256=rlu2xq8SONwIswqe0TzRNCwM5f0HYgxr4QBtvbBe8ro,3953
|
|
3
|
-
cellarr_array/SparseCellArray.py,sha256=cOIbs_97j5u13FU7FfEfRNqAZi8rHUkypgLgRcubXrU,7304
|
|
4
|
-
cellarr_array/__init__.py,sha256=IUE9wMDISgRkWp-Fc0KJpDiezCJ61kzuTqS9HdK-JeE,779
|
|
5
|
-
cellarr_array/config.py,sha256=67zBxpYY9N_v6TMdyljUIZmckbwOBcuLC99aJooGmfA,2917
|
|
6
|
-
cellarr_array/helpers.py,sha256=ZqK_josEzKzTMP62P9pb4qBiOTisFofTCnu1LETYJT4,6449
|
|
7
|
-
cellarr_array-0.0.3.dist-info/licenses/LICENSE.txt,sha256=qI2hRZobcUlj8gqFqXwqt522HeYyWvHLF00zCSZofHA,1084
|
|
8
|
-
cellarr_array-0.0.3.dist-info/METADATA,sha256=1KgSZEEF2i9aCr4mkkyAmuPXht4y5ZG2-YdH4dBELpQ,4120
|
|
9
|
-
cellarr_array-0.0.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
10
|
-
cellarr_array-0.0.3.dist-info/top_level.txt,sha256=oErp0D8ABZV-QPtTiXT8_F2z36Ic7ykuDg_1Y84HLZM,14
|
|
11
|
-
cellarr_array-0.0.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|