cellarr-array 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cellarr-array might be problematic. Click here for more details.
- cellarr_array/__init__.py +2 -4
- cellarr_array/core/__init__.py +3 -0
- cellarr_array/{cellarray_base.py → core/base.py} +1 -1
- cellarr_array/{cellarray_dense.py → core/dense.py} +2 -3
- cellarr_array/{helpers.py → core/helpers.py} +77 -43
- cellarr_array/{cellarray_sparse.py → core/sparse.py} +11 -16
- cellarr_array/dataloaders/__init__.py +3 -0
- cellarr_array/dataloaders/denseloader.py +198 -0
- cellarr_array/dataloaders/iterabledataloader.py +320 -0
- cellarr_array/dataloaders/sparseloader.py +230 -0
- cellarr_array/dataloaders/utils.py +26 -0
- cellarr_array/utils/__init__.py +3 -0
- cellarr_array/utils/mock.py +167 -0
- {cellarr_array-0.1.0.dist-info → cellarr_array-0.2.0.dist-info}/METADATA +4 -1
- cellarr_array-0.2.0.dist-info/RECORD +19 -0
- {cellarr_array-0.1.0.dist-info → cellarr_array-0.2.0.dist-info}/WHEEL +1 -1
- cellarr_array-0.1.0.dist-info/RECORD +0 -11
- /cellarr_array/{config.py → utils/config.py} +0 -0
- {cellarr_array-0.1.0.dist-info → cellarr_array-0.2.0.dist-info}/licenses/LICENSE.txt +0 -0
- {cellarr_array-0.1.0.dist-info → cellarr_array-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
from typing import Dict, Optional
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import scipy.sparse as sp
|
|
6
|
+
import tiledb
|
|
7
|
+
|
|
8
|
+
from ..core import DenseCellArray, SparseCellArray
|
|
9
|
+
from ..core.helpers import CellArrConfig, create_cellarray
|
|
10
|
+
|
|
11
|
+
__author__ = "Jayaram Kancherla"
|
|
12
|
+
__copyright__ = "Jayaram Kancherla"
|
|
13
|
+
__license__ = "MIT"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def generate_tiledb_dense_array(
    uri: str,
    rows: int,
    cols: int,
    attr_name: str = "data",
    attr_dtype: np.dtype = np.float32,
    chunk_size: int = 1000,
    tiledb_config: Optional[Dict] = None,
):
    """Generate a dense TileDB array and fill it with random data.

    Values are drawn with ``np.random.rand`` (uniform on ``[0, 1)``) and then
    cast to ``attr_dtype``. If an array already exists at ``uri`` it is
    removed before the new one is created.

    Args:
        uri:
            URI for the new TileDB array.

        rows:
            Number of rows.

        cols:
            Number of columns (features).

        attr_name:
            Name of the attribute.

        attr_dtype:
            Data type of the attribute.

        chunk_size:
            Number of rows to write per batch. Must be positive.

        tiledb_config:
            TileDB context configuration.

    Raises:
        ValueError:
            If ``chunk_size`` is not a positive integer.
    """
    # Validate before touching storage so a bad argument never deletes an
    # existing array; a negative step would otherwise silently write nothing.
    if chunk_size <= 0:
        raise ValueError(f"'chunk_size' must be a positive integer, got {chunk_size}.")

    if tiledb.array_exists(uri):
        print(f"Array {uri} already exists. Removing.")
        # NOTE(review): shutil.rmtree operates on local filesystem paths only;
        # confirm callers never pass remote (e.g. object-store) URIs here.
        shutil.rmtree(uri)

    print(f"Creating dense array at '{uri}' with shape ({rows}, {cols})")
    cfg = CellArrConfig(ctx_config=tiledb_config if tiledb_config else {})

    # The array itself is created with create_cellarray's default config;
    # `cfg` is only used to build the write context below.
    create_cellarray(
        uri=uri,
        shape=(rows, cols),
        attr_dtype=attr_dtype,
        sparse=False,
        dim_names=["rows", "cols"],
        attr_name=attr_name,
    )

    ctx = tiledb.Ctx(cfg.ctx_config) if cfg.ctx_config else None
    arr_writer = DenseCellArray(uri=uri, attr=attr_name, mode="w", config_or_context=ctx)

    print(f"Writing data to dense array '{uri}'...")
    for i in range(0, rows, chunk_size):
        # The final chunk may be shorter than chunk_size.
        end_row = min(i + chunk_size, rows)
        num_chunk_rows = end_row - i
        data_chunk = np.random.rand(num_chunk_rows, cols).astype(attr_dtype)
        arr_writer.write_batch(data_chunk, start_row=i)
        # Report progress on every tenth chunk only, to keep output short.
        if (i // chunk_size) % 10 == 0:
            print(f"  Dense write: {end_row}/{rows} rows written.")

    print(f"Finished writing to dense array '{uri}'.")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def generate_tiledb_sparse_array(
    uri: str,
    rows: int,
    cols: int,
    density: float = 0.01,
    attr_name: str = "data",
    attr_dtype: np.dtype = np.float32,
    chunk_size: int = 1000,
    tiledb_config: Optional[Dict] = None,
    sparse_format_to_write: str = "coo",
):
    """Generate a sparse TileDB array and fill it with random data.

    Each chunk is produced with ``scipy.sparse.random`` using the requested
    ``density``, ``attr_dtype`` and sparse format. If an array already exists
    at ``uri`` it is removed before the new one is created.

    Args:
        uri:
            URI for the new TileDB array.

        rows:
            Number of rows.

        cols:
            Number of columns (features).

        density:
            Density of the sparse matrix.

        attr_name:
            Name of the attribute.

        attr_dtype:
            Data type of the attribute.

        chunk_size:
            Number of rows to generate and write per batch. Must be positive.

        tiledb_config:
            TileDB context configuration.

        sparse_format_to_write:
            Scipy sparse format to use for generating chunks ('coo', 'csr', 'csc').

    Raises:
        ValueError:
            If ``chunk_size`` is not a positive integer.
    """
    # Validate before touching storage so a bad argument never deletes an
    # existing array; a negative step would otherwise silently write nothing.
    if chunk_size <= 0:
        raise ValueError(f"'chunk_size' must be a positive integer, got {chunk_size}.")

    if tiledb.array_exists(uri):
        print(f"Array {uri} already exists. Removing.")
        # NOTE(review): shutil.rmtree operates on local filesystem paths only;
        # confirm callers never pass remote (e.g. object-store) URIs here.
        shutil.rmtree(uri)

    print(f"Creating sparse array at '{uri}' with shape ({rows}, {cols}), density ~{density}")
    cfg = CellArrConfig(ctx_config=tiledb_config if tiledb_config else {})

    # The array itself is created with create_cellarray's default config;
    # `cfg` is only used to build the write context below.
    create_cellarray(
        uri=uri,
        shape=(rows, cols),
        attr_dtype=attr_dtype,
        sparse=True,
        dim_names=["rows", "cols"],
        attr_name=attr_name,
    )

    ctx = tiledb.Ctx(cfg.ctx_config) if cfg.ctx_config else None
    arr_writer = SparseCellArray(
        uri=uri,
        attr=attr_name,
        mode="w",
        config_or_context=ctx,
    )

    print(f"Writing data to sparse array '{uri}'...")
    for i in range(0, rows, chunk_size):
        # The final chunk may be shorter than chunk_size. With a positive
        # step, range() guarantees i < rows, so every chunk has >= 1 row.
        end_row = min(i + chunk_size, rows)
        num_chunk_rows = end_row - i

        data_chunk_scipy = sp.random(
            num_chunk_rows, cols, density=density, format=sparse_format_to_write, dtype=attr_dtype
        )

        # A low density can yield a chunk with no stored values; skip the write.
        if data_chunk_scipy.nnz > 0:
            arr_writer.write_batch(data_chunk_scipy, start_row=i)

        # Report progress on every tenth chunk only, to keep output short.
        if (i // chunk_size) % 10 == 0:
            print(f"  Sparse write: {end_row}/{rows} rows processed for writing.")

    print(f"Finished writing to sparse array '{uri}'.")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cellarr-array
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Base class for handling TileDB backed arrays.
|
|
5
5
|
Home-page: https://github.com/cellarr/cellarr-array
|
|
6
6
|
Author: Jayaram Kancherla
|
|
@@ -16,10 +16,13 @@ Requires-Dist: importlib-metadata; python_version < "3.8"
|
|
|
16
16
|
Requires-Dist: tiledb
|
|
17
17
|
Requires-Dist: numpy
|
|
18
18
|
Requires-Dist: scipy
|
|
19
|
+
Provides-Extra: optional
|
|
20
|
+
Requires-Dist: torch; extra == "optional"
|
|
19
21
|
Provides-Extra: testing
|
|
20
22
|
Requires-Dist: setuptools; extra == "testing"
|
|
21
23
|
Requires-Dist: pytest; extra == "testing"
|
|
22
24
|
Requires-Dist: pytest-cov; extra == "testing"
|
|
25
|
+
Requires-Dist: torch; extra == "testing"
|
|
23
26
|
Dynamic: license-file
|
|
24
27
|
|
|
25
28
|
[cellarr-array on PyPI](https://pypi.org/project/cellarr-array/)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
cellarr_array/__init__.py,sha256=coBnoCq1_cv6FnnbowNt6wEIDfVl2GlGTkjnveP-8C4,707
|
|
2
|
+
cellarr_array/core/__init__.py,sha256=fvM-FEiDn8TKDbHxhhzp9FXZFNovFwvIUSY6SpLQRdk,98
|
|
3
|
+
cellarr_array/core/base.py,sha256=3FlhzZSh4ePz3Zm_dU8XNXJ6xgs7rKGi5HgCVWJLhXY,13458
|
|
4
|
+
cellarr_array/core/dense.py,sha256=LODRH4utpKs8xhT79Q2-nRiam_s68_a0qPj0unEM7rg,3940
|
|
5
|
+
cellarr_array/core/helpers.py,sha256=Z_2zRUULFTm7Lo9EpkGvIeRraP6XNDRB-o3rh9ChKQQ,7856
|
|
6
|
+
cellarr_array/core/sparse.py,sha256=XifIWhbTRAQ6qL096th-dCkqscNRwFZuTd7uaRf9aGM,8844
|
|
7
|
+
cellarr_array/dataloaders/__init__.py,sha256=U-MfwC2K84OIXT75in41fe_wvoxjUC5Krb5zICQn_O8,245
|
|
8
|
+
cellarr_array/dataloaders/denseloader.py,sha256=JYJlbuX5My64iIPW_-nlPFkNIezxL3Z3mkwInS3hH9M,7291
|
|
9
|
+
cellarr_array/dataloaders/iterabledataloader.py,sha256=lR2T1YatyBlDM5Sy_75B7_8ORiWfn3cp4q48Oujwf-c,11916
|
|
10
|
+
cellarr_array/dataloaders/sparseloader.py,sha256=V_eKw-Z_CNxHP8c2BN3sOuuv6RPiWBzRfW1BYLhNaQc,7962
|
|
11
|
+
cellarr_array/dataloaders/utils.py,sha256=buJ87x1YBTt5-nZoy_I5j6ko1lVlHdiGpQCusdLoRLI,600
|
|
12
|
+
cellarr_array/utils/__init__.py,sha256=DM5jeUMbxbRzTu2QCjpLlrTQ5uionF887S_7i6_952U,177
|
|
13
|
+
cellarr_array/utils/config.py,sha256=67zBxpYY9N_v6TMdyljUIZmckbwOBcuLC99aJooGmfA,2917
|
|
14
|
+
cellarr_array/utils/mock.py,sha256=7GyCbtM7u94pm7qhjsPRSO2IWYLmd4UrjyvLnQtMMkc,4579
|
|
15
|
+
cellarr_array-0.2.0.dist-info/licenses/LICENSE.txt,sha256=JUlHIfWcRe_MZop18pQvMIPLKSSPz3XQ06ASHuW5Wh8,1076
|
|
16
|
+
cellarr_array-0.2.0.dist-info/METADATA,sha256=NbNM3Gyu4t2f1odp26QiUutfic0IdnagSCnJUn9NLSs,4228
|
|
17
|
+
cellarr_array-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
18
|
+
cellarr_array-0.2.0.dist-info/top_level.txt,sha256=oErp0D8ABZV-QPtTiXT8_F2z36Ic7ykuDg_1Y84HLZM,14
|
|
19
|
+
cellarr_array-0.2.0.dist-info/RECORD,,
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
cellarr_array/__init__.py,sha256=iCU5zmXXmTwk-VuwrTdVl5STRAL2xeYpq05fL9_bW6w,781
|
|
2
|
-
cellarr_array/cellarray_base.py,sha256=CSYsA_Ra-RcwsyHzwayL-w10EhpbIC3u7ZAbyQMO6ks,13451
|
|
3
|
-
cellarr_array/cellarray_dense.py,sha256=skunPy_WyOMuS_3SxcAW_gm8d5FiWeV7ZCQp4HLRUUY,3958
|
|
4
|
-
cellarr_array/cellarray_sparse.py,sha256=YYZymvWGDG1c2EeOLMBPP5_u4qM8uhxyWJY6PnFWMVo,9112
|
|
5
|
-
cellarr_array/config.py,sha256=67zBxpYY9N_v6TMdyljUIZmckbwOBcuLC99aJooGmfA,2917
|
|
6
|
-
cellarr_array/helpers.py,sha256=eIeymmvY4KZ-cAiROo3DcYYzP39NQBj-4Nrba9rrEKQ,6491
|
|
7
|
-
cellarr_array-0.1.0.dist-info/licenses/LICENSE.txt,sha256=JUlHIfWcRe_MZop18pQvMIPLKSSPz3XQ06ASHuW5Wh8,1076
|
|
8
|
-
cellarr_array-0.1.0.dist-info/METADATA,sha256=ELBRCXkEyxhPeGHlA62i2QIzz7yYlLUSy7bfOe6aAdE,4120
|
|
9
|
-
cellarr_array-0.1.0.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
|
10
|
-
cellarr_array-0.1.0.dist-info/top_level.txt,sha256=oErp0D8ABZV-QPtTiXT8_F2z36Ic7ykuDg_1Y84HLZM,14
|
|
11
|
-
cellarr_array-0.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|