cellarr-array 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cellarr-array might be problematic. Click here for more details.

@@ -0,0 +1,167 @@
1
+ import shutil
2
+ from typing import Dict, Optional
3
+
4
+ import numpy as np
5
+ import scipy.sparse as sp
6
+ import tiledb
7
+
8
+ from ..core import DenseCellArray, SparseCellArray
9
+ from ..core.helpers import CellArrConfig, create_cellarray
10
+
11
+ __author__ = "Jayaram Kancherla"
12
+ __copyright__ = "Jayaram Kancherla"
13
+ __license__ = "MIT"
14
+
15
+
16
def generate_tiledb_dense_array(
    uri: str,
    rows: int,
    cols: int,
    attr_name: str = "data",
    attr_dtype: np.dtype = np.float32,
    chunk_size: int = 1000,
    tiledb_config: Optional[Dict] = None,
) -> None:
    """Generates a dense TileDB array and fills it with random data.

    Values are drawn with ``np.random.rand`` (uniform in ``[0, 1)``) and cast
    to ``attr_dtype`` before being written, ``chunk_size`` rows at a time.
    If an array already exists at ``uri`` it is removed first.

    Args:
        uri:
            URI for the new TileDB array.

        rows:
            Number of rows.

        cols:
            Number of columns (features).

        attr_name:
            Name of the attribute.

        attr_dtype:
            Data type of the attribute. Defaults to ``np.float32``.

        chunk_size:
            Number of rows to write per batch. Must be positive.

        tiledb_config:
            TileDB context configuration.

    Raises:
        ValueError:
            If ``chunk_size`` is not positive.
    """
    # Validate before touching storage so a bad argument can never delete an
    # existing array and then fail half-way through re-creating it.
    if chunk_size <= 0:
        raise ValueError(f"'chunk_size' must be positive, got {chunk_size}.")

    if tiledb.array_exists(uri):
        print(f"Array {uri} already exists. Removing.")
        # NOTE(review): shutil.rmtree only handles local filesystem paths;
        # remote URIs (e.g. object stores) would need a TileDB-level removal.
        shutil.rmtree(uri)

    print(f"Creating dense array at '{uri}' with shape ({rows}, {cols})")
    cfg = CellArrConfig(ctx_config=tiledb_config if tiledb_config else {})

    # The array is created with create_cellarray's default configuration;
    # `cfg` is only used below to build the TileDB context for the writer.
    create_cellarray(
        uri=uri,
        shape=(rows, cols),
        attr_dtype=attr_dtype,
        sparse=False,
        dim_names=["rows", "cols"],
        attr_name=attr_name,
    )

    # An empty configuration means "use the default context".
    ctx = tiledb.Ctx(cfg.ctx_config) if cfg.ctx_config else None
    arr_writer = DenseCellArray(uri=uri, attr=attr_name, mode="w", config_or_context=ctx)

    print(f"Writing data to dense array '{uri}'...")
    for start_row in range(0, rows, chunk_size):
        # The final chunk may be shorter than chunk_size.
        end_row = min(start_row + chunk_size, rows)
        data_chunk = np.random.rand(end_row - start_row, cols).astype(attr_dtype)
        arr_writer.write_batch(data_chunk, start_row=start_row)

        # Report progress on the first chunk and every tenth chunk after it.
        if (start_row // chunk_size) % 10 == 0:
            print(f"  Dense write: {end_row}/{rows} rows written.")

    print(f"Finished writing to dense array '{uri}'.")
82
+
83
+
84
def generate_tiledb_sparse_array(
    uri: str,
    rows: int,
    cols: int,
    density: float = 0.01,
    attr_name: str = "data",
    attr_dtype: np.dtype = np.float32,
    chunk_size: int = 1000,
    tiledb_config: Optional[Dict] = None,
    sparse_format_to_write: str = "coo",
) -> None:
    """Generates a sparse TileDB array and fills it with random data.

    Each batch of up to ``chunk_size`` rows is generated with
    ``scipy.sparse.random`` using ``attr_dtype`` and written at its row
    offset. Batches that contain no non-zero entries are skipped. If an array
    already exists at ``uri`` it is removed first.

    Args:
        uri:
            URI for the new TileDB array.

        rows:
            Number of rows.

        cols:
            Number of columns (features).

        density:
            Density of the sparse matrix, between 0 and 1 inclusive.

        attr_name:
            Name of the attribute.

        attr_dtype:
            Data type of the attribute. Defaults to ``np.float32``.

        chunk_size:
            Number of rows to generate and write per batch. Must be positive.

        tiledb_config:
            TileDB context configuration.

        sparse_format_to_write:
            Scipy sparse format to use for generating chunks ('coo', 'csr', 'csc').

    Raises:
        ValueError:
            If ``chunk_size`` is not positive or ``density`` is outside
            ``[0, 1]``.
    """
    # Validate before touching storage so a bad argument can never delete an
    # existing array and then fail half-way through re-creating it.
    if chunk_size <= 0:
        raise ValueError(f"'chunk_size' must be positive, got {chunk_size}.")
    if not 0 <= density <= 1:
        raise ValueError(f"'density' must be between 0 and 1, got {density}.")

    if tiledb.array_exists(uri):
        print(f"Array {uri} already exists. Removing.")
        # NOTE(review): shutil.rmtree only handles local filesystem paths;
        # remote URIs (e.g. object stores) would need a TileDB-level removal.
        shutil.rmtree(uri)

    print(f"Creating sparse array at '{uri}' with shape ({rows}, {cols}), density ~{density}")
    cfg = CellArrConfig(ctx_config=tiledb_config if tiledb_config else {})

    # The array is created with create_cellarray's default configuration;
    # `cfg` is only used below to build the TileDB context for the writer.
    create_cellarray(
        uri=uri,
        shape=(rows, cols),
        attr_dtype=attr_dtype,
        sparse=True,
        dim_names=["rows", "cols"],
        attr_name=attr_name,
    )

    # An empty configuration means "use the default context".
    ctx = tiledb.Ctx(cfg.ctx_config) if cfg.ctx_config else None
    arr_writer = SparseCellArray(
        uri=uri,
        attr=attr_name,
        mode="w",
        config_or_context=ctx,
    )

    print(f"Writing data to sparse array '{uri}'...")
    for start_row in range(0, rows, chunk_size):
        # The final chunk may be shorter than chunk_size; it is never empty
        # because range() only yields offsets strictly below `rows`.
        end_row = min(start_row + chunk_size, rows)

        data_chunk_scipy = sp.random(
            end_row - start_row,
            cols,
            density=density,
            format=sparse_format_to_write,
            dtype=attr_dtype,
        )

        # Only write chunks that contain at least one stored value.
        if data_chunk_scipy.nnz > 0:
            arr_writer.write_batch(data_chunk_scipy, start_row=start_row)

        # Report progress on the first chunk and every tenth chunk after it.
        if (start_row // chunk_size) % 10 == 0:
            print(f"  Sparse write: {end_row}/{rows} rows processed for writing.")

    print(f"Finished writing to sparse array '{uri}'.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cellarr-array
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: Base class for handling TileDB backed arrays.
5
5
  Home-page: https://github.com/cellarr/cellarr-array
6
6
  Author: Jayaram Kancherla
@@ -16,10 +16,13 @@ Requires-Dist: importlib-metadata; python_version < "3.8"
16
16
  Requires-Dist: tiledb
17
17
  Requires-Dist: numpy
18
18
  Requires-Dist: scipy
19
+ Provides-Extra: optional
20
+ Requires-Dist: torch; extra == "optional"
19
21
  Provides-Extra: testing
20
22
  Requires-Dist: setuptools; extra == "testing"
21
23
  Requires-Dist: pytest; extra == "testing"
22
24
  Requires-Dist: pytest-cov; extra == "testing"
25
+ Requires-Dist: torch; extra == "testing"
23
26
  Dynamic: license-file
24
27
 
25
28
  [![PyPI-Server](https://img.shields.io/pypi/v/cellarr-array.svg)](https://pypi.org/project/cellarr-array/)
@@ -0,0 +1,19 @@
1
+ cellarr_array/__init__.py,sha256=coBnoCq1_cv6FnnbowNt6wEIDfVl2GlGTkjnveP-8C4,707
2
+ cellarr_array/core/__init__.py,sha256=fvM-FEiDn8TKDbHxhhzp9FXZFNovFwvIUSY6SpLQRdk,98
3
+ cellarr_array/core/base.py,sha256=3FlhzZSh4ePz3Zm_dU8XNXJ6xgs7rKGi5HgCVWJLhXY,13458
4
+ cellarr_array/core/dense.py,sha256=LODRH4utpKs8xhT79Q2-nRiam_s68_a0qPj0unEM7rg,3940
5
+ cellarr_array/core/helpers.py,sha256=Z_2zRUULFTm7Lo9EpkGvIeRraP6XNDRB-o3rh9ChKQQ,7856
6
+ cellarr_array/core/sparse.py,sha256=XifIWhbTRAQ6qL096th-dCkqscNRwFZuTd7uaRf9aGM,8844
7
+ cellarr_array/dataloaders/__init__.py,sha256=U-MfwC2K84OIXT75in41fe_wvoxjUC5Krb5zICQn_O8,245
8
+ cellarr_array/dataloaders/denseloader.py,sha256=JYJlbuX5My64iIPW_-nlPFkNIezxL3Z3mkwInS3hH9M,7291
9
+ cellarr_array/dataloaders/iterabledataloader.py,sha256=lR2T1YatyBlDM5Sy_75B7_8ORiWfn3cp4q48Oujwf-c,11916
10
+ cellarr_array/dataloaders/sparseloader.py,sha256=V_eKw-Z_CNxHP8c2BN3sOuuv6RPiWBzRfW1BYLhNaQc,7962
11
+ cellarr_array/dataloaders/utils.py,sha256=buJ87x1YBTt5-nZoy_I5j6ko1lVlHdiGpQCusdLoRLI,600
12
+ cellarr_array/utils/__init__.py,sha256=DM5jeUMbxbRzTu2QCjpLlrTQ5uionF887S_7i6_952U,177
13
+ cellarr_array/utils/config.py,sha256=67zBxpYY9N_v6TMdyljUIZmckbwOBcuLC99aJooGmfA,2917
14
+ cellarr_array/utils/mock.py,sha256=7GyCbtM7u94pm7qhjsPRSO2IWYLmd4UrjyvLnQtMMkc,4579
15
+ cellarr_array-0.2.0.dist-info/licenses/LICENSE.txt,sha256=JUlHIfWcRe_MZop18pQvMIPLKSSPz3XQ06ASHuW5Wh8,1076
16
+ cellarr_array-0.2.0.dist-info/METADATA,sha256=NbNM3Gyu4t2f1odp26QiUutfic0IdnagSCnJUn9NLSs,4228
17
+ cellarr_array-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
18
+ cellarr_array-0.2.0.dist-info/top_level.txt,sha256=oErp0D8ABZV-QPtTiXT8_F2z36Ic7ykuDg_1Y84HLZM,14
19
+ cellarr_array-0.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.7.1)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,11 +0,0 @@
1
- cellarr_array/__init__.py,sha256=iCU5zmXXmTwk-VuwrTdVl5STRAL2xeYpq05fL9_bW6w,781
2
- cellarr_array/cellarray_base.py,sha256=CSYsA_Ra-RcwsyHzwayL-w10EhpbIC3u7ZAbyQMO6ks,13451
3
- cellarr_array/cellarray_dense.py,sha256=skunPy_WyOMuS_3SxcAW_gm8d5FiWeV7ZCQp4HLRUUY,3958
4
- cellarr_array/cellarray_sparse.py,sha256=YYZymvWGDG1c2EeOLMBPP5_u4qM8uhxyWJY6PnFWMVo,9112
5
- cellarr_array/config.py,sha256=67zBxpYY9N_v6TMdyljUIZmckbwOBcuLC99aJooGmfA,2917
6
- cellarr_array/helpers.py,sha256=eIeymmvY4KZ-cAiROo3DcYYzP39NQBj-4Nrba9rrEKQ,6491
7
- cellarr_array-0.1.0.dist-info/licenses/LICENSE.txt,sha256=JUlHIfWcRe_MZop18pQvMIPLKSSPz3XQ06ASHuW5Wh8,1076
8
- cellarr_array-0.1.0.dist-info/METADATA,sha256=ELBRCXkEyxhPeGHlA62i2QIzz7yYlLUSy7bfOe6aAdE,4120
9
- cellarr_array-0.1.0.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
10
- cellarr_array-0.1.0.dist-info/top_level.txt,sha256=oErp0D8ABZV-QPtTiXT8_F2z36Ic7ykuDg_1Y84HLZM,14
11
- cellarr_array-0.1.0.dist-info/RECORD,,
File without changes