cellarr-array 0.0.1__tar.gz → 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cellarr-array might be problematic. Click here for more details.
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/.github/workflows/publish-pypi.yml +1 -1
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/.pre-commit-config.yaml +1 -1
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/CHANGELOG.md +8 -4
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/PKG-INFO +31 -11
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/README.md +28 -9
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/docs/conf.py +1 -1
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/docs/index.md +1 -12
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/setup.py +5 -5
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/src/cellarr_array/CellArray.py +26 -11
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/src/cellarr_array/DenseCellArray.py +6 -1
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/src/cellarr_array/SparseCellArray.py +6 -1
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/src/cellarr_array/__init__.py +1 -1
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/src/cellarr_array/helpers.py +9 -1
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/src/cellarr_array.egg-info/PKG-INFO +31 -11
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/src/cellarr_array.egg-info/SOURCES.txt +1 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/tests/conftest.py +1 -1
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/tests/test_all.py +2 -2
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/tests/test_dense.py +17 -0
- cellarr_array-0.0.3/tests/test_inmemory.py +31 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/tests/test_sparse.py +5 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/.coveragerc +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/.github/workflows/run-tests.yml +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/.gitignore +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/.readthedocs.yml +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/AUTHORS.md +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/CONTRIBUTING.md +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/LICENSE.txt +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/docs/Makefile +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/docs/_static/.gitignore +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/docs/authors.md +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/docs/changelog.md +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/docs/contributing.md +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/docs/license.md +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/docs/readme.md +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/docs/requirements.txt +2 -2
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/pyproject.toml +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/setup.cfg +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/src/cellarr_array/config.py +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/src/cellarr_array.egg-info/dependency_links.txt +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/src/cellarr_array.egg-info/not-zip-safe +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/src/cellarr_array.egg-info/requires.txt +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/src/cellarr_array.egg-info/top_level.txt +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/tests/test_helpers.py +0 -0
- {cellarr_array-0.0.1 → cellarr_array-0.0.3}/tox.ini +0 -0
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## Version 0.0.2
|
|
4
|
+
|
|
5
|
+
- Support in-memory tiledb objects. Updated tests and documentation.
|
|
6
|
+
|
|
3
7
|
## Version 0.0.1
|
|
4
8
|
|
|
5
|
-
Initial implementation of the sparse and dense arrays backed by TileDB.
|
|
9
|
+
Initial implementation of the sparse and dense arrays backed by TileDB.
|
|
6
10
|
|
|
7
11
|
- Supports reading of objects
|
|
8
|
-
- Directly slices the TileDB object if all arguments to subset are contiguous blocks.
|
|
9
|
-
- Otherwise redirects them to `multi_index`; if one of the arguments to subset is a slice, drops the last element because of inclusive upper bounds in this method.
|
|
12
|
+
- Directly slices the TileDB object if all arguments to subset are contiguous blocks.
|
|
13
|
+
- Otherwise redirects them to `multi_index`; if one of the arguments to subset is a slice, drops the last element because of inclusive upper bounds in this method.
|
|
10
14
|
|
|
11
15
|
This helps keep slicing consistent across various operations while trying to be performant in the process.
|
|
12
16
|
|
|
13
|
-
- Supports writing of various data objects into dense and sparse arrays. Expects all chunks to be aligned along the rows.
|
|
17
|
+
- Supports writing of various data objects into dense and sparse arrays. Expects all chunks to be aligned along the rows.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: cellarr-array
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.3
|
|
4
4
|
Summary: Base class for handling TileDB backed arrays.
|
|
5
5
|
Home-page: https://github.com/cellarr/cellarr-array
|
|
6
6
|
Author: Jayaram Kancherla
|
|
@@ -20,13 +20,14 @@ Provides-Extra: testing
|
|
|
20
20
|
Requires-Dist: setuptools; extra == "testing"
|
|
21
21
|
Requires-Dist: pytest; extra == "testing"
|
|
22
22
|
Requires-Dist: pytest-cov; extra == "testing"
|
|
23
|
+
Dynamic: license-file
|
|
23
24
|
|
|
24
25
|
[](https://pypi.org/project/cellarr-array/)
|
|
25
26
|

|
|
26
27
|
|
|
27
28
|
# cellarr-array
|
|
28
29
|
|
|
29
|
-
This package provided high-level wrappers for TileDB arrays
|
|
30
|
+
This package provides high-level wrappers for TileDB arrays for handling genomic data matrices.
|
|
30
31
|
|
|
31
32
|
## Install
|
|
32
33
|
|
|
@@ -116,24 +117,43 @@ subset = dense_array[100:200, genes]
|
|
|
116
117
|
### Working with Sparse Arrays
|
|
117
118
|
|
|
118
119
|
```python
|
|
119
|
-
|
|
120
|
-
|
|
120
|
+
from cellarr_array import SparseCellArray
|
|
121
|
+
|
|
122
|
+
# Create a sparse array with CSR output format
|
|
123
|
+
csr_array = SparseCellArray(
|
|
121
124
|
uri="sparse_matrix.tdb",
|
|
122
|
-
|
|
125
|
+
return_sparse=True
|
|
123
126
|
)
|
|
124
127
|
|
|
125
|
-
# Get result as
|
|
126
|
-
result =
|
|
128
|
+
# Get result as CSR matrix
|
|
129
|
+
result = csr_array[100:200, 500:1000]
|
|
127
130
|
|
|
128
131
|
# Result is scipy.sparse.csr_matrix
|
|
129
|
-
assert sparse.
|
|
132
|
+
assert sparse.isspmatrix_csr(result)
|
|
130
133
|
|
|
131
134
|
# Perform sparse operations
|
|
132
135
|
nnz = result.nnz
|
|
133
136
|
density = result.nnz / (result.shape[0] * result.shape[1])
|
|
134
137
|
|
|
135
138
|
# Convert to other sparse formats if needed
|
|
136
|
-
|
|
139
|
+
result_csc = result.tocsc()
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Likewise create a CSC output format
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
from scipy import sparse
|
|
146
|
+
|
|
147
|
+
# Create a sparse array with CSC output format
|
|
148
|
+
csc_array = SparseCellArray(
|
|
149
|
+
uri="sparse_matrix.tdb",
|
|
150
|
+
return_sparse=True,
|
|
151
|
+
sparse_coerce=sparse.csc_matrix
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
# Get result as CSC matrix
|
|
155
|
+
result = csc_array[100:200, 500:1000]
|
|
156
|
+
print(result)
|
|
137
157
|
```
|
|
138
158
|
|
|
139
159
|
### Array Maintenance
|
|
@@ -144,7 +164,7 @@ array.consolidate()
|
|
|
144
164
|
|
|
145
165
|
# Custom consolidation
|
|
146
166
|
config = ConsolidationConfig(
|
|
147
|
-
steps=
|
|
167
|
+
steps=2,
|
|
148
168
|
vacuum_after=True
|
|
149
169
|
)
|
|
150
170
|
array.consolidate(config)
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
# cellarr-array
|
|
5
5
|
|
|
6
|
-
This package provided high-level wrappers for TileDB arrays
|
|
6
|
+
This package provides high-level wrappers for TileDB arrays for handling genomic data matrices.
|
|
7
7
|
|
|
8
8
|
## Install
|
|
9
9
|
|
|
@@ -93,24 +93,43 @@ subset = dense_array[100:200, genes]
|
|
|
93
93
|
### Working with Sparse Arrays
|
|
94
94
|
|
|
95
95
|
```python
|
|
96
|
-
|
|
97
|
-
|
|
96
|
+
from cellarr_array import SparseCellArray
|
|
97
|
+
|
|
98
|
+
# Create a sparse array with CSR output format
|
|
99
|
+
csr_array = SparseCellArray(
|
|
98
100
|
uri="sparse_matrix.tdb",
|
|
99
|
-
|
|
101
|
+
return_sparse=True
|
|
100
102
|
)
|
|
101
103
|
|
|
102
|
-
# Get result as
|
|
103
|
-
result =
|
|
104
|
+
# Get result as CSR matrix
|
|
105
|
+
result = csr_array[100:200, 500:1000]
|
|
104
106
|
|
|
105
107
|
# Result is scipy.sparse.csr_matrix
|
|
106
|
-
assert sparse.
|
|
108
|
+
assert sparse.isspmatrix_csr(result)
|
|
107
109
|
|
|
108
110
|
# Perform sparse operations
|
|
109
111
|
nnz = result.nnz
|
|
110
112
|
density = result.nnz / (result.shape[0] * result.shape[1])
|
|
111
113
|
|
|
112
114
|
# Convert to other sparse formats if needed
|
|
113
|
-
|
|
115
|
+
result_csc = result.tocsc()
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Likewise create a CSC output format
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
from scipy import sparse
|
|
122
|
+
|
|
123
|
+
# Create a sparse array with CSC output format
|
|
124
|
+
csc_array = SparseCellArray(
|
|
125
|
+
uri="sparse_matrix.tdb",
|
|
126
|
+
return_sparse=True,
|
|
127
|
+
sparse_coerce=sparse.csc_matrix
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
# Get result as CSC matrix
|
|
131
|
+
result = csc_array[100:200, 500:1000]
|
|
132
|
+
print(result)
|
|
114
133
|
```
|
|
115
134
|
|
|
116
135
|
### Array Maintenance
|
|
@@ -121,7 +140,7 @@ array.consolidate()
|
|
|
121
140
|
|
|
122
141
|
# Custom consolidation
|
|
123
142
|
config = ConsolidationConfig(
|
|
124
|
-
steps=
|
|
143
|
+
steps=2,
|
|
125
144
|
vacuum_after=True
|
|
126
145
|
)
|
|
127
146
|
array.consolidate(config)
|
|
@@ -311,7 +311,7 @@ extensions.remove('myst_parser')
|
|
|
311
311
|
extensions.append('myst_nb')
|
|
312
312
|
|
|
313
313
|
# Less verbose api documentation
|
|
314
|
-
|
|
314
|
+
extensions.append('sphinx_autodoc_typehints')
|
|
315
315
|
|
|
316
316
|
autodoc_default_options = {
|
|
317
317
|
"special-members": True,
|
|
@@ -1,17 +1,6 @@
|
|
|
1
1
|
# cellarr-array
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
## Note
|
|
7
|
-
|
|
8
|
-
> This is the main page of your project's [Sphinx] documentation. It is
|
|
9
|
-
> formatted in [Markdown]. Add additional pages by creating md-files in
|
|
10
|
-
> `docs` or rst-files (formatted in [reStructuredText]) and adding links to
|
|
11
|
-
> them in the `Contents` section below.
|
|
12
|
-
>
|
|
13
|
-
> Please check [Sphinx] and [MyST] for more information
|
|
14
|
-
> about how to document your project and how to configure your preferences.
|
|
3
|
+
This package provides high-level wrappers for TileDB arrays optimized for handling genomic data matrices.
|
|
15
4
|
|
|
16
5
|
|
|
17
6
|
## Contents
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
3
|
-
|
|
2
|
+
Setup file for cellarr-array.
|
|
3
|
+
Use setup.cfg to configure your project.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
5
|
+
This file was generated with PyScaffold 4.6.
|
|
6
|
+
PyScaffold helps you to put up the scaffold of your new Python project.
|
|
7
|
+
Learn more under: https://pyscaffold.org/
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
from setuptools import setup
|
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
2
|
from contextlib import contextmanager
|
|
3
|
+
|
|
4
|
+
try:
|
|
5
|
+
from types import EllipsisType
|
|
6
|
+
except ImportError:
|
|
7
|
+
# TODO: This is required for Python <3.10. Remove once Python 3.9 reaches EOL in October 2025
|
|
8
|
+
EllipsisType = type(...)
|
|
3
9
|
from typing import List, Literal, Optional, Tuple, Union
|
|
4
10
|
|
|
5
11
|
import numpy as np
|
|
@@ -42,7 +48,8 @@ class CellArray(ABC):
|
|
|
42
48
|
Defaults to None for automatic mode switching.
|
|
43
49
|
|
|
44
50
|
config_or_context:
|
|
45
|
-
|
|
51
|
+
Optional config or context object.
|
|
52
|
+
|
|
46
53
|
Defaults to None.
|
|
47
54
|
|
|
48
55
|
validate:
|
|
@@ -53,14 +60,15 @@ class CellArray(ABC):
|
|
|
53
60
|
self._mode = mode
|
|
54
61
|
|
|
55
62
|
if config_or_context is None:
|
|
56
|
-
config_or_context = tiledb.Config()
|
|
57
|
-
|
|
58
|
-
if isinstance(config_or_context, tiledb.Config):
|
|
59
|
-
ctx = tiledb.Ctx(config_or_context)
|
|
60
|
-
elif isinstance(config_or_context, tiledb.Ctx):
|
|
61
|
-
ctx = config_or_context
|
|
63
|
+
# config_or_context = tiledb.Config()
|
|
64
|
+
ctx = None
|
|
62
65
|
else:
|
|
63
|
-
|
|
66
|
+
if isinstance(config_or_context, tiledb.Config):
|
|
67
|
+
ctx = tiledb.Ctx(config_or_context)
|
|
68
|
+
elif isinstance(config_or_context, tiledb.Ctx):
|
|
69
|
+
ctx = config_or_context
|
|
70
|
+
else:
|
|
71
|
+
raise TypeError("'config_or_context' must be either TileDB config or a context object.")
|
|
64
72
|
|
|
65
73
|
self._ctx = ctx
|
|
66
74
|
self._array = None
|
|
@@ -68,6 +76,7 @@ class CellArray(ABC):
|
|
|
68
76
|
self._ndim = None
|
|
69
77
|
self._dim_names = None
|
|
70
78
|
self._attr_names = None
|
|
79
|
+
self._nonempty_domain = None
|
|
71
80
|
|
|
72
81
|
if validate:
|
|
73
82
|
self._validate(attr=attr)
|
|
@@ -157,7 +166,7 @@ class CellArray(ABC):
|
|
|
157
166
|
finally:
|
|
158
167
|
array.close()
|
|
159
168
|
|
|
160
|
-
def __getitem__(self, key: Union[slice, Tuple[Union[slice, List[int]], ...]]):
|
|
169
|
+
def __getitem__(self, key: Union[slice, EllipsisType, Tuple[Union[slice, List[int]], ...], EllipsisType]):
|
|
161
170
|
"""Get item implementation that routes to either direct slicing or multi_index
|
|
162
171
|
based on the type of indices provided.
|
|
163
172
|
|
|
@@ -174,16 +183,22 @@ class CellArray(ABC):
|
|
|
174
183
|
# Normalize all indices
|
|
175
184
|
normalized_key = tuple(SliceHelper.normalize_index(idx, self.shape[i]) for i, idx in enumerate(key))
|
|
176
185
|
|
|
186
|
+
num_ellipsis = sum(isinstance(i, EllipsisType) for i in normalized_key)
|
|
187
|
+
if num_ellipsis > 1:
|
|
188
|
+
raise IndexError(f"Found more than 1 Ellipsis (...) in key: {normalized_key}")
|
|
189
|
+
|
|
177
190
|
# Check if we can use direct slicing
|
|
178
|
-
use_direct = all(isinstance(idx, slice) for idx in normalized_key)
|
|
191
|
+
use_direct = all(isinstance(idx, (slice, EllipsisType)) for idx in normalized_key)
|
|
179
192
|
|
|
180
193
|
if use_direct:
|
|
181
194
|
return self._direct_slice(normalized_key)
|
|
182
195
|
else:
|
|
196
|
+
if num_ellipsis > 0:
|
|
197
|
+
raise IndexError(f"tiledb does not support ellipsis in multi-index access: {normalized_key}")
|
|
183
198
|
return self._multi_index(normalized_key)
|
|
184
199
|
|
|
185
200
|
@abstractmethod
|
|
186
|
-
def _direct_slice(self, key: Tuple[slice, ...]) -> np.ndarray:
|
|
201
|
+
def _direct_slice(self, key: Tuple[Union[slice, EllipsisType], ...]) -> np.ndarray:
|
|
187
202
|
"""Implementation for direct slicing."""
|
|
188
203
|
pass
|
|
189
204
|
|
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
try:
|
|
2
|
+
from types import EllipsisType
|
|
3
|
+
except ImportError:
|
|
4
|
+
# TODO: This is required for Python <3.10. Remove once Python 3.9 reaches EOL in October 2025
|
|
5
|
+
EllipsisType = type(...)
|
|
1
6
|
from typing import List, Tuple, Union
|
|
2
7
|
|
|
3
8
|
import numpy as np
|
|
@@ -13,7 +18,7 @@ __license__ = "MIT"
|
|
|
13
18
|
class DenseCellArray(CellArray):
|
|
14
19
|
"""Implementation for dense TileDB arrays."""
|
|
15
20
|
|
|
16
|
-
def _direct_slice(self, key: Tuple[slice, ...]) -> np.ndarray:
|
|
21
|
+
def _direct_slice(self, key: Tuple[Union[slice, EllipsisType], ...]) -> np.ndarray:
|
|
17
22
|
"""Implementation for direct slicing of dense arrays.
|
|
18
23
|
|
|
19
24
|
Args:
|
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
try:
|
|
2
|
+
from types import EllipsisType
|
|
3
|
+
except ImportError:
|
|
4
|
+
# TODO: This is required for Python <3.10. Remove once Python 3.9 reaches EOL in October 2025
|
|
5
|
+
EllipsisType = type(...)
|
|
1
6
|
from typing import Dict, List, Optional, Tuple, Union
|
|
2
7
|
|
|
3
8
|
import numpy as np
|
|
@@ -118,7 +123,7 @@ class SparseCellArray(CellArray):
|
|
|
118
123
|
|
|
119
124
|
return sliced[key]
|
|
120
125
|
|
|
121
|
-
def _direct_slice(self, key: Tuple[slice, ...]) -> Union[np.ndarray, sparse.coo_matrix]:
|
|
126
|
+
def _direct_slice(self, key: Tuple[Union[slice, EllipsisType], ...]) -> Union[np.ndarray, sparse.coo_matrix]:
|
|
122
127
|
"""Implementation for direct slicing of sparse arrays."""
|
|
123
128
|
with self.open_array(mode="r") as array:
|
|
124
129
|
result = array[key]
|
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
try:
|
|
2
|
+
from types import EllipsisType
|
|
3
|
+
except ImportError:
|
|
4
|
+
# TODO: This is required for Python <3.10. Remove once Python 3.9 reaches EOL in October 2025
|
|
5
|
+
EllipsisType = type(...)
|
|
1
6
|
from typing import List, Optional, Tuple, Union
|
|
2
7
|
|
|
3
8
|
import numpy as np
|
|
@@ -150,9 +155,12 @@ class SliceHelper:
|
|
|
150
155
|
return None
|
|
151
156
|
|
|
152
157
|
@staticmethod
|
|
153
|
-
def normalize_index(idx: Union[int, slice, List[int]], dim_size: int) -> Union[slice, List[int]]:
|
|
158
|
+
def normalize_index(idx: Union[int, slice, List[int]], dim_size: int) -> Union[slice, List[int], EllipsisType]:
|
|
154
159
|
"""Normalize index to handle negative indices and ensure consistency."""
|
|
155
160
|
|
|
161
|
+
if isinstance(idx, EllipsisType):
|
|
162
|
+
return idx
|
|
163
|
+
|
|
156
164
|
# Convert ranges to slices
|
|
157
165
|
if isinstance(idx, range):
|
|
158
166
|
idx = slice(idx.start, idx.stop, idx.step)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: cellarr-array
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.3
|
|
4
4
|
Summary: Base class for handling TileDB backed arrays.
|
|
5
5
|
Home-page: https://github.com/cellarr/cellarr-array
|
|
6
6
|
Author: Jayaram Kancherla
|
|
@@ -20,13 +20,14 @@ Provides-Extra: testing
|
|
|
20
20
|
Requires-Dist: setuptools; extra == "testing"
|
|
21
21
|
Requires-Dist: pytest; extra == "testing"
|
|
22
22
|
Requires-Dist: pytest-cov; extra == "testing"
|
|
23
|
+
Dynamic: license-file
|
|
23
24
|
|
|
24
25
|
[](https://pypi.org/project/cellarr-array/)
|
|
25
26
|

|
|
26
27
|
|
|
27
28
|
# cellarr-array
|
|
28
29
|
|
|
29
|
-
This package provided high-level wrappers for TileDB arrays
|
|
30
|
+
This package provides high-level wrappers for TileDB arrays for handling genomic data matrices.
|
|
30
31
|
|
|
31
32
|
## Install
|
|
32
33
|
|
|
@@ -116,24 +117,43 @@ subset = dense_array[100:200, genes]
|
|
|
116
117
|
### Working with Sparse Arrays
|
|
117
118
|
|
|
118
119
|
```python
|
|
119
|
-
|
|
120
|
-
|
|
120
|
+
from cellarr_array import SparseCellArray
|
|
121
|
+
|
|
122
|
+
# Create a sparse array with CSR output format
|
|
123
|
+
csr_array = SparseCellArray(
|
|
121
124
|
uri="sparse_matrix.tdb",
|
|
122
|
-
|
|
125
|
+
return_sparse=True
|
|
123
126
|
)
|
|
124
127
|
|
|
125
|
-
# Get result as
|
|
126
|
-
result =
|
|
128
|
+
# Get result as CSR matrix
|
|
129
|
+
result = csr_array[100:200, 500:1000]
|
|
127
130
|
|
|
128
131
|
# Result is scipy.sparse.csr_matrix
|
|
129
|
-
assert sparse.
|
|
132
|
+
assert sparse.isspmatrix_csr(result)
|
|
130
133
|
|
|
131
134
|
# Perform sparse operations
|
|
132
135
|
nnz = result.nnz
|
|
133
136
|
density = result.nnz / (result.shape[0] * result.shape[1])
|
|
134
137
|
|
|
135
138
|
# Convert to other sparse formats if needed
|
|
136
|
-
|
|
139
|
+
result_csc = result.tocsc()
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Likewise create a CSC output format
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
from scipy import sparse
|
|
146
|
+
|
|
147
|
+
# Create a sparse array with CSC output format
|
|
148
|
+
csc_array = SparseCellArray(
|
|
149
|
+
uri="sparse_matrix.tdb",
|
|
150
|
+
return_sparse=True,
|
|
151
|
+
sparse_coerce=sparse.csc_matrix
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
# Get result as CSC matrix
|
|
155
|
+
result = csc_array[100:200, 500:1000]
|
|
156
|
+
print(result)
|
|
137
157
|
```
|
|
138
158
|
|
|
139
159
|
### Array Maintenance
|
|
@@ -144,7 +164,7 @@ array.consolidate()
|
|
|
144
164
|
|
|
145
165
|
# Custom consolidation
|
|
146
166
|
config = ConsolidationConfig(
|
|
147
|
-
steps=
|
|
167
|
+
steps=2,
|
|
148
168
|
vacuum_after=True
|
|
149
169
|
)
|
|
150
170
|
array.consolidate(config)
|
|
@@ -4,7 +4,7 @@ import numpy as np
|
|
|
4
4
|
import pytest
|
|
5
5
|
from scipy import sparse
|
|
6
6
|
|
|
7
|
-
from cellarr_array import CellArrConfig, ConsolidationConfig, DenseCellArray,
|
|
7
|
+
from cellarr_array import CellArrConfig, ConsolidationConfig, DenseCellArray, create_cellarray
|
|
8
8
|
|
|
9
9
|
__author__ = "Jayaram Kancherla"
|
|
10
10
|
__copyright__ = "Jayaram Kancherla"
|
|
@@ -21,7 +21,7 @@ def test_dimension_validation(temp_dir):
|
|
|
21
21
|
def test_attribute_validation(temp_dir):
|
|
22
22
|
uri = str(Path(temp_dir) / "attr_test")
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
create_cellarray(uri=uri, shape=(10, 10), attr_dtype=np.float32, attr_name="values")
|
|
25
25
|
|
|
26
26
|
with pytest.raises(ValueError, match="Attribute 'invalid' does not exist"):
|
|
27
27
|
DenseCellArray(uri, attr="invalid")
|
|
@@ -84,6 +84,11 @@ def test_1d_slicing(sample_dense_array_1d):
|
|
|
84
84
|
result = sample_dense_array_1d[-10:]
|
|
85
85
|
np.testing.assert_array_almost_equal(result, data[-10:])
|
|
86
86
|
|
|
87
|
+
# Ellipsis
|
|
88
|
+
result = sample_dense_array_1d[...]
|
|
89
|
+
actual = data[...]
|
|
90
|
+
np.testing.assert_array_almost_equal(result, actual), f"{actual} != {result}"
|
|
91
|
+
|
|
87
92
|
|
|
88
93
|
def test_2d_slicing(sample_dense_array_2d):
|
|
89
94
|
data = np.random.random((100, 50)).astype(np.float32)
|
|
@@ -105,6 +110,18 @@ def test_2d_slicing(sample_dense_array_2d):
|
|
|
105
110
|
result = sample_dense_array_2d[-10:, -5:]
|
|
106
111
|
np.testing.assert_array_almost_equal(result, data[-10:, -5:])
|
|
107
112
|
|
|
113
|
+
# Ellipsis
|
|
114
|
+
result = sample_dense_array_2d[..., :1]
|
|
115
|
+
np.testing.assert_array_almost_equal(result, data[..., :1])
|
|
116
|
+
result = sample_dense_array_2d[..., :]
|
|
117
|
+
np.testing.assert_array_almost_equal(result, data[..., :])
|
|
118
|
+
result = sample_dense_array_2d[-1:, ...]
|
|
119
|
+
np.testing.assert_array_almost_equal(result, data[-1:, ...])
|
|
120
|
+
with pytest.raises(IndexError):
|
|
121
|
+
_ = sample_dense_array_2d[..., ...]
|
|
122
|
+
with pytest.raises(IndexError):
|
|
123
|
+
_ = sample_dense_array_2d[[0, 3], ...]
|
|
124
|
+
|
|
108
125
|
|
|
109
126
|
def test_multi_index_access(sample_dense_array_2d):
|
|
110
127
|
data = np.random.random((100, 50)).astype(np.float32)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import scipy as sp
|
|
3
|
+
|
|
4
|
+
from cellarr_array import create_cellarray
|
|
5
|
+
|
|
6
|
+
__author__ = "Jayaram Kancherla"
|
|
7
|
+
__copyright__ = "Jayaram Kancherla"
|
|
8
|
+
__license__ = "MIT"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_inmem_uri():
|
|
12
|
+
shape = (10_000, 10_000)
|
|
13
|
+
arr = np.arange(100_000_000).reshape(shape)
|
|
14
|
+
uri = "mem://dense"
|
|
15
|
+
|
|
16
|
+
dense_inmem = create_cellarray(uri, shape=(shape))
|
|
17
|
+
dense_inmem.write_batch(arr, start_row=0)
|
|
18
|
+
|
|
19
|
+
assert np.allclose(dense_inmem[:10, :10], arr[:10, :10])
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_inmem_uri_sparse():
|
|
23
|
+
shape = (1000, 1000)
|
|
24
|
+
|
|
25
|
+
s = sp.sparse.random(1000, 1000, density=0.25)
|
|
26
|
+
uri = "mem://sparse"
|
|
27
|
+
|
|
28
|
+
dense_inmem = create_cellarray(uri, shape=(shape), sparse=True)
|
|
29
|
+
dense_inmem.write_batch(s, start_row=0)
|
|
30
|
+
|
|
31
|
+
assert np.allclose(dense_inmem[:10, :10].toarray(), s.tocsr()[:10, :10].toarray())
|
|
@@ -98,6 +98,11 @@ def test_coo_output(sample_sparse_array_2d):
|
|
|
98
98
|
assert sparse.isspmatrix_csr(result)
|
|
99
99
|
np.testing.assert_array_almost_equal(result.toarray(), data.toarray())
|
|
100
100
|
|
|
101
|
+
# Test full slice with ellipsis
|
|
102
|
+
result = array_coo[0:10, ...]
|
|
103
|
+
assert sparse.isspmatrix_csr(result)
|
|
104
|
+
np.testing.assert_array_almost_equal(result.toarray(), data.toarray())
|
|
105
|
+
|
|
101
106
|
# Test partial slice
|
|
102
107
|
data_csr = data.tocsr()
|
|
103
108
|
result = array_coo[2:5, 10:20]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
+
furo
|
|
2
|
+
myst-nb
|
|
1
3
|
# Requirements file for ReadTheDocs, check .readthedocs.yml.
|
|
2
4
|
# To build the module reference correctly, make sure every external package
|
|
3
5
|
# under `install_requires` in `setup.cfg` is also listed here!
|
|
4
6
|
# sphinx_rtd_theme
|
|
5
7
|
myst-parser[linkify]
|
|
6
8
|
sphinx>=3.2.1
|
|
7
|
-
myst-nb
|
|
8
|
-
furo
|
|
9
9
|
sphinx-autodoc-typehints
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|