cap-anndata 0.1.0__tar.gz → 0.1.1__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/PKG-INFO +1 -1
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/cap_anndata/cap_anndata.py +5 -2
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/cap_anndata.egg-info/PKG-INFO +1 -1
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/setup.py +1 -1
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/test/test_cap_anndata.py +310 -291
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/LICENSE +0 -0
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/README.md +0 -0
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/cap_anndata/__init__.py +0 -0
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/cap_anndata/backed_df.py +0 -0
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/cap_anndata/backed_uns.py +0 -0
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/cap_anndata.egg-info/SOURCES.txt +0 -0
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/cap_anndata.egg-info/dependency_links.txt +0 -0
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/cap_anndata.egg-info/requires.txt +0 -0
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/cap_anndata.egg-info/top_level.txt +0 -0
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/setup.cfg +0 -0
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/test/test_backed_df.py +0 -0
- {cap_anndata-0.1.0 → cap_anndata-0.1.1}/test/test_backed_uns.py +0 -0
@@ -68,7 +68,7 @@ class CapAnnData:
|
|
68
68
|
if raw:
|
69
69
|
# Check if raw exists first
|
70
70
|
if "raw" not in self._file.keys():
|
71
|
-
logger.
|
71
|
+
logger.warning("Can't read raw.var since raw layer doesn't exist!")
|
72
72
|
return
|
73
73
|
|
74
74
|
if self._raw is None:
|
@@ -97,6 +97,10 @@ class CapAnnData:
|
|
97
97
|
|
98
98
|
for col in cols_to_read:
|
99
99
|
df[col] = read_elem(h5_group[col])
|
100
|
+
if df.column_order.dtype != object:
|
101
|
+
# empty DataFrame will have column_order as float64
|
102
|
+
# which leads to failure in overwrite method
|
103
|
+
df.column_order = df.column_order.astype(object)
|
100
104
|
return df
|
101
105
|
|
102
106
|
@staticmethod
|
@@ -190,7 +194,6 @@ class CapAnnData:
|
|
190
194
|
else:
|
191
195
|
# sparse array
|
192
196
|
self._obsm[entity_name] = ad.experimental.sparse_dataset(entity)
|
193
|
-
logger.debug(f"obsm={self._obsm}")
|
194
197
|
|
195
198
|
def obsm_keys(self) -> List[str]:
|
196
199
|
return list(self.obsm.keys())
|
@@ -1,291 +1,310 @@
|
|
1
|
-
from cap_anndata import CapAnnData
|
2
|
-
import anndata as ad
|
3
|
-
import numpy as np
|
4
|
-
import tempfile
|
5
|
-
import os
|
6
|
-
import h5py
|
7
|
-
import pandas as pd
|
8
|
-
import scipy.sparse as sp
|
9
|
-
import pytest
|
10
|
-
|
11
|
-
|
12
|
-
def get_base_anndata(n_rows: int = 10, n_genes: int = 10, sparse=False) -> ad.AnnData:
|
13
|
-
x = np.eye(n_rows, n_genes).astype(np.float32)
|
14
|
-
if sparse:
|
15
|
-
x = sp.csr_matrix(x, dtype=np.float32)
|
16
|
-
adata = ad.AnnData(X=x)
|
17
|
-
return adata
|
18
|
-
|
19
|
-
|
20
|
-
def get_filled_anndata(n_rows: int = 10, n_genes: int = 10, sparse=False) -> ad.AnnData:
|
21
|
-
adata = get_base_anndata(n_rows, n_genes, sparse)
|
22
|
-
|
23
|
-
adata.obs["cell_type"] = [f"cell_{i%3}" for i in range(adata.shape[0])]
|
24
|
-
adata.obs["number"] = [i / 10 for i in range(adata.shape[0])]
|
25
|
-
adata.obs.index = [f"obs_{i}" for i in range(adata.shape[0])]
|
26
|
-
|
27
|
-
adata.var.index = [f"gene_{i}" for i in range(adata.shape[1])]
|
28
|
-
adata.var["filtered"] = [i > 4 for i in range(adata.shape[1])]
|
29
|
-
adata.var["gene_names"] = [f"gene_name_{i}" for i in range(adata.shape[1])]
|
30
|
-
adata.var["dispersion"] = [i / 100 for i in range(adata.shape[1])]
|
31
|
-
|
32
|
-
adata.raw = adata
|
33
|
-
return adata
|
34
|
-
|
35
|
-
|
36
|
-
def test_read_anndata_file():
|
37
|
-
adata = get_base_anndata()
|
38
|
-
temp_folder = tempfile.mkdtemp()
|
39
|
-
file_path = os.path.join(temp_folder, "test_read_anndata_file.h5ad")
|
40
|
-
adata.write_h5ad(file_path)
|
41
|
-
del adata
|
42
|
-
|
43
|
-
with CapAnnData.read_anndata_file(file_path=file_path) as adata:
|
44
|
-
assert adata is not None, "AnnData file must be valid!"
|
45
|
-
|
46
|
-
os.remove(file_path)
|
47
|
-
|
48
|
-
|
49
|
-
def test_read_shape():
|
50
|
-
n_rows = 10
|
51
|
-
n_genes = 20
|
52
|
-
adata = get_base_anndata(n_rows, n_genes)
|
53
|
-
temp_folder = tempfile.mkdtemp()
|
54
|
-
file_path = os.path.join(temp_folder, "test_read_shape.h5ad")
|
55
|
-
adata.write_h5ad(file_path)
|
56
|
-
|
57
|
-
with h5py.File(file_path) as file:
|
58
|
-
cap_adata = CapAnnData(file)
|
59
|
-
shape = cap_adata.shape
|
60
|
-
|
61
|
-
os.remove(file_path)
|
62
|
-
assert shape[0] == n_rows
|
63
|
-
assert shape[1] == n_genes
|
64
|
-
|
65
|
-
|
66
|
-
def test_read_df():
|
67
|
-
adata = get_filled_anndata()
|
68
|
-
temp_folder = tempfile.mkdtemp()
|
69
|
-
file_path = os.path.join(temp_folder, "test_read_obs.h5ad")
|
70
|
-
|
71
|
-
adata.write_h5ad(file_path)
|
72
|
-
|
73
|
-
with h5py.File(file_path, 'r') as file:
|
74
|
-
cap_adata = CapAnnData(file)
|
75
|
-
cap_adata.read_obs()
|
76
|
-
cap_adata.read_var()
|
77
|
-
cap_adata.read_var(raw=True)
|
78
|
-
|
79
|
-
os.remove(file_path)
|
80
|
-
pd.testing.assert_frame_equal(adata.obs, cap_adata.obs, check_frame_type=False)
|
81
|
-
pd.testing.assert_frame_equal(adata.var, cap_adata.var, check_frame_type=False)
|
82
|
-
pd.testing.assert_frame_equal(adata.raw.var, cap_adata.raw.var, check_frame_type=False)
|
83
|
-
|
84
|
-
|
85
|
-
def test_partial_read():
|
86
|
-
adata = get_filled_anndata()
|
87
|
-
temp_folder = tempfile.mkdtemp()
|
88
|
-
file_path = os.path.join(temp_folder, "test_partial_read.h5ad")
|
89
|
-
adata.write_h5ad(file_path)
|
90
|
-
|
91
|
-
with h5py.File(file_path, 'r') as file:
|
92
|
-
cap_adata = CapAnnData(file)
|
93
|
-
cap_adata.read_obs(columns=['cell_type'])
|
94
|
-
cap_adata.read_obs(columns=['cell_type'])
|
95
|
-
cap_adata.read_var(columns=['dispersion'])
|
96
|
-
cap_adata.read_var(columns=['dispersion'], raw=True)
|
97
|
-
|
98
|
-
os.remove(file_path)
|
99
|
-
|
100
|
-
assert len(adata.obs.columns) == len(cap_adata.obs.column_order)
|
101
|
-
assert len(adata.var.columns) == len(cap_adata.var.column_order)
|
102
|
-
assert len(adata.raw.var.columns) == len(cap_adata.raw.var.column_order)
|
103
|
-
|
104
|
-
assert len(cap_adata.obs.columns) == 1
|
105
|
-
assert len(cap_adata.var.columns) == 1
|
106
|
-
assert len(cap_adata.raw.var.columns) == 1
|
107
|
-
|
108
|
-
pd.testing.assert_index_equal(adata.obs.index, cap_adata.obs.index)
|
109
|
-
pd.testing.assert_index_equal(adata.var.index, cap_adata.var.index)
|
110
|
-
pd.testing.assert_index_equal(adata.raw.var.index, cap_adata.raw.var.index)
|
111
|
-
|
112
|
-
|
113
|
-
def test_overwrite_df():
|
114
|
-
adata = get_filled_anndata()
|
115
|
-
temp_folder = tempfile.mkdtemp()
|
116
|
-
file_path = os.path.join(temp_folder, "test_overwrite_df.h5ad")
|
117
|
-
adata.write_h5ad(file_path)
|
118
|
-
|
119
|
-
with h5py.File(file_path, 'r+') as file:
|
120
|
-
cap_adata = CapAnnData(file)
|
121
|
-
cap_adata.read_obs(columns=["cell_type"])
|
122
|
-
cap_adata.obs["cell_type"] = [f"new_cell_type_{i%2}" for i in range(cap_adata.shape[0])]
|
123
|
-
cap_adata.obs["const_str"] = "some string"
|
124
|
-
ref_obs = cap_adata.obs.copy()
|
125
|
-
|
126
|
-
# Modify 'var'
|
127
|
-
cap_adata.read_var()
|
128
|
-
cap_adata.var["gene_names"] = [f"new_gene_{i}" for i in range(cap_adata.shape[1])]
|
129
|
-
cap_adata.var["extra_info"] = np.random.rand(cap_adata.shape[1])
|
130
|
-
ref_var = cap_adata.var.copy()
|
131
|
-
|
132
|
-
# Modify 'raw.var', assuming 'raw' is also a CapAnnData
|
133
|
-
cap_adata.read_var(raw=True)
|
134
|
-
cap_adata.raw.var["gene_names"] = [f"raw_new_gene_{i}" for i in range(cap_adata.raw.shape[1])]
|
135
|
-
cap_adata.raw.var["extra_info"] = np.random.rand(cap_adata.shape[1])
|
136
|
-
ref_raw_var = cap_adata.raw.var.copy()
|
137
|
-
|
138
|
-
cap_adata.overwrite(['obs', 'var', 'raw.var'])
|
139
|
-
|
140
|
-
adata = ad.read_h5ad(file_path)
|
141
|
-
os.remove(file_path)
|
142
|
-
|
143
|
-
# Assert changes in 'obs'
|
144
|
-
assert all([c in adata.obs.columns for c in ref_obs.columns])
|
145
|
-
pd.testing.assert_frame_equal(ref_obs, adata.obs[ref_obs.columns.to_list()], check_frame_type=False)
|
146
|
-
|
147
|
-
# Assert changes in 'var'
|
148
|
-
assert all([c in adata.var.columns for c in ref_var.columns])
|
149
|
-
pd.testing.assert_frame_equal(ref_var, adata.var[ref_var.columns.to_list()], check_frame_type=False)
|
150
|
-
|
151
|
-
# Assert changes in 'raw.var'
|
152
|
-
assert all([c in adata.raw.var.columns for c in ref_raw_var.columns])
|
153
|
-
pd.testing.assert_frame_equal(ref_raw_var, adata.raw.var[ref_raw_var.columns.to_list()], check_frame_type=False)
|
154
|
-
|
155
|
-
|
156
|
-
@pytest.mark.parametrize("sparse", [False, True])
|
157
|
-
@pytest.mark.parametrize("vertical_slice", [None, False, True, "mask"])
|
158
|
-
def test_link_x(sparse, vertical_slice):
|
159
|
-
adata = get_filled_anndata(sparse=sparse)
|
160
|
-
temp_folder = tempfile.mkdtemp()
|
161
|
-
file_path = os.path.join(temp_folder, "test_link_x.h5ad")
|
162
|
-
adata.write_h5ad(file_path)
|
163
|
-
|
164
|
-
if vertical_slice is None:
|
165
|
-
s_ = np.s_[:]
|
166
|
-
elif vertical_slice == "mask":
|
167
|
-
mask = np.array([i < 5 for i in range(adata.shape[0])])
|
168
|
-
s_ = np.s_[mask, :5]
|
169
|
-
else:
|
170
|
-
# slice over var or obs
|
171
|
-
s_ = np.s_[:, 0:5] if vertical_slice else np.s_[0:5, :]
|
172
|
-
|
173
|
-
with h5py.File(file_path, 'r') as file:
|
174
|
-
cap_adata = CapAnnData(file)
|
175
|
-
x = cap_adata.X[s_]
|
176
|
-
raw_x = cap_adata.raw.X[s_]
|
177
|
-
|
178
|
-
os.remove(file_path)
|
179
|
-
if sparse:
|
180
|
-
assert np.allclose(adata.X.A[s_], x.A)
|
181
|
-
assert np.allclose(adata.raw.X.A[s_], raw_x.A)
|
182
|
-
else:
|
183
|
-
assert np.allclose(adata.X[s_], x)
|
184
|
-
assert np.allclose(adata.raw.X[s_], raw_x)
|
185
|
-
|
186
|
-
|
187
|
-
@pytest.mark.parametrize("sparse", [False, True])
|
188
|
-
def test_shape(sparse):
|
189
|
-
n_rows = 15
|
190
|
-
n_genes = 25
|
191
|
-
|
192
|
-
adata = get_filled_anndata(n_rows, n_genes, sparse)
|
193
|
-
temp_folder = tempfile.mkdtemp()
|
194
|
-
file_path = os.path.join(temp_folder, "test_shape.h5ad")
|
195
|
-
adata.write_h5ad(file_path)
|
196
|
-
|
197
|
-
with h5py.File(file_path) as file:
|
198
|
-
cap_adata = CapAnnData(file)
|
199
|
-
shape = cap_adata.shape
|
200
|
-
shape_raw = cap_adata.raw.shape
|
201
|
-
|
202
|
-
os.remove(file_path)
|
203
|
-
for sh in [shape, shape_raw]:
|
204
|
-
assert sh == (n_rows, n_genes)
|
205
|
-
|
206
|
-
|
207
|
-
def test_read_obsm():
|
208
|
-
adata = get_filled_anndata()
|
209
|
-
obsm_names = [f"X_test{i}" for i in range(2)]
|
210
|
-
|
211
|
-
for emb in obsm_names:
|
212
|
-
adata.obsm[emb] = np.random.random(size=(adata.shape[0], 2))
|
213
|
-
|
214
|
-
temp_folder = tempfile.mkdtemp()
|
215
|
-
file_path = os.path.join(temp_folder, "test_read_obsm.h5ad")
|
216
|
-
adata.write_h5ad(file_path)
|
217
|
-
|
218
|
-
with h5py.File(file_path, 'r') as f:
|
219
|
-
cap_adata = CapAnnData(f)
|
220
|
-
|
221
|
-
ss = []
|
222
|
-
for emb in obsm_names:
|
223
|
-
assert emb in cap_adata.obsm_keys()
|
224
|
-
assert cap_adata.obsm[emb].shape == adata.obsm[emb].shape
|
225
|
-
|
226
|
-
x_1 = cap_adata.obsm[obsm_names[0]][:]
|
227
|
-
x_2 = cap_adata.obsm[obsm_names[1]][:]
|
228
|
-
|
229
|
-
os.remove(file_path)
|
230
|
-
assert np.allclose(adata.obsm[obsm_names[0]], x_1)
|
231
|
-
assert np.allclose(adata.obsm[obsm_names[1]], x_2)
|
232
|
-
|
233
|
-
|
234
|
-
def test_read_uns():
|
235
|
-
adata = get_base_anndata()
|
236
|
-
key1, key2 = "key1", "key2"
|
237
|
-
keys = (key1, key2)
|
238
|
-
|
239
|
-
adata.uns = {k: {k: k} for k in keys}
|
240
|
-
temp_folder = tempfile.mkdtemp()
|
241
|
-
file_path = os.path.join(temp_folder, "test_read_uns.h5ad")
|
242
|
-
adata.write_h5ad(file_path)
|
243
|
-
|
244
|
-
with h5py.File(file_path, 'r') as f:
|
245
|
-
cap_adata = CapAnnData(f)
|
246
|
-
|
247
|
-
for k in keys:
|
248
|
-
assert k in cap_adata.uns
|
249
|
-
|
250
|
-
cap_adata.read_uns(keys=[key1])
|
251
|
-
|
252
|
-
assert cap_adata.uns[key1] == adata.uns[key1] # connected
|
253
|
-
assert cap_adata.uns[key2] != adata.uns[key2] # not connected
|
254
|
-
|
255
|
-
os.remove(file_path)
|
256
|
-
|
257
|
-
|
258
|
-
def test_modify_uns():
|
259
|
-
adata = get_base_anndata()
|
260
|
-
adata.uns = {
|
261
|
-
"field_to_ingore": list(range(100)),
|
262
|
-
"field_to_rename": "value",
|
263
|
-
"field_to_expand": {"key1": {}},
|
264
|
-
"field_to_modify": {"a": "b"}
|
265
|
-
}
|
266
|
-
new_name = "renamed_field"
|
267
|
-
d_to_exp = {"sub_key1": "v1", "sub_key2": "v2"}
|
268
|
-
v_to_mod = "value"
|
269
|
-
|
270
|
-
temp_folder = tempfile.mkdtemp()
|
271
|
-
file_path = os.path.join(temp_folder, "test_modify_uns.h5ad")
|
272
|
-
adata.write_h5ad(file_path)
|
273
|
-
|
274
|
-
with h5py.File(file_path, 'r+') as f:
|
275
|
-
cap_adata = CapAnnData(f)
|
276
|
-
|
277
|
-
cap_adata.read_uns(keys=["field_to_rename", "field_to_expand", "field_to_modify"])
|
278
|
-
|
279
|
-
cap_adata.uns[new_name] = cap_adata.uns.pop("field_to_rename")
|
280
|
-
cap_adata.uns["field_to_expand"]["key1"] = d_to_exp
|
281
|
-
cap_adata.uns["field_to_modify"] = v_to_mod
|
282
|
-
|
283
|
-
cap_adata.overwrite(['uns'])
|
284
|
-
|
285
|
-
adata = ad.read_h5ad(file_path)
|
286
|
-
|
287
|
-
assert adata.uns is not None
|
288
|
-
assert len(adata.uns.keys()) == 4
|
289
|
-
assert new_name in adata.uns.keys()
|
290
|
-
assert adata.uns['field_to_expand']["key1"] == d_to_exp
|
291
|
-
assert adata.uns['field_to_modify'] == v_to_mod
|
1
|
+
from cap_anndata import CapAnnData
|
2
|
+
import anndata as ad
|
3
|
+
import numpy as np
|
4
|
+
import tempfile
|
5
|
+
import os
|
6
|
+
import h5py
|
7
|
+
import pandas as pd
|
8
|
+
import scipy.sparse as sp
|
9
|
+
import pytest
|
10
|
+
|
11
|
+
|
12
|
+
def get_base_anndata(n_rows: int = 10, n_genes: int = 10, sparse=False) -> ad.AnnData:
|
13
|
+
x = np.eye(n_rows, n_genes).astype(np.float32)
|
14
|
+
if sparse:
|
15
|
+
x = sp.csr_matrix(x, dtype=np.float32)
|
16
|
+
adata = ad.AnnData(X=x)
|
17
|
+
return adata
|
18
|
+
|
19
|
+
|
20
|
+
def get_filled_anndata(n_rows: int = 10, n_genes: int = 10, sparse=False) -> ad.AnnData:
|
21
|
+
adata = get_base_anndata(n_rows, n_genes, sparse)
|
22
|
+
|
23
|
+
adata.obs["cell_type"] = [f"cell_{i%3}" for i in range(adata.shape[0])]
|
24
|
+
adata.obs["number"] = [i / 10 for i in range(adata.shape[0])]
|
25
|
+
adata.obs.index = [f"obs_{i}" for i in range(adata.shape[0])]
|
26
|
+
|
27
|
+
adata.var.index = [f"gene_{i}" for i in range(adata.shape[1])]
|
28
|
+
adata.var["filtered"] = [i > 4 for i in range(adata.shape[1])]
|
29
|
+
adata.var["gene_names"] = [f"gene_name_{i}" for i in range(adata.shape[1])]
|
30
|
+
adata.var["dispersion"] = [i / 100 for i in range(adata.shape[1])]
|
31
|
+
|
32
|
+
adata.raw = adata
|
33
|
+
return adata
|
34
|
+
|
35
|
+
|
36
|
+
def test_read_anndata_file():
|
37
|
+
adata = get_base_anndata()
|
38
|
+
temp_folder = tempfile.mkdtemp()
|
39
|
+
file_path = os.path.join(temp_folder, "test_read_anndata_file.h5ad")
|
40
|
+
adata.write_h5ad(file_path)
|
41
|
+
del adata
|
42
|
+
|
43
|
+
with CapAnnData.read_anndata_file(file_path=file_path) as adata:
|
44
|
+
assert adata is not None, "AnnData file must be valid!"
|
45
|
+
|
46
|
+
os.remove(file_path)
|
47
|
+
|
48
|
+
|
49
|
+
def test_read_shape():
|
50
|
+
n_rows = 10
|
51
|
+
n_genes = 20
|
52
|
+
adata = get_base_anndata(n_rows, n_genes)
|
53
|
+
temp_folder = tempfile.mkdtemp()
|
54
|
+
file_path = os.path.join(temp_folder, "test_read_shape.h5ad")
|
55
|
+
adata.write_h5ad(file_path)
|
56
|
+
|
57
|
+
with h5py.File(file_path) as file:
|
58
|
+
cap_adata = CapAnnData(file)
|
59
|
+
shape = cap_adata.shape
|
60
|
+
|
61
|
+
os.remove(file_path)
|
62
|
+
assert shape[0] == n_rows
|
63
|
+
assert shape[1] == n_genes
|
64
|
+
|
65
|
+
|
66
|
+
def test_read_df():
|
67
|
+
adata = get_filled_anndata()
|
68
|
+
temp_folder = tempfile.mkdtemp()
|
69
|
+
file_path = os.path.join(temp_folder, "test_read_obs.h5ad")
|
70
|
+
|
71
|
+
adata.write_h5ad(file_path)
|
72
|
+
|
73
|
+
with h5py.File(file_path, 'r') as file:
|
74
|
+
cap_adata = CapAnnData(file)
|
75
|
+
cap_adata.read_obs()
|
76
|
+
cap_adata.read_var()
|
77
|
+
cap_adata.read_var(raw=True)
|
78
|
+
|
79
|
+
os.remove(file_path)
|
80
|
+
pd.testing.assert_frame_equal(adata.obs, cap_adata.obs, check_frame_type=False)
|
81
|
+
pd.testing.assert_frame_equal(adata.var, cap_adata.var, check_frame_type=False)
|
82
|
+
pd.testing.assert_frame_equal(adata.raw.var, cap_adata.raw.var, check_frame_type=False)
|
83
|
+
|
84
|
+
|
85
|
+
def test_partial_read():
|
86
|
+
adata = get_filled_anndata()
|
87
|
+
temp_folder = tempfile.mkdtemp()
|
88
|
+
file_path = os.path.join(temp_folder, "test_partial_read.h5ad")
|
89
|
+
adata.write_h5ad(file_path)
|
90
|
+
|
91
|
+
with h5py.File(file_path, 'r') as file:
|
92
|
+
cap_adata = CapAnnData(file)
|
93
|
+
cap_adata.read_obs(columns=['cell_type'])
|
94
|
+
cap_adata.read_obs(columns=['cell_type'])
|
95
|
+
cap_adata.read_var(columns=['dispersion'])
|
96
|
+
cap_adata.read_var(columns=['dispersion'], raw=True)
|
97
|
+
|
98
|
+
os.remove(file_path)
|
99
|
+
|
100
|
+
assert len(adata.obs.columns) == len(cap_adata.obs.column_order)
|
101
|
+
assert len(adata.var.columns) == len(cap_adata.var.column_order)
|
102
|
+
assert len(adata.raw.var.columns) == len(cap_adata.raw.var.column_order)
|
103
|
+
|
104
|
+
assert len(cap_adata.obs.columns) == 1
|
105
|
+
assert len(cap_adata.var.columns) == 1
|
106
|
+
assert len(cap_adata.raw.var.columns) == 1
|
107
|
+
|
108
|
+
pd.testing.assert_index_equal(adata.obs.index, cap_adata.obs.index)
|
109
|
+
pd.testing.assert_index_equal(adata.var.index, cap_adata.var.index)
|
110
|
+
pd.testing.assert_index_equal(adata.raw.var.index, cap_adata.raw.var.index)
|
111
|
+
|
112
|
+
|
113
|
+
def test_overwrite_df():
|
114
|
+
adata = get_filled_anndata()
|
115
|
+
temp_folder = tempfile.mkdtemp()
|
116
|
+
file_path = os.path.join(temp_folder, "test_overwrite_df.h5ad")
|
117
|
+
adata.write_h5ad(file_path)
|
118
|
+
|
119
|
+
with h5py.File(file_path, 'r+') as file:
|
120
|
+
cap_adata = CapAnnData(file)
|
121
|
+
cap_adata.read_obs(columns=["cell_type"])
|
122
|
+
cap_adata.obs["cell_type"] = [f"new_cell_type_{i%2}" for i in range(cap_adata.shape[0])]
|
123
|
+
cap_adata.obs["const_str"] = "some string"
|
124
|
+
ref_obs = cap_adata.obs.copy()
|
125
|
+
|
126
|
+
# Modify 'var'
|
127
|
+
cap_adata.read_var()
|
128
|
+
cap_adata.var["gene_names"] = [f"new_gene_{i}" for i in range(cap_adata.shape[1])]
|
129
|
+
cap_adata.var["extra_info"] = np.random.rand(cap_adata.shape[1])
|
130
|
+
ref_var = cap_adata.var.copy()
|
131
|
+
|
132
|
+
# Modify 'raw.var', assuming 'raw' is also a CapAnnData
|
133
|
+
cap_adata.read_var(raw=True)
|
134
|
+
cap_adata.raw.var["gene_names"] = [f"raw_new_gene_{i}" for i in range(cap_adata.raw.shape[1])]
|
135
|
+
cap_adata.raw.var["extra_info"] = np.random.rand(cap_adata.shape[1])
|
136
|
+
ref_raw_var = cap_adata.raw.var.copy()
|
137
|
+
|
138
|
+
cap_adata.overwrite(['obs', 'var', 'raw.var'])
|
139
|
+
|
140
|
+
adata = ad.read_h5ad(file_path)
|
141
|
+
os.remove(file_path)
|
142
|
+
|
143
|
+
# Assert changes in 'obs'
|
144
|
+
assert all([c in adata.obs.columns for c in ref_obs.columns])
|
145
|
+
pd.testing.assert_frame_equal(ref_obs, adata.obs[ref_obs.columns.to_list()], check_frame_type=False)
|
146
|
+
|
147
|
+
# Assert changes in 'var'
|
148
|
+
assert all([c in adata.var.columns for c in ref_var.columns])
|
149
|
+
pd.testing.assert_frame_equal(ref_var, adata.var[ref_var.columns.to_list()], check_frame_type=False)
|
150
|
+
|
151
|
+
# Assert changes in 'raw.var'
|
152
|
+
assert all([c in adata.raw.var.columns for c in ref_raw_var.columns])
|
153
|
+
pd.testing.assert_frame_equal(ref_raw_var, adata.raw.var[ref_raw_var.columns.to_list()], check_frame_type=False)
|
154
|
+
|
155
|
+
|
156
|
+
@pytest.mark.parametrize("sparse", [False, True])
|
157
|
+
@pytest.mark.parametrize("vertical_slice", [None, False, True, "mask"])
|
158
|
+
def test_link_x(sparse, vertical_slice):
|
159
|
+
adata = get_filled_anndata(sparse=sparse)
|
160
|
+
temp_folder = tempfile.mkdtemp()
|
161
|
+
file_path = os.path.join(temp_folder, "test_link_x.h5ad")
|
162
|
+
adata.write_h5ad(file_path)
|
163
|
+
|
164
|
+
if vertical_slice is None:
|
165
|
+
s_ = np.s_[:]
|
166
|
+
elif vertical_slice == "mask":
|
167
|
+
mask = np.array([i < 5 for i in range(adata.shape[0])])
|
168
|
+
s_ = np.s_[mask, :5]
|
169
|
+
else:
|
170
|
+
# slice over var or obs
|
171
|
+
s_ = np.s_[:, 0:5] if vertical_slice else np.s_[0:5, :]
|
172
|
+
|
173
|
+
with h5py.File(file_path, 'r') as file:
|
174
|
+
cap_adata = CapAnnData(file)
|
175
|
+
x = cap_adata.X[s_]
|
176
|
+
raw_x = cap_adata.raw.X[s_]
|
177
|
+
|
178
|
+
os.remove(file_path)
|
179
|
+
if sparse:
|
180
|
+
assert np.allclose(adata.X.A[s_], x.A)
|
181
|
+
assert np.allclose(adata.raw.X.A[s_], raw_x.A)
|
182
|
+
else:
|
183
|
+
assert np.allclose(adata.X[s_], x)
|
184
|
+
assert np.allclose(adata.raw.X[s_], raw_x)
|
185
|
+
|
186
|
+
|
187
|
+
@pytest.mark.parametrize("sparse", [False, True])
|
188
|
+
def test_shape(sparse):
|
189
|
+
n_rows = 15
|
190
|
+
n_genes = 25
|
191
|
+
|
192
|
+
adata = get_filled_anndata(n_rows, n_genes, sparse)
|
193
|
+
temp_folder = tempfile.mkdtemp()
|
194
|
+
file_path = os.path.join(temp_folder, "test_shape.h5ad")
|
195
|
+
adata.write_h5ad(file_path)
|
196
|
+
|
197
|
+
with h5py.File(file_path) as file:
|
198
|
+
cap_adata = CapAnnData(file)
|
199
|
+
shape = cap_adata.shape
|
200
|
+
shape_raw = cap_adata.raw.shape
|
201
|
+
|
202
|
+
os.remove(file_path)
|
203
|
+
for sh in [shape, shape_raw]:
|
204
|
+
assert sh == (n_rows, n_genes)
|
205
|
+
|
206
|
+
|
207
|
+
def test_read_obsm():
|
208
|
+
adata = get_filled_anndata()
|
209
|
+
obsm_names = [f"X_test{i}" for i in range(2)]
|
210
|
+
|
211
|
+
for emb in obsm_names:
|
212
|
+
adata.obsm[emb] = np.random.random(size=(adata.shape[0], 2))
|
213
|
+
|
214
|
+
temp_folder = tempfile.mkdtemp()
|
215
|
+
file_path = os.path.join(temp_folder, "test_read_obsm.h5ad")
|
216
|
+
adata.write_h5ad(file_path)
|
217
|
+
|
218
|
+
with h5py.File(file_path, 'r') as f:
|
219
|
+
cap_adata = CapAnnData(f)
|
220
|
+
|
221
|
+
ss = []
|
222
|
+
for emb in obsm_names:
|
223
|
+
assert emb in cap_adata.obsm_keys()
|
224
|
+
assert cap_adata.obsm[emb].shape == adata.obsm[emb].shape
|
225
|
+
|
226
|
+
x_1 = cap_adata.obsm[obsm_names[0]][:]
|
227
|
+
x_2 = cap_adata.obsm[obsm_names[1]][:]
|
228
|
+
|
229
|
+
os.remove(file_path)
|
230
|
+
assert np.allclose(adata.obsm[obsm_names[0]], x_1)
|
231
|
+
assert np.allclose(adata.obsm[obsm_names[1]], x_2)
|
232
|
+
|
233
|
+
|
234
|
+
def test_read_uns():
|
235
|
+
adata = get_base_anndata()
|
236
|
+
key1, key2 = "key1", "key2"
|
237
|
+
keys = (key1, key2)
|
238
|
+
|
239
|
+
adata.uns = {k: {k: k} for k in keys}
|
240
|
+
temp_folder = tempfile.mkdtemp()
|
241
|
+
file_path = os.path.join(temp_folder, "test_read_uns.h5ad")
|
242
|
+
adata.write_h5ad(file_path)
|
243
|
+
|
244
|
+
with h5py.File(file_path, 'r') as f:
|
245
|
+
cap_adata = CapAnnData(f)
|
246
|
+
|
247
|
+
for k in keys:
|
248
|
+
assert k in cap_adata.uns
|
249
|
+
|
250
|
+
cap_adata.read_uns(keys=[key1])
|
251
|
+
|
252
|
+
assert cap_adata.uns[key1] == adata.uns[key1] # connected
|
253
|
+
assert cap_adata.uns[key2] != adata.uns[key2] # not connected
|
254
|
+
|
255
|
+
os.remove(file_path)
|
256
|
+
|
257
|
+
|
258
|
+
def test_modify_uns():
|
259
|
+
adata = get_base_anndata()
|
260
|
+
adata.uns = {
|
261
|
+
"field_to_ingore": list(range(100)),
|
262
|
+
"field_to_rename": "value",
|
263
|
+
"field_to_expand": {"key1": {}},
|
264
|
+
"field_to_modify": {"a": "b"}
|
265
|
+
}
|
266
|
+
new_name = "renamed_field"
|
267
|
+
d_to_exp = {"sub_key1": "v1", "sub_key2": "v2"}
|
268
|
+
v_to_mod = "value"
|
269
|
+
|
270
|
+
temp_folder = tempfile.mkdtemp()
|
271
|
+
file_path = os.path.join(temp_folder, "test_modify_uns.h5ad")
|
272
|
+
adata.write_h5ad(file_path)
|
273
|
+
|
274
|
+
with h5py.File(file_path, 'r+') as f:
|
275
|
+
cap_adata = CapAnnData(f)
|
276
|
+
|
277
|
+
cap_adata.read_uns(keys=["field_to_rename", "field_to_expand", "field_to_modify"])
|
278
|
+
|
279
|
+
cap_adata.uns[new_name] = cap_adata.uns.pop("field_to_rename")
|
280
|
+
cap_adata.uns["field_to_expand"]["key1"] = d_to_exp
|
281
|
+
cap_adata.uns["field_to_modify"] = v_to_mod
|
282
|
+
|
283
|
+
cap_adata.overwrite(['uns'])
|
284
|
+
|
285
|
+
adata = ad.read_h5ad(file_path)
|
286
|
+
|
287
|
+
assert adata.uns is not None
|
288
|
+
assert len(adata.uns.keys()) == 4
|
289
|
+
assert new_name in adata.uns.keys()
|
290
|
+
assert adata.uns['field_to_expand']["key1"] == d_to_exp
|
291
|
+
assert adata.uns['field_to_modify'] == v_to_mod
|
292
|
+
|
293
|
+
|
294
|
+
def test_empty_obs_override():
|
295
|
+
"""
|
296
|
+
especially for solving the issue:
|
297
|
+
https://github.com/cellannotation/cap-anndata/pull/5
|
298
|
+
"""
|
299
|
+
adata = get_base_anndata()
|
300
|
+
temp_folder = tempfile.mkdtemp()
|
301
|
+
file_path = os.path.join(temp_folder, "test_modify_uns.h5ad")
|
302
|
+
adata.write_h5ad(file_path)
|
303
|
+
|
304
|
+
with h5py.File(file_path, 'r+') as f:
|
305
|
+
cap_adata = CapAnnData(f)
|
306
|
+
cap_adata.read_obs()
|
307
|
+
|
308
|
+
cap_adata.obs["cell_type_1"] = pd.Series(data=np.nan, index=cap_adata.obs.index, dtype="category")
|
309
|
+
cap_adata.obs["cell_type_new"] = pd.Series(data=np.nan, index=cap_adata.obs.index, dtype="category")
|
310
|
+
cap_adata.overwrite(fields=["obs"])
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|