cap-anndata 0.1.0__tar.gz → 0.1.1__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cap_anndata
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Partial read of AnnData files for low-memory operations with large datasets.
5
5
  Home-page: https://github.com/cellannotation/cap-anndata
6
6
  Author: R. Mukhin, A. Isaev
@@ -68,7 +68,7 @@ class CapAnnData:
68
68
  if raw:
69
69
  # Check if raw exists first
70
70
  if "raw" not in self._file.keys():
71
- logger.debug("Can't read raw.var since raw layer doesn't exist!")
71
+ logger.warning("Can't read raw.var since raw layer doesn't exist!")
72
72
  return
73
73
 
74
74
  if self._raw is None:
@@ -97,6 +97,10 @@ class CapAnnData:
97
97
 
98
98
  for col in cols_to_read:
99
99
  df[col] = read_elem(h5_group[col])
100
+ if df.column_order.dtype != object:
101
+ # An empty DataFrame ends up with a float64 column_order,
102
+ # which makes the overwrite method fail, so cast it to object.
103
+ df.column_order = df.column_order.astype(object)
100
104
  return df
101
105
 
102
106
  @staticmethod
@@ -190,7 +194,6 @@ class CapAnnData:
190
194
  else:
191
195
  # sparse array
192
196
  self._obsm[entity_name] = ad.experimental.sparse_dataset(entity)
193
- logger.debug(f"obsm={self._obsm}")
194
197
 
195
198
  def obsm_keys(self) -> List[str]:
196
199
  return list(self.obsm.keys())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cap_anndata
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Partial read of AnnData files for low-memory operations with large datasets.
5
5
  Home-page: https://github.com/cellannotation/cap-anndata
6
6
  Author: R. Mukhin, A. Isaev
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name='cap_anndata',
5
- version='0.1.0',
5
+ version='0.1.1',
6
6
  author='R. Mukhin, A. Isaev',
7
7
  author_email='roman@ebookapplications.com',
8
8
  packages=find_packages(exclude=["test"]),
@@ -1,291 +1,310 @@
1
- from cap_anndata import CapAnnData
2
- import anndata as ad
3
- import numpy as np
4
- import tempfile
5
- import os
6
- import h5py
7
- import pandas as pd
8
- import scipy.sparse as sp
9
- import pytest
10
-
11
-
12
- def get_base_anndata(n_rows: int = 10, n_genes: int = 10, sparse=False) -> ad.AnnData:
13
- x = np.eye(n_rows, n_genes).astype(np.float32)
14
- if sparse:
15
- x = sp.csr_matrix(x, dtype=np.float32)
16
- adata = ad.AnnData(X=x)
17
- return adata
18
-
19
-
20
- def get_filled_anndata(n_rows: int = 10, n_genes: int = 10, sparse=False) -> ad.AnnData:
21
- adata = get_base_anndata(n_rows, n_genes, sparse)
22
-
23
- adata.obs["cell_type"] = [f"cell_{i%3}" for i in range(adata.shape[0])]
24
- adata.obs["number"] = [i / 10 for i in range(adata.shape[0])]
25
- adata.obs.index = [f"obs_{i}" for i in range(adata.shape[0])]
26
-
27
- adata.var.index = [f"gene_{i}" for i in range(adata.shape[1])]
28
- adata.var["filtered"] = [i > 4 for i in range(adata.shape[1])]
29
- adata.var["gene_names"] = [f"gene_name_{i}" for i in range(adata.shape[1])]
30
- adata.var["dispersion"] = [i / 100 for i in range(adata.shape[1])]
31
-
32
- adata.raw = adata
33
- return adata
34
-
35
-
36
- def test_read_anndata_file():
37
- adata = get_base_anndata()
38
- temp_folder = tempfile.mkdtemp()
39
- file_path = os.path.join(temp_folder, "test_read_anndata_file.h5ad")
40
- adata.write_h5ad(file_path)
41
- del adata
42
-
43
- with CapAnnData.read_anndata_file(file_path=file_path) as adata:
44
- assert adata is not None, "AnnData file must be valid!"
45
-
46
- os.remove(file_path)
47
-
48
-
49
- def test_read_shape():
50
- n_rows = 10
51
- n_genes = 20
52
- adata = get_base_anndata(n_rows, n_genes)
53
- temp_folder = tempfile.mkdtemp()
54
- file_path = os.path.join(temp_folder, "test_read_shape.h5ad")
55
- adata.write_h5ad(file_path)
56
-
57
- with h5py.File(file_path) as file:
58
- cap_adata = CapAnnData(file)
59
- shape = cap_adata.shape
60
-
61
- os.remove(file_path)
62
- assert shape[0] == n_rows
63
- assert shape[1] == n_genes
64
-
65
-
66
- def test_read_df():
67
- adata = get_filled_anndata()
68
- temp_folder = tempfile.mkdtemp()
69
- file_path = os.path.join(temp_folder, "test_read_obs.h5ad")
70
-
71
- adata.write_h5ad(file_path)
72
-
73
- with h5py.File(file_path, 'r') as file:
74
- cap_adata = CapAnnData(file)
75
- cap_adata.read_obs()
76
- cap_adata.read_var()
77
- cap_adata.read_var(raw=True)
78
-
79
- os.remove(file_path)
80
- pd.testing.assert_frame_equal(adata.obs, cap_adata.obs, check_frame_type=False)
81
- pd.testing.assert_frame_equal(adata.var, cap_adata.var, check_frame_type=False)
82
- pd.testing.assert_frame_equal(adata.raw.var, cap_adata.raw.var, check_frame_type=False)
83
-
84
-
85
- def test_partial_read():
86
- adata = get_filled_anndata()
87
- temp_folder = tempfile.mkdtemp()
88
- file_path = os.path.join(temp_folder, "test_partial_read.h5ad")
89
- adata.write_h5ad(file_path)
90
-
91
- with h5py.File(file_path, 'r') as file:
92
- cap_adata = CapAnnData(file)
93
- cap_adata.read_obs(columns=['cell_type'])
94
- cap_adata.read_obs(columns=['cell_type'])
95
- cap_adata.read_var(columns=['dispersion'])
96
- cap_adata.read_var(columns=['dispersion'], raw=True)
97
-
98
- os.remove(file_path)
99
-
100
- assert len(adata.obs.columns) == len(cap_adata.obs.column_order)
101
- assert len(adata.var.columns) == len(cap_adata.var.column_order)
102
- assert len(adata.raw.var.columns) == len(cap_adata.raw.var.column_order)
103
-
104
- assert len(cap_adata.obs.columns) == 1
105
- assert len(cap_adata.var.columns) == 1
106
- assert len(cap_adata.raw.var.columns) == 1
107
-
108
- pd.testing.assert_index_equal(adata.obs.index, cap_adata.obs.index)
109
- pd.testing.assert_index_equal(adata.var.index, cap_adata.var.index)
110
- pd.testing.assert_index_equal(adata.raw.var.index, cap_adata.raw.var.index)
111
-
112
-
113
- def test_overwrite_df():
114
- adata = get_filled_anndata()
115
- temp_folder = tempfile.mkdtemp()
116
- file_path = os.path.join(temp_folder, "test_overwrite_df.h5ad")
117
- adata.write_h5ad(file_path)
118
-
119
- with h5py.File(file_path, 'r+') as file:
120
- cap_adata = CapAnnData(file)
121
- cap_adata.read_obs(columns=["cell_type"])
122
- cap_adata.obs["cell_type"] = [f"new_cell_type_{i%2}" for i in range(cap_adata.shape[0])]
123
- cap_adata.obs["const_str"] = "some string"
124
- ref_obs = cap_adata.obs.copy()
125
-
126
- # Modify 'var'
127
- cap_adata.read_var()
128
- cap_adata.var["gene_names"] = [f"new_gene_{i}" for i in range(cap_adata.shape[1])]
129
- cap_adata.var["extra_info"] = np.random.rand(cap_adata.shape[1])
130
- ref_var = cap_adata.var.copy()
131
-
132
- # Modify 'raw.var', assuming 'raw' is also a CapAnnData
133
- cap_adata.read_var(raw=True)
134
- cap_adata.raw.var["gene_names"] = [f"raw_new_gene_{i}" for i in range(cap_adata.raw.shape[1])]
135
- cap_adata.raw.var["extra_info"] = np.random.rand(cap_adata.shape[1])
136
- ref_raw_var = cap_adata.raw.var.copy()
137
-
138
- cap_adata.overwrite(['obs', 'var', 'raw.var'])
139
-
140
- adata = ad.read_h5ad(file_path)
141
- os.remove(file_path)
142
-
143
- # Assert changes in 'obs'
144
- assert all([c in adata.obs.columns for c in ref_obs.columns])
145
- pd.testing.assert_frame_equal(ref_obs, adata.obs[ref_obs.columns.to_list()], check_frame_type=False)
146
-
147
- # Assert changes in 'var'
148
- assert all([c in adata.var.columns for c in ref_var.columns])
149
- pd.testing.assert_frame_equal(ref_var, adata.var[ref_var.columns.to_list()], check_frame_type=False)
150
-
151
- # Assert changes in 'raw.var'
152
- assert all([c in adata.raw.var.columns for c in ref_raw_var.columns])
153
- pd.testing.assert_frame_equal(ref_raw_var, adata.raw.var[ref_raw_var.columns.to_list()], check_frame_type=False)
154
-
155
-
156
- @pytest.mark.parametrize("sparse", [False, True])
157
- @pytest.mark.parametrize("vertical_slice", [None, False, True, "mask"])
158
- def test_link_x(sparse, vertical_slice):
159
- adata = get_filled_anndata(sparse=sparse)
160
- temp_folder = tempfile.mkdtemp()
161
- file_path = os.path.join(temp_folder, "test_link_x.h5ad")
162
- adata.write_h5ad(file_path)
163
-
164
- if vertical_slice is None:
165
- s_ = np.s_[:]
166
- elif vertical_slice == "mask":
167
- mask = np.array([i < 5 for i in range(adata.shape[0])])
168
- s_ = np.s_[mask, :5]
169
- else:
170
- # slice over var or obs
171
- s_ = np.s_[:, 0:5] if vertical_slice else np.s_[0:5, :]
172
-
173
- with h5py.File(file_path, 'r') as file:
174
- cap_adata = CapAnnData(file)
175
- x = cap_adata.X[s_]
176
- raw_x = cap_adata.raw.X[s_]
177
-
178
- os.remove(file_path)
179
- if sparse:
180
- assert np.allclose(adata.X.A[s_], x.A)
181
- assert np.allclose(adata.raw.X.A[s_], raw_x.A)
182
- else:
183
- assert np.allclose(adata.X[s_], x)
184
- assert np.allclose(adata.raw.X[s_], raw_x)
185
-
186
-
187
- @pytest.mark.parametrize("sparse", [False, True])
188
- def test_shape(sparse):
189
- n_rows = 15
190
- n_genes = 25
191
-
192
- adata = get_filled_anndata(n_rows, n_genes, sparse)
193
- temp_folder = tempfile.mkdtemp()
194
- file_path = os.path.join(temp_folder, "test_shape.h5ad")
195
- adata.write_h5ad(file_path)
196
-
197
- with h5py.File(file_path) as file:
198
- cap_adata = CapAnnData(file)
199
- shape = cap_adata.shape
200
- shape_raw = cap_adata.raw.shape
201
-
202
- os.remove(file_path)
203
- for sh in [shape, shape_raw]:
204
- assert sh == (n_rows, n_genes)
205
-
206
-
207
- def test_read_obsm():
208
- adata = get_filled_anndata()
209
- obsm_names = [f"X_test{i}" for i in range(2)]
210
-
211
- for emb in obsm_names:
212
- adata.obsm[emb] = np.random.random(size=(adata.shape[0], 2))
213
-
214
- temp_folder = tempfile.mkdtemp()
215
- file_path = os.path.join(temp_folder, "test_read_obsm.h5ad")
216
- adata.write_h5ad(file_path)
217
-
218
- with h5py.File(file_path, 'r') as f:
219
- cap_adata = CapAnnData(f)
220
-
221
- ss = []
222
- for emb in obsm_names:
223
- assert emb in cap_adata.obsm_keys()
224
- assert cap_adata.obsm[emb].shape == adata.obsm[emb].shape
225
-
226
- x_1 = cap_adata.obsm[obsm_names[0]][:]
227
- x_2 = cap_adata.obsm[obsm_names[1]][:]
228
-
229
- os.remove(file_path)
230
- assert np.allclose(adata.obsm[obsm_names[0]], x_1)
231
- assert np.allclose(adata.obsm[obsm_names[1]], x_2)
232
-
233
-
234
- def test_read_uns():
235
- adata = get_base_anndata()
236
- key1, key2 = "key1", "key2"
237
- keys = (key1, key2)
238
-
239
- adata.uns = {k: {k: k} for k in keys}
240
- temp_folder = tempfile.mkdtemp()
241
- file_path = os.path.join(temp_folder, "test_read_uns.h5ad")
242
- adata.write_h5ad(file_path)
243
-
244
- with h5py.File(file_path, 'r') as f:
245
- cap_adata = CapAnnData(f)
246
-
247
- for k in keys:
248
- assert k in cap_adata.uns
249
-
250
- cap_adata.read_uns(keys=[key1])
251
-
252
- assert cap_adata.uns[key1] == adata.uns[key1] # connected
253
- assert cap_adata.uns[key2] != adata.uns[key2] # not connected
254
-
255
- os.remove(file_path)
256
-
257
-
258
- def test_modify_uns():
259
- adata = get_base_anndata()
260
- adata.uns = {
261
- "field_to_ingore": list(range(100)),
262
- "field_to_rename": "value",
263
- "field_to_expand": {"key1": {}},
264
- "field_to_modify": {"a": "b"}
265
- }
266
- new_name = "renamed_field"
267
- d_to_exp = {"sub_key1": "v1", "sub_key2": "v2"}
268
- v_to_mod = "value"
269
-
270
- temp_folder = tempfile.mkdtemp()
271
- file_path = os.path.join(temp_folder, "test_modify_uns.h5ad")
272
- adata.write_h5ad(file_path)
273
-
274
- with h5py.File(file_path, 'r+') as f:
275
- cap_adata = CapAnnData(f)
276
-
277
- cap_adata.read_uns(keys=["field_to_rename", "field_to_expand", "field_to_modify"])
278
-
279
- cap_adata.uns[new_name] = cap_adata.uns.pop("field_to_rename")
280
- cap_adata.uns["field_to_expand"]["key1"] = d_to_exp
281
- cap_adata.uns["field_to_modify"] = v_to_mod
282
-
283
- cap_adata.overwrite(['uns'])
284
-
285
- adata = ad.read_h5ad(file_path)
286
-
287
- assert adata.uns is not None
288
- assert len(adata.uns.keys()) == 4
289
- assert new_name in adata.uns.keys()
290
- assert adata.uns['field_to_expand']["key1"] == d_to_exp
291
- assert adata.uns['field_to_modify'] == v_to_mod
1
+ from cap_anndata import CapAnnData
2
+ import anndata as ad
3
+ import numpy as np
4
+ import tempfile
5
+ import os
6
+ import h5py
7
+ import pandas as pd
8
+ import scipy.sparse as sp
9
+ import pytest
10
+
11
+
12
+ def get_base_anndata(n_rows: int = 10, n_genes: int = 10, sparse=False) -> ad.AnnData:
13
+ x = np.eye(n_rows, n_genes).astype(np.float32)
14
+ if sparse:
15
+ x = sp.csr_matrix(x, dtype=np.float32)
16
+ adata = ad.AnnData(X=x)
17
+ return adata
18
+
19
+
20
+ def get_filled_anndata(n_rows: int = 10, n_genes: int = 10, sparse=False) -> ad.AnnData:
21
+ adata = get_base_anndata(n_rows, n_genes, sparse)
22
+
23
+ adata.obs["cell_type"] = [f"cell_{i%3}" for i in range(adata.shape[0])]
24
+ adata.obs["number"] = [i / 10 for i in range(adata.shape[0])]
25
+ adata.obs.index = [f"obs_{i}" for i in range(adata.shape[0])]
26
+
27
+ adata.var.index = [f"gene_{i}" for i in range(adata.shape[1])]
28
+ adata.var["filtered"] = [i > 4 for i in range(adata.shape[1])]
29
+ adata.var["gene_names"] = [f"gene_name_{i}" for i in range(adata.shape[1])]
30
+ adata.var["dispersion"] = [i / 100 for i in range(adata.shape[1])]
31
+
32
+ adata.raw = adata
33
+ return adata
34
+
35
+
36
+ def test_read_anndata_file():
37
+ adata = get_base_anndata()
38
+ temp_folder = tempfile.mkdtemp()
39
+ file_path = os.path.join(temp_folder, "test_read_anndata_file.h5ad")
40
+ adata.write_h5ad(file_path)
41
+ del adata
42
+
43
+ with CapAnnData.read_anndata_file(file_path=file_path) as adata:
44
+ assert adata is not None, "AnnData file must be valid!"
45
+
46
+ os.remove(file_path)
47
+
48
+
49
+ def test_read_shape():
50
+ n_rows = 10
51
+ n_genes = 20
52
+ adata = get_base_anndata(n_rows, n_genes)
53
+ temp_folder = tempfile.mkdtemp()
54
+ file_path = os.path.join(temp_folder, "test_read_shape.h5ad")
55
+ adata.write_h5ad(file_path)
56
+
57
+ with h5py.File(file_path) as file:
58
+ cap_adata = CapAnnData(file)
59
+ shape = cap_adata.shape
60
+
61
+ os.remove(file_path)
62
+ assert shape[0] == n_rows
63
+ assert shape[1] == n_genes
64
+
65
+
66
+ def test_read_df():
67
+ adata = get_filled_anndata()
68
+ temp_folder = tempfile.mkdtemp()
69
+ file_path = os.path.join(temp_folder, "test_read_obs.h5ad")
70
+
71
+ adata.write_h5ad(file_path)
72
+
73
+ with h5py.File(file_path, 'r') as file:
74
+ cap_adata = CapAnnData(file)
75
+ cap_adata.read_obs()
76
+ cap_adata.read_var()
77
+ cap_adata.read_var(raw=True)
78
+
79
+ os.remove(file_path)
80
+ pd.testing.assert_frame_equal(adata.obs, cap_adata.obs, check_frame_type=False)
81
+ pd.testing.assert_frame_equal(adata.var, cap_adata.var, check_frame_type=False)
82
+ pd.testing.assert_frame_equal(adata.raw.var, cap_adata.raw.var, check_frame_type=False)
83
+
84
+
85
+ def test_partial_read():
86
+ adata = get_filled_anndata()
87
+ temp_folder = tempfile.mkdtemp()
88
+ file_path = os.path.join(temp_folder, "test_partial_read.h5ad")
89
+ adata.write_h5ad(file_path)
90
+
91
+ with h5py.File(file_path, 'r') as file:
92
+ cap_adata = CapAnnData(file)
93
+ cap_adata.read_obs(columns=['cell_type'])
94
+ cap_adata.read_obs(columns=['cell_type'])
95
+ cap_adata.read_var(columns=['dispersion'])
96
+ cap_adata.read_var(columns=['dispersion'], raw=True)
97
+
98
+ os.remove(file_path)
99
+
100
+ assert len(adata.obs.columns) == len(cap_adata.obs.column_order)
101
+ assert len(adata.var.columns) == len(cap_adata.var.column_order)
102
+ assert len(adata.raw.var.columns) == len(cap_adata.raw.var.column_order)
103
+
104
+ assert len(cap_adata.obs.columns) == 1
105
+ assert len(cap_adata.var.columns) == 1
106
+ assert len(cap_adata.raw.var.columns) == 1
107
+
108
+ pd.testing.assert_index_equal(adata.obs.index, cap_adata.obs.index)
109
+ pd.testing.assert_index_equal(adata.var.index, cap_adata.var.index)
110
+ pd.testing.assert_index_equal(adata.raw.var.index, cap_adata.raw.var.index)
111
+
112
+
113
+ def test_overwrite_df():
114
+ adata = get_filled_anndata()
115
+ temp_folder = tempfile.mkdtemp()
116
+ file_path = os.path.join(temp_folder, "test_overwrite_df.h5ad")
117
+ adata.write_h5ad(file_path)
118
+
119
+ with h5py.File(file_path, 'r+') as file:
120
+ cap_adata = CapAnnData(file)
121
+ cap_adata.read_obs(columns=["cell_type"])
122
+ cap_adata.obs["cell_type"] = [f"new_cell_type_{i%2}" for i in range(cap_adata.shape[0])]
123
+ cap_adata.obs["const_str"] = "some string"
124
+ ref_obs = cap_adata.obs.copy()
125
+
126
+ # Modify 'var'
127
+ cap_adata.read_var()
128
+ cap_adata.var["gene_names"] = [f"new_gene_{i}" for i in range(cap_adata.shape[1])]
129
+ cap_adata.var["extra_info"] = np.random.rand(cap_adata.shape[1])
130
+ ref_var = cap_adata.var.copy()
131
+
132
+ # Modify 'raw.var', assuming 'raw' is also a CapAnnData
133
+ cap_adata.read_var(raw=True)
134
+ cap_adata.raw.var["gene_names"] = [f"raw_new_gene_{i}" for i in range(cap_adata.raw.shape[1])]
135
+ cap_adata.raw.var["extra_info"] = np.random.rand(cap_adata.shape[1])
136
+ ref_raw_var = cap_adata.raw.var.copy()
137
+
138
+ cap_adata.overwrite(['obs', 'var', 'raw.var'])
139
+
140
+ adata = ad.read_h5ad(file_path)
141
+ os.remove(file_path)
142
+
143
+ # Assert changes in 'obs'
144
+ assert all([c in adata.obs.columns for c in ref_obs.columns])
145
+ pd.testing.assert_frame_equal(ref_obs, adata.obs[ref_obs.columns.to_list()], check_frame_type=False)
146
+
147
+ # Assert changes in 'var'
148
+ assert all([c in adata.var.columns for c in ref_var.columns])
149
+ pd.testing.assert_frame_equal(ref_var, adata.var[ref_var.columns.to_list()], check_frame_type=False)
150
+
151
+ # Assert changes in 'raw.var'
152
+ assert all([c in adata.raw.var.columns for c in ref_raw_var.columns])
153
+ pd.testing.assert_frame_equal(ref_raw_var, adata.raw.var[ref_raw_var.columns.to_list()], check_frame_type=False)
154
+
155
+
156
+ @pytest.mark.parametrize("sparse", [False, True])
157
+ @pytest.mark.parametrize("vertical_slice", [None, False, True, "mask"])
158
+ def test_link_x(sparse, vertical_slice):
159
+ adata = get_filled_anndata(sparse=sparse)
160
+ temp_folder = tempfile.mkdtemp()
161
+ file_path = os.path.join(temp_folder, "test_link_x.h5ad")
162
+ adata.write_h5ad(file_path)
163
+
164
+ if vertical_slice is None:
165
+ s_ = np.s_[:]
166
+ elif vertical_slice == "mask":
167
+ mask = np.array([i < 5 for i in range(adata.shape[0])])
168
+ s_ = np.s_[mask, :5]
169
+ else:
170
+ # slice over var or obs
171
+ s_ = np.s_[:, 0:5] if vertical_slice else np.s_[0:5, :]
172
+
173
+ with h5py.File(file_path, 'r') as file:
174
+ cap_adata = CapAnnData(file)
175
+ x = cap_adata.X[s_]
176
+ raw_x = cap_adata.raw.X[s_]
177
+
178
+ os.remove(file_path)
179
+ if sparse:
180
+ assert np.allclose(adata.X.A[s_], x.A)
181
+ assert np.allclose(adata.raw.X.A[s_], raw_x.A)
182
+ else:
183
+ assert np.allclose(adata.X[s_], x)
184
+ assert np.allclose(adata.raw.X[s_], raw_x)
185
+
186
+
187
+ @pytest.mark.parametrize("sparse", [False, True])
188
+ def test_shape(sparse):
189
+ n_rows = 15
190
+ n_genes = 25
191
+
192
+ adata = get_filled_anndata(n_rows, n_genes, sparse)
193
+ temp_folder = tempfile.mkdtemp()
194
+ file_path = os.path.join(temp_folder, "test_shape.h5ad")
195
+ adata.write_h5ad(file_path)
196
+
197
+ with h5py.File(file_path) as file:
198
+ cap_adata = CapAnnData(file)
199
+ shape = cap_adata.shape
200
+ shape_raw = cap_adata.raw.shape
201
+
202
+ os.remove(file_path)
203
+ for sh in [shape, shape_raw]:
204
+ assert sh == (n_rows, n_genes)
205
+
206
+
207
+ def test_read_obsm():
208
+ adata = get_filled_anndata()
209
+ obsm_names = [f"X_test{i}" for i in range(2)]
210
+
211
+ for emb in obsm_names:
212
+ adata.obsm[emb] = np.random.random(size=(adata.shape[0], 2))
213
+
214
+ temp_folder = tempfile.mkdtemp()
215
+ file_path = os.path.join(temp_folder, "test_read_obsm.h5ad")
216
+ adata.write_h5ad(file_path)
217
+
218
+ with h5py.File(file_path, 'r') as f:
219
+ cap_adata = CapAnnData(f)
220
+
221
+ ss = []
222
+ for emb in obsm_names:
223
+ assert emb in cap_adata.obsm_keys()
224
+ assert cap_adata.obsm[emb].shape == adata.obsm[emb].shape
225
+
226
+ x_1 = cap_adata.obsm[obsm_names[0]][:]
227
+ x_2 = cap_adata.obsm[obsm_names[1]][:]
228
+
229
+ os.remove(file_path)
230
+ assert np.allclose(adata.obsm[obsm_names[0]], x_1)
231
+ assert np.allclose(adata.obsm[obsm_names[1]], x_2)
232
+
233
+
234
+ def test_read_uns():
235
+ adata = get_base_anndata()
236
+ key1, key2 = "key1", "key2"
237
+ keys = (key1, key2)
238
+
239
+ adata.uns = {k: {k: k} for k in keys}
240
+ temp_folder = tempfile.mkdtemp()
241
+ file_path = os.path.join(temp_folder, "test_read_uns.h5ad")
242
+ adata.write_h5ad(file_path)
243
+
244
+ with h5py.File(file_path, 'r') as f:
245
+ cap_adata = CapAnnData(f)
246
+
247
+ for k in keys:
248
+ assert k in cap_adata.uns
249
+
250
+ cap_adata.read_uns(keys=[key1])
251
+
252
+ assert cap_adata.uns[key1] == adata.uns[key1] # connected
253
+ assert cap_adata.uns[key2] != adata.uns[key2] # not connected
254
+
255
+ os.remove(file_path)
256
+
257
+
258
+ def test_modify_uns():
259
+ adata = get_base_anndata()
260
+ adata.uns = {
261
+ "field_to_ingore": list(range(100)),
262
+ "field_to_rename": "value",
263
+ "field_to_expand": {"key1": {}},
264
+ "field_to_modify": {"a": "b"}
265
+ }
266
+ new_name = "renamed_field"
267
+ d_to_exp = {"sub_key1": "v1", "sub_key2": "v2"}
268
+ v_to_mod = "value"
269
+
270
+ temp_folder = tempfile.mkdtemp()
271
+ file_path = os.path.join(temp_folder, "test_modify_uns.h5ad")
272
+ adata.write_h5ad(file_path)
273
+
274
+ with h5py.File(file_path, 'r+') as f:
275
+ cap_adata = CapAnnData(f)
276
+
277
+ cap_adata.read_uns(keys=["field_to_rename", "field_to_expand", "field_to_modify"])
278
+
279
+ cap_adata.uns[new_name] = cap_adata.uns.pop("field_to_rename")
280
+ cap_adata.uns["field_to_expand"]["key1"] = d_to_exp
281
+ cap_adata.uns["field_to_modify"] = v_to_mod
282
+
283
+ cap_adata.overwrite(['uns'])
284
+
285
+ adata = ad.read_h5ad(file_path)
286
+
287
+ assert adata.uns is not None
288
+ assert len(adata.uns.keys()) == 4
289
+ assert new_name in adata.uns.keys()
290
+ assert adata.uns['field_to_expand']["key1"] == d_to_exp
291
+ assert adata.uns['field_to_modify'] == v_to_mod
292
+
293
+
294
+ def test_empty_obs_override():
295
+ """
296
+ Covers overwriting obs that was read from a file with no obs columns; see the issue:
297
+ https://github.com/cellannotation/cap-anndata/pull/5
298
+ """
299
+ adata = get_base_anndata()
300
+ temp_folder = tempfile.mkdtemp()
301
+ file_path = os.path.join(temp_folder, "test_modify_uns.h5ad")
302
+ adata.write_h5ad(file_path)
303
+
304
+ with h5py.File(file_path, 'r+') as f:
305
+ cap_adata = CapAnnData(f)
306
+ cap_adata.read_obs()
307
+
308
+ cap_adata.obs["cell_type_1"] = pd.Series(data=np.nan, index=cap_adata.obs.index, dtype="category")
309
+ cap_adata.obs["cell_type_new"] = pd.Series(data=np.nan, index=cap_adata.obs.index, dtype="category")
310
+ cap_adata.overwrite(fields=["obs"])
File without changes
File without changes
File without changes