cap-anndata 0.3.1__tar.gz → 0.4.0__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/PKG-INFO +13 -2
- cap_anndata-0.4.0/cap_anndata/backed_df.py +81 -0
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/cap_anndata/cap_anndata.py +64 -39
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/cap_anndata.egg-info/PKG-INFO +13 -2
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/setup.py +1 -1
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/test/test_backed_df.py +10 -12
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/test/test_cap_anndata.py +127 -0
- cap_anndata-0.3.1/cap_anndata/backed_df.py +0 -69
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/LICENSE +0 -0
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/README.md +0 -0
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/cap_anndata/__init__.py +0 -0
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/cap_anndata/backed_dict.py +0 -0
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/cap_anndata/reader.py +0 -0
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/cap_anndata.egg-info/SOURCES.txt +0 -0
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/cap_anndata.egg-info/dependency_links.txt +0 -0
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/cap_anndata.egg-info/requires.txt +0 -0
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/cap_anndata.egg-info/top_level.txt +0 -0
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/setup.cfg +0 -0
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/test/test_backed_dict.py +0 -0
- {cap_anndata-0.3.1 → cap_anndata-0.4.0}/test/test_reader.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.2
|
2
2
|
Name: cap_anndata
|
3
|
-
Version: 0.3.1
|
3
|
+
Version: 0.4.0
|
4
4
|
Summary: Partial read/write of AnnData (h5ad) files for low-memory operations with large datasets.
|
5
5
|
Home-page: https://github.com/cellannotation/cap-anndata
|
6
6
|
Author: R. Mukhin, A. Isaev
|
@@ -20,6 +20,17 @@ Requires-Dist: anndata>=0.10.0
|
|
20
20
|
Provides-Extra: dev
|
21
21
|
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
22
22
|
Requires-Dist: setuptools~=69.1.1; extra == "dev"
|
23
|
+
Dynamic: author
|
24
|
+
Dynamic: author-email
|
25
|
+
Dynamic: classifier
|
26
|
+
Dynamic: description
|
27
|
+
Dynamic: description-content-type
|
28
|
+
Dynamic: home-page
|
29
|
+
Dynamic: project-url
|
30
|
+
Dynamic: provides-extra
|
31
|
+
Dynamic: requires-dist
|
32
|
+
Dynamic: requires-python
|
33
|
+
Dynamic: summary
|
23
34
|
|
24
35
|
# CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
|
25
36
|
|
@@ -0,0 +1,81 @@
|
|
1
|
+
import pandas as pd
|
2
|
+
import numpy as np
|
3
|
+
from typing import List, Any, Union
|
4
|
+
|
5
|
+
from pandas._typing import Self
|
6
|
+
from pandas.core.generic import bool_t
|
7
|
+
|
8
|
+
|
9
|
+
class CapAnnDataDF(pd.DataFrame):
    """
    The class to expand the pandas DataFrame behaviour to support partial
    reading and writing of AnnData obs and var (raw.var) fields.
    The main feature of the class is handling <column-order> attribute
    which must be a copy of h5py.Group attribute.

    ``column_order`` may list more labels than the frame currently holds
    (columns that were not loaded), so it is tracked separately from
    ``DataFrame.columns`` and is only ever extended or edited explicitly.
    Internally it is always handled as a 1-D numpy array of ``object`` dtype:
    fixed-width string arrays (``<U...``) silently truncate longer labels on
    item assignment, which is why they are never kept.
    """

    # Registered as pandas metadata so that the attribute is a real instance
    # attribute instead of being interpreted as a column name.
    _metadata = ["column_order"]

    @staticmethod
    def _cap_object_array(labels) -> np.ndarray:
        """Return a fresh 1-D object-dtype array holding ``labels``."""
        return np.array(list(labels), dtype=object)

    def _cap_extended_order(self, other: Any, result: pd.DataFrame) -> np.ndarray:
        """
        Build the column order for ``result`` of a join/merge with ``other``.

        The current order comes first, followed by the labels contributed by
        ``other`` and finally by any column of ``result`` that is still
        untracked (e.g. columns renamed with a suffix, or a joined Series).
        """
        order = self.column_order_array().tolist()
        seen = set(order)

        if isinstance(other, CapAnnDataDF):
            candidates = other.column_order_array().tolist()
        elif isinstance(other, pd.DataFrame):
            candidates = other.columns.tolist()
        else:
            # Series / list of frames: rely on the columns of the result.
            candidates = []

        for label in [*candidates, *result.columns]:
            if label not in seen:
                seen.add(label)
                order.append(label)
        return self._cap_object_array(order)

    def _cap_track_columns(self) -> None:
        """Append every column of the frame that is missing in the order."""
        order = self.column_order_array()
        tracked = set(order.tolist())
        untracked = [label for label in self.columns if label not in tracked]
        self.column_order = self._cap_object_array([*order.tolist(), *untracked])

    def column_order_array(self) -> np.ndarray:
        """
        Return ``column_order`` as a 1-D numpy array of ``object`` dtype.

        Lists and non-object arrays are converted; an object array is returned
        as is. When ``column_order`` was never set (or is None) the current
        columns of the frame are used instead.
        """
        order = getattr(self, "column_order", None)
        if order is None:
            order = self.columns
        return np.asarray(order, dtype=object)

    def rename_column(self, old_name: str, new_name: str) -> None:
        """Rename ``old_name`` to ``new_name`` in the order and in the frame (in place)."""
        # Work on a copy so arrays shared with other frames are not mutated.
        order = self.column_order_array().copy()
        order[order == old_name] = new_name
        self.column_order = order
        self.rename(columns={old_name: new_name}, inplace=True)

    def remove_column(self, col_name: str) -> None:
        """
        Drop ``col_name`` from the frame and from the order (in place).

        Raises:
            KeyError: if the frame has no such column; the order is left
                untouched in that case.
        """
        # Drop first: if it raises, the column order must stay consistent.
        self.drop(columns=[col_name], inplace=True)
        order = self.column_order_array()
        self.column_order = order[order != col_name]

    def __setitem__(self, key, value) -> None:
        """Assign as pandas does, then start tracking any newly created column."""
        # The assignment goes first: ``key`` may be a list of labels, a slice
        # or a boolean mask, so the new labels are taken from the frame itself
        # and nothing is recorded when the assignment fails.
        super().__setitem__(key, value)
        self._cap_track_columns()

    @classmethod
    def from_df(
        cls, df: pd.DataFrame, column_order: Union[np.ndarray, List[str], None] = None
    ) -> "CapAnnDataDF":
        """
        Wrap ``df`` into a CapAnnDataDF.

        Args:
            df: the source frame.
            column_order: labels to store as the column order; defaults to
                the columns of ``df``. The labels are copied into a new
                object-dtype array.
        """
        new_inst = cls(df)
        labels = df.columns if column_order is None else column_order
        new_inst.column_order = cls._cap_object_array(labels)
        return new_inst

    def join(self, other: Any, **kwargs) -> "CapAnnDataDF":
        """Join like ``DataFrame.join`` and return a CapAnnDataDF with the extended order."""
        result = super().join(other=other, **kwargs)
        return self.from_df(result, column_order=self._cap_extended_order(other, result))

    def merge(self, right, **kwargs) -> "CapAnnDataDF":
        """Merge like ``DataFrame.merge`` and return a CapAnnDataDF with the extended order."""
        result = super().merge(right=right, **kwargs)
        return self.from_df(result, column_order=self._cap_extended_order(right, result))

    def copy(self, deep: Union[bool, None] = True) -> "CapAnnDataDF":
        """Copy the frame; the copy gets its own column order array."""
        return self.from_df(super().copy(deep=deep), column_order=self.column_order_array())
|
@@ -7,19 +7,33 @@ import scipy.sparse as ss
|
|
7
7
|
from packaging import version
|
8
8
|
|
9
9
|
if version.parse(ad.__version__) < version.parse("0.11.0"):
|
10
|
-
from anndata.experimental import
|
10
|
+
from anndata.experimental import (
|
11
|
+
sparse_dataset,
|
12
|
+
read_elem,
|
13
|
+
write_elem,
|
14
|
+
CSRDataset,
|
15
|
+
CSCDataset,
|
16
|
+
)
|
11
17
|
else:
|
12
|
-
from anndata.io import
|
18
|
+
from anndata.io import (
|
19
|
+
sparse_dataset,
|
20
|
+
read_elem,
|
21
|
+
write_elem,
|
22
|
+
)
|
23
|
+
from anndata.abc import (
|
24
|
+
CSRDataset,
|
25
|
+
CSCDataset,
|
26
|
+
)
|
13
27
|
|
14
28
|
from cap_anndata import CapAnnDataDF, CapAnnDataDict
|
15
29
|
|
16
30
|
logger = logging.getLogger(__name__)
|
17
31
|
|
18
32
|
X_NOTATION = Union[
|
19
|
-
h5py.Dataset,
|
33
|
+
h5py.Dataset, CSRDataset, CSCDataset, None
|
20
34
|
]
|
21
35
|
ARRAY_MAPPING_NOTATION = CapAnnDataDict[str, X_NOTATION]
|
22
|
-
|
36
|
+
FIELDS_SUPPORTED_TO_OVERWRITE = ["obs", "var", "raw.var", "uns", "layers", "obsm", "varm", "obsp", "varp"]
|
23
37
|
NotLinkedObject: Final = "__NotLinkedObject"
|
24
38
|
|
25
39
|
|
@@ -57,15 +71,7 @@ class BaseLayerMatrixAndDf:
|
|
57
71
|
return shape
|
58
72
|
|
59
73
|
def _lazy_df_load(self, key: str) -> CapAnnDataDF:
|
60
|
-
|
61
|
-
attribute = self._path_to_content + key
|
62
|
-
column_order = self._read_attr(self._file[attribute], "column-order")
|
63
|
-
df.column_order = column_order
|
64
|
-
if df.column_order.dtype != object:
|
65
|
-
# empty DataFrame will have column_order as float64
|
66
|
-
# which leads to failure in overwrite method
|
67
|
-
df.column_order = df.column_order.astype(object)
|
68
|
-
return df
|
74
|
+
return self._read_df(key=key, columns=[])
|
69
75
|
|
70
76
|
@staticmethod
|
71
77
|
def _read_attr(obj: Union[h5py.Group, h5py.Dataset], attr_name: str) -> any:
|
@@ -93,8 +99,10 @@ class BaseLayerMatrixAndDf:
|
|
93
99
|
cols_to_read = [c for c in columns if c in column_order]
|
94
100
|
df = CapAnnDataDF()
|
95
101
|
df.column_order = column_order
|
102
|
+
|
96
103
|
index_col = self._read_attr(h5_group, "_index")
|
97
|
-
|
104
|
+
index = read_elem(h5_group[index_col])
|
105
|
+
df.index = index
|
98
106
|
|
99
107
|
for col in cols_to_read:
|
100
108
|
df[col] = read_elem(h5_group[col])
|
@@ -366,37 +374,43 @@ class CapAnnData(BaseLayerMatrixAndDf):
|
|
366
374
|
return list(self.obsm.keys())
|
367
375
|
|
368
376
|
def obs_keys(self) -> List[str]:
|
369
|
-
return self.obs.column_order.tolist()
|
377
|
+
return self.obs.column_order_array().tolist()
|
370
378
|
|
371
379
|
def var_keys(self) -> List[str]:
|
372
|
-
return self.var.column_order.tolist()
|
380
|
+
return self.var.column_order_array().tolist()
|
381
|
+
|
382
|
+
def field_to_entity(self, key):
    """
    Return the attribute of this object that corresponds to the field ``key``.

    Only the requested attribute is accessed. For "raw.var" the result is
    None when ``self.raw`` is None.

    Raises:
        KeyError: if ``key`` is not one of the supported field names.
    """
    supported = ("obs", "var", "raw.var", "uns", "layers", "obsm", "varm", "obsp", "varp")
    for name in supported:
        if key == name:
            if name == "raw.var":
                # The only nested field: it does not exist without a raw section.
                if self.raw is None:
                    return None
                return self.raw.var
            return getattr(self, name)

    raise KeyError(
        f"The field {key} is not supported! The list of supported fields are equal to {FIELDS_SUPPORTED_TO_OVERWRITE} "
        f"attributes of the CapAnnData class."
    )
|
373
406
|
|
374
407
|
def overwrite(self, fields: List[str] = None, compression: str = "lzf") -> None:
|
375
|
-
field_to_entity = {
|
376
|
-
"obs": self.obs,
|
377
|
-
"var": self.var,
|
378
|
-
"raw.var": self.raw.var if self.raw is not None else None,
|
379
|
-
"uns": self.uns,
|
380
|
-
"layers": self.layers,
|
381
|
-
"obsm": self.obsm,
|
382
|
-
"varm": self.varm,
|
383
|
-
"obsp": self.obsp,
|
384
|
-
"varp": self.varp,
|
385
|
-
}
|
386
|
-
|
387
408
|
if fields is None:
|
388
|
-
fields =
|
389
|
-
else:
|
390
|
-
for f in fields:
|
391
|
-
if f not in field_to_entity.keys():
|
392
|
-
raise KeyError(
|
393
|
-
f"The field {f} is not supported! The list of supported fields are equal to supported "
|
394
|
-
f"attributes of the CapAnnData class: obs, var, raw.var and uns."
|
395
|
-
)
|
409
|
+
fields = FIELDS_SUPPORTED_TO_OVERWRITE
|
396
410
|
|
397
411
|
for key in ["obs", "var", "raw.var"]:
|
398
412
|
if key in fields:
|
399
|
-
entity: CapAnnDataDF = field_to_entity[key]
|
413
|
+
entity: CapAnnDataDF = self.field_to_entity(key)
|
400
414
|
if entity is None:
|
401
415
|
continue
|
402
416
|
|
@@ -407,11 +421,22 @@ class CapAnnData(BaseLayerMatrixAndDf):
|
|
407
421
|
f"{key}/{col}", entity[col].values, compression=compression
|
408
422
|
)
|
409
423
|
|
410
|
-
column_order = entity.column_order
|
424
|
+
column_order = entity.column_order_array()
|
411
425
|
if (
|
412
426
|
column_order.size == 0
|
413
427
|
): # Refs https://github.com/cellannotation/cap-anndata/issues/6
|
414
428
|
column_order = np.array([], dtype=np.float64)
|
429
|
+
|
430
|
+
# Index update
|
431
|
+
index_name = entity.index.name
|
432
|
+
if not index_name:
|
433
|
+
index_name = "_index"
|
434
|
+
self._file[key].attrs["_index"] = index_name
|
435
|
+
index_col = self._read_attr(self._file[key], "_index")
|
436
|
+
self._write_elem(
|
437
|
+
f"{key}/{index_col}", entity.index.to_numpy(), compression=compression
|
438
|
+
)
|
439
|
+
|
415
440
|
self._file[key].attrs["column-order"] = column_order
|
416
441
|
|
417
442
|
if "uns" in fields:
|
@@ -424,7 +449,7 @@ class CapAnnData(BaseLayerMatrixAndDf):
|
|
424
449
|
|
425
450
|
for field in ["layers", "obsm", "varm", "obsp", "varp"]:
|
426
451
|
if field in fields:
|
427
|
-
for key in field_to_entity[field].keys_to_remove:
|
452
|
+
for key in self.field_to_entity(field).keys_to_remove:
|
428
453
|
del self._file[f"{field}/{key}"]
|
429
454
|
|
430
455
|
def create_layer(
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.2
|
2
2
|
Name: cap_anndata
|
3
|
-
Version: 0.3.1
|
3
|
+
Version: 0.4.0
|
4
4
|
Summary: Partial read/write of AnnData (h5ad) files for low-memory operations with large datasets.
|
5
5
|
Home-page: https://github.com/cellannotation/cap-anndata
|
6
6
|
Author: R. Mukhin, A. Isaev
|
@@ -20,6 +20,17 @@ Requires-Dist: anndata>=0.10.0
|
|
20
20
|
Provides-Extra: dev
|
21
21
|
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
22
22
|
Requires-Dist: setuptools~=69.1.1; extra == "dev"
|
23
|
+
Dynamic: author
|
24
|
+
Dynamic: author-email
|
25
|
+
Dynamic: classifier
|
26
|
+
Dynamic: description
|
27
|
+
Dynamic: description-content-type
|
28
|
+
Dynamic: home-page
|
29
|
+
Dynamic: project-url
|
30
|
+
Dynamic: provides-extra
|
31
|
+
Dynamic: requires-dist
|
32
|
+
Dynamic: requires-python
|
33
|
+
Dynamic: summary
|
23
34
|
|
24
35
|
# CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
|
25
36
|
|
@@ -41,9 +41,8 @@ def test_remove_column():
|
|
41
41
|
|
42
42
|
def test_from_df_class_method():
|
43
43
|
data = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
|
44
|
-
new_df = CapAnnDataDF.from_df(data
|
45
|
-
|
46
|
-
assert list(new_df.column_order) == ["B", "A"]
|
44
|
+
new_df = CapAnnDataDF.from_df(data)
|
45
|
+
assert list(new_df.column_order) == ["A", "B"]
|
47
46
|
|
48
47
|
|
49
48
|
def test_column_order_integrity():
|
@@ -59,23 +58,22 @@ def test_column_order_integrity():
|
|
59
58
|
|
60
59
|
def test_join():
|
61
60
|
data1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
|
62
|
-
data2 = pd.DataFrame({"
|
63
|
-
cap_anndata_df1 = CapAnnDataDF.from_df(data1
|
64
|
-
|
65
|
-
cap_anndata_df1 = cap_anndata_df1.join(data2, how="left")
|
61
|
+
data2 = pd.DataFrame({"C": [7, 8, 9], "D": [10, 11, 12]})
|
62
|
+
cap_anndata_df1 = CapAnnDataDF.from_df(data1)
|
63
|
+
cap_anndata_df2 = cap_anndata_df1.join(data2, how="left")
|
66
64
|
|
67
|
-
expected_order = ["A", "B", "C", "D"
|
68
|
-
assert list(
|
69
|
-
assert
|
65
|
+
expected_order = ["A", "B", "C", "D"]
|
66
|
+
assert list(cap_anndata_df2.column_order) == expected_order
|
67
|
+
assert cap_anndata_df2.shape == (3, 4)
|
70
68
|
|
71
69
|
|
72
70
|
def test_merge():
|
73
71
|
data1 = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
|
74
72
|
data2 = pd.DataFrame({"A": [2, 3, 4], "D": [10, 11, 12]})
|
75
|
-
cap_anndata_df1 = CapAnnDataDF.from_df(data1
|
73
|
+
cap_anndata_df1 = CapAnnDataDF.from_df(data1)
|
76
74
|
|
77
75
|
cap_anndata_df1 = cap_anndata_df1.merge(data2, how="inner", on="A")
|
78
76
|
|
79
|
-
expected_order = ["A", "B", "
|
77
|
+
expected_order = ["A", "B", "D"]
|
80
78
|
assert list(cap_anndata_df1.column_order) == expected_order
|
81
79
|
assert cap_anndata_df1.shape == (2, 3)
|
@@ -103,6 +103,26 @@ def test_partial_read():
|
|
103
103
|
pd.testing.assert_index_equal(adata.raw.var.index, cap_adata.raw.var.index)
|
104
104
|
|
105
105
|
|
106
|
+
def test_overwrite_dataframe_before_read_obs():
    """Add an obs column and overwrite obs without reading it first, then check the column is stored."""
    x = np.ones((10, 10), dtype=np.float32)
    source = ad.AnnData(X=x)
    source.obs["columns"] = "value"

    # A private temporary directory keeps the working directory clean and is
    # removed even when one of the assertions below fails.
    with tempfile.TemporaryDirectory() as temp_folder:
        path = os.path.join(temp_folder, "tmp.h5ad")
        source.write_h5ad(path)
        del source

        with read_h5ad(path, True) as adata:
            # https://github.com/cellannotation/cap-anndata/issues/33
            adata.obs["new_column"] = "new_value"
            adata.overwrite(["obs"])

        with read_h5ad(path) as adata:
            adata.read_obs("new_column")
            assert (adata.obs["new_column"] == "new_value").all(), "Wrong values in column!"
|
124
|
+
|
125
|
+
|
106
126
|
@pytest.mark.parametrize("compression", ["gzip", "lzf"])
|
107
127
|
def test_overwrite_df(compression):
|
108
128
|
adata = get_filled_anndata()
|
@@ -110,12 +130,17 @@ def test_overwrite_df(compression):
|
|
110
130
|
file_path = os.path.join(temp_folder, "test_overwrite_df.h5ad")
|
111
131
|
adata.write_h5ad(file_path)
|
112
132
|
|
133
|
+
new_obs_index = None
|
113
134
|
with read_h5ad(file_path, edit=True) as cap_adata:
|
135
|
+
# Modify 'obs'
|
114
136
|
cap_adata.read_obs(columns=["cell_type"])
|
115
137
|
cap_adata.obs["cell_type"] = [
|
116
138
|
f"new_cell_type_{i%2}" for i in range(cap_adata.shape[0])
|
117
139
|
]
|
118
140
|
cap_adata.obs["const_str"] = "some string"
|
141
|
+
# Modify obs 'index'
|
142
|
+
new_obs_index = [s + "_new" for s in cap_adata.obs.index]
|
143
|
+
cap_adata.obs.index = new_obs_index
|
119
144
|
ref_obs = cap_adata.obs.copy()
|
120
145
|
|
121
146
|
# Modify 'var'
|
@@ -144,6 +169,7 @@ def test_overwrite_df(compression):
|
|
144
169
|
pd.testing.assert_frame_equal(
|
145
170
|
ref_obs, adata.obs[ref_obs.columns.to_list()], check_frame_type=False
|
146
171
|
)
|
172
|
+
assert (adata.obs.index == new_obs_index).all(), "Index must be changed!"
|
147
173
|
|
148
174
|
# Assert changes in 'var'
|
149
175
|
assert all([c in adata.var.columns for c in ref_var.columns])
|
@@ -689,3 +715,104 @@ def test_modify_obsp_varp(field):
|
|
689
715
|
assert len(getattr(cap_adata, field).keys()) == 0
|
690
716
|
|
691
717
|
os.remove(file_path)
|
718
|
+
|
719
|
+
|
720
|
+
def test_main_var_layers():
    """Check that the main X/var and raw.X/raw.var are read back with their own values."""
    var_index = [f"ind_{i}" for i in range(10)]
    raw_var_index = [f"raw_ind_{i}" for i in range(10)]

    x = np.eye(10, dtype=np.float32)
    raw_x = x * 2
    adata = ad.AnnData(X=raw_x)
    adata.var.index = raw_var_index
    # presumably stores the current X/var as the raw section before the main
    # ones are replaced below - the assertions rely on raw keeping raw_x
    adata.raw = adata
    adata.X = x
    adata.var.index = var_index

    # The directory (and the file in it) is removed even if an assertion fails.
    with tempfile.TemporaryDirectory() as temp_folder:
        file_path = os.path.join(temp_folder, "test_main_var_layers.h5ad")
        adata.write_h5ad(file_path)

        with read_h5ad(file_path) as cap_anndata:
            assert cap_anndata.var.index.tolist() == var_index
            assert cap_anndata.raw.var.index.tolist() == raw_var_index
            assert np.allclose(cap_anndata.X[:], x)
            assert np.allclose(cap_anndata.raw.X[:], raw_x)
|
743
|
+
|
744
|
+
|
745
|
+
@pytest.mark.parametrize("name", ["barcodes", "", None])
def test_modify_index(name):
    """Overwrite obs with an untouched and then with a replaced index and check what is stored."""
    adata = get_base_anndata()

    # The directory (and the file in it) is removed even if an assertion fails.
    with tempfile.TemporaryDirectory() as temp_folder:
        file_path = os.path.join(temp_folder, "test_modify_index.h5ad")
        adata.write_h5ad(file_path)

        # Step 1: overwriting an unmodified obs must leave the file content equal.
        with read_h5ad(file_path=file_path, edit=True) as cap_adata:
            cap_adata.read_obs()
            cap_adata.overwrite(["obs"])

        reloaded = ad.read_h5ad(file_path)
        pd.testing.assert_frame_equal(
            left=adata.obs,
            right=reloaded.obs,
            check_dtype=True,
            check_index_type=True,
            check_names=True,
        )

        # Step 2: replace the index (with or without a name) and overwrite.
        with read_h5ad(file_path=file_path, edit=True) as cap_adata:
            cap_adata.read_obs()
            cap_adata.obs.index = pd.Series(
                data=[f"cell_{i}" for i in range(cap_adata.shape[0])], name=name
            )
            cap_adata.overwrite(["obs"])

        with read_h5ad(file_path=file_path, edit=False) as cap_adata:
            cap_adata.read_obs()
            obs = cap_adata.obs

            assert obs is not None, "DataFrame must be loaded!"
            assert obs.index is not None, "DataFrame must have Index!"
            if not name:
                # Both "" and None are expected to end up as an unnamed index.
                assert obs.index.name is None, "Index name must not be set!"
            else:
                assert obs.index.name == name, "Index name must be set!"
            assert obs.index.to_list() == [
                f"cell_{i}" for i in range(cap_adata.shape[0])
            ], "Wrong index values!"
|
782
|
+
|
783
|
+
|
784
|
+
def test_column_order_changes():
    """Change the obs column order via the frame and via ``column_order`` and check what is stored."""
    adata = get_base_anndata(n_rows=3, n_genes=2, sparse=False)
    data = {"A": [1, 2, 3], "B": [4, 5, 6]}

    # The directory (and the file in it) is removed even if an assertion fails.
    with tempfile.TemporaryDirectory() as temp_folder:
        file_path = os.path.join(temp_folder, "test_column_order.h5ad")
        adata.write_h5ad(file_path)

        # Replace obs with a two-column frame.
        with read_h5ad(file_path=file_path, edit=True) as cap_adata:
            df = pd.DataFrame(data)
            cap_df = CapAnnDataDF.from_df(df)
            cap_adata.obs = CapAnnDataDF.from_df(cap_df)
            cap_adata.overwrite(["obs"])

        new_column_order = list(data.keys())
        new_column_order.reverse()
        with read_h5ad(file_path=file_path, edit=True) as cap_adata:
            cap_adata.read_obs()
            df = cap_adata.obs[new_column_order]  # change order via dataframe
            cap_df = CapAnnDataDF.from_df(df)
            cap_adata.obs = cap_df
            cap_adata.overwrite(["obs"])

        new_column_order.reverse()
        with read_h5ad(file_path=file_path, edit=True) as cap_adata:
            cap_adata.read_obs()
            cap_df = cap_adata.obs
            cap_df.column_order = new_column_order  # change order via column_order
            cap_adata.obs = cap_df
            cap_adata.overwrite(["obs"])

        # The order written last must be the one that is read back.
        with read_h5ad(file_path=file_path) as cap_adata:
            cap_adata.read_obs()
            assert list(cap_adata.obs.column_order) == new_column_order
            assert list(cap_adata.obs.columns) == new_column_order
|
@@ -1,69 +0,0 @@
|
|
1
|
-
import pandas as pd
|
2
|
-
import numpy as np
|
3
|
-
from typing import List, Any, Union
|
4
|
-
import logging
|
5
|
-
|
6
|
-
from pandas._typing import Self
|
7
|
-
from pandas.core.generic import bool_t
|
8
|
-
|
9
|
-
logger = logging.getLogger(__name__)
|
10
|
-
|
11
|
-
|
12
|
-
class CapAnnDataDF(pd.DataFrame):
|
13
|
-
"""
|
14
|
-
The class to expand the pandas DataFrame behaviour to support partial
|
15
|
-
reading and writing of AnnData obs and var (raw.var) fields.
|
16
|
-
The main feature of the class is handling <column-order> attribute
|
17
|
-
which must be a copy of h5py.Group attribute
|
18
|
-
"""
|
19
|
-
|
20
|
-
_metadata = ["column_order"]
|
21
|
-
|
22
|
-
def rename_column(self, old_name: str, new_name: str) -> None:
|
23
|
-
i = np.where(self.column_order == old_name)[0]
|
24
|
-
self.column_order[i] = new_name
|
25
|
-
self.rename(columns={old_name: new_name}, inplace=True)
|
26
|
-
|
27
|
-
def remove_column(self, col_name: str) -> None:
|
28
|
-
i = np.where(self.column_order == col_name)[0]
|
29
|
-
self.column_order = np.delete(self.column_order, i)
|
30
|
-
self.drop(columns=[col_name], inplace=True)
|
31
|
-
|
32
|
-
def __setitem__(self, key, value) -> None:
|
33
|
-
if key not in self.column_order:
|
34
|
-
self.column_order = np.append(self.column_order, key)
|
35
|
-
return super().__setitem__(key, value)
|
36
|
-
|
37
|
-
@classmethod
|
38
|
-
def from_df(cls, df: pd.DataFrame, column_order: List[str] = None) -> Self:
|
39
|
-
if column_order is None:
|
40
|
-
column_order = df.columns.to_numpy()
|
41
|
-
|
42
|
-
new_inst = cls(df)
|
43
|
-
new_inst.column_order = column_order
|
44
|
-
return new_inst
|
45
|
-
|
46
|
-
def join(self, other: Any, **kwargs) -> Self:
|
47
|
-
result = super().join(other=other, **kwargs)
|
48
|
-
if isinstance(other, CapAnnDataDF):
|
49
|
-
new_columns = [
|
50
|
-
col for col in other.column_order if col not in self.column_order
|
51
|
-
]
|
52
|
-
else:
|
53
|
-
new_columns = [col for col in other.columns if col not in self.column_order]
|
54
|
-
column_order = np.append(self.column_order, new_columns)
|
55
|
-
return self.from_df(result, column_order=column_order)
|
56
|
-
|
57
|
-
def merge(self, right, **kwargs) -> Self:
|
58
|
-
result = super().merge(right=right, **kwargs)
|
59
|
-
if isinstance(right, CapAnnDataDF):
|
60
|
-
new_columns = [
|
61
|
-
col for col in right.column_order if col not in self.column_order
|
62
|
-
]
|
63
|
-
else:
|
64
|
-
new_columns = [col for col in right.columns if col not in self.column_order]
|
65
|
-
column_order = np.append(self.column_order, new_columns)
|
66
|
-
return self.from_df(result, column_order=column_order)
|
67
|
-
|
68
|
-
def copy(self, deep: Union[bool_t, None] = True) -> Self:
|
69
|
-
return self.from_df(super().copy(deep=deep), column_order=self.column_order)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|