cap-anndata 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- cap_anndata/backed_df.py +32 -20
- cap_anndata/cap_anndata.py +64 -39
- {cap_anndata-0.3.1.dist-info → cap_anndata-0.4.0.dist-info}/METADATA +18 -7
- cap_anndata-0.4.0.dist-info/RECORD +10 -0
- {cap_anndata-0.3.1.dist-info → cap_anndata-0.4.0.dist-info}/WHEEL +1 -1
- cap_anndata-0.3.1.dist-info/RECORD +0 -10
- {cap_anndata-0.3.1.dist-info → cap_anndata-0.4.0.dist-info}/LICENSE +0 -0
- {cap_anndata-0.3.1.dist-info → cap_anndata-0.4.0.dist-info}/top_level.txt +0 -0
cap_anndata/backed_df.py
CHANGED
@@ -1,13 +1,10 @@
|
|
1
1
|
import pandas as pd
|
2
2
|
import numpy as np
|
3
3
|
from typing import List, Any, Union
|
4
|
-
import logging
|
5
4
|
|
6
5
|
from pandas._typing import Self
|
7
6
|
from pandas.core.generic import bool_t
|
8
7
|
|
9
|
-
logger = logging.getLogger(__name__)
|
10
|
-
|
11
8
|
|
12
9
|
class CapAnnDataDF(pd.DataFrame):
|
13
10
|
"""
|
@@ -19,26 +16,37 @@ class CapAnnDataDF(pd.DataFrame):
|
|
19
16
|
|
20
17
|
_metadata = ["column_order"]
|
21
18
|
|
19
|
+
def column_order_array(self) -> np.array:
|
20
|
+
order = self.column_order
|
21
|
+
if order is not None and isinstance(order, List):
|
22
|
+
# Convert it to numpy array of str elements
|
23
|
+
return np.array(order, dtype=object)
|
24
|
+
else:
|
25
|
+
return order
|
26
|
+
|
22
27
|
def rename_column(self, old_name: str, new_name: str) -> None:
|
23
|
-
i = np.where(self.
|
24
|
-
|
28
|
+
i = np.where(self.column_order_array() == old_name)[0]
|
29
|
+
tmp_array = self.column_order_array().copy()
|
30
|
+
tmp_array[i] = new_name
|
31
|
+
self.column_order = tmp_array.copy()
|
25
32
|
self.rename(columns={old_name: new_name}, inplace=True)
|
26
33
|
|
27
34
|
def remove_column(self, col_name: str) -> None:
|
28
|
-
i = np.where(self.
|
29
|
-
self.column_order = np.delete(self.
|
35
|
+
i = np.where(self.column_order_array() == col_name)[0]
|
36
|
+
self.column_order = np.delete(self.column_order_array(), i)
|
30
37
|
self.drop(columns=[col_name], inplace=True)
|
31
38
|
|
32
39
|
def __setitem__(self, key, value) -> None:
|
33
|
-
if key not in self.
|
34
|
-
self.column_order = np.append(self.
|
40
|
+
if key not in self.column_order_array():
|
41
|
+
self.column_order = np.append(self.column_order_array(), key)
|
35
42
|
return super().__setitem__(key, value)
|
36
43
|
|
37
44
|
@classmethod
|
38
|
-
def from_df(cls, df: pd.DataFrame, column_order: List[str] = None) -> Self:
|
45
|
+
def from_df(cls, df: pd.DataFrame, column_order: Union[np.array, List[str], None] = None) -> Self:
|
39
46
|
if column_order is None:
|
40
47
|
column_order = df.columns.to_numpy()
|
41
|
-
|
48
|
+
elif isinstance(column_order, List):
|
49
|
+
column_order = np.array(column_order)
|
42
50
|
new_inst = cls(df)
|
43
51
|
new_inst.column_order = column_order
|
44
52
|
return new_inst
|
@@ -47,23 +55,27 @@ class CapAnnDataDF(pd.DataFrame):
|
|
47
55
|
result = super().join(other=other, **kwargs)
|
48
56
|
if isinstance(other, CapAnnDataDF):
|
49
57
|
new_columns = [
|
50
|
-
col for col in other.
|
58
|
+
col for col in other.column_order_array() if col not in self.column_order_array()
|
51
59
|
]
|
52
60
|
else:
|
53
|
-
new_columns = [col for col in other.columns if col not in self.
|
54
|
-
column_order = np.append(self.
|
55
|
-
|
61
|
+
new_columns = [col for col in other.columns if col not in self.column_order_array()]
|
62
|
+
column_order = np.append(self.column_order_array(), new_columns)
|
63
|
+
df = self.from_df(result, column_order=column_order)
|
64
|
+
return df
|
56
65
|
|
57
66
|
def merge(self, right, **kwargs) -> Self:
|
58
67
|
result = super().merge(right=right, **kwargs)
|
59
68
|
if isinstance(right, CapAnnDataDF):
|
60
69
|
new_columns = [
|
61
|
-
col for col in right.
|
70
|
+
col for col in right.column_order_array() if col not in self.column_order_array()
|
62
71
|
]
|
63
72
|
else:
|
64
|
-
new_columns = [col for col in right.columns if col not in self.
|
65
|
-
column_order = np.append(self.
|
66
|
-
|
73
|
+
new_columns = [col for col in right.columns if col not in self.column_order_array()]
|
74
|
+
column_order = np.append(self.column_order_array(), new_columns)
|
75
|
+
df = self.from_df(result, column_order=column_order)
|
76
|
+
return df
|
67
77
|
|
68
78
|
def copy(self, deep: Union[bool_t, None] = True) -> Self:
|
69
|
-
|
79
|
+
column_order = self.column_order_array()
|
80
|
+
df = self.from_df(super().copy(deep=deep), column_order=column_order)
|
81
|
+
return df
|
cap_anndata/cap_anndata.py
CHANGED
@@ -7,19 +7,33 @@ import scipy.sparse as ss
|
|
7
7
|
from packaging import version
|
8
8
|
|
9
9
|
if version.parse(ad.__version__) < version.parse("0.11.0"):
|
10
|
-
from anndata.experimental import
|
10
|
+
from anndata.experimental import (
|
11
|
+
sparse_dataset,
|
12
|
+
read_elem,
|
13
|
+
write_elem,
|
14
|
+
CSRDataset,
|
15
|
+
CSCDataset,
|
16
|
+
)
|
11
17
|
else:
|
12
|
-
from anndata.io import
|
18
|
+
from anndata.io import (
|
19
|
+
sparse_dataset,
|
20
|
+
read_elem,
|
21
|
+
write_elem,
|
22
|
+
)
|
23
|
+
from anndata.abc import (
|
24
|
+
CSRDataset,
|
25
|
+
CSCDataset,
|
26
|
+
)
|
13
27
|
|
14
28
|
from cap_anndata import CapAnnDataDF, CapAnnDataDict
|
15
29
|
|
16
30
|
logger = logging.getLogger(__name__)
|
17
31
|
|
18
32
|
X_NOTATION = Union[
|
19
|
-
h5py.Dataset,
|
33
|
+
h5py.Dataset, CSRDataset, CSCDataset, None
|
20
34
|
]
|
21
35
|
ARRAY_MAPPING_NOTATION = CapAnnDataDict[str, X_NOTATION]
|
22
|
-
|
36
|
+
FIELDS_SUPPORTED_TO_OVERWRITE = ["obs", "var", "raw.var", "uns", "layers", "obsm", "varm", "obsp", "varp"]
|
23
37
|
NotLinkedObject: Final = "__NotLinkedObject"
|
24
38
|
|
25
39
|
|
@@ -57,15 +71,7 @@ class BaseLayerMatrixAndDf:
|
|
57
71
|
return shape
|
58
72
|
|
59
73
|
def _lazy_df_load(self, key: str) -> CapAnnDataDF:
|
60
|
-
|
61
|
-
attribute = self._path_to_content + key
|
62
|
-
column_order = self._read_attr(self._file[attribute], "column-order")
|
63
|
-
df.column_order = column_order
|
64
|
-
if df.column_order.dtype != object:
|
65
|
-
# empty DataFrame will have column_order as float64
|
66
|
-
# which leads to failure in overwrite method
|
67
|
-
df.column_order = df.column_order.astype(object)
|
68
|
-
return df
|
74
|
+
return self._read_df(key=key, columns=[])
|
69
75
|
|
70
76
|
@staticmethod
|
71
77
|
def _read_attr(obj: Union[h5py.Group, h5py.Dataset], attr_name: str) -> any:
|
@@ -93,8 +99,10 @@ class BaseLayerMatrixAndDf:
|
|
93
99
|
cols_to_read = [c for c in columns if c in column_order]
|
94
100
|
df = CapAnnDataDF()
|
95
101
|
df.column_order = column_order
|
102
|
+
|
96
103
|
index_col = self._read_attr(h5_group, "_index")
|
97
|
-
|
104
|
+
index = read_elem(h5_group[index_col])
|
105
|
+
df.index = index
|
98
106
|
|
99
107
|
for col in cols_to_read:
|
100
108
|
df[col] = read_elem(h5_group[col])
|
@@ -366,37 +374,43 @@ class CapAnnData(BaseLayerMatrixAndDf):
|
|
366
374
|
return list(self.obsm.keys())
|
367
375
|
|
368
376
|
def obs_keys(self) -> List[str]:
|
369
|
-
return self.obs.
|
377
|
+
return self.obs.column_order_array().tolist()
|
370
378
|
|
371
379
|
def var_keys(self) -> List[str]:
|
372
|
-
return self.var.
|
380
|
+
return self.var.column_order_array().tolist()
|
381
|
+
|
382
|
+
def field_to_entity(self, key):
|
383
|
+
if key == "obs":
|
384
|
+
return self.obs
|
385
|
+
elif key == "var":
|
386
|
+
return self.var
|
387
|
+
elif key == "raw.var":
|
388
|
+
return self.raw.var if self.raw is not None else None
|
389
|
+
elif key == "uns":
|
390
|
+
return self.uns
|
391
|
+
elif key == "layers":
|
392
|
+
return self.layers
|
393
|
+
elif key == "obsm":
|
394
|
+
return self.obsm
|
395
|
+
elif key == "varm":
|
396
|
+
return self.varm
|
397
|
+
elif key == "obsp":
|
398
|
+
return self.obsp
|
399
|
+
elif key == "varp":
|
400
|
+
return self.varp
|
401
|
+
else:
|
402
|
+
raise KeyError(
|
403
|
+
f"The field {key} is not supported! The list of supported fields are equal to {FIELDS_SUPPORTED_TO_OVERWRITE} "
|
404
|
+
f"attributes of the CapAnnData class."
|
405
|
+
)
|
373
406
|
|
374
407
|
def overwrite(self, fields: List[str] = None, compression: str = "lzf") -> None:
|
375
|
-
field_to_entity = {
|
376
|
-
"obs": self.obs,
|
377
|
-
"var": self.var,
|
378
|
-
"raw.var": self.raw.var if self.raw is not None else None,
|
379
|
-
"uns": self.uns,
|
380
|
-
"layers": self.layers,
|
381
|
-
"obsm": self.obsm,
|
382
|
-
"varm": self.varm,
|
383
|
-
"obsp": self.obsp,
|
384
|
-
"varp": self.varp,
|
385
|
-
}
|
386
|
-
|
387
408
|
if fields is None:
|
388
|
-
fields =
|
389
|
-
else:
|
390
|
-
for f in fields:
|
391
|
-
if f not in field_to_entity.keys():
|
392
|
-
raise KeyError(
|
393
|
-
f"The field {f} is not supported! The list of supported fields are equal to supported "
|
394
|
-
f"attributes of the CapAnnData class: obs, var, raw.var and uns."
|
395
|
-
)
|
409
|
+
fields = FIELDS_SUPPORTED_TO_OVERWRITE
|
396
410
|
|
397
411
|
for key in ["obs", "var", "raw.var"]:
|
398
412
|
if key in fields:
|
399
|
-
entity: CapAnnDataDF = field_to_entity
|
413
|
+
entity: CapAnnDataDF = self.field_to_entity(key)
|
400
414
|
if entity is None:
|
401
415
|
continue
|
402
416
|
|
@@ -407,11 +421,22 @@ class CapAnnData(BaseLayerMatrixAndDf):
|
|
407
421
|
f"{key}/{col}", entity[col].values, compression=compression
|
408
422
|
)
|
409
423
|
|
410
|
-
column_order = entity.
|
424
|
+
column_order = entity.column_order_array()
|
411
425
|
if (
|
412
426
|
column_order.size == 0
|
413
427
|
): # Refs https://github.com/cellannotation/cap-anndata/issues/6
|
414
428
|
column_order = np.array([], dtype=np.float64)
|
429
|
+
|
430
|
+
# Index update
|
431
|
+
index_name = entity.index.name
|
432
|
+
if not index_name:
|
433
|
+
index_name = "_index"
|
434
|
+
self._file[key].attrs["_index"] = index_name
|
435
|
+
index_col = self._read_attr(self._file[key], "_index")
|
436
|
+
self._write_elem(
|
437
|
+
f"{key}/{index_col}", entity.index.to_numpy(), compression=compression
|
438
|
+
)
|
439
|
+
|
415
440
|
self._file[key].attrs["column-order"] = column_order
|
416
441
|
|
417
442
|
if "uns" in fields:
|
@@ -424,7 +449,7 @@ class CapAnnData(BaseLayerMatrixAndDf):
|
|
424
449
|
|
425
450
|
for field in ["layers", "obsm", "varm", "obsp", "varp"]:
|
426
451
|
if field in fields:
|
427
|
-
for key in field_to_entity
|
452
|
+
for key in self.field_to_entity(field).keys_to_remove:
|
428
453
|
del self._file[f"{field}/{key}"]
|
429
454
|
|
430
455
|
def create_layer(
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.2
|
2
2
|
Name: cap_anndata
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.4.0
|
4
4
|
Summary: Partial read/write of AnnData (h5ad) files for low-memory operations with large datasets.
|
5
5
|
Home-page: https://github.com/cellannotation/cap-anndata
|
6
6
|
Author: R. Mukhin, A. Isaev
|
@@ -14,12 +14,23 @@ Classifier: Operating System :: OS Independent
|
|
14
14
|
Requires-Python: >=3.9
|
15
15
|
Description-Content-Type: text/markdown
|
16
16
|
License-File: LICENSE
|
17
|
-
Requires-Dist: numpy
|
18
|
-
Requires-Dist: pandas
|
19
|
-
Requires-Dist: anndata
|
17
|
+
Requires-Dist: numpy>=1.23.5
|
18
|
+
Requires-Dist: pandas>=2.2.0
|
19
|
+
Requires-Dist: anndata>=0.10.0
|
20
20
|
Provides-Extra: dev
|
21
|
-
Requires-Dist: pytest
|
22
|
-
Requires-Dist: setuptools
|
21
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
22
|
+
Requires-Dist: setuptools~=69.1.1; extra == "dev"
|
23
|
+
Dynamic: author
|
24
|
+
Dynamic: author-email
|
25
|
+
Dynamic: classifier
|
26
|
+
Dynamic: description
|
27
|
+
Dynamic: description-content-type
|
28
|
+
Dynamic: home-page
|
29
|
+
Dynamic: project-url
|
30
|
+
Dynamic: provides-extra
|
31
|
+
Dynamic: requires-dist
|
32
|
+
Dynamic: requires-python
|
33
|
+
Dynamic: summary
|
23
34
|
|
24
35
|
# CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
|
25
36
|
|
@@ -0,0 +1,10 @@
|
|
1
|
+
cap_anndata/__init__.py,sha256=WRAQEDsWTvLbJWVUA5FmKCVrD2GN4oRd5I3c8jc9ajo,197
|
2
|
+
cap_anndata/backed_df.py,sha256=2OVomvTY51V05sYwEXg-4JYBgd9iJCA2-Lt7nEAL1Ug,3255
|
3
|
+
cap_anndata/backed_dict.py,sha256=Hb1SjnKuQ13mBUitQ5sL3kmcQ1j3GgB19r3yXkC0oIo,1019
|
4
|
+
cap_anndata/cap_anndata.py,sha256=-Lp6wxPncVcl_TaECnE6uHTfD9j_Ow_rScvpAWKK_fs,21081
|
5
|
+
cap_anndata/reader.py,sha256=UpZBCjaS4-K2w_9m6IuYetO9LwmEEJ5KvAw9aAoMRno,1609
|
6
|
+
cap_anndata-0.4.0.dist-info/LICENSE,sha256=XXTH6JikkxH7Gqy9VEj4crSizuwxzv04ROzkQ-ZS6o4,1532
|
7
|
+
cap_anndata-0.4.0.dist-info/METADATA,sha256=IXvItMAdXH-CunN3fNlyHPNFmxfoF9dOrU58tl17eLQ,2539
|
8
|
+
cap_anndata-0.4.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
9
|
+
cap_anndata-0.4.0.dist-info/top_level.txt,sha256=GKi_Uk4LUhXwWBfFCTIyJvEoJqFREt_4uH4CWgeLsg4,12
|
10
|
+
cap_anndata-0.4.0.dist-info/RECORD,,
|
@@ -1,10 +0,0 @@
|
|
1
|
-
cap_anndata/__init__.py,sha256=WRAQEDsWTvLbJWVUA5FmKCVrD2GN4oRd5I3c8jc9ajo,197
|
2
|
-
cap_anndata/backed_df.py,sha256=bMNsArbPjA-TN7eQB4-9Y2l3s8o03-dM4hPnOR9tROc,2622
|
3
|
-
cap_anndata/backed_dict.py,sha256=Hb1SjnKuQ13mBUitQ5sL3kmcQ1j3GgB19r3yXkC0oIo,1019
|
4
|
-
cap_anndata/cap_anndata.py,sha256=uQh49Kwu2cE4-ebgOvb78mMGA_afkZcsr71j6f8EX2I,20600
|
5
|
-
cap_anndata/reader.py,sha256=UpZBCjaS4-K2w_9m6IuYetO9LwmEEJ5KvAw9aAoMRno,1609
|
6
|
-
cap_anndata-0.3.1.dist-info/LICENSE,sha256=XXTH6JikkxH7Gqy9VEj4crSizuwxzv04ROzkQ-ZS6o4,1532
|
7
|
-
cap_anndata-0.3.1.dist-info/METADATA,sha256=688YuF45IuOvu1Hqxbt_O1aeYkoMX4tjV0b2hb1WY8I,2304
|
8
|
-
cap_anndata-0.3.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
9
|
-
cap_anndata-0.3.1.dist-info/top_level.txt,sha256=GKi_Uk4LUhXwWBfFCTIyJvEoJqFREt_4uH4CWgeLsg4,12
|
10
|
-
cap_anndata-0.3.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|