cap-anndata 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cap_anndata/backed_df.py +32 -20
- cap_anndata/cap_anndata.py +64 -39
- {cap_anndata-0.3.1.dist-info → cap_anndata-0.4.0.dist-info}/METADATA +18 -7
- cap_anndata-0.4.0.dist-info/RECORD +10 -0
- {cap_anndata-0.3.1.dist-info → cap_anndata-0.4.0.dist-info}/WHEEL +1 -1
- cap_anndata-0.3.1.dist-info/RECORD +0 -10
- {cap_anndata-0.3.1.dist-info → cap_anndata-0.4.0.dist-info}/LICENSE +0 -0
- {cap_anndata-0.3.1.dist-info → cap_anndata-0.4.0.dist-info}/top_level.txt +0 -0
cap_anndata/backed_df.py
CHANGED
@@ -1,13 +1,10 @@
|
|
1
1
|
import pandas as pd
|
2
2
|
import numpy as np
|
3
3
|
from typing import List, Any, Union
|
4
|
-
import logging
|
5
4
|
|
6
5
|
from pandas._typing import Self
|
7
6
|
from pandas.core.generic import bool_t
|
8
7
|
|
9
|
-
logger = logging.getLogger(__name__)
|
10
|
-
|
11
8
|
|
12
9
|
class CapAnnDataDF(pd.DataFrame):
|
13
10
|
"""
|
@@ -19,26 +16,37 @@ class CapAnnDataDF(pd.DataFrame):
|
|
19
16
|
|
20
17
|
_metadata = ["column_order"]
|
21
18
|
|
19
|
+
def column_order_array(self) -> np.array:
|
20
|
+
order = self.column_order
|
21
|
+
if order is not None and isinstance(order, List):
|
22
|
+
# Convert it to numpy array of str elements
|
23
|
+
return np.array(order, dtype=object)
|
24
|
+
else:
|
25
|
+
return order
|
26
|
+
|
22
27
|
def rename_column(self, old_name: str, new_name: str) -> None:
|
23
|
-
i = np.where(self.
|
24
|
-
|
28
|
+
i = np.where(self.column_order_array() == old_name)[0]
|
29
|
+
tmp_array = self.column_order_array().copy()
|
30
|
+
tmp_array[i] = new_name
|
31
|
+
self.column_order = tmp_array.copy()
|
25
32
|
self.rename(columns={old_name: new_name}, inplace=True)
|
26
33
|
|
27
34
|
def remove_column(self, col_name: str) -> None:
|
28
|
-
i = np.where(self.
|
29
|
-
self.column_order = np.delete(self.
|
35
|
+
i = np.where(self.column_order_array() == col_name)[0]
|
36
|
+
self.column_order = np.delete(self.column_order_array(), i)
|
30
37
|
self.drop(columns=[col_name], inplace=True)
|
31
38
|
|
32
39
|
def __setitem__(self, key, value) -> None:
|
33
|
-
if key not in self.
|
34
|
-
self.column_order = np.append(self.
|
40
|
+
if key not in self.column_order_array():
|
41
|
+
self.column_order = np.append(self.column_order_array(), key)
|
35
42
|
return super().__setitem__(key, value)
|
36
43
|
|
37
44
|
@classmethod
|
38
|
-
def from_df(cls, df: pd.DataFrame, column_order: List[str] = None) -> Self:
|
45
|
+
def from_df(cls, df: pd.DataFrame, column_order: Union[np.array, List[str], None] = None) -> Self:
|
39
46
|
if column_order is None:
|
40
47
|
column_order = df.columns.to_numpy()
|
41
|
-
|
48
|
+
elif isinstance(column_order, List):
|
49
|
+
column_order = np.array(column_order)
|
42
50
|
new_inst = cls(df)
|
43
51
|
new_inst.column_order = column_order
|
44
52
|
return new_inst
|
@@ -47,23 +55,27 @@ class CapAnnDataDF(pd.DataFrame):
|
|
47
55
|
result = super().join(other=other, **kwargs)
|
48
56
|
if isinstance(other, CapAnnDataDF):
|
49
57
|
new_columns = [
|
50
|
-
col for col in other.
|
58
|
+
col for col in other.column_order_array() if col not in self.column_order_array()
|
51
59
|
]
|
52
60
|
else:
|
53
|
-
new_columns = [col for col in other.columns if col not in self.
|
54
|
-
column_order = np.append(self.
|
55
|
-
|
61
|
+
new_columns = [col for col in other.columns if col not in self.column_order_array()]
|
62
|
+
column_order = np.append(self.column_order_array(), new_columns)
|
63
|
+
df = self.from_df(result, column_order=column_order)
|
64
|
+
return df
|
56
65
|
|
57
66
|
def merge(self, right, **kwargs) -> Self:
|
58
67
|
result = super().merge(right=right, **kwargs)
|
59
68
|
if isinstance(right, CapAnnDataDF):
|
60
69
|
new_columns = [
|
61
|
-
col for col in right.
|
70
|
+
col for col in right.column_order_array() if col not in self.column_order_array()
|
62
71
|
]
|
63
72
|
else:
|
64
|
-
new_columns = [col for col in right.columns if col not in self.
|
65
|
-
column_order = np.append(self.
|
66
|
-
|
73
|
+
new_columns = [col for col in right.columns if col not in self.column_order_array()]
|
74
|
+
column_order = np.append(self.column_order_array(), new_columns)
|
75
|
+
df = self.from_df(result, column_order=column_order)
|
76
|
+
return df
|
67
77
|
|
68
78
|
def copy(self, deep: Union[bool_t, None] = True) -> Self:
|
69
|
-
|
79
|
+
column_order = self.column_order_array()
|
80
|
+
df = self.from_df(super().copy(deep=deep), column_order=column_order)
|
81
|
+
return df
|
cap_anndata/cap_anndata.py
CHANGED
@@ -7,19 +7,33 @@ import scipy.sparse as ss
|
|
7
7
|
from packaging import version
|
8
8
|
|
9
9
|
if version.parse(ad.__version__) < version.parse("0.11.0"):
|
10
|
-
from anndata.experimental import
|
10
|
+
from anndata.experimental import (
|
11
|
+
sparse_dataset,
|
12
|
+
read_elem,
|
13
|
+
write_elem,
|
14
|
+
CSRDataset,
|
15
|
+
CSCDataset,
|
16
|
+
)
|
11
17
|
else:
|
12
|
-
from anndata.io import
|
18
|
+
from anndata.io import (
|
19
|
+
sparse_dataset,
|
20
|
+
read_elem,
|
21
|
+
write_elem,
|
22
|
+
)
|
23
|
+
from anndata.abc import (
|
24
|
+
CSRDataset,
|
25
|
+
CSCDataset,
|
26
|
+
)
|
13
27
|
|
14
28
|
from cap_anndata import CapAnnDataDF, CapAnnDataDict
|
15
29
|
|
16
30
|
logger = logging.getLogger(__name__)
|
17
31
|
|
18
32
|
X_NOTATION = Union[
|
19
|
-
h5py.Dataset,
|
33
|
+
h5py.Dataset, CSRDataset, CSCDataset, None
|
20
34
|
]
|
21
35
|
ARRAY_MAPPING_NOTATION = CapAnnDataDict[str, X_NOTATION]
|
22
|
-
|
36
|
+
FIELDS_SUPPORTED_TO_OVERWRITE = ["obs", "var", "raw.var", "uns", "layers", "obsm", "varm", "obsp", "varp"]
|
23
37
|
NotLinkedObject: Final = "__NotLinkedObject"
|
24
38
|
|
25
39
|
|
@@ -57,15 +71,7 @@ class BaseLayerMatrixAndDf:
|
|
57
71
|
return shape
|
58
72
|
|
59
73
|
def _lazy_df_load(self, key: str) -> CapAnnDataDF:
|
60
|
-
|
61
|
-
attribute = self._path_to_content + key
|
62
|
-
column_order = self._read_attr(self._file[attribute], "column-order")
|
63
|
-
df.column_order = column_order
|
64
|
-
if df.column_order.dtype != object:
|
65
|
-
# empty DataFrame will have column_order as float64
|
66
|
-
# which leads to failure in overwrite method
|
67
|
-
df.column_order = df.column_order.astype(object)
|
68
|
-
return df
|
74
|
+
return self._read_df(key=key, columns=[])
|
69
75
|
|
70
76
|
@staticmethod
|
71
77
|
def _read_attr(obj: Union[h5py.Group, h5py.Dataset], attr_name: str) -> any:
|
@@ -93,8 +99,10 @@ class BaseLayerMatrixAndDf:
|
|
93
99
|
cols_to_read = [c for c in columns if c in column_order]
|
94
100
|
df = CapAnnDataDF()
|
95
101
|
df.column_order = column_order
|
102
|
+
|
96
103
|
index_col = self._read_attr(h5_group, "_index")
|
97
|
-
|
104
|
+
index = read_elem(h5_group[index_col])
|
105
|
+
df.index = index
|
98
106
|
|
99
107
|
for col in cols_to_read:
|
100
108
|
df[col] = read_elem(h5_group[col])
|
@@ -366,37 +374,43 @@ class CapAnnData(BaseLayerMatrixAndDf):
|
|
366
374
|
return list(self.obsm.keys())
|
367
375
|
|
368
376
|
def obs_keys(self) -> List[str]:
|
369
|
-
return self.obs.
|
377
|
+
return self.obs.column_order_array().tolist()
|
370
378
|
|
371
379
|
def var_keys(self) -> List[str]:
|
372
|
-
return self.var.
|
380
|
+
return self.var.column_order_array().tolist()
|
381
|
+
|
382
|
+
def field_to_entity(self, key):
|
383
|
+
if key == "obs":
|
384
|
+
return self.obs
|
385
|
+
elif key == "var":
|
386
|
+
return self.var
|
387
|
+
elif key == "raw.var":
|
388
|
+
return self.raw.var if self.raw is not None else None
|
389
|
+
elif key == "uns":
|
390
|
+
return self.uns
|
391
|
+
elif key == "layers":
|
392
|
+
return self.layers
|
393
|
+
elif key == "obsm":
|
394
|
+
return self.obsm
|
395
|
+
elif key == "varm":
|
396
|
+
return self.varm
|
397
|
+
elif key == "obsp":
|
398
|
+
return self.obsp
|
399
|
+
elif key == "varp":
|
400
|
+
return self.varp
|
401
|
+
else:
|
402
|
+
raise KeyError(
|
403
|
+
f"The field {key} is not supported! The list of supported fields are equal to {FIELDS_SUPPORTED_TO_OVERWRITE} "
|
404
|
+
f"attributes of the CapAnnData class."
|
405
|
+
)
|
373
406
|
|
374
407
|
def overwrite(self, fields: List[str] = None, compression: str = "lzf") -> None:
|
375
|
-
field_to_entity = {
|
376
|
-
"obs": self.obs,
|
377
|
-
"var": self.var,
|
378
|
-
"raw.var": self.raw.var if self.raw is not None else None,
|
379
|
-
"uns": self.uns,
|
380
|
-
"layers": self.layers,
|
381
|
-
"obsm": self.obsm,
|
382
|
-
"varm": self.varm,
|
383
|
-
"obsp": self.obsp,
|
384
|
-
"varp": self.varp,
|
385
|
-
}
|
386
|
-
|
387
408
|
if fields is None:
|
388
|
-
fields =
|
389
|
-
else:
|
390
|
-
for f in fields:
|
391
|
-
if f not in field_to_entity.keys():
|
392
|
-
raise KeyError(
|
393
|
-
f"The field {f} is not supported! The list of supported fields are equal to supported "
|
394
|
-
f"attributes of the CapAnnData class: obs, var, raw.var and uns."
|
395
|
-
)
|
409
|
+
fields = FIELDS_SUPPORTED_TO_OVERWRITE
|
396
410
|
|
397
411
|
for key in ["obs", "var", "raw.var"]:
|
398
412
|
if key in fields:
|
399
|
-
entity: CapAnnDataDF = field_to_entity
|
413
|
+
entity: CapAnnDataDF = self.field_to_entity(key)
|
400
414
|
if entity is None:
|
401
415
|
continue
|
402
416
|
|
@@ -407,11 +421,22 @@ class CapAnnData(BaseLayerMatrixAndDf):
|
|
407
421
|
f"{key}/{col}", entity[col].values, compression=compression
|
408
422
|
)
|
409
423
|
|
410
|
-
column_order = entity.
|
424
|
+
column_order = entity.column_order_array()
|
411
425
|
if (
|
412
426
|
column_order.size == 0
|
413
427
|
): # Refs https://github.com/cellannotation/cap-anndata/issues/6
|
414
428
|
column_order = np.array([], dtype=np.float64)
|
429
|
+
|
430
|
+
# Index update
|
431
|
+
index_name = entity.index.name
|
432
|
+
if not index_name:
|
433
|
+
index_name = "_index"
|
434
|
+
self._file[key].attrs["_index"] = index_name
|
435
|
+
index_col = self._read_attr(self._file[key], "_index")
|
436
|
+
self._write_elem(
|
437
|
+
f"{key}/{index_col}", entity.index.to_numpy(), compression=compression
|
438
|
+
)
|
439
|
+
|
415
440
|
self._file[key].attrs["column-order"] = column_order
|
416
441
|
|
417
442
|
if "uns" in fields:
|
@@ -424,7 +449,7 @@ class CapAnnData(BaseLayerMatrixAndDf):
|
|
424
449
|
|
425
450
|
for field in ["layers", "obsm", "varm", "obsp", "varp"]:
|
426
451
|
if field in fields:
|
427
|
-
for key in field_to_entity
|
452
|
+
for key in self.field_to_entity(field).keys_to_remove:
|
428
453
|
del self._file[f"{field}/{key}"]
|
429
454
|
|
430
455
|
def create_layer(
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.2
|
2
2
|
Name: cap_anndata
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.4.0
|
4
4
|
Summary: Partial read/write of AnnData (h5ad) files for low-memory operations with large datasets.
|
5
5
|
Home-page: https://github.com/cellannotation/cap-anndata
|
6
6
|
Author: R. Mukhin, A. Isaev
|
@@ -14,12 +14,23 @@ Classifier: Operating System :: OS Independent
|
|
14
14
|
Requires-Python: >=3.9
|
15
15
|
Description-Content-Type: text/markdown
|
16
16
|
License-File: LICENSE
|
17
|
-
Requires-Dist: numpy
|
18
|
-
Requires-Dist: pandas
|
19
|
-
Requires-Dist: anndata
|
17
|
+
Requires-Dist: numpy>=1.23.5
|
18
|
+
Requires-Dist: pandas>=2.2.0
|
19
|
+
Requires-Dist: anndata>=0.10.0
|
20
20
|
Provides-Extra: dev
|
21
|
-
Requires-Dist: pytest
|
22
|
-
Requires-Dist: setuptools
|
21
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
22
|
+
Requires-Dist: setuptools~=69.1.1; extra == "dev"
|
23
|
+
Dynamic: author
|
24
|
+
Dynamic: author-email
|
25
|
+
Dynamic: classifier
|
26
|
+
Dynamic: description
|
27
|
+
Dynamic: description-content-type
|
28
|
+
Dynamic: home-page
|
29
|
+
Dynamic: project-url
|
30
|
+
Dynamic: provides-extra
|
31
|
+
Dynamic: requires-dist
|
32
|
+
Dynamic: requires-python
|
33
|
+
Dynamic: summary
|
23
34
|
|
24
35
|
# CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
|
25
36
|
|
@@ -0,0 +1,10 @@
|
|
1
|
+
cap_anndata/__init__.py,sha256=WRAQEDsWTvLbJWVUA5FmKCVrD2GN4oRd5I3c8jc9ajo,197
|
2
|
+
cap_anndata/backed_df.py,sha256=2OVomvTY51V05sYwEXg-4JYBgd9iJCA2-Lt7nEAL1Ug,3255
|
3
|
+
cap_anndata/backed_dict.py,sha256=Hb1SjnKuQ13mBUitQ5sL3kmcQ1j3GgB19r3yXkC0oIo,1019
|
4
|
+
cap_anndata/cap_anndata.py,sha256=-Lp6wxPncVcl_TaECnE6uHTfD9j_Ow_rScvpAWKK_fs,21081
|
5
|
+
cap_anndata/reader.py,sha256=UpZBCjaS4-K2w_9m6IuYetO9LwmEEJ5KvAw9aAoMRno,1609
|
6
|
+
cap_anndata-0.4.0.dist-info/LICENSE,sha256=XXTH6JikkxH7Gqy9VEj4crSizuwxzv04ROzkQ-ZS6o4,1532
|
7
|
+
cap_anndata-0.4.0.dist-info/METADATA,sha256=IXvItMAdXH-CunN3fNlyHPNFmxfoF9dOrU58tl17eLQ,2539
|
8
|
+
cap_anndata-0.4.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
9
|
+
cap_anndata-0.4.0.dist-info/top_level.txt,sha256=GKi_Uk4LUhXwWBfFCTIyJvEoJqFREt_4uH4CWgeLsg4,12
|
10
|
+
cap_anndata-0.4.0.dist-info/RECORD,,
|
@@ -1,10 +0,0 @@
|
|
1
|
-
cap_anndata/__init__.py,sha256=WRAQEDsWTvLbJWVUA5FmKCVrD2GN4oRd5I3c8jc9ajo,197
|
2
|
-
cap_anndata/backed_df.py,sha256=bMNsArbPjA-TN7eQB4-9Y2l3s8o03-dM4hPnOR9tROc,2622
|
3
|
-
cap_anndata/backed_dict.py,sha256=Hb1SjnKuQ13mBUitQ5sL3kmcQ1j3GgB19r3yXkC0oIo,1019
|
4
|
-
cap_anndata/cap_anndata.py,sha256=uQh49Kwu2cE4-ebgOvb78mMGA_afkZcsr71j6f8EX2I,20600
|
5
|
-
cap_anndata/reader.py,sha256=UpZBCjaS4-K2w_9m6IuYetO9LwmEEJ5KvAw9aAoMRno,1609
|
6
|
-
cap_anndata-0.3.1.dist-info/LICENSE,sha256=XXTH6JikkxH7Gqy9VEj4crSizuwxzv04ROzkQ-ZS6o4,1532
|
7
|
-
cap_anndata-0.3.1.dist-info/METADATA,sha256=688YuF45IuOvu1Hqxbt_O1aeYkoMX4tjV0b2hb1WY8I,2304
|
8
|
-
cap_anndata-0.3.1.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
9
|
-
cap_anndata-0.3.1.dist-info/top_level.txt,sha256=GKi_Uk4LUhXwWBfFCTIyJvEoJqFREt_4uH4CWgeLsg4,12
|
10
|
-
cap_anndata-0.3.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|