cap-anndata 0.2.2__tar.gz → 0.3.1__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {cap_anndata-0.2.2 → cap_anndata-0.3.1}/LICENSE +28 -28
- cap_anndata-0.3.1/PKG-INFO +56 -0
- cap_anndata-0.3.1/README.md +33 -0
- {cap_anndata-0.2.2 → cap_anndata-0.3.1}/cap_anndata/__init__.py +10 -10
- {cap_anndata-0.2.2 → cap_anndata-0.3.1}/cap_anndata/backed_df.py +69 -69
- cap_anndata-0.3.1/cap_anndata/backed_dict.py +34 -0
- cap_anndata-0.3.1/cap_anndata/cap_anndata.py +600 -0
- cap_anndata-0.3.1/cap_anndata/reader.py +57 -0
- cap_anndata-0.3.1/cap_anndata.egg-info/PKG-INFO +56 -0
- {cap_anndata-0.2.2 → cap_anndata-0.3.1}/cap_anndata.egg-info/SOURCES.txt +2 -2
- cap_anndata-0.3.1/cap_anndata.egg-info/requires.txt +7 -0
- {cap_anndata-0.2.2 → cap_anndata-0.3.1}/setup.cfg +4 -4
- {cap_anndata-0.2.2 → cap_anndata-0.3.1}/setup.py +33 -32
- cap_anndata-0.3.1/test/test_backed_df.py +81 -0
- cap_anndata-0.3.1/test/test_backed_dict.py +36 -0
- {cap_anndata-0.2.2 → cap_anndata-0.3.1}/test/test_cap_anndata.py +691 -433
- cap_anndata-0.3.1/test/test_reader.py +63 -0
- cap_anndata-0.2.2/PKG-INFO +0 -253
- cap_anndata-0.2.2/README.md +0 -231
- cap_anndata-0.2.2/cap_anndata/backed_uns.py +0 -28
- cap_anndata-0.2.2/cap_anndata/cap_anndata.py +0 -287
- cap_anndata-0.2.2/cap_anndata/reader.py +0 -44
- cap_anndata-0.2.2/cap_anndata.egg-info/PKG-INFO +0 -253
- cap_anndata-0.2.2/cap_anndata.egg-info/requires.txt +0 -8
- cap_anndata-0.2.2/test/test_backed_df.py +0 -81
- cap_anndata-0.2.2/test/test_backed_uns.py +0 -36
- cap_anndata-0.2.2/test/test_reader.py +0 -22
- {cap_anndata-0.2.2 → cap_anndata-0.3.1}/cap_anndata.egg-info/dependency_links.txt +0 -0
- {cap_anndata-0.2.2 → cap_anndata-0.3.1}/cap_anndata.egg-info/top_level.txt +0 -0
@@ -1,28 +1,28 @@
|
|
1
|
-
BSD 3-Clause License
|
2
|
-
|
3
|
-
Copyright (c) 2024, R. Mukhin, A. Isaev, Cell-Annotation Platform
|
4
|
-
|
5
|
-
Redistribution and use in source and binary forms, with or without
|
6
|
-
modification, are permitted provided that the following conditions are met:
|
7
|
-
|
8
|
-
1. Redistributions of source code must retain the above copyright notice, this
|
9
|
-
list of conditions and the following disclaimer.
|
10
|
-
|
11
|
-
2. Redistributions in binary form must reproduce the above copyright notice,
|
12
|
-
this list of conditions and the following disclaimer in the documentation
|
13
|
-
and/or other materials provided with the distribution.
|
14
|
-
|
15
|
-
3. Neither the name of the copyright holder nor the names of its
|
16
|
-
contributors may be used to endorse or promote products derived from
|
17
|
-
this software without specific prior written permission.
|
18
|
-
|
19
|
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
20
|
-
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
21
|
-
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
22
|
-
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
23
|
-
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
24
|
-
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
25
|
-
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
26
|
-
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
27
|
-
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
-
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
1
|
+
BSD 3-Clause License
|
2
|
+
|
3
|
+
Copyright (c) 2024, R. Mukhin, A. Isaev, Cell-Annotation Platform
|
4
|
+
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
7
|
+
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
9
|
+
list of conditions and the following disclaimer.
|
10
|
+
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
13
|
+
and/or other materials provided with the distribution.
|
14
|
+
|
15
|
+
3. Neither the name of the copyright holder nor the names of its
|
16
|
+
contributors may be used to endorse or promote products derived from
|
17
|
+
this software without specific prior written permission.
|
18
|
+
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
20
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
21
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@@ -0,0 +1,56 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: cap_anndata
|
3
|
+
Version: 0.3.1
|
4
|
+
Summary: Partial read/write of AnnData (h5ad) files for low-memory operations with large datasets.
|
5
|
+
Home-page: https://github.com/cellannotation/cap-anndata
|
6
|
+
Author: R. Mukhin, A. Isaev
|
7
|
+
Author-email: roman@ebookapplications.com
|
8
|
+
Project-URL: Bug Tracker, https://github.com/cellannotation/cap-anndata/issues
|
9
|
+
Project-URL: Changelog, https://github.com/cellannotation/cap-anndata/blob/main/CHANGELOG.md
|
10
|
+
Project-URL: Documentation, https://github.com/cellannotation/cap-anndata/blob/main/HOWTO.md
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
12
|
+
Classifier: License :: OSI Approved :: BSD License
|
13
|
+
Classifier: Operating System :: OS Independent
|
14
|
+
Requires-Python: >=3.9
|
15
|
+
Description-Content-Type: text/markdown
|
16
|
+
License-File: LICENSE
|
17
|
+
Requires-Dist: numpy>=1.23.5
|
18
|
+
Requires-Dist: pandas>=2.2.0
|
19
|
+
Requires-Dist: anndata>=0.10.0
|
20
|
+
Provides-Extra: dev
|
21
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
22
|
+
Requires-Dist: setuptools~=69.1.1; extra == "dev"
|
23
|
+
|
24
|
+
# CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
|
25
|
+
|
26
|
+
## Overview
|
27
|
+
CAP-AnnData offers functionality for selective reading and writing of [AnnData](https://pypi.org/project/anndata/)
|
28
|
+
file fields without the need to load the entire dataset (or even an entire field) into memory.
|
29
|
+
For example, it allows reading and modifying a single `obs` column while loading nothing into memory except the column itself.
|
30
|
+
The package aims to replicate the original AnnData API as closely as possible,
|
31
|
+
while providing additional features for efficient data manipulation for heavy datasets.
|
32
|
+
|
33
|
+
## Installation
|
34
|
+
Install CAP-AnnData via pip:
|
35
|
+
|
36
|
+
```commandline
|
37
|
+
pip install -U cap-anndata
|
38
|
+
```
|
39
|
+
|
40
|
+
## Basic Example
|
41
|
+
|
42
|
+
The example below shows how to read a single `obs` column, create a new `obs` column, and propagate it to the `.h5ad` file.
|
43
|
+
```python
|
44
|
+
from cap_anndata import read_h5ad
|
45
|
+
|
46
|
+
file_path = "your_data.h5ad"
|
47
|
+
with read_h5ad(file_path=file_path, edit=True) as cap_adata:
|
48
|
+
print(cap_adata.obs_keys()) # ['a', 'b', 'c']
|
49
|
+
print(cap_adata.obs) # Empty DataFrame
|
50
|
+
cap_adata.read_obs(columns=['a'])
|
51
|
+
print(cap_adata.obs.columns) # ['a']
|
52
|
+
cap_adata.obs['new_col'] = cap_adata.obs['a']
|
53
|
+
cap_adata.overwrite(fields=['obs'])
|
54
|
+
```
|
55
|
+
|
56
|
+
More examples can be found in the [How-To](https://github.com/cellannotation/cap-anndata/blob/main/HOWTO.md) file.
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
|
2
|
+
|
3
|
+
## Overview
|
4
|
+
CAP-AnnData offers functionality for selective reading and writing of [AnnData](https://pypi.org/project/anndata/)
|
5
|
+
file fields without the need to load the entire dataset (or even an entire field) into memory.
|
6
|
+
For example, it allows reading and modifying a single `obs` column while loading nothing into memory except the column itself.
|
7
|
+
The package aims to replicate the original AnnData API as closely as possible,
|
8
|
+
while providing additional features for efficient data manipulation for heavy datasets.
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
Install CAP-AnnData via pip:
|
12
|
+
|
13
|
+
```commandline
|
14
|
+
pip install -U cap-anndata
|
15
|
+
```
|
16
|
+
|
17
|
+
## Basic Example
|
18
|
+
|
19
|
+
The example below shows how to read a single `obs` column, create a new `obs` column, and propagate it to the `.h5ad` file.
|
20
|
+
```python
|
21
|
+
from cap_anndata import read_h5ad
|
22
|
+
|
23
|
+
file_path = "your_data.h5ad"
|
24
|
+
with read_h5ad(file_path=file_path, edit=True) as cap_adata:
|
25
|
+
print(cap_adata.obs_keys()) # ['a', 'b', 'c']
|
26
|
+
print(cap_adata.obs) # Empty DataFrame
|
27
|
+
cap_adata.read_obs(columns=['a'])
|
28
|
+
print(cap_adata.obs.columns) # ['a']
|
29
|
+
cap_adata.obs['new_col'] = cap_adata.obs['a']
|
30
|
+
cap_adata.overwrite(fields=['obs'])
|
31
|
+
```
|
32
|
+
|
33
|
+
More examples can be found in the [How-To](https://github.com/cellannotation/cap-anndata/blob/main/HOWTO.md) file.
|
@@ -1,10 +1,10 @@
|
|
1
|
-
from .backed_df import CapAnnDataDF
|
2
|
-
from .
|
3
|
-
from .cap_anndata import CapAnnData
|
4
|
-
from .reader import (
|
5
|
-
read_directly,
|
6
|
-
read_h5ad,
|
7
|
-
)
|
8
|
-
|
9
|
-
|
10
|
-
__all__ = ["CapAnnData"]
|
1
|
+
from .backed_df import CapAnnDataDF
|
2
|
+
from .backed_dict import CapAnnDataDict
|
3
|
+
from .cap_anndata import CapAnnData
|
4
|
+
from .reader import (
|
5
|
+
read_directly,
|
6
|
+
read_h5ad,
|
7
|
+
)
|
8
|
+
|
9
|
+
|
10
|
+
__all__ = ["CapAnnData"]
|
@@ -1,69 +1,69 @@
|
|
1
|
-
import pandas as pd
|
2
|
-
import numpy as np
|
3
|
-
from typing import List, Any, Union
|
4
|
-
import logging
|
5
|
-
|
6
|
-
from pandas._typing import Self
|
7
|
-
from pandas.core.generic import bool_t
|
8
|
-
|
9
|
-
logger = logging.getLogger(__name__)
|
10
|
-
|
11
|
-
|
12
|
-
class CapAnnDataDF(pd.DataFrame):
|
13
|
-
"""
|
14
|
-
The class to expand the pandas DataFrame behaviour to support partial
|
15
|
-
reading and writing of AnnData obs and var (raw.var) fields.
|
16
|
-
The main feature of the class is handling <column-order> attribute
|
17
|
-
which must be a copy of h5py.Group attribute
|
18
|
-
"""
|
19
|
-
|
20
|
-
_metadata = ["column_order"]
|
21
|
-
|
22
|
-
def rename_column(self, old_name: str, new_name: str) -> None:
|
23
|
-
i = np.where(self.column_order == old_name)[0]
|
24
|
-
self.column_order[i] = new_name
|
25
|
-
self.rename(columns={old_name: new_name}, inplace=True)
|
26
|
-
|
27
|
-
def remove_column(self, col_name: str) -> None:
|
28
|
-
i = np.where(self.column_order == col_name)[0]
|
29
|
-
self.column_order = np.delete(self.column_order, i)
|
30
|
-
self.drop(columns=[col_name], inplace=True)
|
31
|
-
|
32
|
-
def __setitem__(self, key, value) -> None:
|
33
|
-
if key not in self.column_order:
|
34
|
-
self.column_order = np.append(self.column_order, key)
|
35
|
-
return super().__setitem__(key, value)
|
36
|
-
|
37
|
-
@classmethod
|
38
|
-
def from_df(cls, df: pd.DataFrame, column_order: List[str] = None) -> Self:
|
39
|
-
if column_order is None:
|
40
|
-
column_order = df.columns.to_numpy()
|
41
|
-
|
42
|
-
new_inst = cls(df)
|
43
|
-
new_inst.column_order = column_order
|
44
|
-
return new_inst
|
45
|
-
|
46
|
-
def join(self, other: Any, **kwargs) -> Self:
|
47
|
-
result = super().join(other=other, **kwargs)
|
48
|
-
if isinstance(other, CapAnnDataDF):
|
49
|
-
new_columns = [
|
50
|
-
col for col in other.column_order if col not in self.column_order
|
51
|
-
]
|
52
|
-
else:
|
53
|
-
new_columns = [col for col in other.columns if col not in self.column_order]
|
54
|
-
column_order = np.append(self.column_order, new_columns)
|
55
|
-
return self.from_df(result, column_order=column_order)
|
56
|
-
|
57
|
-
def merge(self, right, **kwargs) -> Self:
|
58
|
-
result = super().merge(right=right, **kwargs)
|
59
|
-
if isinstance(right, CapAnnDataDF):
|
60
|
-
new_columns = [
|
61
|
-
col for col in right.column_order if col not in self.column_order
|
62
|
-
]
|
63
|
-
else:
|
64
|
-
new_columns = [col for col in right.columns if col not in self.column_order]
|
65
|
-
column_order = np.append(self.column_order, new_columns)
|
66
|
-
return self.from_df(result, column_order=column_order)
|
67
|
-
|
68
|
-
def copy(self, deep: Union[bool_t, None] = True) -> Self:
|
69
|
-
return self.from_df(super().copy(deep=deep), column_order=self.column_order)
|
1
|
+
import pandas as pd
|
2
|
+
import numpy as np
|
3
|
+
from typing import List, Any, Union
|
4
|
+
import logging
|
5
|
+
|
6
|
+
from pandas._typing import Self
|
7
|
+
from pandas.core.generic import bool_t
|
8
|
+
|
9
|
+
logger = logging.getLogger(__name__)
|
10
|
+
|
11
|
+
|
12
|
+
class CapAnnDataDF(pd.DataFrame):
    """
    DataFrame subclass that carries a ``column_order`` array next to the data.

    The class expands the pandas DataFrame behaviour to support partial
    reading and writing of AnnData obs and var (raw.var) fields.
    The main feature of the class is handling the <column-order> attribute,
    which must be a copy of the h5py.Group attribute.

    ``column_order`` is never modified in place by this class: every method
    that changes it binds a fresh array, so instances that were created from
    one another (``copy``, ``from_df``) cannot corrupt each other's order.
    """

    # pandas treats names listed in ``_metadata`` as plain instance attributes
    # (not as columns) when they are assigned on a DataFrame subclass.
    _metadata = ["column_order"]

    def rename_column(self, old_name: str, new_name: str) -> None:
        """
        Rename a column in both ``column_order`` and the frame itself.

        :param old_name: current column name.
        :param new_name: name that replaces ``old_name``.
        """
        # Work on an object-dtype copy: assigning into a fixed-width string
        # array (e.g. ``<U1``) would silently truncate ``new_name``, and
        # assigning in place would also change every instance sharing the array.
        order = np.array(self.column_order, dtype=object)
        order[order == old_name] = new_name
        self.column_order = order
        self.rename(columns={old_name: new_name}, inplace=True)

    def remove_column(self, col_name: str) -> None:
        """
        Drop a column from both ``column_order`` and the frame itself.

        :param col_name: name of the column to remove.
        :raises KeyError: raised by pandas if the column is not loaded in the frame.
        """
        order = np.array(self.column_order, dtype=object)
        self.column_order = order[order != col_name]
        self.drop(columns=[col_name], inplace=True)

    def __setitem__(self, key, value) -> None:
        """Assign a column and append its name to ``column_order`` if it is new."""
        if key not in self.column_order:
            # np.append returns a new array, so shared arrays stay untouched.
            self.column_order = np.append(self.column_order, key)
        return super().__setitem__(key, value)

    @classmethod
    def from_df(
        cls, df: pd.DataFrame, column_order: List[str] = None
    ) -> "CapAnnDataDF":
        """
        Build an instance from a regular DataFrame.

        :param df: source frame.
        :param column_order: full column order to attach; defaults to the
            columns of ``df``. Lists and arrays are both accepted.
        :return: new instance holding its own copy of the column order.
        """
        if column_order is None:
            column_order = df.columns.to_numpy()

        # Always store an independent ndarray: a list would break the
        # element-wise comparisons used elsewhere, and a shared array would let
        # one instance's edits leak into another.
        if isinstance(column_order, np.ndarray):
            order = column_order.copy()
        else:
            order = np.array(list(column_order), dtype=object)

        new_inst = cls(df)
        new_inst.column_order = order
        return new_inst

    def _extended_column_order(self, other: Any) -> np.ndarray:
        """Return ``column_order`` followed by the columns of ``other`` not yet listed."""
        if isinstance(other, CapAnnDataDF):
            candidates = other.column_order
        else:
            candidates = other.columns
        new_columns = [col for col in candidates if col not in self.column_order]
        return np.append(self.column_order, new_columns)

    def join(self, other: Any, **kwargs) -> "CapAnnDataDF":
        """
        Join like ``DataFrame.join`` and extend ``column_order`` with the new columns.

        :param other: frame to join; its ``column_order`` is used when it is a
            ``CapAnnDataDF``, otherwise its ``columns``.
        :param kwargs: forwarded to ``DataFrame.join``.
        """
        result = super().join(other=other, **kwargs)
        return self.from_df(result, column_order=self._extended_column_order(other))

    def merge(self, right, **kwargs) -> "CapAnnDataDF":
        """
        Merge like ``DataFrame.merge`` and extend ``column_order`` with the new columns.

        :param right: frame to merge with; its ``column_order`` is used when it
            is a ``CapAnnDataDF``, otherwise its ``columns``.
        :param kwargs: forwarded to ``DataFrame.merge``.
        """
        result = super().merge(right=right, **kwargs)
        return self.from_df(result, column_order=self._extended_column_order(right))

    def copy(self, deep: Union[bool, None] = True) -> "CapAnnDataDF":
        """Copy the frame; the copy receives its own ``column_order`` array."""
        return self.from_df(super().copy(deep=deep), column_order=self.column_order)
|
@@ -0,0 +1,34 @@
|
|
1
|
+
from typing import Set, Any
|
2
|
+
|
3
|
+
|
4
|
+
class CapAnnDataDict(dict):
    """
    A ``dict`` that remembers which keys have been removed from it.

    Keys deleted via ``del``, ``pop`` or ``popitem``, and keys assigned the
    value ``None``, are collected in ``keys_to_remove``. Assigning a non-None
    value to such a key clears the mark again.

    NOTE: only the methods overridden below update ``keys_to_remove``; other
    mutators inherited from ``dict`` (``clear``, ``update``, ``setdefault``)
    are not overridden here and therefore do not touch it.
    """

    # Created lazily by the ``keys_to_remove`` property so that every way of
    # constructing the dict (including ``dict.__init__``) works without a
    # custom ``__init__``.
    __keys_to_remove: Set[str] = None

    def __delitem__(self, __key: Any) -> None:
        """
        Delete ``__key`` and mark it for removal.

        :raises KeyError: if the key is absent; in that case nothing is marked.
        """
        # Delete first: a failed deletion must not leave a key that never
        # existed recorded in ``keys_to_remove``.
        super().__delitem__(__key)
        self.keys_to_remove.add(__key)

    def __setitem__(self, __key: Any, __value: Any) -> None:
        """
        Store ``__value`` under ``__key``.

        A ``None`` value marks the key for removal (the ``None`` is still
        stored); any other value clears an existing mark.
        """
        if __value is None:
            self.keys_to_remove.add(__key)
        else:
            self.keys_to_remove.discard(__key)
        return super().__setitem__(__key, __value)

    @property
    def keys_to_remove(self) -> Set[str]:
        """Set of keys marked for removal; created on first access."""
        if self.__keys_to_remove is None:
            self.__keys_to_remove = set()
        return self.__keys_to_remove

    def pop(self, __key: Any, __default: Any = None) -> Any:
        """
        Remove ``__key`` and return its value, or ``__default`` if it is absent.

        Unlike ``dict.pop`` this never raises ``KeyError``: the default is
        always supplied. The key is marked for removal only if it was present.
        """
        if __key in self:
            self.keys_to_remove.add(__key)
        return super().pop(__key, __default)

    def popitem(self) -> Any:
        """
        Remove and return one ``(key, value)`` pair, marking the key for removal.

        :raises KeyError: if the dict is empty.
        """
        item = super().popitem()
        self.keys_to_remove.add(item[0])
        return item
|