cap-anndata 0.3.0__tar.gz → 0.4.0__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/LICENSE +28 -28
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/PKG-INFO +67 -54
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/README.md +33 -33
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/cap_anndata/__init__.py +10 -10
- cap_anndata-0.4.0/cap_anndata/backed_df.py +81 -0
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/cap_anndata/backed_dict.py +34 -34
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/cap_anndata/cap_anndata.py +625 -600
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/cap_anndata/reader.py +57 -57
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/cap_anndata.egg-info/PKG-INFO +67 -54
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/setup.cfg +4 -4
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/setup.py +33 -31
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/test/test_backed_df.py +79 -81
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/test/test_backed_dict.py +36 -36
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/test/test_cap_anndata.py +818 -691
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/test/test_reader.py +63 -63
- cap_anndata-0.3.0/cap_anndata/backed_df.py +0 -69
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/cap_anndata.egg-info/SOURCES.txt +0 -0
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/cap_anndata.egg-info/dependency_links.txt +0 -0
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/cap_anndata.egg-info/requires.txt +0 -0
- {cap_anndata-0.3.0 → cap_anndata-0.4.0}/cap_anndata.egg-info/top_level.txt +0 -0
@@ -1,28 +1,28 @@
|
|
1
|
-
BSD 3-Clause License
|
2
|
-
|
3
|
-
Copyright (c) 2024, R. Mukhin, A. Isaev, Cell-Annotation Platform
|
4
|
-
|
5
|
-
Redistribution and use in source and binary forms, with or without
|
6
|
-
modification, are permitted provided that the following conditions are met:
|
7
|
-
|
8
|
-
1. Redistributions of source code must retain the above copyright notice, this
|
9
|
-
list of conditions and the following disclaimer.
|
10
|
-
|
11
|
-
2. Redistributions in binary form must reproduce the above copyright notice,
|
12
|
-
this list of conditions and the following disclaimer in the documentation
|
13
|
-
and/or other materials provided with the distribution.
|
14
|
-
|
15
|
-
3. Neither the name of the copyright holder nor the names of its
|
16
|
-
contributors may be used to endorse or promote products derived from
|
17
|
-
this software without specific prior written permission.
|
18
|
-
|
19
|
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
20
|
-
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
21
|
-
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
22
|
-
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
23
|
-
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
24
|
-
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
25
|
-
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
26
|
-
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
27
|
-
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
-
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
1
|
+
BSD 3-Clause License
|
2
|
+
|
3
|
+
Copyright (c) 2024, R. Mukhin, A. Isaev, Cell-Annotation Platform
|
4
|
+
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
7
|
+
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
9
|
+
list of conditions and the following disclaimer.
|
10
|
+
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
13
|
+
and/or other materials provided with the distribution.
|
14
|
+
|
15
|
+
3. Neither the name of the copyright holder nor the names of its
|
16
|
+
contributors may be used to endorse or promote products derived from
|
17
|
+
this software without specific prior written permission.
|
18
|
+
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
20
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
21
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
@@ -1,54 +1,67 @@
|
|
1
|
-
Metadata-Version: 2.
|
2
|
-
Name: cap_anndata
|
3
|
-
Version: 0.3.0
|
4
|
-
Summary: Partial read/write of AnnData (h5ad) files for low-memory operations with large datasets.
|
5
|
-
Home-page: https://github.com/cellannotation/cap-anndata
|
6
|
-
Author: R. Mukhin, A. Isaev
|
7
|
-
Author-email: roman@ebookapplications.com
|
8
|
-
Project-URL: Bug Tracker, https://github.com/cellannotation/cap-anndata/issues
|
9
|
-
|
10
|
-
|
11
|
-
Classifier:
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
Requires-Dist:
|
18
|
-
|
19
|
-
Requires-Dist:
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
1
|
+
Metadata-Version: 2.2
|
2
|
+
Name: cap_anndata
|
3
|
+
Version: 0.4.0
|
4
|
+
Summary: Partial read/write of AnnData (h5ad) files for low-memory operations with large datasets.
|
5
|
+
Home-page: https://github.com/cellannotation/cap-anndata
|
6
|
+
Author: R. Mukhin, A. Isaev
|
7
|
+
Author-email: roman@ebookapplications.com
|
8
|
+
Project-URL: Bug Tracker, https://github.com/cellannotation/cap-anndata/issues
|
9
|
+
Project-URL: Changelog, https://github.com/cellannotation/cap-anndata/blob/main/CHANGELOG.md
|
10
|
+
Project-URL: Documentation, https://github.com/cellannotation/cap-anndata/blob/main/HOWTO.md
|
11
|
+
Classifier: Programming Language :: Python :: 3.9
|
12
|
+
Classifier: License :: OSI Approved :: BSD License
|
13
|
+
Classifier: Operating System :: OS Independent
|
14
|
+
Requires-Python: >=3.9
|
15
|
+
Description-Content-Type: text/markdown
|
16
|
+
License-File: LICENSE
|
17
|
+
Requires-Dist: numpy>=1.23.5
|
18
|
+
Requires-Dist: pandas>=2.2.0
|
19
|
+
Requires-Dist: anndata>=0.10.0
|
20
|
+
Provides-Extra: dev
|
21
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
22
|
+
Requires-Dist: setuptools~=69.1.1; extra == "dev"
|
23
|
+
Dynamic: author
|
24
|
+
Dynamic: author-email
|
25
|
+
Dynamic: classifier
|
26
|
+
Dynamic: description
|
27
|
+
Dynamic: description-content-type
|
28
|
+
Dynamic: home-page
|
29
|
+
Dynamic: project-url
|
30
|
+
Dynamic: provides-extra
|
31
|
+
Dynamic: requires-dist
|
32
|
+
Dynamic: requires-python
|
33
|
+
Dynamic: summary
|
34
|
+
|
35
|
+
# CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
|
36
|
+
|
37
|
+
## Overview
|
38
|
+
CAP-AnnData offers functionality for selective reading and writing of [AnnData](https://pypi.org/project/anndata/)
|
39
|
+
file fields without the need for loading entire dataset (or even entire field) into memory.
|
40
|
+
For example, it allows reading and modifying a single `obs` column while loading nothing into memory except the column itself.
|
41
|
+
The package aims to replicate the original AnnData API as closely as possible,
|
42
|
+
while providing additional features for efficient data manipulation for heavy datasets.
|
43
|
+
|
44
|
+
## Installation
|
45
|
+
Install CAP-AnnData via pip:
|
46
|
+
|
47
|
+
```commandline
|
48
|
+
pip install -U cap-anndata
|
49
|
+
```
|
50
|
+
|
51
|
+
## Basic Example
|
52
|
+
|
53
|
+
The example below shows how to read a single `obs` column, create a new `obs` column and propagate it to the `.h5ad` file.
|
54
|
+
```python
|
55
|
+
from cap_anndata import read_h5ad
|
56
|
+
|
57
|
+
file_path = "your_data.h5ad"
|
58
|
+
with read_h5ad(file_path=file_path, edit=True) as cap_adata:
|
59
|
+
print(cap_adata.obs_keys()) # ['a', 'b', 'c']
|
60
|
+
print(cap_adata.obs) # Empty DataFrame
|
61
|
+
cap_adata.read_obs(columns=['a'])
|
62
|
+
print(cap_adata.obs.columns) # ['a']
|
63
|
+
cap_adata.obs['new_col'] = cap_adata.obs['a']
|
64
|
+
cap_adata.overwrite(fields=['obs'])
|
65
|
+
```
|
66
|
+
|
67
|
+
More examples can be found in the [How-To](https://github.com/cellannotation/cap-anndata/blob/main/HOWTO.md) file.
|
@@ -1,33 +1,33 @@
|
|
1
|
-
# CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
|
2
|
-
|
3
|
-
## Overview
|
4
|
-
CAP-AnnData offering functionalities for selective reading and writing of [AnnData](https://pypi.org/project/anndata/)
|
5
|
-
file fields without the need for loading entire dataset (or even entire field) into memory.
|
6
|
-
For example, it allows to read and modify the single `obs` column taking nothing into memory except the column itself.
|
7
|
-
Package eager to replicate the original AnnData API as much as possible,
|
8
|
-
while providing additional features for efficient data manipulation for heavy datasets.
|
9
|
-
|
10
|
-
## Installation
|
11
|
-
Install CAP-AnnData via pip:
|
12
|
-
|
13
|
-
```commandline
|
14
|
-
pip install -U cap-anndata
|
15
|
-
```
|
16
|
-
|
17
|
-
## Basic Example
|
18
|
-
|
19
|
-
The example below displayes how to read a single `obs` column, create new obs column and propagate it to the `.h5ad` file.
|
20
|
-
```python
|
21
|
-
from cap_anndata import read_h5ad
|
22
|
-
|
23
|
-
file_path = "your_data.h5ad"
|
24
|
-
with read_h5ad(file_path=file_path, edit=True) as cap_adata:
|
25
|
-
print(cap_adata.obs_keys()) # ['a', 'b', 'c']
|
26
|
-
print(cap_adata.obs) # Empty DataFrame
|
27
|
-
cap_adata.read_obs(columns=['a'])
|
28
|
-
print(cap_adata.obs.columns) # ['a']
|
29
|
-
cap_adata.obs['new_col'] = cap_adata.obs['a']
|
30
|
-
cap_adata.overwrite(fields=['obs'])
|
31
|
-
```
|
32
|
-
|
33
|
-
More example can be found in the [How-TO](https://github.com/cellannotation/cap-anndata/blob/main/HOWTO.md) file.
|
1
|
+
# CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
|
2
|
+
|
3
|
+
## Overview
|
4
|
+
CAP-AnnData offers functionality for selective reading and writing of [AnnData](https://pypi.org/project/anndata/)
|
5
|
+
file fields without the need for loading entire dataset (or even entire field) into memory.
|
6
|
+
For example, it allows reading and modifying a single `obs` column while loading nothing into memory except the column itself.
|
7
|
+
The package aims to replicate the original AnnData API as closely as possible,
|
8
|
+
while providing additional features for efficient data manipulation for heavy datasets.
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
Install CAP-AnnData via pip:
|
12
|
+
|
13
|
+
```commandline
|
14
|
+
pip install -U cap-anndata
|
15
|
+
```
|
16
|
+
|
17
|
+
## Basic Example
|
18
|
+
|
19
|
+
The example below shows how to read a single `obs` column, create a new `obs` column and propagate it to the `.h5ad` file.
|
20
|
+
```python
|
21
|
+
from cap_anndata import read_h5ad
|
22
|
+
|
23
|
+
file_path = "your_data.h5ad"
|
24
|
+
with read_h5ad(file_path=file_path, edit=True) as cap_adata:
|
25
|
+
print(cap_adata.obs_keys()) # ['a', 'b', 'c']
|
26
|
+
print(cap_adata.obs) # Empty DataFrame
|
27
|
+
cap_adata.read_obs(columns=['a'])
|
28
|
+
print(cap_adata.obs.columns) # ['a']
|
29
|
+
cap_adata.obs['new_col'] = cap_adata.obs['a']
|
30
|
+
cap_adata.overwrite(fields=['obs'])
|
31
|
+
```
|
32
|
+
|
33
|
+
More examples can be found in the [How-To](https://github.com/cellannotation/cap-anndata/blob/main/HOWTO.md) file.
|
@@ -1,10 +1,10 @@
|
|
1
|
-
from .backed_df import CapAnnDataDF
|
2
|
-
from .backed_dict import CapAnnDataDict
|
3
|
-
from .cap_anndata import CapAnnData
|
4
|
-
from .reader import (
|
5
|
-
read_directly,
|
6
|
-
read_h5ad,
|
7
|
-
)
|
8
|
-
|
9
|
-
|
10
|
-
__all__ = ["CapAnnData"]
|
1
|
+
from .backed_df import CapAnnDataDF
|
2
|
+
from .backed_dict import CapAnnDataDict
|
3
|
+
from .cap_anndata import CapAnnData
|
4
|
+
from .reader import (
|
5
|
+
read_directly,
|
6
|
+
read_h5ad,
|
7
|
+
)
|
8
|
+
|
9
|
+
|
10
|
+
__all__ = ["CapAnnData"]
|
@@ -0,0 +1,81 @@
|
|
1
|
+
import pandas as pd
|
2
|
+
import numpy as np
|
3
|
+
from typing import List, Any, Union
|
4
|
+
|
5
|
+
from pandas._typing import Self
|
6
|
+
from pandas.core.generic import bool_t
|
7
|
+
|
8
|
+
|
9
|
+
class CapAnnDataDF(pd.DataFrame):
    """
    A pandas DataFrame that additionally tracks a ``column_order`` attribute.

    The class extends the DataFrame behaviour to support partial reading and
    writing of AnnData obs and var (raw.var) fields. ``column_order`` is meant
    to be a copy of the <column-order> attribute of the backing h5py.Group, so
    it may list columns that are not currently present in the frame.
    """

    # Declared as pandas metadata so attribute access and assignment for
    # ``column_order`` bypass the column lookup machinery.
    _metadata = ["column_order"]

    def column_order_array(self) -> Union[np.ndarray, None]:
        """
        Return the tracked column order.

        A plain ``list`` is converted to an object-dtype numpy array; any other
        value is returned unchanged. ``None`` is returned when no order has
        been set (instead of raising ``AttributeError``).
        """
        order = getattr(self, "column_order", None)
        if isinstance(order, list):
            # Object dtype keeps names as Python strings of arbitrary length.
            return np.array(order, dtype=object)
        return order

    def _tracked_order(self) -> np.ndarray:
        """Return a fresh object-dtype copy of the order (frame columns if unset)."""
        order = self.column_order_array()
        if order is None:
            return self.columns.to_numpy(dtype=object)
        # A fixed-width unicode array would silently truncate longer names on
        # assignment, so always work on an object-dtype copy.
        return np.array(order, dtype=object)

    @staticmethod
    def _column_names(other: Any) -> list:
        """Collect the column names contributed by a join/merge operand."""
        if isinstance(other, CapAnnDataDF):
            order = other.column_order_array()
            return list(other.columns if order is None else order)
        if isinstance(other, pd.DataFrame):
            return list(other.columns)
        if isinstance(other, pd.Series):
            return [other.name]
        # Otherwise assume an iterable of frames/series (accepted by ``join``).
        names = []
        for item in other:
            names.extend(CapAnnDataDF._column_names(item))
        return names

    def _extended_order(self, other: Any) -> np.ndarray:
        """Return own order followed by the names of ``other`` not yet tracked."""
        order = self._tracked_order()
        known = set(order.tolist())
        new_columns = [col for col in self._column_names(other) if col not in known]
        # NOTE(review): columns renamed by pandas suffixes (``_x``/``_y``,
        # ``lsuffix``/``rsuffix``) are not reflected here - same as before.
        return np.append(order, new_columns)

    def rename_column(self, old_name: str, new_name: str) -> None:
        """Rename a column in place, in both the order array and the frame."""
        order = self._tracked_order()
        order[order == old_name] = new_name
        self.column_order = order
        self.rename(columns={old_name: new_name}, inplace=True)

    def remove_column(self, col_name: str) -> None:
        """
        Drop a column in place and remove it from the tracked order.

        Raises ``KeyError`` (from ``DataFrame.drop``) if the column is not
        present in the frame.
        """
        order = self._tracked_order()
        self.column_order = order[order != col_name]
        self.drop(columns=[col_name], inplace=True)

    def __setitem__(self, key, value) -> None:
        """Set a column; a key not yet tracked is appended to the order."""
        order = self._tracked_order()
        if key not in order:
            self.column_order = np.append(order, key)
        return super().__setitem__(key, value)

    @classmethod
    def from_df(
        cls, df: pd.DataFrame, column_order: Union[np.ndarray, List[str], None] = None
    ) -> "CapAnnDataDF":
        """
        Wrap ``df`` in a ``CapAnnDataDF``.

        ``column_order`` defaults to the columns of ``df``. A list is stored as
        an object-dtype array so later renames cannot truncate names.
        """
        if column_order is None:
            column_order = df.columns.to_numpy()
        elif isinstance(column_order, list):
            column_order = np.array(column_order, dtype=object)
        new_inst = cls(df)
        new_inst.column_order = column_order
        return new_inst

    def join(self, other: Any, **kwargs) -> "CapAnnDataDF":
        """
        Join like ``DataFrame.join`` and return a ``CapAnnDataDF``.

        The resulting order is this frame's order followed by the new columns
        of ``other`` (a frame, a named Series, or an iterable of those).
        """
        result = super().join(other=other, **kwargs)
        return self.from_df(result, column_order=self._extended_order(other))

    def merge(self, right, **kwargs) -> "CapAnnDataDF":
        """
        Merge like ``DataFrame.merge`` and return a ``CapAnnDataDF``.

        The resulting order is this frame's order followed by the new columns
        of ``right``.
        """
        result = super().merge(right=right, **kwargs)
        return self.from_df(result, column_order=self._extended_order(right))

    def copy(self, deep: Union[bool, None] = True) -> "CapAnnDataDF":
        """Copy the frame and carry the tracked column order over to the copy."""
        return self.from_df(
            super().copy(deep=deep), column_order=self.column_order_array()
        )
|
@@ -1,34 +1,34 @@
|
|
1
|
-
from typing import Set, Any
|
2
|
-
|
3
|
-
|
4
|
-
class CapAnnDataDict(dict):
    """
    A dict that remembers which keys were removed or set to ``None``.

    Removed keys are collected in ``keys_to_remove``; assigning a non-None
    value to such a key takes it out of that set again.
    NOTE(review): presumably the set is consumed when changes are written
    back to the file - confirm against the caller.
    """

    # Placeholder only; the real per-instance set is created lazily by the
    # ``keys_to_remove`` property.
    __keys_to_remove: Set[str] = None

    def __delitem__(self, __key: Any) -> None:
        """Delete ``__key`` and record it as removed.

        Raises ``KeyError`` for a missing key, in which case nothing is
        recorded.
        """
        # Delete first so a failed delete does not mark the key for removal.
        super().__delitem__(__key)
        self.keys_to_remove.add(__key)

    def __setitem__(self, __key: Any, __value: Any) -> None:
        """Store the value; ``None`` marks the key as removed, anything else unmarks it."""
        if __value is None:
            self.keys_to_remove.add(__key)
        else:
            self.keys_to_remove.discard(__key)
        return super().__setitem__(__key, __value)

    @property
    def keys_to_remove(self) -> Set[str]:
        """Set of keys recorded as removed (created on first access)."""
        if self.__keys_to_remove is None:
            self.__keys_to_remove = set()
        return self.__keys_to_remove

    def pop(self, __key: Any, __default: Any = None) -> Any:
        """Pop ``__key`` and record it as removed if it was present.

        The default is always forwarded to ``dict.pop``, so a missing key
        returns ``__default`` instead of raising ``KeyError``.
        """
        if __key in self:
            self.keys_to_remove.add(__key)
        return super().pop(__key, __default)

    def popitem(self) -> Any:
        """Pop the last inserted item and record its key as removed."""
        item = super().popitem()
        self.keys_to_remove.add(item[0])
        return item
|
1
|
+
from typing import Set, Any
|
2
|
+
|
3
|
+
|
4
|
+
class CapAnnDataDict(dict):
    """
    A dict that remembers which keys were removed or set to ``None``.

    Removed keys are collected in ``keys_to_remove``; assigning a non-None
    value to such a key takes it out of that set again.
    NOTE(review): presumably the set is consumed when changes are written
    back to the file - confirm against the caller.
    """

    # Placeholder only; the real per-instance set is created lazily by the
    # ``keys_to_remove`` property.
    __keys_to_remove: Set[str] = None

    def __delitem__(self, __key: Any) -> None:
        """Delete ``__key`` and record it as removed.

        Raises ``KeyError`` for a missing key, in which case nothing is
        recorded.
        """
        # Delete first so a failed delete does not mark the key for removal.
        super().__delitem__(__key)
        self.keys_to_remove.add(__key)

    def __setitem__(self, __key: Any, __value: Any) -> None:
        """Store the value; ``None`` marks the key as removed, anything else unmarks it."""
        if __value is None:
            self.keys_to_remove.add(__key)
        else:
            self.keys_to_remove.discard(__key)
        return super().__setitem__(__key, __value)

    @property
    def keys_to_remove(self) -> Set[str]:
        """Set of keys recorded as removed (created on first access)."""
        if self.__keys_to_remove is None:
            self.__keys_to_remove = set()
        return self.__keys_to_remove

    def pop(self, __key: Any, __default: Any = None) -> Any:
        """Pop ``__key`` and record it as removed if it was present.

        The default is always forwarded to ``dict.pop``, so a missing key
        returns ``__default`` instead of raising ``KeyError``.
        """
        if __key in self:
            self.keys_to_remove.add(__key)
        return super().pop(__key, __default)

    def popitem(self) -> Any:
        """Pop the last inserted item and record its key as removed."""
        item = super().popitem()
        self.keys_to_remove.add(item[0])
        return item
|