cap-anndata 0.3.0__tar.gz → 0.4.0__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,28 +1,28 @@
1
- BSD 3-Clause License
2
-
3
- Copyright (c) 2024, R. Mukhin, A. Isaev, Cell-Annotation Platform
4
-
5
- Redistribution and use in source and binary forms, with or without
6
- modification, are permitted provided that the following conditions are met:
7
-
8
- 1. Redistributions of source code must retain the above copyright notice, this
9
- list of conditions and the following disclaimer.
10
-
11
- 2. Redistributions in binary form must reproduce the above copyright notice,
12
- this list of conditions and the following disclaimer in the documentation
13
- and/or other materials provided with the distribution.
14
-
15
- 3. Neither the name of the copyright holder nor the names of its
16
- contributors may be used to endorse or promote products derived from
17
- this software without specific prior written permission.
18
-
19
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2024, R. Mukhin, A. Isaev, Cell-Annotation Platform
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ 3. Neither the name of the copyright holder nor the names of its
16
+ contributors may be used to endorse or promote products derived from
17
+ this software without specific prior written permission.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -1,54 +1,67 @@
1
- Metadata-Version: 2.1
2
- Name: cap_anndata
3
- Version: 0.3.0
4
- Summary: Partial read/write of AnnData (h5ad) files for low-memory operations with large datasets.
5
- Home-page: https://github.com/cellannotation/cap-anndata
6
- Author: R. Mukhin, A. Isaev
7
- Author-email: roman@ebookapplications.com
8
- Project-URL: Bug Tracker, https://github.com/cellannotation/cap-anndata/issues
9
- Classifier: Programming Language :: Python :: 3.9
10
- Classifier: License :: OSI Approved :: BSD License
11
- Classifier: Operating System :: OS Independent
12
- Requires-Python: >=3.9
13
- Description-Content-Type: text/markdown
14
- License-File: LICENSE
15
- Requires-Dist: numpy>=1.23.5
16
- Requires-Dist: pandas>=2.2.0
17
- Requires-Dist: anndata>=0.10.0
18
- Provides-Extra: dev
19
- Requires-Dist: pytest>=8.0.0; extra == "dev"
20
- Requires-Dist: setuptools~=69.1.1; extra == "dev"
21
-
22
- # CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
23
-
24
- ## Overview
25
- CAP-AnnData offering functionalities for selective reading and writing of [AnnData](https://pypi.org/project/anndata/)
26
- file fields without the need for loading entire dataset (or even entire field) into memory.
27
- For example, it allows to read and modify the single `obs` column taking nothing into memory except the column itself.
28
- Package eager to replicate the original AnnData API as much as possible,
29
- while providing additional features for efficient data manipulation for heavy datasets.
30
-
31
- ## Installation
32
- Install CAP-AnnData via pip:
33
-
34
- ```commandline
35
- pip install -U cap-anndata
36
- ```
37
-
38
- ## Basic Example
39
-
40
- The example below displayes how to read a single `obs` column, create new obs column and propagate it to the `.h5ad` file.
41
- ```python
42
- from cap_anndata import read_h5ad
43
-
44
- file_path = "your_data.h5ad"
45
- with read_h5ad(file_path=file_path, edit=True) as cap_adata:
46
- print(cap_adata.obs_keys()) # ['a', 'b', 'c']
47
- print(cap_adata.obs) # Empty DataFrame
48
- cap_adata.read_obs(columns=['a'])
49
- print(cap_adata.obs.columns) # ['a']
50
- cap_adata.obs['new_col'] = cap_adata.obs['a']
51
- cap_adata.overwrite(fields=['obs'])
52
- ```
53
-
54
- More example can be found in the [How-TO](https://github.com/cellannotation/cap-anndata/blob/main/HOWTO.md) file.
1
+ Metadata-Version: 2.2
2
+ Name: cap_anndata
3
+ Version: 0.4.0
4
+ Summary: Partial read/write of AnnData (h5ad) files for low-memory operations with large datasets.
5
+ Home-page: https://github.com/cellannotation/cap-anndata
6
+ Author: R. Mukhin, A. Isaev
7
+ Author-email: roman@ebookapplications.com
8
+ Project-URL: Bug Tracker, https://github.com/cellannotation/cap-anndata/issues
9
+ Project-URL: Changelog, https://github.com/cellannotation/cap-anndata/blob/main/CHANGELOG.md
10
+ Project-URL: Documentation, https://github.com/cellannotation/cap-anndata/blob/main/HOWTO.md
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: License :: OSI Approved :: BSD License
13
+ Classifier: Operating System :: OS Independent
14
+ Requires-Python: >=3.9
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: numpy>=1.23.5
18
+ Requires-Dist: pandas>=2.2.0
19
+ Requires-Dist: anndata>=0.10.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
22
+ Requires-Dist: setuptools~=69.1.1; extra == "dev"
23
+ Dynamic: author
24
+ Dynamic: author-email
25
+ Dynamic: classifier
26
+ Dynamic: description
27
+ Dynamic: description-content-type
28
+ Dynamic: home-page
29
+ Dynamic: project-url
30
+ Dynamic: provides-extra
31
+ Dynamic: requires-dist
32
+ Dynamic: requires-python
33
+ Dynamic: summary
34
+
35
+ # CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
36
+
37
+ ## Overview
38
+ CAP-AnnData offers functionality for selective reading and writing of [AnnData](https://pypi.org/project/anndata/)
39
+ file fields without the need for loading entire dataset (or even entire field) into memory.
40
+ For example, it allows reading and modifying a single `obs` column while loading nothing into memory except the column itself.
41
+ The package aims to replicate the original AnnData API as much as possible,
42
+ while providing additional features for efficient data manipulation for heavy datasets.
43
+
44
+ ## Installation
45
+ Install CAP-AnnData via pip:
46
+
47
+ ```commandline
48
+ pip install -U cap-anndata
49
+ ```
50
+
51
+ ## Basic Example
52
+
53
+ The example below shows how to read a single `obs` column, create a new `obs` column, and propagate it to the `.h5ad` file.
54
+ ```python
55
+ from cap_anndata import read_h5ad
56
+
57
+ file_path = "your_data.h5ad"
58
+ with read_h5ad(file_path=file_path, edit=True) as cap_adata:
59
+ print(cap_adata.obs_keys()) # ['a', 'b', 'c']
60
+ print(cap_adata.obs) # Empty DataFrame
61
+ cap_adata.read_obs(columns=['a'])
62
+ print(cap_adata.obs.columns) # ['a']
63
+ cap_adata.obs['new_col'] = cap_adata.obs['a']
64
+ cap_adata.overwrite(fields=['obs'])
65
+ ```
66
+
67
+ More examples can be found in the [How-TO](https://github.com/cellannotation/cap-anndata/blob/main/HOWTO.md) file.
@@ -1,33 +1,33 @@
1
- # CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
2
-
3
- ## Overview
4
- CAP-AnnData offering functionalities for selective reading and writing of [AnnData](https://pypi.org/project/anndata/)
5
- file fields without the need for loading entire dataset (or even entire field) into memory.
6
- For example, it allows to read and modify the single `obs` column taking nothing into memory except the column itself.
7
- Package eager to replicate the original AnnData API as much as possible,
8
- while providing additional features for efficient data manipulation for heavy datasets.
9
-
10
- ## Installation
11
- Install CAP-AnnData via pip:
12
-
13
- ```commandline
14
- pip install -U cap-anndata
15
- ```
16
-
17
- ## Basic Example
18
-
19
- The example below displayes how to read a single `obs` column, create new obs column and propagate it to the `.h5ad` file.
20
- ```python
21
- from cap_anndata import read_h5ad
22
-
23
- file_path = "your_data.h5ad"
24
- with read_h5ad(file_path=file_path, edit=True) as cap_adata:
25
- print(cap_adata.obs_keys()) # ['a', 'b', 'c']
26
- print(cap_adata.obs) # Empty DataFrame
27
- cap_adata.read_obs(columns=['a'])
28
- print(cap_adata.obs.columns) # ['a']
29
- cap_adata.obs['new_col'] = cap_adata.obs['a']
30
- cap_adata.overwrite(fields=['obs'])
31
- ```
32
-
33
- More example can be found in the [How-TO](https://github.com/cellannotation/cap-anndata/blob/main/HOWTO.md) file.
1
+ # CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
2
+
3
+ ## Overview
4
+ CAP-AnnData offers functionality for selective reading and writing of [AnnData](https://pypi.org/project/anndata/)
5
+ file fields without the need for loading entire dataset (or even entire field) into memory.
6
+ For example, it allows reading and modifying a single `obs` column while loading nothing into memory except the column itself.
7
+ The package aims to replicate the original AnnData API as much as possible,
8
+ while providing additional features for efficient data manipulation for heavy datasets.
9
+
10
+ ## Installation
11
+ Install CAP-AnnData via pip:
12
+
13
+ ```commandline
14
+ pip install -U cap-anndata
15
+ ```
16
+
17
+ ## Basic Example
18
+
19
+ The example below shows how to read a single `obs` column, create a new `obs` column, and propagate it to the `.h5ad` file.
20
+ ```python
21
+ from cap_anndata import read_h5ad
22
+
23
+ file_path = "your_data.h5ad"
24
+ with read_h5ad(file_path=file_path, edit=True) as cap_adata:
25
+ print(cap_adata.obs_keys()) # ['a', 'b', 'c']
26
+ print(cap_adata.obs) # Empty DataFrame
27
+ cap_adata.read_obs(columns=['a'])
28
+ print(cap_adata.obs.columns) # ['a']
29
+ cap_adata.obs['new_col'] = cap_adata.obs['a']
30
+ cap_adata.overwrite(fields=['obs'])
31
+ ```
32
+
33
+ More examples can be found in the [How-TO](https://github.com/cellannotation/cap-anndata/blob/main/HOWTO.md) file.
@@ -1,10 +1,10 @@
1
- from .backed_df import CapAnnDataDF
2
- from .backed_dict import CapAnnDataDict
3
- from .cap_anndata import CapAnnData
4
- from .reader import (
5
- read_directly,
6
- read_h5ad,
7
- )
8
-
9
-
10
- __all__ = ["CapAnnData"]
1
+ from .backed_df import CapAnnDataDF
2
+ from .backed_dict import CapAnnDataDict
3
+ from .cap_anndata import CapAnnData
4
+ from .reader import (
5
+ read_directly,
6
+ read_h5ad,
7
+ )
8
+
9
+
10
# Public API of the package. Every name imported above is re-exported, so it
# is listed here too; otherwise ``from cap_anndata import *`` exposes only
# ``CapAnnData`` and the remaining imports look unused to linters.
__all__ = [
    "CapAnnData",
    "CapAnnDataDF",
    "CapAnnDataDict",
    "read_directly",
    "read_h5ad",
]
@@ -0,0 +1,81 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from typing import List, Any, Union
4
+
5
+ from pandas._typing import Self
6
+ from pandas.core.generic import bool_t
7
+
8
+
9
class CapAnnDataDF(pd.DataFrame):
    """
    The class to expand the pandas DataFrame behaviour to support partial
    reading and writing of AnnData obs and var (raw.var) fields.

    The main feature of the class is handling the <column-order> attribute,
    which must be a copy of the h5py.Group attribute. It is kept in
    ``column_order`` as a 1-D numpy array of dtype ``object`` and is tracked
    separately from ``self.columns``. ``column_order is None`` means that no
    explicit order was assigned yet; the mutating methods then fall back to
    the current ``self.columns``.
    """

    # Names listed in ``_metadata`` are treated by pandas as regular instance
    # attributes of the subclass instead of column names.
    _metadata = ["column_order"]

    # Class-level default so that a frame which was not built via ``from_df``
    # can still be modified without an AttributeError.
    column_order = None

    def column_order_array(self) -> Union[np.ndarray, None]:
        """Return ``column_order`` as an object array, or ``None`` if unset."""
        order = self.column_order
        if order is None:
            return None
        # dtype=object keeps every label intact: a fixed-width "<U.." array
        # would silently truncate a longer name assigned in rename_column.
        return np.asarray(order, dtype=object)

    def _cap_order(self) -> np.ndarray:
        """Return the tracked order, or the current columns if none is set."""
        order = self.column_order_array()
        if order is None:
            return self.columns.to_numpy(dtype=object)
        return order

    def _cap_unseen(self, labels) -> list:
        """Return the ``labels`` missing from the tracked order, each once."""
        known = set(self._cap_order().tolist())
        fresh = []
        for label in labels:
            if label not in known:
                known.add(label)
                fresh.append(label)
        return fresh

    @staticmethod
    def _cap_key_labels(key) -> list:
        """Return the column labels addressed by a ``__setitem__`` key."""
        # Slices and 2-D keys (e.g. a boolean DataFrame) select rows/cells.
        if isinstance(key, slice) or getattr(key, "ndim", None) == 2:
            return []
        if isinstance(key, (list, np.ndarray, pd.Index, pd.Series)):
            # A boolean mask selects rows and therefore names no column.
            if np.asarray(key).dtype == bool:
                return []
            return list(key)
        return [key]

    @staticmethod
    def _cap_incoming_labels(other) -> list:
        """Return the column labels that ``other`` contributes to join/merge."""
        if isinstance(other, CapAnnDataDF):
            order = other.column_order_array()
            return list(other.columns if order is None else order)
        if isinstance(other, pd.DataFrame):
            return list(other.columns)
        if isinstance(other, pd.Series):
            return [other.name]
        # DataFrame.join also accepts a list of frames and/or series.
        labels = []
        for item in other:
            labels.extend(CapAnnDataDF._cap_incoming_labels(item))
        return labels

    def _cap_wrap(self, result: pd.DataFrame, other) -> "CapAnnDataDF":
        """Wrap a join/merge ``result`` and extend the order with new labels."""
        # NOTE(review): labels renamed by pandas through suffixes are not
        # reflected here; the order is built from the input labels, as before.
        fresh = self._cap_unseen(self._cap_incoming_labels(other))
        column_order = np.concatenate(
            [self._cap_order(), np.array(fresh, dtype=object)]
        )
        return self.from_df(result, column_order=column_order)

    def rename_column(self, old_name: str, new_name: str) -> None:
        """Rename ``old_name`` to ``new_name`` in the order and in the frame."""
        order = self._cap_order().copy()
        order[order == old_name] = new_name
        self.column_order = order
        self.rename(columns={old_name: new_name}, inplace=True)

    def remove_column(self, col_name: str) -> None:
        """
        Drop ``col_name`` from the frame and from the tracked order.

        Raises KeyError (from ``DataFrame.drop``) if the frame has no such
        column; the tracked order is left untouched in that case.
        """
        order = self._cap_order()
        # Drop first so that a failing drop does not leave a modified order.
        self.drop(columns=[col_name], inplace=True)
        self.column_order = order[order != col_name]

    def __setitem__(self, key, value) -> None:
        """Assign as pandas does and append new column labels to the order."""
        # Assign first: if pandas rejects the assignment the order is unchanged.
        super().__setitem__(key, value)
        fresh = self._cap_unseen(self._cap_key_labels(key))
        if fresh:
            self.column_order = np.concatenate(
                [self._cap_order(), np.array(fresh, dtype=object)]
            )

    @classmethod
    def from_df(
        cls,
        df: pd.DataFrame,
        column_order: Union[np.ndarray, List[str], None] = None,
    ) -> "CapAnnDataDF":
        """
        Build an instance from ``df`` with the given ``column_order``.

        When ``column_order`` is None the columns of ``df`` are used.
        """
        if column_order is None:
            column_order = df.columns.to_numpy(dtype=object)
        else:
            # Lists and fixed-width string arrays are normalised to dtype=object.
            column_order = np.asarray(column_order, dtype=object)
        new_inst = cls(df)
        new_inst.column_order = column_order
        return new_inst

    def join(self, other: Any, **kwargs) -> "CapAnnDataDF":
        """
        Join like ``DataFrame.join`` and return a ``CapAnnDataDF``.

        Labels brought in by ``other`` (a frame, a named series or a list of
        them) that are not tracked yet are appended to the column order.
        """
        result = super().join(other=other, **kwargs)
        return self._cap_wrap(result, other)

    def merge(self, right, **kwargs) -> "CapAnnDataDF":
        """
        Merge like ``DataFrame.merge`` and return a ``CapAnnDataDF``.

        Labels brought in by ``right`` that are not tracked yet are appended
        to the column order.
        """
        result = super().merge(right=right, **kwargs)
        return self._cap_wrap(result, right)

    def copy(self, deep: Union[bool, None] = True) -> "CapAnnDataDF":
        """Copy the frame like ``DataFrame.copy`` and keep the column order."""
        column_order = self.column_order_array()
        return self.from_df(super().copy(deep=deep), column_order=column_order)
@@ -1,34 +1,34 @@
1
- from typing import Set, Any
2
-
3
-
4
- class CapAnnDataDict(dict):
5
- __keys_to_remove: Set[str] = None
6
-
7
- def __delitem__(self, __key: Any) -> None:
8
- self.keys_to_remove.add(__key)
9
- return super().__delitem__(__key)
10
-
11
- def __setitem__(self, __key: Any, __value: Any) -> None:
12
- if __value is not None:
13
- if __key in self.keys_to_remove:
14
- self.keys_to_remove.remove(__key)
15
- else:
16
- self.keys_to_remove.add(__key)
17
- return super().__setitem__(__key, __value)
18
-
19
- @property
20
- def keys_to_remove(self) -> Set[str]:
21
- if self.__keys_to_remove is None:
22
- self.__keys_to_remove = set()
23
- return self.__keys_to_remove
24
-
25
- def pop(self, __key: Any, __default: Any = None) -> Any:
26
- if __key in self:
27
- self.keys_to_remove.add(__key)
28
- return super().pop(__key, __default)
29
-
30
- def popitem(self) -> Any:
31
- item = super().popitem()
32
- key = item[0]
33
- self.keys_to_remove.add(key)
34
- return item
1
+ from typing import Set, Any
2
+
3
+
4
class CapAnnDataDict(dict):
    """
    ``dict`` subclass that remembers which keys were removed from it.

    A key removed through ``del``, ``pop`` or ``popitem``, or assigned the
    value ``None``, is collected in ``keys_to_remove``. Assigning a
    non-``None`` value to the key takes it out of that set again.
    Other mutators (``update``, ``setdefault``, ``clear``) are not overridden
    here.
    """

    # Created lazily per instance by the ``keys_to_remove`` property; the
    # class-level ``None`` only marks "not created yet".
    __keys_to_remove: Set[str] = None

    def __delitem__(self, __key: Any) -> None:
        """Delete ``__key`` and record it as removed.

        Raises KeyError if the key is absent; nothing is recorded then.
        """
        # Delete first so that a failed deletion leaves no phantom entry.
        super().__delitem__(__key)
        self.keys_to_remove.add(__key)

    def __setitem__(self, __key: Any, __value: Any) -> None:
        """Store the value; ``None`` marks the key as removed, else unmarks it."""
        if __value is None:
            self.keys_to_remove.add(__key)
        else:
            self.keys_to_remove.discard(__key)
        return super().__setitem__(__key, __value)

    @property
    def keys_to_remove(self) -> Set[str]:
        """Set of keys recorded as removed (created on first access)."""
        if self.__keys_to_remove is None:
            self.__keys_to_remove = set()
        return self.__keys_to_remove

    def pop(self, __key: Any, __default: Any = None) -> Any:
        """Remove ``__key`` and return its value, or ``__default`` if absent.

        Unlike ``dict.pop`` this never raises KeyError, because a default
        (``None``) is always passed on. Only existing keys are recorded.
        """
        if __key in self:
            self.keys_to_remove.add(__key)
        return super().pop(__key, __default)

    def popitem(self) -> Any:
        """Remove and return a ``(key, value)`` pair, recording the key.

        Raises KeyError (from ``dict.popitem``) when the dict is empty.
        """
        item = super().popitem()
        self.keys_to_remove.add(item[0])
        return item