cap-anndata 0.2.2__tar.gz → 0.3.1__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (29) hide show
  1. {cap_anndata-0.2.2 → cap_anndata-0.3.1}/LICENSE +28 -28
  2. cap_anndata-0.3.1/PKG-INFO +56 -0
  3. cap_anndata-0.3.1/README.md +33 -0
  4. {cap_anndata-0.2.2 → cap_anndata-0.3.1}/cap_anndata/__init__.py +10 -10
  5. {cap_anndata-0.2.2 → cap_anndata-0.3.1}/cap_anndata/backed_df.py +69 -69
  6. cap_anndata-0.3.1/cap_anndata/backed_dict.py +34 -0
  7. cap_anndata-0.3.1/cap_anndata/cap_anndata.py +600 -0
  8. cap_anndata-0.3.1/cap_anndata/reader.py +57 -0
  9. cap_anndata-0.3.1/cap_anndata.egg-info/PKG-INFO +56 -0
  10. {cap_anndata-0.2.2 → cap_anndata-0.3.1}/cap_anndata.egg-info/SOURCES.txt +2 -2
  11. cap_anndata-0.3.1/cap_anndata.egg-info/requires.txt +7 -0
  12. {cap_anndata-0.2.2 → cap_anndata-0.3.1}/setup.cfg +4 -4
  13. {cap_anndata-0.2.2 → cap_anndata-0.3.1}/setup.py +33 -32
  14. cap_anndata-0.3.1/test/test_backed_df.py +81 -0
  15. cap_anndata-0.3.1/test/test_backed_dict.py +36 -0
  16. {cap_anndata-0.2.2 → cap_anndata-0.3.1}/test/test_cap_anndata.py +691 -433
  17. cap_anndata-0.3.1/test/test_reader.py +63 -0
  18. cap_anndata-0.2.2/PKG-INFO +0 -253
  19. cap_anndata-0.2.2/README.md +0 -231
  20. cap_anndata-0.2.2/cap_anndata/backed_uns.py +0 -28
  21. cap_anndata-0.2.2/cap_anndata/cap_anndata.py +0 -287
  22. cap_anndata-0.2.2/cap_anndata/reader.py +0 -44
  23. cap_anndata-0.2.2/cap_anndata.egg-info/PKG-INFO +0 -253
  24. cap_anndata-0.2.2/cap_anndata.egg-info/requires.txt +0 -8
  25. cap_anndata-0.2.2/test/test_backed_df.py +0 -81
  26. cap_anndata-0.2.2/test/test_backed_uns.py +0 -36
  27. cap_anndata-0.2.2/test/test_reader.py +0 -22
  28. {cap_anndata-0.2.2 → cap_anndata-0.3.1}/cap_anndata.egg-info/dependency_links.txt +0 -0
  29. {cap_anndata-0.2.2 → cap_anndata-0.3.1}/cap_anndata.egg-info/top_level.txt +0 -0
@@ -1,28 +1,28 @@
1
- BSD 3-Clause License
2
-
3
- Copyright (c) 2024, R. Mukhin, A. Isaev, Cell-Annotation Platform
4
-
5
- Redistribution and use in source and binary forms, with or without
6
- modification, are permitted provided that the following conditions are met:
7
-
8
- 1. Redistributions of source code must retain the above copyright notice, this
9
- list of conditions and the following disclaimer.
10
-
11
- 2. Redistributions in binary form must reproduce the above copyright notice,
12
- this list of conditions and the following disclaimer in the documentation
13
- and/or other materials provided with the distribution.
14
-
15
- 3. Neither the name of the copyright holder nor the names of its
16
- contributors may be used to endorse or promote products derived from
17
- this software without specific prior written permission.
18
-
19
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2024, R. Mukhin, A. Isaev, Cell-Annotation Platform
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ 3. Neither the name of the copyright holder nor the names of its
16
+ contributors may be used to endorse or promote products derived from
17
+ this software without specific prior written permission.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,56 @@
1
+ Metadata-Version: 2.1
2
+ Name: cap_anndata
3
+ Version: 0.3.1
4
+ Summary: Partial read/write of AnnData (h5ad) files for low-memory operations with large datasets.
5
+ Home-page: https://github.com/cellannotation/cap-anndata
6
+ Author: R. Mukhin, A. Isaev
7
+ Author-email: roman@ebookapplications.com
8
+ Project-URL: Bug Tracker, https://github.com/cellannotation/cap-anndata/issues
9
+ Project-URL: Changelog, https://github.com/cellannotation/cap-anndata/blob/main/CHANGELOG.md
10
+ Project-URL: Documentation, https://github.com/cellannotation/cap-anndata/blob/main/HOWTO.md
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: License :: OSI Approved :: BSD License
13
+ Classifier: Operating System :: OS Independent
14
+ Requires-Python: >=3.9
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: numpy>=1.23.5
18
+ Requires-Dist: pandas>=2.2.0
19
+ Requires-Dist: anndata>=0.10.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
22
+ Requires-Dist: setuptools~=69.1.1; extra == "dev"
23
+
24
+ # CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
25
+
26
+ ## Overview
27
+ CAP-AnnData offers functionality for selective reading and writing of [AnnData](https://pypi.org/project/anndata/)
28
+ file fields without loading the entire dataset (or even an entire field) into memory.
29
+ For example, it can read and modify a single `obs` column while loading nothing into memory except the column itself.
30
+ The package aims to replicate the original AnnData API as closely as possible,
31
+ while providing additional features for efficient data manipulation for heavy datasets.
32
+
33
+ ## Installation
34
+ Install CAP-AnnData via pip:
35
+
36
+ ```commandline
37
+ pip install -U cap-anndata
38
+ ```
39
+
40
+ ## Basic Example
41
+
42
+ The example below shows how to read a single `obs` column, create a new `obs` column and propagate it to the `.h5ad` file.
43
+ ```python
44
+ from cap_anndata import read_h5ad
45
+
46
+ file_path = "your_data.h5ad"
47
+ with read_h5ad(file_path=file_path, edit=True) as cap_adata:
48
+ print(cap_adata.obs_keys()) # ['a', 'b', 'c']
49
+ print(cap_adata.obs) # Empty DataFrame
50
+ cap_adata.read_obs(columns=['a'])
51
+ print(cap_adata.obs.columns) # ['a']
52
+ cap_adata.obs['new_col'] = cap_adata.obs['a']
53
+ cap_adata.overwrite(fields=['obs'])
54
+ ```
55
+
56
+ More examples can be found in the [How-TO](https://github.com/cellannotation/cap-anndata/blob/main/HOWTO.md) file.
@@ -0,0 +1,33 @@
1
+ # CAP-AnnData: Partial I/O for AnnData (.h5ad) Files
2
+
3
+ ## Overview
4
+ CAP-AnnData offers functionality for selective reading and writing of [AnnData](https://pypi.org/project/anndata/)
5
+ file fields without loading the entire dataset (or even an entire field) into memory.
6
+ For example, it can read and modify a single `obs` column while loading nothing into memory except the column itself.
7
+ The package aims to replicate the original AnnData API as closely as possible,
8
+ while providing additional features for efficient data manipulation for heavy datasets.
9
+
10
+ ## Installation
11
+ Install CAP-AnnData via pip:
12
+
13
+ ```commandline
14
+ pip install -U cap-anndata
15
+ ```
16
+
17
+ ## Basic Example
18
+
19
+ The example below shows how to read a single `obs` column, create a new `obs` column and propagate it to the `.h5ad` file.
20
+ ```python
21
+ from cap_anndata import read_h5ad
22
+
23
+ file_path = "your_data.h5ad"
24
+ with read_h5ad(file_path=file_path, edit=True) as cap_adata:
25
+ print(cap_adata.obs_keys()) # ['a', 'b', 'c']
26
+ print(cap_adata.obs) # Empty DataFrame
27
+ cap_adata.read_obs(columns=['a'])
28
+ print(cap_adata.obs.columns) # ['a']
29
+ cap_adata.obs['new_col'] = cap_adata.obs['a']
30
+ cap_adata.overwrite(fields=['obs'])
31
+ ```
32
+
33
+ More examples can be found in the [How-TO](https://github.com/cellannotation/cap-anndata/blob/main/HOWTO.md) file.
@@ -1,10 +1,10 @@
1
- from .backed_df import CapAnnDataDF
2
- from .backed_uns import CapAnnDataUns
3
- from .cap_anndata import CapAnnData
4
- from .reader import (
5
- read_directly,
6
- read_h5ad,
7
- )
8
-
9
-
10
- __all__ = ["CapAnnData"]
1
+ from .backed_df import CapAnnDataDF
2
+ from .backed_dict import CapAnnDataDict
3
+ from .cap_anndata import CapAnnData
4
+ from .reader import (
5
+ read_directly,
6
+ read_h5ad,
7
+ )
8
+
9
+
10
+ __all__ = ["CapAnnData"]
@@ -1,69 +1,69 @@
1
- import pandas as pd
2
- import numpy as np
3
- from typing import List, Any, Union
4
- import logging
5
-
6
- from pandas._typing import Self
7
- from pandas.core.generic import bool_t
8
-
9
- logger = logging.getLogger(__name__)
10
-
11
-
12
- class CapAnnDataDF(pd.DataFrame):
13
- """
14
- The class to expand the pandas DataFrame behaviour to support partial
15
- reading and writing of AnnData obs and var (raw.var) fields.
16
- The main feature of the class is handling <column-order> attribute
17
- which must be a copy of h5py.Group attribute
18
- """
19
-
20
- _metadata = ["column_order"]
21
-
22
- def rename_column(self, old_name: str, new_name: str) -> None:
23
- i = np.where(self.column_order == old_name)[0]
24
- self.column_order[i] = new_name
25
- self.rename(columns={old_name: new_name}, inplace=True)
26
-
27
- def remove_column(self, col_name: str) -> None:
28
- i = np.where(self.column_order == col_name)[0]
29
- self.column_order = np.delete(self.column_order, i)
30
- self.drop(columns=[col_name], inplace=True)
31
-
32
- def __setitem__(self, key, value) -> None:
33
- if key not in self.column_order:
34
- self.column_order = np.append(self.column_order, key)
35
- return super().__setitem__(key, value)
36
-
37
- @classmethod
38
- def from_df(cls, df: pd.DataFrame, column_order: List[str] = None) -> Self:
39
- if column_order is None:
40
- column_order = df.columns.to_numpy()
41
-
42
- new_inst = cls(df)
43
- new_inst.column_order = column_order
44
- return new_inst
45
-
46
- def join(self, other: Any, **kwargs) -> Self:
47
- result = super().join(other=other, **kwargs)
48
- if isinstance(other, CapAnnDataDF):
49
- new_columns = [
50
- col for col in other.column_order if col not in self.column_order
51
- ]
52
- else:
53
- new_columns = [col for col in other.columns if col not in self.column_order]
54
- column_order = np.append(self.column_order, new_columns)
55
- return self.from_df(result, column_order=column_order)
56
-
57
- def merge(self, right, **kwargs) -> Self:
58
- result = super().merge(right=right, **kwargs)
59
- if isinstance(right, CapAnnDataDF):
60
- new_columns = [
61
- col for col in right.column_order if col not in self.column_order
62
- ]
63
- else:
64
- new_columns = [col for col in right.columns if col not in self.column_order]
65
- column_order = np.append(self.column_order, new_columns)
66
- return self.from_df(result, column_order=column_order)
67
-
68
- def copy(self, deep: Union[bool_t, None] = True) -> Self:
69
- return self.from_df(super().copy(deep=deep), column_order=self.column_order)
1
+ import pandas as pd
2
+ import numpy as np
3
+ from typing import List, Any, Union
4
+ import logging
5
+
6
+ from pandas._typing import Self
7
+ from pandas.core.generic import bool_t
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ class CapAnnDataDF(pd.DataFrame):
13
+ """
14
+ The class to expand the pandas DataFrame behaviour to support partial
15
+ reading and writing of AnnData obs and var (raw.var) fields.
16
+ The main feature of the class is handling <column-order> attribute
17
+ which must be a copy of h5py.Group attribute
18
+ """
19
+
20
+ _metadata = ["column_order"]
21
+
22
+ def rename_column(self, old_name: str, new_name: str) -> None:
23
+ i = np.where(self.column_order == old_name)[0]
24
+ self.column_order[i] = new_name
25
+ self.rename(columns={old_name: new_name}, inplace=True)
26
+
27
+ def remove_column(self, col_name: str) -> None:
28
+ i = np.where(self.column_order == col_name)[0]
29
+ self.column_order = np.delete(self.column_order, i)
30
+ self.drop(columns=[col_name], inplace=True)
31
+
32
+ def __setitem__(self, key, value) -> None:
33
+ if key not in self.column_order:
34
+ self.column_order = np.append(self.column_order, key)
35
+ return super().__setitem__(key, value)
36
+
37
+ @classmethod
38
+ def from_df(cls, df: pd.DataFrame, column_order: List[str] = None) -> Self:
39
+ if column_order is None:
40
+ column_order = df.columns.to_numpy()
41
+
42
+ new_inst = cls(df)
43
+ new_inst.column_order = column_order
44
+ return new_inst
45
+
46
+ def join(self, other: Any, **kwargs) -> Self:
47
+ result = super().join(other=other, **kwargs)
48
+ if isinstance(other, CapAnnDataDF):
49
+ new_columns = [
50
+ col for col in other.column_order if col not in self.column_order
51
+ ]
52
+ else:
53
+ new_columns = [col for col in other.columns if col not in self.column_order]
54
+ column_order = np.append(self.column_order, new_columns)
55
+ return self.from_df(result, column_order=column_order)
56
+
57
+ def merge(self, right, **kwargs) -> Self:
58
+ result = super().merge(right=right, **kwargs)
59
+ if isinstance(right, CapAnnDataDF):
60
+ new_columns = [
61
+ col for col in right.column_order if col not in self.column_order
62
+ ]
63
+ else:
64
+ new_columns = [col for col in right.columns if col not in self.column_order]
65
+ column_order = np.append(self.column_order, new_columns)
66
+ return self.from_df(result, column_order=column_order)
67
+
68
+ def copy(self, deep: Union[bool_t, None] = True) -> Self:
69
+ return self.from_df(super().copy(deep=deep), column_order=self.column_order)
@@ -0,0 +1,34 @@
1
+ from typing import Set, Any
2
+
3
+
4
class CapAnnDataDict(dict):
    """
    A ``dict`` that tracks which keys have been removed.

    A key is recorded in ``keys_to_remove`` when it is deleted, popped, or
    assigned ``None``; assigning a non-``None`` value to that key clears the
    record again.
    NOTE(review): the set is presumably consumed by the file writer to drop
    the corresponding entries from the backing file - confirm against caller.

    Only ``__setitem__``, ``__delitem__``, ``pop`` and ``popitem`` are
    tracked; other mutators inherited from ``dict`` (e.g. ``update``,
    ``clear``) do not touch ``keys_to_remove``.
    """

    # Created lazily by the ``keys_to_remove`` property so every instance
    # gets its own set even when ``dict.__init__`` is the only initialiser run.
    __keys_to_remove: Set[str] = None

    def __delitem__(self, __key: Any) -> None:
        """Delete ``__key`` and record it as removed.

        :raises KeyError: if the key is absent; nothing is recorded then.
        """
        # Delete first: a failed deletion must not leave a phantom entry
        # in keys_to_remove.
        super().__delitem__(__key)
        self.keys_to_remove.add(__key)

    def __setitem__(self, __key: Any, __value: Any) -> None:
        """Store the value; ``None`` marks the key as removed, anything else unmarks it."""
        if __value is None:
            self.keys_to_remove.add(__key)
        else:
            self.keys_to_remove.discard(__key)
        return super().__setitem__(__key, __value)

    @property
    def keys_to_remove(self) -> Set[str]:
        """Set of keys removed from (or set to ``None`` in) this dict."""
        if self.__keys_to_remove is None:
            self.__keys_to_remove = set()
        return self.__keys_to_remove

    def pop(self, __key: Any, __default: Any = None) -> Any:
        """Remove ``__key`` and return its value, recording the removal.

        Unlike ``dict.pop`` this never raises: a missing key returns
        ``__default`` (``None`` when omitted) and is not recorded.
        """
        if __key in self:
            self.keys_to_remove.add(__key)
        return super().pop(__key, __default)

    def popitem(self) -> Any:
        """Remove and return a ``(key, value)`` pair, recording the key as removed.

        :raises KeyError: if the dict is empty.
        """
        item = super().popitem()
        self.keys_to_remove.add(item[0])
        return item