ngio 0.5.0b6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ngio/__init__.py +69 -0
- ngio/common/__init__.py +28 -0
- ngio/common/_dimensions.py +335 -0
- ngio/common/_masking_roi.py +153 -0
- ngio/common/_pyramid.py +408 -0
- ngio/common/_roi.py +315 -0
- ngio/common/_synt_images_utils.py +101 -0
- ngio/common/_zoom.py +188 -0
- ngio/experimental/__init__.py +5 -0
- ngio/experimental/iterators/__init__.py +15 -0
- ngio/experimental/iterators/_abstract_iterator.py +390 -0
- ngio/experimental/iterators/_feature.py +189 -0
- ngio/experimental/iterators/_image_processing.py +130 -0
- ngio/experimental/iterators/_mappers.py +48 -0
- ngio/experimental/iterators/_rois_utils.py +126 -0
- ngio/experimental/iterators/_segmentation.py +235 -0
- ngio/hcs/__init__.py +19 -0
- ngio/hcs/_plate.py +1354 -0
- ngio/images/__init__.py +44 -0
- ngio/images/_abstract_image.py +967 -0
- ngio/images/_create_synt_container.py +132 -0
- ngio/images/_create_utils.py +423 -0
- ngio/images/_image.py +926 -0
- ngio/images/_label.py +411 -0
- ngio/images/_masked_image.py +531 -0
- ngio/images/_ome_zarr_container.py +1237 -0
- ngio/images/_table_ops.py +471 -0
- ngio/io_pipes/__init__.py +75 -0
- ngio/io_pipes/_io_pipes.py +361 -0
- ngio/io_pipes/_io_pipes_masked.py +488 -0
- ngio/io_pipes/_io_pipes_roi.py +146 -0
- ngio/io_pipes/_io_pipes_types.py +56 -0
- ngio/io_pipes/_match_shape.py +377 -0
- ngio/io_pipes/_ops_axes.py +344 -0
- ngio/io_pipes/_ops_slices.py +411 -0
- ngio/io_pipes/_ops_slices_utils.py +199 -0
- ngio/io_pipes/_ops_transforms.py +104 -0
- ngio/io_pipes/_zoom_transform.py +180 -0
- ngio/ome_zarr_meta/__init__.py +65 -0
- ngio/ome_zarr_meta/_meta_handlers.py +536 -0
- ngio/ome_zarr_meta/ngio_specs/__init__.py +77 -0
- ngio/ome_zarr_meta/ngio_specs/_axes.py +515 -0
- ngio/ome_zarr_meta/ngio_specs/_channels.py +462 -0
- ngio/ome_zarr_meta/ngio_specs/_dataset.py +89 -0
- ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +539 -0
- ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +438 -0
- ngio/ome_zarr_meta/ngio_specs/_pixel_size.py +122 -0
- ngio/ome_zarr_meta/v04/__init__.py +27 -0
- ngio/ome_zarr_meta/v04/_custom_models.py +18 -0
- ngio/ome_zarr_meta/v04/_v04_spec.py +473 -0
- ngio/ome_zarr_meta/v05/__init__.py +27 -0
- ngio/ome_zarr_meta/v05/_custom_models.py +18 -0
- ngio/ome_zarr_meta/v05/_v05_spec.py +511 -0
- ngio/resources/20200812-CardiomyocyteDifferentiation14-Cycle1_B03/mask.png +0 -0
- ngio/resources/20200812-CardiomyocyteDifferentiation14-Cycle1_B03/nuclei.png +0 -0
- ngio/resources/20200812-CardiomyocyteDifferentiation14-Cycle1_B03/raw.jpg +0 -0
- ngio/resources/__init__.py +55 -0
- ngio/resources/resource_model.py +36 -0
- ngio/tables/__init__.py +43 -0
- ngio/tables/_abstract_table.py +270 -0
- ngio/tables/_tables_container.py +449 -0
- ngio/tables/backends/__init__.py +57 -0
- ngio/tables/backends/_abstract_backend.py +240 -0
- ngio/tables/backends/_anndata.py +139 -0
- ngio/tables/backends/_anndata_utils.py +90 -0
- ngio/tables/backends/_csv.py +19 -0
- ngio/tables/backends/_json.py +92 -0
- ngio/tables/backends/_parquet.py +19 -0
- ngio/tables/backends/_py_arrow_backends.py +222 -0
- ngio/tables/backends/_table_backends.py +226 -0
- ngio/tables/backends/_utils.py +608 -0
- ngio/tables/v1/__init__.py +23 -0
- ngio/tables/v1/_condition_table.py +71 -0
- ngio/tables/v1/_feature_table.py +125 -0
- ngio/tables/v1/_generic_table.py +49 -0
- ngio/tables/v1/_roi_table.py +575 -0
- ngio/transforms/__init__.py +5 -0
- ngio/transforms/_zoom.py +19 -0
- ngio/utils/__init__.py +45 -0
- ngio/utils/_cache.py +48 -0
- ngio/utils/_datasets.py +165 -0
- ngio/utils/_errors.py +37 -0
- ngio/utils/_fractal_fsspec_store.py +42 -0
- ngio/utils/_zarr_utils.py +534 -0
- ngio-0.5.0b6.dist-info/METADATA +148 -0
- ngio-0.5.0b6.dist-info/RECORD +88 -0
- ngio-0.5.0b6.dist-info/WHEEL +4 -0
- ngio-0.5.0b6.dist-info/licenses/LICENSE +28 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Ngio Tables backend implementations."""
|
|
2
|
+
|
|
3
|
+
from ngio.tables.backends._abstract_backend import AbstractTableBackend, BackendMeta
|
|
4
|
+
from ngio.tables.backends._anndata import AnnDataBackend, AnnDataBackendV1
|
|
5
|
+
from ngio.tables.backends._csv import CsvTableBackend
|
|
6
|
+
from ngio.tables.backends._json import JsonTableBackend
|
|
7
|
+
from ngio.tables.backends._parquet import ParquetTableBackend
|
|
8
|
+
from ngio.tables.backends._table_backends import (
|
|
9
|
+
DefaultTableBackend,
|
|
10
|
+
ImplementedTableBackends,
|
|
11
|
+
TableBackend,
|
|
12
|
+
TableBackendProtocol,
|
|
13
|
+
)
|
|
14
|
+
from ngio.tables.backends._utils import (
|
|
15
|
+
TabularData,
|
|
16
|
+
convert_anndata_to_pandas,
|
|
17
|
+
convert_anndata_to_polars,
|
|
18
|
+
convert_pandas_to_anndata,
|
|
19
|
+
convert_pandas_to_polars,
|
|
20
|
+
convert_polars_to_anndata,
|
|
21
|
+
convert_polars_to_pandas,
|
|
22
|
+
convert_to_anndata,
|
|
23
|
+
convert_to_pandas,
|
|
24
|
+
convert_to_polars,
|
|
25
|
+
normalize_anndata,
|
|
26
|
+
normalize_pandas_df,
|
|
27
|
+
normalize_polars_lf,
|
|
28
|
+
normalize_table,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
__all__ = [
|
|
32
|
+
"AbstractTableBackend",
|
|
33
|
+
"AnnDataBackend",
|
|
34
|
+
"AnnDataBackendV1",
|
|
35
|
+
"BackendMeta",
|
|
36
|
+
"CsvTableBackend",
|
|
37
|
+
"DefaultTableBackend",
|
|
38
|
+
"ImplementedTableBackends",
|
|
39
|
+
"JsonTableBackend",
|
|
40
|
+
"ParquetTableBackend",
|
|
41
|
+
"TableBackend",
|
|
42
|
+
"TableBackendProtocol",
|
|
43
|
+
"TabularData",
|
|
44
|
+
"convert_anndata_to_pandas",
|
|
45
|
+
"convert_anndata_to_polars",
|
|
46
|
+
"convert_pandas_to_anndata",
|
|
47
|
+
"convert_pandas_to_polars",
|
|
48
|
+
"convert_polars_to_anndata",
|
|
49
|
+
"convert_polars_to_pandas",
|
|
50
|
+
"convert_to_anndata",
|
|
51
|
+
"convert_to_pandas",
|
|
52
|
+
"convert_to_polars",
|
|
53
|
+
"normalize_anndata",
|
|
54
|
+
"normalize_pandas_df",
|
|
55
|
+
"normalize_polars_lf",
|
|
56
|
+
"normalize_table",
|
|
57
|
+
]
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Literal
|
|
3
|
+
|
|
4
|
+
from anndata import AnnData
|
|
5
|
+
from pandas import DataFrame
|
|
6
|
+
from polars import DataFrame as PolarsDataFrame
|
|
7
|
+
from polars import LazyFrame
|
|
8
|
+
from pydantic import BaseModel, ConfigDict
|
|
9
|
+
|
|
10
|
+
from ngio.tables.backends._utils import (
|
|
11
|
+
TabularData,
|
|
12
|
+
convert_to_anndata,
|
|
13
|
+
convert_to_pandas,
|
|
14
|
+
convert_to_polars,
|
|
15
|
+
)
|
|
16
|
+
from ngio.utils import NgioValueError, ZarrGroupHandler
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class BackendMeta(BaseModel):
    """Pydantic model holding the metadata a table backend records.

    Keys that are not declared below are accepted and kept on the model
    (``extra="allow"``).
    """

    # Accept and keep any additional keys next to the declared fields.
    model_config = ConfigDict(extra="allow")

    # Identifier of the backend; defaults to the "anndata_v1" backend.
    backend: str = "anndata_v1"
    # Key of the table index, if one is defined.
    index_key: str | None = None
    # Type of the index values, restricted to "int" or "str" when set.
    index_type: Literal["int", "str"] | None = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class AbstractTableBackend(ABC):
    """Abstract base class for table backends.

    A backend is attached to a zarr group through `set_group_handler` and
    must implement `backend_name`, the three `implements_*` capability
    flags and `load`. The `load_as_*` helpers convert whatever `load`
    returns, while the `write_from_*` methods raise `NotImplementedError`
    unless a subclass overrides them.
    """

    def set_group_handler(
        self,
        group_handler: ZarrGroupHandler,
        index_key: str | None = None,
        index_type: Literal["int", "str"] | None = None,
    ) -> None:
        """Attach a group handler to the backend.

        Index keys and index types are used to ensure that the
        serialization and deserialization of the table
        is consistent across different backends.

        Making sure that this is consistent is
        a duty of the backend implementations.

        Args:
            group_handler: Handler of the zarr group the table lives in.
            index_key: Key of the table index, if any.
            index_type: Type of the index values ("int" or "str"), if any.
        """
        self._group_handler = group_handler
        self._index_key = index_key
        self._index_type = index_type

    @staticmethod
    @abstractmethod
    def backend_name() -> str:
        """Return the name of the backend.

        As a convention we set name as:
            {backend_name}_v{version}

        Where the version is an integer.
        """
        raise NotImplementedError

    @staticmethod
    @abstractmethod
    def implements_anndata() -> bool:
        """Check if the backend implements the anndata protocol.

        If this is True, the backend should implement the
        `write_from_anndata` method.

        AnnData objects are more complex than DataFrames,
        so if this is true the backend should implement the
        full serialization of the AnnData object.

        If this is False, `write_from_anndata` should raise a
        `NotImplementedError`.
        """
        raise NotImplementedError

    @staticmethod
    @abstractmethod
    def implements_pandas() -> bool:
        """Check if the backend implements the pandas protocol.

        If this is True, the backend should implement the
        `write_from_pandas` method.

        If this is False, `write_from_pandas` should raise a
        `NotImplementedError`.
        """
        raise NotImplementedError

    @staticmethod
    @abstractmethod
    def implements_polars() -> bool:
        """Check if the backend implements the polars protocol.

        If this is True, the backend should implement the
        `write_from_polars` method.

        If this is False, `write_from_polars` should raise a
        `NotImplementedError`.
        """
        raise NotImplementedError

    @property
    def group_handler(self) -> ZarrGroupHandler:
        """Get the group handler set by `set_group_handler`."""
        return self._group_handler

    @property
    def index_key(self) -> str | None:
        """Get the index key set by `set_group_handler`."""
        return self._index_key

    @property
    def index_type(self) -> Literal["int", "str"] | None:
        """Get the index type.

        Returns:
            "int", "str", or None when no index type was set.

        Raises:
            NgioValueError: If the stored index type is neither "int"
                nor "str".
        """
        if self._index_type is None:
            return None

        # The annotation is not enforced at runtime, so validate on access.
        if self._index_type not in ("int", "str"):
            raise NgioValueError(
                f"Invalid index type {self._index_type}. Must be 'int' or 'str'."
            )
        return self._index_type  # type: ignore[return-value]

    @abstractmethod
    def load(self) -> TabularData:
        """Load the table from the store.

        This is the generic load method. Each backend returns the table
        in its own native representation; the `load_as_*` methods convert
        that result to a specific type.
        """
        ...

    def load_as_anndata(self) -> AnnData:
        """Load the table as an AnnData object.

        The result of `load` is converted with `convert_to_anndata`,
        using the backend's index key.
        """
        table = self.load()
        return convert_to_anndata(
            table,
            index_key=self.index_key,
        )

    def load_as_pandas_df(self) -> DataFrame:
        """Load the table as a pandas DataFrame.

        The result of `load` is converted with `convert_to_pandas`,
        using the backend's index key and index type.
        """
        table = self.load()
        return convert_to_pandas(
            table,
            index_key=self.index_key,
            index_type=self.index_type,
        )

    def load_as_polars_lf(self) -> LazyFrame:
        """Load the table as a polars LazyFrame.

        The result of `load` is converted with `convert_to_polars`,
        using the backend's index key and index type.
        """
        table = self.load()
        return convert_to_polars(
            table,
            index_key=self.index_key,
            index_type=self.index_type,
        )

    def write_from_pandas(self, table: DataFrame) -> None:
        """Serialize the table from a pandas DataFrame.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            f"Backend {self.backend_name()} does not support "
            "serialization of DataFrame objects."
        )

    def write_from_anndata(self, table: AnnData) -> None:
        """Serialize the table from an AnnData object.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            f"Backend {self.backend_name()} does not support "
            "serialization of AnnData objects."
        )

    def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
        """Serialize the table from a polars DataFrame or LazyFrame.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            f"Backend {self.backend_name()} does not support "
            "serialization of Polars objects."
        )

    def write_metadata(self, metadata: dict | None = None) -> None:
        """Write the metadata to the store.

        The given metadata is merged with the backend name, index key and
        index type (entries that are None are left out) and written
        through the group handler. The caller's dictionary is not
        modified.

        Args:
            metadata: Extra attributes to store next to the backend
                metadata. Backend entries win on key collisions.
        """
        # Work on a copy so the caller's dictionary is never mutated.
        metadata = {} if metadata is None else dict(metadata)

        attrs = self._group_handler.reopen_group().attrs.asdict()
        # This is required by anndata to identify the format
        for encoding_key in ("encoding-type", "encoding-version"):
            if encoding_key in attrs:
                metadata[encoding_key] = attrs[encoding_key]

        backend_metadata = BackendMeta(
            backend=self.backend_name(),
            index_key=self.index_key,
            index_type=self.index_type,
        ).model_dump(exclude_none=True)
        metadata.update(backend_metadata)
        self._group_handler.write_attrs(metadata)

    def write(
        self,
        table_data: TabularData,
        metadata: dict | None = None,
    ) -> None:
        """Serialize the table to the store, and write the metadata.

        This is the generic write method. The `write_from_*` method is
        selected from the runtime type of `table_data`, then
        `write_metadata` is called.

        Args:
            table_data: A pandas DataFrame, an AnnData object, or a
                polars DataFrame/LazyFrame.
            metadata: Extra attributes forwarded to `write_metadata`.

        Raises:
            NgioValueError: If `table_data` is of an unsupported type.
        """
        if isinstance(table_data, DataFrame):
            self.write_from_pandas(table_data)
        elif isinstance(table_data, AnnData):
            self.write_from_anndata(table_data)
        elif isinstance(table_data, PolarsDataFrame | LazyFrame):
            self.write_from_polars(table_data)
        else:
            raise NgioValueError(
                f"Unsupported table type {type(table_data)}. "
                "Please specify the mode explicitly. "
                "Supported serialization modes are: "
                "'pandas', 'anndata', 'polars'."
            )
        self.write_metadata(metadata)
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import zarr
|
|
2
|
+
from anndata import AnnData
|
|
3
|
+
from anndata._settings import settings
|
|
4
|
+
from pandas import DataFrame
|
|
5
|
+
from polars import DataFrame as PolarsDataFrame
|
|
6
|
+
from polars import LazyFrame
|
|
7
|
+
from zarr.storage import FsspecStore, LocalStore, MemoryStore
|
|
8
|
+
|
|
9
|
+
from ngio.tables.backends._abstract_backend import AbstractTableBackend
|
|
10
|
+
from ngio.tables.backends._anndata_utils import (
|
|
11
|
+
custom_anndata_read_zarr,
|
|
12
|
+
)
|
|
13
|
+
from ngio.tables.backends._utils import (
|
|
14
|
+
convert_pandas_to_anndata,
|
|
15
|
+
convert_polars_to_anndata,
|
|
16
|
+
normalize_anndata,
|
|
17
|
+
)
|
|
18
|
+
from ngio.utils import NgioValueError, copy_group
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class AnnDataBackend(AbstractTableBackend):
    """A class to load and write tables from/to an AnnData object.

    pandas and polars tables are converted to AnnData before being
    written, so all three table types are supported.
    """

    @staticmethod
    def backend_name() -> str:
        """Return the name of the backend."""
        return "anndata"

    @staticmethod
    def implements_anndata() -> bool:
        """Check if the backend implements the anndata protocol."""
        return True

    @staticmethod
    def implements_pandas() -> bool:
        """Whether the handler implements the dataframe protocol."""
        return True

    @staticmethod
    def implements_polars() -> bool:
        """Whether the handler implements the polars protocol."""
        return True

    def load_as_anndata(self) -> AnnData:
        """Load the table as an AnnData object.

        The group is read with `custom_anndata_read_zarr` and the result
        is passed through `normalize_anndata` with the backend index key.
        """
        # NOTE(review): this sets anndata's global *write* format even on
        # the read path; kept as in the original implementation.
        settings.zarr_write_format = self._group_handler.zarr_format
        raw_table = custom_anndata_read_zarr(self._group_handler._group)
        return normalize_anndata(raw_table, index_key=self.index_key)

    def load(self) -> AnnData:
        """Load the table in the backend's native type (AnnData)."""
        return self.load_as_anndata()

    def _write_to_local_store(
        self, store: LocalStore, path: str, table: AnnData
    ) -> None:
        """Write the AnnData table to a LocalStore.

        The target is the group path appended to the store root.
        """
        store_path = f"{store.root}/{path}"
        table.write_zarr(store_path)

    def _write_to_fsspec_store(
        self, store: FsspecStore, path: str, table: AnnData
    ) -> None:
        """Write the AnnData table to a FsspecStore.

        The table is written through an fsspec mapper created from the
        store's filesystem at the store path plus the group path.
        """
        full_url = f"{store.path}/{path}"
        mapper = store.fs.get_mapper(full_url)
        table.write_zarr(mapper)

    def _write_to_memory_store(
        self, store: MemoryStore, path: str, table: AnnData
    ) -> None:
        """Write the AnnData table to a MemoryStore.

        `store` and `path` are accepted for parity with the other
        `_write_to_*` helpers but are not used: the table is first
        written to a scratch in-memory store and the resulting group is
        then copied into the handler's group.
        """
        # Use a dedicated name instead of overwriting the `store` argument.
        scratch_store = MemoryStore()
        table.write_zarr(scratch_store)
        scratch_group = zarr.open_group(scratch_store, mode="r")
        copy_group(
            scratch_group,
            self._group_handler._group,
            suppress_warnings=True,
        )

    def write_from_anndata(self, table: AnnData) -> None:
        """Serialize the table from an AnnData object.

        The writer is selected from the type of the handler's store.

        Raises:
            NgioValueError: If the store is not a LocalStore, FsspecStore
                or MemoryStore.
        """
        # Make sure to use the correct zarr format
        settings.zarr_write_format = self._group_handler.zarr_format
        store = self._group_handler.store
        path = self._group_handler.group.path
        if isinstance(store, LocalStore):
            self._write_to_local_store(store, path, table)
        elif isinstance(store, FsspecStore):
            self._write_to_fsspec_store(store, path, table)
        elif isinstance(store, MemoryStore):
            self._write_to_memory_store(store, path, table)
        else:
            raise NgioValueError(
                f"Ngio does not support writing an AnnData table to a "
                f"store of type {type(store)}. "
                "Please make sure to use a compatible "
                "store like a LocalStore, FsspecStore, or MemoryStore."
            )

    def write_from_pandas(self, table: DataFrame) -> None:
        """Serialize the table from a pandas DataFrame.

        The DataFrame is converted to AnnData and then written with
        `write_from_anndata`.
        """
        anndata = convert_pandas_to_anndata(
            table,
            index_key=self.index_key,
        )
        self.write_from_anndata(anndata)

    def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
        """Serialize the table from a polars DataFrame or LazyFrame.

        The polars table is converted to AnnData and then written with
        `write_from_anndata`.
        """
        anndata = convert_polars_to_anndata(
            table,
            index_key=self.index_key,
        )
        self.write_from_anndata(anndata)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class AnnDataBackendV1(AnnDataBackend):
    """AnnData backend kept for backwards compatibility.

    It inherits everything from `AnnDataBackend` and only reports a
    different backend name.
    """

    @staticmethod
    def backend_name() -> str:
        """Return the name of the backend."""
        versioned_name = "anndata_v1"
        return versioned_name
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
4
|
+
|
|
5
|
+
import zarr
|
|
6
|
+
from anndata import AnnData
|
|
7
|
+
from anndata._io.specs import read_elem
|
|
8
|
+
from anndata._io.utils import _read_legacy_raw
|
|
9
|
+
from anndata._io.zarr import read_dataframe
|
|
10
|
+
from anndata.compat import _clean_uns
|
|
11
|
+
from anndata.experimental import read_dispatched
|
|
12
|
+
|
|
13
|
+
from ngio.utils import (
|
|
14
|
+
NgioValueError,
|
|
15
|
+
StoreOrGroup,
|
|
16
|
+
open_group_wrapper,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from collections.abc import Callable, Sequence
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def custom_anndata_read_zarr(
    store: StoreOrGroup, elem_to_read: Sequence[str] | None = None
) -> AnnData:
    """Read an AnnData object from a hierarchical Zarr store.

    Implementation originally from
    https://github.com/scverse/anndata/blob/main/src/anndata/_io/zarr.py
    The original implementation would not work with remote storages, so it
    was copied here and slightly modified to work with them.

    Args:
        store (StoreOrGroup): A store or group to read the AnnData from.
        elem_to_read (Sequence[str] | None): The elements to read from the
            store. If None, "X", "obs", "var", "uns", "obsm", "varm",
            "obsp", "varp" and "layers" are read.

    Returns:
        AnnData: The AnnData object read from the store.

    Raises:
        NgioValueError: If the object that was read is not an AnnData.
    """
    group = open_group_wrapper(store=store, mode="r")
    if elem_to_read is None:
        elem_to_read = [
            "X",
            "obs",
            "var",
            "uns",
            "obsm",
            "varm",
            "obsp",
            "varp",
            "layers",
        ]

    # Read with handling for backwards compat
    def callback(func: Callable, elem_name: str, elem: Any, iospec: Any) -> Any:
        if iospec.encoding_type == "anndata" or elem_name.endswith("/"):
            ad_kwargs = {}
            # Some of these elem fail on https
            # So we only include the ones that are strictly necessary
            # for fractal tables
            # This fails on some https
            # base_elem += list(elem.keys())
            for k in elem_to_read:
                v = elem.get(k)
                # Only elements that are present are read; each one is
                # dispatched through this same callback.
                if v is not None and not k.startswith("raw."):
                    ad_kwargs[k] = read_dispatched(v, callback)  # type: ignore
            return AnnData(**ad_kwargs)

        elif elem_name.startswith("/raw."):
            # Legacy "raw.*" entries are not read here; "raw.X" is handled
            # after the dispatched read below.
            return None
        elif elem_name in {"/obs", "/var"}:
            return read_dataframe(elem)
        elif elem_name == "/raw":
            # Backwards compat
            return _read_legacy_raw(group, func(elem), read_dataframe, func)
        # Everything else is read with the reader supplied by anndata.
        return func(elem)

    adata = read_dispatched(group, callback=callback)  # type: ignore

    # Backwards compat (should figure out which version)
    if "raw.X" in group:
        raw = AnnData(**_read_legacy_raw(group, adata.raw, read_dataframe, read_elem))  # type: ignore
        raw.obs_names = adata.obs_names  # type: ignore
        adata.raw = raw  # type: ignore

    # Backwards compat for <0.7
    # NOTE(review): group["obs"] is indexed unconditionally, so a group
    # without "obs" presumably fails here - confirm against zarr behaviour.
    if isinstance(group["obs"], zarr.Array):
        _clean_uns(adata)

    # The dispatched read may hand back a plain dict of AnnData kwargs.
    if isinstance(adata, dict):
        adata = AnnData(**adata)  # type: ignore
    if not isinstance(adata, AnnData):
        raise NgioValueError(f"Expected an AnnData object, but got {type(adata)}")
    return adata
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from ngio.tables.backends._py_arrow_backends import PyArrowBackend
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class CsvTableBackend(PyArrowBackend):
    """PyArrow-based backend to load and write small tables in CSV format."""

    def __init__(self):
        """Set up the parent backend for a "table.csv" file in "csv" format."""
        super().__init__(table_name="table.csv", table_format="csv")

    @staticmethod
    def backend_name() -> str:
        """Return the backend identifier, "csv"."""
        return "csv"
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from pandas import DataFrame
|
|
3
|
+
from polars import DataFrame as PolarsDataFrame
|
|
4
|
+
from polars import LazyFrame
|
|
5
|
+
|
|
6
|
+
from ngio.tables.backends._abstract_backend import AbstractTableBackend
|
|
7
|
+
from ngio.tables.backends._utils import (
|
|
8
|
+
normalize_pandas_df,
|
|
9
|
+
normalize_polars_lf,
|
|
10
|
+
)
|
|
11
|
+
from ngio.utils import NgioError
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class JsonTableBackend(AbstractTableBackend):
    """Backend that keeps small tables in the ``.attrs`` (json) of a zarr group.

    The table is stored in the attributes of a "table" sub-group of the
    group managed by the handler.
    """

    @staticmethod
    def backend_name() -> str:
        """Return the backend identifier, "json"."""
        return "json"

    @staticmethod
    def implements_anndata() -> bool:
        """Report that AnnData serialization is not provided."""
        return False

    @staticmethod
    def implements_pandas() -> bool:
        """Report that pandas serialization is provided."""
        return True

    @staticmethod
    def implements_polars() -> bool:
        """Report that polars serialization is provided."""
        return True

    def _get_table_group(self):
        """Return the "table" sub-group, requesting creation if missing.

        Raises:
            NgioError: If the handler fails to get or create the group.
        """
        handler = self._group_handler
        try:
            return handler.get_group(path="table", create_mode=True)
        except NgioError as e:
            raise NgioError(
                "Could not get or create a 'table' group in the store "
                f"{handler.store} path "
                f"{handler.group.path}/table."
            ) from e

    def load_as_pandas_df(self) -> DataFrame:
        """Build a pandas DataFrame from the "table" group attributes.

        The frame is normalized with the backend index key and type,
        without resetting the index.
        """
        stored_columns = self._get_table_group().attrs.asdict()
        return normalize_pandas_df(
            pd.DataFrame.from_dict(stored_columns),
            index_key=self.index_key,
            index_type=self.index_type,
            reset_index=False,
        )

    def load(self) -> DataFrame:
        """Load the table in the backend's native type (pandas DataFrame)."""
        return self.load_as_pandas_df()

    def _write_from_dict(self, table: dict) -> None:
        """Replace the "table" group attributes with the given dictionary."""
        group = self._get_table_group()
        # Drop whatever was stored before so stale columns do not survive.
        group.attrs.clear()
        group.attrs.update(table)

    def write_from_pandas(self, table: DataFrame) -> None:
        """Store a pandas DataFrame as a column -> list-of-values mapping.

        The frame is normalized first, with the index reset.
        """
        normalized = normalize_pandas_df(
            table,
            index_key=self.index_key,
            index_type=self.index_type,
            reset_index=True,
        )
        self._write_from_dict(table=normalized.to_dict(orient="list"))

    def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
        """Store a polars DataFrame or LazyFrame as a plain dictionary.

        The table is normalized first; a LazyFrame is collected before
        being converted.
        """
        frame = normalize_polars_lf(
            table,
            index_key=self.index_key,
            index_type=self.index_type,
        )
        if isinstance(frame, LazyFrame):
            frame = frame.collect()

        self._write_from_dict(table=frame.to_dict(as_series=False))
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from ngio.tables.backends._py_arrow_backends import PyArrowBackend
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class ParquetTableBackend(PyArrowBackend):
    """PyArrow-based backend to load and write small tables in Parquet format."""

    def __init__(self):
        """Set up the parent backend for a "table.parquet" file in "parquet" format."""
        super().__init__(table_name="table.parquet", table_format="parquet")

    @staticmethod
    def backend_name() -> str:
        """Return the backend identifier, "parquet"."""
        return "parquet"
|