ngio 0.2.9__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ngio/common/__init__.py +16 -0
- ngio/common/_array_pipe.py +50 -27
- ngio/common/_table_ops.py +471 -0
- ngio/hcs/__init__.py +1 -1
- ngio/hcs/{plate.py → _plate.py} +451 -78
- ngio/images/__init__.py +3 -3
- ngio/images/{image.py → _image.py} +26 -21
- ngio/images/{label.py → _label.py} +6 -4
- ngio/images/{masked_image.py → _masked_image.py} +2 -2
- ngio/images/{ome_zarr_container.py → _ome_zarr_container.py} +152 -86
- ngio/ome_zarr_meta/_meta_handlers.py +16 -8
- ngio/ome_zarr_meta/ngio_specs/_channels.py +41 -29
- ngio/tables/__init__.py +14 -2
- ngio/tables/_abstract_table.py +269 -0
- ngio/tables/{tables_container.py → _tables_container.py} +186 -100
- ngio/tables/backends/__init__.py +20 -0
- ngio/tables/backends/_abstract_backend.py +58 -80
- ngio/tables/backends/{_anndata_v1.py → _anndata.py} +5 -1
- ngio/tables/backends/_csv.py +35 -0
- ngio/tables/backends/{_json_v1.py → _json.py} +4 -1
- ngio/tables/backends/{_csv_v1.py → _non_zarr_backends.py} +61 -27
- ngio/tables/backends/_parquet.py +47 -0
- ngio/tables/backends/_table_backends.py +39 -18
- ngio/tables/backends/_utils.py +147 -1
- ngio/tables/v1/__init__.py +19 -3
- ngio/tables/v1/_condition_table.py +71 -0
- ngio/tables/v1/_feature_table.py +63 -129
- ngio/tables/v1/_generic_table.py +21 -159
- ngio/tables/v1/_roi_table.py +285 -201
- ngio/utils/_fractal_fsspec_store.py +29 -0
- {ngio-0.2.9.dist-info → ngio-0.3.0.dist-info}/METADATA +4 -3
- ngio-0.3.0.dist-info/RECORD +61 -0
- ngio/tables/_validators.py +0 -108
- ngio-0.2.9.dist-info/RECORD +0 -57
- /ngio/images/{abstract_image.py → _abstract_image.py} +0 -0
- /ngio/images/{create.py → _create.py} +0 -0
- {ngio-0.2.9.dist-info → ngio-0.3.0.dist-info}/WHEEL +0 -0
- {ngio-0.2.9.dist-info → ngio-0.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -5,15 +5,13 @@ from anndata import AnnData
|
|
|
5
5
|
from pandas import DataFrame
|
|
6
6
|
from polars import DataFrame as PolarsDataFrame
|
|
7
7
|
from polars import LazyFrame
|
|
8
|
-
from pydantic import BaseModel
|
|
8
|
+
from pydantic import BaseModel, ConfigDict
|
|
9
9
|
|
|
10
10
|
from ngio.tables.backends._utils import (
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
convert_polars_to_anndata,
|
|
16
|
-
convert_polars_to_pandas,
|
|
11
|
+
TabularData,
|
|
12
|
+
convert_to_anndata,
|
|
13
|
+
convert_to_pandas,
|
|
14
|
+
convert_to_polars,
|
|
17
15
|
)
|
|
18
16
|
from ngio.utils import NgioValueError, ZarrGroupHandler
|
|
19
17
|
|
|
@@ -21,29 +19,30 @@ from ngio.utils import NgioValueError, ZarrGroupHandler
|
|
|
21
19
|
class BackendMeta(BaseModel):
|
|
22
20
|
"""Metadata for the backend."""
|
|
23
21
|
|
|
24
|
-
backend: str
|
|
22
|
+
backend: str = "anndata"
|
|
25
23
|
index_key: str | None = None
|
|
26
24
|
index_type: Literal["int", "str"] | None = None
|
|
27
25
|
|
|
26
|
+
model_config = ConfigDict(extra="allow")
|
|
27
|
+
|
|
28
28
|
|
|
29
29
|
class AbstractTableBackend(ABC):
|
|
30
30
|
"""Abstract class for table backends."""
|
|
31
31
|
|
|
32
|
-
def
|
|
32
|
+
def set_group_handler(
|
|
33
33
|
self,
|
|
34
34
|
group_handler: ZarrGroupHandler,
|
|
35
35
|
index_key: str | None = None,
|
|
36
36
|
index_type: Literal["int", "str"] | None = None,
|
|
37
|
-
):
|
|
38
|
-
"""
|
|
37
|
+
) -> None:
|
|
38
|
+
"""Attach a group handler to the backend.
|
|
39
39
|
|
|
40
|
-
|
|
40
|
+
Index keys and index types are used to ensure that the
|
|
41
|
+
serialization and deserialization of the table
|
|
42
|
+
is consistent across different backends.
|
|
41
43
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
containing the table data.
|
|
45
|
-
index_key (str): The column name to use as the index of the DataFrame.
|
|
46
|
-
index_type (str): The type of the index column in the DataFrame.
|
|
44
|
+
Making sure that this is consistent is
|
|
45
|
+
a duty of the backend implementations.
|
|
47
46
|
"""
|
|
48
47
|
self._group_handler = group_handler
|
|
49
48
|
self._index_key = index_key
|
|
@@ -67,7 +66,11 @@ class AbstractTableBackend(ABC):
|
|
|
67
66
|
"""Check if the backend implements the anndata protocol.
|
|
68
67
|
|
|
69
68
|
If this is True, the backend should implement the
|
|
70
|
-
`
|
|
69
|
+
`write_from_anndata` method.
|
|
70
|
+
|
|
71
|
+
AnnData objects are more complex than DataFrames,
|
|
72
|
+
so if this is true the backend should implement the
|
|
73
|
+
full serialization of the AnnData object.
|
|
71
74
|
|
|
72
75
|
If this is False, these methods should raise a
|
|
73
76
|
`NotImplementedError`.
|
|
@@ -80,7 +83,7 @@ class AbstractTableBackend(ABC):
|
|
|
80
83
|
"""Check if the backend implements the pandas protocol.
|
|
81
84
|
|
|
82
85
|
If this is True, the backend should implement the
|
|
83
|
-
`
|
|
86
|
+
`write_from_dataframe` methods.
|
|
84
87
|
|
|
85
88
|
If this is False, these methods should raise a
|
|
86
89
|
`NotImplementedError`.
|
|
@@ -93,7 +96,7 @@ class AbstractTableBackend(ABC):
|
|
|
93
96
|
"""Check if the backend implements the polars protocol.
|
|
94
97
|
|
|
95
98
|
If this is True, the backend should implement the
|
|
96
|
-
`
|
|
99
|
+
`write_from_polars` methods.
|
|
97
100
|
|
|
98
101
|
If this is False, these methods should raise a
|
|
99
102
|
`NotImplementedError`.
|
|
@@ -122,6 +125,16 @@ class AbstractTableBackend(ABC):
|
|
|
122
125
|
)
|
|
123
126
|
return self._index_type # type: ignore[return-value]
|
|
124
127
|
|
|
128
|
+
@abstractmethod
|
|
129
|
+
def load(self) -> TabularData:
|
|
130
|
+
"""Load the table from the store.
|
|
131
|
+
|
|
132
|
+
This is a generic load method.
|
|
133
|
+
Based on the explicit mode or the type of the table,
|
|
134
|
+
it will call the appropriate load method.
|
|
135
|
+
"""
|
|
136
|
+
...
|
|
137
|
+
|
|
125
138
|
def load_as_anndata(self) -> AnnData:
|
|
126
139
|
"""Load the table as an AnnData object.
|
|
127
140
|
|
|
@@ -129,70 +142,35 @@ class AbstractTableBackend(ABC):
|
|
|
129
142
|
selecting columns is not implemented, because it is not
|
|
130
143
|
straightforward to do so for an arbitrary AnnData object.
|
|
131
144
|
"""
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
elif self.implements_polars():
|
|
138
|
-
return convert_polars_to_anndata(
|
|
139
|
-
self.load_as_polars_lf(),
|
|
140
|
-
index_key=self.index_key,
|
|
141
|
-
)
|
|
142
|
-
else:
|
|
143
|
-
raise NgioValueError(
|
|
144
|
-
"Backend does not implement any of the protocols. "
|
|
145
|
-
"A backend should implement at least one of the "
|
|
146
|
-
"following protocols: anndata, pandas, polars."
|
|
147
|
-
)
|
|
145
|
+
table = self.load()
|
|
146
|
+
return convert_to_anndata(
|
|
147
|
+
table,
|
|
148
|
+
index_key=self.index_key,
|
|
149
|
+
)
|
|
148
150
|
|
|
149
151
|
def load_as_pandas_df(self) -> DataFrame:
|
|
150
152
|
"""Load the table as a pandas DataFrame.
|
|
151
153
|
|
|
152
154
|
If columns are provided, the table should be filtered
|
|
153
155
|
"""
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
elif self.implements_polars():
|
|
161
|
-
return convert_polars_to_pandas(
|
|
162
|
-
self.load_as_polars_lf(),
|
|
163
|
-
index_key=self.index_key,
|
|
164
|
-
index_type=self.index_type,
|
|
165
|
-
)
|
|
166
|
-
else:
|
|
167
|
-
raise NgioValueError(
|
|
168
|
-
"Backend does not implement any of the protocols. "
|
|
169
|
-
"A backend should implement at least one of the "
|
|
170
|
-
"following protocols: anndata, pandas, polars."
|
|
171
|
-
)
|
|
156
|
+
table = self.load()
|
|
157
|
+
return convert_to_pandas(
|
|
158
|
+
table,
|
|
159
|
+
index_key=self.index_key,
|
|
160
|
+
index_type=self.index_type,
|
|
161
|
+
)
|
|
172
162
|
|
|
173
163
|
def load_as_polars_lf(self) -> LazyFrame:
|
|
174
164
|
"""Load the table as a polars LazyFrame.
|
|
175
165
|
|
|
176
166
|
If columns are provided, the table should be filtered
|
|
177
167
|
"""
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
elif self.implements_pandas():
|
|
185
|
-
return convert_pandas_to_polars(
|
|
186
|
-
self.load_as_pandas_df(),
|
|
187
|
-
index_key=self.index_key,
|
|
188
|
-
index_type=self.index_type,
|
|
189
|
-
).lazy()
|
|
190
|
-
else:
|
|
191
|
-
raise NgioValueError(
|
|
192
|
-
"Backend does not implement any of the protocols. "
|
|
193
|
-
"A backend should implement at least one of the "
|
|
194
|
-
"following protocols: anndata, pandas, polars."
|
|
195
|
-
)
|
|
168
|
+
table = self.load()
|
|
169
|
+
return convert_to_polars(
|
|
170
|
+
table,
|
|
171
|
+
index_key=self.index_key,
|
|
172
|
+
index_type=self.index_type,
|
|
173
|
+
)
|
|
196
174
|
|
|
197
175
|
def write_from_pandas(self, table: DataFrame) -> None:
|
|
198
176
|
"""Serialize the table from a pandas DataFrame."""
|
|
@@ -230,7 +208,7 @@ class AbstractTableBackend(ABC):
|
|
|
230
208
|
|
|
231
209
|
def write(
|
|
232
210
|
self,
|
|
233
|
-
|
|
211
|
+
table_data: TabularData,
|
|
234
212
|
metadata: dict | None = None,
|
|
235
213
|
mode: Literal["pandas", "anndata", "polars"] | None = None,
|
|
236
214
|
) -> None:
|
|
@@ -240,15 +218,15 @@ class AbstractTableBackend(ABC):
|
|
|
240
218
|
Based on the explicit mode or the type of the table,
|
|
241
219
|
it will call the appropriate write method.
|
|
242
220
|
"""
|
|
243
|
-
if mode == "pandas" or isinstance(
|
|
244
|
-
self.write_from_pandas(
|
|
245
|
-
elif mode == "anndata" or isinstance(
|
|
246
|
-
self.write_from_anndata(
|
|
247
|
-
elif mode == "polars" or isinstance(
|
|
248
|
-
self.write_from_polars(
|
|
221
|
+
if mode == "pandas" or isinstance(table_data, DataFrame):
|
|
222
|
+
self.write_from_pandas(table_data) # type: ignore[arg-type]
|
|
223
|
+
elif mode == "anndata" or isinstance(table_data, AnnData):
|
|
224
|
+
self.write_from_anndata(table_data) # type: ignore[arg-type]
|
|
225
|
+
elif mode == "polars" or isinstance(table_data, PolarsDataFrame | LazyFrame):
|
|
226
|
+
self.write_from_polars(table_data)
|
|
249
227
|
else:
|
|
250
228
|
raise NgioValueError(
|
|
251
|
-
f"Unsupported table type {type(
|
|
229
|
+
f"Unsupported table type {type(table_data)}. "
|
|
252
230
|
"Please specify the mode explicitly. "
|
|
253
231
|
"Supported serialization modes are: "
|
|
254
232
|
"'pandas', 'anndata', 'polars'."
|
|
@@ -21,7 +21,7 @@ class AnnDataBackend(AbstractTableBackend):
|
|
|
21
21
|
@staticmethod
|
|
22
22
|
def backend_name() -> str:
|
|
23
23
|
"""Return the name of the backend."""
|
|
24
|
-
return "
|
|
24
|
+
return "anndata"
|
|
25
25
|
|
|
26
26
|
@staticmethod
|
|
27
27
|
def implements_anndata() -> bool:
|
|
@@ -44,6 +44,10 @@ class AnnDataBackend(AbstractTableBackend):
|
|
|
44
44
|
anndata = normalize_anndata(anndata, index_key=self.index_key)
|
|
45
45
|
return anndata
|
|
46
46
|
|
|
47
|
+
def load(self) -> AnnData:
|
|
48
|
+
"""Load the table as an AnnData object."""
|
|
49
|
+
return self.load_as_anndata()
|
|
50
|
+
|
|
47
51
|
def write_from_anndata(self, table: AnnData) -> None:
|
|
48
52
|
"""Serialize the table from an AnnData object."""
|
|
49
53
|
full_url = self._group_handler.full_url
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import polars as pl
|
|
3
|
+
|
|
4
|
+
from ngio.tables.backends._non_zarr_backends import NonZarrBaseBackend
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def write_lf_to_csv(path: str, table: pl.DataFrame) -> None:
|
|
8
|
+
"""Write a polars DataFrame to a CSV file."""
|
|
9
|
+
table.write_csv(path)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def write_df_to_csv(path: str, table: pd.DataFrame) -> None:
|
|
13
|
+
"""Write a pandas DataFrame to a CSV file."""
|
|
14
|
+
table.to_csv(path, index=False)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CsvTableBackend(NonZarrBaseBackend):
|
|
18
|
+
"""A class to load and write small tables in CSV format."""
|
|
19
|
+
|
|
20
|
+
def __init__(
|
|
21
|
+
self,
|
|
22
|
+
):
|
|
23
|
+
"""Initialize the CsvTableBackend."""
|
|
24
|
+
super().__init__(
|
|
25
|
+
lf_reader=pl.scan_csv,
|
|
26
|
+
df_reader=pd.read_csv,
|
|
27
|
+
lf_writer=write_lf_to_csv,
|
|
28
|
+
df_writer=write_df_to_csv,
|
|
29
|
+
table_name="table.csv",
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
@staticmethod
|
|
33
|
+
def backend_name() -> str:
|
|
34
|
+
"""Return the name of the backend."""
|
|
35
|
+
return "csv"
|
|
@@ -17,7 +17,7 @@ class JsonTableBackend(AbstractTableBackend):
|
|
|
17
17
|
@staticmethod
|
|
18
18
|
def backend_name() -> str:
|
|
19
19
|
"""Return the name of the backend."""
|
|
20
|
-
return "
|
|
20
|
+
return "json"
|
|
21
21
|
|
|
22
22
|
@staticmethod
|
|
23
23
|
def implements_anndata() -> bool:
|
|
@@ -61,6 +61,9 @@ class JsonTableBackend(AbstractTableBackend):
|
|
|
61
61
|
)
|
|
62
62
|
return data_frame
|
|
63
63
|
|
|
64
|
+
def load(self) -> DataFrame:
|
|
65
|
+
return self.load_as_pandas_df()
|
|
66
|
+
|
|
64
67
|
def _write_from_dict(self, table: dict) -> None:
|
|
65
68
|
"""Write the table from a dictionary to the store."""
|
|
66
69
|
table_group = self._get_table_group()
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import io
|
|
2
|
+
from collections.abc import Callable
|
|
3
|
+
from typing import Any
|
|
2
4
|
|
|
3
|
-
import pandas as pd
|
|
4
|
-
import polars as pl
|
|
5
5
|
from pandas import DataFrame
|
|
6
6
|
from polars import DataFrame as PolarsDataFrame
|
|
7
7
|
from polars import LazyFrame
|
|
@@ -12,15 +12,22 @@ from ngio.tables.backends._utils import normalize_pandas_df, normalize_polars_lf
|
|
|
12
12
|
from ngio.utils import NgioFileNotFoundError, NgioValueError
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
class
|
|
15
|
+
class NonZarrBaseBackend(AbstractTableBackend):
|
|
16
16
|
"""A class to load and write small tables in CSV format."""
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
18
|
+
def __init__(
|
|
19
|
+
self,
|
|
20
|
+
df_reader: Callable[[Any], DataFrame],
|
|
21
|
+
lf_reader: Callable[[Any], LazyFrame],
|
|
22
|
+
df_writer: Callable[[str, DataFrame], None],
|
|
23
|
+
lf_writer: Callable[[str, PolarsDataFrame], None],
|
|
24
|
+
table_name: str,
|
|
25
|
+
):
|
|
26
|
+
self.df_reader = df_reader
|
|
27
|
+
self.lf_reader = lf_reader
|
|
28
|
+
self.df_writer = df_writer
|
|
29
|
+
self.lf_writer = lf_writer
|
|
30
|
+
self.table_name = table_name
|
|
24
31
|
|
|
25
32
|
@staticmethod
|
|
26
33
|
def implements_anndata() -> bool:
|
|
@@ -37,38 +44,58 @@ class CsvTableBackend(AbstractTableBackend):
|
|
|
37
44
|
"""Whether the handler implements the polars protocol."""
|
|
38
45
|
return True
|
|
39
46
|
|
|
47
|
+
@staticmethod
|
|
48
|
+
def backend_name() -> str:
|
|
49
|
+
"""Return the name of the backend."""
|
|
50
|
+
raise NotImplementedError(
|
|
51
|
+
"The backend_name method must be implemented in the subclass."
|
|
52
|
+
)
|
|
53
|
+
|
|
40
54
|
def _load_from_directory_store(self, reader):
|
|
41
55
|
"""Load the table from a directory store."""
|
|
42
56
|
url = self._group_handler.full_url
|
|
43
57
|
if url is None:
|
|
58
|
+
ext = self.table_name.split(".")[-1]
|
|
44
59
|
raise NgioValueError(
|
|
45
|
-
f"Ngio does not support reading a
|
|
60
|
+
f"Ngio does not support reading a {ext} table from a "
|
|
46
61
|
f"store of type {type(self._group_handler)}. "
|
|
47
62
|
"Please make sure to use a compatible "
|
|
48
63
|
"store like a zarr.DirectoryStore."
|
|
49
64
|
)
|
|
50
|
-
|
|
51
|
-
dataframe = reader(
|
|
65
|
+
table_path = f"{url}/{self.table_name}"
|
|
66
|
+
dataframe = reader(table_path)
|
|
52
67
|
return dataframe
|
|
53
68
|
|
|
54
|
-
def
|
|
69
|
+
def _load_from_fs_store_df(self, reader):
|
|
55
70
|
"""Load the table from an FS store."""
|
|
56
|
-
|
|
71
|
+
path = self._group_handler.group.path
|
|
72
|
+
table_path = f"{path}/{self.table_name}"
|
|
73
|
+
bytes_table = self._group_handler.store.get(table_path)
|
|
57
74
|
if bytes_table is None:
|
|
58
|
-
raise NgioFileNotFoundError(f"No table found at {
|
|
75
|
+
raise NgioFileNotFoundError(f"No table found at {table_path}. ")
|
|
59
76
|
dataframe = reader(io.BytesIO(bytes_table))
|
|
60
77
|
return dataframe
|
|
61
78
|
|
|
79
|
+
def _load_from_fs_store_lf(self, reader):
|
|
80
|
+
"""Load the table from an FS store."""
|
|
81
|
+
full_url = self._group_handler.full_url
|
|
82
|
+
parquet_path = f"{full_url}/{self.table_name}"
|
|
83
|
+
store_fs = self._group_handler.store.fs # type: ignore
|
|
84
|
+
with store_fs.open(parquet_path, "rb") as f:
|
|
85
|
+
dataframe = reader(f)
|
|
86
|
+
return dataframe
|
|
87
|
+
|
|
62
88
|
def load_as_pandas_df(self) -> DataFrame:
|
|
63
89
|
"""Load the table as a pandas DataFrame."""
|
|
64
90
|
store = self._group_handler.store
|
|
65
91
|
if isinstance(store, DirectoryStore):
|
|
66
|
-
dataframe = self._load_from_directory_store(reader=
|
|
92
|
+
dataframe = self._load_from_directory_store(reader=self.df_reader)
|
|
67
93
|
elif isinstance(store, FSStore):
|
|
68
|
-
dataframe = self.
|
|
94
|
+
dataframe = self._load_from_fs_store_df(reader=self.df_reader)
|
|
69
95
|
else:
|
|
96
|
+
ext = self.table_name.split(".")[-1]
|
|
70
97
|
raise NgioValueError(
|
|
71
|
-
f"Ngio does not support reading a
|
|
98
|
+
f"Ngio does not support reading a {ext} table from a "
|
|
72
99
|
f"store of type {type(store)}. "
|
|
73
100
|
"Please make sure to use a compatible "
|
|
74
101
|
"store like a zarr.DirectoryStore or "
|
|
@@ -83,16 +110,21 @@ class CsvTableBackend(AbstractTableBackend):
|
|
|
83
110
|
)
|
|
84
111
|
return dataframe
|
|
85
112
|
|
|
113
|
+
def load(self) -> DataFrame:
|
|
114
|
+
"""Load the table as a pandas DataFrame."""
|
|
115
|
+
return self.load_as_pandas_df()
|
|
116
|
+
|
|
86
117
|
def load_as_polars_lf(self) -> LazyFrame:
|
|
87
118
|
"""Load the table as a polars LazyFrame."""
|
|
88
119
|
store = self._group_handler.store
|
|
89
120
|
if isinstance(store, DirectoryStore):
|
|
90
|
-
lazy_frame = self._load_from_directory_store(reader=
|
|
121
|
+
lazy_frame = self._load_from_directory_store(reader=self.lf_reader)
|
|
91
122
|
elif isinstance(store, FSStore):
|
|
92
|
-
lazy_frame = self.
|
|
123
|
+
lazy_frame = self._load_from_fs_store_lf(reader=self.lf_reader)
|
|
93
124
|
else:
|
|
125
|
+
ext = self.table_name.split(".")[-1]
|
|
94
126
|
raise NgioValueError(
|
|
95
|
-
f"Ngio does not support reading a
|
|
127
|
+
f"Ngio does not support reading a {ext} from a "
|
|
96
128
|
f"store of type {type(store)}. "
|
|
97
129
|
"Please make sure to use a compatible "
|
|
98
130
|
"store like a zarr.DirectoryStore or "
|
|
@@ -117,16 +149,18 @@ class CsvTableBackend(AbstractTableBackend):
|
|
|
117
149
|
if isinstance(store, DirectoryStore):
|
|
118
150
|
full_url = self._group_handler.full_url
|
|
119
151
|
else:
|
|
152
|
+
ext = self.table_name.split(".")[-1]
|
|
120
153
|
raise NgioValueError(
|
|
121
|
-
f"Ngio does not support writing a
|
|
154
|
+
f"Ngio does not support writing a {ext} file to a "
|
|
122
155
|
f"store of type {type(store)}. "
|
|
123
156
|
"Please make sure to use a compatible "
|
|
124
157
|
"store like a zarr.DirectoryStore or "
|
|
125
158
|
"zarr.FSStore."
|
|
126
159
|
)
|
|
127
160
|
if full_url is None:
|
|
161
|
+
ext = self.table_name.split(".")[-1]
|
|
128
162
|
raise NgioValueError(
|
|
129
|
-
f"Ngio does not support writing a
|
|
163
|
+
f"Ngio does not support writing a {ext} file to a "
|
|
130
164
|
f"store of type {type(store)}. "
|
|
131
165
|
"Please make sure to use a compatible "
|
|
132
166
|
"store like a zarr.DirectoryStore or "
|
|
@@ -143,8 +177,8 @@ class CsvTableBackend(AbstractTableBackend):
|
|
|
143
177
|
reset_index=True,
|
|
144
178
|
)
|
|
145
179
|
full_url = self._get_store_url()
|
|
146
|
-
|
|
147
|
-
|
|
180
|
+
table_path = f"{full_url}/{self.table_name}"
|
|
181
|
+
self.df_writer(table_path, table)
|
|
148
182
|
|
|
149
183
|
def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
|
|
150
184
|
"""Write the table from a polars DataFrame or LazyFrame."""
|
|
@@ -158,5 +192,5 @@ class CsvTableBackend(AbstractTableBackend):
|
|
|
158
192
|
table = table.collect()
|
|
159
193
|
|
|
160
194
|
full_url = self._get_store_url()
|
|
161
|
-
|
|
162
|
-
|
|
195
|
+
table_path = f"{full_url}/{self.table_name}"
|
|
196
|
+
self.lf_writer(table_path, table)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import polars as pl
|
|
3
|
+
|
|
4
|
+
from ngio.tables.backends._non_zarr_backends import NonZarrBaseBackend
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def write_lf_to_parquet(path: str, table: pl.DataFrame) -> None:
|
|
8
|
+
"""Write a polars DataFrame to a Parquet file."""
|
|
9
|
+
# make categorical into string (for pandas compatibility)
|
|
10
|
+
schema = table.collect_schema()
|
|
11
|
+
|
|
12
|
+
categorical_columns = []
|
|
13
|
+
for name, dtype in zip(schema.names(), schema.dtypes(), strict=True):
|
|
14
|
+
if dtype == pl.Categorical:
|
|
15
|
+
categorical_columns.append(name)
|
|
16
|
+
|
|
17
|
+
for col in categorical_columns:
|
|
18
|
+
table = table.with_columns(pl.col(col).cast(pl.Utf8))
|
|
19
|
+
|
|
20
|
+
# write to parquet
|
|
21
|
+
table.write_parquet(path)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def write_df_to_parquet(path: str, table: pd.DataFrame) -> None:
|
|
25
|
+
"""Write a pandas DataFrame to a Parquet file."""
|
|
26
|
+
table.to_parquet(path, index=False)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ParquetTableBackend(NonZarrBaseBackend):
|
|
30
|
+
"""A class to load and write small tables in Parquet format."""
|
|
31
|
+
|
|
32
|
+
def __init__(
|
|
33
|
+
self,
|
|
34
|
+
):
|
|
35
|
+
"""Initialize the ParquetTableBackend."""
|
|
36
|
+
super().__init__(
|
|
37
|
+
lf_reader=pl.scan_parquet,
|
|
38
|
+
df_reader=pd.read_parquet,
|
|
39
|
+
lf_writer=write_lf_to_parquet,
|
|
40
|
+
df_writer=write_df_to_parquet,
|
|
41
|
+
table_name="table.parquet",
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
@staticmethod
|
|
45
|
+
def backend_name() -> str:
|
|
46
|
+
"""Return the name of the backend."""
|
|
47
|
+
return "parquet"
|
|
@@ -7,20 +7,22 @@ from pandas import DataFrame
|
|
|
7
7
|
from polars import DataFrame as PolarsDataFrame
|
|
8
8
|
from polars import LazyFrame
|
|
9
9
|
|
|
10
|
-
from ngio.tables.backends.
|
|
11
|
-
from ngio.tables.backends.
|
|
12
|
-
from ngio.tables.backends.
|
|
10
|
+
from ngio.tables.backends._anndata import AnnDataBackend
|
|
11
|
+
from ngio.tables.backends._csv import CsvTableBackend
|
|
12
|
+
from ngio.tables.backends._json import JsonTableBackend
|
|
13
|
+
from ngio.tables.backends._parquet import ParquetTableBackend
|
|
14
|
+
from ngio.tables.backends._utils import TabularData
|
|
13
15
|
from ngio.utils import NgioValueError, ZarrGroupHandler
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
class TableBackendProtocol(Protocol):
|
|
17
|
-
def
|
|
19
|
+
def set_group_handler(
|
|
18
20
|
self,
|
|
19
21
|
group_handler: ZarrGroupHandler,
|
|
20
22
|
index_key: str | None = None,
|
|
21
23
|
index_type: Literal["int", "str"] | None = None,
|
|
22
|
-
):
|
|
23
|
-
"""
|
|
24
|
+
) -> None:
|
|
25
|
+
"""Attach a group handler to the backend.
|
|
24
26
|
|
|
25
27
|
Index keys and index types are used to ensure that the
|
|
26
28
|
serialization and deserialization of the table
|
|
@@ -42,12 +44,21 @@ class TableBackendProtocol(Protocol):
|
|
|
42
44
|
"""
|
|
43
45
|
...
|
|
44
46
|
|
|
47
|
+
@property
|
|
48
|
+
def group_handler(self) -> ZarrGroupHandler:
|
|
49
|
+
"""Return the group handler."""
|
|
50
|
+
...
|
|
51
|
+
|
|
45
52
|
@staticmethod
|
|
46
53
|
def implements_anndata() -> bool:
|
|
47
54
|
"""Check if the backend implements the anndata protocol.
|
|
48
55
|
|
|
49
56
|
If this is True, the backend should implement the
|
|
50
|
-
`
|
|
57
|
+
`write_from_anndata` method.
|
|
58
|
+
|
|
59
|
+
AnnData objects are more complex than DataFrames,
|
|
60
|
+
so if this is true the backend should implement the
|
|
61
|
+
full serialization of the AnnData object.
|
|
51
62
|
|
|
52
63
|
If this is False, these methods should raise a
|
|
53
64
|
`NotImplementedError`.
|
|
@@ -59,7 +70,7 @@ class TableBackendProtocol(Protocol):
|
|
|
59
70
|
"""Check if the backend implements the pandas protocol.
|
|
60
71
|
|
|
61
72
|
If this is True, the backend should implement the
|
|
62
|
-
`
|
|
73
|
+
`write_from_dataframe` methods.
|
|
63
74
|
|
|
64
75
|
If this is False, these methods should raise a
|
|
65
76
|
`NotImplementedError`.
|
|
@@ -71,7 +82,7 @@ class TableBackendProtocol(Protocol):
|
|
|
71
82
|
"""Check if the backend implements the polars protocol.
|
|
72
83
|
|
|
73
84
|
If this is True, the backend should implement the
|
|
74
|
-
`
|
|
85
|
+
`write_from_polars` methods.
|
|
75
86
|
|
|
76
87
|
If this is False, these methods should raise a
|
|
77
88
|
`NotImplementedError`.
|
|
@@ -90,6 +101,16 @@ class TableBackendProtocol(Protocol):
|
|
|
90
101
|
"""Load the table as a polars LazyFrame."""
|
|
91
102
|
...
|
|
92
103
|
|
|
104
|
+
def load(self) -> TabularData:
|
|
105
|
+
"""The default load method.
|
|
106
|
+
|
|
107
|
+
This method will be default way to load the table
|
|
108
|
+
from the backend. This method should wrap one of the
|
|
109
|
+
`load_as_anndata`, `load_as_dataframe` or `load_as_polars`
|
|
110
|
+
methods depending on the backend implementation.
|
|
111
|
+
"""
|
|
112
|
+
...
|
|
113
|
+
|
|
93
114
|
def write_from_pandas(self, table: DataFrame) -> None:
|
|
94
115
|
"""Serialize the table from a pandas DataFrame."""
|
|
95
116
|
...
|
|
@@ -104,7 +125,7 @@ class TableBackendProtocol(Protocol):
|
|
|
104
125
|
|
|
105
126
|
def write(
|
|
106
127
|
self,
|
|
107
|
-
|
|
128
|
+
table_data: DataFrame | AnnData | PolarsDataFrame | LazyFrame,
|
|
108
129
|
metadata: dict[str, str] | None = None,
|
|
109
130
|
mode: Literal["pandas", "anndata", "polars"] | None = None,
|
|
110
131
|
) -> None:
|
|
@@ -144,23 +165,20 @@ class ImplementedTableBackends:
|
|
|
144
165
|
|
|
145
166
|
def get_backend(
|
|
146
167
|
self,
|
|
147
|
-
|
|
168
|
+
*,
|
|
148
169
|
group_handler: ZarrGroupHandler,
|
|
170
|
+
backend_name: str = "anndata",
|
|
149
171
|
index_key: str | None = None,
|
|
150
172
|
index_type: Literal["int", "str"] | None = None,
|
|
151
173
|
) -> TableBackendProtocol:
|
|
152
174
|
"""Try to get a handler for the given store based on the metadata version."""
|
|
153
|
-
if backend_name is None:
|
|
154
|
-
# Default to anndata since it is currently
|
|
155
|
-
# the only backend in use.
|
|
156
|
-
backend_name = "anndata_v1"
|
|
157
|
-
|
|
158
175
|
if backend_name not in self._implemented_backends:
|
|
159
176
|
raise NgioValueError(f"Table backend {backend_name} not implemented.")
|
|
160
|
-
|
|
177
|
+
backend = self._implemented_backends[backend_name]()
|
|
178
|
+
backend.set_group_handler(
|
|
161
179
|
group_handler=group_handler, index_key=index_key, index_type=index_type
|
|
162
180
|
)
|
|
163
|
-
return
|
|
181
|
+
return backend
|
|
164
182
|
|
|
165
183
|
def add_backend(
|
|
166
184
|
self,
|
|
@@ -180,3 +198,6 @@ class ImplementedTableBackends:
|
|
|
180
198
|
ImplementedTableBackends().add_backend(AnnDataBackend)
|
|
181
199
|
ImplementedTableBackends().add_backend(JsonTableBackend)
|
|
182
200
|
ImplementedTableBackends().add_backend(CsvTableBackend)
|
|
201
|
+
ImplementedTableBackends().add_backend(ParquetTableBackend)
|
|
202
|
+
|
|
203
|
+
TableBackend = Literal["anndata", "json", "csv", "parquet"] | str | TableBackendProtocol
|