ngio 0.5.0__py3-none-any.whl → 0.5.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ngio/__init__.py +2 -5
- ngio/common/__init__.py +6 -11
- ngio/common/_masking_roi.py +54 -34
- ngio/common/_pyramid.py +85 -309
- ngio/common/_roi.py +330 -258
- ngio/experimental/iterators/_feature.py +3 -3
- ngio/experimental/iterators/_rois_utils.py +11 -10
- ngio/hcs/_plate.py +60 -132
- ngio/images/_abstract_image.py +35 -539
- ngio/images/_create.py +287 -0
- ngio/images/_create_synt_container.py +42 -39
- ngio/images/_image.py +250 -516
- ngio/images/_label.py +172 -249
- ngio/images/_masked_image.py +2 -2
- ngio/images/_ome_zarr_container.py +241 -644
- ngio/io_pipes/_io_pipes.py +9 -9
- ngio/io_pipes/_io_pipes_masked.py +7 -7
- ngio/io_pipes/_io_pipes_roi.py +6 -6
- ngio/io_pipes/_io_pipes_types.py +3 -3
- ngio/io_pipes/_match_shape.py +8 -6
- ngio/io_pipes/_ops_slices_utils.py +5 -8
- ngio/ome_zarr_meta/__init__.py +18 -29
- ngio/ome_zarr_meta/_meta_handlers.py +708 -392
- ngio/ome_zarr_meta/ngio_specs/__init__.py +0 -4
- ngio/ome_zarr_meta/ngio_specs/_axes.py +51 -152
- ngio/ome_zarr_meta/ngio_specs/_dataset.py +22 -13
- ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +91 -129
- ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +68 -57
- ngio/ome_zarr_meta/v04/__init__.py +1 -5
- ngio/ome_zarr_meta/v04/{_v04_spec.py → _v04_spec_utils.py} +85 -54
- ngio/ome_zarr_meta/v05/__init__.py +1 -5
- ngio/ome_zarr_meta/v05/{_v05_spec.py → _v05_spec_utils.py} +87 -64
- ngio/resources/__init__.py +1 -1
- ngio/resources/resource_model.py +1 -1
- ngio/tables/_tables_container.py +11 -62
- ngio/tables/backends/_anndata.py +8 -58
- ngio/tables/backends/_anndata_utils.py +6 -1
- ngio/tables/backends/_csv.py +19 -3
- ngio/tables/backends/_json.py +13 -10
- ngio/tables/backends/_non_zarr_backends.py +196 -0
- ngio/tables/backends/_parquet.py +31 -3
- ngio/tables/v1/_roi_table.py +24 -41
- ngio/utils/__init__.py +12 -6
- ngio/utils/_datasets.py +0 -6
- ngio/utils/_logger.py +50 -0
- ngio/utils/_zarr_utils.py +58 -167
- {ngio-0.5.0.dist-info → ngio-0.5.0a2.dist-info}/METADATA +4 -11
- ngio-0.5.0a2.dist-info/RECORD +89 -0
- {ngio-0.5.0.dist-info → ngio-0.5.0a2.dist-info}/WHEEL +1 -1
- ngio/images/_create_utils.py +0 -406
- ngio/tables/backends/_py_arrow_backends.py +0 -222
- ngio-0.5.0.dist-info/RECORD +0 -88
- {ngio-0.5.0.dist-info → ngio-0.5.0a2.dist-info}/licenses/LICENSE +0 -0
ngio/tables/_tables_container.py
CHANGED
@@ -229,10 +229,10 @@ class ImplementedTables:
 
 
 class TablesContainer:
-    """A class to handle the /
+    """A class to handle the /labels group in an OME-NGFF file."""
 
     def __init__(self, group_handler: ZarrGroupHandler) -> None:
-        """Initialize the
+        """Initialize the LabelGroupHandler."""
         self._group_handler = group_handler
 
         # Validate the group
@@ -252,24 +252,17 @@ class TablesContainer:
         )
 
     def _get_tables_list(self) -> list[str]:
-        """
+        """Create the /tables group if it doesn't exist."""
         attrs = self._group_handler.load_attrs()
         return attrs.get("tables", [])
 
     def _get_table_group_handler(self, name: str) -> ZarrGroupHandler:
         """Get the group handler for a table."""
-        handler = self._group_handler.
+        handler = self._group_handler.derive_handler(path=name)
         return handler
 
     def list(self, filter_types: TypedTable | str | None = None) -> list[str]:
-        """List all
-
-        Args:
-            filter_types: If provided, only return tables of this type.
-
-        Returns:
-            A list of table names.
-        """
+        """List all labels in the group."""
         tables = self._get_tables_list()
         if filter_types is None:
             return tables
@@ -288,16 +281,7 @@ class TablesContainer:
         backend: TableBackend | None = None,
         strict: bool = True,
     ) -> Table:
-        """Get a
-
-        Args:
-            name: The name of the table.
-            backend: The backend to use for reading the table.
-            strict: If True, raise an error if the table type is not implemented.
-
-        Returns:
-            The table object.
-        """
+        """Get a label from the group."""
         if name not in self.list():
             raise NgioValueError(f"Table '{name}' not found in the group.")
 
@@ -317,16 +301,7 @@ class TablesContainer:
         table_cls: type[TableType],
         backend: TableBackend | None = None,
     ) -> TableType:
-        """Get a table from the group as a specific type.
-
-        Args:
-            name: The name of the table.
-            table_cls: The table class to use for loading the table.
-            backend: The backend to use for reading the table.
-
-        Returns:
-            The table object of the specified type.
-        """
+        """Get a table from the group as a specific type."""
         if name not in self.list():
             raise NgioValueError(f"Table '{name}' not found in the group.")
 
@@ -336,27 +311,6 @@ class TablesContainer:
             backend=backend,
         )  # type: ignore[return-value]
 
-    def delete(self, name: str, missing_ok: bool = False) -> None:
-        """Delete a table from the group.
-
-        Args:
-            name (str): The name of the table to delete.
-            missing_ok (bool): If True, do not raise an error if
-                the table does not exist.
-        """
-        existing_tables = self._get_tables_list()
-        if name not in existing_tables:
-            if missing_ok:
-                return
-            raise NgioValueError(
-                f"Table '{name}' not found in the Tables group. "
-                f"Available tables: {existing_tables}"
-            )
-
-        self._group_handler.delete_group(name)
-        existing_tables.remove(name)
-        self._group_handler.write_attrs({"tables": existing_tables})
-
     def add(
         self,
         name: str,
@@ -364,14 +318,7 @@ class TablesContainer:
         backend: TableBackend = DefaultTableBackend,
         overwrite: bool = False,
     ) -> None:
-        """Add a table to the group.
-
-        Args:
-            name: The name of the table.
-            table: The table object to add.
-            backend: The backend to use for writing the table.
-            overwrite: Whether to overwrite an existing table with the same name.
-        """
+        """Add a table to the group."""
         existing_tables = self._get_tables_list()
         if name in existing_tables and not overwrite:
             raise NgioValueError(
@@ -379,7 +326,9 @@ class TablesContainer:
                 "Use overwrite=True to replace it."
            )
 
-        table_handler = self._group_handler.
+        table_handler = self._group_handler.derive_handler(
+            path=name, overwrite=overwrite
+        )
 
         if backend is None:
             backend = table.backend_name
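In these hunks the container drops its `delete` method and routes per-table group creation through `derive_handler(path=..., overwrite=...)`. A minimal sketch of the add/overwrite guard visible above, using a hypothetical in-memory stand-in rather than ngio's real `ZarrGroupHandler` (only `load_attrs`, `write_attrs`, and `derive_handler` appear in the diff; everything else here is illustrative):

```python
# Hypothetical stand-in; not ngio's ZarrGroupHandler or TablesContainer.
class _FakeHandler:
    def __init__(self) -> None:
        self.attrs = {"tables": []}
        self.children = {}

    def load_attrs(self) -> dict:
        return self.attrs

    def write_attrs(self, attrs: dict) -> None:
        self.attrs.update(attrs)

    def derive_handler(self, path: str, overwrite: bool = False) -> "_FakeHandler":
        # mirrors the new derive_handler(path=name, overwrite=overwrite) call
        if path in self.children and not overwrite:
            raise ValueError(f"Group '{path}' already exists.")
        self.children[path] = _FakeHandler()
        return self.children[path]


def add_table(handler: _FakeHandler, name: str, overwrite: bool = False) -> None:
    """Guard-then-derive flow sketched from the new TablesContainer.add."""
    existing = handler.load_attrs().get("tables", [])
    if name in existing and not overwrite:
        raise ValueError(f"Table '{name}' already exists. Use overwrite=True to replace it.")
    handler.derive_handler(path=name, overwrite=overwrite)
    if name not in existing:
        handler.write_attrs({"tables": [*existing, name]})


root = _FakeHandler()
add_table(root, "roi_table")
print(root.load_attrs())  # {'tables': ['roi_table']}
```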
ngio/tables/backends/_anndata.py
CHANGED
@@ -1,10 +1,8 @@
-import zarr
 from anndata import AnnData
 from anndata._settings import settings
 from pandas import DataFrame
 from polars import DataFrame as PolarsDataFrame
 from polars import LazyFrame
-from zarr.storage import FsspecStore, LocalStore, MemoryStore
 
 from ngio.tables.backends._abstract_backend import AbstractTableBackend
 from ngio.tables.backends._anndata_utils import (
@@ -15,7 +13,7 @@ from ngio.tables.backends._utils import (
     convert_polars_to_anndata,
     normalize_anndata,
 )
-from ngio.utils import NgioValueError
+from ngio.utils import NgioValueError
 
 
 class AnnDataBackend(AbstractTableBackend):
@@ -52,66 +50,18 @@ class AnnDataBackend(AbstractTableBackend):
         """Load the table as an AnnData object."""
         return self.load_as_anndata()
 
-    def _write_to_local_store(
-        self, store: LocalStore, path: str, table: AnnData
-    ) -> None:
-        """Write the AnnData table to a LocalStore."""
-        store_path = f"{store.root}/{path}"
-        table.write_zarr(store_path)
-
-    def _write_to_fsspec_store(
-        self, store: FsspecStore, path: str, table: AnnData
-    ) -> None:
-        """Write the AnnData table to a FsspecStore."""
-        full_url = f"{store.path}/{path}"
-        fs = store.fs
-        mapper = fs.get_mapper(full_url)
-        table.write_zarr(mapper)
-
-    def _write_to_memory_store(
-        self, store: MemoryStore, path: str, table: AnnData
-    ) -> None:
-        """Write the AnnData table to a MemoryStore."""
-        store = MemoryStore()
-        table.write_zarr(store)
-        anndata_group = zarr.open_group(store, mode="r")
-        copy_group(
-            anndata_group,
-            self._group_handler._group,
-            suppress_warnings=True,
-        )
-
     def write_from_anndata(self, table: AnnData) -> None:
         """Serialize the table from an AnnData object."""
-
-
-        store = self._group_handler.store
-        path = self._group_handler.group.path
-        if isinstance(store, LocalStore):
-            self._write_to_local_store(
-                store,
-                path,
-                table,
-            )
-        elif isinstance(store, FsspecStore):
-            self._write_to_fsspec_store(
-                store,
-                path,
-                table,
-            )
-        elif isinstance(store, MemoryStore):
-            self._write_to_memory_store(
-                store,
-                path,
-                table,
-            )
-        else:
+        full_url = self._group_handler.full_url
+        if full_url is None:
             raise NgioValueError(
-                f"Ngio does not support writing
-                f"store of type {type(
+                f"Ngio does not support writing file from a "
+                f"store of type {type(self._group_handler)}. "
                 "Please make sure to use a compatible "
-                "store like a
+                "store like a zarr.DirectoryStore."
             )
+        settings.zarr_write_format = self._group_handler.zarr_format
+        table.write_zarr(full_url)
 
     def write_from_pandas(self, table: DataFrame) -> None:
         """Serialize the table from a pandas DataFrame."""
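The new `write_from_anndata` resolves a filesystem URL from the group handler, pins anndata's on-disk zarr format, and writes with `AnnData.write_zarr` directly instead of dispatching per store type. A standalone sketch of that write path (the path and the format value 2 are illustrative; `settings.zarr_write_format` requires a recent anndata release):

```python
import numpy as np
from anndata import AnnData
from anndata._settings import settings

adata = AnnData(X=np.zeros((3, 2)))

# Pin the zarr on-disk format before writing, as the new backend does with
# the handler's zarr_format; 2 is used here purely as an example value.
settings.zarr_write_format = 2
adata.write_zarr("example_table.zarr")  # write straight to a local path
```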
ngio/tables/backends/_anndata_utils.py
CHANGED
@@ -9,6 +9,7 @@ from anndata._io.utils import _read_legacy_raw
 from anndata._io.zarr import read_dataframe
 from anndata.compat import _clean_uns
 from anndata.experimental import read_dispatched
+from zarr.storage import LocalStore
 
 from ngio.utils import (
     NgioValueError,
@@ -34,6 +35,10 @@ def custom_anndata_read_zarr(
         elem_to_read (Sequence[str] | None): The elements to read from the store.
     """
     group = open_group_wrapper(store=store, mode="r")
+
+    if not isinstance(group.store, LocalStore):
+        elem_to_read = ["X", "obs", "var"]
+
     if elem_to_read is None:
         elem_to_read = [
             "X",
@@ -84,7 +89,7 @@ def custom_anndata_read_zarr(
     _clean_uns(adata)
 
     if isinstance(adata, dict):
-        adata = AnnData(**adata)
+        adata = AnnData(**adata)
     if not isinstance(adata, AnnData):
         raise NgioValueError(f"Expected an AnnData object, but got {type(adata)}")
     return adata
ngio/tables/backends/_csv.py
CHANGED
@@ -1,7 +1,20 @@
-
+import pandas as pd
+import polars as pl
 
+from ngio.tables.backends._non_zarr_backends import NonZarrBaseBackend
 
-
+
+def write_lf_to_csv(path: str, table: pl.DataFrame) -> None:
+    """Write a polars DataFrame to a CSV file."""
+    table.write_csv(path)
+
+
+def write_df_to_csv(path: str, table: pd.DataFrame) -> None:
+    """Write a pandas DataFrame to a CSV file."""
+    table.to_csv(path, index=False)
+
+
+class CsvTableBackend(NonZarrBaseBackend):
     """A class to load and write small tables in CSV format."""
 
     def __init__(
@@ -9,8 +22,11 @@ class CsvTableBackend(PyArrowBackend):
     ):
         """Initialize the CsvTableBackend."""
         super().__init__(
+            lf_reader=pl.scan_csv,
+            df_reader=pd.read_csv,
+            lf_writer=write_lf_to_csv,
+            df_writer=write_df_to_csv,
             table_name="table.csv",
-            table_format="csv",
         )
 
     @staticmethod
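`CsvTableBackend` now plugs `pd.read_csv`/`pl.scan_csv` and the two module-level writer helpers into `NonZarrBaseBackend` instead of the removed PyArrow-based backend. A quick roundtrip through the same library calls the new helpers wrap (the file name and data are illustrative):

```python
import pandas as pd
import polars as pl

df = pd.DataFrame({"label": [1, 2], "area": [10.5, 20.0]})

df.to_csv("table.csv", index=False)  # what write_df_to_csv does
lf = pl.scan_csv("table.csv")        # the lf_reader wired into the backend
print(lf.collect())
```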
ngio/tables/backends/_json.py
CHANGED
@@ -8,7 +8,7 @@ from ngio.tables.backends._utils import (
     normalize_pandas_df,
     normalize_polars_lf,
 )
-from ngio.utils import
+from ngio.utils import NgioFileNotFoundError
 
 
 class JsonTableBackend(AbstractTableBackend):
@@ -37,19 +37,22 @@ class JsonTableBackend(AbstractTableBackend):
     def _get_table_group(self):
         """Get the table group, creating it if it doesn't exist."""
         try:
-            table_group = self._group_handler.get_group(path="table"
-        except
-
-                "Could not get or create a 'table' group in the store "
-                f"{self._group_handler.store} path "
-                f"{self._group_handler.group.path}/table."
-            ) from e
+            table_group = self._group_handler.get_group(path="table")
+        except NgioFileNotFoundError:
+            table_group = self._group_handler.group.create_group("table")
         return table_group
 
-    def
+    def _load_as_pandas_df(self) -> DataFrame:
         """Load the table as a pandas DataFrame."""
-
+        table_group = self._get_table_group()
+        table_dict = dict(table_group.attrs)
+
         data_frame = pd.DataFrame.from_dict(table_dict)
+        return data_frame
+
+    def load_as_pandas_df(self) -> DataFrame:
+        """Load the table as a pandas DataFrame."""
+        data_frame = self._load_as_pandas_df()
         data_frame = normalize_pandas_df(
             data_frame,
             index_key=self.index_key,
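The JSON backend now reads the table out of the "table" group's attributes and builds the DataFrame with `pd.DataFrame.from_dict`. The core conversion in isolation, with a plain dict standing in for `dict(table_group.attrs)` (sample data is illustrative):

```python
import pandas as pd

# Stand-in for dict(table_group.attrs) in the new _load_as_pandas_df
table_dict = {"label": [1, 2, 3], "area": [10.0, 12.5, 7.25]}
data_frame = pd.DataFrame.from_dict(table_dict)
print(data_frame)
```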
ngio/tables/backends/_non_zarr_backends.py
ADDED
@@ -0,0 +1,196 @@
+import io
+from collections.abc import Callable
+from typing import Any
+
+from pandas import DataFrame
+from polars import DataFrame as PolarsDataFrame
+from polars import LazyFrame
+from zarr.storage import FsspecStore, LocalStore
+
+from ngio.tables.backends._abstract_backend import AbstractTableBackend
+from ngio.tables.backends._utils import normalize_pandas_df, normalize_polars_lf
+from ngio.utils import NgioFileNotFoundError, NgioValueError
+
+
+class NonZarrBaseBackend(AbstractTableBackend):
+    """A class to load and write small tables in CSV format."""
+
+    def __init__(
+        self,
+        df_reader: Callable[[Any], DataFrame],
+        lf_reader: Callable[[Any], LazyFrame],
+        df_writer: Callable[[str, DataFrame], None],
+        lf_writer: Callable[[str, PolarsDataFrame], None],
+        table_name: str,
+    ):
+        self.df_reader = df_reader
+        self.lf_reader = lf_reader
+        self.df_writer = df_writer
+        self.lf_writer = lf_writer
+        self.table_name = table_name
+
+    @staticmethod
+    def implements_anndata() -> bool:
+        """Whether the handler implements the anndata protocol."""
+        return False
+
+    @staticmethod
+    def implements_pandas() -> bool:
+        """Whether the handler implements the dataframe protocol."""
+        return True
+
+    @staticmethod
+    def implements_polars() -> bool:
+        """Whether the handler implements the polars protocol."""
+        return True
+
+    @staticmethod
+    def backend_name() -> str:
+        """Return the name of the backend."""
+        raise NotImplementedError(
+            "The backend_name method must be implemented in the subclass."
+        )
+
+    def _load_from_directory_store(self, reader):
+        """Load the table from a directory store."""
+        url = self._group_handler.full_url
+        if url is None:
+            ext = self.table_name.split(".")[-1]
+            raise NgioValueError(
+                f"Ngio does not support reading a {ext} table from a "
+                f"store of type {type(self._group_handler)}. "
+                "Please make sure to use a compatible "
+                "store like a zarr.DirectoryStore."
+            )
+        table_path = f"{url}/{self.table_name}"
+        dataframe = reader(table_path)
+        return dataframe
+
+    def _load_from_fs_store_df(self, reader):
+        """Load the table from an FS store."""
+        path = self._group_handler.group.path
+        table_path = f"{path}/{self.table_name}"
+        bytes_table = self._group_handler.store.get(table_path)
+        if bytes_table is None:
+            raise NgioFileNotFoundError(f"No table found at {table_path}. ")
+        dataframe = reader(io.BytesIO(bytes_table))
+        return dataframe
+
+    def _load_from_fs_store_lf(self, reader):
+        """Load the table from an FS store."""
+        full_url = self._group_handler.full_url
+        parquet_path = f"{full_url}/{self.table_name}"
+        store_fs = self._group_handler.store.fs  # type: ignore (in this context, store_fs is a fs.FSStore)
+        with store_fs.open(parquet_path, "rb") as f:
+            dataframe = reader(f)
+        return dataframe
+
+    def load_as_pandas_df(self) -> DataFrame:
+        """Load the table as a pandas DataFrame."""
+        store = self._group_handler.store
+        if isinstance(store, LocalStore):
+            dataframe = self._load_from_directory_store(reader=self.df_reader)
+        elif isinstance(store, FsspecStore):
+            dataframe = self._load_from_fs_store_df(reader=self.df_reader)
+        else:
+            ext = self.table_name.split(".")[-1]
+            raise NgioValueError(
+                f"Ngio does not support reading a {ext} table from a "
+                f"store of type {type(store)}. "
+                "Please make sure to use a compatible "
+                "store like a zarr.DirectoryStore or "
+                "zarr.FSStore."
+            )
+
+        dataframe = normalize_pandas_df(
+            dataframe,
+            index_key=self.index_key,
+            index_type=self.index_type,
+            reset_index=False,
+        )
+        return dataframe
+
+    def load(self) -> DataFrame:
+        """Load the table as a pandas DataFrame."""
+        return self.load_as_pandas_df()
+
+    def load_as_polars_lf(self) -> LazyFrame:
+        """Load the table as a polars LazyFrame."""
+        store = self._group_handler.store
+        if isinstance(store, LocalStore):
+            lazy_frame = self._load_from_directory_store(reader=self.lf_reader)
+        elif isinstance(store, FsspecStore):
+            lazy_frame = self._load_from_fs_store_lf(reader=self.lf_reader)
+        else:
+            ext = self.table_name.split(".")[-1]
+            raise NgioValueError(
+                f"Ngio does not support reading a {ext} from a "
+                f"store of type {type(store)}. "
+                "Please make sure to use a compatible "
+                "store like a zarr.DirectoryStore or "
+                "zarr.FSStore."
+            )
+        if not isinstance(lazy_frame, LazyFrame):
+            raise NgioValueError(
+                "Table is not a lazy frame. Please report this issue as an ngio bug."
+                f" {type(lazy_frame)}"
+            )
+
+        lazy_frame = normalize_polars_lf(
+            lazy_frame,
+            index_key=self.index_key,
+            index_type=self.index_type,
+        )
+        return lazy_frame
+
+    def _get_store_url(self) -> str:
+        """Get the store URL."""
+        store = self._group_handler.store
+        if isinstance(store, LocalStore):
+            full_url = self._group_handler.full_url
+        else:
+            ext = self.table_name.split(".")[-1]
+            raise NgioValueError(
+                f"Ngio does not support writing a {ext} file to a "
+                f"store of type {type(store)}. "
+                "Please make sure to use a compatible "
+                "store like a zarr.DirectoryStore or "
+                "zarr.FSStore."
+            )
+        if full_url is None:
+            ext = self.table_name.split(".")[-1]
+            raise NgioValueError(
+                f"Ngio does not support writing a {ext} file to a "
+                f"store of type {type(store)}. "
+                "Please make sure to use a compatible "
+                "store like a zarr.DirectoryStore or "
+                "zarr.FSStore."
+            )
+        return full_url
+
+    def write_from_pandas(self, table: DataFrame) -> None:
+        """Write the table from a pandas DataFrame."""
+        table = normalize_pandas_df(
+            table,
+            index_key=self.index_key,
+            index_type=self.index_type,
+            reset_index=True,
+        )
+        full_url = self._get_store_url()
+        table_path = f"{full_url}/{self.table_name}"
+        self.df_writer(table_path, table)
+
+    def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
+        """Write the table from a polars DataFrame or LazyFrame."""
+        table = normalize_polars_lf(
+            table,
+            index_key=self.index_key,
+            index_type=self.index_type,
+        )
+
+        if isinstance(table, LazyFrame):
+            table = table.collect()
+
+        full_url = self._get_store_url()
+        table_path = f"{full_url}/{self.table_name}"
+        self.lf_writer(table_path, table)
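The new base class owns store dispatch, normalization, and error reporting, so a concrete backend reduces to wiring four callables plus a file name, as the CSV and Parquet backends above do. A sketch of that wiring for a hypothetical TSV backend (ngio has no such backend; the readers/writers and the constructor signature beyond what the diff shows are assumptions, and the import requires ngio 0.5.0a2):

```python
import pandas as pd
import polars as pl

from ngio.tables.backends._non_zarr_backends import NonZarrBaseBackend


def write_lf_to_tsv(path: str, table: pl.DataFrame) -> None:
    """Hypothetical polars writer for a tab-separated file."""
    table.write_csv(path, separator="\t")


def write_df_to_tsv(path: str, table: pd.DataFrame) -> None:
    """Hypothetical pandas writer for a tab-separated file."""
    table.to_csv(path, sep="\t", index=False)


class TsvTableBackend(NonZarrBaseBackend):
    """Hypothetical subclass mirroring the CsvTableBackend/ParquetTableBackend wiring."""

    def __init__(self):
        # The real subclasses may take additional constructor arguments not
        # visible in this diff; only the super().__init__ wiring is shown there.
        super().__init__(
            lf_reader=lambda path: pl.scan_csv(path, separator="\t"),
            df_reader=lambda path: pd.read_csv(path, sep="\t"),
            lf_writer=write_lf_to_tsv,
            df_writer=write_df_to_tsv,
            table_name="table.tsv",
        )
```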
ngio/tables/backends/_parquet.py
CHANGED
@@ -1,7 +1,32 @@
-
+import pandas as pd
+import polars as pl
 
+from ngio.tables.backends._non_zarr_backends import NonZarrBaseBackend
 
-
+
+def write_lf_to_parquet(path: str, table: pl.DataFrame) -> None:
+    """Write a polars DataFrame to a Parquet file."""
+    # make categorical into string (for pandas compatibility)
+    schema = table.collect_schema()
+
+    categorical_columns = []
+    for name, dtype in zip(schema.names(), schema.dtypes(), strict=True):
+        if dtype == pl.Categorical:
+            categorical_columns.append(name)
+
+    for col in categorical_columns:
+        table = table.with_columns(pl.col(col).cast(pl.Utf8))
+
+    # write to parquet
+    table.write_parquet(path)
+
+
+def write_df_to_parquet(path: str, table: pd.DataFrame) -> None:
+    """Write a pandas DataFrame to a Parquet file."""
+    table.to_parquet(path, index=False)
+
+
+class ParquetTableBackend(NonZarrBaseBackend):
     """A class to load and write small tables in Parquet format."""
 
     def __init__(
@@ -9,8 +34,11 @@ class ParquetTableBackend(PyArrowBackend):
     ):
         """Initialize the ParquetTableBackend."""
         super().__init__(
+            lf_reader=pl.scan_parquet,
+            df_reader=pd.read_parquet,
+            lf_writer=write_lf_to_parquet,
+            df_writer=write_df_to_parquet,
             table_name="table.parquet",
-            table_format="parquet",
         )
 
     @staticmethod
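`write_lf_to_parquet` casts Categorical columns to Utf8 before writing so pandas can read the Parquet file back. The same cast in isolation, on made-up data (requires a polars version with `collect_schema`, which the diff itself relies on):

```python
import polars as pl

table = pl.DataFrame(
    {"label": ["a", "b", "a"], "area": [1.0, 2.0, 3.0]},
    schema_overrides={"label": pl.Categorical},
)

# Find Categorical columns from the schema and cast them to Utf8,
# mirroring the loop in write_lf_to_parquet above.
schema = table.collect_schema()
categorical = [
    name
    for name, dtype in zip(schema.names(), schema.dtypes(), strict=True)
    if dtype == pl.Categorical
]
for col in categorical:
    table = table.with_columns(pl.col(col).cast(pl.Utf8))

table.write_parquet("table.parquet")
```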
ngio/tables/v1/_roi_table.py
CHANGED
@@ -4,7 +4,6 @@ This class follows the roi_table specification at:
 https://fractal-analytics-platform.github.io/fractal-tasks-core/tables/
 """
 
-import warnings
 from collections.abc import Iterable
 from typing import Literal
 from uuid import uuid4
@@ -27,6 +26,7 @@ from ngio.utils import (
     NgioTableValidationError,
     NgioValueError,
     ZarrGroupHandler,
+    ngio_warn,
 )
 
 REQUIRED_COLUMNS = [
@@ -77,9 +77,7 @@ OPTIONAL_COLUMNS = ORIGIN_COLUMNS + TRANSLATION_COLUMNS + PLATE_COLUMNS + INDEX_
 def _check_optional_columns(col_name: str) -> None:
     """Check if the column name is in the optional columns."""
     if col_name not in OPTIONAL_COLUMNS + TIME_COLUMNS:
-
-            f"Column {col_name} is not in the optional columns.", stacklevel=2
-        )
+        ngio_warn(f"Column {col_name} is not in the optional columns.")
 
 
 def _dataframe_to_rois(
@@ -122,17 +120,17 @@ def _dataframe_to_rois(
         else:
             label = getattr(row, "label", None)
 
-
-            "x": (row.x_micrometer, row.len_x_micrometer),
-            "y": (row.y_micrometer, row.len_y_micrometer),
-            "z": (z_micrometer, z_length_micrometer),
-        }
-        if t_second is not None or t_length_second is not None:
-            slices["t"] = (t_second, t_length_second)
-        roi = Roi.from_values(
+        roi = Roi(
             name=str(row.Index),
-
-
+            x=row.x_micrometer,  # type: ignore (type can not be known here)
+            y=row.y_micrometer,  # type: ignore (type can not be known here)
+            z=z_micrometer,
+            t=t_second,
+            x_length=row.len_x_micrometer,  # type: ignore (type can not be known here)
+            y_length=row.len_y_micrometer,  # type: ignore (type can not be known here)
+            z_length=z_length_micrometer,
+            t_length=t_length_second,
+            unit="micrometer",
             label=label,
             **extras,
         )
@@ -145,39 +143,24 @@ def _rois_to_dataframe(rois: dict[str, Roi], index_key: str | None) -> pd.DataFr
     data = []
     for roi in rois.values():
         # This normalization is necessary for backward compatibility
-        if roi.
-
-                "Only ROIs in world coordinates can be serialized."
-            )
+        z_micrometer = roi.z if roi.z is not None else 0.0
+        len_z_micrometer = roi.z_length if roi.z_length is not None else 1.0
 
-        z_slice = roi.get("z")
-        if z_slice is None:
-            z_micrometer = 0.0
-            len_z_micrometer = 1.0
-        else:
-            z_micrometer = z_slice.start if z_slice.start is not None else 0.0
-            len_z_micrometer = z_slice.length if z_slice.length is not None else 1.0
-
-        x_slice = roi.get("x")
-        if x_slice is None:
-            raise NgioValueError("ROI is missing 'x' slice.")
-        y_slice = roi.get("y")
-        if y_slice is None:
-            raise NgioValueError("ROI is missing 'y' slice.")
         row = {
             index_key: roi.get_name(),
-            "x_micrometer":
-            "y_micrometer":
+            "x_micrometer": roi.x,
+            "y_micrometer": roi.y,
             "z_micrometer": z_micrometer,
-            "len_x_micrometer":
-            "len_y_micrometer":
+            "len_x_micrometer": roi.x_length,
+            "len_y_micrometer": roi.y_length,
             "len_z_micrometer": len_z_micrometer,
         }
 
-
-
-
-
+        if roi.t is not None:
+            row["t_second"] = roi.t
+
+        if roi.t_length is not None:
+            row["len_t_second"] = roi.t_length
 
         if roi.label is not None and index_key != "label":
             row["label"] = roi.label
@@ -200,7 +183,7 @@ class RoiDictWrapper:
         self._rois_by_name = {}
         self._rois_by_label = {}
         for roi in rois:
-            name = roi.
+            name = roi.get_name()
             if name in self._rois_by_name:
                 name = f"{name}_{uuid4().hex[:8]}"
             self._rois_by_name[name] = roi
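`_rois_to_dataframe` now reads ROI coordinates from plain attributes (`roi.x`, `roi.x_length`, `roi.t`, ...) and defaults missing z values, rather than unpacking the old per-axis slices. A sketch of that column mapping using a stand-in object (ngio's `Roi` class and import path are not shown in this diff; the index key and values are illustrative):

```python
from __future__ import annotations

from dataclasses import dataclass


@dataclass
class _RoiLike:
    """Stand-in exposing the attributes the new _rois_to_dataframe reads; not ngio's Roi."""
    name: str
    x: float
    y: float
    x_length: float
    y_length: float
    z: float | None = None
    z_length: float | None = None
    t: float | None = None
    t_length: float | None = None
    label: int | None = None

    def get_name(self) -> str:
        return self.name


roi = _RoiLike(name="FOV_1", x=0.0, y=0.0, x_length=416.0, y_length=351.0, label=1)
index_key = "FieldIndex"  # illustrative index key

# Same defaulting and column mapping as the hunk above.
row = {
    index_key: roi.get_name(),
    "x_micrometer": roi.x,
    "y_micrometer": roi.y,
    "z_micrometer": roi.z if roi.z is not None else 0.0,
    "len_x_micrometer": roi.x_length,
    "len_y_micrometer": roi.y_length,
    "len_z_micrometer": roi.z_length if roi.z_length is not None else 1.0,
}
if roi.t is not None:
    row["t_second"] = roi.t
if roi.t_length is not None:
    row["len_t_second"] = roi.t_length
print(row)
```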