ngio 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ngio/common/_pyramid.py +5 -1
- ngio/hcs/plate.py +133 -2
- ngio/images/abstract_image.py +1 -0
- ngio/images/image.py +42 -0
- ngio/images/label.py +15 -7
- ngio/images/ome_zarr_container.py +20 -11
- ngio/tables/_validators.py +1 -83
- ngio/tables/backends/__init__.py +27 -1
- ngio/tables/backends/_abstract_backend.py +207 -22
- ngio/tables/backends/_anndata_utils.py +3 -109
- ngio/tables/backends/_anndata_v1.py +43 -46
- ngio/tables/backends/_csv_v1.py +162 -0
- ngio/tables/backends/_json_v1.py +54 -18
- ngio/tables/backends/_table_backends.py +98 -18
- ngio/tables/backends/_utils.py +462 -0
- ngio/tables/tables_container.py +14 -3
- ngio/tables/v1/_feature_table.py +20 -11
- ngio/tables/v1/_generic_table.py +20 -15
- ngio/tables/v1/_roi_table.py +15 -12
- ngio/utils/_zarr_utils.py +46 -32
- {ngio-0.2.2.dist-info → ngio-0.2.4.dist-info}/METADATA +3 -1
- {ngio-0.2.2.dist-info → ngio-0.2.4.dist-info}/RECORD +24 -22
- {ngio-0.2.2.dist-info → ngio-0.2.4.dist-info}/WHEEL +0 -0
- {ngio-0.2.2.dist-info → ngio-0.2.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import io
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import polars as pl
|
|
5
|
+
from pandas import DataFrame
|
|
6
|
+
from polars import DataFrame as PolarsDataFrame
|
|
7
|
+
from polars import LazyFrame
|
|
8
|
+
from zarr.storage import DirectoryStore, FSStore
|
|
9
|
+
|
|
10
|
+
from ngio.tables.backends._abstract_backend import AbstractTableBackend
|
|
11
|
+
from ngio.tables.backends._utils import normalize_pandas_df, normalize_polars_lf
|
|
12
|
+
from ngio.utils import NgioFileNotFoundError, NgioValueError
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class CsvTableBackend(AbstractTableBackend):
    """Table backend that stores a small table as a single CSV file.

    The file is named ``csv_name`` and is addressed relative to the group
    handler (``<full_url>/<csv_name>`` on a directory store, the key
    ``csv_name`` on an FS store).

    Reading is supported on ``zarr.DirectoryStore`` and ``zarr.FSStore``;
    writing is supported on ``zarr.DirectoryStore`` only.
    """

    # Name of the CSV file that holds the table.
    csv_name = "table.csv"

    @staticmethod
    def backend_name() -> str:
        """Return the name of the backend."""
        return "experimental_csv_v1"

    @staticmethod
    def implements_anndata() -> bool:
        """Whether the handler implements the anndata protocol."""
        return False

    @staticmethod
    def implements_pandas() -> bool:
        """Whether the handler implements the pandas protocol."""
        return True

    @staticmethod
    def implements_polars() -> bool:
        """Whether the handler implements the polars protocol."""
        return True

    def _unsupported_store_error(self, action: str, supported: str) -> NgioValueError:
        """Build the error raised when the current store cannot be used.

        Args:
            action: Start of the sentence describing the failed operation,
                e.g. ``"reading a CSV file from a"``.
            supported: Human readable list of the store types that do work.

        Returns:
            The exception instance; the caller is responsible for raising it.
        """
        store = self._group_handler.store
        return NgioValueError(
            f"Ngio does not support {action} "
            f"store of type {type(store)}. "
            "Please make sure to use a compatible "
            f"store like a {supported}."
        )

    def _load_from_directory_store(self, reader):
        """Load the table from a directory store.

        Args:
            reader: Callable that accepts the CSV path as a string and
                returns the loaded table (e.g. ``pd.read_csv``).

        Raises:
            NgioValueError: If the group handler exposes no ``full_url``.
        """
        url = self._group_handler.full_url
        if url is None:
            # Report the store type (not the handler type) so the message is
            # consistent with the other errors raised by this backend.
            raise self._unsupported_store_error(
                action="reading a CSV file from a",
                supported="zarr.DirectoryStore",
            )
        return reader(f"{url}/{self.csv_name}")

    def _load_from_fs_store(self, reader):
        """Load the table from an FS store.

        The raw bytes stored under ``csv_name`` are fetched from the store
        and handed to ``reader`` wrapped in an in-memory binary buffer.

        Args:
            reader: Callable that accepts a binary file-like object and
                returns the loaded table.

        Raises:
            NgioFileNotFoundError: If the store has no entry for ``csv_name``.
        """
        bytes_table = self._group_handler.store.get(self.csv_name)
        if bytes_table is None:
            raise NgioFileNotFoundError(f"No table found at {self.csv_name}. ")
        return reader(io.BytesIO(bytes_table))

    def _load_with_reader(self, reader):
        """Dispatch the load to the helper matching the store type.

        Raises:
            NgioValueError: If the store is neither a ``DirectoryStore``
                nor an ``FSStore``.
        """
        store = self._group_handler.store
        if isinstance(store, DirectoryStore):
            return self._load_from_directory_store(reader=reader)
        if isinstance(store, FSStore):
            return self._load_from_fs_store(reader=reader)
        raise self._unsupported_store_error(
            action="reading a CSV file from a",
            supported="zarr.DirectoryStore or zarr.FSStore",
        )

    def load_as_pandas_df(self) -> DataFrame:
        """Load the table as a pandas DataFrame."""
        dataframe = self._load_with_reader(reader=pd.read_csv)
        return normalize_pandas_df(
            dataframe,
            index_key=self.index_key,
            index_type=self.index_type,
            reset_index=False,
        )

    def load_as_polars_lf(self) -> LazyFrame:
        """Load the table as a polars LazyFrame.

        Raises:
            NgioValueError: If the store type is unsupported, or if the
                reader unexpectedly returns something other than a LazyFrame.
        """
        lazy_frame = self._load_with_reader(reader=pl.scan_csv)
        if not isinstance(lazy_frame, LazyFrame):
            raise NgioValueError(
                "Table is not a lazy frame. Please report this issue as an ngio bug."
                f" {type(lazy_frame)}"
            )
        return normalize_polars_lf(
            lazy_frame,
            index_key=self.index_key,
            index_type=self.index_type,
        )

    def _get_store_url(self) -> str:
        """Get the URL of the group the CSV file is written to.

        Raises:
            NgioValueError: If the store is not a ``DirectoryStore`` or the
                group handler exposes no ``full_url``.
        """
        store = self._group_handler.store
        full_url = (
            self._group_handler.full_url if isinstance(store, DirectoryStore) else None
        )
        if full_url is None:
            # Writing is only implemented for directory stores, so the hint
            # must not advertise zarr.FSStore as an alternative.
            raise self._unsupported_store_error(
                action="writing a CSV file to a",
                supported="zarr.DirectoryStore",
            )
        return full_url

    def write_from_pandas(self, table: DataFrame) -> None:
        """Write the table from a pandas DataFrame."""
        table = normalize_pandas_df(
            table,
            index_key=self.index_key,
            index_type=self.index_type,
            reset_index=True,
        )
        csv_path = f"{self._get_store_url()}/{self.csv_name}"
        # The index was moved into a column by the normalization above, so
        # the pandas index itself is not written.
        table.to_csv(csv_path, index=False)

    def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
        """Write the table from a polars DataFrame or LazyFrame."""
        table = normalize_polars_lf(
            table,
            index_key=self.index_key,
            index_type=self.index_type,
        )
        # write_csv is called on a materialized frame, so collect lazy input.
        if isinstance(table, LazyFrame):
            table = table.collect()

        csv_path = f"{self._get_store_url()}/{self.csv_name}"
        table.write_csv(csv_path)
|
ngio/tables/backends/_json_v1.py
CHANGED
|
@@ -1,9 +1,13 @@
|
|
|
1
|
-
from collections.abc import Collection
|
|
2
|
-
|
|
3
1
|
import pandas as pd
|
|
4
2
|
from pandas import DataFrame
|
|
3
|
+
from polars import DataFrame as PolarsDataFrame
|
|
4
|
+
from polars import LazyFrame
|
|
5
5
|
|
|
6
6
|
from ngio.tables.backends._abstract_backend import AbstractTableBackend
|
|
7
|
+
from ngio.tables.backends._utils import (
|
|
8
|
+
normalize_pandas_df,
|
|
9
|
+
normalize_polars_lf,
|
|
10
|
+
)
|
|
7
11
|
from ngio.utils import NgioFileNotFoundError
|
|
8
12
|
|
|
9
13
|
|
|
@@ -12,7 +16,7 @@ class JsonTableBackend(AbstractTableBackend):
|
|
|
12
16
|
|
|
13
17
|
@staticmethod
|
|
14
18
|
def backend_name() -> str:
|
|
15
|
-
"""
|
|
19
|
+
"""Return the name of the backend."""
|
|
16
20
|
return "experimental_json_v1"
|
|
17
21
|
|
|
18
22
|
@staticmethod
|
|
@@ -21,36 +25,68 @@ class JsonTableBackend(AbstractTableBackend):
|
|
|
21
25
|
return False
|
|
22
26
|
|
|
23
27
|
@staticmethod
|
|
24
|
-
def
|
|
28
|
+
def implements_pandas() -> bool:
|
|
25
29
|
"""Whether the handler implements the dataframe protocol."""
|
|
26
30
|
return True
|
|
27
31
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
32
|
+
@staticmethod
|
|
33
|
+
def implements_polars() -> bool:
|
|
34
|
+
"""Whether the handler implements the polars protocol."""
|
|
35
|
+
return True
|
|
31
36
|
|
|
32
37
|
def _get_table_group(self):
    """Return the ``"table"`` sub-group, creating it when it is missing."""
    handler = self._group_handler
    try:
        return handler.get_group(path="table")
    except NgioFileNotFoundError:
        # The sub-group does not exist yet: create it on the handler's group.
        return handler.group.create_group("table")
|
|
38
44
|
|
|
39
|
-
def
|
|
40
|
-
"""
|
|
45
|
+
def _load_as_pandas_df(self) -> DataFrame:
|
|
46
|
+
"""Load the table as a pandas DataFrame."""
|
|
41
47
|
table_group = self._get_table_group()
|
|
42
48
|
table_dict = dict(table_group.attrs)
|
|
49
|
+
|
|
43
50
|
data_frame = pd.DataFrame.from_dict(table_dict)
|
|
44
|
-
if columns is not None:
|
|
45
|
-
data_frame = data_frame[columns]
|
|
46
51
|
return data_frame
|
|
47
52
|
|
|
48
|
-
def
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
53
|
+
def load_as_pandas_df(self) -> DataFrame:
    """Read the stored table and return it as a normalized pandas DataFrame.

    The raw frame comes from ``_load_as_pandas_df`` and is then passed through
    ``normalize_pandas_df`` with this backend's index key and index type.
    """
    raw_frame = self._load_as_pandas_df()
    return normalize_pandas_df(
        raw_frame,
        index_key=self.index_key,
        index_type=self.index_type,
        reset_index=False,
    )
|
|
63
|
+
|
|
64
|
+
def _write_from_dict(self, table: dict) -> None:
|
|
65
|
+
"""Write the table from a dictionary to the store."""
|
|
52
66
|
table_group = self._get_table_group()
|
|
53
67
|
table_group.attrs.clear()
|
|
54
|
-
table_group.attrs.update(table
|
|
55
|
-
|
|
56
|
-
|
|
68
|
+
table_group.attrs.update(table)
|
|
69
|
+
|
|
70
|
+
def write_from_pandas(self, table: DataFrame) -> None:
    """Normalize a pandas DataFrame and store it as a column -> list mapping."""
    normalized = normalize_pandas_df(
        table,
        index_key=self.index_key,
        index_type=self.index_type,
        reset_index=True,
    )
    # One list of values per column, keyed by column name.
    columns = normalized.to_dict(orient="list")
    self._write_from_dict(table=columns)
|
|
80
|
+
|
|
81
|
+
def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
    """Normalize a polars DataFrame or LazyFrame and store it as a dict."""
    normalized = normalize_polars_lf(
        table,
        index_key=self.index_key,
        index_type=self.index_type,
    )
    # A lazy frame is materialized before being converted to a dict.
    frame = normalized.collect() if isinstance(normalized, LazyFrame) else normalized
    self._write_from_dict(table=frame.to_dict(as_series=False))
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
"""Protocol for table backends handlers."""
|
|
2
2
|
|
|
3
|
-
from collections.abc import Collection
|
|
4
3
|
from typing import Literal, Protocol
|
|
5
4
|
|
|
6
5
|
from anndata import AnnData
|
|
7
6
|
from pandas import DataFrame
|
|
7
|
+
from polars import DataFrame as PolarsDataFrame
|
|
8
|
+
from polars import LazyFrame
|
|
8
9
|
|
|
9
10
|
from ngio.tables.backends._anndata_v1 import AnnDataBackend
|
|
11
|
+
from ngio.tables.backends._csv_v1 import CsvTableBackend
|
|
10
12
|
from ngio.tables.backends._json_v1 import JsonTableBackend
|
|
11
13
|
from ngio.utils import NgioValueError, ZarrGroupHandler
|
|
12
14
|
|
|
@@ -16,33 +18,110 @@ class TableBackendProtocol(Protocol):
|
|
|
16
18
|
self,
|
|
17
19
|
group_handler: ZarrGroupHandler,
|
|
18
20
|
index_key: str | None = None,
|
|
19
|
-
index_type: Literal["int", "str"] =
|
|
20
|
-
):
|
|
21
|
+
index_type: Literal["int", "str"] | None = None,
|
|
22
|
+
):
|
|
23
|
+
"""Backend constructor.
|
|
24
|
+
|
|
25
|
+
Index keys and index types are used to ensure that the
|
|
26
|
+
serialization and deserialization of the table
|
|
27
|
+
is consistent across different backends.
|
|
28
|
+
|
|
29
|
+
Making sure that this is consistent is
|
|
30
|
+
a duty of the backend implementations.
|
|
31
|
+
"""
|
|
32
|
+
...
|
|
33
|
+
|
|
34
|
+
@staticmethod
|
|
35
|
+
def backend_name() -> str:
|
|
36
|
+
"""Return the name of the backend.
|
|
37
|
+
|
|
38
|
+
As a convention we set name as:
|
|
39
|
+
{backend_name}_v{version}
|
|
40
|
+
|
|
41
|
+
Where the version is an integer.
|
|
42
|
+
"""
|
|
43
|
+
...
|
|
21
44
|
|
|
22
45
|
@staticmethod
|
|
23
|
-
def
|
|
46
|
+
def implements_anndata() -> bool:
|
|
47
|
+
"""Check if the backend implements the anndata protocol.
|
|
48
|
+
|
|
49
|
+
If this is True, the backend should implement the
|
|
50
|
+
`load_as_anndata` and `write_from_anndata` methods.
|
|
51
|
+
|
|
52
|
+
If this is False, these methods should raise a
|
|
53
|
+
`NotImplementedError`.
|
|
54
|
+
"""
|
|
55
|
+
...
|
|
24
56
|
|
|
25
57
|
@staticmethod
|
|
26
|
-
def
|
|
58
|
+
def implements_pandas() -> bool:
|
|
59
|
+
"""Check if the backend implements the pandas protocol.
|
|
60
|
+
|
|
61
|
+
If this is True, the backend should implement the
|
|
62
|
+
`load_as_pandas_df` and `write_from_pandas` methods.
|
|
63
|
+
|
|
64
|
+
If this is False, these methods should raise a
|
|
65
|
+
`NotImplementedError`.
|
|
66
|
+
"""
|
|
67
|
+
...
|
|
27
68
|
|
|
28
69
|
@staticmethod
|
|
29
|
-
def
|
|
70
|
+
def implements_polars() -> bool:
|
|
71
|
+
"""Check if the backend implements the polars protocol.
|
|
72
|
+
|
|
73
|
+
If this is True, the backend should implement the
|
|
74
|
+
`load_as_polars_lf` and `write_from_polars` methods.
|
|
75
|
+
|
|
76
|
+
If this is False, these methods should raise a
|
|
77
|
+
`NotImplementedError`.
|
|
78
|
+
"""
|
|
79
|
+
...
|
|
80
|
+
|
|
81
|
+
def load_as_anndata(self) -> AnnData:
|
|
82
|
+
"""Load the table as an AnnData object."""
|
|
83
|
+
...
|
|
84
|
+
|
|
85
|
+
def load_as_pandas_df(self) -> DataFrame:
|
|
86
|
+
"""Load the table as a pandas DataFrame."""
|
|
87
|
+
...
|
|
30
88
|
|
|
31
|
-
def
|
|
89
|
+
def load_as_polars_lf(self) -> LazyFrame:
|
|
90
|
+
"""Load the table as a polars LazyFrame."""
|
|
91
|
+
...
|
|
92
|
+
|
|
93
|
+
def write_from_pandas(self, table: DataFrame) -> None:
|
|
94
|
+
"""Serialize the table from a pandas DataFrame."""
|
|
95
|
+
...
|
|
96
|
+
|
|
97
|
+
def write_from_anndata(self, table: AnnData) -> None:
|
|
98
|
+
"""Serialize the table from an AnnData object."""
|
|
99
|
+
...
|
|
100
|
+
|
|
101
|
+
def write_from_polars(self, table: LazyFrame | PolarsDataFrame) -> None:
|
|
102
|
+
"""Serialize the table from a polars DataFrame or LazyFrame."""
|
|
103
|
+
...
|
|
104
|
+
|
|
105
|
+
def write(
|
|
106
|
+
self,
|
|
107
|
+
table: DataFrame | AnnData | PolarsDataFrame | LazyFrame,
|
|
108
|
+
metadata: dict[str, str] | None = None,
|
|
109
|
+
mode: Literal["pandas", "anndata", "polars"] | None = None,
|
|
110
|
+
) -> None:
|
|
111
|
+
"""This is a generic write method.
|
|
32
112
|
|
|
33
|
-
|
|
113
|
+
Will call the appropriate write method
|
|
114
|
+
depending on the type of the table.
|
|
34
115
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
) -> DataFrame: ...
|
|
116
|
+
Moreover it will also write the metadata
|
|
117
|
+
if provided, and the backend metadata
|
|
38
118
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
119
|
+
the backend should write in the zarr group attributes
|
|
120
|
+
- backend: the backend name (self.backend_name())
|
|
121
|
+
- index_key: the index key
|
|
122
|
+
- index_type: the index type
|
|
42
123
|
|
|
43
|
-
|
|
44
|
-
self, table: AnnData, metadata: dict | None = None
|
|
45
|
-
) -> None: ...
|
|
124
|
+
"""
|
|
46
125
|
|
|
47
126
|
|
|
48
127
|
class ImplementedTableBackends:
|
|
@@ -68,7 +147,7 @@ class ImplementedTableBackends:
|
|
|
68
147
|
backend_name: str | None,
|
|
69
148
|
group_handler: ZarrGroupHandler,
|
|
70
149
|
index_key: str | None = None,
|
|
71
|
-
index_type: Literal["int", "str"] =
|
|
150
|
+
index_type: Literal["int", "str"] | None = None,
|
|
72
151
|
) -> TableBackendProtocol:
|
|
73
152
|
"""Try to get a handler for the given store based on the metadata version."""
|
|
74
153
|
if backend_name is None:
|
|
@@ -100,3 +179,4 @@ class ImplementedTableBackends:
|
|
|
100
179
|
|
|
101
180
|
ImplementedTableBackends().add_backend(AnnDataBackend)
|
|
102
181
|
ImplementedTableBackends().add_backend(JsonTableBackend)
|
|
182
|
+
ImplementedTableBackends().add_backend(CsvTableBackend)
|