ngio 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- ngio/__init__.py +20 -2
- ngio/common/_pyramid.py +5 -1
- ngio/common/_roi.py +2 -2
- ngio/hcs/__init__.py +16 -2
- ngio/hcs/plate.py +496 -18
- ngio/images/abstract_image.py +11 -0
- ngio/images/create.py +25 -36
- ngio/images/image.py +80 -6
- ngio/images/label.py +38 -9
- ngio/images/ome_zarr_container.py +70 -33
- ngio/ome_zarr_meta/__init__.py +5 -3
- ngio/ome_zarr_meta/ngio_specs/__init__.py +10 -2
- ngio/ome_zarr_meta/ngio_specs/_axes.py +90 -65
- ngio/ome_zarr_meta/ngio_specs/_dataset.py +46 -8
- ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +242 -70
- ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +49 -11
- ngio/ome_zarr_meta/ngio_specs/_pixel_size.py +28 -11
- ngio/ome_zarr_meta/v04/_custom_models.py +18 -0
- ngio/ome_zarr_meta/v04/_v04_spec_utils.py +2 -2
- ngio/tables/_validators.py +1 -83
- ngio/tables/backends/__init__.py +27 -1
- ngio/tables/backends/_abstract_backend.py +207 -22
- ngio/tables/backends/_anndata_utils.py +3 -109
- ngio/tables/backends/_anndata_v1.py +43 -46
- ngio/tables/backends/_csv_v1.py +162 -0
- ngio/tables/backends/_json_v1.py +54 -18
- ngio/tables/backends/_table_backends.py +98 -18
- ngio/tables/backends/_utils.py +458 -0
- ngio/tables/tables_container.py +3 -1
- ngio/tables/v1/_feature_table.py +20 -11
- ngio/tables/v1/_generic_table.py +20 -15
- ngio/tables/v1/_roi_table.py +7 -9
- ngio/utils/_zarr_utils.py +46 -32
- {ngio-0.2.1.dist-info → ngio-0.2.3.dist-info}/METADATA +3 -1
- ngio-0.2.3.dist-info/RECORD +57 -0
- ngio-0.2.1.dist-info/RECORD +0 -54
- {ngio-0.2.1.dist-info → ngio-0.2.3.dist-info}/WHEEL +0 -0
- {ngio-0.2.1.dist-info → ngio-0.2.3.dist-info}/licenses/LICENSE +0 -0
ngio/tables/backends/_utils.py
ADDED
@@ -0,0 +1,458 @@
+"""Utility functions for converting between different tables formats.
+
+The supported formats are:
+- pandas DataFrame
+- polars DataFrame or LazyFrame
+- AnnData
+
+These functions are used to validate and normalize the tables
+to ensure that conversion between formats is consistent.
+"""
+
+# %%
+from copy import deepcopy
+from typing import Literal
+
+import numpy as np
+import pandas as pd
+import pandas.api.types as ptypes
+import polars as pl
+from anndata import AnnData
+from pandas import DataFrame
+from polars import DataFrame as PolarsDataFrame
+from polars import LazyFrame
+
+from ngio.utils import NgioTableValidationError, NgioValueError
+
+# -----------------
+# Validation utils
+# -----------------
+
+
+def _validate_index_key_df(pandas_df: DataFrame, index_key: str | None) -> DataFrame:
+    """Validate the index key of the pandas DataFrame.
+
+    Args:
+        pandas_df (DataFrame): The pandas DataFrame to validate.
+        index_key (str | None): The column name to use as the index of the DataFrame.
+
+    Returns:
+        DataFrame: DataFrame with validated index key.
+
+    Raises:
+        NgioTableValidationError: If index key is not found in DataFrame.
+    """
+    if index_key is None:
+        return pandas_df
+
+    if pandas_df.index.name == index_key:
+        return pandas_df
+
+    if index_key in pandas_df.columns:
+        pandas_df = pandas_df.set_index(index_key)
+        pandas_df.index.name = index_key
+        return pandas_df
+
+    raise NgioTableValidationError(f"Index key '{index_key}' is not found in DataFrame")
+
+
+def _validate_cast_index_dtype_df(
+    pandas_df: DataFrame, index_type: str | None
+) -> DataFrame:
+    """Check if the index of the DataFrame has the correct dtype.
+
+    Args:
+        pandas_df (DataFrame): The pandas DataFrame to validate.
+        index_type (str | None): The type to cast the index to ('str' or 'int').
+
+    Returns:
+        DataFrame: DataFrame with index of the specified type.
+
+    Raises:
+        NgioTableValidationError: If index cannot be cast to the specified type.
+        NgioValueError: If index_type is not 'str' or 'int'.
+    """
+    if index_type is None:
+        # Nothing to do
+        return pandas_df
+
+    if index_type == "str":
+        if ptypes.is_integer_dtype(pandas_df.index):
+            # Convert the int index to string is generally safe
+            pandas_df = pandas_df.set_index(pandas_df.index.astype(str))
+
+        if not ptypes.is_string_dtype(pandas_df.index):
+            raise NgioTableValidationError(
+                f"Table index must be of string type, got {pandas_df.index.dtype}"
+            )
+
+    elif index_type == "int":
+        if ptypes.is_string_dtype(pandas_df.index):
+            # Try to convert the string index to int
+            try:
+                pandas_df = pandas_df.set_index(pandas_df.index.astype(int))
+            except ValueError as e:
+                if "invalid literal for int() with base 10" in str(e):
+                    raise NgioTableValidationError(
+                        "Table index must be of integer type, got str."
+                        f" We tried implicit conversion and failed: {e}"
+                    ) from None
+                else:
+                    raise e from e
+
+        if not ptypes.is_integer_dtype(pandas_df.index):
+            raise NgioTableValidationError(
+                f"Table index must be of integer type, got {pandas_df.index.dtype}"
+            )
+    else:
+        raise NgioValueError(
+            f"Invalid index type '{index_type}'. Must be 'int' or 'str'."
+        )
+
+    return pandas_df
+
+
+def _check_for_mixed_types(series: pd.Series) -> None:
+    """Check if the column has mixed types.
+
+    Args:
+        series (pd.Series): The pandas Series to check.
+
+    Raises:
+        NgioTableValidationError: If the column has mixed types.
+    """
+    if series.apply(type).nunique() > 1:  # type: ignore
+        raise NgioTableValidationError(
+            f"Column {series.name} has mixed types: "
+            f"{series.apply(type).unique()}. "  # type: ignore
+            "Type of all elements must be the same."
+        )
+
+
+def _check_for_supported_types(series: pd.Series) -> Literal["str", "int", "numeric"]:
+    """Check if the column has supported types.
+
+    Args:
+        series (pd.Series): The pandas Series to check.
+
+    Returns:
+        Literal["str", "int", "numeric"]: The type category of the series.
+
+    Raises:
+        NgioTableValidationError: If the column has unsupported types.
+    """
+    if ptypes.is_string_dtype(series):
+        return "str"
+    if ptypes.is_integer_dtype(series):
+        return "int"
+    if ptypes.is_numeric_dtype(series):
+        return "numeric"
+    raise NgioTableValidationError(
+        f"Column {series.name} has unsupported type: {series.dtype}."
+        " Supported types are string and numerics."
+    )
+
+
+# -----------------
+# Normalization functions
+# -----------------
+
+
+def normalize_pandas_df(
+    pandas_df: DataFrame,
+    index_key: str | None = None,
+    index_type: Literal["int", "str"] | None = None,
+    reset_index: bool = False,
+) -> DataFrame:
+    """Make sure the DataFrame has the correct index and dtype.
+
+    Args:
+        pandas_df (DataFrame): The pandas DataFrame to validate.
+        index_key (str | None): The column name to use as the index of the DataFrame.
+            Default is None.
+        index_type (str | None): The type of the index column in the DataFrame.
+            Either 'str' or 'int'. Default is None.
+        reset_index (bool): If True the index will be reset (i.e. the index will be
+            converted to a column). If False, the index will be kept as is.
+
+    Returns:
+        DataFrame: Normalized pandas DataFrame.
+    """
+    pandas_df = _validate_index_key_df(pandas_df, index_key)
+    pandas_df = _validate_cast_index_dtype_df(pandas_df, index_type)
+    if pandas_df.index.name is not None:
+        index_key = pandas_df.index.name
+
+    if reset_index and pandas_df.index.name is not None:
+        pandas_df = pandas_df.reset_index()
+    return pandas_df
+
+
+def normalize_polars_lf(
+    polars_lf: LazyFrame | PolarsDataFrame,
+    index_key: str | None = None,
+    index_type: Literal["int", "str"] | None = None,
+) -> LazyFrame:
+    """Validate the polars LazyFrame.
+
+    Args:
+        polars_lf (LazyFrame | PolarsDataFrame): The polars LazyFrame to validate.
+        index_key (str | None): The column name to use as the index of the DataFrame.
+            Default is None.
+        index_type (str | None): The type of the index column in the DataFrame.
+            Either 'str' or 'int'. Default is None.
+
+    Returns:
+        LazyFrame: Normalized polars LazyFrame.
+
+    Raises:
+        ValueError: If index_key is not found or index_type is invalid.
+    """
+    if index_key is not None:
+        schema = polars_lf.collect_schema()
+        if index_key not in schema:
+            raise NgioTableValidationError(
+                f"Index key '{index_key}' not found in LazyFrame columns."
+            )
+
+    if index_type is not None:
+        if index_type not in ["int", "str"]:
+            raise NgioTableValidationError(
+                f"Invalid index type '{index_type}'. Must be 'int' or 'str'."
+            )
+        if index_type == "int" and not schema[index_key].is_integer():
+            polars_lf = polars_lf.with_columns(pl.col(index_key).cast(pl.Int64))
+        elif index_type == "str" and not schema[index_key] == pl.String():
+            polars_lf = polars_lf.with_columns(pl.col(index_key).cast(pl.String()))
+
+    if isinstance(polars_lf, PolarsDataFrame):
+        polars_lf = polars_lf.lazy()
+    return polars_lf
+
+
+def normalize_anndata(
+    anndata: AnnData,
+    index_key: str | None = None,
+) -> AnnData:
+    """Validate the AnnData object.
+
+    Args:
+        anndata (AnnData): The AnnData object to validate.
+        index_key (str | None): The column name to use as the index of the DataFrame.
+            Default is None.
+
+    Returns:
+        AnnData: Normalized AnnData object.
+    """
+    if index_key is None:
+        return anndata
+    obs = _validate_index_key_df(anndata.obs, index_key)
+    obs = _validate_cast_index_dtype_df(obs, "str")
+
+    if obs.equals(anndata.obs):
+        return anndata
+
+    anndata = deepcopy(anndata)
+    anndata.obs = obs
+    return anndata
+
+
+# -----------------
+# Conversion functions
+# -----------------
+
+
+def convert_pandas_to_polars(
+    pandas_df: DataFrame,
+    index_key: str | None = None,
+    index_type: Literal["int", "str"] | None = None,
+) -> LazyFrame:
+    """Convert a pandas DataFrame to a polars LazyFrame.
+
+    Args:
+        pandas_df (DataFrame): The pandas DataFrame to convert.
+        index_key (str | None): The column name to use as the index of the DataFrame.
+            Default is None.
+        index_type (str | None): The type of the index column in the DataFrame.
+            Either 'str' or 'int'. Default is None.
+
+    Returns:
+        LazyFrame: Converted and normalized polars LazyFrame.
+    """
+    pandas_df = normalize_pandas_df(
+        pandas_df,
+        index_key=index_key,
+        index_type=index_type,
+        reset_index=True,
+    )
+    return pl.from_pandas(pandas_df).lazy()
+
+
+def convert_polars_to_pandas(
+    polars_df: PolarsDataFrame | LazyFrame,
+    index_key: str | None = None,
+    index_type: Literal["int", "str"] | None = None,
+    reset_index: bool = False,
+) -> DataFrame:
+    """Convert a polars DataFrame or LazyFrame to a pandas DataFrame.
+
+    Args:
+        polars_df (PolarsDataFrame | LazyFrame): The polars DataFrame or
+            LazyFrame to convert.
+        index_key (str | None): The column name to use as the index of the DataFrame.
+            Default is None.
+        index_type (str | None): The type of the index column in the DataFrame.
+            Either 'str' or 'int'. Default is None.
+        reset_index (bool): If True the index will be reset (i.e., the index will be
+            converted to a column). If False, the index will be kept as is.
+
+    Returns:
+        DataFrame: Converted and normalized pandas DataFrame.
+    """
+    if isinstance(polars_df, LazyFrame):
+        polars_df = polars_df.collect()
+
+    pandas_df = polars_df.to_pandas()
+    pandas_df = normalize_pandas_df(
+        pandas_df,
+        index_key=index_key,
+        index_type=index_type,
+        reset_index=reset_index,
+    )
+    return pandas_df
+
+
+def convert_pandas_to_anndata(
+    pandas_df: DataFrame,
+    index_key: str | None = None,
+) -> AnnData:
+    """Convert a pandas DataFrame to an AnnData object.
+
+    Args:
+        pandas_df (DataFrame): The pandas DataFrame to convert.
+        index_key (str | None): The column name to use as the index of the DataFrame.
+            Default is None.
+
+    Returns:
+        AnnData: Converted AnnData object.
+    """
+    pandas_df = normalize_pandas_df(
+        pandas_df,
+        index_key=index_key,
+        index_type="str",
+        reset_index=False,
+    )
+
+    str_columns, int_columns, num_columns = [], [], []
+    for col_name in pandas_df.columns:
+        column = pandas_df[col_name]
+        _check_for_mixed_types(column)  # Mixed types are not allowed in the table
+        col_type = _check_for_supported_types(
+            column
+        )  # Only string and numeric types are allowed
+
+        if col_type == "str":
+            str_columns.append(col_name)
+
+        elif col_type == "int":
+            int_columns.append(col_name)
+
+        elif col_type == "numeric":
+            num_columns.append(col_name)
+
+    # Converting all observations to string
+    obs_df = pandas_df[str_columns + int_columns]
+    obs_df.index = pandas_df.index
+
+    x_df = pandas_df[num_columns]
+
+    if x_df.dtypes.nunique() > 1:
+        x_df = x_df.astype("float64")
+
+    if x_df.empty:
+        # If there are no numeric columns, create an empty array
+        # to avoid AnnData failing to create the object
+        x_df = np.zeros((len(obs_df), 0), dtype="float64")
+
+    return AnnData(X=x_df, obs=obs_df)
+
+
+def convert_anndata_to_pandas(
+    anndata: AnnData,
+    index_key: str | None = None,
+    index_type: Literal["int", "str"] | None = None,
+    reset_index: bool = False,
+) -> DataFrame:
+    """Convert an AnnData object to a pandas DataFrame.
+
+    Args:
+        anndata (AnnData): An AnnData object to convert.
+        index_key (str | None): The column name to use as the index of the DataFrame.
+            Default is None.
+        index_type (str | None): The type of the index column in the DataFrame.
+            Either 'str' or 'int'. Default is None.
+        reset_index (bool): If True the index will be reset (i.e., the index will be
+            converted to a column). If False, the index will be kept as is.
+
+    Returns:
+        DataFrame: Converted and normalized pandas DataFrame.
+    """
+    pandas_df = anndata.to_df()
+    pandas_df[anndata.obs_keys()] = anndata.obs
+    pandas_df = normalize_pandas_df(
+        pandas_df,
+        index_key=index_key,
+        index_type=index_type,
+        reset_index=reset_index,
+    )
+    return pandas_df
+
+
+def convert_anndata_to_polars(
+    anndata: AnnData,
+    index_key: str | None = None,
+    index_type: Literal["int", "str"] | None = None,
+) -> LazyFrame:
+    """Convert an AnnData object to a polars LazyFrame.
+
+    Args:
+        anndata (AnnData): An AnnData object to convert.
+        index_key (str | None): The column name to use as the index of the DataFrame.
+            Default is None.
+        index_type (str | None): The type of the index column in the DataFrame.
+            Either 'str' or 'int'. Default is None.
+
+    Returns:
+        LazyFrame: Converted and normalized polars LazyFrame.
+    """
+    pandas_df = convert_anndata_to_pandas(
+        anndata,
+        index_key=index_key,
+        index_type=index_type,
+        reset_index=True,
+    )
+    return pl.from_pandas(pandas_df).lazy()
+
+
+def convert_polars_to_anndata(
+    polars_df: LazyFrame | PolarsDataFrame,
+    index_key: str | None = None,
+) -> AnnData:
+    """Convert a polars LazyFrame or DataFrame to an AnnData object.
+
+    Args:
+        polars_df (LazyFrame | PolarsDataFrame): The polars LazyFrame or
+            DataFrame to convert.
+        index_key (str | None): The column name to use as the index of the DataFrame.
+            Default is None.
+
+    Returns:
+        AnnData: Converted AnnData object.
+    """
+    if isinstance(polars_df, LazyFrame):
+        polars_df = polars_df.collect()
+    pandas_df = polars_df.to_pandas()
+    return convert_pandas_to_anndata(
+        pandas_df,
+        index_key=index_key,
+    )
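
For orientation, below is a minimal usage sketch of the conversion helpers added above, in the spirit of how the v1 table classes further down in this diff use them. It assumes ngio 0.2.3 with pandas, polars, and anndata installed; the toy DataFrame and its "label"/"area"/"well" columns are illustrative only and not part of the package.

# Hedged sketch: exercises the new conversion utilities shown above.
# The toy DataFrame and the "label" column are made up for illustration.
import pandas as pd

from ngio.tables.backends._utils import (
    convert_anndata_to_polars,
    convert_pandas_to_anndata,
    normalize_pandas_df,
)

df = pd.DataFrame(
    {
        "label": [1, 2, 3],
        "area": [10.5, 20.0, 7.25],
        "well": ["A1", "A1", "B2"],
    }
)

# Promote "label" to an integer index, as FeatureTableV1 does below.
df = normalize_pandas_df(df, index_key="label", index_type="int", reset_index=False)

# String/int columns land in AnnData.obs, numeric columns in AnnData.X.
adata = convert_pandas_to_anndata(df, index_key="label")

# And back out as a polars LazyFrame.
lazy_frame = convert_anndata_to_polars(adata)
print(lazy_frame.collect())
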
ngio/tables/tables_container.py
CHANGED
@@ -237,7 +237,9 @@ class TablesContainer:
                 "Use overwrite=True to replace it."
             )

-        table_handler = self._group_handler.derive_handler(
+        table_handler = self._group_handler.derive_handler(
+            path=name, overwrite=overwrite
+        )

         if backend is None:
             backend = table.backend_name
ngio/tables/v1/_feature_table.py
CHANGED
@@ -9,8 +9,8 @@ from typing import Literal
 import pandas as pd
 from pydantic import BaseModel

-from ngio.tables.
-from ngio.tables.backends import
+from ngio.tables.backends import BackendMeta, ImplementedTableBackends
+from ngio.tables.backends._utils import normalize_pandas_df
 from ngio.utils import NgioValueError, ZarrGroupHandler


@@ -20,12 +20,11 @@ class RegionMeta(BaseModel):
     path: str


-class FeatureTableMeta(
+class FeatureTableMeta(BackendMeta):
     """Metadata for the ROI table."""

     fractal_table_version: Literal["1"] = "1"
     type: Literal["feature_table"] = "feature_table"
-    backend: str | None = None
     region: RegionMeta | None = None
     instance_key: str = "label"

@@ -53,8 +52,11 @@ class FeatureTableV1:
         if dataframe is None:
             self._dataframe = None
         else:
-            self._dataframe =
-                dataframe,
+            self._dataframe = normalize_pandas_df(
+                dataframe,
+                index_key=self._instance_key,
+                index_type="int",
+                reset_index=False,
             )
         self._table_backend = None

@@ -107,7 +109,7 @@ class FeatureTableV1:
             )

         if self._dataframe is None and self._table_backend is not None:
-            self._dataframe = self._table_backend.
+            self._dataframe = self._table_backend.load_as_pandas_df()

         if self._dataframe is None:
             raise NgioValueError(
@@ -118,7 +120,12 @@ class FeatureTableV1:
     @dataframe.setter
     def dataframe(self, dataframe: pd.DataFrame) -> None:
         """Set the table as a DataFrame."""
-        self._dataframe =
+        self._dataframe = normalize_pandas_df(
+            dataframe,
+            index_key=self._instance_key,
+            index_type="int",
+            reset_index=False,
+        )

     @classmethod
     def _from_handler(
@@ -143,7 +150,7 @@ class FeatureTableV1:
             )
         meta.backend = backend_name

-        if not backend.
+        if not backend.implements_pandas:
             raise NgioValueError(
                 "The backend does not implement the dataframe protocol."
             )
@@ -177,6 +184,8 @@ class FeatureTableV1:
                 "Please add the table to a OME-Zarr Image before calling consolidate."
             )

-        self._table_backend.
-            self.dataframe,
+        self._table_backend.write(
+            self.dataframe,
+            metadata=self._meta.model_dump(exclude_none=True),
+            mode="pandas",
         )
ngio/tables/v1/_generic_table.py
CHANGED
@@ -2,22 +2,21 @@

 import pandas as pd
 from anndata import AnnData
-from pydantic import BaseModel

-from ngio.tables.backends import
-
-
-
+from ngio.tables.backends import (
+    BackendMeta,
+    ImplementedTableBackends,
+    convert_anndata_to_pandas,
+    convert_pandas_to_anndata,
 )
 from ngio.utils import NgioValueError, ZarrGroupHandler


-class GenericTableMeta(
+class GenericTableMeta(BackendMeta):
     """Metadata for the ROI table."""

     fractal_table_version: str | None = None
     type: str | None = None
-    backend: str | None = None


 class GenericTable:
@@ -88,7 +87,7 @@ class GenericTable:
             return self._dataframe

         if self._anndata is not None:
-            return
+            return convert_anndata_to_pandas(self._anndata)

         raise NgioValueError("No table loaded.")

@@ -105,7 +104,9 @@ class GenericTable:
             return self._anndata

         if self._dataframe is not None:
-            return
+            return convert_pandas_to_anndata(
+                self._dataframe,
+            )
         raise NgioValueError("No table loaded.")

     @anndata.setter
@@ -138,8 +139,8 @@ class GenericTable:
             anndata = backend.load_as_anndata()
             table = cls(anndata=anndata)

-        elif backend.
-            dataframe = backend.
+        elif backend.implements_pandas():
+            dataframe = backend.load_as_pandas_df()
             table = cls(dataframe=dataframe)
         else:
             raise NgioValueError(
@@ -173,10 +174,14 @@ class GenericTable:
             )

         if self.anndata_native:
-            self._table_backend.
-                self.anndata,
+            self._table_backend.write(
+                self.anndata,
+                metadata=self._meta.model_dump(exclude_none=True),
+                mode="anndata",
             )
         else:
-            self._table_backend.
-                self.dataframe,
+            self._table_backend.write(
+                self.dataframe,
+                metadata=self._meta.model_dump(exclude_none=True),
+                mode="pandas",
             )
ngio/tables/v1/_roi_table.py
CHANGED
@@ -14,7 +14,7 @@ from pydantic import BaseModel

 from ngio.common import Roi
 from ngio.tables._validators import validate_columns
-from ngio.tables.backends import ImplementedTableBackends
+from ngio.tables.backends import BackendMeta, ImplementedTableBackends
 from ngio.utils import NgioValueError, ZarrGroupHandler

 REQUIRED_COLUMNS = [
@@ -91,12 +91,11 @@ def _rois_to_dataframe(rois: dict[str, Roi], index_key: str) -> pd.DataFrame:
     return dataframe


-class RoiTableV1Meta(
+class RoiTableV1Meta(BackendMeta):
     """Metadata for the ROI table."""

     fractal_table_version: Literal["1"] = "1"
     type: Literal["roi_table"] = "roi_table"
-    backend: str | None = None


 class RegionMeta(BaseModel):
@@ -105,12 +104,11 @@ class RegionMeta(BaseModel):
     path: str


-class MaskingRoiTableV1Meta(
+class MaskingRoiTableV1Meta(BackendMeta):
     """Metadata for the ROI table."""

     fractal_table_version: Literal["1"] = "1"
     type: Literal["masking_roi_table"] = "masking_roi_table"
-    backend: str | None = None
     region: RegionMeta | None = None
     instance_key: str = "label"

@@ -191,7 +189,7 @@ class _GenericRoiTableV1(Generic[_roi_meta]):
             )
         meta.backend = backend_name

-        if not backend.
+        if not backend.implements_pandas:
             raise NgioValueError(
                 "The backend does not implement the dataframe protocol."
             )
@@ -201,7 +199,7 @@ class _GenericRoiTableV1(Generic[_roi_meta]):
         table._meta = meta
         table._table_backend = backend

-        dataframe = backend.
+        dataframe = backend.load_as_pandas_df()
         dataframe = validate_columns(
             dataframe,
             required_columns=REQUIRED_COLUMNS,
@@ -253,8 +251,8 @@ class _GenericRoiTableV1(Generic[_roi_meta]):
             required_columns=REQUIRED_COLUMNS,
             optional_columns=OPTIONAL_COLUMNS,
         )
-        self._table_backend.
-            dataframe, metadata=self._meta.model_dump(exclude_none=True)
+        self._table_backend.write(
+            dataframe, metadata=self._meta.model_dump(exclude_none=True), mode="pandas"
         )
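
Taken together, the hunks above converge on one read/write shape for table backends: data is loaded with load_as_pandas_df() and persisted with write(data, metadata=..., mode="pandas" or "anndata"). Below is a minimal in-memory stand-in that mirrors only that call shape; the InMemoryBackend class is hypothetical and written for illustration, it is not ngio's actual backend API.

# Hypothetical stand-in that mimics the backend call shape used in the hunks
# above; it is NOT ngio's real table backend.
from typing import Any, Literal

import pandas as pd


class InMemoryBackend:
    """Toy backend storing a single pandas table and its metadata in memory."""

    def __init__(self) -> None:
        self._df: pd.DataFrame | None = None
        self.metadata: dict[str, Any] = {}

    def load_as_pandas_df(self) -> pd.DataFrame:
        # Mirrors the read path seen above: dataframe = backend.load_as_pandas_df()
        if self._df is None:
            raise ValueError("No table has been written yet.")
        return self._df

    def write(
        self,
        data: pd.DataFrame,
        metadata: dict[str, Any] | None = None,
        mode: Literal["pandas", "anndata"] = "pandas",
    ) -> None:
        # Mirrors the write path: backend.write(df, metadata=..., mode="pandas")
        if mode != "pandas":
            raise NotImplementedError("This toy backend only sketches the pandas mode.")
        self._df = data
        self.metadata = metadata or {}


backend = InMemoryBackend()
table = pd.DataFrame({"label": [1, 2], "area": [10.5, 7.25]})
backend.write(table, metadata={"type": "feature_table"}, mode="pandas")
print(backend.load_as_pandas_df())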