ngio 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ngio/common/_pyramid.py +5 -1
- ngio/hcs/plate.py +133 -2
- ngio/images/abstract_image.py +1 -0
- ngio/images/image.py +42 -0
- ngio/images/label.py +15 -7
- ngio/images/ome_zarr_container.py +20 -11
- ngio/tables/_validators.py +1 -83
- ngio/tables/backends/__init__.py +27 -1
- ngio/tables/backends/_abstract_backend.py +207 -22
- ngio/tables/backends/_anndata_utils.py +3 -109
- ngio/tables/backends/_anndata_v1.py +43 -46
- ngio/tables/backends/_csv_v1.py +162 -0
- ngio/tables/backends/_json_v1.py +54 -18
- ngio/tables/backends/_table_backends.py +98 -18
- ngio/tables/backends/_utils.py +462 -0
- ngio/tables/tables_container.py +14 -3
- ngio/tables/v1/_feature_table.py +20 -11
- ngio/tables/v1/_generic_table.py +20 -15
- ngio/tables/v1/_roi_table.py +15 -12
- ngio/utils/_zarr_utils.py +46 -32
- {ngio-0.2.2.dist-info → ngio-0.2.4.dist-info}/METADATA +3 -1
- {ngio-0.2.2.dist-info → ngio-0.2.4.dist-info}/RECORD +24 -22
- {ngio-0.2.2.dist-info → ngio-0.2.4.dist-info}/WHEEL +0 -0
- {ngio-0.2.2.dist-info → ngio-0.2.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,462 @@
|
|
|
1
|
+
"""Utility functions for converting between different tables formats.
|
|
2
|
+
|
|
3
|
+
The supported formats are:
|
|
4
|
+
- pandas DataFrame
|
|
5
|
+
- polars DataFrame or LazyFrame
|
|
6
|
+
- AnnData
|
|
7
|
+
|
|
8
|
+
These functions are used to validate and normalize the tables
|
|
9
|
+
to ensure that conversion between formats is consistent.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
# %%
|
|
13
|
+
from copy import deepcopy
|
|
14
|
+
from typing import Literal
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
import pandas as pd
|
|
18
|
+
import pandas.api.types as ptypes
|
|
19
|
+
import polars as pl
|
|
20
|
+
from anndata import AnnData
|
|
21
|
+
from pandas import DataFrame
|
|
22
|
+
from polars import DataFrame as PolarsDataFrame
|
|
23
|
+
from polars import LazyFrame
|
|
24
|
+
|
|
25
|
+
from ngio.utils import NgioTableValidationError, NgioValueError
|
|
26
|
+
|
|
27
|
+
# -----------------
|
|
28
|
+
# Validation utils
|
|
29
|
+
# -----------------
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _validate_index_key_df(pandas_df: DataFrame, index_key: str | None) -> DataFrame:
|
|
33
|
+
"""Validate the index key of the pandas DataFrame.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
pandas_df (DataFrame): The pandas DataFrame to validate.
|
|
37
|
+
index_key (str | None): The column name to use as the index of the DataFrame.
|
|
38
|
+
|
|
39
|
+
Returns:
|
|
40
|
+
DataFrame: DataFrame with validated index key.
|
|
41
|
+
|
|
42
|
+
Raises:
|
|
43
|
+
NgioTableValidationError: If index key is not found in DataFrame.
|
|
44
|
+
"""
|
|
45
|
+
if index_key is None:
|
|
46
|
+
return pandas_df
|
|
47
|
+
|
|
48
|
+
if pandas_df.index.name == index_key:
|
|
49
|
+
return pandas_df
|
|
50
|
+
|
|
51
|
+
if index_key in pandas_df.columns:
|
|
52
|
+
pandas_df = pandas_df.set_index(index_key)
|
|
53
|
+
pandas_df.index.name = index_key
|
|
54
|
+
return pandas_df
|
|
55
|
+
|
|
56
|
+
if pandas_df.index.name is None:
|
|
57
|
+
pandas_df.index.name = index_key
|
|
58
|
+
return pandas_df
|
|
59
|
+
|
|
60
|
+
raise NgioTableValidationError(f"Index key '{index_key}' is not found in DataFrame")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _validate_cast_index_dtype_df(
|
|
64
|
+
pandas_df: DataFrame, index_type: str | None
|
|
65
|
+
) -> DataFrame:
|
|
66
|
+
"""Check if the index of the DataFrame has the correct dtype.
|
|
67
|
+
|
|
68
|
+
Args:
|
|
69
|
+
pandas_df (DataFrame): The pandas DataFrame to validate.
|
|
70
|
+
index_type (str | None): The type to cast the index to ('str' or 'int').
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
DataFrame: DataFrame with index of the specified type.
|
|
74
|
+
|
|
75
|
+
Raises:
|
|
76
|
+
NgioTableValidationError: If index cannot be cast to the specified type.
|
|
77
|
+
NgioValueError: If index_type is not 'str' or 'int'.
|
|
78
|
+
"""
|
|
79
|
+
if index_type is None:
|
|
80
|
+
# Nothing to do
|
|
81
|
+
return pandas_df
|
|
82
|
+
|
|
83
|
+
if index_type == "str":
|
|
84
|
+
if ptypes.is_integer_dtype(pandas_df.index):
|
|
85
|
+
# Convert the int index to string is generally safe
|
|
86
|
+
pandas_df = pandas_df.set_index(pandas_df.index.astype(str))
|
|
87
|
+
|
|
88
|
+
if not ptypes.is_string_dtype(pandas_df.index):
|
|
89
|
+
raise NgioTableValidationError(
|
|
90
|
+
f"Table index must be of string type, got {pandas_df.index.dtype}"
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
elif index_type == "int":
|
|
94
|
+
if ptypes.is_string_dtype(pandas_df.index):
|
|
95
|
+
# Try to convert the string index to int
|
|
96
|
+
try:
|
|
97
|
+
pandas_df = pandas_df.set_index(pandas_df.index.astype(int))
|
|
98
|
+
except ValueError as e:
|
|
99
|
+
if "invalid literal for int() with base 10" in str(e):
|
|
100
|
+
raise NgioTableValidationError(
|
|
101
|
+
"Table index must be of integer type, got str."
|
|
102
|
+
f" We tried implicit conversion and failed: {e}"
|
|
103
|
+
) from None
|
|
104
|
+
else:
|
|
105
|
+
raise e from e
|
|
106
|
+
|
|
107
|
+
if not ptypes.is_integer_dtype(pandas_df.index):
|
|
108
|
+
raise NgioTableValidationError(
|
|
109
|
+
f"Table index must be of integer type, got {pandas_df.index.dtype}"
|
|
110
|
+
)
|
|
111
|
+
else:
|
|
112
|
+
raise NgioValueError(
|
|
113
|
+
f"Invalid index type '{index_type}'. Must be 'int' or 'str'."
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
return pandas_df
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _check_for_mixed_types(series: pd.Series) -> None:
|
|
120
|
+
"""Check if the column has mixed types.
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
series (pd.Series): The pandas Series to check.
|
|
124
|
+
|
|
125
|
+
Raises:
|
|
126
|
+
NgioTableValidationError: If the column has mixed types.
|
|
127
|
+
"""
|
|
128
|
+
if series.apply(type).nunique() > 1: # type: ignore
|
|
129
|
+
raise NgioTableValidationError(
|
|
130
|
+
f"Column {series.name} has mixed types: "
|
|
131
|
+
f"{series.apply(type).unique()}. " # type: ignore
|
|
132
|
+
"Type of all elements must be the same."
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _check_for_supported_types(series: pd.Series) -> Literal["str", "int", "numeric"]:
|
|
137
|
+
"""Check if the column has supported types.
|
|
138
|
+
|
|
139
|
+
Args:
|
|
140
|
+
series (pd.Series): The pandas Series to check.
|
|
141
|
+
|
|
142
|
+
Returns:
|
|
143
|
+
Literal["str", "int", "numeric"]: The type category of the series.
|
|
144
|
+
|
|
145
|
+
Raises:
|
|
146
|
+
NgioTableValidationError: If the column has unsupported types.
|
|
147
|
+
"""
|
|
148
|
+
if ptypes.is_string_dtype(series):
|
|
149
|
+
return "str"
|
|
150
|
+
if ptypes.is_integer_dtype(series):
|
|
151
|
+
return "int"
|
|
152
|
+
if ptypes.is_numeric_dtype(series):
|
|
153
|
+
return "numeric"
|
|
154
|
+
raise NgioTableValidationError(
|
|
155
|
+
f"Column {series.name} has unsupported type: {series.dtype}."
|
|
156
|
+
" Supported types are string and numerics."
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# -----------------
|
|
161
|
+
# Normalization functions
|
|
162
|
+
# -----------------
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def normalize_pandas_df(
    pandas_df: DataFrame,
    index_key: str | None = None,
    index_type: Literal["int", "str"] | None = None,
    reset_index: bool = False,
) -> DataFrame:
    """Make sure the DataFrame has the correct index and dtype.

    Args:
        pandas_df (DataFrame): The pandas DataFrame to validate.
        index_key (str | None): The column name to use as the index of the DataFrame.
            Default is None.
        index_type (str | None): The type of the index column in the DataFrame.
            Either 'str' or 'int'. Default is None.
        reset_index (bool): If True the index will be reset (i.e. the index will be
            converted to a column). If False, the index will be kept as is.

    Returns:
        DataFrame: Normalized pandas DataFrame.

    Raises:
        NgioTableValidationError: If the index key is not found or the index
            cannot be cast to ``index_type``.
        NgioValueError: If ``index_type`` is not 'str' or 'int'.
    """
    pandas_df = _validate_index_key_df(pandas_df, index_key)
    pandas_df = _validate_cast_index_dtype_df(pandas_df, index_type)

    # Only a named index is moved back to a column; an unnamed index is
    # left in place even when reset_index is True.
    if reset_index and pandas_df.index.name is not None:
        pandas_df = pandas_df.reset_index()
    return pandas_df
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def normalize_polars_lf(
    polars_lf: LazyFrame | PolarsDataFrame,
    index_key: str | None = None,
    index_type: Literal["int", "str"] | None = None,
) -> LazyFrame:
    """Validate the polars LazyFrame.

    Args:
        polars_lf (LazyFrame | PolarsDataFrame): The polars LazyFrame to validate.
        index_key (str | None): The column name to use as the index of the DataFrame.
            Default is None.
        index_type (str | None): The type of the index column in the DataFrame.
            Either 'str' or 'int'. Default is None. It is only applied when
            ``index_key`` is given, since there is otherwise no column to cast.

    Returns:
        LazyFrame: Normalized polars LazyFrame.

    Raises:
        NgioTableValidationError: If index_key is not found or index_type is invalid.
    """
    schema = None
    if index_key is not None:
        schema = polars_lf.collect_schema()
        if index_key not in schema:
            raise NgioTableValidationError(
                f"Index key '{index_key}' not found in LazyFrame columns."
            )

    if index_type is not None:
        if index_type not in ["int", "str"]:
            raise NgioTableValidationError(
                f"Invalid index type '{index_type}'. Must be 'int' or 'str'."
            )
        # Without an index key there is no column to cast, so the schema
        # lookup is skipped instead of failing on an unbound variable.
        if index_key is not None and schema is not None:
            index_dtype = schema[index_key]
            if index_type == "int" and not index_dtype.is_integer():
                polars_lf = polars_lf.with_columns(pl.col(index_key).cast(pl.Int64))
            elif index_type == "str" and not index_dtype == pl.String():
                polars_lf = polars_lf.with_columns(pl.col(index_key).cast(pl.String()))

    if isinstance(polars_lf, PolarsDataFrame):
        polars_lf = polars_lf.lazy()
    return polars_lf
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def normalize_anndata(
    anndata: AnnData,
    index_key: str | None = None,
) -> AnnData:
    """Normalize the observations index of the AnnData object.

    Args:
        anndata (AnnData): The AnnData object to validate.
        index_key (str | None): The name the ``obs`` index must have.
            When None the object is returned untouched. Default is None.

    Returns:
        AnnData: The input object when its ``obs`` already compares equal to
            the normalized one, otherwise a deep copy carrying the
            normalized ``obs``.
    """
    if index_key is None:
        return anndata

    # The obs index is validated against the key and always cast to 'str'.
    normalized_obs = _validate_cast_index_dtype_df(
        _validate_index_key_df(anndata.obs, index_key), "str"
    )
    if normalized_obs.equals(anndata.obs):
        return anndata

    result = deepcopy(anndata)
    result.obs = normalized_obs
    return result
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
# -----------------
|
|
265
|
+
# Conversion functions
|
|
266
|
+
# -----------------
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def convert_pandas_to_polars(
    pandas_df: DataFrame,
    index_key: str | None = None,
    index_type: Literal["int", "str"] | None = None,
) -> LazyFrame:
    """Turn a pandas DataFrame into a polars LazyFrame.

    The DataFrame is normalized first, with its named index moved back to a
    regular column before the conversion.

    Args:
        pandas_df (DataFrame): The pandas DataFrame to convert.
        index_key (str | None): The column name to use as the index of the
            DataFrame. Default is None.
        index_type (str | None): The type of the index column in the
            DataFrame. Either 'str' or 'int'. Default is None.

    Returns:
        LazyFrame: Converted and normalized polars LazyFrame.
    """
    normalized_df = normalize_pandas_df(
        pandas_df, index_key=index_key, index_type=index_type, reset_index=True
    )
    eager_frame = pl.from_pandas(normalized_df)
    return eager_frame.lazy()
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def convert_polars_to_pandas(
    polars_df: PolarsDataFrame | LazyFrame,
    index_key: str | None = None,
    index_type: Literal["int", "str"] | None = None,
    reset_index: bool = False,
) -> DataFrame:
    """Turn a polars DataFrame or LazyFrame into a pandas DataFrame.

    Args:
        polars_df (PolarsDataFrame | LazyFrame): The polars DataFrame or
            LazyFrame to convert. A LazyFrame is collected first.
        index_key (str | None): The column name to use as the index of the
            DataFrame. Default is None.
        index_type (str | None): The type of the index column in the
            DataFrame. Either 'str' or 'int'. Default is None.
        reset_index (bool): If True the index will be reset (i.e., the index
            will be converted to a column). If False, the index will be kept
            as is.

    Returns:
        DataFrame: Converted and normalized pandas DataFrame.
    """
    eager_frame = polars_df.collect() if isinstance(polars_df, LazyFrame) else polars_df
    return normalize_pandas_df(
        eager_frame.to_pandas(),
        index_key=index_key,
        index_type=index_type,
        reset_index=reset_index,
    )
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def convert_pandas_to_anndata(
    pandas_df: DataFrame,
    index_key: str | None = None,
) -> AnnData:
    """Convert a pandas DataFrame to an AnnData object.

    String and integer columns are stored as observations (``obs``), while
    the remaining numeric columns form the data matrix (``X``).

    Args:
        pandas_df (DataFrame): The pandas DataFrame to convert.
        index_key (str | None): The column name to use as the index of the DataFrame.
            Default is None.

    Returns:
        AnnData: Converted AnnData object.

    Raises:
        NgioTableValidationError: If a column has mixed or unsupported types,
            or if the index cannot be normalized to a string index.
    """
    pandas_df = normalize_pandas_df(
        pandas_df,
        index_key=index_key,
        index_type="str",
        reset_index=False,
    )

    str_columns, int_columns, num_columns = [], [], []
    for col_name in pandas_df.columns:
        column = pandas_df[col_name]
        _check_for_mixed_types(column)  # Mixed types are not allowed in the table
        col_type = _check_for_supported_types(
            column
        )  # Only string and numeric types are allowed

        if col_type == "str":
            str_columns.append(col_name)

        elif col_type == "int":
            int_columns.append(col_name)

        elif col_type == "numeric":
            num_columns.append(col_name)

    # String and integer columns become observations; column selection
    # already keeps the (string) index of the normalized DataFrame.
    obs_df = pandas_df[str_columns + int_columns]

    x_df = pandas_df[num_columns]

    if x_df.dtypes.nunique() > 1:
        # Heterogeneous numeric dtypes are unified to float64.
        x_df = x_df.astype("float64")

    if not num_columns:
        # If there are no numeric columns, create an empty array
        # to avoid AnnData failing to create the object.
        # The check is on the column list rather than `x_df.empty`, which is
        # also True for a zero-row table and would drop its numeric columns.
        x_df = np.zeros((len(obs_df), 0), dtype="float64")

    return AnnData(X=x_df, obs=obs_df)
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def convert_anndata_to_pandas(
    anndata: AnnData,
    index_key: str | None = None,
    index_type: Literal["int", "str"] | None = None,
    reset_index: bool = False,
) -> DataFrame:
    """Turn an AnnData object into a single pandas DataFrame.

    The data matrix is exported with ``AnnData.to_df`` and the observation
    columns are then written into the same DataFrame.

    Args:
        anndata (AnnData): An AnnData object to convert.
        index_key (str | None): The column name to use as the index of the
            DataFrame. Default is None.
        index_type (str | None): The type of the index column in the
            DataFrame. Either 'str' or 'int'. Default is None.
        reset_index (bool): If True the index will be reset (i.e., the index
            will be converted to a column). If False, the index will be kept
            as is.

    Returns:
        DataFrame: Converted and normalized pandas DataFrame.
    """
    merged_df = anndata.to_df()
    obs_columns = anndata.obs_keys()
    merged_df[obs_columns] = anndata.obs
    return normalize_pandas_df(
        merged_df,
        index_key=index_key,
        index_type=index_type,
        reset_index=reset_index,
    )
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def convert_anndata_to_polars(
    anndata: AnnData,
    index_key: str | None = None,
    index_type: Literal["int", "str"] | None = None,
) -> LazyFrame:
    """Turn an AnnData object into a polars LazyFrame.

    The conversion goes through pandas, with the named index moved back to a
    regular column before handing the table to polars.

    Args:
        anndata (AnnData): An AnnData object to convert.
        index_key (str | None): The column name to use as the index of the
            DataFrame. Default is None.
        index_type (str | None): The type of the index column in the
            DataFrame. Either 'str' or 'int'. Default is None.

    Returns:
        LazyFrame: Converted and normalized polars LazyFrame.
    """
    intermediate_df = convert_anndata_to_pandas(
        anndata, index_key=index_key, index_type=index_type, reset_index=True
    )
    eager_frame = pl.from_pandas(intermediate_df)
    return eager_frame.lazy()
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def convert_polars_to_anndata(
    polars_df: LazyFrame | PolarsDataFrame,
    index_key: str | None = None,
) -> AnnData:
    """Turn a polars LazyFrame or DataFrame into an AnnData object.

    The conversion goes through pandas; a LazyFrame is collected first.

    Args:
        polars_df (LazyFrame | PolarsDataFrame): The polars LazyFrame or
            DataFrame to convert.
        index_key (str | None): The column name to use as the index of the
            DataFrame. Default is None.

    Returns:
        AnnData: Converted AnnData object.
    """
    eager_frame = polars_df.collect() if isinstance(polars_df, LazyFrame) else polars_df
    return convert_pandas_to_anndata(eager_frame.to_pandas(), index_key=index_key)
|
ngio/tables/tables_container.py
CHANGED
|
@@ -90,6 +90,7 @@ class ImplementedTables:
|
|
|
90
90
|
version: str,
|
|
91
91
|
handler: ZarrGroupHandler,
|
|
92
92
|
backend_name: str | None = None,
|
|
93
|
+
strict: bool = True,
|
|
93
94
|
) -> Table:
|
|
94
95
|
"""Try to get a handler for the given store based on the metadata version."""
|
|
95
96
|
_errors = {}
|
|
@@ -102,7 +103,12 @@ class ImplementedTables:
|
|
|
102
103
|
)
|
|
103
104
|
return table
|
|
104
105
|
except Exception as e:
|
|
105
|
-
|
|
106
|
+
if strict:
|
|
107
|
+
raise NgioValidationError(
|
|
108
|
+
f"Could not load table {name} from handler. Error: {e}"
|
|
109
|
+
) from e
|
|
110
|
+
else:
|
|
111
|
+
_errors[name] = e
|
|
106
112
|
# If no table was found, we can try to load the table from a generic table
|
|
107
113
|
try:
|
|
108
114
|
table = GenericTable._from_handler(
|
|
@@ -207,7 +213,9 @@ class TablesContainer:
|
|
|
207
213
|
filtered_tables.append(table_name)
|
|
208
214
|
return filtered_tables
|
|
209
215
|
|
|
210
|
-
def get(
|
|
216
|
+
def get(
|
|
217
|
+
self, name: str, backend_name: str | None = None, strict: bool = True
|
|
218
|
+
) -> Table:
|
|
211
219
|
"""Get a label from the group."""
|
|
212
220
|
if name not in self.list():
|
|
213
221
|
raise KeyError(f"Table '{name}' not found in the group.")
|
|
@@ -220,6 +228,7 @@ class TablesContainer:
|
|
|
220
228
|
version=table_version,
|
|
221
229
|
handler=table_handler,
|
|
222
230
|
backend_name=backend_name,
|
|
231
|
+
strict=strict,
|
|
223
232
|
)
|
|
224
233
|
|
|
225
234
|
def add(
|
|
@@ -237,7 +246,9 @@ class TablesContainer:
|
|
|
237
246
|
"Use overwrite=True to replace it."
|
|
238
247
|
)
|
|
239
248
|
|
|
240
|
-
table_handler = self._group_handler.derive_handler(
|
|
249
|
+
table_handler = self._group_handler.derive_handler(
|
|
250
|
+
path=name, overwrite=overwrite
|
|
251
|
+
)
|
|
241
252
|
|
|
242
253
|
if backend is None:
|
|
243
254
|
backend = table.backend_name
|
ngio/tables/v1/_feature_table.py
CHANGED
|
@@ -9,8 +9,8 @@ from typing import Literal
|
|
|
9
9
|
import pandas as pd
|
|
10
10
|
from pydantic import BaseModel
|
|
11
11
|
|
|
12
|
-
from ngio.tables.
|
|
13
|
-
from ngio.tables.backends import
|
|
12
|
+
from ngio.tables.backends import BackendMeta, ImplementedTableBackends
|
|
13
|
+
from ngio.tables.backends._utils import normalize_pandas_df
|
|
14
14
|
from ngio.utils import NgioValueError, ZarrGroupHandler
|
|
15
15
|
|
|
16
16
|
|
|
@@ -20,12 +20,11 @@ class RegionMeta(BaseModel):
|
|
|
20
20
|
path: str
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
class FeatureTableMeta(
|
|
23
|
+
class FeatureTableMeta(BackendMeta):
|
|
24
24
|
"""Metadata for the ROI table."""
|
|
25
25
|
|
|
26
26
|
fractal_table_version: Literal["1"] = "1"
|
|
27
27
|
type: Literal["feature_table"] = "feature_table"
|
|
28
|
-
backend: str | None = None
|
|
29
28
|
region: RegionMeta | None = None
|
|
30
29
|
instance_key: str = "label"
|
|
31
30
|
|
|
@@ -53,8 +52,11 @@ class FeatureTableV1:
|
|
|
53
52
|
if dataframe is None:
|
|
54
53
|
self._dataframe = None
|
|
55
54
|
else:
|
|
56
|
-
self._dataframe =
|
|
57
|
-
dataframe,
|
|
55
|
+
self._dataframe = normalize_pandas_df(
|
|
56
|
+
dataframe,
|
|
57
|
+
index_key=self._instance_key,
|
|
58
|
+
index_type="int",
|
|
59
|
+
reset_index=False,
|
|
58
60
|
)
|
|
59
61
|
self._table_backend = None
|
|
60
62
|
|
|
@@ -107,7 +109,7 @@ class FeatureTableV1:
|
|
|
107
109
|
)
|
|
108
110
|
|
|
109
111
|
if self._dataframe is None and self._table_backend is not None:
|
|
110
|
-
self._dataframe = self._table_backend.
|
|
112
|
+
self._dataframe = self._table_backend.load_as_pandas_df()
|
|
111
113
|
|
|
112
114
|
if self._dataframe is None:
|
|
113
115
|
raise NgioValueError(
|
|
@@ -118,7 +120,12 @@ class FeatureTableV1:
|
|
|
118
120
|
@dataframe.setter
|
|
119
121
|
def dataframe(self, dataframe: pd.DataFrame) -> None:
|
|
120
122
|
"""Set the table as a DataFrame."""
|
|
121
|
-
self._dataframe =
|
|
123
|
+
self._dataframe = normalize_pandas_df(
|
|
124
|
+
dataframe,
|
|
125
|
+
index_key=self._instance_key,
|
|
126
|
+
index_type="int",
|
|
127
|
+
reset_index=False,
|
|
128
|
+
)
|
|
122
129
|
|
|
123
130
|
@classmethod
|
|
124
131
|
def _from_handler(
|
|
@@ -143,7 +150,7 @@ class FeatureTableV1:
|
|
|
143
150
|
)
|
|
144
151
|
meta.backend = backend_name
|
|
145
152
|
|
|
146
|
-
if not backend.
|
|
153
|
+
if not backend.implements_pandas:
|
|
147
154
|
raise NgioValueError(
|
|
148
155
|
"The backend does not implement the dataframe protocol."
|
|
149
156
|
)
|
|
@@ -177,6 +184,8 @@ class FeatureTableV1:
|
|
|
177
184
|
"Please add the table to a OME-Zarr Image before calling consolidate."
|
|
178
185
|
)
|
|
179
186
|
|
|
180
|
-
self._table_backend.
|
|
181
|
-
self.dataframe,
|
|
187
|
+
self._table_backend.write(
|
|
188
|
+
self.dataframe,
|
|
189
|
+
metadata=self._meta.model_dump(exclude_none=True),
|
|
190
|
+
mode="pandas",
|
|
182
191
|
)
|
ngio/tables/v1/_generic_table.py
CHANGED
|
@@ -2,22 +2,21 @@
|
|
|
2
2
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
from anndata import AnnData
|
|
5
|
-
from pydantic import BaseModel
|
|
6
5
|
|
|
7
|
-
from ngio.tables.backends import
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
6
|
+
from ngio.tables.backends import (
|
|
7
|
+
BackendMeta,
|
|
8
|
+
ImplementedTableBackends,
|
|
9
|
+
convert_anndata_to_pandas,
|
|
10
|
+
convert_pandas_to_anndata,
|
|
11
11
|
)
|
|
12
12
|
from ngio.utils import NgioValueError, ZarrGroupHandler
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
class GenericTableMeta(
|
|
15
|
+
class GenericTableMeta(BackendMeta):
|
|
16
16
|
"""Metadata for the ROI table."""
|
|
17
17
|
|
|
18
18
|
fractal_table_version: str | None = None
|
|
19
19
|
type: str | None = None
|
|
20
|
-
backend: str | None = None
|
|
21
20
|
|
|
22
21
|
|
|
23
22
|
class GenericTable:
|
|
@@ -88,7 +87,7 @@ class GenericTable:
|
|
|
88
87
|
return self._dataframe
|
|
89
88
|
|
|
90
89
|
if self._anndata is not None:
|
|
91
|
-
return
|
|
90
|
+
return convert_anndata_to_pandas(self._anndata)
|
|
92
91
|
|
|
93
92
|
raise NgioValueError("No table loaded.")
|
|
94
93
|
|
|
@@ -105,7 +104,9 @@ class GenericTable:
|
|
|
105
104
|
return self._anndata
|
|
106
105
|
|
|
107
106
|
if self._dataframe is not None:
|
|
108
|
-
return
|
|
107
|
+
return convert_pandas_to_anndata(
|
|
108
|
+
self._dataframe,
|
|
109
|
+
)
|
|
109
110
|
raise NgioValueError("No table loaded.")
|
|
110
111
|
|
|
111
112
|
@anndata.setter
|
|
@@ -138,8 +139,8 @@ class GenericTable:
|
|
|
138
139
|
anndata = backend.load_as_anndata()
|
|
139
140
|
table = cls(anndata=anndata)
|
|
140
141
|
|
|
141
|
-
elif backend.
|
|
142
|
-
dataframe = backend.
|
|
142
|
+
elif backend.implements_pandas():
|
|
143
|
+
dataframe = backend.load_as_pandas_df()
|
|
143
144
|
table = cls(dataframe=dataframe)
|
|
144
145
|
else:
|
|
145
146
|
raise NgioValueError(
|
|
@@ -173,10 +174,14 @@ class GenericTable:
|
|
|
173
174
|
)
|
|
174
175
|
|
|
175
176
|
if self.anndata_native:
|
|
176
|
-
self._table_backend.
|
|
177
|
-
self.anndata,
|
|
177
|
+
self._table_backend.write(
|
|
178
|
+
self.anndata,
|
|
179
|
+
metadata=self._meta.model_dump(exclude_none=True),
|
|
180
|
+
mode="anndata",
|
|
178
181
|
)
|
|
179
182
|
else:
|
|
180
|
-
self._table_backend.
|
|
181
|
-
self.dataframe,
|
|
183
|
+
self._table_backend.write(
|
|
184
|
+
self.dataframe,
|
|
185
|
+
metadata=self._meta.model_dump(exclude_none=True),
|
|
186
|
+
mode="pandas",
|
|
182
187
|
)
|