ngio 0.1.6__py3-none-any.whl → 0.2.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. ngio/__init__.py +31 -5
  2. ngio/common/__init__.py +44 -0
  3. ngio/common/_array_pipe.py +160 -0
  4. ngio/common/_axes_transforms.py +63 -0
  5. ngio/common/_common_types.py +5 -0
  6. ngio/common/_dimensions.py +113 -0
  7. ngio/common/_pyramid.py +223 -0
  8. ngio/{core/roi.py → common/_roi.py} +22 -23
  9. ngio/common/_slicer.py +97 -0
  10. ngio/{pipes/_zoom_utils.py → common/_zoom.py} +2 -78
  11. ngio/hcs/__init__.py +60 -0
  12. ngio/images/__init__.py +23 -0
  13. ngio/images/abstract_image.py +240 -0
  14. ngio/images/create.py +251 -0
  15. ngio/images/image.py +389 -0
  16. ngio/images/label.py +236 -0
  17. ngio/images/omezarr_container.py +535 -0
  18. ngio/ome_zarr_meta/__init__.py +35 -0
  19. ngio/ome_zarr_meta/_generic_handlers.py +320 -0
  20. ngio/ome_zarr_meta/_meta_handlers.py +142 -0
  21. ngio/ome_zarr_meta/ngio_specs/__init__.py +63 -0
  22. ngio/ome_zarr_meta/ngio_specs/_axes.py +481 -0
  23. ngio/ome_zarr_meta/ngio_specs/_channels.py +378 -0
  24. ngio/ome_zarr_meta/ngio_specs/_dataset.py +134 -0
  25. ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +5 -0
  26. ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +434 -0
  27. ngio/ome_zarr_meta/ngio_specs/_pixel_size.py +84 -0
  28. ngio/ome_zarr_meta/v04/__init__.py +11 -0
  29. ngio/ome_zarr_meta/v04/_meta_handlers.py +54 -0
  30. ngio/ome_zarr_meta/v04/_v04_spec_utils.py +412 -0
  31. ngio/tables/__init__.py +21 -5
  32. ngio/tables/_validators.py +192 -0
  33. ngio/tables/backends/__init__.py +8 -0
  34. ngio/tables/backends/_abstract_backend.py +71 -0
  35. ngio/tables/backends/_anndata_utils.py +194 -0
  36. ngio/tables/backends/_anndata_v1.py +75 -0
  37. ngio/tables/backends/_json_v1.py +56 -0
  38. ngio/tables/backends/_table_backends.py +102 -0
  39. ngio/tables/tables_container.py +300 -0
  40. ngio/tables/v1/__init__.py +6 -5
  41. ngio/tables/v1/_feature_table.py +161 -0
  42. ngio/tables/v1/_generic_table.py +99 -182
  43. ngio/tables/v1/_masking_roi_table.py +175 -0
  44. ngio/tables/v1/_roi_table.py +226 -0
  45. ngio/utils/__init__.py +23 -10
  46. ngio/utils/_datasets.py +51 -0
  47. ngio/utils/_errors.py +10 -4
  48. ngio/utils/_zarr_utils.py +378 -0
  49. {ngio-0.1.6.dist-info → ngio-0.2.0a2.dist-info}/METADATA +18 -39
  50. ngio-0.2.0a2.dist-info/RECORD +53 -0
  51. ngio/core/__init__.py +0 -7
  52. ngio/core/dimensions.py +0 -122
  53. ngio/core/image_handler.py +0 -228
  54. ngio/core/image_like_handler.py +0 -549
  55. ngio/core/label_handler.py +0 -410
  56. ngio/core/ngff_image.py +0 -387
  57. ngio/core/utils.py +0 -287
  58. ngio/io/__init__.py +0 -19
  59. ngio/io/_zarr.py +0 -88
  60. ngio/io/_zarr_array_utils.py +0 -0
  61. ngio/io/_zarr_group_utils.py +0 -60
  62. ngio/iterators/__init__.py +0 -1
  63. ngio/ngff_meta/__init__.py +0 -27
  64. ngio/ngff_meta/fractal_image_meta.py +0 -1267
  65. ngio/ngff_meta/meta_handler.py +0 -92
  66. ngio/ngff_meta/utils.py +0 -235
  67. ngio/ngff_meta/v04/__init__.py +0 -6
  68. ngio/ngff_meta/v04/specs.py +0 -158
  69. ngio/ngff_meta/v04/zarr_utils.py +0 -376
  70. ngio/pipes/__init__.py +0 -7
  71. ngio/pipes/_slicer_transforms.py +0 -176
  72. ngio/pipes/_transforms.py +0 -33
  73. ngio/pipes/data_pipe.py +0 -52
  74. ngio/tables/_ad_reader.py +0 -80
  75. ngio/tables/_utils.py +0 -301
  76. ngio/tables/tables_group.py +0 -252
  77. ngio/tables/v1/feature_tables.py +0 -182
  78. ngio/tables/v1/masking_roi_tables.py +0 -243
  79. ngio/tables/v1/roi_tables.py +0 -285
  80. ngio/utils/_common_types.py +0 -5
  81. ngio/utils/_pydantic_utils.py +0 -52
  82. ngio-0.1.6.dist-info/RECORD +0 -44
  83. {ngio-0.1.6.dist-info → ngio-0.2.0a2.dist-info}/WHEEL +0 -0
  84. {ngio-0.1.6.dist-info → ngio-0.2.0a2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,412 @@
1
+ """Utilities for OME-Zarr v04 specs.
2
+
3
+ This module provides a set of classes to internally handle the metadata
4
+ of the OME-Zarr v04 specification.
5
+
6
+ For Images and Labels implements the following functionalities:
7
+ - A function to find if a dict view of the metadata is a valid OME-Zarr v04 metadata.
8
+ - A function to convert a v04 image metadata to a ngio image metadata.
9
+ - A function to convert a ngio image metadata to a v04 image metadata.
10
+ """
11
+
12
+ from ome_zarr_models.common.multiscales import ValidTransform as ValidTransformV04
13
+ from ome_zarr_models.v04.axes import Axis as AxisV04
14
+ from ome_zarr_models.v04.coordinate_transformations import VectorScale as VectorScaleV04
15
+ from ome_zarr_models.v04.coordinate_transformations import (
16
+ VectorTranslation as VectorTranslationV04,
17
+ )
18
+ from ome_zarr_models.v04.image import ImageAttrs as ImageAttrsV04
19
+ from ome_zarr_models.v04.image_label import ImageLabelAttrs as LabelAttrsV04
20
+ from ome_zarr_models.v04.multiscales import Dataset as DatasetV04
21
+ from ome_zarr_models.v04.multiscales import Multiscale as MultiscaleV04
22
+ from ome_zarr_models.v04.omero import Channel as ChannelV04
23
+ from ome_zarr_models.v04.omero import Omero as OmeroV04
24
+ from ome_zarr_models.v04.omero import Window as WindowV04
25
+ from pydantic import ValidationError
26
+
27
+ from ngio.ome_zarr_meta.ngio_specs import (
28
+ AxesSetup,
29
+ Axis,
30
+ AxisType,
31
+ Channel,
32
+ ChannelsMeta,
33
+ ChannelVisualisation,
34
+ Dataset,
35
+ ImageLabelSource,
36
+ NgioImageMeta,
37
+ NgioLabelMeta,
38
+ default_channel_name,
39
+ )
40
+ from ngio.ome_zarr_meta.ngio_specs._ngio_image import NgffVersion
41
+
42
+
43
def _is_v04_image_meta(metadata: dict) -> ImageAttrsV04 | ValidationError:
    """Try to parse a metadata dict as OME-Zarr v04 image attributes.

    Args:
        metadata (dict): The metadata to check.

    Returns:
        ImageAttrsV04 | ValidationError: The parsed model when validation
            succeeds, otherwise the ValidationError (returned, not raised).
    """
    try:
        parsed = ImageAttrsV04(**metadata)
    except ValidationError as validation_error:
        # The error is handed back so the caller can decide how to report it.
        return validation_error
    return parsed
56
+
57
+
58
def _is_v04_label_meta(metadata: dict) -> LabelAttrsV04 | ValidationError:
    """Try to parse a metadata dict as OME-Zarr v04 label attributes.

    Args:
        metadata (dict): The metadata to check.

    Returns:
        LabelAttrsV04 | ValidationError: The parsed model when validation
            succeeds, otherwise the ValidationError (returned, not raised).
    """
    try:
        return LabelAttrsV04(**metadata)
    except ValidationError as e:
        # The error is handed back so the caller can decide how to report it.
        return e
72
+
73
+
74
def _v04_omero_to_channels(v04_omero: OmeroV04 | None) -> ChannelsMeta | None:
    """Convert a v04 omero block to ngio channels metadata.

    Args:
        v04_omero (OmeroV04 | None): The v04 omero metadata, if any.

    Returns:
        ChannelsMeta | None: The ngio channels metadata, or None when no
            omero metadata is given.
    """
    if v04_omero is None:
        return None

    ngio_channels = []
    for idx, v04_channel in enumerate(v04_omero.channels):
        # Work on a copy: popping from `model_extra` directly would strip the
        # keys from the input model and break a second conversion.
        channel_extra = dict(v04_channel.model_extra or {})

        if "label" in channel_extra:
            label = channel_extra.pop("label")
        else:
            label = default_channel_name(idx)

        if "wavelength_id" in channel_extra:
            wavelength_id = channel_extra.pop("wavelength_id")
        else:
            # Fall back to the label when no wavelength id is stored.
            wavelength_id = label

        if "active" in channel_extra:
            active = channel_extra.pop("active")
        else:
            active = True

        # Whatever is left in the extras is forwarded to the visualisation.
        channel_visualisation = ChannelVisualisation(
            color=v04_channel.color,
            start=v04_channel.window.start,
            end=v04_channel.window.end,
            min=v04_channel.window.min,
            max=v04_channel.window.max,
            active=active,
            **channel_extra,
        )

        ngio_channels.append(
            Channel(
                label=label,
                wavelength_id=wavelength_id,
                channel_visualisation=channel_visualisation,
            )
        )

    v04_omero_extra = v04_omero.model_extra if v04_omero.model_extra is not None else {}
    return ChannelsMeta(channels=ngio_channels, **v04_omero_extra)
120
+
121
+
122
def _compute_scale_translation(
    v04_transforms: ValidTransformV04,
    scale: list[float],
    translation: list[float],
) -> tuple[list[float], list[float]]:
    """Fold a sequence of v04 transforms into a scale and a translation.

    Scale transforms are multiplied element-wise into ``scale`` and
    translation transforms are added element-wise into ``translation``.
    The input lists are not modified; new lists are returned.

    Raises:
        NotImplementedError: If a transform is neither a vector scale nor a
            vector translation.
        ValueError: If a transform vector and the running vector differ in
            length (``zip(..., strict=True)``).
    """
    for transform in v04_transforms:
        if isinstance(transform, VectorScaleV04):
            factors = zip(scale, transform.scale, strict=True)
            scale = [current * factor for current, factor in factors]
            continue

        if isinstance(transform, VectorTranslationV04):
            offsets = zip(translation, transform.translation, strict=True)
            translation = [current + offset for current, offset in offsets]
            continue

        raise NotImplementedError(
            f"Coordinate transformation {transform} is not supported."
        )
    return scale, translation
141
+
142
+
143
def _v04_to_ngio_datasets(
    v04_multiscale: MultiscaleV04,
    axes_setup: AxesSetup,
    allow_non_canonical_axes: bool = False,
    strict_canonical_order: bool = True,
) -> list[Dataset]:
    """Convert a v04 multiscale to a list of ngio datasets.

    The multiscale-level coordinate transformations (if any) are combined
    with each dataset's own transformations to obtain the on-disk scale and
    translation of that dataset.

    Args:
        v04_multiscale (MultiscaleV04): The v04 multiscale to convert.
        axes_setup (AxesSetup): The axes setup forwarded to each Dataset.
        allow_non_canonical_axes (bool): Forwarded to each Dataset.
        strict_canonical_order (bool): Forwarded to each Dataset.

    Returns:
        list[Dataset]: One ngio dataset per v04 dataset, in the same order.
    """
    datasets = []

    global_scale = [1.0] * len(v04_multiscale.axes)
    global_translation = [0.0] * len(v04_multiscale.axes)

    if v04_multiscale.coordinateTransformations is not None:
        global_scale, global_translation = _compute_scale_translation(
            v04_multiscale.coordinateTransformations, global_scale, global_translation
        )

    for v04_dataset in v04_multiscale.datasets:
        # Fresh Axis objects are built for every dataset so that datasets
        # never share axis instances.
        axes = []
        for v04_axis in v04_multiscale.axes:
            # The v04 model types the unit as a generic JSON value; normalise
            # anything that is not already a string (or None) to a string.
            unit = v04_axis.unit
            if unit is not None and not isinstance(unit, str):
                unit = str(unit)
            axes.append(
                Axis(
                    on_disk_name=v04_axis.name,
                    axis_type=AxisType(v04_axis.type),
                    # Pass the normalised unit, not the raw JSON value.
                    unit=unit,  # type: ignore
                )
            )

        _on_disk_scale, _on_disk_translation = _compute_scale_translation(
            v04_dataset.coordinateTransformations, global_scale, global_translation
        )
        datasets.append(
            Dataset(
                path=v04_dataset.path,
                on_disk_axes=axes,
                on_disk_scale=_on_disk_scale,
                on_disk_translation=_on_disk_translation,
                axes_setup=axes_setup,
                allow_non_canonical_axes=allow_non_canonical_axes,
                strict_canonical_order=strict_canonical_order,
            )
        )
    return datasets
191
+
192
+
193
def v04_to_ngio_image_meta(
    metadata: dict,
    axes_setup: AxesSetup | None = None,
    allow_non_canonical_axes: bool = False,
    strict_canonical_order: bool = True,
) -> tuple[bool, NgioImageMeta | ValidationError]:
    """Convert a v04 image metadata to a ngio image metadata.

    Args:
        metadata (dict): The v04 image metadata.
        axes_setup (AxesSetup, optional): The axes setup. This is
            required to convert image with non-canonical axes names.
            A default ``AxesSetup()`` is used when omitted.
        allow_non_canonical_axes (bool, optional): Allow non-canonical axes.
        strict_canonical_order (bool, optional): Strict canonical order.

    Returns:
        tuple[bool, NgioImageMeta | ValidationError]: ``(True, meta)`` when
            the metadata is valid v04 image metadata, otherwise
            ``(False, validation_error)``.

    Raises:
        NotImplementedError: If the metadata holds more than one multiscale.
    """
    parsed = _is_v04_image_meta(metadata)
    if isinstance(parsed, ValidationError):
        return False, parsed

    if len(parsed.multiscales) > 1:
        raise NotImplementedError(
            "Multiple multiscales in a single image are not supported in ngio."
        )
    multiscale = parsed.multiscales[0]

    channels_meta = _v04_omero_to_channels(parsed.omero)
    if axes_setup is None:
        axes_setup = AxesSetup()
    datasets = _v04_to_ngio_datasets(
        multiscale,
        axes_setup=axes_setup,
        allow_non_canonical_axes=allow_non_canonical_axes,
        strict_canonical_order=strict_canonical_order,
    )

    # The name may come back as a non-string value; coerce it when present.
    name = multiscale.name
    if not (name is None or isinstance(name, str)):
        name = str(name)

    ngio_meta = NgioImageMeta(
        version="0.4",
        name=name,
        datasets=datasets,
        channels=channels_meta,
    )
    return True, ngio_meta
240
+
241
+
242
def v04_to_ngio_label_meta(
    metadata: dict,
    axes_setup: AxesSetup | None = None,
    allow_non_canonical_axes: bool = False,
    strict_canonical_order: bool = True,
) -> tuple[bool, NgioLabelMeta | ValidationError]:
    """Convert a v04 label metadata to a ngio label metadata.

    Args:
        metadata (dict): The v04 label metadata.
        axes_setup (AxesSetup, optional): The axes setup. This is
            required to convert image with non-canonical axes names.
            A default ``AxesSetup()`` is used when omitted.
        allow_non_canonical_axes (bool, optional): Allow non-canonical axes.
        strict_canonical_order (bool, optional): Strict canonical order.

    Returns:
        tuple[bool, NgioLabelMeta | ValidationError]: ``(True, meta)`` when
            the metadata is valid v04 label metadata, otherwise
            ``(False, validation_error)``.

    Raises:
        NotImplementedError: If the metadata holds more than one multiscale.
    """
    v04_label = _is_v04_label_meta(metadata)
    if isinstance(v04_label, ValidationError):
        return False, v04_label

    if len(v04_label.multiscales) > 1:
        raise NotImplementedError(
            "Multiple multiscales in a single image are not supported in ngio."
        )

    v04_multiscale = v04_label.multiscales[0]

    axes_setup = axes_setup if axes_setup is not None else AxesSetup()
    datasets = _v04_to_ngio_datasets(
        v04_multiscale,
        axes_setup=axes_setup,
        allow_non_canonical_axes=allow_non_canonical_axes,
        strict_canonical_order=strict_canonical_order,
    )

    # A label without a source keeps `image_label` unset; otherwise the
    # source image reference is wrapped in an ImageLabelSource.
    source = v04_label.image_label.source
    if source is None:
        image_label_source = None
    else:
        image_label_source = ImageLabelSource(
            version=NgffVersion.v04,
            source={"image": source.image},
        )

    # The name may come back as a non-string value; coerce it when present.
    name = v04_multiscale.name
    if name is not None and not isinstance(name, str):
        name = str(name)

    return True, NgioLabelMeta(
        version="0.4",
        name=name,
        datasets=datasets,
        image_label=image_label_source,
    )
302
+
303
+
304
def _ngio_to_v04_multiscale(datasets: list[Dataset]) -> MultiscaleV04:
    """Convert a ngio multiscale to a v04 multiscale.

    The axes are taken from the first dataset. Every dataset gets a scale
    transformation, followed by a translation transformation when any
    component of its on-disk translation is non-zero.

    Args:
        datasets (list[Dataset]): The ngio datasets. Must not be empty.

    Returns:
        MultiscaleV04: The v04 multiscale.
    """
    ax_mapper = datasets[0].axes_mapper
    v04_axes = [
        AxisV04(
            name=axis.on_disk_name,
            type=axis.axis_type.value if axis.axis_type is not None else None,
            unit=axis.unit.value if axis.unit is not None else None,
        )
        for axis in ax_mapper.on_disk_axes
    ]

    v04_datasets = []
    for dataset in datasets:
        scale = VectorScaleV04(type="scale", scale=list(dataset._on_disk_scale))
        # Test each component: a plain `sum(...) > 0` would silently drop
        # negative translations and translations that sum to zero.
        if any(offset != 0 for offset in dataset._on_disk_translation):
            transform = (
                scale,
                VectorTranslationV04(
                    type="translation", translation=list(dataset._on_disk_translation)
                ),
            )
        else:
            transform = (scale,)

        v04_datasets.append(
            DatasetV04(path=dataset.path, coordinateTransformations=transform)
        )
    return MultiscaleV04(
        axes=v04_axes,
        datasets=tuple(v04_datasets),
        version="0.4",
    )
347
+
348
+
349
def _ngio_to_v04_omero(channels: ChannelsMeta | None) -> OmeroV04 | None:
    """Convert ngio channels metadata to a v04 omero block.

    Label, wavelength id and active flag are written as extra fields of each
    v04 channel; extra fields stored on the channel visualisation are merged
    on top of them. Returns None when no channels metadata is given.
    """
    if channels is None:
        return None

    v04_channels = []
    for ngio_channel in channels.channels:
        visualisation = ngio_channel.channel_visualisation

        channel_extras = {
            "label": ngio_channel.label,
            "wavelength_id": ngio_channel.wavelength_id,
            "active": visualisation.active,
        }
        visualisation_extras = visualisation.model_extra
        if visualisation_extras is not None:
            channel_extras.update(visualisation_extras)

        window = WindowV04(
            start=visualisation.start,
            end=visualisation.end,
            min=visualisation.min,
            max=visualisation.max,
        )
        v04_channels.append(
            ChannelV04(
                color=visualisation.valid_color,
                window=window,
                **channel_extras,
            )
        )

    omero_extras = channels.model_extra
    if omero_extras is None:
        omero_extras = {}
    return OmeroV04(channels=v04_channels, **omero_extras)
379
+
380
+
381
def ngio_to_v04_image_meta(metadata: NgioImageMeta) -> dict:
    """Serialise a ngio image metadata as a v04 image metadata dict.

    Args:
        metadata (NgioImageMeta): The ngio image metadata.

    Returns:
        dict: The v04 image metadata, with None-valued fields left out.
    """
    multiscale = _ngio_to_v04_multiscale(metadata.datasets)
    omero = _ngio_to_v04_omero(metadata._channels_meta)
    image_attrs = ImageAttrsV04(multiscales=[multiscale], omero=omero)
    return image_attrs.model_dump(exclude_none=True)
395
+
396
+
397
def ngio_to_v04_label_meta(metadata: NgioLabelMeta) -> dict:
    """Serialise a ngio label metadata as a v04 label metadata dict.

    Args:
        metadata (NgioLabelMeta): The ngio label metadata.

    Returns:
        dict: The v04 label metadata, with None-valued fields left out.
    """
    multiscale = _ngio_to_v04_multiscale(metadata.datasets)
    image_label_dump = metadata.image_label.model_dump()  # type: ignore
    label_attrs = LabelAttrsV04(
        multiscales=[multiscale],
        # image_label is aliased as 'image-label'
        image_label=image_label_dump,  # type: ignore
    )
    return label_attrs.model_dump(exclude_none=True)
ngio/tables/__init__.py CHANGED
@@ -1,11 +1,27 @@
1
- """Module for handling tables in the Fractal format."""
1
+ """Ngio Tables implementations."""
2
2
 
3
- from ngio.tables.tables_group import (
3
+ from ngio.tables.backends import ImplementedTableBackends
4
+ from ngio.tables.tables_container import (
4
5
  FeatureTable,
5
6
  MaskingROITable,
6
- ROITable,
7
+ RoiTable,
7
8
  Table,
8
- TableGroup,
9
+ TablesContainer,
10
+ TypedTable,
11
+ open_table,
12
+ open_tables_container,
9
13
  )
14
+ from ngio.tables.v1._generic_table import GenericTable
10
15
 
11
- __all__ = ["Table", "ROITable", "FeatureTable", "MaskingROITable", "TableGroup"]
16
+ __all__ = [
17
+ "FeatureTable",
18
+ "GenericTable",
19
+ "ImplementedTableBackends",
20
+ "MaskingROITable",
21
+ "RoiTable",
22
+ "Table",
23
+ "TablesContainer",
24
+ "TypedTable",
25
+ "open_table",
26
+ "open_tables_container",
27
+ ]
@@ -0,0 +1,192 @@
1
+ from collections.abc import Iterable
2
+ from typing import Protocol
3
+
4
+ import pandas as pd
5
+ import pandas.api.types as ptypes
6
+
7
+ from ngio.utils import (
8
+ NgioTableValidationError,
9
+ )
10
+
11
+
12
class TableValidator(Protocol):
    """Structural type for table validators: a callable on a DataFrame."""

    def __call__(self, table: pd.DataFrame) -> pd.DataFrame:
        """Validate the table DataFrame.

        A validator is any callable that receives a DataFrame and returns a
        DataFrame. A valid table is returned as is. For an invalid table the
        validator may either return a repaired DataFrame or raise a
        NgioTableValidationError.

        Args:
            table (pd.DataFrame): The DataFrame to validate.

        Returns:
            pd.DataFrame: The validated DataFrame.
        """
        ...
31
+
32
+
33
def validate_table(
    table_df: pd.DataFrame,
    validators: Iterable[TableValidator] | None = None,
) -> pd.DataFrame:
    """Run a DataFrame through a chain of validators.

    Each validator receives the output of the previous one, in iteration
    order. With no validators the input DataFrame is returned unchanged.

    Args:
        table_df (pd.DataFrame): The DataFrame to validate.
        validators (Iterable[TableValidator] | None): The callables used to
            validate the table. Default is None.

    Returns:
        pd.DataFrame: The validated DataFrame.
    """
    checks = validators if validators else []

    validated = table_df
    for check in checks:
        validated = check(validated)
    return validated
54
+
55
+
56
+ ####################################################################################################
57
+ #
58
+ # Common table validators
59
+ #
60
+ ####################################################################################################
61
+ def validate_index_key(
62
+ dataframe: pd.DataFrame, index_key: str | None, overwrite: bool = False
63
+ ) -> pd.DataFrame:
64
+ """Correctly set the index of the DataFrame.
65
+
66
+ This function checks if the index_key is present in the DataFrame.
67
+ If not it tries to set sensible defaults.
68
+
69
+ In order:
70
+ - If index_key is None, nothing can be done.
71
+ - If index_key is already the index of the DataFrame, nothing is done.
72
+ - If index_key is in the columns, we set the index to that column.
73
+ - If current index is None, we set the index to the index_key.
74
+ - If current index is not None and overwrite is True,
75
+ we set the index to the index_key.
76
+
77
+ """
78
+ if index_key is None:
79
+ # Nothing to do
80
+ return dataframe
81
+
82
+ if dataframe.index.name == index_key:
83
+ # Index is already set to index_key correctly
84
+ return dataframe
85
+
86
+ if index_key in dataframe.columns:
87
+ dataframe = dataframe.set_index(index_key)
88
+ return dataframe
89
+
90
+ if dataframe.index.name is None:
91
+ dataframe.index.name = index_key
92
+ return dataframe
93
+
94
+ elif overwrite:
95
+ dataframe.index.name = index_key
96
+ return dataframe
97
+ else:
98
+ raise NgioTableValidationError(
99
+ f"Index key {index_key} not found in DataFrame. "
100
+ f"Current index is {dataframe.index.name}. If you want to overwrite the "
101
+ "index set overwrite=True."
102
+ )
103
+
104
+
105
+ def validate_index_dtype(dataframe: pd.DataFrame, index_type: str) -> pd.DataFrame:
106
+ """Check if the index of the DataFrame has the correct dtype."""
107
+ match index_type:
108
+ case "str":
109
+ if ptypes.is_integer_dtype(dataframe.index):
110
+ # Convert the int index to string is generally safe
111
+ dataframe = dataframe.set_index(dataframe.index.astype(str))
112
+
113
+ if not ptypes.is_string_dtype(dataframe.index):
114
+ raise NgioTableValidationError(
115
+ f"Table index must be of string type, got {dataframe.index.dtype}"
116
+ )
117
+
118
+ case "int":
119
+ if ptypes.is_string_dtype(dataframe.index):
120
+ # Try to convert the string index to int
121
+ try:
122
+ dataframe = dataframe.set_index(dataframe.index.astype(int))
123
+ except ValueError as e:
124
+ if "invalid literal for int() with base 10" in str(e):
125
+ raise NgioTableValidationError(
126
+ "Table index must be of integer type, got str."
127
+ f" We tried implicit conversion and failed: {e}"
128
+ ) from None
129
+ else:
130
+ raise e from e
131
+
132
+ if not ptypes.is_integer_dtype(dataframe.index):
133
+ raise NgioTableValidationError(
134
+ f"Table index must be of integer type, got {dataframe.index.dtype}"
135
+ )
136
+ case _:
137
+ raise ValueError(f"index_type {index_type} not recognized")
138
+
139
+ return dataframe
140
+
141
+
142
+ def validate_columns(
143
+ table_df: pd.DataFrame,
144
+ required_columns: list[str],
145
+ optional_columns: list[str] | None = None,
146
+ ) -> pd.DataFrame:
147
+ """Validate the columns headers of the table.
148
+
149
+ If a required column is missing, a TableValidationError is raised.
150
+ If a list of optional columns is provided, only required and optional columns are
151
+ allowed in the table.
152
+
153
+ Args:
154
+ table_df (pd.DataFrame): The DataFrame to validate.
155
+ required_columns (list[str]): A list of required columns.
156
+ optional_columns (list[str] | None): A list of optional columns.
157
+ Default is None.
158
+
159
+ Returns:
160
+ pd.DataFrame: The validated DataFrame.
161
+ """
162
+ table_header = table_df.columns
163
+ for column in required_columns:
164
+ if column not in table_header:
165
+ raise NgioTableValidationError(
166
+ f"Could not find required column: {column} in the table"
167
+ )
168
+
169
+ if optional_columns is None:
170
+ return table_df
171
+
172
+ possible_columns = [*required_columns, *optional_columns]
173
+ for column in table_header:
174
+ if column not in possible_columns:
175
+ raise NgioTableValidationError(
176
+ f"Could not find column: {column} in the list of possible columns. ",
177
+ f"Possible columns are: {possible_columns}",
178
+ )
179
+
180
+ return table_df
181
+
182
+
183
def validate_unique_index(table_df: pd.DataFrame) -> pd.DataFrame:
    """Return the table unchanged if its index has no repeated values.

    Raises:
        NgioTableValidationError: If the index contains duplicates; the
            message lists the repeated index values.
    """
    index = table_df.index
    if not index.is_unique:
        # Collect every repeated occurrence for the error message.
        repeated_mask = index.duplicated()
        duplicates = index[repeated_mask].tolist()
        raise NgioTableValidationError(
            f"Index of the table contains duplicates values. Duplicate: {duplicates}"
        )
    return table_df
@@ -0,0 +1,8 @@
1
+ """Ngio Tables backend implementations."""
2
+
3
+ from ngio.tables.backends._table_backends import (
4
+ ImplementedTableBackends,
5
+ TableBackendProtocol,
6
+ )
7
+
8
+ __all__ = ["ImplementedTableBackends", "TableBackendProtocol"]