ngio 0.5.0a1__py3-none-any.whl → 0.5.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ngio/__init__.py +2 -2
- ngio/common/__init__.py +11 -6
- ngio/common/_masking_roi.py +12 -41
- ngio/common/_pyramid.py +218 -78
- ngio/common/_roi.py +257 -329
- ngio/experimental/iterators/_feature.py +3 -3
- ngio/experimental/iterators/_rois_utils.py +10 -11
- ngio/hcs/_plate.py +114 -123
- ngio/images/_abstract_image.py +417 -35
- ngio/images/_create_synt_container.py +36 -43
- ngio/images/_create_utils.py +423 -0
- ngio/images/_image.py +155 -177
- ngio/images/_label.py +144 -119
- ngio/images/_ome_zarr_container.py +361 -196
- ngio/io_pipes/_io_pipes.py +9 -9
- ngio/io_pipes/_io_pipes_masked.py +7 -7
- ngio/io_pipes/_io_pipes_roi.py +6 -6
- ngio/io_pipes/_io_pipes_types.py +3 -3
- ngio/io_pipes/_match_shape.py +5 -4
- ngio/io_pipes/_ops_slices_utils.py +8 -5
- ngio/ome_zarr_meta/__init__.py +15 -18
- ngio/ome_zarr_meta/_meta_handlers.py +334 -713
- ngio/ome_zarr_meta/ngio_specs/_axes.py +1 -0
- ngio/ome_zarr_meta/ngio_specs/_dataset.py +13 -22
- ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +54 -61
- ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +14 -68
- ngio/ome_zarr_meta/v04/__init__.py +1 -1
- ngio/ome_zarr_meta/v04/{_v04_spec_utils.py → _v04_spec.py} +16 -61
- ngio/ome_zarr_meta/v05/__init__.py +1 -1
- ngio/ome_zarr_meta/v05/{_v05_spec_utils.py → _v05_spec.py} +18 -61
- ngio/tables/_tables_container.py +25 -20
- ngio/tables/backends/_anndata.py +57 -8
- ngio/tables/backends/_anndata_utils.py +1 -6
- ngio/tables/backends/_csv.py +3 -19
- ngio/tables/backends/_json.py +10 -13
- ngio/tables/backends/_parquet.py +3 -31
- ngio/tables/backends/_py_arrow_backends.py +222 -0
- ngio/tables/v1/_roi_table.py +44 -27
- ngio/utils/__init__.py +6 -12
- ngio/utils/_cache.py +48 -0
- ngio/utils/_zarr_utils.py +285 -245
- {ngio-0.5.0a1.dist-info → ngio-0.5.0a3.dist-info}/METADATA +8 -4
- {ngio-0.5.0a1.dist-info → ngio-0.5.0a3.dist-info}/RECORD +45 -45
- {ngio-0.5.0a1.dist-info → ngio-0.5.0a3.dist-info}/WHEEL +1 -1
- ngio/images/_create.py +0 -283
- ngio/tables/backends/_non_zarr_backends.py +0 -196
- ngio/utils/_logger.py +0 -50
- {ngio-0.5.0a1.dist-info → ngio-0.5.0a3.dist-info}/licenses/LICENSE +0 -0
|
@@ -23,7 +23,7 @@ from ome_zarr_models.v05.image_label import ImageLabelAttrs as LabelAttrsV05
|
|
|
23
23
|
from ome_zarr_models.v05.multiscales import Dataset as DatasetV05
|
|
24
24
|
from ome_zarr_models.v05.multiscales import Multiscale as MultiscaleV05
|
|
25
25
|
from ome_zarr_models.v05.multiscales import ValidTransform as ValidTransformV05
|
|
26
|
-
from pydantic import BaseModel
|
|
26
|
+
from pydantic import BaseModel
|
|
27
27
|
|
|
28
28
|
from ngio.ome_zarr_meta.ngio_specs import (
|
|
29
29
|
AxesHandler,
|
|
@@ -56,36 +56,6 @@ class ImageLabelV05(BaseModel):
|
|
|
56
56
|
ome: LabelAttrsV05
|
|
57
57
|
|
|
58
58
|
|
|
59
|
-
def _is_v05_image_meta(metadata: dict) -> ImageV05WithOmero | ValidationError:
|
|
60
|
-
"""Check if the metadata is a valid OME-Zarr v05 metadata.
|
|
61
|
-
|
|
62
|
-
Args:
|
|
63
|
-
metadata (dict): The metadata to check.
|
|
64
|
-
|
|
65
|
-
Returns:
|
|
66
|
-
bool: True if the metadata is a valid OME-Zarr v05 metadata, False otherwise.
|
|
67
|
-
"""
|
|
68
|
-
try:
|
|
69
|
-
return ImageV05WithOmero(**metadata)
|
|
70
|
-
except ValidationError as e:
|
|
71
|
-
return e
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
def _is_v05_label_meta(metadata: dict) -> ImageLabelV05 | ValidationError:
|
|
75
|
-
"""Check if the metadata is a valid OME-Zarr v05 metadata.
|
|
76
|
-
|
|
77
|
-
Args:
|
|
78
|
-
metadata (dict): The metadata to check.
|
|
79
|
-
|
|
80
|
-
Returns:
|
|
81
|
-
bool: True if the metadata is a valid OME-Zarr v05 metadata, False otherwise.
|
|
82
|
-
"""
|
|
83
|
-
try:
|
|
84
|
-
return ImageLabelV05(**metadata)
|
|
85
|
-
except ValidationError as e:
|
|
86
|
-
return e
|
|
87
|
-
|
|
88
|
-
|
|
89
59
|
def _v05_omero_to_channels(v05_omero: OmeroV05 | None) -> ChannelsMeta | None:
|
|
90
60
|
if v05_omero is None:
|
|
91
61
|
return None
|
|
@@ -214,7 +184,7 @@ def v05_to_ngio_image_meta(
|
|
|
214
184
|
axes_setup: AxesSetup | None = None,
|
|
215
185
|
allow_non_canonical_axes: bool = False,
|
|
216
186
|
strict_canonical_order: bool = True,
|
|
217
|
-
) ->
|
|
187
|
+
) -> NgioImageMeta:
|
|
218
188
|
"""Convert a v05 image metadata to a ngio image metadata.
|
|
219
189
|
|
|
220
190
|
Args:
|
|
@@ -227,9 +197,7 @@ def v05_to_ngio_image_meta(
|
|
|
227
197
|
Returns:
|
|
228
198
|
NgioImageMeta: The ngio image metadata.
|
|
229
199
|
"""
|
|
230
|
-
v05_image =
|
|
231
|
-
if isinstance(v05_image, ValidationError):
|
|
232
|
-
return False, v05_image
|
|
200
|
+
v05_image = ImageV05WithOmero(**metadata)
|
|
233
201
|
v05_image = v05_image.ome
|
|
234
202
|
if len(v05_image.multiscales) > 1:
|
|
235
203
|
raise NotImplementedError(
|
|
@@ -250,7 +218,7 @@ def v05_to_ngio_image_meta(
|
|
|
250
218
|
name = v05_multiscale.name
|
|
251
219
|
if name is not None and not isinstance(name, str):
|
|
252
220
|
name = str(name)
|
|
253
|
-
return
|
|
221
|
+
return NgioImageMeta(
|
|
254
222
|
version="0.5",
|
|
255
223
|
name=name,
|
|
256
224
|
datasets=datasets,
|
|
@@ -263,7 +231,7 @@ def v05_to_ngio_label_meta(
|
|
|
263
231
|
axes_setup: AxesSetup | None = None,
|
|
264
232
|
allow_non_canonical_axes: bool = False,
|
|
265
233
|
strict_canonical_order: bool = True,
|
|
266
|
-
) ->
|
|
234
|
+
) -> NgioLabelMeta:
|
|
267
235
|
"""Convert a v05 image metadata to a ngio image metadata.
|
|
268
236
|
|
|
269
237
|
Args:
|
|
@@ -274,11 +242,9 @@ def v05_to_ngio_label_meta(
|
|
|
274
242
|
strict_canonical_order (bool, optional): Strict canonical order.
|
|
275
243
|
|
|
276
244
|
Returns:
|
|
277
|
-
|
|
245
|
+
NgioLabelMeta: The ngio label metadata.
|
|
278
246
|
"""
|
|
279
|
-
v05_label =
|
|
280
|
-
if isinstance(v05_label, ValidationError):
|
|
281
|
-
return False, v05_label
|
|
247
|
+
v05_label = ImageLabelV05(**metadata)
|
|
282
248
|
v05_label = v05_label.ome
|
|
283
249
|
|
|
284
250
|
if len(v05_label.multiscales) > 1:
|
|
@@ -316,7 +282,7 @@ def v05_to_ngio_label_meta(
|
|
|
316
282
|
if name is not None and not isinstance(name, str):
|
|
317
283
|
name = str(name)
|
|
318
284
|
|
|
319
|
-
return
|
|
285
|
+
return NgioLabelMeta(
|
|
320
286
|
version="0.5",
|
|
321
287
|
name=name,
|
|
322
288
|
datasets=datasets,
|
|
@@ -452,42 +418,33 @@ class HCSV05(BaseModel):
|
|
|
452
418
|
|
|
453
419
|
def v05_to_ngio_well_meta(
|
|
454
420
|
metadata: dict,
|
|
455
|
-
) ->
|
|
421
|
+
) -> NgioWellMeta:
|
|
456
422
|
"""Convert a v05 well metadata to a ngio well metadata.
|
|
457
423
|
|
|
458
424
|
Args:
|
|
459
425
|
metadata (dict): The v05 well metadata.
|
|
460
426
|
|
|
461
427
|
Returns:
|
|
462
|
-
|
|
463
|
-
ngio_well_meta (NgioWellMeta): The ngio well metadata.
|
|
428
|
+
NgioWellMeta: The ngio well metadata.
|
|
464
429
|
"""
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
return False, e
|
|
469
|
-
|
|
470
|
-
return True, NgioWellMeta(**v05_well.ome.model_dump())
|
|
430
|
+
v05_well = WellV05(**metadata).ome.well.model_dump()
|
|
431
|
+
images = v05_well.get("images", [])
|
|
432
|
+
return NgioWellMeta(images=images, version="0.5")
|
|
471
433
|
|
|
472
434
|
|
|
473
435
|
def v05_to_ngio_plate_meta(
|
|
474
436
|
metadata: dict,
|
|
475
|
-
) ->
|
|
437
|
+
) -> NgioPlateMeta:
|
|
476
438
|
"""Convert a v05 plate metadata to a ngio plate metadata.
|
|
477
439
|
|
|
478
440
|
Args:
|
|
479
441
|
metadata (dict): The v05 plate metadata.
|
|
480
442
|
|
|
481
443
|
Returns:
|
|
482
|
-
|
|
483
|
-
ngio_plate_meta (NgioPlateMeta): The ngio plate metadata.
|
|
444
|
+
NgioPlateMeta: The ngio plate metadata.
|
|
484
445
|
"""
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
except ValidationError as e:
|
|
488
|
-
return False, e
|
|
489
|
-
|
|
490
|
-
return True, NgioPlateMeta(**v05_plate.ome.model_dump())
|
|
446
|
+
v05_plate = HCSV05(**metadata).ome.plate.model_dump()
|
|
447
|
+
return NgioPlateMeta(plate=v05_plate, version="0.5") # type: ignore
|
|
491
448
|
|
|
492
449
|
|
|
493
450
|
def ngio_to_v05_well_meta(metadata: NgioWellMeta) -> dict:
|
|
@@ -499,7 +456,7 @@ def ngio_to_v05_well_meta(metadata: NgioWellMeta) -> dict:
|
|
|
499
456
|
Returns:
|
|
500
457
|
dict: The v05 well metadata.
|
|
501
458
|
"""
|
|
502
|
-
v05_well = WellAttrsV05(
|
|
459
|
+
v05_well = WellAttrsV05(well=metadata.model_dump()) # type: ignore
|
|
503
460
|
v05_well = WellV05(ome=v05_well)
|
|
504
461
|
return v05_well.model_dump(exclude_none=True, by_alias=True)
|
|
505
462
|
|
ngio/tables/_tables_container.py
CHANGED
|
@@ -258,7 +258,7 @@ class TablesContainer:
|
|
|
258
258
|
|
|
259
259
|
def _get_table_group_handler(self, name: str) -> ZarrGroupHandler:
|
|
260
260
|
"""Get the group handler for a table."""
|
|
261
|
-
handler = self._group_handler.
|
|
261
|
+
handler = self._group_handler.get_handler(path=name)
|
|
262
262
|
return handler
|
|
263
263
|
|
|
264
264
|
def list(self, filter_types: TypedTable | str | None = None) -> list[str]:
|
|
@@ -326,9 +326,7 @@ class TablesContainer:
|
|
|
326
326
|
"Use overwrite=True to replace it."
|
|
327
327
|
)
|
|
328
328
|
|
|
329
|
-
table_handler = self._group_handler.
|
|
330
|
-
path=name, overwrite=overwrite
|
|
331
|
-
)
|
|
329
|
+
table_handler = self._group_handler.get_handler(path=name, overwrite=overwrite)
|
|
332
330
|
|
|
333
331
|
if backend is None:
|
|
334
332
|
backend = table.backend_name
|
|
@@ -359,13 +357,10 @@ ImplementedTables().add_implementation(ConditionTableV1)
|
|
|
359
357
|
def open_tables_container(
|
|
360
358
|
store: StoreOrGroup,
|
|
361
359
|
cache: bool = False,
|
|
362
|
-
mode: AccessModeLiteral = "
|
|
363
|
-
parallel_safe: bool = False,
|
|
360
|
+
mode: AccessModeLiteral = "r+",
|
|
364
361
|
) -> TablesContainer:
|
|
365
362
|
"""Open a table handler from a Zarr store."""
|
|
366
|
-
handler = ZarrGroupHandler(
|
|
367
|
-
store=store, cache=cache, mode=mode, parallel_safe=parallel_safe
|
|
368
|
-
)
|
|
363
|
+
handler = ZarrGroupHandler(store=store, cache=cache, mode=mode)
|
|
369
364
|
return TablesContainer(handler)
|
|
370
365
|
|
|
371
366
|
|
|
@@ -373,12 +368,13 @@ def open_table(
|
|
|
373
368
|
store: StoreOrGroup,
|
|
374
369
|
backend: TableBackend | None = None,
|
|
375
370
|
cache: bool = False,
|
|
376
|
-
mode: AccessModeLiteral = "
|
|
377
|
-
parallel_safe: bool = False,
|
|
371
|
+
mode: AccessModeLiteral = "r+",
|
|
378
372
|
) -> Table:
|
|
379
373
|
"""Open a table from a Zarr store."""
|
|
380
374
|
handler = ZarrGroupHandler(
|
|
381
|
-
store=store,
|
|
375
|
+
store=store,
|
|
376
|
+
cache=cache,
|
|
377
|
+
mode=mode,
|
|
382
378
|
)
|
|
383
379
|
meta = _get_meta(handler)
|
|
384
380
|
return ImplementedTables().get_table(
|
|
@@ -391,12 +387,13 @@ def open_table_as(
|
|
|
391
387
|
table_cls: type[TableType],
|
|
392
388
|
backend: TableBackend | None = None,
|
|
393
389
|
cache: bool = False,
|
|
394
|
-
mode: AccessModeLiteral = "
|
|
395
|
-
parallel_safe: bool = False,
|
|
390
|
+
mode: AccessModeLiteral = "r+",
|
|
396
391
|
) -> TableType:
|
|
397
392
|
"""Open a table from a Zarr store as a specific type."""
|
|
398
393
|
handler = ZarrGroupHandler(
|
|
399
|
-
store=store,
|
|
394
|
+
store=store,
|
|
395
|
+
cache=cache,
|
|
396
|
+
mode=mode,
|
|
400
397
|
)
|
|
401
398
|
return table_cls.from_handler(
|
|
402
399
|
handler=handler,
|
|
@@ -410,12 +407,20 @@ def write_table(
|
|
|
410
407
|
backend: TableBackend = DefaultTableBackend,
|
|
411
408
|
cache: bool = False,
|
|
412
409
|
mode: AccessModeLiteral = "a",
|
|
413
|
-
parallel_safe: bool = False,
|
|
414
410
|
) -> None:
|
|
415
|
-
"""Write a table to a Zarr store.
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
411
|
+
"""Write a table to a Zarr store.
|
|
412
|
+
|
|
413
|
+
A table will be created at the given store location.
|
|
414
|
+
|
|
415
|
+
Args:
|
|
416
|
+
store (StoreOrGroup): The Zarr store or group to write the table to.
|
|
417
|
+
table (Table): The table to write.
|
|
418
|
+
backend (TableBackend): The backend to use for writing the table.
|
|
419
|
+
cache (bool): Whether to use caching for the Zarr group handler.
|
|
420
|
+
mode (AccessModeLiteral): The access mode to use for the Zarr group handler.
|
|
421
|
+
|
|
422
|
+
"""
|
|
423
|
+
handler = ZarrGroupHandler(store=store, cache=cache, mode=mode)
|
|
419
424
|
table.set_backend(
|
|
420
425
|
handler=handler,
|
|
421
426
|
backend=backend,
|
ngio/tables/backends/_anndata.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
+
import zarr
|
|
1
2
|
from anndata import AnnData
|
|
2
3
|
from anndata._settings import settings
|
|
3
4
|
from pandas import DataFrame
|
|
4
5
|
from polars import DataFrame as PolarsDataFrame
|
|
5
6
|
from polars import LazyFrame
|
|
7
|
+
from zarr.storage import FsspecStore, LocalStore, MemoryStore
|
|
6
8
|
|
|
7
9
|
from ngio.tables.backends._abstract_backend import AbstractTableBackend
|
|
8
10
|
from ngio.tables.backends._anndata_utils import (
|
|
@@ -13,7 +15,7 @@ from ngio.tables.backends._utils import (
|
|
|
13
15
|
convert_polars_to_anndata,
|
|
14
16
|
normalize_anndata,
|
|
15
17
|
)
|
|
16
|
-
from ngio.utils import NgioValueError
|
|
18
|
+
from ngio.utils import NgioValueError, copy_group
|
|
17
19
|
|
|
18
20
|
|
|
19
21
|
class AnnDataBackend(AbstractTableBackend):
|
|
@@ -50,18 +52,65 @@ class AnnDataBackend(AbstractTableBackend):
|
|
|
50
52
|
"""Load the table as an AnnData object."""
|
|
51
53
|
return self.load_as_anndata()
|
|
52
54
|
|
|
55
|
+
def _write_to_local_store(
|
|
56
|
+
self, store: LocalStore, path: str, table: AnnData
|
|
57
|
+
) -> None:
|
|
58
|
+
"""Write the AnnData table to a LocalStore."""
|
|
59
|
+
store_path = f"{store.root}/{path}"
|
|
60
|
+
table.write_zarr(store_path)
|
|
61
|
+
|
|
62
|
+
def _write_to_fsspec_store(
|
|
63
|
+
self, store: FsspecStore, path: str, table: AnnData
|
|
64
|
+
) -> None:
|
|
65
|
+
"""Write the AnnData table to a FsspecStore."""
|
|
66
|
+
full_url = f"{store.path}/{path}"
|
|
67
|
+
fs = store.fs
|
|
68
|
+
mapper = fs.get_mapper(full_url)
|
|
69
|
+
table.write_zarr(mapper)
|
|
70
|
+
|
|
71
|
+
def _write_to_memory_store(
|
|
72
|
+
self, store: MemoryStore, path: str, table: AnnData
|
|
73
|
+
) -> None:
|
|
74
|
+
"""Write the AnnData table to a MemoryStore."""
|
|
75
|
+
store = MemoryStore()
|
|
76
|
+
table.write_zarr(store)
|
|
77
|
+
anndata_group = zarr.open_group(store, mode="r")
|
|
78
|
+
copy_group(
|
|
79
|
+
anndata_group,
|
|
80
|
+
self._group_handler._group,
|
|
81
|
+
)
|
|
82
|
+
|
|
53
83
|
def write_from_anndata(self, table: AnnData) -> None:
|
|
54
84
|
"""Serialize the table from an AnnData object."""
|
|
55
|
-
|
|
56
|
-
|
|
85
|
+
# Make sure to use the correct zarr format
|
|
86
|
+
settings.zarr_write_format = self._group_handler.zarr_format
|
|
87
|
+
store = self._group_handler.store
|
|
88
|
+
path = self._group_handler.group.path
|
|
89
|
+
if isinstance(store, LocalStore):
|
|
90
|
+
self._write_to_local_store(
|
|
91
|
+
store,
|
|
92
|
+
path,
|
|
93
|
+
table,
|
|
94
|
+
)
|
|
95
|
+
elif isinstance(store, FsspecStore):
|
|
96
|
+
self._write_to_fsspec_store(
|
|
97
|
+
store,
|
|
98
|
+
path,
|
|
99
|
+
table,
|
|
100
|
+
)
|
|
101
|
+
elif isinstance(store, MemoryStore):
|
|
102
|
+
self._write_to_memory_store(
|
|
103
|
+
store,
|
|
104
|
+
path,
|
|
105
|
+
table,
|
|
106
|
+
)
|
|
107
|
+
else:
|
|
57
108
|
raise NgioValueError(
|
|
58
|
-
f"Ngio does not support writing
|
|
59
|
-
f"store of type {type(
|
|
109
|
+
f"Ngio does not support writing an AnnData table to a "
|
|
110
|
+
f"store of type {type(store)}. "
|
|
60
111
|
"Please make sure to use a compatible "
|
|
61
|
-
"store like a
|
|
112
|
+
"store like a LocalStore, or FsspecStore."
|
|
62
113
|
)
|
|
63
|
-
settings.zarr_write_format = self._group_handler.zarr_format
|
|
64
|
-
table.write_zarr(full_url)
|
|
65
114
|
|
|
66
115
|
def write_from_pandas(self, table: DataFrame) -> None:
|
|
67
116
|
"""Serialize the table from a pandas DataFrame."""
|
|
@@ -9,7 +9,6 @@ from anndata._io.utils import _read_legacy_raw
|
|
|
9
9
|
from anndata._io.zarr import read_dataframe
|
|
10
10
|
from anndata.compat import _clean_uns
|
|
11
11
|
from anndata.experimental import read_dispatched
|
|
12
|
-
from zarr.storage import LocalStore
|
|
13
12
|
|
|
14
13
|
from ngio.utils import (
|
|
15
14
|
NgioValueError,
|
|
@@ -35,10 +34,6 @@ def custom_anndata_read_zarr(
|
|
|
35
34
|
elem_to_read (Sequence[str] | None): The elements to read from the store.
|
|
36
35
|
"""
|
|
37
36
|
group = open_group_wrapper(store=store, mode="r")
|
|
38
|
-
|
|
39
|
-
if not isinstance(group.store, LocalStore):
|
|
40
|
-
elem_to_read = ["X", "obs", "var"]
|
|
41
|
-
|
|
42
37
|
if elem_to_read is None:
|
|
43
38
|
elem_to_read = [
|
|
44
39
|
"X",
|
|
@@ -89,7 +84,7 @@ def custom_anndata_read_zarr(
|
|
|
89
84
|
_clean_uns(adata)
|
|
90
85
|
|
|
91
86
|
if isinstance(adata, dict):
|
|
92
|
-
adata = AnnData(**adata)
|
|
87
|
+
adata = AnnData(**adata) # type: ignore
|
|
93
88
|
if not isinstance(adata, AnnData):
|
|
94
89
|
raise NgioValueError(f"Expected an AnnData object, but got {type(adata)}")
|
|
95
90
|
return adata
|
ngio/tables/backends/_csv.py
CHANGED
|
@@ -1,20 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
import polars as pl
|
|
1
|
+
from ngio.tables.backends._py_arrow_backends import PyArrowBackend
|
|
3
2
|
|
|
4
|
-
from ngio.tables.backends._non_zarr_backends import NonZarrBaseBackend
|
|
5
3
|
|
|
6
|
-
|
|
7
|
-
def write_lf_to_csv(path: str, table: pl.DataFrame) -> None:
|
|
8
|
-
"""Write a polars DataFrame to a CSV file."""
|
|
9
|
-
table.write_csv(path)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def write_df_to_csv(path: str, table: pd.DataFrame) -> None:
|
|
13
|
-
"""Write a pandas DataFrame to a CSV file."""
|
|
14
|
-
table.to_csv(path, index=False)
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class CsvTableBackend(NonZarrBaseBackend):
|
|
4
|
+
class CsvTableBackend(PyArrowBackend):
|
|
18
5
|
"""A class to load and write small tables in CSV format."""
|
|
19
6
|
|
|
20
7
|
def __init__(
|
|
@@ -22,11 +9,8 @@ class CsvTableBackend(NonZarrBaseBackend):
|
|
|
22
9
|
):
|
|
23
10
|
"""Initialize the CsvTableBackend."""
|
|
24
11
|
super().__init__(
|
|
25
|
-
lf_reader=pl.scan_csv,
|
|
26
|
-
df_reader=pd.read_csv,
|
|
27
|
-
lf_writer=write_lf_to_csv,
|
|
28
|
-
df_writer=write_df_to_csv,
|
|
29
12
|
table_name="table.csv",
|
|
13
|
+
table_format="csv",
|
|
30
14
|
)
|
|
31
15
|
|
|
32
16
|
@staticmethod
|
ngio/tables/backends/_json.py
CHANGED
|
@@ -8,7 +8,7 @@ from ngio.tables.backends._utils import (
|
|
|
8
8
|
normalize_pandas_df,
|
|
9
9
|
normalize_polars_lf,
|
|
10
10
|
)
|
|
11
|
-
from ngio.utils import
|
|
11
|
+
from ngio.utils import NgioError
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class JsonTableBackend(AbstractTableBackend):
|
|
@@ -37,22 +37,19 @@ class JsonTableBackend(AbstractTableBackend):
|
|
|
37
37
|
def _get_table_group(self):
|
|
38
38
|
"""Get the table group, creating it if it doesn't exist."""
|
|
39
39
|
try:
|
|
40
|
-
table_group = self._group_handler.get_group(path="table")
|
|
41
|
-
except
|
|
42
|
-
|
|
40
|
+
table_group = self._group_handler.get_group(path="table", create_mode=True)
|
|
41
|
+
except NgioError as e:
|
|
42
|
+
raise NgioError(
|
|
43
|
+
"Could not get or create a 'table' group in the store "
|
|
44
|
+
f"{self._group_handler.store} path "
|
|
45
|
+
f"{self._group_handler.group.path}/table."
|
|
46
|
+
) from e
|
|
43
47
|
return table_group
|
|
44
48
|
|
|
45
|
-
def _load_as_pandas_df(self) -> DataFrame:
|
|
46
|
-
"""Load the table as a pandas DataFrame."""
|
|
47
|
-
table_group = self._get_table_group()
|
|
48
|
-
table_dict = dict(table_group.attrs)
|
|
49
|
-
|
|
50
|
-
data_frame = pd.DataFrame.from_dict(table_dict)
|
|
51
|
-
return data_frame
|
|
52
|
-
|
|
53
49
|
def load_as_pandas_df(self) -> DataFrame:
|
|
54
50
|
"""Load the table as a pandas DataFrame."""
|
|
55
|
-
|
|
51
|
+
table_dict = self._get_table_group().attrs.asdict()
|
|
52
|
+
data_frame = pd.DataFrame.from_dict(table_dict)
|
|
56
53
|
data_frame = normalize_pandas_df(
|
|
57
54
|
data_frame,
|
|
58
55
|
index_key=self.index_key,
|
ngio/tables/backends/_parquet.py
CHANGED
|
@@ -1,32 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
import polars as pl
|
|
1
|
+
from ngio.tables.backends._py_arrow_backends import PyArrowBackend
|
|
3
2
|
|
|
4
|
-
from ngio.tables.backends._non_zarr_backends import NonZarrBaseBackend
|
|
5
3
|
|
|
6
|
-
|
|
7
|
-
def write_lf_to_parquet(path: str, table: pl.DataFrame) -> None:
|
|
8
|
-
"""Write a polars DataFrame to a Parquet file."""
|
|
9
|
-
# make categorical into string (for pandas compatibility)
|
|
10
|
-
schema = table.collect_schema()
|
|
11
|
-
|
|
12
|
-
categorical_columns = []
|
|
13
|
-
for name, dtype in zip(schema.names(), schema.dtypes(), strict=True):
|
|
14
|
-
if dtype == pl.Categorical:
|
|
15
|
-
categorical_columns.append(name)
|
|
16
|
-
|
|
17
|
-
for col in categorical_columns:
|
|
18
|
-
table = table.with_columns(pl.col(col).cast(pl.Utf8))
|
|
19
|
-
|
|
20
|
-
# write to parquet
|
|
21
|
-
table.write_parquet(path)
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def write_df_to_parquet(path: str, table: pd.DataFrame) -> None:
|
|
25
|
-
"""Write a pandas DataFrame to a Parquet file."""
|
|
26
|
-
table.to_parquet(path, index=False)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class ParquetTableBackend(NonZarrBaseBackend):
|
|
4
|
+
class ParquetTableBackend(PyArrowBackend):
|
|
30
5
|
"""A class to load and write small tables in Parquet format."""
|
|
31
6
|
|
|
32
7
|
def __init__(
|
|
@@ -34,11 +9,8 @@ class ParquetTableBackend(NonZarrBaseBackend):
|
|
|
34
9
|
):
|
|
35
10
|
"""Initialize the ParquetTableBackend."""
|
|
36
11
|
super().__init__(
|
|
37
|
-
lf_reader=pl.scan_parquet,
|
|
38
|
-
df_reader=pd.read_parquet,
|
|
39
|
-
lf_writer=write_lf_to_parquet,
|
|
40
|
-
df_writer=write_df_to_parquet,
|
|
41
12
|
table_name="table.parquet",
|
|
13
|
+
table_format="parquet",
|
|
42
14
|
)
|
|
43
15
|
|
|
44
16
|
@staticmethod
|