ngio 0.5.0a2__py3-none-any.whl → 0.5.0a3__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (47)
  1. ngio/__init__.py +2 -2
  2. ngio/common/__init__.py +11 -6
  3. ngio/common/_masking_roi.py +12 -41
  4. ngio/common/_pyramid.py +206 -76
  5. ngio/common/_roi.py +257 -329
  6. ngio/experimental/iterators/_feature.py +3 -3
  7. ngio/experimental/iterators/_rois_utils.py +10 -11
  8. ngio/hcs/_plate.py +50 -43
  9. ngio/images/_abstract_image.py +417 -35
  10. ngio/images/_create_synt_container.py +35 -42
  11. ngio/images/_create_utils.py +423 -0
  12. ngio/images/_image.py +154 -176
  13. ngio/images/_label.py +144 -119
  14. ngio/images/_ome_zarr_container.py +361 -196
  15. ngio/io_pipes/_io_pipes.py +9 -9
  16. ngio/io_pipes/_io_pipes_masked.py +7 -7
  17. ngio/io_pipes/_io_pipes_roi.py +6 -6
  18. ngio/io_pipes/_io_pipes_types.py +3 -3
  19. ngio/io_pipes/_match_shape.py +5 -4
  20. ngio/io_pipes/_ops_slices_utils.py +8 -5
  21. ngio/ome_zarr_meta/__init__.py +15 -18
  22. ngio/ome_zarr_meta/_meta_handlers.py +334 -713
  23. ngio/ome_zarr_meta/ngio_specs/_axes.py +1 -0
  24. ngio/ome_zarr_meta/ngio_specs/_dataset.py +13 -22
  25. ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +54 -61
  26. ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +14 -68
  27. ngio/ome_zarr_meta/v04/__init__.py +1 -1
  28. ngio/ome_zarr_meta/v04/{_v04_spec_utils.py → _v04_spec.py} +16 -61
  29. ngio/ome_zarr_meta/v05/__init__.py +1 -1
  30. ngio/ome_zarr_meta/v05/{_v05_spec_utils.py → _v05_spec.py} +18 -61
  31. ngio/tables/_tables_container.py +2 -4
  32. ngio/tables/backends/_anndata.py +57 -8
  33. ngio/tables/backends/_anndata_utils.py +1 -6
  34. ngio/tables/backends/_csv.py +3 -19
  35. ngio/tables/backends/_json.py +10 -13
  36. ngio/tables/backends/_parquet.py +3 -31
  37. ngio/tables/backends/_py_arrow_backends.py +222 -0
  38. ngio/tables/v1/_roi_table.py +41 -24
  39. ngio/utils/__init__.py +4 -12
  40. ngio/utils/_zarr_utils.py +160 -53
  41. {ngio-0.5.0a2.dist-info → ngio-0.5.0a3.dist-info}/METADATA +6 -2
  42. {ngio-0.5.0a2.dist-info → ngio-0.5.0a3.dist-info}/RECORD +44 -45
  43. {ngio-0.5.0a2.dist-info → ngio-0.5.0a3.dist-info}/WHEEL +1 -1
  44. ngio/images/_create.py +0 -287
  45. ngio/tables/backends/_non_zarr_backends.py +0 -196
  46. ngio/utils/_logger.py +0 -50
  47. {ngio-0.5.0a2.dist-info → ngio-0.5.0a3.dist-info}/licenses/LICENSE +0 -0
@@ -23,7 +23,7 @@ from ome_zarr_models.v05.image_label import ImageLabelAttrs as LabelAttrsV05
23
23
  from ome_zarr_models.v05.multiscales import Dataset as DatasetV05
24
24
  from ome_zarr_models.v05.multiscales import Multiscale as MultiscaleV05
25
25
  from ome_zarr_models.v05.multiscales import ValidTransform as ValidTransformV05
26
- from pydantic import BaseModel, ValidationError
26
+ from pydantic import BaseModel
27
27
 
28
28
  from ngio.ome_zarr_meta.ngio_specs import (
29
29
  AxesHandler,
@@ -56,36 +56,6 @@ class ImageLabelV05(BaseModel):
56
56
  ome: LabelAttrsV05
57
57
 
58
58
 
59
- def _is_v05_image_meta(metadata: dict) -> ImageV05WithOmero | ValidationError:
60
- """Check if the metadata is a valid OME-Zarr v05 metadata.
61
-
62
- Args:
63
- metadata (dict): The metadata to check.
64
-
65
- Returns:
66
- bool: True if the metadata is a valid OME-Zarr v05 metadata, False otherwise.
67
- """
68
- try:
69
- return ImageV05WithOmero(**metadata)
70
- except ValidationError as e:
71
- return e
72
-
73
-
74
- def _is_v05_label_meta(metadata: dict) -> ImageLabelV05 | ValidationError:
75
- """Check if the metadata is a valid OME-Zarr v05 metadata.
76
-
77
- Args:
78
- metadata (dict): The metadata to check.
79
-
80
- Returns:
81
- bool: True if the metadata is a valid OME-Zarr v05 metadata, False otherwise.
82
- """
83
- try:
84
- return ImageLabelV05(**metadata)
85
- except ValidationError as e:
86
- return e
87
-
88
-
89
59
  def _v05_omero_to_channels(v05_omero: OmeroV05 | None) -> ChannelsMeta | None:
90
60
  if v05_omero is None:
91
61
  return None
@@ -214,7 +184,7 @@ def v05_to_ngio_image_meta(
214
184
  axes_setup: AxesSetup | None = None,
215
185
  allow_non_canonical_axes: bool = False,
216
186
  strict_canonical_order: bool = True,
217
- ) -> tuple[bool, NgioImageMeta | ValidationError]:
187
+ ) -> NgioImageMeta:
218
188
  """Convert a v05 image metadata to a ngio image metadata.
219
189
 
220
190
  Args:
@@ -227,9 +197,7 @@ def v05_to_ngio_image_meta(
227
197
  Returns:
228
198
  NgioImageMeta: The ngio image metadata.
229
199
  """
230
- v05_image = _is_v05_image_meta(metadata)
231
- if isinstance(v05_image, ValidationError):
232
- return False, v05_image
200
+ v05_image = ImageV05WithOmero(**metadata)
233
201
  v05_image = v05_image.ome
234
202
  if len(v05_image.multiscales) > 1:
235
203
  raise NotImplementedError(
@@ -250,7 +218,7 @@ def v05_to_ngio_image_meta(
250
218
  name = v05_multiscale.name
251
219
  if name is not None and not isinstance(name, str):
252
220
  name = str(name)
253
- return True, NgioImageMeta(
221
+ return NgioImageMeta(
254
222
  version="0.5",
255
223
  name=name,
256
224
  datasets=datasets,
@@ -263,7 +231,7 @@ def v05_to_ngio_label_meta(
263
231
  axes_setup: AxesSetup | None = None,
264
232
  allow_non_canonical_axes: bool = False,
265
233
  strict_canonical_order: bool = True,
266
- ) -> tuple[bool, NgioLabelMeta | ValidationError]:
234
+ ) -> NgioLabelMeta:
267
235
  """Convert a v05 image metadata to a ngio image metadata.
268
236
 
269
237
  Args:
@@ -274,11 +242,9 @@ def v05_to_ngio_label_meta(
274
242
  strict_canonical_order (bool, optional): Strict canonical order.
275
243
 
276
244
  Returns:
277
- NgioImageMeta: The ngio image metadata.
245
+ NgioLabelMeta: The ngio label metadata.
278
246
  """
279
- v05_label = _is_v05_label_meta(metadata)
280
- if isinstance(v05_label, ValidationError):
281
- return False, v05_label
247
+ v05_label = ImageLabelV05(**metadata)
282
248
  v05_label = v05_label.ome
283
249
 
284
250
  if len(v05_label.multiscales) > 1:
@@ -316,7 +282,7 @@ def v05_to_ngio_label_meta(
316
282
  if name is not None and not isinstance(name, str):
317
283
  name = str(name)
318
284
 
319
- return True, NgioLabelMeta(
285
+ return NgioLabelMeta(
320
286
  version="0.5",
321
287
  name=name,
322
288
  datasets=datasets,
@@ -452,42 +418,33 @@ class HCSV05(BaseModel):
452
418
 
453
419
  def v05_to_ngio_well_meta(
454
420
  metadata: dict,
455
- ) -> tuple[bool, NgioWellMeta | ValidationError]:
421
+ ) -> NgioWellMeta:
456
422
  """Convert a v05 well metadata to a ngio well metadata.
457
423
 
458
424
  Args:
459
425
  metadata (dict): The v05 well metadata.
460
426
 
461
427
  Returns:
462
- result (bool): True if the conversion was successful, False otherwise.
463
- ngio_well_meta (NgioWellMeta): The ngio well metadata.
428
+ NgioWellMeta: The ngio well metadata.
464
429
  """
465
- try:
466
- v05_well = WellV05(**metadata)
467
- except ValidationError as e:
468
- return False, e
469
-
470
- return True, NgioWellMeta(**v05_well.ome.model_dump())
430
+ v05_well = WellV05(**metadata).ome.well.model_dump()
431
+ images = v05_well.get("images", [])
432
+ return NgioWellMeta(images=images, version="0.5")
471
433
 
472
434
 
473
435
  def v05_to_ngio_plate_meta(
474
436
  metadata: dict,
475
- ) -> tuple[bool, NgioPlateMeta | ValidationError]:
437
+ ) -> NgioPlateMeta:
476
438
  """Convert a v05 plate metadata to a ngio plate metadata.
477
439
 
478
440
  Args:
479
441
  metadata (dict): The v05 plate metadata.
480
442
 
481
443
  Returns:
482
- result (bool): True if the conversion was successful, False otherwise.
483
- ngio_plate_meta (NgioPlateMeta): The ngio plate metadata.
444
+ NgioPlateMeta: The ngio plate metadata.
484
445
  """
485
- try:
486
- v05_plate = HCSV05(**metadata)
487
- except ValidationError as e:
488
- return False, e
489
-
490
- return True, NgioPlateMeta(**v05_plate.ome.model_dump())
446
+ v05_plate = HCSV05(**metadata).ome.plate.model_dump()
447
+ return NgioPlateMeta(plate=v05_plate, version="0.5") # type: ignore
491
448
 
492
449
 
493
450
  def ngio_to_v05_well_meta(metadata: NgioWellMeta) -> dict:
@@ -499,7 +456,7 @@ def ngio_to_v05_well_meta(metadata: NgioWellMeta) -> dict:
499
456
  Returns:
500
457
  dict: The v05 well metadata.
501
458
  """
502
- v05_well = WellAttrsV05(**metadata.model_dump())
459
+ v05_well = WellAttrsV05(well=metadata.model_dump()) # type: ignore
503
460
  v05_well = WellV05(ome=v05_well)
504
461
  return v05_well.model_dump(exclude_none=True, by_alias=True)
505
462
 
@@ -258,7 +258,7 @@ class TablesContainer:
258
258
 
259
259
  def _get_table_group_handler(self, name: str) -> ZarrGroupHandler:
260
260
  """Get the group handler for a table."""
261
- handler = self._group_handler.derive_handler(path=name)
261
+ handler = self._group_handler.get_handler(path=name)
262
262
  return handler
263
263
 
264
264
  def list(self, filter_types: TypedTable | str | None = None) -> list[str]:
@@ -326,9 +326,7 @@ class TablesContainer:
326
326
  "Use overwrite=True to replace it."
327
327
  )
328
328
 
329
- table_handler = self._group_handler.derive_handler(
330
- path=name, overwrite=overwrite
331
- )
329
+ table_handler = self._group_handler.get_handler(path=name, overwrite=overwrite)
332
330
 
333
331
  if backend is None:
334
332
  backend = table.backend_name
@@ -1,8 +1,10 @@
1
+ import zarr
1
2
  from anndata import AnnData
2
3
  from anndata._settings import settings
3
4
  from pandas import DataFrame
4
5
  from polars import DataFrame as PolarsDataFrame
5
6
  from polars import LazyFrame
7
+ from zarr.storage import FsspecStore, LocalStore, MemoryStore
6
8
 
7
9
  from ngio.tables.backends._abstract_backend import AbstractTableBackend
8
10
  from ngio.tables.backends._anndata_utils import (
@@ -13,7 +15,7 @@ from ngio.tables.backends._utils import (
13
15
  convert_polars_to_anndata,
14
16
  normalize_anndata,
15
17
  )
16
- from ngio.utils import NgioValueError
18
+ from ngio.utils import NgioValueError, copy_group
17
19
 
18
20
 
19
21
  class AnnDataBackend(AbstractTableBackend):
@@ -50,18 +52,65 @@ class AnnDataBackend(AbstractTableBackend):
50
52
  """Load the table as an AnnData object."""
51
53
  return self.load_as_anndata()
52
54
 
55
+ def _write_to_local_store(
56
+ self, store: LocalStore, path: str, table: AnnData
57
+ ) -> None:
58
+ """Write the AnnData table to a LocalStore."""
59
+ store_path = f"{store.root}/{path}"
60
+ table.write_zarr(store_path)
61
+
62
+ def _write_to_fsspec_store(
63
+ self, store: FsspecStore, path: str, table: AnnData
64
+ ) -> None:
65
+ """Write the AnnData table to a FsspecStore."""
66
+ full_url = f"{store.path}/{path}"
67
+ fs = store.fs
68
+ mapper = fs.get_mapper(full_url)
69
+ table.write_zarr(mapper)
70
+
71
+ def _write_to_memory_store(
72
+ self, store: MemoryStore, path: str, table: AnnData
73
+ ) -> None:
74
+ """Write the AnnData table to a MemoryStore."""
75
+ store = MemoryStore()
76
+ table.write_zarr(store)
77
+ anndata_group = zarr.open_group(store, mode="r")
78
+ copy_group(
79
+ anndata_group,
80
+ self._group_handler._group,
81
+ )
82
+
53
83
  def write_from_anndata(self, table: AnnData) -> None:
54
84
  """Serialize the table from an AnnData object."""
55
- full_url = self._group_handler.full_url
56
- if full_url is None:
85
+ # Make sure to use the correct zarr format
86
+ settings.zarr_write_format = self._group_handler.zarr_format
87
+ store = self._group_handler.store
88
+ path = self._group_handler.group.path
89
+ if isinstance(store, LocalStore):
90
+ self._write_to_local_store(
91
+ store,
92
+ path,
93
+ table,
94
+ )
95
+ elif isinstance(store, FsspecStore):
96
+ self._write_to_fsspec_store(
97
+ store,
98
+ path,
99
+ table,
100
+ )
101
+ elif isinstance(store, MemoryStore):
102
+ self._write_to_memory_store(
103
+ store,
104
+ path,
105
+ table,
106
+ )
107
+ else:
57
108
  raise NgioValueError(
58
- f"Ngio does not support writing file from a "
59
- f"store of type {type(self._group_handler)}. "
109
+ f"Ngio does not support writing an AnnData table to a "
110
+ f"store of type {type(store)}. "
60
111
  "Please make sure to use a compatible "
61
- "store like a zarr.DirectoryStore."
112
+ "store like a LocalStore, or FsspecStore."
62
113
  )
63
- settings.zarr_write_format = self._group_handler.zarr_format
64
- table.write_zarr(full_url)
65
114
 
66
115
  def write_from_pandas(self, table: DataFrame) -> None:
67
116
  """Serialize the table from a pandas DataFrame."""
@@ -9,7 +9,6 @@ from anndata._io.utils import _read_legacy_raw
9
9
  from anndata._io.zarr import read_dataframe
10
10
  from anndata.compat import _clean_uns
11
11
  from anndata.experimental import read_dispatched
12
- from zarr.storage import LocalStore
13
12
 
14
13
  from ngio.utils import (
15
14
  NgioValueError,
@@ -35,10 +34,6 @@ def custom_anndata_read_zarr(
35
34
  elem_to_read (Sequence[str] | None): The elements to read from the store.
36
35
  """
37
36
  group = open_group_wrapper(store=store, mode="r")
38
-
39
- if not isinstance(group.store, LocalStore):
40
- elem_to_read = ["X", "obs", "var"]
41
-
42
37
  if elem_to_read is None:
43
38
  elem_to_read = [
44
39
  "X",
@@ -89,7 +84,7 @@ def custom_anndata_read_zarr(
89
84
  _clean_uns(adata)
90
85
 
91
86
  if isinstance(adata, dict):
92
- adata = AnnData(**adata)
87
+ adata = AnnData(**adata) # type: ignore
93
88
  if not isinstance(adata, AnnData):
94
89
  raise NgioValueError(f"Expected an AnnData object, but got {type(adata)}")
95
90
  return adata
@@ -1,20 +1,7 @@
1
- import pandas as pd
2
- import polars as pl
1
+ from ngio.tables.backends._py_arrow_backends import PyArrowBackend
3
2
 
4
- from ngio.tables.backends._non_zarr_backends import NonZarrBaseBackend
5
3
 
6
-
7
- def write_lf_to_csv(path: str, table: pl.DataFrame) -> None:
8
- """Write a polars DataFrame to a CSV file."""
9
- table.write_csv(path)
10
-
11
-
12
- def write_df_to_csv(path: str, table: pd.DataFrame) -> None:
13
- """Write a pandas DataFrame to a CSV file."""
14
- table.to_csv(path, index=False)
15
-
16
-
17
- class CsvTableBackend(NonZarrBaseBackend):
4
+ class CsvTableBackend(PyArrowBackend):
18
5
  """A class to load and write small tables in CSV format."""
19
6
 
20
7
  def __init__(
@@ -22,11 +9,8 @@ class CsvTableBackend(NonZarrBaseBackend):
22
9
  ):
23
10
  """Initialize the CsvTableBackend."""
24
11
  super().__init__(
25
- lf_reader=pl.scan_csv,
26
- df_reader=pd.read_csv,
27
- lf_writer=write_lf_to_csv,
28
- df_writer=write_df_to_csv,
29
12
  table_name="table.csv",
13
+ table_format="csv",
30
14
  )
31
15
 
32
16
  @staticmethod
@@ -8,7 +8,7 @@ from ngio.tables.backends._utils import (
8
8
  normalize_pandas_df,
9
9
  normalize_polars_lf,
10
10
  )
11
- from ngio.utils import NgioFileNotFoundError
11
+ from ngio.utils import NgioError
12
12
 
13
13
 
14
14
  class JsonTableBackend(AbstractTableBackend):
@@ -37,22 +37,19 @@ class JsonTableBackend(AbstractTableBackend):
37
37
  def _get_table_group(self):
38
38
  """Get the table group, creating it if it doesn't exist."""
39
39
  try:
40
- table_group = self._group_handler.get_group(path="table")
41
- except NgioFileNotFoundError:
42
- table_group = self._group_handler.group.create_group("table")
40
+ table_group = self._group_handler.get_group(path="table", create_mode=True)
41
+ except NgioError as e:
42
+ raise NgioError(
43
+ "Could not get or create a 'table' group in the store "
44
+ f"{self._group_handler.store} path "
45
+ f"{self._group_handler.group.path}/table."
46
+ ) from e
43
47
  return table_group
44
48
 
45
- def _load_as_pandas_df(self) -> DataFrame:
46
- """Load the table as a pandas DataFrame."""
47
- table_group = self._get_table_group()
48
- table_dict = dict(table_group.attrs)
49
-
50
- data_frame = pd.DataFrame.from_dict(table_dict)
51
- return data_frame
52
-
53
49
  def load_as_pandas_df(self) -> DataFrame:
54
50
  """Load the table as a pandas DataFrame."""
55
- data_frame = self._load_as_pandas_df()
51
+ table_dict = self._get_table_group().attrs.asdict()
52
+ data_frame = pd.DataFrame.from_dict(table_dict)
56
53
  data_frame = normalize_pandas_df(
57
54
  data_frame,
58
55
  index_key=self.index_key,
@@ -1,32 +1,7 @@
1
- import pandas as pd
2
- import polars as pl
1
+ from ngio.tables.backends._py_arrow_backends import PyArrowBackend
3
2
 
4
- from ngio.tables.backends._non_zarr_backends import NonZarrBaseBackend
5
3
 
6
-
7
- def write_lf_to_parquet(path: str, table: pl.DataFrame) -> None:
8
- """Write a polars DataFrame to a Parquet file."""
9
- # make categorical into string (for pandas compatibility)
10
- schema = table.collect_schema()
11
-
12
- categorical_columns = []
13
- for name, dtype in zip(schema.names(), schema.dtypes(), strict=True):
14
- if dtype == pl.Categorical:
15
- categorical_columns.append(name)
16
-
17
- for col in categorical_columns:
18
- table = table.with_columns(pl.col(col).cast(pl.Utf8))
19
-
20
- # write to parquet
21
- table.write_parquet(path)
22
-
23
-
24
- def write_df_to_parquet(path: str, table: pd.DataFrame) -> None:
25
- """Write a pandas DataFrame to a Parquet file."""
26
- table.to_parquet(path, index=False)
27
-
28
-
29
- class ParquetTableBackend(NonZarrBaseBackend):
4
+ class ParquetTableBackend(PyArrowBackend):
30
5
  """A class to load and write small tables in Parquet format."""
31
6
 
32
7
  def __init__(
@@ -34,11 +9,8 @@ class ParquetTableBackend(NonZarrBaseBackend):
34
9
  ):
35
10
  """Initialize the ParquetTableBackend."""
36
11
  super().__init__(
37
- lf_reader=pl.scan_parquet,
38
- df_reader=pd.read_parquet,
39
- lf_writer=write_lf_to_parquet,
40
- df_writer=write_df_to_parquet,
41
12
  table_name="table.parquet",
13
+ table_format="parquet",
42
14
  )
43
15
 
44
16
  @staticmethod
@@ -0,0 +1,222 @@
1
+ from typing import Literal
2
+
3
+ import polars as pl
4
+ import pyarrow as pa
5
+ import pyarrow.csv as pa_csv
6
+ import pyarrow.dataset as pa_ds
7
+ import pyarrow.fs as pa_fs
8
+ import pyarrow.parquet as pa_parquet
9
+ from pandas import DataFrame
10
+ from polars import DataFrame as PolarsDataFrame
11
+ from polars import LazyFrame
12
+ from zarr.storage import FsspecStore, LocalStore, MemoryStore, ZipStore
13
+
14
+ from ngio.tables.backends._abstract_backend import AbstractTableBackend
15
+ from ngio.tables.backends._utils import normalize_pandas_df, normalize_polars_lf
16
+ from ngio.utils import NgioValueError
17
+ from ngio.utils._zarr_utils import _make_sync_fs
18
+
19
+
20
+ class PyArrowBackend(AbstractTableBackend):
21
+ """A class to load and write small tables in CSV format."""
22
+
23
+ def __init__(
24
+ self,
25
+ table_name: str,
26
+ table_format: Literal["csv", "parquet"] = "parquet",
27
+ ):
28
+ self.table_name = table_name
29
+ self.table_format = table_format
30
+
31
+ @staticmethod
32
+ def implements_anndata() -> bool:
33
+ """Whether the handler implements the anndata protocol."""
34
+ return False
35
+
36
+ @staticmethod
37
+ def implements_pandas() -> bool:
38
+ """Whether the handler implements the dataframe protocol."""
39
+ return True
40
+
41
+ @staticmethod
42
+ def implements_polars() -> bool:
43
+ """Whether the handler implements the polars protocol."""
44
+ return True
45
+
46
+ @staticmethod
47
+ def backend_name() -> str:
48
+ """Return the name of the backend."""
49
+ raise NotImplementedError(
50
+ "The backend_name method must be implemented in the subclass."
51
+ )
52
+
53
+ def _raise_store_type_not_supported(self):
54
+ """Raise an error for unsupported store types."""
55
+ ext = self.table_name.split(".")[-1]
56
+ store = self._group_handler.store
57
+ raise NgioValueError(
58
+ f"Ngio does not support reading a {ext} table from a "
59
+ f"store of type {type(store)}. "
60
+ "Please make sure to use a compatible "
61
+ "store like a LocalStore, or "
62
+ "FsspecStore, or MemoryStore, or ZipStore."
63
+ )
64
+
65
+ def _load_from_local_store(self, store: LocalStore, path: str) -> pa_ds.Dataset:
66
+ """Load the table from a directory store."""
67
+ root_path = store.root
68
+ table_path = f"{root_path}/{path}/{self.table_name}"
69
+ dataset = pa_ds.dataset(table_path, format=self.table_format)
70
+ return dataset
71
+
72
+ def _load_from_fsspec_store(self, store: FsspecStore, path: str) -> pa_ds.Dataset:
73
+ """Load the table from an FS store."""
74
+ table_path = f"{store.path}/{path}/{self.table_name}"
75
+ fs = _make_sync_fs(store.fs)
76
+ dataset = pa_ds.dataset(table_path, format=self.table_format, filesystem=fs)
77
+ return dataset
78
+
79
+ def _load_from_in_memory_store(
80
+ self, store: MemoryStore, path: str
81
+ ) -> pa_ds.Dataset:
82
+ """Load the table from an in-memory store."""
83
+ table_path = f"{path}/{self.table_name}"
84
+ table = store._store_dict.get(table_path, None)
85
+ if table is None:
86
+ raise NgioValueError(
87
+ f"Table {self.table_name} not found in the in-memory store at "
88
+ f"path {path}."
89
+ )
90
+ assert isinstance(table, pa.Table)
91
+ dataset = pa_ds.dataset(table)
92
+ return dataset
93
+
94
+ def _load_from_zip_store(self, store: ZipStore, path: str) -> pa_ds.Dataset:
95
+ """Load the table from a zip store."""
96
+ raise NotImplementedError("Zip store loading is not implemented yet.")
97
+
98
+ def _load_pyarrow_dataset(self) -> pa_ds.Dataset:
99
+ """Load the table as a pyarrow Dataset."""
100
+ store = self._group_handler.store
101
+ path = self._group_handler.group.path
102
+ if isinstance(store, LocalStore):
103
+ return self._load_from_local_store(store, path)
104
+ elif isinstance(store, FsspecStore):
105
+ return self._load_from_fsspec_store(store, path)
106
+ elif isinstance(store, MemoryStore):
107
+ return self._load_from_in_memory_store(store, path)
108
+ elif isinstance(store, ZipStore):
109
+ return self._load_from_zip_store(store, path)
110
+ self._raise_store_type_not_supported()
111
+
112
+ def load_as_pandas_df(self) -> DataFrame:
113
+ """Load the table as a pandas DataFrame."""
114
+ dataset = self._load_pyarrow_dataset()
115
+ dataframe = dataset.to_table().to_pandas()
116
+ dataframe = normalize_pandas_df(
117
+ dataframe,
118
+ index_key=self.index_key,
119
+ index_type=self.index_type,
120
+ reset_index=False,
121
+ )
122
+ return dataframe
123
+
124
+ def load(self) -> DataFrame:
125
+ """Load the table as a pandas DataFrame."""
126
+ return self.load_as_pandas_df()
127
+
128
+ def load_as_polars_lf(self) -> LazyFrame:
129
+ """Load the table as a polars LazyFrame."""
130
+ dataset = self._load_pyarrow_dataset()
131
+ lazy_frame = pl.scan_pyarrow_dataset(dataset)
132
+ if not isinstance(lazy_frame, LazyFrame):
133
+ raise NgioValueError(
134
+ "Table is not a lazy frame. Please report this issue as an ngio bug."
135
+ f" {type(lazy_frame)}"
136
+ )
137
+
138
+ lazy_frame = normalize_polars_lf(
139
+ lazy_frame,
140
+ index_key=self.index_key,
141
+ index_type=self.index_type,
142
+ )
143
+ return lazy_frame
144
+
145
+ def _write_to_stream(self, stream, table: pa.Table) -> None:
146
+ """Write the table to a stream."""
147
+ if self.table_format == "parquet":
148
+ pa_parquet.write_table(table, stream)
149
+ elif self.table_format == "csv":
150
+ pa_csv.write_csv(table, stream)
151
+ else:
152
+ raise NgioValueError(
153
+ f"Unsupported table format: {self.table_format}. "
154
+ "Supported formats are 'parquet' and 'csv'."
155
+ )
156
+
157
+ def _write_to_local_store(
158
+ self, store: LocalStore, path: str, table: pa.Table
159
+ ) -> None:
160
+ """Write the table to a directory store."""
161
+ root_path = store.root
162
+ table_path = f"{root_path}/{path}/{self.table_name}"
163
+ self._write_to_stream(table_path, table)
164
+
165
+ def _write_to_fsspec_store(
166
+ self, store: FsspecStore, path: str, table: pa.Table
167
+ ) -> None:
168
+ """Write the table to an FS store."""
169
+ table_path = f"{store.path}/{path}/{self.table_name}"
170
+ fs = _make_sync_fs(store.fs)
171
+ fs = pa_fs.PyFileSystem(pa_fs.FSSpecHandler(fs))
172
+ with fs.open_output_stream(table_path) as out_stream:
173
+ self._write_to_stream(out_stream, table)
174
+
175
+ def _write_to_in_memory_store(
176
+ self, store: MemoryStore, path: str, table: pa.Table
177
+ ) -> None:
178
+ """Write the table to an in-memory store."""
179
+ table_path = f"{path}/{self.table_name}"
180
+ store._store_dict[table_path] = table
181
+
182
+ def _write_to_zip_store(self, store: ZipStore, path: str, table: pa.Table) -> None:
183
+ """Write the table to a zip store."""
184
+ raise NotImplementedError("Writing to zip store is not implemented yet.")
185
+
186
+ def _write_pyarrow_dataset(self, dataset: pa.Table) -> None:
187
+ """Write the table from a pyarrow Dataset."""
188
+ store = self._group_handler.store
189
+ path = self._group_handler.group.path
190
+ if isinstance(store, LocalStore):
191
+ return self._write_to_local_store(store=store, path=path, table=dataset)
192
+ elif isinstance(store, FsspecStore):
193
+ return self._write_to_fsspec_store(store=store, path=path, table=dataset)
194
+ elif isinstance(store, MemoryStore):
195
+ return self._write_to_in_memory_store(store=store, path=path, table=dataset)
196
+ elif isinstance(store, ZipStore):
197
+ return self._write_to_zip_store(store=store, path=path, table=dataset)
198
+ self._raise_store_type_not_supported()
199
+
200
+ def write_from_pandas(self, table: DataFrame) -> None:
201
+ """Write the table from a pandas DataFrame."""
202
+ table = normalize_pandas_df(
203
+ table,
204
+ index_key=self.index_key,
205
+ index_type=self.index_type,
206
+ reset_index=True,
207
+ )
208
+ table = pa.Table.from_pandas(table, preserve_index=False)
209
+ self._write_pyarrow_dataset(table)
210
+
211
+ def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
212
+ """Write the table from a polars DataFrame or LazyFrame."""
213
+ table = normalize_polars_lf(
214
+ table,
215
+ index_key=self.index_key,
216
+ index_type=self.index_type,
217
+ )
218
+
219
+ if isinstance(table, LazyFrame):
220
+ table = table.collect()
221
+ table = table.to_arrow()
222
+ self._write_pyarrow_dataset(table)