ngio 0.5.0__py3-none-any.whl → 0.5.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. ngio/__init__.py +2 -5
  2. ngio/common/__init__.py +6 -11
  3. ngio/common/_masking_roi.py +54 -34
  4. ngio/common/_pyramid.py +87 -321
  5. ngio/common/_roi.py +330 -258
  6. ngio/experimental/iterators/_feature.py +3 -3
  7. ngio/experimental/iterators/_rois_utils.py +11 -10
  8. ngio/hcs/_plate.py +136 -192
  9. ngio/images/_abstract_image.py +35 -539
  10. ngio/images/_create.py +283 -0
  11. ngio/images/_create_synt_container.py +43 -40
  12. ngio/images/_image.py +251 -517
  13. ngio/images/_label.py +172 -249
  14. ngio/images/_masked_image.py +2 -2
  15. ngio/images/_ome_zarr_container.py +241 -644
  16. ngio/io_pipes/_io_pipes.py +9 -9
  17. ngio/io_pipes/_io_pipes_masked.py +7 -7
  18. ngio/io_pipes/_io_pipes_roi.py +6 -6
  19. ngio/io_pipes/_io_pipes_types.py +3 -3
  20. ngio/io_pipes/_match_shape.py +8 -6
  21. ngio/io_pipes/_ops_slices_utils.py +5 -8
  22. ngio/ome_zarr_meta/__init__.py +18 -29
  23. ngio/ome_zarr_meta/_meta_handlers.py +708 -392
  24. ngio/ome_zarr_meta/ngio_specs/__init__.py +0 -4
  25. ngio/ome_zarr_meta/ngio_specs/_axes.py +51 -152
  26. ngio/ome_zarr_meta/ngio_specs/_dataset.py +22 -13
  27. ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +91 -129
  28. ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +68 -57
  29. ngio/ome_zarr_meta/v04/__init__.py +1 -5
  30. ngio/ome_zarr_meta/v04/{_v04_spec.py → _v04_spec_utils.py} +85 -54
  31. ngio/ome_zarr_meta/v05/__init__.py +1 -5
  32. ngio/ome_zarr_meta/v05/{_v05_spec.py → _v05_spec_utils.py} +87 -64
  33. ngio/resources/__init__.py +1 -1
  34. ngio/resources/resource_model.py +1 -1
  35. ngio/tables/_tables_container.py +27 -85
  36. ngio/tables/backends/_anndata.py +8 -58
  37. ngio/tables/backends/_anndata_utils.py +6 -1
  38. ngio/tables/backends/_csv.py +19 -3
  39. ngio/tables/backends/_json.py +13 -10
  40. ngio/tables/backends/_non_zarr_backends.py +196 -0
  41. ngio/tables/backends/_parquet.py +31 -3
  42. ngio/tables/v1/_roi_table.py +27 -44
  43. ngio/utils/__init__.py +12 -8
  44. ngio/utils/_datasets.py +0 -6
  45. ngio/utils/_logger.py +50 -0
  46. ngio/utils/_zarr_utils.py +250 -292
  47. {ngio-0.5.0.dist-info → ngio-0.5.0a1.dist-info}/METADATA +6 -13
  48. ngio-0.5.0a1.dist-info/RECORD +88 -0
  49. {ngio-0.5.0.dist-info → ngio-0.5.0a1.dist-info}/WHEEL +1 -1
  50. ngio/images/_create_utils.py +0 -406
  51. ngio/tables/backends/_py_arrow_backends.py +0 -222
  52. ngio/utils/_cache.py +0 -48
  53. ngio-0.5.0.dist-info/RECORD +0 -88
  54. {ngio-0.5.0.dist-info → ngio-0.5.0a1.dist-info}/licenses/LICENSE +0 -0
@@ -229,10 +229,10 @@ class ImplementedTables:
229
229
 
230
230
 
231
231
  class TablesContainer:
232
- """A class to handle the /tables group in an OME-NGFF file."""
232
+ """A class to handle the /labels group in an OME-NGFF file."""
233
233
 
234
234
  def __init__(self, group_handler: ZarrGroupHandler) -> None:
235
- """Initialize the TablesContainer."""
235
+ """Initialize the LabelGroupHandler."""
236
236
  self._group_handler = group_handler
237
237
 
238
238
  # Validate the group
@@ -252,24 +252,17 @@ class TablesContainer:
252
252
  )
253
253
 
254
254
  def _get_tables_list(self) -> list[str]:
255
- """Return the list of table names from the group attributes."""
255
+ """Create the /tables group if it doesn't exist."""
256
256
  attrs = self._group_handler.load_attrs()
257
257
  return attrs.get("tables", [])
258
258
 
259
259
  def _get_table_group_handler(self, name: str) -> ZarrGroupHandler:
260
260
  """Get the group handler for a table."""
261
- handler = self._group_handler.get_handler(path=name)
261
+ handler = self._group_handler.derive_handler(path=name)
262
262
  return handler
263
263
 
264
264
  def list(self, filter_types: TypedTable | str | None = None) -> list[str]:
265
- """List all tables in the group.
266
-
267
- Args:
268
- filter_types: If provided, only return tables of this type.
269
-
270
- Returns:
271
- A list of table names.
272
- """
265
+ """List all labels in the group."""
273
266
  tables = self._get_tables_list()
274
267
  if filter_types is None:
275
268
  return tables
@@ -288,16 +281,7 @@ class TablesContainer:
288
281
  backend: TableBackend | None = None,
289
282
  strict: bool = True,
290
283
  ) -> Table:
291
- """Get a table from the group.
292
-
293
- Args:
294
- name: The name of the table.
295
- backend: The backend to use for reading the table.
296
- strict: If True, raise an error if the table type is not implemented.
297
-
298
- Returns:
299
- The table object.
300
- """
284
+ """Get a label from the group."""
301
285
  if name not in self.list():
302
286
  raise NgioValueError(f"Table '{name}' not found in the group.")
303
287
 
@@ -317,16 +301,7 @@ class TablesContainer:
317
301
  table_cls: type[TableType],
318
302
  backend: TableBackend | None = None,
319
303
  ) -> TableType:
320
- """Get a table from the group as a specific type.
321
-
322
- Args:
323
- name: The name of the table.
324
- table_cls: The table class to use for loading the table.
325
- backend: The backend to use for reading the table.
326
-
327
- Returns:
328
- The table object of the specified type.
329
- """
304
+ """Get a table from the group as a specific type."""
330
305
  if name not in self.list():
331
306
  raise NgioValueError(f"Table '{name}' not found in the group.")
332
307
 
@@ -336,27 +311,6 @@ class TablesContainer:
336
311
  backend=backend,
337
312
  ) # type: ignore[return-value]
338
313
 
339
- def delete(self, name: str, missing_ok: bool = False) -> None:
340
- """Delete a table from the group.
341
-
342
- Args:
343
- name (str): The name of the table to delete.
344
- missing_ok (bool): If True, do not raise an error if
345
- the table does not exist.
346
- """
347
- existing_tables = self._get_tables_list()
348
- if name not in existing_tables:
349
- if missing_ok:
350
- return
351
- raise NgioValueError(
352
- f"Table '{name}' not found in the Tables group. "
353
- f"Available tables: {existing_tables}"
354
- )
355
-
356
- self._group_handler.delete_group(name)
357
- existing_tables.remove(name)
358
- self._group_handler.write_attrs({"tables": existing_tables})
359
-
360
314
  def add(
361
315
  self,
362
316
  name: str,
@@ -364,14 +318,7 @@ class TablesContainer:
364
318
  backend: TableBackend = DefaultTableBackend,
365
319
  overwrite: bool = False,
366
320
  ) -> None:
367
- """Add a table to the group.
368
-
369
- Args:
370
- name: The name of the table.
371
- table: The table object to add.
372
- backend: The backend to use for writing the table.
373
- overwrite: Whether to overwrite an existing table with the same name.
374
- """
321
+ """Add a table to the group."""
375
322
  existing_tables = self._get_tables_list()
376
323
  if name in existing_tables and not overwrite:
377
324
  raise NgioValueError(
@@ -379,7 +326,9 @@ class TablesContainer:
379
326
  "Use overwrite=True to replace it."
380
327
  )
381
328
 
382
- table_handler = self._group_handler.get_handler(path=name, overwrite=overwrite)
329
+ table_handler = self._group_handler.derive_handler(
330
+ path=name, overwrite=overwrite
331
+ )
383
332
 
384
333
  if backend is None:
385
334
  backend = table.backend_name
@@ -410,10 +359,13 @@ ImplementedTables().add_implementation(ConditionTableV1)
410
359
  def open_tables_container(
411
360
  store: StoreOrGroup,
412
361
  cache: bool = False,
413
- mode: AccessModeLiteral = "r+",
362
+ mode: AccessModeLiteral = "a",
363
+ parallel_safe: bool = False,
414
364
  ) -> TablesContainer:
415
365
  """Open a table handler from a Zarr store."""
416
- handler = ZarrGroupHandler(store=store, cache=cache, mode=mode)
366
+ handler = ZarrGroupHandler(
367
+ store=store, cache=cache, mode=mode, parallel_safe=parallel_safe
368
+ )
417
369
  return TablesContainer(handler)
418
370
 
419
371
 
@@ -421,13 +373,12 @@ def open_table(
421
373
  store: StoreOrGroup,
422
374
  backend: TableBackend | None = None,
423
375
  cache: bool = False,
424
- mode: AccessModeLiteral = "r+",
376
+ mode: AccessModeLiteral = "a",
377
+ parallel_safe: bool = False,
425
378
  ) -> Table:
426
379
  """Open a table from a Zarr store."""
427
380
  handler = ZarrGroupHandler(
428
- store=store,
429
- cache=cache,
430
- mode=mode,
381
+ store=store, cache=cache, mode=mode, parallel_safe=parallel_safe
431
382
  )
432
383
  meta = _get_meta(handler)
433
384
  return ImplementedTables().get_table(
@@ -440,13 +391,12 @@ def open_table_as(
440
391
  table_cls: type[TableType],
441
392
  backend: TableBackend | None = None,
442
393
  cache: bool = False,
443
- mode: AccessModeLiteral = "r+",
394
+ mode: AccessModeLiteral = "a",
395
+ parallel_safe: bool = False,
444
396
  ) -> TableType:
445
397
  """Open a table from a Zarr store as a specific type."""
446
398
  handler = ZarrGroupHandler(
447
- store=store,
448
- cache=cache,
449
- mode=mode,
399
+ store=store, cache=cache, mode=mode, parallel_safe=parallel_safe
450
400
  )
451
401
  return table_cls.from_handler(
452
402
  handler=handler,
@@ -460,20 +410,12 @@ def write_table(
460
410
  backend: TableBackend = DefaultTableBackend,
461
411
  cache: bool = False,
462
412
  mode: AccessModeLiteral = "a",
413
+ parallel_safe: bool = False,
463
414
  ) -> None:
464
- """Write a table to a Zarr store.
465
-
466
- A table will be created at the given store location.
467
-
468
- Args:
469
- store (StoreOrGroup): The Zarr store or group to write the table to.
470
- table (Table): The table to write.
471
- backend (TableBackend): The backend to use for writing the table.
472
- cache (bool): Whether to use caching for the Zarr group handler.
473
- mode (AccessModeLiteral): The access mode to use for the Zarr group handler.
474
-
475
- """
476
- handler = ZarrGroupHandler(store=store, cache=cache, mode=mode)
415
+ """Write a table to a Zarr store."""
416
+ handler = ZarrGroupHandler(
417
+ store=store, cache=cache, mode=mode, parallel_safe=parallel_safe
418
+ )
477
419
  table.set_backend(
478
420
  handler=handler,
479
421
  backend=backend,
@@ -1,10 +1,8 @@
1
- import zarr
2
1
  from anndata import AnnData
3
2
  from anndata._settings import settings
4
3
  from pandas import DataFrame
5
4
  from polars import DataFrame as PolarsDataFrame
6
5
  from polars import LazyFrame
7
- from zarr.storage import FsspecStore, LocalStore, MemoryStore
8
6
 
9
7
  from ngio.tables.backends._abstract_backend import AbstractTableBackend
10
8
  from ngio.tables.backends._anndata_utils import (
@@ -15,7 +13,7 @@ from ngio.tables.backends._utils import (
15
13
  convert_polars_to_anndata,
16
14
  normalize_anndata,
17
15
  )
18
- from ngio.utils import NgioValueError, copy_group
16
+ from ngio.utils import NgioValueError
19
17
 
20
18
 
21
19
  class AnnDataBackend(AbstractTableBackend):
@@ -52,66 +50,18 @@ class AnnDataBackend(AbstractTableBackend):
52
50
  """Load the table as an AnnData object."""
53
51
  return self.load_as_anndata()
54
52
 
55
- def _write_to_local_store(
56
- self, store: LocalStore, path: str, table: AnnData
57
- ) -> None:
58
- """Write the AnnData table to a LocalStore."""
59
- store_path = f"{store.root}/{path}"
60
- table.write_zarr(store_path)
61
-
62
- def _write_to_fsspec_store(
63
- self, store: FsspecStore, path: str, table: AnnData
64
- ) -> None:
65
- """Write the AnnData table to a FsspecStore."""
66
- full_url = f"{store.path}/{path}"
67
- fs = store.fs
68
- mapper = fs.get_mapper(full_url)
69
- table.write_zarr(mapper)
70
-
71
- def _write_to_memory_store(
72
- self, store: MemoryStore, path: str, table: AnnData
73
- ) -> None:
74
- """Write the AnnData table to a MemoryStore."""
75
- store = MemoryStore()
76
- table.write_zarr(store)
77
- anndata_group = zarr.open_group(store, mode="r")
78
- copy_group(
79
- anndata_group,
80
- self._group_handler._group,
81
- suppress_warnings=True,
82
- )
83
-
84
53
  def write_from_anndata(self, table: AnnData) -> None:
85
54
  """Serialize the table from an AnnData object."""
86
- # Make sure to use the correct zarr format
87
- settings.zarr_write_format = self._group_handler.zarr_format
88
- store = self._group_handler.store
89
- path = self._group_handler.group.path
90
- if isinstance(store, LocalStore):
91
- self._write_to_local_store(
92
- store,
93
- path,
94
- table,
95
- )
96
- elif isinstance(store, FsspecStore):
97
- self._write_to_fsspec_store(
98
- store,
99
- path,
100
- table,
101
- )
102
- elif isinstance(store, MemoryStore):
103
- self._write_to_memory_store(
104
- store,
105
- path,
106
- table,
107
- )
108
- else:
55
+ full_url = self._group_handler.full_url
56
+ if full_url is None:
109
57
  raise NgioValueError(
110
- f"Ngio does not support writing an AnnData table to a "
111
- f"store of type {type(store)}. "
58
+ f"Ngio does not support writing file from a "
59
+ f"store of type {type(self._group_handler)}. "
112
60
  "Please make sure to use a compatible "
113
- "store like a LocalStore, or FsspecStore."
61
+ "store like a zarr.DirectoryStore."
114
62
  )
63
+ settings.zarr_write_format = self._group_handler.zarr_format
64
+ table.write_zarr(full_url)
115
65
 
116
66
  def write_from_pandas(self, table: DataFrame) -> None:
117
67
  """Serialize the table from a pandas DataFrame."""
@@ -9,6 +9,7 @@ from anndata._io.utils import _read_legacy_raw
9
9
  from anndata._io.zarr import read_dataframe
10
10
  from anndata.compat import _clean_uns
11
11
  from anndata.experimental import read_dispatched
12
+ from zarr.storage import LocalStore
12
13
 
13
14
  from ngio.utils import (
14
15
  NgioValueError,
@@ -34,6 +35,10 @@ def custom_anndata_read_zarr(
34
35
  elem_to_read (Sequence[str] | None): The elements to read from the store.
35
36
  """
36
37
  group = open_group_wrapper(store=store, mode="r")
38
+
39
+ if not isinstance(group.store, LocalStore):
40
+ elem_to_read = ["X", "obs", "var"]
41
+
37
42
  if elem_to_read is None:
38
43
  elem_to_read = [
39
44
  "X",
@@ -84,7 +89,7 @@ def custom_anndata_read_zarr(
84
89
  _clean_uns(adata)
85
90
 
86
91
  if isinstance(adata, dict):
87
- adata = AnnData(**adata) # type: ignore
92
+ adata = AnnData(**adata)
88
93
  if not isinstance(adata, AnnData):
89
94
  raise NgioValueError(f"Expected an AnnData object, but got {type(adata)}")
90
95
  return adata
@@ -1,7 +1,20 @@
1
- from ngio.tables.backends._py_arrow_backends import PyArrowBackend
1
+ import pandas as pd
2
+ import polars as pl
2
3
 
4
+ from ngio.tables.backends._non_zarr_backends import NonZarrBaseBackend
3
5
 
4
- class CsvTableBackend(PyArrowBackend):
6
+
7
def write_lf_to_csv(path: str, table: pl.DataFrame) -> None:
    """Serialize an already-collected polars DataFrame as a CSV file.

    Args:
        path: Destination of the CSV file.
        table: The polars DataFrame to write.
    """
    table.write_csv(file=path)
10
+
11
+
12
def write_df_to_csv(path: str, table: pd.DataFrame) -> None:
    """Serialize a pandas DataFrame as a CSV file, leaving out the index.

    Args:
        path: Destination of the CSV file.
        table: The pandas DataFrame to write.
    """
    table.to_csv(path_or_buf=path, index=False)
15
+
16
+
17
+ class CsvTableBackend(NonZarrBaseBackend):
5
18
  """A class to load and write small tables in CSV format."""
6
19
 
7
20
  def __init__(
@@ -9,8 +22,11 @@ class CsvTableBackend(PyArrowBackend):
9
22
  ):
10
23
  """Initialize the CsvTableBackend."""
11
24
  super().__init__(
25
+ lf_reader=pl.scan_csv,
26
+ df_reader=pd.read_csv,
27
+ lf_writer=write_lf_to_csv,
28
+ df_writer=write_df_to_csv,
12
29
  table_name="table.csv",
13
- table_format="csv",
14
30
  )
15
31
 
16
32
  @staticmethod
@@ -8,7 +8,7 @@ from ngio.tables.backends._utils import (
8
8
  normalize_pandas_df,
9
9
  normalize_polars_lf,
10
10
  )
11
- from ngio.utils import NgioError
11
+ from ngio.utils import NgioFileNotFoundError
12
12
 
13
13
 
14
14
  class JsonTableBackend(AbstractTableBackend):
@@ -37,19 +37,22 @@ class JsonTableBackend(AbstractTableBackend):
37
37
  def _get_table_group(self):
38
38
  """Get the table group, creating it if it doesn't exist."""
39
39
  try:
40
- table_group = self._group_handler.get_group(path="table", create_mode=True)
41
- except NgioError as e:
42
- raise NgioError(
43
- "Could not get or create a 'table' group in the store "
44
- f"{self._group_handler.store} path "
45
- f"{self._group_handler.group.path}/table."
46
- ) from e
40
+ table_group = self._group_handler.get_group(path="table")
41
+ except NgioFileNotFoundError:
42
+ table_group = self._group_handler.group.create_group("table")
47
43
  return table_group
48
44
 
49
- def load_as_pandas_df(self) -> DataFrame:
45
+ def _load_as_pandas_df(self) -> DataFrame:
50
46
  """Load the table as a pandas DataFrame."""
51
- table_dict = self._get_table_group().attrs.asdict()
47
+ table_group = self._get_table_group()
48
+ table_dict = dict(table_group.attrs)
49
+
52
50
  data_frame = pd.DataFrame.from_dict(table_dict)
51
+ return data_frame
52
+
53
+ def load_as_pandas_df(self) -> DataFrame:
54
+ """Load the table as a pandas DataFrame."""
55
+ data_frame = self._load_as_pandas_df()
53
56
  data_frame = normalize_pandas_df(
54
57
  data_frame,
55
58
  index_key=self.index_key,
@@ -0,0 +1,196 @@
1
+ import io
2
+ from collections.abc import Callable
3
+ from typing import Any
4
+
5
+ from pandas import DataFrame
6
+ from polars import DataFrame as PolarsDataFrame
7
+ from polars import LazyFrame
8
+ from zarr.storage import FsspecStore, LocalStore
9
+
10
+ from ngio.tables.backends._abstract_backend import AbstractTableBackend
11
+ from ngio.tables.backends._utils import normalize_pandas_df, normalize_polars_lf
12
+ from ngio.utils import NgioFileNotFoundError, NgioValueError
13
+
14
+
15
class NonZarrBaseBackend(AbstractTableBackend):
    """Base backend for tables stored as a single non-Zarr file.

    The table is kept in one file called ``table_name`` located under the
    URL of the group handler. The file format is defined entirely by the
    reader and writer callables handed to ``__init__`` (see the CSV and
    Parquet backends).

    Reading is implemented for ``LocalStore`` and ``FsspecStore``; writing
    is implemented for ``LocalStore`` only.
    """

    def __init__(
        self,
        df_reader: Callable[[Any], DataFrame],
        lf_reader: Callable[[Any], LazyFrame],
        df_writer: Callable[[str, DataFrame], None],
        lf_writer: Callable[[str, PolarsDataFrame], None],
        table_name: str,
    ):
        """Store the format-specific callables and the table file name.

        Args:
            df_reader: Callable that reads a path or binary buffer into a
                pandas DataFrame.
            lf_reader: Callable that reads a path or open binary file into
                a polars LazyFrame.
            df_writer: Callable ``(path, table)`` that writes a pandas
                DataFrame.
            lf_writer: Callable ``(path, table)`` that writes a collected
                polars DataFrame.
            table_name: File name of the table, e.g. ``"table.csv"``.
        """
        self.df_reader = df_reader
        self.lf_reader = lf_reader
        self.df_writer = df_writer
        self.lf_writer = lf_writer
        self.table_name = table_name

    @staticmethod
    def implements_anndata() -> bool:
        """Whether the handler implements the anndata protocol."""
        return False

    @staticmethod
    def implements_pandas() -> bool:
        """Whether the handler implements the dataframe protocol."""
        return True

    @staticmethod
    def implements_polars() -> bool:
        """Whether the handler implements the polars protocol."""
        return True

    @staticmethod
    def backend_name() -> str:
        """Return the name of the backend."""
        raise NotImplementedError(
            "The backend_name method must be implemented in the subclass."
        )

    def _table_extension(self) -> str:
        """Return the text after the last dot of ``table_name``."""
        return self.table_name.split(".")[-1]

    def _unsupported_store_error(
        self, operation: str, supported: str
    ) -> NgioValueError:
        """Build the error raised when the current store cannot be used.

        Args:
            operation: Description of the attempted action, ending with its
                preposition (e.g. ``"reading a csv table from"``).
            supported: Human-readable list of the store types that work.
        """
        store = self._group_handler.store
        return NgioValueError(
            f"Ngio does not support {operation} a "
            f"store of type {type(store)}. "
            "Please make sure to use a compatible "
            f"store like a {supported}."
        )

    def _load_from_directory_store(self, reader):
        """Load the table from a local store by handing its path to ``reader``."""
        url = self._group_handler.full_url
        if url is None:
            raise self._unsupported_store_error(
                operation=f"reading a {self._table_extension()} table from",
                supported="LocalStore",
            )
        return reader(f"{url}/{self.table_name}")

    def _load_from_fs_store_df(self, reader):
        """Load the table from an fsspec store through an in-memory buffer."""
        table_path = f"{self._group_handler.group.path}/{self.table_name}"
        # NOTE(review): this assumes ``store.get`` is synchronous and returns
        # the raw bytes (or None when the key is missing) - confirm against
        # the store API of the zarr version in use.
        bytes_table = self._group_handler.store.get(table_path)
        if bytes_table is None:
            raise NgioFileNotFoundError(f"No table found at {table_path}. ")
        return reader(io.BytesIO(bytes_table))

    def _load_from_fs_store_lf(self, reader):
        """Load the table from an fsspec store through an open binary file."""
        full_url = self._group_handler.full_url
        if full_url is None:
            # Without a URL the path below would silently become "None/<name>".
            raise NgioValueError(
                f"Cannot read {self.table_name}: no URL is available for a "
                f"store of type {type(self._group_handler.store)}."
            )
        table_path = f"{full_url}/{self.table_name}"
        store_fs = self._group_handler.store.fs  # type: ignore (in this context, store_fs is a fs.FSStore)
        # NOTE(review): the handle is closed when the with-block exits;
        # confirm that ``reader`` does not defer reading past that point.
        with store_fs.open(table_path, "rb") as f:
            dataframe = reader(f)
        return dataframe

    def _read_table(self, reader, fsspec_loader):
        """Dispatch a read to the loader matching the type of the store.

        Args:
            reader: The format-specific reader callable.
            fsspec_loader: Bound loader used when the store is a FsspecStore.
        """
        store = self._group_handler.store
        if isinstance(store, LocalStore):
            return self._load_from_directory_store(reader=reader)
        if isinstance(store, FsspecStore):
            return fsspec_loader(reader=reader)
        raise self._unsupported_store_error(
            operation=f"reading a {self._table_extension()} table from",
            supported="LocalStore or FsspecStore",
        )

    def load_as_pandas_df(self) -> DataFrame:
        """Load the table as a normalized pandas DataFrame.

        Raises:
            NgioValueError: If the store is neither a LocalStore nor a
                FsspecStore.
        """
        dataframe = self._read_table(
            reader=self.df_reader,
            fsspec_loader=self._load_from_fs_store_df,
        )
        return normalize_pandas_df(
            dataframe,
            index_key=self.index_key,
            index_type=self.index_type,
            reset_index=False,
        )

    def load(self) -> DataFrame:
        """Load the table as a pandas DataFrame."""
        return self.load_as_pandas_df()

    def load_as_polars_lf(self) -> LazyFrame:
        """Load the table as a normalized polars LazyFrame.

        Raises:
            NgioValueError: If the store is neither a LocalStore nor a
                FsspecStore, or if the reader did not return a LazyFrame.
        """
        lazy_frame = self._read_table(
            reader=self.lf_reader,
            fsspec_loader=self._load_from_fs_store_lf,
        )
        if not isinstance(lazy_frame, LazyFrame):
            raise NgioValueError(
                "Table is not a lazy frame. Please report this issue as an ngio bug."
                f" {type(lazy_frame)}"
            )

        return normalize_polars_lf(
            lazy_frame,
            index_key=self.index_key,
            index_type=self.index_type,
        )

    def _get_store_url(self) -> str:
        """Return the URL under which the table file is written.

        Raises:
            NgioValueError: If the store is not a LocalStore or the group
                handler has no URL.
        """
        store = self._group_handler.store
        # Only local stores are writable; the URL is not queried otherwise.
        full_url = (
            self._group_handler.full_url if isinstance(store, LocalStore) else None
        )
        if full_url is None:
            raise self._unsupported_store_error(
                operation=f"writing a {self._table_extension()} file to",
                supported="LocalStore",
            )
        return full_url

    def write_from_pandas(self, table: DataFrame) -> None:
        """Write the table from a pandas DataFrame."""
        table = normalize_pandas_df(
            table,
            index_key=self.index_key,
            index_type=self.index_type,
            reset_index=True,
        )
        table_path = f"{self._get_store_url()}/{self.table_name}"
        self.df_writer(table_path, table)

    def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
        """Write the table from a polars DataFrame or LazyFrame."""
        table = normalize_polars_lf(
            table,
            index_key=self.index_key,
            index_type=self.index_type,
        )

        # The writer expects an eager DataFrame.
        if isinstance(table, LazyFrame):
            table = table.collect()

        table_path = f"{self._get_store_url()}/{self.table_name}"
        self.lf_writer(table_path, table)
@@ -1,7 +1,32 @@
1
- from ngio.tables.backends._py_arrow_backends import PyArrowBackend
1
+ import pandas as pd
2
+ import polars as pl
2
3
 
4
+ from ngio.tables.backends._non_zarr_backends import NonZarrBaseBackend
3
5
 
4
- class ParquetTableBackend(PyArrowBackend):
6
+
7
def write_lf_to_parquet(path: str, table: pl.DataFrame) -> None:
    """Serialize a polars DataFrame as a Parquet file.

    Categorical columns are converted to strings before writing (for
    pandas compatibility).

    Args:
        path: Destination of the Parquet file.
        table: The polars DataFrame to write.
    """
    schema = table.collect_schema()
    to_stringify = [
        column
        for column, dtype in zip(schema.names(), schema.dtypes(), strict=True)
        if dtype == pl.Categorical
    ]

    if to_stringify:
        table = table.with_columns(
            [pl.col(column).cast(pl.Utf8) for column in to_stringify]
        )

    table.write_parquet(path)
22
+
23
+
24
def write_df_to_parquet(path: str, table: pd.DataFrame) -> None:
    """Serialize a pandas DataFrame as a Parquet file, leaving out the index.

    Args:
        path: Destination of the Parquet file.
        table: The pandas DataFrame to write.
    """
    table.to_parquet(path=path, index=False)
27
+
28
+
29
+ class ParquetTableBackend(NonZarrBaseBackend):
5
30
  """A class to load and write small tables in Parquet format."""
6
31
 
7
32
  def __init__(
@@ -9,8 +34,11 @@ class ParquetTableBackend(PyArrowBackend):
9
34
  ):
10
35
  """Initialize the ParquetTableBackend."""
11
36
  super().__init__(
37
+ lf_reader=pl.scan_parquet,
38
+ df_reader=pd.read_parquet,
39
+ lf_writer=write_lf_to_parquet,
40
+ df_writer=write_df_to_parquet,
12
41
  table_name="table.parquet",
13
- table_format="parquet",
14
42
  )
15
43
 
16
44
  @staticmethod