ngio 0.5.0a2__py3-none-any.whl → 0.5.0b1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (49)
  1. ngio/__init__.py +2 -2
  2. ngio/common/__init__.py +11 -6
  3. ngio/common/_masking_roi.py +12 -41
  4. ngio/common/_pyramid.py +206 -76
  5. ngio/common/_roi.py +257 -329
  6. ngio/experimental/iterators/_feature.py +3 -3
  7. ngio/experimental/iterators/_rois_utils.py +10 -11
  8. ngio/hcs/_plate.py +50 -43
  9. ngio/images/_abstract_image.py +418 -35
  10. ngio/images/_create_synt_container.py +35 -42
  11. ngio/images/_create_utils.py +423 -0
  12. ngio/images/_image.py +162 -176
  13. ngio/images/_label.py +182 -137
  14. ngio/images/_ome_zarr_container.py +372 -197
  15. ngio/io_pipes/_io_pipes.py +9 -9
  16. ngio/io_pipes/_io_pipes_masked.py +7 -7
  17. ngio/io_pipes/_io_pipes_roi.py +6 -6
  18. ngio/io_pipes/_io_pipes_types.py +3 -3
  19. ngio/io_pipes/_match_shape.py +5 -4
  20. ngio/io_pipes/_ops_slices_utils.py +8 -5
  21. ngio/ome_zarr_meta/__init__.py +21 -18
  22. ngio/ome_zarr_meta/_meta_handlers.py +409 -701
  23. ngio/ome_zarr_meta/ngio_specs/__init__.py +2 -0
  24. ngio/ome_zarr_meta/ngio_specs/_axes.py +1 -0
  25. ngio/ome_zarr_meta/ngio_specs/_dataset.py +13 -22
  26. ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +54 -61
  27. ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +21 -68
  28. ngio/ome_zarr_meta/v04/__init__.py +5 -1
  29. ngio/ome_zarr_meta/v04/{_v04_spec_utils.py → _v04_spec.py} +49 -63
  30. ngio/ome_zarr_meta/v05/__init__.py +5 -1
  31. ngio/ome_zarr_meta/v05/{_v05_spec_utils.py → _v05_spec.py} +57 -64
  32. ngio/tables/_tables_container.py +2 -4
  33. ngio/tables/backends/_anndata.py +58 -8
  34. ngio/tables/backends/_anndata_utils.py +1 -6
  35. ngio/tables/backends/_csv.py +3 -19
  36. ngio/tables/backends/_json.py +10 -13
  37. ngio/tables/backends/_parquet.py +3 -31
  38. ngio/tables/backends/_py_arrow_backends.py +222 -0
  39. ngio/tables/v1/_roi_table.py +41 -24
  40. ngio/utils/__init__.py +4 -12
  41. ngio/utils/_zarr_utils.py +163 -53
  42. {ngio-0.5.0a2.dist-info → ngio-0.5.0b1.dist-info}/METADATA +6 -2
  43. ngio-0.5.0b1.dist-info/RECORD +88 -0
  44. {ngio-0.5.0a2.dist-info → ngio-0.5.0b1.dist-info}/WHEEL +1 -1
  45. ngio/images/_create.py +0 -287
  46. ngio/tables/backends/_non_zarr_backends.py +0 -196
  47. ngio/utils/_logger.py +0 -50
  48. ngio-0.5.0a2.dist-info/RECORD +0 -89
  49. {ngio-0.5.0a2.dist-info → ngio-0.5.0b1.dist-info}/licenses/LICENSE +0 -0
ngio/tables/backends/_json.py CHANGED
@@ -8,7 +8,7 @@ from ngio.tables.backends._utils import (
     normalize_pandas_df,
     normalize_polars_lf,
 )
-from ngio.utils import NgioFileNotFoundError
+from ngio.utils import NgioError
 
 
 class JsonTableBackend(AbstractTableBackend):
@@ -37,22 +37,19 @@ class JsonTableBackend(AbstractTableBackend):
     def _get_table_group(self):
         """Get the table group, creating it if it doesn't exist."""
         try:
-            table_group = self._group_handler.get_group(path="table")
-        except NgioFileNotFoundError:
-            table_group = self._group_handler.group.create_group("table")
+            table_group = self._group_handler.get_group(path="table", create_mode=True)
+        except NgioError as e:
+            raise NgioError(
+                "Could not get or create a 'table' group in the store "
+                f"{self._group_handler.store} path "
+                f"{self._group_handler.group.path}/table."
+            ) from e
         return table_group
 
-    def _load_as_pandas_df(self) -> DataFrame:
-        """Load the table as a pandas DataFrame."""
-        table_group = self._get_table_group()
-        table_dict = dict(table_group.attrs)
-
-        data_frame = pd.DataFrame.from_dict(table_dict)
-        return data_frame
-
     def load_as_pandas_df(self) -> DataFrame:
         """Load the table as a pandas DataFrame."""
-        data_frame = self._load_as_pandas_df()
+        table_dict = self._get_table_group().attrs.asdict()
+        data_frame = pd.DataFrame.from_dict(table_dict)
         data_frame = normalize_pandas_df(
             data_frame,
             index_key=self.index_key,
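The rewritten `load_as_pandas_df` makes the storage model explicit: the whole table lives in the attributes of a `table` subgroup. A minimal sketch of that round trip using only plain `zarr` and `pandas` (the store and values here are illustrative, not ngio API):

```python
import pandas as pd
import zarr

# Open an in-memory root group; mode="a" creates it if missing.
root = zarr.open_group(store=zarr.storage.MemoryStore(), mode="a")

# Get-or-create the "table" subgroup, mirroring get_group(..., create_mode=True).
table_group = root.require_group("table")

# The table is stored column-wise in the group's JSON attributes.
table_group.attrs.update({"label": [1, 2], "x_micrometer": [0.0, 10.0]})

# Reading back: attrs -> dict -> DataFrame, as in the new load_as_pandas_df.
df = pd.DataFrame.from_dict(table_group.attrs.asdict())
print(df)
```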
ngio/tables/backends/_parquet.py CHANGED
@@ -1,32 +1,7 @@
-import pandas as pd
-import polars as pl
+from ngio.tables.backends._py_arrow_backends import PyArrowBackend
 
-from ngio.tables.backends._non_zarr_backends import NonZarrBaseBackend
 
-
-def write_lf_to_parquet(path: str, table: pl.DataFrame) -> None:
-    """Write a polars DataFrame to a Parquet file."""
-    # make categorical into string (for pandas compatibility)
-    schema = table.collect_schema()
-
-    categorical_columns = []
-    for name, dtype in zip(schema.names(), schema.dtypes(), strict=True):
-        if dtype == pl.Categorical:
-            categorical_columns.append(name)
-
-    for col in categorical_columns:
-        table = table.with_columns(pl.col(col).cast(pl.Utf8))
-
-    # write to parquet
-    table.write_parquet(path)
-
-
-def write_df_to_parquet(path: str, table: pd.DataFrame) -> None:
-    """Write a pandas DataFrame to a Parquet file."""
-    table.to_parquet(path, index=False)
-
-
-class ParquetTableBackend(NonZarrBaseBackend):
+class ParquetTableBackend(PyArrowBackend):
     """A class to load and write small tables in Parquet format."""
 
     def __init__(
@@ -34,11 +9,8 @@ class ParquetTableBackend(NonZarrBaseBackend):
     ):
         """Initialize the ParquetTableBackend."""
         super().__init__(
-            lf_reader=pl.scan_parquet,
-            df_reader=pd.read_parquet,
-            lf_writer=write_lf_to_parquet,
-            df_writer=write_df_to_parquet,
             table_name="table.parquet",
+            table_format="parquet",
         )
 
     @staticmethod
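With the four reader/writer callables gone, a format-specific backend reduces to a file name and a format flag passed to `PyArrowBackend`. For illustration only, the CSV counterpart would plausibly look like this; `_csv.py` (+3 -19 in the list above) is not shown in full, so this is a guess at its shape, not its actual contents:

```python
from ngio.tables.backends._py_arrow_backends import PyArrowBackend


class CsvTableBackend(PyArrowBackend):
    """A class to load and write small tables in CSV format."""

    def __init__(self):
        """Initialize the CsvTableBackend."""
        # Hypothetical: assumes _csv.py mirrors the Parquet change.
        super().__init__(table_name="table.csv", table_format="csv")
```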
ngio/tables/backends/_py_arrow_backends.py ADDED
@@ -0,0 +1,222 @@
+from typing import Literal
+
+import polars as pl
+import pyarrow as pa
+import pyarrow.csv as pa_csv
+import pyarrow.dataset as pa_ds
+import pyarrow.fs as pa_fs
+import pyarrow.parquet as pa_parquet
+from pandas import DataFrame
+from polars import DataFrame as PolarsDataFrame
+from polars import LazyFrame
+from zarr.storage import FsspecStore, LocalStore, MemoryStore, ZipStore
+
+from ngio.tables.backends._abstract_backend import AbstractTableBackend
+from ngio.tables.backends._utils import normalize_pandas_df, normalize_polars_lf
+from ngio.utils import NgioValueError
+from ngio.utils._zarr_utils import _make_sync_fs
+
+
+class PyArrowBackend(AbstractTableBackend):
+    """A class to load and write small tables in CSV format."""
+
+    def __init__(
+        self,
+        table_name: str,
+        table_format: Literal["csv", "parquet"] = "parquet",
+    ):
+        self.table_name = table_name
+        self.table_format = table_format
+
+    @staticmethod
+    def implements_anndata() -> bool:
+        """Whether the handler implements the anndata protocol."""
+        return False
+
+    @staticmethod
+    def implements_pandas() -> bool:
+        """Whether the handler implements the dataframe protocol."""
+        return True
+
+    @staticmethod
+    def implements_polars() -> bool:
+        """Whether the handler implements the polars protocol."""
+        return True
+
+    @staticmethod
+    def backend_name() -> str:
+        """Return the name of the backend."""
+        raise NotImplementedError(
+            "The backend_name method must be implemented in the subclass."
+        )
+
+    def _raise_store_type_not_supported(self):
+        """Raise an error for unsupported store types."""
+        ext = self.table_name.split(".")[-1]
+        store = self._group_handler.store
+        raise NgioValueError(
+            f"Ngio does not support reading a {ext} table from a "
+            f"store of type {type(store)}. "
+            "Please make sure to use a compatible "
+            "store like a LocalStore, or "
+            "FsspecStore, or MemoryStore, or ZipStore."
+        )
+
+    def _load_from_local_store(self, store: LocalStore, path: str) -> pa_ds.Dataset:
+        """Load the table from a directory store."""
+        root_path = store.root
+        table_path = f"{root_path}/{path}/{self.table_name}"
+        dataset = pa_ds.dataset(table_path, format=self.table_format)
+        return dataset
+
+    def _load_from_fsspec_store(self, store: FsspecStore, path: str) -> pa_ds.Dataset:
+        """Load the table from an FS store."""
+        table_path = f"{store.path}/{path}/{self.table_name}"
+        fs = _make_sync_fs(store.fs)
+        dataset = pa_ds.dataset(table_path, format=self.table_format, filesystem=fs)
+        return dataset
+
+    def _load_from_in_memory_store(
+        self, store: MemoryStore, path: str
+    ) -> pa_ds.Dataset:
+        """Load the table from an in-memory store."""
+        table_path = f"{path}/{self.table_name}"
+        table = store._store_dict.get(table_path, None)
+        if table is None:
+            raise NgioValueError(
+                f"Table {self.table_name} not found in the in-memory store at "
+                f"path {path}."
+            )
+        assert isinstance(table, pa.Table)
+        dataset = pa_ds.dataset(table)
+        return dataset
+
+    def _load_from_zip_store(self, store: ZipStore, path: str) -> pa_ds.Dataset:
+        """Load the table from a zip store."""
+        raise NotImplementedError("Zip store loading is not implemented yet.")
+
+    def _load_pyarrow_dataset(self) -> pa_ds.Dataset:
+        """Load the table as a pyarrow Dataset."""
+        store = self._group_handler.store
+        path = self._group_handler.group.path
+        if isinstance(store, LocalStore):
+            return self._load_from_local_store(store, path)
+        elif isinstance(store, FsspecStore):
+            return self._load_from_fsspec_store(store, path)
+        elif isinstance(store, MemoryStore):
+            return self._load_from_in_memory_store(store, path)
+        elif isinstance(store, ZipStore):
+            return self._load_from_zip_store(store, path)
+        self._raise_store_type_not_supported()
+
+    def load_as_pandas_df(self) -> DataFrame:
+        """Load the table as a pandas DataFrame."""
+        dataset = self._load_pyarrow_dataset()
+        dataframe = dataset.to_table().to_pandas()
+        dataframe = normalize_pandas_df(
+            dataframe,
+            index_key=self.index_key,
+            index_type=self.index_type,
+            reset_index=False,
+        )
+        return dataframe
+
+    def load(self) -> DataFrame:
+        """Load the table as a pandas DataFrame."""
+        return self.load_as_pandas_df()
+
+    def load_as_polars_lf(self) -> LazyFrame:
+        """Load the table as a polars LazyFrame."""
+        dataset = self._load_pyarrow_dataset()
+        lazy_frame = pl.scan_pyarrow_dataset(dataset)
+        if not isinstance(lazy_frame, LazyFrame):
+            raise NgioValueError(
+                "Table is not a lazy frame. Please report this issue as an ngio bug."
+                f" {type(lazy_frame)}"
+            )
+
+        lazy_frame = normalize_polars_lf(
+            lazy_frame,
+            index_key=self.index_key,
+            index_type=self.index_type,
+        )
+        return lazy_frame
+
+    def _write_to_stream(self, stream, table: pa.Table) -> None:
+        """Write the table to a stream."""
+        if self.table_format == "parquet":
+            pa_parquet.write_table(table, stream)
+        elif self.table_format == "csv":
+            pa_csv.write_csv(table, stream)
+        else:
+            raise NgioValueError(
+                f"Unsupported table format: {self.table_format}. "
+                "Supported formats are 'parquet' and 'csv'."
+            )
+
+    def _write_to_local_store(
+        self, store: LocalStore, path: str, table: pa.Table
+    ) -> None:
+        """Write the table to a directory store."""
+        root_path = store.root
+        table_path = f"{root_path}/{path}/{self.table_name}"
+        self._write_to_stream(table_path, table)
+
+    def _write_to_fsspec_store(
+        self, store: FsspecStore, path: str, table: pa.Table
+    ) -> None:
+        """Write the table to an FS store."""
+        table_path = f"{store.path}/{path}/{self.table_name}"
+        fs = _make_sync_fs(store.fs)
+        fs = pa_fs.PyFileSystem(pa_fs.FSSpecHandler(fs))
+        with fs.open_output_stream(table_path) as out_stream:
+            self._write_to_stream(out_stream, table)
+
+    def _write_to_in_memory_store(
+        self, store: MemoryStore, path: str, table: pa.Table
+    ) -> None:
+        """Write the table to an in-memory store."""
+        table_path = f"{path}/{self.table_name}"
+        store._store_dict[table_path] = table
+
+    def _write_to_zip_store(self, store: ZipStore, path: str, table: pa.Table) -> None:
+        """Write the table to a zip store."""
+        raise NotImplementedError("Writing to zip store is not implemented yet.")
+
+    def _write_pyarrow_dataset(self, dataset: pa.Table) -> None:
+        """Write the table from a pyarrow Dataset."""
+        store = self._group_handler.store
+        path = self._group_handler.group.path
+        if isinstance(store, LocalStore):
+            return self._write_to_local_store(store=store, path=path, table=dataset)
+        elif isinstance(store, FsspecStore):
+            return self._write_to_fsspec_store(store=store, path=path, table=dataset)
+        elif isinstance(store, MemoryStore):
+            return self._write_to_in_memory_store(store=store, path=path, table=dataset)
+        elif isinstance(store, ZipStore):
+            return self._write_to_zip_store(store=store, path=path, table=dataset)
+        self._raise_store_type_not_supported()
+
+    def write_from_pandas(self, table: DataFrame) -> None:
+        """Write the table from a pandas DataFrame."""
+        table = normalize_pandas_df(
+            table,
+            index_key=self.index_key,
+            index_type=self.index_type,
+            reset_index=True,
+        )
+        table = pa.Table.from_pandas(table, preserve_index=False)
+        self._write_pyarrow_dataset(table)
+
+    def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
+        """Write the table from a polars DataFrame or LazyFrame."""
+        table = normalize_polars_lf(
+            table,
+            index_key=self.index_key,
+            index_type=self.index_type,
+        )
+
+        if isinstance(table, LazyFrame):
+            table = table.collect()
+        table = table.to_arrow()
+        self._write_pyarrow_dataset(table)
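The new `PyArrowBackend` funnels every supported store type into one `pyarrow.dataset` / `pyarrow.Table` pipeline. The underlying round trip uses only standard pyarrow and polars calls; a self-contained sketch with illustrative data and a temporary directory:

```python
import tempfile

import pandas as pd
import polars as pl
import pyarrow as pa
import pyarrow.dataset as pa_ds
import pyarrow.parquet as pa_parquet

df = pd.DataFrame({"label": [1, 2], "x_micrometer": [0.0, 10.0]})

with tempfile.TemporaryDirectory() as tmp:
    path = f"{tmp}/table.parquet"

    # Write path: pandas -> arrow Table -> parquet (cf. write_from_pandas).
    table = pa.Table.from_pandas(df, preserve_index=False)
    pa_parquet.write_table(table, path)

    # Eager read: dataset -> arrow Table -> pandas (cf. load_as_pandas_df).
    dataset = pa_ds.dataset(path, format="parquet")
    round_trip = dataset.to_table().to_pandas()

    # Lazy read: dataset -> polars LazyFrame (cf. load_as_polars_lf).
    lazy = pl.scan_pyarrow_dataset(dataset)
    print(lazy.collect())
```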
ngio/tables/v1/_roi_table.py CHANGED
@@ -4,6 +4,7 @@ This class follows the roi_table specification at:
 https://fractal-analytics-platform.github.io/fractal-tasks-core/tables/
 """
 
+import warnings
 from collections.abc import Iterable
 from typing import Literal
 from uuid import uuid4
@@ -26,7 +27,6 @@ from ngio.utils import (
     NgioTableValidationError,
     NgioValueError,
     ZarrGroupHandler,
-    ngio_warn,
 )
 
 REQUIRED_COLUMNS = [
@@ -77,7 +77,9 @@ OPTIONAL_COLUMNS = ORIGIN_COLUMNS + TRANSLATION_COLUMNS + PLATE_COLUMNS + INDEX_
 def _check_optional_columns(col_name: str) -> None:
     """Check if the column name is in the optional columns."""
     if col_name not in OPTIONAL_COLUMNS + TIME_COLUMNS:
-        ngio_warn(f"Column {col_name} is not in the optional columns.")
+        warnings.warn(
+            f"Column {col_name} is not in the optional columns.", stacklevel=2
+        )
 
 
 def _dataframe_to_rois(
@@ -120,17 +122,17 @@ def _dataframe_to_rois(
     else:
         label = getattr(row, "label", None)
 
-    roi = Roi(
+    slices = {
+        "x": (row.x_micrometer, row.len_x_micrometer),
+        "y": (row.y_micrometer, row.len_y_micrometer),
+        "z": (z_micrometer, z_length_micrometer),
+    }
+    if t_second is not None or t_length_second is not None:
+        slices["t"] = (t_second, t_length_second)
+    roi = Roi.from_values(
         name=str(row.Index),
-        x=row.x_micrometer,  # type: ignore (type can not be known here)
-        y=row.y_micrometer,  # type: ignore (type can not be known here)
-        z=z_micrometer,
-        t=t_second,
-        x_length=row.len_x_micrometer,  # type: ignore (type can not be known here)
-        y_length=row.len_y_micrometer,  # type: ignore (type can not be known here)
-        z_length=z_length_micrometer,
-        t_length=t_length_second,
-        unit="micrometer",
+        slices=slices,
+        space="world",
         label=label,
         **extras,
     )
@@ -143,24 +145,39 @@ def _rois_to_dataframe(rois: dict[str, Roi], index_key: str | None) -> pd.DataFr
     data = []
     for roi in rois.values():
         # This normalization is necessary for backward compatibility
-        z_micrometer = roi.z if roi.z is not None else 0.0
-        len_z_micrometer = roi.z_length if roi.z_length is not None else 1.0
+        if roi.space != "world":
+            raise NotImplementedError(
+                "Only ROIs in world coordinates can be serialized."
+            )
 
+        z_slice = roi.get("z")
+        if z_slice is None:
+            z_micrometer = 0.0
+            len_z_micrometer = 1.0
+        else:
+            z_micrometer = z_slice.start if z_slice.start is not None else 0.0
+            len_z_micrometer = z_slice.length if z_slice.length is not None else 1.0
+
+        x_slice = roi.get("x")
+        if x_slice is None:
+            raise NgioValueError("ROI is missing 'x' slice.")
+        y_slice = roi.get("y")
+        if y_slice is None:
+            raise NgioValueError("ROI is missing 'y' slice.")
         row = {
             index_key: roi.get_name(),
-            "x_micrometer": roi.x,
-            "y_micrometer": roi.y,
+            "x_micrometer": x_slice.start if x_slice.start is not None else 0.0,
+            "y_micrometer": y_slice.start if y_slice.start is not None else 0.0,
             "z_micrometer": z_micrometer,
-            "len_x_micrometer": roi.x_length,
-            "len_y_micrometer": roi.y_length,
+            "len_x_micrometer": x_slice.length if x_slice.length is not None else 1.0,
+            "len_y_micrometer": y_slice.length if y_slice.length is not None else 1.0,
            "len_z_micrometer": len_z_micrometer,
         }
 
-        if roi.t is not None:
-            row["t_second"] = roi.t
-
-        if roi.t_length is not None:
-            row["len_t_second"] = roi.t_length
+        t_slice = roi.get("t")
+        if t_slice is not None:
+            row["t_second"] = t_slice.start if t_slice.start is not None else 0.0
+            row["len_t_second"] = t_slice.length if t_slice.length is not None else 1.0
 
         if roi.label is not None and index_key != "label":
             row["label"] = roi.label
@@ -183,7 +200,7 @@ class RoiDictWrapper:
         self._rois_by_name = {}
         self._rois_by_label = {}
         for roi in rois:
-            name = roi.get_name()
+            name = roi.name
             if name in self._rois_by_name:
                 name = f"{name}_{uuid4().hex[:8]}"
             self._rois_by_name[name] = roi
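ROI tables now build `Roi` objects from a per-axis `slices` mapping of `(start, length)` pairs rather than one keyword per coordinate. Based only on the calls visible in the hunks above, a table row converts roughly like this (values are illustrative; the `Roi.from_values` signature and the import path are inferred from the diff, not confirmed API docs):

```python
from ngio.common import Roi  # import path assumed; _roi.py lives under ngio/common

# One row of a roi_table, expressed as (start, length) per axis, in world units.
slices = {
    "x": (0.0, 100.0),  # x_micrometer, len_x_micrometer
    "y": (0.0, 100.0),  # y_micrometer, len_y_micrometer
    "z": (0.0, 1.0),    # z_micrometer, len_z_micrometer
}

roi = Roi.from_values(name="FOV_1", slices=slices, space="world", label=None)

# _rois_to_dataframe reads the axes back via roi.get("x") etc., each
# returning a slice-like object exposing .start and .length.
x_slice = roi.get("x")
```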
ngio/utils/__init__.py CHANGED
@@ -1,13 +1,12 @@
 """Various utilities for the ngio package."""
 
-import os
-
 from ngio.utils._datasets import (
     download_ome_zarr_dataset,
     list_ome_zarr_datasets,
     print_datasets_infos,
 )
 from ngio.utils._errors import (
+    NgioError,
     NgioFileExistsError,
     NgioFileNotFoundError,
     NgioTableValidationError,
@@ -15,22 +14,19 @@ from ngio.utils._errors import (
     NgioValueError,
 )
 from ngio.utils._fractal_fsspec_store import fractal_fsspec_store
-from ngio.utils._logger import ngio_logger, ngio_warn, set_logger_level
 from ngio.utils._zarr_utils import (
     AccessModeLiteral,
     NgioCache,
     StoreOrGroup,
     ZarrGroupHandler,
+    copy_group,
     open_group_wrapper,
 )
 
-set_logger_level(os.getenv("NGIO_LOGGER_LEVEL", "WARNING"))
-
 __all__ = [
-    # Zarr
     "AccessModeLiteral",
     "NgioCache",
-    # Errors
+    "NgioError",
     "NgioFileExistsError",
     "NgioFileNotFoundError",
     "NgioTableValidationError",
@@ -38,14 +34,10 @@ __all__ = [
     "NgioValueError",
     "StoreOrGroup",
     "ZarrGroupHandler",
-    # Other
+    "copy_group",
     "download_ome_zarr_dataset",
     "fractal_fsspec_store",
     "list_ome_zarr_datasets",
-    # Logger
-    "ngio_logger",
-    "ngio_warn",
     "open_group_wrapper",
     "print_datasets_infos",
-    "set_logger_level",
 ]
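With `ngio_logger`, `ngio_warn`, `set_logger_level`, and the `NGIO_LOGGER_LEVEL` env hook removed, messages such as the optional-column warning in `_roi_table.py` now go through the stdlib `warnings` machinery, so standard filters apply downstream. A minimal sketch:

```python
import warnings

# Suppress ngio's optional-column warnings for a noisy block of code.
with warnings.catch_warnings():
    warnings.filterwarnings(
        "ignore", message="Column .* is not in the optional columns."
    )
    ...  # calls that read or write ROI tables
```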