ngio 0.2.0a2__py3-none-any.whl → 0.5.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. ngio/__init__.py +40 -12
  2. ngio/common/__init__.py +16 -32
  3. ngio/common/_dimensions.py +270 -48
  4. ngio/common/_masking_roi.py +153 -0
  5. ngio/common/_pyramid.py +267 -73
  6. ngio/common/_roi.py +290 -66
  7. ngio/common/_synt_images_utils.py +101 -0
  8. ngio/common/_zoom.py +54 -22
  9. ngio/experimental/__init__.py +5 -0
  10. ngio/experimental/iterators/__init__.py +15 -0
  11. ngio/experimental/iterators/_abstract_iterator.py +390 -0
  12. ngio/experimental/iterators/_feature.py +189 -0
  13. ngio/experimental/iterators/_image_processing.py +130 -0
  14. ngio/experimental/iterators/_mappers.py +48 -0
  15. ngio/experimental/iterators/_rois_utils.py +126 -0
  16. ngio/experimental/iterators/_segmentation.py +235 -0
  17. ngio/hcs/__init__.py +17 -58
  18. ngio/hcs/_plate.py +1354 -0
  19. ngio/images/__init__.py +30 -9
  20. ngio/images/_abstract_image.py +968 -0
  21. ngio/images/_create_synt_container.py +132 -0
  22. ngio/images/_create_utils.py +423 -0
  23. ngio/images/_image.py +926 -0
  24. ngio/images/_label.py +417 -0
  25. ngio/images/_masked_image.py +531 -0
  26. ngio/images/_ome_zarr_container.py +1235 -0
  27. ngio/images/_table_ops.py +471 -0
  28. ngio/io_pipes/__init__.py +75 -0
  29. ngio/io_pipes/_io_pipes.py +361 -0
  30. ngio/io_pipes/_io_pipes_masked.py +488 -0
  31. ngio/io_pipes/_io_pipes_roi.py +146 -0
  32. ngio/io_pipes/_io_pipes_types.py +56 -0
  33. ngio/io_pipes/_match_shape.py +377 -0
  34. ngio/io_pipes/_ops_axes.py +344 -0
  35. ngio/io_pipes/_ops_slices.py +411 -0
  36. ngio/io_pipes/_ops_slices_utils.py +199 -0
  37. ngio/io_pipes/_ops_transforms.py +104 -0
  38. ngio/io_pipes/_zoom_transform.py +180 -0
  39. ngio/ome_zarr_meta/__init__.py +39 -15
  40. ngio/ome_zarr_meta/_meta_handlers.py +490 -96
  41. ngio/ome_zarr_meta/ngio_specs/__init__.py +24 -10
  42. ngio/ome_zarr_meta/ngio_specs/_axes.py +268 -234
  43. ngio/ome_zarr_meta/ngio_specs/_channels.py +125 -41
  44. ngio/ome_zarr_meta/ngio_specs/_dataset.py +42 -87
  45. ngio/ome_zarr_meta/ngio_specs/_ngio_hcs.py +536 -2
  46. ngio/ome_zarr_meta/ngio_specs/_ngio_image.py +202 -198
  47. ngio/ome_zarr_meta/ngio_specs/_pixel_size.py +72 -34
  48. ngio/ome_zarr_meta/v04/__init__.py +21 -5
  49. ngio/ome_zarr_meta/v04/_custom_models.py +18 -0
  50. ngio/ome_zarr_meta/v04/{_v04_spec_utils.py → _v04_spec.py} +151 -90
  51. ngio/ome_zarr_meta/v05/__init__.py +27 -0
  52. ngio/ome_zarr_meta/v05/_custom_models.py +18 -0
  53. ngio/ome_zarr_meta/v05/_v05_spec.py +511 -0
  54. ngio/resources/20200812-CardiomyocyteDifferentiation14-Cycle1_B03/mask.png +0 -0
  55. ngio/resources/20200812-CardiomyocyteDifferentiation14-Cycle1_B03/nuclei.png +0 -0
  56. ngio/resources/20200812-CardiomyocyteDifferentiation14-Cycle1_B03/raw.jpg +0 -0
  57. ngio/resources/__init__.py +55 -0
  58. ngio/resources/resource_model.py +36 -0
  59. ngio/tables/__init__.py +20 -4
  60. ngio/tables/_abstract_table.py +270 -0
  61. ngio/tables/_tables_container.py +449 -0
  62. ngio/tables/backends/__init__.py +50 -1
  63. ngio/tables/backends/_abstract_backend.py +200 -31
  64. ngio/tables/backends/_anndata.py +139 -0
  65. ngio/tables/backends/_anndata_utils.py +10 -114
  66. ngio/tables/backends/_csv.py +19 -0
  67. ngio/tables/backends/_json.py +92 -0
  68. ngio/tables/backends/_parquet.py +19 -0
  69. ngio/tables/backends/_py_arrow_backends.py +222 -0
  70. ngio/tables/backends/_table_backends.py +162 -38
  71. ngio/tables/backends/_utils.py +608 -0
  72. ngio/tables/v1/__init__.py +19 -4
  73. ngio/tables/v1/_condition_table.py +71 -0
  74. ngio/tables/v1/_feature_table.py +79 -115
  75. ngio/tables/v1/_generic_table.py +21 -90
  76. ngio/tables/v1/_roi_table.py +486 -137
  77. ngio/transforms/__init__.py +5 -0
  78. ngio/transforms/_zoom.py +19 -0
  79. ngio/utils/__init__.py +16 -14
  80. ngio/utils/_cache.py +48 -0
  81. ngio/utils/_datasets.py +121 -13
  82. ngio/utils/_fractal_fsspec_store.py +42 -0
  83. ngio/utils/_zarr_utils.py +374 -218
  84. ngio-0.5.0b4.dist-info/METADATA +147 -0
  85. ngio-0.5.0b4.dist-info/RECORD +88 -0
  86. {ngio-0.2.0a2.dist-info → ngio-0.5.0b4.dist-info}/WHEEL +1 -1
  87. ngio/common/_array_pipe.py +0 -160
  88. ngio/common/_axes_transforms.py +0 -63
  89. ngio/common/_common_types.py +0 -5
  90. ngio/common/_slicer.py +0 -97
  91. ngio/images/abstract_image.py +0 -240
  92. ngio/images/create.py +0 -251
  93. ngio/images/image.py +0 -389
  94. ngio/images/label.py +0 -236
  95. ngio/images/omezarr_container.py +0 -535
  96. ngio/ome_zarr_meta/_generic_handlers.py +0 -320
  97. ngio/ome_zarr_meta/v04/_meta_handlers.py +0 -54
  98. ngio/tables/_validators.py +0 -192
  99. ngio/tables/backends/_anndata_v1.py +0 -75
  100. ngio/tables/backends/_json_v1.py +0 -56
  101. ngio/tables/tables_container.py +0 -300
  102. ngio/tables/v1/_masking_roi_table.py +0 -175
  103. ngio/utils/_logger.py +0 -29
  104. ngio-0.2.0a2.dist-info/METADATA +0 -95
  105. ngio-0.2.0a2.dist-info/RECORD +0 -53
  106. {ngio-0.2.0a2.dist-info → ngio-0.5.0b4.dist-info}/licenses/LICENSE +0 -0
@@ -1,8 +1,57 @@
1
1
  """Ngio Tables backend implementations."""
2
2
 
3
+ from ngio.tables.backends._abstract_backend import AbstractTableBackend, BackendMeta
4
+ from ngio.tables.backends._anndata import AnnDataBackend, AnnDataBackendV1
5
+ from ngio.tables.backends._csv import CsvTableBackend
6
+ from ngio.tables.backends._json import JsonTableBackend
7
+ from ngio.tables.backends._parquet import ParquetTableBackend
3
8
  from ngio.tables.backends._table_backends import (
9
+ DefaultTableBackend,
4
10
  ImplementedTableBackends,
11
+ TableBackend,
5
12
  TableBackendProtocol,
6
13
  )
14
+ from ngio.tables.backends._utils import (
15
+ TabularData,
16
+ convert_anndata_to_pandas,
17
+ convert_anndata_to_polars,
18
+ convert_pandas_to_anndata,
19
+ convert_pandas_to_polars,
20
+ convert_polars_to_anndata,
21
+ convert_polars_to_pandas,
22
+ convert_to_anndata,
23
+ convert_to_pandas,
24
+ convert_to_polars,
25
+ normalize_anndata,
26
+ normalize_pandas_df,
27
+ normalize_polars_lf,
28
+ normalize_table,
29
+ )
7
30
 
8
- __all__ = ["ImplementedTableBackends", "TableBackendProtocol"]
31
+ __all__ = [
32
+ "AbstractTableBackend",
33
+ "AnnDataBackend",
34
+ "AnnDataBackendV1",
35
+ "BackendMeta",
36
+ "CsvTableBackend",
37
+ "DefaultTableBackend",
38
+ "ImplementedTableBackends",
39
+ "JsonTableBackend",
40
+ "ParquetTableBackend",
41
+ "TableBackend",
42
+ "TableBackendProtocol",
43
+ "TabularData",
44
+ "convert_anndata_to_pandas",
45
+ "convert_anndata_to_polars",
46
+ "convert_pandas_to_anndata",
47
+ "convert_pandas_to_polars",
48
+ "convert_polars_to_anndata",
49
+ "convert_polars_to_pandas",
50
+ "convert_to_anndata",
51
+ "convert_to_pandas",
52
+ "convert_to_polars",
53
+ "normalize_anndata",
54
+ "normalize_pandas_df",
55
+ "normalize_polars_lf",
56
+ "normalize_table",
57
+ ]
@@ -1,29 +1,48 @@
1
1
  from abc import ABC, abstractmethod
2
- from collections.abc import Collection
3
2
  from typing import Literal
4
3
 
5
4
  from anndata import AnnData
6
5
  from pandas import DataFrame
6
+ from polars import DataFrame as PolarsDataFrame
7
+ from polars import LazyFrame
8
+ from pydantic import BaseModel, ConfigDict
7
9
 
8
- from ngio.utils import ZarrGroupHandler
10
+ from ngio.tables.backends._utils import (
11
+ TabularData,
12
+ convert_to_anndata,
13
+ convert_to_pandas,
14
+ convert_to_polars,
15
+ )
16
+ from ngio.utils import NgioValueError, ZarrGroupHandler
17
+
18
+
19
+ class BackendMeta(BaseModel):
20
+ """Metadata for the backend."""
21
+
22
+ backend: str = "anndata_v1"
23
+ index_key: str | None = None
24
+ index_type: Literal["int", "str"] | None = None
25
+
26
+ model_config = ConfigDict(extra="allow")
9
27
 
10
28
 
11
29
  class AbstractTableBackend(ABC):
12
30
  """Abstract class for table backends."""
13
31
 
14
- def __init__(
32
+ def set_group_handler(
15
33
  self,
16
34
  group_handler: ZarrGroupHandler,
17
35
  index_key: str | None = None,
18
- index_type: Literal["int", "str"] = "int",
19
- ):
20
- """Initialize the handler.
21
-
22
- Args:
23
- group_handler (ZarrGroupHandler): An object to handle the Zarr group
24
- containing the table data.
25
- index_key (str): The column name to use as the index of the DataFrame.
26
- index_type (str): The type of the index column in the DataFrame.
36
+ index_type: Literal["int", "str"] | None = None,
37
+ ) -> None:
38
+ """Attach a group handler to the backend.
39
+
40
+ Index keys and index types are used to ensure that the
41
+ serialization and deserialization of the table
42
+ is consistent across different backends.
43
+
44
+ Making sure that this is consistent is
45
+ a duty of the backend implementations.
27
46
  """
28
47
  self._group_handler = group_handler
29
48
  self._index_key = index_key
@@ -32,40 +51,190 @@ class AbstractTableBackend(ABC):
32
51
  @staticmethod
33
52
  @abstractmethod
34
53
  def backend_name() -> str:
35
- """The name of the backend."""
54
+ """Return the name of the backend.
55
+
56
+ As a convention we set name as:
57
+ {backend_name}_v{version}
58
+
59
+ Where the version is a integer.
60
+ """
36
61
  raise NotImplementedError
37
62
 
38
63
  @staticmethod
39
64
  @abstractmethod
40
65
  def implements_anndata() -> bool:
41
- """Whether the handler implements the anndata protocol."""
66
+ """Check if the backend implements the anndata protocol.
67
+
68
+ If this is True, the backend should implement the
69
+ `write_from_anndata` method.
70
+
71
+ AnnData objects are more complex than DataFrames,
72
+ so if this is true the backend should implement the
73
+ full serialization of the AnnData object.
74
+
75
+ If this is False, these methods should raise a
76
+ `NotImplementedError`.
77
+ """
42
78
  raise NotImplementedError
43
79
 
44
80
  @staticmethod
45
81
  @abstractmethod
46
- def implements_dataframe() -> bool:
47
- """Whether the handler implements the dataframe protocol."""
82
+ def implements_pandas() -> bool:
83
+ """Check if the backend implements the pandas protocol.
84
+
85
+ If this is True, the backend should implement the
86
+ `write_from_pandas` method.
87
+
88
+ If this is False, these methods should raise a
89
+ `NotImplementedError`.
90
+ """
48
91
  raise NotImplementedError
49
92
 
93
+ @staticmethod
50
94
  @abstractmethod
51
- def load_columns(self) -> list[str]:
52
- """List all labels in the group."""
53
- raise NotImplementedError
95
+ def implements_polars() -> bool:
96
+ """Check if the backend implements the polars protocol.
54
97
 
55
- def load_as_anndata(self, columns: Collection[str] | None = None) -> AnnData:
56
- """Load the metadata in the store."""
57
- raise NotImplementedError
98
+ If this is True, the backend should implement the
99
+ `write_from_polars` methods.
58
100
 
59
- def load_as_dataframe(self, columns: Collection[str] | None = None) -> DataFrame:
60
- """List all labels in the group."""
101
+ If this is False, these methods should raise a
102
+ `NotImplementedError`.
103
+ """
61
104
  raise NotImplementedError
62
105
 
63
- def write_from_dataframe(
64
- self, table: DataFrame, metadata: dict | None = None
106
+ @property
107
+ def group_handler(self) -> ZarrGroupHandler:
108
+ """Get the group handler."""
109
+ return self._group_handler
110
+
111
+ @property
112
+ def index_key(self) -> str | None:
113
+ """Get the index key."""
114
+ return self._index_key
115
+
116
+ @property
117
+ def index_type(self) -> Literal["int", "str"] | None:
118
+ """Get the index type."""
119
+ if self._index_type is None:
120
+ return None
121
+
122
+ if self._index_type not in ["int", "str"]:
123
+ raise NgioValueError(
124
+ f"Invalid index type {self._index_type}. Must be 'int' or 'str'."
125
+ )
126
+ return self._index_type # type: ignore[return-value]
127
+
128
+ @abstractmethod
129
+ def load(self) -> TabularData:
130
+ """Load the table from the store.
131
+
132
+ This is a generic load method.
133
+ Based on the explicit mode or the type of the table,
134
+ it will call the appropriate load method.
135
+ """
136
+ ...
137
+
138
+ def load_as_anndata(self) -> AnnData:
139
+ """Load the table as an AnnData object.
140
+
141
+ Since the AnnData object is more complex than a DataFrame,
142
+ selecting columns is not implemented, because it is not
143
+ straightforward to do so for an arbitrary AnnData object.
144
+ """
145
+ table = self.load()
146
+ return convert_to_anndata(
147
+ table,
148
+ index_key=self.index_key,
149
+ )
150
+
151
+ def load_as_pandas_df(self) -> DataFrame:
152
+ """Load the table as a pandas DataFrame.
153
+
154
+ The output of `load` is converted using the backend index key and index type.
155
+ """
156
+ table = self.load()
157
+ return convert_to_pandas(
158
+ table,
159
+ index_key=self.index_key,
160
+ index_type=self.index_type,
161
+ )
162
+
163
+ def load_as_polars_lf(self) -> LazyFrame:
164
+ """Load the table as a polars LazyFrame.
165
+
166
+ The output of `load` is converted using the backend index key and index type.
167
+ """
168
+ table = self.load()
169
+ return convert_to_polars(
170
+ table,
171
+ index_key=self.index_key,
172
+ index_type=self.index_type,
173
+ )
174
+
175
+ def write_from_pandas(self, table: DataFrame) -> None:
176
+ """Serialize the table from a pandas DataFrame."""
177
+ raise NotImplementedError(
178
+ f"Backend {self.backend_name()} does not support "
179
+ "serialization of DataFrame objects."
180
+ )
181
+
182
+ def write_from_anndata(self, table: AnnData) -> None:
183
+ """Serialize the table from an AnnData object."""
184
+ raise NotImplementedError(
185
+ f"Backend {self.backend_name()} does not support "
186
+ "serialization of AnnData objects."
187
+ )
188
+
189
+ def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
190
+ """Serialize the table from a polars DataFrame or LazyFrame."""
191
+ raise NotImplementedError(
192
+ f"Backend {self.backend_name()} does not support "
193
+ "serialization of Polars objects."
194
+ )
195
+
196
+ def write_metadata(self, metadata: dict | None = None) -> None:
197
+ """Write the metadata to the store."""
198
+ if metadata is None:
199
+ metadata = {}
200
+
201
+ attrs = self._group_handler.reopen_group().attrs.asdict()
202
+ # This is required by anndata to identify the format
203
+ if "encoding-type" in attrs:
204
+ metadata["encoding-type"] = attrs["encoding-type"]
205
+ if "encoding-version" in attrs:
206
+ metadata["encoding-version"] = attrs["encoding-version"]
207
+
208
+ backend_metadata = BackendMeta(
209
+ backend=self.backend_name(),
210
+ index_key=self.index_key,
211
+ index_type=self.index_type,
212
+ ).model_dump(exclude_none=True)
213
+ metadata.update(backend_metadata)
214
+ self._group_handler.write_attrs(metadata)
215
+
216
+ def write(
217
+ self,
218
+ table_data: TabularData,
219
+ metadata: dict | None = None,
65
220
  ) -> None:
66
- """Consolidate the metadata in the store."""
67
- raise NotImplementedError
221
+ """Serialize the table to the store, and write the metadata.
68
222
 
69
- def write_from_anndata(self, table: AnnData, metadata: dict | None = None) -> None:
70
- """Consolidate the metadata in the store."""
71
- raise NotImplementedError
223
+ This is a generic write method.
224
+ Based on the explicit mode or the type of the table,
225
+ it will call the appropriate write method.
226
+ """
227
+ if isinstance(table_data, DataFrame):
228
+ self.write_from_pandas(table_data)
229
+ elif isinstance(table_data, AnnData):
230
+ self.write_from_anndata(table_data)
231
+ elif isinstance(table_data, PolarsDataFrame | LazyFrame):
232
+ self.write_from_polars(table_data)
233
+ else:
234
+ raise NgioValueError(
235
+ f"Unsupported table type {type(table_data)}. "
236
+ "Please specify the mode explicitly. "
237
+ "Supported serialization modes are: "
238
+ "'pandas', 'anndata', 'polars'."
239
+ )
240
+ self.write_metadata(metadata)
@@ -0,0 +1,139 @@
1
+ import zarr
2
+ from anndata import AnnData
3
+ from anndata._settings import settings
4
+ from pandas import DataFrame
5
+ from polars import DataFrame as PolarsDataFrame
6
+ from polars import LazyFrame
7
+ from zarr.storage import FsspecStore, LocalStore, MemoryStore
8
+
9
+ from ngio.tables.backends._abstract_backend import AbstractTableBackend
10
+ from ngio.tables.backends._anndata_utils import (
11
+ custom_anndata_read_zarr,
12
+ )
13
+ from ngio.tables.backends._utils import (
14
+ convert_pandas_to_anndata,
15
+ convert_polars_to_anndata,
16
+ normalize_anndata,
17
+ )
18
+ from ngio.utils import NgioValueError, copy_group
19
+
20
+
21
+ class AnnDataBackend(AbstractTableBackend):
22
+ """A class to load and write tables from/to an AnnData object."""
23
+
24
+ @staticmethod
25
+ def backend_name() -> str:
26
+ """Return the name of the backend."""
27
+ return "anndata"
28
+
29
+ @staticmethod
30
+ def implements_anndata() -> bool:
31
+ """Check if the backend implements the anndata protocol."""
32
+ return True
33
+
34
+ @staticmethod
35
+ def implements_pandas() -> bool:
36
+ """Whether the handler implements the dataframe protocol."""
37
+ return True
38
+
39
+ @staticmethod
40
+ def implements_polars() -> bool:
41
+ """Whether the handler implements the polars protocol."""
42
+ return True
43
+
44
+ def load_as_anndata(self) -> AnnData:
45
+ """Load the table as an AnnData object."""
46
+ settings.zarr_write_format = self._group_handler.zarr_format
47
+ anndata = custom_anndata_read_zarr(self._group_handler._group)
48
+ anndata = normalize_anndata(anndata, index_key=self.index_key)
49
+ return anndata
50
+
51
+ def load(self) -> AnnData:
52
+ """Load the table as an AnnData object."""
53
+ return self.load_as_anndata()
54
+
55
+ def _write_to_local_store(
56
+ self, store: LocalStore, path: str, table: AnnData
57
+ ) -> None:
58
+ """Write the AnnData table to a LocalStore."""
59
+ store_path = f"{store.root}/{path}"
60
+ table.write_zarr(store_path)
61
+
62
+ def _write_to_fsspec_store(
63
+ self, store: FsspecStore, path: str, table: AnnData
64
+ ) -> None:
65
+ """Write the AnnData table to a FsspecStore."""
66
+ full_url = f"{store.path}/{path}"
67
+ fs = store.fs
68
+ mapper = fs.get_mapper(full_url)
69
+ table.write_zarr(mapper)
70
+
71
+ def _write_to_memory_store(
72
+ self, store: MemoryStore, path: str, table: AnnData
73
+ ) -> None:
74
+ """Write the AnnData table to a MemoryStore."""
75
+ store = MemoryStore()
76
+ table.write_zarr(store)
77
+ anndata_group = zarr.open_group(store, mode="r")
78
+ copy_group(
79
+ anndata_group,
80
+ self._group_handler._group,
81
+ suppress_warnings=True,
82
+ )
83
+
84
+ def write_from_anndata(self, table: AnnData) -> None:
85
+ """Serialize the table from an AnnData object."""
86
+ # Make sure to use the correct zarr format
87
+ settings.zarr_write_format = self._group_handler.zarr_format
88
+ store = self._group_handler.store
89
+ path = self._group_handler.group.path
90
+ if isinstance(store, LocalStore):
91
+ self._write_to_local_store(
92
+ store,
93
+ path,
94
+ table,
95
+ )
96
+ elif isinstance(store, FsspecStore):
97
+ self._write_to_fsspec_store(
98
+ store,
99
+ path,
100
+ table,
101
+ )
102
+ elif isinstance(store, MemoryStore):
103
+ self._write_to_memory_store(
104
+ store,
105
+ path,
106
+ table,
107
+ )
108
+ else:
109
+ raise NgioValueError(
110
+ f"Ngio does not support writing an AnnData table to a "
111
+ f"store of type {type(store)}. "
112
+ "Please make sure to use a compatible "
113
+ "store like a LocalStore, or FsspecStore."
114
+ )
115
+
116
+ def write_from_pandas(self, table: DataFrame) -> None:
117
+ """Serialize the table from a pandas DataFrame."""
118
+ anndata = convert_pandas_to_anndata(
119
+ table,
120
+ index_key=self.index_key,
121
+ )
122
+ self.write_from_anndata(anndata)
123
+
124
+ def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
125
+ """Serialize the table from a polars DataFrame or LazyFrame."""
126
+ anndata = convert_polars_to_anndata(
127
+ table,
128
+ index_key=self.index_key,
129
+ )
130
+ self.write_from_anndata(anndata)
131
+
132
+
133
+ class AnnDataBackendV1(AnnDataBackend):
134
+ """A wrapper for the AnnData backend kept for backwards compatibility."""
135
+
136
+ @staticmethod
137
+ def backend_name() -> str:
138
+ """Return the name of the backend."""
139
+ return "anndata_v1"
@@ -1,11 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Any, Literal
3
+ from typing import TYPE_CHECKING, Any
4
4
 
5
- import anndata as ad
6
- import numpy as np
7
- import pandas as pd
8
- import pandas.api.types as ptypes
9
5
  import zarr
10
6
  from anndata import AnnData
11
7
  from anndata._io.specs import read_elem
@@ -14,19 +10,18 @@ from anndata._io.zarr import read_dataframe
14
10
  from anndata.compat import _clean_uns
15
11
  from anndata.experimental import read_dispatched
16
12
 
17
- from ngio.tables._validators import validate_index_dtype, validate_index_key
18
13
  from ngio.utils import (
19
- NgioTableValidationError,
14
+ NgioValueError,
20
15
  StoreOrGroup,
21
16
  open_group_wrapper,
22
17
  )
23
18
 
24
19
  if TYPE_CHECKING:
25
- from collections.abc import Callable, Collection
20
+ from collections.abc import Callable, Sequence
26
21
 
27
22
 
28
- def custom_read_zarr(
29
- store: StoreOrGroup, elem_to_read: Collection[str] | None = None
23
+ def custom_anndata_read_zarr(
24
+ store: StoreOrGroup, elem_to_read: Sequence[str] | None = None
30
25
  ) -> AnnData:
31
26
  """Read from a hierarchical Zarr array store.
32
27
 
@@ -36,10 +31,9 @@ def custom_read_zarr(
36
31
 
37
32
  Args:
38
33
  store (StoreOrGroup): A store or group to read the AnnData from.
39
- elem_to_read (Collection[str] | None): The elements to read from the store.
34
+ elem_to_read (Sequence[str] | None): The elements to read from the store.
40
35
  """
41
- group, _ = open_group_wrapper(store=store, mode="r")
42
-
36
+ group = open_group_wrapper(store=store, mode="r")
43
37
  if elem_to_read is None:
44
38
  elem_to_read = [
45
39
  "X",
@@ -89,106 +83,8 @@ def custom_read_zarr(
89
83
  if isinstance(group["obs"], zarr.Array):
90
84
  _clean_uns(adata)
91
85
 
86
+ if isinstance(adata, dict):
87
+ adata = AnnData(**adata) # type: ignore
92
88
  if not isinstance(adata, AnnData):
93
- raise ValueError(f"Expected an AnnData object, but got {type(adata)}")
89
+ raise NgioValueError(f"Expected an AnnData object, but got {type(adata)}")
94
90
  return adata
95
-
96
-
97
- def _check_for_mixed_types(series: pd.Series) -> None:
98
- """Check if the column has mixed types."""
99
- if series.apply(type).nunique() > 1: # type: ignore
100
- raise NgioTableValidationError(
101
- f"Column {series.name} has mixed types: "
102
- f"{series.apply(type).unique()}. " # type: ignore
103
- "Type of all elements must be the same."
104
- )
105
-
106
-
107
- def _check_for_supported_types(series: pd.Series) -> Literal["str", "int", "numeric"]:
108
- """Check if the column has supported types."""
109
- if ptypes.is_string_dtype(series):
110
- return "str"
111
- if ptypes.is_integer_dtype(series):
112
- return "int"
113
- if ptypes.is_numeric_dtype(series):
114
- return "numeric"
115
- raise NgioTableValidationError(
116
- f"Column {series.name} has unsupported type: {series.dtype}."
117
- " Supported types are string and numerics."
118
- )
119
-
120
-
121
- def dataframe_to_anndata(
122
- dataframe: pd.DataFrame,
123
- index_key: str | None = None,
124
- overwrite: bool = False,
125
- ) -> ad.AnnData:
126
- """Convert a table DataFrame to an AnnData object.
127
-
128
- Args:
129
- dataframe (pd.DataFrame): A pandas DataFrame representing a fractal table.
130
- index_key (str): The column name to use as the index of the DataFrame.
131
- Default is None.
132
- overwrite (bool): Whether to overwrite the index if a different index is found.
133
- Default is False.
134
- """
135
- # Check if the index_key is present in the data frame + optional validations
136
- dataframe = validate_index_key(dataframe, index_key, overwrite=overwrite)
137
- dataframe = validate_index_dtype(dataframe, index_type="str")
138
-
139
- str_columns, int_columns, num_columns = [], [], []
140
- for c_name in dataframe.columns:
141
- column_df = dataframe[c_name]
142
- _check_for_mixed_types(column_df) # Mixed types are not allowed in the table
143
- c_type = _check_for_supported_types(
144
- column_df
145
- ) # Only string and numeric types are allowed
146
-
147
- if c_type == "str":
148
- str_columns.append(c_name)
149
-
150
- elif c_type == "int":
151
- int_columns.append(c_name)
152
-
153
- elif c_type == "numeric":
154
- num_columns.append(c_name)
155
-
156
- # Converting all observations to string
157
- obs_df = dataframe[str_columns + int_columns]
158
- obs_df.index = dataframe.index
159
-
160
- x_df = dataframe[num_columns]
161
-
162
- if x_df.dtypes.nunique() > 1:
163
- x_df = x_df.astype("float64")
164
-
165
- if x_df.empty:
166
- # If there are no numeric columns, create an empty array
167
- # to avoid AnnData failing to create the object
168
- x_df = np.zeros((len(obs_df), 0), dtype="float64")
169
-
170
- return ad.AnnData(X=x_df, obs=obs_df)
171
-
172
-
173
- def anndata_to_dataframe(
174
- anndata: ad.AnnData,
175
- index_key: str | None = "label",
176
- index_type: str = "int",
177
- overwrite: bool = False,
178
- ) -> pd.DataFrame:
179
- """Convert a AnnData object representing a fractal table to a pandas DataFrame.
180
-
181
- Args:
182
- anndata (ad.AnnData): An AnnData object representing a fractal table.
183
- index_key (str): The column name to use as the index of the DataFrame.
184
- Default is 'label'.
185
- index_type (str): The type of the index column in the DataFrame.
186
- Either 'str' or 'int'. Default is 'int'.
187
- overwrite (bool): Whether to overwrite the index if a different index is found.
188
- Default is False.
189
- """
190
- dataframe = anndata.to_df()
191
- dataframe[anndata.obs_keys()] = anndata.obs
192
- dataframe = validate_index_key(dataframe, index_key, overwrite=overwrite)
193
- dataframe = validate_index_dtype(dataframe, index_type)
194
- return dataframe
@@ -0,0 +1,19 @@
1
+ from ngio.tables.backends._py_arrow_backends import PyArrowBackend
2
+
3
+
4
+ class CsvTableBackend(PyArrowBackend):
5
+ """A class to load and write small tables in CSV format."""
6
+
7
+ def __init__(
8
+ self,
9
+ ):
10
+ """Initialize the CsvTableBackend."""
11
+ super().__init__(
12
+ table_name="table.csv",
13
+ table_format="csv",
14
+ )
15
+
16
+ @staticmethod
17
+ def backend_name() -> str:
18
+ """Return the name of the backend."""
19
+ return "csv"