ngio 0.2.9__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. ngio/common/__init__.py +16 -0
  2. ngio/common/_array_pipe.py +50 -27
  3. ngio/common/_table_ops.py +471 -0
  4. ngio/hcs/__init__.py +1 -1
  5. ngio/hcs/{plate.py → _plate.py} +451 -78
  6. ngio/images/__init__.py +3 -3
  7. ngio/images/{image.py → _image.py} +26 -21
  8. ngio/images/{label.py → _label.py} +6 -4
  9. ngio/images/{masked_image.py → _masked_image.py} +2 -2
  10. ngio/images/{ome_zarr_container.py → _ome_zarr_container.py} +152 -86
  11. ngio/ome_zarr_meta/_meta_handlers.py +16 -8
  12. ngio/ome_zarr_meta/ngio_specs/_channels.py +41 -29
  13. ngio/tables/__init__.py +14 -2
  14. ngio/tables/_abstract_table.py +269 -0
  15. ngio/tables/{tables_container.py → _tables_container.py} +186 -100
  16. ngio/tables/backends/__init__.py +20 -0
  17. ngio/tables/backends/_abstract_backend.py +58 -80
  18. ngio/tables/backends/{_anndata_v1.py → _anndata.py} +5 -1
  19. ngio/tables/backends/_csv.py +35 -0
  20. ngio/tables/backends/{_json_v1.py → _json.py} +4 -1
  21. ngio/tables/backends/{_csv_v1.py → _non_zarr_backends.py} +61 -27
  22. ngio/tables/backends/_parquet.py +47 -0
  23. ngio/tables/backends/_table_backends.py +39 -18
  24. ngio/tables/backends/_utils.py +147 -1
  25. ngio/tables/v1/__init__.py +19 -3
  26. ngio/tables/v1/_condition_table.py +71 -0
  27. ngio/tables/v1/_feature_table.py +63 -129
  28. ngio/tables/v1/_generic_table.py +21 -159
  29. ngio/tables/v1/_roi_table.py +285 -201
  30. ngio/utils/_fractal_fsspec_store.py +29 -0
  31. {ngio-0.2.9.dist-info → ngio-0.3.0.dist-info}/METADATA +4 -3
  32. ngio-0.3.0.dist-info/RECORD +61 -0
  33. ngio/tables/_validators.py +0 -108
  34. ngio-0.2.9.dist-info/RECORD +0 -57
  35. /ngio/images/{abstract_image.py → _abstract_image.py} +0 -0
  36. /ngio/images/{create.py → _create.py} +0 -0
  37. {ngio-0.2.9.dist-info → ngio-0.3.0.dist-info}/WHEEL +0 -0
  38. {ngio-0.2.9.dist-info → ngio-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -5,15 +5,13 @@ from anndata import AnnData
5
5
  from pandas import DataFrame
6
6
  from polars import DataFrame as PolarsDataFrame
7
7
  from polars import LazyFrame
8
- from pydantic import BaseModel
8
+ from pydantic import BaseModel, ConfigDict
9
9
 
10
10
  from ngio.tables.backends._utils import (
11
- convert_anndata_to_pandas,
12
- convert_anndata_to_polars,
13
- convert_pandas_to_anndata,
14
- convert_pandas_to_polars,
15
- convert_polars_to_anndata,
16
- convert_polars_to_pandas,
11
+ TabularData,
12
+ convert_to_anndata,
13
+ convert_to_pandas,
14
+ convert_to_polars,
17
15
  )
18
16
  from ngio.utils import NgioValueError, ZarrGroupHandler
19
17
 
@@ -21,29 +19,30 @@ from ngio.utils import NgioValueError, ZarrGroupHandler
21
19
  class BackendMeta(BaseModel):
22
20
  """Metadata for the backend."""
23
21
 
24
- backend: str | None = None
22
+ backend: str = "anndata"
25
23
  index_key: str | None = None
26
24
  index_type: Literal["int", "str"] | None = None
27
25
 
26
+ model_config = ConfigDict(extra="allow")
27
+
28
28
 
29
29
  class AbstractTableBackend(ABC):
30
30
  """Abstract class for table backends."""
31
31
 
32
- def __init__(
32
+ def set_group_handler(
33
33
  self,
34
34
  group_handler: ZarrGroupHandler,
35
35
  index_key: str | None = None,
36
36
  index_type: Literal["int", "str"] | None = None,
37
- ):
38
- """Initialize the handler.
37
+ ) -> None:
38
+ """Attach a group handler to the backend.
39
39
 
40
- This is a base class for the table backends protocol.
40
+ Index keys and index types are used to ensure that the
41
+ serialization and deserialization of the table
42
+ is consistent across different backends.
41
43
 
42
- Args:
43
- group_handler (ZarrGroupHandler): An object to handle the Zarr group
44
- containing the table data.
45
- index_key (str): The column name to use as the index of the DataFrame.
46
- index_type (str): The type of the index column in the DataFrame.
44
+ Making sure that this is consistent is
45
+ a duty of the backend implementations.
47
46
  """
48
47
  self._group_handler = group_handler
49
48
  self._index_key = index_key
@@ -67,7 +66,11 @@ class AbstractTableBackend(ABC):
67
66
  """Check if the backend implements the anndata protocol.
68
67
 
69
68
  If this is True, the backend should implement the
70
- `load_as_anndata` and `write_from_anndata` methods.
69
+ `write_from_anndata` method.
70
+
71
+ AnnData objects are more complex than DataFrames,
72
+ so if this is true the backend should implement the
73
+ full serialization of the AnnData object.
71
74
 
72
75
  If this is False, these methods should raise a
73
76
  `NotImplementedError`.
@@ -80,7 +83,7 @@ class AbstractTableBackend(ABC):
80
83
  """Check if the backend implements the pandas protocol.
81
84
 
82
85
  If this is True, the backend should implement the
83
- `load_as_dataframe` and `write_from_dataframe` methods.
86
+ `write_from_dataframe` methods.
84
87
 
85
88
  If this is False, these methods should raise a
86
89
  `NotImplementedError`.
@@ -93,7 +96,7 @@ class AbstractTableBackend(ABC):
93
96
  """Check if the backend implements the polars protocol.
94
97
 
95
98
  If this is True, the backend should implement the
96
- `load_as_polars` and `write_from_polars` methods.
99
+ `write_from_polars` methods.
97
100
 
98
101
  If this is False, these methods should raise a
99
102
  `NotImplementedError`.
@@ -122,6 +125,16 @@ class AbstractTableBackend(ABC):
122
125
  )
123
126
  return self._index_type # type: ignore[return-value]
124
127
 
128
+ @abstractmethod
129
+ def load(self) -> TabularData:
130
+ """Load the table from the store.
131
+
132
+ This is a generic load method.
133
+ Based on the explicit mode or the type of the table,
134
+ it will call the appropriate load method.
135
+ """
136
+ ...
137
+
125
138
  def load_as_anndata(self) -> AnnData:
126
139
  """Load the table as an AnnData object.
127
140
 
@@ -129,70 +142,35 @@ class AbstractTableBackend(ABC):
129
142
  selecting columns is not implemented, because it is not
130
143
  straightforward to do so for an arbitrary AnnData object.
131
144
  """
132
- if self.implements_pandas():
133
- return convert_pandas_to_anndata(
134
- self.load_as_pandas_df(),
135
- index_key=self.index_key,
136
- )
137
- elif self.implements_polars():
138
- return convert_polars_to_anndata(
139
- self.load_as_polars_lf(),
140
- index_key=self.index_key,
141
- )
142
- else:
143
- raise NgioValueError(
144
- "Backend does not implement any of the protocols. "
145
- "A backend should implement at least one of the "
146
- "following protocols: anndata, pandas, polars."
147
- )
145
+ table = self.load()
146
+ return convert_to_anndata(
147
+ table,
148
+ index_key=self.index_key,
149
+ )
148
150
 
149
151
  def load_as_pandas_df(self) -> DataFrame:
150
152
  """Load the table as a pandas DataFrame.
151
153
 
152
154
  If columns are provided, the table should be filtered
153
155
  """
154
- if self.implements_anndata():
155
- return convert_anndata_to_pandas(
156
- self.load_as_anndata(),
157
- index_key=self.index_key,
158
- index_type=self.index_type,
159
- )
160
- elif self.implements_polars():
161
- return convert_polars_to_pandas(
162
- self.load_as_polars_lf(),
163
- index_key=self.index_key,
164
- index_type=self.index_type,
165
- )
166
- else:
167
- raise NgioValueError(
168
- "Backend does not implement any of the protocols. "
169
- "A backend should implement at least one of the "
170
- "following protocols: anndata, pandas, polars."
171
- )
156
+ table = self.load()
157
+ return convert_to_pandas(
158
+ table,
159
+ index_key=self.index_key,
160
+ index_type=self.index_type,
161
+ )
172
162
 
173
163
  def load_as_polars_lf(self) -> LazyFrame:
174
164
  """Load the table as a polars LazyFrame.
175
165
 
176
166
  If columns are provided, the table should be filtered
177
167
  """
178
- if self.implements_anndata():
179
- return convert_anndata_to_polars(
180
- self.load_as_anndata(),
181
- index_key=self.index_key,
182
- index_type=self.index_type,
183
- ).lazy()
184
- elif self.implements_pandas():
185
- return convert_pandas_to_polars(
186
- self.load_as_pandas_df(),
187
- index_key=self.index_key,
188
- index_type=self.index_type,
189
- ).lazy()
190
- else:
191
- raise NgioValueError(
192
- "Backend does not implement any of the protocols. "
193
- "A backend should implement at least one of the "
194
- "following protocols: anndata, pandas, polars."
195
- )
168
+ table = self.load()
169
+ return convert_to_polars(
170
+ table,
171
+ index_key=self.index_key,
172
+ index_type=self.index_type,
173
+ )
196
174
 
197
175
  def write_from_pandas(self, table: DataFrame) -> None:
198
176
  """Serialize the table from a pandas DataFrame."""
@@ -230,7 +208,7 @@ class AbstractTableBackend(ABC):
230
208
 
231
209
  def write(
232
210
  self,
233
- table: DataFrame | AnnData | PolarsDataFrame | LazyFrame,
211
+ table_data: TabularData,
234
212
  metadata: dict | None = None,
235
213
  mode: Literal["pandas", "anndata", "polars"] | None = None,
236
214
  ) -> None:
@@ -240,15 +218,15 @@ class AbstractTableBackend(ABC):
240
218
  Based on the explicit mode or the type of the table,
241
219
  it will call the appropriate write method.
242
220
  """
243
- if mode == "pandas" or isinstance(table, DataFrame):
244
- self.write_from_pandas(table) # type: ignore[arg-type]
245
- elif mode == "anndata" or isinstance(table, AnnData):
246
- self.write_from_anndata(table) # type: ignore[arg-type]
247
- elif mode == "polars" or isinstance(table, PolarsDataFrame | LazyFrame):
248
- self.write_from_polars(table)
221
+ if mode == "pandas" or isinstance(table_data, DataFrame):
222
+ self.write_from_pandas(table_data) # type: ignore[arg-type]
223
+ elif mode == "anndata" or isinstance(table_data, AnnData):
224
+ self.write_from_anndata(table_data) # type: ignore[arg-type]
225
+ elif mode == "polars" or isinstance(table_data, PolarsDataFrame | LazyFrame):
226
+ self.write_from_polars(table_data)
249
227
  else:
250
228
  raise NgioValueError(
251
- f"Unsupported table type {type(table)}. "
229
+ f"Unsupported table type {type(table_data)}. "
252
230
  "Please specify the mode explicitly. "
253
231
  "Supported serialization modes are: "
254
232
  "'pandas', 'anndata', 'polars'."
@@ -21,7 +21,7 @@ class AnnDataBackend(AbstractTableBackend):
21
21
  @staticmethod
22
22
  def backend_name() -> str:
23
23
  """Return the name of the backend."""
24
- return "anndata_v1"
24
+ return "anndata"
25
25
 
26
26
  @staticmethod
27
27
  def implements_anndata() -> bool:
@@ -44,6 +44,10 @@ class AnnDataBackend(AbstractTableBackend):
44
44
  anndata = normalize_anndata(anndata, index_key=self.index_key)
45
45
  return anndata
46
46
 
47
+ def load(self) -> AnnData:
48
+ """Load the table as an AnnData object."""
49
+ return self.load_as_anndata()
50
+
47
51
  def write_from_anndata(self, table: AnnData) -> None:
48
52
  """Serialize the table from an AnnData object."""
49
53
  full_url = self._group_handler.full_url
@@ -0,0 +1,35 @@
1
+ import pandas as pd
2
+ import polars as pl
3
+
4
+ from ngio.tables.backends._non_zarr_backends import NonZarrBaseBackend
5
+
6
+
7
+ def write_lf_to_csv(path: str, table: pl.DataFrame) -> None:
8
+ """Write a polars DataFrame to a CSV file."""
9
+ table.write_csv(path)
10
+
11
+
12
+ def write_df_to_csv(path: str, table: pd.DataFrame) -> None:
13
+ """Write a pandas DataFrame to a CSV file."""
14
+ table.to_csv(path, index=False)
15
+
16
+
17
+ class CsvTableBackend(NonZarrBaseBackend):
18
+ """A class to load and write small tables in CSV format."""
19
+
20
+ def __init__(
21
+ self,
22
+ ):
23
+ """Initialize the CsvTableBackend."""
24
+ super().__init__(
25
+ lf_reader=pl.scan_csv,
26
+ df_reader=pd.read_csv,
27
+ lf_writer=write_lf_to_csv,
28
+ df_writer=write_df_to_csv,
29
+ table_name="table.csv",
30
+ )
31
+
32
+ @staticmethod
33
+ def backend_name() -> str:
34
+ """Return the name of the backend."""
35
+ return "csv"
@@ -17,7 +17,7 @@ class JsonTableBackend(AbstractTableBackend):
17
17
  @staticmethod
18
18
  def backend_name() -> str:
19
19
  """Return the name of the backend."""
20
- return "experimental_json_v1"
20
+ return "json"
21
21
 
22
22
  @staticmethod
23
23
  def implements_anndata() -> bool:
@@ -61,6 +61,9 @@ class JsonTableBackend(AbstractTableBackend):
61
61
  )
62
62
  return data_frame
63
63
 
64
+ def load(self) -> DataFrame:
65
+ return self.load_as_pandas_df()
66
+
64
67
  def _write_from_dict(self, table: dict) -> None:
65
68
  """Write the table from a dictionary to the store."""
66
69
  table_group = self._get_table_group()
@@ -1,7 +1,7 @@
1
1
  import io
2
+ from collections.abc import Callable
3
+ from typing import Any
2
4
 
3
- import pandas as pd
4
- import polars as pl
5
5
  from pandas import DataFrame
6
6
  from polars import DataFrame as PolarsDataFrame
7
7
  from polars import LazyFrame
@@ -12,15 +12,22 @@ from ngio.tables.backends._utils import normalize_pandas_df, normalize_polars_lf
12
12
  from ngio.utils import NgioFileNotFoundError, NgioValueError
13
13
 
14
14
 
15
- class CsvTableBackend(AbstractTableBackend):
15
+ class NonZarrBaseBackend(AbstractTableBackend):
16
16
  """A class to load and write small tables in CSV format."""
17
17
 
18
- csv_name = "table.csv"
19
-
20
- @staticmethod
21
- def backend_name() -> str:
22
- """Return the name of the backend."""
23
- return "experimental_csv_v1"
18
+ def __init__(
19
+ self,
20
+ df_reader: Callable[[Any], DataFrame],
21
+ lf_reader: Callable[[Any], LazyFrame],
22
+ df_writer: Callable[[str, DataFrame], None],
23
+ lf_writer: Callable[[str, PolarsDataFrame], None],
24
+ table_name: str,
25
+ ):
26
+ self.df_reader = df_reader
27
+ self.lf_reader = lf_reader
28
+ self.df_writer = df_writer
29
+ self.lf_writer = lf_writer
30
+ self.table_name = table_name
24
31
 
25
32
  @staticmethod
26
33
  def implements_anndata() -> bool:
@@ -37,38 +44,58 @@ class CsvTableBackend(AbstractTableBackend):
37
44
  """Whether the handler implements the polars protocol."""
38
45
  return True
39
46
 
47
+ @staticmethod
48
+ def backend_name() -> str:
49
+ """Return the name of the backend."""
50
+ raise NotImplementedError(
51
+ "The backend_name method must be implemented in the subclass."
52
+ )
53
+
40
54
  def _load_from_directory_store(self, reader):
41
55
  """Load the table from a directory store."""
42
56
  url = self._group_handler.full_url
43
57
  if url is None:
58
+ ext = self.table_name.split(".")[-1]
44
59
  raise NgioValueError(
45
- f"Ngio does not support reading a CSV file from a "
60
+ f"Ngio does not support reading a {ext} table from a "
46
61
  f"store of type {type(self._group_handler)}. "
47
62
  "Please make sure to use a compatible "
48
63
  "store like a zarr.DirectoryStore."
49
64
  )
50
- csv_path = f"{url}/{self.csv_name}"
51
- dataframe = reader(csv_path)
65
+ table_path = f"{url}/{self.table_name}"
66
+ dataframe = reader(table_path)
52
67
  return dataframe
53
68
 
54
- def _load_from_fs_store(self, reader):
69
+ def _load_from_fs_store_df(self, reader):
55
70
  """Load the table from an FS store."""
56
- bytes_table = self._group_handler.store.get(self.csv_name)
71
+ path = self._group_handler.group.path
72
+ table_path = f"{path}/{self.table_name}"
73
+ bytes_table = self._group_handler.store.get(table_path)
57
74
  if bytes_table is None:
58
- raise NgioFileNotFoundError(f"No table found at {self.csv_name}. ")
75
+ raise NgioFileNotFoundError(f"No table found at {table_path}. ")
59
76
  dataframe = reader(io.BytesIO(bytes_table))
60
77
  return dataframe
61
78
 
79
+ def _load_from_fs_store_lf(self, reader):
80
+ """Load the table from an FS store."""
81
+ full_url = self._group_handler.full_url
82
+ parquet_path = f"{full_url}/{self.table_name}"
83
+ store_fs = self._group_handler.store.fs # type: ignore
84
+ with store_fs.open(parquet_path, "rb") as f:
85
+ dataframe = reader(f)
86
+ return dataframe
87
+
62
88
  def load_as_pandas_df(self) -> DataFrame:
63
89
  """Load the table as a pandas DataFrame."""
64
90
  store = self._group_handler.store
65
91
  if isinstance(store, DirectoryStore):
66
- dataframe = self._load_from_directory_store(reader=pd.read_csv)
92
+ dataframe = self._load_from_directory_store(reader=self.df_reader)
67
93
  elif isinstance(store, FSStore):
68
- dataframe = self._load_from_fs_store(reader=pd.read_csv)
94
+ dataframe = self._load_from_fs_store_df(reader=self.df_reader)
69
95
  else:
96
+ ext = self.table_name.split(".")[-1]
70
97
  raise NgioValueError(
71
- f"Ngio does not support reading a CSV file from a "
98
+ f"Ngio does not support reading a {ext} table from a "
72
99
  f"store of type {type(store)}. "
73
100
  "Please make sure to use a compatible "
74
101
  "store like a zarr.DirectoryStore or "
@@ -83,16 +110,21 @@ class CsvTableBackend(AbstractTableBackend):
83
110
  )
84
111
  return dataframe
85
112
 
113
+ def load(self) -> DataFrame:
114
+ """Load the table as a pandas DataFrame."""
115
+ return self.load_as_pandas_df()
116
+
86
117
  def load_as_polars_lf(self) -> LazyFrame:
87
118
  """Load the table as a polars LazyFrame."""
88
119
  store = self._group_handler.store
89
120
  if isinstance(store, DirectoryStore):
90
- lazy_frame = self._load_from_directory_store(reader=pl.scan_csv)
121
+ lazy_frame = self._load_from_directory_store(reader=self.lf_reader)
91
122
  elif isinstance(store, FSStore):
92
- lazy_frame = self._load_from_fs_store(reader=pl.scan_csv)
123
+ lazy_frame = self._load_from_fs_store_lf(reader=self.lf_reader)
93
124
  else:
125
+ ext = self.table_name.split(".")[-1]
94
126
  raise NgioValueError(
95
- f"Ngio does not support reading a CSV file from a "
127
+ f"Ngio does not support reading a {ext} from a "
96
128
  f"store of type {type(store)}. "
97
129
  "Please make sure to use a compatible "
98
130
  "store like a zarr.DirectoryStore or "
@@ -117,16 +149,18 @@ class CsvTableBackend(AbstractTableBackend):
117
149
  if isinstance(store, DirectoryStore):
118
150
  full_url = self._group_handler.full_url
119
151
  else:
152
+ ext = self.table_name.split(".")[-1]
120
153
  raise NgioValueError(
121
- f"Ngio does not support writing a CSV file to a "
154
+ f"Ngio does not support writing a {ext} file to a "
122
155
  f"store of type {type(store)}. "
123
156
  "Please make sure to use a compatible "
124
157
  "store like a zarr.DirectoryStore or "
125
158
  "zarr.FSStore."
126
159
  )
127
160
  if full_url is None:
161
+ ext = self.table_name.split(".")[-1]
128
162
  raise NgioValueError(
129
- f"Ngio does not support writing a CSV file to a "
163
+ f"Ngio does not support writing a {ext} file to a "
130
164
  f"store of type {type(store)}. "
131
165
  "Please make sure to use a compatible "
132
166
  "store like a zarr.DirectoryStore or "
@@ -143,8 +177,8 @@ class CsvTableBackend(AbstractTableBackend):
143
177
  reset_index=True,
144
178
  )
145
179
  full_url = self._get_store_url()
146
- csv_path = f"{full_url}/{self.csv_name}"
147
- table.to_csv(csv_path, index=False)
180
+ table_path = f"{full_url}/{self.table_name}"
181
+ self.df_writer(table_path, table)
148
182
 
149
183
  def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
150
184
  """Write the table from a polars DataFrame or LazyFrame."""
@@ -158,5 +192,5 @@ class CsvTableBackend(AbstractTableBackend):
158
192
  table = table.collect()
159
193
 
160
194
  full_url = self._get_store_url()
161
- csv_path = f"{full_url}/{self.csv_name}"
162
- table.write_csv(csv_path)
195
+ table_path = f"{full_url}/{self.table_name}"
196
+ self.lf_writer(table_path, table)
@@ -0,0 +1,47 @@
1
+ import pandas as pd
2
+ import polars as pl
3
+
4
+ from ngio.tables.backends._non_zarr_backends import NonZarrBaseBackend
5
+
6
+
7
+ def write_lf_to_parquet(path: str, table: pl.DataFrame) -> None:
8
+ """Write a polars DataFrame to a Parquet file."""
9
+ # make categorical into string (for pandas compatibility)
10
+ schema = table.collect_schema()
11
+
12
+ categorical_columns = []
13
+ for name, dtype in zip(schema.names(), schema.dtypes(), strict=True):
14
+ if dtype == pl.Categorical:
15
+ categorical_columns.append(name)
16
+
17
+ for col in categorical_columns:
18
+ table = table.with_columns(pl.col(col).cast(pl.Utf8))
19
+
20
+ # write to parquet
21
+ table.write_parquet(path)
22
+
23
+
24
+ def write_df_to_parquet(path: str, table: pd.DataFrame) -> None:
25
+ """Write a pandas DataFrame to a Parquet file."""
26
+ table.to_parquet(path, index=False)
27
+
28
+
29
+ class ParquetTableBackend(NonZarrBaseBackend):
30
+ """A class to load and write small tables in Parquet format."""
31
+
32
+ def __init__(
33
+ self,
34
+ ):
35
+ """Initialize the ParquetTableBackend."""
36
+ super().__init__(
37
+ lf_reader=pl.scan_parquet,
38
+ df_reader=pd.read_parquet,
39
+ lf_writer=write_lf_to_parquet,
40
+ df_writer=write_df_to_parquet,
41
+ table_name="table.parquet",
42
+ )
43
+
44
+ @staticmethod
45
+ def backend_name() -> str:
46
+ """Return the name of the backend."""
47
+ return "parquet"
@@ -7,20 +7,22 @@ from pandas import DataFrame
7
7
  from polars import DataFrame as PolarsDataFrame
8
8
  from polars import LazyFrame
9
9
 
10
- from ngio.tables.backends._anndata_v1 import AnnDataBackend
11
- from ngio.tables.backends._csv_v1 import CsvTableBackend
12
- from ngio.tables.backends._json_v1 import JsonTableBackend
10
+ from ngio.tables.backends._anndata import AnnDataBackend
11
+ from ngio.tables.backends._csv import CsvTableBackend
12
+ from ngio.tables.backends._json import JsonTableBackend
13
+ from ngio.tables.backends._parquet import ParquetTableBackend
14
+ from ngio.tables.backends._utils import TabularData
13
15
  from ngio.utils import NgioValueError, ZarrGroupHandler
14
16
 
15
17
 
16
18
  class TableBackendProtocol(Protocol):
17
- def __init__(
19
+ def set_group_handler(
18
20
  self,
19
21
  group_handler: ZarrGroupHandler,
20
22
  index_key: str | None = None,
21
23
  index_type: Literal["int", "str"] | None = None,
22
- ):
23
- """Backend constructor.
24
+ ) -> None:
25
+ """Attach a group handler to the backend.
24
26
 
25
27
  Index keys and index types are used to ensure that the
26
28
  serialization and deserialization of the table
@@ -42,12 +44,21 @@ class TableBackendProtocol(Protocol):
42
44
  """
43
45
  ...
44
46
 
47
+ @property
48
+ def group_handler(self) -> ZarrGroupHandler:
49
+ """Return the group handler."""
50
+ ...
51
+
45
52
  @staticmethod
46
53
  def implements_anndata() -> bool:
47
54
  """Check if the backend implements the anndata protocol.
48
55
 
49
56
  If this is True, the backend should implement the
50
- `load_as_anndata` and `write_from_anndata` methods.
57
+ `write_from_anndata` method.
58
+
59
+ AnnData objects are more complex than DataFrames,
60
+ so if this is true the backend should implement the
61
+ full serialization of the AnnData object.
51
62
 
52
63
  If this is False, these methods should raise a
53
64
  `NotImplementedError`.
@@ -59,7 +70,7 @@ class TableBackendProtocol(Protocol):
59
70
  """Check if the backend implements the pandas protocol.
60
71
 
61
72
  If this is True, the backend should implement the
62
- `load_as_dataframe` and `write_from_dataframe` methods.
73
+ `write_from_dataframe` methods.
63
74
 
64
75
  If this is False, these methods should raise a
65
76
  `NotImplementedError`.
@@ -71,7 +82,7 @@ class TableBackendProtocol(Protocol):
71
82
  """Check if the backend implements the polars protocol.
72
83
 
73
84
  If this is True, the backend should implement the
74
- `load_as_polars` and `write_from_polars` methods.
85
+ `write_from_polars` methods.
75
86
 
76
87
  If this is False, these methods should raise a
77
88
  `NotImplementedError`.
@@ -90,6 +101,16 @@ class TableBackendProtocol(Protocol):
90
101
  """Load the table as a polars LazyFrame."""
91
102
  ...
92
103
 
104
+ def load(self) -> TabularData:
105
+ """The default load method.
106
+
107
+ This method will be default way to load the table
108
+ from the backend. This method should wrap one of the
109
+ `load_as_anndata`, `load_as_dataframe` or `load_as_polars`
110
+ methods depending on the backend implementation.
111
+ """
112
+ ...
113
+
93
114
  def write_from_pandas(self, table: DataFrame) -> None:
94
115
  """Serialize the table from a pandas DataFrame."""
95
116
  ...
@@ -104,7 +125,7 @@ class TableBackendProtocol(Protocol):
104
125
 
105
126
  def write(
106
127
  self,
107
- table: DataFrame | AnnData | PolarsDataFrame | LazyFrame,
128
+ table_data: DataFrame | AnnData | PolarsDataFrame | LazyFrame,
108
129
  metadata: dict[str, str] | None = None,
109
130
  mode: Literal["pandas", "anndata", "polars"] | None = None,
110
131
  ) -> None:
@@ -144,23 +165,20 @@ class ImplementedTableBackends:
144
165
 
145
166
  def get_backend(
146
167
  self,
147
- backend_name: str | None,
168
+ *,
148
169
  group_handler: ZarrGroupHandler,
170
+ backend_name: str = "anndata",
149
171
  index_key: str | None = None,
150
172
  index_type: Literal["int", "str"] | None = None,
151
173
  ) -> TableBackendProtocol:
152
174
  """Try to get a handler for the given store based on the metadata version."""
153
- if backend_name is None:
154
- # Default to anndata since it is currently
155
- # the only backend in use.
156
- backend_name = "anndata_v1"
157
-
158
175
  if backend_name not in self._implemented_backends:
159
176
  raise NgioValueError(f"Table backend {backend_name} not implemented.")
160
- handler = self._implemented_backends[backend_name](
177
+ backend = self._implemented_backends[backend_name]()
178
+ backend.set_group_handler(
161
179
  group_handler=group_handler, index_key=index_key, index_type=index_type
162
180
  )
163
- return handler
181
+ return backend
164
182
 
165
183
  def add_backend(
166
184
  self,
@@ -180,3 +198,6 @@ class ImplementedTableBackends:
180
198
  ImplementedTableBackends().add_backend(AnnDataBackend)
181
199
  ImplementedTableBackends().add_backend(JsonTableBackend)
182
200
  ImplementedTableBackends().add_backend(CsvTableBackend)
201
+ ImplementedTableBackends().add_backend(ParquetTableBackend)
202
+
203
+ TableBackend = Literal["anndata", "json", "csv", "parquet"] | str | TableBackendProtocol