ngio 0.2.8__py3-none-any.whl → 0.3.0a0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -5,15 +5,13 @@ from anndata import AnnData
5
5
  from pandas import DataFrame
6
6
  from polars import DataFrame as PolarsDataFrame
7
7
  from polars import LazyFrame
8
- from pydantic import BaseModel
8
+ from pydantic import BaseModel, ConfigDict
9
9
 
10
10
  from ngio.tables.backends._utils import (
11
- convert_anndata_to_pandas,
12
- convert_anndata_to_polars,
13
- convert_pandas_to_anndata,
14
- convert_pandas_to_polars,
15
- convert_polars_to_anndata,
16
- convert_polars_to_pandas,
11
+ TabularData,
12
+ convert_to_anndata,
13
+ convert_to_pandas,
14
+ convert_to_polars,
17
15
  )
18
16
  from ngio.utils import NgioValueError, ZarrGroupHandler
19
17
 
@@ -21,29 +19,30 @@ from ngio.utils import NgioValueError, ZarrGroupHandler
21
19
  class BackendMeta(BaseModel):
22
20
  """Metadata for the backend."""
23
21
 
24
- backend: str | None = None
22
+ backend: str = "anndata_v1"
25
23
  index_key: str | None = None
26
24
  index_type: Literal["int", "str"] | None = None
27
25
 
26
+ model_config = ConfigDict(extra="allow")
27
+
28
28
 
29
29
  class AbstractTableBackend(ABC):
30
30
  """Abstract class for table backends."""
31
31
 
32
- def __init__(
32
+ def set_group_handler(
33
33
  self,
34
34
  group_handler: ZarrGroupHandler,
35
35
  index_key: str | None = None,
36
36
  index_type: Literal["int", "str"] | None = None,
37
- ):
38
- """Initialize the handler.
37
+ ) -> None:
38
+ """Attach a group handler to the backend.
39
39
 
40
- This is a base class for the table backends protocol.
40
+ Index keys and index types are used to ensure that the
41
+ serialization and deserialization of the table
42
+ is consistent across different backends.
41
43
 
42
- Args:
43
- group_handler (ZarrGroupHandler): An object to handle the Zarr group
44
- containing the table data.
45
- index_key (str): The column name to use as the index of the DataFrame.
46
- index_type (str): The type of the index column in the DataFrame.
44
+ Making sure that this is consistent is
45
+ a duty of the backend implementations.
47
46
  """
48
47
  self._group_handler = group_handler
49
48
  self._index_key = index_key
@@ -67,7 +66,11 @@ class AbstractTableBackend(ABC):
67
66
  """Check if the backend implements the anndata protocol.
68
67
 
69
68
  If this is True, the backend should implement the
70
- `load_as_anndata` and `write_from_anndata` methods.
69
+ `write_from_anndata` method.
70
+
71
+ AnnData objects are more complex than DataFrames,
72
+ so if this is true the backend should implement the
73
+ full serialization of the AnnData object.
71
74
 
72
75
  If this is False, these methods should raise a
73
76
  `NotImplementedError`.
@@ -80,7 +83,7 @@ class AbstractTableBackend(ABC):
80
83
  """Check if the backend implements the pandas protocol.
81
84
 
82
85
  If this is True, the backend should implement the
83
- `load_as_dataframe` and `write_from_dataframe` methods.
86
+ `write_from_pandas` method.
84
87
 
85
88
  If this is False, these methods should raise a
86
89
  `NotImplementedError`.
@@ -93,7 +96,7 @@ class AbstractTableBackend(ABC):
93
96
  """Check if the backend implements the polars protocol.
94
97
 
95
98
  If this is True, the backend should implement the
96
- `load_as_polars` and `write_from_polars` methods.
99
+ `write_from_polars` method.
97
100
 
98
101
  If this is False, these methods should raise a
99
102
  `NotImplementedError`.
@@ -122,6 +125,16 @@ class AbstractTableBackend(ABC):
122
125
  )
123
126
  return self._index_type # type: ignore[return-value]
124
127
 
128
+ @abstractmethod
129
+ def load(self) -> TabularData:
130
+ """Load the table from the store.
131
+
132
+ This is a generic load method.
133
+ Each backend decides which tabular type to return;
134
+ the `load_as_*` methods convert it to the requested type.
135
+ """
136
+ ...
137
+
125
138
  def load_as_anndata(self) -> AnnData:
126
139
  """Load the table as an AnnData object.
127
140
 
@@ -129,70 +142,35 @@ class AbstractTableBackend(ABC):
129
142
  selecting columns is not implemented, because it is not
130
143
  straightforward to do so for an arbitrary AnnData object.
131
144
  """
132
- if self.implements_pandas():
133
- return convert_pandas_to_anndata(
134
- self.load_as_pandas_df(),
135
- index_key=self.index_key,
136
- )
137
- elif self.implements_polars():
138
- return convert_polars_to_anndata(
139
- self.load_as_polars_lf(),
140
- index_key=self.index_key,
141
- )
142
- else:
143
- raise NgioValueError(
144
- "Backend does not implement any of the protocols. "
145
- "A backend should implement at least one of the "
146
- "following protocols: anndata, pandas, polars."
147
- )
145
+ table = self.load()
146
+ return convert_to_anndata(
147
+ table,
148
+ index_key=self.index_key,
149
+ )
148
150
 
149
151
  def load_as_pandas_df(self) -> DataFrame:
150
152
  """Load the table as a pandas DataFrame.
151
153
 
152
154
  If columns are provided, the table should be filtered
153
155
  """
154
- if self.implements_anndata():
155
- return convert_anndata_to_pandas(
156
- self.load_as_anndata(),
157
- index_key=self.index_key,
158
- index_type=self.index_type,
159
- )
160
- elif self.implements_polars():
161
- return convert_polars_to_pandas(
162
- self.load_as_polars_lf(),
163
- index_key=self.index_key,
164
- index_type=self.index_type,
165
- )
166
- else:
167
- raise NgioValueError(
168
- "Backend does not implement any of the protocols. "
169
- "A backend should implement at least one of the "
170
- "following protocols: anndata, pandas, polars."
171
- )
156
+ table = self.load()
157
+ return convert_to_pandas(
158
+ table,
159
+ index_key=self.index_key,
160
+ index_type=self.index_type,
161
+ )
172
162
 
173
163
  def load_as_polars_lf(self) -> LazyFrame:
174
164
  """Load the table as a polars LazyFrame.
175
165
 
176
166
  If columns are provided, the table should be filtered
177
167
  """
178
- if self.implements_anndata():
179
- return convert_anndata_to_polars(
180
- self.load_as_anndata(),
181
- index_key=self.index_key,
182
- index_type=self.index_type,
183
- ).lazy()
184
- elif self.implements_pandas():
185
- return convert_pandas_to_polars(
186
- self.load_as_pandas_df(),
187
- index_key=self.index_key,
188
- index_type=self.index_type,
189
- ).lazy()
190
- else:
191
- raise NgioValueError(
192
- "Backend does not implement any of the protocols. "
193
- "A backend should implement at least one of the "
194
- "following protocols: anndata, pandas, polars."
195
- )
168
+ table = self.load()
169
+ return convert_to_polars(
170
+ table,
171
+ index_key=self.index_key,
172
+ index_type=self.index_type,
173
+ )
196
174
 
197
175
  def write_from_pandas(self, table: DataFrame) -> None:
198
176
  """Serialize the table from a pandas DataFrame."""
@@ -230,7 +208,7 @@ class AbstractTableBackend(ABC):
230
208
 
231
209
  def write(
232
210
  self,
233
- table: DataFrame | AnnData | PolarsDataFrame | LazyFrame,
211
+ table_data: TabularData,
234
212
  metadata: dict | None = None,
235
213
  mode: Literal["pandas", "anndata", "polars"] | None = None,
236
214
  ) -> None:
@@ -240,15 +218,15 @@ class AbstractTableBackend(ABC):
240
218
  Based on the explicit mode or the type of the table,
241
219
  it will call the appropriate write method.
242
220
  """
243
- if mode == "pandas" or isinstance(table, DataFrame):
244
- self.write_from_pandas(table) # type: ignore[arg-type]
245
- elif mode == "anndata" or isinstance(table, AnnData):
246
- self.write_from_anndata(table) # type: ignore[arg-type]
247
- elif mode == "polars" or isinstance(table, PolarsDataFrame | LazyFrame):
248
- self.write_from_polars(table)
221
+ if mode == "pandas" or isinstance(table_data, DataFrame):
222
+ self.write_from_pandas(table_data) # type: ignore[arg-type]
223
+ elif mode == "anndata" or isinstance(table_data, AnnData):
224
+ self.write_from_anndata(table_data) # type: ignore[arg-type]
225
+ elif mode == "polars" or isinstance(table_data, PolarsDataFrame | LazyFrame):
226
+ self.write_from_polars(table_data)
249
227
  else:
250
228
  raise NgioValueError(
251
- f"Unsupported table type {type(table)}. "
229
+ f"Unsupported table type {type(table_data)}. "
252
230
  "Please specify the mode explicitly. "
253
231
  "Supported serialization modes are: "
254
232
  "'pandas', 'anndata', 'polars'."
@@ -44,6 +44,10 @@ class AnnDataBackend(AbstractTableBackend):
44
44
  anndata = normalize_anndata(anndata, index_key=self.index_key)
45
45
  return anndata
46
46
 
47
+ def load(self) -> AnnData:
48
+ """Load the table as an AnnData object."""
49
+ return self.load_as_anndata()
50
+
47
51
  def write_from_anndata(self, table: AnnData) -> None:
48
52
  """Serialize the table from an AnnData object."""
49
53
  full_url = self._group_handler.full_url
@@ -1,162 +1,35 @@
1
- import io
2
-
3
1
  import pandas as pd
4
2
  import polars as pl
5
- from pandas import DataFrame
6
- from polars import DataFrame as PolarsDataFrame
7
- from polars import LazyFrame
8
- from zarr.storage import DirectoryStore, FSStore
9
-
10
- from ngio.tables.backends._abstract_backend import AbstractTableBackend
11
- from ngio.tables.backends._utils import normalize_pandas_df, normalize_polars_lf
12
- from ngio.utils import NgioFileNotFoundError, NgioValueError
13
-
14
-
15
- class CsvTableBackend(AbstractTableBackend):
16
- """A class to load and write small tables in CSV format."""
17
-
18
- csv_name = "table.csv"
19
-
20
- @staticmethod
21
- def backend_name() -> str:
22
- """Return the name of the backend."""
23
- return "experimental_csv_v1"
24
3
 
25
- @staticmethod
26
- def implements_anndata() -> bool:
27
- """Whether the handler implements the anndata protocol."""
28
- return False
4
+ from ngio.tables.backends._non_zarr_backends_v1 import NonZarrBaseBackend
29
5
 
30
- @staticmethod
31
- def implements_pandas() -> bool:
32
- """Whether the handler implements the dataframe protocol."""
33
- return True
34
6
 
35
- @staticmethod
36
- def implements_polars() -> bool:
37
- """Whether the handler implements the polars protocol."""
38
- return True
7
+ def write_lf_to_csv(path: str, table: pl.DataFrame) -> None:
8
+ """Write a polars DataFrame to a CSV file."""
9
+ table.write_csv(path)
39
10
 
40
- def _load_from_directory_store(self, reader):
41
- """Load the table from a directory store."""
42
- url = self._group_handler.full_url
43
- if url is None:
44
- raise NgioValueError(
45
- f"Ngio does not support reading a CSV file from a "
46
- f"store of type {type(self._group_handler)}. "
47
- "Please make sure to use a compatible "
48
- "store like a zarr.DirectoryStore."
49
- )
50
- csv_path = f"{url}/{self.csv_name}"
51
- dataframe = reader(csv_path)
52
- return dataframe
53
11
 
54
- def _load_from_fs_store(self, reader):
55
- """Load the table from an FS store."""
56
- bytes_table = self._group_handler.store.get(self.csv_name)
57
- if bytes_table is None:
58
- raise NgioFileNotFoundError(f"No table found at {self.csv_name}. ")
59
- dataframe = reader(io.BytesIO(bytes_table))
60
- return dataframe
12
+ def write_df_to_csv(path: str, table: pd.DataFrame) -> None:
13
+ """Write a pandas DataFrame to a CSV file."""
14
+ table.to_csv(path, index=False)
61
15
 
62
- def load_as_pandas_df(self) -> DataFrame:
63
- """Load the table as a pandas DataFrame."""
64
- store = self._group_handler.store
65
- if isinstance(store, DirectoryStore):
66
- dataframe = self._load_from_directory_store(reader=pd.read_csv)
67
- elif isinstance(store, FSStore):
68
- dataframe = self._load_from_fs_store(reader=pd.read_csv)
69
- else:
70
- raise NgioValueError(
71
- f"Ngio does not support reading a CSV file from a "
72
- f"store of type {type(store)}. "
73
- "Please make sure to use a compatible "
74
- "store like a zarr.DirectoryStore or "
75
- "zarr.FSStore."
76
- )
77
16
 
78
- dataframe = normalize_pandas_df(
79
- dataframe,
80
- index_key=self.index_key,
81
- index_type=self.index_type,
82
- reset_index=False,
83
- )
84
- return dataframe
85
-
86
- def load_as_polars_lf(self) -> LazyFrame:
87
- """Load the table as a polars LazyFrame."""
88
- store = self._group_handler.store
89
- if isinstance(store, DirectoryStore):
90
- lazy_frame = self._load_from_directory_store(reader=pl.scan_csv)
91
- elif isinstance(store, FSStore):
92
- lazy_frame = self._load_from_fs_store(reader=pl.scan_csv)
93
- else:
94
- raise NgioValueError(
95
- f"Ngio does not support reading a CSV file from a "
96
- f"store of type {type(store)}. "
97
- "Please make sure to use a compatible "
98
- "store like a zarr.DirectoryStore or "
99
- "zarr.FSStore."
100
- )
101
- if not isinstance(lazy_frame, LazyFrame):
102
- raise NgioValueError(
103
- "Table is not a lazy frame. Please report this issue as an ngio bug."
104
- f" {type(lazy_frame)}"
105
- )
106
-
107
- lazy_frame = normalize_polars_lf(
108
- lazy_frame,
109
- index_key=self.index_key,
110
- index_type=self.index_type,
111
- )
112
- return lazy_frame
113
-
114
- def _get_store_url(self) -> str:
115
- """Get the store URL."""
116
- store = self._group_handler.store
117
- if isinstance(store, DirectoryStore):
118
- full_url = self._group_handler.full_url
119
- else:
120
- raise NgioValueError(
121
- f"Ngio does not support writing a CSV file to a "
122
- f"store of type {type(store)}. "
123
- "Please make sure to use a compatible "
124
- "store like a zarr.DirectoryStore or "
125
- "zarr.FSStore."
126
- )
127
- if full_url is None:
128
- raise NgioValueError(
129
- f"Ngio does not support writing a CSV file to a "
130
- f"store of type {type(store)}. "
131
- "Please make sure to use a compatible "
132
- "store like a zarr.DirectoryStore or "
133
- "zarr.FSStore."
134
- )
135
- return full_url
136
-
137
- def write_from_pandas(self, table: DataFrame) -> None:
138
- """Write the table from a pandas DataFrame."""
139
- table = normalize_pandas_df(
140
- table,
141
- index_key=self.index_key,
142
- index_type=self.index_type,
143
- reset_index=True,
144
- )
145
- full_url = self._get_store_url()
146
- csv_path = f"{full_url}/{self.csv_name}"
147
- table.to_csv(csv_path, index=False)
17
+ class CsvTableBackend(NonZarrBaseBackend):
18
+ """A class to load and write small tables in CSV format."""
148
19
 
149
- def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
150
- """Write the table from a polars DataFrame or LazyFrame."""
151
- table = normalize_polars_lf(
152
- table,
153
- index_key=self.index_key,
154
- index_type=self.index_type,
20
+ def __init__(
21
+ self,
22
+ ):
23
+ """Initialize the CsvTableBackend."""
24
+ super().__init__(
25
+ lf_reader=pl.scan_csv,
26
+ df_reader=pd.read_csv,
27
+ lf_writer=write_lf_to_csv,
28
+ df_writer=write_df_to_csv,
29
+ table_name="table.csv",
155
30
  )
156
31
 
157
- if isinstance(table, LazyFrame):
158
- table = table.collect()
159
-
160
- full_url = self._get_store_url()
161
- csv_path = f"{full_url}/{self.csv_name}"
162
- table.write_csv(csv_path)
32
+ @staticmethod
33
+ def backend_name() -> str:
34
+ """Return the name of the backend."""
35
+ return "experimental_csv_v1"
@@ -61,6 +61,9 @@ class JsonTableBackend(AbstractTableBackend):
61
61
  )
62
62
  return data_frame
63
63
 
64
+ def load(self) -> DataFrame:
65
+ return self.load_as_pandas_df()
66
+
64
67
  def _write_from_dict(self, table: dict) -> None:
65
68
  """Write the table from a dictionary to the store."""
66
69
  table_group = self._get_table_group()
@@ -0,0 +1,196 @@
1
+ import io
2
+ from collections.abc import Callable
3
+ from typing import Any
4
+
5
+ from pandas import DataFrame
6
+ from polars import DataFrame as PolarsDataFrame
7
+ from polars import LazyFrame
8
+ from zarr.storage import DirectoryStore, FSStore
9
+
10
+ from ngio.tables.backends._abstract_backend import AbstractTableBackend
11
+ from ngio.tables.backends._utils import normalize_pandas_df, normalize_polars_lf
12
+ from ngio.utils import NgioFileNotFoundError, NgioValueError
13
+
14
+
15
+ class NonZarrBaseBackend(AbstractTableBackend):
16
+ """A base class to load and write small tables stored as non-Zarr files."""
17
+
18
+ def __init__(
19
+ self,
20
+ df_reader: Callable[[Any], DataFrame],
21
+ lf_reader: Callable[[Any], LazyFrame],
22
+ df_writer: Callable[[str, DataFrame], None],
23
+ lf_writer: Callable[[str, PolarsDataFrame], None],
24
+ table_name: str,
25
+ ):
26
+ self.df_reader = df_reader
27
+ self.lf_reader = lf_reader
28
+ self.df_writer = df_writer
29
+ self.lf_writer = lf_writer
30
+ self.table_name = table_name
31
+
32
+ @staticmethod
33
+ def implements_anndata() -> bool:
34
+ """Whether the handler implements the anndata protocol."""
35
+ return False
36
+
37
+ @staticmethod
38
+ def implements_pandas() -> bool:
39
+ """Whether the handler implements the dataframe protocol."""
40
+ return True
41
+
42
+ @staticmethod
43
+ def implements_polars() -> bool:
44
+ """Whether the handler implements the polars protocol."""
45
+ return True
46
+
47
+ @staticmethod
48
+ def backend_name() -> str:
49
+ """Return the name of the backend."""
50
+ raise NotImplementedError(
51
+ "The backend_name method must be implemented in the subclass."
52
+ )
53
+
54
+ def _load_from_directory_store(self, reader):
55
+ """Load the table from a directory store."""
56
+ url = self._group_handler.full_url
57
+ if url is None:
58
+ ext = self.table_name.split(".")[-1]
59
+ raise NgioValueError(
60
+ f"Ngio does not support reading a {ext} table from a "
61
+ f"store of type {type(self._group_handler)}. "
62
+ "Please make sure to use a compatible "
63
+ "store like a zarr.DirectoryStore."
64
+ )
65
+ table_path = f"{url}/{self.table_name}"
66
+ dataframe = reader(table_path)
67
+ return dataframe
68
+
69
+ def _load_from_fs_store_df(self, reader):
70
+ """Load the table from an FS store."""
71
+ path = self._group_handler.group.path
72
+ table_path = f"{path}/{self.table_name}"
73
+ bytes_table = self._group_handler.store.get(table_path)
74
+ if bytes_table is None:
75
+ raise NgioFileNotFoundError(f"No table found at {table_path}. ")
76
+ dataframe = reader(io.BytesIO(bytes_table))
77
+ return dataframe
78
+
79
+ def _load_from_fs_store_lf(self, reader):
80
+ """Load the table from an FS store."""
81
+ full_url = self._group_handler.full_url
82
+ parquet_path = f"{full_url}/{self.table_name}"
83
+ store_fs = self._group_handler.store.fs # type: ignore
84
+ with store_fs.open(parquet_path, "rb") as f:
85
+ dataframe = reader(f)
86
+ return dataframe
87
+
88
+ def load_as_pandas_df(self) -> DataFrame:
89
+ """Load the table as a pandas DataFrame."""
90
+ store = self._group_handler.store
91
+ if isinstance(store, DirectoryStore):
92
+ dataframe = self._load_from_directory_store(reader=self.df_reader)
93
+ elif isinstance(store, FSStore):
94
+ dataframe = self._load_from_fs_store_df(reader=self.df_reader)
95
+ else:
96
+ ext = self.table_name.split(".")[-1]
97
+ raise NgioValueError(
98
+ f"Ngio does not support reading a {ext} table from a "
99
+ f"store of type {type(store)}. "
100
+ "Please make sure to use a compatible "
101
+ "store like a zarr.DirectoryStore or "
102
+ "zarr.FSStore."
103
+ )
104
+
105
+ dataframe = normalize_pandas_df(
106
+ dataframe,
107
+ index_key=self.index_key,
108
+ index_type=self.index_type,
109
+ reset_index=False,
110
+ )
111
+ return dataframe
112
+
113
+ def load(self) -> DataFrame:
114
+ """Load the table as a pandas DataFrame."""
115
+ return self.load_as_pandas_df()
116
+
117
+ def load_as_polars_lf(self) -> LazyFrame:
118
+ """Load the table as a polars LazyFrame."""
119
+ store = self._group_handler.store
120
+ if isinstance(store, DirectoryStore):
121
+ lazy_frame = self._load_from_directory_store(reader=self.lf_reader)
122
+ elif isinstance(store, FSStore):
123
+ lazy_frame = self._load_from_fs_store_lf(reader=self.lf_reader)
124
+ else:
125
+ ext = self.table_name.split(".")[-1]
126
+ raise NgioValueError(
127
+ f"Ngio does not support reading a {ext} from a "
128
+ f"store of type {type(store)}. "
129
+ "Please make sure to use a compatible "
130
+ "store like a zarr.DirectoryStore or "
131
+ "zarr.FSStore."
132
+ )
133
+ if not isinstance(lazy_frame, LazyFrame):
134
+ raise NgioValueError(
135
+ "Table is not a lazy frame. Please report this issue as an ngio bug."
136
+ f" {type(lazy_frame)}"
137
+ )
138
+
139
+ lazy_frame = normalize_polars_lf(
140
+ lazy_frame,
141
+ index_key=self.index_key,
142
+ index_type=self.index_type,
143
+ )
144
+ return lazy_frame
145
+
146
+ def _get_store_url(self) -> str:
147
+ """Get the store URL."""
148
+ store = self._group_handler.store
149
+ if isinstance(store, DirectoryStore):
150
+ full_url = self._group_handler.full_url
151
+ else:
152
+ ext = self.table_name.split(".")[-1]
153
+ raise NgioValueError(
154
+ f"Ngio does not support writing a {ext} file to a "
155
+ f"store of type {type(store)}. "
156
+ "Please make sure to use a compatible "
157
+ "store like a zarr.DirectoryStore or "
158
+ "zarr.FSStore."
159
+ )
160
+ if full_url is None:
161
+ ext = self.table_name.split(".")[-1]
162
+ raise NgioValueError(
163
+ f"Ngio does not support writing a {ext} file to a "
164
+ f"store of type {type(store)}. "
165
+ "Please make sure to use a compatible "
166
+ "store like a zarr.DirectoryStore or "
167
+ "zarr.FSStore."
168
+ )
169
+ return full_url
170
+
171
+ def write_from_pandas(self, table: DataFrame) -> None:
172
+ """Write the table from a pandas DataFrame."""
173
+ table = normalize_pandas_df(
174
+ table,
175
+ index_key=self.index_key,
176
+ index_type=self.index_type,
177
+ reset_index=True,
178
+ )
179
+ full_url = self._get_store_url()
180
+ table_path = f"{full_url}/{self.table_name}"
181
+ self.df_writer(table_path, table)
182
+
183
+ def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
184
+ """Write the table from a polars DataFrame or LazyFrame."""
185
+ table = normalize_polars_lf(
186
+ table,
187
+ index_key=self.index_key,
188
+ index_type=self.index_type,
189
+ )
190
+
191
+ if isinstance(table, LazyFrame):
192
+ table = table.collect()
193
+
194
+ full_url = self._get_store_url()
195
+ table_path = f"{full_url}/{self.table_name}"
196
+ self.lf_writer(table_path, table)
@@ -0,0 +1,47 @@
1
+ import pandas as pd
2
+ import polars as pl
3
+
4
+ from ngio.tables.backends._non_zarr_backends_v1 import NonZarrBaseBackend
5
+
6
+
7
+ def write_lf_to_parquet(path: str, table: pl.DataFrame) -> None:
8
+ """Write a polars DataFrame to a Parquet file."""
9
+ # make categorical into string (for pandas compatibility)
10
+ schema = table.collect_schema()
11
+
12
+ categorical_columns = []
13
+ for name, dtype in zip(schema.names(), schema.dtypes(), strict=True):
14
+ if dtype == pl.Categorical:
15
+ categorical_columns.append(name)
16
+
17
+ for col in categorical_columns:
18
+ table = table.with_columns(pl.col(col).cast(pl.Utf8))
19
+
20
+ # write to parquet
21
+ table.write_parquet(path)
22
+
23
+
24
+ def write_df_to_parquet(path: str, table: pd.DataFrame) -> None:
25
+ """Write a pandas DataFrame to a Parquet file."""
26
+ table.to_parquet(path, index=False)
27
+
28
+
29
+ class ParquetTableBackend(NonZarrBaseBackend):
30
+ """A class to load and write small tables in Parquet format."""
31
+
32
+ def __init__(
33
+ self,
34
+ ):
35
+ """Initialize the ParquetTableBackend."""
36
+ super().__init__(
37
+ lf_reader=pl.scan_parquet,
38
+ df_reader=pd.read_parquet,
39
+ lf_writer=write_lf_to_parquet,
40
+ df_writer=write_df_to_parquet,
41
+ table_name="table.parquet",
42
+ )
43
+
44
+ @staticmethod
45
+ def backend_name() -> str:
46
+ """Return the name of the backend."""
47
+ return "experimental_parquet_v1"