ngio 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,162 @@
1
+ import io
2
+
3
+ import pandas as pd
4
+ import polars as pl
5
+ from pandas import DataFrame
6
+ from polars import DataFrame as PolarsDataFrame
7
+ from polars import LazyFrame
8
+ from zarr.storage import DirectoryStore, FSStore
9
+
10
+ from ngio.tables.backends._abstract_backend import AbstractTableBackend
11
+ from ngio.tables.backends._utils import normalize_pandas_df, normalize_polars_lf
12
+ from ngio.utils import NgioFileNotFoundError, NgioValueError
13
+
14
+
15
+ class CsvTableBackend(AbstractTableBackend):
16
+ """A class to load and write small tables in CSV format."""
17
+
18
+ csv_name = "table.csv"
19
+
20
+ @staticmethod
21
+ def backend_name() -> str:
22
+ """Return the name of the backend."""
23
+ return "experimental_csv_v1"
24
+
25
+ @staticmethod
26
+ def implements_anndata() -> bool:
27
+ """Whether the handler implements the anndata protocol."""
28
+ return False
29
+
30
+ @staticmethod
31
+ def implements_pandas() -> bool:
32
+ """Whether the handler implements the pandas protocol."""
33
+ return True
34
+
35
+ @staticmethod
36
+ def implements_polars() -> bool:
37
+ """Whether the handler implements the polars protocol."""
38
+ return True
39
+
40
+ def _load_from_directory_store(self, reader):
41
+ """Load the table from a directory store."""
42
+ url = self._group_handler.full_url
43
+ if url is None:
44
+ raise NgioValueError(
45
+ f"Ngio does not support reading a CSV file from a "
46
+ f"store of type {type(self._group_handler)}. "
47
+ "Please make sure to use a compatible "
48
+ "store like a zarr.DirectoryStore."
49
+ )
50
+ csv_path = f"{url}/{self.csv_name}"
51
+ dataframe = reader(csv_path)
52
+ return dataframe
53
+
54
+ def _load_from_fs_store(self, reader):
55
+ """Load the table from an FS store."""
56
+ bytes_table = self._group_handler.store.get(self.csv_name)
57
+ if bytes_table is None:
58
+ raise NgioFileNotFoundError(f"No table found at {self.csv_name}. ")
59
+ dataframe = reader(io.BytesIO(bytes_table))
60
+ return dataframe
61
+
62
+ def load_as_pandas_df(self) -> DataFrame:
63
+ """Load the table as a pandas DataFrame."""
64
+ store = self._group_handler.store
65
+ if isinstance(store, DirectoryStore):
66
+ dataframe = self._load_from_directory_store(reader=pd.read_csv)
67
+ elif isinstance(store, FSStore):
68
+ dataframe = self._load_from_fs_store(reader=pd.read_csv)
69
+ else:
70
+ raise NgioValueError(
71
+ f"Ngio does not support reading a CSV file from a "
72
+ f"store of type {type(store)}. "
73
+ "Please make sure to use a compatible "
74
+ "store like a zarr.DirectoryStore or "
75
+ "zarr.FSStore."
76
+ )
77
+
78
+ dataframe = normalize_pandas_df(
79
+ dataframe,
80
+ index_key=self.index_key,
81
+ index_type=self.index_type,
82
+ reset_index=False,
83
+ )
84
+ return dataframe
85
+
86
+ def load_as_polars_lf(self) -> LazyFrame:
87
+ """Load the table as a polars LazyFrame."""
88
+ store = self._group_handler.store
89
+ if isinstance(store, DirectoryStore):
90
+ lazy_frame = self._load_from_directory_store(reader=pl.scan_csv)
91
+ elif isinstance(store, FSStore):
92
+ lazy_frame = self._load_from_fs_store(reader=pl.scan_csv)
93
+ else:
94
+ raise NgioValueError(
95
+ f"Ngio does not support reading a CSV file from a "
96
+ f"store of type {type(store)}. "
97
+ "Please make sure to use a compatible "
98
+ "store like a zarr.DirectoryStore or "
99
+ "zarr.FSStore."
100
+ )
101
+ if not isinstance(lazy_frame, LazyFrame):
102
+ raise NgioValueError(
103
+ "Table is not a lazy frame. Please report this issue as an ngio bug."
104
+ f" {type(lazy_frame)}"
105
+ )
106
+
107
+ lazy_frame = normalize_polars_lf(
108
+ lazy_frame,
109
+ index_key=self.index_key,
110
+ index_type=self.index_type,
111
+ )
112
+ return lazy_frame
113
+
114
+ def _get_store_url(self) -> str:
115
+ """Get the store URL."""
116
+ store = self._group_handler.store
117
+ if isinstance(store, DirectoryStore):
118
+ full_url = self._group_handler.full_url
119
+ else:
120
+ raise NgioValueError(
121
+ f"Ngio does not support writing a CSV file to a "
122
+ f"store of type {type(store)}. "
123
+ "Please make sure to use a compatible "
124
+ "store like a zarr.DirectoryStore or "
125
+ "zarr.FSStore."
126
+ )
127
+ if full_url is None:
128
+ raise NgioValueError(
129
+ f"Ngio does not support writing a CSV file to a "
130
+ f"store of type {type(store)}. "
131
+ "Please make sure to use a compatible "
132
+ "store like a zarr.DirectoryStore or "
133
+ "zarr.FSStore."
134
+ )
135
+ return full_url
136
+
137
+ def write_from_pandas(self, table: DataFrame) -> None:
138
+ """Write the table from a pandas DataFrame."""
139
+ table = normalize_pandas_df(
140
+ table,
141
+ index_key=self.index_key,
142
+ index_type=self.index_type,
143
+ reset_index=True,
144
+ )
145
+ full_url = self._get_store_url()
146
+ csv_path = f"{full_url}/{self.csv_name}"
147
+ table.to_csv(csv_path, index=False)
148
+
149
+ def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
150
+ """Write the table from a polars DataFrame or LazyFrame."""
151
+ table = normalize_polars_lf(
152
+ table,
153
+ index_key=self.index_key,
154
+ index_type=self.index_type,
155
+ )
156
+
157
+ if isinstance(table, LazyFrame):
158
+ table = table.collect()
159
+
160
+ full_url = self._get_store_url()
161
+ csv_path = f"{full_url}/{self.csv_name}"
162
+ table.write_csv(csv_path)
@@ -1,9 +1,13 @@
1
- from collections.abc import Collection
2
-
3
1
  import pandas as pd
4
2
  from pandas import DataFrame
3
+ from polars import DataFrame as PolarsDataFrame
4
+ from polars import LazyFrame
5
5
 
6
6
  from ngio.tables.backends._abstract_backend import AbstractTableBackend
7
+ from ngio.tables.backends._utils import (
8
+ normalize_pandas_df,
9
+ normalize_polars_lf,
10
+ )
7
11
  from ngio.utils import NgioFileNotFoundError
8
12
 
9
13
 
@@ -12,7 +16,7 @@ class JsonTableBackend(AbstractTableBackend):
12
16
 
13
17
  @staticmethod
14
18
  def backend_name() -> str:
15
- """The name of the backend."""
19
+ """Return the name of the backend."""
16
20
  return "experimental_json_v1"
17
21
 
18
22
  @staticmethod
@@ -21,36 +25,68 @@ class JsonTableBackend(AbstractTableBackend):
21
25
  return False
22
26
 
23
27
  @staticmethod
24
- def implements_dataframe() -> bool:
28
+ def implements_pandas() -> bool:
25
29
  """Whether the handler implements the dataframe protocol."""
26
30
  return True
27
31
 
28
- def load_columns(self) -> list[str]:
29
- """List all labels in the group."""
30
- return list(self.load_as_dataframe().columns)
32
+ @staticmethod
33
+ def implements_polars() -> bool:
34
+ """Whether the handler implements the polars protocol."""
35
+ return True
31
36
 
32
37
  def _get_table_group(self):
38
+ """Get the table group, creating it if it doesn't exist."""
33
39
  try:
34
40
  table_group = self._group_handler.get_group(path="table")
35
41
  except NgioFileNotFoundError:
36
42
  table_group = self._group_handler.group.create_group("table")
37
43
  return table_group
38
44
 
39
- def load_as_dataframe(self, columns: Collection[str] | None = None) -> DataFrame:
40
- """List all labels in the group."""
45
+ def _load_as_pandas_df(self) -> DataFrame:
46
+ """Load the table as a pandas DataFrame."""
41
47
  table_group = self._get_table_group()
42
48
  table_dict = dict(table_group.attrs)
49
+
43
50
  data_frame = pd.DataFrame.from_dict(table_dict)
44
- if columns is not None:
45
- data_frame = data_frame[columns]
46
51
  return data_frame
47
52
 
48
- def write_from_dataframe(
49
- self, table: DataFrame, metadata: dict | None = None
50
- ) -> None:
51
- """Consolidate the metadata in the store."""
53
+ def load_as_pandas_df(self) -> DataFrame:
54
+ """Load the table as a pandas DataFrame."""
55
+ data_frame = self._load_as_pandas_df()
56
+ data_frame = normalize_pandas_df(
57
+ data_frame,
58
+ index_key=self.index_key,
59
+ index_type=self.index_type,
60
+ reset_index=False,
61
+ )
62
+ return data_frame
63
+
64
+ def _write_from_dict(self, table: dict) -> None:
65
+ """Write the table from a dictionary to the store."""
52
66
  table_group = self._get_table_group()
53
67
  table_group.attrs.clear()
54
- table_group.attrs.update(table.to_dict())
55
- if metadata is not None:
56
- self._group_handler.write_attrs(metadata)
68
+ table_group.attrs.update(table)
69
+
70
+ def write_from_pandas(self, table: DataFrame) -> None:
71
+ """Write the table from a pandas DataFrame."""
72
+ table = normalize_pandas_df(
73
+ table,
74
+ index_key=self.index_key,
75
+ index_type=self.index_type,
76
+ reset_index=True,
77
+ )
78
+ table_dict = table.to_dict(orient="list")
79
+ self._write_from_dict(table=table_dict)
80
+
81
+ def write_from_polars(self, table: PolarsDataFrame | LazyFrame) -> None:
82
+ """Write the table from a polars DataFrame or LazyFrame."""
83
+ table = normalize_polars_lf(
84
+ table,
85
+ index_key=self.index_key,
86
+ index_type=self.index_type,
87
+ )
88
+ if isinstance(table, LazyFrame):
89
+ table = table.collect()
90
+
91
+ table_dict = table.to_dict(as_series=False)
92
+ self._write_from_dict(table=table_dict)
@@ -1,12 +1,14 @@
1
1
  """Protocol for table backends handlers."""
2
2
 
3
- from collections.abc import Collection
4
3
  from typing import Literal, Protocol
5
4
 
6
5
  from anndata import AnnData
7
6
  from pandas import DataFrame
7
+ from polars import DataFrame as PolarsDataFrame
8
+ from polars import LazyFrame
8
9
 
9
10
  from ngio.tables.backends._anndata_v1 import AnnDataBackend
11
+ from ngio.tables.backends._csv_v1 import CsvTableBackend
10
12
  from ngio.tables.backends._json_v1 import JsonTableBackend
11
13
  from ngio.utils import NgioValueError, ZarrGroupHandler
12
14
 
@@ -16,33 +18,110 @@ class TableBackendProtocol(Protocol):
16
18
  self,
17
19
  group_handler: ZarrGroupHandler,
18
20
  index_key: str | None = None,
19
- index_type: Literal["int", "str"] = "int",
20
- ): ...
21
+ index_type: Literal["int", "str"] | None = None,
22
+ ):
23
+ """Backend constructor.
24
+
25
+ Index keys and index types are used to ensure that the
26
+ serialization and deserialization of the table
27
+ is consistent across different backends.
28
+
29
+ Making sure that this is consistent is
30
+ a duty of the backend implementations.
31
+ """
32
+ ...
33
+
34
+ @staticmethod
35
+ def backend_name() -> str:
36
+ """Return the name of the backend.
37
+
38
+ As a convention we set name as:
39
+ {backend_name}_v{version}
40
+
41
+ Where the version is an integer.
42
+ """
43
+ ...
21
44
 
22
45
  @staticmethod
23
- def backend_name() -> str: ...
46
+ def implements_anndata() -> bool:
47
+ """Check if the backend implements the anndata protocol.
48
+
49
+ If this is True, the backend should implement the
50
+ `load_as_anndata` and `write_from_anndata` methods.
51
+
52
+ If this is False, these methods should raise a
53
+ `NotImplementedError`.
54
+ """
55
+ ...
24
56
 
25
57
  @staticmethod
26
- def implements_anndata() -> bool: ...
58
+ def implements_pandas() -> bool:
59
+ """Check if the backend implements the pandas protocol.
60
+
61
+ If this is True, the backend should implement the
62
+ `load_as_pandas_df` and `write_from_pandas` methods.
63
+
64
+ If this is False, these methods should raise a
65
+ `NotImplementedError`.
66
+ """
67
+ ...
27
68
 
28
69
  @staticmethod
29
- def implements_dataframe() -> bool: ...
70
+ def implements_polars() -> bool:
71
+ """Check if the backend implements the polars protocol.
72
+
73
+ If this is True, the backend should implement the
74
+ `load_as_polars_lf` and `write_from_polars` methods.
75
+
76
+ If this is False, these methods should raise a
77
+ `NotImplementedError`.
78
+ """
79
+ ...
80
+
81
+ def load_as_anndata(self) -> AnnData:
82
+ """Load the table as an AnnData object."""
83
+ ...
84
+
85
+ def load_as_pandas_df(self) -> DataFrame:
86
+ """Load the table as a pandas DataFrame."""
87
+ ...
30
88
 
31
- def load_columns(self) -> list[str]: ...
89
+ def load_as_polars_lf(self) -> LazyFrame:
90
+ """Load the table as a polars LazyFrame."""
91
+ ...
92
+
93
+ def write_from_pandas(self, table: DataFrame) -> None:
94
+ """Serialize the table from a pandas DataFrame."""
95
+ ...
96
+
97
+ def write_from_anndata(self, table: AnnData) -> None:
98
+ """Serialize the table from an AnnData object."""
99
+ ...
100
+
101
+ def write_from_polars(self, table: LazyFrame | PolarsDataFrame) -> None:
102
+ """Serialize the table from a polars DataFrame or LazyFrame."""
103
+ ...
104
+
105
+ def write(
106
+ self,
107
+ table: DataFrame | AnnData | PolarsDataFrame | LazyFrame,
108
+ metadata: dict[str, str] | None = None,
109
+ mode: Literal["pandas", "anndata", "polars"] | None = None,
110
+ ) -> None:
111
+ """This is a generic write method.
32
112
 
33
- def load_as_anndata(self, columns: Collection[str] | None = None) -> AnnData: ...
113
+ Will call the appropriate write method
114
+ depending on the type of the table.
34
115
 
35
- def load_as_dataframe(
36
- self, columns: Collection[str] | None = None
37
- ) -> DataFrame: ...
116
+ Moreover it will also write the metadata
117
+ if provided, and the backend metadata
38
118
 
39
- def write_from_dataframe(
40
- self, table: DataFrame, metadata: dict | None = None
41
- ) -> None: ...
119
+ The backend should write in the zarr group attributes:
120
+ - backend: the backend name (self.backend_name())
121
+ - index_key: the index key
122
+ - index_type: the index type
42
123
 
43
- def write_from_anndata(
44
- self, table: AnnData, metadata: dict | None = None
45
- ) -> None: ...
124
+ """
46
125
 
47
126
 
48
127
  class ImplementedTableBackends:
@@ -68,7 +147,7 @@ class ImplementedTableBackends:
68
147
  backend_name: str | None,
69
148
  group_handler: ZarrGroupHandler,
70
149
  index_key: str | None = None,
71
- index_type: Literal["int", "str"] = "int",
150
+ index_type: Literal["int", "str"] | None = None,
72
151
  ) -> TableBackendProtocol:
73
152
  """Try to get a handler for the given store based on the metadata version."""
74
153
  if backend_name is None:
@@ -100,3 +179,4 @@ class ImplementedTableBackends:
100
179
 
101
180
  ImplementedTableBackends().add_backend(AnnDataBackend)
102
181
  ImplementedTableBackends().add_backend(JsonTableBackend)
182
+ ImplementedTableBackends().add_backend(CsvTableBackend)