ngio 0.2.9__py3-none-any.whl → 0.3.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,20 +7,22 @@ from pandas import DataFrame
7
7
  from polars import DataFrame as PolarsDataFrame
8
8
  from polars import LazyFrame
9
9
 
10
- from ngio.tables.backends._anndata_v1 import AnnDataBackend
11
- from ngio.tables.backends._csv_v1 import CsvTableBackend
12
- from ngio.tables.backends._json_v1 import JsonTableBackend
10
+ from ngio.tables.backends._anndata import AnnDataBackend
11
+ from ngio.tables.backends._csv import CsvTableBackend
12
+ from ngio.tables.backends._json import JsonTableBackend
13
+ from ngio.tables.backends._parquet import ParquetTableBackend
14
+ from ngio.tables.backends._utils import TabularData
13
15
  from ngio.utils import NgioValueError, ZarrGroupHandler
14
16
 
15
17
 
16
18
  class TableBackendProtocol(Protocol):
17
- def __init__(
19
+ def set_group_handler(
18
20
  self,
19
21
  group_handler: ZarrGroupHandler,
20
22
  index_key: str | None = None,
21
23
  index_type: Literal["int", "str"] | None = None,
22
- ):
23
- """Backend constructor.
24
+ ) -> None:
25
+ """Attach a group handler to the backend.
24
26
 
25
27
  Index keys and index types are used to ensure that the
26
28
  serialization and deserialization of the table
@@ -42,12 +44,21 @@ class TableBackendProtocol(Protocol):
42
44
  """
43
45
  ...
44
46
 
47
+ @property
48
+ def group_handler(self) -> ZarrGroupHandler:
49
+ """Return the group handler."""
50
+ ...
51
+
45
52
  @staticmethod
46
53
  def implements_anndata() -> bool:
47
54
  """Check if the backend implements the anndata protocol.
48
55
 
49
56
  If this is True, the backend should implement the
50
- `load_as_anndata` and `write_from_anndata` methods.
57
+ `write_from_anndata` method.
58
+
59
+ AnnData objects are more complex than DataFrames,
60
+ so if this is true the backend should implement the
61
+ full serialization of the AnnData object.
51
62
 
52
63
  If this is False, these methods should raise a
53
64
  `NotImplementedError`.
@@ -59,7 +70,7 @@ class TableBackendProtocol(Protocol):
59
70
  """Check if the backend implements the pandas protocol.
60
71
 
61
72
  If this is True, the backend should implement the
62
- `load_as_dataframe` and `write_from_dataframe` methods.
73
+ `write_from_dataframe` method.
63
74
 
64
75
  If this is False, these methods should raise a
65
76
  `NotImplementedError`.
@@ -71,7 +82,7 @@ class TableBackendProtocol(Protocol):
71
82
  """Check if the backend implements the polars protocol.
72
83
 
73
84
  If this is True, the backend should implement the
74
- `load_as_polars` and `write_from_polars` methods.
85
+ `write_from_polars` method.
75
86
 
76
87
  If this is False, these methods should raise a
77
88
  `NotImplementedError`.
@@ -90,6 +101,16 @@ class TableBackendProtocol(Protocol):
90
101
  """Load the table as a polars LazyFrame."""
91
102
  ...
92
103
 
104
+ def load(self) -> TabularData:
105
+ """The default load method.
106
+
107
+ This method will be the default way to load the table
108
+ from the backend. This method should wrap one of the
109
+ `load_as_anndata`, `load_as_dataframe` or `load_as_polars`
110
+ methods depending on the backend implementation.
111
+ """
112
+ ...
113
+
93
114
  def write_from_pandas(self, table: DataFrame) -> None:
94
115
  """Serialize the table from a pandas DataFrame."""
95
116
  ...
@@ -104,7 +125,7 @@ class TableBackendProtocol(Protocol):
104
125
 
105
126
  def write(
106
127
  self,
107
- table: DataFrame | AnnData | PolarsDataFrame | LazyFrame,
128
+ table_data: DataFrame | AnnData | PolarsDataFrame | LazyFrame,
108
129
  metadata: dict[str, str] | None = None,
109
130
  mode: Literal["pandas", "anndata", "polars"] | None = None,
110
131
  ) -> None:
@@ -144,23 +165,20 @@ class ImplementedTableBackends:
144
165
 
145
166
  def get_backend(
146
167
  self,
147
- backend_name: str | None,
168
+ *,
148
169
  group_handler: ZarrGroupHandler,
170
+ backend_name: str = "anndata",
149
171
  index_key: str | None = None,
150
172
  index_type: Literal["int", "str"] | None = None,
151
173
  ) -> TableBackendProtocol:
152
174
  """Try to get a handler for the given store based on the metadata version."""
153
- if backend_name is None:
154
- # Default to anndata since it is currently
155
- # the only backend in use.
156
- backend_name = "anndata_v1"
157
-
158
175
  if backend_name not in self._implemented_backends:
159
176
  raise NgioValueError(f"Table backend {backend_name} not implemented.")
160
- handler = self._implemented_backends[backend_name](
177
+ backend = self._implemented_backends[backend_name]()
178
+ backend.set_group_handler(
161
179
  group_handler=group_handler, index_key=index_key, index_type=index_type
162
180
  )
163
- return handler
181
+ return backend
164
182
 
165
183
  def add_backend(
166
184
  self,
@@ -180,3 +198,6 @@ class ImplementedTableBackends:
180
198
  ImplementedTableBackends().add_backend(AnnDataBackend)
181
199
  ImplementedTableBackends().add_backend(JsonTableBackend)
182
200
  ImplementedTableBackends().add_backend(CsvTableBackend)
201
+ ImplementedTableBackends().add_backend(ParquetTableBackend)
202
+
203
+ TableBackend = Literal["anndata", "json", "csv", "parquet"] | str | TableBackendProtocol
@@ -9,7 +9,6 @@ These functions are used to validate and normalize the tables
9
9
  to ensure that conversion between formats is consistent.
10
10
  """
11
11
 
12
- # %%
13
12
  from copy import deepcopy
14
13
  from typing import Literal
15
14
 
@@ -24,6 +23,8 @@ from polars import LazyFrame
24
23
 
25
24
  from ngio.utils import NgioTableValidationError, NgioValueError
26
25
 
26
+ TabularData = AnnData | DataFrame | PolarsDataFrame | LazyFrame
27
+
27
28
  # -----------------
28
29
  # Validation utils
29
30
  # -----------------
@@ -460,3 +461,148 @@ def convert_polars_to_anndata(
460
461
  pandas_df,
461
462
  index_key=index_key,
462
463
  )
464
+
465
+
466
+ # -----------------
467
+ # Conversion functions
468
+ # -----------------
469
+
470
+
471
+ def normalize_table(
472
+ table_data: TabularData,
473
+ index_key: str | None = None,
474
+ index_type: Literal["int", "str"] | None = None,
475
+ ) -> TabularData:
476
+ """Normalize a table to a specific format.
477
+
478
+ Args:
479
+ table_data (TabularData): The table to normalize.
480
+ index_key (str | None): The column name to use as the index of the DataFrame.
481
+ Default is None.
482
+ index_type (str | None): The type of the index column in the DataFrame.
483
+ Either 'str' or 'int'. Default is None.
484
+
485
+ Returns:
486
+ DataFrame | AnnData | PolarsDataFrame | LazyFrame: Normalized table.
487
+ """
488
+ if isinstance(table_data, DataFrame):
489
+ return normalize_pandas_df(
490
+ table_data,
491
+ index_key=index_key,
492
+ index_type=index_type,
493
+ reset_index=False,
494
+ )
495
+ if isinstance(table_data, AnnData):
496
+ return normalize_anndata(table_data, index_key=index_key)
497
+ if isinstance(table_data, PolarsDataFrame) or isinstance(table_data, LazyFrame):
498
+ return normalize_polars_lf(
499
+ table_data,
500
+ index_key=index_key,
501
+ index_type=index_type,
502
+ )
503
+ raise NgioValueError(f"Unsupported table type: {type(table_data)}")
504
+
505
+
506
+ def convert_to_anndata(
507
+ table_data: TabularData,
508
+ index_key: str | None = None,
509
+ ) -> AnnData:
510
+ """Convert a table to an AnnData object.
511
+
512
+ Args:
513
+ table_data (TabularData): The table to convert.
514
+ index_key (str | None): The column name to use as the index of the DataFrame.
515
+ Default is None.
516
+
517
+ Returns:
518
+ AnnData: Converted AnnData object.
519
+ """
520
+ if isinstance(table_data, AnnData):
521
+ return normalize_anndata(table_data, index_key=index_key)
522
+ if isinstance(table_data, DataFrame):
523
+ return convert_pandas_to_anndata(table_data, index_key=index_key)
524
+ if isinstance(table_data, PolarsDataFrame) or isinstance(table_data, LazyFrame):
525
+ return convert_polars_to_anndata(table_data, index_key=index_key)
526
+ raise NgioValueError(f"Unsupported table type: {type(table_data)}")
527
+
528
+
529
+ def convert_to_pandas(
530
+ table_data: TabularData,
531
+ index_key: str | None = None,
532
+ index_type: Literal["int", "str"] | None = None,
533
+ reset_index: bool = False,
534
+ ) -> DataFrame:
535
+ """Convert a table to a pandas DataFrame.
536
+
537
+ Args:
538
+ table_data (TabularData): The table to convert.
539
+ index_key (str | None): The column name to use as the index of the DataFrame.
540
+ Default is None.
541
+ index_type (str | None): The type of the index column in the DataFrame.
542
+ Either 'str' or 'int'. Default is None.
543
+ reset_index (bool): If True the index will be reset (i.e., the index will be
544
+ converted to a column). If False, the index will be kept as is.
545
+
546
+ Returns:
547
+ DataFrame: Converted pandas DataFrame.
548
+ """
549
+ if isinstance(table_data, DataFrame):
550
+ return normalize_pandas_df(
551
+ table_data,
552
+ index_key=index_key,
553
+ index_type=index_type,
554
+ reset_index=reset_index,
555
+ )
556
+ if isinstance(table_data, AnnData):
557
+ return convert_anndata_to_pandas(
558
+ table_data,
559
+ index_key=index_key,
560
+ index_type=index_type,
561
+ reset_index=reset_index,
562
+ )
563
+ if isinstance(table_data, PolarsDataFrame) or isinstance(table_data, LazyFrame):
564
+ return convert_polars_to_pandas(
565
+ table_data,
566
+ index_key=index_key,
567
+ index_type=index_type,
568
+ reset_index=reset_index,
569
+ )
570
+ raise NgioValueError(f"Unsupported table type: {type(table_data)}")
571
+
572
+
573
+ def convert_to_polars(
574
+ table_data: TabularData,
575
+ index_key: str | None = None,
576
+ index_type: Literal["int", "str"] | None = None,
577
+ ) -> LazyFrame:
578
+ """Convert a table to a polars LazyFrame.
579
+
580
+ Args:
581
+ table_data (TabularData): The table to convert.
582
+ index_key (str | None): The column name to use as the index of the DataFrame.
583
+ Default is None.
584
+ index_type (str | None): The type of the index column in the DataFrame.
585
+ Either 'str' or 'int'. Default is None.
586
+
587
+ Returns:
588
+ LazyFrame: Converted polars LazyFrame.
589
+ """
590
+ if isinstance(table_data, PolarsDataFrame) or isinstance(table_data, LazyFrame):
591
+ return normalize_polars_lf(
592
+ table_data,
593
+ index_key=index_key,
594
+ index_type=index_type,
595
+ )
596
+ if isinstance(table_data, DataFrame):
597
+ return convert_pandas_to_polars(
598
+ table_data,
599
+ index_key=index_key,
600
+ index_type=index_type,
601
+ )
602
+ if isinstance(table_data, AnnData):
603
+ return convert_anndata_to_polars(
604
+ table_data,
605
+ index_key=index_key,
606
+ index_type=index_type,
607
+ )
608
+ raise NgioValueError(f"Unsupported table type: {type(table_data)}")