PyPI - ngio - Versions diffs - 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl - Mend

ngio 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

ngio/common/_pyramid.py +5 -1
ngio/hcs/plate.py +133 -2
ngio/images/abstract_image.py +1 -0
ngio/images/image.py +42 -0
ngio/images/label.py +15 -7
ngio/images/ome_zarr_container.py +20 -11
ngio/tables/_validators.py +1 -83
ngio/tables/backends/__init__.py +27 -1
ngio/tables/backends/_abstract_backend.py +207 -22
ngio/tables/backends/_anndata_utils.py +3 -109
ngio/tables/backends/_anndata_v1.py +43 -46
ngio/tables/backends/_csv_v1.py +162 -0
ngio/tables/backends/_json_v1.py +54 -18
ngio/tables/backends/_table_backends.py +98 -18
ngio/tables/backends/_utils.py +462 -0
ngio/tables/tables_container.py +14 -3
ngio/tables/v1/_feature_table.py +20 -11
ngio/tables/v1/_generic_table.py +20 -15
ngio/tables/v1/_roi_table.py +15 -12
ngio/utils/_zarr_utils.py +46 -32
{ngio-0.2.2.dist-info → ngio-0.2.4.dist-info}/METADATA +3 -1
{ngio-0.2.2.dist-info → ngio-0.2.4.dist-info}/RECORD +24 -22
{ngio-0.2.2.dist-info → ngio-0.2.4.dist-info}/WHEEL +0 -0
{ngio-0.2.2.dist-info → ngio-0.2.4.dist-info}/licenses/LICENSE +0 -0

ngio/tables/backends/_utils.py ADDED Viewed

@@ -0,0 +1,462 @@
+"""Utility functions for converting between different tables formats.
+The supported formats are:
+- pandas DataFrame
+- polars DataFrame or LazyFrame
+- AnnData
+These functions are used to validate and normalize the tables
+to ensure that conversion between formats is consistent.
+"""
+# %%
+from copy import deepcopy
+from typing import Literal
+import numpy as np
+import pandas as pd
+import pandas.api.types as ptypes
+import polars as pl
+from anndata import AnnData
+from pandas import DataFrame
+from polars import DataFrame as PolarsDataFrame
+from polars import LazyFrame
+from ngio.utils import NgioTableValidationError, NgioValueError
+# -----------------
+# Validation utils
+# -----------------
+def _validate_index_key_df(pandas_df: DataFrame, index_key: str | None) -> DataFrame:
+    """Ensure the DataFrame is indexed by ``index_key``.
+    A ``None`` key, or an index that already carries that name, leaves the
+    frame untouched. Otherwise a column with that name is promoted to the
+    index, and failing that an unnamed index is given the name.
+    Args:
+        pandas_df (DataFrame): The table whose index is checked.
+        index_key (str | None): Expected name of the index, or None to skip
+            the check.
+    Returns:
+        DataFrame: The table indexed by ``index_key``. Only the column
+            promotion path builds a new frame; every other path returns the
+            object that was passed in.
+    Raises:
+        NgioTableValidationError: If the index has a different name and no
+            column is called ``index_key``.
+    """
+    already_valid = index_key is None or pandas_df.index.name == index_key
+    if already_valid:
+        return pandas_df
+    if index_key in pandas_df.columns:
+        indexed_df = pandas_df.set_index(index_key)
+        indexed_df.index.name = index_key
+        return indexed_df
+    if pandas_df.index.name is not None:
+        raise NgioTableValidationError(
+            f"Index key '{index_key}' is not found in DataFrame"
+        )
+    # Unnamed index: the name is set on the frame that was passed in, so the
+    # caller's object is modified as well.
+    pandas_df.index.name = index_key
+    return pandas_df
+def _validate_cast_index_dtype_df(
+    pandas_df: DataFrame, index_type: str | None
+) -> DataFrame:
+    """Check the dtype of the DataFrame index, casting it when possible.
+    An integer index is converted to strings when 'str' is requested, and a
+    string index is parsed as integers when 'int' is requested. Any other
+    mismatch is reported as a validation error.
+    Args:
+        pandas_df (DataFrame): The pandas DataFrame to validate.
+        index_type (str | None): The type to cast the index to ('str' or 'int').
+            None disables the check and returns the input unchanged.
+    Returns:
+        DataFrame: DataFrame with index of the specified type.
+    Raises:
+        NgioTableValidationError: If index cannot be cast to the specified type.
+        NgioValueError: If index_type is not 'str' or 'int'.
+        ValueError: If the string-to-int cast fails with a ValueError other
+            than an invalid integer literal; it is re-raised unchanged.
+    """
+    if index_type is None:
+        # Nothing to do
+        return pandas_df
+    if index_type == "str":
+        if ptypes.is_integer_dtype(pandas_df.index):
+            # Convert the int index to string is generally safe
+            pandas_df = pandas_df.set_index(pandas_df.index.astype(str))
+        if not ptypes.is_string_dtype(pandas_df.index):
+            raise NgioTableValidationError(
+                f"Table index must be of string type, got {pandas_df.index.dtype}"
+            )
+    elif index_type == "int":
+        if ptypes.is_string_dtype(pandas_df.index):
+            # Try to convert the string index to int
+            try:
+                pandas_df = pandas_df.set_index(pandas_df.index.astype(int))
+            except ValueError as e:
+                if "invalid literal for int() with base 10" in str(e):
+                    raise NgioTableValidationError(
+                        "Table index must be of integer type, got str."
+                        f" We tried implicit conversion and failed: {e}"
+                    ) from None
+                # Unexpected ValueError: propagate it as-is. A bare raise keeps
+                # the original traceback and does not make the exception its
+                # own __cause__.
+                raise
+        if not ptypes.is_integer_dtype(pandas_df.index):
+            raise NgioTableValidationError(
+                f"Table index must be of integer type, got {pandas_df.index.dtype}"
+            )
+    else:
+        raise NgioValueError(
+            f"Invalid index type '{index_type}'. Must be 'int' or 'str'."
+        )
+    return pandas_df
+def _check_for_mixed_types(series: pd.Series) -> None:
+    """Reject a column whose elements are not all of one Python type.
+    Args:
+        series (pd.Series): The column to inspect.
+    Raises:
+        NgioTableValidationError: If more than one element type is present.
+    """
+    # Map every element to its Python type once and reuse the result below.
+    element_types = series.apply(type)
+    if element_types.nunique() <= 1:  # type: ignore
+        return None
+    raise NgioTableValidationError(
+        f"Column {series.name} has mixed types: "
+        f"{element_types.unique()}. "  # type: ignore
+        "Type of all elements must be the same."
+    )
+def _check_for_supported_types(series: pd.Series) -> Literal["str", "int", "numeric"]:
+    """Classify a column as string, integer or other numeric.
+    Args:
+        series (pd.Series): The column to classify.
+    Returns:
+        Literal["str", "int", "numeric"]: The first category whose dtype
+            check accepts the column.
+    Raises:
+        NgioTableValidationError: If none of the dtype checks accepts the
+            column.
+    """
+    # Checks run in this order; "int" is tested before the broader "numeric".
+    dtype_checks = (
+        ("str", ptypes.is_string_dtype),
+        ("int", ptypes.is_integer_dtype),
+        ("numeric", ptypes.is_numeric_dtype),
+    )
+    for category, matches in dtype_checks:
+        if matches(series):
+            return category
+    raise NgioTableValidationError(
+        f"Column {series.name} has unsupported type: {series.dtype}."
+        " Supported types are string and numerics."
+    )
+# -----------------
+# Normalization functions
+# -----------------
+def normalize_pandas_df(
+    pandas_df: DataFrame,
+    index_key: str | None = None,
+    index_type: Literal["int", "str"] | None = None,
+    reset_index: bool = False,
+) -> DataFrame:
+    """Make sure the DataFrame has the correct index and dtype.
+    Args:
+        pandas_df (DataFrame): The pandas DataFrame to validate.
+        index_key (str | None): The column name to use as the index of the DataFrame.
+            Default is None.
+        index_type (str | None): The type of the index column in the DataFrame.
+            Either 'str' or 'int'. Default is None.
+        reset_index (bool): If True the index will be reset (i.e. the index will be
+            converted to a column). If False, the index will be kept as is.
+    Returns:
+        DataFrame: Normalized pandas DataFrame.
+    Raises:
+        NgioTableValidationError: If the index key is not found or the index
+            cannot be cast to ``index_type``.
+        NgioValueError: If ``index_type`` is not 'str' or 'int'.
+    """
+    pandas_df = _validate_index_key_df(pandas_df, index_key)
+    pandas_df = _validate_cast_index_dtype_df(pandas_df, index_type)
+    # Only a named index is turned into a column; an unnamed index is left
+    # in place even when reset_index is True.
+    if reset_index and pandas_df.index.name is not None:
+        pandas_df = pandas_df.reset_index()
+    return pandas_df
+def normalize_polars_lf(
+    polars_lf: LazyFrame | PolarsDataFrame,
+    index_key: str | None = None,
+    index_type: Literal["int", "str"] | None = None,
+) -> LazyFrame:
+    """Validate a polars frame and return it as a LazyFrame.
+    The index column is only checked (and cast) when ``index_key`` is given;
+    with ``index_key=None`` the ``index_type`` argument is not inspected.
+    Args:
+        polars_lf (LazyFrame | PolarsDataFrame): The polars frame to validate.
+        index_key (str | None): Name of the column that plays the role of the
+            index. Default is None.
+        index_type (str | None): Expected type of the index column, either
+            'str' or 'int'. Default is None.
+    Returns:
+        LazyFrame: Normalized polars LazyFrame.
+    Raises:
+        NgioTableValidationError: If ``index_key`` is not a column of the
+            frame, or if ``index_type`` is neither 'int' nor 'str'.
+    """
+    if index_key is not None:
+        columns_schema = polars_lf.collect_schema()
+        if index_key not in columns_schema:
+            raise NgioTableValidationError(
+                f"Index key '{index_key}' not found in LazyFrame columns."
+            )
+        if index_type is not None:
+            if index_type not in ["int", "str"]:
+                raise NgioTableValidationError(
+                    f"Invalid index type '{index_type}'. Must be 'int' or 'str'."
+                )
+            current_dtype = columns_schema[index_key]
+            if index_type == "int":
+                if not current_dtype.is_integer():
+                    polars_lf = polars_lf.with_columns(
+                        pl.col(index_key).cast(pl.Int64)
+                    )
+            elif not current_dtype == pl.String():
+                # index_type can only be "str" here
+                polars_lf = polars_lf.with_columns(
+                    pl.col(index_key).cast(pl.String())
+                )
+    # Eager frames are wrapped so that callers always receive a LazyFrame.
+    return polars_lf.lazy() if isinstance(polars_lf, PolarsDataFrame) else polars_lf
+def normalize_anndata(
+    anndata: AnnData,
+    index_key: str | None = None,
+) -> AnnData:
+    """Validate the ``obs`` table of an AnnData object.
+    Args:
+        anndata (AnnData): The AnnData object to validate.
+        index_key (str | None): Name expected for the ``obs`` index.
+            With None the object is returned untouched. Default is None.
+    Returns:
+        AnnData: The input object when the validated ``obs`` compares equal
+            to the current one, otherwise a deep copy carrying the validated
+            ``obs``.
+    """
+    if index_key is None:
+        return anndata
+    # The obs index is always validated against the "str" type.
+    normalized_obs = _validate_cast_index_dtype_df(
+        _validate_index_key_df(anndata.obs, index_key), "str"
+    )
+    if not normalized_obs.equals(anndata.obs):
+        anndata = deepcopy(anndata)
+        anndata.obs = normalized_obs
+    return anndata
+# -----------------
+# Conversion functions
+# -----------------
+def convert_pandas_to_polars(
+    pandas_df: DataFrame,
+    index_key: str | None = None,
+    index_type: Literal["int", "str"] | None = None,
+) -> LazyFrame:
+    """Turn a pandas DataFrame into a polars LazyFrame.
+    Args:
+        pandas_df (DataFrame): The pandas DataFrame to convert.
+        index_key (str | None): Name of the column to use as the index.
+            Default is None.
+        index_type (str | None): Type of the index column, either 'str'
+            or 'int'. Default is None.
+    Returns:
+        LazyFrame: Converted and normalized polars LazyFrame.
+    """
+    # reset_index=True moves a named index into a regular column before the
+    # frame is handed to polars.
+    normalized_df = normalize_pandas_df(
+        pandas_df,
+        index_key=index_key,
+        index_type=index_type,
+        reset_index=True,
+    )
+    lazy_frame = pl.from_pandas(normalized_df).lazy()
+    return lazy_frame
+def convert_polars_to_pandas(
+    polars_df: PolarsDataFrame | LazyFrame,
+    index_key: str | None = None,
+    index_type: Literal["int", "str"] | None = None,
+    reset_index: bool = False,
+) -> DataFrame:
+    """Turn a polars DataFrame or LazyFrame into a pandas DataFrame.
+    Args:
+        polars_df (PolarsDataFrame | LazyFrame): The polars frame to convert.
+            A LazyFrame is collected first.
+        index_key (str | None): Name of the column to use as the index.
+            Default is None.
+        index_type (str | None): Type of the index column, either 'str'
+            or 'int'. Default is None.
+        reset_index (bool): If True the index is converted back to a column
+            after normalization. If False, the index is kept as is.
+    Returns:
+        DataFrame: Converted and normalized pandas DataFrame.
+    """
+    eager_df = polars_df.collect() if isinstance(polars_df, LazyFrame) else polars_df
+    return normalize_pandas_df(
+        eager_df.to_pandas(),
+        index_key=index_key,
+        index_type=index_type,
+        reset_index=reset_index,
+    )
+def convert_pandas_to_anndata(
+    pandas_df: DataFrame,
+    index_key: str | None = None,
+) -> AnnData:
+    """Build an AnnData object from a pandas DataFrame.
+    String and integer columns are stored in ``obs``; the remaining numeric
+    columns form ``X``. The index is normalized to the 'str' type.
+    Args:
+        pandas_df (DataFrame): The pandas DataFrame to convert.
+        index_key (str | None): Name of the column to use as the index.
+            Default is None.
+    Returns:
+        AnnData: Converted AnnData object.
+    Raises:
+        NgioTableValidationError: If a column has mixed element types or an
+            unsupported dtype.
+    """
+    pandas_df = normalize_pandas_df(
+        pandas_df,
+        index_key=index_key,
+        index_type="str",
+        reset_index=False,
+    )
+    # Sort the column names into buckets keyed by their type category.
+    columns_by_kind = {"str": [], "int": [], "numeric": []}
+    for col_name in pandas_df.columns:
+        column = pandas_df[col_name]
+        # Mixed types are not allowed in the table
+        _check_for_mixed_types(column)
+        # Only string and numeric types are allowed
+        columns_by_kind[_check_for_supported_types(column)].append(col_name)
+    # String and integer columns become the observations table
+    obs_df = pandas_df[columns_by_kind["str"] + columns_by_kind["int"]]
+    obs_df.index = pandas_df.index
+    x_df = pandas_df[columns_by_kind["numeric"]]
+    if x_df.dtypes.nunique() > 1:
+        # Columns with different numeric dtypes are unified as float64
+        x_df = x_df.astype("float64")
+    if x_df.empty:
+        # If there are no numeric columns, create an empty array
+        # to avoid AnnData failing to create the object
+        x_df = np.zeros((len(obs_df), 0), dtype="float64")
+    return AnnData(X=x_df, obs=obs_df)
+def convert_anndata_to_pandas(
+    anndata: AnnData,
+    index_key: str | None = None,
+    index_type: Literal["int", "str"] | None = None,
+    reset_index: bool = False,
+) -> DataFrame:
+    """Flatten an AnnData object into a single pandas DataFrame.
+    Args:
+        anndata (AnnData): An AnnData object to convert.
+        index_key (str | None): Name of the column to use as the index.
+            Default is None.
+        index_type (str | None): Type of the index column, either 'str'
+            or 'int'. Default is None.
+        reset_index (bool): If True the index is converted to a column after
+            normalization. If False, the index is kept as is.
+    Returns:
+        DataFrame: Converted and normalized pandas DataFrame.
+    """
+    # Start from the X matrix as a frame, then attach the obs columns to it.
+    table_df = anndata.to_df()
+    obs_columns = anndata.obs_keys()
+    table_df[obs_columns] = anndata.obs
+    return normalize_pandas_df(
+        table_df,
+        index_key=index_key,
+        index_type=index_type,
+        reset_index=reset_index,
+    )
+def convert_anndata_to_polars(
+    anndata: AnnData,
+    index_key: str | None = None,
+    index_type: Literal["int", "str"] | None = None,
+) -> LazyFrame:
+    """Turn an AnnData object into a polars LazyFrame.
+    The conversion goes through pandas, with the index reset to a column.
+    Args:
+        anndata (AnnData): An AnnData object to convert.
+        index_key (str | None): Name of the column to use as the index.
+            Default is None.
+        index_type (str | None): Type of the index column, either 'str'
+            or 'int'. Default is None.
+    Returns:
+        LazyFrame: Converted and normalized polars LazyFrame.
+    """
+    intermediate_df = convert_anndata_to_pandas(
+        anndata,
+        index_key=index_key,
+        index_type=index_type,
+        reset_index=True,
+    )
+    lazy_frame = pl.from_pandas(intermediate_df).lazy()
+    return lazy_frame
+def convert_polars_to_anndata(
+    polars_df: LazyFrame | PolarsDataFrame,
+    index_key: str | None = None,
+) -> AnnData:
+    """Turn a polars LazyFrame or DataFrame into an AnnData object.
+    The conversion goes through pandas; a LazyFrame is collected first.
+    Args:
+        polars_df (LazyFrame | PolarsDataFrame): The polars frame to convert.
+        index_key (str | None): Name of the column to use as the index.
+            Default is None.
+    Returns:
+        AnnData: Converted AnnData object.
+    """
+    eager_df = polars_df.collect() if isinstance(polars_df, LazyFrame) else polars_df
+    return convert_pandas_to_anndata(eager_df.to_pandas(), index_key=index_key)

ngio/tables/tables_container.py CHANGED Viewed

@@ -90,6 +90,7 @@ class ImplementedTables:
         version: str,
         handler: ZarrGroupHandler,
         backend_name: str | None = None,
+        strict: bool = True,
     ) -> Table:
         """Try to get a handler for the given store based on the metadata version."""
         _errors = {}
@@ -102,7 +103,12 @@ class ImplementedTables:
                 )
                 return table
             except Exception as e:
-                _errors[name] = e
+                if strict:
+                    raise NgioValidationError(
+                        f"Could not load table {name} from handler. Error: {e}"
+                    ) from e
+                else:
+                    _errors[name] = e
         # If no table was found, we can try to load the table from a generic table
         try:
             table = GenericTable._from_handler(
@@ -207,7 +213,9 @@ class TablesContainer:
                 filtered_tables.append(table_name)
         return filtered_tables
-    def get(self, name: str, backend_name: str | None = None) -> Table:
+    def get(
+        self, name: str, backend_name: str | None = None, strict: bool = True
+    ) -> Table:
         """Get a label from the group."""
         if name not in self.list():
             raise KeyError(f"Table '{name}' not found in the group.")
@@ -220,6 +228,7 @@ class TablesContainer:
             version=table_version,
             handler=table_handler,
             backend_name=backend_name,
+            strict=strict,
         )
     def add(
@@ -237,7 +246,9 @@ class TablesContainer:
                 "Use overwrite=True to replace it."
             )
-        table_handler = self._group_handler.derive_handler(path=name)
+        table_handler = self._group_handler.derive_handler(
+            path=name, overwrite=overwrite
+        )
         if backend is None:
             backend = table.backend_name

ngio/tables/v1/_feature_table.py CHANGED Viewed

@@ -9,8 +9,8 @@ from typing import Literal
 import pandas as pd
 from pydantic import BaseModel
-from ngio.tables._validators import validate_index_key
-from ngio.tables.backends import ImplementedTableBackends
+from ngio.tables.backends import BackendMeta, ImplementedTableBackends
+from ngio.tables.backends._utils import normalize_pandas_df
 from ngio.utils import NgioValueError, ZarrGroupHandler
@@ -20,12 +20,11 @@ class RegionMeta(BaseModel):
     path: str
-class FeatureTableMeta(BaseModel):
+class FeatureTableMeta(BackendMeta):
     """Metadata for the ROI table."""
     fractal_table_version: Literal["1"] = "1"
     type: Literal["feature_table"] = "feature_table"
-    backend: str | None = None
     region: RegionMeta | None = None
     instance_key: str = "label"
@@ -53,8 +52,11 @@ class FeatureTableV1:
         if dataframe is None:
             self._dataframe = None
         else:
-            self._dataframe = validate_index_key(
-                dataframe, self._instance_key, overwrite=True
+            self._dataframe = normalize_pandas_df(
+                dataframe,
+                index_key=self._instance_key,
+                index_type="int",
+                reset_index=False,
             )
         self._table_backend = None
@@ -107,7 +109,7 @@ class FeatureTableV1:
             )
         if self._dataframe is None and self._table_backend is not None:
-            self._dataframe = self._table_backend.load_as_dataframe()
+            self._dataframe = self._table_backend.load_as_pandas_df()
         if self._dataframe is None:
             raise NgioValueError(
@@ -118,7 +120,12 @@ class FeatureTableV1:
     @dataframe.setter
     def dataframe(self, dataframe: pd.DataFrame) -> None:
         """Set the table as a DataFrame."""
-        self._dataframe = dataframe
+        self._dataframe = normalize_pandas_df(
+            dataframe,
+            index_key=self._instance_key,
+            index_type="int",
+            reset_index=False,
+        )
     @classmethod
     def _from_handler(
@@ -143,7 +150,7 @@ class FeatureTableV1:
             )
             meta.backend = backend_name
-        if not backend.implements_dataframe:
+        if not backend.implements_pandas:
             raise NgioValueError(
                 "The backend does not implement the dataframe protocol."
             )
@@ -177,6 +184,8 @@ class FeatureTableV1:
                 "Please add the table to a OME-Zarr Image before calling consolidate."
             )
-        self._table_backend.write_from_dataframe(
-            self.dataframe, metadata=self._meta.model_dump(exclude_none=True)
+        self._table_backend.write(
+            self.dataframe,
+            metadata=self._meta.model_dump(exclude_none=True),
+            mode="pandas",
         )

ngio/tables/v1/_generic_table.py CHANGED Viewed

@@ -2,22 +2,21 @@
 import pandas as pd
 from anndata import AnnData
-from pydantic import BaseModel
-from ngio.tables.backends import ImplementedTableBackends
-from ngio.tables.backends._anndata_utils import (
-    anndata_to_dataframe,
-    dataframe_to_anndata,
+from ngio.tables.backends import (
+    BackendMeta,
+    ImplementedTableBackends,
+    convert_anndata_to_pandas,
+    convert_pandas_to_anndata,
 )
 from ngio.utils import NgioValueError, ZarrGroupHandler
-class GenericTableMeta(BaseModel):
+class GenericTableMeta(BackendMeta):
     """Metadata for the ROI table."""
     fractal_table_version: str | None = None
     type: str | None = None
-    backend: str | None = None
 class GenericTable:
@@ -88,7 +87,7 @@ class GenericTable:
             return self._dataframe
         if self._anndata is not None:
-            return anndata_to_dataframe(self._anndata)
+            return convert_anndata_to_pandas(self._anndata)
         raise NgioValueError("No table loaded.")
@@ -105,7 +104,9 @@ class GenericTable:
             return self._anndata
         if self._dataframe is not None:
-            return dataframe_to_anndata(self._dataframe)
+            return convert_pandas_to_anndata(
+                self._dataframe,
+            )
         raise NgioValueError("No table loaded.")
     @anndata.setter
@@ -138,8 +139,8 @@ class GenericTable:
             anndata = backend.load_as_anndata()
             table = cls(anndata=anndata)
-        elif backend.implements_dataframe():
-            dataframe = backend.load_as_dataframe()
+        elif backend.implements_pandas():
+            dataframe = backend.load_as_pandas_df()
             table = cls(dataframe=dataframe)
         else:
             raise NgioValueError(
@@ -173,10 +174,14 @@ class GenericTable:
             )
         if self.anndata_native:
-            self._table_backend.write_from_anndata(
-                self.anndata, metadata=self._meta.model_dump(exclude_none=True)
+            self._table_backend.write(
+                self.anndata,
+                metadata=self._meta.model_dump(exclude_none=True),
+                mode="anndata",
             )
         else:
-            self._table_backend.write_from_dataframe(
-                self.dataframe, metadata=self._meta.model_dump(exclude_none=True)
+            self._table_backend.write(
+                self.dataframe,
+                metadata=self._meta.model_dump(exclude_none=True),
+                mode="pandas",
             )

ngio 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

ngio 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl