oodeel 0.1.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: the registry flagged this version of oodeel as possibly problematic.
- oodeel/__init__.py +1 -1
- oodeel/datasets/__init__.py +2 -1
- oodeel/datasets/data_handler.py +162 -94
- oodeel/datasets/deprecated/DEPRECATED_data_handler.py +236 -0
- oodeel/datasets/{ooddataset.py → deprecated/DEPRECATED_ooddataset.py} +14 -13
- oodeel/datasets/deprecated/DEPRECATED_tf_data_handler.py +671 -0
- oodeel/datasets/deprecated/DEPRECATED_torch_data_handler.py +769 -0
- oodeel/datasets/deprecated/__init__.py +31 -0
- oodeel/datasets/tf_data_handler.py +105 -167
- oodeel/datasets/torch_data_handler.py +109 -181
- oodeel/eval/metrics.py +7 -2
- oodeel/eval/plots/features.py +2 -2
- oodeel/eval/plots/plotly.py +2 -2
- oodeel/extractor/feature_extractor.py +30 -9
- oodeel/extractor/keras_feature_extractor.py +70 -13
- oodeel/extractor/torch_feature_extractor.py +120 -33
- oodeel/methods/__init__.py +17 -1
- oodeel/methods/base.py +103 -17
- oodeel/methods/dknn.py +22 -9
- oodeel/methods/energy.py +8 -0
- oodeel/methods/entropy.py +8 -0
- oodeel/methods/gen.py +118 -0
- oodeel/methods/gram.py +307 -0
- oodeel/methods/mahalanobis.py +14 -12
- oodeel/methods/mls.py +8 -0
- oodeel/methods/odin.py +8 -0
- oodeel/methods/rmds.py +122 -0
- oodeel/methods/she.py +197 -0
- oodeel/methods/vim.py +5 -5
- oodeel/preprocess/__init__.py +31 -0
- oodeel/preprocess/tf_preprocess.py +95 -0
- oodeel/preprocess/torch_preprocess.py +97 -0
- oodeel/utils/operator.py +72 -2
- oodeel/utils/tf_operator.py +72 -4
- oodeel/utils/tf_training_tools.py +26 -3
- oodeel/utils/torch_operator.py +75 -4
- oodeel/utils/torch_training_tools.py +31 -2
- {oodeel-0.1.1.dist-info → oodeel-0.3.0.dist-info}/METADATA +141 -107
- oodeel-0.3.0.dist-info/RECORD +57 -0
- {oodeel-0.1.1.dist-info → oodeel-0.3.0.dist-info}/WHEEL +1 -1
- tests/tests_tensorflow/tf_methods_utils.py +2 -1
- tests/tests_torch/tools_torch.py +9 -9
- tests/tests_torch/torch_methods_utils.py +34 -27
- tests/tools_operator.py +10 -1
- oodeel-0.1.1.dist-info/RECORD +0 -46
- {oodeel-0.1.1.dist-info → oodeel-0.3.0.dist-info/licenses}/LICENSE +0 -0
- {oodeel-0.1.1.dist-info → oodeel-0.3.0.dist-info}/top_level.txt +0 -0
oodeel/__init__.py
CHANGED
oodeel/datasets/__init__.py
CHANGED
@@ -20,4 +20,5 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
-from .
+from .data_handler import load_data_handler
+from .deprecated.DEPRECATED_ooddataset import OODDataset
oodeel/datasets/data_handler.py
CHANGED
@@ -20,6 +20,7 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
+import importlib.util
 from abc import ABC
 from abc import abstractmethod

@@ -29,10 +30,45 @@ from ..types import Callable
 from ..types import DatasetType
 from ..types import ItemType
 from ..types import Optional
+from ..types import TensorType
 from ..types import Tuple
 from ..types import Union


+def get_backend():
+    """Detects whether TensorFlow or PyTorch is available and returns
+    the preferred backend."""
+    available_backends = []
+    if importlib.util.find_spec("tensorflow"):
+        available_backends.append("tensorflow")
+    if importlib.util.find_spec("torch"):
+        available_backends.append("torch")
+
+    if len(available_backends) == 1:
+        return available_backends[0]
+    elif len(available_backends) == 0:
+        raise ImportError("Neither TensorFlow nor PyTorch is installed.")
+    else:
+        raise ImportError(
+            "Both TensorFlow and PyTorch are installed. Please specify the backend."
+        )
+
+
+def load_data_handler(backend: str = None):
+    if backend is None:
+        backend = get_backend()
+
+    if backend == "tensorflow":
+        from .tf_data_handler import TFDataHandler
+
+        return TFDataHandler()
+
+    elif backend == "torch":
+        from .torch_data_handler import TorchDataHandler
+
+        return TorchDataHandler()
+
+
 class DataHandler(ABC):
     """
     Class to manage Datasets. The aim is to provide a simple interface
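The exported load_data_handler factory is the entry point for the new API. A minimal usage sketch (not part of the diff; it relies only on the signatures above and assumes at most one of the two frameworks is installed when backend is omitted):

    from oodeel.datasets import load_data_handler

    # Auto-detection: get_backend() returns the single installed framework,
    # and raises ImportError if none or both are found.
    handler = load_data_handler()

    # When both TensorFlow and PyTorch are installed, pin the backend explicitly:
    handler = load_data_handler(backend="torch")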
@@ -40,82 +76,122 @@ class DataHandler(ABC):
     having to use library-specific syntax.
     """

-    @classmethod
-    @abstractmethod
-    def load_dataset(
-        cls,
-        dataset_id: Union[ItemType, DatasetType, str],
-        keys: Optional[list] = None,
-        load_kwargs: dict = {},
-    ) -> DatasetType:
-        """Load dataset from different manners
+    def __init__(self):
+        self.backend = None
+        self.channel_order = None
+
+    def split_by_class(
+        self,
+        dataset: DatasetType,
+        in_labels: Optional[Union[np.ndarray, list]] = None,
+        out_labels: Optional[Union[np.ndarray, list]] = None,
+    ) -> Optional[Tuple[DatasetType]]:
+        """Filter the dataset by assigning ood labels depending on labels
+        value (typically, class id).

         Args:
-            dataset_id (Union[ItemType, DatasetType, str]): dataset identification
-            keys (list, optional): Features keys. If None, assigned as "input_i"
-                for i-th feature. Defaults to None.
-            load_kwargs (dict, optional): Additional loading kwargs. Defaults to {}.
+            in_labels (Optional[Union[np.ndarray, list]], optional): set of labels
+                to be considered as in-distribution. Defaults to None.
+            out_labels (Optional[Union[np.ndarray, list]], optional): set of labels
+                to be considered as out-of-distribution. Defaults to None.

         Returns:
-            DatasetType: dataset
+            Optional[Tuple[OODDataset]]: Tuple of in-distribution and
+                out-of-distribution OODDatasets
         """
-        raise NotImplementedError()
+        # Make sure the dataset has labels
+        assert (in_labels is not None) or (
+            out_labels is not None
+        ), "specify labels to filter with"
+        assert self.get_item_length(dataset) >= 2, "the dataset has no labels"

-    @staticmethod
-    @abstractmethod
-    def assign_feature_value(
-        dataset: DatasetType, feature_key: str, value: int
-    ) -> DatasetType:
-        """Assign a value to a feature for every sample in a Dataset
+        # Filter the dataset depending on in_labels and out_labels given
+        if (out_labels is not None) and (in_labels is not None):
+            in_data = self.filter_by_value(dataset, "label", in_labels)
+            out_data = self.filter_by_value(dataset, "label", out_labels)

-        Args:
-            dataset (DatasetType): Dataset to assign the value to
-            feature_key (str): Feature to assign the value to
-            value (int): Value to assign
+        if out_labels is None:
+            in_data = self.filter_by_value(dataset, "label", in_labels)
+            out_data = self.filter_by_value(dataset, "label", in_labels, excluded=True)

-        Returns:
-            DatasetType: updated dataset
-        """
-        raise NotImplementedError()
+        elif in_labels is None:
+            in_data = self.filter_by_value(dataset, "label", out_labels, excluded=True)
+            out_data = self.filter_by_value(dataset, "label", out_labels)

-    @staticmethod
+        # Return the filtered OODDatasets
+        return in_data, out_data
+
+    @classmethod
     @abstractmethod
-    def get_feature_from_ds(dataset: DatasetType, feature_key: str) -> np.ndarray:
-        """Get a feature from a Dataset
+    def prepare(
+        cls,
+        dataset: DatasetType,
+        batch_size: int,
+        preprocess_fn: Optional[Callable] = None,
+        augment_fn: Optional[Callable] = None,
+        columns: Optional[list] = None,
+        shuffle: bool = False,
+        dict_based_fns: bool = True,
+        return_tuple: bool = True,
+        **kwargs_prepare,
+    ) -> DatasetType:
+        """Prepare dataset for scoring or training

         Args:
-            dataset (DatasetType): Dataset to get the feature from
-            feature_key (str): Feature value to get
+            batch_size (int): Batch size
+            preprocess_fn (Callable, optional): Preprocessing function to apply to
+                the dataset. Defaults to None.
+            augment_fn (Callable, optional): Augment function to be used (when the
+                returned dataset is to be used for training). Defaults to None.
+            columns (list, optional): List of columns
+                that will be returned. Keep all columns if None. Defaults to None.
+            shuffle (bool, optional): To shuffle the returned dataset or not.
+                Defaults to False.
+            dict_based_fns (bool): Whether to use preprocess and DA functions as dict
+                based (if True) or as tuple based (if False). Defaults to True.
+            return_tuple (bool, optional): Whether to return each dataset item
+                as a tuple. Defaults to True.
+            kwargs_prepare (dict): Additional parameters to be passed to the
+                data_handler for backend specific preparation.
+

         Returns:
-            np.ndarray: Feature values for dataset
+            DatasetType: prepared dataset
         """
         raise NotImplementedError()

     @staticmethod
     @abstractmethod
-    def get_ds_feature_keys(dataset: DatasetType) -> list:
-        """Get the feature keys of a Dataset
+    def load_dataset_from_arrays(
+        dataset_id: ItemType, columns: Optional[list] = None
+    ) -> DatasetType:
+        """Load a DatasetType from a np.ndarray / Tensor

         Args:
-            dataset (Dataset): Dataset to get the feature keys from
+            dataset_id (ItemType): numpy array(s) to load.
+            columns (list, optional): Column names to assign. If None,
+                assigned as "input_i" for i-th column. Defaults to None.

         Returns:
-            list: List of feature keys
+            DatasetType
         """
         raise NotImplementedError()

     @staticmethod
     @abstractmethod
-    def has_feature_key(dataset: DatasetType, key: str) -> bool:
-        """Check if a Dataset has a feature denoted by key
+    def load_custom_dataset(
+        dataset_id: DatasetType, columns: Optional[list] = None
+    ) -> DatasetType:
+        """Load a custom dataset by ensuring it is properly formatted.

         Args:
-            dataset (DatasetType): Dataset to check
-            key (str): Key to check
+            dataset_id (DatasetType): dataset
+            columns (list, optional): Column names to use for elements if dataset_id is
+                tuple based. If None, assigned as "input_i"
+                for i-th column. Defaults to None.

         Returns:
-            bool: If the dataset has a feature denoted by key
+            A properly formatted dataset.
         """
         raise NotImplementedError()

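Together, load_dataset_from_arrays, split_by_class, and prepare form the new scoring workflow. A hedged end-to-end sketch using only the signatures declared above (the arrays, the explicit column names, and the label split are illustrative placeholders, not taken from the diff):

    import numpy as np
    from oodeel.datasets import load_data_handler

    handler = load_data_handler()

    # Placeholder data standing in for a real image classification set.
    x = np.random.rand(100, 32, 32, 3).astype("float32")
    y = np.random.randint(0, 10, size=100)

    # Column names passed explicitly; split_by_class filters on "label".
    ds = handler.load_dataset_from_arrays((x, y), columns=["input", "label"])
    in_ds, out_ds = handler.split_by_class(ds, in_labels=[0, 1, 2, 3, 4])
    loader = handler.prepare(in_ds, batch_size=32, shuffle=False)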
@@ -135,21 +211,21 @@ class DataHandler(ABC):

     @staticmethod
     @abstractmethod
-    def filter_by_feature_value(
+    def filter_by_value(
         dataset: DatasetType,
-        feature_key: str,
+        column_name: str,
         values: list,
         excluded: bool = False,
     ) -> DatasetType:
-        """Filter the dataset by checking the value of a feature is in `values`
+        """Filter the dataset by checking the value of a column is in `values`

         Args:
             dataset (Dataset): Dataset to filter
-            feature_key (str): Feature name to check the value
-            values (list): Feature_key values to keep (if excluded is False)
+            column_name (str): Column to filter the dataset with
+            values (list): Column values to keep (if excluded is False)
                 or to exclude
             excluded (bool, optional): To keep (False) or exclude (True) the samples
-                with Feature_key value included in Values. Defaults to False.
+                with column value included in Values. Defaults to False.

         Returns:
             DatasetType: Filtered dataset
@@ -158,79 +234,71 @@

     @staticmethod
     @abstractmethod
-    def merge(
-        id_dataset: DatasetType,
-        ood_dataset: DatasetType,
-        resize: Optional[bool] = False,
-        shape: Optional[Tuple[int]] = None,
-    ) -> DatasetType:
-        """Merge two datasets
+    def get_item_length(dataset: DatasetType) -> int:
+        """Number of elements in a dataset item

         Args:
-            id_dataset (Dataset): dataset of in-distribution data
-            ood_dataset (DictDataset): dataset of out-of-distribution data
-            resize (Optional[bool], optional): toggles if input tensors of the
-                datasets have to be resized to have the same shape. Defaults to True.
-            shape (Optional[Tuple[int]], optional): shape to use for resizing input
-                tensors. If None, the tensors are resized with the shape of the
-                id_dataset input tensors. Defaults to None.
+            dataset (DatasetType): Dataset

         Returns:
-            DatasetType: merged dataset
+            int: Item length
         """
         raise NotImplementedError()

-    @classmethod
+    @staticmethod
     @abstractmethod
-    def prepare_for_training(
-        cls,
-        dataset: DatasetType,
-        batch_size: int,
-        shuffle: bool = False,
-        preprocess_fn: Optional[Callable] = None,
-        augment_fn: Optional[Callable] = None,
-        output_keys: list = ["input", "label"],
-    ) -> DatasetType:
-        """Prepare a dataset for training
+    def get_dataset_length(dataset: DatasetType) -> int:
+        """Number of items in a dataset

         Args:
-            dataset (DictDataset): Dataset to prepare
-            batch_size (int): Batch size
-            shuffle (bool): Wether to shuffle the dataloader or not
-            preprocess_fn (Callable, optional): Preprocessing function to apply to
-                the dataset. Defaults to None.
-            augment_fn (Callable, optional): Augment function to be used (when the
-                returned dataset is to be used for training). Defaults to None.
-            output_keys (list): List of keys corresponding to the features that will be
-                returned. Keep all features if None. Defaults to None.
+            dataset (DatasetType): Dataset

         Returns:
-            DatasetType: prepared dataset / dataloader
+            int: Dataset length
         """
         raise NotImplementedError()

     @staticmethod
     @abstractmethod
-    def get_item_length(dataset: DatasetType) -> int:
-        """Number of elements in a dataset item
+    def get_column_elements_shape(
+        dataset: DatasetType, column_name: Union[str, int]
+    ) -> tuple:
+        """Get the shape of the elements of a column of dataset identified by
+        column_name

         Args:
-            dataset (DatasetType): Dataset
+            dataset (Dataset): a Dataset
+            column_name (Union[str, int]): The column name to get
+                the element shape from.

         Returns:
-            int: Item length
+            tuple: the shape of an element from column_name
         """
         raise NotImplementedError()

     @staticmethod
     @abstractmethod
-    def get_dataset_length(dataset: DatasetType) -> int:
-        """Number of items in a dataset
+    def get_input_from_dataset_item(elem: ItemType) -> TensorType:
+        """Get the tensor that is to be feed as input to a model from a dataset element.

         Args:
-            dataset (DatasetType): Dataset
+            elem (ItemType): dataset element to extract input from

         Returns:
-            int: Dataset length
+            TensorType: Input tensor
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def get_label_from_dataset_item(item: ItemType):
+        """Retrieve label tensor from item as a tuple/list. Label must be at index 1
+        in the item tuple. If one-hot encoded, labels are converted to single value.
+
+        Args:
+            elem (ItemType): dataset element to extract label from
+
+        Returns:
+            Any: Label tensor
         """
         raise NotImplementedError()
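The second half of this diff renames the feature-key based filter to a column based one and adds item-level introspection helpers. A short hedged sketch continuing the example above (the column name "label" and the index-based lookup are assumptions permitted by the declared signatures):

    # Keep everything except classes 0-4, mirroring what split_by_class does internally:
    ood_ds = handler.filter_by_value(ds, "label", [0, 1, 2, 3, 4], excluded=True)

    n_columns = handler.get_item_length(ds)       # e.g. 2 for (input, label) items
    n_items = handler.get_dataset_length(ds)
    input_shape = handler.get_column_elements_shape(ds, 0)  # column by index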
oodeel/datasets/deprecated/DEPRECATED_data_handler.py
ADDED
@@ -0,0 +1,236 @@
+# -*- coding: utf-8 -*-
+# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
+# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
+# CRIAQ and ANITI - https://www.deel.ai/
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+from abc import ABC
+from abc import abstractmethod
+
+import numpy as np
+
+from ...types import Callable
+from ...types import DatasetType
+from ...types import ItemType
+from ...types import Optional
+from ...types import Tuple
+from ...types import Union
+
+
+class DataHandler(ABC):
+    """
+    Class to manage Datasets. The aim is to provide a simple interface
+    for working with datasets (torch, tensorflow or other...) and manage them without
+    having to use library-specific syntax.
+    """
+
+    @classmethod
+    @abstractmethod
+    def load_dataset(
+        cls,
+        dataset_id: Union[ItemType, DatasetType, str],
+        keys: Optional[list] = None,
+        load_kwargs: dict = {},
+    ) -> DatasetType:
+        """Load dataset from different manners
+
+        Args:
+            dataset_id (Union[ItemType, DatasetType, str]): dataset identification
+            keys (list, optional): Features keys. If None, assigned as "input_i"
+                for i-th feature. Defaults to None.
+            load_kwargs (dict, optional): Additional loading kwargs. Defaults to {}.
+
+        Returns:
+            DatasetType: dataset
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def assign_feature_value(
+        dataset: DatasetType, feature_key: str, value: int
+    ) -> DatasetType:
+        """Assign a value to a feature for every sample in a Dataset
+
+        Args:
+            dataset (DatasetType): Dataset to assign the value to
+            feature_key (str): Feature to assign the value to
+            value (int): Value to assign
+
+        Returns:
+            DatasetType: updated dataset
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def get_feature_from_ds(dataset: DatasetType, feature_key: str) -> np.ndarray:
+        """Get a feature from a Dataset
+
+        Args:
+            dataset (DatasetType): Dataset to get the feature from
+            feature_key (str): Feature value to get
+
+        Returns:
+            np.ndarray: Feature values for dataset
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def get_ds_feature_keys(dataset: DatasetType) -> list:
+        """Get the feature keys of a Dataset
+
+        Args:
+            dataset (Dataset): Dataset to get the feature keys from
+
+        Returns:
+            list: List of feature keys
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def has_feature_key(dataset: DatasetType, key: str) -> bool:
+        """Check if a Dataset has a feature denoted by key
+
+        Args:
+            dataset (DatasetType): Dataset to check
+            key (str): Key to check
+
+        Returns:
+            bool: If the dataset has a feature denoted by key
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def map_ds(dataset: DatasetType, map_fn: Callable) -> DatasetType:
+        """Map a function to a Dataset
+
+        Args:
+            dataset (DatasetType): Dataset to map the function to
+            map_fn (Callable): Function to map
+
+        Returns:
+            DatasetType: Mapped dataset
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def filter_by_feature_value(
+        dataset: DatasetType,
+        feature_key: str,
+        values: list,
+        excluded: bool = False,
+    ) -> DatasetType:
+        """Filter the dataset by checking the value of a feature is in `values`
+
+        Args:
+            dataset (Dataset): Dataset to filter
+            feature_key (str): Feature name to check the value
+            values (list): Feature_key values to keep (if excluded is False)
+                or to exclude
+            excluded (bool, optional): To keep (False) or exclude (True) the samples
+                with Feature_key value included in Values. Defaults to False.
+
+        Returns:
+            DatasetType: Filtered dataset
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def merge(
+        id_dataset: DatasetType,
+        ood_dataset: DatasetType,
+        resize: Optional[bool] = False,
+        shape: Optional[Tuple[int]] = None,
+    ) -> DatasetType:
+        """Merge two datasets
+
+        Args:
+            id_dataset (Dataset): dataset of in-distribution data
+            ood_dataset (DictDataset): dataset of out-of-distribution data
+            resize (Optional[bool], optional): toggles if input tensors of the
+                datasets have to be resized to have the same shape. Defaults to True.
+            shape (Optional[Tuple[int]], optional): shape to use for resizing input
+                tensors. If None, the tensors are resized with the shape of the
+                id_dataset input tensors. Defaults to None.
+
+        Returns:
+            DatasetType: merged dataset
+        """
+        raise NotImplementedError()
+
+    @classmethod
+    @abstractmethod
+    def prepare_for_training(
+        cls,
+        dataset: DatasetType,
+        batch_size: int,
+        shuffle: bool = False,
+        preprocess_fn: Optional[Callable] = None,
+        augment_fn: Optional[Callable] = None,
+        output_keys: list = ["input", "label"],
+    ) -> DatasetType:
+        """Prepare a dataset for training
+
+        Args:
+            dataset (DictDataset): Dataset to prepare
+            batch_size (int): Batch size
+            shuffle (bool): Wether to shuffle the dataloader or not
+            preprocess_fn (Callable, optional): Preprocessing function to apply to
+                the dataset. Defaults to None.
+            augment_fn (Callable, optional): Augment function to be used (when the
+                returned dataset is to be used for training). Defaults to None.
+            output_keys (list): List of keys corresponding to the features that will be
+                returned. Keep all features if None. Defaults to None.
+
+        Returns:
+            DatasetType: prepared dataset / dataloader
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def get_item_length(dataset: DatasetType) -> int:
+        """Number of elements in a dataset item
+
+        Args:
+            dataset (DatasetType): Dataset
+
+        Returns:
+            int: Item length
+        """
+        raise NotImplementedError()
+
+    @staticmethod
+    @abstractmethod
+    def get_dataset_length(dataset: DatasetType) -> int:
+        """Number of items in a dataset
+
+        Args:
+            dataset (DatasetType): Dataset
+
+        Returns:
+            int: Dataset length
+        """
+        raise NotImplementedError()
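The deprecated module preserves the 0.1.x surface verbatim, so old and new names can be lined up directly from this diff. A hedged migration cheat-sheet (names only, taken from the hunks above; behavioral equivalence is assumed, not verified here):

    # 0.1.x (now under oodeel.datasets.deprecated)        0.3.0
    # DataHandler.filter_by_feature_value(ds, key, vals)  -> DataHandler.filter_by_value(ds, column_name, vals)
    # DataHandler.prepare_for_training(ds, batch_size)    -> DataHandler.prepare(ds, batch_size, ...)
    # DataHandler.load_dataset(dataset_id, keys)          -> load_dataset_from_arrays / load_custom_dataset
    # from oodeel.datasets import OODDataset              # still importable, re-exported from
    #                                                     # deprecated.DEPRECATED_ooddataset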