oodeel 0.1.1-py3-none-any.whl → 0.3.0-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of oodeel was flagged as possibly problematic.
- oodeel/__init__.py +1 -1
- oodeel/datasets/__init__.py +2 -1
- oodeel/datasets/data_handler.py +162 -94
- oodeel/datasets/deprecated/DEPRECATED_data_handler.py +236 -0
- oodeel/datasets/{ooddataset.py → deprecated/DEPRECATED_ooddataset.py} +14 -13
- oodeel/datasets/deprecated/DEPRECATED_tf_data_handler.py +671 -0
- oodeel/datasets/deprecated/DEPRECATED_torch_data_handler.py +769 -0
- oodeel/datasets/deprecated/__init__.py +31 -0
- oodeel/datasets/tf_data_handler.py +105 -167
- oodeel/datasets/torch_data_handler.py +109 -181
- oodeel/eval/metrics.py +7 -2
- oodeel/eval/plots/features.py +2 -2
- oodeel/eval/plots/plotly.py +2 -2
- oodeel/extractor/feature_extractor.py +30 -9
- oodeel/extractor/keras_feature_extractor.py +70 -13
- oodeel/extractor/torch_feature_extractor.py +120 -33
- oodeel/methods/__init__.py +17 -1
- oodeel/methods/base.py +103 -17
- oodeel/methods/dknn.py +22 -9
- oodeel/methods/energy.py +8 -0
- oodeel/methods/entropy.py +8 -0
- oodeel/methods/gen.py +118 -0
- oodeel/methods/gram.py +307 -0
- oodeel/methods/mahalanobis.py +14 -12
- oodeel/methods/mls.py +8 -0
- oodeel/methods/odin.py +8 -0
- oodeel/methods/rmds.py +122 -0
- oodeel/methods/she.py +197 -0
- oodeel/methods/vim.py +5 -5
- oodeel/preprocess/__init__.py +31 -0
- oodeel/preprocess/tf_preprocess.py +95 -0
- oodeel/preprocess/torch_preprocess.py +97 -0
- oodeel/utils/operator.py +72 -2
- oodeel/utils/tf_operator.py +72 -4
- oodeel/utils/tf_training_tools.py +26 -3
- oodeel/utils/torch_operator.py +75 -4
- oodeel/utils/torch_training_tools.py +31 -2
- {oodeel-0.1.1.dist-info → oodeel-0.3.0.dist-info}/METADATA +141 -107
- oodeel-0.3.0.dist-info/RECORD +57 -0
- {oodeel-0.1.1.dist-info → oodeel-0.3.0.dist-info}/WHEEL +1 -1
- tests/tests_tensorflow/tf_methods_utils.py +2 -1
- tests/tests_torch/tools_torch.py +9 -9
- tests/tests_torch/torch_methods_utils.py +34 -27
- tests/tools_operator.py +10 -1
- oodeel-0.1.1.dist-info/RECORD +0 -46
- {oodeel-0.1.1.dist-info → oodeel-0.3.0.dist-info/licenses}/LICENSE +0 -0
- {oodeel-0.1.1.dist-info → oodeel-0.3.0.dist-info}/top_level.txt +0 -0
```diff
--- /dev/null
+++ b/oodeel/datasets/deprecated/__init__.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
+# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
+# CRIAQ and ANITI - https://www.deel.ai/
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+import warnings
+
+from .DEPRECATED_ooddataset import OODDataset
+
+warnings.warn(
+    "The 'OODDataset' object is deprecated and will be removed in a future release.",
+    DeprecationWarning,
+    stacklevel=2,
+)
```
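The shim above warns once, at import time. A minimal stdlib sketch of that mechanism (it reproduces the `warnings.warn` call shown in the hunk rather than importing oodeel itself):

```python
import warnings

# DeprecationWarning is hidden by default outside __main__, so record
# warnings explicitly to observe the module-level call.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    warnings.warn(
        "The 'OODDataset' object is deprecated and will be removed"
        " in a future release.",
        DeprecationWarning,
        stacklevel=2,
    )

assert issubclass(caught[0].category, DeprecationWarning)
print(caught[0].message)
```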
```diff
--- a/oodeel/datasets/tf_data_handler.py
+++ b/oodeel/datasets/tf_data_handler.py
@@ -22,7 +22,6 @@
 # SOFTWARE.
 from typing import get_args
 
-import numpy as np
 import tensorflow as tf
 import tensorflow_datasets as tfds
 
```
```diff
@@ -36,9 +35,10 @@ from .data_handler import DataHandler
 
 
 def dict_only_ds(ds_handling_method: Callable) -> Callable:
-    """Decorator to ensure that the dataset is a dict dataset and that the feature_key
-    matches one of the feature keys. The signature of decorated functions
-    must be function(dataset, *args, **kwargs) with feature_key either in kwargs or
+    """Decorator to ensure that the dataset is a dict dataset and that the column_name
+    given as argument matches one of the column names.
+    matches one of the column names. The signature of decorated functions
+    must be function(dataset, *args, **kwargs) with column_name either in kwargs or
     args[0] when relevant.
 
 
```
```diff
@@ -52,19 +52,19 @@ def dict_only_ds(ds_handling_method: Callable) -> Callable:
     def wrapper(dataset: tf.data.Dataset, *args, **kwargs):
         assert isinstance(dataset.element_spec, dict), "dataset elements must be dicts"
 
-        if "feature_key" in kwargs.keys():
-            feature_key = kwargs["feature_key"]
+        if "column_name" in kwargs.keys():
+            column_name = kwargs["column_name"]
         elif len(args) > 0:
-            feature_key = args[0]
+            column_name = args[0]
 
-        # If feature_key is provided, check that it is in the dataset feature keys
-        if (len(args) > 0) or ("feature_key" in kwargs):
-            if isinstance(feature_key, str):
-                feature_key = [feature_key]
-            for key in feature_key:
+        # If column_name is provided, check that it is in the dataset column names
+        if (len(args) > 0) or ("column_name" in kwargs):
+            if isinstance(column_name, str):
+                column_name = [column_name]
+            for name in column_name:
                 assert (
-                    key in dataset.element_spec.keys()
-                ), f"The input dataset has no feature named {key}"
+                    name in dataset.element_spec.keys()
+                ), f"The input dataset has no column named {name}"
         return ds_handling_method(dataset, *args, **kwargs)
 
     return wrapper
```
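The decorator enforces a small contract: decorated functions take `(dataset, *args, **kwargs)`, and `column_name` is validated against `dataset.element_spec` before dispatch. A condensed, standalone sketch of the same pattern (not the library code itself):

```python
import tensorflow as tf

def dict_only_ds(fn):
    # Same contract as the hunk above: validate column_name (str or list
    # of str) against the dict element_spec, then dispatch.
    def wrapper(dataset, *args, **kwargs):
        assert isinstance(dataset.element_spec, dict), "dataset elements must be dicts"
        names = kwargs.get("column_name", args[0] if args else None)
        if names is not None:
            for name in [names] if isinstance(names, str) else names:
                assert name in dataset.element_spec, f"no column named {name}"
        return fn(dataset, *args, **kwargs)
    return wrapper

@dict_only_ds
def peek(dataset, column_name):
    return next(iter(dataset))[column_name]

ds = tf.data.Dataset.from_tensor_slices({"input": [[0.0, 1.0]], "label": [3]})
print(peek(ds, "input"))  # tf.Tensor([0. 1.], ...)
# peek(ds, "logits")      # AssertionError: no column named logits
```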
```diff
@@ -77,45 +77,54 @@ class TFDataHandler(DataHandler):
     tensorflow syntax.
     """
 
+    def __init__(self) -> None:
+        super().__init__()
+        self.backend = "tensorflow"
+        self.channel_order = "channels_last"
+
     @classmethod
     def load_dataset(
         cls,
         dataset_id: Union[tf.data.Dataset, ItemType, str],
-        keys: Optional[list] = None,
+        columns: Optional[list] = None,
         load_kwargs: dict = {},
     ) -> tf.data.Dataset:
         """Load dataset from different manners, ensuring to return a dict based
         tf.data.Dataset.
 
         Args:
-            dataset_id (ItemType): dataset identification
-            keys (list, optional): Features keys. If None, assigned as "input_i"
-                for i-th feature. Defaults to None.
+            dataset_id (Union[tf.data.Dataset, ItemType, str]): dataset identification.
+                Can be the name of a dataset from tensorflow_datasets, a tf.data.Dataset,
+                or a tuple/dict of np.ndarrays/tf.Tensors.
+            columns (list, optional): Column names. If None, assigned as "input_i"
+                for i-th column. Defaults to None.
             load_kwargs (dict, optional): Additional args for loading from
                 tensorflow_datasets. Defaults to {}.
 
         Returns:
             tf.data.Dataset: A dict based tf.data.Dataset
         """
+        load_kwargs["as_supervised"] = False
+
         if isinstance(dataset_id, get_args(ItemType)):
-            dataset = cls.load_dataset_from_arrays(dataset_id, keys)
+            dataset = cls.load_dataset_from_arrays(dataset_id, columns)
         elif isinstance(dataset_id, tf.data.Dataset):
-            dataset = cls.load_custom_dataset(dataset_id, keys)
+            dataset = cls.load_custom_dataset(dataset_id, columns)
         elif isinstance(dataset_id, str):
             dataset = cls.load_from_tensorflow_datasets(dataset_id, load_kwargs)
         return dataset
 
     @staticmethod
     def load_dataset_from_arrays(
-        dataset_id: ItemType, keys: Optional[list] = None
+        dataset_id: ItemType, columns: Optional[list] = None
     ) -> tf.data.Dataset:
         """Load a tf.data.Dataset from a np.ndarray, a tf.Tensor or a tuple/dict
-        of np.ndarrays/tf.Tensors
+        of np.ndarrays/tf.Tensors.
 
         Args:
             dataset_id (ItemType): numpy array(s) to load.
-            keys (list, optional): Features keys. If None, assigned as "input_i"
-                for i-th feature. Defaults to None.
+            columns (list, optional): Column names to assign. If None,
+                assigned as "input_i" for i-th column. Defaults to None.
 
         Returns:
             tf.data.Dataset
```
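With the rename, array loading takes a `columns` argument where 0.1.1 appears to have taken `keys`. A hedged usage sketch; the import path `oodeel.datasets.tf_data_handler` is an assumption based on the file list above, not something this diff confirms:

```python
import numpy as np

from oodeel.datasets.tf_data_handler import TFDataHandler  # path assumed

x = np.random.rand(10, 32, 32, 3).astype("float32")
y = np.random.randint(0, 5, size=(10,))

# Explicit column names (formerly `keys`):
ds = TFDataHandler.load_dataset_from_arrays((x, y), columns=["input", "label"])
print(ds.element_spec)  # {'input': TensorSpec(...), 'label': TensorSpec(...)}

# With columns=None, a 2-tuple falls back to {"input": ..., "label": ...}.
ds_default = TFDataHandler.load_dataset_from_arrays((x, y))
```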
```diff
@@ -127,7 +136,7 @@ class TFDataHandler(DataHandler):
         # If dataset_id is a tuple, convert it to a dict
         elif isinstance(dataset_id, tuple):
             len_elem = len(dataset_id)
-            if keys is None:
+            if columns is None:
                 if len_elem == 2:
                     dataset_dict = {"input": dataset_id[0], "label": dataset_id[1]}
                 else:
```
```diff
@@ -142,19 +151,19 @@ class TFDataHandler(DataHandler):
                 )
             else:
                 assert (
-                    len(keys) == len_elem
-                ), "Number of keys mismatch with the number of features"
-                dataset_dict = {keys[i]: dataset_id[i] for i in range(len_elem)}
+                    len(columns) == len_elem
+                ), "Number of column names mismatch with the number of columns"
+                dataset_dict = {columns[i]: dataset_id[i] for i in range(len_elem)}
 
         elif isinstance(dataset_id, dict):
-            if keys is not None:
+            if columns is not None:
                 len_elem = len(dataset_id)
                 assert (
-                    len(keys) == len_elem
-                ), "Number of keys mismatch with the number of features"
-                original_keys = list(dataset_id.keys())
+                    len(columns) == len_elem
+                ), "Number of column names mismatch with the number of columns"
+                original_columns = list(dataset_id.keys())
                 dataset_dict = {
-                    keys[i]: dataset_id[original_keys[i]] for i in range(len_elem)
+                    columns[i]: dataset_id[original_columns[i]] for i in range(len_elem)
                 }
 
         dataset = tf.data.Dataset.from_tensor_slices(dataset_dict)
```
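For dict inputs, `columns` renames the original keys positionally, exactly as the comprehension above shows. In plain Python:

```python
import numpy as np

dataset_id = {"image": np.zeros((4, 8)), "target": np.arange(4)}
columns = ["input", "label"]

# Positional renaming, as in the hunk above:
original_columns = list(dataset_id.keys())
dataset_dict = {
    columns[i]: dataset_id[original_columns[i]] for i in range(len(dataset_id))
}
assert list(dataset_dict) == ["input", "label"]
```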
```diff
@@ -162,14 +171,15 @@ class TFDataHandler(DataHandler):
 
     @classmethod
     def load_custom_dataset(
-        cls, dataset_id: tf.data.Dataset, keys: Optional[list] = None
+        cls, dataset_id: tf.data.Dataset, columns: Optional[list] = None
     ) -> tf.data.Dataset:
         """Load a custom Dataset by ensuring it has the correct format (dict-based)
 
         Args:
             dataset_id (tf.data.Dataset): tf.data.Dataset
-            keys (list, optional): Feature keys to use for elements if dataset_id is
-                tuple based. If None, assigned as "input_i" for i-th feature.
+            columns (list, optional): Column names to use for elements if dataset_id is
+                tuple based. If None, assigned as "input_i"
+                for i-th column. Defaults to None.
 
         Returns:
             tf.data.Dataset
```
```diff
@@ -177,22 +187,22 @@ class TFDataHandler(DataHandler):
         # If dataset_id is a tuple based tf.data.dataset, convert it to a dict
         if not isinstance(dataset_id.element_spec, dict):
             len_elem = len(dataset_id.element_spec)
-            if keys is None:
+            if columns is None:
                 print(
-                    "Feature keys not found, assigning 'input_i' "
+                    "Column name not found, assigning 'input_i' "
                     "key to the i-th tensor and 'label' key to the last"
                 )
                 if len_elem == 2:
-                    keys = ["input", "label"]
+                    columns = ["input", "label"]
                 else:
-                    keys = [f"input_{i}" for i in range(len_elem)]
-                    keys[-1] = "label"
+                    columns = [f"input_{i}" for i in range(len_elem)]
+                    columns[-1] = "label"
             else:
                 assert (
-                    len(keys) == len_elem
-                ), "Number of keys mismatch with the number of features"
+                    len(columns) == len_elem
+                ), "Number of column names mismatch with the number of columns"
 
-            dataset_id = cls.tuple_to_dict(dataset_id, keys)
+            dataset_id = cls.tuple_to_dict(dataset_id, columns)
 
         dataset = dataset_id
         return dataset
```
```diff
@@ -221,30 +231,30 @@ class TFDataHandler(DataHandler):
     @staticmethod
     @dict_only_ds
     def dict_to_tuple(
-        dataset: tf.data.Dataset, keys: Optional[list] = None
+        dataset: tf.data.Dataset, columns: Optional[list] = None
     ) -> tf.data.Dataset:
         """Turn a dict based tf.data.Dataset to a tuple based tf.data.Dataset
 
         Args:
             dataset (tf.data.Dataset): Dict based tf.data.Dataset
-            keys (list, optional): Keys to use for the tuples based
-                tf.data.Dataset. If None, takes all the keys. Defaults to None.
+            columns (list, optional): Columns to use for the tuples based
+                tf.data.Dataset. If None, takes all the columns. Defaults to None.
 
         Returns:
             tf.data.Dataset
         """
-        if keys is None:
-            keys = list(dataset.element_spec.keys())
-        dataset = dataset.map(lambda x: tuple(x[k] for k in keys))
+        if columns is None:
+            columns = list(dataset.element_spec.keys())
+        dataset = dataset.map(lambda x: tuple(x[k] for k in columns))
         return dataset
 
     @staticmethod
-    def tuple_to_dict(dataset: tf.data.Dataset, keys: list) -> tf.data.Dataset:
+    def tuple_to_dict(dataset: tf.data.Dataset, columns: list) -> tf.data.Dataset:
         """Turn a tuple based tf.data.Dataset to a dict based tf.data.Dataset
 
         Args:
             dataset (tf.data.Dataset): Tuple based tf.data.Dataset
-            keys (list): Keys to use for the dict based tf.data.Dataset
+            columns (list): Column names to use for the dict based tf.data.Dataset
 
         Returns:
             tf.data.Dataset
```
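The dict→tuple direction is a one-line `map`. A standalone tf.data sketch of what `dict_to_tuple` does with the selected columns:

```python
import tensorflow as tf

ds = tf.data.Dataset.from_tensor_slices(
    {"input": [[0.0, 1.0], [2.0, 3.0]], "label": [0, 1]}
)
columns = ["input", "label"]

# Same mapping as dict_to_tuple above:
tuple_ds = ds.map(lambda x: tuple(x[k] for k in columns))
print(tuple_ds.element_spec)  # (TensorSpec(shape=(2,), ...), TensorSpec(shape=(), ...))
```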
```diff
@@ -254,86 +264,28 @@ class TFDataHandler(DataHandler):
         ), "dataset elements must be tuples"
         len_elem = len(dataset.element_spec)
         assert len_elem == len(
-            keys
-        ), "The number of keys must be equal to the number of tuple elements"
+            columns
+        ), "The number of columns must be equal to the number of tuple elements"
 
         def tuple_to_dict(*inputs):
-            return {keys[i]: inputs[i] for i in range(len_elem)}
+            return {columns[i]: inputs[i] for i in range(len_elem)}
 
         dataset = dataset.map(tuple_to_dict)
         return dataset
 
-    @staticmethod
-    def assign_feature_value(
-        dataset: tf.data.Dataset, feature_key: str, value: int
-    ) -> tf.data.Dataset:
-        """Assign a value to a feature for every sample in a tf.data.Dataset
-
-        Args:
-            dataset (tf.data.Dataset): tf.data.Dataset to assign the value to
-            feature_key (str): Feature to assign the value to
-            value (int): Value to assign
-
-        Returns:
-            tf.data.Dataset
-        """
-        assert isinstance(dataset.element_spec, dict), "dataset elements must be dicts"
-
-        def assign_value_to_feature(x):
-            x[feature_key] = value
-            return x
-
-        dataset = dataset.map(assign_value_to_feature)
-        return dataset
-
-    @staticmethod
-    @dict_only_ds
-    def get_feature_from_ds(dataset: tf.data.Dataset, feature_key: str) -> np.ndarray:
-        """Get a feature from a tf.data.Dataset
-
-        !!! note
-            This function can be a bit time consuming since it needs to iterate
-            over the whole dataset.
-
-        Args:
-            dataset (tf.data.Dataset): tf.data.Dataset to get the feature from
-            feature_key (str): Feature value to get
-
-        Returns:
-            np.ndarray: Feature values for dataset
-        """
-        features = dataset.map(lambda x: x[feature_key])
-        features = list(features.as_numpy_iterator())
-        features = np.array(features)
-        return features
-
     @staticmethod
     @dict_only_ds
-    def get_ds_feature_keys(dataset: tf.data.Dataset) -> list:
-        """Get the feature keys of a tf.data.Dataset
+    def get_ds_column_names(dataset: tf.data.Dataset) -> list:
+        """Get the column names of a tf.data.Dataset
 
         Args:
-            dataset (tf.data.Dataset): tf.data.Dataset to get the feature keys from
+            dataset (tf.data.Dataset): tf.data.Dataset to get the column names from
 
         Returns:
-            list: List of feature keys
+            list: List of column names
         """
         return list(dataset.element_spec.keys())
 
-    @staticmethod
-    def has_feature_key(dataset: tf.data.Dataset, key: str) -> bool:
-        """Check if a tf.data.Dataset has a feature denoted by key
-
-        Args:
-            dataset (tf.data.Dataset): tf.data.Dataset to check
-            key (str): Key to check
-
-        Returns:
-            bool: If the tf.data.Dataset has a feature denoted by key
-        """
-        assert isinstance(dataset.element_spec, dict), "dataset elements must be dicts"
-        return True if (key in dataset.element_spec.keys()) else False
-
     @staticmethod
     def map_ds(
         dataset: tf.data.Dataset,
```
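Three helpers (`assign_feature_value`, `get_feature_from_ds`, `has_feature_key`) are deleted outright. For anyone migrating, their plain tf.data equivalents are thin one-liners:

```python
import numpy as np
import tensorflow as tf

ds = tf.data.Dataset.from_tensor_slices(
    {"input": np.zeros((4, 2)), "label": np.arange(4)}
)

# has_feature_key(ds, "label") reduces to:
print("label" in ds.element_spec)  # True

# get_feature_from_ds(ds, "label") reduces to:
labels = np.array(list(ds.map(lambda x: x["label"]).as_numpy_iterator()))
print(labels)  # [0 1 2 3]

# assign_feature_value(ds, "label", 0) reduces to:
relabeled = ds.map(lambda x: {**x, "label": 0})
```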
```diff
@@ -358,35 +310,35 @@ class TFDataHandler(DataHandler):
 
     @staticmethod
     @dict_only_ds
-    def filter_by_feature_value(
+    def filter_by_value(
         dataset: tf.data.Dataset,
-        feature_key: str,
+        column_name: str,
         values: list,
         excluded: bool = False,
     ) -> tf.data.Dataset:
-        """Filter a tf.data.Dataset by checking the value of a feature is in 'values'
+        """Filter a tf.data.Dataset by checking if the value of a column is in 'values'
 
         Args:
             dataset (tf.data.Dataset): tf.data.Dataset to filter
-            feature_key (str): Feature key to filter the dataset with
-            values (list): Feature values to keep (if excluded is False)
+            column_name (str): Column to filter the dataset with
+            values (list): Column values to keep (if excluded is False)
                 or to exclude
             excluded (bool, optional): To keep (False) or exclude (True) the samples
-                with feature values included in Values. Defaults to False.
+                with Column values included in Values. Defaults to False.
 
         Returns:
             tf.data.Dataset: Filtered dataset
         """
         # If the labels are one-hot encoded, prepare a function to get the label as int
-        if len(dataset.element_spec[feature_key].shape) > 0:
+        if len(dataset.element_spec[column_name].shape) > 0:
 
             def get_label_int(elem):
-                return int(tf.argmax(elem[feature_key]))
+                return int(tf.argmax(elem[column_name]))
 
         else:
 
             def get_label_int(elem):
-                return elem[feature_key]
+                return elem[column_name]
 
         def filter_fn(elem):
             value = get_label_int(elem)
```
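A hedged usage sketch of the renamed filter (import path assumed from the file list above; one-hot labels are handled through the `tf.argmax` branch shown in the hunk):

```python
import tensorflow as tf

from oodeel.datasets.tf_data_handler import TFDataHandler  # path assumed

ds = tf.data.Dataset.from_tensor_slices(
    {"input": tf.random.uniform((6, 2)), "label": [0, 1, 2, 0, 1, 2]}
)

# Keep only samples whose "label" value is 0 or 1:
in_dist = TFDataHandler.filter_by_value(ds, "label", [0, 1])
# excluded=True inverts the selection:
out_dist = TFDataHandler.filter_by_value(ds, "label", [0, 1], excluded=True)

print(in_dist.reduce(0, lambda count, _: count + 1).numpy())  # 4
```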
```diff
@@ -400,15 +352,16 @@ class TFDataHandler(DataHandler):
         return dataset_to_filter
 
     @classmethod
-    def prepare_for_training(
+    def prepare(
         cls,
         dataset: tf.data.Dataset,
         batch_size: int,
-        shuffle: bool = False,
         preprocess_fn: Optional[Callable] = None,
         augment_fn: Optional[Callable] = None,
-        output_keys: Optional[list] = None,
-        dict_based_fns: bool = False,
+        columns: Optional[list] = None,
+        shuffle: bool = False,
+        dict_based_fns: bool = True,
+        return_tuple: bool = True,
         shuffle_buffer_size: Optional[int] = None,
         prefetch_buffer_size: Optional[int] = None,
         drop_remainder: Optional[bool] = False,
```
```diff
@@ -418,16 +371,18 @@ class TFDataHandler(DataHandler):
         Args:
             dataset (tf.data.Dataset): tf.data.Dataset to prepare
             batch_size (int): Batch size
-            shuffle (bool, optional): To shuffle the returned dataset or not.
-                Defaults to False.
-            preprocess_fn (Callable, optional): Preprocessing function to apply to\
+            preprocess_fn (Callable, optional): Preprocessing function to apply to
                 the dataset. Defaults to None.
-            augment_fn (Callable, optional): Augment function to be used (when the
+            augment_fn (Callable, optional): Augment function to be used (when the
                 returned dataset is to be used for training). Defaults to None.
-            output_keys (list, optional): List of keys corresponding to the features
-                that will be returned. Keep all features if None. Defaults to None.
-            dict_based_fns (bool): Whether to use preprocess and DA functions as dict
-                based (if True) or as tuple based (if False). Defaults to False.
+            columns (list, optional): List of column names corresponding to the columns
+                that will be returned. Keep all columns if None. Defaults to None.
+            shuffle (bool, optional): To shuffle the returned dataset or not.
+                Defaults to False.
+            dict_based_fns (bool): Whether to use preprocess and DA functions as dict
+                based (if True) or as tuple based (if False). Defaults to True.
+            return_tuple (bool, optional): Whether to return each dataset item
+                as a tuple. Defaults to True.
             shuffle_buffer_size (int, optional): Size of the shuffle buffer. If None,
                 taken as the number of samples in the dataset. Defaults to None.
             prefetch_buffer_size (Optional[int], optional): Buffer size for prefetch.
```
```diff
@@ -440,9 +395,9 @@ class TFDataHandler(DataHandler):
             tf.data.Dataset: Prepared dataset
         """
         # dict based to tuple based
-        output_keys = output_keys or cls.get_ds_feature_keys(dataset)
+        columns = columns or cls.get_ds_column_names(dataset)
         if not dict_based_fns:
-            dataset = cls.dict_to_tuple(dataset, output_keys)
+            dataset = cls.dict_to_tuple(dataset, columns)
 
         # preprocess + DA
         if preprocess_fn is not None:
```
```diff
@@ -450,8 +405,8 @@ class TFDataHandler(DataHandler):
         if augment_fn is not None:
             dataset = cls.map_ds(dataset, augment_fn)
 
-        if dict_based_fns:
-            dataset = cls.dict_to_tuple(dataset, output_keys)
+        if dict_based_fns and return_tuple:
+            dataset = cls.dict_to_tuple(dataset, columns)
 
         dataset = dataset.cache()
 
```
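Putting the renamed `prepare` together, a hedged sketch; the import path is assumed from the file list, and the batching/prefetch tail of `prepare` is assumed from its `batch_size`/`drop_remainder` parameters since this diff does not show it:

```python
import tensorflow as tf

from oodeel.datasets.tf_data_handler import TFDataHandler  # path assumed

ds = tf.data.Dataset.from_tensor_slices(
    {"input": tf.random.uniform((8, 4)), "label": tf.range(8)}
)

# dict_based_fns now defaults to True, so preprocess_fn receives dict items:
def scale(item):
    item["input"] = item["input"] / 2.0
    return item

batched = TFDataHandler.prepare(ds, batch_size=4, preprocess_fn=scale, shuffle=True)

# With return_tuple=True (the default), items come back as (input, label) tuples:
for x, y in batched.take(1):
    print(x.shape, y.shape)  # (4, 4) (4,)
```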
```diff
@@ -598,19 +553,21 @@ class TFDataHandler(DataHandler):
         return int(cardinality)
 
     @staticmethod
-    def get_feature_shape(
-        dataset: tf.data.Dataset, feature_key: Union[str, int]
+    def get_column_elements_shape(
+        dataset: tf.data.Dataset, column_name: Union[str, int]
     ) -> tuple:
-        """Get the shape of a feature of dataset identified by feature_key
+        """Get the shape of the elements of a column of dataset identified by
+        column_name
 
         Args:
             dataset (tf.data.Dataset): a tf.data.dataset
-            feature_key (Union[str, int]): The feature key to get the shape from
+            column_name (Union[str, int]): The column name to get
+                the element shape from.
 
         Returns:
-            tuple: the shape of feature_key elements
+            tuple: the shape of an element from column_name
         """
-        return tuple(dataset.element_spec[feature_key].shape)
+        return tuple(dataset.element_spec[column_name].shape)
 
     @staticmethod
     def get_input_from_dataset_item(elem: ItemType) -> TensorType:
```
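The renamed shape helper reads straight off `element_spec`, so it returns per-element shapes without a batch dimension. A short sketch (import path assumed as before):

```python
import tensorflow as tf

from oodeel.datasets.tf_data_handler import TFDataHandler  # path assumed

ds = tf.data.Dataset.from_tensor_slices(
    {"input": tf.zeros((10, 32, 32, 3)), "label": tf.range(10)}
)
print(TFDataHandler.get_column_elements_shape(ds, "input"))  # (32, 32, 3)
print(TFDataHandler.get_column_elements_shape(ds, "label"))  # ()
```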
```diff
@@ -650,22 +607,3 @@ class TFDataHandler(DataHandler):
         if len(label.shape) > 1:
             label = tf.reshape(label, [label.shape[0]])
         return label
-
-    @staticmethod
-    def get_feature(
-        dataset: tf.data.Dataset, feature_key: Union[str, int]
-    ) -> tf.data.Dataset:
-        """Extract a feature from a dataset
-
-        Args:
-            dataset (tf.data.Dataset): Dataset to extract the feature from
-            feature_key (Union[str, int]): feature to extract
-
-        Returns:
-            tf.data.Dataset: dataset built with the extracted feature only
-        """
-
-        def _get_feature_elem(elem):
-            return elem[feature_key]
-
-        return dataset.map(_get_feature_elem)
```
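The deleted `get_feature` helper was a thin wrapper over `dataset.map`; single-column extraction stays a one-liner in plain tf.data:

```python
import tensorflow as tf

ds = tf.data.Dataset.from_tensor_slices(
    {"input": tf.zeros((4, 2)), "label": tf.range(4)}
)

# Single-column extraction without the removed helper:
inputs_only = ds.map(lambda elem: elem["input"])
print(inputs_only.element_spec)  # TensorSpec(shape=(2,), ...)
```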