PyPI - optima-ml - Versions diffs - 0.3.2a4__tar.gz → 0.3.2a5.dev1__tar.gz - Mend

optima-ml 0.3.2a4__tar.gz → 0.3.2a5.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

{optima-ml-0.3.2a4 → optima_ml-0.3.2a5.dev1}/OPTIMA/builtin/evaluation.py RENAMED Viewed

@@ -578,7 +578,10 @@ def evaluate(
         # for each contribution to the histogram, we need to provide the type of contribution (step or scatter), the
         # index of the reference to use for the ratio subplot, and bin content itself..
         bin_contents_with_type = zip(["step"] * num_classes, [None] * num_classes, bin_contents)
-        colors = sns.color_palette()
+        if (num_classes <= 10 and not explicit_testing_dataset) or num_classes <= 5:
+            colors = sns.color_palette()
+        else:
+            colors = sns.color_palette("husl", num_classes if not explicit_testing_dataset else 2 * num_classes)
         colors_errors = [None] * (num_classes - 1) + ["0.4"]  # only the uppermost contribution should have error bars
         if class_labels is not None:
             legend_labels = class_labels if not binary_classification else class_labels[::-1]

optima_ml-0.3.2a5.dev1/OPTIMA/builtin/search_space.py ADDED Viewed

@@ -0,0 +1,58 @@
+# -*- coding: utf-8 -*-
+"""A module that provides functions to handle the search space for the hyperparameter optimization for the built-in multilayer perceptrons."""
+from OPTIMA.core.model import model_config_type
+def get_hp_defaults() -> tuple[model_config_type, model_config_type]:
+    """Provides default values for all hyperparameters needed by the built-in ``build_model`` and ``compile_model``-functions.
+    This function is specific to the built-in ``build_model`` and ``compile_model``-functions for classification using
+    multilayer perceptrons. If they are not overwritten, hyperparameters that are omitted from the search space will be
+    set to their default values. When defining an own ``build_model`` or ``compile_model``-function, this functionality
+    is disabled and the corresponding default values will NOT be added to the search space, thus all necessary
+    hyperparameters are expected to be present in the search space.
+    Returns
+    -------
+    tuple[model_config_type, model_config_type]
+        Dictionaries with the names of all hyperparameters as keys and the corresponding default values as values. The
+        first return value contains the hyperparameters of the built-in ``build_model``-function, the second return value
+        the hyperparameters of the built-in ``compile_model``-function.
+    """
+    hyperparameter_defaults_build = {
+        "num_layers": 3,
+        "units": 32,
+        "activation": "swish",
+        "kernel_initializer": "auto",
+        "bias_initializer": "auto",
+        "l1_lambda": 0.0,
+        "l2_lambda": 0.0,
+        "dropout": 0.1,
+        "batch_size": 64,
+    }
+    hyperparameter_defaults_compile = {
+        "learning_rate": 0.001,
+        "Adam_beta_1": 0.9,
+        "one_minus_Adam_beta_2": 0.001,
+        "Adam_epsilon": 1e-7,
+        "loss_function": "BinaryCrossentropy",
+    }
+    return hyperparameter_defaults_build, hyperparameter_defaults_compile
+def get_hps_to_mutate() -> tuple[list[str], list[str]]:
+    """Provides a list of built-in hyperparameters that allow mutation.
+    This function is specific to the built-in ``build_model`` and ``compile_model``-functions. If either of the two
+    functions are overwritten, the corresponding hyperparameters are not assumed to be mutatable anymore.
+    Returns
+    -------
+    tuple[list[str], list[str]]
+        Two lists of mutatable, built-in hyperparameters. The first return value contains the mutatable hyperparameters
+        of the built-in ``build_model``-function, the second return value the hyperparameters of the built-in
+        ``compile_model``-function.
+    """
+    mutatable_hps_build = ["l1_lambda", "l2_lambda", "dropout", "batch_size"]
+    mutatable_hps_compile = ["learning_rate", "Adam_beta_1", "one_minus_Adam_beta_2", "Adam_epsilon"]
+    return mutatable_hps_build, mutatable_hps_compile

{optima-ml-0.3.2a4 → optima_ml-0.3.2a5.dev1}/OPTIMA/core/evaluation.py RENAMED Viewed

@@ -30,7 +30,7 @@ import OPTIMA.core.model
 import OPTIMA.builtin.evaluation
 import OPTIMA.builtin.inputs
 import OPTIMA.builtin.search_space
-from OPTIMA.core.search_space import tune_search_space_type, run_config_search_space_entry_type
+from OPTIMA.core.search_space import run_config_search_space_entry_type
 def evaluate_experiment(
@@ -39,8 +39,7 @@ def evaluate_experiment(
     run_config: ModuleType,
     optimize_name: str,
     optimize_op: Union[Literal["max"], Literal["min"]],
-    search_space: tune_search_space_type,
-    run_config_search_space: dict[str, run_config_search_space_entry_type],
+    search_space: dict[str, run_config_search_space_entry_type],
     results_dir: str,
     inputs_split: list[ray.ObjectRef],
     targets_split: list[ray.ObjectRef],
@@ -127,9 +126,7 @@ def evaluate_experiment(
         Name of the target metric.
     optimize_op : Union[Literal["max"], Literal["min"]]
         Specifies if the target metric is to be maximized or minimized. Can be either ``'max'`` or ``'min'``.
-    search_space : tune_search_space_type
-        The tune search space provided to the Tuner.
-    run_config_search_space : dict[str, run_config_search_space_entry_type]
+    search_space : dict[str, run_config_search_space_entry_type]
         The search space as defined in the run-config.
     results_dir : str
         Path to the directory where the results are to be saved.
@@ -350,12 +347,6 @@ def evaluate_experiment(
                 all_model_configs, trial_ids = pickle.load(file)
         # start with results from the best metric values
-        # get the hp limit and rounding function
-        if hasattr(run_config, "limit_and_round_hyperparameters"):
-            limit_and_round_hps = run_config.limit_and_round_hyperparameters
-        else:
-            limit_and_round_hps = OPTIMA.builtin.search_space.limit_and_round_hyperparameters
         dirs_to_evaluate = (
             []
         )  # will contain the paths to the directories containing the models to evaluate (best model from optimization + crossvalidation models)
@@ -375,7 +366,7 @@ def evaluate_experiment(
             # round the config where necessary (in the same way as during the optimization), and add the hyperparameters to
             # the dataframe containing the configs of the best trials
-            model_config = limit_and_round_hps(model_configs_to_evaluate[-1])
+            model_config = model_configs_to_evaluate[-1]
             for hp in model_configs_df.index:
                 model_configs_df.loc[hp, metric] = model_config[hp]
@@ -401,7 +392,7 @@ def evaluate_experiment(
                         ] = trial_id  # best_trail is full path to the optimization folder while trails in trial_list as only the names
                         break
                 model_configs_to_evaluate.append(model_config_to_evaluate)
-                model_config = limit_and_round_hps(model_configs_to_evaluate[-1])
+                model_config = model_configs_to_evaluate[-1]
                 for hp in model_configs_df.index:
                     model_configs_df.loc[hp, f"{metric} fit"] = model_config[hp]
                 target_folder = os.path.join(results_dir, metric if len(best_trials_fit.index) > 1 else "", "best_fit")
@@ -571,10 +562,7 @@ def evaluate_experiment(
         optimization_str += "input variables: {}\n\n".format(", ".join(input_handler.get_vars()))
         optimization_str += "search space:\n"
         for hp in search_space.keys():
-            if hp in run_config_search_space.keys():
-                optimization_str += f"\t{hp}: {run_config_search_space[hp]}\n"
-            else:
-                optimization_str += f"\t{hp}: {search_space[hp]}\n"
+            optimization_str += f"\t{hp}: {search_space[hp]}\n"
         # write results to file
         if write_results:
@@ -738,114 +726,6 @@ def evaluate_experiment(
                 )
-def evaluate_search_algorithm_test_step(
-    analysis: tune.ExperimentAnalysis,
-    optimize_name: str,
-    optimize_op: Union[Literal["max"], Literal["min"]],
-    search_space: tune_search_space_type,
-    run_config_search_space: dict[str, run_config_search_space_entry_type],
-    results_dir: str,
-    inputs_split: list[np.ndarray],
-    targets_split: list[np.ndarray],
-    weights_split: list[np.ndarray],
-    input_handler: OPTIMA.builtin.inputs.InputHandler,
-    preprocessor: Any,
-    custom_metrics: Optional[list[tuple[str, Callable]]] = None,
-    composite_metrics: Optional[list[tuple[str, tuple[str, str], Callable]]] = None,
-    overtraining_conditions: Optional[list] = None,
-    write_results: bool = True,
-    return_results_str: bool = False,
-) -> None:
-    """(unmaintained!) Performs the evaluation of one of an optimization step when running with option ``--test_search_algorithm``.
-    This function performs the same evaluations done in ``evaluate_experiment`` to determine the best trial with the
-    two evaluation methods but skips the crossvalidation and evaluation of the individual models.
-    Parameters
-    ----------
-    analysis : tune.ExperimentAnalysis
-        The ``tune.ExperimentAnalysis``-object extracted from the ``tune.ResultsGrid`` returned by the ``Tuner``.
-    optimize_name : str
-        Name of the target metric.
-    optimize_op : Union[Literal["max"], Literal["min"]]
-        Specifies if the target metric is to be maximized or minimized. Can be either ``'max'`` or ``'min'``.
-    search_space : tune_search_space_type
-        The tune search space provided to the Tuner.
-    run_config_search_space : dict[str, run_config_search_space_entry_type]
-        The search space as defined in the run-config.
-    results_dir : str
-        Path to the directory where the results are to be saved.
-    inputs_split : list[np.ndarray]
-        List containing the numpy array of input features for the training, validation and (if used) testing sets.
-    targets_split : list[np.ndarray]
-        List containing the numpy array of target labels for the training, validation and (if used) testing sets.
-    weights_split : list[np.ndarray]
-        List containing the numpy array of event weights for the training, validation and (if used) testing sets.
-    input_handler : OPTIMA.builtin.inputs.InputHandler
-        Instance of the ``preprocessing.InputHandler``-class
-    preprocessor : Any
-        The fitted scaler used to preprocess the input features that were used for training during the optimization.
-    custom_metrics : Optional[list[tuple[str, Callable]]]
-        A list of `custom metrics` as defined in the run-config. (Default value = None)
-    composite_metrics : Optional[list[tuple[str, tuple[str, str], Callable]]]
-        A list of `composite metrics` as defined in the run-config. (Default value = None)
-    overtraining_conditions : Optional[list]
-        A list of `overtraining conditions` as defined in the run-config. (Default value = None)
-    write_results : bool
-        If ``True``, the results are written to `results.txt` in ``results_dir``. (Default value = True)
-    return_results_str : bool
-        If ``True``, the results string that is printed to console is also returned. (Default value = False)
-    """
-    if overtraining_conditions is None:
-        overtraining_conditions = []
-    if composite_metrics is None:
-        composite_metrics = []
-    if custom_metrics is None:
-        custom_metrics = []
-    # save analysis to disk for later manual evaluation
-    with open(os.path.join(results_dir, "analysis.pickle"), "wb") as file:
-        pickle.dump(analysis, file)
-    # build a list containing the names of all metrics, grouped together like [[train_loss, val_loss], [train_accuracy, val_accuracy], ...]
-    metric_names = []
-    optimize_name_included = False
-    for metric, _ in custom_metrics:
-        group = ("train_" + metric, "val_" + metric)
-        metric_names.append(group)
-        if optimize_name in group:
-            optimize_name_included = True
-    for metric, _, _ in composite_metrics:
-        metric_names.append(metric)
-        if metric == optimize_name:
-            optimize_name_included = True
-    if not optimize_name_included:
-        metric_names = [optimize_name] + metric_names
-    # get the results dataframes and remove all NaN and inf values
-    dfs_dirty = analysis.fetch_trial_dataframes()
-    dfs = clean_analysis_results(dfs_dirty, metric_names)
-    # now also save the dataframes
-    with open(os.path.join(results_dir, "dfs.pickle"), "wb") as file:
-        pickle.dump(dfs, file)
-    # go through dataframes and explicitly check if overtraining conditions are fulfilled, and add results (True/False)
-    # as new column "overtrained"
-    dfs_overtraining_checked = check_overtraining(dfs, overtraining_conditions)
-    # produce two sets of plots showing the overall progress of the experiment, one set containing all trials as datapoints,
-    # and one showing the evolution of the "best" trial; both as a function of time
-    draw_total_progress(
-        dfs_overtraining_checked,
-        optimize_name,
-        optimize_op,
-        metric_names,
-        figs_dir=results_dir,
-        reject_overtrained=True,
-    )
 def scientific_rounding(value, err, notation="separate"):
     """Helper function to perform scientific rounding based on the provided uncertainty.

optima-ml 0.3.2a4__tar.gz → 0.3.2a5.dev1__tar.gz

optima-ml 0.3.2a4__tar.gz → 0.3.2a5.dev1__tar.gz