lecrapaud 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lecrapaud might be problematic.
- lecrapaud/api.py +11 -49
- lecrapaud/config.py +1 -0
- lecrapaud/db/alembic/versions/2025_10_25_0635-07e303521594_add_unique_constraint_to_score.py +39 -0
- lecrapaud/db/alembic/versions/2025_10_26_1727-033e0f7eca4f_merge_score_and_model_trainings_into_.py +264 -0
- lecrapaud/db/alembic/versions/2025_10_28_2006-0a8fb7826e9b_add_number_of_targets_and_remove_other_.py +42 -0
- lecrapaud/db/models/__init__.py +2 -4
- lecrapaud/db/models/base.py +103 -65
- lecrapaud/db/models/experiment.py +79 -99
- lecrapaud/db/models/feature_selection.py +0 -3
- lecrapaud/db/models/feature_selection_rank.py +0 -18
- lecrapaud/db/models/model_selection.py +2 -2
- lecrapaud/db/models/{score.py → model_selection_score.py} +29 -12
- lecrapaud/db/session.py +1 -0
- lecrapaud/experiment.py +11 -13
- lecrapaud/feature_engineering.py +34 -49
- lecrapaud/feature_selection.py +90 -22
- lecrapaud/model_selection.py +434 -192
- lecrapaud/search_space.py +2 -1
- lecrapaud/utils.py +22 -2
- {lecrapaud-0.19.3.dist-info → lecrapaud-0.20.1.dist-info}/METADATA +1 -1
- {lecrapaud-0.19.3.dist-info → lecrapaud-0.20.1.dist-info}/RECORD +23 -21
- lecrapaud/db/models/model_training.py +0 -64
- {lecrapaud-0.19.3.dist-info → lecrapaud-0.20.1.dist-info}/WHEEL +0 -0
- {lecrapaud-0.19.3.dist-info → lecrapaud-0.20.1.dist-info}/licenses/LICENSE +0 -0
lecrapaud/model_selection.py
CHANGED
@@ -15,7 +15,7 @@ from pydantic import BaseModel
 import ast

 # ML models
-from sklearn.model_selection import TimeSeriesSplit
+from sklearn.model_selection import TimeSeriesSplit, StratifiedKFold, KFold
 from sklearn.calibration import CalibratedClassifierCV
 from sklearn.metrics import (
     mean_absolute_percentage_error,
@@ -63,23 +63,38 @@ from ray.tune.logger import TBXLoggerCallback
 from ray.tune.schedulers import ASHAScheduler
 from ray.air import session

+# HyperOpt standalone
+from hyperopt import hp, fmin, tpe, Trials, STATUS_OK, space_eval
+
 # Internal library
 from lecrapaud.search_space import all_models
 from lecrapaud.directories import clean_directory
 from lecrapaud.utils import copy_any, contains_best, logger, serialize_for_json
-from lecrapaud.config import PYTHON_ENV
+from lecrapaud.config import PYTHON_ENV, LECRAPAUD_OPTIMIZATION_BACKEND
 from lecrapaud.feature_selection import load_train_data
 from lecrapaud.db import (
     Model,
     ModelSelection,
-
-    Score,
+    ModelSelectionScore,
     Target,
     Experiment,
 )

 os.environ["COVERAGE_FILE"] = str(Path(".coverage").resolve())

+# Suppress XGBoost and LightGBM logging
+import logging
+
+logging.getLogger("lightgbm").setLevel(logging.ERROR)
+logging.getLogger("xgboost").setLevel(logging.ERROR)
+
+# Set global verbosity for XGBoost
+xgb.set_config(verbosity=0)
+
+# Suppress warnings
+warnings.filterwarnings("ignore", category=UserWarning)
+warnings.filterwarnings("ignore", category=FutureWarning)
+
 # Reproducible result
 keras.utils.set_random_seed(42)
 np.random.seed(42)
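The new LECRAPAUD_OPTIMIZATION_BACKEND import corresponds to the single line added to lecrapaud/config.py (+1 -0 in the file list above). The diff does not show that line itself; a minimal sketch of what it presumably looks like, assuming it follows the usual environment-variable pattern of settings such as PYTHON_ENV (the actual default is not visible in this diff):

import os

# Hypothetical reconstruction: selects the hyperparameter-optimization backend.
# "ray" (Ray Tune) is assumed to remain the default; "hyperopt" switches to the
# standalone, Celery-friendly path introduced in this release.
LECRAPAUD_OPTIMIZATION_BACKEND = os.getenv("LECRAPAUD_OPTIMIZATION_BACKEND", "ray")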
@@ -410,6 +425,7 @@ class ModelEngine:
                     "metric": eval_metric,
                     "num_class": num_class,
                     "verbose": -1,
+                    "verbose_eval": False,
                 },
                 num_boost_round=params["num_boost_round"],
                 train_set=train_data,
@@ -421,6 +437,7 @@ class ModelEngine:
                     ),
                     lgb.record_evaluation(evals_result),
                     tensorboard_callback,
+                    lgb.log_evaluation(period=0),  # Disable evaluation logging
                 ],
             )
         else:
@@ -462,7 +479,7 @@ class ModelEngine:
                 if self.target_type == "regression"
                 else ("logloss" if num_class <= 2 else "mlogloss")
             )
-
+            # XGBoost verbosity already set globally
            model = xgb.train(
                 params={
                     **params["model_params"],
@@ -477,11 +494,11 @@ class ModelEngine:
                     xgb.callback.EarlyStopping(
                         rounds=params["early_stopping_rounds"], save_best=True
                     ),
-
+                    # Removed EvaluationMonitor to suppress logs
                     tensorboard_callback,
                 ],
                 evals_result=evals_result,  # Record evaluation result
-                verbose_eval=
+                verbose_eval=False,  # Disable evaluation logging
             )

         model.model_name = self.create_model
@@ -746,6 +763,171 @@ class ModelEngine:
         )


+def trainable_cv(
+    params,
+    x_train,
+    y_train,
+    x_val,
+    y_val,
+    model_name,
+    target_type,
+    experiment_name,
+    target_number,
+    create_model,
+    n_splits=3,
+    plot=False,
+    log_dir=None,
+    target_clf_thresholds: dict = None,
+    time_series=True,
+    recurrent=False,
+):
+    """Cross-validation version of trainable for hyperopt.
+
+    Uses TimeSeriesSplit for temporal data or StratifiedKFold/KFold for i.i.d. data.
+    Returns pooled metrics (single logloss/RMSE calculated on all concatenated predictions).
+    """
+    # Combine train and validation data for cross-validation
+    if recurrent:
+        x_train_val = np.concatenate([x_train, x_val], axis=0)
+        y_train_val = np.concatenate([y_train, y_val], axis=0)
+    else:
+        x_train_val = pd.concat([x_train, x_val], axis=0)
+        y_train_val = pd.concat([y_train, y_val], axis=0)
+        # Store original index for later use if needed
+        original_index = x_train_val.index.copy()
+        # Reset index for proper iloc indexing with CV splits
+        x_train_val = x_train_val.reset_index(drop=True)
+        y_train_val = y_train_val.reset_index(drop=True)
+
+    # Choose appropriate cross-validation splitter
+    if time_series:
+        # Time series split for temporal data
+        n_samples = len(x_train_val)
+        test_size = int(n_samples / (n_splits + 1))  # Ensure reasonable test size
+        cv_splitter = TimeSeriesSplit(n_splits=n_splits, test_size=test_size)
+    else:
+        # Stratified or regular K-fold for i.i.d. data
+        if target_type == "classification":
+            cv_splitter = StratifiedKFold(
+                n_splits=n_splits, shuffle=True, random_state=42
+            )
+        else:
+            cv_splitter = KFold(n_splits=n_splits, shuffle=True, random_state=42)
+
+    # Store all predictions and true values for pooled metrics
+    all_predictions = []
+    all_y_true = []
+    fold_times = []
+
+    # Get splits based on the CV strategy
+    if time_series or target_type == "regression":
+        splits = cv_splitter.split(x_train_val)
+    else:
+        # For stratified split, we need to pass y
+        if recurrent:
+            # Extract the target from the 2D array (first column is target)
+            y_for_split = y_train_val[:, 0]
+        else:
+            y_for_split = y_train_val
+        splits = cv_splitter.split(x_train_val, y_for_split)
+
+    for fold_idx, (train_idx, val_idx) in enumerate(splits):
+        # Extract fold data
+        if recurrent:
+            x_fold_train = x_train_val[train_idx]
+            y_fold_train = y_train_val[train_idx]
+            x_fold_val = x_train_val[val_idx]
+            y_fold_val = y_train_val[val_idx]
+        else:
+            x_fold_train = x_train_val.iloc[train_idx]
+            y_fold_train = y_train_val.iloc[train_idx]
+            x_fold_val = x_train_val.iloc[val_idx]
+            y_fold_val = y_train_val.iloc[val_idx]
+
+        # Train model for this fold
+        model = ModelEngine(
+            model_name=model_name,
+            target_type=target_type,
+            target_number=target_number,
+            create_model=create_model,
+            plot=False,  # Disable individual fold plots
+            log_dir=log_dir,
+        )
+
+        if recurrent:
+            timesteps = params["timesteps"]
+            x_fold_train = x_fold_train[:, -timesteps:, :]
+            x_fold_val = x_fold_val[:, -timesteps:, :]
+
+        # Fit model
+        model.fit(x_fold_train, y_fold_train, x_fold_val, y_fold_val, params)
+
+        # Get predictions
+        y_pred = model.predict(x_fold_val)
+
+        # Handle recurrent model indexing
+        if recurrent:
+            y_fold_val = pd.DataFrame(
+                y_fold_val, columns=["TARGET", "index"]
+            ).set_index("index")
+            y_pred.index = y_fold_val.index
+
+        # Store predictions and true values
+        all_predictions.append(y_pred)
+        all_y_true.append(y_fold_val)
+
+    # Concatenate all fold predictions
+    if target_type == "classification":
+        # For classification, we need to handle probability columns
+        all_pred_df = pd.concat(all_predictions, axis=0)
+        all_y_series = pd.concat(all_y_true, axis=0)
+        # Ensure we have a DataFrame with TARGET column
+        if isinstance(all_y_series, pd.Series):
+            all_y_df = pd.DataFrame({"TARGET": all_y_series})
+        else:
+            all_y_df = all_y_series
+    else:
+        # For regression, just concatenate the predictions
+        all_pred_series = pd.concat(all_predictions, axis=0)
+        all_y_series = pd.concat(all_y_true, axis=0)
+        all_pred_df = pd.DataFrame({"PRED": all_pred_series})
+        all_y_df = pd.DataFrame({"TARGET": all_y_series})

+    # Create combined prediction DataFrame
+    prediction = pd.concat([all_y_df[["TARGET"]], all_pred_df], axis=1)
+
+    # Calculate pooled metrics
+    score = {
+        "DATE": datetime.now(),
+        "MODEL_NAME": model_name,
+        "EVAL_DATA_STD": prediction["TARGET"].std(),
+    }
+
+    # Unscale if needed (for regression with scaling)
+    if (
+        model.need_scaling
+        and target_type == "regression"
+        and model.scaler_y is not None
+    ):
+        prediction.loc[:, "TARGET"] = model.scaler_y.inverse_transform(
+            prediction[["TARGET"]].values
+        )
+        prediction.loc[:, "PRED"] = model.scaler_y.inverse_transform(
+            prediction[["PRED"]].values
+        )
+
+    # Evaluate with pooled predictions
+    score.update(evaluate(prediction, target_type, target_clf_thresholds))
+
+    metric = "RMSE" if target_type == "regression" else "LOGLOSS"
+    logger.info(f"{model_name} CV pooled {metric}: {score[metric]:.4f}")
+
+    # Report to Ray if in Ray context
+    if session.get_session():
+        session.report(metrics=score)
+    return score
+
+
 def trainable(
     params,
     x_train,
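The new trainable_cv pools the out-of-fold predictions and computes one metric on the concatenated set, rather than averaging per-fold scores; this weights every sample equally and is less noisy when folds are small. A self-contained illustration of that pooling pattern, on toy data with scikit-learn only (none of these names come from lecrapaud):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold  # TimeSeriesSplit for temporal data

X, y = make_classification(n_samples=300, random_state=42)
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

oof_pred, oof_true = [], []
for train_idx, val_idx in cv.split(X, y):
    clf = LogisticRegression(max_iter=1000).fit(X[train_idx], y[train_idx])
    oof_pred.append(clf.predict_proba(X[val_idx])[:, 1])
    oof_true.append(y[val_idx])

# One pooled logloss over all out-of-fold predictions,
# not the mean of three per-fold loglosses.
pooled = log_loss(np.concatenate(oof_true), np.concatenate(oof_pred))
print(f"pooled LOGLOSS: {pooled:.4f}")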
@@ -757,7 +939,6 @@ def trainable(
     experiment_name,
     target_number,
     create_model,
-    type_name="hyperopts",
     plot=False,
     log_dir=None,
     target_clf_thresholds: dict = None,
@@ -783,9 +964,7 @@
         x_val = x_val[:, -timesteps:, :]

     # Compile and fit model on train set
-    start = time.time()
     model.fit(x_train, y_train, x_val, y_val, params)
-    stop = time.time()

     # Prediction on val set
     y_pred = model.predict(x_val)
@@ -815,8 +994,6 @@
     score = {
         "DATE": datetime.now(),
         "MODEL_NAME": model.model_name,
-        "TYPE": type_name,
-        "TRAINING_TIME": stop - start,
         "EVAL_DATA_STD": prediction["TARGET"].std(),
     }

@@ -825,7 +1002,8 @@
     metric = "RMSE" if target_type == "regression" else "LOGLOSS"
     logger.info(f"{model.model_name} scores on validation set: {score[metric]:.4f}")

-    if
+    # Report to Ray if in Ray context
+    if session.get_session():
         session.report(metrics=score)
     return score

@@ -839,24 +1017,24 @@ class ModelSelectionEngine:
         data,
         reshaped_data,
         target_number,
-        target_clf,
         experiment,
-        models_idx,
-        time_series,
-        date_column,
-        group_column,
-        target_clf_thresholds,
         **kwargs,
     ):
         self.data = data
         self.reshaped_data = reshaped_data
         self.target_number = target_number
         self.experiment = experiment
-
-
-
-        self.
-        self.
+
+        # Get all parameters from experiment context
+        context = self.experiment.context
+        self.target_clf = context.get("target_clf", [])
+        self.models_idx = context.get("models_idx", [])
+        self.time_series = context.get("time_series", False)
+        self.date_column = context.get("date_column", None)
+        self.group_column = context.get("group_column", None)
+
+        # Handle target_clf_thresholds
+        target_clf_thresholds = context.get("target_clf_thresholds", {})
         self.target_clf_thresholds = (
             target_clf_thresholds[target_number]
             if target_number in target_clf_thresholds.keys()
@@ -878,24 +1056,19 @@ class ModelSelectionEngine:
         )

     # Main training function
-    def run(
-        self,
-        experiment_name,
-        perform_hyperopt=True,
-        number_of_trials=20,
-        perform_crossval=False,
-        plot=True,
-        clean_dir=False,  # TODO: This has been unused because now feature_selection is in the target directory
-        preserve_model=True,
-        best_params=None,
-    ):
+    def run(self, best_params=None):
         """
         Selects the best models based on a target variable, optionally performing hyperparameter optimization
         and cross-validation, and manages outputs in a session-specific directory.
         """
-
-
-        self.
+        # Get all parameters from experiment context
+        context = self.experiment.context
+        self.experiment_name = context.get("experiment_name", "")
+        self.plot = context.get("plot", True)
+        self.number_of_trials = context.get("number_of_trials", 20)
+        self.perform_crossval = context.get("perform_crossval", False)
+        self.preserve_model = context.get("preserve_model", True)
+        self.perform_hyperopt = context.get("perform_hyperopt", True)

         if self.experiment_id is None:
             raise ValueError("Please provide a experiment.")
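Both __init__ and run() now read their options from a single experiment.context mapping with defaults, instead of long explicit signatures (run() shrinks from nine parameters to one). A runnable sketch of the lookup pattern; SimpleNamespace stands in for lecrapaud's Experiment object, whose wiring is not shown in this diff:

from types import SimpleNamespace

# Stand-in for an Experiment: only the .context mapping matters here.
experiment = SimpleNamespace(
    context={
        "experiment_name": "demo",
        "models_idx": [0, 2],
        "time_series": True,
        "perform_crossval": False,  # CV now happens inside hyperopt, if at all
    }
)

# Inside the engine, every option is read with a fallback default:
context = experiment.context
time_series = context.get("time_series", False)
number_of_trials = context.get("number_of_trials", 20)  # absent -> default used
print(time_series, number_of_trials)  # True 20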
@@ -945,12 +1118,11 @@ class ModelSelectionEngine:
         # create model selection in db
         target = Target.find_by(name=f"TARGET_{self.target_number}")
         model_selection = ModelSelection.upsert(
-            match_fields=["target_id", "experiment_id"],
             target_id=target.id,
             experiment_id=self.experiment_id,
         )

-        #
+        # STEP 1 : TRAINING MODELS
         for i in self.models_idx:
             config = all_models[i]
             recurrent = config["recurrent"]
@@ -963,24 +1135,16 @@ class ModelSelectionEngine:
             self.results_dir = f"{self.target_dir}/{model_name}"
             if not os.path.exists(f"{self.results_dir}"):
                 os.makedirs(f"{self.results_dir}")
-            elif preserve_model and contains_best(self.results_dir):
+            elif self.preserve_model and contains_best(self.results_dir):
                 continue
-            elif perform_hyperopt:
+            elif self.perform_hyperopt:
                 clean_directory(self.results_dir)

-            logger.info(
-
-                match_fields=["name", "type"],
-                name=model_name,
-                type=self.target_type,
-            )
-            model_training = ModelTraining.upsert(
-                match_fields=["model_id", "model_selection_id"],
-                model_id=model.id,
-                model_selection_id=model_selection.id,
+            logger.info(
+                f"{self.experiment_name} - Training a {model_name} at {datetime.now()} for TARGET_{self.target_number}"
             )

-            #
+            # Getting data
             if recurrent:
                 # Clear cluster from previous Keras session graphs.
                 K.clear_session()
@@ -990,7 +1154,7 @@ class ModelSelectionEngine:
                     for i, e in enumerate(self.all_features)
                     if e in set(self.features)
                 ]
-                # TODO: Verify that features_idx are the right one, because scaling can re-arrange columns...
+                # TODO: Verify that features_idx are the right one, because scaling can re-arrange columns (should be good)...
                 x_train = x_train_reshaped[:, :, features_idx]
                 y_train = y_train_reshaped[:, [self.target_number, 0]]
                 x_val = x_val_reshaped[:, :, features_idx]
@@ -1020,7 +1184,8 @@ class ModelSelectionEngine:
             y_test = test[f"TARGET_{self.target_number}"].rename("TARGET")

             log_dir = get_log_dir(self.target_dir, model_name)
-
+
+            # Instantiate model
             model = ModelEngine(
                 target_number=self.target_number,
                 model_name=model_name,
@@ -1031,9 +1196,9 @@ class ModelSelectionEngine:
                 log_dir=log_dir,
             )

-            start = time.time()
             # Tuning hyperparameters
-
+            start = time.time()
+            if self.perform_hyperopt:
                 model_best_params = self.hyperoptimize(
                     x_train, y_train, x_val, y_val, model
                 )
@@ -1049,7 +1214,7 @@ class ModelSelectionEngine:
                     f"Could not find {model_name} in current data. Try to run an hyperoptimization by setting `perform_hyperopt` to true, or pass `best_params`"
                 )

-            #
+            # Save best params
             best_params_file = f"{self.target_dir}/best_params.json"
             try:
                 with open(best_params_file, "r") as f:
@@ -1061,114 +1226,25 @@ class ModelSelectionEngine:
             with open(best_params_file, "w") as f:
                 json.dump(json_dict, f, indent=4)

-            #
-            if
-
-
-
-
-
-
-
-
-
-
-                for i, (train_index, val_index) in enumerate(tscv.split(x_train_val)):
-                    self.type_name = f"crossval_fold_{i}"
-
-                    if self.time_series:
-                        date_series = pd.concat(
-                            [
-                                train[self.date_column],
-                                val[self.date_column],
-                                test[self.date_column],
-                            ],
-                            axis=0,
-                        ).reset_index(drop=True)
-
-                        date_series = date_series.map(pd.Timestamp.fromordinal)
-
-                        # Now you can use the actual train/val indices to extract ranges
-                        train_start = date_series.iloc[train_index[0]]
-                        train_end = date_series.iloc[train_index[-1]]
-                        val_start = date_series.iloc[val_index[0]]
-                        val_end = date_series.iloc[val_index[-1]]
-
-                        logger.info(
-                            f"[Fold {i}] Train: {len(train_index)} samples from {train_start.date()} to {train_end.date()} | "
-                            f"Validation: {len(val_index)} samples from {val_start.date()} to {val_end.date()}"
-                        )
-                    else:
-                        logger.info(
-                            f"[Fold {i}] Train: {len(train_index)} samples | Validation: {len(val_index)} samples"
-                        )
-
-                    # Train the model and get the score
-                    if recurrent:
-                        cv_score, _, _ = self.train_model(
-                            params=model_best_params,
-                            x_train=x_train_val[train_index],
-                            y_train=y_train_val[train_index],
-                            x_val=x_train_val[val_index],
-                            y_val=y_train_val[val_index],
-                            model=model,
-                        )
-                    else:
-                        cv_score, _, _ = self.train_model(
-                            params=model_best_params,
-                            x_train=x_train_val.iloc[train_index],
-                            y_train=y_train_val.iloc[train_index],
-                            x_val=x_train_val.iloc[val_index],
-                            y_val=y_train_val.iloc[val_index],
-                            model=model,
-                        )
-
-                    # Append score to the list
-                    cv_scores.append(cv_score)
-
-                # Calculate mean of all numerical metrics across all cross-validation folds
-                cv_scores_df = pd.DataFrame(cv_scores)
-                # Get mean of all numeric columns
-                cv_means = cv_scores_df.mean(numeric_only=True).to_dict()
+            # Always evaluate on test set (no cross-validation here)
+            # The hyperopt already did CV if needed to find best params
+            best_score, best_model, best_pred = self.train_model(
+                params=model_best_params,
+                x_train=pd.concat([x_train, x_val], axis=0),
+                y_train=pd.concat([y_train, y_val], axis=0),
+                x_val=x_test,
+                y_val=y_test,
+                model=model,
+            )
+            stop = time.time()
+            training_time = stop - start

-
-
+            logger.info(f"Model training finished in {training_time:.2f} seconds")
+            logger.info(f"👉 {model.model_name} scores on test set:")
+            for metric, value in best_score.items():
+                if isinstance(value, (int, float)):
                     logger.info(f"  {metric}: {value:.4f}")

-            # Retrain on entire training set, but keep score on cross-validation folds
-            # Get the test score using the best model
-            test_score, best_model, best_pred = self.train_model(
-                params=model_best_params,
-                x_train=pd.concat([x_train, x_val], axis=0),
-                y_train=pd.concat([y_train, y_val], axis=0),
-                x_val=x_test,
-                y_val=y_test,
-                model=model,
-            )
-
-            # Update all metrics with cross-validation means
-            for metric, value in cv_means.items():
-                if metric in test_score:  # Only update existing metrics
-                    test_score[metric] = value
-            best_score = test_score
-            best_score["TYPE"] = "crossval"
-            else:
-                # Evaluate on test set
-                self.type_name = "testset"
-                best_score, best_model, best_pred = self.train_model(
-                    params=model_best_params,
-                    x_train=pd.concat([x_train, x_val], axis=0),
-                    y_train=pd.concat([y_train, y_val], axis=0),
-                    x_val=x_test,
-                    y_val=y_test,
-                    model=model,
-                )
-
-                logger.info(f"👉 {model.model_name} scores on test set:")
-                for metric, value in best_score.items():
-                    if isinstance(value, (int, float)):
-                        logger.info(f"  {metric}: {value:.4f}")
-
             # Save predictions
             best_pred.to_csv(
                 f"{self.results_dir}/prediction.csv",
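With the in-loop cross-validation removed, the flow is now: tune (optionally with CV inside the objective), refit once on train plus validation using the best parameters, then score a single time on the held-out test set. The same flow on toy data, scikit-learn only (a hypothetical example, not the lecrapaud API):

from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Toy data standing in for the train/val/test splits used above.
X, y = make_regression(n_samples=400, noise=10, random_state=42)
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

best_params = {"alpha": 0.5}  # pretend this came out of hyperopt
final = Ridge(**best_params).fit(X_trainval, y_trainval)  # refit on train + val
rmse = mean_squared_error(y_test, final.predict(X_test)) ** 0.5  # one test-set score
print(f"test RMSE: {rmse:.4f}")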
@@ -1179,7 +1255,6 @@ class ModelSelectionEngine:

             # Save best model
             model_path = best_model.save(self.results_dir)
-
             model_path = Path(model_path).resolve()
             best_score["MODEL_PATH"] = model_path

@@ -1202,32 +1277,26 @@ class ModelSelectionEngine:
             scores_tracking.sort_values(self.metric, ascending=True, inplace=True)
             scores_tracking.to_csv(scores_tracking_path, index=False)

-            # Save
-            stop = time.time()
-            training_time = stop - start
-            model_training.best_params = model_best_params
-            model_training.model_path = model_path
-            model_training.training_time = training_time
-            model_training.save()
-
-            # Store metrics in DB
+            # Save in db
             drop_cols = [
                 "DATE",
                 "MODEL_NAME",
-                "MODEL_PATH",
             ]
             best_score = {k: v for k, v in best_score.items() if k not in drop_cols}
             score_data = {k.lower(): v for k, v in best_score.items()}
-
-
-
-
+            model = Model.upsert(
+                name=model_name,
+                type=self.target_type,
+            )
+            ModelSelectionScore.upsert(
+                model_id=model.id,
+                model_selection_id=model_selection.id,
+                best_params=serialize_for_json(model_best_params),
+                training_time=training_time,
                 **score_data,
             )

-
-
-        # find best model type
+        # STEP 2 :FINDING BEST MODEL OVERALL
         scores_tracking_path = f"{self.target_dir}/scores_tracking.csv"
         scores_tracking = pd.read_csv(scores_tracking_path)
         best_score_overall = scores_tracking.iloc[0, :]
|
|
|
1238
1307
|
else:
|
|
1239
1308
|
best_thresholds = None
|
|
1240
1309
|
|
|
1241
|
-
# Remove any .best or .keras files
|
|
1310
|
+
# Remove any .best or .keras files, and save best model in target_dir
|
|
1242
1311
|
for file_path in glob.glob(os.path.join(self.target_dir, "*.best")) + glob.glob(
|
|
1243
1312
|
os.path.join(self.target_dir, "*.keras")
|
|
1244
1313
|
):
|
|
1245
1314
|
os.remove(file_path)
|
|
1246
|
-
# Copy the best model in root training folder for this target
|
|
1247
1315
|
best_model_path = Path(
|
|
1248
1316
|
f"{self.target_dir}/{os.path.basename(best_score_overall['MODEL_PATH'])}"
|
|
1249
1317
|
).resolve()
|
|
@@ -1255,13 +1323,13 @@ class ModelSelectionEngine:
         with open(f"{self.target_dir}/best_params.json", "r") as f:
             best_model_params = json.load(f)[best_model_name]

-        # Save
-
+        # Save to db
         model_selection = ModelSelection.get(model_selection.id)
-
+        model = Model.find_by(
             name=best_score_overall["MODEL_NAME"], type=self.target_type
-        )
-        model_selection.
+        )
+        model_selection.best_model_id = model.id
+        model_selection.best_model_params = serialize_for_json(best_model_params)
         model_selection.best_thresholds = best_thresholds
         model_selection.best_model_path = best_model_path

@@ -1286,7 +1354,169 @@ class ModelSelectionEngine:
         return best_model

     def hyperoptimize(self, x_train, y_train, x_val, y_val, model: ModelEngine):
-
+        """Choose between Ray Tune and HyperOpt standalone based on configuration."""
+        if LECRAPAUD_OPTIMIZATION_BACKEND == "hyperopt":
+            return self.hyperoptimize_hyperopt(x_train, y_train, x_val, y_val, model)
+        else:
+            return self.hyperoptimize_ray(x_train, y_train, x_val, y_val, model)
+
+    def hyperoptimize_hyperopt(
+        self, x_train, y_train, x_val, y_val, model: ModelEngine
+    ):
+        """Hyperparameter optimization using HyperOpt standalone (Celery-friendly)."""
+
+        logger.info("Start tuning hyperparameters with HyperOpt standalone...")
+
+        # Convert Ray search space to HyperOpt search space
+        def convert_search_space(ray_space):
+            """Convert Ray Tune search space to HyperOpt format."""
+            from ray.tune.search.sample import Categorical, Float, Integer
+
+            hp_space = {}
+            for key, value in ray_space.items():
+                if isinstance(value, Float):
+                    if (
+                        hasattr(value, "sampler")
+                        and value.sampler.__class__.__name__ == "LogUniform"
+                    ):
+                        # LogUniform distribution
+                        hp_space[key] = hp.loguniform(
+                            key, np.log(value.lower), np.log(value.upper)
+                        )
+                    else:
+                        # Uniform distribution
+                        hp_space[key] = hp.uniform(key, value.lower, value.upper)
+                elif isinstance(value, Integer):
+                    # Integer uniform distribution
+                    hp_space[key] = hp.randint(key, value.lower, value.upper)
+                elif isinstance(value, Categorical):
+                    # Categorical/choice distribution
+                    hp_space[key] = hp.choice(key, value.categories)
+                elif isinstance(value, dict):
+                    # Nested dict, recurse
+                    hp_space[key] = convert_search_space(value)
+                else:
+                    # Static value or unknown type
+                    hp_space[key] = value
+            return hp_space
+
+        # Create objective function for HyperOpt
+        def objective(params):
+            """Objective function to minimize."""
+            try:
+                # Convert numpy types to native Python types
+                params = serialize_for_json(params)
+
+                # Use existing trainable function based on perform_crossval
+                if self.perform_crossval:
+                    score = trainable_cv(
+                        params,
+                        x_train,
+                        y_train,
+                        x_val,
+                        y_val,
+                        model.model_name,
+                        self.target_type,
+                        self.experiment_name,
+                        self.target_number,
+                        model.create_model,
+                        n_splits=3,
+                        plot=model.plot,
+                        log_dir=model.log_dir,
+                        target_clf_thresholds=self.target_clf_thresholds,
+                        time_series=self.time_series,
+                        recurrent=model.recurrent,
+                    )
+                else:
+                    score, _, _ = trainable(
+                        params,
+                        x_train,
+                        y_train,
+                        x_val,
+                        y_val,
+                        model.model_name,
+                        self.target_type,
+                        self.experiment_name,
+                        self.target_number,
+                        model.create_model,
+                        plot=model.plot,
+                        log_dir=model.log_dir,
+                        target_clf_thresholds=self.target_clf_thresholds,
+                    )
+
+                # HyperOpt minimizes, so return the metric directly
+                loss = score[self.metric]
+
+                # Log trial info
+                logger.info(f"Trial completed - {self.metric}: {loss:.4f}")
+
+                return {
+                    "loss": loss,
+                    "status": STATUS_OK,
+                    "score": score,  # Keep full score dict for analysis
+                }
+
+            except Exception as e:
+                logger.error(f"Trial failed: {str(e)}")
+                return {"loss": float("inf"), "status": STATUS_OK, "error": str(e)}
+
+        # Convert search space
+        hp_search_space = convert_search_space(model.search_params)
+
+        # Run optimization
+        trials = Trials()
+        best_params = fmin(
+            fn=objective,
+            space=hp_search_space,
+            algo=tpe.suggest,
+            max_evals=self.number_of_trials,
+            trials=trials,
+            verbose=True,
+            show_progressbar=True,
+        )
+
+        # Get the actual parameter values (not just indices for hp.choice)
+        best_params = space_eval(hp_search_space, best_params)
+
+        # Convert numpy types to native Python types
+        best_params = serialize_for_json(best_params)
+
+        # Get best score from trials
+        best_trial_idx = np.argmin([t["result"]["loss"] for t in trials.trials])
+        best_score = trials.trials[best_trial_idx]["result"].get("score", {})
+
+        # Log results
+        logger.info(f"Best hyperparameters found were:\n{best_params}")
+        logger.info(f"Best Scores found were:\n{best_score}")
+
+        # Create summary DataFrame for consistency with Ray version
+        results_df = pd.DataFrame(
+            [
+                {
+                    "trial_id": i,
+                    self.metric: t["result"]["loss"],
+                    **{
+                        k: v
+                        for k, v in t["result"].get("score", {}).items()
+                        if isinstance(v, (int, float))
+                    },
+                }
+                for i, t in enumerate(trials.trials)
+                if t["result"]["status"] == STATUS_OK
+            ]
+        )
+
+        if not results_df.empty:
+            logger.info(f"Markdown table with all trials :\n{results_df.to_markdown()}")
+
+        # Save trial history for analysis
+        trials_path = f"{self.results_dir}/hyperopt_trials.pkl"
+        with open(trials_path, "wb") as f:
+            pickle.dump(trials, f)
+
+        return best_params
+
+    def hyperoptimize_ray(self, x_train, y_train, x_val, y_val, model: ModelEngine):

         def collect_error_logs(target_dir: int, storage_path: str):
             output_error_file = f"{target_dir}/errors.log"
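For readers who know Ray Tune but not standalone HyperOpt, the moving parts used by hyperoptimize_hyperopt above are: a search space built from hp.* distributions, an objective returning a {"loss", "status"} dict, a Trials store, and space_eval to map hp.choice indices back to values. A minimal, self-contained run with a toy objective (not a lecrapaud model):

import numpy as np
from hyperopt import STATUS_OK, Trials, fmin, hp, space_eval, tpe

# Toy search space mixing the distribution types handled by convert_search_space.
space = {
    "lr": hp.loguniform("lr", np.log(1e-4), np.log(1e-1)),
    "depth": hp.randint("depth", 2, 8),
    "booster": hp.choice("booster", ["gbtree", "dart"]),
}

def objective(params):
    # Stand-in for trainable()/trainable_cv(): any scalar loss to minimize.
    loss = (params["lr"] - 0.01) ** 2 + 0.001 * params["depth"]
    return {"loss": loss, "status": STATUS_OK}

trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=25, trials=trials, show_progressbar=False)

# space_eval maps hp.choice indices back to the actual category values.
print(space_eval(space, best))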
@@ -1329,9 +1559,22 @@ class ModelSelectionEngine:
             }
         )

+        # Choose between regular trainable or CV version based on perform_crossval flag
+        # perform_crossval controls whether to use CV during hyperopt
+        if self.perform_crossval:
+            trainable_fn = trainable_cv
+            additional_params = {
+                "n_splits": 3,  # Can be made configurable
+                "time_series": self.time_series,  # Controls whether to use TimeSeriesSplit or StratifiedKFold
+                "recurrent": model.recurrent,
+            }
+        else:
+            trainable_fn = trainable
+            additional_params = {}
+
         tuner = Tuner(
             trainable=with_parameters(
-
+                trainable_fn,
                 x_train=x_train,
                 y_train=y_train,
                 x_val=x_val,
@@ -1341,10 +1584,10 @@ class ModelSelectionEngine:
                 experiment_name=self.experiment_name,
                 target_number=self.target_number,
                 create_model=model.create_model,
-                type_name="hyperopts",
                 plot=model.plot,
                 log_dir=model.log_dir,
                 target_clf_thresholds=self.target_clf_thresholds,
+                **additional_params,
             ),
             param_space=model.search_params,
             tune_config=TuneConfig(
@@ -1398,7 +1641,6 @@ class ModelSelectionEngine:
             self.experiment_name,
             self.target_number,
             model.create_model,
-            self.type_name,
             model.plot,
             log_dir=model.log_dir,
             target_clf_thresholds=self.target_clf_thresholds,