lecrapaud 0.11.5__tar.gz → 0.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/PKG-INFO +2 -2
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/api.py +5 -6
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/model_selection.py +25 -20
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/pyproject.toml +3 -2
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/LICENSE +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/README.md +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/__init__.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/config.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/__init__.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/alembic/README +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/alembic/env.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/alembic/script.py.mako +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/alembic.ini +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/models/__init__.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/models/base.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/models/experiment.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/models/feature.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/models/feature_selection.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/models/feature_selection_rank.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/models/model.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/models/model_selection.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/models/model_training.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/models/score.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/models/target.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/session.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/directories.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/experiment.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/feature_engineering.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/feature_selection.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/integrations/openai_integration.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/jobs/__init__.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/jobs/config.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/jobs/scheduler.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/jobs/tasks.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/misc/tabpfn_tests.ipynb +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/misc/test-gpu-bilstm.ipynb +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/misc/test-gpu-resnet.ipynb +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/misc/test-gpu-transformers.ipynb +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/search_space.py +0 -0
- {lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: lecrapaud
|
|
3
|
-
Version: 0.11.5
|
|
3
|
+
Version: 0.12.0
|
|
4
4
|
Summary: Framework for machine and deep learning, with regression, classification and time series analysis
|
|
5
5
|
License: Apache License
|
|
6
6
|
Author: Pierre H. Gallet
|
|
@@ -21,7 +21,7 @@ Requires-Dist: openai (>=1.88.0)
|
|
|
21
21
|
Requires-Dist: pandas (>=2.3.0)
|
|
22
22
|
Requires-Dist: python-dotenv (>=1.1.0)
|
|
23
23
|
Requires-Dist: scikit-learn (>=1.6.1)
|
|
24
|
-
Requires-Dist: scipy (
|
|
24
|
+
Requires-Dist: scipy (<1.14.0)
|
|
25
25
|
Requires-Dist: seaborn (>=0.13.2)
|
|
26
26
|
Requires-Dist: sqlalchemy (>=2.0.41)
|
|
27
27
|
Requires-Dist: tensorboardx (>=2.6.4)
|
|
@@ -128,7 +128,7 @@ class ExperimentEngine:
|
|
|
128
128
|
value = normalize_models_idx(value)
|
|
129
129
|
setattr(self, key, value)
|
|
130
130
|
|
|
131
|
-
def train(self, data):
|
|
131
|
+
def train(self, data, best_params=None):
|
|
132
132
|
logger.info("Running training...")
|
|
133
133
|
|
|
134
134
|
data_eng = self.feature_engineering(data)
|
|
@@ -143,7 +143,7 @@ class ExperimentEngine:
|
|
|
143
143
|
std_data, reshaped_data = self.preprocess_model(train, val, test)
|
|
144
144
|
logger.info("Model preprocessing done.")
|
|
145
145
|
|
|
146
|
-
self.model_selection(std_data, reshaped_data)
|
|
146
|
+
self.model_selection(std_data, reshaped_data, best_params=best_params)
|
|
147
147
|
logger.info("Model selection done.")
|
|
148
148
|
|
|
149
149
|
def predict(self, new_data, verbose: int = 0):
|
|
@@ -221,9 +221,7 @@ class ExperimentEngine:
|
|
|
221
221
|
if target_col is not None:
|
|
222
222
|
y_true = new_data[target_col]
|
|
223
223
|
prediction = pd.concat([y_true, y_pred], axis=1)
|
|
224
|
-
prediction.rename(
|
|
225
|
-
columns={f"TARGET_{target_number}": "TARGET"}, inplace=True
|
|
226
|
-
)
|
|
224
|
+
prediction.rename(columns={target_col: "TARGET"}, inplace=True)
|
|
227
225
|
score = evaluate(
|
|
228
226
|
prediction,
|
|
229
227
|
target_type=model.target_type,
|
|
@@ -323,7 +321,7 @@ class ExperimentEngine:
|
|
|
323
321
|
data, scaled_data, reshaped_data = app.inference()
|
|
324
322
|
return data, scaled_data, reshaped_data
|
|
325
323
|
|
|
326
|
-
def model_selection(self, data, reshaped_data):
|
|
324
|
+
def model_selection(self, data, reshaped_data, best_params=None):
|
|
327
325
|
for target_number in self.target_numbers:
|
|
328
326
|
app = ModelSelectionEngine(
|
|
329
327
|
data=data,
|
|
@@ -344,6 +342,7 @@ class ExperimentEngine:
|
|
|
344
342
|
perform_crossval=self.perform_crossval,
|
|
345
343
|
plot=self.plot,
|
|
346
344
|
preserve_model=self.preserve_model,
|
|
345
|
+
best_params=best_params[target_number],
|
|
347
346
|
)
|
|
348
347
|
|
|
349
348
|
def get_scores(self, target_number: int):
|
|
@@ -721,6 +721,7 @@ class ModelSelectionEngine:
|
|
|
721
721
|
plot=True,
|
|
722
722
|
clean_dir=False, # TODO: This has been unused because now feature_selection is in the target directory
|
|
723
723
|
preserve_model=True,
|
|
724
|
+
best_params=None,
|
|
724
725
|
):
|
|
725
726
|
"""
|
|
726
727
|
Selects the best models based on a target variable, optionally performing hyperparameter optimization
|
|
@@ -866,29 +867,33 @@ class ModelSelectionEngine:
|
|
|
866
867
|
start = time.time()
|
|
867
868
|
# Tuning hyperparameters
|
|
868
869
|
if perform_hyperopt:
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
with open(best_params_file, "r") as f:
|
|
875
|
-
json_dict = json.load(f)
|
|
876
|
-
except FileNotFoundError:
|
|
877
|
-
json_dict = {}
|
|
878
|
-
|
|
879
|
-
json_dict[model.model_name] = serialize_for_json(best_params)
|
|
880
|
-
with open(best_params_file, "w") as f:
|
|
881
|
-
json.dump(json_dict, f, indent=4)
|
|
870
|
+
model_best_params = self.hyperoptimize(
|
|
871
|
+
x_train, y_train, x_val, y_val, model
|
|
872
|
+
)
|
|
873
|
+
elif best_params:
|
|
874
|
+
model_best_params = best_params[model_name]
|
|
882
875
|
else:
|
|
883
876
|
try:
|
|
884
877
|
with open(f"{self.target_dir}/best_params.json") as f:
|
|
885
878
|
json_dict = json.load(f)
|
|
886
|
-
|
|
879
|
+
model_best_params = json_dict[model_name]
|
|
887
880
|
except Exception:
|
|
888
881
|
raise FileNotFoundError(
|
|
889
|
-
f"Could not find {model_name} in current data. Try to run an hyperoptimization by setting `perform_hyperopt` to true"
|
|
882
|
+
f"Could not find {model_name} in current data. Try to run an hyperoptimization by setting `perform_hyperopt` to true, pass `best_params`"
|
|
890
883
|
)
|
|
891
884
|
|
|
885
|
+
# save best params
|
|
886
|
+
best_params_file = f"{self.target_dir}/best_params.json"
|
|
887
|
+
try:
|
|
888
|
+
with open(best_params_file, "r") as f:
|
|
889
|
+
json_dict = json.load(f)
|
|
890
|
+
except FileNotFoundError:
|
|
891
|
+
json_dict = {}
|
|
892
|
+
|
|
893
|
+
json_dict[model.model_name] = serialize_for_json(model_best_params)
|
|
894
|
+
with open(best_params_file, "w") as f:
|
|
895
|
+
json.dump(json_dict, f, indent=4)
|
|
896
|
+
|
|
892
897
|
# Perform cross-validation of the best model on k-folds of train + val set
|
|
893
898
|
if perform_crossval:
|
|
894
899
|
x_train_val = pd.concat([x_train, x_val, x_test], axis=0)
|
|
@@ -928,7 +933,7 @@ class ModelSelectionEngine:
|
|
|
928
933
|
# Train the model and get the score
|
|
929
934
|
if recurrent:
|
|
930
935
|
cross_validation_score, _, _ = self.train_model(
|
|
931
|
-
params=best_params,
|
|
936
|
+
params=model_best_params,
|
|
932
937
|
x_train=x_train_val[train_index],
|
|
933
938
|
y_train=y_train_val[train_index],
|
|
934
939
|
x_val=x_train_val[val_index],
|
|
@@ -937,7 +942,7 @@ class ModelSelectionEngine:
|
|
|
937
942
|
)
|
|
938
943
|
else:
|
|
939
944
|
cross_validation_score, _, _ = self.train_model(
|
|
940
|
-
params=best_params,
|
|
945
|
+
params=model_best_params,
|
|
941
946
|
x_train=x_train_val.iloc[train_index],
|
|
942
947
|
y_train=y_train_val.iloc[train_index],
|
|
943
948
|
x_val=x_train_val.iloc[val_index],
|
|
@@ -958,7 +963,7 @@ class ModelSelectionEngine:
|
|
|
958
963
|
|
|
959
964
|
# Retrain on entire training set, but keep score on cross-validation folds
|
|
960
965
|
best_score, best_model, best_pred = self.train_model(
|
|
961
|
-
params=best_params,
|
|
966
|
+
params=model_best_params,
|
|
962
967
|
x_train=pd.concat([x_train, x_val], axis=0),
|
|
963
968
|
y_train=pd.concat([y_train, y_val], axis=0),
|
|
964
969
|
x_val=x_test,
|
|
@@ -970,7 +975,7 @@ class ModelSelectionEngine:
|
|
|
970
975
|
# Evaluate on validation set
|
|
971
976
|
self.type_name = "validation"
|
|
972
977
|
best_score, best_model, best_pred = self.train_model(
|
|
973
|
-
params=best_params,
|
|
978
|
+
params=model_best_params,
|
|
974
979
|
x_train=pd.concat([x_train, x_val], axis=0),
|
|
975
980
|
y_train=pd.concat([y_train, y_val], axis=0),
|
|
976
981
|
x_val=x_test,
|
|
@@ -1016,7 +1021,7 @@ class ModelSelectionEngine:
|
|
|
1016
1021
|
# Save model training metadata
|
|
1017
1022
|
stop = time.time()
|
|
1018
1023
|
training_time = stop - start
|
|
1019
|
-
model_training.best_params = best_params
|
|
1024
|
+
model_training.best_params = model_best_params
|
|
1020
1025
|
model_training.model_path = model_path
|
|
1021
1026
|
model_training.training_time = training_time
|
|
1022
1027
|
model_training.save()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "lecrapaud"
|
|
3
|
-
version = "0.11.5"
|
|
3
|
+
version = "0.12.0"
|
|
4
4
|
description = "Framework for machine and deep learning, with regression, classification and time series analysis"
|
|
5
5
|
authors = [
|
|
6
6
|
{name = "Pierre H. Gallet"}
|
|
@@ -22,7 +22,7 @@ dependencies = [
|
|
|
22
22
|
"pandas>=2.3.0",
|
|
23
23
|
"python-dotenv>=1.1.0",
|
|
24
24
|
"scikit-learn>=1.6.1",
|
|
25
|
-
"scipy
|
|
25
|
+
"scipy<1.14.0",
|
|
26
26
|
"seaborn>=0.13.2",
|
|
27
27
|
"sqlalchemy>=2.0.41",
|
|
28
28
|
"tensorboardx>=2.6.4",
|
|
@@ -57,6 +57,7 @@ dev = [
|
|
|
57
57
|
"pytest-mock>=3.14.1",
|
|
58
58
|
"ray[tune]>=2.47.1",
|
|
59
59
|
"safety>=3.5.2",
|
|
60
|
+
"scipy<1.14.0",
|
|
60
61
|
"sphinx>=8.2.3",
|
|
61
62
|
"sphinxcontrib-httpdomain>=1.8.1",
|
|
62
63
|
"sphinxcontrib-openapi>=0.8.4",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py
RENAMED
|
File without changes
|
{lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py
RENAMED
|
File without changes
|
{lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py
RENAMED
|
File without changes
|
{lecrapaud-0.11.5 → lecrapaud-0.12.0}/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|