lecrapaud 0.9.2__tar.gz → 0.9.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lecrapaud might be problematic. Click here for more details.
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/PKG-INFO +1 -1
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/api.py +1 -1
- lecrapaud-0.9.4/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py +30 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/model_selection.py +1 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/experiment.py +3 -1
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/feature_engineering.py +10 -3
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/model_selection.py +17 -14
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/pyproject.toml +1 -1
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/LICENSE +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/README.md +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/__init__.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/config.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/__init__.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic/README +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic/env.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic/script.py.mako +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic.ini +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/__init__.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/base.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/experiment.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/feature.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/feature_selection.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/feature_selection_rank.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/model.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/model_training.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/score.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/models/target.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/session.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/directories.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/feature_selection.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/integrations/openai_integration.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/jobs/__init__.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/jobs/config.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/jobs/scheduler.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/jobs/tasks.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/search_space.py +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/speed_tests/test-gpu-bilstm.ipynb +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/speed_tests/test-gpu-resnet.ipynb +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/speed_tests/test-gpu-transformers.ipynb +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/speed_tests/tests.ipynb +0 -0
- {lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/utils.py +0 -0
|
@@ -149,13 +149,13 @@ class ExperimentEngine:
|
|
|
149
149
|
y_pred.name = "PRED"
|
|
150
150
|
|
|
151
151
|
# evaluate if TARGET is in columns
|
|
152
|
+
new_data.columns = new_data.columns.str.upper()
|
|
152
153
|
if f"TARGET_{target_number}" in new_data.columns:
|
|
153
154
|
y_true = new_data[f"TARGET_{target_number}"]
|
|
154
155
|
prediction = pd.concat([y_true, y_pred], axis=1)
|
|
155
156
|
prediction.rename(
|
|
156
157
|
columns={f"TARGET_{target_number}": "TARGET"}, inplace=True
|
|
157
158
|
)
|
|
158
|
-
print(prediction)
|
|
159
159
|
score = evaluate(
|
|
160
160
|
prediction,
|
|
161
161
|
target_type=model.target_type,
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
Revision ID: 72aa496ca65b
|
|
4
|
+
Revises: 86457e2f333f
|
|
5
|
+
Create Date: 2025-06-25 17:59:28.544283
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
from typing import Sequence, Union
|
|
9
|
+
|
|
10
|
+
from alembic import op
|
|
11
|
+
import sqlalchemy as sa
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# revision identifiers, used by Alembic.
|
|
15
|
+
revision: str = '72aa496ca65b'
|
|
16
|
+
down_revision: Union[str, None] = '86457e2f333f'
|
|
17
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
18
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def upgrade() -> None:
|
|
22
|
+
# ### commands auto generated by Alembic - please adjust! ###
|
|
23
|
+
op.add_column('lecrapaud_model_selections', sa.Column('best_thresholds', sa.JSON(), nullable=True))
|
|
24
|
+
# ### end Alembic commands ###
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def downgrade() -> None:
|
|
28
|
+
# ### commands auto generated by Alembic - please adjust! ###
|
|
29
|
+
op.drop_column('lecrapaud_model_selections', 'best_thresholds')
|
|
30
|
+
# ### end Alembic commands ###
|
|
@@ -39,7 +39,9 @@ def create_experiment(
|
|
|
39
39
|
|
|
40
40
|
with get_db() as db:
|
|
41
41
|
all_targets = Target.get_all(db=db)
|
|
42
|
-
targets = [
|
|
42
|
+
targets = [
|
|
43
|
+
target for target in all_targets if target.name in data.columns.str.upper()
|
|
44
|
+
]
|
|
43
45
|
experiment_name = f"{experiment_name}_{groups["number_of_groups"] if group_column else 'ng'}_{corr_threshold}_{percentile}_{max_features}_{dates['start_date'].date() if date_column else 'nd'}_{dates['end_date'].date() if date_column else 'nd'}"
|
|
44
46
|
|
|
45
47
|
experiment_dir = f"{tmp_dir}/{experiment_name}"
|
|
@@ -493,6 +493,13 @@ class PreprocessFeature:
|
|
|
493
493
|
for name, data in zip(["train", "val", "test"], [train, val, test]):
|
|
494
494
|
logger.info(f"{data.shape} {name} data")
|
|
495
495
|
|
|
496
|
+
Experiment.upsert(
|
|
497
|
+
match_fields=["id"],
|
|
498
|
+
id=self.experiment_id,
|
|
499
|
+
train_size=len(train),
|
|
500
|
+
val_size=len(val),
|
|
501
|
+
test_size=len(test),
|
|
502
|
+
)
|
|
496
503
|
return (
|
|
497
504
|
train.reset_index(drop=True),
|
|
498
505
|
val.reset_index(drop=True),
|
|
@@ -813,7 +820,7 @@ def traditional_descriptive_analysis(df: pd.DataFrame, group_column: str | None
|
|
|
813
820
|
|
|
814
821
|
# Create a copy of the DataFrame to avoid modifying the original
|
|
815
822
|
df_check = df.copy()
|
|
816
|
-
|
|
823
|
+
|
|
817
824
|
# Convert numpy arrays to tuples for hashing
|
|
818
825
|
for col in df_check.columns:
|
|
819
826
|
if df_check[col].apply(lambda x: isinstance(x, np.ndarray)).any():
|
|
@@ -830,10 +837,10 @@ def traditional_descriptive_analysis(df: pd.DataFrame, group_column: str | None
|
|
|
830
837
|
duplicated_cols = []
|
|
831
838
|
cols = df_check.columns
|
|
832
839
|
for i, col1 in enumerate(cols):
|
|
833
|
-
for col2 in cols[i+1:]:
|
|
840
|
+
for col2 in cols[i + 1 :]:
|
|
834
841
|
if df_check[col1].equals(df_check[col2]):
|
|
835
842
|
duplicated_cols.append(f"{col1} = {col2}")
|
|
836
|
-
|
|
843
|
+
|
|
837
844
|
results["Duplicated columns"] = (
|
|
838
845
|
", ".join(duplicated_cols) if duplicated_cols else "None"
|
|
839
846
|
)
|
|
@@ -569,14 +569,6 @@ class ModelEngine:
|
|
|
569
569
|
|
|
570
570
|
target_dir = Path(self.path)
|
|
571
571
|
|
|
572
|
-
# Load threshold
|
|
573
|
-
scores_tracking = pd.read_csv(f"{target_dir}/scores_tracking.csv")
|
|
574
|
-
self.threshold = (
|
|
575
|
-
scores_tracking["THRESHOLD"].values[0]
|
|
576
|
-
if "THRESHOLD" in scores_tracking.columns
|
|
577
|
-
else None
|
|
578
|
-
)
|
|
579
|
-
|
|
580
572
|
# Search for files that contain '.best' or '.keras' in the name
|
|
581
573
|
best_files = list(target_dir.glob("*.best*")) + list(
|
|
582
574
|
target_dir.glob("*.keras*")
|
|
@@ -589,17 +581,11 @@ class ModelEngine:
|
|
|
589
581
|
try:
|
|
590
582
|
# Attempt to load the file as a scikit-learn, XGBoost, or LightGBM model (Pickle format)
|
|
591
583
|
self._model = joblib.load(file_path)
|
|
592
|
-
logger.info(
|
|
593
|
-
f"Loaded model {self._model.model_name} and threshold {self.threshold}"
|
|
594
|
-
)
|
|
595
584
|
except (pickle.UnpicklingError, EOFError):
|
|
596
585
|
# If it's not a pickle file, try loading it as a Keras model
|
|
597
586
|
try:
|
|
598
587
|
# Attempt to load the file as a Keras model
|
|
599
588
|
self._model = keras.models.load_model(file_path)
|
|
600
|
-
logger.info(
|
|
601
|
-
f"Loaded model {self._model.model_name} and threshold {self.threshold}"
|
|
602
|
-
)
|
|
603
589
|
except Exception as e:
|
|
604
590
|
raise FileNotFoundError(
|
|
605
591
|
f"Model could not be loaded from path: {file_path}: {e}"
|
|
@@ -612,6 +598,17 @@ class ModelEngine:
|
|
|
612
598
|
self.model_name = self._model.model_name
|
|
613
599
|
self.target_type = self._model.target_type
|
|
614
600
|
|
|
601
|
+
# Load threshold
|
|
602
|
+
self.threshold = (
|
|
603
|
+
joblib.load(f"{target_dir}/thresholds.pkl")
|
|
604
|
+
if self.target_type == "classification"
|
|
605
|
+
else None
|
|
606
|
+
)
|
|
607
|
+
|
|
608
|
+
logger.info(
|
|
609
|
+
f"Loaded model {self._model.model_name} and threshold {self.threshold}"
|
|
610
|
+
)
|
|
611
|
+
|
|
615
612
|
|
|
616
613
|
def trainable(
|
|
617
614
|
params,
|
|
@@ -1072,6 +1069,11 @@ class ModelSelectionEngine:
|
|
|
1072
1069
|
scores_tracking = pd.read_csv(scores_tracking_path)
|
|
1073
1070
|
best_score_overall = scores_tracking.iloc[0, :]
|
|
1074
1071
|
best_model_name = best_score_overall["MODEL_NAME"]
|
|
1072
|
+
if self.target_type == "classification":
|
|
1073
|
+
best_thresholds = best_score_overall["THRESHOLDS"]
|
|
1074
|
+
joblib.dump(best_thresholds, f"{self.target_dir}/thresholds.pkl")
|
|
1075
|
+
else:
|
|
1076
|
+
best_thresholds = None
|
|
1075
1077
|
|
|
1076
1078
|
# Remove any .best or .keras files
|
|
1077
1079
|
for file_path in glob.glob(os.path.join(self.target_dir, "*.best")) + glob.glob(
|
|
@@ -1096,6 +1098,7 @@ class ModelSelectionEngine:
|
|
|
1096
1098
|
name=best_score_overall["MODEL_NAME"], type=self.target_type
|
|
1097
1099
|
).id
|
|
1098
1100
|
model_selection.best_model_params = best_model_params
|
|
1101
|
+
model_selection.best_thresholds = best_thresholds
|
|
1099
1102
|
model_selection.best_model_path = best_model_path
|
|
1100
1103
|
model_selection.save()
|
|
1101
1104
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py
RENAMED
|
File without changes
|
{lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py
RENAMED
|
File without changes
|
{lecrapaud-0.9.2 → lecrapaud-0.9.4}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|