lecrapaud 0.20.0__py3-none-any.whl → 0.20.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lecrapaud might be problematic. Click here for more details.
- lecrapaud/api.py +11 -49
- lecrapaud/db/alembic/versions/2025_10_28_2006-0a8fb7826e9b_add_number_of_targets_and_remove_other_.py +42 -0
- lecrapaud/db/models/experiment.py +48 -75
- lecrapaud/experiment.py +8 -13
- lecrapaud/feature_engineering.py +28 -40
- lecrapaud/feature_selection.py +90 -21
- lecrapaud/model_selection.py +24 -30
- {lecrapaud-0.20.0.dist-info → lecrapaud-0.20.1.dist-info}/METADATA +1 -1
- {lecrapaud-0.20.0.dist-info → lecrapaud-0.20.1.dist-info}/RECORD +11 -10
- {lecrapaud-0.20.0.dist-info → lecrapaud-0.20.1.dist-info}/WHEEL +0 -0
- {lecrapaud-0.20.0.dist-info → lecrapaud-0.20.1.dist-info}/licenses/LICENSE +0 -0
lecrapaud/api.py
CHANGED
|
@@ -165,6 +165,12 @@ class ExperimentEngine:
|
|
|
165
165
|
|
|
166
166
|
def __init__(self, id: int = None, data: pd.DataFrame = None, **kwargs):
|
|
167
167
|
"""Initialize the experiment engine with either new or existing experiment."""
|
|
168
|
+
# Set all kwargs as instance attributes
|
|
169
|
+
if "models_idx" in kwargs:
|
|
170
|
+
kwargs["models_idx"] = normalize_models_idx(kwargs["models_idx"])
|
|
171
|
+
for key, value in kwargs.items():
|
|
172
|
+
setattr(self, key, value)
|
|
173
|
+
|
|
168
174
|
if id:
|
|
169
175
|
self.experiment = Experiment.get(id)
|
|
170
176
|
kwargs.update(self.experiment.context)
|
|
@@ -180,12 +186,6 @@ class ExperimentEngine:
|
|
|
180
186
|
)
|
|
181
187
|
self.experiment = create_experiment(data=data, **kwargs)
|
|
182
188
|
|
|
183
|
-
# Set all kwargs as instance attributes
|
|
184
|
-
for key, value in kwargs.items():
|
|
185
|
-
if key == "models_idx":
|
|
186
|
-
value = normalize_models_idx(value)
|
|
187
|
-
setattr(self, key, value)
|
|
188
|
-
|
|
189
189
|
def train(self, data, best_params=None):
|
|
190
190
|
logger.info("Running training...")
|
|
191
191
|
|
|
@@ -309,12 +309,8 @@ class ExperimentEngine:
|
|
|
309
309
|
def feature_engineering(self, data, for_training=True):
|
|
310
310
|
app = FeatureEngineeringEngine(
|
|
311
311
|
data=data,
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
columns_date=getattr(self, "columns_date", []),
|
|
315
|
-
columns_te_groupby=getattr(self, "columns_te_groupby", []),
|
|
316
|
-
columns_te_target=getattr(self, "columns_te_target", []),
|
|
317
|
-
for_training=getattr(self, "for_training", True),
|
|
312
|
+
experiment=self.experiment,
|
|
313
|
+
for_training=for_training,
|
|
318
314
|
)
|
|
319
315
|
data = app.run()
|
|
320
316
|
return data
|
|
@@ -322,21 +318,7 @@ class ExperimentEngine:
|
|
|
322
318
|
def preprocess_feature(self, data, for_training=True):
|
|
323
319
|
app = PreprocessFeature(
|
|
324
320
|
data=data,
|
|
325
|
-
experiment=
|
|
326
|
-
time_series=getattr(self, "time_series", False),
|
|
327
|
-
date_column=getattr(self, "date_column", None),
|
|
328
|
-
group_column=getattr(self, "group_column", None),
|
|
329
|
-
val_size=getattr(self, "val_size", 0.2),
|
|
330
|
-
test_size=getattr(self, "test_size", 0.2),
|
|
331
|
-
columns_pca=getattr(self, "columns_pca", []),
|
|
332
|
-
pca_temporal=getattr(self, "pca_temporal", []),
|
|
333
|
-
pca_cross_sectional=getattr(self, "pca_cross_sectional", []),
|
|
334
|
-
columns_onehot=getattr(self, "columns_onehot", []),
|
|
335
|
-
columns_binary=getattr(self, "columns_binary", []),
|
|
336
|
-
columns_ordinal=getattr(self, "columns_ordinal", []),
|
|
337
|
-
columns_frequency=getattr(self, "columns_frequency", []),
|
|
338
|
-
target_numbers=getattr(self, "target_numbers", []),
|
|
339
|
-
target_clf=getattr(self, "target_clf", []),
|
|
321
|
+
experiment=self.experiment,
|
|
340
322
|
)
|
|
341
323
|
if for_training:
|
|
342
324
|
train, val, test = app.run()
|
|
@@ -351,7 +333,6 @@ class ExperimentEngine:
|
|
|
351
333
|
train=train,
|
|
352
334
|
target_number=target_number,
|
|
353
335
|
experiment=self.experiment,
|
|
354
|
-
target_clf=self.target_clf,
|
|
355
336
|
)
|
|
356
337
|
app.run()
|
|
357
338
|
self.experiment = Experiment.get(self.experiment.id)
|
|
@@ -368,14 +349,7 @@ class ExperimentEngine:
|
|
|
368
349
|
train=train,
|
|
369
350
|
val=val,
|
|
370
351
|
test=test,
|
|
371
|
-
experiment=
|
|
372
|
-
target_numbers=getattr(self, "target_numbers", []),
|
|
373
|
-
target_clf=getattr(self, "target_clf", []),
|
|
374
|
-
models_idx=getattr(self, "models_idx", []),
|
|
375
|
-
time_series=getattr(self, "time_series", False),
|
|
376
|
-
max_timesteps=getattr(self, "max_timesteps", 120),
|
|
377
|
-
date_column=getattr(self, "date_column", None),
|
|
378
|
-
group_column=getattr(self, "group_column", None),
|
|
352
|
+
experiment=self.experiment,
|
|
379
353
|
)
|
|
380
354
|
if for_training:
|
|
381
355
|
data, reshaped_data = app.run()
|
|
@@ -390,25 +364,13 @@ class ExperimentEngine:
|
|
|
390
364
|
data=data,
|
|
391
365
|
reshaped_data=reshaped_data,
|
|
392
366
|
target_number=target_number,
|
|
393
|
-
experiment=
|
|
394
|
-
target_clf=getattr(self, "target_clf", []),
|
|
395
|
-
models_idx=getattr(self, "models_idx", []),
|
|
396
|
-
time_series=getattr(self, "time_series", False),
|
|
397
|
-
date_column=getattr(self, "date_column", None),
|
|
398
|
-
group_column=getattr(self, "group_column", None),
|
|
399
|
-
target_clf_thresholds=getattr(self, "target_clf_thresholds", {}),
|
|
367
|
+
experiment=self.experiment,
|
|
400
368
|
)
|
|
401
369
|
if best_params and target_number not in best_params.keys():
|
|
402
370
|
raise ValueError(
|
|
403
371
|
f"Target {target_number} not found in best_params passed as argument"
|
|
404
372
|
)
|
|
405
373
|
app.run(
|
|
406
|
-
self.experiment_name,
|
|
407
|
-
perform_hyperopt=self.perform_hyperopt,
|
|
408
|
-
number_of_trials=self.number_of_trials,
|
|
409
|
-
perform_crossval=self.perform_crossval,
|
|
410
|
-
plot=self.plot,
|
|
411
|
-
preserve_model=self.preserve_model,
|
|
412
374
|
best_params=best_params[target_number] if best_params else None,
|
|
413
375
|
)
|
|
414
376
|
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""add number_of_targets and remove other fields from experiments
|
|
2
|
+
|
|
3
|
+
Revision ID: 0a8fb7826e9b
|
|
4
|
+
Revises: 033e0f7eca4f
|
|
5
|
+
Create Date: 2025-10-28 20:06:54.792631
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
from typing import Sequence, Union
|
|
9
|
+
|
|
10
|
+
from alembic import op
|
|
11
|
+
import sqlalchemy as sa
|
|
12
|
+
from sqlalchemy.dialects import mysql
|
|
13
|
+
|
|
14
|
+
# revision identifiers, used by Alembic.
|
|
15
|
+
revision: str = '0a8fb7826e9b'
|
|
16
|
+
down_revision: Union[str, None] = '033e0f7eca4f'
|
|
17
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
18
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def upgrade() -> None:
|
|
22
|
+
# ### commands auto generated by Alembic - please adjust! ###
|
|
23
|
+
op.add_column('lecrapaud_experiments', sa.Column('number_of_targets', sa.Integer(), nullable=True))
|
|
24
|
+
op.drop_column('lecrapaud_experiments', 'corr_threshold')
|
|
25
|
+
op.drop_column('lecrapaud_experiments', 'max_features')
|
|
26
|
+
op.drop_column('lecrapaud_experiments', 'percentile')
|
|
27
|
+
op.drop_column('lecrapaud_experiments', 'type')
|
|
28
|
+
op.drop_index(op.f('ix_model_selection_scores_id'), table_name='lecrapaud_model_selection_scores')
|
|
29
|
+
op.create_index(op.f('ix_lecrapaud_model_selection_scores_id'), 'lecrapaud_model_selection_scores', ['id'], unique=False)
|
|
30
|
+
# ### end Alembic commands ###
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def downgrade() -> None:
|
|
34
|
+
# ### commands auto generated by Alembic - please adjust! ###
|
|
35
|
+
op.drop_index(op.f('ix_lecrapaud_model_selection_scores_id'), table_name='lecrapaud_model_selection_scores')
|
|
36
|
+
op.create_index(op.f('ix_model_selection_scores_id'), 'lecrapaud_model_selection_scores', ['id'], unique=False)
|
|
37
|
+
op.add_column('lecrapaud_experiments', sa.Column('type', mysql.VARCHAR(length=50), nullable=False))
|
|
38
|
+
op.add_column('lecrapaud_experiments', sa.Column('percentile', mysql.FLOAT(), nullable=False))
|
|
39
|
+
op.add_column('lecrapaud_experiments', sa.Column('max_features', mysql.INTEGER(), autoincrement=False, nullable=False))
|
|
40
|
+
op.add_column('lecrapaud_experiments', sa.Column('corr_threshold', mysql.FLOAT(), nullable=False))
|
|
41
|
+
op.drop_column('lecrapaud_experiments', 'number_of_targets')
|
|
42
|
+
# ### end Alembic commands ###
|
|
@@ -50,10 +50,43 @@ class Experiment(Base):
|
|
|
50
50
|
)
|
|
51
51
|
name = Column(String(255), nullable=False)
|
|
52
52
|
path = Column(String(255)) # we do not have this at creation time
|
|
53
|
-
type = Column(String(50), nullable=False)
|
|
54
53
|
size = Column(Integer, nullable=False)
|
|
55
54
|
train_size = Column(Integer)
|
|
56
55
|
val_size = Column(Integer)
|
|
56
|
+
test_size = Column(Integer)
|
|
57
|
+
number_of_groups = Column(Integer)
|
|
58
|
+
list_of_groups = Column(JSON)
|
|
59
|
+
number_of_targets = Column(Integer)
|
|
60
|
+
start_date = Column(DateTime)
|
|
61
|
+
end_date = Column(DateTime)
|
|
62
|
+
train_start_date = Column(DateTime)
|
|
63
|
+
train_end_date = Column(DateTime)
|
|
64
|
+
val_start_date = Column(DateTime)
|
|
65
|
+
val_end_date = Column(DateTime)
|
|
66
|
+
test_start_date = Column(DateTime)
|
|
67
|
+
test_end_date = Column(DateTime)
|
|
68
|
+
context = Column(JSON)
|
|
69
|
+
|
|
70
|
+
feature_selections = relationship(
|
|
71
|
+
"FeatureSelection",
|
|
72
|
+
back_populates="experiment",
|
|
73
|
+
cascade="all, delete-orphan",
|
|
74
|
+
lazy="selectin",
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
targets = relationship(
|
|
78
|
+
"Target",
|
|
79
|
+
secondary=lecrapaud_experiment_target_association,
|
|
80
|
+
back_populates="experiments",
|
|
81
|
+
lazy="selectin",
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
__table_args__ = (
|
|
85
|
+
UniqueConstraint(
|
|
86
|
+
"name",
|
|
87
|
+
name="uq_experiments_composite",
|
|
88
|
+
),
|
|
89
|
+
)
|
|
57
90
|
|
|
58
91
|
# Relationships
|
|
59
92
|
model_selections = relationship(
|
|
@@ -68,16 +101,9 @@ class Experiment(Base):
|
|
|
68
101
|
"""Best RMSE score across all model selections and trainings."""
|
|
69
102
|
# Get the minimum RMSE for each model selection
|
|
70
103
|
min_scores = [
|
|
71
|
-
min(
|
|
72
|
-
mss.rmse
|
|
73
|
-
for mss in ms.model_selection_scores
|
|
74
|
-
if mss.rmse is not None
|
|
75
|
-
)
|
|
104
|
+
min(mss.rmse for mss in ms.model_selection_scores if mss.rmse is not None)
|
|
76
105
|
for ms in self.model_selections
|
|
77
|
-
if any(
|
|
78
|
-
mss.rmse is not None
|
|
79
|
-
for mss in ms.model_selection_scores
|
|
80
|
-
)
|
|
106
|
+
if any(mss.rmse is not None for mss in ms.model_selection_scores)
|
|
81
107
|
]
|
|
82
108
|
return min(min_scores) if min_scores else None
|
|
83
109
|
|
|
@@ -92,10 +118,7 @@ class Experiment(Base):
|
|
|
92
118
|
if mss.logloss is not None
|
|
93
119
|
)
|
|
94
120
|
for ms in self.model_selections
|
|
95
|
-
if any(
|
|
96
|
-
mss.logloss is not None
|
|
97
|
-
for mss in ms.model_selection_scores
|
|
98
|
-
)
|
|
121
|
+
if any(mss.logloss is not None for mss in ms.model_selection_scores)
|
|
99
122
|
]
|
|
100
123
|
return min(min_scores) if min_scores else None
|
|
101
124
|
|
|
@@ -104,16 +127,9 @@ class Experiment(Base):
|
|
|
104
127
|
"""Average RMSE score across all model selections and trainings."""
|
|
105
128
|
# Get the minimum RMSE for each model selection
|
|
106
129
|
min_scores = [
|
|
107
|
-
min(
|
|
108
|
-
mss.rmse
|
|
109
|
-
for mss in ms.model_selection_scores
|
|
110
|
-
if mss.rmse is not None
|
|
111
|
-
)
|
|
130
|
+
min(mss.rmse for mss in ms.model_selection_scores if mss.rmse is not None)
|
|
112
131
|
for ms in self.model_selections
|
|
113
|
-
if any(
|
|
114
|
-
mss.rmse is not None
|
|
115
|
-
for mss in ms.model_selection_scores
|
|
116
|
-
)
|
|
132
|
+
if any(mss.rmse is not None for mss in ms.model_selection_scores)
|
|
117
133
|
]
|
|
118
134
|
return mean(min_scores) if min_scores else None
|
|
119
135
|
|
|
@@ -128,50 +144,10 @@ class Experiment(Base):
|
|
|
128
144
|
if mss.logloss is not None
|
|
129
145
|
)
|
|
130
146
|
for ms in self.model_selections
|
|
131
|
-
if any(
|
|
132
|
-
mss.logloss is not None
|
|
133
|
-
for mss in ms.model_selection_scores
|
|
134
|
-
)
|
|
147
|
+
if any(mss.logloss is not None for mss in ms.model_selection_scores)
|
|
135
148
|
]
|
|
136
149
|
return mean(min_scores) if min_scores else None
|
|
137
150
|
|
|
138
|
-
test_size = Column(Integer)
|
|
139
|
-
corr_threshold = Column(Float, nullable=False)
|
|
140
|
-
max_features = Column(Integer, nullable=False)
|
|
141
|
-
percentile = Column(Float, nullable=False)
|
|
142
|
-
number_of_groups = Column(Integer)
|
|
143
|
-
list_of_groups = Column(JSON)
|
|
144
|
-
start_date = Column(DateTime)
|
|
145
|
-
end_date = Column(DateTime)
|
|
146
|
-
train_start_date = Column(DateTime)
|
|
147
|
-
train_end_date = Column(DateTime)
|
|
148
|
-
val_start_date = Column(DateTime)
|
|
149
|
-
val_end_date = Column(DateTime)
|
|
150
|
-
test_start_date = Column(DateTime)
|
|
151
|
-
test_end_date = Column(DateTime)
|
|
152
|
-
context = Column(JSON)
|
|
153
|
-
|
|
154
|
-
feature_selections = relationship(
|
|
155
|
-
"FeatureSelection",
|
|
156
|
-
back_populates="experiment",
|
|
157
|
-
cascade="all, delete-orphan",
|
|
158
|
-
lazy="selectin",
|
|
159
|
-
)
|
|
160
|
-
|
|
161
|
-
targets = relationship(
|
|
162
|
-
"Target",
|
|
163
|
-
secondary=lecrapaud_experiment_target_association,
|
|
164
|
-
back_populates="experiments",
|
|
165
|
-
lazy="selectin",
|
|
166
|
-
)
|
|
167
|
-
|
|
168
|
-
__table_args__ = (
|
|
169
|
-
UniqueConstraint(
|
|
170
|
-
"name",
|
|
171
|
-
name="uq_experiments_composite",
|
|
172
|
-
),
|
|
173
|
-
)
|
|
174
|
-
|
|
175
151
|
@classmethod
|
|
176
152
|
@with_db
|
|
177
153
|
def get_all_by_name(cls, name: str | None = None, limit: int = 1000, db=None):
|
|
@@ -354,19 +330,18 @@ class Experiment(Base):
|
|
|
354
330
|
|
|
355
331
|
# Get the best model score based on lowest logloss or rmse
|
|
356
332
|
model_scores = best_model_selection.model_selection_scores
|
|
357
|
-
|
|
333
|
+
|
|
358
334
|
# Determine if we should use logloss or rmse based on what's available
|
|
359
335
|
if any(ms.logloss is not None for ms in model_scores):
|
|
360
336
|
# Classification: find lowest logloss
|
|
361
337
|
best_score = min(
|
|
362
338
|
(ms for ms in model_scores if ms.logloss is not None),
|
|
363
|
-
key=lambda x: x.logloss
|
|
339
|
+
key=lambda x: x.logloss,
|
|
364
340
|
)
|
|
365
341
|
elif any(ms.rmse is not None for ms in model_scores):
|
|
366
342
|
# Regression: find lowest rmse
|
|
367
343
|
best_score = min(
|
|
368
|
-
(ms for ms in model_scores if ms.rmse is not None),
|
|
369
|
-
key=lambda x: x.rmse
|
|
344
|
+
(ms for ms in model_scores if ms.rmse is not None), key=lambda x: x.rmse
|
|
370
345
|
)
|
|
371
346
|
else:
|
|
372
347
|
return {
|
|
@@ -398,12 +373,8 @@ class Experiment(Base):
|
|
|
398
373
|
|
|
399
374
|
# Get the model info
|
|
400
375
|
model_info = {
|
|
401
|
-
"model_type": (
|
|
402
|
-
|
|
403
|
-
),
|
|
404
|
-
"model_name": (
|
|
405
|
-
score.model.name if score.model else "unknown"
|
|
406
|
-
),
|
|
376
|
+
"model_type": (score.model.model_type if score.model else "unknown"),
|
|
377
|
+
"model_name": (score.model.name if score.model else "unknown"),
|
|
407
378
|
"training_time_seconds": score.training_time,
|
|
408
379
|
}
|
|
409
380
|
|
|
@@ -434,7 +405,9 @@ class Experiment(Base):
|
|
|
434
405
|
return features
|
|
435
406
|
|
|
436
407
|
@with_db
|
|
437
|
-
def get_all_features(
|
|
408
|
+
def get_all_features(
|
|
409
|
+
self, date_column: str = None, group_column: str = None, db=None
|
|
410
|
+
):
|
|
438
411
|
# Ensure we have a fresh instance attached to the session
|
|
439
412
|
self = db.merge(self)
|
|
440
413
|
target_idx = [target.id for target in self.targets]
|
lecrapaud/experiment.py
CHANGED
|
@@ -3,6 +3,7 @@ from pathlib import Path
|
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
import joblib
|
|
6
|
+
from datetime import datetime
|
|
6
7
|
|
|
7
8
|
# Set up coverage file path
|
|
8
9
|
os.environ["COVERAGE_FILE"] = str(Path(".coverage").resolve())
|
|
@@ -15,9 +16,6 @@ from lecrapaud.db.session import get_db
|
|
|
15
16
|
|
|
16
17
|
def create_experiment(
|
|
17
18
|
data: pd.DataFrame | str,
|
|
18
|
-
corr_threshold,
|
|
19
|
-
percentile,
|
|
20
|
-
max_features,
|
|
21
19
|
date_column,
|
|
22
20
|
group_column,
|
|
23
21
|
experiment_name,
|
|
@@ -42,7 +40,10 @@ def create_experiment(
|
|
|
42
40
|
targets = [
|
|
43
41
|
target for target in all_targets if target.name in data.columns.str.upper()
|
|
44
42
|
]
|
|
45
|
-
experiment_name =
|
|
43
|
+
experiment_name = (
|
|
44
|
+
f"{experiment_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
|
45
|
+
)
|
|
46
|
+
number_of_targets = len(targets)
|
|
46
47
|
|
|
47
48
|
experiment_dir = f"{tmp_dir}/{experiment_name}"
|
|
48
49
|
preprocessing_dir = f"{experiment_dir}/preprocessing"
|
|
@@ -55,26 +56,20 @@ def create_experiment(
|
|
|
55
56
|
db=db,
|
|
56
57
|
name=experiment_name,
|
|
57
58
|
path=Path(experiment_dir).resolve(),
|
|
58
|
-
type="training",
|
|
59
59
|
size=data.shape[0],
|
|
60
|
-
|
|
61
|
-
percentile=percentile,
|
|
62
|
-
max_features=max_features,
|
|
60
|
+
number_of_targets=number_of_targets,
|
|
63
61
|
**groups,
|
|
64
62
|
**dates,
|
|
65
63
|
context={
|
|
66
|
-
"corr_threshold": corr_threshold,
|
|
67
|
-
"percentile": percentile,
|
|
68
|
-
"max_features": max_features,
|
|
69
64
|
"date_column": date_column,
|
|
70
65
|
"group_column": group_column,
|
|
71
66
|
"experiment_name": experiment_name,
|
|
72
67
|
**kwargs,
|
|
73
68
|
},
|
|
74
69
|
)
|
|
75
|
-
|
|
70
|
+
|
|
76
71
|
# Set targets relationship after creation/update
|
|
77
72
|
experiment.targets = targets
|
|
78
73
|
experiment.save(db=db)
|
|
79
|
-
|
|
74
|
+
|
|
80
75
|
return experiment
|
lecrapaud/feature_engineering.py
CHANGED
|
@@ -87,21 +87,20 @@ class FeatureEngineeringEngine:
|
|
|
87
87
|
def __init__(
|
|
88
88
|
self,
|
|
89
89
|
data: pd.DataFrame,
|
|
90
|
-
|
|
91
|
-
columns_boolean: list[str] = [],
|
|
92
|
-
columns_date: list[str] = [],
|
|
93
|
-
columns_te_groupby: list[str] = [],
|
|
94
|
-
columns_te_target: list[str] = [],
|
|
90
|
+
experiment,
|
|
95
91
|
for_training: bool = True,
|
|
96
92
|
**kwargs,
|
|
97
93
|
):
|
|
98
94
|
self.data = data
|
|
99
|
-
self.
|
|
100
|
-
self.columns_boolean = columns_boolean
|
|
101
|
-
self.columns_date = columns_date
|
|
102
|
-
self.columns_te_groupby = columns_te_groupby
|
|
103
|
-
self.columns_te_target = columns_te_target
|
|
95
|
+
self.experiment = experiment
|
|
104
96
|
self.for_training = for_training
|
|
97
|
+
|
|
98
|
+
# Get all parameters from experiment context
|
|
99
|
+
self.columns_drop = self.experiment.context.get("columns_drop", [])
|
|
100
|
+
self.columns_boolean = self.experiment.context.get("columns_boolean", [])
|
|
101
|
+
self.columns_date = self.experiment.context.get("columns_date", [])
|
|
102
|
+
self.columns_te_groupby = self.experiment.context.get("columns_te_groupby", [])
|
|
103
|
+
self.columns_te_target = self.experiment.context.get("columns_te_target", [])
|
|
105
104
|
|
|
106
105
|
def run(self) -> pd.DataFrame:
|
|
107
106
|
# drop columns
|
|
@@ -316,41 +315,30 @@ class PreprocessFeature:
|
|
|
316
315
|
self,
|
|
317
316
|
data: pd.DataFrame,
|
|
318
317
|
experiment,
|
|
319
|
-
time_series: bool = False,
|
|
320
|
-
date_column: str | None = None,
|
|
321
|
-
group_column: str | None = None,
|
|
322
|
-
val_size: float = 0.2,
|
|
323
|
-
test_size: float = 0.2,
|
|
324
|
-
columns_pca: list[str] = [],
|
|
325
|
-
pca_temporal: list[dict[str, list[str]]] = [],
|
|
326
|
-
pca_cross_sectional: list[dict[str, list[str]]] = [],
|
|
327
|
-
columns_onehot: list[str] = [],
|
|
328
|
-
columns_binary: list[str] = [],
|
|
329
|
-
columns_ordinal: list[str] = [],
|
|
330
|
-
columns_frequency: list[str] = [],
|
|
331
|
-
target_numbers: list = [],
|
|
332
|
-
target_clf: list = [],
|
|
333
318
|
**kwargs,
|
|
334
319
|
):
|
|
335
320
|
self.data = data
|
|
336
321
|
self.data.columns = self.data.columns.str.upper()
|
|
337
|
-
|
|
338
322
|
self.experiment = experiment
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
self.
|
|
343
|
-
self.
|
|
344
|
-
self.
|
|
345
|
-
self.
|
|
346
|
-
self.
|
|
347
|
-
self.
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
self.
|
|
352
|
-
self.
|
|
353
|
-
self.
|
|
323
|
+
|
|
324
|
+
# Get all parameters from experiment context
|
|
325
|
+
context = self.experiment.context
|
|
326
|
+
self.time_series = context.get("time_series", False)
|
|
327
|
+
self.date_column = context.get("date_column", None)
|
|
328
|
+
self.group_column = context.get("group_column", None)
|
|
329
|
+
self.val_size = context.get("val_size", 0.2)
|
|
330
|
+
self.test_size = context.get("test_size", 0.2)
|
|
331
|
+
self.target_numbers = context.get("target_numbers", [])
|
|
332
|
+
self.target_clf = context.get("target_clf", [])
|
|
333
|
+
|
|
334
|
+
# Handle list parameters with uppercase conversion
|
|
335
|
+
self.columns_pca = [col.upper() for col in context.get("columns_pca", [])]
|
|
336
|
+
self.pca_temporal = context.get("pca_temporal", [])
|
|
337
|
+
self.pca_cross_sectional = context.get("pca_cross_sectional", [])
|
|
338
|
+
self.columns_onehot = [col.upper() for col in context.get("columns_onehot", [])]
|
|
339
|
+
self.columns_binary = [col.upper() for col in context.get("columns_binary", [])]
|
|
340
|
+
self.columns_ordinal = [col.upper() for col in context.get("columns_ordinal", [])]
|
|
341
|
+
self.columns_frequency = [col.upper() for col in context.get("columns_frequency", [])]
|
|
354
342
|
|
|
355
343
|
self.experiment_dir = self.experiment.path
|
|
356
344
|
self.experiment_id = self.experiment.id
|
lecrapaud/feature_selection.py
CHANGED
|
@@ -73,18 +73,21 @@ def load_train_data(experiment_dir):
|
|
|
73
73
|
|
|
74
74
|
|
|
75
75
|
class FeatureSelectionEngine:
|
|
76
|
-
def __init__(self, train, experiment, target_number,
|
|
76
|
+
def __init__(self, train, experiment, target_number, **kwargs):
|
|
77
77
|
self.experiment = experiment
|
|
78
78
|
self.train = train
|
|
79
79
|
self.target_number = target_number
|
|
80
|
-
|
|
80
|
+
|
|
81
|
+
# Get all parameters from experiment context
|
|
82
|
+
self.target_clf = self.experiment.context.get("target_clf", [])
|
|
83
|
+
self.max_p_value_categorical = self.experiment.context.get("max_p_value_categorical", 0.05)
|
|
84
|
+
self.percentile = self.experiment.context.get("percentile", 20)
|
|
85
|
+
self.corr_threshold = self.experiment.context.get("corr_threshold", 80)
|
|
86
|
+
self.max_features = self.experiment.context.get("max_features", 50)
|
|
81
87
|
|
|
82
88
|
self.target_type = (
|
|
83
89
|
"classification" if self.target_number in self.target_clf else "regression"
|
|
84
90
|
)
|
|
85
|
-
self.percentile = self.experiment.percentile
|
|
86
|
-
self.corr_threshold = self.experiment.corr_threshold
|
|
87
|
-
self.max_features = self.experiment.max_features
|
|
88
91
|
|
|
89
92
|
self.experiment_dir = self.experiment.path
|
|
90
93
|
self.experiment_id = self.experiment.id
|
|
@@ -274,6 +277,38 @@ class FeatureSelectionEngine:
|
|
|
274
277
|
features_selected.drop_duplicates("features", inplace=True)
|
|
275
278
|
|
|
276
279
|
features_selected_list = features_selected["features"].values.tolist()
|
|
280
|
+
|
|
281
|
+
# Save ensemble features before correlation (aggregated features)
|
|
282
|
+
logger.info("Saving ensemble features before correlation...")
|
|
283
|
+
all_features_in_data = self.X.columns.tolist()
|
|
284
|
+
ensemble_rows = []
|
|
285
|
+
|
|
286
|
+
# Add global rank for selected features
|
|
287
|
+
features_selected_with_global_rank = features_selected.copy()
|
|
288
|
+
features_selected_with_global_rank["global_rank"] = range(1, len(features_selected_with_global_rank) + 1)
|
|
289
|
+
|
|
290
|
+
for feature in all_features_in_data:
|
|
291
|
+
feature_id = feature_map.get(feature)
|
|
292
|
+
if feature_id:
|
|
293
|
+
is_selected = feature in features_selected_list
|
|
294
|
+
global_rank = None
|
|
295
|
+
if is_selected:
|
|
296
|
+
global_rank = features_selected_with_global_rank[
|
|
297
|
+
features_selected_with_global_rank["features"] == feature
|
|
298
|
+
]["global_rank"].values[0]
|
|
299
|
+
|
|
300
|
+
ensemble_rows.append({
|
|
301
|
+
"feature_selection_id": feature_selection.id,
|
|
302
|
+
"feature_id": feature_id,
|
|
303
|
+
"method": "ensemble",
|
|
304
|
+
"score": None,
|
|
305
|
+
"pvalue": None,
|
|
306
|
+
"support": 2 if is_selected else 0, # 2 = in aggregated features
|
|
307
|
+
"rank": global_rank,
|
|
308
|
+
"training_time": 0,
|
|
309
|
+
})
|
|
310
|
+
|
|
311
|
+
FeatureSelectionRank.bulk_upsert(rows=ensemble_rows)
|
|
277
312
|
|
|
278
313
|
# analysis 1
|
|
279
314
|
features_selected_by_every_methods = set(results[0]["features"].values.tolist())
|
|
@@ -302,12 +337,46 @@ class FeatureSelectionEngine:
|
|
|
302
337
|
header=True,
|
|
303
338
|
index_label="ID",
|
|
304
339
|
)
|
|
340
|
+
|
|
341
|
+
# Update support for features after correlation removal (before max)
|
|
342
|
+
logger.info("Updating ensemble features after correlation removal...")
|
|
343
|
+
for row in ensemble_rows:
|
|
344
|
+
feature = Feature.get(row["feature_id"]).name
|
|
345
|
+
if feature in features:
|
|
346
|
+
row["support"] = 1 # 1 = survived correlation removal
|
|
347
|
+
|
|
305
348
|
features = features[:max_features]
|
|
306
349
|
|
|
307
350
|
# adding categorical features selected
|
|
308
351
|
features += (
|
|
309
352
|
categorical_features_selected if target_type == "classification" else []
|
|
310
353
|
)
|
|
354
|
+
|
|
355
|
+
# Final update for features after max limitation (final selection)
|
|
356
|
+
logger.info("Finalizing ensemble features with categorical features...")
|
|
357
|
+
for row in ensemble_rows:
|
|
358
|
+
feature = Feature.get(row["feature_id"]).name
|
|
359
|
+
if feature in features and row["support"] == 1:
|
|
360
|
+
row["support"] = 2 # 2 = in final selection
|
|
361
|
+
|
|
362
|
+
# Add categorical features to ensemble if not already present
|
|
363
|
+
if target_type == "classification":
|
|
364
|
+
for cat_feature in categorical_features_selected:
|
|
365
|
+
feature_id = feature_map.get(cat_feature)
|
|
366
|
+
if feature_id and not any(row["feature_id"] == feature_id for row in ensemble_rows):
|
|
367
|
+
ensemble_rows.append({
|
|
368
|
+
"feature_selection_id": feature_selection.id,
|
|
369
|
+
"feature_id": feature_id,
|
|
370
|
+
"method": "ensemble",
|
|
371
|
+
"score": None,
|
|
372
|
+
"pvalue": None,
|
|
373
|
+
"support": 2, # 2 = in final selection (categorical)
|
|
374
|
+
"rank": None, # No rank for categorical features added at the end
|
|
375
|
+
"training_time": 0,
|
|
376
|
+
})
|
|
377
|
+
|
|
378
|
+
# Re-save all ensemble data with updated support values
|
|
379
|
+
FeatureSelectionRank.bulk_upsert(rows=ensemble_rows)
|
|
311
380
|
logger.debug(
|
|
312
381
|
f"Final pre-selection: {len(features)} features below {corr_threshold}% out of {len(features_selected_list)} features, and rejected {len(features_correlated)} features, {100*len(features)/len(features_selected_list):.2f}% features selected"
|
|
313
382
|
)
|
|
@@ -440,13 +509,18 @@ class FeatureSelectionEngine:
|
|
|
440
509
|
feat_scores["features"] = X.columns
|
|
441
510
|
feat_scores["rank"] = feat_scores["score"].rank(method="first", ascending=False)
|
|
442
511
|
feat_scores["method"] = "Chi2"
|
|
512
|
+
|
|
513
|
+
# Apply both percentile and p-value filtering
|
|
514
|
+
# Keep features that satisfy BOTH conditions: within percentile AND p-value < threshold
|
|
515
|
+
feat_scores["support"] = feat_scores["support"] & (feat_scores["pvalue"] <= self.max_p_value_categorical)
|
|
516
|
+
|
|
443
517
|
feat_scores.sort_values("rank", ascending=True, inplace=True)
|
|
444
518
|
stop = time.time()
|
|
445
519
|
training_time = timedelta(seconds=(stop - start)).total_seconds()
|
|
446
520
|
feat_scores["training_time"] = training_time
|
|
447
521
|
|
|
448
522
|
logger.debug(
|
|
449
|
-
f"Chi2 evaluation selected {feat_scores['support'].sum()} features in {training_time:.2f} seconds"
|
|
523
|
+
f"Chi2 evaluation selected {feat_scores['support'].sum()} features in {training_time:.2f} seconds (percentile={percentile}%, p-value<={self.max_p_value_categorical})"
|
|
450
524
|
)
|
|
451
525
|
|
|
452
526
|
feat_scores.to_csv(
|
|
@@ -803,33 +877,28 @@ class PreprocessModel:
|
|
|
803
877
|
val,
|
|
804
878
|
test,
|
|
805
879
|
experiment,
|
|
806
|
-
target_numbers,
|
|
807
|
-
target_clf,
|
|
808
|
-
models_idx,
|
|
809
|
-
time_series,
|
|
810
|
-
max_timesteps,
|
|
811
|
-
group_column,
|
|
812
|
-
date_column,
|
|
813
880
|
**kwargs,
|
|
814
881
|
):
|
|
815
882
|
self.train = train
|
|
816
883
|
self.val = val
|
|
817
884
|
self.test = test
|
|
818
885
|
self.experiment = experiment
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
self.
|
|
822
|
-
self.
|
|
823
|
-
self.
|
|
824
|
-
self.
|
|
825
|
-
self.
|
|
886
|
+
|
|
887
|
+
# Get all parameters from experiment context
|
|
888
|
+
self.target_numbers = self.experiment.context.get("target_numbers", [])
|
|
889
|
+
self.target_clf = self.experiment.context.get("target_clf", [])
|
|
890
|
+
self.models_idx = self.experiment.context.get("models_idx", [])
|
|
891
|
+
self.time_series = self.experiment.context.get("time_series", False)
|
|
892
|
+
self.max_timesteps = self.experiment.context.get("max_timesteps", 120)
|
|
893
|
+
self.group_column = self.experiment.context.get("group_column", None)
|
|
894
|
+
self.date_column = self.experiment.context.get("date_column", None)
|
|
826
895
|
|
|
827
896
|
self.experiment_dir = experiment.path
|
|
828
897
|
self.data_dir = f"{self.experiment_dir}/data"
|
|
829
898
|
self.preprocessing_dir = f"{self.experiment_dir}/preprocessing"
|
|
830
899
|
|
|
831
900
|
self.all_features = experiment.get_all_features(
|
|
832
|
-
date_column=date_column, group_column=group_column
|
|
901
|
+
date_column=self.date_column, group_column=self.group_column
|
|
833
902
|
)
|
|
834
903
|
|
|
835
904
|
def run(self):
|
lecrapaud/model_selection.py
CHANGED
|
@@ -1017,24 +1017,24 @@ class ModelSelectionEngine:
|
|
|
1017
1017
|
data,
|
|
1018
1018
|
reshaped_data,
|
|
1019
1019
|
target_number,
|
|
1020
|
-
target_clf,
|
|
1021
1020
|
experiment,
|
|
1022
|
-
models_idx,
|
|
1023
|
-
time_series,
|
|
1024
|
-
date_column,
|
|
1025
|
-
group_column,
|
|
1026
|
-
target_clf_thresholds,
|
|
1027
1021
|
**kwargs,
|
|
1028
1022
|
):
|
|
1029
1023
|
self.data = data
|
|
1030
1024
|
self.reshaped_data = reshaped_data
|
|
1031
1025
|
self.target_number = target_number
|
|
1032
1026
|
self.experiment = experiment
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
self.
|
|
1037
|
-
self.
|
|
1027
|
+
|
|
1028
|
+
# Get all parameters from experiment context
|
|
1029
|
+
context = self.experiment.context
|
|
1030
|
+
self.target_clf = context.get("target_clf", [])
|
|
1031
|
+
self.models_idx = context.get("models_idx", [])
|
|
1032
|
+
self.time_series = context.get("time_series", False)
|
|
1033
|
+
self.date_column = context.get("date_column", None)
|
|
1034
|
+
self.group_column = context.get("group_column", None)
|
|
1035
|
+
|
|
1036
|
+
# Handle target_clf_thresholds
|
|
1037
|
+
target_clf_thresholds = context.get("target_clf_thresholds", {})
|
|
1038
1038
|
self.target_clf_thresholds = (
|
|
1039
1039
|
target_clf_thresholds[target_number]
|
|
1040
1040
|
if target_number in target_clf_thresholds.keys()
|
|
@@ -1056,25 +1056,19 @@ class ModelSelectionEngine:
|
|
|
1056
1056
|
)
|
|
1057
1057
|
|
|
1058
1058
|
# Main training function
|
|
1059
|
-
def run(
|
|
1060
|
-
self,
|
|
1061
|
-
experiment_name,
|
|
1062
|
-
perform_hyperopt=True,
|
|
1063
|
-
number_of_trials=20,
|
|
1064
|
-
perform_crossval=False, # This controls CV during hyperopt, not after
|
|
1065
|
-
plot=True,
|
|
1066
|
-
clean_dir=False, # TODO: This has been unused because now feature_selection is in the target directory
|
|
1067
|
-
preserve_model=True,
|
|
1068
|
-
best_params=None,
|
|
1069
|
-
):
|
|
1059
|
+
def run(self, best_params=None):
|
|
1070
1060
|
"""
|
|
1071
1061
|
Selects the best models based on a target variable, optionally performing hyperparameter optimization
|
|
1072
1062
|
and cross-validation, and manages outputs in a session-specific directory.
|
|
1073
1063
|
"""
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
self.
|
|
1077
|
-
self.
|
|
1064
|
+
# Get all parameters from experiment context
|
|
1065
|
+
context = self.experiment.context
|
|
1066
|
+
self.experiment_name = context.get("experiment_name", "")
|
|
1067
|
+
self.plot = context.get("plot", True)
|
|
1068
|
+
self.number_of_trials = context.get("number_of_trials", 20)
|
|
1069
|
+
self.perform_crossval = context.get("perform_crossval", False)
|
|
1070
|
+
self.preserve_model = context.get("preserve_model", True)
|
|
1071
|
+
self.perform_hyperopt = context.get("perform_hyperopt", True)
|
|
1078
1072
|
|
|
1079
1073
|
if self.experiment_id is None:
|
|
1080
1074
|
raise ValueError("Please provide a experiment.")
|
|
@@ -1141,13 +1135,13 @@ class ModelSelectionEngine:
|
|
|
1141
1135
|
self.results_dir = f"{self.target_dir}/{model_name}"
|
|
1142
1136
|
if not os.path.exists(f"{self.results_dir}"):
|
|
1143
1137
|
os.makedirs(f"{self.results_dir}")
|
|
1144
|
-
elif preserve_model and contains_best(self.results_dir):
|
|
1138
|
+
elif self.preserve_model and contains_best(self.results_dir):
|
|
1145
1139
|
continue
|
|
1146
|
-
elif perform_hyperopt:
|
|
1140
|
+
elif self.perform_hyperopt:
|
|
1147
1141
|
clean_directory(self.results_dir)
|
|
1148
1142
|
|
|
1149
1143
|
logger.info(
|
|
1150
|
-
f"{experiment_name} - Training a {model_name} at {datetime.now()} for TARGET_{self.target_number}"
|
|
1144
|
+
f"{self.experiment_name} - Training a {model_name} at {datetime.now()} for TARGET_{self.target_number}"
|
|
1151
1145
|
)
|
|
1152
1146
|
|
|
1153
1147
|
# Getting data
|
|
@@ -1204,7 +1198,7 @@ class ModelSelectionEngine:
|
|
|
1204
1198
|
|
|
1205
1199
|
# Tuning hyperparameters
|
|
1206
1200
|
start = time.time()
|
|
1207
|
-
if perform_hyperopt:
|
|
1201
|
+
if self.perform_hyperopt:
|
|
1208
1202
|
model_best_params = self.hyperoptimize(
|
|
1209
1203
|
x_train, y_train, x_val, y_val, model
|
|
1210
1204
|
)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
|
|
2
|
-
lecrapaud/api.py,sha256=
|
|
2
|
+
lecrapaud/api.py,sha256=IQlH3wcSzxYgvlamfICNMwNsQGoaNxBJUPTlC9M0kBk,20321
|
|
3
3
|
lecrapaud/config.py,sha256=QK1MxWsEddXii02Rme31tCGDyMFsfHHF2Zy-lLIOuSY,1218
|
|
4
4
|
lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
|
|
5
5
|
lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
|
|
@@ -14,10 +14,11 @@ lecrapaud/db/alembic/versions/2025_08_28_1516-c36e9fee22b9_add_avg_precision_to_
|
|
|
14
14
|
lecrapaud/db/alembic/versions/2025_08_28_1622-8b11c1ba982e_change_name_column.py,sha256=g6H2Z9MwB6UEiqdGlBoHBXpO9DTaWkwHt8FS6joVOm0,1191
|
|
15
15
|
lecrapaud/db/alembic/versions/2025_10_25_0635-07e303521594_add_unique_constraint_to_score.py,sha256=FshOF1t-NWXrBtXT3wMNGFslJ4sWUxzvBEXSymu05cI,1043
|
|
16
16
|
lecrapaud/db/alembic/versions/2025_10_26_1727-033e0f7eca4f_merge_score_and_model_trainings_into_.py,sha256=htHUD4zPJr-0z_DQfTi8r9RsFVe9m7SL0f7oRIvLIcQ,10999
|
|
17
|
+
lecrapaud/db/alembic/versions/2025_10_28_2006-0a8fb7826e9b_add_number_of_targets_and_remove_other_.py,sha256=o3TNHq1GTFjxfk2zHWaUbq91khMJi6Xy6HToO9i54AU,2051
|
|
17
18
|
lecrapaud/db/alembic.ini,sha256=Zw2rdwsKV6c7J1SPtoFIPDX08_oTP3MuUKnNxBDiY8I,3796
|
|
18
19
|
lecrapaud/db/models/__init__.py,sha256=-XoCN1eeLihnNxBMl90lXrgrTSDkMbeqgienMqFi5f4,702
|
|
19
20
|
lecrapaud/db/models/base.py,sha256=0548x4ftd6Oim9BJmtD7Er4izM6u0QCrlTG5560384w,9458
|
|
20
|
-
lecrapaud/db/models/experiment.py,sha256=
|
|
21
|
+
lecrapaud/db/models/experiment.py,sha256=aDvSgbE0n-gUHLrz3NNYkeeSp-KkAZ5nbF9WxaxXawM,15029
|
|
21
22
|
lecrapaud/db/models/feature.py,sha256=5o77O2FyRObnLOCGNj8kaPSGM3pLv1Ov6mXXHYkmnYY,1136
|
|
22
23
|
lecrapaud/db/models/feature_selection.py,sha256=PBNWk9QaLb7-_xyrLlOUfab0y2xEj3agAIzt1gxssZQ,3172
|
|
23
24
|
lecrapaud/db/models/feature_selection_rank.py,sha256=POo-OLdaxU3eaH6fC6fTOj7Fnv0ujvTXgYZMzjjwTfE,1773
|
|
@@ -28,9 +29,9 @@ lecrapaud/db/models/target.py,sha256=DKnfeaLU8eT8J_oh_vuFo5-o1CaoXR13xBbswme6Bgk
|
|
|
28
29
|
lecrapaud/db/models/utils.py,sha256=-a-nWWmpJ2XzidIxo2COVUTrGZIPYCfBzjhcszJj_bM,1109
|
|
29
30
|
lecrapaud/db/session.py,sha256=u9NCwUoV5VbtScRb6HOSQr4oTEjIwj0waP5mGlc1qJg,3735
|
|
30
31
|
lecrapaud/directories.py,sha256=0LrANuDgbuneSLker60c6q2hmGnQ3mKHIztTGzTx6Gw,826
|
|
31
|
-
lecrapaud/experiment.py,sha256=
|
|
32
|
-
lecrapaud/feature_engineering.py,sha256=
|
|
33
|
-
lecrapaud/feature_selection.py,sha256
|
|
32
|
+
lecrapaud/experiment.py,sha256=TYECkPqZNVqQQaSg8u5fZ3UvxKYCzc3f-mYVlikCz4s,2234
|
|
33
|
+
lecrapaud/feature_engineering.py,sha256=UM-EIOsgYWedqsR9uA-09eaWSb9FofVxoE0rRcDelQ8,39173
|
|
34
|
+
lecrapaud/feature_selection.py,sha256=Q9xWVmZsvRjX9mJHB_PY_KLXsEAYNLX7txSe0cniY4A,47529
|
|
34
35
|
lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
|
|
35
36
|
lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
|
|
36
37
|
lecrapaud/jobs/config.py,sha256=AmO0j3RFjx8H66dfKw_7vnshaOJb9Ox5BAZ9cwwLFMY,377
|
|
@@ -40,10 +41,10 @@ lecrapaud/misc/tabpfn_tests.ipynb,sha256=VkgsCUJ30d8jaL2VaWtQAgb8ngHPNtPgnXLs7QQ
|
|
|
40
41
|
lecrapaud/misc/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
|
|
41
42
|
lecrapaud/misc/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
|
|
42
43
|
lecrapaud/misc/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
|
|
43
|
-
lecrapaud/model_selection.py,sha256=
|
|
44
|
+
lecrapaud/model_selection.py,sha256=o4_hOEp91_33HtMatVHU7YPc71KZ2hK7wucN63xqWkA,88017
|
|
44
45
|
lecrapaud/search_space.py,sha256=caCehJklD3-sgmlisJj_GmuB7LJiVvTF71gEjPGDvV4,36336
|
|
45
46
|
lecrapaud/utils.py,sha256=vsNBd2Nnhpjo65Ugz2GFJaRhq3U3_eWERfofpevo5Ls,8884
|
|
46
|
-
lecrapaud-0.20.
|
|
47
|
-
lecrapaud-0.20.
|
|
48
|
-
lecrapaud-0.20.
|
|
49
|
-
lecrapaud-0.20.
|
|
47
|
+
lecrapaud-0.20.1.dist-info/METADATA,sha256=gCEqDJXok9Ti9DQ32XRqU4cH0blMCrSBAOLPTy9viXE,11137
|
|
48
|
+
lecrapaud-0.20.1.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
49
|
+
lecrapaud-0.20.1.dist-info/licenses/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
|
|
50
|
+
lecrapaud-0.20.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|