lecrapaud 0.15.0__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lecrapaud might be problematic. Click here for more details.
- lecrapaud/api.py +13 -4
- lecrapaud/db/models/experiment.py +67 -40
- {lecrapaud-0.15.0.dist-info → lecrapaud-0.16.0.dist-info}/METADATA +1 -1
- {lecrapaud-0.15.0.dist-info → lecrapaud-0.16.0.dist-info}/RECORD +6 -6
- {lecrapaud-0.15.0.dist-info → lecrapaud-0.16.0.dist-info}/LICENSE +0 -0
- {lecrapaud-0.15.0.dist-info → lecrapaud-0.16.0.dist-info}/WHEEL +0 -0
lecrapaud/api.py
CHANGED
|
@@ -92,13 +92,22 @@ class LeCrapaud:
|
|
|
92
92
|
"""Retrieve the last experiment by name."""
|
|
93
93
|
return ExperimentEngine(id=Experiment.get_last_by_name(name).id, **kwargs)
|
|
94
94
|
|
|
95
|
-
def get_best_experiment_by_name(
|
|
95
|
+
def get_best_experiment_by_name(
|
|
96
|
+
self, name: str, metric: str = "both", **kwargs
|
|
97
|
+
) -> "ExperimentEngine":
|
|
96
98
|
"""Retrieve the best experiment by score."""
|
|
97
|
-
return ExperimentEngine(
|
|
99
|
+
return ExperimentEngine(
|
|
100
|
+
id=Experiment.get_best_by_score(name=name, metric=metric).id, **kwargs
|
|
101
|
+
)
|
|
98
102
|
|
|
99
|
-
def list_experiments(
|
|
103
|
+
def list_experiments(
|
|
104
|
+
self, name: str = None, limit: int = 1000
|
|
105
|
+
) -> list["ExperimentEngine"]:
|
|
100
106
|
"""List all experiments in the database."""
|
|
101
|
-
return [
|
|
107
|
+
return [
|
|
108
|
+
ExperimentEngine(id=exp.id)
|
|
109
|
+
for exp in Experiment.get_all_by_name(name=name, limit=limit)
|
|
110
|
+
]
|
|
102
111
|
|
|
103
112
|
|
|
104
113
|
class ExperimentEngine:
|
|
@@ -32,7 +32,7 @@ lecrapaud_experiment_target_association = create_association_table(
|
|
|
32
32
|
table1="experiments",
|
|
33
33
|
column1="experiment",
|
|
34
34
|
table2="targets",
|
|
35
|
-
column2="target"
|
|
35
|
+
column2="target",
|
|
36
36
|
)
|
|
37
37
|
|
|
38
38
|
|
|
@@ -62,21 +62,22 @@ class Experiment(Base):
|
|
|
62
62
|
cascade="all, delete-orphan",
|
|
63
63
|
lazy="selectin",
|
|
64
64
|
)
|
|
65
|
+
|
|
65
66
|
@hybrid_property
|
|
66
67
|
def best_rmse(self):
|
|
67
68
|
"""Best RMSE score across all model selections and trainings."""
|
|
68
69
|
# Get the minimum RMSE for each model selection
|
|
69
70
|
min_scores = [
|
|
70
71
|
min(
|
|
71
|
-
score.rmse
|
|
72
|
-
for mt in ms.model_trainings
|
|
72
|
+
score.rmse
|
|
73
|
+
for mt in ms.model_trainings
|
|
73
74
|
for score in mt.score
|
|
74
75
|
if score.rmse is not None
|
|
75
76
|
)
|
|
76
77
|
for ms in self.model_selections
|
|
77
78
|
if any(
|
|
78
|
-
score.rmse is not None
|
|
79
|
-
for mt in ms.model_trainings
|
|
79
|
+
score.rmse is not None
|
|
80
|
+
for mt in ms.model_trainings
|
|
80
81
|
for score in mt.score
|
|
81
82
|
)
|
|
82
83
|
]
|
|
@@ -88,15 +89,15 @@ class Experiment(Base):
|
|
|
88
89
|
# Get the minimum LogLoss for each model selection
|
|
89
90
|
min_scores = [
|
|
90
91
|
min(
|
|
91
|
-
score.logloss
|
|
92
|
-
for mt in ms.model_trainings
|
|
92
|
+
score.logloss
|
|
93
|
+
for mt in ms.model_trainings
|
|
93
94
|
for score in mt.score
|
|
94
95
|
if score.logloss is not None
|
|
95
96
|
)
|
|
96
97
|
for ms in self.model_selections
|
|
97
98
|
if any(
|
|
98
|
-
score.logloss is not None
|
|
99
|
-
for mt in ms.model_trainings
|
|
99
|
+
score.logloss is not None
|
|
100
|
+
for mt in ms.model_trainings
|
|
100
101
|
for score in mt.score
|
|
101
102
|
)
|
|
102
103
|
]
|
|
@@ -108,15 +109,15 @@ class Experiment(Base):
|
|
|
108
109
|
# Get the minimum RMSE for each model selection
|
|
109
110
|
min_scores = [
|
|
110
111
|
min(
|
|
111
|
-
score.rmse
|
|
112
|
-
for mt in ms.model_trainings
|
|
112
|
+
score.rmse
|
|
113
|
+
for mt in ms.model_trainings
|
|
113
114
|
for score in mt.score
|
|
114
115
|
if score.rmse is not None
|
|
115
116
|
)
|
|
116
117
|
for ms in self.model_selections
|
|
117
118
|
if any(
|
|
118
|
-
score.rmse is not None
|
|
119
|
-
for mt in ms.model_trainings
|
|
119
|
+
score.rmse is not None
|
|
120
|
+
for mt in ms.model_trainings
|
|
120
121
|
for score in mt.score
|
|
121
122
|
)
|
|
122
123
|
]
|
|
@@ -128,20 +129,20 @@ class Experiment(Base):
|
|
|
128
129
|
# Get the minimum LogLoss for each model selection
|
|
129
130
|
min_scores = [
|
|
130
131
|
min(
|
|
131
|
-
score.logloss
|
|
132
|
-
for mt in ms.model_trainings
|
|
132
|
+
score.logloss
|
|
133
|
+
for mt in ms.model_trainings
|
|
133
134
|
for score in mt.score
|
|
134
135
|
if score.logloss is not None
|
|
135
136
|
)
|
|
136
137
|
for ms in self.model_selections
|
|
137
138
|
if any(
|
|
138
|
-
score.logloss is not None
|
|
139
|
-
for mt in ms.model_trainings
|
|
139
|
+
score.logloss is not None
|
|
140
|
+
for mt in ms.model_trainings
|
|
140
141
|
for score in mt.score
|
|
141
142
|
)
|
|
142
143
|
]
|
|
143
144
|
return mean(min_scores) if min_scores else None
|
|
144
|
-
|
|
145
|
+
|
|
145
146
|
test_size = Column(Integer)
|
|
146
147
|
corr_threshold = Column(Float, nullable=False)
|
|
147
148
|
max_features = Column(Integer, nullable=False)
|
|
@@ -179,88 +180,114 @@ class Experiment(Base):
|
|
|
179
180
|
),
|
|
180
181
|
)
|
|
181
182
|
|
|
183
|
+
@classmethod
|
|
184
|
+
@with_db
|
|
185
|
+
def get_all_by_name(cls, name: str | None = None, db=None):
|
|
186
|
+
"""
|
|
187
|
+
Find the most recently created experiment that contains the given name string.
|
|
188
|
+
|
|
189
|
+
Args:
|
|
190
|
+
session: SQLAlchemy session
|
|
191
|
+
name (str): String to search for in experiment names
|
|
192
|
+
|
|
193
|
+
Returns:
|
|
194
|
+
Experiment or None: The most recent matching experiment or None if not found
|
|
195
|
+
"""
|
|
196
|
+
if name is not None:
|
|
197
|
+
return (
|
|
198
|
+
db.query(cls)
|
|
199
|
+
.filter(cls.name.ilike(f"%{name}%"))
|
|
200
|
+
.order_by(cls.created_at.desc())
|
|
201
|
+
.all()
|
|
202
|
+
)
|
|
203
|
+
return db.query(cls).order_by(cls.created_at.desc()).all()
|
|
204
|
+
|
|
182
205
|
@classmethod
|
|
183
206
|
@with_db
|
|
184
207
|
def get_last_by_name(cls, name: str, db=None):
|
|
185
208
|
"""
|
|
186
209
|
Find the most recently created experiment that contains the given name string.
|
|
187
|
-
|
|
210
|
+
|
|
188
211
|
Args:
|
|
189
212
|
session: SQLAlchemy session
|
|
190
213
|
name (str): String to search for in experiment names
|
|
191
|
-
|
|
214
|
+
|
|
192
215
|
Returns:
|
|
193
216
|
Experiment or None: The most recent matching experiment or None if not found
|
|
194
217
|
"""
|
|
195
218
|
return (
|
|
196
219
|
db.query(cls)
|
|
197
|
-
.filter(cls.name.ilike(f
|
|
220
|
+
.filter(cls.name.ilike(f"%{name}%"))
|
|
198
221
|
.order_by(cls.created_at.desc())
|
|
199
222
|
.first()
|
|
200
223
|
)
|
|
201
224
|
|
|
202
225
|
@classmethod
|
|
203
226
|
@with_db
|
|
204
|
-
def get_best_by_score(cls, name: str, metric=
|
|
227
|
+
def get_best_by_score(cls, name: str, metric="both", db=None):
|
|
205
228
|
"""
|
|
206
229
|
Find the experiment with the best score based on average RMSE, LogLoss, or both.
|
|
207
|
-
|
|
230
|
+
|
|
208
231
|
Args:
|
|
209
232
|
metric (str): 'rmse', 'logloss', or 'both' to determine which score to optimize
|
|
210
233
|
db: SQLAlchemy session
|
|
211
|
-
|
|
234
|
+
|
|
212
235
|
Returns:
|
|
213
236
|
Experiment or None: The experiment with the best score or None if not found
|
|
214
237
|
"""
|
|
215
|
-
if metric ==
|
|
238
|
+
if metric == "both":
|
|
216
239
|
# Calculate a combined score: average of normalized RMSE and LogLoss
|
|
217
240
|
# This ensures we're comparing apples to apples by normalizing the scores
|
|
218
|
-
experiments = db.query(cls).filter(cls.name.ilike(f
|
|
241
|
+
experiments = db.query(cls).filter(cls.name.ilike(f"%{name}%")).all()
|
|
219
242
|
if not experiments:
|
|
220
243
|
return None
|
|
221
|
-
|
|
244
|
+
|
|
222
245
|
# Get all scores
|
|
223
246
|
rmse_scores = [e.avg_rmse for e in experiments if e.avg_rmse is not None]
|
|
224
|
-
logloss_scores = [
|
|
225
|
-
|
|
247
|
+
logloss_scores = [
|
|
248
|
+
e.avg_logloss for e in experiments if e.avg_logloss is not None
|
|
249
|
+
]
|
|
250
|
+
|
|
226
251
|
if not rmse_scores or not logloss_scores:
|
|
227
252
|
return None
|
|
228
|
-
|
|
253
|
+
|
|
229
254
|
# Normalize scores (subtract min and divide by range)
|
|
230
255
|
min_rmse = min(rmse_scores)
|
|
231
256
|
range_rmse = max(rmse_scores) - min_rmse
|
|
232
257
|
min_logloss = min(logloss_scores)
|
|
233
258
|
range_logloss = max(logloss_scores) - min_logloss
|
|
234
|
-
|
|
259
|
+
|
|
235
260
|
# Calculate combined score for each experiment
|
|
236
261
|
experiment_scores = []
|
|
237
262
|
for experiment in experiments:
|
|
238
263
|
if experiment.avg_rmse is None or experiment.avg_logloss is None:
|
|
239
264
|
continue
|
|
240
|
-
|
|
265
|
+
|
|
241
266
|
# Normalize both scores
|
|
242
267
|
norm_rmse = (experiment.avg_rmse - min_rmse) / range_rmse
|
|
243
268
|
norm_logloss = (experiment.avg_logloss - min_logloss) / range_logloss
|
|
244
|
-
|
|
269
|
+
|
|
245
270
|
# Calculate combined score (average of normalized scores)
|
|
246
271
|
combined_score = (norm_rmse + norm_logloss) / 2
|
|
247
272
|
experiment_scores.append((experiment, combined_score))
|
|
248
|
-
|
|
273
|
+
|
|
249
274
|
# Sort by combined score (ascending since lower is better)
|
|
250
275
|
experiment_scores.sort(key=lambda x: x[1])
|
|
251
|
-
|
|
276
|
+
|
|
252
277
|
return experiment_scores[0][0] if experiment_scores else None
|
|
253
|
-
|
|
278
|
+
|
|
254
279
|
# For single metric case (rmse or logloss)
|
|
255
|
-
score_property = cls.avg_rmse if metric ==
|
|
256
|
-
|
|
280
|
+
score_property = cls.avg_rmse if metric == "rmse" else cls.avg_logloss
|
|
281
|
+
|
|
257
282
|
return (
|
|
258
283
|
db.query(cls)
|
|
259
|
-
.filter(
|
|
284
|
+
.filter(
|
|
285
|
+
cls.name.ilike(f"%{name}%"), score_property.isnot(None)
|
|
286
|
+
) # Only consider experiments with scores
|
|
260
287
|
.order_by(score_property)
|
|
261
288
|
.first()
|
|
262
289
|
)
|
|
263
|
-
|
|
290
|
+
|
|
264
291
|
def get_features(self, target_number: int):
|
|
265
292
|
targets = [t for t in self.targets if t.name == f"TARGET_{target_number}"]
|
|
266
293
|
if targets:
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
|
|
2
|
-
lecrapaud/api.py,sha256=
|
|
2
|
+
lecrapaud/api.py,sha256=oC64qv4jjfhO6Nn8rS92_xPwT71isSOIBCwyuxyIdqE,20368
|
|
3
3
|
lecrapaud/config.py,sha256=N8kQS1bNEXp6loIw7_X2_OjrbY4_a7UZhZcT1XgsYAs,1121
|
|
4
4
|
lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
|
|
5
5
|
lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
|
|
@@ -12,7 +12,7 @@ lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py,sha256=MiqooJuZ1e
|
|
|
12
12
|
lecrapaud/db/alembic.ini,sha256=TXrZB4pWVLn2EUg867yp6paA_19vGeirO95mTPA3nbs,3699
|
|
13
13
|
lecrapaud/db/models/__init__.py,sha256=Lhyw9fVLdom0Fc6yIP-ip8FjkU1EwVwjae5q2VM815Q,740
|
|
14
14
|
lecrapaud/db/models/base.py,sha256=J9ew-0z_-tnWAwhVvOmVDys2R6jPF_oSca_ny6wpXQE,7606
|
|
15
|
-
lecrapaud/db/models/experiment.py,sha256=
|
|
15
|
+
lecrapaud/db/models/experiment.py,sha256=JcDQDVTFbGPiw-7_pWibCSskli9e9MxYfka0v5cXpCw,10869
|
|
16
16
|
lecrapaud/db/models/feature.py,sha256=5o77O2FyRObnLOCGNj8kaPSGM3pLv1Ov6mXXHYkmnYY,1136
|
|
17
17
|
lecrapaud/db/models/feature_selection.py,sha256=mk42xuw1Sm_7Pznfg7TNc5_S4hscdw79QgIe3Bt9ZRI,3245
|
|
18
18
|
lecrapaud/db/models/feature_selection_rank.py,sha256=Ydsb_rAT58FoSH13wkGjGPByzsjPx3DITXgJ2jgZmow,2198
|
|
@@ -39,7 +39,7 @@ lecrapaud/misc/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIF
|
|
|
39
39
|
lecrapaud/model_selection.py,sha256=vV2oO_fhhJ4dozOQCn_ySJzQT5gMVtQbLR4Wf59bi3A,67885
|
|
40
40
|
lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
|
|
41
41
|
lecrapaud/utils.py,sha256=JdBB1NvbNIx4y0Una-kSZdo1_ZEocc5hwyYFIZKHmGg,8305
|
|
42
|
-
lecrapaud-0.
|
|
43
|
-
lecrapaud-0.
|
|
44
|
-
lecrapaud-0.
|
|
45
|
-
lecrapaud-0.
|
|
42
|
+
lecrapaud-0.16.0.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
|
|
43
|
+
lecrapaud-0.16.0.dist-info/METADATA,sha256=voBvpnTPzxq7SmZ6ZXnk0-qhIs2LovNldrQj_7SYkeU,11081
|
|
44
|
+
lecrapaud-0.16.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
45
|
+
lecrapaud-0.16.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|