lecrapaud 0.14.8__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lecrapaud might be problematic. Click here for more details.
- lecrapaud/api.py +8 -0
- lecrapaud/db/models/experiment.py +179 -8
- {lecrapaud-0.14.8.dist-info → lecrapaud-0.15.0.dist-info}/METADATA +2 -1
- {lecrapaud-0.14.8.dist-info → lecrapaud-0.15.0.dist-info}/RECORD +6 -6
- {lecrapaud-0.14.8.dist-info → lecrapaud-0.15.0.dist-info}/LICENSE +0 -0
- {lecrapaud-0.14.8.dist-info → lecrapaud-0.15.0.dist-info}/WHEEL +0 -0
lecrapaud/api.py
CHANGED
|
@@ -88,6 +88,14 @@ class LeCrapaud:
|
|
|
88
88
|
"""
|
|
89
89
|
return ExperimentEngine(id=id, **kwargs)
|
|
90
90
|
|
|
91
|
+
def get_last_experiement_by_name(self, name: str, **kwargs) -> "ExperimentEngine":
    """Retrieve the last experiment by name.

    Args:
        name: Text passed to ``Experiment.get_last_by_name`` to select the
            experiment.
        **kwargs: Forwarded unchanged to ``ExperimentEngine``.

    Returns:
        An ``ExperimentEngine`` built from the id of the experiment found.

    Raises:
        ValueError: If no experiment is found for ``name``.
    """
    experiment = Experiment.get_last_by_name(name)
    if experiment is None:
        # The lookup yields None when nothing matches; fail with an explicit
        # message instead of an AttributeError on ``None.id``.
        raise ValueError(f"No experiment found with a name containing {name!r}")
    return ExperimentEngine(id=experiment.id, **kwargs)
|
|
94
|
+
|
|
95
|
+
def get_best_experiment_by_name(self, name: str, metric: str = "both", **kwargs) -> "ExperimentEngine":
    """Retrieve the best experiment by score.

    Args:
        name: Text passed to ``Experiment.get_best_by_score`` to select the
            candidate experiments.
        metric: Which score to rank by; forwarded to
            ``Experiment.get_best_by_score`` (defaults to ``"both"``).
        **kwargs: Forwarded unchanged to ``ExperimentEngine``.

    Returns:
        An ``ExperimentEngine`` built from the id of the experiment found.

    Raises:
        ValueError: If no scored experiment is found for ``name``.
    """
    experiment = Experiment.get_best_by_score(name=name, metric=metric)
    if experiment is None:
        # The lookup yields None when nothing matches or nothing is scored;
        # fail with an explicit message instead of an AttributeError.
        raise ValueError(
            f"No scored experiment found with a name containing {name!r} "
            f"(metric={metric!r})"
        )
    return ExperimentEngine(id=experiment.id, **kwargs)
|
|
98
|
+
|
|
91
99
|
def list_experiments(self, limit=1000) -> list["ExperimentEngine"]:
    """Return one ``ExperimentEngine`` per experiment fetched from the database.

    Args:
        limit: Forwarded to ``Experiment.get_all`` (defaults to 1000).
    """
    engines = []
    for record in Experiment.get_all(limit=limit):
        engines.append(ExperimentEngine(id=record.id))
    return engines
|
|
@@ -15,9 +15,15 @@ from sqlalchemy import (
|
|
|
15
15
|
UniqueConstraint,
|
|
16
16
|
func,
|
|
17
17
|
)
|
|
18
|
-
from sqlalchemy.orm import relationship
|
|
18
|
+
from sqlalchemy.orm import relationship, aliased
|
|
19
|
+
from sqlalchemy.ext.hybrid import hybrid_property
|
|
20
|
+
from sqlalchemy import func
|
|
21
|
+
from statistics import fmean as mean
|
|
22
|
+
from lecrapaud.db.models.model_selection import ModelSelection
|
|
23
|
+
from lecrapaud.db.models.model_training import ModelTraining
|
|
24
|
+
from lecrapaud.db.models.score import Score
|
|
19
25
|
|
|
20
|
-
from lecrapaud.db.models.base import Base
|
|
26
|
+
from lecrapaud.db.models.base import Base, with_db
|
|
21
27
|
from lecrapaud.db.models.utils import create_association_table
|
|
22
28
|
|
|
23
29
|
# joins (association tables)
|
|
@@ -48,6 +54,94 @@ class Experiment(Base):
|
|
|
48
54
|
size = Column(Integer, nullable=False)
|
|
49
55
|
train_size = Column(Integer)
|
|
50
56
|
val_size = Column(Integer)
|
|
57
|
+
|
|
58
|
+
# Relationships
|
|
59
|
+
model_selections = relationship(
|
|
60
|
+
"ModelSelection",
|
|
61
|
+
back_populates="experiment",
|
|
62
|
+
cascade="all, delete-orphan",
|
|
63
|
+
lazy="selectin",
|
|
64
|
+
)
|
|
65
|
+
@hybrid_property
|
|
66
|
+
def best_rmse(self):
    """Return the lowest RMSE found on this experiment, or None.

    Every score of every model training of every model selection is
    inspected; scores whose ``rmse`` is None are ignored.
    """
    lowest_per_selection = []
    for selection in self.model_selections:
        values = [
            entry.rmse
            for training in selection.model_trainings
            for entry in training.score
            if entry.rmse is not None
        ]
        # A selection without any usable RMSE contributes nothing.
        if values:
            lowest_per_selection.append(min(values))
    if not lowest_per_selection:
        return None
    return min(lowest_per_selection)
|
|
84
|
+
|
|
85
|
+
@hybrid_property
|
|
86
|
+
def best_logloss(self):
    """Return the lowest LogLoss found on this experiment, or None.

    Every score of every model training of every model selection is
    inspected; scores whose ``logloss`` is None are ignored.
    """
    lowest_per_selection = []
    for selection in self.model_selections:
        values = [
            entry.logloss
            for training in selection.model_trainings
            for entry in training.score
            if entry.logloss is not None
        ]
        # A selection without any usable LogLoss contributes nothing.
        if values:
            lowest_per_selection.append(min(values))
    if not lowest_per_selection:
        return None
    return min(lowest_per_selection)
|
|
104
|
+
|
|
105
|
+
@hybrid_property
|
|
106
|
+
def avg_rmse(self):
    """Return the mean of the per-model-selection minimum RMSE, or None.

    For each model selection the lowest non-None ``rmse`` among the scores
    of its model trainings is taken; the result is the mean of those minima.
    Selections without any non-None RMSE are left out.
    """
    lowest_per_selection = []
    for selection in self.model_selections:
        values = [
            entry.rmse
            for training in selection.model_trainings
            for entry in training.score
            if entry.rmse is not None
        ]
        # A selection without any usable RMSE does not enter the average.
        if values:
            lowest_per_selection.append(min(values))
    if not lowest_per_selection:
        return None
    return mean(lowest_per_selection)
|
|
124
|
+
|
|
125
|
+
@hybrid_property
|
|
126
|
+
def avg_logloss(self):
    """Return the mean of the per-model-selection minimum LogLoss, or None.

    For each model selection the lowest non-None ``logloss`` among the scores
    of its model trainings is taken; the result is the mean of those minima.
    Selections without any non-None LogLoss are left out.
    """
    lowest_per_selection = []
    for selection in self.model_selections:
        values = [
            entry.logloss
            for training in selection.model_trainings
            for entry in training.score
            if entry.logloss is not None
        ]
        # A selection without any usable LogLoss does not enter the average.
        if values:
            lowest_per_selection.append(min(values))
    if not lowest_per_selection:
        return None
    return mean(lowest_per_selection)
|
|
144
|
+
|
|
51
145
|
test_size = Column(Integer)
|
|
52
146
|
corr_threshold = Column(Float, nullable=False)
|
|
53
147
|
max_features = Column(Integer, nullable=False)
|
|
@@ -70,12 +164,7 @@ class Experiment(Base):
|
|
|
70
164
|
cascade="all, delete-orphan",
|
|
71
165
|
lazy="selectin",
|
|
72
166
|
)
|
|
73
|
-
|
|
74
|
-
"ModelSelection",
|
|
75
|
-
back_populates="experiment",
|
|
76
|
-
cascade="all, delete-orphan",
|
|
77
|
-
lazy="selectin",
|
|
78
|
-
)
|
|
167
|
+
|
|
79
168
|
targets = relationship(
|
|
80
169
|
"Target",
|
|
81
170
|
secondary=lecrapaud_experiment_target_association,
|
|
@@ -90,6 +179,88 @@ class Experiment(Base):
|
|
|
90
179
|
),
|
|
91
180
|
)
|
|
92
181
|
|
|
182
|
+
@classmethod
|
|
183
|
+
@with_db
|
|
184
|
+
def get_last_by_name(cls, name: str, db=None):
    """
    Return the newest experiment whose name contains the given text.

    The match is a case-insensitive substring search (``ILIKE '%name%'``)
    and results are ordered by ``created_at``, newest first.

    Args:
        name (str): Text to look for inside experiment names
        db: SQLAlchemy session used to run the query

    Returns:
        Experiment or None: The first row of the ordered result, or None if not found
    """
    pattern = f'%{name}%'
    matching = db.query(cls).filter(cls.name.ilike(pattern))
    newest_first = matching.order_by(cls.created_at.desc())
    return newest_first.first()
|
|
201
|
+
|
|
202
|
+
@classmethod
|
|
203
|
+
@with_db
|
|
204
|
+
def get_best_by_score(cls, name: str, metric='both', db=None):
    """
    Find the experiment with the best (lowest) average RMSE, LogLoss, or both.

    Candidates are the experiments whose name contains ``name``
    (case-insensitive ``ILIKE '%name%'``). Scores are read from each loaded
    experiment's ``avg_rmse`` / ``avg_logloss`` attributes, so the ranking is
    done in Python rather than in SQL.

    Args:
        name (str): Text to look for inside experiment names
        metric (str): 'rmse', 'logloss', or 'both' to determine which score to optimize
        db: SQLAlchemy session

    Returns:
        Experiment or None: The experiment with the best score or None if not found

    Raises:
        ValueError: If ``metric`` is not 'rmse', 'logloss' or 'both'
    """
    if metric not in ('rmse', 'logloss', 'both'):
        raise ValueError(
            f"metric must be 'rmse', 'logloss' or 'both', got {metric!r}"
        )

    experiments = db.query(cls).filter(cls.name.ilike(f'%{name}%')).all()

    # The averages are computed in Python on every access, so read each of
    # them exactly once per experiment.
    scored = [(exp, exp.avg_rmse, exp.avg_logloss) for exp in experiments]

    if metric != 'both':
        # Single metric: keep experiments that have the score, lowest wins.
        column = 1 if metric == 'rmse' else 2
        candidates = [row for row in scored if row[column] is not None]
        if not candidates:
            return None
        return min(candidates, key=lambda row: row[column])[0]

    rmse_scores = [rmse for _, rmse, _ in scored if rmse is not None]
    logloss_scores = [ll for _, _, ll in scored if ll is not None]
    if not rmse_scores or not logloss_scores:
        return None

    # Min-max normalisation bounds, so both metrics share the same 0..1 scale.
    min_rmse = min(rmse_scores)
    range_rmse = max(rmse_scores) - min_rmse
    min_logloss = min(logloss_scores)
    range_logloss = max(logloss_scores) - min_logloss

    def normalize(value, low, span):
        # A zero span means every experiment has the same score (e.g. only
        # one candidate); they are all equally good, so avoid dividing by 0.
        return (value - low) / span if span else 0.0

    combined = [
        (
            exp,
            (
                normalize(rmse, min_rmse, range_rmse)
                + normalize(ll, min_logloss, range_logloss)
            )
            / 2,
        )
        for exp, rmse, ll in scored
        if rmse is not None and ll is not None
    ]
    if not combined:
        return None

    # Lower combined score is better; min() keeps the first one on ties.
    return min(combined, key=lambda item: item[1])[0]
|
|
263
|
+
|
|
93
264
|
def get_features(self, target_number: int):
|
|
94
265
|
targets = [t for t in self.targets if t.name == f"TARGET_{target_number}"]
|
|
95
266
|
if targets:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: lecrapaud
|
|
3
|
-
Version: 0.14.8
|
|
3
|
+
Version: 0.15.0
|
|
4
4
|
Summary: Framework for machine and deep learning, with regression, classification and time series analysis
|
|
5
5
|
License: Apache License
|
|
6
6
|
Author: Pierre H. Gallet
|
|
@@ -19,6 +19,7 @@ Requires-Dist: mlxtend (>=0.23.4)
|
|
|
19
19
|
Requires-Dist: numpy (>=2.1.3)
|
|
20
20
|
Requires-Dist: openai (>=1.88.0)
|
|
21
21
|
Requires-Dist: pandas (>=2.3.0)
|
|
22
|
+
Requires-Dist: pydantic (>=2.9.2)
|
|
22
23
|
Requires-Dist: python-dotenv (>=1.1.0)
|
|
23
24
|
Requires-Dist: scikit-learn (>=1.6.1)
|
|
24
25
|
Requires-Dist: scipy (<1.14.0)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
|
|
2
|
-
lecrapaud/api.py,sha256=
|
|
2
|
+
lecrapaud/api.py,sha256=Pu4761gQuQBmey_P3s9y6Kf4KWD4t9O0OcH9sDuw3xA,20239
|
|
3
3
|
lecrapaud/config.py,sha256=N8kQS1bNEXp6loIw7_X2_OjrbY4_a7UZhZcT1XgsYAs,1121
|
|
4
4
|
lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
|
|
5
5
|
lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
|
|
@@ -12,7 +12,7 @@ lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py,sha256=MiqooJuZ1e
|
|
|
12
12
|
lecrapaud/db/alembic.ini,sha256=TXrZB4pWVLn2EUg867yp6paA_19vGeirO95mTPA3nbs,3699
|
|
13
13
|
lecrapaud/db/models/__init__.py,sha256=Lhyw9fVLdom0Fc6yIP-ip8FjkU1EwVwjae5q2VM815Q,740
|
|
14
14
|
lecrapaud/db/models/base.py,sha256=J9ew-0z_-tnWAwhVvOmVDys2R6jPF_oSca_ny6wpXQE,7606
|
|
15
|
-
lecrapaud/db/models/experiment.py,sha256=
|
|
15
|
+
lecrapaud/db/models/experiment.py,sha256=gOrP28lEvOAxAwYIAxMkB3ip-TDxHMuSeHGv975ufrI,10288
|
|
16
16
|
lecrapaud/db/models/feature.py,sha256=5o77O2FyRObnLOCGNj8kaPSGM3pLv1Ov6mXXHYkmnYY,1136
|
|
17
17
|
lecrapaud/db/models/feature_selection.py,sha256=mk42xuw1Sm_7Pznfg7TNc5_S4hscdw79QgIe3Bt9ZRI,3245
|
|
18
18
|
lecrapaud/db/models/feature_selection_rank.py,sha256=Ydsb_rAT58FoSH13wkGjGPByzsjPx3DITXgJ2jgZmow,2198
|
|
@@ -39,7 +39,7 @@ lecrapaud/misc/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIF
|
|
|
39
39
|
lecrapaud/model_selection.py,sha256=vV2oO_fhhJ4dozOQCn_ySJzQT5gMVtQbLR4Wf59bi3A,67885
|
|
40
40
|
lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
|
|
41
41
|
lecrapaud/utils.py,sha256=JdBB1NvbNIx4y0Una-kSZdo1_ZEocc5hwyYFIZKHmGg,8305
|
|
42
|
-
lecrapaud-0.
|
|
43
|
-
lecrapaud-0.
|
|
44
|
-
lecrapaud-0.
|
|
45
|
-
lecrapaud-0.
|
|
42
|
+
lecrapaud-0.15.0.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
|
|
43
|
+
lecrapaud-0.15.0.dist-info/METADATA,sha256=yHTluJ8CMpXc9feZ6ZQhdoWwdMyR2bD8AImPgVf84uc,11081
|
|
44
|
+
lecrapaud-0.15.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
45
|
+
lecrapaud-0.15.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|