lecrapaud 0.14.8__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lecrapaud might be problematic. Click here for more details.

lecrapaud/api.py CHANGED
@@ -88,9 +88,26 @@ class LeCrapaud:
88
88
  """
89
89
  return ExperimentEngine(id=id, **kwargs)
90
90
 
91
- def list_experiments(self, limit=1000) -> list["ExperimentEngine"]:
91
+ def get_last_experiement_by_name(self, name: str, **kwargs) -> "ExperimentEngine":
92
+ """Retrieve the last experiment by name."""
93
+ return ExperimentEngine(id=Experiment.get_last_by_name(name).id, **kwargs)
94
+
95
+ def get_best_experiment_by_name(
96
+ self, name: str, metric: str = "both", **kwargs
97
+ ) -> "ExperimentEngine":
98
+ """Retrieve the best experiment by score."""
99
+ return ExperimentEngine(
100
+ id=Experiment.get_best_by_score(name=name, metric=metric).id, **kwargs
101
+ )
102
+
103
+ def list_experiments(
104
+ self, name: str = None, limit: int = 1000
105
+ ) -> list["ExperimentEngine"]:
92
106
  """List all experiments in the database."""
93
- return [ExperimentEngine(id=exp.id) for exp in Experiment.get_all(limit=limit)]
107
+ return [
108
+ ExperimentEngine(id=exp.id)
109
+ for exp in Experiment.get_all_by_name(name=name, limit=limit)
110
+ ]
94
111
 
95
112
 
96
113
  class ExperimentEngine:
@@ -15,9 +15,15 @@ from sqlalchemy import (
15
15
  UniqueConstraint,
16
16
  func,
17
17
  )
18
- from sqlalchemy.orm import relationship
18
+ from sqlalchemy.orm import relationship, aliased
19
+ from sqlalchemy.ext.hybrid import hybrid_property
20
+ from sqlalchemy import func
21
+ from statistics import fmean as mean
22
+ from lecrapaud.db.models.model_selection import ModelSelection
23
+ from lecrapaud.db.models.model_training import ModelTraining
24
+ from lecrapaud.db.models.score import Score
19
25
 
20
- from lecrapaud.db.models.base import Base
26
+ from lecrapaud.db.models.base import Base, with_db
21
27
  from lecrapaud.db.models.utils import create_association_table
22
28
 
23
29
  # jointures
@@ -26,7 +32,7 @@ lecrapaud_experiment_target_association = create_association_table(
26
32
  table1="experiments",
27
33
  column1="experiment",
28
34
  table2="targets",
29
- column2="target"
35
+ column2="target",
30
36
  )
31
37
 
32
38
 
@@ -48,6 +54,95 @@ class Experiment(Base):
48
54
  size = Column(Integer, nullable=False)
49
55
  train_size = Column(Integer)
50
56
  val_size = Column(Integer)
57
+
58
+ # Relationships
59
+ model_selections = relationship(
60
+ "ModelSelection",
61
+ back_populates="experiment",
62
+ cascade="all, delete-orphan",
63
+ lazy="selectin",
64
+ )
65
+
66
+ @hybrid_property
67
+ def best_rmse(self):
68
+ """Best RMSE score across all model selections and trainings."""
69
+ # Get the minimum RMSE for each model selection
70
+ min_scores = [
71
+ min(
72
+ score.rmse
73
+ for mt in ms.model_trainings
74
+ for score in mt.score
75
+ if score.rmse is not None
76
+ )
77
+ for ms in self.model_selections
78
+ if any(
79
+ score.rmse is not None
80
+ for mt in ms.model_trainings
81
+ for score in mt.score
82
+ )
83
+ ]
84
+ return min(min_scores) if min_scores else None
85
+
86
+ @hybrid_property
87
+ def best_logloss(self):
88
+ """Best LogLoss score across all model selections and trainings."""
89
+ # Get the minimum LogLoss for each model selection
90
+ min_scores = [
91
+ min(
92
+ score.logloss
93
+ for mt in ms.model_trainings
94
+ for score in mt.score
95
+ if score.logloss is not None
96
+ )
97
+ for ms in self.model_selections
98
+ if any(
99
+ score.logloss is not None
100
+ for mt in ms.model_trainings
101
+ for score in mt.score
102
+ )
103
+ ]
104
+ return min(min_scores) if min_scores else None
105
+
106
+ @hybrid_property
107
+ def avg_rmse(self):
108
+ """Average RMSE score across all model selections and trainings."""
109
+ # Get the minimum RMSE for each model selection
110
+ min_scores = [
111
+ min(
112
+ score.rmse
113
+ for mt in ms.model_trainings
114
+ for score in mt.score
115
+ if score.rmse is not None
116
+ )
117
+ for ms in self.model_selections
118
+ if any(
119
+ score.rmse is not None
120
+ for mt in ms.model_trainings
121
+ for score in mt.score
122
+ )
123
+ ]
124
+ return mean(min_scores) if min_scores else None
125
+
126
+ @hybrid_property
127
+ def avg_logloss(self):
128
+ """Average LogLoss score across all model selections and trainings."""
129
+ # Get the minimum LogLoss for each model selection
130
+ min_scores = [
131
+ min(
132
+ score.logloss
133
+ for mt in ms.model_trainings
134
+ for score in mt.score
135
+ if score.logloss is not None
136
+ )
137
+ for ms in self.model_selections
138
+ if any(
139
+ score.logloss is not None
140
+ for mt in ms.model_trainings
141
+ for score in mt.score
142
+ )
143
+ ]
144
+ return mean(min_scores) if min_scores else None
145
+
51
146
  test_size = Column(Integer)
52
147
  corr_threshold = Column(Float, nullable=False)
53
148
  max_features = Column(Integer, nullable=False)
@@ -70,12 +165,7 @@ class Experiment(Base):
70
165
  cascade="all, delete-orphan",
71
166
  lazy="selectin",
72
167
  )
73
- model_selections = relationship(
74
- "ModelSelection",
75
- back_populates="experiment",
76
- cascade="all, delete-orphan",
77
- lazy="selectin",
78
- )
168
+
79
169
  targets = relationship(
80
170
  "Target",
81
171
  secondary=lecrapaud_experiment_target_association,
@@ -90,6 +180,114 @@ class Experiment(Base):
90
180
  ),
91
181
  )
92
182
 
183
+ @classmethod
184
+ @with_db
185
+ def get_all_by_name(cls, name: str | None = None, db=None):
186
+ """
187
+ Find the most recently created experiment that contains the given name string.
188
+
189
+ Args:
190
+ session: SQLAlchemy session
191
+ name (str): String to search for in experiment names
192
+
193
+ Returns:
194
+ Experiment or None: The most recent matching experiment or None if not found
195
+ """
196
+ if name is not None:
197
+ return (
198
+ db.query(cls)
199
+ .filter(cls.name.ilike(f"%{name}%"))
200
+ .order_by(cls.created_at.desc())
201
+ .all()
202
+ )
203
+ return db.query(cls).order_by(cls.created_at.desc()).all()
204
+
205
+ @classmethod
206
+ @with_db
207
+ def get_last_by_name(cls, name: str, db=None):
208
+ """
209
+ Find the most recently created experiment that contains the given name string.
210
+
211
+ Args:
212
+ session: SQLAlchemy session
213
+ name (str): String to search for in experiment names
214
+
215
+ Returns:
216
+ Experiment or None: The most recent matching experiment or None if not found
217
+ """
218
+ return (
219
+ db.query(cls)
220
+ .filter(cls.name.ilike(f"%{name}%"))
221
+ .order_by(cls.created_at.desc())
222
+ .first()
223
+ )
224
+
225
+ @classmethod
226
+ @with_db
227
+ def get_best_by_score(cls, name: str, metric="both", db=None):
228
+ """
229
+ Find the experiment with the best score based on average RMSE, LogLoss, or both.
230
+
231
+ Args:
232
+ metric (str): 'rmse', 'logloss', or 'both' to determine which score to optimize
233
+ db: SQLAlchemy session
234
+
235
+ Returns:
236
+ Experiment or None: The experiment with the best score or None if not found
237
+ """
238
+ if metric == "both":
239
+ # Calculate a combined score: average of normalized RMSE and LogLoss
240
+ # This ensures we're comparing apples to apples by normalizing the scores
241
+ experiments = db.query(cls).filter(cls.name.ilike(f"%{name}%")).all()
242
+ if not experiments:
243
+ return None
244
+
245
+ # Get all scores
246
+ rmse_scores = [e.avg_rmse for e in experiments if e.avg_rmse is not None]
247
+ logloss_scores = [
248
+ e.avg_logloss for e in experiments if e.avg_logloss is not None
249
+ ]
250
+
251
+ if not rmse_scores or not logloss_scores:
252
+ return None
253
+
254
+ # Normalize scores (subtract min and divide by range)
255
+ min_rmse = min(rmse_scores)
256
+ range_rmse = max(rmse_scores) - min_rmse
257
+ min_logloss = min(logloss_scores)
258
+ range_logloss = max(logloss_scores) - min_logloss
259
+
260
+ # Calculate combined score for each experiment
261
+ experiment_scores = []
262
+ for experiment in experiments:
263
+ if experiment.avg_rmse is None or experiment.avg_logloss is None:
264
+ continue
265
+
266
+ # Normalize both scores
267
+ norm_rmse = (experiment.avg_rmse - min_rmse) / range_rmse
268
+ norm_logloss = (experiment.avg_logloss - min_logloss) / range_logloss
269
+
270
+ # Calculate combined score (average of normalized scores)
271
+ combined_score = (norm_rmse + norm_logloss) / 2
272
+ experiment_scores.append((experiment, combined_score))
273
+
274
+ # Sort by combined score (ascending since lower is better)
275
+ experiment_scores.sort(key=lambda x: x[1])
276
+
277
+ return experiment_scores[0][0] if experiment_scores else None
278
+
279
+ # For single metric case (rmse or logloss)
280
+ score_property = cls.avg_rmse if metric == "rmse" else cls.avg_logloss
281
+
282
+ return (
283
+ db.query(cls)
284
+ .filter(
285
+ cls.name.ilike(f"%{name}%"), score_property.isnot(None)
286
+ ) # Only consider experiments with scores
287
+ .order_by(score_property)
288
+ .first()
289
+ )
290
+
93
291
  def get_features(self, target_number: int):
94
292
  targets = [t for t in self.targets if t.name == f"TARGET_{target_number}"]
95
293
  if targets:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lecrapaud
3
- Version: 0.14.8
3
+ Version: 0.16.0
4
4
  Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
5
  License: Apache License
6
6
  Author: Pierre H. Gallet
@@ -19,6 +19,7 @@ Requires-Dist: mlxtend (>=0.23.4)
19
19
  Requires-Dist: numpy (>=2.1.3)
20
20
  Requires-Dist: openai (>=1.88.0)
21
21
  Requires-Dist: pandas (>=2.3.0)
22
+ Requires-Dist: pydantic (>=2.9.2)
22
23
  Requires-Dist: python-dotenv (>=1.1.0)
23
24
  Requires-Dist: scikit-learn (>=1.6.1)
24
25
  Requires-Dist: scipy (<1.14.0)
@@ -1,5 +1,5 @@
1
1
  lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
2
- lecrapaud/api.py,sha256=j3aCdg4J6DCgmBoLiOWd_b-ji2wZVeyoFfoXd07rBXQ,19751
2
+ lecrapaud/api.py,sha256=oC64qv4jjfhO6Nn8rS92_xPwT71isSOIBCwyuxyIdqE,20368
3
3
  lecrapaud/config.py,sha256=N8kQS1bNEXp6loIw7_X2_OjrbY4_a7UZhZcT1XgsYAs,1121
4
4
  lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
5
5
  lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
@@ -12,7 +12,7 @@ lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py,sha256=MiqooJuZ1e
12
12
  lecrapaud/db/alembic.ini,sha256=TXrZB4pWVLn2EUg867yp6paA_19vGeirO95mTPA3nbs,3699
13
13
  lecrapaud/db/models/__init__.py,sha256=Lhyw9fVLdom0Fc6yIP-ip8FjkU1EwVwjae5q2VM815Q,740
14
14
  lecrapaud/db/models/base.py,sha256=J9ew-0z_-tnWAwhVvOmVDys2R6jPF_oSca_ny6wpXQE,7606
15
- lecrapaud/db/models/experiment.py,sha256=5vKttTRLCcb2oS9-BHZhGxRRckM8E3C8LBdW34tPJuQ,3964
15
+ lecrapaud/db/models/experiment.py,sha256=JcDQDVTFbGPiw-7_pWibCSskli9e9MxYfka0v5cXpCw,10869
16
16
  lecrapaud/db/models/feature.py,sha256=5o77O2FyRObnLOCGNj8kaPSGM3pLv1Ov6mXXHYkmnYY,1136
17
17
  lecrapaud/db/models/feature_selection.py,sha256=mk42xuw1Sm_7Pznfg7TNc5_S4hscdw79QgIe3Bt9ZRI,3245
18
18
  lecrapaud/db/models/feature_selection_rank.py,sha256=Ydsb_rAT58FoSH13wkGjGPByzsjPx3DITXgJ2jgZmow,2198
@@ -39,7 +39,7 @@ lecrapaud/misc/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIF
39
39
  lecrapaud/model_selection.py,sha256=vV2oO_fhhJ4dozOQCn_ySJzQT5gMVtQbLR4Wf59bi3A,67885
40
40
  lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
41
41
  lecrapaud/utils.py,sha256=JdBB1NvbNIx4y0Una-kSZdo1_ZEocc5hwyYFIZKHmGg,8305
42
- lecrapaud-0.14.8.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
43
- lecrapaud-0.14.8.dist-info/METADATA,sha256=6ZreLSTSEwgFC8hpaqA8PInDhL-T20n2uwLGg1Uabu4,11047
44
- lecrapaud-0.14.8.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
45
- lecrapaud-0.14.8.dist-info/RECORD,,
42
+ lecrapaud-0.16.0.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
43
+ lecrapaud-0.16.0.dist-info/METADATA,sha256=voBvpnTPzxq7SmZ6ZXnk0-qhIs2LovNldrQj_7SYkeU,11081
44
+ lecrapaud-0.16.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
45
+ lecrapaud-0.16.0.dist-info/RECORD,,