lecrapaud 0.18.10__tar.gz → 0.19.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (48)
  1. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/PKG-INFO +2 -1
  2. lecrapaud-0.19.0/lecrapaud/db/alembic/versions/2025_08_28_1516-c36e9fee22b9_add_avg_precision_to_score.py +34 -0
  3. lecrapaud-0.19.0/lecrapaud/db/alembic/versions/2025_08_28_1622-8b11c1ba982e_change_name_column.py +44 -0
  4. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/models/experiment.py +1 -1
  5. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/models/score.py +1 -0
  6. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/model_selection.py +120 -14
  7. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/search_space.py +36 -0
  8. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/pyproject.toml +2 -1
  9. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/LICENSE +0 -0
  10. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/README.md +0 -0
  11. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/__init__.py +0 -0
  12. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/api.py +0 -0
  13. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/config.py +0 -0
  14. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/__init__.py +0 -0
  15. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/alembic/README +0 -0
  16. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/alembic/env.py +0 -0
  17. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/alembic/script.py.mako +0 -0
  18. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
  19. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
  20. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
  21. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py +0 -0
  22. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/alembic/versions/2025_08_25_1434-7ed9963e732f_add_best_score_to_model_selection.py +0 -0
  23. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/alembic.ini +0 -0
  24. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/models/__init__.py +0 -0
  25. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/models/base.py +0 -0
  26. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/models/feature.py +0 -0
  27. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/models/feature_selection.py +0 -0
  28. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/models/feature_selection_rank.py +0 -0
  29. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/models/model.py +0 -0
  30. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/models/model_selection.py +0 -0
  31. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/models/model_training.py +0 -0
  32. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/models/target.py +0 -0
  33. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/models/utils.py +0 -0
  34. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/session.py +0 -0
  35. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/directories.py +0 -0
  36. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/experiment.py +0 -0
  37. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/feature_engineering.py +0 -0
  38. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/feature_selection.py +0 -0
  39. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/integrations/openai_integration.py +0 -0
  40. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/jobs/__init__.py +0 -0
  41. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/jobs/config.py +0 -0
  42. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/jobs/scheduler.py +0 -0
  43. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/jobs/tasks.py +0 -0
  44. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/misc/tabpfn_tests.ipynb +0 -0
  45. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/misc/test-gpu-bilstm.ipynb +0 -0
  46. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/misc/test-gpu-resnet.ipynb +0 -0
  47. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/misc/test-gpu-transformers.ipynb +0 -0
  48. {lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/utils.py +0 -0
{lecrapaud-0.18.10 → lecrapaud-0.19.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lecrapaud
-Version: 0.18.10
+Version: 0.19.0
 Summary: Framework for machine and deep learning, with regression, classification and time series analysis
 License: Apache License
 Author: Pierre H. Gallet
@@ -8,6 +8,7 @@ Requires-Python: ==3.12.*
 Classifier: License :: Other/Proprietary License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.12
+Requires-Dist: catboost (>=1.2.8)
 Requires-Dist: category-encoders (>=2.8.1)
 Requires-Dist: celery (>=5.5.3)
 Requires-Dist: ftfy (>=6.3.1)

lecrapaud-0.19.0/lecrapaud/db/alembic/versions/2025_08_28_1516-c36e9fee22b9_add_avg_precision_to_score.py (new file)
@@ -0,0 +1,34 @@
+"""add avg precision to score
+
+Revision ID: c36e9fee22b9
+Revises: 7ed9963e732f
+Create Date: 2025-08-28 15:16:34.657593
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+from lecrapaud.config import LECRAPAUD_TABLE_PREFIX
+
+# revision identifiers, used by Alembic.
+revision: str = "c36e9fee22b9"
+down_revision: Union[str, None] = "7ed9963e732f"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.add_column(
+        f"{LECRAPAUD_TABLE_PREFIX}_scores",
+        sa.Column("avg_precision", sa.Float(), nullable=True),
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_column(f"{LECRAPAUD_TABLE_PREFIX}_scores", "avg_precision")
+    # ### end Alembic commands ###
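
For context, a minimal sketch of the effect of this revision, checked with SQLAlchemy's inspector after an upgrade. The table name assumes LECRAPAUD_TABLE_PREFIX resolves to "lecrapaud", and the connection URL is a placeholder:

    import sqlalchemy as sa

    # Placeholder URL; point this at the database lecrapaud is configured against.
    engine = sa.create_engine("mysql+pymysql://user:pass@localhost/lecrapaud")
    # After upgrading to c36e9fee22b9, the scores table gains a nullable FLOAT column.
    columns = {c["name"] for c in sa.inspect(engine).get_columns("lecrapaud_scores")}
    assert "avg_precision" in columns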

lecrapaud-0.19.0/lecrapaud/db/alembic/versions/2025_08_28_1622-8b11c1ba982e_change_name_column.py (new file)
@@ -0,0 +1,44 @@
+"""change name column
+
+Revision ID: 8b11c1ba982e
+Revises: c36e9fee22b9
+Create Date: 2025-08-28 16:22:45.528296
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import mysql
+from lecrapaud.config import LECRAPAUD_TABLE_PREFIX
+
+# revision identifiers, used by Alembic.
+revision: str = "8b11c1ba982e"
+down_revision: Union[str, None] = "c36e9fee22b9"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.alter_column(
+        f"{LECRAPAUD_TABLE_PREFIX}_experiments",
+        "name",
+        existing_type=mysql.VARCHAR(length=50),
+        type_=sa.String(length=255),
+        existing_nullable=False,
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.alter_column(
+        f"{LECRAPAUD_TABLE_PREFIX}_experiments",
+        "name",
+        existing_type=sa.String(length=255),
+        type_=mysql.VARCHAR(length=50),
+        existing_nullable=False,
+    )
+    # ### end Alembic commands ###
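
Both new revisions chain off 7ed9963e732f, so a single upgrade applies them in order. A sketch using Alembic's command API, assuming the packaged alembic.ini works as-is from the repository root:

    from alembic import command
    from alembic.config import Config

    cfg = Config("lecrapaud/db/alembic.ini")  # path taken from the file list above
    # Runs c36e9fee22b9 (avg_precision column) then 8b11c1ba982e (name -> VARCHAR(255)).
    command.upgrade(cfg, "8b11c1ba982e")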

{lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/models/experiment.py
@@ -49,7 +49,7 @@ class Experiment(Base):
         onupdate=func.now(),
         nullable=False,
     )
-    name = Column(String(50), nullable=False)
+    name = Column(String(255), nullable=False)
     path = Column(String(255))  # we do not have this at creation time
     type = Column(String(50), nullable=False)
     size = Column(Integer, nullable=False)

{lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/db/models/score.py
@@ -45,6 +45,7 @@ class Score(Base):
     recall = Column(Float)
     f1 = Column(Float)
     roc_auc = Column(Float)
+    avg_precision = Column(Float)
     thresholds = Column(JSON)
     precision_at_threshold = Column(Float)
     recall_at_threshold = Column(Float)

{lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/model_selection.py
@@ -14,8 +14,6 @@ import pickle
 from pydantic import BaseModel
 import ast
 
-os.environ["COVERAGE_FILE"] = str(Path(".coverage").resolve())
-
 # ML models
 from sklearn.model_selection import TimeSeriesSplit
 from sklearn.calibration import CalibratedClassifierCV
@@ -80,6 +78,8 @@ from lecrapaud.db import (
     Experiment,
 )
 
+os.environ["COVERAGE_FILE"] = str(Path(".coverage").resolve())
+
 # Reproducible result
 keras.utils.set_random_seed(42)
 np.random.seed(42)
@@ -157,8 +157,10 @@ class ModelEngine:
     def fit(self, *args):
         if self.recurrent:
             fit = self.fit_recurrent
-        elif (self.create_model == "lgb") or (self.create_model == "xgb"):
+        elif (self.model_name == "lgb") or (self.model_name == "xgb"):
             fit = self.fit_boosting
+        elif self.model_name == "catboost":
+            fit = self.fit_catboost
         else:
             fit = self.fit_sklearn
         model = fit(*args)
@@ -201,17 +203,110 @@ class ModelEngine:
 
         return model
 
-    def fit_boosting(self, x_train, y_train, x_val, y_val, params):
+    def fit_catboost(self, x_train, y_train, x_val, y_val, params):
         """
-        This is using lightGBM or XGboost C++ librairies
+        Train CatBoost models with native early stopping and log metrics to TensorBoard.
+        Also supports plotting of the primary eval metric if self.plot is True.
         """
-        lightGBM = self.create_model == "lgb"
+        # Prepare constructor parameters
+        ctor_params = dict(params) if params else {}
+        early_stopping_rounds = ctor_params.pop("early_stopping_rounds", None)
+        # Alias support: num_boost_round -> iterations
+        num_boost_round = ctor_params.pop("num_boost_round", None)
+        if num_boost_round is not None and "iterations" not in ctor_params:
+            ctor_params["iterations"] = num_boost_round
+
+        # Determine classification/regression setup
+        labels = np.unique(y_train)
+        num_class = (
+            labels.size
+            if self.target_type == "classification" and labels.size > 2
+            else 1
+        )
+
+        if self.target_type == "regression":
+            ctor_params.setdefault("loss_function", "RMSE")
+            eval_metric = ctor_params.get("eval_metric", "RMSE")
+        else:
+            if num_class <= 2:
+                ctor_params.setdefault("loss_function", "Logloss")
+                eval_metric = ctor_params.get("eval_metric", "Logloss")
+            else:
+                ctor_params.setdefault("loss_function", "MultiClass")
+                eval_metric = ctor_params.get("eval_metric", "MultiClass")
+        ctor_params.setdefault("eval_metric", eval_metric)
+
+        # Instantiate CatBoost model from provided constructor
+        model = self.create_model(**ctor_params, allow_writing_files=False)
+
+        # Train with eval_set and early stopping
+        logger.info(f"Fitting the model {self.model_name}...")
+        logger.info(f"x_train shape: {x_train.shape}, x_val shape: {x_val.shape}")
+        logger.info(f"y_train shape: {y_train.shape}, y_val shape: {y_val.shape}")
+
+        model.fit(
+            x_train,
+            y_train,
+            eval_set=[(x_val, y_val)],
+            use_best_model=True,
+            early_stopping_rounds=early_stopping_rounds,
+            verbose=False,
+        )
+
+        # Retrieve evaluation results
+        evals_result = model.get_evals_result()
+        # CatBoost commonly uses 'learn' and 'validation' (or 'validation_0')
+        learn_key = "learn"
+        val_key = None
+        for k in evals_result.keys():
+            if k != learn_key:
+                val_key = k
+                break
+
+        # Ensure eval_metric exists; otherwise fall back to first available metric
+        if eval_metric not in evals_result.get(learn_key, {}):
+            if evals_result.get(learn_key):
+                eval_metric = next(iter(evals_result[learn_key].keys()))
+
+        # TensorBoard logging
+        writer = SummaryWriter(self.log_dir)
+        try:
+            # learn_scores = evals_result.get(learn_key, {}).get(eval_metric, [])
+            val_scores = (
+                evals_result.get(val_key, {}).get(eval_metric, []) if val_key else []
+            )
+            # for i, v in enumerate(learn_scores):
+            #     writer.add_scalar(f"CatBoost/train/{eval_metric}", v, i)
+            for i, v in enumerate(val_scores):
+                writer.add_scalar(f"CatBoost/{eval_metric}", v, i)
+        finally:
+            writer.close()
+
+        # Optional plotting of training progress
+        if self.plot and eval_metric and learn_key in evals_result and val_key:
+            logs = {
+                "train": evals_result[learn_key].get(eval_metric, []),
+                "val": evals_result[val_key].get(eval_metric, []),
+            }
+            plot_training_progress(
+                logs=logs,
+                model_name=self.model_name,
+                target_number=self.target_number,
+                title_suffix=f"Training Progress - {eval_metric}",
+            )
+
+        # Attach metadata for consistency with sklearn path
+        model.model_name = self.model_name
+        model.target_type = self.target_type
+        logger.info(f"Successfully created a {model.model_name} at {datetime.now()}")
 
-        # Experiments
-        boosting_dataset = lgb.Dataset if lightGBM else xgb.DMatrix
-        train_data = boosting_dataset(x_train, label=y_train)
-        val_data = boosting_dataset(x_val, label=y_val)
+        self._model = model
+        return model
 
+    def fit_boosting(self, x_train, y_train, x_val, y_val, params):
+        """
+        This is using lightGBM or XGboost C++ librairies
+        """
         # Create a TensorBoardX writer
         writer = SummaryWriter(self.log_dir)
         evals_result = {}
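
The new fit_catboost path leans entirely on CatBoost's native API: eval_set plus early_stopping_rounds with use_best_model=True, then get_evals_result() for the per-iteration curves it logs to TensorBoard. A standalone sketch of those calls on synthetic data (not lecrapaud's wrapper itself):

    import numpy as np
    from catboost import CatBoostClassifier

    rng = np.random.default_rng(42)
    X = rng.normal(size=(500, 8))
    y = (X[:, 0] + rng.normal(scale=0.5, size=500) > 0).astype(int)
    X_train, X_val, y_train, y_val = X[:400], X[400:], y[:400], y[400:]

    model = CatBoostClassifier(
        iterations=500,
        loss_function="Logloss",
        eval_metric="Logloss",
        allow_writing_files=False,  # same flag fit_catboost forces on
    )
    model.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)],
        use_best_model=True,           # keep the iteration with the best val score
        early_stopping_rounds=20,
        verbose=False,
    )
    # Returns {"learn": {...}, "validation": {...}}-style dicts, which is what
    # the new code walks to locate the validation key.
    print(model.get_evals_result().keys())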
@@ -223,11 +318,13 @@ class ModelEngine:
             if self.target_type == "classification" and labels.size > 2
             else 1
         )
-        logger.info("Fitting the model...")
+        logger.info(f"Fitting the model {self.model_name}...")
         logger.info(f"x_train shape: {x_train.shape}, x_val shape: {x_val.shape}")
         logger.info(f"y_train shape: {y_train.shape}, y_val shape: {y_val.shape}")
 
-        if lightGBM:
+        if self.model_name == "lgb":
+            train_data = lgb.Dataset(x_train, label=y_train)
+            val_data = lgb.Dataset(x_val, label=y_val)
 
             def tensorboard_callback(env):
                 for i, metric in enumerate(env.evaluation_result_list):
@@ -252,18 +349,23 @@ class ModelEngine:
                     "objective": loss,
                     "metric": eval_metric,
                     "num_class": num_class,
+                    "verbose": -1,
                 },
                 num_boost_round=params["num_boost_round"],
                 train_set=train_data,
                 valid_sets=[train_data, val_data],
                 valid_names=["train", "val"],
                 callbacks=[
-                    lgb.early_stopping(stopping_rounds=params["early_stopping_rounds"]),
+                    lgb.early_stopping(
+                        stopping_rounds=params["early_stopping_rounds"], verbose=False
+                    ),
                     lgb.record_evaluation(evals_result),
                     tensorboard_callback,
                 ],
             )
         else:
+            train_data = xgb.DMatrix(x_train, label=y_train)
+            val_data = xgb.DMatrix(x_val, label=y_val)
 
             class TensorBoardCallback(xgb.callback.TrainingCallback):
@@ -300,6 +402,7 @@ class ModelEngine:
                 if self.target_type == "regression"
                 else ("logloss" if num_class <= 2 else "mlogloss")
             )
+            xgb.set_config(verbosity=0)
             model = xgb.train(
                 params={
                     **params["model_params"],
@@ -318,7 +421,7 @@ class ModelEngine:
                     tensorboard_callback,
                 ],
                 evals_result=evals_result,  # Record evaluation result
-                verbose_eval=0,
+                verbose_eval=10000,
             )
 
         model.model_name = self.create_model
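
These fit_boosting hunks mostly silence per-iteration chatter: LightGBM gets "verbose": -1 in its params plus a non-verbose early-stopping callback, while XGBoost's global verbosity is zeroed and verbose_eval is raised to 10000 so evaluation lines print only every 10000th round. A standalone sketch of the same knobs on synthetic data:

    import numpy as np
    import lightgbm as lgb
    import xgboost as xgb

    xgb.set_config(verbosity=0)  # global XGBoost log level, as in the diff

    X = np.random.rand(200, 5)
    y = np.random.randint(0, 2, 200)
    train_data = lgb.Dataset(X[:150], label=y[:150])
    val_data = lgb.Dataset(X[150:], label=y[150:])
    booster = lgb.train(
        {"objective": "binary", "verbose": -1},  # -1 suppresses LightGBM info/warnings
        train_data,
        valid_sets=[val_data],
        callbacks=[lgb.early_stopping(stopping_rounds=10, verbose=False)],
    )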
@@ -1365,6 +1468,9 @@ def evaluate(
             average=("binary" if num_classes == 2 else "macro"),
         )
         score["ROC_AUC"] = float(roc_auc_score(y_true, y_pred_proba, multi_class="ovr"))
+        score["AVG_PRECISION"] = average_precision_score(
+            y_true, y_pred_proba, average="macro"
+        )
 
         # Store the complete thresholds dictionary
         if len(target_clf_thresholds.keys()) > 1:
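
AVG_PRECISION here is sklearn's average_precision_score, which summarizes the precision-recall curve and is often more informative than ROC AUC on imbalanced classes. A minimal binary example (with average="macro", per-class scores are averaged when targets and scores are given in a one-hot/multilabel layout):

    import numpy as np
    from sklearn.metrics import average_precision_score

    y_true = np.array([0, 0, 1, 1])
    y_score = np.array([0.1, 0.4, 0.35, 0.8])  # P(class 1) per sample
    print(average_precision_score(y_true, y_score))  # 0.8333...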

{lecrapaud-0.18.10 → lecrapaud-0.19.0}/lecrapaud/search_space.py
@@ -15,6 +15,7 @@ from sklearn.naive_bayes import GaussianNB
 # Ensemble models
 from lightgbm import LGBMRegressor, LGBMClassifier
 from xgboost import XGBRegressor, XGBClassifier
+from catboost import CatBoostRegressor, CatBoostClassifier
 from sklearn.ensemble import (
     RandomForestRegressor,
     AdaBoostRegressor,
@@ -464,6 +465,41 @@ ml_models = [
             },
         },
     },
+    {
+        "model_name": "catboost",
+        "recurrent": False,
+        "need_scaling": False,
+        "classification": {
+            "create_model": CatBoostClassifier,
+            "search_params": {
+                "iterations": tune.randint(50, 1000),
+                "num_boost_round": tune.randint(50, 1000),
+                "early_stopping_rounds": tune.randint(5, 50),
+                "learning_rate": tune.loguniform(1e-4, 0.5),
+                "depth": tune.randint(3, 10),
+                "l2_leaf_reg": tune.loguniform(1e-5, 10),
+                "bagging_temperature": tune.uniform(0.0, 1.0),
+                "rsm": tune.quniform(0.6, 1.0, 0.05),
+                "random_state": 42,
+                "verbose": False,
+            },
+        },
+        "regression": {
+            "create_model": CatBoostRegressor,
+            "search_params": {
+                "iterations": tune.randint(50, 1000),
+                "num_boost_round": tune.randint(50, 1000),
+                "early_stopping_rounds": tune.randint(5, 50),
+                "learning_rate": tune.loguniform(1e-4, 0.5),
+                "depth": tune.randint(3, 10),
+                "l2_leaf_reg": tune.loguniform(1e-5, 10),
+                "bagging_temperature": tune.uniform(0.0, 1.0),
+                "rsm": tune.quniform(0.6, 1.0, 0.05),
+                "random_state": 42,
+                "verbose": False,
+            },
+        },
+    },
 ]
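
The search_params above follow Ray Tune's search-space API (tune.randint, tune.loguniform, tune.quniform, ...). A sketch of drawing one concrete configuration from such a space, assuming each value is a Tune Domain exposing .sample():

    from ray import tune

    search_params = {
        "iterations": tune.randint(50, 1000),
        "learning_rate": tune.loguniform(1e-4, 0.5),
        "depth": tune.randint(3, 10),
        "rsm": tune.quniform(0.6, 1.0, 0.05),
    }
    config = {
        k: (v.sample() if hasattr(v, "sample") else v)
        for k, v in search_params.items()
    }
    print(config)  # e.g. {"iterations": 512, "learning_rate": 0.013, ...}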

{lecrapaud-0.18.10 → lecrapaud-0.19.0}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "lecrapaud"
-version = "0.18.10"
+version = "0.19.0"
 description = "Framework for machine and deep learning, with regression, classification and time series analysis"
 authors = [
     {name = "Pierre H. Gallet"}
@@ -9,6 +9,7 @@ license = {text = "Apache License"}
 readme = "README.md"
 requires-python = "==3.12.*"
 dependencies = [
+    "catboost>=1.2.8",
     "category-encoders>=2.8.1",
     "celery>=5.5.3",
     "ftfy>=6.3.1",