lecrapaud 0.18.7__py3-none-any.whl → 0.22.6__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (40)
  1. lecrapaud/__init__.py +22 -1
  2. lecrapaud/{api.py → base.py} +331 -241
  3. lecrapaud/config.py +15 -3
  4. lecrapaud/db/alembic/versions/2025_08_25_1434-7ed9963e732f_add_best_score_to_model_selection.py +9 -4
  5. lecrapaud/db/alembic/versions/2025_08_28_1516-c36e9fee22b9_add_avg_precision_to_score.py +34 -0
  6. lecrapaud/db/alembic/versions/2025_08_28_1622-8b11c1ba982e_change_name_column.py +44 -0
  7. lecrapaud/db/alembic/versions/2025_10_25_0635-07e303521594_add_unique_constraint_to_score.py +39 -0
  8. lecrapaud/db/alembic/versions/2025_10_26_1727-033e0f7eca4f_merge_score_and_model_trainings_into_.py +264 -0
  9. lecrapaud/db/alembic/versions/2025_10_28_2006-0a8fb7826e9b_add_number_of_targets_and_remove_other_.py +75 -0
  10. lecrapaud/db/models/__init__.py +2 -4
  11. lecrapaud/db/models/base.py +122 -67
  12. lecrapaud/db/models/experiment.py +196 -183
  13. lecrapaud/db/models/feature_selection.py +0 -3
  14. lecrapaud/db/models/feature_selection_rank.py +0 -18
  15. lecrapaud/db/models/model_selection.py +2 -2
  16. lecrapaud/db/models/{score.py → model_selection_score.py} +30 -12
  17. lecrapaud/db/session.py +33 -4
  18. lecrapaud/experiment.py +44 -17
  19. lecrapaud/feature_engineering.py +45 -674
  20. lecrapaud/feature_preprocessing.py +1202 -0
  21. lecrapaud/feature_selection.py +145 -332
  22. lecrapaud/integrations/sentry_integration.py +46 -0
  23. lecrapaud/misc/tabpfn_tests.ipynb +2 -2
  24. lecrapaud/mixins.py +247 -0
  25. lecrapaud/model_preprocessing.py +295 -0
  26. lecrapaud/model_selection.py +725 -249
  27. lecrapaud/pipeline.py +548 -0
  28. lecrapaud/search_space.py +38 -1
  29. lecrapaud/utils.py +36 -3
  30. lecrapaud-0.22.6.dist-info/METADATA +423 -0
  31. lecrapaud-0.22.6.dist-info/RECORD +51 -0
  32. {lecrapaud-0.18.7.dist-info → lecrapaud-0.22.6.dist-info}/WHEEL +1 -1
  33. {lecrapaud-0.18.7.dist-info → lecrapaud-0.22.6.dist-info/licenses}/LICENSE +1 -1
  34. lecrapaud/db/models/model_training.py +0 -64
  35. lecrapaud/jobs/__init__.py +0 -13
  36. lecrapaud/jobs/config.py +0 -17
  37. lecrapaud/jobs/scheduler.py +0 -30
  38. lecrapaud/jobs/tasks.py +0 -17
  39. lecrapaud-0.18.7.dist-info/METADATA +0 -248
  40. lecrapaud-0.18.7.dist-info/RECORD +0 -46
lecrapaud/pipeline.py ADDED
@@ -0,0 +1,548 @@
+ """
+ LeCrapaud Pipeline for sklearn integration.
+
+ This module provides a sklearn-compatible pipeline that can be used
+ in sklearn workflows while incorporating LeCrapaud's custom components.
+ """
+
+ from sklearn.pipeline import Pipeline
+ from sklearn.base import BaseEstimator, TransformerMixin
+ from typing import List, Tuple, Optional, Dict, Any
+ import pandas as pd
+
+ from lecrapaud.db import Experiment
+ from lecrapaud.feature_engineering import FeatureEngineering
+ from lecrapaud.feature_preprocessing import FeaturePreprocessor, split_data
+ from lecrapaud.feature_selection import FeatureSelector
+ from lecrapaud.model_preprocessing import ModelPreprocessor, reshape_time_series
+ from lecrapaud.model_selection import ModelSelector
+ from lecrapaud.search_space import all_models
+
+
+ class DataSplitterTransformer(BaseEstimator, TransformerMixin):
+     """
+     Transformer that handles train/val/test data splitting for LeCrapaud pipelines.
+
+     This component splits data and can be used in sklearn pipelines while maintaining
+     the ability to access individual splits.
+     """
+
+     def __init__(
+         self,
+         experiment: Experiment,
+         time_series: bool = False,
+         date_column: str = None,
+         group_column: str = None,
+         val_size: float = 0.2,
+         test_size: float = 0.2,
+         target_numbers: List[int] = None,
+         target_clf: List[int] = None
+     ):
+         self.experiment = experiment
+         self.time_series = time_series
+         self.date_column = date_column
+         self.group_column = group_column
+         self.val_size = val_size
+         self.test_size = test_size
+         self.target_numbers = target_numbers or []
+         self.target_clf = target_clf or []
+
+     def fit(self, X, y=None):
+         """Fit the splitter (no-op; there is nothing to learn)."""
+         return self
+
+     def transform(self, X):
+         """Split the data and return the train split."""
+         train, val, test = split_data(X, experiment=self.experiment)
+
+         # Store splits as attributes for later access
+         self.train_ = train
+         self.val_ = val
+         self.test_ = test
+
+         return train
+
+     def get_splits(self):
+         """Get all data splits."""
+         if not hasattr(self, 'train_'):
+             raise ValueError("Must call transform() first to create splits")
+         return self.train_, self.val_, self.test_
+
+
+ class DataReshaper(BaseEstimator, TransformerMixin):
+     """
+     Transformer that handles time series data reshaping for recurrent models.
+
+     This component checks whether reshaping is needed and applies it when appropriate.
+     """
+
+     def __init__(
+         self,
+         experiment: Experiment,
+         models_idx: List[int] = None,
+         time_series: bool = False,
+         max_timesteps: int = 120,
+         group_column: str = None
+     ):
+         self.experiment = experiment
+         self.models_idx = models_idx or []
+         self.time_series = time_series
+         self.max_timesteps = max_timesteps
+         self.group_column = group_column
+
+     def fit(self, X, y=None):
+         """Fit the reshaper (determines if reshaping is needed)."""
+         # Check if any model requires recurrent processing
+         self.need_reshaping_ = (
+             any(all_models[i].get("recurrent") for i in self.models_idx)
+             and self.time_series
+         )
+         return self
+
+     def transform(self, X):
+         """Transform data by reshaping for time series if needed."""
+         if not self.need_reshaping_:
+             return X
+
+         # Sanity check: make sure we have enough data for max_timesteps
+         if (
+             self.group_column
+             and X.groupby(self.group_column).size().min()
+             < self.max_timesteps
+         ) or X.shape[0] < self.max_timesteps:
+             raise ValueError(
+                 f"Not enough data for group_column {self.group_column} to reshape data for recurrent models"
+             )
+
+         # Get features for reshaping
+         all_features = self.experiment.get_all_features(
+             date_column=getattr(self, 'date_column', None),
+             group_column=self.group_column
+         )
+
+         # Reshape the data
+         reshaped_data = reshape_time_series(
+             self.experiment, all_features, X, timesteps=self.max_timesteps
+         )
+
+         # Store reshaped data as attribute
+         self.reshaped_data_ = reshaped_data
+
+         return X  # Return original data; reshaped data is accessible via get_reshaped_data()
+
+     def get_reshaped_data(self):
+         """Get the reshaped data."""
+         if not hasattr(self, 'reshaped_data_'):
+             return None
+         return self.reshaped_data_
+
+
+ class FullPipelineTransformer(BaseEstimator, TransformerMixin):
+     """
+     Complete LeCrapaud pipeline transformer that handles all steps, including
+     data splitting, preprocessing, and reshaping, in a sklearn-compatible way.
+     """
+
+     def __init__(
+         self,
+         experiment: Experiment,
+         target_numbers: List[int] = None,
+         include_model_selection: bool = False,
+         **pipeline_params
+     ):
+         self.experiment = experiment
+         self.target_numbers = target_numbers or []
+         self.include_model_selection = include_model_selection
+         self.pipeline_params = pipeline_params
+
+         # Extract parameters from experiment context
+         if experiment and hasattr(experiment, 'context'):
+             for key, value in experiment.context.items():
+                 if not hasattr(self, key):
+                     setattr(self, key, value)
+
+         # Set defaults
+         self._set_defaults()
+
+     def _set_defaults(self):
+         """Set default values for pipeline parameters."""
+         defaults = {
+             'time_series': False,
+             'date_column': None,
+             'group_column': None,
+             'val_size': 0.2,
+             'test_size': 0.2,
+             'target_clf': [],
+             'models_idx': [],
+             'max_timesteps': 120
+         }
+
+         for key, default_value in defaults.items():
+             if not hasattr(self, key):
+                 setattr(self, key, default_value)
+
+     def fit(self, X, y=None):
+         """Fit the complete pipeline."""
+         # Step 1: Feature Engineering
+         self.feature_eng_ = FeatureEngineering(experiment=self.experiment)
+         self.feature_eng_.fit(X)
+         data_eng = self.feature_eng_.get_data()
+
+         # Step 2: Data Splitting
+         self.data_splitter_ = DataSplitterTransformer(experiment=self.experiment)
+         train = self.data_splitter_.transform(data_eng)
+         val = self.data_splitter_.val_
+         test = self.data_splitter_.test_
+
+         # Step 3: Feature Preprocessing
+         self.feature_prep_ = FeaturePreprocessor(experiment=self.experiment)
+         self.feature_prep_.fit(train)
+         train_prep = self.feature_prep_.transform(train)
+         val_prep = self.feature_prep_.transform(val) if val is not None else None
+         test_prep = self.feature_prep_.transform(test) if test is not None else None
+
+         # Step 4: Feature Selection (for each target)
+         self.feature_selectors_ = {}
+         for target_number in self.target_numbers:
+             selector = FeatureSelector(
+                 experiment=self.experiment,
+                 target_number=target_number
+             )
+             selector.fit(train_prep)
+             self.feature_selectors_[target_number] = selector
+
+         # Step 5: Model Preprocessing
+         self.model_prep_ = ModelPreprocessor(experiment=self.experiment)
+         self.model_prep_.fit(train_prep)
+         train_scaled = self.model_prep_.transform(train_prep)
+         val_scaled = self.model_prep_.transform(val_prep) if val_prep is not None else None
+         test_scaled = self.model_prep_.transform(test_prep) if test_prep is not None else None
+
+         # Step 6: Data Reshaping (if needed)
+         self.data_reshaper_ = DataReshaper(
+             experiment=self.experiment,
+             models_idx=self.models_idx,
+             time_series=self.time_series,
+             max_timesteps=self.max_timesteps,
+             group_column=self.group_column
+         )
+         self.data_reshaper_.fit(train_scaled)
+         self.data_reshaper_.transform(train_scaled)
+
+         # Step 7: Model Selection (optional)
+         if self.include_model_selection:
+             self.model_selectors_ = {}
+             std_data = {"train": train_scaled, "val": val_scaled, "test": test_scaled}
+             reshaped_data = self.data_reshaper_.get_reshaped_data()
+
+             for target_number in self.target_numbers:
+                 model_selector = ModelSelector(
+                     experiment=self.experiment,
+                     target_number=target_number
+                 )
+                 model_selector.fit(std_data, reshaped_data=reshaped_data)
+                 self.model_selectors_[target_number] = model_selector
+
+         return self
+
+     def transform(self, X):
+         """Transform new data through the fitted pipeline."""
+         # Apply feature engineering
+         self.feature_eng_.fit(X)  # Refit for new data
+         data_eng = self.feature_eng_.get_data()
+
+         # Apply feature preprocessing
+         data_prep = self.feature_prep_.transform(data_eng)
+
+         # Apply model preprocessing
+         data_scaled = self.model_prep_.transform(data_prep)
+
+         # Apply reshaping if needed
+         self.data_reshaper_.transform(data_scaled)
+
+         return data_scaled
+
+     def get_training_splits(self):
+         """Get the training data splits."""
+         if not hasattr(self, 'data_splitter_'):
+             raise ValueError("Must call fit() first")
+         return self.data_splitter_.get_splits()
+
+     def get_reshaped_data(self):
+         """Get the reshaped data for recurrent models."""
+         if not hasattr(self, 'data_reshaper_'):
+             raise ValueError("Must call fit() first")
+         return self.data_reshaper_.get_reshaped_data()
+
+     def get_models(self):
+         """Get the trained models."""
+         if not hasattr(self, 'model_selectors_'):
+             return {}
+         return {num: selector.get_best_model() for num, selector in self.model_selectors_.items()}
+
+
+ class PipelineLeCrapaud(Pipeline):
+     """
+     LeCrapaud pipeline that extends sklearn Pipeline for ML workflows.
+
+     This pipeline provides pre-configured steps for the typical LeCrapaud workflow:
+     1. Feature Engineering
+     2. Feature Preprocessing
+     3. Feature Selection
+     4. Model Preprocessing
+     5. Model Selection
+
+     It can be used as a drop-in replacement for sklearn Pipeline while
+     leveraging LeCrapaud's experiment tracking and domain-specific features.
+     """
+
+     def __init__(
+         self,
+         experiment: Experiment,
+         steps: Optional[List[Tuple[str, BaseEstimator]]] = None,
+         memory=None,
+         verbose=False,
+         target_number: Optional[int] = None,
+         **kwargs
+     ):
+         """
+         Initialize LeCrapaud pipeline.
+
+         Args:
+             experiment: LeCrapaud experiment instance
+             steps: List of (name, estimator) tuples. If None, uses the default workflow
+             memory: Caching parameter (passed to sklearn Pipeline)
+             verbose: Whether to output progress info
+             target_number: Target number for model selection (if using default steps)
+             **kwargs: Additional parameters passed to default estimators
+         """
+         self.experiment = experiment
+         self.target_number = target_number
+         self.step_kwargs = kwargs
+
+         if steps is None:
+             steps = self._create_default_steps()
+
+         super().__init__(steps=steps, memory=memory, verbose=verbose)
+
+     def _create_default_steps(self) -> List[Tuple[str, BaseEstimator]]:
+         """Create default LeCrapaud pipeline steps."""
+         steps = [
+             ('feature_engineering', FeatureEngineering(
+                 experiment=self.experiment,
+                 **self.step_kwargs.get('feature_engineering', {})
+             )),
+             ('feature_preprocessing', FeaturePreprocessor(
+                 experiment=self.experiment,
+                 **self.step_kwargs.get('feature_preprocessing', {})
+             )),
+             ('feature_selection', FeatureSelector(
+                 experiment=self.experiment,
+                 target_number=self.target_number,
+                 **self.step_kwargs.get('feature_selection', {})
+             )),
+             ('model_preprocessing', ModelPreprocessor(
+                 experiment=self.experiment,
+                 **self.step_kwargs.get('model_preprocessing', {})
+             ))
+         ]
+
+         # Add model selection if target_number is specified
+         if self.target_number is not None:
+             steps.append((
+                 'model_selection',
+                 ModelSelector(
+                     experiment=self.experiment,
+                     target_number=self.target_number,
+                     **self.step_kwargs.get('model_selection', {})
+                 )
+             ))
+
+         return steps
+
+     @classmethod
+     def create_feature_pipeline(
+         cls,
+         experiment: Experiment,
+         include_selection: bool = True,
+         target_number: Optional[int] = None,
+         **kwargs
+     ) -> 'PipelineLeCrapaud':
+         """
+         Create a pipeline focused on feature processing only.
+
+         Args:
+             experiment: LeCrapaud experiment instance
+             include_selection: Whether to include the feature selection step
+             target_number: Target number for feature selection
+             **kwargs: Additional parameters for estimators
+
+         Returns:
+             PipelineLeCrapaud: Feature processing pipeline
+         """
+         steps = [
+             ('feature_engineering', FeatureEngineering(
+                 experiment=experiment,
+                 **kwargs.get('feature_engineering', {})
+             )),
+             ('feature_preprocessing', FeaturePreprocessor(
+                 experiment=experiment,
+                 **kwargs.get('feature_preprocessing', {})
+             ))
+         ]
+
+         if include_selection and target_number is not None:
+             steps.append((
+                 'feature_selection',
+                 FeatureSelector(
+                     experiment=experiment,
+                     target_number=target_number,
+                     **kwargs.get('feature_selection', {})
+                 )
+             ))
+
+         return cls(experiment=experiment, steps=steps)
+
+     @classmethod
+     def create_model_pipeline(
+         cls,
+         experiment: Experiment,
+         target_number: int,
+         **kwargs
+     ) -> 'PipelineLeCrapaud':
+         """
+         Create a pipeline focused on model preprocessing and selection.
+
+         Args:
+             experiment: LeCrapaud experiment instance
+             target_number: Target number for model selection
+             **kwargs: Additional parameters for estimators
+
+         Returns:
+             PipelineLeCrapaud: Model pipeline
+         """
+         steps = [
+             ('model_preprocessing', ModelPreprocessor(
+                 experiment=experiment,
+                 **kwargs.get('model_preprocessing', {})
+             )),
+             ('model_selection', ModelSelector(
+                 experiment=experiment,
+                 target_number=target_number,
+                 **kwargs.get('model_selection', {})
+             ))
+         ]
+
+         return cls(experiment=experiment, steps=steps)
+
+     def get_feature_names_out(self, input_features=None):
+         """Get output feature names for transformation."""
+         # Try to get from the last transformer that has this method
+         for name, estimator in reversed(self.steps):
+             if hasattr(estimator, 'get_feature_names_out'):
+                 return estimator.get_feature_names_out(input_features)
+             # For FeatureSelector, try to get selected features
+             elif hasattr(estimator, 'get_selected_features'):
+                 return estimator.get_selected_features()
+
+         return input_features
+
+     def get_experiment(self) -> Experiment:
+         """Get the experiment instance."""
+         return self.experiment
+
+     def get_step_results(self, step_name: str) -> Any:
+         """
+         Get results from a specific pipeline step.
+
+         Args:
+             step_name: Name of the pipeline step
+
+         Returns:
+             Results from the specified step
+         """
+         if step_name not in self.named_steps:
+             raise ValueError(f"Step '{step_name}' not found in pipeline")
+
+         estimator = self.named_steps[step_name]
+
+         # Try common result methods
+         if hasattr(estimator, 'get_data'):
+             return estimator.get_data()
+         elif hasattr(estimator, 'get_selected_features'):
+             return estimator.get_selected_features()
+         elif hasattr(estimator, 'get_best_model'):
+             return estimator.get_best_model()
+         else:
+             return estimator
+
+
+ class LeCrapaudTransformer(BaseEstimator, TransformerMixin):
+     """
+     A transformer wrapper that makes any LeCrapaud estimator compatible
+     with sklearn transformers, allowing them to be used in standard sklearn pipelines.
+     """
+
+     def __init__(self, estimator_class, experiment: Experiment, **estimator_params):
+         """
+         Initialize the transformer wrapper.
+
+         Args:
+             estimator_class: The LeCrapaud estimator class to wrap
+             experiment: LeCrapaud experiment instance
+             **estimator_params: Parameters to pass to the estimator
+         """
+         self.estimator_class = estimator_class
+         self.experiment = experiment
+         self.estimator_params = estimator_params
+         self.estimator_ = None
+
+     def fit(self, X, y=None):
+         """Fit the wrapped estimator."""
+         self.estimator_ = self.estimator_class(
+             experiment=self.experiment,
+             **self.estimator_params
+         )
+         self.estimator_.fit(X, y)
+         return self
+
+     def transform(self, X):
+         """Transform using the fitted estimator."""
+         if self.estimator_ is None:
+             raise ValueError("Transformer has not been fitted yet.")
+
+         # For estimators that don't have transform, use get_data or return X
+         if hasattr(self.estimator_, 'transform'):
+             return self.estimator_.transform(X)
+         elif hasattr(self.estimator_, 'get_data'):
+             return self.estimator_.get_data()
+         else:
+             return X
+
+     def get_params(self, deep=True):
+         """Get parameters for this transformer."""
+         params = {
+             'estimator_class': self.estimator_class,
+             'experiment': self.experiment
+         }
+         if deep and self.estimator_params:
+             for key, value in self.estimator_params.items():
+                 params[key] = value
+         return params
+
+     def set_params(self, **params):
+         """Set parameters for this transformer."""
+         estimator_params = {}
+         base_params = {}
+
+         for key, value in params.items():
+             if key in ['estimator_class', 'experiment']:
+                 base_params[key] = value
+             else:
+                 estimator_params[key] = value
+
+         for key, value in base_params.items():
+             setattr(self, key, value)
+
+         self.estimator_params.update(estimator_params)
+         return self
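
For orientation, a minimal usage sketch of the new entry points follows. This is not part of the package diff: `experiment` and `df` are placeholders (an existing Experiment instance and a pandas DataFrame of raw features/targets), and it assumes the LeCrapaud step estimators expose sklearn's fit/transform interface as the module docstring intends.

# Hypothetical usage sketch (not from the package).
from lecrapaud.pipeline import PipelineLeCrapaud, LeCrapaudTransformer
from lecrapaud.feature_engineering import FeatureEngineering

# Default workflow; passing target_number also appends the model_selection step.
pipe = PipelineLeCrapaud(experiment=experiment, target_number=1)
pipe.fit(df)

selected = pipe.get_step_results("feature_selection")  # selected features
best = pipe.get_step_results("model_selection")        # best model for target 1

# Any LeCrapaud estimator can also be wrapped for a plain sklearn Pipeline:
step = LeCrapaudTransformer(FeatureEngineering, experiment=experiment)
engineered = step.fit(df).transform(df)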
lecrapaud/search_space.py CHANGED
@@ -15,6 +15,7 @@ from sklearn.naive_bayes import GaussianNB
  # Ensemble models
  from lightgbm import LGBMRegressor, LGBMClassifier
  from xgboost import XGBRegressor, XGBClassifier
+ from catboost import CatBoostRegressor, CatBoostClassifier
  from sklearn.ensemble import (
      RandomForestRegressor,
      AdaBoostRegressor,
@@ -50,7 +51,8 @@ from keras.activations import sigmoid
  from ray import tune
  import pandas as pd

- # we cannot use tune.sample_from function to make conditionnal search space, because hyperopt and bayesian opt need a fixed search space
+ # we cannot use the tune.sample_from function to make a conditional search space,
+ # because hyperopt and Bayesian opt need a fixed search space

  ml_models = [
      {
@@ -464,6 +466,41 @@ ml_models = [
              },
          },
      },
+     {
+         "model_name": "catboost",
+         "recurrent": False,
+         "need_scaling": False,
+         "classification": {
+             "create_model": CatBoostClassifier,
+             "search_params": {
+                 "iterations": tune.randint(50, 1000),
+                 "num_boost_round": tune.randint(50, 1000),
+                 "early_stopping_rounds": tune.randint(5, 50),
+                 "learning_rate": tune.loguniform(1e-4, 0.5),
+                 "depth": tune.randint(3, 10),
+                 "l2_leaf_reg": tune.loguniform(1e-5, 10),
+                 "bagging_temperature": tune.uniform(0.0, 1.0),
+                 "rsm": tune.quniform(0.6, 1.0, 0.05),
+                 "random_state": 42,
+                 "verbose": False,
+             },
+         },
+         "regression": {
+             "create_model": CatBoostRegressor,
+             "search_params": {
+                 "iterations": tune.randint(50, 1000),
+                 "num_boost_round": tune.randint(50, 1000),
+                 "early_stopping_rounds": tune.randint(5, 50),
+                 "learning_rate": tune.loguniform(1e-4, 0.5),
+                 "depth": tune.randint(3, 10),
+                 "l2_leaf_reg": tune.loguniform(1e-5, 10),
+                 "bagging_temperature": tune.uniform(0.0, 1.0),
+                 "rsm": tune.quniform(0.6, 1.0, 0.05),
+                 "random_state": 42,
+                 "verbose": False,
+             },
+         },
+     },
  ]

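The new catboost entry follows the fixed-search-space convention noted in the comment above: every hyperparameter is declared up front as a Ray Tune distribution rather than sampled conditionally. As an illustrative sketch (Tune's Domain objects expose a `.sample()` method; how LeCrapaud actually drives Ray is not shown in this diff), each distribution resolves to a concrete value per trial:

# Illustrative only: draw one concrete configuration from a few of the
# catboost search dimensions defined above.
from ray import tune

space = {
    "iterations": tune.randint(50, 1000),
    "learning_rate": tune.loguniform(1e-4, 0.5),
    "depth": tune.randint(3, 10),
    "rsm": tune.quniform(0.6, 1.0, 0.05),
}
concrete = {k: v.sample() for k, v in space.items()}
print(concrete)  # e.g. {'iterations': 417, 'learning_rate': 0.013, 'depth': 7, 'rsm': 0.85}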
lecrapaud/utils.py CHANGED
@@ -11,7 +11,7 @@ import re
  import string

  from lecrapaud.directories import logger_dir
- from lecrapaud.config import LOGGING_LEVEL, PYTHON_ENV, LECRAPAUD_LOCAL
+ from lecrapaud.config import LOGGING_LEVEL, PYTHON_ENV


  _LECRAPAUD_LOGGER_ALREADY_CONFIGURED = False
@@ -59,6 +59,14 @@ def setup_logger():
      file_handler.setLevel(log_level)
      logger.addHandler(file_handler)

+     try:
+         from lecrapaud.integrations.sentry_integration import init_sentry
+
+         if init_sentry():
+             logger.info("Sentry logging enabled")
+     except Exception as exc:
+         logger.info(f"Sentry logging disabled: {exc}")
+
      _LECRAPAUD_LOGGER_ALREADY_CONFIGURED = True
      return logger

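The body of lecrapaud/integrations/sentry_integration.py appears in the file list above (+46 lines) but is not shown in this diff. For context, a plausible minimal shape of init_sentry, assuming the standard sentry_sdk API and a DSN read from the environment; the actual implementation may differ:

# Hypothetical sketch only; the real module is not shown in this diff.
import os

def init_sentry() -> bool:
    dsn = os.getenv("SENTRY_DSN")
    if not dsn:
        return False  # no DSN configured: Sentry stays disabled
    import sentry_sdk
    sentry_sdk.init(dsn=dsn)  # standard sentry_sdk entry point
    return True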
@@ -232,9 +240,28 @@ def remove_accents(text: str) -> str:
  def serialize_for_json(obj):
      """
      Recursively convert any object into a JSON-serializable structure.
-     Classes and class instances are converted to readable strings like 'ClassName()'.
+     Handles NumPy types, datetime objects, and class instances.
      """
-     if isinstance(obj, (str, int, float, bool, type(None))):
+     import numpy as np
+     from datetime import datetime, date
+     import pandas as pd
+
+     # Handle NumPy types
+     if isinstance(obj, (np.integer, np.int64, np.int32, np.int16)):
+         return int(obj)
+     elif isinstance(obj, (np.floating, np.float64, np.float32, np.float16)):
+         return float(obj)
+     elif isinstance(obj, np.ndarray):
+         return obj.tolist()
+     elif isinstance(obj, np.bool_):
+         return bool(obj)
+
+     # Handle datetime types
+     elif isinstance(obj, (datetime, date, pd.Timestamp)):
+         return obj.isoformat()
+
+     # Handle basic Python types
+     elif isinstance(obj, (str, int, float, bool, type(None))):
          return obj
      elif isinstance(obj, dict):
          return {str(k): serialize_for_json(v) for k, v in obj.items()}
@@ -244,6 +271,12 @@
          # A class/type object like int, str, etc.
          return obj.__name__
      elif hasattr(obj, "__class__"):
+         # For other objects, return their string representation
          return f"{obj.__class__.__name__}()"
      else:
          return str(obj)
+
+
+ def strip_timestamp_suffix(name: str) -> str:
+     # Matches an underscore followed by 8 digits, another underscore, then 6 digits at the end
+     return re.sub(r"_\d{8}_\d{6}$", "", name)
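
Taken together, the two utils.py additions behave roughly as follows (illustrative inputs and outputs, derived from the code above):

# Expected behavior of the new helpers.
import numpy as np
from datetime import datetime
from lecrapaud.utils import serialize_for_json, strip_timestamp_suffix

serialize_for_json({"auc": np.float64(0.91), "at": datetime(2025, 10, 28)})
# -> {'auc': 0.91, 'at': '2025-10-28T00:00:00'}

strip_timestamp_suffix("my_experiment_20251028_200612")
# -> 'my_experiment'
strip_timestamp_suffix("no_suffix_here")  # no trailing _YYYYMMDD_HHMMSS: unchanged
# -> 'no_suffix_here'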