lecrapaud 0.14.2__tar.gz → 0.14.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lecrapaud might be problematic. Click here for more details.

Files changed (44)
  1. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/PKG-INFO +1 -1
  2. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/api.py +3 -1
  3. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/experiment.py +1 -1
  4. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/feature_engineering.py +5 -2
  5. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/feature_selection.py +7 -0
  6. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/model_selection.py +16 -6
  7. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/pyproject.toml +1 -1
  8. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/LICENSE +0 -0
  9. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/README.md +0 -0
  10. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/__init__.py +0 -0
  11. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/config.py +0 -0
  12. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/__init__.py +0 -0
  13. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/alembic/README +0 -0
  14. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/alembic/env.py +0 -0
  15. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/alembic/script.py.mako +0 -0
  16. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
  17. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
  18. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
  19. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py +0 -0
  20. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/alembic.ini +0 -0
  21. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/models/__init__.py +0 -0
  22. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/models/base.py +0 -0
  23. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/models/experiment.py +0 -0
  24. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/models/feature.py +0 -0
  25. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/models/feature_selection.py +0 -0
  26. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/models/feature_selection_rank.py +0 -0
  27. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/models/model.py +0 -0
  28. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/models/model_selection.py +0 -0
  29. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/models/model_training.py +0 -0
  30. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/models/score.py +0 -0
  31. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/models/target.py +0 -0
  32. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/db/session.py +0 -0
  33. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/directories.py +0 -0
  34. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/integrations/openai_integration.py +0 -0
  35. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/jobs/__init__.py +0 -0
  36. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/jobs/config.py +0 -0
  37. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/jobs/scheduler.py +0 -0
  38. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/jobs/tasks.py +0 -0
  39. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/misc/tabpfn_tests.ipynb +0 -0
  40. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/misc/test-gpu-bilstm.ipynb +0 -0
  41. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/misc/test-gpu-resnet.ipynb +0 -0
  42. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/misc/test-gpu-transformers.ipynb +0 -0
  43. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/search_space.py +0 -0
  44. {lecrapaud-0.14.2 → lecrapaud-0.14.3}/lecrapaud/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lecrapaud
3
- Version: 0.14.2
3
+ Version: 0.14.3
4
4
  Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
5
  License: Apache License
6
6
  Author: Pierre H. Gallet
@@ -119,7 +119,9 @@ class ExperimentEngine:
119
119
  os.makedirs(data_dir, exist_ok=True)
120
120
  else:
121
121
  if data is None:
122
- raise ValueError("Either id or data must be provided")
122
+ raise ValueError(
123
+ "Either id or data must be provided. Data can be a path to a folder containing trained models"
124
+ )
123
125
  self.experiment = create_experiment(data=data, **kwargs)
124
126
 
125
127
  # Set all kwargs as instance attributes
@@ -35,7 +35,7 @@ def create_experiment(
35
35
  groups = {}
36
36
  if group_column:
37
37
  groups["number_of_groups"] = data[group_column].nunique()
38
- groups["list_of_groups"] = data[group_column].unique().tolist()
38
+ groups["list_of_groups"] = data[group_column].unique().tolist().sort()
39
39
 
40
40
  with get_db() as db:
41
41
  all_targets = Target.get_all(db=db)
@@ -48,6 +48,7 @@ import pandas as pd
48
48
  import numpy as np
49
49
  from itertools import product
50
50
  import joblib
51
+ import os
51
52
 
52
53
  from sklearn.compose import ColumnTransformer
53
54
  from sklearn.decomposition import PCA
@@ -382,9 +383,11 @@ class PreprocessFeature:
382
383
  return train, val, test
383
384
 
384
385
  def inference(self):
386
+ data = self.data
385
387
  # PCA
386
- pcas = joblib.load(f"{self.preprocessing_dir}/pcas.pkl")
387
- data, _ = self.add_pca_features(self.data, pcas=pcas)
388
+ if os.path.exists(f"{self.preprocessing_dir}/pcas.pkl"):
389
+ pcas = joblib.load(f"{self.preprocessing_dir}/pcas.pkl")
390
+ data, _ = self.add_pca_features(data, pcas=pcas)
388
391
 
389
392
  # Encoding
390
393
  transformer = joblib.load(f"{self.preprocessing_dir}/column_transformer.pkl")
@@ -313,6 +313,13 @@ class FeatureSelectionEngine:
313
313
  f"Final pre-selection: {len(features)} features below {corr_threshold}% out of {len(features_selected_list)} features, and rejected {len(features_correlated)} features, {100*len(features)/len(features_selected_list):.2f}% features selected"
314
314
  )
315
315
 
316
+ pd.Series(features).to_csv(
317
+ f"{self.feature_selection_dir}/features.csv",
318
+ index=True,
319
+ header=True,
320
+ index_label="ID",
321
+ )
322
+
316
323
  # analysis 2
317
324
  features_selected_by_every_methods_uncorrelated = list(
318
325
  set(features) & set(features_selected_by_every_methods)
@@ -11,6 +11,7 @@ import joblib
11
11
  import glob
12
12
  from pathlib import Path
13
13
  import pickle
14
+ from pydantic import BaseModel
14
15
 
15
16
  os.environ["COVERAGE_FILE"] = str(Path(".coverage").resolve())
16
17
 
@@ -1585,9 +1586,20 @@ def plot_confusion_matrix(y_true, y_pred):
1585
1586
  plt.show()
1586
1587
 
1587
1588
 
1589
+ class Threshold(BaseModel):
1590
+ threshold: float
1591
+ precision: float
1592
+ recall: float
1593
+ f1: float
1594
+
1595
+
1596
+ class Thresholds(BaseModel):
1597
+ thresholds: dict[str, Threshold]
1598
+
1599
+
1588
1600
  def find_best_threshold(
1589
1601
  prediction: pd.DataFrame, metric: str = "recall", target_value: float | None = None
1590
- ) -> dict:
1602
+ ) -> Thresholds:
1591
1603
  """
1592
1604
  General function to find best threshold optimizing recall, precision, or f1.
1593
1605
 
@@ -1599,7 +1611,7 @@ def find_best_threshold(
1599
1611
  - target_value (float | None): minimum acceptable value for the chosen metric.
1600
1612
 
1601
1613
  Returns:
1602
- - dict: {class_label: {'threshold', 'precision', 'recall', 'f1'}}
1614
+ - Thresholds: {class_label: {'threshold', 'precision', 'recall', 'f1'}}
1603
1615
  """
1604
1616
  assert metric in {"recall", "precision", "f1"}, "Invalid metric"
1605
1617
  y_true = prediction["TARGET"]
@@ -1687,14 +1699,14 @@ def find_best_threshold(
1687
1699
 
1688
1700
 
1689
1701
  def apply_thresholds(
1690
- pred_proba: pd.DataFrame, threshold: dict | int | float, classes
1702
+ pred_proba: pd.DataFrame, threshold: Thresholds | float, classes
1691
1703
  ) -> pd.DataFrame:
1692
1704
  """
1693
1705
  Apply thresholds to predicted probabilities.
1694
1706
 
1695
1707
  Parameters:
1696
1708
  - pred_proba (pd.DataFrame): Probabilities per class.
1697
- - threshold (float | dict): Global threshold (float) or per-class dict from `find_best_threshold`.
1709
+ - threshold (Thresholds | float): Global threshold (float) or per-class dict from `find_best_threshold`.
1698
1710
  - classes (iterable): List or array of class labels (used for binary classification).
1699
1711
 
1700
1712
  Returns:
@@ -1817,8 +1829,6 @@ def plot_threshold(prediction, threshold, precision, recall):
1817
1829
 
1818
1830
 
1819
1831
  # OLD - to sort out
1820
-
1821
-
1822
1832
  def print_model_estimators(target_dir: str, model_name="linear"):
1823
1833
  """
1824
1834
  Look at a specific trained model
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "lecrapaud"
3
- version = "0.14.2"
3
+ version = "0.14.3"
4
4
  description = "Framework for machine and deep learning, with regression, classification and time series analysis"
5
5
  authors = [
6
6
  {name = "Pierre H. Gallet"}
File without changes
File without changes