lecrapaud 0.14.2__py3-none-any.whl → 0.14.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lecrapaud might be problematic. Click here for more details.

lecrapaud/api.py CHANGED
@@ -119,7 +119,9 @@ class ExperimentEngine:
119
119
  os.makedirs(data_dir, exist_ok=True)
120
120
  else:
121
121
  if data is None:
122
- raise ValueError("Either id or data must be provided")
122
+ raise ValueError(
123
+ "Either id or data must be provided. Data can be a path to a folder containing trained models"
124
+ )
123
125
  self.experiment = create_experiment(data=data, **kwargs)
124
126
 
125
127
  # Set all kwargs as instance attributes
@@ -100,19 +100,19 @@ class Experiment(Base):
100
100
  )
101
101
 
102
102
  def get_features(self, target_number: int):
103
- feature_selections = self.feature_selections
104
- target_id = [t for t in self.targets if t.name == f"TARGET_{target_number}"][
105
- 0
106
- ].id
107
- feature_selection = [
108
- fs for fs in feature_selections if fs.target_id == target_id
109
- ][0]
110
- features = [f.name for f in feature_selection.features]
103
+ targets = [t for t in self.targets if t.name == f"TARGET_{target_number}"]
104
+ if targets:
105
+ target_id = targets[0].id
106
+ feature_selection = [
107
+ fs for fs in self.feature_selections if fs.target_id == target_id
108
+ ]
109
+ if feature_selection:
110
+ feature_selection = feature_selection[0]
111
+ features = [f.name for f in feature_selection.features]
112
+ return features
111
113
 
112
114
  # fallback to path if no features found
113
- if len(features) == 0:
114
- features = joblib.load(f"{self.path}/TARGET_{target_number}/features.pkl")
115
-
115
+ features = joblib.load(f"{self.path}/TARGET_{target_number}/features.pkl")
116
116
  return features
117
117
 
118
118
  def get_all_features(self, date_column: str = None, group_column: str = None):
lecrapaud/experiment.py CHANGED
@@ -35,7 +35,7 @@ def create_experiment(
35
35
  groups = {}
36
36
  if group_column:
37
37
  groups["number_of_groups"] = data[group_column].nunique()
38
- groups["list_of_groups"] = data[group_column].unique().tolist()
38
+ groups["list_of_groups"] = data[group_column].unique().tolist().sort()
39
39
 
40
40
  with get_db() as db:
41
41
  all_targets = Target.get_all(db=db)
@@ -48,6 +48,7 @@ import pandas as pd
48
48
  import numpy as np
49
49
  from itertools import product
50
50
  import joblib
51
+ import os
51
52
 
52
53
  from sklearn.compose import ColumnTransformer
53
54
  from sklearn.decomposition import PCA
@@ -382,9 +383,11 @@ class PreprocessFeature:
382
383
  return train, val, test
383
384
 
384
385
  def inference(self):
386
+ data = self.data
385
387
  # PCA
386
- pcas = joblib.load(f"{self.preprocessing_dir}/pcas.pkl")
387
- data, _ = self.add_pca_features(self.data, pcas=pcas)
388
+ if os.path.exists(f"{self.preprocessing_dir}/pcas.pkl"):
389
+ pcas = joblib.load(f"{self.preprocessing_dir}/pcas.pkl")
390
+ data, _ = self.add_pca_features(data, pcas=pcas)
388
391
 
389
392
  # Encoding
390
393
  transformer = joblib.load(f"{self.preprocessing_dir}/column_transformer.pkl")
@@ -313,6 +313,13 @@ class FeatureSelectionEngine:
313
313
  f"Final pre-selection: {len(features)} features below {corr_threshold}% out of {len(features_selected_list)} features, and rejected {len(features_correlated)} features, {100*len(features)/len(features_selected_list):.2f}% features selected"
314
314
  )
315
315
 
316
+ pd.Series(features).to_csv(
317
+ f"{self.feature_selection_dir}/features.csv",
318
+ index=True,
319
+ header=True,
320
+ index_label="ID",
321
+ )
322
+
316
323
  # analysis 2
317
324
  features_selected_by_every_methods_uncorrelated = list(
318
325
  set(features) & set(features_selected_by_every_methods)
@@ -11,6 +11,7 @@ import joblib
11
11
  import glob
12
12
  from pathlib import Path
13
13
  import pickle
14
+ from pydantic import BaseModel
14
15
 
15
16
  os.environ["COVERAGE_FILE"] = str(Path(".coverage").resolve())
16
17
 
@@ -1585,9 +1586,20 @@ def plot_confusion_matrix(y_true, y_pred):
1585
1586
  plt.show()
1586
1587
 
1587
1588
 
1589
+ class Threshold(BaseModel):
1590
+ threshold: float
1591
+ precision: float
1592
+ recall: float
1593
+ f1: float
1594
+
1595
+
1596
+ class Thresholds(BaseModel):
1597
+ thresholds: dict[str, Threshold]
1598
+
1599
+
1588
1600
  def find_best_threshold(
1589
1601
  prediction: pd.DataFrame, metric: str = "recall", target_value: float | None = None
1590
- ) -> dict:
1602
+ ) -> Thresholds:
1591
1603
  """
1592
1604
  General function to find best threshold optimizing recall, precision, or f1.
1593
1605
 
@@ -1599,7 +1611,7 @@ def find_best_threshold(
1599
1611
  - target_value (float | None): minimum acceptable value for the chosen metric.
1600
1612
 
1601
1613
  Returns:
1602
- - dict: {class_label: {'threshold', 'precision', 'recall', 'f1'}}
1614
+ - Thresholds: {class_label: {'threshold', 'precision', 'recall', 'f1'}}
1603
1615
  """
1604
1616
  assert metric in {"recall", "precision", "f1"}, "Invalid metric"
1605
1617
  y_true = prediction["TARGET"]
@@ -1687,14 +1699,14 @@ def find_best_threshold(
1687
1699
 
1688
1700
 
1689
1701
  def apply_thresholds(
1690
- pred_proba: pd.DataFrame, threshold: dict | int | float, classes
1702
+ pred_proba: pd.DataFrame, threshold: Thresholds | float, classes
1691
1703
  ) -> pd.DataFrame:
1692
1704
  """
1693
1705
  Apply thresholds to predicted probabilities.
1694
1706
 
1695
1707
  Parameters:
1696
1708
  - pred_proba (pd.DataFrame): Probabilities per class.
1697
- - threshold (float | dict): Global threshold (float) or per-class dict from `find_best_threshold`.
1709
+ - threshold (Thresholds | float): Global threshold (float) or per-class dict from `find_best_threshold`.
1698
1710
  - classes (iterable): List or array of class labels (used for binary classification).
1699
1711
 
1700
1712
  Returns:
@@ -1817,8 +1829,6 @@ def plot_threshold(prediction, threshold, precision, recall):
1817
1829
 
1818
1830
 
1819
1831
  # OLD - to sort out
1820
-
1821
-
1822
1832
  def print_model_estimators(target_dir: str, model_name="linear"):
1823
1833
  """
1824
1834
  Look at a specific trained model
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lecrapaud
3
- Version: 0.14.2
3
+ Version: 0.14.4
4
4
  Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
5
  License: Apache License
6
6
  Author: Pierre H. Gallet
@@ -1,5 +1,5 @@
1
1
  lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
2
- lecrapaud/api.py,sha256=HDJCYSYJ8md9X6ns17sQZB1yo3_O43G5HE7CZ3ICfyc,19655
2
+ lecrapaud/api.py,sha256=j3aCdg4J6DCgmBoLiOWd_b-ji2wZVeyoFfoXd07rBXQ,19751
3
3
  lecrapaud/config.py,sha256=N8kQS1bNEXp6loIw7_X2_OjrbY4_a7UZhZcT1XgsYAs,1121
4
4
  lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
5
5
  lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
@@ -12,7 +12,7 @@ lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py,sha256=sBgPLvvqI_
12
12
  lecrapaud/db/alembic.ini,sha256=TXrZB4pWVLn2EUg867yp6paA_19vGeirO95mTPA3nbs,3699
13
13
  lecrapaud/db/models/__init__.py,sha256=Lhyw9fVLdom0Fc6yIP-ip8FjkU1EwVwjae5q2VM815Q,740
14
14
  lecrapaud/db/models/base.py,sha256=J9ew-0z_-tnWAwhVvOmVDys2R6jPF_oSca_ny6wpXQE,7606
15
- lecrapaud/db/models/experiment.py,sha256=29Ea0PfbiEvXx_0IhZBkd14PqY5v2Adx7LaW6c-6xfM,4135
15
+ lecrapaud/db/models/experiment.py,sha256=x7-zTuCCecE401f3wQ9l6u8HyGOKORGc0b4_z-FMriU,4219
16
16
  lecrapaud/db/models/feature.py,sha256=5o77O2FyRObnLOCGNj8kaPSGM3pLv1Ov6mXXHYkmnYY,1136
17
17
  lecrapaud/db/models/feature_selection.py,sha256=epvOkU3eyhSXvEpRePd9Ylxl3feDvMUf_v6-jeU7fRE,3469
18
18
  lecrapaud/db/models/feature_selection_rank.py,sha256=Ydsb_rAT58FoSH13wkGjGPByzsjPx3DITXgJ2jgZmow,2198
@@ -23,9 +23,9 @@ lecrapaud/db/models/score.py,sha256=fSfXLt6Dm-8Fy9ku0urMT5Fa6zNqn4YqVnEO4o3zKVI,
23
23
  lecrapaud/db/models/target.py,sha256=DKnfeaLU8eT8J_oh_vuFo5-o1CaoXR13xBbswme6Bgk,1649
24
24
  lecrapaud/db/session.py,sha256=K9dTyXmO-aF_2r9RRHsDsbW9_zLNDaOcchXlpiv7cSo,2719
25
25
  lecrapaud/directories.py,sha256=0LrANuDgbuneSLker60c6q2hmGnQ3mKHIztTGzTx6Gw,826
26
- lecrapaud/experiment.py,sha256=xdqUJkfs3MfX2x5EAAcr8_EgfGLkb4TDk9k8TxS85Zc,2507
27
- lecrapaud/feature_engineering.py,sha256=MaMy4mkZ9wN7B8sLrSQcwjlMfUEU7ek8kTCYMz_Nc0o,32318
28
- lecrapaud/feature_selection.py,sha256=u3TWq3G5Xh3geQevGDOZEt_rl_m6-K_CR7SttFtpwKw,43409
26
+ lecrapaud/experiment.py,sha256=u1whBtRFEhjM2OtKwHuUAp6T_7JXZhf_90luUiV4nD8,2514
27
+ lecrapaud/feature_engineering.py,sha256=2V69qHmbsw9PZiZf64PpMqCL5mDMOukLeAaNsGftq9o,32421
28
+ lecrapaud/feature_selection.py,sha256=6ry-oVPQHbipm1XSE5YsH7AY0lQFt4CFbWiHiRs1nxg,43593
29
29
  lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
30
30
  lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
31
31
  lecrapaud/jobs/config.py,sha256=AmO0j3RFjx8H66dfKw_7vnshaOJb9Ox5BAZ9cwwLFMY,377
@@ -35,10 +35,10 @@ lecrapaud/misc/tabpfn_tests.ipynb,sha256=VkgsCUJ30d8jaL2VaWtQAgb8ngHPNtPgnXLs7QQ
35
35
  lecrapaud/misc/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
36
36
  lecrapaud/misc/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
37
37
  lecrapaud/misc/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
38
- lecrapaud/model_selection.py,sha256=Sv-VJaYvYjZUV2lq0vIKUxcp9PVrdm4IE18RG3RSvsw,67577
38
+ lecrapaud/model_selection.py,sha256=5Q7KJ-oKDcW5ILlFzk4MjHyywB8HlNM6txCFNofku18,67796
39
39
  lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
40
40
  lecrapaud/utils.py,sha256=JdBB1NvbNIx4y0Una-kSZdo1_ZEocc5hwyYFIZKHmGg,8305
41
- lecrapaud-0.14.2.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
42
- lecrapaud-0.14.2.dist-info/METADATA,sha256=0wdQquOq8MZ4edbtEp4HdLdb89_N-XD4moPXZOjNizY,11047
43
- lecrapaud-0.14.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
44
- lecrapaud-0.14.2.dist-info/RECORD,,
41
+ lecrapaud-0.14.4.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
42
+ lecrapaud-0.14.4.dist-info/METADATA,sha256=GZzcYaoTXl_lBYTUk97bJDdylGMPr7PahX-H-BqoKWg,11047
43
+ lecrapaud-0.14.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
44
+ lecrapaud-0.14.4.dist-info/RECORD,,