lecrapaud 0.14.2__py3-none-any.whl → 0.14.3__py3-none-any.whl
This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lecrapaud might be problematic. Click here for more details.
- lecrapaud/api.py +3 -1
- lecrapaud/experiment.py +1 -1
- lecrapaud/feature_engineering.py +5 -2
- lecrapaud/feature_selection.py +7 -0
- lecrapaud/model_selection.py +16 -6
- {lecrapaud-0.14.2.dist-info → lecrapaud-0.14.3.dist-info}/METADATA +1 -1
- {lecrapaud-0.14.2.dist-info → lecrapaud-0.14.3.dist-info}/RECORD +9 -9
- {lecrapaud-0.14.2.dist-info → lecrapaud-0.14.3.dist-info}/LICENSE +0 -0
- {lecrapaud-0.14.2.dist-info → lecrapaud-0.14.3.dist-info}/WHEEL +0 -0
lecrapaud/api.py
CHANGED
|
@@ -119,7 +119,9 @@ class ExperimentEngine:
|
|
|
119
119
|
os.makedirs(data_dir, exist_ok=True)
|
|
120
120
|
else:
|
|
121
121
|
if data is None:
|
|
122
|
-
raise ValueError(
|
|
122
|
+
raise ValueError(
|
|
123
|
+
"Either id or data must be provided. Data can be a path to a folder containing trained models"
|
|
124
|
+
)
|
|
123
125
|
self.experiment = create_experiment(data=data, **kwargs)
|
|
124
126
|
|
|
125
127
|
# Set all kwargs as instance attributes
|
lecrapaud/experiment.py
CHANGED
|
@@ -35,7 +35,7 @@ def create_experiment(
|
|
|
35
35
|
groups = {}
|
|
36
36
|
if group_column:
|
|
37
37
|
groups["number_of_groups"] = data[group_column].nunique()
|
|
38
|
-
groups["list_of_groups"] = data[group_column].unique().tolist()
|
|
38
|
+
groups["list_of_groups"] = data[group_column].unique().tolist().sort()
|
|
39
39
|
|
|
40
40
|
with get_db() as db:
|
|
41
41
|
all_targets = Target.get_all(db=db)
|
lecrapaud/feature_engineering.py
CHANGED
|
@@ -48,6 +48,7 @@ import pandas as pd
|
|
|
48
48
|
import numpy as np
|
|
49
49
|
from itertools import product
|
|
50
50
|
import joblib
|
|
51
|
+
import os
|
|
51
52
|
|
|
52
53
|
from sklearn.compose import ColumnTransformer
|
|
53
54
|
from sklearn.decomposition import PCA
|
|
@@ -382,9 +383,11 @@ class PreprocessFeature:
|
|
|
382
383
|
return train, val, test
|
|
383
384
|
|
|
384
385
|
def inference(self):
|
|
386
|
+
data = self.data
|
|
385
387
|
# PCA
|
|
386
|
-
|
|
387
|
-
|
|
388
|
+
if os.path.exists(f"{self.preprocessing_dir}/pcas.pkl"):
|
|
389
|
+
pcas = joblib.load(f"{self.preprocessing_dir}/pcas.pkl")
|
|
390
|
+
data, _ = self.add_pca_features(data, pcas=pcas)
|
|
388
391
|
|
|
389
392
|
# Encoding
|
|
390
393
|
transformer = joblib.load(f"{self.preprocessing_dir}/column_transformer.pkl")
|
lecrapaud/feature_selection.py
CHANGED
|
@@ -313,6 +313,13 @@ class FeatureSelectionEngine:
|
|
|
313
313
|
f"Final pre-selection: {len(features)} features below {corr_threshold}% out of {len(features_selected_list)} features, and rejected {len(features_correlated)} features, {100*len(features)/len(features_selected_list):.2f}% features selected"
|
|
314
314
|
)
|
|
315
315
|
|
|
316
|
+
pd.Series(features).to_csv(
|
|
317
|
+
f"{self.feature_selection_dir}/features.csv",
|
|
318
|
+
index=True,
|
|
319
|
+
header=True,
|
|
320
|
+
index_label="ID",
|
|
321
|
+
)
|
|
322
|
+
|
|
316
323
|
# analysis 2
|
|
317
324
|
features_selected_by_every_methods_uncorrelated = list(
|
|
318
325
|
set(features) & set(features_selected_by_every_methods)
|
lecrapaud/model_selection.py
CHANGED
|
@@ -11,6 +11,7 @@ import joblib
|
|
|
11
11
|
import glob
|
|
12
12
|
from pathlib import Path
|
|
13
13
|
import pickle
|
|
14
|
+
from pydantic import BaseModel
|
|
14
15
|
|
|
15
16
|
os.environ["COVERAGE_FILE"] = str(Path(".coverage").resolve())
|
|
16
17
|
|
|
@@ -1585,9 +1586,20 @@ def plot_confusion_matrix(y_true, y_pred):
|
|
|
1585
1586
|
plt.show()
|
|
1586
1587
|
|
|
1587
1588
|
|
|
1589
|
+
class Threshold(BaseModel):
|
|
1590
|
+
threshold: float
|
|
1591
|
+
precision: float
|
|
1592
|
+
recall: float
|
|
1593
|
+
f1: float
|
|
1594
|
+
|
|
1595
|
+
|
|
1596
|
+
class Thresholds(BaseModel):
|
|
1597
|
+
thresholds: dict[str, Threshold]
|
|
1598
|
+
|
|
1599
|
+
|
|
1588
1600
|
def find_best_threshold(
|
|
1589
1601
|
prediction: pd.DataFrame, metric: str = "recall", target_value: float | None = None
|
|
1590
|
-
) ->
|
|
1602
|
+
) -> Thresholds:
|
|
1591
1603
|
"""
|
|
1592
1604
|
General function to find best threshold optimizing recall, precision, or f1.
|
|
1593
1605
|
|
|
@@ -1599,7 +1611,7 @@ def find_best_threshold(
|
|
|
1599
1611
|
- target_value (float | None): minimum acceptable value for the chosen metric.
|
|
1600
1612
|
|
|
1601
1613
|
Returns:
|
|
1602
|
-
-
|
|
1614
|
+
- Thresholds: {class_label: {'threshold', 'precision', 'recall', 'f1'}}
|
|
1603
1615
|
"""
|
|
1604
1616
|
assert metric in {"recall", "precision", "f1"}, "Invalid metric"
|
|
1605
1617
|
y_true = prediction["TARGET"]
|
|
@@ -1687,14 +1699,14 @@ def find_best_threshold(
|
|
|
1687
1699
|
|
|
1688
1700
|
|
|
1689
1701
|
def apply_thresholds(
|
|
1690
|
-
pred_proba: pd.DataFrame, threshold:
|
|
1702
|
+
pred_proba: pd.DataFrame, threshold: Thresholds | float, classes
|
|
1691
1703
|
) -> pd.DataFrame:
|
|
1692
1704
|
"""
|
|
1693
1705
|
Apply thresholds to predicted probabilities.
|
|
1694
1706
|
|
|
1695
1707
|
Parameters:
|
|
1696
1708
|
- pred_proba (pd.DataFrame): Probabilities per class.
|
|
1697
|
-
- threshold (
|
|
1709
|
+
- threshold (Thresholds | float): Global threshold (float) or per-class dict from `find_best_threshold`.
|
|
1698
1710
|
- classes (iterable): List or array of class labels (used for binary classification).
|
|
1699
1711
|
|
|
1700
1712
|
Returns:
|
|
@@ -1817,8 +1829,6 @@ def plot_threshold(prediction, threshold, precision, recall):
|
|
|
1817
1829
|
|
|
1818
1830
|
|
|
1819
1831
|
# OLD - to sort out
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
1832
|
def print_model_estimators(target_dir: str, model_name="linear"):
|
|
1823
1833
|
"""
|
|
1824
1834
|
Look at a specific trained model
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
|
|
2
|
-
lecrapaud/api.py,sha256=
|
|
2
|
+
lecrapaud/api.py,sha256=j3aCdg4J6DCgmBoLiOWd_b-ji2wZVeyoFfoXd07rBXQ,19751
|
|
3
3
|
lecrapaud/config.py,sha256=N8kQS1bNEXp6loIw7_X2_OjrbY4_a7UZhZcT1XgsYAs,1121
|
|
4
4
|
lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
|
|
5
5
|
lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
|
|
@@ -23,9 +23,9 @@ lecrapaud/db/models/score.py,sha256=fSfXLt6Dm-8Fy9ku0urMT5Fa6zNqn4YqVnEO4o3zKVI,
|
|
|
23
23
|
lecrapaud/db/models/target.py,sha256=DKnfeaLU8eT8J_oh_vuFo5-o1CaoXR13xBbswme6Bgk,1649
|
|
24
24
|
lecrapaud/db/session.py,sha256=K9dTyXmO-aF_2r9RRHsDsbW9_zLNDaOcchXlpiv7cSo,2719
|
|
25
25
|
lecrapaud/directories.py,sha256=0LrANuDgbuneSLker60c6q2hmGnQ3mKHIztTGzTx6Gw,826
|
|
26
|
-
lecrapaud/experiment.py,sha256=
|
|
27
|
-
lecrapaud/feature_engineering.py,sha256=
|
|
28
|
-
lecrapaud/feature_selection.py,sha256=
|
|
26
|
+
lecrapaud/experiment.py,sha256=u1whBtRFEhjM2OtKwHuUAp6T_7JXZhf_90luUiV4nD8,2514
|
|
27
|
+
lecrapaud/feature_engineering.py,sha256=2V69qHmbsw9PZiZf64PpMqCL5mDMOukLeAaNsGftq9o,32421
|
|
28
|
+
lecrapaud/feature_selection.py,sha256=6ry-oVPQHbipm1XSE5YsH7AY0lQFt4CFbWiHiRs1nxg,43593
|
|
29
29
|
lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
|
|
30
30
|
lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
|
|
31
31
|
lecrapaud/jobs/config.py,sha256=AmO0j3RFjx8H66dfKw_7vnshaOJb9Ox5BAZ9cwwLFMY,377
|
|
@@ -35,10 +35,10 @@ lecrapaud/misc/tabpfn_tests.ipynb,sha256=VkgsCUJ30d8jaL2VaWtQAgb8ngHPNtPgnXLs7QQ
|
|
|
35
35
|
lecrapaud/misc/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
|
|
36
36
|
lecrapaud/misc/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
|
|
37
37
|
lecrapaud/misc/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
|
|
38
|
-
lecrapaud/model_selection.py,sha256=
|
|
38
|
+
lecrapaud/model_selection.py,sha256=5Q7KJ-oKDcW5ILlFzk4MjHyywB8HlNM6txCFNofku18,67796
|
|
39
39
|
lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
|
|
40
40
|
lecrapaud/utils.py,sha256=JdBB1NvbNIx4y0Una-kSZdo1_ZEocc5hwyYFIZKHmGg,8305
|
|
41
|
-
lecrapaud-0.14.
|
|
42
|
-
lecrapaud-0.14.
|
|
43
|
-
lecrapaud-0.14.
|
|
44
|
-
lecrapaud-0.14.
|
|
41
|
+
lecrapaud-0.14.3.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
|
|
42
|
+
lecrapaud-0.14.3.dist-info/METADATA,sha256=WoPUnqxj35FUK2nddUMy7zAx2DhC_DwPdK8m677Ybtc,11047
|
|
43
|
+
lecrapaud-0.14.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
44
|
+
lecrapaud-0.14.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|