lecrapaud 0.11.2__tar.gz → 0.11.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lecrapaud might be problematic. Click here for more details.
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/PKG-INFO +1 -1
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/api.py +7 -2
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/model_selection.py +116 -31
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/utils.py +6 -1
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/pyproject.toml +1 -1
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/LICENSE +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/README.md +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/__init__.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/config.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/__init__.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/alembic/README +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/alembic/env.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/alembic/script.py.mako +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/alembic.ini +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/models/__init__.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/models/base.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/models/experiment.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/models/feature.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/models/feature_selection.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/models/feature_selection_rank.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/models/model.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/models/model_selection.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/models/model_training.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/models/score.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/models/target.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/session.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/directories.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/experiment.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/feature_engineering.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/feature_selection.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/integrations/openai_integration.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/jobs/__init__.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/jobs/config.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/jobs/scheduler.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/jobs/tasks.py +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/misc/tabpfn_tests.ipynb +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/misc/test-gpu-bilstm.ipynb +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/misc/test-gpu-resnet.ipynb +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/misc/test-gpu-transformers.ipynb +0 -0
- {lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/search_space.py +0 -0
|
@@ -27,11 +27,11 @@ Basic Usage:
|
|
|
27
27
|
|
|
28
28
|
import joblib
|
|
29
29
|
import pandas as pd
|
|
30
|
+
import ast
|
|
30
31
|
import logging
|
|
31
32
|
import seaborn as sns
|
|
32
33
|
import numpy as np
|
|
33
34
|
import matplotlib.pyplot as plt
|
|
34
|
-
from lecrapaud.utils import logger
|
|
35
35
|
from lecrapaud.db.session import init_db
|
|
36
36
|
from lecrapaud.feature_selection import FeatureSelectionEngine, PreprocessModel
|
|
37
37
|
from lecrapaud.model_selection import (
|
|
@@ -46,6 +46,7 @@ from lecrapaud.feature_engineering import FeatureEngineeringEngine, PreprocessFe
|
|
|
46
46
|
from lecrapaud.experiment import create_experiment
|
|
47
47
|
from lecrapaud.db import Experiment
|
|
48
48
|
from lecrapaud.search_space import normalize_models_idx
|
|
49
|
+
from lecrapaud.utils import logger
|
|
49
50
|
|
|
50
51
|
|
|
51
52
|
class LeCrapaud:
|
|
@@ -344,9 +345,13 @@ class ExperimentEngine:
|
|
|
344
345
|
return pd.read_csv(f"{self.experiment.path}/feature_summary.csv")
|
|
345
346
|
|
|
346
347
|
def get_threshold(self, target_number: int):
|
|
347
|
-
|
|
348
|
+
thresholds = joblib.load(
|
|
348
349
|
f"{self.experiment.path}/TARGET_{target_number}/thresholds.pkl"
|
|
349
350
|
)
|
|
351
|
+
if isinstance(thresholds, str):
|
|
352
|
+
thresholds = ast.literal_eval(thresholds)
|
|
353
|
+
|
|
354
|
+
return thresholds
|
|
350
355
|
|
|
351
356
|
def load_model(self, target_number: int, model_name: str = None):
|
|
352
357
|
|
|
@@ -1328,46 +1328,131 @@ def load_model(target_dir: str):
|
|
|
1328
1328
|
# plots
|
|
1329
1329
|
def plot_evaluation_for_classification(prediction: dict):
|
|
1330
1330
|
"""
|
|
1331
|
-
|
|
1332
|
-
|
|
1331
|
+
Plot evaluation metrics for classification tasks (both binary and multiclass).
|
|
1332
|
+
|
|
1333
|
+
Args:
|
|
1334
|
+
prediction (pd.DataFrame): Should be a df with:
|
|
1335
|
+
- TARGET: true labels
|
|
1336
|
+
- PRED: predicted labels
|
|
1337
|
+
- For binary: column '1' or 1 for positive class probabilities
|
|
1338
|
+
- For multiclass: columns 2 onwards for class probabilities
|
|
1333
1339
|
"""
|
|
1334
1340
|
y_true = prediction["TARGET"]
|
|
1335
1341
|
y_pred = prediction["PRED"]
|
|
1336
|
-
y_pred_proba = prediction[1] if 1 in prediction.columns else prediction["1"]
|
|
1337
1342
|
|
|
1338
1343
|
# Plot confusion matrix
|
|
1339
1344
|
plot_confusion_matrix(y_true, y_pred)
|
|
1340
1345
|
|
|
1341
|
-
#
|
|
1342
|
-
|
|
1343
|
-
|
|
1346
|
+
# Determine if binary or multiclass
|
|
1347
|
+
unique_labels = np.unique(y_true)
|
|
1348
|
+
unique_labels = np.sort(unique_labels)
|
|
1349
|
+
n_classes = len(unique_labels)
|
|
1350
|
+
|
|
1351
|
+
if n_classes <= 2:
|
|
1352
|
+
# Binary classification
|
|
1353
|
+
y_pred_proba = prediction[1] if 1 in prediction.columns else prediction["1"]
|
|
1354
|
+
|
|
1355
|
+
# Compute and plot ROC curve
|
|
1356
|
+
fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
|
|
1357
|
+
roc_auc = auc(fpr, tpr)
|
|
1358
|
+
|
|
1359
|
+
plt.figure(figsize=(8, 8))
|
|
1360
|
+
plt.plot(
|
|
1361
|
+
fpr,
|
|
1362
|
+
tpr,
|
|
1363
|
+
color="darkorange",
|
|
1364
|
+
lw=2,
|
|
1365
|
+
label=f"ROC curve (area = {roc_auc:0.2f})",
|
|
1366
|
+
)
|
|
1367
|
+
plt.plot([0, 1], [0, 1], color="navy", lw=2, linestyle="--")
|
|
1368
|
+
plt.xlim([0.0, 1.0])
|
|
1369
|
+
plt.ylim([0.0, 1.05])
|
|
1370
|
+
plt.xlabel("False Positive Rate")
|
|
1371
|
+
plt.ylabel("True Positive Rate")
|
|
1372
|
+
plt.title("ROC Curve")
|
|
1373
|
+
plt.legend(loc="lower right")
|
|
1374
|
+
plt.show()
|
|
1375
|
+
|
|
1376
|
+
# Compute and plot precision-recall curve
|
|
1377
|
+
precision, recall, _ = precision_recall_curve(y_true, y_pred_proba)
|
|
1378
|
+
average_precision = average_precision_score(y_true, y_pred_proba)
|
|
1379
|
+
|
|
1380
|
+
plt.figure(figsize=(8, 8))
|
|
1381
|
+
plt.step(recall, precision, color="b", alpha=0.2, where="post")
|
|
1382
|
+
plt.fill_between(recall, precision, step="post", alpha=0.2, color="b")
|
|
1383
|
+
plt.xlabel("Recall")
|
|
1384
|
+
plt.ylabel("Precision")
|
|
1385
|
+
plt.ylim([0.0, 1.05])
|
|
1386
|
+
plt.xlim([0.0, 1.0])
|
|
1387
|
+
plt.title(f"Precision-Recall Curve: AP={average_precision:0.2f}")
|
|
1388
|
+
plt.show()
|
|
1344
1389
|
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
plt.xlabel("False Positive Rate")
|
|
1353
|
-
plt.ylabel("True Positive Rate")
|
|
1354
|
-
plt.title("ROC Curve")
|
|
1355
|
-
plt.legend(loc="lower right")
|
|
1356
|
-
plt.show()
|
|
1390
|
+
else:
|
|
1391
|
+
# Multiclass classification
|
|
1392
|
+
# Get class probabilities
|
|
1393
|
+
pred_cols = [
|
|
1394
|
+
col for col in prediction.columns if col not in ["ID", "TARGET", "PRED"]
|
|
1395
|
+
]
|
|
1396
|
+
y_pred_proba = prediction[pred_cols].values
|
|
1357
1397
|
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1398
|
+
# Compute ROC curve and ROC area for each class
|
|
1399
|
+
fpr = dict()
|
|
1400
|
+
tpr = dict()
|
|
1401
|
+
roc_auc = dict()
|
|
1402
|
+
|
|
1403
|
+
plt.figure(figsize=(10, 8))
|
|
1404
|
+
colors = plt.cm.get_cmap("tab10")(np.linspace(0, 1, n_classes))
|
|
1405
|
+
|
|
1406
|
+
for i, (label, color) in enumerate(zip(unique_labels, colors)):
|
|
1407
|
+
y_true_binary = (y_true == label).astype(int)
|
|
1408
|
+
y_score = y_pred_proba[:, i]
|
|
1409
|
+
|
|
1410
|
+
fpr[i], tpr[i], _ = roc_curve(y_true_binary, y_score)
|
|
1411
|
+
roc_auc[i] = auc(fpr[i], tpr[i])
|
|
1412
|
+
|
|
1413
|
+
plt.plot(
|
|
1414
|
+
fpr[i],
|
|
1415
|
+
tpr[i],
|
|
1416
|
+
color=color,
|
|
1417
|
+
lw=2,
|
|
1418
|
+
label=f"Class {label} (area = {roc_auc[i]:0.2f})",
|
|
1419
|
+
)
|
|
1420
|
+
|
|
1421
|
+
plt.plot([0, 1], [0, 1], "k--", lw=2)
|
|
1422
|
+
plt.xlim([0.0, 1.0])
|
|
1423
|
+
plt.ylim([0.0, 1.05])
|
|
1424
|
+
plt.xlabel("False Positive Rate")
|
|
1425
|
+
plt.ylabel("True Positive Rate")
|
|
1426
|
+
plt.title("Multiclass ROC Curves (One-vs-Rest)")
|
|
1427
|
+
plt.legend(loc="lower right")
|
|
1428
|
+
plt.show()
|
|
1429
|
+
|
|
1430
|
+
# Compute PR curve for each class
|
|
1431
|
+
plt.figure(figsize=(10, 8))
|
|
1432
|
+
|
|
1433
|
+
for i, (label, color) in enumerate(zip(unique_labels, colors)):
|
|
1434
|
+
y_true_binary = (y_true == label).astype(int)
|
|
1435
|
+
y_score = y_pred_proba[:, i]
|
|
1436
|
+
|
|
1437
|
+
precision, recall, _ = precision_recall_curve(y_true_binary, y_score)
|
|
1438
|
+
average_precision = average_precision_score(y_true_binary, y_score)
|
|
1439
|
+
|
|
1440
|
+
plt.step(
|
|
1441
|
+
recall,
|
|
1442
|
+
precision,
|
|
1443
|
+
color=color,
|
|
1444
|
+
alpha=0.8,
|
|
1445
|
+
where="post",
|
|
1446
|
+
label=f"Class {label} (AP = {average_precision:0.2f})",
|
|
1447
|
+
)
|
|
1448
|
+
|
|
1449
|
+
plt.xlabel("Recall")
|
|
1450
|
+
plt.ylabel("Precision")
|
|
1451
|
+
plt.ylim([0.0, 1.05])
|
|
1452
|
+
plt.xlim([0.0, 1.0])
|
|
1453
|
+
plt.title("Multiclass Precision-Recall Curves")
|
|
1454
|
+
plt.legend(loc="lower left")
|
|
1455
|
+
plt.show()
|
|
1371
1456
|
|
|
1372
1457
|
|
|
1373
1458
|
def plot_confusion_matrix(y_true, y_pred):
|
|
@@ -9,7 +9,7 @@ from ftfy import fix_text
|
|
|
9
9
|
import unicodedata
|
|
10
10
|
import re
|
|
11
11
|
import string
|
|
12
|
-
|
|
12
|
+
import sys
|
|
13
13
|
from lecrapaud.directories import logger_dir
|
|
14
14
|
from lecrapaud.config import LOGGING_LEVEL, PYTHON_ENV, LECRAPAUD_LOCAL
|
|
15
15
|
|
|
@@ -57,6 +57,11 @@ def setup_logger():
|
|
|
57
57
|
file_handler.setLevel(log_level)
|
|
58
58
|
logger.addHandler(file_handler)
|
|
59
59
|
|
|
60
|
+
stream_handler = logging.StreamHandler(sys.stdout)
|
|
61
|
+
stream_handler.setFormatter(formatter)
|
|
62
|
+
stream_handler.setLevel(log_level)
|
|
63
|
+
logger.addHandler(stream_handler)
|
|
64
|
+
|
|
60
65
|
_LECRAPAUD_LOGGER_ALREADY_CONFIGURED = True
|
|
61
66
|
return logger
|
|
62
67
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py
RENAMED
|
File without changes
|
{lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py
RENAMED
|
File without changes
|
{lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py
RENAMED
|
File without changes
|
{lecrapaud-0.11.2 → lecrapaud-0.11.3}/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|