lecrapaud 0.16.6__tar.gz → 0.16.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lecrapaud might be problematic. Click here for more details.
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/PKG-INFO +1 -1
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/api.py +59 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/models/experiment.py +77 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/feature_engineering.py +8 -1
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/model_selection.py +93 -9
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/pyproject.toml +1 -1
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/LICENSE +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/README.md +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/__init__.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/config.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/__init__.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/alembic/README +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/alembic/env.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/alembic/script.py.mako +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/alembic.ini +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/models/__init__.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/models/base.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/models/feature.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/models/feature_selection.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/models/feature_selection_rank.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/models/model.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/models/model_selection.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/models/model_training.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/models/score.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/models/target.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/models/utils.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/session.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/directories.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/experiment.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/feature_selection.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/integrations/openai_integration.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/jobs/__init__.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/jobs/config.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/jobs/scheduler.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/jobs/tasks.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/misc/tabpfn_tests.ipynb +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/misc/test-gpu-bilstm.ipynb +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/misc/test-gpu-resnet.ipynb +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/misc/test-gpu-transformers.ipynb +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/search_space.py +0 -0
- {lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/utils.py +0 -0
|
@@ -100,6 +100,65 @@ class LeCrapaud:
|
|
|
100
100
|
id=Experiment.get_best_by_score(name=name, metric=metric).id, **kwargs
|
|
101
101
|
)
|
|
102
102
|
|
|
103
|
+
def compare_experiment_scores(self, name: str):
    """Compare scores of experiments whose name contains ``name``.

    Args:
        name (str): Name or partial name of the experiments to compare.
            Matching is case-insensitive and literal: ``%`` and ``_`` in
            ``name`` are escaped so they are not treated as SQL wildcards.

    Returns:
        dict: Maps each matching experiment name to a dict with the keys
        ``rmse``, ``logloss``, ``accuracy``, ``f1`` and ``roc_auc``
        (``None`` where no value is available). If several matching
        experiments share a name, the last one loaded wins. When nothing
        matches, or the lookup raises, a dict with a single ``"error"``
        key is returned instead.
    """
    from lecrapaud.db import SessionLocal
    from sqlalchemy.orm import joinedload

    # Escape LIKE metacharacters so that e.g. "exp_1" does not also match
    # "expA1". "/" is used as the escape character (and is itself escaped).
    like_pattern = name.replace("/", "//").replace("%", "/%").replace("_", "/_")

    db = SessionLocal()
    try:
        # Load matching experiments together with their model selections
        # and scores, so the loop below does not trigger lazy loads.
        experiments = (
            db.query(Experiment)
            .options(
                joinedload(Experiment.model_selections).joinedload(
                    ModelSelection.scores
                )
            )
            .filter(Experiment.name.ilike(f"%{like_pattern}%", escape="/"))
            .all()
        )

        if not experiments:
            return {"error": f"No experiments found with name containing '{name}'"}

        comparison = {}
        for exp in experiments:
            scores = {
                "rmse": exp.avg_rmse,
                "logloss": exp.avg_logloss,
                "accuracy": None,
                "f1": None,
                "roc_auc": None,
            }

            # Classification metrics come from the first model selection
            # that has any scores; only its validation scores are used.
            scored_selection = next(
                (ms for ms in exp.model_selections if ms.scores), None
            )
            if scored_selection is not None:
                for score in scored_selection.scores:
                    if score.type != "validation":
                        continue
                    for metric in ("accuracy", "f1", "roc_auc"):
                        value = getattr(score, metric)
                        if value is not None:
                            scores[metric] = value

            comparison[exp.name] = scores

        return comparison

    except Exception as e:
        # Deliberate best-effort API: failures are reported in the payload
        # rather than raised to the caller.
        return {"error": f"Error comparing experiment scores: {str(e)}"}
    finally:
        db.close()
|
|
161
|
+
|
|
103
162
|
def list_experiments(
|
|
104
163
|
self, name: str = None, limit: int = 1000
|
|
105
164
|
) -> list["ExperimentEngine"]:
|
|
@@ -303,6 +303,83 @@ class Experiment(Base):
|
|
|
303
303
|
else:
|
|
304
304
|
raise ValueError("Invalid metric. Must be 'rmse', 'logloss', or 'both'.")
|
|
305
305
|
|
|
306
|
+
def best_score(self, target_number: int) -> dict:
    """Return the validation scores of the best model for one target.

    The target is looked up by the name ``TARGET_<target_number>``. The first
    model selection attached to that target is used, and its first model
    training is assumed to be the best one.

    Args:
        target_number (int): The target number to get scores for.

    Returns:
        dict: On success, the keys ``experiment_name``, ``target_number``,
        ``model`` (model type, model name and training time) and ``scores``
        (every metric that is not ``None`` on the first validation score).
        On failure, ``experiment_name``, ``target_number``, an ``error``
        message and an empty ``scores`` dict.
    """

    def _error(message: str) -> dict:
        # Single place that builds the failure payload.
        return {
            'experiment_name': self.name,
            'target_number': target_number,
            'error': message,
            'scores': {}
        }

    # Find the target
    target_name = f"TARGET_{target_number}"
    target = next((t for t in self.targets if t.name == target_name), None)
    if not target:
        return _error(f'Target {target_name} not found in this experiment')

    # Find the model selection for this target
    best_model_selection = next(
        (ms for ms in self.model_selections if ms.target_id == target.id),
        None
    )
    if not best_model_selection or not best_model_selection.model_trainings:
        return _error('No model found for this target')

    # Get the best model training (assuming the first one is the best)
    best_training = best_model_selection.model_trainings[0]

    # Keep only validation scores; a missing score collection counts as empty
    validation_scores = [
        s for s in (best_training.score or ()) if s.type == 'validation'
    ]
    if not validation_scores:
        return _error('No validation scores found for the best model')

    # Collect every metric that is set on the first validation score
    score = validation_scores[0]
    available_metrics = (
        'rmse', 'mae', 'r2', 'logloss', 'accuracy',
        'precision', 'recall', 'f1', 'roc_auc'
    )
    scores = {}
    for metric in available_metrics:
        value = getattr(score, metric, None)
        if value is not None:
            scores[metric] = value

    # Describe the model; fall back to 'unknown' when no model is attached
    model = best_training.model
    model_info = {
        'model_type': model.model_type if model else 'unknown',
        'model_name': model.name if model else 'unknown',
        'training_time_seconds': best_training.training_time
    }

    return {
        'experiment_name': self.name,
        'target_number': target_number,
        'model': model_info,
        'scores': scores
    }
|
|
382
|
+
|
|
306
383
|
def get_features(self, target_number: int):
|
|
307
384
|
targets = [t for t in self.targets if t.name == f"TARGET_{target_number}"]
|
|
308
385
|
if targets:
|
|
@@ -364,6 +364,12 @@ class PreprocessFeature:
|
|
|
364
364
|
|
|
365
365
|
joblib.dump(pcas, f"{self.preprocessing_dir}/pcas.pkl")
|
|
366
366
|
|
|
367
|
+
# Save all features before encoding
|
|
368
|
+
joblib.dump(
|
|
369
|
+
list(train.columns),
|
|
370
|
+
f"{self.preprocessing_dir}/all_features_before_encoding.pkl",
|
|
371
|
+
)
|
|
372
|
+
|
|
367
373
|
# Encoding
|
|
368
374
|
train, transformer = self.encode_categorical_features(train)
|
|
369
375
|
val, _ = self.encode_categorical_features(
|
|
@@ -382,7 +388,8 @@ class PreprocessFeature:
|
|
|
382
388
|
|
|
383
389
|
# Save all features before selection
|
|
384
390
|
joblib.dump(
|
|
385
|
-
train,
|
|
391
|
+
list(train.columns),
|
|
392
|
+
f"{self.preprocessing_dir}/all_features_before_selection.pkl",
|
|
386
393
|
)
|
|
387
394
|
|
|
388
395
|
return train, val, test
|
|
@@ -1592,20 +1592,104 @@ def plot_evaluation_for_classification(prediction: dict):
|
|
|
1592
1592
|
|
|
1593
1593
|
|
|
1594
1594
|
def plot_confusion_matrix(y_true, y_pred):
    """Plot a confusion matrix and the true-class distribution.

    Draws a figure with two stacked subplots: a heatmap of the confusion
    matrix on top, and a bar chart of the percentage of true samples per
    class below. The figure is displayed with ``plt.show()``.

    Args:
        y_true: Array-like of true class labels.
        y_pred: Array-like of predicted class labels.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Unique class labels seen in either array (np.unique returns them sorted)
    labels = np.unique(np.concatenate((y_true, y_pred)))

    # Calculate confusion matrix; passing labels pins rows/columns to the
    # same order as the tick labels used below
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    # Calculate class distribution: each row of the matrix sums to the number
    # of true samples of that class, so the counts line up with `labels`
    # whatever the label values are
    total = len(y_true)
    class_dist = cm.sum(axis=1)
    class_dist_pct = class_dist / total * 100

    # Create figure with two subplots stacked vertically
    fig = plt.figure(figsize=(10, 12))

    # Subplot 1: Confusion Matrix
    ax1 = plt.subplot(2, 1, 1)

    # Create a custom colormap (blue to white to red)
    cmap = sns.diverging_palette(220, 10, as_cmap=True)

    # Plot heatmap with better styling
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap=cmap,
        center=0,
        linewidths=0.5,
        linecolor="lightgray",
        cbar_kws={"label": "Number of Samples"},
        ax=ax1,
    )

    # Add title and labels with better styling
    ax1.set_title("Confusion Matrix", fontsize=14, pad=20, weight="bold")
    ax1.set_xlabel("Predicted Label", fontsize=12, labelpad=10)
    ax1.set_ylabel("True Label", fontsize=12, labelpad=10)

    # Heatmap cell i spans [i, i + 1], so its centre is at i + 0.5
    cell_centers = np.arange(len(labels)) + 0.5
    ax1.set_xticks(cell_centers)
    ax1.set_yticks(cell_centers)
    ax1.set_xticklabels(labels, fontsize=10)
    ax1.set_yticklabels(labels, fontsize=10, rotation=0)

    # Add grid lines on the cell edges (integer positions) so they separate
    # the cells instead of crossing their centres
    cell_edges = np.arange(len(labels) + 1)
    ax1.set_xticks(cell_edges, minor=True)
    ax1.set_yticks(cell_edges, minor=True)
    ax1.grid(which="minor", color="w", linestyle="-", linewidth=2)
    ax1.tick_params(which="minor", bottom=False, left=False)

    # Subplot 2: Class Distribution
    ax2 = plt.subplot(2, 1, 2)

    # Create a bar plot for class distribution
    bars = ax2.bar(
        labels.astype(str),
        class_dist_pct,
        color=sns.color_palette("viridis", len(labels)),
    )

    # Add percentage labels on top of bars
    for bar in bars:
        height = bar.get_height()
        ax2.text(
            bar.get_x() + bar.get_width() / 2.0,
            height + 1,
            f"{height:.1f}%",
            ha="center",
            va="bottom",
            fontsize=10,
        )

    # Add title and labels
    ax2.set_title("Class Distribution", fontsize=14, pad=20, weight="bold")
    ax2.set_xlabel("Class", fontsize=12, labelpad=10)
    ax2.set_ylabel("Percentage of Total Samples", fontsize=12, labelpad=10)
    ax2.set_ylim(0, 100)
    ax2.grid(axis="y", linestyle="--", alpha=0.7)

    # Add total count annotation below the x axis
    ax2.text(
        0.5,
        -0.15,
        f"Total samples: {total:,}",
        transform=ax2.transAxes,
        ha="center",
        fontsize=10,
        bbox=dict(
            facecolor="white",
            alpha=0.8,
            edgecolor="lightgray",
            boxstyle="round,pad=0.5",
        ),
    )

    # Adjust layout to prevent overlap with more vertical space
    plt.tight_layout(rect=[0, 0.03, 1, 0.98])
    plt.show()
|
|
1610
1694
|
|
|
1611
1695
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py
RENAMED
|
File without changes
|
{lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py
RENAMED
|
File without changes
|
{lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py
RENAMED
|
File without changes
|
{lecrapaud-0.16.6 → lecrapaud-0.16.7}/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|