lecrapaud 0.16.5__tar.gz → 0.16.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lecrapaud might be problematic. Click here for more details.

Files changed (45)
  1. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/PKG-INFO +1 -1
  2. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/api.py +59 -0
  3. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/models/experiment.py +77 -0
  4. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/session.py +2 -2
  5. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/feature_engineering.py +8 -1
  6. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/model_selection.py +93 -9
  7. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/pyproject.toml +1 -1
  8. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/LICENSE +0 -0
  9. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/README.md +0 -0
  10. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/__init__.py +0 -0
  11. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/config.py +0 -0
  12. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/__init__.py +0 -0
  13. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/alembic/README +0 -0
  14. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/alembic/env.py +0 -0
  15. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/alembic/script.py.mako +0 -0
  16. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
  17. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
  18. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
  19. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py +0 -0
  20. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/alembic.ini +0 -0
  21. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/models/__init__.py +0 -0
  22. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/models/base.py +0 -0
  23. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/models/feature.py +0 -0
  24. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/models/feature_selection.py +0 -0
  25. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/models/feature_selection_rank.py +0 -0
  26. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/models/model.py +0 -0
  27. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/models/model_selection.py +0 -0
  28. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/models/model_training.py +0 -0
  29. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/models/score.py +0 -0
  30. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/models/target.py +0 -0
  31. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/db/models/utils.py +0 -0
  32. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/directories.py +0 -0
  33. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/experiment.py +0 -0
  34. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/feature_selection.py +0 -0
  35. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/integrations/openai_integration.py +0 -0
  36. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/jobs/__init__.py +0 -0
  37. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/jobs/config.py +0 -0
  38. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/jobs/scheduler.py +0 -0
  39. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/jobs/tasks.py +0 -0
  40. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/misc/tabpfn_tests.ipynb +0 -0
  41. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/misc/test-gpu-bilstm.ipynb +0 -0
  42. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/misc/test-gpu-resnet.ipynb +0 -0
  43. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/misc/test-gpu-transformers.ipynb +0 -0
  44. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/search_space.py +0 -0
  45. {lecrapaud-0.16.5 → lecrapaud-0.16.7}/lecrapaud/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lecrapaud
3
- Version: 0.16.5
3
+ Version: 0.16.7
4
4
  Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
5
  License: Apache License
6
6
  Author: Pierre H. Gallet
@@ -100,6 +100,65 @@ class LeCrapaud:
100
100
  id=Experiment.get_best_by_score(name=name, metric=metric).id, **kwargs
101
101
  )
102
102
 
103
+ def compare_experiment_scores(self, name: str):
104
+ """Compare scores of experiments with matching names.
105
+
106
+ Args:
107
+ name (str): Name or partial name of experiments to compare
108
+
109
+ Returns:
110
+ dict: Dictionary containing experiment names as keys and their scores as values
111
+ """
112
+ from lecrapaud.db import SessionLocal
113
+ from sqlalchemy.orm import joinedload
114
+
115
+ db = SessionLocal()
116
+ try:
117
+ # Get all experiments with the given name pattern
118
+ experiments = (
119
+ db.query(Experiment)
120
+ .options(joinedload(Experiment.model_selections)
121
+ .joinedload(ModelSelection.scores))
122
+ .filter(Experiment.name.ilike(f"%{name}%"))
123
+ .all()
124
+ )
125
+
126
+ if not experiments:
127
+ return {"error": f"No experiments found with name containing '{name}'"}
128
+
129
+ comparison = {}
130
+
131
+ for exp in experiments:
132
+ scores = {
133
+ "rmse": exp.avg_rmse,
134
+ "logloss": exp.avg_logloss,
135
+ "accuracy": None,
136
+ "f1": None,
137
+ "roc_auc": None
138
+ }
139
+
140
+ # Get classification metrics from the first model selection with scores
141
+ for model_sel in exp.model_selections:
142
+ if model_sel.scores:
143
+ for score in model_sel.scores:
144
+ if score.type == 'validation': # Use validation scores
145
+ if score.accuracy is not None:
146
+ scores["accuracy"] = score.accuracy
147
+ if score.f1 is not None:
148
+ scores["f1"] = score.f1
149
+ if score.roc_auc is not None:
150
+ scores["roc_auc"] = score.roc_auc
151
+ break
152
+
153
+ comparison[exp.name] = scores
154
+
155
+ return comparison
156
+
157
+ except Exception as e:
158
+ return {"error": f"Error comparing experiment scores: {str(e)}"}
159
+ finally:
160
+ db.close()
161
+
103
162
  def list_experiments(
104
163
  self, name: str = None, limit: int = 1000
105
164
  ) -> list["ExperimentEngine"]:
@@ -303,6 +303,83 @@ class Experiment(Base):
303
303
  else:
304
304
  raise ValueError("Invalid metric. Must be 'rmse', 'logloss', or 'both'.")
305
305
 
306
+ def best_score(self, target_number: int) -> dict:
307
+ """
308
+ Returns the scores for the best model of the specified target.
309
+
310
+ Args:
311
+ target_number (int): The target number to get scores for
312
+
313
+ Returns:
314
+ dict: A dictionary containing the experiment name, target number, and the best model's scores
315
+ """
316
+ # Find the target
317
+ target_name = f"TARGET_{target_number}"
318
+ target = next((t for t in self.targets if t.name == target_name), None)
319
+
320
+ if not target:
321
+ return {
322
+ 'experiment_name': self.name,
323
+ 'target_number': target_number,
324
+ 'error': f'Target {target_name} not found in this experiment',
325
+ 'scores': {}
326
+ }
327
+
328
+ # Find the best model selection for this target
329
+ best_model_selection = next(
330
+ (ms for ms in self.model_selections if ms.target_id == target.id),
331
+ None
332
+ )
333
+
334
+ if not best_model_selection or not best_model_selection.model_trainings:
335
+ return {
336
+ 'experiment_name': self.name,
337
+ 'target_number': target_number,
338
+ 'error': 'No model found for this target',
339
+ 'scores': {}
340
+ }
341
+
342
+ # Get the best model training (assuming the first one is the best)
343
+ best_training = best_model_selection.model_trainings[0]
344
+
345
+ # Get the validation score for this training
346
+ validation_scores = [s for s in best_training.score if s.type == 'validation']
347
+
348
+ if not validation_scores:
349
+ return {
350
+ 'experiment_name': self.name,
351
+ 'target_number': target_number,
352
+ 'error': 'No validation scores found for the best model',
353
+ 'scores': {}
354
+ }
355
+
356
+ # Get all available metrics from the first validation score
357
+ score = validation_scores[0]
358
+ available_metrics = [
359
+ 'rmse', 'mae', 'r2', 'logloss', 'accuracy',
360
+ 'precision', 'recall', 'f1', 'roc_auc'
361
+ ]
362
+
363
+ scores = {}
364
+ for metric in available_metrics:
365
+ value = getattr(score, metric, None)
366
+ if value is not None:
367
+ scores[metric] = value
368
+
369
+ # Get the model info
370
+ model_info = {
371
+ 'model_type': best_training.model.model_type if best_training.model else 'unknown',
372
+ 'model_name': best_training.model.name if best_training.model else 'unknown',
373
+ 'training_time_seconds': best_training.training_time
374
+ }
375
+
376
+ return {
377
+ 'experiment_name': self.name,
378
+ 'target_number': target_number,
379
+ 'model': model_info,
380
+ 'scores': scores
381
+ }
382
+
306
383
  def get_features(self, target_number: int):
307
384
  targets = [t for t in self.targets if t.name == f"TARGET_{target_number}"]
308
385
  if targets:
@@ -13,7 +13,7 @@ from lecrapaud.config import DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME, DB
13
13
  _engine = None
14
14
  _SessionLocal = None
15
15
  if DB_URI:
16
- if "mysql://" in DB_URI:
16
+ if "mysql://" in DB_URI and "pymysql://" not in DB_URI:
17
17
  DB_URI = DB_URI.replace("mysql://", "mysql+pymysql://")
18
18
  DATABASE_URL = DB_URI
19
19
  elif DB_USER:
@@ -27,7 +27,7 @@ else:
27
27
  def init_db(uri: str = None):
28
28
  global _engine, _SessionLocal, DATABASE_URL, DB_URI
29
29
  if uri:
30
- if "mysql://" in uri:
30
+ if "mysql://" in uri and "pymysql://" not in uri:
31
31
  uri = uri.replace("mysql://", "mysql+pymysql://")
32
32
  DATABASE_URL = uri
33
33
  elif DATABASE_URL:
@@ -364,6 +364,12 @@ class PreprocessFeature:
364
364
 
365
365
  joblib.dump(pcas, f"{self.preprocessing_dir}/pcas.pkl")
366
366
 
367
+ # Save all features before encoding
368
+ joblib.dump(
369
+ list(train.columns),
370
+ f"{self.preprocessing_dir}/all_features_before_encoding.pkl",
371
+ )
372
+
367
373
  # Encoding
368
374
  train, transformer = self.encode_categorical_features(train)
369
375
  val, _ = self.encode_categorical_features(
@@ -382,7 +388,8 @@ class PreprocessFeature:
382
388
 
383
389
  # Save all features before selection
384
390
  joblib.dump(
385
- train, f"{self.preprocessing_dir}/all_features_before_selection.pkl"
391
+ list(train.columns),
392
+ f"{self.preprocessing_dir}/all_features_before_selection.pkl",
386
393
  )
387
394
 
388
395
  return train, val, test
@@ -1592,20 +1592,104 @@ def plot_evaluation_for_classification(prediction: dict):
1592
1592
 
1593
1593
 
1594
1594
  def plot_confusion_matrix(y_true, y_pred):
1595
- unique_labels = np.unique(np.concatenate((y_true, y_pred)))
1595
+ # Calculate confusion matrix
1596
1596
  cm = confusion_matrix(y_true, y_pred)
1597
1597
 
1598
- labels = np.sort(unique_labels) # Sort labels based on numerical order
1598
+ # Get unique, sorted class labels
1599
+ labels = np.unique(np.concatenate((y_true, y_pred)))
1600
+ labels = np.sort(labels)
1601
+
1602
+ # Calculate class distribution
1603
+ class_dist = np.bincount(y_true.astype(int))
1604
+ class_dist_pct = class_dist / len(y_true) * 100
1605
+
1606
+ # Create figure with two subplots stacked vertically
1607
+ fig = plt.figure(figsize=(10, 12))
1608
+
1609
+ # Subplot 1: Confusion Matrix
1610
+ ax1 = plt.subplot(2, 1, 1) # Changed to 2 rows, 1 column, first subplot
1611
+
1612
+ # Create a custom colormap (blue to white to red)
1613
+ cmap = sns.diverging_palette(220, 10, as_cmap=True)
1614
+
1615
+ # Plot heatmap with better styling
1616
+ sns.heatmap(
1617
+ cm,
1618
+ annot=True,
1619
+ fmt="d",
1620
+ cmap=cmap,
1621
+ center=0,
1622
+ linewidths=0.5,
1623
+ linecolor="lightgray",
1624
+ cbar_kws={"label": "Number of Samples"},
1625
+ ax=ax1,
1626
+ )
1599
1627
 
1600
- plt.figure(figsize=(10, 7))
1601
- sns.heatmap(cm, annot=True, fmt="d", cmap="viridis")
1602
- plt.xlabel("Predicted", fontsize=12)
1603
- plt.ylabel("True", fontsize=12)
1604
- plt.title("Confusion Matrix", fontsize=14)
1628
+ # Add title and labels with better styling
1629
+ ax1.set_title("Confusion Matrix", fontsize=14, pad=20, weight="bold")
1630
+ ax1.set_xlabel("Predicted Label", fontsize=12, labelpad=10)
1631
+ ax1.set_ylabel("True Label", fontsize=12, labelpad=10)
1632
+
1633
+ # Set tick labels to be centered and more readable
1634
+ ax1.set_xticks(np.arange(len(labels)) + 0.5)
1635
+ ax1.set_yticks(np.arange(len(labels)) + 0.5)
1636
+ ax1.set_xticklabels(labels, fontsize=10)
1637
+ ax1.set_yticklabels(labels, fontsize=10, rotation=0)
1638
+
1639
+ # Add grid lines for better readability
1640
+ ax1.set_xticks(np.arange(len(labels) + 1) - 0.5, minor=True)
1641
+ ax1.set_yticks(np.arange(len(labels) + 1) - 0.5, minor=True)
1642
+ ax1.grid(which="minor", color="w", linestyle="-", linewidth=2)
1643
+ ax1.tick_params(which="minor", bottom=False, left=False)
1644
+
1645
+ # Subplot 2: Class Distribution
1646
+ ax2 = plt.subplot(2, 1, 2) # Changed to 2 rows, 1 column, second subplot
1647
+
1648
+ # Create a bar plot for class distribution
1649
+ bars = ax2.bar(
1650
+ labels.astype(str),
1651
+ class_dist_pct,
1652
+ color=sns.color_palette("viridis", len(labels)),
1653
+ )
1605
1654
 
1606
- plt.xticks(ticks=np.arange(len(labels)), labels=labels, fontsize=10)
1607
- plt.yticks(ticks=np.arange(len(labels)), labels=labels, fontsize=10)
1655
+ # Add percentage labels on top of bars
1656
+ for bar in bars:
1657
+ height = bar.get_height()
1658
+ ax2.text(
1659
+ bar.get_x() + bar.get_width() / 2.0,
1660
+ height + 1,
1661
+ f"{height:.1f}%",
1662
+ ha="center",
1663
+ va="bottom",
1664
+ fontsize=10,
1665
+ )
1666
+
1667
+ # Add title and labels
1668
+ ax2.set_title("Class Distribution", fontsize=14, pad=20, weight="bold")
1669
+ ax2.set_xlabel("Class", fontsize=12, labelpad=10)
1670
+ ax2.set_ylabel("Percentage of Total Samples", fontsize=12, labelpad=10)
1671
+ ax2.set_ylim(0, 100)
1672
+ ax2.grid(axis="y", linestyle="--", alpha=0.7)
1673
+
1674
+ # Add total count annotation
1675
+ total = len(y_true)
1676
+ ax2.text(
1677
+ 0.5,
1678
+ -0.15, # Adjusted y-position for better spacing
1679
+ f"Total samples: {total:,}",
1680
+ transform=ax2.transAxes,
1681
+ ha="center",
1682
+ fontsize=10,
1683
+ bbox=dict(
1684
+ facecolor="white",
1685
+ alpha=0.8,
1686
+ edgecolor="lightgray",
1687
+ boxstyle="round,pad=0.5",
1688
+ ),
1689
+ )
1608
1690
 
1691
+ # Adjust layout to prevent overlap with more vertical space
1692
+ plt.tight_layout(rect=[0, 0.03, 1, 0.98])
1609
1693
  plt.show()
1610
1694
 
1611
1695
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "lecrapaud"
3
- version = "0.16.5"
3
+ version = "0.16.7"
4
4
  description = "Framework for machine and deep learning, with regression, classification and time series analysis"
5
5
  authors = [
6
6
  {name = "Pierre H. Gallet"}
File without changes
File without changes