lecrapaud 0.9.3__tar.gz → 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)
  1. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/PKG-INFO +1 -1
  2. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/api.py +8 -7
  3. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/experiment.py +3 -1
  4. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/feature_selection.py +43 -38
  5. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/model_selection.py +3 -0
  6. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/pyproject.toml +1 -1
  7. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/LICENSE +0 -0
  8. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/README.md +0 -0
  9. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/__init__.py +0 -0
  10. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/config.py +0 -0
  11. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/__init__.py +0 -0
  12. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/alembic/README +0 -0
  13. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/alembic/env.py +0 -0
  14. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/alembic/script.py.mako +0 -0
  15. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/alembic/versions/2025_06_23_1748-f089dfb7e3ba_.py +0 -0
  16. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/alembic/versions/2025_06_24_1216-c62251b129ed_.py +0 -0
  17. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/alembic/versions/2025_06_24_1711-86457e2f333f_.py +0 -0
  18. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/alembic/versions/2025_06_25_1759-72aa496ca65b_.py +0 -0
  19. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/alembic.ini +0 -0
  20. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/__init__.py +0 -0
  21. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/base.py +0 -0
  22. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/experiment.py +0 -0
  23. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/feature.py +0 -0
  24. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/feature_selection.py +0 -0
  25. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/feature_selection_rank.py +0 -0
  26. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/model.py +0 -0
  27. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/model_selection.py +0 -0
  28. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/model_training.py +0 -0
  29. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/score.py +0 -0
  30. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/models/target.py +0 -0
  31. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/db/session.py +0 -0
  32. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/directories.py +0 -0
  33. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/feature_engineering.py +0 -0
  34. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/integrations/openai_integration.py +0 -0
  35. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/jobs/__init__.py +0 -0
  36. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/jobs/config.py +0 -0
  37. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/jobs/scheduler.py +0 -0
  38. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/jobs/tasks.py +0 -0
  39. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/search_space.py +0 -0
  40. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/speed_tests/test-gpu-bilstm.ipynb +0 -0
  41. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/speed_tests/test-gpu-resnet.ipynb +0 -0
  42. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/speed_tests/test-gpu-transformers.ipynb +0 -0
  43. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/speed_tests/tests.ipynb +0 -0
  44. {lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/utils.py +0 -0

{lecrapaud-0.9.3 → lecrapaud-0.10.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lecrapaud
-Version: 0.9.3
+Version: 0.10.0
 Summary: Framework for machine and deep learning, with regression, classification and time series analysis
 License: Apache License
 Author: Pierre H. Gallet

{lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/api.py

@@ -167,14 +167,10 @@ class ExperimentEngine:
             else:
                 scores_reg.append(score)

-            # renaming pred column and concatenating with initial data
+            # renaming and concatenating with initial data
             if isinstance(y_pred, pd.DataFrame):
-                y_pred.rename(
-                    columns={"PRED": f"TARGET_{target_number}_PRED"}, inplace=True
-                )
-                new_data = pd.concat(
-                    [new_data, y_pred[f"TARGET_{target_number}_PRED"]], axis=1
-                )
+                y_pred = y_pred.add_prefix(f"TARGET_{target_number}_")
+                new_data = pd.concat([new_data, y_pred], axis=1)

             else:
                 y_pred.name = f"TARGET_{target_number}_PRED"
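
The change above swaps a single-column rename for DataFrame.add_prefix, so every prediction column (not just PRED) is kept and prefixed with the target name before being concatenated back onto the data. A minimal standalone sketch of the difference, using made-up column names:

import pandas as pd

# Hypothetical multi-column prediction frame; column names are illustrative only.
y_pred = pd.DataFrame({"PRED": [1, 0], "PROBA_0": [0.3, 0.8], "PROBA_1": [0.7, 0.2]})

# 0.9.3 behaviour: only the renamed "PRED" column was concatenated back onto the data.
old_kept = y_pred.rename(columns={"PRED": "TARGET_1_PRED"})[["TARGET_1_PRED"]]

# 0.10.0 behaviour: every column is prefixed and all of them are concatenated.
new_kept = y_pred.add_prefix("TARGET_1_")
print(list(new_kept.columns))  # ['TARGET_1_PRED', 'TARGET_1_PROBA_0', 'TARGET_1_PROBA_1']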

@@ -294,3 +290,8 @@ class ExperimentEngine:

     def get_feature_summary(self):
         return pd.read_csv(f"{self.experiment.path}/feature_summary.csv")
+
+    def get_threshold(self, target_number: int):
+        return joblib.load(
+            f"{self.experiment.path}/TARGET_{target_number}/thresholds.pkl"
+        )

{lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/experiment.py

@@ -39,7 +39,9 @@ def create_experiment(

     with get_db() as db:
         all_targets = Target.get_all(db=db)
-        targets = [target for target in all_targets if target.name in data.columns]
+        targets = [
+            target for target in all_targets if target.name in data.columns.str.upper()
+        ]
         experiment_name = f"{experiment_name}_{groups["number_of_groups"] if group_column else 'ng'}_{corr_threshold}_{percentile}_{max_features}_{dates['start_date'].date() if date_column else 'nd'}_{dates['end_date'].date() if date_column else 'nd'}"

         experiment_dir = f"{tmp_dir}/{experiment_name}"
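
The create_experiment change compares target names against the upper-cased column index, which makes target detection tolerant of lower- or mixed-case column names in the input frame. A small sketch with made-up names:

import pandas as pd

# Illustrative input whose columns are lower-case while target names are stored upper-case.
data = pd.DataFrame(columns=["target_1", "feature_a"])
target_names = ["TARGET_1", "TARGET_2"]

# 0.9.3 behaviour: exact match against the raw columns finds nothing here.
old = [t for t in target_names if t in data.columns]              # []

# 0.10.0 behaviour: the membership test runs against upper-cased column names.
new = [t for t in target_names if t in data.columns.str.upper()]  # ['TARGET_1']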

{lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/feature_selection.py

@@ -47,7 +47,6 @@ from lecrapaud.db import (
     FeatureSelection,
     FeatureSelectionRank,
 )
-from lecrapaud.db.session import get_db
 from lecrapaud.search_space import all_models

 # Annoying Warnings

@@ -120,6 +119,7 @@ class FeatureSelectionEngine:
             target_id=target.id,
             experiment_id=self.experiment_id,
         )
+        feature_map = {f.name: f.id for f in Feature.get_all(limit=20000)}

         if feature_selection.best_features_path and os.path.exists(
             feature_selection.best_features_path

@@ -156,21 +156,30 @@ class FeatureSelectionEngine:

         if target_type == "classification" and self.X_categorical.shape[1] > 0:
             feat_scores = self.select_categorical_features(percentile=percentile)
-            with get_db() as db:
-                for row in feat_scores.itertuples(index=False):
-                    feature = Feature.find_by(name=row.features, db=db)
-                    FeatureSelectionRank.upsert(
-                        ["feature_selection_id", "feature_id", "method"],
-                        db=db,
-                        score=row.score,
-                        pvalue=row.pvalue,
-                        support=row.support,
-                        rank=row.rank,
-                        method=row.method,
-                        training_time=row.training_time,
-                        feature_selection_id=feature_selection.id,
-                        feature_id=feature.id,
-                    )
+            rows = []
+            for row in feat_scores.itertuples(index=False):
+                feature_id = feature_map.get(row.features)
+
+                rows.append(
+                    {
+                        "feature_selection_id": feature_selection.id,
+                        "feature_id": feature_id,
+                        "method": row.method,
+                        "score": row.score,
+                        "pvalue": row.pvalue,
+                        "support": row.support,
+                        "rank": row.rank,
+                        "training_time": row.training_time,
+                    }
+                )
+
+            if len(rows) == 0:
+                logger.warning(
+                    f"No categorical features selected for TARGET_{target_number}"
+                )
+
+            FeatureSelectionRank.bulk_upsert(rows=rows)
+
             categorical_features_selected = feat_scores[feat_scores["support"]][
                 "features"
             ].values.tolist()

@@ -238,30 +247,26 @@ class FeatureSelectionEngine:

         logger.info("Inserting feature selection results to db...")
         rows = []
-        with get_db() as db:
-            feature_map = {f.name: f.id for f in Feature.get_all(db=db, limit=20000)}
-            for row in feat_scores.itertuples(index=False):
-                feature_id = feature_map.get(row.features)
-                if not feature_id:
-                    continue  # or raise if feature must exist
-
-                rows.append(
-                    {
-                        "feature_selection_id": feature_selection.id,
-                        "feature_id": feature_id,
-                        "method": row.method,
-                        "score": row.score,
-                        "pvalue": None if pd.isna(row.pvalue) else row.pvalue,
-                        "support": row.support,
-                        "rank": row.rank,
-                        "training_time": row.training_time,
-                    }
-                )
+        for row in feat_scores.itertuples(index=False):
+            feature_id = feature_map.get(row.features)
+
+            rows.append(
+                {
+                    "feature_selection_id": feature_selection.id,
+                    "feature_id": feature_id,
+                    "method": row.method,
+                    "score": row.score,
+                    "pvalue": None if pd.isna(row.pvalue) else row.pvalue,
+                    "support": row.support,
+                    "rank": row.rank,
+                    "training_time": row.training_time,
+                }
+            )

-            if len(rows) == 0:
-                raise ValueError(f"No features selected for TARGET_{target_number}")
+        if len(rows) == 0:
+            logger.warning(f"No numerical features selected for TARGET_{target_number}")

-            FeatureSelectionRank.bulk_upsert(rows=rows, db=db)
+        FeatureSelectionRank.bulk_upsert(rows=rows)

         # Merge the results
         logger.info("Merging feature selection methods...")

{lecrapaud-0.9.3 → lecrapaud-0.10.0}/lecrapaud/model_selection.py

@@ -1537,6 +1537,9 @@ def apply_thresholds(
            pd.Series(np.where(exceeded, col, -np.inf), index=pred_proba.index)
        )

+    # For each row:
+    # 1. If any threshold is exceeded, take the class with highest probability among exceeded
+    # 2. If no threshold is exceeded, take the class with highest probability overall
     if class_predictions:
         preds_df = pd.concat(class_predictions, axis=1)
         probs_df = pd.concat(class_probabilities, axis=1)
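
The new comment in apply_thresholds spells out the per-row decision rule. A self-contained sketch of that rule with made-up probabilities and thresholds (not the library's actual implementation):

import numpy as np
import pandas as pd

# Made-up class probabilities and per-class thresholds.
pred_proba = pd.DataFrame({0: [0.50, 0.45], 1: [0.30, 0.20], 2: [0.20, 0.35]})
thresholds = {0: 0.60, 1: 0.25, 2: 0.90}

# Mask out probabilities that do not clear their class threshold.
masked = pred_proba.copy()
for cls, thr in thresholds.items():
    masked[cls] = np.where(pred_proba[cls] >= thr, pred_proba[cls], -np.inf)

# Row-wise rule: argmax over the exceeded classes when any threshold was met,
# otherwise fall back to the overall argmax.
any_exceeded = np.isfinite(masked).any(axis=1)
preds = np.where(any_exceeded, masked.idxmax(axis=1), pred_proba.idxmax(axis=1))
print(preds)  # row 0 -> class 1 (only class 1 clears its threshold), row 1 -> class 0 (none do)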

{lecrapaud-0.9.3 → lecrapaud-0.10.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "lecrapaud"
-version = "0.9.3"
+version = "0.10.0"
 description = "Framework for machine and deep learning, with regression, classification and time series analysis"
 authors = [
     {name = "Pierre H. Gallet"}