lecrapaud-0.9.4-py3-none-any.whl → lecrapaud-0.10.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lecrapaud might be problematic. Click here for more details.

lecrapaud/api.py CHANGED
@@ -167,14 +167,10 @@ class ExperimentEngine:
167
167
  else:
168
168
  scores_reg.append(score)
169
169
 
170
- # renaming pred column and concatenating with initial data
170
+ # renaming and concatenating with initial data
171
171
  if isinstance(y_pred, pd.DataFrame):
172
- y_pred.rename(
173
- columns={"PRED": f"TARGET_{target_number}_PRED"}, inplace=True
174
- )
175
- new_data = pd.concat(
176
- [new_data, y_pred[f"TARGET_{target_number}_PRED"]], axis=1
177
- )
172
+ y_pred = y_pred.add_prefix(f"TARGET_{target_number}_")
173
+ new_data = pd.concat([new_data, y_pred], axis=1)
178
174
 
179
175
  else:
180
176
  y_pred.name = f"TARGET_{target_number}_PRED"
@@ -294,3 +290,8 @@ class ExperimentEngine:
294
290
 
295
291
  def get_feature_summary(self):
296
292
  return pd.read_csv(f"{self.experiment.path}/feature_summary.csv")
293
+
294
+ def get_threshold(self, target_number: int):
295
+ return joblib.load(
296
+ f"{self.experiment.path}/TARGET_{target_number}/thresholds.pkl"
297
+ )
lecrapaud/feature_selection.py CHANGED
@@ -47,7 +47,6 @@ from lecrapaud.db import (
47
47
  FeatureSelection,
48
48
  FeatureSelectionRank,
49
49
  )
50
- from lecrapaud.db.session import get_db
51
50
  from lecrapaud.search_space import all_models
52
51
 
53
52
  # Annoying Warnings
@@ -120,6 +119,7 @@ class FeatureSelectionEngine:
120
119
  target_id=target.id,
121
120
  experiment_id=self.experiment_id,
122
121
  )
122
+ feature_map = {f.name: f.id for f in Feature.get_all(limit=20000)}
123
123
 
124
124
  if feature_selection.best_features_path and os.path.exists(
125
125
  feature_selection.best_features_path
@@ -156,21 +156,30 @@ class FeatureSelectionEngine:
156
156
 
157
157
  if target_type == "classification" and self.X_categorical.shape[1] > 0:
158
158
  feat_scores = self.select_categorical_features(percentile=percentile)
159
- with get_db() as db:
160
- for row in feat_scores.itertuples(index=False):
161
- feature = Feature.find_by(name=row.features, db=db)
162
- FeatureSelectionRank.upsert(
163
- ["feature_selection_id", "feature_id", "method"],
164
- db=db,
165
- score=row.score,
166
- pvalue=row.pvalue,
167
- support=row.support,
168
- rank=row.rank,
169
- method=row.method,
170
- training_time=row.training_time,
171
- feature_selection_id=feature_selection.id,
172
- feature_id=feature.id,
173
- )
159
+ rows = []
160
+ for row in feat_scores.itertuples(index=False):
161
+ feature_id = feature_map.get(row.features)
162
+
163
+ rows.append(
164
+ {
165
+ "feature_selection_id": feature_selection.id,
166
+ "feature_id": feature_id,
167
+ "method": row.method,
168
+ "score": row.score,
169
+ "pvalue": row.pvalue,
170
+ "support": row.support,
171
+ "rank": row.rank,
172
+ "training_time": row.training_time,
173
+ }
174
+ )
175
+
176
+ if len(rows) == 0:
177
+ logger.warning(
178
+ f"No categorical features selected for TARGET_{target_number}"
179
+ )
180
+
181
+ FeatureSelectionRank.bulk_upsert(rows=rows)
182
+
174
183
  categorical_features_selected = feat_scores[feat_scores["support"]][
175
184
  "features"
176
185
  ].values.tolist()
@@ -238,30 +247,26 @@ class FeatureSelectionEngine:
238
247
 
239
248
  logger.info("Inserting feature selection results to db...")
240
249
  rows = []
241
- with get_db() as db:
242
- feature_map = {f.name: f.id for f in Feature.get_all(db=db, limit=20000)}
243
- for row in feat_scores.itertuples(index=False):
244
- feature_id = feature_map.get(row.features)
245
- if not feature_id:
246
- continue # or raise if feature must exist
247
-
248
- rows.append(
249
- {
250
- "feature_selection_id": feature_selection.id,
251
- "feature_id": feature_id,
252
- "method": row.method,
253
- "score": row.score,
254
- "pvalue": None if pd.isna(row.pvalue) else row.pvalue,
255
- "support": row.support,
256
- "rank": row.rank,
257
- "training_time": row.training_time,
258
- }
259
- )
250
+ for row in feat_scores.itertuples(index=False):
251
+ feature_id = feature_map.get(row.features)
252
+
253
+ rows.append(
254
+ {
255
+ "feature_selection_id": feature_selection.id,
256
+ "feature_id": feature_id,
257
+ "method": row.method,
258
+ "score": row.score,
259
+ "pvalue": None if pd.isna(row.pvalue) else row.pvalue,
260
+ "support": row.support,
261
+ "rank": row.rank,
262
+ "training_time": row.training_time,
263
+ }
264
+ )
260
265
 
261
- if len(rows) == 0:
262
- raise ValueError(f"No features selected for TARGET_{target_number}")
266
+ if len(rows) == 0:
267
+ logger.warning(f"No numerical features selected for TARGET_{target_number}")
263
268
 
264
- FeatureSelectionRank.bulk_upsert(rows=rows, db=db)
269
+ FeatureSelectionRank.bulk_upsert(rows=rows)
265
270
 
266
271
  # Merge the results
267
272
  logger.info("Merging feature selection methods...")
lecrapaud/model_selection.py CHANGED
@@ -1537,6 +1537,9 @@ def apply_thresholds(
1537
1537
  pd.Series(np.where(exceeded, col, -np.inf), index=pred_proba.index)
1538
1538
  )
1539
1539
 
1540
+ # For each row:
1541
+ # 1. If any threshold is exceeded, take the class with highest probability among exceeded
1542
+ # 2. If no threshold is exceeded, take the class with highest probability overall
1540
1543
  if class_predictions:
1541
1544
  preds_df = pd.concat(class_predictions, axis=1)
1542
1545
  probs_df = pd.concat(class_probabilities, axis=1)
lecrapaud-0.10.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lecrapaud
3
- Version: 0.9.4
3
+ Version: 0.10.0
4
4
  Summary: Framework for machine and deep learning, with regression, classification and time series analysis
5
5
  License: Apache License
6
6
  Author: Pierre H. Gallet
lecrapaud-0.10.0.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
1
1
  lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
2
- lecrapaud/api.py,sha256=K1sDm7XBcHmxplWTltQMsVRJ5AOYJ_AUiOS0rtg6uH8,10542
2
+ lecrapaud/api.py,sha256=hpAVsHeOaxck2ufH0BA7IsKQXG9oA8Y_q1lvaHn6liU,10563
3
3
  lecrapaud/config.py,sha256=WJglRV6-lUfYUy5LZjwv3aO_X6ossHY9BUT7_NCSY1I,942
4
4
  lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
5
5
  lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
@@ -25,20 +25,20 @@ lecrapaud/db/session.py,sha256=K9dTyXmO-aF_2r9RRHsDsbW9_zLNDaOcchXlpiv7cSo,2719
25
25
  lecrapaud/directories.py,sha256=t4PrnJR48MpDfBOTYTyGlDVMUr39mcaj7uCPTaocBRw,725
26
26
  lecrapaud/experiment.py,sha256=FSj5RUQsRdFpiK0iSyRBLRZQMlKJLQbS52cFoAVxoMk,2553
27
27
  lecrapaud/feature_engineering.py,sha256=2Er29SxHRIdzwxcEjk-2UI-MxQNVBPdTzlTemZ8bqYg,32193
28
- lecrapaud/feature_selection.py,sha256=9I0nQrou9f3tfIj_LRHCdj_eZYNNG0W4SOIXuHpIYRQ,43519
28
+ lecrapaud/feature_selection.py,sha256=u3TWq3G5Xh3geQevGDOZEt_rl_m6-K_CR7SttFtpwKw,43409
29
29
  lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
30
30
  lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
31
31
  lecrapaud/jobs/config.py,sha256=AmO0j3RFjx8H66dfKw_7vnshaOJb9Ox5BAZ9cwwLFMY,377
32
32
  lecrapaud/jobs/scheduler.py,sha256=SiYWPxokpKnR8V6btLOO6gbK0PEjSRoeG0kCbQvYPf4,990
33
33
  lecrapaud/jobs/tasks.py,sha256=jfhOCsgZlZGTnsLB_K7-Y3NgJqpzpUCFu7EfDQuIeSY,1655
34
- lecrapaud/model_selection.py,sha256=FRlW0G4qAPk4jyX_5kiCRhTIBrQuSVtmIv440NBu60c,61555
34
+ lecrapaud/model_selection.py,sha256=hKa6rQPbFBPSiQv98R89bxp-U-3Kufj9pETV0ff6KKM,61767
35
35
  lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
36
36
  lecrapaud/speed_tests/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
37
37
  lecrapaud/speed_tests/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
38
38
  lecrapaud/speed_tests/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
39
39
  lecrapaud/speed_tests/tests.ipynb,sha256=RjI7LDHSsbadUkea_hT14sD7ivljtIQk4NB5McXJ1bE,3835
40
40
  lecrapaud/utils.py,sha256=Dy2uhK9cslzoqRHiIE6MdkxjNJWjmKmzGr6i7EYM28A,8106
41
- lecrapaud-0.9.4.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
42
- lecrapaud-0.9.4.dist-info/METADATA,sha256=o8RM-3rjlsiISI4xwgCdvRIped4YvpdLg14OYAY2hfA,11623
43
- lecrapaud-0.9.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
44
- lecrapaud-0.9.4.dist-info/RECORD,,
41
+ lecrapaud-0.10.0.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
42
+ lecrapaud-0.10.0.dist-info/METADATA,sha256=RcsNQ75VKfFbx4dSsY8UaSUwGILQqRkl1Z81pc17m_Y,11624
43
+ lecrapaud-0.10.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
44
+ lecrapaud-0.10.0.dist-info/RECORD,,