lecrapaud-0.9.4-py3-none-any.whl → lecrapaud-0.10.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lecrapaud might be problematic. Click here for more details.
- lecrapaud/api.py +8 -7
- lecrapaud/config.py +1 -0
- lecrapaud/directories.py +9 -5
- lecrapaud/feature_selection.py +43 -38
- lecrapaud/model_selection.py +3 -0
- lecrapaud/utils.py +13 -12
- {lecrapaud-0.9.4.dist-info → lecrapaud-0.10.1.dist-info}/METADATA +1 -1
- {lecrapaud-0.9.4.dist-info → lecrapaud-0.10.1.dist-info}/RECORD +10 -10
- {lecrapaud-0.9.4.dist-info → lecrapaud-0.10.1.dist-info}/LICENSE +0 -0
- {lecrapaud-0.9.4.dist-info → lecrapaud-0.10.1.dist-info}/WHEEL +0 -0
lecrapaud/api.py
CHANGED
|
@@ -167,14 +167,10 @@ class ExperimentEngine:
|
|
|
167
167
|
else:
|
|
168
168
|
scores_reg.append(score)
|
|
169
169
|
|
|
170
|
-
# renaming
|
|
170
|
+
# renaming and concatenating with initial data
|
|
171
171
|
if isinstance(y_pred, pd.DataFrame):
|
|
172
|
-
y_pred.
|
|
173
|
-
|
|
174
|
-
)
|
|
175
|
-
new_data = pd.concat(
|
|
176
|
-
[new_data, y_pred[f"TARGET_{target_number}_PRED"]], axis=1
|
|
177
|
-
)
|
|
172
|
+
y_pred = y_pred.add_prefix(f"TARGET_{target_number}_")
|
|
173
|
+
new_data = pd.concat([new_data, y_pred], axis=1)
|
|
178
174
|
|
|
179
175
|
else:
|
|
180
176
|
y_pred.name = f"TARGET_{target_number}_PRED"
|
|
@@ -294,3 +290,8 @@ class ExperimentEngine:
|
|
|
294
290
|
|
|
295
291
|
def get_feature_summary(self):
|
|
296
292
|
return pd.read_csv(f"{self.experiment.path}/feature_summary.csv")
|
|
293
|
+
|
|
294
|
+
def get_threshold(self, target_number: int):
|
|
295
|
+
return joblib.load(
|
|
296
|
+
f"{self.experiment.path}/TARGET_{target_number}/thresholds.pkl"
|
|
297
|
+
)
|
lecrapaud/config.py
CHANGED
lecrapaud/directories.py
CHANGED
|
@@ -1,12 +1,16 @@
|
|
|
1
|
-
import os
|
|
2
|
-
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
3
|
+
from config import LECRAPAUD_LOGFILE
|
|
3
4
|
|
|
4
5
|
cwd = os.getcwd()
|
|
5
|
-
tmp_dir = cwd + "/tmp"
|
|
6
|
-
logger_dir = cwd + "/log"
|
|
7
6
|
|
|
7
|
+
tmp_dir = cwd + "/tmp"
|
|
8
8
|
os.makedirs(tmp_dir, exist_ok=True)
|
|
9
|
-
|
|
9
|
+
|
|
10
|
+
logger_dir = None
|
|
11
|
+
if LECRAPAUD_LOGFILE:
|
|
12
|
+
logger_dir = os.path.join(cwd, LECRAPAUD_LOGFILE)
|
|
13
|
+
os.makedirs(logger_dir, exist_ok=True)
|
|
10
14
|
|
|
11
15
|
|
|
12
16
|
def clean_directories():
|
lecrapaud/feature_selection.py
CHANGED
|
@@ -47,7 +47,6 @@ from lecrapaud.db import (
|
|
|
47
47
|
FeatureSelection,
|
|
48
48
|
FeatureSelectionRank,
|
|
49
49
|
)
|
|
50
|
-
from lecrapaud.db.session import get_db
|
|
51
50
|
from lecrapaud.search_space import all_models
|
|
52
51
|
|
|
53
52
|
# Annoying Warnings
|
|
@@ -120,6 +119,7 @@ class FeatureSelectionEngine:
|
|
|
120
119
|
target_id=target.id,
|
|
121
120
|
experiment_id=self.experiment_id,
|
|
122
121
|
)
|
|
122
|
+
feature_map = {f.name: f.id for f in Feature.get_all(limit=20000)}
|
|
123
123
|
|
|
124
124
|
if feature_selection.best_features_path and os.path.exists(
|
|
125
125
|
feature_selection.best_features_path
|
|
@@ -156,21 +156,30 @@ class FeatureSelectionEngine:
|
|
|
156
156
|
|
|
157
157
|
if target_type == "classification" and self.X_categorical.shape[1] > 0:
|
|
158
158
|
feat_scores = self.select_categorical_features(percentile=percentile)
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
159
|
+
rows = []
|
|
160
|
+
for row in feat_scores.itertuples(index=False):
|
|
161
|
+
feature_id = feature_map.get(row.features)
|
|
162
|
+
|
|
163
|
+
rows.append(
|
|
164
|
+
{
|
|
165
|
+
"feature_selection_id": feature_selection.id,
|
|
166
|
+
"feature_id": feature_id,
|
|
167
|
+
"method": row.method,
|
|
168
|
+
"score": row.score,
|
|
169
|
+
"pvalue": row.pvalue,
|
|
170
|
+
"support": row.support,
|
|
171
|
+
"rank": row.rank,
|
|
172
|
+
"training_time": row.training_time,
|
|
173
|
+
}
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
if len(rows) == 0:
|
|
177
|
+
logger.warning(
|
|
178
|
+
f"No categorical features selected for TARGET_{target_number}"
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
FeatureSelectionRank.bulk_upsert(rows=rows)
|
|
182
|
+
|
|
174
183
|
categorical_features_selected = feat_scores[feat_scores["support"]][
|
|
175
184
|
"features"
|
|
176
185
|
].values.tolist()
|
|
@@ -238,30 +247,26 @@ class FeatureSelectionEngine:
|
|
|
238
247
|
|
|
239
248
|
logger.info("Inserting feature selection results to db...")
|
|
240
249
|
rows = []
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
"rank": row.rank,
|
|
257
|
-
"training_time": row.training_time,
|
|
258
|
-
}
|
|
259
|
-
)
|
|
250
|
+
for row in feat_scores.itertuples(index=False):
|
|
251
|
+
feature_id = feature_map.get(row.features)
|
|
252
|
+
|
|
253
|
+
rows.append(
|
|
254
|
+
{
|
|
255
|
+
"feature_selection_id": feature_selection.id,
|
|
256
|
+
"feature_id": feature_id,
|
|
257
|
+
"method": row.method,
|
|
258
|
+
"score": row.score,
|
|
259
|
+
"pvalue": None if pd.isna(row.pvalue) else row.pvalue,
|
|
260
|
+
"support": row.support,
|
|
261
|
+
"rank": row.rank,
|
|
262
|
+
"training_time": row.training_time,
|
|
263
|
+
}
|
|
264
|
+
)
|
|
260
265
|
|
|
261
|
-
|
|
262
|
-
|
|
266
|
+
if len(rows) == 0:
|
|
267
|
+
logger.warning(f"No numerical features selected for TARGET_{target_number}")
|
|
263
268
|
|
|
264
|
-
|
|
269
|
+
FeatureSelectionRank.bulk_upsert(rows=rows)
|
|
265
270
|
|
|
266
271
|
# Merge the results
|
|
267
272
|
logger.info("Merging feature selection methods...")
|
lecrapaud/model_selection.py
CHANGED
|
@@ -1537,6 +1537,9 @@ def apply_thresholds(
|
|
|
1537
1537
|
pd.Series(np.where(exceeded, col, -np.inf), index=pred_proba.index)
|
|
1538
1538
|
)
|
|
1539
1539
|
|
|
1540
|
+
# For each row:
|
|
1541
|
+
# 1. If any threshold is exceeded, take the class with highest probability among exceeded
|
|
1542
|
+
# 2. If no threshold is exceeded, take the class with highest probability overall
|
|
1540
1543
|
if class_predictions:
|
|
1541
1544
|
preds_df = pd.concat(class_predictions, axis=1)
|
|
1542
1545
|
probs_df = pd.concat(class_probabilities, axis=1)
|
lecrapaud/utils.py
CHANGED
|
@@ -13,13 +13,13 @@ import string
|
|
|
13
13
|
from lecrapaud.directories import logger_dir
|
|
14
14
|
from lecrapaud.config import LOGGING_LEVEL, PYTHON_ENV
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
_LECRAPAUD_LOGGER_ALREADY_CONFIGURED = False
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def setup_logger():
|
|
20
20
|
|
|
21
|
-
global
|
|
22
|
-
if
|
|
21
|
+
global _LECRAPAUD_LOGGER_ALREADY_CONFIGURED
|
|
22
|
+
if _LECRAPAUD_LOGGER_ALREADY_CONFIGURED: # ← bail out if done before
|
|
23
23
|
|
|
24
24
|
return logging.getLogger("lecrapaud" if PYTHON_ENV != "Worker" else "")
|
|
25
25
|
|
|
@@ -47,16 +47,17 @@ def setup_logger():
|
|
|
47
47
|
"Worker": "worker.log",
|
|
48
48
|
}.get(PYTHON_ENV, "app.log")
|
|
49
49
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
50
|
+
if logger_dir:
|
|
51
|
+
file_handler = RotatingFileHandler(
|
|
52
|
+
f"{logger_dir}/{env_file}",
|
|
53
|
+
maxBytes=5 * 1024 * 1024,
|
|
54
|
+
backupCount=3,
|
|
55
|
+
)
|
|
56
|
+
file_handler.setFormatter(formatter)
|
|
57
|
+
file_handler.setLevel(log_level)
|
|
58
|
+
logger.addHandler(file_handler)
|
|
58
59
|
|
|
59
|
-
|
|
60
|
+
_LECRAPAUD_LOGGER_ALREADY_CONFIGURED = True
|
|
60
61
|
return logger
|
|
61
62
|
|
|
62
63
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
lecrapaud/__init__.py,sha256=oCxbtw_nk8rlOXbXbWo0RRMlsh6w-hTiZ6e5PRG_wp0,28
|
|
2
|
-
lecrapaud/api.py,sha256=
|
|
3
|
-
lecrapaud/config.py,sha256=
|
|
2
|
+
lecrapaud/api.py,sha256=hpAVsHeOaxck2ufH0BA7IsKQXG9oA8Y_q1lvaHn6liU,10563
|
|
3
|
+
lecrapaud/config.py,sha256=n5qYpWyNSgxhJrmiujqRPa_EN3eLjGjtXDsboi1eeCo,993
|
|
4
4
|
lecrapaud/db/__init__.py,sha256=82o9fMfaqKXPh2_rt44EzNRVZV1R4LScEnQYvj_TjK0,34
|
|
5
5
|
lecrapaud/db/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
|
|
6
6
|
lecrapaud/db/alembic/env.py,sha256=rseEi8oR_eKXYYW3UwOKiCMuDEwT4lxsT7llySOUpgk,2305
|
|
@@ -22,23 +22,23 @@ lecrapaud/db/models/model_training.py,sha256=egggSfkW8C2nTadytc5DdjU7d2VEMT6LRRZ
|
|
|
22
22
|
lecrapaud/db/models/score.py,sha256=_yaa6yBxugcOZMvLxqqIaMN7QGvzAOzOGCYQO0_gBjw,1601
|
|
23
23
|
lecrapaud/db/models/target.py,sha256=DKnfeaLU8eT8J_oh_vuFo5-o1CaoXR13xBbswme6Bgk,1649
|
|
24
24
|
lecrapaud/db/session.py,sha256=K9dTyXmO-aF_2r9RRHsDsbW9_zLNDaOcchXlpiv7cSo,2719
|
|
25
|
-
lecrapaud/directories.py,sha256=
|
|
25
|
+
lecrapaud/directories.py,sha256=svfeNjuUvxYKUQECx3qOi4XxBO3cg-bnlDq6FhNFI0Q,816
|
|
26
26
|
lecrapaud/experiment.py,sha256=FSj5RUQsRdFpiK0iSyRBLRZQMlKJLQbS52cFoAVxoMk,2553
|
|
27
27
|
lecrapaud/feature_engineering.py,sha256=2Er29SxHRIdzwxcEjk-2UI-MxQNVBPdTzlTemZ8bqYg,32193
|
|
28
|
-
lecrapaud/feature_selection.py,sha256=
|
|
28
|
+
lecrapaud/feature_selection.py,sha256=u3TWq3G5Xh3geQevGDOZEt_rl_m6-K_CR7SttFtpwKw,43409
|
|
29
29
|
lecrapaud/integrations/openai_integration.py,sha256=hHLF3fk5Bps8KNbNrEL3NUFa945jwClE6LrLpuMZOd4,7459
|
|
30
30
|
lecrapaud/jobs/__init__.py,sha256=ZkrsyTOR21c_wN7RY8jPhm8jCrL1oCEtTsf3VFIlQiE,292
|
|
31
31
|
lecrapaud/jobs/config.py,sha256=AmO0j3RFjx8H66dfKw_7vnshaOJb9Ox5BAZ9cwwLFMY,377
|
|
32
32
|
lecrapaud/jobs/scheduler.py,sha256=SiYWPxokpKnR8V6btLOO6gbK0PEjSRoeG0kCbQvYPf4,990
|
|
33
33
|
lecrapaud/jobs/tasks.py,sha256=jfhOCsgZlZGTnsLB_K7-Y3NgJqpzpUCFu7EfDQuIeSY,1655
|
|
34
|
-
lecrapaud/model_selection.py,sha256=
|
|
34
|
+
lecrapaud/model_selection.py,sha256=hKa6rQPbFBPSiQv98R89bxp-U-3Kufj9pETV0ff6KKM,61767
|
|
35
35
|
lecrapaud/search_space.py,sha256=-JkzuMhaomdwiWi4HvVQY5hiw3-oREemJA16tbwEIp4,34854
|
|
36
36
|
lecrapaud/speed_tests/test-gpu-bilstm.ipynb,sha256=4nLuZRJVe2kn6kEmauhRiz5wkWT9AVrYhI9CEk_dYUY,9608
|
|
37
37
|
lecrapaud/speed_tests/test-gpu-resnet.ipynb,sha256=27Vu7nYwujYeh3fOxBNCnKJn3MXNPKZU-U8oDDUbymg,4944
|
|
38
38
|
lecrapaud/speed_tests/test-gpu-transformers.ipynb,sha256=k6MBSs_Um1h4PykvE-LTBcdpbWLbIFST_xl_AFW2jgI,8444
|
|
39
39
|
lecrapaud/speed_tests/tests.ipynb,sha256=RjI7LDHSsbadUkea_hT14sD7ivljtIQk4NB5McXJ1bE,3835
|
|
40
|
-
lecrapaud/utils.py,sha256=
|
|
41
|
-
lecrapaud-0.
|
|
42
|
-
lecrapaud-0.
|
|
43
|
-
lecrapaud-0.
|
|
44
|
-
lecrapaud-0.
|
|
40
|
+
lecrapaud/utils.py,sha256=zM3V6WzY7XTBnbBAzk5_HKPYsH4WskjbqFwnQLG9g90,8197
|
|
41
|
+
lecrapaud-0.10.1.dist-info/LICENSE,sha256=MImCryu0AnqhJE_uAZD-PIDKXDKb8sT7v0i1NOYeHTM,11350
|
|
42
|
+
lecrapaud-0.10.1.dist-info/METADATA,sha256=Ho8NyjWAZKXC6TI2QBgfxspyGRkTjP0ywimgRP3LtcQ,11624
|
|
43
|
+
lecrapaud-0.10.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
44
|
+
lecrapaud-0.10.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|