py2ls 0.2.4.14__py3-none-any.whl → 0.2.4.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/ml2ls.py
CHANGED
@@ -616,10 +616,10 @@ def get_features(
|
|
616
616
|
if isinstance(y, str) and y in X.columns:
|
617
617
|
y_col_name = y
|
618
618
|
y = X[y]
|
619
|
-
y = ips.df_encoder(pd.DataFrame(y), method="
|
619
|
+
y = ips.df_encoder(pd.DataFrame(y), method="label")
|
620
620
|
X = X.drop(y_col_name, axis=1)
|
621
621
|
else:
|
622
|
-
y = ips.df_encoder(pd.DataFrame(y), method="
|
622
|
+
y = ips.df_encoder(pd.DataFrame(y), method="label").values.ravel()
|
623
623
|
y = y.loc[X.index] # Align y with X after dropping rows with missing values in X
|
624
624
|
y = y.ravel() if isinstance(y, np.ndarray) else y.values.ravel()
|
625
625
|
|
@@ -1217,142 +1217,335 @@ def validate_features(
|
|
1217
1217
|
|
1218
1218
|
# # If you want to access validation scores
|
1219
1219
|
# print(validation_results)
|
1220
|
-
def plot_validate_features(res_val):
|
1220
|
+
def plot_validate_features(res_val, is_binary=True, figsize=None):
    """
    Plot the results of 'validate_features()' with all models overlaid.

    One ROC axes and one precision-recall axes are drawn, each holding one
    curve per model. In the multi-class case this pair of axes is repeated
    for every class found in the first model's ROC entry.

    Parameters
    ----------
    res_val : table indexed by model name; the "roc_curve" and "pr_curve"
        columns are read here.
    is_binary : True for a binary target, False for a multi-class target.
    figsize : optional figure size; when None a default is derived from the
        number of models (and classes).
    """
    model_names = ips.flatten(res_val["pr_curve"].index)
    colors = plot.get_color(len(model_names))

    # With more than five models the CI band is hidden and the legend is
    # spread over two columns.
    crowded = res_val.shape[0] > 5
    alpha = 0 if crowded else 0.03
    subplot_layout = [1, 2] if crowded else [1, 1]
    ncols = 2 if crowded else 1

    if is_binary:
        if figsize is None:
            figsize = [8, 10] if crowded else [10, 6]
        nexttile = plot.subplot(figsize=figsize)

        # --- ROC curves, one per model ---
        roc_ax = nexttile(subplot_layout[0], subplot_layout[1])
        for idx, model_name in enumerate(model_names):
            roc = res_val["roc_curve"][model_name]
            lower_ci, upper_ci = roc["ci95"]
            plot_roc_curve(
                roc["fpr"],
                roc["tpr"],
                roc["auc"],
                lower_ci,
                upper_ci,
                model_name=model_name,
                lw=1.5,
                color=colors[idx],
                alpha=alpha,
                ax=roc_ax,
            )
        plot.figsets(
            sp=2,
            legend=dict(
                loc="upper right",
                ncols=ncols,
                fontsize=8,
                bbox_to_anchor=[1.5, 0.6],
                markerscale=0.8,
            ),
        )

        # --- precision-recall curves, one per model ---
        pr_ax = nexttile(subplot_layout[0], subplot_layout[1])
        for idx, model_name in enumerate(model_names):
            pr = res_val["pr_curve"][model_name]
            plot_pr_curve(
                recall=pr["recall"],
                precision=pr["precision"],
                avg_precision=pr["avg_precision"],
                model_name=model_name,
                color=colors[idx],
                lw=1.5,
                alpha=alpha,
                ax=pr_ax,
            )
        plot.figsets(
            sp=2,
            legend=dict(
                loc="upper right", ncols=1, fontsize=8, bbox_to_anchor=[1.5, 0.5]
            ),
        )
        return

    # Multi-class: class labels are the keys of the first model's per-class FPR dict.
    first_model = ips.flatten(res_val["roc_curve"].index)[0]
    classes = list(res_val["roc_curve"][first_model]['fpr'].keys())
    if figsize is None:
        figsize = (
            [8, 8 * 2 * (len(classes))] if crowded else [10, 6 * (len(classes))]
        )
    nexttile = plot.subplot(2 * (len(classes)), 2, figsize=figsize)

    for iclass, class_ in enumerate(classes):
        # --- ROC curves of every model for this class ---
        roc_ax = nexttile(subplot_layout[0], subplot_layout[1])
        for idx, model_name in enumerate(model_names):
            roc = res_val["roc_curve"][model_name]
            # FPR/TPR/AUC are keyed by class label, the CI list by position.
            lower_ci, upper_ci = roc["ci95"][iclass]
            plot_roc_curve(
                roc["fpr"][class_],
                roc["tpr"][class_],
                roc["auc"][class_],
                lower_ci,
                upper_ci,
                model_name=model_name,
                lw=1.5,
                color=colors[idx],
                alpha=alpha,
                ax=roc_ax,
            )
        plot.figsets(
            sp=2,
            title=class_,
            legend=dict(
                loc="upper right",
                ncols=ncols,
                fontsize=8,
                bbox_to_anchor=[1.5, 0.6],
                markerscale=0.8,
            ),
        )

        # --- precision-recall curves of every model for this class ---
        pr_ax = nexttile(subplot_layout[0], subplot_layout[1])
        for idx, model_name in enumerate(model_names):
            pr = res_val["pr_curve"][model_name]
            plot_pr_curve(
                recall=pr["recall"][iclass],
                precision=pr["precision"][iclass],
                avg_precision=pr["avg_precision"][iclass],
                model_name=model_name,
                color=colors[idx],
                lw=1.5,
                alpha=alpha,
                ax=pr_ax,
            )
        plot.figsets(
            sp=2,
            title=class_,
            legend=dict(
                loc="upper right", ncols=1, fontsize=8, bbox_to_anchor=[1.5, 0.5]
            ),
        )
|
1285
1352
|
|
1353
|
+
def plot_validate_features_single(res_val, figsize=None, is_binary=True):
    """
    Plot the results of 'validate_features()' with one row of panels per model.

    Each row holds three panels: ROC curve(s), precision-recall curve(s) and
    the confusion matrix. For a multi-class target the ROC and PR panels
    contain one curve per class.

    Parameters
    ----------
    res_val : table indexed by model name; the "roc_curve", "pr_curve" and
        "confusion_matrix" columns are read here.
    figsize : optional figure size; when None it scales with the number of
        models.
    is_binary : True for a binary target, False for a multi-class target.
    """
    model_names = ips.flatten(res_val["pr_curve"].index)
    n_models = len(model_names)

    if is_binary:
        if figsize is None:
            figsize = [13, 4 * n_models]
        nexttile = plot.subplot(n_models, 3, figsize=figsize)
        for model_name in model_names:
            roc = res_val["roc_curve"][model_name]
            (lower_ci, upper_ci) = roc["ci95"]

            # ROC panel
            plot_roc_curve(
                roc["fpr"],
                roc["tpr"],
                roc["auc"],
                lower_ci,
                upper_ci,
                model_name=model_name,
                ax=nexttile(),
            )
            plot.figsets(title=model_name, sp=2)

            # precision-recall panel
            pr = res_val["pr_curve"][model_name]
            plot_pr_binary(
                recall=pr["recall"],
                precision=pr["precision"],
                avg_precision=pr["avg_precision"],
                model_name=model_name,
                ax=nexttile(),
            )
            plot.figsets(title=model_name, sp=2)

            # confusion-matrix panel
            plot_cm(
                res_val["confusion_matrix"][model_name],
                ax=nexttile(),
                normalize=False,
            )
            plot.figsets(title=model_name, sp=2)
        return

    # Multi-class: class labels are the keys of the first model's per-class FPR dict.
    first_model = ips.flatten(res_val["roc_curve"].index)[0]
    classes = list(res_val["roc_curve"][first_model]['fpr'].keys())

    # The grid needs one row per model. The previous code used
    # len(<first model name>), i.e. the number of characters in that name.
    if figsize is None:
        figsize = [15, n_models * 5]
    nexttile = plot.subplot(n_models, 3, figsize=figsize)
    colors = plot.get_color(len(classes))

    for model_name in model_names:
        roc = res_val["roc_curve"][model_name]
        pr = res_val["pr_curve"][model_name]

        # ROC panel: one curve per class
        ax = nexttile()
        for iclass, class_ in enumerate(classes):
            # FPR/TPR/AUC are keyed by class label, the CI list by position.
            (lower_ci, upper_ci) = roc["ci95"][iclass]
            plot_roc_curve(
                roc["fpr"][class_],
                roc["tpr"][class_],
                roc["auc"][class_],
                lower_ci,
                upper_ci,
                model_name=class_,
                lw=1.5,
                color=colors[iclass],
                alpha=0.03,
                ax=ax,
            )
        plot.figsets(
            sp=2,
            title=model_name,
            legend=dict(loc="best", fontsize=8),
        )

        # precision-recall panel: one curve per class
        ax = nexttile()
        for iclass, class_ in enumerate(classes):
            plot_pr_curve(
                recall=pr["recall"][iclass],
                precision=pr["precision"][iclass],
                avg_precision=pr["avg_precision"][iclass],
                model_name=class_,
                color=colors[iclass],
                lw=1.5,
                alpha=0.03,
                ax=ax,
            )
        # The panel shows every class of this model, so it is titled with the
        # model name (it used to be titled with whichever class came last).
        plot.figsets(
            sp=2,
            title=model_name,
            legend=dict(loc="best", fontsize=8),
        )

        # confusion-matrix panel
        plot_cm(
            res_val["confusion_matrix"][model_name],
            labels_name=classes,
            ax=nexttile(),
            normalize=False,
        )
        plot.figsets(title=model_name, sp=2)
|
1313
1441
|
|
1314
|
-
# plot cm
|
1315
|
-
plot_cm(res_val["confusion_matrix"][model_name], ax=nexttile(), normalize=False)
|
1316
|
-
plot.figsets(title=model_name, sp=2)
|
1317
1442
|
|
1443
|
+
def cal_precision_recall(
    y_true, y_pred_proba, is_binary=True):
    """
    Compute precision-recall curve(s) and average precision.

    Parameters
    ----------
    y_true : true labels. In the multi-class case each class is selected with
        ``y_true == class_idx``, so labels are expected to be the integer
        codes 0..n_classes-1 matching the columns of ``y_pred_proba``.
    y_pred_proba : predicted scores; 1-D for binary, (n_samples, n_classes)
        for multi-class.
    is_binary : True for a binary target, False for one-vs-rest multi-class.

    Returns
    -------
    (precision, recall, avg_precision). For binary input these are the direct
    outputs of ``precision_recall_curve`` / ``average_precision_score``; for
    multi-class each element is a list with one entry per class.
    """
    if is_binary:
        precision_, recall_, _ = precision_recall_curve(y_true, y_pred_proba)
        avg_precision_ = average_precision_score(y_true, y_pred_proba)
        return precision_, recall_, avg_precision_

    # Coerce to arrays so that lists / DataFrames work with the element-wise
    # comparison below; flatten a single-column (n, 1) label array to 1-D.
    y_true = np.asarray(y_true)
    if y_true.ndim == 2 and y_true.shape[1] == 1:
        y_true = y_true.ravel()
    y_pred_proba = np.asarray(y_pred_proba)

    precision_, recall_, avg_precision_ = [], [], []
    # One-vs-rest: treat each class in turn as the positive class.
    for class_idx in range(y_pred_proba.shape[1]):
        is_class = (y_true == class_idx).astype(int)
        class_scores = y_pred_proba[:, class_idx]
        precision, recall, _ = precision_recall_curve(is_class, class_scores)
        precision_.append(precision)
        recall_.append(recall)
        avg_precision_.append(average_precision_score(is_class, class_scores))
    return precision_, recall_, avg_precision_
|
1472
|
+
|
1319
1473
|
def _bootstrap_ci_bounds(scores, ci):
    """Return the (lower, upper) bounds of the central ``ci`` percentile interval of ``scores``."""
    sorted_scores = np.sort(np.asarray(scores, dtype=float))
    n_scores = len(sorted_scores)
    if n_scores == 0:
        # Every resample was rejected: the interval is undefined.
        return np.nan, np.nan
    tail = (1.0 - ci) / 2.0  # probability mass left out on each side
    last = n_scores - 1
    lower_idx = min(max(int(tail * n_scores), 0), last)
    upper_idx = min(max(int((1.0 - tail) * n_scores), 0), last)
    return sorted_scores[lower_idx], sorted_scores[upper_idx]


def cal_auc_ci(
    y_true, y_pred, n_bootstraps=1000, ci=0.95, random_state=1,is_binary=True, verbose=True
):
    """
    Bootstrap a percentile confidence interval for the ROC AUC.

    Samples are drawn with replacement ``n_bootstraps`` times; resamples in
    which the (binarized) target has a single class are rejected because the
    AUC is undefined for them.

    Parameters
    ----------
    y_true : true labels.
    y_pred : predicted scores; 1-D for binary, (n_samples, n_classes) for
        multi-class.
    n_bootstraps : number of bootstrap resamples.
    ci : confidence level of the central interval, e.g. 0.95 uses the 2.5th
        and 97.5th percentiles of the bootstrap scores.
    random_state : seed for the resampling RNG.
    is_binary : True for a binary target, False for one-vs-rest multi-class.
    verbose : print the AUC score(s) and the resulting interval(s).

    Returns
    -------
    Binary: ``(lower, upper)``.
    Multi-class: a list of ``(lower, upper)`` tuples, one per class in the
    order of ``np.unique(y_true)``.
    Bounds are NaN when no valid resample could be drawn.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    rng = np.random.RandomState(random_state)
    n_samples = len(y_pred)

    if is_binary:
        if verbose:
            print("auroc score:", roc_auc_score(y_true, y_pred))
        bootstrapped_scores = []
        for _ in range(n_bootstraps):
            # bootstrap by sampling with replacement on the prediction indices
            indices = rng.randint(0, n_samples, n_samples)
            if len(np.unique(y_true[indices])) < 2:
                # We need at least one positive and one negative sample for
                # ROC AUC to be defined: reject the sample
                continue
            bootstrapped_scores.append(
                roc_auc_score(y_true[indices], y_pred[indices])
            )
        confidence_lower, confidence_upper = _bootstrap_ci_bounds(
            bootstrapped_scores, ci
        )
        if verbose:
            print(
                "Confidence interval for the score: [{:0.3f} - {:0.3f}]".format(
                    confidence_lower, confidence_upper
                )
            )
        return confidence_lower, confidence_upper

    from sklearn.preprocessing import label_binarize

    # Multi-class classification case
    if y_true.ndim == 2 and y_true.shape[1] == 1:
        y_true = y_true.ravel()
    # Binarize the multi-class labels for one-vs-rest computation
    y_true_bin = label_binarize(y_true, classes=np.unique(y_true))
    n_classes = y_true_bin.shape[1]

    if verbose:
        print("AUROC scores for each class:")
        for i in range(n_classes):
            print(f"Class {i}: {roc_auc_score(y_true_bin[:, i], y_pred[:, i])}")

    # Keep only the scores of accepted resamples per class, so that rejected
    # resamples do not show up as spurious 0.0 scores in the percentiles.
    per_class_scores = [[] for _ in range(n_classes)]
    for _ in range(n_bootstraps):
        indices = rng.randint(0, n_samples, n_samples)
        for class_idx in range(n_classes):
            resampled_truth = y_true_bin[indices, class_idx]
            if len(np.unique(resampled_truth)) < 2:
                continue  # Reject if the class doesn't have both positive and negative samples
            per_class_scores[class_idx].append(
                roc_auc_score(resampled_truth, y_pred[indices, class_idx])
            )

    # Calculating the confidence intervals for each class
    confidence_intervals = []
    for class_idx, class_scores in enumerate(per_class_scores):
        confidence_lower, confidence_upper = _bootstrap_ci_bounds(class_scores, ci)
        confidence_intervals.append((confidence_lower, confidence_upper))
        if verbose:
            print(f"Class {class_idx} - Confidence interval: [{confidence_lower:.3f} - {confidence_upper:.3f}]")

    return confidence_intervals
|
1356
1549
|
|
1357
1550
|
|
1358
1551
|
def plot_roc_curve(
|
@@ -1517,7 +1710,7 @@ def plot_pr_binary(
|
|
1517
1710
|
|
1518
1711
|
pr_boundary = interp1d(recall, precision, kind="linear", fill_value="extrapolate")
|
1519
1712
|
for f_score in f_scores:
|
1520
|
-
x_vals = np.linspace(0.01, 1,
|
1713
|
+
x_vals = np.linspace(0.01, 1, 20000)
|
1521
1714
|
y_vals = f_score * x_vals / (2 * x_vals - f_score)
|
1522
1715
|
y_vals_clipped = np.minimum(y_vals, pr_boundary(x_vals))
|
1523
1716
|
y_vals_clipped = np.clip(y_vals_clipped, 1e-3, None) # Prevent going to zero
|
@@ -1553,7 +1746,7 @@ def plot_pr_binary(
|
|
1553
1746
|
def plot_cm(
|
1554
1747
|
cm,
|
1555
1748
|
labels_name=None,
|
1556
|
-
thresh=0.8,
|
1749
|
+
thresh=0.8, # for set color
|
1557
1750
|
axis_labels=None,
|
1558
1751
|
cmap="Reds",
|
1559
1752
|
normalize=True,
|
@@ -2029,10 +2222,16 @@ def predict(
|
|
2029
2222
|
if isinstance(y_train, str) and y_train in x_train.columns:
|
2030
2223
|
y_train_col_name = y_train
|
2031
2224
|
y_train = x_train[y_train]
|
2032
|
-
y_train = ips.df_encoder(pd.DataFrame(y_train), method="dummy")
|
2225
|
+
# y_train = ips.df_encoder(pd.DataFrame(y_train), method="dummy")
|
2033
2226
|
x_train = x_train.drop(y_train_col_name, axis=1)
|
2034
|
-
else:
|
2035
|
-
|
2227
|
+
# else:
|
2228
|
+
# y_train = ips.df_encoder(pd.DataFrame(y_train), method="dummy").values.ravel()
|
2229
|
+
y_train=pd.DataFrame(y_train)
|
2230
|
+
y_train_=ips.df_encoder(y_train, method="dummy",drop=None)
|
2231
|
+
is_binary = False if y_train_.shape[1] >2 else True
|
2232
|
+
|
2233
|
+
# if is_binary:
|
2234
|
+
# y_train = ips.df_encoder(pd.DataFrame(y_train), method="label").values.ravel()
|
2036
2235
|
|
2037
2236
|
if x_true is None:
|
2038
2237
|
x_train, x_true, y_train, y_true = train_test_split(
|
@@ -2042,23 +2241,27 @@ def predict(
|
|
2042
2241
|
random_state=random_state,
|
2043
2242
|
stratify=y_train if purpose == "classification" else None,
|
2044
2243
|
)
|
2244
|
+
|
2045
2245
|
if isinstance(y_train, str) and y_train in x_train.columns:
|
2046
2246
|
y_train_col_name = y_train
|
2047
2247
|
y_train = x_train[y_train]
|
2048
|
-
y_train = ips.df_encoder(pd.DataFrame(y_train), method="
|
2248
|
+
y_train = ips.df_encoder(pd.DataFrame(y_train), method="label") if is_binary else y_train
|
2049
2249
|
x_train = x_train.drop(y_train_col_name, axis=1)
|
2050
|
-
|
2250
|
+
if is_binary:
|
2051
2251
|
y_train = ips.df_encoder(
|
2052
|
-
pd.DataFrame(y_train), method="
|
2053
|
-
).values.ravel()
|
2252
|
+
pd.DataFrame(y_train), method="label"
|
2253
|
+
).values.ravel()
|
2254
|
+
|
2054
2255
|
if y_true is not None:
|
2055
2256
|
if isinstance(y_true, str) and y_true in x_true.columns:
|
2056
2257
|
y_true_col_name = y_true
|
2057
2258
|
y_true = x_true[y_true]
|
2058
|
-
y_true = ips.df_encoder(pd.DataFrame(y_true), method="
|
2259
|
+
y_true = ips.df_encoder(pd.DataFrame(y_true), method="label") if is_binary else y_true
|
2260
|
+
y_true = pd.DataFrame(y_true)
|
2059
2261
|
x_true = x_true.drop(y_true_col_name, axis=1)
|
2060
|
-
|
2061
|
-
y_true = ips.df_encoder(pd.DataFrame(y_true), method="
|
2262
|
+
if is_binary:
|
2263
|
+
y_true = ips.df_encoder(pd.DataFrame(y_true), method="label").values.ravel()
|
2264
|
+
y_true = pd.DataFrame(y_true)
|
2062
2265
|
|
2063
2266
|
# to convert the 2D to 1D: 2D column-vector format (like [[1], [0], [1], ...]) instead of a 1D array ([1, 0, 1, ...]
|
2064
2267
|
|
@@ -2068,7 +2271,6 @@ def predict(
|
|
2068
2271
|
y_train.ravel() if isinstance(y_train, np.ndarray) else y_train.values.ravel()
|
2069
2272
|
)
|
2070
2273
|
y_true = y_true.ravel() if isinstance(y_true, np.ndarray) else y_true.values.ravel()
|
2071
|
-
|
2072
2274
|
# Ensure common features are selected
|
2073
2275
|
if common_features is not None:
|
2074
2276
|
x_train, x_true = x_train[common_features], x_true[common_features]
|
@@ -2077,10 +2279,7 @@ def predict(
|
|
2077
2279
|
x_train, x_true = x_train[share_col_names], x_true[share_col_names]
|
2078
2280
|
|
2079
2281
|
x_train, x_true = ips.df_scaler(x_train), ips.df_scaler(x_true)
|
2080
|
-
x_train, x_true = ips.df_encoder(x_train, method="dummy"), ips.df_encoder(
|
2081
|
-
x_true, method="dummy"
|
2082
|
-
)
|
2083
|
-
|
2282
|
+
x_train, x_true = ips.df_encoder(x_train, method="dummy"), ips.df_encoder(x_true, method="dummy")
|
2084
2283
|
# Handle class imbalance using SMOTE (only for classification)
|
2085
2284
|
if (
|
2086
2285
|
smote
|
@@ -2091,7 +2290,13 @@ def predict(
|
|
2091
2290
|
|
2092
2291
|
smote_sampler = SMOTE(random_state=random_state)
|
2093
2292
|
x_train, y_train = smote_sampler.fit_resample(x_train, y_train)
|
2094
|
-
|
2293
|
+
if not is_binary:
|
2294
|
+
if isinstance(y_train, np.ndarray):
|
2295
|
+
y_train = ips.df_encoder(data=pd.DataFrame(y_train),method='label')
|
2296
|
+
y_train=np.asarray(y_train)
|
2297
|
+
if isinstance(y_train, np.ndarray):
|
2298
|
+
y_true = ips.df_encoder(data=pd.DataFrame(y_true),method='label')
|
2299
|
+
y_true=np.asarray(y_true)
|
2095
2300
|
# Hyperparameter grids for tuning
|
2096
2301
|
if cv_level in ["low", "simple", "s", "l"]:
|
2097
2302
|
param_grids = {
|
@@ -2670,95 +2875,177 @@ def predict(
|
|
2670
2875
|
print(f"\nTraining and validating {name}:")
|
2671
2876
|
|
2672
2877
|
# Grid search with KFold or StratifiedKFold
|
2673
|
-
|
2674
|
-
|
2675
|
-
|
2676
|
-
|
2677
|
-
|
2678
|
-
|
2679
|
-
|
2680
|
-
|
2681
|
-
|
2682
|
-
|
2683
|
-
gs.fit(x_train, y_train)
|
2684
|
-
best_clf = gs.best_estimator_
|
2685
|
-
# make sure x_train and x_test has the same name
|
2686
|
-
x_true = x_true.reindex(columns=x_train.columns, fill_value=0)
|
2687
|
-
y_pred = best_clf.predict(x_true)
|
2688
|
-
|
2689
|
-
# y_pred_proba
|
2690
|
-
if hasattr(best_clf, "predict_proba"):
|
2691
|
-
y_pred_proba = best_clf.predict_proba(x_true)[:, 1]
|
2692
|
-
elif hasattr(best_clf, "decision_function"):
|
2693
|
-
# If predict_proba is not available, use decision_function (e.g., for SVM)
|
2694
|
-
y_pred_proba = best_clf.decision_function(x_true)
|
2695
|
-
# Ensure y_pred_proba is within 0 and 1 bounds
|
2696
|
-
y_pred_proba = (y_pred_proba - y_pred_proba.min()) / (
|
2697
|
-
y_pred_proba.max() - y_pred_proba.min()
|
2878
|
+
if is_binary:
|
2879
|
+
gs = GridSearchCV(
|
2880
|
+
clf,
|
2881
|
+
param_grid=param_grids.get(name, {}),
|
2882
|
+
scoring=(
|
2883
|
+
"roc_auc" if purpose == "classification" else "neg_mean_squared_error"
|
2884
|
+
),
|
2885
|
+
cv=cv,
|
2886
|
+
n_jobs=n_jobs,
|
2887
|
+
verbose=verbose,
|
2698
2888
|
)
|
2889
|
+
|
2890
|
+
gs.fit(x_train, y_train)
|
2891
|
+
best_clf = gs.best_estimator_
|
2892
|
+
# make sure x_train and x_test has the same name
|
2893
|
+
x_true = x_true.reindex(columns=x_train.columns, fill_value=0)
|
2894
|
+
y_pred = best_clf.predict(x_true)
|
2895
|
+
if hasattr(best_clf, "predict_proba"):
|
2896
|
+
y_pred_proba = best_clf.predict_proba(x_true)[:, 1]
|
2897
|
+
elif hasattr(best_clf, "decision_function"):
|
2898
|
+
# If predict_proba is not available, use decision_function (e.g., for SVM)
|
2899
|
+
y_pred_proba = best_clf.decision_function(x_true)
|
2900
|
+
# Ensure y_pred_proba is within 0 and 1 bounds
|
2901
|
+
y_pred_proba = (y_pred_proba - y_pred_proba.min()) / (
|
2902
|
+
y_pred_proba.max() - y_pred_proba.min()
|
2903
|
+
)
|
2904
|
+
else:
|
2905
|
+
y_pred_proba = None # No probability output for certain models
|
2699
2906
|
else:
|
2700
|
-
|
2907
|
+
gs = GridSearchCV(
|
2908
|
+
clf,
|
2909
|
+
param_grid=param_grids.get(name, {}),
|
2910
|
+
scoring=(
|
2911
|
+
"roc_auc_ovr" if purpose == "classification" else "neg_mean_squared_error"
|
2912
|
+
),
|
2913
|
+
cv=cv,
|
2914
|
+
n_jobs=n_jobs,
|
2915
|
+
verbose=verbose,
|
2916
|
+
)
|
2701
2917
|
|
2918
|
+
# Fit GridSearchCV
|
2919
|
+
gs.fit(x_train, y_train)
|
2920
|
+
best_clf = gs.best_estimator_
|
2921
|
+
|
2922
|
+
# Ensure x_true aligns with x_train columns
|
2923
|
+
x_true = x_true.reindex(columns=x_train.columns, fill_value=0)
|
2924
|
+
y_pred = best_clf.predict(x_true)
|
2925
|
+
|
2926
|
+
# Handle prediction probabilities for multiclass
|
2927
|
+
if hasattr(best_clf, "predict_proba"):
|
2928
|
+
y_pred_proba = best_clf.predict_proba(x_true)
|
2929
|
+
elif hasattr(best_clf, "decision_function"):
|
2930
|
+
y_pred_proba = best_clf.decision_function(x_true)
|
2931
|
+
|
2932
|
+
# Normalize for multiclass if necessary
|
2933
|
+
if y_pred_proba.ndim == 2:
|
2934
|
+
y_pred_proba = (y_pred_proba - y_pred_proba.min(axis=1, keepdims=True)) / \
|
2935
|
+
(y_pred_proba.max(axis=1, keepdims=True) - y_pred_proba.min(axis=1, keepdims=True))
|
2936
|
+
else:
|
2937
|
+
y_pred_proba = None # No probability output for certain models
|
2938
|
+
|
2702
2939
|
validation_scores = {}
|
2703
|
-
|
2940
|
+
|
2941
|
+
if y_true is not None and y_pred_proba is not None:
|
2704
2942
|
validation_scores = cal_metrics(
|
2705
2943
|
y_true,
|
2706
2944
|
y_pred,
|
2707
2945
|
y_pred_proba=y_pred_proba,
|
2946
|
+
is_binary=is_binary,
|
2708
2947
|
purpose=purpose,
|
2709
2948
|
average="weighted",
|
2710
2949
|
)
|
2711
|
-
|
2712
|
-
|
2713
|
-
|
2714
|
-
|
2715
|
-
|
2716
|
-
|
2717
|
-
|
2718
|
-
|
2719
|
-
|
2720
|
-
|
2721
|
-
|
2722
|
-
|
2723
|
-
|
2724
|
-
|
2725
|
-
|
2726
|
-
|
2727
|
-
|
2728
|
-
|
2729
|
-
|
2730
|
-
|
2731
|
-
|
2732
|
-
|
2733
|
-
|
2734
|
-
|
2735
|
-
|
2736
|
-
|
2737
|
-
|
2738
|
-
|
2739
|
-
|
2740
|
-
|
2741
|
-
|
2742
|
-
|
2743
|
-
|
2744
|
-
|
2745
|
-
|
2746
|
-
|
2747
|
-
|
2748
|
-
|
2749
|
-
|
2750
|
-
|
2751
|
-
|
2752
|
-
|
2753
|
-
|
2754
|
-
|
2755
|
-
|
2756
|
-
|
2757
|
-
|
2758
|
-
|
2759
|
-
|
2760
|
-
|
2761
|
-
|
2950
|
+
if is_binary:
|
2951
|
+
# Calculate ROC curve
|
2952
|
+
# https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html
|
2953
|
+
if y_pred_proba is not None:
|
2954
|
+
# fpr, tpr, roc_auc = dict(), dict(), dict()
|
2955
|
+
fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
|
2956
|
+
lower_ci, upper_ci = cal_auc_ci(y_true, y_pred_proba, verbose=False,is_binary=is_binary)
|
2957
|
+
roc_auc = auc(fpr, tpr)
|
2958
|
+
roc_info = {
|
2959
|
+
"fpr": fpr.tolist(),
|
2960
|
+
"tpr": tpr.tolist(),
|
2961
|
+
"auc": roc_auc,
|
2962
|
+
"ci95": (lower_ci, upper_ci),
|
2963
|
+
}
|
2964
|
+
# precision-recall curve
|
2965
|
+
precision_, recall_, _ = cal_precision_recall(y_true, y_pred_proba)
|
2966
|
+
avg_precision_ = average_precision_score(y_true, y_pred_proba)
|
2967
|
+
pr_info = {
|
2968
|
+
"precision": precision_,
|
2969
|
+
"recall": recall_,
|
2970
|
+
"avg_precision": avg_precision_,
|
2971
|
+
}
|
2972
|
+
else:
|
2973
|
+
roc_info, pr_info = None, None
|
2974
|
+
if purpose == "classification":
|
2975
|
+
results[name] = {
|
2976
|
+
"best_clf": gs.best_estimator_,
|
2977
|
+
"best_params": gs.best_params_,
|
2978
|
+
"auc_indiv": [
|
2979
|
+
gs.cv_results_[f"split{i}_test_score"][gs.best_index_]
|
2980
|
+
for i in range(cv_folds)
|
2981
|
+
],
|
2982
|
+
"scores": validation_scores,
|
2983
|
+
"roc_curve": roc_info,
|
2984
|
+
"pr_curve": pr_info,
|
2985
|
+
"confusion_matrix": confusion_matrix(y_true, y_pred),
|
2986
|
+
"predictions": y_pred.tolist(),
|
2987
|
+
"predictions_proba": (
|
2988
|
+
y_pred_proba.tolist() if y_pred_proba is not None else None
|
2989
|
+
),
|
2990
|
+
}
|
2991
|
+
else: # "regression"
|
2992
|
+
results[name] = {
|
2993
|
+
"best_clf": gs.best_estimator_,
|
2994
|
+
"best_params": gs.best_params_,
|
2995
|
+
"scores": validation_scores, # e.g., neg_MSE, R², etc.
|
2996
|
+
"predictions": y_pred.tolist(),
|
2997
|
+
"predictions_proba": (
|
2998
|
+
y_pred_proba.tolist() if y_pred_proba is not None else None
|
2999
|
+
),
|
3000
|
+
}
|
3001
|
+
else: # multi-classes
|
3002
|
+
if y_pred_proba is not None:
|
3003
|
+
# fpr, tpr, roc_auc = dict(), dict(), dict()
|
3004
|
+
# fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
|
3005
|
+
confidence_intervals = cal_auc_ci(y_true, y_pred_proba, verbose=False,is_binary=is_binary)
|
3006
|
+
roc_info = {
|
3007
|
+
"fpr": validation_scores["fpr"],
|
3008
|
+
"tpr": validation_scores["tpr"],
|
3009
|
+
"auc": validation_scores["roc_auc_by_class"],
|
3010
|
+
"ci95": confidence_intervals,
|
3011
|
+
}
|
3012
|
+
# precision-recall curve
|
3013
|
+
precision_, recall_, avg_precision_ = cal_precision_recall(y_true, y_pred_proba,is_binary=is_binary)
|
3014
|
+
pr_info = {
|
3015
|
+
"precision": precision_,
|
3016
|
+
"recall": recall_,
|
3017
|
+
"avg_precision": avg_precision_,
|
3018
|
+
}
|
3019
|
+
else:
|
3020
|
+
roc_info, pr_info = None, None
|
3021
|
+
|
3022
|
+
if purpose == "classification":
|
3023
|
+
results[name] = {
|
3024
|
+
"best_clf": gs.best_estimator_,
|
3025
|
+
"best_params": gs.best_params_,
|
3026
|
+
"auc_indiv": [
|
3027
|
+
gs.cv_results_[f"split{i}_test_score"][gs.best_index_]
|
3028
|
+
for i in range(cv_folds)
|
3029
|
+
],
|
3030
|
+
"scores": validation_scores,
|
3031
|
+
"roc_curve": roc_info,
|
3032
|
+
"pr_curve": pr_info,
|
3033
|
+
"confusion_matrix": confusion_matrix(y_true, y_pred),
|
3034
|
+
"predictions": y_pred.tolist(),
|
3035
|
+
"predictions_proba": (
|
3036
|
+
y_pred_proba.tolist() if y_pred_proba is not None else None
|
3037
|
+
),
|
3038
|
+
}
|
3039
|
+
else: # "regression"
|
3040
|
+
results[name] = {
|
3041
|
+
"best_clf": gs.best_estimator_,
|
3042
|
+
"best_params": gs.best_params_,
|
3043
|
+
"scores": validation_scores, # e.g., neg_MSE, R², etc.
|
3044
|
+
"predictions": y_pred.tolist(),
|
3045
|
+
"predictions_proba": (
|
3046
|
+
y_pred_proba.tolist() if y_pred_proba is not None else None
|
3047
|
+
),
|
3048
|
+
}
|
2762
3049
|
|
2763
3050
|
else:
|
2764
3051
|
results[name] = {
|
@@ -2773,7 +3060,6 @@ def predict(
|
|
2773
3060
|
|
2774
3061
|
# Convert results to DataFrame
|
2775
3062
|
df_results = pd.DataFrame.from_dict(results, orient="index")
|
2776
|
-
|
2777
3063
|
# sort
|
2778
3064
|
if y_true is not None and purpose == "classification":
|
2779
3065
|
df_scores = pd.DataFrame(
|
@@ -2790,26 +3076,29 @@ def predict(
|
|
2790
3076
|
plot.figsets(xangle=30)
|
2791
3077
|
if dir_save:
|
2792
3078
|
ips.figsave(dir_save + f"scores_sorted_heatmap{now_}.pdf")
|
3079
|
+
|
3080
|
+
df_scores=df_scores.select_dtypes(include=np.number)
|
3081
|
+
display(df_scores)
|
2793
3082
|
if df_scores.shape[0] > 1: # draw cluster
|
2794
3083
|
plot.heatmap(df_scores, kind="direct", cluster=True)
|
2795
3084
|
plot.figsets(xangle=30)
|
2796
3085
|
if dir_save:
|
2797
3086
|
ips.figsave(dir_save + f"scores_clus{now_}.pdf")
|
2798
3087
|
if all([plot_, y_true is not None, purpose == "classification"]):
|
2799
|
-
try:
|
2800
|
-
|
2801
|
-
|
2802
|
-
|
2803
|
-
|
2804
|
-
|
2805
|
-
|
2806
|
-
except Exception as e:
|
2807
|
-
|
3088
|
+
# try:
|
3089
|
+
if len(models) > 3:
|
3090
|
+
plot_validate_features(df_results,is_binary=is_binary)
|
3091
|
+
else:
|
3092
|
+
plot_validate_features_single(df_results, is_binary=is_binary)
|
3093
|
+
if dir_save:
|
3094
|
+
ips.figsave(dir_save + f"validate_features{now_}.pdf")
|
3095
|
+
# except Exception as e:
|
3096
|
+
# print(f"Error: 在画图的过程中出现了问题:{e}")
|
2808
3097
|
return df_results
|
2809
3098
|
|
2810
3099
|
|
2811
3100
|
def cal_metrics(
|
2812
|
-
y_true, y_pred, y_pred_proba=None, purpose="regression", average="weighted"
|
3101
|
+
y_true, y_pred, y_pred_proba=None, is_binary=True,purpose="regression", average="weighted"
|
2813
3102
|
):
|
2814
3103
|
"""
|
2815
3104
|
Calculate regression or classification metrics based on the purpose.
|
@@ -2879,16 +3168,49 @@ def cal_metrics(
|
|
2879
3168
|
}
|
2880
3169
|
|
2881
3170
|
# Confusion matrix to calculate specificity
|
2882
|
-
|
2883
|
-
|
2884
|
-
|
2885
|
-
|
3171
|
+
if is_binary:
|
3172
|
+
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
|
3173
|
+
# Specificity calculation
|
3174
|
+
validation_scores["specificity"] = (
|
3175
|
+
tn / (tn + fp) if (tn + fp) > 0 else 0
|
3176
|
+
)
|
3177
|
+
if y_pred_proba is not None:
|
3178
|
+
# Calculate ROC-AUC
|
3179
|
+
validation_scores["roc_auc"] = roc_auc_score(y_true, y_pred_proba)
|
3180
|
+
# PR-AUC (Precision-Recall AUC) calculation
|
3181
|
+
validation_scores["pr_auc"] = average_precision_score(y_true, y_pred_proba)
|
3182
|
+
|
3183
|
+
else: # multi-class
|
3184
|
+
from sklearn.preprocessing import label_binarize
|
3185
|
+
#* Multi-class ROC calculation
|
3186
|
+
y_pred_proba = np.asarray(y_pred_proba)
|
3187
|
+
classes = np.unique(y_true)
|
3188
|
+
y_true_bin = label_binarize(y_true, classes=classes)
|
3189
|
+
if isinstance(y_true, np.ndarray):
|
3190
|
+
y_true = ips.df_encoder(data=pd.DataFrame(y_true), method='dum',prefix='Label')
|
3191
|
+
# Initialize dictionaries to store FPR, TPR, and AUC for each class
|
3192
|
+
fpr = dict()
|
3193
|
+
tpr = dict()
|
3194
|
+
roc_auc = dict()
|
3195
|
+
for i, class_label in enumerate(classes):
|
3196
|
+
fpr[class_label], tpr[class_label], _ = roc_curve(y_true_bin[:, i], y_pred_proba[:, i])
|
3197
|
+
roc_auc[class_label] = auc(fpr[class_label], tpr[class_label])
|
3198
|
+
|
3199
|
+
# Store the mean ROC AUC
|
3200
|
+
try:
|
3201
|
+
validation_scores["roc_auc"] = roc_auc_score(
|
3202
|
+
y_true, y_pred_proba, multi_class="ovr", average=average
|
3203
|
+
)
|
3204
|
+
except Exception as e:
|
3205
|
+
y_pred_proba = y_pred_proba / y_pred_proba.sum(axis=1, keepdims=True)
|
3206
|
+
validation_scores["roc_auc"] = roc_auc_score(
|
3207
|
+
y_true, y_pred_proba, multi_class="ovr", average=average
|
3208
|
+
)
|
3209
|
+
|
3210
|
+
validation_scores["roc_auc_by_class"] = roc_auc # Individual class AUCs
|
3211
|
+
validation_scores["fpr"] = fpr
|
3212
|
+
validation_scores["tpr"] = tpr
|
2886
3213
|
|
2887
|
-
if y_pred_proba is not None:
|
2888
|
-
# Calculate ROC-AUC
|
2889
|
-
validation_scores["roc_auc"] = roc_auc_score(y_true, y_pred_proba)
|
2890
|
-
# PR-AUC (Precision-Recall AUC) calculation
|
2891
|
-
validation_scores["pr_auc"] = average_precision_score(y_true, y_pred_proba)
|
2892
3214
|
else:
|
2893
3215
|
raise ValueError(
|
2894
3216
|
"Invalid purpose specified. Choose 'regression' or 'classification'."
|
py2ls/translator.py
CHANGED
@@ -586,6 +586,8 @@ def replace_text(text, dict_replace=None, robust=True):
|
|
586
586
|
Returns:
|
587
587
|
str: The text after replacements have been made.
|
588
588
|
"""
|
589
|
+
if not all(text):
|
590
|
+
return ''
|
589
591
|
# Default replacements for newline and tab characters
|
590
592
|
default_replacements = {
|
591
593
|
"\a": "",
|
@@ -235,7 +235,7 @@ py2ls/fetch_update.py,sha256=9LXj661GpCEFII2wx_99aINYctDiHni6DOruDs_fdt8,4752
|
|
235
235
|
py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
|
236
236
|
py2ls/ich2ls.py,sha256=3E9R8oVpyYZXH5PiIQgT3CN5NxLe4Dwtm2LwaeacE6I,21381
|
237
237
|
py2ls/ips.py,sha256=O2QdLo6-vPbHvWtlVdtMA49LAn2y0CNVM27cxLbqqYA,271496
|
238
|
-
py2ls/ml2ls.py,sha256=
|
238
|
+
py2ls/ml2ls.py,sha256=LutEbrIF2KcBdz8jnbR3EZ4WTjRTuVGPvskUsuX2ZoA,128551
|
239
239
|
py2ls/mol.py,sha256=AZnHzarIk_MjueKdChqn1V6e4tUle3X1NnHSFA6n3Nw,10645
|
240
240
|
py2ls/netfinder.py,sha256=R70NkrnO8LlXjT1y7bf2TN-yE4yOeAYhb0jDBiNp8XA,57536
|
241
241
|
py2ls/ocr.py,sha256=5lhUbJufIKRSOL6wAWVLEo8TqMYSjoI_Q-IO-_4u3DE,31419
|
@@ -243,8 +243,8 @@ py2ls/plot.py,sha256=X0R1KK_UTdeJazjnqTqYvP-uWu6wY8szQHyJMsDDz2s,171515
|
|
243
243
|
py2ls/setuptools-70.1.0-py3-none-any.whl,sha256=2bi3cUVal8ip86s0SOvgspteEF8SKLukECi-EWmFomc,882588
|
244
244
|
py2ls/sleep_events_detectors.py,sha256=bQA3HJqv5qnYKJJEIhCyhlDtkXQfIzqksnD0YRXso68,52145
|
245
245
|
py2ls/stats.py,sha256=qBn2rJmNa_QLLUqjwYqXUlGzqmW94sgA1bxJU2FC3r0,39175
|
246
|
-
py2ls/translator.py,sha256=
|
246
|
+
py2ls/translator.py,sha256=77Tp_GjmiiwFbEIJD_q3VYpQ43XL9ZeJo6Mhl44mvh8,34284
|
247
247
|
py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
|
248
|
-
py2ls-0.2.4.
|
249
|
-
py2ls-0.2.4.
|
250
|
-
py2ls-0.2.4.
|
248
|
+
py2ls-0.2.4.15.dist-info/METADATA,sha256=MbwWj3zOohusA3UxDrIgR6S3Zms5tdWbcWjw9-dA57U,20046
|
249
|
+
py2ls-0.2.4.15.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
250
|
+
py2ls-0.2.4.15.dist-info/RECORD,,
|
File without changes
|