py2ls 0.2.4.14__py3-none-any.whl → 0.2.4.15__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
py2ls/ml2ls.py
CHANGED
@@ -616,10 +616,10 @@ def get_features(
|
|
616
616
|
if isinstance(y, str) and y in X.columns:
|
617
617
|
y_col_name = y
|
618
618
|
y = X[y]
|
619
|
-
y = ips.df_encoder(pd.DataFrame(y), method="
|
619
|
+
y = ips.df_encoder(pd.DataFrame(y), method="label")
|
620
620
|
X = X.drop(y_col_name, axis=1)
|
621
621
|
else:
|
622
|
-
y = ips.df_encoder(pd.DataFrame(y), method="
|
622
|
+
y = ips.df_encoder(pd.DataFrame(y), method="label").values.ravel()
|
623
623
|
y = y.loc[X.index] # Align y with X after dropping rows with missing values in X
|
624
624
|
y = y.ravel() if isinstance(y, np.ndarray) else y.values.ravel()
|
625
625
|
|
@@ -1217,142 +1217,335 @@ def validate_features(
|
|
1217
1217
|
|
1218
1218
|
# # If you want to access validation scores
|
1219
1219
|
# print(validation_results)
|
1220
|
-
def plot_validate_features(res_val):
|
1220
|
+
def plot_validate_features(res_val,is_binary=True,figsize=None):
|
1221
1221
|
"""
|
1222
1222
|
plot the results of 'validate_features()'
|
1223
1223
|
"""
|
1224
|
-
|
1225
|
-
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1229
|
-
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1244
|
-
|
1245
|
-
|
1246
|
-
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1224
|
+
if is_binary:
|
1225
|
+
colors = plot.get_color(len(ips.flatten(res_val["pr_curve"].index)))
|
1226
|
+
if res_val.shape[0] > 5:
|
1227
|
+
alpha = 0
|
1228
|
+
figsize = [8, 10] if figsize is None else figsize
|
1229
|
+
subplot_layout = [1, 2]
|
1230
|
+
ncols = 2
|
1231
|
+
bbox_to_anchor = [1.5, 0.6]
|
1232
|
+
else:
|
1233
|
+
alpha = 0.03
|
1234
|
+
figsize = [10, 6] if figsize is None else figsize
|
1235
|
+
subplot_layout = [1, 1]
|
1236
|
+
ncols = 1
|
1237
|
+
bbox_to_anchor = [1, 1]
|
1238
|
+
nexttile = plot.subplot(figsize=figsize)
|
1239
|
+
ax = nexttile(subplot_layout[0], subplot_layout[1])
|
1240
|
+
for i, model_name in enumerate(ips.flatten(res_val["pr_curve"].index)):
|
1241
|
+
fpr = res_val["roc_curve"][model_name]["fpr"]
|
1242
|
+
tpr = res_val["roc_curve"][model_name]["tpr"]
|
1243
|
+
(lower_ci, upper_ci) = res_val["roc_curve"][model_name]["ci95"]
|
1244
|
+
mean_auc = res_val["roc_curve"][model_name]["auc"]
|
1245
|
+
plot_roc_curve(
|
1246
|
+
fpr,
|
1247
|
+
tpr,
|
1248
|
+
mean_auc,
|
1249
|
+
lower_ci,
|
1250
|
+
upper_ci,
|
1251
|
+
model_name=model_name,
|
1252
|
+
lw=1.5,
|
1253
|
+
color=colors[i],
|
1254
|
+
alpha=alpha,
|
1255
|
+
ax=ax,
|
1256
|
+
)
|
1257
|
+
plot.figsets(
|
1258
|
+
sp=2,
|
1259
|
+
legend=dict(
|
1260
|
+
loc="upper right",
|
1261
|
+
ncols=ncols,
|
1262
|
+
fontsize=8,
|
1263
|
+
bbox_to_anchor=[1.5, 0.6],
|
1264
|
+
markerscale=0.8,
|
1265
|
+
),
|
1255
1266
|
)
|
1256
|
-
|
1257
|
-
|
1258
|
-
|
1259
|
-
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
|
1264
|
-
|
1265
|
-
|
1266
|
-
|
1267
|
-
|
1268
|
-
|
1269
|
-
|
1270
|
-
|
1271
|
-
|
1272
|
-
|
1273
|
-
avg_precision=res_val["pr_curve"][model_name]["avg_precision"],
|
1274
|
-
model_name=model_name,
|
1275
|
-
color=colors[i],
|
1276
|
-
lw=1.5,
|
1277
|
-
alpha=alpha,
|
1278
|
-
ax=ax,
|
1267
|
+
# plot.split_legend(ax,n=2, loc=["upper left", "lower left"],bbox=[[1,0.5],[1,0.5]],ncols=2,labelcolor="k",fontsize=8)
|
1268
|
+
|
1269
|
+
ax = nexttile(subplot_layout[0], subplot_layout[1])
|
1270
|
+
for i, model_name in enumerate(ips.flatten(res_val["pr_curve"].index)):
|
1271
|
+
plot_pr_curve(
|
1272
|
+
recall=res_val["pr_curve"][model_name]["recall"],
|
1273
|
+
precision=res_val["pr_curve"][model_name]["precision"],
|
1274
|
+
avg_precision=res_val["pr_curve"][model_name]["avg_precision"],
|
1275
|
+
model_name=model_name,
|
1276
|
+
color=colors[i],
|
1277
|
+
lw=1.5,
|
1278
|
+
alpha=alpha,
|
1279
|
+
ax=ax,
|
1280
|
+
)
|
1281
|
+
plot.figsets(
|
1282
|
+
sp=2,
|
1283
|
+
legend=dict(loc="upper right", ncols=1, fontsize=8, bbox_to_anchor=[1.5, 0.5]),
|
1279
1284
|
)
|
1280
|
-
|
1281
|
-
|
1282
|
-
|
1283
|
-
|
1284
|
-
|
1285
|
+
# plot.split_legend(ax,n=2, loc=["upper left", "lower left"],bbox=[[1,0.5],[1,0.5]],ncols=2,labelcolor="k",fontsize=8)
|
1286
|
+
else:
|
1287
|
+
colors = plot.get_color(len(ips.flatten(res_val["pr_curve"].index)))
|
1288
|
+
modname_tmp=ips.flatten(res_val["roc_curve"].index)[0]
|
1289
|
+
classes=list(res_val["roc_curve"][modname_tmp]['fpr'].keys())
|
1290
|
+
if res_val.shape[0] > 5:
|
1291
|
+
alpha = 0
|
1292
|
+
figsize = [8, 8*2*(len(classes))] if figsize is None else figsize
|
1293
|
+
subplot_layout = [1, 2]
|
1294
|
+
ncols = 2
|
1295
|
+
bbox_to_anchor = [1.5, 0.6]
|
1296
|
+
else:
|
1297
|
+
alpha = 0.03
|
1298
|
+
figsize = [10, 6*(len(classes))] if figsize is None else figsize
|
1299
|
+
subplot_layout = [1, 1]
|
1300
|
+
ncols = 1
|
1301
|
+
bbox_to_anchor = [1, 1]
|
1302
|
+
nexttile = plot.subplot(2*(len(classes)),2,figsize=figsize)
|
1303
|
+
for iclass, class_ in enumerate(classes):
|
1304
|
+
ax = nexttile(subplot_layout[0], subplot_layout[1])
|
1305
|
+
for i, model_name in enumerate(ips.flatten(res_val["pr_curve"].index)):
|
1306
|
+
fpr = res_val["roc_curve"][model_name]["fpr"][class_]
|
1307
|
+
tpr = res_val["roc_curve"][model_name]["tpr"][class_]
|
1308
|
+
(lower_ci, upper_ci) = res_val["roc_curve"][model_name]["ci95"][iclass]
|
1309
|
+
mean_auc = res_val["roc_curve"][model_name]["auc"][class_]
|
1310
|
+
plot_roc_curve(
|
1311
|
+
fpr,
|
1312
|
+
tpr,
|
1313
|
+
mean_auc,
|
1314
|
+
lower_ci,
|
1315
|
+
upper_ci,
|
1316
|
+
model_name=model_name,
|
1317
|
+
lw=1.5,
|
1318
|
+
color=colors[i],
|
1319
|
+
alpha=alpha,
|
1320
|
+
ax=ax,
|
1321
|
+
)
|
1322
|
+
plot.figsets(
|
1323
|
+
sp=2,
|
1324
|
+
title=class_,
|
1325
|
+
legend=dict(
|
1326
|
+
loc="upper right",
|
1327
|
+
ncols=ncols,
|
1328
|
+
fontsize=8,
|
1329
|
+
bbox_to_anchor=[1.5, 0.6],
|
1330
|
+
markerscale=0.8,
|
1331
|
+
),
|
1332
|
+
)
|
1333
|
+
# plot.split_legend(ax,n=2, loc=["upper left", "lower left"],bbox=[[1,0.5],[1,0.5]],ncols=2,labelcolor="k",fontsize=8)
|
1334
|
+
|
1335
|
+
ax = nexttile(subplot_layout[0], subplot_layout[1])
|
1336
|
+
for i, model_name in enumerate(ips.flatten(res_val["pr_curve"].index)):
|
1337
|
+
plot_pr_curve(
|
1338
|
+
recall=res_val["pr_curve"][model_name]["recall"][iclass],
|
1339
|
+
precision=res_val["pr_curve"][model_name]["precision"][iclass],
|
1340
|
+
avg_precision=res_val["pr_curve"][model_name]["avg_precision"][iclass],
|
1341
|
+
model_name=model_name,
|
1342
|
+
color=colors[i],
|
1343
|
+
lw=1.5,
|
1344
|
+
alpha=alpha,
|
1345
|
+
ax=ax,
|
1346
|
+
)
|
1347
|
+
plot.figsets(
|
1348
|
+
sp=2,
|
1349
|
+
title=class_,
|
1350
|
+
legend=dict(loc="upper right", ncols=1, fontsize=8, bbox_to_anchor=[1.5, 0.5]),
|
1351
|
+
)
|
1285
1352
|
|
1353
|
+
def plot_validate_features_single(res_val, figsize=None,is_binary=True):
|
1354
|
+
if is_binary:
|
1355
|
+
if figsize is None:
|
1356
|
+
nexttile = plot.subplot(len(ips.flatten(res_val["pr_curve"].index)), 3,figsize=[13,4*len(ips.flatten(res_val["pr_curve"].index))])
|
1357
|
+
else:
|
1358
|
+
nexttile = plot.subplot(
|
1359
|
+
len(ips.flatten(res_val["pr_curve"].index)), 3, figsize=figsize
|
1360
|
+
)
|
1361
|
+
for model_name in ips.flatten(res_val["pr_curve"].index):
|
1362
|
+
fpr = res_val["roc_curve"][model_name]["fpr"]
|
1363
|
+
tpr = res_val["roc_curve"][model_name]["tpr"]
|
1364
|
+
(lower_ci, upper_ci) = res_val["roc_curve"][model_name]["ci95"]
|
1365
|
+
mean_auc = res_val["roc_curve"][model_name]["auc"]
|
1366
|
+
|
1367
|
+
# Plotting
|
1368
|
+
plot_roc_curve(fpr, tpr, mean_auc, lower_ci, upper_ci,
|
1369
|
+
model_name=model_name, ax=nexttile())
|
1370
|
+
plot.figsets(title=model_name, sp=2)
|
1371
|
+
|
1372
|
+
plot_pr_binary(
|
1373
|
+
recall=res_val["pr_curve"][model_name]["recall"],
|
1374
|
+
precision=res_val["pr_curve"][model_name]["precision"],
|
1375
|
+
avg_precision=res_val["pr_curve"][model_name]["avg_precision"],
|
1376
|
+
model_name=model_name,
|
1377
|
+
ax=nexttile(),
|
1378
|
+
)
|
1379
|
+
plot.figsets(title=model_name, sp=2)
|
1286
1380
|
|
1287
|
-
|
1288
|
-
|
1289
|
-
|
1381
|
+
# plot cm
|
1382
|
+
plot_cm(res_val["confusion_matrix"][model_name], ax=nexttile(), normalize=False)
|
1383
|
+
plot.figsets(title=model_name, sp=2)
|
1290
1384
|
else:
|
1291
|
-
|
1292
|
-
|
1293
|
-
)
|
1294
|
-
|
1295
|
-
|
1296
|
-
|
1297
|
-
|
1298
|
-
|
1299
|
-
|
1300
|
-
|
1301
|
-
|
1302
|
-
|
1303
|
-
|
1304
|
-
|
1305
|
-
|
1306
|
-
|
1307
|
-
|
1308
|
-
|
1309
|
-
|
1310
|
-
|
1311
|
-
|
1312
|
-
|
1385
|
+
|
1386
|
+
modname_tmp=ips.flatten(res_val["roc_curve"].index)[0]
|
1387
|
+
classes=list(res_val["roc_curve"][modname_tmp]['fpr'].keys())
|
1388
|
+
if figsize is None:
|
1389
|
+
nexttile = plot.subplot(len(modname_tmp), 3,figsize=[15,len(modname_tmp)*5])
|
1390
|
+
else:
|
1391
|
+
nexttile = plot.subplot(len(modname_tmp), 3, figsize=figsize)
|
1392
|
+
colors = plot.get_color(len(classes))
|
1393
|
+
for i, model_name in enumerate(ips.flatten(res_val["pr_curve"].index)):
|
1394
|
+
ax = nexttile()
|
1395
|
+
for iclass, class_ in enumerate(classes):
|
1396
|
+
fpr = res_val["roc_curve"][model_name]["fpr"][class_]
|
1397
|
+
tpr = res_val["roc_curve"][model_name]["tpr"][class_]
|
1398
|
+
(lower_ci, upper_ci) = res_val["roc_curve"][model_name]["ci95"][iclass]
|
1399
|
+
mean_auc = res_val["roc_curve"][model_name]["auc"][class_]
|
1400
|
+
plot_roc_curve(
|
1401
|
+
fpr,
|
1402
|
+
tpr,
|
1403
|
+
mean_auc,
|
1404
|
+
lower_ci,
|
1405
|
+
upper_ci,
|
1406
|
+
model_name=class_,
|
1407
|
+
lw=1.5,
|
1408
|
+
color=colors[iclass],
|
1409
|
+
alpha=0.03,
|
1410
|
+
ax=ax,
|
1411
|
+
)
|
1412
|
+
plot.figsets(
|
1413
|
+
sp=2,
|
1414
|
+
title=model_name,
|
1415
|
+
legend=dict(
|
1416
|
+
loc="best",
|
1417
|
+
fontsize=8,
|
1418
|
+
),
|
1419
|
+
)
|
1420
|
+
|
1421
|
+
ax = nexttile()
|
1422
|
+
for iclass, class_ in enumerate(classes):
|
1423
|
+
plot_pr_curve(
|
1424
|
+
recall=res_val["pr_curve"][model_name]["recall"][iclass],
|
1425
|
+
precision=res_val["pr_curve"][model_name]["precision"][iclass],
|
1426
|
+
avg_precision=res_val["pr_curve"][model_name]["avg_precision"][iclass],
|
1427
|
+
model_name=class_,
|
1428
|
+
color=colors[iclass],
|
1429
|
+
lw=1.5,
|
1430
|
+
alpha=0.03,
|
1431
|
+
ax=ax,
|
1432
|
+
)
|
1433
|
+
plot.figsets(
|
1434
|
+
sp=2,
|
1435
|
+
title=class_,
|
1436
|
+
legend=dict(loc="best", fontsize=8),
|
1437
|
+
)
|
1438
|
+
|
1439
|
+
plot_cm(res_val["confusion_matrix"][model_name],labels_name=classes, ax=nexttile(), normalize=False)
|
1440
|
+
plot.figsets(title=model_name, sp=2)
|
1313
1441
|
|
1314
|
-
# plot cm
|
1315
|
-
plot_cm(res_val["confusion_matrix"][model_name], ax=nexttile(), normalize=False)
|
1316
|
-
plot.figsets(title=model_name, sp=2)
|
1317
1442
|
|
1443
|
+
def cal_precision_recall(
|
1444
|
+
y_true, y_pred_proba, is_binary=True):
|
1445
|
+
if is_binary:
|
1446
|
+
precision_, recall_, _ = precision_recall_curve(y_true, y_pred_proba)
|
1447
|
+
avg_precision_ = average_precision_score(y_true, y_pred_proba)
|
1448
|
+
return precision_, recall_,avg_precision_
|
1449
|
+
else:
|
1450
|
+
n_classes = y_pred_proba.shape[1] # Number of classes
|
1451
|
+
precision_ = []
|
1452
|
+
recall_ = []
|
1453
|
+
|
1454
|
+
# One-vs-rest approach for multi-class precision-recall curve
|
1455
|
+
for class_idx in range(n_classes):
|
1456
|
+
precision, recall, _ = precision_recall_curve(
|
1457
|
+
(y_true == class_idx).astype(int), # Binarize true labels for the current class
|
1458
|
+
y_pred_proba[:, class_idx], # Probabilities for the current class
|
1459
|
+
)
|
1318
1460
|
|
1461
|
+
precision_.append(precision)
|
1462
|
+
recall_.append(recall)
|
1463
|
+
# Optionally, you can compute average precision for each class
|
1464
|
+
avg_precision_ = []
|
1465
|
+
for class_idx in range(n_classes):
|
1466
|
+
avg_precision = average_precision_score(
|
1467
|
+
(y_true == class_idx).astype(int), # Binarize true labels for the current class
|
1468
|
+
y_pred_proba[:, class_idx], # Probabilities for the current class
|
1469
|
+
)
|
1470
|
+
avg_precision_.append(avg_precision)
|
1471
|
+
return precision_, recall_,avg_precision_
|
1472
|
+
|
1319
1473
|
def cal_auc_ci(
|
1320
|
-
y_true, y_pred, n_bootstraps=1000, ci=0.95, random_state=1, verbose=True
|
1474
|
+
y_true, y_pred, n_bootstraps=1000, ci=0.95, random_state=1,is_binary=True, verbose=True
|
1321
1475
|
):
|
1322
|
-
|
1323
|
-
|
1324
|
-
|
1325
|
-
|
1326
|
-
|
1327
|
-
|
1328
|
-
|
1329
|
-
|
1330
|
-
|
1331
|
-
|
1332
|
-
|
1333
|
-
|
1334
|
-
|
1335
|
-
|
1336
|
-
|
1337
|
-
|
1338
|
-
|
1339
|
-
|
1340
|
-
|
1341
|
-
|
1342
|
-
|
1343
|
-
|
1344
|
-
|
1345
|
-
|
1346
|
-
|
1347
|
-
|
1348
|
-
|
1349
|
-
|
1350
|
-
|
1351
|
-
|
1352
|
-
|
1476
|
+
if is_binary:
|
1477
|
+
y_true = np.asarray(y_true)
|
1478
|
+
y_pred = np.asarray(y_pred)
|
1479
|
+
bootstrapped_scores = []
|
1480
|
+
if verbose:
|
1481
|
+
print("auroc score:", roc_auc_score(y_true, y_pred))
|
1482
|
+
rng = np.random.RandomState(random_state)
|
1483
|
+
for i in range(n_bootstraps):
|
1484
|
+
# bootstrap by sampling with replacement on the prediction indices
|
1485
|
+
indices = rng.randint(0, len(y_pred), len(y_pred))
|
1486
|
+
if len(np.unique(y_true[indices])) < 2:
|
1487
|
+
# We need at least one positive and one negative sample for ROC AUC
|
1488
|
+
# to be defined: reject the sample
|
1489
|
+
continue
|
1490
|
+
if isinstance(y_true, np.ndarray):
|
1491
|
+
score = roc_auc_score(y_true[indices], y_pred[indices])
|
1492
|
+
else:
|
1493
|
+
score = roc_auc_score(y_true.iloc[indices], y_pred.iloc[indices])
|
1494
|
+
bootstrapped_scores.append(score)
|
1495
|
+
# print("Bootstrap #{} ROC area: {:0.3f}".format(i + 1, score))
|
1496
|
+
sorted_scores = np.array(bootstrapped_scores)
|
1497
|
+
sorted_scores.sort()
|
1498
|
+
|
1499
|
+
# Computing the lower and upper bound of the 90% confidence interval
|
1500
|
+
# You can change the bounds percentiles to 0.025 and 0.975 to get
|
1501
|
+
# a 95% confidence interval instead.
|
1502
|
+
confidence_lower = sorted_scores[int((1 - ci) * len(sorted_scores))]
|
1503
|
+
confidence_upper = sorted_scores[int(ci * len(sorted_scores))]
|
1504
|
+
if verbose:
|
1505
|
+
print(
|
1506
|
+
"Confidence interval for the score: [{:0.3f} - {:0.3}]".format(
|
1507
|
+
confidence_lower, confidence_upper
|
1508
|
+
)
|
1353
1509
|
)
|
1354
|
-
|
1355
|
-
|
1510
|
+
return confidence_lower, confidence_upper
|
1511
|
+
else:
|
1512
|
+
from sklearn.preprocessing import label_binarize
|
1513
|
+
# Multi-class classification case
|
1514
|
+
y_true = np.asarray(y_true)
|
1515
|
+
y_pred = np.asarray(y_pred)
|
1516
|
+
|
1517
|
+
# Binarize the multi-class labels for OvR computation
|
1518
|
+
y_true_bin = label_binarize(y_true, classes=np.unique(y_true)) # One-vs-Rest transformation
|
1519
|
+
n_classes = y_true_bin.shape[1] # Number of classes
|
1520
|
+
|
1521
|
+
bootstrapped_scores = np.zeros((n_classes, n_bootstraps)) # Store scores for each class
|
1522
|
+
|
1523
|
+
if verbose:
|
1524
|
+
print("AUROC scores for each class:")
|
1525
|
+
for i in range(n_classes):
|
1526
|
+
print(f"Class {i}: {roc_auc_score(y_true_bin[:, i], y_pred[:, i])}")
|
1527
|
+
|
1528
|
+
rng = np.random.RandomState(random_state)
|
1529
|
+
for i in range(n_bootstraps):
|
1530
|
+
indices = rng.randint(0, len(y_pred), len(y_pred))
|
1531
|
+
for class_idx in range(n_classes):
|
1532
|
+
if len(np.unique(y_true_bin[indices, class_idx])) < 2:
|
1533
|
+
continue # Reject if the class doesn't have both positive and negative samples
|
1534
|
+
score = roc_auc_score(y_true_bin[indices, class_idx], y_pred[indices, class_idx])
|
1535
|
+
bootstrapped_scores[class_idx, i] = score
|
1536
|
+
|
1537
|
+
# Calculating the confidence intervals for each class
|
1538
|
+
confidence_intervals = []
|
1539
|
+
for class_idx in range(n_classes):
|
1540
|
+
sorted_scores = np.sort(bootstrapped_scores[class_idx])
|
1541
|
+
confidence_lower = sorted_scores[int((1 - ci) * len(sorted_scores))]
|
1542
|
+
confidence_upper = sorted_scores[int(ci * len(sorted_scores))]
|
1543
|
+
confidence_intervals.append((confidence_lower, confidence_upper))
|
1544
|
+
|
1545
|
+
if verbose:
|
1546
|
+
print(f"Class {class_idx} - Confidence interval: [{confidence_lower:.3f} - {confidence_upper:.3f}]")
|
1547
|
+
|
1548
|
+
return confidence_intervals
|
1356
1549
|
|
1357
1550
|
|
1358
1551
|
def plot_roc_curve(
|
@@ -1517,7 +1710,7 @@ def plot_pr_binary(
|
|
1517
1710
|
|
1518
1711
|
pr_boundary = interp1d(recall, precision, kind="linear", fill_value="extrapolate")
|
1519
1712
|
for f_score in f_scores:
|
1520
|
-
x_vals = np.linspace(0.01, 1,
|
1713
|
+
x_vals = np.linspace(0.01, 1, 20000)
|
1521
1714
|
y_vals = f_score * x_vals / (2 * x_vals - f_score)
|
1522
1715
|
y_vals_clipped = np.minimum(y_vals, pr_boundary(x_vals))
|
1523
1716
|
y_vals_clipped = np.clip(y_vals_clipped, 1e-3, None) # Prevent going to zero
|
@@ -1553,7 +1746,7 @@ def plot_pr_binary(
|
|
1553
1746
|
def plot_cm(
|
1554
1747
|
cm,
|
1555
1748
|
labels_name=None,
|
1556
|
-
thresh=0.8,
|
1749
|
+
thresh=0.8, # for set color
|
1557
1750
|
axis_labels=None,
|
1558
1751
|
cmap="Reds",
|
1559
1752
|
normalize=True,
|
@@ -2029,10 +2222,16 @@ def predict(
|
|
2029
2222
|
if isinstance(y_train, str) and y_train in x_train.columns:
|
2030
2223
|
y_train_col_name = y_train
|
2031
2224
|
y_train = x_train[y_train]
|
2032
|
-
y_train = ips.df_encoder(pd.DataFrame(y_train), method="dummy")
|
2225
|
+
# y_train = ips.df_encoder(pd.DataFrame(y_train), method="dummy")
|
2033
2226
|
x_train = x_train.drop(y_train_col_name, axis=1)
|
2034
|
-
else:
|
2035
|
-
|
2227
|
+
# else:
|
2228
|
+
# y_train = ips.df_encoder(pd.DataFrame(y_train), method="dummy").values.ravel()
|
2229
|
+
y_train=pd.DataFrame(y_train)
|
2230
|
+
y_train_=ips.df_encoder(y_train, method="dummy",drop=None)
|
2231
|
+
is_binary = False if y_train_.shape[1] >2 else True
|
2232
|
+
|
2233
|
+
# if is_binary:
|
2234
|
+
# y_train = ips.df_encoder(pd.DataFrame(y_train), method="label").values.ravel()
|
2036
2235
|
|
2037
2236
|
if x_true is None:
|
2038
2237
|
x_train, x_true, y_train, y_true = train_test_split(
|
@@ -2042,23 +2241,27 @@ def predict(
|
|
2042
2241
|
random_state=random_state,
|
2043
2242
|
stratify=y_train if purpose == "classification" else None,
|
2044
2243
|
)
|
2244
|
+
|
2045
2245
|
if isinstance(y_train, str) and y_train in x_train.columns:
|
2046
2246
|
y_train_col_name = y_train
|
2047
2247
|
y_train = x_train[y_train]
|
2048
|
-
y_train = ips.df_encoder(pd.DataFrame(y_train), method="
|
2248
|
+
y_train = ips.df_encoder(pd.DataFrame(y_train), method="label") if is_binary else y_train
|
2049
2249
|
x_train = x_train.drop(y_train_col_name, axis=1)
|
2050
|
-
|
2250
|
+
if is_binary:
|
2051
2251
|
y_train = ips.df_encoder(
|
2052
|
-
pd.DataFrame(y_train), method="
|
2053
|
-
).values.ravel()
|
2252
|
+
pd.DataFrame(y_train), method="label"
|
2253
|
+
).values.ravel()
|
2254
|
+
|
2054
2255
|
if y_true is not None:
|
2055
2256
|
if isinstance(y_true, str) and y_true in x_true.columns:
|
2056
2257
|
y_true_col_name = y_true
|
2057
2258
|
y_true = x_true[y_true]
|
2058
|
-
y_true = ips.df_encoder(pd.DataFrame(y_true), method="
|
2259
|
+
y_true = ips.df_encoder(pd.DataFrame(y_true), method="label") if is_binary else y_true
|
2260
|
+
y_true = pd.DataFrame(y_true)
|
2059
2261
|
x_true = x_true.drop(y_true_col_name, axis=1)
|
2060
|
-
|
2061
|
-
y_true = ips.df_encoder(pd.DataFrame(y_true), method="
|
2262
|
+
if is_binary:
|
2263
|
+
y_true = ips.df_encoder(pd.DataFrame(y_true), method="label").values.ravel()
|
2264
|
+
y_true = pd.DataFrame(y_true)
|
2062
2265
|
|
2063
2266
|
# to convert the 2D to 1D: 2D column-vector format (like [[1], [0], [1], ...]) instead of a 1D array ([1, 0, 1, ...]
|
2064
2267
|
|
@@ -2068,7 +2271,6 @@ def predict(
|
|
2068
2271
|
y_train.ravel() if isinstance(y_train, np.ndarray) else y_train.values.ravel()
|
2069
2272
|
)
|
2070
2273
|
y_true = y_true.ravel() if isinstance(y_true, np.ndarray) else y_true.values.ravel()
|
2071
|
-
|
2072
2274
|
# Ensure common features are selected
|
2073
2275
|
if common_features is not None:
|
2074
2276
|
x_train, x_true = x_train[common_features], x_true[common_features]
|
@@ -2077,10 +2279,7 @@ def predict(
|
|
2077
2279
|
x_train, x_true = x_train[share_col_names], x_true[share_col_names]
|
2078
2280
|
|
2079
2281
|
x_train, x_true = ips.df_scaler(x_train), ips.df_scaler(x_true)
|
2080
|
-
x_train, x_true = ips.df_encoder(x_train, method="dummy"), ips.df_encoder(
|
2081
|
-
x_true, method="dummy"
|
2082
|
-
)
|
2083
|
-
|
2282
|
+
x_train, x_true = ips.df_encoder(x_train, method="dummy"), ips.df_encoder(x_true, method="dummy")
|
2084
2283
|
# Handle class imbalance using SMOTE (only for classification)
|
2085
2284
|
if (
|
2086
2285
|
smote
|
@@ -2091,7 +2290,13 @@ def predict(
|
|
2091
2290
|
|
2092
2291
|
smote_sampler = SMOTE(random_state=random_state)
|
2093
2292
|
x_train, y_train = smote_sampler.fit_resample(x_train, y_train)
|
2094
|
-
|
2293
|
+
if not is_binary:
|
2294
|
+
if isinstance(y_train, np.ndarray):
|
2295
|
+
y_train = ips.df_encoder(data=pd.DataFrame(y_train),method='label')
|
2296
|
+
y_train=np.asarray(y_train)
|
2297
|
+
if isinstance(y_train, np.ndarray):
|
2298
|
+
y_true = ips.df_encoder(data=pd.DataFrame(y_true),method='label')
|
2299
|
+
y_true=np.asarray(y_true)
|
2095
2300
|
# Hyperparameter grids for tuning
|
2096
2301
|
if cv_level in ["low", "simple", "s", "l"]:
|
2097
2302
|
param_grids = {
|
@@ -2670,95 +2875,177 @@ def predict(
|
|
2670
2875
|
print(f"\nTraining and validating {name}:")
|
2671
2876
|
|
2672
2877
|
# Grid search with KFold or StratifiedKFold
|
2673
|
-
|
2674
|
-
|
2675
|
-
|
2676
|
-
|
2677
|
-
|
2678
|
-
|
2679
|
-
|
2680
|
-
|
2681
|
-
|
2682
|
-
|
2683
|
-
gs.fit(x_train, y_train)
|
2684
|
-
best_clf = gs.best_estimator_
|
2685
|
-
# make sure x_train and x_test has the same name
|
2686
|
-
x_true = x_true.reindex(columns=x_train.columns, fill_value=0)
|
2687
|
-
y_pred = best_clf.predict(x_true)
|
2688
|
-
|
2689
|
-
# y_pred_proba
|
2690
|
-
if hasattr(best_clf, "predict_proba"):
|
2691
|
-
y_pred_proba = best_clf.predict_proba(x_true)[:, 1]
|
2692
|
-
elif hasattr(best_clf, "decision_function"):
|
2693
|
-
# If predict_proba is not available, use decision_function (e.g., for SVM)
|
2694
|
-
y_pred_proba = best_clf.decision_function(x_true)
|
2695
|
-
# Ensure y_pred_proba is within 0 and 1 bounds
|
2696
|
-
y_pred_proba = (y_pred_proba - y_pred_proba.min()) / (
|
2697
|
-
y_pred_proba.max() - y_pred_proba.min()
|
2878
|
+
if is_binary:
|
2879
|
+
gs = GridSearchCV(
|
2880
|
+
clf,
|
2881
|
+
param_grid=param_grids.get(name, {}),
|
2882
|
+
scoring=(
|
2883
|
+
"roc_auc" if purpose == "classification" else "neg_mean_squared_error"
|
2884
|
+
),
|
2885
|
+
cv=cv,
|
2886
|
+
n_jobs=n_jobs,
|
2887
|
+
verbose=verbose,
|
2698
2888
|
)
|
2889
|
+
|
2890
|
+
gs.fit(x_train, y_train)
|
2891
|
+
best_clf = gs.best_estimator_
|
2892
|
+
# make sure x_train and x_test has the same name
|
2893
|
+
x_true = x_true.reindex(columns=x_train.columns, fill_value=0)
|
2894
|
+
y_pred = best_clf.predict(x_true)
|
2895
|
+
if hasattr(best_clf, "predict_proba"):
|
2896
|
+
y_pred_proba = best_clf.predict_proba(x_true)[:, 1]
|
2897
|
+
elif hasattr(best_clf, "decision_function"):
|
2898
|
+
# If predict_proba is not available, use decision_function (e.g., for SVM)
|
2899
|
+
y_pred_proba = best_clf.decision_function(x_true)
|
2900
|
+
# Ensure y_pred_proba is within 0 and 1 bounds
|
2901
|
+
y_pred_proba = (y_pred_proba - y_pred_proba.min()) / (
|
2902
|
+
y_pred_proba.max() - y_pred_proba.min()
|
2903
|
+
)
|
2904
|
+
else:
|
2905
|
+
y_pred_proba = None # No probability output for certain models
|
2699
2906
|
else:
|
2700
|
-
|
2907
|
+
gs = GridSearchCV(
|
2908
|
+
clf,
|
2909
|
+
param_grid=param_grids.get(name, {}),
|
2910
|
+
scoring=(
|
2911
|
+
"roc_auc_ovr" if purpose == "classification" else "neg_mean_squared_error"
|
2912
|
+
),
|
2913
|
+
cv=cv,
|
2914
|
+
n_jobs=n_jobs,
|
2915
|
+
verbose=verbose,
|
2916
|
+
)
|
2701
2917
|
|
2918
|
+
# Fit GridSearchCV
|
2919
|
+
gs.fit(x_train, y_train)
|
2920
|
+
best_clf = gs.best_estimator_
|
2921
|
+
|
2922
|
+
# Ensure x_true aligns with x_train columns
|
2923
|
+
x_true = x_true.reindex(columns=x_train.columns, fill_value=0)
|
2924
|
+
y_pred = best_clf.predict(x_true)
|
2925
|
+
|
2926
|
+
# Handle prediction probabilities for multiclass
|
2927
|
+
if hasattr(best_clf, "predict_proba"):
|
2928
|
+
y_pred_proba = best_clf.predict_proba(x_true)
|
2929
|
+
elif hasattr(best_clf, "decision_function"):
|
2930
|
+
y_pred_proba = best_clf.decision_function(x_true)
|
2931
|
+
|
2932
|
+
# Normalize for multiclass if necessary
|
2933
|
+
if y_pred_proba.ndim == 2:
|
2934
|
+
y_pred_proba = (y_pred_proba - y_pred_proba.min(axis=1, keepdims=True)) / \
|
2935
|
+
(y_pred_proba.max(axis=1, keepdims=True) - y_pred_proba.min(axis=1, keepdims=True))
|
2936
|
+
else:
|
2937
|
+
y_pred_proba = None # No probability output for certain models
|
2938
|
+
|
2702
2939
|
validation_scores = {}
|
2703
|
-
|
2940
|
+
|
2941
|
+
if y_true is not None and y_pred_proba is not None:
|
2704
2942
|
validation_scores = cal_metrics(
|
2705
2943
|
y_true,
|
2706
2944
|
y_pred,
|
2707
2945
|
y_pred_proba=y_pred_proba,
|
2946
|
+
is_binary=is_binary,
|
2708
2947
|
purpose=purpose,
|
2709
2948
|
average="weighted",
|
2710
2949
|
)
|
2711
|
-
|
2712
|
-
|
2713
|
-
|
2714
|
-
|
2715
|
-
|
2716
|
-
|
2717
|
-
|
2718
|
-
|
2719
|
-
|
2720
|
-
|
2721
|
-
|
2722
|
-
|
2723
|
-
|
2724
|
-
|
2725
|
-
|
2726
|
-
|
2727
|
-
|
2728
|
-
|
2729
|
-
|
2730
|
-
|
2731
|
-
|
2732
|
-
|
2733
|
-
|
2734
|
-
|
2735
|
-
|
2736
|
-
|
2737
|
-
|
2738
|
-
|
2739
|
-
|
2740
|
-
|
2741
|
-
|
2742
|
-
|
2743
|
-
|
2744
|
-
|
2745
|
-
|
2746
|
-
|
2747
|
-
|
2748
|
-
|
2749
|
-
|
2750
|
-
|
2751
|
-
|
2752
|
-
|
2753
|
-
|
2754
|
-
|
2755
|
-
|
2756
|
-
|
2757
|
-
|
2758
|
-
|
2759
|
-
|
2760
|
-
|
2761
|
-
|
2950
|
+
if is_binary:
|
2951
|
+
# Calculate ROC curve
|
2952
|
+
# https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html
|
2953
|
+
if y_pred_proba is not None:
|
2954
|
+
# fpr, tpr, roc_auc = dict(), dict(), dict()
|
2955
|
+
fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
|
2956
|
+
lower_ci, upper_ci = cal_auc_ci(y_true, y_pred_proba, verbose=False,is_binary=is_binary)
|
2957
|
+
roc_auc = auc(fpr, tpr)
|
2958
|
+
roc_info = {
|
2959
|
+
"fpr": fpr.tolist(),
|
2960
|
+
"tpr": tpr.tolist(),
|
2961
|
+
"auc": roc_auc,
|
2962
|
+
"ci95": (lower_ci, upper_ci),
|
2963
|
+
}
|
2964
|
+
# precision-recall curve
|
2965
|
+
precision_, recall_, _ = cal_precision_recall(y_true, y_pred_proba)
|
2966
|
+
avg_precision_ = average_precision_score(y_true, y_pred_proba)
|
2967
|
+
pr_info = {
|
2968
|
+
"precision": precision_,
|
2969
|
+
"recall": recall_,
|
2970
|
+
"avg_precision": avg_precision_,
|
2971
|
+
}
|
2972
|
+
else:
|
2973
|
+
roc_info, pr_info = None, None
|
2974
|
+
if purpose == "classification":
|
2975
|
+
results[name] = {
|
2976
|
+
"best_clf": gs.best_estimator_,
|
2977
|
+
"best_params": gs.best_params_,
|
2978
|
+
"auc_indiv": [
|
2979
|
+
gs.cv_results_[f"split{i}_test_score"][gs.best_index_]
|
2980
|
+
for i in range(cv_folds)
|
2981
|
+
],
|
2982
|
+
"scores": validation_scores,
|
2983
|
+
"roc_curve": roc_info,
|
2984
|
+
"pr_curve": pr_info,
|
2985
|
+
"confusion_matrix": confusion_matrix(y_true, y_pred),
|
2986
|
+
"predictions": y_pred.tolist(),
|
2987
|
+
"predictions_proba": (
|
2988
|
+
y_pred_proba.tolist() if y_pred_proba is not None else None
|
2989
|
+
),
|
2990
|
+
}
|
2991
|
+
else: # "regression"
|
2992
|
+
results[name] = {
|
2993
|
+
"best_clf": gs.best_estimator_,
|
2994
|
+
"best_params": gs.best_params_,
|
2995
|
+
"scores": validation_scores, # e.g., neg_MSE, R², etc.
|
2996
|
+
"predictions": y_pred.tolist(),
|
2997
|
+
"predictions_proba": (
|
2998
|
+
y_pred_proba.tolist() if y_pred_proba is not None else None
|
2999
|
+
),
|
3000
|
+
}
|
3001
|
+
else: # multi-classes
|
3002
|
+
if y_pred_proba is not None:
|
3003
|
+
# fpr, tpr, roc_auc = dict(), dict(), dict()
|
3004
|
+
# fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
|
3005
|
+
confidence_intervals = cal_auc_ci(y_true, y_pred_proba, verbose=False,is_binary=is_binary)
|
3006
|
+
roc_info = {
|
3007
|
+
"fpr": validation_scores["fpr"],
|
3008
|
+
"tpr": validation_scores["tpr"],
|
3009
|
+
"auc": validation_scores["roc_auc_by_class"],
|
3010
|
+
"ci95": confidence_intervals,
|
3011
|
+
}
|
3012
|
+
# precision-recall curve
|
3013
|
+
precision_, recall_, avg_precision_ = cal_precision_recall(y_true, y_pred_proba,is_binary=is_binary)
|
3014
|
+
pr_info = {
|
3015
|
+
"precision": precision_,
|
3016
|
+
"recall": recall_,
|
3017
|
+
"avg_precision": avg_precision_,
|
3018
|
+
}
|
3019
|
+
else:
|
3020
|
+
roc_info, pr_info = None, None
|
3021
|
+
|
3022
|
+
if purpose == "classification":
|
3023
|
+
results[name] = {
|
3024
|
+
"best_clf": gs.best_estimator_,
|
3025
|
+
"best_params": gs.best_params_,
|
3026
|
+
"auc_indiv": [
|
3027
|
+
gs.cv_results_[f"split{i}_test_score"][gs.best_index_]
|
3028
|
+
for i in range(cv_folds)
|
3029
|
+
],
|
3030
|
+
"scores": validation_scores,
|
3031
|
+
"roc_curve": roc_info,
|
3032
|
+
"pr_curve": pr_info,
|
3033
|
+
"confusion_matrix": confusion_matrix(y_true, y_pred),
|
3034
|
+
"predictions": y_pred.tolist(),
|
3035
|
+
"predictions_proba": (
|
3036
|
+
y_pred_proba.tolist() if y_pred_proba is not None else None
|
3037
|
+
),
|
3038
|
+
}
|
3039
|
+
else: # "regression"
|
3040
|
+
results[name] = {
|
3041
|
+
"best_clf": gs.best_estimator_,
|
3042
|
+
"best_params": gs.best_params_,
|
3043
|
+
"scores": validation_scores, # e.g., neg_MSE, R², etc.
|
3044
|
+
"predictions": y_pred.tolist(),
|
3045
|
+
"predictions_proba": (
|
3046
|
+
y_pred_proba.tolist() if y_pred_proba is not None else None
|
3047
|
+
),
|
3048
|
+
}
|
2762
3049
|
|
2763
3050
|
else:
|
2764
3051
|
results[name] = {
|
@@ -2773,7 +3060,6 @@ def predict(
|
|
2773
3060
|
|
2774
3061
|
# Convert results to DataFrame
|
2775
3062
|
df_results = pd.DataFrame.from_dict(results, orient="index")
|
2776
|
-
|
2777
3063
|
# sort
|
2778
3064
|
if y_true is not None and purpose == "classification":
|
2779
3065
|
df_scores = pd.DataFrame(
|
@@ -2790,26 +3076,29 @@ def predict(
|
|
2790
3076
|
plot.figsets(xangle=30)
|
2791
3077
|
if dir_save:
|
2792
3078
|
ips.figsave(dir_save + f"scores_sorted_heatmap{now_}.pdf")
|
3079
|
+
|
3080
|
+
df_scores=df_scores.select_dtypes(include=np.number)
|
3081
|
+
display(df_scores)
|
2793
3082
|
if df_scores.shape[0] > 1: # draw cluster
|
2794
3083
|
plot.heatmap(df_scores, kind="direct", cluster=True)
|
2795
3084
|
plot.figsets(xangle=30)
|
2796
3085
|
if dir_save:
|
2797
3086
|
ips.figsave(dir_save + f"scores_clus{now_}.pdf")
|
2798
3087
|
if all([plot_, y_true is not None, purpose == "classification"]):
|
2799
|
-
try:
|
2800
|
-
|
2801
|
-
|
2802
|
-
|
2803
|
-
|
2804
|
-
|
2805
|
-
|
2806
|
-
except Exception as e:
|
2807
|
-
|
3088
|
+
# try:
|
3089
|
+
if len(models) > 3:
|
3090
|
+
plot_validate_features(df_results,is_binary=is_binary)
|
3091
|
+
else:
|
3092
|
+
plot_validate_features_single(df_results, is_binary=is_binary)
|
3093
|
+
if dir_save:
|
3094
|
+
ips.figsave(dir_save + f"validate_features{now_}.pdf")
|
3095
|
+
# except Exception as e:
|
3096
|
+
# print(f"Error: 在画图的过程中出现了问题:{e}")
|
2808
3097
|
return df_results
|
2809
3098
|
|
2810
3099
|
|
2811
3100
|
def cal_metrics(
|
2812
|
-
y_true, y_pred, y_pred_proba=None, purpose="regression", average="weighted"
|
3101
|
+
y_true, y_pred, y_pred_proba=None, is_binary=True,purpose="regression", average="weighted"
|
2813
3102
|
):
|
2814
3103
|
"""
|
2815
3104
|
Calculate regression or classification metrics based on the purpose.
|
@@ -2879,16 +3168,49 @@ def cal_metrics(
|
|
2879
3168
|
}
|
2880
3169
|
|
2881
3170
|
# Confusion matrix to calculate specificity
|
2882
|
-
|
2883
|
-
|
2884
|
-
|
2885
|
-
|
3171
|
+
if is_binary:
|
3172
|
+
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
|
3173
|
+
# Specificity calculation
|
3174
|
+
validation_scores["specificity"] = (
|
3175
|
+
tn / (tn + fp) if (tn + fp) > 0 else 0
|
3176
|
+
)
|
3177
|
+
if y_pred_proba is not None:
|
3178
|
+
# Calculate ROC-AUC
|
3179
|
+
validation_scores["roc_auc"] = roc_auc_score(y_true, y_pred_proba)
|
3180
|
+
# PR-AUC (Precision-Recall AUC) calculation
|
3181
|
+
validation_scores["pr_auc"] = average_precision_score(y_true, y_pred_proba)
|
3182
|
+
|
3183
|
+
else: # multi-class
|
3184
|
+
from sklearn.preprocessing import label_binarize
|
3185
|
+
#* Multi-class ROC calculation
|
3186
|
+
y_pred_proba = np.asarray(y_pred_proba)
|
3187
|
+
classes = np.unique(y_true)
|
3188
|
+
y_true_bin = label_binarize(y_true, classes=classes)
|
3189
|
+
if isinstance(y_true, np.ndarray):
|
3190
|
+
y_true = ips.df_encoder(data=pd.DataFrame(y_true), method='dum',prefix='Label')
|
3191
|
+
# Initialize dictionaries to store FPR, TPR, and AUC for each class
|
3192
|
+
fpr = dict()
|
3193
|
+
tpr = dict()
|
3194
|
+
roc_auc = dict()
|
3195
|
+
for i, class_label in enumerate(classes):
|
3196
|
+
fpr[class_label], tpr[class_label], _ = roc_curve(y_true_bin[:, i], y_pred_proba[:, i])
|
3197
|
+
roc_auc[class_label] = auc(fpr[class_label], tpr[class_label])
|
3198
|
+
|
3199
|
+
# Store the mean ROC AUC
|
3200
|
+
try:
|
3201
|
+
validation_scores["roc_auc"] = roc_auc_score(
|
3202
|
+
y_true, y_pred_proba, multi_class="ovr", average=average
|
3203
|
+
)
|
3204
|
+
except Exception as e:
|
3205
|
+
y_pred_proba = y_pred_proba / y_pred_proba.sum(axis=1, keepdims=True)
|
3206
|
+
validation_scores["roc_auc"] = roc_auc_score(
|
3207
|
+
y_true, y_pred_proba, multi_class="ovr", average=average
|
3208
|
+
)
|
3209
|
+
|
3210
|
+
validation_scores["roc_auc_by_class"] = roc_auc # Individual class AUCs
|
3211
|
+
validation_scores["fpr"] = fpr
|
3212
|
+
validation_scores["tpr"] = tpr
|
2886
3213
|
|
2887
|
-
if y_pred_proba is not None:
|
2888
|
-
# Calculate ROC-AUC
|
2889
|
-
validation_scores["roc_auc"] = roc_auc_score(y_true, y_pred_proba)
|
2890
|
-
# PR-AUC (Precision-Recall AUC) calculation
|
2891
|
-
validation_scores["pr_auc"] = average_precision_score(y_true, y_pred_proba)
|
2892
3214
|
else:
|
2893
3215
|
raise ValueError(
|
2894
3216
|
"Invalid purpose specified. Choose 'regression' or 'classification'."
|
py2ls/translator.py
CHANGED
@@ -586,6 +586,8 @@ def replace_text(text, dict_replace=None, robust=True):
|
|
586
586
|
Returns:
|
587
587
|
str: The text after replacements have been made.
|
588
588
|
"""
|
589
|
+
if not all(text):
|
590
|
+
return ''
|
589
591
|
# Default replacements for newline and tab characters
|
590
592
|
default_replacements = {
|
591
593
|
"\a": "",
|
@@ -235,7 +235,7 @@ py2ls/fetch_update.py,sha256=9LXj661GpCEFII2wx_99aINYctDiHni6DOruDs_fdt8,4752
|
|
235
235
|
py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
|
236
236
|
py2ls/ich2ls.py,sha256=3E9R8oVpyYZXH5PiIQgT3CN5NxLe4Dwtm2LwaeacE6I,21381
|
237
237
|
py2ls/ips.py,sha256=O2QdLo6-vPbHvWtlVdtMA49LAn2y0CNVM27cxLbqqYA,271496
|
238
|
-
py2ls/ml2ls.py,sha256=
|
238
|
+
py2ls/ml2ls.py,sha256=LutEbrIF2KcBdz8jnbR3EZ4WTjRTuVGPvskUsuX2ZoA,128551
|
239
239
|
py2ls/mol.py,sha256=AZnHzarIk_MjueKdChqn1V6e4tUle3X1NnHSFA6n3Nw,10645
|
240
240
|
py2ls/netfinder.py,sha256=R70NkrnO8LlXjT1y7bf2TN-yE4yOeAYhb0jDBiNp8XA,57536
|
241
241
|
py2ls/ocr.py,sha256=5lhUbJufIKRSOL6wAWVLEo8TqMYSjoI_Q-IO-_4u3DE,31419
|
@@ -243,8 +243,8 @@ py2ls/plot.py,sha256=X0R1KK_UTdeJazjnqTqYvP-uWu6wY8szQHyJMsDDz2s,171515
|
|
243
243
|
py2ls/setuptools-70.1.0-py3-none-any.whl,sha256=2bi3cUVal8ip86s0SOvgspteEF8SKLukECi-EWmFomc,882588
|
244
244
|
py2ls/sleep_events_detectors.py,sha256=bQA3HJqv5qnYKJJEIhCyhlDtkXQfIzqksnD0YRXso68,52145
|
245
245
|
py2ls/stats.py,sha256=qBn2rJmNa_QLLUqjwYqXUlGzqmW94sgA1bxJU2FC3r0,39175
|
246
|
-
py2ls/translator.py,sha256=
|
246
|
+
py2ls/translator.py,sha256=77Tp_GjmiiwFbEIJD_q3VYpQ43XL9ZeJo6Mhl44mvh8,34284
|
247
247
|
py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
|
248
|
-
py2ls-0.2.4.
|
249
|
-
py2ls-0.2.4.
|
250
|
-
py2ls-0.2.4.
|
248
|
+
py2ls-0.2.4.15.dist-info/METADATA,sha256=MbwWj3zOohusA3UxDrIgR6S3Zms5tdWbcWjw9-dA57U,20046
|
249
|
+
py2ls-0.2.4.15.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
250
|
+
py2ls-0.2.4.15.dist-info/RECORD,,
|
File without changes
|