py2ls 0.2.4.18__py3-none-any.whl → 0.2.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/ml2ls.py CHANGED
@@ -702,7 +702,7 @@ def get_features(
702
702
  "AdaBoost",
703
703
  ]
704
704
  cls = [ips.strcmp(i, cls_)[0] for i in cls]
705
-
705
+
706
706
  feature_importances = {}
707
707
 
708
708
  # Lasso Feature Selection
@@ -714,7 +714,7 @@ def get_features(
714
714
  lasso_selected_features = (
715
715
  lasso_importances.head(n_features)["feature"].values if "lasso" in cls else []
716
716
  )
717
- feature_importances['lasso']=lasso_importances.head(n_features)
717
+ feature_importances["lasso"] = lasso_importances.head(n_features)
718
718
  # Ridge
719
719
  ridge_importances = (
720
720
  features_ridge(x_train, y_train, ridge_params)
@@ -724,7 +724,7 @@ def get_features(
724
724
  selected_ridge_features = (
725
725
  ridge_importances.head(n_features)["feature"].values if "ridge" in cls else []
726
726
  )
727
- feature_importances['ridge']=ridge_importances.head(n_features)
727
+ feature_importances["ridge"] = ridge_importances.head(n_features)
728
728
  # Elastic Net
729
729
  enet_importances = (
730
730
  features_enet(x_train, y_train, enet_params)
@@ -734,7 +734,7 @@ def get_features(
734
734
  selected_enet_features = (
735
735
  enet_importances.head(n_features)["feature"].values if "Enet" in cls else []
736
736
  )
737
- feature_importances['Enet']=enet_importances.head(n_features)
737
+ feature_importances["Enet"] = enet_importances.head(n_features)
738
738
  # Random Forest Feature Importance
739
739
  rf_importances = (
740
740
  features_rf(x_train, y_train, rf_params)
@@ -746,7 +746,7 @@ def get_features(
746
746
  if "Random Forest" in cls
747
747
  else []
748
748
  )
749
- feature_importances['Random Forest']=rf_importances.head(n_features)
749
+ feature_importances["Random Forest"] = rf_importances.head(n_features)
750
750
  # Gradient Boosting Feature Importance
751
751
  gb_importances = (
752
752
  features_gradient_boosting(x_train, y_train, gb_params)
@@ -758,7 +758,7 @@ def get_features(
758
758
  if "Gradient Boosting" in cls
759
759
  else []
760
760
  )
761
- feature_importances['Gradient Boosting']=gb_importances.head(n_features)
761
+ feature_importances["Gradient Boosting"] = gb_importances.head(n_features)
762
762
  # xgb
763
763
  xgb_importances = (
764
764
  features_xgb(x_train, y_train, xgb_params) if "xgb" in cls else pd.DataFrame()
@@ -766,7 +766,7 @@ def get_features(
766
766
  top_xgb_features = (
767
767
  xgb_importances.head(n_features)["feature"].values if "xgb" in cls else []
768
768
  )
769
- feature_importances['xgb']=xgb_importances.head(n_features)
769
+ feature_importances["xgb"] = xgb_importances.head(n_features)
770
770
 
771
771
  # SVM with RFE
772
772
  selected_svm_features = (
@@ -781,7 +781,7 @@ def get_features(
781
781
  selected_lda_features = (
782
782
  lda_importances.head(n_features)["feature"].values if "lda" in cls else []
783
783
  )
784
- feature_importances['lda']=lda_importances.head(n_features)
784
+ feature_importances["lda"] = lda_importances.head(n_features)
785
785
  # AdaBoost Feature Importance
786
786
  adaboost_importances = (
787
787
  features_adaboost(x_train, y_train, adaboost_params)
@@ -793,7 +793,7 @@ def get_features(
793
793
  if "AdaBoost" in cls
794
794
  else []
795
795
  )
796
- feature_importances['AdaBoost']=adaboost_importances.head(n_features)
796
+ feature_importances["AdaBoost"] = adaboost_importances.head(n_features)
797
797
  # Decision Tree Feature Importance
798
798
  dt_importances = (
799
799
  features_decision_tree(x_train, y_train, dt_params)
@@ -804,8 +804,8 @@ def get_features(
804
804
  dt_importances.head(n_features)["feature"].values
805
805
  if "Decision Tree" in cls
806
806
  else []
807
- )
808
- feature_importances['Decision Tree']=dt_importances.head(n_features)
807
+ )
808
+ feature_importances["Decision Tree"] = dt_importances.head(n_features)
809
809
  # Bagging Feature Importance
810
810
  bagging_importances = (
811
811
  features_bagging(x_train, y_train, bagging_params)
@@ -817,7 +817,7 @@ def get_features(
817
817
  if "Bagging" in cls
818
818
  else []
819
819
  )
820
- feature_importances['Bagging']=bagging_importances.head(n_features)
820
+ feature_importances["Bagging"] = bagging_importances.head(n_features)
821
821
  # KNN Feature Importance via Permutation
822
822
  knn_importances = (
823
823
  features_knn(x_train, y_train, knn_params) if "KNN" in cls else pd.DataFrame()
@@ -825,7 +825,7 @@ def get_features(
825
825
  top_knn_features = (
826
826
  knn_importances.head(n_features)["feature"].values if "KNN" in cls else []
827
827
  )
828
- feature_importances['KNN']=knn_importances.head(n_features)
828
+ feature_importances["KNN"] = knn_importances.head(n_features)
829
829
 
830
830
  #! Find common features
831
831
  common_features = ips.shared(
@@ -928,7 +928,7 @@ def get_features(
928
928
  "cv_train_scores": cv_train_results_df,
929
929
  "cv_test_scores": rank_models(cv_test_results_df, plot_=plot_),
930
930
  "common_features": list(common_features),
931
- "feature_importances":feature_importances
931
+ "feature_importances": feature_importances,
932
932
  }
933
933
  if all([plot_, dir_save]):
934
934
  from datetime import datetime
@@ -941,7 +941,7 @@ def get_features(
941
941
  "cv_train_scores": pd.DataFrame(),
942
942
  "cv_test_scores": pd.DataFrame(),
943
943
  "common_features": [],
944
- "feature_importances":{}
944
+ "feature_importances": {},
945
945
  }
946
946
  print(f"Warning: 没有找到共同的genes, when n_shared={n_shared}")
947
947
  return results
@@ -1232,7 +1232,7 @@ def validate_features(
1232
1232
 
1233
1233
  # # If you want to access validation scores
1234
1234
  # print(validation_results)
1235
- def plot_validate_features(res_val,is_binary=True,figsize=None):
1235
+ def plot_validate_features(res_val, is_binary=True, figsize=None):
1236
1236
  """
1237
1237
  plot the results of 'validate_features()'
1238
1238
  """
@@ -1295,26 +1295,28 @@ def plot_validate_features(res_val,is_binary=True,figsize=None):
1295
1295
  )
1296
1296
  plot.figsets(
1297
1297
  sp=2,
1298
- legend=dict(loc="upper right", ncols=1, fontsize=8, bbox_to_anchor=[1.5, 0.5]),
1298
+ legend=dict(
1299
+ loc="upper right", ncols=1, fontsize=8, bbox_to_anchor=[1.5, 0.5]
1300
+ ),
1299
1301
  )
1300
1302
  # plot.split_legend(ax,n=2, loc=["upper left", "lower left"],bbox=[[1,0.5],[1,0.5]],ncols=2,labelcolor="k",fontsize=8)
1301
1303
  else:
1302
1304
  colors = plot.get_color(len(ips.flatten(res_val["pr_curve"].index)))
1303
- modname_tmp=ips.flatten(res_val["roc_curve"].index)[0]
1304
- classes=list(res_val["roc_curve"][modname_tmp]['fpr'].keys())
1305
+ modname_tmp = ips.flatten(res_val["roc_curve"].index)[0]
1306
+ classes = list(res_val["roc_curve"][modname_tmp]["fpr"].keys())
1305
1307
  if res_val.shape[0] > 5:
1306
1308
  alpha = 0
1307
- figsize = [8, 8*2*(len(classes))] if figsize is None else figsize
1309
+ figsize = [8, 8 * 2 * (len(classes))] if figsize is None else figsize
1308
1310
  subplot_layout = [1, 2]
1309
1311
  ncols = 2
1310
1312
  bbox_to_anchor = [1.5, 0.6]
1311
1313
  else:
1312
1314
  alpha = 0.03
1313
- figsize = [10, 6*(len(classes))] if figsize is None else figsize
1315
+ figsize = [10, 6 * (len(classes))] if figsize is None else figsize
1314
1316
  subplot_layout = [1, 1]
1315
1317
  ncols = 1
1316
1318
  bbox_to_anchor = [1, 1]
1317
- nexttile = plot.subplot(2*(len(classes)),2,figsize=figsize)
1319
+ nexttile = plot.subplot(2 * (len(classes)), 2, figsize=figsize)
1318
1320
  for iclass, class_ in enumerate(classes):
1319
1321
  ax = nexttile(subplot_layout[0], subplot_layout[1])
1320
1322
  for i, model_name in enumerate(ips.flatten(res_val["pr_curve"].index)):
@@ -1352,7 +1354,9 @@ def plot_validate_features(res_val,is_binary=True,figsize=None):
1352
1354
  plot_pr_curve(
1353
1355
  recall=res_val["pr_curve"][model_name]["recall"][iclass],
1354
1356
  precision=res_val["pr_curve"][model_name]["precision"][iclass],
1355
- avg_precision=res_val["pr_curve"][model_name]["avg_precision"][iclass],
1357
+ avg_precision=res_val["pr_curve"][model_name]["avg_precision"][
1358
+ iclass
1359
+ ],
1356
1360
  model_name=model_name,
1357
1361
  color=colors[i],
1358
1362
  lw=1.5,
@@ -1362,13 +1366,20 @@ def plot_validate_features(res_val,is_binary=True,figsize=None):
1362
1366
  plot.figsets(
1363
1367
  sp=2,
1364
1368
  title=class_,
1365
- legend=dict(loc="upper right", ncols=1, fontsize=8, bbox_to_anchor=[1.5, 0.5]),
1369
+ legend=dict(
1370
+ loc="upper right", ncols=1, fontsize=8, bbox_to_anchor=[1.5, 0.5]
1371
+ ),
1366
1372
  )
1367
1373
 
1368
- def plot_validate_features_single(res_val, figsize=None,is_binary=True):
1374
+
1375
+ def plot_validate_features_single(res_val, figsize=None, is_binary=True):
1369
1376
  if is_binary:
1370
1377
  if figsize is None:
1371
- nexttile = plot.subplot(len(ips.flatten(res_val["pr_curve"].index)), 3,figsize=[13,4*len(ips.flatten(res_val["pr_curve"].index))])
1378
+ nexttile = plot.subplot(
1379
+ len(ips.flatten(res_val["pr_curve"].index)),
1380
+ 3,
1381
+ figsize=[13, 4 * len(ips.flatten(res_val["pr_curve"].index))],
1382
+ )
1372
1383
  else:
1373
1384
  nexttile = plot.subplot(
1374
1385
  len(ips.flatten(res_val["pr_curve"].index)), 3, figsize=figsize
@@ -1380,8 +1391,15 @@ def plot_validate_features_single(res_val, figsize=None,is_binary=True):
1380
1391
  mean_auc = res_val["roc_curve"][model_name]["auc"]
1381
1392
 
1382
1393
  # Plotting
1383
- plot_roc_curve(fpr, tpr, mean_auc, lower_ci, upper_ci,
1384
- model_name=model_name, ax=nexttile())
1394
+ plot_roc_curve(
1395
+ fpr,
1396
+ tpr,
1397
+ mean_auc,
1398
+ lower_ci,
1399
+ upper_ci,
1400
+ model_name=model_name,
1401
+ ax=nexttile(),
1402
+ )
1385
1403
  plot.figsets(title=model_name, sp=2)
1386
1404
 
1387
1405
  plot_pr_binary(
@@ -1394,14 +1412,18 @@ def plot_validate_features_single(res_val, figsize=None,is_binary=True):
1394
1412
  plot.figsets(title=model_name, sp=2)
1395
1413
 
1396
1414
  # plot cm
1397
- plot_cm(res_val["confusion_matrix"][model_name], ax=nexttile(), normalize=False)
1415
+ plot_cm(
1416
+ res_val["confusion_matrix"][model_name], ax=nexttile(), normalize=False
1417
+ )
1398
1418
  plot.figsets(title=model_name, sp=2)
1399
1419
  else:
1400
-
1401
- modname_tmp=ips.flatten(res_val["roc_curve"].index)[0]
1402
- classes=list(res_val["roc_curve"][modname_tmp]['fpr'].keys())
1420
+
1421
+ modname_tmp = ips.flatten(res_val["roc_curve"].index)[0]
1422
+ classes = list(res_val["roc_curve"][modname_tmp]["fpr"].keys())
1403
1423
  if figsize is None:
1404
- nexttile = plot.subplot(len(modname_tmp), 3,figsize=[15,len(modname_tmp)*5])
1424
+ nexttile = plot.subplot(
1425
+ len(modname_tmp), 3, figsize=[15, len(modname_tmp) * 5]
1426
+ )
1405
1427
  else:
1406
1428
  nexttile = plot.subplot(len(modname_tmp), 3, figsize=figsize)
1407
1429
  colors = plot.get_color(len(classes))
@@ -1429,16 +1451,18 @@ def plot_validate_features_single(res_val, figsize=None,is_binary=True):
1429
1451
  title=model_name,
1430
1452
  legend=dict(
1431
1453
  loc="best",
1432
- fontsize=8,
1454
+ fontsize=8,
1433
1455
  ),
1434
- )
1456
+ )
1435
1457
 
1436
1458
  ax = nexttile()
1437
1459
  for iclass, class_ in enumerate(classes):
1438
1460
  plot_pr_curve(
1439
1461
  recall=res_val["pr_curve"][model_name]["recall"][iclass],
1440
1462
  precision=res_val["pr_curve"][model_name]["precision"][iclass],
1441
- avg_precision=res_val["pr_curve"][model_name]["avg_precision"][iclass],
1463
+ avg_precision=res_val["pr_curve"][model_name]["avg_precision"][
1464
+ iclass
1465
+ ],
1442
1466
  model_name=class_,
1443
1467
  color=colors[iclass],
1444
1468
  lw=1.5,
@@ -1450,17 +1474,21 @@ def plot_validate_features_single(res_val, figsize=None,is_binary=True):
1450
1474
  title=class_,
1451
1475
  legend=dict(loc="best", fontsize=8),
1452
1476
  )
1453
-
1454
- plot_cm(res_val["confusion_matrix"][model_name],labels_name=classes, ax=nexttile(), normalize=False)
1477
+
1478
+ plot_cm(
1479
+ res_val["confusion_matrix"][model_name],
1480
+ labels_name=classes,
1481
+ ax=nexttile(),
1482
+ normalize=False,
1483
+ )
1455
1484
  plot.figsets(title=model_name, sp=2)
1456
1485
 
1457
1486
 
1458
- def cal_precision_recall(
1459
- y_true, y_pred_proba, is_binary=True):
1487
+ def cal_precision_recall(y_true, y_pred_proba, is_binary=True):
1460
1488
  if is_binary:
1461
1489
  precision_, recall_, _ = precision_recall_curve(y_true, y_pred_proba)
1462
1490
  avg_precision_ = average_precision_score(y_true, y_pred_proba)
1463
- return precision_, recall_,avg_precision_
1491
+ return precision_, recall_, avg_precision_
1464
1492
  else:
1465
1493
  n_classes = y_pred_proba.shape[1] # Number of classes
1466
1494
  precision_ = []
@@ -1469,7 +1497,9 @@ def cal_precision_recall(
1469
1497
  # One-vs-rest approach for multi-class precision-recall curve
1470
1498
  for class_idx in range(n_classes):
1471
1499
  precision, recall, _ = precision_recall_curve(
1472
- (y_true == class_idx).astype(int), # Binarize true labels for the current class
1500
+ (y_true == class_idx).astype(
1501
+ int
1502
+ ), # Binarize true labels for the current class
1473
1503
  y_pred_proba[:, class_idx], # Probabilities for the current class
1474
1504
  )
1475
1505
 
@@ -1479,14 +1509,23 @@ def cal_precision_recall(
1479
1509
  avg_precision_ = []
1480
1510
  for class_idx in range(n_classes):
1481
1511
  avg_precision = average_precision_score(
1482
- (y_true == class_idx).astype(int), # Binarize true labels for the current class
1512
+ (y_true == class_idx).astype(
1513
+ int
1514
+ ), # Binarize true labels for the current class
1483
1515
  y_pred_proba[:, class_idx], # Probabilities for the current class
1484
1516
  )
1485
1517
  avg_precision_.append(avg_precision)
1486
- return precision_, recall_,avg_precision_
1487
-
1518
+ return precision_, recall_, avg_precision_
1519
+
1520
+
1488
1521
  def cal_auc_ci(
1489
- y_true, y_pred, n_bootstraps=1000, ci=0.95, random_state=1,is_binary=True, verbose=True
1522
+ y_true,
1523
+ y_pred,
1524
+ n_bootstraps=1000,
1525
+ ci=0.95,
1526
+ random_state=1,
1527
+ is_binary=True,
1528
+ verbose=True,
1490
1529
  ):
1491
1530
  if is_binary:
1492
1531
  y_true = np.asarray(y_true)
@@ -1525,15 +1564,20 @@ def cal_auc_ci(
1525
1564
  return confidence_lower, confidence_upper
1526
1565
  else:
1527
1566
  from sklearn.preprocessing import label_binarize
1567
+
1528
1568
  # Multi-class classification case
1529
1569
  y_true = np.asarray(y_true)
1530
1570
  y_pred = np.asarray(y_pred)
1531
1571
 
1532
1572
  # Binarize the multi-class labels for OvR computation
1533
- y_true_bin = label_binarize(y_true, classes=np.unique(y_true)) # One-vs-Rest transformation
1573
+ y_true_bin = label_binarize(
1574
+ y_true, classes=np.unique(y_true)
1575
+ ) # One-vs-Rest transformation
1534
1576
  n_classes = y_true_bin.shape[1] # Number of classes
1535
-
1536
- bootstrapped_scores = np.zeros((n_classes, n_bootstraps)) # Store scores for each class
1577
+
1578
+ bootstrapped_scores = np.zeros(
1579
+ (n_classes, n_bootstraps)
1580
+ ) # Store scores for each class
1537
1581
 
1538
1582
  if verbose:
1539
1583
  print("AUROC scores for each class:")
@@ -1546,7 +1590,9 @@ def cal_auc_ci(
1546
1590
  for class_idx in range(n_classes):
1547
1591
  if len(np.unique(y_true_bin[indices, class_idx])) < 2:
1548
1592
  continue # Reject if the class doesn't have both positive and negative samples
1549
- score = roc_auc_score(y_true_bin[indices, class_idx], y_pred[indices, class_idx])
1593
+ score = roc_auc_score(
1594
+ y_true_bin[indices, class_idx], y_pred[indices, class_idx]
1595
+ )
1550
1596
  bootstrapped_scores[class_idx, i] = score
1551
1597
 
1552
1598
  # Calculating the confidence intervals for each class
@@ -1558,8 +1604,10 @@ def cal_auc_ci(
1558
1604
  confidence_intervals.append((confidence_lower, confidence_upper))
1559
1605
 
1560
1606
  if verbose:
1561
- print(f"Class {class_idx} - Confidence interval: [{confidence_lower:.3f} - {confidence_upper:.3f}]")
1562
-
1607
+ print(
1608
+ f"Class {class_idx} - Confidence interval: [{confidence_lower:.3f} - {confidence_upper:.3f}]"
1609
+ )
1610
+
1563
1611
  return confidence_intervals
1564
1612
 
1565
1613
 
@@ -1619,6 +1667,7 @@ def plot_roc_curve(
1619
1667
  # ml2ls.plot_roc_curve(fpr, tpr, mean_auc, lower_ci, upper_ci)
1620
1668
  # figsets(title=model_name)
1621
1669
 
1670
+
1622
1671
  def plot_pr_curve(
1623
1672
  recall=None,
1624
1673
  precision=None,
@@ -1661,6 +1710,7 @@ def plot_pr_curve(
1661
1710
  ax.legend(loc=legend_loc)
1662
1711
  return ax
1663
1712
 
1713
+
1664
1714
  # * usage: ml2ls.plot_pr_curve()
1665
1715
  # for md_name in flatten(validation_results["pr_curve"].keys()):
1666
1716
  # ml2ls.plot_pr_curve(
@@ -1673,6 +1723,7 @@ def plot_pr_curve(
1673
1723
  # color="r",
1674
1724
  # )
1675
1725
 
1726
+
1676
1727
  def plot_pr_binary(
1677
1728
  recall=None,
1678
1729
  precision=None,
@@ -1689,19 +1740,20 @@ def plot_pr_binary(
1689
1740
  ax=None,
1690
1741
  show_avg_precision=False,
1691
1742
  **kwargs,
1692
- ):
1743
+ ):
1693
1744
  from scipy.interpolate import interp1d
1745
+
1694
1746
  if ax is None:
1695
1747
  fig, ax = plt.subplots(figsize=figsize)
1696
1748
  model_name = "Binary PR Curve" if model_name is None else model_name
1697
1749
 
1698
- #* use sklearn bulitin function 'PrecisionRecallDisplay'?
1750
+ # * use sklearn bulitin function 'PrecisionRecallDisplay'?
1699
1751
  # from sklearn.metrics import PrecisionRecallDisplay
1700
- # disp = PrecisionRecallDisplay(precision=precision,
1701
- # recall=recall,
1752
+ # disp = PrecisionRecallDisplay(precision=precision,
1753
+ # recall=recall,
1702
1754
  # average_precision=avg_precision,**kwargs)
1703
1755
  # disp.plot(ax=ax, name=model_name, color=color)
1704
-
1756
+
1705
1757
  # Plot Precision-Recall curve
1706
1758
  ax.plot(
1707
1759
  recall,
@@ -1729,15 +1781,17 @@ def plot_pr_binary(
1729
1781
  y_vals = f_score * x_vals / (2 * x_vals - f_score)
1730
1782
  y_vals_clipped = np.minimum(y_vals, pr_boundary(x_vals))
1731
1783
  y_vals_clipped = np.clip(y_vals_clipped, 1e-3, None) # Prevent going to zero
1732
- valid = y_vals_clipped < pr_boundary(x_vals)
1733
- valid_ = y_vals_clipped > 1e-3
1734
- valid = valid&valid_
1735
- x_vals = x_vals[valid]
1784
+ valid = y_vals_clipped < pr_boundary(x_vals)
1785
+ valid_ = y_vals_clipped > 1e-3
1786
+ valid = valid & valid_
1787
+ x_vals = x_vals[valid]
1736
1788
  y_vals_clipped = y_vals_clipped[valid]
1737
1789
  if len(x_vals) > 0: # Ensure annotation is placed only if line segment exists
1738
1790
  ax.plot(x_vals, y_vals_clipped, color="gray", alpha=1)
1739
- plt.annotate(f"$f_1={f_score:0.1f}$", xy=(0.8, y_vals_clipped[-int(len(y_vals_clipped)*0.35)] + 0.02))
1740
-
1791
+ plt.annotate(
1792
+ f"$f_1={f_score:0.1f}$",
1793
+ xy=(0.8, y_vals_clipped[-int(len(y_vals_clipped) * 0.35)] + 0.02),
1794
+ )
1741
1795
 
1742
1796
  # # Plot the average precision line
1743
1797
  if show_avg_precision:
@@ -1757,11 +1811,12 @@ def plot_pr_binary(
1757
1811
  ax.grid(False)
1758
1812
  ax.legend(loc=legend_loc)
1759
1813
  return ax
1760
-
1814
+
1815
+
1761
1816
  def plot_cm(
1762
1817
  cm,
1763
1818
  labels_name=None,
1764
- thresh=0.8, # for set color
1819
+ thresh=0.8, # for set color
1765
1820
  axis_labels=None,
1766
1821
  cmap="Reds",
1767
1822
  normalize=True,
@@ -2048,7 +2103,7 @@ def predict(
2048
2103
  y_train: pd.Series,
2049
2104
  x_true: pd.DataFrame = None,
2050
2105
  y_true: Optional[pd.Series] = None,
2051
- backward:bool=False, # backward_regression
2106
+ backward: bool = False, # backward_regression
2052
2107
  common_features: set = None,
2053
2108
  purpose: str = "classification", # 'classification' or 'regression'
2054
2109
  cls: Optional[Dict[str, Any]] = None,
@@ -2242,22 +2297,22 @@ def predict(
2242
2297
  x_train = x_train.drop(y_train_col_name, axis=1)
2243
2298
  # else:
2244
2299
  # y_train = ips.df_encoder(pd.DataFrame(y_train), method="dummy").values.ravel()
2245
- y_train=pd.DataFrame(y_train)
2300
+ y_train = pd.DataFrame(y_train)
2246
2301
  if y_train.select_dtypes(include=np.number).empty:
2247
- y_train_=ips.df_encoder(y_train, method="dummy",drop=None)
2248
- is_binary = False if y_train_.shape[1] >2 else True
2302
+ y_train_ = ips.df_encoder(y_train, method="dummy", drop=None)
2303
+ is_binary = False if y_train_.shape[1] > 2 else True
2249
2304
  else:
2250
- y_train_=ips.flatten(y_train.values)
2251
- is_binary = False if len(y_train_)>2 else True
2305
+ y_train_ = ips.flatten(y_train.values)
2306
+ is_binary = False if len(y_train_) > 2 else True
2252
2307
 
2253
2308
  if is_binary:
2254
- y_train = ips.df_encoder(pd.DataFrame(y_train), method="label")
2255
- print('is_binary:',is_binary)
2309
+ y_train = ips.df_encoder(pd.DataFrame(y_train), method="label")
2310
+ print("is_binary:", is_binary)
2256
2311
 
2257
2312
  # Perform backward feature selection
2258
2313
  if backward:
2259
2314
  selected_features = backward_regression(x_train, y_train, threshold_out=0.05)
2260
- x_train=x_train[selected_features]
2315
+ x_train = x_train[selected_features]
2261
2316
 
2262
2317
  if x_true is None:
2263
2318
  x_train, x_true, y_train, y_true = train_test_split(
@@ -2271,23 +2326,31 @@ def predict(
2271
2326
  if isinstance(y_train, str) and y_train in x_train.columns:
2272
2327
  y_train_col_name = y_train
2273
2328
  y_train = x_train[y_train]
2274
- y_train = ips.df_encoder(pd.DataFrame(y_train), method="label") if is_binary else y_train
2329
+ y_train = (
2330
+ ips.df_encoder(pd.DataFrame(y_train), method="label")
2331
+ if is_binary
2332
+ else y_train
2333
+ )
2275
2334
  x_train = x_train.drop(y_train_col_name, axis=1)
2276
2335
  if is_binary:
2277
2336
  y_train = ips.df_encoder(
2278
2337
  pd.DataFrame(y_train), method="label"
2279
- ).values.ravel()
2338
+ ).values.ravel()
2280
2339
 
2281
2340
  if y_true is not None:
2282
2341
  if isinstance(y_true, str) and y_true in x_true.columns:
2283
2342
  y_true_col_name = y_true
2284
2343
  y_true = x_true[y_true]
2285
- y_true = ips.df_encoder(pd.DataFrame(y_true), method="label") if is_binary else y_true
2286
- y_true = pd.DataFrame(y_true)
2344
+ y_true = (
2345
+ ips.df_encoder(pd.DataFrame(y_true), method="label")
2346
+ if is_binary
2347
+ else y_true
2348
+ )
2349
+ y_true = pd.DataFrame(y_true)
2287
2350
  x_true = x_true.drop(y_true_col_name, axis=1)
2288
2351
  if is_binary:
2289
2352
  y_true = ips.df_encoder(pd.DataFrame(y_true), method="label").values.ravel()
2290
- y_true = pd.DataFrame(y_true)
2353
+ y_true = pd.DataFrame(y_true)
2291
2354
 
2292
2355
  # to convert the 2D to 1D: 2D column-vector format (like [[1], [0], [1], ...]) instead of a 1D array ([1, 0, 1, ...]
2293
2356
 
@@ -2295,10 +2358,14 @@ def predict(
2295
2358
  # y_true=y_true.values.ravel() if y_true is not None else None
2296
2359
  if y_train is not None:
2297
2360
  y_train = (
2298
- y_train.ravel() if isinstance(y_train, np.ndarray) else y_train.values.ravel()
2361
+ y_train.ravel()
2362
+ if isinstance(y_train, np.ndarray)
2363
+ else y_train.values.ravel()
2299
2364
  )
2300
2365
  if y_true is not None:
2301
- y_true = y_true.ravel() if isinstance(y_true, np.ndarray) else y_true.values.ravel()
2366
+ y_true = (
2367
+ y_true.ravel() if isinstance(y_true, np.ndarray) else y_true.values.ravel()
2368
+ )
2302
2369
  # Ensure common features are selected
2303
2370
  if common_features is not None:
2304
2371
  x_train, x_true = x_train[common_features], x_true[common_features]
@@ -2307,7 +2374,9 @@ def predict(
2307
2374
  x_train, x_true = x_train[share_col_names], x_true[share_col_names]
2308
2375
 
2309
2376
  x_train, x_true = ips.df_scaler(x_train), ips.df_scaler(x_true)
2310
- x_train, x_true = ips.df_encoder(x_train, method="dummy"), ips.df_encoder(x_true, method="dummy")
2377
+ x_train, x_true = ips.df_encoder(x_train, method="dummy"), ips.df_encoder(
2378
+ x_true, method="dummy"
2379
+ )
2311
2380
  # Handle class imbalance using SMOTE (only for classification)
2312
2381
  if (
2313
2382
  smote
@@ -2320,11 +2389,11 @@ def predict(
2320
2389
  x_train, y_train = smote_sampler.fit_resample(x_train, y_train)
2321
2390
  if not is_binary:
2322
2391
  if isinstance(y_train, np.ndarray):
2323
- y_train = ips.df_encoder(data=pd.DataFrame(y_train),method='label')
2324
- y_train=np.asarray(y_train)
2392
+ y_train = ips.df_encoder(data=pd.DataFrame(y_train), method="label")
2393
+ y_train = np.asarray(y_train)
2325
2394
  if isinstance(y_train, np.ndarray):
2326
- y_true = ips.df_encoder(data=pd.DataFrame(y_true),method='label')
2327
- y_true=np.asarray(y_true)
2395
+ y_true = ips.df_encoder(data=pd.DataFrame(y_true), method="label")
2396
+ y_true = np.asarray(y_true)
2328
2397
  # Hyperparameter grids for tuning
2329
2398
  if cv_level in ["low", "simple", "s", "l"]:
2330
2399
  param_grids = {
@@ -2908,14 +2977,16 @@ def predict(
2908
2977
  clf,
2909
2978
  param_grid=param_grids.get(name, {}),
2910
2979
  scoring=(
2911
- "roc_auc" if purpose == "classification" else "neg_mean_squared_error"
2980
+ "roc_auc"
2981
+ if purpose == "classification"
2982
+ else "neg_mean_squared_error"
2912
2983
  ),
2913
2984
  cv=cv,
2914
2985
  n_jobs=n_jobs,
2915
2986
  verbose=verbose,
2916
2987
  )
2917
2988
 
2918
- gs.fit(x_train, y_train)
2989
+ gs.fit(x_train, y_train)
2919
2990
  best_clf = gs.best_estimator_
2920
2991
  # make sure x_train and x_test has the same name
2921
2992
  x_true = x_true.reindex(columns=x_train.columns, fill_value=0)
@@ -2924,7 +2995,9 @@ def predict(
2924
2995
  y_pred_proba = best_clf.predict_proba(x_true)
2925
2996
  print("Shape of predicted probabilities:", y_pred_proba.shape)
2926
2997
  if y_pred_proba.shape[1] == 1:
2927
- y_pred_proba = np.hstack([1 - y_pred_proba, y_pred_proba]) # Add missing class probabilities
2998
+ y_pred_proba = np.hstack(
2999
+ [1 - y_pred_proba, y_pred_proba]
3000
+ ) # Add missing class probabilities
2928
3001
  y_pred_proba = y_pred_proba[:, 1]
2929
3002
  elif hasattr(best_clf, "decision_function"):
2930
3003
  # If predict_proba is not available, use decision_function (e.g., for SVM)
@@ -2940,7 +3013,9 @@ def predict(
2940
3013
  clf,
2941
3014
  param_grid=param_grids.get(name, {}),
2942
3015
  scoring=(
2943
- "roc_auc_ovr" if purpose == "classification" else "neg_mean_squared_error"
3016
+ "roc_auc_ovr"
3017
+ if purpose == "classification"
3018
+ else "neg_mean_squared_error"
2944
3019
  ),
2945
3020
  cv=cv,
2946
3021
  n_jobs=n_jobs,
@@ -2948,7 +3023,7 @@ def predict(
2948
3023
  )
2949
3024
 
2950
3025
  # Fit GridSearchCV
2951
- gs.fit(x_train, y_train)
3026
+ gs.fit(x_train, y_train)
2952
3027
  best_clf = gs.best_estimator_
2953
3028
 
2954
3029
  # Ensure x_true aligns with x_train columns
@@ -2960,14 +3035,18 @@ def predict(
2960
3035
  y_pred_proba = best_clf.predict_proba(x_true)
2961
3036
  elif hasattr(best_clf, "decision_function"):
2962
3037
  y_pred_proba = best_clf.decision_function(x_true)
2963
-
3038
+
2964
3039
  # Normalize for multiclass if necessary
2965
3040
  if y_pred_proba.ndim == 2:
2966
- y_pred_proba = (y_pred_proba - y_pred_proba.min(axis=1, keepdims=True)) / \
2967
- (y_pred_proba.max(axis=1, keepdims=True) - y_pred_proba.min(axis=1, keepdims=True))
3041
+ y_pred_proba = (
3042
+ y_pred_proba - y_pred_proba.min(axis=1, keepdims=True)
3043
+ ) / (
3044
+ y_pred_proba.max(axis=1, keepdims=True)
3045
+ - y_pred_proba.min(axis=1, keepdims=True)
3046
+ )
2968
3047
  else:
2969
3048
  y_pred_proba = None # No probability output for certain models
2970
-
3049
+
2971
3050
  validation_scores = {}
2972
3051
 
2973
3052
  if y_true is not None and y_pred_proba is not None:
@@ -2985,7 +3064,9 @@ def predict(
2985
3064
  if y_pred_proba is not None:
2986
3065
  # fpr, tpr, roc_auc = dict(), dict(), dict()
2987
3066
  fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
2988
- lower_ci, upper_ci = cal_auc_ci(y_true, y_pred_proba, verbose=False,is_binary=is_binary)
3067
+ lower_ci, upper_ci = cal_auc_ci(
3068
+ y_true, y_pred_proba, verbose=False, is_binary=is_binary
3069
+ )
2989
3070
  roc_auc = auc(fpr, tpr)
2990
3071
  roc_info = {
2991
3072
  "fpr": fpr.tolist(),
@@ -3030,11 +3111,13 @@ def predict(
3030
3111
  y_pred_proba.tolist() if y_pred_proba is not None else None
3031
3112
  ),
3032
3113
  }
3033
- else: # multi-classes
3114
+ else: # multi-classes
3034
3115
  if y_pred_proba is not None:
3035
3116
  # fpr, tpr, roc_auc = dict(), dict(), dict()
3036
3117
  # fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
3037
- confidence_intervals = cal_auc_ci(y_true, y_pred_proba, verbose=False,is_binary=is_binary)
3118
+ confidence_intervals = cal_auc_ci(
3119
+ y_true, y_pred_proba, verbose=False, is_binary=is_binary
3120
+ )
3038
3121
  roc_info = {
3039
3122
  "fpr": validation_scores["fpr"],
3040
3123
  "tpr": validation_scores["tpr"],
@@ -3042,7 +3125,9 @@ def predict(
3042
3125
  "ci95": confidence_intervals,
3043
3126
  }
3044
3127
  # precision-recall curve
3045
- precision_, recall_, avg_precision_ = cal_precision_recall(y_true, y_pred_proba,is_binary=is_binary)
3128
+ precision_, recall_, avg_precision_ = cal_precision_recall(
3129
+ y_true, y_pred_proba, is_binary=is_binary
3130
+ )
3046
3131
  pr_info = {
3047
3132
  "precision": precision_,
3048
3133
  "recall": recall_,
@@ -3080,14 +3165,17 @@ def predict(
3080
3165
  }
3081
3166
 
3082
3167
  else:
3083
- validation_scores = cal_metrics(
3084
- y_true,
3085
- y_pred,
3086
- y_pred_proba=y_pred_proba,
3087
- is_binary=is_binary,
3088
- purpose=purpose,
3089
- average="weighted",
3090
- )
3168
+ if not y_true:
3169
+ validation_scores = []
3170
+ else:
3171
+ validation_scores = cal_metrics(
3172
+ y_true,
3173
+ y_pred,
3174
+ y_pred_proba=y_pred_proba,
3175
+ is_binary=is_binary,
3176
+ purpose=purpose,
3177
+ average="weighted",
3178
+ )
3091
3179
  results[name] = {
3092
3180
  "best_clf": gs.best_estimator_,
3093
3181
  "best_params": gs.best_params_,
@@ -3096,8 +3184,8 @@ def predict(
3096
3184
  "predictions_proba": (
3097
3185
  y_pred_proba.tolist() if y_pred_proba is not None else None
3098
3186
  ),
3099
- "y_train":y_train if y_train is not None else [],
3100
- "y_true": y_true if y_true is not None else []
3187
+ "y_train": y_train if y_train is not None else [],
3188
+ "y_true": y_true if y_true is not None else [],
3101
3189
  }
3102
3190
 
3103
3191
  # Convert results to DataFrame
@@ -3118,8 +3206,8 @@ def predict(
3118
3206
  plot.figsets(xangle=30)
3119
3207
  if dir_save:
3120
3208
  ips.figsave(dir_save + f"scores_sorted_heatmap{now_}.pdf")
3121
-
3122
- df_scores=df_scores.select_dtypes(include=np.number)
3209
+
3210
+ df_scores = df_scores.select_dtypes(include=np.number)
3123
3211
 
3124
3212
  if df_scores.shape[0] > 1: # draw cluster
3125
3213
  plot.heatmap(df_scores, kind="direct", cluster=True)
@@ -3129,7 +3217,7 @@ def predict(
3129
3217
  if all([plot_, y_true is not None, purpose == "classification"]):
3130
3218
  # try:
3131
3219
  if len(models) > 3:
3132
- plot_validate_features(df_results,is_binary=is_binary)
3220
+ plot_validate_features(df_results, is_binary=is_binary)
3133
3221
  else:
3134
3222
  plot_validate_features_single(df_results, is_binary=is_binary)
3135
3223
  if dir_save:
@@ -3140,7 +3228,12 @@ def predict(
3140
3228
 
3141
3229
 
3142
3230
  def cal_metrics(
3143
- y_true, y_pred, y_pred_proba=None, is_binary=True,purpose="regression", average="weighted"
3231
+ y_true,
3232
+ y_pred,
3233
+ y_pred_proba=None,
3234
+ is_binary=True,
3235
+ purpose="regression",
3236
+ average="weighted",
3144
3237
  ):
3145
3238
  """
3146
3239
  Calculate regression or classification metrics based on the purpose.
@@ -3216,33 +3309,38 @@ def cal_metrics(
3216
3309
  tn, fp, fn, tp = cm.ravel()
3217
3310
  else:
3218
3311
  # Handle single-class predictions
3219
- tn, fp, fn, tp = 0, 0, 0, 0
3312
+ tn, fp, fn, tp = 0, 0, 0, 0
3220
3313
  print("Warning: Only one class found in y_pred or y_true.")
3221
3314
 
3222
3315
  # Specificity calculation
3223
- validation_scores["specificity"] = (
3224
- tn / (tn + fp) if (tn + fp) > 0 else 0
3225
- )
3316
+ validation_scores["specificity"] = tn / (tn + fp) if (tn + fp) > 0 else 0
3226
3317
  if y_pred_proba is not None:
3227
3318
  # Calculate ROC-AUC
3228
3319
  validation_scores["roc_auc"] = roc_auc_score(y_true, y_pred_proba)
3229
3320
  # PR-AUC (Precision-Recall AUC) calculation
3230
- validation_scores["pr_auc"] = average_precision_score(y_true, y_pred_proba)
3231
-
3232
- else: # multi-class
3321
+ validation_scores["pr_auc"] = average_precision_score(
3322
+ y_true, y_pred_proba
3323
+ )
3324
+
3325
+ else: # multi-class
3233
3326
  from sklearn.preprocessing import label_binarize
3234
- #* Multi-class ROC calculation
3235
- y_pred_proba = np.asarray(y_pred_proba)
3327
+
3328
+ # * Multi-class ROC calculation
3329
+ y_pred_proba = np.asarray(y_pred_proba)
3236
3330
  classes = np.unique(y_true)
3237
3331
  y_true_bin = label_binarize(y_true, classes=classes)
3238
3332
  if isinstance(y_true, np.ndarray):
3239
- y_true = ips.df_encoder(data=pd.DataFrame(y_true), method='dum',prefix='Label')
3333
+ y_true = ips.df_encoder(
3334
+ data=pd.DataFrame(y_true), method="dum", prefix="Label"
3335
+ )
3240
3336
  # Initialize dictionaries to store FPR, TPR, and AUC for each class
3241
3337
  fpr = dict()
3242
3338
  tpr = dict()
3243
- roc_auc = dict()
3339
+ roc_auc = dict()
3244
3340
  for i, class_label in enumerate(classes):
3245
- fpr[class_label], tpr[class_label], _ = roc_curve(y_true_bin[:, i], y_pred_proba[:, i])
3341
+ fpr[class_label], tpr[class_label], _ = roc_curve(
3342
+ y_true_bin[:, i], y_pred_proba[:, i]
3343
+ )
3246
3344
  roc_auc[class_label] = auc(fpr[class_label], tpr[class_label])
3247
3345
 
3248
3346
  # Store the mean ROC AUC
@@ -3267,6 +3365,7 @@ def cal_metrics(
3267
3365
 
3268
3366
  return validation_scores
3269
3367
 
3368
+
3270
3369
  def plot_trees(
3271
3370
  X, y, cls, max_trees=500, test_size=0.2, random_state=42, early_stopping_rounds=None
3272
3371
  ):
@@ -3303,6 +3402,7 @@ def plot_trees(
3303
3402
  ExtraTreesClassifier,
3304
3403
  )
3305
3404
  from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
3405
+
3306
3406
  # Split data for training and testing error calculation
3307
3407
  x_train, x_test, y_train, y_test = train_test_split(
3308
3408
  X, y, test_size=test_size, random_state=random_state
@@ -3361,7 +3461,9 @@ def plot_trees(
3361
3461
  if validation_error[-early_stopping_rounds:] == sorted(
3362
3462
  validation_error[-early_stopping_rounds:]
3363
3463
  ):
3364
- print(f"Early stopping at tree {i} due to lack of improvement in validation error.")
3464
+ print(
3465
+ f"Early stopping at tree {i} due to lack of improvement in validation error."
3466
+ )
3365
3467
  break
3366
3468
 
3367
3469
  # Plot results
@@ -3407,16 +3509,17 @@ def plot_trees(
3407
3509
  plt.grid(True)
3408
3510
  plt.show()
3409
3511
 
3512
+
3410
3513
  def img_datasets_preprocessing(
3411
3514
  data: pd.DataFrame,
3412
3515
  x_col: str,
3413
- y_col: str=None,
3516
+ y_col: str = None,
3414
3517
  target_size: tuple = (224, 224),
3415
3518
  batch_size: int = 128,
3416
3519
  class_mode: str = "raw",
3417
3520
  shuffle: bool = False,
3418
3521
  augment: bool = False,
3419
- scaler: str = 'normalize', # 'normalize', 'standardize', 'clahe', 'raw'
3522
+ scaler: str = "normalize", # 'normalize', 'standardize', 'clahe', 'raw'
3420
3523
  grayscale: bool = False,
3421
3524
  encoder: str = "label", # Options: 'label', 'onehot', 'binary'
3422
3525
  label_encoder=None,
@@ -3461,16 +3564,29 @@ def img_datasets_preprocessing(
3461
3564
  x_col in data.columns and y_col in data.columns
3462
3565
  ), "Missing required columns in DataFrame."
3463
3566
  if y_col is None:
3464
- class_mode=None
3567
+ class_mode = None
3465
3568
  # 输出格式
3466
- output = ips.strcmp(output,[
3467
- "generator","tf","iterator","transform","transformer","dataframe",
3468
- "df","pd","pandas"])[0]
3469
-
3569
+ output = ips.strcmp(
3570
+ output,
3571
+ [
3572
+ "generator",
3573
+ "tf",
3574
+ "iterator",
3575
+ "transform",
3576
+ "transformer",
3577
+ "dataframe",
3578
+ "df",
3579
+ "pd",
3580
+ "pandas",
3581
+ ],
3582
+ )[0]
3583
+
3470
3584
  # Handle missing file paths
3471
3585
  if drop_missing:
3472
3586
  data = data[
3473
- data[x_col].apply(lambda path: os.path.exists(path) and os.path.isfile(path))
3587
+ data[x_col].apply(
3588
+ lambda path: os.path.exists(path) and os.path.isfile(path)
3589
+ )
3474
3590
  ]
3475
3591
 
3476
3592
  # Encoding labels if necessary
@@ -3502,11 +3618,11 @@ def img_datasets_preprocessing(
3502
3618
  aug_params.update(kws_augmentation)
3503
3619
  dat = ImageDataGenerator(rescale=scaler, **aug_params)
3504
3620
  dat = ImageDataGenerator(
3505
- rescale=1.0 / 255 if scaler == 'normalize' else None, **aug_params)
3621
+ rescale=1.0 / 255 if scaler == "normalize" else None, **aug_params
3622
+ )
3506
3623
 
3507
3624
  else:
3508
- dat = ImageDataGenerator(
3509
- rescale=1.0 / 255 if scaler == 'normalize' else None)
3625
+ dat = ImageDataGenerator(rescale=1.0 / 255 if scaler == "normalize" else None)
3510
3626
 
3511
3627
  # Create DataFrameIterator
3512
3628
  data_iterator = dat.flow_from_dataframe(
@@ -3529,14 +3645,14 @@ def img_datasets_preprocessing(
3529
3645
 
3530
3646
  # Load, resize, and process images in batches
3531
3647
  for i, (batch_images, batch_labels) in enumerate(data_iterator):
3532
- for img, label in zip(batch_images, batch_labels):
3533
- if scaler == ['normalize','raw']:
3648
+ for img, label in zip(batch_images, batch_labels):
3649
+ if scaler == ["normalize", "raw"]:
3534
3650
  # Already rescaled by 1.0/255 in ImageDataGenerator
3535
3651
  pass
3536
- elif scaler == 'standardize':
3652
+ elif scaler == "standardize":
3537
3653
  # Standardize by subtracting mean and dividing by std
3538
3654
  img = (img - np.mean(img)) / np.std(img)
3539
- elif scaler == 'clahe':
3655
+ elif scaler == "clahe":
3540
3656
  # Apply CLAHE to the image
3541
3657
  img = apply_clahe(img)
3542
3658
  flat_img = img.flatten()
@@ -3561,11 +3677,13 @@ def img_datasets_preprocessing(
3561
3677
  return df_img
3562
3678
 
3563
3679
 
3564
- def backward_regression(X:pd.DataFrame, y:pd.Series, initial_list=[], threshold_out=0.05, verbose=True):
3680
+ def backward_regression(
3681
+ X: pd.DataFrame, y: pd.Series, initial_list=[], threshold_out=0.05, verbose=True
3682
+ ):
3565
3683
  """
3566
3684
  # awesome bit of code from https://www.kaggle.com/code/adibouayjan/house-price-step-by-step-modeling
3567
-
3568
- Evaluates the p-values of all features, which represent the probability of observing a coefficient
3685
+
3686
+ Evaluates the p-values of all features, which represent the probability of observing a coefficient
3569
3687
  as extreme as the one calculated if the feature had no true effect on the target.
3570
3688
 
3571
3689
  Args:
@@ -3576,9 +3694,10 @@ def backward_regression(X:pd.DataFrame, y:pd.Series, initial_list=[], threshold_
3576
3694
  verbose -- true to produce lots of logging output
3577
3695
 
3578
3696
  Returns:
3579
- list of selected features for modeling
3697
+ list of selected features for modeling
3580
3698
  """
3581
3699
  import statsmodels.api as sm
3700
+
3582
3701
  if isinstance(y, str) and y in X.columns:
3583
3702
  y_col_name = y
3584
3703
  y = X[y]
@@ -3600,15 +3719,16 @@ def backward_regression(X:pd.DataFrame, y:pd.Series, initial_list=[], threshold_
3600
3719
  break
3601
3720
  print(f"\nSelected Features:\n{included}")
3602
3721
  return included # Returns the list of selected features
3603
-
3722
+
3604
3723
 
3605
3724
  # Function to apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
3606
3725
  def apply_clahe(img):
3607
3726
  import cv2
3727
+
3608
3728
  lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB) # Convert to LAB color space
3609
3729
  l, a, b = cv2.split(lab) # Split into channels
3610
3730
  clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
3611
3731
  cl = clahe.apply(l) # Apply CLAHE to the L channel
3612
3732
  limg = cv2.merge((cl, a, b)) # Merge back the channels
3613
3733
  img_clahe = cv2.cvtColor(limg, cv2.COLOR_LAB2RGB) # Convert back to RGB
3614
- return img_clahe
3734
+ return img_clahe
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: py2ls
3
- Version: 0.2.4.18
3
+ Version: 0.2.4.20
4
4
  Summary: py(thon)2(too)ls
5
5
  Author: Jianfeng
6
6
  Author-email: Jianfeng.Liu0413@gmail.com
@@ -236,7 +236,7 @@ py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
236
236
  py2ls/ich2ls.py,sha256=3E9R8oVpyYZXH5PiIQgT3CN5NxLe4Dwtm2LwaeacE6I,21381
237
237
  py2ls/ips.py,sha256=2TWuOSFquwhmPdxkmmvU_pcIbE5M0S9aRPtuQgs5B7A,297706
238
238
  py2ls/ml2ls copy.py,sha256=iZJrFLIrdfTieAY2BDsxQFTm29smwnJh0aC4hRB9VGM,113314
239
- py2ls/ml2ls.py,sha256=Mkf374TLsCdBVYtSYptFzegn8euda33TA-M73nGtzV0,144368
239
+ py2ls/ml2ls.py,sha256=wvQkhcOsBiysgfaRmeT2KAR5C8uFOaX3HeyDA2Oy1LI,146065
240
240
  py2ls/mol.py,sha256=AZnHzarIk_MjueKdChqn1V6e4tUle3X1NnHSFA6n3Nw,10645
241
241
  py2ls/netfinder.py,sha256=R70NkrnO8LlXjT1y7bf2TN-yE4yOeAYhb0jDBiNp8XA,57536
242
242
  py2ls/ocr.py,sha256=5lhUbJufIKRSOL6wAWVLEo8TqMYSjoI_Q-IO-_4u3DE,31419
@@ -246,6 +246,6 @@ py2ls/sleep_events_detectors.py,sha256=bQA3HJqv5qnYKJJEIhCyhlDtkXQfIzqksnD0YRXso
246
246
  py2ls/stats.py,sha256=qBn2rJmNa_QLLUqjwYqXUlGzqmW94sgA1bxJU2FC3r0,39175
247
247
  py2ls/translator.py,sha256=77Tp_GjmiiwFbEIJD_q3VYpQ43XL9ZeJo6Mhl44mvh8,34284
248
248
  py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
249
- py2ls-0.2.4.18.dist-info/METADATA,sha256=q3L5q5BJ3olDRMxjimPsTEEE79pqmLwh93bxgsevPNU,20078
250
- py2ls-0.2.4.18.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
251
- py2ls-0.2.4.18.dist-info/RECORD,,
249
+ py2ls-0.2.4.20.dist-info/METADATA,sha256=iCOFX-A3J17xwkEB2UdDpS5A7kQuRcBCJVq5x8BtqPg,20078
250
+ py2ls-0.2.4.20.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
251
+ py2ls-0.2.4.20.dist-info/RECORD,,