py2ls 0.2.4.18__py3-none-any.whl → 0.2.4.20__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
py2ls/ml2ls.py
CHANGED
@@ -702,7 +702,7 @@ def get_features(
 "AdaBoost",
 ]
 cls = [ips.strcmp(i, cls_)[0] for i in cls]
-
+
 feature_importances = {}

 # Lasso Feature Selection
@@ -714,7 +714,7 @@ def get_features(
 lasso_selected_features = (
 lasso_importances.head(n_features)["feature"].values if "lasso" in cls else []
 )
-feature_importances[
+feature_importances["lasso"] = lasso_importances.head(n_features)
 # Ridge
 ridge_importances = (
 features_ridge(x_train, y_train, ridge_params)
@@ -724,7 +724,7 @@ def get_features(
 selected_ridge_features = (
 ridge_importances.head(n_features)["feature"].values if "ridge" in cls else []
 )
-feature_importances[
+feature_importances["ridge"] = ridge_importances.head(n_features)
 # Elastic Net
 enet_importances = (
 features_enet(x_train, y_train, enet_params)
@@ -734,7 +734,7 @@ def get_features(
 selected_enet_features = (
 enet_importances.head(n_features)["feature"].values if "Enet" in cls else []
 )
-feature_importances[
+feature_importances["Enet"] = enet_importances.head(n_features)
 # Random Forest Feature Importance
 rf_importances = (
 features_rf(x_train, y_train, rf_params)
@@ -746,7 +746,7 @@ def get_features(
 if "Random Forest" in cls
 else []
 )
-feature_importances[
+feature_importances["Random Forest"] = rf_importances.head(n_features)
 # Gradient Boosting Feature Importance
 gb_importances = (
 features_gradient_boosting(x_train, y_train, gb_params)
@@ -758,7 +758,7 @@ def get_features(
 if "Gradient Boosting" in cls
 else []
 )
-feature_importances[
+feature_importances["Gradient Boosting"] = gb_importances.head(n_features)
 # xgb
 xgb_importances = (
 features_xgb(x_train, y_train, xgb_params) if "xgb" in cls else pd.DataFrame()
@@ -766,7 +766,7 @@ def get_features(
 top_xgb_features = (
 xgb_importances.head(n_features)["feature"].values if "xgb" in cls else []
 )
-feature_importances[
+feature_importances["xgb"] = xgb_importances.head(n_features)

 # SVM with RFE
 selected_svm_features = (
@@ -781,7 +781,7 @@ def get_features(
 selected_lda_features = (
 lda_importances.head(n_features)["feature"].values if "lda" in cls else []
 )
-feature_importances[
+feature_importances["lda"] = lda_importances.head(n_features)
 # AdaBoost Feature Importance
 adaboost_importances = (
 features_adaboost(x_train, y_train, adaboost_params)
@@ -793,7 +793,7 @@ def get_features(
 if "AdaBoost" in cls
 else []
 )
-feature_importances[
+feature_importances["AdaBoost"] = adaboost_importances.head(n_features)
 # Decision Tree Feature Importance
 dt_importances = (
 features_decision_tree(x_train, y_train, dt_params)
@@ -804,8 +804,8 @@ def get_features(
 dt_importances.head(n_features)["feature"].values
 if "Decision Tree" in cls
 else []
-)
-feature_importances[
+)
+feature_importances["Decision Tree"] = dt_importances.head(n_features)
 # Bagging Feature Importance
 bagging_importances = (
 features_bagging(x_train, y_train, bagging_params)
@@ -817,7 +817,7 @@ def get_features(
 if "Bagging" in cls
 else []
 )
-feature_importances[
+feature_importances["Bagging"] = bagging_importances.head(n_features)
 # KNN Feature Importance via Permutation
 knn_importances = (
 features_knn(x_train, y_train, knn_params) if "KNN" in cls else pd.DataFrame()
@@ -825,7 +825,7 @@ def get_features(
 top_knn_features = (
 knn_importances.head(n_features)["feature"].values if "KNN" in cls else []
 )
-feature_importances[
+feature_importances["KNN"] = knn_importances.head(n_features)

 #! Find common features
 common_features = ips.shared(
@@ -928,7 +928,7 @@ def get_features(
 "cv_train_scores": cv_train_results_df,
 "cv_test_scores": rank_models(cv_test_results_df, plot_=plot_),
 "common_features": list(common_features),
-"feature_importances":feature_importances
+"feature_importances": feature_importances,
 }
 if all([plot_, dir_save]):
 from datetime import datetime
@@ -941,7 +941,7 @@ def get_features(
 "cv_train_scores": pd.DataFrame(),
 "cv_test_scores": pd.DataFrame(),
 "common_features": [],
-"feature_importances":{}
+"feature_importances": {},
 }
 print(f"Warning: 没有找到共同的genes, when n_shared={n_shared}")
 return results
@@ -1232,7 +1232,7 @@ def validate_features(

 # # If you want to access validation scores
 # print(validation_results)
-def plot_validate_features(res_val,is_binary=True,figsize=None):
+def plot_validate_features(res_val, is_binary=True, figsize=None):
 """
 plot the results of 'validate_features()'
 """
@@ -1295,26 +1295,28 @@ def plot_validate_features(res_val,is_binary=True,figsize=None):
 )
 plot.figsets(
 sp=2,
-legend=dict(
+legend=dict(
+loc="upper right", ncols=1, fontsize=8, bbox_to_anchor=[1.5, 0.5]
+),
 )
 # plot.split_legend(ax,n=2, loc=["upper left", "lower left"],bbox=[[1,0.5],[1,0.5]],ncols=2,labelcolor="k",fontsize=8)
 else:
 colors = plot.get_color(len(ips.flatten(res_val["pr_curve"].index)))
-modname_tmp=ips.flatten(res_val["roc_curve"].index)[0]
-classes=list(res_val["roc_curve"][modname_tmp][
+modname_tmp = ips.flatten(res_val["roc_curve"].index)[0]
+classes = list(res_val["roc_curve"][modname_tmp]["fpr"].keys())
 if res_val.shape[0] > 5:
 alpha = 0
-figsize = [8, 8*2*(len(classes))]
+figsize = [8, 8 * 2 * (len(classes))] if figsize is None else figsize
 subplot_layout = [1, 2]
 ncols = 2
 bbox_to_anchor = [1.5, 0.6]
 else:
 alpha = 0.03
-figsize = [10, 6*(len(classes))] if figsize is None else figsize
+figsize = [10, 6 * (len(classes))] if figsize is None else figsize
 subplot_layout = [1, 1]
 ncols = 1
 bbox_to_anchor = [1, 1]
-nexttile = plot.subplot(2*(len(classes)),2,figsize=figsize)
+nexttile = plot.subplot(2 * (len(classes)), 2, figsize=figsize)
 for iclass, class_ in enumerate(classes):
 ax = nexttile(subplot_layout[0], subplot_layout[1])
 for i, model_name in enumerate(ips.flatten(res_val["pr_curve"].index)):
@@ -1352,7 +1354,9 @@ def plot_validate_features(res_val,is_binary=True,figsize=None):
 plot_pr_curve(
 recall=res_val["pr_curve"][model_name]["recall"][iclass],
 precision=res_val["pr_curve"][model_name]["precision"][iclass],
-avg_precision=res_val["pr_curve"][model_name]["avg_precision"][
+avg_precision=res_val["pr_curve"][model_name]["avg_precision"][
+iclass
+],
 model_name=model_name,
 color=colors[i],
 lw=1.5,
@@ -1362,13 +1366,20 @@ def plot_validate_features(res_val,is_binary=True,figsize=None):
 plot.figsets(
 sp=2,
 title=class_,
-legend=dict(
+legend=dict(
+loc="upper right", ncols=1, fontsize=8, bbox_to_anchor=[1.5, 0.5]
+),
 )

-
+
+def plot_validate_features_single(res_val, figsize=None, is_binary=True):
 if is_binary:
 if figsize is None:
-nexttile = plot.subplot(
+nexttile = plot.subplot(
+len(ips.flatten(res_val["pr_curve"].index)),
+3,
+figsize=[13, 4 * len(ips.flatten(res_val["pr_curve"].index))],
+)
 else:
 nexttile = plot.subplot(
 len(ips.flatten(res_val["pr_curve"].index)), 3, figsize=figsize
@@ -1380,8 +1391,15 @@ def plot_validate_features_single(res_val, figsize=None,is_binary=True):
 mean_auc = res_val["roc_curve"][model_name]["auc"]

 # Plotting
-plot_roc_curve(
-
+plot_roc_curve(
+fpr,
+tpr,
+mean_auc,
+lower_ci,
+upper_ci,
+model_name=model_name,
+ax=nexttile(),
+)
 plot.figsets(title=model_name, sp=2)

 plot_pr_binary(
@@ -1394,14 +1412,18 @@ def plot_validate_features_single(res_val, figsize=None,is_binary=True):
 plot.figsets(title=model_name, sp=2)

 # plot cm
-plot_cm(
+plot_cm(
+res_val["confusion_matrix"][model_name], ax=nexttile(), normalize=False
+)
 plot.figsets(title=model_name, sp=2)
 else:
-
-modname_tmp=ips.flatten(res_val["roc_curve"].index)[0]
-classes=list(res_val["roc_curve"][modname_tmp][
+
+modname_tmp = ips.flatten(res_val["roc_curve"].index)[0]
+classes = list(res_val["roc_curve"][modname_tmp]["fpr"].keys())
 if figsize is None:
-nexttile = plot.subplot(
+nexttile = plot.subplot(
+len(modname_tmp), 3, figsize=[15, len(modname_tmp) * 5]
+)
 else:
 nexttile = plot.subplot(len(modname_tmp), 3, figsize=figsize)
 colors = plot.get_color(len(classes))
@@ -1429,16 +1451,18 @@ def plot_validate_features_single(res_val, figsize=None,is_binary=True):
 title=model_name,
 legend=dict(
 loc="best",
-fontsize=8,
+fontsize=8,
 ),
-)
+)

 ax = nexttile()
 for iclass, class_ in enumerate(classes):
 plot_pr_curve(
 recall=res_val["pr_curve"][model_name]["recall"][iclass],
 precision=res_val["pr_curve"][model_name]["precision"][iclass],
-avg_precision=res_val["pr_curve"][model_name]["avg_precision"][
+avg_precision=res_val["pr_curve"][model_name]["avg_precision"][
+iclass
+],
 model_name=class_,
 color=colors[iclass],
 lw=1.5,
@@ -1450,17 +1474,21 @@ def plot_validate_features_single(res_val, figsize=None,is_binary=True):
 title=class_,
 legend=dict(loc="best", fontsize=8),
 )
-
-plot_cm(
+
+plot_cm(
+res_val["confusion_matrix"][model_name],
+labels_name=classes,
+ax=nexttile(),
+normalize=False,
+)
 plot.figsets(title=model_name, sp=2)


-def cal_precision_recall(
-y_true, y_pred_proba, is_binary=True):
+def cal_precision_recall(y_true, y_pred_proba, is_binary=True):
 if is_binary:
 precision_, recall_, _ = precision_recall_curve(y_true, y_pred_proba)
 avg_precision_ = average_precision_score(y_true, y_pred_proba)
-return precision_, recall_,avg_precision_
+return precision_, recall_, avg_precision_
 else:
 n_classes = y_pred_proba.shape[1] # Number of classes
 precision_ = []
@@ -1469,7 +1497,9 @@ def cal_precision_recall(
 # One-vs-rest approach for multi-class precision-recall curve
 for class_idx in range(n_classes):
 precision, recall, _ = precision_recall_curve(
-(y_true == class_idx).astype(
+(y_true == class_idx).astype(
+int
+), # Binarize true labels for the current class
 y_pred_proba[:, class_idx], # Probabilities for the current class
 )

@@ -1479,14 +1509,23 @@ def cal_precision_recall(
 avg_precision_ = []
 for class_idx in range(n_classes):
 avg_precision = average_precision_score(
-(y_true == class_idx).astype(
+(y_true == class_idx).astype(
+int
+), # Binarize true labels for the current class
 y_pred_proba[:, class_idx], # Probabilities for the current class
 )
 avg_precision_.append(avg_precision)
-return precision_, recall_,avg_precision_
-
+return precision_, recall_, avg_precision_
+
+
 def cal_auc_ci(
-y_true,
+y_true,
+y_pred,
+n_bootstraps=1000,
+ci=0.95,
+random_state=1,
+is_binary=True,
+verbose=True,
 ):
 if is_binary:
 y_true = np.asarray(y_true)
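The cal_precision_recall hunks above rework the multi-class branch around a one-vs-rest decomposition: each class is binarized against the rest before precision_recall_curve and average_precision_score are applied. As a point of reference only, a minimal self-contained sketch of that pattern follows; the function name ovr_precision_recall and all variable names are illustrative assumptions, not py2ls code.

import numpy as np
from sklearn.metrics import precision_recall_curve, average_precision_score

def ovr_precision_recall(y_true, y_proba):
    # One-vs-rest precision/recall curves for a multi-class problem.
    # y_true : (n_samples,) integer class labels 0..K-1
    # y_proba: (n_samples, K) predicted class probabilities
    y_true = np.asarray(y_true)
    n_classes = y_proba.shape[1]
    precisions, recalls, avg_precisions = [], [], []
    for k in range(n_classes):
        y_bin = (y_true == k).astype(int)  # binarize: class k vs. rest
        p, r, _ = precision_recall_curve(y_bin, y_proba[:, k])
        precisions.append(p)
        recalls.append(r)
        avg_precisions.append(average_precision_score(y_bin, y_proba[:, k]))
    return precisions, recalls, avg_precisions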
@@ -1525,15 +1564,20 @@ def cal_auc_ci(
 return confidence_lower, confidence_upper
 else:
 from sklearn.preprocessing import label_binarize
+
 # Multi-class classification case
 y_true = np.asarray(y_true)
 y_pred = np.asarray(y_pred)

 # Binarize the multi-class labels for OvR computation
-y_true_bin = label_binarize(
+y_true_bin = label_binarize(
+y_true, classes=np.unique(y_true)
+) # One-vs-Rest transformation
 n_classes = y_true_bin.shape[1] # Number of classes
-
-bootstrapped_scores = np.zeros(
+
+bootstrapped_scores = np.zeros(
+(n_classes, n_bootstraps)
+) # Store scores for each class

 if verbose:
 print("AUROC scores for each class:")
@@ -1546,7 +1590,9 @@ def cal_auc_ci(
 for class_idx in range(n_classes):
 if len(np.unique(y_true_bin[indices, class_idx])) < 2:
 continue # Reject if the class doesn't have both positive and negative samples
-score = roc_auc_score(
+score = roc_auc_score(
+y_true_bin[indices, class_idx], y_pred[indices, class_idx]
+)
 bootstrapped_scores[class_idx, i] = score

 # Calculating the confidence intervals for each class
@@ -1558,8 +1604,10 @@ def cal_auc_ci(
 confidence_intervals.append((confidence_lower, confidence_upper))

 if verbose:
-print(
-
+print(
+f"Class {class_idx} - Confidence interval: [{confidence_lower:.3f} - {confidence_upper:.3f}]"
+)
+
 return confidence_intervals

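The cal_auc_ci hunks above expose the full signature (y_true, y_pred, n_bootstraps=1000, ci=0.95, random_state=1, is_binary=True, verbose=True) and, for the multi-class branch, resample per class with roc_auc_score. For orientation, here is a minimal sketch of the underlying percentile-bootstrap idea for the binary case; the function name, guard logic, and percentile indexing are assumptions for illustration, not the py2ls implementation.

import numpy as np
from sklearn.metrics import roc_auc_score

def bootstrap_auc_ci(y_true, y_score, n_bootstraps=1000, ci=0.95, random_state=1):
    # Percentile-bootstrap confidence interval for the binary ROC-AUC.
    rng = np.random.RandomState(random_state)
    y_true, y_score = np.asarray(y_true), np.asarray(y_score)
    scores = []
    for _ in range(n_bootstraps):
        idx = rng.randint(0, len(y_true), len(y_true))  # resample with replacement
        if len(np.unique(y_true[idx])) < 2:
            continue  # AUC is undefined unless both classes are present
        scores.append(roc_auc_score(y_true[idx], y_score[idx]))
    scores = np.sort(scores)
    alpha = (1.0 - ci) / 2.0
    lower = scores[int(alpha * len(scores))]
    upper = scores[min(int((1.0 - alpha) * len(scores)), len(scores) - 1)]
    return lower, upper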
@@ -1619,6 +1667,7 @@ def plot_roc_curve(
 # ml2ls.plot_roc_curve(fpr, tpr, mean_auc, lower_ci, upper_ci)
 # figsets(title=model_name)

+
 def plot_pr_curve(
 recall=None,
 precision=None,
@@ -1661,6 +1710,7 @@ def plot_pr_curve(
 ax.legend(loc=legend_loc)
 return ax

+
 # * usage: ml2ls.plot_pr_curve()
 # for md_name in flatten(validation_results["pr_curve"].keys()):
 # ml2ls.plot_pr_curve(
@@ -1673,6 +1723,7 @@ def plot_pr_curve(
 # color="r",
 # )

+
 def plot_pr_binary(
 recall=None,
 precision=None,
@@ -1689,19 +1740,20 @@ def plot_pr_binary(
 ax=None,
 show_avg_precision=False,
 **kwargs,
-
+):
 from scipy.interpolate import interp1d
+
 if ax is None:
 fig, ax = plt.subplots(figsize=figsize)
 model_name = "Binary PR Curve" if model_name is None else model_name

-
+# * use sklearn bulitin function 'PrecisionRecallDisplay'?
 # from sklearn.metrics import PrecisionRecallDisplay
-# disp = PrecisionRecallDisplay(precision=precision,
-# recall=recall,
+# disp = PrecisionRecallDisplay(precision=precision,
+# recall=recall,
 # average_precision=avg_precision,**kwargs)
 # disp.plot(ax=ax, name=model_name, color=color)
-
+
 # Plot Precision-Recall curve
 ax.plot(
 recall,
@@ -1729,15 +1781,17 @@ def plot_pr_binary(
 y_vals = f_score * x_vals / (2 * x_vals - f_score)
 y_vals_clipped = np.minimum(y_vals, pr_boundary(x_vals))
 y_vals_clipped = np.clip(y_vals_clipped, 1e-3, None) # Prevent going to zero
-valid =
-valid_ = y_vals_clipped > 1e-3
-valid = valid&valid_
-x_vals = x_vals[valid]
+valid = y_vals_clipped < pr_boundary(x_vals)
+valid_ = y_vals_clipped > 1e-3
+valid = valid & valid_
+x_vals = x_vals[valid]
 y_vals_clipped = y_vals_clipped[valid]
 if len(x_vals) > 0: # Ensure annotation is placed only if line segment exists
 ax.plot(x_vals, y_vals_clipped, color="gray", alpha=1)
-plt.annotate(
-
+plt.annotate(
+f"$f_1={f_score:0.1f}$",
+xy=(0.8, y_vals_clipped[-int(len(y_vals_clipped) * 0.35)] + 0.02),
+)

 # # Plot the average precision line
 if show_avg_precision:
@@ -1757,11 +1811,12 @@ def plot_pr_binary(
 ax.grid(False)
 ax.legend(loc=legend_loc)
 return ax
-
+
+
 def plot_cm(
 cm,
 labels_name=None,
-thresh=0.8,
+thresh=0.8, # for set color
 axis_labels=None,
 cmap="Reds",
 normalize=True,
@@ -2048,7 +2103,7 @@ def predict(
 y_train: pd.Series,
 x_true: pd.DataFrame = None,
 y_true: Optional[pd.Series] = None,
-backward:bool=False,
+backward: bool = False, # backward_regression
 common_features: set = None,
 purpose: str = "classification", # 'classification' or 'regression'
 cls: Optional[Dict[str, Any]] = None,
@@ -2242,22 +2297,22 @@ def predict(
 x_train = x_train.drop(y_train_col_name, axis=1)
 # else:
 # y_train = ips.df_encoder(pd.DataFrame(y_train), method="dummy").values.ravel()
-y_train=pd.DataFrame(y_train)
+y_train = pd.DataFrame(y_train)
 if y_train.select_dtypes(include=np.number).empty:
-y_train_=ips.df_encoder(y_train, method="dummy",drop=None)
-is_binary = False if y_train_.shape[1] >2 else True
+y_train_ = ips.df_encoder(y_train, method="dummy", drop=None)
+is_binary = False if y_train_.shape[1] > 2 else True
 else:
-y_train_=ips.flatten(y_train.values)
-is_binary = False if len(y_train_)>2 else True
+y_train_ = ips.flatten(y_train.values)
+is_binary = False if len(y_train_) > 2 else True

 if is_binary:
-y_train = ips.df_encoder(pd.DataFrame(y_train), method="label")
-print(
+y_train = ips.df_encoder(pd.DataFrame(y_train), method="label")
+print("is_binary:", is_binary)

 # Perform backward feature selection
 if backward:
 selected_features = backward_regression(x_train, y_train, threshold_out=0.05)
-x_train=x_train[selected_features]
+x_train = x_train[selected_features]

 if x_true is None:
 x_train, x_true, y_train, y_true = train_test_split(
@@ -2271,23 +2326,31 @@ def predict(
 if isinstance(y_train, str) and y_train in x_train.columns:
 y_train_col_name = y_train
 y_train = x_train[y_train]
-y_train =
+y_train = (
+ips.df_encoder(pd.DataFrame(y_train), method="label")
+if is_binary
+else y_train
+)
 x_train = x_train.drop(y_train_col_name, axis=1)
 if is_binary:
 y_train = ips.df_encoder(
 pd.DataFrame(y_train), method="label"
-).values.ravel()
+).values.ravel()

 if y_true is not None:
 if isinstance(y_true, str) and y_true in x_true.columns:
 y_true_col_name = y_true
 y_true = x_true[y_true]
-y_true =
-
+y_true = (
+ips.df_encoder(pd.DataFrame(y_true), method="label")
+if is_binary
+else y_true
+)
+y_true = pd.DataFrame(y_true)
 x_true = x_true.drop(y_true_col_name, axis=1)
 if is_binary:
 y_true = ips.df_encoder(pd.DataFrame(y_true), method="label").values.ravel()
-y_true =
+y_true = pd.DataFrame(y_true)

 # to convert the 2D to 1D: 2D column-vector format (like [[1], [0], [1], ...]) instead of a 1D array ([1, 0, 1, ...]

@@ -2295,10 +2358,14 @@ def predict(
 # y_true=y_true.values.ravel() if y_true is not None else None
 if y_train is not None:
 y_train = (
-y_train.ravel()
+y_train.ravel()
+if isinstance(y_train, np.ndarray)
+else y_train.values.ravel()
 )
 if y_true is not None:
-y_true =
+y_true = (
+y_true.ravel() if isinstance(y_true, np.ndarray) else y_true.values.ravel()
+)
 # Ensure common features are selected
 if common_features is not None:
 x_train, x_true = x_train[common_features], x_true[common_features]
@@ -2307,7 +2374,9 @@ def predict(
 x_train, x_true = x_train[share_col_names], x_true[share_col_names]

 x_train, x_true = ips.df_scaler(x_train), ips.df_scaler(x_true)
-x_train, x_true = ips.df_encoder(x_train, method="dummy"), ips.df_encoder(
+x_train, x_true = ips.df_encoder(x_train, method="dummy"), ips.df_encoder(
+x_true, method="dummy"
+)
 # Handle class imbalance using SMOTE (only for classification)
 if (
 smote
@@ -2320,11 +2389,11 @@ def predict(
 x_train, y_train = smote_sampler.fit_resample(x_train, y_train)
 if not is_binary:
 if isinstance(y_train, np.ndarray):
-y_train = ips.df_encoder(data=pd.DataFrame(y_train),method=
-y_train=np.asarray(y_train)
+y_train = ips.df_encoder(data=pd.DataFrame(y_train), method="label")
+y_train = np.asarray(y_train)
 if isinstance(y_train, np.ndarray):
-y_true = ips.df_encoder(data=pd.DataFrame(y_true),method=
-y_true=np.asarray(y_true)
+y_true = ips.df_encoder(data=pd.DataFrame(y_true), method="label")
+y_true = np.asarray(y_true)
 # Hyperparameter grids for tuning
 if cv_level in ["low", "simple", "s", "l"]:
 param_grids = {
@@ -2908,14 +2977,16 @@ def predict(
 clf,
 param_grid=param_grids.get(name, {}),
 scoring=(
-"roc_auc"
+"roc_auc"
+if purpose == "classification"
+else "neg_mean_squared_error"
 ),
 cv=cv,
 n_jobs=n_jobs,
 verbose=verbose,
 )

-gs.fit(x_train, y_train)
+gs.fit(x_train, y_train)
 best_clf = gs.best_estimator_
 # make sure x_train and x_test has the same name
 x_true = x_true.reindex(columns=x_train.columns, fill_value=0)
@@ -2924,7 +2995,9 @@ def predict(
 y_pred_proba = best_clf.predict_proba(x_true)
 print("Shape of predicted probabilities:", y_pred_proba.shape)
 if y_pred_proba.shape[1] == 1:
-y_pred_proba = np.hstack(
+y_pred_proba = np.hstack(
+[1 - y_pred_proba, y_pred_proba]
+) # Add missing class probabilities
 y_pred_proba = y_pred_proba[:, 1]
 elif hasattr(best_clf, "decision_function"):
 # If predict_proba is not available, use decision_function (e.g., for SVM)
@@ -2940,7 +3013,9 @@ def predict(
 clf,
 param_grid=param_grids.get(name, {}),
 scoring=(
-"roc_auc_ovr"
+"roc_auc_ovr"
+if purpose == "classification"
+else "neg_mean_squared_error"
 ),
 cv=cv,
 n_jobs=n_jobs,
@@ -2948,7 +3023,7 @@ def predict(
 )

 # Fit GridSearchCV
-gs.fit(x_train, y_train)
+gs.fit(x_train, y_train)
 best_clf = gs.best_estimator_

 # Ensure x_true aligns with x_train columns
@@ -2960,14 +3035,18 @@ def predict(
 y_pred_proba = best_clf.predict_proba(x_true)
 elif hasattr(best_clf, "decision_function"):
 y_pred_proba = best_clf.decision_function(x_true)
-
+
 # Normalize for multiclass if necessary
 if y_pred_proba.ndim == 2:
-y_pred_proba = (
-
+y_pred_proba = (
+y_pred_proba - y_pred_proba.min(axis=1, keepdims=True)
+) / (
+y_pred_proba.max(axis=1, keepdims=True)
+- y_pred_proba.min(axis=1, keepdims=True)
+)
 else:
 y_pred_proba = None # No probability output for certain models
-
+
 validation_scores = {}

 if y_true is not None and y_pred_proba is not None:
@@ -2985,7 +3064,9 @@ def predict(
 if y_pred_proba is not None:
 # fpr, tpr, roc_auc = dict(), dict(), dict()
 fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
-lower_ci, upper_ci = cal_auc_ci(
+lower_ci, upper_ci = cal_auc_ci(
+y_true, y_pred_proba, verbose=False, is_binary=is_binary
+)
 roc_auc = auc(fpr, tpr)
 roc_info = {
 "fpr": fpr.tolist(),
@@ -3030,11 +3111,13 @@ def predict(
 y_pred_proba.tolist() if y_pred_proba is not None else None
 ),
 }
-else:
+else: # multi-classes
 if y_pred_proba is not None:
 # fpr, tpr, roc_auc = dict(), dict(), dict()
 # fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
-confidence_intervals = cal_auc_ci(
+confidence_intervals = cal_auc_ci(
+y_true, y_pred_proba, verbose=False, is_binary=is_binary
+)
 roc_info = {
 "fpr": validation_scores["fpr"],
 "tpr": validation_scores["tpr"],
@@ -3042,7 +3125,9 @@ def predict(
 "ci95": confidence_intervals,
 }
 # precision-recall curve
-precision_, recall_, avg_precision_ = cal_precision_recall(
+precision_, recall_, avg_precision_ = cal_precision_recall(
+y_true, y_pred_proba, is_binary=is_binary
+)
 pr_info = {
 "precision": precision_,
 "recall": recall_,
@@ -3080,14 +3165,17 @@ def predict(
 }

 else:
-
-
-
-
-
-
-
-
+if not y_true:
+validation_scores = []
+else:
+validation_scores = cal_metrics(
+y_true,
+y_pred,
+y_pred_proba=y_pred_proba,
+is_binary=is_binary,
+purpose=purpose,
+average="weighted",
+)
 results[name] = {
 "best_clf": gs.best_estimator_,
 "best_params": gs.best_params_,
@@ -3096,8 +3184,8 @@ def predict(
 "predictions_proba": (
 y_pred_proba.tolist() if y_pred_proba is not None else None
 ),
-"y_train":y_train if y_train is not None else [],
-"y_true": y_true if y_true is not None else []
+"y_train": y_train if y_train is not None else [],
+"y_true": y_true if y_true is not None else [],
 }

 # Convert results to DataFrame
@@ -3118,8 +3206,8 @@ def predict(
 plot.figsets(xangle=30)
 if dir_save:
 ips.figsave(dir_save + f"scores_sorted_heatmap{now_}.pdf")
-
-df_scores=df_scores.select_dtypes(include=np.number)
+
+df_scores = df_scores.select_dtypes(include=np.number)

 if df_scores.shape[0] > 1: # draw cluster
 plot.heatmap(df_scores, kind="direct", cluster=True)
@@ -3129,7 +3217,7 @@ def predict(
 if all([plot_, y_true is not None, purpose == "classification"]):
 # try:
 if len(models) > 3:
-plot_validate_features(df_results,is_binary=is_binary)
+plot_validate_features(df_results, is_binary=is_binary)
 else:
 plot_validate_features_single(df_results, is_binary=is_binary)
 if dir_save:
@@ -3140,7 +3228,12 @@ def predict(


 def cal_metrics(
-y_true,
+y_true,
+y_pred,
+y_pred_proba=None,
+is_binary=True,
+purpose="regression",
+average="weighted",
 ):
 """
 Calculate regression or classification metrics based on the purpose.
@@ -3216,33 +3309,38 @@ def cal_metrics(
 tn, fp, fn, tp = cm.ravel()
 else:
 # Handle single-class predictions
-tn, fp, fn, tp = 0, 0, 0, 0
+tn, fp, fn, tp = 0, 0, 0, 0
 print("Warning: Only one class found in y_pred or y_true.")

 # Specificity calculation
-validation_scores["specificity"] = (
-tn / (tn + fp) if (tn + fp) > 0 else 0
-)
+validation_scores["specificity"] = tn / (tn + fp) if (tn + fp) > 0 else 0
 if y_pred_proba is not None:
 # Calculate ROC-AUC
 validation_scores["roc_auc"] = roc_auc_score(y_true, y_pred_proba)
 # PR-AUC (Precision-Recall AUC) calculation
-validation_scores["pr_auc"] = average_precision_score(
-
-
+validation_scores["pr_auc"] = average_precision_score(
+y_true, y_pred_proba
+)
+
+else: # multi-class
 from sklearn.preprocessing import label_binarize
-
-
+
+# * Multi-class ROC calculation
+y_pred_proba = np.asarray(y_pred_proba)
 classes = np.unique(y_true)
 y_true_bin = label_binarize(y_true, classes=classes)
 if isinstance(y_true, np.ndarray):
-y_true = ips.df_encoder(
+y_true = ips.df_encoder(
+data=pd.DataFrame(y_true), method="dum", prefix="Label"
+)
 # Initialize dictionaries to store FPR, TPR, and AUC for each class
 fpr = dict()
 tpr = dict()
-roc_auc = dict()
+roc_auc = dict()
 for i, class_label in enumerate(classes):
-fpr[class_label], tpr[class_label], _ = roc_curve(
+fpr[class_label], tpr[class_label], _ = roc_curve(
+y_true_bin[:, i], y_pred_proba[:, i]
+)
 roc_auc[class_label] = auc(fpr[class_label], tpr[class_label])

 # Store the mean ROC AUC
@@ -3267,6 +3365,7 @@ def cal_metrics(

 return validation_scores

+
 def plot_trees(
 X, y, cls, max_trees=500, test_size=0.2, random_state=42, early_stopping_rounds=None
 ):
@@ -3303,6 +3402,7 @@ def plot_trees(
 ExtraTreesClassifier,
 )
 from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
+
 # Split data for training and testing error calculation
 x_train, x_test, y_train, y_test = train_test_split(
 X, y, test_size=test_size, random_state=random_state
@@ -3361,7 +3461,9 @@ def plot_trees(
 if validation_error[-early_stopping_rounds:] == sorted(
 validation_error[-early_stopping_rounds:]
 ):
-print(
+print(
+f"Early stopping at tree {i} due to lack of improvement in validation error."
+)
 break

 # Plot results
@@ -3407,16 +3509,17 @@ def plot_trees(
 plt.grid(True)
 plt.show()

+
 def img_datasets_preprocessing(
 data: pd.DataFrame,
 x_col: str,
-y_col: str=None,
+y_col: str = None,
 target_size: tuple = (224, 224),
 batch_size: int = 128,
 class_mode: str = "raw",
 shuffle: bool = False,
 augment: bool = False,
-scaler: str =
+scaler: str = "normalize", # 'normalize', 'standardize', 'clahe', 'raw'
 grayscale: bool = False,
 encoder: str = "label", # Options: 'label', 'onehot', 'binary'
 label_encoder=None,
@@ -3461,16 +3564,29 @@ def img_datasets_preprocessing(
 x_col in data.columns and y_col in data.columns
 ), "Missing required columns in DataFrame."
 if y_col is None:
-class_mode=None
+class_mode = None
 # 输出格式
-output = ips.strcmp(
-
-
-
+output = ips.strcmp(
+output,
+[
+"generator",
+"tf",
+"iterator",
+"transform",
+"transformer",
+"dataframe",
+"df",
+"pd",
+"pandas",
+],
+)[0]
+
 # Handle missing file paths
 if drop_missing:
 data = data[
-data[x_col].apply(
+data[x_col].apply(
+lambda path: os.path.exists(path) and os.path.isfile(path)
+)
 ]

 # Encoding labels if necessary
@@ -3502,11 +3618,11 @@ def img_datasets_preprocessing(
 aug_params.update(kws_augmentation)
 dat = ImageDataGenerator(rescale=scaler, **aug_params)
 dat = ImageDataGenerator(
-
+rescale=1.0 / 255 if scaler == "normalize" else None, **aug_params
+)

 else:
-dat = ImageDataGenerator(
-rescale=1.0 / 255 if scaler == 'normalize' else None)
+dat = ImageDataGenerator(rescale=1.0 / 255 if scaler == "normalize" else None)

 # Create DataFrameIterator
 data_iterator = dat.flow_from_dataframe(
@@ -3529,14 +3645,14 @@ def img_datasets_preprocessing(

 # Load, resize, and process images in batches
 for i, (batch_images, batch_labels) in enumerate(data_iterator):
-for img, label in zip(batch_images, batch_labels):
-if scaler == [
+for img, label in zip(batch_images, batch_labels):
+if scaler == ["normalize", "raw"]:
 # Already rescaled by 1.0/255 in ImageDataGenerator
 pass
-elif scaler ==
+elif scaler == "standardize":
 # Standardize by subtracting mean and dividing by std
 img = (img - np.mean(img)) / np.std(img)
-elif scaler ==
+elif scaler == "clahe":
 # Apply CLAHE to the image
 img = apply_clahe(img)
 flat_img = img.flatten()
@@ -3561,11 +3677,13 @@ def img_datasets_preprocessing(
 return df_img


-def backward_regression(
+def backward_regression(
+X: pd.DataFrame, y: pd.Series, initial_list=[], threshold_out=0.05, verbose=True
+):
 """
 # awesome bit of code from https://www.kaggle.com/code/adibouayjan/house-price-step-by-step-modeling
-
-Evaluates the p-values of all features, which represent the probability of observing a coefficient
+
+Evaluates the p-values of all features, which represent the probability of observing a coefficient
 as extreme as the one calculated if the feature had no true effect on the target.

 Args:
@@ -3576,9 +3694,10 @@ def backward_regression(X:pd.DataFrame, y:pd.Series, initial_list=[], threshold_
 verbose -- true to produce lots of logging output

 Returns:
-list of selected features for modeling
+list of selected features for modeling
 """
 import statsmodels.api as sm
+
 if isinstance(y, str) and y in X.columns:
 y_col_name = y
 y = X[y]
@@ -3600,15 +3719,16 @@ def backward_regression(X:pd.DataFrame, y:pd.Series, initial_list=[], threshold_
 break
 print(f"\nSelected Features:\n{included}")
 return included # Returns the list of selected features
-
+

 # Function to apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
 def apply_clahe(img):
 import cv2
+
 lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB) # Convert to LAB color space
 l, a, b = cv2.split(lab) # Split into channels
 clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
 cl = clahe.apply(l) # Apply CLAHE to the L channel
 limg = cv2.merge((cl, a, b)) # Merge back the channels
 img_clahe = cv2.cvtColor(limg, cv2.COLOR_LAB2RGB) # Convert back to RGB
-return img_clahe
+return img_clahe
@@ -236,7 +236,7 @@ py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
 py2ls/ich2ls.py,sha256=3E9R8oVpyYZXH5PiIQgT3CN5NxLe4Dwtm2LwaeacE6I,21381
 py2ls/ips.py,sha256=2TWuOSFquwhmPdxkmmvU_pcIbE5M0S9aRPtuQgs5B7A,297706
 py2ls/ml2ls copy.py,sha256=iZJrFLIrdfTieAY2BDsxQFTm29smwnJh0aC4hRB9VGM,113314
-py2ls/ml2ls.py,sha256=
+py2ls/ml2ls.py,sha256=wvQkhcOsBiysgfaRmeT2KAR5C8uFOaX3HeyDA2Oy1LI,146065
 py2ls/mol.py,sha256=AZnHzarIk_MjueKdChqn1V6e4tUle3X1NnHSFA6n3Nw,10645
 py2ls/netfinder.py,sha256=R70NkrnO8LlXjT1y7bf2TN-yE4yOeAYhb0jDBiNp8XA,57536
 py2ls/ocr.py,sha256=5lhUbJufIKRSOL6wAWVLEo8TqMYSjoI_Q-IO-_4u3DE,31419
@@ -246,6 +246,6 @@ py2ls/sleep_events_detectors.py,sha256=bQA3HJqv5qnYKJJEIhCyhlDtkXQfIzqksnD0YRXso
 py2ls/stats.py,sha256=qBn2rJmNa_QLLUqjwYqXUlGzqmW94sgA1bxJU2FC3r0,39175
 py2ls/translator.py,sha256=77Tp_GjmiiwFbEIJD_q3VYpQ43XL9ZeJo6Mhl44mvh8,34284
 py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
-py2ls-0.2.4.
-py2ls-0.2.4.
-py2ls-0.2.4.
+py2ls-0.2.4.20.dist-info/METADATA,sha256=iCOFX-A3J17xwkEB2UdDpS5A7kQuRcBCJVq5x8BtqPg,20078
+py2ls-0.2.4.20.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+py2ls-0.2.4.20.dist-info/RECORD,,

File without changes