py2ls 0.2.4.24__py3-none-any.whl → 0.2.4.25__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- py2ls/.git/index +0 -0
- py2ls/ec2ls.py +61 -0
- py2ls/ips.py +105 -55
- py2ls/ml2ls.py +244 -110
- py2ls/nl2ls.py +283 -0
- py2ls/plot.py +351 -40
- {py2ls-0.2.4.24.dist-info → py2ls-0.2.4.25.dist-info}/METADATA +1 -1
- {py2ls-0.2.4.24.dist-info → py2ls-0.2.4.25.dist-info}/RECORD +9 -8
- py2ls/ml2ls copy.py +0 -2906
- {py2ls-0.2.4.24.dist-info → py2ls-0.2.4.25.dist-info}/WHEEL +0 -0
py2ls/ml2ls.py (CHANGED)
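All of the hunks below touch the `predict()` function. Taken together they: add two new keyword arguments (`fill_missing`, `scaler`); forward `n_jobs` to every estimator that accepts it; wrap each model's fit/predict cycle in `try`/`except` so a failing estimator is skipped rather than fatal; and record three new keys (`features`, `coef`, `alphas`) in every per-model results dict.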
```diff
@@ -2206,6 +2206,8 @@ def predict(
     y_train: pd.Series,
     x_true: pd.DataFrame = None,
     y_true: Optional[pd.Series] = None,
+    fill_missing: bool = True,
+    scaler: str = 'standard',  # ["standard", "minmax", "robust", "maxabs"]
     backward: bool = False,  # backward_regression
     backward_thr: float = 0.05,  # p-value threshold; only used when backward is True
     common_features: set = None,
```
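The new `fill_missing` flag turns on KNN imputation before training, and `scaler` picks one of four scaling strategies. As a rough sketch of what those four keywords conventionally map to in sklearn (the real mapping lives inside py2ls's `ips.df_scaler`, whose body is not part of this diff):

```python
# Illustrative sketch only: a plausible keyword-to-scaler mapping.
# The keyword names mirror the comment in the diff above.
from sklearn.preprocessing import (
    MaxAbsScaler,
    MinMaxScaler,
    RobustScaler,
    StandardScaler,
)

SCALERS = {
    "standard": StandardScaler,  # zero mean, unit variance
    "minmax": MinMaxScaler,      # rescale features to [0, 1]
    "robust": RobustScaler,      # median/IQR, tolerant of outliers
    "maxabs": MaxAbsScaler,      # divide by the max absolute value
}

def make_scaler(method: str = "standard"):
    """Return an unfitted scaler instance for the given keyword."""
    return SCALERS[method]()
```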
```diff
@@ -2324,7 +2326,7 @@ def predict(
         if purpose == "classification":
             model_ = {
                 "Random Forest": RandomForestClassifier(
-                    random_state=random_state, class_weight=class_weight
+                    random_state=random_state, class_weight=class_weight, n_jobs=n_jobs
                 ),
                 # SVC (Support Vector Classification)
                 "SVM": SVC(
```
```diff
@@ -2335,7 +2337,7 @@ def predict(
                 ),
                 # fit the best model without enforcing sparsity, which means it does not directly perform feature selection.
                 "Logistic Regression": LogisticRegression(
-                    class_weight=class_weight, random_state=random_state
+                    class_weight=class_weight, random_state=random_state, n_jobs=n_jobs
                 ),
                 # Logistic Regression with L1 Regularization (Lasso)
                 "Lasso Logistic Regression": LogisticRegression(
```
```diff
@@ -2346,49 +2348,51 @@ def predict(
                 eval_metric="logloss",
                 random_state=random_state,
             ),
-            "KNN": KNeighborsClassifier(n_neighbors=5),
+            "KNN": KNeighborsClassifier(n_neighbors=5, n_jobs=n_jobs),
             "Naive Bayes": GaussianNB(),
             "Linear Discriminant Analysis": LinearDiscriminantAnalysis(),
             "AdaBoost": AdaBoostClassifier(
                 algorithm="SAMME", random_state=random_state
             ),
-            "LightGBM": lgb.LGBMClassifier(random_state=random_state, class_weight=class_weight),
+            "LightGBM": lgb.LGBMClassifier(random_state=random_state, class_weight=class_weight, n_jobs=n_jobs),
             "CatBoost": cb.CatBoostClassifier(verbose=0, random_state=random_state),
             "Extra Trees": ExtraTreesClassifier(
-                random_state=random_state, class_weight=class_weight
+                random_state=random_state, class_weight=class_weight, n_jobs=n_jobs
             ),
-            "Bagging": BaggingClassifier(random_state=random_state),
+            "Bagging": BaggingClassifier(random_state=random_state, n_jobs=n_jobs),
             "Neural Network": MLPClassifier(max_iter=500, random_state=random_state),
             "DecisionTree": DecisionTreeClassifier(),
             "Quadratic Discriminant Analysis": QuadraticDiscriminantAnalysis(),
             "Ridge": RidgeClassifierCV(
                 class_weight=class_weight, store_cv_results=True
             ),
-            "Perceptron": Perceptron(random_state=random_state),
+            "Perceptron": Perceptron(random_state=random_state, n_jobs=n_jobs),
             "Bernoulli Naive Bayes": BernoulliNB(),
-            "SGDClassifier": SGDClassifier(random_state=random_state),
+            "SGDClassifier": SGDClassifier(random_state=random_state, n_jobs=n_jobs),
         }
     elif purpose == "regression":
         model_ = {
-            "Random Forest": RandomForestRegressor(random_state=random_state),
+            "Random Forest": RandomForestRegressor(random_state=random_state, n_jobs=n_jobs),
             "SVM": SVR(),  # SVR (Support Vector Regression)
             # "Lasso": Lasso(random_state=random_state),  # same as LassoCV (but alpha must be provided),
             "LassoCV": LassoCV(
-                cv=cv_folds, random_state=random_state
+                cv=cv_folds, random_state=random_state, n_jobs=n_jobs
             ),  # LassoCV finds the best alpha automatically and is preferable to Lasso
             "Gradient Boosting": GradientBoostingRegressor(random_state=random_state),
-            "XGBoost": xgb.XGBRegressor(eval_metric="rmse", random_state=random_state),
-            "Linear Regression": LinearRegression(),
+            "XGBoost": xgb.XGBRegressor(eval_metric="rmse", random_state=random_state, n_jobs=n_jobs),
+            "Linear Regression": LinearRegression(n_jobs=n_jobs),
             "AdaBoost": AdaBoostRegressor(random_state=random_state),
-            "LightGBM": lgb.LGBMRegressor(random_state=random_state),
+            "LightGBM": lgb.LGBMRegressor(random_state=random_state, n_jobs=n_jobs,
+                                          force_row_wise=True  # or use force_col_wise=True if memory is a concern
+                                          ),
             "CatBoost": cb.CatBoostRegressor(verbose=0, random_state=random_state),
-            "Extra Trees": ExtraTreesRegressor(random_state=random_state),
-            "Bagging": BaggingRegressor(random_state=random_state),
+            "Extra Trees": ExtraTreesRegressor(random_state=random_state, n_jobs=n_jobs),
+            "Bagging": BaggingRegressor(random_state=random_state, n_jobs=n_jobs),
             "Neural Network": MLPRegressor(max_iter=500, random_state=random_state),
             "ElasticNet": ElasticNet(random_state=random_state),
             "Ridge": Ridge(),
-            "KNN": KNeighborsRegressor(),
-            "TheilSen": TheilSenRegressor(),
+            "KNN": KNeighborsRegressor(n_jobs=n_jobs),
+            "TheilSen": TheilSenRegressor(n_jobs=n_jobs),
             "Huber": HuberRegressor(),
             "Poisson": PoissonRegressor()
         }
```
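Nearly every change in the three hunks above threads `n_jobs` through the estimators that support it, so one parameter now controls CPU parallelism across the whole model zoo. A minimal standalone illustration of the pattern (hypothetical stand-in dict, not py2ls code):

```python
# n_jobs=-1 uses all available cores; estimators lacking the
# parameter (e.g., GaussianNB) are simply constructed without it.
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

n_jobs = -1
models = {
    "Random Forest": RandomForestClassifier(random_state=1, n_jobs=n_jobs),
    "Naive Bayes": GaussianNB(),  # no n_jobs parameter
}
```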
```diff
@@ -2410,7 +2414,7 @@ def predict(
     # indicate cls:
     if ips.run_once_within(30):  # 10 min
         print(f"processing: {list(models.keys())}")
-
+    print(isinstance(y_train, str) and y_train in x_train.columns)
     if isinstance(y_train, str) and y_train in x_train.columns:
         y_train_col_name = y_train
         y_train = x_train[y_train]
```
```diff
@@ -2418,6 +2422,7 @@ def predict(
         x_train = x_train.drop(y_train_col_name, axis=1)
     # else:
     #     y_train = ips.df_encoder(pd.DataFrame(y_train), method="dummy").values.ravel()
+
     y_train = pd.DataFrame(y_train)
     if y_train.select_dtypes(include=np.number).empty:
         y_train_ = ips.df_encoder(y_train, method="dummy", drop=None)
```
```diff
@@ -2430,6 +2435,9 @@ def predict(
         y_train = ips.df_encoder(pd.DataFrame(y_train), method="label")
     print("is_binary:", is_binary)
 
+    if fill_missing:
+        ips.df_fillna(data=x_train, method="knn", inplace=True, axis=0)
+        ips.df_fillna(data=y_train, method="knn", inplace=True, axis=0)
     # Perform backward feature selection
     if backward:
         selected_features = backward_regression(x_train, y_train, thr=backward_thr)
```
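`ips.df_fillna(..., method="knn")` imputes each missing value from similar rows. A self-contained sklearn equivalent of KNN imputation (py2ls wraps comparable logic behind a DataFrame-friendly call; this is a sketch, not its implementation):

```python
# KNN imputation sketch: each NaN is replaced by averaging that
# column over the k most similar rows.
import pandas as pd
from sklearn.impute import KNNImputer

df = pd.DataFrame({"a": [1.0, None, 3.0], "b": [4.0, 5.0, None]})
imputer = KNNImputer(n_neighbors=2)
filled = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)
print(filled)
```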
```diff
@@ -2458,6 +2466,8 @@ def predict(
             pd.DataFrame(y_train), method="label"
         ).values.ravel()
 
+    if fill_missing:
+        ips.df_fillna(data=x_true, method="knn", inplace=True, axis=0)
     if y_true is not None:
         if isinstance(y_true, str) and y_true in x_true.columns:
             y_true_col_name = y_true
```
```diff
@@ -2490,11 +2500,16 @@ def predict(
     # Ensure common features are selected
     if common_features is not None:
         x_train, x_true = x_train[common_features], x_true[common_features]
+        share_col_names = common_features
     else:
         share_col_names = ips.shared(x_train.columns, x_true.columns, verbose=verbose)
         x_train, x_true = x_train[share_col_names], x_true[share_col_names]
 
-
+    #! scaler
+    # fit the scaler on x_train, then export it so x_true is transformed with the same fit
+    x_train, scaler_ = ips.df_scaler(x_train, method=scaler, return_scaler=True)
+    #
+    x_true = ips.df_scaler(x_true, scaler=scaler_)  # make sure the same scaler is used
     x_train, x_true = ips.df_encoder(x_train, method="dummy"), ips.df_encoder(
         x_true, method="dummy"
     )
```
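The important detail here is the asymmetry: `df_scaler` is fitted on `x_train` and the returned `scaler_` object is reused to transform `x_true`, so no test-set statistics leak into training. The underlying sklearn idiom, as a sketch with toy data:

```python
# Fit scaling statistics on the training split only, then apply
# the already-fitted scaler to the evaluation split.
import numpy as np
from sklearn.preprocessing import StandardScaler

x_train = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
x_true = np.array([[1.5, 15.0]])

scaler_ = StandardScaler().fit(x_train)    # statistics from x_train alone
x_train_scaled = scaler_.transform(x_train)
x_true_scaled = scaler_.transform(x_true)  # same scaler, no leakage
```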
```diff
@@ -2516,6 +2531,7 @@ def predict(
     if isinstance(y_train, np.ndarray):
         y_true = ips.df_encoder(data=pd.DataFrame(y_true), method="label")
         y_true = np.asarray(y_true)
+
     # Hyperparameter grids for tuning
     param_grid_common_xgb = {
         'learning_rate': [0.01, 0.1, 0.2, 0.3],
```
```diff
@@ -3168,83 +3184,124 @@ def predict(
     ):
         if verbose:
             print(f"\nTraining and validating {name}:")
-
-        # Grid search with KFold or StratifiedKFold
-        if is_binary:
-            gs = GridSearchCV(
-                clf,
-                param_grid=param_grids.get(name, {}),
-                scoring=(
-                    "roc_auc"
-                    if purpose == "classification"
-                    else "neg_mean_squared_error"
-                ),
-                cv=cv,
-                n_jobs=n_jobs,
-                verbose=verbose,
-            )
-
-            gs.fit(x_train, y_train)
-            best_clf = gs.best_estimator_
-            # make sure x_train and x_test have the same column names
-            x_true = x_true.reindex(columns=x_train.columns, fill_value=0)
-            y_pred = best_clf.predict(x_true)
-            if hasattr(best_clf, "predict_proba"):
-                y_pred_proba = best_clf.predict_proba(x_true)
-                print("Shape of predicted probabilities:", y_pred_proba.shape)
-                if y_pred_proba.shape[1] == 1:
-                    y_pred_proba = np.hstack(
-                        [1 - y_pred_proba, y_pred_proba]
-                    )  # Add missing class probabilities
-                y_pred_proba = y_pred_proba[:, 1]
-            elif hasattr(best_clf, "decision_function"):
-                # If predict_proba is not available, use decision_function (e.g., for SVM)
-                y_pred_proba = best_clf.decision_function(x_true)
-                # Ensure y_pred_proba is within 0 and 1 bounds
-                y_pred_proba = (y_pred_proba - y_pred_proba.min()) / (
-                    y_pred_proba.max() - y_pred_proba.min()
+        try:
+            # Grid search with KFold or StratifiedKFold
+            if is_binary:
+                gs = GridSearchCV(
+                    clf,
+                    param_grid=param_grids.get(name, {}),
+                    scoring=(
+                        "roc_auc"
+                        if purpose == "classification"
+                        else "neg_mean_squared_error"
+                    ),
+                    cv=cv,
+                    n_jobs=n_jobs,
+                    verbose=verbose,
                 )
-            else:
-                y_pred_proba = None  # No probability output for certain models
-        else:
-            gs = GridSearchCV(
-                clf,
-                param_grid=param_grids.get(name, {}),
-                scoring=(
-                    "roc_auc_ovr"
-                    if purpose == "classification"
-                    else "neg_mean_squared_error"
-                ),
-                cv=cv,
-                n_jobs=n_jobs,
-                verbose=verbose,
-            )
-
-            # Fit GridSearchCV
-            gs.fit(x_train, y_train)
-            best_clf = gs.best_estimator_
-
-            # Ensure x_true aligns with x_train columns
-            x_true = x_true.reindex(columns=x_train.columns, fill_value=0)
-            y_pred = best_clf.predict(x_true)
-
-            # Handle prediction probabilities for multiclass
-            if hasattr(best_clf, "predict_proba"):
-                y_pred_proba = best_clf.predict_proba(x_true)
-            elif hasattr(best_clf, "decision_function"):
-                y_pred_proba = best_clf.decision_function(x_true)
 
-            # Normalize for multiclass if necessary
-            if y_pred_proba.ndim == 2:
-                y_pred_proba = (
-                    y_pred_proba - y_pred_proba.min(axis=1, keepdims=True)
-                ) / (
-                    y_pred_proba.max(axis=1, keepdims=True)
-                    - y_pred_proba.min(axis=1, keepdims=True)
+                gs.fit(x_train, y_train)
+                best_clf = gs.best_estimator_
+                # make sure x_train and x_test have the same column names
+                x_true = x_true.reindex(columns=x_train.columns, fill_value=0)
+                y_pred = best_clf.predict(x_true)
+                if hasattr(best_clf, "predict_proba"):
+                    y_pred_proba = best_clf.predict_proba(x_true)
+                    print("Shape of predicted probabilities:", y_pred_proba.shape)
+                    if y_pred_proba.shape[1] == 1:
+                        y_pred_proba = np.hstack(
+                            [1 - y_pred_proba, y_pred_proba]
+                        )  # Add missing class probabilities
+                    y_pred_proba = y_pred_proba[:, 1]
+                elif hasattr(best_clf, "decision_function"):
+                    # If predict_proba is not available, use decision_function (e.g., for SVM)
+                    y_pred_proba = best_clf.decision_function(x_true)
+                    # Ensure y_pred_proba is within 0 and 1 bounds
+                    y_pred_proba = (y_pred_proba - y_pred_proba.min()) / (
+                        y_pred_proba.max() - y_pred_proba.min()
                     )
+                else:
+                    y_pred_proba = None  # No probability output for certain models
+                # Access alphas if applicable (e.g., ElasticNetCV, LassoCV)
+                if hasattr(best_clf, "alphas_") or hasattr(best_clf, "Cs_"):
+                    if hasattr(best_clf, "alphas_"):
+                        alphas_ = best_clf.alphas_
+                    elif hasattr(best_clf, "alpha_"):
+                        alphas_ = best_clf.alpha_
+                    elif hasattr(best_clf, "Cs_"):
+                        alphas_ = best_clf.Cs_
+                else:
+                    alphas_ = None
+                coef_ = best_clf.coef_ if hasattr(best_clf, "coef_") else None
             else:
-                y_pred_proba = None  # No probability output for certain models
+                gs = GridSearchCV(
+                    clf,
+                    param_grid=param_grids.get(name, {}),
+                    scoring=(
+                        "roc_auc_ovr"
+                        if purpose == "classification"
+                        else "neg_mean_squared_error"
+                    ),
+                    cv=cv,
+                    n_jobs=n_jobs,
+                    verbose=verbose,
+                )
 
+                # Fit GridSearchCV
+                gs.fit(x_train, y_train)
+                best_clf = gs.best_estimator_
+
+                # Ensure x_true aligns with x_train columns
+                x_true = x_true.reindex(columns=x_train.columns, fill_value=0)
+
+                # do i need to fit the x_train, y_train again?
+                best_clf = best_clf.fit(x_train, y_train)
+                y_pred = best_clf.predict(x_true)
+
+                # Handle prediction probabilities for multiclass
+                if hasattr(best_clf, "predict_proba"):
+                    y_pred_proba = best_clf.predict_proba(x_true)
+                elif hasattr(best_clf, "decision_function"):
+                    y_pred_proba = best_clf.decision_function(x_true)
+
+                # Normalize for multiclass if necessary
+                if y_pred_proba.ndim == 2:
+                    y_pred_proba = (
+                        y_pred_proba - y_pred_proba.min(axis=1, keepdims=True)
+                    ) / (
+                        y_pred_proba.max(axis=1, keepdims=True)
+                        - y_pred_proba.min(axis=1, keepdims=True)
+                    )
+                else:
+                    y_pred_proba = None  # No probability output for certain models
+                # Access alphas if applicable (e.g., ElasticNetCV, LassoCV)
+                if hasattr(best_clf, "alphas_") or hasattr(best_clf, "Cs_"):
+                    if hasattr(best_clf, "alphas_"):
+                        alphas_ = best_clf.alphas_
+                    elif hasattr(best_clf, "alpha_"):
+                        alphas_ = best_clf.alpha_
+                    elif hasattr(best_clf, "Cs_"):
+                        alphas_ = best_clf.Cs_
+                else:
+                    alphas_ = None
+                coef_ = best_clf.coef_ if hasattr(best_clf, "coef_") else None
+        except Exception as e:
+            alphas_, coef_ = None, None
+            print(f"skipped {clf}: {e}")
+            continue
+        # try to make the predict format consistent
+        try:
+            y_pred = [i[0] for i in y_pred]
+        except:
+            pass
+        try:
+            y_true = [i[0] for i in y_true]
+        except:
+            pass
+        try:
+            y_train = [i[0] for i in y_train]
+        except:
+            pass
         validation_scores = {}
 
         if y_true is not None and y_pred_proba is not None:
```
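Two things happen in this large hunk: each model's entire fit/predict sequence is wrapped in `try`/`except`, so a failing estimator is reported and skipped via `continue` instead of crashing the loop, and regularization information is harvested from fitted models by attribute probing. The probing works because sklearn's CV estimators expose `alphas_`/`alpha_` (e.g. `LassoCV`) or `Cs_` (`LogisticRegressionCV`) alongside `coef_`. A self-contained example of that probing:

```python
# Attribute probing as in the diff: collect the candidate alphas and
# fitted coefficients when an estimator exposes them, else None.
import numpy as np
from sklearn.linear_model import LassoCV

rng = np.random.RandomState(0)
X = rng.rand(50, 4)
y = X @ np.array([1.0, 0.0, -2.0, 0.5]) + 0.1 * rng.rand(50)

best_clf = LassoCV(cv=3).fit(X, y)
alphas_ = getattr(best_clf, "alphas_", None)  # regularization path
coef_ = getattr(best_clf, "coef_", None)      # fitted coefficients
print(alphas_.shape, coef_)
```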
```diff
@@ -3294,20 +3351,26 @@ def predict(
                 "roc_curve": roc_info,
                 "pr_curve": pr_info,
                 "confusion_matrix": confusion_matrix(y_true, y_pred),
-                "predictions": y_pred
+                "predictions": y_pred,  # .tolist(),
                 "predictions_proba": (
                     y_pred_proba.tolist() if y_pred_proba is not None else None
                 ),
+                "features": share_col_names,
+                "coef": coef_,
+                "alphas": alphas_
             }
         else:  # "regression"
             results[name] = {
                 "best_clf": gs.best_estimator_,
                 "best_params": gs.best_params_,
                 "scores": validation_scores,  # e.g., neg_MSE, R², etc.
-                "predictions": y_pred
+                "predictions": y_pred,  # .tolist(),
                 "predictions_proba": (
                     y_pred_proba.tolist() if y_pred_proba is not None else None
                 ),
+                "features": share_col_names,
+                "coef": coef_,
+                "alphas": alphas_
             }
     else:  # multi-classes
         if y_pred_proba is not None:
```
```diff
@@ -3346,20 +3409,26 @@ def predict(
                 "roc_curve": roc_info,
                 "pr_curve": pr_info,
                 "confusion_matrix": confusion_matrix(y_true, y_pred),
-                "predictions": y_pred
+                "predictions": y_pred,  # .tolist(),
                 "predictions_proba": (
                     y_pred_proba.tolist() if y_pred_proba is not None else None
                 ),
+                "features": share_col_names,
+                "coef": coef_,
+                "alphas": alphas_
             }
         else:  # "regression"
             results[name] = {
                 "best_clf": gs.best_estimator_,
                 "best_params": gs.best_params_,
                 "scores": validation_scores,  # e.g., neg_MSE, R², etc.
-                "predictions": y_pred
+                "predictions": y_pred,  # .tolist(),
                 "predictions_proba": (
                     y_pred_proba.tolist() if y_pred_proba is not None else None
                 ),
+                "features": share_col_names,
+                "coef": coef_,
+                "alphas": alphas_
             }
 
     else:
```
```diff
@@ -3378,12 +3447,15 @@ def predict(
             "best_clf": gs.best_estimator_,
             "best_params": gs.best_params_,
             "scores": validation_scores,
-            "predictions": y_pred
+            "predictions": y_pred,  # .tolist(),
             "predictions_proba": (
                 y_pred_proba.tolist() if y_pred_proba is not None else None
             ),
+            "features": share_col_names,
             "y_train": y_train if y_train is not None else [],
             "y_true": y_true if y_true is not None else [],
+            "coef": coef_,
+            "alphas": alphas_
         }
 
     # Convert results to DataFrame
```
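With the three new keys, every per-model entry is self-describing: `features` records the columns the model was actually trained on, while `coef` and `alphas` carry the fitted coefficients and the regularization path when the estimator exposes them. Hypothetical downstream use (the key names match the diff; the values here are made up):

```python
# Reading the enriched results entries produced by this release.
results = {
    "LassoCV": {
        "features": ["age", "bmi"],
        "coef": [0.8, -0.2],
        "alphas": [0.01, 0.1, 1.0],
    }
}
for name, res in results.items():
    print(name, dict(zip(res["features"], res["coef"])))
```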
```diff
@@ -3446,7 +3518,7 @@ def predict(
         for i, j in top_models.to_dict().items():
             base_estimators.append((i, j))
         if stacking_cv:
-            print(f"
+            print(f"⤵ stacking_cv is processing...")
            #* define a few symbolic final_estimator candidates
            # several alternatives
            if purpose == "classification":
```
```diff
@@ -3520,7 +3592,7 @@ def predict(
         best_final_estimator = cv_results_df.iloc[0]['final_estimator']
         print(f"Best final estimator based on cross-validation: {best_final_estimator}")
     else:
-        print(f"
+        print(f"⤵ trying to find the best_final_estimator for stacking...")
         if purpose == "classification":
             best_final_estimator = LogisticRegression(class_weight=class_weight,
                                                       random_state=random_state,
```
```diff
@@ -3530,26 +3602,25 @@ def predict(
     print(f"⤵ the best best_final_estimator: {best_final_estimator}")
     #! apply stacking
     if purpose == "classification":
-        print(f"
+        print(f"⤵ StackingClassifier...")
         stacking_model = StackingClassifier(estimators=base_estimators,
                                             final_estimator=best_final_estimator,
                                             cv=cv)
     else:
-        print(f"
+        print(f"⤵ StackingRegressor...")
         stacking_model = StackingRegressor(estimators=base_estimators,
                                            final_estimator=best_final_estimator,
                                            cv=cv)
 
     # Train the Stacking Classifier
-    print(f"
+    print(f"⤵ fit & predict...")
     stacking_model.fit(x_train, y_train)
     y_pred_final = stacking_model.predict(x_true)
-    print(f"
+    print(f"⤵ collecting results...")
     # pred_proba
     if is_binary:
         if hasattr(stacking_model, "predict_proba"):
             y_pred_proba_final = stacking_model.predict_proba(x_true)
-            print("Shape of predicted probabilities:", y_pred_proba_final.shape)
             if y_pred_proba_final.shape[1] == 1:
                 y_pred_proba_final = np.hstack(
                     [1 - y_pred_proba_final, y_pred_proba_final]
```
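For context on what the stacking branch builds: the top-ranked models become `base_estimators`, and a `final_estimator` (meta-learner) is trained on their cross-validated predictions. A minimal runnable sketch of the same flow with stand-in models:

```python
# Stacking sketch: base estimators feed a logistic-regression
# meta-learner trained on their out-of-fold predictions.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=200, random_state=0)
base_estimators = [
    ("rf", RandomForestClassifier(random_state=0)),
    ("dt", DecisionTreeClassifier(random_state=0)),
]
stacking_model = StackingClassifier(
    estimators=base_estimators,
    final_estimator=LogisticRegression(max_iter=500),
    cv=5,
)
stacking_model.fit(X, y)
print(stacking_model.predict_proba(X[:3]))
```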
```diff
@@ -3564,6 +3635,17 @@ def predict(
                 )
         else:
             y_pred_proba_final = None  # No probability output for certain models
+        # Access alphas if applicable (e.g., ElasticNetCV, LassoCV)
+        if hasattr(best_clf, "alphas_") or hasattr(best_clf, "Cs_"):
+            if hasattr(best_clf, "alphas_"):
+                alphas_ = best_clf.alphas_
+            elif hasattr(best_clf, "alpha_"):
+                alphas_ = best_clf.alpha_
+            elif hasattr(best_clf, "Cs_"):
+                alphas_ = best_clf.Cs_
+            else:
+                alphas_ = None
+        coef_ = best_clf.coef_ if hasattr(best_clf, "coef_") else None
     if not is_binary:
         # Handle prediction probabilities for multiclass
         if hasattr(stacking_model, "predict_proba"):
```
```diff
@@ -3581,6 +3663,17 @@ def predict(
                 )
         else:
             y_pred_proba_final = None  # No probability output for certain models
+        # Access alphas if applicable (e.g., ElasticNetCV, LassoCV)
+        if hasattr(best_clf, "alphas_") or hasattr(best_clf, "Cs_"):
+            if hasattr(best_clf, "alphas_"):
+                alphas_ = best_clf.alphas_
+            elif hasattr(best_clf, "alpha_"):
+                alphas_ = best_clf.alpha_
+            elif hasattr(best_clf, "Cs_"):
+                alphas_ = best_clf.Cs_
+            else:
+                alphas_ = None
+        coef_ = best_clf.coef_ if hasattr(best_clf, "coef_") else None
     #! dict_pred_stack
     dict_pred_stack = {}
     validation_scores_final = {}
```
```diff
@@ -3631,6 +3724,9 @@ def predict(
             "predictions_proba": (
                 y_pred_proba_final.tolist() if y_pred_proba_final is not None else None
             ),
+            "features": share_col_names,
+            "coef": coef_,
+            "alphas": alphas_
         }
     else:  # "regression"
         dict_pred_stack = {
```
```diff
@@ -3641,6 +3737,9 @@ def predict(
             "predictions_proba": (
                 y_pred_proba_final.tolist() if y_pred_proba_final is not None else None
             ),
+            "features": share_col_names,
+            "coef": coef_,
+            "alphas": alphas_
         }
 else:  # multi-classes
     if y_pred_proba_final is not None:
```
```diff
@@ -3680,6 +3779,9 @@ def predict(
             "predictions_proba": (
                 y_pred_proba_final.tolist() if y_pred_proba_final is not None else None
             ),
+            "features": share_col_names,
+            "coef": coef_,
+            "alphas": alphas_
         }
     else:  # "regression"
         dict_pred_stack = {
```
```diff
@@ -3690,6 +3792,9 @@ def predict(
             "predictions_proba": (
                 y_pred_proba_final.tolist() if y_pred_proba_final is not None else None
             ),
+            "features": share_col_names,
+            "coef": coef_,
+            "alphas": alphas_
         }
 
     else:
```
```diff
@@ -3712,8 +3817,11 @@ def predict(
             "predictions_proba": (
                 y_pred_proba_final.tolist() if y_pred_proba_final is not None else None
             ),
+            "features": share_col_names,
             "y_train": y_train if y_train is not None else [],
             "y_true": y_true if y_true is not None else [],
+            "coef": coef_,
+            "alphas": alphas_
         }
     # merge together
     df_pred = pd.DataFrame(
```
```diff
@@ -3728,16 +3836,16 @@ def predict(
     # if dir_save:
     #     ips.figsave(dir_save + f"validate_features_stacking_{now_}.pdf")
     if vote:
-        print(f"
+        print(f"⤵ voting...")
         from sklearn.ensemble import VotingClassifier, VotingRegressor
-        #!
+        #! voting
         n_top_models = min(n_top_models, df_results.shape[0])
         base_estimators = []
         for name, cls in zip(list(df_results.iloc[:n_top_models, :].index), df_results.iloc[:n_top_models, :]["best_clf"].tolist()):
             base_estimators.append((name, cls))
         # Apply Voting Classifier/Regressor
         if purpose == "classification":
-            print(f"
+            print(f"⤵ VotingClassifier...via {voting}")
             if voting == 'hard':
                 # Hard voting does not support `predict_proba`
                 voting_model = VotingClassifier(estimators=base_estimators)
```
```diff
@@ -3745,7 +3853,7 @@ def predict(
             # Soft voting supports `predict_proba`
             voting_model = VotingClassifier(estimators=base_estimators, voting="soft")
         else:
-            print(f"
+            print(f"⤵ VotingRegressor...")
             voting_model = VotingRegressor(estimators=base_estimators)
 
         # Train the Voting Classifier/Regressor
```
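The hard/soft distinction in the voting branch matters because hard voting tallies predicted labels and therefore exposes no `predict_proba`, whereas soft voting averages class probabilities. A standalone sketch:

```python
# Hard voting counts labels; soft voting averages probabilities and
# is the only mode that supports predict_proba.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=200, random_state=0)
base_estimators = [
    ("lr", LogisticRegression(max_iter=500)),
    ("rf", RandomForestClassifier(random_state=0)),
]
hard = VotingClassifier(estimators=base_estimators).fit(X, y)  # voting="hard"
soft = VotingClassifier(estimators=base_estimators, voting="soft").fit(X, y)
print(soft.predict_proba(X[:2]))  # hard.predict_proba(...) would raise
```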
```diff
@@ -3770,10 +3878,23 @@ def predict(
                 y_pred_proba_vote = y_pred_proba_vote[:, 1]
             else:
                 y_pred_proba_vote = None
+
+            # Access alphas if applicable (e.g., ElasticNetCV, LassoCV)
+            if hasattr(best_clf, "alphas_") or hasattr(best_clf, "Cs_"):
+                if hasattr(best_clf, "alphas_"):
+                    alphas_ = best_clf.alphas_
+                elif hasattr(best_clf, "alpha_"):
+                    alphas_ = best_clf.alpha_
+                elif hasattr(best_clf, "Cs_"):
+                    alphas_ = best_clf.Cs_
+                else:
+                    alphas_ = None
+            coef_ = best_clf.coef_ if hasattr(best_clf, "coef_") else None
         else:  # Regression
             y_pred_proba_vote = None
+            coef_, alphas_ = None, None
 
-        print(f"
+        print(f"⤵ collecting voting results...")
         #! dict_pred_vote
         dict_pred_vote = {}
         validation_scores_vote = {}
```
```diff
@@ -3822,6 +3943,9 @@ def predict(
             "predictions_proba": (
                 y_pred_proba_vote.tolist() if y_pred_proba_vote is not None else None
             ),
+            "features": share_col_names,
+            "coef": coef_,
+            "alphas": alphas_
         }
     else:  # Multi-class
         if y_pred_proba_vote is not None:
```
```diff
@@ -3856,6 +3980,9 @@ def predict(
             "predictions_proba": (
                 y_pred_proba_vote.tolist() if y_pred_proba_vote is not None else None
             ),
+            "features": share_col_names,
+            "coef": coef_,
+            "alphas": alphas_
         }
 else:
     if y_true is None:
```
```diff
@@ -3877,6 +4004,7 @@ def predict(
             "predictions_proba": (
                 y_pred_proba_vote.tolist() if y_pred_proba_vote is not None else None
             ),
+            "features": share_col_names,
             "y_train": y_train if y_train is not None else [],
             "y_true": y_true if y_true is not None else [],
         }
```
```diff
@@ -3907,9 +4035,15 @@ def predict(
     now_ = datetime.now().strftime("%y%m%d_%H%M%S")
     # try:
     if df_res.shape[0] > 3:
-        plot_validate_features(df_res, is_binary=is_binary)
+        try:
+            plot_validate_features(df_res, is_binary=is_binary)
+        except Exception as e:
+            print(e)
     else:
-        plot_validate_features_single(df_res, is_binary=is_binary)
+        try:
+            plot_validate_features_single(df_res, is_binary=is_binary)
+        except Exception as e:
+            print(e)
     if dir_save:
         ips.figsave(dir_save + f"validate_features{now_}.pdf")
     # except Exception as e:
```