py2ls 0.2.4.24__py3-none-any.whl → 0.2.4.25__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
py2ls/ml2ls.py CHANGED
@@ -2206,6 +2206,8 @@ def predict(
     y_train: pd.Series,
     x_true: pd.DataFrame = None,
     y_true: Optional[pd.Series] = None,
+    fill_missing: bool = True,
+    scaler: str = 'standard',  # ["standard", "minmax", "robust", "maxabs"]
     backward: bool = False,  # backward_regression
     backward_thr: float = 0.05,  # p-value threshold, only used when backward is True
     common_features: set = None,
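
Two new keyword arguments surface preprocessing steps that the rest of the diff wires in. A hypothetical call exercising them (the leading `x_train` positional argument and the result name `res` are assumptions; `purpose`, `x_true`, and `y_true` appear in the surrounding code):

    from py2ls import ml2ls

    res = ml2ls.predict(
        x_train, y_train,
        x_true=x_test, y_true=y_test,
        fill_missing=True,   # KNN-impute NaNs before fitting (new in 0.2.4.25)
        scaler="robust",     # one of "standard", "minmax", "robust", "maxabs"
        purpose="classification",
    )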
@@ -2324,7 +2326,7 @@ def predict(
     if purpose == "classification":
         model_ = {
             "Random Forest": RandomForestClassifier(
-                random_state=random_state, class_weight=class_weight
+                random_state=random_state, class_weight=class_weight, n_jobs=n_jobs
             ),
             # SVC (Support Vector Classification)
             "SVM": SVC(
@@ -2335,7 +2337,7 @@ def predict(
             ),
             # fit the best model without enforcing sparsity, which means it does not directly perform feature selection.
             "Logistic Regression": LogisticRegression(
-                class_weight=class_weight, random_state=random_state
+                class_weight=class_weight, random_state=random_state, n_jobs=n_jobs
             ),
             # Logistic Regression with L1 Regularization (Lasso)
             "Lasso Logistic Regression": LogisticRegression(
@@ -2346,49 +2348,51 @@ def predict(
                 eval_metric="logloss",
                 random_state=random_state,
             ),
-            "KNN": KNeighborsClassifier(n_neighbors=5),
+            "KNN": KNeighborsClassifier(n_neighbors=5, n_jobs=n_jobs),
             "Naive Bayes": GaussianNB(),
             "Linear Discriminant Analysis": LinearDiscriminantAnalysis(),
             "AdaBoost": AdaBoostClassifier(
                 algorithm="SAMME", random_state=random_state
             ),
-            # "LightGBM": lgb.LGBMClassifier(random_state=random_state, class_weight=class_weight),
+            "LightGBM": lgb.LGBMClassifier(random_state=random_state, class_weight=class_weight, n_jobs=n_jobs),
             "CatBoost": cb.CatBoostClassifier(verbose=0, random_state=random_state),
             "Extra Trees": ExtraTreesClassifier(
-                random_state=random_state, class_weight=class_weight
+                random_state=random_state, class_weight=class_weight, n_jobs=n_jobs
             ),
-            "Bagging": BaggingClassifier(random_state=random_state),
+            "Bagging": BaggingClassifier(random_state=random_state, n_jobs=n_jobs),
             "Neural Network": MLPClassifier(max_iter=500, random_state=random_state),
             "DecisionTree": DecisionTreeClassifier(),
             "Quadratic Discriminant Analysis": QuadraticDiscriminantAnalysis(),
             "Ridge": RidgeClassifierCV(
                 class_weight=class_weight, store_cv_results=True
             ),
-            "Perceptron": Perceptron(random_state=random_state),
+            "Perceptron": Perceptron(random_state=random_state, n_jobs=n_jobs),
             "Bernoulli Naive Bayes": BernoulliNB(),
-            "SGDClassifier": SGDClassifier(random_state=random_state),
+            "SGDClassifier": SGDClassifier(random_state=random_state, n_jobs=n_jobs),
         }
     elif purpose == "regression":
         model_ = {
-            "Random Forest": RandomForestRegressor(random_state=random_state),
+            "Random Forest": RandomForestRegressor(random_state=random_state, n_jobs=n_jobs),
             "SVM": SVR(),  # SVR (Support Vector Regression)
             # "Lasso": Lasso(random_state=random_state),  # same as LassoCV, except an alpha must be supplied,
             "LassoCV": LassoCV(
-                cv=cv_folds, random_state=random_state
+                cv=cv_folds, random_state=random_state, n_jobs=n_jobs
             ),  # LassoCV finds the optimal alpha automatically, so it is preferred over Lasso
             "Gradient Boosting": GradientBoostingRegressor(random_state=random_state),
-            "XGBoost": xgb.XGBRegressor(eval_metric="rmse", random_state=random_state),
-            "Linear Regression": LinearRegression(),
+            "XGBoost": xgb.XGBRegressor(eval_metric="rmse", random_state=random_state, n_jobs=n_jobs),
+            "Linear Regression": LinearRegression(n_jobs=n_jobs),
             "AdaBoost": AdaBoostRegressor(random_state=random_state),
-            # "LightGBM": lgb.LGBMRegressor(random_state=random_state),
+            "LightGBM": lgb.LGBMRegressor(random_state=random_state, n_jobs=n_jobs,
+                force_row_wise=True  # or use force_col_wise=True if memory is a concern
+            ),
             "CatBoost": cb.CatBoostRegressor(verbose=0, random_state=random_state),
-            "Extra Trees": ExtraTreesRegressor(random_state=random_state),
-            "Bagging": BaggingRegressor(random_state=random_state),
+            "Extra Trees": ExtraTreesRegressor(random_state=random_state, n_jobs=n_jobs),
+            "Bagging": BaggingRegressor(random_state=random_state, n_jobs=n_jobs),
             "Neural Network": MLPRegressor(max_iter=500, random_state=random_state),
             "ElasticNet": ElasticNet(random_state=random_state),
             "Ridge": Ridge(),
-            "KNN": KNeighborsRegressor(),
-            "TheilSen": TheilSenRegressor(),
+            "KNN": KNeighborsRegressor(n_jobs=n_jobs),
+            "TheilSen": TheilSenRegressor(n_jobs=n_jobs),
             "Huber": HuberRegressor(),
             "Poisson": PoissonRegressor()
         }
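
Most of this hunk threads `n_jobs` into every estimator that accepts it, and re-enables the previously commented-out LightGBM models (with `force_row_wise=True` on the regressor to skip LightGBM's row/column auto-detection overhead). Under scikit-learn's convention, `n_jobs=-1` uses all cores; a minimal sketch of the effect:

    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier

    X, y = make_classification(n_samples=1_000, n_features=20, random_state=0)
    # Trees are now fit in parallel across all available cores;
    # n_jobs=None (the old behavior) keeps the fit single-process.
    clf = RandomForestClassifier(n_estimators=200, random_state=0, n_jobs=-1)
    clf.fit(X, y)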
@@ -2410,7 +2414,7 @@ def predict(
     # indicate cls:
     if ips.run_once_within(30):  # 10 min
         print(f"processing: {list(models.keys())}")
-
+    print(isinstance(y_train, str) and y_train in x_train.columns)
     if isinstance(y_train, str) and y_train in x_train.columns:
         y_train_col_name = y_train
         y_train = x_train[y_train]
@@ -2418,6 +2422,7 @@ def predict(
         x_train = x_train.drop(y_train_col_name, axis=1)
     # else:
     #     y_train = ips.df_encoder(pd.DataFrame(y_train), method="dummy").values.ravel()
+
     y_train = pd.DataFrame(y_train)
     if y_train.select_dtypes(include=np.number).empty:
         y_train_ = ips.df_encoder(y_train, method="dummy", drop=None)
@@ -2430,6 +2435,9 @@ def predict(
         y_train = ips.df_encoder(pd.DataFrame(y_train), method="label")
     print("is_binary:", is_binary)
 
+    if fill_missing:
+        ips.df_fillna(data=x_train, method="knn", inplace=True, axis=0)
+        ips.df_fillna(data=y_train, method="knn", inplace=True, axis=0)
     # Perform backward feature selection
     if backward:
         selected_features = backward_regression(x_train, y_train, thr=backward_thr)
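
`ips.df_fillna(method="knn")` is py2ls's own imputation helper and its internals are not shown in this diff. A rough equivalent using scikit-learn's KNNImputer, as an assumption about what "knn" imputation means here rather than the package's actual code:

    import pandas as pd
    from sklearn.impute import KNNImputer

    def knn_fillna(df: pd.DataFrame, n_neighbors: int = 5) -> pd.DataFrame:
        # Each missing cell is filled with the mean of that column over the
        # k rows nearest to it in feature space.
        imputer = KNNImputer(n_neighbors=n_neighbors)
        return pd.DataFrame(imputer.fit_transform(df),
                            columns=df.columns, index=df.index)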
@@ -2458,6 +2466,8 @@ def predict(
             pd.DataFrame(y_train), method="label"
         ).values.ravel()
 
+    if fill_missing:
+        ips.df_fillna(data=x_true, method="knn", inplace=True, axis=0)
     if y_true is not None:
         if isinstance(y_true, str) and y_true in x_true.columns:
             y_true_col_name = y_true
@@ -2490,11 +2500,16 @@ def predict(
     # Ensure common features are selected
     if common_features is not None:
         x_train, x_true = x_train[common_features], x_true[common_features]
+        share_col_names = common_features
     else:
         share_col_names = ips.shared(x_train.columns, x_true.columns, verbose=verbose)
         x_train, x_true = x_train[share_col_names], x_true[share_col_names]
 
-    x_train, x_true = ips.df_scaler(x_train), ips.df_scaler(x_true)
+    #! scaler
+    # fit the scaler on x_train and export it so x_true is transformed the same way
+    x_train, scaler_ = ips.df_scaler(x_train, method=scaler, return_scaler=True)
+    #
+    x_true = ips.df_scaler(x_true, scaler=scaler_)  # make sure the same scaler is used
     x_train, x_true = ips.df_encoder(x_train, method="dummy"), ips.df_encoder(
         x_true, method="dummy"
    )
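
This is the most consequential change in the hunk: the scaler is now fit on `x_train` only and the fitted object is reused on `x_true`, instead of fitting each frame independently, which leaked test-set statistics into the scaling. The `method` strings presumably map to the usual scikit-learn scalers; the mapping below is an assumption about `ips.df_scaler`'s internals, not its actual code:

    from sklearn.preprocessing import (
        StandardScaler, MinMaxScaler, RobustScaler, MaxAbsScaler,
    )

    SCALERS = {
        "standard": StandardScaler,  # zero mean, unit variance
        "minmax": MinMaxScaler,      # rescale to [0, 1]
        "robust": RobustScaler,      # median/IQR, tolerant of outliers
        "maxabs": MaxAbsScaler,      # divide by max |x|, preserves sparsity
    }

    scaler_ = SCALERS["standard"]().fit(x_train)  # fit on training data only
    x_train_scaled = scaler_.transform(x_train)
    x_true_scaled = scaler_.transform(x_true)     # same statistics, no leakage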
@@ -2516,6 +2531,7 @@ def predict(
     if isinstance(y_train, np.ndarray):
         y_true = ips.df_encoder(data=pd.DataFrame(y_true), method="label")
         y_true = np.asarray(y_true)
+
     # Hyperparameter grids for tuning
     param_grid_common_xgb = {
         'learning_rate': [0.01, 0.1, 0.2, 0.3],
@@ -3168,83 +3184,124 @@ def predict(
     ):
         if verbose:
             print(f"\nTraining and validating {name}:")
-
-        # Grid search with KFold or StratifiedKFold
-        if is_binary:
-            gs = GridSearchCV(
-                clf,
-                param_grid=param_grids.get(name, {}),
-                scoring=(
-                    "roc_auc"
-                    if purpose == "classification"
-                    else "neg_mean_squared_error"
-                ),
-                cv=cv,
-                n_jobs=n_jobs,
-                verbose=verbose,
-            )
-
-            gs.fit(x_train, y_train)
-            best_clf = gs.best_estimator_
-            # make sure x_train and x_test has the same name
-            x_true = x_true.reindex(columns=x_train.columns, fill_value=0)
-            y_pred = best_clf.predict(x_true)
-            if hasattr(best_clf, "predict_proba"):
-                y_pred_proba = best_clf.predict_proba(x_true)
-                print("Shape of predicted probabilities:", y_pred_proba.shape)
-                if y_pred_proba.shape[1] == 1:
-                    y_pred_proba = np.hstack(
-                        [1 - y_pred_proba, y_pred_proba]
-                    )  # Add missing class probabilities
-                y_pred_proba = y_pred_proba[:, 1]
-            elif hasattr(best_clf, "decision_function"):
-                # If predict_proba is not available, use decision_function (e.g., for SVM)
-                y_pred_proba = best_clf.decision_function(x_true)
-                # Ensure y_pred_proba is within 0 and 1 bounds
-                y_pred_proba = (y_pred_proba - y_pred_proba.min()) / (
-                    y_pred_proba.max() - y_pred_proba.min()
+        try:
+            # Grid search with KFold or StratifiedKFold
+            if is_binary:
+                gs = GridSearchCV(
+                    clf,
+                    param_grid=param_grids.get(name, {}),
+                    scoring=(
+                        "roc_auc"
+                        if purpose == "classification"
+                        else "neg_mean_squared_error"
+                    ),
+                    cv=cv,
+                    n_jobs=n_jobs,
+                    verbose=verbose,
                 )
-            else:
-                y_pred_proba = None  # No probability output for certain models
-        else:
-            gs = GridSearchCV(
-                clf,
-                param_grid=param_grids.get(name, {}),
-                scoring=(
-                    "roc_auc_ovr"
-                    if purpose == "classification"
-                    else "neg_mean_squared_error"
-                ),
-                cv=cv,
-                n_jobs=n_jobs,
-                verbose=verbose,
-            )
-
-            # Fit GridSearchCV
-            gs.fit(x_train, y_train)
-            best_clf = gs.best_estimator_
-
-            # Ensure x_true aligns with x_train columns
-            x_true = x_true.reindex(columns=x_train.columns, fill_value=0)
-            y_pred = best_clf.predict(x_true)
-
-            # Handle prediction probabilities for multiclass
-            if hasattr(best_clf, "predict_proba"):
-                y_pred_proba = best_clf.predict_proba(x_true)
-            elif hasattr(best_clf, "decision_function"):
-                y_pred_proba = best_clf.decision_function(x_true)
 
-                # Normalize for multiclass if necessary
-                if y_pred_proba.ndim == 2:
-                    y_pred_proba = (
-                        y_pred_proba - y_pred_proba.min(axis=1, keepdims=True)
-                    ) / (
-                        y_pred_proba.max(axis=1, keepdims=True)
-                        - y_pred_proba.min(axis=1, keepdims=True)
+                gs.fit(x_train, y_train)
+                best_clf = gs.best_estimator_
+                # make sure x_train and x_test have the same column names
+                x_true = x_true.reindex(columns=x_train.columns, fill_value=0)
+                y_pred = best_clf.predict(x_true)
+                if hasattr(best_clf, "predict_proba"):
+                    y_pred_proba = best_clf.predict_proba(x_true)
+                    print("Shape of predicted probabilities:", y_pred_proba.shape)
+                    if y_pred_proba.shape[1] == 1:
+                        y_pred_proba = np.hstack(
+                            [1 - y_pred_proba, y_pred_proba]
+                        )  # Add missing class probabilities
+                    y_pred_proba = y_pred_proba[:, 1]
+                elif hasattr(best_clf, "decision_function"):
+                    # If predict_proba is not available, use decision_function (e.g., for SVM)
+                    y_pred_proba = best_clf.decision_function(x_true)
+                    # Ensure y_pred_proba is within 0 and 1 bounds
+                    y_pred_proba = (y_pred_proba - y_pred_proba.min()) / (
+                        y_pred_proba.max() - y_pred_proba.min()
                     )
+                else:
+                    y_pred_proba = None  # No probability output for certain models
+                # Access alphas if applicable (e.g., ElasticNetCV, LassoCV)
+                if hasattr(best_clf, "alphas_") or hasattr(best_clf, "Cs_"):
+                    if hasattr(best_clf, "alphas_"):
+                        alphas_ = best_clf.alphas_
+                    elif hasattr(best_clf, "alpha_"):
+                        alphas_ = best_clf.alpha_
+                    elif hasattr(best_clf, "Cs_"):
+                        alphas_ = best_clf.Cs_
+                else:
+                    alphas_ = None
+                coef_ = best_clf.coef_ if hasattr(best_clf, "coef_") else None
             else:
-                y_pred_proba = None  # No probability output for certain models
+                gs = GridSearchCV(
+                    clf,
+                    param_grid=param_grids.get(name, {}),
+                    scoring=(
+                        "roc_auc_ovr"
+                        if purpose == "classification"
+                        else "neg_mean_squared_error"
+                    ),
+                    cv=cv,
+                    n_jobs=n_jobs,
+                    verbose=verbose,
+                )
 
+                # Fit GridSearchCV
+                gs.fit(x_train, y_train)
+                best_clf = gs.best_estimator_
+
+                # Ensure x_true aligns with x_train columns
+                x_true = x_true.reindex(columns=x_train.columns, fill_value=0)
+
+                # do I need to fit x_train, y_train again?
+                best_clf = best_clf.fit(x_train, y_train)
+                y_pred = best_clf.predict(x_true)
+
+                # Handle prediction probabilities for multiclass
+                if hasattr(best_clf, "predict_proba"):
+                    y_pred_proba = best_clf.predict_proba(x_true)
+                elif hasattr(best_clf, "decision_function"):
+                    y_pred_proba = best_clf.decision_function(x_true)
+
+                    # Normalize for multiclass if necessary
+                    if y_pred_proba.ndim == 2:
+                        y_pred_proba = (
+                            y_pred_proba - y_pred_proba.min(axis=1, keepdims=True)
+                        ) / (
+                            y_pred_proba.max(axis=1, keepdims=True)
+                            - y_pred_proba.min(axis=1, keepdims=True)
+                        )
+                else:
+                    y_pred_proba = None  # No probability output for certain models
+                # Access alphas if applicable (e.g., ElasticNetCV, LassoCV)
+                if hasattr(best_clf, "alphas_") or hasattr(best_clf, "Cs_"):
+                    if hasattr(best_clf, "alphas_"):
+                        alphas_ = best_clf.alphas_
+                    elif hasattr(best_clf, "alpha_"):
+                        alphas_ = best_clf.alpha_
+                    elif hasattr(best_clf, "Cs_"):
+                        alphas_ = best_clf.Cs_
+                else:
+                    alphas_ = None
+                coef_ = best_clf.coef_ if hasattr(best_clf, "coef_") else None
+        except Exception as e:
+            alphas_, coef_ = None, None
+            print(f"skipped {clf}: {e}")
+            continue
+        # try to make the prediction format consistent
+        try:
+            y_pred = [i[0] for i in y_pred]
+        except:
+            pass
+        try:
+            y_true = [i[0] for i in y_true]
+        except:
+            pass
+        try:
+            y_train = [i[0] for i in y_train]
+        except:
+            pass
         validation_scores = {}
 
         if y_true is not None and y_pred_proba is not None:
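
Two changes dominate this large hunk: the per-model grid search is wrapped in try/except so a single failing estimator is skipped instead of aborting the whole run, and the best estimator's regularization path (`alphas_`/`alpha_`/`Cs_`) and coefficients are captured when present. Stripped of the binary/multiclass branching, the pattern is roughly (a condensed sketch, not the function's full body):

    results = {}
    for name, clf in models.items():
        try:
            gs = GridSearchCV(clf, param_grid=param_grids.get(name, {}),
                              cv=cv, n_jobs=n_jobs)
            gs.fit(x_train, y_train)
            best_clf = gs.best_estimator_
            # Keep the regularization path and coefficients when exposed.
            alphas_ = getattr(best_clf, "alphas_", getattr(best_clf, "Cs_", None))
            coef_ = getattr(best_clf, "coef_", None)
        except Exception as e:
            print(f"skipped {name}: {e}")  # one bad model no longer kills the loop
            continue
        results[name] = {"best_clf": best_clf, "coef": coef_, "alphas": alphas_}

Note that the added `best_clf.fit(x_train, y_train)` in the multiclass branch is redundant: with `refit=True` (the GridSearchCV default), `best_estimator_` has already been refit on the full training data, as the author's own comment suspects.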
@@ -3294,20 +3351,26 @@ def predict(
                     "roc_curve": roc_info,
                     "pr_curve": pr_info,
                     "confusion_matrix": confusion_matrix(y_true, y_pred),
-                    "predictions": y_pred.tolist(),
+                    "predictions": y_pred,  # .tolist(),
                     "predictions_proba": (
                         y_pred_proba.tolist() if y_pred_proba is not None else None
                     ),
+                    "features": share_col_names,
+                    "coef": coef_,
+                    "alphas": alphas_
                 }
             else:  # "regression"
                 results[name] = {
                     "best_clf": gs.best_estimator_,
                     "best_params": gs.best_params_,
                     "scores": validation_scores,  # e.g., neg_MSE, R², etc.
-                    "predictions": y_pred.tolist(),
+                    "predictions": y_pred,  # .tolist(),
                     "predictions_proba": (
                         y_pred_proba.tolist() if y_pred_proba is not None else None
                     ),
+                    "features": share_col_names,
+                    "coef": coef_,
+                    "alphas": alphas_
                 }
         else:  # multi-classes
             if y_pred_proba is not None:
@@ -3346,20 +3409,26 @@ def predict(
                     "roc_curve": roc_info,
                     "pr_curve": pr_info,
                     "confusion_matrix": confusion_matrix(y_true, y_pred),
-                    "predictions": y_pred.tolist(),
+                    "predictions": y_pred,  # .tolist(),
                     "predictions_proba": (
                         y_pred_proba.tolist() if y_pred_proba is not None else None
                     ),
+                    "features": share_col_names,
+                    "coef": coef_,
+                    "alphas": alphas_
                 }
             else:  # "regression"
                 results[name] = {
                     "best_clf": gs.best_estimator_,
                     "best_params": gs.best_params_,
                     "scores": validation_scores,  # e.g., neg_MSE, R², etc.
-                    "predictions": y_pred.tolist(),
+                    "predictions": y_pred,  # .tolist(),
                     "predictions_proba": (
                         y_pred_proba.tolist() if y_pred_proba is not None else None
                     ),
+                    "features": share_col_names,
+                    "coef": coef_,
+                    "alphas": alphas_
                 }
 
     else:
@@ -3378,12 +3447,15 @@ def predict(
             "best_clf": gs.best_estimator_,
             "best_params": gs.best_params_,
             "scores": validation_scores,
-            "predictions": y_pred.tolist(),
+            "predictions": y_pred,  # .tolist(),
             "predictions_proba": (
                 y_pred_proba.tolist() if y_pred_proba is not None else None
             ),
+            "features": share_col_names,
             "y_train": y_train if y_train is not None else [],
             "y_true": y_true if y_true is not None else [],
+            "coef": coef_,
+            "alphas": alphas_
         }
 
     # Convert results to DataFrame
@@ -3446,7 +3518,7 @@ def predict(
         for i, j in top_models.to_dict().items():
             base_estimators.append((i, j))
         if stacking_cv:
-            print(f" ⤵ stacking_cv is processing...")
+            print(f"⤵ stacking_cv is processing...")
             #* define a few representative final_estimator candidates
             # several alternatives to choose from
             if purpose == "classification":
@@ -3520,7 +3592,7 @@ def predict(
             best_final_estimator = cv_results_df.iloc[0]['final_estimator']
             print(f"Best final estimator based on cross-validation: {best_final_estimator}")
         else:
-            print(f" ⤵ trying to find the best_final_estimator for stacking...")
+            print(f"⤵ trying to find the best_final_estimator for stacking...")
             if purpose=="classification":
                 best_final_estimator = LogisticRegression(class_weight=class_weight,
                                                           random_state=random_state,
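
For context: the stacking path scores several candidate final estimators by cross-validation and keeps the winner. A minimal sketch of that selection step; the candidate set and names here are illustrative, not the function's actual list:

    from sklearn.linear_model import LogisticRegression, RidgeClassifier
    from sklearn.model_selection import cross_val_score

    candidates = {
        "logistic": LogisticRegression(max_iter=1000),
        "ridge": RidgeClassifier(),
    }
    # Mean CV accuracy per candidate meta-learner; highest score wins.
    scores = {name: cross_val_score(est, x_train, y_train, cv=5).mean()
              for name, est in candidates.items()}
    best_final_estimator = candidates[max(scores, key=scores.get)]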
@@ -3530,26 +3602,25 @@ def predict(
         print(f"⤵ the best best_final_estimator: {best_final_estimator}")
         #! apply stacking
         if purpose == "classification":
-            print(f" ⤵ StackingClassifier...")
+            print(f"⤵ StackingClassifier...")
             stacking_model = StackingClassifier(estimators=base_estimators,
                                                 final_estimator=best_final_estimator,
                                                 cv=cv)
         else:
-            print(f" ⤵ StackingRegressor...")
+            print(f"⤵ StackingRegressor...")
             stacking_model = StackingRegressor(estimators=base_estimators,
                                                final_estimator=best_final_estimator,
                                                cv=cv)
 
         # Train the Stacking Classifier
-        print(f" ⤵ fit & predict...")
+        print(f"⤵ fit & predict...")
         stacking_model.fit(x_train, y_train)
         y_pred_final = stacking_model.predict(x_true)
-        print(f" ⤵ collecting results...")
+        print(f"⤵ collecting results...")
         # pred_proba
         if is_binary:
             if hasattr(stacking_model, "predict_proba"):
                 y_pred_proba_final = stacking_model.predict_proba(x_true)
-                print("Shape of predicted probabilities:", y_pred_proba_final.shape)
                 if y_pred_proba_final.shape[1] == 1:
                     y_pred_proba_final = np.hstack(
                         [1 - y_pred_proba_final, y_pred_proba_final]
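
One caveat worth flagging: the `# Access alphas` blocks added below (and again in the multiclass and voting branches) read `best_clf`, which at this point still holds the last estimator fitted in the per-model loop, not the stacking or voting ensemble, so the reported `coef`/`alphas` come from that leftover model. This looks like an oversight; a version tied to the ensemble itself would look roughly like (a sketch, assuming the fitted meta-learner is what is of interest):

    # Inspect the stacking head rather than a leftover best_clf:
    final_est = stacking_model.final_estimator_   # fitted meta-learner
    coef_ = getattr(final_est, "coef_", None)
    alphas_ = getattr(final_est, "alphas_", getattr(final_est, "Cs_", None))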
@@ -3564,6 +3635,17 @@ def predict(
                     )
             else:
                 y_pred_proba_final = None  # No probability output for certain models
+            # Access alphas if applicable (e.g., ElasticNetCV, LassoCV)
+            if hasattr(best_clf, "alphas_") or hasattr(best_clf, "Cs_"):
+                if hasattr(best_clf, "alphas_"):
+                    alphas_ = best_clf.alphas_
+                elif hasattr(best_clf, "alpha_"):
+                    alphas_ = best_clf.alpha_
+                elif hasattr(best_clf, "Cs_"):
+                    alphas_ = best_clf.Cs_
+            else:
+                alphas_ = None
+            coef_ = best_clf.coef_ if hasattr(best_clf, "coef_") else None
         if not is_binary:
             # Handle prediction probabilities for multiclass
             if hasattr(stacking_model, "predict_proba"):
@@ -3581,6 +3663,17 @@ def predict(
                     )
             else:
                 y_pred_proba_final = None  # No probability output for certain models
+            # Access alphas if applicable (e.g., ElasticNetCV, LassoCV)
+            if hasattr(best_clf, "alphas_") or hasattr(best_clf, "Cs_"):
+                if hasattr(best_clf, "alphas_"):
+                    alphas_ = best_clf.alphas_
+                elif hasattr(best_clf, "alpha_"):
+                    alphas_ = best_clf.alpha_
+                elif hasattr(best_clf, "Cs_"):
+                    alphas_ = best_clf.Cs_
+            else:
+                alphas_ = None
+            coef_ = best_clf.coef_ if hasattr(best_clf, "coef_") else None
         #! dict_pred_stack
         dict_pred_stack = {}
         validation_scores_final = {}
@@ -3631,6 +3724,9 @@ def predict(
                     "predictions_proba": (
                         y_pred_proba_final.tolist() if y_pred_proba_final is not None else None
                     ),
+                    "features": share_col_names,
+                    "coef": coef_,
+                    "alphas": alphas_
                 }
             else:  # "regression"
                 dict_pred_stack = {
@@ -3641,6 +3737,9 @@ def predict(
                     "predictions_proba": (
                         y_pred_proba_final.tolist() if y_pred_proba_final is not None else None
                     ),
+                    "features": share_col_names,
+                    "coef": coef_,
+                    "alphas": alphas_
                 }
         else:  # multi-classes
             if y_pred_proba_final is not None:
@@ -3680,6 +3779,9 @@ def predict(
                     "predictions_proba": (
                         y_pred_proba_final.tolist() if y_pred_proba_final is not None else None
                     ),
+                    "features": share_col_names,
+                    "coef": coef_,
+                    "alphas": alphas_
                 }
             else:  # "regression"
                 dict_pred_stack = {
@@ -3690,6 +3792,9 @@ def predict(
                     "predictions_proba": (
                         y_pred_proba_final.tolist() if y_pred_proba_final is not None else None
                     ),
+                    "features": share_col_names,
+                    "coef": coef_,
+                    "alphas": alphas_
                 }
 
     else:
@@ -3712,8 +3817,11 @@ def predict(
             "predictions_proba": (
                 y_pred_proba_final.tolist() if y_pred_proba_final is not None else None
             ),
+            "features": share_col_names,
             "y_train": y_train if y_train is not None else [],
             "y_true": y_true if y_true is not None else [],
+            "coef": coef_,
+            "alphas": alphas_
         }
         # merge together
         df_pred = pd.DataFrame(
@@ -3728,16 +3836,16 @@ def predict(
     # if dir_save:
     #     ips.figsave(dir_save + f"validate_features_stacking_{now_}.pdf")
     if vote:
-        print(f" ⤵ voting...")
+        print(f"⤵ voting...")
         from sklearn.ensemble import VotingClassifier, VotingRegressor
-        #! Votting
+        #! voting
         n_top_models = min(n_top_models, df_results.shape[0])
         base_estimators = []
         for name, cls in zip(list(df_results.iloc[:n_top_models, :].index), df_results.iloc[:n_top_models, :]["best_clf"].tolist()):
            base_estimators.append((name, cls))
        # Apply Voting Classifier/Regressor
        if purpose == "classification":
-            print(f" ⤵ VotingClassifier...via{votting}")
+            print(f"⤵ VotingClassifier...via{voting}")
            if voting=='hard':
                # Hard voting does not support `predict_proba`
                voting_model = VotingClassifier(estimators=base_estimators)
@@ -3745,7 +3853,7 @@ def predict(
                 # Soft voting supports `predict_proba`
                 voting_model = VotingClassifier(estimators=base_estimators, voting="soft")
         else:
-            print(f" ⤵ VotingRegressor...")
+            print(f"⤵ VotingRegressor...")
             voting_model = VotingRegressor(estimators=base_estimators)
 
         # Train the Voting Classifier/Regressor
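
On the hard/soft distinction the branch above handles: hard voting takes a majority vote over predicted labels and exposes no `predict_proba`; soft voting averages class probabilities and therefore requires every base estimator to implement `predict_proba`. A minimal sketch, reusing `base_estimators` from the surrounding code:

    from sklearn.ensemble import VotingClassifier

    hard_vote = VotingClassifier(estimators=base_estimators)                 # default voting="hard"
    soft_vote = VotingClassifier(estimators=base_estimators, voting="soft")  # averages predict_proba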
@@ -3770,10 +3878,23 @@ def predict(
                     y_pred_proba_vote = y_pred_proba_vote[:, 1]
             else:
                 y_pred_proba_vote = None
+
+            # Access alphas if applicable (e.g., ElasticNetCV, LassoCV)
+            if hasattr(best_clf, "alphas_") or hasattr(best_clf, "Cs_"):
+                if hasattr(best_clf, "alphas_"):
+                    alphas_ = best_clf.alphas_
+                elif hasattr(best_clf, "alpha_"):
+                    alphas_ = best_clf.alpha_
+                elif hasattr(best_clf, "Cs_"):
+                    alphas_ = best_clf.Cs_
+            else:
+                alphas_ = None
+            coef_ = best_clf.coef_ if hasattr(best_clf, "coef_") else None
         else:  # Regression
             y_pred_proba_vote = None
+            coef_, alphas_ = None, None
 
-        print(f" ⤵ collecting voting results...")
+        print(f"⤵ collecting voting results...")
         #! dict_pred_vote
         dict_pred_vote = {}
         validation_scores_vote = {}
@@ -3822,6 +3943,9 @@ def predict(
                     "predictions_proba": (
                         y_pred_proba_vote.tolist() if y_pred_proba_vote is not None else None
                     ),
+                    "features": share_col_names,
+                    "coef": coef_,
+                    "alphas": alphas_
                 }
             else:  # Multi-class
                 if y_pred_proba_vote is not None:
@@ -3856,6 +3980,9 @@ def predict(
                     "predictions_proba": (
                         y_pred_proba_vote.tolist() if y_pred_proba_vote is not None else None
                     ),
+                    "features": share_col_names,
+                    "coef": coef_,
+                    "alphas": alphas_
                 }
         else:
             if y_true is None:
@@ -3877,6 +4004,7 @@ def predict(
             "predictions_proba": (
                 y_pred_proba_vote.tolist() if y_pred_proba_vote is not None else None
             ),
+            "features": share_col_names,
             "y_train": y_train if y_train is not None else [],
             "y_true": y_true if y_true is not None else [],
         }
@@ -3907,9 +4035,15 @@ def predict(
     now_ = datetime.now().strftime("%y%m%d_%H%M%S")
     # try:
     if df_res.shape[0] > 3:
-        plot_validate_features(df_res, is_binary=is_binary)
+        try:
+            plot_validate_features(df_res, is_binary=is_binary)
+        except Exception as e:
+            print(e)
     else:
-        plot_validate_features_single(df_res, is_binary=is_binary)
+        try:
+            plot_validate_features_single(df_res, is_binary=is_binary)
+        except Exception as e:
+            print(e)
     if dir_save:
         ips.figsave(dir_save + f"validate_features{now_}.pdf")
     # except Exception as e: