py2ls 0.2.4.16__py3-none-any.whl → 0.2.4.18__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
py2ls/.git/index CHANGED
Binary file
py2ls/ips.py CHANGED
@@ -2171,6 +2171,8 @@ def fload(fpath, kind=None, **kwargs):
2171
2171
  continue
2172
2172
  else:
2173
2173
  pass
2174
+ if is_df_abnormal(df,verbose=verbose):
2175
+ df=pd.read_csv(fpath,**kwargs)
2174
2176
  display(df.head(2))
2175
2177
  print(f"shape: {df.shape}")
2176
2178
  return df
py2ls/ml2ls.py CHANGED
@@ -2048,6 +2048,7 @@ def predict(
2048
2048
  y_train: pd.Series,
2049
2049
  x_true: pd.DataFrame = None,
2050
2050
  y_true: Optional[pd.Series] = None,
2051
+ backward:bool=False, # backward_regression
2051
2052
  common_features: set = None,
2052
2053
  purpose: str = "classification", # 'classification' or 'regression'
2053
2054
  cls: Optional[Dict[str, Any]] = None,
@@ -2252,6 +2253,12 @@ def predict(
2252
2253
  if is_binary:
2253
2254
  y_train = ips.df_encoder(pd.DataFrame(y_train), method="label")
2254
2255
  print('is_binary:',is_binary)
2256
+
2257
+ # Perform backward feature selection
2258
+ if backward:
2259
+ selected_features = backward_regression(x_train, y_train, threshold_out=0.05)
2260
+ x_train=x_train[selected_features]
2261
+
2255
2262
  if x_true is None:
2256
2263
  x_train, x_true, y_train, y_true = train_test_split(
2257
2264
  x_train,
@@ -2286,10 +2293,12 @@ def predict(
2286
2293
 
2287
2294
  # y_train=y_train.values.ravel() if y_train is not None else None
2288
2295
  # y_true=y_true.values.ravel() if y_true is not None else None
2289
- y_train = (
2290
- y_train.ravel() if isinstance(y_train, np.ndarray) else y_train.values.ravel()
2291
- )
2292
- y_true = y_true.ravel() if isinstance(y_true, np.ndarray) else y_true.values.ravel()
2296
+ if y_train is not None:
2297
+ y_train = (
2298
+ y_train.ravel() if isinstance(y_train, np.ndarray) else y_train.values.ravel()
2299
+ )
2300
+ if y_true is not None:
2301
+ y_true = y_true.ravel() if isinstance(y_true, np.ndarray) else y_true.values.ravel()
2293
2302
  # Ensure common features are selected
2294
2303
  if common_features is not None:
2295
2304
  x_train, x_true = x_train[common_features], x_true[common_features]
@@ -3071,6 +3080,14 @@ def predict(
3071
3080
  }
3072
3081
 
3073
3082
  else:
3083
+ validation_scores = cal_metrics(
3084
+ y_true,
3085
+ y_pred,
3086
+ y_pred_proba=y_pred_proba,
3087
+ is_binary=is_binary,
3088
+ purpose=purpose,
3089
+ average="weighted",
3090
+ )
3074
3091
  results[name] = {
3075
3092
  "best_clf": gs.best_estimator_,
3076
3093
  "best_params": gs.best_params_,
@@ -3079,6 +3096,8 @@ def predict(
3079
3096
  "predictions_proba": (
3080
3097
  y_pred_proba.tolist() if y_pred_proba is not None else None
3081
3098
  ),
3099
+ "y_train":y_train if y_train is not None else [],
3100
+ "y_true": y_true if y_true is not None else []
3082
3101
  }
3083
3102
 
3084
3103
  # Convert results to DataFrame
@@ -3540,6 +3559,49 @@ def img_datasets_preprocessing(
3540
3559
  display(df_img.head())
3541
3560
 
3542
3561
  return df_img
3562
+
3563
+
3564
def backward_regression(X: pd.DataFrame, y: pd.Series, initial_list=None, threshold_out=0.05, verbose=True):
    """
    Select features by backward elimination on OLS p-values.

    Adapted from
    https://www.kaggle.com/code/adibouayjan/house-price-step-by-step-modeling

    Starting from every column of ``X``, an ordinary-least-squares model
    (with an intercept) is fitted repeatedly.  After each fit the feature with
    the largest p-value is dropped if that p-value exceeds ``threshold_out``;
    the loop stops as soon as no remaining feature exceeds the threshold or
    no feature is left.

    Args:
        X -- feature values; every column is a candidate feature
        y -- target variable.  May also be the name of a column of ``X``, in
             which case that column is used as the target and removed from
             the candidate features
        initial_list -- unused; kept only so existing callers that pass it
             keep working
        threshold_out -- p-value above which the worst feature is dropped
        verbose -- True to print each removed feature and the final selection

    Returns:
        list of selected feature names (possibly empty)
    """
    # Allow the target to be given as a column name of X.
    if isinstance(y, str) and y in X.columns:
        target_name = y
        y = X[target_name]
        X = X.drop(target_name, axis=1)

    included = list(X.columns)
    if included:
        # Imported lazily: only needed when there is something to fit.
        import statsmodels.api as sm

    while included:
        # has_constant="add" forces an intercept column at position 0 even if
        # X already holds a constant column; the default ("skip") would add
        # nothing in that case and the positional slice below would then
        # discard a real feature's p-value instead of the intercept's.
        design = sm.add_constant(X[included], has_constant="add")
        model = sm.OLS(y, design).fit()
        # exclude the intercept for p-value checking
        pvalues = model.pvalues.iloc[1:]
        worst_pval = pvalues.max()
        # Written as "not >" so a NaN p-value ends the loop rather than
        # spinning forever.
        if not worst_pval > threshold_out:
            break
        worst_feature = pvalues.idxmax()
        included.remove(worst_feature)
        if verbose:
            print(f"Removing feature '{worst_feature}' with p-value {worst_pval}")

    if verbose:
        print(f"\nSelected Features:\n{included}")
    return included  # Returns the list of selected features
3603
+
3604
+
3543
3605
  # Function to apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
3544
3606
  def apply_clahe(img):
3545
3607
  import cv2
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: py2ls
3
- Version: 0.2.4.16
3
+ Version: 0.2.4.18
4
4
  Summary: py(thon)2(too)ls
5
5
  Author: Jianfeng
6
6
  Author-email: Jianfeng.Liu0413@gmail.com
@@ -200,6 +200,7 @@ Requires-Dist: selenium (>=4.23.1)
200
200
  Requires-Dist: setuptools (>=70.3.0)
201
201
  Requires-Dist: shellingham (>=1.5.4)
202
202
  Requires-Dist: six (>=1.16.0)
203
+ Requires-Dist: skorch (>=1.0.0)
203
204
  Requires-Dist: sniffio (>=1.3.1)
204
205
  Requires-Dist: sortedcontainers (>=2.4.0)
205
206
  Requires-Dist: soupsieve (>=2.5)
@@ -17,7 +17,7 @@ py2ls/.git/hooks/pre-receive.sample,sha256=pMPSuce7P9jRRBwxvU7nGlldZrRPz0ndsxAlI
17
17
  py2ls/.git/hooks/prepare-commit-msg.sample,sha256=6d3KpBif3dJe2X_Ix4nsp7bKFjkLI5KuMnbwyOGqRhk,1492
18
18
  py2ls/.git/hooks/push-to-checkout.sample,sha256=pT0HQXmLKHxt16-mSu5HPzBeZdP0lGO7nXQI7DsSv18,2783
19
19
  py2ls/.git/hooks/update.sample,sha256=jV8vqD4QPPCLV-qmdSHfkZT0XL28s32lKtWGCXoU0QY,3650
20
- py2ls/.git/index,sha256=2psUA1HSf13h_hMIjiDXo3a-eKs7JT7vYt6myqyT568,4232
20
+ py2ls/.git/index,sha256=icdOlk1IdpGPwTdGS8m0F_bYf3XNjgzp1gMlKFKx8fU,4232
21
21
  py2ls/.git/info/exclude,sha256=ZnH-g7egfIky7okWTR8nk7IxgFjri5jcXAbuClo7DsE,240
22
22
  py2ls/.git/logs/HEAD,sha256=8ID7WuAe_TlO9g-ARxhIJYdgdL3u3m7-1qrOanaIUlA,3535
23
23
  py2ls/.git/logs/refs/heads/main,sha256=8ID7WuAe_TlO9g-ARxhIJYdgdL3u3m7-1qrOanaIUlA,3535
@@ -234,9 +234,9 @@ py2ls/export_requirements.py,sha256=x2WgUF0jYKz9GfA1MVKN-MdsM-oQ8yUeC6Ua8oCymio,
234
234
  py2ls/fetch_update.py,sha256=9LXj661GpCEFII2wx_99aINYctDiHni6DOruDs_fdt8,4752
235
235
  py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
236
236
  py2ls/ich2ls.py,sha256=3E9R8oVpyYZXH5PiIQgT3CN5NxLe4Dwtm2LwaeacE6I,21381
237
- py2ls/ips.py,sha256=YlBLv69l2ILYO9jGqHZss7AydNHCoq51qqu3b443cv0,297616
237
+ py2ls/ips.py,sha256=2TWuOSFquwhmPdxkmmvU_pcIbE5M0S9aRPtuQgs5B7A,297706
238
238
  py2ls/ml2ls copy.py,sha256=iZJrFLIrdfTieAY2BDsxQFTm29smwnJh0aC4hRB9VGM,113314
239
- py2ls/ml2ls.py,sha256=ldNxpo7JKekDai5Izn0pk1wQ1fcNFHZx0huoK_ypfvw,142109
239
+ py2ls/ml2ls.py,sha256=Mkf374TLsCdBVYtSYptFzegn8euda33TA-M73nGtzV0,144368
240
240
  py2ls/mol.py,sha256=AZnHzarIk_MjueKdChqn1V6e4tUle3X1NnHSFA6n3Nw,10645
241
241
  py2ls/netfinder.py,sha256=R70NkrnO8LlXjT1y7bf2TN-yE4yOeAYhb0jDBiNp8XA,57536
242
242
  py2ls/ocr.py,sha256=5lhUbJufIKRSOL6wAWVLEo8TqMYSjoI_Q-IO-_4u3DE,31419
@@ -246,6 +246,6 @@ py2ls/sleep_events_detectors.py,sha256=bQA3HJqv5qnYKJJEIhCyhlDtkXQfIzqksnD0YRXso
246
246
  py2ls/stats.py,sha256=qBn2rJmNa_QLLUqjwYqXUlGzqmW94sgA1bxJU2FC3r0,39175
247
247
  py2ls/translator.py,sha256=77Tp_GjmiiwFbEIJD_q3VYpQ43XL9ZeJo6Mhl44mvh8,34284
248
248
  py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
249
- py2ls-0.2.4.16.dist-info/METADATA,sha256=z431Uha36h4d6syqBxzeNEsOwrXAp2yCya_q19-hoYo,20046
250
- py2ls-0.2.4.16.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
251
- py2ls-0.2.4.16.dist-info/RECORD,,
249
+ py2ls-0.2.4.18.dist-info/METADATA,sha256=q3L5q5BJ3olDRMxjimPsTEEE79pqmLwh93bxgsevPNU,20078
250
+ py2ls-0.2.4.18.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
251
+ py2ls-0.2.4.18.dist-info/RECORD,,