py2ls 0.2.4.16__py3-none-any.whl → 0.2.4.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/.git/index CHANGED
Binary file
py2ls/ips.py CHANGED
@@ -2171,6 +2171,8 @@ def fload(fpath, kind=None, **kwargs):
2171
2171
  continue
2172
2172
  else:
2173
2173
  pass
2174
+ if is_df_abnormal(df,verbose=verbose):
2175
+ df=pd.read_csv(fpath,**kwargs)
2174
2176
  display(df.head(2))
2175
2177
  print(f"shape: {df.shape}")
2176
2178
  return df
py2ls/ml2ls.py CHANGED
@@ -2048,6 +2048,7 @@ def predict(
2048
2048
  y_train: pd.Series,
2049
2049
  x_true: pd.DataFrame = None,
2050
2050
  y_true: Optional[pd.Series] = None,
2051
+ backward:bool=False, # backward_regression
2051
2052
  common_features: set = None,
2052
2053
  purpose: str = "classification", # 'classification' or 'regression'
2053
2054
  cls: Optional[Dict[str, Any]] = None,
@@ -2252,6 +2253,12 @@ def predict(
2252
2253
  if is_binary:
2253
2254
  y_train = ips.df_encoder(pd.DataFrame(y_train), method="label")
2254
2255
  print('is_binary:',is_binary)
2256
+
2257
+ # Perform backward feature selection
2258
+ if backward:
2259
+ selected_features = backward_regression(x_train, y_train, threshold_out=0.05)
2260
+ x_train=x_train[selected_features]
2261
+
2255
2262
  if x_true is None:
2256
2263
  x_train, x_true, y_train, y_true = train_test_split(
2257
2264
  x_train,
@@ -2286,10 +2293,12 @@ def predict(
2286
2293
 
2287
2294
  # y_train=y_train.values.ravel() if y_train is not None else None
2288
2295
  # y_true=y_true.values.ravel() if y_true is not None else None
2289
- y_train = (
2290
- y_train.ravel() if isinstance(y_train, np.ndarray) else y_train.values.ravel()
2291
- )
2292
- y_true = y_true.ravel() if isinstance(y_true, np.ndarray) else y_true.values.ravel()
2296
+ if y_train is not None:
2297
+ y_train = (
2298
+ y_train.ravel() if isinstance(y_train, np.ndarray) else y_train.values.ravel()
2299
+ )
2300
+ if y_true is not None:
2301
+ y_true = y_true.ravel() if isinstance(y_true, np.ndarray) else y_true.values.ravel()
2293
2302
  # Ensure common features are selected
2294
2303
  if common_features is not None:
2295
2304
  x_train, x_true = x_train[common_features], x_true[common_features]
@@ -3071,6 +3080,14 @@ def predict(
3071
3080
  }
3072
3081
 
3073
3082
  else:
3083
+ validation_scores = cal_metrics(
3084
+ y_true,
3085
+ y_pred,
3086
+ y_pred_proba=y_pred_proba,
3087
+ is_binary=is_binary,
3088
+ purpose=purpose,
3089
+ average="weighted",
3090
+ )
3074
3091
  results[name] = {
3075
3092
  "best_clf": gs.best_estimator_,
3076
3093
  "best_params": gs.best_params_,
@@ -3079,6 +3096,8 @@ def predict(
3079
3096
  "predictions_proba": (
3080
3097
  y_pred_proba.tolist() if y_pred_proba is not None else None
3081
3098
  ),
3099
+ "y_train":y_train if y_train is not None else [],
3100
+ "y_true": y_true if y_true is not None else []
3082
3101
  }
3083
3102
 
3084
3103
  # Convert results to DataFrame
@@ -3540,6 +3559,49 @@ def img_datasets_preprocessing(
3540
3559
  display(df_img.head())
3541
3560
 
3542
3561
  return df_img
3562
+
3563
+
3564
+ def backward_regression(X:pd.DataFrame, y:pd.Series, initial_list=[], threshold_out=0.05, verbose=True):
3565
+ """
3566
+ # awesome bit of code from https://www.kaggle.com/code/adibouayjan/house-price-step-by-step-modeling
3567
+
3568
+ Evaluates the p-values of all features, which represent the probability of observing a coefficient
3569
+ as extreme as the one calculated if the feature had no true effect on the target.
3570
+
3571
+ Args:
3572
+ X -- features values
3573
+ y -- target variable
3574
+ initial_list -- features header
3575
+ threshold_out -- pvalue threshold of features to drop
3576
+ verbose -- true to produce lots of logging output
3577
+
3578
+ Returns:
3579
+ list of selected features for modeling
3580
+ """
3581
+ import statsmodels.api as sm
3582
+ if isinstance(y, str) and y in X.columns:
3583
+ y_col_name = y
3584
+ y = X[y]
3585
+ X = X.drop(y_col_name, axis=1)
3586
+ included = list(X.columns)
3587
+ while True:
3588
+ changed = False
3589
+ model = sm.OLS(y, sm.add_constant(pd.DataFrame(X[included]))).fit()
3590
+ # exclude the intercept for p-value checking
3591
+ pvalues = model.pvalues.iloc[1:]
3592
+ worst_pval = pvalues.max()
3593
+ if worst_pval > threshold_out:
3594
+ changed = True
3595
+ worst_feature = pvalues.idxmax()
3596
+ included.remove(worst_feature)
3597
+ if verbose:
3598
+ print(f"Removing feature '{worst_feature}' with p-value {worst_pval}")
3599
+ if not changed:
3600
+ break
3601
+ print(f"\nSelected Features:\n{included}")
3602
+ return included # Returns the list of selected features
3603
+
3604
+
3543
3605
  # Function to apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
3544
3606
  def apply_clahe(img):
3545
3607
  import cv2
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: py2ls
3
- Version: 0.2.4.16
3
+ Version: 0.2.4.18
4
4
  Summary: py(thon)2(too)ls
5
5
  Author: Jianfeng
6
6
  Author-email: Jianfeng.Liu0413@gmail.com
@@ -200,6 +200,7 @@ Requires-Dist: selenium (>=4.23.1)
200
200
  Requires-Dist: setuptools (>=70.3.0)
201
201
  Requires-Dist: shellingham (>=1.5.4)
202
202
  Requires-Dist: six (>=1.16.0)
203
+ Requires-Dist: skorch (>=1.0.0)
203
204
  Requires-Dist: sniffio (>=1.3.1)
204
205
  Requires-Dist: sortedcontainers (>=2.4.0)
205
206
  Requires-Dist: soupsieve (>=2.5)
@@ -17,7 +17,7 @@ py2ls/.git/hooks/pre-receive.sample,sha256=pMPSuce7P9jRRBwxvU7nGlldZrRPz0ndsxAlI
17
17
  py2ls/.git/hooks/prepare-commit-msg.sample,sha256=6d3KpBif3dJe2X_Ix4nsp7bKFjkLI5KuMnbwyOGqRhk,1492
18
18
  py2ls/.git/hooks/push-to-checkout.sample,sha256=pT0HQXmLKHxt16-mSu5HPzBeZdP0lGO7nXQI7DsSv18,2783
19
19
  py2ls/.git/hooks/update.sample,sha256=jV8vqD4QPPCLV-qmdSHfkZT0XL28s32lKtWGCXoU0QY,3650
20
- py2ls/.git/index,sha256=2psUA1HSf13h_hMIjiDXo3a-eKs7JT7vYt6myqyT568,4232
20
+ py2ls/.git/index,sha256=icdOlk1IdpGPwTdGS8m0F_bYf3XNjgzp1gMlKFKx8fU,4232
21
21
  py2ls/.git/info/exclude,sha256=ZnH-g7egfIky7okWTR8nk7IxgFjri5jcXAbuClo7DsE,240
22
22
  py2ls/.git/logs/HEAD,sha256=8ID7WuAe_TlO9g-ARxhIJYdgdL3u3m7-1qrOanaIUlA,3535
23
23
  py2ls/.git/logs/refs/heads/main,sha256=8ID7WuAe_TlO9g-ARxhIJYdgdL3u3m7-1qrOanaIUlA,3535
@@ -234,9 +234,9 @@ py2ls/export_requirements.py,sha256=x2WgUF0jYKz9GfA1MVKN-MdsM-oQ8yUeC6Ua8oCymio,
234
234
  py2ls/fetch_update.py,sha256=9LXj661GpCEFII2wx_99aINYctDiHni6DOruDs_fdt8,4752
235
235
  py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
236
236
  py2ls/ich2ls.py,sha256=3E9R8oVpyYZXH5PiIQgT3CN5NxLe4Dwtm2LwaeacE6I,21381
237
- py2ls/ips.py,sha256=YlBLv69l2ILYO9jGqHZss7AydNHCoq51qqu3b443cv0,297616
237
+ py2ls/ips.py,sha256=2TWuOSFquwhmPdxkmmvU_pcIbE5M0S9aRPtuQgs5B7A,297706
238
238
  py2ls/ml2ls copy.py,sha256=iZJrFLIrdfTieAY2BDsxQFTm29smwnJh0aC4hRB9VGM,113314
239
- py2ls/ml2ls.py,sha256=ldNxpo7JKekDai5Izn0pk1wQ1fcNFHZx0huoK_ypfvw,142109
239
+ py2ls/ml2ls.py,sha256=Mkf374TLsCdBVYtSYptFzegn8euda33TA-M73nGtzV0,144368
240
240
  py2ls/mol.py,sha256=AZnHzarIk_MjueKdChqn1V6e4tUle3X1NnHSFA6n3Nw,10645
241
241
  py2ls/netfinder.py,sha256=R70NkrnO8LlXjT1y7bf2TN-yE4yOeAYhb0jDBiNp8XA,57536
242
242
  py2ls/ocr.py,sha256=5lhUbJufIKRSOL6wAWVLEo8TqMYSjoI_Q-IO-_4u3DE,31419
@@ -246,6 +246,6 @@ py2ls/sleep_events_detectors.py,sha256=bQA3HJqv5qnYKJJEIhCyhlDtkXQfIzqksnD0YRXso
246
246
  py2ls/stats.py,sha256=qBn2rJmNa_QLLUqjwYqXUlGzqmW94sgA1bxJU2FC3r0,39175
247
247
  py2ls/translator.py,sha256=77Tp_GjmiiwFbEIJD_q3VYpQ43XL9ZeJo6Mhl44mvh8,34284
248
248
  py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
249
- py2ls-0.2.4.16.dist-info/METADATA,sha256=z431Uha36h4d6syqBxzeNEsOwrXAp2yCya_q19-hoYo,20046
250
- py2ls-0.2.4.16.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
251
- py2ls-0.2.4.16.dist-info/RECORD,,
249
+ py2ls-0.2.4.18.dist-info/METADATA,sha256=q3L5q5BJ3olDRMxjimPsTEEE79pqmLwh93bxgsevPNU,20078
250
+ py2ls-0.2.4.18.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
251
+ py2ls-0.2.4.18.dist-info/RECORD,,