PyPI - py2ls - Versions diffs - 0.2.4.16__py3-none-any.whl → 0.2.4.18__py3-none-any.whl - Mend

py2ls 0.2.4.16__py3-none-any.whl → 0.2.4.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

py2ls/.git/index +0 -0
py2ls/ips.py +2 -0
py2ls/ml2ls.py +66 -4
{py2ls-0.2.4.16.dist-info → py2ls-0.2.4.18.dist-info}/METADATA +2 -1
{py2ls-0.2.4.16.dist-info → py2ls-0.2.4.18.dist-info}/RECORD +6 -6
{py2ls-0.2.4.16.dist-info → py2ls-0.2.4.18.dist-info}/WHEEL +0 -0

py2ls/.git/index CHANGED Viewed

Binary file

py2ls/ips.py CHANGED Viewed

@@ -2171,6 +2171,8 @@ def fload(fpath, kind=None, **kwargs):
                                     continue
                             else:
                                 pass
+        if is_df_abnormal(df,verbose=verbose):
+            df=pd.read_csv(fpath,**kwargs)
         display(df.head(2))
         print(f"shape: {df.shape}")
         return df

py2ls/ml2ls.py CHANGED Viewed

@@ -2048,6 +2048,7 @@ def predict(
     y_train: pd.Series,
     x_true: pd.DataFrame = None,
     y_true: Optional[pd.Series] = None,
+    backward:bool=False, # backward_regression
     common_features: set = None,
     purpose: str = "classification",  # 'classification' or 'regression'
     cls: Optional[Dict[str, Any]] = None,
@@ -2252,6 +2253,12 @@ def predict(
     if is_binary:
         y_train = ips.df_encoder(pd.DataFrame(y_train), method="label")
     print('is_binary:',is_binary)
+    # Perform backward feature selection
+    if backward:
+        selected_features = backward_regression(x_train, y_train, threshold_out=0.05)
+        x_train=x_train[selected_features]
     if x_true is None:
         x_train, x_true, y_train, y_true = train_test_split(
             x_train,
@@ -2286,10 +2293,12 @@ def predict(
     # y_train=y_train.values.ravel() if y_train is not None else None
     # y_true=y_true.values.ravel() if y_true is not None else None
-    y_train = (
-        y_train.ravel() if isinstance(y_train, np.ndarray) else y_train.values.ravel()
-    )
-    y_true = y_true.ravel() if isinstance(y_true, np.ndarray) else y_true.values.ravel()
+    if y_train is not None:
+        y_train = (
+            y_train.ravel() if isinstance(y_train, np.ndarray) else y_train.values.ravel()
+        )
+    if y_true is not None:
+        y_true = y_true.ravel() if isinstance(y_true, np.ndarray) else y_true.values.ravel()
     # Ensure common features are selected
     if common_features is not None:
         x_train, x_true = x_train[common_features], x_true[common_features]
@@ -3071,6 +3080,14 @@ def predict(
                     }
         else:
+            validation_scores = cal_metrics(
+                y_true,
+                y_pred,
+                y_pred_proba=y_pred_proba,
+                is_binary=is_binary,
+                purpose=purpose,
+                average="weighted",
+            )
             results[name] = {
                 "best_clf": gs.best_estimator_,
                 "best_params": gs.best_params_,
@@ -3079,6 +3096,8 @@ def predict(
                 "predictions_proba": (
                     y_pred_proba.tolist() if y_pred_proba is not None else None
                 ),
+                "y_train":y_train if y_train is not None else [],
+                "y_true": y_true if y_true is not None else []
             }
     # Convert results to DataFrame
@@ -3540,6 +3559,49 @@ def img_datasets_preprocessing(
             display(df_img.head())
         return df_img
+def backward_regression(X:pd.DataFrame, y:pd.Series, initial_list=[], threshold_out=0.05, verbose=True):
+    """
+    # awesome bit of code from https://www.kaggle.com/code/adibouayjan/house-price-step-by-step-modeling
+    Evaluates the p-values of all features, which represent the probability of observing a coefficient
+    as extreme as the one calculated if the feature had no true effect on the target.
+    Args:
+        X -- features values
+        y -- target variable
+        initial_list -- features header
+        threshold_out -- pvalue threshold of features to drop
+        verbose -- true to produce lots of logging output
+    Returns:
+        list of selected features for modeling
+    """
+    import statsmodels.api as sm
+    if isinstance(y, str) and y in X.columns:
+        y_col_name = y
+        y = X[y]
+        X = X.drop(y_col_name, axis=1)
+    included = list(X.columns)
+    while True:
+        changed = False
+        model = sm.OLS(y, sm.add_constant(pd.DataFrame(X[included]))).fit()
+        # exclude the intercept for p-value checking
+        pvalues = model.pvalues.iloc[1:]
+        worst_pval = pvalues.max()
+        if worst_pval > threshold_out:
+            changed = True
+            worst_feature = pvalues.idxmax()
+            included.remove(worst_feature)
+            if verbose:
+                print(f"Removing feature '{worst_feature}' with p-value {worst_pval}")
+        if not changed:
+            break
+    print(f"\nSelected Features:\n{included}")
+    return included  # Returns the list of selected features
 # Function to apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
 def apply_clahe(img):
     import cv2

{py2ls-0.2.4.16.dist-info → py2ls-0.2.4.18.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: py2ls
-Version: 0.2.4.16
+Version: 0.2.4.18
 Summary: py(thon)2(too)ls
 Author: Jianfeng
 Author-email: Jianfeng.Liu0413@gmail.com
@@ -200,6 +200,7 @@ Requires-Dist: selenium (>=4.23.1)
 Requires-Dist: setuptools (>=70.3.0)
 Requires-Dist: shellingham (>=1.5.4)
 Requires-Dist: six (>=1.16.0)
+Requires-Dist: skorch (>=1.0.0)
 Requires-Dist: sniffio (>=1.3.1)
 Requires-Dist: sortedcontainers (>=2.4.0)
 Requires-Dist: soupsieve (>=2.5)

{py2ls-0.2.4.16.dist-info → py2ls-0.2.4.18.dist-info}/RECORD RENAMED Viewed

@@ -17,7 +17,7 @@ py2ls/.git/hooks/pre-receive.sample,sha256=pMPSuce7P9jRRBwxvU7nGlldZrRPz0ndsxAlI
 py2ls/.git/hooks/prepare-commit-msg.sample,sha256=6d3KpBif3dJe2X_Ix4nsp7bKFjkLI5KuMnbwyOGqRhk,1492
 py2ls/.git/hooks/push-to-checkout.sample,sha256=pT0HQXmLKHxt16-mSu5HPzBeZdP0lGO7nXQI7DsSv18,2783
 py2ls/.git/hooks/update.sample,sha256=jV8vqD4QPPCLV-qmdSHfkZT0XL28s32lKtWGCXoU0QY,3650
-py2ls/.git/index,sha256=2psUA1HSf13h_hMIjiDXo3a-eKs7JT7vYt6myqyT568,4232
+py2ls/.git/index,sha256=icdOlk1IdpGPwTdGS8m0F_bYf3XNjgzp1gMlKFKx8fU,4232
 py2ls/.git/info/exclude,sha256=ZnH-g7egfIky7okWTR8nk7IxgFjri5jcXAbuClo7DsE,240
 py2ls/.git/logs/HEAD,sha256=8ID7WuAe_TlO9g-ARxhIJYdgdL3u3m7-1qrOanaIUlA,3535
 py2ls/.git/logs/refs/heads/main,sha256=8ID7WuAe_TlO9g-ARxhIJYdgdL3u3m7-1qrOanaIUlA,3535
@@ -234,9 +234,9 @@ py2ls/export_requirements.py,sha256=x2WgUF0jYKz9GfA1MVKN-MdsM-oQ8yUeC6Ua8oCymio,
 py2ls/fetch_update.py,sha256=9LXj661GpCEFII2wx_99aINYctDiHni6DOruDs_fdt8,4752
 py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
 py2ls/ich2ls.py,sha256=3E9R8oVpyYZXH5PiIQgT3CN5NxLe4Dwtm2LwaeacE6I,21381
-py2ls/ips.py,sha256=YlBLv69l2ILYO9jGqHZss7AydNHCoq51qqu3b443cv0,297616
+py2ls/ips.py,sha256=2TWuOSFquwhmPdxkmmvU_pcIbE5M0S9aRPtuQgs5B7A,297706
 py2ls/ml2ls copy.py,sha256=iZJrFLIrdfTieAY2BDsxQFTm29smwnJh0aC4hRB9VGM,113314
-py2ls/ml2ls.py,sha256=ldNxpo7JKekDai5Izn0pk1wQ1fcNFHZx0huoK_ypfvw,142109
+py2ls/ml2ls.py,sha256=Mkf374TLsCdBVYtSYptFzegn8euda33TA-M73nGtzV0,144368
 py2ls/mol.py,sha256=AZnHzarIk_MjueKdChqn1V6e4tUle3X1NnHSFA6n3Nw,10645
 py2ls/netfinder.py,sha256=R70NkrnO8LlXjT1y7bf2TN-yE4yOeAYhb0jDBiNp8XA,57536
 py2ls/ocr.py,sha256=5lhUbJufIKRSOL6wAWVLEo8TqMYSjoI_Q-IO-_4u3DE,31419
@@ -246,6 +246,6 @@ py2ls/sleep_events_detectors.py,sha256=bQA3HJqv5qnYKJJEIhCyhlDtkXQfIzqksnD0YRXso
 py2ls/stats.py,sha256=qBn2rJmNa_QLLUqjwYqXUlGzqmW94sgA1bxJU2FC3r0,39175
 py2ls/translator.py,sha256=77Tp_GjmiiwFbEIJD_q3VYpQ43XL9ZeJo6Mhl44mvh8,34284
 py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
-py2ls-0.2.4.16.dist-info/METADATA,sha256=z431Uha36h4d6syqBxzeNEsOwrXAp2yCya_q19-hoYo,20046
-py2ls-0.2.4.16.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
-py2ls-0.2.4.16.dist-info/RECORD,,
+py2ls-0.2.4.18.dist-info/METADATA,sha256=q3L5q5BJ3olDRMxjimPsTEEE79pqmLwh93bxgsevPNU,20078
+py2ls-0.2.4.18.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+py2ls-0.2.4.18.dist-info/RECORD,,

{py2ls-0.2.4.16.dist-info → py2ls-0.2.4.18.dist-info}/WHEEL RENAMED Viewed

File without changes

py2ls 0.2.4.16__py3-none-any.whl → 0.2.4.18__py3-none-any.whl

py2ls 0.2.4.16__py3-none-any.whl → 0.2.4.18__py3-none-any.whl