PyPI - py2ls - Versions diffs - 0.2.4.8__py3-none-any.whl → 0.2.4.9__py3-none-any.whl - Mend

py2ls 0.2.4.8__py3-none-any.whl → 0.2.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

py2ls/.git/index +0 -0
py2ls/ips.py +809 -398
py2ls/ml2ls.py +73 -38
py2ls/plot.py +2 -9
{py2ls-0.2.4.8.dist-info → py2ls-0.2.4.9.dist-info}/METADATA +2 -2
{py2ls-0.2.4.8.dist-info → py2ls-0.2.4.9.dist-info}/RECORD +7 -7
{py2ls-0.2.4.8.dist-info → py2ls-0.2.4.9.dist-info}/WHEEL +0 -0

py2ls/ml2ls.py CHANGED Viewed

@@ -582,7 +582,8 @@ def get_features(
     strict: bool = False,
     n_shared: int = 2,  # 只要有两个方法有重合,就纳入common genes
     use_selected_features: bool = True,
-) -> dict:
+    plot_: bool = True,
+    dir_save:str="./") -> dict:
     """
     Master function to perform feature selection and validate models.
     """
@@ -598,11 +599,15 @@ def get_features(
     # fill na
     if fill_missing:
         ips.df_fillna(data=X,method='knn',inplace=True,axis=0)
-    # rm missing values
-    X.dropna(inplace=True)
-    y.dropna(inplace=True)
+    if isinstance(y, str) and y in X.columns:
+        y_col_name=y
+        y=X[y]
+        y=ips.df_encoder(pd.DataFrame(y),method='dummy')
+        X = X.drop(y_col_name,axis=1)
+    else:
+        y=ips.df_encoder(pd.DataFrame(y),method='dummy').values.ravel()
     y = y.loc[X.index]  # Align y with X after dropping rows with missing values in X
+    y = y.ravel() if isinstance(y, np.ndarray) else y.values.ravel()
     if X.shape[0] != len(y):
         raise ValueError("X and y must have the same number of samples (rows).")
@@ -894,9 +899,13 @@ def get_features(
         results = {
             "selected_features": features_df,
             "cv_train_scores": cv_train_results_df,
-            "cv_test_scores": rank_models(cv_test_results_df),
+            "cv_test_scores": rank_models(cv_test_results_df,plot_=plot_),
             "common_features": list(common_features),
         }
+        if all([plot_,dir_save]):
+            from datetime import datetime
+            now_ = datetime.now().strftime("%y%m%d_%H%M%S")
+            ips.figsave(dir_save+f"features{now_}.pdf")
     else:
         results = {
             "selected_features": pd.DataFrame(),
@@ -1707,36 +1716,55 @@ def predict(
     smote: bool = False,
     n_jobs:int  = -1,
     plot_: bool = True,
+    dir_save:str="./",
     test_size:float=0.2,# specific only when x_true is None
     cv_folds:int=5,# more cv_folds 得更加稳定,auc可能更低
     cv_level:str="l",#"s":'low',"m":'medium',"l":"high"
     class_weight: str = "balanced",
     verbose:bool=False,
-    dir_save:str="./"
 ) -> pd.DataFrame:
-    """
-    1. 对x_train进行split_train_test,并对其进行validate
-    predict(x_train, y_train)
-    2. 利用x_train, y_train的数据,对x_true的数据进行predict
-    predict(x_train, y_train, x_true)
-    3. 利用x_train, y_train的数据,validate x_true和y_true
-    predict(x_train, y_train, x_true, y_true)
-    Advanced master predictor function with grid search for hyperparameter tuning.
-    Parameters:
-    - x_train, y_train: Training dataset.
-    - x_true, y_true: Dataset for validation or prediction (y_true=None for prediction).
-    - common_features (set): Common features to use for validation.
-    - purpose (str): Task type - 'classification' or 'regression'.
-    - models (dict): Dictionary of models and parameters.
-    - metrics (list): Metrics to compute.
-    - random_state (int): Seed for reproducibility.
-    - smote (bool): Use SMOTE for class imbalance (classification only).
-    - class_weight (str): Class weights to handle imbalance.
-    Returns:
-    - df_results (pd.DataFrame): DataFrame with performance metrics and hyperparameters.
+    """
+        第一种情况是内部拆分，第二种是直接预测，第三种是外部验证。
+        Usage:
+            (1). predict(x_train, y_train,...) 对 x_train 进行拆分训练/测试集，并在测试集上进行验证.
+                predict 函数会根据 test_size 参数，将 x_train 和 y_train 拆分出内部测试集。然后模型会在拆分出的训练集上进行训练，并在测试集上验证效果。
+            (2). predict(x_train, y_train, x_true,...)使用 x_train 和 y_train 训练并对 x_true 进行预测
+                由于传入了 x_true，函数会跳过 x_train 的拆分，直接使用全部的 x_train 和 y_train 进行训练。然后对 x_true 进行预测，但由于没有提供 y_true，
+                因此无法与真实值进行对比。
+            (3). predict(x_train, y_train, x_true, y_true,...)使用 x_train 和 y_train 训练，并验证 x_true 与真实标签 y_true.
+                predict 函数会在 x_train 和 y_train 上进行训练，并将 x_true 作为测试集。由于提供了 y_true，函数可以将预测结果与 y_true 进行对比，从而
+                计算验证指标，完成对 x_true 的真正验证。
+        trains and validates a variety of machine learning models for both classification and regression tasks.
+        It supports hyperparameter tuning with grid search and includes additional features like cross-validation,
+        feature scaling, and handling of class imbalance through SMOTE.
+        Parameters:
+            - x_train (pd.DataFrame):Training feature data, structured with each row as an observation and each column as a feature.
+            - y_train (pd.Series):Target variable for the training dataset.
+            - x_true (pd.DataFrame, optional):Test feature data. If not provided, the function splits x_train based on test_size.
+            - y_true (pd.Series, optional):Test target values. If not provided, y_train is split into training and testing sets.
+            - common_features (set, optional):Specifies a subset of features common across training and test data.
+            - purpose (str, default = "classification"):Defines whether the task is "classification" or "regression". Determines which
+                metrics and models are applied.
+            - cls (dict, optional):Dictionary to specify custom classifiers/regressors. Defaults to a set of common models if not provided.
+            - metrics (list, optional):List of evaluation metrics (like accuracy, F1 score) used for model evaluation.
+            - random_state (int, default = 1):Random seed to ensure reproducibility.
+            - smote (bool, default = False):Applies Synthetic Minority Oversampling Technique (SMOTE) to address class imbalance if enabled.
+            - n_jobs (int, default = -1):Number of parallel jobs for computation. Set to -1 to use all available cores.
+            - plot_ (bool, default = True):If True, generates plots of the model evaluation metrics.
+            - test_size (float, default = 0.2):Test data proportion if x_true is not provided.
+            - cv_folds (int, default = 5):Number of cross-validation folds.
+            - cv_level (str, default = "l"):Sets the detail level of cross-validation. "s" for low, "m" for medium, and "l" for high.
+            - class_weight (str, default = "balanced"):Balances class weights in classification tasks.
+            - verbose (bool, default = False):If True, prints detailed output during model training.
+            - dir_save (str, default = "./"):Directory path to save plot outputs and results.
+        Key Steps in the Function:
+            Model Initialization: Depending on purpose, initializes either classification or regression models.
+            Feature Selection: Ensures training and test sets have matching feature columns.
+            SMOTE Application: Balances classes if smote is enabled and the task is classification.
+            Cross-Validation and Hyperparameter Tuning: Utilizes GridSearchCV for model tuning based on cv_level.
+            Evaluation and Plotting: Outputs evaluation metrics like AUC, confusion matrices, and optional plotting of performance metrics.
     """
     from tqdm import tqdm
     from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, ExtraTreesClassifier, ExtraTreesRegressor, BaggingClassifier, BaggingRegressor, AdaBoostClassifier, AdaBoostRegressor
@@ -1858,8 +1886,12 @@ def predict(
             y_true=ips.df_encoder(pd.DataFrame(y_true),method='dummy').values.ravel()
     # to convert the 2D to 1D: 2D column-vector format (like [[1], [0], [1], ...]) instead of a 1D array ([1, 0, 1, ...]
-    y_train=y_train.values.ravel() if y_train is not None else None
-    y_true=y_true.values.ravel() if y_true is not None else None
+    # y_train=y_train.values.ravel() if y_train is not None else None
+    # y_true=y_true.values.ravel() if y_true is not None else None
+    y_train = y_train.ravel() if isinstance(y_train, np.ndarray) else y_train.values.ravel()
+    y_true = y_true.ravel() if isinstance(y_true, np.ndarray) else y_true.values.ravel()
     # Ensure common features are selected
     if common_features is not None:
@@ -2440,15 +2472,18 @@ def predict(
         df_results=df_results.loc[df_scores.index]
         if plot_:
+            from datetime import datetime
+            now_ = datetime.now().strftime("%y%m%d_%H%M%S")
             nexttile=plot.subplot(figsize=[12, 10])
             plot.heatmap(df_scores, kind="direct",ax=nexttile())
             plot.figsets(xangle=30)
             if dir_save:
-                ips.figsave(dir_save+"scores_sorted_heatmap.pdf")
-            plot.heatmap(df_scores, kind="direct",cluster=True)
-            plot.figsets(xangle=30)
-            if dir_save:
-                ips.figsave(dir_save+"scores_clus.pdf")
+                ips.figsave(dir_save+f"scores_sorted_heatmap{now_}.pdf")
+            if df_scores.shape[0]>1:# draw cluster
+                plot.heatmap(df_scores, kind="direct",cluster=True)
+                plot.figsets(xangle=30)
+                if dir_save:
+                    ips.figsave(dir_save+f"scores_clus{now_}.pdf")
     if all([plot_, y_true is not None, purpose=='classification']):
         try:
             if len(models)>3:
@@ -2456,7 +2491,7 @@ def predict(
             else:
                 plot_validate_features_single(df_results,figsize=(12,4*len(models)))
             if dir_save:
-                ips.figsave(dir_save+"validate_features.pdf")
+                ips.figsave(dir_save+f"validate_features{now_}.pdf")
         except Exception as e:
             print(f"Error: 在画图的过程中出现了问题:{e}")
     return df_results

py2ls/plot.py CHANGED Viewed

@@ -3020,7 +3020,7 @@ def plotxy(
         sns_info = pd.DataFrame(fload(current_directory / 'data' / 'sns_info.json'))
     valid_kinds = list(default_settings.keys())
-    print(valid_kinds)
+    # print(valid_kinds)
     if kind is not None:
         if isinstance(kind, str):
             kind = [kind]
@@ -3032,13 +3032,7 @@ def plotxy(
         if kind is not None:
             for k in kind:
                 if k in valid_kinds:
-                    print(f"{k}:\n\t{default_settings[k]}")
-                    print(
-                        sns_info[sns_info["Functions"].str.contains(k)]
-                        .iloc[:, -1]
-                        .tolist()[0]
-                    )
-                    print()
+                    print(f"{k}:\n\t{default_settings[k]}")
         usage_str = """plotxy(data=ranked_genes,
         x="log2(fold_change)",
         y="-log10(p-value)",
@@ -3102,7 +3096,6 @@ def plotxy(
             hue = kwargs.pop("hue", None)
             if isinstance(kws_scatter, dict):  # Check if kws_scatter is a dictionary
                 kws_scatter.pop("hue", None)  # Safely remove 'hue' if it exists
             palette = kws_scatter.pop("palette",get_color(data[hue].nunique()) if hue is not None else None)
             s = kws_scatter.pop("s", 10)
             alpha = kws_scatter.pop("alpha", 0.7)

{py2ls-0.2.4.8.dist-info → py2ls-0.2.4.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: py2ls
-Version: 0.2.4.8
+Version: 0.2.4.9
 Summary: py(thon)2(too)ls
 Author: Jianfeng
 Author-email: Jianfeng.Liu0413@gmail.com
@@ -56,7 +56,7 @@ Requires-Dist: coverage (>=7.6.0)
 Requires-Dist: coveralls (>=4.0.1)
 Requires-Dist: crashtest (>=0.4.1)
 Requires-Dist: cycler (>=0.12.1)
-Requires-Dist: dask[dataframe] (>=2023.6,<2024.0)
+Requires-Dist: dask (>=2024.7.1)
 Requires-Dist: debugpy (>=1.8.2)
 Requires-Dist: decorator (>=5.1.1)
 Requires-Dist: defusedxml (>=0.7.1)

{py2ls-0.2.4.8.dist-info → py2ls-0.2.4.9.dist-info}/RECORD RENAMED Viewed

@@ -17,7 +17,7 @@ py2ls/.git/hooks/pre-receive.sample,sha256=pMPSuce7P9jRRBwxvU7nGlldZrRPz0ndsxAlI
 py2ls/.git/hooks/prepare-commit-msg.sample,sha256=6d3KpBif3dJe2X_Ix4nsp7bKFjkLI5KuMnbwyOGqRhk,1492
 py2ls/.git/hooks/push-to-checkout.sample,sha256=pT0HQXmLKHxt16-mSu5HPzBeZdP0lGO7nXQI7DsSv18,2783
 py2ls/.git/hooks/update.sample,sha256=jV8vqD4QPPCLV-qmdSHfkZT0XL28s32lKtWGCXoU0QY,3650
-py2ls/.git/index,sha256=w4n_eE4nWnGzWPUMesYlqgV_hldK1TphJVZrRyij5Vo,4232
+py2ls/.git/index,sha256=CwYqVYvTvnn8cGWn-ctzQIfhmbgRfxtYXrAkmCA1AuU,4232
 py2ls/.git/info/exclude,sha256=ZnH-g7egfIky7okWTR8nk7IxgFjri5jcXAbuClo7DsE,240
 py2ls/.git/logs/HEAD,sha256=8ID7WuAe_TlO9g-ARxhIJYdgdL3u3m7-1qrOanaIUlA,3535
 py2ls/.git/logs/refs/heads/main,sha256=8ID7WuAe_TlO9g-ARxhIJYdgdL3u3m7-1qrOanaIUlA,3535
@@ -214,17 +214,17 @@ py2ls/export_requirements.py,sha256=x2WgUF0jYKz9GfA1MVKN-MdsM-oQ8yUeC6Ua8oCymio,
 py2ls/fetch_update.py,sha256=9LXj661GpCEFII2wx_99aINYctDiHni6DOruDs_fdt8,4752
 py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
 py2ls/ich2ls.py,sha256=3E9R8oVpyYZXH5PiIQgT3CN5NxLe4Dwtm2LwaeacE6I,21381
-py2ls/ips.py,sha256=OJgNO3F-S7m5QjrwRjFOxG-sIZruRvK52NfnTm9yhTU,260110
-py2ls/ml2ls.py,sha256=SODP4ebQnbpdhX1VeUXTkHIKSxz37c0Brxis87vPv4U,102625
+py2ls/ips.py,sha256=2Ds3kra7LtxVu5L1vNrpKjGFhg2mdnS5qcqSqHDNkkQ,265181
+py2ls/ml2ls.py,sha256=EN-ufKgFs6NWPJVyh3mu9VmRyRK4vgi6rzufDd7B2pA,106633
 py2ls/mol.py,sha256=AZnHzarIk_MjueKdChqn1V6e4tUle3X1NnHSFA6n3Nw,10645
 py2ls/netfinder.py,sha256=RJFr80tGEJiuwEx99IBOhI5-ZuXnPdWnGUYpF7XCEwI,56426
 py2ls/ocr.py,sha256=5lhUbJufIKRSOL6wAWVLEo8TqMYSjoI_Q-IO-_4u3DE,31419
-py2ls/plot.py,sha256=IBIlcOYmXLrsgq_7338JlowZVPns8Hr3dHnvozwINl4,167825
+py2ls/plot.py,sha256=LeQpTLvRHMDrQtU8yaeXEOgDdVm7KWLcAuRia6wWMYQ,167604
 py2ls/setuptools-70.1.0-py3-none-any.whl,sha256=2bi3cUVal8ip86s0SOvgspteEF8SKLukECi-EWmFomc,882588
 py2ls/sleep_events_detectors.py,sha256=bQA3HJqv5qnYKJJEIhCyhlDtkXQfIzqksnD0YRXso68,52145
 py2ls/stats.py,sha256=DMoJd8Z5YV9T1wB-4P52F5K5scfVK55DT8UP4Twcebo,38627
 py2ls/translator.py,sha256=zBeq4pYZeroqw3DT-5g7uHfVqKd-EQptT6LJ-Adi8JY,34244
 py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
-py2ls-0.2.4.8.dist-info/METADATA,sha256=Fr6xazl4OK1paux9REIe5pEKB-xDp4soHe7PLsvIHmA,20055
-py2ls-0.2.4.8.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
-py2ls-0.2.4.8.dist-info/RECORD,,
+py2ls-0.2.4.9.dist-info/METADATA,sha256=4HaavKedVGS05_RLEBRr7E_A9XJotqR0oXRC0u-qR4k,20038
+py2ls-0.2.4.9.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+py2ls-0.2.4.9.dist-info/RECORD,,

{py2ls-0.2.4.8.dist-info → py2ls-0.2.4.9.dist-info}/WHEEL RENAMED Viewed

File without changes

py2ls 0.2.4.8__py3-none-any.whl → 0.2.4.9__py3-none-any.whl

py2ls 0.2.4.8__py3-none-any.whl → 0.2.4.9__py3-none-any.whl