PyPI - gpbench - Versions diffs - 1.0.0__py3-none-any.whl - Mend

gpbench 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (188) hide show

gp_agent_tool/compute_dataset_feature.py +67 -0
gp_agent_tool/config.py +65 -0
gp_agent_tool/experience/create_masked_dataset_summary.py +97 -0
gp_agent_tool/experience/dataset_summary_info.py +13 -0
gp_agent_tool/experience/experience_info.py +12 -0
gp_agent_tool/experience/get_matched_experience.py +111 -0
gp_agent_tool/llm_client.py +119 -0
gp_agent_tool/logging_utils.py +24 -0
gp_agent_tool/main.py +347 -0
gp_agent_tool/read_agent/__init__.py +46 -0
gp_agent_tool/read_agent/nodes.py +674 -0
gp_agent_tool/read_agent/prompts.py +547 -0
gp_agent_tool/read_agent/python_repl_tool.py +165 -0
gp_agent_tool/read_agent/state.py +101 -0
gp_agent_tool/read_agent/workflow.py +54 -0
gpbench/__init__.py +25 -0
gpbench/_selftest.py +104 -0
gpbench/method_class/BayesA/BayesA_class.py +141 -0
gpbench/method_class/BayesA/__init__.py +5 -0
gpbench/method_class/BayesA/_bayesfromR.py +96 -0
gpbench/method_class/BayesA/_param_free_base_model.py +84 -0
gpbench/method_class/BayesA/bayesAfromR.py +16 -0
gpbench/method_class/BayesB/BayesB_class.py +140 -0
gpbench/method_class/BayesB/__init__.py +5 -0
gpbench/method_class/BayesB/_bayesfromR.py +96 -0
gpbench/method_class/BayesB/_param_free_base_model.py +84 -0
gpbench/method_class/BayesB/bayesBfromR.py +16 -0
gpbench/method_class/BayesC/BayesC_class.py +141 -0
gpbench/method_class/BayesC/__init__.py +4 -0
gpbench/method_class/BayesC/_bayesfromR.py +96 -0
gpbench/method_class/BayesC/_param_free_base_model.py +84 -0
gpbench/method_class/BayesC/bayesCfromR.py +16 -0
gpbench/method_class/CropARNet/CropARNet_class.py +186 -0
gpbench/method_class/CropARNet/CropARNet_he_class.py +154 -0
gpbench/method_class/CropARNet/__init__.py +5 -0
gpbench/method_class/CropARNet/base_CropARNet_class.py +178 -0
gpbench/method_class/Cropformer/Cropformer_class.py +308 -0
gpbench/method_class/Cropformer/__init__.py +5 -0
gpbench/method_class/Cropformer/cropformer_he_class.py +221 -0
gpbench/method_class/DL_GWAS/DL_GWAS_class.py +250 -0
gpbench/method_class/DL_GWAS/DL_GWAS_he_class.py +169 -0
gpbench/method_class/DL_GWAS/__init__.py +5 -0
gpbench/method_class/DNNGP/DNNGP_class.py +163 -0
gpbench/method_class/DNNGP/DNNGP_he_class.py +138 -0
gpbench/method_class/DNNGP/__init__.py +5 -0
gpbench/method_class/DNNGP/base_dnngp_class.py +116 -0
gpbench/method_class/DeepCCR/DeepCCR_class.py +172 -0
gpbench/method_class/DeepCCR/DeepCCR_he_class.py +161 -0
gpbench/method_class/DeepCCR/__init__.py +5 -0
gpbench/method_class/DeepCCR/base_DeepCCR_class.py +209 -0
gpbench/method_class/DeepGS/DeepGS_class.py +184 -0
gpbench/method_class/DeepGS/DeepGS_he_class.py +150 -0
gpbench/method_class/DeepGS/__init__.py +5 -0
gpbench/method_class/DeepGS/base_deepgs_class.py +153 -0
gpbench/method_class/EIR/EIR_class.py +276 -0
gpbench/method_class/EIR/EIR_he_class.py +184 -0
gpbench/method_class/EIR/__init__.py +5 -0
gpbench/method_class/EIR/utils/__init__.py +0 -0
gpbench/method_class/EIR/utils/array_output_modules.py +97 -0
gpbench/method_class/EIR/utils/common.py +65 -0
gpbench/method_class/EIR/utils/lcl_layers.py +235 -0
gpbench/method_class/EIR/utils/logging.py +59 -0
gpbench/method_class/EIR/utils/mlp_layers.py +92 -0
gpbench/method_class/EIR/utils/models_locally_connected.py +642 -0
gpbench/method_class/EIR/utils/transformer_models.py +546 -0
gpbench/method_class/ElasticNet/ElasticNet_class.py +133 -0
gpbench/method_class/ElasticNet/ElasticNet_he_class.py +91 -0
gpbench/method_class/ElasticNet/__init__.py +5 -0
gpbench/method_class/G2PDeep/G2PDeep_he_class.py +217 -0
gpbench/method_class/G2PDeep/G2Pdeep_class.py +205 -0
gpbench/method_class/G2PDeep/__init__.py +5 -0
gpbench/method_class/G2PDeep/base_G2PDeep_class.py +209 -0
gpbench/method_class/GBLUP/GBLUP_class.py +183 -0
gpbench/method_class/GBLUP/__init__.py +5 -0
gpbench/method_class/GEFormer/GEFormer_class.py +169 -0
gpbench/method_class/GEFormer/GEFormer_he_class.py +137 -0
gpbench/method_class/GEFormer/__init__.py +5 -0
gpbench/method_class/GEFormer/gMLP_class.py +357 -0
gpbench/method_class/LightGBM/LightGBM_class.py +224 -0
gpbench/method_class/LightGBM/LightGBM_he_class.py +121 -0
gpbench/method_class/LightGBM/__init__.py +5 -0
gpbench/method_class/RF/RF_GPU_class.py +165 -0
gpbench/method_class/RF/RF_GPU_he_class.py +124 -0
gpbench/method_class/RF/__init__.py +5 -0
gpbench/method_class/SVC/SVC_GPU.py +181 -0
gpbench/method_class/SVC/SVC_GPU_he.py +106 -0
gpbench/method_class/SVC/__init__.py +5 -0
gpbench/method_class/SoyDNGP/AlexNet_206_class.py +179 -0
gpbench/method_class/SoyDNGP/SoyDNGP_class.py +189 -0
gpbench/method_class/SoyDNGP/SoyDNGP_he_class.py +112 -0
gpbench/method_class/SoyDNGP/__init__.py +5 -0
gpbench/method_class/XGBoost/XGboost_GPU_class.py +198 -0
gpbench/method_class/XGBoost/XGboost_GPU_he_class.py +178 -0
gpbench/method_class/XGBoost/__init__.py +5 -0
gpbench/method_class/__init__.py +52 -0
gpbench/method_class/rrBLUP/__init__.py +5 -0
gpbench/method_class/rrBLUP/rrBLUP_class.py +140 -0
gpbench/method_reg/BayesA/BayesA.py +116 -0
gpbench/method_reg/BayesA/__init__.py +5 -0
gpbench/method_reg/BayesA/_bayesfromR.py +96 -0
gpbench/method_reg/BayesA/_param_free_base_model.py +84 -0
gpbench/method_reg/BayesA/bayesAfromR.py +16 -0
gpbench/method_reg/BayesB/BayesB.py +117 -0
gpbench/method_reg/BayesB/__init__.py +5 -0
gpbench/method_reg/BayesB/_bayesfromR.py +96 -0
gpbench/method_reg/BayesB/_param_free_base_model.py +84 -0
gpbench/method_reg/BayesB/bayesBfromR.py +16 -0
gpbench/method_reg/BayesC/BayesC.py +115 -0
gpbench/method_reg/BayesC/__init__.py +5 -0
gpbench/method_reg/BayesC/_bayesfromR.py +96 -0
gpbench/method_reg/BayesC/_param_free_base_model.py +84 -0
gpbench/method_reg/BayesC/bayesCfromR.py +16 -0
gpbench/method_reg/CropARNet/CropARNet.py +159 -0
gpbench/method_reg/CropARNet/CropARNet_Hyperparameters.py +109 -0
gpbench/method_reg/CropARNet/__init__.py +5 -0
gpbench/method_reg/CropARNet/base_CropARNet.py +137 -0
gpbench/method_reg/Cropformer/Cropformer.py +313 -0
gpbench/method_reg/Cropformer/Cropformer_Hyperparameters.py +250 -0
gpbench/method_reg/Cropformer/__init__.py +5 -0
gpbench/method_reg/DL_GWAS/DL_GWAS.py +186 -0
gpbench/method_reg/DL_GWAS/DL_GWAS_Hyperparameters.py +125 -0
gpbench/method_reg/DL_GWAS/__init__.py +5 -0
gpbench/method_reg/DNNGP/DNNGP.py +157 -0
gpbench/method_reg/DNNGP/DNNGP_Hyperparameters.py +118 -0
gpbench/method_reg/DNNGP/__init__.py +5 -0
gpbench/method_reg/DNNGP/base_dnngp.py +101 -0
gpbench/method_reg/DeepCCR/DeepCCR.py +149 -0
gpbench/method_reg/DeepCCR/DeepCCR_Hyperparameters.py +110 -0
gpbench/method_reg/DeepCCR/__init__.py +5 -0
gpbench/method_reg/DeepCCR/base_DeepCCR.py +171 -0
gpbench/method_reg/DeepGS/DeepGS.py +165 -0
gpbench/method_reg/DeepGS/DeepGS_Hyperparameters.py +114 -0
gpbench/method_reg/DeepGS/__init__.py +5 -0
gpbench/method_reg/DeepGS/base_deepgs.py +98 -0
gpbench/method_reg/EIR/EIR.py +258 -0
gpbench/method_reg/EIR/EIR_Hyperparameters.py +178 -0
gpbench/method_reg/EIR/__init__.py +5 -0
gpbench/method_reg/EIR/utils/__init__.py +0 -0
gpbench/method_reg/EIR/utils/array_output_modules.py +97 -0
gpbench/method_reg/EIR/utils/common.py +65 -0
gpbench/method_reg/EIR/utils/lcl_layers.py +235 -0
gpbench/method_reg/EIR/utils/logging.py +59 -0
gpbench/method_reg/EIR/utils/mlp_layers.py +92 -0
gpbench/method_reg/EIR/utils/models_locally_connected.py +642 -0
gpbench/method_reg/EIR/utils/transformer_models.py +546 -0
gpbench/method_reg/ElasticNet/ElasticNet.py +123 -0
gpbench/method_reg/ElasticNet/ElasticNet_he.py +83 -0
gpbench/method_reg/ElasticNet/__init__.py +5 -0
gpbench/method_reg/G2PDeep/G2PDeep_Hyperparameters.py +107 -0
gpbench/method_reg/G2PDeep/G2Pdeep.py +166 -0
gpbench/method_reg/G2PDeep/__init__.py +5 -0
gpbench/method_reg/G2PDeep/base_G2PDeep.py +209 -0
gpbench/method_reg/GBLUP/GBLUP_R.py +182 -0
gpbench/method_reg/GBLUP/__init__.py +5 -0
gpbench/method_reg/GEFormer/GEFormer.py +164 -0
gpbench/method_reg/GEFormer/GEFormer_Hyperparameters.py +106 -0
gpbench/method_reg/GEFormer/__init__.py +5 -0
gpbench/method_reg/GEFormer/gMLP.py +341 -0
gpbench/method_reg/LightGBM/LightGBM.py +237 -0
gpbench/method_reg/LightGBM/LightGBM_Hyperparameters.py +77 -0
gpbench/method_reg/LightGBM/__init__.py +5 -0
gpbench/method_reg/MVP/MVP.py +182 -0
gpbench/method_reg/MVP/MVP_Hyperparameters.py +126 -0
gpbench/method_reg/MVP/__init__.py +5 -0
gpbench/method_reg/MVP/base_MVP.py +113 -0
gpbench/method_reg/RF/RF_GPU.py +174 -0
gpbench/method_reg/RF/RF_Hyperparameters.py +163 -0
gpbench/method_reg/RF/__init__.py +5 -0
gpbench/method_reg/SVC/SVC_GPU.py +194 -0
gpbench/method_reg/SVC/SVC_Hyperparameters.py +107 -0
gpbench/method_reg/SVC/__init__.py +5 -0
gpbench/method_reg/SoyDNGP/AlexNet_206.py +185 -0
gpbench/method_reg/SoyDNGP/SoyDNGP.py +179 -0
gpbench/method_reg/SoyDNGP/SoyDNGP_Hyperparameters.py +105 -0
gpbench/method_reg/SoyDNGP/__init__.py +5 -0
gpbench/method_reg/XGBoost/XGboost_GPU.py +188 -0
gpbench/method_reg/XGBoost/XGboost_Hyperparameters.py +167 -0
gpbench/method_reg/XGBoost/__init__.py +5 -0
gpbench/method_reg/__init__.py +55 -0
gpbench/method_reg/rrBLUP/__init__.py +5 -0
gpbench/method_reg/rrBLUP/rrBLUP.py +123 -0
gpbench-1.0.0.dist-info/METADATA +379 -0
gpbench-1.0.0.dist-info/RECORD +188 -0
gpbench-1.0.0.dist-info/WHEEL +5 -0
gpbench-1.0.0.dist-info/entry_points.txt +2 -0
gpbench-1.0.0.dist-info/top_level.txt +3 -0
tests/test_import.py +80 -0
tests/test_method.py +232 -0

gpbench/method_reg/BayesC/BayesC.py ADDED Viewed

@@ -0,0 +1,115 @@
+import os
+import time
+import psutil
+import argparse
+import random
+import torch
+import numpy as np
+import pandas as pd
+from .bayesCfromR import BayesC
+from sklearn.model_selection import KFold
+from scipy.stats import pearsonr
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+def parse_args():
+    parser = argparse.ArgumentParser(description="Argument parser")
+    parser.add_argument('--methods', type=str, default='BayesC/', help='Model name')
+    parser.add_argument('--species', type=str, default='Cattle/', help='Species name')
+    parser.add_argument('--phe', type=str, default='', help='Phenotype name')
+    parser.add_argument('--data_dir', type=str, default='../../data/', help='Path to data directory')
+    parser.add_argument('--result_dir', type=str, default='result/', help='Path to result directory')
+    return parser.parse_args()
+def load_data(args):
+    xData = np.load(os.path.join(args.data_dir, args.species, 'genotype.npz'))["arr_0"]
+    yData = np.load(os.path.join(args.data_dir, args.species, 'phenotype.npz'))["arr_0"]
+    names = np.load(os.path.join(args.data_dir, args.species, 'phenotype.npz'))["arr_1"]
+    nsample = xData.shape[0]
+    nsnp = xData.shape[1]
+    print("Number of samples: ", nsample)
+    print("Number of SNPs: ", nsnp)
+    return xData, yData, nsample, nsnp, names
+def set_seed(seed=42):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+def run_nested_cv(args, data, label):
+    """
+    Evaluate BayesC with a shuffled 10-fold cross-validation and write per-fold predictions.
+    Despite the name, only a single (non-nested) K-fold loop is run here.
+    For every fold a fresh ``BayesC`` model is fitted on the training split and scored on the
+    test split (MSE, MAE, R2, Pearson correlation). The test targets and predictions are
+    written to ``fold<k>.csv`` inside ``args.result_dir/<methods><species><phe>``.
+    :param args: namespace providing ``result_dir``, ``methods``, ``species`` and ``phe``
+    :param data: feature matrix, indexed by row with the fold indices
+    :param label: target vector, indexed with the same fold indices
+    :return: None; results are printed and written to disk
+    """
+    # The three name parts are concatenated as-is, so they must carry their own separators.
+    result_dir = os.path.join(args.result_dir, args.methods + args.species + args.phe)
+    os.makedirs(result_dir, exist_ok=True)
+    print("Starting 10-fold cross-validation...")
+    # Fixed random_state keeps the fold assignment identical between runs.
+    kf = KFold(n_splits=10, shuffle=True, random_state=42)
+    process = psutil.Process(os.getpid())
+    all_mse, all_mae, all_r2, all_pcc = [], [], [], []
+    start_time = time.time()
+    for fold, (train_index, test_index) in enumerate(kf.split(data)):
+        fold_start = time.time()
+        print(f"\n===== Fold {fold} =====")
+        X_train, X_test = data[train_index], data[test_index]
+        Y_train, Y_test = label[train_index], label[test_index]
+        # Reset the CUDA peak counter so the value read below belongs to this fold only.
+        if torch.cuda.is_available():
+            torch.cuda.reset_peak_memory_stats()
+        model = BayesC(task="regression")
+        model.fit(X_train, Y_train)
+        Y_pred = model.predict(X_test)
+        mse = mean_squared_error(Y_test, Y_pred)
+        mae = mean_absolute_error(Y_test, Y_pred)
+        r2 = r2_score(Y_test, Y_pred)
+        pcc, _ = pearsonr(Y_test, Y_pred)
+        all_mse.append(mse)
+        all_mae.append(mae)
+        all_r2.append(r2)
+        all_pcc.append(pcc)
+        fold_time = time.time() - fold_start
+        # Memory figures are reported in MB: peak CUDA allocation and current process RSS.
+        fold_gpu_mem = torch.cuda.max_memory_allocated() / 1024**2 if torch.cuda.is_available() else 0
+        fold_cpu_mem = process.memory_info().rss / 1024**2
+        print(f'Fold {fold}: Corr={pcc:.4f}, MAE={mae:.4f}, MSE={mse:.4f}, R2={r2:.4f}, Time={fold_time:.2f}s, '
+              f'GPU={fold_gpu_mem:.2f}MB, CPU={fold_cpu_mem:.2f}MB')
+        # Persist the raw test targets and predictions of this fold.
+        results_df = pd.DataFrame({'Y_test': Y_test, 'Y_pred': Y_pred})
+        results_df.to_csv(os.path.join(result_dir, f"fold{fold}.csv"), index=False)
+    # Mean and standard deviation of each metric over the ten folds.
+    print("\n===== Cross-validation summary =====")
+    print(f"Average PCC: {np.mean(all_pcc):.4f} ± {np.std(all_pcc):.4f}")
+    print(f"Average MAE: {np.mean(all_mae):.4f} ± {np.std(all_mae):.4f}")
+    print(f"Average MSE: {np.mean(all_mse):.4f} ± {np.std(all_mse):.4f}")
+    print(f"Average R2 : {np.mean(all_r2):.4f} ± {np.std(all_r2):.4f}")
+    print(f"Total time : {time.time() - start_time:.2f}s")
+def BayesC_reg():
+    set_seed(42)
+    torch.cuda.empty_cache()
+    args = parse_args()
+    all_species =['Cotton/']
+    for i in range(len(all_species)):
+        args.species = all_species[i]
+        X, Y, nsamples, nsnp, names = load_data(args)
+        for j in range(len(names)):
+            args.phe = names[j]
+            print("starting run " + args.methods + args.species + args.phe)
+            label = Y[:, j]
+            label = np.nan_to_num(label, nan=np.nanmean(label))
+            start_time = time.time()
+            torch.cuda.reset_peak_memory_stats()
+            process = psutil.Process(os.getpid())
+            run_nested_cv(args, data=X, label=label)
+            elapsed_time = time.time() - start_time
+            print(f"running time: {elapsed_time:.2f} s")
+            print("successfully")
+if __name__ == "__main__":
+    BayesC_reg()

gpbench/method_reg/BayesC/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from .BayesC import BayesC_reg
+# Also expose the benchmark runner under the package-level name ``BayesC``.
+# Note that this alias is the ``BayesC_reg`` function, not a model class.
+BayesC = BayesC_reg
+__all__ = ["BayesC","BayesC_reg"]

gpbench/method_reg/BayesC/_bayesfromR.py ADDED Viewed

@@ -0,0 +1,96 @@
+import numpy as np
+import rpy2
+from rpy2.robjects import numpy2ri
+rpy2.robjects.numpy2ri.activate()
+import rpy2.robjects as robjects
+from rpy2.robjects.packages import importr
+from . import _param_free_base_model
+from joblib import Parallel, delayed
+class Bayes_R(_param_free_base_model.ParamFreeBaseModel):
+    """
+    Implementation of a class for Bayesian alphabet.
+    *Attributes*
+        *Inherited attributes*
+        See :obj:`~easypheno.model._param_free_base_model.ParamFreeBaseModel` for more information on the attributes.
+        *Additional attributes*
+        - mu (*np.array*): intercept
+        - beta (*np.array*): effect size
+        - model_name (*str*): model to use (BayesA, BayesB or BayesC)
+        - n_iter (*int*): iterations for sampling
+        - burn_in (*int*): warmup/burnin for sampling
+    """
+    standard_encoding = '012'
+    possible_encodings = ['101']
+    def __init__(self, task: str, model_name: str, encoding: str = None, n_iter: int =1000, burn_in: int = 200):
+        super().__init__(task=task, encoding=encoding)
+        self.model_name = model_name
+        self.n_iter = n_iter
+        self.burn_in = burn_in
+        self.n_jobs = 1
+        self.mu = None
+        self.beta = None
+    def _run_chain(self, chain_num: int, R_X, R_y):
+        """
+        Helper function to run an individual MCMC chain.
+        """
+        BGLR = importr('BGLR')
+        # Run BGLR for BayesB on a single chain
+        ETA = robjects.r['list'](robjects.r['list'](X=R_X, model=self.model_name))
+        fmBB = BGLR.BGLR(y=R_y, ETA=ETA, verbose=False, nIter=self.n_iter, burnIn=self.burn_in)
+        # Extract the results for this chain
+        beta_chain = np.asarray(fmBB.rx2('ETA').rx2(1).rx2('b'))
+        mu_chain = np.asarray(fmBB.rx2('mu'))  # Extract mu (intercept) for this chain
+        return beta_chain, mu_chain
+    def fit(self, X: np.array, y: np.array) -> np.array:
+        """
+        Implementation of fit function for Bayesian alphabet imported from R.
+        See :obj:`~easypheno.model._param_free_base_model.ParamFreeBaseModel` for more information.
+        """
+        # import necessary R packages
+        base = importr('base')
+        BGLR = importr('BGLR')
+        # create R objects for X and y
+        R_X = robjects.r['matrix'](X, nrow=X.shape[0], ncol=X.shape[1])
+        R_y = robjects.FloatVector(y)
+        results = Parallel(n_jobs=self.n_jobs)(
+            delayed(self._run_chain)(chain_num, R_X, R_y) for chain_num in range(self.n_jobs)
+        )
+        # Aggregate results from all chains
+        beta_chains = [result[0] for result in results]
+        mu_chains = [result[1] for result in results]
+        # Compute the mean of beta and mu over all chains
+        self.beta = np.mean(beta_chains, axis=0)
+        self.mu = np.mean(mu_chains, axis=0)
+        # run BGLR for BayesB
+        # ETA = base.list(base.list(X=R_X, model=self.model_name))
+        # fmBB = BGLR.BGLR(y=R_y, ETA=ETA, verbose=True, nIter=self.n_iter, burnIn=self.burn_in)
+        # # save results as numpy arrays
+        # self.beta = np.asarray(fmBB.rx2('ETA').rx2(1).rx2('b'))
+        # self.mu = fmBB.rx2('mu')
+        return self.predict(X_in=X)
+    def predict(self, X_in: np.array) -> np.array:
+        """
+        Implementation of predict function for Bayesian alphabet model imported from R.
+        See :obj:`~easypheno.model._param_free_base_model.ParamFreeBaseModel` for more information.
+        """
+        return self.mu + np.matmul(X_in, self.beta)

gpbench/method_reg/BayesC/_param_free_base_model.py ADDED Viewed

@@ -0,0 +1,84 @@
+import abc
+import joblib
+import numpy as np
+import pathlib
+class ParamFreeBaseModel(abc.ABC):
+    """
+    BaseModel parent class for all models that do not have hyperparameters, e.g. BLUP.
+    Every model must be based on :obj:`~easypheno.model.param_free_base_model.ParamFreeBaseModel` directly or ParamFreeBaseModel's child classes.
+    Please add ``super().__init__(PARAMS)`` to the constructor in case you override it in a child class
+    **Attributes**
+        *Class attributes*
+        - standard_encoding (*str*): the standard encoding for this model
+        - possible_encodings (*List<str>*): a list of all encodings that are possible according to the model definition
+        *Instance attributes*
+        - task (*str*): ML task ('regression' or 'classification') depending on target variable
+        - encoding (*str*): the encoding to use (standard encoding or user-defined)
+    :param task: ML task (regression or classification) depending on target variable
+    :param encoding: the encoding to use (standard encoding or user-defined)
+    """
+    # Class attributes #
+    @property
+    @classmethod
+    @abc.abstractmethod
+    def standard_encoding(cls):
+        """the standard encoding for this model"""
+        raise NotImplementedError
+    @property
+    @classmethod
+    @abc.abstractmethod
+    def possible_encodings(cls):
+        """a list of all encodings that are possible according to the model definition"""
+        raise NotImplementedError
+    # Constructor super class #
+    def __init__(self, task: str, encoding: str = None):
+        self.task = task
+        self.encoding = self.standard_encoding if encoding is None else encoding
+    # Methods required by each child class #
+    @abc.abstractmethod
+    def fit(self, X: np.array, y: np.array) -> np.array:
+        """
+        Method that fits the model based on features X and targets y
+        :param X: feature matrix for retraining
+        :param y: target vector
+        :return: numpy array with values predicted for X
+        """
+    @abc.abstractmethod
+    def predict(self, X_in: np.array) -> np.array:
+        """
+        Method that predicts target values based on the input X_in
+        :param X_in: feature matrix as input
+        :return: numpy array with the predicted values
+        """
+    def save_model(self, path: pathlib.Path, filename: str):
+        """
+        Persist the whole model object on a hard drive
+        (can be loaded with :obj:`~easypheno.model._model_functions.load_model`)
+        :param path: path where the model will be saved
+        :param filename: filename of the model
+        """
+        joblib.dump(self, path.joinpath(filename), compress=3)

gpbench/method_reg/BayesC/bayesCfromR.py ADDED Viewed

@@ -0,0 +1,16 @@
+from . import _bayesfromR
+class BayesC(_bayesfromR.Bayes_R):
+    """
+    Implementation of a class for Bayes C.
+    *Attributes*
+        *Inherited attributes*
+        See :obj:`~easypheno.model._bayesfromR.Bayes_R` for more information on the attributes.
+    """
+    def __init__(self, task: str, encoding: str = None):
+        # Only the BGLR model name is fixed here; everything else is inherited from Bayes_R.
+        super().__init__(task=task, model_name='BayesC', encoding=encoding)

gpbench/method_reg/CropARNet/CropARNet.py ADDED Viewed

@@ -0,0 +1,159 @@
+import os
+import time
+import psutil
+import argparse
+import random
+import torch
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import KFold, train_test_split
+from .base_CropARNet import SimpleSNPModel
+from scipy.stats import pearsonr
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+from torch.utils.data  import DataLoader, TensorDataset
+from . import CropARNet_Hyperparameters
+import pynvml
+def parse_args():
+    parser = argparse.ArgumentParser(description="Argument parser")
+    parser.add_argument('--methods', type=str, default='CropARNet/', help='Random seed')
+    parser.add_argument('--species', type=str, default='', help='Species name')
+    parser.add_argument('--phe', type=str, default='', help='Dataset name')
+    parser.add_argument('--data_dir', type=str, default='../../data/')
+    parser.add_argument('--result_dir', type=str, default='result/')
+    parser.add_argument('--epochs', type=int, default=500, help='Number of training rounds')
+    parser.add_argument('--batch_size', type=int, default=32, help='Batch size')
+    parser.add_argument('--weight_decay', type=float, default=0.00001, help='Weight decay')
+    parser.add_argument('--momentum', type=float, default=0.5, help='Momentum')
+    parser.add_argument('--learning_rate', type=float, default=0.01, help='Learning rate')
+    parser.add_argument('--patience', type=int, default=50, help='Patience for early stopping')
+    args = parser.parse_args()
+    return args
+def load_data(args):
+    xData = np.load(os.path.join(args.data_dir, args.species, 'genotype.npz'))["arr_0"]
+    yData = np.load(os.path.join(args.data_dir, args.species, 'phenotype.npz'))["arr_0"]
+    names = np.load(os.path.join(args.data_dir, args.species, 'phenotype.npz'))["arr_1"]
+    nsample = xData.shape[0]
+    nsnp = xData.shape[1]
+    print("Number of samples: ", nsample)
+    print("Number of SNPs: ", nsnp)
+    return xData, yData, nsample, nsnp, names
+def set_seed(seed=42):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+def get_gpu_mem_by_pid(pid):
+    procs = pynvml.nvmlDeviceGetComputeRunningProcesses(handle)
+    for p in procs:
+        if p.pid == pid:
+            return p.usedGpuMemory / 1024**2
+    return 0.0
+def run_nested_cv(args, data, label, nsnp, device):
+    result_dir = os.path.join(args.result_dir, args.methods + args.species + args.phe)
+    os.makedirs(result_dir, exist_ok=True)
+    print("Starting 10-fold cross-validation...")
+    kf = KFold(n_splits=10, shuffle=True, random_state=42)
+    all_mse, all_mae, all_r2, all_pcc = [], [], [], []
+    time_star = time.time()
+    for fold, (train_index, test_index) in enumerate(kf.split(data)):
+        print(f"Running fold {fold}...")
+        process = psutil.Process(os.getpid())
+        fold_start_time = time.time()
+        X_train, X_test = data[train_index], data[test_index]
+        y_train, y_test = label[train_index], label[test_index]
+        X_train_sub, X_valid, y_train_sub, y_valid = train_test_split(X_train, y_train, test_size=0.1, random_state=42)
+        x_train_tensor = torch.from_numpy(X_train_sub).float().to(device)
+        y_train_tensor = torch.from_numpy(y_train_sub).float().to(device)
+        x_valid_tensor = torch.from_numpy(X_valid).float().to(device)
+        y_valid_tensor = torch.from_numpy(y_valid).float().to(device)
+        x_test_tensor = torch.from_numpy(X_test).float().to(device)
+        y_test_tensor = torch.from_numpy(y_test).float().to(device)
+        train_data = TensorDataset(x_train_tensor, y_train_tensor)
+        valid_data = TensorDataset(x_valid_tensor, y_valid_tensor)
+        test_data = TensorDataset(x_test_tensor, y_test_tensor)
+        train_loader = DataLoader(train_data, args.batch_size, shuffle=True)
+        valid_loader = DataLoader(valid_data, args.batch_size, shuffle=False)
+        test_loader = DataLoader(test_data, args.batch_size, shuffle=False)
+        model = SimpleSNPModel(nsnp)
+        model.train_model(train_loader, valid_loader, args.epochs, args.learning_rate, args.weight_decay, args.patience, device)
+        y_pred = model.predict(test_loader)
+        mse = mean_squared_error(y_test, y_pred)
+        r2 = r2_score(y_test, y_pred)
+        mae = mean_absolute_error(y_test, y_pred)
+        pcc, _ = pearsonr(y_test, y_pred)
+        all_mse.append(mse)
+        all_r2.append(r2)
+        all_mae.append(mae)
+        all_pcc.append(pcc)
+        fold_time = time.time() - fold_start_time
+        fold_gpu_mem =  get_gpu_mem_by_pid(os.getpid())
+        fold_cpu_mem = process.memory_info().rss / 1024**2
+        print(f'Fold {fold}:  Corr={pcc:.4f}, MAE={mae:.4f}, MSE={mse:.4f}, R2={r2:.4f}, Time={fold_time:.2f}s, '
+              f'GPU={fold_gpu_mem:.2f}MB, CPU={fold_cpu_mem:.2f}MB')
+        torch.cuda.empty_cache()
+        torch.cuda.reset_peak_memory_stats()
+        results_df = pd.DataFrame({'Y_test': y_test, 'Y_pred': y_pred})
+        results_df.to_csv(os.path.join(result_dir, f"fold{fold}.csv"), index=False)
+    print("\n===== Cross-validation summary =====")
+    print(f"Average PCC: {np.mean(all_pcc):.4f} ± {np.std(all_pcc):.4f}")
+    print(f"Average MAE: {np.mean(all_mae):.4f} ± {np.std(all_mae):.4f}")
+    print(f"Average MSE: {np.mean(all_mse):.4f} ± {np.std(all_mse):.4f}")
+    print(f"Average R2 : {np.mean(all_r2):.4f} ± {np.std(all_r2):.4f}")
+    print(f"Time: {time.time() - time_star:.2f}s")
+def CropARNet_reg():
+    set_seed(42)
+    pynvml.nvmlInit()
+    handle = pynvml.nvmlDeviceGetHandleByIndex(0)
+    args = parse_args()
+    all_species =['Cotton/']
+    for i in range(len(all_species)):
+        args.species = all_species[i]
+        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        args.device = device
+        X, Y, nsamples, nsnp, names = load_data(args)
+        for j in range(len(names)):
+            args.phe = names[j]
+            print("starting run " + args.methods + args.species + args.phe)
+            label = Y[:, j]
+            label = np.nan_to_num(label, nan=np.nanmean(label))
+            best_params = CropARNet_Hyperparameters.Hyperparameter(X, label, nsnp)
+            args.learning_rate = best_params['learning_rate']
+            args.batch_size = best_params['batch_size']
+            args.weight_decay = best_params['weight_decay']
+            args.patience = best_params['patience']
+            start_time = time.time()
+            torch.cuda.reset_peak_memory_stats()
+            process = psutil.Process(os.getpid())
+            run_nested_cv(args, data=X, label=label, nsnp = nsnp, device = args.device)
+            elapsed_time = time.time() - start_time
+            print(f"running time: {elapsed_time:.2f} s")
+            print("successfully")
+if __name__ == "__main__":
+    CropARNet_reg()

gpbench/method_reg/CropARNet/CropARNet_Hyperparameters.py ADDED Viewed

@@ -0,0 +1,109 @@
+import os
+import time
+import psutil
+import random
+import torch
+import numpy as np
+import optuna
+from sklearn.model_selection import KFold, train_test_split
+from .base_CropARNet import SimpleSNPModel
+from scipy.stats import pearsonr
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+from torch.utils.data  import DataLoader, TensorDataset
+from optuna.exceptions import TrialPruned
+def run_nested_cv_with_early_stopping(data, label, nsnp, learning_rate, weight_decay, patience, batch_size, num_round=500):
+    device = torch.device("cuda:0")
+    print("Starting 10-fold cross-validation...")
+    kf = KFold(n_splits=10, shuffle=True, random_state=42)
+    all_mse, all_mae, all_r2, all_pcc = [], [], [], []
+    for fold, (train_index, test_index) in enumerate(kf.split(data)):
+        print(f"Running fold {fold}...")
+        process = psutil.Process(os.getpid())
+        fold_start_time = time.time()
+        X_train, X_test = data[train_index], data[test_index]
+        y_train, y_test = label[train_index], label[test_index]
+        X_train_sub, X_valid, y_train_sub, y_valid = train_test_split(X_train, y_train, test_size=0.1, random_state=42)
+        x_train_tensor = torch.from_numpy(X_train_sub).float().to(device)
+        y_train_tensor = torch.from_numpy(y_train_sub).float().to(device)
+        x_valid_tensor = torch.from_numpy(X_valid).float().to(device)
+        y_valid_tensor = torch.from_numpy(y_valid).float().to(device)
+        x_test_tensor = torch.from_numpy(X_test).float().to(device)
+        y_test_tensor = torch.from_numpy(y_test).float().to(device)
+        train_data = TensorDataset(x_train_tensor, y_train_tensor)
+        valid_data = TensorDataset(x_valid_tensor, y_valid_tensor)
+        test_data = TensorDataset(x_test_tensor, y_test_tensor)
+        train_loader = DataLoader(train_data, batch_size, shuffle=True)
+        valid_loader = DataLoader(valid_data, batch_size, shuffle=False)
+        test_loader = DataLoader(test_data, batch_size, shuffle=False)
+        model = SimpleSNPModel(nsnp)
+        model.train_model(train_loader, valid_loader, num_round, learning_rate, weight_decay, patience, device)
+        y_pred = model.predict(test_loader)
+        mse = mean_squared_error(y_test, y_pred)
+        r2 = r2_score(y_test, y_pred)
+        mae = mean_absolute_error(y_test, y_pred)
+        pcc, _ = pearsonr(y_test, y_pred)
+        if np.isnan(pcc):
+            print(f"Fold {fold} resulted in NaN PCC, pruning the trial...")
+            raise TrialPruned()
+        all_mse.append(mse)
+        all_r2.append(r2)
+        all_mae.append(mae)
+        all_pcc.append(pcc)
+        fold_time = time.time() - fold_start_time
+        fold_cpu_mem = process.memory_info().rss / 1024**2
+        print(f'Fold {fold}:  Corr={pcc:.4f}, MAE={mae:.4f}, MSE={mse:.4f}, R2={r2:.4f}, Time={fold_time:.2f}s, '
+                f'CPU={fold_cpu_mem:.2f}MB')
+    return np.mean(all_pcc) if all_pcc else 0.0
+def set_seed(seed=42):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+def Hyperparameter(data, label, nsnp):
+    set_seed(42)
+    def objective(trial):
+        learning_rate = trial.suggest_float("learning_rate", 1e-4,0.1)
+        batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
+        weight_decay = trial.suggest_categorical("weight_decay", [1e-4, 1e-3, 1e-2, 1e-1])
+        patience = trial.suggest_int("patience", 1, 10)
+        try:
+            corr_score = run_nested_cv_with_early_stopping(
+                data=data,
+                label=label,
+                nsnp=nsnp,
+                learning_rate=learning_rate,
+                weight_decay=weight_decay,
+                patience=patience,
+                batch_size=batch_size
+            )
+        except TrialPruned:
+            return float("-inf")
+        return corr_score
+    study = optuna.create_study(direction="maximize")
+    study.optimize(objective, n_trials=20)
+    print("best params:", study.best_params)
+    print("successfully")
+    return study.best_params

gpbench/method_reg/CropARNet/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from .CropARNet import CropARNet_reg
+# Also expose the benchmark runner under the package-level name ``CropARNet``.
+# Note that this alias is the ``CropARNet_reg`` function, not a model class.
+CropARNet = CropARNet_reg
+__all__ = ["CropARNet","CropARNet_reg"]