PyPI - gpbench - Versions diffs - 1.0.0__py3-none-any.whl - Mend

gpbench 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (188) hide show

gp_agent_tool/compute_dataset_feature.py +67 -0
gp_agent_tool/config.py +65 -0
gp_agent_tool/experience/create_masked_dataset_summary.py +97 -0
gp_agent_tool/experience/dataset_summary_info.py +13 -0
gp_agent_tool/experience/experience_info.py +12 -0
gp_agent_tool/experience/get_matched_experience.py +111 -0
gp_agent_tool/llm_client.py +119 -0
gp_agent_tool/logging_utils.py +24 -0
gp_agent_tool/main.py +347 -0
gp_agent_tool/read_agent/__init__.py +46 -0
gp_agent_tool/read_agent/nodes.py +674 -0
gp_agent_tool/read_agent/prompts.py +547 -0
gp_agent_tool/read_agent/python_repl_tool.py +165 -0
gp_agent_tool/read_agent/state.py +101 -0
gp_agent_tool/read_agent/workflow.py +54 -0
gpbench/__init__.py +25 -0
gpbench/_selftest.py +104 -0
gpbench/method_class/BayesA/BayesA_class.py +141 -0
gpbench/method_class/BayesA/__init__.py +5 -0
gpbench/method_class/BayesA/_bayesfromR.py +96 -0
gpbench/method_class/BayesA/_param_free_base_model.py +84 -0
gpbench/method_class/BayesA/bayesAfromR.py +16 -0
gpbench/method_class/BayesB/BayesB_class.py +140 -0
gpbench/method_class/BayesB/__init__.py +5 -0
gpbench/method_class/BayesB/_bayesfromR.py +96 -0
gpbench/method_class/BayesB/_param_free_base_model.py +84 -0
gpbench/method_class/BayesB/bayesBfromR.py +16 -0
gpbench/method_class/BayesC/BayesC_class.py +141 -0
gpbench/method_class/BayesC/__init__.py +4 -0
gpbench/method_class/BayesC/_bayesfromR.py +96 -0
gpbench/method_class/BayesC/_param_free_base_model.py +84 -0
gpbench/method_class/BayesC/bayesCfromR.py +16 -0
gpbench/method_class/CropARNet/CropARNet_class.py +186 -0
gpbench/method_class/CropARNet/CropARNet_he_class.py +154 -0
gpbench/method_class/CropARNet/__init__.py +5 -0
gpbench/method_class/CropARNet/base_CropARNet_class.py +178 -0
gpbench/method_class/Cropformer/Cropformer_class.py +308 -0
gpbench/method_class/Cropformer/__init__.py +5 -0
gpbench/method_class/Cropformer/cropformer_he_class.py +221 -0
gpbench/method_class/DL_GWAS/DL_GWAS_class.py +250 -0
gpbench/method_class/DL_GWAS/DL_GWAS_he_class.py +169 -0
gpbench/method_class/DL_GWAS/__init__.py +5 -0
gpbench/method_class/DNNGP/DNNGP_class.py +163 -0
gpbench/method_class/DNNGP/DNNGP_he_class.py +138 -0
gpbench/method_class/DNNGP/__init__.py +5 -0
gpbench/method_class/DNNGP/base_dnngp_class.py +116 -0
gpbench/method_class/DeepCCR/DeepCCR_class.py +172 -0
gpbench/method_class/DeepCCR/DeepCCR_he_class.py +161 -0
gpbench/method_class/DeepCCR/__init__.py +5 -0
gpbench/method_class/DeepCCR/base_DeepCCR_class.py +209 -0
gpbench/method_class/DeepGS/DeepGS_class.py +184 -0
gpbench/method_class/DeepGS/DeepGS_he_class.py +150 -0
gpbench/method_class/DeepGS/__init__.py +5 -0
gpbench/method_class/DeepGS/base_deepgs_class.py +153 -0
gpbench/method_class/EIR/EIR_class.py +276 -0
gpbench/method_class/EIR/EIR_he_class.py +184 -0
gpbench/method_class/EIR/__init__.py +5 -0
gpbench/method_class/EIR/utils/__init__.py +0 -0
gpbench/method_class/EIR/utils/array_output_modules.py +97 -0
gpbench/method_class/EIR/utils/common.py +65 -0
gpbench/method_class/EIR/utils/lcl_layers.py +235 -0
gpbench/method_class/EIR/utils/logging.py +59 -0
gpbench/method_class/EIR/utils/mlp_layers.py +92 -0
gpbench/method_class/EIR/utils/models_locally_connected.py +642 -0
gpbench/method_class/EIR/utils/transformer_models.py +546 -0
gpbench/method_class/ElasticNet/ElasticNet_class.py +133 -0
gpbench/method_class/ElasticNet/ElasticNet_he_class.py +91 -0
gpbench/method_class/ElasticNet/__init__.py +5 -0
gpbench/method_class/G2PDeep/G2PDeep_he_class.py +217 -0
gpbench/method_class/G2PDeep/G2Pdeep_class.py +205 -0
gpbench/method_class/G2PDeep/__init__.py +5 -0
gpbench/method_class/G2PDeep/base_G2PDeep_class.py +209 -0
gpbench/method_class/GBLUP/GBLUP_class.py +183 -0
gpbench/method_class/GBLUP/__init__.py +5 -0
gpbench/method_class/GEFormer/GEFormer_class.py +169 -0
gpbench/method_class/GEFormer/GEFormer_he_class.py +137 -0
gpbench/method_class/GEFormer/__init__.py +5 -0
gpbench/method_class/GEFormer/gMLP_class.py +357 -0
gpbench/method_class/LightGBM/LightGBM_class.py +224 -0
gpbench/method_class/LightGBM/LightGBM_he_class.py +121 -0
gpbench/method_class/LightGBM/__init__.py +5 -0
gpbench/method_class/RF/RF_GPU_class.py +165 -0
gpbench/method_class/RF/RF_GPU_he_class.py +124 -0
gpbench/method_class/RF/__init__.py +5 -0
gpbench/method_class/SVC/SVC_GPU.py +181 -0
gpbench/method_class/SVC/SVC_GPU_he.py +106 -0
gpbench/method_class/SVC/__init__.py +5 -0
gpbench/method_class/SoyDNGP/AlexNet_206_class.py +179 -0
gpbench/method_class/SoyDNGP/SoyDNGP_class.py +189 -0
gpbench/method_class/SoyDNGP/SoyDNGP_he_class.py +112 -0
gpbench/method_class/SoyDNGP/__init__.py +5 -0
gpbench/method_class/XGBoost/XGboost_GPU_class.py +198 -0
gpbench/method_class/XGBoost/XGboost_GPU_he_class.py +178 -0
gpbench/method_class/XGBoost/__init__.py +5 -0
gpbench/method_class/__init__.py +52 -0
gpbench/method_class/rrBLUP/__init__.py +5 -0
gpbench/method_class/rrBLUP/rrBLUP_class.py +140 -0
gpbench/method_reg/BayesA/BayesA.py +116 -0
gpbench/method_reg/BayesA/__init__.py +5 -0
gpbench/method_reg/BayesA/_bayesfromR.py +96 -0
gpbench/method_reg/BayesA/_param_free_base_model.py +84 -0
gpbench/method_reg/BayesA/bayesAfromR.py +16 -0
gpbench/method_reg/BayesB/BayesB.py +117 -0
gpbench/method_reg/BayesB/__init__.py +5 -0
gpbench/method_reg/BayesB/_bayesfromR.py +96 -0
gpbench/method_reg/BayesB/_param_free_base_model.py +84 -0
gpbench/method_reg/BayesB/bayesBfromR.py +16 -0
gpbench/method_reg/BayesC/BayesC.py +115 -0
gpbench/method_reg/BayesC/__init__.py +5 -0
gpbench/method_reg/BayesC/_bayesfromR.py +96 -0
gpbench/method_reg/BayesC/_param_free_base_model.py +84 -0
gpbench/method_reg/BayesC/bayesCfromR.py +16 -0
gpbench/method_reg/CropARNet/CropARNet.py +159 -0
gpbench/method_reg/CropARNet/CropARNet_Hyperparameters.py +109 -0
gpbench/method_reg/CropARNet/__init__.py +5 -0
gpbench/method_reg/CropARNet/base_CropARNet.py +137 -0
gpbench/method_reg/Cropformer/Cropformer.py +313 -0
gpbench/method_reg/Cropformer/Cropformer_Hyperparameters.py +250 -0
gpbench/method_reg/Cropformer/__init__.py +5 -0
gpbench/method_reg/DL_GWAS/DL_GWAS.py +186 -0
gpbench/method_reg/DL_GWAS/DL_GWAS_Hyperparameters.py +125 -0
gpbench/method_reg/DL_GWAS/__init__.py +5 -0
gpbench/method_reg/DNNGP/DNNGP.py +157 -0
gpbench/method_reg/DNNGP/DNNGP_Hyperparameters.py +118 -0
gpbench/method_reg/DNNGP/__init__.py +5 -0
gpbench/method_reg/DNNGP/base_dnngp.py +101 -0
gpbench/method_reg/DeepCCR/DeepCCR.py +149 -0
gpbench/method_reg/DeepCCR/DeepCCR_Hyperparameters.py +110 -0
gpbench/method_reg/DeepCCR/__init__.py +5 -0
gpbench/method_reg/DeepCCR/base_DeepCCR.py +171 -0
gpbench/method_reg/DeepGS/DeepGS.py +165 -0
gpbench/method_reg/DeepGS/DeepGS_Hyperparameters.py +114 -0
gpbench/method_reg/DeepGS/__init__.py +5 -0
gpbench/method_reg/DeepGS/base_deepgs.py +98 -0
gpbench/method_reg/EIR/EIR.py +258 -0
gpbench/method_reg/EIR/EIR_Hyperparameters.py +178 -0
gpbench/method_reg/EIR/__init__.py +5 -0
gpbench/method_reg/EIR/utils/__init__.py +0 -0
gpbench/method_reg/EIR/utils/array_output_modules.py +97 -0
gpbench/method_reg/EIR/utils/common.py +65 -0
gpbench/method_reg/EIR/utils/lcl_layers.py +235 -0
gpbench/method_reg/EIR/utils/logging.py +59 -0
gpbench/method_reg/EIR/utils/mlp_layers.py +92 -0
gpbench/method_reg/EIR/utils/models_locally_connected.py +642 -0
gpbench/method_reg/EIR/utils/transformer_models.py +546 -0
gpbench/method_reg/ElasticNet/ElasticNet.py +123 -0
gpbench/method_reg/ElasticNet/ElasticNet_he.py +83 -0
gpbench/method_reg/ElasticNet/__init__.py +5 -0
gpbench/method_reg/G2PDeep/G2PDeep_Hyperparameters.py +107 -0
gpbench/method_reg/G2PDeep/G2Pdeep.py +166 -0
gpbench/method_reg/G2PDeep/__init__.py +5 -0
gpbench/method_reg/G2PDeep/base_G2PDeep.py +209 -0
gpbench/method_reg/GBLUP/GBLUP_R.py +182 -0
gpbench/method_reg/GBLUP/__init__.py +5 -0
gpbench/method_reg/GEFormer/GEFormer.py +164 -0
gpbench/method_reg/GEFormer/GEFormer_Hyperparameters.py +106 -0
gpbench/method_reg/GEFormer/__init__.py +5 -0
gpbench/method_reg/GEFormer/gMLP.py +341 -0
gpbench/method_reg/LightGBM/LightGBM.py +237 -0
gpbench/method_reg/LightGBM/LightGBM_Hyperparameters.py +77 -0
gpbench/method_reg/LightGBM/__init__.py +5 -0
gpbench/method_reg/MVP/MVP.py +182 -0
gpbench/method_reg/MVP/MVP_Hyperparameters.py +126 -0
gpbench/method_reg/MVP/__init__.py +5 -0
gpbench/method_reg/MVP/base_MVP.py +113 -0
gpbench/method_reg/RF/RF_GPU.py +174 -0
gpbench/method_reg/RF/RF_Hyperparameters.py +163 -0
gpbench/method_reg/RF/__init__.py +5 -0
gpbench/method_reg/SVC/SVC_GPU.py +194 -0
gpbench/method_reg/SVC/SVC_Hyperparameters.py +107 -0
gpbench/method_reg/SVC/__init__.py +5 -0
gpbench/method_reg/SoyDNGP/AlexNet_206.py +185 -0
gpbench/method_reg/SoyDNGP/SoyDNGP.py +179 -0
gpbench/method_reg/SoyDNGP/SoyDNGP_Hyperparameters.py +105 -0
gpbench/method_reg/SoyDNGP/__init__.py +5 -0
gpbench/method_reg/XGBoost/XGboost_GPU.py +188 -0
gpbench/method_reg/XGBoost/XGboost_Hyperparameters.py +167 -0
gpbench/method_reg/XGBoost/__init__.py +5 -0
gpbench/method_reg/__init__.py +55 -0
gpbench/method_reg/rrBLUP/__init__.py +5 -0
gpbench/method_reg/rrBLUP/rrBLUP.py +123 -0
gpbench-1.0.0.dist-info/METADATA +379 -0
gpbench-1.0.0.dist-info/RECORD +188 -0
gpbench-1.0.0.dist-info/WHEEL +5 -0
gpbench-1.0.0.dist-info/entry_points.txt +2 -0
gpbench-1.0.0.dist-info/top_level.txt +3 -0
tests/test_import.py +80 -0
tests/test_method.py +232 -0

gpbench/method_reg/ElasticNet/ElasticNet_he.py ADDED Viewed

@@ -0,0 +1,83 @@
+import gc
+import random
+import time
+import numpy as np
+import optuna
+from sklearn.model_selection import KFold
+from sklearn.linear_model import ElasticNet
+from scipy.stats import pearsonr
+from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
+from optuna.exceptions import TrialPruned
+def run_nested_cv_with_early_stopping(data, label, outer_cv, alpha, l1_ratio):
+    """Cross-validate one ElasticNet configuration and return its mean Pearson r.
+
+    Despite the name, this function runs neither an inner CV loop nor early
+    stopping: one ``ElasticNet`` is fitted per fold of ``outer_cv`` with the
+    given ``alpha`` / ``l1_ratio`` and scored on the held-out fold.
+
+    Args:
+        data: Feature matrix, indexed with the index arrays from ``outer_cv.split``.
+        label: Target vector aligned with ``data``.
+        outer_cv: Splitter exposing ``split(data)`` (the caller passes a ``KFold``).
+        alpha: ElasticNet regularisation strength.
+        l1_ratio: ElasticNet L1/L2 mixing parameter.
+
+    Returns:
+        Mean Pearson correlation over all folds.
+
+    Raises:
+        TrialPruned: If the correlation of any fold is NaN (e.g. the model
+            predicts a constant). The caller already handles this exception;
+            without it a NaN would silently propagate into the returned mean.
+    """
+    best_corr_coefs = []
+    best_maes = []
+    best_r2s = []
+    best_mses = []
+    start_time = time.time()
+    for fold, (train_idx, test_idx) in enumerate(outer_cv.split(data)):
+        x_train = data[train_idx]
+        x_test = data[test_idx]
+        y_train = label[train_idx]
+        y_test = label[test_idx]
+        model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=1000, random_state=42)
+        model.fit(x_train, y_train)
+        y_test_preds = model.predict(x_test)
+        pcc, _ = pearsonr(y_test, y_test_preds)
+        # A NaN correlation makes the trial score meaningless; signal it the
+        # same way the G2PDeep tuner does so the caller can discard the trial.
+        if np.isnan(pcc):
+            print(f"Fold {fold + 1} resulted in NaN PCC, pruning the trial...")
+            raise TrialPruned()
+        mse = mean_squared_error(y_test, y_test_preds)
+        r2 = r2_score(y_test, y_test_preds)
+        mae = mean_absolute_error(y_test, y_test_preds)
+        best_corr_coefs.append(pcc)
+        best_maes.append(mae)
+        best_r2s.append(r2)
+        best_mses.append(mse)
+        print(f'Fold {fold + 1}: MAE={mae:.4f}, MSE={mse:.4f}, R2={r2:.4f}, Corr={pcc:.4f}')
+        # Drop per-fold arrays before the next fold allocates its own copies.
+        del model, y_test_preds, x_train, x_test, y_train, y_test
+    print("==== Final Results ====")
+    print(f"MAE: {np.mean(best_maes):.4f} ± {np.std(best_maes):.4f}")
+    print(f"MSE: {np.mean(best_mses):.4f} ± {np.std(best_mses):.4f}")
+    print(f"R2 : {np.mean(best_r2s):.4f} ± {np.std(best_r2s):.4f}")
+    print(f"Corr: {np.mean(best_corr_coefs):.4f} ± {np.std(best_corr_coefs):.4f}")
+    print(f"Time: {time.time() - start_time:.2f}s")
+    gc.collect()
+    return np.mean(best_corr_coefs)
+def set_seed(seed=42):
+    """Seed Python's ``random`` module and NumPy's global RNG with ``seed``."""
+    for seeder in (random.seed, np.random.seed):
+        seeder(seed)
+def Hyperparameter(data, label):
+    """Tune ElasticNet ``alpha`` and ``l1_ratio`` with a 20-trial Optuna study.
+
+    Each trial is scored by the mean Pearson correlation returned from
+    ``run_nested_cv_with_early_stopping`` on a fixed 10-fold split.
+
+    Args:
+        data: Feature matrix passed through to the cross-validation routine.
+        label: Target vector aligned with ``data``.
+
+    Returns:
+        Dict with the best ``alpha`` and ``l1_ratio`` found by the study.
+    """
+    set_seed(42)
+    # The splitter has a fixed random_state, so every trial sees the same
+    # folds; build it once instead of once per trial.
+    outer_cv = KFold(n_splits=10, shuffle=True, random_state=42)
+    def objective(trial):
+        alpha = trial.suggest_float("alpha", 1e-4, 1.0, log=True)
+        l1_ratio = trial.suggest_categorical("l1_ratio", [0.1, 0.3, 0.5, 0.7, 0.9])
+        try:
+            corr_score = run_nested_cv_with_early_stopping(
+                data=data,
+                label=label,
+                outer_cv=outer_cv,
+                alpha=alpha,
+                l1_ratio=l1_ratio
+            )
+        except TrialPruned:
+            # Rank an unusable trial below every valid correlation.
+            return float("-inf")
+        return corr_score
+    # Optuna samplers own their RNG, so set_seed() alone does not make the
+    # search repeatable; seed the sampler explicitly.
+    sampler = optuna.samplers.TPESampler(seed=42)
+    study = optuna.create_study(direction="maximize", sampler=sampler)
+    study.optimize(objective, n_trials=20)
+    print("best params:", study.best_params)
+    print("successfully")
+    return study.best_params

gpbench/method_reg/ElasticNet/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+# Package entry point: re-export the regression runner defined in ElasticNet.py.
+from .ElasticNet import ElasticNet_reg
+# Short alias so the runner is reachable under the method's own name.
+ElasticNet = ElasticNet_reg
+# Names exported by `from <package> import *`.
+__all__ = ["ElasticNet","ElasticNet_reg"]

gpbench/method_reg/G2PDeep/G2PDeep_Hyperparameters.py ADDED Viewed

@@ -0,0 +1,107 @@
+import os
+import time
+import psutil
+import random
+import torch
+import numpy as np
+import optuna
+from sklearn.model_selection import KFold, train_test_split
+from .base_G2PDeep import G2PDeep, ModelHyperparams
+from scipy.stats import pearsonr
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+from torch.utils.data  import DataLoader, TensorDataset
+from optuna.exceptions import TrialPruned
+def run_nested_cv_with_early_stopping(data, label, nsnp, learning_rate, batch_size, patience, epoch=1000):
+    """Score one G2PDeep training configuration with 10-fold cross-validation.
+
+    For every fold, 10% of the training part is held out as a validation set
+    for the early stopping done inside ``model.train_model``; the model is then
+    evaluated on the fold's test part.
+
+    Args:
+        data: Model input array, indexed along its first axis by the fold indices.
+        label: Target vector aligned with ``data``.
+        nsnp: Number of SNPs, forwarded to the ``G2PDeep`` constructor.
+        learning_rate: Optimiser learning rate.
+        batch_size: Mini-batch size for all three data loaders.
+        patience: Early-stopping patience forwarded to ``train_model``.
+        epoch: Maximum number of training epochs per fold.
+
+    Returns:
+        Mean Pearson correlation over the folds (``0.0`` if no fold ran).
+
+    Raises:
+        TrialPruned: If a fold yields a NaN Pearson correlation.
+    """
+    # Fall back to the CPU when no GPU is present instead of failing on "cuda:0".
+    cuda_available = torch.cuda.is_available()
+    device = torch.device("cuda:0" if cuda_available else "cpu")
+    print("Starting 10-fold cross-validation...")
+    kf = KFold(n_splits=10, shuffle=True, random_state=42)
+    all_mse, all_mae, all_r2, all_pcc = [], [], [], []
+    process = psutil.Process(os.getpid())
+    for fold, (train_index, test_index) in enumerate(kf.split(data)):
+        print(f"Running fold {fold}...")
+        fold_start_time = time.time()
+        if cuda_available:
+            # Make the reported GPU peak refer to this fold only.
+            torch.cuda.reset_peak_memory_stats()
+        X_train, X_test = data[train_index], data[test_index]
+        y_train, y_test = label[train_index], label[test_index]
+        X_train_sub, X_valid, y_train_sub, y_valid = train_test_split(X_train, y_train, test_size=0.1, random_state=42)
+        x_train_tensor = torch.from_numpy(X_train_sub).float()
+        y_train_tensor = torch.from_numpy(y_train_sub).float()
+        x_valid_tensor = torch.from_numpy(X_valid).float()
+        y_valid_tensor = torch.from_numpy(y_valid).float()
+        x_test_tensor = torch.from_numpy(X_test).float()
+        y_test_tensor = torch.from_numpy(y_test).float()
+        train_data = TensorDataset(x_train_tensor, y_train_tensor)
+        valid_data = TensorDataset(x_valid_tensor, y_valid_tensor)
+        test_data = TensorDataset(x_test_tensor, y_test_tensor)
+        train_loader = DataLoader(train_data, batch_size, shuffle=True)
+        valid_loader = DataLoader(valid_data, batch_size, shuffle=False)
+        test_loader = DataLoader(test_data, batch_size, shuffle=False)
+        hp = ModelHyperparams()
+        model = G2PDeep(nsnp=nsnp, hyperparams = hp)
+        model.train_model(train_loader, valid_loader, epoch, learning_rate, patience, device)
+        y_pred = model.predict(test_loader, device)
+        mse = mean_squared_error(y_test, y_pred)
+        r2 = r2_score(y_test, y_pred)
+        mae = mean_absolute_error(y_test, y_pred)
+        pcc, _ = pearsonr(y_test, y_pred)
+        if np.isnan(pcc):
+            # A NaN correlation makes the trial score meaningless.
+            print(f"Fold {fold} resulted in NaN PCC, pruning the trial...")
+            raise TrialPruned()
+        all_mse.append(mse)
+        all_r2.append(r2)
+        all_mae.append(mae)
+        all_pcc.append(pcc)
+        fold_time = time.time() - fold_start_time
+        fold_gpu_mem = torch.cuda.max_memory_allocated() / 1024**2 if cuda_available else 0
+        fold_cpu_mem = process.memory_info().rss / 1024**2
+        print(f'Fold {fold}:  Corr={pcc:.4f}, MAE={mae:.4f}, MSE={mse:.4f}, R2={r2:.4f}, Time={fold_time:.2f}s, '
+                f'GPU={fold_gpu_mem:.2f}MB, CPU={fold_cpu_mem:.2f}MB')
+    return np.mean(all_pcc) if all_pcc else 0.0
+def set_seed(seed=42):
+    """Seed the Python, NumPy and PyTorch RNGs and put cuDNN in deterministic mode.
+
+    The CUDA generators are seeded only when a GPU is available.
+    """
+    torch.backends.cudnn.benchmark = False
+    torch.backends.cudnn.deterministic = True
+    for seeder in (random.seed, np.random.seed, torch.manual_seed):
+        seeder(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+def Hyperparameter(data, label, nsnp):
+    """Tune G2PDeep training settings with a 20-trial Optuna study.
+
+    Searches the learning rate, batch size and early-stopping patience; each
+    trial is scored by the mean Pearson correlation returned from
+    ``run_nested_cv_with_early_stopping``.
+
+    Args:
+        data: Model input array passed through to the cross-validation routine.
+        label: Target vector aligned with ``data``.
+        nsnp: Number of SNPs, forwarded to the model constructor.
+
+    Returns:
+        Dict with the best ``learning_rate``, ``batch_size`` and ``patience``.
+    """
+    set_seed(42)
+    def objective(trial):
+        lr = trial.suggest_float("learning_rate", 1e-4, 0.1)
+        batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
+        patience = trial.suggest_int("patience", 1, 10)
+        try:
+            corr_score = run_nested_cv_with_early_stopping(
+                data=data,
+                label=label,
+                nsnp=nsnp,
+                learning_rate=lr,
+                batch_size=batch_size,
+                patience=patience
+            )
+        except TrialPruned:
+            # Rank an unusable trial below every valid correlation.
+            return float("-inf")
+        return corr_score
+    # Optuna samplers own their RNG, so set_seed() alone does not make the
+    # search repeatable; seed the sampler explicitly.
+    sampler = optuna.samplers.TPESampler(seed=42)
+    study = optuna.create_study(direction="maximize", sampler=sampler)
+    study.optimize(objective, n_trials=20)
+    print("best params:", study.best_params)
+    print("successfully")
+    return study.best_params

gpbench/method_reg/G2PDeep/G2Pdeep.py ADDED Viewed

@@ -0,0 +1,166 @@
+import os
+import time
+import psutil
+import swanlab
+import argparse
+import random
+import torch
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import KFold, train_test_split
+from .base_G2PDeep import G2PDeep, ModelHyperparams
+from scipy.stats import pearsonr
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+from torch.utils.data  import DataLoader, TensorDataset
+from . import G2PDeep_Hyperparameters
+def parse_args():
+    """Parse the command-line options of the G2PDeep regression runner.
+
+    Returns:
+        ``argparse.Namespace`` with ``methods``, ``species``, ``phe``,
+        ``data_dir``, ``result_dir``, ``epoch``, ``batch_size``, ``lr`` and
+        ``patience``.
+    """
+    parser = argparse.ArgumentParser(description="Argument parser")
+    # The help text used to read 'Random seed', which does not describe this option.
+    parser.add_argument('--methods', type=str, default='G2PDeep/', help='Method name used as prefix of the result sub-directory')
+    parser.add_argument('--species', type=str, default='')
+    parser.add_argument('--phe', type=str, default='', help='Dataset name')
+    parser.add_argument('--data_dir', type=str, default='../../data/')
+    parser.add_argument('--result_dir', type=str, default='result/')
+    parser.add_argument('--epoch', type=int, default=1000, help='Number of training rounds')
+    parser.add_argument('--batch_size', type=int, default=64, help='Batch size')
+    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate')
+    parser.add_argument('--patience', type=int, default=10, help='Patience for early stopping')
+    args = parser.parse_args()
+    return args
+def process_snp_data(data: np.ndarray) -> np.ndarray:
+    """One-hot encode a 2-D matrix of genotype codes into 4 classes.
+
+    Args:
+        data: 2-D array whose entries are codes in ``0..3``. Each row is
+            converted with ``pd.to_numeric``, so numeric strings are accepted.
+
+    Returns:
+        ``float32`` array of shape ``(data.shape[0], data.shape[1], 4)`` where
+        the last axis is the one-hot encoding of the corresponding code.
+
+    Raises:
+        ValueError: If a row contains a non-numeric entry or a code outside
+            ``0..3``. Previously a negative code silently wrapped around to
+            another class through negative indexing.
+    """
+    nb_classes = 4
+    # Build the lookup table once instead of once per row.
+    lookup = np.eye(nb_classes, dtype=np.float32)
+    onehot_x = np.empty(
+        shape=(data.shape[0], data.shape[1], nb_classes),
+        dtype=np.float32
+    )
+    for i in range(data.shape[0]):
+        codes = np.asarray(pd.to_numeric(data[i], errors='coerce'), dtype=np.float64).reshape(-1)
+        # NaN (produced by the coercion) fails both comparisons, so it is rejected too.
+        if not np.all((codes >= 0) & (codes < nb_classes)):
+            raise ValueError(
+                f"Row {i} contains genotype codes that are non-numeric or outside 0..{nb_classes - 1}"
+            )
+        onehot_x[i] = lookup[codes.astype(np.int64)]
+    return onehot_x
+def load_data(args):
+    """Load genotype and phenotype archives for ``args.species`` and one-hot encode the genotypes.
+
+    Reads ``genotype.npz`` (``arr_0``) and ``phenotype.npz`` (``arr_0`` and
+    ``arr_1``) from ``<args.data_dir>/<args.species>``. Genotype entries equal
+    to ``-9`` are replaced by ``0`` before encoding.
+
+    Args:
+        args: Object with ``data_dir`` and ``species`` attributes.
+
+    Returns:
+        Tuple ``(xData, yData, nsample, nsnp, names)`` where ``xData`` is the
+        one-hot encoded genotype array, ``yData`` is ``arr_0`` and ``names``
+        is ``arr_1`` of the phenotype archive.
+    """
+    species_dir = os.path.join(args.data_dir, args.species)
+    # Open each archive once and close it again; np.load keeps the file
+    # handle of an .npz open until the returned object is closed.
+    with np.load(os.path.join(species_dir, 'genotype.npz')) as genotype:
+        xData = genotype["arr_0"]
+    with np.load(os.path.join(species_dir, 'phenotype.npz')) as phenotype:
+        yData = phenotype["arr_0"]
+        names = phenotype["arr_1"]
+    xData[xData == -9] = 0
+    xData = process_snp_data(xData)
+    nsample = xData.shape[0]
+    nsnp = xData.shape[1]
+    print("Number of samples: ", nsample)
+    print("Number of SNPs: ", nsnp)
+    return xData, yData, nsample, nsnp, names
+def set_seed(seed=42):
+    """Seed the Python, NumPy and PyTorch (CPU and CUDA) RNGs and make cuDNN deterministic."""
+    torch.backends.cudnn.benchmark = False
+    torch.backends.cudnn.deterministic = True
+    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all)
+    for seeder in seeders:
+        seeder(seed)
+def run_nested_cv(args, data, label, nsnp, device):
+    """Run the final 10-fold cross-validation of G2PDeep and save per-fold predictions.
+
+    For every fold, 10% of the training part is held out as validation data
+    for early stopping. Test-set predictions are written to
+    ``fold<k>.csv`` inside
+    ``<args.result_dir>/<args.methods><args.species><args.phe>`` and summary
+    metrics are printed.
+
+    Args:
+        args: Namespace providing ``result_dir``, ``methods``, ``species``,
+            ``phe``, ``batch_size``, ``epoch``, ``lr`` and ``patience``.
+        data: Model input array, indexed along its first axis by the fold indices.
+        label: Target vector aligned with ``data``.
+        nsnp: Number of SNPs, forwarded to the ``G2PDeep`` constructor.
+        device: ``torch.device`` used for training and prediction.
+
+    Returns:
+        None.
+    """
+    result_dir = os.path.join(args.result_dir, args.methods + args.species + args.phe)
+    os.makedirs(result_dir, exist_ok=True)
+    print("Starting 10-fold cross-validation...")
+    kf = KFold(n_splits=10, shuffle=True, random_state=42)
+    all_mse, all_mae, all_r2, all_pcc = [], [], [], []
+    # The caller may hand in a CPU device; CUDA bookkeeping must then be skipped
+    # because resetting CUDA statistics without a GPU raises.
+    cuda_available = torch.cuda.is_available()
+    process = psutil.Process(os.getpid())
+    time_star = time.time()
+    for fold, (train_index, test_index) in enumerate(kf.split(data)):
+        print(f"Running fold {fold}...")
+        fold_start_time = time.time()
+        X_train, X_test = data[train_index], data[test_index]
+        y_train, y_test = label[train_index], label[test_index]
+        X_train_sub, X_valid, y_train_sub, y_valid = train_test_split(X_train, y_train, test_size=0.1, random_state=42)
+        x_train_tensor = torch.from_numpy(X_train_sub).float()
+        y_train_tensor = torch.from_numpy(y_train_sub).float()
+        x_valid_tensor = torch.from_numpy(X_valid).float()
+        y_valid_tensor = torch.from_numpy(y_valid).float()
+        x_test_tensor = torch.from_numpy(X_test).float()
+        y_test_tensor = torch.from_numpy(y_test).float()
+        train_data = TensorDataset(x_train_tensor, y_train_tensor)
+        valid_data = TensorDataset(x_valid_tensor, y_valid_tensor)
+        test_data = TensorDataset(x_test_tensor, y_test_tensor)
+        train_loader = DataLoader(train_data, args.batch_size, shuffle=True)
+        valid_loader = DataLoader(valid_data, args.batch_size, shuffle=False)
+        test_loader = DataLoader(test_data, args.batch_size, shuffle=False)
+        hp = ModelHyperparams()
+        model = G2PDeep(nsnp=nsnp, hyperparams=hp).to(device)
+        model.train_model(train_loader, valid_loader, args.epoch, args.lr, args.patience, device)
+        y_pred = model.predict(test_loader, device)
+        mse = mean_squared_error(y_test, y_pred)
+        r2 = r2_score(y_test, y_pred)
+        mae = mean_absolute_error(y_test, y_pred)
+        pcc, _ = pearsonr(y_test, y_pred)
+        all_mse.append(mse)
+        all_r2.append(r2)
+        all_mae.append(mae)
+        all_pcc.append(pcc)
+        fold_time = time.time() - fold_start_time
+        fold_gpu_mem = torch.cuda.max_memory_allocated() / 1024**2 if cuda_available else 0
+        fold_cpu_mem = process.memory_info().rss / 1024**2
+        print(f'Fold {fold}:  Corr={pcc:.4f}, MAE={mae:.4f}, MSE={mse:.4f}, R2={r2:.4f}, Time={fold_time:.2f}s, '
+              f'GPU={fold_gpu_mem:.2f}MB, CPU={fold_cpu_mem:.2f}MB')
+        if cuda_available:
+            # Release cached blocks and restart peak tracking for the next fold.
+            torch.cuda.empty_cache()
+            torch.cuda.reset_peak_memory_stats()
+        results_df = pd.DataFrame({'Y_test': y_test, 'Y_pred': y_pred})
+        results_df.to_csv(os.path.join(result_dir, f"fold{fold}.csv"), index=False)
+    print("\n===== Cross-validation summary =====")
+    print(f"Average PCC: {np.mean(all_pcc):.4f} ± {np.std(all_pcc):.4f}")
+    print(f"Average MAE: {np.mean(all_mae):.4f} ± {np.std(all_mae):.4f}")
+    print(f"Average MSE: {np.mean(all_mse):.4f} ± {np.std(all_mse):.4f}")
+    print(f"Average R2 : {np.mean(all_r2):.4f} ± {np.std(all_r2):.4f}")
+    print(f"Time: {time.time() - time_star:.2f}s")
+def G2PDeep_reg():
+    """Run the full G2PDeep regression pipeline for every configured species and phenotype.
+
+    For each phenotype column the missing labels are replaced by the column
+    mean, the training hyperparameters are tuned with
+    ``G2PDeep_Hyperparameters.Hyperparameter`` and a final 10-fold
+    cross-validation is run with the best settings.
+
+    Returns:
+        None.
+    """
+    set_seed(42)
+    cuda_available = torch.cuda.is_available()
+    if cuda_available:
+        torch.cuda.empty_cache()
+    args = parse_args()
+    device = torch.device("cuda:0" if cuda_available else "cpu")
+    all_species =['Cotton/']
+    for species in all_species:
+        args.species = species
+        args.device = device
+        X, Y, _, nsnp, names = load_data(args)
+        for j, phe in enumerate(names):
+            args.phe = phe
+            print("starting run " + args.methods + args.species + args.phe)
+            label = Y[:, j]
+            # Impute missing phenotypes with the mean of the observed values.
+            label = np.nan_to_num(label, nan=np.nanmean(label))
+            best_params = G2PDeep_Hyperparameters.Hyperparameter(X, label, nsnp)
+            args.lr = best_params['learning_rate']
+            args.patience = best_params['patience']
+            args.batch_size = best_params['batch_size']
+            start_time = time.time()
+            if cuda_available:
+                # Resetting CUDA statistics without a GPU raises, so only do
+                # it when the run is not on the CPU fallback.
+                torch.cuda.reset_peak_memory_stats()
+            run_nested_cv(args, data=X, label=label, nsnp = nsnp, device = args.device)
+            elapsed_time = time.time() - start_time
+            print(f"running time: {elapsed_time:.2f} s")
+            print("successfully")
+# Script entry point: run the full pipeline when the module is executed directly.
+# NOTE(review): the module uses package-relative imports, so it presumably has
+# to be started as a module (python -m ...) rather than by file path — confirm.
+if __name__ == "__main__":
+    G2PDeep_reg()

gpbench/method_reg/G2PDeep/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+# Package entry point: re-export the regression runner defined in G2Pdeep.py.
+from .G2Pdeep import G2PDeep_reg
+# Short alias so the runner is reachable under the method's own name.
+G2PDeep = G2PDeep_reg
+# Names exported by `from <package> import *`.
+__all__ = ["G2PDeep","G2PDeep_reg"]

gpbench/method_reg/G2PDeep/base_G2PDeep.py ADDED Viewed

@@ -0,0 +1,209 @@
+import torch
+import torch.nn as nn
+from typing import List, Optional
+import numpy as np
+class ModelHyperparams:
+    """Architecture settings consumed by ``G2PDeep``.
+
+    Every list argument that is ``None`` (or otherwise falsy, e.g. an empty
+    list) is replaced by its built-in default.
+    """
+    def __init__(self,
+                 left_tower_filters_list: Optional[List[int]] = None,
+                 left_tower_kernel_size_list: Optional[List[int]] = None,
+                 right_tower_filters_list: Optional[List[int]] = None,
+                 right_tower_kernel_size_list: Optional[List[int]] = None,
+                 central_tower_filters_list: Optional[List[int]] = None,
+                 central_tower_kernel_size_list: Optional[List[int]] = None,
+                 dnn_size_list: Optional[List[int]] = None,
+                 activation: str = "linear",
+                 dropout_rate: float = 0.75):   # original author note: "make smaller"
+        def _or_default(given, fallback):
+            # Same truthiness rule as `given or fallback`.
+            return given if given else fallback
+        # Scalar settings are stored as passed.
+        self.activation = activation
+        self.dropout_rate = dropout_rate
+        # Convolution towers: filter counts and kernel sizes, one entry per layer.
+        self.left_tower_filters_list = _or_default(left_tower_filters_list, [4, 4])
+        self.left_tower_kernel_size_list = _or_default(left_tower_kernel_size_list, [3, 5])
+        self.right_tower_filters_list = _or_default(right_tower_filters_list, [4])
+        self.right_tower_kernel_size_list = _or_default(right_tower_kernel_size_list, [3])
+        self.central_tower_filters_list = _or_default(central_tower_filters_list, [4])
+        self.central_tower_kernel_size_list = _or_default(central_tower_kernel_size_list, [3])
+        # Widths of the dense head; the last entry is the output width.
+        self.dnn_size_list = _or_default(dnn_size_list, [1])
+def get_activation(name: str):
+    """Return a new activation module for ``name`` (case-insensitive).
+
+    ``"relu"`` maps to ``nn.ReLU`` and ``"linear"`` to ``nn.Identity``.
+
+    Raises:
+        ValueError: If ``name`` is neither of the supported activations.
+    """
+    factories = {"relu": nn.ReLU, "linear": nn.Identity}
+    key = name.lower()
+    if key not in factories:
+        raise ValueError(f"Unsupported activation: {name}")
+    return factories[key]()
+class G2PDeep(nn.Module):
+    """Two-tower 1-D convolutional network with a dense head.
+
+    The input is processed by a left and a right convolution tower in
+    parallel; their outputs are summed, passed through a central convolution
+    tower, flattened and fed to a stack of linear layers.
+    """
+    def __init__(self, nsnp: int, hyperparams: ModelHyperparams):
+        """Build all layers.
+
+        Args:
+            nsnp: Sequence length; used to size the first linear layer.
+            hyperparams: Filter counts, kernel sizes, dense widths, activation
+                name and dropout rate.
+        """
+        super().__init__()
+        self.nsnp = nsnp
+        hp = hyperparams
+        # --- Left Tower ---
+        self.left_convs = nn.ModuleList()
+        in_ch = 4
+        for filt, k in zip(hp.left_tower_filters_list, hp.left_tower_kernel_size_list):
+            self.left_convs.append(nn.Conv1d(in_ch, filt, k, padding="same"))
+            in_ch = filt
+        # --- Right Tower ---
+        self.right_convs = nn.ModuleList()
+        in_ch = 4
+        for filt, k in zip(hp.right_tower_filters_list, hp.right_tower_kernel_size_list):
+            self.right_convs.append(nn.Conv1d(in_ch, filt, k, padding="same"))
+            in_ch = filt
+        # --- Channel alignment ---
+        # The towers are summed in forward(), so the narrower one is projected
+        # with a 1x1 convolution to the wider channel count.
+        left_out_ch = hp.left_tower_filters_list[-1]
+        right_out_ch = hp.right_tower_filters_list[-1]
+        self.merged_ch = max(left_out_ch, right_out_ch)
+        self.left_proj = nn.Conv1d(left_out_ch, self.merged_ch, 1) \
+            if left_out_ch != self.merged_ch else nn.Identity()
+        self.right_proj = nn.Conv1d(right_out_ch, self.merged_ch, 1) \
+            if right_out_ch != self.merged_ch else nn.Identity()
+        # --- Central Tower ---
+        self.central_convs = nn.ModuleList()
+        in_ch = self.merged_ch
+        for filt, k in zip(hp.central_tower_filters_list, hp.central_tower_kernel_size_list):
+            self.central_convs.append(nn.Conv1d(in_ch, filt, k, padding="same"))
+            in_ch = filt
+        # --DNN ---
+        self.dropout = nn.Dropout(p=hp.dropout_rate)
+        final_conv_ch = hp.central_tower_filters_list[-1]
+        # "same" padding keeps the sequence length, so the flattened size is
+        # channels * nsnp.
+        flattened_dim = final_conv_ch * nsnp
+        dnn_layers = []
+        prev = flattened_dim
+        # Hidden layers: every width except the last gets activation + dropout.
+        for out_sz in hp.dnn_size_list[:-1]:
+                dnn_layers.append(nn.Linear(prev, out_sz))
+                dnn_layers.append(get_activation(hp.activation))
+                dnn_layers.append(nn.Dropout(hp.dropout_rate))
+                prev = out_sz
+        # Output layer: plain linear, no activation.
+        dnn_layers.append(nn.Linear(prev, hp.dnn_size_list[-1]))
+        self.dnn = nn.Sequential(*dnn_layers)
+        self.activation = get_activation(hp.activation)
+    def forward(self, x):
+        """Run the network on ``x`` and return the output of the dense head.
+
+        Raises:
+            ValueError: If the last dimension of ``x`` is not 4.
+        """
+        # (B, Seq, 4) -> (B, 4, Seq)
+        if x.shape[-1] != 4:
+            raise ValueError(f"Expected input with 4 channels, got {x.shape}")
+        x = x.transpose(1, 2)
+        # Left tower
+        left = x
+        for conv in self.left_convs:
+            left = self.activation(conv(left))
+        # Right tower
+        right = x
+        for conv in self.right_convs:
+            right = self.activation(conv(right))
+        # Element-wise sum of the (channel-aligned) tower outputs.
+        merged = self.left_proj(left) + self.right_proj(right)
+        # Central tower
+        for conv in self.central_convs:
+            merged = self.activation(conv(merged))
+        x_flat = torch.flatten(merged, 1)
+        x_flat = self.dropout(x_flat)
+        return self.dnn(x_flat)
+    def train_model(self, train_loader, valid_loader, num_epochs, learning_rate, patience, device):
+        """Train with Adam and MSE loss, early-stopping on the validation loss.
+
+        Args:
+            train_loader: Loader yielding ``(inputs, labels)`` training batches.
+            valid_loader: Loader yielding ``(inputs, labels)`` validation batches.
+            num_epochs: Maximum number of epochs.
+            learning_rate: Adam learning rate.
+            patience: Number of consecutive non-improving epochs tolerated
+                before training stops.
+            device: ``torch.device`` to train on; mixed precision is used when
+                its type is ``'cuda'``.
+
+        Returns:
+            The best (lowest) validation loss seen. The weights of that epoch
+            are restored before returning.
+        """
+        optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate, weight_decay=1e-4)
+        criterion = nn.MSELoss()
+        self.to(device)
+        # Enable mixed-precision training
+        use_amp = device.type == 'cuda'
+        scaler = torch.amp.GradScaler('cuda') if use_amp else None
+        best_loss = float('inf')
+        best_state = None
+        trigger_times = 0
+        for epoch in range(num_epochs):
+            # Training
+            self.train()
+            train_loss = 0.0
+            for inputs, labels in train_loader:
+                inputs = inputs.to(device, non_blocking=True)
+                # Add a trailing dimension so labels line up with the model output.
+                labels = labels.to(device, non_blocking=True).unsqueeze(1)
+                optimizer.zero_grad()
+                if use_amp:
+                    with torch.amp.autocast('cuda'):
+                        outputs = self(inputs)
+                        loss = criterion(outputs, labels)
+                    scaler.scale(loss).backward()
+                    scaler.step(optimizer)
+                    scaler.update()
+                else:
+                    outputs = self(inputs)
+                    loss = criterion(outputs, labels)
+                    loss.backward()
+                    optimizer.step()
+                # Weight the batch mean by batch size to get a per-sample average below.
+                train_loss += loss.item() * inputs.size(0)
+            train_loss /= len(train_loader.dataset)
+            # Validation
+            self.eval()
+            valid_loss = 0.0
+            with torch.no_grad():
+                for inputs, labels in valid_loader:
+                    inputs = inputs.to(device, non_blocking=True)
+                    labels = labels.to(device, non_blocking=True).unsqueeze(1)
+                    if use_amp:
+                        with torch.amp.autocast('cuda'):
+                            outputs = self(inputs)
+                            loss = criterion(outputs, labels)
+                    else:
+                        outputs = self(inputs)
+                        loss = criterion(outputs, labels)
+                    valid_loss += loss.item() * inputs.size(0)
+            valid_loss /= len(valid_loader.dataset)
+            # Early stopping
+            if valid_loss < best_loss:
+                best_loss = valid_loss
+                # Keep a CPU copy of the best weights.
+                best_state = {k: v.cpu().clone() for k, v in self.state_dict().items()}
+                trigger_times = 0
+            else:
+                trigger_times += 1
+                if trigger_times >= patience:
+                    print(f"Early stopping at epoch {epoch+1}")
+                    break
+        # Restore the best weights on whatever device the model currently lives on.
+        if best_state is not None:
+            cur_device = next(self.parameters()).device
+            best_state = {k: v.to(cur_device) for k, v in best_state.items()}
+            self.load_state_dict(best_state)
+        return best_loss
+    def predict(self, test_loader, device):
+        """Return the model outputs for every batch of ``test_loader`` as a squeezed NumPy array.
+
+        The labels yielded by the loader are ignored.
+        """
+        self.eval()
+        self.to(device)
+        y_pred_list = []
+        use_amp = device.type == 'cuda'
+        with torch.no_grad():
+            for inputs, _ in test_loader:
+                inputs = inputs.to(device, non_blocking=True)
+                if use_amp:
+                    with torch.amp.autocast('cuda'):
+                        outputs = self(inputs)
+                else:
+                    outputs = self(inputs)
+                # NOTE(review): outputs produced under autocast may be float16 —
+                # confirm whether predictions should be cast to float32 here.
+                y_pred_list.append(outputs.cpu())
+        y_pred = torch.cat(y_pred_list, dim=0).numpy()  # convert to NumPy once, after concatenation
+        y_pred = np.squeeze(y_pred)
+        return y_pred