PyPI - gpbench - Versions diffs - 1.0.0__py3-none-any.whl - Mend

gpbench 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (188) hide show

gp_agent_tool/compute_dataset_feature.py +67 -0
gp_agent_tool/config.py +65 -0
gp_agent_tool/experience/create_masked_dataset_summary.py +97 -0
gp_agent_tool/experience/dataset_summary_info.py +13 -0
gp_agent_tool/experience/experience_info.py +12 -0
gp_agent_tool/experience/get_matched_experience.py +111 -0
gp_agent_tool/llm_client.py +119 -0
gp_agent_tool/logging_utils.py +24 -0
gp_agent_tool/main.py +347 -0
gp_agent_tool/read_agent/__init__.py +46 -0
gp_agent_tool/read_agent/nodes.py +674 -0
gp_agent_tool/read_agent/prompts.py +547 -0
gp_agent_tool/read_agent/python_repl_tool.py +165 -0
gp_agent_tool/read_agent/state.py +101 -0
gp_agent_tool/read_agent/workflow.py +54 -0
gpbench/__init__.py +25 -0
gpbench/_selftest.py +104 -0
gpbench/method_class/BayesA/BayesA_class.py +141 -0
gpbench/method_class/BayesA/__init__.py +5 -0
gpbench/method_class/BayesA/_bayesfromR.py +96 -0
gpbench/method_class/BayesA/_param_free_base_model.py +84 -0
gpbench/method_class/BayesA/bayesAfromR.py +16 -0
gpbench/method_class/BayesB/BayesB_class.py +140 -0
gpbench/method_class/BayesB/__init__.py +5 -0
gpbench/method_class/BayesB/_bayesfromR.py +96 -0
gpbench/method_class/BayesB/_param_free_base_model.py +84 -0
gpbench/method_class/BayesB/bayesBfromR.py +16 -0
gpbench/method_class/BayesC/BayesC_class.py +141 -0
gpbench/method_class/BayesC/__init__.py +4 -0
gpbench/method_class/BayesC/_bayesfromR.py +96 -0
gpbench/method_class/BayesC/_param_free_base_model.py +84 -0
gpbench/method_class/BayesC/bayesCfromR.py +16 -0
gpbench/method_class/CropARNet/CropARNet_class.py +186 -0
gpbench/method_class/CropARNet/CropARNet_he_class.py +154 -0
gpbench/method_class/CropARNet/__init__.py +5 -0
gpbench/method_class/CropARNet/base_CropARNet_class.py +178 -0
gpbench/method_class/Cropformer/Cropformer_class.py +308 -0
gpbench/method_class/Cropformer/__init__.py +5 -0
gpbench/method_class/Cropformer/cropformer_he_class.py +221 -0
gpbench/method_class/DL_GWAS/DL_GWAS_class.py +250 -0
gpbench/method_class/DL_GWAS/DL_GWAS_he_class.py +169 -0
gpbench/method_class/DL_GWAS/__init__.py +5 -0
gpbench/method_class/DNNGP/DNNGP_class.py +163 -0
gpbench/method_class/DNNGP/DNNGP_he_class.py +138 -0
gpbench/method_class/DNNGP/__init__.py +5 -0
gpbench/method_class/DNNGP/base_dnngp_class.py +116 -0
gpbench/method_class/DeepCCR/DeepCCR_class.py +172 -0
gpbench/method_class/DeepCCR/DeepCCR_he_class.py +161 -0
gpbench/method_class/DeepCCR/__init__.py +5 -0
gpbench/method_class/DeepCCR/base_DeepCCR_class.py +209 -0
gpbench/method_class/DeepGS/DeepGS_class.py +184 -0
gpbench/method_class/DeepGS/DeepGS_he_class.py +150 -0
gpbench/method_class/DeepGS/__init__.py +5 -0
gpbench/method_class/DeepGS/base_deepgs_class.py +153 -0
gpbench/method_class/EIR/EIR_class.py +276 -0
gpbench/method_class/EIR/EIR_he_class.py +184 -0
gpbench/method_class/EIR/__init__.py +5 -0
gpbench/method_class/EIR/utils/__init__.py +0 -0
gpbench/method_class/EIR/utils/array_output_modules.py +97 -0
gpbench/method_class/EIR/utils/common.py +65 -0
gpbench/method_class/EIR/utils/lcl_layers.py +235 -0
gpbench/method_class/EIR/utils/logging.py +59 -0
gpbench/method_class/EIR/utils/mlp_layers.py +92 -0
gpbench/method_class/EIR/utils/models_locally_connected.py +642 -0
gpbench/method_class/EIR/utils/transformer_models.py +546 -0
gpbench/method_class/ElasticNet/ElasticNet_class.py +133 -0
gpbench/method_class/ElasticNet/ElasticNet_he_class.py +91 -0
gpbench/method_class/ElasticNet/__init__.py +5 -0
gpbench/method_class/G2PDeep/G2PDeep_he_class.py +217 -0
gpbench/method_class/G2PDeep/G2Pdeep_class.py +205 -0
gpbench/method_class/G2PDeep/__init__.py +5 -0
gpbench/method_class/G2PDeep/base_G2PDeep_class.py +209 -0
gpbench/method_class/GBLUP/GBLUP_class.py +183 -0
gpbench/method_class/GBLUP/__init__.py +5 -0
gpbench/method_class/GEFormer/GEFormer_class.py +169 -0
gpbench/method_class/GEFormer/GEFormer_he_class.py +137 -0
gpbench/method_class/GEFormer/__init__.py +5 -0
gpbench/method_class/GEFormer/gMLP_class.py +357 -0
gpbench/method_class/LightGBM/LightGBM_class.py +224 -0
gpbench/method_class/LightGBM/LightGBM_he_class.py +121 -0
gpbench/method_class/LightGBM/__init__.py +5 -0
gpbench/method_class/RF/RF_GPU_class.py +165 -0
gpbench/method_class/RF/RF_GPU_he_class.py +124 -0
gpbench/method_class/RF/__init__.py +5 -0
gpbench/method_class/SVC/SVC_GPU.py +181 -0
gpbench/method_class/SVC/SVC_GPU_he.py +106 -0
gpbench/method_class/SVC/__init__.py +5 -0
gpbench/method_class/SoyDNGP/AlexNet_206_class.py +179 -0
gpbench/method_class/SoyDNGP/SoyDNGP_class.py +189 -0
gpbench/method_class/SoyDNGP/SoyDNGP_he_class.py +112 -0
gpbench/method_class/SoyDNGP/__init__.py +5 -0
gpbench/method_class/XGBoost/XGboost_GPU_class.py +198 -0
gpbench/method_class/XGBoost/XGboost_GPU_he_class.py +178 -0
gpbench/method_class/XGBoost/__init__.py +5 -0
gpbench/method_class/__init__.py +52 -0
gpbench/method_class/rrBLUP/__init__.py +5 -0
gpbench/method_class/rrBLUP/rrBLUP_class.py +140 -0
gpbench/method_reg/BayesA/BayesA.py +116 -0
gpbench/method_reg/BayesA/__init__.py +5 -0
gpbench/method_reg/BayesA/_bayesfromR.py +96 -0
gpbench/method_reg/BayesA/_param_free_base_model.py +84 -0
gpbench/method_reg/BayesA/bayesAfromR.py +16 -0
gpbench/method_reg/BayesB/BayesB.py +117 -0
gpbench/method_reg/BayesB/__init__.py +5 -0
gpbench/method_reg/BayesB/_bayesfromR.py +96 -0
gpbench/method_reg/BayesB/_param_free_base_model.py +84 -0
gpbench/method_reg/BayesB/bayesBfromR.py +16 -0
gpbench/method_reg/BayesC/BayesC.py +115 -0
gpbench/method_reg/BayesC/__init__.py +5 -0
gpbench/method_reg/BayesC/_bayesfromR.py +96 -0
gpbench/method_reg/BayesC/_param_free_base_model.py +84 -0
gpbench/method_reg/BayesC/bayesCfromR.py +16 -0
gpbench/method_reg/CropARNet/CropARNet.py +159 -0
gpbench/method_reg/CropARNet/CropARNet_Hyperparameters.py +109 -0
gpbench/method_reg/CropARNet/__init__.py +5 -0
gpbench/method_reg/CropARNet/base_CropARNet.py +137 -0
gpbench/method_reg/Cropformer/Cropformer.py +313 -0
gpbench/method_reg/Cropformer/Cropformer_Hyperparameters.py +250 -0
gpbench/method_reg/Cropformer/__init__.py +5 -0
gpbench/method_reg/DL_GWAS/DL_GWAS.py +186 -0
gpbench/method_reg/DL_GWAS/DL_GWAS_Hyperparameters.py +125 -0
gpbench/method_reg/DL_GWAS/__init__.py +5 -0
gpbench/method_reg/DNNGP/DNNGP.py +157 -0
gpbench/method_reg/DNNGP/DNNGP_Hyperparameters.py +118 -0
gpbench/method_reg/DNNGP/__init__.py +5 -0
gpbench/method_reg/DNNGP/base_dnngp.py +101 -0
gpbench/method_reg/DeepCCR/DeepCCR.py +149 -0
gpbench/method_reg/DeepCCR/DeepCCR_Hyperparameters.py +110 -0
gpbench/method_reg/DeepCCR/__init__.py +5 -0
gpbench/method_reg/DeepCCR/base_DeepCCR.py +171 -0
gpbench/method_reg/DeepGS/DeepGS.py +165 -0
gpbench/method_reg/DeepGS/DeepGS_Hyperparameters.py +114 -0
gpbench/method_reg/DeepGS/__init__.py +5 -0
gpbench/method_reg/DeepGS/base_deepgs.py +98 -0
gpbench/method_reg/EIR/EIR.py +258 -0
gpbench/method_reg/EIR/EIR_Hyperparameters.py +178 -0
gpbench/method_reg/EIR/__init__.py +5 -0
gpbench/method_reg/EIR/utils/__init__.py +0 -0
gpbench/method_reg/EIR/utils/array_output_modules.py +97 -0
gpbench/method_reg/EIR/utils/common.py +65 -0
gpbench/method_reg/EIR/utils/lcl_layers.py +235 -0
gpbench/method_reg/EIR/utils/logging.py +59 -0
gpbench/method_reg/EIR/utils/mlp_layers.py +92 -0
gpbench/method_reg/EIR/utils/models_locally_connected.py +642 -0
gpbench/method_reg/EIR/utils/transformer_models.py +546 -0
gpbench/method_reg/ElasticNet/ElasticNet.py +123 -0
gpbench/method_reg/ElasticNet/ElasticNet_he.py +83 -0
gpbench/method_reg/ElasticNet/__init__.py +5 -0
gpbench/method_reg/G2PDeep/G2PDeep_Hyperparameters.py +107 -0
gpbench/method_reg/G2PDeep/G2Pdeep.py +166 -0
gpbench/method_reg/G2PDeep/__init__.py +5 -0
gpbench/method_reg/G2PDeep/base_G2PDeep.py +209 -0
gpbench/method_reg/GBLUP/GBLUP_R.py +182 -0
gpbench/method_reg/GBLUP/__init__.py +5 -0
gpbench/method_reg/GEFormer/GEFormer.py +164 -0
gpbench/method_reg/GEFormer/GEFormer_Hyperparameters.py +106 -0
gpbench/method_reg/GEFormer/__init__.py +5 -0
gpbench/method_reg/GEFormer/gMLP.py +341 -0
gpbench/method_reg/LightGBM/LightGBM.py +237 -0
gpbench/method_reg/LightGBM/LightGBM_Hyperparameters.py +77 -0
gpbench/method_reg/LightGBM/__init__.py +5 -0
gpbench/method_reg/MVP/MVP.py +182 -0
gpbench/method_reg/MVP/MVP_Hyperparameters.py +126 -0
gpbench/method_reg/MVP/__init__.py +5 -0
gpbench/method_reg/MVP/base_MVP.py +113 -0
gpbench/method_reg/RF/RF_GPU.py +174 -0
gpbench/method_reg/RF/RF_Hyperparameters.py +163 -0
gpbench/method_reg/RF/__init__.py +5 -0
gpbench/method_reg/SVC/SVC_GPU.py +194 -0
gpbench/method_reg/SVC/SVC_Hyperparameters.py +107 -0
gpbench/method_reg/SVC/__init__.py +5 -0
gpbench/method_reg/SoyDNGP/AlexNet_206.py +185 -0
gpbench/method_reg/SoyDNGP/SoyDNGP.py +179 -0
gpbench/method_reg/SoyDNGP/SoyDNGP_Hyperparameters.py +105 -0
gpbench/method_reg/SoyDNGP/__init__.py +5 -0
gpbench/method_reg/XGBoost/XGboost_GPU.py +188 -0
gpbench/method_reg/XGBoost/XGboost_Hyperparameters.py +167 -0
gpbench/method_reg/XGBoost/__init__.py +5 -0
gpbench/method_reg/__init__.py +55 -0
gpbench/method_reg/rrBLUP/__init__.py +5 -0
gpbench/method_reg/rrBLUP/rrBLUP.py +123 -0
gpbench-1.0.0.dist-info/METADATA +379 -0
gpbench-1.0.0.dist-info/RECORD +188 -0
gpbench-1.0.0.dist-info/WHEEL +5 -0
gpbench-1.0.0.dist-info/entry_points.txt +2 -0
gpbench-1.0.0.dist-info/top_level.txt +3 -0
tests/test_import.py +80 -0
tests/test_method.py +232 -0

gpbench/method_reg/RF/RF_Hyperparameters.py ADDED Viewed

@@ -0,0 +1,163 @@
+import os
+import random
+import torch
+import numpy as np
+from sklearn.model_selection import KFold
+from sklearn.preprocessing import StandardScaler
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
+import optuna
+from scipy.stats import pearsonr
+# Try to import the GPU-accelerated (RAPIDS) stack. CUML_AVAILABLE records
+# whether the import succeeded; the cross-validation code below reads it.
+try:
+    import cudf
+    import cupy as cp
+    from cuml.ensemble import RandomForestRegressor as cuRandomForestRegressor
+    CUML_AVAILABLE = True
+    print("✓ RAPIDS cuML 可用，将支持 GPU 加速")
+except ImportError:
+    # At least one of cudf / cupy / cuml could not be imported, so `cp` and
+    # `cuRandomForestRegressor` may be undefined from here on.
+    CUML_AVAILABLE = False
+    print("⚠ cuML 不可用，将使用 scikit-learn CPU 版本")
+# 使用K折交叉验证并进行RandomForest训练
+def run_nested_cv_with_early_stopping(data, label, outer_cv, n_estimators, max_depth, use_gpu=True):
+    best_corr_coefs = []
+    best_maes = []
+    best_r2s = []
+    best_mses = []
+    # 检查GPU可用性
+    gpu_available = use_gpu and CUML_AVAILABLE and torch.cuda.is_available()
+    if gpu_available:
+        print("🚀 使用 GPU 加速随机森林")
+    else:
+        print("⚠ 使用 CPU 版本 (scikit-learn)")
+    import time
+    time_star = time.time()
+    for fold, (train_idx, test_idx) in enumerate(outer_cv.split(data)):
+        x_train, x_test = data[train_idx], data[test_idx]
+        y_train, y_test = label[train_idx], label[test_idx]
+        # # 标准化数据
+        # scaler = StandardScaler()
+        # x_train = scaler.fit_transform(x_train)
+        # x_test = scaler.transform(x_test)
+        # # ==== y 标准化 ====
+        # scaler_y = StandardScaler()
+        # y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).reshape(-1)
+        # y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1)).reshape(-1)
+        x_train = x_train.astype(np.float32)
+        x_test = x_test.astype(np.float32)
+        y_train_scaled = y_train.astype(np.float32)
+        y_test_scaled = y_test.astype(np.float32)
+        # 将数据转换为 GPU 格式
+        x_train_gpu = cp.asarray(x_train)
+        x_test_gpu = cp.asarray(x_test)
+        y_train_gpu = cp.asarray(y_train_scaled)
+        model = cuRandomForestRegressor(
+            n_estimators=n_estimators,
+            max_depth=max_depth,
+            # min_samples_split=min_samples_split,
+            # min_samples_leaf=min_samples_leaf,
+            # max_features=max_features,
+            random_state=42,
+            n_streams=1  # 使用单个流以获得更好的性能
+        )
+        # 训练模型
+        model.fit(x_train_gpu, y_train_gpu)
+        # 预测
+        y_test_preds = model.predict(x_test_gpu)
+        # 将结果转换回 CPU
+        y_test_preds = cp.asnumpy(y_test_preds)
+        y_test_scaled_cpu = cp.asnumpy(cp.asarray(y_test_scaled))
+        # # 反标准化
+        # y_test_preds = scaler_y.inverse_transform(y_test_preds.reshape(-1, 1)).reshape(-1)
+        # y_test_trues = scaler_y.inverse_transform(y_test_scaled_cpu.reshape(-1, 1)).reshape(-1)
+        y_test_trues = y_test_scaled_cpu.reshape(-1)
+        y_test_preds = y_test_preds.reshape(-1)
+        # 计算评价指标
+        corr_coef = np.corrcoef(y_test_preds, y_test_trues)[0, 1]
+        mae = mean_absolute_error(y_test_trues, y_test_preds)
+        mse = mean_squared_error(y_test_trues, y_test_preds)
+        r2 = r2_score(y_test_trues, y_test_preds)
+        best_corr_coefs.append(corr_coef)
+        best_maes.append(mae)
+        best_r2s.append(r2)
+        best_mses.append(mse)
+        acceleration_status = "GPU" if gpu_available else "CPU"
+        print(f'Fold {fold + 1}[{acceleration_status}]: MAE={mae:.4f}, MSE={mse:.4f}, R2={r2:.4f}, Corr={corr_coef:.4f}')
+    print("==== Final Results ====")
+    acceleration_status = "GPU" if gpu_available else "CPU"
+    print(f"加速方式: {acceleration_status}")
+    print(f"MAE: {np.mean(best_maes):.4f} ± {np.std(best_maes):.4f}")
+    print(f"MSE: {np.mean(best_mses):.4f} ± {np.std(best_mses):.4f}")
+    print(f"R2 : {np.mean(best_r2s):.4f} ± {np.std(best_r2s):.4f}")
+    print(f"Corr: {np.mean(best_corr_coefs):.4f} ± {np.std(best_corr_coefs):.4f}")
+    print(f"Time: {time.time() - time_star:.2f}s")
+    return np.mean(best_corr_coefs)
+# 设置随机种子
+def set_seed(seed=42):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+def Hyperparameter(data, label, use_gpu=True):
+    set_seed(42)
+    # 目标函数，用于Optuna优化
+    def objective(trial):
+        n_estimators = trial.suggest_int("n_estimators", 100, 1000)
+        max_depth = trial.suggest_int("max_depth", 3, 10)
+        # min_samples_split = trial.suggest_int("min_samples_split", 2, 10)
+        # min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 10)
+        # max_features = trial.suggest_float("max_features", 0.1, 1)
+        outer_cv = KFold(n_splits=10, shuffle=True, random_state=42)
+        corr_score = run_nested_cv_with_early_stopping(
+            data=data,
+            label=label,
+            outer_cv=outer_cv,
+            n_estimators=n_estimators,
+            max_depth=max_depth,
+            # min_samples_split=min_samples_split,
+            # min_samples_leaf=min_samples_leaf,
+            # max_features=max_features,
+            use_gpu=use_gpu
+        )
+        return corr_score
+    # 运行Optuna超参数优化
+    study = optuna.create_study(direction="maximize")
+    # 添加GPU信息到study
+    study.set_user_attr('gpu_available', torch.cuda.is_available())
+    study.set_user_attr('using_gpu', use_gpu and torch.cuda.is_available())
+    study.optimize(objective, n_trials=20)
+    print("最佳参数:", study.best_params)
+    print(f"优化完成 - 使用 {'GPU' if (use_gpu and torch.cuda.is_available()) else 'CPU'}")
+    return study.best_params

gpbench/method_reg/RF/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+# Re-export the `RF_reg` entry point defined in the sibling module RF_GPU.
+from .RF_GPU import RF_reg
+# `RF` is an alias bound to the same object as `RF_reg`.
+RF = RF_reg
+__all__ = ["RF","RF_reg"]

gpbench/method_reg/SVC/SVC_GPU.py ADDED Viewed

@@ -0,0 +1,194 @@
+import os
+import time
+import psutil
+import argparse
+import torch
+import random
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+import numpy as np
+from sklearn.model_selection import KFold
+from scipy.stats import pearsonr
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+import swanlab
+from . import SVC_Hyperparameters
+import pynvml
+# Optional cuML (RAPIDS) imports for GPU execution; CUML_AVAILABLE records
+# whether they succeeded.
+try:
+    import cudf
+    import cupy as cp
+    from cuml.svm import SVR as cuSVR
+    from cuml.preprocessing import StandardScaler as cuStandardScaler
+    CUML_AVAILABLE = True
+    print("cuML is available, will use GPU acceleration")
+except ImportError:
+    CUML_AVAILABLE = False
+    # scikit-learn's SVR is imported only on this fallback path, so the name
+    # `SVR` does not exist at module level when cuML imported successfully.
+    from sklearn.svm import SVR
+    print("cuML not available, falling back to CPU SVR")
+def parse_args():
+    parser = argparse.ArgumentParser(description="Argument parser")
+    parser.add_argument('--methods', type=str, default='SVR/', help='Random seed')
+    parser.add_argument('--species', type=str, default='', help='Dataset name')
+    parser.add_argument('--phe', type=str, default='', help='Dataset name')
+    parser.add_argument('--data_dir', type=str, default='../../data/')
+    parser.add_argument('--result_dir', type=str, default='result/')
+    parser.add_argument('--C', type=float, default=0.001)
+    parser.add_argument('--epsilon', type=float, default=0.001)
+    parser.add_argument('--kernel', type=str, default='linear')
+    parser.add_argument('--gamma', type=str, default='scale')
+    parser.add_argument('--degree', type=int, default=1)
+    parser.add_argument('--use_gpu', type=bool, default=True, help='Whether to use GPU acceleration')
+    args = parser.parse_args()
+    return args
+def load_data(args):
+    xData = np.load(os.path.join(args.data_dir, args.species, 'genotype.npz'))["arr_0"]
+    yData = np.load(os.path.join(args.data_dir, args.species, 'phenotype.npz'))["arr_0"]
+    names = np.load(os.path.join(args.data_dir, args.species, 'phenotype.npz'))["arr_1"]
+    nsample = xData.shape[0]
+    nsnp = xData.shape[1]
+    print("Number of samples: ", nsample)
+    print("Number of SNPs: ", nsnp)
+    return xData, yData, nsample, nsnp, names
+def get_gpu_mem_by_pid(pid):
+    procs = pynvml.nvmlDeviceGetComputeRunningProcesses(handle)
+    for p in procs:
+        if p.pid == pid:
+            return p.usedGpuMemory / 1024**2
+    return 0.0
+def set_seed(seed=42):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+def numpy_to_cudf(data):
+    """Convert numpy array to cudf DataFrame"""
+    if CUML_AVAILABLE:
+        return cudf.DataFrame(data)
+    return data
+def numpy_to_cupy(data):
+    """Convert numpy array to cupy array"""
+    if CUML_AVAILABLE:
+        return cp.asarray(data)
+    return data
+def run_nested_cv(args, data, label):
+    result_dir = os.path.join(args.result_dir, args.methods + args.species + args.phe)
+    os.makedirs(result_dir, exist_ok=True)
+    print("Starting 10-fold cross-validation with SVR...")
+    use_gpu = args.use_gpu and CUML_AVAILABLE
+    if use_gpu:
+        print("Using GPU acceleration with cuML")
+    else:
+        print("Using CPU with scikit-learn")
+    kf = KFold(n_splits=10, shuffle=True, random_state=42)
+    all_mse, all_mae, all_r2, all_pcc = [], [], [], []
+    time_star = time.time()
+    for fold, (train_index, test_index) in enumerate(kf.split(data)):
+        print(f"Running fold {fold}...")
+        process = psutil.Process(os.getpid())
+        fold_start_time = time.time()
+        x_train, x_test = data[train_index], data[test_index]
+        y_train, y_test = label[train_index], label[test_index]
+        x_train_gpu = cp.asarray(x_train, dtype=cp.float32)
+        x_test_gpu = cp.asarray(x_test, dtype=cp.float32)
+        y_train_gpu = cp.asarray(y_train.reshape(-1, 1),  dtype=cp.float32)
+        y_test_gpu = cp.asarray(y_test.reshape(-1, 1), dtype=cp.float32)
+        model = cuSVR(
+            C=args.C,
+            epsilon=args.epsilon,
+            kernel=args.kernel,
+            gamma=args.gamma,
+            degree=args.degree
+        )
+        model.fit(x_train_gpu, y_train_gpu)
+        y_test_preds = model.predict(x_test_gpu)
+        y_test_preds = cp.asnumpy(y_test_preds).reshape(-1)
+        y_test_scaled = cp.asnumpy(y_test_gpu).reshape(-1)
+        mse = mean_squared_error(y_test_scaled, y_test_preds)
+        r2 = r2_score(y_test_scaled, y_test_preds)
+        mae = mean_absolute_error(y_test_scaled, y_test_preds)
+        pcc, _ = pearsonr(y_test_scaled, y_test_preds)
+        all_mse.append(mse)
+        all_r2.append(r2)
+        all_mae.append(mae)
+        all_pcc.append(pcc)
+        fold_time = time.time() - fold_start_time
+        fold_gpu_mem = get_gpu_mem_by_pid(os.getpid())
+        fold_cpu_mem = process.memory_info().rss / 1024**2
+        print(f'Fold {fold}: Corr={pcc:.4f}, MAE={mae:.4f}, MSE={mse:.4f}, R2={r2:.4f}, '
+              f'Time={fold_time:.2f}s, GPU={fold_gpu_mem:.2f}MB, CPU={fold_cpu_mem:.2f}MB')
+        results_df = pd.DataFrame({'Y_test': y_test, 'Y_pred': y_test_preds})
+        results_df.to_csv(os.path.join(result_dir, f"fold{fold}.csv"), index=False)
+    print("\n===== Cross-validation summary =====")
+    print(f"Using GPU: {use_gpu}")
+    print(f"Average PCC: {np.mean(all_pcc):.4f} ± {np.std(all_pcc):.4f}")
+    print(f"Average MAE: {np.mean(all_mae):.4f} ± {np.std(all_mae):.4f}")
+    print(f"Average MSE: {np.mean(all_mse):.4f} ± {np.std(all_mse):.4f}")
+    print(f"Average R2 : {np.mean(all_r2):.4f} ± {np.std(all_r2):.4f}")
+    print(f"Total Time: {time.time() - time_star:.2f}s")
+def SVC_reg():
+    set_seed(42)
+    pynvml.nvmlInit()
+    handle = pynvml.nvmlDeviceGetHandleByIndex(0)
+    args = parse_args()
+    all_species =['Cotton/']
+    for i in range(len(all_species)):
+        args.species = all_species[i]
+        X, Y, nsamples, nsnp, names = load_data(args)
+        for j in range(len(names)):
+            args.phe = names[j]
+            print("starting run " + args.methods + args.species + args.phe)
+            label = Y[:, j]
+            label = np.nan_to_num(label, nan=np.nanmean(label))
+            best_params = SVC_Hyperparameters.Hyperparameter(X, label)
+            args.C = best_params['C']
+            args.epsilon = best_params['epsilon']
+            args.kernel = best_params['kernel']
+            args.gamma = best_params['gamma']
+            args.degree = best_params['degree']
+            start_time = time.time()
+            process = psutil.Process(os.getpid())
+            run_nested_cv(args, data=X, label=label)
+            elapsed_time = time.time() - start_time
+            print(f"running time: {elapsed_time:.2f} s")
+            print("successfully")
+            if CUML_AVAILABLE:
+                cp.get_default_memory_pool().free_all_blocks()
+if __name__ == "__main__":
+    SVC_reg()

gpbench/method_reg/SVC/SVC_Hyperparameters.py ADDED Viewed

@@ -0,0 +1,107 @@
+import gc
+import random
+import torch
+import numpy as np
+from sklearn.model_selection import KFold
+from scipy.stats import pearsonr
+from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
+import optuna
+# Optional cupy / cuML imports; CUML_AVAILABLE records whether they succeeded.
+try:
+    import cupy as cp
+    from cuml.svm import SVR as cuSVR
+    from cuml.preprocessing import StandardScaler as cuStandardScaler
+    CUML_AVAILABLE = True
+except ImportError:
+    # No CPU replacement is imported here, so `cp` and `cuSVR` may be undefined
+    # when this branch runs.
+    CUML_AVAILABLE = False
+def run_nested_cv_with_early_stopping(data, label, outer_cv, C, epsilon, kernel, gamma, degree):
+    best_corr_coefs = []
+    best_maes = []
+    best_r2s = []
+    best_mses = []
+    import time
+    time_star = time.time()
+    for fold, (train_idx, test_idx) in enumerate(outer_cv.split(data)):
+        x_train, x_test = data[train_idx], data[test_idx]
+        y_train, y_test = label[train_idx], label[test_idx]
+        x_train_gpu = cp.asarray(x_train,  dtype=cp.float32)
+        x_test_gpu = cp.asarray(x_test,  dtype=cp.float32)
+        y_train_gpu = cp.asarray(y_train.reshape(-1, 1),  dtype=cp.float32)
+        y_test_gpu = cp.asarray(y_test.reshape(-1, 1), dtype=cp.float32)
+        model = cuSVR(C=C, epsilon=epsilon, kernel=kernel, gamma=gamma, degree=degree)
+        model.fit(x_train_gpu, y_train_gpu)
+        y_test_preds = model.predict(x_test_gpu)
+        y_test_preds = cp.asnumpy(y_test_preds).reshape(-1)
+        y_test_scaled = cp.asnumpy(y_test_gpu).reshape(-1)
+        mse = mean_squared_error(y_test_scaled, y_test_preds)
+        r2 = r2_score(y_test_scaled, y_test_preds)
+        mae = mean_absolute_error(y_test_scaled, y_test_preds)
+        pcc, _ = pearsonr(y_test_scaled, y_test_preds)
+        best_corr_coefs.append(pcc)
+        best_maes.append(mae)
+        best_r2s.append(r2)
+        best_mses.append(mse)
+        print(f'Fold {fold + 1}: MAE={mae:.4f}, MSE={mse:.4f}, R2={r2:.4f}, Corr={pcc:.4f}')
+        del model, x_train_gpu, x_test_gpu, y_train_gpu, y_test_gpu
+        cp.get_default_memory_pool().free_all_blocks()
+        gc.collect()
+    print("==== Final Results ====")
+    print(f"MAE: {np.mean(best_maes):.4f} ± {np.std(best_maes):.4f}")
+    print(f"MSE: {np.mean(best_mses):.4f} ± {np.std(best_mses):.4f}")
+    print(f"R2 : {np.mean(best_r2s):.4f} ± {np.std(best_r2s):.4f}")
+    print(f"Corr: {np.mean(best_corr_coefs):.4f} ± {np.std(best_corr_coefs):.4f}")
+    print(f"Time: {time.time() - time_star:.2f}s")
+    return np.mean(best_corr_coefs)
+def set_seed(seed=42):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+def Hyperparameter(data, label):
+    set_seed(42)
+    def objective(trial):
+        C = trial.suggest_loguniform("C", 1e-3, 1)
+        epsilon = trial.suggest_uniform("epsilon", 0.01, 1)
+        kernel = trial.suggest_categorical("kernel", ["rbf", "poly"])
+        gamma = trial.suggest_categorical("gamma", ["scale", "auto"])
+        degree = trial.suggest_int("degree", 1, 5)
+        outer_cv = KFold(n_splits=10, shuffle=True, random_state=42)
+        corr_score = run_nested_cv_with_early_stopping(
+            data=data,
+            label=label,
+            outer_cv=outer_cv,
+            C=C,
+            epsilon=epsilon,
+            kernel=kernel,
+            gamma=gamma,
+            degree=degree,
+        )
+        return corr_score
+    study = optuna.create_study(direction="maximize")
+    study.optimize(objective, n_trials=20)
+    print("best params:", study.best_params)
+    print("successfully")
+    return study.best_params

gpbench/method_reg/SVC/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+# Re-export the `SVC_reg` entry point defined in the sibling module SVC_GPU.
+from .SVC_GPU import SVC_reg
+# `SVC` is an alias bound to the same object as `SVC_reg`.
+SVC = SVC_reg
+__all__ = ["SVC","SVC_reg"]

gpbench/method_reg/SoyDNGP/AlexNet_206.py ADDED Viewed

@@ -0,0 +1,185 @@
+import torch
+from torch import nn
+from torch.nn import Module
+import numpy as np
+class CA_Block(nn.Module):
+    def __init__(self, channel, h, w, reduction=16):
+        super(CA_Block, self).__init__()
+        self.h = h
+        self.w = w
+        self.avg_pool_x = nn.AdaptiveAvgPool2d((h, 1))
+        self.avg_pool_y = nn.AdaptiveAvgPool2d((1, w))
+        self.conv_1x1 = nn.Conv2d(in_channels=channel, out_channels=channel//reduction, kernel_size=1, stride=1, bias=False)
+        self.relu = nn.ReLU()
+        self.bn = nn.BatchNorm2d(channel//reduction)
+        self.F_h = nn.Conv2d(in_channels=channel//reduction, out_channels=channel, kernel_size=1, stride=1, bias=False)
+        self.F_w = nn.Conv2d(in_channels=channel//reduction, out_channels=channel, kernel_size=1, stride=1, bias=False)
+        self.sigmoid_h = nn.Sigmoid()
+        self.sigmoid_w = nn.Sigmoid()
+    def forward(self, x):
+        x_h = self.avg_pool_x(x).permute(0, 1, 3, 2)
+        x_w = self.avg_pool_y(x)
+        x_cat_conv_relu = self.relu(self.conv_1x1(torch.cat((x_h, x_w), 3)))
+        x_cat_conv_split_h, x_cat_conv_split_w = x_cat_conv_relu.split([self.h, self.w], 3)
+        s_h = self.sigmoid_h(self.F_h(x_cat_conv_split_h.permute(0, 1, 3, 2)))
+        s_w = self.sigmoid_w(self.F_w(x_cat_conv_split_w))
+        out = x * s_h.expand_as(x) * s_w.expand_as(x)
+        return out
+class AlexNet(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.net = nn.Sequential(
+            nn.Conv2d(3,32,kernel_size=3,padding=1,padding_mode='reflect',stride=1,bias=False),
+            nn.BatchNorm2d(32),
+            nn.Dropout(0.3),
+            nn.ReLU(),
+            CA_Block(32,206,206,reduction=16),
+            nn.Conv2d(32,64,kernel_size=4,padding=1,padding_mode='reflect',stride=2,bias=False),
+            nn.BatchNorm2d(64),
+            nn.Dropout(0.3),
+            nn.ReLU(),
+            nn.Conv2d(64,64,kernel_size=3,padding=1,padding_mode='reflect',stride=2,bias=False),
+            nn.BatchNorm2d(64),
+            nn.Dropout(0.3),
+            nn.ReLU(),
+            nn.Conv2d(64,64,kernel_size=3,padding=1,padding_mode='reflect',stride=1,bias=False),
+            nn.BatchNorm2d(64),
+            nn.Dropout(0.3),
+            nn.ReLU(),
+            nn.Conv2d(64,128,kernel_size=3,padding=1,padding_mode='reflect',stride=1,bias=False),
+            nn.BatchNorm2d(128),
+            nn.Dropout(0.3),
+            nn.ReLU(),
+            nn.Conv2d(128,128,kernel_size=3,padding=1,padding_mode='reflect',stride=1,bias=False),
+            nn.BatchNorm2d(128),
+            nn.Dropout(0.3),
+            nn.ReLU(),
+            nn.Conv2d(128,256,kernel_size=2,stride=2,bias=False),
+            nn.BatchNorm2d(256),
+            nn.Dropout(0.3),
+            nn.ReLU(),
+            nn.Conv2d(256,256,kernel_size=3,padding=1,padding_mode='reflect',stride=1,bias=False),
+            nn.BatchNorm2d(256),
+            nn.Dropout(0.3),
+            nn.ReLU(),
+            nn.Conv2d(256,512,kernel_size=2,stride=2,bias=False),
+            nn.BatchNorm2d(512),
+            nn.Dropout(0.3),
+            nn.ReLU(),
+            nn.Conv2d(512,512,kernel_size=3,padding=1,padding_mode='reflect',stride=1,bias=False),
+            nn.BatchNorm2d(512),
+            nn.Dropout(0.3),
+            nn.ReLU(),
+            nn.Conv2d(512,1024,kernel_size=3,padding=1,padding_mode='reflect',stride=2,bias=False),
+            nn.BatchNorm2d(1024),
+            nn.Dropout(0.3),
+            nn.ReLU(),
+            nn.Conv2d(1024,1024,kernel_size=3,padding=1,padding_mode='reflect',stride=1,bias=False),
+            nn.BatchNorm2d(1024),
+            nn.Dropout(0.3),
+            nn.ReLU(),
+            CA_Block(1024,7,7,reduction=16),
+            nn.Flatten(),
+            nn.Dropout(0.3),
+            nn.ReLU(),
+            # nn.Linear(50176,6400),
+            # nn.Dropout(0.4),
+            # nn.ReLU(),
+            nn.Linear(50176,1),
+            # nn.Sigmoid()
+        )
+    def forward(self, x):
+        x = x.permute(0, 3, 1, 2)  # 转为NCHW
+        return self.net(x)
+    def train_model(self, train_loader, valid_loader, num_epochs, learning_rate, patience, device):
+        optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate, weight_decay=1e-5)
+        criterion = nn.MSELoss()
+        self.to(device)
+        best_loss = float('inf')
+        best_state = None
+        trigger_times = 0
+        for epoch in range(num_epochs):
+            self.train()
+            train_loss = 0.0
+            for inputs, labels in train_loader:
+                inputs, labels = inputs.to(device), labels.to(device)
+                optimizer.zero_grad()
+                outputs = self(inputs)
+                labels = labels.unsqueeze(1)
+                loss = criterion(outputs, labels)
+                loss.backward()
+                optimizer.step()
+                train_loss += loss.item() * inputs.size(0)
+            self.eval()
+            valid_loss = 0.0
+            with torch.no_grad():
+                for inputs, labels in valid_loader:
+                    inputs, labels = inputs.to(device), labels.to(device)
+                    outputs = self(inputs)
+                    labels = labels.unsqueeze(1)
+                    loss = criterion(outputs, labels)
+                    valid_loss += loss.item() * inputs.size(0)
+            train_loss /= len(train_loader.dataset)
+            valid_loss /= len(valid_loader.dataset)
+            # ---------- Early stopping ----------
+            if valid_loss < best_loss:
+                best_loss = valid_loss
+                best_state = self.state_dict()
+                trigger_times = 0
+            else:
+                trigger_times += 1
+                if trigger_times >= patience:
+                    print(f"Early stopping at epoch {epoch+1}")
+                    break
+        if best_state is not None:
+            self.load_state_dict(best_state)
+        return best_loss
+    def predict(self, test_loader):
+        self.eval()
+        y_pred = []
+        with torch.no_grad():
+            for inputs, _ in test_loader:
+                outputs = self(inputs)
+                y_pred.append(outputs.cpu().numpy())
+        y_pred = np.concatenate(y_pred, axis=0)
+        y_pred = np.squeeze(y_pred)
+        return y_pred