gpbench-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. gp_agent_tool/compute_dataset_feature.py +67 -0
  2. gp_agent_tool/config.py +65 -0
  3. gp_agent_tool/experience/create_masked_dataset_summary.py +97 -0
  4. gp_agent_tool/experience/dataset_summary_info.py +13 -0
  5. gp_agent_tool/experience/experience_info.py +12 -0
  6. gp_agent_tool/experience/get_matched_experience.py +111 -0
  7. gp_agent_tool/llm_client.py +119 -0
  8. gp_agent_tool/logging_utils.py +24 -0
  9. gp_agent_tool/main.py +347 -0
  10. gp_agent_tool/read_agent/__init__.py +46 -0
  11. gp_agent_tool/read_agent/nodes.py +674 -0
  12. gp_agent_tool/read_agent/prompts.py +547 -0
  13. gp_agent_tool/read_agent/python_repl_tool.py +165 -0
  14. gp_agent_tool/read_agent/state.py +101 -0
  15. gp_agent_tool/read_agent/workflow.py +54 -0
  16. gpbench/__init__.py +25 -0
  17. gpbench/_selftest.py +104 -0
  18. gpbench/method_class/BayesA/BayesA_class.py +141 -0
  19. gpbench/method_class/BayesA/__init__.py +5 -0
  20. gpbench/method_class/BayesA/_bayesfromR.py +96 -0
  21. gpbench/method_class/BayesA/_param_free_base_model.py +84 -0
  22. gpbench/method_class/BayesA/bayesAfromR.py +16 -0
  23. gpbench/method_class/BayesB/BayesB_class.py +140 -0
  24. gpbench/method_class/BayesB/__init__.py +5 -0
  25. gpbench/method_class/BayesB/_bayesfromR.py +96 -0
  26. gpbench/method_class/BayesB/_param_free_base_model.py +84 -0
  27. gpbench/method_class/BayesB/bayesBfromR.py +16 -0
  28. gpbench/method_class/BayesC/BayesC_class.py +141 -0
  29. gpbench/method_class/BayesC/__init__.py +4 -0
  30. gpbench/method_class/BayesC/_bayesfromR.py +96 -0
  31. gpbench/method_class/BayesC/_param_free_base_model.py +84 -0
  32. gpbench/method_class/BayesC/bayesCfromR.py +16 -0
  33. gpbench/method_class/CropARNet/CropARNet_class.py +186 -0
  34. gpbench/method_class/CropARNet/CropARNet_he_class.py +154 -0
  35. gpbench/method_class/CropARNet/__init__.py +5 -0
  36. gpbench/method_class/CropARNet/base_CropARNet_class.py +178 -0
  37. gpbench/method_class/Cropformer/Cropformer_class.py +308 -0
  38. gpbench/method_class/Cropformer/__init__.py +5 -0
  39. gpbench/method_class/Cropformer/cropformer_he_class.py +221 -0
  40. gpbench/method_class/DL_GWAS/DL_GWAS_class.py +250 -0
  41. gpbench/method_class/DL_GWAS/DL_GWAS_he_class.py +169 -0
  42. gpbench/method_class/DL_GWAS/__init__.py +5 -0
  43. gpbench/method_class/DNNGP/DNNGP_class.py +163 -0
  44. gpbench/method_class/DNNGP/DNNGP_he_class.py +138 -0
  45. gpbench/method_class/DNNGP/__init__.py +5 -0
  46. gpbench/method_class/DNNGP/base_dnngp_class.py +116 -0
  47. gpbench/method_class/DeepCCR/DeepCCR_class.py +172 -0
  48. gpbench/method_class/DeepCCR/DeepCCR_he_class.py +161 -0
  49. gpbench/method_class/DeepCCR/__init__.py +5 -0
  50. gpbench/method_class/DeepCCR/base_DeepCCR_class.py +209 -0
  51. gpbench/method_class/DeepGS/DeepGS_class.py +184 -0
  52. gpbench/method_class/DeepGS/DeepGS_he_class.py +150 -0
  53. gpbench/method_class/DeepGS/__init__.py +5 -0
  54. gpbench/method_class/DeepGS/base_deepgs_class.py +153 -0
  55. gpbench/method_class/EIR/EIR_class.py +276 -0
  56. gpbench/method_class/EIR/EIR_he_class.py +184 -0
  57. gpbench/method_class/EIR/__init__.py +5 -0
  58. gpbench/method_class/EIR/utils/__init__.py +0 -0
  59. gpbench/method_class/EIR/utils/array_output_modules.py +97 -0
  60. gpbench/method_class/EIR/utils/common.py +65 -0
  61. gpbench/method_class/EIR/utils/lcl_layers.py +235 -0
  62. gpbench/method_class/EIR/utils/logging.py +59 -0
  63. gpbench/method_class/EIR/utils/mlp_layers.py +92 -0
  64. gpbench/method_class/EIR/utils/models_locally_connected.py +642 -0
  65. gpbench/method_class/EIR/utils/transformer_models.py +546 -0
  66. gpbench/method_class/ElasticNet/ElasticNet_class.py +133 -0
  67. gpbench/method_class/ElasticNet/ElasticNet_he_class.py +91 -0
  68. gpbench/method_class/ElasticNet/__init__.py +5 -0
  69. gpbench/method_class/G2PDeep/G2PDeep_he_class.py +217 -0
  70. gpbench/method_class/G2PDeep/G2Pdeep_class.py +205 -0
  71. gpbench/method_class/G2PDeep/__init__.py +5 -0
  72. gpbench/method_class/G2PDeep/base_G2PDeep_class.py +209 -0
  73. gpbench/method_class/GBLUP/GBLUP_class.py +183 -0
  74. gpbench/method_class/GBLUP/__init__.py +5 -0
  75. gpbench/method_class/GEFormer/GEFormer_class.py +169 -0
  76. gpbench/method_class/GEFormer/GEFormer_he_class.py +137 -0
  77. gpbench/method_class/GEFormer/__init__.py +5 -0
  78. gpbench/method_class/GEFormer/gMLP_class.py +357 -0
  79. gpbench/method_class/LightGBM/LightGBM_class.py +224 -0
  80. gpbench/method_class/LightGBM/LightGBM_he_class.py +121 -0
  81. gpbench/method_class/LightGBM/__init__.py +5 -0
  82. gpbench/method_class/RF/RF_GPU_class.py +165 -0
  83. gpbench/method_class/RF/RF_GPU_he_class.py +124 -0
  84. gpbench/method_class/RF/__init__.py +5 -0
  85. gpbench/method_class/SVC/SVC_GPU.py +181 -0
  86. gpbench/method_class/SVC/SVC_GPU_he.py +106 -0
  87. gpbench/method_class/SVC/__init__.py +5 -0
  88. gpbench/method_class/SoyDNGP/AlexNet_206_class.py +179 -0
  89. gpbench/method_class/SoyDNGP/SoyDNGP_class.py +189 -0
  90. gpbench/method_class/SoyDNGP/SoyDNGP_he_class.py +112 -0
  91. gpbench/method_class/SoyDNGP/__init__.py +5 -0
  92. gpbench/method_class/XGBoost/XGboost_GPU_class.py +198 -0
  93. gpbench/method_class/XGBoost/XGboost_GPU_he_class.py +178 -0
  94. gpbench/method_class/XGBoost/__init__.py +5 -0
  95. gpbench/method_class/__init__.py +52 -0
  96. gpbench/method_class/rrBLUP/__init__.py +5 -0
  97. gpbench/method_class/rrBLUP/rrBLUP_class.py +140 -0
  98. gpbench/method_reg/BayesA/BayesA.py +116 -0
  99. gpbench/method_reg/BayesA/__init__.py +5 -0
  100. gpbench/method_reg/BayesA/_bayesfromR.py +96 -0
  101. gpbench/method_reg/BayesA/_param_free_base_model.py +84 -0
  102. gpbench/method_reg/BayesA/bayesAfromR.py +16 -0
  103. gpbench/method_reg/BayesB/BayesB.py +117 -0
  104. gpbench/method_reg/BayesB/__init__.py +5 -0
  105. gpbench/method_reg/BayesB/_bayesfromR.py +96 -0
  106. gpbench/method_reg/BayesB/_param_free_base_model.py +84 -0
  107. gpbench/method_reg/BayesB/bayesBfromR.py +16 -0
  108. gpbench/method_reg/BayesC/BayesC.py +115 -0
  109. gpbench/method_reg/BayesC/__init__.py +5 -0
  110. gpbench/method_reg/BayesC/_bayesfromR.py +96 -0
  111. gpbench/method_reg/BayesC/_param_free_base_model.py +84 -0
  112. gpbench/method_reg/BayesC/bayesCfromR.py +16 -0
  113. gpbench/method_reg/CropARNet/CropARNet.py +159 -0
  114. gpbench/method_reg/CropARNet/CropARNet_Hyperparameters.py +109 -0
  115. gpbench/method_reg/CropARNet/__init__.py +5 -0
  116. gpbench/method_reg/CropARNet/base_CropARNet.py +137 -0
  117. gpbench/method_reg/Cropformer/Cropformer.py +313 -0
  118. gpbench/method_reg/Cropformer/Cropformer_Hyperparameters.py +250 -0
  119. gpbench/method_reg/Cropformer/__init__.py +5 -0
  120. gpbench/method_reg/DL_GWAS/DL_GWAS.py +186 -0
  121. gpbench/method_reg/DL_GWAS/DL_GWAS_Hyperparameters.py +125 -0
  122. gpbench/method_reg/DL_GWAS/__init__.py +5 -0
  123. gpbench/method_reg/DNNGP/DNNGP.py +157 -0
  124. gpbench/method_reg/DNNGP/DNNGP_Hyperparameters.py +118 -0
  125. gpbench/method_reg/DNNGP/__init__.py +5 -0
  126. gpbench/method_reg/DNNGP/base_dnngp.py +101 -0
  127. gpbench/method_reg/DeepCCR/DeepCCR.py +149 -0
  128. gpbench/method_reg/DeepCCR/DeepCCR_Hyperparameters.py +110 -0
  129. gpbench/method_reg/DeepCCR/__init__.py +5 -0
  130. gpbench/method_reg/DeepCCR/base_DeepCCR.py +171 -0
  131. gpbench/method_reg/DeepGS/DeepGS.py +165 -0
  132. gpbench/method_reg/DeepGS/DeepGS_Hyperparameters.py +114 -0
  133. gpbench/method_reg/DeepGS/__init__.py +5 -0
  134. gpbench/method_reg/DeepGS/base_deepgs.py +98 -0
  135. gpbench/method_reg/EIR/EIR.py +258 -0
  136. gpbench/method_reg/EIR/EIR_Hyperparameters.py +178 -0
  137. gpbench/method_reg/EIR/__init__.py +5 -0
  138. gpbench/method_reg/EIR/utils/__init__.py +0 -0
  139. gpbench/method_reg/EIR/utils/array_output_modules.py +97 -0
  140. gpbench/method_reg/EIR/utils/common.py +65 -0
  141. gpbench/method_reg/EIR/utils/lcl_layers.py +235 -0
  142. gpbench/method_reg/EIR/utils/logging.py +59 -0
  143. gpbench/method_reg/EIR/utils/mlp_layers.py +92 -0
  144. gpbench/method_reg/EIR/utils/models_locally_connected.py +642 -0
  145. gpbench/method_reg/EIR/utils/transformer_models.py +546 -0
  146. gpbench/method_reg/ElasticNet/ElasticNet.py +123 -0
  147. gpbench/method_reg/ElasticNet/ElasticNet_he.py +83 -0
  148. gpbench/method_reg/ElasticNet/__init__.py +5 -0
  149. gpbench/method_reg/G2PDeep/G2PDeep_Hyperparameters.py +107 -0
  150. gpbench/method_reg/G2PDeep/G2Pdeep.py +166 -0
  151. gpbench/method_reg/G2PDeep/__init__.py +5 -0
  152. gpbench/method_reg/G2PDeep/base_G2PDeep.py +209 -0
  153. gpbench/method_reg/GBLUP/GBLUP_R.py +182 -0
  154. gpbench/method_reg/GBLUP/__init__.py +5 -0
  155. gpbench/method_reg/GEFormer/GEFormer.py +164 -0
  156. gpbench/method_reg/GEFormer/GEFormer_Hyperparameters.py +106 -0
  157. gpbench/method_reg/GEFormer/__init__.py +5 -0
  158. gpbench/method_reg/GEFormer/gMLP.py +341 -0
  159. gpbench/method_reg/LightGBM/LightGBM.py +237 -0
  160. gpbench/method_reg/LightGBM/LightGBM_Hyperparameters.py +77 -0
  161. gpbench/method_reg/LightGBM/__init__.py +5 -0
  162. gpbench/method_reg/MVP/MVP.py +182 -0
  163. gpbench/method_reg/MVP/MVP_Hyperparameters.py +126 -0
  164. gpbench/method_reg/MVP/__init__.py +5 -0
  165. gpbench/method_reg/MVP/base_MVP.py +113 -0
  166. gpbench/method_reg/RF/RF_GPU.py +174 -0
  167. gpbench/method_reg/RF/RF_Hyperparameters.py +163 -0
  168. gpbench/method_reg/RF/__init__.py +5 -0
  169. gpbench/method_reg/SVC/SVC_GPU.py +194 -0
  170. gpbench/method_reg/SVC/SVC_Hyperparameters.py +107 -0
  171. gpbench/method_reg/SVC/__init__.py +5 -0
  172. gpbench/method_reg/SoyDNGP/AlexNet_206.py +185 -0
  173. gpbench/method_reg/SoyDNGP/SoyDNGP.py +179 -0
  174. gpbench/method_reg/SoyDNGP/SoyDNGP_Hyperparameters.py +105 -0
  175. gpbench/method_reg/SoyDNGP/__init__.py +5 -0
  176. gpbench/method_reg/XGBoost/XGboost_GPU.py +188 -0
  177. gpbench/method_reg/XGBoost/XGboost_Hyperparameters.py +167 -0
  178. gpbench/method_reg/XGBoost/__init__.py +5 -0
  179. gpbench/method_reg/__init__.py +55 -0
  180. gpbench/method_reg/rrBLUP/__init__.py +5 -0
  181. gpbench/method_reg/rrBLUP/rrBLUP.py +123 -0
  182. gpbench-1.0.0.dist-info/METADATA +379 -0
  183. gpbench-1.0.0.dist-info/RECORD +188 -0
  184. gpbench-1.0.0.dist-info/WHEEL +5 -0
  185. gpbench-1.0.0.dist-info/entry_points.txt +2 -0
  186. gpbench-1.0.0.dist-info/top_level.txt +3 -0
  187. tests/test_import.py +80 -0
  188. tests/test_method.py +232 -0
gpbench/method_reg/RF/RF_Hyperparameters.py
@@ -0,0 +1,163 @@
+ import os
+ import random
+ import time
+ import torch
+ import numpy as np
+ from sklearn.model_selection import KFold
+ from sklearn.preprocessing import StandardScaler
+ from sklearn.ensemble import RandomForestRegressor
+ from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
+ import optuna
+ from scipy.stats import pearsonr
+
+ # Try to import the GPU-accelerated stack
+ try:
+     import cudf
+     import cupy as cp
+     from cuml.ensemble import RandomForestRegressor as cuRandomForestRegressor
+     CUML_AVAILABLE = True
+     print("✓ RAPIDS cuML available, GPU acceleration supported")
+ except ImportError:
+     CUML_AVAILABLE = False
+     print("⚠ cuML not available, falling back to the scikit-learn CPU version")
+
+ # K-fold cross-validation with random-forest training
+ def run_nested_cv_with_early_stopping(data, label, outer_cv, n_estimators, max_depth, use_gpu=True):
+     best_corr_coefs = []
+     best_maes = []
+     best_r2s = []
+     best_mses = []
+
+     # Check GPU availability
+     gpu_available = use_gpu and CUML_AVAILABLE and torch.cuda.is_available()
+
+     if gpu_available:
+         print("🚀 Using GPU-accelerated random forest")
+     else:
+         print("⚠ Using CPU version (scikit-learn)")
+
+     time_start = time.time()
+
+     for fold, (train_idx, test_idx) in enumerate(outer_cv.split(data)):
+         x_train, x_test = data[train_idx], data[test_idx]
+         y_train, y_test = label[train_idx], label[test_idx]
+
+         # # Standardize the features
+         # scaler = StandardScaler()
+         # x_train = scaler.fit_transform(x_train)
+         # x_test = scaler.transform(x_test)
+
+         # # ==== Standardize y ====
+         # scaler_y = StandardScaler()
+         # y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).reshape(-1)
+         # y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1)).reshape(-1)
+
+         x_train = x_train.astype(np.float32)
+         x_test = x_test.astype(np.float32)
+         y_train_scaled = y_train.astype(np.float32)
+         y_test_scaled = y_test.astype(np.float32)
+
+         if gpu_available:
+             # Move the fold data to GPU memory
+             x_train_gpu = cp.asarray(x_train)
+             x_test_gpu = cp.asarray(x_test)
+             y_train_gpu = cp.asarray(y_train_scaled)
+
+             model = cuRandomForestRegressor(
+                 n_estimators=n_estimators,
+                 max_depth=max_depth,
+                 # min_samples_split=min_samples_split,
+                 # min_samples_leaf=min_samples_leaf,
+                 # max_features=max_features,
+                 random_state=42,
+                 n_streams=1  # use a single stream for better performance
+             )
+
+             # Train the model
+             model.fit(x_train_gpu, y_train_gpu)
+
+             # Predict and move the results back to CPU
+             y_test_preds = cp.asnumpy(model.predict(x_test_gpu))
+         else:
+             # CPU fallback (without it the function would call cuML
+             # unconditionally and crash when no GPU stack is installed)
+             model = RandomForestRegressor(
+                 n_estimators=n_estimators,
+                 max_depth=max_depth,
+                 random_state=42,
+                 n_jobs=-1
+             )
+             model.fit(x_train, y_train_scaled)
+             y_test_preds = model.predict(x_test)
+
+         # # Inverse-transform the predictions
+         # y_test_preds = scaler_y.inverse_transform(y_test_preds.reshape(-1, 1)).reshape(-1)
+         # y_test_trues = scaler_y.inverse_transform(y_test_scaled.reshape(-1, 1)).reshape(-1)
+         y_test_trues = y_test_scaled.reshape(-1)
+         y_test_preds = y_test_preds.reshape(-1)
+
+         # Compute evaluation metrics
+         corr_coef = np.corrcoef(y_test_preds, y_test_trues)[0, 1]
+         mae = mean_absolute_error(y_test_trues, y_test_preds)
+         mse = mean_squared_error(y_test_trues, y_test_preds)
+         r2 = r2_score(y_test_trues, y_test_preds)
+
+         best_corr_coefs.append(corr_coef)
+         best_maes.append(mae)
+         best_r2s.append(r2)
+         best_mses.append(mse)
+
+         acceleration_status = "GPU" if gpu_available else "CPU"
+         print(f'Fold {fold + 1}[{acceleration_status}]: MAE={mae:.4f}, MSE={mse:.4f}, R2={r2:.4f}, Corr={corr_coef:.4f}')
+
+     print("==== Final Results ====")
+     acceleration_status = "GPU" if gpu_available else "CPU"
+     print(f"Acceleration: {acceleration_status}")
+     print(f"MAE: {np.mean(best_maes):.4f} ± {np.std(best_maes):.4f}")
+     print(f"MSE: {np.mean(best_mses):.4f} ± {np.std(best_mses):.4f}")
+     print(f"R2 : {np.mean(best_r2s):.4f} ± {np.std(best_r2s):.4f}")
+     print(f"Corr: {np.mean(best_corr_coefs):.4f} ± {np.std(best_corr_coefs):.4f}")
+
+     print(f"Time: {time.time() - time_start:.2f}s")
+     return np.mean(best_corr_coefs)
+
+ # Set random seeds for reproducibility
+ def set_seed(seed=42):
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     if torch.cuda.is_available():
+         torch.cuda.manual_seed_all(seed)
+         torch.backends.cudnn.deterministic = True
+         torch.backends.cudnn.benchmark = False
+
+ def Hyperparameter(data, label, use_gpu=True):
+     set_seed(42)
+
+     # Objective function for the Optuna search
+     def objective(trial):
+         n_estimators = trial.suggest_int("n_estimators", 100, 1000)
+         max_depth = trial.suggest_int("max_depth", 3, 10)
+         # min_samples_split = trial.suggest_int("min_samples_split", 2, 10)
+         # min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 10)
+         # max_features = trial.suggest_float("max_features", 0.1, 1)
+
+         outer_cv = KFold(n_splits=10, shuffle=True, random_state=42)
+
+         corr_score = run_nested_cv_with_early_stopping(
+             data=data,
+             label=label,
+             outer_cv=outer_cv,
+             n_estimators=n_estimators,
+             max_depth=max_depth,
+             # min_samples_split=min_samples_split,
+             # min_samples_leaf=min_samples_leaf,
+             # max_features=max_features,
+             use_gpu=use_gpu
+         )
+         return corr_score
+
+     # Run the Optuna hyperparameter search
+     study = optuna.create_study(direction="maximize")
+
+     # Record GPU info on the study
+     study.set_user_attr('gpu_available', torch.cuda.is_available())
+     study.set_user_attr('using_gpu', use_gpu and torch.cuda.is_available())
+
+     study.optimize(objective, n_trials=20)
+
+     print("Best params:", study.best_params)
+     print(f"Optimization finished - using {'GPU' if (use_gpu and torch.cuda.is_available()) else 'CPU'}")
+     return study.best_params
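A minimal usage sketch for the tuner above (an illustrative example, not part of the package; the import path follows the file list and the array shapes are made up):

    import numpy as np
    from gpbench.method_reg.RF import RF_Hyperparameters

    X = np.random.rand(200, 1000).astype(np.float32)  # 200 samples x 1000 SNPs
    y = np.random.rand(200).astype(np.float32)        # one phenotype value per sample
    best = RF_Hyperparameters.Hyperparameter(X, y, use_gpu=False)
    print(best)  # e.g. {'n_estimators': ..., 'max_depth': ...}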
gpbench/method_reg/RF/__init__.py
@@ -0,0 +1,5 @@
+ from .RF_GPU import RF_reg
+
+ RF = RF_reg
+
+ __all__ = ["RF", "RF_reg"]
gpbench/method_reg/SVC/SVC_GPU.py
@@ -0,0 +1,194 @@
+ import os
+ import time
+ import psutil
+ import argparse
+ import torch
+ import random
+ import pandas as pd
+ from sklearn.preprocessing import StandardScaler
+ import numpy as np
+ from sklearn.model_selection import KFold
+ from sklearn.svm import SVR
+ from scipy.stats import pearsonr
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+ import swanlab
+ from . import SVC_Hyperparameters
+ import pynvml
+
+ # Try to import the cuML GPU stack
+ try:
+     import cudf
+     import cupy as cp
+     from cuml.svm import SVR as cuSVR
+     from cuml.preprocessing import StandardScaler as cuStandardScaler
+     CUML_AVAILABLE = True
+     print("cuML is available, will use GPU acceleration")
+ except ImportError:
+     CUML_AVAILABLE = False
+     print("cuML not available, falling back to CPU SVR")
+
+ def parse_args():
+     parser = argparse.ArgumentParser(description="Argument parser")
+     parser.add_argument('--methods', type=str, default='SVR/', help='Method name')
+     parser.add_argument('--species', type=str, default='', help='Dataset name')
+     parser.add_argument('--phe', type=str, default='', help='Phenotype name')
+     parser.add_argument('--data_dir', type=str, default='../../data/')
+     parser.add_argument('--result_dir', type=str, default='result/')
+
+     parser.add_argument('--C', type=float, default=0.001)
+     parser.add_argument('--epsilon', type=float, default=0.001)
+     parser.add_argument('--kernel', type=str, default='linear')
+     parser.add_argument('--gamma', type=str, default='scale')
+     parser.add_argument('--degree', type=int, default=1)
+     # type=bool would treat any non-empty string (including "False") as True
+     parser.add_argument('--use_gpu', type=lambda s: s.lower() in ('1', 'true', 'yes'),
+                         default=True, help='Whether to use GPU acceleration')
+     args = parser.parse_args()
+     return args
+
+ def load_data(args):
+     xData = np.load(os.path.join(args.data_dir, args.species, 'genotype.npz'))["arr_0"]
+     yData = np.load(os.path.join(args.data_dir, args.species, 'phenotype.npz'))["arr_0"]
+     names = np.load(os.path.join(args.data_dir, args.species, 'phenotype.npz'))["arr_1"]
+
+     nsample = xData.shape[0]
+     nsnp = xData.shape[1]
+     print("Number of samples: ", nsample)
+     print("Number of SNPs: ", nsnp)
+     return xData, yData, nsample, nsnp, names
+
+ handle = None  # NVML device handle, initialized in SVC_reg()
+
+ def get_gpu_mem_by_pid(pid):
+     if handle is None:
+         return 0.0
+     procs = pynvml.nvmlDeviceGetComputeRunningProcesses(handle)
+     for p in procs:
+         if p.pid == pid:
+             return p.usedGpuMemory / 1024**2
+     return 0.0
+
+ def set_seed(seed=42):
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     if torch.cuda.is_available():
+         torch.cuda.manual_seed_all(seed)
+         torch.backends.cudnn.deterministic = True
+         torch.backends.cudnn.benchmark = False
+
+ def numpy_to_cudf(data):
+     """Convert numpy array to cudf DataFrame"""
+     if CUML_AVAILABLE:
+         return cudf.DataFrame(data)
+     return data
+
+ def numpy_to_cupy(data):
+     """Convert numpy array to cupy array"""
+     if CUML_AVAILABLE:
+         return cp.asarray(data)
+     return data
+
+ def run_nested_cv(args, data, label):
+     result_dir = os.path.join(args.result_dir, args.methods + args.species + args.phe)
+     os.makedirs(result_dir, exist_ok=True)
+     print("Starting 10-fold cross-validation with SVR...")
+     use_gpu = args.use_gpu and CUML_AVAILABLE
+
+     if use_gpu:
+         print("Using GPU acceleration with cuML")
+     else:
+         print("Using CPU with scikit-learn")
+
+     kf = KFold(n_splits=10, shuffle=True, random_state=42)
+
+     all_mse, all_mae, all_r2, all_pcc = [], [], [], []
+     time_start = time.time()
+
+     for fold, (train_index, test_index) in enumerate(kf.split(data)):
+         print(f"Running fold {fold}...")
+         process = psutil.Process(os.getpid())
+         fold_start_time = time.time()
+
+         x_train, x_test = data[train_index], data[test_index]
+         y_train, y_test = label[train_index], label[test_index]
+
+         if use_gpu:
+             x_train_gpu = cp.asarray(x_train, dtype=cp.float32)
+             x_test_gpu = cp.asarray(x_test, dtype=cp.float32)
+             y_train_gpu = cp.asarray(y_train.reshape(-1, 1), dtype=cp.float32)
+
+             model = cuSVR(
+                 C=args.C,
+                 epsilon=args.epsilon,
+                 kernel=args.kernel,
+                 gamma=args.gamma,
+                 degree=args.degree
+             )
+             model.fit(x_train_gpu, y_train_gpu)
+             y_test_preds = cp.asnumpy(model.predict(x_test_gpu)).reshape(-1)
+         else:
+             # CPU fallback (the original always went through cuML and would
+             # crash when no GPU stack is installed)
+             model = SVR(C=args.C, epsilon=args.epsilon, kernel=args.kernel,
+                         gamma=args.gamma, degree=args.degree)
+             model.fit(x_train.astype(np.float32), y_train.astype(np.float32))
+             y_test_preds = model.predict(x_test.astype(np.float32)).reshape(-1)
+
+         y_test_true = y_test.reshape(-1)
+
+         mse = mean_squared_error(y_test_true, y_test_preds)
+         r2 = r2_score(y_test_true, y_test_preds)
+         mae = mean_absolute_error(y_test_true, y_test_preds)
+         pcc, _ = pearsonr(y_test_true, y_test_preds)
+
+         all_mse.append(mse)
+         all_r2.append(r2)
+         all_mae.append(mae)
+         all_pcc.append(pcc)
+
+         fold_time = time.time() - fold_start_time
+         fold_gpu_mem = get_gpu_mem_by_pid(os.getpid())
+         fold_cpu_mem = process.memory_info().rss / 1024**2
+
+         print(f'Fold {fold}: Corr={pcc:.4f}, MAE={mae:.4f}, MSE={mse:.4f}, R2={r2:.4f}, '
+               f'Time={fold_time:.2f}s, GPU={fold_gpu_mem:.2f}MB, CPU={fold_cpu_mem:.2f}MB')
+
+         results_df = pd.DataFrame({'Y_test': y_test, 'Y_pred': y_test_preds})
+         results_df.to_csv(os.path.join(result_dir, f"fold{fold}.csv"), index=False)
+
+     print("\n===== Cross-validation summary =====")
+     print(f"Using GPU: {use_gpu}")
+     print(f"Average PCC: {np.mean(all_pcc):.4f} ± {np.std(all_pcc):.4f}")
+     print(f"Average MAE: {np.mean(all_mae):.4f} ± {np.std(all_mae):.4f}")
+     print(f"Average MSE: {np.mean(all_mse):.4f} ± {np.std(all_mse):.4f}")
+     print(f"Average R2 : {np.mean(all_r2):.4f} ± {np.std(all_r2):.4f}")
+     print(f"Total Time: {time.time() - time_start:.2f}s")
+
+
+ def SVC_reg():
+     global handle
+     set_seed(42)
+     try:
+         pynvml.nvmlInit()
+         handle = pynvml.nvmlDeviceGetHandleByIndex(0)
+     except pynvml.NVMLError:
+         handle = None  # no NVIDIA GPU visible; GPU memory reporting disabled
+     args = parse_args()
+     all_species = ['Cotton/']
+
+     for i in range(len(all_species)):
+         args.species = all_species[i]
+         X, Y, nsamples, nsnp, names = load_data(args)
+         for j in range(len(names)):
+             args.phe = names[j]
+             print("starting run " + args.methods + args.species + args.phe)
+             label = Y[:, j]
+             # replace missing phenotype values with the trait mean
+             label = np.nan_to_num(label, nan=np.nanmean(label))
+
+             best_params = SVC_Hyperparameters.Hyperparameter(X, label)
+             args.C = best_params['C']
+             args.epsilon = best_params['epsilon']
+             args.kernel = best_params['kernel']
+             args.gamma = best_params['gamma']
+             args.degree = best_params['degree']
+
+             start_time = time.time()
+             process = psutil.Process(os.getpid())
+
+             run_nested_cv(args, data=X, label=label)
+             elapsed_time = time.time() - start_time
+             print(f"running time: {elapsed_time:.2f} s")
+             print("Finished successfully")
+
+             if CUML_AVAILABLE:
+                 cp.get_default_memory_pool().free_all_blocks()
+
+
+ if __name__ == "__main__":
+     SVC_reg()
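A hedged sketch of the on-disk layout that load_data() above expects, inferred from the code rather than from package documentation (paths, shapes, and trait names are illustrative):

    import os
    import numpy as np

    os.makedirs("data/Cotton", exist_ok=True)
    genotype = np.random.randint(0, 3, size=(100, 500)).astype(np.float32)  # samples x SNPs
    phenotypes = np.random.rand(100, 2)                                     # samples x traits
    trait_names = np.array(["trait_a", "trait_b"])
    np.savez("data/Cotton/genotype.npz", genotype)                  # stored under "arr_0"
    np.savez("data/Cotton/phenotype.npz", phenotypes, trait_names)  # "arr_0" and "arr_1"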
gpbench/method_reg/SVC/SVC_Hyperparameters.py
@@ -0,0 +1,107 @@
+ import gc
+ import random
+ import time
+ import torch
+ import numpy as np
+ from sklearn.model_selection import KFold
+ from scipy.stats import pearsonr
+ from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
+ import optuna
+
+ try:
+     import cupy as cp
+     from cuml.svm import SVR as cuSVR
+     from cuml.preprocessing import StandardScaler as cuStandardScaler
+     CUML_AVAILABLE = True
+ except ImportError:
+     CUML_AVAILABLE = False
+
+ def run_nested_cv_with_early_stopping(data, label, outer_cv, C, epsilon, kernel, gamma, degree):
+     if not CUML_AVAILABLE:
+         # this tuner runs entirely on the GPU
+         raise RuntimeError("RAPIDS cuML is required for the SVR hyperparameter search")
+
+     best_corr_coefs = []
+     best_maes = []
+     best_r2s = []
+     best_mses = []
+
+     time_start = time.time()
+
+     for fold, (train_idx, test_idx) in enumerate(outer_cv.split(data)):
+         x_train, x_test = data[train_idx], data[test_idx]
+         y_train, y_test = label[train_idx], label[test_idx]
+
+         x_train_gpu = cp.asarray(x_train, dtype=cp.float32)
+         x_test_gpu = cp.asarray(x_test, dtype=cp.float32)
+         y_train_gpu = cp.asarray(y_train.reshape(-1, 1), dtype=cp.float32)
+         y_test_gpu = cp.asarray(y_test.reshape(-1, 1), dtype=cp.float32)
+
+         model = cuSVR(C=C, epsilon=epsilon, kernel=kernel, gamma=gamma, degree=degree)
+         model.fit(x_train_gpu, y_train_gpu)
+
+         y_test_preds = model.predict(x_test_gpu)
+
+         y_test_preds = cp.asnumpy(y_test_preds).reshape(-1)
+         y_test_true = cp.asnumpy(y_test_gpu).reshape(-1)
+
+         mse = mean_squared_error(y_test_true, y_test_preds)
+         r2 = r2_score(y_test_true, y_test_preds)
+         mae = mean_absolute_error(y_test_true, y_test_preds)
+         pcc, _ = pearsonr(y_test_true, y_test_preds)
+
+         best_corr_coefs.append(pcc)
+         best_maes.append(mae)
+         best_r2s.append(r2)
+         best_mses.append(mse)
+
+         print(f'Fold {fold + 1}: MAE={mae:.4f}, MSE={mse:.4f}, R2={r2:.4f}, Corr={pcc:.4f}')
+
+         # release GPU memory between folds
+         del model, x_train_gpu, x_test_gpu, y_train_gpu, y_test_gpu
+         cp.get_default_memory_pool().free_all_blocks()
+         gc.collect()
+
+     print("==== Final Results ====")
+     print(f"MAE: {np.mean(best_maes):.4f} ± {np.std(best_maes):.4f}")
+     print(f"MSE: {np.mean(best_mses):.4f} ± {np.std(best_mses):.4f}")
+     print(f"R2 : {np.mean(best_r2s):.4f} ± {np.std(best_r2s):.4f}")
+     print(f"Corr: {np.mean(best_corr_coefs):.4f} ± {np.std(best_corr_coefs):.4f}")
+
+     print(f"Time: {time.time() - time_start:.2f}s")
+     return np.mean(best_corr_coefs)
+
+ def set_seed(seed=42):
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     if torch.cuda.is_available():
+         torch.cuda.manual_seed_all(seed)
+         torch.backends.cudnn.deterministic = True
+         torch.backends.cudnn.benchmark = False
+
+ def Hyperparameter(data, label):
+     set_seed(42)
+
+     def objective(trial):
+         # suggest_float replaces the deprecated suggest_loguniform/suggest_uniform
+         C = trial.suggest_float("C", 1e-3, 1, log=True)
+         epsilon = trial.suggest_float("epsilon", 0.01, 1)
+         kernel = trial.suggest_categorical("kernel", ["rbf", "poly"])
+         gamma = trial.suggest_categorical("gamma", ["scale", "auto"])
+         degree = trial.suggest_int("degree", 1, 5)
+
+         outer_cv = KFold(n_splits=10, shuffle=True, random_state=42)
+
+         corr_score = run_nested_cv_with_early_stopping(
+             data=data,
+             label=label,
+             outer_cv=outer_cv,
+             C=C,
+             epsilon=epsilon,
+             kernel=kernel,
+             gamma=gamma,
+             degree=degree,
+         )
+         return corr_score
+
+     study = optuna.create_study(direction="maximize")
+     study.optimize(objective, n_trials=20)
+
+     print("best params:", study.best_params)
+     print("Finished successfully")
+     return study.best_params
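A short usage sketch for this tuner (my own example, assuming a working RAPIDS cuML install; shapes are illustrative):

    import numpy as np

    X = np.random.rand(150, 800).astype(np.float32)
    y = np.random.rand(150).astype(np.float32)
    best = Hyperparameter(X, y)
    # best is a dict such as {'C': ..., 'epsilon': ..., 'kernel': ..., 'gamma': ..., 'degree': ...}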
gpbench/method_reg/SVC/__init__.py
@@ -0,0 +1,5 @@
+ from .SVC_GPU import SVC_reg
+
+ SVC = SVC_reg
+
+ __all__ = ["SVC", "SVC_reg"]
gpbench/method_reg/SoyDNGP/AlexNet_206.py
@@ -0,0 +1,185 @@
+ import copy
+ import torch
+ from torch import nn
+ from torch.nn import Module
+ import numpy as np
+
+ class CA_Block(nn.Module):
+     def __init__(self, channel, h, w, reduction=16):
+         super(CA_Block, self).__init__()
+
+         self.h = h
+         self.w = w
+
+         self.avg_pool_x = nn.AdaptiveAvgPool2d((h, 1))
+         self.avg_pool_y = nn.AdaptiveAvgPool2d((1, w))
+
+         self.conv_1x1 = nn.Conv2d(in_channels=channel, out_channels=channel//reduction, kernel_size=1, stride=1, bias=False)
+
+         self.relu = nn.ReLU()
+         self.bn = nn.BatchNorm2d(channel//reduction)
+
+         self.F_h = nn.Conv2d(in_channels=channel//reduction, out_channels=channel, kernel_size=1, stride=1, bias=False)
+         self.F_w = nn.Conv2d(in_channels=channel//reduction, out_channels=channel, kernel_size=1, stride=1, bias=False)
+
+         self.sigmoid_h = nn.Sigmoid()
+         self.sigmoid_w = nn.Sigmoid()
+
+     def forward(self, x):
+         # coordinate attention: pool along each spatial axis, mix, then re-weight
+         x_h = self.avg_pool_x(x).permute(0, 1, 3, 2)
+         x_w = self.avg_pool_y(x)
+
+         x_cat_conv_relu = self.relu(self.conv_1x1(torch.cat((x_h, x_w), 3)))
+
+         x_cat_conv_split_h, x_cat_conv_split_w = x_cat_conv_relu.split([self.h, self.w], 3)
+
+         s_h = self.sigmoid_h(self.F_h(x_cat_conv_split_h.permute(0, 1, 3, 2)))
+         s_w = self.sigmoid_w(self.F_w(x_cat_conv_split_w))
+
+         out = x * s_h.expand_as(x) * s_w.expand_as(x)
+
+         return out
+
+ class AlexNet(nn.Module):
+     def __init__(self):
+         super().__init__()
+         self.net = nn.Sequential(
+
+             nn.Conv2d(3, 32, kernel_size=3, padding=1, padding_mode='reflect', stride=1, bias=False),
+             nn.BatchNorm2d(32),
+             nn.Dropout(0.3),
+             nn.ReLU(),
+
+             CA_Block(32, 206, 206, reduction=16),
+
+             nn.Conv2d(32, 64, kernel_size=4, padding=1, padding_mode='reflect', stride=2, bias=False),
+             nn.BatchNorm2d(64),
+             nn.Dropout(0.3),
+             nn.ReLU(),
+
+             nn.Conv2d(64, 64, kernel_size=3, padding=1, padding_mode='reflect', stride=2, bias=False),
+             nn.BatchNorm2d(64),
+             nn.Dropout(0.3),
+             nn.ReLU(),
+
+             nn.Conv2d(64, 64, kernel_size=3, padding=1, padding_mode='reflect', stride=1, bias=False),
+             nn.BatchNorm2d(64),
+             nn.Dropout(0.3),
+             nn.ReLU(),
+
+             nn.Conv2d(64, 128, kernel_size=3, padding=1, padding_mode='reflect', stride=1, bias=False),
+             nn.BatchNorm2d(128),
+             nn.Dropout(0.3),
+             nn.ReLU(),
+
+             nn.Conv2d(128, 128, kernel_size=3, padding=1, padding_mode='reflect', stride=1, bias=False),
+             nn.BatchNorm2d(128),
+             nn.Dropout(0.3),
+             nn.ReLU(),
+
+             nn.Conv2d(128, 256, kernel_size=2, stride=2, bias=False),
+             nn.BatchNorm2d(256),
+             nn.Dropout(0.3),
+             nn.ReLU(),
+
+             nn.Conv2d(256, 256, kernel_size=3, padding=1, padding_mode='reflect', stride=1, bias=False),
+             nn.BatchNorm2d(256),
+             nn.Dropout(0.3),
+             nn.ReLU(),
+
+             nn.Conv2d(256, 512, kernel_size=2, stride=2, bias=False),
+             nn.BatchNorm2d(512),
+             nn.Dropout(0.3),
+             nn.ReLU(),
+
+             nn.Conv2d(512, 512, kernel_size=3, padding=1, padding_mode='reflect', stride=1, bias=False),
+             nn.BatchNorm2d(512),
+             nn.Dropout(0.3),
+             nn.ReLU(),
+
+             nn.Conv2d(512, 1024, kernel_size=3, padding=1, padding_mode='reflect', stride=2, bias=False),
+             nn.BatchNorm2d(1024),
+             nn.Dropout(0.3),
+             nn.ReLU(),
+
+             nn.Conv2d(1024, 1024, kernel_size=3, padding=1, padding_mode='reflect', stride=1, bias=False),
+             nn.BatchNorm2d(1024),
+             nn.Dropout(0.3),
+             nn.ReLU(),
+
+             CA_Block(1024, 7, 7, reduction=16),
+
+             nn.Flatten(),
+             nn.Dropout(0.3),
+             nn.ReLU(),
+
+             # nn.Linear(50176, 6400),
+             # nn.Dropout(0.4),
+             # nn.ReLU(),
+
+             nn.Linear(50176, 1),
+             # nn.Sigmoid()
+         )
+
+     def forward(self, x):
+         x = x.permute(0, 3, 1, 2)  # NHWC -> NCHW
+         return self.net(x)
+
+     def train_model(self, train_loader, valid_loader, num_epochs, learning_rate, patience, device):
+         optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate, weight_decay=1e-5)
+         criterion = nn.MSELoss()
+         self.to(device)
+
+         best_loss = float('inf')
+         best_state = None
+         trigger_times = 0
+
+         for epoch in range(num_epochs):
+             self.train()
+             train_loss = 0.0
+             for inputs, labels in train_loader:
+                 inputs, labels = inputs.to(device), labels.to(device)
+                 optimizer.zero_grad()
+                 outputs = self(inputs)
+                 labels = labels.unsqueeze(1)
+                 loss = criterion(outputs, labels)
+                 loss.backward()
+                 optimizer.step()
+                 train_loss += loss.item() * inputs.size(0)
+
+             self.eval()
+             valid_loss = 0.0
+             with torch.no_grad():
+                 for inputs, labels in valid_loader:
+                     inputs, labels = inputs.to(device), labels.to(device)
+                     outputs = self(inputs)
+                     labels = labels.unsqueeze(1)
+                     loss = criterion(outputs, labels)
+                     valid_loss += loss.item() * inputs.size(0)
+
+             train_loss /= len(train_loader.dataset)
+             valid_loss /= len(valid_loader.dataset)
+
+             # ---------- Early stopping ----------
+             if valid_loss < best_loss:
+                 best_loss = valid_loss
+                 # deep-copy so later parameter updates don't mutate the saved weights
+                 best_state = copy.deepcopy(self.state_dict())
+                 trigger_times = 0
+             else:
+                 trigger_times += 1
+                 if trigger_times >= patience:
+                     print(f"Early stopping at epoch {epoch+1}")
+                     break
+
+         if best_state is not None:
+             self.load_state_dict(best_state)
+         return best_loss
+
+     def predict(self, test_loader):
+         self.eval()
+         device = next(self.parameters()).device
+         y_pred = []
+         with torch.no_grad():
+             for inputs, _ in test_loader:
+                 inputs = inputs.to(device)  # keep inputs on the model's device
+                 outputs = self(inputs)
+                 y_pred.append(outputs.cpu().numpy())
+         y_pred = np.concatenate(y_pred, axis=0)
+         y_pred = np.squeeze(y_pred)
+         return y_pred
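A minimal smoke test for the network above (my own sketch, not part of the package): the forward pass expects NHWC input of shape (batch, 206, 206, 3), which is consistent with CA_Block(32, 206, 206) and the Flatten feeding Linear(50176, 1).

    import torch

    model = AlexNet()
    dummy = torch.randn(2, 206, 206, 3)  # NHWC; forward() permutes to NCHW
    out = model(dummy)
    print(out.shape)  # torch.Size([2, 1])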