gpbench-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. gp_agent_tool/compute_dataset_feature.py +67 -0
  2. gp_agent_tool/config.py +65 -0
  3. gp_agent_tool/experience/create_masked_dataset_summary.py +97 -0
  4. gp_agent_tool/experience/dataset_summary_info.py +13 -0
  5. gp_agent_tool/experience/experience_info.py +12 -0
  6. gp_agent_tool/experience/get_matched_experience.py +111 -0
  7. gp_agent_tool/llm_client.py +119 -0
  8. gp_agent_tool/logging_utils.py +24 -0
  9. gp_agent_tool/main.py +347 -0
  10. gp_agent_tool/read_agent/__init__.py +46 -0
  11. gp_agent_tool/read_agent/nodes.py +674 -0
  12. gp_agent_tool/read_agent/prompts.py +547 -0
  13. gp_agent_tool/read_agent/python_repl_tool.py +165 -0
  14. gp_agent_tool/read_agent/state.py +101 -0
  15. gp_agent_tool/read_agent/workflow.py +54 -0
  16. gpbench/__init__.py +25 -0
  17. gpbench/_selftest.py +104 -0
  18. gpbench/method_class/BayesA/BayesA_class.py +141 -0
  19. gpbench/method_class/BayesA/__init__.py +5 -0
  20. gpbench/method_class/BayesA/_bayesfromR.py +96 -0
  21. gpbench/method_class/BayesA/_param_free_base_model.py +84 -0
  22. gpbench/method_class/BayesA/bayesAfromR.py +16 -0
  23. gpbench/method_class/BayesB/BayesB_class.py +140 -0
  24. gpbench/method_class/BayesB/__init__.py +5 -0
  25. gpbench/method_class/BayesB/_bayesfromR.py +96 -0
  26. gpbench/method_class/BayesB/_param_free_base_model.py +84 -0
  27. gpbench/method_class/BayesB/bayesBfromR.py +16 -0
  28. gpbench/method_class/BayesC/BayesC_class.py +141 -0
  29. gpbench/method_class/BayesC/__init__.py +4 -0
  30. gpbench/method_class/BayesC/_bayesfromR.py +96 -0
  31. gpbench/method_class/BayesC/_param_free_base_model.py +84 -0
  32. gpbench/method_class/BayesC/bayesCfromR.py +16 -0
  33. gpbench/method_class/CropARNet/CropARNet_class.py +186 -0
  34. gpbench/method_class/CropARNet/CropARNet_he_class.py +154 -0
  35. gpbench/method_class/CropARNet/__init__.py +5 -0
  36. gpbench/method_class/CropARNet/base_CropARNet_class.py +178 -0
  37. gpbench/method_class/Cropformer/Cropformer_class.py +308 -0
  38. gpbench/method_class/Cropformer/__init__.py +5 -0
  39. gpbench/method_class/Cropformer/cropformer_he_class.py +221 -0
  40. gpbench/method_class/DL_GWAS/DL_GWAS_class.py +250 -0
  41. gpbench/method_class/DL_GWAS/DL_GWAS_he_class.py +169 -0
  42. gpbench/method_class/DL_GWAS/__init__.py +5 -0
  43. gpbench/method_class/DNNGP/DNNGP_class.py +163 -0
  44. gpbench/method_class/DNNGP/DNNGP_he_class.py +138 -0
  45. gpbench/method_class/DNNGP/__init__.py +5 -0
  46. gpbench/method_class/DNNGP/base_dnngp_class.py +116 -0
  47. gpbench/method_class/DeepCCR/DeepCCR_class.py +172 -0
  48. gpbench/method_class/DeepCCR/DeepCCR_he_class.py +161 -0
  49. gpbench/method_class/DeepCCR/__init__.py +5 -0
  50. gpbench/method_class/DeepCCR/base_DeepCCR_class.py +209 -0
  51. gpbench/method_class/DeepGS/DeepGS_class.py +184 -0
  52. gpbench/method_class/DeepGS/DeepGS_he_class.py +150 -0
  53. gpbench/method_class/DeepGS/__init__.py +5 -0
  54. gpbench/method_class/DeepGS/base_deepgs_class.py +153 -0
  55. gpbench/method_class/EIR/EIR_class.py +276 -0
  56. gpbench/method_class/EIR/EIR_he_class.py +184 -0
  57. gpbench/method_class/EIR/__init__.py +5 -0
  58. gpbench/method_class/EIR/utils/__init__.py +0 -0
  59. gpbench/method_class/EIR/utils/array_output_modules.py +97 -0
  60. gpbench/method_class/EIR/utils/common.py +65 -0
  61. gpbench/method_class/EIR/utils/lcl_layers.py +235 -0
  62. gpbench/method_class/EIR/utils/logging.py +59 -0
  63. gpbench/method_class/EIR/utils/mlp_layers.py +92 -0
  64. gpbench/method_class/EIR/utils/models_locally_connected.py +642 -0
  65. gpbench/method_class/EIR/utils/transformer_models.py +546 -0
  66. gpbench/method_class/ElasticNet/ElasticNet_class.py +133 -0
  67. gpbench/method_class/ElasticNet/ElasticNet_he_class.py +91 -0
  68. gpbench/method_class/ElasticNet/__init__.py +5 -0
  69. gpbench/method_class/G2PDeep/G2PDeep_he_class.py +217 -0
  70. gpbench/method_class/G2PDeep/G2Pdeep_class.py +205 -0
  71. gpbench/method_class/G2PDeep/__init__.py +5 -0
  72. gpbench/method_class/G2PDeep/base_G2PDeep_class.py +209 -0
  73. gpbench/method_class/GBLUP/GBLUP_class.py +183 -0
  74. gpbench/method_class/GBLUP/__init__.py +5 -0
  75. gpbench/method_class/GEFormer/GEFormer_class.py +169 -0
  76. gpbench/method_class/GEFormer/GEFormer_he_class.py +137 -0
  77. gpbench/method_class/GEFormer/__init__.py +5 -0
  78. gpbench/method_class/GEFormer/gMLP_class.py +357 -0
  79. gpbench/method_class/LightGBM/LightGBM_class.py +224 -0
  80. gpbench/method_class/LightGBM/LightGBM_he_class.py +121 -0
  81. gpbench/method_class/LightGBM/__init__.py +5 -0
  82. gpbench/method_class/RF/RF_GPU_class.py +165 -0
  83. gpbench/method_class/RF/RF_GPU_he_class.py +124 -0
  84. gpbench/method_class/RF/__init__.py +5 -0
  85. gpbench/method_class/SVC/SVC_GPU.py +181 -0
  86. gpbench/method_class/SVC/SVC_GPU_he.py +106 -0
  87. gpbench/method_class/SVC/__init__.py +5 -0
  88. gpbench/method_class/SoyDNGP/AlexNet_206_class.py +179 -0
  89. gpbench/method_class/SoyDNGP/SoyDNGP_class.py +189 -0
  90. gpbench/method_class/SoyDNGP/SoyDNGP_he_class.py +112 -0
  91. gpbench/method_class/SoyDNGP/__init__.py +5 -0
  92. gpbench/method_class/XGBoost/XGboost_GPU_class.py +198 -0
  93. gpbench/method_class/XGBoost/XGboost_GPU_he_class.py +178 -0
  94. gpbench/method_class/XGBoost/__init__.py +5 -0
  95. gpbench/method_class/__init__.py +52 -0
  96. gpbench/method_class/rrBLUP/__init__.py +5 -0
  97. gpbench/method_class/rrBLUP/rrBLUP_class.py +140 -0
  98. gpbench/method_reg/BayesA/BayesA.py +116 -0
  99. gpbench/method_reg/BayesA/__init__.py +5 -0
  100. gpbench/method_reg/BayesA/_bayesfromR.py +96 -0
  101. gpbench/method_reg/BayesA/_param_free_base_model.py +84 -0
  102. gpbench/method_reg/BayesA/bayesAfromR.py +16 -0
  103. gpbench/method_reg/BayesB/BayesB.py +117 -0
  104. gpbench/method_reg/BayesB/__init__.py +5 -0
  105. gpbench/method_reg/BayesB/_bayesfromR.py +96 -0
  106. gpbench/method_reg/BayesB/_param_free_base_model.py +84 -0
  107. gpbench/method_reg/BayesB/bayesBfromR.py +16 -0
  108. gpbench/method_reg/BayesC/BayesC.py +115 -0
  109. gpbench/method_reg/BayesC/__init__.py +5 -0
  110. gpbench/method_reg/BayesC/_bayesfromR.py +96 -0
  111. gpbench/method_reg/BayesC/_param_free_base_model.py +84 -0
  112. gpbench/method_reg/BayesC/bayesCfromR.py +16 -0
  113. gpbench/method_reg/CropARNet/CropARNet.py +159 -0
  114. gpbench/method_reg/CropARNet/CropARNet_Hyperparameters.py +109 -0
  115. gpbench/method_reg/CropARNet/__init__.py +5 -0
  116. gpbench/method_reg/CropARNet/base_CropARNet.py +137 -0
  117. gpbench/method_reg/Cropformer/Cropformer.py +313 -0
  118. gpbench/method_reg/Cropformer/Cropformer_Hyperparameters.py +250 -0
  119. gpbench/method_reg/Cropformer/__init__.py +5 -0
  120. gpbench/method_reg/DL_GWAS/DL_GWAS.py +186 -0
  121. gpbench/method_reg/DL_GWAS/DL_GWAS_Hyperparameters.py +125 -0
  122. gpbench/method_reg/DL_GWAS/__init__.py +5 -0
  123. gpbench/method_reg/DNNGP/DNNGP.py +157 -0
  124. gpbench/method_reg/DNNGP/DNNGP_Hyperparameters.py +118 -0
  125. gpbench/method_reg/DNNGP/__init__.py +5 -0
  126. gpbench/method_reg/DNNGP/base_dnngp.py +101 -0
  127. gpbench/method_reg/DeepCCR/DeepCCR.py +149 -0
  128. gpbench/method_reg/DeepCCR/DeepCCR_Hyperparameters.py +110 -0
  129. gpbench/method_reg/DeepCCR/__init__.py +5 -0
  130. gpbench/method_reg/DeepCCR/base_DeepCCR.py +171 -0
  131. gpbench/method_reg/DeepGS/DeepGS.py +165 -0
  132. gpbench/method_reg/DeepGS/DeepGS_Hyperparameters.py +114 -0
  133. gpbench/method_reg/DeepGS/__init__.py +5 -0
  134. gpbench/method_reg/DeepGS/base_deepgs.py +98 -0
  135. gpbench/method_reg/EIR/EIR.py +258 -0
  136. gpbench/method_reg/EIR/EIR_Hyperparameters.py +178 -0
  137. gpbench/method_reg/EIR/__init__.py +5 -0
  138. gpbench/method_reg/EIR/utils/__init__.py +0 -0
  139. gpbench/method_reg/EIR/utils/array_output_modules.py +97 -0
  140. gpbench/method_reg/EIR/utils/common.py +65 -0
  141. gpbench/method_reg/EIR/utils/lcl_layers.py +235 -0
  142. gpbench/method_reg/EIR/utils/logging.py +59 -0
  143. gpbench/method_reg/EIR/utils/mlp_layers.py +92 -0
  144. gpbench/method_reg/EIR/utils/models_locally_connected.py +642 -0
  145. gpbench/method_reg/EIR/utils/transformer_models.py +546 -0
  146. gpbench/method_reg/ElasticNet/ElasticNet.py +123 -0
  147. gpbench/method_reg/ElasticNet/ElasticNet_he.py +83 -0
  148. gpbench/method_reg/ElasticNet/__init__.py +5 -0
  149. gpbench/method_reg/G2PDeep/G2PDeep_Hyperparameters.py +107 -0
  150. gpbench/method_reg/G2PDeep/G2Pdeep.py +166 -0
  151. gpbench/method_reg/G2PDeep/__init__.py +5 -0
  152. gpbench/method_reg/G2PDeep/base_G2PDeep.py +209 -0
  153. gpbench/method_reg/GBLUP/GBLUP_R.py +182 -0
  154. gpbench/method_reg/GBLUP/__init__.py +5 -0
  155. gpbench/method_reg/GEFormer/GEFormer.py +164 -0
  156. gpbench/method_reg/GEFormer/GEFormer_Hyperparameters.py +106 -0
  157. gpbench/method_reg/GEFormer/__init__.py +5 -0
  158. gpbench/method_reg/GEFormer/gMLP.py +341 -0
  159. gpbench/method_reg/LightGBM/LightGBM.py +237 -0
  160. gpbench/method_reg/LightGBM/LightGBM_Hyperparameters.py +77 -0
  161. gpbench/method_reg/LightGBM/__init__.py +5 -0
  162. gpbench/method_reg/MVP/MVP.py +182 -0
  163. gpbench/method_reg/MVP/MVP_Hyperparameters.py +126 -0
  164. gpbench/method_reg/MVP/__init__.py +5 -0
  165. gpbench/method_reg/MVP/base_MVP.py +113 -0
  166. gpbench/method_reg/RF/RF_GPU.py +174 -0
  167. gpbench/method_reg/RF/RF_Hyperparameters.py +163 -0
  168. gpbench/method_reg/RF/__init__.py +5 -0
  169. gpbench/method_reg/SVC/SVC_GPU.py +194 -0
  170. gpbench/method_reg/SVC/SVC_Hyperparameters.py +107 -0
  171. gpbench/method_reg/SVC/__init__.py +5 -0
  172. gpbench/method_reg/SoyDNGP/AlexNet_206.py +185 -0
  173. gpbench/method_reg/SoyDNGP/SoyDNGP.py +179 -0
  174. gpbench/method_reg/SoyDNGP/SoyDNGP_Hyperparameters.py +105 -0
  175. gpbench/method_reg/SoyDNGP/__init__.py +5 -0
  176. gpbench/method_reg/XGBoost/XGboost_GPU.py +188 -0
  177. gpbench/method_reg/XGBoost/XGboost_Hyperparameters.py +167 -0
  178. gpbench/method_reg/XGBoost/__init__.py +5 -0
  179. gpbench/method_reg/__init__.py +55 -0
  180. gpbench/method_reg/rrBLUP/__init__.py +5 -0
  181. gpbench/method_reg/rrBLUP/rrBLUP.py +123 -0
  182. gpbench-1.0.0.dist-info/METADATA +379 -0
  183. gpbench-1.0.0.dist-info/RECORD +188 -0
  184. gpbench-1.0.0.dist-info/WHEEL +5 -0
  185. gpbench-1.0.0.dist-info/entry_points.txt +2 -0
  186. gpbench-1.0.0.dist-info/top_level.txt +3 -0
  187. tests/test_import.py +80 -0
  188. tests/test_method.py +232 -0
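
Before the hunks below, a quick orientation: the wheel ships the `gp_agent_tool` and `gpbench` top-level packages, and each prediction method appears twice, as a class API under `gpbench.method_class` and as a regression pipeline under `gpbench.method_reg`. A minimal import sketch, using only the per-method `__init__.py` exports shown in the diffs below (any re-exports in `gpbench/method_reg/__init__.py` itself are not shown here):

import_example.py (illustrative):
    # Per-method imports, as defined in the __init__.py files diffed below
    from gpbench.method_reg.Cropformer import Cropformer_reg
    from gpbench.method_reg.DL_GWAS import DL_GWAS_reg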
gpbench/method_reg/Cropformer/Cropformer_Hyperparameters.py
@@ -0,0 +1,250 @@
+ import time
+ import torch
+ import numpy as np
+ import torch.nn as nn
+ import random
+ import torch.optim as optim
+ from torch.utils.data import DataLoader, TensorDataset
+ from sklearn.preprocessing import StandardScaler
+ from lightning.pytorch import LightningModule
+ import optuna
+
+ from torch.optim import Adam
+ from torch.nn import MSELoss
+ from sklearn.model_selection import KFold
+ from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
+
+ # Custom LayerNorm kept from the original source; SelfAttention below uses
+ # torch.nn.LayerNorm instead, so this class is effectively unused.
+ class LayerNorm(nn.Module):
+     def __init__(self, hidden_size, eps=1e-12):
+         super(LayerNorm, self).__init__()
+         self.weight = nn.Parameter(torch.ones(hidden_size))
+         self.bias = nn.Parameter(torch.zeros(hidden_size))
+         self.variance_epsilon = eps
+
+     def forward(self, x):
+         u = x.mean(-1, keepdim=True)
+         s = (x - u).pow(2).mean(-1, keepdim=True)
+         x = (x - u) / torch.sqrt(s + self.variance_epsilon)
+         return self.weight * x + self.bias
+
+ class SelfAttention(LightningModule):
+     def __init__(self, num_attention_heads, input_size, hidden_size, output_dim=1, kernel_size=3,
+                  hidden_dropout_prob=0.5, attention_probs_dropout_prob=0.5, learning_rate=0.001):
+         super(SelfAttention, self).__init__()
+         self.num_attention_heads = num_attention_heads
+         # note: the heads are never actually split out below; attention_head_size
+         # only enters through the score scaling in forward()
+         self.attention_head_size = int(hidden_size / num_attention_heads)
+         self.all_head_size = hidden_size
+
+         self.query = torch.nn.Linear(input_size, self.all_head_size)
+         self.key = torch.nn.Linear(input_size, self.all_head_size)
+         self.value = torch.nn.Linear(input_size, self.all_head_size)
+
+         self.attn_dropout = torch.nn.Dropout(attention_probs_dropout_prob)
+         self.out_dropout = torch.nn.Dropout(hidden_dropout_prob)
+         self.dense = torch.nn.Linear(hidden_size, input_size)
+         self.LayerNorm = torch.nn.LayerNorm(input_size, eps=1e-12)
+         self.relu = torch.nn.ReLU()
+         self.out = torch.nn.Linear(input_size, output_dim)
+         # padding=1 preserves the sequence length only for the default kernel_size=3
+         self.cnn = torch.nn.Conv1d(1, 1, kernel_size, stride=1, padding=1)
+
+         self.learning_rate = learning_rate
+         self.loss_fn = MSELoss()
+
+     def forward(self, input_tensor):
+         input_tensor = input_tensor.to(self.device)
+         self.cnn = self.cnn.to(self.device)
+
+         cnn_hidden = self.cnn(input_tensor.view(input_tensor.size(0), 1, -1))
+         input_tensor = cnn_hidden
+         mixed_query_layer = self.query(input_tensor)
+         mixed_key_layer = self.key(input_tensor)
+         mixed_value_layer = self.value(input_tensor)
+
+         query_layer = mixed_query_layer
+         key_layer = mixed_key_layer
+         value_layer = mixed_value_layer
+
+         attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
+         attention_scores = attention_scores / np.sqrt(self.attention_head_size)
+         attention_probs = torch.nn.Softmax(dim=-1)(attention_scores)
+         attention_probs = self.attn_dropout(attention_probs)
+
+         context_layer = torch.matmul(attention_probs, value_layer)
+         hidden_states = self.dense(context_layer)
+         hidden_states = self.out_dropout(hidden_states)
+         hidden_states = self.LayerNorm(hidden_states + input_tensor)
+         output = self.out(self.relu(hidden_states.view(hidden_states.size(0), -1)))
+         return output
+
+     # Lightning hooks; unused by the manual training loop in
+     # run_nested_cv_with_early_stopping below, but kept for Trainer-based use.
+     def training_step(self, batch, batch_idx):
+         x, y = batch
+         y_pred = self(x)
+         loss = self.loss_fn(y_pred, y)
+         return loss
+
+     def validation_step(self, batch, batch_idx):
+         x, y = batch
+         y_pred = self(x)
+         val_loss = self.loss_fn(y_pred, y)
+         return val_loss
+
+     def configure_optimizers(self):
+         return Adam(self.parameters(), lr=self.learning_rate)
+
+ class EarlyStopping:
+     def __init__(self, patience=10, delta=0):
+         self.patience = patience
+         self.delta = delta
+         self.best_score = None
+         self.counter = 0
+         self.early_stop = False
+
+     def __call__(self, score):
+         # score is maximized (test-fold correlation); the counter resets on improvement
+         if self.best_score is None:
+             self.best_score = score
+         elif score < self.best_score + self.delta:
+             self.counter += 1
+             if self.counter >= self.patience:
+                 self.early_stop = True
+         else:
+             self.best_score = score
+             self.counter = 0
+
+
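(Aside: a minimal sketch of how this maximize-style stopper behaves; the declining scores below are synthetic, purely to trigger the patience counter.)

    stopper = EarlyStopping(patience=3)
    for epoch, score in enumerate([0.50, 0.45, 0.40, 0.35, 0.30]):
        stopper(score)          # score never improves on 0.50
        if stopper.early_stop:  # counter reaches patience on the 4th call
            print(f"would stop at epoch {epoch + 1}")
            break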
+ def run_nested_cv_with_early_stopping(data, label, outer_cv, learning_rate, num_heads, dropout_prob, batch_size,
+                                       hidden_dim, output_dim, kernel_size, patience, DEVICE):
+     best_corr_coefs = []
+     best_maes = []
+     best_r2s = []
+     best_mses = []
+
+     time_start = time.time()
+     for fold, (train_idx, test_idx) in enumerate(outer_cv.split(data)):
+         x_train, x_test = data[train_idx], data[test_idx]
+         y_train, y_test = label[train_idx], label[test_idx]
+
+         num_attention_heads = num_heads
+         attention_probs_dropout_prob = dropout_prob
+         hidden_dropout_prob = 0.5
+
+         model = SelfAttention(num_attention_heads, x_train.shape[1], hidden_dim, output_dim,
+                               hidden_dropout_prob=hidden_dropout_prob, kernel_size=kernel_size,
+                               attention_probs_dropout_prob=attention_probs_dropout_prob).to(DEVICE)
+         optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
+         loss_function = torch.nn.MSELoss()
+         scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=10)
+
+         # fit the scaler on the training fold only, to avoid leaking test statistics
+         scaler = StandardScaler()
+         x_train = scaler.fit_transform(x_train)
+         x_test = scaler.transform(x_test)
+
+         x_train_tensor = torch.from_numpy(x_train).float().to(DEVICE)
+         y_train_tensor = torch.from_numpy(y_train).float().to(DEVICE)
+         x_test_tensor = torch.from_numpy(x_test).float().to(DEVICE)
+         y_test_tensor = torch.from_numpy(y_test).float().to(DEVICE)
+
+         train_data = TensorDataset(x_train_tensor, y_train_tensor)
+         test_data = TensorDataset(x_test_tensor, y_test_tensor)
+
+         train_loader = DataLoader(train_data, batch_size, shuffle=True)
+         test_loader = DataLoader(test_data, batch_size, shuffle=False)
+
+         early_stopping = EarlyStopping(patience=patience)
+         best_corr_coef = -float('inf')
+         best_mae = float('inf')
+         best_mse = float('inf')
+         best_r2 = -float('inf')
+         for epoch in range(100):
+             model.train()
+             for x_batch, y_batch in train_loader:
+                 optimizer.zero_grad()
+                 y_pred = model(x_batch)
+                 loss = loss_function(y_pred, y_batch.reshape(-1, 1))
+                 loss.backward()
+                 optimizer.step()
+
+             model.eval()
+             y_test_preds, y_test_trues = [], []
+
+             with torch.no_grad():
+                 for x_batch, y_batch in test_loader:
+                     y_test_pred = model(x_batch)
+                     y_test_preds.extend(y_test_pred.cpu().numpy().reshape(-1).tolist())
+                     y_test_trues.extend(y_batch.cpu().numpy().reshape(-1).tolist())
+
+             corr_coef = np.corrcoef(y_test_preds, y_test_trues)[0, 1]
+             mae = mean_absolute_error(np.array(y_test_trues), np.array(y_test_preds))
+             mse = mean_squared_error(np.array(y_test_trues), np.array(y_test_preds))
+             r2 = r2_score(np.array(y_test_trues), np.array(y_test_preds))
+             scheduler.step(corr_coef)
+
+             if corr_coef > best_corr_coef:
+                 best_mae = mae
+                 best_corr_coef = corr_coef
+                 best_mse = mse
+                 best_r2 = r2
+
+             early_stopping(corr_coef)
+             if early_stopping.early_stop:
+                 print(f"Early stopping at epoch {epoch + 1}")
+                 break
+
+         best_corr_coefs.append(best_corr_coef)
+         best_maes.append(best_mae)
+         best_mses.append(best_mse)
+         best_r2s.append(best_r2)
+         print(f'Fold {fold + 1}: MAE={best_mae:.4f}, MSE={best_mse:.4f}, R2={best_r2:.4f}, Corr={best_corr_coef:.4f}')
+
+     print("==== Final Results ====")
+     print(f"MAE: {np.mean(best_maes):.4f} ± {np.std(best_maes):.4f}")
+     print(f"MSE: {np.mean(best_mses):.4f} ± {np.std(best_mses):.4f}")
+     print(f"R2 : {np.mean(best_r2s):.4f} ± {np.std(best_r2s):.4f}")
+     print(f"Corr: {np.mean(best_corr_coefs):.4f} ± {np.std(best_corr_coefs):.4f}")
+
+     print(f"Time: {time.time() - time_start:.2f}s")
+     return best_corr_coefs
+
+
+ def set_seed(seed=42):
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+     torch.backends.cudnn.deterministic = True
+     torch.backends.cudnn.benchmark = False
+
+ def Hyperparameter(X, label):
+     set_seed(42)
+     torch.cuda.empty_cache()
+     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+     def objective(trial):
+         # suggest_loguniform is deprecated in recent Optuna; suggest_float(..., log=True) is the equivalent
+         lr = trial.suggest_float("learning_rate", 1e-4, 1e-1, log=True)
+         # heads only affects the score scaling in SelfAttention (see attention_head_size)
+         heads = trial.suggest_int("heads", 1, 8, step=1)
+         dropout = trial.suggest_float("dropout", 0.1, 0.9, step=0.1)
+         batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
+
+         outer_cv = KFold(n_splits=10, shuffle=True, random_state=42)
+
+         corr_scores = run_nested_cv_with_early_stopping(
+             data=X,
+             label=label,
+             outer_cv=outer_cv,
+             learning_rate=lr,
+             num_heads=heads,
+             dropout_prob=dropout,
+             batch_size=batch_size,
+             hidden_dim=64,
+             output_dim=1,
+             kernel_size=3,
+             patience=5,
+             DEVICE=device,
+         )
+         return np.mean(corr_scores)
+
+     study = optuna.create_study(direction="maximize")
+     study.optimize(objective, n_trials=20)
+
+     print("best params:", study.best_params)
+     print("Hyperparameter search finished successfully")
+     return study.best_params
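
A quick smoke-test sketch for this file's entry point, assuming synthetic inputs only (the shapes are inferred from how run_nested_cv_with_early_stopping indexes its arguments; a real run of 20 trials x 10 folds is compute-heavy):

    import numpy as np

    # Hypothetical toy inputs: 200 samples x 500 SNP features, one continuous trait
    X = np.random.rand(200, 500).astype(np.float32)
    label = np.random.rand(200).astype(np.float32)

    best = Hyperparameter(X, label)  # 20 Optuna trials of 10-fold CV
    print(best)  # {'learning_rate': ..., 'heads': ..., 'dropout': ..., 'batch_size': ...}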
gpbench/method_reg/Cropformer/__init__.py
@@ -0,0 +1,5 @@
+ from .Cropformer import Cropformer_reg
+
+ Cropformer = Cropformer_reg
+
+ __all__ = ["Cropformer", "Cropformer_reg"]
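
The alias means callers can use either name; a minimal sketch (assuming the wheel is installed):

    from gpbench.method_reg.Cropformer import Cropformer, Cropformer_reg

    assert Cropformer is Cropformer_reg  # the alias points at the same callable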
gpbench/method_reg/DL_GWAS/DL_GWAS.py
@@ -0,0 +1,186 @@
+ import os
+ import time
+ import psutil
+ import argparse
+ import random
+ import torch
+ import numpy as np
+ import pandas as pd
+ import tensorflow as tf
+ from . import DL_GWAS_Hyperparameters
+ import keras
+ import pynvml
+ from keras import layers
+ from keras import regularizers
+ from keras.models import Model
+ from keras.layers import *
+ from scipy.stats import pearsonr
+ from keras.callbacks import EarlyStopping
+ from sklearn.model_selection import KFold, train_test_split
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+ gpus = tf.config.experimental.list_physical_devices('GPU')
+ if gpus:
+     for gpu in gpus:
+         tf.config.experimental.set_memory_growth(gpu, True)
+
+ def parse_args():
+     parser = argparse.ArgumentParser(description="Argument parser")
+     parser.add_argument('--methods', type=str, default='DL_GWAS/', help='Method subdirectory for results')
+     parser.add_argument('--species', type=str, default='')
+     parser.add_argument('--phe', type=str, default='', help='Phenotype (dataset) name')
+     parser.add_argument('--data_dir', type=str, default='../../data/')
+     parser.add_argument('--result_dir', type=str, default='result/')
+
+     parser.add_argument('--epochs', type=int, default=1000, help='Number of training epochs')
+     parser.add_argument('--batch_size', type=int, default=128, help='Batch size')
+     parser.add_argument('--learning_rate', type=float, default=0.01, help='Learning rate')
+     parser.add_argument('--patience', type=int, default=5, help='Patience for early stopping')
+     args = parser.parse_args()
+     return args
+
+ def indices_to_one_hot(data, nb_classes):
+     targets = np.array(data).reshape(-1)
+     return np.eye(nb_classes)[targets]
+
+ def load_data(args):
+     xData = np.load(os.path.join(args.data_dir, args.species, 'genotype.npz'))["arr_0"]
+     yData = np.load(os.path.join(args.data_dir, args.species, 'phenotype.npz'))["arr_0"]
+     names = np.load(os.path.join(args.data_dir, args.species, 'phenotype.npz'))["arr_1"]
+
+     nsample = xData.shape[0]
+     nsnp = xData.shape[1]
+     print("Number of samples: ", nsample)
+     print("Number of SNPs: ", nsnp)
+     xData = xData.astype(int)
+     arr = np.empty(shape=(nsample, nsnp, 4))
+     xData[xData == -9] = 0  # recode missing genotype calls (-9) as 0 before one-hot encoding
+     for i in range(0, nsample):
+         arr[i] = indices_to_one_hot(pd.to_numeric(xData[i], downcast='signed'), 4)
+
+     return arr, yData, nsample, nsnp, names
+
+ def set_seed(seed=42):
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+     torch.backends.cudnn.deterministic = True
+     torch.backends.cudnn.benchmark = False
+
+ def get_gpu_mem_by_pid(pid):
+     # NVML handle for GPU 0, fetched here so this helper does not depend on a
+     # variable local to DL_GWAS_reg() (which would raise NameError at call time)
+     handle = pynvml.nvmlDeviceGetHandleByIndex(0)
+     procs = pynvml.nvmlDeviceGetComputeRunningProcesses(handle)
+     for p in procs:
+         if p.pid == pid:
+             return p.usedGpuMemory / 1024**2
+     return 0.0
+
+ def resnet(args, nsnp):
+
+     inputs = Input(shape=(nsnp, 4))
+
+     x = Conv1D(10, 4, padding='same', activation='linear', kernel_initializer='TruncatedNormal',
+                kernel_regularizer=regularizers.l2(0.1), bias_regularizer=regularizers.l2(0.01))(inputs)
+     x = Conv1D(10, 20, padding='same', activation='linear', kernel_initializer='TruncatedNormal',
+                kernel_regularizer=regularizers.l2(0.1), bias_regularizer=regularizers.l2(0.01))(x)
+     x = Dropout(0.75)(x)
+     shortcut = Conv1D(10, 4, padding='same', activation='linear', kernel_initializer='TruncatedNormal',
+                       kernel_regularizer=regularizers.l2(0.1), bias_regularizer=regularizers.l2(0.01))(inputs)
+     x = layers.add([shortcut, x])  # residual connection
+     x = Conv1D(10, 4, padding='same', activation='linear', kernel_initializer='TruncatedNormal',
+                kernel_regularizer=regularizers.l2(0.1), bias_regularizer=regularizers.l2(0.01))(x)
+     x = Dropout(0.75)(x)
+     x = Flatten()(x)
+     x = Dropout(0.75)(x)
+     outputs = Dense(1, activation='linear', bias_regularizer=regularizers.l2(0.01),
+                     kernel_initializer='TruncatedNormal', name='out')(x)
+     model = Model(inputs=inputs, outputs=outputs)
+     model.compile(loss='mean_squared_error',
+                   optimizer=keras.optimizers.Adam(learning_rate=args.learning_rate),
+                   metrics=['mae'])
+     return model
+
+ def isru(x):
+     # inverse square root unit activation; defined but unused above
+     return x / (tf.sqrt(1 + 0.02 * tf.square(x)))
+
+ def run_nested_cv(args, data, label, nsnp):
+     result_dir = os.path.join(args.result_dir, args.methods + args.species + args.phe)
+     os.makedirs(result_dir, exist_ok=True)
+     print("Starting 10-fold cross-validation...")
+     kf = KFold(n_splits=10, shuffle=True, random_state=42)
+
+     all_mse, all_mae, all_r2, all_pcc = [], [], [], []
+     time_start = time.time()
+     early_stopping = EarlyStopping(monitor='val_loss', patience=args.patience)
+     for fold, (train_index, test_index) in enumerate(kf.split(data)):
+         print(f"Running fold {fold}...")
+         process = psutil.Process(os.getpid())
+         fold_start_time = time.time()
+
+         X_train, X_test = data[train_index], data[test_index]
+         y_train, y_test = label[train_index], label[test_index]
+
+         X_train_sub, X_valid, y_train_sub, y_valid = train_test_split(X_train, y_train, test_size=0.1, random_state=42)
+
+         model = resnet(args, nsnp=nsnp)
+         model.fit(X_train_sub, y_train_sub, batch_size=args.batch_size, epochs=args.epochs,
+                   validation_data=(X_valid, y_valid), callbacks=[early_stopping], shuffle=True, verbose=0)
+         y_pred = model.predict(X_test)
+
+         y_pred = np.asarray(y_pred).flatten().astype(np.float64)
+         y_test = np.asarray(y_test).flatten().astype(np.float64)
+
+         mse = mean_squared_error(y_test, y_pred)
+         r2 = r2_score(y_test, y_pred)
+         mae = mean_absolute_error(y_test, y_pred)
+         pcc, _ = pearsonr(y_test, y_pred)
+
+         all_mse.append(mse)
+         all_r2.append(r2)
+         all_mae.append(mae)
+         all_pcc.append(pcc)
+
+         fold_time = time.time() - fold_start_time
+         fold_gpu_mem = get_gpu_mem_by_pid(os.getpid())
+         fold_cpu_mem = process.memory_info().rss / 1024**2
+         print(f'Fold {fold}: Corr={pcc:.4f}, MAE={mae:.4f}, MSE={mse:.4f}, R2={r2:.4f}, Time={fold_time:.2f}s, '
+               f'GPU={fold_gpu_mem:.2f}MB, CPU={fold_cpu_mem:.2f}MB')
+
+         results_df = pd.DataFrame({'Y_test': y_test, 'Y_pred': y_pred})
+         results_df.to_csv(os.path.join(result_dir, f"fold{fold}.csv"), index=False)
+
+     print("\n===== Cross-validation summary =====")
+     print(f"Average PCC: {np.mean(all_pcc):.4f} ± {np.std(all_pcc):.4f}")
+     print(f"Average MAE: {np.mean(all_mae):.4f} ± {np.std(all_mae):.4f}")
+     print(f"Average MSE: {np.mean(all_mse):.4f} ± {np.std(all_mse):.4f}")
+     print(f"Average R2 : {np.mean(all_r2):.4f} ± {np.std(all_r2):.4f}")
+     print(f"Time: {time.time() - time_start:.2f}s")
+
+
+ def DL_GWAS_reg():
+     set_seed(42)
+     pynvml.nvmlInit()  # required before the NVML queries in get_gpu_mem_by_pid
+     args = parse_args()
+     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+     all_species = ['Cotton/']
+
+     for i in range(len(all_species)):
+         args.species = all_species[i]
+         args.device = device
+         X, Y, nsamples, nsnp, names = load_data(args)
+         for j in range(len(names)):
+             args.phe = names[j]
+             print("starting run " + args.methods + args.species + args.phe)
+             label = Y[:, j]
+             label = np.nan_to_num(label, nan=np.nanmean(label))  # impute missing phenotypes with the trait mean
+             best_params = DL_GWAS_Hyperparameters.Hyperparameter(X, label, nsnp)
+             args.learning_rate = best_params['learning_rate']
+             args.patience = best_params['patience']
+             args.batch_size = best_params['batch_size']
+             start_time = time.time()
+
+             process = psutil.Process(os.getpid())
+             run_nested_cv(args, data=X, label=label, nsnp=nsnp)
+
+             elapsed_time = time.time() - start_time
+             print(f"running time: {elapsed_time:.2f} s")
+     print("finished successfully")
+
+
+ if __name__ == "__main__":
+     DL_GWAS_reg()
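
For reference, a sketch of the on-disk layout load_data() expects, built from synthetic arrays (the directory matches the default --data_dir plus the hard-coded 'Cotton/' species; the trait names are made up):

    import os
    import numpy as np

    os.makedirs("../../data/Cotton", exist_ok=True)

    geno = np.random.randint(0, 4, size=(100, 1000))   # integer codes 0-3; -9 would mark missing
    pheno = np.random.rand(100, 2)                     # (samples, traits)
    names = np.array(["trait_a", "trait_b"])           # hypothetical trait names

    np.savez("../../data/Cotton/genotype.npz", geno)            # stored under "arr_0"
    np.savez("../../data/Cotton/phenotype.npz", pheno, names)   # "arr_0" and "arr_1"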
gpbench/method_reg/DL_GWAS/DL_GWAS_Hyperparameters.py
@@ -0,0 +1,125 @@
+ import os
+ import time
+ import psutil
+ import random
+ import optuna
+ import torch
+ import numpy as np
+ import tensorflow as tf
+ import keras
+ from keras import layers
+ from keras import regularizers
+ from keras.models import Model
+ from keras.layers import *
+ from scipy.stats import pearsonr
+ from keras.callbacks import EarlyStopping
+ from sklearn.model_selection import KFold, train_test_split
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+ from optuna.exceptions import TrialPruned
+
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+ gpus = tf.config.experimental.list_physical_devices('GPU')
+ if gpus:
+     for gpu in gpus:
+         tf.config.experimental.set_memory_growth(gpu, True)
+
+
27
+ def resnet(nsnp, learning_rate):
28
+ inputs = Input(shape=(nsnp, 4))
29
+ x = Conv1D(10,4,padding='same',activation = 'linear',kernel_initializer = 'TruncatedNormal', kernel_regularizer=regularizers.l2(0.1),bias_regularizer = regularizers.l2(0.01))(inputs)
30
+ x = Conv1D(10,20,padding='same',activation = 'linear', kernel_initializer = 'TruncatedNormal',kernel_regularizer=regularizers.l2(0.1),bias_regularizer = regularizers.l2(0.01))(x)
31
+ x = Dropout(0.75)(x)
32
+ shortcut = Conv1D(10,4,padding='same',activation = 'linear',kernel_initializer = 'TruncatedNormal', kernel_regularizer=regularizers.l2(0.1),bias_regularizer = regularizers.l2(0.01))(inputs)
33
+ x = layers.add([shortcut,x])
34
+ x = Conv1D(10,4,padding='same',activation = 'linear',kernel_initializer = 'TruncatedNormal', kernel_regularizer=regularizers.l2(0.1),bias_regularizer = regularizers.l2(0.01))(x)
35
+ x = Dropout(0.75)(x)
36
+ x = Flatten()(x)
37
+ x = Dropout(0.75)(x)
38
+ outputs = Dense(1,activation = 'linear', bias_regularizer = regularizers.l2(0.01),kernel_initializer = 'TruncatedNormal',name = 'out')(x)
39
+
40
+ model = Model(inputs = inputs,outputs = outputs)
41
+ model.compile(loss='mean_squared_error', optimizer=keras.optimizers.Adam(learning_rate=learning_rate),metrics=['mae'])
42
+
43
+ return model
44
+
45
+ def isru(x):
46
+ return x / (tf.sqrt(1 + 0.02 * tf.square(x)))
47
+
48
+ def run_nested_cv_with_early_stopping(data, label, nsnp, learning_rate, batch_size, patience):
49
+ print("Starting 10-fold cross-validation...")
50
+ kf = KFold(n_splits=10, shuffle=True, random_state=42)
51
+
52
+ all_mse, all_mae, all_r2, all_pcc = [], [], [], []
53
+ time_star = time.time()
54
+ early_stopping = EarlyStopping(monitor='val_loss', patience=patience)
55
+ for fold, (train_index, test_index) in enumerate(kf.split(data)):
56
+ print(f"Running fold {fold}...")
57
+ process = psutil.Process(os.getpid())
58
+ fold_start_time = time.time()
59
+
60
+ X_train, X_test = data[train_index], data[test_index]
61
+ y_train, y_test = label[train_index], label[test_index]
62
+
63
+ X_train_sub, X_valid, y_train_sub, y_valid = train_test_split(X_train, y_train, test_size=0.1, random_state=42)
64
+
65
+ model = resnet(nsnp = nsnp, learning_rate = learning_rate)
66
+ model.fit(X_train_sub, y_train_sub, batch_size=batch_size, epochs=1000, validation_data=(X_valid, y_valid),callbacks=[early_stopping],shuffle= True, verbose=0)
67
+ y_pred = model.predict(X_test)
68
+
69
+ y_pred = np.asarray(y_pred).flatten().astype(np.float64)
70
+ y_test = np.asarray(y_test).flatten().astype(np.float64)
71
+
72
+ mse = mean_squared_error(y_test, y_pred)
73
+ r2 = r2_score(y_test, y_pred)
74
+ mae = mean_absolute_error(y_test, y_pred)
75
+ pcc, _ = pearsonr(y_test, y_pred)
76
+
77
+ if np.isnan(pcc):
78
+ print(f"Fold {fold} resulted in NaN PCC, pruning the trial...")
79
+ raise TrialPruned()
80
+
81
+ all_mse.append(mse)
82
+ all_r2.append(r2)
83
+ all_mae.append(mae)
84
+ all_pcc.append(pcc)
85
+
86
+ fold_time = time.time() - fold_start_time
87
+ fold_cpu_mem = process.memory_info().rss / 1024**2
88
+ print(f'Fold {fold}: Corr={pcc:.4f}, MAE={mae:.4f}, MSE={mse:.4f}, R2={r2:.4f}, Time={fold_time:.2f}s, '
89
+ f'CPU={fold_cpu_mem:.2f}MB')
90
+ return np.mean(all_pcc) if all_pcc else 0.0
91
+
92
+ def set_seed(seed=42):
93
+ random.seed(seed)
94
+ np.random.seed(seed)
95
+ torch.manual_seed(seed)
96
+ if torch.cuda.is_available():
97
+ torch.cuda.manual_seed_all(seed)
98
+ torch.backends.cudnn.deterministic = True
99
+ torch.backends.cudnn.benchmark = False
100
+
101
+ def Hyperparameter(data, label, nsnp):
102
+ set_seed(42)
103
+ def objective(trial):
104
+ learning_rate = trial.suggest_float("learning_rate", 1e-4, 0.1)
105
+ batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
106
+ patience = trial.suggest_int("patience", 5, 10)
107
+ try:
108
+ corr_score = run_nested_cv_with_early_stopping(
109
+ data=data,
110
+ label=label,
111
+ nsnp=nsnp,
112
+ learning_rate=learning_rate,
113
+ batch_size=batch_size,
114
+ patience=patience
115
+ )
116
+ except TrialPruned:
117
+ return float("-inf")
118
+ return corr_score
119
+
120
+ study = optuna.create_study(direction="maximize")
121
+ study.optimize(objective, n_trials=20)
122
+
123
+ print("best params:", study.best_params)
124
+ print("successfully")
125
+ return study.best_params
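
A toy invocation for this search, with input shapes matching the (samples, SNPs, 4) one-hot encoding that indices_to_one_hot() in DL_GWAS.py produces (sizes are illustrative; each trial trains ten Keras models):

    import numpy as np

    codes = np.random.randint(0, 4, size=(100, 50))   # hypothetical genotype codes
    X = np.eye(4)[codes]                              # one-hot to (100, 50, 4)
    label = np.random.rand(100)

    best_params = Hyperparameter(X, label, nsnp=50)   # 20 Optuna trials of 10-fold CV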
gpbench/method_reg/DL_GWAS/__init__.py
@@ -0,0 +1,5 @@
+ from .DL_GWAS import DL_GWAS_reg
+
+ DL_GWAS = DL_GWAS_reg
+
+ __all__ = ["DL_GWAS", "DL_GWAS_reg"]