gpbench 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. gp_agent_tool/compute_dataset_feature.py +67 -0
  2. gp_agent_tool/config.py +65 -0
  3. gp_agent_tool/experience/create_masked_dataset_summary.py +97 -0
  4. gp_agent_tool/experience/dataset_summary_info.py +13 -0
  5. gp_agent_tool/experience/experience_info.py +12 -0
  6. gp_agent_tool/experience/get_matched_experience.py +111 -0
  7. gp_agent_tool/llm_client.py +119 -0
  8. gp_agent_tool/logging_utils.py +24 -0
  9. gp_agent_tool/main.py +347 -0
  10. gp_agent_tool/read_agent/__init__.py +46 -0
  11. gp_agent_tool/read_agent/nodes.py +674 -0
  12. gp_agent_tool/read_agent/prompts.py +547 -0
  13. gp_agent_tool/read_agent/python_repl_tool.py +165 -0
  14. gp_agent_tool/read_agent/state.py +101 -0
  15. gp_agent_tool/read_agent/workflow.py +54 -0
  16. gpbench/__init__.py +25 -0
  17. gpbench/_selftest.py +104 -0
  18. gpbench/method_class/BayesA/BayesA_class.py +141 -0
  19. gpbench/method_class/BayesA/__init__.py +5 -0
  20. gpbench/method_class/BayesA/_bayesfromR.py +96 -0
  21. gpbench/method_class/BayesA/_param_free_base_model.py +84 -0
  22. gpbench/method_class/BayesA/bayesAfromR.py +16 -0
  23. gpbench/method_class/BayesB/BayesB_class.py +140 -0
  24. gpbench/method_class/BayesB/__init__.py +5 -0
  25. gpbench/method_class/BayesB/_bayesfromR.py +96 -0
  26. gpbench/method_class/BayesB/_param_free_base_model.py +84 -0
  27. gpbench/method_class/BayesB/bayesBfromR.py +16 -0
  28. gpbench/method_class/BayesC/BayesC_class.py +141 -0
  29. gpbench/method_class/BayesC/__init__.py +4 -0
  30. gpbench/method_class/BayesC/_bayesfromR.py +96 -0
  31. gpbench/method_class/BayesC/_param_free_base_model.py +84 -0
  32. gpbench/method_class/BayesC/bayesCfromR.py +16 -0
  33. gpbench/method_class/CropARNet/CropARNet_class.py +186 -0
  34. gpbench/method_class/CropARNet/CropARNet_he_class.py +154 -0
  35. gpbench/method_class/CropARNet/__init__.py +5 -0
  36. gpbench/method_class/CropARNet/base_CropARNet_class.py +178 -0
  37. gpbench/method_class/Cropformer/Cropformer_class.py +308 -0
  38. gpbench/method_class/Cropformer/__init__.py +5 -0
  39. gpbench/method_class/Cropformer/cropformer_he_class.py +221 -0
  40. gpbench/method_class/DL_GWAS/DL_GWAS_class.py +250 -0
  41. gpbench/method_class/DL_GWAS/DL_GWAS_he_class.py +169 -0
  42. gpbench/method_class/DL_GWAS/__init__.py +5 -0
  43. gpbench/method_class/DNNGP/DNNGP_class.py +163 -0
  44. gpbench/method_class/DNNGP/DNNGP_he_class.py +138 -0
  45. gpbench/method_class/DNNGP/__init__.py +5 -0
  46. gpbench/method_class/DNNGP/base_dnngp_class.py +116 -0
  47. gpbench/method_class/DeepCCR/DeepCCR_class.py +172 -0
  48. gpbench/method_class/DeepCCR/DeepCCR_he_class.py +161 -0
  49. gpbench/method_class/DeepCCR/__init__.py +5 -0
  50. gpbench/method_class/DeepCCR/base_DeepCCR_class.py +209 -0
  51. gpbench/method_class/DeepGS/DeepGS_class.py +184 -0
  52. gpbench/method_class/DeepGS/DeepGS_he_class.py +150 -0
  53. gpbench/method_class/DeepGS/__init__.py +5 -0
  54. gpbench/method_class/DeepGS/base_deepgs_class.py +153 -0
  55. gpbench/method_class/EIR/EIR_class.py +276 -0
  56. gpbench/method_class/EIR/EIR_he_class.py +184 -0
  57. gpbench/method_class/EIR/__init__.py +5 -0
  58. gpbench/method_class/EIR/utils/__init__.py +0 -0
  59. gpbench/method_class/EIR/utils/array_output_modules.py +97 -0
  60. gpbench/method_class/EIR/utils/common.py +65 -0
  61. gpbench/method_class/EIR/utils/lcl_layers.py +235 -0
  62. gpbench/method_class/EIR/utils/logging.py +59 -0
  63. gpbench/method_class/EIR/utils/mlp_layers.py +92 -0
  64. gpbench/method_class/EIR/utils/models_locally_connected.py +642 -0
  65. gpbench/method_class/EIR/utils/transformer_models.py +546 -0
  66. gpbench/method_class/ElasticNet/ElasticNet_class.py +133 -0
  67. gpbench/method_class/ElasticNet/ElasticNet_he_class.py +91 -0
  68. gpbench/method_class/ElasticNet/__init__.py +5 -0
  69. gpbench/method_class/G2PDeep/G2PDeep_he_class.py +217 -0
  70. gpbench/method_class/G2PDeep/G2Pdeep_class.py +205 -0
  71. gpbench/method_class/G2PDeep/__init__.py +5 -0
  72. gpbench/method_class/G2PDeep/base_G2PDeep_class.py +209 -0
  73. gpbench/method_class/GBLUP/GBLUP_class.py +183 -0
  74. gpbench/method_class/GBLUP/__init__.py +5 -0
  75. gpbench/method_class/GEFormer/GEFormer_class.py +169 -0
  76. gpbench/method_class/GEFormer/GEFormer_he_class.py +137 -0
  77. gpbench/method_class/GEFormer/__init__.py +5 -0
  78. gpbench/method_class/GEFormer/gMLP_class.py +357 -0
  79. gpbench/method_class/LightGBM/LightGBM_class.py +224 -0
  80. gpbench/method_class/LightGBM/LightGBM_he_class.py +121 -0
  81. gpbench/method_class/LightGBM/__init__.py +5 -0
  82. gpbench/method_class/RF/RF_GPU_class.py +165 -0
  83. gpbench/method_class/RF/RF_GPU_he_class.py +124 -0
  84. gpbench/method_class/RF/__init__.py +5 -0
  85. gpbench/method_class/SVC/SVC_GPU.py +181 -0
  86. gpbench/method_class/SVC/SVC_GPU_he.py +106 -0
  87. gpbench/method_class/SVC/__init__.py +5 -0
  88. gpbench/method_class/SoyDNGP/AlexNet_206_class.py +179 -0
  89. gpbench/method_class/SoyDNGP/SoyDNGP_class.py +189 -0
  90. gpbench/method_class/SoyDNGP/SoyDNGP_he_class.py +112 -0
  91. gpbench/method_class/SoyDNGP/__init__.py +5 -0
  92. gpbench/method_class/XGBoost/XGboost_GPU_class.py +198 -0
  93. gpbench/method_class/XGBoost/XGboost_GPU_he_class.py +178 -0
  94. gpbench/method_class/XGBoost/__init__.py +5 -0
  95. gpbench/method_class/__init__.py +52 -0
  96. gpbench/method_class/rrBLUP/__init__.py +5 -0
  97. gpbench/method_class/rrBLUP/rrBLUP_class.py +140 -0
  98. gpbench/method_reg/BayesA/BayesA.py +116 -0
  99. gpbench/method_reg/BayesA/__init__.py +5 -0
  100. gpbench/method_reg/BayesA/_bayesfromR.py +96 -0
  101. gpbench/method_reg/BayesA/_param_free_base_model.py +84 -0
  102. gpbench/method_reg/BayesA/bayesAfromR.py +16 -0
  103. gpbench/method_reg/BayesB/BayesB.py +117 -0
  104. gpbench/method_reg/BayesB/__init__.py +5 -0
  105. gpbench/method_reg/BayesB/_bayesfromR.py +96 -0
  106. gpbench/method_reg/BayesB/_param_free_base_model.py +84 -0
  107. gpbench/method_reg/BayesB/bayesBfromR.py +16 -0
  108. gpbench/method_reg/BayesC/BayesC.py +115 -0
  109. gpbench/method_reg/BayesC/__init__.py +5 -0
  110. gpbench/method_reg/BayesC/_bayesfromR.py +96 -0
  111. gpbench/method_reg/BayesC/_param_free_base_model.py +84 -0
  112. gpbench/method_reg/BayesC/bayesCfromR.py +16 -0
  113. gpbench/method_reg/CropARNet/CropARNet.py +159 -0
  114. gpbench/method_reg/CropARNet/CropARNet_Hyperparameters.py +109 -0
  115. gpbench/method_reg/CropARNet/__init__.py +5 -0
  116. gpbench/method_reg/CropARNet/base_CropARNet.py +137 -0
  117. gpbench/method_reg/Cropformer/Cropformer.py +313 -0
  118. gpbench/method_reg/Cropformer/Cropformer_Hyperparameters.py +250 -0
  119. gpbench/method_reg/Cropformer/__init__.py +5 -0
  120. gpbench/method_reg/DL_GWAS/DL_GWAS.py +186 -0
  121. gpbench/method_reg/DL_GWAS/DL_GWAS_Hyperparameters.py +125 -0
  122. gpbench/method_reg/DL_GWAS/__init__.py +5 -0
  123. gpbench/method_reg/DNNGP/DNNGP.py +157 -0
  124. gpbench/method_reg/DNNGP/DNNGP_Hyperparameters.py +118 -0
  125. gpbench/method_reg/DNNGP/__init__.py +5 -0
  126. gpbench/method_reg/DNNGP/base_dnngp.py +101 -0
  127. gpbench/method_reg/DeepCCR/DeepCCR.py +149 -0
  128. gpbench/method_reg/DeepCCR/DeepCCR_Hyperparameters.py +110 -0
  129. gpbench/method_reg/DeepCCR/__init__.py +5 -0
  130. gpbench/method_reg/DeepCCR/base_DeepCCR.py +171 -0
  131. gpbench/method_reg/DeepGS/DeepGS.py +165 -0
  132. gpbench/method_reg/DeepGS/DeepGS_Hyperparameters.py +114 -0
  133. gpbench/method_reg/DeepGS/__init__.py +5 -0
  134. gpbench/method_reg/DeepGS/base_deepgs.py +98 -0
  135. gpbench/method_reg/EIR/EIR.py +258 -0
  136. gpbench/method_reg/EIR/EIR_Hyperparameters.py +178 -0
  137. gpbench/method_reg/EIR/__init__.py +5 -0
  138. gpbench/method_reg/EIR/utils/__init__.py +0 -0
  139. gpbench/method_reg/EIR/utils/array_output_modules.py +97 -0
  140. gpbench/method_reg/EIR/utils/common.py +65 -0
  141. gpbench/method_reg/EIR/utils/lcl_layers.py +235 -0
  142. gpbench/method_reg/EIR/utils/logging.py +59 -0
  143. gpbench/method_reg/EIR/utils/mlp_layers.py +92 -0
  144. gpbench/method_reg/EIR/utils/models_locally_connected.py +642 -0
  145. gpbench/method_reg/EIR/utils/transformer_models.py +546 -0
  146. gpbench/method_reg/ElasticNet/ElasticNet.py +123 -0
  147. gpbench/method_reg/ElasticNet/ElasticNet_he.py +83 -0
  148. gpbench/method_reg/ElasticNet/__init__.py +5 -0
  149. gpbench/method_reg/G2PDeep/G2PDeep_Hyperparameters.py +107 -0
  150. gpbench/method_reg/G2PDeep/G2Pdeep.py +166 -0
  151. gpbench/method_reg/G2PDeep/__init__.py +5 -0
  152. gpbench/method_reg/G2PDeep/base_G2PDeep.py +209 -0
  153. gpbench/method_reg/GBLUP/GBLUP_R.py +182 -0
  154. gpbench/method_reg/GBLUP/__init__.py +5 -0
  155. gpbench/method_reg/GEFormer/GEFormer.py +164 -0
  156. gpbench/method_reg/GEFormer/GEFormer_Hyperparameters.py +106 -0
  157. gpbench/method_reg/GEFormer/__init__.py +5 -0
  158. gpbench/method_reg/GEFormer/gMLP.py +341 -0
  159. gpbench/method_reg/LightGBM/LightGBM.py +237 -0
  160. gpbench/method_reg/LightGBM/LightGBM_Hyperparameters.py +77 -0
  161. gpbench/method_reg/LightGBM/__init__.py +5 -0
  162. gpbench/method_reg/MVP/MVP.py +182 -0
  163. gpbench/method_reg/MVP/MVP_Hyperparameters.py +126 -0
  164. gpbench/method_reg/MVP/__init__.py +5 -0
  165. gpbench/method_reg/MVP/base_MVP.py +113 -0
  166. gpbench/method_reg/RF/RF_GPU.py +174 -0
  167. gpbench/method_reg/RF/RF_Hyperparameters.py +163 -0
  168. gpbench/method_reg/RF/__init__.py +5 -0
  169. gpbench/method_reg/SVC/SVC_GPU.py +194 -0
  170. gpbench/method_reg/SVC/SVC_Hyperparameters.py +107 -0
  171. gpbench/method_reg/SVC/__init__.py +5 -0
  172. gpbench/method_reg/SoyDNGP/AlexNet_206.py +185 -0
  173. gpbench/method_reg/SoyDNGP/SoyDNGP.py +179 -0
  174. gpbench/method_reg/SoyDNGP/SoyDNGP_Hyperparameters.py +105 -0
  175. gpbench/method_reg/SoyDNGP/__init__.py +5 -0
  176. gpbench/method_reg/XGBoost/XGboost_GPU.py +188 -0
  177. gpbench/method_reg/XGBoost/XGboost_Hyperparameters.py +167 -0
  178. gpbench/method_reg/XGBoost/__init__.py +5 -0
  179. gpbench/method_reg/__init__.py +55 -0
  180. gpbench/method_reg/rrBLUP/__init__.py +5 -0
  181. gpbench/method_reg/rrBLUP/rrBLUP.py +123 -0
  182. gpbench-1.0.0.dist-info/METADATA +379 -0
  183. gpbench-1.0.0.dist-info/RECORD +188 -0
  184. gpbench-1.0.0.dist-info/WHEEL +5 -0
  185. gpbench-1.0.0.dist-info/entry_points.txt +2 -0
  186. gpbench-1.0.0.dist-info/top_level.txt +3 -0
  187. tests/test_import.py +80 -0
  188. tests/test_method.py +232 -0
@@ -0,0 +1,250 @@
1
+ import os
2
+ import time
3
+ import psutil
4
+ import swanlab
5
+ import argparse
6
+ import random
7
+ import gc
8
+ import torch
9
+ import numpy as np
10
+ import pandas as pd
11
+ import tensorflow as tf
12
+ import keras
13
+ import pynvml
14
+ from keras import layers
15
+ from keras import regularizers
16
+ from keras.models import Model
17
+ from keras.layers import Input, Conv1D, Dropout, Flatten, Dense
18
+ from keras.callbacks import EarlyStopping
19
+ from sklearn.model_selection import StratifiedKFold, train_test_split
20
+ from sklearn.preprocessing import LabelEncoder
21
+ from sklearn.metrics import accuracy_score, precision_recall_fscore_support
22
+ from . import DL_GWAS_he_class
23
+
24
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
25
+
26
+
27
def parse_args():
    """Build the DL_GWAS command-line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace carrying the path options (``methods``,
        ``species``, ``phe``, ``data_dir``, ``result_dir``) and the training
        options (``epochs``, ``batch_size``, ``learning_rate``, ``patience``).
    """
    cli = argparse.ArgumentParser(description="DL_GWAS classification")

    # (flag, type, default) rows: every option is a plain typed value, so a
    # table keeps the defaults easy to scan.
    option_table = (
        ("--methods", str, "DL_GWAS/"),
        ("--species", str, ""),
        ("--phe", str, ""),
        ("--data_dir", str, "../../data/"),
        ("--result_dir", str, "result/"),
        ("--epochs", int, 1000),
        ("--batch_size", int, 128),
        ("--learning_rate", float, 0.01),
        ("--patience", int, 5),
    )
    for flag, value_type, default in option_table:
        cli.add_argument(flag, type=value_type, default=default)

    return cli.parse_args()
40
+
41
+
42
def indices_to_one_hot(data, nb_classes):
    """Return one-hot rows for a collection of integer class indices.

    Args:
        data: Array-like of integer indices; it is flattened first.
        nb_classes: Width of each one-hot row.

    Returns:
        A 2-D array with one row per flattened index, taken from the
        ``nb_classes`` x ``nb_classes`` identity matrix.
    """
    flat_indices = np.asarray(data).ravel()
    identity = np.eye(nb_classes)
    return identity[flat_indices]
45
+
46
+
47
def load_data(args):
    """Load genotype/phenotype arrays for one species and one-hot encode the genotypes.

    Reads ``genotype.npz`` (key ``arr_0``) and ``phenotype.npz`` (keys
    ``arr_0`` and ``arr_1``) from ``<args.data_dir>/<args.species>/``.

    Args:
        args: Object exposing ``data_dir`` and ``species`` attributes.

    Returns:
        Tuple ``(arr, yData, nsample, nsnp, names)`` where ``arr`` is a
        float32 array of shape ``(nsample, nsnp, 4)`` holding the one-hot
        encoded genotype codes, ``yData`` and ``names`` are the two phenotype
        arrays as stored on disk, and ``nsample``/``nsnp`` are the first two
        dimensions of the genotype matrix.
    """
    species_dir = os.path.join(args.data_dir, args.species)

    # NpzFile keeps the archive open until closed; the context managers
    # release the handles, and the phenotype archive is read only once.
    with np.load(os.path.join(species_dir, "genotype.npz")) as geno:
        xData = geno["arr_0"]
    with np.load(os.path.join(species_dir, "phenotype.npz")) as pheno:
        yData = pheno["arr_0"]
        names = pheno["arr_1"]

    nsample = xData.shape[0]
    nsnp = xData.shape[1]
    print("Number of samples: ", nsample)
    print("Number of SNPs: ", nsnp)

    xData = xData.astype(int)
    # -9 is remapped to code 0 before encoding.
    xData[xData == -9] = 0

    # One vectorised lookup into a float32 identity matrix replaces the
    # per-sample loop and its float64 intermediates.
    arr = np.eye(4, dtype=np.float32)[xData]

    return arr, yData, nsample, nsnp, names
63
+
64
+
65
def set_seed(seed=42):
    """Seed the Python, NumPy, PyTorch and TensorFlow RNGs with ``seed``.

    Also switches cuDNN to deterministic mode and turns its benchmark
    autotuner off.
    """
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        tf.random.set_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
73
+
74
+
75
def get_gpu_mem_by_pid(pid, handle=None):
    """Return the GPU memory (in MiB) that NVML reports for process ``pid``.

    Args:
        pid: Process id to look for among the device's compute processes.
        handle: NVML device handle; ``None`` means no GPU is being tracked.

    Returns:
        ``usedGpuMemory / 1024**2`` for the first matching process, or
        ``0.0`` when there is no handle, no matching process, or the query
        raises.
    """
    if handle is not None:
        try:
            for entry in pynvml.nvmlDeviceGetComputeRunningProcesses(handle):
                if entry.pid != pid:
                    continue
                return entry.usedGpuMemory / 1024**2
        except Exception:
            # Best-effort metric: any failure is reported as zero usage.
            pass
    return 0.0
86
+
87
+
88
def resnet(args, nsnp: int, num_classes: int):
    """Build and compile the 1-D convolutional classifier with one additive shortcut.

    Args:
        args: Object exposing ``learning_rate`` (used for the Adam optimizer).
        nsnp: Number of SNP positions; the input shape is ``(nsnp, 4)``.
        num_classes: Size of the softmax output layer.

    Returns:
        A compiled Keras ``Model`` using sparse categorical cross-entropy
        and the accuracy metric.
    """

    def conv(kernel_size):
        # Every convolution in the network shares these settings; only the
        # kernel width differs.
        return Conv1D(
            10, kernel_size, padding="same", activation="linear",
            kernel_initializer="TruncatedNormal",
            kernel_regularizer=regularizers.l2(0.1),
            bias_regularizer=regularizers.l2(0.01),
        )

    inputs = Input(shape=(nsnp, 4))

    # Main path: two stacked convolutions followed by dropout.
    main_path = conv(4)(inputs)
    main_path = conv(20)(main_path)
    main_path = Dropout(0.75)(main_path)

    # Shortcut path: a single convolution of the raw input, summed with the main path.
    shortcut = conv(4)(inputs)
    merged = layers.add([shortcut, main_path])

    head = conv(4)(merged)
    head = Dropout(0.75)(head)
    head = Flatten()(head)
    head = Dropout(0.75)(head)

    outputs = Dense(num_classes, activation="softmax", name="out")(head)
    model = Model(inputs=inputs, outputs=outputs)

    optimizer = keras.optimizers.Adam(learning_rate=args.learning_rate)
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"],
    )
    return model
132
+
133
+
134
def run_nested_cv(args, data, label, nsnp: int, num_classes: int, handle=None):
    """Run stratified 10-fold cross-validation and write per-fold predictions.

    For every fold, 10% of the training part is held out (stratified) as a
    validation set for early stopping, a fresh model is trained, and
    accuracy plus macro precision/recall/F1 are computed on the test part.
    Per-fold predictions are written to ``fold<k>.csv`` under
    ``<args.result_dir>/<args.methods><args.species><args.phe>``.

    Args:
        args: Namespace with ``result_dir``, ``methods``, ``species``,
            ``phe``, ``patience``, ``batch_size``, ``epochs`` and
            ``learning_rate``.
        data: Feature array indexed by sample along the first axis.
        label: Integer class labels, one per sample.
        nsnp: Number of SNP positions passed to the model builder.
        num_classes: Number of output classes.
        handle: Optional NVML device handle used only for the per-fold GPU
            memory report; with ``None`` the report shows 0.00MB.
    """
    result_dir = os.path.join(args.result_dir, args.methods + args.species + args.phe)
    os.makedirs(result_dir, exist_ok=True)

    kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    early_stopping = EarlyStopping(monitor="val_loss", patience=args.patience, restore_best_weights=True)

    all_acc, all_prec, all_rec, all_f1 = [], [], [], []
    cv_start_time = time.time()

    for fold, (train_index, test_index) in enumerate(kf.split(data, label)):
        fold_start_time = time.time()
        process = psutil.Process(os.getpid())
        print(f"\n===== Fold {fold} =====")

        X_train, X_test = data[train_index], data[test_index]
        y_train, y_test = label[train_index], label[test_index]

        X_train_sub, X_valid, y_train_sub, y_valid = train_test_split(
            X_train,
            y_train,
            test_size=0.1,
            stratify=y_train,
            random_state=42,
        )

        model = resnet(args, nsnp=nsnp, num_classes=num_classes)
        model.fit(
            X_train_sub,
            y_train_sub,
            batch_size=args.batch_size,
            epochs=args.epochs,
            validation_data=(X_valid, y_valid),
            callbacks=[early_stopping],
            shuffle=True,
            verbose=0,
        )

        y_prob = model.predict(X_test, verbose=0)
        y_pred = np.argmax(y_prob, axis=1)

        acc = accuracy_score(y_test, y_pred)
        prec, rec, f1, _ = precision_recall_fscore_support(
            y_test, y_pred, average="macro", zero_division=0
        )

        all_acc.append(acc)
        all_prec.append(prec)
        all_rec.append(rec)
        all_f1.append(f1)

        fold_time = time.time() - fold_start_time
        # `handle` is now a parameter: it used to be read from a name that
        # only existed inside DL_GWAS_class, which raised NameError here.
        fold_gpu_mem = get_gpu_mem_by_pid(os.getpid(), handle)
        fold_cpu_mem = process.memory_info().rss / 1024**2

        print(
            f"Fold {fold}: "
            f"ACC={acc:.4f}, PREC={prec:.4f}, REC={rec:.4f}, F1={f1:.4f}, "
            f"Time={fold_time:.2f}s, GPU={fold_gpu_mem:.2f}MB, CPU={fold_cpu_mem:.2f}MB"
        )

        pd.DataFrame({"Y_test": y_test, "Y_pred": y_pred}).to_csv(
            os.path.join(result_dir, f"fold{fold}.csv"), index=False
        )

        # Drop the fold's model and Keras global state before the next fold.
        del model
        keras.backend.clear_session()
        gc.collect()

    cv_time = time.time() - cv_start_time
    print("\n===== Cross-validation summary =====")
    print(f"ACC : {np.mean(all_acc):.4f} ± {np.std(all_acc):.4f}")
    print(f"PREC: {np.mean(all_prec):.4f} ± {np.std(all_prec):.4f}")
    print(f"REC : {np.mean(all_rec):.4f} ± {np.std(all_rec):.4f}")
    print(f"F1 : {np.mean(all_f1):.4f} ± {np.std(all_f1):.4f}")
    print(f"Time: {cv_time:.2f}s")


def DL_GWAS_class():
    """Entry point: tune hyperparameters, then cross-validate DL_GWAS per species.

    Seeds the RNGs, enables TensorFlow GPU memory growth when possible,
    parses the command line, and for every species loads the data, encodes
    the first phenotype column as class labels, runs the hyperparameter
    search and finally the 10-fold cross-validation with the best settings.
    """
    set_seed(42)
    try:
        gpus = tf.config.list_physical_devices("GPU")
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except Exception as exc:
        # Memory growth is an optimisation only; report and carry on.
        print(f"Could not enable GPU memory growth: {exc}")

    # NVML only feeds the GPU-memory column of the per-fold report, so a
    # machine without NVML must not abort the whole run.
    handle = None
    nvml_ready = False
    try:
        pynvml.nvmlInit()
        nvml_ready = True
        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
    except pynvml.NVMLError as exc:
        print(f"NVML unavailable, GPU memory will be reported as 0: {exc}")

    try:
        args = parse_args()

        all_species = ["Human/Sim/"]
        for species in all_species:
            args.species = species
            X, Y, _, nsnp, _ = load_data(args)
            print("Starting:", args.methods + args.species)

            label_raw = np.nan_to_num(Y[:, 0])
            le = LabelEncoder()
            label = le.fit_transform(label_raw)
            num_classes = len(le.classes_)

            best_params = DL_GWAS_he_class.Hyperparameter(X, label, nsnp, num_classes)
            args.learning_rate = best_params["learning_rate"]
            args.batch_size = best_params["batch_size"]
            args.patience = best_params["patience"]

            start_time = time.time()
            run_nested_cv(
                args, data=X, label=label, nsnp=nsnp, num_classes=num_classes, handle=handle
            )
            elapsed_time = time.time() - start_time

            print(f"Total running time: {elapsed_time:.2f}s")
            print("Successfully finished:", args.species)
    finally:
        if nvml_ready:
            pynvml.nvmlShutdown()


if __name__ == "__main__":
    DL_GWAS_class()
@@ -0,0 +1,169 @@
1
+ import os
2
+ import time
3
+ import psutil
4
+ import random
5
+ import optuna
6
+ import gc
7
+ import numpy as np
8
+ import tensorflow as tf
9
+ import keras
10
+ from keras import layers
11
+ from keras import regularizers
12
+ from keras.models import Model
13
+ from keras.layers import Input, Conv1D, Dropout, Flatten, Dense
14
+ from keras.callbacks import EarlyStopping
15
+ from sklearn.model_selection import StratifiedKFold, train_test_split
16
+ from sklearn.metrics import accuracy_score
17
+ from optuna.exceptions import TrialPruned
18
+
19
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
20
+
21
+
22
def set_seed(seed=42):
    """Seed the Python, NumPy and TensorFlow random number generators."""
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)
26
+
27
+
28
def resnet(nsnp: int, num_classes: int, learning_rate: float):
    """Build and compile the 1-D convolutional classifier with one additive shortcut.

    Args:
        nsnp: Number of SNP positions; the input shape is ``(nsnp, 4)``.
        num_classes: Size of the softmax output layer.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A compiled Keras ``Model`` using sparse categorical cross-entropy
        and the accuracy metric.
    """

    def conv(kernel_size):
        # Every convolution in the network shares these settings; only the
        # kernel width differs.
        return Conv1D(
            10, kernel_size, padding="same", activation="linear",
            kernel_initializer="TruncatedNormal",
            kernel_regularizer=regularizers.l2(0.1),
            bias_regularizer=regularizers.l2(0.01),
        )

    inputs = Input(shape=(nsnp, 4))

    # Main path: two stacked convolutions followed by dropout.
    main_path = conv(4)(inputs)
    main_path = conv(20)(main_path)
    main_path = Dropout(0.75)(main_path)

    # Shortcut path: a single convolution of the raw input, summed with the main path.
    shortcut = conv(4)(inputs)
    merged = layers.add([shortcut, main_path])

    head = conv(4)(merged)
    head = Dropout(0.75)(head)
    head = Flatten()(head)
    head = Dropout(0.75)(head)

    outputs = Dense(num_classes, activation="softmax", name="out")(head)
    model = Model(inputs=inputs, outputs=outputs)

    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"],
    )
    return model
72
+
73
+
74
def run_nested_cv_with_early_stopping(
    data,
    label,
    nsnp: int,
    num_classes: int,
    learning_rate: float,
    batch_size: int,
    patience: int,
    epochs: int = 1000,
):
    """Score one hyperparameter setting with stratified 10-fold cross-validation.

    Each fold holds out 10% of its training part (stratified) as the
    validation set for early stopping, trains a fresh model and measures
    accuracy on the fold's test part.

    Args:
        data: Feature array indexed by sample along the first axis.
        label: Integer class labels, one per sample.
        nsnp: Number of SNP positions passed to the model builder.
        num_classes: Number of output classes.
        learning_rate: Adam learning rate.
        batch_size: Mini-batch size for training.
        patience: Early-stopping patience on ``val_loss``.
        epochs: Upper bound on training epochs per fold (default 1000).

    Returns:
        Mean test accuracy over the folds as a float (``0.0`` if no fold ran).

    Raises:
        TrialPruned: When a fold's accuracy is NaN or not positive.
    """
    kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    early_stopping = EarlyStopping(monitor="val_loss", patience=patience, restore_best_weights=True)

    all_acc = []
    for train_index, test_index in kf.split(data, label):
        X_train, X_test = data[train_index], data[test_index]
        y_train, y_test = label[train_index], label[test_index]

        X_train_sub, X_valid, y_train_sub, y_valid = train_test_split(
            X_train,
            y_train,
            test_size=0.1,
            stratify=y_train,
            random_state=42,
        )

        model = resnet(nsnp=nsnp, num_classes=num_classes, learning_rate=learning_rate)
        try:
            model.fit(
                X_train_sub,
                y_train_sub,
                batch_size=batch_size,
                epochs=epochs,
                validation_data=(X_valid, y_valid),
                callbacks=[early_stopping],
                shuffle=True,
                verbose=0,
            )
            y_prob = model.predict(X_test, verbose=0)
        finally:
            # Release the fold's model and Keras global state on every exit
            # path, including a failure inside fit/predict.
            del model
            keras.backend.clear_session()
            gc.collect()

        acc = accuracy_score(y_test, np.argmax(y_prob, axis=1))
        if np.isnan(acc) or acc <= 0:
            # A degenerate fold disqualifies the whole setting.
            raise TrialPruned()

        all_acc.append(acc)

    return float(np.mean(all_acc)) if all_acc else 0.0
137
+
138
+
139
def Hyperparameter(data, label, nsnp: int, num_classes: int, n_trials: int = 20, seed: int = 42):
    """Search learning rate, batch size and patience with Optuna.

    Args:
        data: Feature array indexed by sample along the first axis.
        label: Integer class labels, one per sample.
        nsnp: Number of SNP positions passed to the model builder.
        num_classes: Number of output classes.
        n_trials: Number of Optuna trials to run (default 20).
        seed: Seed for the framework RNGs and for the Optuna sampler
            (default 42).

    Returns:
        ``study.best_params``: a dict with the keys ``learning_rate``,
        ``batch_size`` and ``patience``.
    """
    set_seed(seed)

    def objective(trial):
        learning_rate = trial.suggest_float("learning_rate", 1e-4, 0.1, log=True)
        batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
        patience = trial.suggest_int("patience", 5, 15)

        try:
            return run_nested_cv_with_early_stopping(
                data=data,
                label=label,
                nsnp=nsnp,
                num_classes=num_classes,
                learning_rate=learning_rate,
                batch_size=batch_size,
                patience=patience,
            )
        except TrialPruned:
            # A pruned setting is scored as the worst possible value so the
            # study still ends with a usable best trial.
            return float("-inf")
        finally:
            keras.backend.clear_session()
            gc.collect()

    # set_seed() does not reach Optuna's own RNG; an explicitly seeded
    # sampler makes the sequence of suggested settings repeatable.
    sampler = optuna.samplers.TPESampler(seed=seed)
    study = optuna.create_study(direction="maximize", sampler=sampler)
    study.optimize(objective, n_trials=n_trials)

    print("Best hyperparameters:", study.best_params)
    return study.best_params
@@ -0,0 +1,5 @@
1
# Package entry points for the DL_GWAS classification method.
from .DL_GWAS_class import DL_GWAS_class

# Short alias: both names refer to the same callable.
DL_GWAS = DL_GWAS_class

__all__ = [
    "DL_GWAS",
    "DL_GWAS_class",
]
@@ -0,0 +1,163 @@
1
+ import os
2
+ import time
3
+ import psutil
4
+ import swanlab
5
+ import argparse
6
+ import random
7
+ import torch
8
+ import numpy as np
9
+ import pandas as pd
10
+
11
+ from sklearn.model_selection import StratifiedKFold, train_test_split
12
+ from sklearn.preprocessing import LabelEncoder
13
+ from sklearn.metrics import accuracy_score, precision_recall_fscore_support
14
+ from torch.utils.data import DataLoader, TensorDataset
15
+
16
+ from .base_dnngp_class import DNNGP
17
+ from . import DNNGP_he_class
18
+
19
def parse_args():
    """Build the DNNGP command-line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace carrying the path options (``methods``,
        ``species``, ``phe``, ``data_dir``, ``result_dir``) and the training
        options (``epoch``, ``batch_size``, ``lr``, ``weight_decay``,
        ``patience``, ``dropout1``, ``dropout2``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--methods', type=str, default='DNNGP/')
    parser.add_argument('--species', type=str, default='')
    parser.add_argument('--phe', type=str, default='')
    parser.add_argument('--data_dir', type=str, default='../../data/')
    parser.add_argument('--result_dir', type=str, default='result/')

    parser.add_argument('--epoch', type=int, default=1000)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--lr', type=float, default=0.01)
    # run_nested_cv reads args.weight_decay; declaring it here means a
    # namespace straight from the parser is usable without the tuner
    # having filled it in first. 0.0 means no weight decay.
    parser.add_argument('--weight_decay', type=float, default=0.0)
    parser.add_argument('--patience', type=int, default=10)
    parser.add_argument('--dropout1', type=float, default=0.5)
    parser.add_argument('--dropout2', type=float, default=0.5)
    return parser.parse_args()
34
+
35
def load_data(args):
    """Load the genotype matrix and phenotype arrays for one species.

    Reads ``genotype.npz`` (key ``arr_0``) and ``phenotype.npz`` (keys
    ``arr_0`` and ``arr_1``) from ``<args.data_dir>/<args.species>/``.

    Args:
        args: Object exposing ``data_dir`` and ``species`` attributes.

    Returns:
        Tuple ``(xData, yData, nsample, nsnp, names)`` where
        ``nsample``/``nsnp`` are the first two dimensions of ``xData``.
    """
    species_dir = os.path.join(args.data_dir, args.species)

    # NpzFile keeps the archive open until closed; the context managers
    # release the handles, and the phenotype archive is read only once.
    with np.load(os.path.join(species_dir, 'genotype.npz')) as geno:
        xData = geno["arr_0"]
    with np.load(os.path.join(species_dir, 'phenotype.npz')) as pheno:
        yData = pheno["arr_0"]
        names = pheno["arr_1"]

    nsample, nsnp = xData.shape[0], xData.shape[1]
    print("Samples:", nsample)
    print("SNPs:", nsnp)
    return xData, yData, nsample, nsnp, names
43
+
44
def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch (CPU and all CUDA devices) RNGs."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
49
+
50
def run_nested_cv(args, data, label, nsnp, device, le):
    """Run stratified 10-fold cross-validation of DNNGP and write per-fold predictions.

    For every fold, 10% of the training part is held out (stratified) as a
    validation set, a fresh model is trained, and accuracy plus macro
    precision/recall/F1 are computed on the test part. Predictions are
    mapped back through ``le`` and written to ``fold<k>.csv`` under
    ``<args.result_dir>/<args.methods><args.species><args.phe>``.

    Args:
        args: Namespace with ``result_dir``, ``methods``, ``species``,
            ``phe``, ``batch_size``, ``epoch``, ``lr``, ``weight_decay``,
            ``patience``, ``dropout1`` and ``dropout2``.
        data: Feature array indexed by sample along the first axis.
        label: Encoded integer class labels, one per sample.
        nsnp: Number of SNPs passed to the model constructor.
        device: Torch device the tensors and model are moved to.
        le: Fitted label encoder used to restore the original label values.
    """
    out_dir = os.path.join(args.result_dir, args.methods + args.species + args.phe)
    os.makedirs(out_dir, exist_ok=True)
    num_classes = len(np.unique(label))

    splitter = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    all_acc, all_prec, all_rec, all_f1 = [], [], [], []
    time_start = time.time()  # NOTE(review): recorded but never read

    def make_loader(features, targets, shuffle):
        # Features gain an axis at dim 1; targets become int64 class ids.
        x_tensor = torch.from_numpy(features).float().unsqueeze(1).to(device)
        y_tensor = torch.from_numpy(targets).long().to(device)
        loader = DataLoader(TensorDataset(x_tensor, y_tensor), args.batch_size, shuffle=shuffle)
        return loader, y_tensor

    for fold, (train_idx, test_idx) in enumerate(splitter.split(data, label)):
        print(f"\n===== Fold {fold} =====")
        fold_start = time.time()
        process = psutil.Process(os.getpid())

        fold_X, fold_y = data[train_idx], label[train_idx]
        X_fit, X_val, y_fit, y_val = train_test_split(
            fold_X, fold_y, test_size=0.1, stratify=fold_y, random_state=42
        )

        train_loader, _ = make_loader(X_fit, y_fit, True)
        valid_loader, _ = make_loader(X_val, y_val, False)
        test_loader, y_test_tensor = make_loader(data[test_idx], label[test_idx], False)

        model = DNNGP(nsnp, args.dropout1, args.dropout2, output_dim=num_classes).to(device)
        model.train_model(
            train_loader,
            valid_loader,
            args.epoch,
            args.lr,
            args.weight_decay,
            args.patience,
            device
        )

        y_pred_cls = np.argmax(model.predict(test_loader), axis=1)
        y_true = y_test_tensor.cpu().numpy()

        acc = accuracy_score(y_true, y_pred_cls)
        prec, rec, f1, _ = precision_recall_fscore_support(
            y_true, y_pred_cls,
            average="macro", zero_division=0
        )

        for bucket, value in ((all_acc, acc), (all_prec, prec), (all_rec, rec), (all_f1, f1)):
            bucket.append(value)

        print(
            f"ACC={acc:.4f}, PREC={prec:.4f}, "
            f"REC={rec:.4f}, F1={f1:.4f}, "
            f"Time={time.time()-fold_start:.2f}s, "
            f"CPU={process.memory_info().rss/1024**2:.2f}MB"
        )

        fold_frame = pd.DataFrame({
            "Y_test": le.inverse_transform(y_true),
            "Y_pred": le.inverse_transform(y_pred_cls)
        })
        fold_frame.to_csv(os.path.join(out_dir, f"fold{fold}.csv"), index=False)

    print("\n===== CV Summary =====")
    print(f"ACC : {np.mean(all_acc):.4f} ± {np.std(all_acc):.4f}")
    print(f"PREC: {np.mean(all_prec):.4f} ± {np.std(all_prec):.4f}")
    print(f"REC : {np.mean(all_rec):.4f} ± {np.std(all_rec):.4f}")
    print(f"F1 : {np.mean(all_f1):.4f} ± {np.std(all_f1):.4f}")
126
+
127
+
128
def DNNGP_class():
    """Entry point: tune hyperparameters, then cross-validate DNNGP per species.

    Seeds the RNGs, parses the command line, picks CUDA device 0 when
    available, and for every species loads the data, encodes the first
    phenotype column as class labels (NaNs are replaced by the column's
    NaN-mean before encoding), runs the hyperparameter search and finally
    the 10-fold cross-validation with the best settings.
    """
    set_seed(42)
    args = parse_args()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    all_species = ["Human/Sim/"]

    for species in all_species:
        args.species = species
        # Sample count and names are not needed here.
        X, Y, _, nsnp, _ = load_data(args)
        print("Starting:", args.methods + args.species)

        label = Y[:, 0]
        label = np.nan_to_num(label, nan=np.nanmean(label))
        le = LabelEncoder()
        label = le.fit_transform(label)

        best_params = DNNGP_he_class.Hyperparameter(X, label, nsnp)
        args.lr = best_params['learning_rate']
        args.weight_decay = best_params['weight_decay']
        args.patience = best_params['patience']
        args.dropout1 = best_params['dropout1']
        args.dropout2 = best_params['dropout2']

        start_time = time.time()
        if torch.cuda.is_available():
            torch.cuda.reset_peak_memory_stats()

        run_nested_cv(args, X, label, nsnp, device, le)
        elapsed_time = time.time() - start_time
        print(f"Running time: {elapsed_time:.2f} s")
        print("successfully")


if __name__ == "__main__":
    DNNGP_class()