gpbench-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. gp_agent_tool/compute_dataset_feature.py +67 -0
  2. gp_agent_tool/config.py +65 -0
  3. gp_agent_tool/experience/create_masked_dataset_summary.py +97 -0
  4. gp_agent_tool/experience/dataset_summary_info.py +13 -0
  5. gp_agent_tool/experience/experience_info.py +12 -0
  6. gp_agent_tool/experience/get_matched_experience.py +111 -0
  7. gp_agent_tool/llm_client.py +119 -0
  8. gp_agent_tool/logging_utils.py +24 -0
  9. gp_agent_tool/main.py +347 -0
  10. gp_agent_tool/read_agent/__init__.py +46 -0
  11. gp_agent_tool/read_agent/nodes.py +674 -0
  12. gp_agent_tool/read_agent/prompts.py +547 -0
  13. gp_agent_tool/read_agent/python_repl_tool.py +165 -0
  14. gp_agent_tool/read_agent/state.py +101 -0
  15. gp_agent_tool/read_agent/workflow.py +54 -0
  16. gpbench/__init__.py +25 -0
  17. gpbench/_selftest.py +104 -0
  18. gpbench/method_class/BayesA/BayesA_class.py +141 -0
  19. gpbench/method_class/BayesA/__init__.py +5 -0
  20. gpbench/method_class/BayesA/_bayesfromR.py +96 -0
  21. gpbench/method_class/BayesA/_param_free_base_model.py +84 -0
  22. gpbench/method_class/BayesA/bayesAfromR.py +16 -0
  23. gpbench/method_class/BayesB/BayesB_class.py +140 -0
  24. gpbench/method_class/BayesB/__init__.py +5 -0
  25. gpbench/method_class/BayesB/_bayesfromR.py +96 -0
  26. gpbench/method_class/BayesB/_param_free_base_model.py +84 -0
  27. gpbench/method_class/BayesB/bayesBfromR.py +16 -0
  28. gpbench/method_class/BayesC/BayesC_class.py +141 -0
  29. gpbench/method_class/BayesC/__init__.py +4 -0
  30. gpbench/method_class/BayesC/_bayesfromR.py +96 -0
  31. gpbench/method_class/BayesC/_param_free_base_model.py +84 -0
  32. gpbench/method_class/BayesC/bayesCfromR.py +16 -0
  33. gpbench/method_class/CropARNet/CropARNet_class.py +186 -0
  34. gpbench/method_class/CropARNet/CropARNet_he_class.py +154 -0
  35. gpbench/method_class/CropARNet/__init__.py +5 -0
  36. gpbench/method_class/CropARNet/base_CropARNet_class.py +178 -0
  37. gpbench/method_class/Cropformer/Cropformer_class.py +308 -0
  38. gpbench/method_class/Cropformer/__init__.py +5 -0
  39. gpbench/method_class/Cropformer/cropformer_he_class.py +221 -0
  40. gpbench/method_class/DL_GWAS/DL_GWAS_class.py +250 -0
  41. gpbench/method_class/DL_GWAS/DL_GWAS_he_class.py +169 -0
  42. gpbench/method_class/DL_GWAS/__init__.py +5 -0
  43. gpbench/method_class/DNNGP/DNNGP_class.py +163 -0
  44. gpbench/method_class/DNNGP/DNNGP_he_class.py +138 -0
  45. gpbench/method_class/DNNGP/__init__.py +5 -0
  46. gpbench/method_class/DNNGP/base_dnngp_class.py +116 -0
  47. gpbench/method_class/DeepCCR/DeepCCR_class.py +172 -0
  48. gpbench/method_class/DeepCCR/DeepCCR_he_class.py +161 -0
  49. gpbench/method_class/DeepCCR/__init__.py +5 -0
  50. gpbench/method_class/DeepCCR/base_DeepCCR_class.py +209 -0
  51. gpbench/method_class/DeepGS/DeepGS_class.py +184 -0
  52. gpbench/method_class/DeepGS/DeepGS_he_class.py +150 -0
  53. gpbench/method_class/DeepGS/__init__.py +5 -0
  54. gpbench/method_class/DeepGS/base_deepgs_class.py +153 -0
  55. gpbench/method_class/EIR/EIR_class.py +276 -0
  56. gpbench/method_class/EIR/EIR_he_class.py +184 -0
  57. gpbench/method_class/EIR/__init__.py +5 -0
  58. gpbench/method_class/EIR/utils/__init__.py +0 -0
  59. gpbench/method_class/EIR/utils/array_output_modules.py +97 -0
  60. gpbench/method_class/EIR/utils/common.py +65 -0
  61. gpbench/method_class/EIR/utils/lcl_layers.py +235 -0
  62. gpbench/method_class/EIR/utils/logging.py +59 -0
  63. gpbench/method_class/EIR/utils/mlp_layers.py +92 -0
  64. gpbench/method_class/EIR/utils/models_locally_connected.py +642 -0
  65. gpbench/method_class/EIR/utils/transformer_models.py +546 -0
  66. gpbench/method_class/ElasticNet/ElasticNet_class.py +133 -0
  67. gpbench/method_class/ElasticNet/ElasticNet_he_class.py +91 -0
  68. gpbench/method_class/ElasticNet/__init__.py +5 -0
  69. gpbench/method_class/G2PDeep/G2PDeep_he_class.py +217 -0
  70. gpbench/method_class/G2PDeep/G2Pdeep_class.py +205 -0
  71. gpbench/method_class/G2PDeep/__init__.py +5 -0
  72. gpbench/method_class/G2PDeep/base_G2PDeep_class.py +209 -0
  73. gpbench/method_class/GBLUP/GBLUP_class.py +183 -0
  74. gpbench/method_class/GBLUP/__init__.py +5 -0
  75. gpbench/method_class/GEFormer/GEFormer_class.py +169 -0
  76. gpbench/method_class/GEFormer/GEFormer_he_class.py +137 -0
  77. gpbench/method_class/GEFormer/__init__.py +5 -0
  78. gpbench/method_class/GEFormer/gMLP_class.py +357 -0
  79. gpbench/method_class/LightGBM/LightGBM_class.py +224 -0
  80. gpbench/method_class/LightGBM/LightGBM_he_class.py +121 -0
  81. gpbench/method_class/LightGBM/__init__.py +5 -0
  82. gpbench/method_class/RF/RF_GPU_class.py +165 -0
  83. gpbench/method_class/RF/RF_GPU_he_class.py +124 -0
  84. gpbench/method_class/RF/__init__.py +5 -0
  85. gpbench/method_class/SVC/SVC_GPU.py +181 -0
  86. gpbench/method_class/SVC/SVC_GPU_he.py +106 -0
  87. gpbench/method_class/SVC/__init__.py +5 -0
  88. gpbench/method_class/SoyDNGP/AlexNet_206_class.py +179 -0
  89. gpbench/method_class/SoyDNGP/SoyDNGP_class.py +189 -0
  90. gpbench/method_class/SoyDNGP/SoyDNGP_he_class.py +112 -0
  91. gpbench/method_class/SoyDNGP/__init__.py +5 -0
  92. gpbench/method_class/XGBoost/XGboost_GPU_class.py +198 -0
  93. gpbench/method_class/XGBoost/XGboost_GPU_he_class.py +178 -0
  94. gpbench/method_class/XGBoost/__init__.py +5 -0
  95. gpbench/method_class/__init__.py +52 -0
  96. gpbench/method_class/rrBLUP/__init__.py +5 -0
  97. gpbench/method_class/rrBLUP/rrBLUP_class.py +140 -0
  98. gpbench/method_reg/BayesA/BayesA.py +116 -0
  99. gpbench/method_reg/BayesA/__init__.py +5 -0
  100. gpbench/method_reg/BayesA/_bayesfromR.py +96 -0
  101. gpbench/method_reg/BayesA/_param_free_base_model.py +84 -0
  102. gpbench/method_reg/BayesA/bayesAfromR.py +16 -0
  103. gpbench/method_reg/BayesB/BayesB.py +117 -0
  104. gpbench/method_reg/BayesB/__init__.py +5 -0
  105. gpbench/method_reg/BayesB/_bayesfromR.py +96 -0
  106. gpbench/method_reg/BayesB/_param_free_base_model.py +84 -0
  107. gpbench/method_reg/BayesB/bayesBfromR.py +16 -0
  108. gpbench/method_reg/BayesC/BayesC.py +115 -0
  109. gpbench/method_reg/BayesC/__init__.py +5 -0
  110. gpbench/method_reg/BayesC/_bayesfromR.py +96 -0
  111. gpbench/method_reg/BayesC/_param_free_base_model.py +84 -0
  112. gpbench/method_reg/BayesC/bayesCfromR.py +16 -0
  113. gpbench/method_reg/CropARNet/CropARNet.py +159 -0
  114. gpbench/method_reg/CropARNet/CropARNet_Hyperparameters.py +109 -0
  115. gpbench/method_reg/CropARNet/__init__.py +5 -0
  116. gpbench/method_reg/CropARNet/base_CropARNet.py +137 -0
  117. gpbench/method_reg/Cropformer/Cropformer.py +313 -0
  118. gpbench/method_reg/Cropformer/Cropformer_Hyperparameters.py +250 -0
  119. gpbench/method_reg/Cropformer/__init__.py +5 -0
  120. gpbench/method_reg/DL_GWAS/DL_GWAS.py +186 -0
  121. gpbench/method_reg/DL_GWAS/DL_GWAS_Hyperparameters.py +125 -0
  122. gpbench/method_reg/DL_GWAS/__init__.py +5 -0
  123. gpbench/method_reg/DNNGP/DNNGP.py +157 -0
  124. gpbench/method_reg/DNNGP/DNNGP_Hyperparameters.py +118 -0
  125. gpbench/method_reg/DNNGP/__init__.py +5 -0
  126. gpbench/method_reg/DNNGP/base_dnngp.py +101 -0
  127. gpbench/method_reg/DeepCCR/DeepCCR.py +149 -0
  128. gpbench/method_reg/DeepCCR/DeepCCR_Hyperparameters.py +110 -0
  129. gpbench/method_reg/DeepCCR/__init__.py +5 -0
  130. gpbench/method_reg/DeepCCR/base_DeepCCR.py +171 -0
  131. gpbench/method_reg/DeepGS/DeepGS.py +165 -0
  132. gpbench/method_reg/DeepGS/DeepGS_Hyperparameters.py +114 -0
  133. gpbench/method_reg/DeepGS/__init__.py +5 -0
  134. gpbench/method_reg/DeepGS/base_deepgs.py +98 -0
  135. gpbench/method_reg/EIR/EIR.py +258 -0
  136. gpbench/method_reg/EIR/EIR_Hyperparameters.py +178 -0
  137. gpbench/method_reg/EIR/__init__.py +5 -0
  138. gpbench/method_reg/EIR/utils/__init__.py +0 -0
  139. gpbench/method_reg/EIR/utils/array_output_modules.py +97 -0
  140. gpbench/method_reg/EIR/utils/common.py +65 -0
  141. gpbench/method_reg/EIR/utils/lcl_layers.py +235 -0
  142. gpbench/method_reg/EIR/utils/logging.py +59 -0
  143. gpbench/method_reg/EIR/utils/mlp_layers.py +92 -0
  144. gpbench/method_reg/EIR/utils/models_locally_connected.py +642 -0
  145. gpbench/method_reg/EIR/utils/transformer_models.py +546 -0
  146. gpbench/method_reg/ElasticNet/ElasticNet.py +123 -0
  147. gpbench/method_reg/ElasticNet/ElasticNet_he.py +83 -0
  148. gpbench/method_reg/ElasticNet/__init__.py +5 -0
  149. gpbench/method_reg/G2PDeep/G2PDeep_Hyperparameters.py +107 -0
  150. gpbench/method_reg/G2PDeep/G2Pdeep.py +166 -0
  151. gpbench/method_reg/G2PDeep/__init__.py +5 -0
  152. gpbench/method_reg/G2PDeep/base_G2PDeep.py +209 -0
  153. gpbench/method_reg/GBLUP/GBLUP_R.py +182 -0
  154. gpbench/method_reg/GBLUP/__init__.py +5 -0
  155. gpbench/method_reg/GEFormer/GEFormer.py +164 -0
  156. gpbench/method_reg/GEFormer/GEFormer_Hyperparameters.py +106 -0
  157. gpbench/method_reg/GEFormer/__init__.py +5 -0
  158. gpbench/method_reg/GEFormer/gMLP.py +341 -0
  159. gpbench/method_reg/LightGBM/LightGBM.py +237 -0
  160. gpbench/method_reg/LightGBM/LightGBM_Hyperparameters.py +77 -0
  161. gpbench/method_reg/LightGBM/__init__.py +5 -0
  162. gpbench/method_reg/MVP/MVP.py +182 -0
  163. gpbench/method_reg/MVP/MVP_Hyperparameters.py +126 -0
  164. gpbench/method_reg/MVP/__init__.py +5 -0
  165. gpbench/method_reg/MVP/base_MVP.py +113 -0
  166. gpbench/method_reg/RF/RF_GPU.py +174 -0
  167. gpbench/method_reg/RF/RF_Hyperparameters.py +163 -0
  168. gpbench/method_reg/RF/__init__.py +5 -0
  169. gpbench/method_reg/SVC/SVC_GPU.py +194 -0
  170. gpbench/method_reg/SVC/SVC_Hyperparameters.py +107 -0
  171. gpbench/method_reg/SVC/__init__.py +5 -0
  172. gpbench/method_reg/SoyDNGP/AlexNet_206.py +185 -0
  173. gpbench/method_reg/SoyDNGP/SoyDNGP.py +179 -0
  174. gpbench/method_reg/SoyDNGP/SoyDNGP_Hyperparameters.py +105 -0
  175. gpbench/method_reg/SoyDNGP/__init__.py +5 -0
  176. gpbench/method_reg/XGBoost/XGboost_GPU.py +188 -0
  177. gpbench/method_reg/XGBoost/XGboost_Hyperparameters.py +167 -0
  178. gpbench/method_reg/XGBoost/__init__.py +5 -0
  179. gpbench/method_reg/__init__.py +55 -0
  180. gpbench/method_reg/rrBLUP/__init__.py +5 -0
  181. gpbench/method_reg/rrBLUP/rrBLUP.py +123 -0
  182. gpbench-1.0.0.dist-info/METADATA +379 -0
  183. gpbench-1.0.0.dist-info/RECORD +188 -0
  184. gpbench-1.0.0.dist-info/WHEEL +5 -0
  185. gpbench-1.0.0.dist-info/entry_points.txt +2 -0
  186. gpbench-1.0.0.dist-info/top_level.txt +3 -0
  187. tests/test_import.py +80 -0
  188. tests/test_method.py +232 -0
gpbench/method_reg/GEFormer/gMLP.py
@@ -0,0 +1,341 @@
+ from random import randrange
+ import numpy as np
+ import torch
+ import torch.nn.functional as F
+ from torch import nn, einsum
+
+ from einops import rearrange, repeat
+ from einops.layers.torch import Rearrange, Reduce
+
+ # functions
+
+ def exists(val):
+     return val is not None
+
+ def pair(val):
+     return (val, val) if not isinstance(val, tuple) else val
+
+ def dropout_layers(layers, prob_survival):
+     if prob_survival == 1:
+         return layers
+
+     num_layers = len(layers)
+     to_drop = torch.zeros(num_layers).uniform_(0., 1.) > prob_survival
+
+     # make sure at least one layer makes it
+     if all(to_drop):
+         rand_index = randrange(num_layers)
+         to_drop[rand_index] = False
+
+     layers = [layer for (layer, drop) in zip(layers, to_drop) if not drop]
+     return layers
+
+
+ # helper classes
+
+ class Residual(nn.Module):
+     def __init__(self, fn):
+         super().__init__()
+         self.fn = fn
+
+     def forward(self, x):
+         return self.fn(x) + x
+
+ class PreNorm(nn.Module):
+     def __init__(self, dim, fn):
+         super().__init__()
+         self.fn = fn
+         self.norm = nn.LayerNorm(dim)
+
+     def forward(self, x, **kwargs):
+         x = self.norm(x)
+         return self.fn(x, **kwargs)
+
+ class Attention(nn.Module):
+     def __init__(self, dim_in, dim_out, dim_inner, causal = False):
+         super().__init__()
+         self.scale = dim_inner ** -0.5
+         self.causal = causal
+
+         self.to_qkv = nn.Linear(dim_in, dim_inner * 3, bias = False)
+         self.to_out = nn.Linear(dim_inner, dim_out)
+
+     def forward(self, x):
+         device = x.device
+         q, k, v = self.to_qkv(x).chunk(3, dim = -1)
+         sim = einsum('b i d, b j d -> b i j', q, k) * self.scale
+
+         if self.causal:
+             mask = torch.ones(sim.shape[-2:], device = device).triu(1).bool()
+             sim.masked_fill_(mask[None, ...], -torch.finfo(q.dtype).max)
+
+         attn = sim.softmax(dim = -1)
+         out = einsum('b i j, b j d -> b i d', attn, v)
+         return self.to_out(out)
+
+ class SpatialGatingUnit(nn.Module):
+     def __init__(
+         self,
+         dim,
+         dim_seq,
+         causal = False,
+         act = nn.Identity(),
+         heads = 1,
+         init_eps = 1e-3,
+         circulant_matrix = False
+     ):
+         super().__init__()
+         dim_out = dim // 2
+         self.heads = heads
+         self.causal = causal
+         self.norm = nn.LayerNorm(dim_out)
+
+         self.act = act
+
+         # parameters
+
+         if circulant_matrix:
+             self.circulant_pos_x = nn.Parameter(torch.ones(heads, dim_seq))
+             self.circulant_pos_y = nn.Parameter(torch.ones(heads, dim_seq))
+
+         self.circulant_matrix = circulant_matrix
+         shape = (heads, dim_seq,) if circulant_matrix else (heads, dim_seq, dim_seq)
+         weight = torch.zeros(shape)
+
+         self.weight = nn.Parameter(weight)
+         init_eps /= dim_seq
+         nn.init.uniform_(self.weight, -init_eps, init_eps)
+
+         self.bias = nn.Parameter(torch.ones(heads, dim_seq))
+
+     def forward(self, x, gate_res = None):
+         device, n, h = x.device, x.shape[1], self.heads
+
+         res, gate = x.chunk(2, dim = -1)
+         gate = self.norm(gate)
+
+         weight, bias = self.weight, self.bias
+
+         if self.circulant_matrix:
+             # build the circulant matrix
+             dim_seq = weight.shape[-1]
+             weight = F.pad(weight, (0, dim_seq), value = 0)
+             weight = repeat(weight, '... n -> ... (r n)', r = dim_seq)
+             weight = weight[:, :-dim_seq].reshape(h, dim_seq, 2 * dim_seq - 1)
+             weight = weight[:, :, (dim_seq - 1):]
+
+             # give circulant matrix absolute position awareness
+             pos_x, pos_y = self.circulant_pos_x, self.circulant_pos_y
+             weight = weight * rearrange(pos_x, 'h i -> h i ()') * rearrange(pos_y, 'h j -> h () j')
+
+         if self.causal:
+             weight, bias = weight[:, :n, :n], bias[:, :n]
+             mask = torch.ones(weight.shape[-2:], device = device).triu_(1).bool()
+             mask = rearrange(mask, 'i j -> () i j')
+             weight = weight.masked_fill(mask, 0.)
+
+         gate = rearrange(gate, 'b n (h d) -> b h n d', h = h)
+
+         gate = einsum('b h n d, h m n -> b h m d', gate, weight)
+         gate = gate + rearrange(bias, 'h n -> () h n ()')
+
+         gate = rearrange(gate, 'b h n d -> b n (h d)')
+
+         if exists(gate_res):
+             gate = gate + gate_res
+
+         return self.act(gate) * res
+
+ class gMLPBlock(nn.Module):
+     def __init__(
+         self,
+         *,
+         dim,
+         dim_ff,
+         seq_len,
+         heads = 1,
+         attn_dim = None,
+         causal = False,
+         act = nn.Identity(),
+         circulant_matrix = False
+     ):
+         super().__init__()
+         self.proj_in = nn.Sequential(
+             nn.Linear(dim, dim_ff),
+             nn.GELU()
+         )
+
+         self.attn = Attention(dim, dim_ff // 2, attn_dim, causal) if exists(attn_dim) else None
+
+         self.sgu = SpatialGatingUnit(dim_ff, seq_len, causal, act, heads, circulant_matrix = circulant_matrix)
+         self.proj_out = nn.Linear(dim_ff // 2, dim)
+
+     def forward(self, x):
+         gate_res = self.attn(x) if exists(self.attn) else None
+         x = self.proj_in(x)
+         x = self.sgu(x, gate_res = gate_res)
+         x = self.proj_out(x)
+         return x
+
+
+ # main classes
+
+ class gMLPVision(nn.Module):
+     def __init__(
+         self,
+         *,
+         image_size,
+         patch_size,
+         num_classes,
+         dim,
+         depth,
+         snp_len,
+         heads = 1,
+         ff_mult = 4,
+         channels = 1,
+         attn_dim = None,
+         prob_survival = 1.
+     ):
+         super().__init__()
+         assert (dim % heads) == 0, 'dimension must be divisible by number of heads'
+
+         image_height, image_width = pair(image_size)
+         patch_height, patch_width = pair(patch_size)
+         # assert (image_height % patch_height) == 0 and (image_width % patch_width) == 0, 'image height and width must be divisible by patch size'
+         # num_patches = (image_height // patch_height) * (image_width // patch_width)
+         num_patches = 200  # hard-coded sequence length; overrides the patch-count formula commented out above
+         dim_ff = dim * ff_mult
+
+         self.to_patch_embed = nn.Sequential(
+             Rearrange('b c (h p1) (w p2) -> b (h w) (c p1 p2)', p1 = patch_height, p2 = patch_width),
+             nn.Linear(1 * snp_len * 1, dim)  # channels * patch_height * patch_width for the (snp_len, 1) patches used here
+         )
+
+         self.prob_survival = prob_survival
+
+         self.layers = nn.ModuleList([Residual(PreNorm(dim, gMLPBlock(dim = dim, heads = heads, dim_ff = dim_ff, seq_len = num_patches, attn_dim = attn_dim))) for _ in range(depth)])
+
+         self.to_logits = nn.Sequential(
+             nn.LayerNorm(dim),
+             Reduce('b n d -> b d', 'mean'),
+             nn.Linear(dim, num_classes)
+         )
+
+     def forward(self, x):
+         x = self.to_patch_embed(x)
+         layers = self.layers if not self.training else dropout_layers(self.layers, self.prob_survival)
+         x = nn.Sequential(*layers)(x)
+         return self.to_logits(x)
+
+ class EarlyStopping:  # standalone helper; train_model below implements its own patience logic
+     def __init__(self, patience=10, delta=0):
+         self.patience = patience
+         self.delta = delta
+         self.best_score = None
+         self.counter = 0
+         self.early_stop = False
+
+     def __call__(self, score):
+         if self.best_score is None:
+             self.best_score = score
+         elif score < self.best_score + self.delta:
+             self.counter += 1
+             if self.counter >= self.patience:
+                 self.early_stop = True
+         else:
+             self.best_score = score
+             self.counter = 0
+
+ class GEFormer(nn.Module):
+     def __init__(self, nsnp):
+         super(GEFormer, self).__init__()
+         self.gmlp = gMLPVision(image_size=(nsnp, 1),
+                                patch_size=(nsnp, 1),
+                                num_classes=126,
+                                dim=126,
+                                depth=1,
+                                snp_len=nsnp)
+
+         self.MLP = nn.Sequential(
+             nn.Linear(126, 128),
+             nn.LeakyReLU(),
+             nn.Dropout(0.4),
+             nn.Linear(128, 64),
+             nn.LeakyReLU(),
+             nn.Dropout(0.4),
+             nn.Linear(64, 1)
+         )
+         self.numsnp = nsnp
+
+     def forward(self, x):
+         x = x.view(x.size(0), 1, self.numsnp, 1)
+         x = self.gmlp(x)
+         predict = self.MLP(x)
+         return predict
+
+     def train_model(self, train_loader, valid_loader, num_epochs, learning_rate, patience, device):
+         optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate, weight_decay=1e-4)
+         criterion = nn.MSELoss()
+
+         self.to(device)
+
+         best_loss = float('inf')
+         best_state = None
+         trigger_times = 0
+
+         for epoch in range(num_epochs):
+             self.train()
+             train_loss = 0.0
+             for inputs, labels in train_loader:
+                 inputs, labels = inputs.to(device), labels.to(device)
+                 optimizer.zero_grad()
+                 outputs = self(inputs)
+                 labels = labels.unsqueeze(1)
+                 loss = criterion(outputs, labels)
+                 loss.backward()
+                 optimizer.step()
+                 train_loss += loss.item() * inputs.size(0)
+
+             self.eval()
+             valid_loss = 0.0
+             with torch.no_grad():
+                 for inputs, labels in valid_loader:
+                     inputs, labels = inputs.to(device), labels.to(device)
+                     outputs = self(inputs)
+                     labels = labels.unsqueeze(1)
+                     loss = criterion(outputs, labels)
+                     valid_loss += loss.item() * inputs.size(0)
+
+             train_loss /= len(train_loader.dataset)
+             valid_loss /= len(valid_loader.dataset)
+
+             # ---------- Early stopping ----------
+             if valid_loss < best_loss:
+                 best_loss = valid_loss
+                 # clone the snapshot: state_dict() returns live references that later updates would overwrite
+                 best_state = {k: v.detach().clone() for k, v in self.state_dict().items()}
+                 trigger_times = 0
+             else:
+                 trigger_times += 1
+                 if trigger_times >= patience:
+                     print(f"Early stopping at epoch {epoch+1}")
+                     break
+
+         if best_state is not None:
+             self.load_state_dict(best_state)
+         return best_loss
+
+     def predict(self, test_loader):
+         self.eval()
+         device = next(self.parameters()).device  # run inference on whichever device the model lives on
+         y_pred = []
+         with torch.no_grad():
+             for inputs, _ in test_loader:
+                 outputs = self(inputs.to(device))
+                 y_pred.append(outputs.cpu().numpy())
+         y_pred = np.concatenate(y_pred, axis=0)
+         y_pred = np.squeeze(y_pred)
+         return y_pred
+
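For orientation, here is a minimal smoke test of the GEFormer wrapper above. It is an editorial sketch, not part of the package: the SNP count, batch size, and synthetic tensors are illustrative, and the same loader is reused for training and validation. The (inputs, labels) batch format and the device handling follow train_model() and predict() as written.

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    nsnp = 1000                                  # hypothetical marker count
    X = torch.randn(64, nsnp)                    # genotype matrix, one row per sample
    y = torch.randn(64)                          # continuous phenotype

    loader = DataLoader(TensorDataset(X, y), batch_size=16)
    model = GEFormer(nsnp)
    best_val = model.train_model(loader, loader, num_epochs=2,
                                 learning_rate=1e-3, patience=5, device='cpu')
    preds = model.predict(loader)                # 1-D numpy array, one prediction per sample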
gpbench/method_reg/LightGBM/LightGBM.py
@@ -0,0 +1,237 @@
+ import os
+ import time
+ import psutil
+ import argparse
+ import random
+ import torch
+ import pandas as pd
+ import numpy as np
+ import lightgbm as lgb
+ from sklearn.model_selection import KFold
+ from scipy.stats import pearsonr
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+ import subprocess
+ import threading
+ import queue
+ from . import LightGBM_Hyperparameters
+
+ class GPUMonitor:
+     def __init__(self, gpu_id=0, interval=0.5):
+         self.gpu_id = gpu_id
+         self.interval = interval
+         self.max_memory = 0
+         self.current_memory = 0
+         self.monitoring = False
+         self.pid = os.getpid()
+         self.queue = queue.Queue()
+
+     def _get_gpu_memory_by_pid(self):
+         try:
+             result = subprocess.check_output([
+                 'nvidia-smi',
+                 '--query-compute-apps=pid,used_memory,gpu_bus_id',
+                 '--format=csv,nounits,noheader'
+             ], timeout=5)
+
+             lines = result.decode('utf-8').strip().split('\n')
+             for line in lines:
+                 if not line.strip():
+                     continue
+                 parts = line.split(',')
+                 if len(parts) >= 2:
+                     pid = int(parts[0].strip())
+                     if pid == self.pid:
+                         mem_str = parts[1].strip()
+                         mem_value = ''.join(filter(str.isdigit, mem_str))
+                         if mem_value:
+                             return int(mem_value)
+             return 0
+         except Exception as e:
+             print(f"GPU memory query error: {e}")
+             return 0
+
+     def _monitor_loop(self):
+         while self.monitoring:
+             try:
+                 mem = self._get_gpu_memory_by_pid()
+                 self.current_memory = mem
+                 if mem > self.max_memory:
+                     self.max_memory = mem
+                 time.sleep(self.interval)
+             except Exception as e:
+                 print(f"Monitor loop error: {e}")
+                 break
+
+     def start(self):
+         self.max_memory = 0
+         self.current_memory = 0
+         self.monitoring = True
+         self.thread = threading.Thread(target=self._monitor_loop)
+         self.thread.daemon = True
+         self.thread.start()
+
+     def stop(self):
+         self.monitoring = False
+         if hasattr(self, 'thread'):
+             self.thread.join(timeout=2)
+         return self.max_memory
+
+ gpu_monitor = GPUMonitor(gpu_id=0, interval=0.2)
+
+ def parse_args():
+     parser = argparse.ArgumentParser(description="LightGBM GPU Benchmark")
+     parser.add_argument('--methods', type=str, default='LightGBM/', help='Method name')
+     parser.add_argument('--species', type=str, default='', help='Dataset name')
+     parser.add_argument('--phe', type=str, default='', help='Phenotype')
+     parser.add_argument('--data_dir', type=str, default='../../data/')
+     parser.add_argument('--result_dir', type=str, default='result/')
+
+     parser.add_argument('--learning_rate', type=float, default=0.01)
+     parser.add_argument('--num_leaves', type=int, default=10)
+     parser.add_argument('--min_data_in_leaf', type=int, default=1)
+     parser.add_argument('--max_depth', type=int, default=1)
+     parser.add_argument('--lambda_l1', type=float, default=0.1)
+     parser.add_argument('--lambda_l2', type=float, default=0.1)
+     parser.add_argument('--min_gain_to_split', type=float, default=0.1)
+     parser.add_argument('--feature_fraction', type=float, default=0.9)
+     parser.add_argument('--bagging_fraction', type=float, default=0.9)
+     parser.add_argument('--bagging_freq', type=int, default=1)
+     parser.add_argument('--num_boost_round', type=int, default=100)
+     parser.add_argument('--objective', type=str, default='regression')
+     parser.add_argument('--device_type', type=str, default='gpu')
+     parser.add_argument('--early_stopping_rounds', type=int, default=50)
+     return parser.parse_args()
+
+ def load_data(args):
+     xData = np.load(os.path.join(args.data_dir, args.species, 'genotype.npz'))["arr_0"]
+     yData = np.load(os.path.join(args.data_dir, args.species, 'phenotype.npz'))["arr_0"]
+     names = np.load(os.path.join(args.data_dir, args.species, 'phenotype.npz'))["arr_1"]
+
+     nsample = xData.shape[0]
+     nsnp = xData.shape[1]
+     print(f"Number of samples: {nsample}, SNPs: {nsnp}")
+     return xData, yData, nsample, nsnp, names
+
+ def set_seed(seed=42):
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+     torch.backends.cudnn.deterministic = True
+     torch.backends.cudnn.benchmark = False
+
+ def run_nested_cv(args, data, label):
+     result_dir = os.path.join(args.result_dir, args.methods + args.species + args.phe)
+     os.makedirs(result_dir, exist_ok=True)
+     print("Starting 10-fold cross-validation...")
+     kf = KFold(n_splits=10, shuffle=True, random_state=42)
+     all_mse, all_mae, all_r2, all_pcc = [], [], [], []
+     time_start = time.time()
+
+     params = {
+         'objective': args.objective,
+         'metric': 'rmse',
+         'learning_rate': args.learning_rate,
+         'num_leaves': args.num_leaves,
+         'min_data_in_leaf': args.min_data_in_leaf,
+         'max_depth': args.max_depth,
+         'lambda_l1': args.lambda_l1,
+         'lambda_l2': args.lambda_l2,
+         'min_gain_to_split': args.min_gain_to_split,
+         'feature_fraction': args.feature_fraction,
+         'bagging_fraction': args.bagging_fraction,
+         'bagging_freq': args.bagging_freq,
+         'num_boost_round': args.num_boost_round,
+         'device_type': 'gpu',
+         'gpu_platform_id': 0,
+         'gpu_device_id': 0,
+         'num_threads': 8,
+         'verbose': -1
+     }
+
+     for fold, (train_index, test_index) in enumerate(kf.split(data)):
+         print(f"\n===== Running fold {fold} =====")
+         process = psutil.Process(os.getpid())
+         fold_start_time = time.time()
+
+         gpu_monitor.start()
+         time.sleep(0.5)
+
+         cpu_mem_before = process.memory_info().rss / 1024**2
+
+         X_train, X_test = data[train_index], data[test_index]
+         y_train, y_test = label[train_index], label[test_index]
+
+         train_set = lgb.Dataset(X_train, label=y_train)
+         test_set = lgb.Dataset(X_test, label=y_test)
+
+         model = lgb.train(
+             params,
+             train_set,
+             num_boost_round=args.num_boost_round,
+             valid_sets=[test_set]
+         )
+
+         y_pred = model.predict(X_test)
+         mse = mean_squared_error(y_test, y_pred)
+         r2 = r2_score(y_test, y_pred)
+         mae = mean_absolute_error(y_test, y_pred)
+         pcc, _ = pearsonr(y_test, y_pred)
+
+         all_mse.append(mse)
+         all_r2.append(r2)
+         all_mae.append(mae)
+         all_pcc.append(pcc)
+
+         fold_time = time.time() - fold_start_time
+         fold_gpu_mem = gpu_monitor.stop()
+         fold_cpu_mem = process.memory_info().rss / 1024**2
+         print(f'Fold {fold}: Corr={pcc:.4f}, MAE={mae:.4f}, MSE={mse:.4f}, R2={r2:.4f}, Time={fold_time:.2f}s, '
+               f'GPU={fold_gpu_mem:.2f}MB, CPU={fold_cpu_mem:.2f}MB')
+
+         results_df = pd.DataFrame({'Y_test': y_test, 'Y_pred': y_pred})
+         results_df.to_csv(os.path.join(result_dir, f"fold{fold}.csv"), index=False)
+
+     print("\n===== Cross-validation summary =====")
+     print(f"Average PCC: {np.mean(all_pcc):.4f} ± {np.std(all_pcc):.4f}")
+     print(f"Average MAE: {np.mean(all_mae):.4f} ± {np.std(all_mae):.4f}")
+     print(f"Average MSE: {np.mean(all_mse):.4f} ± {np.std(all_mse):.4f}")
+     print(f"Average R2 : {np.mean(all_r2):.4f} ± {np.std(all_r2):.4f}")
+     print(f"Total Time : {time.time() - time_start:.2f}s")
+
+
+ def LightGBM_reg():
+     set_seed(42)
+     args = parse_args()
+     all_species = ['Cotton/']
+
+     for i in range(len(all_species)):
+         args.species = all_species[i]
+         X, Y, nsamples, nsnp, names = load_data(args)
+         for j in range(len(names)):
+             args.phe = names[j]
+             print(f"Starting run: {args.methods}{args.species}{args.phe}")
+             label = Y[:, j]
+             label = np.nan_to_num(label, nan=np.nanmean(label))
+
+             best_params = LightGBM_Hyperparameters.Hyperparameter(X, label)
+             args.learning_rate = best_params['learning_rate']
+             args.num_leaves = best_params['num_leaves']
+             args.min_data_in_leaf = best_params['min_data_in_leaf']
+             args.max_depth = best_params['max_depth']
+             args.lambda_l1 = best_params['lambda_l1']
+             args.lambda_l2 = best_params['lambda_l2']
+             args.min_gain_to_split = best_params['min_gain_to_split']
+             args.feature_fraction = best_params['feature_fraction']
+             args.bagging_fraction = best_params['bagging_fraction']
+             args.bagging_freq = best_params['bagging_freq']
+             start_time = time.time()
+             run_nested_cv(args, data=X, label=label)
+             elapsed_time = time.time() - start_time
+
+             print(f"Running time: {elapsed_time:.2f} s")
+             print("✅ Successfully finished.\n")
+
+
+ if __name__ == "__main__":
+     LightGBM_reg()
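For reference, load_data() above expects two NumPy archives per species directory, with positional arrays "arr_0" (and "arr_1" for trait names). A hypothetical generator of compatible synthetic inputs — the directory matches the default 'Cotton/' species entry; sample counts, SNP counts, and trait names are illustrative:

    import os
    import numpy as np

    data_dir = '../../data/Cotton/'           # default --data_dir plus the species name
    os.makedirs(data_dir, exist_ok=True)

    rng = np.random.default_rng(0)
    genotype = rng.integers(0, 3, size=(200, 5000)).astype(np.float32)  # samples x SNPs
    phenotype = rng.normal(size=(200, 2))                               # samples x traits
    trait_names = np.array(['TraitA', 'TraitB'])                        # used to build result paths

    np.savez(os.path.join(data_dir, 'genotype.npz'), genotype)                 # stored as "arr_0"
    np.savez(os.path.join(data_dir, 'phenotype.npz'), phenotype, trait_names)  # "arr_0", "arr_1"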
gpbench/method_reg/LightGBM/LightGBM_Hyperparameters.py
@@ -0,0 +1,77 @@
+ import random
+ import torch
+ import numpy as np
+ import lightgbm as lgb
+ import optuna
+ from sklearn.model_selection import KFold
+ from scipy.stats import pearsonr
+
+
+ def set_seed(seed=42):
+     random.seed(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+     torch.backends.cudnn.deterministic = True
+     torch.backends.cudnn.benchmark = False
+
+
+ def run_nested_cv(data, label, params):
+     print("Starting 10-fold cross-validation...")
+     kf = KFold(n_splits=10, shuffle=True, random_state=42)
+     all_corr = []
+
+     for train_idx, test_idx in kf.split(data):
+         X_train, X_test = data[train_idx], data[test_idx]
+         y_train, y_test = label[train_idx], label[test_idx]
+
+         train_set = lgb.Dataset(X_train, label=y_train)
+         test_set = lgb.Dataset(X_test, label=y_test)
+
+         model = lgb.train(
+             params,
+             train_set,
+             valid_sets=[test_set],
+             num_boost_round=100,  # note: a 'num_boost_round' alias in params may override this keyword, depending on the LightGBM version
+         )
+
+         y_pred = model.predict(X_test)
+         corr, _ = pearsonr(y_test, y_pred)
+         all_corr.append(corr)
+     return np.mean(all_corr)
+
+
+ def Hyperparameter(X, label):
+     set_seed(42)
+     torch.cuda.empty_cache()
+
+     def objective(trial):
+         params = {
+             'objective': 'regression',
+             'metric': 'rmse',
+             'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.2, log=True),
+             'num_leaves': trial.suggest_int('num_leaves', 15, 255),
+             'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 100),
+             'max_depth': trial.suggest_int('max_depth', 3, 10),
+             'lambda_l1': trial.suggest_float('lambda_l1', 0.0, 5.0),
+             'lambda_l2': trial.suggest_float('lambda_l2', 0.0, 5.0),
+             'min_gain_to_split': trial.suggest_float('min_gain_to_split', 0.0, 5.0),
+             'feature_fraction': trial.suggest_float('feature_fraction', 0.6, 1.0),
+             'bagging_fraction': trial.suggest_float('bagging_fraction', 0.6, 1.0),
+             'bagging_freq': trial.suggest_int('bagging_freq', 0, 10),
+             'num_boost_round': trial.suggest_int('num_boost_round', 100, 1000),
+             'device_type': 'gpu',
+             'gpu_device_id': 1,
+             'num_threads': 8,
+             'verbose': -1,
+         }
+
+         return run_nested_cv(data=X, label=label, params=params)  # mean fold PCC
+
+     study = optuna.create_study(direction="maximize")
+     study.optimize(objective, n_trials=20)
+
+     print("best params:", study.best_params)
+     print("Hyperparameter search finished successfully.")
+     return study.best_params
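A minimal sketch of how the search above would plug into a stand-alone training run, on synthetic data and assuming a GPU-enabled LightGBM build (the objective fixes device_type='gpu'). Popping num_boost_round out of the returned dict keeps the params alias from overriding the explicit keyword:

    import numpy as np
    import lightgbm as lgb
    from gpbench.method_reg.LightGBM import LightGBM_Hyperparameters

    rng = np.random.default_rng(0)
    X = rng.normal(size=(300, 1000))                      # synthetic genotype matrix
    y = 0.5 * X[:, 0] + rng.normal(scale=0.1, size=300)   # toy phenotype

    best = LightGBM_Hyperparameters.Hyperparameter(X, y)  # 20 Optuna trials, maximizing mean PCC
    n_rounds = best.pop('num_boost_round')
    params = {'objective': 'regression', 'metric': 'rmse',
              'device_type': 'gpu', 'verbose': -1, **best}
    booster = lgb.train(params, lgb.Dataset(X, label=y), num_boost_round=n_rounds)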
gpbench/method_reg/LightGBM/__init__.py
@@ -0,0 +1,5 @@
+ from .LightGBM import LightGBM_reg
+
+ LightGBM = LightGBM_reg
+
+ __all__ = ["LightGBM", "LightGBM_reg"]
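Given the aliasing above, the benchmark can be driven directly from the package namespace; a minimal sketch, assuming the data layout described earlier is already in place:

    from gpbench.method_reg.LightGBM import LightGBM_reg

    # Runs the full benchmark with the argparse defaults. Note that
    # parse_args() reads sys.argv, so invoke from a clean command line.
    LightGBM_reg()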