pwact 0.1.28__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pwact/active_learning/environment.py +13 -11
- pwact/active_learning/explore/run_model_md.py +110 -0
- pwact/active_learning/explore/select_image.py +10 -5
- pwact/active_learning/init_bulk/direct.py +182 -0
- pwact/active_learning/init_bulk/duplicate_scale.py +1 -1
- pwact/active_learning/init_bulk/explore.py +300 -0
- pwact/active_learning/init_bulk/init_bulk_run.py +87 -47
- pwact/active_learning/init_bulk/relabel.py +149 -116
- pwact/active_learning/label/labeling.py +132 -18
- pwact/active_learning/train/train_model.py +13 -3
- pwact/active_learning/user_input/init_bulk_input.py +55 -6
- pwact/active_learning/user_input/iter_input.py +12 -0
- pwact/active_learning/user_input/resource.py +19 -7
- pwact/active_learning/user_input/scf_param.py +24 -6
- pwact/active_learning/user_input/train_param/nep_param.py +2 -2
- pwact/active_learning/user_input/train_param/optimizer_param.py +1 -1
- pwact/active_learning/user_input/train_param/work_file_param.py +1 -1
- pwact/main.py +18 -9
- pwact/utils/app_lib/do_direct_sample.py +145 -0
- pwact/utils/app_lib/do_eqv2model.py +41 -0
- pwact/utils/app_lib/lammps.py +1 -1
- pwact/utils/constant.py +32 -12
- pwact/utils/file_operation.py +12 -5
- pwact-0.2.1.dist-info/METADATA +17 -0
- {pwact-0.1.28.dist-info → pwact-0.2.1.dist-info}/RECORD +29 -25
- {pwact-0.1.28.dist-info → pwact-0.2.1.dist-info}/WHEEL +1 -1
- pwact-0.1.28.dist-info/METADATA +0 -107
- {pwact-0.1.28.dist-info → pwact-0.2.1.dist-info}/LICENSE +0 -0
- {pwact-0.1.28.dist-info → pwact-0.2.1.dist-info}/entry_points.txt +0 -0
- {pwact-0.1.28.dist-info → pwact-0.2.1.dist-info}/top_level.txt +0 -0
|
@@ -16,18 +16,27 @@
|
|
|
16
16
|
|
|
17
17
|
"""
|
|
18
18
|
import os
|
|
19
|
-
|
|
19
|
+
import glob
|
|
20
|
+
import json
|
|
21
|
+
import bisect
|
|
20
22
|
from pwact.active_learning.user_input.resource import Resource
|
|
21
23
|
from pwact.active_learning.user_input.init_bulk_input import InitBulkParam
|
|
22
24
|
from pwact.active_learning.init_bulk.duplicate_scale import get_config_files_with_order
|
|
23
25
|
|
|
24
|
-
from pwact.utils.constant import PWMAT, INIT_BULK, TEMP_STRUCTURE, SLURM_OUT, DFT_STYLE
|
|
26
|
+
from pwact.utils.constant import PWMAT, INIT_BULK, TEMP_STRUCTURE, SLURM_OUT, DFT_STYLE, PWDATA, VASP
|
|
25
27
|
from pwact.active_learning.slurm.slurm import SlurmJob, Mission
|
|
26
28
|
from pwact.utils.slurm_script import get_slurm_job_run_info, split_job_for_group, set_slurm_script_content
|
|
27
29
|
|
|
28
|
-
from pwact.utils.file_operation import write_to_file, link_file,
|
|
30
|
+
from pwact.utils.file_operation import write_to_file, link_file, del_dir, del_file_list_by_patten, get_random_nums
|
|
29
31
|
from pwact.utils.app_lib.common import link_pseudo_by_atom, set_input_script
|
|
30
|
-
from pwact.data_format.configop import save_config, get_atom_type, load_config
|
|
32
|
+
from pwact.data_format.configop import extract_pwdata, save_config, get_atom_type, load_config
|
|
33
|
+
|
|
34
|
+
import pandas as pd
|
|
35
|
+
from pwdata import Config
|
|
36
|
+
|
|
37
|
+
# from pwact.utils.constant import DFT_TYPE, VASP, PWDATA, AL_STRUCTURE, TEMP_STRUCTURE,\
|
|
38
|
+
# LABEL_FILE_STRUCTURE, EXPLORE_FILE_STRUCTURE, LAMMPS, SLURM_OUT, DFT_STYLE, PWMAT, INIT_BULK
|
|
39
|
+
# from pwact.utils.file_operation import write_to_file, copy_file, copy_dir, search_files, mv_file, add_postfix_dir, del_dir, del_file_list_by_patten, link_file
|
|
31
40
|
|
|
32
41
|
class Relabel(object):
|
|
33
42
|
def __init__(self, resource: Resource, input_param:InitBulkParam):
|
|
@@ -42,50 +51,104 @@ class Relabel(object):
|
|
|
42
51
|
|
|
43
52
|
self.scf_dir = os.path.join(self.input_param.root_dir, TEMP_STRUCTURE.tmp_init_bulk_dir, INIT_BULK.scf)
|
|
44
53
|
self.real_scf_dir = os.path.join(self.input_param.root_dir, INIT_BULK.scf)
|
|
45
|
-
|
|
46
|
-
def make_scf_work(self):
|
|
47
|
-
scf_paths = []
|
|
48
|
-
use_dftb = False
|
|
49
|
-
for init_config in self.init_configs:
|
|
50
|
-
if init_config.scf is False:
|
|
51
|
-
continue
|
|
52
|
-
init_config_name = "init_config_{}".format(init_config.config_index)
|
|
53
|
-
#1. read construtures from aimd dir
|
|
54
|
-
|
|
55
|
-
#2. set relabel dir
|
|
56
|
-
# read trajs from ./aimd/init_config_0/relax/0_aimd/
|
|
57
|
-
# make scf dir ./relabel/init_config_0/relax/0_aimd/10-scf/files
|
|
58
|
-
traj_list = search_files(os.path.join(self.aimd_dir, init_config_name), "*/*aimd")
|
|
59
|
-
for traj_dir in traj_list:
|
|
60
|
-
scf_dir = os.path.join(self.scf_dir, init_config_name, \
|
|
61
|
-
os.path.basename(os.path.dirname(traj_dir)),\
|
|
62
|
-
os.path.basename(traj_dir))
|
|
63
54
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
traj_format =DFT_STYLE.get_format_by_postfix(traj_file_name),
|
|
70
|
-
interval = self.input_param.interval,
|
|
71
|
-
target_format=DFT_STYLE.get_pwdata_format(self.input_param.scf_style, is_cp2k_coord=True),
|
|
72
|
-
input_file =init_config.scf_input_file,
|
|
73
|
-
kspacing =init_config.scf_kspacing,
|
|
74
|
-
flag_symm =init_config.scf_flag_symm,
|
|
75
|
-
is_dftb = False,
|
|
76
|
-
in_skf =None)
|
|
55
|
+
self.bigmodel_dir = os.path.join(self.input_param.root_dir, TEMP_STRUCTURE.tmp_init_bulk_dir, INIT_BULK.bigmodel)
|
|
56
|
+
self.real_bigmodel_dir = os.path.join(self.input_param.root_dir, INIT_BULK.bigmodel)
|
|
57
|
+
|
|
58
|
+
self.direct_dir = os.path.join(self.bigmodel_dir, INIT_BULK.direct)
|
|
59
|
+
self.real_direct_dir = os.path.join(self.real_bigmodel_dir, INIT_BULK.direct)
|
|
77
60
|
|
|
78
|
-
scf_paths.extend(scf_lsit)
|
|
79
|
-
# make slurm script and slurm job
|
|
80
|
-
self.make_scf_slurm_job_files(scf_paths, use_dftb)
|
|
81
|
-
|
|
82
61
|
def check_work_done(self):
|
|
83
62
|
slurm_remain, slurm_success = get_slurm_job_run_info(self.scf_dir, \
|
|
84
63
|
job_patten="*-{}".format(INIT_BULK.scf_job), \
|
|
85
64
|
tag_patten="*-{}".format(INIT_BULK.scf_tag))
|
|
86
65
|
slurm_done = True if len(slurm_remain) == 0 and len(slurm_success) > 0 else False # len(slurm_remain) > 0 exist slurm jobs need to do
|
|
87
66
|
return slurm_done
|
|
88
|
-
|
|
67
|
+
|
|
68
|
+
def make_scf_work(self):
|
|
69
|
+
def find_position_binary(prefix_sum, N):
|
|
70
|
+
idx = bisect.bisect_right(prefix_sum, N)
|
|
71
|
+
if idx == 0:
|
|
72
|
+
return 0
|
|
73
|
+
elif idx >= len(prefix_sum):
|
|
74
|
+
return (len(prefix_sum)-1)
|
|
75
|
+
else:
|
|
76
|
+
return idx
|
|
77
|
+
|
|
78
|
+
def compute_prefix_sum(arr):
|
|
79
|
+
prefix_sum = []
|
|
80
|
+
current_sum = 0
|
|
81
|
+
for num in arr:
|
|
82
|
+
current_sum += num
|
|
83
|
+
prefix_sum.append(current_sum)
|
|
84
|
+
return prefix_sum
|
|
85
|
+
|
|
86
|
+
candidate = Config(data_path=os.path.join(self.direct_dir, INIT_BULK.direct_traj), format=PWDATA.extxyz)
|
|
87
|
+
# from idx get config idx
|
|
88
|
+
candidate_idx = json.load(open(os.path.join(self.direct_dir, INIT_BULK.candidate_idx)))
|
|
89
|
+
candidate_idx_sum = compute_prefix_sum([candidate_idx[_]['num'] for _ in candidate_idx.keys()])
|
|
90
|
+
_tmp = Config(data_path=os.path.join(self.direct_dir, INIT_BULK.direct_traj), format=PWDATA.extxyz)
|
|
91
|
+
scf_dir_list = []
|
|
92
|
+
if self.input_param.dft_input.scf_max_num is not None:
|
|
93
|
+
random_list = get_random_nums(0, len(candidate.images), self.input_param.dft_input.scf_max_num, seed=2024)
|
|
94
|
+
else:
|
|
95
|
+
random_list = None
|
|
96
|
+
for index, image in enumerate(candidate.images):
|
|
97
|
+
if random_list is not None and index not in random_list:
|
|
98
|
+
continue
|
|
99
|
+
_idx = find_position_binary(candidate_idx_sum, index)
|
|
100
|
+
config_idx = candidate_idx["{}".format(_idx)]['idx']
|
|
101
|
+
scf_dir = os.path.join(self.scf_dir, "{}".format(index))
|
|
102
|
+
if not os.path.exists(scf_dir):
|
|
103
|
+
os.makedirs(scf_dir)
|
|
104
|
+
|
|
105
|
+
_tmp.images = [image]
|
|
106
|
+
_tmp.to(data_path=scf_dir, data_name=PWMAT.atom_config,
|
|
107
|
+
format=PWDATA.pwmat_config)
|
|
108
|
+
self.make_scf_file(
|
|
109
|
+
scf_dir =scf_dir,
|
|
110
|
+
traj_file =os.path.join(scf_dir, PWMAT.atom_config),
|
|
111
|
+
traj_format =PWDATA.pwmat_config,
|
|
112
|
+
target_format=DFT_STYLE.get_pwdata_format(self.input_param.dft_style, is_cp2k_coord=True),
|
|
113
|
+
input_file =self.init_configs[config_idx].scf_input_file,
|
|
114
|
+
kspacing =self.init_configs[config_idx].scf_kspacing,
|
|
115
|
+
flag_symm =self.init_configs[config_idx].scf_flag_symm,
|
|
116
|
+
is_dftb = False,
|
|
117
|
+
in_skf =None)
|
|
118
|
+
|
|
119
|
+
scf_dir_list.append(scf_dir)
|
|
120
|
+
|
|
121
|
+
self.make_scf_slurm_job_files(scf_dir_list)
|
|
122
|
+
|
|
123
|
+
def make_scf_slurm_job_files(self, scf_dir_list:list[str]):
|
|
124
|
+
del_file_list_by_patten(self.scf_dir, "*{}".format(INIT_BULK.scf_job))
|
|
125
|
+
group_list = split_job_for_group(self.resource.dft_resource.group_size, scf_dir_list, self.resource.dft_resource.parallel_num)
|
|
126
|
+
for group_index, group in enumerate(group_list):
|
|
127
|
+
if group[0] == "NONE":
|
|
128
|
+
continue
|
|
129
|
+
jobname = "scf{}".format(group_index)
|
|
130
|
+
tag_name = "{}-{}".format(group_index, INIT_BULK.scf_tag)
|
|
131
|
+
tag = os.path.join(self.scf_dir, tag_name)
|
|
132
|
+
run_cmd = self.resource.dft_resource.command
|
|
133
|
+
group_slurm_script = set_slurm_script_content(gpu_per_node=self.resource.dft_resource.gpu_per_node,
|
|
134
|
+
number_node = self.resource.dft_resource.number_node,
|
|
135
|
+
cpu_per_node = self.resource.dft_resource.cpu_per_node,
|
|
136
|
+
queue_name = self.resource.dft_resource.queue_name,
|
|
137
|
+
custom_flags = self.resource.dft_resource.custom_flags,
|
|
138
|
+
env_script = self.resource.dft_resource.env_script,
|
|
139
|
+
job_name = jobname,
|
|
140
|
+
run_cmd_template = run_cmd,
|
|
141
|
+
group = group,
|
|
142
|
+
job_tag = tag,
|
|
143
|
+
task_tag = INIT_BULK.scf_tag,
|
|
144
|
+
task_tag_faild = INIT_BULK.scf_tag_failed,
|
|
145
|
+
parallel_num=self.resource.dft_resource.parallel_num,
|
|
146
|
+
check_type=None
|
|
147
|
+
)
|
|
148
|
+
slurm_script_name = "{}-{}".format(group_index, INIT_BULK.scf_job)
|
|
149
|
+
slurm_job_file = os.path.join(self.scf_dir, slurm_script_name)
|
|
150
|
+
write_to_file(slurm_job_file, group_slurm_script, "w")
|
|
151
|
+
|
|
89
152
|
def do_scf_jobs(self):
|
|
90
153
|
mission = Mission()
|
|
91
154
|
slurm_remain, slurm_success = get_slurm_job_run_info(self.scf_dir, \
|
|
@@ -111,89 +174,59 @@ class Relabel(object):
|
|
|
111
174
|
mission.all_job_finished(error_type=SLURM_OUT.dft_out)
|
|
112
175
|
# mission.move_slurm_log_to_slurm_work_dir()
|
|
113
176
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
177
|
+
def make_scf_file(self,
|
|
178
|
+
scf_dir,
|
|
179
|
+
traj_file ,
|
|
180
|
+
traj_format , # the input is pwmat/config
|
|
181
|
+
target_format,
|
|
182
|
+
input_file ,
|
|
183
|
+
kspacing =None,
|
|
184
|
+
flag_symm =None,
|
|
185
|
+
is_dftb =None,
|
|
186
|
+
in_skf =None,
|
|
187
|
+
atom_names:list[str]=None):
|
|
188
|
+
if DFT_STYLE.pwmat == self.resource.dft_style:
|
|
189
|
+
target_config = traj_file
|
|
190
|
+
pass
|
|
191
|
+
else:
|
|
192
|
+
if DFT_STYLE.vasp == self.resource.dft_style: # when do scf, the vasp input file name is 'POSCAR'
|
|
193
|
+
save_name = VASP.poscar
|
|
194
|
+
else:
|
|
195
|
+
save_name="{}".format(DFT_STYLE.get_normal_config(self.resource.dft_style))# for cp2k this param will be set as coord.xzy
|
|
196
|
+
target_config = save_config(config=traj_file,
|
|
197
|
+
input_format=traj_format,
|
|
132
198
|
wrap = False,
|
|
133
199
|
direct = True,
|
|
134
200
|
sort = True,
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
201
|
+
save_name = save_name,
|
|
202
|
+
save_format=DFT_STYLE.get_pwdata_format(dft_style=self.resource.dft_style, is_cp2k_coord=True),
|
|
203
|
+
save_path=scf_dir,
|
|
204
|
+
atom_names=atom_names)
|
|
138
205
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
pseudo_list
|
|
144
|
-
target_dir
|
|
145
|
-
atom_order
|
|
146
|
-
dft_style
|
|
206
|
+
#2.
|
|
207
|
+
atomic_name_list, atomic_number_list = get_atom_type(traj_file, traj_format)
|
|
208
|
+
#1. set pseudo files
|
|
209
|
+
pseudo_names = link_pseudo_by_atom(
|
|
210
|
+
pseudo_list = self.input_param.dft_input.pseudo,
|
|
211
|
+
target_dir = scf_dir,
|
|
212
|
+
atom_order = atomic_name_list,
|
|
213
|
+
dft_style = self.resource.dft_style,
|
|
147
214
|
basis_set_file =self.input_param.dft_input.basis_set_file,
|
|
148
215
|
potential_file =self.input_param.dft_input.potential_file
|
|
149
216
|
)
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
pseudo_names=pseudo_names,
|
|
164
|
-
gaussian_base_param=self.input_param.dft_input.gaussian_base_param,# these for cp2k
|
|
165
|
-
)
|
|
166
|
-
scf_lsit.append(save_dir)
|
|
167
|
-
return scf_lsit
|
|
168
|
-
|
|
169
|
-
def make_scf_slurm_job_files(self, scf_dir_list:list[str],use_dftb: bool=False):
|
|
170
|
-
del_file_list_by_patten(self.scf_dir, "*{}".format(INIT_BULK.scf_job))
|
|
171
|
-
group_list = split_job_for_group(self.resource.scf_resource.group_size, scf_dir_list, self.resource.scf_resource.parallel_num)
|
|
172
|
-
for group_index, group in enumerate(group_list):
|
|
173
|
-
if group[0] == "NONE":
|
|
174
|
-
continue
|
|
175
|
-
jobname = "scf{}".format(group_index)
|
|
176
|
-
tag_name = "{}-{}".format(group_index, INIT_BULK.scf_tag)
|
|
177
|
-
tag = os.path.join(self.scf_dir, tag_name)
|
|
178
|
-
run_cmd = self.resource.scf_resource.command
|
|
179
|
-
group_slurm_script = set_slurm_script_content(gpu_per_node=self.resource.scf_resource.gpu_per_node,
|
|
180
|
-
number_node = self.resource.scf_resource.number_node,
|
|
181
|
-
cpu_per_node = self.resource.scf_resource.cpu_per_node,
|
|
182
|
-
queue_name = self.resource.scf_resource.queue_name,
|
|
183
|
-
custom_flags = self.resource.scf_resource.custom_flags,
|
|
184
|
-
env_script = self.resource.scf_resource.env_script,
|
|
185
|
-
job_name = jobname,
|
|
186
|
-
run_cmd_template = run_cmd,
|
|
187
|
-
group = group,
|
|
188
|
-
job_tag = tag,
|
|
189
|
-
task_tag = INIT_BULK.scf_tag,
|
|
190
|
-
task_tag_faild = INIT_BULK.scf_tag_failed,
|
|
191
|
-
parallel_num=self.resource.scf_resource.parallel_num,
|
|
192
|
-
check_type=self.resource.scf_style
|
|
193
|
-
)
|
|
194
|
-
slurm_script_name = "{}-{}".format(group_index, INIT_BULK.scf_job)
|
|
195
|
-
slurm_job_file = os.path.join(self.scf_dir, slurm_script_name)
|
|
196
|
-
write_to_file(slurm_job_file, group_slurm_script, "w")
|
|
217
|
+
|
|
218
|
+
#2. make etot.input file
|
|
219
|
+
set_input_script(
|
|
220
|
+
input_file=input_file,
|
|
221
|
+
config=target_config,
|
|
222
|
+
dft_style=self.resource.dft_style,
|
|
223
|
+
kspacing=kspacing,
|
|
224
|
+
flag_symm=flag_symm,
|
|
225
|
+
save_dir = scf_dir,
|
|
226
|
+
pseudo_names=pseudo_names,
|
|
227
|
+
gaussian_base_param=self.input_param.dft_input.gaussian_base_param,# these for cp2k
|
|
228
|
+
is_scf = True
|
|
229
|
+
)
|
|
197
230
|
|
|
198
231
|
def do_post_process(self):
|
|
199
232
|
if os.path.exists(self.scf_dir):
|
|
@@ -35,6 +35,7 @@ from pwact.utils.file_operation import write_to_file, copy_file, copy_dir, searc
|
|
|
35
35
|
from pwact.utils.app_lib.common import link_pseudo_by_atom, set_input_script
|
|
36
36
|
|
|
37
37
|
from pwact.data_format.configop import extract_pwdata, save_config, get_atom_type
|
|
38
|
+
from pwdata import Config
|
|
38
39
|
class Labeling(object):
|
|
39
40
|
@staticmethod
|
|
40
41
|
def kill_job(root_dir:str, itername:str):
|
|
@@ -59,9 +60,10 @@ class Labeling(object):
|
|
|
59
60
|
self.real_explore_dir = os.path.join(self.input_param.root_dir, itername, AL_STRUCTURE.explore)
|
|
60
61
|
self.md_dir = os.path.join(self.explore_dir, EXPLORE_FILE_STRUCTURE.md)
|
|
61
62
|
self.select_dir = os.path.join(self.explore_dir, EXPLORE_FILE_STRUCTURE.select)
|
|
63
|
+
self.direct_dir = os.path.join(self.explore_dir, EXPLORE_FILE_STRUCTURE.direct)
|
|
62
64
|
self.real_md_dir = os.path.join(self.real_explore_dir, EXPLORE_FILE_STRUCTURE.md)
|
|
63
65
|
self.real_select_dir = os.path.join(self.real_explore_dir, EXPLORE_FILE_STRUCTURE.select)
|
|
64
|
-
|
|
66
|
+
self.real_direct_dir = os.path.join(self.real_explore_dir, EXPLORE_FILE_STRUCTURE.direct)
|
|
65
67
|
# labed work dir
|
|
66
68
|
self.label_dir = os.path.join(self.input_param.root_dir, itername, TEMP_STRUCTURE.tmp_run_iter_dir, AL_STRUCTURE.labeling)
|
|
67
69
|
self.scf_dir = os.path.join(self.label_dir, LABEL_FILE_STRUCTURE.scf)
|
|
@@ -71,6 +73,9 @@ class Labeling(object):
|
|
|
71
73
|
self.real_scf_dir = os.path.join(self.real_label_dir, LABEL_FILE_STRUCTURE.scf)
|
|
72
74
|
self.real_result_dir = os.path.join(self.real_label_dir, LABEL_FILE_STRUCTURE.result)
|
|
73
75
|
|
|
76
|
+
self.bigmodel_dir = os.path.join(self.label_dir, LABEL_FILE_STRUCTURE.bigmodel)
|
|
77
|
+
self.real_bigmodel_dir = os.path.join(self.real_label_dir, LABEL_FILE_STRUCTURE.bigmodel)
|
|
78
|
+
|
|
74
79
|
'''
|
|
75
80
|
description:
|
|
76
81
|
the scf work dir file structure is as follow.
|
|
@@ -86,9 +91,8 @@ class Labeling(object):
|
|
|
86
91
|
return {*}
|
|
87
92
|
author: wuxingxing
|
|
88
93
|
'''
|
|
94
|
+
|
|
89
95
|
def make_scf_work(self):
|
|
90
|
-
# read select info, and make scf
|
|
91
|
-
# ["devi_force", "file_path", "config_index"]
|
|
92
96
|
candidate = pd.read_csv(os.path.join(self.select_dir, EXPLORE_FILE_STRUCTURE.candidate))
|
|
93
97
|
# make scf work dir
|
|
94
98
|
scf_dir_list = []
|
|
@@ -108,14 +112,51 @@ class Labeling(object):
|
|
|
108
112
|
atom_names = line.split()
|
|
109
113
|
self.make_scf_file(scf_sub_md_sys_path, tarj_lmp, atom_names)
|
|
110
114
|
scf_dir_list.append(scf_sub_md_sys_path)
|
|
111
|
-
|
|
115
|
+
|
|
112
116
|
self.make_scf_slurm_job_files(scf_dir_list)
|
|
113
117
|
|
|
118
|
+
def make_bigmodel_work(self):
|
|
119
|
+
# copy from realdir/direct/select.xyz
|
|
120
|
+
if self.input_param.strategy.direct:
|
|
121
|
+
copy_file(os.path.join(self.real_direct_dir, EXPLORE_FILE_STRUCTURE.select_xyz),
|
|
122
|
+
os.path.join(self.bigmodel_dir, EXPLORE_FILE_STRUCTURE.select_xyz))
|
|
123
|
+
else:
|
|
124
|
+
# copy trajs to bigmodel_dir and cvt to xyz
|
|
125
|
+
candidate = pd.read_csv(os.path.join(self.select_dir, EXPLORE_FILE_STRUCTURE.candidate))
|
|
126
|
+
# make scf work dir
|
|
127
|
+
image_list = None
|
|
128
|
+
for index, row in candidate.iterrows():
|
|
129
|
+
config_index = int(row["config_index"])
|
|
130
|
+
sub_md_sys_path = row["file_path"]
|
|
131
|
+
atom_names = None
|
|
132
|
+
with open(os.path.join(sub_md_sys_path, LAMMPS.atom_type_file), 'r') as rf:
|
|
133
|
+
line = rf.readline()
|
|
134
|
+
atom_names = line.split()
|
|
135
|
+
if image_list is None:
|
|
136
|
+
image_list = Config(data_path=os.path.join(sub_md_sys_path, EXPLORE_FILE_STRUCTURE.traj, "{}{}".format(config_index, LAMMPS.traj_postfix)),
|
|
137
|
+
format=PWDATA.lammps_dump, atom_names=atom_names)
|
|
138
|
+
else:
|
|
139
|
+
image_list.append(Config(data_path=os.path.join(sub_md_sys_path, EXPLORE_FILE_STRUCTURE.traj, "{}{}".format(config_index, LAMMPS.traj_postfix)),
|
|
140
|
+
format=PWDATA.lammps_dump, atom_names=atom_names))
|
|
141
|
+
# cvt_lammps.dump to extxyz
|
|
142
|
+
image_list.to(data_path=self.bigmodel_dir, format=PWDATA.extxyz, data_name="{}".format(EXPLORE_FILE_STRUCTURE.select_xyz))
|
|
143
|
+
# copy bigmodelscript
|
|
144
|
+
copy_file(self.input_param.scf.bigmodel_script, os.path.join(self.bigmodel_dir, os.path.basename(self.input_param.scf.bigmodel_script)))
|
|
145
|
+
# make slrum file
|
|
146
|
+
self.make_bigmodel_slurm_job_files([self.bigmodel_dir])
|
|
147
|
+
|
|
114
148
|
def back_label(self):
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
149
|
+
if self.input_param.scf.dft_style == DFT_STYLE.bigmodel:
|
|
150
|
+
slurm_remain, slurm_success = get_slurm_job_run_info(self.real_bigmodel_dir, \
|
|
151
|
+
job_patten="*-{}".format(LABEL_FILE_STRUCTURE.bigmodel_job), \
|
|
152
|
+
tag_patten="*-{}".format(LABEL_FILE_STRUCTURE.bigmodel_tag))
|
|
153
|
+
slurm_done = True if len(slurm_remain) == 0 and len(slurm_success) > 0 else False
|
|
154
|
+
else:
|
|
155
|
+
slurm_remain, slurm_success = get_slurm_job_run_info(self.real_scf_dir, \
|
|
156
|
+
job_patten="*-{}".format(LABEL_FILE_STRUCTURE.scf_job), \
|
|
157
|
+
tag_patten="*-{}".format(LABEL_FILE_STRUCTURE.scf_tag))
|
|
158
|
+
slurm_done = True if len(slurm_remain) == 0 and len(slurm_success) > 0 else False
|
|
159
|
+
|
|
119
160
|
if slurm_done:
|
|
120
161
|
# bk and do new job
|
|
121
162
|
target_bk_file = add_postfix_dir(self.real_label_dir, postfix_str="bk")
|
|
@@ -147,7 +188,31 @@ class Labeling(object):
|
|
|
147
188
|
mission.commit_jobs()
|
|
148
189
|
mission.check_running_job()
|
|
149
190
|
mission.all_job_finished(error_type=SLURM_OUT.dft_out)
|
|
150
|
-
|
|
191
|
+
|
|
192
|
+
def do_bigmodel_jobs(self):
|
|
193
|
+
mission = Mission()
|
|
194
|
+
slurm_remain, slurm_success = get_slurm_job_run_info(self.bigmodel_dir, \
|
|
195
|
+
job_patten="*-{}".format(LABEL_FILE_STRUCTURE.bigmodel_job), \
|
|
196
|
+
tag_patten="*-{}".format(LABEL_FILE_STRUCTURE.bigmodel_tag))
|
|
197
|
+
slurm_done = True if len(slurm_remain) == 0 and len(slurm_success) > 0 else False
|
|
198
|
+
if slurm_done is False:
|
|
199
|
+
#recover slurm jobs
|
|
200
|
+
if len(slurm_remain) > 0:
|
|
201
|
+
print("Run bigModel Job:\n")
|
|
202
|
+
print(slurm_remain)
|
|
203
|
+
for i, script_path in enumerate(slurm_remain):
|
|
204
|
+
slurm_job = SlurmJob()
|
|
205
|
+
tag_name = "{}-{}".format(os.path.basename(script_path).split('-')[0].strip(), LABEL_FILE_STRUCTURE.bigmodel_tag)
|
|
206
|
+
tag = os.path.join(os.path.dirname(script_path),tag_name)
|
|
207
|
+
slurm_job.set_tag(tag)
|
|
208
|
+
slurm_job.set_cmd(script_path)
|
|
209
|
+
mission.add_job(slurm_job)
|
|
210
|
+
|
|
211
|
+
if len(mission.job_list) > 0:
|
|
212
|
+
mission.commit_jobs()
|
|
213
|
+
mission.check_running_job()
|
|
214
|
+
mission.all_job_finished()
|
|
215
|
+
|
|
151
216
|
def make_scf_file(self, scf_dir:str, tarj_lmp:str, atom_names:list[str]=None):
|
|
152
217
|
config_index = os.path.basename(tarj_lmp).split('.')[0]
|
|
153
218
|
if DFT_STYLE.vasp == self.resource.dft_style: # when do scf, the vasp input file name is 'POSCAR'
|
|
@@ -230,6 +295,42 @@ class Labeling(object):
|
|
|
230
295
|
slurm_job_file = os.path.join(self.scf_dir, slurm_script_name)
|
|
231
296
|
write_to_file(slurm_job_file, group_slurm_script, "w")
|
|
232
297
|
|
|
298
|
+
|
|
299
|
+
def make_bigmodel_slurm_job_files(self, scf_sub_list:list[str]):
|
|
300
|
+
del_file_list_by_patten(self.bigmodel_dir, "*{}".format(LABEL_FILE_STRUCTURE.scf_job))
|
|
301
|
+
group_list = split_job_for_group(1, scf_sub_list, 1)
|
|
302
|
+
|
|
303
|
+
for group_index, group in enumerate(group_list):
|
|
304
|
+
if group[0] == "NONE":
|
|
305
|
+
continue
|
|
306
|
+
|
|
307
|
+
jobname = "bigmodel{}".format(group_index)
|
|
308
|
+
tag_name = "{}-{}".format(group_index, LABEL_FILE_STRUCTURE.bigmodel_tag)
|
|
309
|
+
tag = os.path.join(self.bigmodel_dir, tag_name)
|
|
310
|
+
run_cmd = self.resource.dft_resource.command
|
|
311
|
+
# if self.resource.dft_resource.gpu_per_node > 0:
|
|
312
|
+
# run_cmd = "mpirun -np {} PWmat > {}".format(self.resource.dft_resource.gpu_per_node, SLURM_OUT.md_out)
|
|
313
|
+
# else:
|
|
314
|
+
# raise Exception("ERROR! the cpu version of pwmat not support yet!")
|
|
315
|
+
group_slurm_script = set_slurm_script_content(gpu_per_node=self.resource.dft_resource.gpu_per_node,
|
|
316
|
+
number_node = self.resource.dft_resource.number_node,
|
|
317
|
+
cpu_per_node = self.resource.dft_resource.cpu_per_node,
|
|
318
|
+
queue_name = self.resource.dft_resource.queue_name,
|
|
319
|
+
custom_flags = self.resource.dft_resource.custom_flags,
|
|
320
|
+
env_script = self.resource.dft_resource.env_script,
|
|
321
|
+
job_name = jobname,
|
|
322
|
+
run_cmd_template = run_cmd,
|
|
323
|
+
group = group,
|
|
324
|
+
job_tag = tag,
|
|
325
|
+
task_tag = LABEL_FILE_STRUCTURE.bigmodel_tag,
|
|
326
|
+
task_tag_faild = LABEL_FILE_STRUCTURE.bigmodel_tag_failed,
|
|
327
|
+
parallel_num=self.resource.dft_resource.parallel_num,
|
|
328
|
+
check_type=self.resource.dft_style
|
|
329
|
+
)
|
|
330
|
+
slurm_script_name = "{}-{}".format(group_index, LABEL_FILE_STRUCTURE.bigmodel_job)
|
|
331
|
+
slurm_job_file = os.path.join(self.bigmodel_dir, slurm_script_name)
|
|
332
|
+
write_to_file(slurm_job_file, group_slurm_script, "w")
|
|
333
|
+
|
|
233
334
|
'''
|
|
234
335
|
description:
|
|
235
336
|
collecte OUT.MLMD to mvm-
|
|
@@ -274,18 +375,31 @@ class Labeling(object):
|
|
|
274
375
|
for scf_file in scf_files:
|
|
275
376
|
scf_file_path = os.path.join(scf_dir, scf_file)
|
|
276
377
|
if scf_file.lower() in DFT_STYLE.get_scf_reserve_list(self.resource.dft_style) \
|
|
277
|
-
|
|
378
|
+
or "atom.config" in scf_file.lower() :# for the input natom.config
|
|
278
379
|
copy_file(scf_file_path, scf_file_path.replace(TEMP_STRUCTURE.tmp_run_iter_dir, ""))
|
|
279
380
|
|
|
280
381
|
# scf files to pwdata format
|
|
281
382
|
scf_configs = self.collect_scf_configs()
|
|
383
|
+
if len(scf_configs) > 0:
|
|
384
|
+
extract_pwdata(input_data_list=scf_configs,
|
|
385
|
+
intput_data_format =DFT_STYLE.get_format_by_postfix(os.path.basename(scf_configs[0])),
|
|
386
|
+
save_data_path =self.result_dir,
|
|
387
|
+
save_data_name = INIT_BULK.get_save_format(self.input_param.data_format),
|
|
388
|
+
save_data_format = self.input_param.data_format,
|
|
389
|
+
data_shuffle =self.input_param.train.data_shuffle
|
|
390
|
+
)
|
|
391
|
+
# copy to main dir
|
|
392
|
+
copy_dir(self.result_dir, self.real_result_dir)
|
|
282
393
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
# copy to
|
|
394
|
+
def do_post_bigmodel(self):
|
|
395
|
+
# copy the bigmodel labeled.xyz to result
|
|
396
|
+
if self.input_param.data_format == PWDATA.extxyz:
|
|
397
|
+
copy_file(os.path.join(self.bigmodel_dir, LABEL_FILE_STRUCTURE.train_xyz), os.path.join(self.result_dir, LABEL_FILE_STRUCTURE.train_xyz))
|
|
398
|
+
else:
|
|
399
|
+
images = Config(data_path=os.path.join(self.bigmodel_dir, LABEL_FILE_STRUCTURE.train_xyz), format=PWDATA.extxyz)
|
|
400
|
+
images.to(data_path=self.result_dir, format=PWDATA.pwmlff_npy)
|
|
401
|
+
# copy bigmodel dir to real dir
|
|
402
|
+
copy_dir(self.bigmodel_dir, self.real_bigmodel_dir)
|
|
291
403
|
copy_dir(self.result_dir, self.real_result_dir)
|
|
404
|
+
# del slurm logs and tags
|
|
405
|
+
del_file_list_by_patten(self.real_bigmodel_dir, "slurm-*")
|
|
@@ -33,6 +33,7 @@ class ModelTrian(object):
|
|
|
33
33
|
self.itername = itername
|
|
34
34
|
self.resource = resource
|
|
35
35
|
self.input_param = input_param
|
|
36
|
+
self.train_flag = True
|
|
36
37
|
self.iter = get_iter_from_iter_name(self.itername)
|
|
37
38
|
# train work dir
|
|
38
39
|
self.train_dir = os.path.join(self.input_param.root_dir, self.itername, TEMP_STRUCTURE.tmp_run_iter_dir, AL_STRUCTURE.train)
|
|
@@ -68,11 +69,16 @@ class ModelTrian(object):
|
|
|
68
69
|
if not os.path.exists(model_i_dir):
|
|
69
70
|
os.makedirs(model_i_dir)
|
|
70
71
|
# make train.json file
|
|
71
|
-
train_dict = self.set_train_input_dict(work_dir=model_i_dir, model_index = model_index)
|
|
72
|
+
train_dict, train_tag = self.set_train_input_dict(work_dir=model_i_dir, model_index = model_index)
|
|
72
73
|
train_json_file_path = os.path.join(model_i_dir, TRAIN_FILE_STRUCTUR.train_json)
|
|
73
74
|
save_json_file(train_dict, train_json_file_path)
|
|
74
75
|
train_list.append(model_i_dir)
|
|
75
|
-
|
|
76
|
+
if train_tag:
|
|
77
|
+
self.make_train_slurm_job_files(train_list)
|
|
78
|
+
else:
|
|
79
|
+
pre_iter_name = make_iter_name(self.iter - 1)
|
|
80
|
+
pre_iter_dir = os.path.join(self.input_param.root_dir, pre_iter_name, AL_STRUCTURE.train)
|
|
81
|
+
copy_dir(pre_iter_dir, self.train_dir)
|
|
76
82
|
|
|
77
83
|
def make_train_slurm_job_files(self, train_list:list[str]):
|
|
78
84
|
# make train slurm script
|
|
@@ -158,6 +164,7 @@ class ModelTrian(object):
|
|
|
158
164
|
# search train_feature_path in iter*/label/result/*/PWdata/*
|
|
159
165
|
iter_index = get_iter_from_iter_name(self.itername)
|
|
160
166
|
start_iter = 0
|
|
167
|
+
train_tag = True
|
|
161
168
|
while start_iter < iter_index:
|
|
162
169
|
if self.input_param.data_format == PWDATA.extxyz: # result/train.xyz
|
|
163
170
|
iter_data_list = search_files(self.input_param.root_dir,
|
|
@@ -172,6 +179,9 @@ class ModelTrian(object):
|
|
|
172
179
|
train_feature_path.extend(iter_data_list)
|
|
173
180
|
start_iter += 1
|
|
174
181
|
|
|
182
|
+
if start_iter > 0 and len(iter_data_list) == 0:
|
|
183
|
+
train_tag = False
|
|
184
|
+
|
|
175
185
|
# reset seed
|
|
176
186
|
train_json[TRAIN_INPUT_PARAM.seed] = get_seed_by_time()
|
|
177
187
|
train_json[TRAIN_INPUT_PARAM.raw_files] = []
|
|
@@ -181,7 +191,7 @@ class ModelTrian(object):
|
|
|
181
191
|
train_json[TRAIN_INPUT_PARAM.format] = self.input_param.data_format
|
|
182
192
|
if self.input_param.strategy.uncertainty == UNCERTAINTY.kpu:
|
|
183
193
|
train_json[TRAIN_INPUT_PARAM.save_p_matrix] = True
|
|
184
|
-
return train_json
|
|
194
|
+
return train_json, train_tag
|
|
185
195
|
|
|
186
196
|
def do_train_job(self):
|
|
187
197
|
mission = Mission()
|