pwact 0.1.28__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. pwact/active_learning/environment.py +13 -11
  2. pwact/active_learning/explore/run_model_md.py +110 -0
  3. pwact/active_learning/explore/select_image.py +10 -5
  4. pwact/active_learning/init_bulk/direct.py +182 -0
  5. pwact/active_learning/init_bulk/duplicate_scale.py +1 -1
  6. pwact/active_learning/init_bulk/explore.py +300 -0
  7. pwact/active_learning/init_bulk/init_bulk_run.py +87 -47
  8. pwact/active_learning/init_bulk/relabel.py +149 -116
  9. pwact/active_learning/label/labeling.py +132 -18
  10. pwact/active_learning/train/train_model.py +13 -3
  11. pwact/active_learning/user_input/init_bulk_input.py +55 -6
  12. pwact/active_learning/user_input/iter_input.py +12 -0
  13. pwact/active_learning/user_input/resource.py +19 -7
  14. pwact/active_learning/user_input/scf_param.py +24 -6
  15. pwact/active_learning/user_input/train_param/nep_param.py +2 -2
  16. pwact/active_learning/user_input/train_param/optimizer_param.py +1 -1
  17. pwact/active_learning/user_input/train_param/work_file_param.py +1 -1
  18. pwact/main.py +18 -9
  19. pwact/utils/app_lib/do_direct_sample.py +145 -0
  20. pwact/utils/app_lib/do_eqv2model.py +41 -0
  21. pwact/utils/app_lib/lammps.py +1 -1
  22. pwact/utils/constant.py +32 -12
  23. pwact/utils/file_operation.py +12 -5
  24. pwact-0.2.1.dist-info/METADATA +17 -0
  25. {pwact-0.1.28.dist-info → pwact-0.2.1.dist-info}/RECORD +29 -25
  26. {pwact-0.1.28.dist-info → pwact-0.2.1.dist-info}/WHEEL +1 -1
  27. pwact-0.1.28.dist-info/METADATA +0 -107
  28. {pwact-0.1.28.dist-info → pwact-0.2.1.dist-info}/LICENSE +0 -0
  29. {pwact-0.1.28.dist-info → pwact-0.2.1.dist-info}/entry_points.txt +0 -0
  30. {pwact-0.1.28.dist-info → pwact-0.2.1.dist-info}/top_level.txt +0 -0
@@ -16,18 +16,27 @@
16
16
 
17
17
  """
18
18
  import os
19
-
19
+ import glob
20
+ import json
21
+ import bisect
20
22
  from pwact.active_learning.user_input.resource import Resource
21
23
  from pwact.active_learning.user_input.init_bulk_input import InitBulkParam
22
24
  from pwact.active_learning.init_bulk.duplicate_scale import get_config_files_with_order
23
25
 
24
- from pwact.utils.constant import PWMAT, INIT_BULK, TEMP_STRUCTURE, SLURM_OUT, DFT_STYLE
26
+ from pwact.utils.constant import PWMAT, INIT_BULK, TEMP_STRUCTURE, SLURM_OUT, DFT_STYLE, PWDATA, VASP
25
27
  from pwact.active_learning.slurm.slurm import SlurmJob, Mission
26
28
  from pwact.utils.slurm_script import get_slurm_job_run_info, split_job_for_group, set_slurm_script_content
27
29
 
28
- from pwact.utils.file_operation import write_to_file, link_file, search_files, del_file_list_by_patten
30
+ from pwact.utils.file_operation import write_to_file, link_file, del_dir, del_file_list_by_patten, get_random_nums
29
31
  from pwact.utils.app_lib.common import link_pseudo_by_atom, set_input_script
30
- from pwact.data_format.configop import save_config, get_atom_type, load_config
32
+ from pwact.data_format.configop import extract_pwdata, save_config, get_atom_type, load_config
33
+
34
+ import pandas as pd
35
+ from pwdata import Config
36
+
37
+ # from pwact.utils.constant import DFT_TYPE, VASP, PWDATA, AL_STRUCTURE, TEMP_STRUCTURE,\
38
+ # LABEL_FILE_STRUCTURE, EXPLORE_FILE_STRUCTURE, LAMMPS, SLURM_OUT, DFT_STYLE, PWMAT, INIT_BULK
39
+ # from pwact.utils.file_operation import write_to_file, copy_file, copy_dir, search_files, mv_file, add_postfix_dir, del_dir, del_file_list_by_patten, link_file
31
40
 
32
41
  class Relabel(object):
33
42
  def __init__(self, resource: Resource, input_param:InitBulkParam):
@@ -42,50 +51,104 @@ class Relabel(object):
42
51
 
43
52
  self.scf_dir = os.path.join(self.input_param.root_dir, TEMP_STRUCTURE.tmp_init_bulk_dir, INIT_BULK.scf)
44
53
  self.real_scf_dir = os.path.join(self.input_param.root_dir, INIT_BULK.scf)
45
-
46
- def make_scf_work(self):
47
- scf_paths = []
48
- use_dftb = False
49
- for init_config in self.init_configs:
50
- if init_config.scf is False:
51
- continue
52
- init_config_name = "init_config_{}".format(init_config.config_index)
53
- #1. read construtures from aimd dir
54
-
55
- #2. set relabel dir
56
- # read trajs from ./aimd/init_config_0/relax/0_aimd/
57
- # make scf dir ./relabel/init_config_0/relax/0_aimd/10-scf/files
58
- traj_list = search_files(os.path.join(self.aimd_dir, init_config_name), "*/*aimd")
59
- for traj_dir in traj_list:
60
- scf_dir = os.path.join(self.scf_dir, init_config_name, \
61
- os.path.basename(os.path.dirname(traj_dir)),\
62
- os.path.basename(traj_dir))
63
54
 
64
- traj_file_name = DFT_STYLE.get_aimd_config(self.resource.dft_style)
65
-
66
- scf_lsit = self.make_scf_file(
67
- scf_dir =scf_dir,
68
- traj_file =os.path.join(traj_dir, traj_file_name),
69
- traj_format =DFT_STYLE.get_format_by_postfix(traj_file_name),
70
- interval = self.input_param.interval,
71
- target_format=DFT_STYLE.get_pwdata_format(self.input_param.scf_style, is_cp2k_coord=True),
72
- input_file =init_config.scf_input_file,
73
- kspacing =init_config.scf_kspacing,
74
- flag_symm =init_config.scf_flag_symm,
75
- is_dftb = False,
76
- in_skf =None)
55
+ self.bigmodel_dir = os.path.join(self.input_param.root_dir, TEMP_STRUCTURE.tmp_init_bulk_dir, INIT_BULK.bigmodel)
56
+ self.real_bigmodel_dir = os.path.join(self.input_param.root_dir, INIT_BULK.bigmodel)
57
+
58
+ self.direct_dir = os.path.join(self.bigmodel_dir, INIT_BULK.direct)
59
+ self.real_direct_dir = os.path.join(self.real_bigmodel_dir, INIT_BULK.direct)
77
60
 
78
- scf_paths.extend(scf_lsit)
79
- # make slurm script and slurm job
80
- self.make_scf_slurm_job_files(scf_paths, use_dftb)
81
-
82
61
  def check_work_done(self):
83
62
  slurm_remain, slurm_success = get_slurm_job_run_info(self.scf_dir, \
84
63
  job_patten="*-{}".format(INIT_BULK.scf_job), \
85
64
  tag_patten="*-{}".format(INIT_BULK.scf_tag))
86
65
  slurm_done = True if len(slurm_remain) == 0 and len(slurm_success) > 0 else False # len(slurm_remain) > 0 exist slurm jobs need to do
87
66
  return slurm_done
88
-
67
+
68
+ def make_scf_work(self):
69
+ def find_position_binary(prefix_sum, N):
70
+ idx = bisect.bisect_right(prefix_sum, N)
71
+ if idx == 0:
72
+ return 0
73
+ elif idx >= len(prefix_sum):
74
+ return (len(prefix_sum)-1)
75
+ else:
76
+ return idx
77
+
78
+ def compute_prefix_sum(arr):
79
+ prefix_sum = []
80
+ current_sum = 0
81
+ for num in arr:
82
+ current_sum += num
83
+ prefix_sum.append(current_sum)
84
+ return prefix_sum
85
+
86
+ candidate = Config(data_path=os.path.join(self.direct_dir, INIT_BULK.direct_traj), format=PWDATA.extxyz)
87
+ # from idx get config idx
88
+ candidate_idx = json.load(open(os.path.join(self.direct_dir, INIT_BULK.candidate_idx)))
89
+ candidate_idx_sum = compute_prefix_sum([candidate_idx[_]['num'] for _ in candidate_idx.keys()])
90
+ _tmp = Config(data_path=os.path.join(self.direct_dir, INIT_BULK.direct_traj), format=PWDATA.extxyz)
91
+ scf_dir_list = []
92
+ if self.input_param.dft_input.scf_max_num is not None:
93
+ random_list = get_random_nums(0, len(candidate.images), self.input_param.dft_input.scf_max_num, seed=2024)
94
+ else:
95
+ random_list = None
96
+ for index, image in enumerate(candidate.images):
97
+ if random_list is not None and index not in random_list:
98
+ continue
99
+ _idx = find_position_binary(candidate_idx_sum, index)
100
+ config_idx = candidate_idx["{}".format(_idx)]['idx']
101
+ scf_dir = os.path.join(self.scf_dir, "{}".format(index))
102
+ if not os.path.exists(scf_dir):
103
+ os.makedirs(scf_dir)
104
+
105
+ _tmp.images = [image]
106
+ _tmp.to(data_path=scf_dir, data_name=PWMAT.atom_config,
107
+ format=PWDATA.pwmat_config)
108
+ self.make_scf_file(
109
+ scf_dir =scf_dir,
110
+ traj_file =os.path.join(scf_dir, PWMAT.atom_config),
111
+ traj_format =PWDATA.pwmat_config,
112
+ target_format=DFT_STYLE.get_pwdata_format(self.input_param.dft_style, is_cp2k_coord=True),
113
+ input_file =self.init_configs[config_idx].scf_input_file,
114
+ kspacing =self.init_configs[config_idx].scf_kspacing,
115
+ flag_symm =self.init_configs[config_idx].scf_flag_symm,
116
+ is_dftb = False,
117
+ in_skf =None)
118
+
119
+ scf_dir_list.append(scf_dir)
120
+
121
+ self.make_scf_slurm_job_files(scf_dir_list)
122
+
123
+ def make_scf_slurm_job_files(self, scf_dir_list:list[str]):
124
+ del_file_list_by_patten(self.scf_dir, "*{}".format(INIT_BULK.scf_job))
125
+ group_list = split_job_for_group(self.resource.dft_resource.group_size, scf_dir_list, self.resource.dft_resource.parallel_num)
126
+ for group_index, group in enumerate(group_list):
127
+ if group[0] == "NONE":
128
+ continue
129
+ jobname = "scf{}".format(group_index)
130
+ tag_name = "{}-{}".format(group_index, INIT_BULK.scf_tag)
131
+ tag = os.path.join(self.scf_dir, tag_name)
132
+ run_cmd = self.resource.dft_resource.command
133
+ group_slurm_script = set_slurm_script_content(gpu_per_node=self.resource.dft_resource.gpu_per_node,
134
+ number_node = self.resource.dft_resource.number_node,
135
+ cpu_per_node = self.resource.dft_resource.cpu_per_node,
136
+ queue_name = self.resource.dft_resource.queue_name,
137
+ custom_flags = self.resource.dft_resource.custom_flags,
138
+ env_script = self.resource.dft_resource.env_script,
139
+ job_name = jobname,
140
+ run_cmd_template = run_cmd,
141
+ group = group,
142
+ job_tag = tag,
143
+ task_tag = INIT_BULK.scf_tag,
144
+ task_tag_faild = INIT_BULK.scf_tag_failed,
145
+ parallel_num=self.resource.dft_resource.parallel_num,
146
+ check_type=None
147
+ )
148
+ slurm_script_name = "{}-{}".format(group_index, INIT_BULK.scf_job)
149
+ slurm_job_file = os.path.join(self.scf_dir, slurm_script_name)
150
+ write_to_file(slurm_job_file, group_slurm_script, "w")
151
+
89
152
  def do_scf_jobs(self):
90
153
  mission = Mission()
91
154
  slurm_remain, slurm_success = get_slurm_job_run_info(self.scf_dir, \
@@ -111,89 +174,59 @@ class Relabel(object):
111
174
  mission.all_job_finished(error_type=SLURM_OUT.dft_out)
112
175
  # mission.move_slurm_log_to_slurm_work_dir()
113
176
 
114
- '''
115
- description:
116
- input_file is scf input control file, for vasp is incar, for pwmat is etot.input
117
- return {*}
118
- author: wuxingxing
119
- '''
120
- def make_scf_file(self, scf_dir:str, traj_file:str, traj_format:str, interval:int, target_format:str, \
121
- input_file:str, kspacing:float=None, flag_symm:int=None, is_dftb:bool=False, in_skf:str=None):
122
- config = load_config(format=traj_format, config=traj_file)
123
- index_list = list(range(0, len(config), interval))
124
- scf_lsit = []
125
- for index in index_list:
126
- save_dir = os.path.join(scf_dir, "{}-{}".format(index, INIT_BULK.scf))
127
- if not os.path.exists(save_dir):
128
- os.makedirs(save_dir)
129
- #1. set config file
130
- target_config = save_config(config=config[index],
131
- input_format=traj_format,# or None, the same
177
+ def make_scf_file(self,
178
+ scf_dir,
179
+ traj_file ,
180
+ traj_format , # the input is pwmat/config
181
+ target_format,
182
+ input_file ,
183
+ kspacing =None,
184
+ flag_symm =None,
185
+ is_dftb =None,
186
+ in_skf =None,
187
+ atom_names:list[str]=None):
188
+ if DFT_STYLE.pwmat == self.resource.dft_style:
189
+ target_config = traj_file
190
+ pass
191
+ else:
192
+ if DFT_STYLE.vasp == self.resource.dft_style: # when do scf, the vasp input file name is 'POSCAR'
193
+ save_name = VASP.poscar
194
+ else:
195
+ save_name="{}".format(DFT_STYLE.get_normal_config(self.resource.dft_style))# for cp2k this param will be set as coord.xzy
196
+ target_config = save_config(config=traj_file,
197
+ input_format=traj_format,
132
198
  wrap = False,
133
199
  direct = True,
134
200
  sort = True,
135
- save_format=target_format,
136
- save_path=save_dir,
137
- save_name=DFT_STYLE.get_normal_config(self.input_param.scf_style))
201
+ save_name = save_name,
202
+ save_format=DFT_STYLE.get_pwdata_format(dft_style=self.resource.dft_style, is_cp2k_coord=True),
203
+ save_path=scf_dir,
204
+ atom_names=atom_names)
138
205
 
139
- atom_type_list, _ = get_atom_type(config[index])
140
- #2. set pseudo files
141
- # if not is_dftb:
142
- pseudo_names = link_pseudo_by_atom(
143
- pseudo_list = self.input_param.dft_input.scf_pseudo,
144
- target_dir = save_dir,
145
- atom_order = atom_type_list,
146
- dft_style = self.resource.scf_style,
206
+ #2.
207
+ atomic_name_list, atomic_number_list = get_atom_type(traj_file, traj_format)
208
+ #1. set pseudo files
209
+ pseudo_names = link_pseudo_by_atom(
210
+ pseudo_list = self.input_param.dft_input.pseudo,
211
+ target_dir = scf_dir,
212
+ atom_order = atomic_name_list,
213
+ dft_style = self.resource.dft_style,
147
214
  basis_set_file =self.input_param.dft_input.basis_set_file,
148
215
  potential_file =self.input_param.dft_input.potential_file
149
216
  )
150
- # else:
151
- # # link in.skf path to aimd dir
152
- # pseudo_names = []
153
- # target_dir = os.path.join(aimd_dir, PWMAT.in_skf)
154
- # link_file(in_skf, target_dir)
155
- #3. make dft input file
156
- set_input_script(
157
- input_file=input_file,
158
- config=target_config,
159
- dft_style=self.resource.scf_style,
160
- kspacing=kspacing,
161
- flag_symm=flag_symm,
162
- save_dir = save_dir,
163
- pseudo_names=pseudo_names,
164
- gaussian_base_param=self.input_param.dft_input.gaussian_base_param,# these for cp2k
165
- )
166
- scf_lsit.append(save_dir)
167
- return scf_lsit
168
-
169
- def make_scf_slurm_job_files(self, scf_dir_list:list[str],use_dftb: bool=False):
170
- del_file_list_by_patten(self.scf_dir, "*{}".format(INIT_BULK.scf_job))
171
- group_list = split_job_for_group(self.resource.scf_resource.group_size, scf_dir_list, self.resource.scf_resource.parallel_num)
172
- for group_index, group in enumerate(group_list):
173
- if group[0] == "NONE":
174
- continue
175
- jobname = "scf{}".format(group_index)
176
- tag_name = "{}-{}".format(group_index, INIT_BULK.scf_tag)
177
- tag = os.path.join(self.scf_dir, tag_name)
178
- run_cmd = self.resource.scf_resource.command
179
- group_slurm_script = set_slurm_script_content(gpu_per_node=self.resource.scf_resource.gpu_per_node,
180
- number_node = self.resource.scf_resource.number_node,
181
- cpu_per_node = self.resource.scf_resource.cpu_per_node,
182
- queue_name = self.resource.scf_resource.queue_name,
183
- custom_flags = self.resource.scf_resource.custom_flags,
184
- env_script = self.resource.scf_resource.env_script,
185
- job_name = jobname,
186
- run_cmd_template = run_cmd,
187
- group = group,
188
- job_tag = tag,
189
- task_tag = INIT_BULK.scf_tag,
190
- task_tag_faild = INIT_BULK.scf_tag_failed,
191
- parallel_num=self.resource.scf_resource.parallel_num,
192
- check_type=self.resource.scf_style
193
- )
194
- slurm_script_name = "{}-{}".format(group_index, INIT_BULK.scf_job)
195
- slurm_job_file = os.path.join(self.scf_dir, slurm_script_name)
196
- write_to_file(slurm_job_file, group_slurm_script, "w")
217
+
218
+ #2. make etot.input file
219
+ set_input_script(
220
+ input_file=input_file,
221
+ config=target_config,
222
+ dft_style=self.resource.dft_style,
223
+ kspacing=kspacing,
224
+ flag_symm=flag_symm,
225
+ save_dir = scf_dir,
226
+ pseudo_names=pseudo_names,
227
+ gaussian_base_param=self.input_param.dft_input.gaussian_base_param,# these for cp2k
228
+ is_scf = True
229
+ )
197
230
 
198
231
  def do_post_process(self):
199
232
  if os.path.exists(self.scf_dir):
@@ -35,6 +35,7 @@ from pwact.utils.file_operation import write_to_file, copy_file, copy_dir, searc
35
35
  from pwact.utils.app_lib.common import link_pseudo_by_atom, set_input_script
36
36
 
37
37
  from pwact.data_format.configop import extract_pwdata, save_config, get_atom_type
38
+ from pwdata import Config
38
39
  class Labeling(object):
39
40
  @staticmethod
40
41
  def kill_job(root_dir:str, itername:str):
@@ -59,9 +60,10 @@ class Labeling(object):
59
60
  self.real_explore_dir = os.path.join(self.input_param.root_dir, itername, AL_STRUCTURE.explore)
60
61
  self.md_dir = os.path.join(self.explore_dir, EXPLORE_FILE_STRUCTURE.md)
61
62
  self.select_dir = os.path.join(self.explore_dir, EXPLORE_FILE_STRUCTURE.select)
63
+ self.direct_dir = os.path.join(self.explore_dir, EXPLORE_FILE_STRUCTURE.direct)
62
64
  self.real_md_dir = os.path.join(self.real_explore_dir, EXPLORE_FILE_STRUCTURE.md)
63
65
  self.real_select_dir = os.path.join(self.real_explore_dir, EXPLORE_FILE_STRUCTURE.select)
64
-
66
+ self.real_direct_dir = os.path.join(self.real_explore_dir, EXPLORE_FILE_STRUCTURE.direct)
65
67
  # labed work dir
66
68
  self.label_dir = os.path.join(self.input_param.root_dir, itername, TEMP_STRUCTURE.tmp_run_iter_dir, AL_STRUCTURE.labeling)
67
69
  self.scf_dir = os.path.join(self.label_dir, LABEL_FILE_STRUCTURE.scf)
@@ -71,6 +73,9 @@ class Labeling(object):
71
73
  self.real_scf_dir = os.path.join(self.real_label_dir, LABEL_FILE_STRUCTURE.scf)
72
74
  self.real_result_dir = os.path.join(self.real_label_dir, LABEL_FILE_STRUCTURE.result)
73
75
 
76
+ self.bigmodel_dir = os.path.join(self.label_dir, LABEL_FILE_STRUCTURE.bigmodel)
77
+ self.real_bigmodel_dir = os.path.join(self.real_label_dir, LABEL_FILE_STRUCTURE.bigmodel)
78
+
74
79
  '''
75
80
  description:
76
81
  the scf work dir file structure is as follow.
@@ -86,9 +91,8 @@ class Labeling(object):
86
91
  return {*}
87
92
  author: wuxingxing
88
93
  '''
94
+
89
95
  def make_scf_work(self):
90
- # read select info, and make scf
91
- # ["devi_force", "file_path", "config_index"]
92
96
  candidate = pd.read_csv(os.path.join(self.select_dir, EXPLORE_FILE_STRUCTURE.candidate))
93
97
  # make scf work dir
94
98
  scf_dir_list = []
@@ -108,14 +112,51 @@ class Labeling(object):
108
112
  atom_names = line.split()
109
113
  self.make_scf_file(scf_sub_md_sys_path, tarj_lmp, atom_names)
110
114
  scf_dir_list.append(scf_sub_md_sys_path)
111
-
115
+
112
116
  self.make_scf_slurm_job_files(scf_dir_list)
113
117
 
118
+ def make_bigmodel_work(self):
119
+ # copy from realdir/direct/select.xyz
120
+ if self.input_param.strategy.direct:
121
+ copy_file(os.path.join(self.real_direct_dir, EXPLORE_FILE_STRUCTURE.select_xyz),
122
+ os.path.join(self.bigmodel_dir, EXPLORE_FILE_STRUCTURE.select_xyz))
123
+ else:
124
+ # copy trajs to bigmodel_dir and cvt to xyz
125
+ candidate = pd.read_csv(os.path.join(self.select_dir, EXPLORE_FILE_STRUCTURE.candidate))
126
+ # make scf work dir
127
+ image_list = None
128
+ for index, row in candidate.iterrows():
129
+ config_index = int(row["config_index"])
130
+ sub_md_sys_path = row["file_path"]
131
+ atom_names = None
132
+ with open(os.path.join(sub_md_sys_path, LAMMPS.atom_type_file), 'r') as rf:
133
+ line = rf.readline()
134
+ atom_names = line.split()
135
+ if image_list is None:
136
+ image_list = Config(data_path=os.path.join(sub_md_sys_path, EXPLORE_FILE_STRUCTURE.traj, "{}{}".format(config_index, LAMMPS.traj_postfix)),
137
+ format=PWDATA.lammps_dump, atom_names=atom_names)
138
+ else:
139
+ image_list.append(Config(data_path=os.path.join(sub_md_sys_path, EXPLORE_FILE_STRUCTURE.traj, "{}{}".format(config_index, LAMMPS.traj_postfix)),
140
+ format=PWDATA.lammps_dump, atom_names=atom_names))
141
+ # cvt_lammps.dump to extxyz
142
+ image_list.to(data_path=self.bigmodel_dir, format=PWDATA.extxyz, data_name="{}".format(EXPLORE_FILE_STRUCTURE.select_xyz))
143
+ # copy bigmodelscript
144
+ copy_file(self.input_param.scf.bigmodel_script, os.path.join(self.bigmodel_dir, os.path.basename(self.input_param.scf.bigmodel_script)))
145
+ # make slrum file
146
+ self.make_bigmodel_slurm_job_files([self.bigmodel_dir])
147
+
114
148
  def back_label(self):
115
- slurm_remain, slurm_success = get_slurm_job_run_info(self.real_scf_dir, \
116
- job_patten="*-{}".format(LABEL_FILE_STRUCTURE.scf_job), \
117
- tag_patten="*-{}".format(LABEL_FILE_STRUCTURE.scf_tag))
118
- slurm_done = True if len(slurm_remain) == 0 and len(slurm_success) > 0 else False
149
+ if self.input_param.scf.dft_style == DFT_STYLE.bigmodel:
150
+ slurm_remain, slurm_success = get_slurm_job_run_info(self.real_bigmodel_dir, \
151
+ job_patten="*-{}".format(LABEL_FILE_STRUCTURE.bigmodel_job), \
152
+ tag_patten="*-{}".format(LABEL_FILE_STRUCTURE.bigmodel_tag))
153
+ slurm_done = True if len(slurm_remain) == 0 and len(slurm_success) > 0 else False
154
+ else:
155
+ slurm_remain, slurm_success = get_slurm_job_run_info(self.real_scf_dir, \
156
+ job_patten="*-{}".format(LABEL_FILE_STRUCTURE.scf_job), \
157
+ tag_patten="*-{}".format(LABEL_FILE_STRUCTURE.scf_tag))
158
+ slurm_done = True if len(slurm_remain) == 0 and len(slurm_success) > 0 else False
159
+
119
160
  if slurm_done:
120
161
  # bk and do new job
121
162
  target_bk_file = add_postfix_dir(self.real_label_dir, postfix_str="bk")
@@ -147,7 +188,31 @@ class Labeling(object):
147
188
  mission.commit_jobs()
148
189
  mission.check_running_job()
149
190
  mission.all_job_finished(error_type=SLURM_OUT.dft_out)
150
-
191
+
192
+ def do_bigmodel_jobs(self):
193
+ mission = Mission()
194
+ slurm_remain, slurm_success = get_slurm_job_run_info(self.bigmodel_dir, \
195
+ job_patten="*-{}".format(LABEL_FILE_STRUCTURE.bigmodel_job), \
196
+ tag_patten="*-{}".format(LABEL_FILE_STRUCTURE.bigmodel_tag))
197
+ slurm_done = True if len(slurm_remain) == 0 and len(slurm_success) > 0 else False
198
+ if slurm_done is False:
199
+ #recover slurm jobs
200
+ if len(slurm_remain) > 0:
201
+ print("Run bigModel Job:\n")
202
+ print(slurm_remain)
203
+ for i, script_path in enumerate(slurm_remain):
204
+ slurm_job = SlurmJob()
205
+ tag_name = "{}-{}".format(os.path.basename(script_path).split('-')[0].strip(), LABEL_FILE_STRUCTURE.bigmodel_tag)
206
+ tag = os.path.join(os.path.dirname(script_path),tag_name)
207
+ slurm_job.set_tag(tag)
208
+ slurm_job.set_cmd(script_path)
209
+ mission.add_job(slurm_job)
210
+
211
+ if len(mission.job_list) > 0:
212
+ mission.commit_jobs()
213
+ mission.check_running_job()
214
+ mission.all_job_finished()
215
+
151
216
  def make_scf_file(self, scf_dir:str, tarj_lmp:str, atom_names:list[str]=None):
152
217
  config_index = os.path.basename(tarj_lmp).split('.')[0]
153
218
  if DFT_STYLE.vasp == self.resource.dft_style: # when do scf, the vasp input file name is 'POSCAR'
@@ -230,6 +295,42 @@ class Labeling(object):
230
295
  slurm_job_file = os.path.join(self.scf_dir, slurm_script_name)
231
296
  write_to_file(slurm_job_file, group_slurm_script, "w")
232
297
 
298
+
299
+ def make_bigmodel_slurm_job_files(self, scf_sub_list:list[str]):
300
+ del_file_list_by_patten(self.bigmodel_dir, "*{}".format(LABEL_FILE_STRUCTURE.scf_job))
301
+ group_list = split_job_for_group(1, scf_sub_list, 1)
302
+
303
+ for group_index, group in enumerate(group_list):
304
+ if group[0] == "NONE":
305
+ continue
306
+
307
+ jobname = "bigmodel{}".format(group_index)
308
+ tag_name = "{}-{}".format(group_index, LABEL_FILE_STRUCTURE.bigmodel_tag)
309
+ tag = os.path.join(self.bigmodel_dir, tag_name)
310
+ run_cmd = self.resource.dft_resource.command
311
+ # if self.resource.dft_resource.gpu_per_node > 0:
312
+ # run_cmd = "mpirun -np {} PWmat > {}".format(self.resource.dft_resource.gpu_per_node, SLURM_OUT.md_out)
313
+ # else:
314
+ # raise Exception("ERROR! the cpu version of pwmat not support yet!")
315
+ group_slurm_script = set_slurm_script_content(gpu_per_node=self.resource.dft_resource.gpu_per_node,
316
+ number_node = self.resource.dft_resource.number_node,
317
+ cpu_per_node = self.resource.dft_resource.cpu_per_node,
318
+ queue_name = self.resource.dft_resource.queue_name,
319
+ custom_flags = self.resource.dft_resource.custom_flags,
320
+ env_script = self.resource.dft_resource.env_script,
321
+ job_name = jobname,
322
+ run_cmd_template = run_cmd,
323
+ group = group,
324
+ job_tag = tag,
325
+ task_tag = LABEL_FILE_STRUCTURE.bigmodel_tag,
326
+ task_tag_faild = LABEL_FILE_STRUCTURE.bigmodel_tag_failed,
327
+ parallel_num=self.resource.dft_resource.parallel_num,
328
+ check_type=self.resource.dft_style
329
+ )
330
+ slurm_script_name = "{}-{}".format(group_index, LABEL_FILE_STRUCTURE.bigmodel_job)
331
+ slurm_job_file = os.path.join(self.bigmodel_dir, slurm_script_name)
332
+ write_to_file(slurm_job_file, group_slurm_script, "w")
333
+
233
334
  '''
234
335
  description:
235
336
  collecte OUT.MLMD to mvm-
@@ -274,18 +375,31 @@ class Labeling(object):
274
375
  for scf_file in scf_files:
275
376
  scf_file_path = os.path.join(scf_dir, scf_file)
276
377
  if scf_file.lower() in DFT_STYLE.get_scf_reserve_list(self.resource.dft_style) \
277
- and scf_file.lower() not in DFT_STYLE.get_scf_del_list():# for pwmat final.config
378
+ or "atom.config" in scf_file.lower() :# for the input natom.config
278
379
  copy_file(scf_file_path, scf_file_path.replace(TEMP_STRUCTURE.tmp_run_iter_dir, ""))
279
380
 
280
381
  # scf files to pwdata format
281
382
  scf_configs = self.collect_scf_configs()
383
+ if len(scf_configs) > 0:
384
+ extract_pwdata(input_data_list=scf_configs,
385
+ intput_data_format =DFT_STYLE.get_format_by_postfix(os.path.basename(scf_configs[0])),
386
+ save_data_path =self.result_dir,
387
+ save_data_name = INIT_BULK.get_save_format(self.input_param.data_format),
388
+ save_data_format = self.input_param.data_format,
389
+ data_shuffle =self.input_param.train.data_shuffle
390
+ )
391
+ # copy to main dir
392
+ copy_dir(self.result_dir, self.real_result_dir)
282
393
 
283
- extract_pwdata(input_data_list=scf_configs,
284
- intput_data_format =DFT_STYLE.get_format_by_postfix(os.path.basename(scf_configs[0])),
285
- save_data_path =self.result_dir,
286
- save_data_name = INIT_BULK.get_save_format(self.input_param.data_format),
287
- save_data_format = self.input_param.data_format,
288
- data_shuffle =self.input_param.train.data_shuffle
289
- )
290
- # copy to main dir
394
+ def do_post_bigmodel(self):
395
+ # copy the bigmodel labeled.xyz to result
396
+ if self.input_param.data_format == PWDATA.extxyz:
397
+ copy_file(os.path.join(self.bigmodel_dir, LABEL_FILE_STRUCTURE.train_xyz), os.path.join(self.result_dir, LABEL_FILE_STRUCTURE.train_xyz))
398
+ else:
399
+ images = Config(data_path=os.path.join(self.bigmodel_dir, LABEL_FILE_STRUCTURE.train_xyz), format=PWDATA.extxyz)
400
+ images.to(data_path=self.result_dir, format=PWDATA.pwmlff_npy)
401
+ # copy bigmodel dir to real dir
402
+ copy_dir(self.bigmodel_dir, self.real_bigmodel_dir)
291
403
  copy_dir(self.result_dir, self.real_result_dir)
404
+ # del slurm logs and tags
405
+ del_file_list_by_patten(self.real_bigmodel_dir, "slurm-*")
@@ -33,6 +33,7 @@ class ModelTrian(object):
33
33
  self.itername = itername
34
34
  self.resource = resource
35
35
  self.input_param = input_param
36
+ self.train_flag = True
36
37
  self.iter = get_iter_from_iter_name(self.itername)
37
38
  # train work dir
38
39
  self.train_dir = os.path.join(self.input_param.root_dir, self.itername, TEMP_STRUCTURE.tmp_run_iter_dir, AL_STRUCTURE.train)
@@ -68,11 +69,16 @@ class ModelTrian(object):
68
69
  if not os.path.exists(model_i_dir):
69
70
  os.makedirs(model_i_dir)
70
71
  # make train.json file
71
- train_dict = self.set_train_input_dict(work_dir=model_i_dir, model_index = model_index)
72
+ train_dict, train_tag = self.set_train_input_dict(work_dir=model_i_dir, model_index = model_index)
72
73
  train_json_file_path = os.path.join(model_i_dir, TRAIN_FILE_STRUCTUR.train_json)
73
74
  save_json_file(train_dict, train_json_file_path)
74
75
  train_list.append(model_i_dir)
75
- self.make_train_slurm_job_files(train_list)
76
+ if train_tag:
77
+ self.make_train_slurm_job_files(train_list)
78
+ else:
79
+ pre_iter_name = make_iter_name(self.iter - 1)
80
+ pre_iter_dir = os.path.join(self.input_param.root_dir, pre_iter_name, AL_STRUCTURE.train)
81
+ copy_dir(pre_iter_dir, self.train_dir)
76
82
 
77
83
  def make_train_slurm_job_files(self, train_list:list[str]):
78
84
  # make train slurm script
@@ -158,6 +164,7 @@ class ModelTrian(object):
158
164
  # search train_feature_path in iter*/label/result/*/PWdata/*
159
165
  iter_index = get_iter_from_iter_name(self.itername)
160
166
  start_iter = 0
167
+ train_tag = True
161
168
  while start_iter < iter_index:
162
169
  if self.input_param.data_format == PWDATA.extxyz: # result/train.xyz
163
170
  iter_data_list = search_files(self.input_param.root_dir,
@@ -172,6 +179,9 @@ class ModelTrian(object):
172
179
  train_feature_path.extend(iter_data_list)
173
180
  start_iter += 1
174
181
 
182
+ if start_iter > 0 and len(iter_data_list) == 0:
183
+ train_tag = False
184
+
175
185
  # reset seed
176
186
  train_json[TRAIN_INPUT_PARAM.seed] = get_seed_by_time()
177
187
  train_json[TRAIN_INPUT_PARAM.raw_files] = []
@@ -181,7 +191,7 @@ class ModelTrian(object):
181
191
  train_json[TRAIN_INPUT_PARAM.format] = self.input_param.data_format
182
192
  if self.input_param.strategy.uncertainty == UNCERTAINTY.kpu:
183
193
  train_json[TRAIN_INPUT_PARAM.save_p_matrix] = True
184
- return train_json
194
+ return train_json, train_tag
185
195
 
186
196
  def do_train_job(self):
187
197
  mission = Mission()