pwact 0.1.27__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,15 +2,17 @@ import subprocess
 import pkg_resources
 def check_envs():
     # for pwmat
-    pass
-    # check pwdata
-    # try:
-    #     package_version = pkg_resources.get_distribution('pwdata').version
-    #     if pkg_resources.parse_version(min_version) <= pkg_resources.parse_version(package_version) <= pkg_resources.parse_version(max_version):
-    #         print(f"{package_name} version {package_version} is within the required range [{min_version}, {max_version}].")
-    #         return True
-    #     else:
-    #         print(f"{package_name} version {package_version} is NOT within the required range [{min_version}, {max_version}].")
-    #         return False
+    comm_info()

-# check PWMLFF???
+def comm_info():
+    print("\n" + "=" * 50)
+    print(" PWACT Basic Information")
+    print("=" * 50)
+    print("Version: 0.2.0")
+    print("Compatible pwdata: >= 0.5.0")
+    print("Compatible MatPL: >= 2025.3")
+    print("Contact: support@pwmat.com")
+    print("Citation: https://github.com/LonxunQuantum/MatPL")
+    print("Manual online: http://doc.lonxun.com/PWMLFF/")
+    print("=" * 50)
+    print("\n\n")
@@ -16,7 +16,7 @@
 ...
 """
 from pwact.active_learning.slurm.slurm import Mission, SlurmJob, scancle_job
-from pwact.utils.slurm_script import get_slurm_job_run_info, split_job_for_group, set_slurm_script_content
+from pwact.utils.slurm_script import get_slurm_job_run_info, split_job_for_group, set_slurm_script_content, recheck_slurm_by_jobtag
 from pwact.active_learning.explore.select_image import select_image
 from pwact.active_learning.user_input.resource import Resource
 from pwact.active_learning.user_input.iter_input import InputParam, MdDetail
@@ -175,14 +175,17 @@ class Explore(object):
         slurm_remain, slurm_success = get_slurm_job_run_info(self.md_dir, \
             job_patten="*-{}".format(EXPLORE_FILE_STRUCTURE.md_job), \
             tag_patten="*-{}".format(EXPLORE_FILE_STRUCTURE.md_tag))
+        # for slurm remain, check if tags done
         slurm_done = True if len(slurm_remain) == 0 and len(slurm_success) > 0 else False
         if slurm_done is False:
+            slurm_remain = recheck_slurm_by_jobtag(slurm_remain, EXPLORE_FILE_STRUCTURE.md_tag)
+            if len(slurm_remain) > 0:
             #recover slurm jobs
             if len(slurm_remain) > 0:
                 print("Run these MD Jobs:\n")
                 print(slurm_remain)
                 for i, script_path in enumerate(slurm_remain):
-                    slurm_job = SlurmJob()
+                    slurm_job = SlurmJob(lmps_tolerance = self.input_param.strategy.lmps_tolerance)
                     tag_name = "{}-{}".format(os.path.basename(script_path).split('-')[0].strip(), EXPLORE_FILE_STRUCTURE.md_tag)
                     tag = os.path.join(os.path.dirname(script_path),tag_name)
                     slurm_job.set_tag(tag, job_type=SLURM_JOB_TYPE.lammps)
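recheck_slurm_by_jobtag itself is not shown in this diff. Judging from the call site and the tag-naming convention a few lines below it, it plausibly re-filters the "remaining" scripts by dropping any whose tag file already exists, so jobs that actually finished are not resubmitted. A sketch under that assumption, not the actual pwact implementation:

import os

def recheck_slurm_by_jobtag(slurm_remain: list, job_tag: str) -> list:
    # keep only scripts whose companion tag file is missing, i.e. jobs that
    # really are unfinished despite what the slurm bookkeeping says
    still_remaining = []
    for script_path in slurm_remain:
        tag_name = "{}-{}".format(os.path.basename(script_path).split('-')[0].strip(), job_tag)
        tag = os.path.join(os.path.dirname(script_path), tag_name)
        if not os.path.exists(tag):
            still_remaining.append(script_path)
    return still_remaining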
@@ -3,7 +3,7 @@ import os

 import pandas as pd
 import numpy as np
-from pwact.utils.constant import EXPLORE_FILE_STRUCTURE, UNCERTAINTY
+from pwact.utils.constant import EXPLORE_FILE_STRUCTURE, UNCERTAINTY, SLURM_OUT
 from pwact.utils.file_operation import write_to_file, search_files, read_data

 from pwact.utils.format_input_output import get_sub_md_sys_template_name
@@ -79,7 +79,7 @@ def select_image(
     devi_name:str,
     lower:float,
     higer:float
-    ):
+    ):
     #1. get model_deviation file
     model_deviation_patten = "{}/{}".format(get_sub_md_sys_template_name(), devi_name)
     model_devi_files = search_files(md_dir, model_deviation_patten)
@@ -108,14 +108,22 @@ def select_image(
             accurate_pd = pd.concat([accurate_pd, tmp_accurate_pd]) if error_pd is not None else tmp_accurate_pd
             rand_candi = pd.concat([rand_candi, tmp_rand_candi]) if error_pd is not None else tmp_rand_candi
             remove_candi = pd.concat([remove_candi, tmp_remove_candi]) if error_pd is not None else tmp_remove_candi
-    summary_info, summary = count_info(save_dir, error_pd, accurate_pd, rand_candi, remove_candi)
+    right_md, error_md = count_mdstop_info(model_devi_files)
+    md_run_info = "A total of {} MD trajectories were run, of which {} completed normally and {} terminated abnormally.\nFor detailed information, refer to file {}.".format(len(right_md) + len(error_md), len(right_md), len(error_md), EXPLORE_FILE_STRUCTURE.md_traj_error_record)

-    # summary_info, summary = select_image(save_dir=self.select_dir,
-    #                 devi_pd=devi_pd,
-    #                 lower=self.input_param.strategy.lower_model_deiv_f,
-    #                 higer=self.input_param.strategy.upper_model_deiv_f,
-    #                 max_select=self.input_param.strategy.max_select)
+    summary_info, summary = count_info(save_dir, error_pd, accurate_pd, rand_candi, remove_candi, md_run_info)
     print("Image select result:\n {}\n\n".format(summary_info))
+
+    write_to_file(os.path.join(save_dir, EXPLORE_FILE_STRUCTURE.md_traj_error_record), md_run_info, "w")
+    details = "\n"
+    if len(error_md) > 0:
+        details += "\nUnfinished md trajectory directory:\n"
+        details += "\n".join(error_md)
+    if len(right_md) > 0:
+        details += "\n\nCorrectly run md trajectory directory:\n"
+        details += "\n".join(right_md)
+    write_to_file(os.path.join(save_dir, EXPLORE_FILE_STRUCTURE.md_traj_error_record), details, "a")
+
     return summary

 def print_select_image(
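The net effect of the new bookkeeping is a small report file: md_run_info is written first in "w" mode, then the per-directory lists are appended. A standalone demo of the text this produces, with made-up directory names:

right_md = ["md.000.sys.000.t.000.p.000", "md.000.sys.000.t.000.p.002"]
error_md = ["md.000.sys.000.t.000.p.001"]
md_run_info = "A total of {} MD trajectories were run, of which {} completed normally and {} terminated abnormally.".format(
    len(right_md) + len(error_md), len(right_md), len(error_md))
details = "\n"
if len(error_md) > 0:
    details += "\nUnfinished md trajectory directory:\n" + "\n".join(error_md)
if len(right_md) > 0:
    details += "\n\nCorrectly run md trajectory directory:\n" + "\n".join(right_md)
print(md_run_info + details)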
@@ -211,7 +219,7 @@ def read_pd_files(model_devi_files:list[str]):
     return devi_pd, base_force_kpu


-def count_info(save_dir, error_pd, accurate_pd, rand_candi, remove_candi):
+def count_info(save_dir, error_pd, accurate_pd, rand_candi, remove_candi, md_run_info:str=None):
     #5. save select info
     total_num = error_pd.shape[0] + accurate_pd.shape[0] + rand_candi.shape[0] + remove_candi.shape[0]
     cand_num = rand_candi.shape[0] + remove_candi.shape[0]
@@ -233,9 +241,14 @@ def count_info(save_dir, error_pd, accurate_pd, rand_candi, remove_candi):
     summary_info += "Accurate configurations: {}, details in file {}\n".\
         format(accurate_pd.shape[0], EXPLORE_FILE_STRUCTURE.accurate)
     summary_info += candi_info
-    summary_info += "Error configurations: {}, details in file {}\n".\
+    summary_info += "Error configurations: {}, details in file {}\n\n".\
         format(error_pd.shape[0], EXPLORE_FILE_STRUCTURE.failed)
+    if md_run_info is not None:
+        summary_info += md_run_info
+        summary_info += "\n\n"

+        summary += md_run_info
+        summary += "\n"
     if save_dir is not None:
         if not os.path.exists(save_dir):
             os.makedirs(save_dir)
@@ -249,3 +262,29 @@ def count_info(save_dir, error_pd, accurate_pd, rand_candi, remove_candi):
     return summary_info, summary


+def count_mdstop_info(devi_file_list):
+    def read_md_last_line(md_log):
+        with open(md_log, "rb") as file:
+            file.seek(-2, 2)  # seek to two bytes before the end of the file
+            while file.read(1) != b'\n':  # scan backwards byte by byte for a newline
+                file.seek(-2, 1)  # step back two bytes
+            last_line = file.readline().decode().strip()  # read the last line, stripping the newline and whitespace
+        if "Total wall time" in last_line:  # MD finished normally
+            return True
+        else:
+            return False
+    # for each md model_deviation file get shape
+    # for each md md.log get run time
+    # do compare
+    right_list = []
+    error_list = []
+    for devi_file in devi_file_list:
+        devi = np.loadtxt(devi_file)
+        end_normal = read_md_last_line(os.path.join(os.path.dirname(devi_file), SLURM_OUT.md_out))
+        if end_normal and devi.shape[0] > 1:
+            right_list.append(os.path.dirname(devi_file))
+        else:
+            error_list.append(os.path.dirname(devi_file))
+    return right_list, error_list
+
+
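Note that read_md_last_line assumes md.log ends with a newline and holds at least two lines; on an empty or single-line file the backwards seek raises OSError. A defensive variant (hypothetical helper, not part of pwact) could fall back to reading the whole file:

def read_last_line_safe(path: str) -> str:
    try:
        with open(path, "rb") as f:
            f.seek(-2, 2)  # two bytes before EOF
            while f.read(1) != b'\n':
                f.seek(-2, 1)  # step back past the byte just read
            return f.readline().decode().strip()
    except OSError:  # file too short to seek backwards
        with open(path, "rb") as f:
            lines = f.read().splitlines()
            return lines[-1].decode().strip() if lines else ""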
@@ -9,7 +9,7 @@ from pwact.active_learning.init_bulk.relabel import Relabel
 from pwact.active_learning.user_input.init_bulk_input import InitBulkParam
 from pwact.active_learning.user_input.resource import Resource
 from pwact.active_learning.slurm.slurm import scancle_job
-from pwact.utils.constant import INIT_BULK, DFT_STYLE, TEMP_STRUCTURE
+from pwact.utils.constant import INIT_BULK, DFT_STYLE, TEMP_STRUCTURE, PWDATA
 from pwact.utils.file_operation import copy_file, copy_dir, search_files, del_file, del_file_list, write_to_file
 from pwact.data_format.configop import extract_pwdata

@@ -104,13 +104,13 @@ def do_collection(resource: Resource, input_param:InitBulkParam):
             continue
         source_aimd = sorted(source_aimd)
         #5. convert the aimd files (for vasp is outcar, for pwmat is movement) to npy format
-        extract_pwdata(data_list=source_aimd,
-            data_format=DFT_STYLE.get_aimd_config_format(resource.dft_style),
-            datasets_path=os.path.join(collection_dir, init_config_name, INIT_BULK.npy_format_save_dir),
-            train_valid_ratio=input_param.train_valid_ratio,
-            data_shuffle=input_param.data_shuffle,
-            merge_data=True,
-            interval=1
+        extract_pwdata(input_data_list=source_aimd,
+            intput_data_format= DFT_STYLE.get_aimd_config_format(resource.dft_style),
+            save_data_path = os.path.join(collection_dir, init_config_name),
+            save_data_name = INIT_BULK.get_save_format(input_param.data_format),
+            save_data_format= input_param.data_format,
+            data_shuffle=input_param.data_shuffle,
+            interval=1
         )

         #6 convert relabel datas
@@ -122,12 +122,12 @@ def do_collection(resource: Resource, input_param:InitBulkParam):
             continue
         source_scf = sorted(source_scf, key=lambda x:int(os.path.basename(os.path.dirname(x)).split('-')[0]), reverse=False)
         #5. convert the aimd files (for vasp is outcar, for pwmat is movement) to npy format
-        extract_pwdata(data_list=source_scf,
-            data_format=DFT_STYLE.get_format_by_postfix(os.path.basename(source_scf[0])),
-            datasets_path=os.path.join(collection_dir, init_config_name, "scf_pwdata"),
-            train_valid_ratio=input_param.train_valid_ratio,
+        extract_pwdata(input_data_list=source_scf,
+            intput_data_format= DFT_STYLE.get_format_by_postfix(os.path.basename(source_scf[0])),
+            save_data_path = os.path.join(collection_dir, init_config_name),
+            save_data_name = INIT_BULK.get_save_format(input_param.data_format),
+            save_data_format= input_param.data_format,
             data_shuffle=input_param.data_shuffle,
-            merge_data=True,
             interval=1
         )

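Both call sites above move from the old pwdata keywords (data_list, data_format, datasets_path, train_valid_ratio, merge_data) to new ones that split the output location, file name, and format apart. A sketch of the new keyword surface with placeholder values; note that "intput_data_format" is the library's own spelling, and the format strings here are illustrative:

extract_pwdata(
    input_data_list=["OUTCAR.1", "OUTCAR.2"],    # source configurations (placeholder paths)
    intput_data_format="vasp/outcar",            # format of the inputs (placeholder)
    save_data_path="collection/init_config_0",   # output directory (placeholder)
    save_data_name="train.xyz",                  # from INIT_BULK.get_save_format(...)
    save_data_format="extxyz",                   # "extxyz" or "pwmlff/npy"
    data_shuffle=False,
    interval=1,
)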
@@ -146,7 +146,11 @@ def do_collection(resource: Resource, input_param:InitBulkParam):
     del_file_list([temp_work_dir])

     # print the dir of pwdatas from aimd
-    pwdatas = search_files(real_collection_dir, "*/{}".format(INIT_BULK.npy_format_save_dir))
+    if input_param.data_format == PWDATA.extxyz:
+        pwdatas = search_files(real_collection_dir, "*/{}".format(INIT_BULK.get_save_format(input_param.data_format)))
+    elif input_param.data_format == PWDATA.pwmlff_npy: # */PWdata/*.npy
+        tmp = search_files(real_collection_dir, "*/{}/*/position.npy".format(INIT_BULK.get_save_format(input_param.data_format)))
+        pwdatas = [os.path.dirname(_) for _ in tmp]
     if len(pwdatas) > 0:
         pwdatas = sorted(pwdatas)
         result_lines = ["\"{}\",".format(_) for _ in pwdatas]
@@ -27,7 +27,7 @@ from pwact.active_learning.user_input.iter_input import InputParam
 from pwact.active_learning.slurm.slurm import SlurmJob, Mission, scancle_job

 from pwact.utils.constant import DFT_TYPE, VASP, PWDATA, AL_STRUCTURE, TEMP_STRUCTURE,\
-    LABEL_FILE_STRUCTURE, EXPLORE_FILE_STRUCTURE, LAMMPS, SLURM_OUT, DFT_STYLE, PWMAT
+    LABEL_FILE_STRUCTURE, EXPLORE_FILE_STRUCTURE, LAMMPS, SLURM_OUT, DFT_STYLE, PWMAT, INIT_BULK

 from pwact.utils.slurm_script import get_slurm_job_run_info, split_job_for_group, set_slurm_script_content
 from pwact.utils.format_input_output import get_iter_from_iter_name, get_md_sys_template_name
@@ -280,22 +280,12 @@ class Labeling(object):
         # scf files to pwdata format
         scf_configs = self.collect_scf_configs()

-        extract_pwdata(data_list=scf_configs,
-            data_format =DFT_STYLE.get_format_by_postfix(os.path.basename(scf_configs[0])),
-            datasets_path =self.result_dir,
-            train_valid_ratio=self.input_param.train.train_valid_ratio,
-            data_shuffle =self.input_param.train.data_shuffle,
-            merge_data =True
+        extract_pwdata(input_data_list=scf_configs,
+            intput_data_format =DFT_STYLE.get_format_by_postfix(os.path.basename(scf_configs[0])),
+            save_data_path =self.result_dir,
+            save_data_name = INIT_BULK.get_save_format(self.input_param.data_format),
+            save_data_format = self.input_param.data_format,
+            data_shuffle =self.input_param.train.data_shuffle
         )
-
-        # for id, scf_md in enumerate(scf_configs):
-        #     datasets_path_name = os.path.basename(os.path.dirname(os.path.dirname(scf_md[0])))#md.001.sys.001.t.000.p.000
-        #     extract_pwdata(data_list=scf_md,
-        #         data_format =DFT_STYLE.get_format_by_postfix(os.path.basename(scf_md[0])),
-        #         datasets_path =os.path.join(self.result_dir, "{}-{}".format(id, datasets_path_name)),
-        #         train_valid_ratio=self.input_param.train.train_valid_ratio,
-        #         data_shuffle =self.input_param.train.data_shuffle,
-        #         merge_data =True
-        #     )
         # copy to main dir
         copy_dir(self.result_dir, self.real_result_dir)
@@ -4,6 +4,8 @@ import os
 import sys
 import time
 import shutil
+import numpy as np
+from pwact.utils.constant import EXPLORE_FILE_STRUCTURE
 from pwact.active_learning.slurm.slurm_tool import get_jobs
 class JobStatus (Enum) :
     unsubmitted = 1 #
@@ -20,7 +22,7 @@ def get_slurm_sbatch_cmd(job_dir:str, job_name:str):
     return cmd

 class SlurmJob(object):
-    def __init__(self, job_id=None, status=JobStatus.unsubmitted, user=None, name=None, nodes=None, nodelist=None, partition=None) -> None:
+    def __init__(self, job_id=None, status=JobStatus.unsubmitted, user=None, name=None, nodes=None, nodelist=None, partition=None, lmps_tolerance:bool=True) -> None:
         self.job_id = job_id
         self.status = status
         self.user = user
@@ -29,6 +31,7 @@ class SlurmJob(object):
         self.nodes = nodes
         self.nodelist = nodelist
         self.submit_num = 0
+        self.lmps_tolerance = lmps_tolerance

     def set_cmd(self, script_path:str):
         #such as "sbatch main_MD_test.sh"
@@ -186,24 +189,36 @@ class SlurmJob(object):
             md_log = os.path.join(md_dir, "md.log")
             if os.path.exists(tag_md_file):
                 continue
-            if not os.path.exists(md_log):
-                return False

+            # check if has error
             with open(md_log, "rb") as file:
                 file.seek(-2, 2)  # seek to two bytes before the end of the file
                 while file.read(1) != b'\n':  # scan backwards byte by byte for a newline
                     file.seek(-2, 1)  # step back two bytes
                 last_line = file.readline().decode().strip()  # read the last line, stripping the newline and whitespace
-            if "ERROR: there are two atoms" in last_line:
-                with open(tag_md_file, 'w') as wf:
-                    wf.writelines("ERROR: there are two atoms too close")
-                return True
-            elif "Total wall time" in last_line:
+            if "Total wall time" in last_line:  # MD finished normally
                 with open(tag_md_file, 'w') as wf:
                     wf.writelines("Job Done!")
                 return True
-            else:
+
+            if os.path.exists(os.path.join(md_dir, EXPLORE_FILE_STRUCTURE.model_devi)):
+                devi = np.loadtxt(os.path.join(md_dir, EXPLORE_FILE_STRUCTURE.model_devi))
+                if self.lmps_tolerance and devi.shape[0] > 0:
+                    with open(tag_md_file, 'w') as wf:
+                        wf.writelines("Job Done!")
+                    return True
+                else:
+                    return False
+            else:  # the MD run ended abnormally
                 return False
+
+            # check model_devi.out
+            # elif "ERROR: there are two atoms" in last_line:
+            #     with open(tag_md_file, 'w') as wf:
+            #         wf.writelines("ERROR: there are two atoms too close")
+            #     return True
+            # else:
+            #     return False
         return True
     except Exception as e:
         return False
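The recovery rule above, in isolation: when lmps_tolerance is enabled, a LAMMPS run that stopped without "Total wall time" still counts as done as long as its model-deviation file captured at least one frame, so partially crashed trajectories are not endlessly resubmitted. A distilled sketch of that rule (illustrative, not the pwact API; "model_devi.out" is the file the hunk's own comment refers to):

import os
import numpy as np

def md_counts_as_done(md_dir: str, lmps_tolerance: bool, devi_name: str = "model_devi.out") -> bool:
    devi_path = os.path.join(md_dir, devi_name)
    if not os.path.exists(devi_path):
        return False  # the run produced nothing usable
    devi = np.loadtxt(devi_path)
    # tolerate an abnormal exit if at least one deviation frame was recorded
    return lmps_tolerance and devi.shape[0] > 0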
@@ -380,3 +395,16 @@ def scancle_job(work_dir:str):
         else:
             print("Scancel job {} failed, Please manually check and cancel this task!\n".format(job_id))

+def scancle_byjobid(job_id):
+    job = SlurmJob(job_id=job_id)
+    status = job.check_status_no_tag()  # get status
+    if status == JobStatus.waiting or status == JobStatus.running:  # is running
+        job.scancel_job()
+        time.sleep(5)
+        job = SlurmJob(job_id=job_id)
+        status = job.check_status_no_tag()  # get status
+        if JobStatus.finished == status:
+            print("scancel job {} successfully".format(job_id))
+        else:
+            print("Scancel job {} failed, Please manually check and cancel this task!\n".format(job_id))
+
@@ -9,7 +9,7 @@ from pwact.active_learning.user_input.iter_input import InputParam

 from pwact.utils.format_input_output import make_train_name, get_seed_by_time, get_iter_from_iter_name, make_iter_name
 from pwact.utils.constant import AL_STRUCTURE, UNCERTAINTY, TEMP_STRUCTURE, MODEL_CMD, \
-    TRAIN_INPUT_PARAM, TRAIN_FILE_STRUCTUR, FORCEFILED, LABEL_FILE_STRUCTURE, SLURM_OUT, MODEL_TYPE
+    TRAIN_INPUT_PARAM, TRAIN_FILE_STRUCTUR, FORCEFILED, LABEL_FILE_STRUCTURE, SLURM_OUT, MODEL_TYPE, PWDATA, INIT_BULK

 from pwact.utils.file_operation import save_json_file, write_to_file, del_dir, search_files, add_postfix_dir, mv_file, copy_dir, del_file_list, del_file_list_by_patten
 '''
@@ -68,7 +68,7 @@ class ModelTrian(object):
             if not os.path.exists(model_i_dir):
                 os.makedirs(model_i_dir)
             # make train.json file
-            train_dict = self.set_train_input_dict(work_dir=model_i_dir)
+            train_dict = self.set_train_input_dict(work_dir=model_i_dir, model_index = model_index)
             train_json_file_path = os.path.join(model_i_dir, TRAIN_FILE_STRUCTUR.train_json)
             save_json_file(train_dict, train_json_file_path)
             train_list.append(model_i_dir)
@@ -139,33 +139,46 @@ class ModelTrian(object):
     return {*}
     author: wuxingxing
     '''
-    def set_train_input_dict(self, work_dir:str=None):
+    def set_train_input_dict(self, work_dir:str=None, model_index=None):
         train_json = self.input_param.train.to_dict()
-        train_feature_path = []
-        if self.input_param.init_data_only_pretrain and self.iter > 0:
+        if self.iter == 0 and len(self.input_param.init_model_list) > 0:
+            train_json[TRAIN_INPUT_PARAM.recover_train] = True
+            train_json[TRAIN_INPUT_PARAM.model_load_file] = self.input_param.init_model_list[model_index]
+            train_json[TRAIN_INPUT_PARAM.optimizer][TRAIN_INPUT_PARAM.reset_epoch] = True
+        if self.iter > 0 and self.input_param.use_pre_model:
             # use old model param iter.*/train/train.000/model_record/dp_model.ckpt
             pre_model = os.path.join(self.input_param.root_dir, make_iter_name(self.iter-1), \
                 AL_STRUCTURE.train, make_train_name(0), TRAIN_FILE_STRUCTUR.model_record, TRAIN_FILE_STRUCTUR.dp_model_name)
             train_json[TRAIN_INPUT_PARAM.recover_train] = True
             train_json[TRAIN_INPUT_PARAM.model_load_file] = pre_model
             train_json[TRAIN_INPUT_PARAM.optimizer][TRAIN_INPUT_PARAM.reset_epoch] = True
-        else:
-            for _data in self.input_param.init_data:
-                train_feature_path.append(_data)
+        train_feature_path = []
+        for _data in self.input_param.init_data:
+            train_feature_path.append(_data)
         # search train_feature_path in iter*/label/result/*/PWdata/*
         iter_index = get_iter_from_iter_name(self.itername)
         start_iter = 0
         while start_iter < iter_index:
-            iter_pwdata = search_files(self.input_param.root_dir,
-                "{}/{}/{}/*".format(make_iter_name(start_iter), AL_STRUCTURE.labeling, LABEL_FILE_STRUCTURE.result))
-            if len(iter_pwdata) > 0:
-                train_feature_path.extend(iter_pwdata)
+            if self.input_param.data_format == PWDATA.extxyz: # result/train.xyz
+                iter_data_list = search_files(self.input_param.root_dir,
+                    "{}/{}/{}/{}".format(make_iter_name(start_iter),
+                        AL_STRUCTURE.labeling, LABEL_FILE_STRUCTURE.result, INIT_BULK.get_save_format(self.input_param.data_format)))
+            else:#pwmlff/npy 'iter.***/label/result/*/*' -> result/PWdata/dir
+                iter_data_list = search_files(self.input_param.root_dir,
+                    "{}/{}/{}/*/*".format(make_iter_name(start_iter),
+                        AL_STRUCTURE.labeling, LABEL_FILE_STRUCTURE.result))
+
+            if len(iter_data_list) > 0:
+                train_feature_path.extend(iter_data_list)
             start_iter += 1

         # reset seed
         train_json[TRAIN_INPUT_PARAM.seed] = get_seed_by_time()
         train_json[TRAIN_INPUT_PARAM.raw_files] = []
-        train_json[TRAIN_INPUT_PARAM.datasets_path] = train_feature_path
+        train_json[TRAIN_INPUT_PARAM.train_data] = train_feature_path
+        # set valid data
+        train_json[TRAIN_INPUT_PARAM.valid_data] = self.input_param.valid_data
+        train_json[TRAIN_INPUT_PARAM.format] = self.input_param.data_format
         if self.input_param.strategy.uncertainty == UNCERTAINTY.kpu:
             train_json[TRAIN_INPUT_PARAM.save_p_matrix] = True
         return train_json
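The checkpoint-seeding precedence introduced here, distilled into one standalone function (illustrative, not the pwact API):

def pick_seed_model(iter_idx, init_model_list, model_index, use_pre_model, prev_iter_model):
    # iteration 0: optionally start each committee member from a user-supplied model
    if iter_idx == 0 and init_model_list:
        return init_model_list[model_index]
    # later iterations: optionally continue from model 0 of the previous iteration
    if iter_idx > 0 and use_pre_model:
        return prev_iter_model
    return None  # otherwise train from scratch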
@@ -11,8 +11,10 @@ class InitBulkParam(object):
         if not os.path.isabs(self.root_dir):
             self.root_dir = os.path.realpath(self.root_dir)

-        self.data_shuffle = get_parameter("data_shuffle", json_dict, True)
-        self.train_valid_ratio = get_parameter("train_valid_ratio", json_dict, 0.8)
+        self.data_shuffle = get_parameter("data_shuffle", json_dict, False)
+        self.data_format = get_parameter("data_format", json_dict, PWDATA.extxyz).lower()
+        if self.data_format not in [PWDATA.extxyz, PWDATA.pwmlff_npy]:
+            raise Exception("ERROR! The 'data_format' only supports the 'extxyz' or 'pwmlff/npy' format! The default format is 'extxyz'!")
         self.interval = get_parameter("interval", json_dict, 1)

         # self.reserve_pwmat_files = get_parameter("reserve_pwmat_files", json_dict, False)
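The keys InitBulkParam now reads, shown as a Python dict mirroring the json input it parses (values illustrative):

init_bulk_fragment = {
    "data_shuffle": False,    # new default is False
    "data_format": "extxyz",  # or "pwmlff/npy"; anything else raises
    "interval": 1,
}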
@@ -1,6 +1,6 @@
 import os
 import glob
-
+from pwact.utils.file_operation import check_model_type
 from pwact.utils.json_operation import get_parameter, get_required_parameter
 from pwact.utils.constant import MODEL_CMD, FORCEFILED, UNCERTAINTY, PWDATA
 from pwact.active_learning.user_input.train_param.train_param import InputParam as TrainParam
@@ -20,13 +20,28 @@ class InputParam(object):
         self.reserve_md_traj = get_parameter("reserve_md_traj", json_dict, False) #
         self.reserve_scf_files = get_parameter("reserve_scf_files", json_dict, False) # not used

+        self.data_format = get_parameter("data_format", json_dict, "extxyz")
         init_data = get_parameter("init_data", json_dict, [])
         self.init_data = self.get_init_data(init_data)
+        init_valid_data= get_parameter("valid_data", json_dict, [])
+        self.valid_data = self.get_init_data(init_valid_data)
         # the init data for pretraining
-        self.init_data_only_pretrain = get_parameter("init_data_only_pretrain", json_dict, False)
-
+        # self.init_data_only_pretrain = get_parameter("init_data_only_pretrain", json_dict, False)
         self.train = TrainParam(json_input=json_dict["train"], cmd=MODEL_CMD.train)
+        self.use_pre_model = get_parameter("use_pre_model", json_dict, True)
         self.strategy = StrategyParam(json_dict["strategy"])
+        #check_model_type: check type and nums
+        self.init_model_list = get_parameter("init_model_list", json_dict, [])
+        if len(self.init_model_list) > 0:
+            if len(self.init_model_list) != self.strategy.model_num:
+                raise Exception("Error! The number of input models needs to be consistent with model_num {} in 'strategy'".format(self.strategy.model_num))
+            for _model_file in self.init_model_list:
+                if not os.path.exists(_model_file):
+                    raise Exception("Error! The model in init_model_list {} does not exist".format(_model_file))
+                _model_type = check_model_type(_model_file)
+                if _model_type != self.train.model_type:
+                    raise Exception("Error! The model type in init_model_list is {}, should be consistent with model_type {} in 'train'".format(_model_type, self.train.model_type))
+            self.init_model_list = [os.path.abspath(_) for _ in self.init_model_list]

         if self.strategy.uncertainty == UNCERTAINTY.kpu and \
             self.train.optimizer_param.opt_name.upper() != "LKF":
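Put together, the new top-level options look roughly like this fragment of the main input json, written as a Python dict. File names and the committee size of 4 are placeholders, but the constraint that init_model_list match model_num under 'strategy' is enforced by the validation above:

iter_input_fragment = {
    "data_format": "extxyz",
    "valid_data": ["./valid_data"],
    "use_pre_model": True,  # later iterations continue from the previous iteration's model
    "init_model_list": ["./dp_0.ckpt", "./dp_1.ckpt", "./dp_2.ckpt", "./dp_3.ckpt"],
    "strategy": {"model_num": 4, "lmps_tolerance": True},
}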
@@ -63,7 +78,7 @@ class InputParam(object):
 class StrategyParam(object):
     def __init__(self, json_dict) -> None:
         self.md_type = get_parameter("md_type", json_dict, FORCEFILED.libtorch_lmps)
-
+        self.lmps_tolerance = get_parameter("lmps_tolerance", json_dict, True)
         self.max_select = get_parameter("max_select", json_dict, None)
         self.uncertainty = get_parameter("uncertainty", json_dict, UNCERTAINTY.committee).upper()
         if self.uncertainty.upper() == UNCERTAINTY.kpu:
@@ -70,6 +70,7 @@ class SCFParam(object):
             potential_list = get_parameter("potential_list", gaussian_param, None)
             atom_list = get_parameter("atom_list", gaussian_param, None)
             self.gaussian_base_param = {}
+            self.gaussian_base_param["KSPACING"] = get_parameter("kspacing", gaussian_param, None)
             self.gaussian_base_param["ELEMENT"] = atom_list
             self.gaussian_base_param["BASIS_SET"] = basis_set_list
             self.gaussian_base_param["POTENTIAL"] = potential_list
@@ -79,6 +80,7 @@ class SCFParam(object):
             self.basis_set_file = None# os.path.abspath(get_parameter("basis_set_file", json_dict, None))
             self.potential_file = None#os.path.abspath(get_parameter("potential_file", json_dict, None))
             self.gaussian_base_param = None
+            self.kspacing = None
         # for cp2k and pwmat gaussian


@@ -250,11 +250,11 @@ class NepParam(object):
             error_log = "the input 'l_max' should have 3 values. The values should be [4, 0, 0] (only use three body features), [4, 2, 0] (use 3 and 4 body features) or [4, 2, 1] (use 3,4,5 body features).\n"
             raise Exception(error_log)
         if "fitting_net" in model_dict.keys():
-            self.neuron = self.get_parameter("network_size", model_dict["fitting_net"], [100]) # number of neurons in the hidden layer
+            self.neuron = self.get_parameter("network_size", model_dict["fitting_net"], [40]) # number of neurons in the hidden layer
             if not isinstance(self.neuron, list):
                 self.neuron = [self.neuron]
         else:
-            self.neuron = [100]
+            self.neuron = [40]
         if self.neuron[-1] != 1:
             self.neuron.append(1) # output layer of fitting net
         self.set_feature_params()
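With this change a NEP fitting net defaults to a single hidden layer of 40 neurons instead of 100 unless the input overrides it, e.g. (keys as read by NepParam above, value illustrative):

nep_fragment = {"fitting_net": {"network_size": [40]}}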
@@ -29,23 +29,26 @@ class InputParam(object):
         self.cmd = cmd
         self.inference = True if self.cmd == "test".upper() else False
         self.model_type = get_required_parameter("model_type", json_input).upper()
+        # self.atom_type = get_required_parameter("atom_type", json_input)
         self.atom_type = get_atomic_name_from_str(get_required_parameter("atom_type", json_input))
-
         self.model_num = get_parameter("model_num", json_input, 1)
         self.recover_train = get_parameter("recover_train", json_input, True)
         self.max_neigh_num = get_parameter("max_neigh_num", json_input, 100)
-
         self.profiling = get_parameter("profiling", json_input, False)#not realized

         self.set_feature_params(json_input)
         self.set_workdir_structures(json_input)
+        if self.inference and self.model_type in ["NN", "LINEAR"]:
+            self.file_paths.nn_work = os.path.join(self.file_paths.json_dir, "work_test_dir")

         if self.model_type in ["DP", "NN", "NEP", "LINEAR", "CHEBY"]:
             self.set_model_init_params(json_input)
         self.set_default_multi_gpu_info(json_input)
         # set optimizer
         self.set_optimizer(json_input)
-
+        if self.model_type in ["NN", "LINEAR"]:
+            self.optimizer_param.batch_size = 1
+            print("Warning! NN and Linear models only support a single batch; automatically adjusting batch_size=1.")
         # elif self.model_type in ["NEP"]:
         #     self.set_nep_in_params(json_input)

@@ -195,11 +198,8 @@ class InputParam(object):
         # set feature related params
         self.valid_shuffle = get_parameter("valid_shuffle", json_input, False)
         self.data_shuffle = get_parameter("data_shuffle", json_input, True)
-        self.train_valid_ratio = get_parameter("train_valid_ratio", json_input, 0.8)
         self.seed = get_parameter("seed", json_input, 2023)
         self.precision = get_parameter("precision", json_input, "float64")
-        self.chunk_size = get_parameter("chunk_size", json_input, 10)
-        self.format = get_parameter("format", json_input, "pwmat/movement")

     '''
     description:
@@ -211,9 +211,6 @@ class InputParam(object):
     '''
     def set_workdir_structures(self, json_input:dict):
         # set file structures
-        work_dir = get_parameter("work_dir", json_input, None)
-        if work_dir is None:
-            work_dir = os.getcwd()
         self.file_paths = WorkFileStructure(json_dir=os.getcwd(),
             reserve_work_dir=get_parameter("reserve_work_dir", json_input, False),
             reserve_feature = get_parameter("reserve_feature", json_input, False),
@@ -233,9 +230,7 @@ class InputParam(object):
         self.recover_train = True
         self.optimizer_param.batch_size = 1 # set batch size to 1, so that each image inference info will be saved
         self.data_shuffle = False
-        self.train_valid_ratio = 1
         self.valid_shuffle = False
-        self.format = get_parameter("format", json_input, "pwmat/movement")
         self.file_paths.set_inference_paths(json_input,is_nep_txt = is_nep_txt)

     '''
@@ -267,14 +262,14 @@ class InputParam(object):
     author: wuxingxing
     '''
     def get_data_file_dict(self):
-        data_file_dict = self.file_paths.get_data_file_structure()
+        data_file_dict = {}
+        # data_file_dict = self.file_paths.get_data_file_structure()
         data_file_dict["M2"] = self.descriptor.M2
         data_file_dict["maxNeighborNum"] = self.max_neigh_num
         data_file_dict["atomType"]=self.atom_type_dict
         data_file_dict["Rc_M"] = self.descriptor.Rmax
         data_file_dict["E_tolerance"] = self.descriptor.E_tolerance
         data_file_dict["train_egroup"] = self.optimizer_param.train_egroup
-        data_file_dict["ratio"] = self.train_valid_ratio

         return data_file_dict

@@ -282,7 +277,8 @@ class InputParam(object):
         params_dict = {}
         params_dict["model_type"] = self.model_type
         params_dict["atom_type"] = self.atom_type
-        params_dict["max_neigh_num"] = self.max_neigh_num
+        if self.model_type !="NEP":
+            params_dict["max_neigh_num"] = self.max_neigh_num
         if self.seed is not None:
             params_dict["seed"] = self.seed
         if self.model_num > 1 :
@@ -336,6 +332,17 @@ class InputParam(object):
     print(params_dict)

 def help_info():
-    print("train: do model training")
-    print("test: do dp model inference")
-
+    # use a double-line border and a bold title
+    print("\n\033[1;36m╔" + "=" * 48 + "╗\033[0m")  # double-line top border
+    print("\033[1;36m║" + " " * 10 + "\033[1;35m PWMLFF Basic Information \033[0m" + " " * 12 + "\033[1;36m║\033[0m")  # title
+    print("\033[1;36m╚" + "=" * 48 + "╝\033[0m")  # double-line bottom border
+    print(f"\033[1;32mVersion:\033[0m 2025.02")
+    print(f"\033[1;32mCompatible pwdata:\033[0m >= 0.4.8")
+    print(f"\033[1;32mCompatible pwact:\033[0m >= 0.2.1")
+    print(f"\033[1;32mLast Commit:\033[0m 2025.03.05")
+    print(f"\033[1;32mGit Hash:\033[0m 7bdaa90da15a5bfca6a831e739ebdd67fca22299")
+    print(f"\033[1;32mContact:\033[0m support@pwmat.com")
+    print(f"\033[1;32mCitation:\033[0m https://github.com/LonxunQuantum/PWMLFF")
+    print(f"\033[1;32mManual online:\033[0m http://doc.lonxun.com/PWMLFF/")
+    print("\033[1;36m" + "=" * 50 + "\033[0m")  # cyan separator line
+    print("\n\n")