pwact 0.1.27__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,9 +7,6 @@ class WorkFileStructure(object):
  '''
  description:
  param {*} self
- param {*} work_dir: is the work path, model training, feature generation, and inference work \
- are all carried out under the modified directory\
- if the user does not set it, it defaults to json_dir
  param {*} json_dir: The trained models, features, and inference results are collected in this directory
  return {*}
  author: wuxingxing
@@ -20,27 +17,32 @@ class WorkFileStructure(object):
  self.reserve_work_dir = reserve_work_dir
  self.reserve_feature = reserve_feature
  self.movement_name = "MOVEMENT"
- self.raw_path = []
+ # self.raw_path = []
  self.train_feature_path = []
+ self.valid_feature_path = []
  self.test_feature_path = []
- self.datasets_path = []
+ # self.datasets_path = []
  self.model_load_path = ""
-
+
+ self.train_data_path = []
+ self.valid_data_path = []
+ self.test_data_path = []
+
+ if self.model_type == "NN" or self.model_type == "LINEAR":
+ self._set_NN_PWdata_dirs()
  # def _set_training_path(self, train_raw_path:list, train_feature_path:list, train_dir: str):
  # self.raw_path = train_raw_path
  # self.train_feature_path = train_feature_path
  # self.train_dir = os.path.join(self.json_dir, train_dir)

- def _set_alive_atomic_energy(self, alive_atomic_energy:bool):
- self.alive_atomic_energy = alive_atomic_energy

- def _set_data_file_paths(self, trainSetDir:str, dRFeatureInputDir:str, dRFeatureOutputDir:str,\
- trainDataPath:str, validDataPath:str):
- self.trainSetDir = trainSetDir
- self.dRFeatureInputDir = dRFeatureInputDir# it is not used 2024.04.03
- self.dRFeatureOutputDir = dRFeatureOutputDir# it is not used 2024.04.03
- self.trainDataPath = trainDataPath
- self.validDataPath = validDataPath
+ # def _set_data_file_paths(self, trainSetDir:str, dRFeatureInputDir:str, dRFeatureOutputDir:str,\
+ # trainDataPath:str, validDataPath:str):
+ # self.trainSetDir = trainSetDir
+ # self.dRFeatureInputDir = dRFeatureInputDir# it is not used 2024.04.03
+ # self.dRFeatureOutputDir = dRFeatureOutputDir# it is not used 2024.04.03
+ # self.trainDataPath = trainDataPath
+ # self.validDataPath = validDataPath

  def _set_p_matrix_paths(self, p_path, save_p_matrix:bool):
  self.save_p_matrix = save_p_matrix
@@ -56,30 +58,12 @@ class WorkFileStructure(object):
  self.model_load_path = model_load_path

  def set_inference_paths(self, json_input:dict, is_nep_txt:bool=False):
- # load test files and check if they are exist
- raw_path = get_parameter("raw_files", json_input, [])
- for raw_data in raw_path:
- if os.path.exists(raw_data) is False:
- raise Exception("Error! test data: {} file not exist!".format(raw_data))
- raw_path = [os.path.abspath(_) for _ in raw_path]
- self.raw_path = raw_path
- datasets_path = get_parameter("datasets_path", json_input, [])
- if isinstance(datasets_path, list) is False:
- datasets_path = [datasets_path]
- for data_path in datasets_path:
- if os.path.exists(data_path) is False:
- raise Exception("{} file is not exists, please check!".format(data_path))
-
  test_dir_name = get_parameter("test_dir_name", json_input, "test_result")
- self.test_dir = os.path.join(self.json_dir, test_dir_name)
-
- '''test_feature_path = get_parameter("test_feature_path", json_input, [])
- for feat_path in test_feature_path:
- if os.path.exists(feat_path) is False:
- raise Exception("Error! test_feature_path {} does not exist!".format(feat_path))
- test_feature_path = [os.path.abspath(_) for _ in test_feature_path]
- self.test_feature_path = test_feature_path'''
- self.datasets_path = [os.path.abspath(_) for _ in datasets_path]
+
+ if json_input["model_type"].upper() in ["LINEAR", "NN"]:
+ self.test_dir = os.path.join(self.nn_work, test_dir_name)
+ else:
+ self.test_dir = os.path.join(self.json_dir, test_dir_name)

  if not json_input["model_type"].upper() == "LINEAR":
  if is_nep_txt:
@@ -90,8 +74,8 @@ class WorkFileStructure(object):
  if os.path.exists(self.model_load_path) is False:
  raise Exception("the model_load_path is not exist: {}, please speccified 'model_load_path' at json file".format(self.model_load_path))

- if "trainDataPath" in json_input.keys():# for test, people could set the 'trainSetDir' to 'valid', so the valid data in train dir could be used for valid
- self.trainDataPath = json_input["trainDataPath"]
+ # if "trainDataPath" in json_input.keys():# for test, people could set the 'trainSetDir' to 'valid', so the valid data in train dir could be used for valid
+ # self.trainDataPath = json_input["trainDataPath"]

  '''alive_atomic_energy = is_alive_atomic_energy(datasets_path)
  self._set_alive_atomic_energy(alive_atomic_energy)'''
@@ -109,11 +93,17 @@ class WorkFileStructure(object):
  def set_train_feature_path(self, feature_path:list):
  self.train_feature_path.extend(feature_path)

+ def set_valid_feature_path(self, feature_path:list):
+ self.valid_feature_path.extend(feature_path)
+
  def set_test_feature_path(self, feature_path:list):
  self.test_feature_path.extend(feature_path)

+ # delete in 2025
  def set_datasets_path(self, datasets_path:list):
- self.datasets_path.extend(datasets_path)
+ pass
+ # self.datasets_path.extend(datasets_path)
+

  '''
  description:
@@ -143,7 +133,6 @@ class WorkFileStructure(object):
  best_model_path = os.path.join(self.json_dir, "best_model.ckpt")
  forcefield_name = get_parameter("forcefield_name", json_input, "forcefield.ff")
  forcefield_dir = get_parameter("forcefield_dir", json_input, "forcefield")
- self.set_forcefield_path(forcefield_dir, forcefield_name)
  # p matix, resume p matrix when recover is not realized
  # p matrix should extract to checkpoint files or a single file.
  # current not realized
@@ -156,79 +145,112 @@ class WorkFileStructure(object):

  # common dir
  model_store_dir = get_parameter("model_store_dir", json_input, "model_record")
- model_store_dir = os.path.join(self.json_dir, model_store_dir)
+ if self.model_type == "NN":
+ model_store_dir = os.path.join(self.nn_work, model_store_dir)
+ self.forcefield_dir = os.path.join(self.nn_work, forcefield_dir)
+ self.forcefield_name = forcefield_name
+ else:
+ self.forcefield_dir = os.path.join(self.json_dir, forcefield_dir)
+ self.forcefield_name = forcefield_name
+ model_store_dir = os.path.join(self.json_dir, model_store_dir)
  self._set_model_paths(model_store_dir = model_store_dir, \
  model_name = model_name, best_model_path=best_model_path)
- self._set_PWdata_dirs(json_input)
+
+ # self._set_PWdata_dirs(json_input)

  def set_train_valid_file(self, json_input:dict):
  # set trian movement file path
- raw_path = get_parameter("raw_files", json_input, [])
- for raw_data in raw_path:
- if os.path.exists(raw_data) is False:
- raise Exception("Error! train data: {} file not exist!".format(raw_data))
- # set train feature path
- raw_path = [os.path.abspath(_) for _ in raw_path]
- self.raw_path = raw_path
- datasets_path = get_parameter("datasets_path", json_input, [])
- for data_path in datasets_path:
- if os.path.exists(data_path) is False:
- raise Exception("Error! train data: {} file not exist!".format(data_path))
- datasets_path = [os.path.abspath(_) for _ in datasets_path]
- self.datasets_path = datasets_path
- '''if len(raw_path) > 0:
- raw_path = sorted(raw_path)
+ self.format = get_parameter("format", json_input, "pwmat/movement").lower() # used in new file and raw_file
+ if self.model_type.upper() in ["NN", "LINEAR"]:
+ if self.format != "pwmat/movement":
+ raise Exception("Error! For NN or Linear model, the input 'format' should be 'pwmat/movement'!")
+ train_data = get_parameter("train_data", json_input, [])
+
+ for _train_data in train_data:
+ if os.path.exists(_train_data) is False:
+ raise Exception("Error! train data: {} file not exist!".format(_train_data))
+ else:
+ self.train_data_path.append(os.path.abspath(_train_data))
+ valid_data = get_parameter("valid_data", json_input, [])
+ for _valid_data in valid_data:
+ if os.path.exists(_valid_data) is False:
+ raise Exception("Error! valid data: {} file not exist!".format(_valid_data))
+ else:
+ self.valid_data_path.append(os.path.abspath(_valid_data))
+ test_data = get_parameter("test_data", json_input, [])
+ for _test_data in test_data:
+ if os.path.exists(_test_data) is False:
+ raise Exception("Error! test data: {} file not exist!".format(_test_data))
+ else:
+ self.test_data_path.append(os.path.abspath(_test_data))
+
+ if self.format == "pwmat/movement": # for nn
+ self.alive_atomic_energy = False
+ if len(self.train_data_path) > 0:
+ alive_atomic_energy = is_alive_atomic_energy(self.train_data_path)
+ self.alive_atomic_energy = alive_atomic_energy
+
+ if len(self.valid_data_path) > 0:
+ alive_atomic_energy = is_alive_atomic_energy(self.valid_data_path)
+ self.alive_atomic_energy = alive_atomic_energy
+
+ if len(self.test_data_path) > 0:
+ alive_atomic_energy = is_alive_atomic_energy(self.test_data_path)
+ self.alive_atomic_energy = alive_atomic_energy
+
+ def set_nn_file(self, json_input:dict):
+ self.train_feature_path = []
+ self.valid_feature_path = []
+ self.test_feature_path = []
  train_feature_path = get_parameter("train_feature_path", json_input, [])
  for feat_path in train_feature_path:
  if os.path.exists(feat_path) is False:
- raise Exception("Error! train movement: {} file not exist!".format(feat_path))
- train_feature_path = [os.path.abspath(_) for _ in train_feature_path]
- self._set_training_path(raw_path=raw_path,
- train_feature_path=train_feature_path,
- train_dir=os.path.join(self.work_dir, "feature"))
-
- alive_atomic_energy = get_parameter("alive_atomic_energy", json_input, False)
- alive_atomic_energy = is_alive_atomic_energy(raw_path)
- self._set_alive_atomic_energy(alive_atomic_energy)'''
+ raise Exception("Error! train_feature_path: {} file not exist!".format(feat_path))
+ self.train_feature_path = [os.path.abspath(_) for _ in train_feature_path]

- def _set_PWdata_dirs(self, json_input:dict):
+ valid_feature_path = get_parameter("valid_feature_path", json_input, [])
+ for feat_path in valid_feature_path:
+ if os.path.exists(feat_path) is False:
+ raise Exception("Error! valid_feature_path: {} file not exist!".format(feat_path))
+ self.valid_feature_path = [os.path.abspath(_) for _ in valid_feature_path]
+
+ test_feature_path = get_parameter("test_feature_path", json_input, [])
+ for feat_path in test_feature_path:
+ if os.path.exists(feat_path) is False:
+ raise Exception("Error! test_feature_path: {} file not exist!".format(feat_path))
+ self.test_feature_path = [os.path.abspath(_) for _ in test_feature_path]
+
+ def _set_NN_PWdata_dirs(self):
  # set Pwdata dir file structure, they are used in feature generation
- trainSetDir = get_parameter("trainSetDir", json_input, 'PWdata')
- dRFeatureInputDir = get_parameter("dRFeatureInputDir", json_input, 'input')# it is not used 2024.04.03
- dRFeatureOutputDir = get_parameter("dRFeatureOutputDir", json_input, 'output')# it is not used 2024.04.03
- trainDataPath = get_parameter("trainDataPath", json_input, 'train')
- validDataPath = get_parameter("validDataPath", json_input, 'valid')
- self._set_data_file_paths(trainSetDir, dRFeatureInputDir, dRFeatureOutputDir, trainDataPath, validDataPath)
+ self.nn_work = os.path.join(os.getcwd(), "work_dir") # the work dir of nn training or test
+ self.trainSetDir = 'PWdata'
+ self.dRFeatureInputDir = 'input'# it is not used 2024.04.03
+ self.dRFeatureOutputDir = 'output'# it is not used 2024.04.03
+ # self.trainDataPath = 'train'
+ # self.validDataPath = 'valid'
+ # self._set_data_file_paths(trainSetDir, dRFeatureInputDir, dRFeatureOutputDir, trainDataPath, validDataPath)

  def set_nep_native_file_paths(self):
- self.nep_train_xyz_path = "train.xyz"
- self.nep_test_xyz_path = "test.xyz"
- self.nep_in_file = "nep.in"
- self.nep_model_file = "nep_to_lmps.txt"
- self.nep_restart_file = "nep.restart"
+ self.nep_model_file = "nep5.txt"

  def get_data_file_structure(self):
  file_dict = {}
  file_dict["trainSetDir"] = self.trainSetDir
  file_dict["dRFeatureInputDir"] = self.dRFeatureInputDir
  file_dict["dRFeatureOutputDir"] = self.dRFeatureOutputDir
- file_dict["trainDataPath"] = self.trainDataPath
- file_dict["validDataPath"] = self.validDataPath
+ # file_dict["trainDataPath"] = self.trainDataPath
+ # file_dict["validDataPath"] = self.validDataPath
  return file_dict

- def set_forcefield_path(self, forcefield_dir:str, forcefield_name:str):
- self.forcefield_dir = os.path.join(self.json_dir, forcefield_dir)
- self.forcefield_name = forcefield_name
-
  def to_dict(self):
  dicts = {}
- # dicts["work_dir"] = self.work_dir
- # dicts["reserve_work_dir"] = self.reserve_work_dir
-
+ dicts["format"] = self.format
  if self.model_load_path is not None and os.path.exists(self.model_load_path):
  dicts["model_load_file"] = self.model_load_path
- if len(self.datasets_path) > 0:
- dicts["datasets_path"] = self.datasets_path
- # dicts["model_store_dir"] = self.model_store_dir
-
+ if len(self.train_data_path) > 0:
+ dicts["train_data"] = self.train_data_path
+ if len(self.valid_data_path) > 0:
+ dicts["valid_data"] = self.valid_data_path
+ if len(self.test_data_path) > 0:
+ dicts["test_data"] = self.test_data_path
  return dicts
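
Note: the class refactor above replaces the old "raw_files"/"datasets_path" inputs with explicit "train_data", "valid_data", and "test_data" lists, each checked for existence and stored as absolute paths, and to_dict() now exports them under the same keys. A minimal sketch of a training input using the new keys (the MOVEMENT paths here are hypothetical):

json_input = {
    "model_type": "NN",                 # NN/LINEAR also trigger _set_NN_PWdata_dirs()
    "format": "pwmat/movement",         # required for NN or LINEAR models
    "train_data": ["./MOVEMENT_300K"],
    "valid_data": ["./MOVEMENT_500K"],
    "test_data": []
}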
@@ -58,9 +58,9 @@ def save_config(config, input_format:str = None, wrap = False, direct = True, so
  write_to_file(os.path.join(save_path, CP2K.cell_txt), lattice_line, 'w')

  else:
- config.to(output_path=save_path,
+ config.to(data_path =save_path,
  data_name =save_name,
- save_format =save_format,
+ format =save_format,
  direct =direct,
  sort =sort,
  wrap =wrap
@@ -90,9 +90,9 @@ def do_super_cell(config_file, input_format:str=None, supercell_matrix:list[int]
  # Make a supercell
  supercell = make_supercell(config, supercell_matrix, pbc)
  # Write out the structure
- supercell.to(output_path = save_path,
- data_name = save_name,
- save_format = save_format,
+ supercell.to(data_path = save_path,
+ data_name = save_name,
+ format = save_format,
  direct = direct,
  sort = sort)
  return os.path.join(save_path, save_name)
@@ -101,9 +101,9 @@ def do_scale(config, input_format:str=None, scale_factor:float=None,
  direct:bool=True, sort:bool=True, save_format:str=None, save_path:str=None, save_name:str=None):
  config = Config(format=input_format, data_path=config)
  scaled_struct = scale_cell(config, scale_factor)
- scaled_struct.to(output_path = save_path,
- data_name = save_name,
- save_format = save_format,
+ scaled_struct.to(data_path = save_path,
+ data_name = save_name,
+ format = save_format,
  direct = direct,
  sort = sort)

@@ -123,9 +123,9 @@ def do_pertub(config, input_format:str=None, pert_num:int=None, cell_pert_fracti
  atom_pert_distance = atom_pert_distance)

  for tmp_perturbed_idx, tmp_pertubed_struct in enumerate(perturbed_structs):
- tmp_pertubed_struct.to(output_path = save_path,
+ tmp_pertubed_struct.to(data_path = save_path,
  data_name = "{}_{}".format(tmp_perturbed_idx, save_name),
- save_format = save_format,
+ format = save_format,
  direct = direct,
  sort = sort)

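Note: the four hunks above apply one and the same pwdata API rename: Config.to(output_path=..., save_format=...) becomes Config.to(data_path=..., format=...), mirroring the Config(format=..., data_path=...) constructor already used in do_scale. A sketch of the new call shape (the import path and file names are assumptions):

from pwdata import Config  # assumed import; the diff only shows Config in use

config = Config(format="pwmat/movement", data_path="./MOVEMENT")  # hypothetical input
config.to(data_path="./out",      # was output_path
          data_name="train.xyz",  # output file name
          format="extxyz",        # was save_format
          direct=True,
          sort=True)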
@@ -133,35 +133,33 @@ def do_pertub(config, input_format:str=None, pert_num:int=None, cell_pert_fracti

  '''
  description:
- if merge is ture, save pwdata to datasets_path/data_name ...
- else:
- save pwdata to datasets_path/data_name/train or valid
+ save the inputfiles to pwmlff/npy format data
  return {*}
  author: wuxingxing
  '''
- def extract_pwdata(data_list:list[str],
- data_format:str="pwmat/movement",
- datasets_path="PWdata",
- train_valid_ratio:float=0.8,
- data_shuffle:bool=True,
- merge_data:bool=False,
+ def extract_pwdata(input_data_list:list[str],
+ intput_data_format:str="pwmat/movement",
+ save_data_path:str="./",
+ save_data_name="PWdata",
+ save_data_format="extxyz",
+ data_shuffle:bool=False,
  interval:int=1
  ):
  # if data_format == DFT_STYLE.cp2k:
  # raise Exception("not relized cp2k pwdata convert")

- if not os.path.isabs(datasets_path):
+ if not os.path.isabs(save_data_path):
  # data_name = datasets_path
- datasets_path = os.path.join(os.getcwd(), datasets_path)
+ save_data_path = os.path.join(os.getcwd(), save_data_path)
  image_data = None
- for data_path in data_list:
+ for dir in input_data_list:
  if image_data is not None:
- tmp_config = Config(data_format, data_path)
+ tmp_config = Config(format=intput_data_format, data_path=dir)
  # if not isinstance(tmp_config, list):
  # tmp_config = [tmp_config]
  image_data.images.extend(tmp_config.images)
  else:
- image_data = Config(data_format, data_path)
+ image_data = Config(format=intput_data_format, data_path=dir)

  if not isinstance(image_data.images, list):
  image_data.images = [image_data.images]
@@ -176,14 +174,10 @@ def extract_pwdata(data_list:list[str],
  image_data.images = tmp

  image_data.to(
- output_path=datasets_path,
- save_format=PWDATA.pwmlff_npy,
- train_ratio = train_valid_ratio,
- train_data_path="train",
- valid_data_path="valid",
- random=data_shuffle,
- seed = 2024,
- retain_raw = False
+ data_path =save_data_path,
+ data_name =save_data_name,
+ format =save_data_format,
+ random=data_shuffle
  )

  if __name__ == "__main__":
@@ -275,8 +269,7 @@ if __name__ == "__main__":
  data_list.append(outcar)

  datasets_path = "/data/home/wuxingxing/datas/al_dir/HfO2/dftb/init_data_200"
- extract_pwdata(data_list=data_list,
- data_format="vasp/outcar",
- datasets_path=datasets_path,
- merge_data=True
+ extract_pwdata(input_data_list=data_list,
+ intput_data_format="vasp/outcar",
+ save_data_path=datasets_path
  )
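
Note: with the train/valid split parameters (train_valid_ratio, merge_data) removed, extract_pwdata now writes a single dataset in a caller-chosen format, defaulting to extxyz under a directory named PWdata. A sketch of a full call using only the new keyword names (the input paths are placeholders; note the parameter is spelled intput_data_format in this release):

extract_pwdata(input_data_list=["./OUTCAR_1", "./OUTCAR_2"],  # hypothetical inputs
               intput_data_format="vasp/outcar",  # spelling as released
               save_data_path="./",
               save_data_name="PWdata",
               save_data_format="extxyz",
               data_shuffle=False,
               interval=1)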
pwact/main.py CHANGED
@@ -24,14 +24,14 @@ from pwact.active_learning.environment import check_envs

  from pwact.data_format.configop import extract_pwdata
  from pwact.active_learning.explore.select_image import select_image, print_select_image
- from pwact.utils.process_tool import kill_process
+ from pwact.utils.process_tool import kill_process, get_pid
  def run_iter():
  system_json = json.load(open(sys.argv[2]))
  if "work_dir" in system_json.keys():
  os.chdir(system_json["work_dir"])
- pid = os.getpid()
+ pid = get_pid()
  with open("./PID", 'w') as wf:
- wf.write(str(pid))
+ wf.write(pid)

  system_info = convert_keys_to_lowercase(system_json)
  machine_json = json.load(open(sys.argv[3]))
@@ -149,9 +149,9 @@ def init_bulk():
  system_info = convert_keys_to_lowercase(system_json)
  if "work_dir" in system_json.keys():
  os.chdir(system_json["work_dir"])
- pid = os.getpid()
+ pid = get_pid()
  with open("./PID", 'w') as wf:
- wf.write(str(pid))
+ wf.write(pid)

  machine_info = convert_keys_to_lowercase(json.load(open(sys.argv[3])))
  input_param = InitBulkParam(system_info)
@@ -163,35 +163,6 @@ def init_bulk():
  init_bulk_run(resource, input_param)
  print("Init Bulk Work Done!")

- def to_pwdata(input_cmds:list):
- parser = argparse.ArgumentParser()
- parser.add_argument('-t', '--worktype', help="specify work type, default is 'to_pwdata'", type=str, default='to_pwdata')
- parser.add_argument('-i', '--input', help='specify input outcars or movement files', nargs='+', type=str, default=None)
- parser.add_argument('-f', '--format', help="specify input file format, 'vasp/outcar' or 'pwmat/movement', default is 'pwmat/movement'", type=str, default="pwmat/movement")
- parser.add_argument('-s', '--savepath', help="specify stored directory, default is 'PWdata'", type=str, default='PWdata')
- parser.add_argument('-o', '--train_valid_ratio', help='specify stored directory, default=0.8', type=float, default=0.8)
- # parser.add_argument('-r', '--data_shuffle', help='specify stored directory, default is True', type=bool, required=False, default=True)
- # parser.add_argument('-d', '--do_shuffle', help='if -d exits, doing the data shuffling', action='store_false')
- parser.add_argument('-r', '--data_shuffle', help='Specify whether to do data shuffle operation, -r is True', action='store_true')
- parser.add_argument('-m', '--merge', help='Specify whether to merge inputs to one, -m is True', action='store_true')
- # parser.add_argument('-m', '--merge', help='merge inputs to one, default is False', type=bool, required=False, default=False)
- parser.add_argument('-g', '--gap', help='Trail point interval before and after, default is 1', type=int, default=1)
-
- parser.add_argument('-w', '--work_dir', help='specify work dir, default is current dir', type=str, default='./')
- args = parser.parse_args(input_cmds)
- print(args.work_dir)
- os.chdir(args.work_dir)
-
- extract_pwdata(data_list=args.input,
- data_format=args.format,
- datasets_path=args.savepath,
- train_valid_ratio=args.train_valid_ratio,
- data_shuffle=args.data_shuffle,
- merge_data=args.merge,
- interval = args.gap
- )
-
-
  def gather_pwmata(input_cmds):
  parser = argparse.ArgumentParser()
  parser.add_argument('-i', '--input_dir', help="specify the dir above the iterations, the default dir is current dir './'\nthe result could be found in './final_pwdata'", type=str, default='./')
@@ -267,10 +238,12 @@ def kill_job():
  # os.chdir(system_json["work_dir"])
  try:
  with open("./PID", 'r') as rf:
- pid = rf.readline()
- except:
+ pid_str_info = rf.readline().split()
+ pid = pid_str_info[1]
+ jobid = pid_str_info[3] if "job" in pid_str_info else None
+ except :
  raise Exception("Error parsing PID file !")
- kill_process(int(pid))
+ kill_process(pid, jobid)
  if sys.argv[2].lower() == "init_bulk":
  # search all jobs
  init_scancel_jobs(os.getcwd())
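
Note: read together, the run_iter/init_bulk hunks (which write get_pid() output to ./PID) and the kill_job hunk (which splits that file and takes tokens 1 and 3) imply the PID file now holds labelled fields rather than a bare integer, plausibly "pid <os-pid> job <scheduler-job-id>". get_pid itself is not shown in this diff; a speculative reconstruction under that assumption:

import os

def get_pid() -> str:
    # Hypothetical: kill_job() reads split()[1] as the pid and split()[3]
    # as the job id when the literal token "job" is present.
    jobid = os.environ.get("SLURM_JOB_ID")  # scheduler guess; scancel is used elsewhere
    if jobid:
        return "pid {} job {}".format(os.getpid(), jobid)
    return "pid {}".format(os.getpid())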
@@ -349,7 +322,7 @@ def main():

  elif "to_pwdata".upper() == sys.argv[1].upper():#these function may use pwdata command
  print("\n\nWarning! This method has been abandoned, new conversion methods refer to the pwdata documentation http://doc.lonxun.com/PWMLFF/Appendix-2/\n\n")
- to_pwdata(sys.argv[2:])
+ # to_pwdata(sys.argv[2:])

  elif "run".upper() == sys.argv[1].upper():
  if len(sys.argv) == 2 or "-h".upper() == sys.argv[2].upper() or \
@@ -194,6 +194,7 @@ def make_cp2k_input_from_external(cell, coord_file, exinput_path, gaussian_base_
  # read the input content as string
  with open(exinput_path) as f:
  exinput = f.readlines()
+ exinput = [line.upper() for line in exinput]
  # replace the cell string
  start_cell = 0
  end_cell = 0
@@ -210,7 +211,9 @@ def make_cp2k_input_from_external(cell, coord_file, exinput_path, gaussian_base_
  end_dft = 0
  basis_set_file_name = -1
  potential_file_name = -1
-
+ start_kpoint = -1
+ end_kpoint = -1
+ kpoint_line = 0
  # delete the BASIS_SET_FILE_NAME and POTENTIAL_FILE_NAME line
  for line_idx, line in enumerate(exinput):
  line = line.upper()
@@ -222,15 +225,50 @@ def make_cp2k_input_from_external(cell, coord_file, exinput_path, gaussian_base_
  basis_set_file_name = line_idx
  if "POTENTIAL_FILE_NAME" in line:
  potential_file_name = line_idx
+ if "&KPOINTS" in line:
+ start_kpoint = line_idx
+ if "&END KPOINTS" in line:
+ end_kpoint = line_idx
+ if "MONKHORST-PACK" in line:
+ kpoint_line = line_idx
+
  if start_dft == end_dft:
  raise Exception("{} extarcted error! Can not find DFT set!".format(exinput_path))
+
+ # set kspacing and pseudo file
+ if gaussian_base_param["KSPACING"] is not None and kpoint_line > 0:
+ raise Exception("The 'kspacing' in 'gaussian_param' and 'KPOINTS' in {} file cannot be set set simultaneously!".format(os.path.basename(exinput_path)))
+ elif gaussian_base_param["KSPACING"] is None and kpoint_line == 0:
+ kspacing_content = make_kspacing_kpoints(cell, 0.5)
+ elif gaussian_base_param["KSPACING"] is not None:
+ kspacing_content = make_kspacing_kpoints(cell, gaussian_base_param["KSPACING"])
+ else:
+ kspacing_content = "\n".join(exinput[start_kpoint:end_kpoint+1])
  basis_set_file_name, potential_file_name = sorted([basis_set_file_name, potential_file_name], reverse=True)
- if basis_set_file_name != -1:
- exinput.pop(basis_set_file_name)
- if potential_file_name != -1:
- exinput.pop(potential_file_name)
+ if kpoint_line > 0:
+ if start_kpoint > basis_set_file_name:
+ for del_idx in list(range(end_kpoint, start_kpoint-1, -1)):
+ exinput.pop(del_idx)
+ if basis_set_file_name != -1:
+ exinput.pop(basis_set_file_name)
+ if potential_file_name != -1:
+ exinput.pop(potential_file_name)
+ else:
+ if basis_set_file_name != -1:
+ exinput.pop(basis_set_file_name)
+ if potential_file_name != -1:
+ exinput.pop(potential_file_name)
+ for del_idx in list(range(end_kpoint, start_kpoint-1, -1)):
+ exinput.pop(del_idx)
+ else:
+ if basis_set_file_name != -1:
+ exinput.pop(basis_set_file_name)
+ if potential_file_name != -1:
+ exinput.pop(potential_file_name)
+
  exinput.insert(start_dft+1, " BASIS_SET_FILE_NAME {}\n".format(gaussian_base_param["BASIS_SET_FILE_NAME"]))
  exinput.insert(start_dft+2, " POTENTIAL_FILE_NAME {}\n".format(gaussian_base_param["POTENTIAL_FILE_NAME"]))
+ exinput.insert(start_dft+3, kspacing_content)

  for line_idx, line in enumerate(exinput):
  line = line.upper()
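
Note: the deletion branches above always pop lines from the highest index downward (the KPOINTS range is walked in reverse, and the basis/potential indices are pre-sorted descending); popping a lower index first would shift every later index. A minimal illustration of why the order matters:

lines = ["a", "b", "c", "d"]
for idx in sorted([1, 2], reverse=True):  # pop from the back first
    lines.pop(idx)
print(lines)  # ['a', 'd']; popping index 1 first would instead leave ['a', 'c']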
@@ -326,6 +364,25 @@ def get_atom_type_from_config(coord_file:str):
  continue
  return res

+ def _reciprocal_box(box):
+ rbox = np.linalg.inv(box)
+ rbox = rbox.T
+ return rbox
+
+ def make_kspacing_kpoints(lattice, kspacing):
+ rbox = _reciprocal_box(lattice)
+ kpoints = [
+ round(2 * np.pi * np.linalg.norm(ii) / kspacing) for ii in rbox
+ ]
+ kpoints[0] = 1 if kpoints[0] == 0 else kpoints[0]
+ kpoints[1] = 1 if kpoints[1] == 0 else kpoints[1]
+ kpoints[2] = 1 if kpoints[2] == 0 else kpoints[2]
+ ret = " &KPOINTS\n"
+ ret += " SCHEME MONKHORST-PACK %d %d %d\n" % (kpoints[0], kpoints[1], kpoints[2])
+ ret += " &END KPOINTS\n"
+ return ret
+ # ret = _make_pwmat_kp_mp(kpoints)
+
  # if __name__=="__main__":
  # import dpdata
  # poscar = "/data/home/wuxingxing/datas/al_dir/si_4_vasp/init_bulk/collection/init_config_0/0.9_scale.poscar"
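
Note: the new make_kspacing_kpoints helper derives a Monkhorst-Pack grid from a target k-spacing: for each row b_i of the reciprocal cell inv(lattice).T it takes round(2*pi*|b_i| / kspacing), flooring each count at 1. A quick worked check with a hypothetical 5 Angstrom cubic cell and the 0.5 default:

import numpy as np

lattice = np.eye(3) * 5.0         # hypothetical cubic cell, a = 5 Angstrom
rbox = np.linalg.inv(lattice).T   # reciprocal rows, |b_i| = 0.2
k = [round(2 * np.pi * np.linalg.norm(b) / 0.5) for b in rbox]
print(k)  # [3, 3, 3]: 2*pi*0.2/0.5 ~= 2.51 rounds to 3

so the emitted block would read "SCHEME MONKHORST-PACK 3 3 3" inside &KPOINTS/&END KPOINTS.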
@@ -24,7 +24,7 @@ def make_pair_style(md_type, forcefiled, atom_type:list[int], dump_info:str):
  pair_names = ""
  for fi in forcefiled:
  pair_names += "{} ".format(os.path.basename(fi))
- pair_style = "pair_style pwmlff {} {} {}\n".format(len(forcefiled), pair_names, dump_info)
+ pair_style = "pair_style matpl {} {}\n".format(pair_names, dump_info)
  atom_names = " ".join(map(str, atom_type))
  pair_style += "pair_coeff * * {}\n".format(atom_names)
  return pair_style
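
Note: the LAMMPS pair style emitted for MD runs is renamed from pwmlff to matpl, and the leading force-field count argument is dropped. A sketch of the generated snippet for one force field and two atom types (the md_type value, file name, atomic numbers, and dump string are placeholders; md_type is not used in the lines shown):

print(make_pair_style("nvt", ["./model_record/nep5.txt"], [8, 72], "dump_info"))
# pair_style matpl nep5.txt  dump_info
# pair_coeff * * 8 72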