pwact 0.1.27__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pwact/active_learning/environment.py +13 -11
- pwact/active_learning/explore/run_model_md.py +5 -2
- pwact/active_learning/explore/select_image.py +49 -10
- pwact/active_learning/init_bulk/init_bulk_run.py +18 -14
- pwact/active_learning/label/labeling.py +7 -17
- pwact/active_learning/slurm/slurm.py +37 -9
- pwact/active_learning/train/train_model.py +26 -13
- pwact/active_learning/user_input/init_bulk_input.py +4 -2
- pwact/active_learning/user_input/iter_input.py +19 -4
- pwact/active_learning/user_input/scf_param.py +2 -0
- pwact/active_learning/user_input/train_param/nep_param.py +2 -2
- pwact/active_learning/user_input/train_param/train_param.py +24 -17
- pwact/active_learning/user_input/train_param/work_file_param.py +115 -93
- pwact/data_format/configop.py +29 -36
- pwact/main.py +11 -38
- pwact/utils/app_lib/cp2k.py +62 -5
- pwact/utils/app_lib/lammps.py +1 -1
- pwact/utils/constant.py +14 -3
- pwact/utils/file_operation.py +14 -0
- pwact/utils/process_tool.py +22 -11
- pwact/utils/slurm_script.py +20 -17
- {pwact-0.1.27.dist-info → pwact-0.2.0.dist-info}/METADATA +1 -1
- {pwact-0.1.27.dist-info → pwact-0.2.0.dist-info}/RECORD +27 -27
- {pwact-0.1.27.dist-info → pwact-0.2.0.dist-info}/LICENSE +0 -0
- {pwact-0.1.27.dist-info → pwact-0.2.0.dist-info}/WHEEL +0 -0
- {pwact-0.1.27.dist-info → pwact-0.2.0.dist-info}/entry_points.txt +0 -0
- {pwact-0.1.27.dist-info → pwact-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -7,9 +7,6 @@ class WorkFileStructure(object):
|
|
|
7
7
|
'''
|
|
8
8
|
description:
|
|
9
9
|
param {*} self
|
|
10
|
-
param {*} work_dir: is the work path, model training, feature generation, and inference work \
|
|
11
|
-
are all carried out under the modified directory\
|
|
12
|
-
if the user does not set it, it defaults to json_dir
|
|
13
10
|
param {*} json_dir: The trained models, features, and inference results are collected in this directory
|
|
14
11
|
return {*}
|
|
15
12
|
author: wuxingxing
|
|
@@ -20,27 +17,32 @@ class WorkFileStructure(object):
|
|
|
20
17
|
self.reserve_work_dir = reserve_work_dir
|
|
21
18
|
self.reserve_feature = reserve_feature
|
|
22
19
|
self.movement_name = "MOVEMENT"
|
|
23
|
-
self.raw_path = []
|
|
20
|
+
# self.raw_path = []
|
|
24
21
|
self.train_feature_path = []
|
|
22
|
+
self.valid_feature_path = []
|
|
25
23
|
self.test_feature_path = []
|
|
26
|
-
self.datasets_path = []
|
|
24
|
+
# self.datasets_path = []
|
|
27
25
|
self.model_load_path = ""
|
|
28
|
-
|
|
26
|
+
|
|
27
|
+
self.train_data_path = []
|
|
28
|
+
self.valid_data_path = []
|
|
29
|
+
self.test_data_path = []
|
|
30
|
+
|
|
31
|
+
if self.model_type == "NN" or self.model_type == "LINEAR":
|
|
32
|
+
self._set_NN_PWdata_dirs()
|
|
29
33
|
# def _set_training_path(self, train_raw_path:list, train_feature_path:list, train_dir: str):
|
|
30
34
|
# self.raw_path = train_raw_path
|
|
31
35
|
# self.train_feature_path = train_feature_path
|
|
32
36
|
# self.train_dir = os.path.join(self.json_dir, train_dir)
|
|
33
37
|
|
|
34
|
-
def _set_alive_atomic_energy(self, alive_atomic_energy:bool):
|
|
35
|
-
self.alive_atomic_energy = alive_atomic_energy
|
|
36
38
|
|
|
37
|
-
def _set_data_file_paths(self, trainSetDir:str, dRFeatureInputDir:str, dRFeatureOutputDir:str,\
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
39
|
+
# def _set_data_file_paths(self, trainSetDir:str, dRFeatureInputDir:str, dRFeatureOutputDir:str,\
|
|
40
|
+
# trainDataPath:str, validDataPath:str):
|
|
41
|
+
# self.trainSetDir = trainSetDir
|
|
42
|
+
# self.dRFeatureInputDir = dRFeatureInputDir# it is not used 2024.04.03
|
|
43
|
+
# self.dRFeatureOutputDir = dRFeatureOutputDir# it is not used 2024.04.03
|
|
44
|
+
# self.trainDataPath = trainDataPath
|
|
45
|
+
# self.validDataPath = validDataPath
|
|
44
46
|
|
|
45
47
|
def _set_p_matrix_paths(self, p_path, save_p_matrix:bool):
|
|
46
48
|
self.save_p_matrix = save_p_matrix
|
|
@@ -56,30 +58,12 @@ class WorkFileStructure(object):
|
|
|
56
58
|
self.model_load_path = model_load_path
|
|
57
59
|
|
|
58
60
|
def set_inference_paths(self, json_input:dict, is_nep_txt:bool=False):
|
|
59
|
-
# load test files and check if they are exist
|
|
60
|
-
raw_path = get_parameter("raw_files", json_input, [])
|
|
61
|
-
for raw_data in raw_path:
|
|
62
|
-
if os.path.exists(raw_data) is False:
|
|
63
|
-
raise Exception("Error! test data: {} file not exist!".format(raw_data))
|
|
64
|
-
raw_path = [os.path.abspath(_) for _ in raw_path]
|
|
65
|
-
self.raw_path = raw_path
|
|
66
|
-
datasets_path = get_parameter("datasets_path", json_input, [])
|
|
67
|
-
if isinstance(datasets_path, list) is False:
|
|
68
|
-
datasets_path = [datasets_path]
|
|
69
|
-
for data_path in datasets_path:
|
|
70
|
-
if os.path.exists(data_path) is False:
|
|
71
|
-
raise Exception("{} file is not exists, please check!".format(data_path))
|
|
72
|
-
|
|
73
61
|
test_dir_name = get_parameter("test_dir_name", json_input, "test_result")
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
raise Exception("Error! test_feature_path {} does not exist!".format(feat_path))
|
|
80
|
-
test_feature_path = [os.path.abspath(_) for _ in test_feature_path]
|
|
81
|
-
self.test_feature_path = test_feature_path'''
|
|
82
|
-
self.datasets_path = [os.path.abspath(_) for _ in datasets_path]
|
|
62
|
+
|
|
63
|
+
if json_input["model_type"].upper() in ["LINEAR", "NN"]:
|
|
64
|
+
self.test_dir = os.path.join(self.nn_work, test_dir_name)
|
|
65
|
+
else:
|
|
66
|
+
self.test_dir = os.path.join(self.json_dir, test_dir_name)
|
|
83
67
|
|
|
84
68
|
if not json_input["model_type"].upper() == "LINEAR":
|
|
85
69
|
if is_nep_txt:
|
|
@@ -90,8 +74,8 @@ class WorkFileStructure(object):
|
|
|
90
74
|
if os.path.exists(self.model_load_path) is False:
|
|
91
75
|
raise Exception("the model_load_path is not exist: {}, please speccified 'model_load_path' at json file".format(self.model_load_path))
|
|
92
76
|
|
|
93
|
-
if "trainDataPath" in json_input.keys():# for test, people could set the 'trainSetDir' to 'valid', so the valid data in train dir could be used for valid
|
|
94
|
-
|
|
77
|
+
# if "trainDataPath" in json_input.keys():# for test, people could set the 'trainSetDir' to 'valid', so the valid data in train dir could be used for valid
|
|
78
|
+
# self.trainDataPath = json_input["trainDataPath"]
|
|
95
79
|
|
|
96
80
|
'''alive_atomic_energy = is_alive_atomic_energy(datasets_path)
|
|
97
81
|
self._set_alive_atomic_energy(alive_atomic_energy)'''
|
|
@@ -109,11 +93,17 @@ class WorkFileStructure(object):
|
|
|
109
93
|
def set_train_feature_path(self, feature_path:list):
|
|
110
94
|
self.train_feature_path.extend(feature_path)
|
|
111
95
|
|
|
96
|
+
def set_valid_feature_path(self, feature_path:list):
|
|
97
|
+
self.valid_feature_path.extend(feature_path)
|
|
98
|
+
|
|
112
99
|
def set_test_feature_path(self, feature_path:list):
|
|
113
100
|
self.test_feature_path.extend(feature_path)
|
|
114
101
|
|
|
102
|
+
# delete in 2025
|
|
115
103
|
def set_datasets_path(self, datasets_path:list):
|
|
116
|
-
|
|
104
|
+
pass
|
|
105
|
+
# self.datasets_path.extend(datasets_path)
|
|
106
|
+
|
|
117
107
|
|
|
118
108
|
'''
|
|
119
109
|
description:
|
|
@@ -143,7 +133,6 @@ class WorkFileStructure(object):
|
|
|
143
133
|
best_model_path = os.path.join(self.json_dir, "best_model.ckpt")
|
|
144
134
|
forcefield_name = get_parameter("forcefield_name", json_input, "forcefield.ff")
|
|
145
135
|
forcefield_dir = get_parameter("forcefield_dir", json_input, "forcefield")
|
|
146
|
-
self.set_forcefield_path(forcefield_dir, forcefield_name)
|
|
147
136
|
# p matix, resume p matrix when recover is not realized
|
|
148
137
|
# p matrix should extract to checkpoint files or a single file.
|
|
149
138
|
# current not realized
|
|
@@ -156,79 +145,112 @@ class WorkFileStructure(object):
|
|
|
156
145
|
|
|
157
146
|
# common dir
|
|
158
147
|
model_store_dir = get_parameter("model_store_dir", json_input, "model_record")
|
|
159
|
-
|
|
148
|
+
if self.model_type == "NN":
|
|
149
|
+
model_store_dir = os.path.join(self.nn_work, model_store_dir)
|
|
150
|
+
self.forcefield_dir = os.path.join(self.nn_work, forcefield_dir)
|
|
151
|
+
self.forcefield_name = forcefield_name
|
|
152
|
+
else:
|
|
153
|
+
self.forcefield_dir = os.path.join(self.json_dir, forcefield_dir)
|
|
154
|
+
self.forcefield_name = forcefield_name
|
|
155
|
+
model_store_dir = os.path.join(self.json_dir, model_store_dir)
|
|
160
156
|
self._set_model_paths(model_store_dir = model_store_dir, \
|
|
161
157
|
model_name = model_name, best_model_path=best_model_path)
|
|
162
|
-
|
|
158
|
+
|
|
159
|
+
# self._set_PWdata_dirs(json_input)
|
|
163
160
|
|
|
164
161
|
def set_train_valid_file(self, json_input:dict):
|
|
165
162
|
# set trian movement file path
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
if
|
|
169
|
-
raise Exception("Error!
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
163
|
+
self.format = get_parameter("format", json_input, "pwmat/movement").lower() # used in new file and raw_file
|
|
164
|
+
if self.model_type.upper() in ["NN", "LINEAR"]:
|
|
165
|
+
if self.format != "pwmat/movement":
|
|
166
|
+
raise Exception("Error! For NN or Linear model, the input 'format' should be 'pwmat/movement'!")
|
|
167
|
+
train_data = get_parameter("train_data", json_input, [])
|
|
168
|
+
|
|
169
|
+
for _train_data in train_data:
|
|
170
|
+
if os.path.exists(_train_data) is False:
|
|
171
|
+
raise Exception("Error! train data: {} file not exist!".format(_train_data))
|
|
172
|
+
else:
|
|
173
|
+
self.train_data_path.append(os.path.abspath(_train_data))
|
|
174
|
+
valid_data = get_parameter("valid_data", json_input, [])
|
|
175
|
+
for _valid_data in valid_data:
|
|
176
|
+
if os.path.exists(_valid_data) is False:
|
|
177
|
+
raise Exception("Error! valid data: {} file not exist!".format(_valid_data))
|
|
178
|
+
else:
|
|
179
|
+
self.valid_data_path.append(os.path.abspath(_valid_data))
|
|
180
|
+
test_data = get_parameter("test_data", json_input, [])
|
|
181
|
+
for _test_data in test_data:
|
|
182
|
+
if os.path.exists(_test_data) is False:
|
|
183
|
+
raise Exception("Error! test data: {} file not exist!".format(_test_data))
|
|
184
|
+
else:
|
|
185
|
+
self.test_data_path.append(os.path.abspath(_test_data))
|
|
186
|
+
|
|
187
|
+
if self.format == "pwmat/movement": # for nn
|
|
188
|
+
self.alive_atomic_energy = False
|
|
189
|
+
if len(self.train_data_path) > 0:
|
|
190
|
+
alive_atomic_energy = is_alive_atomic_energy(self.train_data_path)
|
|
191
|
+
self.alive_atomic_energy = alive_atomic_energy
|
|
192
|
+
|
|
193
|
+
if len(self.valid_data_path) > 0:
|
|
194
|
+
alive_atomic_energy = is_alive_atomic_energy(self.valid_data_path)
|
|
195
|
+
self.alive_atomic_energy = alive_atomic_energy
|
|
196
|
+
|
|
197
|
+
if len(self.test_data_path) > 0:
|
|
198
|
+
alive_atomic_energy = is_alive_atomic_energy(self.test_data_path)
|
|
199
|
+
self.alive_atomic_energy = alive_atomic_energy
|
|
200
|
+
|
|
201
|
+
def set_nn_file(self, json_input:dict):
|
|
202
|
+
self.train_feature_path = []
|
|
203
|
+
self.valid_feature_path = []
|
|
204
|
+
self.test_feature_path = []
|
|
181
205
|
train_feature_path = get_parameter("train_feature_path", json_input, [])
|
|
182
206
|
for feat_path in train_feature_path:
|
|
183
207
|
if os.path.exists(feat_path) is False:
|
|
184
|
-
raise Exception("Error!
|
|
185
|
-
train_feature_path = [os.path.abspath(_) for _ in train_feature_path]
|
|
186
|
-
self._set_training_path(raw_path=raw_path,
|
|
187
|
-
train_feature_path=train_feature_path,
|
|
188
|
-
train_dir=os.path.join(self.work_dir, "feature"))
|
|
189
|
-
|
|
190
|
-
alive_atomic_energy = get_parameter("alive_atomic_energy", json_input, False)
|
|
191
|
-
alive_atomic_energy = is_alive_atomic_energy(raw_path)
|
|
192
|
-
self._set_alive_atomic_energy(alive_atomic_energy)'''
|
|
208
|
+
raise Exception("Error! train_feature_path: {} file not exist!".format(feat_path))
|
|
209
|
+
self.train_feature_path = [os.path.abspath(_) for _ in train_feature_path]
|
|
193
210
|
|
|
194
|
-
|
|
211
|
+
valid_feature_path = get_parameter("valid_feature_path", json_input, [])
|
|
212
|
+
for feat_path in valid_feature_path:
|
|
213
|
+
if os.path.exists(feat_path) is False:
|
|
214
|
+
raise Exception("Error! valid_feature_path: {} file not exist!".format(feat_path))
|
|
215
|
+
self.valid_feature_path = [os.path.abspath(_) for _ in valid_feature_path]
|
|
216
|
+
|
|
217
|
+
test_feature_path = get_parameter("test_feature_path", json_input, [])
|
|
218
|
+
for feat_path in test_feature_path:
|
|
219
|
+
if os.path.exists(feat_path) is False:
|
|
220
|
+
raise Exception("Error! test_feature_path: {} file not exist!".format(feat_path))
|
|
221
|
+
self.test_feature_path = [os.path.abspath(_) for _ in test_feature_path]
|
|
222
|
+
|
|
223
|
+
def _set_NN_PWdata_dirs(self):
|
|
195
224
|
# set Pwdata dir file structure, they are used in feature generation
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
self.
|
|
225
|
+
self.nn_work = os.path.join(os.getcwd(), "work_dir") # the work dir of nn training or test
|
|
226
|
+
self.trainSetDir = 'PWdata'
|
|
227
|
+
self.dRFeatureInputDir = 'input'# it is not used 2024.04.03
|
|
228
|
+
self.dRFeatureOutputDir = 'output'# it is not used 2024.04.03
|
|
229
|
+
# self.trainDataPath = 'train'
|
|
230
|
+
# self.validDataPath = 'valid'
|
|
231
|
+
# self._set_data_file_paths(trainSetDir, dRFeatureInputDir, dRFeatureOutputDir, trainDataPath, validDataPath)
|
|
202
232
|
|
|
203
233
|
def set_nep_native_file_paths(self):
|
|
204
|
-
self.
|
|
205
|
-
self.nep_test_xyz_path = "test.xyz"
|
|
206
|
-
self.nep_in_file = "nep.in"
|
|
207
|
-
self.nep_model_file = "nep_to_lmps.txt"
|
|
208
|
-
self.nep_restart_file = "nep.restart"
|
|
234
|
+
self.nep_model_file = "nep5.txt"
|
|
209
235
|
|
|
210
236
|
def get_data_file_structure(self):
|
|
211
237
|
file_dict = {}
|
|
212
238
|
file_dict["trainSetDir"] = self.trainSetDir
|
|
213
239
|
file_dict["dRFeatureInputDir"] = self.dRFeatureInputDir
|
|
214
240
|
file_dict["dRFeatureOutputDir"] = self.dRFeatureOutputDir
|
|
215
|
-
file_dict["trainDataPath"] = self.trainDataPath
|
|
216
|
-
file_dict["validDataPath"] = self.validDataPath
|
|
241
|
+
# file_dict["trainDataPath"] = self.trainDataPath
|
|
242
|
+
# file_dict["validDataPath"] = self.validDataPath
|
|
217
243
|
return file_dict
|
|
218
244
|
|
|
219
|
-
def set_forcefield_path(self, forcefield_dir:str, forcefield_name:str):
|
|
220
|
-
self.forcefield_dir = os.path.join(self.json_dir, forcefield_dir)
|
|
221
|
-
self.forcefield_name = forcefield_name
|
|
222
|
-
|
|
223
245
|
def to_dict(self):
|
|
224
246
|
dicts = {}
|
|
225
|
-
|
|
226
|
-
# dicts["reserve_work_dir"] = self.reserve_work_dir
|
|
227
|
-
|
|
247
|
+
dicts["format"] = self.format
|
|
228
248
|
if self.model_load_path is not None and os.path.exists(self.model_load_path):
|
|
229
249
|
dicts["model_load_file"] = self.model_load_path
|
|
230
|
-
if len(self.
|
|
231
|
-
dicts["
|
|
232
|
-
|
|
233
|
-
|
|
250
|
+
if len(self.train_data_path) > 0:
|
|
251
|
+
dicts["train_data"] = self.train_data_path
|
|
252
|
+
if len(self.valid_data_path) > 0:
|
|
253
|
+
dicts["valid_data"] = self.valid_data_path
|
|
254
|
+
if len(self.test_data_path) > 0:
|
|
255
|
+
dicts["test_data"] = self.test_data_path
|
|
234
256
|
return dicts
|
pwact/data_format/configop.py
CHANGED
|
@@ -58,9 +58,9 @@ def save_config(config, input_format:str = None, wrap = False, direct = True, so
|
|
|
58
58
|
write_to_file(os.path.join(save_path, CP2K.cell_txt), lattice_line, 'w')
|
|
59
59
|
|
|
60
60
|
else:
|
|
61
|
-
config.to(
|
|
61
|
+
config.to(data_path =save_path,
|
|
62
62
|
data_name =save_name,
|
|
63
|
-
|
|
63
|
+
format =save_format,
|
|
64
64
|
direct =direct,
|
|
65
65
|
sort =sort,
|
|
66
66
|
wrap =wrap
|
|
@@ -90,9 +90,9 @@ def do_super_cell(config_file, input_format:str=None, supercell_matrix:list[int]
|
|
|
90
90
|
# Make a supercell
|
|
91
91
|
supercell = make_supercell(config, supercell_matrix, pbc)
|
|
92
92
|
# Write out the structure
|
|
93
|
-
supercell.to(
|
|
94
|
-
data_name
|
|
95
|
-
|
|
93
|
+
supercell.to(data_path = save_path,
|
|
94
|
+
data_name = save_name,
|
|
95
|
+
format = save_format,
|
|
96
96
|
direct = direct,
|
|
97
97
|
sort = sort)
|
|
98
98
|
return os.path.join(save_path, save_name)
|
|
@@ -101,9 +101,9 @@ def do_scale(config, input_format:str=None, scale_factor:float=None,
|
|
|
101
101
|
direct:bool=True, sort:bool=True, save_format:str=None, save_path:str=None, save_name:str=None):
|
|
102
102
|
config = Config(format=input_format, data_path=config)
|
|
103
103
|
scaled_struct = scale_cell(config, scale_factor)
|
|
104
|
-
scaled_struct.to(
|
|
105
|
-
data_name
|
|
106
|
-
|
|
104
|
+
scaled_struct.to(data_path = save_path,
|
|
105
|
+
data_name = save_name,
|
|
106
|
+
format = save_format,
|
|
107
107
|
direct = direct,
|
|
108
108
|
sort = sort)
|
|
109
109
|
|
|
@@ -123,9 +123,9 @@ def do_pertub(config, input_format:str=None, pert_num:int=None, cell_pert_fracti
|
|
|
123
123
|
atom_pert_distance = atom_pert_distance)
|
|
124
124
|
|
|
125
125
|
for tmp_perturbed_idx, tmp_pertubed_struct in enumerate(perturbed_structs):
|
|
126
|
-
tmp_pertubed_struct.to(
|
|
126
|
+
tmp_pertubed_struct.to(data_path = save_path,
|
|
127
127
|
data_name = "{}_{}".format(tmp_perturbed_idx, save_name),
|
|
128
|
-
|
|
128
|
+
format = save_format,
|
|
129
129
|
direct = direct,
|
|
130
130
|
sort = sort)
|
|
131
131
|
|
|
@@ -133,35 +133,33 @@ def do_pertub(config, input_format:str=None, pert_num:int=None, cell_pert_fracti
|
|
|
133
133
|
|
|
134
134
|
'''
|
|
135
135
|
description:
|
|
136
|
-
|
|
137
|
-
else:
|
|
138
|
-
save pwdata to datasets_path/data_name/train or valid
|
|
136
|
+
save the inputfiles to pwmlff/npy format data
|
|
139
137
|
return {*}
|
|
140
138
|
author: wuxingxing
|
|
141
139
|
'''
|
|
142
|
-
def extract_pwdata(
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
140
|
+
def extract_pwdata(input_data_list:list[str],
|
|
141
|
+
intput_data_format:str="pwmat/movement",
|
|
142
|
+
save_data_path:str="./",
|
|
143
|
+
save_data_name="PWdata",
|
|
144
|
+
save_data_format="extxyz",
|
|
145
|
+
data_shuffle:bool=False,
|
|
148
146
|
interval:int=1
|
|
149
147
|
):
|
|
150
148
|
# if data_format == DFT_STYLE.cp2k:
|
|
151
149
|
# raise Exception("not relized cp2k pwdata convert")
|
|
152
150
|
|
|
153
|
-
if not os.path.isabs(
|
|
151
|
+
if not os.path.isabs(save_data_path):
|
|
154
152
|
# data_name = datasets_path
|
|
155
|
-
|
|
153
|
+
save_data_path = os.path.join(os.getcwd(), save_data_path)
|
|
156
154
|
image_data = None
|
|
157
|
-
for
|
|
155
|
+
for dir in input_data_list:
|
|
158
156
|
if image_data is not None:
|
|
159
|
-
tmp_config = Config(
|
|
157
|
+
tmp_config = Config(format=intput_data_format, data_path=dir)
|
|
160
158
|
# if not isinstance(tmp_config, list):
|
|
161
159
|
# tmp_config = [tmp_config]
|
|
162
160
|
image_data.images.extend(tmp_config.images)
|
|
163
161
|
else:
|
|
164
|
-
image_data = Config(
|
|
162
|
+
image_data = Config(format=intput_data_format, data_path=dir)
|
|
165
163
|
|
|
166
164
|
if not isinstance(image_data.images, list):
|
|
167
165
|
image_data.images = [image_data.images]
|
|
@@ -176,14 +174,10 @@ def extract_pwdata(data_list:list[str],
|
|
|
176
174
|
image_data.images = tmp
|
|
177
175
|
|
|
178
176
|
image_data.to(
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
valid_data_path="valid",
|
|
184
|
-
random=data_shuffle,
|
|
185
|
-
seed = 2024,
|
|
186
|
-
retain_raw = False
|
|
177
|
+
data_path =save_data_path,
|
|
178
|
+
data_name =save_data_name,
|
|
179
|
+
format =save_data_format,
|
|
180
|
+
random=data_shuffle
|
|
187
181
|
)
|
|
188
182
|
|
|
189
183
|
if __name__ == "__main__":
|
|
@@ -275,8 +269,7 @@ if __name__ == "__main__":
|
|
|
275
269
|
data_list.append(outcar)
|
|
276
270
|
|
|
277
271
|
datasets_path = "/data/home/wuxingxing/datas/al_dir/HfO2/dftb/init_data_200"
|
|
278
|
-
extract_pwdata(
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
merge_data=True
|
|
272
|
+
extract_pwdata(input_data_list=data_list,
|
|
273
|
+
intput_data_format="vasp/outcar",
|
|
274
|
+
save_data_path=datasets_path
|
|
282
275
|
)
|
pwact/main.py
CHANGED
|
@@ -24,14 +24,14 @@ from pwact.active_learning.environment import check_envs
|
|
|
24
24
|
|
|
25
25
|
from pwact.data_format.configop import extract_pwdata
|
|
26
26
|
from pwact.active_learning.explore.select_image import select_image, print_select_image
|
|
27
|
-
from pwact.utils.process_tool import kill_process
|
|
27
|
+
from pwact.utils.process_tool import kill_process, get_pid
|
|
28
28
|
def run_iter():
|
|
29
29
|
system_json = json.load(open(sys.argv[2]))
|
|
30
30
|
if "work_dir" in system_json.keys():
|
|
31
31
|
os.chdir(system_json["work_dir"])
|
|
32
|
-
pid =
|
|
32
|
+
pid = get_pid()
|
|
33
33
|
with open("./PID", 'w') as wf:
|
|
34
|
-
wf.write(
|
|
34
|
+
wf.write(pid)
|
|
35
35
|
|
|
36
36
|
system_info = convert_keys_to_lowercase(system_json)
|
|
37
37
|
machine_json = json.load(open(sys.argv[3]))
|
|
@@ -149,9 +149,9 @@ def init_bulk():
|
|
|
149
149
|
system_info = convert_keys_to_lowercase(system_json)
|
|
150
150
|
if "work_dir" in system_json.keys():
|
|
151
151
|
os.chdir(system_json["work_dir"])
|
|
152
|
-
pid =
|
|
152
|
+
pid = get_pid()
|
|
153
153
|
with open("./PID", 'w') as wf:
|
|
154
|
-
wf.write(
|
|
154
|
+
wf.write(pid)
|
|
155
155
|
|
|
156
156
|
machine_info = convert_keys_to_lowercase(json.load(open(sys.argv[3])))
|
|
157
157
|
input_param = InitBulkParam(system_info)
|
|
@@ -163,35 +163,6 @@ def init_bulk():
|
|
|
163
163
|
init_bulk_run(resource, input_param)
|
|
164
164
|
print("Init Bulk Work Done!")
|
|
165
165
|
|
|
166
|
-
def to_pwdata(input_cmds:list):
|
|
167
|
-
parser = argparse.ArgumentParser()
|
|
168
|
-
parser.add_argument('-t', '--worktype', help="specify work type, default is 'to_pwdata'", type=str, default='to_pwdata')
|
|
169
|
-
parser.add_argument('-i', '--input', help='specify input outcars or movement files', nargs='+', type=str, default=None)
|
|
170
|
-
parser.add_argument('-f', '--format', help="specify input file format, 'vasp/outcar' or 'pwmat/movement', default is 'pwmat/movement'", type=str, default="pwmat/movement")
|
|
171
|
-
parser.add_argument('-s', '--savepath', help="specify stored directory, default is 'PWdata'", type=str, default='PWdata')
|
|
172
|
-
parser.add_argument('-o', '--train_valid_ratio', help='specify stored directory, default=0.8', type=float, default=0.8)
|
|
173
|
-
# parser.add_argument('-r', '--data_shuffle', help='specify stored directory, default is True', type=bool, required=False, default=True)
|
|
174
|
-
# parser.add_argument('-d', '--do_shuffle', help='if -d exits, doing the data shuffling', action='store_false')
|
|
175
|
-
parser.add_argument('-r', '--data_shuffle', help='Specify whether to do data shuffle operation, -r is True', action='store_true')
|
|
176
|
-
parser.add_argument('-m', '--merge', help='Specify whether to merge inputs to one, -m is True', action='store_true')
|
|
177
|
-
# parser.add_argument('-m', '--merge', help='merge inputs to one, default is False', type=bool, required=False, default=False)
|
|
178
|
-
parser.add_argument('-g', '--gap', help='Trail point interval before and after, default is 1', type=int, default=1)
|
|
179
|
-
|
|
180
|
-
parser.add_argument('-w', '--work_dir', help='specify work dir, default is current dir', type=str, default='./')
|
|
181
|
-
args = parser.parse_args(input_cmds)
|
|
182
|
-
print(args.work_dir)
|
|
183
|
-
os.chdir(args.work_dir)
|
|
184
|
-
|
|
185
|
-
extract_pwdata(data_list=args.input,
|
|
186
|
-
data_format=args.format,
|
|
187
|
-
datasets_path=args.savepath,
|
|
188
|
-
train_valid_ratio=args.train_valid_ratio,
|
|
189
|
-
data_shuffle=args.data_shuffle,
|
|
190
|
-
merge_data=args.merge,
|
|
191
|
-
interval = args.gap
|
|
192
|
-
)
|
|
193
|
-
|
|
194
|
-
|
|
195
166
|
def gather_pwmata(input_cmds):
|
|
196
167
|
parser = argparse.ArgumentParser()
|
|
197
168
|
parser.add_argument('-i', '--input_dir', help="specify the dir above the iterations, the default dir is current dir './'\nthe result could be found in './final_pwdata'", type=str, default='./')
|
|
@@ -267,10 +238,12 @@ def kill_job():
|
|
|
267
238
|
# os.chdir(system_json["work_dir"])
|
|
268
239
|
try:
|
|
269
240
|
with open("./PID", 'r') as rf:
|
|
270
|
-
|
|
271
|
-
|
|
241
|
+
pid_str_info = rf.readline().split()
|
|
242
|
+
pid = pid_str_info[1]
|
|
243
|
+
jobid = pid_str_info[3] if "job" in pid_str_info else None
|
|
244
|
+
except :
|
|
272
245
|
raise Exception("Error parsing PID file !")
|
|
273
|
-
kill_process(
|
|
246
|
+
kill_process(pid, jobid)
|
|
274
247
|
if sys.argv[2].lower() == "init_bulk":
|
|
275
248
|
# search all jobs
|
|
276
249
|
init_scancel_jobs(os.getcwd())
|
|
@@ -349,7 +322,7 @@ def main():
|
|
|
349
322
|
|
|
350
323
|
elif "to_pwdata".upper() == sys.argv[1].upper():#these function may use pwdata command
|
|
351
324
|
print("\n\nWarning! This method has been abandoned, new conversion methods refer to the pwdata documentation http://doc.lonxun.com/PWMLFF/Appendix-2/\n\n")
|
|
352
|
-
to_pwdata(sys.argv[2:])
|
|
325
|
+
# to_pwdata(sys.argv[2:])
|
|
353
326
|
|
|
354
327
|
elif "run".upper() == sys.argv[1].upper():
|
|
355
328
|
if len(sys.argv) == 2 or "-h".upper() == sys.argv[2].upper() or \
|
pwact/utils/app_lib/cp2k.py
CHANGED
|
@@ -194,6 +194,7 @@ def make_cp2k_input_from_external(cell, coord_file, exinput_path, gaussian_base_
|
|
|
194
194
|
# read the input content as string
|
|
195
195
|
with open(exinput_path) as f:
|
|
196
196
|
exinput = f.readlines()
|
|
197
|
+
exinput = [line.upper() for line in exinput]
|
|
197
198
|
# replace the cell string
|
|
198
199
|
start_cell = 0
|
|
199
200
|
end_cell = 0
|
|
@@ -210,7 +211,9 @@ def make_cp2k_input_from_external(cell, coord_file, exinput_path, gaussian_base_
|
|
|
210
211
|
end_dft = 0
|
|
211
212
|
basis_set_file_name = -1
|
|
212
213
|
potential_file_name = -1
|
|
213
|
-
|
|
214
|
+
start_kpoint = -1
|
|
215
|
+
end_kpoint = -1
|
|
216
|
+
kpoint_line = 0
|
|
214
217
|
# delete the BASIS_SET_FILE_NAME and POTENTIAL_FILE_NAME line
|
|
215
218
|
for line_idx, line in enumerate(exinput):
|
|
216
219
|
line = line.upper()
|
|
@@ -222,15 +225,50 @@ def make_cp2k_input_from_external(cell, coord_file, exinput_path, gaussian_base_
|
|
|
222
225
|
basis_set_file_name = line_idx
|
|
223
226
|
if "POTENTIAL_FILE_NAME" in line:
|
|
224
227
|
potential_file_name = line_idx
|
|
228
|
+
if "&KPOINTS" in line:
|
|
229
|
+
start_kpoint = line_idx
|
|
230
|
+
if "&END KPOINTS" in line:
|
|
231
|
+
end_kpoint = line_idx
|
|
232
|
+
if "MONKHORST-PACK" in line:
|
|
233
|
+
kpoint_line = line_idx
|
|
234
|
+
|
|
225
235
|
if start_dft == end_dft:
|
|
226
236
|
raise Exception("{} extarcted error! Can not find DFT set!".format(exinput_path))
|
|
237
|
+
|
|
238
|
+
# set kspacing and pseudo file
|
|
239
|
+
if gaussian_base_param["KSPACING"] is not None and kpoint_line > 0:
|
|
240
|
+
raise Exception("The 'kspacing' in 'gaussian_param' and 'KPOINTS' in {} file cannot be set set simultaneously!".format(os.path.basename(exinput_path)))
|
|
241
|
+
elif gaussian_base_param["KSPACING"] is None and kpoint_line == 0:
|
|
242
|
+
kspacing_content = make_kspacing_kpoints(cell, 0.5)
|
|
243
|
+
elif gaussian_base_param["KSPACING"] is not None:
|
|
244
|
+
kspacing_content = make_kspacing_kpoints(cell, gaussian_base_param["KSPACING"])
|
|
245
|
+
else:
|
|
246
|
+
kspacing_content = "\n".join(exinput[start_kpoint:end_kpoint+1])
|
|
227
247
|
basis_set_file_name, potential_file_name = sorted([basis_set_file_name, potential_file_name], reverse=True)
|
|
228
|
-
if
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
248
|
+
if kpoint_line > 0:
|
|
249
|
+
if start_kpoint > basis_set_file_name:
|
|
250
|
+
for del_idx in list(range(end_kpoint, start_kpoint-1, -1)):
|
|
251
|
+
exinput.pop(del_idx)
|
|
252
|
+
if basis_set_file_name != -1:
|
|
253
|
+
exinput.pop(basis_set_file_name)
|
|
254
|
+
if potential_file_name != -1:
|
|
255
|
+
exinput.pop(potential_file_name)
|
|
256
|
+
else:
|
|
257
|
+
if basis_set_file_name != -1:
|
|
258
|
+
exinput.pop(basis_set_file_name)
|
|
259
|
+
if potential_file_name != -1:
|
|
260
|
+
exinput.pop(potential_file_name)
|
|
261
|
+
for del_idx in list(range(end_kpoint, start_kpoint-1, -1)):
|
|
262
|
+
exinput.pop(del_idx)
|
|
263
|
+
else:
|
|
264
|
+
if basis_set_file_name != -1:
|
|
265
|
+
exinput.pop(basis_set_file_name)
|
|
266
|
+
if potential_file_name != -1:
|
|
267
|
+
exinput.pop(potential_file_name)
|
|
268
|
+
|
|
232
269
|
exinput.insert(start_dft+1, " BASIS_SET_FILE_NAME {}\n".format(gaussian_base_param["BASIS_SET_FILE_NAME"]))
|
|
233
270
|
exinput.insert(start_dft+2, " POTENTIAL_FILE_NAME {}\n".format(gaussian_base_param["POTENTIAL_FILE_NAME"]))
|
|
271
|
+
exinput.insert(start_dft+3, kspacing_content)
|
|
234
272
|
|
|
235
273
|
for line_idx, line in enumerate(exinput):
|
|
236
274
|
line = line.upper()
|
|
@@ -326,6 +364,25 @@ def get_atom_type_from_config(coord_file:str):
|
|
|
326
364
|
continue
|
|
327
365
|
return res
|
|
328
366
|
|
|
367
|
+
def _reciprocal_box(box):
|
|
368
|
+
rbox = np.linalg.inv(box)
|
|
369
|
+
rbox = rbox.T
|
|
370
|
+
return rbox
|
|
371
|
+
|
|
372
|
+
def make_kspacing_kpoints(lattice, kspacing):
|
|
373
|
+
rbox = _reciprocal_box(lattice)
|
|
374
|
+
kpoints = [
|
|
375
|
+
round(2 * np.pi * np.linalg.norm(ii) / kspacing) for ii in rbox
|
|
376
|
+
]
|
|
377
|
+
kpoints[0] = 1 if kpoints[0] == 0 else kpoints[0]
|
|
378
|
+
kpoints[1] = 1 if kpoints[1] == 0 else kpoints[1]
|
|
379
|
+
kpoints[2] = 1 if kpoints[2] == 0 else kpoints[2]
|
|
380
|
+
ret = " &KPOINTS\n"
|
|
381
|
+
ret += " SCHEME MONKHORST-PACK %d %d %d\n" % (kpoints[0], kpoints[1], kpoints[2])
|
|
382
|
+
ret += " &END KPOINTS\n"
|
|
383
|
+
return ret
|
|
384
|
+
# ret = _make_pwmat_kp_mp(kpoints)
|
|
385
|
+
|
|
329
386
|
# if __name__=="__main__":
|
|
330
387
|
# import dpdata
|
|
331
388
|
# poscar = "/data/home/wuxingxing/datas/al_dir/si_4_vasp/init_bulk/collection/init_config_0/0.9_scale.poscar"
|
pwact/utils/app_lib/lammps.py
CHANGED
|
@@ -24,7 +24,7 @@ def make_pair_style(md_type, forcefiled, atom_type:list[int], dump_info:str):
|
|
|
24
24
|
pair_names = ""
|
|
25
25
|
for fi in forcefiled:
|
|
26
26
|
pair_names += "{} ".format(os.path.basename(fi))
|
|
27
|
-
pair_style = "pair_style
|
|
27
|
+
pair_style = "pair_style matpl {} {}\n".format(pair_names, dump_info)
|
|
28
28
|
atom_names = " ".join(map(str, atom_type))
|
|
29
29
|
pair_style += "pair_coeff * * {}\n".format(atom_names)
|
|
30
30
|
return pair_style
|