ion-CSP 2.1.5__py3-none-any.whl → 2.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ion_CSP/__init__.py +2 -2
- ion_CSP/convert_SMILES.py +32 -10
- ion_CSP/empirical_estimate.py +136 -18
- ion_CSP/gen_opt.py +68 -22
- ion_CSP/identify_molecules.py +15 -0
- ion_CSP/log_and_time.py +55 -8
- ion_CSP/mlp_opt.py +52 -6
- ion_CSP/read_mlp_density.py +15 -1
- ion_CSP/task_manager.py +1 -1
- ion_CSP/upload_download.py +0 -1
- ion_CSP/vasp_processing.py +48 -20
- {ion_csp-2.1.5.dist-info → ion_csp-2.1.8.dist-info}/METADATA +44 -16
- ion_csp-2.1.8.dist-info/RECORD +43 -0
- {ion_csp-2.1.5.dist-info → ion_csp-2.1.8.dist-info}/licenses/LICENSE +1 -1
- {ion_csp-2.1.5.dist-info → ion_csp-2.1.8.dist-info}/top_level.txt +0 -1
- ion_csp-2.1.5.dist-info/RECORD +0 -44
- run/update_changelog.py +0 -68
- {run → ion_CSP/run}/__init__.py +0 -0
- {run → ion_CSP/run}/main_CSP.py +0 -0
- {run → ion_CSP/run}/main_EE.py +0 -0
- {run → ion_CSP/run}/run_convert_SMILES.py +0 -0
- {run → ion_CSP/run}/run_empirical_estimate.py +0 -0
- {run → ion_CSP/run}/run_gen_opt.py +0 -0
- {run → ion_CSP/run}/run_read_mlp_density.py +0 -0
- {run → ion_CSP/run}/run_upload_download.py +0 -0
- {run → ion_CSP/run}/run_vasp_processing.py +0 -0
- {ion_csp-2.1.5.dist-info → ion_csp-2.1.8.dist-info}/WHEEL +0 -0
- {ion_csp-2.1.5.dist-info → ion_csp-2.1.8.dist-info}/entry_points.txt +0 -0
ion_CSP/__init__.py
CHANGED
ion_CSP/convert_SMILES.py
CHANGED
@@ -2,6 +2,7 @@ import os
|
|
2
2
|
import shutil
|
3
3
|
import logging
|
4
4
|
import pandas as pd
|
5
|
+
import importlib.resources
|
5
6
|
from typing import List
|
6
7
|
from rdkit import Chem
|
7
8
|
from rdkit.Chem import AllChem
|
@@ -13,9 +14,13 @@ class SmilesProcessing:
|
|
13
14
|
|
14
15
|
def __init__(self, work_dir: str, csv_file: str, converted_folder: str = '1_1_SMILES_gjf', optimized_dir: str = '1_2_Gaussian_optimized'):
|
15
16
|
"""
|
16
|
-
|
17
|
+
This class is used to process SMILES codes from a CSV file, convert them into Gaussian input files, and prepare for optimization tasks. It also supports grouping by charge and filtering based on functional groups.
|
18
|
+
|
19
|
+
params:
|
17
20
|
work_dir: the path of the working directory.
|
18
21
|
csv_file: the csv file name in the working directory.
|
22
|
+
converted_folder: the folder name for storing converted SMILES files.
|
23
|
+
optimized_dir: the folder name for storing Gaussian optimized files.
|
19
24
|
"""
|
20
25
|
redirect_dpdisp_logging(os.path.join(work_dir, "dpdispatcher.log"))
|
21
26
|
# 读取csv文件并处理数据, csv文件的表头包括 SMILES, Charge, Refcode或Number
|
@@ -28,7 +33,7 @@ class SmilesProcessing:
|
|
28
33
|
self.base_dir, converted_folder, os.path.splitext(csv_file)[0]
|
29
34
|
)
|
30
35
|
self.gaussian_optimized_dir = os.path.join(self.base_dir, optimized_dir)
|
31
|
-
self.param_dir =
|
36
|
+
self.param_dir = importlib.resources.files("ion_CSP.param")
|
32
37
|
original_df = pd.read_csv(csv_path)
|
33
38
|
logging.info(f"Processing {csv_path}")
|
34
39
|
# 对SMILES码去重
|
@@ -53,13 +58,15 @@ class SmilesProcessing:
|
|
53
58
|
self, dir: str, smiles: str, basename: str, charge: int
|
54
59
|
):
|
55
60
|
"""
|
56
|
-
Private method:
|
61
|
+
Private method:
|
62
|
+
Use the rdkit module to read SMILES code and convert it into the required file types such as gjf, xyz, mol, etc.
|
57
63
|
|
58
|
-
|
64
|
+
params:
|
59
65
|
dir: The directory used for outputting files, regardless of existence of the directory.
|
60
66
|
smiles: SMILES code to be converted.
|
61
67
|
basename: The reference code or number corresponding to SMILES code.
|
62
68
|
charge: The charge carried by ions.
|
69
|
+
|
63
70
|
return:
|
64
71
|
result_code: Result code 0 or -1, representing success and failure respectively.
|
65
72
|
basename: The corresponding basename.
|
@@ -144,6 +151,12 @@ class SmilesProcessing:
|
|
144
151
|
):
|
145
152
|
"""
|
146
153
|
Screen based on the provided functional groups and charges.
|
154
|
+
|
155
|
+
params:
|
156
|
+
charge_screen: The charge to screen for, default is 0.
|
157
|
+
group_screen: The functional group to screen for, default is empty string.
|
158
|
+
group_name: The name of the functional group, used for naming the output directory.
|
159
|
+
group_screen_invert: If True, invert the screening condition for the functional group.
|
147
160
|
"""
|
148
161
|
# 另外筛选出符合条件的离子
|
149
162
|
screened = self.df
|
@@ -179,6 +192,12 @@ class SmilesProcessing:
|
|
179
192
|
):
|
180
193
|
"""
|
181
194
|
Based on the dpdispatcher module, prepare and submit files for optimization on remote server or local machine.
|
195
|
+
|
196
|
+
params:
|
197
|
+
folders: List of folders containing .gjf files to be processed, if empty, all folders in the converted directory will be processed.
|
198
|
+
machine: The machine configuration file for dpdispatcher, can be a JSON or YAML file.
|
199
|
+
resources: The resources configuration file for dpdispatcher, can be a JSON or YAML file.
|
200
|
+
nodes: The number of nodes to distribute the tasks to, default is 1.
|
182
201
|
"""
|
183
202
|
if os.path.exists(self.gaussian_optimized_dir):
|
184
203
|
logging.error(f'The directory {self.gaussian_optimized_dir} has already existed.')
|
@@ -233,7 +252,7 @@ class SmilesProcessing:
|
|
233
252
|
task_dir = os.path.join(self.converted_dir, f"{parent}pop{pop}")
|
234
253
|
os.makedirs(task_dir, exist_ok=True)
|
235
254
|
for file in forward_files:
|
236
|
-
shutil.copyfile(
|
255
|
+
shutil.copyfile(self.param_dir.joinpath(file), f"{task_dir}/{file}")
|
237
256
|
for job_i in node_jobs[pop]:
|
238
257
|
# 将分配好的 .gjf 文件添加到对应的上传文件中
|
239
258
|
forward_files.append(gjf_files[job_i])
|
@@ -274,11 +293,14 @@ class SmilesProcessing:
|
|
274
293
|
for job_i in node_jobs[pop]:
|
275
294
|
base_name, _ = os.path.splitext(gjf_files[job_i])
|
276
295
|
# 在优化后都取回每个 .gjf 文件对应的 .log、.fchk 输出文件
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
296
|
+
try:
|
297
|
+
for ext in ['gjf', 'log', 'fchk']:
|
298
|
+
shutil.copyfile(
|
299
|
+
f"{task_dir}/{base_name}.{ext}",
|
300
|
+
f"{optimized_folder_dir}/{base_name}.{ext}"
|
301
|
+
)
|
302
|
+
except FileNotFoundError as e:
|
303
|
+
logging.error(f"File not found during copying, please check the configuration and state of Gaussian: {e}")
|
282
304
|
# 在成功完成Gaussian优化后,删除 1_1_SMILES_gjf/{csv}/{parent}/pop{n} 文件夹以节省空间
|
283
305
|
shutil.rmtree(task_dir)
|
284
306
|
shutil.copyfile(
|
ion_CSP/empirical_estimate.py
CHANGED
@@ -55,7 +55,13 @@ class EmpiricalEstimation:
|
|
55
55
|
|
56
56
|
def __init__(self, work_dir: str, folders: List[str], ratios: List[int], sort_by: str):
|
57
57
|
"""
|
58
|
-
|
58
|
+
This class is designed to process Gaussian calculation files, perform electrostatic potential analysis using Multiwfn, and estimate the nitrogen content or density of ion crystal combinations. The class will also generate .csv files containing sorted nitrogen content or density based on the specified sorting criterion.
|
59
|
+
|
60
|
+
:params
|
61
|
+
work_dir: The working directory where the Gaussian calculation files are located.
|
62
|
+
folders: A list of folder names containing the Gaussian calculation files.
|
63
|
+
ratios: A list of integers representing the ratio of each folder in the combination.
|
64
|
+
sort_by: A string indicating the sorting criterion, either 'density' or 'nitrogen'.
|
59
65
|
"""
|
60
66
|
self.base_dir = work_dir
|
61
67
|
os.chdir(self.base_dir)
|
@@ -73,6 +79,9 @@ class EmpiricalEstimation:
|
|
73
79
|
def multiwfn_process_fchk_to_json(self, specific_directory: str = None):
|
74
80
|
'''
|
75
81
|
If a specific directory is given, this method can be used separately to implement batch processing of FCHK files with Multiwfn and save the desired electrostatic potential analysis results to the corresponding JSON file. Otherwise, the folder list provided during initialization will be processed sequentially.
|
82
|
+
|
83
|
+
:params
|
84
|
+
specific_directory: The specific directory to process. If None, all folders will be processed.
|
76
85
|
'''
|
77
86
|
if specific_directory is None:
|
78
87
|
for folder in self.folders:
|
@@ -84,7 +93,11 @@ class EmpiricalEstimation:
|
|
84
93
|
|
85
94
|
def _multiwfn_process_fchk_to_json(self, folder: str):
|
86
95
|
'''
|
96
|
+
Private method:
|
87
97
|
Perform electrostatic potential analysis on .fchk files using Multiwfn and save the analysis results to a .json file.
|
98
|
+
|
99
|
+
:params
|
100
|
+
folder: The folder containing the .fchk files to be processed.
|
88
101
|
'''
|
89
102
|
# 在每个文件夹中获取 .fchk 文件并根据文件名排序, 再用 Multiwfn 进行静电势分析, 最后将分析结果保存到同名 .json 文件中
|
90
103
|
fchk_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.fchk')]
|
@@ -117,26 +130,89 @@ class EmpiricalEstimation:
|
|
117
130
|
logging.error(f'Error with moving bad files: {e}')
|
118
131
|
logging.info(f'\nElectrostatic potential analysis by Multiwfn for {folder} folder has completed, and the results have been stored in the corresponding json files.\n')
|
119
132
|
|
120
|
-
def
|
133
|
+
def _check_multiwfn_executable(self):
|
121
134
|
'''
|
122
|
-
Private method:
|
135
|
+
Private method:
|
136
|
+
Check if the Multiwfn executable file exists in the system PATH.
|
137
|
+
If not, raise a FileNotFoundError with an appropriate error message.
|
123
138
|
'''
|
124
|
-
|
125
|
-
|
126
|
-
|
139
|
+
multiwfn_path = shutil.which("Multiwfn_noGUI") or shutil.which("Multiwfn")
|
140
|
+
if not multiwfn_path:
|
141
|
+
error_msg = (
|
142
|
+
"Error: No detected Multiwfn executable file (Multiwfn or Multiwfn_GUI), please check:\n "
|
143
|
+
"1. Has Multiwfn been installed correctly?\n"
|
144
|
+
"2. Has Multiwfn been added to the system PATH environment variable"
|
145
|
+
)
|
146
|
+
print(error_msg)
|
147
|
+
logging.error(error_msg)
|
148
|
+
raise FileNotFoundError("No detected Multiwfn executable file (Multiwfn or Multiwfn_GUI)")
|
149
|
+
return multiwfn_path
|
150
|
+
|
151
|
+
def _multiwfn_cmd_build(self, input_content):
|
152
|
+
'''
|
153
|
+
Private method:
|
154
|
+
Build the Multiwfn command to be executed based on the input content.
|
155
|
+
This method is used to create the input file for Multiwfn.
|
156
|
+
|
157
|
+
:params
|
158
|
+
input_content: The content to be written to the input file for Multiwfn.
|
159
|
+
'''
|
160
|
+
# 检查Multiwfn可执行文件是否存在
|
161
|
+
multiwfn_path = self._check_multiwfn_executable()
|
127
162
|
# 创建 input.txt 用于存储 Multiwfn 命令内容
|
128
163
|
with open('input.txt', 'w') as input_file:
|
129
|
-
input_file.write(
|
164
|
+
input_file.write(input_content)
|
130
165
|
# 通过 input.txt 执行 Multiwfn 命令, 并将输出结果重定向到output.txt中
|
166
|
+
cmd = [multiwfn_path, "<", "input.txt", ">", "output.txt"]
|
131
167
|
try:
|
132
|
-
subprocess.run(
|
133
|
-
except
|
134
|
-
|
168
|
+
subprocess.run(cmd, shell=True, capture_output=True)
|
169
|
+
except subprocess.CalledProcessError as e:
|
170
|
+
error_msg = f"Multiwfn execution failed (return code {e.returncode}): Error output: {e.stderr}"
|
171
|
+
print(error_msg)
|
172
|
+
logging.error(error_msg)
|
173
|
+
raise
|
174
|
+
except Exception as e:
|
175
|
+
error_msg = f"Unexpected Error: {str(e)}"
|
176
|
+
print(error_msg)
|
177
|
+
logging.error(error_msg)
|
178
|
+
raise
|
179
|
+
finally:
|
180
|
+
# 清理临时文件
|
181
|
+
try:
|
182
|
+
os.remove("input.txt")
|
183
|
+
except Exception as e:
|
184
|
+
logging.warning(f"无法删除临时文件 input.txt: {str(e)}")
|
185
|
+
|
186
|
+
def _single_multiwfn_fchk_to_json(self, fchk_filename: str):
|
187
|
+
'''
|
188
|
+
Private method:
|
189
|
+
Use multiwfn to perform electrostatic potential analysis on each FCHK file separately, and save the required results to a corresponding JSON file.
|
190
|
+
|
191
|
+
:params
|
192
|
+
fchk_filename: The full path of the FCHK file to be processed.
|
193
|
+
|
194
|
+
:return: True if the processing is successful, False if the FCHK file is invalid.
|
195
|
+
'''
|
196
|
+
print(f'Multiwfn processing {fchk_filename}')
|
197
|
+
logging.info(f'Multiwfn processing {fchk_filename}')
|
198
|
+
result_flag = True
|
199
|
+
self._multiwfn_cmd_build(input_content=f"{fchk_filename}\n12\n0\nq\n")
|
200
|
+
|
135
201
|
# 获取目录以及 .fchk 文件的无后缀文件名, 即 refcode
|
136
202
|
folder, filename = os.path.split(fchk_filename)
|
137
203
|
refcode, _ = os.path.splitext(filename)
|
138
|
-
|
139
|
-
|
204
|
+
try:
|
205
|
+
with open('output.txt', 'r') as output_file:
|
206
|
+
output_content = output_file.read()
|
207
|
+
except Exception as e:
|
208
|
+
logging.error(f"Error reading output.txt: {e}")
|
209
|
+
raise
|
210
|
+
finally:
|
211
|
+
# 清理临时文件
|
212
|
+
try:
|
213
|
+
os.remove("output.txt")
|
214
|
+
except Exception as e:
|
215
|
+
logging.warning(f"无法删除临时文件 output.txt: {str(e)}")
|
140
216
|
# 提取所需数据
|
141
217
|
volume_match = re.search(r'Volume:\s*([\d.]+)\s*Bohr\^3\s+\(\s*([\d.]+)\s*Angstrom\^3\)', output_content)
|
142
218
|
density_match = re.search(r'Estimated density according to mass and volume \(M/V\):\s*([\d.]+)\s*g/cm\^3', output_content)
|
@@ -186,8 +262,6 @@ class EmpiricalEstimation:
|
|
186
262
|
with open (f"{folder}/{refcode}.json", 'w') as json_file:
|
187
263
|
json.dump(result, json_file, indent=4)
|
188
264
|
shutil.copyfile(src=f"{folder}/{refcode}.json", dst=f"Optimized/{folder}/{refcode}.json")
|
189
|
-
os.remove('input.txt')
|
190
|
-
os.remove('output.txt')
|
191
265
|
logging.info(f'Finished processing {fchk_filename}')
|
192
266
|
return result_flag
|
193
267
|
|
@@ -195,6 +269,9 @@ class EmpiricalEstimation:
|
|
195
269
|
"""
|
196
270
|
If a specific directory is given, this method can be used separately to batch process the last frame of Gaussian optimized LOG files into GJF files using Multiwfn.
|
197
271
|
Otherwise, the folder list provided during initialization will be processed in order.
|
272
|
+
|
273
|
+
:params
|
274
|
+
specific_directory: The specific directory to process. If None, all folders will be processed.
|
198
275
|
"""
|
199
276
|
if specific_directory is None:
|
200
277
|
for folder in self.folders:
|
@@ -206,7 +283,11 @@ class EmpiricalEstimation:
|
|
206
283
|
|
207
284
|
def _gaussian_log_to_optimized_gjf(self, folder: str):
|
208
285
|
'''
|
286
|
+
Private method:
|
209
287
|
Due to the lack of support of Pyxtal module for LOG files in subsequent crystal generation, it is necessary to convert the last frame of the Gaussian optimized LOG file to a .gjf file with Multiwfn processing.
|
288
|
+
|
289
|
+
:params
|
290
|
+
folder: The folder containing the Gaussian LOG files to be processed.
|
210
291
|
'''
|
211
292
|
# 在每个文件夹中获取 .log 文件并根据文件名排序, 再用Multiwfn载入优化最后一帧转换为 gjf 文件
|
212
293
|
log_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.log')]
|
@@ -226,10 +307,19 @@ class EmpiricalEstimation:
|
|
226
307
|
pass
|
227
308
|
logging.info(f'\nThe .log to .gjf conversion by Multiwfn for {folder} folder has completed, and the optimized .gjf structures have been stored in the optimized directory.\n')
|
228
309
|
|
229
|
-
def _single_multiwfn_log_to_gjf(self, folder: str, log_filename: str):
|
310
|
+
def _single_multiwfn_log_to_gjf(self, folder: str, log_filename: str):
|
311
|
+
"""
|
312
|
+
Private method:
|
313
|
+
Use Multiwfn to convert the last frame of the Gaussian optimized LOG file to a .gjf file.
|
314
|
+
|
315
|
+
:params
|
316
|
+
folder: The folder containing the Gaussian LOG file to be processed.
|
317
|
+
log_filename: The full path of the LOG file to be processed.
|
318
|
+
"""
|
230
319
|
# 获取目录以及 .fchk 文件的无后缀文件名, 即 refcode
|
231
320
|
_, filename = os.path.split(log_filename)
|
232
321
|
refcode, _ = os.path.splitext(filename)
|
322
|
+
|
233
323
|
try:
|
234
324
|
# 创建 input.txt 用于存储 Multiwfn 命令内容
|
235
325
|
with open('input.txt', 'w') as input_file:
|
@@ -291,6 +381,15 @@ class EmpiricalEstimation:
|
|
291
381
|
writer.writerows(data) # 写入排序后的数
|
292
382
|
|
293
383
|
def _read_gjf_elements(self, gjf_file):
|
384
|
+
"""
|
385
|
+
Private method:
|
386
|
+
Read the elements from a .gjf file and return a dictionary with element counts.
|
387
|
+
|
388
|
+
:params
|
389
|
+
gjf_file: The full path of the .gjf file to be processed.
|
390
|
+
|
391
|
+
:return: A dictionary with element symbols as keys and their counts as values.
|
392
|
+
"""
|
294
393
|
# 根据每一个组合中的组分找到对应的 JSON 文件并读取其中的性质内容
|
295
394
|
with open(gjf_file, 'r') as file:
|
296
395
|
lines = file.readlines()
|
@@ -317,6 +416,15 @@ class EmpiricalEstimation:
|
|
317
416
|
return atomic_counts
|
318
417
|
|
319
418
|
def _generate_combinations(self, suffix: str):
|
419
|
+
"""
|
420
|
+
Private method:
|
421
|
+
Generate all valid combinations of files based on the specified suffix and ratios.
|
422
|
+
|
423
|
+
:params
|
424
|
+
suffix: The file suffix to filter the files in the folders.
|
425
|
+
|
426
|
+
:return: A list of dictionaries representing the combinations of files with their respective ratios.
|
427
|
+
"""
|
320
428
|
# 获取所有符合后缀名条件的文件
|
321
429
|
all_files = []
|
322
430
|
for folder in self.folders:
|
@@ -411,6 +519,15 @@ class EmpiricalEstimation:
|
|
411
519
|
writer.writerows(data) # 写入排序后的数
|
412
520
|
|
413
521
|
def _copy_combo_file(self, combo_path, folder_basename, file_type):
|
522
|
+
"""
|
523
|
+
Private method:
|
524
|
+
Copy the specified file type from the Optimized directory to the combo_n folder.
|
525
|
+
|
526
|
+
:params
|
527
|
+
combo_path: The path to the combo_n folder where the file will be copied.
|
528
|
+
folder_basename: The basename of the folder containing the file to be copied.
|
529
|
+
file_type: The type of file to be copied (e.g., '.gjf', '.json').
|
530
|
+
"""
|
414
531
|
filename = f"{folder_basename}{file_type}"
|
415
532
|
source_path = os.path.join(self.base_dir, 'Optimized', filename)
|
416
533
|
# 复制指定后缀名文件到对应的 combo_n 文件夹
|
@@ -428,9 +545,10 @@ class EmpiricalEstimation:
|
|
428
545
|
"""
|
429
546
|
Create a combo_n folder based on the .csv file and copy the corresponding .gjf structure file.
|
430
547
|
|
431
|
-
:
|
432
|
-
|
433
|
-
|
548
|
+
:params
|
549
|
+
target_directory: The target directory of the combo folder to be created
|
550
|
+
num_folders: The number of combo folders to be created
|
551
|
+
ion_numbers: The number of ions for ionic crystal generation step (generated in config.yaml in the corresponding combo_dir automatically)
|
434
552
|
"""
|
435
553
|
if self.sort_by == 'density':
|
436
554
|
base_csv = self.density_csv
|
ion_CSP/gen_opt.py
CHANGED
@@ -7,7 +7,7 @@ import subprocess
|
|
7
7
|
import importlib.resources
|
8
8
|
from typing import List
|
9
9
|
from ase.io import read
|
10
|
-
from dpdispatcher import Machine
|
10
|
+
from dpdispatcher import Machine, Resources
|
11
11
|
from pyxtal import pyxtal
|
12
12
|
from pyxtal.msg import Comp_CompatibilityError, Symm_CompatibilityError
|
13
13
|
from ion_CSP.log_and_time import redirect_dpdisp_logging
|
@@ -17,11 +17,13 @@ class CrystalGenerator:
|
|
17
17
|
def __init__(self, work_dir: str, ion_numbers: List[int], species: List[str]):
|
18
18
|
"""
|
19
19
|
Initialize the class based on the provided ionic crystal composition structure files and corresponding composition numbers.
|
20
|
+
|
21
|
+
:params
|
22
|
+
work_dir: The working directory where the ionic crystal structure files are located.
|
23
|
+
ion_numbers: A list of integers representing the number of each ion in the ionic crystal.
|
24
|
+
species: A list of strings representing the species of ions in the ionic crystal.
|
20
25
|
"""
|
21
26
|
redirect_dpdisp_logging(os.path.join(work_dir, "dpdispatcher.log"))
|
22
|
-
# self.script_dir = os.path.dirname(__file__)
|
23
|
-
# self.mlp_opt_file = os.path.join(self.script_dir, "mlp_opt.py")
|
24
|
-
# self.model_file = os.path.join(self.script_dir, "../../model/model.pt")
|
25
27
|
self.mlp_opt_file = importlib.resources.files("ion_CSP").joinpath("mlp_opt.py")
|
26
28
|
self.model_file = importlib.resources.files("ion_CSP.model").joinpath("model.pt")
|
27
29
|
# 获取当前脚本的路径以及同路径下离子晶体组分的结构文件, 并将这一路径作为工作路径来避免可能的错误
|
@@ -55,6 +57,10 @@ class CrystalGenerator:
|
|
55
57
|
"""
|
56
58
|
Private method:
|
57
59
|
Extract numbers from file names, convert them to integers, sort them by sequence, and return a list containing both indexes and file names
|
60
|
+
|
61
|
+
:params
|
62
|
+
directory: The directory where the files are located.
|
63
|
+
prefix_name: The prefix of the file names to be processed, e.g., 'POSCAR_'.
|
58
64
|
"""
|
59
65
|
# 获取dir文件夹中所有以prefix_name开头的文件,在此实例中为POSCAR_
|
60
66
|
files = [f for f in os.listdir(directory) if f.startswith(prefix_name)]
|
@@ -72,6 +78,9 @@ class CrystalGenerator:
|
|
72
78
|
):
|
73
79
|
"""
|
74
80
|
Based on the provided ion species and corresponding numbers, use pyxtal to randomly generate ion crystal structures based on crystal space groups.
|
81
|
+
:params
|
82
|
+
num_per_group: The number of POSCAR files to be generated for each space group, default is 100.
|
83
|
+
space_groups_limit: The maximum number of space groups to be searched, default is 230, which is the total number of space groups.
|
75
84
|
"""
|
76
85
|
# 如果目录不存在,则创建POSCAR_Files文件夹
|
77
86
|
os.makedirs(self.POSCAR_dir, exist_ok=True)
|
@@ -135,7 +144,14 @@ class CrystalGenerator:
|
|
135
144
|
)
|
136
145
|
|
137
146
|
def _single_phonopy_processing(self, filename):
|
138
|
-
|
147
|
+
"""
|
148
|
+
Private method:
|
149
|
+
Process a single POSCAR file using phonopy to generate symmetric primitive cells and conventional cells.
|
150
|
+
|
151
|
+
:params
|
152
|
+
filename: The name of the POSCAR file to be processed.
|
153
|
+
"""
|
154
|
+
# 按顺序处理POSCAR文件,首先复制一份无数字后缀的POSCAR文件
|
139
155
|
shutil.copy(f"{self.POSCAR_dir}/{filename}", f"{self.POSCAR_dir}/POSCAR")
|
140
156
|
try:
|
141
157
|
subprocess.run(["nohup", "phonopy", "--symmetry", "POSCAR"], check=True)
|
@@ -153,7 +169,7 @@ class CrystalGenerator:
|
|
153
169
|
# 检查生成的POSCAR中的原子数,如果不匹配则删除该POSCAR并在日志中记录
|
154
170
|
if cell_atoms != self.cell_atoms:
|
155
171
|
error_message = f"Atom number mismatch ({cell_atoms} vs {self.cell_atoms})"
|
156
|
-
|
172
|
+
print(f"{filename} - {error_message}")
|
157
173
|
|
158
174
|
# 新增:回溯空间群归属
|
159
175
|
poscar_index = int(filename.split('_')[1]) # 提取POSCAR编号
|
@@ -179,7 +195,15 @@ class CrystalGenerator:
|
|
179
195
|
os.remove(f"{self.primitive_cell_dir}/{filename}")
|
180
196
|
|
181
197
|
def _find_space_group(self, poscar_index: int) -> int:
|
182
|
-
"""
|
198
|
+
"""
|
199
|
+
Private method:
|
200
|
+
Find the space group for a given POSCAR index based on the group_counts.
|
201
|
+
|
202
|
+
:params
|
203
|
+
poscar_index: The index of the POSCAR file to find the space group for.
|
204
|
+
|
205
|
+
:return: The space group number corresponding to the POSCAR index.
|
206
|
+
"""
|
183
207
|
cumulative = 0
|
184
208
|
for idx, count in enumerate(self.group_counts, start=1):
|
185
209
|
if cumulative <= poscar_index < cumulative + count:
|
@@ -219,6 +243,11 @@ class CrystalGenerator:
|
|
219
243
|
def dpdisp_mlp_tasks(self, machine: str, resources: str, nodes: int = 1):
|
220
244
|
"""
|
221
245
|
Based on the dpdispatcher module, prepare and submit files for optimization on remote server or local machine.
|
246
|
+
|
247
|
+
params:
|
248
|
+
machine: The machine configuration file for dpdispatcher, can be in JSON or YAML format.
|
249
|
+
resources: The resources configuration file for dpdispatcher, can be in JSON or YAML format.
|
250
|
+
nodes: The number of nodes to be used for optimization, default is 1.
|
222
251
|
"""
|
223
252
|
# 调整工作目录,减少错误发生
|
224
253
|
os.chdir(self.primitive_cell_dir)
|
@@ -233,26 +262,31 @@ class CrystalGenerator:
|
|
233
262
|
machine = Machine.load_from_yaml(machine)
|
234
263
|
else:
|
235
264
|
raise KeyError("Not supported machine file type")
|
265
|
+
if resources.endswith(".json"):
|
266
|
+
resources = Resources.load_from_json(resources)
|
267
|
+
elif resources.endswith(".yaml"):
|
268
|
+
resources = Resources.load_from_yaml(resources)
|
269
|
+
else:
|
270
|
+
raise KeyError("Not supported resources file type")
|
236
271
|
# 由于dpdispatcher对于远程服务器以及本地运行的forward_common_files的默认存放位置不同,因此需要预先进行判断,从而不改动优化脚本
|
237
272
|
machine_inform = machine.serialize()
|
273
|
+
resources_inform = resources.serialize()
|
238
274
|
if machine_inform["context_type"] == "SSHContext":
|
239
275
|
# 如果调用远程服务器,则创建二级目录
|
240
276
|
parent = "data/"
|
241
277
|
elif machine_inform["context_type"] == "LocalContext":
|
242
278
|
# 如果在本地运行作业,则只在后续创建一级目录
|
243
279
|
parent = ""
|
244
|
-
|
245
|
-
|
246
|
-
|
280
|
+
if (
|
281
|
+
machine_inform["batch_type"] == "Shell"
|
282
|
+
and resources_inform["gpu_per_node"] != 0
|
283
|
+
):
|
284
|
+
# 如果是本地运行,则根据显存占用率阈值,等待可用的GPU
|
285
|
+
selected_gpu = _wait_for_gpu(memory_percent_threshold=40, wait_time=600)
|
286
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = str(selected_gpu)
|
247
287
|
|
248
|
-
from dpdispatcher import
|
288
|
+
from dpdispatcher import Task, Submission
|
249
289
|
|
250
|
-
if resources.endswith(".json"):
|
251
|
-
resources = Resources.load_from_json(resources)
|
252
|
-
elif resources.endswith(".yaml"):
|
253
|
-
resources = Resources.load_from_yaml(resources)
|
254
|
-
else:
|
255
|
-
raise KeyError("Not supported resources file type")
|
256
290
|
# 依次读取primitive_cell文件夹中的所有POSCAR文件和对应的序号
|
257
291
|
primitive_cell_file_index_pairs = self._sequentially_read_files(
|
258
292
|
self.primitive_cell_dir, prefix_name="POSCAR_"
|
@@ -337,8 +371,14 @@ class CrystalGenerator:
|
|
337
371
|
logging.info("Batch optimization completed!!!")
|
338
372
|
|
339
373
|
|
340
|
-
def
|
341
|
-
"""
|
374
|
+
def _get_available_gpus(memory_percent_threshold=40):
|
375
|
+
"""
|
376
|
+
Private method:
|
377
|
+
Get available GPUs with memory usage below the specified threshold.
|
378
|
+
|
379
|
+
params:
|
380
|
+
memory_percent_threshold (int): The threshold for GPU memory usage percentage.
|
381
|
+
"""
|
342
382
|
try:
|
343
383
|
# 获取 nvidia-smi 的输出
|
344
384
|
output = subprocess.check_output(
|
@@ -368,10 +408,16 @@ def get_available_gpus(memory_percent_threshold=40):
|
|
368
408
|
return []
|
369
409
|
|
370
410
|
|
371
|
-
def
|
372
|
-
"""
|
411
|
+
def _wait_for_gpu(memory_percent_threshold=40, wait_time=300):
|
412
|
+
"""
|
413
|
+
Private method:
|
414
|
+
Wait until a GPU is available with memory usage below the specified threshold.
|
415
|
+
params:
|
416
|
+
memory_percent_threshold (int): The threshold for GPU memory usage percentage.
|
417
|
+
wait_time (int): The time to wait before checking again, in seconds.
|
418
|
+
"""
|
373
419
|
while True:
|
374
|
-
available_gpus =
|
420
|
+
available_gpus = _get_available_gpus(memory_percent_threshold)
|
375
421
|
logging.info(f"Available GPU: {available_gpus}")
|
376
422
|
if available_gpus:
|
377
423
|
selected_gpu = available_gpus[0]
|
ion_CSP/identify_molecules.py
CHANGED
@@ -7,6 +7,17 @@ from ase.neighborlist import NeighborList, natural_cutoffs
|
|
7
7
|
|
8
8
|
|
9
9
|
def identify_molecules(atoms) -> Tuple[List[Dict[str, int]], bool]:
|
10
|
+
"""
|
11
|
+
Identify independent molecules in a given set of atoms.
|
12
|
+
This function uses a depth-first search (DFS) approach to find connected components in the atomic structure,
|
13
|
+
treating each connected component as a separate molecule.
|
14
|
+
params:
|
15
|
+
atoms: ASE Atoms object containing the atomic structure.
|
16
|
+
returns:
|
17
|
+
A tuple containing:
|
18
|
+
- A list of dictionaries, each representing a molecule with element counts.
|
19
|
+
- A boolean flag indicating whether the identified molecules match the initial set of molecules.
|
20
|
+
"""
|
10
21
|
visited = set() # 用于记录已经访问过的原子索引
|
11
22
|
identified_molecules = [] # 用于存储识别到的独立分子
|
12
23
|
# 基于共价半径为每个原子生成径向截止
|
@@ -63,6 +74,10 @@ def identify_molecules(atoms) -> Tuple[List[Dict[str, int]], bool]:
|
|
63
74
|
def molecules_information(molecules: List[Dict[str, int]], molecules_flag: bool, initial_information: List[Dict[str, int]]):
|
64
75
|
"""
|
65
76
|
Set the output format of the molecule. Output simplified element information in the specified order of C, N, O, H, which may include other elements.
|
77
|
+
params:
|
78
|
+
molecules: A list of dictionaries representing identified molecules with element counts.
|
79
|
+
molecules_flag: A boolean flag indicating whether the identified molecules match the initial set of molecules.
|
80
|
+
initial_information: A list of dictionaries representing the initial set of molecules with element counts.
|
66
81
|
"""
|
67
82
|
# 定义固定顺序的元素
|
68
83
|
fixed_order = ['C', 'N', 'O', 'H']
|