ion-CSP 2.1.5__py3-none-any.whl → 2.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ion_CSP/__init__.py +3 -3
- ion_CSP/convert_SMILES.py +39 -11
- ion_CSP/empirical_estimate.py +288 -84
- ion_CSP/gen_opt.py +68 -22
- ion_CSP/identify_molecules.py +15 -0
- ion_CSP/log_and_time.py +55 -8
- ion_CSP/mlp_opt.py +52 -6
- ion_CSP/read_mlp_density.py +15 -1
- {run → ion_CSP/run}/main_EE.py +11 -13
- ion_CSP/task_manager.py +2 -2
- ion_CSP/upload_download.py +0 -1
- ion_CSP/vasp_processing.py +57 -28
- {ion_csp-2.1.5.dist-info → ion_csp-2.1.9.dist-info}/METADATA +44 -16
- ion_csp-2.1.9.dist-info/RECORD +43 -0
- {ion_csp-2.1.5.dist-info → ion_csp-2.1.9.dist-info}/licenses/LICENSE +1 -1
- {ion_csp-2.1.5.dist-info → ion_csp-2.1.9.dist-info}/top_level.txt +0 -1
- ion_csp-2.1.5.dist-info/RECORD +0 -44
- run/update_changelog.py +0 -68
- {run → ion_CSP/run}/__init__.py +0 -0
- {run → ion_CSP/run}/main_CSP.py +0 -0
- {run → ion_CSP/run}/run_convert_SMILES.py +0 -0
- {run → ion_CSP/run}/run_empirical_estimate.py +0 -0
- {run → ion_CSP/run}/run_gen_opt.py +0 -0
- {run → ion_CSP/run}/run_read_mlp_density.py +0 -0
- {run → ion_CSP/run}/run_upload_download.py +0 -0
- {run → ion_CSP/run}/run_vasp_processing.py +0 -0
- {ion_csp-2.1.5.dist-info → ion_csp-2.1.9.dist-info}/WHEEL +0 -0
- {ion_csp-2.1.5.dist-info → ion_csp-2.1.9.dist-info}/entry_points.txt +0 -0
ion_CSP/__init__.py
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
__author__ = "Ze Yang"
|
2
2
|
__contact__ = "yangze1995007@163.com"
|
3
3
|
__license__ = "MIT"
|
4
|
-
__version__ = "2.1.
|
5
|
-
__date__ = "2025-06-
|
4
|
+
__version__ = "2.1.9"
|
5
|
+
__date__ = "2025-06-27"
|
6
6
|
|
7
7
|
|
8
8
|
try:
|
9
|
-
from importlib.metadata import version
|
9
|
+
from importlib.metadata import version
|
10
10
|
except Exception:
|
11
11
|
try:
|
12
12
|
from importlib_metadata import version
|
ion_CSP/convert_SMILES.py
CHANGED
@@ -2,6 +2,7 @@ import os
|
|
2
2
|
import shutil
|
3
3
|
import logging
|
4
4
|
import pandas as pd
|
5
|
+
import importlib.resources
|
5
6
|
from typing import List
|
6
7
|
from rdkit import Chem
|
7
8
|
from rdkit.Chem import AllChem
|
@@ -13,9 +14,13 @@ class SmilesProcessing:
|
|
13
14
|
|
14
15
|
def __init__(self, work_dir: str, csv_file: str, converted_folder: str = '1_1_SMILES_gjf', optimized_dir: str = '1_2_Gaussian_optimized'):
|
15
16
|
"""
|
16
|
-
|
17
|
+
This class is used to process SMILES codes from a CSV file, convert them into Gaussian input files, and prepare for optimization tasks. It also supports grouping by charge and filtering based on functional groups.
|
18
|
+
|
19
|
+
params:
|
17
20
|
work_dir: the path of the working directory.
|
18
21
|
csv_file: the csv file name in the working directory.
|
22
|
+
converted_folder: the folder name for storing converted SMILES files.
|
23
|
+
optimized_dir: the folder name for storing Gaussian optimized files.
|
19
24
|
"""
|
20
25
|
redirect_dpdisp_logging(os.path.join(work_dir, "dpdispatcher.log"))
|
21
26
|
# 读取csv文件并处理数据, csv文件的表头包括 SMILES, Charge, Refcode或Number
|
@@ -28,7 +33,7 @@ class SmilesProcessing:
|
|
28
33
|
self.base_dir, converted_folder, os.path.splitext(csv_file)[0]
|
29
34
|
)
|
30
35
|
self.gaussian_optimized_dir = os.path.join(self.base_dir, optimized_dir)
|
31
|
-
self.param_dir =
|
36
|
+
self.param_dir = importlib.resources.files("ion_CSP.param")
|
32
37
|
original_df = pd.read_csv(csv_path)
|
33
38
|
logging.info(f"Processing {csv_path}")
|
34
39
|
# 对SMILES码去重
|
@@ -53,19 +58,27 @@ class SmilesProcessing:
|
|
53
58
|
self, dir: str, smiles: str, basename: str, charge: int
|
54
59
|
):
|
55
60
|
"""
|
56
|
-
Private method:
|
61
|
+
Private method:
|
62
|
+
Use the rdkit module to read SMILES code and convert it into the required file types such as gjf, xyz, mol, etc.
|
57
63
|
|
58
|
-
|
64
|
+
params:
|
59
65
|
dir: The directory used for outputting files, regardless of existence of the directory.
|
60
66
|
smiles: SMILES code to be converted.
|
61
67
|
basename: The reference code or number corresponding to SMILES code.
|
62
68
|
charge: The charge carried by ions.
|
69
|
+
|
63
70
|
return:
|
64
71
|
result_code: Result code 0 or -1, representing success and failure respectively.
|
65
72
|
basename: The corresponding basename.
|
66
73
|
"""
|
67
74
|
mol = Chem.MolFromSmiles(smiles)
|
68
|
-
|
75
|
+
try:
|
76
|
+
mol = Chem.AddHs(mol)
|
77
|
+
except Exception as e:
|
78
|
+
logging.error(
|
79
|
+
f"Error occurred while adding hydrogens to molecule {basename} with charge {charge}: {e}"
|
80
|
+
)
|
81
|
+
return 1, basename # 返回错误码1表示失败
|
69
82
|
try:
|
70
83
|
# 生成3D坐标
|
71
84
|
AllChem.EmbedMolecule(mol)
|
@@ -144,6 +157,12 @@ class SmilesProcessing:
|
|
144
157
|
):
|
145
158
|
"""
|
146
159
|
Screen based on the provided functional groups and charges.
|
160
|
+
|
161
|
+
params:
|
162
|
+
charge_screen: The charge to screen for, default is 0.
|
163
|
+
group_screen: The functional group to screen for, default is empty string.
|
164
|
+
group_name: The name of the functional group, used for naming the output directory.
|
165
|
+
group_screen_invert: If True, invert the screening condition for the functional group.
|
147
166
|
"""
|
148
167
|
# 另外筛选出符合条件的离子
|
149
168
|
screened = self.df
|
@@ -179,6 +198,12 @@ class SmilesProcessing:
|
|
179
198
|
):
|
180
199
|
"""
|
181
200
|
Based on the dpdispatcher module, prepare and submit files for optimization on remote server or local machine.
|
201
|
+
|
202
|
+
params:
|
203
|
+
folders: List of folders containing .gjf files to be processed, if empty, all folders in the converted directory will be processed.
|
204
|
+
machine: The machine configuration file for dpdispatcher, can be a JSON or YAML file.
|
205
|
+
resources: The resources configuration file for dpdispatcher, can be a JSON or YAML file.
|
206
|
+
nodes: The number of nodes to distribute the tasks to, default is 1.
|
182
207
|
"""
|
183
208
|
if os.path.exists(self.gaussian_optimized_dir):
|
184
209
|
logging.error(f'The directory {self.gaussian_optimized_dir} has already existed.')
|
@@ -233,7 +258,7 @@ class SmilesProcessing:
|
|
233
258
|
task_dir = os.path.join(self.converted_dir, f"{parent}pop{pop}")
|
234
259
|
os.makedirs(task_dir, exist_ok=True)
|
235
260
|
for file in forward_files:
|
236
|
-
shutil.copyfile(
|
261
|
+
shutil.copyfile(self.param_dir.joinpath(file), f"{task_dir}/{file}")
|
237
262
|
for job_i in node_jobs[pop]:
|
238
263
|
# 将分配好的 .gjf 文件添加到对应的上传文件中
|
239
264
|
forward_files.append(gjf_files[job_i])
|
@@ -274,11 +299,14 @@ class SmilesProcessing:
|
|
274
299
|
for job_i in node_jobs[pop]:
|
275
300
|
base_name, _ = os.path.splitext(gjf_files[job_i])
|
276
301
|
# 在优化后都取回每个 .gjf 文件对应的 .log、.fchk 输出文件
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
302
|
+
try:
|
303
|
+
for ext in ['gjf', 'log', 'fchk']:
|
304
|
+
shutil.copyfile(
|
305
|
+
f"{task_dir}/{base_name}.{ext}",
|
306
|
+
f"{optimized_folder_dir}/{base_name}.{ext}"
|
307
|
+
)
|
308
|
+
except FileNotFoundError as e:
|
309
|
+
logging.error(f"File not found during copying, please check the configuration and state of Gaussian: {e}")
|
282
310
|
# 在成功完成Gaussian优化后,删除 1_1_SMILES_gjf/{csv}/{parent}/pop{n} 文件夹以节省空间
|
283
311
|
shutil.rmtree(task_dir)
|
284
312
|
shutil.copyfile(
|
ion_CSP/empirical_estimate.py
CHANGED
@@ -53,12 +53,26 @@ x.fchk //指定计算文件
|
|
53
53
|
|
54
54
|
class EmpiricalEstimation:
|
55
55
|
|
56
|
-
def __init__(
|
56
|
+
def __init__(
|
57
|
+
self,
|
58
|
+
work_dir: str,
|
59
|
+
folders: List[str],
|
60
|
+
ratios: List[int],
|
61
|
+
sort_by: str,
|
62
|
+
optimized_dir: str = "1_2_Gaussian_optimized",
|
63
|
+
):
|
57
64
|
"""
|
58
|
-
|
65
|
+
This class is designed to process Gaussian calculation files, perform electrostatic potential analysis using Multiwfn, and estimate the nitrogen content or density of ion crystal combinations. The class will also generate .csv files containing sorted nitrogen content or density based on the specified sorting criterion.
|
66
|
+
|
67
|
+
:params
|
68
|
+
work_dir: The working directory where the Gaussian calculation files are located.
|
69
|
+
folders: A list of folder names containing the Gaussian calculation files.
|
70
|
+
ratios: A list of integers representing the ratio of each folder in the combination.
|
71
|
+
sort_by: A string indicating the sorting criterion, either 'density' or 'nitrogen'.
|
59
72
|
"""
|
60
73
|
self.base_dir = work_dir
|
61
|
-
os.
|
74
|
+
self.gaussian_optimized_dir = os.path.join(self.base_dir, optimized_dir)
|
75
|
+
os.chdir(self.gaussian_optimized_dir)
|
62
76
|
# 确保所取的文件夹数与配比数是对应的
|
63
77
|
if len(folders) != len(ratios):
|
64
78
|
raise ValueError('The number of folders must match the number of ratios.')
|
@@ -69,10 +83,86 @@ class EmpiricalEstimation:
|
|
69
83
|
raise ValueError(f"The sort_by parameter must be either 'density' or 'nitrogen', but got '{sort_by}'")
|
70
84
|
self.density_csv = "sorted_density.csv"
|
71
85
|
self.nitrogen_csv = "sorted_nitrogen.csv"
|
86
|
+
self.carbon_nitrogen_csv = "specific_C_N_ratio.csv"
|
87
|
+
# 检查Multiwfn可执行文件是否存在
|
88
|
+
self.multiwfn_path = self._check_multiwfn_executable()
|
89
|
+
|
90
|
+
def _check_multiwfn_executable(self):
    '''
    Private method:
    Locate the Multiwfn executable in the system PATH.
    "Multiwfn_noGUI" is looked up first and "Multiwfn" is used as the fallback.

    :return: The full path of the executable that was found.

    :raises FileNotFoundError: If neither executable is found in PATH.
    '''
    multiwfn_path = shutil.which("Multiwfn_noGUI") or shutil.which("Multiwfn")
    if not multiwfn_path:
        # The message names exactly the two executables searched for above
        error_msg = (
            "Error: No detected Multiwfn executable file (Multiwfn_noGUI or Multiwfn), please check:\n"
            "1. Has Multiwfn been installed correctly?\n"
            "2. Has Multiwfn been added to the system PATH environment variable?"
        )
        print(error_msg)
        logging.error(error_msg)
        raise FileNotFoundError(
            "No detected Multiwfn executable file (Multiwfn_noGUI or Multiwfn)"
        )
    print(f"Multiwfn executable found at: {multiwfn_path}")
    logging.info(f"Multiwfn executable found at: {multiwfn_path}")
    return multiwfn_path
|
110
|
+
|
111
|
+
def _multiwfn_cmd_build(self, input_content, output_file=None):
    '''
    Private method:
    Write the Multiwfn commands to a temporary input.txt in the current working directory, then run the executable stored in self.multiwfn_path with that file as its standard input.

    :params
        input_content: The text fed to Multiwfn through standard input.
        output_file: Optional path that receives Multiwfn's standard output. If it is None or empty, the output is not redirected.

    A non-zero exit status of Multiwfn is logged and not re-raised; any other error is logged and re-raised.
    The temporary input.txt is removed in every case.
    '''
    input_path = "input.txt"
    # Create input.txt to hold the Multiwfn command content
    with open(input_path, "w") as input_file:
        input_file.write(input_content)
    stdout_handle = None
    try:
        if output_file:
            # Honour the path given by the caller instead of a fixed file name
            stdout_handle = open(output_file, "w")
        with open(input_path, "r") as stdin_handle:
            # Run Multiwfn with input.txt as stdin; stdout goes to output_file when one is given
            subprocess.run(
                [self.multiwfn_path],
                stdin=stdin_handle,
                stdout=stdout_handle,
                check=True,
            )
    except subprocess.CalledProcessError as e:
        logging.error(
            f"Error executing Multiwfn command with input {input_content}: {e}"
        )
    except Exception as e:
        logging.error(f"Unexpected error: {e}")
        raise
    finally:
        if stdout_handle is not None:
            stdout_handle.close()
        # Clean up the temporary file only after its handle has been closed
        try:
            os.remove(input_path)
        except OSError as e:
            logging.warning(f"Cannot remove temporary file input.txt: {str(e)}")
|
72
159
|
|
73
160
|
def multiwfn_process_fchk_to_json(self, specific_directory: str = None):
|
74
161
|
'''
|
75
162
|
If a specific directory is given, this method can be used separately to implement batch processing of FCHK files with Multiwfn and save the desired electrostatic potential analysis results to the corresponding JSON file. Otherwise, the folder list provided during initialization will be processed sequentially.
|
163
|
+
|
164
|
+
:params
|
165
|
+
specific_directory: The specific directory to process. If None, all folders will be processed.
|
76
166
|
'''
|
77
167
|
if specific_directory is None:
|
78
168
|
for folder in self.folders:
|
@@ -84,7 +174,11 @@ class EmpiricalEstimation:
|
|
84
174
|
|
85
175
|
def _multiwfn_process_fchk_to_json(self, folder: str):
|
86
176
|
'''
|
177
|
+
Private method:
|
87
178
|
Perform electrostatic potential analysis on .fchk files using Multiwfn and save the analysis results to a .json file.
|
179
|
+
|
180
|
+
:params
|
181
|
+
folder: The folder containing the .fchk files to be processed.
|
88
182
|
'''
|
89
183
|
# 在每个文件夹中获取 .fchk 文件并根据文件名排序, 再用 Multiwfn 进行静电势分析, 最后将分析结果保存到同名 .json 文件中
|
90
184
|
fchk_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.fchk')]
|
@@ -119,24 +213,31 @@ class EmpiricalEstimation:
|
|
119
213
|
|
120
214
|
def _single_multiwfn_fchk_to_json(self, fchk_filename: str):
|
121
215
|
'''
|
122
|
-
Private method:
|
216
|
+
Private method:
|
217
|
+
Use multiwfn to perform electrostatic potential analysis on each FCHK file separately, and save the required results to a corresponding JSON file.
|
218
|
+
|
219
|
+
:params
|
220
|
+
fchk_filename: The full path of the FCHK file to be processed.
|
221
|
+
|
222
|
+
:return: True if the processing is successful, False if the FCHK file is invalid.
|
123
223
|
'''
|
124
224
|
print(f'Multiwfn processing {fchk_filename}')
|
125
225
|
logging.info(f'Multiwfn processing {fchk_filename}')
|
126
226
|
result_flag = True
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
subprocess.run('Multiwfn_noGUI < input.txt > output.txt', shell=True, capture_output=True)
|
133
|
-
except FileNotFoundError:
|
134
|
-
subprocess.run('Multiwfn < input.txt > output.txt', shell=True, capture_output=True)
|
227
|
+
self._multiwfn_cmd_build(
|
228
|
+
input_content=f"{fchk_filename}\n12\n0\n-1\n-1\nq\n",
|
229
|
+
output_file='output.txt')
|
230
|
+
print(f'Finished processing {fchk_filename}')
|
231
|
+
|
135
232
|
# 获取目录以及 .fchk 文件的无后缀文件名, 即 refcode
|
136
233
|
folder, filename = os.path.split(fchk_filename)
|
137
234
|
refcode, _ = os.path.splitext(filename)
|
138
|
-
|
139
|
-
|
235
|
+
try:
|
236
|
+
with open('output.txt', 'r') as output_file:
|
237
|
+
output_content = output_file.read()
|
238
|
+
except Exception as e:
|
239
|
+
logging.error(f"Error reading output.txt: {e}")
|
240
|
+
raise
|
140
241
|
# 提取所需数据
|
141
242
|
volume_match = re.search(r'Volume:\s*([\d.]+)\s*Bohr\^3\s+\(\s*([\d.]+)\s*Angstrom\^3\)', output_content)
|
142
243
|
density_match = re.search(r'Estimated density according to mass and volume \(M/V\):\s*([\d.]+)\s*g/cm\^3', output_content)
|
@@ -186,15 +287,20 @@ class EmpiricalEstimation:
|
|
186
287
|
with open (f"{folder}/{refcode}.json", 'w') as json_file:
|
187
288
|
json.dump(result, json_file, indent=4)
|
188
289
|
shutil.copyfile(src=f"{folder}/{refcode}.json", dst=f"Optimized/{folder}/{refcode}.json")
|
189
|
-
os.remove('input.txt')
|
190
|
-
os.remove('output.txt')
|
191
290
|
logging.info(f'Finished processing {fchk_filename}')
|
291
|
+
try:
|
292
|
+
os.remove("output.txt")
|
293
|
+
except Exception as e:
|
294
|
+
logging.warning(f"Cannot remove temporary file output.txt: {str(e)}")
|
192
295
|
return result_flag
|
193
296
|
|
194
297
|
def gaussian_log_to_optimized_gjf(self, specific_directory: str = None):
|
195
298
|
"""
|
196
299
|
If a specific directory is given, this method can be used separately to batch process the last frame of Gaussian optimized LOG files into GJF files using Multiwfn.
|
197
300
|
Otherwise, the folder list provided during initialization will be processed in order.
|
301
|
+
|
302
|
+
:params
|
303
|
+
specific_directory: The specific directory to process. If None, all folders will be processed.
|
198
304
|
"""
|
199
305
|
if specific_directory is None:
|
200
306
|
for folder in self.folders:
|
@@ -206,7 +312,11 @@ class EmpiricalEstimation:
|
|
206
312
|
|
207
313
|
def _gaussian_log_to_optimized_gjf(self, folder: str):
|
208
314
|
'''
|
315
|
+
Private method:
|
209
316
|
Due to the lack of support of Pyxtal module for LOG files in subsequent crystal generation, it is necessary to convert the last frame of the Gaussian optimized LOG file to a .gjf file with Multiwfn processing.
|
317
|
+
|
318
|
+
:params
|
319
|
+
folder: The folder containing the Gaussian LOG files to be processed.
|
210
320
|
'''
|
211
321
|
# 在每个文件夹中获取 .log 文件并根据文件名排序, 再用Multiwfn载入优化最后一帧转换为 gjf 文件
|
212
322
|
log_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.log')]
|
@@ -226,19 +336,24 @@ class EmpiricalEstimation:
|
|
226
336
|
pass
|
227
337
|
logging.info(f'\nThe .log to .gjf conversion by Multiwfn for {folder} folder has completed, and the optimized .gjf structures have been stored in the optimized directory.\n')
|
228
338
|
|
229
|
-
def _single_multiwfn_log_to_gjf(self, folder: str, log_filename: str):
|
339
|
+
def _single_multiwfn_log_to_gjf(self, folder: str, log_filename: str):
|
340
|
+
"""
|
341
|
+
Private method:
|
342
|
+
Use Multiwfn to convert the last frame of the Gaussian optimized LOG file to a .gjf file.
|
343
|
+
|
344
|
+
:params
|
345
|
+
folder: The folder containing the Gaussian LOG file to be processed.
|
346
|
+
log_filename: The full path of the LOG file to be processed.
|
347
|
+
"""
|
230
348
|
# 获取目录以及 .fchk 文件的无后缀文件名, 即 refcode
|
231
349
|
_, filename = os.path.split(log_filename)
|
232
350
|
refcode, _ = os.path.splitext(filename)
|
351
|
+
|
233
352
|
try:
|
234
|
-
# 创建 input.txt 用于存储 Multiwfn 命令内容
|
235
|
-
with open('input.txt', 'w') as input_file:
|
236
|
-
input_file.write(f"{log_filename}\ngi\nOptimized/{folder}/{refcode}.gjf\nq\n")
|
237
353
|
# Multiwfn首先载入优化任务的out/log文件, 然后输入gi, 再输入要保存的gjf文件名, 此时里面的结构就是优化最后一帧的, 还避免了使用完全图形界面
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
subprocess.run('Multiwfn < input.txt', shell=True, capture_output=True)
|
354
|
+
self._multiwfn_cmd_build(
|
355
|
+
input_content=f"{log_filename}\ngi\nOptimized/{folder}/{refcode}.gjf\nq\n"
|
356
|
+
)
|
242
357
|
if os.path.exists(f"Optimized/{folder}/{refcode}.gjf"):
|
243
358
|
print(f'Finished converting {refcode} .log to .gjf')
|
244
359
|
logging.info(f'Finished converting {refcode} .log to .gjf')
|
@@ -249,6 +364,45 @@ class EmpiricalEstimation:
|
|
249
364
|
print(f'Error with processing {log_filename}: {e}')
|
250
365
|
logging.error(f'Error with processing {log_filename}: {e}')
|
251
366
|
|
367
|
+
def _read_gjf_elements(self, gjf_file):
    """
    Private method:
    Read the elements from a .gjf file and return a dictionary with element counts.

    Counting starts after the charge/multiplicity line (a line made of exactly two integers) and stops at the first blank line after it, which ends the molecule specification. Later sections, such as connectivity tables, are therefore not counted as atoms.

    :params
        gjf_file: The full path of the .gjf file to be processed.

    :return: A dictionary with element symbols as keys and their counts as values.
    """
    atomic_counts = {}
    # Becomes True once the charge/multiplicity line has been seen
    in_geometry = False
    with open(gjf_file, "r") as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                if in_geometry:
                    # A blank line terminates the atom list
                    break
                continue
            # Skip Link 0 (%) and route (#) lines
            if line.startswith(("%", "#")):
                continue
            parts = line.split()
            # A line with exactly two integers is the charge/multiplicity line; an explicit sign on the charge ("+1" or "-1") is accepted
            if (
                len(parts) == 2
                and parts[0].lstrip("+-").isdigit()
                and parts[1].isdigit()
            ):
                in_geometry = True
                continue
            if in_geometry:
                # The first token of an atom line is the element symbol
                element = parts[0]
                atomic_counts[element] = atomic_counts.get(element, 0) + 1
    return atomic_counts
|
405
|
+
|
252
406
|
def nitrogen_content_estimate(self):
|
253
407
|
"""
|
254
408
|
Evaluate the priority of ion crystal combinations based on nitrogen content and generate .csv files
|
@@ -290,55 +444,62 @@ class EmpiricalEstimation:
|
|
290
444
|
writer.writerow(header) # 写入表头
|
291
445
|
writer.writerows(data) # 写入排序后的数
|
292
446
|
|
293
|
-
def
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
# 跳过注释和空行
|
303
|
-
if line.startswith("%") or line.startswith("#") or not line:
|
304
|
-
continue
|
305
|
-
# 检测只包含两个数字的行
|
306
|
-
parts = line.split()
|
307
|
-
if len(parts) == 2 and parts[0].lstrip("-").isdigit() and parts[1].isdigit():
|
308
|
-
start_reading = True
|
309
|
-
continue
|
310
|
-
if start_reading:
|
311
|
-
element = parts[0] # 第一个部分是元素符号
|
312
|
-
# 更新元素计数
|
313
|
-
if element in atomic_counts:
|
314
|
-
atomic_counts[element] += 1
|
315
|
-
else:
|
316
|
-
atomic_counts[element] = 1
|
317
|
-
return atomic_counts
|
447
|
+
def carbon_nitrogen_ratio_estimate(self):
    """
    Rank ion crystal combinations by their nitrogen-to-carbon atom ratio and write the result to the .csv file named by self.carbon_nitrogen_csv.

    For every combination of .gjf files, the C, N and O atoms are counted, each weighted by the ion count of its component. The N:C ratio is rounded to two decimals; a combination without carbon gets the sentinel ratio 100.0. Rows are sorted by ratio in descending order, with ties broken by descending oxygen atom count.
    No ratio threshold is applied in this method: every combination is written.

    :raises ValueError: If a .gjf file contains an element other than H, C, N or O.
    """
    # Elements the estimate knows how to handle
    supported_elements = {"H", "C", "N", "O"}
    # Build every combination of .gjf files
    combinations = self._generate_combinations(suffix=".gjf")
    ranked_data = []

    for combo in combinations:
        carbon_atoms = 0
        nitrogen_atoms = 0
        oxygen_atoms = 0

        for gjf_file, ion_count in combo.items():
            atomic_counts = self._read_gjf_elements(gjf_file)
            for element, atom_count in atomic_counts.items():
                if element not in supported_elements:
                    raise ValueError(
                        "Contains element information not included, unable to calculate ratios"
                    )
                if element == "C":
                    carbon_atoms += atom_count * ion_count
                elif element == "N":
                    nitrogen_atoms += atom_count * ion_count
                elif element == "O":
                    oxygen_atoms += atom_count * ion_count

        # Guard against division by zero when the combination has no carbon
        if carbon_atoms != 0:
            nitrogen_carbon_ratio = round(nitrogen_atoms / carbon_atoms, 2)
        else:
            nitrogen_carbon_ratio = 100.0
        ranked_data.append((combo, nitrogen_carbon_ratio, oxygen_atoms))

    # Highest N:C ratio first, then highest oxygen atom count
    ranked_data.sort(key=lambda row: (-row[1], -row[2]))

    # Write the sorted .csv file
    with open(self.carbon_nitrogen_csv, "w", newline="", encoding="utf-8") as csv_file:
        writer = csv.writer(csv_file)
        # The header has one column per component of a combination
        num_components = len(combinations[0]) if combinations else 0
        header = [f"Component {i + 1}" for i in range(num_components)] + ["N_C_Ratio", "O_Atoms"]
        writer.writerow(header)

        for combo, nitrogen_carbon_ratio, oxygen_content in ranked_data:
            # Drop the .gjf suffix from the component names
            cleaned_combo = [name.replace(".gjf", "") for name in combo]
            writer.writerow(cleaned_combo + [nitrogen_carbon_ratio, oxygen_content])
|
342
503
|
|
343
504
|
def empirical_estimate(self):
|
344
505
|
"""
|
@@ -410,9 +571,51 @@ class EmpiricalEstimation:
|
|
410
571
|
writer.writerow(header) # 写入表头
|
411
572
|
writer.writerows(data) # 写入排序后的数
|
412
573
|
|
574
|
+
def _generate_combinations(self, suffix: str):
    """
    Private method:
    Generate all combinations that take one file with the given suffix from each folder in self.folders, and pair every chosen file with the ratio of its folder from self.ratios.

    :params
        suffix: The file suffix to filter the files in the folders.

    :return: A list of dictionaries, each mapping the chosen file path of every folder to that folder's ratio.

    :raises FileNotFoundError: If a folder contains no file with the given suffix.
    """
    # Collect the matching files of every folder, sorted by name
    all_files = []
    for folder in self.folders:
        suffix_files = sorted(
            os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(suffix)
        )
        print(f'Valid {suffix} file number in {folder}: {len(suffix_files)}')
        logging.info(f"Valid {suffix} file number in {folder}: {len(suffix_files)}")
        if not suffix_files:
            raise FileNotFoundError(f'No available {suffix} files in {folder} folder')
        all_files.append(suffix_files)

    # One file per folder, each paired with the ratio at the same position
    combinations = [
        dict(zip(folder_files, self.ratios))
        for folder_files in itertools.product(*all_files)
    ]
    print(f'Valid combination number: {len(combinations)}')
    logging.info(f'Valid combination number: {len(combinations)}')
    return combinations
|
606
|
+
|
413
607
|
def _copy_combo_file(self, combo_path, folder_basename, file_type):
|
608
|
+
"""
|
609
|
+
Private method:
|
610
|
+
Copy the specified file type from the Optimized directory to the combo_n folder.
|
611
|
+
|
612
|
+
:params
|
613
|
+
combo_path: The path to the combo_n folder where the file will be copied.
|
614
|
+
folder_basename: The basename of the folder containing the file to be copied.
|
615
|
+
file_type: The type of file to be copied (e.g., '.gjf', '.json').
|
616
|
+
"""
|
414
617
|
filename = f"{folder_basename}{file_type}"
|
415
|
-
source_path = os.path.join(self.
|
618
|
+
source_path = os.path.join(self.gaussian_optimized_dir, "Optimized", filename)
|
416
619
|
# 复制指定后缀名文件到对应的 combo_n 文件夹
|
417
620
|
if os.path.exists(source_path):
|
418
621
|
if os.path.exists(os.path.join(combo_path, os.path.basename(filename))):
|
@@ -422,15 +625,18 @@ class EmpiricalEstimation:
|
|
422
625
|
shutil.copy(source_path, combo_path)
|
423
626
|
logging.info(f'Copied {os.path.basename(source_path)} to {combo_path}')
|
424
627
|
else:
|
425
|
-
logging.error(
|
628
|
+
logging.error(
|
629
|
+
f"File of {filename} does not exist in {self.gaussian_optimized_dir}"
|
630
|
+
)
|
426
631
|
|
427
632
|
def make_combo_dir(self, target_dir: str, num_combos: int, ion_numbers: List[int]):
|
428
633
|
"""
|
429
634
|
Create a combo_n folder based on the .csv file and copy the corresponding .gjf structure file.
|
430
635
|
|
431
|
-
:
|
432
|
-
|
433
|
-
|
636
|
+
:params
|
637
|
+
target_directory: The target directory of the combo folder to be created
|
638
|
+
num_folders: The number of combo folders to be created
|
639
|
+
ion_numbers: The number of ions for ionic crystal generation step (generated in config.yaml in the corresponding combo_dir automatically)
|
434
640
|
"""
|
435
641
|
if self.sort_by == 'density':
|
436
642
|
base_csv = self.density_csv
|
@@ -464,15 +670,15 @@ class EmpiricalEstimation:
|
|
464
670
|
gjf_names.append(f"{folder_basename.split('/')[1]}.gjf")
|
465
671
|
|
466
672
|
# 生成上级目录路径并解析 .yaml 文件
|
467
|
-
parent_dir =
|
673
|
+
parent_dir = self.base_dir
|
468
674
|
parent_config_path = os.path.join(parent_dir, 'config.yaml')
|
469
|
-
base_config_path = os.path.join(self.
|
675
|
+
base_config_path = os.path.join(self.gaussian_optimized_dir, "config.yaml")
|
470
676
|
try:
|
471
677
|
with open(parent_config_path, 'r') as file:
|
472
678
|
config = yaml.safe_load(file)
|
473
679
|
except FileNotFoundError as e:
|
474
680
|
logging.warning(f"No available config.yaml file in parent directory: {parent_dir} \n{e}")
|
475
|
-
logging.info(f"Trying to load config.yaml file from base directory: {
|
681
|
+
logging.info(f"Trying to load config.yaml file from base directory: {parent_dir}")
|
476
682
|
try:
|
477
683
|
with open(base_config_path, 'r') as file:
|
478
684
|
try:
|
@@ -480,11 +686,10 @@ class EmpiricalEstimation:
|
|
480
686
|
except yaml.YAMLError as e:
|
481
687
|
logging.error(f"YAML configuration file parsing failed: {e}")
|
482
688
|
except FileNotFoundError as e:
|
483
|
-
logging.error(
|
689
|
+
logging.error(
|
690
|
+
f"No available config.yaml file either in parent directory: {parent_dir} and base directory {self.gaussian_optimized_dir} \n{e}"
|
691
|
+
)
|
484
692
|
raise
|
485
|
-
except PermissionError:
|
486
|
-
logging.error(f'No read permission for the path: {parent_dir}')
|
487
|
-
raise
|
488
693
|
except Exception as e:
|
489
694
|
logging.error(f'Unexpected error: {e}')
|
490
695
|
raise
|
@@ -502,4 +707,3 @@ class EmpiricalEstimation:
|
|
502
707
|
yaml.dump(config, file)
|
503
708
|
except Exception as e:
|
504
709
|
logging.error(f"Unexpected error: {e}")
|
505
|
-
|