ion-CSP 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,505 @@
1
+ import os
2
+ import re
3
+ import csv
4
+ import json
5
+ import yaml
6
+ import shutil
7
+ import logging
8
+ import itertools
9
+ import subprocess
10
+ from typing import List
11
+
12
+ """
13
+ Gaussian计算后把优化后的结构设为gjf文件准备再次优化:
14
+ Multiwfn载入优化任务的out/log文件, 然后输入gi, 再输入要保存的gjf文件名
15
+ 此时里面的结构就是优化最后一帧的, 还避免了使用完全图形界面
16
+
17
+ 首先对高斯计算产生的chk文件转化为fchk文件
18
+ 具体命令为formchk x.chk
19
+ 执行后就会发现计算文件夹中多了一个x.fchk文件
20
+ 运行Multiwfn后依次输入
21
+ x.fchk //指定计算文件
22
+ 12 //定量分子表面分析功能
23
+ 0 //开始分析。默认的是分析静电势
24
+ 示例输出:
25
+ ================= Summary of surface analysis =================
26
+
27
+ Volume: 504.45976 Bohr^3 ( 74.75322 Angstrom^3)
28
+ Estimated density according to mass and volume (M/V): 1.5557 g/cm^3
29
+ Minimal value: -127.53161 kcal/mol Maximal value: -114.64900 kcal/mol
30
+ Overall surface area: 320.06186 Bohr^2 ( 89.62645 Angstrom^2)
31
+ Positive surface area: 0.00000 Bohr^2 ( 0.00000 Angstrom^2)
32
+ Negative surface area: 320.06186 Bohr^2 ( 89.62645 Angstrom^2)
33
+ Overall average value: -0.19677551 a.u. ( -123.47860 kcal/mol)
34
+ Positive average value: NaN a.u. ( NaN kcal/mol)
35
+ Negative average value: -0.19677551 a.u. ( -123.47860 kcal/mol)
36
+ Overall variance (sigma^2_tot): 0.00002851 a.u.^2 ( 11.22495 (kcal/mol)^2)
37
+ Positive variance: 0.00000000 a.u.^2 ( 0.00000 (kcal/mol)^2)
38
+ Negative variance: 0.00002851 a.u.^2 ( 11.22495 (kcal/mol)^2)
39
+ Balance of charges (nu): 0.00000000
40
+ Product of sigma^2_tot and nu: 0.00000000 a.u.^2 ( 0.00000 (kcal/mol)^2)
41
+ Internal charge separation (Pi): 0.00453275 a.u. ( 2.84434 kcal/mol)
42
+ Molecular polarity index (MPI): 5.35453398 eV ( 123.47860 kcal/mol)
43
+ Nonpolar surface area (|ESP| <= 10 kcal/mol): 0.00 Angstrom^2 ( 0.00 %)
44
+ Polar surface area (|ESP| > 10 kcal/mol): 89.63 Angstrom^2 (100.00 %)
45
+ Overall skewness: 0.7476810720
46
+ Negative skewness: 0.7476810720
47
+
48
+ Surface analysis finished!
49
+ Total wall clock time passed during this task: 1 s
50
+ Note: Previous orbital information has been restored
51
+ Citation of molecular polarity index (MPI): Carbon, 171, 514 (2021) DOI: 10.1016/j.carbon.2020.09.048
52
+ """
53
+
54
class EmpiricalEstimation:
    """Rank ion combinations by predicted crystal density or nitrogen content.

    The workflow operates on folders of Gaussian results located inside the
    working directory. Multiwfn is used to (a) run the quantitative molecular
    surface (electrostatic potential) analysis on ``.fchk`` files and store the
    extracted values as ``.json`` files, and (b) convert the last frame of an
    optimization ``.log`` file into a ``.gjf`` file. The resulting files are
    combined according to ``ratios`` and ranked into ``.csv`` files, from
    which ``combo_n`` directories can be created.
    """

    # Multiwfn executables to try, in order of preference.
    _MULTIWFN_EXECUTABLES = ('Multiwfn_noGUI', 'Multiwfn')
    # Conversion factor between atomic mass units per Angstrom^3 and g/cm^3.
    _AMU_PER_A3_TO_G_PER_CM3 = 1.66054

    def __init__(self, work_dir: str, folders: List[str], ratios: List[int], sort_by: str):
        """
        Validate the arguments and switch the process into the working directory.

        :param work_dir: Directory containing the ion folders; becomes the current working directory
        :param folders: Folder names (relative to work_dir), one per ion species
        :param ratios: Number of ions taken from each folder, in the same order as folders
        :param sort_by: Ranking criterion, either 'density' or 'nitrogen'
        :raises ValueError: If folders and ratios differ in length or sort_by is unsupported
        """
        # Validate before any side effect so that a rejected call does not change the cwd
        if len(folders) != len(ratios):
            raise ValueError('The number of folders must match the number of ratios.')
        if sort_by not in ("density", "nitrogen"):
            raise ValueError(f"The sort_by parameter must be either 'density' or 'nitrogen', but got '{sort_by}'")
        self.base_dir = work_dir
        os.chdir(self.base_dir)
        self.folders = folders
        self.ratios = ratios
        self.sort_by = sort_by
        self.density_csv = "sorted_density.csv"
        self.nitrogen_csv = "sorted_nitrogen.csv"

    def _run_multiwfn(self, input_path: str, output_path: str = None):
        """
        Run Multiwfn with its interactive commands read from input_path.

        With shell=True a missing program never raises FileNotFoundError (the
        shell only returns exit status 127), so the executable is resolved
        explicitly and started without a shell.

        :param input_path: File holding the commands fed to Multiwfn on stdin
        :param output_path: File receiving stdout; when None the output is captured and discarded
        :return: The subprocess.CompletedProcess of the run
        :raises FileNotFoundError: If neither Multiwfn_noGUI nor Multiwfn is found on PATH
        """
        executable = next((name for name in self._MULTIWFN_EXECUTABLES if shutil.which(name)), None)
        if executable is None:
            raise FileNotFoundError('Neither Multiwfn_noGUI nor Multiwfn executable was found on PATH')
        with open(input_path, 'r') as stdin_file:
            if output_path is None:
                return subprocess.run([executable], stdin=stdin_file, capture_output=True)
            with open(output_path, 'w') as stdout_file:
                return subprocess.run([executable], stdin=stdin_file, stdout=stdout_file, stderr=subprocess.PIPE)

    @staticmethod
    def _remove_if_exists(path: str):
        """Delete a temporary file, ignoring the case where it is already gone."""
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    def multiwfn_process_fchk_to_json(self, specific_directory: str = None):
        '''
        Batch process FCHK files with Multiwfn and save the electrostatic potential analysis results to JSON files.

        If a specific directory is given, only that directory is processed. Otherwise, the folder list provided during initialization is processed sequentially.
        '''
        folders = self.folders if specific_directory is None else [specific_directory]
        for folder in folders:
            # The results are copied into Optimized/<folder>, so it must exist in both modes
            os.makedirs(f"Optimized/{folder}", exist_ok=True)
            self._multiwfn_process_fchk_to_json(folder)

    def _multiwfn_process_fchk_to_json(self, folder: str):
        '''
        Perform electrostatic potential analysis on the .fchk files of one folder using Multiwfn and save the analysis results to .json files.

        Files whose analysis fails are moved, together with their sibling Gaussian files, into Bad/<folder>.

        :raises FileNotFoundError: If the folder contains no .fchk file
        '''
        # Collect the .fchk files of the folder, sorted by name
        fchk_files = sorted(os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.fchk'))
        if not fchk_files:
            raise FileNotFoundError("No availible Gaussian .fchk file to process")
        os.makedirs(f"Optimized/{folder}", exist_ok=True)
        bad_files = []
        for fchk_file in fchk_files:
            base_name = os.path.splitext(fchk_file)[0]
            json_file = f'{base_name}.json'
            if not os.path.exists(json_file):
                if not self._single_multiwfn_fchk_to_json(fchk_file):
                    bad_files.append(base_name)
            elif os.path.exists(f"Optimized/{json_file}"):
                logging.info(f'{json_file} already exists, skipping multiwfn fchk_to_json processing.')
            else:
                shutil.copy(src=f"{json_file}", dst=f"Optimized/{json_file}")
        if bad_files:
            logging.error(f'Bad Gaussian results for {bad_files}')
            os.makedirs(f'Bad/{folder}', exist_ok=True)
            # Extensions of the Gaussian files that belong to one calculation
            suffixes = ['gjf', 'chk', 'log', 'fchk']
            for file in bad_files:
                for suffix in suffixes:
                    # Guard each move separately so that one missing file does not
                    # prevent the remaining files from being moved
                    try:
                        shutil.move(src=f"{file}.{suffix}", dst=f"Bad/{file}.{suffix}")
                    except FileNotFoundError as e:
                        logging.error(f'Error with moving bad files: {e}')
        logging.info(f'\nElectrostatic potential analysis by Multiwfn for {folder} folder has completed, and the results have been stored in the corresponding json files.\n')

    @classmethod
    def _parse_surface_analysis(cls, output_content: str) -> dict:
        """
        Extract the surface analysis values from the text printed by Multiwfn.

        Values are kept as the strings printed by Multiwfn; missing surface
        values default to 'NaN'.

        :param output_content: Complete Multiwfn output of the surface analysis
        :return: Dict with ion_type, molecular_mass, volume, density and the positive/negative surface areas and average values
        :raises TypeError: If the volume or the density is absent from the output
        :raises ValueError: If the volume or the density is not a valid number
        """
        def surface_area(label):
            match = re.search(rf'{label} surface area:\s*([\d.]+)\s*Bohr\^2\s+\(\s*([\d.]+)\s*Angstrom\^2\)', output_content)
            return match.group(2) if match else 'NaN'  # Angstrom^2

        def average_value(label):
            match = re.search(rf'{label} average value:\s*[\d.-]*\s*a\.u\.\s*\(\s*([\d.-]+|NaN)\s*kcal/mol\)', output_content)
            return match.group(1) if match else 'NaN'  # kcal/mol

        volume_match = re.search(r'Volume:\s*([\d.]+)\s*Bohr\^3\s+\(\s*([\d.]+)\s*Angstrom\^3\)', output_content)
        density_match = re.search(r'Estimated density according to mass and volume \(M/V\):\s*([\d.]+)\s*g/cm\^3', output_content)
        volume = volume_match.group(2) if volume_match else None  # Angstrom^3
        density = density_match.group(1) if density_match else None  # g/cm^3

        overall_surface_area = surface_area('Overall')
        positive_surface_area = surface_area('Positive')
        negative_surface_area = surface_area('Negative')
        overall_average_value = average_value('Overall')
        positive_average_value = average_value('Positive')
        negative_average_value = average_value('Negative')

        # Decide between cation and anion: the whole surface carries a single sign
        if (positive_surface_area == overall_surface_area and
                positive_average_value == overall_average_value and
                negative_surface_area == '0.00000' and
                negative_average_value == 'NaN'):
            ion_type = 'cation'
        elif (negative_surface_area == overall_surface_area and
                negative_average_value == overall_average_value and
                positive_surface_area == '0.00000' and
                positive_average_value == 'NaN'):
            ion_type = 'anion'
        else:
            ion_type = 'mixed_ion'

        # Dividing by the conversion factor turns g/cm^3 * Angstrom^3 into atomic mass units;
        # float(None) raises TypeError when Multiwfn did not print the value
        molecular_mass = round(float(volume) * float(density) / cls._AMU_PER_A3_TO_G_PER_CM3, 5)

        return {
            'ion_type': ion_type,
            'molecular_mass': molecular_mass,
            'volume': volume,
            'density': density,
            'positive_surface_area': positive_surface_area,
            'positive_average_value': positive_average_value,
            'negative_surface_area': negative_surface_area,
            'negative_average_value': negative_average_value,
        }

    def _single_multiwfn_fchk_to_json(self, fchk_filename: str):
        '''
        Private method: Use Multiwfn to perform electrostatic potential analysis on one FCHK file, and save the required results to a corresponding JSON file.

        :param fchk_filename: Path of the .fchk file, including its folder
        :return: True when the JSON file was written, False when the Multiwfn output could not be interpreted
        :raises FileNotFoundError: If no Multiwfn executable is available
        '''
        print(f'Multiwfn processing {fchk_filename}')
        logging.info(f'Multiwfn processing {fchk_filename}')
        # input.txt stores the Multiwfn commands: load file, surface analysis (12), start (0), quit
        with open('input.txt', 'w') as input_file:
            input_file.write(f"{fchk_filename}\n12\n0\nq\n")
        try:
            # Feed input.txt to Multiwfn and collect its output in output.txt
            self._run_multiwfn('input.txt', 'output.txt')
            with open('output.txt', 'r') as output_file:
                output_content = output_file.read()
        finally:
            # The scratch files are removed on success and on failure alike
            self._remove_if_exists('input.txt')
            self._remove_if_exists('output.txt')

        # Folder and extension-free name (the refcode) of the .fchk file
        folder, filename = os.path.split(fchk_filename)
        refcode, _ = os.path.splitext(filename)
        try:
            properties = self._parse_surface_analysis(output_content)
        except (TypeError, ValueError) as e:
            print(f"Bad .fchk file: {fchk_filename}: {e}")
            logging.error(f"Bad .fchk file: {fchk_filename}: {e}")
            return False

        result = {'refcode': refcode, **properties}
        with open(f"{folder}/{refcode}.json", 'w') as json_file:
            json.dump(result, json_file, indent=4)
        os.makedirs(f"Optimized/{folder}", exist_ok=True)
        shutil.copyfile(src=f"{folder}/{refcode}.json", dst=f"Optimized/{folder}/{refcode}.json")
        logging.info(f'Finished processing {fchk_filename}')
        return True

    def gaussian_log_to_optimized_gjf(self, specific_directory: str = None):
        """
        Batch convert the last frame of Gaussian optimization LOG files into GJF files using Multiwfn.

        If a specific directory is given, only that directory is processed.
        Otherwise, the folder list provided during initialization is processed in order.
        """
        folders = self.folders if specific_directory is None else [specific_directory]
        for folder in folders:
            # Multiwfn writes into Optimized/<folder>, so it must exist in both modes
            os.makedirs(f"Optimized/{folder}", exist_ok=True)
            self._gaussian_log_to_optimized_gjf(folder)

    def _gaussian_log_to_optimized_gjf(self, folder: str):
        '''
        Due to the lack of support of Pyxtal module for LOG files in subsequent crystal generation, it is necessary to convert the last frame of the Gaussian optimized LOG file to a .gjf file with Multiwfn processing.

        :raises FileNotFoundError: If the folder contains no .log file
        '''
        # Collect the .log files of the folder, sorted by name
        log_files = sorted(os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.log'))
        if not log_files:
            raise FileNotFoundError(f'No availible Gaussian .log file to process in {folder}')
        for log_file in log_files:
            base_name = os.path.splitext(log_file)[0]
            gjf_file = f"{base_name}.gjf"
            if os.path.exists(os.path.join('Optimized', gjf_file)):
                logging.info(f"{gjf_file} already exists, skipping multiwfn log_to_gjf processing.")
            else:
                self._single_multiwfn_log_to_gjf(folder, log_file)
        self._remove_if_exists('input.txt')
        logging.info(f'\nThe .log to .gjf conversion by Multiwfn for {folder} folder has completed, and the optimized .gjf structures have been stored in the optimized directory.\n')

    def _single_multiwfn_log_to_gjf(self, folder: str, log_filename: str):
        """
        Convert the last frame of one Gaussian .log file into Optimized/<folder>/<refcode>.gjf.

        Failures are reported through print/logging and do not raise, so that a
        batch run continues with the next file.
        """
        # Extension-free name (the refcode) of the .log file
        _, filename = os.path.split(log_filename)
        refcode, _ = os.path.splitext(filename)
        target_gjf = f"Optimized/{folder}/{refcode}.gjf"
        try:
            os.makedirs(f"Optimized/{folder}", exist_ok=True)
            # input.txt stores the Multiwfn commands: load the log, "gi" to export
            # a gjf file, the output file name, then quit. The exported structure
            # is the last frame of the optimization and no GUI is needed.
            with open('input.txt', 'w') as input_file:
                input_file.write(f"{log_filename}\ngi\n{target_gjf}\nq\n")
            self._run_multiwfn('input.txt')
            if os.path.exists(target_gjf):
                print(f'Finished converting {refcode} .log to .gjf')
                logging.info(f'Finished converting {refcode} .log to .gjf')
            else:
                print(f'Error with converting {refcode} .log to .gjf')
                logging.error(f"Error with converting {refcode} .log to .gjf")
        except (OSError, subprocess.SubprocessError) as e:
            print(f'Error with processing {log_filename}: {e}')
            logging.error(f'Error with processing {log_filename}: {e}')

    def _write_sorted_csv(self, csv_path: str, rows: list, suffix: str, num_components: int, value_header: str):
        """
        Write (combination, value) pairs to a .csv file, sorted by value in descending order.

        :param csv_path: Path of the .csv file to write
        :param rows: List of (combination dict, value) tuples
        :param suffix: File suffix stripped from the component names
        :param num_components: Number of 'Component i' columns in the header
        :param value_header: Header of the value column
        """
        data = []
        for combo, value in rows:
            # Drop the file suffix so that only folder/refcode remains
            cleaned_combo = [name[:-len(suffix)] if name.endswith(suffix) else name for name in combo]
            data.append(cleaned_combo + [value])
        # The value is stored in the last column
        data.sort(key=lambda row: float(row[-1]), reverse=True)
        with open(csv_path, "w", newline="", encoding="utf-8") as csv_file:
            writer = csv.writer(csv_file)
            header = [f"Component {i+1}" for i in range(num_components)] + [value_header]
            writer.writerow(header)
            writer.writerows(data)

    def nitrogen_content_estimate(self):
        """
        Evaluate the priority of ion crystal combinations based on nitrogen content and generate .csv files

        :raises ValueError: If a structure contains an element other than H, C, N and O
        """
        atomic_masses = {"H": 1.008, "C": 12.01, "N": 14.01, "O": 16.00}
        # Every combination of .gjf files, one file per folder
        combinations = self._generate_combinations(suffix='.gjf')
        rows = []
        for combo in combinations:
            total_masses = 0.0
            nitrogen_masses = 0.0
            for gjf_file, ion_count in combo.items():
                atomic_counts = self._read_gjf_elements(gjf_file)
                for element, atom_count in atomic_counts.items():
                    if element not in atomic_masses:
                        # Raising a plain string is itself a TypeError; use a real exception
                        raise ValueError(f"Contains element information not included, unable to calculate nitrogen content: '{element}' in {gjf_file}")
                    element_mass = atomic_masses[element] * atom_count * ion_count
                    total_masses += element_mass
                    if element == 'N':
                        nitrogen_masses += element_mass
            nitrogen_content = round((nitrogen_masses / total_masses), 4) if total_masses > 0 else 0
            rows.append((combo, nitrogen_content))
        num_components = len(combinations[0]) if combinations else 0
        self._write_sorted_csv(self.nitrogen_csv, rows, '.gjf', num_components, "Nitrogen_Content")

    def _read_gjf_elements(self, gjf_file):
        """
        Count the atoms of each element in the molecule specification of a .gjf file.

        :param gjf_file: Path of the Gaussian input file
        :return: Dict mapping the first token of each atom line (the element symbol) to its count
        """
        atomic_counts = {}
        # Atom lines start after the "charge multiplicity" line
        start_reading = False
        with open(gjf_file, 'r') as file:
            for raw_line in file:
                line = raw_line.strip()
                if not line:
                    # A blank line terminates the molecule specification; whatever
                    # follows (connectivity, basis sets, ...) is not atom data
                    if start_reading:
                        break
                    continue
                # Skip Link 0 commands and the route section
                if line.startswith(("%", "#")):
                    continue
                parts = line.split()
                # A line made of exactly two integers is the charge/multiplicity line
                if len(parts) == 2 and parts[0].lstrip("+-").isdigit() and parts[1].isdigit():
                    start_reading = True
                    continue
                if start_reading:
                    element = parts[0]
                    atomic_counts[element] = atomic_counts.get(element, 0) + 1
        return atomic_counts

    def _generate_combinations(self, suffix: str):
        """
        Build every combination that takes one file with the given suffix from each folder.

        :param suffix: File suffix to look for, e.g. '.gjf' or '.json'
        :return: List of dicts mapping file path to the ion count taken from self.ratios
        :raises FileNotFoundError: If a folder holds no file with the suffix
        """
        all_files = []
        for folder in self.folders:
            suffix_files = sorted(os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(suffix))
            print(f'Valid {suffix} file number in {folder}: {len(suffix_files)}')
            logging.info(f"Valid {suffix} file number in {folder}: {len(suffix_files)}")
            if not suffix_files:
                raise FileNotFoundError(f'No available {suffix} files in {folder} folder')
            all_files.append(suffix_files)

        # Pair the file chosen from each folder with the ratio of that folder
        combinations = [dict(zip(folder_files, self.ratios)) for folder_files in itertools.product(*all_files)]
        print(f'Valid combination number: {len(combinations)}')
        logging.info(f'Valid combination number: {len(combinations)}')
        return combinations

    @staticmethod
    def _average_potential_per_area(average_value, surface_area) -> float:
        """
        Return average_value / surface_area, or 0.0 when the ratio is undefined.

        The ratio is undefined when either value is missing, not a number, NaN,
        or when the surface area is zero (the ion has no surface of that sign).
        """
        try:
            average = float(average_value)
            area = float(surface_area)
        except (TypeError, ValueError):
            return 0.0
        # NaN is the only float that differs from itself
        if area == 0 or area != area or average != average:
            return 0.0
        return average / area

    def empirical_estimate(self):
        """
        Based on the electrostatic analysis obtained from the .json file, calculate the initial screening density of the ion crystal using empirical formulas, and generate the .csv file according to the sorted density.

        Combinations containing an unreadable .json file are skipped with a warning, because a density computed from only part of the ions would be meaningless.
        """
        # 1. Fitted parameters from: Molecular Physics 2010, 108:10, 1391-1396.
        # http://dx.doi.org/10.1080/00268971003702221
        # alpha, beta, gamma, delta = 1.0260, 0.0514, 0.0419, 0.0227
        # 2. Fitted parameters from: Journal of Computational Chemistry 2013, 34, 2146-2151.
        # https://doi.org/10.1002/jcc.23369
        alpha, beta, gamma, delta = 1.1145, 0.02056, -0.0392, -0.1683

        combinations = self._generate_combinations(suffix='.json')
        rows = []
        for combo in combinations:
            masses, volumes = 0.0, 0.0
            positive_electrostatics, negative_electrostatics = 0.0, 0.0
            try:
                for json_path, count in combo.items():
                    # Load the properties of this ion from its JSON file
                    with open(json_path, 'r') as json_file:
                        properties = json.load(json_file)
                    # The conversion factor turns atomic mass units per Angstrom^3 into g/cm^3
                    masses += float(properties['molecular_mass']) * self._AMU_PER_A3_TO_G_PER_CM3 * count
                    volumes += float(properties['volume']) * count
                    positive_electrostatics += count * self._average_potential_per_area(
                        properties['positive_average_value'], properties['positive_surface_area'])
                    negative_electrostatics += count * self._average_potential_per_area(
                        properties['negative_average_value'], properties['negative_surface_area'])
            except json.decoder.JSONDecodeError as e:
                logging.warning(f'Skipping combination {list(combo)}: unreadable JSON file: {e}')
                continue
            if volumes == 0:
                logging.warning(f'Skipping combination {list(combo)}: total volume is zero')
                continue

            M_d_Vm = masses / volumes
            predicted_crystal_density = (alpha * M_d_Vm) + (beta * positive_electrostatics) + (gamma * negative_electrostatics) + (delta)
            rows.append((combo, round(predicted_crystal_density, 4)))

        num_components = len(combinations[0]) if combinations else 0
        self._write_sorted_csv(self.density_csv, rows, '.json', num_components, 'Pred_Density')

    def _copy_combo_file(self, combo_path, folder_basename, file_type):
        """
        Copy Optimized/<folder_basename><file_type> into the combo directory unless it is already there.

        :param combo_path: Destination combo_n directory
        :param folder_basename: Ion name including its folder, e.g. charge_2/ABCDEF
        :param file_type: File suffix including the dot, e.g. '.gjf'
        """
        filename = f"{folder_basename}{file_type}"
        source_path = os.path.join(self.base_dir, 'Optimized', filename)
        if not os.path.exists(source_path):
            logging.error(f'File of {filename} does not exist in {self.base_dir}')
            return
        if os.path.exists(os.path.join(combo_path, os.path.basename(filename))):
            logging.info(f'{filename} of {os.path.basename(combo_path)} already exists in {os.path.abspath(combo_path)}. Skipping copy.')
            return
        shutil.copy(source_path, combo_path)
        logging.info(f'Copied {os.path.basename(source_path)} to {combo_path}')

    def _load_template_config(self) -> dict:
        """
        Load config.yaml from the parent of the working directory, falling back to the working directory itself.

        :return: The parsed configuration; an empty file yields an empty dict
        :raises FileNotFoundError: If neither location holds a config.yaml
        :raises yaml.YAMLError: If the configuration file cannot be parsed
        """
        parent_dir = os.path.dirname(self.base_dir)
        parent_config_path = os.path.join(parent_dir, 'config.yaml')
        base_config_path = os.path.join(self.base_dir, "config.yaml")
        try:
            with open(parent_config_path, 'r') as file:
                config = yaml.safe_load(file)
        except FileNotFoundError as e:
            logging.warning(f"No available config.yaml file in parent directory: {parent_dir} \n{e}")
            logging.info(f"Trying to load config.yaml file from base directory: {self.base_dir}")
            try:
                with open(base_config_path, 'r') as file:
                    config = yaml.safe_load(file)
            except yaml.YAMLError as e:
                # Re-raise: continuing would leave no configuration to work with
                logging.error(f"YAML configuration file parsing failed: {e}")
                raise
            except FileNotFoundError as e:
                logging.error(f"No available config.yaml file either in parent directory: {parent_dir} and base directory {self.base_dir} \n{e}")
                raise
        except PermissionError:
            logging.error(f'No read permission for the path: {parent_dir}')
            raise
        except Exception as e:
            logging.error(f'Unexpected error: {e}')
            raise
        # yaml.safe_load returns None for an empty document
        return {} if config is None else config

    def make_combo_dir(self, target_dir: str, num_combos: int, ion_numbers: List[int]):
        """
        Create combo_n folders based on the sorted .csv file and copy the corresponding .gjf and .json files.

        :param target_dir: The target directory of the combo folders to be created; defaults to ../2_<sort_by>_combos when empty
        :param num_combos: The number of combo folders to be created
        :param ion_numbers: The number of ions for ionic crystal generation step (generated in config.yaml in the corresponding combo_dir automatically)
        """
        base_csv = {'density': self.density_csv, 'nitrogen': self.nitrogen_csv}[self.sort_by]
        if not target_dir:
            target_dir = f'../2_{self.sort_by}_combos'
        component_pattern = re.compile(r'^Component \d+')
        # The template configuration is loaded once, when the first combo needs it
        config = None
        # The .csv files are written as UTF-8, so read them the same way
        with open(base_csv, mode='r', newline='', encoding='utf-8') as file:
            reader = csv.DictReader(file)
            for index, row in enumerate(reader):
                if index >= num_combos:
                    break  # Requested number of folders reached
                combo_path = os.path.join(target_dir, f'combo_{index+1}')
                os.makedirs(combo_path, exist_ok=True)

                # Walk over the component columns and copy the matching files
                gjf_names = []
                for component in (key for key in row if key and component_pattern.match(key)):
                    # folder_basename holds the ion name including its folder, e.g. charge_2/ABCDEF
                    folder_basename = row[component]
                    self._copy_combo_file(combo_path, folder_basename, file_type='.gjf')
                    self._copy_combo_file(combo_path, folder_basename, file_type=".json")
                    # gjf_names holds the bare file names written to config.yaml;
                    # basename also works for nested folders, unlike split('/')[1]
                    gjf_names.append(f"{os.path.basename(folder_basename)}.gjf")

                if config is None:
                    config = self._load_template_config()
                try:
                    # Make sure the 'gen_opt' section of config.yaml exists
                    if 'gen_opt' not in config:
                        config['gen_opt'] = {}
                    # Store the ion names and numbers of this combo
                    config['gen_opt']['species'] = gjf_names
                    config['gen_opt']['ion_numbers'] = ion_numbers
                    logging.info(
                        f"Generated 'species' and 'ion_numbers' config for gen_opt module in config.yaml are respectively: {config['gen_opt']['species']} and {config['gen_opt']['ion_numbers']}"
                    )
                    with open(os.path.join(combo_path, 'config.yaml'), 'w') as file:
                        yaml.dump(config, file)
                except (OSError, TypeError, yaml.YAMLError) as e:
                    # Best effort: a failing config must not stop the remaining combos
                    logging.error(f"Unexpected error: {e}")
505
+