ion-CSP 2.1.5__py3-none-any.whl → 2.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ion_CSP/__init__.py CHANGED
@@ -1,12 +1,12 @@
1
1
  __author__ = "Ze Yang"
2
2
  __contact__ = "yangze1995007@163.com"
3
3
  __license__ = "MIT"
4
- __version__ = "2.1.5"
5
- __date__ = "2025-06-16"
4
+ __version__ = "2.1.9"
5
+ __date__ = "2025-06-27"
6
6
 
7
7
 
8
8
  try:
9
- from importlib.metadata import version # python >= 3.11
9
+ from importlib.metadata import version
10
10
  except Exception:
11
11
  try:
12
12
  from importlib_metadata import version
ion_CSP/convert_SMILES.py CHANGED
@@ -2,6 +2,7 @@ import os
2
2
  import shutil
3
3
  import logging
4
4
  import pandas as pd
5
+ import importlib.resources
5
6
  from typing import List
6
7
  from rdkit import Chem
7
8
  from rdkit.Chem import AllChem
@@ -13,9 +14,13 @@ class SmilesProcessing:
13
14
 
14
15
  def __init__(self, work_dir: str, csv_file: str, converted_folder: str = '1_1_SMILES_gjf', optimized_dir: str = '1_2_Gaussian_optimized'):
15
16
  """
16
- args:
17
+ This class is used to process SMILES codes from a CSV file, convert them into Gaussian input files, and prepare for optimization tasks. It also supports grouping by charge and filtering based on functional groups.
18
+
19
+ params:
17
20
  work_dir: the path of the working directory.
18
21
  csv_file: the csv file name in the working directory.
22
+ converted_folder: the folder name for storing converted SMILES files.
23
+ optimized_dir: the folder name for storing Gaussian optimized files.
19
24
  """
20
25
  redirect_dpdisp_logging(os.path.join(work_dir, "dpdispatcher.log"))
21
26
  # 读取csv文件并处理数据, csv文件的表头包括 SMILES, Charge, Refcode或Number
@@ -28,7 +33,7 @@ class SmilesProcessing:
28
33
  self.base_dir, converted_folder, os.path.splitext(csv_file)[0]
29
34
  )
30
35
  self.gaussian_optimized_dir = os.path.join(self.base_dir, optimized_dir)
31
- self.param_dir = os.path.join(os.path.dirname(__file__), "../../param")
36
+ self.param_dir = importlib.resources.files("ion_CSP.param")
32
37
  original_df = pd.read_csv(csv_path)
33
38
  logging.info(f"Processing {csv_path}")
34
39
  # 对SMILES码去重
@@ -53,19 +58,27 @@ class SmilesProcessing:
53
58
  self, dir: str, smiles: str, basename: str, charge: int
54
59
  ):
55
60
  """
56
- Private method: Use the rdkit module to read SMILES code and convert it into the required file types such as gjf, xyz, mol, etc.
61
+ Private method:
62
+ Use the rdkit module to read SMILES code and convert it into the required file types such as gjf, xyz, mol, etc.
57
63
 
58
- args:
64
+ params:
59
65
  dir: The directory used for outputting files, regardless of existence of the directory.
60
66
  smiles: SMILES code to be converted.
61
67
  basename: The reference code or number corresponding to SMILES code.
62
68
  charge: The charge carried by ions.
69
+
63
70
  return:
64
71
  result_code: Result code 0 or -1, representing success and failure respectively.
65
72
  basename: The corresponding basename.
66
73
  """
67
74
  mol = Chem.MolFromSmiles(smiles)
68
- mol = Chem.AddHs(mol)
75
+ try:
76
+ mol = Chem.AddHs(mol)
77
+ except Exception as e:
78
+ logging.error(
79
+ f"Error occurred while adding hydrogens to molecule {basename} with charge {charge}: {e}"
80
+ )
81
+ return 1, basename # 返回错误码1表示失败
69
82
  try:
70
83
  # 生成3D坐标
71
84
  AllChem.EmbedMolecule(mol)
@@ -144,6 +157,12 @@ class SmilesProcessing:
144
157
  ):
145
158
  """
146
159
  Screen based on the provided functional groups and charges.
160
+
161
+ params:
162
+ charge_screen: The charge to screen for, default is 0.
163
+ group_screen: The functional group to screen for, default is empty string.
164
+ group_name: The name of the functional group, used for naming the output directory.
165
+ group_screen_invert: If True, invert the screening condition for the functional group.
147
166
  """
148
167
  # 另外筛选出符合条件的离子
149
168
  screened = self.df
@@ -179,6 +198,12 @@ class SmilesProcessing:
179
198
  ):
180
199
  """
181
200
  Based on the dpdispatcher module, prepare and submit files for optimization on remote server or local machine.
201
+
202
+ params:
203
+ folders: List of folders containing .gjf files to be processed, if empty, all folders in the converted directory will be processed.
204
+ machine: The machine configuration file for dpdispatcher, can be a JSON or YAML file.
205
+ resources: The resources configuration file for dpdispatcher, can be a JSON or YAML file.
206
+ nodes: The number of nodes to distribute the tasks to, default is 1.
182
207
  """
183
208
  if os.path.exists(self.gaussian_optimized_dir):
184
209
  logging.error(f'The directory {self.gaussian_optimized_dir} has already existed.')
@@ -233,7 +258,7 @@ class SmilesProcessing:
233
258
  task_dir = os.path.join(self.converted_dir, f"{parent}pop{pop}")
234
259
  os.makedirs(task_dir, exist_ok=True)
235
260
  for file in forward_files:
236
- shutil.copyfile(f"{self.param_dir}/{file}", f"{task_dir}/{file}")
261
+ shutil.copyfile(self.param_dir.joinpath(file), f"{task_dir}/{file}")
237
262
  for job_i in node_jobs[pop]:
238
263
  # 将分配好的 .gjf 文件添加到对应的上传文件中
239
264
  forward_files.append(gjf_files[job_i])
@@ -274,11 +299,14 @@ class SmilesProcessing:
274
299
  for job_i in node_jobs[pop]:
275
300
  base_name, _ = os.path.splitext(gjf_files[job_i])
276
301
  # 在优化后都取回每个 .gjf 文件对应的 .log、.fchk 输出文件
277
- for ext in ['gjf', 'log', 'fchk']:
278
- shutil.copyfile(
279
- f"{task_dir}/{base_name}.{ext}",
280
- f"{optimized_folder_dir}/{base_name}.{ext}"
281
- )
302
+ try:
303
+ for ext in ['gjf', 'log', 'fchk']:
304
+ shutil.copyfile(
305
+ f"{task_dir}/{base_name}.{ext}",
306
+ f"{optimized_folder_dir}/{base_name}.{ext}"
307
+ )
308
+ except FileNotFoundError as e:
309
+ logging.error(f"File not found during copying, please check the configuration and state of Gaussian: {e}")
282
310
  # 在成功完成Gaussian优化后,删除 1_1_SMILES_gjf/{csv}/{parent}/pop{n} 文件夹以节省空间
283
311
  shutil.rmtree(task_dir)
284
312
  shutil.copyfile(
@@ -53,12 +53,26 @@ x.fchk //指定计算文件
53
53
 
54
54
  class EmpiricalEstimation:
55
55
 
56
- def __init__(self, work_dir: str, folders: List[str], ratios: List[int], sort_by: str):
56
+ def __init__(
57
+ self,
58
+ work_dir: str,
59
+ folders: List[str],
60
+ ratios: List[int],
61
+ sort_by: str,
62
+ optimized_dir: str = "1_2_Gaussian_optimized",
63
+ ):
57
64
  """
58
- Retrieve the directory where the current script is located and use it as the working directory.
65
+ This class is designed to process Gaussian calculation files, perform electrostatic potential analysis using Multiwfn, and estimate the nitrogen content or density of ion crystal combinations. The class will also generate .csv files containing sorted nitrogen content or density based on the specified sorting criterion.
66
+
67
+ :params
68
+ work_dir: The working directory where the Gaussian calculation files are located.
69
+ folders: A list of folder names containing the Gaussian calculation files.
70
+ ratios: A list of integers representing the ratio of each folder in the combination.
71
+ sort_by: A string indicating the sorting criterion, either 'density' or 'nitrogen'.
59
72
  """
60
73
  self.base_dir = work_dir
61
- os.chdir(self.base_dir)
74
+ self.gaussian_optimized_dir = os.path.join(self.base_dir, optimized_dir)
75
+ os.chdir(self.gaussian_optimized_dir)
62
76
  # 确保所取的文件夹数与配比数是对应的
63
77
  if len(folders) != len(ratios):
64
78
  raise ValueError('The number of folders must match the number of ratios.')
@@ -69,10 +83,86 @@ class EmpiricalEstimation:
69
83
  raise ValueError(f"The sort_by parameter must be either 'density' or 'nitrogen', but got '{sort_by}'")
70
84
  self.density_csv = "sorted_density.csv"
71
85
  self.nitrogen_csv = "sorted_nitrogen.csv"
86
+ self.carbon_nitrogen_csv = "specific_C_N_ratio.csv"
87
+ # 检查Multiwfn可执行文件是否存在
88
+ self.multiwfn_path = self._check_multiwfn_executable()
89
+
90
+ def _check_multiwfn_executable(self):
91
+ '''
92
+ Private method:
93
+ Check if the Multiwfn executable file exists in the system PATH.
94
+ If not, raise a FileNotFoundError with an appropriate error message.
95
+ '''
96
+ multiwfn_path = shutil.which("Multiwfn_noGUI") or shutil.which("Multiwfn")
97
+ if not multiwfn_path:
98
+ error_msg = (
99
+ "Error: No detected Multiwfn executable file (Multiwfn or Multiwfn_GUI), please check:\n "
100
+ "1. Has Multiwfn been installed correctly?\n"
101
+ "2. Has Multiwfn been added to the system PATH environment variable"
102
+ )
103
+ print(error_msg)
104
+ logging.error(error_msg)
105
+ raise FileNotFoundError("No detected Multiwfn executable file (Multiwfn or Multiwfn_GUI)")
106
+ else:
107
+ print(f"Multiwfn executable found at: {multiwfn_path}")
108
+ logging.info(f"Multiwfn executable found at: {multiwfn_path}")
109
+ return multiwfn_path
110
+
111
+ def _multiwfn_cmd_build(self, input_content, output_file=None):
112
+ '''
113
+ Private method:
114
+ Build the Multiwfn command to be executed based on the input content.
115
+ This method is used to create the input file for Multiwfn.
116
+
117
+ :params
118
+ input_content: The content to be written to the input file for Multiwfn.
119
+ '''
120
+ # 创建 input.txt 用于存储 Multiwfn 命令内容
121
+ with open('input.txt', 'w') as input_file:
122
+ input_file.write(input_content)
123
+ if output_file:
124
+ with open('output.txt', 'w') as output_file, open('input.txt', 'r') as input_file:
125
+ try:
126
+ # 通过 input.txt 执行 Multiwfn 命令, 并将输出结果重定向到 output.txt 中
127
+ subprocess.run([self.multiwfn_path], stdin=input_file, stdout=output_file, check=True)
128
+ except subprocess.CalledProcessError as e:
129
+ logging.error(
130
+ f"Error executing Multiwfn command with input {input_content}: {e}"
131
+ )
132
+ except Exception as e:
133
+ logging.error(f"Unexpected error: {e}")
134
+ raise
135
+ finally:
136
+ # 清理临时文件
137
+ try:
138
+ os.remove("input.txt")
139
+ except Exception as e:
140
+ logging.warning(f"Cannot remove temporary file input.txt: {str(e)}")
141
+ else:
142
+ with open("input.txt", "r") as input_file:
143
+ try:
144
+ # 通过 input.txt 执行 Multiwfn 命令, 并将输出结果重定向到 output.txt 中
145
+ subprocess.run([self.multiwfn_path], stdin=input_file, check=True)
146
+ except subprocess.CalledProcessError as e:
147
+ logging.error(
148
+ f"Error executing Multiwfn command with input {input_content}: {e}"
149
+ )
150
+ except Exception as e:
151
+ logging.error(f"Unexpected error: {e}")
152
+ raise
153
+ finally:
154
+ # 清理临时文件
155
+ try:
156
+ os.remove("input.txt")
157
+ except Exception as e:
158
+ logging.warning(f"Cannot remove temporary file input.txt: {str(e)}")
72
159
 
73
160
  def multiwfn_process_fchk_to_json(self, specific_directory: str = None):
74
161
  '''
75
162
  If a specific directory is given, this method can be used separately to implement batch processing of FCHK files with Multiwfn and save the desired electrostatic potential analysis results to the corresponding JSON file. Otherwise, the folder list provided during initialization will be processed sequentially.
163
+
164
+ :params
165
+ specific_directory: The specific directory to process. If None, all folders will be processed.
76
166
  '''
77
167
  if specific_directory is None:
78
168
  for folder in self.folders:
@@ -84,7 +174,11 @@ class EmpiricalEstimation:
84
174
 
85
175
  def _multiwfn_process_fchk_to_json(self, folder: str):
86
176
  '''
177
+ Private method:
87
178
  Perform electrostatic potential analysis on .fchk files using Multiwfn and save the analysis results to a .json file.
179
+
180
+ :params
181
+ folder: The folder containing the .fchk files to be processed.
88
182
  '''
89
183
  # 在每个文件夹中获取 .fchk 文件并根据文件名排序, 再用 Multiwfn 进行静电势分析, 最后将分析结果保存到同名 .json 文件中
90
184
  fchk_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.fchk')]
@@ -119,24 +213,31 @@ class EmpiricalEstimation:
119
213
 
120
214
  def _single_multiwfn_fchk_to_json(self, fchk_filename: str):
121
215
  '''
122
- Private method: Use multiwfn to perform electrostatic potential analysis on each FCHK file separately, and save the required results to a corresponding JSON file.
216
+ Private method:
217
+ Use multiwfn to perform electrostatic potential analysis on each FCHK file separately, and save the required results to a corresponding JSON file.
218
+
219
+ :params
220
+ fchk_filename: The full path of the FCHK file to be processed.
221
+
222
+ :return: True if the processing is successful, False if the FCHK file is invalid.
123
223
  '''
124
224
  print(f'Multiwfn processing {fchk_filename}')
125
225
  logging.info(f'Multiwfn processing {fchk_filename}')
126
226
  result_flag = True
127
- # 创建 input.txt 用于存储 Multiwfn 命令内容
128
- with open('input.txt', 'w') as input_file:
129
- input_file.write(f"{fchk_filename}\n12\n0\nq\n")
130
- # 通过 input.txt 执行 Multiwfn 命令, 并将输出结果重定向到output.txt中
131
- try:
132
- subprocess.run('Multiwfn_noGUI < input.txt > output.txt', shell=True, capture_output=True)
133
- except FileNotFoundError:
134
- subprocess.run('Multiwfn < input.txt > output.txt', shell=True, capture_output=True)
227
+ self._multiwfn_cmd_build(
228
+ input_content=f"{fchk_filename}\n12\n0\n-1\n-1\nq\n",
229
+ output_file='output.txt')
230
+ print(f'Finished processing {fchk_filename}')
231
+
135
232
  # 获取目录以及 .fchk 文件的无后缀文件名, 即 refcode
136
233
  folder, filename = os.path.split(fchk_filename)
137
234
  refcode, _ = os.path.splitext(filename)
138
- with open('output.txt', 'r') as output_file:
139
- output_content = output_file.read()
235
+ try:
236
+ with open('output.txt', 'r') as output_file:
237
+ output_content = output_file.read()
238
+ except Exception as e:
239
+ logging.error(f"Error reading output.txt: {e}")
240
+ raise
140
241
  # 提取所需数据
141
242
  volume_match = re.search(r'Volume:\s*([\d.]+)\s*Bohr\^3\s+\(\s*([\d.]+)\s*Angstrom\^3\)', output_content)
142
243
  density_match = re.search(r'Estimated density according to mass and volume \(M/V\):\s*([\d.]+)\s*g/cm\^3', output_content)
@@ -186,15 +287,20 @@ class EmpiricalEstimation:
186
287
  with open (f"{folder}/{refcode}.json", 'w') as json_file:
187
288
  json.dump(result, json_file, indent=4)
188
289
  shutil.copyfile(src=f"{folder}/{refcode}.json", dst=f"Optimized/{folder}/{refcode}.json")
189
- os.remove('input.txt')
190
- os.remove('output.txt')
191
290
  logging.info(f'Finished processing {fchk_filename}')
291
+ try:
292
+ os.remove("output.txt")
293
+ except Exception as e:
294
+ logging.warning(f"Cannot remove temporary file output.txt: {str(e)}")
192
295
  return result_flag
193
296
 
194
297
  def gaussian_log_to_optimized_gjf(self, specific_directory: str = None):
195
298
  """
196
299
  If a specific directory is given, this method can be used separately to batch process the last frame of Gaussian optimized LOG files into GJF files using Multiwfn.
197
300
  Otherwise, the folder list provided during initialization will be processed in order.
301
+
302
+ :params
303
+ specific_directory: The specific directory to process. If None, all folders will be processed.
198
304
  """
199
305
  if specific_directory is None:
200
306
  for folder in self.folders:
@@ -206,7 +312,11 @@ class EmpiricalEstimation:
206
312
 
207
313
  def _gaussian_log_to_optimized_gjf(self, folder: str):
208
314
  '''
315
+ Private method:
209
316
  Due to the lack of support of Pyxtal module for LOG files in subsequent crystal generation, it is necessary to convert the last frame of the Gaussian optimized LOG file to a .gjf file with Multiwfn processing.
317
+
318
+ :params
319
+ folder: The folder containing the Gaussian LOG files to be processed.
210
320
  '''
211
321
  # 在每个文件夹中获取 .log 文件并根据文件名排序, 再用Multiwfn载入优化最后一帧转换为 gjf 文件
212
322
  log_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.log')]
@@ -226,19 +336,24 @@ class EmpiricalEstimation:
226
336
  pass
227
337
  logging.info(f'\nThe .log to .gjf conversion by Multiwfn for {folder} folder has completed, and the optimized .gjf structures have been stored in the optimized directory.\n')
228
338
 
229
- def _single_multiwfn_log_to_gjf(self, folder: str, log_filename: str):
339
+ def _single_multiwfn_log_to_gjf(self, folder: str, log_filename: str):
340
+ """
341
+ Private method:
342
+ Use Multiwfn to convert the last frame of the Gaussian optimized LOG file to a .gjf file.
343
+
344
+ :params
345
+ folder: The folder containing the Gaussian LOG file to be processed.
346
+ log_filename: The full path of the LOG file to be processed.
347
+ """
230
348
  # 获取目录以及 .fchk 文件的无后缀文件名, 即 refcode
231
349
  _, filename = os.path.split(log_filename)
232
350
  refcode, _ = os.path.splitext(filename)
351
+
233
352
  try:
234
- # 创建 input.txt 用于存储 Multiwfn 命令内容
235
- with open('input.txt', 'w') as input_file:
236
- input_file.write(f"{log_filename}\ngi\nOptimized/{folder}/{refcode}.gjf\nq\n")
237
353
  # Multiwfn首先载入优化任务的out/log文件, 然后输入gi, 再输入要保存的gjf文件名, 此时里面的结构就是优化最后一帧的, 还避免了使用完全图形界面
238
- try:
239
- subprocess.run('Multiwfn_noGUI < input.txt', shell=True, capture_output=True)
240
- except FileNotFoundError:
241
- subprocess.run('Multiwfn < input.txt', shell=True, capture_output=True)
354
+ self._multiwfn_cmd_build(
355
+ input_content=f"{log_filename}\ngi\nOptimized/{folder}/{refcode}.gjf\nq\n"
356
+ )
242
357
  if os.path.exists(f"Optimized/{folder}/{refcode}.gjf"):
243
358
  print(f'Finished converting {refcode} .log to .gjf')
244
359
  logging.info(f'Finished converting {refcode} .log to .gjf')
@@ -249,6 +364,45 @@ class EmpiricalEstimation:
249
364
  print(f'Error with processing {log_filename}: {e}')
250
365
  logging.error(f'Error with processing {log_filename}: {e}')
251
366
 
367
+ def _read_gjf_elements(self, gjf_file):
368
+ """
369
+ Private method:
370
+ Read the elements from a .gjf file and return a dictionary with element counts.
371
+
372
+ :params
373
+ gjf_file: The full path of the .gjf file to be processed.
374
+
375
+ :return: A dictionary with element symbols as keys and their counts as values.
376
+ """
377
+ # 根据每一个组合中的组分找到对应的 JSON 文件并读取其中的性质内容
378
+ with open(gjf_file, "r") as file:
379
+ lines = file.readlines()
380
+ atomic_counts = {}
381
+ # 找到原子信息的开始行
382
+ start_reading = False
383
+ for line in lines:
384
+ line = line.strip()
385
+ # 跳过注释和空行
386
+ if line.startswith("%") or line.startswith("#") or not line:
387
+ continue
388
+ # 检测只包含两个数字的行
389
+ parts = line.split()
390
+ if (
391
+ len(parts) == 2
392
+ and parts[0].lstrip("-").isdigit()
393
+ and parts[1].isdigit()
394
+ ):
395
+ start_reading = True
396
+ continue
397
+ if start_reading:
398
+ element = parts[0] # 第一个部分是元素符号
399
+ # 更新元素计数
400
+ if element in atomic_counts:
401
+ atomic_counts[element] += 1
402
+ else:
403
+ atomic_counts[element] = 1
404
+ return atomic_counts
405
+
252
406
  def nitrogen_content_estimate(self):
253
407
  """
254
408
  Evaluate the priority of ion crystal combinations based on nitrogen content and generate .csv files
@@ -290,55 +444,62 @@ class EmpiricalEstimation:
290
444
  writer.writerow(header) # 写入表头
291
445
  writer.writerows(data) # 写入排序后的数
292
446
 
293
- def _read_gjf_elements(self, gjf_file):
294
- # 根据每一个组合中的组分找到对应的 JSON 文件并读取其中的性质内容
295
- with open(gjf_file, 'r') as file:
296
- lines = file.readlines()
297
- atomic_counts = {}
298
- # 找到原子信息的开始行
299
- start_reading = False
300
- for line in lines:
301
- line = line.strip()
302
- # 跳过注释和空行
303
- if line.startswith("%") or line.startswith("#") or not line:
304
- continue
305
- # 检测只包含两个数字的行
306
- parts = line.split()
307
- if len(parts) == 2 and parts[0].lstrip("-").isdigit() and parts[1].isdigit():
308
- start_reading = True
309
- continue
310
- if start_reading:
311
- element = parts[0] # 第一个部分是元素符号
312
- # 更新元素计数
313
- if element in atomic_counts:
314
- atomic_counts[element] += 1
315
- else:
316
- atomic_counts[element] = 1
317
- return atomic_counts
447
+ def carbon_nitrogen_ratio_estimate(self):
448
+ """
449
+ Evaluate the priority of ion crystal combinations based on carbon and nitrogen ratio
450
+ (C:N < 1:8) and sort by oxygen content, then generate .csv files.
451
+ """
452
+ atomic_masses = {"H": 1.008, "C": 12.01, "N": 14.01, "O": 16.00}
453
+ # 获取所有 .gjf 文件
454
+ combinations = self._generate_combinations(suffix=".gjf")
455
+ filtered_data = []
318
456
 
319
- def _generate_combinations(self, suffix: str):
320
- # 获取所有符合后缀名条件的文件
321
- all_files = []
322
- for folder in self.folders:
323
- suffix_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(suffix)]
324
- suffix_files.sort()
325
- print(f'Valid {suffix} file number in {folder}: {len(suffix_files)}')
326
- logging.info(f"Valid {suffix} file number in {folder}: {len(suffix_files)}")
327
- if not suffix_files:
328
- raise FileNotFoundError(f'No available {suffix} files in {folder} folder')
329
- all_files.append(suffix_files)
457
+ for combo in combinations:
458
+ total_atoms = 0
459
+ carbon_atoms = 0
460
+ nitrogen_atoms = 0
461
+ oxygen_atoms = 0
330
462
 
331
- # 对所有文件根据其文件夹与配比进行组合
332
- combinations = []
333
- for folder_files in itertools.product(*all_files):
334
- # 根据给定的配比生成字典形式的组合
335
- ratio_combination = {}
336
- for folder_index, count in enumerate(self.ratios):
337
- ratio_combination.update({folder_files[folder_index]: count})
338
- combinations.append(ratio_combination)
339
- print(f'Valid combination number: {len(combinations)}')
340
- logging.info(f'Valid combination number: {len(combinations)}')
341
- return combinations
463
+ for gjf_file, ion_count in combo.items():
464
+ atomic_counts = self._read_gjf_elements(gjf_file)
465
+ for element, atom_count in atomic_counts.items():
466
+ if element in atomic_masses:
467
+ total_atoms += atom_count * ion_count
468
+ if element == "C":
469
+ carbon_atoms += atom_count * ion_count
470
+ elif element == "N":
471
+ nitrogen_atoms += atom_count * ion_count
472
+ elif element == "O":
473
+ oxygen_atoms += atom_count * ion_count
474
+ else:
475
+ raise ValueError(
476
+ "Contains element information not included, unable to calculate ratios"
477
+ )
478
+
479
+ # 计算 C:N 比率
480
+ if carbon_atoms != 0: # 确保氮的质量大于 0,避免除以零
481
+ nitrogen_carbon_ratio = round(nitrogen_atoms / carbon_atoms, 2)
482
+ else:
483
+ nitrogen_carbon_ratio = 100.0
484
+ filtered_data.append((combo, nitrogen_carbon_ratio, oxygen_atoms))
485
+
486
+ # 根据氧含量排序
487
+ filtered_data.sort(key=lambda x: (-x[1], -x[2]))
488
+
489
+ # 写入排序后的 .csv 文件
490
+ with open(self.carbon_nitrogen_csv, "w", newline="", encoding="utf-8") as csv_file:
491
+ writer = csv.writer(csv_file)
492
+ # 动态生成表头
493
+ num_components = len(combinations[0]) if combinations else 0
494
+ header = [f"Component {i + 1}" for i in range(num_components)] + ["N_C_Ratio", "O_Atoms"]
495
+ writer.writerow(header) # 写入表头
496
+
497
+ # 写入筛选后的组合和氧含量
498
+ for combo, nitrogen_carbon_ratio, oxygen_content in filtered_data:
499
+ cleaned_combo = [name.replace(".gjf", "") for name in combo]
500
+ writer.writerow(
501
+ cleaned_combo + [nitrogen_carbon_ratio, oxygen_content]
502
+ ) # 写入每一行
342
503
 
343
504
  def empirical_estimate(self):
344
505
  """
@@ -410,9 +571,51 @@ class EmpiricalEstimation:
410
571
  writer.writerow(header) # 写入表头
411
572
  writer.writerows(data) # 写入排序后的数
412
573
 
574
+ def _generate_combinations(self, suffix: str):
575
+ """
576
+ Private method:
577
+ Generate all valid combinations of files based on the specified suffix and ratios.
578
+
579
+ :params
580
+ suffix: The file suffix to filter the files in the folders.
581
+
582
+ :return: A list of dictionaries representing the combinations of files with their respective ratios.
583
+ """
584
+ # 获取所有符合后缀名条件的文件
585
+ all_files = []
586
+ for folder in self.folders:
587
+ suffix_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(suffix)]
588
+ suffix_files.sort()
589
+ print(f'Valid {suffix} file number in {folder}: {len(suffix_files)}')
590
+ logging.info(f"Valid {suffix} file number in {folder}: {len(suffix_files)}")
591
+ if not suffix_files:
592
+ raise FileNotFoundError(f'No available {suffix} files in {folder} folder')
593
+ all_files.append(suffix_files)
594
+
595
+ # 对所有文件根据其文件夹与配比进行组合
596
+ combinations = []
597
+ for folder_files in itertools.product(*all_files):
598
+ # 根据给定的配比生成字典形式的组合
599
+ ratio_combination = {}
600
+ for folder_index, count in enumerate(self.ratios):
601
+ ratio_combination.update({folder_files[folder_index]: count})
602
+ combinations.append(ratio_combination)
603
+ print(f'Valid combination number: {len(combinations)}')
604
+ logging.info(f'Valid combination number: {len(combinations)}')
605
+ return combinations
606
+
413
607
  def _copy_combo_file(self, combo_path, folder_basename, file_type):
608
+ """
609
+ Private method:
610
+ Copy the specified file type from the Optimized directory to the combo_n folder.
611
+
612
+ :params
613
+ combo_path: The path to the combo_n folder where the file will be copied.
614
+ folder_basename: The basename of the folder containing the file to be copied.
615
+ file_type: The type of file to be copied (e.g., '.gjf', '.json').
616
+ """
414
617
  filename = f"{folder_basename}{file_type}"
415
- source_path = os.path.join(self.base_dir, 'Optimized', filename)
618
+ source_path = os.path.join(self.gaussian_optimized_dir, "Optimized", filename)
416
619
  # 复制指定后缀名文件到对应的 combo_n 文件夹
417
620
  if os.path.exists(source_path):
418
621
  if os.path.exists(os.path.join(combo_path, os.path.basename(filename))):
@@ -422,15 +625,18 @@ class EmpiricalEstimation:
422
625
  shutil.copy(source_path, combo_path)
423
626
  logging.info(f'Copied {os.path.basename(source_path)} to {combo_path}')
424
627
  else:
425
- logging.error(f'File of {filename} does not exist in {self.base_dir}')
628
+ logging.error(
629
+ f"File of {filename} does not exist in {self.gaussian_optimized_dir}"
630
+ )
426
631
 
427
632
  def make_combo_dir(self, target_dir: str, num_combos: int, ion_numbers: List[int]):
428
633
  """
429
634
  Create a combo_n folder based on the .csv file and copy the corresponding .gjf structure file.
430
635
 
431
- :param target_directory: The target directory of the combo folder to be created
432
- :param num_folders: The number of combo folders to be created
433
- :param ion_numbers: The number of ions for ionic crystal generation step (generated in config.yaml in the corresponding combo_dir automatically)
636
+ :params
637
+ target_directory: The target directory of the combo folder to be created
638
+ num_folders: The number of combo folders to be created
639
+ ion_numbers: The number of ions for ionic crystal generation step (generated in config.yaml in the corresponding combo_dir automatically)
434
640
  """
435
641
  if self.sort_by == 'density':
436
642
  base_csv = self.density_csv
@@ -464,15 +670,15 @@ class EmpiricalEstimation:
464
670
  gjf_names.append(f"{folder_basename.split('/')[1]}.gjf")
465
671
 
466
672
  # 生成上级目录路径并解析 .yaml 文件
467
- parent_dir = os.path.dirname(self.base_dir)
673
+ parent_dir = self.base_dir
468
674
  parent_config_path = os.path.join(parent_dir, 'config.yaml')
469
- base_config_path = os.path.join(self.base_dir, "config.yaml")
675
+ base_config_path = os.path.join(self.gaussian_optimized_dir, "config.yaml")
470
676
  try:
471
677
  with open(parent_config_path, 'r') as file:
472
678
  config = yaml.safe_load(file)
473
679
  except FileNotFoundError as e:
474
680
  logging.warning(f"No available config.yaml file in parent directory: {parent_dir} \n{e}")
475
- logging.info(f"Trying to load config.yaml file from base directory: {self.base_dir}")
681
+ logging.info(f"Trying to load config.yaml file from base directory: {parent_dir}")
476
682
  try:
477
683
  with open(base_config_path, 'r') as file:
478
684
  try:
@@ -480,11 +686,10 @@ class EmpiricalEstimation:
480
686
  except yaml.YAMLError as e:
481
687
  logging.error(f"YAML configuration file parsing failed: {e}")
482
688
  except FileNotFoundError as e:
483
- logging.error(f"No available config.yaml file either in parent directory: {parent_dir} and base directory {self.base_dir} \n{e}")
689
+ logging.error(
690
+ f"No available config.yaml file either in parent directory: {parent_dir} and base directory {self.gaussian_optimized_dir} \n{e}"
691
+ )
484
692
  raise
485
- except PermissionError:
486
- logging.error(f'No read permission for the path: {parent_dir}')
487
- raise
488
693
  except Exception as e:
489
694
  logging.error(f'Unexpected error: {e}')
490
695
  raise
@@ -502,4 +707,3 @@ class EmpiricalEstimation:
502
707
  yaml.dump(config, file)
503
708
  except Exception as e:
504
709
  logging.error(f"Unexpected error: {e}")
505
-