ion-CSP 2.1.8__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ion_CSP/__init__.py CHANGED
@@ -1,12 +1,12 @@
1
1
  __author__ = "Ze Yang"
2
2
  __contact__ = "yangze1995007@163.com"
3
3
  __license__ = "MIT"
4
- __version__ = "2.1.8"
5
- __date__ = "2025-06-23"
4
+ __version__ = "2.2.0"
5
+ __date__ = "2025-06-27"
6
6
 
7
7
 
8
8
  try:
9
- from importlib.metadata import version # python >= 3.11
9
+ from importlib.metadata import version
10
10
  except Exception:
11
11
  try:
12
12
  from importlib_metadata import version
ion_CSP/convert_SMILES.py CHANGED
@@ -72,7 +72,13 @@ class SmilesProcessing:
72
72
  basename: The corresponding basename.
73
73
  """
74
74
  mol = Chem.MolFromSmiles(smiles)
75
- mol = Chem.AddHs(mol)
75
+ try:
76
+ mol = Chem.AddHs(mol)
77
+ except Exception as e:
78
+ logging.error(
79
+ f"Error occurred while adding hydrogens to molecule {basename} with charge {charge}: {e}"
80
+ )
81
+ return 1, basename # 返回错误码1表示失败
76
82
  try:
77
83
  # 生成3D坐标
78
84
  AllChem.EmbedMolecule(mol)
@@ -53,7 +53,14 @@ x.fchk //指定计算文件
53
53
 
54
54
  class EmpiricalEstimation:
55
55
 
56
- def __init__(self, work_dir: str, folders: List[str], ratios: List[int], sort_by: str):
56
+ def __init__(
57
+ self,
58
+ work_dir: str,
59
+ folders: List[str],
60
+ ratios: List[int],
61
+ sort_by: str,
62
+ optimized_dir: str = "1_2_Gaussian_optimized",
63
+ ):
57
64
  """
58
65
  This class is designed to process Gaussian calculation files, perform electrostatic potential analysis using Multiwfn, and estimate the nitrogen content or density of ion crystal combinations. The class will also generate .csv files containing sorted nitrogen content or density based on the specified sorting criterion.
59
66
 
@@ -64,17 +71,91 @@ class EmpiricalEstimation:
64
71
  sort_by: A string indicating the sorting criterion, either 'density' or 'nitrogen'.
65
72
  """
66
73
  self.base_dir = work_dir
67
- os.chdir(self.base_dir)
74
+ self.gaussian_optimized_dir = os.path.join(self.base_dir, optimized_dir)
75
+ os.chdir(self.gaussian_optimized_dir)
68
76
  # 确保所取的文件夹数与配比数是对应的
69
77
  if len(folders) != len(ratios):
70
78
  raise ValueError('The number of folders must match the number of ratios.')
71
79
  self.folders = folders
72
80
  self.ratios = ratios
73
81
  self.sort_by = sort_by
74
- if sort_by not in ("density", "nitrogen"):
75
- raise ValueError(f"The sort_by parameter must be either 'density' or 'nitrogen', but got '{sort_by}'")
82
+ if sort_by not in ("density", "nitrogen", "NC_ratio"):
83
+ raise ValueError(f"The sort_by parameter must be either 'density' 'nitrogen' or 'NC_ratio', but got '{sort_by}'")
76
84
  self.density_csv = "sorted_density.csv"
77
85
  self.nitrogen_csv = "sorted_nitrogen.csv"
86
+ self.NC_ratio_csv = "specific_NC_ratio.csv"
87
+ # 检查Multiwfn可执行文件是否存在
88
+ self.multiwfn_path = self._check_multiwfn_executable()
89
+
90
+ def _check_multiwfn_executable(self):
91
+ '''
92
+ Private method:
93
+ Check if the Multiwfn executable file exists in the system PATH.
94
+ If not, raise a FileNotFoundError with an appropriate error message.
95
+ '''
96
+ multiwfn_path = shutil.which("Multiwfn_noGUI") or shutil.which("Multiwfn")
97
+ if not multiwfn_path:
98
+ error_msg = (
99
+ "Error: No detected Multiwfn executable file (Multiwfn or Multiwfn_GUI), please check:\n "
100
+ "1. Has Multiwfn been installed correctly?\n"
101
+ "2. Has Multiwfn been added to the system PATH environment variable"
102
+ )
103
+ print(error_msg)
104
+ logging.error(error_msg)
105
+ raise FileNotFoundError("No detected Multiwfn executable file (Multiwfn or Multiwfn_GUI)")
106
+ else:
107
+ print(f"Multiwfn executable found at: {multiwfn_path}")
108
+ logging.info(f"Multiwfn executable found at: {multiwfn_path}")
109
+ return multiwfn_path
110
+
111
+ def _multiwfn_cmd_build(self, input_content, output_file=None):
112
+ '''
113
+ Private method:
114
+ Build the Multiwfn command to be executed based on the input content.
115
+ This method is used to create the input file for Multiwfn.
116
+
117
+ :params
118
+ input_content: The content to be written to the input file for Multiwfn.
119
+ '''
120
+ # 创建 input.txt 用于存储 Multiwfn 命令内容
121
+ with open('input.txt', 'w') as input_file:
122
+ input_file.write(input_content)
123
+ if output_file:
124
+ with open('output.txt', 'w') as output_file, open('input.txt', 'r') as input_file:
125
+ try:
126
+ # 通过 input.txt 执行 Multiwfn 命令, 并将输出结果重定向到 output.txt 中
127
+ subprocess.run([self.multiwfn_path], stdin=input_file, stdout=output_file, check=True)
128
+ except subprocess.CalledProcessError as e:
129
+ logging.error(
130
+ f"Error executing Multiwfn command with input {input_content}: {e}"
131
+ )
132
+ except Exception as e:
133
+ logging.error(f"Unexpected error: {e}")
134
+ raise
135
+ finally:
136
+ # 清理临时文件
137
+ try:
138
+ os.remove("input.txt")
139
+ except Exception as e:
140
+ logging.warning(f"Cannot remove temporary file input.txt: {str(e)}")
141
+ else:
142
+ with open("input.txt", "r") as input_file:
143
+ try:
144
+ # 通过 input.txt 执行 Multiwfn 命令, 并将输出结果重定向到 output.txt 中
145
+ subprocess.run([self.multiwfn_path], stdin=input_file, check=True)
146
+ except subprocess.CalledProcessError as e:
147
+ logging.error(
148
+ f"Error executing Multiwfn command with input {input_content}: {e}"
149
+ )
150
+ except Exception as e:
151
+ logging.error(f"Unexpected error: {e}")
152
+ raise
153
+ finally:
154
+ # 清理临时文件
155
+ try:
156
+ os.remove("input.txt")
157
+ except Exception as e:
158
+ logging.warning(f"Cannot remove temporary file input.txt: {str(e)}")
78
159
 
79
160
  def multiwfn_process_fchk_to_json(self, specific_directory: str = None):
80
161
  '''
@@ -130,59 +211,6 @@ class EmpiricalEstimation:
130
211
  logging.error(f'Error with moving bad files: {e}')
131
212
  logging.info(f'\nElectrostatic potential analysis by Multiwfn for {folder} folder has completed, and the results have been stored in the corresponding json files.\n')
132
213
 
133
- def _check_multiwfn_executable(self):
134
- '''
135
- Private method:
136
- Check if the Multiwfn executable file exists in the system PATH.
137
- If not, raise a FileNotFoundError with an appropriate error message.
138
- '''
139
- multiwfn_path = shutil.which("Multiwfn_noGUI") or shutil.which("Multiwfn")
140
- if not multiwfn_path:
141
- error_msg = (
142
- "Error: No detected Multiwfn executable file (Multiwfn or Multiwfn_GUI), please check:\n "
143
- "1. Has Multiwfn been installed correctly?\n"
144
- "2. Has Multiwfn been added to the system PATH environment variable"
145
- )
146
- print(error_msg)
147
- logging.error(error_msg)
148
- raise FileNotFoundError("No detected Multiwfn executable file (Multiwfn or Multiwfn_GUI)")
149
- return multiwfn_path
150
-
151
- def _multiwfn_cmd_build(self, input_content):
152
- '''
153
- Private method:
154
- Build the Multiwfn command to be executed based on the input content.
155
- This method is used to create the input file for Multiwfn.
156
-
157
- :params
158
- input_content: The content to be written to the input file for Multiwfn.
159
- '''
160
- # 检查Multiwfn可执行文件是否存在
161
- multiwfn_path = self._check_multiwfn_executable()
162
- # 创建 input.txt 用于存储 Multiwfn 命令内容
163
- with open('input.txt', 'w') as input_file:
164
- input_file.write(input_content)
165
- # 通过 input.txt 执行 Multiwfn 命令, 并将输出结果重定向到output.txt中
166
- cmd = [multiwfn_path, "<", "input.txt", ">", "output.txt"]
167
- try:
168
- subprocess.run(cmd, shell=True, capture_output=True)
169
- except subprocess.CalledProcessError as e:
170
- error_msg = f"Multiwfn execution failed (return code {e.returncode}): Error output: {e.stderr}"
171
- print(error_msg)
172
- logging.error(error_msg)
173
- raise
174
- except Exception as e:
175
- error_msg = f"Unexpected Error: {str(e)}"
176
- print(error_msg)
177
- logging.error(error_msg)
178
- raise
179
- finally:
180
- # 清理临时文件
181
- try:
182
- os.remove("input.txt")
183
- except Exception as e:
184
- logging.warning(f"无法删除临时文件 input.txt: {str(e)}")
185
-
186
214
  def _single_multiwfn_fchk_to_json(self, fchk_filename: str):
187
215
  '''
188
216
  Private method:
@@ -196,7 +224,10 @@ class EmpiricalEstimation:
196
224
  print(f'Multiwfn processing {fchk_filename}')
197
225
  logging.info(f'Multiwfn processing {fchk_filename}')
198
226
  result_flag = True
199
- self._multiwfn_cmd_build(input_content=f"{fchk_filename}\n12\n0\nq\n")
227
+ self._multiwfn_cmd_build(
228
+ input_content=f"{fchk_filename}\n12\n0\n-1\n-1\nq\n",
229
+ output_file='output.txt')
230
+ print(f'Finished processing {fchk_filename}')
200
231
 
201
232
  # 获取目录以及 .fchk 文件的无后缀文件名, 即 refcode
202
233
  folder, filename = os.path.split(fchk_filename)
@@ -207,12 +238,6 @@ class EmpiricalEstimation:
207
238
  except Exception as e:
208
239
  logging.error(f"Error reading output.txt: {e}")
209
240
  raise
210
- finally:
211
- # 清理临时文件
212
- try:
213
- os.remove("output.txt")
214
- except Exception as e:
215
- logging.warning(f"无法删除临时文件 output.txt: {str(e)}")
216
241
  # 提取所需数据
217
242
  volume_match = re.search(r'Volume:\s*([\d.]+)\s*Bohr\^3\s+\(\s*([\d.]+)\s*Angstrom\^3\)', output_content)
218
243
  density_match = re.search(r'Estimated density according to mass and volume \(M/V\):\s*([\d.]+)\s*g/cm\^3', output_content)
@@ -263,6 +288,10 @@ class EmpiricalEstimation:
263
288
  json.dump(result, json_file, indent=4)
264
289
  shutil.copyfile(src=f"{folder}/{refcode}.json", dst=f"Optimized/{folder}/{refcode}.json")
265
290
  logging.info(f'Finished processing {fchk_filename}')
291
+ try:
292
+ os.remove("output.txt")
293
+ except Exception as e:
294
+ logging.warning(f"Cannot remove temporary file output.txt: {str(e)}")
266
295
  return result_flag
267
296
 
268
297
  def gaussian_log_to_optimized_gjf(self, specific_directory: str = None):
@@ -321,14 +350,10 @@ class EmpiricalEstimation:
321
350
  refcode, _ = os.path.splitext(filename)
322
351
 
323
352
  try:
324
- # 创建 input.txt 用于存储 Multiwfn 命令内容
325
- with open('input.txt', 'w') as input_file:
326
- input_file.write(f"{log_filename}\ngi\nOptimized/{folder}/{refcode}.gjf\nq\n")
327
353
  # Multiwfn首先载入优化任务的out/log文件, 然后输入gi, 再输入要保存的gjf文件名, 此时里面的结构就是优化最后一帧的, 还避免了使用完全图形界面
328
- try:
329
- subprocess.run('Multiwfn_noGUI < input.txt', shell=True, capture_output=True)
330
- except FileNotFoundError:
331
- subprocess.run('Multiwfn < input.txt', shell=True, capture_output=True)
354
+ self._multiwfn_cmd_build(
355
+ input_content=f"{log_filename}\ngi\nOptimized/{folder}/{refcode}.gjf\nq\n"
356
+ )
332
357
  if os.path.exists(f"Optimized/{folder}/{refcode}.gjf"):
333
358
  print(f'Finished converting {refcode} .log to .gjf')
334
359
  logging.info(f'Finished converting {refcode} .log to .gjf')
@@ -339,6 +364,45 @@ class EmpiricalEstimation:
339
364
  print(f'Error with processing {log_filename}: {e}')
340
365
  logging.error(f'Error with processing {log_filename}: {e}')
341
366
 
367
+ def _read_gjf_elements(self, gjf_file):
368
+ """
369
+ Private method:
370
+ Read the elements from a .gjf file and return a dictionary with element counts.
371
+
372
+ :params
373
+ gjf_file: The full path of the .gjf file to be processed.
374
+
375
+ :return: A dictionary with element symbols as keys and their counts as values.
376
+ """
377
+ # 根据每一个组合中的组分找到对应的 JSON 文件并读取其中的性质内容
378
+ with open(gjf_file, "r") as file:
379
+ lines = file.readlines()
380
+ atomic_counts = {}
381
+ # 找到原子信息的开始行
382
+ start_reading = False
383
+ for line in lines:
384
+ line = line.strip()
385
+ # 跳过注释和空行
386
+ if line.startswith("%") or line.startswith("#") or not line:
387
+ continue
388
+ # 检测只包含两个数字的行
389
+ parts = line.split()
390
+ if (
391
+ len(parts) == 2
392
+ and parts[0].lstrip("-").isdigit()
393
+ and parts[1].isdigit()
394
+ ):
395
+ start_reading = True
396
+ continue
397
+ if start_reading:
398
+ element = parts[0] # 第一个部分是元素符号
399
+ # 更新元素计数
400
+ if element in atomic_counts:
401
+ atomic_counts[element] += 1
402
+ else:
403
+ atomic_counts[element] = 1
404
+ return atomic_counts
405
+
342
406
  def nitrogen_content_estimate(self):
343
407
  """
344
408
  Evaluate the priority of ion crystal combinations based on nitrogen content and generate .csv files
@@ -380,73 +444,62 @@ class EmpiricalEstimation:
380
444
  writer.writerow(header) # 写入表头
381
445
  writer.writerows(data) # 写入排序后的数
382
446
 
383
- def _read_gjf_elements(self, gjf_file):
447
+ def carbon_nitrogen_ratio_estimate(self):
384
448
  """
385
- Private method:
386
- Read the elements from a .gjf file and return a dictionary with element counts.
449
+ Evaluate the priority of ion crystal combinations based on carbon and nitrogen ratio
450
+ (C:N < 1:8) and sort by oxygen content, then generate .csv files.
451
+ """
452
+ atomic_masses = {"H": 1.008, "C": 12.01, "N": 14.01, "O": 16.00}
453
+ # 获取所有 .gjf 文件
454
+ combinations = self._generate_combinations(suffix=".gjf")
455
+ filtered_data = []
387
456
 
388
- :params
389
- gjf_file: The full path of the .gjf file to be processed.
457
+ for combo in combinations:
458
+ total_atoms = 0
459
+ carbon_atoms = 0
460
+ nitrogen_atoms = 0
461
+ oxygen_atoms = 0
390
462
 
391
- :return: A dictionary with element symbols as keys and their counts as values.
392
- """
393
- # 根据每一个组合中的组分找到对应的 JSON 文件并读取其中的性质内容
394
- with open(gjf_file, 'r') as file:
395
- lines = file.readlines()
396
- atomic_counts = {}
397
- # 找到原子信息的开始行
398
- start_reading = False
399
- for line in lines:
400
- line = line.strip()
401
- # 跳过注释和空行
402
- if line.startswith("%") or line.startswith("#") or not line:
403
- continue
404
- # 检测只包含两个数字的行
405
- parts = line.split()
406
- if len(parts) == 2 and parts[0].lstrip("-").isdigit() and parts[1].isdigit():
407
- start_reading = True
408
- continue
409
- if start_reading:
410
- element = parts[0] # 第一个部分是元素符号
411
- # 更新元素计数
412
- if element in atomic_counts:
413
- atomic_counts[element] += 1
414
- else:
415
- atomic_counts[element] = 1
416
- return atomic_counts
463
+ for gjf_file, ion_count in combo.items():
464
+ atomic_counts = self._read_gjf_elements(gjf_file)
465
+ for element, atom_count in atomic_counts.items():
466
+ if element in atomic_masses:
467
+ total_atoms += atom_count * ion_count
468
+ if element == "C":
469
+ carbon_atoms += atom_count * ion_count
470
+ elif element == "N":
471
+ nitrogen_atoms += atom_count * ion_count
472
+ elif element == "O":
473
+ oxygen_atoms += atom_count * ion_count
474
+ else:
475
+ raise ValueError(
476
+ "Contains element information not included, unable to calculate ratios"
477
+ )
417
478
 
418
- def _generate_combinations(self, suffix: str):
419
- """
420
- Private method:
421
- Generate all valid combinations of files based on the specified suffix and ratios.
479
+ # 计算 C:N 比率
480
+ if carbon_atoms != 0: # 确保氮的质量大于 0,避免除以零
481
+ nitrogen_carbon_ratio = round(nitrogen_atoms / carbon_atoms, 2)
482
+ else:
483
+ nitrogen_carbon_ratio = 100.0
484
+ filtered_data.append((combo, nitrogen_carbon_ratio, oxygen_atoms))
422
485
 
423
- :params
424
- suffix: The file suffix to filter the files in the folders.
486
+ # 根据氧含量排序
487
+ filtered_data.sort(key=lambda x: (-x[1], -x[2]))
425
488
 
426
- :return: A list of dictionaries representing the combinations of files with their respective ratios.
427
- """
428
- # 获取所有符合后缀名条件的文件
429
- all_files = []
430
- for folder in self.folders:
431
- suffix_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(suffix)]
432
- suffix_files.sort()
433
- print(f'Valid {suffix} file number in {folder}: {len(suffix_files)}')
434
- logging.info(f"Valid {suffix} file number in {folder}: {len(suffix_files)}")
435
- if not suffix_files:
436
- raise FileNotFoundError(f'No available {suffix} files in {folder} folder')
437
- all_files.append(suffix_files)
489
+ # 写入排序后的 .csv 文件
490
+ with open(self.NC_ratio_csv, "w", newline="", encoding="utf-8") as csv_file:
491
+ writer = csv.writer(csv_file)
492
+ # 动态生成表头
493
+ num_components = len(combinations[0]) if combinations else 0
494
+ header = [f"Component {i + 1}" for i in range(num_components)] + ["N_C_Ratio", "O_Atoms"]
495
+ writer.writerow(header) # 写入表头
438
496
 
439
- # 对所有文件根据其文件夹与配比进行组合
440
- combinations = []
441
- for folder_files in itertools.product(*all_files):
442
- # 根据给定的配比生成字典形式的组合
443
- ratio_combination = {}
444
- for folder_index, count in enumerate(self.ratios):
445
- ratio_combination.update({folder_files[folder_index]: count})
446
- combinations.append(ratio_combination)
447
- print(f'Valid combination number: {len(combinations)}')
448
- logging.info(f'Valid combination number: {len(combinations)}')
449
- return combinations
497
+ # 写入筛选后的组合和氧含量
498
+ for combo, nitrogen_carbon_ratio, oxygen_content in filtered_data:
499
+ cleaned_combo = [name.replace(".gjf", "") for name in combo]
500
+ writer.writerow(
501
+ cleaned_combo + [nitrogen_carbon_ratio, oxygen_content]
502
+ ) # 写入每一行
450
503
 
451
504
  def empirical_estimate(self):
452
505
  """
@@ -518,6 +571,39 @@ class EmpiricalEstimation:
518
571
  writer.writerow(header) # 写入表头
519
572
  writer.writerows(data) # 写入排序后的数
520
573
 
574
+ def _generate_combinations(self, suffix: str):
575
+ """
576
+ Private method:
577
+ Generate all valid combinations of files based on the specified suffix and ratios.
578
+
579
+ :params
580
+ suffix: The file suffix to filter the files in the folders.
581
+
582
+ :return: A list of dictionaries representing the combinations of files with their respective ratios.
583
+ """
584
+ # 获取所有符合后缀名条件的文件
585
+ all_files = []
586
+ for folder in self.folders:
587
+ suffix_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(suffix)]
588
+ suffix_files.sort()
589
+ print(f'Valid {suffix} file number in {folder}: {len(suffix_files)}')
590
+ logging.info(f"Valid {suffix} file number in {folder}: {len(suffix_files)}")
591
+ if not suffix_files:
592
+ raise FileNotFoundError(f'No available {suffix} files in {folder} folder')
593
+ all_files.append(suffix_files)
594
+
595
+ # 对所有文件根据其文件夹与配比进行组合
596
+ combinations = []
597
+ for folder_files in itertools.product(*all_files):
598
+ # 根据给定的配比生成字典形式的组合
599
+ ratio_combination = {}
600
+ for folder_index, count in enumerate(self.ratios):
601
+ ratio_combination.update({folder_files[folder_index]: count})
602
+ combinations.append(ratio_combination)
603
+ print(f'Valid combination number: {len(combinations)}')
604
+ logging.info(f'Valid combination number: {len(combinations)}')
605
+ return combinations
606
+
521
607
  def _copy_combo_file(self, combo_path, folder_basename, file_type):
522
608
  """
523
609
  Private method:
@@ -529,7 +615,7 @@ class EmpiricalEstimation:
529
615
  file_type: The type of file to be copied (e.g., '.gjf', '.json').
530
616
  """
531
617
  filename = f"{folder_basename}{file_type}"
532
- source_path = os.path.join(self.base_dir, 'Optimized', filename)
618
+ source_path = os.path.join(self.gaussian_optimized_dir, "Optimized", filename)
533
619
  # 复制指定后缀名文件到对应的 combo_n 文件夹
534
620
  if os.path.exists(source_path):
535
621
  if os.path.exists(os.path.join(combo_path, os.path.basename(filename))):
@@ -539,7 +625,9 @@ class EmpiricalEstimation:
539
625
  shutil.copy(source_path, combo_path)
540
626
  logging.info(f'Copied {os.path.basename(source_path)} to {combo_path}')
541
627
  else:
542
- logging.error(f'File of {filename} does not exist in {self.base_dir}')
628
+ logging.error(
629
+ f"File of {filename} does not exist in {self.gaussian_optimized_dir}"
630
+ )
543
631
 
544
632
  def make_combo_dir(self, target_dir: str, num_combos: int, ion_numbers: List[int]):
545
633
  """
@@ -554,6 +642,8 @@ class EmpiricalEstimation:
554
642
  base_csv = self.density_csv
555
643
  elif self.sort_by == 'nitrogen':
556
644
  base_csv = self.nitrogen_csv
645
+ elif self.sort_by == "NC_ratio":
646
+ base_csv = self.NC_ratio_csv
557
647
  if not target_dir:
558
648
  target_dir = f'../2_{self.sort_by}_combos'
559
649
  with open(base_csv, mode='r', newline='') as file:
@@ -582,15 +672,15 @@ class EmpiricalEstimation:
582
672
  gjf_names.append(f"{folder_basename.split('/')[1]}.gjf")
583
673
 
584
674
  # 生成上级目录路径并解析 .yaml 文件
585
- parent_dir = os.path.dirname(self.base_dir)
675
+ parent_dir = self.base_dir
586
676
  parent_config_path = os.path.join(parent_dir, 'config.yaml')
587
- base_config_path = os.path.join(self.base_dir, "config.yaml")
677
+ base_config_path = os.path.join(self.gaussian_optimized_dir, "config.yaml")
588
678
  try:
589
679
  with open(parent_config_path, 'r') as file:
590
680
  config = yaml.safe_load(file)
591
681
  except FileNotFoundError as e:
592
682
  logging.warning(f"No available config.yaml file in parent directory: {parent_dir} \n{e}")
593
- logging.info(f"Trying to load config.yaml file from base directory: {self.base_dir}")
683
+ logging.info(f"Trying to load config.yaml file from base directory: {parent_dir}")
594
684
  try:
595
685
  with open(base_config_path, 'r') as file:
596
686
  try:
@@ -598,11 +688,10 @@ class EmpiricalEstimation:
598
688
  except yaml.YAMLError as e:
599
689
  logging.error(f"YAML configuration file parsing failed: {e}")
600
690
  except FileNotFoundError as e:
601
- logging.error(f"No available config.yaml file either in parent directory: {parent_dir} and base directory {self.base_dir} \n{e}")
691
+ logging.error(
692
+ f"No available config.yaml file either in parent directory: {parent_dir} and base directory {self.gaussian_optimized_dir} \n{e}"
693
+ )
602
694
  raise
603
- except PermissionError:
604
- logging.error(f'No read permission for the path: {parent_dir}')
605
- raise
606
695
  except Exception as e:
607
696
  logging.error(f'Unexpected error: {e}')
608
697
  raise
@@ -620,4 +709,3 @@ class EmpiricalEstimation:
620
709
  yaml.dump(config, file)
621
710
  except Exception as e:
622
711
  logging.error(f"Unexpected error: {e}")
623
-
ion_CSP/run/main_EE.py CHANGED
@@ -1,4 +1,3 @@
1
- import os
2
1
  import logging
3
2
  from ion_CSP.convert_SMILES import SmilesProcessing
4
3
  from ion_CSP.empirical_estimate import EmpiricalEstimation
@@ -31,11 +30,10 @@ DEFAULT_CONFIG = {
31
30
  @log_and_time
32
31
  def main(work_dir, config):
33
32
  logging.info(f"Using config: {config}")
34
- empirical_estimate_dir = os.path.join(work_dir, "1_2_Gaussian_optimized")
35
33
  tasks = {
36
34
  "0_convertion": lambda: convertion_task(work_dir, config),
37
- "0_estimation": lambda: estimation_task(empirical_estimate_dir, config),
38
- "0_update_combo": lambda: combination_task(empirical_estimate_dir, config),
35
+ "0_estimation": lambda: estimation_task(work_dir, config),
36
+ "0_update_combo": lambda: combination_task(work_dir, config),
39
37
  }
40
38
  for task_name, task_func in tasks.items():
41
39
  task_logger = StatusLogger(work_dir=work_dir, task_name=task_name)
@@ -52,7 +50,7 @@ def main(work_dir, config):
52
50
  task_logger = StatusLogger(work_dir=work_dir, task_name="0_update_combo")
53
51
  try:
54
52
  task_logger.set_running()
55
- combination_task(empirical_estimate_dir, config)
53
+ combination_task(work_dir, config)
56
54
  task_logger.set_success()
57
55
  except Exception:
58
56
  task_logger.set_failure()
@@ -94,14 +92,6 @@ def estimation_task(work_dir, config):
94
92
  estimation.multiwfn_process_fchk_to_json()
95
93
  # 由于后续晶体生成不支持 .log 文件,需要将 Gaussian 优化得到的 .log 文件最后一帧转为 .gjf 结构文件
96
94
  estimation.gaussian_log_to_optimized_gjf()
97
- # 如果依据密度排序,则需要经验公式根据配比生成离子晶体组合,读取 .json 文件并将静电势分析得到的各离子性质代入经验公式
98
- if config["empirical_estimate"]["sort_by"] == "density":
99
- # 最终将预测的离子晶体密度以及对应的组分输出到 .csv 文件并根据密度从大到小排序
100
- estimation.empirical_estimate()
101
- # 如果依据氮含量排序,则调用另一套根据 .gjf 文件中化学分布信息
102
- elif config["empirical_estimate"]["sort_by"] == "nitrogen":
103
- # 最终将预测的离子晶体氮含量以及对应的组分输出到 .csv 文件并根据氮含量从大到小排序
104
- estimation.nitrogen_content_estimate()
105
95
 
106
96
  def combination_task(work_dir, config):
107
97
  # 在工作目录下准备 Gaussian 优化处理后具有 .gjf、.fchk 和 .log 文件的文件夹, 并提供对应的离子配比
@@ -111,6 +101,14 @@ def combination_task(work_dir, config):
111
101
  ratios=config["empirical_estimate"]["ratios"],
112
102
  sort_by=config["empirical_estimate"]["sort_by"],
113
103
  )
104
+ # 如果依据密度排序,则需要经验公式根据配比生成离子晶体组合,读取 .json 文件并将静电势分析得到的各离子性质代入经验公式
105
+ if config["empirical_estimate"]["sort_by"] == "density":
106
+ # 最终将预测的离子晶体密度以及对应的组分输出到 .csv 文件并根据密度从大到小排序
107
+ combination.empirical_estimate()
108
+ # 如果依据氮含量排序,则调用另一套根据 .gjf 文件中化学分布信息
109
+ elif config["empirical_estimate"]["sort_by"] == "nitrogen":
110
+ # 最终将预测的离子晶体氮含量以及对应的组分输出到 .csv 文件并根据氮含量从大到小排序
111
+ combination.nitrogen_content_estimate()
114
112
  # 基于排序依据 sort_by 对应的 .csv 文件创建 combo_n 文件夹,并复制相应的 .gjf 结构文件。
115
113
  if config["empirical_estimate"]["make_combo_dir"]:
116
114
  combination.make_combo_dir(
ion_CSP/task_manager.py CHANGED
@@ -31,7 +31,7 @@ class TaskManager:
31
31
  try:
32
32
  return importlib.metadata.version("ion_CSP")
33
33
  except importlib.metadata.PackageNotFoundError:
34
- logging.error("Version detection failed")
34
+ logging.error("Package not found")
35
35
  return "unknown"
36
36
  except Exception as e:
37
37
  logging.error(f"Version detection failed: {e}")
@@ -140,9 +140,7 @@ class VaspProcessing:
140
140
  task_dir = os.path.join(self.for_vasp_opt_dir, f"{parent}pop{pop}")
141
141
  for job_i in node_jobs[pop]:
142
142
  vasp_dir = mlp_contcar_files[job_i].split("CONTCAR_")[1]
143
- shutil.copytree(
144
- f"{task_dir}/{vasp_dir}", f"{self.vasp_optimized_dir}/{vasp_dir}"
145
- )
143
+ shutil.copytree(f"{task_dir}/{vasp_dir}", f"{self.vasp_optimized_dir}/{vasp_dir}", dirs_exist_ok=True)
146
144
  # 在成功完成 VASP 分步优化后,删除 3_for_vasp_opt/{parent}/pop{n} 文件夹以节省空间
147
145
  shutil.rmtree(task_dir)
148
146
  if machine_inform["context_type"] == "SSHContext":
@@ -219,14 +217,15 @@ class VaspProcessing:
219
217
  # 将分配好的POSCAR文件添加到对应的上传文件中
220
218
  vasp_dir = vasp_optimized_folders[job_i]
221
219
  fine_optimized_file = f"{vasp_dir}/fine/CONTCAR"
222
- forward_files.append(fine_optimized_file)
223
- os.makedirs(
224
- os.path.dirname(f"{task_dir}/{fine_optimized_file}"), exist_ok=True
225
- )
226
- shutil.copyfile(
227
- f"{self.vasp_optimized_dir}/{fine_optimized_file}",
228
- f"{task_dir}/{fine_optimized_file}",
229
- )
220
+ if os.path.exists(fine_optimized_file):
221
+ forward_files.append(fine_optimized_file)
222
+ os.makedirs(
223
+ os.path.dirname(f"{task_dir}/{fine_optimized_file}"), exist_ok=True
224
+ )
225
+ shutil.copyfile(
226
+ f"{self.vasp_optimized_dir}/{fine_optimized_file}",
227
+ f"{task_dir}/{fine_optimized_file}",
228
+ )
230
229
  # 每个POSCAR文件在优化后都取回对应的CONTCAR和OUTCAR输出文件
231
230
  backward_files.append(f"{vasp_dir}/*")
232
231
  backward_files.append(f"{vasp_dir}/fine/*")
@@ -255,10 +254,16 @@ class VaspProcessing:
255
254
  task_dir = os.path.join(self.vasp_optimized_dir, f"{parent}pop{pop}")
256
255
  for job_i in node_jobs[pop]:
257
256
  vasp_dir = vasp_optimized_folders[job_i]
258
- shutil.copytree(
259
- f"{task_dir}/{vasp_dir}/fine/final",
260
- f"{self.vasp_optimized_dir}/{vasp_dir}/fine/final",
261
- )
257
+ try:
258
+ shutil.copytree(
259
+ f"{task_dir}/{vasp_dir}/fine/final",
260
+ f"{self.vasp_optimized_dir}/{vasp_dir}/fine/final",
261
+ dirs_exist_ok=True,
262
+ )
263
+ except FileNotFoundError:
264
+ logging.error(
265
+ f"No final optimization results found for {vasp_dir} in {task_dir}"
266
+ )
262
267
  # 在成功完成 VASP 分步优化后,删除 4_vasp_optimized /{parent}/pop{n} 文件夹以节省空间
263
268
  shutil.rmtree(task_dir)
264
269
  if machine_inform["context_type"] == "SSHContext":
@@ -266,127 +271,6 @@ class VaspProcessing:
266
271
  shutil.rmtree(os.path.join(self.vasp_optimized_dir, parent))
267
272
  logging.info("Batch VASP optimization completed!!!")
268
273
 
269
- def dpdisp_vasp_complete_tasks(
270
- self,
271
- machine: str,
272
- resources: str,
273
- nodes: int = 1,
274
- ):
275
- """
276
- Based on the dpdispatcher module, prepare and submit files for optimization on remote server or local machine.
277
- """
278
- # 调整工作目录,减少错误发生
279
- os.chdir(self.for_vasp_opt_dir)
280
- # 读取machine.json和resources.json的参数
281
- if machine.endswith(".json"):
282
- machine = Machine.load_from_json(machine)
283
- elif machine.endswith(".yaml"):
284
- machine = Machine.load_from_yaml(machine)
285
- else:
286
- raise KeyError("Not supported machine file type")
287
- if resources.endswith(".json"):
288
- resources = Resources.load_from_json(resources)
289
- elif resources.endswith(".yaml"):
290
- resources = Resources.load_from_yaml(resources)
291
- else:
292
- raise KeyError("Not supported resources file type")
293
- # 由于dpdispatcher对于远程服务器以及本地运行的forward_common_files的默认存放位置不同,因此需要预先进行判断,从而不改动优化脚本
294
- machine_inform = machine.serialize()
295
- if machine_inform["context_type"] == "SSHContext":
296
- # 如果调用远程服务器,则创建二级目录
297
- parent = "data/"
298
- elif machine_inform["context_type"] == "LocalContext":
299
- # 如果在本地运行作业,则只在后续创建一级目录
300
- parent = ""
301
-
302
- # 获取dir文件夹中所有以prefix_name开头的文件,在此实例中为POSCAR_
303
- mlp_contcar_files = [
304
- f for f in os.listdir(self.for_vasp_opt_dir) if f.startswith("CONTCAR_")
305
- ]
306
- # 创建一个嵌套列表来存储每个节点的任务并将文件平均依次分配给每个节点
307
- # 例如:对于10个结构文件任务分发给4个节点的情况,则4个节点领到的任务分别[0, 4, 8], [1, 5, 9], [2, 6], [3, 7]
308
- node_jobs = [[] for _ in range(nodes)]
309
- for index, file in enumerate(mlp_contcar_files):
310
- node_index = index % nodes
311
- node_jobs[node_index].append(index)
312
- task_list = []
313
- for pop in range(nodes):
314
- forward_files = [
315
- "INCAR_1",
316
- "INCAR_2",
317
- "INCAR_3",
318
- "POTCAR_H",
319
- "POTCAR_C",
320
- "POTCAR_N",
321
- "POTCAR_O",
322
- "sub_final.sh",
323
- ]
324
- backward_files = ["log", "err"]
325
- # 将所有参数文件各复制一份到每个 task_dir 目录下
326
- task_dir = os.path.join(self.for_vasp_opt_dir, f"{parent}pop{pop}")
327
- os.makedirs(task_dir, exist_ok=True)
328
- for file in forward_files:
329
- shutil.copyfile(self.param_dir.joinpath(file), f"{task_dir}/{file}")
330
- for job_i in node_jobs[pop]:
331
- # 将分配好的POSCAR文件添加到对应的上传文件中
332
- forward_files.append(mlp_contcar_files[job_i])
333
- vasp_dir = mlp_contcar_files[job_i].split("CONTCAR_")[1]
334
- # 每个POSCAR文件在优化后都取回对应的CONTCAR和OUTCAR输出文件
335
- backward_files.append(f"{vasp_dir}/*")
336
- backward_files.append(f"{vasp_dir}/fine/*")
337
- backward_files.append(f"{vasp_dir}/fine/final/*")
338
- shutil.copyfile(
339
- f"{self.for_vasp_opt_dir}/{mlp_contcar_files[job_i]}",
340
- f"{task_dir}/{mlp_contcar_files[job_i]}",
341
- )
342
-
343
- remote_task_dir = f"{parent}pop{pop}"
344
- command = "chmod +x sub_final.sh && ./sub_final.sh"
345
- task = Task(
346
- command=command,
347
- task_work_path=remote_task_dir,
348
- forward_files=forward_files,
349
- backward_files=backward_files,
350
- )
351
- task_list.append(task)
352
-
353
- submission = Submission(
354
- work_base=self.for_vasp_opt_dir,
355
- machine=machine,
356
- resources=resources,
357
- task_list=task_list,
358
- )
359
- submission.run_submission()
360
-
361
- # 创建用于存放优化后文件的 4_vasp_optimized 目录
362
- os.makedirs(self.vasp_optimized_dir, exist_ok=True)
363
- mlp_outcar_files = [
364
- f for f in os.listdir(self.for_vasp_opt_dir) if f.startswith("OUTCAR_")
365
- ]
366
- for mlp_contcar, mlp_outcar in zip(mlp_contcar_files, mlp_outcar_files):
367
- shutil.copyfile(
368
- f"{self.for_vasp_opt_dir}/{mlp_contcar}",
369
- f"{self.vasp_optimized_dir}/{mlp_contcar}",
370
- )
371
- shutil.copyfile(
372
- f"{self.for_vasp_opt_dir}/{mlp_outcar}",
373
- f"{self.vasp_optimized_dir}/{mlp_outcar}",
374
- )
375
- for pop in range(nodes):
376
- # 从传回的 pop 文件夹中将结果文件取到 4_vasp_optimized 目录
377
- task_dir = os.path.join(self.for_vasp_opt_dir, f"{parent}pop{pop}")
378
- for job_i in node_jobs[pop]:
379
- vasp_dir = mlp_contcar_files[job_i].split("CONTCAR_")[1]
380
- shutil.copytree(
381
- f"{task_dir}/{vasp_dir}", f"{self.vasp_optimized_dir}/{vasp_dir}"
382
- )
383
- # 在成功完成 VASP 分步优化后,删除 3_for_vasp_opt/{parent}/pop{n} 文件夹以节省空间
384
- shutil.rmtree(task_dir)
385
- if machine_inform["context_type"] == "SSHContext":
386
- # 如果调用远程服务器,则删除data级目录
387
- shutil.rmtree(os.path.join(self.for_vasp_opt_dir, parent))
388
- logging.info("Batch VASP optimization completed!!!")
389
-
390
274
  def read_vaspout_save_csv(self, molecules_prior: bool, relaxation: bool = False):
391
275
  """
392
276
  Read VASP output files in batches and save energy and density to corresponding CSV files in the directory
@@ -625,6 +509,17 @@ class VaspProcessing:
625
509
  writer.writerow(header)
626
510
  for data in datas:
627
511
  writer.writerow(data)
512
+
513
+ logging.info(
514
+ f"Maximum MLP Density: {max(mlp_densities)}, Structure Number: {numbers[mlp_densities.index(max(mlp_densities))]}"
515
+ )
516
+ logging.info(
517
+ f"Maximum Fine Density: {max(fine_densities)}, Structure Number: {numbers[fine_densities.index(max(fine_densities))]}"
518
+ )
519
+ if relaxation:
520
+ logging.info(
521
+ f"Maximum Final Density: {max(final_densities)}, Structure Number: {numbers[final_densities.index(max(final_densities))]}"
522
+ )
628
523
 
629
524
  def export_max_density_structure(self):
630
525
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ion_CSP
3
- Version: 2.1.8
3
+ Version: 2.2.0
4
4
  Summary: Crystal Structure Design Software Based on Molecular/Ionic Configuration.
5
5
  Home-page: https://github.com/bagabaga007/ion_CSP
6
6
  Author: Ze Yang
@@ -28,7 +28,7 @@ Dynamic: home-page
28
28
  Dynamic: license-file
29
29
  Dynamic: requires-python
30
30
 
31
- # 基于分子/离子构型的晶体结构设计软件 V2.1
31
+ # 基于分子/离子构型的晶体结构设计软件 V2.2
32
32
 
33
33
  ## 项目概述
34
34
 
@@ -1,16 +1,16 @@
1
- ion_CSP/__init__.py,sha256=9fEgjaiU5zoBKrOtVe14YmIhZJkFf_8u26EnBlod6Tw,374
1
+ ion_CSP/__init__.py,sha256=1WGzlZrB669L2oFjKynPwf89qMUaSmlCttuR4864M_Y,356
2
2
  ion_CSP/__main__.py,sha256=XlNCx5eMSrL7yld9ddSYXhjXvg2ZYGD_uk9LdqNabvs,74
3
- ion_CSP/convert_SMILES.py,sha256=78StHwYm_hkP0CZhxa_AM5ywnjy6TGBNm6edyzhMTZ4,15337
4
- ion_CSP/empirical_estimate.py,sha256=bnV8Ak_UA5dWv-e4enChc-aAJ_2mlvtObxBZPmCbrsI,33965
3
+ ion_CSP/convert_SMILES.py,sha256=HAexqf6HXZAqRuMww5BKmU68MIO3d7XIaUtPKv_QwMs,15595
4
+ ion_CSP/empirical_estimate.py,sha256=aSidH3jyoG39ky-kDNUY0ix8hPefeVVWmPABVjTmy0g,37866
5
5
  ion_CSP/gen_opt.py,sha256=F_gEopuOO-b-tHfS0V4OMeThktY2QvPGWCVRXOCemfk,21605
6
6
  ion_CSP/identify_molecules.py,sha256=GxDWq815Bk_Fq_SR8fe-dbrbEi1YgATVa7UINw3hAu4,5535
7
7
  ion_CSP/log_and_time.py,sha256=Db53LAM2KH_ty6M9_5FF8xDGiULgExh7pcKOvFtS7DQ,11697
8
8
  ion_CSP/mlp_opt.py,sha256=uJaqjNYLzc4dRogNcGIP_Ukta_fMd5YdYVf9cNweOA4,7029
9
9
  ion_CSP/read_mlp_density.py,sha256=KwVgniroT46uFQ7_HROd5Fk9YxJCMip1jnufWvHHEiw,12104
10
10
  ion_CSP/steps_opt_monitor.sh,sha256=1klPjnK0gqkDbvI9PtjdK5qidJ5G0Mo8q1SfrlLW5xM,3330
11
- ion_CSP/task_manager.py,sha256=-tZXcK9S2igh--K_Ry_SSk0w4UEI6rqu4L48FIRUNyk,16576
11
+ ion_CSP/task_manager.py,sha256=JglPNDKpsv-bjbCm42D4k6GegDkSylX4oDWAdFa-oSU,16569
12
12
  ion_CSP/upload_download.py,sha256=HXxVQMUydEoHe2vV89wR7De4Ut1lEte6pmp8Q82decI,23800
13
- ion_CSP/vasp_processing.py,sha256=fuDqJU7vy3TGbvPFJMBXgN2C-VFCndcdkWCMJ0DqHfE,33249
13
+ ion_CSP/vasp_processing.py,sha256=Q4OotC5eK4RN4R3GZu5DnLk7wnkYSh-yC1oeGvrtT5U,28436
14
14
  ion_CSP/model/model.pt,sha256=5D9HTP5b7jUOv3kHltT71ORzhgt5p96awjbqC4oZVjQ,24649402
15
15
  ion_CSP/model/options/README.md,sha256=ifoeNXF2CfwqUjt3Xmh5qUl-e4hfP4eMV4EkqV7GU30,182
16
16
  ion_CSP/model/options/model.ckpt-4000000.pt,sha256=5D9HTP5b7jUOv3kHltT71ORzhgt5p96awjbqC4oZVjQ,24649402
@@ -28,16 +28,16 @@ ion_CSP/param/sub_ori.sh,sha256=JBERlc-VOVCNaKGwiJR8oq7Nyf0KV4JpHEVT5sE5s8E,2497
28
28
  ion_CSP/param/sub_supple.sh,sha256=23cam7WyW7-80J8O-Bs45qYkabk3mxZDgiHZvf48KBM,1887
29
29
  ion_CSP/run/__init__.py,sha256=_9EAXp4cv41ARbxahCkihwqY4F00Y18tBeTauWeD9mw,186
30
30
  ion_CSP/run/main_CSP.py,sha256=UaYHlh7BSxar4uGppPi-V0cFDpB14212Oy6gta59LfA,5898
31
- ion_CSP/run/main_EE.py,sha256=4L0VbbgUaYaDJM-6EjffphxMoWAHaZchEaSCVJxsdls,6345
31
+ ion_CSP/run/main_EE.py,sha256=8TFlJx7QhJKGc4qZ2O0ESRYrlySp3r1WjeGLkUBeL5k,6217
32
32
  ion_CSP/run/run_convert_SMILES.py,sha256=85a8-UXPxPo3Yw_iYED_QF47yNTvYRnJHm3PC1d-d_Q,2056
33
33
  ion_CSP/run/run_empirical_estimate.py,sha256=U_yvQ5gMiBkDEabHXLJSAEm0EzGHhSKs6xmWoEC_gjc,2831
34
34
  ion_CSP/run/run_gen_opt.py,sha256=_Zcsu0FkuZTfiGKSWNaK17LiyQ3qrP30F66UN5QemCo,2727
35
35
  ion_CSP/run/run_read_mlp_density.py,sha256=aSJjWS1jH-D7qzx7RnpMPSTH7KEZp2b35dg1b2OQSCM,1864
36
36
  ion_CSP/run/run_upload_download.py,sha256=wuTAdy4bgdduD7TJtgHwo_fTpHKlkAwmgRknClDLYDo,2436
37
37
  ion_CSP/run/run_vasp_processing.py,sha256=hziE4cZwmIWvVaZtwHn9Dl35apYSLlMvSVIbCyd5mFg,1612
38
- ion_csp-2.1.8.dist-info/licenses/LICENSE,sha256=yeL9PshY_rGAt3GKqn8U7NafHifpmZipb-Owu0DDrHo,1070
39
- ion_csp-2.1.8.dist-info/METADATA,sha256=w6C2UdafCraaU8Cl-5kPRC6Q0BXAPZ8AzQ2anwCeAIg,6314
40
- ion_csp-2.1.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
41
- ion_csp-2.1.8.dist-info/entry_points.txt,sha256=NexQJDs9f69kJA2DgoU6tsA3V8a66nadJRem1U_c_6g,54
42
- ion_csp-2.1.8.dist-info/top_level.txt,sha256=aYZa43dDebjLpWPN6bDIlBb6BVwA8gk4ajEjDDK9b9I,8
43
- ion_csp-2.1.8.dist-info/RECORD,,
38
+ ion_csp-2.2.0.dist-info/licenses/LICENSE,sha256=yeL9PshY_rGAt3GKqn8U7NafHifpmZipb-Owu0DDrHo,1070
39
+ ion_csp-2.2.0.dist-info/METADATA,sha256=7vO9oy9g9NEA_XQWdWBZSXda1VxTcE8NzuyCvr06VQ8,6314
40
+ ion_csp-2.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
41
+ ion_csp-2.2.0.dist-info/entry_points.txt,sha256=NexQJDs9f69kJA2DgoU6tsA3V8a66nadJRem1U_c_6g,54
42
+ ion_csp-2.2.0.dist-info/top_level.txt,sha256=aYZa43dDebjLpWPN6bDIlBb6BVwA8gk4ajEjDDK9b9I,8
43
+ ion_csp-2.2.0.dist-info/RECORD,,