ion-CSP 2.1.4__py3-none-any.whl → 2.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. ion_CSP/__init__.py +2 -2
  2. ion_CSP/convert_SMILES.py +32 -10
  3. ion_CSP/empirical_estimate.py +136 -18
  4. ion_CSP/gen_opt.py +83 -33
  5. ion_CSP/identify_molecules.py +15 -0
  6. ion_CSP/log_and_time.py +55 -8
  7. ion_CSP/mlp_opt.py +52 -6
  8. ion_CSP/model/model.pt +0 -0
  9. ion_CSP/model/options/README.md +5 -0
  10. ion_CSP/model/options/model.ckpt-4000000.pt +0 -0
  11. ion_CSP/param/INCAR_0 +16 -0
  12. ion_CSP/param/INCAR_1 +19 -0
  13. ion_CSP/param/INCAR_2 +19 -0
  14. ion_CSP/param/INCAR_3 +19 -0
  15. ion_CSP/param/POTCAR_C +2319 -0
  16. ion_CSP/param/POTCAR_H +1563 -0
  17. ion_CSP/param/POTCAR_N +2351 -0
  18. ion_CSP/param/POTCAR_O +2487 -0
  19. ion_CSP/param/g16_sub.sh +21 -0
  20. ion_CSP/param/sub_final.sh +91 -0
  21. ion_CSP/param/sub_ori.sh +74 -0
  22. ion_CSP/param/sub_supple.sh +56 -0
  23. ion_CSP/read_mlp_density.py +15 -1
  24. ion_CSP/task_manager.py +2 -2
  25. ion_CSP/upload_download.py +0 -1
  26. ion_CSP/vasp_processing.py +48 -20
  27. {ion_csp-2.1.4.dist-info → ion_csp-2.1.8.dist-info}/METADATA +45 -16
  28. ion_csp-2.1.8.dist-info/RECORD +43 -0
  29. {ion_csp-2.1.4.dist-info → ion_csp-2.1.8.dist-info}/licenses/LICENSE +1 -1
  30. {ion_csp-2.1.4.dist-info → ion_csp-2.1.8.dist-info}/top_level.txt +0 -1
  31. ion_csp-2.1.4.dist-info/RECORD +0 -28
  32. {run → ion_CSP/run}/__init__.py +0 -0
  33. {run → ion_CSP/run}/main_CSP.py +0 -0
  34. {run → ion_CSP/run}/main_EE.py +0 -0
  35. {run → ion_CSP/run}/run_convert_SMILES.py +0 -0
  36. {run → ion_CSP/run}/run_empirical_estimate.py +0 -0
  37. {run → ion_CSP/run}/run_gen_opt.py +0 -0
  38. {run → ion_CSP/run}/run_read_mlp_density.py +0 -0
  39. {run → ion_CSP/run}/run_upload_download.py +0 -0
  40. {run → ion_CSP/run}/run_vasp_processing.py +0 -0
  41. {ion_csp-2.1.4.dist-info → ion_csp-2.1.8.dist-info}/WHEEL +0 -0
  42. {ion_csp-2.1.4.dist-info → ion_csp-2.1.8.dist-info}/entry_points.txt +0 -0
ion_CSP/__init__.py CHANGED
@@ -1,8 +1,8 @@
1
1
  __author__ = "Ze Yang"
2
2
  __contact__ = "yangze1995007@163.com"
3
3
  __license__ = "MIT"
4
- __version__ = "2.1.4"
5
- __date__ = "2025-06-13"
4
+ __version__ = "2.1.8"
5
+ __date__ = "2025-06-23"
6
6
 
7
7
 
8
8
  try:
ion_CSP/convert_SMILES.py CHANGED
@@ -2,6 +2,7 @@ import os
2
2
  import shutil
3
3
  import logging
4
4
  import pandas as pd
5
+ import importlib.resources
5
6
  from typing import List
6
7
  from rdkit import Chem
7
8
  from rdkit.Chem import AllChem
@@ -13,9 +14,13 @@ class SmilesProcessing:
13
14
 
14
15
  def __init__(self, work_dir: str, csv_file: str, converted_folder: str = '1_1_SMILES_gjf', optimized_dir: str = '1_2_Gaussian_optimized'):
15
16
  """
16
- args:
17
+ This class is used to process SMILES codes from a CSV file, convert them into Gaussian input files, and prepare for optimization tasks. It also supports grouping by charge and filtering based on functional groups.
18
+
19
+ params:
17
20
  work_dir: the path of the working directory.
18
21
  csv_file: the csv file name in the working directory.
22
+ converted_folder: the folder name for storing converted SMILES files.
23
+ optimized_dir: the folder name for storing Gaussian optimized files.
19
24
  """
20
25
  redirect_dpdisp_logging(os.path.join(work_dir, "dpdispatcher.log"))
21
26
  # 读取csv文件并处理数据, csv文件的表头包括 SMILES, Charge, Refcode或Number
@@ -28,7 +33,7 @@ class SmilesProcessing:
28
33
  self.base_dir, converted_folder, os.path.splitext(csv_file)[0]
29
34
  )
30
35
  self.gaussian_optimized_dir = os.path.join(self.base_dir, optimized_dir)
31
- self.param_dir = os.path.join(os.path.dirname(__file__), "../../param")
36
+ self.param_dir = importlib.resources.files("ion_CSP.param")
32
37
  original_df = pd.read_csv(csv_path)
33
38
  logging.info(f"Processing {csv_path}")
34
39
  # 对SMILES码去重
@@ -53,13 +58,15 @@ class SmilesProcessing:
53
58
  self, dir: str, smiles: str, basename: str, charge: int
54
59
  ):
55
60
  """
56
- Private method: Use the rdkit module to read SMILES code and convert it into the required file types such as gjf, xyz, mol, etc.
61
+ Private method:
62
+ Use the rdkit module to read SMILES code and convert it into the required file types such as gjf, xyz, mol, etc.
57
63
 
58
- args:
64
+ params:
59
65
  dir: The directory used for outputting files, regardless of existence of the directory.
60
66
  smiles: SMILES code to be converted.
61
67
  basename: The reference code or number corresponding to SMILES code.
62
68
  charge: The charge carried by ions.
69
+
63
70
  return:
64
71
  result_code: Result code 0 or -1, representing success and failure respectively.
65
72
  basename: The corresponding basename.
@@ -144,6 +151,12 @@ class SmilesProcessing:
144
151
  ):
145
152
  """
146
153
  Screen based on the provided functional groups and charges.
154
+
155
+ params:
156
+ charge_screen: The charge to screen for, default is 0.
157
+ group_screen: The functional group to screen for, default is empty string.
158
+ group_name: The name of the functional group, used for naming the output directory.
159
+ group_screen_invert: If True, invert the screening condition for the functional group.
147
160
  """
148
161
  # 另外筛选出符合条件的离子
149
162
  screened = self.df
@@ -179,6 +192,12 @@ class SmilesProcessing:
179
192
  ):
180
193
  """
181
194
  Based on the dpdispatcher module, prepare and submit files for optimization on remote server or local machine.
195
+
196
+ params:
197
+ folders: List of folders containing .gjf files to be processed, if empty, all folders in the converted directory will be processed.
198
+ machine: The machine configuration file for dpdispatcher, can be a JSON or YAML file.
199
+ resources: The resources configuration file for dpdispatcher, can be a JSON or YAML file.
200
+ nodes: The number of nodes to distribute the tasks to, default is 1.
182
201
  """
183
202
  if os.path.exists(self.gaussian_optimized_dir):
184
203
  logging.error(f'The directory {self.gaussian_optimized_dir} has already existed.')
@@ -233,7 +252,7 @@ class SmilesProcessing:
233
252
  task_dir = os.path.join(self.converted_dir, f"{parent}pop{pop}")
234
253
  os.makedirs(task_dir, exist_ok=True)
235
254
  for file in forward_files:
236
- shutil.copyfile(f"{self.param_dir}/{file}", f"{task_dir}/{file}")
255
+ shutil.copyfile(self.param_dir.joinpath(file), f"{task_dir}/{file}")
237
256
  for job_i in node_jobs[pop]:
238
257
  # 将分配好的 .gjf 文件添加到对应的上传文件中
239
258
  forward_files.append(gjf_files[job_i])
@@ -274,11 +293,14 @@ class SmilesProcessing:
274
293
  for job_i in node_jobs[pop]:
275
294
  base_name, _ = os.path.splitext(gjf_files[job_i])
276
295
  # 在优化后都取回每个 .gjf 文件对应的 .log、.fchk 输出文件
277
- for ext in ['gjf', 'log', 'fchk']:
278
- shutil.copyfile(
279
- f"{task_dir}/{base_name}.{ext}",
280
- f"{optimized_folder_dir}/{base_name}.{ext}"
281
- )
296
+ try:
297
+ for ext in ['gjf', 'log', 'fchk']:
298
+ shutil.copyfile(
299
+ f"{task_dir}/{base_name}.{ext}",
300
+ f"{optimized_folder_dir}/{base_name}.{ext}"
301
+ )
302
+ except FileNotFoundError as e:
303
+ logging.error(f"File not found during copying, please check the configuration and state of Gaussian: {e}")
282
304
  # 在成功完成Gaussian优化后,删除 1_1_SMILES_gjf/{csv}/{parent}/pop{n} 文件夹以节省空间
283
305
  shutil.rmtree(task_dir)
284
306
  shutil.copyfile(
ion_CSP/empirical_estimate.py CHANGED
@@ -55,7 +55,13 @@ class EmpiricalEstimation:
55
55
 
56
56
  def __init__(self, work_dir: str, folders: List[str], ratios: List[int], sort_by: str):
57
57
  """
58
- Retrieve the directory where the current script is located and use it as the working directory.
58
+ This class is designed to process Gaussian calculation files, perform electrostatic potential analysis using Multiwfn, and estimate the nitrogen content or density of ion crystal combinations. The class will also generate .csv files containing sorted nitrogen content or density based on the specified sorting criterion.
59
+
60
+ :params
61
+ work_dir: The working directory where the Gaussian calculation files are located.
62
+ folders: A list of folder names containing the Gaussian calculation files.
63
+ ratios: A list of integers representing the ratio of each folder in the combination.
64
+ sort_by: A string indicating the sorting criterion, either 'density' or 'nitrogen'.
59
65
  """
60
66
  self.base_dir = work_dir
61
67
  os.chdir(self.base_dir)
@@ -73,6 +79,9 @@ class EmpiricalEstimation:
73
79
  def multiwfn_process_fchk_to_json(self, specific_directory: str = None):
74
80
  '''
75
81
  If a specific directory is given, this method can be used separately to implement batch processing of FCHK files with Multiwfn and save the desired electrostatic potential analysis results to the corresponding JSON file. Otherwise, the folder list provided during initialization will be processed sequentially.
82
+
83
+ :params
84
+ specific_directory: The specific directory to process. If None, all folders will be processed.
76
85
  '''
77
86
  if specific_directory is None:
78
87
  for folder in self.folders:
@@ -84,7 +93,11 @@ class EmpiricalEstimation:
84
93
 
85
94
  def _multiwfn_process_fchk_to_json(self, folder: str):
86
95
  '''
96
+ Private method:
87
97
  Perform electrostatic potential analysis on .fchk files using Multiwfn and save the analysis results to a .json file.
98
+
99
+ :params
100
+ folder: The folder containing the .fchk files to be processed.
88
101
  '''
89
102
  # 在每个文件夹中获取 .fchk 文件并根据文件名排序, 再用 Multiwfn 进行静电势分析, 最后将分析结果保存到同名 .json 文件中
90
103
  fchk_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.fchk')]
@@ -117,26 +130,89 @@ class EmpiricalEstimation:
117
130
  logging.error(f'Error with moving bad files: {e}')
118
131
  logging.info(f'\nElectrostatic potential analysis by Multiwfn for {folder} folder has completed, and the results have been stored in the corresponding json files.\n')
119
132
 
120
- def _single_multiwfn_fchk_to_json(self, fchk_filename: str):
133
+ def _check_multiwfn_executable(self):
121
134
  '''
122
- Private method: Use multiwfn to perform electrostatic potential analysis on each FCHK file separately, and save the required results to a corresponding JSON file.
135
+ Private method:
136
+ Check if the Multiwfn executable file exists in the system PATH.
137
+ If not, raise a FileNotFoundError with an appropriate error message.
123
138
  '''
124
- print(f'Multiwfn processing {fchk_filename}')
125
- logging.info(f'Multiwfn processing {fchk_filename}')
126
- result_flag = True
139
+ multiwfn_path = shutil.which("Multiwfn_noGUI") or shutil.which("Multiwfn")
140
+ if not multiwfn_path:
141
+ error_msg = (
142
+ "Error: No detected Multiwfn executable file (Multiwfn or Multiwfn_GUI), please check:\n "
143
+ "1. Has Multiwfn been installed correctly?\n"
144
+ "2. Has Multiwfn been added to the system PATH environment variable"
145
+ )
146
+ print(error_msg)
147
+ logging.error(error_msg)
148
+ raise FileNotFoundError("No detected Multiwfn executable file (Multiwfn or Multiwfn_GUI)")
149
+ return multiwfn_path
150
+
151
+ def _multiwfn_cmd_build(self, input_content):
152
+ '''
153
+ Private method:
154
+ Build the Multiwfn command to be executed based on the input content.
155
+ This method is used to create the input file for Multiwfn.
156
+
157
+ :params
158
+ input_content: The content to be written to the input file for Multiwfn.
159
+ '''
160
+ # 检查Multiwfn可执行文件是否存在
161
+ multiwfn_path = self._check_multiwfn_executable()
127
162
  # 创建 input.txt 用于存储 Multiwfn 命令内容
128
163
  with open('input.txt', 'w') as input_file:
129
- input_file.write(f"{fchk_filename}\n12\n0\nq\n")
164
+ input_file.write(input_content)
130
165
  # 通过 input.txt 执行 Multiwfn 命令, 并将输出结果重定向到output.txt中
166
+ cmd = [multiwfn_path, "<", "input.txt", ">", "output.txt"]
131
167
  try:
132
- subprocess.run('Multiwfn_noGUI < input.txt > output.txt', shell=True, capture_output=True)
133
- except FileNotFoundError:
134
- subprocess.run('Multiwfn < input.txt > output.txt', shell=True, capture_output=True)
168
+ subprocess.run(cmd, shell=True, capture_output=True)
169
+ except subprocess.CalledProcessError as e:
170
+ error_msg = f"Multiwfn execution failed (return code {e.returncode}): Error output: {e.stderr}"
171
+ print(error_msg)
172
+ logging.error(error_msg)
173
+ raise
174
+ except Exception as e:
175
+ error_msg = f"Unexpected Error: {str(e)}"
176
+ print(error_msg)
177
+ logging.error(error_msg)
178
+ raise
179
+ finally:
180
+ # 清理临时文件
181
+ try:
182
+ os.remove("input.txt")
183
+ except Exception as e:
184
+ logging.warning(f"无法删除临时文件 input.txt: {str(e)}")
185
+
186
+ def _single_multiwfn_fchk_to_json(self, fchk_filename: str):
187
+ '''
188
+ Private method:
189
+ Use multiwfn to perform electrostatic potential analysis on each FCHK file separately, and save the required results to a corresponding JSON file.
190
+
191
+ :params
192
+ fchk_filename: The full path of the FCHK file to be processed.
193
+
194
+ :return: True if the processing is successful, False if the FCHK file is invalid.
195
+ '''
196
+ print(f'Multiwfn processing {fchk_filename}')
197
+ logging.info(f'Multiwfn processing {fchk_filename}')
198
+ result_flag = True
199
+ self._multiwfn_cmd_build(input_content=f"{fchk_filename}\n12\n0\nq\n")
200
+
135
201
  # 获取目录以及 .fchk 文件的无后缀文件名, 即 refcode
136
202
  folder, filename = os.path.split(fchk_filename)
137
203
  refcode, _ = os.path.splitext(filename)
138
- with open('output.txt', 'r') as output_file:
139
- output_content = output_file.read()
204
+ try:
205
+ with open('output.txt', 'r') as output_file:
206
+ output_content = output_file.read()
207
+ except Exception as e:
208
+ logging.error(f"Error reading output.txt: {e}")
209
+ raise
210
+ finally:
211
+ # 清理临时文件
212
+ try:
213
+ os.remove("output.txt")
214
+ except Exception as e:
215
+ logging.warning(f"无法删除临时文件 output.txt: {str(e)}")
140
216
  # 提取所需数据
141
217
  volume_match = re.search(r'Volume:\s*([\d.]+)\s*Bohr\^3\s+\(\s*([\d.]+)\s*Angstrom\^3\)', output_content)
142
218
  density_match = re.search(r'Estimated density according to mass and volume \(M/V\):\s*([\d.]+)\s*g/cm\^3', output_content)
@@ -186,8 +262,6 @@ class EmpiricalEstimation:
186
262
  with open (f"{folder}/{refcode}.json", 'w') as json_file:
187
263
  json.dump(result, json_file, indent=4)
188
264
  shutil.copyfile(src=f"{folder}/{refcode}.json", dst=f"Optimized/{folder}/{refcode}.json")
189
- os.remove('input.txt')
190
- os.remove('output.txt')
191
265
  logging.info(f'Finished processing {fchk_filename}')
192
266
  return result_flag
193
267
 
@@ -195,6 +269,9 @@ class EmpiricalEstimation:
195
269
  """
196
270
  If a specific directory is given, this method can be used separately to batch process the last frame of Gaussian optimized LOG files into GJF files using Multiwfn.
197
271
  Otherwise, the folder list provided during initialization will be processed in order.
272
+
273
+ :params
274
+ specific_directory: The specific directory to process. If None, all folders will be processed.
198
275
  """
199
276
  if specific_directory is None:
200
277
  for folder in self.folders:
@@ -206,7 +283,11 @@ class EmpiricalEstimation:
206
283
 
207
284
  def _gaussian_log_to_optimized_gjf(self, folder: str):
208
285
  '''
286
+ Private method:
209
287
  Due to the lack of support of Pyxtal module for LOG files in subsequent crystal generation, it is necessary to convert the last frame of the Gaussian optimized LOG file to a .gjf file with Multiwfn processing.
288
+
289
+ :params
290
+ folder: The folder containing the Gaussian LOG files to be processed.
210
291
  '''
211
292
  # 在每个文件夹中获取 .log 文件并根据文件名排序, 再用Multiwfn载入优化最后一帧转换为 gjf 文件
212
293
  log_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.log')]
@@ -226,10 +307,19 @@ class EmpiricalEstimation:
226
307
  pass
227
308
  logging.info(f'\nThe .log to .gjf conversion by Multiwfn for {folder} folder has completed, and the optimized .gjf structures have been stored in the optimized directory.\n')
228
309
 
229
- def _single_multiwfn_log_to_gjf(self, folder: str, log_filename: str):
310
+ def _single_multiwfn_log_to_gjf(self, folder: str, log_filename: str):
311
+ """
312
+ Private method:
313
+ Use Multiwfn to convert the last frame of the Gaussian optimized LOG file to a .gjf file.
314
+
315
+ :params
316
+ folder: The folder containing the Gaussian LOG file to be processed.
317
+ log_filename: The full path of the LOG file to be processed.
318
+ """
230
319
  # 获取目录以及 .fchk 文件的无后缀文件名, 即 refcode
231
320
  _, filename = os.path.split(log_filename)
232
321
  refcode, _ = os.path.splitext(filename)
322
+
233
323
  try:
234
324
  # 创建 input.txt 用于存储 Multiwfn 命令内容
235
325
  with open('input.txt', 'w') as input_file:
@@ -291,6 +381,15 @@ class EmpiricalEstimation:
291
381
  writer.writerows(data) # 写入排序后的数
292
382
 
293
383
  def _read_gjf_elements(self, gjf_file):
384
+ """
385
+ Private method:
386
+ Read the elements from a .gjf file and return a dictionary with element counts.
387
+
388
+ :params
389
+ gjf_file: The full path of the .gjf file to be processed.
390
+
391
+ :return: A dictionary with element symbols as keys and their counts as values.
392
+ """
294
393
  # 根据每一个组合中的组分找到对应的 JSON 文件并读取其中的性质内容
295
394
  with open(gjf_file, 'r') as file:
296
395
  lines = file.readlines()
@@ -317,6 +416,15 @@ class EmpiricalEstimation:
317
416
  return atomic_counts
318
417
 
319
418
  def _generate_combinations(self, suffix: str):
419
+ """
420
+ Private method:
421
+ Generate all valid combinations of files based on the specified suffix and ratios.
422
+
423
+ :params
424
+ suffix: The file suffix to filter the files in the folders.
425
+
426
+ :return: A list of dictionaries representing the combinations of files with their respective ratios.
427
+ """
320
428
  # 获取所有符合后缀名条件的文件
321
429
  all_files = []
322
430
  for folder in self.folders:
@@ -411,6 +519,15 @@ class EmpiricalEstimation:
411
519
  writer.writerows(data) # 写入排序后的数
412
520
 
413
521
  def _copy_combo_file(self, combo_path, folder_basename, file_type):
522
+ """
523
+ Private method:
524
+ Copy the specified file type from the Optimized directory to the combo_n folder.
525
+
526
+ :params
527
+ combo_path: The path to the combo_n folder where the file will be copied.
528
+ folder_basename: The basename of the folder containing the file to be copied.
529
+ file_type: The type of file to be copied (e.g., '.gjf', '.json').
530
+ """
414
531
  filename = f"{folder_basename}{file_type}"
415
532
  source_path = os.path.join(self.base_dir, 'Optimized', filename)
416
533
  # 复制指定后缀名文件到对应的 combo_n 文件夹
@@ -428,9 +545,10 @@ class EmpiricalEstimation:
428
545
  """
429
546
  Create a combo_n folder based on the .csv file and copy the corresponding .gjf structure file.
430
547
 
431
- :param target_directory: The target directory of the combo folder to be created
432
- :param num_folders: The number of combo folders to be created
433
- :param ion_numbers: The number of ions for ionic crystal generation step (generated in config.yaml in the corresponding combo_dir automatically)
548
+ :params
549
+ target_directory: The target directory of the combo folder to be created
550
+ num_folders: The number of combo folders to be created
551
+ ion_numbers: The number of ions for ionic crystal generation step (generated in config.yaml in the corresponding combo_dir automatically)
434
552
  """
435
553
  if self.sort_by == 'density':
436
554
  base_csv = self.density_csv
ion_CSP/gen_opt.py CHANGED
@@ -4,11 +4,12 @@ import time
4
4
  import shutil
5
5
  import logging
6
6
  import subprocess
7
+ import importlib.resources
8
+ from typing import List
7
9
  from ase.io import read
10
+ from dpdispatcher import Machine, Resources
8
11
  from pyxtal import pyxtal
9
12
  from pyxtal.msg import Comp_CompatibilityError, Symm_CompatibilityError
10
- from dpdispatcher import Machine
11
- from typing import List
12
13
  from ion_CSP.log_and_time import redirect_dpdisp_logging
13
14
 
14
15
 
@@ -16,32 +17,36 @@ class CrystalGenerator:
16
17
  def __init__(self, work_dir: str, ion_numbers: List[int], species: List[str]):
17
18
  """
18
19
  Initialize the class based on the provided ionic crystal composition structure files and corresponding composition numbers.
20
+
21
+ :params
22
+ work_dir: The working directory where the ionic crystal structure files are located.
23
+ ion_numbers: A list of integers representing the number of each ion in the ionic crystal.
24
+ species: A list of strings representing the species of ions in the ionic crystal.
19
25
  """
20
26
  redirect_dpdisp_logging(os.path.join(work_dir, "dpdispatcher.log"))
21
- self.script_dir = os.path.dirname(__file__)
22
- self.mlp_opt_file = os.path.join(self.script_dir, "mlp_opt.py")
23
- self.model_file = os.path.join(self.script_dir, "../../model/model.pt")
27
+ self.mlp_opt_file = importlib.resources.files("ion_CSP").joinpath("mlp_opt.py")
28
+ self.model_file = importlib.resources.files("ion_CSP.model").joinpath("model.pt")
24
29
  # 获取当前脚本的路径以及同路径下离子晶体组分的结构文件, 并将这一路径作为工作路径来避免可能的错误
25
30
  self.base_dir = work_dir
26
31
  os.chdir(self.base_dir)
27
32
  self.ion_numbers = ion_numbers
28
33
  self.species = species
29
34
  self.species_paths = []
30
- ion_atomss, species_atomss = [], []
35
+ ion_atomss, species_atoms = [], []
31
36
  # 读取离子晶体各组分的原子数,并在日志文件中记录
32
37
  for ion, number in zip(self.species, self.ion_numbers):
33
38
  species_path = os.path.join(self.base_dir, ion)
34
39
  self.species_paths.append(species_path)
35
40
  species_atom = len(read(species_path))
36
- species_atomss.append(species_atom)
37
- species_atoms = species_atom * number
38
- ion_atomss.append(species_atoms)
41
+ species_atoms.append(species_atom)
42
+ ion_atoms = species_atom * number
43
+ ion_atomss.append(ion_atoms)
39
44
  self.cell_atoms = sum(ion_atomss)
40
45
  logging.info(
41
46
  f"The components of ions {self.species} in the ionic crystal are {self.ion_numbers}"
42
47
  )
43
48
  logging.info(
44
- f"The number of atoms for each ion is: {species_atomss}, and the total number of atoms is {self.cell_atoms}"
49
+ f"The number of atoms for each ion is: {species_atoms}, and the total number of atoms is {self.cell_atoms}"
45
50
  )
46
51
  self.generation_dir = os.path.join(self.base_dir, "1_generated")
47
52
  os.makedirs(self.generation_dir, exist_ok=True)
@@ -52,6 +57,10 @@ class CrystalGenerator:
52
57
  """
53
58
  Private method:
54
59
  Extract numbers from file names, convert them to integers, sort them by sequence, and return a list containing both indexes and file names
60
+
61
+ :params
62
+ directory: The directory where the files are located.
63
+ prefix_name: The prefix of the file names to be processed, e.g., 'POSCAR_'.
55
64
  """
56
65
  # 获取dir文件夹中所有以prefix_name开头的文件,在此实例中为POSCAR_
57
66
  files = [f for f in os.listdir(directory) if f.startswith(prefix_name)]
@@ -69,6 +78,9 @@ class CrystalGenerator:
69
78
  ):
70
79
  """
71
80
  Based on the provided ion species and corresponding numbers, use pyxtal to randomly generate ion crystal structures based on crystal space groups.
81
+ :params
82
+ num_per_group: The number of POSCAR files to be generated for each space group, default is 100.
83
+ space_groups_limit: The maximum number of space groups to be searched, default is 230, which is the total number of space groups.
72
84
  """
73
85
  # 如果目录不存在,则创建POSCAR_Files文件夹
74
86
  os.makedirs(self.POSCAR_dir, exist_ok=True)
@@ -132,7 +144,14 @@ class CrystalGenerator:
132
144
  )
133
145
 
134
146
  def _single_phonopy_processing(self, filename):
135
- # 按顺序处理POSCAR文件,首先复制一份无数字后缀的POSCAR文件
147
+ """
148
+ Private method:
149
+ Process a single POSCAR file using phonopy to generate symmetric primitive cells and conventional cells.
150
+
151
+ :params
152
+ filename: The name of the POSCAR file to be processed.
153
+ """
154
+ # 按顺序处理POSCAR文件,首先复制一份无数字后缀的POSCAR文件
136
155
  shutil.copy(f"{self.POSCAR_dir}/{filename}", f"{self.POSCAR_dir}/POSCAR")
137
156
  try:
138
157
  subprocess.run(["nohup", "phonopy", "--symmetry", "POSCAR"], check=True)
@@ -150,7 +169,7 @@ class CrystalGenerator:
150
169
  # 检查生成的POSCAR中的原子数,如果不匹配则删除该POSCAR并在日志中记录
151
170
  if cell_atoms != self.cell_atoms:
152
171
  error_message = f"Atom number mismatch ({cell_atoms} vs {self.cell_atoms})"
153
- logging.error(f"{filename} - {error_message}")
172
+ print(f"{filename} - {error_message}")
154
173
 
155
174
  # 新增:回溯空间群归属
156
175
  poscar_index = int(filename.split('_')[1]) # 提取POSCAR编号
@@ -176,7 +195,15 @@ class CrystalGenerator:
176
195
  os.remove(f"{self.primitive_cell_dir}/{filename}")
177
196
 
178
197
  def _find_space_group(self, poscar_index: int) -> int:
179
- """根据POSCAR编号查找对应的空间群"""
198
+ """
199
+ Private method:
200
+ Find the space group for a given POSCAR index based on the group_counts.
201
+
202
+ :params
203
+ poscar_index: The index of the POSCAR file to find the space group for.
204
+
205
+ :return: The space group number corresponding to the POSCAR index.
206
+ """
180
207
  cumulative = 0
181
208
  for idx, count in enumerate(self.group_counts, start=1):
182
209
  if cumulative <= poscar_index < cumulative + count:
@@ -199,14 +226,10 @@ class CrystalGenerator:
199
226
  logging.info("Start running phonopy processing ...")
200
227
  for _, filename in POSCAR_file_index_pairs:
201
228
  self._single_phonopy_processing(filename=filename)
202
- # 准备dpdispatcher运行所需的文件,将其复制到primitive_cell文件夹中
203
- self.required_files = [self.mlp_opt_file, self.model_file]
204
- for file in self.required_files:
205
- shutil.copy(file, self.primitive_cell_dir)
229
+ # 在 phonopy 成功进行对称化处理后,删除 1_generated/POSCAR_Files 文件夹以节省空间
206
230
  logging.info(
207
231
  "The phonopy processing has been completed!!\nThe symmetrized primitive cells have been saved in POSCAR format to the primitive_cell folder."
208
232
  )
209
- # 在 phonopy 成功进行对称化处理后,删除 1_generated/POSCAR_Files 文件夹以节省空间
210
233
  shutil.rmtree(self.POSCAR_dir)
211
234
  except FileNotFoundError:
212
235
  logging.error(
@@ -215,13 +238,23 @@ class CrystalGenerator:
215
238
  raise FileNotFoundError(
216
239
  "There are no POSCAR structure files after generating.\nPlease check the error during generation"
217
240
  )
241
+
218
242
 
219
243
  def dpdisp_mlp_tasks(self, machine: str, resources: str, nodes: int = 1):
220
244
  """
221
245
  Based on the dpdispatcher module, prepare and submit files for optimization on remote server or local machine.
246
+
247
+ params:
248
+ machine: The machine configuration file for dpdispatcher, can be in JSON or YAML format.
249
+ resources: The resources configuration file for dpdispatcher, can be in JSON or YAML format.
250
+ nodes: The number of nodes to be used for optimization, default is 1.
222
251
  """
223
252
  # 调整工作目录,减少错误发生
224
253
  os.chdir(self.primitive_cell_dir)
254
+ # 准备dpdispatcher运行所需的文件,将其复制到primitive_cell文件夹中
255
+ self.required_files = [self.mlp_opt_file, self.model_file]
256
+ for file in self.required_files:
257
+ shutil.copy(file, self.primitive_cell_dir)
225
258
  # 读取machine和resources的参数
226
259
  if machine.endswith(".json"):
227
260
  machine = Machine.load_from_json(machine)
@@ -229,26 +262,31 @@ class CrystalGenerator:
229
262
  machine = Machine.load_from_yaml(machine)
230
263
  else:
231
264
  raise KeyError("Not supported machine file type")
265
+ if resources.endswith(".json"):
266
+ resources = Resources.load_from_json(resources)
267
+ elif resources.endswith(".yaml"):
268
+ resources = Resources.load_from_yaml(resources)
269
+ else:
270
+ raise KeyError("Not supported resources file type")
232
271
  # 由于dpdispatcher对于远程服务器以及本地运行的forward_common_files的默认存放位置不同,因此需要预先进行判断,从而不改动优化脚本
233
272
  machine_inform = machine.serialize()
273
+ resources_inform = resources.serialize()
234
274
  if machine_inform["context_type"] == "SSHContext":
235
275
  # 如果调用远程服务器,则创建二级目录
236
276
  parent = "data/"
237
277
  elif machine_inform["context_type"] == "LocalContext":
238
278
  # 如果在本地运行作业,则只在后续创建一级目录
239
279
  parent = ""
240
- # 如果是本地运行,则根据显存占用率阈值,等待可用的GPU
241
- selected_gpu = wait_for_gpu(memory_percent_threshold=40, wait_time=600)
242
- os.environ["CUDA_VISIBLE_DEVICES"] = str(selected_gpu)
280
+ if (
281
+ machine_inform["batch_type"] == "Shell"
282
+ and resources_inform["gpu_per_node"] != 0
283
+ ):
284
+ # 如果是本地运行,则根据显存占用率阈值,等待可用的GPU
285
+ selected_gpu = _wait_for_gpu(memory_percent_threshold=40, wait_time=600)
286
+ os.environ["CUDA_VISIBLE_DEVICES"] = str(selected_gpu)
243
287
 
244
- from dpdispatcher import Resources, Task, Submission
288
+ from dpdispatcher import Task, Submission
245
289
 
246
- if resources.endswith(".json"):
247
- resources = Resources.load_from_json(resources)
248
- elif resources.endswith(".yaml"):
249
- resources = Resources.load_from_yaml(resources)
250
- else:
251
- raise KeyError("Not supported resources file type")
252
290
  # 依次读取primitive_cell文件夹中的所有POSCAR文件和对应的序号
253
291
  primitive_cell_file_index_pairs = self._sequentially_read_files(
254
292
  self.primitive_cell_dir, prefix_name="POSCAR_"
@@ -333,8 +371,14 @@ class CrystalGenerator:
333
371
  logging.info("Batch optimization completed!!!")
334
372
 
335
373
 
336
- def get_available_gpus(memory_percent_threshold=40):
337
- """获取可用的 GPU 节点,内存负载低于指定阈值且没有其他用户的任务在运行"""
374
+ def _get_available_gpus(memory_percent_threshold=40):
375
+ """
376
+ Private method:
377
+ Get available GPUs with memory usage below the specified threshold.
378
+
379
+ params:
380
+ memory_percent_threshold (int): The threshold for GPU memory usage percentage.
381
+ """
338
382
  try:
339
383
  # 获取 nvidia-smi 的输出
340
384
  output = subprocess.check_output(
@@ -364,10 +408,16 @@ def get_available_gpus(memory_percent_threshold=40):
364
408
  return []
365
409
 
366
410
 
367
- def wait_for_gpu(memory_percent_threshold=40, wait_time=300):
368
- """等待直到有可用的 GPU"""
411
+ def _wait_for_gpu(memory_percent_threshold=40, wait_time=300):
412
+ """
413
+ Private method:
414
+ Wait until a GPU is available with memory usage below the specified threshold.
415
+ params:
416
+ memory_percent_threshold (int): The threshold for GPU memory usage percentage.
417
+ wait_time (int): The time to wait before checking again, in seconds.
418
+ """
369
419
  while True:
370
- available_gpus = get_available_gpus(memory_percent_threshold)
420
+ available_gpus = _get_available_gpus(memory_percent_threshold)
371
421
  logging.info(f"Available GPU: {available_gpus}")
372
422
  if available_gpus:
373
423
  selected_gpu = available_gpus[0]
ion_CSP/identify_molecules.py CHANGED
@@ -7,6 +7,17 @@ from ase.neighborlist import NeighborList, natural_cutoffs
7
7
 
8
8
 
9
9
  def identify_molecules(atoms) -> Tuple[List[Dict[str, int]], bool]:
10
+ """
11
+ Identify independent molecules in a given set of atoms.
12
+ This function uses a depth-first search (DFS) approach to find connected components in the atomic structure,
13
+ treating each connected component as a separate molecule.
14
+ params:
15
+ atoms: ASE Atoms object containing the atomic structure.
16
+ returns:
17
+ A tuple containing:
18
+ - A list of dictionaries, each representing a molecule with element counts.
19
+ - A boolean flag indicating whether the identified molecules match the initial set of molecules.
20
+ """
10
21
  visited = set() # 用于记录已经访问过的原子索引
11
22
  identified_molecules = [] # 用于存储识别到的独立分子
12
23
  # 基于共价半径为每个原子生成径向截止
@@ -63,6 +74,10 @@ def identify_molecules(atoms) -> Tuple[List[Dict[str, int]], bool]:
63
74
  def molecules_information(molecules: List[Dict[str, int]], molecules_flag: bool, initial_information: List[Dict[str, int]]):
64
75
  """
65
76
  Set the output format of the molecule. Output simplified element information in the specified order of C, N, O, H, which may include other elements.
77
+ params:
78
+ molecules: A list of dictionaries representing identified molecules with element counts.
79
+ molecules_flag: A boolean flag indicating whether the identified molecules match the initial set of molecules.
80
+ initial_information: A list of dictionaries representing the initial set of molecules with element counts.
66
81
  """
67
82
  # 定义固定顺序的元素
68
83
  fixed_order = ['C', 'N', 'O', 'H']