PyPI - ion-CSP - Versions diffs - 2.1.4__py3-none-any.whl → 2.1.8__py3-none-any.whl - Mend

ion-CSP 2.1.4__py3-none-any.whl → 2.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

ion_CSP/__init__.py +2 -2
ion_CSP/convert_SMILES.py +32 -10
ion_CSP/empirical_estimate.py +136 -18
ion_CSP/gen_opt.py +83 -33
ion_CSP/identify_molecules.py +15 -0
ion_CSP/log_and_time.py +55 -8
ion_CSP/mlp_opt.py +52 -6
ion_CSP/model/model.pt +0 -0
ion_CSP/model/options/README.md +5 -0
ion_CSP/model/options/model.ckpt-4000000.pt +0 -0
ion_CSP/param/INCAR_0 +16 -0
ion_CSP/param/INCAR_1 +19 -0
ion_CSP/param/INCAR_2 +19 -0
ion_CSP/param/INCAR_3 +19 -0
ion_CSP/param/POTCAR_C +2319 -0
ion_CSP/param/POTCAR_H +1563 -0
ion_CSP/param/POTCAR_N +2351 -0
ion_CSP/param/POTCAR_O +2487 -0
ion_CSP/param/g16_sub.sh +21 -0
ion_CSP/param/sub_final.sh +91 -0
ion_CSP/param/sub_ori.sh +74 -0
ion_CSP/param/sub_supple.sh +56 -0
ion_CSP/read_mlp_density.py +15 -1
ion_CSP/task_manager.py +2 -2
ion_CSP/upload_download.py +0 -1
ion_CSP/vasp_processing.py +48 -20
{ion_csp-2.1.4.dist-info → ion_csp-2.1.8.dist-info}/METADATA +45 -16
ion_csp-2.1.8.dist-info/RECORD +43 -0
{ion_csp-2.1.4.dist-info → ion_csp-2.1.8.dist-info}/licenses/LICENSE +1 -1
{ion_csp-2.1.4.dist-info → ion_csp-2.1.8.dist-info}/top_level.txt +0 -1
ion_csp-2.1.4.dist-info/RECORD +0 -28
{run → ion_CSP/run}/__init__.py +0 -0
{run → ion_CSP/run}/main_CSP.py +0 -0
{run → ion_CSP/run}/main_EE.py +0 -0
{run → ion_CSP/run}/run_convert_SMILES.py +0 -0
{run → ion_CSP/run}/run_empirical_estimate.py +0 -0
{run → ion_CSP/run}/run_gen_opt.py +0 -0
{run → ion_CSP/run}/run_read_mlp_density.py +0 -0
{run → ion_CSP/run}/run_upload_download.py +0 -0
{run → ion_CSP/run}/run_vasp_processing.py +0 -0
{ion_csp-2.1.4.dist-info → ion_csp-2.1.8.dist-info}/WHEEL +0 -0
{ion_csp-2.1.4.dist-info → ion_csp-2.1.8.dist-info}/entry_points.txt +0 -0

ion_CSP/__init__.py CHANGED Viewed

@@ -1,8 +1,8 @@
 __author__ = "Ze Yang"
 __contact__ = "yangze1995007@163.com"
 __license__ = "MIT"
-__version__ = "2.1.4"
-__date__ = "2025-06-13"
+__version__ = "2.1.8"
+__date__ = "2025-06-23"
 try:

ion_CSP/convert_SMILES.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os
 import shutil
 import logging
 import pandas as pd
+import importlib.resources
 from typing import List
 from rdkit import Chem
 from rdkit.Chem import AllChem
@@ -13,9 +14,13 @@ class SmilesProcessing:
     def __init__(self, work_dir: str, csv_file: str, converted_folder: str = '1_1_SMILES_gjf', optimized_dir: str = '1_2_Gaussian_optimized'):
         """
-        args:
+        This class is used to process SMILES codes from a CSV file, convert them into Gaussian input files, and prepare for optimization tasks. It also supports grouping by charge and filtering based on functional groups.
+        params:
             work_dir: the path of the working directory.
             csv_file: the csv file name in the working directory.
+            converted_folder: the folder name for storing converted SMILES files.
+            optimized_dir: the folder name for storing Gaussian optimized files.
         """
         redirect_dpdisp_logging(os.path.join(work_dir, "dpdispatcher.log"))
         # 读取csv文件并处理数据, csv文件的表头包括 SMILES, Charge, Refcode或Number
@@ -28,7 +33,7 @@ class SmilesProcessing:
             self.base_dir, converted_folder, os.path.splitext(csv_file)[0]
         )
         self.gaussian_optimized_dir = os.path.join(self.base_dir, optimized_dir)
-        self.param_dir = os.path.join(os.path.dirname(__file__), "../../param")
+        self.param_dir = importlib.resources.files("ion_CSP.param")
         original_df = pd.read_csv(csv_path)
         logging.info(f"Processing {csv_path}")
         # 对SMILES码去重
@@ -53,13 +58,15 @@ class SmilesProcessing:
         self, dir: str, smiles: str, basename: str, charge: int
     ):
         """
-        Private method: Use the rdkit module to read SMILES code and convert it into the required file types such as gjf, xyz, mol, etc.
+        Private method:
+        Use the rdkit module to read SMILES code and convert it into the required file types such as gjf, xyz, mol, etc.
-        args:
+        params:
             dir: The directory used for outputting files, regardless of existence of the directory.
             smiles: SMILES code to be converted.
             basename: The reference code or number corresponding to SMILES code.
             charge: The charge carried by ions.
         return:
             result_code: Result code 0 or -1, representing success and failure respectively.
             basename: The corresponding basename.
@@ -144,6 +151,12 @@ class SmilesProcessing:
     ):
         """
         Screen based on the provided functional groups and charges.
+        params:
+            charge_screen: The charge to screen for, default is 0.
+            group_screen: The functional group to screen for, default is empty string.
+            group_name: The name of the functional group, used for naming the output directory.
+            group_screen_invert: If True, invert the screening condition for the functional group.
         """
         # 另外筛选出符合条件的离子
         screened = self.df
@@ -179,6 +192,12 @@ class SmilesProcessing:
     ):
         """
         Based on the dpdispatcher module, prepare and submit files for optimization on remote server or local machine.
+        params:
+            folders: List of folders containing .gjf files to be processed, if empty, all folders in the converted directory will be processed.
+            machine: The machine configuration file for dpdispatcher, can be a JSON or YAML file.
+            resources: The resources configuration file for dpdispatcher, can be a JSON or YAML file.
+            nodes: The number of nodes to distribute the tasks to, default is 1.
         """
         if os.path.exists(self.gaussian_optimized_dir):
             logging.error(f'The directory {self.gaussian_optimized_dir} has already existed.')
@@ -233,7 +252,7 @@ class SmilesProcessing:
                 task_dir = os.path.join(self.converted_dir, f"{parent}pop{pop}")
                 os.makedirs(task_dir, exist_ok=True)
                 for file in forward_files:
-                    shutil.copyfile(f"{self.param_dir}/{file}", f"{task_dir}/{file}")
+                    shutil.copyfile(self.param_dir.joinpath(file), f"{task_dir}/{file}")
                 for job_i in node_jobs[pop]:
                     # 将分配好的 .gjf 文件添加到对应的上传文件中
                     forward_files.append(gjf_files[job_i])
@@ -274,11 +293,14 @@ class SmilesProcessing:
                 for job_i in node_jobs[pop]:
                     base_name, _ = os.path.splitext(gjf_files[job_i])
                     # 在优化后都取回每个 .gjf 文件对应的 .log、.fchk 输出文件
-                    for ext in ['gjf', 'log', 'fchk']:
-                        shutil.copyfile(
-                            f"{task_dir}/{base_name}.{ext}",
-                            f"{optimized_folder_dir}/{base_name}.{ext}"
-                        )
+                    try:
+                        for ext in ['gjf', 'log', 'fchk']:
+                            shutil.copyfile(
+                                f"{task_dir}/{base_name}.{ext}",
+                                f"{optimized_folder_dir}/{base_name}.{ext}"
+                            )
+                    except FileNotFoundError as e:
+                        logging.error(f"File not found during copying, please check the configuration and state of Gaussian: {e}")
                 # 在成功完成Gaussian优化后，删除 1_1_SMILES_gjf/{csv}/{parent}/pop{n} 文件夹以节省空间
                 shutil.rmtree(task_dir)
         shutil.copyfile(

ion_CSP/empirical_estimate.py CHANGED Viewed

@@ -55,7 +55,13 @@ class EmpiricalEstimation:
     def __init__(self, work_dir: str, folders: List[str], ratios: List[int], sort_by: str):
         """
-        Retrieve the directory where the current script is located and use it as the working directory.
+        This class is designed to process Gaussian calculation files, perform electrostatic potential analysis using Multiwfn, and estimate the nitrogen content or density of ion crystal combinations. The class will also generate .csv files containing sorted nitrogen content or density based on the specified sorting criterion.
+        :params
+            work_dir: The working directory where the Gaussian calculation files are located.
+            folders: A list of folder names containing the Gaussian calculation files.
+            ratios: A list of integers representing the ratio of each folder in the combination.
+            sort_by: A string indicating the sorting criterion, either 'density' or 'nitrogen'.
         """
         self.base_dir = work_dir
         os.chdir(self.base_dir)
@@ -73,6 +79,9 @@ class EmpiricalEstimation:
     def multiwfn_process_fchk_to_json(self, specific_directory: str = None):
         '''
         If a specific directory is given, this method can be used separately to implement batch processing of FCHK files with Multiwfn and save the desired electrostatic potential analysis results to the corresponding JSON file. Otherwise, the folder list provided during initialization will be processed sequentially.
+        :params
+            specific_directory: The specific directory to process. If None, all folders will be processed.
         '''
         if specific_directory is None:
             for folder in self.folders:
@@ -84,7 +93,11 @@ class EmpiricalEstimation:
     def _multiwfn_process_fchk_to_json(self, folder: str):
         '''
+        Private method:
         Perform electrostatic potential analysis on .fchk files using Multiwfn and save the analysis results to a .json file.
+        :params
+            folder: The folder containing the .fchk files to be processed.
         '''
         # 在每个文件夹中获取 .fchk 文件并根据文件名排序, 再用 Multiwfn 进行静电势分析, 最后将分析结果保存到同名 .json 文件中
         fchk_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.fchk')]
@@ -117,26 +130,89 @@ class EmpiricalEstimation:
                     logging.error(f'Error with moving bad files: {e}')
         logging.info(f'\nElectrostatic potential analysis by Multiwfn for {folder} folder has completed, and the results have been stored in the corresponding json files.\n')
-    def _single_multiwfn_fchk_to_json(self, fchk_filename: str):
+    def _check_multiwfn_executable(self):
         '''
-        Private method: Use multiwfn to perform electrostatic potential analysis on each FCHK file separately, and save the required results to a corresponding JSON file.
+        Private method:
+        Check if the Multiwfn executable file exists in the system PATH.
+        If not, raise a FileNotFoundError with an appropriate error message.
         '''
-        print(f'Multiwfn processing {fchk_filename}')
-        logging.info(f'Multiwfn processing {fchk_filename}')
-        result_flag = True
+        multiwfn_path = shutil.which("Multiwfn_noGUI") or shutil.which("Multiwfn")
+        if not multiwfn_path:
+            error_msg = (
+                "Error: No detected Multiwfn executable file (Multiwfn or Multiwfn_GUI), please check:\n "
+                "1. Has Multiwfn been installed correctly?\n"
+                "2. Has Multiwfn been added to the system PATH environment variable"
+            )
+            print(error_msg)
+            logging.error(error_msg)
+            raise FileNotFoundError("No detected Multiwfn executable file (Multiwfn or Multiwfn_GUI)")
+        return multiwfn_path
+    def _multiwfn_cmd_build(self, input_content):
+        '''
+        Private method:
+        Build the Multiwfn command to be executed based on the input content.
+        This method is used to create the input file for Multiwfn.
+        :params
+            input_content: The content to be written to the input file for Multiwfn.
+        '''
+        # 检查Multiwfn可执行文件是否存在
+        multiwfn_path = self._check_multiwfn_executable()
         # 创建 input.txt 用于存储 Multiwfn 命令内容
         with open('input.txt', 'w') as input_file:
-            input_file.write(f"{fchk_filename}\n12\n0\nq\n")
+            input_file.write(input_content)
         # 通过 input.txt 执行 Multiwfn 命令, 并将输出结果重定向到output.txt中
+        cmd = [multiwfn_path, "<", "input.txt", ">", "output.txt"]
         try:
-            subprocess.run('Multiwfn_noGUI < input.txt > output.txt', shell=True, capture_output=True)
-        except FileNotFoundError:
-            subprocess.run('Multiwfn < input.txt > output.txt', shell=True, capture_output=True)
+            subprocess.run(cmd, shell=True, capture_output=True)
+        except subprocess.CalledProcessError as e:
+            error_msg = f"Multiwfn execution failed (return code {e.returncode}): Error output: {e.stderr}"
+            print(error_msg)
+            logging.error(error_msg)
+            raise
+        except Exception as e:
+            error_msg = f"Unexpected Error: {str(e)}"
+            print(error_msg)
+            logging.error(error_msg)
+            raise
+        finally:
+            # 清理临时文件
+            try:
+                os.remove("input.txt")
+            except Exception as e:
+                logging.warning(f"无法删除临时文件 input.txt: {str(e)}")
+    def _single_multiwfn_fchk_to_json(self, fchk_filename: str):
+        '''
+        Private method:
+        Use multiwfn to perform electrostatic potential analysis on each FCHK file separately, and save the required results to a corresponding JSON file.
+        :params
+            fchk_filename: The full path of the FCHK file to be processed.
+        :return: True if the processing is successful, False if the FCHK file is invalid.
+        '''
+        print(f'Multiwfn processing {fchk_filename}')
+        logging.info(f'Multiwfn processing {fchk_filename}')
+        result_flag = True
+        self._multiwfn_cmd_build(input_content=f"{fchk_filename}\n12\n0\nq\n")
         # 获取目录以及 .fchk 文件的无后缀文件名, 即 refcode
         folder, filename = os.path.split(fchk_filename)
         refcode, _ = os.path.splitext(filename)
-        with open('output.txt', 'r') as output_file:
-            output_content = output_file.read()
+        try:
+            with open('output.txt', 'r') as output_file:
+                output_content = output_file.read()
+        except Exception as e:
+            logging.error(f"Error reading output.txt: {e}")
+            raise
+        finally:
+            # 清理临时文件
+            try:
+                os.remove("output.txt")
+            except Exception as e:
+                logging.warning(f"无法删除临时文件 output.txt: {str(e)}")
         # 提取所需数据
         volume_match = re.search(r'Volume:\s*([\d.]+)\s*Bohr\^3\s+\(\s*([\d.]+)\s*Angstrom\^3\)', output_content)
         density_match = re.search(r'Estimated density according to mass and volume \(M/V\):\s*([\d.]+)\s*g/cm\^3', output_content)
@@ -186,8 +262,6 @@ class EmpiricalEstimation:
             with open (f"{folder}/{refcode}.json", 'w') as json_file:
                 json.dump(result, json_file, indent=4)
             shutil.copyfile(src=f"{folder}/{refcode}.json", dst=f"Optimized/{folder}/{refcode}.json")
-        os.remove('input.txt')
-        os.remove('output.txt')
         logging.info(f'Finished processing {fchk_filename}')
         return result_flag
@@ -195,6 +269,9 @@ class EmpiricalEstimation:
         """
         If a specific directory is given, this method can be used separately to batch process the last frame of Gaussian optimized LOG files into GJF files using Multiwfn.
         Otherwise, the folder list provided during initialization will be processed in order.
+        :params
+            specific_directory: The specific directory to process. If None, all folders will be processed.
         """
         if specific_directory is None:
             for folder in self.folders:
@@ -206,7 +283,11 @@ class EmpiricalEstimation:
     def _gaussian_log_to_optimized_gjf(self, folder: str):
         '''
+        Private method:
         Due to the lack of support of Pyxtal module for LOG files in subsequent crystal generation, it is necessary to convert the last frame of the Gaussian optimized LOG file to a .gjf file with Multiwfn processing.
+        :params
+            folder: The folder containing the Gaussian LOG files to be processed.
         '''
         # 在每个文件夹中获取 .log 文件并根据文件名排序, 再用Multiwfn载入优化最后一帧转换为 gjf 文件
         log_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.log')]
@@ -226,10 +307,19 @@ class EmpiricalEstimation:
             pass
         logging.info(f'\nThe .log to .gjf conversion by Multiwfn for {folder} folder has completed, and the optimized .gjf structures have been stored in the optimized directory.\n')
-    def _single_multiwfn_log_to_gjf(self, folder: str, log_filename: str):
+    def _single_multiwfn_log_to_gjf(self, folder: str, log_filename: str):
+        """
+        Private method:
+        Use Multiwfn to convert the last frame of the Gaussian optimized LOG file to a .gjf file.
+        :params
+            folder: The folder containing the Gaussian LOG file to be processed.
+            log_filename: The full path of the LOG file to be processed.
+        """
         # 获取目录以及 .fchk 文件的无后缀文件名, 即 refcode
         _, filename = os.path.split(log_filename)
         refcode, _ = os.path.splitext(filename)
         try:
             # 创建 input.txt 用于存储 Multiwfn 命令内容
             with open('input.txt', 'w') as input_file:
@@ -291,6 +381,15 @@ class EmpiricalEstimation:
             writer.writerows(data)  # 写入排序后的数
     def _read_gjf_elements(self, gjf_file):
+        """
+        Private method:
+        Read the elements from a .gjf file and return a dictionary with element counts.
+        :params
+            gjf_file: The full path of the .gjf file to be processed.
+        :return: A dictionary with element symbols as keys and their counts as values.
+        """
         # 根据每一个组合中的组分找到对应的 JSON 文件并读取其中的性质内容
         with open(gjf_file, 'r') as file:
             lines = file.readlines()
@@ -317,6 +416,15 @@ class EmpiricalEstimation:
         return atomic_counts
     def _generate_combinations(self, suffix: str):
+        """
+        Private method:
+        Generate all valid combinations of files based on the specified suffix and ratios.
+        :params
+            suffix: The file suffix to filter the files in the folders.
+        :return: A list of dictionaries representing the combinations of files with their respective ratios.
+        """
         # 获取所有符合后缀名条件的文件
         all_files = []
         for folder in self.folders:
@@ -411,6 +519,15 @@ class EmpiricalEstimation:
             writer.writerows(data)  # 写入排序后的数
     def _copy_combo_file(self, combo_path, folder_basename, file_type):
+        """
+        Private method:
+        Copy the specified file type from the Optimized directory to the combo_n folder.
+        :params
+            combo_path: The path to the combo_n folder where the file will be copied.
+            folder_basename: The basename of the folder containing the file to be copied.
+            file_type: The type of file to be copied (e.g., '.gjf', '.json').
+        """
         filename = f"{folder_basename}{file_type}"
         source_path = os.path.join(self.base_dir, 'Optimized', filename)
         # 复制指定后缀名文件到对应的 combo_n 文件夹
@@ -428,9 +545,10 @@ class EmpiricalEstimation:
         """
         Create a combo_n folder based on the .csv file and copy the corresponding .gjf structure file.
-        :param target_directory: The target directory of the combo folder to be created
-        :param num_folders: The number of combo folders to be created
-        :param ion_numbers: The number of ions for ionic crystal generation step (generated in config.yaml in the corresponding combo_dir automatically)
+        :params
+            target_directory: The target directory of the combo folder to be created
+            num_folders: The number of combo folders to be created
+            ion_numbers: The number of ions for ionic crystal generation step (generated in config.yaml in the corresponding combo_dir automatically)
         """
         if self.sort_by == 'density':
             base_csv = self.density_csv

ion_CSP/gen_opt.py CHANGED Viewed

@@ -4,11 +4,12 @@ import time
 import shutil
 import logging
 import subprocess
+import importlib.resources
+from typing import List
 from ase.io import read
+from dpdispatcher import Machine, Resources
 from pyxtal import pyxtal
 from pyxtal.msg import Comp_CompatibilityError, Symm_CompatibilityError
-from dpdispatcher import Machine
-from typing import List
 from ion_CSP.log_and_time import redirect_dpdisp_logging
@@ -16,32 +17,36 @@ class CrystalGenerator:
     def __init__(self, work_dir: str, ion_numbers: List[int], species: List[str]):
         """
         Initialize the class based on the provided ionic crystal composition structure files and corresponding composition numbers.
+        :params
+            work_dir: The working directory where the ionic crystal structure files are located.
+            ion_numbers: A list of integers representing the number of each ion in the ionic crystal.
+            species: A list of strings representing the species of ions in the ionic crystal.
         """
         redirect_dpdisp_logging(os.path.join(work_dir, "dpdispatcher.log"))
-        self.script_dir = os.path.dirname(__file__)
-        self.mlp_opt_file = os.path.join(self.script_dir, "mlp_opt.py")
-        self.model_file = os.path.join(self.script_dir, "../../model/model.pt")
+        self.mlp_opt_file = importlib.resources.files("ion_CSP").joinpath("mlp_opt.py")
+        self.model_file = importlib.resources.files("ion_CSP.model").joinpath("model.pt")
         # 获取当前脚本的路径以及同路径下离子晶体组分的结构文件, 并将这一路径作为工作路径来避免可能的错误
         self.base_dir = work_dir
         os.chdir(self.base_dir)
         self.ion_numbers = ion_numbers
         self.species = species
         self.species_paths = []
-        ion_atomss, species_atomss = [], []
+        ion_atomss, species_atoms = [], []
         # 读取离子晶体各组分的原子数，并在日志文件中记录
         for ion, number in zip(self.species, self.ion_numbers):
             species_path = os.path.join(self.base_dir, ion)
             self.species_paths.append(species_path)
             species_atom = len(read(species_path))
-            species_atomss.append(species_atom)
-            species_atoms = species_atom * number
-            ion_atomss.append(species_atoms)
+            species_atoms.append(species_atom)
+            ion_atoms = species_atom * number
+            ion_atomss.append(ion_atoms)
         self.cell_atoms = sum(ion_atomss)
         logging.info(
             f"The components of ions {self.species} in the ionic crystal are {self.ion_numbers}"
         )
         logging.info(
-            f"The number of atoms for each ion is: {species_atomss}, and the total number of atoms is {self.cell_atoms}"
+            f"The number of atoms for each ion is: {species_atoms}, and the total number of atoms is {self.cell_atoms}"
         )
         self.generation_dir = os.path.join(self.base_dir, "1_generated")
         os.makedirs(self.generation_dir, exist_ok=True)
@@ -52,6 +57,10 @@ class CrystalGenerator:
         """
         Private method:
         Extract numbers from file names, convert them to integers, sort them by sequence, and return a list containing both indexes and file names
+        :params
+            directory: The directory where the files are located.
+            prefix_name: The prefix of the file names to be processed, e.g., 'POSCAR_'.
         """
         # 获取dir文件夹中所有以prefix_name开头的文件，在此实例中为POSCAR_
         files = [f for f in os.listdir(directory) if f.startswith(prefix_name)]
@@ -69,6 +78,9 @@ class CrystalGenerator:
     ):
         """
         Based on the provided ion species and corresponding numbers, use pyxtal to randomly generate ion crystal structures based on crystal space groups.
+        :params
+            num_per_group: The number of POSCAR files to be generated for each space group, default is 100.
+            space_groups_limit: The maximum number of space groups to be searched, default is 230, which is the total number of space groups.
         """
         # 如果目录不存在，则创建POSCAR_Files文件夹
         os.makedirs(self.POSCAR_dir, exist_ok=True)
@@ -132,7 +144,14 @@ class CrystalGenerator:
         )
     def _single_phonopy_processing(self, filename):
-    # 按顺序处理POSCAR文件，首先复制一份无数字后缀的POSCAR文件
+        """
+        Private method:
+        Process a single POSCAR file using phonopy to generate symmetric primitive cells and conventional cells.
+        :params
+            filename: The name of the POSCAR file to be processed.
+        """
+        # 按顺序处理POSCAR文件，首先复制一份无数字后缀的POSCAR文件
         shutil.copy(f"{self.POSCAR_dir}/{filename}", f"{self.POSCAR_dir}/POSCAR")
         try:
             subprocess.run(["nohup", "phonopy", "--symmetry", "POSCAR"], check=True)
@@ -150,7 +169,7 @@ class CrystalGenerator:
         # 检查生成的POSCAR中的原子数，如果不匹配则删除该POSCAR并在日志中记录
         if cell_atoms != self.cell_atoms:
             error_message = f"Atom number mismatch ({cell_atoms} vs {self.cell_atoms})"
-            logging.error(f"{filename} - {error_message}")
+            print(f"{filename} - {error_message}")
             # 新增：回溯空间群归属
             poscar_index = int(filename.split('_')[1])  # 提取POSCAR编号
@@ -176,7 +195,15 @@ class CrystalGenerator:
             os.remove(f"{self.primitive_cell_dir}/{filename}")
     def _find_space_group(self, poscar_index: int) -> int:
-        """根据POSCAR编号查找对应的空间群"""
+        """
+        Private method:
+        Find the space group for a given POSCAR index based on the group_counts.
+        :params
+            poscar_index: The index of the POSCAR file to find the space group for.
+        :return: The space group number corresponding to the POSCAR index.
+        """
         cumulative = 0
         for idx, count in enumerate(self.group_counts, start=1):
             if cumulative <= poscar_index < cumulative + count:
@@ -199,14 +226,10 @@ class CrystalGenerator:
             logging.info("Start running phonopy processing ...")
             for _, filename in POSCAR_file_index_pairs:
                 self._single_phonopy_processing(filename=filename)
-            # 准备dpdispatcher运行所需的文件，将其复制到primitive_cell文件夹中
-            self.required_files = [self.mlp_opt_file, self.model_file]
-            for file in self.required_files:
-                shutil.copy(file, self.primitive_cell_dir)
+            # 在 phonopy 成功进行对称化处理后，删除 1_generated/POSCAR_Files 文件夹以节省空间
             logging.info(
                 "The phonopy processing has been completed!!\nThe symmetrized primitive cells have been saved in POSCAR format to the primitive_cell folder."
             )
-            # 在 phonopy 成功进行对称化处理后，删除 1_generated/POSCAR_Files 文件夹以节省空间
             shutil.rmtree(self.POSCAR_dir)
         except FileNotFoundError:
             logging.error(
@@ -215,13 +238,23 @@ class CrystalGenerator:
             raise FileNotFoundError(
                 "There are no POSCAR structure files after generating.\nPlease check the error during generation"
             )
     def dpdisp_mlp_tasks(self, machine: str, resources: str, nodes: int = 1):
         """
         Based on the dpdispatcher module, prepare and submit files for optimization on remote server or local machine.
+        params:
+            machine: The machine configuration file for dpdispatcher, can be in JSON or YAML format.
+            resources: The resources configuration file for dpdispatcher, can be in JSON or YAML format.
+            nodes: The number of nodes to be used for optimization, default is 1.
         """
         # 调整工作目录，减少错误发生
         os.chdir(self.primitive_cell_dir)
+        # 准备dpdispatcher运行所需的文件，将其复制到primitive_cell文件夹中
+        self.required_files = [self.mlp_opt_file, self.model_file]
+        for file in self.required_files:
+            shutil.copy(file, self.primitive_cell_dir)
         # 读取machine和resources的参数
         if machine.endswith(".json"):
             machine = Machine.load_from_json(machine)
@@ -229,26 +262,31 @@ class CrystalGenerator:
             machine = Machine.load_from_yaml(machine)
         else:
             raise KeyError("Not supported machine file type")
+        if resources.endswith(".json"):
+            resources = Resources.load_from_json(resources)
+        elif resources.endswith(".yaml"):
+            resources = Resources.load_from_yaml(resources)
+        else:
+            raise KeyError("Not supported resources file type")
         # 由于dpdispatcher对于远程服务器以及本地运行的forward_common_files的默认存放位置不同，因此需要预先进行判断，从而不改动优化脚本
         machine_inform = machine.serialize()
+        resources_inform = resources.serialize()
         if machine_inform["context_type"] == "SSHContext":
             # 如果调用远程服务器，则创建二级目录
             parent = "data/"
         elif machine_inform["context_type"] == "LocalContext":
             # 如果在本地运行作业，则只在后续创建一级目录
             parent = ""
-            # 如果是本地运行，则根据显存占用率阈值，等待可用的GPU
-            selected_gpu = wait_for_gpu(memory_percent_threshold=40, wait_time=600)
-            os.environ["CUDA_VISIBLE_DEVICES"] = str(selected_gpu)
+            if (
+                machine_inform["batch_type"] == "Shell"
+                and resources_inform["gpu_per_node"] != 0
+            ):
+                # 如果是本地运行，则根据显存占用率阈值，等待可用的GPU
+                selected_gpu = _wait_for_gpu(memory_percent_threshold=40, wait_time=600)
+                os.environ["CUDA_VISIBLE_DEVICES"] = str(selected_gpu)
-        from dpdispatcher import Resources, Task, Submission
+        from dpdispatcher import Task, Submission
-        if resources.endswith(".json"):
-            resources = Resources.load_from_json(resources)
-        elif resources.endswith(".yaml"):
-            resources = Resources.load_from_yaml(resources)
-        else:
-            raise KeyError("Not supported resources file type")
         # 依次读取primitive_cell文件夹中的所有POSCAR文件和对应的序号
         primitive_cell_file_index_pairs = self._sequentially_read_files(
             self.primitive_cell_dir, prefix_name="POSCAR_"
@@ -333,8 +371,14 @@ class CrystalGenerator:
         logging.info("Batch optimization completed!!!")
-def get_available_gpus(memory_percent_threshold=40):
-    """获取可用的 GPU 节点，内存负载低于指定阈值且没有其他用户的任务在运行"""
+def _get_available_gpus(memory_percent_threshold=40):
+    """
+    Private method:
+    Get available GPUs with memory usage below the specified threshold.
+    params:
+        memory_percent_threshold (int): The threshold for GPU memory usage percentage.
+    """
     try:
         # 获取 nvidia-smi 的输出
         output = subprocess.check_output(
@@ -364,10 +408,16 @@ def get_available_gpus(memory_percent_threshold=40):
         return []
-def wait_for_gpu(memory_percent_threshold=40, wait_time=300):
-    """等待直到有可用的 GPU"""
+def _wait_for_gpu(memory_percent_threshold=40, wait_time=300):
+    """
+    Private method:
+    Wait until a GPU is available with memory usage below the specified threshold.
+    params:
+        memory_percent_threshold (int): The threshold for GPU memory usage percentage.
+        wait_time (int): The time to wait before checking again, in seconds.
+    """
     while True:
-        available_gpus = get_available_gpus(memory_percent_threshold)
+        available_gpus = _get_available_gpus(memory_percent_threshold)
         logging.info(f"Available GPU: {available_gpus}")
         if available_gpus:
             selected_gpu = available_gpus[0]

ion_CSP/identify_molecules.py CHANGED Viewed

@@ -7,6 +7,17 @@ from ase.neighborlist import NeighborList, natural_cutoffs
 def identify_molecules(atoms) -> Tuple[List[Dict[str, int]], bool]:
+    """
+    Identify independent molecules in a given set of atoms.
+    This function uses a depth-first search (DFS) approach to find connected components in the atomic structure,
+    treating each connected component as a separate molecule.
+    params:
+        atoms: ASE Atoms object containing the atomic structure.
+    returns:
+        A tuple containing:
+        - A list of dictionaries, each representing a molecule with element counts.
+        - A boolean flag indicating whether the identified molecules match the initial set of molecules.
+    """
     visited = set()  # 用于记录已经访问过的原子索引
     identified_molecules = []   # 用于存储识别到的独立分子
     # 基于共价半径为每个原子生成径向截止
@@ -63,6 +74,10 @@ def identify_molecules(atoms) -> Tuple[List[Dict[str, int]], bool]:
 def molecules_information(molecules: List[Dict[str, int]], molecules_flag: bool, initial_information: List[Dict[str, int]]):
     """
     Set the output format of the molecule. Output simplified element information in the specified order of C, N, O, H, which may include other elements.
+    params:
+        molecules: A list of dictionaries representing identified molecules with element counts.
+        molecules_flag: A boolean flag indicating whether the identified molecules match the initial set of molecules.
+        initial_information: A list of dictionaries representing the initial set of molecules with element counts.
     """
     # 定义固定顺序的元素
     fixed_order = ['C', 'N', 'O', 'H']

ion-CSP 2.1.4__py3-none-any.whl → 2.1.8__py3-none-any.whl

ion-CSP 2.1.4__py3-none-any.whl → 2.1.8__py3-none-any.whl