PyPI - ion-CSP - Versions diffs - 2.1.5__py3-none-any.whl → 2.1.9__py3-none-any.whl - Mend

ion-CSP 2.1.5__py3-none-any.whl → 2.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

ion_CSP/__init__.py +3 -3
ion_CSP/convert_SMILES.py +39 -11
ion_CSP/empirical_estimate.py +288 -84
ion_CSP/gen_opt.py +68 -22
ion_CSP/identify_molecules.py +15 -0
ion_CSP/log_and_time.py +55 -8
ion_CSP/mlp_opt.py +52 -6
ion_CSP/read_mlp_density.py +15 -1
{run → ion_CSP/run}/main_EE.py +11 -13
ion_CSP/task_manager.py +2 -2
ion_CSP/upload_download.py +0 -1
ion_CSP/vasp_processing.py +57 -28
{ion_csp-2.1.5.dist-info → ion_csp-2.1.9.dist-info}/METADATA +44 -16
ion_csp-2.1.9.dist-info/RECORD +43 -0
{ion_csp-2.1.5.dist-info → ion_csp-2.1.9.dist-info}/licenses/LICENSE +1 -1
{ion_csp-2.1.5.dist-info → ion_csp-2.1.9.dist-info}/top_level.txt +0 -1
ion_csp-2.1.5.dist-info/RECORD +0 -44
run/update_changelog.py +0 -68
{run → ion_CSP/run}/__init__.py +0 -0
{run → ion_CSP/run}/main_CSP.py +0 -0
{run → ion_CSP/run}/run_convert_SMILES.py +0 -0
{run → ion_CSP/run}/run_empirical_estimate.py +0 -0
{run → ion_CSP/run}/run_gen_opt.py +0 -0
{run → ion_CSP/run}/run_read_mlp_density.py +0 -0
{run → ion_CSP/run}/run_upload_download.py +0 -0
{run → ion_CSP/run}/run_vasp_processing.py +0 -0
{ion_csp-2.1.5.dist-info → ion_csp-2.1.9.dist-info}/WHEEL +0 -0
{ion_csp-2.1.5.dist-info → ion_csp-2.1.9.dist-info}/entry_points.txt +0 -0

ion_CSP/__init__.py CHANGED Viewed

@@ -1,12 +1,12 @@
 __author__ = "Ze Yang"
 __contact__ = "yangze1995007@163.com"
 __license__ = "MIT"
-__version__ = "2.1.5"
-__date__ = "2025-06-16"
+__version__ = "2.1.9"
+__date__ = "2025-06-27"
 try:
-    from importlib.metadata import version  # python >= 3.11
+    from importlib.metadata import version
 except Exception:
     try:
         from importlib_metadata import version

ion_CSP/convert_SMILES.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os
 import shutil
 import logging
 import pandas as pd
+import importlib.resources
 from typing import List
 from rdkit import Chem
 from rdkit.Chem import AllChem
@@ -13,9 +14,13 @@ class SmilesProcessing:
     def __init__(self, work_dir: str, csv_file: str, converted_folder: str = '1_1_SMILES_gjf', optimized_dir: str = '1_2_Gaussian_optimized'):
         """
-        args:
+        This class is used to process SMILES codes from a CSV file, convert them into Gaussian input files, and prepare for optimization tasks. It also supports grouping by charge and filtering based on functional groups.
+        params:
             work_dir: the path of the working directory.
             csv_file: the csv file name in the working directory.
+            converted_folder: the folder name for storing converted SMILES files.
+            optimized_dir: the folder name for storing Gaussian optimized files.
         """
         redirect_dpdisp_logging(os.path.join(work_dir, "dpdispatcher.log"))
         # 读取csv文件并处理数据, csv文件的表头包括 SMILES, Charge, Refcode或Number
@@ -28,7 +33,7 @@ class SmilesProcessing:
             self.base_dir, converted_folder, os.path.splitext(csv_file)[0]
         )
         self.gaussian_optimized_dir = os.path.join(self.base_dir, optimized_dir)
-        self.param_dir = os.path.join(os.path.dirname(__file__), "../../param")
+        self.param_dir = importlib.resources.files("ion_CSP.param")
         original_df = pd.read_csv(csv_path)
         logging.info(f"Processing {csv_path}")
         # 对SMILES码去重
@@ -53,19 +58,27 @@ class SmilesProcessing:
         self, dir: str, smiles: str, basename: str, charge: int
     ):
         """
-        Private method: Use the rdkit module to read SMILES code and convert it into the required file types such as gjf, xyz, mol, etc.
+        Private method:
+        Use the rdkit module to read SMILES code and convert it into the required file types such as gjf, xyz, mol, etc.
-        args:
+        params:
             dir: The directory used for outputting files, regardless of existence of the directory.
             smiles: SMILES code to be converted.
             basename: The reference code or number corresponding to SMILES code.
             charge: The charge carried by ions.
         return:
             result_code: Result code 0 or -1, representing success and failure respectively.
             basename: The corresponding basename.
         """
         mol = Chem.MolFromSmiles(smiles)
-        mol = Chem.AddHs(mol)
+        try:
+            mol = Chem.AddHs(mol)
+        except Exception as e:
+            logging.error(
+                f"Error occurred while adding hydrogens to molecule {basename} with charge {charge}: {e}"
+            )
+            return 1, basename  # 返回错误码1表示失败
         try:
             # 生成3D坐标
             AllChem.EmbedMolecule(mol)
@@ -144,6 +157,12 @@ class SmilesProcessing:
     ):
         """
         Screen based on the provided functional groups and charges.
+        params:
+            charge_screen: The charge to screen for, default is 0.
+            group_screen: The functional group to screen for, default is empty string.
+            group_name: The name of the functional group, used for naming the output directory.
+            group_screen_invert: If True, invert the screening condition for the functional group.
         """
         # 另外筛选出符合条件的离子
         screened = self.df
@@ -179,6 +198,12 @@ class SmilesProcessing:
     ):
         """
         Based on the dpdispatcher module, prepare and submit files for optimization on remote server or local machine.
+        params:
+            folders: List of folders containing .gjf files to be processed, if empty, all folders in the converted directory will be processed.
+            machine: The machine configuration file for dpdispatcher, can be a JSON or YAML file.
+            resources: The resources configuration file for dpdispatcher, can be a JSON or YAML file.
+            nodes: The number of nodes to distribute the tasks to, default is 1.
         """
         if os.path.exists(self.gaussian_optimized_dir):
             logging.error(f'The directory {self.gaussian_optimized_dir} has already existed.')
@@ -233,7 +258,7 @@ class SmilesProcessing:
                 task_dir = os.path.join(self.converted_dir, f"{parent}pop{pop}")
                 os.makedirs(task_dir, exist_ok=True)
                 for file in forward_files:
-                    shutil.copyfile(f"{self.param_dir}/{file}", f"{task_dir}/{file}")
+                    shutil.copyfile(self.param_dir.joinpath(file), f"{task_dir}/{file}")
                 for job_i in node_jobs[pop]:
                     # 将分配好的 .gjf 文件添加到对应的上传文件中
                     forward_files.append(gjf_files[job_i])
@@ -274,11 +299,14 @@ class SmilesProcessing:
                 for job_i in node_jobs[pop]:
                     base_name, _ = os.path.splitext(gjf_files[job_i])
                     # 在优化后都取回每个 .gjf 文件对应的 .log、.fchk 输出文件
-                    for ext in ['gjf', 'log', 'fchk']:
-                        shutil.copyfile(
-                            f"{task_dir}/{base_name}.{ext}",
-                            f"{optimized_folder_dir}/{base_name}.{ext}"
-                        )
+                    try:
+                        for ext in ['gjf', 'log', 'fchk']:
+                            shutil.copyfile(
+                                f"{task_dir}/{base_name}.{ext}",
+                                f"{optimized_folder_dir}/{base_name}.{ext}"
+                            )
+                    except FileNotFoundError as e:
+                        logging.error(f"File not found during copying, please check the configuration and state of Gaussian: {e}")
                 # 在成功完成Gaussian优化后，删除 1_1_SMILES_gjf/{csv}/{parent}/pop{n} 文件夹以节省空间
                 shutil.rmtree(task_dir)
         shutil.copyfile(

ion_CSP/empirical_estimate.py CHANGED Viewed

@@ -53,12 +53,26 @@ x.fchk //指定计算文件
 class EmpiricalEstimation:
-    def __init__(self, work_dir: str, folders: List[str], ratios: List[int], sort_by: str):
+    def __init__(
+        self,
+        work_dir: str,
+        folders: List[str],
+        ratios: List[int],
+        sort_by: str,
+        optimized_dir: str = "1_2_Gaussian_optimized",
+    ):
         """
-        Retrieve the directory where the current script is located and use it as the working directory.
+        This class is designed to process Gaussian calculation files, perform electrostatic potential analysis using Multiwfn, and estimate the nitrogen content or density of ion crystal combinations. The class will also generate .csv files containing sorted nitrogen content or density based on the specified sorting criterion.
+        :params
+            work_dir: The working directory where the Gaussian calculation files are located.
+            folders: A list of folder names containing the Gaussian calculation files.
+            ratios: A list of integers representing the ratio of each folder in the combination.
+            sort_by: A string indicating the sorting criterion, either 'density' or 'nitrogen'.
         """
         self.base_dir = work_dir
-        os.chdir(self.base_dir)
+        self.gaussian_optimized_dir = os.path.join(self.base_dir, optimized_dir)
+        os.chdir(self.gaussian_optimized_dir)
         # 确保所取的文件夹数与配比数是对应的
         if len(folders) != len(ratios):
             raise ValueError('The number of folders must match the number of ratios.')
@@ -69,10 +83,86 @@ class EmpiricalEstimation:
             raise ValueError(f"The sort_by parameter must be either 'density' or 'nitrogen', but got '{sort_by}'")
         self.density_csv = "sorted_density.csv"
         self.nitrogen_csv = "sorted_nitrogen.csv"
+        self.carbon_nitrogen_csv = "specific_C_N_ratio.csv"
+        # 检查Multiwfn可执行文件是否存在
+        self.multiwfn_path = self._check_multiwfn_executable()
+    def _check_multiwfn_executable(self):
+        '''
+        Private method:
+        Check if the Multiwfn executable file exists in the system PATH.
+        If not, raise a FileNotFoundError with an appropriate error message.
+        '''
+        multiwfn_path = shutil.which("Multiwfn_noGUI") or shutil.which("Multiwfn")
+        if not multiwfn_path:
+            error_msg = (
+                "Error: No detected Multiwfn executable file (Multiwfn or Multiwfn_GUI), please check:\n "
+                "1. Has Multiwfn been installed correctly?\n"
+                "2. Has Multiwfn been added to the system PATH environment variable"
+            )
+            print(error_msg)
+            logging.error(error_msg)
+            raise FileNotFoundError("No detected Multiwfn executable file (Multiwfn or Multiwfn_GUI)")
+        else:
+            print(f"Multiwfn executable found at: {multiwfn_path}")
+            logging.info(f"Multiwfn executable found at: {multiwfn_path}")
+        return multiwfn_path
+    def _multiwfn_cmd_build(self, input_content, output_file=None):
+        '''
+        Private method:
+        Build the Multiwfn command to be executed based on the input content.
+        This method is used to create the input file for Multiwfn.
+        :params
+            input_content: The content to be written to the input file for Multiwfn.
+        '''
+        # 创建 input.txt 用于存储 Multiwfn 命令内容
+        with open('input.txt', 'w') as input_file:
+            input_file.write(input_content)
+        if output_file:
+            with open('output.txt', 'w') as output_file, open('input.txt', 'r') as input_file:
+                try:
+                    # 通过 input.txt 执行 Multiwfn 命令, 并将输出结果重定向到 output.txt 中
+                    subprocess.run([self.multiwfn_path], stdin=input_file, stdout=output_file, check=True)
+                except subprocess.CalledProcessError as e:
+                    logging.error(
+                        f"Error executing Multiwfn command with input {input_content}: {e}"
+                    )
+                except Exception as e:
+                    logging.error(f"Unexpected error: {e}")
+                    raise
+                finally:
+                    # 清理临时文件
+                    try:
+                        os.remove("input.txt")
+                    except Exception as e:
+                        logging.warning(f"Cannot remove temporary file input.txt: {str(e)}")
+        else:
+            with open("input.txt", "r") as input_file:
+                try:
+                    # 通过 input.txt 执行 Multiwfn 命令, 并将输出结果重定向到 output.txt 中
+                    subprocess.run([self.multiwfn_path], stdin=input_file, check=True)
+                except subprocess.CalledProcessError as e:
+                    logging.error(
+                        f"Error executing Multiwfn command with input {input_content}: {e}"
+                    )
+                except Exception as e:
+                    logging.error(f"Unexpected error: {e}")
+                    raise
+                finally:
+                    # 清理临时文件
+                    try:
+                        os.remove("input.txt")
+                    except Exception as e:
+                        logging.warning(f"Cannot remove temporary file input.txt: {str(e)}")
     def multiwfn_process_fchk_to_json(self, specific_directory: str = None):
         '''
         If a specific directory is given, this method can be used separately to implement batch processing of FCHK files with Multiwfn and save the desired electrostatic potential analysis results to the corresponding JSON file. Otherwise, the folder list provided during initialization will be processed sequentially.
+        :params
+            specific_directory: The specific directory to process. If None, all folders will be processed.
         '''
         if specific_directory is None:
             for folder in self.folders:
@@ -84,7 +174,11 @@ class EmpiricalEstimation:
     def _multiwfn_process_fchk_to_json(self, folder: str):
         '''
+        Private method:
         Perform electrostatic potential analysis on .fchk files using Multiwfn and save the analysis results to a .json file.
+        :params
+            folder: The folder containing the .fchk files to be processed.
         '''
         # 在每个文件夹中获取 .fchk 文件并根据文件名排序, 再用 Multiwfn 进行静电势分析, 最后将分析结果保存到同名 .json 文件中
         fchk_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.fchk')]
@@ -119,24 +213,31 @@ class EmpiricalEstimation:
     def _single_multiwfn_fchk_to_json(self, fchk_filename: str):
         '''
-        Private method: Use multiwfn to perform electrostatic potential analysis on each FCHK file separately, and save the required results to a corresponding JSON file.
+        Private method:
+        Use multiwfn to perform electrostatic potential analysis on each FCHK file separately, and save the required results to a corresponding JSON file.
+        :params
+            fchk_filename: The full path of the FCHK file to be processed.
+        :return: True if the processing is successful, False if the FCHK file is invalid.
         '''
         print(f'Multiwfn processing {fchk_filename}')
         logging.info(f'Multiwfn processing {fchk_filename}')
         result_flag = True
-        # 创建 input.txt 用于存储 Multiwfn 命令内容
-        with open('input.txt', 'w') as input_file:
-            input_file.write(f"{fchk_filename}\n12\n0\nq\n")
-        # 通过 input.txt 执行 Multiwfn 命令, 并将输出结果重定向到output.txt中
-        try:
-            subprocess.run('Multiwfn_noGUI < input.txt > output.txt', shell=True, capture_output=True)
-        except FileNotFoundError:
-            subprocess.run('Multiwfn < input.txt > output.txt', shell=True, capture_output=True)
+        self._multiwfn_cmd_build(
+            input_content=f"{fchk_filename}\n12\n0\n-1\n-1\nq\n",
+            output_file='output.txt')
+        print(f'Finished processing {fchk_filename}')
         # 获取目录以及 .fchk 文件的无后缀文件名, 即 refcode
         folder, filename = os.path.split(fchk_filename)
         refcode, _ = os.path.splitext(filename)
-        with open('output.txt', 'r') as output_file:
-            output_content = output_file.read()
+        try:
+            with open('output.txt', 'r') as output_file:
+                output_content = output_file.read()
+        except Exception as e:
+            logging.error(f"Error reading output.txt: {e}")
+            raise
         # 提取所需数据
         volume_match = re.search(r'Volume:\s*([\d.]+)\s*Bohr\^3\s+\(\s*([\d.]+)\s*Angstrom\^3\)', output_content)
         density_match = re.search(r'Estimated density according to mass and volume \(M/V\):\s*([\d.]+)\s*g/cm\^3', output_content)
@@ -186,15 +287,20 @@ class EmpiricalEstimation:
             with open (f"{folder}/{refcode}.json", 'w') as json_file:
                 json.dump(result, json_file, indent=4)
             shutil.copyfile(src=f"{folder}/{refcode}.json", dst=f"Optimized/{folder}/{refcode}.json")
-        os.remove('input.txt')
-        os.remove('output.txt')
         logging.info(f'Finished processing {fchk_filename}')
+        try:
+            os.remove("output.txt")
+        except Exception as e:
+            logging.warning(f"Cannot remove temporary file output.txt: {str(e)}")
         return result_flag
     def gaussian_log_to_optimized_gjf(self, specific_directory: str = None):
         """
         If a specific directory is given, this method can be used separately to batch process the last frame of Gaussian optimized LOG files into GJF files using Multiwfn.
         Otherwise, the folder list provided during initialization will be processed in order.
+        :params
+            specific_directory: The specific directory to process. If None, all folders will be processed.
         """
         if specific_directory is None:
             for folder in self.folders:
@@ -206,7 +312,11 @@ class EmpiricalEstimation:
     def _gaussian_log_to_optimized_gjf(self, folder: str):
         '''
+        Private method:
         Due to the lack of support of Pyxtal module for LOG files in subsequent crystal generation, it is necessary to convert the last frame of the Gaussian optimized LOG file to a .gjf file with Multiwfn processing.
+        :params
+            folder: The folder containing the Gaussian LOG files to be processed.
         '''
         # 在每个文件夹中获取 .log 文件并根据文件名排序, 再用Multiwfn载入优化最后一帧转换为 gjf 文件
         log_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith('.log')]
@@ -226,19 +336,24 @@ class EmpiricalEstimation:
             pass
         logging.info(f'\nThe .log to .gjf conversion by Multiwfn for {folder} folder has completed, and the optimized .gjf structures have been stored in the optimized directory.\n')
-    def _single_multiwfn_log_to_gjf(self, folder: str, log_filename: str):
+    def _single_multiwfn_log_to_gjf(self, folder: str, log_filename: str):
+        """
+        Private method:
+        Use Multiwfn to convert the last frame of the Gaussian optimized LOG file to a .gjf file.
+        :params
+            folder: The folder containing the Gaussian LOG file to be processed.
+            log_filename: The full path of the LOG file to be processed.
+        """
         # 获取目录以及 .fchk 文件的无后缀文件名, 即 refcode
         _, filename = os.path.split(log_filename)
         refcode, _ = os.path.splitext(filename)
         try:
-            # 创建 input.txt 用于存储 Multiwfn 命令内容
-            with open('input.txt', 'w') as input_file:
-                input_file.write(f"{log_filename}\ngi\nOptimized/{folder}/{refcode}.gjf\nq\n")
             # Multiwfn首先载入优化任务的out/log文件, 然后输入gi, 再输入要保存的gjf文件名, 此时里面的结构就是优化最后一帧的, 还避免了使用完全图形界面
-            try:
-                subprocess.run('Multiwfn_noGUI < input.txt', shell=True, capture_output=True)
-            except FileNotFoundError:
-                subprocess.run('Multiwfn < input.txt', shell=True, capture_output=True)
+            self._multiwfn_cmd_build(
+                input_content=f"{log_filename}\ngi\nOptimized/{folder}/{refcode}.gjf\nq\n"
+            )
             if os.path.exists(f"Optimized/{folder}/{refcode}.gjf"):
                 print(f'Finished converting {refcode} .log to .gjf')
                 logging.info(f'Finished converting {refcode} .log to .gjf')
@@ -249,6 +364,45 @@ class EmpiricalEstimation:
             print(f'Error with processing {log_filename}: {e}')
             logging.error(f'Error with processing {log_filename}: {e}')
+    def _read_gjf_elements(self, gjf_file):
+        """
+        Private method:
+        Read the elements from a .gjf file and return a dictionary with element counts.
+        :params
+            gjf_file: The full path of the .gjf file to be processed.
+        :return: A dictionary with element symbols as keys and their counts as values.
+        """
+        # 根据每一个组合中的组分找到对应的 JSON 文件并读取其中的性质内容
+        with open(gjf_file, "r") as file:
+            lines = file.readlines()
+        atomic_counts = {}
+        # 找到原子信息的开始行
+        start_reading = False
+        for line in lines:
+            line = line.strip()
+            # 跳过注释和空行
+            if line.startswith("%") or line.startswith("#") or not line:
+                continue
+            # 检测只包含两个数字的行
+            parts = line.split()
+            if (
+                len(parts) == 2
+                and parts[0].lstrip("-").isdigit()
+                and parts[1].isdigit()
+            ):
+                start_reading = True
+                continue
+            if start_reading:
+                element = parts[0]  # 第一个部分是元素符号
+                # 更新元素计数
+                if element in atomic_counts:
+                    atomic_counts[element] += 1
+                else:
+                    atomic_counts[element] = 1
+        return atomic_counts
     def nitrogen_content_estimate(self):
         """
         Evaluate the priority of ion crystal combinations based on nitrogen content and generate .csv files
@@ -290,55 +444,62 @@ class EmpiricalEstimation:
             writer.writerow(header)  # 写入表头
             writer.writerows(data)  # 写入排序后的数
-    def _read_gjf_elements(self, gjf_file):
-        # 根据每一个组合中的组分找到对应的 JSON 文件并读取其中的性质内容
-        with open(gjf_file, 'r') as file:
-            lines = file.readlines()
-        atomic_counts = {}
-        # 找到原子信息的开始行
-        start_reading = False
-        for line in lines:
-            line = line.strip()
-            # 跳过注释和空行
-            if line.startswith("%") or line.startswith("#") or not line:
-                continue
-            # 检测只包含两个数字的行
-            parts = line.split()
-            if len(parts) == 2 and parts[0].lstrip("-").isdigit() and parts[1].isdigit():
-                start_reading = True
-                continue
-            if start_reading:
-                element = parts[0]  # 第一个部分是元素符号
-                # 更新元素计数
-                if element in atomic_counts:
-                    atomic_counts[element] += 1
-                else:
-                    atomic_counts[element] = 1
-        return atomic_counts
+    def carbon_nitrogen_ratio_estimate(self):
+        """
+        Evaluate the priority of ion crystal combinations based on carbon and nitrogen ratio
+        (C:N < 1:8) and sort by oxygen content, then generate .csv files.
+        """
+        atomic_masses = {"H": 1.008, "C": 12.01, "N": 14.01, "O": 16.00}
+        # 获取所有 .gjf 文件
+        combinations = self._generate_combinations(suffix=".gjf")
+        filtered_data = []
-    def _generate_combinations(self, suffix: str):
-        # 获取所有符合后缀名条件的文件
-        all_files = []
-        for folder in self.folders:
-            suffix_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(suffix)]
-            suffix_files.sort()
-            print(f'Valid {suffix} file number in {folder}: {len(suffix_files)}')
-            logging.info(f"Valid {suffix} file number in {folder}: {len(suffix_files)}")
-            if not suffix_files:
-                raise FileNotFoundError(f'No available {suffix} files in {folder} folder')
-            all_files.append(suffix_files)
+        for combo in combinations:
+            total_atoms = 0
+            carbon_atoms = 0
+            nitrogen_atoms = 0
+            oxygen_atoms = 0
-        # 对所有文件根据其文件夹与配比进行组合
-        combinations = []
-        for folder_files in itertools.product(*all_files):
-            # 根据给定的配比生成字典形式的组合
-            ratio_combination = {}
-            for folder_index, count in enumerate(self.ratios):
-                ratio_combination.update({folder_files[folder_index]: count})
-            combinations.append(ratio_combination)
-        print(f'Valid combination number: {len(combinations)}')
-        logging.info(f'Valid combination number: {len(combinations)}')
-        return combinations
+            for gjf_file, ion_count in combo.items():
+                atomic_counts = self._read_gjf_elements(gjf_file)
+                for element, atom_count in atomic_counts.items():
+                    if element in atomic_masses:
+                        total_atoms += atom_count * ion_count
+                        if element == "C":
+                            carbon_atoms += atom_count * ion_count
+                        elif element == "N":
+                            nitrogen_atoms += atom_count * ion_count
+                        elif element == "O":
+                            oxygen_atoms += atom_count * ion_count
+                    else:
+                        raise ValueError(
+                            "Contains element information not included, unable to calculate ratios"
+                        )
+            # 计算 C:N 比率
+            if carbon_atoms != 0:  # 确保氮的质量大于 0，避免除以零
+                nitrogen_carbon_ratio = round(nitrogen_atoms / carbon_atoms, 2)
+            else:
+                nitrogen_carbon_ratio = 100.0
+            filtered_data.append((combo, nitrogen_carbon_ratio, oxygen_atoms))
+        # 根据氧含量排序
+        filtered_data.sort(key=lambda x: (-x[1], -x[2]))
+        # 写入排序后的 .csv 文件
+        with open(self.carbon_nitrogen_csv, "w", newline="", encoding="utf-8") as csv_file:
+            writer = csv.writer(csv_file)
+            # 动态生成表头
+            num_components = len(combinations[0]) if combinations else 0
+            header = [f"Component {i + 1}" for i in range(num_components)] + ["N_C_Ratio", "O_Atoms"]
+            writer.writerow(header)  # 写入表头
+            # 写入筛选后的组合和氧含量
+            for combo, nitrogen_carbon_ratio, oxygen_content in filtered_data:
+                cleaned_combo = [name.replace(".gjf", "") for name in combo]
+                writer.writerow(
+                    cleaned_combo + [nitrogen_carbon_ratio, oxygen_content]
+                )  # 写入每一行
     def empirical_estimate(self):
         """
@@ -410,9 +571,51 @@ class EmpiricalEstimation:
             writer.writerow(header)  # 写入表头
             writer.writerows(data)  # 写入排序后的数
+    def _generate_combinations(self, suffix: str):
+        """
+        Private method:
+        Generate all valid combinations of files based on the specified suffix and ratios.
+        :params
+            suffix: The file suffix to filter the files in the folders.
+        :return: A list of dictionaries representing the combinations of files with their respective ratios.
+        """
+        # 获取所有符合后缀名条件的文件
+        all_files = []
+        for folder in self.folders:
+            suffix_files = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(suffix)]
+            suffix_files.sort()
+            print(f'Valid {suffix} file number in {folder}: {len(suffix_files)}')
+            logging.info(f"Valid {suffix} file number in {folder}: {len(suffix_files)}")
+            if not suffix_files:
+                raise FileNotFoundError(f'No available {suffix} files in {folder} folder')
+            all_files.append(suffix_files)
+        # 对所有文件根据其文件夹与配比进行组合
+        combinations = []
+        for folder_files in itertools.product(*all_files):
+            # 根据给定的配比生成字典形式的组合
+            ratio_combination = {}
+            for folder_index, count in enumerate(self.ratios):
+                ratio_combination.update({folder_files[folder_index]: count})
+            combinations.append(ratio_combination)
+        print(f'Valid combination number: {len(combinations)}')
+        logging.info(f'Valid combination number: {len(combinations)}')
+        return combinations
     def _copy_combo_file(self, combo_path, folder_basename, file_type):
+        """
+        Private method:
+        Copy the specified file type from the Optimized directory to the combo_n folder.
+        :params
+            combo_path: The path to the combo_n folder where the file will be copied.
+            folder_basename: The basename of the folder containing the file to be copied.
+            file_type: The type of file to be copied (e.g., '.gjf', '.json').
+        """
         filename = f"{folder_basename}{file_type}"
-        source_path = os.path.join(self.base_dir, 'Optimized', filename)
+        source_path = os.path.join(self.gaussian_optimized_dir, "Optimized", filename)
         # 复制指定后缀名文件到对应的 combo_n 文件夹
         if os.path.exists(source_path):
             if os.path.exists(os.path.join(combo_path, os.path.basename(filename))):
@@ -422,15 +625,18 @@ class EmpiricalEstimation:
                 shutil.copy(source_path, combo_path)
                 logging.info(f'Copied {os.path.basename(source_path)} to {combo_path}')
         else:
-            logging.error(f'File of {filename} does not exist in {self.base_dir}')
+            logging.error(
+                f"File of {filename} does not exist in {self.gaussian_optimized_dir}"
+            )
     def make_combo_dir(self, target_dir: str, num_combos: int, ion_numbers: List[int]):
         """
         Create a combo_n folder based on the .csv file and copy the corresponding .gjf structure file.
-        :param target_directory: The target directory of the combo folder to be created
-        :param num_folders: The number of combo folders to be created
-        :param ion_numbers: The number of ions for ionic crystal generation step (generated in config.yaml in the corresponding combo_dir automatically)
+        :params
+            target_directory: The target directory of the combo folder to be created
+            num_folders: The number of combo folders to be created
+            ion_numbers: The number of ions for ionic crystal generation step (generated in config.yaml in the corresponding combo_dir automatically)
         """
         if self.sort_by == 'density':
             base_csv = self.density_csv
@@ -464,15 +670,15 @@ class EmpiricalEstimation:
                     gjf_names.append(f"{folder_basename.split('/')[1]}.gjf")
                 # 生成上级目录路径并解析 .yaml 文件
-                parent_dir = os.path.dirname(self.base_dir)
+                parent_dir = self.base_dir
                 parent_config_path = os.path.join(parent_dir, 'config.yaml')
-                base_config_path = os.path.join(self.base_dir, "config.yaml")
+                base_config_path = os.path.join(self.gaussian_optimized_dir, "config.yaml")
                 try:
                     with open(parent_config_path, 'r') as file:
                         config = yaml.safe_load(file)
                 except FileNotFoundError as e:
                     logging.warning(f"No available config.yaml file in parent directory: {parent_dir} \n{e}")
-                    logging.info(f"Trying to load config.yaml file from base directory: {self.base_dir}")
+                    logging.info(f"Trying to load config.yaml file from base directory: {parent_dir}")
                     try:
                         with open(base_config_path, 'r') as file:
                             try:
@@ -480,11 +686,10 @@ class EmpiricalEstimation:
                             except yaml.YAMLError as e:
                                 logging.error(f"YAML configuration file parsing failed: {e}")
                     except FileNotFoundError as e:
-                        logging.error(f"No available config.yaml file either in parent directory: {parent_dir} and base directory {self.base_dir} \n{e}")
+                        logging.error(
+                            f"No available config.yaml file either in parent directory: {parent_dir} and base directory {self.gaussian_optimized_dir} \n{e}"
+                        )
                         raise
-                except PermissionError:
-                    logging.error(f'No read permission for the path: {parent_dir}')
-                    raise
                 except Exception as e:
                     logging.error(f'Unexpected error: {e}')
                     raise
@@ -502,4 +707,3 @@ class EmpiricalEstimation:
                         yaml.dump(config, file)
                 except Exception as e:
                     logging.error(f"Unexpected error: {e}")

ion-CSP 2.1.5__py3-none-any.whl → 2.1.9__py3-none-any.whl

ion-CSP 2.1.5__py3-none-any.whl → 2.1.9__py3-none-any.whl