PyPI - pwact - Versions diffs - 0.1.19__tar.gz → 0.1.21__tar.gz - Mend

pwact 0.1.19.tar.gz → 0.1.21.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

{pwact-0.1.19 → pwact-0.1.21}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pwact
-Version: 0.1.19
+Version: 0.1.21
 Summary: PWACT is an open-source automated active learning platform based on PWMLFF for efficient data sampling.
 Home-page: https://github.com/LonxunQuantum/PWact
 Author: LonxunQuantum

{pwact-0.1.19 → pwact-0.1.21}/pwact/active_learning/explore/select_image.py RENAMED Viewed

@@ -118,7 +118,51 @@ def select_image(
     print("Image select result:\n {}\n\n".format(summary_info))
     return summary
+def print_select_image(
+    md_dir:str,
+    save_dir:str,
+    devi_name:str,
+    lower:float,
+    higer:float
+):
+    #1. get model_deviation file
+    model_deviation_patten = "{}/{}".format(get_sub_md_sys_template_name(), devi_name)
+    model_devi_files = search_files(md_dir, model_deviation_patten)
+    model_devi_files = sorted(model_devi_files)
+    md_sys_dict = sort_model_devi_files(model_devi_files)
+    error_pd =None
+    accurate_pd =None
+    rand_candi =None
+    remove_candi =None
+    for md in md_sys_dict.keys():
+        sys_dict = md_sys_dict[md]
+        for sys_idx, sys in enumerate(sys_dict.keys()):
+            devi_files = sys_dict[sys]
+            tmp_devi_pd, _base_kpu = read_pd_files(devi_files)
+            if len(_base_kpu) > 0: # for kpu upper and lower
+                _lower = np.mean(_base_kpu)*lower
+                _higer = _lower * higer
+            else:
+                _lower = lower
+                _higer = higer
+            tmp_error_pd, tmp_accurate_pd, tmp_rand_candi, tmp_remove_candi = select_pd(tmp_devi_pd, _lower, _higer, 10000000)
+            error_pd = pd.concat([error_pd, tmp_error_pd]) if error_pd is not None else tmp_error_pd
+            accurate_pd = pd.concat([accurate_pd, tmp_accurate_pd]) if error_pd is not None else tmp_accurate_pd
+            rand_candi = pd.concat([rand_candi, tmp_rand_candi]) if error_pd is not None else tmp_rand_candi
+            remove_candi = pd.concat([remove_candi, tmp_remove_candi]) if error_pd is not None else tmp_remove_candi
+    summary_info, summary = count_info(save_dir, error_pd, accurate_pd, rand_candi, remove_candi)
+    # summary_info, summary = select_image(save_dir=self.select_dir,
+    #                 devi_pd=devi_pd,
+    #                 lower=self.input_param.strategy.lower_model_deiv_f,
+    #                 higer=self.input_param.strategy.upper_model_deiv_f,
+    #                 max_select=self.input_param.strategy.max_select)
+    print("Image select result (lower {} upper {}):\n {}\n\n".format(lower, higer, summary_info))
+    return summary
 def select_pd(devi_pd:DataFrame, lower:float, higer:float, max_select:float):
     accurate_pd  = devi_pd[devi_pd[EXPLORE_FILE_STRUCTURE.devi_columns[0]] < lower]
     candidate_pd = devi_pd[(devi_pd[EXPLORE_FILE_STRUCTURE.devi_columns[0]] >= lower) & (devi_pd[EXPLORE_FILE_STRUCTURE.devi_columns[0]] < higer)]
@@ -169,41 +213,39 @@ def read_pd_files(model_devi_files:list[str]):
 def count_info(save_dir, error_pd, accurate_pd, rand_candi, remove_candi):
     #5. save select info
-    if not os.path.exists(save_dir):
-        os.makedirs(save_dir)
     total_num = error_pd.shape[0] + accurate_pd.shape[0] + rand_candi.shape[0] + remove_candi.shape[0]
     cand_num = rand_candi.shape[0] + remove_candi.shape[0]
     summary = "Total structures {}    accurate {} rate {:.2f}%    selected {} rate {:.2f}%    error {} rate {:.2f}%\n"\
         .format(total_num, accurate_pd.shape[0], accurate_pd.shape[0]/total_num*100, \
                     cand_num, cand_num/total_num*100, \
                         error_pd.shape[0], error_pd.shape[0]/total_num*100)
-    accurate_pd.to_csv(os.path.join(save_dir, EXPLORE_FILE_STRUCTURE.accurate))
     candi_info = ""
-    rand_candi.to_csv(os.path.join(save_dir, EXPLORE_FILE_STRUCTURE.candidate))
     if remove_candi.shape[0] == 0:
         candi_info += "Candidate configurations: {}\n        Select details in file {}\n".format(
             cand_num, EXPLORE_FILE_STRUCTURE.candidate)
     else:
-        remove_candi.to_csv(os.path.join(save_dir, EXPLORE_FILE_STRUCTURE.candidate_delete))
         candi_info += "Candidate configurations: {}, randomly select {}, delete {}\n        Select details in file {}\n        Delete details in file {}.\n".format(
             cand_num, rand_candi.shape[0], remove_candi.shape[0],\
             EXPLORE_FILE_STRUCTURE.candidate, EXPLORE_FILE_STRUCTURE.candidate_delete)
-    error_pd.to_csv(os.path.join(save_dir, EXPLORE_FILE_STRUCTURE.failed))
     summary_info = ""
     summary_info += summary
     summary_info += "\nSelect by model deviation force:\n"
     summary_info += "Accurate configurations: {}, details in file {}\n".\
         format(accurate_pd.shape[0], EXPLORE_FILE_STRUCTURE.accurate)
     summary_info += candi_info
     summary_info += "Error configurations: {}, details in file {}\n".\
         format(error_pd.shape[0], EXPLORE_FILE_STRUCTURE.failed)
-    write_to_file(os.path.join(save_dir, EXPLORE_FILE_STRUCTURE.select_summary), summary_info, "w")
-    return summary_info, summary
+    if save_dir is not None:
+        if not os.path.exists(save_dir):
+            os.makedirs(save_dir)
+        accurate_pd.to_csv(os.path.join(save_dir, EXPLORE_FILE_STRUCTURE.accurate))
+        rand_candi.to_csv(os.path.join(save_dir, EXPLORE_FILE_STRUCTURE.candidate))
+        if remove_candi.shape[0] > 0:
+            remove_candi.to_csv(os.path.join(save_dir, EXPLORE_FILE_STRUCTURE.candidate_delete))
+        error_pd.to_csv(os.path.join(save_dir, EXPLORE_FILE_STRUCTURE.failed))
+        write_to_file(os.path.join(save_dir, EXPLORE_FILE_STRUCTURE.select_summary), summary_info, "w")
+    return summary_info, summary

{pwact-0.1.19 → pwact-0.1.21}/pwact/active_learning/init_bulk/aimd.py RENAMED Viewed

@@ -161,8 +161,7 @@ class AIMD(object):
             flag_symm=flag_symm,
             save_dir = aimd_dir,
             pseudo_names=pseudo_names,
-            basis_set_file_name=self.input_param.dft_input.basis_set_file,# these for cp2k
-            potential_file_name=self.input_param.dft_input.potential_file
+            gaussian_base_param = self.input_param.dft_input.gaussian_base_param
         )
     def make_aimd_slurm_job_files(self, aimd_dir_list:list[str],use_dftb: bool=False):

{pwact-0.1.19 → pwact-0.1.21}/pwact/active_learning/init_bulk/duplicate_scale.py RENAMED Viewed

@@ -34,7 +34,7 @@ def  duplicate_scale(resource: Resource, input_param:InitBulkParam):
             super_cell_config = os.path.join(super_cell_scale_dir, DFT_STYLE.get_super_cell_config(resource.dft_style))
             if not os.path.exists(super_cell_config):
-                do_super_cell(config=config_file,
+                do_super_cell(config_file=config_file,
                     input_format=config_format,
                     supercell_matrix=init_config.super_cell,
                     pbc=init_config.pbc,

{pwact-0.1.19 → pwact-0.1.21}/pwact/active_learning/init_bulk/relabel.py RENAMED Viewed

@@ -161,8 +161,7 @@ class Relabel(object):
                 flag_symm=flag_symm,
                 save_dir = save_dir,
                 pseudo_names=pseudo_names,
-                basis_set_file_name=self.input_param.dft_input.basis_set_file,# these for cp2k
-                potential_file_name=self.input_param.dft_input.potential_file
+                gaussian_base_param=self.input_param.dft_input.gaussian_base_param,# these for cp2k
             )
             scf_lsit.append(save_dir)
         return scf_lsit

{pwact-0.1.19 → pwact-0.1.21}/pwact/active_learning/init_bulk/relax.py RENAMED Viewed

@@ -117,8 +117,7 @@ class Relax(object):
             dft_style=self.input_param.dft_style,
             save_dir=relax_path,
             pseudo_names=pseudo_names,
-            basis_set_file_name=self.input_param.dft_input.basis_set_file,# these for cp2k
-            potential_file_name=self.input_param.dft_input.potential_file,
+            gaussian_base_param=self.input_param.dft_input.gaussian_base_param,# these for cp2k
             # xc_functional=self.input_param.dft_input.xc_functional,
             # potential=self.input_param.dft_input.potential,
             # basis_set=self.input_param.dft_input.basis_set

{pwact-0.1.19 → pwact-0.1.21}/pwact/active_learning/label/labeling.py RENAMED Viewed

@@ -191,9 +191,8 @@ class Labeling(object):
             flag_symm=self.input_param.scf.scf_input_list[0].flag_symm,
             save_dir=scf_dir,
             pseudo_names=pseudo_names,
-            is_scf = True,
-            basis_set_file_name  =self.input_param.scf.basis_set_file,
-            potential_file_name  =self.input_param.scf.potential_file
+            gaussian_base_param=self.input_param.scf.gaussian_base_param,# these for cp2k
+            is_scf = True
         )
     def make_scf_slurm_job_files(self, scf_sub_list:list[str]):

{pwact-0.1.19 → pwact-0.1.21}/pwact/active_learning/test/test.py RENAMED Viewed

@@ -4,7 +4,7 @@ import os
 import shutil
 import glob
 import json
-from pwdata.main import Config
+from pwdata.config import Config
 def make_kspacing_kpoints(config, format, kspacing):
     config = Config(format=format, data_path=config)
     # with open(config, "r") as fp:
@@ -17,7 +17,7 @@ def make_kspacing_kpoints(config, format, kspacing):
     #             box.append(vector)
     #         box = np.array(box)
     #         rbox = _reciprocal_box(box)
-    box = config.images.lattice
+    box = config.images[0].lattice
     rbox = _reciprocal_box(box)
     kpoints = [
         max(1, round(2 * np.pi * np.linalg.norm(ii) / kspacing)) for ii in rbox

{pwact-0.1.19 → pwact-0.1.21}/pwact/active_learning/user_input/cmd_infos.py RENAMED Viewed

@@ -39,6 +39,8 @@ def cmd_infos(cmd_type=None):
         cmd_info = cmd_info_run_iter()
     elif cmd_type == "kill":
         cmd_info = cmd_info_kill()
+    elif cmd_type == "filter":
+        cmd_info = cmd_info_filter()
     print(cmd_info)
@@ -64,3 +66,10 @@ def cmd_info_kill():
     cmd_info += "'pwact kill init_bulk' for 'init_bulk' tasks\n"
     cmd_info += "'pwact kill run' for 'run' tasks\n\n"
     return cmd_info
+def cmd_info_filter():
+    cmd_info = ""
+    cmd_info += "filter" + "\n"
+    cmd_info += "you could use this method to test the selection results corresponding to the upper and lower limit settings\n"
+    cmd_info += "example:\n"
+    cmd_info += "'pwact filter -i iter.0000/explore/select -l 0.01 -h 0.02 -s filter_test_result'\n\n"

{pwact-0.1.19 → pwact-0.1.21}/pwact/active_learning/user_input/scf_param.py RENAMED Viewed

@@ -62,8 +62,25 @@ class SCFParam(object):
         # else:
         #     pass
         # for cp2k
-        self.basis_set_file = get_parameter("basis_set_file", json_dict, None)
-        self.potential_file = get_parameter("potential_file", json_dict, None)
+        gaussian_param = get_parameter("gaussian_param", json_dict, None)
+        if gaussian_param is not None:
+            self.basis_set_file = os.path.abspath(get_parameter("basis_set_file", gaussian_param, None))
+            self.potential_file = os.path.abspath(get_parameter("potential_file", gaussian_param, None))
+            basis_set_list = get_parameter("basis_set_list", gaussian_param, None)
+            potential_list = get_parameter("potential_list", gaussian_param, None)
+            atom_list = get_parameter("atom_list", gaussian_param, None)
+            self.gaussian_base_param = {}
+            self.gaussian_base_param["ELEMENT"] = atom_list
+            self.gaussian_base_param["BASIS_SET"] = basis_set_list
+            self.gaussian_base_param["POTENTIAL"] = potential_list
+            self.gaussian_base_param["BASIS_SET_FILE_NAME"] = os.path.basename(self.basis_set_file)
+            self.gaussian_base_param["POTENTIAL_FILE_NAME"] = os.path.basename(self.potential_file)
+        else:
+            self.basis_set_file = None# os.path.abspath(get_parameter("basis_set_file", json_dict, None))
+            self.potential_file = None#os.path.abspath(get_parameter("potential_file", json_dict, None))
+            self.gaussian_base_param = None
+        # for cp2k and pwmat gaussion
     def _set_pseudo(self, pseudo, style:str):
         res_pseudo = []
@@ -158,7 +175,7 @@ class DFTInput(object):
         self.flag_symm = flag_symm
         self.use_dftb = False
         self.use_skf = False
+        self.use_gaussion = False
         # check etot input file
         if self.dft_style == DFT_STYLE.pwmat:
             key_values, etot_lines = read_and_check_etot_input(self.input_file)
@@ -177,6 +194,9 @@ class DFTInput(object):
                 if key_values["DFTB_DETAIL"].replace(",", " ").split()[0] != "3": # not chardb
                     self.use_skf = True
+            if "USE_GAUSSIAN" in key_values.keys() and key_values["USE_GAUSSIAN"]is not None and key_values["USE_GAUSSIAN"] == "T":
+                self.use_gaussion
     def get_input_content(self):
         if self.dft_style == DFT_STYLE.pwmat:
             return read_and_check_etot_input(self.input_file)

{pwact-0.1.19 → pwact-0.1.21}/pwact/data_format/configop.py RENAMED Viewed

@@ -2,7 +2,7 @@ import os
 from pwact.utils.constant import ELEMENTTABLE, DFT_STYLE, ELEMENTTABLE_2, CP2K, PWDATA
 from pwact.utils.app_lib.cp2k import make_cp2k_xyz
 from pwact.utils.file_operation import write_to_file
-from pwdata.main import Config
+from pwdata.config import Config
 from pwdata import perturb_structure, make_supercell, scale_cell
 '''
 description:
@@ -14,7 +14,7 @@ author: wuxingxing
 '''
 def get_atom_type(config_path, format:str=None):
     if isinstance(config_path, str):
-        image = Config.read(format=format, data_path=config_path, atom_names=None)
+        image = Config(format=format, data_path=config_path, atom_names=None).images[0]
     else:
         image = config_path
     atomic_number_list = []
@@ -25,7 +25,7 @@ def get_atom_type(config_path, format:str=None):
     return atomic_name_list, atomic_number_list
 def load_config(config, format, atom_names=None):
-    config = Config.read(format=format, data_path=config, atom_names=atom_names)
+    config = Config(format=format, data_path=config, atom_names=atom_names)
     return config
 '''
@@ -38,9 +38,7 @@ author: wuxingxing
 def save_config(config, input_format:str = None, wrap = False, direct = True, sort = True, \
         save_format:str=None, save_path:str=None, save_name:str=None, atom_names: list[str] = None):
     if isinstance(config, str):
-        config = Config.read(format=input_format, data_path=config, atom_names=atom_names)
-    if isinstance(config, list): # for lammps dump traj, config will be list
-        config = config[0]
+        config = Config(format=input_format, data_path=config, atom_names=atom_names).images[0]
     if save_format == PWDATA.cp2k_scf:
         # make coord.xyz used by cp2k for every task
         config = config._set_cartesian() if config.cartesian is False else config._set_cartesian()
@@ -86,9 +84,9 @@ def read_cp2k_xyz(config_file:str):
         coord.appnd([float(elements[1]), float(elements[2]), float(elements[3])])
     return atom_type_name, atom_names, coord
-def do_super_cell(config, input_format:str=None, supercell_matrix:list[int]=None, pbc:list[int]=[1, 1, 1], direct = True, sort = True, \
+def do_super_cell(config_file, input_format:str=None, supercell_matrix:list[int]=None, pbc:list[int]=[1, 1, 1], direct = True, sort = True, \
                     save_format:str=None, save_path:str=None, save_name:str=None):
-    config = Config.read(format=input_format, data_path=config, atom_names=None)
+    config = Config(format=input_format, data_path=config_file, atom_names=None)
     # Make a supercell
     supercell = make_supercell(config, supercell_matrix, pbc)
     # Write out the structure
@@ -101,7 +99,7 @@ def do_super_cell(config, input_format:str=None, supercell_matrix:list[int]=None
 def do_scale(config, input_format:str=None, scale_factor:float=None,
             direct:bool=True, sort:bool=True, save_format:str=None, save_path:str=None, save_name:str=None):
-    config = Config.read(format=input_format, data_path=config)
+    config = Config(format=input_format, data_path=config)
     scaled_struct = scale_cell(config, scale_factor)
     scaled_struct.to(output_path = save_path,
                     data_name = save_name,
@@ -113,7 +111,7 @@ def do_scale(config, input_format:str=None, scale_factor:float=None,
 def do_pertub(config, input_format:str=None, pert_num:int=None, cell_pert_fraction:float=None, atom_pert_distance:float=None, \
         direct:bool=True, sort:bool=True, save_format:str=None, save_path:str=None, save_name:str=None):
-    config = Config.read(format=input_format, data_path=config)
+    config = Config(format=input_format, data_path=config)
     if not os.path.exists(save_path):
         os.makedirs(save_path)
@@ -166,10 +164,12 @@ def extract_pwdata(data_list:list[str],
                 tmp_config = Config(data_format, data_path)
                 # if not isinstance(tmp_config, list):
                 #     tmp_config = [tmp_config]
-                image_data.append(tmp_config)
+                image_data.images.extend(tmp_config.images)
             else:
                 image_data = Config(data_format, data_path)
+                if not isinstance(image_data.images, list):
+                    image_data.images = [image_data.images]
                 # if not isinstance(image_data, list):
                 #     image_data = [image_data]

{pwact-0.1.19 → pwact-0.1.21}/pwact/main.py RENAMED Viewed

@@ -23,7 +23,7 @@ from pwact.active_learning.init_bulk.init_bulk_run import init_bulk_run, scancel
 from pwact.active_learning.environment import check_envs
 from pwact.data_format.configop import extract_pwdata
-from pwact.active_learning.explore.select_image import select_image
+from pwact.active_learning.explore.select_image import select_image, print_select_image
 from pwact.utils.process_tool import kill_process
 def run_iter():
     system_json = json.load(open(sys.argv[2]))
@@ -299,6 +299,26 @@ def kill_job():
     # for run iters jobs
+def filter_test(input_cmds):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-i', '--md_dir', help="specify input dir such as 'iter.0000/temp_run_iter_work/explore/md'", type=str, required=True)
+    parser.add_argument('-l', '--lower', help="specify lower limit value", type=float, required=True)
+    parser.add_argument('-u', '--upper', help="specify upper limit value", type=float, required=True)
+    parser.add_argument('-s', '--save', action='store_true', help="if '-s' is set, save the detailed information of the selected configs to CSV files")
+    args = parser.parse_args(input_cmds)
+    if not os.path.exists(args.md_dir):
+        raise Exception("ERROR! The input md_dir {} not found!".format(args.md_dir))
+    save_dir = os.path.join(os.getcwd(), "filter_test_result") if args.save else None
+    summary = print_select_image(
+                md_dir=args.md_dir,
+                save_dir=save_dir,
+                devi_name=EXPLORE_FILE_STRUCTURE.get_devi_name(UNCERTAINTY.committee),
+                lower=args.lower,
+                higer=args.upper
+        )
 def main():
     environment_check()
     if len(sys.argv) == 1 or "-h".upper() == sys.argv[1].upper() or \
@@ -343,7 +363,14 @@ def main():
             cmd_infos("kill")
         else:
             kill_job()
+    elif "filter_test".upper() == sys.argv[1].upper() or "filter".upper() == sys.argv[1].upper():
+        if len(sys.argv) == 2 or "-h".upper() == sys.argv[2].upper() or \
+            "help".upper() == sys.argv[2].upper() or "-help".upper() == sys.argv[2].upper() or "--help".upper() == sys.argv[2].upper():
+            cmd_infos("filter")
+        else:
+            filter_test(sys.argv[2:])
     else:
         print("ERROR! The input cmd {} can not be recognized, please check.".format(sys.argv[1]))
         print("\n\n\nYou can enter the following command.\n\n\n")

{pwact-0.1.19 → pwact-0.1.21}/pwact/utils/app_lib/common.py RENAMED Viewed

@@ -46,8 +46,11 @@ def link_pseudo_by_atom(
                     link_file(pseudo_path, os.path.join(target_dir, pseudo_name))
                     pseudo_find.append(pseudo_path)
                     break
-        assert len(pseudo_find) == len(atom_order), "the pwmat pseudo files {} not same as atom type '{}'".format(pseudo_find, atom_order)
+        # assert len(pseudo_find) == len(atom_order), "the pwmat pseudo files {} not same as atom type '{}'".format(pseudo_find, atom_order)
+        if basis_set_file is not None and potential_file is not None: # these 2 files for pwmat gaussian base
+            link_file(basis_set_file, os.path.join(target_dir, os.path.basename(basis_set_file)))
+            link_file(potential_file, os.path.join(target_dir, os.path.basename(potential_file)))
     elif dft_style == DFT_STYLE.vasp:
         # merge file to where? to save dir
         for atom_name in atom_order:
@@ -98,8 +101,11 @@ def set_input_script(
     save_dir:str=None,
     pseudo_names:list[str]=None,
     is_scf = False, # if is_scf, the pwmat etot.input will set the 'out.mlmd = T'
-    basis_set_file_name=None,
-    potential_file_name=None,
+    gaussian_base_param = None
+    # basis_set_file_name=None,
+    # potential_file_name=None,
+    # basis_set_list=None,
+    # potential_list=None
     # xc_functional=None,
     # potential=None,
     # basis_set=None
@@ -114,7 +120,8 @@ def set_input_script(
             flag_symm=flag_symm,
             pseudo_names=pseudo_names,
             is_scf = is_scf,
-            is_skf_file = is_skf_file
+            is_skf_file = is_skf_file,
+            gaussian_base_param=gaussian_base_param
             )
         write_to_file(target_file, script, "w")
     elif dft_style == DFT_STYLE.vasp:
@@ -127,10 +134,13 @@ def set_input_script(
         cell = file_read_last_line(os.path.join(save_dir, CP2K.cell_txt), type_name="float")
         del_file(os.path.join(save_dir, CP2K.cell_txt))
         # inp file, cell cood add to inp file
+        # set kind_dict
         script = make_cp2k_input_from_external(
             cell=cell,
-            coord_file_name = os.path.join(os.path.basename(config)),
-            exinput_path=input_file
+            coord_file = config,
+            exinput_path=input_file,
+            gaussian_base_param = gaussian_base_param
             )
         write_to_file(target_file, script, "w")

{pwact-0.1.19 → pwact-0.1.21}/pwact/utils/app_lib/cp2k.py RENAMED Viewed

@@ -6,7 +6,7 @@ description:
 return {*}
 author: wuxingxing
 '''
+import os
 import numpy as np
 default_config = {
     "GLOBAL": {"PROJECT": "AL_PWMLFF"},
@@ -186,8 +186,8 @@ param {*} exinput_path
 return {*}
 author: wuxingxing
 '''
-def make_cp2k_input_from_external(cell, coord_file_name, exinput_path):
+def make_cp2k_input_from_external(cell, coord_file, exinput_path, gaussian_base_param:dict):
+    coord_file_name = os.path.basename(coord_file)
     # insert the cell information
     # covert cell to cell string
     cell = np.reshape(cell, [3, 3])
@@ -201,10 +201,37 @@ def make_cp2k_input_from_external(cell, coord_file_name, exinput_path):
     end_subsys = 0
     start_coord = 0
     end_coord = 0
+    start_kind = -1
+    end_kind = -1
     start_global = 0
     end_global = 0
     print_level_line = -1
+    start_dft = 0
+    end_dft = 0
+    basis_set_file_name = -1
+    potential_file_name = -1
+    # delete the BASIS_SET_FILE_NAME and POTENTIAL_FILE_NAME line
+    for line_idx, line in enumerate(exinput):
+        line = line.upper()
+        if "&DFT" in line:
+            start_dft = line_idx
+        if "&END DFT" in line:
+            end_dft = line_idx
+        if "BASIS_SET_FILE_NAME" in line:
+            basis_set_file_name = line_idx
+        if "POTENTIAL_FILE_NAME" in line:
+            potential_file_name = line_idx
+    if start_dft == end_dft:
+        raise Exception("{} extarcted error! Can not find DFT set!".format(exinput_path))
+    basis_set_file_name, potential_file_name = sorted([basis_set_file_name, potential_file_name], reverse=True)
+    if basis_set_file_name != -1:
+        exinput.pop(basis_set_file_name)
+    if potential_file_name != -1:
+        exinput.pop(potential_file_name)
+    exinput.insert(start_dft+1, "    BASIS_SET_FILE_NAME {}\n".format(gaussian_base_param["BASIS_SET_FILE_NAME"]))
+    exinput.insert(start_dft+2, "    POTENTIAL_FILE_NAME {}\n".format(gaussian_base_param["POTENTIAL_FILE_NAME"]))
     for line_idx, line in enumerate(exinput):
         line = line.upper()
         if "&GLOBAL" in line:
@@ -225,8 +252,14 @@ def make_cp2k_input_from_external(cell, coord_file_name, exinput_path):
             start_coord = line_idx
         if "&END COORD" in line:
             end_coord = line_idx
+        if "&KIND" in line and start_kind == -1:
+            start_kind = line_idx
+        if "&END KIND" in line:
+            end_kind = line_idx
     if start_global == end_global:
         raise Exception("ERROR! the input cp2k inp file does not have 'GLOBAL' block! Please check the file {}\n".format(exinput_path))
     temp_exinput = exinput[:start_subsys+1]
     # add coord
     temp_exinput.append("    &COORD\n")
@@ -241,11 +274,16 @@ def make_cp2k_input_from_external(cell, coord_file_name, exinput_path):
     # temp_exinput.append("        PERIODIC XYZ\n")
     temp_exinput.append("    &END CELL\n")
+    kind_input = get_kind(coord_file=coord_file, gassion_base_param=gaussian_base_param)
+    temp_exinput.append(kind_input)
     del_content_index = []
     if start_cell != end_cell:
         del_content_index.extend(list(range(start_cell, end_cell+1)))
     if start_coord != end_coord:
         del_content_index.extend(list(range(start_coord, end_coord+1)))
+    if start_kind != end_kind:
+        del_content_index.extend(list(range(start_kind, end_kind+1)))
     del_content_index = sorted(del_content_index)
     for index in range(start_subsys+1, end_subsys):
         if index not in del_content_index:
@@ -259,34 +297,63 @@ def make_cp2k_input_from_external(cell, coord_file_name, exinput_path):
         temp_exinput[print_level_line] = "    PRINT_LEVEL medium\n"
     return "".join(temp_exinput)
+def get_kind(coord_file:str, gassion_base_param:dict):
+    atom_list = get_atom_type_from_config(coord_file)
+    kind_line = "\n"
+    for idx, atom in enumerate(gassion_base_param["ELEMENT"]):
+        if atom not in atom_list:
+            continue
+        kind_line += "    &KIND {}\n".format(atom)
+        kind_line += "        ELEMENT {}\n".format(atom)
+        kind_line += "        BASIS_SET {}\n".format(gassion_base_param["BASIS_SET"][idx])
+        kind_line += "        POTENTIAL {}\n".format(gassion_base_param["POTENTIAL"][idx])
+        kind_line += "    &END KIND\n"
+    return kind_line
+def get_atom_type_from_config(coord_file:str):
+    res = []
+    with open(coord_file, 'r') as rf:
+        lines = rf.readlines()
+    for line in lines:
+        try:
+            atom_type, x, y, z = line.strip().split()
+            x = float(x)
+            y = float(y)
+            z = float(z)
+            if atom_type not in res:
+                res.append(atom_type)
+        except:
+            continue
+    return res
 # if __name__=="__main__":
-    # import dpdata
-    # poscar = "/data/home/wuxingxing/datas/al_dir/si_4_vasp/init_bulk/collection/init_config_0/0.9_scale.poscar"
-    # sys_data = dpdata.System(poscar).data
+#     import dpdata
+#     poscar = "/data/home/wuxingxing/datas/al_dir/si_4_vasp/init_bulk/collection/init_config_0/0.9_scale.poscar"
+#     sys_data = dpdata.System(poscar).data
-    # from pwdata.main import Configs
-    # from pwdata.calculators.const import ELEMENTTABLE_2
-    # image = Configs.read(format="pwmat", data_path="/data/home/wuxingxing/datas/al_dir/si_exp/init_bulk/atom.config")
-    # image = image._set_cartesian() if image.cartesian is False else image._set_cartesian()
-    # potential = {"Si":"GTH-PBE"}
-    # basis_set = {"Si":"DZVP-MOLOPT-SR-GTH-q4"}
-    # atom_types_image = []
-    # for atom in image.atom_types_image:
-    #     atom_types_image.append(ELEMENTTABLE_2[atom])
-    # coord_xyz = make_cp2k_xyz(
-    #     atom_types = atom_types_image,
-    #     coord_list = image.position
-    # )
-    # with open("/data/home/wuxingxing/datas/al_dir/si_exp/init_bulk/coord.xyz", "w") as fp:
-    #     fp.write(coord_xyz)
+#     from pwdata.config import Configs
+#     from pwdata.calculators.const import ELEMENTTABLE_2
+#     image = Configs.read(format="pwmat", data_path="/data/home/wuxingxing/datas/al_dir/si_exp/init_bulk/atom.config")
+#     image = image._set_cartesian() if image.cartesian is False else image._set_cartesian()
+#     potential = {"Si":"GTH-PBE"}
+#     basis_set = {"Si":"DZVP-MOLOPT-SR-GTH-q4"}
+#     atom_types_image = []
+#     for atom in image.atom_types_image:
+#         atom_types_image.append(ELEMENTTABLE_2[atom])
+#     coord_xyz = make_cp2k_xyz(
+#         atom_types = atom_types_image,
+#         coord_list = image.position
+#     )
+#     with open("/data/home/wuxingxing/datas/al_dir/si_exp/init_bulk/coord.xyz", "w") as fp:
+#         fp.write(coord_xyz)
-    # make_cp2k_input(
-    #     cell = image.lattice,
-    #     atom_names=["Si"],
-    #     basis_set_file_name="BASIS_SET_FILE",
-    #     potential_file_name="POTENTIAL_FILE",
-    #     xc_functional="PBE",
-    #     potential=potential,
-    #     basis_set=basis_set,
-    #     coord_content=coord_xyz
-    # )
+#     make_cp2k_input(
+#         cell = image.lattice,
+#         atom_names=["Si"],
+#         basis_set_file_name="BASIS_SET_FILE",
+#         potential_file_name="POTENTIAL_FILE",
+#         xc_functional="PBE",
+#         potential=potential,
+#         basis_set=basis_set,
+#         coord_content=coord_xyz
+#     )

pwact-0.1.21/pwact/utils/app_lib/cp2k_dp.py ADDED Viewed

@@ -0,0 +1,194 @@
+import numpy as np
+# Base cp2k input tree: make_cp2k_input() merges the user parameters and the cell
+# vectors into it with update_dict() and expands the result with iterdict().
+# Nested dicts become "&SECTION ... &END SECTION" blocks, a "_" key holds the value
+# written on the "&SECTION" line itself, and list values (see KIND) describe a
+# section that is repeated once per list position.
+default_config = {
+    "GLOBAL": {"PROJECT": "DPGEN"},
+    "FORCE_EVAL": {
+        "METHOD": "QS",
+        "STRESS_TENSOR": "ANALYTICAL",
+        "DFT": {
+            "BASIS_SET_FILE_NAME": "./cp2k_basis_pp_file/BASIS_MOLOPT",
+            "POTENTIAL_FILE_NAME": "./cp2k_basis_pp_file/GTH_POTENTIALS",
+            "CHARGE": 0,
+            "UKS": "F",
+            "MULTIPLICITY": 1,
+            "MGRID": {"CUTOFF": 400, "REL_CUTOFF": 50, "NGRIDS": 4},
+            "QS": {"EPS_DEFAULT": "1.0E-12"},
+            "SCF": {"SCF_GUESS": "ATOMIC", "EPS_SCF": "1.0E-6", "MAX_SCF": 50},
+            "XC": {"XC_FUNCTIONAL": {"_": "PBE"}},
+        },
+        "SUBSYS": {
+            # placeholder cell; make_cp2k_input() overwrites A, B and C
+            "CELL": {"A": "10 .0 .0", "B": ".0 10 .0", "C": ".0 .0 10"},
+            "COORD": {"@include": "coord.xyz"},
+            # parallel lists: position i of every list describes one &KIND section
+            "KIND": {
+                "_": ["H", "C", "N"],
+                "POTENTIAL": ["GTH-PBE-q1", "GTH-PBE-q4", "GTH-PBE-q5"],
+                "BASIS_SET": ["DZVP-MOLOPT-GTH", "DZVP-MOLOPT-GTH", "DZVP-MOLOPT-GTH"],
+            },
+        },
+        "PRINT": {"FORCES": {"_": "ON"}, "STRESS_TENSOR": {"_": "ON"}},
+    },
+}
+def update_dict(old_d, update_d):
+    """Merge update_d into old_d in place, descending into nested dictionaries.
+    A key is merged recursively only when it already exists in old_d with a dict
+    value and the new value is a mapping; in every other case the new value
+    simply replaces (or creates) the entry.
+    :old_d: dictionary that receives the changes (modified in place)
+    :update_d: dictionary holding the values to apply
+    """
+    from collections.abc import Mapping
+    for key, incoming in update_d.items():
+        mergeable = (
+            key in old_d
+            and isinstance(old_d[key], dict)
+            and isinstance(incoming, Mapping)
+        )
+        if not mergeable:
+            # plain value, new key, or type mismatch: overwrite
+            old_d[key] = incoming
+            continue
+        update_dict(old_d[key], incoming)
+def iterdict(d, out_list, flag=None, indent=0):
+    """Recursively expand a nested dictionary into cp2k input lines.
+    The lines are appended to / inserted into out_list in place; nothing is returned.
+    :d: current dictionary under expansion
+    :out_list: list of output lines that is built up in place
+    :flag: used to record dictionary state. if flag is None,
+    it means we are in top level dict. otherwise flag is a string holding the
+    name of the section being filled, followed by its parent sections joined by " #".
+    :indent: indent (number of spaces) for entries of the current section.
+    A key "_" carries the section parameter: its value is written on the
+    "&SECTION" line itself. A list value marks a repeated section.
+    """
+    for k, v in d.items():
+        k = str(k)  # cast key into string
+        # if value is dictionary
+        if isinstance(v, dict):
+            # flag == None, it is now in top level section of cp2k
+            if flag is None:
+                # write an empty "&K" / "&END K" pair, then let the recursion fill it
+                out_list.append("&" + k)
+                out_list.append("&END " + k)
+                iterdict(v, out_list, k, indent + 2)
+            # flag is not None, now it has name of section
+            else:
+                # the new subsection is inserted right before the parent's "&END" line,
+                # which is located by its exact text
+                index = out_list.index(" " * (indent - 2) + "&END " + flag)
+                out_list.insert(index, " " * indent + "&" + k + " #" + flag)
+                out_list.insert(index + 1, " " * indent + "&END " + k + " #" + flag)
+                # the flag now contains its parent section name, separated by "#".
+                iterdict(v, out_list, k + " #" + flag, indent + 2)
+        elif isinstance(v, list):
+            # a list value means the section named by flag is a repeated section
+            index = out_list.index(" " * (indent - 2) + "&" + flag)
+            # delete the current constructed repeat section
+            del out_list[index : index + 2]
+            # do a loop over key and corresponding list
+            # NOTE(review): every value of d is collected here, so this presumably
+            # expects all values of d to be lists of equal length - confirm
+            k_tmp_list = []
+            v_list_tmp_list = []
+            for k_tmp, v_tmp in d.items():
+                k_tmp_list.append(str(k_tmp))
+                v_list_tmp_list.append(v_tmp)
+            # zip(*...) yields one tuple per repeated section (item i of every list)
+            for repeat_keyword in zip(*v_list_tmp_list):
+                # every copy is inserted at the same index, i.e. ahead of the copies
+                # written in earlier iterations
+                out_list.insert(index, " " * (indent - 2) + "&" + flag)
+                out_list.insert(index + 1, " " * (indent - 2) + "&END " + flag)
+                for idx, k_tmp in enumerate(k_tmp_list):
+                    if k_tmp == "_":
+                        # section parameter: rewrite the header as "&NAME value"
+                        out_list[index] = (
+                            " " * (indent - 2)
+                            + "&"
+                            + flag.split(" #")[0]
+                            + " "
+                            + repeat_keyword[idx]
+                        )
+                    else:
+                        # ordinary keyword: goes directly below the header line
+                        out_list.insert(
+                            index + 1,
+                            " " * (indent) + k_tmp + " " + repeat_keyword[idx],
+                        )
+            # all keys of d were consumed by the loop above, so stop iterating d
+            break
+        else:
+            v = str(v)
+            if flag is None:
+                # top-level keyword outside any section; it is also echoed to stdout
+                out_list.append(k + " " + v)
+                print(k, ":", v)
+            else:
+                if k == "_":
+                    # section parameter: rewrite the section's "&" line as "&NAME value"
+                    index = out_list.index(" " * (indent - 2) + "&" + flag)
+                    out_list[index] = (
+                        " " * (indent - 2) + "&" + flag.split(" #")[0] + " " + v
+                    )
+                else:
+                    # ordinary keyword: insert it just before the section's "&END" line
+                    index = out_list.index(" " * (indent - 2) + "&END " + flag)
+                    out_list.insert(index, " " * indent + k + " " + v)
+def make_cp2k_input(sys_data, fp_params):
+    """Build the text of a cp2k input file for the first frame of sys_data.
+    The module-level default_config is used as the base, fp_params is merged on
+    top of it and the CELL vectors are then set from sys_data["cells"][0].
+    :sys_data: dict whose "cells" entry holds, per frame, 9 values that reshape to a 3x3 cell
+    :fp_params: user overrides in the same nested layout as default_config;
+    None or an empty dict means no overrides
+    :return: the cp2k input as a single newline-joined string
+    """
+    import copy
+    # merge into a private copy: updating the shared default_config in place would
+    # leak one call's user settings and cell into every later call
+    config = copy.deepcopy(default_config)
+    # convert cell to cell strings; [1:-1] strips the "[" and "]" numpy adds
+    cell = np.reshape(sys_data["cells"][0], [3, 3])
+    cell_a, cell_b, cell_c = (np.array2string(cell[row, :])[1:-1] for row in range(3))
+    cell_config = {
+        "FORCE_EVAL": {"SUBSYS": {"CELL": {"A": cell_a, "B": cell_b, "C": cell_c}}}
+    }
+    # user values first, cell last so the cell always reflects sys_data
+    if fp_params:
+        update_dict(config, fp_params)
+    update_dict(config, cell_config)
+    # output list
+    input_str = []
+    iterdict(config, input_str)
+    return "\n".join(input_str)
+def make_cp2k_xyz(sys_data):
+    """Render the first frame of sys_data as coordinate lines for a cp2k input.
+    :sys_data: dict with "atom_names" (element names), "atom_types" (per-atom
+    indices into atom_names) and "coords" (per-frame coordinate rows)
+    :return: a string that starts with a newline and then holds one
+    "<element> <coordinates>" line per atom
+    """
+    names = sys_data["atom_names"]
+    type_indices = sys_data["atom_types"]
+    frame_coords = sys_data["coords"][0]
+    # map every atom's type index to its element name
+    symbols = np.array(names)[type_indices]
+    # str(row)[1:-1] drops the surrounding "[" and "]" of the numpy row
+    rows = [
+        str(symbol) + " " + str(position[:])[1:-1] + "\n"
+        for symbol, position in zip(symbols, frame_coords)
+    ]
+    return "\n" + "".join(rows)
+def make_cp2k_input_from_external(sys_data, exinput_path):
+    """Return the external cp2k input with its ABC cell line replaced by A/B/C vectors.
+    The last line of the file that contains "ABC" is removed and three lines
+    "A  ...", "B  ...", "C  ..." built from sys_data["cells"][0] take its place.
+    :sys_data: dict whose "cells" entry holds, per frame, 9 values that reshape to a 3x3 cell
+    :exinput_path: path of the user supplied cp2k input file
+    :return: the modified input as one string
+    :raises ValueError: if no line of the file contains "ABC"
+    """
+    # read the input content as a list of lines
+    with open(exinput_path) as f:
+        exinput = f.readlines()
+    # find the ABC cell line (the last one wins, as before)
+    abc_indices = [line_idx for line_idx, line in enumerate(exinput) if "ABC" in line]
+    if not abc_indices:
+        raise ValueError(
+            "no 'ABC' cell line found in external cp2k input: {}".format(exinput_path)
+        )
+    cell_idx = abc_indices[-1]
+    # delete by index: list.remove() drops the first line with identical text,
+    # which is not necessarily the line that was located above
+    del exinput[cell_idx]
+    # convert cell to cell strings; [1:-1] strips the "[" and "]" numpy adds
+    cell = np.reshape(sys_data["cells"][0], [3, 3])
+    cell_lines = [
+        label + "  " + np.array2string(vector)[1:-1] + "\n"
+        for label, vector in zip("ABC", cell)
+    ]
+    # put the three vector lines where the ABC line used to be
+    exinput[cell_idx:cell_idx] = cell_lines
+    return "".join(exinput)

{pwact-0.1.19 → pwact-0.1.21}/pwact/utils/app_lib/pwmat.py RENAMED Viewed

@@ -1,8 +1,9 @@
 import os
 import numpy as np
 import subprocess
-from pwact.utils.constant import PWMAT, VASP
+from pwact.utils.constant import PWMAT, VASP, PWDATA, get_atomic_name_from_number
 from pwact.utils.file_operation import del_file, copy_file
+from pwdata import Config
 # '''
 # description:
 #     lammps dump file to poscar format or pwmat format
@@ -100,16 +101,17 @@ def _reciprocal_box(box):
 #     return ret
 def _make_kspacing_kpoints(config, kspacing):
-    with open(config, "r") as fp:
-        lines = fp.read().split("\n")
-    box = []
-    for idx, ii in enumerate(lines):
-        if "LATTICE" in ii.upper():
-            for kk in range(idx + 1, idx + 1 + 3):
-                vector = [float(jj) for jj in lines[kk].split()[0:3]]
-                box.append(vector)
-            box = np.array(box)
-            rbox = _reciprocal_box(box)
+    config = Config(data_path=config, format="pwmat/config")
+    lattice = config.images[0].lattice
+    # box = []
+    # for idx, ii in enumerate(lines):
+    #     if "LATTICE" in ii.upper():
+    #         for kk in range(idx + 1, idx + 1 + 3):
+    #             vector = [float(jj) for jj in lines[kk].split()[0:3]]
+    #             box.append(vector)
+    #         box = np.array(box)
+    #         rbox = _reciprocal_box(box)
+    rbox = _reciprocal_box(lattice)
     kpoints = [
         round(2 * np.pi * np.linalg.norm(ii) / kspacing) for ii in rbox
     ]
@@ -321,13 +323,19 @@ def set_etot_input_by_file(
     atom_config:str=None,
     pseudo_names:list[str]=None,
     is_scf = False, # if True, job is scf, and 'OUT.MLMD = T' to etot.input
-    is_skf_file = False  # if True, set in.skf to etot.input file
+    is_skf_file = False,  # if True, set in.skf to etot.input file
+    gaussian_base_param:dict=None
     ):
     key_values, etot_lines = read_and_check_etot_input(etot_input_file)
+    is_gaussian = False
+    if "USE_GAUSSIAN" in key_values.keys() and key_values["USE_GAUSSIAN"] is not None and key_values["USE_GAUSSIAN"] == "T":
+        is_gaussian = True
     is_skf = False
     if "USE_DFTB" in key_values.keys() and key_values["USE_DFTB"] is not None and key_values["USE_DFTB"] == "T":
         if key_values["DFTB_DETAIL"].replace(",", " ").split()[0] != "3": # not chardb
             is_skf = True
     index = 0
     new_etot_lines = []
     while index < len(etot_lines):
@@ -340,18 +348,26 @@ def set_etot_input_by_file(
             pass
         elif "IN.PSP" in etot_lines[index].upper(): # to avoid the new_etot_lines add 'IN.PSP' and 'in.skf' 'in.atom' in etot_lines
             pass
+        elif "IN.BASIS" in etot_lines[index].upper(): # to avoid the new_etot_lines add 'IN.BASIS' in etot_lines
+            pass
             # etot_lines.remove(etot_lines[index])
         else:
             new_etot_lines.append(etot_lines[index])
         index += 1
     new_etot_lines.append("\nIN.ATOM = {}\n".format(os.path.basename(atom_config)))
+    atom_type_numbers = Config(format=PWDATA.pwmat_config, data_path=atom_config).images[0].atom_type
+    atom_type_names = get_atomic_name_from_number(atom_type_numbers)
     # if dftb and need in_skf
     if is_skf and is_skf_file:
         new_etot_lines.append("IN.SKF = ./{}/\n".format(PWMAT.in_skf))
     # is not for dftb, reset the IN.PSP
     if "USE_DFTB" not in key_values.keys() or key_values["USE_DFTB"] is None and key_values["USE_DFTB"] == "F":
-        for pseudo_i, pseudo in enumerate(pseudo_names):
-            new_etot_lines.append("IN.PSP{} = {}\n".format(pseudo_i + 1, pseudo))
+        if is_gaussian is False:
+            for pseudo_i, pseudo in enumerate(pseudo_names):
+                new_etot_lines.append("IN.PSP{} = {}\n".format(pseudo_i + 1, pseudo))
+        else: # pwmat gaussian
+            psp_line = set_gassion_psp(atom_type_names, gaussian_base_param)
+            new_etot_lines.append(psp_line)
     key_list = list(key_values)
     # set OUT.MLMD
     if "OUT.MLMD" not in key_list:
@@ -359,11 +375,11 @@ def set_etot_input_by_file(
             new_etot_lines.append("OUT.MLMD = T\n")
     # # set OUT.WG OUT.RHO OUT.VR
     if "OUT.WG" not in key_list:
-        etot_lines.append("OUT.WG = F\n")
+        new_etot_lines.append("OUT.WG = F\n")
     if "OUT.RHO" not in key_list:
-        etot_lines.append("OUT.RHO = F\n")
+        new_etot_lines.append("OUT.RHO = F\n")
     if "OUT.VR" not in key_list:
-        etot_lines.append("OUT.VR = F\n")
+        new_etot_lines.append("OUT.VR = F\n")
     # if MP_N123 is not in etot.input file then using 'kespacing' generates it
     if "MP_N123" not in key_list:
         kspacing = PWMAT.kspacing_default if kspacing is None else kspacing
@@ -378,6 +394,20 @@ def set_etot_input_by_file(
     return "".join(new_etot_lines)
+def set_gassion_psp(atom_list, gaussian_base_param:dict):
+    """Build the IN.PSPn / IN.BASISn lines used for a pwmat gaussian etot.input.
+    Elements are visited in the order of gaussian_base_param["ELEMENT"]; those not
+    in atom_list are skipped and the remaining ones are numbered from 1.
+    :atom_list: element names present in the structure
+    :gaussian_base_param: dict with the per-element lists "ELEMENT", "POTENTIAL",
+    "BASIS_SET" and the shared "POTENTIAL_FILE_NAME" / "BASIS_SET_FILE_NAME" values
+    :return: one string: a newline, the IN.PSP lines, a newline, the IN.BASIS lines
+    """
+    potential_rows = []
+    basis_rows = []
+    for position, element in enumerate(gaussian_base_param["ELEMENT"]):
+        if element in atom_list:
+            # numbering counts only the elements that are actually written
+            slot = len(potential_rows) + 1
+            potential_rows.append(
+                "IN.PSP{}   = {} {} {}\n".format(
+                    slot,
+                    element,
+                    gaussian_base_param["POTENTIAL"][position],
+                    gaussian_base_param["POTENTIAL_FILE_NAME"],
+                )
+            )
+            basis_rows.append(
+                "IN.BASIS{} = {} {} {}\n".format(
+                    slot,
+                    element,
+                    gaussian_base_param["BASIS_SET"][position],
+                    gaussian_base_param["BASIS_SET_FILE_NAME"],
+                )
+            )
+    return "\n" + "".join(potential_rows) + "\n" + "".join(basis_rows)
 def is_alive_atomic_energy(movement_list:list):
     if len(movement_list) < 1:
         return False
@@ -443,7 +473,8 @@ bool_keys=[
     'OUT.MLMD',
     'NUM_BLOCKED_PSI',
     'OUT.RHOATOM',
-    "USE_DFTB"
+    "USE_DFTB",
+    "USE_GAUSSIAN"
     ]
 char_keys=['PRECISION',

{pwact-0.1.19 → pwact-0.1.21}/pwact.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pwact
-Version: 0.1.19
+Version: 0.1.21
 Summary: PWACT is an open-source automated active learning platform based on PWMLFF for efficient data sampling.
 Home-page: https://github.com/LonxunQuantum/PWact
 Author: LonxunQuantum

{pwact-0.1.19 → pwact-0.1.21}/pwact.egg-info/SOURCES.txt RENAMED Viewed

@@ -57,5 +57,6 @@ pwact/utils/slurm_script.py
 pwact/utils/app_lib/__init__.py
 pwact/utils/app_lib/common.py
 pwact/utils/app_lib/cp2k.py
+pwact/utils/app_lib/cp2k_dp.py
 pwact/utils/app_lib/lammps.py
 pwact/utils/app_lib/pwmat.py

{pwact-0.1.19 → pwact-0.1.21}/setup.py RENAMED Viewed

@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
 setuptools.setup(
     name="pwact",
-    version="0.1.19",
+    version="0.1.21",
     author="LonxunQuantum",
     author_email="lonxun@pwmat.com",
     description="PWACT is an open-source automated active learning platform based on PWMLFF for efficient data sampling.",