PyPI - pwact - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.2.dev0__py3-none-any.whl - Mend

pwact 0.2.0__py3-none-any.whl → 0.2.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

pwact/active_learning/explore/run_model_md.py +110 -0
pwact/active_learning/explore/select_image.py +1 -1
pwact/active_learning/init_bulk/direct.py +182 -0
pwact/active_learning/init_bulk/duplicate_scale.py +1 -1
pwact/active_learning/init_bulk/explore.py +301 -0
pwact/active_learning/init_bulk/init_bulk_run.py +78 -48
pwact/active_learning/init_bulk/relabel.py +149 -120
pwact/active_learning/label/labeling.py +125 -11
pwact/active_learning/user_input/init_bulk_input.py +55 -6
pwact/active_learning/user_input/iter_input.py +12 -0
pwact/active_learning/user_input/resource.py +18 -6
pwact/active_learning/user_input/scf_param.py +24 -6
pwact/active_learning/user_input/train_param/optimizer_param.py +1 -1
pwact/main.py +17 -7
pwact/utils/app_lib/do_direct_sample.py +145 -0
pwact/utils/app_lib/do_eqv2model.py +41 -0
pwact/utils/constant.py +31 -11
pwact/utils/file_operation.py +12 -5
{pwact-0.2.0.dist-info → pwact-0.2.2.dev0.dist-info}/METADATA +1 -1
{pwact-0.2.0.dist-info → pwact-0.2.2.dev0.dist-info}/RECORD +24 -20
{pwact-0.2.0.dist-info → pwact-0.2.2.dev0.dist-info}/LICENSE +0 -0
{pwact-0.2.0.dist-info → pwact-0.2.2.dev0.dist-info}/WHEEL +0 -0
{pwact-0.2.0.dist-info → pwact-0.2.2.dev0.dist-info}/entry_points.txt +0 -0
{pwact-0.2.0.dist-info → pwact-0.2.2.dev0.dist-info}/top_level.txt +0 -0

pwact/active_learning/label/labeling.py CHANGED Viewed

@@ -35,6 +35,7 @@ from pwact.utils.file_operation import write_to_file, copy_file, copy_dir, searc
 from pwact.utils.app_lib.common import link_pseudo_by_atom, set_input_script
 from pwact.data_format.configop import extract_pwdata, save_config, get_atom_type
+from pwdata import Config
 class Labeling(object):
     @staticmethod
     def kill_job(root_dir:str, itername:str):
@@ -59,9 +60,10 @@ class Labeling(object):
         self.real_explore_dir = os.path.join(self.input_param.root_dir, itername, AL_STRUCTURE.explore)
         self.md_dir = os.path.join(self.explore_dir, EXPLORE_FILE_STRUCTURE.md)
         self.select_dir = os.path.join(self.explore_dir, EXPLORE_FILE_STRUCTURE.select)
+        self.direct_dir = os.path.join(self.explore_dir, EXPLORE_FILE_STRUCTURE.direct)
         self.real_md_dir = os.path.join(self.real_explore_dir, EXPLORE_FILE_STRUCTURE.md)
         self.real_select_dir = os.path.join(self.real_explore_dir, EXPLORE_FILE_STRUCTURE.select)
+        self.real_direct_dir = os.path.join(self.real_explore_dir, EXPLORE_FILE_STRUCTURE.direct)
         # labed work dir
         self.label_dir = os.path.join(self.input_param.root_dir, itername, TEMP_STRUCTURE.tmp_run_iter_dir, AL_STRUCTURE.labeling)
         self.scf_dir = os.path.join(self.label_dir, LABEL_FILE_STRUCTURE.scf)
@@ -71,6 +73,9 @@ class Labeling(object):
         self.real_scf_dir = os.path.join(self.real_label_dir, LABEL_FILE_STRUCTURE.scf)
         self.real_result_dir = os.path.join(self.real_label_dir, LABEL_FILE_STRUCTURE.result)
+        self.bigmodel_dir = os.path.join(self.label_dir, LABEL_FILE_STRUCTURE.bigmodel)
+        self.real_bigmodel_dir = os.path.join(self.real_label_dir, LABEL_FILE_STRUCTURE.bigmodel)
     '''
     description:
     the scf work dir file structure is as follow.
@@ -86,9 +91,8 @@ class Labeling(object):
     return {*}
     author: wuxingxing
     '''
     def make_scf_work(self):
-        # read select info, and make scf
-        # ["devi_force", "file_path", "config_index"]
         candidate = pd.read_csv(os.path.join(self.select_dir, EXPLORE_FILE_STRUCTURE.candidate))
         # make scf work dir
         scf_dir_list = []
@@ -108,14 +112,51 @@ class Labeling(object):
                 atom_names = line.split()
             self.make_scf_file(scf_sub_md_sys_path, tarj_lmp, atom_names)
             scf_dir_list.append(scf_sub_md_sys_path)
         self.make_scf_slurm_job_files(scf_dir_list)
+    def make_bigmodel_work(self):
+        # copy from realdir/direct/select.xyz
+        if self.input_param.strategy.direct:
+            copy_file(os.path.join(self.real_direct_dir, EXPLORE_FILE_STRUCTURE.select_xyz),
+                os.path.join(self.bigmodel_dir, EXPLORE_FILE_STRUCTURE.select_xyz))
+        else:
+            # copy trajs to bigmodel_dir and cvt to xyz
+            candidate = pd.read_csv(os.path.join(self.select_dir, EXPLORE_FILE_STRUCTURE.candidate))
+            # make scf work dir
+            image_list = None
+            for index, row in candidate.iterrows():
+                config_index    = int(row["config_index"])
+                sub_md_sys_path = row["file_path"]
+                atom_names = None
+                with open(os.path.join(sub_md_sys_path, LAMMPS.atom_type_file), 'r') as rf:
+                    line = rf.readline()
+                    atom_names = line.split()
+                if image_list is None:
+                    image_list = Config(data_path=os.path.join(sub_md_sys_path, EXPLORE_FILE_STRUCTURE.traj, "{}{}".format(config_index, LAMMPS.traj_postfix)),
+                                        format=PWDATA.lammps_dump, atom_names=atom_names)
+                else:
+                    image_list.append(Config(data_path=os.path.join(sub_md_sys_path, EXPLORE_FILE_STRUCTURE.traj, "{}{}".format(config_index, LAMMPS.traj_postfix)),
+                                        format=PWDATA.lammps_dump, atom_names=atom_names))
+            # cvt_lammps.dump to extxyz
+            image_list.to(data_path=self.bigmodel_dir, format=PWDATA.extxyz, data_name="{}".format(EXPLORE_FILE_STRUCTURE.select_xyz))
+        # copy bigmodelscript
+        copy_file(self.input_param.scf.bigmodel_script, os.path.join(self.bigmodel_dir, os.path.basename(self.input_param.scf.bigmodel_script)))
+        # make slrum file
+        self.make_bigmodel_slurm_job_files([self.bigmodel_dir])
     def back_label(self):
-        slurm_remain, slurm_success = get_slurm_job_run_info(self.real_scf_dir, \
-            job_patten="*-{}".format(LABEL_FILE_STRUCTURE.scf_job), \
-            tag_patten="*-{}".format(LABEL_FILE_STRUCTURE.scf_tag))
-        slurm_done = True if len(slurm_remain) == 0 and len(slurm_success) > 0 else False
+        if self.input_param.scf.dft_style == DFT_STYLE.bigmodel:
+            slurm_remain, slurm_success = get_slurm_job_run_info(self.real_bigmodel_dir, \
+                job_patten="*-{}".format(LABEL_FILE_STRUCTURE.bigmodel_job), \
+                tag_patten="*-{}".format(LABEL_FILE_STRUCTURE.bigmodel_tag))
+            slurm_done = True if len(slurm_remain) == 0 and len(slurm_success) > 0 else False
+        else:
+            slurm_remain, slurm_success = get_slurm_job_run_info(self.real_scf_dir, \
+                job_patten="*-{}".format(LABEL_FILE_STRUCTURE.scf_job), \
+                tag_patten="*-{}".format(LABEL_FILE_STRUCTURE.scf_tag))
+            slurm_done = True if len(slurm_remain) == 0 and len(slurm_success) > 0 else False
         if slurm_done:
             # bk and do new job
             target_bk_file = add_postfix_dir(self.real_label_dir, postfix_str="bk")
@@ -147,7 +188,31 @@ class Labeling(object):
                 mission.commit_jobs()
                 mission.check_running_job()
                 mission.all_job_finished(error_type=SLURM_OUT.dft_out)
+    def do_bigmodel_jobs(self):
+        mission = Mission()
+        slurm_remain, slurm_success = get_slurm_job_run_info(self.bigmodel_dir, \
+            job_patten="*-{}".format(LABEL_FILE_STRUCTURE.bigmodel_job), \
+            tag_patten="*-{}".format(LABEL_FILE_STRUCTURE.bigmodel_tag))
+        slurm_done = True if len(slurm_remain) == 0 and len(slurm_success) > 0 else False
+        if slurm_done is False:
+            #recover slurm jobs
+            if len(slurm_remain) > 0:
+                print("Run bigModel Job:\n")
+                print(slurm_remain)
+                for i, script_path in enumerate(slurm_remain):
+                    slurm_job = SlurmJob()
+                    tag_name = "{}-{}".format(os.path.basename(script_path).split('-')[0].strip(), LABEL_FILE_STRUCTURE.bigmodel_tag)
+                    tag = os.path.join(os.path.dirname(script_path),tag_name)
+                    slurm_job.set_tag(tag)
+                    slurm_job.set_cmd(script_path)
+                    mission.add_job(slurm_job)
+            if len(mission.job_list) > 0:
+                mission.commit_jobs()
+                mission.check_running_job()
+                mission.all_job_finished()
     def make_scf_file(self, scf_dir:str, tarj_lmp:str, atom_names:list[str]=None):
         config_index = os.path.basename(tarj_lmp).split('.')[0]
         if DFT_STYLE.vasp == self.resource.dft_style: # when do scf, the vasp input file name is 'POSCAR'
@@ -230,6 +295,42 @@ class Labeling(object):
             slurm_job_file = os.path.join(self.scf_dir, slurm_script_name)
             write_to_file(slurm_job_file, group_slurm_script, "w")
+    def make_bigmodel_slurm_job_files(self, scf_sub_list:list[str]):
+        del_file_list_by_patten(self.bigmodel_dir, "*{}".format(LABEL_FILE_STRUCTURE.scf_job))
+        group_list = split_job_for_group(1, scf_sub_list, 1)
+        for group_index, group in enumerate(group_list):
+            if group[0] == "NONE":
+                continue
+            jobname = "bigmodel{}".format(group_index)
+            tag_name = "{}-{}".format(group_index, LABEL_FILE_STRUCTURE.bigmodel_tag)
+            tag = os.path.join(self.bigmodel_dir, tag_name)
+            run_cmd = self.resource.dft_resource.command
+            # if self.resource.dft_resource.gpu_per_node > 0:
+            #     run_cmd = "mpirun -np {} PWmat > {}".format(self.resource.dft_resource.gpu_per_node, SLURM_OUT.md_out)
+            # else:
+            #     raise Exception("ERROR! the cpu version of pwmat not support yet!")
+            group_slurm_script = set_slurm_script_content(gpu_per_node=self.resource.dft_resource.gpu_per_node,
+                number_node = self.resource.dft_resource.number_node,
+                cpu_per_node = self.resource.dft_resource.cpu_per_node,
+                queue_name = self.resource.dft_resource.queue_name,
+                custom_flags = self.resource.dft_resource.custom_flags,
+                env_script = self.resource.dft_resource.env_script,
+                job_name = jobname,
+                run_cmd_template = run_cmd,
+                group = group,
+                job_tag = tag,
+                task_tag = LABEL_FILE_STRUCTURE.bigmodel_tag,
+                task_tag_faild = LABEL_FILE_STRUCTURE.bigmodel_tag_failed,
+                parallel_num=self.resource.dft_resource.parallel_num,
+                check_type=self.resource.dft_style
+                )
+            slurm_script_name = "{}-{}".format(group_index, LABEL_FILE_STRUCTURE.bigmodel_job)
+            slurm_job_file = os.path.join(self.bigmodel_dir, slurm_script_name)
+            write_to_file(slurm_job_file, group_slurm_script, "w")
     '''
     description:
     collecte OUT.MLMD to mvm-
@@ -274,12 +375,12 @@ class Labeling(object):
                 for scf_file in scf_files:
                     scf_file_path = os.path.join(scf_dir, scf_file)
                     if scf_file.lower() in DFT_STYLE.get_scf_reserve_list(self.resource.dft_style) \
-                        and scf_file.lower() not in DFT_STYLE.get_scf_del_list():# for pwmat final.config
+                        or "atom.config" in scf_file.lower() :# for the input natom.config
                         copy_file(scf_file_path, scf_file_path.replace(TEMP_STRUCTURE.tmp_run_iter_dir, ""))
         # scf files to pwdata format
         scf_configs = self.collect_scf_configs()
         extract_pwdata(input_data_list=scf_configs,
                 intput_data_format =DFT_STYLE.get_format_by_postfix(os.path.basename(scf_configs[0])),
                 save_data_path =self.result_dir,
@@ -289,3 +390,16 @@ class Labeling(object):
         )
         # copy to main dir
         copy_dir(self.result_dir, self.real_result_dir)
+    def do_post_bigmodel(self):
+        # copy the bigmodel labeled.xyz to result
+        if self.input_param.data_format == PWDATA.extxyz:
+            copy_file(os.path.join(self.bigmodel_dir, LABEL_FILE_STRUCTURE.train_xyz), os.path.join(self.result_dir, LABEL_FILE_STRUCTURE.train_xyz))
+        else:
+            images = Config(data_path=os.path.join(self.bigmodel_dir, LABEL_FILE_STRUCTURE.train_xyz), format=PWDATA.extxyz)
+            images.to(data_path=self.result_dir, format=PWDATA.pwmlff_npy)
+        # copy bigmodel dir to real dir
+        copy_dir(self.bigmodel_dir, self.real_bigmodel_dir)
+        copy_dir(self.result_dir, self.real_result_dir)
+        # del slurm logs and tags
+        del_file_list_by_patten(self.real_bigmodel_dir, "slurm-*")

pwact/active_learning/user_input/init_bulk_input.py CHANGED Viewed

@@ -26,13 +26,15 @@ class InitBulkParam(object):
             sys_configs = [sys_configs]
         # set sys_config detail
-        self.dft_style = get_required_parameter("dft_style", json_dict).lower()
+        self.dft_style = get_parameter("dft_style", json_dict, "PWMAT").lower()
         self.scf_style = get_parameter("scf_style", json_dict, None)
         self.sys_config:list[Stage] = []
         self.is_relax = False
         self.is_aimd = False
         self.is_scf = False
+        self.is_bigmodel=False
+        self.is_direct = False
         for index, config in enumerate(sys_configs):
             stage = Stage(config, index, sys_config_prefix, self.dft_style)
             self.sys_config.append(stage)
@@ -42,22 +44,46 @@ class InitBulkParam(object):
                 self.is_aimd = True
             if stage.scf:
                 self.is_scf = True
+            if stage.bigmodel:
+                self.is_bigmodel = True
+            if stage.direct:
+                self.is_direct = True
         # for PWmat: set etot.input files and persudo files
         # for Vasp: set INCAR files and persudo files
-        self.dft_input = SCFParam(json_dict=json_dict, is_scf=self.is_scf, is_relax=self.is_relax, is_aimd=self.is_aimd, root_dir=self.root_dir, dft_style=self.dft_style, scf_style=self.scf_style)
+        self.dft_input = SCFParam(json_dict=json_dict,
+                                    is_scf=self.is_scf,
+                                    is_relax=self.is_relax,
+                                    is_aimd=self.is_aimd,
+                                    root_dir=self.root_dir,
+                                    dft_style=self.dft_style,
+                                    scf_style=self.scf_style,
+                                    is_bigmodel=self.is_bigmodel,
+                                    is_direct=self.is_direct)
         # check and set relax etot.input file
         for config in self.sys_config:
             if self.is_relax:
                 if config.relax_input_idx >= len(self.dft_input.relax_input_list):
                     raise Exception("Error! for config '{}' 'relax_input_idx' {} not in 'relax_input'!".format(os.path.basename(config.config_file), config.relax_input_idx))
                 config.set_relax_input_file(self.dft_input.relax_input_list[config.relax_input_idx])
             if self.is_scf:
                 if not os.path.exists(self.dft_input.scf_input_list[0].input_file):
                     raise Exception("Error! relabel dft input file {} not exisit!".format(self.dft_input.scf_input_list[0].input_file))
                 config.set_scf_input_file(self.dft_input.scf_input_list[0])
-        # check and set aimd etot.input file
-        for config in self.sys_config:
+            if self.is_bigmodel:
+                if config.bigmodel_input_idx >= len(self.dft_input.bigmodel_input_list):
+                    raise Exception("Error! for script '{}' 'bigmodel_input_idx' {} not in 'bigmodel_input'!".format(os.path.basename(config.config_file), config.bigmodel_input_idx))
+                config.set_bigmodel_input_file(self.dft_input.bigmodel_input_list[config.bigmodel_input_idx])
+            if self.is_direct:
+                if config.direct_input_idx >= len(self.dft_input.direct_input_list):
+                    raise Exception("Error! for script '{}' 'direct_input_idx' {} not in 'direct_input'!".format(os.path.basename(config.config_file), config.direct_input_idx))
+                config.set_direct_input_file(self.dft_input.direct_input_list[config.direct_input_idx])
+            # check and set aimd etot.input file
             if self.is_aimd:
                 if config.aimd_input_idx >= len(self.dft_input.aimd_input_list):
                     raise Exception("Error! for config '{}' 'aimd_input_idx' {} not in 'aimd_input'!".format(os.path.basename(config.config_file), config.aimd_input_idx))
@@ -77,16 +103,29 @@ class Stage(object):
         self.format = get_parameter("format", json_dict, PWDATA.pwmat_config).lower()
         self.pbc = get_parameter("pbc", json_dict, [1,1,1])
         # extract config file to Config object, then use it
-        self.relax = get_parameter("relax", json_dict, True)
+        self.relax = get_parameter("relax", json_dict, False)
         self.relax_input_idx = get_parameter("relax_input_idx", json_dict, 0)
         self.relax_input_file = None
-        self.aimd = get_parameter("aimd", json_dict, True)
+        self.aimd = get_parameter("aimd", json_dict, False)
         self.aimd_input_idx = get_parameter("aimd_input_idx", json_dict, 0)
         self.aimd_input_file = None
         self.scf = get_parameter("scf", json_dict, False)
+        self.scf_input_idx = get_parameter("scf_input_idx", json_dict, 0)
+        self.scf_input_file = None
+        self.bigmodel = get_parameter("bigmodel", json_dict, False)
+        self.bigmodel_input_idx = get_parameter("bigmodel_input_idx", json_dict, 0)
+        self.bigmodel_script = None
+        self.direct = get_parameter("direct", json_dict, False)
+        self.direct_input_idx = get_parameter("direct_input_idx", json_dict, 0)
+        self.direct_script = None
+        if self.bigmodel and self.aimd:
+            raise Exception("ERROR! The 'aimd' and 'bigmodel' cannot be set simultaneously!")
         super_cell = get_parameter("super_cell", json_dict, [])
         super_cell = str_list_format(super_cell)
         if len(super_cell) > 0:
@@ -131,3 +170,13 @@ class Stage(object):
         self.aimd_flag_symm = input_file.flag_symm
         self.use_dftb = input_file.use_dftb
         self.use_skf = input_file.use_skf
+    def set_bigmodel_input_file(self, input_file:DFTInput):
+        self.bigmodel_input_file = input_file.input_file
+        self.bigmodel_kspacing = input_file.kspacing
+        self.bigmodel_flag_symm = input_file.flag_symm
+    def set_direct_input_file(self, input_file:DFTInput):
+        self.direct_input_file = input_file.input_file
+        self.direct_kspacing = input_file.kspacing
+        self.direct_flag_symm = input_file.flag_symm

pwact/active_learning/user_input/iter_input.py CHANGED Viewed

@@ -105,6 +105,18 @@ class StrategyParam(object):
             if self.compress:
                 error_log = "Error! the kpu uncertainty does not support compress, please set the 'compress' in strategy dict to be false!"
                 raise Exception(error_log)
+        self.direct = get_parameter("direct", json_dict, False)
+        if self.direct:
+            self.direct_script = get_parameter("direct_script", json_dict, None)
+            if self.direct_script is not None:
+                self.direct_script = os.path.abspath(self.direct_script)
+                if not os.path.exists(self.direct_script):
+                    raise Exception("ERROR! The direct script {} does not exist!".format(self.direct_script))
+            else:
+                raise Exception("ERROR! The direct script does not exist!")
+        else:
+            self.direct_script = None
     def to_dict(self):
         res = {}

pwact/active_learning/user_input/resource.py CHANGED Viewed

@@ -20,10 +20,22 @@ class Resource(object):
             if "-in" in self.explore_resource.command:
                 self.explore_resource.command = self.explore_resource.command.split('-in')[0].strip()
             self.explore_resource.command = "{} -in {} > {}".format(self.explore_resource.command, LAMMPS.input_lammps, SLURM_OUT.md_out)
+        else:
+            if "explore" in json_dict.keys():
+                self.explore_resource = self.get_resource(get_required_parameter("explore", json_dict))
+            else:
+                self.explore_resource = None
         # check dft resource
-        self.dft_resource = self.get_resource(get_required_parameter("dft", json_dict))
+        if "dft" in json_dict.keys():
+            self.dft_resource = self.get_resource(get_required_parameter("dft", json_dict))
+        else:
+            self.dft_resource = ResourceDetail("mpirun -np 1 PWmat", 1, 1, 1, 1, 1, None, None, None)
+        if "direct" in json_dict.keys():
+            self.direct_resource = self.get_resource(get_required_parameter("direct", json_dict))
+        else:
+            self.direct_resource = None
         if "scf" in json_dict.keys():
             self.scf_resource = self.get_resource(get_parameter("scf", json_dict, None))
         else:
@@ -33,11 +45,11 @@ class Resource(object):
         #     self.dft_resource.dftb_command  = "{} > {}".format(dftb_command, SLURM_OUT.dft_out)
         self.dft_style = dft_style
         self.scf_style = scf_style
-        if DFT_STYLE.vasp.lower() == dft_style.lower():
+        if DFT_STYLE.vasp.lower() == dft_style:
             self.dft_resource.command = "{} > {}".format(self.dft_resource.command, SLURM_OUT.dft_out)
-        elif DFT_STYLE.pwmat.lower() == dft_style.lower():
+        elif DFT_STYLE.pwmat.lower() == dft_style:
             self.dft_resource.command = "{} > {}".format(self.dft_resource.command, SLURM_OUT.dft_out)
-        elif DFT_STYLE.cp2k.lower() == dft_style.lower():
+        elif DFT_STYLE.cp2k.lower() == dft_style:
             self.dft_resource.command = "{} {} > {}".format(self.dft_resource.command, CP2K.cp2k_inp, SLURM_OUT.dft_out)
         if self.scf_resource is not None and scf_style is not None:

pwact/active_learning/user_input/scf_param.py CHANGED Viewed

@@ -10,7 +10,9 @@ class SCFParam(object):
         is_scf:bool=False,
         root_dir:str=None,
         dft_style:str=None,
-        scf_style:str=None) -> None:# for scf relabel in init_bulk
+        scf_style:str=None,
+        is_bigmodel:bool=False,
+        is_direct:bool=False) -> None:# for scf relabel in init_bulk
         self.dft_style = dft_style
         self.root_dir = root_dir
@@ -24,12 +26,18 @@ class SCFParam(object):
         if is_scf:
             if "scf_input" in json_dict.keys(): # for init_bulk relabel
-                json_scf = get_required_parameter("scf_input", json_dict)
-                self.scf_input_list = self.set_input(json_scf, flag_symm=0)
+                if dft_style == DFT_STYLE.bigmodel:
+                    self.bigmodel_script = get_required_parameter("bigmodel_script", json_dict)
+                else:
+                    json_scf = get_required_parameter("scf_input", json_dict)
+                    self.scf_input_list = self.set_input(json_scf, flag_symm=0)
             else: # for run_iter
-                self.scf_input_list = self.set_input(json_dict, flag_symm=0)
-                if self.scf_input_list[0].use_dftb:
-                    self.use_dftb = True
+                if dft_style == DFT_STYLE.bigmodel:
+                    self.bigmodel_script = get_required_parameter("bigmodel_script", json_dict)
+                else:
+                    self.scf_input_list = self.set_input(json_dict, flag_symm=0)
+                    if self.scf_input_list[0].use_dftb:
+                        self.use_dftb = True
         if is_aimd:
             json_aimd = get_required_parameter("aimd_input", json_dict)
             self.aimd_input_list = self.set_input(json_aimd, flag_symm=0)
@@ -40,6 +48,16 @@ class SCFParam(object):
             self.relax_input_list = self.set_input(json_relax, flag_symm=3)
             if self.relax_input_list[0].use_dftb:
                 self.use_dftb = True
+        if is_bigmodel: # init_bulk
+            json_bigmodel = get_required_parameter("bigmodel_input", json_dict)
+            self.bigmodel_input_list = self.set_input(json_bigmodel, flag_symm=3)
+        if is_direct: # init_bulk
+            json_direct = get_required_parameter("direct_input", json_dict)
+            self.direct_input_list = self.set_input(json_direct, flag_symm=3)
+        self.scf_max_num = get_parameter("scf_max_num", json_dict, None)
         # for pwmat, use 'pseudo' key
         # for vasp is INCAR file, use 'pseudo' key
         pseudo = get_parameter("pseudo", json_dict, [])

pwact/active_learning/user_input/train_param/optimizer_param.py CHANGED Viewed

@@ -6,7 +6,7 @@ class OptimizerParam(object):
     def set_optimizer(self, json_source:dict, nep_param:NepParam=None):
         optimizer_dict = get_parameter("optimizer", json_source, {})
-        self.opt_name = get_parameter("optimizer", optimizer_dict, "LKF")
+        self.opt_name = get_parameter("optimizer", optimizer_dict, "ADAM")
         self.batch_size = get_parameter("batch_size", optimizer_dict, 1)
         self.epochs = get_parameter("epochs", optimizer_dict, 30)
         self.print_freq = get_parameter("print_freq", optimizer_dict, 10)

pwact/main.py CHANGED Viewed

@@ -5,7 +5,7 @@ import glob
 import sys
 import json
 import argparse
-from pwact.utils.constant import TEMP_STRUCTURE, UNCERTAINTY, AL_WORK, AL_STRUCTURE, LABEL_FILE_STRUCTURE, EXPLORE_FILE_STRUCTURE
+from pwact.utils.constant import TEMP_STRUCTURE, UNCERTAINTY, AL_WORK, AL_STRUCTURE, LABEL_FILE_STRUCTURE, EXPLORE_FILE_STRUCTURE, DFT_STYLE
 from pwact.utils.format_input_output import make_iter_name
 from pwact.utils.file_operation import write_to_file, del_file_list, search_files, del_dir, copy_dir
 from pwact.utils.json_operation import convert_keys_to_lowercase
@@ -86,11 +86,16 @@ def run_fp(itername:str, resource : Resource, input_param: InputParam):
     #1. if the label work done before, back up and do new work
     lab.back_label()
     #2. make scf work
-    lab.make_scf_work()
-    #3. do scf work
-    lab.do_scf_jobs()
-    #4. collect scf configs outcar or movement, then to pwdata format
-    lab.do_post_labeling()
+    if input_param.dft_style == DFT_STYLE.bigmodel:
+        lab.make_bigmodel_work()
+        lab.do_bigmodel_jobs()
+        lab.do_post_bigmodel()
+    else:
+        lab.make_scf_work()
+        #3. do scf work
+        lab.do_scf_jobs()
+        #4. collect scf configs outcar or movement, then to pwdata format
+        lab.do_post_labeling()
 def do_training_work(itername:str, resource : Resource, input_param: InputParam):
     mtrain = ModelTrian(itername, resource, input_param)
@@ -129,8 +134,13 @@ def do_exploring_work(itername:str, resource : Resource, input_param: InputParam
     summary = "{}  {}\n".format(itername, summary)
     write_to_file(os.path.join(input_param.root_dir, EXPLORE_FILE_STRUCTURE.iter_select_file), summary, mode='a')
+    if input_param.strategy.direct:
+        md.make_drct_work()
+        md.do_drct_jobs()
+        md.post_drct()
     print("config selection done!")
-    # 5. do post process after lammps md running
+    # 5. do post process
     md.post_process_md()
     print("exploring done!")

pwact/utils/app_lib/do_direct_sample.py ADDED Viewed

@@ -0,0 +1,145 @@
+from maml.sampling.direct import DIRECTSampler, BirchClustering, SelectKFromClusters
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.ticker as mtick
+from ase.io import read
+import subprocess, os, sys
+write_file = "select.xyz"
+if os.path.exists(write_file):
+    os.remove(write_file)
+filenames = ["candidate.xyz"]
+k = 1
+threshold = .04
+def load_ase_MD_traj(filenames: list):
+    """
+    Load .traj to pymatgen structures
+    """
+    structs = []
+    trajs = []
+    lens = []
+    for filename in filenames:
+        traj = read(filename,index=":")
+        structs += [i for i in traj]
+        trajs.append(traj)
+        lens.append(len(traj))
+    return structs, trajs, lens
+structures, trajs, lens = load_ase_MD_traj(filenames)
+n_image = len(structures)
+DIRECT_sampler = DIRECTSampler(
+    clustering=BirchClustering(n=None, threshold_init=threshold), select_k_from_clusters=SelectKFromClusters(k=k)
+)
+DIRECT_selection = DIRECT_sampler.fit_transform(structures)
+n, m = DIRECT_selection["PCAfeatures"].shape
+explained_variance = DIRECT_sampler.pca.pca.explained_variance_ratio_
+DIRECT_selection["PCAfeatures_unweighted"] = DIRECT_selection["PCAfeatures"] / explained_variance[:m]
+plt.plot(
+    range(1, explained_variance.shape[0]+1),
+    explained_variance * 100,
+    "o-",
+)
+plt.xlabel("i$^{\mathrm{th}}$ PC", size=20)
+plt.ylabel("Explained variance", size=20)
+ax = plt.gca()
+ax.yaxis.set_major_formatter(mtick.PercentFormatter())
+plt.tight_layout()
+plt.savefig("PCA_variance.png",dpi=360)
+plt.close()
+def plot_PCAfeature_coverage(all_features, selected_indexes, method="DIRECT"):
+    fig, ax = plt.subplots(figsize=(5, 5))
+    selected_features = all_features[selected_indexes]
+    plt.plot(all_features[:, 0], all_features[:, 1], "*", alpha=0.5, label=f"All {len(all_features):,} structures")
+    plt.plot(
+        selected_features[:, 0],
+        selected_features[:, 1],
+        "*",
+        alpha=0.5,
+        label=f"{method} sampled {len(selected_features):,}",
+    )
+    legend = plt.legend(frameon=False, fontsize=14, loc="upper left", bbox_to_anchor=(-0.02, 1.02), reverse=True)
+    #for lh in legend.legendHandles:
+    #    lh.set_alpha(1)
+    plt.ylabel("PC 2", size=20)
+    plt.xlabel("PC 1", size=20)
+all_features = DIRECT_selection["PCAfeatures_unweighted"]
+selected_indexes = DIRECT_selection["selected_indexes"]
+plot_PCAfeature_coverage(all_features, selected_indexes)
+plt.tight_layout()
+plt.savefig("PCA_direct.png",dpi=360)
+plt.close()
+#manual_selection_index = np.arange(0, n_image, int(n_image/n))
+#plot_PCAfeature_coverage(all_features, manual_selection_index, "Manually")
+#plt.tight_layout()
+#plt.savefig("PCA_manually.png",dpi=360)
+#plt.close()
+def calculate_feature_coverage_score(all_features, selected_indexes, n_bins=100):
+    selected_features = all_features[selected_indexes]
+    n_all = np.count_nonzero(
+        np.histogram(all_features, bins=np.linspace(min(all_features), max(all_features), n_bins))[0]
+    )
+    n_select = np.count_nonzero(
+        np.histogram(selected_features, bins=np.linspace(min(all_features), max(all_features), n_bins))[0]
+    )
+    return n_select / n_all
+def calculate_all_FCS(all_features, selected_indexes, b_bins=100):
+    select_scores = [
+        calculate_feature_coverage_score(all_features[:, i], selected_indexes, n_bins=b_bins)
+        for i in range(all_features.shape[1])
+    ]
+    return select_scores
+all_features = DIRECT_selection["PCAfeatures_unweighted"]
+scores_DIRECT = calculate_all_FCS(all_features, DIRECT_selection["selected_indexes"], b_bins=100)
+#scores_MS = calculate_all_FCS(all_features, manual_selection_index, b_bins=100)
+x = np.arange(len(scores_DIRECT))
+x_ticks = [f"PC {n+1}" for n in range(len(x))]
+plt.figure(figsize=(15, 4))
+plt.bar(
+    x,
+    scores_DIRECT,
+    width=0.3,
+    label=f"DIRECT, $\overline{{\mathrm{{Coverage\ score}}}}$ = {np.mean(scores_DIRECT):.3f}",
+)
+#plt.bar(
+#    x + 0.3, scores_MS, width=0.3, label=f"Manual, $\overline{{\mathrm{{Coverage\ score}}}}$ = {np.mean(scores_MS):.3f}"
+#)
+plt.xticks(x, x_ticks, size=16)
+plt.yticks(np.linspace(0, 1.0, 6), size=16)
+plt.ylabel("Coverage score", size=20)
+plt.legend(shadow=True, loc="lower right", fontsize=16)
+plt.tight_layout()
+plt.savefig("Cov_score.png",dpi=360)
+plt.close()
+def get2index(num: int, list_lens: list):
+    for idx, i in enumerate(list_lens):
+        if num >= i:
+            num -= i
+        else:
+            break
+    return idx, num
+indices = DIRECT_selection["selected_indexes"]
+select_idx = []
+for ii,index in enumerate(indices):
+    idx, num = get2index(index, lens)
+    atoms = trajs[idx][num]
+    angles = atoms.cell.cellpar()[-3:]
+    if angles.max() > 140 or angles.min() < 40:
+        continue
+    else:
+        atoms.set_scaled_positions(atoms.get_scaled_positions())
+        atoms.write(write_file,format="extxyz",append=True)
+        select_idx.append(idx)
+np.savetxt("select_idx.dat",np.array(indices),fmt="%8d")

pwact 0.2.0__py3-none-any.whl → 0.2.2.dev0__py3-none-any.whl

pwact 0.2.0__py3-none-any.whl → 0.2.2.dev0__py3-none-any.whl