dpdispatcher 0.6.4__py3-none-any.whl → 0.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dpdispatcher might be problematic.
- dpdispatcher/_version.py +2 -2
- dpdispatcher/contexts/__init__.py +1 -0
- dpdispatcher/contexts/hdfs_context.py +3 -5
- dpdispatcher/contexts/local_context.py +6 -6
- dpdispatcher/contexts/ssh_context.py +9 -10
- dpdispatcher/dlog.py +9 -5
- dpdispatcher/dpdisp.py +15 -0
- dpdispatcher/entrypoints/run.py +9 -0
- dpdispatcher/machine.py +2 -2
- dpdispatcher/machines/JH_UniScheduler.py +175 -0
- dpdispatcher/machines/__init__.py +1 -0
- dpdispatcher/machines/distributed_shell.py +4 -6
- dpdispatcher/machines/fugaku.py +9 -9
- dpdispatcher/machines/lsf.py +2 -4
- dpdispatcher/machines/pbs.py +14 -14
- dpdispatcher/machines/shell.py +1 -6
- dpdispatcher/machines/slurm.py +12 -12
- dpdispatcher/run.py +172 -0
- dpdispatcher/submission.py +1 -3
- dpdispatcher/utils/hdfs_cli.py +4 -8
- {dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/METADATA +4 -3
- {dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/RECORD +26 -23
- {dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/WHEEL +1 -1
- {dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/LICENSE +0 -0
- {dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/entry_points.txt +0 -0
- {dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/top_level.txt +0 -0
dpdispatcher/_version.py
CHANGED

dpdispatcher/contexts/hdfs_context.py
CHANGED
@@ -138,7 +138,7 @@ class HDFSContext(BaseContext):
         shutil.rmtree(gz_dir, ignore_errors=True)
         os.mkdir(os.path.join(self.local_root, "tmp"))
         rfile_tgz = f"{self.remote_root}/{submission.submission_hash}_*_download.tar.gz"
-        lfile_tgz = "
+        lfile_tgz = f"{self.local_root}/tmp/"
         HDFS.copy_to_local(rfile_tgz, lfile_tgz)

         tgz_file_list = glob(os.path.join(self.local_root, "tmp/*_download.tar.gz"))
@@ -164,7 +164,7 @@ class HDFSContext(BaseContext):
                     os.path.join(
                         self.local_root,
                         task.task_work_path,
-                        "tag_failure_download_
+                        f"tag_failure_download_{jj}",
                     ),
                     "w",
                 ) as fp:
@@ -198,9 +198,7 @@ class HDFSContext(BaseContext):
         if check_exists:
             if mark_failure:
                 with open(
-                    os.path.join(
-                        self.local_root, "tag_failure_download_%s" % jj
-                    ),
+                    os.path.join(self.local_root, f"tag_failure_download_{jj}"),
                     "w",
                 ) as fp:
                     pass

dpdispatcher/contexts/local_context.py
CHANGED
@@ -153,7 +153,7 @@ class LocalContext(BaseContext):
                 tag_file_path = os.path.join(
                     self.local_root,
                     ii.task_work_path,
-                    "tag_failure_download_
+                    f"tag_failure_download_{kk}",
                 )
                 with open(tag_file_path, "w") as fp:
                     pass
@@ -181,7 +181,7 @@ class LocalContext(BaseContext):
                 tag_file_path = os.path.join(
                     self.local_root,
                     ii.task_work_path,
-                    "tag_failure_download_
+                    f"tag_failure_download_{jj}",
                 )
                 with open(tag_file_path, "w") as fp:
                     pass
@@ -227,7 +227,7 @@ class LocalContext(BaseContext):
         if check_exists:
             if mark_failure:
                 tag_file_path = os.path.join(
-                    self.local_root, "tag_failure_download_
+                    self.local_root, f"tag_failure_download_{kk}"
                 )
                 with open(tag_file_path, "w") as fp:
                     pass
@@ -252,7 +252,7 @@ class LocalContext(BaseContext):
                 if mark_failure:
                     with open(
                         os.path.join(
-                            self.local_root, "tag_failure_download_
+                            self.local_root, f"tag_failure_download_{jj}"
                         ),
                         "w",
                     ) as fp:
@@ -298,8 +298,8 @@ class LocalContext(BaseContext):
         code = proc.returncode
         if code != 0:
             raise RuntimeError(
-                "Get error code
-
+                f"Get error code {code} in locally calling {cmd} with job: {self.submission.submission_hash}"
+                f"\nStandard error: {stderr}"
             )
         return None, stdout, stderr

dpdispatcher/contexts/ssh_context.py
CHANGED
@@ -300,7 +300,7 @@ class SSHSession:
                 # retry for up to 3 times
                 # ensure alive
                 self.ensure_alive()
-                raise RetrySignal("SSH session not active in calling
+                raise RetrySignal(f"SSH session not active in calling {cmd}") from e

     @property
     def sftp(self):
@@ -628,8 +628,7 @@ class SSHContext(BaseContext):
         # check sha256
         # `:` means pass: https://stackoverflow.com/a/2421592/9567349
         _, stdout, _ = self.block_checkcall(
-            "sha256sum -c
-            % shlex.quote(sha256_file)
+            f"sha256sum -c {shlex.quote(sha256_file)} --quiet >.sha256sum_stdout 2>/dev/null || :"
         )
         self.sftp.remove(sha256_file)
         # regenerate file list
@@ -708,7 +707,7 @@ class SSHContext(BaseContext):
                     os.path.join(
                         self.local_root,
                         ii.task_work_path,
-                        "tag_failure_download_
+                        f"tag_failure_download_{jj}",
                     ),
                     "w",
                 ) as fp:
@@ -758,9 +757,9 @@ class SSHContext(BaseContext):
         assert self.remote_root is not None
         self.ssh_session.ensure_alive()
         if asynchronously:
-            cmd = "nohup
+            cmd = f"nohup {cmd} >/dev/null &"
         stdin, stdout, stderr = self.ssh_session.exec_command(
-            ("cd
+            (f"cd {shlex.quote(self.remote_root)} ;") + cmd
         )
         exit_status = stdout.channel.recv_exit_status()
         if exit_status != 0:
@@ -779,7 +778,7 @@ class SSHContext(BaseContext):
         assert self.remote_root is not None
         self.ssh_session.ensure_alive()
         stdin, stdout, stderr = self.ssh_session.exec_command(
-            ("cd
+            (f"cd {shlex.quote(self.remote_root)} ;") + cmd
         )
         exit_status = stdout.channel.recv_exit_status()
         return exit_status, stdin, stdout, stderr
@@ -846,12 +845,12 @@ class SSHContext(BaseContext):
         # Thus, it's better to use system's `rm` to remove a directory, which may
         # save a lot of time.
         if verbose:
-            dlog.info("removing
+            dlog.info(f"removing {remotepath}")
         # In some supercomputers, it's very slow to remove large numbers of files
         # (e.g. directory containing trajectory) due to bad I/O performance.
         # So an asynchronously option is provided.
         self.block_checkcall(
-            "rm -rf
+            f"rm -rf {shlex.quote(remotepath)}",
             asynchronously=self.clean_asynchronously,
         )
@@ -921,7 +920,7 @@ class SSHContext(BaseContext):
                 f"from {from_f} to {self.ssh_session.username} @ {self.ssh_session.hostname} : {to_f} Error!"
             )
             # remote extract
-            self.block_checkcall("tar xf
+            self.block_checkcall(f"tar xf {of}")
             # clean up
             os.remove(from_f)
             self.sftp.remove(to_f)
dpdispatcher/dlog.py
CHANGED
@@ -6,21 +6,25 @@ import warnings
 dlog = logging.getLogger("dpdispatcher")
 dlog.propagate = False
 dlog.setLevel(logging.INFO)
+cwd_logfile_path = os.path.join(os.getcwd(), "dpdispatcher.log")
+dlogf = logging.FileHandler(cwd_logfile_path, delay=True)
 try:
-
-
-    )
+    dlog.addHandler(dlogf)
+    dlog.info(f"LOG INIT:dpdispatcher log direct to {cwd_logfile_path}")
 except PermissionError:
+    dlog.removeHandler(dlogf)
     warnings.warn(
-        "dpdispatcher.log meet permission error. redirect the log to ~/dpdispatcher.log"
+        f"dump logfile dpdispatcher.log to {cwd_logfile_path} meet permission error. redirect the log to ~/dpdispatcher.log"
     )
     dlogf = logging.FileHandler(
         os.path.join(os.path.expanduser("~"), "dpdispatcher.log"), delay=True
     )
+    dlog.addHandler(dlogf)
+    dlog.info("LOG INIT:dpdispatcher log init at ~/dpdispatcher.log")

 dlogf_formatter = logging.Formatter("%(asctime)s - %(levelname)s : %(message)s")
 dlogf.setFormatter(dlogf_formatter)
-dlog.addHandler(dlogf)
+# dlog.addHandler(dlogf)

 dlog_stdout = logging.StreamHandler(sys.stdout)
 dlog_stdout.setFormatter(dlogf_formatter)
dpdispatcher/dpdisp.py
CHANGED
@@ -3,6 +3,7 @@ import argparse
 from typing import List, Optional

 from dpdispatcher.entrypoints.gui import start_dpgui
+from dpdispatcher.entrypoints.run import run
 from dpdispatcher.entrypoints.submission import handle_submission


@@ -81,6 +82,18 @@ def main_parser() -> argparse.ArgumentParser:
             "to the network on both IPv4 and IPv6 (where available)."
         ),
     )
+    ##########################################
+    # run
+    parser_run = subparsers.add_parser(
+        "run",
+        help="Run a Python script.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser_run.add_argument(
+        "filename",
+        type=str,
+        help="Python script to run. PEP 723 metadata should be contained in this file.",
+    )
     return parser


@@ -117,6 +130,8 @@ def main():
             port=args.port,
             bind_all=args.bind_all,
         )
+    elif args.command == "run":
+        run(filename=args.filename)
     elif args.command is None:
         pass
     else:
dpdispatcher/machine.py
CHANGED
@@ -261,7 +261,7 @@ class Machine(metaclass=ABCMeta):

         source_list = job.resources.source_list
         for ii in source_list:
-            line = "{ source
+            line = f"{{ source {ii}; }} \n"
             source_files_part += line

         export_envs_part = ""
@@ -466,7 +466,7 @@ class Machine(metaclass=ABCMeta):
         job : Job
             job
         """
-        dlog.warning("Job
+        dlog.warning(f"Job {job.job_id} should be manually killed")

     def get_exit_code(self, job):
         """Get exit code of the job.
dpdispatcher/machines/JH_UniScheduler.py
ADDED
@@ -0,0 +1,175 @@
+import shlex
+from typing import List
+
+from dargs import Argument
+
+from dpdispatcher.dlog import dlog
+from dpdispatcher.machine import Machine
+from dpdispatcher.utils.job_status import JobStatus
+from dpdispatcher.utils.utils import (
+    RetrySignal,
+    customized_script_header_template,
+    retry,
+)
+
+JH_UniScheduler_script_header_template = """\
+#!/bin/bash -l
+#JSUB -e %J.err
+#JSUB -o %J.out
+{JH_UniScheduler_nodes_line}
+{JH_UniScheduler_ptile_line}
+{JH_UniScheduler_partition_line}
+{JH_UniScheduler_number_gpu_line}"""
+
+
+class JH_UniScheduler(Machine):
+    """JH_UniScheduler batch."""
+
+    def gen_script(self, job):
+        JH_UniScheduler_script = super().gen_script(job)
+        return JH_UniScheduler_script
+
+    def gen_script_header(self, job):
+        resources = job.resources
+        script_header_dict = {
+            "JH_UniScheduler_nodes_line": f"#JSUB -n {resources.number_node * resources.cpu_per_node}",
+            "JH_UniScheduler_ptile_line": f"#JSUB -R 'span[ptile={resources.cpu_per_node}]'",
+            "JH_UniScheduler_partition_line": f"#JSUB -q {resources.queue_name}",
+        }
+        custom_gpu_line = resources.kwargs.get("custom_gpu_line", None)
+        if not custom_gpu_line:
+            script_header_dict["JH_UniScheduler_number_gpu_line"] = (
+                "" f"#JSUB -gpgpu {resources.gpu_per_node}"
+            )
+        else:
+            script_header_dict["JH_UniScheduler_number_gpu_line"] = custom_gpu_line
+        if (
+            resources["strategy"].get("customized_script_header_template_file")
+            is not None
+        ):
+            JH_UniScheduler_script_header = customized_script_header_template(
+                resources["strategy"]["customized_script_header_template_file"],
+                resources,
+            )
+        else:
+            JH_UniScheduler_script_header = (
+                JH_UniScheduler_script_header_template.format(**script_header_dict)
+            )
+
+        return JH_UniScheduler_script_header
+
+    @retry()
+    def do_submit(self, job):
+        script_file_name = job.script_file_name
+        script_str = self.gen_script(job)
+        job_id_name = job.job_hash + "_job_id"
+        self.context.write_file(fname=script_file_name, write_str=script_str)
+        script_run_str = self.gen_script_command(job)
+        script_run_file_name = f"{job.script_file_name}.run"
+        self.context.write_file(fname=script_run_file_name, write_str=script_run_str)
+
+        try:
+            stdin, stdout, stderr = self.context.block_checkcall(
+                "cd {} && {} {}".format(
+                    shlex.quote(self.context.remote_root),
+                    "jsub < ",
+                    shlex.quote(script_file_name),
+                )
+            )
+        except RuntimeError as err:
+            raise RetrySignal(err) from err
+
+        subret = stdout.readlines()
+        job_id = subret[0].split()[1][1:-1]
+        self.context.write_file(job_id_name, job_id)
+        return job_id
+
+    def default_resources(self, resources):
+        pass
+
+    @retry()
+    def check_status(self, job):
+        try:
+            job_id = job.job_id
+        except AttributeError:
+            return JobStatus.terminated
+        if job_id == "":
+            return JobStatus.unsubmitted
+        ret, stdin, stdout, stderr = self.context.block_call("jjobs " + job_id)
+        err_str = stderr.read().decode("utf-8")
+        if (f"Job <{job_id}> is not found") in err_str:
+            if self.check_finish_tag(job):
+                return JobStatus.finished
+            else:
+                return JobStatus.terminated
+        elif ret != 0:
+            # just retry when any unknown error raised.
+            raise RetrySignal(
+                "Get error code %d in checking status through ssh with job: %s . message: %s"
+                % (ret, job.job_hash, err_str)
+            )
+        status_out = stdout.read().decode("utf-8").split("\n")
+        if len(status_out) < 2:
+            return JobStatus.unknown
+        else:
+            status_line = status_out[1]
+            status_word = status_line.split()[2]
+
+        if status_word in ["PEND"]:
+            return JobStatus.waiting
+        elif status_word in ["RUN", "PSUSP", "SSUSP", "USUSP"]:
+            return JobStatus.running
+        elif status_word in ["DONE", "EXIT"]:
+            if self.check_finish_tag(job):
+                dlog.info(f"job: {job.job_hash} {job.job_id} finished")
+                return JobStatus.finished
+            else:
+                return JobStatus.terminated
+        else:
+            return JobStatus.unknown
+
+    def check_finish_tag(self, job):
+        job_tag_finished = job.job_hash + "_job_tag_finished"
+        return self.context.check_file_exists(job_tag_finished)
+
+    @classmethod
+    def resources_subfields(cls) -> List[Argument]:
+        """Generate the resources subfields.
+
+        Returns
+        -------
+        list[Argument]
+            resources subfields
+        """
+        doc_custom_gpu_line = "Custom GPU configuration, starting with #JSUB"
+
+        return [
+            Argument(
+                "kwargs",
+                dict,
+                [
+                    Argument(
+                        "custom_gpu_line",
+                        str,
+                        optional=True,
+                        default=None,
+                        doc=doc_custom_gpu_line,
+                    ),
+                ],
+                optional=False,
+                doc="Extra arguments.",
+            )
+        ]
+
+    def kill(self, job):
+        """Kill the job.
+
+        Parameters
+        ----------
+        job : Job
+            job
+        """
+        job_id = job.job_id
+        ret, stdin, stdout, stderr = self.context.block_call(
+            "jctrl kill " + str(job_id)
+        )
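
With the default template above and no custom_gpu_line, a job with number_node=1, cpu_per_node=4, queue_name="gpu", and gpu_per_node=1 would get the following generated header (worked out mechanically from gen_script_header; the values are illustrative):

    #!/bin/bash -l
    #JSUB -e %J.err
    #JSUB -o %J.out
    #JSUB -n 4
    #JSUB -R 'span[ptile=4]'
    #JSUB -q gpu
    #JSUB -gpgpu 1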
dpdispatcher/machines/distributed_shell.py
CHANGED
@@ -64,7 +64,7 @@ class DistributedShell(Machine):

         source_list = job.resources.source_list
         for ii in source_list:
-            line = "{ source
+            line = f"{{ source {ii}; }} \n"
             source_files_part += line

         export_envs_part = ""
@@ -96,7 +96,7 @@ class DistributedShell(Machine):
     def gen_script_end(self, job):
         all_task_dirs = ""
         for task in job.job_task_list:
-            all_task_dirs += "
+            all_task_dirs += f"{task.task_work_path} "
         job_tag_finished = job.job_hash + "_job_tag_finished"
         flag_if_job_task_fail = job.job_hash + "_flag_if_job_task_fail"

@@ -173,10 +173,8 @@ class DistributedShell(Machine):
             )
         )

-        cmd =
-            submit_command
-            output_name,
-            output_name,
+        cmd = (
+            f"{{ nohup {submit_command} 1>{output_name} 2>{output_name} & }} && echo $!"
         )
         ret, stdout, stderr = run_cmd_with_all_output(cmd)
dpdispatcher/machines/fugaku.py
CHANGED
@@ -20,15 +20,15 @@ class Fugaku(Machine):
     def gen_script_header(self, job):
         resources = job.resources
         fugaku_script_header_dict = {}
-        fugaku_script_header_dict[
-            "
-
-        fugaku_script_header_dict[
-            "
-
-        fugaku_script_header_dict[
-            "
-
+        fugaku_script_header_dict["fugaku_node_number_line"] = (
+            f'#PJM -L "node={resources.number_node}" '
+        )
+        fugaku_script_header_dict["fugaku_ntasks_per_node_line"] = (
+            f'#PJM --mpi "max-proc-per-node={resources.cpu_per_node}"'
+        )
+        fugaku_script_header_dict["queue_name_line"] = (
+            f'#PJM -L "rscgrp={resources.queue_name}"'
+        )
         if (
             resources["strategy"].get("customized_script_header_template_file")
             is not None
dpdispatcher/machines/lsf.py
CHANGED
@@ -32,9 +32,7 @@ class LSF(Machine):
     def gen_script_header(self, job):
         resources = job.resources
         script_header_dict = {
-            "lsf_nodes_line": "#BSUB -n {
-                number_cores=resources.number_node * resources.cpu_per_node
-            ),
+            "lsf_nodes_line": f"#BSUB -n {resources.number_node * resources.cpu_per_node}",
             "lsf_ptile_line": f"#BSUB -R 'span[ptile={resources.cpu_per_node}]'",
             "lsf_partition_line": f"#BSUB -q {resources.queue_name}",
         }
@@ -123,7 +121,7 @@ class LSF(Machine):
             return JobStatus.unsubmitted
         ret, stdin, stdout, stderr = self.context.block_call("bjobs " + job_id)
         err_str = stderr.read().decode("utf-8")
-        if ("Job
+        if (f"Job <{job_id}> is not found") in err_str:
             if self.check_finish_tag(job):
                 return JobStatus.finished
             else:
dpdispatcher/machines/pbs.py
CHANGED
@@ -21,13 +21,13 @@ class PBS(Machine):
     def gen_script_header(self, job):
         resources = job.resources
         pbs_script_header_dict = {}
-        pbs_script_header_dict[
-            "
-
+        pbs_script_header_dict["select_node_line"] = (
+            f"#PBS -l select={resources.number_node}:ncpus={resources.cpu_per_node}"
+        )
         if resources.gpu_per_node != 0:
-            pbs_script_header_dict[
-                "
-
+            pbs_script_header_dict["select_node_line"] += (
+                f":ngpus={resources.gpu_per_node}"
+            )
         pbs_script_header_dict["queue_name_line"] = f"#PBS -q {resources.queue_name}"
         if (
             resources["strategy"].get("customized_script_header_template_file")
@@ -156,12 +156,12 @@ class Torque(PBS):
         # ref: https://support.adaptivecomputing.com/wp-content/uploads/2021/02/torque/torque.htm#topics/torque/2-jobs/requestingRes.htm
         resources = job.resources
         pbs_script_header_dict = {}
-        pbs_script_header_dict[
-            "
-
+        pbs_script_header_dict["select_node_line"] = (
+            f"#PBS -l nodes={resources.number_node}:ppn={resources.cpu_per_node}"
+        )
         if resources.gpu_per_node != 0:
-            pbs_script_header_dict["select_node_line"] +=
-
+            pbs_script_header_dict["select_node_line"] += (
+                f":gpus={resources.gpu_per_node}"
             )
         pbs_script_header_dict["queue_name_line"] = f"#PBS -q {resources.queue_name}"
         if (
@@ -212,9 +212,9 @@ class SGE(PBS):
         resources = job.resources
         sge_script_header_dict = {}
         # resources.number_node is not used
-        sge_script_header_dict[
-            "
-
+        sge_script_header_dict["select_node_line"] = (
+            f"#$ -pe mpi {resources.cpu_per_node} "
+        )
         # resources.queue_name is not necessary
         sge_script_header = sge_script_header_template.format(**sge_script_header_dict)
         return sge_script_header
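
For number_node=2, cpu_per_node=8, and gpu_per_node=1, the three schedulers above emit resource lines of the following shape (worked out from the f-strings; SGE ignores number_node, as the comment in its hunk notes):

    PBS:    #PBS -l select=2:ncpus=8:ngpus=1
    Torque: #PBS -l nodes=2:ppn=8:gpus=1
    SGE:    #$ -pe mpi 8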
dpdispatcher/machines/shell.py
CHANGED
@@ -39,12 +39,7 @@ class Shell(Machine):
         script_run_file_name = f"{job.script_file_name}.run"
         self.context.write_file(fname=script_run_file_name, write_str=script_run_str)
         ret, stdin, stdout, stderr = self.context.block_call(
-            "cd {} && {{ nohup bash {} 1>>{} 2>>{} & }} && echo $!"
-                shlex.quote(self.context.remote_root),
-                script_file_name,
-                output_name,
-                output_name,
-            )
+            f"cd {shlex.quote(self.context.remote_root)} && {{ nohup bash {script_file_name} 1>>{output_name} 2>>{output_name} & }} && echo $!"
         )
         if ret != 0:
             err_str = stderr.read().decode("utf-8")
dpdispatcher/machines/slurm.py
CHANGED
@@ -39,23 +39,23 @@ class Slurm(Machine):
     def gen_script_header(self, job):
         resources = job.resources
         script_header_dict = {}
-        script_header_dict["slurm_nodes_line"] =
-
+        script_header_dict["slurm_nodes_line"] = (
+            f"#SBATCH --nodes {resources.number_node}"
+        )
+        script_header_dict["slurm_ntasks_per_node_line"] = (
+            f"#SBATCH --ntasks-per-node {resources.cpu_per_node}"
         )
-        script_header_dict[
-            "slurm_ntasks_per_node_line"
-        ] = f"#SBATCH --ntasks-per-node {resources.cpu_per_node}"
         custom_gpu_line = resources.kwargs.get("custom_gpu_line", None)
         if not custom_gpu_line:
-            script_header_dict[
-                "
-
+            script_header_dict["slurm_number_gpu_line"] = (
+                f"#SBATCH --gres=gpu:{resources.gpu_per_node}"
+            )
         else:
             script_header_dict["slurm_number_gpu_line"] = custom_gpu_line
         if resources.queue_name != "":
-            script_header_dict[
-                "
-
+            script_header_dict["slurm_partition_line"] = (
+                f"#SBATCH --partition {resources.queue_name}"
+            )
         else:
             script_header_dict["slurm_partition_line"] = ""
         if (
@@ -254,7 +254,7 @@ class SlurmJobArray(Slurm):
             ).as_posix()
             if not self.context.check_file_exists(task_tag_finished):
                 job_array.add(ii // slurm_job_size)
-        return super().gen_script_header(job) + "\n#SBATCH --array
+        return super().gen_script_header(job) + "\n#SBATCH --array={}".format(
             ",".join(map(str, job_array))
         )
         return super().gen_script_header(job) + "\n#SBATCH --array=0-%d" % (
dpdispatcher/run.py
ADDED
@@ -0,0 +1,172 @@
+import os
+import re
+import sys
+from glob import glob
+from hashlib import sha1
+
+from dpdispatcher.machine import Machine
+from dpdispatcher.submission import Resources, Submission, Task
+
+if sys.version_info >= (3, 11):
+    import tomllib
+else:
+    import tomli as tomllib
+from typing import List, Optional
+
+from dargs import Argument
+
+from dpdispatcher.arginfo import machine_dargs, resources_dargs, task_dargs
+
+REGEX = r"(?m)^# /// (?P<type>[a-zA-Z0-9-]+)$\s(?P<content>(^#(| .*)$\s)+)^# ///$"
+
+
+def read_pep723(script: str) -> Optional[dict]:
+    """Read a PEP 723 script metadata from a script string.
+
+    Parameters
+    ----------
+    script : str
+        Script content.
+
+    Returns
+    -------
+    dict
+        PEP 723 metadata.
+    """
+    name = "script"
+    matches = list(
+        filter(lambda m: m.group("type") == name, re.finditer(REGEX, script))
+    )
+    if len(matches) > 1:
+        # TODO: Add tests for scenarios where multiple script blocks are found
+        raise ValueError(f"Multiple {name} blocks found")
+    elif len(matches) == 1:
+        content = "".join(
+            line[2:] if line.startswith("# ") else line[1:]
+            for line in matches[0].group("content").splitlines(keepends=True)
+        )
+        return tomllib.loads(content)
+    else:
+        # TODO: Add tests for scenarios where no metadata is found
+        return None
+
+
+def pep723_args() -> Argument:
+    """Return the argument parser for PEP 723 metadata."""
+    machine_args = machine_dargs()
+    machine_args.fold_subdoc = True
+    machine_args.doc = "Machine configuration. See related documentation for details."
+    resources_args = resources_dargs(detail_kwargs=False)
+    resources_args.fold_subdoc = True
+    resources_args.doc = (
+        "Resources configuration. See related documentation for details."
+    )
+    task_args = task_dargs()
+    command_arg = task_args["command"]
+    command_arg.doc = (
+        "Python interpreter or launcher. No need to contain the Python script filename."
+    )
+    command_arg.default = "python"
+    command_arg.optional = True
+    task_args["task_work_path"].doc += " Can be a glob pattern."
+    task_args.name = "task_list"
+    task_args.doc = "List of tasks to execute."
+    task_args.repeat = True
+    task_args.dtype = (list,)
+    return Argument(
+        "pep723",
+        dtype=dict,
+        doc="PEP 723 metadata",
+        sub_fields=[
+            Argument(
+                "work_base",
+                dtype=str,
+                optional=True,
+                default="./",
+                doc="Base directory for the work",
+            ),
+            Argument(
+                "forward_common_files",
+                dtype=List[str],
+                optional=True,
+                default=[],
+                doc="Common files to forward to the remote machine",
+            ),
+            Argument(
+                "backward_common_files",
+                dtype=List[str],
+                optional=True,
+                default=[],
+                doc="Common files to backward from the remote machine",
+            ),
+            machine_args,
+            resources_args,
+            task_args,
+        ],
+    )
+
+
+def create_submission(metadata: dict, hash: str) -> Submission:
+    """Create a Submission instance from a PEP 723 metadata.
+
+    Parameters
+    ----------
+    metadata : dict
+        PEP 723 metadata.
+    hash : str
+        Submission hash.
+
+    Returns
+    -------
+    Submission
+        Submission instance.
+    """
+    base = pep723_args()
+    metadata = base.normalize_value(metadata, trim_pattern="_*")
+    base.check_value(metadata, strict=False)
+
+    tasks = []
+    for task in metadata["task_list"]:
+        task = task.copy()
+        task["command"] += f" $REMOTE_ROOT/script_{hash}.py"
+        task_work_path = os.path.join(
+            metadata["machine"]["local_root"],
+            metadata["work_base"],
+            task["task_work_path"],
+        )
+        if os.path.isdir(task_work_path):
+            tasks.append(Task.load_from_dict(task))
+        elif glob(task_work_path):
+            for file in glob(task_work_path):
+                tasks.append(Task.load_from_dict({**task, "task_work_path": file}))
+            # TODO: Add tests for scenarios where the task work path is a glob pattern
+        else:
+            # TODO: Add tests for scenarios where the task work path is not found
+            raise FileNotFoundError(f"Task work path {task_work_path} not found.")
+    return Submission(
+        work_base=metadata["work_base"],
+        forward_common_files=metadata["forward_common_files"],
+        backward_common_files=metadata["backward_common_files"],
+        machine=Machine.load_from_dict(metadata["machine"]),
+        resources=Resources.load_from_dict(metadata["resources"]),
+        task_list=tasks,
+    )
+
+
+def run_pep723(script: str):
+    """Run a PEP 723 script.
+
+    Parameters
+    ----------
+    script : str
+        Script content.
+    """
+    metadata = read_pep723(script)
+    if metadata is None:
+        raise ValueError("No PEP 723 metadata found.")
+    dpdispatcher_metadata = metadata["tool"]["dpdispatcher"]
+    script_hash = sha1(script.encode("utf-8")).hexdigest()
+    submission = create_submission(dpdispatcher_metadata, script_hash)
+    submission.machine.context.write_file(f"script_{script_hash}.py", script)
+    # write script
+    submission.run_submission()
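
A quick sketch of what read_pep723 extracts, assuming the module is importable as dpdispatcher.run; the script text and the parsed result shown in the comment are illustrative:

    from dpdispatcher.run import read_pep723

    # Build the script from flush-left lines so the PEP 723 markers match
    # the (?m)^# /// anchors in REGEX.
    lines = [
        "# /// script",
        "# [tool.dpdispatcher]",
        '# work_base = "./"',
        "# ///",
        'print("hello")',
    ]
    metadata = read_pep723("\n".join(lines) + "\n")
    # parsed TOML: {'tool': {'dpdispatcher': {'work_base': './'}}}
    print(metadata["tool"]["dpdispatcher"]["work_base"])  # ./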
dpdispatcher/submission.py
CHANGED
@@ -863,9 +863,7 @@ class Job:
             self.submit_job()
             if self.job_state != JobStatus.unsubmitted:
                 dlog.info(
-                    "job:{job_hash} re-submit after terminated; new job_id is {job_id}"
-                        job_hash=self.job_hash, job_id=self.job_id
-                    )
+                    f"job:{self.job_hash} re-submit after terminated; new job_id is {self.job_id}"
                 )
             time.sleep(0.2)
             self.get_job_state()
dpdispatcher/utils/hdfs_cli.py
CHANGED
@@ -88,10 +88,8 @@ class HDFS:
                 return True, out
             else:
                 raise RuntimeError(
-                    "Cannot copy local[{}] to remote[{}] with cmd[{}]; "
-                    "ret[{}] output[{}] stderr[{}]"
-                        local_path, to_uri, cmd, ret, out, err
-                    )
+                    f"Cannot copy local[{local_path}] to remote[{to_uri}] with cmd[{cmd}]; "
+                    f"ret[{ret}] output[{out}] stderr[{err}]"
                 )
         except Exception as e:
             raise RuntimeError(
@@ -113,10 +111,8 @@ class HDFS:
                 return True
             else:
                 raise RuntimeError(
-                    "Cannot copy remote[{}] to local[{}] with cmd[{}]; "
-                    "ret[{}] output[{}] stderr[{}]"
-                        from_uri, local_path, cmd, ret, out, err
-                    )
+                    f"Cannot copy remote[{from_uri}] to local[{local_path}] with cmd[{cmd}]; "
+                    f"ret[{ret}] output[{out}] stderr[{err}]"
                 )
         except Exception as e:
             raise RuntimeError(
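
Both hunks rely on Python concatenating adjacent string literals, f-strings included, into a single literal at compile time. A tiny self-contained illustration with made-up values:

    local_path, to_uri, cmd = "a.tgz", "hdfs:///tmp/a.tgz", "hadoop fs -put"
    ret, out, err = 1, "", "No such file or directory"

    # Two adjacent f-strings compile to one message string.
    msg = (
        f"Cannot copy local[{local_path}] to remote[{to_uri}] with cmd[{cmd}]; "
        f"ret[{ret}] output[{out}] stderr[{err}]"
    )
    print(msg)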
{dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dpdispatcher
-Version: 0.6.4
+Version: 0.6.5
 Summary: Generate HPC scheduler systems jobs input scripts, submit these scripts to HPC systems, and poke until they finish
 Author: DeepModeling
 License: GNU LESSER GENERAL PUBLIC LICENSE
@@ -172,7 +172,7 @@ License: GNU LESSER GENERAL PUBLIC LICENSE
 Project-URL: Homepage, https://github.com/deepmodeling/dpdispatcher
 Project-URL: documentation, https://docs.deepmodeling.com/projects/dpdispatcher
 Project-URL: repository, https://github.com/deepmodeling/dpdispatcher
-Keywords: dispatcher,hpc,slurm,lsf,pbs,ssh
+Keywords: dispatcher,hpc,slurm,lsf,pbs,ssh,jh_unischeduler
 Classifier: Programming Language :: Python :: 3.7
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
@@ -190,6 +190,7 @@ Requires-Dist: dargs >=0.4.1
 Requires-Dist: requests
 Requires-Dist: tqdm >=4.9.0
 Requires-Dist: pyyaml
+Requires-Dist: tomli >=1.1.0 ; python_version < "3.11"
 Requires-Dist: typing-extensions ; python_version < "3.7"
 Provides-Extra: bohrium
 Requires-Dist: oss2 ; extra == 'bohrium'
@@ -250,4 +251,4 @@ See [Contributing Guide](CONTRIBUTING.md) to become a contributor! 🤓

 ## References

-DPDispatcher is
+DPDispatcher is derived from the [DP-GEN](https://github.com/deepmodeling/dpgen) package. To mention DPDispatcher in a scholarly publication, please read Section 3.3 in the [DP-GEN paper](https://doi.org/10.1016/j.cpc.2020.107206).
{dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/RECORD
CHANGED
@@ -1,35 +1,38 @@
 dpdispatcher/__init__.py,sha256=CLZP_N5CTp14ujWCykEHuJjoIfKR6CwrclXhjWUgNoE,517
 dpdispatcher/__main__.py,sha256=BFhG-mSBzVZUEezQJqXWZnt2WsnhAHT_zpT8Y6gpOz0,116
-dpdispatcher/_version.py,sha256=
+dpdispatcher/_version.py,sha256=PuC6q1U5hHaOMp2tDNeTKt6ExeuO2V9ihjqjMYIsVUo,411
 dpdispatcher/arginfo.py,sha256=pNaxYIE6ahBidpR7OCKZdw8iGt003uTXGSlVzwiuvRg,188
 dpdispatcher/base_context.py,sha256=NvaC_RHyspxq412z-eCq4Zn8-szZxvn8K6OkXvx7l4Y,3615
-dpdispatcher/dlog.py,sha256=
-dpdispatcher/dpdisp.py,sha256=
-dpdispatcher/machine.py,sha256=
-dpdispatcher/
-dpdispatcher/
+dpdispatcher/dlog.py,sha256=QJKAwB6gV3Zb6zQUL9dZ_uIoTIEy9Z7ecmVQ-8WNmD8,1081
+dpdispatcher/dpdisp.py,sha256=jhuTmwPY7KBF4WukaQomEwZcfYoISaMbKwuxdDGSluc,4206
+dpdispatcher/machine.py,sha256=z5D0eLAPfdo5SZdO6NLvWBUUePE0VHRMWurRMzEV0U0,16138
+dpdispatcher/run.py,sha256=tFHbJAioXXpgHTE5bhRRAuc8w7cX1ET9SBbiAg3Rw-I,5382
+dpdispatcher/submission.py,sha256=0_PCpRyiUwCHwYAzdXs-3rzq8YzZs0VZBU6tS7SixG0,48361
+dpdispatcher/contexts/__init__.py,sha256=jlvcIppmUnS39yBlkZEDvIQFV-j_BR75ZTbZALF_RB0,336
 dpdispatcher/contexts/dp_cloud_server_context.py,sha256=6XK0B2sLGEDeZmV2SZzQdVrMcWAWYZVLLK-IaShEXIY,12245
-dpdispatcher/contexts/hdfs_context.py,sha256=
+dpdispatcher/contexts/hdfs_context.py,sha256=B6pjGUD8Xaa0G_Zrnoci2DZnEXxojE9fAcexMMvAZCM,8930
 dpdispatcher/contexts/lazy_local_context.py,sha256=F8abWAJRY1Ewx1sErINKN1ltWerXzeCcJgjTvLvucKE,5696
-dpdispatcher/contexts/local_context.py,sha256=
+dpdispatcher/contexts/local_context.py,sha256=AsIfOT24FV0_bNlD2xU-pqAJy-XHZ6XTsbll4Vt6bMM,14065
 dpdispatcher/contexts/openapi_context.py,sha256=DXaMS10SXN3VKEeEdzQyfOgRwUyHRJVCJHd2fKKdsmA,9499
-dpdispatcher/contexts/ssh_context.py,sha256=
+dpdispatcher/contexts/ssh_context.py,sha256=baMiD_1KlrksqNKCkpx7apovLW_qdfU9U1KRDNTjCz0,38578
 dpdispatcher/dpcloudserver/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dpdispatcher/dpcloudserver/client.py,sha256=k1niKjG6zFnMtHn_UuCjYoOcMju3o3PV-GdyVLr5-KM,165
 dpdispatcher/entrypoints/__init__.py,sha256=exKSFT3j2oCerGwtI8WbHQK-D0K-CyifocRji1xntT4,20
 dpdispatcher/entrypoints/gui.py,sha256=29lMXqbmSRbLj4rfBv7Jnw89NLU9syTB88IUP6IRJsU,830
+dpdispatcher/entrypoints/run.py,sha256=tRkHfeAktV6gF31yb2MVOSTlpNGZFw3N0jHBmM1YfIg,175
 dpdispatcher/entrypoints/submission.py,sha256=ikVwIZAQL0SsYO5xaMIdKXgO6qtc05w1vqmvtG7Nk5M,3401
-dpdispatcher/machines/
-dpdispatcher/machines/
+dpdispatcher/machines/JH_UniScheduler.py,sha256=f7Vs9_m4Th1GVSgsJTy9_nMAY8g9n0ZewnPY2DFECfI,5795
+dpdispatcher/machines/__init__.py,sha256=tOQuPUlW1Ab4qcC0oSAIyDjZA_WyE67h_EIxPCWGhys,336
+dpdispatcher/machines/distributed_shell.py,sha256=LvWl6ktPlgmJ7rk90VWxp4douve8hYmuRf-B0saFBds,7534
 dpdispatcher/machines/dp_cloud_server.py,sha256=SR69gsFb2BvOQCW1QnWfP3cQvu_qHLJNsycp5wzosJU,11706
-dpdispatcher/machines/fugaku.py,sha256=
-dpdispatcher/machines/lsf.py,sha256=
+dpdispatcher/machines/fugaku.py,sha256=oY2hD2ldL2dztwtJ9WNisdsfPnaX-5yTRXewIT9r60I,4314
+dpdispatcher/machines/lsf.py,sha256=Q6IE4nCkNEKcW0AdBTKPOYgmCJAeXWmUVxZ9sQFkxos,7932
 dpdispatcher/machines/openapi.py,sha256=Gzzbo8YOAybXGTrgMutexErcaEi3ts7uTUNvOhThFS8,8858
-dpdispatcher/machines/pbs.py,sha256=
-dpdispatcher/machines/shell.py,sha256=
-dpdispatcher/machines/slurm.py,sha256=
+dpdispatcher/machines/pbs.py,sha256=KjJcLpQr748ZgOwFfWmJ_LG1q6Jm1UF24YCSLiDfcac,10308
+dpdispatcher/machines/shell.py,sha256=DnqMNb2nmBc3gVx8tA8oiUWdnWHKJwpIPs660i3Eq7A,4703
+dpdispatcher/machines/slurm.py,sha256=YM2Mv55jAFtDIiJoJLkD6p1Wi1ujjH6t4WlU8EtlbCw,15592
 dpdispatcher/utils/__init__.py,sha256=fwvwkMf7DFNQkNBiIce8Y8gRA6FhICwKjkKiXu_BEJg,13
-dpdispatcher/utils/hdfs_cli.py,sha256=
+dpdispatcher/utils/hdfs_cli.py,sha256=n3EIfFIralsISlaEewawD35f0P8mabo-u8D8UW3k_7Y,5308
 dpdispatcher/utils/job_status.py,sha256=Eszs4TPLfszCuf6zLaFonf25feXDUguF28spYOjJpQE,233
 dpdispatcher/utils/record.py,sha256=c8jdPmCuLzRmFo_jOjR0j9zFR1EWX3NSHVuPEIYCycg,2147
 dpdispatcher/utils/utils.py,sha256=1One9eW-v3ejDcL6PB9PSCMZQkalnbxq0DfJoUwQaLs,5334
@@ -38,9 +41,9 @@ dpdispatcher/utils/dpcloudserver/client.py,sha256=CLfXswvzI4inDrW2bYkfMQ6gQJFcZO
 dpdispatcher/utils/dpcloudserver/config.py,sha256=NteQzf1OeEkz2UbkXHHQ0B72cUu23zLVzpM9Yh4v1Cc,559
 dpdispatcher/utils/dpcloudserver/retcode.py,sha256=1qAF8gFZx55u2sO8KbtYSIIrjcO-IGufEUlwbkSfC1g,721
 dpdispatcher/utils/dpcloudserver/zip_file.py,sha256=f9WrlktwHW0YipaWg5Y0kxjMZlhD1cJYa6EUpvu4Cro,2611
-dpdispatcher-0.6.
-dpdispatcher-0.6.
-dpdispatcher-0.6.
-dpdispatcher-0.6.
-dpdispatcher-0.6.
-dpdispatcher-0.6.
+dpdispatcher-0.6.5.dist-info/LICENSE,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
+dpdispatcher-0.6.5.dist-info/METADATA,sha256=eLIZlw1J6l08VjltNG2O3Z7kWK_TNVJR08aaGlfwESc,12821
+dpdispatcher-0.6.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+dpdispatcher-0.6.5.dist-info/entry_points.txt,sha256=NRHUV0IU_u7_XtcmmEDnVzAcUmurhiEAGwENckrajo4,233
+dpdispatcher-0.6.5.dist-info/top_level.txt,sha256=35jAQoXY-b-e9fJ1_mxhZUiaCoJNt1ZI7mpFRf07Qjs,13
+dpdispatcher-0.6.5.dist-info/RECORD,,

{dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/LICENSE
File without changes

{dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/entry_points.txt
File without changes

{dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/top_level.txt
File without changes