dpdispatcher 0.6.4__py3-none-any.whl → 0.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of dpdispatcher might be problematic.
- dpdispatcher/_version.py +2 -2
- dpdispatcher/contexts/__init__.py +1 -0
- dpdispatcher/contexts/hdfs_context.py +3 -5
- dpdispatcher/contexts/local_context.py +6 -6
- dpdispatcher/contexts/ssh_context.py +9 -10
- dpdispatcher/dlog.py +9 -5
- dpdispatcher/dpdisp.py +15 -0
- dpdispatcher/entrypoints/run.py +9 -0
- dpdispatcher/machine.py +2 -2
- dpdispatcher/machines/JH_UniScheduler.py +175 -0
- dpdispatcher/machines/__init__.py +1 -0
- dpdispatcher/machines/distributed_shell.py +4 -6
- dpdispatcher/machines/fugaku.py +9 -9
- dpdispatcher/machines/lsf.py +2 -4
- dpdispatcher/machines/pbs.py +14 -14
- dpdispatcher/machines/shell.py +1 -6
- dpdispatcher/machines/slurm.py +12 -12
- dpdispatcher/run.py +172 -0
- dpdispatcher/submission.py +1 -3
- dpdispatcher/utils/hdfs_cli.py +4 -8
- {dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/METADATA +4 -3
- {dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/RECORD +26 -23
- {dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/WHEEL +1 -1
- {dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/LICENSE +0 -0
- {dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/entry_points.txt +0 -0
- {dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/top_level.txt +0 -0
dpdispatcher/_version.py
CHANGED

dpdispatcher/contexts/hdfs_context.py
CHANGED
@@ -138,7 +138,7 @@ class HDFSContext(BaseContext):
         shutil.rmtree(gz_dir, ignore_errors=True)
         os.mkdir(os.path.join(self.local_root, "tmp"))
         rfile_tgz = f"{self.remote_root}/{submission.submission_hash}_*_download.tar.gz"
-        lfile_tgz = "
+        lfile_tgz = f"{self.local_root}/tmp/"
         HDFS.copy_to_local(rfile_tgz, lfile_tgz)

         tgz_file_list = glob(os.path.join(self.local_root, "tmp/*_download.tar.gz"))
@@ -164,7 +164,7 @@ class HDFSContext(BaseContext):
                     os.path.join(
                         self.local_root,
                         task.task_work_path,
-                        "tag_failure_download_
+                        f"tag_failure_download_{jj}",
                     ),
                     "w",
                 ) as fp:
@@ -198,9 +198,7 @@ class HDFSContext(BaseContext):
         if check_exists:
             if mark_failure:
                 with open(
-                    os.path.join(
-                        self.local_root, "tag_failure_download_%s" % jj
-                    ),
+                    os.path.join(self.local_root, f"tag_failure_download_{jj}"),
                     "w",
                 ) as fp:
                     pass

dpdispatcher/contexts/local_context.py
CHANGED
@@ -153,7 +153,7 @@ class LocalContext(BaseContext):
                 tag_file_path = os.path.join(
                     self.local_root,
                     ii.task_work_path,
-                    "tag_failure_download_
+                    f"tag_failure_download_{kk}",
                 )
                 with open(tag_file_path, "w") as fp:
                     pass
@@ -181,7 +181,7 @@ class LocalContext(BaseContext):
                 tag_file_path = os.path.join(
                     self.local_root,
                     ii.task_work_path,
-                    "tag_failure_download_
+                    f"tag_failure_download_{jj}",
                 )
                 with open(tag_file_path, "w") as fp:
                     pass
@@ -227,7 +227,7 @@ class LocalContext(BaseContext):
         if check_exists:
             if mark_failure:
                 tag_file_path = os.path.join(
-                    self.local_root, "tag_failure_download_
+                    self.local_root, f"tag_failure_download_{kk}"
                 )
                 with open(tag_file_path, "w") as fp:
                     pass
@@ -252,7 +252,7 @@ class LocalContext(BaseContext):
                 if mark_failure:
                     with open(
                         os.path.join(
-                            self.local_root, "tag_failure_download_
+                            self.local_root, f"tag_failure_download_{jj}"
                         ),
                         "w",
                     ) as fp:
@@ -298,8 +298,8 @@ class LocalContext(BaseContext):
         code = proc.returncode
         if code != 0:
             raise RuntimeError(
-                "Get error code
-
+                f"Get error code {code} in locally calling {cmd} with job: {self.submission.submission_hash}"
+                f"\nStandard error: {stderr}"
             )
         return None, stdout, stderr

dpdispatcher/contexts/ssh_context.py
CHANGED
@@ -300,7 +300,7 @@ class SSHSession:
                 # retry for up to 3 times
                 # ensure alive
                 self.ensure_alive()
-                raise RetrySignal("SSH session not active in calling
+                raise RetrySignal(f"SSH session not active in calling {cmd}") from e

     @property
     def sftp(self):
@@ -628,8 +628,7 @@ class SSHContext(BaseContext):
         # check sha256
         # `:` means pass: https://stackoverflow.com/a/2421592/9567349
         _, stdout, _ = self.block_checkcall(
-            "sha256sum -c
-            % shlex.quote(sha256_file)
+            f"sha256sum -c {shlex.quote(sha256_file)} --quiet >.sha256sum_stdout 2>/dev/null || :"
         )
         self.sftp.remove(sha256_file)
         # regenerate file list
@@ -708,7 +707,7 @@ class SSHContext(BaseContext):
                     os.path.join(
                         self.local_root,
                         ii.task_work_path,
-                        "tag_failure_download_
+                        f"tag_failure_download_{jj}",
                     ),
                     "w",
                 ) as fp:
@@ -758,9 +757,9 @@ class SSHContext(BaseContext):
         assert self.remote_root is not None
         self.ssh_session.ensure_alive()
         if asynchronously:
-            cmd = "nohup
+            cmd = f"nohup {cmd} >/dev/null &"
         stdin, stdout, stderr = self.ssh_session.exec_command(
-            ("cd
+            (f"cd {shlex.quote(self.remote_root)} ;") + cmd
         )
         exit_status = stdout.channel.recv_exit_status()
         if exit_status != 0:
@@ -779,7 +778,7 @@ class SSHContext(BaseContext):
         assert self.remote_root is not None
         self.ssh_session.ensure_alive()
         stdin, stdout, stderr = self.ssh_session.exec_command(
-            ("cd
+            (f"cd {shlex.quote(self.remote_root)} ;") + cmd
         )
         exit_status = stdout.channel.recv_exit_status()
         return exit_status, stdin, stdout, stderr
@@ -846,12 +845,12 @@ class SSHContext(BaseContext):
         # Thus, it's better to use system's `rm` to remove a directory, which may
         # save a lot of time.
         if verbose:
-            dlog.info("removing
+            dlog.info(f"removing {remotepath}")
         # In some supercomputers, it's very slow to remove large numbers of files
         # (e.g. directory containing trajectory) due to bad I/O performance.
         # So an asynchronously option is provided.
         self.block_checkcall(
-            "rm -rf
+            f"rm -rf {shlex.quote(remotepath)}",
             asynchronously=self.clean_asynchronously,
         )
@@ -921,7 +920,7 @@ class SSHContext(BaseContext):
                 f"from {from_f} to {self.ssh_session.username} @ {self.ssh_session.hostname} : {to_f} Error!"
             )
             # remote extract
-            self.block_checkcall("tar xf
+            self.block_checkcall(f"tar xf {of}")
             # clean up
             os.remove(from_f)
             self.sftp.remove(to_f)
dpdispatcher/dlog.py
CHANGED
@@ -6,21 +6,25 @@ import warnings
 dlog = logging.getLogger("dpdispatcher")
 dlog.propagate = False
 dlog.setLevel(logging.INFO)
+cwd_logfile_path = os.path.join(os.getcwd(), "dpdispatcher.log")
+dlogf = logging.FileHandler(cwd_logfile_path, delay=True)
 try:
-
-
-    )
+    dlog.addHandler(dlogf)
+    dlog.info(f"LOG INIT:dpdispatcher log direct to {cwd_logfile_path}")
 except PermissionError:
+    dlog.removeHandler(dlogf)
     warnings.warn(
-        "dpdispatcher.log meet permission error. redirect the log to ~/dpdispatcher.log"
+        f"dump logfile dpdispatcher.log to {cwd_logfile_path} meet permission error. redirect the log to ~/dpdispatcher.log"
     )
     dlogf = logging.FileHandler(
         os.path.join(os.path.expanduser("~"), "dpdispatcher.log"), delay=True
     )
+    dlog.addHandler(dlogf)
+    dlog.info("LOG INIT:dpdispatcher log init at ~/dpdispatcher.log")

 dlogf_formatter = logging.Formatter("%(asctime)s - %(levelname)s : %(message)s")
 dlogf.setFormatter(dlogf_formatter)
-dlog.addHandler(dlogf)
+# dlog.addHandler(dlogf)

 dlog_stdout = logging.StreamHandler(sys.stdout)
 dlog_stdout.setFormatter(dlogf_formatter)
dpdispatcher/dpdisp.py
CHANGED
@@ -3,6 +3,7 @@ import argparse
 from typing import List, Optional

 from dpdispatcher.entrypoints.gui import start_dpgui
+from dpdispatcher.entrypoints.run import run
 from dpdispatcher.entrypoints.submission import handle_submission


@@ -81,6 +82,18 @@ def main_parser() -> argparse.ArgumentParser:
             "to the network on both IPv4 and IPv6 (where available)."
         ),
     )
+    ##########################################
+    # run
+    parser_run = subparsers.add_parser(
+        "run",
+        help="Run a Python script.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser_run.add_argument(
+        "filename",
+        type=str,
+        help="Python script to run. PEP 723 metadata should be contained in this file.",
+    )
     return parser


@@ -117,6 +130,8 @@ def main():
             port=args.port,
             bind_all=args.bind_all,
         )
+    elif args.command == "run":
+        run(filename=args.filename)
     elif args.command is None:
         pass
     else:
dpdispatcher/machine.py
CHANGED
@@ -261,7 +261,7 @@ class Machine(metaclass=ABCMeta):

         source_list = job.resources.source_list
         for ii in source_list:
-            line = "{ source
+            line = f"{{ source {ii}; }} \n"
             source_files_part += line

         export_envs_part = ""
@@ -466,7 +466,7 @@ class Machine(metaclass=ABCMeta):
         job : Job
             job
         """
-        dlog.warning("Job
+        dlog.warning(f"Job {job.job_id} should be manually killed")

     def get_exit_code(self, job):
         """Get exit code of the job.
dpdispatcher/machines/JH_UniScheduler.py
ADDED
@@ -0,0 +1,175 @@
+import shlex
+from typing import List
+
+from dargs import Argument
+
+from dpdispatcher.dlog import dlog
+from dpdispatcher.machine import Machine
+from dpdispatcher.utils.job_status import JobStatus
+from dpdispatcher.utils.utils import (
+    RetrySignal,
+    customized_script_header_template,
+    retry,
+)
+
+JH_UniScheduler_script_header_template = """\
+#!/bin/bash -l
+#JSUB -e %J.err
+#JSUB -o %J.out
+{JH_UniScheduler_nodes_line}
+{JH_UniScheduler_ptile_line}
+{JH_UniScheduler_partition_line}
+{JH_UniScheduler_number_gpu_line}"""
+
+
+class JH_UniScheduler(Machine):
+    """JH_UniScheduler batch."""
+
+    def gen_script(self, job):
+        JH_UniScheduler_script = super().gen_script(job)
+        return JH_UniScheduler_script
+
+    def gen_script_header(self, job):
+        resources = job.resources
+        script_header_dict = {
+            "JH_UniScheduler_nodes_line": f"#JSUB -n {resources.number_node * resources.cpu_per_node}",
+            "JH_UniScheduler_ptile_line": f"#JSUB -R 'span[ptile={resources.cpu_per_node}]'",
+            "JH_UniScheduler_partition_line": f"#JSUB -q {resources.queue_name}",
+        }
+        custom_gpu_line = resources.kwargs.get("custom_gpu_line", None)
+        if not custom_gpu_line:
+            script_header_dict["JH_UniScheduler_number_gpu_line"] = (
+                "" f"#JSUB -gpgpu {resources.gpu_per_node}"
+            )
+        else:
+            script_header_dict["JH_UniScheduler_number_gpu_line"] = custom_gpu_line
+        if (
+            resources["strategy"].get("customized_script_header_template_file")
+            is not None
+        ):
+            JH_UniScheduler_script_header = customized_script_header_template(
+                resources["strategy"]["customized_script_header_template_file"],
+                resources,
+            )
+        else:
+            JH_UniScheduler_script_header = (
+                JH_UniScheduler_script_header_template.format(**script_header_dict)
+            )
+
+        return JH_UniScheduler_script_header
+
+    @retry()
+    def do_submit(self, job):
+        script_file_name = job.script_file_name
+        script_str = self.gen_script(job)
+        job_id_name = job.job_hash + "_job_id"
+        self.context.write_file(fname=script_file_name, write_str=script_str)
+        script_run_str = self.gen_script_command(job)
+        script_run_file_name = f"{job.script_file_name}.run"
+        self.context.write_file(fname=script_run_file_name, write_str=script_run_str)
+
+        try:
+            stdin, stdout, stderr = self.context.block_checkcall(
+                "cd {} && {} {}".format(
+                    shlex.quote(self.context.remote_root),
+                    "jsub < ",
+                    shlex.quote(script_file_name),
+                )
+            )
+        except RuntimeError as err:
+            raise RetrySignal(err) from err
+
+        subret = stdout.readlines()
+        job_id = subret[0].split()[1][1:-1]
+        self.context.write_file(job_id_name, job_id)
+        return job_id
+
+    def default_resources(self, resources):
+        pass
+
+    @retry()
+    def check_status(self, job):
+        try:
+            job_id = job.job_id
+        except AttributeError:
+            return JobStatus.terminated
+        if job_id == "":
+            return JobStatus.unsubmitted
+        ret, stdin, stdout, stderr = self.context.block_call("jjobs " + job_id)
+        err_str = stderr.read().decode("utf-8")
+        if (f"Job <{job_id}> is not found") in err_str:
+            if self.check_finish_tag(job):
+                return JobStatus.finished
+            else:
+                return JobStatus.terminated
+        elif ret != 0:
+            # just retry when any unknown error raised.
+            raise RetrySignal(
+                "Get error code %d in checking status through ssh with job: %s . message: %s"
+                % (ret, job.job_hash, err_str)
+            )
+        status_out = stdout.read().decode("utf-8").split("\n")
+        if len(status_out) < 2:
+            return JobStatus.unknown
+        else:
+            status_line = status_out[1]
+            status_word = status_line.split()[2]
+
+        if status_word in ["PEND"]:
+            return JobStatus.waiting
+        elif status_word in ["RUN", "PSUSP", "SSUSP", "USUSP"]:
+            return JobStatus.running
+        elif status_word in ["DONE", "EXIT"]:
+            if self.check_finish_tag(job):
+                dlog.info(f"job: {job.job_hash} {job.job_id} finished")
+                return JobStatus.finished
+            else:
+                return JobStatus.terminated
+        else:
+            return JobStatus.unknown
+
+    def check_finish_tag(self, job):
+        job_tag_finished = job.job_hash + "_job_tag_finished"
+        return self.context.check_file_exists(job_tag_finished)
+
+    @classmethod
+    def resources_subfields(cls) -> List[Argument]:
+        """Generate the resources subfields.
+
+        Returns
+        -------
+        list[Argument]
+            resources subfields
+        """
+        doc_custom_gpu_line = "Custom GPU configuration, starting with #JSUB"
+
+        return [
+            Argument(
+                "kwargs",
+                dict,
+                [
+                    Argument(
+                        "custom_gpu_line",
+                        str,
+                        optional=True,
+                        default=None,
+                        doc=doc_custom_gpu_line,
+                    ),
+                ],
+                optional=False,
+                doc="Extra arguments.",
+            )
+        ]
+
+    def kill(self, job):
+        """Kill the job.
+
+        Parameters
+        ----------
+        job : Job
+            job
+        """
+        job_id = job.job_id
+        ret, stdin, stdout, stderr = self.context.block_call(
+            "jctrl kill " + str(job_id)
+        )
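
With the default template above and no custom_gpu_line, a job with number_node=1, cpu_per_node=4, queue_name="gpu", and gpu_per_node=1 would get the following generated header (worked out mechanically from gen_script_header; the values are illustrative):

    #!/bin/bash -l
    #JSUB -e %J.err
    #JSUB -o %J.out
    #JSUB -n 4
    #JSUB -R 'span[ptile=4]'
    #JSUB -q gpu
    #JSUB -gpgpu 1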
dpdispatcher/machines/distributed_shell.py
CHANGED
@@ -64,7 +64,7 @@ class DistributedShell(Machine):

         source_list = job.resources.source_list
         for ii in source_list:
-            line = "{ source
+            line = f"{{ source {ii}; }} \n"
             source_files_part += line

         export_envs_part = ""
@@ -96,7 +96,7 @@ class DistributedShell(Machine):
     def gen_script_end(self, job):
         all_task_dirs = ""
         for task in job.job_task_list:
-            all_task_dirs += "
+            all_task_dirs += f"{task.task_work_path} "
         job_tag_finished = job.job_hash + "_job_tag_finished"
         flag_if_job_task_fail = job.job_hash + "_flag_if_job_task_fail"

@@ -173,10 +173,8 @@ class DistributedShell(Machine):
             )
         )

-        cmd =
-            submit_command
-            output_name,
-            output_name,
+        cmd = (
+            f"{{ nohup {submit_command} 1>{output_name} 2>{output_name} & }} && echo $!"
         )
         ret, stdout, stderr = run_cmd_with_all_output(cmd)
dpdispatcher/machines/fugaku.py
CHANGED
@@ -20,15 +20,15 @@ class Fugaku(Machine):
     def gen_script_header(self, job):
         resources = job.resources
         fugaku_script_header_dict = {}
-        fugaku_script_header_dict[
-            "
-
-        fugaku_script_header_dict[
-            "
-
-        fugaku_script_header_dict[
-            "
-
+        fugaku_script_header_dict["fugaku_node_number_line"] = (
+            f'#PJM -L "node={resources.number_node}" '
+        )
+        fugaku_script_header_dict["fugaku_ntasks_per_node_line"] = (
+            f'#PJM --mpi "max-proc-per-node={resources.cpu_per_node}"'
+        )
+        fugaku_script_header_dict["queue_name_line"] = (
+            f'#PJM -L "rscgrp={resources.queue_name}"'
+        )
         if (
             resources["strategy"].get("customized_script_header_template_file")
             is not None
dpdispatcher/machines/lsf.py
CHANGED
@@ -32,9 +32,7 @@ class LSF(Machine):
     def gen_script_header(self, job):
         resources = job.resources
         script_header_dict = {
-            "lsf_nodes_line": "#BSUB -n {
-                number_cores=resources.number_node * resources.cpu_per_node
-            ),
+            "lsf_nodes_line": f"#BSUB -n {resources.number_node * resources.cpu_per_node}",
             "lsf_ptile_line": f"#BSUB -R 'span[ptile={resources.cpu_per_node}]'",
             "lsf_partition_line": f"#BSUB -q {resources.queue_name}",
         }
@@ -123,7 +121,7 @@ class LSF(Machine):
             return JobStatus.unsubmitted
         ret, stdin, stdout, stderr = self.context.block_call("bjobs " + job_id)
         err_str = stderr.read().decode("utf-8")
-        if ("Job
+        if (f"Job <{job_id}> is not found") in err_str:
             if self.check_finish_tag(job):
                 return JobStatus.finished
             else:
dpdispatcher/machines/pbs.py
CHANGED
@@ -21,13 +21,13 @@ class PBS(Machine):
     def gen_script_header(self, job):
         resources = job.resources
         pbs_script_header_dict = {}
-        pbs_script_header_dict[
-            "
-
+        pbs_script_header_dict["select_node_line"] = (
+            f"#PBS -l select={resources.number_node}:ncpus={resources.cpu_per_node}"
+        )
         if resources.gpu_per_node != 0:
-            pbs_script_header_dict[
-                "
-
+            pbs_script_header_dict["select_node_line"] += (
+                f":ngpus={resources.gpu_per_node}"
+            )
         pbs_script_header_dict["queue_name_line"] = f"#PBS -q {resources.queue_name}"
         if (
             resources["strategy"].get("customized_script_header_template_file")
@@ -156,12 +156,12 @@ class Torque(PBS):
         # ref: https://support.adaptivecomputing.com/wp-content/uploads/2021/02/torque/torque.htm#topics/torque/2-jobs/requestingRes.htm
         resources = job.resources
         pbs_script_header_dict = {}
-        pbs_script_header_dict[
-            "
-
+        pbs_script_header_dict["select_node_line"] = (
+            f"#PBS -l nodes={resources.number_node}:ppn={resources.cpu_per_node}"
+        )
         if resources.gpu_per_node != 0:
-            pbs_script_header_dict["select_node_line"] +=
-
+            pbs_script_header_dict["select_node_line"] += (
+                f":gpus={resources.gpu_per_node}"
             )
         pbs_script_header_dict["queue_name_line"] = f"#PBS -q {resources.queue_name}"
         if (
@@ -212,9 +212,9 @@ class SGE(PBS):
         resources = job.resources
         sge_script_header_dict = {}
         # resources.number_node is not used
-        sge_script_header_dict[
-            "
-
+        sge_script_header_dict["select_node_line"] = (
+            f"#$ -pe mpi {resources.cpu_per_node} "
+        )
         # resources.queue_name is not necessary
         sge_script_header = sge_script_header_template.format(**sge_script_header_dict)
         return sge_script_header
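
For number_node=2, cpu_per_node=8, and gpu_per_node=1, the three schedulers above emit resource lines of the following shape (worked out from the f-strings; SGE ignores number_node, as the comment in its hunk notes):

    PBS:    #PBS -l select=2:ncpus=8:ngpus=1
    Torque: #PBS -l nodes=2:ppn=8:gpus=1
    SGE:    #$ -pe mpi 8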
dpdispatcher/machines/shell.py
CHANGED
@@ -39,12 +39,7 @@ class Shell(Machine):
         script_run_file_name = f"{job.script_file_name}.run"
         self.context.write_file(fname=script_run_file_name, write_str=script_run_str)
         ret, stdin, stdout, stderr = self.context.block_call(
-            "cd {} && {{ nohup bash {} 1>>{} 2>>{} & }} && echo $!"
-                shlex.quote(self.context.remote_root),
-                script_file_name,
-                output_name,
-                output_name,
-            )
+            f"cd {shlex.quote(self.context.remote_root)} && {{ nohup bash {script_file_name} 1>>{output_name} 2>>{output_name} & }} && echo $!"
         )
         if ret != 0:
             err_str = stderr.read().decode("utf-8")
dpdispatcher/machines/slurm.py
CHANGED
@@ -39,23 +39,23 @@ class Slurm(Machine):
     def gen_script_header(self, job):
         resources = job.resources
         script_header_dict = {}
-        script_header_dict["slurm_nodes_line"] =
-
+        script_header_dict["slurm_nodes_line"] = (
+            f"#SBATCH --nodes {resources.number_node}"
+        )
+        script_header_dict["slurm_ntasks_per_node_line"] = (
+            f"#SBATCH --ntasks-per-node {resources.cpu_per_node}"
         )
-        script_header_dict[
-            "slurm_ntasks_per_node_line"
-        ] = f"#SBATCH --ntasks-per-node {resources.cpu_per_node}"
         custom_gpu_line = resources.kwargs.get("custom_gpu_line", None)
         if not custom_gpu_line:
-            script_header_dict[
-                "
-
+            script_header_dict["slurm_number_gpu_line"] = (
+                f"#SBATCH --gres=gpu:{resources.gpu_per_node}"
+            )
         else:
             script_header_dict["slurm_number_gpu_line"] = custom_gpu_line
         if resources.queue_name != "":
-            script_header_dict[
-                "
-
+            script_header_dict["slurm_partition_line"] = (
+                f"#SBATCH --partition {resources.queue_name}"
+            )
         else:
             script_header_dict["slurm_partition_line"] = ""
         if (
@@ -254,7 +254,7 @@ class SlurmJobArray(Slurm):
             ).as_posix()
             if not self.context.check_file_exists(task_tag_finished):
                 job_array.add(ii // slurm_job_size)
-        return super().gen_script_header(job) + "\n#SBATCH --array
+        return super().gen_script_header(job) + "\n#SBATCH --array={}".format(
             ",".join(map(str, job_array))
         )
         return super().gen_script_header(job) + "\n#SBATCH --array=0-%d" % (
dpdispatcher/run.py
ADDED
@@ -0,0 +1,172 @@
+import os
+import re
+import sys
+from glob import glob
+from hashlib import sha1
+
+from dpdispatcher.machine import Machine
+from dpdispatcher.submission import Resources, Submission, Task
+
+if sys.version_info >= (3, 11):
+    import tomllib
+else:
+    import tomli as tomllib
+from typing import List, Optional
+
+from dargs import Argument
+
+from dpdispatcher.arginfo import machine_dargs, resources_dargs, task_dargs
+
+REGEX = r"(?m)^# /// (?P<type>[a-zA-Z0-9-]+)$\s(?P<content>(^#(| .*)$\s)+)^# ///$"
+
+
+def read_pep723(script: str) -> Optional[dict]:
+    """Read a PEP 723 script metadata from a script string.
+
+    Parameters
+    ----------
+    script : str
+        Script content.
+
+    Returns
+    -------
+    dict
+        PEP 723 metadata.
+    """
+    name = "script"
+    matches = list(
+        filter(lambda m: m.group("type") == name, re.finditer(REGEX, script))
+    )
+    if len(matches) > 1:
+        # TODO: Add tests for scenarios where multiple script blocks are found
+        raise ValueError(f"Multiple {name} blocks found")
+    elif len(matches) == 1:
+        content = "".join(
+            line[2:] if line.startswith("# ") else line[1:]
+            for line in matches[0].group("content").splitlines(keepends=True)
+        )
+        return tomllib.loads(content)
+    else:
+        # TODO: Add tests for scenarios where no metadata is found
+        return None
+
+
+def pep723_args() -> Argument:
+    """Return the argument parser for PEP 723 metadata."""
+    machine_args = machine_dargs()
+    machine_args.fold_subdoc = True
+    machine_args.doc = "Machine configuration. See related documentation for details."
+    resources_args = resources_dargs(detail_kwargs=False)
+    resources_args.fold_subdoc = True
+    resources_args.doc = (
+        "Resources configuration. See related documentation for details."
+    )
+    task_args = task_dargs()
+    command_arg = task_args["command"]
+    command_arg.doc = (
+        "Python interpreter or launcher. No need to contain the Python script filename."
+    )
+    command_arg.default = "python"
+    command_arg.optional = True
+    task_args["task_work_path"].doc += " Can be a glob pattern."
+    task_args.name = "task_list"
+    task_args.doc = "List of tasks to execute."
+    task_args.repeat = True
+    task_args.dtype = (list,)
+    return Argument(
+        "pep723",
+        dtype=dict,
+        doc="PEP 723 metadata",
+        sub_fields=[
+            Argument(
+                "work_base",
+                dtype=str,
+                optional=True,
+                default="./",
+                doc="Base directory for the work",
+            ),
+            Argument(
+                "forward_common_files",
+                dtype=List[str],
+                optional=True,
+                default=[],
+                doc="Common files to forward to the remote machine",
+            ),
+            Argument(
+                "backward_common_files",
+                dtype=List[str],
+                optional=True,
+                default=[],
+                doc="Common files to backward from the remote machine",
+            ),
+            machine_args,
+            resources_args,
+            task_args,
+        ],
+    )
+
+
+def create_submission(metadata: dict, hash: str) -> Submission:
+    """Create a Submission instance from a PEP 723 metadata.
+
+    Parameters
+    ----------
+    metadata : dict
+        PEP 723 metadata.
+    hash : str
+        Submission hash.
+
+    Returns
+    -------
+    Submission
+        Submission instance.
+    """
+    base = pep723_args()
+    metadata = base.normalize_value(metadata, trim_pattern="_*")
+    base.check_value(metadata, strict=False)
+
+    tasks = []
+    for task in metadata["task_list"]:
+        task = task.copy()
+        task["command"] += f" $REMOTE_ROOT/script_{hash}.py"
+        task_work_path = os.path.join(
+            metadata["machine"]["local_root"],
+            metadata["work_base"],
+            task["task_work_path"],
+        )
+        if os.path.isdir(task_work_path):
+            tasks.append(Task.load_from_dict(task))
+        elif glob(task_work_path):
+            for file in glob(task_work_path):
+                tasks.append(Task.load_from_dict({**task, "task_work_path": file}))
+            # TODO: Add tests for scenarios where the task work path is a glob pattern
+        else:
+            # TODO: Add tests for scenarios where the task work path is not found
+            raise FileNotFoundError(f"Task work path {task_work_path} not found.")
+    return Submission(
+        work_base=metadata["work_base"],
+        forward_common_files=metadata["forward_common_files"],
+        backward_common_files=metadata["backward_common_files"],
+        machine=Machine.load_from_dict(metadata["machine"]),
+        resources=Resources.load_from_dict(metadata["resources"]),
+        task_list=tasks,
+    )
+
+
+def run_pep723(script: str):
+    """Run a PEP 723 script.
+
+    Parameters
+    ----------
+    script : str
+        Script content.
+    """
+    metadata = read_pep723(script)
+    if metadata is None:
+        raise ValueError("No PEP 723 metadata found.")
+    dpdispatcher_metadata = metadata["tool"]["dpdispatcher"]
+    script_hash = sha1(script.encode("utf-8")).hexdigest()
+    submission = create_submission(dpdispatcher_metadata, script_hash)
+    submission.machine.context.write_file(f"script_{script_hash}.py", script)
+    # write script
+    submission.run_submission()
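
A quick sketch of what read_pep723 extracts, assuming the module is importable as dpdispatcher.run; the script text and the parsed result shown in the comment are illustrative:

    from dpdispatcher.run import read_pep723

    # Build the script from flush-left lines so the PEP 723 markers match
    # the (?m)^# /// anchors in REGEX.
    lines = [
        "# /// script",
        "# [tool.dpdispatcher]",
        '# work_base = "./"',
        "# ///",
        'print("hello")',
    ]
    metadata = read_pep723("\n".join(lines) + "\n")
    # parsed TOML: {'tool': {'dpdispatcher': {'work_base': './'}}}
    print(metadata["tool"]["dpdispatcher"]["work_base"])  # ./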
dpdispatcher/submission.py
CHANGED
@@ -863,9 +863,7 @@ class Job:
             self.submit_job()
             if self.job_state != JobStatus.unsubmitted:
                 dlog.info(
-                    "job:{job_hash} re-submit after terminated; new job_id is {job_id}"
-                        job_hash=self.job_hash, job_id=self.job_id
-                    )
+                    f"job:{self.job_hash} re-submit after terminated; new job_id is {self.job_id}"
                 )
             time.sleep(0.2)
             self.get_job_state()
dpdispatcher/utils/hdfs_cli.py
CHANGED
@@ -88,10 +88,8 @@ class HDFS:
                 return True, out
             else:
                 raise RuntimeError(
-                    "Cannot copy local[{}] to remote[{}] with cmd[{}]; "
-                    "ret[{}] output[{}] stderr[{}]"
-                        local_path, to_uri, cmd, ret, out, err
-                    )
+                    f"Cannot copy local[{local_path}] to remote[{to_uri}] with cmd[{cmd}]; "
+                    f"ret[{ret}] output[{out}] stderr[{err}]"
                 )
         except Exception as e:
             raise RuntimeError(
@@ -113,10 +111,8 @@ class HDFS:
                 return True
             else:
                 raise RuntimeError(
-                    "Cannot copy remote[{}] to local[{}] with cmd[{}]; "
-                    "ret[{}] output[{}] stderr[{}]"
-                        from_uri, local_path, cmd, ret, out, err
-                    )
+                    f"Cannot copy remote[{from_uri}] to local[{local_path}] with cmd[{cmd}]; "
+                    f"ret[{ret}] output[{out}] stderr[{err}]"
                 )
         except Exception as e:
             raise RuntimeError(
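
Both hunks rely on Python concatenating adjacent string literals, f-strings included, into a single literal at compile time. A tiny self-contained illustration with made-up values:

    local_path, to_uri, cmd = "a.tgz", "hdfs:///tmp/a.tgz", "hadoop fs -put"
    ret, out, err = 1, "", "No such file or directory"

    # Two adjacent f-strings compile to one message string.
    msg = (
        f"Cannot copy local[{local_path}] to remote[{to_uri}] with cmd[{cmd}]; "
        f"ret[{ret}] output[{out}] stderr[{err}]"
    )
    print(msg)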
{dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dpdispatcher
-Version: 0.6.4
+Version: 0.6.5
 Summary: Generate HPC scheduler systems jobs input scripts, submit these scripts to HPC systems, and poke until they finish
 Author: DeepModeling
 License: GNU LESSER GENERAL PUBLIC LICENSE
@@ -172,7 +172,7 @@ License: GNU LESSER GENERAL PUBLIC LICENSE
 Project-URL: Homepage, https://github.com/deepmodeling/dpdispatcher
 Project-URL: documentation, https://docs.deepmodeling.com/projects/dpdispatcher
 Project-URL: repository, https://github.com/deepmodeling/dpdispatcher
-Keywords: dispatcher,hpc,slurm,lsf,pbs,ssh
+Keywords: dispatcher,hpc,slurm,lsf,pbs,ssh,jh_unischeduler
 Classifier: Programming Language :: Python :: 3.7
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
@@ -190,6 +190,7 @@ Requires-Dist: dargs >=0.4.1
 Requires-Dist: requests
 Requires-Dist: tqdm >=4.9.0
 Requires-Dist: pyyaml
+Requires-Dist: tomli >=1.1.0 ; python_version < "3.11"
 Requires-Dist: typing-extensions ; python_version < "3.7"
 Provides-Extra: bohrium
 Requires-Dist: oss2 ; extra == 'bohrium'
@@ -250,4 +251,4 @@ See [Contributing Guide](CONTRIBUTING.md) to become a contributor! 🤓

 ## References

-DPDispatcher is
+DPDispatcher is derived from the [DP-GEN](https://github.com/deepmodeling/dpgen) package. To mention DPDispatcher in a scholarly publication, please read Section 3.3 in the [DP-GEN paper](https://doi.org/10.1016/j.cpc.2020.107206).
{dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/RECORD
CHANGED
@@ -1,35 +1,38 @@
 dpdispatcher/__init__.py,sha256=CLZP_N5CTp14ujWCykEHuJjoIfKR6CwrclXhjWUgNoE,517
 dpdispatcher/__main__.py,sha256=BFhG-mSBzVZUEezQJqXWZnt2WsnhAHT_zpT8Y6gpOz0,116
-dpdispatcher/_version.py,sha256=
+dpdispatcher/_version.py,sha256=PuC6q1U5hHaOMp2tDNeTKt6ExeuO2V9ihjqjMYIsVUo,411
 dpdispatcher/arginfo.py,sha256=pNaxYIE6ahBidpR7OCKZdw8iGt003uTXGSlVzwiuvRg,188
 dpdispatcher/base_context.py,sha256=NvaC_RHyspxq412z-eCq4Zn8-szZxvn8K6OkXvx7l4Y,3615
-dpdispatcher/dlog.py,sha256=
-dpdispatcher/dpdisp.py,sha256=
-dpdispatcher/machine.py,sha256=
-dpdispatcher/
-dpdispatcher/
+dpdispatcher/dlog.py,sha256=QJKAwB6gV3Zb6zQUL9dZ_uIoTIEy9Z7ecmVQ-8WNmD8,1081
+dpdispatcher/dpdisp.py,sha256=jhuTmwPY7KBF4WukaQomEwZcfYoISaMbKwuxdDGSluc,4206
+dpdispatcher/machine.py,sha256=z5D0eLAPfdo5SZdO6NLvWBUUePE0VHRMWurRMzEV0U0,16138
+dpdispatcher/run.py,sha256=tFHbJAioXXpgHTE5bhRRAuc8w7cX1ET9SBbiAg3Rw-I,5382
+dpdispatcher/submission.py,sha256=0_PCpRyiUwCHwYAzdXs-3rzq8YzZs0VZBU6tS7SixG0,48361
+dpdispatcher/contexts/__init__.py,sha256=jlvcIppmUnS39yBlkZEDvIQFV-j_BR75ZTbZALF_RB0,336
 dpdispatcher/contexts/dp_cloud_server_context.py,sha256=6XK0B2sLGEDeZmV2SZzQdVrMcWAWYZVLLK-IaShEXIY,12245
-dpdispatcher/contexts/hdfs_context.py,sha256=
+dpdispatcher/contexts/hdfs_context.py,sha256=B6pjGUD8Xaa0G_Zrnoci2DZnEXxojE9fAcexMMvAZCM,8930
 dpdispatcher/contexts/lazy_local_context.py,sha256=F8abWAJRY1Ewx1sErINKN1ltWerXzeCcJgjTvLvucKE,5696
-dpdispatcher/contexts/local_context.py,sha256=
+dpdispatcher/contexts/local_context.py,sha256=AsIfOT24FV0_bNlD2xU-pqAJy-XHZ6XTsbll4Vt6bMM,14065
 dpdispatcher/contexts/openapi_context.py,sha256=DXaMS10SXN3VKEeEdzQyfOgRwUyHRJVCJHd2fKKdsmA,9499
-dpdispatcher/contexts/ssh_context.py,sha256=
+dpdispatcher/contexts/ssh_context.py,sha256=baMiD_1KlrksqNKCkpx7apovLW_qdfU9U1KRDNTjCz0,38578
 dpdispatcher/dpcloudserver/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dpdispatcher/dpcloudserver/client.py,sha256=k1niKjG6zFnMtHn_UuCjYoOcMju3o3PV-GdyVLr5-KM,165
 dpdispatcher/entrypoints/__init__.py,sha256=exKSFT3j2oCerGwtI8WbHQK-D0K-CyifocRji1xntT4,20
 dpdispatcher/entrypoints/gui.py,sha256=29lMXqbmSRbLj4rfBv7Jnw89NLU9syTB88IUP6IRJsU,830
+dpdispatcher/entrypoints/run.py,sha256=tRkHfeAktV6gF31yb2MVOSTlpNGZFw3N0jHBmM1YfIg,175
 dpdispatcher/entrypoints/submission.py,sha256=ikVwIZAQL0SsYO5xaMIdKXgO6qtc05w1vqmvtG7Nk5M,3401
-dpdispatcher/machines/
-dpdispatcher/machines/
+dpdispatcher/machines/JH_UniScheduler.py,sha256=f7Vs9_m4Th1GVSgsJTy9_nMAY8g9n0ZewnPY2DFECfI,5795
+dpdispatcher/machines/__init__.py,sha256=tOQuPUlW1Ab4qcC0oSAIyDjZA_WyE67h_EIxPCWGhys,336
+dpdispatcher/machines/distributed_shell.py,sha256=LvWl6ktPlgmJ7rk90VWxp4douve8hYmuRf-B0saFBds,7534
 dpdispatcher/machines/dp_cloud_server.py,sha256=SR69gsFb2BvOQCW1QnWfP3cQvu_qHLJNsycp5wzosJU,11706
-dpdispatcher/machines/fugaku.py,sha256=
-dpdispatcher/machines/lsf.py,sha256=
+dpdispatcher/machines/fugaku.py,sha256=oY2hD2ldL2dztwtJ9WNisdsfPnaX-5yTRXewIT9r60I,4314
+dpdispatcher/machines/lsf.py,sha256=Q6IE4nCkNEKcW0AdBTKPOYgmCJAeXWmUVxZ9sQFkxos,7932
 dpdispatcher/machines/openapi.py,sha256=Gzzbo8YOAybXGTrgMutexErcaEi3ts7uTUNvOhThFS8,8858
-dpdispatcher/machines/pbs.py,sha256=
-dpdispatcher/machines/shell.py,sha256=
-dpdispatcher/machines/slurm.py,sha256=
+dpdispatcher/machines/pbs.py,sha256=KjJcLpQr748ZgOwFfWmJ_LG1q6Jm1UF24YCSLiDfcac,10308
+dpdispatcher/machines/shell.py,sha256=DnqMNb2nmBc3gVx8tA8oiUWdnWHKJwpIPs660i3Eq7A,4703
+dpdispatcher/machines/slurm.py,sha256=YM2Mv55jAFtDIiJoJLkD6p1Wi1ujjH6t4WlU8EtlbCw,15592
 dpdispatcher/utils/__init__.py,sha256=fwvwkMf7DFNQkNBiIce8Y8gRA6FhICwKjkKiXu_BEJg,13
-dpdispatcher/utils/hdfs_cli.py,sha256=
+dpdispatcher/utils/hdfs_cli.py,sha256=n3EIfFIralsISlaEewawD35f0P8mabo-u8D8UW3k_7Y,5308
 dpdispatcher/utils/job_status.py,sha256=Eszs4TPLfszCuf6zLaFonf25feXDUguF28spYOjJpQE,233
 dpdispatcher/utils/record.py,sha256=c8jdPmCuLzRmFo_jOjR0j9zFR1EWX3NSHVuPEIYCycg,2147
 dpdispatcher/utils/utils.py,sha256=1One9eW-v3ejDcL6PB9PSCMZQkalnbxq0DfJoUwQaLs,5334
@@ -38,9 +41,9 @@ dpdispatcher/utils/dpcloudserver/client.py,sha256=CLfXswvzI4inDrW2bYkfMQ6gQJFcZO
 dpdispatcher/utils/dpcloudserver/config.py,sha256=NteQzf1OeEkz2UbkXHHQ0B72cUu23zLVzpM9Yh4v1Cc,559
 dpdispatcher/utils/dpcloudserver/retcode.py,sha256=1qAF8gFZx55u2sO8KbtYSIIrjcO-IGufEUlwbkSfC1g,721
 dpdispatcher/utils/dpcloudserver/zip_file.py,sha256=f9WrlktwHW0YipaWg5Y0kxjMZlhD1cJYa6EUpvu4Cro,2611
-dpdispatcher-0.6.
-dpdispatcher-0.6.
-dpdispatcher-0.6.
-dpdispatcher-0.6.
-dpdispatcher-0.6.
-dpdispatcher-0.6.
+dpdispatcher-0.6.5.dist-info/LICENSE,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
+dpdispatcher-0.6.5.dist-info/METADATA,sha256=eLIZlw1J6l08VjltNG2O3Z7kWK_TNVJR08aaGlfwESc,12821
+dpdispatcher-0.6.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+dpdispatcher-0.6.5.dist-info/entry_points.txt,sha256=NRHUV0IU_u7_XtcmmEDnVzAcUmurhiEAGwENckrajo4,233
+dpdispatcher-0.6.5.dist-info/top_level.txt,sha256=35jAQoXY-b-e9fJ1_mxhZUiaCoJNt1ZI7mpFRf07Qjs,13
+dpdispatcher-0.6.5.dist-info/RECORD,,

{dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/LICENSE
File without changes

{dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/entry_points.txt
File without changes

{dpdispatcher-0.6.4.dist-info → dpdispatcher-0.6.5.dist-info}/top_level.txt
File without changes