dpdispatcher 0.5.11__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. dpdispatcher/__init__.py +7 -89
  2. dpdispatcher/__main__.py +8 -0
  3. dpdispatcher/_version.py +14 -2
  4. dpdispatcher/base_context.py +1 -1
  5. dpdispatcher/contexts/__init__.py +11 -0
  6. dpdispatcher/{dp_cloud_server_context.py → contexts/dp_cloud_server_context.py} +7 -3
  7. dpdispatcher/{hdfs_context.py → contexts/hdfs_context.py} +2 -2
  8. dpdispatcher/{local_context.py → contexts/local_context.py} +51 -14
  9. dpdispatcher/{openapi_context.py → contexts/openapi_context.py} +3 -2
  10. dpdispatcher/{ssh_context.py → contexts/ssh_context.py} +113 -34
  11. dpdispatcher/dlog.py +31 -0
  12. dpdispatcher/dpdisp.py +113 -1
  13. dpdispatcher/entrypoints/__init__.py +1 -0
  14. dpdispatcher/entrypoints/gui.py +31 -0
  15. dpdispatcher/entrypoints/submission.py +83 -0
  16. dpdispatcher/machine.py +18 -4
  17. dpdispatcher/machines/__init__.py +11 -0
  18. dpdispatcher/{distributed_shell.py → machines/distributed_shell.py} +20 -4
  19. dpdispatcher/{dp_cloud_server.py → machines/dp_cloud_server.py} +21 -5
  20. dpdispatcher/{fugaku.py → machines/fugaku.py} +18 -5
  21. dpdispatcher/{lsf.py → machines/lsf.py} +20 -4
  22. dpdispatcher/{openapi.py → machines/openapi.py} +23 -4
  23. dpdispatcher/{pbs.py → machines/pbs.py} +30 -4
  24. dpdispatcher/{shell.py → machines/shell.py} +17 -3
  25. dpdispatcher/{slurm.py → machines/slurm.py} +37 -6
  26. dpdispatcher/submission.py +83 -39
  27. dpdispatcher/utils/__init__.py +1 -0
  28. dpdispatcher/{dpcloudserver → utils/dpcloudserver}/client.py +1 -1
  29. dpdispatcher/{hdfs_cli.py → utils/hdfs_cli.py} +1 -1
  30. dpdispatcher/utils/record.py +79 -0
  31. dpdispatcher/{utils.py → utils/utils.py} +14 -2
  32. {dpdispatcher-0.5.11.dist-info → dpdispatcher-0.6.1.dist-info}/METADATA +7 -2
  33. dpdispatcher-0.6.1.dist-info/RECORD +44 -0
  34. {dpdispatcher-0.5.11.dist-info → dpdispatcher-0.6.1.dist-info}/WHEEL +1 -1
  35. dpdispatcher-0.6.1.dist-info/entry_points.txt +7 -0
  36. dpdispatcher/dpcloudserver/temp_test.py +0 -90
  37. dpdispatcher-0.5.11.dist-info/RECORD +0 -36
  38. dpdispatcher-0.5.11.dist-info/entry_points.txt +0 -2
  39. /dpdispatcher/{lazy_local_context.py → contexts/lazy_local_context.py} +0 -0
  40. /dpdispatcher/{dpcloudserver → utils/dpcloudserver}/__init__.py +0 -0
  41. /dpdispatcher/{dpcloudserver → utils/dpcloudserver}/config.py +0 -0
  42. /dpdispatcher/{dpcloudserver → utils/dpcloudserver}/retcode.py +0 -0
  43. /dpdispatcher/{dpcloudserver → utils/dpcloudserver}/zip_file.py +0 -0
  44. /dpdispatcher/{JobStatus.py → utils/job_status.py} +0 -0
  45. {dpdispatcher-0.5.11.dist-info → dpdispatcher-0.6.1.dist-info}/LICENSE +0 -0
  46. {dpdispatcher-0.5.11.dist-info → dpdispatcher-0.6.1.dist-info}/top_level.txt +0 -0
dpdispatcher/dpdisp.py CHANGED
@@ -1,8 +1,120 @@
1
1
  #!/usr/bin/env python
2
+ import argparse
3
+ from typing import List, Optional
4
+
5
+ from dpdispatcher.entrypoints.gui import start_dpgui
6
+ from dpdispatcher.entrypoints.submission import handle_submission
7
+
8
+
9
def main_parser() -> argparse.ArgumentParser:
    """Build the dpdispatcher command-line argument parser.

    Two subcommands are registered under the ``command`` destination:
    ``submission`` (a positional ``SUBMISSION_HASH`` plus three boolean
    action flags) and ``gui`` (``-p/--port`` and ``--bind_all``).

    Notes
    -----
    This function is used by documentation.

    Returns
    -------
    argparse.ArgumentParser
        the argument parser
    """
    # Every parser shares the formatter that appends default values to help.
    help_formatter = argparse.ArgumentDefaultsHelpFormatter

    root = argparse.ArgumentParser(
        description="dpdispatcher: Generate HPC scheduler systems jobs input scripts, submit these scripts to HPC systems, and poke until they finish",
        formatter_class=help_formatter,
    )
    commands = root.add_subparsers(title="Valid subcommands", dest="command")

    ##########################################
    # submission
    submission_cmd = commands.add_parser(
        "submission",
        help="Handle terminated submission.",
        formatter_class=help_formatter,
    )
    submission_cmd.add_argument(
        "SUBMISSION_HASH",
        type=str,
        help="Submission hash to download.",
    )
    action_group = submission_cmd.add_argument_group(
        "Actions",
        description="One or more actions to take on submission.",
    )
    # All actions are plain on/off switches; register them in display order.
    action_flags = (
        ("--download-terminated-log", "Download log files of terminated tasks."),
        ("--download-finished-task", "Download finished tasks."),
        ("--clean", "Clean submission."),
    )
    for flag, flag_help in action_flags:
        action_group.add_argument(flag, action="store_true", help=flag_help)

    ##########################################
    # gui
    gui_cmd = commands.add_parser(
        "gui",
        help="Serve DP-GUI.",
        formatter_class=help_formatter,
    )
    gui_cmd.add_argument(
        "-p",
        "--port",
        type=int,
        default=6042,
        help="The port to serve DP-GUI on.",
    )
    gui_cmd.add_argument(
        "--bind_all",
        action="store_true",
        help=(
            "Serve on all public interfaces. This will expose your DP-GUI instance "
            "to the network on both IPv4 and IPv6 (where available)."
        ),
    )
    return root
80
+
81
+
82
def parse_args(args: Optional[List[str]] = None):
    """Parse dpdispatcher command-line options.

    Parameters
    ----------
    args : List[str], optional
        List of command-line arguments; mainly useful for testing. With the
        default ``None``, the arguments are taken from ``sys.argv``.

    Returns
    -------
    argparse.Namespace
        The parsed arguments. When no subcommand was given, the help text is
        printed first and the namespace is still returned.
    """
    cli = main_parser()
    namespace = cli.parse_args(args=args)

    # No subcommand selected: show usage, but let the caller decide what next.
    if namespace.command is None:
        cli.print_help()

    return namespace
2
98
 
3
99
 
4
100
  def main():
5
- print("test")
101
+ args = parse_args()
102
+ if args.command == "submission":
103
+ handle_submission(
104
+ submission_hash=args.SUBMISSION_HASH,
105
+ download_terminated_log=args.download_terminated_log,
106
+ download_finished_task=args.download_finished_task,
107
+ clean=args.clean,
108
+ )
109
+ elif args.command == "gui":
110
+ start_dpgui(
111
+ port=args.port,
112
+ bind_all=args.bind_all,
113
+ )
114
+ elif args.command is None:
115
+ pass
116
+ else:
117
+ raise RuntimeError(f"unknown command {args.command}")
6
118
 
7
119
 
8
120
  if __name__ == "__main__":
@@ -0,0 +1 @@
1
+ """Entry points."""
@@ -0,0 +1,31 @@
1
+ # SPDX-License-Identifier: LGPL-3.0-or-later
2
+ """DP-GUI entrypoint."""
3
+
4
+
5
def start_dpgui(*, port: int, bind_all: bool, **kwargs):
    """Host DP-GUI server.

    Parameters
    ----------
    port : int
        The port to serve DP-GUI on.
    bind_all : bool
        Serve on all public interfaces. This will expose your DP-GUI instance
        to the network on both IPv4 and IPv6 (where available).
    **kwargs
        additional arguments; accepted but not forwarded to dpgui

    Raises
    ------
    ModuleNotFoundError
        The dpgui package is not installed
    """
    # dpgui is optional, so it is imported only when the GUI is requested.
    try:
        from dpgui import start_dpgui as serve_dpgui
    except ModuleNotFoundError as missing:
        raise ModuleNotFoundError(
            "To use DP-GUI, please install the dpgui package:\npip install dpgui"
        ) from missing
    serve_dpgui(port=port, bind_all=bind_all)
@@ -0,0 +1,83 @@
1
+ from pathlib import Path
2
+
3
+ from dpdispatcher.dlog import dlog
4
+ from dpdispatcher.submission import Submission
5
+ from dpdispatcher.utils.job_status import JobStatus
6
+ from dpdispatcher.utils.record import record
7
+
8
+
9
def handle_submission(
    *,
    submission_hash: str,
    download_terminated_log: bool = False,
    download_finished_task: bool = False,
    clean: bool = False,
):
    """Handle terminated submission.

    The submission is reloaded from its recorded JSON file, its state is
    refreshed, and the requested actions are applied in this order:
    download, then clean.

    Parameters
    ----------
    submission_hash : str
        Submission hash to download.
    download_terminated_log : bool, optional
        Download log files of terminated tasks.
    download_finished_task : bool, optional
        Download finished tasks.
    clean : bool, optional
        Clean submission.

    Raises
    ------
    ValueError
        At least one action should be specified.
    """
    if not (download_terminated_log or download_finished_task or clean):
        raise ValueError("At least one action should be specified.")

    submission = Submission.submission_from_json(
        str(record.get_submission(submission_hash))
    )
    # Rebuild the flat task list from the jobs that own the tasks.
    flattened = []
    for job in submission.belonging_jobs:
        flattened.extend(job.job_task_list)
    submission.belonging_tasks = flattened
    # TODO: for unclear reason, the submission_hash may be changed
    submission.submission_hash = submission_hash
    submission.machine.context.bind_submission(submission)
    submission.update_submission_state()

    # Sort tasks by their refreshed state; other states are ignored.
    terminated_tasks, finished_tasks = [], []
    for task in submission.belonging_tasks:
        task.get_task_state(submission.machine.context)
        if task.task_state == JobStatus.terminated:
            terminated_tasks.append(task)
        elif task.task_state == JobStatus.finished:
            finished_tasks.append(task)

    # Only the tasks selected by the requested actions are downloaded.
    selected = []
    if download_terminated_log:
        for task in terminated_tasks:
            # For terminated tasks, fetch just the stdout/stderr logs.
            task.backward_files = [task.outlog, task.errlog]
        selected += terminated_tasks
    if download_finished_task:
        selected += finished_tasks
    submission.belonging_tasks = selected

    submission.download_jobs()

    if download_terminated_log:
        log_paths = []
        for task in terminated_tasks:
            assert submission.local_root is not None
            task_dir = Path(submission.local_root) / task.task_work_path
            log_paths.append(task_dir / task.outlog)
            log_paths.append(task_dir / task.errlog)

        listing = "\n ".join(str(path) for path in log_paths)
        dlog.info("Terminated logs are downloaded into:\n " + listing)

    if clean:
        submission.clean_jobs()
dpdispatcher/machine.py CHANGED
@@ -4,10 +4,11 @@ import shlex
4
4
  from abc import ABCMeta, abstractmethod
5
5
  from typing import List, Tuple
6
6
 
7
+ import yaml
7
8
  from dargs import Argument, Variant
8
9
 
9
- from dpdispatcher import dlog
10
10
  from dpdispatcher.base_context import BaseContext
11
+ from dpdispatcher.dlog import dlog
11
12
 
12
13
  script_template = """\
13
14
  {script_header}
@@ -35,7 +36,7 @@ cd {task_work_path}
35
36
  test $? -ne 0 && exit 1
36
37
  if [ ! -f {task_tag_finished} ] ;then
37
38
  {command_env} ( {command} ) {log_err_part}
38
- if test $? -eq 0; then touch {task_tag_finished}; else echo 1 > $REMOTE_ROOT/{flag_if_job_task_fail};fi
39
+ if test $? -eq 0; then touch {task_tag_finished}; else echo 1 > $REMOTE_ROOT/{flag_if_job_task_fail};tail -v -c 1000 $REMOTE_ROOT/{task_work_path}/{err_file} > $REMOTE_ROOT/{last_err_file};fi
39
40
  fi &
40
41
  """
41
42
 
@@ -124,6 +125,13 @@ class Machine(metaclass=ABCMeta):
124
125
  machine = cls.load_from_dict(machine_dict=machine_dict)
125
126
  return machine
126
127
 
128
@classmethod
def load_from_yaml(cls, yaml_path):
    """Create a machine from a YAML file.

    The file at ``yaml_path`` is parsed with ``yaml.safe_load`` and the
    result is passed to ``cls.load_from_dict`` as ``machine_dict``.
    """
    with open(yaml_path) as stream:
        parsed = yaml.safe_load(stream)
    return cls.load_from_dict(machine_dict=parsed)
134
+
127
135
  @classmethod
128
136
  def load_from_dict(cls, machine_dict):
129
137
  batch_type = machine_dict["batch_type"]
@@ -191,17 +199,20 @@ class Machine(metaclass=ABCMeta):
191
199
  "abstract method do_submit should be implemented by derived class"
192
200
  )
193
201
 
202
def gen_script_run_command(self, job):
    """Return the shell line that sources the job's ``.run`` file under ``$REMOTE_ROOT``."""
    return "source $REMOTE_ROOT/{}.run".format(job.script_file_name)
204
+
194
205
  def gen_script(self, job):
195
206
  script_header = self.gen_script_header(job)
196
207
  script_custom_flags = self.gen_script_custom_flags_lines(job)
197
208
  script_env = self.gen_script_env(job)
198
- script_command = self.gen_script_command(job)
209
+ script_run_command = self.gen_script_run_command(job)
199
210
  script_end = self.gen_script_end(job)
200
211
  script = script_template.format(
201
212
  script_header=script_header,
202
213
  script_custom_flags=script_custom_flags,
203
214
  script_env=script_env,
204
- script_command=script_command,
215
+ script_command=script_run_command,
205
216
  script_end=script_end,
206
217
  )
207
218
  return script
@@ -295,6 +306,7 @@ class Machine(metaclass=ABCMeta):
295
306
  log_err_part += f"2>>{shlex.quote(task.errlog)} "
296
307
 
297
308
  flag_if_job_task_fail = job.job_hash + "_flag_if_job_task_fail"
309
+ last_err_file = job.job_hash + "_last_err_file"
298
310
  single_script_command = script_command_template.format(
299
311
  flag_if_job_task_fail=flag_if_job_task_fail,
300
312
  command_env=command_env,
@@ -304,6 +316,8 @@ class Machine(metaclass=ABCMeta):
304
316
  command=task.command,
305
317
  task_tag_finished=task_tag_finished,
306
318
  log_err_part=log_err_part,
319
+ err_file=shlex.quote(task.errlog),
320
+ last_err_file=shlex.quote(last_err_file),
307
321
  )
308
322
  script_command += single_script_command
309
323
 
@@ -0,0 +1,11 @@
1
+ """Machines."""
2
+ import importlib
3
+ from pathlib import Path
4
+
5
# Package that the relative module names below are resolved against.
PACKAGE_BASE = "dpdispatcher.machines"
# File names in this directory that must not be imported by the loop.
NOT_LOADABLE = ("__init__.py",)

# Import every other ``*.py`` file that sits next to this one, as a
# submodule of PACKAGE_BASE.
for module_file in Path(__file__).parent.glob("*.py"):
    if module_file.name in NOT_LOADABLE:
        continue
    module_name = "." + module_file.stem
    importlib.import_module(module_name, PACKAGE_BASE)
@@ -1,7 +1,10 @@
1
- from dpdispatcher import dlog
2
- from dpdispatcher.JobStatus import JobStatus
1
+ from dpdispatcher.dlog import dlog
3
2
  from dpdispatcher.machine import Machine
4
- from dpdispatcher.utils import run_cmd_with_all_output
3
+ from dpdispatcher.utils.job_status import JobStatus
4
+ from dpdispatcher.utils.utils import (
5
+ customized_script_header_template,
6
+ run_cmd_with_all_output,
7
+ )
5
8
 
6
9
  shell_script_header_template = """
7
10
  #!/bin/bash -l
@@ -112,7 +115,17 @@ class DistributedShell(Machine):
112
115
  return script_end
113
116
 
114
117
  def gen_script_header(self, job):
115
- shell_script_header = shell_script_header_template
118
+ resources = job.resources
119
+ if (
120
+ resources["strategy"].get("customized_script_header_template_file")
121
+ is not None
122
+ ):
123
+ shell_script_header = customized_script_header_template(
124
+ resources["strategy"]["customized_script_header_template_file"],
125
+ resources,
126
+ )
127
+ else:
128
+ shell_script_header = shell_script_header_template
116
129
  return shell_script_header
117
130
 
118
131
  def do_submit(self, job):
@@ -133,6 +146,9 @@ class DistributedShell(Machine):
133
146
  job_id_name = job.job_hash + "_job_id"
134
147
  output_name = job.job_hash + ".out"
135
148
  self.context.write_file(fname=script_file_name, write_str=script_str)
149
+ script_run_str = self.gen_script_command(job)
150
+ script_run_file_name = f"{job.script_file_name}.run"
151
+ self.context.write_file(fname=script_run_file_name, write_str=script_run_str)
136
152
 
137
153
  resources = job.resources
138
154
  submit_command = (
@@ -4,11 +4,12 @@ import time
4
4
  import uuid
5
5
  import warnings
6
6
 
7
- from dpdispatcher import dlog
8
- from dpdispatcher.dpcloudserver import Client, zip_file
9
- from dpdispatcher.dpcloudserver.config import ALI_OSS_BUCKET_URL
10
- from dpdispatcher.JobStatus import JobStatus
7
+ from dpdispatcher.dlog import dlog
11
8
  from dpdispatcher.machine import Machine
9
+ from dpdispatcher.utils.dpcloudserver import Client, zip_file
10
+ from dpdispatcher.utils.dpcloudserver.config import ALI_OSS_BUCKET_URL
11
+ from dpdispatcher.utils.job_status import JobStatus
12
+ from dpdispatcher.utils.utils import customized_script_header_template
12
13
 
13
14
  shell_script_header_template = """
14
15
  #!/bin/bash -l
@@ -71,13 +72,28 @@ class Bohrium(Machine):
71
72
  return shell_script
72
73
 
73
74
  def gen_script_header(self, job):
74
- shell_script_header = shell_script_header_template
75
+ resources = job.resources
76
+ if (
77
+ resources["strategy"].get("customized_script_header_template_file")
78
+ is not None
79
+ ):
80
+ shell_script_header = customized_script_header_template(
81
+ resources["strategy"]["customized_script_header_template_file"],
82
+ resources,
83
+ )
84
+ else:
85
+ shell_script_header = shell_script_header_template
75
86
  return shell_script_header
76
87
 
77
88
  def gen_local_script(self, job):
78
89
  script_str = self.gen_script(job)
79
90
  script_file_name = job.script_file_name
80
91
  self.context.write_local_file(fname=script_file_name, write_str=script_str)
92
+ script_run_str = self.gen_script_command(job)
93
+ script_run_file_name = f"{job.script_file_name}.run"
94
+ self.context.write_local_file(
95
+ fname=script_run_file_name, write_str=script_run_str
96
+ )
81
97
  return script_file_name
82
98
 
83
99
  def _gen_backward_files_list(self, job):
@@ -1,8 +1,9 @@
1
1
  import shlex
2
2
 
3
- from dpdispatcher import dlog
4
- from dpdispatcher.JobStatus import JobStatus
3
+ from dpdispatcher.dlog import dlog
5
4
  from dpdispatcher.machine import Machine
5
+ from dpdispatcher.utils.job_status import JobStatus
6
+ from dpdispatcher.utils.utils import customized_script_header_template
6
7
 
7
8
  fugaku_script_header_template = """\
8
9
  {queue_name_line}
@@ -28,9 +29,18 @@ class Fugaku(Machine):
28
29
  fugaku_script_header_dict[
29
30
  "queue_name_line"
30
31
  ] = f'#PJM -L "rscgrp={resources.queue_name}"'
31
- fugaku_script_header = fugaku_script_header_template.format(
32
- **fugaku_script_header_dict
33
- )
32
+ if (
33
+ resources["strategy"].get("customized_script_header_template_file")
34
+ is not None
35
+ ):
36
+ fugaku_script_header = customized_script_header_template(
37
+ resources["strategy"]["customized_script_header_template_file"],
38
+ resources,
39
+ )
40
+ else:
41
+ fugaku_script_header = fugaku_script_header_template.format(
42
+ **fugaku_script_header_dict
43
+ )
34
44
  return fugaku_script_header
35
45
 
36
46
  def do_submit(self, job):
@@ -39,6 +49,9 @@ class Fugaku(Machine):
39
49
  job_id_name = job.job_hash + "_job_id"
40
50
  # script_str = self.sub_script(job_dirs, cmd, args=args, resources=resources, outlog=outlog, errlog=errlog)
41
51
  self.context.write_file(fname=script_file_name, write_str=script_str)
52
+ script_run_str = self.gen_script_command(job)
53
+ script_run_file_name = f"{job.script_file_name}.run"
54
+ self.context.write_file(fname=script_run_file_name, write_str=script_run_str)
42
55
  # self.context.write_file(fname=os.path.join(self.context.submission.work_base, script_file_name), write_str=script_str)
43
56
  # script_file_dir = os.path.join(self.context.submission.work_base)
44
57
  script_file_dir = self.context.remote_root
@@ -3,10 +3,14 @@ from typing import List
3
3
 
4
4
  from dargs import Argument
5
5
 
6
- from dpdispatcher import dlog
7
- from dpdispatcher.JobStatus import JobStatus
6
+ from dpdispatcher.dlog import dlog
8
7
  from dpdispatcher.machine import Machine
9
- from dpdispatcher.utils import RetrySignal, retry
8
+ from dpdispatcher.utils.job_status import JobStatus
9
+ from dpdispatcher.utils.utils import (
10
+ RetrySignal,
11
+ customized_script_header_template,
12
+ retry,
13
+ )
10
14
 
11
15
  lsf_script_header_template = """\
12
16
  #!/bin/bash -l
@@ -60,7 +64,16 @@ class LSF(Machine):
60
64
  script_header_dict["lsf_number_gpu_line"] = ""
61
65
  else:
62
66
  script_header_dict["lsf_number_gpu_line"] = custom_gpu_line
63
- lsf_script_header = lsf_script_header_template.format(**script_header_dict)
67
+ if (
68
+ resources["strategy"].get("customized_script_header_template_file")
69
+ is not None
70
+ ):
71
+ lsf_script_header = customized_script_header_template(
72
+ resources["strategy"]["customized_script_header_template_file"],
73
+ resources,
74
+ )
75
+ else:
76
+ lsf_script_header = lsf_script_header_template.format(**script_header_dict)
64
77
 
65
78
  return lsf_script_header
66
79
 
@@ -70,6 +83,9 @@ class LSF(Machine):
70
83
  script_str = self.gen_script(job)
71
84
  job_id_name = job.job_hash + "_job_id"
72
85
  self.context.write_file(fname=script_file_name, write_str=script_str)
86
+ script_run_str = self.gen_script_command(job)
87
+ script_run_file_name = f"{job.script_file_name}.run"
88
+ self.context.write_file(fname=script_run_file_name, write_str=script_run_str)
73
89
 
74
90
  try:
75
91
  stdin, stdout, stderr = self.context.block_checkcall(
@@ -2,6 +2,8 @@ import os
2
2
  import shutil
3
3
  import time
4
4
 
5
+ from dpdispatcher.utils.utils import customized_script_header_template
6
+
5
7
  try:
6
8
  from bohriumsdk.client import Client
7
9
  from bohriumsdk.job import Job
@@ -12,9 +14,9 @@ except ModuleNotFoundError:
12
14
  else:
13
15
  found_bohriumsdk = True
14
16
 
15
- from dpdispatcher import dlog
16
- from dpdispatcher.JobStatus import JobStatus
17
+ from dpdispatcher.dlog import dlog
17
18
  from dpdispatcher.machine import Machine
19
+ from dpdispatcher.utils.job_status import JobStatus
18
20
 
19
21
  shell_script_header_template = """
20
22
  #!/bin/bash -l
@@ -43,13 +45,28 @@ class OpenAPI(Machine):
43
45
  return shell_script
44
46
 
45
47
  def gen_script_header(self, job):
46
- shell_script_header = shell_script_header_template
48
+ resources = job.resources
49
+ if (
50
+ resources["strategy"].get("customized_script_header_template_file")
51
+ is not None
52
+ ):
53
+ shell_script_header = customized_script_header_template(
54
+ resources["strategy"]["customized_script_header_template_file"],
55
+ resources,
56
+ )
57
+ else:
58
+ shell_script_header = shell_script_header_template
47
59
  return shell_script_header
48
60
 
49
61
  def gen_local_script(self, job):
50
62
  script_str = self.gen_script(job)
51
63
  script_file_name = job.script_file_name
52
64
  self.context.write_local_file(fname=script_file_name, write_str=script_str)
65
+ script_run_str = self.gen_script_command(job)
66
+ script_run_file_name = f"{job.script_file_name}.run"
67
+ self.context.write_local_file(
68
+ fname=script_run_file_name, write_str=script_run_str
69
+ )
53
70
  return script_file_name
54
71
 
55
72
  def _gen_backward_files_list(self, job):
@@ -130,7 +147,9 @@ class OpenAPI(Machine):
130
147
  )
131
148
 
132
149
  job_state = self.map_dp_job_state(
133
- dp_job_status, check_return.get("exitCode", 0), self.ignore_exit_code # type: ignore
150
+ dp_job_status,
151
+ check_return.get("exitCode", 0), # type: ignore
152
+ self.ignore_exit_code,
134
153
  )
135
154
  if job_state == JobStatus.finished:
136
155
  job_log = self.job.log(job_id)
@@ -1,8 +1,9 @@
1
1
  import shlex
2
2
 
3
- from dpdispatcher import dlog
4
- from dpdispatcher.JobStatus import JobStatus
3
+ from dpdispatcher.dlog import dlog
5
4
  from dpdispatcher.machine import Machine
5
+ from dpdispatcher.utils.job_status import JobStatus
6
+ from dpdispatcher.utils.utils import customized_script_header_template
6
7
 
7
8
  pbs_script_header_template = """
8
9
  #!/bin/bash -l
@@ -28,7 +29,18 @@ class PBS(Machine):
28
29
  "select_node_line"
29
30
  ] += f":ngpus={resources.gpu_per_node}"
30
31
  pbs_script_header_dict["queue_name_line"] = f"#PBS -q {resources.queue_name}"
31
- pbs_script_header = pbs_script_header_template.format(**pbs_script_header_dict)
32
+ if (
33
+ resources["strategy"].get("customized_script_header_template_file")
34
+ is not None
35
+ ):
36
+ pbs_script_header = customized_script_header_template(
37
+ resources["strategy"]["customized_script_header_template_file"],
38
+ resources,
39
+ )
40
+ else:
41
+ pbs_script_header = pbs_script_header_template.format(
42
+ **pbs_script_header_dict
43
+ )
32
44
  return pbs_script_header
33
45
 
34
46
  def do_submit(self, job):
@@ -37,6 +49,9 @@ class PBS(Machine):
37
49
  job_id_name = job.job_hash + "_job_id"
38
50
  # script_str = self.sub_script(job_dirs, cmd, args=args, resources=resources, outlog=outlog, errlog=errlog)
39
51
  self.context.write_file(fname=script_file_name, write_str=script_str)
52
+ script_run_str = self.gen_script_command(job)
53
+ script_run_file_name = f"{job.script_file_name}.run"
54
+ self.context.write_file(fname=script_run_file_name, write_str=script_run_str)
40
55
  # self.context.write_file(fname=os.path.join(self.context.submission.work_base, script_file_name), write_str=script_str)
41
56
  # script_file_dir = os.path.join(self.context.submission.work_base)
42
57
  script_file_dir = self.context.remote_root
@@ -149,5 +164,16 @@ class Torque(PBS):
149
164
  gpu_per_node=resources.gpu_per_node
150
165
  )
151
166
  pbs_script_header_dict["queue_name_line"] = f"#PBS -q {resources.queue_name}"
152
- pbs_script_header = pbs_script_header_template.format(**pbs_script_header_dict)
167
+ if (
168
+ resources["strategy"].get("customized_script_header_template_file")
169
+ is not None
170
+ ):
171
+ pbs_script_header = customized_script_header_template(
172
+ resources["strategy"]["customized_script_header_template_file"],
173
+ resources,
174
+ )
175
+ else:
176
+ pbs_script_header = pbs_script_header_template.format(
177
+ **pbs_script_header_dict
178
+ )
153
179
  return pbs_script_header
@@ -1,8 +1,9 @@
1
1
  import shlex
2
2
 
3
- from dpdispatcher import dlog
4
- from dpdispatcher.JobStatus import JobStatus
3
+ from dpdispatcher.dlog import dlog
5
4
  from dpdispatcher.machine import Machine
5
+ from dpdispatcher.utils.job_status import JobStatus
6
+ from dpdispatcher.utils.utils import customized_script_header_template
6
7
 
7
8
  shell_script_header_template = """
8
9
  #!/bin/bash -l
@@ -15,7 +16,17 @@ class Shell(Machine):
15
16
  return shell_script
16
17
 
17
18
  def gen_script_header(self, job):
18
- shell_script_header = shell_script_header_template
19
+ resources = job.resources
20
+ if (
21
+ resources["strategy"].get("customized_script_header_template_file")
22
+ is not None
23
+ ):
24
+ shell_script_header = customized_script_header_template(
25
+ resources["strategy"]["customized_script_header_template_file"],
26
+ resources,
27
+ )
28
+ else:
29
+ shell_script_header = shell_script_header_template
19
30
  return shell_script_header
20
31
 
21
32
  def do_submit(self, job):
@@ -24,6 +35,9 @@ class Shell(Machine):
24
35
  job_id_name = job.job_hash + "_job_id"
25
36
  output_name = job.job_hash + ".out"
26
37
  self.context.write_file(fname=script_file_name, write_str=script_str)
38
+ script_run_str = self.gen_script_command(job)
39
+ script_run_file_name = f"{job.script_file_name}.run"
40
+ self.context.write_file(fname=script_run_file_name, write_str=script_run_str)
27
41
  ret, stdin, stdout, stderr = self.context.block_call(
28
42
  "cd {} && {{ nohup bash {} 1>>{} 2>>{} & }} && echo $!".format(
29
43
  shlex.quote(self.context.remote_root),