dpdispatcher 0.5.6__py3-none-any.whl → 0.5.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dpdispatcher might be problematic.

dpdispatcher/__init__.py CHANGED
@@ -43,6 +43,7 @@ except ImportError:
43
43
  from .distributed_shell import DistributedShell
44
44
  from .dp_cloud_server import DpCloudServer, Lebesgue
45
45
  from .dp_cloud_server_context import DpCloudServerContext, LebesgueContext
46
+ from .fugaku import Fugaku
46
47
  from .hdfs_context import HDFSContext
47
48
  from .lazy_local_context import LazyLocalContext
48
49
  from .local_context import LocalContext
@@ -85,6 +86,7 @@ __all__ = [
85
86
  "PBS",
86
87
  "Shell",
87
88
  "Slurm",
89
+ "Fugaku",
88
90
  "SSHContext",
89
91
  "Submission",
90
92
  "Task",
dpdispatcher/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # file generated by setuptools_scm
2
2
  # don't change, don't track in version control
3
- __version__ = version = '0.5.6'
4
- __version_tuple__ = version_tuple = (0, 5, 6)
3
+ __version__ = version = '0.5.8'
4
+ __version_tuple__ = version_tuple = (0, 5, 8)
dpdispatcher/base_context.py CHANGED
@@ -70,9 +70,6 @@ class BaseContext(metaclass=ABCMeta):
70
70
  def read_file(self, fname):
71
71
  raise NotImplementedError("abstract method")
72
72
 
73
- def kill(self, proc):
74
- raise NotImplementedError("abstract method")
75
-
76
73
  def check_finish(self, proc):
77
74
  raise NotImplementedError("abstract method")
78
75
 
dpdispatcher/distributed_shell.py CHANGED
@@ -136,17 +136,16 @@ class DistributedShell(Machine):
136
136
 
137
137
  resources = job.resources
138
138
  submit_command = (
139
- "hadoop jar %s/hadoop-yarn-applications-distributedshell-*.jar "
139
+ "hadoop jar {}/hadoop-yarn-applications-distributedshell-*.jar "
140
140
  "org.apache.hadoop.yarn.applications.distributedshell.Client "
141
- "-jar %s/hadoop-yarn-applications-distributedshell-*.jar "
142
- '-queue %s -appname "distributedshell_dpgen_%s" '
141
+ "-jar {}/hadoop-yarn-applications-distributedshell-*.jar "
142
+ '-queue {} -appname "distributedshell_dpgen_{}" '
143
143
  "-shell_env YARN_CONTAINER_RUNTIME_TYPE=docker "
144
- "-shell_env YARN_CONTAINER_RUNTIME_DOCKER_IMAGE=%s "
144
+ "-shell_env YARN_CONTAINER_RUNTIME_DOCKER_IMAGE={} "
145
145
  "-shell_env ENV_DOCKER_CONTAINER_SHM_SIZE='600m' "
146
146
  "-master_memory 1024 -master_vcores 2 -num_containers 1 "
147
- "-container_resources memory-mb=%s,vcores=%s "
148
- "-shell_script /tmp/%s"
149
- % (
147
+ "-container_resources memory-mb={},vcores={} "
148
+ "-shell_script /tmp/{}".format(
150
149
  resources.kwargs.get("yarn_path", ""),
151
150
  resources.kwargs.get("yarn_path", ""),
152
151
  resources.queue_name,
dpdispatcher/dp_cloud_server.py CHANGED
@@ -106,7 +106,9 @@ class Bohrium(Machine):
106
106
 
107
107
  input_data = self.input_data.copy()
108
108
 
109
- input_data["job_resources"] = job_resources
109
+ if not input_data.get("job_resources"):
110
+ input_data["job_resources"] = []
111
+ input_data["job_resources"].append(job_resources)
110
112
  input_data["command"] = f"bash {job.script_file_name}"
111
113
  if not input_data.get("backward_files"):
112
114
  input_data["backward_files"] = self._gen_backward_files_list(job)
dpdispatcher/dp_cloud_server_context.py CHANGED
@@ -270,9 +270,6 @@ class BohriumContext(BaseContext):
270
270
  # retcode = cmd_pipes['stdout'].channel.recv_exit_status()
271
271
  # return retcode, cmd_pipes['stdout'], cmd_pipes['stderr']
272
272
 
273
- def kill(self, cmd_pipes):
274
- pass
275
-
276
273
  @classmethod
277
274
  def machine_subfields(cls) -> List[Argument]:
278
275
  """Generate the machine subfields.
dpdispatcher/dpcloudserver/client.py CHANGED
@@ -198,7 +198,7 @@ class Client:
198
198
  ):
199
199
  post_data = {
200
200
  "job_type": job_type,
201
- "oss_path": [oss_path],
201
+ "oss_path": oss_path,
202
202
  }
203
203
  if program_id is not None:
204
204
  post_data["project_id"] = program_id
dpdispatcher/fugaku.py ADDED
@@ -0,0 +1,94 @@
1
+ import shlex
2
+
3
+ from dpdispatcher import dlog
4
+ from dpdispatcher.JobStatus import JobStatus
5
+ from dpdispatcher.machine import Machine
6
+
7
+ fugaku_script_header_template = """\
8
+ {queue_name_line}
9
+ {fugaku_node_number_line}
10
+ {fugaku_ntasks_per_node_line}
11
+ """
12
+
13
+
14
+ class Fugaku(Machine):
15
+ def gen_script(self, job):
16
+ fugaku_script = super().gen_script(job)
17
+ return fugaku_script
18
+
19
+ def gen_script_header(self, job):
20
+ resources = job.resources
21
+ fugaku_script_header_dict = {}
22
+ fugaku_script_header_dict[
23
+ "fugaku_node_number_line"
24
+ ] = f'#PJM -L "node={resources.number_node}" '
25
+ fugaku_script_header_dict[
26
+ "fugaku_ntasks_per_node_line"
27
+ ] = '#PJM --mpi "max-proc-per-node={cpu_per_node}"'.format(
28
+ cpu_per_node=resources.cpu_per_node
29
+ )
30
+ fugaku_script_header_dict[
31
+ "queue_name_line"
32
+ ] = f'#PJM -L "rscgrp={resources.queue_name}"'
33
+ fugaku_script_header = fugaku_script_header_template.format(
34
+ **fugaku_script_header_dict
35
+ )
36
+ return fugaku_script_header
37
+
38
+ def do_submit(self, job):
39
+ script_file_name = job.script_file_name
40
+ script_str = self.gen_script(job)
41
+ job_id_name = job.job_hash + "_job_id"
42
+ # script_str = self.sub_script(job_dirs, cmd, args=args, resources=resources, outlog=outlog, errlog=errlog)
43
+ self.context.write_file(fname=script_file_name, write_str=script_str)
44
+ # self.context.write_file(fname=os.path.join(self.context.submission.work_base, script_file_name), write_str=script_str)
45
+ # script_file_dir = os.path.join(self.context.submission.work_base)
46
+ script_file_dir = self.context.remote_root
47
+ # stdin, stdout, stderr = self.context.block_checkcall('cd %s && %s %s' % (self.context.remote_root, 'pjsub', script_file_name))
48
+
49
+ stdin, stdout, stderr = self.context.block_checkcall(
50
+ "cd {} && {} {}".format(
51
+ shlex.quote(script_file_dir), "pjsub", shlex.quote(script_file_name)
52
+ )
53
+ )
54
+ subret = stdout.readlines()
55
+ job_id = subret[0].split()[5]
56
+ self.context.write_file(job_id_name, job_id)
57
+ return job_id
58
+
59
+ def default_resources(self, resources):
60
+ pass
61
+
62
+ def check_status(self, job):
63
+ job_id = job.job_id
64
+ if job_id == "":
65
+ return JobStatus.unsubmitted
66
+ ret, stdin, stdout, stderr = self.context.block_call("pjstat " + job_id)
67
+ err_str = stderr.read().decode("utf-8")
68
+ try:
69
+ status_line = stdout.read().decode("utf-8").split("\n")[-2]
70
+ # pjstat only returns 0 if the job is not waiting or running
71
+ except Exception:
72
+ ret, stdin, stdout, stderr = self.context.block_call("pjstat -H " + job_id)
73
+ status_line = stdout.read().decode("utf-8").split("\n")[-2]
74
+ status_word = status_line.split()[3]
75
+ if status_word in ["EXT", "CCL", "ERR"]:
76
+ if self.check_finish_tag(job):
77
+ dlog.info(f"job: {job.job_hash} {job.job_id} finished")
78
+ return JobStatus.finished
79
+ else:
80
+ return JobStatus.terminated
81
+ else:
82
+ return JobStatus.unknown
83
+ status_word = status_line.split()[3]
84
+ # dlog.info (status_word)
85
+ if status_word in ["QUE", "HLD", "RNA", "SPD"]:
86
+ return JobStatus.waiting
87
+ elif status_word in ["RUN", "RNE"]:
88
+ return JobStatus.running
89
+ else:
90
+ return JobStatus.unknown
91
+
92
+ def check_finish_tag(self, job):
93
+ job_tag_finished = job.job_hash + "_job_tag_finished"
94
+ return self.context.check_file_exists(job_tag_finished)
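The new `Fugaku` machine above submits with `pjsub` and polls with `pjstat`. A minimal sketch of how it could be selected from user configuration, assuming the usual `Machine.load_from_dict`/`Resources.load_from_dict` entry points; the host, paths, and queue name are placeholders, not taken from this diff.

```python
# Hypothetical configuration selecting the new "Fugaku" batch type.
from dpdispatcher import Machine, Resources

machine = Machine.load_from_dict(
    {
        "batch_type": "Fugaku",
        "context_type": "SSHContext",
        "local_root": "./work",
        "remote_root": "/home/user/dpdispatcher_work",  # placeholder
        "remote_profile": {"hostname": "login.example.org", "username": "user"},
    }
)
resources = Resources.load_from_dict(
    {
        "number_node": 1,       # -> '#PJM -L "node=1"'
        "cpu_per_node": 48,     # -> '#PJM --mpi "max-proc-per-node=48"'
        "gpu_per_node": 0,
        "queue_name": "small",  # -> '#PJM -L "rscgrp=small"'
        "group_size": 1,
    }
)
```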
dpdispatcher/hdfs_context.py CHANGED
@@ -247,6 +247,3 @@ class HDFSContext(BaseContext):
247
247
 
248
248
  def read_file(self, fname):
249
249
  return HDFS.read_hdfs_file(os.path.join(self.remote_root, fname))
250
-
251
- def kill(self, job_id):
252
- pass
dpdispatcher/lazy_local_context.py CHANGED
@@ -1,5 +1,4 @@
1
1
  import os
2
- import signal
3
2
  import subprocess as sp
4
3
 
5
4
  from dpdispatcher.base_context import BaseContext
@@ -167,9 +166,6 @@ class LazyLocalContext(BaseContext):
167
166
  )
168
167
  return proc
169
168
 
170
- def kill(self, job_id):
171
- os.kill(job_id, signal.SIGTERM)
172
-
173
169
  def check_finish(self, proc):
174
170
  return proc.poll() is not None
175
171
 
dpdispatcher/local_context.py CHANGED
@@ -1,7 +1,6 @@
1
1
  import hashlib
2
2
  import os
3
3
  import shutil
4
- import signal
5
4
  import subprocess as sp
6
5
  from glob import glob
7
6
  from subprocess import TimeoutExpired
@@ -291,9 +290,6 @@ class LocalContext(BaseContext):
291
290
  )
292
291
  return proc
293
292
 
294
- def kill(self, job_id):
295
- os.kill(job_id, signal.SIGTERM)
296
-
297
293
  def check_finish(self, proc):
298
294
  return proc.poll() is not None
299
295
 
dpdispatcher/lsf.py CHANGED
@@ -83,8 +83,7 @@ class LSF(Machine):
83
83
 
84
84
  try:
85
85
  stdin, stdout, stderr = self.context.block_checkcall(
86
- "cd %s && %s %s"
87
- % (
86
+ "cd {} && {} {}".format(
88
87
  shlex.quote(self.context.remote_root),
89
88
  "bsub < ",
90
89
  shlex.quote(script_file_name),
@@ -211,3 +210,14 @@ class LSF(Machine):
211
210
  doc="Extra arguments.",
212
211
  )
213
212
  ]
213
+
214
+ def kill(self, job):
215
+ """Kill the job.
216
+
217
+ Parameters
218
+ ----------
219
+ job : Job
220
+ job
221
+ """
222
+ job_id = job.job_id
223
+ ret, stdin, stdout, stderr = self.context.block_call("bkill " + str(job_id))
dpdispatcher/machine.py CHANGED
@@ -377,8 +377,12 @@ class Machine(metaclass=ABCMeta):
377
377
  machine_args = [
378
378
  Argument("batch_type", str, optional=False, doc=doc_batch_type),
379
379
  # TODO: add default to local_root and remote_root after refactor the code
380
- Argument("local_root", [str, None], optional=False, doc=doc_local_root),
381
- Argument("remote_root", [str, None], optional=True, doc=doc_remote_root),
380
+ Argument(
381
+ "local_root", [str, type(None)], optional=False, doc=doc_local_root
382
+ ),
383
+ Argument(
384
+ "remote_root", [str, type(None)], optional=True, doc=doc_remote_root
385
+ ),
382
386
  Argument(
383
387
  "clean_asynchronously",
384
388
  bool,
@@ -439,3 +443,15 @@ class Machine(metaclass=ABCMeta):
439
443
  "kwargs", dict, optional=True, doc="This field is empty for this batch."
440
444
  )
441
445
  ]
446
+
447
+ def kill(self, job):
448
+ """Kill the job.
449
+
450
+ If not implemented, pass and let the user manually kill it.
451
+
452
+ Parameters
453
+ ----------
454
+ job : Job
455
+ job
456
+ """
457
+ dlog.warning("Job %s should be manually killed" % job.job_id)
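This base `kill` only logs a warning; the scheduler classes in this diff override it with their native cancel commands (`bkill` for LSF above; `qdel`, `kill -9`, and `scancel -Q` below). A rough sketch of the same pattern for a hypothetical custom batch class, with the other required `Machine` methods omitted:

```python
# Sketch of overriding the new Machine.kill() hook in a custom batch class.
# "MyScheduler" and its "mycancel" command are hypothetical placeholders.
from dpdispatcher.machine import Machine


class MyScheduler(Machine):
    # do_submit / check_status / check_finish_tag etc. omitted for brevity

    def kill(self, job):
        """Kill the job via the scheduler's cancel command."""
        job_id = job.job_id
        # same pattern as the built-in classes: run the cancel command
        # through the context and do not raise if it fails
        ret, stdin, stdout, stderr = self.context.block_call(
            "mycancel " + str(job_id)
        )
```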
dpdispatcher/pbs.py CHANGED
@@ -46,8 +46,9 @@ class PBS(Machine):
46
46
  script_file_dir = self.context.remote_root
47
47
  # stdin, stdout, stderr = self.context.block_checkcall('cd %s && %s %s' % (self.context.remote_root, 'qsub', script_file_name))
48
48
  stdin, stdout, stderr = self.context.block_checkcall(
49
- "cd %s && %s %s"
50
- % (shlex.quote(script_file_dir), "qsub", shlex.quote(script_file_name))
49
+ "cd {} && {} {}".format(
50
+ shlex.quote(script_file_dir), "qsub", shlex.quote(script_file_name)
51
+ )
51
52
  )
52
53
  subret = stdout.readlines()
53
54
  job_id = subret[0].split()[0]
@@ -94,6 +95,17 @@ class PBS(Machine):
94
95
  job_tag_finished = job.job_hash + "_job_tag_finished"
95
96
  return self.context.check_file_exists(job_tag_finished)
96
97
 
98
+ def kill(self, job):
99
+ """Kill the job.
100
+
101
+ Parameters
102
+ ----------
103
+ job : Job
104
+ job
105
+ """
106
+ job_id = job.job_id
107
+ ret, stdin, stdout, stderr = self.context.block_call("qdel " + str(job_id))
108
+
97
109
 
98
110
  class Torque(PBS):
99
111
  def check_status(self, job):
dpdispatcher/shell.py CHANGED
@@ -25,8 +25,7 @@ class Shell(Machine):
25
25
  output_name = job.job_hash + ".out"
26
26
  self.context.write_file(fname=script_file_name, write_str=script_str)
27
27
  ret, stdin, stdout, stderr = self.context.block_call(
28
- "cd %s && { nohup bash %s 1>>%s 2>>%s & } && echo $!"
29
- % (
28
+ "cd {} && {{ nohup bash {} 1>>{} 2>>{} & }} && echo $!".format(
30
29
  shlex.quote(self.context.remote_root),
31
30
  script_file_name,
32
31
  output_name,
@@ -66,7 +65,7 @@ class Shell(Machine):
66
65
 
67
66
  # mark defunct process as terminated
68
67
  ret, stdin, stdout, stderr = self.context.block_call(
69
- f"if ps -p {job_id} > /dev/null && ! (ps -p {job_id} | grep defunct >/dev/null) ; then echo 1; fi"
68
+ f"if ps -p {job_id} > /dev/null && ! (ps -o command -p {job_id} | grep defunct >/dev/null) ; then echo 1; fi"
70
69
  )
71
70
  if ret != 0:
72
71
  err_str = stderr.read().decode("utf-8")
@@ -101,3 +100,15 @@ class Shell(Machine):
101
100
  job_tag_finished = job.job_hash + "_job_tag_finished"
102
101
  # print('job finished: ',job.job_id, job_tag_finished)
103
102
  return self.context.check_file_exists(job_tag_finished)
103
+
104
+ def kill(self, job):
105
+ """Kill the job.
106
+
107
+ Parameters
108
+ ----------
109
+ job : Job
110
+ job
111
+ """
112
+ job_id = job.job_id
113
+ # signal 9 (SIGKILL) cannot be caught or blocked
114
+ ret, stdin, stdout, stderr = self.context.block_call("kill -9 " + str(job_id))
dpdispatcher/slurm.py CHANGED
@@ -1,3 +1,4 @@
1
+ import math
1
2
  import pathlib
2
3
  import shlex
3
4
  from typing import List
@@ -45,9 +46,12 @@ class Slurm(Machine):
45
46
  )
46
47
  else:
47
48
  script_header_dict["slurm_number_gpu_line"] = custom_gpu_line
48
- script_header_dict[
49
- "slurm_partition_line"
50
- ] = f"#SBATCH --partition {resources.queue_name}"
49
+ if resources.queue_name != "":
50
+ script_header_dict[
51
+ "slurm_partition_line"
52
+ ] = f"#SBATCH --partition {resources.queue_name}"
53
+ else:
54
+ script_header_dict["slurm_partition_line"] = ""
51
55
  slurm_script_header = slurm_script_header_template.format(**script_header_dict)
52
56
  return slurm_script_header
53
57
 
@@ -60,8 +64,7 @@ class Slurm(Machine):
60
64
  self.context.write_file(fname=script_file_name, write_str=script_str)
61
65
  # self.context.write_file(fname=os.path.join(self.context.submission.work_base, script_file_name), write_str=script_str)
62
66
  ret, stdin, stdout, stderr = self.context.block_call(
63
- "cd %s && %s %s"
64
- % (
67
+ "cd {} && {} {}".format(
65
68
  shlex.quote(self.context.remote_root),
66
69
  "sbatch",
67
70
  shlex.quote(script_file_name),
@@ -78,7 +81,12 @@ class Slurm(Machine):
78
81
  "Get error code %d in submitting through ssh with job: %s . message: %s"
79
82
  % (ret, job.job_hash, err_str)
80
83
  )
81
- elif "Job violates accounting/QOS policy" in err_str:
84
+ elif (
85
+ "Job violates accounting/QOS policy" in err_str
86
+ # the number of jobs exceeds DEFAULT_MAX_JOB_COUNT (by default 10000)
87
+ or "Slurm temporarily unable to accept job, sleeping and retrying"
88
+ in err_str
89
+ ):
82
90
  # job number exceeds, skip the submitting
83
91
  return ""
84
92
  raise RuntimeError(
@@ -115,6 +123,7 @@ class Slurm(Machine):
115
123
  elif (
116
124
  "Socket timed out on send/recv operation" in err_str
117
125
  or "Unable to contact slurm controller" in err_str
126
+ or "Invalid user for SlurmUser" in err_str
118
127
  ):
119
128
  # retry 3 times
120
129
  raise RetrySignal(
@@ -194,30 +203,47 @@ class Slurm(Machine):
194
203
  )
195
204
  ]
196
205
 
206
+ def kill(self, job):
207
+ """Kill the job.
208
+
209
+ Parameters
210
+ ----------
211
+ job : Job
212
+ job
213
+ """
214
+ job_id = job.job_id
215
+ # -Q Do not report an error if the specified job is already completed.
216
+ ret, stdin, stdout, stderr = self.context.block_call(
217
+ "scancel -Q " + str(job_id)
218
+ )
219
+ # we do not need to stop here if scancel failed; just continue
220
+
197
221
 
198
222
  class SlurmJobArray(Slurm):
199
223
  """Slurm with job array enabled for multiple tasks in a job."""
200
224
 
201
225
  def gen_script_header(self, job):
226
+ slurm_job_size = job.resources.kwargs.get("slurm_job_size", 1)
202
227
  if job.fail_count > 0:
203
228
  # resubmit jobs, check if some of tasks have been finished
204
- job_array = []
229
+ job_array = set()
205
230
  for ii, task in enumerate(job.job_task_list):
206
231
  task_tag_finished = (
207
232
  pathlib.PurePath(task.task_work_path)
208
233
  / (task.task_hash + "_task_tag_finished")
209
234
  ).as_posix()
210
235
  if not self.context.check_file_exists(task_tag_finished):
211
- job_array.append(ii)
236
+ job_array.add(ii // slurm_job_size)
212
237
  return super().gen_script_header(job) + "\n#SBATCH --array=%s" % (
213
238
  ",".join(map(str, job_array))
214
239
  )
215
240
  return super().gen_script_header(job) + "\n#SBATCH --array=0-%d" % (
216
- len(job.job_task_list) - 1
241
+ math.ceil(len(job.job_task_list) / slurm_job_size) - 1
217
242
  )
218
243
 
219
244
  def gen_script_command(self, job):
220
245
  resources = job.resources
246
+ slurm_job_size = resources.kwargs.get("slurm_job_size", 1)
221
247
  # SLURM_ARRAY_TASK_ID: 0 ~ n_jobs-1
222
248
  script_command = "case $SLURM_ARRAY_TASK_ID in\n"
223
249
  for ii, task in enumerate(job.job_task_list):
@@ -243,10 +269,16 @@ class SlurmJobArray(Slurm):
243
269
  task_tag_finished=task_tag_finished,
244
270
  log_err_part=log_err_part,
245
271
  )
246
- script_command += f"{ii})\n"
272
+ if ii % slurm_job_size == 0:
273
+ script_command += f"{ii // slurm_job_size})\n"
247
274
  script_command += single_script_command
248
275
  script_command += self.gen_script_wait(resources=resources)
249
- script_command += "\n;;\n"
276
+ script_command += "\n"
277
+ if (
278
+ ii % slurm_job_size == slurm_job_size - 1
279
+ or ii == len(job.job_task_list) - 1
280
+ ):
281
+ script_command += ";;\n"
250
282
  script_command += "*)\nexit 1\n;;\nesac\n"
251
283
  return script_command
252
284
 
@@ -337,9 +369,30 @@ class SlurmJobArray(Slurm):
337
369
  def check_finish_tag(self, job):
338
370
  results = []
339
371
  for task in job.job_task_list:
340
- task_tag_finished = (
341
- pathlib.PurePath(task.task_work_path)
342
- / (task.task_hash + "_task_tag_finished")
343
- ).as_posix()
344
- results.append(self.context.check_file_exists(task_tag_finished))
372
+ task.get_task_state(self.context)
373
+ results.append(task.task_state == JobStatus.finished)
345
374
  return all(results)
375
+
376
+ @classmethod
377
+ def resources_subfields(cls) -> List[Argument]:
378
+ """Generate the resources subfields.
379
+
380
+ Returns
381
+ -------
382
+ list[Argument]
383
+ resources subfields
384
+ """
385
+ doc_slurm_job_size = "Number of tasks in a Slurm job"
386
+ arg = super().resources_subfields()[0]
387
+ arg.extend_subfields(
388
+ [
389
+ Argument(
390
+ "slurm_job_size",
391
+ int,
392
+ optional=True,
393
+ default=1,
394
+ doc=doc_slurm_job_size,
395
+ ),
396
+ ]
397
+ )
398
+ return [arg]
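With the new `slurm_job_size` resource (default 1), `SlurmJobArray` packs several tasks into each array element: the array index becomes `ii // slurm_job_size` and the `;;` terminator is emitted only when a group is complete. A hedged example of a resources dict that would enable it; the numbers and partition name are illustrative.

```python
# Illustrative SlurmJobArray resources: 8 tasks per job, 4 tasks per array
# element, so the header becomes "#SBATCH --array=0-1" (two elements).
from dpdispatcher import Resources

resources = Resources.load_from_dict(
    {
        "number_node": 1,
        "cpu_per_node": 8,
        "gpu_per_node": 0,
        "queue_name": "cpu",              # placeholder partition name
        "group_size": 8,                  # tasks grouped into one Job
        "kwargs": {"slurm_job_size": 4},  # tasks per Slurm array element
    }
)
```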
dpdispatcher/ssh_context.py CHANGED
@@ -116,7 +116,7 @@ class SSHSession:
116
116
  # transport = self.ssh.get_transport()
117
117
  # transport.set_keepalive(60)
118
118
 
119
- @retry(max_retry=3, sleep=1)
119
+ @retry(max_retry=6, sleep=1)
120
120
  def _setup_ssh(self):
121
121
  # machine = self.machine
122
122
  self.ssh = paramiko.SSHClient()
@@ -199,7 +199,7 @@ class SSHSession:
199
199
  ts.auth_interactive(self.username, self.inter_handler)
200
200
  except paramiko.ssh_exception.AuthenticationException:
201
201
  # due to the asynchrony of interactive authentication, one additional try is added
202
- # retry for up to 3 times
202
+ # retry for up to 6 times
203
203
  raise RetrySignal("Authentication failed")
204
204
  elif key_ok:
205
205
  pass
@@ -213,7 +213,12 @@ class SSHSession:
213
213
  raise RuntimeError("Please provide at least one form of authentication")
214
214
  assert ts.is_active()
215
215
  # Opening a session creates a channel along the socket to the server
216
- ts.open_session(timeout=self.timeout)
216
+ try:
217
+ ts.open_session(timeout=self.timeout)
218
+ except paramiko.ssh_exception.SSHException:
219
+ # retry for up to 6 times
220
+ # ref: https://github.com/paramiko/paramiko/issues/1508
221
+ raise RetrySignal("Opening session failed")
217
222
  ts.set_keepalive(60)
218
223
  self.ssh._transport = ts # type: ignore
219
224
  # reset sftp
@@ -323,14 +328,14 @@ class SSHSession:
323
328
  Argument("port", int, optional=True, default=22, doc=doc_port),
324
329
  Argument(
325
330
  "key_filename",
326
- [str, None],
331
+ [str, type(None)],
327
332
  optional=True,
328
333
  default=None,
329
334
  doc=doc_key_filename,
330
335
  ),
331
336
  Argument(
332
337
  "passphrase",
333
- [str, None],
338
+ [str, type(None)],
334
339
  optional=True,
335
340
  default=None,
336
341
  doc=doc_passphrase,
@@ -497,6 +502,14 @@ class SSHContext(BaseContext):
497
502
  self.block_checkcall(
498
503
  f"mv {shlex.quote(old_remote_root)} {shlex.quote(self.remote_root)}"
499
504
  )
505
+ elif (
506
+ old_remote_root is not None
507
+ and old_remote_root != self.remote_root
508
+ and self.check_file_exists(old_remote_root)
509
+ and not len(self.ssh_session.sftp.listdir(old_remote_root))
510
+ ):
511
+ # if the new directory exists and the old directory does not contain files, then move the old directory
512
+ self._rmtree(old_remote_root)
500
513
 
501
514
  sftp = self.ssh_session.ssh.open_sftp()
502
515
  try:
@@ -762,12 +775,6 @@ class SSHContext(BaseContext):
762
775
  retcode = cmd_pipes["stdout"].channel.recv_exit_status()
763
776
  return retcode, cmd_pipes["stdout"], cmd_pipes["stderr"]
764
777
 
765
- def kill(self, cmd_pipes):
766
- raise RuntimeError(
767
- "dose not work! we do not know how to kill proc through paramiko.SSHClient"
768
- )
769
- # self.block_checkcall('kill -15 %s' % cmd_pipes['pid'])
770
-
771
778
  def _rmtree(self, remotepath, verbose=False):
772
779
  """Remove the remote path."""
773
780
  # The original implementation method removes files one by one using sftp.
@@ -847,8 +854,7 @@ class SSHContext(BaseContext):
847
854
  self.ssh_session.put(from_f, to_f)
848
855
  except FileNotFoundError:
849
856
  raise FileNotFoundError(
850
- "from %s to %s @ %s : %s Error!"
851
- % (from_f, self.ssh_session.username, self.ssh_session.hostname, to_f)
857
+ f"from {from_f} to {self.ssh_session.username} @ {self.ssh_session.hostname} : {to_f} Error!"
852
858
  )
853
859
  # remote extract
854
860
  self.block_checkcall("tar xf %s" % of)
@@ -877,8 +883,7 @@ class SSHContext(BaseContext):
877
883
  ntar = len(files) // per_nfile + 1
878
884
  if ntar <= 1:
879
885
  self.block_checkcall(
880
- "tar %s %s %s"
881
- % (
886
+ "tar {} {} {}".format(
882
887
  tar_command,
883
888
  shlex.quote(of),
884
889
  " ".join([shlex.quote(file) for file in files]),
@@ -890,8 +895,7 @@ class SSHContext(BaseContext):
890
895
  )
891
896
  self.write_file(file_list_file, "\n".join(files))
892
897
  self.block_checkcall(
893
- "tar %s %s -T %s"
894
- % (tar_command, shlex.quote(of), shlex.quote(file_list_file))
898
+ f"tar {tar_command} {shlex.quote(of)} -T {shlex.quote(file_list_file)}"
895
899
  )
896
900
  # trans
897
901
  from_f = pathlib.PurePath(os.path.join(self.remote_root, of)).as_posix()
dpdispatcher/submission.py CHANGED
@@ -1,7 +1,10 @@
1
1
  # %%
2
+ import asyncio
2
3
  import copy
4
+ import functools
3
5
  import json
4
6
  import os
7
+ import pathlib
5
8
  import random
6
9
  import time
7
10
  import uuid
@@ -198,7 +201,9 @@ class Submission:
198
201
  self.local_root = machine.context.temp_local_root
199
202
  return self
200
203
 
201
- def run_submission(self, *, dry_run=False, exit_on_submit=False, clean=True):
204
+ def run_submission(
205
+ self, *, dry_run=False, exit_on_submit=False, clean=True, check_interval=30
206
+ ):
202
207
  """Main method to execute the submission.
203
208
  First, check whether old Submission exists on the remote machine, and try to recover from it.
204
209
  Second, upload the local files to the remote machine where the tasks to be executed.
@@ -235,11 +240,11 @@ class Submission:
235
240
  dlog.info(f"at {self.machine.context.remote_root}")
236
241
  return self.serialize()
237
242
  if ratio_unfinished > 0.0 and self.check_ratio_unfinished(ratio_unfinished):
238
- self.remove_unfinished_jobs()
243
+ self.remove_unfinished_tasks()
239
244
  break
240
245
 
241
246
  try:
242
- time.sleep(30)
247
+ time.sleep(check_interval)
243
248
  except (Exception, KeyboardInterrupt, SystemExit) as e:
244
249
  self.submission_to_json()
245
250
  dlog.exception(e)
@@ -253,12 +258,72 @@ class Submission:
253
258
  finally:
254
259
  pass
255
260
  self.handle_unexpected_submission_state()
256
- self.download_jobs()
261
+ self.try_download_result()
257
262
  self.submission_to_json()
258
263
  if clean:
259
264
  self.clean_jobs()
260
265
  return self.serialize()
261
266
 
267
+ def try_download_result(self):
268
+ start_time = time.time()
269
+ retry_interval = 60  # retry once per minute
270
+ success = False
271
+ while not success:
272
+ try:
273
+ self.download_jobs()
274
+ success = True
275
+ except (EOFError, Exception) as e:
276
+ dlog.exception(e)
277
+ elapsed_time = time.time() - start_time
278
+ if elapsed_time < 3600:  # within the first hour
279
+ dlog.info("Retrying in 1 minute...")
280
+ time.sleep(retry_interval)
281
+ elif elapsed_time < 86400:  # after the first hour, but within 24 hours
282
+ retry_interval = 600  # retry once every 10 minutes
283
+ dlog.info("Retrying in 10 minutes...")
284
+ time.sleep(retry_interval)
285
+ else:  # more than 24 hours
286
+ dlog.info("Maximum retries time reached. Exiting.")
287
+ break
288
+
289
+ async def async_run_submission(self, **kwargs):
290
+ """Async interface of run_submission.
291
+
292
+ Examples
293
+ --------
294
+ >>> import asyncio
295
+ >>> from dpdispatcher import Machine, Resources, Submission, Task
296
+ >>> async def run_jobs():
297
+ ... background_tasks = set()
298
+ ... # task1
299
+ ... task1 = Task(...)
300
+ ... submission1 = Submission(..., task_list=[task1])
301
+ ... background_task = asyncio.create_task(
302
+ ... submission1.async_run_submission(check_interval=2, clean=False)
303
+ ... )
304
+ ... # task2
305
+ ... task2 = Task(...)
306
+ ... submission2 = Submission(..., task_list=[task2])
307
+ ... background_task = asyncio.create_task(
308
+ ... submission2.async_run_submission(check_interval=2, clean=False)
309
+ ... )
310
+ ... background_tasks.add(background_task)
311
+ ... result = await asyncio.gather(*background_tasks)
312
+ ... return result
313
+ >>> asyncio.run(run_jobs())
314
+
315
+ May raise an error if `clean=True` is passed explicitly when submitting to PBS or Slurm.
316
+ """
317
+ kwargs = {**{"clean": False}, **kwargs}
318
+ if kwargs["clean"]:
319
+ dlog.warning(
320
+ "Using async submission with `clean=True`, "
321
+ "job may fail in queue system"
322
+ )
323
+ loop = asyncio.get_event_loop()
324
+ wrapped_submission = functools.partial(self.run_submission, **kwargs)
325
+ return await loop.run_in_executor(None, wrapped_submission)
326
+
262
327
  def update_submission_state(self):
263
328
  """Check whether all the jobs in the submission.
264
329
 
@@ -306,41 +371,53 @@ class Submission:
306
371
 
307
372
  # def update_submi
308
373
 
309
- def check_ratio_unfinished(self, ratio_unfinished):
310
- status_list = [job.job_state for job in self.belonging_jobs]
311
- finished_num = status_list.count(JobStatus.finished)
312
- if finished_num / len(self.belonging_jobs) < (1 - ratio_unfinished):
313
- return False
374
+ def check_ratio_unfinished(self, ratio_unfinished: float) -> bool:
375
+ """Calculate the ratio of unfinished tasks in the submission.
376
+
377
+ Parameters
378
+ ----------
379
+ ratio_unfinished : float
380
+ the ratio of unfinished tasks in the submission
381
+
382
+ Returns
383
+ -------
384
+ bool
385
+ whether the ratio of unfinished tasks in the submission is larger than ratio_unfinished
386
+ """
387
+ assert self.resources is not None
388
+ if self.resources.group_size == 1:
389
+ # if group size is 1, checking the job state is enough and faster
390
+ status_list = [job.job_state for job in self.belonging_jobs]
314
391
  else:
315
- return True
392
+ # checking the task state is more accurate
393
+ status_list = []
394
+ for task in self.belonging_tasks:
395
+ task.get_task_state(self.machine.context)
396
+ status_list.append(task.task_state)
397
+ finished_num = status_list.count(JobStatus.finished)
398
+ return finished_num / len(self.belonging_tasks) >= (1 - ratio_unfinished)
316
399
 
317
- def remove_unfinished_jobs(self):
318
- removed_jobs = [
319
- job
320
- for job in self.belonging_jobs
321
- if job.job_state not in [JobStatus.finished]
322
- ]
323
- self.belonging_jobs = [
324
- job for job in self.belonging_jobs if job.job_state in [JobStatus.finished]
325
- ]
326
- for job in removed_jobs:
327
- # kill unfinished jobs
328
- try:
329
- self.machine.context.kill(job.job_id)
330
- except Exception as e:
331
- dlog.info("Can not kill job %s" % job.job_id)
332
-
333
- # remove unfinished tasks
334
- import os
335
- import shutil
336
-
337
- for task in job.job_task_list:
338
- shutil.rmtree(
339
- os.path.join(self.machine.context.local_root, task.task_work_path),
340
- ignore_errors=True,
341
- )
342
- self.belonging_tasks = [
343
- task for task in self.belonging_tasks if task not in job.job_task_list
400
+ def remove_unfinished_tasks(self):
401
+ dlog.info("Remove unfinished tasks")
402
+ # kill all jobs and mark them as finished
403
+ for job in self.belonging_jobs:
404
+ if job.job_state != JobStatus.finished:
405
+ self.machine.kill(job)
406
+ job.job_state = JobStatus.finished
407
+ # remove all unfinished tasks
408
+ finished_tasks = []
409
+ for task in self.belonging_tasks:
410
+ if task.task_state == JobStatus.finished:
411
+ finished_tasks.append(task)
412
+ # there is no need to remove actual remote directory
413
+ # as it should be cleaned anyway
414
+ self.belonging_tasks = finished_tasks
415
+ # clean removed tasks in jobs - although this should not be necessary
416
+ for job in self.belonging_jobs:
417
+ job.job_task_list = [
418
+ task
419
+ for task in job.job_task_list
420
+ if task.task_state == JobStatus.finished
344
421
  ]
345
422
 
346
423
  def check_all_finished(self):
@@ -463,6 +540,9 @@ class Submission:
463
540
  submission.bind_machine(machine=self.machine)
464
541
  if self == submission:
465
542
  self.belonging_jobs = submission.belonging_jobs
543
+ self.belonging_tasks = [
544
+ task for job in self.belonging_jobs for task in job.job_task_list
545
+ ]
466
546
  self.bind_machine(machine=self.machine)
467
547
  dlog.info(
468
548
  f"Find old submission; recover submission from json file;"
@@ -518,6 +598,7 @@ class Task:
518
598
  self.task_hash = self.get_hash()
519
599
  # self.task_need_resources="<to be completed in the future>"
520
600
  # self.uuid =
601
+ self.task_state = JobStatus.unsubmitted
521
602
 
522
603
  def __repr__(self):
523
604
  return str(self.serialize())
@@ -602,15 +683,44 @@ class Task:
602
683
  default=[],
603
684
  ),
604
685
  Argument(
605
- "outlog", [None, str], optional=False, doc=doc_outlog, default="log"
686
+ "outlog",
687
+ [type(None), str],
688
+ optional=False,
689
+ doc=doc_outlog,
690
+ default="log",
606
691
  ),
607
692
  Argument(
608
- "errlog", [None, str], optional=False, doc=doc_errlog, default="err"
693
+ "errlog",
694
+ [type(None), str],
695
+ optional=False,
696
+ doc=doc_errlog,
697
+ default="err",
609
698
  ),
610
699
  ]
611
700
  task_format = Argument("task", dict, task_args)
612
701
  return task_format
613
702
 
703
+ def get_task_state(self, context):
704
+ """Get the task state by checking the tag file.
705
+
706
+ Parameters
707
+ ----------
708
+ context : Context
709
+ the context of the task
710
+ """
711
+ if self.task_state in (JobStatus.finished, JobStatus.unsubmitted):
712
+ # finished task should always be finished
713
+ # unsubmitted task do not need to check tag
714
+ return
715
+ # check tag
716
+ task_tag_finished = (
717
+ pathlib.PurePath(self.task_work_path)
718
+ / (self.task_hash + "_task_tag_finished")
719
+ ).as_posix()
720
+ result = context.check_file_exists(task_tag_finished)
721
+ if result:
722
+ self.task_state = JobStatus.finished
723
+
614
724
 
615
725
  class Job:
616
726
  """Job is generated by Submission automatically.
@@ -700,6 +810,8 @@ class Job:
700
810
  job.job_id = job_dict[job_hash]["job_id"]
701
811
  job.fail_count = job_dict[job_hash]["fail_count"]
702
812
  # job.job_uuid = job_dict[job_hash]['job_uuid']
813
+ for task in job.job_task_list:
814
+ task.task_state = job.job_state
703
815
  return job
704
816
 
705
817
  def get_job_state(self):
@@ -715,6 +827,11 @@ class Job:
715
827
  assert self.machine is not None
716
828
  job_state = self.machine.check_status(self)
717
829
  self.job_state = job_state
830
+ # update general task_state, which should be faster than checking tags
831
+ for task in self.job_task_list:
832
+ # only update if the task is not finished
833
+ if task.task_state != JobStatus.finished:
834
+ task.task_state = job_state
718
835
 
719
836
  def handle_unexpected_job_state(self):
720
837
  job_state = self.job_state
@@ -838,7 +955,7 @@ class Resources:
838
955
  If true, dpdispatcher will manually export environment variable CUDA_VISIBLE_DEVICES to different task.
839
956
  Usually, this option will be used with Task.task_need_resources variable simultaneously.
840
957
  ratio_unfinished : float
841
- The ratio of `jobs` that can be unfinished.
958
+ The ratio of `tasks` that can be unfinished.
842
959
  para_deg : int
843
960
  Decide how many tasks will be run in parallel.
844
961
  Usually run with `strategy['if_cuda_multi_devices']`
@@ -1010,7 +1127,7 @@ class Resources:
1010
1127
  "If true, dpdispatcher will manually export environment variable CUDA_VISIBLE_DEVICES to different task."
1011
1128
  "Usually, this option will be used with Task.task_need_resources variable simultaneously."
1012
1129
  )
1013
- doc_ratio_unfinished = "The ratio of `jobs` that can be unfinished."
1130
+ doc_ratio_unfinished = "The ratio of `tasks` that can be unfinished."
1014
1131
 
1015
1132
  strategy_args = [
1016
1133
  Argument(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dpdispatcher
3
- Version: 0.5.6
3
+ Version: 0.5.8
4
4
  Summary: Generate HPC scheduler systems jobs input scripts, submit these scripts to HPC systems, and poke until they finish
5
5
  Author: DeepModeling
6
6
  License: GNU LESSER GENERAL PUBLIC LICENSE
@@ -204,15 +204,20 @@ Provides-Extra: test
204
204
 
205
205
  # DPDispatcher
206
206
 
207
- DPDispatcher is a python package used to generate HPC(High Performance Computing) scheduler systems (Slurm/PBS/LSF/dpcloudserver) jobs input scripts and submit these scripts to HPC systems and poke until they finish.
207
+ [![conda-forge](https://img.shields.io/conda/dn/conda-forge/dpdispatcher?color=red&label=conda-forge&logo=conda-forge)](https://anaconda.org/conda-forge/dpdispatcher)
208
+ [![pip install](https://img.shields.io/pypi/dm/dpdispatcher?label=pip%20install&logo=pypi)](https://pypi.org/project/dpdispatcher)
209
+ [![docker pull](https://img.shields.io/docker/pulls/dptechnology/dpdispatcher?logo=docker)](https://hub.docker.com/r/dptechnology/dpdispatcher)
210
+ [![Documentation Status](https://readthedocs.org/projects/dpdispatcher/badge/)](https://dpdispatcher.readthedocs.io/)
211
+
212
+ DPDispatcher is a Python package used to generate HPC (High-Performance Computing) scheduler systems (Slurm/PBS/LSF/Bohrium) jobs input scripts, submit them to HPC systems, and poke until they finish.
208
213
 
209
- DPDispatcher will monitor (poke) until these jobs finish and download the results files (if these jobs is running on remote systems connected by SSH).
214
+ DPDispatcher will monitor (poke) until these jobs finish and download the results files (if these jobs are running on remote systems connected by SSH).
210
215
 
211
216
  For more information, check the [documentation](https://dpdispatcher.readthedocs.io/).
212
217
 
213
218
  ## Installation
214
219
 
215
- DPDispatcher can installed by `pip`:
220
+ DPDispatcher can be installed by `pip`:
216
221
 
217
222
  ```bash
218
223
  pip install dpdispatcher
@@ -224,5 +229,9 @@ See [Getting Started](https://dpdispatcher.readthedocs.io/en/latest/getting-star
224
229
 
225
230
  ## Contributing
226
231
 
227
- DPDispatcher is maintained by Deep Modeling's developers and welcome other people.
232
+ DPDispatcher is maintained by Deep Modeling's developers and welcomes other people.
228
233
  See [Contributing Guide](CONTRIBUTING.md) to become a contributor! 🤓
234
+
235
+ ## References
236
+
237
+ DPDispatcher is derived from the [DP-GEN](https://github.com/deepmodeling/dpgen) package. To cite DPDispatcher in a scholarly publication, please see Section 3.3 of the [DP-GEN paper](https://doi.org/10.1016/j.cpc.2020.107206).
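For context, a minimal end-to-end sketch of the workflow the README describes, exercising the `check_interval` argument added to `run_submission` in this diff; the batch type, paths, and command are placeholders.

```python
# Minimal quick-start sketch (batch type, paths, and command are placeholders).
from dpdispatcher import Machine, Resources, Submission, Task

machine = Machine.load_from_dict(
    {
        "batch_type": "Shell",
        "context_type": "LocalContext",
        "local_root": "./",
        "remote_root": "./work",
    }
)
resources = Resources.load_from_dict(
    {
        "number_node": 1,
        "cpu_per_node": 4,
        "gpu_per_node": 0,
        "queue_name": "",
        "group_size": 1,
    }
)
task = Task(
    command="echo hello",
    task_work_path="task0/",  # must exist under work_base
    forward_files=[],
    backward_files=["log"],
)
submission = Submission(
    work_base="test_dir/",
    machine=machine,
    resources=resources,
    task_list=[task],
)
# poll the scheduler every 10 s instead of the default 30 s (added in this diff)
submission.run_submission(check_interval=10)
```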
@@ -0,0 +1,34 @@
1
+ dpdispatcher/JobStatus.py,sha256=Eszs4TPLfszCuf6zLaFonf25feXDUguF28spYOjJpQE,233
2
+ dpdispatcher/__init__.py,sha256=2GIz4niyzHTbxros1G7Mi4uBJbD3AMSnTPxXSJMJmUs,2907
3
+ dpdispatcher/_version.py,sha256=iqWtoISytDDNpYe-atC8Kl-rZhTojPnDQKAEcFNtIhg,160
4
+ dpdispatcher/arginfo.py,sha256=pNaxYIE6ahBidpR7OCKZdw8iGt003uTXGSlVzwiuvRg,188
5
+ dpdispatcher/base_context.py,sha256=Hfri0x41XC4MRUjxc0-WMiZB_E4NvLp94ZYaHfYCWHM,3610
6
+ dpdispatcher/distributed_shell.py,sha256=XMcXt8g1f2DY5HYhhyiN5ehV2ihKULY5ng-sB0B7YaI,6933
7
+ dpdispatcher/dp_cloud_server.py,sha256=xVpDI0exBwHNSZECLJdfrQsvBzeUn5a0gx5Bzt9UAdU,9857
8
+ dpdispatcher/dp_cloud_server_context.py,sha256=VfRRo4ruorWC8NVjW19EjmxQ0Rbz6XzxrHrJKl4cCZk,11255
9
+ dpdispatcher/dpdisp.py,sha256=_dyH8xEgUR-s2xKkB20D9FIYhSHUCmzc2PxWgo9ildQ,94
10
+ dpdispatcher/fugaku.py,sha256=wSjY0XB3TNNWAPKHgMpoPl5jyYJIlijBcEkYXp6nrZQ,3733
11
+ dpdispatcher/hdfs_cli.py,sha256=9Vrf7Kz_kJgXP2xEdZqNVNxRGbui5RrtnLtEjxfcq9A,6047
12
+ dpdispatcher/hdfs_context.py,sha256=1jT1nzx7VGJFJ42MHTXoFWhfEu4KBkMBJO84klRAnPI,8938
13
+ dpdispatcher/lazy_local_context.py,sha256=ZdWNqK3QF8SsoqnCjpFt3ZDRCIagjzJNlKPUYutRUC8,5692
14
+ dpdispatcher/local_context.py,sha256=anYJqQASOnkcAhfckUcFD8_DcjNUZ1KE0GuksxR5Mxw,11772
15
+ dpdispatcher/lsf.py,sha256=zy-WEnC7f2Dy5hJGnRBl5jpjYZ_H3-KMcE0lxDG6ejo,7790
16
+ dpdispatcher/machine.py,sha256=31xG5ksN8mBVwD8taLsk5KXLhjM0ZTjlHlbbPgiig1c,15296
17
+ dpdispatcher/pbs.py,sha256=LiULEKNDuisrKmOpZyB1af6sGDQ35xrAhMh7VMwpFbY,6327
18
+ dpdispatcher/shell.py,sha256=kEP7za-qN71y_21p0uBNkopZ5s63Adq54904hjUHv48,4141
19
+ dpdispatcher/slurm.py,sha256=krlyjzxK8gIhSsqcKHFvNiUwVE7411wTUwuW9xGzS-E,14648
20
+ dpdispatcher/ssh_context.py,sha256=7Xrm8biVA7tAEDJ6YJZzC3nbdQrVBr_5UOhQNQ7qJ2g,35032
21
+ dpdispatcher/submission.py,sha256=r_F05nHTpN86b2os8RZAjZsCILNarDko2BjAEUYSntw,46643
22
+ dpdispatcher/utils.py,sha256=RXUHJl3S2z26Em3SeltnxtdVM3kv7weXJKvBEjG6I34,5035
23
+ dpdispatcher/dpcloudserver/__init__.py,sha256=FnX9HH-2dXADluNfucg98JPMfruMoBpN9ER9lZkVQvQ,49
24
+ dpdispatcher/dpcloudserver/client.py,sha256=w1wQ8g-FMQlyh00LIAbJLE1xirGXocpp7zAnhbeM4V0,11152
25
+ dpdispatcher/dpcloudserver/config.py,sha256=vBRtzExJXTGfXPeBObXrZNAhBNXoFFzMkzSuSrrjHEQ,635
26
+ dpdispatcher/dpcloudserver/retcode.py,sha256=1qAF8gFZx55u2sO8KbtYSIIrjcO-IGufEUlwbkSfC1g,721
27
+ dpdispatcher/dpcloudserver/temp_test.py,sha256=jklOSu7tZ_wW5gycGRiUsbBWMLZDqCBslSYOCb2hTHw,2932
28
+ dpdispatcher/dpcloudserver/zip_file.py,sha256=f9WrlktwHW0YipaWg5Y0kxjMZlhD1cJYa6EUpvu4Cro,2611
29
+ dpdispatcher-0.5.8.dist-info/LICENSE,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
30
+ dpdispatcher-0.5.8.dist-info/METADATA,sha256=o2oD8_6Ohc04mRTkJWi51-KOPamYqH0kvUD-E0iW-c0,12280
31
+ dpdispatcher-0.5.8.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
32
+ dpdispatcher-0.5.8.dist-info/entry_points.txt,sha256=3bKn6IB6SYhKOUbbcOdBBevz4gsDmhmbogKMVn4ptOQ,52
33
+ dpdispatcher-0.5.8.dist-info/top_level.txt,sha256=35jAQoXY-b-e9fJ1_mxhZUiaCoJNt1ZI7mpFRf07Qjs,13
34
+ dpdispatcher-0.5.8.dist-info/RECORD,,
@@ -1,33 +0,0 @@
1
- dpdispatcher/JobStatus.py,sha256=Eszs4TPLfszCuf6zLaFonf25feXDUguF28spYOjJpQE,233
2
- dpdispatcher/__init__.py,sha256=U8OLDjSGHxILiz8XH-HYBxjIlhD429HEqqxQ-vVK1a4,2866
3
- dpdispatcher/_version.py,sha256=J0O-QTcfk70wRsnrg-XApMYr8T5heaCiqHkl3PJ9zfs,160
4
- dpdispatcher/arginfo.py,sha256=pNaxYIE6ahBidpR7OCKZdw8iGt003uTXGSlVzwiuvRg,188
5
- dpdispatcher/base_context.py,sha256=XTKN0T_ffhVipEsbVEYNPmbKds8qMuwWbCixAhV8mUc,3690
6
- dpdispatcher/distributed_shell.py,sha256=vbNT8VHaYwEu2zIVFyosQGji4C3_QSpWKMFZURkJC7c,6941
7
- dpdispatcher/dp_cloud_server.py,sha256=mwXt2rtQeW4uMYBP05JcYE5vm5dp2JeitxiS8KjpFQc,9758
8
- dpdispatcher/dp_cloud_server_context.py,sha256=t47Kfn3cyQ223cBU-HVVdKvTm8yQiqHsFcMnQcIXdgk,11300
9
- dpdispatcher/dpdisp.py,sha256=_dyH8xEgUR-s2xKkB20D9FIYhSHUCmzc2PxWgo9ildQ,94
10
- dpdispatcher/hdfs_cli.py,sha256=9Vrf7Kz_kJgXP2xEdZqNVNxRGbui5RrtnLtEjxfcq9A,6047
11
- dpdispatcher/hdfs_context.py,sha256=IGvXsw9wdR8aemQ9kOE5WaciwVLtZbr-t2mrCQjxywU,8980
12
- dpdispatcher/lazy_local_context.py,sha256=V0jVuAgOHKw_PkYPCKnr3OkMyWfLRTJ8B7As3VCzLX8,5775
13
- dpdispatcher/local_context.py,sha256=8tML77WRSydJoPA6DseYsIshV-id5xO-6kWsqDsJHQ0,11855
14
- dpdispatcher/lsf.py,sha256=PjsjNO8YZkWBzFe_277G1oVrLcAm1Qz1fN-3FZ4dsK4,7553
15
- dpdispatcher/machine.py,sha256=vafq9zTW6NYZ-ZOMEfVEiZkoZzbEjco09d4VwEb9Jk8,14949
16
- dpdispatcher/pbs.py,sha256=RScX8rX1lGlvilgCEFxzUAeTA-Em5AeAcM2yT2OKY3s,6057
17
- dpdispatcher/shell.py,sha256=SZoJynOmqldMrl-lIMsNOY1RELFNFWWffeSg7XJsi9g,3843
18
- dpdispatcher/slurm.py,sha256=bsSTaRe5t3-z5BdjMyerGGBEXjJ_BfzMcawNMaFULfs,12886
19
- dpdispatcher/ssh_context.py,sha256=aeGiTUmBzfwkZ4xgNsyXBSpHAUHoUEVltidkNFWLXUE,34670
20
- dpdispatcher/submission.py,sha256=5qt1Nw3qRQeFq-w5DvzoWb9K-qMLiOFUjy_JhtnxFfI,41960
21
- dpdispatcher/utils.py,sha256=RXUHJl3S2z26Em3SeltnxtdVM3kv7weXJKvBEjG6I34,5035
22
- dpdispatcher/dpcloudserver/__init__.py,sha256=FnX9HH-2dXADluNfucg98JPMfruMoBpN9ER9lZkVQvQ,49
23
- dpdispatcher/dpcloudserver/client.py,sha256=a1KzBbbBKz6ZMH9iWhQfrdhL5BwrWevLS1vlTK4WP8w,11154
24
- dpdispatcher/dpcloudserver/config.py,sha256=vBRtzExJXTGfXPeBObXrZNAhBNXoFFzMkzSuSrrjHEQ,635
25
- dpdispatcher/dpcloudserver/retcode.py,sha256=1qAF8gFZx55u2sO8KbtYSIIrjcO-IGufEUlwbkSfC1g,721
26
- dpdispatcher/dpcloudserver/temp_test.py,sha256=jklOSu7tZ_wW5gycGRiUsbBWMLZDqCBslSYOCb2hTHw,2932
27
- dpdispatcher/dpcloudserver/zip_file.py,sha256=f9WrlktwHW0YipaWg5Y0kxjMZlhD1cJYa6EUpvu4Cro,2611
28
- dpdispatcher-0.5.6.dist-info/LICENSE,sha256=46mU2C5kSwOnkqkw9XQAJlhBL2JAf1_uCD8lVcXyMRg,7652
29
- dpdispatcher-0.5.6.dist-info/METADATA,sha256=KtfW7Uy8R5YHqMkRH3oDE5jh3JHXk_RcZ1W5H_gv2EM,11481
30
- dpdispatcher-0.5.6.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
31
- dpdispatcher-0.5.6.dist-info/entry_points.txt,sha256=3bKn6IB6SYhKOUbbcOdBBevz4gsDmhmbogKMVn4ptOQ,52
32
- dpdispatcher-0.5.6.dist-info/top_level.txt,sha256=35jAQoXY-b-e9fJ1_mxhZUiaCoJNt1ZI7mpFRf07Qjs,13
33
- dpdispatcher-0.5.6.dist-info/RECORD,,