dpdispatcher 0.5.11__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. dpdispatcher/__init__.py +7 -89
  2. dpdispatcher/__main__.py +8 -0
  3. dpdispatcher/_version.py +14 -2
  4. dpdispatcher/base_context.py +1 -1
  5. dpdispatcher/contexts/__init__.py +11 -0
  6. dpdispatcher/{dp_cloud_server_context.py → contexts/dp_cloud_server_context.py} +7 -3
  7. dpdispatcher/{hdfs_context.py → contexts/hdfs_context.py} +2 -2
  8. dpdispatcher/{local_context.py → contexts/local_context.py} +51 -14
  9. dpdispatcher/{openapi_context.py → contexts/openapi_context.py} +3 -2
  10. dpdispatcher/{ssh_context.py → contexts/ssh_context.py} +113 -34
  11. dpdispatcher/dlog.py +31 -0
  12. dpdispatcher/dpdisp.py +113 -1
  13. dpdispatcher/entrypoints/__init__.py +1 -0
  14. dpdispatcher/entrypoints/gui.py +31 -0
  15. dpdispatcher/entrypoints/submission.py +83 -0
  16. dpdispatcher/machine.py +18 -4
  17. dpdispatcher/machines/__init__.py +11 -0
  18. dpdispatcher/{distributed_shell.py → machines/distributed_shell.py} +20 -4
  19. dpdispatcher/{dp_cloud_server.py → machines/dp_cloud_server.py} +21 -5
  20. dpdispatcher/{fugaku.py → machines/fugaku.py} +18 -5
  21. dpdispatcher/{lsf.py → machines/lsf.py} +20 -4
  22. dpdispatcher/{openapi.py → machines/openapi.py} +23 -4
  23. dpdispatcher/{pbs.py → machines/pbs.py} +30 -4
  24. dpdispatcher/{shell.py → machines/shell.py} +17 -3
  25. dpdispatcher/{slurm.py → machines/slurm.py} +37 -6
  26. dpdispatcher/submission.py +83 -39
  27. dpdispatcher/utils/__init__.py +1 -0
  28. dpdispatcher/{dpcloudserver → utils/dpcloudserver}/client.py +1 -1
  29. dpdispatcher/{hdfs_cli.py → utils/hdfs_cli.py} +1 -1
  30. dpdispatcher/utils/record.py +79 -0
  31. dpdispatcher/{utils.py → utils/utils.py} +14 -2
  32. {dpdispatcher-0.5.11.dist-info → dpdispatcher-0.6.1.dist-info}/METADATA +7 -2
  33. dpdispatcher-0.6.1.dist-info/RECORD +44 -0
  34. {dpdispatcher-0.5.11.dist-info → dpdispatcher-0.6.1.dist-info}/WHEEL +1 -1
  35. dpdispatcher-0.6.1.dist-info/entry_points.txt +7 -0
  36. dpdispatcher/dpcloudserver/temp_test.py +0 -90
  37. dpdispatcher-0.5.11.dist-info/RECORD +0 -36
  38. dpdispatcher-0.5.11.dist-info/entry_points.txt +0 -2
  39. /dpdispatcher/{lazy_local_context.py → contexts/lazy_local_context.py} +0 -0
  40. /dpdispatcher/{dpcloudserver → utils/dpcloudserver}/__init__.py +0 -0
  41. /dpdispatcher/{dpcloudserver → utils/dpcloudserver}/config.py +0 -0
  42. /dpdispatcher/{dpcloudserver → utils/dpcloudserver}/retcode.py +0 -0
  43. /dpdispatcher/{dpcloudserver → utils/dpcloudserver}/zip_file.py +0 -0
  44. /dpdispatcher/{JobStatus.py → utils/job_status.py} +0 -0
  45. {dpdispatcher-0.5.11.dist-info → dpdispatcher-0.6.1.dist-info}/LICENSE +0 -0
  46. {dpdispatcher-0.5.11.dist-info → dpdispatcher-0.6.1.dist-info}/top_level.txt +0 -0
dpdispatcher/__init__.py CHANGED
@@ -1,103 +1,21 @@
1
- import logging
2
- import os
3
- import sys
4
- import warnings
5
-
6
- ROOT_PATH = tuple(__path__)[0]
7
- dlog = logging.getLogger(__name__)
8
- dlog.propagate = False
9
- dlog.setLevel(logging.INFO)
10
- try:
11
- dlogf = logging.FileHandler(
12
- os.getcwd() + os.sep + "dpdispatcher" + ".log", delay=True
13
- )
14
- except PermissionError:
15
- warnings.warn(
16
- "dpdispatcher.log meet permission error. redirect the log to ~/dpdispatcher.log"
17
- )
18
- dlogf = logging.FileHandler(
19
- os.path.join(os.path.expanduser("~"), "dpdispatcher.log")
20
- )
21
-
22
- # dlogf = logging.FileHandler('./'+os.sep+SHORT_CMD+'.log')
23
- # dlogf = logging.FileHandler(os.path.join(os.environ['HOME'], SHORT_CMD+'.log'))
24
- # dlogf = logging.FileHandler(os.path.join(os.path.expanduser('~'), SHORT_CMD+'.log'))
25
- # dlogf = logging.FileHandler(os.path.join("/tmp/", SHORT_CMD+'.log'))
26
- dlogf_formatter = logging.Formatter("%(asctime)s - %(levelname)s : %(message)s")
27
- # dlogf_formatter=logging.Formatter('%(asctime)s - %(name)s - [%(filename)s:%(funcName)s - %(lineno)d ] - %(levelname)s \n %(message)s')
28
- dlogf.setFormatter(dlogf_formatter)
29
- dlog.addHandler(dlogf)
30
-
31
- dlog_stdout = logging.StreamHandler(sys.stdout)
32
- dlog_stdout.setFormatter(dlogf_formatter)
33
- dlog.addHandler(dlog_stdout)
34
-
35
1
  __author__ = "DeepModeling Team"
36
- __copyright__ = "Copyright 2019"
37
- __status__ = "Development"
2
+ __copyright__ = "Copyright 2019-2023, DeepModeling"
3
+ __status__ = "Production"
38
4
  try:
39
5
  from ._version import version as __version__
40
6
  except ImportError:
41
- __version__ = "unkown"
42
-
43
- from .distributed_shell import DistributedShell
44
- from .dp_cloud_server import DpCloudServer, Lebesgue
45
- from .dp_cloud_server_context import DpCloudServerContext, LebesgueContext
46
- from .fugaku import Fugaku
47
- from .hdfs_context import HDFSContext
48
- from .lazy_local_context import LazyLocalContext
49
- from .local_context import LocalContext
50
- from .lsf import LSF
51
- from .machine import Machine
52
- from .openapi import OpenAPI
53
- from .openapi_context import OpenAPIContext
54
- from .pbs import PBS, Torque
55
- from .shell import Shell
56
- from .slurm import Slurm
57
- from .ssh_context import SSHContext
58
- from .submission import Job, Resources, Submission, Task
59
-
60
-
61
- def info():
62
- """Show basic information about dpdispatcher, its location and version."""
63
- print("DeepModeling\n------------")
64
- print("Version: " + __version__)
65
- print("Path: " + ROOT_PATH)
66
- print("")
67
- print("Dependency")
68
- print("------------")
69
- for modui in ["psutil", "paramiko", "dargs", "oss2"]:
70
- try:
71
- mm = __import__(modui)
72
- print("%10s %10s %s" % (modui, mm.__version__, mm.__path__[0]))
73
- except ImportError:
74
- print("%10s %10s Not Found" % (modui, ""))
75
- print()
7
+ __version__ = "unknown"
76
8
 
9
+ import dpdispatcher.contexts # noqa: F401
10
+ import dpdispatcher.machines # noqa: F401
11
+ from dpdispatcher.machine import Machine
12
+ from dpdispatcher.submission import Job, Resources, Submission, Task
77
13
 
78
14
  __all__ = [
79
15
  "__version__",
80
- "DistributedShell",
81
- "DpCloudServer",
82
- "OpenAPI",
83
- "OpenAPIContext",
84
- "DpCloudServerContext",
85
- "HDFSContext",
86
- "LazyLocalContext",
87
- "LocalContext",
88
- "LSF",
89
16
  "Machine",
90
- "PBS",
91
- "Shell",
92
- "Slurm",
93
- "Fugaku",
94
- "SSHContext",
95
17
  "Submission",
96
18
  "Task",
97
- "Torque",
98
- "info",
99
- "Lebesgue",
100
- "LebesgueContext",
101
19
  "Job",
102
20
  "Resources",
103
21
  ]
@@ -0,0 +1,8 @@
1
+ """Package dp entry point."""
2
+
3
+ from dpdispatcher.dpdisp import (
4
+ main,
5
+ )
6
+
7
+ if __name__ == "__main__":
8
+ main()
dpdispatcher/_version.py CHANGED
@@ -1,4 +1,16 @@
1
1
  # file generated by setuptools_scm
2
2
  # don't change, don't track in version control
3
- __version__ = version = '0.5.11'
4
- __version_tuple__ = version_tuple = (0, 5, 11)
3
+ TYPE_CHECKING = False
4
+ if TYPE_CHECKING:
5
+ from typing import Tuple, Union
6
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
7
+ else:
8
+ VERSION_TUPLE = object
9
+
10
+ version: str
11
+ __version__: str
12
+ __version_tuple__: VERSION_TUPLE
13
+ version_tuple: VERSION_TUPLE
14
+
15
+ __version__ = version = '0.6.1'
16
+ __version_tuple__ = version_tuple = (0, 6, 1)
@@ -3,7 +3,7 @@ from typing import List, Tuple
3
3
 
4
4
  from dargs import Argument
5
5
 
6
- from dpdispatcher import dlog
6
+ from dpdispatcher.dlog import dlog
7
7
 
8
8
 
9
9
  class BaseContext(metaclass=ABCMeta):
@@ -0,0 +1,11 @@
1
+ """Contexts."""
2
+ import importlib
3
+ from pathlib import Path
4
+
5
+ PACKAGE_BASE = "dpdispatcher.contexts"
6
+ NOT_LOADABLE = ("__init__.py",)
7
+
8
+ for module_file in Path(__file__).parent.glob("*.py"):
9
+ if module_file.name not in NOT_LOADABLE:
10
+ module_name = f".{module_file.stem}"
11
+ importlib.import_module(module_name, PACKAGE_BASE)
@@ -8,13 +8,16 @@ from typing import List
8
8
  import tqdm
9
9
  from dargs.dargs import Argument
10
10
 
11
- from dpdispatcher import dlog
12
11
  from dpdispatcher.base_context import BaseContext
13
- from dpdispatcher.dpcloudserver.config import ALI_STS_BUCKET_NAME, ALI_STS_ENDPOINT
12
+ from dpdispatcher.dlog import dlog
14
13
 
15
14
  # from dpdispatcher.submission import Machine
16
15
  # from . import dlog
17
- from .dpcloudserver import Client, zip_file
16
+ from dpdispatcher.utils.dpcloudserver import Client, zip_file
17
+ from dpdispatcher.utils.dpcloudserver.config import (
18
+ ALI_STS_BUCKET_NAME,
19
+ ALI_STS_ENDPOINT,
20
+ )
18
21
 
19
22
  # from zip_file import zip_files
20
23
 
@@ -113,6 +116,7 @@ class BohriumContext(BaseContext):
113
116
 
114
117
  upload_file_list = [
115
118
  job.script_file_name,
119
+ f"{job.script_file_name}.run",
116
120
  ]
117
121
  upload_file_list.extend(common_files)
118
122
 
@@ -3,9 +3,9 @@ import shutil
3
3
  import tarfile
4
4
  from glob import glob
5
5
 
6
- from dpdispatcher import dlog
7
6
  from dpdispatcher.base_context import BaseContext
8
- from dpdispatcher.hdfs_cli import HDFS
7
+ from dpdispatcher.dlog import dlog
8
+ from dpdispatcher.utils.hdfs_cli import HDFS
9
9
 
10
10
 
11
11
  class HDFSContext(BaseContext):
@@ -1,12 +1,11 @@
1
- import hashlib
2
1
  import os
3
2
  import shutil
4
3
  import subprocess as sp
5
4
  from glob import glob
6
5
  from subprocess import TimeoutExpired
7
6
 
8
- from dpdispatcher import dlog
9
7
  from dpdispatcher.base_context import BaseContext
8
+ from dpdispatcher.dlog import dlog
10
9
 
11
10
 
12
11
  class SPRetObj:
@@ -30,14 +29,6 @@ def _check_file_path(fname):
30
29
  os.makedirs(dirname, exist_ok=True)
31
30
 
32
31
 
33
- def _identical_files(fname0, fname1):
34
- with open(fname0) as fp:
35
- code0 = hashlib.sha1(fp.read().encode("utf-8")).hexdigest()
36
- with open(fname1) as fp:
37
- code1 = hashlib.sha1(fp.read().encode("utf-8")).hexdigest()
38
- return code0 == code1
39
-
40
-
41
32
  class LocalContext(BaseContext):
42
33
  """Run jobs in the local server and remote directory.
43
34
 
@@ -152,9 +143,34 @@ class LocalContext(BaseContext):
152
143
  for ii in submission.belonging_tasks:
153
144
  local_job = os.path.join(self.local_root, ii.task_work_path)
154
145
  remote_job = os.path.join(self.remote_root, ii.task_work_path)
155
- flist = ii.backward_files
146
+ flist = []
147
+ for kk in ii.backward_files:
148
+ abs_flist_r = glob(os.path.join(remote_job, kk))
149
+ abs_flist_l = glob(os.path.join(local_job, kk))
150
+ if not abs_flist_r and not abs_flist_l:
151
+ if check_exists:
152
+ if mark_failure:
153
+ tag_file_path = os.path.join(
154
+ self.local_root,
155
+ ii.task_work_path,
156
+ "tag_failure_download_%s" % kk,
157
+ )
158
+ with open(tag_file_path, "w") as fp:
159
+ pass
160
+ else:
161
+ pass
162
+ else:
163
+ raise RuntimeError(
164
+ "cannot find download file " + os.path.join(remote_job, kk)
165
+ )
166
+ rel_flist = [
167
+ os.path.relpath(ii, start=remote_job) for ii in abs_flist_r
168
+ ]
169
+ flist.extend(rel_flist)
156
170
  if back_error:
157
- flist += glob(os.path.join(remote_job, "error*"))
171
+ abs_flist = glob(os.path.join(remote_job, "error*"))
172
+ rel_flist = [os.path.relpath(ii, start=remote_job) for ii in abs_flist]
173
+ flist.extend(rel_flist)
158
174
  for jj in flist:
159
175
  rfile = os.path.join(remote_job, jj)
160
176
  lfile = os.path.join(local_job, jj)
@@ -198,9 +214,30 @@ class LocalContext(BaseContext):
198
214
  pass
199
215
  local_job = self.local_root
200
216
  remote_job = self.remote_root
201
- flist = submission.backward_common_files
217
+ flist = []
218
+ for kk in submission.backward_common_files:
219
+ abs_flist_r = glob(os.path.join(remote_job, kk))
220
+ abs_flist_l = glob(os.path.join(local_job, kk))
221
+ if not abs_flist_r and not abs_flist_l:
222
+ if check_exists:
223
+ if mark_failure:
224
+ tag_file_path = os.path.join(
225
+ self.local_root, "tag_failure_download_%s" % kk
226
+ )
227
+ with open(tag_file_path, "w") as fp:
228
+ pass
229
+ else:
230
+ pass
231
+ else:
232
+ raise RuntimeError(
233
+ "cannot find download file " + os.path.join(remote_job, kk)
234
+ )
235
+ rel_flist = [os.path.relpath(ii, start=remote_job) for ii in abs_flist_r]
236
+ flist.extend(rel_flist)
202
237
  if back_error:
203
- flist += glob(os.path.join(remote_job, "error*"))
238
+ abs_flist = glob(os.path.join(remote_job, "error*"))
239
+ rel_flist = [os.path.relpath(ii, start=remote_job) for ii in abs_flist]
240
+ flist.extend(rel_flist)
204
241
  for jj in flist:
205
242
  rfile = os.path.join(remote_job, jj)
206
243
  lfile = os.path.join(local_job, jj)
@@ -14,9 +14,9 @@ except ModuleNotFoundError:
14
14
  else:
15
15
  found_bohriumsdk = True
16
16
 
17
- from dpdispatcher import dlog
18
17
  from dpdispatcher.base_context import BaseContext
19
- from dpdispatcher.JobStatus import JobStatus
18
+ from dpdispatcher.dlog import dlog
19
+ from dpdispatcher.utils.job_status import JobStatus
20
20
 
21
21
  DP_CLOUD_SERVER_HOME_DIR = os.path.join(
22
22
  os.path.expanduser("~"), ".dpdispatcher/", "dp_cloud_server/"
@@ -88,6 +88,7 @@ class OpenAPIContext(BaseContext):
88
88
 
89
89
  upload_file_list = [
90
90
  job.script_file_name,
91
+ f"{job.script_file_name}.run",
91
92
  ]
92
93
 
93
94
  upload_file_list.extend(common_files)
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env python
2
2
 
3
+ import fnmatch
3
4
  import os
4
5
  import pathlib
5
6
  import shlex
@@ -10,17 +11,24 @@ import time
10
11
  import uuid
11
12
  from functools import lru_cache
12
13
  from glob import glob
14
+ from stat import S_ISDIR, S_ISREG
13
15
  from typing import List
14
16
 
15
17
  import paramiko
16
18
  import paramiko.ssh_exception
17
19
  from dargs.dargs import Argument
18
20
 
19
- from dpdispatcher import dlog
20
21
  from dpdispatcher.base_context import BaseContext
22
+ from dpdispatcher.dlog import dlog
21
23
 
22
24
  # from dpdispatcher.submission import Machine
23
- from dpdispatcher.utils import RetrySignal, generate_totp, get_sha256, retry, rsync
25
+ from dpdispatcher.utils.utils import (
26
+ RetrySignal,
27
+ generate_totp,
28
+ get_sha256,
29
+ retry,
30
+ rsync,
31
+ )
24
32
 
25
33
 
26
34
  class SSHSession:
@@ -48,6 +56,7 @@ class SSHSession:
48
56
  self.ssh = None
49
57
  self.tar_compress = tar_compress
50
58
  self.look_for_keys = look_for_keys
59
+ self._keyboard_interactive_auth = False
51
60
  self._setup_ssh()
52
61
 
53
62
  # @classmethod
@@ -137,6 +146,7 @@ class SSHSession:
137
146
  # Make a Paramiko Transport object using the socket
138
147
  ts = paramiko.Transport(sock)
139
148
  ts.banner_timeout = 60
149
+ ts.auth_timeout = self.timeout + 20
140
150
  ts.use_compression(compress=True)
141
151
 
142
152
  # Tell Paramiko that the Transport is going to be used as a client
@@ -165,6 +175,8 @@ class SSHSession:
165
175
  pass
166
176
  if key is not None:
167
177
  break
178
+ else:
179
+ raise OSError(f"{key_path} not found!")
168
180
  elif self.look_for_keys:
169
181
  for keytype, name in [
170
182
  (paramiko.RSAKey, "rsa"),
@@ -187,20 +199,22 @@ class SSHSession:
187
199
  if key is not None:
188
200
  break
189
201
 
202
+ allowed_types = set()
190
203
  if key is not None:
191
204
  try:
192
- ts.auth_publickey(self.username, key)
205
+ allowed_types = set(ts.auth_publickey(self.username, key))
193
206
  except paramiko.ssh_exception.AuthenticationException as e:
194
207
  key_error = e
195
208
  else:
196
209
  key_ok = True
197
- if self.totp_secret is not None:
210
+ if self.totp_secret is not None or "keyboard-interactive" in allowed_types:
198
211
  try:
199
212
  ts.auth_interactive(self.username, self.inter_handler)
200
213
  except paramiko.ssh_exception.AuthenticationException:
201
214
  # since the asynchrony of interactive authentication, one addtional try is added
202
215
  # retry for up to 6 times
203
216
  raise RetrySignal("Authentication failed")
217
+ self._keyboard_interactive_auth = True
204
218
  elif key_ok:
205
219
  pass
206
220
  elif self.password is not None:
@@ -390,6 +404,7 @@ class SSHSession:
390
404
  and self.exec_command("rsync --version")[1].channel.recv_exit_status() == 0
391
405
  and self.totp_secret is None
392
406
  and self.passphrase is None
407
+ and not self._keyboard_interactive_auth
393
408
  )
394
409
 
395
410
  @property
@@ -542,6 +557,8 @@ class SSHContext(BaseContext):
542
557
  directory_list.append(root)
543
558
  for name in files:
544
559
  file_list.append(os.path.join(root, name))
560
+ elif os.path.islink(file_name) and not os.path.exists(file_name):
561
+ raise OSError(f"{file_name} is broken symbolic link")
545
562
  elif glob(file_name):
546
563
  # If the file name contains a wildcard, os.path functions will fail to identify it. Use glob to get the complete list of filenames which match the wildcard.
547
564
  abs_file_list = glob(file_name)
@@ -632,6 +649,18 @@ class SSHContext(BaseContext):
632
649
  tar_compress=self.remote_profile.get("tar_compress", None),
633
650
  )
634
651
 
652
+ def list_remote_dir(self, sftp, remote_dir, ref_remote_root, result_list):
653
+ for entry in sftp.listdir_attr(remote_dir):
654
+ remote_name = pathlib.PurePath(
655
+ os.path.join(remote_dir, entry.filename)
656
+ ).as_posix()
657
+ st_mode = entry.st_mode
658
+ if S_ISDIR(st_mode):
659
+ self.list_remote_dir(sftp, remote_name, ref_remote_root, result_list)
660
+ elif S_ISREG(st_mode):
661
+ rel_remote_name = os.path.relpath(remote_name, start=ref_remote_root)
662
+ result_list.append(rel_remote_name)
663
+
635
664
  def download(
636
665
  self,
637
666
  submission,
@@ -641,34 +670,70 @@ class SSHContext(BaseContext):
641
670
  mark_failure=True,
642
671
  back_error=False,
643
672
  ):
673
+ assert self.remote_root is not None
644
674
  self.ssh_session.ensure_alive()
645
675
  file_list = []
646
676
  # for ii in job_dirs :
647
- for task in submission.belonging_tasks:
648
- for jj in task.backward_files:
649
- file_name = pathlib.PurePath(
650
- os.path.join(task.task_work_path, jj)
651
- ).as_posix()
677
+ for ii in submission.belonging_tasks:
678
+ remote_file_list = None
679
+ for jj in ii.backward_files:
680
+ if "*" in jj or "?" in jj:
681
+ if remote_file_list is not None:
682
+ abs_file_list = fnmatch.filter(remote_file_list, jj)
683
+ else:
684
+ remote_file_list = []
685
+ remote_job = pathlib.PurePath(
686
+ os.path.join(self.remote_root, ii.task_work_path)
687
+ ).as_posix()
688
+ self.list_remote_dir(
689
+ self.sftp, remote_job, remote_job, remote_file_list
690
+ )
691
+
692
+ abs_file_list = fnmatch.filter(remote_file_list, jj)
693
+ rel_file_list = [
694
+ pathlib.PurePath(os.path.join(ii.task_work_path, kk)).as_posix()
695
+ for kk in abs_file_list
696
+ ]
697
+
698
+ else:
699
+ rel_file_list = [
700
+ pathlib.PurePath(os.path.join(ii.task_work_path, jj)).as_posix()
701
+ ]
652
702
  if check_exists:
653
- if self.check_file_exists(file_name):
654
- file_list.append(file_name)
655
- elif mark_failure:
656
- with open(
657
- os.path.join(
658
- self.local_root,
659
- task.task_work_path,
660
- "tag_failure_download_%s" % jj,
661
- ),
662
- "w",
663
- ) as fp:
703
+ for file_name in rel_file_list:
704
+ if self.check_file_exists(file_name):
705
+ file_list.append(file_name)
706
+ elif mark_failure:
707
+ with open(
708
+ os.path.join(
709
+ self.local_root,
710
+ ii.task_work_path,
711
+ "tag_failure_download_%s" % jj,
712
+ ),
713
+ "w",
714
+ ) as fp:
715
+ pass
716
+ else:
664
717
  pass
665
- else:
666
- pass
667
718
  else:
668
- file_list.append(file_name)
719
+ file_list.extend(rel_file_list)
669
720
  if back_error:
670
- errors = glob(os.path.join(task.task_work_path, "error*"))
671
- file_list.extend(errors)
721
+ if remote_file_list is not None:
722
+ abs_errors = fnmatch.filter(remote_file_list, "error*")
723
+ else:
724
+ remote_file_list = []
725
+ remote_job = pathlib.PurePath(
726
+ os.path.join(self.remote_root, ii.task_work_path)
727
+ ).as_posix()
728
+ self.list_remote_dir(
729
+ self.sftp, remote_job, remote_job, remote_file_list
730
+ )
731
+ abs_errors = fnmatch.filter(remote_file_list, "error*")
732
+ rel_errors = [
733
+ pathlib.PurePath(os.path.join(ii.task_work_path, kk)).as_posix()
734
+ for kk in abs_errors
735
+ ]
736
+ file_list.extend(rel_errors)
672
737
  file_list.extend(submission.backward_common_files)
673
738
  if len(file_list) > 0:
674
739
  self._get_files(
@@ -881,21 +946,35 @@ class SSHContext(BaseContext):
881
946
  per_nfile = 100
882
947
  ntar = len(files) // per_nfile + 1
883
948
  if ntar <= 1:
884
- self.block_checkcall(
885
- "tar {} {} {}".format(
886
- tar_command,
887
- shlex.quote(of),
888
- " ".join([shlex.quote(file) for file in files]),
949
+ try:
950
+ self.block_checkcall(
951
+ "tar {} {} {}".format(
952
+ tar_command,
953
+ shlex.quote(of),
954
+ " ".join([shlex.quote(file) for file in files]),
955
+ )
889
956
  )
890
- )
957
+ except RuntimeError as e:
958
+ if "No such file or directory" in str(e):
959
+ raise FileNotFoundError(
960
+ "Any of the backward files does not exist in the remote directory."
961
+ ) from e
962
+ raise e
891
963
  else:
892
964
  file_list_file = os.path.join(
893
965
  self.remote_root, ".tmp.tar." + str(uuid.uuid4())
894
966
  )
895
967
  self.write_file(file_list_file, "\n".join(files))
896
- self.block_checkcall(
897
- f"tar {tar_command} {shlex.quote(of)} -T {shlex.quote(file_list_file)}"
898
- )
968
+ try:
969
+ self.block_checkcall(
970
+ f"tar {tar_command} {shlex.quote(of)} -T {shlex.quote(file_list_file)}"
971
+ )
972
+ except RuntimeError as e:
973
+ if "No such file or directory" in str(e):
974
+ raise FileNotFoundError(
975
+ "Any of the backward files does not exist in the remote directory."
976
+ ) from e
977
+ raise e
899
978
  # trans
900
979
  from_f = pathlib.PurePath(os.path.join(self.remote_root, of)).as_posix()
901
980
  to_f = pathlib.PurePath(os.path.join(self.local_root, of)).as_posix()
dpdispatcher/dlog.py ADDED
@@ -0,0 +1,31 @@
1
+ import logging
2
+ import os
3
+ import sys
4
+ import warnings
5
+
6
+ dlog = logging.getLogger("dpdispatcher")
7
+ dlog.propagate = False
8
+ dlog.setLevel(logging.INFO)
9
+ try:
10
+ dlogf = logging.FileHandler(
11
+ os.getcwd() + os.sep + "dpdispatcher" + ".log", delay=True
12
+ )
13
+ except PermissionError:
14
+ warnings.warn(
15
+ "dpdispatcher.log meet permission error. redirect the log to ~/dpdispatcher.log"
16
+ )
17
+ dlogf = logging.FileHandler(
18
+ os.path.join(os.path.expanduser("~"), "dpdispatcher.log"), delay=True
19
+ )
20
+
21
+ dlogf_formatter = logging.Formatter("%(asctime)s - %(levelname)s : %(message)s")
22
+ dlogf.setFormatter(dlogf_formatter)
23
+ dlog.addHandler(dlogf)
24
+
25
+ dlog_stdout = logging.StreamHandler(sys.stdout)
26
+ dlog_stdout.setFormatter(dlogf_formatter)
27
+ dlog.addHandler(dlog_stdout)
28
+
29
+ __all__ = [
30
+ "dlog",
31
+ ]