hpcflow-new2 0.2.0a189__py3-none-any.whl → 0.2.0a190__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__pyinstaller/hook-hpcflow.py +8 -6
- hpcflow/_version.py +1 -1
- hpcflow/app.py +1 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +1 -1
- hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +1 -1
- hpcflow/sdk/__init__.py +21 -15
- hpcflow/sdk/app.py +2133 -770
- hpcflow/sdk/cli.py +281 -250
- hpcflow/sdk/cli_common.py +6 -2
- hpcflow/sdk/config/__init__.py +1 -1
- hpcflow/sdk/config/callbacks.py +77 -42
- hpcflow/sdk/config/cli.py +126 -103
- hpcflow/sdk/config/config.py +578 -311
- hpcflow/sdk/config/config_file.py +131 -95
- hpcflow/sdk/config/errors.py +112 -85
- hpcflow/sdk/config/types.py +145 -0
- hpcflow/sdk/core/actions.py +1054 -994
- hpcflow/sdk/core/app_aware.py +24 -0
- hpcflow/sdk/core/cache.py +81 -63
- hpcflow/sdk/core/command_files.py +275 -185
- hpcflow/sdk/core/commands.py +111 -107
- hpcflow/sdk/core/element.py +724 -503
- hpcflow/sdk/core/enums.py +192 -0
- hpcflow/sdk/core/environment.py +74 -93
- hpcflow/sdk/core/errors.py +398 -51
- hpcflow/sdk/core/json_like.py +540 -272
- hpcflow/sdk/core/loop.py +380 -334
- hpcflow/sdk/core/loop_cache.py +160 -43
- hpcflow/sdk/core/object_list.py +370 -207
- hpcflow/sdk/core/parameters.py +728 -600
- hpcflow/sdk/core/rule.py +59 -41
- hpcflow/sdk/core/run_dir_files.py +33 -22
- hpcflow/sdk/core/task.py +1546 -1325
- hpcflow/sdk/core/task_schema.py +240 -196
- hpcflow/sdk/core/test_utils.py +126 -88
- hpcflow/sdk/core/types.py +387 -0
- hpcflow/sdk/core/utils.py +410 -305
- hpcflow/sdk/core/validation.py +82 -9
- hpcflow/sdk/core/workflow.py +1192 -1028
- hpcflow/sdk/core/zarr_io.py +98 -137
- hpcflow/sdk/demo/cli.py +46 -33
- hpcflow/sdk/helper/cli.py +18 -16
- hpcflow/sdk/helper/helper.py +75 -63
- hpcflow/sdk/helper/watcher.py +61 -28
- hpcflow/sdk/log.py +83 -59
- hpcflow/sdk/persistence/__init__.py +8 -31
- hpcflow/sdk/persistence/base.py +988 -586
- hpcflow/sdk/persistence/defaults.py +6 -0
- hpcflow/sdk/persistence/discovery.py +38 -0
- hpcflow/sdk/persistence/json.py +408 -153
- hpcflow/sdk/persistence/pending.py +158 -123
- hpcflow/sdk/persistence/store_resource.py +37 -22
- hpcflow/sdk/persistence/types.py +307 -0
- hpcflow/sdk/persistence/utils.py +14 -11
- hpcflow/sdk/persistence/zarr.py +477 -420
- hpcflow/sdk/runtime.py +44 -41
- hpcflow/sdk/submission/{jobscript_info.py → enums.py} +39 -12
- hpcflow/sdk/submission/jobscript.py +444 -404
- hpcflow/sdk/submission/schedulers/__init__.py +133 -40
- hpcflow/sdk/submission/schedulers/direct.py +97 -71
- hpcflow/sdk/submission/schedulers/sge.py +132 -126
- hpcflow/sdk/submission/schedulers/slurm.py +263 -268
- hpcflow/sdk/submission/schedulers/utils.py +7 -2
- hpcflow/sdk/submission/shells/__init__.py +14 -15
- hpcflow/sdk/submission/shells/base.py +102 -29
- hpcflow/sdk/submission/shells/bash.py +72 -55
- hpcflow/sdk/submission/shells/os_version.py +31 -30
- hpcflow/sdk/submission/shells/powershell.py +37 -29
- hpcflow/sdk/submission/submission.py +203 -257
- hpcflow/sdk/submission/types.py +143 -0
- hpcflow/sdk/typing.py +163 -12
- hpcflow/tests/conftest.py +8 -6
- hpcflow/tests/schedulers/slurm/test_slurm_submission.py +5 -2
- hpcflow/tests/scripts/test_main_scripts.py +60 -30
- hpcflow/tests/shells/wsl/test_wsl_submission.py +6 -4
- hpcflow/tests/unit/test_action.py +86 -75
- hpcflow/tests/unit/test_action_rule.py +9 -4
- hpcflow/tests/unit/test_app.py +13 -6
- hpcflow/tests/unit/test_cli.py +1 -1
- hpcflow/tests/unit/test_command.py +71 -54
- hpcflow/tests/unit/test_config.py +20 -15
- hpcflow/tests/unit/test_config_file.py +21 -18
- hpcflow/tests/unit/test_element.py +58 -62
- hpcflow/tests/unit/test_element_iteration.py +3 -1
- hpcflow/tests/unit/test_element_set.py +29 -19
- hpcflow/tests/unit/test_group.py +4 -2
- hpcflow/tests/unit/test_input_source.py +116 -93
- hpcflow/tests/unit/test_input_value.py +29 -24
- hpcflow/tests/unit/test_json_like.py +44 -35
- hpcflow/tests/unit/test_loop.py +65 -58
- hpcflow/tests/unit/test_object_list.py +17 -12
- hpcflow/tests/unit/test_parameter.py +16 -7
- hpcflow/tests/unit/test_persistence.py +48 -35
- hpcflow/tests/unit/test_resources.py +20 -18
- hpcflow/tests/unit/test_run.py +8 -3
- hpcflow/tests/unit/test_runtime.py +2 -1
- hpcflow/tests/unit/test_schema_input.py +23 -15
- hpcflow/tests/unit/test_shell.py +3 -2
- hpcflow/tests/unit/test_slurm.py +8 -7
- hpcflow/tests/unit/test_submission.py +39 -19
- hpcflow/tests/unit/test_task.py +352 -247
- hpcflow/tests/unit/test_task_schema.py +33 -20
- hpcflow/tests/unit/test_utils.py +9 -11
- hpcflow/tests/unit/test_value_sequence.py +15 -12
- hpcflow/tests/unit/test_workflow.py +114 -83
- hpcflow/tests/unit/test_workflow_template.py +0 -1
- hpcflow/tests/workflows/test_jobscript.py +2 -1
- hpcflow/tests/workflows/test_workflows.py +18 -13
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a190.dist-info}/METADATA +2 -1
- hpcflow_new2-0.2.0a190.dist-info/RECORD +165 -0
- hpcflow/sdk/core/parallel.py +0 -21
- hpcflow_new2-0.2.0a189.dist-info/RECORD +0 -158
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a190.dist-info}/LICENSE +0 -0
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a190.dist-info}/WHEEL +0 -0
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a190.dist-info}/entry_points.txt +0 -0
@@ -2,18 +2,36 @@
|
|
2
2
|
Job scheduler models.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from
|
5
|
+
from __future__ import annotations
|
6
|
+
from abc import ABC, abstractmethod
|
6
7
|
import sys
|
7
8
|
import time
|
8
|
-
from typing import
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
from typing import Generic, TypeVar, TYPE_CHECKING
|
10
|
+
from typing_extensions import override
|
11
|
+
from hpcflow.sdk.typing import hydrate
|
12
|
+
from hpcflow.sdk.core.app_aware import AppAware
|
13
|
+
|
14
|
+
if TYPE_CHECKING:
|
15
|
+
from collections.abc import Mapping, Sequence
|
16
|
+
from typing import Any, ClassVar
|
17
|
+
from ..shells import Shell
|
18
|
+
from ..jobscript import Jobscript
|
19
|
+
from ..enums import JobscriptElementState
|
20
|
+
from ..types import VersionInfo
|
21
|
+
from ...config.types import SchedulerConfigDescriptor
|
22
|
+
from ...core.element import ElementResources
|
23
|
+
|
24
|
+
#: The type of a jobscript reference.
|
25
|
+
JSRefType = TypeVar("JSRefType")
|
26
|
+
|
27
|
+
|
28
|
+
@hydrate
|
29
|
+
class Scheduler(ABC, Generic[JSRefType], AppAware):
|
12
30
|
"""
|
13
31
|
Abstract base class for schedulers.
|
14
32
|
|
15
|
-
|
16
|
-
|
33
|
+
Parameters
|
34
|
+
----------
|
17
35
|
shell_args: str
|
18
36
|
Arguments to pass to the shell. Pre-quoted.
|
19
37
|
shebang_args: str
|
@@ -22,48 +40,65 @@ class NullScheduler:
|
|
22
40
|
Options to the scheduler.
|
23
41
|
"""
|
24
42
|
|
43
|
+
# This would be in the docstring except it renders really wrongly!
|
44
|
+
# Type Parameters
|
45
|
+
# ---------------
|
46
|
+
# JSRefType
|
47
|
+
# The type of a jobscript reference.
|
48
|
+
|
25
49
|
#: Default value for arguments to the shell.
|
26
|
-
DEFAULT_SHELL_ARGS = ""
|
50
|
+
DEFAULT_SHELL_ARGS: ClassVar[str] = ""
|
27
51
|
#: Default value for arguments on the shebang line.
|
28
|
-
DEFAULT_SHEBANG_ARGS = ""
|
52
|
+
DEFAULT_SHEBANG_ARGS: ClassVar[str] = ""
|
29
53
|
|
30
54
|
def __init__(
|
31
55
|
self,
|
32
|
-
shell_args=None,
|
33
|
-
shebang_args=None,
|
34
|
-
options=None,
|
56
|
+
shell_args: str | None = None,
|
57
|
+
shebang_args: str | None = None,
|
58
|
+
options: dict | None = None,
|
35
59
|
):
|
36
60
|
self.shebang_args = shebang_args or self.DEFAULT_SHEBANG_ARGS
|
37
61
|
self.shell_args = shell_args or self.DEFAULT_SHELL_ARGS
|
38
62
|
self.options = options or {}
|
39
63
|
|
40
64
|
@property
|
41
|
-
def unique_properties(self):
|
65
|
+
def unique_properties(self) -> tuple[str, ...]:
|
42
66
|
"""
|
43
67
|
Unique properties, for hashing.
|
44
68
|
"""
|
45
69
|
return (self.__class__.__name__,)
|
46
70
|
|
47
|
-
def __eq__(self, other) -> bool:
|
48
|
-
if
|
71
|
+
def __eq__(self, other: Any) -> bool:
|
72
|
+
if not isinstance(other, self.__class__):
|
49
73
|
return False
|
50
|
-
|
51
|
-
|
74
|
+
return self.__dict__ == other.__dict__
|
75
|
+
|
76
|
+
@abstractmethod
|
77
|
+
def process_resources(
|
78
|
+
self, resources: ElementResources, scheduler_config: SchedulerConfigDescriptor
|
79
|
+
) -> None:
|
80
|
+
"""
|
81
|
+
Perform scheduler-specific processing to the element resources.
|
52
82
|
|
53
|
-
|
83
|
+
Note
|
84
|
+
----
|
85
|
+
This mutates `resources`.
|
86
|
+
"""
|
87
|
+
|
88
|
+
def get_version_info(self) -> VersionInfo:
|
54
89
|
"""
|
55
90
|
Get the version of the scheduler.
|
56
91
|
"""
|
57
92
|
return {}
|
58
93
|
|
59
|
-
def parse_submission_output(self, stdout: str) -> None:
|
94
|
+
def parse_submission_output(self, stdout: str) -> str | None:
|
60
95
|
"""
|
61
96
|
Parse the output from a submission to determine the submission ID.
|
62
97
|
"""
|
63
98
|
return None
|
64
99
|
|
65
100
|
@staticmethod
|
66
|
-
def is_num_cores_supported(num_cores, core_range:
|
101
|
+
def is_num_cores_supported(num_cores: int | None, core_range: Sequence[int]) -> bool:
|
67
102
|
"""
|
68
103
|
Test whether particular number of cores is supported in given range of cores.
|
69
104
|
"""
|
@@ -71,8 +106,45 @@ class NullScheduler:
|
|
71
106
|
upper = core_range[2] + 1 if core_range[2] is not None else sys.maxsize
|
72
107
|
return num_cores in range(core_range[0], upper, step)
|
73
108
|
|
109
|
+
@abstractmethod
|
110
|
+
def get_submit_command(
|
111
|
+
self,
|
112
|
+
shell: Shell,
|
113
|
+
js_path: str,
|
114
|
+
deps: dict[Any, tuple[Any, ...]],
|
115
|
+
) -> list[str]:
|
116
|
+
"""
|
117
|
+
Get a command for submitting a jobscript.
|
118
|
+
"""
|
74
119
|
|
75
|
-
|
120
|
+
@abstractmethod
|
121
|
+
def get_job_state_info(
|
122
|
+
self, *, js_refs: Sequence[JSRefType] | None = None, num_js_elements: int = 0
|
123
|
+
) -> Mapping[str, Mapping[int | None, JobscriptElementState]]:
|
124
|
+
"""
|
125
|
+
Get the state of one or more jobscripts.
|
126
|
+
"""
|
127
|
+
|
128
|
+
@abstractmethod
|
129
|
+
def wait_for_jobscripts(self, js_refs: list[JSRefType]) -> None:
|
130
|
+
"""
|
131
|
+
Wait for one or more jobscripts to complete.
|
132
|
+
"""
|
133
|
+
|
134
|
+
@abstractmethod
|
135
|
+
def cancel_jobs(
|
136
|
+
self,
|
137
|
+
js_refs: list[JSRefType],
|
138
|
+
jobscripts: list[Jobscript] | None = None,
|
139
|
+
num_js_elements: int = 0, # Ignored!
|
140
|
+
) -> None:
|
141
|
+
"""
|
142
|
+
Cancel one or more jobscripts.
|
143
|
+
"""
|
144
|
+
|
145
|
+
|
146
|
+
@hydrate
|
147
|
+
class QueuedScheduler(Scheduler[str]):
|
76
148
|
"""
|
77
149
|
Base class for schedulers that use a job submission system.
|
78
150
|
|
@@ -86,7 +158,7 @@ class Scheduler(NullScheduler):
|
|
86
158
|
The delete command, if overridden from default.
|
87
159
|
js_cmd: str
|
88
160
|
The job script command, if overridden from default.
|
89
|
-
login_nodes_cmd: str
|
161
|
+
login_nodes_cmd: list[str]
|
90
162
|
The login nodes command, if overridden from default.
|
91
163
|
array_switch: str
|
92
164
|
The switch to enable array jobs, if overridden from default.
|
@@ -95,25 +167,37 @@ class Scheduler(NullScheduler):
|
|
95
167
|
"""
|
96
168
|
|
97
169
|
#: Default command for logging into nodes.
|
98
|
-
DEFAULT_LOGIN_NODES_CMD = None
|
170
|
+
DEFAULT_LOGIN_NODES_CMD: ClassVar[Sequence[str] | None] = None
|
99
171
|
#: Default pattern for matching the names of login nodes.
|
100
|
-
DEFAULT_LOGIN_NODE_MATCH = "*login*"
|
172
|
+
DEFAULT_LOGIN_NODE_MATCH: ClassVar[str] = "*login*"
|
173
|
+
#: Default command for submitting a job.
|
174
|
+
DEFAULT_SUBMIT_CMD: ClassVar[str]
|
175
|
+
#: Default command for listing current submitted jobs.
|
176
|
+
DEFAULT_SHOW_CMD: ClassVar[Sequence[str]]
|
177
|
+
#: Default command for deleting a job.
|
178
|
+
DEFAULT_DEL_CMD: ClassVar[str]
|
179
|
+
#: Default marker for job control metadata in a job script.
|
180
|
+
DEFAULT_JS_CMD: ClassVar[str]
|
181
|
+
#: Default switch for enabling array mode.
|
182
|
+
DEFAULT_ARRAY_SWITCH: ClassVar[str]
|
183
|
+
#: Default shell variable containing the current array index.
|
184
|
+
DEFAULT_ARRAY_ITEM_VAR: ClassVar[str]
|
101
185
|
|
102
186
|
def __init__(
|
103
187
|
self,
|
104
|
-
submit_cmd=None,
|
105
|
-
show_cmd=None,
|
106
|
-
del_cmd=None,
|
107
|
-
js_cmd=None,
|
108
|
-
login_nodes_cmd=None,
|
109
|
-
array_switch=None,
|
110
|
-
array_item_var=None,
|
188
|
+
submit_cmd: str | None = None,
|
189
|
+
show_cmd: Sequence[str] | None = None,
|
190
|
+
del_cmd: str | None = None,
|
191
|
+
js_cmd: str | None = None,
|
192
|
+
login_nodes_cmd: Sequence[str] | None = None,
|
193
|
+
array_switch: str | None = None,
|
194
|
+
array_item_var: str | None = None,
|
111
195
|
*args,
|
112
196
|
**kwargs,
|
113
|
-
):
|
197
|
+
) -> None:
|
114
198
|
super().__init__(*args, **kwargs)
|
115
199
|
|
116
|
-
self.submit_cmd = submit_cmd or self.DEFAULT_SUBMIT_CMD
|
200
|
+
self.submit_cmd: str = submit_cmd or self.DEFAULT_SUBMIT_CMD
|
117
201
|
self.show_cmd = show_cmd or self.DEFAULT_SHOW_CMD
|
118
202
|
self.del_cmd = del_cmd or self.DEFAULT_DEL_CMD
|
119
203
|
self.js_cmd = js_cmd or self.DEFAULT_JS_CMD
|
@@ -122,27 +206,36 @@ class Scheduler(NullScheduler):
|
|
122
206
|
self.array_item_var = array_item_var or self.DEFAULT_ARRAY_ITEM_VAR
|
123
207
|
|
124
208
|
@property
|
125
|
-
def unique_properties(self):
|
209
|
+
def unique_properties(self) -> tuple[str, str, Any, Any]:
|
126
210
|
return (self.__class__.__name__, self.submit_cmd, self.show_cmd, self.del_cmd)
|
127
211
|
|
128
|
-
def format_switch(self, switch):
|
212
|
+
def format_switch(self, switch: str) -> str:
|
129
213
|
"""
|
130
214
|
Format a particular switch to use the JS command.
|
131
215
|
"""
|
132
216
|
return f"{self.js_cmd} {switch}"
|
133
217
|
|
134
|
-
def is_jobscript_active(self, job_ID: str):
|
218
|
+
def is_jobscript_active(self, job_ID: str) -> bool:
|
135
219
|
"""Query if a jobscript is running/pending."""
|
136
|
-
return bool(self.get_job_state_info([job_ID]))
|
220
|
+
return bool(self.get_job_state_info(js_refs=[job_ID]))
|
137
221
|
|
138
|
-
|
222
|
+
@override
|
223
|
+
def wait_for_jobscripts(self, js_refs: list[str]) -> None:
|
139
224
|
"""
|
140
225
|
Wait for jobscripts to update their state.
|
141
226
|
"""
|
142
227
|
while js_refs:
|
143
|
-
info = self.get_job_state_info(js_refs)
|
228
|
+
info: Mapping[str, Any] = self.get_job_state_info(js_refs=js_refs)
|
144
229
|
print(info)
|
145
230
|
if not info:
|
146
231
|
break
|
147
|
-
js_refs = list(info
|
232
|
+
js_refs = list(info)
|
148
233
|
time.sleep(2)
|
234
|
+
|
235
|
+
@abstractmethod
|
236
|
+
def format_options(
|
237
|
+
self, resources: ElementResources, num_elements: int, is_array: bool, sub_idx: int
|
238
|
+
) -> str:
|
239
|
+
"""
|
240
|
+
Render options in a way that the scheduler can handle.
|
241
|
+
"""
|
@@ -2,19 +2,28 @@
|
|
2
2
|
A direct job "scheduler" that just runs immediate subprocesses.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from
|
5
|
+
from __future__ import annotations
|
6
6
|
import shutil
|
7
7
|
import signal
|
8
|
-
from typing import
|
9
|
-
|
8
|
+
from typing import overload, cast, TYPE_CHECKING
|
9
|
+
from typing_extensions import override, TypeAlias
|
10
10
|
import psutil
|
11
|
-
from hpcflow.sdk.submission.jobscript_info import JobscriptElementState
|
12
11
|
|
13
|
-
from hpcflow.sdk.
|
14
|
-
from hpcflow.sdk.submission.
|
12
|
+
from hpcflow.sdk.typing import hydrate
|
13
|
+
from hpcflow.sdk.submission.enums import JobscriptElementState
|
14
|
+
from hpcflow.sdk.submission.schedulers import Scheduler
|
15
|
+
|
16
|
+
if TYPE_CHECKING:
|
17
|
+
from collections.abc import Callable, Mapping, Sequence
|
18
|
+
from typing import Any, ClassVar
|
19
|
+
from ...config.types import SchedulerConfigDescriptor
|
20
|
+
from ..jobscript import Jobscript
|
21
|
+
from ..shells.base import Shell
|
22
|
+
|
23
|
+
DirectRef: TypeAlias = "tuple[int, list[str]]"
|
15
24
|
|
16
25
|
|
17
|
-
class DirectScheduler(
|
26
|
+
class DirectScheduler(Scheduler[DirectRef]):
|
18
27
|
"""
|
19
28
|
A direct scheduler, that just runs jobs immediately as direct subprocesses.
|
20
29
|
|
@@ -31,53 +40,55 @@ class DirectScheduler(NullScheduler):
|
|
31
40
|
Options to the jobscript command.
|
32
41
|
"""
|
33
42
|
|
34
|
-
def __init__(self, *args, **kwargs):
|
35
|
-
super().__init__(*args, **kwargs)
|
36
|
-
|
37
43
|
@classmethod
|
38
|
-
|
44
|
+
@override
|
45
|
+
def process_resources(
|
46
|
+
cls, resources, scheduler_config: SchedulerConfigDescriptor
|
47
|
+
) -> None:
|
39
48
|
"""Perform scheduler-specific processing to the element resources.
|
40
49
|
|
41
|
-
Note
|
42
|
-
|
50
|
+
Note
|
51
|
+
----
|
52
|
+
This mutates `resources`.
|
43
53
|
"""
|
44
54
|
return
|
45
55
|
|
56
|
+
@override
|
46
57
|
def get_submit_command(
|
47
58
|
self,
|
48
59
|
shell: Shell,
|
49
60
|
js_path: str,
|
50
|
-
deps:
|
51
|
-
) ->
|
61
|
+
deps: dict[Any, tuple[Any, ...]],
|
62
|
+
) -> list[str]:
|
52
63
|
"""
|
53
64
|
Get the concrete submission command.
|
54
65
|
"""
|
55
66
|
return shell.get_direct_submit_command(js_path)
|
56
67
|
|
57
68
|
@staticmethod
|
58
|
-
def
|
59
|
-
procs:
|
60
|
-
sig=signal.SIGTERM,
|
61
|
-
timeout=None,
|
62
|
-
on_terminate=None,
|
69
|
+
def __kill_processes(
|
70
|
+
procs: list[psutil.Process],
|
71
|
+
sig: signal.Signals = signal.SIGTERM,
|
72
|
+
timeout: float | None = None,
|
73
|
+
on_terminate: Callable[[psutil.Process], object] | None = None,
|
63
74
|
):
|
64
|
-
all_procs = []
|
65
|
-
for
|
66
|
-
all_procs.append(
|
67
|
-
all_procs.extend(
|
75
|
+
all_procs: list[psutil.Process] = []
|
76
|
+
for process in procs:
|
77
|
+
all_procs.append(process)
|
78
|
+
all_procs.extend(process.children(recursive=True))
|
68
79
|
|
69
|
-
for
|
80
|
+
for process in all_procs:
|
70
81
|
try:
|
71
|
-
|
82
|
+
process.send_signal(sig)
|
72
83
|
except psutil.NoSuchProcess:
|
73
84
|
pass
|
74
|
-
|
75
|
-
for
|
76
|
-
|
85
|
+
_, alive = psutil.wait_procs(all_procs, timeout=timeout, callback=on_terminate)
|
86
|
+
for process in alive:
|
87
|
+
process.kill()
|
77
88
|
|
78
89
|
@staticmethod
|
79
|
-
def
|
80
|
-
procs = []
|
90
|
+
def __get_jobscript_processes(js_refs: list[DirectRef]) -> list[psutil.Process]:
|
91
|
+
procs: list[psutil.Process] = []
|
81
92
|
for p_id, p_cmdline in js_refs:
|
82
93
|
try:
|
83
94
|
proc_i = psutil.Process(p_id)
|
@@ -89,71 +100,94 @@ class DirectScheduler(NullScheduler):
|
|
89
100
|
procs.append(proc_i)
|
90
101
|
return procs
|
91
102
|
|
103
|
+
@overload
|
104
|
+
@override
|
105
|
+
@classmethod
|
106
|
+
def wait_for_jobscripts(cls, js_refs: list[DirectRef]) -> None:
|
107
|
+
...
|
108
|
+
|
109
|
+
@overload
|
92
110
|
@classmethod
|
93
111
|
def wait_for_jobscripts(
|
94
112
|
cls,
|
95
|
-
js_refs:
|
96
|
-
|
97
|
-
|
113
|
+
js_refs: list[DirectRef],
|
114
|
+
*,
|
115
|
+
callback: Callable[[psutil.Process], None],
|
116
|
+
) -> list[psutil.Process]:
|
117
|
+
...
|
118
|
+
|
119
|
+
@classmethod
|
120
|
+
def wait_for_jobscripts(
|
121
|
+
cls,
|
122
|
+
js_refs: list[DirectRef],
|
123
|
+
*,
|
124
|
+
callback: Callable[[psutil.Process], None] | None = None,
|
125
|
+
) -> list[psutil.Process] | None:
|
98
126
|
"""Wait until the specified jobscripts have completed."""
|
99
|
-
procs = cls.
|
127
|
+
procs = cls.__get_jobscript_processes(js_refs)
|
100
128
|
(gone, alive) = psutil.wait_procs(procs, callback=callback)
|
101
129
|
assert not alive
|
102
|
-
return gone
|
130
|
+
return gone if callback else None
|
103
131
|
|
132
|
+
@override
|
104
133
|
def get_job_state_info(
|
105
134
|
self,
|
106
|
-
|
107
|
-
|
108
|
-
|
135
|
+
*,
|
136
|
+
js_refs: Sequence[DirectRef] | None = None,
|
137
|
+
num_js_elements: int = 0,
|
138
|
+
) -> Mapping[str, Mapping[int | None, JobscriptElementState]]:
|
109
139
|
"""Query the scheduler to get the states of all of this user's jobs, optionally
|
110
140
|
filtering by specified job IDs.
|
111
141
|
|
112
142
|
Jobs that are not in the scheduler's status output will not appear in the output
|
113
143
|
of this method."""
|
114
|
-
info = {}
|
115
|
-
for p_id, p_cmdline in js_refs:
|
116
|
-
|
117
|
-
if is_active:
|
144
|
+
info: dict[str, Mapping[int | None, JobscriptElementState]] = {}
|
145
|
+
for p_id, p_cmdline in js_refs or ():
|
146
|
+
if self.is_jobscript_active(p_id, p_cmdline):
|
118
147
|
# as far as the "scheduler" is concerned, all elements are running:
|
119
|
-
info[p_id] = {
|
148
|
+
info[str(p_id)] = {
|
120
149
|
i: JobscriptElementState.running for i in range(num_js_elements)
|
121
150
|
}
|
122
151
|
|
123
152
|
return info
|
124
153
|
|
154
|
+
@override
|
125
155
|
def cancel_jobs(
|
126
156
|
self,
|
127
|
-
js_refs:
|
128
|
-
jobscripts:
|
157
|
+
js_refs: list[DirectRef],
|
158
|
+
jobscripts: list[Jobscript] | None = None,
|
159
|
+
num_js_elements: int = 0, # Ignored!
|
129
160
|
):
|
130
161
|
"""
|
131
162
|
Cancel some jobs.
|
132
163
|
"""
|
133
164
|
|
134
|
-
|
165
|
+
js_proc_id: dict[int, Jobscript]
|
166
|
+
|
167
|
+
def callback(proc: psutil.Process):
|
135
168
|
try:
|
136
169
|
js = js_proc_id[proc.pid]
|
137
170
|
except KeyError:
|
138
171
|
# child process of one of the jobscripts
|
139
|
-
self.
|
172
|
+
self._app.submission_logger.debug(
|
140
173
|
f"jobscript child process ({proc.pid}) killed"
|
141
174
|
)
|
142
175
|
return
|
176
|
+
assert hasattr(proc, "returncode")
|
143
177
|
print(
|
144
178
|
f"Jobscript {js.index} from submission {js.submission.index} "
|
145
179
|
f"terminated (user-initiated cancel) with exit code {proc.returncode}."
|
146
180
|
)
|
147
181
|
|
148
|
-
procs = self.
|
149
|
-
self.
|
182
|
+
procs = self.__get_jobscript_processes(js_refs)
|
183
|
+
self._app.submission_logger.info(
|
150
184
|
f"cancelling {self.__class__.__name__} jobscript processes: {procs}."
|
151
185
|
)
|
152
|
-
js_proc_id = {i.pid: jobscripts[idx] for idx, i in enumerate(procs)}
|
153
|
-
self.
|
154
|
-
self.
|
186
|
+
js_proc_id = {i.pid: jobscripts[idx] for idx, i in enumerate(procs) if jobscripts}
|
187
|
+
self.__kill_processes(procs, timeout=3, on_terminate=callback)
|
188
|
+
self._app.submission_logger.info("jobscripts cancel command executed.")
|
155
189
|
|
156
|
-
def is_jobscript_active(self, process_ID: int, process_cmdline:
|
190
|
+
def is_jobscript_active(self, process_ID: int, process_cmdline: list[str]):
|
157
191
|
"""Query if a jobscript is running.
|
158
192
|
|
159
193
|
Note that a "running" jobscript might be waiting on upstream jobscripts to
|
@@ -165,12 +199,10 @@ class DirectScheduler(NullScheduler):
|
|
165
199
|
except psutil.NoSuchProcess:
|
166
200
|
return False
|
167
201
|
|
168
|
-
|
169
|
-
return True
|
170
|
-
else:
|
171
|
-
return False
|
202
|
+
return proc.cmdline() == process_cmdline
|
172
203
|
|
173
204
|
|
205
|
+
@hydrate
|
174
206
|
class DirectPosix(DirectScheduler):
|
175
207
|
"""
|
176
208
|
A direct scheduler for POSIX systems.
|
@@ -185,14 +217,11 @@ class DirectPosix(DirectScheduler):
|
|
185
217
|
Options to the jobscript command.
|
186
218
|
"""
|
187
219
|
|
188
|
-
_app_attr = "app"
|
189
220
|
#: Default shell.
|
190
|
-
DEFAULT_SHELL_EXECUTABLE = "/bin/bash"
|
191
|
-
|
192
|
-
def __init__(self, *args, **kwargs):
|
193
|
-
super().__init__(*args, **kwargs)
|
221
|
+
DEFAULT_SHELL_EXECUTABLE: ClassVar[str] = "/bin/bash"
|
194
222
|
|
195
223
|
|
224
|
+
@hydrate
|
196
225
|
class DirectWindows(DirectScheduler):
|
197
226
|
"""
|
198
227
|
A direct scheduler for Windows.
|
@@ -205,19 +234,16 @@ class DirectWindows(DirectScheduler):
|
|
205
234
|
Options to the jobscript command.
|
206
235
|
"""
|
207
236
|
|
208
|
-
_app_attr = "app"
|
209
237
|
#: Default shell.
|
210
|
-
DEFAULT_SHELL_EXECUTABLE = "powershell.exe"
|
211
|
-
|
212
|
-
def __init__(self, *args, **kwargs):
|
213
|
-
super().__init__(*args, **kwargs)
|
238
|
+
DEFAULT_SHELL_EXECUTABLE: ClassVar[str] = "powershell.exe"
|
214
239
|
|
240
|
+
@override
|
215
241
|
def get_submit_command(
|
216
|
-
self, shell: Shell, js_path: str, deps:
|
217
|
-
) ->
|
242
|
+
self, shell: Shell, js_path: str, deps: dict[Any, tuple[Any, ...]]
|
243
|
+
) -> list[str]:
|
218
244
|
cmd = super().get_submit_command(shell, js_path, deps)
|
219
245
|
# `Start-Process` (see `Jobscript._launch_direct_js_win`) seems to resolve the
|
220
246
|
# executable, which means the process's `cmdline` might look different to what we
|
221
247
|
# record; so let's resolve it ourselves:
|
222
|
-
cmd[0] = shutil.which(cmd[0])
|
248
|
+
cmd[0] = cast("str", shutil.which(cmd[0]))
|
223
249
|
return cmd
|