hpcflow-new2 0.2.0a189__py3-none-any.whl → 0.2.0a199__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__pyinstaller/hook-hpcflow.py +9 -6
- hpcflow/_version.py +1 -1
- hpcflow/app.py +1 -0
- hpcflow/data/scripts/bad_script.py +2 -0
- hpcflow/data/scripts/do_nothing.py +2 -0
- hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
- hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/input_file_generator_basic.py +3 -0
- hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
- hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +1 -1
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +1 -1
- hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
- hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
- hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
- hpcflow/data/scripts/output_file_parser_basic.py +3 -0
- hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
- hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/script_exit_test.py +5 -0
- hpcflow/data/template_components/environments.yaml +1 -1
- hpcflow/sdk/__init__.py +26 -15
- hpcflow/sdk/app.py +2192 -768
- hpcflow/sdk/cli.py +506 -296
- hpcflow/sdk/cli_common.py +105 -7
- hpcflow/sdk/config/__init__.py +1 -1
- hpcflow/sdk/config/callbacks.py +115 -43
- hpcflow/sdk/config/cli.py +126 -103
- hpcflow/sdk/config/config.py +674 -318
- hpcflow/sdk/config/config_file.py +131 -95
- hpcflow/sdk/config/errors.py +125 -84
- hpcflow/sdk/config/types.py +148 -0
- hpcflow/sdk/core/__init__.py +25 -1
- hpcflow/sdk/core/actions.py +1771 -1059
- hpcflow/sdk/core/app_aware.py +24 -0
- hpcflow/sdk/core/cache.py +139 -79
- hpcflow/sdk/core/command_files.py +263 -287
- hpcflow/sdk/core/commands.py +145 -112
- hpcflow/sdk/core/element.py +828 -535
- hpcflow/sdk/core/enums.py +192 -0
- hpcflow/sdk/core/environment.py +74 -93
- hpcflow/sdk/core/errors.py +455 -52
- hpcflow/sdk/core/execute.py +207 -0
- hpcflow/sdk/core/json_like.py +540 -272
- hpcflow/sdk/core/loop.py +751 -347
- hpcflow/sdk/core/loop_cache.py +164 -47
- hpcflow/sdk/core/object_list.py +370 -207
- hpcflow/sdk/core/parameters.py +1100 -627
- hpcflow/sdk/core/rule.py +59 -41
- hpcflow/sdk/core/run_dir_files.py +21 -37
- hpcflow/sdk/core/skip_reason.py +7 -0
- hpcflow/sdk/core/task.py +1649 -1339
- hpcflow/sdk/core/task_schema.py +308 -196
- hpcflow/sdk/core/test_utils.py +191 -114
- hpcflow/sdk/core/types.py +440 -0
- hpcflow/sdk/core/utils.py +485 -309
- hpcflow/sdk/core/validation.py +82 -9
- hpcflow/sdk/core/workflow.py +2544 -1178
- hpcflow/sdk/core/zarr_io.py +98 -137
- hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
- hpcflow/sdk/demo/cli.py +53 -33
- hpcflow/sdk/helper/cli.py +18 -15
- hpcflow/sdk/helper/helper.py +75 -63
- hpcflow/sdk/helper/watcher.py +61 -28
- hpcflow/sdk/log.py +122 -71
- hpcflow/sdk/persistence/__init__.py +8 -31
- hpcflow/sdk/persistence/base.py +1360 -606
- hpcflow/sdk/persistence/defaults.py +6 -0
- hpcflow/sdk/persistence/discovery.py +38 -0
- hpcflow/sdk/persistence/json.py +568 -188
- hpcflow/sdk/persistence/pending.py +382 -179
- hpcflow/sdk/persistence/store_resource.py +39 -23
- hpcflow/sdk/persistence/types.py +318 -0
- hpcflow/sdk/persistence/utils.py +14 -11
- hpcflow/sdk/persistence/zarr.py +1337 -433
- hpcflow/sdk/runtime.py +44 -41
- hpcflow/sdk/submission/{jobscript_info.py → enums.py} +39 -12
- hpcflow/sdk/submission/jobscript.py +1651 -692
- hpcflow/sdk/submission/schedulers/__init__.py +167 -39
- hpcflow/sdk/submission/schedulers/direct.py +121 -81
- hpcflow/sdk/submission/schedulers/sge.py +170 -129
- hpcflow/sdk/submission/schedulers/slurm.py +291 -268
- hpcflow/sdk/submission/schedulers/utils.py +12 -2
- hpcflow/sdk/submission/shells/__init__.py +14 -15
- hpcflow/sdk/submission/shells/base.py +150 -29
- hpcflow/sdk/submission/shells/bash.py +283 -173
- hpcflow/sdk/submission/shells/os_version.py +31 -30
- hpcflow/sdk/submission/shells/powershell.py +228 -170
- hpcflow/sdk/submission/submission.py +1014 -335
- hpcflow/sdk/submission/types.py +140 -0
- hpcflow/sdk/typing.py +182 -12
- hpcflow/sdk/utils/arrays.py +71 -0
- hpcflow/sdk/utils/deferred_file.py +55 -0
- hpcflow/sdk/utils/hashing.py +16 -0
- hpcflow/sdk/utils/patches.py +12 -0
- hpcflow/sdk/utils/strings.py +33 -0
- hpcflow/tests/api/test_api.py +32 -0
- hpcflow/tests/conftest.py +27 -6
- hpcflow/tests/data/multi_path_sequences.yaml +29 -0
- hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
- hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
- hpcflow/tests/schedulers/slurm/test_slurm_submission.py +5 -2
- hpcflow/tests/scripts/test_input_file_generators.py +282 -0
- hpcflow/tests/scripts/test_main_scripts.py +866 -85
- hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
- hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
- hpcflow/tests/shells/wsl/test_wsl_submission.py +12 -4
- hpcflow/tests/unit/test_action.py +262 -75
- hpcflow/tests/unit/test_action_rule.py +9 -4
- hpcflow/tests/unit/test_app.py +33 -6
- hpcflow/tests/unit/test_cache.py +46 -0
- hpcflow/tests/unit/test_cli.py +134 -1
- hpcflow/tests/unit/test_command.py +71 -54
- hpcflow/tests/unit/test_config.py +142 -16
- hpcflow/tests/unit/test_config_file.py +21 -18
- hpcflow/tests/unit/test_element.py +58 -62
- hpcflow/tests/unit/test_element_iteration.py +50 -1
- hpcflow/tests/unit/test_element_set.py +29 -19
- hpcflow/tests/unit/test_group.py +4 -2
- hpcflow/tests/unit/test_input_source.py +116 -93
- hpcflow/tests/unit/test_input_value.py +29 -24
- hpcflow/tests/unit/test_jobscript_unit.py +757 -0
- hpcflow/tests/unit/test_json_like.py +44 -35
- hpcflow/tests/unit/test_loop.py +1396 -84
- hpcflow/tests/unit/test_meta_task.py +325 -0
- hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
- hpcflow/tests/unit/test_object_list.py +17 -12
- hpcflow/tests/unit/test_parameter.py +29 -7
- hpcflow/tests/unit/test_persistence.py +237 -42
- hpcflow/tests/unit/test_resources.py +20 -18
- hpcflow/tests/unit/test_run.py +117 -6
- hpcflow/tests/unit/test_run_directories.py +29 -0
- hpcflow/tests/unit/test_runtime.py +2 -1
- hpcflow/tests/unit/test_schema_input.py +23 -15
- hpcflow/tests/unit/test_shell.py +23 -2
- hpcflow/tests/unit/test_slurm.py +8 -7
- hpcflow/tests/unit/test_submission.py +38 -89
- hpcflow/tests/unit/test_task.py +352 -247
- hpcflow/tests/unit/test_task_schema.py +33 -20
- hpcflow/tests/unit/test_utils.py +9 -11
- hpcflow/tests/unit/test_value_sequence.py +15 -12
- hpcflow/tests/unit/test_workflow.py +114 -83
- hpcflow/tests/unit/test_workflow_template.py +0 -1
- hpcflow/tests/unit/utils/test_arrays.py +40 -0
- hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
- hpcflow/tests/unit/utils/test_hashing.py +65 -0
- hpcflow/tests/unit/utils/test_patches.py +5 -0
- hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
- hpcflow/tests/workflows/__init__.py +0 -0
- hpcflow/tests/workflows/test_directory_structure.py +31 -0
- hpcflow/tests/workflows/test_jobscript.py +334 -1
- hpcflow/tests/workflows/test_run_status.py +198 -0
- hpcflow/tests/workflows/test_skip_downstream.py +696 -0
- hpcflow/tests/workflows/test_submission.py +140 -0
- hpcflow/tests/workflows/test_workflows.py +160 -15
- hpcflow/tests/workflows/test_zip.py +18 -0
- hpcflow/viz_demo.ipynb +6587 -3
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +8 -4
- hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
- hpcflow/sdk/core/parallel.py +0 -21
- hpcflow_new2-0.2.0a189.dist-info/RECORD +0 -158
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0
@@ -2,18 +2,43 @@
|
|
2
2
|
Job scheduler models.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from
|
5
|
+
from __future__ import annotations
|
6
|
+
from abc import ABC, abstractmethod
|
6
7
|
import sys
|
7
8
|
import time
|
8
|
-
from typing import
|
9
|
+
from typing import Generic, TypeVar, TYPE_CHECKING
|
10
|
+
from typing_extensions import override
|
11
|
+
from hpcflow.sdk.typing import hydrate
|
12
|
+
from hpcflow.sdk.core.app_aware import AppAware
|
9
13
|
|
14
|
+
if TYPE_CHECKING:
|
15
|
+
from collections.abc import Mapping, Sequence
|
16
|
+
from typing import Any, ClassVar
|
17
|
+
from ..shells import Shell
|
18
|
+
from ..jobscript import Jobscript
|
19
|
+
from ..enums import JobscriptElementState
|
20
|
+
from ..types import VersionInfo
|
21
|
+
from ...config.types import SchedulerConfigDescriptor
|
22
|
+
from ...core.element import ElementResources
|
10
23
|
|
11
|
-
|
24
|
+
#: The type of a jobscript reference.
|
25
|
+
JSRefType = TypeVar("JSRefType")
|
26
|
+
|
27
|
+
|
28
|
+
@hydrate
|
29
|
+
class Scheduler(ABC, Generic[JSRefType], AppAware):
|
12
30
|
"""
|
13
31
|
Abstract base class for schedulers.
|
14
32
|
|
15
|
-
|
16
|
-
|
33
|
+
Note
|
34
|
+
----
|
35
|
+
Do not make immediate subclasses of this class other than
|
36
|
+
:py:class:`DirectScheduler` and :py:class:`QueuedScheduler`;
|
37
|
+
subclass those two instead. Code (e.g., in :py:class:`Jobscript`)
|
38
|
+
assumes that this model is followed and does not check it.
|
39
|
+
|
40
|
+
Parameters
|
41
|
+
----------
|
17
42
|
shell_args: str
|
18
43
|
Arguments to pass to the shell. Pre-quoted.
|
19
44
|
shebang_args: str
|
@@ -22,48 +47,65 @@ class NullScheduler:
|
|
22
47
|
Options to the scheduler.
|
23
48
|
"""
|
24
49
|
|
50
|
+
# This would be in the docstring except it renders really wrongly!
|
51
|
+
# Type Parameters
|
52
|
+
# ---------------
|
53
|
+
# JSRefType
|
54
|
+
# The type of a jobscript reference.
|
55
|
+
|
25
56
|
#: Default value for arguments to the shell.
|
26
|
-
DEFAULT_SHELL_ARGS = ""
|
57
|
+
DEFAULT_SHELL_ARGS: ClassVar[str] = ""
|
27
58
|
#: Default value for arguments on the shebang line.
|
28
|
-
DEFAULT_SHEBANG_ARGS = ""
|
59
|
+
DEFAULT_SHEBANG_ARGS: ClassVar[str] = ""
|
29
60
|
|
30
61
|
def __init__(
|
31
62
|
self,
|
32
|
-
shell_args=None,
|
33
|
-
shebang_args=None,
|
34
|
-
options=None,
|
63
|
+
shell_args: str | None = None,
|
64
|
+
shebang_args: str | None = None,
|
65
|
+
options: dict | None = None,
|
35
66
|
):
|
36
67
|
self.shebang_args = shebang_args or self.DEFAULT_SHEBANG_ARGS
|
37
68
|
self.shell_args = shell_args or self.DEFAULT_SHELL_ARGS
|
38
69
|
self.options = options or {}
|
39
70
|
|
40
71
|
@property
|
41
|
-
def unique_properties(self):
|
72
|
+
def unique_properties(self) -> tuple[str, ...]:
|
42
73
|
"""
|
43
74
|
Unique properties, for hashing.
|
44
75
|
"""
|
45
76
|
return (self.__class__.__name__,)
|
46
77
|
|
47
|
-
def __eq__(self, other) -> bool:
|
48
|
-
if
|
78
|
+
def __eq__(self, other: Any) -> bool:
|
79
|
+
if not isinstance(other, self.__class__):
|
49
80
|
return False
|
50
|
-
|
51
|
-
|
81
|
+
return self.__dict__ == other.__dict__
|
82
|
+
|
83
|
+
@abstractmethod
|
84
|
+
def process_resources(
|
85
|
+
self, resources: ElementResources, scheduler_config: SchedulerConfigDescriptor
|
86
|
+
) -> None:
|
87
|
+
"""
|
88
|
+
Perform scheduler-specific processing to the element resources.
|
89
|
+
|
90
|
+
Note
|
91
|
+
----
|
92
|
+
This mutates `resources`.
|
93
|
+
"""
|
52
94
|
|
53
|
-
def get_version_info(self):
|
95
|
+
def get_version_info(self) -> VersionInfo:
|
54
96
|
"""
|
55
97
|
Get the version of the scheduler.
|
56
98
|
"""
|
57
99
|
return {}
|
58
100
|
|
59
|
-
def parse_submission_output(self, stdout: str) -> None:
|
101
|
+
def parse_submission_output(self, stdout: str) -> str | None:
|
60
102
|
"""
|
61
103
|
Parse the output from a submission to determine the submission ID.
|
62
104
|
"""
|
63
105
|
return None
|
64
106
|
|
65
107
|
@staticmethod
|
66
|
-
def is_num_cores_supported(num_cores, core_range:
|
108
|
+
def is_num_cores_supported(num_cores: int | None, core_range: Sequence[int]) -> bool:
|
67
109
|
"""
|
68
110
|
Test whether a particular number of cores is supported in a given range of cores.
|
69
111
|
"""
|
@@ -71,8 +113,56 @@ class NullScheduler:
|
|
71
113
|
upper = core_range[2] + 1 if core_range[2] is not None else sys.maxsize
|
72
114
|
return num_cores in range(core_range[0], upper, step)
|
73
115
|
|
116
|
+
@abstractmethod
|
117
|
+
def get_submit_command(
|
118
|
+
self,
|
119
|
+
shell: Shell,
|
120
|
+
js_path: str,
|
121
|
+
deps: dict[Any, tuple[Any, ...]],
|
122
|
+
) -> list[str]:
|
123
|
+
"""
|
124
|
+
Get a command for submitting a jobscript.
|
125
|
+
"""
|
126
|
+
|
127
|
+
@abstractmethod
|
128
|
+
def get_job_state_info(
|
129
|
+
self, *, js_refs: Sequence[JSRefType] | None = None
|
130
|
+
) -> Mapping[str, JobscriptElementState | Mapping[int, JobscriptElementState]]:
|
131
|
+
"""
|
132
|
+
Get the state of one or more jobscripts.
|
133
|
+
"""
|
134
|
+
|
135
|
+
@abstractmethod
|
136
|
+
def wait_for_jobscripts(self, js_refs: list[JSRefType]) -> None:
|
137
|
+
"""
|
138
|
+
Wait for one or more jobscripts to complete.
|
139
|
+
"""
|
74
140
|
|
75
|
-
|
141
|
+
@abstractmethod
|
142
|
+
def cancel_jobs(
|
143
|
+
self,
|
144
|
+
js_refs: list[JSRefType],
|
145
|
+
jobscripts: list[Jobscript] | None = None,
|
146
|
+
) -> None:
|
147
|
+
"""
|
148
|
+
Cancel one or more jobscripts.
|
149
|
+
"""
|
150
|
+
|
151
|
+
@abstractmethod
|
152
|
+
def get_std_out_err_filename(self, js_idx: int, *args, **kwargs) -> str:
|
153
|
+
"""File name of combined standard output and error streams."""
|
154
|
+
|
155
|
+
@abstractmethod
|
156
|
+
def get_stdout_filename(self, js_idx: int, *args, **kwargs) -> str:
|
157
|
+
"""File name of the standard output stream file."""
|
158
|
+
|
159
|
+
@abstractmethod
|
160
|
+
def get_stderr_filename(self, js_idx: int, *args, **kwargs) -> str:
|
161
|
+
"""File name of the standard error stream file."""
|
162
|
+
|
163
|
+
|
164
|
+
@hydrate
|
165
|
+
class QueuedScheduler(Scheduler[str]):
|
76
166
|
"""
|
77
167
|
Base class for schedulers that use a job submission system.
|
78
168
|
|
@@ -86,7 +176,7 @@ class Scheduler(NullScheduler):
|
|
86
176
|
The delete command, if overridden from default.
|
87
177
|
js_cmd: str
|
88
178
|
The job script command, if overridden from default.
|
89
|
-
login_nodes_cmd: str
|
179
|
+
login_nodes_cmd: list[str]
|
90
180
|
The login nodes command, if overridden from default.
|
91
181
|
array_switch: str
|
92
182
|
The switch to enable array jobs, if overridden from default.
|
@@ -95,25 +185,37 @@ class Scheduler(NullScheduler):
|
|
95
185
|
"""
|
96
186
|
|
97
187
|
#: Default command for logging into nodes.
|
98
|
-
DEFAULT_LOGIN_NODES_CMD = None
|
188
|
+
DEFAULT_LOGIN_NODES_CMD: ClassVar[Sequence[str] | None] = None
|
99
189
|
#: Default pattern for matching the names of login nodes.
|
100
|
-
DEFAULT_LOGIN_NODE_MATCH = "*login*"
|
190
|
+
DEFAULT_LOGIN_NODE_MATCH: ClassVar[str] = "*login*"
|
191
|
+
#: Default command for submitting a job.
|
192
|
+
DEFAULT_SUBMIT_CMD: ClassVar[str]
|
193
|
+
#: Default command for listing current submitted jobs.
|
194
|
+
DEFAULT_SHOW_CMD: ClassVar[Sequence[str]]
|
195
|
+
#: Default command for deleting a job.
|
196
|
+
DEFAULT_DEL_CMD: ClassVar[str]
|
197
|
+
#: Default marker for job control metadata in a job script.
|
198
|
+
DEFAULT_JS_CMD: ClassVar[str]
|
199
|
+
#: Default switch for enabling array mode.
|
200
|
+
DEFAULT_ARRAY_SWITCH: ClassVar[str]
|
201
|
+
#: Default shell variable containing the current array index.
|
202
|
+
DEFAULT_ARRAY_ITEM_VAR: ClassVar[str]
|
101
203
|
|
102
204
|
def __init__(
|
103
205
|
self,
|
104
|
-
submit_cmd=None,
|
105
|
-
show_cmd=None,
|
106
|
-
del_cmd=None,
|
107
|
-
js_cmd=None,
|
108
|
-
login_nodes_cmd=None,
|
109
|
-
array_switch=None,
|
110
|
-
array_item_var=None,
|
206
|
+
submit_cmd: str | None = None,
|
207
|
+
show_cmd: Sequence[str] | None = None,
|
208
|
+
del_cmd: str | None = None,
|
209
|
+
js_cmd: str | None = None,
|
210
|
+
login_nodes_cmd: Sequence[str] | None = None,
|
211
|
+
array_switch: str | None = None,
|
212
|
+
array_item_var: str | None = None,
|
111
213
|
*args,
|
112
214
|
**kwargs,
|
113
|
-
):
|
215
|
+
) -> None:
|
114
216
|
super().__init__(*args, **kwargs)
|
115
217
|
|
116
|
-
self.submit_cmd = submit_cmd or self.DEFAULT_SUBMIT_CMD
|
218
|
+
self.submit_cmd: str = submit_cmd or self.DEFAULT_SUBMIT_CMD
|
117
219
|
self.show_cmd = show_cmd or self.DEFAULT_SHOW_CMD
|
118
220
|
self.del_cmd = del_cmd or self.DEFAULT_DEL_CMD
|
119
221
|
self.js_cmd = js_cmd or self.DEFAULT_JS_CMD
|
@@ -122,27 +224,53 @@ class Scheduler(NullScheduler):
|
|
122
224
|
self.array_item_var = array_item_var or self.DEFAULT_ARRAY_ITEM_VAR
|
123
225
|
|
124
226
|
@property
|
125
|
-
def unique_properties(self):
|
227
|
+
def unique_properties(self) -> tuple[str, str, Any, Any]:
|
126
228
|
return (self.__class__.__name__, self.submit_cmd, self.show_cmd, self.del_cmd)
|
127
229
|
|
128
|
-
def format_switch(self, switch):
|
230
|
+
def format_switch(self, switch: str) -> str:
|
129
231
|
"""
|
130
232
|
Format a particular switch to use the JS command.
|
131
233
|
"""
|
132
234
|
return f"{self.js_cmd} {switch}"
|
133
235
|
|
134
|
-
def is_jobscript_active(self, job_ID: str):
|
236
|
+
def is_jobscript_active(self, job_ID: str) -> bool:
|
135
237
|
"""Query if a jobscript is running/pending."""
|
136
|
-
return bool(self.get_job_state_info([job_ID]))
|
238
|
+
return bool(self.get_job_state_info(js_refs=[job_ID]))
|
137
239
|
|
138
|
-
|
240
|
+
@override
|
241
|
+
def wait_for_jobscripts(self, js_refs: list[str]) -> None:
|
139
242
|
"""
|
140
243
|
Wait for jobscripts to update their state.
|
141
244
|
"""
|
142
245
|
while js_refs:
|
143
|
-
info = self.get_job_state_info(js_refs)
|
144
|
-
print(info)
|
246
|
+
info: Mapping[str, Any] = self.get_job_state_info(js_refs=js_refs)
|
145
247
|
if not info:
|
146
248
|
break
|
147
|
-
js_refs = list(info
|
249
|
+
js_refs = list(info)
|
148
250
|
time.sleep(2)
|
251
|
+
|
252
|
+
@abstractmethod
|
253
|
+
def format_options(
|
254
|
+
self,
|
255
|
+
resources: ElementResources,
|
256
|
+
num_elements: int,
|
257
|
+
is_array: bool,
|
258
|
+
sub_idx: int,
|
259
|
+
js_idx: int,
|
260
|
+
) -> str:
|
261
|
+
"""
|
262
|
+
Render options in a way that the scheduler can handle.
|
263
|
+
"""
|
264
|
+
|
265
|
+
def get_std_out_err_filename(
|
266
|
+
self, js_idx: int, job_ID: str, array_idx: int | None = None
|
267
|
+
):
|
268
|
+
"""File name of combined standard output and error streams.
|
269
|
+
|
270
|
+
Notes
|
271
|
+
-----
|
272
|
+
We use the standard output stream filename format for the combined output and
|
273
|
+
error streams file.
|
274
|
+
|
275
|
+
"""
|
276
|
+
return self.get_stdout_filename(js_idx=js_idx, job_ID=job_ID, array_idx=array_idx)
|
@@ -2,19 +2,42 @@
|
|
2
2
|
A direct job "scheduler" that just runs immediate subprocesses.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from
|
5
|
+
from __future__ import annotations
|
6
6
|
import shutil
|
7
7
|
import signal
|
8
|
-
from typing import
|
9
|
-
|
8
|
+
from typing import overload, cast, TYPE_CHECKING
|
9
|
+
from typing_extensions import override, TypeAlias
|
10
10
|
import psutil
|
11
|
-
from hpcflow.sdk.submission.jobscript_info import JobscriptElementState
|
12
11
|
|
13
|
-
from hpcflow.sdk.
|
14
|
-
from hpcflow.sdk.submission.
|
12
|
+
from hpcflow.sdk.typing import hydrate
|
13
|
+
from hpcflow.sdk.submission.enums import JobscriptElementState
|
14
|
+
from hpcflow.sdk.submission.schedulers import Scheduler
|
15
|
+
|
16
|
+
if TYPE_CHECKING:
|
17
|
+
from collections.abc import Callable, Mapping, Sequence
|
18
|
+
from typing import Any, ClassVar
|
19
|
+
from ...config.types import SchedulerConfigDescriptor
|
20
|
+
from ..jobscript import Jobscript
|
21
|
+
from ..shells.base import Shell
|
22
|
+
|
23
|
+
DirectRef: TypeAlias = "tuple[int, list[str]]"
|
24
|
+
|
25
|
+
|
26
|
+
def _is_process_cmdline_equal(proc: psutil.Process, cmdline: list[str]) -> bool:
|
27
|
+
"""Check if the `cmdline` of a psutil `Process` is equal to the specified
|
28
|
+
`cmdline`."""
|
29
|
+
try:
|
30
|
+
if proc.cmdline() == cmdline:
|
31
|
+
return True
|
32
|
+
else:
|
33
|
+
return False
|
34
|
+
except (psutil.NoSuchProcess, psutil.ZombieProcess):
|
35
|
+
# process no longer exists or, on unix, process has completed but still has a
|
36
|
+
# record
|
37
|
+
return False
|
15
38
|
|
16
39
|
|
17
|
-
class DirectScheduler(
|
40
|
+
class DirectScheduler(Scheduler[DirectRef]):
|
18
41
|
"""
|
19
42
|
A direct scheduler, that just runs jobs immediately as direct subprocesses.
|
20
43
|
|
@@ -31,129 +54,143 @@ class DirectScheduler(NullScheduler):
|
|
31
54
|
Options to the jobscript command.
|
32
55
|
"""
|
33
56
|
|
34
|
-
def __init__(self, *args, **kwargs):
|
35
|
-
super().__init__(*args, **kwargs)
|
36
|
-
|
37
57
|
@classmethod
|
38
|
-
|
58
|
+
@override
|
59
|
+
def process_resources(
|
60
|
+
cls, resources, scheduler_config: SchedulerConfigDescriptor
|
61
|
+
) -> None:
|
39
62
|
"""Perform scheduler-specific processing to the element resources.
|
40
63
|
|
41
|
-
Note
|
42
|
-
|
64
|
+
Note
|
65
|
+
----
|
66
|
+
This mutates `resources`.
|
43
67
|
"""
|
44
68
|
return
|
45
69
|
|
70
|
+
@override
|
46
71
|
def get_submit_command(
|
47
72
|
self,
|
48
73
|
shell: Shell,
|
49
74
|
js_path: str,
|
50
|
-
deps:
|
51
|
-
) ->
|
75
|
+
deps: dict[Any, tuple[Any, ...]],
|
76
|
+
) -> list[str]:
|
52
77
|
"""
|
53
78
|
Get the concrete submission command.
|
54
79
|
"""
|
55
80
|
return shell.get_direct_submit_command(js_path)
|
56
81
|
|
57
82
|
@staticmethod
|
58
|
-
def
|
59
|
-
procs:
|
60
|
-
sig=signal.SIGTERM,
|
61
|
-
timeout=None,
|
62
|
-
on_terminate=None,
|
83
|
+
def __kill_processes(
|
84
|
+
procs: list[psutil.Process],
|
85
|
+
sig: signal.Signals = signal.SIGTERM,
|
86
|
+
timeout: float | None = None,
|
87
|
+
on_terminate: Callable[[psutil.Process], object] | None = None,
|
63
88
|
):
|
64
|
-
all_procs = []
|
65
|
-
for
|
66
|
-
all_procs.append(
|
67
|
-
all_procs.extend(
|
89
|
+
all_procs: list[psutil.Process] = []
|
90
|
+
for process in procs:
|
91
|
+
all_procs.append(process)
|
92
|
+
all_procs.extend(process.children(recursive=True))
|
68
93
|
|
69
|
-
for
|
94
|
+
for process in all_procs:
|
70
95
|
try:
|
71
|
-
|
96
|
+
process.send_signal(sig)
|
72
97
|
except psutil.NoSuchProcess:
|
73
98
|
pass
|
74
|
-
|
75
|
-
for
|
76
|
-
|
99
|
+
_, alive = psutil.wait_procs(all_procs, timeout=timeout, callback=on_terminate)
|
100
|
+
for process in alive:
|
101
|
+
process.kill()
|
77
102
|
|
78
103
|
@staticmethod
|
79
|
-
def
|
80
|
-
procs = []
|
104
|
+
def __get_jobscript_processes(js_refs: list[DirectRef]) -> list[psutil.Process]:
|
105
|
+
procs: list[psutil.Process] = []
|
81
106
|
for p_id, p_cmdline in js_refs:
|
82
107
|
try:
|
83
108
|
proc_i = psutil.Process(p_id)
|
84
109
|
except psutil.NoSuchProcess:
|
85
110
|
# process might have completed already
|
86
111
|
continue
|
87
|
-
if proc_i
|
88
|
-
# additional check this is the same process that we submitted
|
112
|
+
if _is_process_cmdline_equal(proc_i, p_cmdline):
|
89
113
|
procs.append(proc_i)
|
90
114
|
return procs
|
91
115
|
|
116
|
+
@overload
|
117
|
+
@override
|
118
|
+
@classmethod
|
119
|
+
def wait_for_jobscripts(cls, js_refs: list[DirectRef]) -> None:
|
120
|
+
...
|
121
|
+
|
122
|
+
@overload
|
92
123
|
@classmethod
|
93
124
|
def wait_for_jobscripts(
|
94
125
|
cls,
|
95
|
-
js_refs:
|
96
|
-
|
97
|
-
|
126
|
+
js_refs: list[DirectRef],
|
127
|
+
*,
|
128
|
+
callback: Callable[[psutil.Process], None],
|
129
|
+
) -> list[psutil.Process]:
|
130
|
+
...
|
131
|
+
|
132
|
+
@classmethod
|
133
|
+
def wait_for_jobscripts(
|
134
|
+
cls,
|
135
|
+
js_refs: list[DirectRef],
|
136
|
+
*,
|
137
|
+
callback: Callable[[psutil.Process], None] | None = None,
|
138
|
+
) -> list[psutil.Process] | None:
|
98
139
|
"""Wait until the specified jobscripts have completed."""
|
99
|
-
procs = cls.
|
140
|
+
procs = cls.__get_jobscript_processes(js_refs)
|
100
141
|
(gone, alive) = psutil.wait_procs(procs, callback=callback)
|
101
142
|
assert not alive
|
102
|
-
return gone
|
143
|
+
return gone if callback else None
|
103
144
|
|
145
|
+
@override
|
104
146
|
def get_job_state_info(
|
105
|
-
self,
|
106
|
-
|
107
|
-
num_js_elements: int,
|
108
|
-
) -> Dict[int, Dict[int, JobscriptElementState]]:
|
147
|
+
self, *, js_refs: Sequence[DirectRef] | None = None
|
148
|
+
) -> Mapping[str, JobscriptElementState]:
|
109
149
|
"""Query the scheduler to get the states of all of this user's jobs, optionally
|
110
150
|
filtering by specified job IDs.
|
111
151
|
|
112
152
|
Jobs that are not in the scheduler's status output will not appear in the output
|
113
153
|
of this method."""
|
114
|
-
info = {}
|
115
|
-
for p_id, p_cmdline in js_refs:
|
116
|
-
|
117
|
-
if is_active:
|
154
|
+
info: dict[str, JobscriptElementState] = {}
|
155
|
+
for p_id, p_cmdline in js_refs or ():
|
156
|
+
if self.is_jobscript_active(p_id, p_cmdline):
|
118
157
|
# as far as the "scheduler" is concerned, all elements are running:
|
119
|
-
info[p_id] =
|
120
|
-
i: JobscriptElementState.running for i in range(num_js_elements)
|
121
|
-
}
|
158
|
+
info[str(p_id)] = JobscriptElementState.running
|
122
159
|
|
123
160
|
return info
|
124
161
|
|
162
|
+
@override
|
125
163
|
def cancel_jobs(
|
126
164
|
self,
|
127
|
-
js_refs:
|
128
|
-
jobscripts:
|
165
|
+
js_refs: list[DirectRef],
|
166
|
+
jobscripts: list[Jobscript] | None = None,
|
129
167
|
):
|
130
168
|
"""
|
131
169
|
Cancel some jobs.
|
132
170
|
"""
|
133
171
|
|
134
|
-
|
172
|
+
js_proc_id: dict[int, Jobscript]
|
173
|
+
|
174
|
+
def callback(proc: psutil.Process):
|
135
175
|
try:
|
136
|
-
|
176
|
+
js_proc_id[proc.pid]
|
137
177
|
except KeyError:
|
138
178
|
# child process of one of the jobscripts
|
139
|
-
self.
|
179
|
+
self._app.submission_logger.debug(
|
140
180
|
f"jobscript child process ({proc.pid}) killed"
|
141
181
|
)
|
142
182
|
return
|
143
|
-
print(
|
144
|
-
f"Jobscript {js.index} from submission {js.submission.index} "
|
145
|
-
f"terminated (user-initiated cancel) with exit code {proc.returncode}."
|
146
|
-
)
|
147
183
|
|
148
|
-
procs = self.
|
149
|
-
self.
|
184
|
+
procs = self.__get_jobscript_processes(js_refs)
|
185
|
+
self._app.submission_logger.info(
|
150
186
|
f"cancelling {self.__class__.__name__} jobscript processes: {procs}."
|
151
187
|
)
|
152
|
-
js_proc_id = {i.pid: jobscripts[idx] for idx, i in enumerate(procs)}
|
153
|
-
self.
|
154
|
-
|
188
|
+
js_proc_id = {i.pid: jobscripts[idx] for idx, i in enumerate(procs) if jobscripts}
|
189
|
+
self.__kill_processes(procs, timeout=3, on_terminate=callback)
|
190
|
+
print(f"Cancelled {len(procs)} jobscript{'s' if len(procs) > 1 else ''}.")
|
191
|
+
self._app.submission_logger.info("jobscripts cancel command executed.")
|
155
192
|
|
156
|
-
def is_jobscript_active(self, process_ID: int, process_cmdline:
|
193
|
+
def is_jobscript_active(self, process_ID: int, process_cmdline: list[str]):
|
157
194
|
"""Query if a jobscript is running.
|
158
195
|
|
159
196
|
Note that a "running" jobscript might be waiting on upstream jobscripts to
|
@@ -164,13 +201,22 @@ class DirectScheduler(NullScheduler):
|
|
164
201
|
proc = psutil.Process(process_ID)
|
165
202
|
except psutil.NoSuchProcess:
|
166
203
|
return False
|
204
|
+
return _is_process_cmdline_equal(proc, process_cmdline)
|
167
205
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
206
|
+
def get_std_out_err_filename(self, js_idx: int, **kwargs) -> str:
|
207
|
+
"""File name of combined standard output and error streams."""
|
208
|
+
return f"js_{js_idx}_std.log"
|
209
|
+
|
210
|
+
def get_stdout_filename(self, js_idx: int, **kwargs) -> str:
|
211
|
+
"""File name of the standard output stream file."""
|
212
|
+
return f"js_{js_idx}_stdout.log"
|
172
213
|
|
214
|
+
def get_stderr_filename(self, js_idx: int, **kwargs) -> str:
|
215
|
+
"""File name of the standard error stream file."""
|
216
|
+
return f"js_{js_idx}_stderr.log"
|
173
217
|
|
218
|
+
|
219
|
+
@hydrate
|
174
220
|
class DirectPosix(DirectScheduler):
|
175
221
|
"""
|
176
222
|
A direct scheduler for POSIX systems.
|
@@ -185,14 +231,11 @@ class DirectPosix(DirectScheduler):
|
|
185
231
|
Options to the jobscript command.
|
186
232
|
"""
|
187
233
|
|
188
|
-
_app_attr = "app"
|
189
234
|
#: Default shell.
|
190
|
-
DEFAULT_SHELL_EXECUTABLE = "/bin/bash"
|
191
|
-
|
192
|
-
def __init__(self, *args, **kwargs):
|
193
|
-
super().__init__(*args, **kwargs)
|
235
|
+
DEFAULT_SHELL_EXECUTABLE: ClassVar[str] = "/bin/bash"
|
194
236
|
|
195
237
|
|
238
|
+
@hydrate
|
196
239
|
class DirectWindows(DirectScheduler):
|
197
240
|
"""
|
198
241
|
A direct scheduler for Windows.
|
@@ -205,19 +248,16 @@ class DirectWindows(DirectScheduler):
|
|
205
248
|
Options to the jobscript command.
|
206
249
|
"""
|
207
250
|
|
208
|
-
_app_attr = "app"
|
209
251
|
#: Default shell.
|
210
|
-
DEFAULT_SHELL_EXECUTABLE = "powershell.exe"
|
211
|
-
|
212
|
-
def __init__(self, *args, **kwargs):
|
213
|
-
super().__init__(*args, **kwargs)
|
252
|
+
DEFAULT_SHELL_EXECUTABLE: ClassVar[str] = "powershell.exe"
|
214
253
|
|
254
|
+
@override
|
215
255
|
def get_submit_command(
|
216
|
-
self, shell: Shell, js_path: str, deps:
|
217
|
-
) ->
|
256
|
+
self, shell: Shell, js_path: str, deps: dict[Any, tuple[Any, ...]]
|
257
|
+
) -> list[str]:
|
218
258
|
cmd = super().get_submit_command(shell, js_path, deps)
|
219
259
|
# `Start-Process` (see `Jobscript._launch_direct_js_win`) seems to resolve the
|
220
260
|
# executable, which means the process's `cmdline` might look different to what we
|
221
261
|
# record; so let's resolve it ourselves:
|
222
|
-
cmd[0] = shutil.which(cmd[0])
|
262
|
+
cmd[0] = cast("str", shutil.which(cmd[0]))
|
223
263
|
return cmd
|