hpcflow-new2 0.2.0a189__py3-none-any.whl → 0.2.0a199__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__pyinstaller/hook-hpcflow.py +9 -6
- hpcflow/_version.py +1 -1
- hpcflow/app.py +1 -0
- hpcflow/data/scripts/bad_script.py +2 -0
- hpcflow/data/scripts/do_nothing.py +2 -0
- hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
- hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/input_file_generator_basic.py +3 -0
- hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
- hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +1 -1
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +1 -1
- hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
- hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
- hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
- hpcflow/data/scripts/output_file_parser_basic.py +3 -0
- hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
- hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/script_exit_test.py +5 -0
- hpcflow/data/template_components/environments.yaml +1 -1
- hpcflow/sdk/__init__.py +26 -15
- hpcflow/sdk/app.py +2192 -768
- hpcflow/sdk/cli.py +506 -296
- hpcflow/sdk/cli_common.py +105 -7
- hpcflow/sdk/config/__init__.py +1 -1
- hpcflow/sdk/config/callbacks.py +115 -43
- hpcflow/sdk/config/cli.py +126 -103
- hpcflow/sdk/config/config.py +674 -318
- hpcflow/sdk/config/config_file.py +131 -95
- hpcflow/sdk/config/errors.py +125 -84
- hpcflow/sdk/config/types.py +148 -0
- hpcflow/sdk/core/__init__.py +25 -1
- hpcflow/sdk/core/actions.py +1771 -1059
- hpcflow/sdk/core/app_aware.py +24 -0
- hpcflow/sdk/core/cache.py +139 -79
- hpcflow/sdk/core/command_files.py +263 -287
- hpcflow/sdk/core/commands.py +145 -112
- hpcflow/sdk/core/element.py +828 -535
- hpcflow/sdk/core/enums.py +192 -0
- hpcflow/sdk/core/environment.py +74 -93
- hpcflow/sdk/core/errors.py +455 -52
- hpcflow/sdk/core/execute.py +207 -0
- hpcflow/sdk/core/json_like.py +540 -272
- hpcflow/sdk/core/loop.py +751 -347
- hpcflow/sdk/core/loop_cache.py +164 -47
- hpcflow/sdk/core/object_list.py +370 -207
- hpcflow/sdk/core/parameters.py +1100 -627
- hpcflow/sdk/core/rule.py +59 -41
- hpcflow/sdk/core/run_dir_files.py +21 -37
- hpcflow/sdk/core/skip_reason.py +7 -0
- hpcflow/sdk/core/task.py +1649 -1339
- hpcflow/sdk/core/task_schema.py +308 -196
- hpcflow/sdk/core/test_utils.py +191 -114
- hpcflow/sdk/core/types.py +440 -0
- hpcflow/sdk/core/utils.py +485 -309
- hpcflow/sdk/core/validation.py +82 -9
- hpcflow/sdk/core/workflow.py +2544 -1178
- hpcflow/sdk/core/zarr_io.py +98 -137
- hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
- hpcflow/sdk/demo/cli.py +53 -33
- hpcflow/sdk/helper/cli.py +18 -15
- hpcflow/sdk/helper/helper.py +75 -63
- hpcflow/sdk/helper/watcher.py +61 -28
- hpcflow/sdk/log.py +122 -71
- hpcflow/sdk/persistence/__init__.py +8 -31
- hpcflow/sdk/persistence/base.py +1360 -606
- hpcflow/sdk/persistence/defaults.py +6 -0
- hpcflow/sdk/persistence/discovery.py +38 -0
- hpcflow/sdk/persistence/json.py +568 -188
- hpcflow/sdk/persistence/pending.py +382 -179
- hpcflow/sdk/persistence/store_resource.py +39 -23
- hpcflow/sdk/persistence/types.py +318 -0
- hpcflow/sdk/persistence/utils.py +14 -11
- hpcflow/sdk/persistence/zarr.py +1337 -433
- hpcflow/sdk/runtime.py +44 -41
- hpcflow/sdk/submission/{jobscript_info.py → enums.py} +39 -12
- hpcflow/sdk/submission/jobscript.py +1651 -692
- hpcflow/sdk/submission/schedulers/__init__.py +167 -39
- hpcflow/sdk/submission/schedulers/direct.py +121 -81
- hpcflow/sdk/submission/schedulers/sge.py +170 -129
- hpcflow/sdk/submission/schedulers/slurm.py +291 -268
- hpcflow/sdk/submission/schedulers/utils.py +12 -2
- hpcflow/sdk/submission/shells/__init__.py +14 -15
- hpcflow/sdk/submission/shells/base.py +150 -29
- hpcflow/sdk/submission/shells/bash.py +283 -173
- hpcflow/sdk/submission/shells/os_version.py +31 -30
- hpcflow/sdk/submission/shells/powershell.py +228 -170
- hpcflow/sdk/submission/submission.py +1014 -335
- hpcflow/sdk/submission/types.py +140 -0
- hpcflow/sdk/typing.py +182 -12
- hpcflow/sdk/utils/arrays.py +71 -0
- hpcflow/sdk/utils/deferred_file.py +55 -0
- hpcflow/sdk/utils/hashing.py +16 -0
- hpcflow/sdk/utils/patches.py +12 -0
- hpcflow/sdk/utils/strings.py +33 -0
- hpcflow/tests/api/test_api.py +32 -0
- hpcflow/tests/conftest.py +27 -6
- hpcflow/tests/data/multi_path_sequences.yaml +29 -0
- hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
- hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
- hpcflow/tests/schedulers/slurm/test_slurm_submission.py +5 -2
- hpcflow/tests/scripts/test_input_file_generators.py +282 -0
- hpcflow/tests/scripts/test_main_scripts.py +866 -85
- hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
- hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
- hpcflow/tests/shells/wsl/test_wsl_submission.py +12 -4
- hpcflow/tests/unit/test_action.py +262 -75
- hpcflow/tests/unit/test_action_rule.py +9 -4
- hpcflow/tests/unit/test_app.py +33 -6
- hpcflow/tests/unit/test_cache.py +46 -0
- hpcflow/tests/unit/test_cli.py +134 -1
- hpcflow/tests/unit/test_command.py +71 -54
- hpcflow/tests/unit/test_config.py +142 -16
- hpcflow/tests/unit/test_config_file.py +21 -18
- hpcflow/tests/unit/test_element.py +58 -62
- hpcflow/tests/unit/test_element_iteration.py +50 -1
- hpcflow/tests/unit/test_element_set.py +29 -19
- hpcflow/tests/unit/test_group.py +4 -2
- hpcflow/tests/unit/test_input_source.py +116 -93
- hpcflow/tests/unit/test_input_value.py +29 -24
- hpcflow/tests/unit/test_jobscript_unit.py +757 -0
- hpcflow/tests/unit/test_json_like.py +44 -35
- hpcflow/tests/unit/test_loop.py +1396 -84
- hpcflow/tests/unit/test_meta_task.py +325 -0
- hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
- hpcflow/tests/unit/test_object_list.py +17 -12
- hpcflow/tests/unit/test_parameter.py +29 -7
- hpcflow/tests/unit/test_persistence.py +237 -42
- hpcflow/tests/unit/test_resources.py +20 -18
- hpcflow/tests/unit/test_run.py +117 -6
- hpcflow/tests/unit/test_run_directories.py +29 -0
- hpcflow/tests/unit/test_runtime.py +2 -1
- hpcflow/tests/unit/test_schema_input.py +23 -15
- hpcflow/tests/unit/test_shell.py +23 -2
- hpcflow/tests/unit/test_slurm.py +8 -7
- hpcflow/tests/unit/test_submission.py +38 -89
- hpcflow/tests/unit/test_task.py +352 -247
- hpcflow/tests/unit/test_task_schema.py +33 -20
- hpcflow/tests/unit/test_utils.py +9 -11
- hpcflow/tests/unit/test_value_sequence.py +15 -12
- hpcflow/tests/unit/test_workflow.py +114 -83
- hpcflow/tests/unit/test_workflow_template.py +0 -1
- hpcflow/tests/unit/utils/test_arrays.py +40 -0
- hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
- hpcflow/tests/unit/utils/test_hashing.py +65 -0
- hpcflow/tests/unit/utils/test_patches.py +5 -0
- hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
- hpcflow/tests/workflows/__init__.py +0 -0
- hpcflow/tests/workflows/test_directory_structure.py +31 -0
- hpcflow/tests/workflows/test_jobscript.py +334 -1
- hpcflow/tests/workflows/test_run_status.py +198 -0
- hpcflow/tests/workflows/test_skip_downstream.py +696 -0
- hpcflow/tests/workflows/test_submission.py +140 -0
- hpcflow/tests/workflows/test_workflows.py +160 -15
- hpcflow/tests/workflows/test_zip.py +18 -0
- hpcflow/viz_demo.ipynb +6587 -3
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +8 -4
- hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
- hpcflow/sdk/core/parallel.py +0 -21
- hpcflow_new2-0.2.0a189.dist-info/RECORD +0 -158
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
- {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0
@@ -2,22 +2,34 @@
|
|
2
2
|
An interface to SGE.
|
3
3
|
"""
|
4
4
|
|
5
|
-
from
|
5
|
+
from __future__ import annotations
|
6
|
+
from collections.abc import Sequence
|
6
7
|
import re
|
7
|
-
from typing import
|
8
|
+
from typing import cast, TYPE_CHECKING
|
9
|
+
from typing_extensions import override
|
10
|
+
from hpcflow.sdk.typing import hydrate
|
8
11
|
from hpcflow.sdk.core.errors import (
|
9
12
|
IncompatibleSGEPEError,
|
10
13
|
NoCompatibleSGEPEError,
|
11
14
|
UnknownSGEPEError,
|
12
15
|
)
|
13
16
|
from hpcflow.sdk.log import TimeIt
|
14
|
-
from hpcflow.sdk.submission.
|
15
|
-
from hpcflow.sdk.submission.schedulers import
|
17
|
+
from hpcflow.sdk.submission.enums import JobscriptElementState
|
18
|
+
from hpcflow.sdk.submission.schedulers import QueuedScheduler
|
16
19
|
from hpcflow.sdk.submission.schedulers.utils import run_cmd
|
17
|
-
from hpcflow.sdk.submission.shells.base import Shell
|
18
20
|
|
21
|
+
if TYPE_CHECKING:
|
22
|
+
from collections.abc import Iterator, Mapping
|
23
|
+
from typing import Any, ClassVar
|
24
|
+
from ...config.types import SchedulerConfigDescriptor
|
25
|
+
from ...core.element import ElementResources
|
26
|
+
from ..jobscript import Jobscript
|
27
|
+
from ..types import VersionInfo
|
28
|
+
from ..shells.base import Shell
|
19
29
|
|
20
|
-
|
30
|
+
|
31
|
+
@hydrate
|
32
|
+
class SGEPosix(QueuedScheduler):
|
21
33
|
"""
|
22
34
|
A scheduler that uses SGE.
|
23
35
|
|
@@ -43,36 +55,34 @@ class SGEPosix(Scheduler):
|
|
43
55
|
|
44
56
|
"""
|
45
57
|
|
46
|
-
_app_attr = "app"
|
47
|
-
|
48
58
|
#: Default args for shebang line.
|
49
|
-
DEFAULT_SHEBANG_ARGS = ""
|
59
|
+
DEFAULT_SHEBANG_ARGS: ClassVar[str] = ""
|
50
60
|
#: Default submission command.
|
51
|
-
DEFAULT_SUBMIT_CMD = "qsub"
|
61
|
+
DEFAULT_SUBMIT_CMD: ClassVar[str] = "qsub"
|
52
62
|
#: Default command to show the queue state.
|
53
|
-
DEFAULT_SHOW_CMD =
|
63
|
+
DEFAULT_SHOW_CMD: ClassVar[Sequence[str]] = ("qstat",)
|
54
64
|
#: Default cancel command.
|
55
|
-
DEFAULT_DEL_CMD = "qdel"
|
65
|
+
DEFAULT_DEL_CMD: ClassVar[str] = "qdel"
|
56
66
|
#: Default job control directive prefix.
|
57
|
-
DEFAULT_JS_CMD = "#$"
|
67
|
+
DEFAULT_JS_CMD: ClassVar[str] = "#$"
|
58
68
|
#: Default prefix to enable array processing.
|
59
|
-
DEFAULT_ARRAY_SWITCH = "-t"
|
69
|
+
DEFAULT_ARRAY_SWITCH: ClassVar[str] = "-t"
|
60
70
|
#: Default shell variable with array ID.
|
61
|
-
DEFAULT_ARRAY_ITEM_VAR = "SGE_TASK_ID"
|
71
|
+
DEFAULT_ARRAY_ITEM_VAR: ClassVar[str] = "SGE_TASK_ID"
|
62
72
|
#: Default switch to control CWD.
|
63
|
-
DEFAULT_CWD_SWITCH = "-cwd"
|
73
|
+
DEFAULT_CWD_SWITCH: ClassVar[str] = "-cwd"
|
64
74
|
#: Default command to get the login nodes.
|
65
|
-
DEFAULT_LOGIN_NODES_CMD =
|
75
|
+
DEFAULT_LOGIN_NODES_CMD: ClassVar[Sequence[str]] = ("qconf", "-sh")
|
66
76
|
|
67
77
|
#: Maps scheduler state codes to :py:class:`JobscriptElementState` values.
|
68
|
-
state_lookup = {
|
78
|
+
state_lookup: ClassVar[Mapping[str, JobscriptElementState]] = {
|
69
79
|
"qw": JobscriptElementState.pending,
|
70
80
|
"hq": JobscriptElementState.waiting,
|
71
81
|
"hR": JobscriptElementState.waiting,
|
72
82
|
"r": JobscriptElementState.running,
|
73
83
|
"t": JobscriptElementState.running,
|
74
84
|
"Rr": JobscriptElementState.running,
|
75
|
-
"Rt": JobscriptElementState.running,
|
85
|
+
# "Rt": JobscriptElementState.running,
|
76
86
|
"s": JobscriptElementState.errored,
|
77
87
|
"ts": JobscriptElementState.errored,
|
78
88
|
"S": JobscriptElementState.errored,
|
@@ -93,17 +103,22 @@ class SGEPosix(Scheduler):
|
|
93
103
|
"dT": JobscriptElementState.cancelled,
|
94
104
|
}
|
95
105
|
|
96
|
-
def __init__(self, cwd_switch=None, *args, **kwargs):
|
106
|
+
def __init__(self, cwd_switch: str | None = None, *args, **kwargs):
|
97
107
|
super().__init__(*args, **kwargs)
|
98
108
|
self.cwd_switch = cwd_switch or self.DEFAULT_CWD_SWITCH
|
99
109
|
|
100
110
|
@classmethod
|
111
|
+
@override
|
101
112
|
@TimeIt.decorator
|
102
|
-
def process_resources(
|
103
|
-
|
104
|
-
|
105
|
-
|
113
|
+
def process_resources(
|
114
|
+
cls, resources: ElementResources, scheduler_config: SchedulerConfigDescriptor
|
115
|
+
) -> None:
|
116
|
+
"""
|
117
|
+
Perform scheduler-specific processing to the element resources.
|
106
118
|
|
119
|
+
Note
|
120
|
+
----
|
121
|
+
This mutates `resources`.
|
107
122
|
"""
|
108
123
|
if resources.num_nodes is not None:
|
109
124
|
raise ValueError(
|
@@ -116,7 +131,7 @@ class SGEPosix(Scheduler):
|
|
116
131
|
if resources.SGE_parallel_env is not None:
|
117
132
|
# check user-specified `parallel_env` is valid and compatible with
|
118
133
|
# `num_cores`:
|
119
|
-
if resources.num_cores
|
134
|
+
if resources.num_cores and resources.num_cores == 1:
|
120
135
|
raise ValueError(
|
121
136
|
f"An SGE parallel environment should not be specified if `num_cores` "
|
122
137
|
f"is 1 (`SGE_parallel_env` was specified as "
|
@@ -126,105 +141,121 @@ class SGEPosix(Scheduler):
|
|
126
141
|
try:
|
127
142
|
env = para_envs[resources.SGE_parallel_env]
|
128
143
|
except KeyError:
|
129
|
-
raise UnknownSGEPEError(
|
130
|
-
f"The SGE parallel environment {resources.SGE_parallel_env!r} is not "
|
131
|
-
f"specified in the configuration. Specified parallel environments "
|
132
|
-
f"are {list(para_envs.keys())!r}."
|
133
|
-
)
|
144
|
+
raise UnknownSGEPEError(resources.SGE_parallel_env, para_envs)
|
134
145
|
if not cls.is_num_cores_supported(resources.num_cores, env["num_cores"]):
|
135
146
|
raise IncompatibleSGEPEError(
|
136
|
-
|
137
|
-
f"compatible with the number of cores requested: "
|
138
|
-
f"{resources.num_cores!r}."
|
147
|
+
resources.SGE_parallel_env, resources.num_cores
|
139
148
|
)
|
140
149
|
else:
|
141
150
|
# find the first compatible PE:
|
142
|
-
pe_match = -1 # pe_name might be `None`
|
143
151
|
for pe_name, pe_info in para_envs.items():
|
144
152
|
if cls.is_num_cores_supported(resources.num_cores, pe_info["num_cores"]):
|
145
|
-
|
153
|
+
resources.SGE_parallel_env = pe_name
|
146
154
|
break
|
147
|
-
if pe_match != -1:
|
148
|
-
resources.SGE_parallel_env = pe_name
|
149
155
|
else:
|
150
|
-
raise NoCompatibleSGEPEError(
|
151
|
-
f"No compatible SGE parallel environment could be found for the "
|
152
|
-
f"specified `num_cores` ({resources.num_cores!r})."
|
153
|
-
)
|
156
|
+
raise NoCompatibleSGEPEError(resources.num_cores)
|
154
157
|
|
155
|
-
def get_login_nodes(self):
|
158
|
+
def get_login_nodes(self) -> list[str]:
|
156
159
|
"""Return a list of hostnames of login/administrative nodes as reported by the
|
157
160
|
scheduler."""
|
158
|
-
|
161
|
+
get_login = self.login_nodes_cmd
|
162
|
+
assert get_login is not None and len(get_login) >= 1
|
163
|
+
stdout, stderr = run_cmd(get_login)
|
159
164
|
if stderr:
|
160
165
|
print(stderr)
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
if resources.num_cores > 1:
|
167
|
-
lns.append(
|
168
|
-
f"{self.js_cmd} -pe {resources.SGE_parallel_env} {resources.num_cores}"
|
169
|
-
)
|
166
|
+
return stdout.strip().split("\n")
|
167
|
+
|
168
|
+
def __format_core_request_lines(self, resources: ElementResources) -> Iterator[str]:
|
169
|
+
if resources.num_cores and resources.num_cores > 1:
|
170
|
+
yield f"{self.js_cmd} -pe {resources.SGE_parallel_env} {resources.num_cores}"
|
170
171
|
if resources.max_array_items:
|
171
|
-
|
172
|
-
return lns
|
172
|
+
yield f"{self.js_cmd} -tc {resources.max_array_items}"
|
173
173
|
|
174
|
-
def
|
174
|
+
def __format_array_request(self, num_elements: int) -> str:
|
175
175
|
return f"{self.js_cmd} {self.array_switch} 1-{num_elements}"
|
176
176
|
|
177
|
-
def
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
177
|
+
def get_stdout_filename(
|
178
|
+
self, js_idx: int, job_ID: str, array_idx: int | None = None
|
179
|
+
) -> str:
|
180
|
+
"""File name of the standard output stream file."""
|
181
|
+
# TODO: untested, might not work!
|
182
|
+
array_idx_str = f".{array_idx}" if array_idx is not None else ""
|
183
|
+
return f"js_{js_idx}.sh.o{job_ID}{array_idx_str}"
|
184
|
+
|
185
|
+
def get_stderr_filename(
|
186
|
+
self, js_idx: int, job_ID: str, array_idx: int | None = None
|
187
|
+
) -> str:
|
188
|
+
"""File name of the standard error stream file."""
|
189
|
+
# TODO: untested, might not work!
|
190
|
+
array_idx_str = f".{array_idx}" if array_idx is not None else ""
|
191
|
+
return f"js_{js_idx}.sh.e{job_ID}{array_idx_str}"
|
192
|
+
|
193
|
+
def __format_std_stream_file_option_lines(
|
194
|
+
self, is_array: bool, sub_idx: int, js_idx: int, combine_std: bool
|
195
|
+
) -> Iterator[str]:
|
196
|
+
# note: if we modify the file names, there is, I believe, no way to include the
|
197
|
+
# job ID; so we don't modify the file names:
|
198
|
+
base = f"./artifacts/submissions/{sub_idx}/js_std/{js_idx}"
|
199
|
+
yield f"{self.js_cmd} -o {base}"
|
200
|
+
if combine_std:
|
201
|
+
yield f"{self.js_cmd} -j y" # redirect stderr to stdout
|
202
|
+
else:
|
203
|
+
yield f"{self.js_cmd} -e {base}"
|
184
204
|
|
185
|
-
|
205
|
+
@override
|
206
|
+
def format_options(
|
207
|
+
self,
|
208
|
+
resources: ElementResources,
|
209
|
+
num_elements: int,
|
210
|
+
is_array: bool,
|
211
|
+
sub_idx: int,
|
212
|
+
js_idx: int,
|
213
|
+
) -> str:
|
186
214
|
"""
|
187
215
|
Format the options to the jobscript command.
|
188
216
|
"""
|
189
|
-
opts = []
|
217
|
+
opts: list[str] = []
|
190
218
|
opts.append(self.format_switch(self.cwd_switch))
|
191
|
-
opts.extend(self.
|
219
|
+
opts.extend(self.__format_core_request_lines(resources))
|
192
220
|
if is_array:
|
193
|
-
opts.append(self.
|
221
|
+
opts.append(self.__format_array_request(num_elements))
|
194
222
|
|
195
|
-
opts.extend(
|
223
|
+
opts.extend(
|
224
|
+
self.__format_std_stream_file_option_lines(
|
225
|
+
is_array, sub_idx, js_idx, resources.combine_jobscript_std
|
226
|
+
)
|
227
|
+
)
|
196
228
|
|
197
229
|
for opt_k, opt_v in self.options.items():
|
198
|
-
if
|
199
|
-
|
200
|
-
|
230
|
+
if opt_v is None:
|
231
|
+
opts.append(f"{self.js_cmd} {opt_k}")
|
232
|
+
elif isinstance(opt_v, list):
|
233
|
+
opts.extend(f"{self.js_cmd} {opt_k} {i}" for i in opt_v)
|
201
234
|
elif opt_v:
|
202
235
|
opts.append(f"{self.js_cmd} {opt_k} {opt_v}")
|
203
|
-
elif opt_v is None:
|
204
|
-
opts.append(f"{self.js_cmd} {opt_k}")
|
205
236
|
|
206
237
|
return "\n".join(opts) + "\n"
|
207
238
|
|
239
|
+
@override
|
208
240
|
@TimeIt.decorator
|
209
|
-
def get_version_info(self):
|
210
|
-
|
211
|
-
stdout, stderr = run_cmd(vers_cmd)
|
241
|
+
def get_version_info(self) -> VersionInfo:
|
242
|
+
stdout, stderr = run_cmd([*self.show_cmd, "-help"])
|
212
243
|
if stderr:
|
213
244
|
print(stderr)
|
214
|
-
|
215
|
-
name, version =
|
216
|
-
|
245
|
+
first_line, *_ = stdout.split("\n")
|
246
|
+
name, version, *_ = first_line.strip().split()
|
247
|
+
return {
|
217
248
|
"scheduler_name": name,
|
218
249
|
"scheduler_version": version,
|
219
250
|
}
|
220
|
-
return out
|
221
251
|
|
252
|
+
@override
|
222
253
|
def get_submit_command(
|
223
254
|
self,
|
224
255
|
shell: Shell,
|
225
256
|
js_path: str,
|
226
|
-
deps:
|
227
|
-
) ->
|
257
|
+
deps: dict[Any, tuple[Any, ...]],
|
258
|
+
) -> list[str]:
|
228
259
|
"""
|
229
260
|
Get the command to use to submit a job to the scheduler.
|
230
261
|
|
@@ -234,8 +265,8 @@ class SGEPosix(Scheduler):
|
|
234
265
|
"""
|
235
266
|
cmd = [self.submit_cmd, "-terse"]
|
236
267
|
|
237
|
-
dep_job_IDs = []
|
238
|
-
dep_job_IDs_arr = []
|
268
|
+
dep_job_IDs: list[str] = []
|
269
|
+
dep_job_IDs_arr: list[str] = []
|
239
270
|
for job_ID, is_array_dep in deps.values():
|
240
271
|
if is_array_dep: # array dependency
|
241
272
|
dep_job_IDs_arr.append(str(job_ID))
|
@@ -253,60 +284,65 @@ class SGEPosix(Scheduler):
|
|
253
284
|
cmd.append(js_path)
|
254
285
|
return cmd
|
255
286
|
|
287
|
+
__SGE_JOB_ID_RE: ClassVar[re.Pattern] = re.compile(r"^\d+")
|
288
|
+
|
256
289
|
def parse_submission_output(self, stdout: str) -> str:
|
257
290
|
"""Extract scheduler reference for a newly submitted jobscript"""
|
258
|
-
match
|
259
|
-
if match:
|
260
|
-
job_ID = match.group()
|
261
|
-
else:
|
291
|
+
if not (match := self.__SGE_JOB_ID_RE.search(stdout)):
|
262
292
|
raise RuntimeError(f"Could not parse Job ID from scheduler output {stdout!r}")
|
263
|
-
return
|
293
|
+
return match.group()
|
264
294
|
|
265
|
-
def get_job_statuses(
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
295
|
+
def get_job_statuses(
|
296
|
+
self,
|
297
|
+
) -> Mapping[str, JobscriptElementState | Mapping[int, JobscriptElementState]]:
|
298
|
+
"""Get information about all of this user's jobscripts that are currently listed
|
299
|
+
by the scheduler."""
|
300
|
+
cmd = [*self.show_cmd, "-u", "$USER", "-g", "d"] # "-g d": separate arrays items
|
301
|
+
stdout, stderr = run_cmd(cmd, logger=self._app.submission_logger)
|
270
302
|
if stderr:
|
271
303
|
raise ValueError(
|
272
304
|
f"Could not get query SGE jobs. Command was: {cmd!r}; stderr was: "
|
273
305
|
f"{stderr}"
|
274
306
|
)
|
275
307
|
elif not stdout:
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
else
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
308
|
+
return {}
|
309
|
+
|
310
|
+
info: dict[str, dict[int, JobscriptElementState] | JobscriptElementState] = {}
|
311
|
+
lines = stdout.split("\n")
|
312
|
+
# assuming a job name with spaces means we can't split on spaces to get
|
313
|
+
# anywhere beyond the job name, so get the column index of the state heading
|
314
|
+
# and assume the state is always left-aligned with the heading:
|
315
|
+
state_idx = lines[0].index("state")
|
316
|
+
task_id_idx = lines[0].index("ja-task-ID")
|
317
|
+
for ln in lines[2:]:
|
318
|
+
if not ln:
|
319
|
+
continue
|
320
|
+
base_job_ID, *_ = ln.split()
|
321
|
+
|
322
|
+
# states can be one or two chars (for our limited purposes):
|
323
|
+
state_str = ln[state_idx : state_idx + 2].strip()
|
324
|
+
state = self.state_lookup[state_str]
|
325
|
+
|
326
|
+
arr_idx_s = ln[task_id_idx:].strip()
|
327
|
+
arr_idx = (
|
328
|
+
int(arr_idx_s) - 1 # We are using zero-indexed info
|
329
|
+
if arr_idx_s
|
330
|
+
else None
|
331
|
+
)
|
332
|
+
|
333
|
+
if arr_idx is not None:
|
334
|
+
entry = cast(
|
335
|
+
dict[int, JobscriptElementState], info.setdefault(base_job_ID, {})
|
336
|
+
)
|
337
|
+
entry[arr_idx] = state
|
338
|
+
else:
|
339
|
+
info[base_job_ID] = state
|
305
340
|
return info
|
306
341
|
|
342
|
+
@override
|
307
343
|
def get_job_state_info(
|
308
|
-
self, js_refs:
|
309
|
-
) ->
|
344
|
+
self, *, js_refs: Sequence[str] | None = None
|
345
|
+
) -> Mapping[str, JobscriptElementState | Mapping[int, JobscriptElementState]]:
|
310
346
|
"""Query the scheduler to get the states of all of this user's jobs, optionally
|
311
347
|
filtering by specified job IDs.
|
312
348
|
|
@@ -316,23 +352,28 @@ class SGEPosix(Scheduler):
|
|
316
352
|
"""
|
317
353
|
info = self.get_job_statuses()
|
318
354
|
if js_refs:
|
319
|
-
|
355
|
+
return {k: v for k, v in info.items() if k in js_refs}
|
320
356
|
return info
|
321
357
|
|
322
|
-
|
358
|
+
@override
|
359
|
+
def cancel_jobs(
|
360
|
+
self,
|
361
|
+
js_refs: list[str],
|
362
|
+
jobscripts: list[Jobscript] | None = None,
|
363
|
+
):
|
323
364
|
"""
|
324
365
|
Cancel submitted jobs.
|
325
366
|
"""
|
326
367
|
cmd = [self.del_cmd] + js_refs
|
327
|
-
self.
|
368
|
+
self._app.submission_logger.info(
|
328
369
|
f"cancelling {self.__class__.__name__} jobscripts with command: {cmd}."
|
329
370
|
)
|
330
|
-
stdout, stderr = run_cmd(cmd, logger=self.
|
371
|
+
stdout, stderr = run_cmd(cmd, logger=self._app.submission_logger)
|
331
372
|
if stderr:
|
332
373
|
raise ValueError(
|
333
374
|
f"Could not get query SGE {self.__class__.__name__}. Command was: "
|
334
375
|
f"{cmd!r}; stderr was: {stderr}"
|
335
376
|
)
|
336
|
-
self.
|
377
|
+
self._app.submission_logger.info(
|
337
378
|
f"jobscripts cancel command executed; stdout was: {stdout}."
|
338
379
|
)
|