hpcflow 0.1.9__py3-none-any.whl → 0.2.0a271__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__init__.py +2 -11
- hpcflow/__pyinstaller/__init__.py +5 -0
- hpcflow/__pyinstaller/hook-hpcflow.py +40 -0
- hpcflow/_version.py +1 -1
- hpcflow/app.py +43 -0
- hpcflow/cli.py +2 -462
- hpcflow/data/demo_data_manifest/__init__.py +3 -0
- hpcflow/data/demo_data_manifest/demo_data_manifest.json +6 -0
- hpcflow/data/jinja_templates/test/test_template.txt +8 -0
- hpcflow/data/programs/hello_world/README.md +1 -0
- hpcflow/data/programs/hello_world/hello_world.c +87 -0
- hpcflow/data/programs/hello_world/linux/hello_world +0 -0
- hpcflow/data/programs/hello_world/macos/hello_world +0 -0
- hpcflow/data/programs/hello_world/win/hello_world.exe +0 -0
- hpcflow/data/scripts/__init__.py +1 -0
- hpcflow/data/scripts/bad_script.py +2 -0
- hpcflow/data/scripts/demo_task_1_generate_t1_infile_1.py +8 -0
- hpcflow/data/scripts/demo_task_1_generate_t1_infile_2.py +8 -0
- hpcflow/data/scripts/demo_task_1_parse_p3.py +7 -0
- hpcflow/data/scripts/do_nothing.py +2 -0
- hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
- hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/generate_t1_file_01.py +7 -0
- hpcflow/data/scripts/import_future_script.py +7 -0
- hpcflow/data/scripts/input_file_generator_basic.py +3 -0
- hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
- hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_all_iters_test.py +15 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_env_spec.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_labels.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_sub_param_in_direct_out.py +6 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_group.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +11 -0
- hpcflow/data/scripts/main_script_test_json_and_direct_in_json_out.py +14 -0
- hpcflow/data/scripts/main_script_test_json_in_json_and_direct_out.py +17 -0
- hpcflow/data/scripts/main_script_test_json_in_json_out.py +14 -0
- hpcflow/data/scripts/main_script_test_json_in_json_out_labels.py +16 -0
- hpcflow/data/scripts/main_script_test_json_in_obj.py +12 -0
- hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
- hpcflow/data/scripts/main_script_test_json_out_obj.py +10 -0
- hpcflow/data/scripts/main_script_test_json_sub_param_in_json_out_labels.py +16 -0
- hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
- hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
- hpcflow/data/scripts/output_file_parser_basic.py +3 -0
- hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
- hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/parse_t1_file_01.py +4 -0
- hpcflow/data/scripts/script_exit_test.py +5 -0
- hpcflow/data/template_components/__init__.py +1 -0
- hpcflow/data/template_components/command_files.yaml +26 -0
- hpcflow/data/template_components/environments.yaml +13 -0
- hpcflow/data/template_components/parameters.yaml +14 -0
- hpcflow/data/template_components/task_schemas.yaml +139 -0
- hpcflow/data/workflows/workflow_1.yaml +5 -0
- hpcflow/examples.ipynb +1037 -0
- hpcflow/sdk/__init__.py +149 -0
- hpcflow/sdk/app.py +4266 -0
- hpcflow/sdk/cli.py +1479 -0
- hpcflow/sdk/cli_common.py +385 -0
- hpcflow/sdk/config/__init__.py +5 -0
- hpcflow/sdk/config/callbacks.py +246 -0
- hpcflow/sdk/config/cli.py +388 -0
- hpcflow/sdk/config/config.py +1410 -0
- hpcflow/sdk/config/config_file.py +501 -0
- hpcflow/sdk/config/errors.py +272 -0
- hpcflow/sdk/config/types.py +150 -0
- hpcflow/sdk/core/__init__.py +38 -0
- hpcflow/sdk/core/actions.py +3857 -0
- hpcflow/sdk/core/app_aware.py +25 -0
- hpcflow/sdk/core/cache.py +224 -0
- hpcflow/sdk/core/command_files.py +814 -0
- hpcflow/sdk/core/commands.py +424 -0
- hpcflow/sdk/core/element.py +2071 -0
- hpcflow/sdk/core/enums.py +221 -0
- hpcflow/sdk/core/environment.py +256 -0
- hpcflow/sdk/core/errors.py +1043 -0
- hpcflow/sdk/core/execute.py +207 -0
- hpcflow/sdk/core/json_like.py +809 -0
- hpcflow/sdk/core/loop.py +1320 -0
- hpcflow/sdk/core/loop_cache.py +282 -0
- hpcflow/sdk/core/object_list.py +933 -0
- hpcflow/sdk/core/parameters.py +3371 -0
- hpcflow/sdk/core/rule.py +196 -0
- hpcflow/sdk/core/run_dir_files.py +57 -0
- hpcflow/sdk/core/skip_reason.py +7 -0
- hpcflow/sdk/core/task.py +3792 -0
- hpcflow/sdk/core/task_schema.py +993 -0
- hpcflow/sdk/core/test_utils.py +538 -0
- hpcflow/sdk/core/types.py +447 -0
- hpcflow/sdk/core/utils.py +1207 -0
- hpcflow/sdk/core/validation.py +87 -0
- hpcflow/sdk/core/values.py +477 -0
- hpcflow/sdk/core/workflow.py +4820 -0
- hpcflow/sdk/core/zarr_io.py +206 -0
- hpcflow/sdk/data/__init__.py +13 -0
- hpcflow/sdk/data/config_file_schema.yaml +34 -0
- hpcflow/sdk/data/config_schema.yaml +260 -0
- hpcflow/sdk/data/environments_spec_schema.yaml +21 -0
- hpcflow/sdk/data/files_spec_schema.yaml +5 -0
- hpcflow/sdk/data/parameters_spec_schema.yaml +7 -0
- hpcflow/sdk/data/task_schema_spec_schema.yaml +3 -0
- hpcflow/sdk/data/workflow_spec_schema.yaml +22 -0
- hpcflow/sdk/demo/__init__.py +3 -0
- hpcflow/sdk/demo/cli.py +242 -0
- hpcflow/sdk/helper/__init__.py +3 -0
- hpcflow/sdk/helper/cli.py +137 -0
- hpcflow/sdk/helper/helper.py +300 -0
- hpcflow/sdk/helper/watcher.py +192 -0
- hpcflow/sdk/log.py +288 -0
- hpcflow/sdk/persistence/__init__.py +18 -0
- hpcflow/sdk/persistence/base.py +2817 -0
- hpcflow/sdk/persistence/defaults.py +6 -0
- hpcflow/sdk/persistence/discovery.py +39 -0
- hpcflow/sdk/persistence/json.py +954 -0
- hpcflow/sdk/persistence/pending.py +948 -0
- hpcflow/sdk/persistence/store_resource.py +203 -0
- hpcflow/sdk/persistence/types.py +309 -0
- hpcflow/sdk/persistence/utils.py +73 -0
- hpcflow/sdk/persistence/zarr.py +2388 -0
- hpcflow/sdk/runtime.py +320 -0
- hpcflow/sdk/submission/__init__.py +3 -0
- hpcflow/sdk/submission/enums.py +70 -0
- hpcflow/sdk/submission/jobscript.py +2379 -0
- hpcflow/sdk/submission/schedulers/__init__.py +281 -0
- hpcflow/sdk/submission/schedulers/direct.py +233 -0
- hpcflow/sdk/submission/schedulers/sge.py +376 -0
- hpcflow/sdk/submission/schedulers/slurm.py +598 -0
- hpcflow/sdk/submission/schedulers/utils.py +25 -0
- hpcflow/sdk/submission/shells/__init__.py +52 -0
- hpcflow/sdk/submission/shells/base.py +229 -0
- hpcflow/sdk/submission/shells/bash.py +504 -0
- hpcflow/sdk/submission/shells/os_version.py +115 -0
- hpcflow/sdk/submission/shells/powershell.py +352 -0
- hpcflow/sdk/submission/submission.py +1402 -0
- hpcflow/sdk/submission/types.py +140 -0
- hpcflow/sdk/typing.py +194 -0
- hpcflow/sdk/utils/arrays.py +69 -0
- hpcflow/sdk/utils/deferred_file.py +55 -0
- hpcflow/sdk/utils/hashing.py +16 -0
- hpcflow/sdk/utils/patches.py +31 -0
- hpcflow/sdk/utils/strings.py +69 -0
- hpcflow/tests/api/test_api.py +32 -0
- hpcflow/tests/conftest.py +123 -0
- hpcflow/tests/data/__init__.py +0 -0
- hpcflow/tests/data/benchmark_N_elements.yaml +6 -0
- hpcflow/tests/data/benchmark_script_runner.yaml +26 -0
- hpcflow/tests/data/multi_path_sequences.yaml +29 -0
- hpcflow/tests/data/workflow_1.json +10 -0
- hpcflow/tests/data/workflow_1.yaml +5 -0
- hpcflow/tests/data/workflow_1_slurm.yaml +8 -0
- hpcflow/tests/data/workflow_1_wsl.yaml +8 -0
- hpcflow/tests/data/workflow_test_run_abort.yaml +42 -0
- hpcflow/tests/jinja_templates/test_jinja_templates.py +161 -0
- hpcflow/tests/programs/test_programs.py +180 -0
- hpcflow/tests/schedulers/direct_linux/test_direct_linux_submission.py +12 -0
- hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
- hpcflow/tests/schedulers/slurm/test_slurm_submission.py +14 -0
- hpcflow/tests/scripts/test_input_file_generators.py +282 -0
- hpcflow/tests/scripts/test_main_scripts.py +1361 -0
- hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
- hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
- hpcflow/tests/shells/wsl/test_wsl_submission.py +14 -0
- hpcflow/tests/unit/test_action.py +1066 -0
- hpcflow/tests/unit/test_action_rule.py +24 -0
- hpcflow/tests/unit/test_app.py +132 -0
- hpcflow/tests/unit/test_cache.py +46 -0
- hpcflow/tests/unit/test_cli.py +172 -0
- hpcflow/tests/unit/test_command.py +377 -0
- hpcflow/tests/unit/test_config.py +195 -0
- hpcflow/tests/unit/test_config_file.py +162 -0
- hpcflow/tests/unit/test_element.py +666 -0
- hpcflow/tests/unit/test_element_iteration.py +88 -0
- hpcflow/tests/unit/test_element_set.py +158 -0
- hpcflow/tests/unit/test_group.py +115 -0
- hpcflow/tests/unit/test_input_source.py +1479 -0
- hpcflow/tests/unit/test_input_value.py +398 -0
- hpcflow/tests/unit/test_jobscript_unit.py +757 -0
- hpcflow/tests/unit/test_json_like.py +1247 -0
- hpcflow/tests/unit/test_loop.py +2674 -0
- hpcflow/tests/unit/test_meta_task.py +325 -0
- hpcflow/tests/unit/test_multi_path_sequences.py +259 -0
- hpcflow/tests/unit/test_object_list.py +116 -0
- hpcflow/tests/unit/test_parameter.py +243 -0
- hpcflow/tests/unit/test_persistence.py +664 -0
- hpcflow/tests/unit/test_resources.py +243 -0
- hpcflow/tests/unit/test_run.py +286 -0
- hpcflow/tests/unit/test_run_directories.py +29 -0
- hpcflow/tests/unit/test_runtime.py +9 -0
- hpcflow/tests/unit/test_schema_input.py +372 -0
- hpcflow/tests/unit/test_shell.py +129 -0
- hpcflow/tests/unit/test_slurm.py +39 -0
- hpcflow/tests/unit/test_submission.py +502 -0
- hpcflow/tests/unit/test_task.py +2560 -0
- hpcflow/tests/unit/test_task_schema.py +182 -0
- hpcflow/tests/unit/test_utils.py +616 -0
- hpcflow/tests/unit/test_value_sequence.py +549 -0
- hpcflow/tests/unit/test_values.py +91 -0
- hpcflow/tests/unit/test_workflow.py +827 -0
- hpcflow/tests/unit/test_workflow_template.py +186 -0
- hpcflow/tests/unit/utils/test_arrays.py +40 -0
- hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
- hpcflow/tests/unit/utils/test_hashing.py +65 -0
- hpcflow/tests/unit/utils/test_patches.py +5 -0
- hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
- hpcflow/tests/unit/utils/test_strings.py +97 -0
- hpcflow/tests/workflows/__init__.py +0 -0
- hpcflow/tests/workflows/test_directory_structure.py +31 -0
- hpcflow/tests/workflows/test_jobscript.py +355 -0
- hpcflow/tests/workflows/test_run_status.py +198 -0
- hpcflow/tests/workflows/test_skip_downstream.py +696 -0
- hpcflow/tests/workflows/test_submission.py +140 -0
- hpcflow/tests/workflows/test_workflows.py +564 -0
- hpcflow/tests/workflows/test_zip.py +18 -0
- hpcflow/viz_demo.ipynb +6794 -0
- hpcflow-0.2.0a271.dist-info/LICENSE +375 -0
- hpcflow-0.2.0a271.dist-info/METADATA +65 -0
- hpcflow-0.2.0a271.dist-info/RECORD +237 -0
- {hpcflow-0.1.9.dist-info → hpcflow-0.2.0a271.dist-info}/WHEEL +4 -5
- hpcflow-0.2.0a271.dist-info/entry_points.txt +6 -0
- hpcflow/api.py +0 -458
- hpcflow/archive/archive.py +0 -308
- hpcflow/archive/cloud/cloud.py +0 -47
- hpcflow/archive/cloud/errors.py +0 -9
- hpcflow/archive/cloud/providers/dropbox.py +0 -432
- hpcflow/archive/errors.py +0 -5
- hpcflow/base_db.py +0 -4
- hpcflow/config.py +0 -232
- hpcflow/copytree.py +0 -66
- hpcflow/data/examples/_config.yml +0 -14
- hpcflow/data/examples/damask/demo/1.run.yml +0 -4
- hpcflow/data/examples/damask/demo/2.process.yml +0 -29
- hpcflow/data/examples/damask/demo/geom.geom +0 -2052
- hpcflow/data/examples/damask/demo/load.load +0 -1
- hpcflow/data/examples/damask/demo/material.config +0 -185
- hpcflow/data/examples/damask/inputs/geom.geom +0 -2052
- hpcflow/data/examples/damask/inputs/load.load +0 -1
- hpcflow/data/examples/damask/inputs/material.config +0 -185
- hpcflow/data/examples/damask/profiles/_variable_lookup.yml +0 -21
- hpcflow/data/examples/damask/profiles/damask.yml +0 -4
- hpcflow/data/examples/damask/profiles/damask_process.yml +0 -8
- hpcflow/data/examples/damask/profiles/damask_run.yml +0 -5
- hpcflow/data/examples/damask/profiles/default.yml +0 -6
- hpcflow/data/examples/thinking.yml +0 -177
- hpcflow/errors.py +0 -2
- hpcflow/init_db.py +0 -37
- hpcflow/models.py +0 -2549
- hpcflow/nesting.py +0 -9
- hpcflow/profiles.py +0 -455
- hpcflow/project.py +0 -81
- hpcflow/scheduler.py +0 -323
- hpcflow/utils.py +0 -103
- hpcflow/validation.py +0 -167
- hpcflow/variables.py +0 -544
- hpcflow-0.1.9.dist-info/METADATA +0 -168
- hpcflow-0.1.9.dist-info/RECORD +0 -45
- hpcflow-0.1.9.dist-info/entry_points.txt +0 -8
- hpcflow-0.1.9.dist-info/top_level.txt +0 -1
- /hpcflow/{archive → data/jinja_templates}/__init__.py +0 -0
- /hpcflow/{archive/cloud → data/programs}/__init__.py +0 -0
- /hpcflow/{archive/cloud/providers → data/workflows}/__init__.py +0 -0
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
"""
|
|
2
|
+
An interface to SGE.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
from collections.abc import Sequence
|
|
7
|
+
import re
|
|
8
|
+
from typing import cast, TYPE_CHECKING
|
|
9
|
+
from typing_extensions import override
|
|
10
|
+
from hpcflow.sdk.typing import hydrate
|
|
11
|
+
from hpcflow.sdk.core.errors import (
|
|
12
|
+
IncompatibleSGEPEError,
|
|
13
|
+
NoCompatibleSGEPEError,
|
|
14
|
+
UnknownSGEPEError,
|
|
15
|
+
)
|
|
16
|
+
from hpcflow.sdk.log import TimeIt
|
|
17
|
+
from hpcflow.sdk.submission.enums import JobscriptElementState
|
|
18
|
+
from hpcflow.sdk.submission.schedulers import QueuedScheduler
|
|
19
|
+
from hpcflow.sdk.submission.schedulers.utils import run_cmd
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from collections.abc import Iterator, Mapping
|
|
23
|
+
from typing import Any, ClassVar
|
|
24
|
+
from ...config.types import SchedulerConfigDescriptor
|
|
25
|
+
from ...core.element import ElementResources
|
|
26
|
+
from ..jobscript import Jobscript
|
|
27
|
+
from ..types import VersionInfo
|
|
28
|
+
from ..shells.base import Shell
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@hydrate
class SGEPosix(QueuedScheduler):
    """
    A scheduler that uses SGE.

    Keyword Args
    ------------
    cwd_switch: str
        Override of default switch to use to set the current working directory.
    directives: dict
        Scheduler directives. Each item is written verbatim in the jobscript as a
        scheduler directive, and is not processed in any way. If a value is `None`, the
        key is considered a flag-like directive. If a value is a list, multiple directives
        will be printed to the jobscript with the same key, but different values.

    Notes
    -----
    - runs in serial by default

    References
    ----------
    [1] https://gridscheduler.sourceforge.net/htmlman/htmlman1/qsub.html
    [2] https://softpanorama.org/HPC/Grid_engine/Queues/queue_states.shtml

    """

    #: Default submission command.
    DEFAULT_SUBMIT_CMD: ClassVar[str] = "qsub"
    #: Default command to show the queue state.
    DEFAULT_SHOW_CMD: ClassVar[Sequence[str]] = ("qstat",)
    #: Default cancel command.
    DEFAULT_DEL_CMD: ClassVar[str] = "qdel"
    #: Default job control directive prefix.
    DEFAULT_JS_CMD: ClassVar[str] = "#$"
    #: Default prefix to enable array processing.
    DEFAULT_ARRAY_SWITCH: ClassVar[str] = "-t"
    #: Default shell variable with array ID.
    DEFAULT_ARRAY_ITEM_VAR: ClassVar[str] = "SGE_TASK_ID"
    #: Default switch to control CWD.
    DEFAULT_CWD_SWITCH: ClassVar[str] = "-cwd"
    #: Default command to get the login nodes.
    DEFAULT_LOGIN_NODES_CMD: ClassVar[Sequence[str]] = ("qconf", "-sh")

    #: Maps scheduler state codes to :py:class:`JobscriptElementState` values.
    #: Keys are one- or two-character `qstat` state strings (see reference [2]
    #: in the class docstring); lookups elsewhere strip at most two characters
    #: from the state column, so no key here may be longer than two characters.
    state_lookup: ClassVar[Mapping[str, JobscriptElementState]] = {
        "qw": JobscriptElementState.pending,
        "hq": JobscriptElementState.waiting,
        "hR": JobscriptElementState.waiting,
        "r": JobscriptElementState.running,
        "t": JobscriptElementState.running,
        "Rr": JobscriptElementState.running,
        # "Rt": JobscriptElementState.running,
        "s": JobscriptElementState.errored,
        "ts": JobscriptElementState.errored,
        "S": JobscriptElementState.errored,
        "tS": JobscriptElementState.errored,
        "T": JobscriptElementState.errored,
        "tT": JobscriptElementState.errored,
        "Rs": JobscriptElementState.errored,
        "Rt": JobscriptElementState.errored,
        "RS": JobscriptElementState.errored,
        "RT": JobscriptElementState.errored,
        "Eq": JobscriptElementState.errored,
        "Eh": JobscriptElementState.errored,
        "dr": JobscriptElementState.cancelled,
        "dt": JobscriptElementState.cancelled,
        "dR": JobscriptElementState.cancelled,
        "ds": JobscriptElementState.cancelled,
        "dS": JobscriptElementState.cancelled,
        "dT": JobscriptElementState.cancelled,
    }
|
102
|
+
|
|
103
|
+
def __init__(self, cwd_switch: str | None = None, *args, **kwargs):
    """Create the SGE scheduler interface.

    Parameters
    ----------
    cwd_switch
        Override of the default switch used to set the current working
        directory; any falsy value (``None`` or ``""``) falls back to
        ``DEFAULT_CWD_SWITCH``.
    """
    super().__init__(*args, **kwargs)
    # Falsy values (not just None) fall back to the class default.
    self.cwd_switch = cwd_switch if cwd_switch else self.DEFAULT_CWD_SWITCH
|
|
106
|
+
|
|
107
|
+
@classmethod
@override
@TimeIt.decorator
def process_resources(
    cls, resources: ElementResources, scheduler_config: SchedulerConfigDescriptor
) -> None:
    """
    Perform scheduler-specific processing to the element resources.

    Parameters
    ----------
    resources
        Element resources to validate and complete; mutated in place (in
        particular, ``SGE_parallel_env`` may be filled in).
    scheduler_config
        Scheduler configuration; its ``parallel_environments`` entry maps
        parallel-environment (PE) names to descriptors with a ``num_cores``
        specification.

    Raises
    ------
    ValueError
        If ``num_nodes`` is specified (unsupported by this scheduler), or if a
        parallel environment is specified together with ``num_cores == 1``.
    UnknownSGEPEError
        If the requested parallel environment is not configured.
    IncompatibleSGEPEError
        If the requested parallel environment does not support ``num_cores``.
    NoCompatibleSGEPEError
        If no configured parallel environment supports ``num_cores``.

    Note
    ----
    This mutates `resources`.
    """
    if resources.num_nodes is not None:
        raise ValueError(
            f"Specifying `num_nodes` for the {cls.__name__!r} scheduler is not "
            f"supported."
        )

    para_envs = scheduler_config.get("parallel_environments", {})

    if resources.SGE_parallel_env is not None:
        # check user-specified `parallel_env` is valid and compatible with
        # `num_cores`:
        if resources.num_cores == 1:
            raise ValueError(
                f"An SGE parallel environment should not be specified if `num_cores` "
                f"is 1 (`SGE_parallel_env` was specified as "
                f"{resources.SGE_parallel_env!r})."
            )

        try:
            env = para_envs[resources.SGE_parallel_env]
        except KeyError as err:
            # chain explicitly so the original lookup failure is preserved:
            raise UnknownSGEPEError(resources.SGE_parallel_env, para_envs) from err
        if not cls.is_num_cores_supported(resources.num_cores, env["num_cores"]):
            raise IncompatibleSGEPEError(
                resources.SGE_parallel_env, resources.num_cores
            )
    else:
        # find the first compatible PE:
        for pe_name, pe_info in para_envs.items():
            if cls.is_num_cores_supported(resources.num_cores, pe_info["num_cores"]):
                resources.SGE_parallel_env = pe_name
                break
        else:
            # no-break: nothing in the config can satisfy the request
            raise NoCompatibleSGEPEError(resources.num_cores)
|
|
154
|
+
|
|
155
|
+
def get_login_nodes(self) -> list[str]:
    """Return a list of hostnames of login/administrative nodes as reported by the
    scheduler."""
    login_cmd = self.login_nodes_cmd
    assert login_cmd is not None and len(login_cmd) >= 1
    out, err = run_cmd(login_cmd)
    # Surface any scheduler complaints, but don't treat them as fatal here.
    if err:
        print(err)
    return out.strip().split("\n")
|
|
164
|
+
|
|
165
|
+
def __format_core_request_lines(self, resources: ElementResources) -> Iterator[str]:
    """Yield jobscript directive lines for core and array-throttle requests."""
    cores = resources.num_cores
    # Only request a parallel environment when more than one core is wanted.
    if cores and cores > 1:
        yield f"{self.js_cmd} -pe {resources.SGE_parallel_env} {resources.num_cores}"
    # "-tc" throttles how many array tasks may run concurrently.
    if resources.max_array_items:
        yield f"{self.js_cmd} -tc {resources.max_array_items}"
|
|
170
|
+
|
|
171
|
+
def __format_array_request(self, num_elements: int) -> str:
    """Return the directive line requesting an SGE task array."""
    # SGE task IDs are 1-based and the range is inclusive.
    directive = f"{self.js_cmd} {self.array_switch} 1-{num_elements}"
    return directive
|
|
173
|
+
|
|
174
|
+
def get_stdout_filename(
|
|
175
|
+
self, js_idx: int, job_ID: str, array_idx: int | None = None
|
|
176
|
+
) -> str:
|
|
177
|
+
"""File name of the standard output stream file."""
|
|
178
|
+
# TODO: untested, might not work!
|
|
179
|
+
array_idx_str = f".{array_idx}" if array_idx is not None else ""
|
|
180
|
+
return f"js_{js_idx}.sh.o{job_ID}{array_idx_str}"
|
|
181
|
+
|
|
182
|
+
def get_stderr_filename(
|
|
183
|
+
self, js_idx: int, job_ID: str, array_idx: int | None = None
|
|
184
|
+
) -> str:
|
|
185
|
+
"""File name of the standard error stream file."""
|
|
186
|
+
# TODO: untested, might not work!
|
|
187
|
+
array_idx_str = f".{array_idx}" if array_idx is not None else ""
|
|
188
|
+
return f"js_{js_idx}.sh.e{job_ID}{array_idx_str}"
|
|
189
|
+
|
|
190
|
+
def __format_std_stream_file_option_lines(
    self, is_array: bool, sub_idx: int, js_idx: int, combine_std: bool
) -> Iterator[str]:
    """Yield directive lines routing stdout/stderr to the submission's std dir."""
    # note: if we modify the file names, there is, I believe, no way to include the
    # job ID; so we don't modify the file names:
    std_dir = f"./artifacts/submissions/{sub_idx}/js_std/{js_idx}"
    yield f"{self.js_cmd} -o {std_dir}"
    if not combine_std:
        yield f"{self.js_cmd} -e {std_dir}"
    else:
        yield f"{self.js_cmd} -j y"  # redirect stderr to stdout
|
|
201
|
+
|
|
202
|
+
@override
def format_directives(
    self,
    resources: ElementResources,
    num_elements: int,
    is_array: bool,
    sub_idx: int,
    js_idx: int,
) -> str:
    """
    Format the directives to the jobscript command.
    """
    # Start with the CWD switch, then core/PE requests, then (optionally) the
    # array request and the stream-file routing options.
    lines: list[str] = [self.format_switch(self.cwd_switch)]
    lines.extend(self.__format_core_request_lines(resources))
    if is_array:
        lines.append(self.__format_array_request(num_elements))

    lines.extend(
        self.__format_std_stream_file_option_lines(
            is_array, sub_idx, js_idx, resources.combine_jobscript_std
        )
    )

    # User-supplied directives are written verbatim; a None value means a
    # flag-like directive, a list repeats the key once per value.
    for key, value in self.directives.items():
        if value is None:
            lines.append(f"{self.js_cmd} {key}")
        elif isinstance(value, list):
            lines.extend(f"{self.js_cmd} {key} {item}" for item in value)
        elif value:
            lines.append(f"{self.js_cmd} {key} {value}")

    return "\n".join(lines) + "\n"
|
|
235
|
+
|
|
236
|
+
@override
@TimeIt.decorator
def get_version_info(self) -> VersionInfo:
    """Return the scheduler name and version parsed from the show command's
    ``-help`` output (first line, first two whitespace-separated tokens)."""
    stdout, stderr = run_cmd([*self.show_cmd, "-help"])
    if stderr:
        print(stderr)
    header = stdout.split("\n")[0]
    tokens = header.strip().split()
    name, version = tokens[0], tokens[1]
    return {
        "scheduler_name": name,
        "scheduler_version": version,
    }
|
|
248
|
+
|
|
249
|
+
@override
|
|
250
|
+
def get_submit_command(
|
|
251
|
+
self,
|
|
252
|
+
shell: Shell,
|
|
253
|
+
js_path: str,
|
|
254
|
+
deps: dict[Any, tuple[Any, ...]],
|
|
255
|
+
) -> list[str]:
|
|
256
|
+
"""
|
|
257
|
+
Get the command to use to submit a job to the scheduler.
|
|
258
|
+
|
|
259
|
+
Returns
|
|
260
|
+
-------
|
|
261
|
+
List of argument words.
|
|
262
|
+
"""
|
|
263
|
+
cmd = [self.submit_cmd, "-terse"]
|
|
264
|
+
|
|
265
|
+
dep_job_IDs: list[str] = []
|
|
266
|
+
dep_job_IDs_arr: list[str] = []
|
|
267
|
+
for job_ID, is_array_dep in deps.values():
|
|
268
|
+
if is_array_dep: # array dependency
|
|
269
|
+
dep_job_IDs_arr.append(str(job_ID))
|
|
270
|
+
else:
|
|
271
|
+
dep_job_IDs.append(str(job_ID))
|
|
272
|
+
|
|
273
|
+
if dep_job_IDs:
|
|
274
|
+
cmd.append("-hold_jid")
|
|
275
|
+
cmd.append(",".join(dep_job_IDs))
|
|
276
|
+
|
|
277
|
+
if dep_job_IDs_arr:
|
|
278
|
+
cmd.append("-hold_jid_ad")
|
|
279
|
+
cmd.append(",".join(dep_job_IDs_arr))
|
|
280
|
+
|
|
281
|
+
cmd.append(js_path)
|
|
282
|
+
return cmd
|
|
283
|
+
|
|
284
|
+
#: Pattern matching the leading job-ID digits of `qsub -terse` output.
__SGE_JOB_ID_RE: ClassVar[re.Pattern] = re.compile(r"^\d+")

def parse_submission_output(self, stdout: str) -> str:
    """Extract scheduler reference for a newly submitted jobscript"""
    match = self.__SGE_JOB_ID_RE.search(stdout)
    if match is None:
        raise RuntimeError(f"Could not parse Job ID from scheduler output {stdout!r}")
    return match.group()
|
|
291
|
+
|
|
292
|
+
def get_job_statuses(
    self,
) -> Mapping[str, JobscriptElementState | Mapping[int, JobscriptElementState]]:
    """Get information about all of this user's jobscripts that are currently listed
    by the scheduler.

    Returns
    -------
    Mapping from job ID to either a single state (non-array jobs) or a mapping
    from zero-based array-task index to state (array jobs).

    Raises
    ------
    ValueError
        If the scheduler query command writes anything to stderr.
    """
    cmd = [*self.show_cmd, "-u", "$USER", "-g", "d"]  # "-g d": separate arrays items
    stdout, stderr = run_cmd(cmd, logger=self._app.submission_logger)
    if stderr:
        # error-message wording fixed: previously read "Could not get query..."
        raise ValueError(
            f"Could not query SGE jobs. Command was: {cmd!r}; stderr was: "
            f"{stderr}"
        )
    elif not stdout:
        # no listed jobs at all
        return {}

    info: dict[str, dict[int, JobscriptElementState] | JobscriptElementState] = {}
    lines = stdout.split("\n")
    # assuming a job name with spaces means we can't split on spaces to get
    # anywhere beyond the job name, so get the column index of the state heading
    # and assume the state is always left-aligned with the heading:
    state_idx = lines[0].index("state")
    task_id_idx = lines[0].index("ja-task-ID")
    # lines[0] is the header, lines[1] a separator; data starts at lines[2]:
    for ln in lines[2:]:
        if not ln:
            continue
        base_job_ID, *_ = ln.split()

        # states can be one or two chars (for our limited purposes):
        state_str = ln[state_idx : state_idx + 2].strip()
        state = self.state_lookup[state_str]

        arr_idx_s = ln[task_id_idx:].strip()
        arr_idx = (
            int(arr_idx_s) - 1  # We are using zero-indexed info
            if arr_idx_s
            else None
        )

        if arr_idx is not None:
            # array job: accumulate per-task states under the base job ID
            entry = cast(
                dict[int, JobscriptElementState], info.setdefault(base_job_ID, {})
            )
            entry[arr_idx] = state
        else:
            info[base_job_ID] = state
    return info
|
|
338
|
+
|
|
339
|
+
@override
|
|
340
|
+
def get_job_state_info(
|
|
341
|
+
self, *, js_refs: Sequence[str] | None = None
|
|
342
|
+
) -> Mapping[str, JobscriptElementState | Mapping[int, JobscriptElementState]]:
|
|
343
|
+
"""Query the scheduler to get the states of all of this user's jobs, optionally
|
|
344
|
+
filtering by specified job IDs.
|
|
345
|
+
|
|
346
|
+
Jobs that are not in the scheduler's status output will not appear in the output
|
|
347
|
+
of this method.
|
|
348
|
+
|
|
349
|
+
"""
|
|
350
|
+
info = self.get_job_statuses()
|
|
351
|
+
if js_refs:
|
|
352
|
+
return {k: v for k, v in info.items() if k in js_refs}
|
|
353
|
+
return info
|
|
354
|
+
|
|
355
|
+
@override
def cancel_jobs(
    self,
    js_refs: list[str],
    jobscripts: list[Jobscript] | None = None,
):
    """
    Cancel submitted jobs.

    Parameters
    ----------
    js_refs
        Scheduler job IDs of the jobs to cancel.
    jobscripts
        Unused here; accepted for interface compatibility with other schedulers.

    Raises
    ------
    ValueError
        If the cancel command writes anything to stderr.
    """
    cmd = [self.del_cmd] + js_refs
    self._app.submission_logger.info(
        f"cancelling {self.__class__.__name__} jobscripts with command: {cmd}."
    )
    stdout, stderr = run_cmd(cmd, logger=self._app.submission_logger)
    if stderr:
        # error-message fixed: previously a copy-paste of the status-query
        # wording ("Could not get query SGE ..."), which misdescribed a cancel.
        raise ValueError(
            f"Could not cancel {self.__class__.__name__} jobscripts. Command was: "
            f"{cmd!r}; stderr was: {stderr}"
        )
    self._app.submission_logger.info(
        f"jobscripts cancel command executed; stdout was: {stdout}."
    )
|