hpcflow 0.1.15__py3-none-any.whl → 0.2.0a271__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hpcflow/__init__.py +2 -11
- hpcflow/__pyinstaller/__init__.py +5 -0
- hpcflow/__pyinstaller/hook-hpcflow.py +40 -0
- hpcflow/_version.py +1 -1
- hpcflow/app.py +43 -0
- hpcflow/cli.py +2 -461
- hpcflow/data/demo_data_manifest/__init__.py +3 -0
- hpcflow/data/demo_data_manifest/demo_data_manifest.json +6 -0
- hpcflow/data/jinja_templates/test/test_template.txt +8 -0
- hpcflow/data/programs/hello_world/README.md +1 -0
- hpcflow/data/programs/hello_world/hello_world.c +87 -0
- hpcflow/data/programs/hello_world/linux/hello_world +0 -0
- hpcflow/data/programs/hello_world/macos/hello_world +0 -0
- hpcflow/data/programs/hello_world/win/hello_world.exe +0 -0
- hpcflow/data/scripts/__init__.py +1 -0
- hpcflow/data/scripts/bad_script.py +2 -0
- hpcflow/data/scripts/demo_task_1_generate_t1_infile_1.py +8 -0
- hpcflow/data/scripts/demo_task_1_generate_t1_infile_2.py +8 -0
- hpcflow/data/scripts/demo_task_1_parse_p3.py +7 -0
- hpcflow/data/scripts/do_nothing.py +2 -0
- hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
- hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
- hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
- hpcflow/data/scripts/generate_t1_file_01.py +7 -0
- hpcflow/data/scripts/import_future_script.py +7 -0
- hpcflow/data/scripts/input_file_generator_basic.py +3 -0
- hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
- hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_all_iters_test.py +15 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_env_spec.py +7 -0
- hpcflow/data/scripts/main_script_test_direct_in_direct_out_labels.py +8 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
- hpcflow/data/scripts/main_script_test_direct_sub_param_in_direct_out.py +6 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj_group.py +12 -0
- hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +11 -0
- hpcflow/data/scripts/main_script_test_json_and_direct_in_json_out.py +14 -0
- hpcflow/data/scripts/main_script_test_json_in_json_and_direct_out.py +17 -0
- hpcflow/data/scripts/main_script_test_json_in_json_out.py +14 -0
- hpcflow/data/scripts/main_script_test_json_in_json_out_labels.py +16 -0
- hpcflow/data/scripts/main_script_test_json_in_obj.py +12 -0
- hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
- hpcflow/data/scripts/main_script_test_json_out_obj.py +10 -0
- hpcflow/data/scripts/main_script_test_json_sub_param_in_json_out_labels.py +16 -0
- hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
- hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
- hpcflow/data/scripts/output_file_parser_basic.py +3 -0
- hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
- hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
- hpcflow/data/scripts/parse_t1_file_01.py +4 -0
- hpcflow/data/scripts/script_exit_test.py +5 -0
- hpcflow/data/template_components/__init__.py +1 -0
- hpcflow/data/template_components/command_files.yaml +26 -0
- hpcflow/data/template_components/environments.yaml +13 -0
- hpcflow/data/template_components/parameters.yaml +14 -0
- hpcflow/data/template_components/task_schemas.yaml +139 -0
- hpcflow/data/workflows/workflow_1.yaml +5 -0
- hpcflow/examples.ipynb +1037 -0
- hpcflow/sdk/__init__.py +149 -0
- hpcflow/sdk/app.py +4266 -0
- hpcflow/sdk/cli.py +1479 -0
- hpcflow/sdk/cli_common.py +385 -0
- hpcflow/sdk/config/__init__.py +5 -0
- hpcflow/sdk/config/callbacks.py +246 -0
- hpcflow/sdk/config/cli.py +388 -0
- hpcflow/sdk/config/config.py +1410 -0
- hpcflow/sdk/config/config_file.py +501 -0
- hpcflow/sdk/config/errors.py +272 -0
- hpcflow/sdk/config/types.py +150 -0
- hpcflow/sdk/core/__init__.py +38 -0
- hpcflow/sdk/core/actions.py +3857 -0
- hpcflow/sdk/core/app_aware.py +25 -0
- hpcflow/sdk/core/cache.py +224 -0
- hpcflow/sdk/core/command_files.py +814 -0
- hpcflow/sdk/core/commands.py +424 -0
- hpcflow/sdk/core/element.py +2071 -0
- hpcflow/sdk/core/enums.py +221 -0
- hpcflow/sdk/core/environment.py +256 -0
- hpcflow/sdk/core/errors.py +1043 -0
- hpcflow/sdk/core/execute.py +207 -0
- hpcflow/sdk/core/json_like.py +809 -0
- hpcflow/sdk/core/loop.py +1320 -0
- hpcflow/sdk/core/loop_cache.py +282 -0
- hpcflow/sdk/core/object_list.py +933 -0
- hpcflow/sdk/core/parameters.py +3371 -0
- hpcflow/sdk/core/rule.py +196 -0
- hpcflow/sdk/core/run_dir_files.py +57 -0
- hpcflow/sdk/core/skip_reason.py +7 -0
- hpcflow/sdk/core/task.py +3792 -0
- hpcflow/sdk/core/task_schema.py +993 -0
- hpcflow/sdk/core/test_utils.py +538 -0
- hpcflow/sdk/core/types.py +447 -0
- hpcflow/sdk/core/utils.py +1207 -0
- hpcflow/sdk/core/validation.py +87 -0
- hpcflow/sdk/core/values.py +477 -0
- hpcflow/sdk/core/workflow.py +4820 -0
- hpcflow/sdk/core/zarr_io.py +206 -0
- hpcflow/sdk/data/__init__.py +13 -0
- hpcflow/sdk/data/config_file_schema.yaml +34 -0
- hpcflow/sdk/data/config_schema.yaml +260 -0
- hpcflow/sdk/data/environments_spec_schema.yaml +21 -0
- hpcflow/sdk/data/files_spec_schema.yaml +5 -0
- hpcflow/sdk/data/parameters_spec_schema.yaml +7 -0
- hpcflow/sdk/data/task_schema_spec_schema.yaml +3 -0
- hpcflow/sdk/data/workflow_spec_schema.yaml +22 -0
- hpcflow/sdk/demo/__init__.py +3 -0
- hpcflow/sdk/demo/cli.py +242 -0
- hpcflow/sdk/helper/__init__.py +3 -0
- hpcflow/sdk/helper/cli.py +137 -0
- hpcflow/sdk/helper/helper.py +300 -0
- hpcflow/sdk/helper/watcher.py +192 -0
- hpcflow/sdk/log.py +288 -0
- hpcflow/sdk/persistence/__init__.py +18 -0
- hpcflow/sdk/persistence/base.py +2817 -0
- hpcflow/sdk/persistence/defaults.py +6 -0
- hpcflow/sdk/persistence/discovery.py +39 -0
- hpcflow/sdk/persistence/json.py +954 -0
- hpcflow/sdk/persistence/pending.py +948 -0
- hpcflow/sdk/persistence/store_resource.py +203 -0
- hpcflow/sdk/persistence/types.py +309 -0
- hpcflow/sdk/persistence/utils.py +73 -0
- hpcflow/sdk/persistence/zarr.py +2388 -0
- hpcflow/sdk/runtime.py +320 -0
- hpcflow/sdk/submission/__init__.py +3 -0
- hpcflow/sdk/submission/enums.py +70 -0
- hpcflow/sdk/submission/jobscript.py +2379 -0
- hpcflow/sdk/submission/schedulers/__init__.py +281 -0
- hpcflow/sdk/submission/schedulers/direct.py +233 -0
- hpcflow/sdk/submission/schedulers/sge.py +376 -0
- hpcflow/sdk/submission/schedulers/slurm.py +598 -0
- hpcflow/sdk/submission/schedulers/utils.py +25 -0
- hpcflow/sdk/submission/shells/__init__.py +52 -0
- hpcflow/sdk/submission/shells/base.py +229 -0
- hpcflow/sdk/submission/shells/bash.py +504 -0
- hpcflow/sdk/submission/shells/os_version.py +115 -0
- hpcflow/sdk/submission/shells/powershell.py +352 -0
- hpcflow/sdk/submission/submission.py +1402 -0
- hpcflow/sdk/submission/types.py +140 -0
- hpcflow/sdk/typing.py +194 -0
- hpcflow/sdk/utils/arrays.py +69 -0
- hpcflow/sdk/utils/deferred_file.py +55 -0
- hpcflow/sdk/utils/hashing.py +16 -0
- hpcflow/sdk/utils/patches.py +31 -0
- hpcflow/sdk/utils/strings.py +69 -0
- hpcflow/tests/api/test_api.py +32 -0
- hpcflow/tests/conftest.py +123 -0
- hpcflow/tests/data/__init__.py +0 -0
- hpcflow/tests/data/benchmark_N_elements.yaml +6 -0
- hpcflow/tests/data/benchmark_script_runner.yaml +26 -0
- hpcflow/tests/data/multi_path_sequences.yaml +29 -0
- hpcflow/tests/data/workflow_1.json +10 -0
- hpcflow/tests/data/workflow_1.yaml +5 -0
- hpcflow/tests/data/workflow_1_slurm.yaml +8 -0
- hpcflow/tests/data/workflow_1_wsl.yaml +8 -0
- hpcflow/tests/data/workflow_test_run_abort.yaml +42 -0
- hpcflow/tests/jinja_templates/test_jinja_templates.py +161 -0
- hpcflow/tests/programs/test_programs.py +180 -0
- hpcflow/tests/schedulers/direct_linux/test_direct_linux_submission.py +12 -0
- hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
- hpcflow/tests/schedulers/slurm/test_slurm_submission.py +14 -0
- hpcflow/tests/scripts/test_input_file_generators.py +282 -0
- hpcflow/tests/scripts/test_main_scripts.py +1361 -0
- hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
- hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
- hpcflow/tests/shells/wsl/test_wsl_submission.py +14 -0
- hpcflow/tests/unit/test_action.py +1066 -0
- hpcflow/tests/unit/test_action_rule.py +24 -0
- hpcflow/tests/unit/test_app.py +132 -0
- hpcflow/tests/unit/test_cache.py +46 -0
- hpcflow/tests/unit/test_cli.py +172 -0
- hpcflow/tests/unit/test_command.py +377 -0
- hpcflow/tests/unit/test_config.py +195 -0
- hpcflow/tests/unit/test_config_file.py +162 -0
- hpcflow/tests/unit/test_element.py +666 -0
- hpcflow/tests/unit/test_element_iteration.py +88 -0
- hpcflow/tests/unit/test_element_set.py +158 -0
- hpcflow/tests/unit/test_group.py +115 -0
- hpcflow/tests/unit/test_input_source.py +1479 -0
- hpcflow/tests/unit/test_input_value.py +398 -0
- hpcflow/tests/unit/test_jobscript_unit.py +757 -0
- hpcflow/tests/unit/test_json_like.py +1247 -0
- hpcflow/tests/unit/test_loop.py +2674 -0
- hpcflow/tests/unit/test_meta_task.py +325 -0
- hpcflow/tests/unit/test_multi_path_sequences.py +259 -0
- hpcflow/tests/unit/test_object_list.py +116 -0
- hpcflow/tests/unit/test_parameter.py +243 -0
- hpcflow/tests/unit/test_persistence.py +664 -0
- hpcflow/tests/unit/test_resources.py +243 -0
- hpcflow/tests/unit/test_run.py +286 -0
- hpcflow/tests/unit/test_run_directories.py +29 -0
- hpcflow/tests/unit/test_runtime.py +9 -0
- hpcflow/tests/unit/test_schema_input.py +372 -0
- hpcflow/tests/unit/test_shell.py +129 -0
- hpcflow/tests/unit/test_slurm.py +39 -0
- hpcflow/tests/unit/test_submission.py +502 -0
- hpcflow/tests/unit/test_task.py +2560 -0
- hpcflow/tests/unit/test_task_schema.py +182 -0
- hpcflow/tests/unit/test_utils.py +616 -0
- hpcflow/tests/unit/test_value_sequence.py +549 -0
- hpcflow/tests/unit/test_values.py +91 -0
- hpcflow/tests/unit/test_workflow.py +827 -0
- hpcflow/tests/unit/test_workflow_template.py +186 -0
- hpcflow/tests/unit/utils/test_arrays.py +40 -0
- hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
- hpcflow/tests/unit/utils/test_hashing.py +65 -0
- hpcflow/tests/unit/utils/test_patches.py +5 -0
- hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
- hpcflow/tests/unit/utils/test_strings.py +97 -0
- hpcflow/tests/workflows/__init__.py +0 -0
- hpcflow/tests/workflows/test_directory_structure.py +31 -0
- hpcflow/tests/workflows/test_jobscript.py +355 -0
- hpcflow/tests/workflows/test_run_status.py +198 -0
- hpcflow/tests/workflows/test_skip_downstream.py +696 -0
- hpcflow/tests/workflows/test_submission.py +140 -0
- hpcflow/tests/workflows/test_workflows.py +564 -0
- hpcflow/tests/workflows/test_zip.py +18 -0
- hpcflow/viz_demo.ipynb +6794 -0
- hpcflow-0.2.0a271.dist-info/LICENSE +375 -0
- hpcflow-0.2.0a271.dist-info/METADATA +65 -0
- hpcflow-0.2.0a271.dist-info/RECORD +237 -0
- {hpcflow-0.1.15.dist-info → hpcflow-0.2.0a271.dist-info}/WHEEL +4 -5
- hpcflow-0.2.0a271.dist-info/entry_points.txt +6 -0
- hpcflow/api.py +0 -490
- hpcflow/archive/archive.py +0 -307
- hpcflow/archive/cloud/cloud.py +0 -45
- hpcflow/archive/cloud/errors.py +0 -9
- hpcflow/archive/cloud/providers/dropbox.py +0 -427
- hpcflow/archive/errors.py +0 -5
- hpcflow/base_db.py +0 -4
- hpcflow/config.py +0 -233
- hpcflow/copytree.py +0 -66
- hpcflow/data/examples/_config.yml +0 -14
- hpcflow/data/examples/damask/demo/1.run.yml +0 -4
- hpcflow/data/examples/damask/demo/2.process.yml +0 -29
- hpcflow/data/examples/damask/demo/geom.geom +0 -2052
- hpcflow/data/examples/damask/demo/load.load +0 -1
- hpcflow/data/examples/damask/demo/material.config +0 -185
- hpcflow/data/examples/damask/inputs/geom.geom +0 -2052
- hpcflow/data/examples/damask/inputs/load.load +0 -1
- hpcflow/data/examples/damask/inputs/material.config +0 -185
- hpcflow/data/examples/damask/profiles/_variable_lookup.yml +0 -21
- hpcflow/data/examples/damask/profiles/damask.yml +0 -4
- hpcflow/data/examples/damask/profiles/damask_process.yml +0 -8
- hpcflow/data/examples/damask/profiles/damask_run.yml +0 -5
- hpcflow/data/examples/damask/profiles/default.yml +0 -6
- hpcflow/data/examples/thinking.yml +0 -177
- hpcflow/errors.py +0 -2
- hpcflow/init_db.py +0 -37
- hpcflow/models.py +0 -2595
- hpcflow/nesting.py +0 -9
- hpcflow/profiles.py +0 -455
- hpcflow/project.py +0 -81
- hpcflow/scheduler.py +0 -322
- hpcflow/utils.py +0 -103
- hpcflow/validation.py +0 -166
- hpcflow/variables.py +0 -543
- hpcflow-0.1.15.dist-info/METADATA +0 -168
- hpcflow-0.1.15.dist-info/RECORD +0 -45
- hpcflow-0.1.15.dist-info/entry_points.txt +0 -8
- hpcflow-0.1.15.dist-info/top_level.txt +0 -1
- /hpcflow/{archive → data/jinja_templates}/__init__.py +0 -0
- /hpcflow/{archive/cloud → data/programs}/__init__.py +0 -0
- /hpcflow/{archive/cloud/providers → data/workflows}/__init__.py +0 -0
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Job scheduler models.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
import sys
|
|
8
|
+
import time
|
|
9
|
+
from typing import Generic, TypeVar, TYPE_CHECKING
|
|
10
|
+
import warnings
|
|
11
|
+
from typing_extensions import override
|
|
12
|
+
from hpcflow.sdk.typing import hydrate
|
|
13
|
+
from hpcflow.sdk.core.app_aware import AppAware
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from collections.abc import Mapping, Sequence
|
|
17
|
+
from typing import Any, ClassVar
|
|
18
|
+
from ..shells import Shell
|
|
19
|
+
from ..jobscript import Jobscript
|
|
20
|
+
from ..enums import JobscriptElementState
|
|
21
|
+
from ..types import VersionInfo
|
|
22
|
+
from ...config.types import SchedulerConfigDescriptor
|
|
23
|
+
from ...core.element import ElementResources
|
|
24
|
+
|
|
25
|
+
#: Type variable standing for whatever a scheduler uses to refer to a jobscript.
JSRefType = TypeVar("JSRefType")


@hydrate
class Scheduler(ABC, Generic[JSRefType], AppAware):
    """
    Abstract base class shared by all schedulers.

    Note
    ----
    The only direct subclasses of this class should be
    :py:class:`DirectScheduler` and :py:class:`QueuedScheduler`; new schedulers
    should derive from one of those two. Other code (e.g., in
    :py:class:`Jobscript`) relies on this hierarchy without verifying it.

    Parameters
    ----------
    shebang_executable: list[str]
        If specified, this will be used in the jobscript's shebang line instead of the
        shell's `executable` and `executable_args` attributes.
    """

    # Kept out of the docstring because it renders really wrongly there!
    # Type Parameters
    # ---------------
    # T
    #     The type of a jobscript reference.

    def __init__(self, shebang_executable: list[str] | None = None):
        self.shebang_executable = shebang_executable

    @property
    def unique_properties(self) -> tuple[str, ...]:
        """
        Properties that distinguish this scheduler, for hashing.

        The base implementation contains only the name of the concrete class.
        """
        return (self.__class__.__name__,)

    def __eq__(self, other: Any) -> bool:
        # Equal only to instances of the same class (or a subclass of it) whose
        # instance attributes all match.
        return isinstance(other, self.__class__) and self.__dict__ == other.__dict__

    @abstractmethod
    def process_resources(
        self, resources: ElementResources, scheduler_config: SchedulerConfigDescriptor
    ) -> None:
        """
        Apply scheduler-specific processing to the element resources.

        Note
        ----
        Implementations modify `resources` in place.
        """

    def get_version_info(self) -> VersionInfo:
        """
        Get version information for the scheduler.

        The base implementation returns an empty mapping.
        """
        return {}

    def parse_submission_output(self, stdout: str) -> str | None:
        """
        Extract the submission ID from the output of a submission command.

        The base implementation always returns `None`.
        """
        return None

    @staticmethod
    def is_num_cores_supported(num_cores: int | None, core_range: Sequence[int]) -> bool:
        """
        Check whether a number of cores lies within a supported range of cores.

        `core_range` is read by index as `(first, step, last)`: a `step` of `None`
        means 1, `last` is inclusive, and a `last` of `None` means no upper limit.
        """
        first = core_range[0]
        stride = 1 if core_range[1] is None else core_range[1]
        # `last` is inclusive, hence the `+ 1` for the exclusive `range` stop.
        stop = sys.maxsize if core_range[2] is None else core_range[2] + 1
        return num_cores in range(first, stop, stride)

    @abstractmethod
    def get_submit_command(
        self,
        shell: Shell,
        js_path: str,
        deps: dict[Any, tuple[Any, ...]],
    ) -> list[str]:
        """
        Build the command used to submit a jobscript.
        """

    @abstractmethod
    def get_job_state_info(
        self, *, js_refs: Sequence[JSRefType] | None = None
    ) -> Mapping[str, JobscriptElementState | Mapping[int, JobscriptElementState]]:
        """
        Retrieve the state of one or more jobscripts.
        """

    @abstractmethod
    def wait_for_jobscripts(self, js_refs: list[JSRefType]) -> None:
        """
        Block until one or more jobscripts have completed.
        """

    @abstractmethod
    def cancel_jobs(
        self,
        js_refs: list[JSRefType],
        jobscripts: list[Jobscript] | None = None,
    ) -> None:
        """
        Cancel one or more jobscripts.
        """

    @abstractmethod
    def get_std_out_err_filename(self, js_idx: int, *args, **kwargs) -> str:
        """Name of the file holding the combined standard output and error streams."""

    @abstractmethod
    def get_stdout_filename(self, js_idx: int, *args, **kwargs) -> str:
        """Name of the file holding the standard output stream."""

    @abstractmethod
    def get_stderr_filename(self, js_idx: int, *args, **kwargs) -> str:
        """Name of the file holding the standard error stream."""
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@hydrate
class QueuedScheduler(Scheduler[str]):
    """
    Base class for schedulers that use a job submission system.

    Jobscripts are referenced by string job IDs. Any positional or keyword
    arguments not listed below are forwarded to :py:class:`Scheduler`.

    Parameters
    ----------
    directives: dict
        Scheduler directives. Each item is written verbatim in the jobscript as a
        scheduler directive, and is not processed in any way. If a value is `None`, the
        key is considered a flag-like directive. If a value is a list, multiple directives
        will be printed to the jobscript with the same key, but different values.
    options: dict
        Deprecated. Please use `directives` instead. If non-empty, a
        `DeprecationWarning` is issued and this value replaces `directives`.
    submit_cmd: str
        The submission command, if overridden from default.
    show_cmd: Sequence[str]
        The show command, if overridden from default.
    del_cmd: str
        The delete command, if overridden from default.
    js_cmd: str
        The job script command, if overridden from default. It is the prefix
        that :py:meth:`format_switch` places before each switch.
    login_nodes_cmd: Sequence[str]
        The login nodes command, if overridden from default.
    array_switch: str
        The switch to enable array jobs, if overridden from default.
    array_item_var: str
        The variable for array items, if overridden from default.
    """

    #: Default command for logging into nodes.
    DEFAULT_LOGIN_NODES_CMD: ClassVar[Sequence[str] | None] = None
    #: Default pattern for matching the names of login nodes.
    DEFAULT_LOGIN_NODE_MATCH: ClassVar[str] = "*login*"
    #: Default command for submitting a job.
    DEFAULT_SUBMIT_CMD: ClassVar[str]
    #: Default command for listing current submitted jobs.
    DEFAULT_SHOW_CMD: ClassVar[Sequence[str]]
    #: Default command for deleting a job.
    DEFAULT_DEL_CMD: ClassVar[str]
    #: Default marker for job control metadata in a job script.
    DEFAULT_JS_CMD: ClassVar[str]
    #: Default switch for enabling array mode.
    DEFAULT_ARRAY_SWITCH: ClassVar[str]
    #: Default shell variable containing the current array index.
    DEFAULT_ARRAY_ITEM_VAR: ClassVar[str]

    def __init__(
        self,
        directives: dict | None = None,
        options: dict | None = None,
        submit_cmd: str | None = None,
        show_cmd: Sequence[str] | None = None,
        del_cmd: str | None = None,
        js_cmd: str | None = None,
        login_nodes_cmd: Sequence[str] | None = None,
        array_switch: str | None = None,
        array_item_var: str | None = None,
        *args,
        **kwargs,
    ) -> None:
        super().__init__(*args, **kwargs)

        if options:
            # `options` is the deprecated spelling of `directives`; when given
            # it takes the place of `directives`.
            warnings.warn(
                f"{self.__class__.__name__!r}: Please use `directives` instead of "
                f"`options`, which will be removed in a future release.",
                DeprecationWarning,
                stacklevel=2,
            )
            directives = options

        # Any falsy argument (`None`, empty string/sequence) falls back to the
        # corresponding class-level default.
        self.directives = directives or {}
        self.submit_cmd: str = submit_cmd or self.DEFAULT_SUBMIT_CMD
        self.show_cmd = show_cmd or self.DEFAULT_SHOW_CMD
        self.del_cmd = del_cmd or self.DEFAULT_DEL_CMD
        self.js_cmd = js_cmd or self.DEFAULT_JS_CMD
        self.login_nodes_cmd = login_nodes_cmd or self.DEFAULT_LOGIN_NODES_CMD
        self.array_switch = array_switch or self.DEFAULT_ARRAY_SWITCH
        self.array_item_var = array_item_var or self.DEFAULT_ARRAY_ITEM_VAR

    @property
    @override
    def unique_properties(self) -> tuple[str, str, Any, Any]:
        """
        Unique properties, for hashing: the class name plus the submit, show
        and delete commands.
        """
        return (self.__class__.__name__, self.submit_cmd, self.show_cmd, self.del_cmd)

    def format_switch(self, switch: str) -> str:
        """
        Format a particular switch to use the JS command.

        Returns `js_cmd` and `switch` separated by a single space.
        """
        return f"{self.js_cmd} {switch}"

    def is_jobscript_active(self, job_ID: str) -> bool:
        """Query if a jobscript is running/pending.

        A jobscript counts as active when the scheduler returns any state
        information for its job ID.
        """
        return bool(self.get_job_state_info(js_refs=[job_ID]))

    @override
    def wait_for_jobscripts(self, js_refs: list[str]) -> None:
        """
        Wait for jobscripts to update their state.

        Polls the job state every two seconds until none of the given
        jobscripts is reported any more.
        """
        while js_refs:
            info: Mapping[str, Any] = self.get_job_state_info(js_refs=js_refs)
            if not info:
                break
            # Only keep polling the jobs the scheduler still reports.
            js_refs = list(info)
            time.sleep(2)

    @abstractmethod
    def format_directives(
        self,
        resources: ElementResources,
        num_elements: int,
        is_array: bool,
        sub_idx: int,
        js_idx: int,
    ) -> str:
        """
        Render directives in a way that the scheduler can handle.
        """

    @override
    def get_std_out_err_filename(
        self, js_idx: int, job_ID: str, array_idx: int | None = None
    ) -> str:
        """File name of combined standard output and error streams.

        Notes
        -----
        We use the standard output stream filename format for the combined output and
        error streams file.

        """
        return self.get_stdout_filename(js_idx=js_idx, job_ID=job_ID, array_idx=array_idx)
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""
|
|
2
|
+
A direct job "scheduler" that just runs immediate subprocesses.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
import shutil
|
|
7
|
+
import signal
|
|
8
|
+
from typing import overload, cast, TYPE_CHECKING
|
|
9
|
+
from typing_extensions import override, TypeAlias
|
|
10
|
+
import psutil
|
|
11
|
+
|
|
12
|
+
from hpcflow.sdk.typing import hydrate
|
|
13
|
+
from hpcflow.sdk.submission.enums import JobscriptElementState
|
|
14
|
+
from hpcflow.sdk.submission.schedulers import Scheduler
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from collections.abc import Callable, Mapping, Sequence
|
|
18
|
+
from typing import Any, ClassVar
|
|
19
|
+
from ...config.types import SchedulerConfigDescriptor
|
|
20
|
+
from ..jobscript import Jobscript
|
|
21
|
+
from ..shells.base import Shell
|
|
22
|
+
|
|
23
|
+
#: Reference to a directly-run jobscript: the process ID and the process's
#: command line.
DirectRef: TypeAlias = "tuple[int, list[str]]"


def _is_process_cmdline_equal(proc: psutil.Process, cmdline: list[str]) -> bool:
    """Check if the `cmdline` of a psutil `Process` is equal to the specified
    `cmdline`.

    Parameters
    ----------
    proc
        The process to inspect.
    cmdline
        The command line the process is expected to have.

    Returns
    -------
    bool
        `True` only if the process's command line could be read and matches
        `cmdline` exactly; `False` otherwise.
    """
    try:
        return proc.cmdline() == cmdline
    except (psutil.NoSuchProcess, psutil.ZombieProcess, psutil.AccessDenied):
        # The process no longer exists; or, on unix, it has completed but still
        # has a record; or we are not permitted to read its command line.
        # NOTE(review): a process whose command line we cannot read is treated
        # as "not ours" (e.g. a recycled PID owned by another user) - confirm.
        return False
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class DirectScheduler(Scheduler[DirectRef]):
    """
    A direct scheduler, that just runs jobs immediately as direct subprocesses.

    Jobscripts are referenced by a `(process ID, command line)` pair; the
    command line is compared as well as the PID when looking a process up.

    The correct subclass (:py:class:`DirectPosix` or :py:class:`DirectWindows`) should
    be used to create actual instances.

    """

    @classmethod
    @override
    def process_resources(
        cls, resources, scheduler_config: SchedulerConfigDescriptor
    ) -> None:
        """Perform scheduler-specific processing to the element resources.

        This implementation does nothing.
        """
        return

    @override
    def get_submit_command(
        self,
        shell: Shell,
        js_path: str,
        deps: dict[Any, tuple[Any, ...]],
    ) -> list[str]:
        """
        Get the concrete submission command.

        The command is obtained from the shell; `deps` is not used.
        """
        return shell.get_direct_submit_command(js_path)

    @staticmethod
    def __kill_processes(
        procs: list[psutil.Process],
        sig: signal.Signals = signal.SIGTERM,
        timeout: float | None = None,
        on_terminate: Callable[[psutil.Process], object] | None = None,
    ):
        """Signal the given processes and all of their descendants, then
        force-kill whichever are still alive after `timeout`.

        Parameters
        ----------
        procs
            The top-level processes to terminate.
        sig
            The signal sent to each process first.
        timeout
            How long to wait for the processes to terminate, passed to
            `psutil.wait_procs`.
        on_terminate
            Passed to `psutil.wait_procs` as its `callback`.
        """
        all_procs: list[psutil.Process] = []
        for process in procs:
            all_procs.append(process)
            try:
                all_procs.extend(process.children(recursive=True))
            except psutil.NoSuchProcess:
                # process ended before we could list its children
                pass

        for process in all_procs:
            try:
                process.send_signal(sig)
            except psutil.NoSuchProcess:
                pass
        _, alive = psutil.wait_procs(all_procs, timeout=timeout, callback=on_terminate)
        for process in alive:
            try:
                process.kill()
            except psutil.NoSuchProcess:
                # process ended between the wait and the kill
                pass

    @staticmethod
    def __get_jobscript_processes(js_refs: list[DirectRef]) -> list[psutil.Process]:
        """Get the processes that still exist and whose command line matches
        the one recorded in the corresponding reference."""
        procs: list[psutil.Process] = []
        for p_id, p_cmdline in js_refs:
            try:
                proc_i = psutil.Process(p_id)
            except psutil.NoSuchProcess:
                # process might have completed already
                continue
            if _is_process_cmdline_equal(proc_i, p_cmdline):
                procs.append(proc_i)
        return procs

    @overload
    @override
    @classmethod
    def wait_for_jobscripts(cls, js_refs: list[DirectRef]) -> None: ...

    @overload
    @classmethod
    def wait_for_jobscripts(
        cls,
        js_refs: list[DirectRef],
        *,
        callback: Callable[[psutil.Process], None],
    ) -> list[psutil.Process]: ...

    @classmethod
    def wait_for_jobscripts(
        cls,
        js_refs: list[DirectRef],
        *,
        callback: Callable[[psutil.Process], None] | None = None,
    ) -> list[psutil.Process] | None:
        """Wait until the specified jobscripts have completed.

        Parameters
        ----------
        js_refs
            References to the jobscript processes to wait for.
        callback
            Passed to `psutil.wait_procs` as its `callback`.

        Returns
        -------
        The processes that have gone if a `callback` was given, otherwise `None`.
        """
        procs = cls.__get_jobscript_processes(js_refs)
        (gone, alive) = psutil.wait_procs(procs, callback=callback)
        # no timeout was given, so nothing should still be alive
        assert not alive
        return gone if callback else None

    @override
    def get_job_state_info(
        self, *, js_refs: Sequence[DirectRef] | None = None
    ) -> Mapping[str, JobscriptElementState]:
        """Get the states of the specified jobscript processes.

        The result is keyed by the process ID as a string. Only processes that
        are still active appear in the output; with no `js_refs` the result is
        empty."""
        info: dict[str, JobscriptElementState] = {}
        for p_id, p_cmdline in js_refs or ():
            if self.is_jobscript_active(p_id, p_cmdline):
                # as far as the "scheduler" is concerned, all elements are running:
                info[str(p_id)] = JobscriptElementState.running

        return info

    @override
    def cancel_jobs(
        self,
        js_refs: list[DirectRef],
        jobscripts: list[Jobscript] | None = None,
    ) -> None:
        """
        Cancel some jobs.

        The jobscript processes and all of their child processes are signalled
        to terminate, and killed if still alive after three seconds.

        Parameters
        ----------
        js_refs
            References to the jobscript processes to cancel.
        jobscripts
            The jobscripts being cancelled.
        """
        procs = self.__get_jobscript_processes(js_refs)
        self._app.submission_logger.info(
            f"cancelling {self.__class__.__name__} jobscript processes: {procs}."
        )

        # `procs` omits references whose process has gone, so pair jobscripts
        # with PIDs via `js_refs` rather than by position in `procs`.
        # NOTE(review): presumes `jobscripts` is parallel to `js_refs` - confirm.
        js_by_pid: dict[int, Jobscript] = (
            {p_id: js for (p_id, _), js in zip(js_refs, jobscripts)}
            if jobscripts
            else {}
        )
        js_proc_id: dict[int, Jobscript] = {
            proc.pid: js_by_pid[proc.pid] for proc in procs if proc.pid in js_by_pid
        }

        def callback(proc: psutil.Process) -> None:
            if proc.pid not in js_proc_id:
                # child process of one of the jobscripts
                self._app.submission_logger.debug(
                    f"jobscript child process ({proc.pid}) killed"
                )

        self.__kill_processes(procs, timeout=3, on_terminate=callback)
        num_procs = len(procs)
        print(f"Cancelled {num_procs} jobscript{'' if num_procs == 1 else 's'}.")
        self._app.submission_logger.info("jobscripts cancel command executed.")

    def is_jobscript_active(self, process_ID: int, process_cmdline: list[str]) -> bool:
        """Query if a jobscript is running.

        The process must exist and its command line must match
        `process_cmdline`.

        Note that a "running" jobscript might be waiting on upstream jobscripts to
        complete.

        """
        try:
            proc = psutil.Process(process_ID)
        except psutil.NoSuchProcess:
            return False
        return _is_process_cmdline_equal(proc, process_cmdline)

    @override
    def get_std_out_err_filename(self, js_idx: int, **kwargs) -> str:
        """File name of combined standard output and error streams."""
        return f"js_{js_idx}_std.log"

    @override
    def get_stdout_filename(self, js_idx: int, **kwargs) -> str:
        """File name of the standard output stream file."""
        return f"js_{js_idx}_stdout.log"

    @override
    def get_stderr_filename(self, js_idx: int, **kwargs) -> str:
        """File name of the standard error stream file."""
        return f"js_{js_idx}_stderr.log"
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
@hydrate
class DirectPosix(DirectScheduler):
    """
    The direct scheduler to use on POSIX systems.

    Defines nothing beyond what :py:class:`DirectScheduler` provides.
    """
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
@hydrate
class DirectWindows(DirectScheduler):
    """
    A direct scheduler for Windows.

    """

    @override
    def get_submit_command(
        self, shell: Shell, js_path: str, deps: dict[Any, tuple[Any, ...]]
    ) -> list[str]:
        """
        Get the concrete submission command, with the executable (the first
        item) resolved to a full path where possible.
        """
        cmd = super().get_submit_command(shell, js_path, deps)
        # `Start-Process` (see `Jobscript._launch_direct_js_win`) seems to resolve the
        # executable, which means the process's `cmdline` might look different to what we
        # record; so let's resolve it ourselves:
        resolved = shutil.which(cmd[0])
        if resolved is not None:
            # `shutil.which` returns `None` when the executable cannot be found;
            # in that case keep the name as given rather than storing `None`.
            cmd[0] = resolved
        return cmd
|