hpcflow-new2 0.2.0a189__py3-none-any.whl → 0.2.0a199__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. hpcflow/__pyinstaller/hook-hpcflow.py +9 -6
  2. hpcflow/_version.py +1 -1
  3. hpcflow/app.py +1 -0
  4. hpcflow/data/scripts/bad_script.py +2 -0
  5. hpcflow/data/scripts/do_nothing.py +2 -0
  6. hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
  7. hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
  8. hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
  9. hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
  10. hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
  11. hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
  12. hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
  13. hpcflow/data/scripts/input_file_generator_basic.py +3 -0
  14. hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
  15. hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
  16. hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
  17. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
  18. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
  19. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
  20. hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
  21. hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
  22. hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
  23. hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +1 -1
  24. hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
  25. hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +1 -1
  26. hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
  27. hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
  28. hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
  29. hpcflow/data/scripts/output_file_parser_basic.py +3 -0
  30. hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
  31. hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
  32. hpcflow/data/scripts/script_exit_test.py +5 -0
  33. hpcflow/data/template_components/environments.yaml +1 -1
  34. hpcflow/sdk/__init__.py +26 -15
  35. hpcflow/sdk/app.py +2192 -768
  36. hpcflow/sdk/cli.py +506 -296
  37. hpcflow/sdk/cli_common.py +105 -7
  38. hpcflow/sdk/config/__init__.py +1 -1
  39. hpcflow/sdk/config/callbacks.py +115 -43
  40. hpcflow/sdk/config/cli.py +126 -103
  41. hpcflow/sdk/config/config.py +674 -318
  42. hpcflow/sdk/config/config_file.py +131 -95
  43. hpcflow/sdk/config/errors.py +125 -84
  44. hpcflow/sdk/config/types.py +148 -0
  45. hpcflow/sdk/core/__init__.py +25 -1
  46. hpcflow/sdk/core/actions.py +1771 -1059
  47. hpcflow/sdk/core/app_aware.py +24 -0
  48. hpcflow/sdk/core/cache.py +139 -79
  49. hpcflow/sdk/core/command_files.py +263 -287
  50. hpcflow/sdk/core/commands.py +145 -112
  51. hpcflow/sdk/core/element.py +828 -535
  52. hpcflow/sdk/core/enums.py +192 -0
  53. hpcflow/sdk/core/environment.py +74 -93
  54. hpcflow/sdk/core/errors.py +455 -52
  55. hpcflow/sdk/core/execute.py +207 -0
  56. hpcflow/sdk/core/json_like.py +540 -272
  57. hpcflow/sdk/core/loop.py +751 -347
  58. hpcflow/sdk/core/loop_cache.py +164 -47
  59. hpcflow/sdk/core/object_list.py +370 -207
  60. hpcflow/sdk/core/parameters.py +1100 -627
  61. hpcflow/sdk/core/rule.py +59 -41
  62. hpcflow/sdk/core/run_dir_files.py +21 -37
  63. hpcflow/sdk/core/skip_reason.py +7 -0
  64. hpcflow/sdk/core/task.py +1649 -1339
  65. hpcflow/sdk/core/task_schema.py +308 -196
  66. hpcflow/sdk/core/test_utils.py +191 -114
  67. hpcflow/sdk/core/types.py +440 -0
  68. hpcflow/sdk/core/utils.py +485 -309
  69. hpcflow/sdk/core/validation.py +82 -9
  70. hpcflow/sdk/core/workflow.py +2544 -1178
  71. hpcflow/sdk/core/zarr_io.py +98 -137
  72. hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
  73. hpcflow/sdk/demo/cli.py +53 -33
  74. hpcflow/sdk/helper/cli.py +18 -15
  75. hpcflow/sdk/helper/helper.py +75 -63
  76. hpcflow/sdk/helper/watcher.py +61 -28
  77. hpcflow/sdk/log.py +122 -71
  78. hpcflow/sdk/persistence/__init__.py +8 -31
  79. hpcflow/sdk/persistence/base.py +1360 -606
  80. hpcflow/sdk/persistence/defaults.py +6 -0
  81. hpcflow/sdk/persistence/discovery.py +38 -0
  82. hpcflow/sdk/persistence/json.py +568 -188
  83. hpcflow/sdk/persistence/pending.py +382 -179
  84. hpcflow/sdk/persistence/store_resource.py +39 -23
  85. hpcflow/sdk/persistence/types.py +318 -0
  86. hpcflow/sdk/persistence/utils.py +14 -11
  87. hpcflow/sdk/persistence/zarr.py +1337 -433
  88. hpcflow/sdk/runtime.py +44 -41
  89. hpcflow/sdk/submission/{jobscript_info.py → enums.py} +39 -12
  90. hpcflow/sdk/submission/jobscript.py +1651 -692
  91. hpcflow/sdk/submission/schedulers/__init__.py +167 -39
  92. hpcflow/sdk/submission/schedulers/direct.py +121 -81
  93. hpcflow/sdk/submission/schedulers/sge.py +170 -129
  94. hpcflow/sdk/submission/schedulers/slurm.py +291 -268
  95. hpcflow/sdk/submission/schedulers/utils.py +12 -2
  96. hpcflow/sdk/submission/shells/__init__.py +14 -15
  97. hpcflow/sdk/submission/shells/base.py +150 -29
  98. hpcflow/sdk/submission/shells/bash.py +283 -173
  99. hpcflow/sdk/submission/shells/os_version.py +31 -30
  100. hpcflow/sdk/submission/shells/powershell.py +228 -170
  101. hpcflow/sdk/submission/submission.py +1014 -335
  102. hpcflow/sdk/submission/types.py +140 -0
  103. hpcflow/sdk/typing.py +182 -12
  104. hpcflow/sdk/utils/arrays.py +71 -0
  105. hpcflow/sdk/utils/deferred_file.py +55 -0
  106. hpcflow/sdk/utils/hashing.py +16 -0
  107. hpcflow/sdk/utils/patches.py +12 -0
  108. hpcflow/sdk/utils/strings.py +33 -0
  109. hpcflow/tests/api/test_api.py +32 -0
  110. hpcflow/tests/conftest.py +27 -6
  111. hpcflow/tests/data/multi_path_sequences.yaml +29 -0
  112. hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
  113. hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
  114. hpcflow/tests/schedulers/slurm/test_slurm_submission.py +5 -2
  115. hpcflow/tests/scripts/test_input_file_generators.py +282 -0
  116. hpcflow/tests/scripts/test_main_scripts.py +866 -85
  117. hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
  118. hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
  119. hpcflow/tests/shells/wsl/test_wsl_submission.py +12 -4
  120. hpcflow/tests/unit/test_action.py +262 -75
  121. hpcflow/tests/unit/test_action_rule.py +9 -4
  122. hpcflow/tests/unit/test_app.py +33 -6
  123. hpcflow/tests/unit/test_cache.py +46 -0
  124. hpcflow/tests/unit/test_cli.py +134 -1
  125. hpcflow/tests/unit/test_command.py +71 -54
  126. hpcflow/tests/unit/test_config.py +142 -16
  127. hpcflow/tests/unit/test_config_file.py +21 -18
  128. hpcflow/tests/unit/test_element.py +58 -62
  129. hpcflow/tests/unit/test_element_iteration.py +50 -1
  130. hpcflow/tests/unit/test_element_set.py +29 -19
  131. hpcflow/tests/unit/test_group.py +4 -2
  132. hpcflow/tests/unit/test_input_source.py +116 -93
  133. hpcflow/tests/unit/test_input_value.py +29 -24
  134. hpcflow/tests/unit/test_jobscript_unit.py +757 -0
  135. hpcflow/tests/unit/test_json_like.py +44 -35
  136. hpcflow/tests/unit/test_loop.py +1396 -84
  137. hpcflow/tests/unit/test_meta_task.py +325 -0
  138. hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
  139. hpcflow/tests/unit/test_object_list.py +17 -12
  140. hpcflow/tests/unit/test_parameter.py +29 -7
  141. hpcflow/tests/unit/test_persistence.py +237 -42
  142. hpcflow/tests/unit/test_resources.py +20 -18
  143. hpcflow/tests/unit/test_run.py +117 -6
  144. hpcflow/tests/unit/test_run_directories.py +29 -0
  145. hpcflow/tests/unit/test_runtime.py +2 -1
  146. hpcflow/tests/unit/test_schema_input.py +23 -15
  147. hpcflow/tests/unit/test_shell.py +23 -2
  148. hpcflow/tests/unit/test_slurm.py +8 -7
  149. hpcflow/tests/unit/test_submission.py +38 -89
  150. hpcflow/tests/unit/test_task.py +352 -247
  151. hpcflow/tests/unit/test_task_schema.py +33 -20
  152. hpcflow/tests/unit/test_utils.py +9 -11
  153. hpcflow/tests/unit/test_value_sequence.py +15 -12
  154. hpcflow/tests/unit/test_workflow.py +114 -83
  155. hpcflow/tests/unit/test_workflow_template.py +0 -1
  156. hpcflow/tests/unit/utils/test_arrays.py +40 -0
  157. hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
  158. hpcflow/tests/unit/utils/test_hashing.py +65 -0
  159. hpcflow/tests/unit/utils/test_patches.py +5 -0
  160. hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
  161. hpcflow/tests/workflows/__init__.py +0 -0
  162. hpcflow/tests/workflows/test_directory_structure.py +31 -0
  163. hpcflow/tests/workflows/test_jobscript.py +334 -1
  164. hpcflow/tests/workflows/test_run_status.py +198 -0
  165. hpcflow/tests/workflows/test_skip_downstream.py +696 -0
  166. hpcflow/tests/workflows/test_submission.py +140 -0
  167. hpcflow/tests/workflows/test_workflows.py +160 -15
  168. hpcflow/tests/workflows/test_zip.py +18 -0
  169. hpcflow/viz_demo.ipynb +6587 -3
  170. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +8 -4
  171. hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
  172. hpcflow/sdk/core/parallel.py +0 -21
  173. hpcflow_new2-0.2.0a189.dist-info/RECORD +0 -158
  174. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
  175. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
  176. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0
hpcflow/sdk/submission/schedulers/slurm.py

@@ -2,25 +2,36 @@
 An interface to SLURM.
 """
 
-from pathlib import Path
+from __future__ import annotations
 import subprocess
 import time
-from typing import Dict, List, Tuple
+from typing import cast, TYPE_CHECKING
+from typing_extensions import override
+from hpcflow.sdk.typing import hydrate
+from hpcflow.sdk.core.enums import ParallelMode
 from hpcflow.sdk.core.errors import (
     IncompatibleParallelModeError,
     IncompatibleSLURMArgumentsError,
     IncompatibleSLURMPartitionError,
     UnknownSLURMPartitionError,
 )
-from hpcflow.sdk.core.parameters import ParallelMode
 from hpcflow.sdk.log import TimeIt
-from hpcflow.sdk.submission.jobscript_info import JobscriptElementState
-from hpcflow.sdk.submission.schedulers import Scheduler
+from hpcflow.sdk.submission.enums import JobscriptElementState
+from hpcflow.sdk.submission.schedulers import QueuedScheduler
 from hpcflow.sdk.submission.schedulers.utils import run_cmd
-from hpcflow.sdk.submission.shells.base import Shell
 
+if TYPE_CHECKING:
+    from collections.abc import Collection, Iterable, Iterator, Mapping, Sequence
+    from typing import Any, ClassVar
+    from ...config.types import SchedulerConfigDescriptor, SLURMPartitionsDescriptor
+    from ...core.element import ElementResources
+    from ..jobscript import Jobscript
+    from ..types import VersionInfo
+    from ..shells.base import Shell
 
-class SlurmPosix(Scheduler):
+
+@hydrate
+class SlurmPosix(QueuedScheduler):
     """
     A scheduler that uses SLURM.
 
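The rewritten import block defers typing-only imports behind TYPE_CHECKING, so that annotations (made lazy by `from __future__ import annotations`) can name heavy or circularly-dependent modules without importing them at runtime. A minimal standalone sketch of the pattern, using a hypothetical module name:

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only evaluated by static type checkers; never executed at runtime,
    # which avoids import cycles and import-time cost.
    from mypackage.heavy_module import HeavyResource  # hypothetical

def describe(resource: HeavyResource) -> str:
    # With PEP 563 semantics the annotation is a plain string at runtime,
    # so HeavyResource need not be importable here.
    return f"resource: {resource!r}"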
@@ -48,27 +59,29 @@ class SlurmPosix(Scheduler):
 
     """
 
-    _app_attr = "app"
-
     #: Default shell.
-    DEFAULT_SHELL_EXECUTABLE = "/bin/bash"
+    DEFAULT_SHELL_EXECUTABLE: ClassVar[str] = "/bin/bash"
     #: Default args for shebang line.
-    DEFAULT_SHEBANG_ARGS = ""
+    DEFAULT_SHEBANG_ARGS: ClassVar[str] = ""
     #: Default submission command.
-    DEFAULT_SUBMIT_CMD = "sbatch"
+    DEFAULT_SUBMIT_CMD: ClassVar[str] = "sbatch"
    #: Default command to show the queue state.
-    DEFAULT_SHOW_CMD = ["squeue", "--me"]
+    DEFAULT_SHOW_CMD: ClassVar[Sequence[str]] = ("squeue", "--me")
     #: Default cancel command.
-    DEFAULT_DEL_CMD = "scancel"
+    DEFAULT_DEL_CMD: ClassVar[str] = "scancel"
     #: Default job control directive prefix.
-    DEFAULT_JS_CMD = "#SBATCH"
+    DEFAULT_JS_CMD: ClassVar[str] = "#SBATCH"
     #: Default prefix to enable array processing.
-    DEFAULT_ARRAY_SWITCH = "--array"
+    DEFAULT_ARRAY_SWITCH: ClassVar[str] = "--array"
     #: Default shell variable with array ID.
-    DEFAULT_ARRAY_ITEM_VAR = "SLURM_ARRAY_TASK_ID"
+    DEFAULT_ARRAY_ITEM_VAR: ClassVar[str] = "SLURM_ARRAY_TASK_ID"
+    #: Number of times to try when querying the state.
+    NUM_STATE_QUERY_TRIES: ClassVar[int] = 5
+    #: Delay (in seconds) between attempts to query the state.
+    INTER_STATE_QUERY_DELAY: ClassVar[float] = 0.5
 
     #: Maps scheduler state codes to :py:class:`JobscriptElementState` values.
-    state_lookup = {
+    state_lookup: ClassVar[Mapping[str, JobscriptElementState]] = {
         "PENDING": JobscriptElementState.pending,
         "RUNNING": JobscriptElementState.running,
         "COMPLETING": JobscriptElementState.running,
@@ -79,16 +92,17 @@ class SlurmPosix(Scheduler):
         "TIMEOUT": JobscriptElementState.errored,
     }
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
     @classmethod
+    @override
     @TimeIt.decorator
-    def process_resources(cls, resources, scheduler_config: Dict) -> None:
+    def process_resources(
+        cls, resources: ElementResources, scheduler_config: SchedulerConfigDescriptor
+    ) -> None:
         """Perform scheduler-specific processing to the element resources.
 
-        Note: this mutates `resources`.
-
+        Note
+        ----
+        This mutates `resources`.
         """
         if resources.is_parallel:
             if resources.parallel_mode is None:
@@ -97,21 +111,17 @@ class SlurmPosix(Scheduler):
 
             if resources.parallel_mode is ParallelMode.SHARED:
                 if (resources.num_nodes and resources.num_nodes > 1) or (
-                    resources.SLURM_node_nodes and resources.SLURM_num_nodes > 1
+                    resources.SLURM_num_nodes and resources.SLURM_num_nodes > 1
                 ):
-                    raise IncompatibleParallelModeError(
-                        f"For the {resources.parallel_mode.name.lower()} parallel mode, "
-                        f"only a single node may be requested."
-                    )
+                    raise IncompatibleParallelModeError(resources.parallel_mode)
                 # consider `num_cores` and `num_threads` synonyms in this case:
-                if resources.SLURM_num_tasks and resources.SLURM_num_task != 1:
+                if resources.SLURM_num_tasks and resources.SLURM_num_tasks != 1:
                     raise IncompatibleSLURMArgumentsError(
                         f"For the {resources.parallel_mode.name.lower()} parallel mode, "
                         f"`SLURM_num_tasks` must be set to 1 (to ensure all requested "
                         f"cores reside on the same node)."
                     )
-                else:
-                    resources.SLURM_num_tasks = 1
+                resources.SLURM_num_tasks = 1
 
                 if resources.SLURM_num_cpus_per_task == 1:
                     raise IncompatibleSLURMArgumentsError(
@@ -120,28 +130,24 @@ class SlurmPosix(Scheduler):
                         f"number of threads/cores to use, and so must be greater than 1, "
                         f"but {resources.SLURM_num_cpus_per_task!r} was specified."
                     )
-                else:
-                    resources.num_threads = resources.num_threads or resources.num_cores
-                    if (
-                        not resources.num_threads
-                        and not resources.SLURM_num_cpus_per_task
-                    ):
-                        raise ValueError(
-                            f"For the {resources.parallel_mode.name.lower()} parallel "
-                            f"mode, specify `num_threads` (or its synonym for this "
-                            f"parallel mode: `num_cores`), or the SLURM-specific "
-                            f"parameter `SLURM_num_cpus_per_task`."
-                        )
-                    elif (
-                        resources.num_threads and resources.SLURM_num_cpus_per_task
-                    ) and (resources.num_threads != resources.SLURM_num_cpus_per_task):
-                        raise IncompatibleSLURMArgumentsError(
-                            f"Incompatible parameters for `num_cores`/`num_threads` "
-                            f"({resources.num_threads}) and `SLURM_num_cpus_per_task` "
-                            f"({resources.SLURM_num_cpus_per_task}) for the "
-                            f"{resources.parallel_mode.name.lower()} parallel mode."
-                        )
-                    resources.SLURM_num_cpus_per_task = resources.num_threads
+                resources.num_threads = resources.num_threads or resources.num_cores
+                if not resources.num_threads and not resources.SLURM_num_cpus_per_task:
+                    raise ValueError(
+                        f"For the {resources.parallel_mode.name.lower()} parallel "
+                        f"mode, specify `num_threads` (or its synonym for this "
+                        f"parallel mode: `num_cores`), or the SLURM-specific "
+                        f"parameter `SLURM_num_cpus_per_task`."
+                    )
+                elif (resources.num_threads and resources.SLURM_num_cpus_per_task) and (
+                    resources.num_threads != resources.SLURM_num_cpus_per_task
+                ):
+                    raise IncompatibleSLURMArgumentsError(
+                        f"Incompatible parameters for `num_cores`/`num_threads` "
+                        f"({resources.num_threads}) and `SLURM_num_cpus_per_task` "
+                        f"({resources.SLURM_num_cpus_per_task}) for the "
+                        f"{resources.parallel_mode.name.lower()} parallel mode."
+                    )
+                resources.SLURM_num_cpus_per_task = resources.num_threads
 
             elif resources.parallel_mode is ParallelMode.DISTRIBUTED:
                 if resources.num_threads:
@@ -197,9 +203,9 @@ class SlurmPosix(Scheduler):
         else:
             if resources.SLURM_is_parallel:
                 raise IncompatibleSLURMArgumentsError(
-                    f"Some specified SLURM-specific arguments (which indicate a parallel "
-                    f"job) conflict with the scheduler-agnostic arguments (which "
-                    f"indicate a serial job)."
+                    "Some specified SLURM-specific arguments (which indicate a parallel "
+                    "job) conflict with the scheduler-agnostic arguments (which "
+                    "indicate a serial job)."
                 )
             if not resources.SLURM_num_tasks:
                 resources.SLURM_num_tasks = 1
@@ -228,155 +234,162 @@ class SlurmPosix(Scheduler):
             try:
                 part = all_parts[resources.SLURM_partition]
             except KeyError:
-                raise UnknownSLURMPartitionError(
-                    f"The SLURM partition {resources.SLURM_partition!r} is not "
-                    f"specified in the configuration. Specified partitions are "
-                    f"{list(all_parts.keys())!r}."
-                )
+                raise UnknownSLURMPartitionError(resources.SLURM_partition, all_parts)
             # TODO: we when we support ParallelMode.HYBRID, these checks will have to
             # consider the total number of cores requested per node
             # (num_cores_per_node * num_threads)?
-            part_num_cores = part.get("num_cores")
-            part_num_cores_per_node = part.get("num_cores_per_node")
-            part_num_nodes = part.get("num_nodes")
-            part_para_modes = part.get("parallel_modes", [])
-            if (
-                num_cores
-                and part_num_cores
-                and not cls.is_num_cores_supported(num_cores, part_num_cores)
-            ):
+            part_num_cores = part.get("num_cores", ())
+            part_num_cores_per_node = part.get("num_cores_per_node", ())
+            part_num_nodes = part.get("num_nodes", ())
+            part_para_modes = part.get("parallel_modes", ())
+            if cls.__is_present_unsupported(num_cores, part_num_cores):
                 raise IncompatibleSLURMPartitionError(
-                    f"The SLURM partition {resources.SLURM_partition!r} is not "
-                    f"compatible with the number of cores requested: {num_cores!r}."
+                    resources.SLURM_partition, "number of cores", num_cores
                 )
-            if (
-                num_cores_per_node
-                and part_num_cores_per_node
-                and not cls.is_num_cores_supported(
-                    num_cores_per_node, part_num_cores_per_node
-                )
-            ):
+            if cls.__is_present_unsupported(num_cores_per_node, part_num_cores_per_node):
                 raise IncompatibleSLURMPartitionError(
-                    f"The SLURM partition {resources.SLURM_partition!r} is not "
-                    f"compatible with the number of cores per node requested: "
-                    f"{num_cores_per_node!r}."
+                    resources.SLURM_partition,
+                    "number of cores per node",
+                    num_cores_per_node,
                 )
-            if (
-                num_nodes
-                and part_num_nodes
-                and not cls.is_num_cores_supported(num_nodes, part_num_nodes)
-            ):
+            if cls.__is_present_unsupported(num_nodes, part_num_nodes):
                 raise IncompatibleSLURMPartitionError(
-                    f"The SLURM partition {resources.SLURM_partition!r} is not "
-                    f"compatible with the number of nodes requested: {num_nodes!r}."
+                    resources.SLURM_partition, "number of nodes", num_nodes
                 )
             if para_mode and para_mode.name.lower() not in part_para_modes:
                 raise IncompatibleSLURMPartitionError(
-                    f"The SLURM partition {resources.SLURM_partition!r} is not "
-                    f"compatible with the parallel mode requested: {para_mode!r}."
+                    resources.SLURM_partition, "parallel mode", para_mode
                 )
         else:
             # find the first compatible partition if one exists:
             # TODO: bug here? not finding correct partition?
-            part_match = False
             for part_name, part_info in all_parts.items():
-                part_num_cores = part_info.get("num_cores")
-                part_num_cores_per_node = part_info.get("num_cores_per_node")
-                part_num_nodes = part_info.get("num_nodes")
-                part_para_modes = part_info.get("parallel_modes", [])
-                if (
-                    num_cores
-                    and part_num_cores
-                    and cls.is_num_cores_supported(num_cores, part_num_cores)
-                ):
-                    part_match = True
-                else:
-                    part_match = False
-                    continue
-                if (
-                    num_cores_per_node
-                    and part_num_cores_per_node
-                    and cls.is_num_cores_supported(
-                        num_cores_per_node, part_num_cores_per_node
-                    )
-                ):
-                    part_match = True
-                else:
-                    part_match = False
-                    continue
-                if (
-                    num_nodes
-                    and part_num_nodes
-                    and cls.is_num_cores_supported(num_nodes, part_num_nodes)
+                if cls.__partition_matches(
+                    num_cores, num_cores_per_node, num_nodes, para_mode, part_info
                 ):
-                    part_match = True
-                else:
-                    part_match = False
-                    continue
-                if part_match:
-                    part_match = part_name
+                    resources.SLURM_partition = str(part_name)
                     break
-                if para_mode and para_mode.name.lower() not in part_para_modes:
-                    part_match = False
-                    continue
-                if part_match:
-                    part_match = part_name
-                    break
-            if part_match:
-                resources.SLURM_partition = part_match
 
-    def _format_core_request_lines(self, resources):
-        lns = []
-        if resources.SLURM_partition:
-            lns.append(f"{self.js_cmd} --partition {resources.SLURM_partition}")
+    @classmethod
+    def __is_present_unsupported(
+        cls, num_req: int | None, part_have: Sequence[int] | None
+    ) -> bool:
+        """
+        Test if information is present on both sides, but doesn't match.
+        """
+        return bool(
+            num_req and part_have and not cls.is_num_cores_supported(num_req, part_have)
+        )
 
-        if resources.SLURM_num_nodes:  # TODO: option for --exclusive ?
-            lns.append(f"{self.js_cmd} --nodes {resources.SLURM_num_nodes}")
+    @classmethod
+    def __is_present_supported(
+        cls, num_req: int | None, part_have: Sequence[int] | None
+    ) -> bool:
+        """
+        Test if information is present on both sides, and also matches.
+        """
+        return bool(
+            num_req and part_have and cls.is_num_cores_supported(num_req, part_have)
+        )
 
+    @classmethod
+    def __partition_matches(
+        cls,
+        num_cores: int | None,
+        num_cores_per_node: int | None,
+        num_nodes: int | None,
+        para_mode: ParallelMode | None,
+        part_info: SLURMPartitionsDescriptor,
+    ) -> bool:
+        """
+        Check whether a partition (part_name, part_info) matches the requested number
+        of cores and nodes.
+        """
+        part_num_cores = part_info.get("num_cores", [])
+        part_num_cores_per_node = part_info.get("num_cores_per_node", [])
+        part_num_nodes = part_info.get("num_nodes", [])
+        part_para_modes = part_info.get("parallel_modes", [])
+        if (
+            not cls.__is_present_supported(num_cores, part_num_cores)
+            or not cls.__is_present_supported(num_cores_per_node, part_num_cores_per_node)
+            or not cls.__is_present_supported(num_nodes, part_num_nodes)
+        ):
+            return False
+        # FIXME: Does the next check come above or below the check below?
+        # Surely not both!
+        part_match = True
+        if part_match:
+            return True
+        if para_mode and para_mode.name.lower() not in part_para_modes:
+            return False
+        if part_match:
+            return True
+        return False
+
+    def __format_core_request_lines(self, resources: ElementResources) -> Iterator[str]:
+        if resources.SLURM_partition:
+            yield f"{self.js_cmd} --partition {resources.SLURM_partition}"
+        if resources.SLURM_num_nodes:  # TODO: option for --exclusive ?
+            yield f"{self.js_cmd} --nodes {resources.SLURM_num_nodes}"
         if resources.SLURM_num_tasks:
-            lns.append(f"{self.js_cmd} --ntasks {resources.SLURM_num_tasks}")
-
+            yield f"{self.js_cmd} --ntasks {resources.SLURM_num_tasks}"
         if resources.SLURM_num_tasks_per_node:
-            lns.append(
-                f"{self.js_cmd} --ntasks-per-node {resources.SLURM_num_tasks_per_node}"
-            )
-
+            yield f"{self.js_cmd} --ntasks-per-node {resources.SLURM_num_tasks_per_node}"
         if resources.SLURM_num_cpus_per_task:
-            lns.append(
-                f"{self.js_cmd} --cpus-per-task {resources.SLURM_num_cpus_per_task}"
-            )
-
-        return lns
+            yield f"{self.js_cmd} --cpus-per-task {resources.SLURM_num_cpus_per_task}"
 
-    def _format_array_request(self, num_elements, resources):
+    def __format_array_request(self, num_elements: int, resources: ElementResources):
         # TODO: Slurm docs start indices at zero, why are we starting at one?
         # https://slurm.schedmd.com/sbatch.html#OPT_array
         max_str = f"%{resources.max_array_items}" if resources.max_array_items else ""
         return f"{self.js_cmd} {self.array_switch} 1-{num_elements}{max_str}"
 
-    def _format_std_stream_file_option_lines(self, is_array, sub_idx):
-        base = r"%x_"
-        if is_array:
-            base += r"%A.%a"
-        else:
-            base += r"%j"
-
-        base = f"./artifacts/submissions/{sub_idx}/{base}"
-        return [
-            f"{self.js_cmd} -o {base}.out",
-            f"{self.js_cmd} -e {base}.err",
-        ]
-
-    def format_options(self, resources, num_elements, is_array, sub_idx):
+    def get_stdout_filename(
+        self, js_idx: int, job_ID: str, array_idx: int | None = None
+    ) -> str:
+        """File name of the standard output stream file."""
+        array_idx_str = f".{array_idx}" if array_idx is not None else ""
+        return f"js_{js_idx}.sh_{job_ID}{array_idx_str}.out"
+
+    def get_stderr_filename(
+        self, js_idx: int, job_ID: str, array_idx: int | None = None
+    ) -> str:
+        """File name of the standard error stream file."""
+        array_idx_str = f".{array_idx}" if array_idx is not None else ""
+        return f"js_{js_idx}.sh_{job_ID}{array_idx_str}.err"
+
+    def __format_std_stream_file_option_lines(
+        self, is_array: bool, sub_idx: int, js_idx: int, combine_std: bool
+    ) -> Iterator[str]:
+        pattern = R"%x_%A.%a" if is_array else R"%x_%j"
+        base = f"./artifacts/submissions/{sub_idx}/js_std/{js_idx}/{pattern}"
+        yield f"{self.js_cmd} --output {base}.out"
+        if not combine_std:
+            yield f"{self.js_cmd} --error {base}.err"
+
+    @override
+    def format_options(
+        self,
+        resources: ElementResources,
+        num_elements: int,
+        is_array: bool,
+        sub_idx: int,
+        js_idx: int,
+    ) -> str:
         """
         Format the options to the scheduler.
         """
-        opts = []
-        opts.extend(self._format_core_request_lines(resources))
+        opts: list[str] = []
+        opts.extend(self.__format_core_request_lines(resources))
+
         if is_array:
-            opts.append(self._format_array_request(num_elements, resources))
+            opts.append(self.__format_array_request(num_elements, resources))
 
-        opts.extend(self._format_std_stream_file_option_lines(is_array, sub_idx))
+        opts.extend(
+            self.__format_std_stream_file_option_lines(
+                is_array, sub_idx, js_idx, resources.combine_jobscript_std
+            )
+        )
 
         for opt_k, opt_v in self.options.items():
             if isinstance(opt_v, list):
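The jobscript-option formatting now builds #SBATCH directive lines with generators rather than accumulating lists. A standalone sketch of the same approach, with simplified keyword parameters standing in for the real ElementResources object:

from __future__ import annotations

from collections.abc import Iterator

def format_core_request_lines(
    js_cmd: str = "#SBATCH",
    partition: str | None = None,
    num_nodes: int | None = None,
    num_tasks: int | None = None,
) -> Iterator[str]:
    # Yield one directive per requested resource; the caller joins them.
    if partition:
        yield f"{js_cmd} --partition {partition}"
    if num_nodes:
        yield f"{js_cmd} --nodes {num_nodes}"
    if num_tasks:
        yield f"{js_cmd} --ntasks {num_tasks}"

# list(format_core_request_lines(partition="compute", num_tasks=4))
# -> ['#SBATCH --partition compute', '#SBATCH --ntasks 4']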
@@ -389,8 +402,9 @@ class SlurmPosix(Scheduler):
 
         return "\n".join(opts) + "\n"
 
+    @override
     @TimeIt.decorator
-    def get_version_info(self):
+    def get_version_info(self) -> VersionInfo:
         vers_cmd = [self.submit_cmd, "--version"]
         proc = subprocess.run(
             args=vers_cmd,
@@ -402,18 +416,18 @@ class SlurmPosix(Scheduler):
         if stderr:
             print(stderr)
         name, version = stdout.split()
-        out = {
+        return {
             "scheduler_name": name,
             "scheduler_version": version,
         }
-        return out
 
+    @override
     def get_submit_command(
         self,
         shell: Shell,
         js_path: str,
-        deps: List[Tuple],
-    ) -> List[str]:
+        deps: dict[Any, tuple[Any, ...]],
+    ) -> list[str]:
         """
         Get the command to use to submit a job to the scheduler.
 
@@ -422,94 +436,101 @@ class SlurmPosix(Scheduler):
         List of argument words.
         """
         cmd = [self.submit_cmd, "--parsable"]
+        if deps:
+            cmd.append("--dependency")
+            cmd.append(",".join(self.__dependency_args(deps)))
+        cmd.append(js_path)
+        return cmd
 
-        dep_cmd = []
+    @staticmethod
+    def __dependency_args(deps: dict[Any, tuple[Any, ...]]) -> Iterator[str]:
         for job_ID, is_array_dep in deps.values():
-            dep_i_str = ""
             if is_array_dep:  # array dependency
-                dep_i_str += "aftercorr:"
+                yield f"aftercorr:{job_ID}"
             else:
-                dep_i_str += "afterany:"
-            dep_i_str += str(job_ID)
-            dep_cmd.append(dep_i_str)
-
-        if dep_cmd:
-            cmd.append(f"--dependency")
-            cmd.append(",".join(dep_cmd))
-
-        cmd.append(js_path)
-
-        return cmd
+                yield f"afterany:{job_ID}"
 
     def parse_submission_output(self, stdout: str) -> str:
         """Extract scheduler reference for a newly submitted jobscript"""
         if ";" in stdout:
-            job_ID, _ = stdout.split(";")  # since we submit with "--parsable"
-        else:
-            job_ID = stdout
-        return job_ID
+            return stdout.split(";")[0]  # since we submit with "--parsable"
+        # Try using the whole thing
+        return stdout
 
     @staticmethod
-    def _parse_job_IDs(job_ID_str: str):
-        """Parse the job ID column from the `squeue` command (the `%i` format option)."""
-        parts = job_ID_str.split("_")
-        base_job_ID, arr_idx = parts if len(parts) == 2 else (parts[0], None)
-        if arr_idx is not None:
-            try:
-                arr_idx = [int(arr_idx) - 1]  # zero-index
-            except ValueError:
-                # split on commas (e.g. "[5,8-40]")
-                _arr_idx = []
-                for i_range_str in arr_idx.strip("[]").split(","):
-                    if "-" in i_range_str:
-                        range_parts = i_range_str.split("-")
-                        if "%" in range_parts[1]:
-                            # indicates max concurrent array items; not needed
-                            range_parts[1] = range_parts[1].split("%")[0]
-                        i_args = [int(j) - 1 for j in range_parts]
-                        _arr_idx.extend(list(range(i_args[0], i_args[1] + 1)))
-                    else:
-                        _arr_idx.append(int(i_range_str) - 1)
-                arr_idx = _arr_idx
-        return base_job_ID, arr_idx
-
-    def _parse_job_states(self, stdout) -> Dict[str, Dict[int, JobscriptElementState]]:
+    def _parse_job_IDs(job_ID_str: str) -> tuple[str, None | list[int]]:
+        """
+        Parse the job ID column from the `squeue` command (the `%i` format option).
+
+        Returns
+        -------
+        job_id
+            The job identifier.
+        array_indices
+            The indices into the job array.
+        """
+        base_job_ID, *arr_idx_data = job_ID_str.split("_")
+        if not arr_idx_data:
+            return base_job_ID, None
+        arr_idx = arr_idx_data[0]
+        try:
+            return base_job_ID, [int(arr_idx) - 1]  # zero-index
+        except ValueError:
+            pass
+        # split on commas (e.g. "[5,8-40]")
+        _arr_idx: list[int] = []
+        for i_range_str in arr_idx.strip("[]").split(","):
+            if "-" in i_range_str:
+                _from, _to = i_range_str.split("-")
+                if "%" in _to:
+                    # indicates max concurrent array items; not needed
+                    _to = _to.split("%")[0]
+                _arr_idx.extend(range(int(_from) - 1, int(_to)))
+            else:
+                _arr_idx.append(int(i_range_str) - 1)
+        return base_job_ID, _arr_idx
+
+    def __parse_job_states(
+        self, stdout: str
+    ) -> dict[str, JobscriptElementState | dict[int, JobscriptElementState]]:
         """Parse output from Slurm `squeue` command with a simple format."""
-        info = {}
+        info: dict[str, JobscriptElementState | dict[int, JobscriptElementState]] = {}
         for ln in stdout.split("\n"):
             if not ln:
                 continue
-            ln_s = [i.strip() for i in ln.split()]
-            base_job_ID, arr_idx = self._parse_job_IDs(ln_s[0])
-            state = self.state_lookup.get(ln_s[1], None)
+            job_id, job_state, *_ = ln.split()
+            base_job_ID, arr_idx = self._parse_job_IDs(job_id)
+            state = self.state_lookup.get(job_state, JobscriptElementState.errored)
 
-            if base_job_ID not in info:
-                info[base_job_ID] = {}
-
-            for arr_idx_i in arr_idx or [None]:
-                info[base_job_ID][arr_idx_i] = state
+            if arr_idx is not None:
+                entry = cast(
+                    dict[int, JobscriptElementState], info.setdefault(base_job_ID, {})
+                )
+                for arr_idx_i in arr_idx:
+                    entry[arr_idx_i] = state
+            else:
+                info[base_job_ID] = state
 
         return info
 
-    def _query_job_states(self, job_IDs):
+    def __query_job_states(self, job_IDs: Iterable[str]) -> tuple[str, str]:
         """Query the state of the specified jobs."""
         cmd = [
-            "squeue",
-            "--me",
+            *self.show_cmd,
             "--noheader",
             "--format",
-            r"%40i %30T",
+            R"%200i %30T",  # job ID (<base_job_id>_<index> for array job) and job state
             "--jobs",
             ",".join(job_IDs),
         ]
-        return run_cmd(cmd, logger=self.app.submission_logger)
+        return run_cmd(cmd, logger=self._app.submission_logger)
 
-    def _get_job_valid_IDs(self, job_IDs=None):
+    def __get_job_valid_IDs(self, job_IDs: Collection[str] | None = None) -> set[str]:
         """Get a list of job IDs that are known by the scheduler, optionally filtered by
         specified job IDs."""
 
-        cmd = ["squeue", "--me", "--noheader", "--format", r"%F"]
-        stdout, stderr = run_cmd(cmd, logger=self.app.submission_logger)
+        cmd = [*self.show_cmd, "--noheader", "--format", r"%F"]
+        stdout, stderr = run_cmd(cmd, logger=self._app.submission_logger)
         if stderr:
             raise ValueError(
                 f"Could not get query Slurm jobs. Command was: {cmd!r}; stderr was: "
@@ -517,64 +538,66 @@ class SlurmPosix(Scheduler):
             )
         else:
             known_jobs = set(i.strip() for i in stdout.split("\n") if i.strip())
-            job_IDs = known_jobs.intersection(job_IDs or [])
-
-        return job_IDs
+            if job_IDs is None:
+                return known_jobs
+            return known_jobs.intersection(job_IDs)
 
+    @override
     def get_job_state_info(
-        self, js_refs: List[str] = None
-    ) -> Dict[str, Dict[int, JobscriptElementState]]:
+        self, *, js_refs: Sequence[str] | None = None
+    ) -> Mapping[str, JobscriptElementState | Mapping[int, JobscriptElementState]]:
         """Query the scheduler to get the states of all of this user's jobs, optionally
         filtering by specified job IDs.
 
         Jobs that are not in the scheduler's status output will not appear in the output
         of this method.
-
         """
 
         # if job_IDs are passed, then assume they are existant, otherwise retrieve valid
         # jobs:
-        if not js_refs:
-            js_refs = self._get_job_valid_IDs()
-        if not js_refs:
-            return {}
+        refs: Collection[str] = js_refs or self.__get_job_valid_IDs()
 
-        stdout, stderr = self._query_job_states(js_refs)
         count = 0
-        while stderr:
-            if "Invalid job id specified" in stderr and count < 5:
-                # the job might have finished; this only seems to happen if a single
-                # non-existant job ID is specified; for multiple non-existant jobs, no
-                # error is produced;
-                self.app.submission_logger.info(
-                    f"A specified job ID is non-existant; refreshing known job IDs..."
-                )
-                time.sleep(0.5)
-                js_refs = self._get_job_valid_IDs(js_refs)
-                if not js_refs:
-                    return {}
-                stdout, stderr = self._query_job_states(js_refs)
-                count += 1
-            else:
+        while refs:
+            stdout, stderr = self.__query_job_states(refs)
+            if not stderr:
+                return self.__parse_job_states(stdout)
+            if (
+                "Invalid job id specified" not in stderr
+                or count >= self.NUM_STATE_QUERY_TRIES
+            ):
                 raise ValueError(f"Could not get Slurm job states. Stderr was: {stderr}")
 
-        info = self._parse_job_states(stdout)
-        return info
+            # the job might have finished; this only seems to happen if a single
+            # non-existant job ID is specified; for multiple non-existant jobs, no
+            # error is produced;
+            self._app.submission_logger.info(
+                "A specified job ID is non-existant; refreshing known job IDs..."
+            )
+            time.sleep(self.INTER_STATE_QUERY_DELAY)
+            refs = self.__get_job_valid_IDs(refs)
+            count += 1
+        return {}
 
-    def cancel_jobs(self, js_refs: List[str], jobscripts: List = None):
+    @override
+    def cancel_jobs(
+        self,
+        js_refs: list[str],
+        jobscripts: list[Jobscript] | None = None,
+    ):
         """
         Cancel submitted jobs.
         """
-        cmd = [self.del_cmd] + js_refs
-        self.app.submission_logger.info(
+        cmd = [self.del_cmd, *js_refs]
+        self._app.submission_logger.info(
             f"cancelling {self.__class__.__name__} jobscripts with command: {cmd}."
         )
-        stdout, stderr = run_cmd(cmd, logger=self.app.submission_logger)
+        stdout, stderr = run_cmd(cmd, logger=self._app.submission_logger)
         if stderr:
             raise ValueError(
                 f"Could not get query {self.__class__.__name__} jobs. Command was: "
                 f"{cmd!r}; stderr was: {stderr}"
             )
-        self.app.submission_logger.info(
+        self._app.submission_logger.info(
             f"jobscripts cancel command executed; stdout was: {stdout}."
         )
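The state query in get_job_state_info is now retried a bounded number of times (NUM_STATE_QUERY_TRIES, sleeping INTER_STATE_QUERY_DELAY between attempts) instead of looping against a hard-coded count. The shape of that loop, sketched with stubbed query/revalidate callables rather than the real squeue helpers:

import time
from collections.abc import Callable, Collection

NUM_STATE_QUERY_TRIES = 5
INTER_STATE_QUERY_DELAY = 0.5  # seconds

def query_with_retry(
    refs: Collection[str],
    query: Callable[[Collection[str]], tuple[str, str]],
    revalidate: Callable[[Collection[str]], set[str]],
) -> dict[str, str]:
    """Retry while stale job IDs make the query fail; `query` returns a
    (stdout, stderr) pair, as run_cmd does in the diff above."""
    count = 0
    while refs:
        stdout, stderr = query(refs)
        if not stderr:
            return {"raw": stdout}  # stand-in for parsed job states
        if "Invalid job id specified" not in stderr or count >= NUM_STATE_QUERY_TRIES:
            raise ValueError(f"could not get job states; stderr: {stderr}")
        # A job may have left the queue; re-validate the IDs and retry.
        time.sleep(INTER_STATE_QUERY_DELAY)
        refs = revalidate(refs)
        count += 1
    return {}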