PyPI - hpcflow-new2 - Versions diffs - 0.2.0a189__py3-none-any.whl → 0.2.0a199__py3-none-any.whl - Mend

hpcflow-new2 0.2.0a189py3-none-any.whl → 0.2.0a199py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (176) hide show

hpcflow/__pyinstaller/hook-hpcflow.py +9 -6
hpcflow/_version.py +1 -1
hpcflow/app.py +1 -0
hpcflow/data/scripts/bad_script.py +2 -0
hpcflow/data/scripts/do_nothing.py +2 -0
hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
hpcflow/data/scripts/input_file_generator_basic.py +3 -0
hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +1 -1
hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +1 -1
hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
hpcflow/data/scripts/output_file_parser_basic.py +3 -0
hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
hpcflow/data/scripts/script_exit_test.py +5 -0
hpcflow/data/template_components/environments.yaml +1 -1
hpcflow/sdk/__init__.py +26 -15
hpcflow/sdk/app.py +2192 -768
hpcflow/sdk/cli.py +506 -296
hpcflow/sdk/cli_common.py +105 -7
hpcflow/sdk/config/__init__.py +1 -1
hpcflow/sdk/config/callbacks.py +115 -43
hpcflow/sdk/config/cli.py +126 -103
hpcflow/sdk/config/config.py +674 -318
hpcflow/sdk/config/config_file.py +131 -95
hpcflow/sdk/config/errors.py +125 -84
hpcflow/sdk/config/types.py +148 -0
hpcflow/sdk/core/__init__.py +25 -1
hpcflow/sdk/core/actions.py +1771 -1059
hpcflow/sdk/core/app_aware.py +24 -0
hpcflow/sdk/core/cache.py +139 -79
hpcflow/sdk/core/command_files.py +263 -287
hpcflow/sdk/core/commands.py +145 -112
hpcflow/sdk/core/element.py +828 -535
hpcflow/sdk/core/enums.py +192 -0
hpcflow/sdk/core/environment.py +74 -93
hpcflow/sdk/core/errors.py +455 -52
hpcflow/sdk/core/execute.py +207 -0
hpcflow/sdk/core/json_like.py +540 -272
hpcflow/sdk/core/loop.py +751 -347
hpcflow/sdk/core/loop_cache.py +164 -47
hpcflow/sdk/core/object_list.py +370 -207
hpcflow/sdk/core/parameters.py +1100 -627
hpcflow/sdk/core/rule.py +59 -41
hpcflow/sdk/core/run_dir_files.py +21 -37
hpcflow/sdk/core/skip_reason.py +7 -0
hpcflow/sdk/core/task.py +1649 -1339
hpcflow/sdk/core/task_schema.py +308 -196
hpcflow/sdk/core/test_utils.py +191 -114
hpcflow/sdk/core/types.py +440 -0
hpcflow/sdk/core/utils.py +485 -309
hpcflow/sdk/core/validation.py +82 -9
hpcflow/sdk/core/workflow.py +2544 -1178
hpcflow/sdk/core/zarr_io.py +98 -137
hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
hpcflow/sdk/demo/cli.py +53 -33
hpcflow/sdk/helper/cli.py +18 -15
hpcflow/sdk/helper/helper.py +75 -63
hpcflow/sdk/helper/watcher.py +61 -28
hpcflow/sdk/log.py +122 -71
hpcflow/sdk/persistence/__init__.py +8 -31
hpcflow/sdk/persistence/base.py +1360 -606
hpcflow/sdk/persistence/defaults.py +6 -0
hpcflow/sdk/persistence/discovery.py +38 -0
hpcflow/sdk/persistence/json.py +568 -188
hpcflow/sdk/persistence/pending.py +382 -179
hpcflow/sdk/persistence/store_resource.py +39 -23
hpcflow/sdk/persistence/types.py +318 -0
hpcflow/sdk/persistence/utils.py +14 -11
hpcflow/sdk/persistence/zarr.py +1337 -433
hpcflow/sdk/runtime.py +44 -41
hpcflow/sdk/submission/{jobscript_info.py → enums.py} +39 -12
hpcflow/sdk/submission/jobscript.py +1651 -692
hpcflow/sdk/submission/schedulers/__init__.py +167 -39
hpcflow/sdk/submission/schedulers/direct.py +121 -81
hpcflow/sdk/submission/schedulers/sge.py +170 -129
hpcflow/sdk/submission/schedulers/slurm.py +291 -268
hpcflow/sdk/submission/schedulers/utils.py +12 -2
hpcflow/sdk/submission/shells/__init__.py +14 -15
hpcflow/sdk/submission/shells/base.py +150 -29
hpcflow/sdk/submission/shells/bash.py +283 -173
hpcflow/sdk/submission/shells/os_version.py +31 -30
hpcflow/sdk/submission/shells/powershell.py +228 -170
hpcflow/sdk/submission/submission.py +1014 -335
hpcflow/sdk/submission/types.py +140 -0
hpcflow/sdk/typing.py +182 -12
hpcflow/sdk/utils/arrays.py +71 -0
hpcflow/sdk/utils/deferred_file.py +55 -0
hpcflow/sdk/utils/hashing.py +16 -0
hpcflow/sdk/utils/patches.py +12 -0
hpcflow/sdk/utils/strings.py +33 -0
hpcflow/tests/api/test_api.py +32 -0
hpcflow/tests/conftest.py +27 -6
hpcflow/tests/data/multi_path_sequences.yaml +29 -0
hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
hpcflow/tests/schedulers/slurm/test_slurm_submission.py +5 -2
hpcflow/tests/scripts/test_input_file_generators.py +282 -0
hpcflow/tests/scripts/test_main_scripts.py +866 -85
hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
hpcflow/tests/shells/wsl/test_wsl_submission.py +12 -4
hpcflow/tests/unit/test_action.py +262 -75
hpcflow/tests/unit/test_action_rule.py +9 -4
hpcflow/tests/unit/test_app.py +33 -6
hpcflow/tests/unit/test_cache.py +46 -0
hpcflow/tests/unit/test_cli.py +134 -1
hpcflow/tests/unit/test_command.py +71 -54
hpcflow/tests/unit/test_config.py +142 -16
hpcflow/tests/unit/test_config_file.py +21 -18
hpcflow/tests/unit/test_element.py +58 -62
hpcflow/tests/unit/test_element_iteration.py +50 -1
hpcflow/tests/unit/test_element_set.py +29 -19
hpcflow/tests/unit/test_group.py +4 -2
hpcflow/tests/unit/test_input_source.py +116 -93
hpcflow/tests/unit/test_input_value.py +29 -24
hpcflow/tests/unit/test_jobscript_unit.py +757 -0
hpcflow/tests/unit/test_json_like.py +44 -35
hpcflow/tests/unit/test_loop.py +1396 -84
hpcflow/tests/unit/test_meta_task.py +325 -0
hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
hpcflow/tests/unit/test_object_list.py +17 -12
hpcflow/tests/unit/test_parameter.py +29 -7
hpcflow/tests/unit/test_persistence.py +237 -42
hpcflow/tests/unit/test_resources.py +20 -18
hpcflow/tests/unit/test_run.py +117 -6
hpcflow/tests/unit/test_run_directories.py +29 -0
hpcflow/tests/unit/test_runtime.py +2 -1
hpcflow/tests/unit/test_schema_input.py +23 -15
hpcflow/tests/unit/test_shell.py +23 -2
hpcflow/tests/unit/test_slurm.py +8 -7
hpcflow/tests/unit/test_submission.py +38 -89
hpcflow/tests/unit/test_task.py +352 -247
hpcflow/tests/unit/test_task_schema.py +33 -20
hpcflow/tests/unit/test_utils.py +9 -11
hpcflow/tests/unit/test_value_sequence.py +15 -12
hpcflow/tests/unit/test_workflow.py +114 -83
hpcflow/tests/unit/test_workflow_template.py +0 -1
hpcflow/tests/unit/utils/test_arrays.py +40 -0
hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
hpcflow/tests/unit/utils/test_hashing.py +65 -0
hpcflow/tests/unit/utils/test_patches.py +5 -0
hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
hpcflow/tests/workflows/__init__.py +0 -0
hpcflow/tests/workflows/test_directory_structure.py +31 -0
hpcflow/tests/workflows/test_jobscript.py +334 -1
hpcflow/tests/workflows/test_run_status.py +198 -0
hpcflow/tests/workflows/test_skip_downstream.py +696 -0
hpcflow/tests/workflows/test_submission.py +140 -0
hpcflow/tests/workflows/test_workflows.py +160 -15
hpcflow/tests/workflows/test_zip.py +18 -0
hpcflow/viz_demo.ipynb +6587 -3
{hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +8 -4
hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
hpcflow/sdk/core/parallel.py +0 -21
hpcflow_new2-0.2.0a189.dist-info/RECORD +0 -158
{hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
{hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
{hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0

hpcflow/sdk/submission/submission.py CHANGED Viewed

@@ -4,16 +4,19 @@ A collection of submissions to a scheduler, generated from a workflow.
 from __future__ import annotations
 from collections import defaultdict
-from datetime import datetime, timedelta, timezone
-import enum
-import os
+import shutil
 from pathlib import Path
+import socket
 from textwrap import indent
-from typing import Dict, List, Optional, Tuple
+from typing import Any, Literal, overload, TYPE_CHECKING
+from typing_extensions import override
+import warnings
-from hpcflow.sdk import app
-from hpcflow.sdk.core.element import ElementResources
+from hpcflow.sdk.utils.strings import shorten_list_str
+import numpy as np
+from hpcflow.sdk.typing import hydrate
 from hpcflow.sdk.core.errors import (
     JobscriptSubmissionFailure,
     MissingEnvironmentError,
@@ -21,47 +24,50 @@ from hpcflow.sdk.core.errors import (
     MissingEnvironmentExecutableInstanceError,
     MultipleEnvironmentsError,
     SubmissionFailure,
+    OutputFileParserNoOutputError,
 )
 from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
 from hpcflow.sdk.core.object_list import ObjectListMultipleMatchError
+from hpcflow.sdk.core.utils import parse_timestamp, current_timestamp
+from hpcflow.sdk.submission.enums import SubmissionStatus
+from hpcflow.sdk.core import RUN_DIR_ARR_DTYPE
 from hpcflow.sdk.log import TimeIt
+from hpcflow.sdk.utils.strings import shorten_list_str
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Mapping, Sequence
+    from datetime import datetime
+    from typing import ClassVar, Literal
+    from rich.status import Status
+    from numpy.typing import NDArray
+    from .jobscript import Jobscript
+    from .enums import JobscriptElementState
+    from .schedulers import Scheduler
+    from .shells import Shell
+    from .types import SubmissionPart
+    from ..core.element import ElementActionRun
+    from ..core.environment import Environment
+    from ..core.object_list import EnvironmentsList
+    from ..core.workflow import Workflow
+    from ..core.cache import ObjectCache
+# jobscript attributes that are set persistently just after the jobscript has been
+# submitted to the scheduler:
+JOBSCRIPT_SUBMIT_TIME_KEYS = (
+    "submit_cmdline",
+    "scheduler_job_ID",
+    "process_ID",
+    "submit_time",
+)
+# submission attributes that are set persistently just after all of a submission's
+# jobscripts have been submitted:
+SUBMISSION_SUBMIT_TIME_KEYS = {
+    "submission_parts": dict,
+}
-def timedelta_format(td: timedelta) -> str:
-    """
-    Convert time delta to string in standard form.
-    """
-    days, seconds = td.days, td.seconds
-    hours = seconds // (60 * 60)
-    seconds -= hours * (60 * 60)
-    minutes = seconds // 60
-    seconds -= minutes * 60
-    return f"{days}-{hours:02}:{minutes:02}:{seconds:02}"
-def timedelta_parse(td_str: str) -> timedelta:
-    """
-    Parse a string in standard form as a time delta.
-    """
-    days, other = td_str.split("-")
-    days = int(days)
-    hours, mins, secs = [int(i) for i in other.split(":")]
-    return timedelta(days=days, hours=hours, minutes=mins, seconds=secs)
-class SubmissionStatus(enum.Enum):
-    """
-    The overall status of a submission.
-    """
-    #: Not yet submitted.
-    PENDING = 0
-    #: All jobscripts submitted successfully.
-    SUBMITTED = 1
-    #: Some jobscripts submitted successfully.
-    PARTIALLY_SUBMITTED = 2
+@hydrate
 class Submission(JSONLike):
     """
     A collection of jobscripts to be submitted to a scheduler.
@@ -82,7 +88,7 @@ class Submission(JSONLike):
         The execution environments to use.
     """
-    _child_objects = (
+    _child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
         ChildObjectSpec(
             name="jobscripts",
             class_name="Jobscript",
@@ -95,22 +101,39 @@ class Submission(JSONLike):
         ),
     )
+    TMP_DIR_NAME = "tmp"
+    LOG_DIR_NAME = "app_logs"
+    APP_STD_DIR_NAME = "app_std"
+    JS_DIR_NAME = "jobscripts"
+    JS_STD_DIR_NAME = "js_std"
+    JS_RUN_IDS_DIR_NAME = "js_run_ids"
+    JS_FUNCS_DIR_NAME = "js_funcs"
+    JS_WIN_PIDS_DIR_NAME = "js_pids"
+    JS_SCRIPT_INDICES_DIR_NAME = "js_script_indices"
+    SCRIPTS_DIR_NAME = "scripts"
+    COMMANDS_DIR_NAME = "commands"
+    WORKFLOW_APP_ALIAS = "wkflow_app"
     def __init__(
         self,
         index: int,
-        jobscripts: List[app.Jobscript],
-        workflow: Optional[app.Workflow] = None,
-        submission_parts: Optional[Dict] = None,
-        JS_parallelism: Optional[bool] = None,
-        environments: Optional[app.EnvironmentsList] = None,
+        jobscripts: list[Jobscript],
+        workflow: Workflow | None = None,
+        at_submit_metadata: dict[str, Any] | None = None,
+        JS_parallelism: bool | Literal["direct", "scheduled"] | None = None,
+        environments: EnvironmentsList | None = None,
     ):
         self._index = index
         self._jobscripts = jobscripts
-        self._submission_parts = submission_parts or {}
+        self._at_submit_metadata = at_submit_metadata or {
+            k: v() for k, v in SUBMISSION_SUBMIT_TIME_KEYS.items()
+        }
         self._JS_parallelism = JS_parallelism
-        self._environments = environments
+        self._environments = environments  # assigned by _set_environments
-        self._submission_parts_lst = None  # assigned on first access; datetime objects
+        self._submission_parts_lst: list[
+            SubmissionPart
+        ] | None = None  # assigned on first access
         if workflow:
             #: The workflow this is part of.
@@ -118,41 +141,61 @@ class Submission(JSONLike):
         self._set_parent_refs()
-        for js_idx, js in enumerate(self.jobscripts):
-            js._index = js_idx
+    def _ensure_JS_parallelism_set(self):
+        """Ensure that the JS_parallelism attribute is one of `True`, `False`, `'direct'`
+        or `'scheduled'`.
+        Notes
+        -----
+        This method is called after the Submission object is first created in
+        `Workflow._add_submission`.
+        """
+        # if JS_parallelism explicitly requested but store doesn't support, raise:
+        supports_JS_para = self.workflow._store._features.jobscript_parallelism
+        if self.JS_parallelism:
+            # could be: True | "direct" | "scheduled"
+            if not supports_JS_para:
+                # if status:
+                #     status.stop()
+                raise ValueError(
+                    f"Store type {self.workflow._store!r} does not support jobscript "
+                    f"parallelism."
+                )
+        elif self.JS_parallelism is None:
+            # by default only use JS parallelism for scheduled jobscripts:
+            self._JS_parallelism = "scheduled" if supports_JS_para else False
     @TimeIt.decorator
-    def _set_environments(self):
-        filterable = ElementResources.get_env_instance_filterable_attributes()
+    def _set_environments(self) -> None:
+        filterable = self._app.ElementResources.get_env_instance_filterable_attributes()
         # map required environments and executable labels to job script indices:
-        req_envs = defaultdict(lambda: defaultdict(set))
-        for js_idx, js_i in enumerate(self.jobscripts):
-            for run in js_i.all_EARs:
-                env_spec_h = tuple(zip(*run.env_spec.items()))  # hashable
-                for exec_label_j in run.action.get_required_executables():
-                    req_envs[env_spec_h][exec_label_j].add(js_idx)
-                if env_spec_h not in req_envs:
-                    req_envs[env_spec_h] = {}
+        req_envs: dict[
+            tuple[tuple[str, ...], tuple[Any, ...]], dict[str, set[int]]
+        ] = defaultdict(lambda: defaultdict(set))
+        with self.workflow.cached_merged_parameters():
+            # using the cache (for `run.env_spec_hashable` -> `run.resources`) should
+            # significantly speed up this loop, unless a large resources sequence is used:
+            for js_idx, all_EARs_i in enumerate(self.all_EARs_by_jobscript):
+                for run in all_EARs_i:
+                    env_spec_h = run.env_spec_hashable
+                    for exec_label_j in run.action.get_required_executables():
+                        req_envs[env_spec_h][exec_label_j].add(js_idx)
+                    # add any environment for which an executable was not required:
+                    if env_spec_h not in req_envs:
+                        req_envs[env_spec_h]
         # check these envs/execs exist in app data:
-        envs = []
+        envs: list[Environment] = []
         for env_spec_h, exec_js in req_envs.items():
-            env_spec = dict(zip(*env_spec_h))
-            non_name_spec = {k: v for k, v in env_spec.items() if k != "name"}
-            spec_str = f" with specifiers {non_name_spec!r}" if non_name_spec else ""
-            env_ref = f"{env_spec['name']!r}{spec_str}"
+            env_spec = self._app.Action.env_spec_from_hashable(env_spec_h)
             try:
-                env_i = self.app.envs.get(**env_spec)
+                env_i = self._app.envs.get(**env_spec)
             except ObjectListMultipleMatchError:
-                raise MultipleEnvironmentsError(
-                    f"Multiple environments {env_ref} are defined on this machine."
-                )
+                raise MultipleEnvironmentsError(env_spec)
             except ValueError:
-                raise MissingEnvironmentError(
-                    f"The environment {env_ref} is not defined on this machine, so the "
-                    f"submission cannot be created."
-                ) from None
+                raise MissingEnvironmentError(env_spec) from None
             else:
                 if env_i not in envs:
                     envs.append(env_i)
@@ -162,34 +205,28 @@ class Submission(JSONLike):
                     exec_i = env_i.executables.get(exec_i_lab)
                 except ValueError:
                     raise MissingEnvironmentExecutableError(
-                        f"The environment {env_ref} as defined on this machine has no "
-                        f"executable labelled {exec_i_lab!r}, which is required for this "
-                        f"submission, so the submission cannot be created."
+                        env_spec, exec_i_lab
                     ) from None
                 # check matching executable instances exist:
                 for js_idx_j in js_idx_set:
-                    js_j = self.jobscripts[js_idx_j]
-                    filter_exec = {j: getattr(js_j.resources, j) for j in filterable}
-                    exec_instances = exec_i.filter_instances(**filter_exec)
-                    if not exec_instances:
+                    js_res = self.jobscripts[js_idx_j].resources
+                    filter_exec = {j: getattr(js_res, j) for j in filterable}
+                    if not exec_i.filter_instances(**filter_exec):
                         raise MissingEnvironmentExecutableInstanceError(
-                            f"No matching executable instances found for executable "
-                            f"{exec_i_lab!r} of environment {env_ref} for jobscript "
-                            f"index {js_idx_j!r} with requested resources "
-                            f"{filter_exec!r}."
+                            env_spec, exec_i_lab, js_idx_j, filter_exec
                         )
         # save env definitions to the environments attribute:
-        self._environments = self.app.EnvironmentsList(envs)
+        self._environments = self._app.EnvironmentsList(envs)
-    def to_dict(self):
-        dct = super().to_dict()
+    @override
+    def _postprocess_to_dict(self, d: dict[str, Any]) -> dict[str, Any]:
+        dct = super()._postprocess_to_dict(d)
         del dct["_workflow"]
         del dct["_index"]
         del dct["_submission_parts_lst"]
-        dct = {k.lstrip("_"): v for k, v in dct.items()}
-        return dct
+        return {k.lstrip("_"): v for k, v in dct.items()}
     @property
     def index(self) -> int:
@@ -199,26 +236,29 @@ class Submission(JSONLike):
         return self._index
     @property
-    def environments(self) -> app.EnvironmentsList:
+    def environments(self) -> EnvironmentsList:
         """
         The execution environments to use.
         """
+        assert self._environments
         return self._environments
     @property
-    def submission_parts(self) -> List[Dict]:
-        """
-        Description of the parts of this submission.
-        """
-        if not self._submission_parts:
-            return []
+    def at_submit_metadata(self) -> dict[str, dict[str, Any]]:
+        return self.workflow._store.get_submission_at_submit_metadata(
+            sub_idx=self.index, metadata_attr=self._at_submit_metadata
+        )
+    @property
+    def _submission_parts(self) -> dict[str, list[int]]:
+        return self.at_submit_metadata["submission_parts"] or {}
+    @property
+    def submission_parts(self) -> list[SubmissionPart]:
         if self._submission_parts_lst is None:
             self._submission_parts_lst = [
                 {
-                    "submit_time": datetime.strptime(dt, self.workflow.ts_fmt)
-                    .replace(tzinfo=timezone.utc)
-                    .astimezone(),
+                    "submit_time": parse_timestamp(dt, self.workflow.ts_fmt),
                     "jobscripts": js_idx,
                 }
                 for dt, js_idx in self._submission_parts.items()
@@ -226,116 +266,89 @@ class Submission(JSONLike):
         return self._submission_parts_lst
     @TimeIt.decorator
-    def get_start_time(self, submit_time: str) -> Union[datetime, None]:
+    def get_start_time(self, submit_time: str) -> datetime | None:
         """Get the start time of a given submission part."""
-        js_idx = self._submission_parts[submit_time]
-        all_part_starts = []
-        for i in js_idx:
-            start_time = self.jobscripts[i].start_time
-            if start_time:
-                all_part_starts.append(start_time)
-        if all_part_starts:
-            return min(all_part_starts)
-        else:
-            return None
+        times = (
+            self.jobscripts[i].start_time for i in self._submission_parts[submit_time]
+        )
+        return min((t for t in times if t is not None), default=None)
     @TimeIt.decorator
-    def get_end_time(self, submit_time: str) -> Union[datetime, None]:
+    def get_end_time(self, submit_time: str) -> datetime | None:
         """Get the end time of a given submission part."""
-        js_idx = self._submission_parts[submit_time]
-        all_part_ends = []
-        for i in js_idx:
-            end_time = self.jobscripts[i].end_time
-            if end_time:
-                all_part_ends.append(end_time)
-        if all_part_ends:
-            return max(all_part_ends)
-        else:
-            return None
+        times = (self.jobscripts[i].end_time for i in self._submission_parts[submit_time])
+        return max((t for t in times if t is not None), default=None)
     @property
     @TimeIt.decorator
-    def start_time(self):
+    def start_time(self) -> datetime | None:
         """Get the first non-None start time over all submission parts."""
-        all_start_times = []
-        for submit_time in self._submission_parts:
-            start_i = self.get_start_time(submit_time)
-            if start_i:
-                all_start_times.append(start_i)
-        if all_start_times:
-            return max(all_start_times)
-        else:
-            return None
+        times = (
+            self.get_start_time(submit_time) for submit_time in self._submission_parts
+        )
+        return min((t for t in times if t is not None), default=None)
     @property
     @TimeIt.decorator
-    def end_time(self):
+    def end_time(self) -> datetime | None:
         """Get the final non-None end time over all submission parts."""
-        all_end_times = []
-        for submit_time in self._submission_parts:
-            end_i = self.get_end_time(submit_time)
-            if end_i:
-                all_end_times.append(end_i)
-        if all_end_times:
-            return max(all_end_times)
-        else:
-            return None
+        times = (self.get_end_time(submit_time) for submit_time in self._submission_parts)
+        return max((t for t in times if t is not None), default=None)
     @property
-    def jobscripts(self) -> List:
+    def jobscripts(self) -> list[Jobscript]:
         """
         The jobscripts in this submission.
         """
         return self._jobscripts
     @property
-    def JS_parallelism(self):
+    def JS_parallelism(self) -> bool | Literal["direct", "scheduled"] | None:
         """
         Whether to exploit jobscript parallelism.
         """
         return self._JS_parallelism
     @property
-    def workflow(self) -> List:
+    def workflow(self) -> Workflow:
         """
         The workflow this is part of.
         """
         return self._workflow
     @workflow.setter
-    def workflow(self, wk):
+    def workflow(self, wk: Workflow):
         self._workflow = wk
     @property
-    def jobscript_indices(self) -> Tuple[int]:
+    def jobscript_indices(self) -> tuple[int, ...]:
         """All associated jobscript indices."""
-        return tuple(i.index for i in self.jobscripts)
+        return tuple(js.index for js in self.jobscripts)
     @property
-    def submitted_jobscripts(self) -> Tuple[int]:
+    def submitted_jobscripts(self) -> tuple[int, ...]:
         """Jobscript indices that have been successfully submitted."""
-        return tuple(j for i in self.submission_parts for j in i["jobscripts"])
+        return tuple(j for sp in self.submission_parts for j in sp["jobscripts"])
     @property
-    def outstanding_jobscripts(self) -> Tuple[int]:
+    def outstanding_jobscripts(self) -> tuple[int, ...]:
         """Jobscript indices that have not yet been successfully submitted."""
-        return tuple(set(self.jobscript_indices) - set(self.submitted_jobscripts))
+        return tuple(set(self.jobscript_indices).difference(self.submitted_jobscripts))
     @property
-    def status(self):
+    def status(self) -> SubmissionStatus:
         """
         The status of this submission.
         """
         if not self.submission_parts:
             return SubmissionStatus.PENDING
+        elif set(self.submitted_jobscripts) == set(self.jobscript_indices):
+            return SubmissionStatus.SUBMITTED
         else:
-            if set(self.submitted_jobscripts) == set(self.jobscript_indices):
-                return SubmissionStatus.SUBMITTED
-            else:
-                return SubmissionStatus.PARTIALLY_SUBMITTED
+            return SubmissionStatus.PARTIALLY_SUBMITTED
     @property
-    def needs_submit(self):
+    def needs_submit(self) -> bool:
         """
         Whether this submission needs a submit to be done.
         """
@@ -345,131 +358,695 @@ class Submission(JSONLike):
         )
     @property
-    def path(self):
+    def needs_app_log_dir(self) -> bool:
         """
-        The path to files associated with this submission.
+        Whether this submission requires an app log directory.
         """
-        return self.workflow.submissions_path / str(self.index)
+        for js in self.jobscripts:
+            if js.resources.write_app_logs:
+                return True
+        return False
     @property
-    def all_EAR_IDs(self):
+    def needs_win_pids_dir(self) -> bool:
         """
-        The IDs of all EARs in this submission.
+        Whether this submission requires a directory for process ID files (Windows only).
+        """
+        for js in self.jobscripts:
+            if js.os_name == "nt":
+                return True
+        return False
+    @property
+    def needs_script_indices_dir(self) -> bool:
+        """
+        Whether this submission requires a directory for combined-script script ID files.
+        """
+        for js in self.jobscripts:
+            if js.resources.combine_scripts:
+                return True
+        return False
+    @classmethod
+    def get_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The directory path to files associated with the specified submission.
+        """
+        return submissions_path / str(sub_idx)
+    @classmethod
+    def get_tmp_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the temporary files directory, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.TMP_DIR_NAME
+    @classmethod
+    def get_app_log_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the app log directory, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.LOG_DIR_NAME
+    @staticmethod
+    def get_app_log_file_name(run_ID: int | str) -> str:
+        """
+        The app log file name.
+        """
+        # TODO: consider combine_app_logs argument
+        return f"r_{run_ID}.log"
+    @classmethod
+    def get_app_log_file_path(cls, submissions_path: Path, sub_idx: int, run_ID: int):
+        """
+        The file path to the app log, for the specified submission.
+        """
+        return (
+            cls.get_path(submissions_path, sub_idx)
+            / cls.LOG_DIR_NAME
+            / cls.get_app_log_file_name(run_ID)
+        )
+    @classmethod
+    def get_app_std_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the app standard output and error stream files directory, for the
+        specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.APP_STD_DIR_NAME
+    @classmethod
+    def get_js_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the jobscript files directory, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_DIR_NAME
+    @classmethod
+    def get_js_std_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the jobscript standard output and error files directory, for the
+        specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_STD_DIR_NAME
+    @classmethod
+    def get_js_run_ids_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing jobscript run IDs, for the specified
+        submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_RUN_IDS_DIR_NAME
+    @classmethod
+    def get_js_funcs_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing the shell functions that are invoked within
+        jobscripts and command files, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_FUNCS_DIR_NAME
+    @classmethod
+    def get_js_win_pids_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing process ID files (Windows only), for the
+        specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_WIN_PIDS_DIR_NAME
+    @classmethod
+    def get_js_script_indices_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing script indices for combined-script jobscripts
+        only, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.JS_SCRIPT_INDICES_DIR_NAME
+    @classmethod
+    def get_scripts_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing action scripts, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.SCRIPTS_DIR_NAME
+    @classmethod
+    def get_commands_path(cls, submissions_path: Path, sub_idx: int) -> Path:
+        """
+        The path to the directory containing command files, for the specified submission.
+        """
+        return cls.get_path(submissions_path, sub_idx) / cls.COMMANDS_DIR_NAME
+    @property
+    def path(self) -> Path:
+        """
+        The path to the directory containing files associated with this submission.
+        """
+        return self.get_path(self.workflow.submissions_path, self.index)
+    @property
+    def tmp_path(self) -> Path:
+        """
+        The path to the temporary files directory for this submission.
+        """
+        return self.get_tmp_path(self.workflow.submissions_path, self.index)
+    @property
+    def app_log_path(self) -> Path:
+        """
+        The path to the app log directory for this submission.
+        """
+        return self.get_app_log_path(self.workflow.submissions_path, self.index)
+    @property
+    def app_std_path(self) -> Path:
+        """
+        The path to the app standard output and error stream files directory, for
+        this submission.
+        """
+        return self.get_app_std_path(self.workflow.submissions_path, self.index)
+    @property
+    def js_path(self) -> Path:
+        """
+        The path to the jobscript files directory, for this submission.
+        """
+        return self.get_js_path(self.workflow.submissions_path, self.index)
+    @property
+    def js_std_path(self) -> Path:
+        """
+        The path to the jobscript standard output and error files directory, for this
+        submission.
+        """
+        return self.get_js_std_path(self.workflow.submissions_path, self.index)
+    @property
+    def js_run_ids_path(self) -> Path:
+        """
+        The path to the directory containing jobscript run IDs, for this submission.
+        """
+        return self.get_js_run_ids_path(self.workflow.submissions_path, self.index)
+    @property
+    def js_funcs_path(self) -> Path:
         """
-        return [i for js in self.jobscripts for i in js.all_EAR_IDs]
+        The path to the directory containing the shell functions that are invoked within
+        jobscripts and command files, for this submission.
+        """
+        return self.get_js_funcs_path(self.workflow.submissions_path, self.index)
+    @property
+    def js_win_pids_path(self) -> Path:
+        """
+        The path to the directory containing process ID files (Windows only), for this
+        submission.
+        """
+        return self.get_js_win_pids_path(self.workflow.submissions_path, self.index)
+    @property
+    def js_script_indices_path(self) -> Path:
+        """
+        The path to the directory containing script indices for combined-script jobscripts
+        only, for this submission.
+        """
+        return self.get_js_script_indices_path(self.workflow.submissions_path, self.index)
+    @property
+    def scripts_path(self) -> Path:
+        """
+        The path to the directory containing action scripts, for this submission.
+        """
+        return self.get_scripts_path(self.workflow.submissions_path, self.index)
     @property
-    def all_EARs(self):
+    def commands_path(self) -> Path:
         """
-        All EARs in this this submission.
+        The path to the directory containing command files, for this submission.
         """
-        return [i for js in self.jobscripts for i in js.all_EARs]
+        return self.get_commands_path(self.workflow.submissions_path, self.index)
     @property
     @TimeIt.decorator
-    def EARs_by_elements(self):
+    def all_EAR_IDs(self) -> Iterable[int]:
         """
-        All EARs in this submission, grouped by element.
+        The IDs of all EARs in this submission.
         """
-        task_elem_EARs = defaultdict(lambda: defaultdict(list))
-        for i in self.all_EARs:
-            task_elem_EARs[i.task.index][i.element.index].append(i)
-        return task_elem_EARs
+        return (i for js in self.jobscripts for i in js.all_EAR_IDs)
     @property
-    def abort_EARs_file_name(self):
+    @TimeIt.decorator
+    def all_EARs(self) -> Iterable[ElementActionRun]:
         """
-        The name of a file describing what EARs have aborted.
+        All EARs in this submission.
         """
-        return f"abort_EARs.txt"
+        return (ear for js in self.jobscripts for ear in js.all_EARs)
+    @property
+    @TimeIt.decorator
+    def all_EARs_IDs_by_jobscript(self) -> list[np.ndarray]:
+        return [i.all_EAR_IDs for i in self.jobscripts]
     @property
-    def abort_EARs_file_path(self):
+    @TimeIt.decorator
+    def all_EARs_by_jobscript(self) -> list[list[ElementActionRun]]:
+        ids = [i.all_EAR_IDs for i in self.jobscripts]
+        all_EARs = {i.id_: i for i in self.workflow.get_EARs_from_IDs(self.all_EAR_IDs)}
+        return [[all_EARs[i] for i in js_ids] for js_ids in ids]
+    @property
+    @TimeIt.decorator
+    def EARs_by_elements(self) -> Mapping[int, Mapping[int, Sequence[ElementActionRun]]]:
         """
-        The path to the file describing what EARs have aborted in this submission.
+        All EARs in this submission, grouped by element.
         """
-        return self.path / self.abort_EARs_file_name
+        task_elem_EARs: dict[int, dict[int, list[ElementActionRun]]] = defaultdict(
+            lambda: defaultdict(list)
+        )
+        for ear in self.all_EARs:
+            task_elem_EARs[ear.task.index][ear.element.index].append(ear)
+        return task_elem_EARs
+    @property
+    def is_scheduled(self) -> tuple[bool, ...]:
+        """Return whether each jobscript of this submission uses a scheduler or not."""
+        return tuple(i.is_scheduled for i in self.jobscripts)
+    @overload
+    def get_active_jobscripts(
+        self, as_json: Literal[False] = False
+    ) -> Mapping[int, Mapping[int, Mapping[int, JobscriptElementState]]]:
+        ...
+    @overload
+    def get_active_jobscripts(
+        self, as_json: Literal[True]
+    ) -> Mapping[int, Mapping[int, Mapping[int, str]]]:
+        ...
     @TimeIt.decorator
     def get_active_jobscripts(
-        self, as_json: bool = False
-    ) -> List[Tuple[int, Dict[int, JobscriptElementState]]]:
+        self,
+        as_json: Literal[True] | Literal[False] = False,  # TODO: why can't we use bool?
+    ) -> Mapping[int, Mapping[int, Mapping[int, JobscriptElementState | str]]]:
         """Get jobscripts that are active on this machine, and their active states."""
-        # this returns: {JS_IDX: {JS_ELEMENT_IDX: STATE}}
+        # this returns: {JS_IDX: {BLOCK_IDX: {JS_ELEMENT_IDX: STATE}}}
         # TODO: query the scheduler once for all jobscripts?
-        out = {}
-        for js in self.jobscripts:
-            active_states = js.get_active_states(as_json=as_json)
-            if active_states:
-                out[js.index] = active_states
-        return out
+        return {
+            js.index: act_states
+            for js in self.jobscripts
+            if (act_states := js.get_active_states(as_json=as_json))
+        }
-    def _write_abort_EARs_file(self):
-        with self.abort_EARs_file_path.open(mode="wt", newline="\n") as fp:
-            # write a single line for each EAR currently in the workflow:
-            fp.write("\n".join("0" for _ in range(self.workflow.num_EARs)) + "\n")
-    def _set_run_abort(self, run_ID: int):
-        """Modify the abort runs file to indicate a specified run should be aborted."""
-        with self.abort_EARs_file_path.open(mode="rt", newline="\n") as fp:
-            lines = fp.read().splitlines()
-        lines[run_ID] = "1"
-        # write a new temporary run-abort file:
-        tmp_suffix = self.abort_EARs_file_path.suffix + ".tmp"
-        tmp = self.abort_EARs_file_path.with_suffix(tmp_suffix)
-        self.app.submission_logger.debug(f"Creating temporary run abort file: {tmp!r}.")
-        with tmp.open(mode="wt", newline="\n") as fp:
-            fp.write("\n".join(i for i in lines) + "\n")
-        # atomic rename, overwriting original:
-        self.app.submission_logger.debug(
-            "Replacing original run abort file with new temporary file."
+    @TimeIt.decorator
+    def _write_scripts(
+        self, cache: ObjectCache, status: Status | None = None
+    ) -> tuple[dict[int, int | None], NDArray, dict[int, list[Path]]]:
+        """Write to disk all action scripts associated with this submission."""
+        # TODO: rename this method
+        # TODO: need to check is_snippet_script is exclusive? i.e. only `script` and no
+        # `commands` in the action?
+        # TODO: scripts must have the same exe and the same environment as well?
+        # TODO: env_spec should be included in jobscript hash if combine_scripts=True ?
+        actions_by_schema: dict[str, dict[int, set]] = defaultdict(
+            lambda: defaultdict(set)
         )
-        os.replace(src=tmp, dst=self.abort_EARs_file_path)
+        combined_env_specs = {}
+        # task insert IDs and action indices for each combined_scripts jobscript:
+        combined_actions = {}
+        cmd_hashes = defaultdict(set)
+        num_runs_tot = sum(len(js.all_EAR_IDs) for js in self.jobscripts)
+        run_indices = np.ones((num_runs_tot, 9), dtype=int) * -1
+        run_inp_files = defaultdict(
+            list
+        )  # keys are `run_idx`, values are Paths to copy to run dir
+        run_cmd_file_names: dict[int, int | None] = {}  # None if no commands to write
+        run_idx = 0
+        if status:
+            status.update(f"Adding new submission: processing run 1/{num_runs_tot}.")
+        all_runs = cache.runs
+        assert all_runs is not None
+        runs_ids_by_js = self.all_EARs_IDs_by_jobscript
+        with self.workflow.cached_merged_parameters():
+            for js in self.jobscripts:
+                js_idx = js.index
+                js_run_0 = all_runs[runs_ids_by_js[js.index][0]]
+                if js.resources.combine_scripts:
+                    # this will be one or more snippet scripts that need to be combined into
+                    # one script for the whole jobscript
+                    # need to write one script + one commands file for the whole jobscript
+                    # env_spec will be the same for all runs of this jobscript:
+                    combined_env_specs[js_idx] = js_run_0.env_spec
+                    combined_actions[js_idx] = [
+                        [j[0:2] for j in i.task_actions] for i in js.blocks
+                    ]
+                for idx, run_id in enumerate(js.all_EAR_IDs):
+                    run = all_runs[run_id]
+                    run_indices[run_idx] = [
+                        run.task.insert_ID,
+                        run.element.id_,
+                        run.element_iteration.id_,
+                        run.id_,
+                        run.element.index,
+                        run.element_iteration.index,
+                        run.element_action.action_idx,
+                        run.index,
+                        int(run.action.requires_dir),
+                    ]
+                    run_idx += 1
+                    if status and run_idx % 10 == 0:
+                        status.update(
+                            f"Adding new submission: processing run {run_idx}/{num_runs_tot}."
+                        )
+                    if js.resources.combine_scripts:
+                        if idx == 0:
+                            # the commands file for a combined jobscript won't have
+                            # any parameter data in the command line, so should raise
+                            # if something is found to be unset:
+                            run.try_write_commands(
+                                environments=self.environments,
+                                jobscript=js,
+                                raise_on_unset=True,
+                            )
+                        run_cmd_file_names[run.id_] = None
+                    else:
+                        if run.is_snippet_script:
+                            actions_by_schema[run.action.task_schema.name][
+                                run.element_action.action_idx
+                            ].add(run.env_spec_hashable)
+                        if run.action.commands:
+                            hash_i = run.get_commands_file_hash()
+                            # TODO: could further reduce number of files in the case the data
+                            # indices hash is the same: if commands objects are the same and
+                            # environment objects are the same, then the files will be the
+                            # same, even if runs come from different task schemas/actions...
+                            if hash_i not in cmd_hashes:
+                                try:
+                                    run.try_write_commands(
+                                        environments=self.environments,
+                                        jobscript=js,
+                                    )
+                                except OutputFileParserNoOutputError:
+                                    # no commands to write, might be used just for saving
+                                    # files
+                                    run_cmd_file_names[run.id_] = None
+                            cmd_hashes[hash_i].add(run.id_)
+                        else:
+                            run_cmd_file_names[run.id_] = None
+                    if run.action.requires_dir:
+                        # TODO: what is type of `path`?
+                        for name, path in run.get("input_files", {}).items():
+                            if path:
+                                run_inp_files[run_idx].append(path)
+        for run_ids in cmd_hashes.values():
+            run_ids_srt = sorted(run_ids)
+            root_id = run_ids_srt[0]  # used for command file name for this group
+            # TODO: could store multiple IDs to reduce number of files created
+            for run_id_i in run_ids_srt:
+                if run_id_i not in run_cmd_file_names:
+                    run_cmd_file_names[run_id_i] = root_id
+        if status:
+            status.update("Adding new submission: writing scripts...")
+        seen: dict[int, Path] = {}
+        combined_script_data: dict[
+            int, dict[int, list[tuple[str, Path, bool]]]
+        ] = defaultdict(lambda: defaultdict(list))
+        for task in self.workflow.tasks:
+            for schema in task.template.schemas:
+                if schema.name in actions_by_schema:
+                    for idx, action in enumerate(schema.actions):
+                        if not action.script:
+                            continue
+                        for env_spec_h in actions_by_schema[schema.name][idx]:
+                            env_spec = action.env_spec_from_hashable(env_spec_h)
+                            name, snip_path, specs = action.get_script_artifact_name(
+                                env_spec=env_spec,
+                                act_idx=idx,
+                                ret_specifiers=True,
+                            )
+                            script_hash = action.get_script_determinant_hash(specs)
+                            script_path = self.scripts_path / name
+                            prev_path = seen.get(script_hash)
+                            if script_path == prev_path:
+                                continue
+                            elif prev_path:
+                                # try to make a symbolic link to the file previously
+                                # created:
+                                try:
+                                    script_path.symlink_to(prev_path.name)
+                                except OSError:
+                                    # windows requires admin permission, copy instead:
+                                    shutil.copy(prev_path, script_path)
+                            else:
+                                # write script to disk:
+                                source_str = action.compose_source(snip_path)
+                                if source_str:
+                                    with script_path.open("wt", newline="\n") as fp:
+                                        fp.write(source_str)
+                                    seen[script_hash] = script_path
+        # combined script stuff
+        for js_idx, act_IDs in combined_actions.items():
+            for block_idx, act_IDs_i in enumerate(act_IDs):
+                for task_iID, act_idx in act_IDs_i:
+                    task = self.workflow.tasks.get(insert_ID=task_iID)
+                    schema = task.template.schemas[0]  # TODO: multiple schemas
+                    action = schema.actions[act_idx]
+                    func_name, snip_path = action.get_script_artifact_name(
+                        env_spec=combined_env_specs[js_idx],
+                        act_idx=act_idx,
+                        ret_specifiers=False,
+                        include_suffix=False,
+                        specs_suffix_delim="_",  # can't use "." in function name
+                    )
+                    combined_script_data[js_idx][block_idx].append(
+                        (func_name, snip_path, action.requires_dir)
+                    )
+        for js_idx, action_scripts in combined_script_data.items():
+            js = self.jobscripts[js_idx]
+            script_str, script_indices, num_elems, num_acts = js.compose_combined_script(
+                [i for _, i in sorted(action_scripts.items())]
+            )
+            js.write_script_indices_file(script_indices, num_elems, num_acts)
+            script_path = self.scripts_path / f"js_{js_idx}.py"  # TODO: refactor name
+            with script_path.open("wt", newline="\n") as fp:
+                fp.write(script_str)
+        return run_cmd_file_names, run_indices, run_inp_files
+    @TimeIt.decorator
+    def _calculate_run_dir_indices(
+        self,
+        run_indices: np.ndarray,
+        cache: ObjectCache,
+    ) -> tuple[np.ndarray, np.ndarray]:
+        assert cache.elements is not None
+        assert cache.iterations is not None
+        # get the multiplicities of all tasks, elements, iterations, and runs:
+        wk_num_tasks = self.workflow.num_tasks
+        task_num_elems = {}
+        elem_num_iters = {}
+        iter_num_acts = {}
+        iter_acts_num_runs = {}
+        for task in self.workflow.tasks:
+            elem_IDs = task.element_IDs
+            task_num_elems[task.insert_ID] = len(elem_IDs)
+            for elem_ID in elem_IDs:
+                iter_IDs = cache.elements[elem_ID].iteration_IDs
+                elem_num_iters[elem_ID] = len(iter_IDs)
+                for iter_ID in iter_IDs:
+                    run_IDs = cache.iterations[iter_ID].EAR_IDs
+                    if run_IDs:  # the schema might have no actions
+                        iter_num_acts[iter_ID] = len(run_IDs)
+                        for act_idx, act_run_IDs in run_IDs.items():
+                            iter_acts_num_runs[(iter_ID, act_idx)] = len(act_run_IDs)
+                    else:
+                        iter_num_acts[iter_ID] = 0
+        max_u8 = np.iinfo(np.uint8).max
+        max_u32 = np.iinfo(np.uint32).max
+        MAX_ELEMS_PER_DIR = 1000  # TODO: configurable (add `workflow_defaults` to Config)
+        MAX_ITERS_PER_DIR = 1000
+        requires_dir_idx = np.where(run_indices[:, -1] == 1)[0]
+        run_dir_arr = np.empty(requires_dir_idx.size, dtype=RUN_DIR_ARR_DTYPE)
+        run_ids = np.empty(requires_dir_idx.size, dtype=int)
+        elem_depths: dict[int, int] = {}
+        iter_depths: dict[int, int] = {}
+        for idx in range(requires_dir_idx.size):
+            row = run_indices[requires_dir_idx[idx]]
+            t_iID, e_id, i_id, r_id, e_idx, i_idx, a_idx, r_idx = row[:-1]
+            run_ids[idx] = r_id
+            num_elems_i = task_num_elems[t_iID]
+            num_iters_i = elem_num_iters[e_id]
+            num_acts_i = iter_num_acts[i_id]  # see TODO below
+            num_runs_i = iter_acts_num_runs[(i_id, a_idx)]
+            e_depth = 1
+            if num_elems_i == 1:
+                e_idx = max_u32
+            elif num_elems_i > MAX_ELEMS_PER_DIR:
+                if (e_depth := elem_depths.get(t_iID, -1)) == -1:
+                    e_depth = int(
+                        np.ceil(np.log(num_elems_i) / np.log(MAX_ELEMS_PER_DIR))
+                    )
+                    elem_depths[t_iID] = e_depth
+            # TODO: i_idx should be either MAX or the iteration ID, which will index into
+            # a separate array to get the formatted loop indices e.g.
+            # ("outer_loop_0_inner_loop_9")
+            i_depth = 1
+            if num_iters_i == 1:
+                i_idx = max_u32
+            elif num_iters_i > MAX_ITERS_PER_DIR:
+                if (i_depth := iter_depths.get(e_id, -1)) == -1:
+                    i_depth = int(
+                        np.ceil(np.log(num_iters_i) / np.log(MAX_ITERS_PER_DIR))
+                    )
+                    iter_depths[e_id] = i_depth
+            a_idx = max_u8  # TODO: for now, always exclude action index dir
+            if num_runs_i == 1:
+                r_idx = max_u8
+            if wk_num_tasks == 1:
+                t_iID = max_u8
+            run_dir_arr[idx] = (t_iID, e_idx, i_idx, a_idx, r_idx, e_depth, i_depth)
+        return run_dir_arr, run_ids
+    @TimeIt.decorator
+    def _write_execute_dirs(
+        self,
+        run_indices: NDArray,
+        run_inp_files: dict[int, list[Path]],
+        cache: ObjectCache,
+        status: Status | None = None,
+    ):
+        if status:
+            status.update("Adding new submission: resolving execution directories...")
+        run_dir_arr, run_idx = self._calculate_run_dir_indices(run_indices, cache)
+        # set run dirs in persistent array:
+        if run_idx.size:
+            self.workflow._store.set_run_dirs(run_dir_arr, run_idx)
+        # retrieve run directories as paths. array is not yet committed, so pass in
+        # directly:
+        run_dirs = self.workflow.get_run_directories(dir_indices_arr=run_dir_arr)
+        if status:
+            status.update("Adding new submission: making execution directories...")
+        # make directories
+        for idx, run_dir in enumerate(run_dirs):
+            assert run_dir
+            run_dir.mkdir(parents=True, exist_ok=True)
+            inp_files_i = run_inp_files.get(run_idx[idx])
+            if inp_files_i:
+                # copy (TODO: optionally symlink) any input files:
+                for path_i in inp_files_i:
+                    shutil.copy(path_i, run_dir)
     @staticmethod
     def get_unique_schedulers_of_jobscripts(
-        jobscripts: List[Jobscript],
-    ) -> Dict[Tuple[Tuple[int, int]], Scheduler]:
+        jobscripts: Iterable[Jobscript],
+    ) -> Iterable[tuple[tuple[tuple[int, int], ...], Scheduler]]:
         """Get unique schedulers and which of the passed jobscripts they correspond to.
-        Uniqueness is determines only by the `Scheduler.unique_properties` tuple.
+        Uniqueness is determined only by the `QueuedScheduler.unique_properties` tuple.
         Parameters
         ----------
         jobscripts: list[~hpcflow.app.Jobscript]
+        Returns
+        -------
+        scheduler_mapping
+            Mapping where keys are a sequence of jobscript index descriptors and
+            the values are the scheduler to use for that jobscript.
+            A jobscript index descriptor is a pair of the submission index and the main
+            jobscript index.
         """
-        js_idx = []
-        schedulers = []
+        js_idx: list[list[tuple[int, int]]] = []
+        schedulers: list[Scheduler] = []
         # list of tuples of scheduler properties we consider to determine "uniqueness",
         # with the first string being the scheduler type (class name):
-        seen_schedulers = []
+        seen_schedulers: dict[tuple, int] = {}
         for js in jobscripts:
-            if js.scheduler.unique_properties not in seen_schedulers:
-                seen_schedulers.append(js.scheduler.unique_properties)
+            if (
+                sched_idx := seen_schedulers.get(key := js.scheduler.unique_properties)
+            ) is None:
+                seen_schedulers[key] = sched_idx = len(seen_schedulers) - 1
                 schedulers.append(js.scheduler)
                 js_idx.append([])
-            sched_idx = seen_schedulers.index(js.scheduler.unique_properties)
             js_idx[sched_idx].append((js.submission.index, js.index))
-        sched_js_idx = dict(zip((tuple(i) for i in js_idx), schedulers))
+        return zip(map(tuple, js_idx), schedulers)
-        return sched_js_idx
+    @property
+    @TimeIt.decorator
+    def _unique_schedulers(
+        self,
+    ) -> Iterable[tuple[tuple[tuple[int, int], ...], Scheduler]]:
+        return self.get_unique_schedulers_of_jobscripts(self.jobscripts)
     @TimeIt.decorator
-    def get_unique_schedulers(self) -> Dict[Tuple[int], Scheduler]:
+    def get_unique_schedulers(self) -> Mapping[tuple[tuple[int, int], ...], Scheduler]:
         """Get unique schedulers and which of this submission's jobscripts they
-        correspond to."""
-        return self.get_unique_schedulers_of_jobscripts(self.jobscripts)
+        correspond to.
+        Returns
+        -------
+        scheduler_mapping
+            Mapping where keys are a sequence of jobscript index descriptors and
+            the values are the scheduler to use for that jobscript.
+            A jobscript index descriptor is a pair of the submission index and the main
+            jobscript index.
+        """
+        # This is an absurd type; you never use the key as a key
+        return dict(self._unique_schedulers)
     @TimeIt.decorator
-    def get_unique_shells(self) -> Dict[Tuple[int], Shell]:
+    def get_unique_shells(self) -> Iterable[tuple[tuple[int, ...], Shell]]:
         """Get unique shells and which jobscripts they correspond to."""
-        js_idx = []
-        shells = []
+        js_idx: list[list[int]] = []
+        shells: list[Shell] = []
         for js in self.jobscripts:
             if js.shell not in shells:
@@ -478,126 +1055,154 @@ class Submission(JSONLike):
             shell_idx = shells.index(js.shell)
             js_idx[shell_idx].append(js.index)
-        shell_js_idx = dict(zip((tuple(i) for i in js_idx), shells))
+        return zip(map(tuple, js_idx), shells)
-        return shell_js_idx
+    def _update_at_submit_metadata(self, submission_parts: dict[str, list[int]]):
+        """Update persistent store and in-memory record of at-submit metadata.
-    def _raise_failure(self, submitted_js_idx, exceptions):
-        msg = f"Some jobscripts in submission index {self.index} could not be submitted"
-        if submitted_js_idx:
-            msg += f" (but jobscripts {submitted_js_idx} were submitted successfully):"
-        else:
-            msg += ":"
-        msg += "\n"
-        for sub_err in exceptions:
-            msg += (
-                f"Jobscript {sub_err.js_idx} at path: {str(sub_err.js_path)!r}\n"
-                f"Submit command: {sub_err.submit_cmd!r}.\n"
-                f"Reason: {sub_err.message!r}\n"
-            )
-            if sub_err.subprocess_exc is not None:
-                msg += f"Subprocess exception: {sub_err.subprocess_exc}\n"
-            if sub_err.job_ID_parse_exc is not None:
-                msg += f"Subprocess job ID parse exception: {sub_err.job_ID_parse_exc}\n"
-            if sub_err.job_ID_parse_exc is not None:
-                msg += f"Job ID parse exception: {sub_err.job_ID_parse_exc}\n"
-            if sub_err.stdout:
-                msg += f"Submission stdout:\n{indent(sub_err.stdout, '  ')}\n"
-            if sub_err.stderr:
-                msg += f"Submission stderr:\n{indent(sub_err.stderr, '  ')}\n"
-        raise SubmissionFailure(message=msg)
-    def _append_submission_part(self, submit_time: str, submitted_js_idx: List[int]):
-        self._submission_parts[submit_time] = submitted_js_idx
-        self.workflow._store.add_submission_part(
+        Notes
+        -----
+        Currently there is only one type of at-submit metadata, which is the
+        submission-parts: a mapping between a string submit-time, and the list of
+        jobscript indices that were submitted at that submit-time. This method updates
+        the recorded submission parts to include those passed here.
+        """
+        self.workflow._store.update_at_submit_metadata(
             sub_idx=self.index,
-            dt_str=submit_time,
-            submitted_js_idx=submitted_js_idx,
+            submission_parts=submission_parts,
+        )
+        self._at_submit_metadata["submission_parts"].update(submission_parts)
+        # cache is now invalid:
+        self._submission_parts_lst = None
+    def _append_submission_part(self, submit_time: str, submitted_js_idx: list[int]):
+        self._update_at_submit_metadata(submission_parts={submit_time: submitted_js_idx})
+    def get_jobscript_functions_name(self, shell: Shell, shell_idx: int) -> str:
+        """Get the name of the jobscript functions file for the specified shell."""
+        return f"js_funcs_{shell_idx}{shell.JS_EXT}"
+    def get_jobscript_functions_path(self, shell: Shell, shell_idx: int) -> Path:
+        """Get the path of the jobscript functions file for the specified shell."""
+        return self.js_funcs_path / self.get_jobscript_functions_name(shell, shell_idx)
+    def _compose_functions_file(self, shell: Shell) -> str:
+        """Prepare the contents of the jobscript functions file for the specified
+        shell.
+        Notes
+        -----
+        The functions file includes, at a minimum, a shell function that invokes the app
+        with provided arguments. This file will be sourced/invoked within all jobscripts
+        and command files that share the specified shell.
+        """
+        cfg_invocation = self._app.config._file.get_invocation(
+            self._app.config._config_key
+        )
+        env_setup = cfg_invocation["environment_setup"]
+        if env_setup:
+            env_setup = indent(env_setup.strip(), shell.JS_ENV_SETUP_INDENT)
+            env_setup += "\n\n" + shell.JS_ENV_SETUP_INDENT
+        else:
+            env_setup = shell.JS_ENV_SETUP_INDENT
+        app_invoc = list(self._app.run_time_info.invocation_command)
+        app_caps = self._app.package_name.upper()
+        func_file_args = shell.process_JS_header_args(  # TODO: rename?
+            {
+                "workflow_app_alias": self.WORKFLOW_APP_ALIAS,
+                "env_setup": env_setup,
+                "app_invoc": app_invoc,
+                "app_caps": app_caps,
+                "config_dir": str(self._app.config.config_directory),
+                "config_invoc_key": self._app.config.config_key,
+            }
         )
+        out = shell.JS_FUNCS.format(**func_file_args)
+        return out
+    def _write_functions_file(self, shell: Shell, shell_idx: int) -> None:
+        """Write the jobscript functions file for the specified shell.
+        Notes
+        -----
+        The functions file includes, at a minimum, a shell function that invokes the app
+        with provided arguments. This file will be sourced/invoked within all jobscripts
+        and command files that share the specified shell.
+        """
+        js_funcs_str = self._compose_functions_file(shell)
+        path = self.get_jobscript_functions_path(shell, shell_idx)
+        with path.open("wt", newline="\n") as fp:
+            fp.write(js_funcs_str)
     @TimeIt.decorator
     def submit(
         self,
-        status,
-        ignore_errors: Optional[bool] = False,
-        print_stdout: Optional[bool] = False,
-        add_to_known: Optional[bool] = True,
-    ) -> List[int]:
+        status: Status | None,
+        ignore_errors: bool = False,
+        print_stdout: bool = False,
+        add_to_known: bool = True,
+    ) -> list[int]:
         """Generate and submit the jobscripts of this submission."""
-        # if JS_parallelism explicitly requested but store doesn't support, raise:
-        supports_JS_para = self.workflow._store._features.jobscript_parallelism
-        if self.JS_parallelism:
-            if not supports_JS_para:
-                if status:
-                    status.stop()
-                raise ValueError(
-                    f"Store type {self.workflow._store!r} does not support jobscript "
-                    f"parallelism."
-                )
-        elif self.JS_parallelism is None:
-            self._JS_parallelism = supports_JS_para
-        # set os_name and shell_name for each jobscript:
-        for js in self.jobscripts:
-            js._set_os_name()
-            js._set_shell_name()
-            js._set_scheduler_name()
+        # TODO: support passing list of jobscript indices to submit; this will allow us
+        # to test a submision with multiple "submission parts". would also need to check
+        # dependencies if this customised list is passed
         outstanding = self.outstanding_jobscripts
         # get scheduler, shell and OS version information (also an opportunity to fail
         # before trying to submit jobscripts):
-        js_vers_info = {}
-        for js_indices, sched in self.get_unique_schedulers().items():
+        js_vers_info: dict[int, dict[str, str | list[str]]] = {}
+        for js_indices, sched in self._unique_schedulers:
             try:
                 vers_info = sched.get_version_info()
-            except Exception as err:
-                if ignore_errors:
-                    vers_info = {}
-                else:
-                    raise err
+            except Exception:
+                if not ignore_errors:
+                    raise
+                vers_info = {}
             for _, js_idx in js_indices:
                 if js_idx in outstanding:
-                    if js_idx not in js_vers_info:
-                        js_vers_info[js_idx] = {}
-                    js_vers_info[js_idx].update(vers_info)
+                    js_vers_info.setdefault(js_idx, {}).update(vers_info)
-        for js_indices, shell in self.get_unique_shells().items():
+        js_shell_indices = {}
+        for shell_idx, (js_indices_2, shell) in enumerate(self.get_unique_shells()):
             try:
                 vers_info = shell.get_version_info()
-            except Exception as err:
-                if ignore_errors:
-                    vers_info = {}
-                else:
-                    raise err
-            for js_idx in js_indices:
+            except Exception:
+                if not ignore_errors:
+                    raise
+                vers_info = {}
+            for js_idx in js_indices_2:
                 if js_idx in outstanding:
-                    if js_idx not in js_vers_info:
-                        js_vers_info[js_idx] = {}
-                    js_vers_info[js_idx].update(vers_info)
+                    js_vers_info.setdefault(js_idx, {}).update(vers_info)
+                    js_shell_indices[js_idx] = shell_idx
+            # write a file containing useful shell functions:
+            self._write_functions_file(shell, shell_idx)
+        hostname = socket.gethostname()
+        machine = self._app.config.get("machine")
         for js_idx, vers_info_i in js_vers_info.items():
-            self.jobscripts[js_idx]._set_version_info(vers_info_i)
+            js = self.jobscripts[js_idx]
+            js._set_version_info(vers_info_i)
+            js._set_submit_hostname(hostname)
+            js._set_submit_machine(machine)
+            js._set_shell_idx(js_shell_indices[js_idx])
-        # for direct submission, it's important that os_name/shell_name/scheduler_name
-        # are made persistent now, because `Workflow.write_commands`, which might be
-        # invoked in a new process before submission has completed, needs to know these:
         self.workflow._store._pending.commit_all()
-        # TODO: a submission should only be "submitted" once shouldn't it?
-        # no; there could be an IO error (e.g. internet connectivity), so might
-        # need to be able to reattempt submission of outstanding jobscripts.
-        self.path.mkdir(exist_ok=True)
-        if not self.abort_EARs_file_path.is_file():
-            self._write_abort_EARs_file()
         # map jobscript `index` to (scheduler job ID or process ID, is_array):
-        scheduler_refs = {}
-        submitted_js_idx = []
-        errs = []
+        scheduler_refs: dict[int, tuple[str, bool]] = {}
+        submitted_js_idx: list[int] = []
+        errs: list[JobscriptSubmissionFailure] = []
         for js in self.jobscripts:
             # check not previously submitted:
             if js.index not in outstanding:
@@ -605,14 +1210,20 @@ class Submission(JSONLike):
             # check all dependencies were submitted now or previously:
             if not all(
-                i in submitted_js_idx or i in self.submitted_jobscripts
-                for i in js.dependencies
+                js_idx in submitted_js_idx or js_idx in self.submitted_jobscripts
+                for js_idx, _ in js.dependencies
             ):
+                warnings.warn(
+                    f"Cannot submit jobscript index {js.index} since not all of its "
+                    f"dependencies have been submitted: {js.dependencies!r}"
+                )
                 continue
             try:
                 if status:
-                    status.update(f"Submitting jobscript {js.index}...")
+                    status.update(
+                        f"Submitting jobscript {js.index + 1}/{len(self.jobscripts)}..."
+                    )
                 js_ref_i = js.submit(scheduler_refs, print_stdout=print_stdout)
                 scheduler_refs[js.index] = (js_ref_i, js.is_array)
                 submitted_js_idx.append(js.index)
@@ -621,15 +1232,21 @@ class Submission(JSONLike):
                 errs.append(err)
                 continue
+            # TODO: some way to handle KeyboardInterrupt during submission?
+            #   - stop, and cancel already submitted?
         if submitted_js_idx:
-            dt_str = datetime.utcnow().strftime(self.app._submission_ts_fmt)
+            dt_str = current_timestamp().strftime(self._app._submission_ts_fmt)
             self._append_submission_part(
                 submit_time=dt_str,
                 submitted_js_idx=submitted_js_idx,
             )
+            # ensure `_submission_parts` is committed
+            self.workflow._store._pending.commit_all()
             # add a record of the submission part to the known-submissions file
             if add_to_known:
-                self.app._add_to_known_submissions(
+                self._app._add_to_known_submissions(
                     wk_path=self.workflow.path,
                     wk_id=self.workflow.id_,
                     sub_idx=self.index,
@@ -639,7 +1256,7 @@ class Submission(JSONLike):
         if errs and not ignore_errors:
             if status:
                 status.stop()
-            self._raise_failure(submitted_js_idx, errs)
+            raise SubmissionFailure(self.index, submitted_js_idx, errs)
         len_js = len(submitted_js_idx)
         print(f"Submitted {len_js} jobscript{'s' if len_js > 1 else ''}.")
@@ -647,24 +1264,86 @@ class Submission(JSONLike):
         return submitted_js_idx
     @TimeIt.decorator
-    def cancel(self):
+    def cancel(self) -> None:
         """
         Cancel the active jobs for this submission's jobscripts.
         """
-        act_js = list(self.get_active_jobscripts())
-        if not act_js:
+        if not (act_js := self.get_active_jobscripts()):
             print("No active jobscripts to cancel.")
             return
-        for js_indices, sched in self.get_unique_schedulers().items():
+        for js_indices, sched in self._unique_schedulers:
             # filter by active jobscripts:
-            js_idx = [i[1] for i in js_indices if i[1] in act_js]
-            if js_idx:
+            if js_idx := [i[1] for i in js_indices if i[1] in act_js]:
                 print(
-                    f"Cancelling jobscripts {js_idx!r} of submission {self.index} of "
-                    f"workflow {self.workflow.name!r}."
+                    f"Cancelling jobscripts {shorten_list_str(js_idx, items=5)} of "
+                    f"submission {self.index} of workflow {self.workflow.name!r}."
                 )
                 jobscripts = [self.jobscripts[i] for i in js_idx]
-                sched_refs = [i.scheduler_js_ref for i in jobscripts]
+                sched_refs = [js.scheduler_js_ref for js in jobscripts]
                 sched.cancel_jobs(js_refs=sched_refs, jobscripts=jobscripts)
             else:
                 print("No active jobscripts to cancel.")
+    @TimeIt.decorator
+    def get_scheduler_job_IDs(self) -> tuple[str, ...]:
+        """Return jobscript scheduler job IDs."""
+        return tuple(
+            js_i.scheduler_job_ID
+            for js_i in self.jobscripts
+            if js_i.scheduler_job_ID is not None
+        )
+    @TimeIt.decorator
+    def get_process_IDs(self) -> tuple[int, ...]:
+        """Return jobscript process IDs."""
+        return tuple(
+            js_i.process_ID for js_i in self.jobscripts if js_i.process_ID is not None
+        )
+    @TimeIt.decorator
+    def list_jobscripts(
+        self,
+        max_js: int | None = None,
+        jobscripts: list[int] | None = None,
+        width: int | None = None,
+    ) -> None:
+        """Print a table listing jobscripts and associated information.
+        Parameters
+        ----------
+        max_js
+            Maximum jobscript index to display. This cannot be specified with `jobscripts`.
+        jobscripts
+            A list of jobscripts to display. This cannot be specified with `max_js`.
+        width
+            Width in characters of the printed table.
+        """
+        self.workflow.list_jobscripts(
+            sub_idx=self.index, max_js=max_js, jobscripts=jobscripts, width=width
+        )
+    @TimeIt.decorator
+    def list_task_jobscripts(
+        self,
+        task_names: list[str] | None = None,
+        max_js: int | None = None,
+        width: int | None = None,
+    ) -> None:
+        """Print a table listing the jobscripts associated with the specified (or all)
+        tasks for the specified submission.
+        Parameters
+        ----------
+        task_names
+            List of sub-strings to match to task names. Only matching task names will be
+            included.
+        max_js
+            Maximum jobscript index to display.
+        width
+            Width in characters of the printed table.
+        """
+        self.workflow.list_task_jobscripts(
+            sub_idx=self.index, max_js=max_js, task_names=task_names, width=width
+        )

hpcflow-new2 0.2.0a189__py3-none-any.whl → 0.2.0a199__py3-none-any.whl

hpcflow-new2 0.2.0a189py3-none-any.whl → 0.2.0a199py3-none-any.whl