hpcflow-new2 0.2.0a189__py3-none-any.whl → 0.2.0a199__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in the public registry.
Files changed (176)
  1. hpcflow/__pyinstaller/hook-hpcflow.py +9 -6
  2. hpcflow/_version.py +1 -1
  3. hpcflow/app.py +1 -0
  4. hpcflow/data/scripts/bad_script.py +2 -0
  5. hpcflow/data/scripts/do_nothing.py +2 -0
  6. hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
  7. hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
  8. hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
  9. hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
  10. hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
  11. hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
  12. hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
  13. hpcflow/data/scripts/input_file_generator_basic.py +3 -0
  14. hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
  15. hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
  16. hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
  17. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
  18. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
  19. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
  20. hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
  21. hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
  22. hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
  23. hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +1 -1
  24. hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
  25. hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +1 -1
  26. hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
  27. hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
  28. hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
  29. hpcflow/data/scripts/output_file_parser_basic.py +3 -0
  30. hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
  31. hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
  32. hpcflow/data/scripts/script_exit_test.py +5 -0
  33. hpcflow/data/template_components/environments.yaml +1 -1
  34. hpcflow/sdk/__init__.py +26 -15
  35. hpcflow/sdk/app.py +2192 -768
  36. hpcflow/sdk/cli.py +506 -296
  37. hpcflow/sdk/cli_common.py +105 -7
  38. hpcflow/sdk/config/__init__.py +1 -1
  39. hpcflow/sdk/config/callbacks.py +115 -43
  40. hpcflow/sdk/config/cli.py +126 -103
  41. hpcflow/sdk/config/config.py +674 -318
  42. hpcflow/sdk/config/config_file.py +131 -95
  43. hpcflow/sdk/config/errors.py +125 -84
  44. hpcflow/sdk/config/types.py +148 -0
  45. hpcflow/sdk/core/__init__.py +25 -1
  46. hpcflow/sdk/core/actions.py +1771 -1059
  47. hpcflow/sdk/core/app_aware.py +24 -0
  48. hpcflow/sdk/core/cache.py +139 -79
  49. hpcflow/sdk/core/command_files.py +263 -287
  50. hpcflow/sdk/core/commands.py +145 -112
  51. hpcflow/sdk/core/element.py +828 -535
  52. hpcflow/sdk/core/enums.py +192 -0
  53. hpcflow/sdk/core/environment.py +74 -93
  54. hpcflow/sdk/core/errors.py +455 -52
  55. hpcflow/sdk/core/execute.py +207 -0
  56. hpcflow/sdk/core/json_like.py +540 -272
  57. hpcflow/sdk/core/loop.py +751 -347
  58. hpcflow/sdk/core/loop_cache.py +164 -47
  59. hpcflow/sdk/core/object_list.py +370 -207
  60. hpcflow/sdk/core/parameters.py +1100 -627
  61. hpcflow/sdk/core/rule.py +59 -41
  62. hpcflow/sdk/core/run_dir_files.py +21 -37
  63. hpcflow/sdk/core/skip_reason.py +7 -0
  64. hpcflow/sdk/core/task.py +1649 -1339
  65. hpcflow/sdk/core/task_schema.py +308 -196
  66. hpcflow/sdk/core/test_utils.py +191 -114
  67. hpcflow/sdk/core/types.py +440 -0
  68. hpcflow/sdk/core/utils.py +485 -309
  69. hpcflow/sdk/core/validation.py +82 -9
  70. hpcflow/sdk/core/workflow.py +2544 -1178
  71. hpcflow/sdk/core/zarr_io.py +98 -137
  72. hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
  73. hpcflow/sdk/demo/cli.py +53 -33
  74. hpcflow/sdk/helper/cli.py +18 -15
  75. hpcflow/sdk/helper/helper.py +75 -63
  76. hpcflow/sdk/helper/watcher.py +61 -28
  77. hpcflow/sdk/log.py +122 -71
  78. hpcflow/sdk/persistence/__init__.py +8 -31
  79. hpcflow/sdk/persistence/base.py +1360 -606
  80. hpcflow/sdk/persistence/defaults.py +6 -0
  81. hpcflow/sdk/persistence/discovery.py +38 -0
  82. hpcflow/sdk/persistence/json.py +568 -188
  83. hpcflow/sdk/persistence/pending.py +382 -179
  84. hpcflow/sdk/persistence/store_resource.py +39 -23
  85. hpcflow/sdk/persistence/types.py +318 -0
  86. hpcflow/sdk/persistence/utils.py +14 -11
  87. hpcflow/sdk/persistence/zarr.py +1337 -433
  88. hpcflow/sdk/runtime.py +44 -41
  89. hpcflow/sdk/submission/{jobscript_info.py → enums.py} +39 -12
  90. hpcflow/sdk/submission/jobscript.py +1651 -692
  91. hpcflow/sdk/submission/schedulers/__init__.py +167 -39
  92. hpcflow/sdk/submission/schedulers/direct.py +121 -81
  93. hpcflow/sdk/submission/schedulers/sge.py +170 -129
  94. hpcflow/sdk/submission/schedulers/slurm.py +291 -268
  95. hpcflow/sdk/submission/schedulers/utils.py +12 -2
  96. hpcflow/sdk/submission/shells/__init__.py +14 -15
  97. hpcflow/sdk/submission/shells/base.py +150 -29
  98. hpcflow/sdk/submission/shells/bash.py +283 -173
  99. hpcflow/sdk/submission/shells/os_version.py +31 -30
  100. hpcflow/sdk/submission/shells/powershell.py +228 -170
  101. hpcflow/sdk/submission/submission.py +1014 -335
  102. hpcflow/sdk/submission/types.py +140 -0
  103. hpcflow/sdk/typing.py +182 -12
  104. hpcflow/sdk/utils/arrays.py +71 -0
  105. hpcflow/sdk/utils/deferred_file.py +55 -0
  106. hpcflow/sdk/utils/hashing.py +16 -0
  107. hpcflow/sdk/utils/patches.py +12 -0
  108. hpcflow/sdk/utils/strings.py +33 -0
  109. hpcflow/tests/api/test_api.py +32 -0
  110. hpcflow/tests/conftest.py +27 -6
  111. hpcflow/tests/data/multi_path_sequences.yaml +29 -0
  112. hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
  113. hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
  114. hpcflow/tests/schedulers/slurm/test_slurm_submission.py +5 -2
  115. hpcflow/tests/scripts/test_input_file_generators.py +282 -0
  116. hpcflow/tests/scripts/test_main_scripts.py +866 -85
  117. hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
  118. hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
  119. hpcflow/tests/shells/wsl/test_wsl_submission.py +12 -4
  120. hpcflow/tests/unit/test_action.py +262 -75
  121. hpcflow/tests/unit/test_action_rule.py +9 -4
  122. hpcflow/tests/unit/test_app.py +33 -6
  123. hpcflow/tests/unit/test_cache.py +46 -0
  124. hpcflow/tests/unit/test_cli.py +134 -1
  125. hpcflow/tests/unit/test_command.py +71 -54
  126. hpcflow/tests/unit/test_config.py +142 -16
  127. hpcflow/tests/unit/test_config_file.py +21 -18
  128. hpcflow/tests/unit/test_element.py +58 -62
  129. hpcflow/tests/unit/test_element_iteration.py +50 -1
  130. hpcflow/tests/unit/test_element_set.py +29 -19
  131. hpcflow/tests/unit/test_group.py +4 -2
  132. hpcflow/tests/unit/test_input_source.py +116 -93
  133. hpcflow/tests/unit/test_input_value.py +29 -24
  134. hpcflow/tests/unit/test_jobscript_unit.py +757 -0
  135. hpcflow/tests/unit/test_json_like.py +44 -35
  136. hpcflow/tests/unit/test_loop.py +1396 -84
  137. hpcflow/tests/unit/test_meta_task.py +325 -0
  138. hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
  139. hpcflow/tests/unit/test_object_list.py +17 -12
  140. hpcflow/tests/unit/test_parameter.py +29 -7
  141. hpcflow/tests/unit/test_persistence.py +237 -42
  142. hpcflow/tests/unit/test_resources.py +20 -18
  143. hpcflow/tests/unit/test_run.py +117 -6
  144. hpcflow/tests/unit/test_run_directories.py +29 -0
  145. hpcflow/tests/unit/test_runtime.py +2 -1
  146. hpcflow/tests/unit/test_schema_input.py +23 -15
  147. hpcflow/tests/unit/test_shell.py +23 -2
  148. hpcflow/tests/unit/test_slurm.py +8 -7
  149. hpcflow/tests/unit/test_submission.py +38 -89
  150. hpcflow/tests/unit/test_task.py +352 -247
  151. hpcflow/tests/unit/test_task_schema.py +33 -20
  152. hpcflow/tests/unit/test_utils.py +9 -11
  153. hpcflow/tests/unit/test_value_sequence.py +15 -12
  154. hpcflow/tests/unit/test_workflow.py +114 -83
  155. hpcflow/tests/unit/test_workflow_template.py +0 -1
  156. hpcflow/tests/unit/utils/test_arrays.py +40 -0
  157. hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
  158. hpcflow/tests/unit/utils/test_hashing.py +65 -0
  159. hpcflow/tests/unit/utils/test_patches.py +5 -0
  160. hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
  161. hpcflow/tests/workflows/__init__.py +0 -0
  162. hpcflow/tests/workflows/test_directory_structure.py +31 -0
  163. hpcflow/tests/workflows/test_jobscript.py +334 -1
  164. hpcflow/tests/workflows/test_run_status.py +198 -0
  165. hpcflow/tests/workflows/test_skip_downstream.py +696 -0
  166. hpcflow/tests/workflows/test_submission.py +140 -0
  167. hpcflow/tests/workflows/test_workflows.py +160 -15
  168. hpcflow/tests/workflows/test_zip.py +18 -0
  169. hpcflow/viz_demo.ipynb +6587 -3
  170. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +8 -4
  171. hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
  172. hpcflow/sdk/core/parallel.py +0 -21
  173. hpcflow_new2-0.2.0a189.dist-info/RECORD +0 -158
  174. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
  175. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
  176. {hpcflow_new2-0.2.0a189.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0
@@ -7,19 +7,24 @@ from __future__ import annotations
  import copy
  from contextlib import contextmanager
  from dataclasses import dataclass
- from datetime import datetime
  from pathlib import Path
+ from typing import Any, cast, TYPE_CHECKING
+ from typing_extensions import override
  import shutil
  import time
- from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union

  import numpy as np
- import zarr
- from fsspec.implementations.zip import ZipFileSystem
+ from numpy.ma.core import MaskedArray
+ import zarr # type: ignore
+ from zarr.errors import BoundsCheckError # type: ignore
+ from zarr.storage import DirectoryStore, FSStore # type: ignore
+ from fsspec.implementations.zip import ZipFileSystem # type: ignore
  from rich.console import Console
- from numcodecs import MsgPack, VLenArray, blosc, Blosc, Zstd
- from reretry import retry
+ from numcodecs import MsgPack, VLenArray, blosc, Blosc, Zstd # type: ignore
+ from reretry import retry # type: ignore

+ from hpcflow.sdk.typing import hydrate
+ from hpcflow.sdk.core import RUN_DIR_ARR_DTYPE, RUN_DIR_ARR_FILL
  from hpcflow.sdk.core.errors import (
  MissingParameterData,
  MissingStoreEARError,
@@ -38,18 +43,60 @@ from hpcflow.sdk.persistence.base import (
  StoreParameter,
  StoreTask,
  )
+ from hpcflow.sdk.persistence.types import (
+ LoopDescriptor,
+ StoreCreationInfo,
+ TemplateMeta,
+ ZarrAttrsDict,
+ )
  from hpcflow.sdk.persistence.store_resource import ZarrAttrsStoreResource
  from hpcflow.sdk.persistence.utils import ask_pw_on_auth_exc
  from hpcflow.sdk.persistence.pending import CommitResourceMap
  from hpcflow.sdk.persistence.base import update_param_source_dict
  from hpcflow.sdk.log import TimeIt
+ from hpcflow.sdk.submission.submission import (
+ JOBSCRIPT_SUBMIT_TIME_KEYS,
+ SUBMISSION_SUBMIT_TIME_KEYS,
+ )
+ from hpcflow.sdk.utils.arrays import get_2D_idx, split_arr
+ from hpcflow.sdk.utils.strings import shorten_list_str
+
+ if TYPE_CHECKING:
+ from collections.abc import (
+ Callable,
+ Iterable,
+ Iterator,
+ Mapping,
+ MutableMapping,
+ Sequence,
+ )
+ from datetime import datetime
+ from fsspec import AbstractFileSystem # type: ignore
+ from logging import Logger
+ from typing import ClassVar
+ from typing_extensions import Self, TypeAlias
+ from numpy.typing import NDArray
+ from zarr import Array, Group # type: ignore
+ from zarr.attrs import Attributes # type: ignore
+ from zarr.storage import Store # type: ignore
+ from ..submission.types import ResolvedJobscriptBlockDependencies
+ from .types import TypeLookup
+ from ..app import BaseApp
+ from ..core.json_like import JSONed, JSONDocument
+ from ..typing import ParamSource, PathLike, DataIndex
+
+ #: List of any (Zarr-serializable) value.
+ ListAny: TypeAlias = "list[Any]"
+ #: Zarr attribute mapping context.
+ ZarrAttrs: TypeAlias = "dict[str, Any]"
+ _JS: TypeAlias = "dict[str, list[dict[str, dict]]]"


  blosc.use_threads = False # hpcflow is a multiprocess program in general


  @TimeIt.decorator
- def _zarr_get_coord_selection(arr, selection, logger):
+ def _zarr_get_coord_selection(arr: Array, selection: Any, logger: Logger):
  @retry(
  RuntimeError,
  tries=10,
@@ -59,53 +106,84 @@ def _zarr_get_coord_selection(arr, selection, logger):
  logger=logger,
  )
  @TimeIt.decorator
- def _inner(arr, selection):
+ def _inner(arr: Array, selection: Any):
  return arr.get_coordinate_selection(selection)

  return _inner(arr, selection)


- def _encode_numpy_array(obj, type_lookup, path, root_group, arr_path):
+ def _encode_numpy_array(
+ obj: NDArray,
+ type_lookup: TypeLookup,
+ path: list[int],
+ root_group: Group,
+ arr_path: list[int],
+ ) -> int:
  # Might need to generate new group:
  param_arr_group = root_group.require_group(arr_path)
- names = [int(i.split("arr_")[1]) for i in param_arr_group.keys()]
- if not names:
- new_idx = 0
- else:
- new_idx = max(names) + 1
+ new_idx = (
+ max((int(i.removeprefix("arr_")) for i in param_arr_group.keys()), default=-1) + 1
+ )
  param_arr_group.create_dataset(name=f"arr_{new_idx}", data=obj)
  type_lookup["arrays"].append([path, new_idx])

  return len(type_lookup["arrays"]) - 1


- def _decode_numpy_arrays(obj, type_lookup, path, arr_group, dataset_copy):
- for arr_path, arr_idx in type_lookup["arrays"]:
+ def _decode_numpy_arrays(
+ obj: dict | None,
+ type_lookup: TypeLookup,
+ path: list[int],
+ arr_group: Group,
+ dataset_copy: bool,
+ ):
+ # Yuck! Type lies! Zarr's internal types are not modern Python types.
+ arrays = cast("Iterable[tuple[list[int], int]]", type_lookup.get("arrays", []))
+ obj_: dict | NDArray | None = obj
+ for arr_path, arr_idx in arrays:
  try:
  rel_path = get_relative_path(arr_path, path)
  except ValueError:
  continue

- dataset = arr_group.get(f"arr_{arr_idx}")
+ dataset: NDArray = arr_group.get(f"arr_{arr_idx}")
  if dataset_copy:
  dataset = dataset[:]

  if rel_path:
- set_in_container(obj, rel_path, dataset)
+ set_in_container(obj_, rel_path, dataset)
  else:
- obj = dataset
+ obj_ = dataset

- return obj
+ return obj_


- def _encode_masked_array(obj, type_lookup, path, root_group, arr_path):
+ def _encode_masked_array(
+ obj: MaskedArray,
+ type_lookup: TypeLookup,
+ path: list[int],
+ root_group: Group,
+ arr_path: list[int],
+ ):
  data_idx = _encode_numpy_array(obj.data, type_lookup, path, root_group, arr_path)
  mask_idx = _encode_numpy_array(obj.mask, type_lookup, path, root_group, arr_path)
  type_lookup["masked_arrays"].append([path, [data_idx, mask_idx]])


- def _decode_masked_arrays(obj, type_lookup, path, arr_group, dataset_copy):
- for arr_path, (data_idx, mask_idx) in type_lookup["masked_arrays"]:
+ def _decode_masked_arrays(
+ obj: dict,
+ type_lookup: TypeLookup,
+ path: list[int],
+ arr_group: Group,
+ dataset_copy: bool,
+ ):
+ # Yuck! Type lies! Zarr's internal types are not modern Python types.
+ masked_arrays = cast(
+ "Iterable[tuple[list[int], tuple[int, int]]]",
+ type_lookup.get("masked_arrays", []),
+ )
+ obj_: dict | MaskedArray = obj
+ for arr_path, (data_idx, mask_idx) in masked_arrays:
  try:
  rel_path = get_relative_path(arr_path, path)
  except ValueError:
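Aside: `_encode_masked_array` above stores a masked array as two plain arrays (data and mask), and `_decode_masked_arrays` (continued in the next hunk) rebuilds the `MaskedArray` from them. A minimal, self-contained illustration of that round trip, independent of the Zarr storage:

    import numpy as np
    from numpy.ma.core import MaskedArray

    value = np.ma.masked_array([1.0, 2.0, 3.0], mask=[False, True, False])
    data, mask = np.asarray(value.data), np.asarray(value.mask)  # two plain, storable arrays

    restored = MaskedArray(data=data, mask=mask)  # same reconstruction as in _decode_masked_arrays
    assert np.array_equal(restored.compressed(), value.compressed())
    assert restored.mask.tolist() == value.mask.tolist()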
@@ -113,17 +191,17 @@ def _decode_masked_arrays(obj, type_lookup, path, arr_group, dataset_copy):

  data = arr_group.get(f"arr_{data_idx}")
  mask = arr_group.get(f"arr_{mask_idx}")
- dataset = np.ma.core.MaskedArray(data=data, mask=mask)
+ dataset: MaskedArray = MaskedArray(data=data, mask=mask)

  if rel_path:
- set_in_container(obj, rel_path, dataset)
+ set_in_container(obj_, rel_path, dataset)
  else:
- obj = dataset
+ obj_ = dataset

- return obj
+ return obj_


- def append_items_to_ragged_array(arr, items):
+ def append_items_to_ragged_array(arr: Array, items: Sequence[int]):
  """Append an array to a Zarr ragged array.

  I think `arr.append([item])` should work, but does not for some reason, so we do it
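Aside: the `append_items_to_ragged_array` docstring notes that `arr.append([item])` does not behave as expected for Zarr ragged arrays, so items are appended individually. The following rough sketch shows one way to grow a variable-length ("ragged") Zarr array by resizing it and assigning a NumPy object-array block; it is an assumption about the general technique, not the function's actual body (which is not shown in this hunk):

    import numpy as np
    import zarr
    from numcodecs import VLenArray

    # 1D array whose elements are themselves variable-length integer arrays
    arr = zarr.empty(0, dtype=object, object_codec=VLenArray("<i8"))

    items = [np.array([1, 2, 3]), np.array([4]), np.array([5, 6])]
    start = arr.shape[0]
    arr.resize(start + len(items))

    block = np.empty(len(items), dtype=object)  # object wrapper keeps the items ragged
    block[:] = items
    arr[start:] = block

    print(arr[2])  # [5 6]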
@@ -135,36 +213,39 @@ def append_items_to_ragged_array(arr, items):


  @dataclass
- class ZarrStoreTask(StoreTask):
+ class ZarrStoreTask(StoreTask[dict]):
  """
  Represents a task in a Zarr persistent store.
  """

- def encode(self) -> Tuple[int, np.ndarray, Dict]:
+ @override
+ def encode(self) -> tuple[int, dict, dict[str, Any]]:
  """Prepare store task data for the persistent store."""
  wk_task = {"id_": self.id_, "element_IDs": np.array(self.element_IDs)}
- task = {"id_": self.id_, **self.task_template}
+ task = {"id_": self.id_, **(self.task_template or {})}
  return self.index, wk_task, task

+ @override
  @classmethod
- def decode(cls, task_dat: Dict) -> ZarrStoreTask:
+ def decode(cls, task_dat: dict) -> Self:
  """Initialise a `StoreTask` from persistent task data"""
  task_dat["element_IDs"] = task_dat["element_IDs"].tolist()
- return super().decode(task_dat)
+ return cls(is_pending=False, **task_dat)


  @dataclass
- class ZarrStoreElement(StoreElement):
+ class ZarrStoreElement(StoreElement[ListAny, ZarrAttrs]):
  """
  Represents an element in a Zarr persistent store.
  """

- def encode(self, attrs: Dict) -> List:
+ @override
+ def encode(self, attrs: ZarrAttrs) -> ListAny:
  """Prepare store elements data for the persistent store.

  This method mutates `attrs`.
  """
- elem_enc = [
+ return [
  self.id_,
  self.index,
  self.es_idx,
@@ -173,10 +254,10 @@ class ZarrStoreElement(StoreElement):
  self.task_ID,
  self.iteration_IDs,
  ]
- return elem_enc

+ @override
  @classmethod
- def decode(cls, elem_dat: List, attrs: Dict) -> ZarrStoreElement:
+ def decode(cls, elem_dat: ListAny, attrs: ZarrAttrs) -> Self:
  """Initialise a `StoreElement` from persistent element data"""
  obj_dat = {
  "id_": elem_dat[0],
@@ -191,21 +272,22 @@ class ZarrStoreElementIter(StoreElementIter):


  @dataclass
- class ZarrStoreElementIter(StoreElementIter):
+ class ZarrStoreElementIter(StoreElementIter[ListAny, ZarrAttrs]):
  """
  Represents an element iteration in a Zarr persistent store.
  """

- def encode(self, attrs: Dict) -> List:
+ @override
+ def encode(self, attrs: ZarrAttrs) -> ListAny:
  """Prepare store element iteration data for the persistent store.

  This method mutates `attrs`.
  """
- iter_enc = [
+ return [
  self.id_,
  self.element_ID,
  int(self.EARs_initialised),
- [[k, v] for k, v in self.EAR_IDs.items()] if self.EAR_IDs else None,
+ [[ek, ev] for ek, ev in self.EAR_IDs.items()] if self.EAR_IDs else None,
  [
  [ensure_in(dk, attrs["parameter_paths"]), dv]
  for dk, dv in self.data_idx.items()
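Aside: the iteration encoder above replaces repeated keys (parameter paths, loop names) with integer indices into lists held in the array attributes, via `ensure_in`. The sketch below uses a hypothetical `ensure_index` helper to illustrate the index-or-append pattern this appears to rely on; the real `ensure_in` is defined elsewhere in the SDK:

    def ensure_index(item, container):
        """Return the index of `item` in `container`, appending it first if absent."""
        try:
            return container.index(item)
        except ValueError:
            container.append(item)
            return len(container) - 1

    attrs = {"parameter_paths": []}
    data_idx = {"inputs.p1": 7, "outputs.p2": 9}
    encoded = [[ensure_index(k, attrs["parameter_paths"]), v] for k, v in data_idx.items()]
    # encoded == [[0, 7], [1, 9]]; attrs["parameter_paths"] == ["inputs.p1", "outputs.p2"]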
@@ -213,11 +295,11 @@ class ZarrStoreElementIter(StoreElementIter):
  [ensure_in(i, attrs["schema_parameters"]) for i in self.schema_parameters],
  [[ensure_in(dk, attrs["loops"]), dv] for dk, dv in self.loop_idx.items()],
  ]
- return iter_enc

+ @override
  @classmethod
- def decode(cls, iter_dat: List, attrs: Dict) -> StoreElementIter:
- """Initialise a `StoreElementIter` from persistent element iteration data"""
+ def decode(cls, iter_dat: ListAny, attrs: ZarrAttrs) -> Self:
+ """Initialise a `ZarrStoreElementIter` from persistent element iteration data"""
  obj_dat = {
  "id_": iter_dat[0],
  "element_ID": iter_dat[1],
@@ -231,17 +313,18 @@ class ZarrStoreElementIter(StoreElementIter):


  @dataclass
- class ZarrStoreEAR(StoreEAR):
+ class ZarrStoreEAR(StoreEAR[ListAny, ZarrAttrs]):
  """
  Represents an element action run in a Zarr persistent store.
  """

- def encode(self, attrs: Dict, ts_fmt: str) -> Tuple[List, Tuple[np.datetime64]]:
+ @override
+ def encode(self, ts_fmt: str, attrs: ZarrAttrs) -> ListAny:
  """Prepare store EAR data for the persistent store.

  This method mutates `attrs`.
  """
- EAR_enc = [
+ return [
  self.id_,
  self.elem_iter_ID,
  self.action_idx,
@@ -260,11 +343,13 @@ class ZarrStoreEAR(StoreEAR):
  self.metadata,
  self.run_hostname,
  self.commands_idx,
+ self.port_number,
+ self.commands_file_ID,
  ]
- return EAR_enc

+ @override
  @classmethod
- def decode(cls, EAR_dat: List, attrs: Dict, ts_fmt: str) -> ZarrStoreEAR:
+ def decode(cls, EAR_dat: ListAny, ts_fmt: str, attrs: ZarrAttrs) -> Self:
  """Initialise a `ZarrStoreEAR` from persistent EAR data"""
  obj_dat = {
  "id_": EAR_dat[0],
@@ -282,55 +367,44 @@ class ZarrStoreEAR(StoreEAR):
  "metadata": EAR_dat[12],
  "run_hostname": EAR_dat[13],
  "commands_idx": EAR_dat[14],
+ "port_number": EAR_dat[15],
+ "commands_file_ID": EAR_dat[16],
  }
  return cls(is_pending=False, **obj_dat)


  @dataclass
+ @hydrate
  class ZarrStoreParameter(StoreParameter):
  """
  Represents a parameter in a Zarr persistent store.
  """

- _encoders = { # keys are types
+ _encoders: ClassVar[dict[type, Callable]] = { # keys are types
  np.ndarray: _encode_numpy_array,
- np.ma.core.MaskedArray: _encode_masked_array,
+ MaskedArray: _encode_masked_array,
  }
- _decoders = { # keys are keys in type_lookup
+ _decoders: ClassVar[dict[str, Callable]] = { # keys are keys in type_lookup
  "arrays": _decode_numpy_arrays,
  "masked_arrays": _decode_masked_arrays,
  }

- def encode(self, root_group: zarr.Group, arr_path: str) -> Dict[str, Any]:
- return super().encode(root_group=root_group, arr_path=arr_path)
-
- @classmethod
- def decode(
- cls,
- id_: int,
- data: Union[None, Dict],
- source: Dict,
- arr_group: zarr.Group,
- path: Optional[List[str]] = None,
- dataset_copy: bool = False,
- ) -> Any:
- return super().decode(
- id_=id_,
- data=data,
- source=source,
- path=path,
- arr_group=arr_group,
- dataset_copy=dataset_copy,
- )
-

- class ZarrPersistentStore(PersistentStore):
+ class ZarrPersistentStore(
+ PersistentStore[
+ ZarrStoreTask,
+ ZarrStoreElement,
+ ZarrStoreElementIter,
+ ZarrStoreEAR,
+ ZarrStoreParameter,
+ ]
+ ):
  """
  A persistent store implemented using Zarr.
  """

- _name = "zarr"
- _features = PersistentStoreFeatures(
+ _name: ClassVar[str] = "zarr"
+ _features: ClassVar[PersistentStoreFeatures] = PersistentStoreFeatures(
  create=True,
  edit=True,
  jobscript_parallelism=True,
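Aside: `ZarrPersistentStore` now parameterises the generic `PersistentStore[...]` base with its concrete record classes. The standalone sketch below (not hpcflow's actual base class) shows the general `Generic`/`TypeVar` pattern this relies on, where pinning the type parameter in a subclass gives typed results from base-class methods:

    from __future__ import annotations
    from dataclasses import dataclass
    from typing import Generic, TypeVar

    @dataclass
    class TaskRecord:
        id_: int

    T = TypeVar("T", bound=TaskRecord)

    class Store(Generic[T]):
        """Base store; subclasses pin T so lookups are typed as the concrete record."""
        def __init__(self) -> None:
            self._records: dict[int, T] = {}

        def add(self, rec: T) -> None:
            self._records[rec.id_] = rec

        def get(self, id_: int) -> T:
            return self._records[id_]

    @dataclass
    class ZarrTaskRecord(TaskRecord):
        chunk: int = 0

    class ZarrStore(Store[ZarrTaskRecord]):
        pass

    store = ZarrStore()
    store.add(ZarrTaskRecord(id_=1, chunk=3))
    rec = store.get(1)  # statically known to be a ZarrTaskRecord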
@@ -339,39 +413,82 @@ class ZarrPersistentStore(PersistentStore):
  submission=True,
  )

- _store_task_cls = ZarrStoreTask
- _store_elem_cls = ZarrStoreElement
- _store_iter_cls = ZarrStoreElementIter
- _store_EAR_cls = ZarrStoreEAR
- _store_param_cls = ZarrStoreParameter
-
- _param_grp_name = "parameters"
- _param_base_arr_name = "base"
- _param_sources_arr_name = "sources"
- _param_user_arr_grp_name = "arrays"
- _param_data_arr_grp_name = lambda _, param_idx: f"param_{param_idx}"
- _task_arr_name = "tasks"
- _elem_arr_name = "elements"
- _iter_arr_name = "iters"
- _EAR_arr_name = "runs"
- _time_res = "us" # microseconds; must not be smaller than micro!
-
- _res_map = CommitResourceMap(commit_template_components=("attrs",))
-
- def __init__(self, app, workflow, path, fs) -> None:
+ @classmethod
+ def _store_task_cls(cls) -> type[ZarrStoreTask]:
+ return ZarrStoreTask
+
+ @classmethod
+ def _store_elem_cls(cls) -> type[ZarrStoreElement]:
+ return ZarrStoreElement
+
+ @classmethod
+ def _store_iter_cls(cls) -> type[ZarrStoreElementIter]:
+ return ZarrStoreElementIter
+
+ @classmethod
+ def _store_EAR_cls(cls) -> type[ZarrStoreEAR]:
+ return ZarrStoreEAR
+
+ @classmethod
+ def _store_param_cls(cls) -> type[ZarrStoreParameter]:
+ return ZarrStoreParameter
+
+ _param_grp_name: ClassVar[str] = "parameters"
+ _param_base_arr_name: ClassVar[str] = "base"
+ _param_sources_arr_name: ClassVar[str] = "sources"
+ _param_user_arr_grp_name: ClassVar[str] = "arrays"
+ _param_data_arr_grp_name: ClassVar = lambda _, param_idx: f"param_{param_idx}"
+ _subs_md_group_name: ClassVar[str] = "submissions"
+ _task_arr_name: ClassVar[str] = "tasks"
+ _elem_arr_name: ClassVar[str] = "elements"
+ _iter_arr_name: ClassVar[str] = "iters"
+ _EAR_arr_name: ClassVar[str] = "runs"
+ _run_dir_arr_name: ClassVar[str] = "run_dirs"
+ _js_at_submit_md_arr_name: ClassVar[str] = "js_at_submit_md"
+ _js_run_IDs_arr_name: ClassVar[str] = "js_run_IDs"
+ _js_task_elems_arr_name: ClassVar[str] = "js_task_elems"
+ _js_task_acts_arr_name: ClassVar[str] = "js_task_acts"
+ _js_deps_arr_name: ClassVar[str] = "js_deps"
+ _time_res: ClassVar[str] = "us" # microseconds; must not be smaller than micro!
+
+ _res_map: ClassVar[CommitResourceMap] = CommitResourceMap(
+ commit_template_components=("attrs",)
+ )
+
+ def __init__(self, app, workflow, path: str | Path, fs: AbstractFileSystem) -> None:
  self._zarr_store = None # assigned on first access to `zarr_store`
  self._resources = {
  "attrs": ZarrAttrsStoreResource(
  app, name="attrs", open_call=self._get_root_group
  ),
  }
+ self._jobscript_at_submit_metadata: dict[
+ int, dict[str, Any]
+ ] = {} # this is a cache
+
+ # these are caches; keys are submission index and then tuples of
+ # (jobscript index, jobscript-block index):
+ self._jobscript_run_ID_arrays: dict[int, dict[tuple[int, int], NDArray]] = {}
+ self._jobscript_task_element_maps: dict[
+ int, dict[tuple[int, int], dict[int, list[int]]]
+ ] = {}
+ self._jobscript_task_actions_arrays: dict[
+ int, dict[tuple[int, int], NDArray]
+ ] = {}
+ self._jobscript_dependencies: dict[
+ int,
+ dict[
+ tuple[int, int], dict[tuple[int, int], ResolvedJobscriptBlockDependencies]
+ ],
+ ] = {}
+
  super().__init__(app, workflow, path, fs)

  @contextmanager
- def cached_load(self) -> Iterator[Dict]:
+ def cached_load(self) -> Iterator[None]:
  """Context manager to cache the root attributes."""
  with self.using_resource("attrs", "read") as attrs:
- yield attrs
+ yield

  def remove_replaced_dir(self) -> None:
  """
@@ -380,8 +497,8 @@ class ZarrPersistentStore(PersistentStore):
  with self.using_resource("attrs", "update") as md:
  if "replaced_workflow" in md:
  self.logger.debug("removing temporarily renamed pre-existing workflow.")
- self.remove_path(md["replaced_workflow"], self.fs)
- md["replaced_workflow"] = None
+ self.remove_path(md["replaced_workflow"])
+ del md["replaced_workflow"]

  def reinstate_replaced_dir(self) -> None:
  """
@@ -392,32 +509,38 @@ class ZarrPersistentStore(PersistentStore):
  self.logger.debug(
  "reinstating temporarily renamed pre-existing workflow."
  )
- self.rename_path(md["replaced_workflow"], self.path, self.fs)
+ self.rename_path(
+ md["replaced_workflow"],
+ self.path,
+ )

  @staticmethod
- def _get_zarr_store(path: str, fs) -> zarr.storage.Store:
- return zarr.storage.FSStore(url=path, fs=fs)
+ def _get_zarr_store(path: str | Path, fs: AbstractFileSystem) -> Store:
+ return FSStore(url=str(path), fs=fs)
+
+ _CODEC: ClassVar = MsgPack()

  @classmethod
  def write_empty_workflow(
  cls,
- app,
- template_js: Dict,
- template_components_js: Dict,
+ app: BaseApp,
+ *,
+ template_js: TemplateMeta,
+ template_components_js: dict[str, Any],
  wk_path: str,
- fs,
+ fs: AbstractFileSystem,
  name: str,
- replaced_wk: str,
+ replaced_wk: str | None,
  ts_fmt: str,
  ts_name_fmt: str,
- creation_info: Dict,
- compressor: Optional[Union[str, None]] = "blosc",
- compressor_kwargs: Optional[Dict[str, Any]] = None,
+ creation_info: StoreCreationInfo,
+ compressor: str | None = "blosc",
+ compressor_kwargs: dict[str, Any] | None = None,
  ) -> None:
  """
  Write an empty persistent workflow.
  """
- attrs = {
+ attrs: ZarrAttrsDict = {
  "name": name,
  "ts_fmt": ts_fmt,
  "ts_name_fmt": ts_name_fmt,
@@ -436,7 +559,11 @@ class ZarrPersistentStore(PersistentStore):
  root = zarr.group(store=store, overwrite=False)
  root.attrs.update(attrs)

- md = root.create_group("metadata")
+ # use a nested directory store for the metadata group so the runs array
+ # can be stored as a 2D array in nested directories, thereby limiting the maximum
+ # number of files stored in a given directory:
+ md_store = zarr.NestedDirectoryStore(Path(root.store.path).joinpath("metadata"))
+ md = zarr.group(store=md_store)

  compressor_lookup = {
  "blosc": Blosc,
@@ -459,7 +586,7 @@ class ZarrPersistentStore(PersistentStore):
  name=cls._elem_arr_name,
  shape=0,
  dtype=object,
- object_codec=MsgPack(),
+ object_codec=cls._CODEC,
  chunks=1000,
  compressor=cmp,
  )
@@ -469,7 +596,7 @@ class ZarrPersistentStore(PersistentStore):
  name=cls._iter_arr_name,
  shape=0,
  dtype=object,
- object_codec=MsgPack(),
+ object_codec=cls._CODEC,
  chunks=1000,
  compressor=cmp,
  )
@@ -483,20 +610,31 @@ class ZarrPersistentStore(PersistentStore):

  EARs_arr = md.create_dataset(
  name=cls._EAR_arr_name,
- shape=0,
+ shape=(0, 1000),
  dtype=object,
- object_codec=MsgPack(),
+ object_codec=cls._CODEC,
  chunks=1, # single-chunk rows for multiprocess writing
  compressor=cmp,
+ dimension_separator="/",
+ )
+ EARs_arr.attrs.update({"parameter_paths": [], "num_runs": 0})
+
+ # array for storing indices that can be used to reproduce run directory paths:
+ run_dir_arr = md.create_dataset(
+ name=cls._run_dir_arr_name,
+ shape=0,
+ chunks=10_000,
+ dtype=RUN_DIR_ARR_DTYPE,
+ fill_value=RUN_DIR_ARR_FILL,
+ write_empty_chunks=False,
  )
- EARs_arr.attrs.update({"parameter_paths": []})

  parameter_data = root.create_group(name=cls._param_grp_name)
  parameter_data.create_dataset(
  name=cls._param_base_arr_name,
  shape=0,
  dtype=object,
- object_codec=MsgPack(),
+ object_codec=cls._CODEC,
  chunks=1,
  compressor=cmp,
  write_empty_chunks=False,
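Aside: with the runs ("EARs") array now 2D (1000 columns above), flat run IDs must be mapped to (row, column) positions; later hunks do this with `get_2D_idx` from `hpcflow.sdk.utils.arrays`. That helper's body is not shown here, but its usage is consistent with a row-major divmod, sketched below with a hypothetical stand-in:

    import numpy as np

    def to_2d_idx(flat_idx, num_cols):
        # row-major mapping of a flat run ID onto (row, column)
        return np.divmod(flat_idx, num_cols)

    run_ids = np.arange(998, 1003)
    rows, cols = to_2d_idx(run_ids, num_cols=1000)
    # rows -> [0 0 1 1 1], cols -> [998 999 0 1 2]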
@@ -506,15 +644,18 @@ class ZarrPersistentStore(PersistentStore):
  name=cls._param_sources_arr_name,
  shape=0,
  dtype=object,
- object_codec=MsgPack(),
+ object_codec=cls._CODEC,
  chunks=1000, # TODO: check this is a sensible size with many parameters
  compressor=cmp,
  )
  parameter_data.create_group(name=cls._param_user_arr_grp_name)

- def _append_tasks(self, tasks: List[ZarrStoreTask]):
+ # for storing submission metadata that should not be stored in the root group:
+ md.create_group(name=cls._subs_md_group_name)
+
+ def _append_tasks(self, tasks: Iterable[ZarrStoreTask]):
  elem_IDs_arr = self._get_tasks_arr(mode="r+")
- elem_IDs = []
+ elem_IDs: list[int] = []
  with self.using_resource("attrs", "update") as attrs:
  for i_idx, i in enumerate(tasks):
  idx, wk_task_i, task_i = i.encode()
@@ -529,24 +670,350 @@ class ZarrPersistentStore(PersistentStore):
  # increasing IDs.
  append_items_to_ragged_array(arr=elem_IDs_arr, items=elem_IDs)

- def _append_loops(self, loops: Dict[int, Dict]):
+ def _append_loops(self, loops: dict[int, LoopDescriptor]):
  with self.using_resource("attrs", action="update") as attrs:
- for loop_idx, loop in loops.items():
+ for loop in loops.values():
  attrs["loops"].append(
  {
  "num_added_iterations": loop["num_added_iterations"],
  "iterable_parameters": loop["iterable_parameters"],
+ "output_parameters": loop["output_parameters"],
  "parents": loop["parents"],
  }
  )
  attrs["template"]["loops"].append(loop["loop_template"])

- def _append_submissions(self, subs: Dict[int, Dict]):
+ @staticmethod
+ def _extract_submission_run_IDs_array(
+ sub_js: Mapping[str, JSONed],
+ ) -> tuple[np.ndarray, list[list[list[int]]]]:
+ """For a JSON-like representation of a Submission object, remove and combine all
+ jobscript-block run ID lists into a single array with a fill value.
+
+ Notes
+ -----
+ This mutates `sub_js`, by setting `EAR_ID` jobscript-block keys to `None`.
+
+ Parameters
+ ----------
+ sub_js
+ JSON-like representation of a `Submission` object.
+
+ Returns
+ -------
+ combined_run_IDs
+ Integer Numpy array that contains a concatenation of all 2D run ID arrays
+ from each jobscript-block. Technically a "jagged"/"ragged" array that is made
+ square with a large fill value.
+ block_shapes
+ List of length equal to the number of jobscripts in the submission. Each
+ sub-list contains a list of shapes (as a two-item list:
+ `[num_actions, num_elements]`) of the constituent blocks of that jobscript.
+
+ """
+ arrs = []
+ max_acts, max_elems = 0, 0
+
+ # a list for each jobscript, containing shapes of run ID arrays in each block:
+ block_shapes = []
+ for js in cast("Sequence[Mapping[str, JSONed]]", sub_js["jobscripts"]):
+ block_shapes_js_i = []
+ for blk in cast("Sequence[MutableMapping[str, JSONed]]", js["blocks"]):
+ run_IDs_i = np.array(blk["EAR_ID"])
+ blk["EAR_ID"] = None # TODO: how to type?
+ block_shapes_js_i.append(list(run_IDs_i.shape))
+ if run_IDs_i.shape[0] > max_acts:
+ max_acts = run_IDs_i.shape[0]
+ if run_IDs_i.shape[1] > max_elems:
+ max_elems = run_IDs_i.shape[1]
+ arrs.append(run_IDs_i)
+ block_shapes.append(block_shapes_js_i)
+
+ combined_run_IDs = np.full(
+ (len(arrs), max_acts, max_elems),
+ dtype=np.uint32,
+ fill_value=np.iinfo(np.uint32).max,
+ )
+ for arr_idx, arr in enumerate(arrs):
+ combined_run_IDs[arr_idx][: arr.shape[0], : arr.shape[1]] = arr
+
+ return combined_run_IDs, block_shapes
+
+ @staticmethod
+ def _extract_submission_task_elements_array(
+ sub_js: Mapping[str, JSONed],
+ ) -> tuple[np.ndarray, list[list[list[int]]]]:
+ """For a JSON-like representation of a Submission object, remove and combine all
+ jobscript-block task-element mappings into a single array with a fill value.
+
+ Notes
+ -----
+ This mutates `sub_js`, by setting `task_elements` jobscript-block keys to `None`.
+
+ Parameters
+ ----------
+ sub_js
+ JSON-like representation of a `Submission` object.
+
+ Returns
+ -------
+ combined_task_elems
+ Integer Numpy array that contains a concatenation of each task-element,
+ mapping, where each mapping is expressed as a 2D array whose first column
+ corresponds to the keys of the mappings, and whose remaining columns
+ correspond to the values of the mappings. Technically a "jagged"/"ragged"
+ array that is made square with a large fill value.
+ block_shapes
+ List of length equal to the number of jobscripts in the submission. Each
+ sub-list contains a list of shapes (as a two-item list:
+ `[num_actions, num_elements]`) of the constituent blocks of that jobscript.
+
+ """
+ arrs = []
+ max_x, max_y = 0, 0
+
+ # a list for each jobscript, containing shapes of run ID arrays in each block:
+ block_shapes = []
+ for js in cast("Sequence[Mapping[str, JSONed]]", sub_js["jobscripts"]):
+ block_shapes_js_i = []
+ for blk in cast("Sequence[MutableMapping[str, JSONed]]", js["blocks"]):
+
+ task_elems_lst = []
+ for k, v in cast("Mapping[int, list[int]]", blk["task_elements"]).items():
+ task_elems_lst.append([k] + v)
+ task_elems_i = np.array(task_elems_lst)
+
+ block_shape_j = [task_elems_i.shape[1] - 1, task_elems_i.shape[0]]
+ block_shapes_js_i.append(block_shape_j)
+
+ blk["task_elements"] = None # TODO: how to type?
+ if task_elems_i.shape[1] > max_x:
+ max_x = task_elems_i.shape[1]
+ if task_elems_i.shape[0] > max_y:
+ max_y = task_elems_i.shape[0]
+ arrs.append(task_elems_i)
+ block_shapes.append(block_shapes_js_i)
+
+ combined_task_elems = np.full(
+ (len(arrs), max_y, max_x),
+ dtype=np.uint32,
+ fill_value=np.iinfo(np.uint32).max,
+ )
+ for arr_idx, arr in enumerate(arrs):
+ combined_task_elems[arr_idx][: arr.shape[0], : arr.shape[1]] = arr
+
+ return combined_task_elems, block_shapes
+
+ @staticmethod
+ def _extract_submission_task_actions_array(
+ sub_js: Mapping[str, JSONed],
+ ) -> tuple[np.ndarray, list[list[int]]]:
+ """For a JSON-like representation of a Submission object, remove and concatenate
+ all jobscript-block task-action arrays into a single array.
+
+ Notes
+ -----
+ This mutates `sub_js`, by setting `task_actions` jobscript-block keys to `None`.
+
+ Parameters
+ ----------
+ sub_js
+ JSON-like representation of a `Submission` object.
+
+ Returns
+ -------
+ combined_task_acts
+ Integer 2D Numpy array which is a concatenation along the first axis of
+ task-action actions from all jobscript blocks. The second dimension is of
+ length three.
+ block_num_acts
+ List of length equal to the number of jobscripts in the submission. Each
+ sub-list contains a list of `num_actions` of the constituent blocks of that
+ jobscript.
+
+ """
+ arrs = []
+
+ # a list for each jobscript, containing shapes of run ID arrays in each block:
+
+ blk_num_acts = []
+ for js in cast("Sequence[Mapping[str, JSONed]]", sub_js["jobscripts"]):
+
+ blk_num_acts_js_i = []
+ for blk in cast("Sequence[MutableMapping[str, JSONed]]", js["blocks"]):
+
+ blk_acts = np.array(blk["task_actions"])
+ blk["task_actions"] = None # TODO: how to type?
+ blk_num_acts_js_i.append(blk_acts.shape[0])
+ arrs.append(blk_acts)
+
+ blk_num_acts.append(blk_num_acts_js_i)
+
+ combined_task_acts = np.vstack(arrs)
+
+ return combined_task_acts, blk_num_acts
+
+ @staticmethod
+ def _encode_jobscript_block_dependencies(sub_js: Mapping[str, JSONed]) -> np.ndarray:
+ """For a JSON-like representation of a Submission object, remove jobscript-block
+ dependencies for all jobscripts and transform to a single 1D integer array, that
+ can be transformed back by `_decode_jobscript_block_dependencies`.
+
+ Notes
+ -----
+ This mutates `sub_js`, by setting `depdendencies` jobscript-block keys to `None`.
+ """
+
+ # TODO: avoid this horrible mess of casts
+
+ all_deps_arr = []
+ assert sub_js["jobscripts"] is not None
+ for js in cast("Sequence[Mapping[str, JSONed]]", sub_js["jobscripts"]):
+ for blk in cast("Sequence[MutableMapping[str, JSONed]]", js["blocks"]):
+ all_deps_i: list[int] = []
+ assert blk["dependencies"] is not None
+ blk_deps = cast(
+ "list[tuple[tuple[int, int], Mapping[str, JSONed]]]",
+ blk["dependencies"],
+ )
+ for (dep_js_idx, dep_blk_idx), dep in blk_deps:
+ deps_arr: list[int] = []
+ for elem_i, elements_j in cast(
+ "Mapping[int, Sequence[int]]", dep["js_element_mapping"]
+ ).items():
+ deps_arr.extend([len(elements_j) + 1, elem_i] + list(elements_j))
+ blk_arr = [
+ dep_js_idx,
+ dep_blk_idx,
+ int(cast("bool", dep["is_array"])),
+ ] + deps_arr
+ blk_arr = [len(blk_arr)] + blk_arr
+ all_deps_i.extend(blk_arr)
+ all_deps_i = [
+ cast("int", js["index"]),
+ cast("int", blk["index"]),
+ ] + all_deps_i
+ blk["dependencies"] = None # TODO: how to type?
+ all_deps_arr.extend([len(all_deps_i)] + all_deps_i)
+
+ return np.array(all_deps_arr)
+
+ @staticmethod
+ def _decode_jobscript_block_dependencies(
+ arr: np.ndarray,
+ ) -> dict[tuple[int, int], dict[tuple[int, int], ResolvedJobscriptBlockDependencies]]:
+ """Re-generate jobscript-block dependencies that have been transformed by
+ `_encode_jobscript_block_dependencies` into a single 1D integer array.
+
+ Parameters
+ ----------
+ arr:
+ The 1D integer array to transform back to a verbose jobscript-block dependency
+ mapping.
+ """
+ # metadata is js/blk_idx for which the dependencies are stored:
+ block_arrs = split_arr(arr, metadata_size=2)
+ block_deps = {}
+ for i in block_arrs:
+
+ js_idx: int
+ blk_idx: int
+ dep_js_idx: int
+ dep_blk_idx: int
+ is_array: int
+
+ js_idx, blk_idx = i[0]
+ # metadata is js/blk_idx that this block depends on, plus whether the
+ # dependency is an array dependency:
+ deps_arrs = split_arr(i[1], metadata_size=3)
+ all_deps_ij: dict[tuple[int, int], ResolvedJobscriptBlockDependencies] = {}
+ for j in deps_arrs:
+ dep_js_idx, dep_blk_idx, is_array = j[0]
+ # no metadata:
+ elem_deps = split_arr(j[1], metadata_size=0)
+ all_deps_ij[(dep_js_idx, dep_blk_idx)] = {
+ "js_element_mapping": {},
+ "is_array": bool(is_array),
+ }
+ for k in elem_deps:
+ all_deps_ij[(dep_js_idx, dep_blk_idx)]["js_element_mapping"].update(
+ {k[1][0]: list(k[1][1:])}
+ )
+
+ block_deps[(js_idx, blk_idx)] = all_deps_ij
+ return block_deps
+
+ def _append_submissions(self, subs: dict[int, Mapping[str, JSONed]]):
+
+ for sub_idx, sub_i in subs.items():
+
+ # add a new metadata group for this submission:
+ sub_grp = self._get_all_submissions_metadata_group(mode="r+").create_group(
+ sub_idx
+ )
+
+ # add a new at-submit metadata array for jobscripts of this submission:
+ num_js = len(cast("list", sub_i["jobscripts"]))
+ sub_grp.create_dataset(
+ name=self._js_at_submit_md_arr_name,
+ shape=num_js,
+ dtype=object,
+ object_codec=MsgPack(),
+ chunks=1,
+ write_empty_chunks=False,
+ )
+
+ # add a new array to store run IDs for each jobscript:
+ combined_run_IDs, block_shapes = self._extract_submission_run_IDs_array(sub_i)
+ run_IDs_arr = sub_grp.create_dataset(
+ name=self._js_run_IDs_arr_name,
+ data=combined_run_IDs,
+ chunks=(None, None, None), # single chunk for the whole array
+ )
+ run_IDs_arr.attrs["block_shapes"] = block_shapes
+
+ # add a new array to store task-element map for each jobscript:
+ (
+ combined_task_elems,
+ block_shapes,
+ ) = self._extract_submission_task_elements_array(sub_i)
+ task_elems_arr = sub_grp.create_dataset(
+ name=self._js_task_elems_arr_name,
+ data=combined_task_elems,
+ chunks=(None, None, None),
+ )
+ task_elems_arr.attrs["block_shapes"] = block_shapes
+
+ # add a new array to store task-actions for each jobscript:
+ (
+ combined_task_acts,
+ block_num_acts,
+ ) = self._extract_submission_task_actions_array(sub_i)
+ task_acts_arr = sub_grp.create_dataset(
+ name=self._js_task_acts_arr_name,
+ data=combined_task_acts,
+ chunks=(None, None),
+ )
+ task_acts_arr.attrs["block_num_acts"] = block_num_acts
+
+ # add a new array to store jobscript-block dependencies for this submission:
+ sub_grp.create_dataset(
+ name=self._js_deps_arr_name,
+ data=self._encode_jobscript_block_dependencies(sub_i),
+ chunks=(None,),
+ )
+
+ # TODO: store block shapes in `grp.attrs` since it is defined at the
+ # submission level
+
+ # add attributes for at-submit-time submission metadata:
+ grp = self._get_submission_metadata_group(sub_idx, mode="r+")
+ grp.attrs["submission_parts"] = {}
+
  with self.using_resource("attrs", action="update") as attrs:
- for sub_idx, sub_i in subs.items():
- attrs["submissions"].append(sub_i)
+ attrs["submissions"].extend(subs.values())

- def _append_task_element_IDs(self, task_ID: int, elem_IDs: List[int]):
+ def _append_task_element_IDs(self, task_ID: int, elem_IDs: list[int]):
  # I don't think there's a way to "append" to an existing array in a zarr ragged
  # array? So we have to build a new array from existing + new.
  arr = self._get_tasks_arr(mode="r+")
@@ -554,169 +1021,262 @@ class ZarrPersistentStore(PersistentStore):
554
1021
  elem_IDs_new = np.concatenate((elem_IDs_cur, elem_IDs))
555
1022
  arr[task_ID] = elem_IDs_new
556
1023
 
557
- def _append_elements(self, elems: List[ZarrStoreElement]):
558
- arr = self._get_elements_arr(mode="r+")
559
- attrs_orig = arr.attrs.asdict()
1024
+ @staticmethod
1025
+ def __as_dict(attrs: Attributes) -> ZarrAttrs:
1026
+ """
1027
+ Type thunk to work around incomplete typing in zarr.
1028
+ """
1029
+ return cast("ZarrAttrs", attrs.asdict())
1030
+
1031
+ @contextmanager
1032
+ def __mutate_attrs(self, arr: Array) -> Iterator[ZarrAttrs]:
1033
+ attrs_orig = self.__as_dict(arr.attrs)
560
1034
  attrs = copy.deepcopy(attrs_orig)
561
- arr_add = np.empty((len(elems)), dtype=object)
562
- arr_add[:] = [i.encode(attrs) for i in elems]
563
- arr.append(arr_add)
1035
+ yield attrs
564
1036
  if attrs != attrs_orig:
565
1037
  arr.attrs.put(attrs)
566
1038
 
567
- def _append_element_sets(self, task_id: int, es_js: List[Dict]):
1039
+ def _append_elements(self, elems: Sequence[ZarrStoreElement]):
1040
+ arr = self._get_elements_arr(mode="r+")
1041
+ with self.__mutate_attrs(arr) as attrs:
1042
+ arr_add = np.empty((len(elems)), dtype=object)
1043
+ arr_add[:] = [elem.encode(attrs) for elem in elems]
1044
+ arr.append(arr_add)
1045
+
1046
+ def _append_element_sets(self, task_id: int, es_js: Sequence[Mapping]):
568
1047
  task_idx = task_idx = self._get_task_id_to_idx_map()[task_id]
569
1048
  with self.using_resource("attrs", "update") as attrs:
570
1049
  attrs["template"]["tasks"][task_idx]["element_sets"].extend(es_js)
571
1050
 
572
- def _append_elem_iter_IDs(self, elem_ID: int, iter_IDs: List[int]):
1051
+ def _append_elem_iter_IDs(self, elem_ID: int, iter_IDs: Iterable[int]):
573
1052
  arr = self._get_elements_arr(mode="r+")
574
- attrs = arr.attrs.asdict()
575
- elem_dat = arr[elem_ID]
1053
+ attrs = self.__as_dict(arr.attrs)
1054
+ elem_dat = cast("list", arr[elem_ID])
576
1055
  store_elem = ZarrStoreElement.decode(elem_dat, attrs)
577
1056
  store_elem = store_elem.append_iteration_IDs(iter_IDs)
578
- arr[elem_ID] = store_elem.encode(
579
- attrs
580
- ) # attrs shouldn't be mutated (TODO: test!)
1057
+ arr[elem_ID] = store_elem.encode(attrs)
1058
+ # attrs shouldn't be mutated (TODO: test!)
581
1059
 
582
- def _append_elem_iters(self, iters: List[ZarrStoreElementIter]):
1060
+ def _append_elem_iters(self, iters: Sequence[ZarrStoreElementIter]):
583
1061
  arr = self._get_iters_arr(mode="r+")
584
- attrs_orig = arr.attrs.asdict()
585
- attrs = copy.deepcopy(attrs_orig)
586
- arr_add = np.empty((len(iters)), dtype=object)
587
- arr_add[:] = [i.encode(attrs) for i in iters]
588
- arr.append(arr_add)
589
- if attrs != attrs_orig:
590
- arr.attrs.put(attrs)
1062
+ with self.__mutate_attrs(arr) as attrs:
1063
+ arr_add = np.empty((len(iters)), dtype=object)
1064
+ arr_add[:] = [i.encode(attrs) for i in iters]
1065
+ arr.append(arr_add)
591
1066
 
592
- def _append_elem_iter_EAR_IDs(self, iter_ID: int, act_idx: int, EAR_IDs: List[int]):
1067
+ def _append_elem_iter_EAR_IDs(
1068
+ self, iter_ID: int, act_idx: int, EAR_IDs: Sequence[int]
1069
+ ):
593
1070
  arr = self._get_iters_arr(mode="r+")
594
- attrs = arr.attrs.asdict()
595
- iter_dat = arr[iter_ID]
1071
+ attrs = self.__as_dict(arr.attrs)
1072
+ iter_dat = cast("list", arr[iter_ID])
596
1073
  store_iter = ZarrStoreElementIter.decode(iter_dat, attrs)
597
1074
  store_iter = store_iter.append_EAR_IDs(pend_IDs={act_idx: EAR_IDs})
598
- arr[iter_ID] = store_iter.encode(
599
- attrs
600
- ) # attrs shouldn't be mutated (TODO: test!)
1075
+ arr[iter_ID] = store_iter.encode(attrs)
1076
+ # attrs shouldn't be mutated (TODO: test!)
601
1077
 
602
1078
  def _update_elem_iter_EARs_initialised(self, iter_ID: int):
603
1079
  arr = self._get_iters_arr(mode="r+")
604
- attrs = arr.attrs.asdict()
605
- iter_dat = arr[iter_ID]
1080
+ attrs = self.__as_dict(arr.attrs)
1081
+ iter_dat = cast("list", arr[iter_ID])
606
1082
  store_iter = ZarrStoreElementIter.decode(iter_dat, attrs)
607
1083
  store_iter = store_iter.set_EARs_initialised()
608
- arr[iter_ID] = store_iter.encode(
609
- attrs
610
- ) # attrs shouldn't be mutated (TODO: test!)
1084
+ arr[iter_ID] = store_iter.encode(attrs)
1085
+ # attrs shouldn't be mutated (TODO: test!)
611
1086
 
612
- def _append_submission_parts(self, sub_parts: Dict[int, Dict[str, List[int]]]):
613
- with self.using_resource("attrs", action="update") as attrs:
614
- for sub_idx, sub_i_parts in sub_parts.items():
615
- for dt_str, parts_j in sub_i_parts.items():
616
- attrs["submissions"][sub_idx]["submission_parts"][dt_str] = parts_j
1087
+ def _update_at_submit_metadata(
1088
+ self,
1089
+ at_submit_metadata: dict[int, dict[str, Any]],
1090
+ ):
1091
+ for sub_idx, metadata_i in at_submit_metadata.items():
1092
+ grp = self._get_submission_metadata_group(sub_idx, mode="r+")
1093
+ attrs = self.__as_dict(grp.attrs)
1094
+ attrs["submission_parts"].update(metadata_i["submission_parts"])
1095
+ grp.attrs.put(attrs)
1096
+
1097
+ def _update_loop_index(self, loop_indices: dict[int, dict[str, int]]):
617
1098
 
618
- def _update_loop_index(self, iter_ID: int, loop_idx: Dict):
619
1099
  arr = self._get_iters_arr(mode="r+")
620
- attrs = arr.attrs.asdict()
621
- iter_dat = arr[iter_ID]
622
- store_iter = ZarrStoreElementIter.decode(iter_dat, attrs)
623
- store_iter = store_iter.update_loop_idx(loop_idx)
624
- arr[iter_ID] = store_iter.encode(attrs)
1100
+ attrs = self.__as_dict(arr.attrs)
1101
+ iter_IDs = list(loop_indices.keys())
1102
+ iter_dat = arr.get_coordinate_selection(iter_IDs)
1103
+ store_iters = [ZarrStoreElementIter.decode(i, attrs) for i in iter_dat]
625
1104
 
626
- def _update_loop_num_iters(self, index: int, num_iters: int):
1105
+ for idx, iter_ID_i in enumerate(iter_IDs):
1106
+ new_iter_i = store_iters[idx].update_loop_idx(loop_indices[iter_ID_i])
1107
+ # seems to be a Zarr bug that prevents `set_coordinate_selection` with an
1108
+ # object array, so set one-by-one:
1109
+ arr[iter_ID_i] = new_iter_i.encode(attrs)
1110
+
1111
+ def _update_loop_num_iters(self, index: int, num_iters: list[list[list[int] | int]]):
627
1112
  with self.using_resource("attrs", action="update") as attrs:
628
1113
  attrs["loops"][index]["num_added_iterations"] = num_iters
629
1114
 
630
- def _update_loop_parents(self, index: int, parents: List[str]):
1115
+ def _update_loop_parents(self, index: int, parents: list[str]):
631
1116
  with self.using_resource("attrs", action="update") as attrs:
632
1117
  attrs["loops"][index]["parents"] = parents
633
1118
 
634
- def _append_EARs(self, EARs: List[ZarrStoreEAR]):
635
- arr = self._get_EARs_arr(mode="r+")
636
- attrs_orig = arr.attrs.asdict()
637
- attrs = copy.deepcopy(attrs_orig)
638
- arr_add = np.empty((len(EARs)), dtype=object)
639
- arr_add[:] = [i.encode(attrs, self.ts_fmt) for i in EARs]
640
- arr.append(arr_add)
1119
+ def _update_iter_data_indices(self, iter_data_indices: dict[int, DataIndex]):
641
1120
 
642
- if attrs != attrs_orig:
643
- arr.attrs.put(attrs)
1121
+ arr = self._get_iters_arr(mode="r+")
1122
+ attrs = self.__as_dict(arr.attrs)
1123
+ iter_IDs = list(iter_data_indices.keys())
1124
+ iter_dat = arr.get_coordinate_selection(iter_IDs)
1125
+ store_iters = [ZarrStoreElementIter.decode(i, attrs) for i in iter_dat]
644
1126
 
645
- @TimeIt.decorator
646
- def _update_EAR_submission_indices(self, sub_indices: Dict[int:int]):
647
- EAR_IDs = list(sub_indices.keys())
648
- EARs = self._get_persistent_EARs(EAR_IDs)
1127
+ for idx, iter_ID_i in enumerate(iter_IDs):
1128
+ new_iter_i = store_iters[idx].update_data_idx(iter_data_indices[iter_ID_i])
1129
+ # seems to be a Zarr bug that prevents `set_coordinate_selection` with an
1130
+ # object array, so set one-by-one:
1131
+ arr[iter_ID_i] = new_iter_i.encode(attrs)
1132
+
1133
+ def _update_run_data_indices(self, run_data_indices: dict[int, DataIndex]):
1134
+ self._update_runs(
1135
+ updates={k: {"data_idx": v} for k, v in run_data_indices.items()}
1136
+ )
649
1137
 
1138
+ def _append_EARs(self, EARs: Sequence[ZarrStoreEAR]):
650
1139
  arr = self._get_EARs_arr(mode="r+")
651
- attrs_orig = arr.attrs.asdict()
652
- attrs = copy.deepcopy(attrs_orig)
1140
+ with self.__mutate_attrs(arr) as attrs:
1141
+ num_existing = attrs["num_runs"]
1142
+ num_add = len(EARs)
1143
+ num_tot = num_existing + num_add
1144
+ arr_add = np.empty(num_add, dtype=object)
1145
+ arr_add[:] = [i.encode(self.ts_fmt, attrs) for i in EARs]
653
1146
 
654
- encoded_EARs = []
655
- for EAR_ID_i, sub_idx_i in sub_indices.items():
656
- new_EAR_i = EARs[EAR_ID_i].update(submission_idx=sub_idx_i)
657
- # seems to be a Zarr bug that prevents `set_coordinate_selection` with an
658
- # object array, so set one-by-one:
659
- arr[EAR_ID_i] = new_EAR_i.encode(attrs, self.ts_fmt)
1147
+ # get new 1D indices:
1148
+ new_idx: NDArray = np.arange(num_existing, num_tot)
660
1149
 
661
- if attrs != attrs_orig:
662
- arr.attrs.put(attrs)
1150
+ # transform to 2D indices:
1151
+ r_idx, c_idx = get_2D_idx(new_idx, num_cols=arr.shape[1])
1152
+
1153
+ # add rows to accommodate new runs:
1154
+ max_r_idx = np.max(r_idx)
1155
+ if max_r_idx + 1 > arr.shape[0]:
1156
+ arr.resize(max_r_idx + 1, arr.shape[1])
1157
+
1158
+ # fill in new data:
1159
+ for arr_add_idx_i, (r_idx_i, c_idx_i) in enumerate(zip(r_idx, c_idx)):
1160
+ # seems to be a Zarr bug that prevents `set_coordinate_selection` with an
1161
+ # object array, so set one-by-one:
1162
+ arr[r_idx_i, c_idx_i] = arr_add[arr_add_idx_i]
1163
+
1164
+ attrs["num_runs"] = num_tot
1165
+
1166
+ # add more rows to run dirs array:
1167
+ dirs_arr = self._get_dirs_arr(mode="r+")
1168
+ dirs_arr.resize(num_tot)
1169
+
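Note on the appended-runs indexing above: the runs array is now two-dimensional and chunked by row, so new runs must be mapped from their flat IDs to (row, column) positions before being written. A minimal sketch of that mapping, assuming `get_2D_idx` is essentially a divmod over the column count (the helper itself is not shown in this hunk):

    import numpy as np
    from numpy.typing import NDArray

    def get_2D_idx_sketch(idx: NDArray, num_cols: int) -> tuple[NDArray, NDArray]:
        # hypothetical stand-in for `get_2D_idx`: flat run ID -> (row, column)
        return np.divmod(idx, num_cols)

    # appending runs 1100..1104 to an array with 1000 columns:
    new_idx = np.arange(1100, 1105)
    r_idx, c_idx = get_2D_idx_sketch(new_idx, num_cols=1000)
    assert r_idx.tolist() == [1, 1, 1, 1, 1]
    assert c_idx.tolist() == [100, 101, 102, 103, 104]
    # the array is then resized to `max(r_idx) + 1` rows before the one-by-one writes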
1170
+ def _set_run_dirs(self, run_dir_arr: np.ndarray, run_idx: np.ndarray):
1171
+ dirs_arr = self._get_dirs_arr(mode="r+")
1172
+ dirs_arr[run_idx] = run_dir_arr
1173
+
1174
+ @TimeIt.decorator
1175
+ def _update_runs(self, updates: dict[int, dict[str, Any]]):
1176
+ """Update the provided EAR attribute values in the specified existing runs."""
1177
+ run_IDs = list(updates.keys())
1178
+ runs = self._get_persistent_EARs(run_IDs)
663
1179
 
664
- def _update_EAR_start(self, EAR_id: int, s_time: datetime, s_snap: Dict, s_hn: str):
665
1180
  arr = self._get_EARs_arr(mode="r+")
666
- attrs_orig = arr.attrs.asdict()
667
- attrs = copy.deepcopy(attrs_orig)
1181
+ with self.__mutate_attrs(arr) as attrs:
1182
+ # convert to 2D array indices:
1183
+ r_idx, c_idx = get_2D_idx(
1184
+ np.array(list(updates.keys())), num_cols=arr.shape[1]
1185
+ )
1186
+ for ri, ci, rID_i, upd_i in zip(
1187
+ r_idx, c_idx, updates.keys(), updates.values()
1188
+ ):
1189
+ new_run_i = runs[rID_i].update(**upd_i)
1190
+ # seems to be a Zarr bug that prevents `set_coordinate_selection` with an
1191
+ # object array, so set one-by-one:
1192
+ arr[ri, ci] = new_run_i.encode(self.ts_fmt, attrs)
668
1193
 
669
- EAR_i = self._get_persistent_EARs([EAR_id])[EAR_id]
670
- EAR_i = EAR_i.update(
671
- start_time=s_time,
672
- snapshot_start=s_snap,
673
- run_hostname=s_hn,
1194
+ @TimeIt.decorator
1195
+ def _update_EAR_submission_data(self, sub_data: Mapping[int, tuple[int, int | None]]):
1196
+ self._update_runs(
1197
+ updates={
1198
+ k: {"submission_idx": v[0], "commands_file_ID": v[1]}
1199
+ for k, v in sub_data.items()
1200
+ }
674
1201
  )
675
- arr[EAR_id] = EAR_i.encode(attrs, self.ts_fmt)
676
1202
 
677
- if attrs != attrs_orig:
678
- arr.attrs.put(attrs)
1203
+ def _update_EAR_start(
1204
+ self,
1205
+ run_starts: dict[int, tuple[datetime, dict[str, Any] | None, str, int | None]],
1206
+ ):
1207
+ self._update_runs(
1208
+ updates={
1209
+ k: {
1210
+ "start_time": v[0],
1211
+ "snapshot_start": v[1],
1212
+ "run_hostname": v[2],
1213
+ "port_number": v[3],
1214
+ }
1215
+ for k, v in run_starts.items()
1216
+ }
1217
+ )
679
1218
 
680
1219
  def _update_EAR_end(
681
- self, EAR_id: int, e_time: datetime, e_snap: Dict, ext_code: int, success: bool
1220
+ self, run_ends: dict[int, tuple[datetime, dict[str, Any] | None, int, bool]]
682
1221
  ):
683
- arr = self._get_EARs_arr(mode="r+")
684
- attrs_orig = arr.attrs.asdict()
685
- attrs = copy.deepcopy(attrs_orig)
686
-
687
- EAR_i = self._get_persistent_EARs([EAR_id])[EAR_id]
688
- EAR_i = EAR_i.update(
689
- end_time=e_time,
690
- snapshot_end=e_snap,
691
- exit_code=ext_code,
692
- success=success,
1222
+ self._update_runs(
1223
+ updates={
1224
+ k: {
1225
+ "end_time": v[0],
1226
+ "snapshot_end": v[1],
1227
+ "exit_code": v[2],
1228
+ "success": v[3],
1229
+ }
1230
+ for k, v in run_ends.items()
1231
+ }
693
1232
  )
694
- arr[EAR_id] = EAR_i.encode(attrs, self.ts_fmt)
695
1233
 
696
- if attrs != attrs_orig:
697
- arr.attrs.put(attrs)
1234
+ def _update_EAR_skip(self, skips: dict[int, int]):
1235
+ self._update_runs(updates={k: {"skip": v} for k, v in skips.items()})
698
1236
 
699
- def _update_EAR_skip(self, EAR_id: int):
700
- arr = self._get_EARs_arr(mode="r+")
701
- attrs_orig = arr.attrs.asdict()
702
- attrs = copy.deepcopy(attrs_orig)
1237
+ def _update_js_metadata(self, js_meta: dict[int, dict[int, dict[str, Any]]]):
703
1238
 
704
- EAR_i = self._get_persistent_EARs([EAR_id])[EAR_id]
705
- EAR_i = EAR_i.update(skip=True)
706
- arr[EAR_id] = EAR_i.encode(attrs, self.ts_fmt)
1239
+ arr_keys = JOBSCRIPT_SUBMIT_TIME_KEYS # these items go to the Zarr array
707
1240
 
708
- if attrs != attrs_orig:
709
- arr.attrs.put(attrs)
1241
+ # split into attributes to save to the root group metadata, and those to save to
1242
+ # the submit-time jobscript metadata array
710
1243
 
711
- def _update_js_metadata(self, js_meta: Dict):
712
- with self.using_resource("attrs", action="update") as attrs:
713
- for sub_idx, all_js_md in js_meta.items():
714
- for js_idx, js_meta_i in all_js_md.items():
715
- attrs["submissions"][sub_idx]["jobscripts"][js_idx].update(
716
- **js_meta_i
1244
+ grp_dat = {} # keys are tuples of (sub_idx, js_idx), values are metadata dicts
1245
+
1246
+ for sub_idx, all_js_md in js_meta.items():
1247
+ js_arr = None
1248
+ for js_idx, js_meta_i in all_js_md.items():
1249
+
1250
+ grp_dat_i = {k: v for k, v in js_meta_i.items() if k not in arr_keys}
1251
+ if grp_dat_i:
1252
+ grp_dat[(sub_idx, js_idx)] = grp_dat_i
1253
+ arr_dat = [js_meta_i.get(k) for k in arr_keys]
1254
+
1255
+ if any(arr_dat):
1256
+ # we are updating the at-submit metadata, so clear the cache:
1257
+ self.clear_jobscript_at_submit_metadata_cache()
1258
+
1259
+ js_arr = js_arr or self._get_jobscripts_at_submit_metadata_arr(
1260
+ mode="r+", sub_idx=sub_idx
717
1261
  )
1262
+ self.logger.info(
1263
+ f"updating submit-time jobscript metadata array: {arr_dat!r}."
1264
+ )
1265
+ js_arr[js_idx] = arr_dat
1266
+
1267
+ if grp_dat:
1268
+ with self.using_resource("attrs", action="update") as attrs:
1269
+ for (sub_idx, js_idx), js_meta_i in grp_dat.items():
1270
+ self.logger.info(
1271
+ f"updating jobscript metadata in the root group for "
1272
+ f"(sub={sub_idx}, js={js_idx}): {js_meta_i!r}."
1273
+ )
1274
+ sub = cast(
1275
+ "dict[str, list[dict[str, Any]]]", attrs["submissions"][sub_idx]
1276
+ )
1277
+ sub["jobscripts"][js_idx].update(js_meta_i)
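The rewritten `_update_js_metadata` routes each jobscript's metadata to one of two places: keys listed in `JOBSCRIPT_SUBMIT_TIME_KEYS` go to the per-submission submit-time array, everything else to the root-group attributes. A small illustration of the split, using a hypothetical key tuple in place of the real constant:

    from typing import Any

    SUBMIT_TIME_KEYS = ("submit_time", "scheduler_job_ID", "process_ID")  # hypothetical

    def split_js_metadata(js_meta_i: dict[str, Any]) -> tuple[dict[str, Any], list[Any]]:
        # keys not in the submit-time set are stored as root-group attributes
        grp_dat_i = {k: v for k, v in js_meta_i.items() if k not in SUBMIT_TIME_KEYS}
        # submit-time keys become one row of the jobscript metadata array
        arr_dat = [js_meta_i.get(k) for k in SUBMIT_TIME_KEYS]
        return grp_dat_i, arr_dat

    grp, row = split_js_metadata({"submit_time": "2025-02-01T12:00:00", "version_info": {}})
    assert grp == {"version_info": {}}
    assert row == ["2025-02-01T12:00:00", None, None]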
718
1278
 
719
- def _append_parameters(self, params: List[ZarrStoreParameter]):
1279
+ def _append_parameters(self, params: Sequence[StoreParameter]):
720
1280
  """Add new persistent parameters."""
721
1281
  base_arr = self._get_parameter_base_array(mode="r+", write_empty_chunks=False)
722
1282
  src_arr = self._get_parameter_sources_array(mode="r+")
@@ -725,8 +1285,8 @@ class ZarrPersistentStore(PersistentStore):
725
1285
  )
726
1286
 
727
1287
  param_encode_root_group = self._get_parameter_user_array_group(mode="r+")
728
- param_enc = []
729
- src_enc = []
1288
+ param_enc: list[dict[str, Any] | int] = []
1289
+ src_enc: list[dict] = []
730
1290
  for param_i in params:
731
1291
  dat_i = param_i.encode(
732
1292
  root_group=param_encode_root_group,
@@ -741,16 +1301,15 @@ class ZarrPersistentStore(PersistentStore):
741
1301
  f"PersistentStore._append_parameters: finished adding {len(params)} parameters."
742
1302
  )
743
1303
 
744
- def _set_parameter_values(self, set_parameters: Dict[int, Tuple[Any, bool]]):
1304
+ def _set_parameter_values(self, set_parameters: dict[int, tuple[Any, bool]]):
745
1305
  """Set multiple unset persistent parameters."""
746
1306
 
747
- param_ids = list(set_parameters.keys())
1307
+ param_ids = list(set_parameters)
748
1308
  # the `decode` call in `_get_persistent_parameters` should be quick:
749
1309
  params = self._get_persistent_parameters(param_ids)
750
- new_data = []
1310
+ new_data: list[dict[str, Any] | int] = []
751
1311
  param_encode_root_group = self._get_parameter_user_array_group(mode="r+")
752
1312
  for param_id, (value, is_file) in set_parameters.items():
753
-
754
1313
  param_i = params[param_id]
755
1314
  if is_file:
756
1315
  param_i = param_i.set_file(value)
@@ -768,19 +1327,19 @@ class ZarrPersistentStore(PersistentStore):
768
1327
  base_arr = self._get_parameter_base_array(mode="r+")
769
1328
  base_arr.set_coordinate_selection(param_ids, new_data)
770
1329
 
771
- def _update_parameter_sources(self, sources: Dict[int, Dict]):
1330
+ def _update_parameter_sources(self, sources: Mapping[int, ParamSource]):
772
1331
  """Update the sources of multiple persistent parameters."""
773
1332
 
774
- param_ids = list(sources.keys())
1333
+ param_ids = list(sources)
775
1334
  src_arr = self._get_parameter_sources_array(mode="r+")
776
1335
  existing_sources = src_arr.get_coordinate_selection(param_ids)
777
- new_sources = []
778
- for idx, source_i in enumerate(sources.values()):
779
- new_src_i = update_param_source_dict(existing_sources[idx], source_i)
780
- new_sources.append(new_src_i)
1336
+ new_sources = [
1337
+ update_param_source_dict(cast("ParamSource", existing_sources[idx]), source_i)
1338
+ for idx, source_i in enumerate(sources.values())
1339
+ ]
781
1340
  src_arr.set_coordinate_selection(param_ids, new_sources)
782
1341
 
783
- def _update_template_components(self, tc: Dict):
1342
+ def _update_template_components(self, tc: dict[str, Any]):
784
1343
  with self.using_resource("attrs", "update") as md:
785
1344
  md["template_components"] = tc
786
1345
 
@@ -819,7 +1378,7 @@ class ZarrPersistentStore(PersistentStore):
819
1378
  if self.use_cache and self.num_EARs_cache is not None:
820
1379
  num = self.num_EARs_cache
821
1380
  else:
822
- num = len(self._get_EARs_arr())
1381
+ num = self._get_EARs_arr().attrs["num_runs"]
823
1382
  if self.use_cache and self.num_EARs_cache is None:
824
1383
  self.num_EARs_cache = num
825
1384
  return num
@@ -832,46 +1391,55 @@ class ZarrPersistentStore(PersistentStore):
832
1391
  return attrs["num_added_tasks"]
833
1392
 
834
1393
  @property
835
- def zarr_store(self) -> zarr.storage.Store:
1394
+ def zarr_store(self) -> Store:
836
1395
  """
837
1396
  The underlying store object.
838
1397
  """
839
1398
  if self._zarr_store is None:
1399
+ assert self.fs is not None
840
1400
  self._zarr_store = self._get_zarr_store(self.path, self.fs)
841
1401
  return self._zarr_store
842
1402
 
843
- def _get_root_group(self, mode: str = "r", **kwargs) -> zarr.Group:
1403
+ def _get_root_group(self, mode: str = "r", **kwargs) -> Group:
1404
+ # TODO: investigate if there are inefficiencies in how we retrieve zarr groups
1405
+ # and arrays, e.g. opening sub groups sequentially would open the root group
1406
+ # multiple times, and so read the root group attrs file multiple times?
1407
+ # it might make sense to define a ZarrAttrsStoreResource for each zarr group and
1408
+ # array (or at least non-parameter groups/arrays?), there could be some built-in
1409
+ # understanding of the hierarchy (e.g. via a `path` attribute) which would then
1410
+ # avoid reading parent groups multiple times --- if that is happening currently.
844
1411
  return zarr.open(self.zarr_store, mode=mode, **kwargs)
845
1412
 
846
- def _get_parameter_group(self, mode: str = "r", **kwargs) -> zarr.Group:
1413
+ def _get_parameter_group(self, mode: str = "r", **kwargs) -> Group:
847
1414
  return self._get_root_group(mode=mode, **kwargs).get(self._param_grp_name)
848
1415
 
849
- def _get_parameter_base_array(self, mode: str = "r", **kwargs) -> zarr.Array:
1416
+ def _get_parameter_base_array(self, mode: str = "r", **kwargs) -> Array:
850
1417
  path = f"{self._param_grp_name}/{self._param_base_arr_name}"
851
1418
  return zarr.open(self.zarr_store, mode=mode, path=path, **kwargs)
852
1419
 
853
- def _get_parameter_sources_array(self, mode: str = "r") -> zarr.Array:
1420
+ def _get_parameter_sources_array(self, mode: str = "r") -> Array:
854
1421
  return self._get_parameter_group(mode=mode).get(self._param_sources_arr_name)
855
1422
 
856
- def _get_parameter_user_array_group(self, mode: str = "r") -> zarr.Group:
1423
+ def _get_parameter_user_array_group(self, mode: str = "r") -> Group:
857
1424
  return self._get_parameter_group(mode=mode).get(self._param_user_arr_grp_name)
858
1425
 
859
1426
  def _get_parameter_data_array_group(
860
1427
  self,
861
1428
  parameter_idx: int,
862
1429
  mode: str = "r",
863
- ) -> zarr.Group:
1430
+ ) -> Group:
864
1431
  return self._get_parameter_user_array_group(mode=mode).get(
865
1432
  self._param_data_arr_grp_name(parameter_idx)
866
1433
  )
867
1434
 
868
- def _get_array_group_and_dataset(self, mode: str, param_id: int, data_path):
1435
+ def _get_array_group_and_dataset(
1436
+ self, mode: str, param_id: int, data_path: list[int]
1437
+ ):
869
1438
  base_dat = self._get_parameter_base_array(mode="r")[param_id]
870
- arr_idx = None
871
1439
  for arr_dat_path, arr_idx in base_dat["type_lookup"]["arrays"]:
872
1440
  if arr_dat_path == data_path:
873
1441
  break
874
- if arr_idx is None:
1442
+ else:
875
1443
  raise ValueError(
876
1444
  f"Could not find array path {data_path} in the base data for parameter "
877
1445
  f"ID {param_id}."
@@ -881,21 +1449,72 @@ class ZarrPersistentStore(PersistentStore):
881
1449
  )
882
1450
  return group, f"arr_{arr_idx}"
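The sentinel-based lookup in `_get_array_group_and_dataset` is replaced here by Python's `for`/`else`: the `else` clause runs only when the loop finishes without hitting `break`, so a missing path raises immediately and no `arr_idx = None` initialisation is needed. A minimal standalone illustration of the same pattern:

    def find_array_index(type_lookup_arrays: list[tuple[list[int], int]], data_path: list[int]) -> int:
        for arr_dat_path, arr_idx in type_lookup_arrays:
            if arr_dat_path == data_path:
                break  # found: the else clause is skipped
        else:
            raise ValueError(f"Could not find array path {data_path}.")
        return arr_idx

    assert find_array_index([([0], 0), ([1, 2], 1)], [1, 2]) == 1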
883
1451
 
884
- def _get_metadata_group(self, mode: str = "r") -> zarr.Group:
885
- return self._get_root_group(mode=mode).get("metadata")
1452
+ def _get_metadata_group(self, mode: str = "r") -> Group:
1453
+ try:
1454
+ path = Path(self.workflow.url).joinpath("metadata")
1455
+ md_store = zarr.NestedDirectoryStore(path)
1456
+ return zarr.open_group(store=md_store, mode=mode)
1457
+ except (FileNotFoundError, zarr.errors.GroupNotFoundError):
1458
+ # zip store?
1459
+ return zarr.open_group(self.zarr_store, path="metadata", mode=mode)
1460
+
1461
+ def _get_all_submissions_metadata_group(self, mode: str = "r") -> Group:
1462
+ return self._get_metadata_group(mode=mode).get(self._subs_md_group_name)
1463
+
1464
+ def _get_submission_metadata_group(self, sub_idx: int, mode: str = "r") -> Group:
1465
+ return self._get_all_submissions_metadata_group(mode=mode).get(sub_idx)
1466
+
1467
+ def _get_submission_metadata_group_path(self, sub_idx: int) -> Path:
1468
+ grp = self._get_submission_metadata_group(sub_idx)
1469
+ return Path(grp.store.path).joinpath(grp.path)
1470
+
1471
+ def _get_jobscripts_at_submit_metadata_arr(
1472
+ self, sub_idx: int, mode: str = "r"
1473
+ ) -> Array:
1474
+ return self._get_submission_metadata_group(sub_idx=sub_idx, mode=mode).get(
1475
+ self._js_at_submit_md_arr_name
1476
+ )
1477
+
1478
+ def _get_jobscripts_at_submit_metadata_arr_path(self, sub_idx: int) -> Path:
1479
+ arr = self._get_jobscripts_at_submit_metadata_arr(sub_idx)
1480
+ return Path(arr.store.path).joinpath(arr.path)
1481
+
1482
+ @TimeIt.decorator
1483
+ def _get_jobscripts_run_ID_arr(self, sub_idx: int, mode: str = "r") -> Array:
1484
+ return self._get_submission_metadata_group(sub_idx=sub_idx, mode=mode).get(
1485
+ self._js_run_IDs_arr_name
1486
+ )
1487
+
1488
+ def _get_jobscripts_task_elements_arr(self, sub_idx: int, mode: str = "r") -> Array:
1489
+ return self._get_submission_metadata_group(sub_idx=sub_idx, mode=mode).get(
1490
+ self._js_task_elems_arr_name
1491
+ )
1492
+
1493
+ def _get_jobscripts_task_actions_arr(self, sub_idx: int, mode: str = "r") -> Array:
1494
+ return self._get_submission_metadata_group(sub_idx=sub_idx, mode=mode).get(
1495
+ self._js_task_acts_arr_name
1496
+ )
1497
+
1498
+ def _get_jobscripts_dependencies_arr(self, sub_idx: int, mode: str = "r") -> Array:
1499
+ return self._get_submission_metadata_group(sub_idx=sub_idx, mode=mode).get(
1500
+ self._js_deps_arr_name
1501
+ )
886
1502
 
887
- def _get_tasks_arr(self, mode: str = "r") -> zarr.Array:
1503
+ def _get_tasks_arr(self, mode: str = "r") -> Array:
888
1504
  return self._get_metadata_group(mode=mode).get(self._task_arr_name)
889
1505
 
890
- def _get_elements_arr(self, mode: str = "r") -> zarr.Array:
1506
+ def _get_elements_arr(self, mode: str = "r") -> Array:
891
1507
  return self._get_metadata_group(mode=mode).get(self._elem_arr_name)
892
1508
 
893
- def _get_iters_arr(self, mode: str = "r") -> zarr.Array:
1509
+ def _get_iters_arr(self, mode: str = "r") -> Array:
894
1510
  return self._get_metadata_group(mode=mode).get(self._iter_arr_name)
895
1511
 
896
- def _get_EARs_arr(self, mode: str = "r") -> zarr.Array:
1512
+ def _get_EARs_arr(self, mode: str = "r") -> Array:
897
1513
  return self._get_metadata_group(mode=mode).get(self._EAR_arr_name)
898
1514
 
1515
+ def _get_dirs_arr(self, mode: str = "r") -> zarr.Array:
1516
+ return self._get_metadata_group(mode=mode).get(self._run_dir_arr_name)
1517
+
899
1518
  @classmethod
900
1519
  def make_test_store_from_spec(
901
1520
  cls,
@@ -905,10 +1524,10 @@ class ZarrPersistentStore(PersistentStore):
905
1524
  overwrite=False,
906
1525
  ):
907
1526
  """Generate an store for testing purposes."""
1527
+ ts_fmt = "FIXME"
908
1528
 
909
1529
  path = Path(dir or "", path)
910
- store = zarr.DirectoryStore(path)
911
- root = zarr.group(store=store, overwrite=overwrite)
1530
+ root = zarr.group(store=DirectoryStore(path), overwrite=overwrite)
912
1531
  md = root.create_group("metadata")
913
1532
 
914
1533
  tasks_arr = md.create_dataset(
@@ -922,7 +1541,7 @@ class ZarrPersistentStore(PersistentStore):
922
1541
  name=cls._elem_arr_name,
923
1542
  shape=0,
924
1543
  dtype=object,
925
- object_codec=MsgPack(),
1544
+ object_codec=cls._CODEC,
926
1545
  chunks=1000,
927
1546
  )
928
1547
  elems_arr.attrs.update({"seq_idx": [], "src_idx": []})
@@ -931,7 +1550,7 @@ class ZarrPersistentStore(PersistentStore):
931
1550
  name=cls._iter_arr_name,
932
1551
  shape=0,
933
1552
  dtype=object,
934
- object_codec=MsgPack(),
1553
+ object_codec=cls._CODEC,
935
1554
  chunks=1000,
936
1555
  )
937
1556
  elem_iters_arr.attrs.update(
@@ -946,12 +1565,12 @@ class ZarrPersistentStore(PersistentStore):
946
1565
  name=cls._EAR_arr_name,
947
1566
  shape=0,
948
1567
  dtype=object,
949
- object_codec=MsgPack(),
1568
+ object_codec=cls._CODEC,
950
1569
  chunks=1000,
951
1570
  )
952
- EARs_arr.attrs.update({"parameter_paths": []})
1571
+ EARs_arr.attrs["parameter_paths"] = []
953
1572
 
954
- tasks, elems, elem_iters, EARs = super().prepare_test_store_from_spec(spec)
1573
+ tasks, elems, elem_iters, EARs_ = super().prepare_test_store_from_spec(spec)
955
1574
 
956
1575
  path = Path(path).resolve()
957
1576
  tasks = [ZarrStoreTask(**i).encode() for i in tasks]
@@ -960,21 +1579,13 @@ class ZarrPersistentStore(PersistentStore):
960
1579
  ZarrStoreElementIter(**i).encode(elem_iters_arr.attrs.asdict())
961
1580
  for i in elem_iters
962
1581
  ]
963
- EARs = [ZarrStoreEAR(**i).encode(EARs_arr.attrs.asdict()) for i in EARs]
1582
+ EARs = [ZarrStoreEAR(**i).encode(ts_fmt, EARs_arr.attrs.asdict()) for i in EARs_]
964
1583
 
965
1584
  append_items_to_ragged_array(tasks_arr, tasks)
966
1585
 
967
- elem_arr_add = np.empty((len(elements)), dtype=object)
968
- elem_arr_add[:] = elements
969
- elems_arr.append(elem_arr_add)
970
-
971
- iter_arr_add = np.empty((len(elem_iters)), dtype=object)
972
- iter_arr_add[:] = elem_iters
973
- elem_iters_arr.append(iter_arr_add)
974
-
975
- EAR_arr_add = np.empty((len(EARs)), dtype=object)
976
- EAR_arr_add[:] = EARs
977
- EARs_arr.append(EAR_arr_add)
1586
+ elems_arr.append(np.fromiter(elements, dtype=object))
1587
+ elem_iters_arr.append(np.fromiter(elem_iters, dtype=object))
1588
+ EARs_arr.append(np.fromiter(EARs, dtype=object))
978
1589
 
979
1590
  return cls(path)
980
1591
 
@@ -982,17 +1593,18 @@ class ZarrPersistentStore(PersistentStore):
982
1593
  with self.using_resource("attrs", "read") as attrs:
983
1594
  return attrs["template_components"]
984
1595
 
985
- def _get_persistent_template(self):
1596
+ def _get_persistent_template(self) -> dict[str, JSONed]:
986
1597
  with self.using_resource("attrs", "read") as attrs:
987
- return attrs["template"]
1598
+ return cast("dict[str, JSONed]", attrs["template"])
988
1599
 
989
1600
  @TimeIt.decorator
990
- def _get_persistent_tasks(self, id_lst: Iterable[int]) -> Dict[int, ZarrStoreTask]:
1601
+ def _get_persistent_tasks(self, id_lst: Iterable[int]) -> dict[int, ZarrStoreTask]:
991
1602
  tasks, id_lst = self._get_cached_persistent_tasks(id_lst)
992
1603
  if id_lst:
993
1604
  with self.using_resource("attrs", action="read") as attrs:
994
- task_dat = {}
995
- elem_IDs = []
1605
+ task_dat: dict[int, dict[str, Any]] = {}
1606
+ elem_IDs: list[int] = []
1607
+ i: dict[str, Any]
996
1608
  for idx, i in enumerate(attrs["tasks"]):
997
1609
  i = copy.deepcopy(i)
998
1610
  elem_IDs.append(i.pop("element_IDs_idx"))
@@ -1003,65 +1615,62 @@ class ZarrPersistentStore(PersistentStore):
1003
1615
  elem_IDs_arr_dat = self._get_tasks_arr().get_coordinate_selection(
1004
1616
  elem_IDs
1005
1617
  )
1006
- except zarr.errors.BoundsCheckError:
1618
+ except BoundsCheckError:
1007
1619
  raise MissingStoreTaskError(
1008
1620
  elem_IDs
1009
1621
  ) from None # TODO: not an ID list
1010
1622
 
1011
1623
  new_tasks = {
1012
1624
  id_: ZarrStoreTask.decode({**i, "element_IDs": elem_IDs_arr_dat[id_]})
1013
- for idx, (id_, i) in enumerate(task_dat.items())
1625
+ for id_, i in task_dat.items()
1014
1626
  }
1015
- else:
1016
- new_tasks = {}
1017
- self.task_cache.update(new_tasks)
1018
- tasks.update(new_tasks)
1627
+ self.task_cache.update(new_tasks)
1628
+ tasks.update(new_tasks)
1019
1629
  return tasks
1020
1630
 
1021
1631
  @TimeIt.decorator
1022
- def _get_persistent_loops(self, id_lst: Optional[Iterable[int]] = None):
1632
+ def _get_persistent_loops(
1633
+ self, id_lst: Iterable[int] | None = None
1634
+ ) -> dict[int, LoopDescriptor]:
1023
1635
  with self.using_resource("attrs", "read") as attrs:
1024
- loop_dat = {
1025
- idx: i
1636
+ return {
1637
+ idx: cast("LoopDescriptor", i)
1026
1638
  for idx, i in enumerate(attrs["loops"])
1027
1639
  if id_lst is None or idx in id_lst
1028
1640
  }
1029
- return loop_dat
1030
1641
 
1031
1642
  @TimeIt.decorator
1032
- def _get_persistent_submissions(self, id_lst: Optional[Iterable[int]] = None):
1643
+ def _get_persistent_submissions(
1644
+ self, id_lst: Iterable[int] | None = None
1645
+ ) -> dict[int, Mapping[str, JSONed]]:
1033
1646
  self.logger.debug("loading persistent submissions from the zarr store")
1647
+ ids = set(id_lst or ())
1034
1648
  with self.using_resource("attrs", "read") as attrs:
1035
1649
  subs_dat = copy.deepcopy(
1036
1650
  {
1037
1651
  idx: i
1038
1652
  for idx, i in enumerate(attrs["submissions"])
1039
- if id_lst is None or idx in id_lst
1653
+ if id_lst is None or idx in ids
1040
1654
  }
1041
1655
  )
1042
- # cast jobscript submit-times and jobscript `task_elements` keys:
1043
- for sub_idx, sub in subs_dat.items():
1044
- for js_idx, js in enumerate(sub["jobscripts"]):
1045
- for key in list(js["task_elements"].keys()):
1046
- subs_dat[sub_idx]["jobscripts"][js_idx]["task_elements"][
1047
- int(key)
1048
- ] = subs_dat[sub_idx]["jobscripts"][js_idx]["task_elements"].pop(
1049
- key
1050
- )
1051
1656
 
1052
1657
  return subs_dat
1053
1658
 
1054
1659
  @TimeIt.decorator
1055
1660
  def _get_persistent_elements(
1056
1661
  self, id_lst: Iterable[int]
1057
- ) -> Dict[int, ZarrStoreElement]:
1662
+ ) -> dict[int, ZarrStoreElement]:
1058
1663
  elems, id_lst = self._get_cached_persistent_elements(id_lst)
1059
1664
  if id_lst:
1665
+ self.logger.debug(
1666
+ f"loading {len(id_lst)} persistent element(s) from disk: "
1667
+ f"{shorten_list_str(id_lst)}."
1668
+ )
1060
1669
  arr = self._get_elements_arr()
1061
1670
  attrs = arr.attrs.asdict()
1062
1671
  try:
1063
1672
  elem_arr_dat = arr.get_coordinate_selection(id_lst)
1064
- except zarr.errors.BoundsCheckError:
1673
+ except BoundsCheckError:
1065
1674
  raise MissingStoreElementError(id_lst) from None
1066
1675
  elem_dat = dict(zip(id_lst, elem_arr_dat))
1067
1676
  new_elems = {
@@ -1074,14 +1683,18 @@ class ZarrPersistentStore(PersistentStore):
1074
1683
  @TimeIt.decorator
1075
1684
  def _get_persistent_element_iters(
1076
1685
  self, id_lst: Iterable[int]
1077
- ) -> Dict[int, ZarrStoreElementIter]:
1686
+ ) -> dict[int, ZarrStoreElementIter]:
1078
1687
  iters, id_lst = self._get_cached_persistent_element_iters(id_lst)
1079
1688
  if id_lst:
1689
+ self.logger.debug(
1690
+ f"loading {len(id_lst)} persistent element iteration(s) from disk: "
1691
+ f"{shorten_list_str(id_lst)}."
1692
+ )
1080
1693
  arr = self._get_iters_arr()
1081
1694
  attrs = arr.attrs.asdict()
1082
1695
  try:
1083
1696
  iter_arr_dat = arr.get_coordinate_selection(id_lst)
1084
- except zarr.errors.BoundsCheckError:
1697
+ except BoundsCheckError:
1085
1698
  raise MissingStoreElementIterationError(id_lst) from None
1086
1699
  iter_dat = dict(zip(id_lst, iter_arr_dat))
1087
1700
  new_iters = {
@@ -1092,19 +1705,29 @@ class ZarrPersistentStore(PersistentStore):
1092
1705
  return iters
1093
1706
 
1094
1707
  @TimeIt.decorator
1095
- def _get_persistent_EARs(self, id_lst: Iterable[int]) -> Dict[int, ZarrStoreEAR]:
1708
+ def _get_persistent_EARs(self, id_lst: Iterable[int]) -> dict[int, ZarrStoreEAR]:
1096
1709
  runs, id_lst = self._get_cached_persistent_EARs(id_lst)
1097
1710
  if id_lst:
1711
+ self.logger.debug(
1712
+ f"loading {len(id_lst)} persistent EAR(s) from disk: "
1713
+ f"{shorten_list_str(id_lst)}."
1714
+ )
1098
1715
  arr = self._get_EARs_arr()
1099
1716
  attrs = arr.attrs.asdict()
1717
+ sel: tuple[NDArray, NDArray] | list[int]
1100
1718
  try:
1101
- self.logger.debug(f"_get_persistent_EARs: {id_lst=}")
1102
- EAR_arr_dat = _zarr_get_coord_selection(arr, id_lst, self.logger)
1103
- except zarr.errors.BoundsCheckError:
1719
+ # convert to 2D array indices:
1720
+ sel = get_2D_idx(np.array(id_lst), num_cols=arr.shape[1])
1721
+ except IndexError:
1722
+ # 1D runs array from before update to 2D in Feb 2025 refactor/jobscript:
1723
+ sel = id_lst
1724
+ try:
1725
+ EAR_arr_dat = _zarr_get_coord_selection(arr, sel, self.logger)
1726
+ except BoundsCheckError:
1104
1727
  raise MissingStoreEARError(id_lst) from None
1105
1728
  EAR_dat = dict(zip(id_lst, EAR_arr_dat))
1106
1729
  new_runs = {
1107
- k: ZarrStoreEAR.decode(EAR_dat=v, attrs=attrs, ts_fmt=self.ts_fmt)
1730
+ k: ZarrStoreEAR.decode(EAR_dat=v, ts_fmt=self.ts_fmt, attrs=attrs)
1108
1731
  for k, v in EAR_dat.items()
1109
1732
  }
1110
1733
  self.EAR_cache.update(new_runs)
@@ -1114,20 +1737,25 @@ class ZarrPersistentStore(PersistentStore):
1114
1737
 
1115
1738
  @TimeIt.decorator
1116
1739
  def _get_persistent_parameters(
1117
- self,
1118
- id_lst: Iterable[int],
1119
- dataset_copy: Optional[bool] = False,
1120
- ) -> Dict[int, ZarrStoreParameter]:
1121
-
1740
+ self, id_lst: Iterable[int], *, dataset_copy: bool = False, **kwargs
1741
+ ) -> dict[int, ZarrStoreParameter]:
1122
1742
  params, id_lst = self._get_cached_persistent_parameters(id_lst)
1123
1743
  if id_lst:
1744
+
1745
+ self.logger.debug(
1746
+ f"loading {len(id_lst)} persistent parameter(s) from disk: "
1747
+ f"{shorten_list_str(id_lst)}."
1748
+ )
1749
+
1750
+ # TODO: implement the "parameter_metadata_cache" for zarr stores, which would
1751
+ # keep the base_arr and src_arr open
1124
1752
  base_arr = self._get_parameter_base_array(mode="r")
1125
1753
  src_arr = self._get_parameter_sources_array(mode="r")
1126
1754
 
1127
1755
  try:
1128
1756
  param_arr_dat = base_arr.get_coordinate_selection(list(id_lst))
1129
1757
  src_arr_dat = src_arr.get_coordinate_selection(list(id_lst))
1130
- except zarr.errors.BoundsCheckError:
1758
+ except BoundsCheckError:
1131
1759
  raise MissingParameterData(id_lst) from None
1132
1760
 
1133
1761
  param_dat = dict(zip(id_lst, param_arr_dat))
@@ -1149,13 +1777,15 @@ class ZarrPersistentStore(PersistentStore):
1149
1777
  return params
1150
1778
 
1151
1779
  @TimeIt.decorator
1152
- def _get_persistent_param_sources(self, id_lst: Iterable[int]) -> Dict[int, Dict]:
1780
+ def _get_persistent_param_sources(
1781
+ self, id_lst: Iterable[int]
1782
+ ) -> dict[int, ParamSource]:
1153
1783
  sources, id_lst = self._get_cached_persistent_param_sources(id_lst)
1154
1784
  if id_lst:
1155
1785
  src_arr = self._get_parameter_sources_array(mode="r")
1156
1786
  try:
1157
1787
  src_arr_dat = src_arr.get_coordinate_selection(list(id_lst))
1158
- except zarr.errors.BoundsCheckError:
1788
+ except BoundsCheckError:
1159
1789
  raise MissingParameterData(id_lst) from None
1160
1790
  new_sources = dict(zip(id_lst, src_arr_dat))
1161
1791
  self.param_sources_cache.update(new_sources)
@@ -1164,20 +1794,267 @@ class ZarrPersistentStore(PersistentStore):
1164
1794
 
1165
1795
  def _get_persistent_parameter_set_status(
1166
1796
  self, id_lst: Iterable[int]
1167
- ) -> Dict[int, bool]:
1797
+ ) -> dict[int, bool]:
1168
1798
  base_arr = self._get_parameter_base_array(mode="r")
1169
1799
  try:
1170
1800
  param_arr_dat = base_arr.get_coordinate_selection(list(id_lst))
1171
- except zarr.errors.BoundsCheckError:
1801
+ except BoundsCheckError:
1172
1802
  raise MissingParameterData(id_lst) from None
1173
1803
 
1174
1804
  return dict(zip(id_lst, [i is not None for i in param_arr_dat]))
1175
1805
 
1176
- def _get_persistent_parameter_IDs(self) -> List[int]:
1806
+ def _get_persistent_parameter_IDs(self) -> list[int]:
1177
1807
  # we assume the row index is equivalent to ID, might need to revisit in future
1178
1808
  base_arr = self._get_parameter_base_array(mode="r")
1179
1809
  return list(range(len(base_arr)))
1180
1810
 
1811
+ def get_submission_at_submit_metadata(
1812
+ self, sub_idx: int, metadata_attr: dict | None
1813
+ ) -> dict[str, Any]:
1814
+ """Retrieve the values of submission attributes that are stored at submit-time."""
1815
+ grp = self._get_submission_metadata_group(sub_idx)
1816
+ attrs = grp.attrs.asdict()
1817
+ return {k: attrs[k] for k in SUBMISSION_SUBMIT_TIME_KEYS}
1818
+
1819
+ def clear_jobscript_at_submit_metadata_cache(self):
1820
+ """Clear the cache of at-submit-time jobscript metadata."""
1821
+ self._jobscript_at_submit_metadata = {}
1822
+
1823
+ def get_jobscript_at_submit_metadata(
1824
+ self,
1825
+ sub_idx: int,
1826
+ js_idx: int,
1827
+ metadata_attr: dict | None,
1828
+ ) -> dict[str, Any]:
1829
+ """For the specified jobscript, retrieve the values of jobscript-submit-time
1830
+ attributes.
1831
+
1832
+ Notes
1833
+ -----
1834
+ If the cache does not exist, this method will retrieve and cache metadata for
1835
+ all jobscripts for which metadata has been set. If the cache does exist, but not
1836
+ for the requested jobscript, then this method will retrieve and cache metadata for
1837
+ all non-cached jobscripts for which metadata has been set. If metadata has not
1838
+ yet been set for the specified jobscript, and dict with all `None` values will be
1839
+ returned.
1840
+
1841
+ The cache can be cleared using the method
1842
+ `clear_jobscript_at_submit_metadata_cache`.
1843
+
1844
+ """
1845
+ if self._jobscript_at_submit_metadata:
1846
+ # cache exists, but might not include data for the requested jobscript:
1847
+ if js_idx in self._jobscript_at_submit_metadata:
1848
+ return self._jobscript_at_submit_metadata[js_idx]
1849
+
1850
+ arr = self._get_jobscripts_at_submit_metadata_arr(sub_idx)
1851
+ non_cached = set(range(len(arr))) - set(self._jobscript_at_submit_metadata.keys())
1852
+
1853
+ # populate cache:
1854
+ arr_non_cached = arr.get_coordinate_selection((list(non_cached),))
1855
+ for js_idx_i, arr_item in zip(non_cached, arr_non_cached):
1856
+ try:
1857
+ self._jobscript_at_submit_metadata[js_idx_i] = {
1858
+ i: arr_item[i_idx]
1859
+ for i_idx, i in enumerate(JOBSCRIPT_SUBMIT_TIME_KEYS)
1860
+ }
1861
+ except TypeError:
1862
+ # data for this jobscript is not set
1863
+ pass
1864
+
1865
+ if js_idx not in self._jobscript_at_submit_metadata:
1866
+ return {i: None for i in JOBSCRIPT_SUBMIT_TIME_KEYS}
1867
+
1868
+ return self._jobscript_at_submit_metadata[js_idx]
1869
+
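On a cache miss, every not-yet-cached row of the submit-time array is decoded by pairing it with the key names; a row that is still `None` raises `TypeError` and is simply skipped. A hedged sketch of that row-to-dict step (the key names here are illustrative, the real ones come from `JOBSCRIPT_SUBMIT_TIME_KEYS`):

    KEYS = ("submit_time", "scheduler_job_ID", "process_ID")  # illustrative only

    def decode_js_row(arr_item) -> dict | None:
        try:
            return {key: arr_item[idx] for idx, key in enumerate(KEYS)}
        except TypeError:
            # row is still None: metadata not yet set for this jobscript
            return None

    assert decode_js_row(None) is None
    assert decode_js_row(["2025-02-01T12:00:00", "1234", 999]) == {
        "submit_time": "2025-02-01T12:00:00",
        "scheduler_job_ID": "1234",
        "process_ID": 999,
    }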
1870
+ @TimeIt.decorator
1871
+ def get_jobscript_block_run_ID_array(
1872
+ self,
1873
+ sub_idx: int,
1874
+ js_idx: int,
1875
+ blk_idx: int,
1876
+ run_ID_arr: NDArray | None,
1877
+ ) -> NDArray:
1878
+ """For the specified jobscript-block, retrieve the run ID array."""
1879
+
1880
+ if run_ID_arr is not None:
1881
+ self.logger.debug("jobscript-block run IDs are still in memory.")
1882
+ # in the special case when the Submission object has just been created, the
1883
+ # run ID arrays will not yet be persistent.
1884
+ return np.asarray(run_ID_arr)
1885
+
1886
+ # otherwise, `append_submissions` has been called, the run IDs have been
1887
+ # removed from the JSON-representation of the submission object, and have been
1888
+ # saved in separate zarr arrays:
1889
+ if sub_idx not in self._jobscript_run_ID_arrays:
1890
+
1891
+ self.logger.debug(
1892
+ f"retrieving jobscript-block run IDs for submission {sub_idx} from disk,"
1893
+ f" and caching."
1894
+ )
1895
+
1896
+ # for a given submission, run IDs are stored for all jobscript-blocks in the
1897
+ # same array (and chunk), so retrieve all of them and cache:
1898
+
1899
+ arr = self._get_jobscripts_run_ID_arr(sub_idx)
1900
+ arr_dat = arr[:]
1901
+ block_shapes = arr.attrs["block_shapes"]
1902
+
1903
+ self._jobscript_run_ID_arrays[sub_idx] = {} # keyed by (js_idx, blk_idx)
1904
+ arr_idx = 0
1905
+ for js_idx_i, js_blk_shapes in enumerate(block_shapes):
1906
+ for blk_idx_j, blk_shape_j in enumerate(js_blk_shapes):
1907
+ self._jobscript_run_ID_arrays[sub_idx][
1908
+ (js_idx_i, blk_idx_j)
1909
+ ] = arr_dat[arr_idx, : blk_shape_j[0], : blk_shape_j[1]]
1910
+ arr_idx += 1
1911
+
1912
+ else:
1913
+ self.logger.debug(
1914
+ f"retrieving jobscript-block run IDs for submission {sub_idx} from cache."
1915
+ )
1916
+
1917
+ return self._jobscript_run_ID_arrays[sub_idx][(js_idx, blk_idx)]
1918
+
1919
+ def get_jobscript_block_task_elements_map(
1920
+ self,
1921
+ sub_idx: int,
1922
+ js_idx: int,
1923
+ blk_idx: int,
1924
+ task_elems_map: dict[int, list[int]] | None,
1925
+ ) -> dict[int, list[int]]:
1926
+ """For the specified jobscript-block, retrieve the task-elements mapping."""
1927
+
1928
+ if task_elems_map is not None:
1929
+ self.logger.debug("jobscript-block task elements are still in memory.")
1930
+ # in the special case when the Submission object has just been created, the
1931
+ # task elements arrays will not yet be persistent.
1932
+ return task_elems_map
1933
+
1934
+ # otherwise, `append_submissions` has been called, the task elements have been
1935
+ # removed from the JSON-representation of the submission object, and have been
1936
+ # saved in separate zarr arrays:
1937
+ if sub_idx not in self._jobscript_task_element_maps:
1938
+
1939
+ self.logger.debug(
1940
+ f"retrieving jobscript-block task elements for submission {sub_idx} from "
1941
+ f"disk, and caching."
1942
+ )
1943
+
1944
+ # for a given submission, task elements are stored for all jobscript-blocks in
1945
+ # the same array (and chunk), so retrieve all of them and cache:
1946
+
1947
+ arr = self._get_jobscripts_task_elements_arr(sub_idx)
1948
+ arr_dat = arr[:]
1949
+ block_shapes = arr.attrs["block_shapes"]
1950
+
1951
+ self._jobscript_task_element_maps[sub_idx] = {} # keys: (js_idx, blk_idx)
1952
+ arr_idx = 0
1953
+ for js_idx_i, js_blk_shapes in enumerate(block_shapes):
1954
+ for blk_idx_j, blk_shape_j in enumerate(js_blk_shapes):
1955
+ arr_i = arr_dat[arr_idx, : blk_shape_j[1], : blk_shape_j[0] + 1]
1956
+ self._jobscript_task_element_maps[sub_idx][(js_idx_i, blk_idx_j)] = {
1957
+ k[0]: list(k[1:]) for k in arr_i
1958
+ }
1959
+ arr_idx += 1
1960
+
1961
+ else:
1962
+ self.logger.debug(
1963
+ f"retrieving jobscript-block task elements for submission {sub_idx} from "
1964
+ "cache."
1965
+ )
1966
+
1967
+ return self._jobscript_task_element_maps[sub_idx][(js_idx, blk_idx)]
1968
+
1969
+ @TimeIt.decorator
1970
+ def get_jobscript_block_task_actions_array(
1971
+ self,
1972
+ sub_idx: int,
1973
+ js_idx: int,
1974
+ blk_idx: int,
1975
+ task_actions_arr: NDArray | list[tuple[int, int, int]] | None,
1976
+ ) -> NDArray:
1977
+ """For the specified jobscript-block, retrieve the task-actions array."""
1978
+
1979
+ if task_actions_arr is not None:
1980
+ self.logger.debug("jobscript-block task actions are still in memory.")
1981
+ # in the special case when the Submission object has just been created, the
1982
+ # task actions arrays will not yet be persistent.
1983
+ return np.asarray(task_actions_arr)
1984
+
1985
+ # otherwise, `append_submissions` has been called, the task actions have been
1986
+ # removed from the JSON-representation of the submission object, and have been
1987
+ # saved in separate zarr arrays:
1988
+ if sub_idx not in self._jobscript_task_actions_arrays:
1989
+
1990
+ self.logger.debug(
1991
+ f"retrieving jobscript-block task actions for submission {sub_idx} from "
1992
+ f"disk, and caching."
1993
+ )
1994
+
1995
+ # for a given submission, task actions are stored for all jobscript-blocks in
1996
+ # the same array (and chunk), so retrieve all of them and cache:
1997
+
1998
+ arr = self._get_jobscripts_task_actions_arr(sub_idx)
1999
+ arr_dat = arr[:]
2000
+ block_num_acts = arr.attrs["block_num_acts"]
2001
+
2002
+ num_acts_count = 0
2003
+ self._jobscript_task_actions_arrays[sub_idx] = {} # keys: (js_idx, blk_idx)
2004
+ for js_idx_i, js_blk_num_acts in enumerate(block_num_acts):
2005
+ for blk_idx_j, blk_num_acts_j in enumerate(js_blk_num_acts):
2006
+ arr_i = arr_dat[num_acts_count : num_acts_count + blk_num_acts_j]
2007
+ num_acts_count += blk_num_acts_j
2008
+ self._jobscript_task_actions_arrays[sub_idx][
2009
+ (js_idx_i, blk_idx_j)
2010
+ ] = arr_i
2011
+
2012
+ else:
2013
+ self.logger.debug(
2014
+ f"retrieving jobscript-block task actions for submission {sub_idx} from "
2015
+ "cache."
2016
+ )
2017
+
2018
+ return self._jobscript_task_actions_arrays[sub_idx][(js_idx, blk_idx)]
2019
+
2020
+ @TimeIt.decorator
2021
+ def get_jobscript_block_dependencies(
2022
+ self,
2023
+ sub_idx: int,
2024
+ js_idx: int,
2025
+ blk_idx: int,
2026
+ js_dependencies: dict[tuple[int, int], ResolvedJobscriptBlockDependencies] | None,
2027
+ ) -> dict[tuple[int, int], ResolvedJobscriptBlockDependencies]:
2028
+ """For the specified jobscript-block, retrieve the dependencies."""
2029
+
2030
+ if js_dependencies is not None:
2031
+ self.logger.debug("jobscript-block dependencies are still in memory.")
2032
+ # in the special case when the Submission object has just been created, the
2033
+ # dependencies will not yet be persistent.
2034
+ return js_dependencies
2035
+
2036
+ # otherwise, `append_submissions` has been called, the dependencies have been
2037
+ # removed from the JSON-representation of the submission object, and have been
2038
+ # saved in separate zarr arrays:
2039
+ if sub_idx not in self._jobscript_dependencies:
2040
+ self.logger.debug(
2041
+ f"retrieving jobscript-block dependencies for submission {sub_idx} from "
2042
+ f"disk, and caching."
2043
+ )
2044
+ # for a given submission, dependencies are stored for all jobscript-blocks in
2045
+ # the same array (and chunk), so retrieve all of them and cache:
2046
+ arr = self._get_jobscripts_dependencies_arr(sub_idx)
2047
+ self._jobscript_dependencies[
2048
+ sub_idx
2049
+ ] = self._decode_jobscript_block_dependencies(arr)
2050
+ else:
2051
+ self.logger.debug(
2052
+ f"retrieving jobscript-block dependencies for submission {sub_idx} from "
2053
+ "cache."
2054
+ )
2055
+
2056
+ return self._jobscript_dependencies[sub_idx][(js_idx, blk_idx)]
2057
+
1181
2058
  def get_ts_fmt(self):
1182
2059
  """
1183
2060
  Get the format for timestamps.
@@ -1208,11 +2085,11 @@ class ZarrPersistentStore(PersistentStore):
1208
2085
 
1209
2086
  def zip(
1210
2087
  self,
1211
- path=".",
1212
- log=None,
1213
- overwrite=False,
1214
- include_execute=False,
1215
- include_rechunk_backups=False,
2088
+ path: str = ".",
2089
+ log: str | None = None,
2090
+ overwrite: bool = False,
2091
+ include_execute: bool = False,
2092
+ include_rechunk_backups: bool = False,
1216
2093
  ):
1217
2094
  """
1218
2095
  Convert the persistent store to zipped form.
@@ -1224,69 +2101,66 @@ class ZarrPersistentStore(PersistentStore):
1224
2101
  directory, the zip file will be created within this directory. Otherwise,
1225
2102
  this path is assumed to be the full file path to the new zip file.
1226
2103
  """
1227
- console = Console()
1228
- status = console.status(f"Zipping workflow {self.workflow.name!r}...")
1229
- status.start()
1230
-
1231
- # TODO: this won't work for remote file systems
1232
- dst_path = Path(path).resolve()
1233
- if dst_path.is_dir():
1234
- dst_path = dst_path.joinpath(self.workflow.name).with_suffix(".zip")
1235
-
1236
- if not overwrite and dst_path.exists():
1237
- status.stop()
1238
- raise FileExistsError(
1239
- f"File at path already exists: {dst_path!r}. Pass `overwrite=True` to "
1240
- f"overwrite the existing file."
1241
- )
2104
+ with Console().status(f"Zipping workflow {self.workflow.name!r}..."):
2105
+ # TODO: this won't work for remote file systems
2106
+ dst_path = Path(path).resolve()
2107
+ if dst_path.is_dir():
2108
+ dst_path = dst_path.joinpath(self.workflow.name).with_suffix(".zip")
2109
+
2110
+ if not overwrite and dst_path.exists():
2111
+ raise FileExistsError(
2112
+ f"File at path already exists: {dst_path!r}. Pass `overwrite=True` to "
2113
+ f"overwrite the existing file."
2114
+ )
1242
2115
 
1243
- dst_path = str(dst_path)
2116
+ dst_path_s = str(dst_path)
1244
2117
 
1245
- src_zarr_store = self.zarr_store
1246
- zfs, _ = ask_pw_on_auth_exc(
1247
- ZipFileSystem,
1248
- fo=dst_path,
1249
- mode="w",
1250
- target_options={},
1251
- add_pw_to="target_options",
1252
- )
1253
- dst_zarr_store = zarr.storage.FSStore(url="", fs=zfs)
1254
- excludes = []
1255
- if not include_execute:
1256
- excludes.append("execute")
1257
- if not include_rechunk_backups:
1258
- excludes.append("runs.bak")
1259
- excludes.append("base.bak")
1260
-
1261
- zarr.convenience.copy_store(
1262
- src_zarr_store,
1263
- dst_zarr_store,
1264
- excludes=excludes or None,
1265
- log=log,
1266
- )
1267
- del zfs # ZipFileSystem remains open for instance lifetime
1268
- status.stop()
1269
- return dst_path
2118
+ src_zarr_store = self.zarr_store
2119
+ zfs, _ = ask_pw_on_auth_exc(
2120
+ ZipFileSystem,
2121
+ fo=dst_path_s,
2122
+ mode="w",
2123
+ target_options={},
2124
+ add_pw_to="target_options",
2125
+ )
2126
+ dst_zarr_store = FSStore(url="", fs=zfs)
2127
+ excludes = []
2128
+ if not include_execute:
2129
+ excludes.append("execute")
2130
+ if not include_rechunk_backups:
2131
+ excludes.append("runs.bak")
2132
+ excludes.append("base.bak")
2133
+
2134
+ zarr.copy_store(
2135
+ src_zarr_store,
2136
+ dst_zarr_store,
2137
+ excludes=excludes or None,
2138
+ log=log,
2139
+ )
2140
+ del zfs # ZipFileSystem remains open for instance lifetime
2141
+ return dst_path_s
2142
+
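The zip conversion above writes the whole Zarr hierarchy into an archive by pointing an `FSStore` at an fsspec `ZipFileSystem` and copying store-to-store. A minimal standalone sketch of the same idea with plain zarr and fsspec (paths are illustrative, and the password-prompt wrapper is omitted):

    import zarr
    from fsspec.implementations.zip import ZipFileSystem

    src = zarr.DirectoryStore("my_workflow")           # assumed existing workflow store
    zfs = ZipFileSystem("my_workflow.zip", mode="w")
    dst = zarr.storage.FSStore(url="", fs=zfs)

    # exclude the `execute` directory and rechunk backups, as in the method above:
    zarr.copy_store(src, dst, excludes=["execute", "runs.bak", "base.bak"], log=None)
    del zfs  # the ZipFileSystem keeps the archive open for its lifetime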
2143
+ def unzip(self, path: str = ".", log: str | None = None):
2144
+ raise ValueError("Not a zip store!")
1270
2145
 
1271
2146
  def _rechunk_arr(
1272
2147
  self,
1273
- arr,
1274
- chunk_size: Optional[int] = None,
1275
- backup: Optional[bool] = True,
1276
- status: Optional[bool] = True,
1277
- ):
1278
- arr_path = Path(self.workflow.path) / arr.path
2148
+ arr: Array,
2149
+ chunk_size: int | None = None,
2150
+ backup: bool = True,
2151
+ status: bool = True,
2152
+ ) -> Array:
2153
+ arr_path = Path(arr.store.path) / arr.path
1279
2154
  arr_name = arr.path.split("/")[-1]
1280
2155
 
1281
2156
  if status:
1282
- console = Console()
1283
- status = console.status("Rechunking...")
1284
- status.start()
2157
+ s = Console().status("Rechunking...")
2158
+ s.start()
1285
2159
  backup_time = None
1286
2160
 
1287
2161
  if backup:
1288
2162
  if status:
1289
- status.update("Backing up...")
2163
+ s.update("Backing up...")
1290
2164
  backup_path = arr_path.with_suffix(".bak")
1291
2165
  if backup_path.is_dir():
1292
2166
  pass
@@ -1298,18 +2172,26 @@ class ZarrPersistentStore(PersistentStore):
1298
2172
 
1299
2173
  tic = time.perf_counter()
1300
2174
  arr_rc_path = arr_path.with_suffix(".rechunked")
1301
- arr = zarr.open(arr_path)
1302
2175
  if status:
1303
- status.update("Creating new array...")
2176
+ s.update("Creating new array...")
2177
+
2178
+ # use the same store:
2179
+ try:
2180
+ arr_rc_store = arr.store.__class__(path=arr_rc_path)
2181
+ except TypeError:
2182
+ # FSStore
2183
+ arr_rc_store = arr.store.__class__(url=str(arr_rc_path))
2184
+
1304
2185
  arr_rc = zarr.create(
1305
- store=arr_rc_path,
2186
+ store=arr_rc_store,
1306
2187
  shape=arr.shape,
1307
2188
  chunks=arr.shape if chunk_size is None else chunk_size,
1308
2189
  dtype=object,
1309
- object_codec=MsgPack(),
2190
+ object_codec=self._CODEC,
1310
2191
  )
2192
+
1311
2193
  if status:
1312
- status.update("Copying data...")
2194
+ s.update("Copying data...")
1313
2195
  data = np.empty(shape=arr.shape, dtype=object)
1314
2196
  bad_data = []
1315
2197
  for idx in range(len(arr)):
@@ -1318,24 +2200,23 @@ class ZarrPersistentStore(PersistentStore):
1318
2200
  except RuntimeError:
1319
2201
  # blosc decompression errors
1320
2202
  bad_data.append(idx)
1321
- pass
1322
2203
  arr_rc[:] = data
1323
2204
 
1324
2205
  arr_rc.attrs.put(arr.attrs.asdict())
1325
2206
 
1326
2207
  if status:
1327
- status.update("Deleting old array...")
2208
+ s.update("Deleting old array...")
1328
2209
  shutil.rmtree(arr_path)
1329
2210
 
1330
2211
  if status:
1331
- status.update("Moving new array into place...")
2212
+ s.update("Moving new array into place...")
1332
2213
  shutil.move(arr_rc_path, arr_path)
1333
2214
 
1334
2215
  toc = time.perf_counter()
1335
2216
  rechunk_time = toc - tic
1336
2217
 
1337
2218
  if status:
1338
- status.stop()
2219
+ s.stop()
1339
2220
 
1340
2221
  if backup_time:
1341
2222
  print(f"Time to backup {arr_name}: {backup_time:.1f} s")
@@ -1349,10 +2230,10 @@ class ZarrPersistentStore(PersistentStore):
1349
2230
 
1350
2231
  def rechunk_parameter_base(
1351
2232
  self,
1352
- chunk_size: Optional[int] = None,
1353
- backup: Optional[bool] = True,
1354
- status: Optional[bool] = True,
1355
- ):
2233
+ chunk_size: int | None = None,
2234
+ backup: bool = True,
2235
+ status: bool = True,
2236
+ ) -> Array:
1356
2237
  """
1357
2238
  Rechunk the parameter data to be stored more efficiently.
1358
2239
  """
@@ -1361,16 +2242,22 @@ class ZarrPersistentStore(PersistentStore):
1361
2242
 
1362
2243
  def rechunk_runs(
1363
2244
  self,
1364
- chunk_size: Optional[int] = None,
1365
- backup: Optional[bool] = True,
1366
- status: Optional[bool] = True,
1367
- ):
2245
+ chunk_size: int | None = None,
2246
+ backup: bool = True,
2247
+ status: bool = True,
2248
+ ) -> Array:
1368
2249
  """
1369
2250
  Rechunk the run data to be stored more efficiently.
1370
2251
  """
1371
2252
  arr = self._get_EARs_arr()
1372
2253
  return self._rechunk_arr(arr, chunk_size, backup, status)
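`_rechunk_arr`, rewritten earlier in this diff and called by both rechunk methods, backs the array up, creates a replacement with the requested chunking, copies element-by-element, then swaps the new directory into place. A standalone sketch of the copy-and-swap core with plain zarr (paths are illustrative; backups and error handling omitted):

    import shutil
    import numpy as np
    import zarr
    from numcodecs import MsgPack

    old = zarr.open("runs", mode="r")                  # assumed existing object array
    new = zarr.create(
        store="runs.rechunked",
        shape=old.shape,
        chunks=old.shape,                              # a single chunk, as when chunk_size is None
        dtype=object,
        object_codec=MsgPack(),
    )
    data = np.empty(shape=old.shape, dtype=object)
    for idx in range(len(old)):
        data[idx] = old[idx]                           # element-wise copy, as in the method
    new[:] = data
    new.attrs.put(old.attrs.asdict())

    shutil.rmtree("runs")                              # delete the old array...
    shutil.move("runs.rechunked", "runs")              # ...and move the rechunked one into place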
1373
2254
 
2255
+ def get_dirs_array(self) -> NDArray:
2256
+ """
2257
+ Retrieve the run directories array.
2258
+ """
2259
+ return self._get_dirs_arr()[:]
2260
+
1374
2261
 
1375
2262
  class ZarrZipPersistentStore(ZarrPersistentStore):
1376
2263
  """A store designed mainly as an archive format that can be uploaded to data
@@ -1381,8 +2268,8 @@ class ZarrZipPersistentStore(ZarrPersistentStore):
1381
2268
  Archive format persistent stores cannot be updated without being unzipped first.
1382
2269
  """
1383
2270
 
1384
- _name = "zip"
1385
- _features = PersistentStoreFeatures(
2271
+ _name: ClassVar[str] = "zip"
2272
+ _features: ClassVar[PersistentStoreFeatures] = PersistentStoreFeatures(
1386
2273
  create=False,
1387
2274
  edit=False,
1388
2275
  jobscript_parallelism=False,
@@ -1393,10 +2280,17 @@ class ZarrZipPersistentStore(ZarrPersistentStore):
1393
2280
 
1394
2281
  # TODO: enforce read-only nature
1395
2282
 
1396
- def zip(self):
2283
+ def zip(
2284
+ self,
2285
+ path: str = ".",
2286
+ log: str | None = None,
2287
+ overwrite: bool = False,
2288
+ include_execute: bool = False,
2289
+ include_rechunk_backups: bool = False,
2290
+ ):
1397
2291
  raise ValueError("Already a zip store!")
1398
2292
 
1399
- def unzip(self, path=".", log=None):
2293
+ def unzip(self, path: str = ".", log: str | None = None) -> str:
1400
2294
  """
1401
2295
  Expand the persistent store.
1402
2296
 
@@ -1409,28 +2303,23 @@ class ZarrZipPersistentStore(ZarrPersistentStore):
1409
2303
 
1410
2304
  """
1411
2305
 
1412
- console = Console()
1413
- status = console.status(f"Unzipping workflow {self.workflow.name!r}...")
1414
- status.start()
2306
+ with Console().status(f"Unzipping workflow {self.workflow.name!r}..."):
2307
+ # TODO: this won't work for remote file systems
2308
+ dst_path = Path(path).resolve()
2309
+ if dst_path.is_dir():
2310
+ dst_path = dst_path.joinpath(self.workflow.name)
1415
2311
 
1416
- # TODO: this won't work for remote file systems
1417
- dst_path = Path(path).resolve()
1418
- if dst_path.is_dir():
1419
- dst_path = dst_path.joinpath(self.workflow.name)
2312
+ if dst_path.exists():
2313
+ raise FileExistsError(f"Directory at path already exists: {dst_path!r}.")
1420
2314
 
1421
- if dst_path.exists():
1422
- status.stop()
1423
- raise FileExistsError(f"Directory at path already exists: {dst_path!r}.")
2315
+ dst_path_s = str(dst_path)
1424
2316
 
1425
- dst_path = str(dst_path)
2317
+ src_zarr_store = self.zarr_store
2318
+ dst_zarr_store = FSStore(url=dst_path_s)
2319
+ zarr.copy_store(src_zarr_store, dst_zarr_store, log=log)
2320
+ return dst_path_s
1426
2321
 
1427
- src_zarr_store = self.zarr_store
1428
- dst_zarr_store = zarr.storage.FSStore(url=dst_path)
1429
- zarr.convenience.copy_store(src_zarr_store, dst_zarr_store, log=log)
1430
- status.stop()
1431
- return dst_path
1432
-
1433
- def copy(self, path=None) -> str:
2322
+ def copy(self, path: PathLike = None) -> Path:
1434
2323
  # not sure how to do this.
1435
2324
  raise NotImplementedError()
1436
2325
 
@@ -1441,8 +2330,23 @@ class ZarrZipPersistentStore(ZarrPersistentStore):
1441
2330
  def _rechunk_arr(
1442
2331
  self,
1443
2332
  arr,
1444
- chunk_size: Optional[int] = None,
1445
- backup: Optional[bool] = True,
1446
- status: Optional[bool] = True,
1447
- ):
2333
+ chunk_size: int | None = None,
2334
+ backup: bool = True,
2335
+ status: bool = True,
2336
+ ) -> Array:
1448
2337
  raise NotImplementedError
2338
+
2339
+ def get_text_file(self, path: str | Path) -> str:
2340
+ """Retrieve the contents of a text file stored within the workflow."""
2341
+ path = Path(path)
2342
+ if path.is_absolute():
2343
+ path = path.relative_to(self.workflow.url)
2344
+ path = str(path.as_posix())
2345
+ assert self.fs
2346
+ try:
2347
+ with self.fs.open(path, mode="rt") as fp:
2348
+ return fp.read()
2349
+ except KeyError:
2350
+ raise FileNotFoundError(
2351
+ f"File within zip at location {path!r} does not exist."
2352
+ ) from None
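A brief hypothetical usage of `get_text_file`, assuming `store` is a `ZarrZipPersistentStore` for an archived workflow and the path (illustrative) names a file that may or may not be in the archive:

    try:
        text = store.get_text_file("submissions/0/js_0.sh")
    except FileNotFoundError as err:
        print(err)
    else:
        print(text[:80])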