hpcflow-new2 0.2.0a188__py3-none-any.whl → 0.2.0a190__py3-none-any.whl
- hpcflow/__pyinstaller/hook-hpcflow.py +8 -6
- hpcflow/_version.py +1 -1
- hpcflow/app.py +1 -0
- hpcflow/data/scripts/main_script_test_hdf5_in_obj.py +1 -1
- hpcflow/data/scripts/main_script_test_hdf5_out_obj.py +1 -1
- hpcflow/sdk/__init__.py +21 -15
- hpcflow/sdk/app.py +2133 -770
- hpcflow/sdk/cli.py +281 -250
- hpcflow/sdk/cli_common.py +6 -2
- hpcflow/sdk/config/__init__.py +1 -1
- hpcflow/sdk/config/callbacks.py +77 -42
- hpcflow/sdk/config/cli.py +126 -103
- hpcflow/sdk/config/config.py +578 -311
- hpcflow/sdk/config/config_file.py +131 -95
- hpcflow/sdk/config/errors.py +112 -85
- hpcflow/sdk/config/types.py +145 -0
- hpcflow/sdk/core/actions.py +1054 -994
- hpcflow/sdk/core/app_aware.py +24 -0
- hpcflow/sdk/core/cache.py +81 -63
- hpcflow/sdk/core/command_files.py +275 -185
- hpcflow/sdk/core/commands.py +111 -107
- hpcflow/sdk/core/element.py +724 -503
- hpcflow/sdk/core/enums.py +192 -0
- hpcflow/sdk/core/environment.py +74 -93
- hpcflow/sdk/core/errors.py +398 -51
- hpcflow/sdk/core/json_like.py +540 -272
- hpcflow/sdk/core/loop.py +380 -334
- hpcflow/sdk/core/loop_cache.py +160 -43
- hpcflow/sdk/core/object_list.py +370 -207
- hpcflow/sdk/core/parameters.py +728 -600
- hpcflow/sdk/core/rule.py +59 -41
- hpcflow/sdk/core/run_dir_files.py +33 -22
- hpcflow/sdk/core/task.py +1546 -1325
- hpcflow/sdk/core/task_schema.py +240 -196
- hpcflow/sdk/core/test_utils.py +126 -88
- hpcflow/sdk/core/types.py +387 -0
- hpcflow/sdk/core/utils.py +410 -305
- hpcflow/sdk/core/validation.py +82 -9
- hpcflow/sdk/core/workflow.py +1192 -1028
- hpcflow/sdk/core/zarr_io.py +98 -137
- hpcflow/sdk/demo/cli.py +46 -33
- hpcflow/sdk/helper/cli.py +18 -16
- hpcflow/sdk/helper/helper.py +75 -63
- hpcflow/sdk/helper/watcher.py +61 -28
- hpcflow/sdk/log.py +83 -59
- hpcflow/sdk/persistence/__init__.py +8 -31
- hpcflow/sdk/persistence/base.py +988 -586
- hpcflow/sdk/persistence/defaults.py +6 -0
- hpcflow/sdk/persistence/discovery.py +38 -0
- hpcflow/sdk/persistence/json.py +408 -153
- hpcflow/sdk/persistence/pending.py +158 -123
- hpcflow/sdk/persistence/store_resource.py +37 -22
- hpcflow/sdk/persistence/types.py +307 -0
- hpcflow/sdk/persistence/utils.py +14 -11
- hpcflow/sdk/persistence/zarr.py +477 -420
- hpcflow/sdk/runtime.py +44 -41
- hpcflow/sdk/submission/{jobscript_info.py → enums.py} +39 -12
- hpcflow/sdk/submission/jobscript.py +444 -404
- hpcflow/sdk/submission/schedulers/__init__.py +133 -40
- hpcflow/sdk/submission/schedulers/direct.py +97 -71
- hpcflow/sdk/submission/schedulers/sge.py +132 -126
- hpcflow/sdk/submission/schedulers/slurm.py +263 -268
- hpcflow/sdk/submission/schedulers/utils.py +7 -2
- hpcflow/sdk/submission/shells/__init__.py +14 -15
- hpcflow/sdk/submission/shells/base.py +102 -29
- hpcflow/sdk/submission/shells/bash.py +72 -55
- hpcflow/sdk/submission/shells/os_version.py +31 -30
- hpcflow/sdk/submission/shells/powershell.py +37 -29
- hpcflow/sdk/submission/submission.py +203 -257
- hpcflow/sdk/submission/types.py +143 -0
- hpcflow/sdk/typing.py +163 -12
- hpcflow/tests/conftest.py +8 -6
- hpcflow/tests/schedulers/slurm/test_slurm_submission.py +5 -2
- hpcflow/tests/scripts/test_main_scripts.py +60 -30
- hpcflow/tests/shells/wsl/test_wsl_submission.py +6 -4
- hpcflow/tests/unit/test_action.py +86 -75
- hpcflow/tests/unit/test_action_rule.py +9 -4
- hpcflow/tests/unit/test_app.py +13 -6
- hpcflow/tests/unit/test_cli.py +1 -1
- hpcflow/tests/unit/test_command.py +71 -54
- hpcflow/tests/unit/test_config.py +20 -15
- hpcflow/tests/unit/test_config_file.py +21 -18
- hpcflow/tests/unit/test_element.py +58 -62
- hpcflow/tests/unit/test_element_iteration.py +3 -1
- hpcflow/tests/unit/test_element_set.py +29 -19
- hpcflow/tests/unit/test_group.py +4 -2
- hpcflow/tests/unit/test_input_source.py +116 -93
- hpcflow/tests/unit/test_input_value.py +29 -24
- hpcflow/tests/unit/test_json_like.py +44 -35
- hpcflow/tests/unit/test_loop.py +65 -58
- hpcflow/tests/unit/test_object_list.py +17 -12
- hpcflow/tests/unit/test_parameter.py +16 -7
- hpcflow/tests/unit/test_persistence.py +48 -35
- hpcflow/tests/unit/test_resources.py +20 -18
- hpcflow/tests/unit/test_run.py +8 -3
- hpcflow/tests/unit/test_runtime.py +2 -1
- hpcflow/tests/unit/test_schema_input.py +23 -15
- hpcflow/tests/unit/test_shell.py +3 -2
- hpcflow/tests/unit/test_slurm.py +8 -7
- hpcflow/tests/unit/test_submission.py +39 -19
- hpcflow/tests/unit/test_task.py +352 -247
- hpcflow/tests/unit/test_task_schema.py +33 -20
- hpcflow/tests/unit/test_utils.py +9 -11
- hpcflow/tests/unit/test_value_sequence.py +15 -12
- hpcflow/tests/unit/test_workflow.py +114 -83
- hpcflow/tests/unit/test_workflow_template.py +0 -1
- hpcflow/tests/workflows/test_jobscript.py +2 -1
- hpcflow/tests/workflows/test_workflows.py +18 -13
- {hpcflow_new2-0.2.0a188.dist-info → hpcflow_new2-0.2.0a190.dist-info}/METADATA +2 -1
- hpcflow_new2-0.2.0a190.dist-info/RECORD +165 -0
- hpcflow/sdk/core/parallel.py +0 -21
- hpcflow_new2-0.2.0a188.dist-info/RECORD +0 -158
- {hpcflow_new2-0.2.0a188.dist-info → hpcflow_new2-0.2.0a190.dist-info}/LICENSE +0 -0
- {hpcflow_new2-0.2.0a188.dist-info → hpcflow_new2-0.2.0a190.dist-info}/WHEEL +0 -0
- {hpcflow_new2-0.2.0a188.dist-info → hpcflow_new2-0.2.0a190.dist-info}/entry_points.txt +0 -0
hpcflow/sdk/core/loop.py
CHANGED
@@ -7,20 +7,29 @@ notably looping over a set of values or until a condition holds.
 from __future__ import annotations
 
 import copy
-from
+from collections import defaultdict
+from itertools import chain
+from typing import TYPE_CHECKING
+from typing_extensions import override
 
-from hpcflow.sdk import
+from hpcflow.sdk.core.app_aware import AppAware
 from hpcflow.sdk.core.errors import LoopTaskSubsetError
 from hpcflow.sdk.core.json_like import ChildObjectSpec, JSONLike
-from hpcflow.sdk.core.loop_cache import LoopCache
-from hpcflow.sdk.core.
-from hpcflow.sdk.core.task import WorkflowTask
+from hpcflow.sdk.core.loop_cache import LoopCache, LoopIndex
+from hpcflow.sdk.core.enums import InputSourceType
 from hpcflow.sdk.core.utils import check_valid_py_identifier, nth_key, nth_value
 from hpcflow.sdk.log import TimeIt
 
-
-
-
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Iterator, Mapping, Sequence
+    from typing import Any, ClassVar
+    from typing_extensions import Self, TypeIs
+    from ..typing import DataIndex, ParamSource
+    from .parameters import SchemaInput, InputSource
+    from .rule import Rule
+    from .task import WorkflowTask
+    from .types import IterableParam
+    from .workflow import Workflow, WorkflowTemplate
 
 
 # @dataclass
@@ -55,20 +64,25 @@ class Loop(JSONLike):
         Stopping criterion, expressed as a rule.
     """
 
-
-
+    _child_objects: ClassVar[tuple[ChildObjectSpec, ...]] = (
+        ChildObjectSpec(name="termination", class_name="Rule"),
+    )
+
+    @classmethod
+    def __is_WorkflowTask(cls, value) -> TypeIs[WorkflowTask]:
+        return isinstance(value, cls._app.WorkflowTask)
 
     def __init__(
         self,
-        tasks:
+        tasks: Iterable[int | WorkflowTask],
         num_iterations: int,
-        name:
-        non_iterable_parameters:
-        termination:
+        name: str | None = None,
+        non_iterable_parameters: list[str] | None = None,
+        termination: Rule | None = None,
     ) -> None:
-        _task_insert_IDs = []
+        _task_insert_IDs: list[int] = []
         for task in tasks:
-            if
+            if self.__is_WorkflowTask(task):
                 _task_insert_IDs.append(task.insert_ID)
             elif isinstance(task, int):
                 _task_insert_IDs.append(task)
@@ -84,95 +98,103 @@ class Loop(JSONLike):
         self._non_iterable_parameters = non_iterable_parameters or []
         self._termination = termination
 
-        self._workflow_template
+        self._workflow_template: WorkflowTemplate | None = (
+            None  # assigned by parent WorkflowTemplate
+        )
 
-
-
+    @override
+    def _postprocess_to_dict(self, d: dict[str, Any]) -> dict[str, Any]:
+        out = super()._postprocess_to_dict(d)
         return {k.lstrip("_"): v for k, v in out.items()}
 
     @classmethod
-    def _json_like_constructor(cls, json_like):
+    def _json_like_constructor(cls, json_like: dict) -> Self:
        """Invoked by `JSONLike.from_json_like` instead of `__init__`."""
         if "task_insert_IDs" in json_like:
             insert_IDs = json_like.pop("task_insert_IDs")
         else:
             insert_IDs = json_like.pop("tasks")
-
-        return obj
+        return cls(tasks=insert_IDs, **json_like)
 
     @property
-    def task_insert_IDs(self) ->
+    def task_insert_IDs(self) -> tuple[int, ...]:
         """Get the list of task insert_IDs that define the extent of the loop."""
         return tuple(self._task_insert_IDs)
 
     @property
-    def name(self):
+    def name(self) -> str | None:
         """
         The name of the loop, if one was provided.
         """
         return self._name
 
     @property
-    def num_iterations(self):
+    def num_iterations(self) -> int:
         """
         The number of loop iterations to do.
         """
         return self._num_iterations
 
     @property
-    def non_iterable_parameters(self):
+    def non_iterable_parameters(self) -> Sequence[str]:
         """
         Which parameters are not iterable.
         """
         return self._non_iterable_parameters
 
     @property
-    def termination(self):
+    def termination(self) -> Rule | None:
         """
         A termination rule for the loop, if one is provided.
         """
         return self._termination
 
     @property
-    def workflow_template(self):
+    def workflow_template(self) -> WorkflowTemplate | None:
         """
         The workflow template that contains this loop.
         """
         return self._workflow_template
 
     @workflow_template.setter
-    def workflow_template(self, template:
+    def workflow_template(self, template: WorkflowTemplate):
         self._workflow_template = template
-        self.
+        self.__validate_against_template()
+
+    def __workflow(self) -> None | Workflow:
+        if (wt := self.workflow_template) is None:
+            return None
+        return wt.workflow
 
     @property
-    def task_objects(self) ->
+    def task_objects(self) -> tuple[WorkflowTask, ...]:
         """
         The tasks in the loop.
         """
-        if not self.
+        if not (wf := self.__workflow()):
             raise RuntimeError(
                 "Workflow template must be assigned to retrieve task objects of the loop."
             )
-        return tuple(
-            self.workflow_template.workflow.tasks.get(insert_ID=i)
-            for i in self.task_insert_IDs
-        )
+        return tuple(wf.tasks.get(insert_ID=t_id) for t_id in self.task_insert_IDs)
 
-    def
+    def __validate_against_template(self) -> None:
         """Validate the loop parameters against the associated workflow."""
 
         # insert IDs must exist:
+        if not (wf := self.__workflow()):
+            raise RuntimeError(
+                "workflow cannot be validated against as it is not assigned"
+            )
         for insert_ID in self.task_insert_IDs:
             try:
-
+                wf.tasks.get(insert_ID=insert_ID)
             except ValueError:
                 raise ValueError(
                     f"Loop {self.name!r} has an invalid task insert ID {insert_ID!r}. "
                     f"Such as task does not exist in the associated workflow."
                 )
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         num_iterations_str = ""
         if self.num_iterations is not None:
             num_iterations_str = f", num_iterations={self.num_iterations!r}"
@@ -187,7 +209,7 @@ class Loop(JSONLike):
             f")"
         )
 
-    def __deepcopy__(self, memo):
+    def __deepcopy__(self, memo: dict[int, Any]) -> Self:
         kwargs = self.to_dict()
         kwargs["tasks"] = kwargs.pop("task_insert_IDs")
         obj = self.__class__(**copy.deepcopy(kwargs, memo))
@@ -195,7 +217,7 @@ class Loop(JSONLike):
         return obj
 
 
-class WorkflowLoop:
+class WorkflowLoop(AppAware):
     """
     Class to represent a :py:class:`.Loop` that is bound to a
     :py:class:`~hpcflow.app.Workflow`.
@@ -216,17 +238,15 @@ class WorkflowLoop:
         The paths to the parent entities of this loop.
     """
 
-    _app_attr = "app"
-
     def __init__(
         self,
         index: int,
-        workflow:
-        template:
-        num_added_iterations:
-        iterable_parameters:
-        parents:
-    ):
+        workflow: Workflow,
+        template: Loop,
+        num_added_iterations: dict[tuple[int, ...], int],
+        iterable_parameters: dict[str, IterableParam],
+        parents: list[str],
+    ) -> None:
         self._index = index
         self._workflow = workflow
         self._template = template
@@ -236,25 +256,22 @@ class WorkflowLoop:
 
         # appended to on adding a empty loop to the workflow that's a parent of this loop,
         # reset and added to `self._parents` on dump to disk:
-        self._pending_parents = []
+        self._pending_parents: list[str] = []
 
         # used for `num_added_iterations` when a new loop iteration is added, or when
         # parents are append to; reset to None on dump to disk. Each key is a tuple of
         # parent loop indices and each value is the number of pending new iterations:
-        self._pending_num_added_iterations = None
+        self._pending_num_added_iterations: dict[tuple[int, ...], int] | None = None
 
         self._validate()
 
     @TimeIt.decorator
-    def _validate(self):
+    def _validate(self) -> None:
         # task subset must be a contiguous range of task indices:
         task_indices = self.task_indices
         task_min, task_max = task_indices[0], task_indices[-1]
         if task_indices != tuple(range(task_min, task_max + 1)):
-            raise LoopTaskSubsetError(
-                f"Loop {self.name!r}: task subset must be an ascending contiguous range, "
-                f"but specified task indices were: {self.task_indices!r}."
-            )
+            raise LoopTaskSubsetError(self.name, self.task_indices)
 
         for task in self.downstream_tasks:
             for param in self.iterable_parameters:
@@ -273,7 +290,7 @@ class WorkflowLoop:
                 )
 
     @property
-    def num_added_iterations(self):
+    def num_added_iterations(self) -> Mapping[tuple[int, ...], int]:
         """
         The number of added iterations.
         """
@@ -282,27 +299,30 @@ class WorkflowLoop:
         else:
             return self._num_added_iterations
 
-
+    @property
+    def __pending(self) -> dict[tuple[int, ...], int]:
         if not self._pending_num_added_iterations:
-            self._pending_num_added_iterations =
-
-        if added_iters_key not in self._pending_num_added_iterations:
-            self._pending_num_added_iterations[added_iters_key] = 1
+            self._pending_num_added_iterations = dict(self._num_added_iterations)
+        return self._pending_num_added_iterations
 
-    def
+    def _initialise_pending_added_iters(self, added_iters: Iterable[int]):
         if not self._pending_num_added_iterations:
-            self._pending_num_added_iterations =
+            self._pending_num_added_iterations = dict(self._num_added_iterations)
+        if (added_iters_key := tuple(added_iters)) not in (pending := self.__pending):
+            pending[added_iters_key] = 1
 
-
+    def _increment_pending_added_iters(self, added_iters_key: Iterable[int]):
+        self.__pending[tuple(added_iters_key)] += 1
 
-    def _update_parents(self, parent:
+    def _update_parents(self, parent: WorkflowLoop):
+        assert parent.name
         self._pending_parents.append(parent.name)
 
-        if not self._pending_num_added_iterations:
-            self._pending_num_added_iterations = copy.deepcopy(self._num_added_iterations)
-
         self._pending_num_added_iterations = {
-
+            (*k, 0): v
+            for k, v in (
+                self._pending_num_added_iterations or self._num_added_iterations
+            ).items()
         }
 
         self.workflow._store.update_loop_parents(
@@ -311,116 +331,118 @@ class WorkflowLoop:
             parents=self.parents,
         )
 
-    def _reset_pending_num_added_iters(self):
+    def _reset_pending_num_added_iters(self) -> None:
         self._pending_num_added_iterations = None
 
-    def _accept_pending_num_added_iters(self):
+    def _accept_pending_num_added_iters(self) -> None:
         if self._pending_num_added_iterations:
-            self._num_added_iterations =
+            self._num_added_iterations = dict(self._pending_num_added_iterations)
             self._reset_pending_num_added_iters()
 
-    def _reset_pending_parents(self):
+    def _reset_pending_parents(self) -> None:
         self._pending_parents = []
 
-    def _accept_pending_parents(self):
+    def _accept_pending_parents(self) -> None:
         self._parents += self._pending_parents
         self._reset_pending_parents()
 
     @property
-    def index(self):
+    def index(self) -> int:
         """
         The index of this loop within its workflow.
         """
         return self._index
 
     @property
-    def task_insert_IDs(self):
+    def task_insert_IDs(self) -> tuple[int, ...]:
         """
         The insertion IDs of the tasks inside this loop.
         """
         return self.template.task_insert_IDs
 
     @property
-    def task_objects(self):
+    def task_objects(self) -> tuple[WorkflowTask, ...]:
         """
         The tasks in this loop.
         """
         return self.template.task_objects
 
     @property
-    def task_indices(self) ->
+    def task_indices(self) -> tuple[int, ...]:
         """
         The list of task indices that define the extent of the loop.
         """
-        return tuple(
+        return tuple(task.index for task in self.task_objects)
 
     @property
-    def workflow(self):
+    def workflow(self) -> Workflow:
         """
         The workflow containing this loop.
         """
         return self._workflow
 
     @property
-    def template(self):
+    def template(self) -> Loop:
         """
         The loop template for this loop.
         """
         return self._template
 
     @property
-    def parents(self) ->
+    def parents(self) -> Sequence[str]:
         """
         The parents of this loop.
         """
         return self._parents + self._pending_parents
 
     @property
-    def name(self):
+    def name(self) -> str:
         """
         The name of this loop, if one is defined.
         """
+        assert self.template.name
         return self.template.name
 
     @property
-    def iterable_parameters(self):
+    def iterable_parameters(self) -> Mapping[str, IterableParam]:
         """
         The parameters that are being iterated over.
         """
         return self._iterable_parameters
 
     @property
-    def num_iterations(self):
+    def num_iterations(self) -> int:
         """
         The number of iterations.
         """
         return self.template.num_iterations
 
     @property
-    def downstream_tasks(self) ->
+    def downstream_tasks(self) -> Iterator[WorkflowTask]:
         """Tasks that are not part of the loop, and downstream from this loop."""
-
+        tasks = self.workflow.tasks
+        for idx in range(self.task_objects[-1].index + 1, len(tasks)):
+            yield tasks[idx]
 
     @property
-    def upstream_tasks(self) ->
+    def upstream_tasks(self) -> Iterator[WorkflowTask]:
         """Tasks that are not part of the loop, and upstream from this loop."""
-
+        tasks = self.workflow.tasks
+        for idx in range(0, self.task_objects[0].index):
+            yield tasks[idx]
 
     @staticmethod
     @TimeIt.decorator
-    def _find_iterable_parameters(loop_template:
-        all_inputs_first_idx = {}
-        all_outputs_idx =
+    def _find_iterable_parameters(loop_template: Loop) -> dict[str, IterableParam]:
+        all_inputs_first_idx: dict[str, int] = {}
+        all_outputs_idx: dict[str, list[int]] = defaultdict(list)
         for task in loop_template.task_objects:
             for typ in task.template.all_schema_input_types:
-
-                    all_inputs_first_idx[typ] = task.insert_ID
+                all_inputs_first_idx.setdefault(typ, task.insert_ID)
             for typ in task.template.all_schema_output_types:
-                if typ not in all_outputs_idx:
-                    all_outputs_idx[typ] = []
                 all_outputs_idx[typ].append(task.insert_ID)
 
-        iterable_params = {}
+        iterable_params: dict[str, IterableParam] = {}
        for typ, first_idx in all_inputs_first_idx.items():
             if typ in all_outputs_idx and first_idx <= all_outputs_idx[typ][0]:
                 iterable_params[typ] = {
@@ -429,8 +451,7 @@ class WorkflowLoop:
                 }
 
         for non_iter in loop_template.non_iterable_parameters:
-
-                del iterable_params[non_iter]
+            iterable_params.pop(non_iter, None)
 
         return iterable_params
 
@@ -439,10 +460,10 @@ class WorkflowLoop:
     def new_empty_loop(
         cls,
         index: int,
-        workflow:
-        template:
-        iter_loop_idx:
-    ) ->
+        workflow: Workflow,
+        template: Loop,
+        iter_loop_idx: Sequence[Mapping[str, int]],
+    ) -> WorkflowLoop:
         """
         Make a new empty loop.
 
@@ -457,13 +478,16 @@ class WorkflowLoop:
         iter_loop_idx: list[dict]
             Iteration information from parent loops.
         """
-
-
-
-
-
+        parent_names = [
+            loop.name
+            for loop in cls._get_parent_loops(index, workflow, template)
+            if loop.name
+        ]
+        num_added_iters = {
+            tuple(l_idx[nm] for nm in parent_names): 1 for l_idx in iter_loop_idx
+        }
 
-
+        return cls(
             index=index,
             workflow=workflow,
             template=template,
@@ -471,17 +495,16 @@ class WorkflowLoop:
             iterable_parameters=cls._find_iterable_parameters(template),
             parents=parent_names,
         )
-        return obj
 
     @classmethod
     @TimeIt.decorator
     def _get_parent_loops(
         cls,
         index: int,
-        workflow:
-        template:
-    ) ->
-        parents = []
+        workflow: Workflow,
+        template: Loop,
+    ) -> list[WorkflowLoop]:
+        parents: list[WorkflowLoop] = []
         passed_self = False
         self_tasks = set(template.task_insert_IDs)
         for loop_i in workflow.loops:
@@ -496,18 +519,18 @@ class WorkflowLoop:
         return parents
 
     @TimeIt.decorator
-    def get_parent_loops(self) ->
+    def get_parent_loops(self) -> list[WorkflowLoop]:
         """Get loops whose task subset is a superset of this loop's task subset. If two
         loops have identical task subsets, the first loop in the workflow loop list is
         considered the child."""
         return self._get_parent_loops(self.index, self.workflow, self.template)
 
     @TimeIt.decorator
-    def get_child_loops(self) ->
+    def get_child_loops(self) -> list[WorkflowLoop]:
         """Get loops whose task subset is a subset of this loop's task subset. If two
         loops have identical task subsets, the first loop in the workflow loop list is
         considered the child."""
-        children = []
+        children: list[WorkflowLoop] = []
         passed_self = False
         self_tasks = set(self.task_insert_IDs)
         for loop_i in self.workflow.loops:
@@ -521,11 +544,14 @@ class WorkflowLoop:
             children.append(loop_i)
 
         # order by depth, so direct child is first:
-
-        return children
+        return sorted(children, key=lambda x: len(next(iter(x.num_added_iterations))))
 
     @TimeIt.decorator
-    def add_iteration(
+    def add_iteration(
+        self,
+        parent_loop_indices: Mapping[str, int] | None = None,
+        cache: LoopCache | None = None,
+    ) -> None:
         """
         Add an iteration to this loop.
 
@@ -539,42 +565,40 @@ class WorkflowLoop:
         """
         if not cache:
             cache = LoopCache.build(self.workflow)
+        assert cache is not None
         parent_loops = self.get_parent_loops()
         child_loops = self.get_child_loops()
-
-
-
+        parent_loop_indices_ = parent_loop_indices or {
+            loop.name: 0 for loop in parent_loops
+        }
 
-        iters_key = tuple([
+        iters_key = tuple(parent_loop_indices_[p_nm] for p_nm in self.parents)
         cur_loop_idx = self.num_added_iterations[iters_key] - 1
-
+
+        # keys are (task.insert_ID and element.index)
+        all_new_data_idx: dict[tuple[int, int], DataIndex] = {}
 
         # initialise a new `num_added_iterations` key on each child loop:
+        iters_key_dct = {
+            **parent_loop_indices_,
+            self.name: cur_loop_idx + 1,
+        }
         for child in child_loops:
-
-
-
-            }
-            added_iters_key_chd = tuple([iters_key_dct.get(j, 0) for j in child.parents])
-            child._initialise_pending_added_iters(added_iters_key_chd)
+            child._initialise_pending_added_iters(
+                iters_key_dct.get(j, 0) for j in child.parents
+            )
 
         for task in self.task_objects:
-
-
-
-
-            **{
-                child.name: 0
-                for child in child_loops
-                if task.insert_ID in child.task_insert_IDs
-            },
+            new_loop_idx = LoopIndex(iters_key_dct) + {
+                child.name: 0
+                for child in child_loops
+                if task.insert_ID in child.task_insert_IDs
             }
-            added_iter_IDs = []
+            added_iter_IDs: list[int] = []
             for elem_idx in range(task.num_elements):
-
                 elem_ID = task.element_IDs[elem_idx]
 
-                new_data_idx = {}
+                new_data_idx: DataIndex = {}
 
                 # copy resources from zeroth iteration:
                 zeroth_iter_ID, zi_iter_data_idx = cache.zeroth_iters[elem_ID]
@@ -587,109 +611,26 @@ class WorkflowLoop:
 
             for inp in task.template.all_schema_inputs:
                 is_inp_task = False
-                iter_dat
-                if iter_dat:
+                if iter_dat := self.iterable_parameters.get(inp.typ):
                     is_inp_task = task.insert_ID == iter_dat["input_task"]
 
-
-                    # source from final output task of previous iteration, with all parent
-                    # loop indices the same as previous iteration, and all child loop indices
-                    # maximised:
-
-                    # identify element(s) from which this iterable input should be
-                    # parametrised:
-                    if task.insert_ID == iter_dat["output_tasks"][-1]:
-                        src_elem_ID = elem_ID
-                        grouped_elems = None
-                    else:
-                        src_elem_IDs_all = cache.element_dependents[elem_ID]
-                        src_elem_IDs = {
-                            k: v
-                            for k, v in src_elem_IDs_all.items()
-                            if cache.elements[k]["task_insert_ID"]
-                            == iter_dat["output_tasks"][-1]
-                        }
-                        # consider groups
-                        inp_group_name = inp.single_labelled_data.get("group")
-                        grouped_elems = []
-                        for src_elem_j_ID, src_elem_j_dat in src_elem_IDs.items():
-                            i_in_group = any(
-                                k == inp_group_name
-                                for k in src_elem_j_dat["group_names"]
-                            )
-                            if i_in_group:
-                                grouped_elems.append(src_elem_j_ID)
-
-                        if not grouped_elems and len(src_elem_IDs) > 1:
-                            raise NotImplementedError(
-                                f"Multiple elements found in the iterable parameter "
-                                f"{inp!r}'s latest output task (insert ID: "
-                                f"{iter_dat['output_tasks'][-1]}) that can be used "
-                                f"to parametrise the next iteration: "
-                                f"{list(src_elem_IDs.keys())!r}."
-                            )
-
-                        elif not src_elem_IDs:
-                            # TODO: maybe OK?
-                            raise NotImplementedError(
-                                f"No elements found in the iterable parameter "
-                                f"{inp!r}'s latest output task (insert ID: "
-                                f"{iter_dat['output_tasks'][-1]}) that can be used "
-                                f"to parametrise the next iteration."
-                            )
-
-                        else:
-                            src_elem_ID = nth_key(src_elem_IDs, 0)
-
-                    child_loop_max_iters = {}
-                    parent_loop_same_iters = {
-                        i.name: parent_loop_indices[i.name] for i in parent_loops
-                    }
-                    child_iter_parents = {
-                        **parent_loop_same_iters,
-                        self.name: cur_loop_idx,
-                    }
-                    for i in child_loops:
-                        i_num_iters = i.num_added_iterations[
-                            tuple(child_iter_parents[j] for j in i.parents)
-                        ]
-                        i_max = i_num_iters - 1
-                        child_iter_parents[i.name] = i_max
-                        child_loop_max_iters[i.name] = i_max
-
-                    source_iter_loop_idx = {
-                        **child_loop_max_iters,
-                        **parent_loop_same_iters,
-                        self.name: cur_loop_idx,
-                    }
-
-                    # identify the ElementIteration from which this input should be
-                    # parametrised:
-                    loop_idx_key = tuple(sorted(source_iter_loop_idx.items()))
-                    if grouped_elems:
-                        src_data_idx = []
-                        for src_elem_ID in grouped_elems:
-                            src_data_idx.append(
-                                cache.data_idx[src_elem_ID][loop_idx_key]
-                            )
-                    else:
-                        src_data_idx = cache.data_idx[src_elem_ID][loop_idx_key]
-
-                    if not src_data_idx:
-                        raise RuntimeError(
-                            f"Could not find a source iteration with loop_idx: "
-                            f"{source_iter_loop_idx!r}."
-                        )
-
-                    if grouped_elems:
-                        inp_dat_idx = [i[f"outputs.{inp.typ}"] for i in src_data_idx]
-                    else:
-                        inp_dat_idx = src_data_idx[f"outputs.{inp.typ}"]
-                    new_data_idx[f"inputs.{inp.typ}"] = inp_dat_idx
+                inp_key = f"inputs.{inp.typ}"
 
+                if is_inp_task:
+                    assert iter_dat is not None
+                    inp_dat_idx = self.__get_looped_index(
+                        task,
+                        elem_ID,
+                        cache,
+                        iter_dat,
+                        inp,
+                        parent_loops,
+                        parent_loop_indices_,
+                        child_loops,
+                        cur_loop_idx,
+                    )
+                    new_data_idx[inp_key] = inp_dat_idx
                 else:
-                    inp_key = f"inputs.{inp.typ}"
-
                     orig_inp_src = cache.elements[elem_ID]["input_sources"][inp_key]
                     inp_dat_idx = None
@@ -709,77 +650,16 @@ class WorkflowLoop:
                         inp_dat_idx = zi_iter_data_idx[inp_key]
 
                     elif orig_inp_src.source_type is InputSourceType.TASK:
-
-
-
-
-
-
-
-
-
-
-                                        skip_iter = True
-                                        break
-                                if not skip_iter:
-                                    src_data_idx.append(di_k)
-
-                            # could be multiple, but they should all have the same
-                            # data index for this parameter:
-                            src_data_idx = src_data_idx[0]
-                            inp_dat_idx = src_data_idx[inp_key]
-                        else:
-                            is_group = False
-                            if (
-                                not inp.multiple
-                                and "group" in inp.single_labelled_data
-                            ):
-                                # this input is a group, assume for now all elements:
-                                is_group = True
-
-                            # same task/element, but update iteration to the just-added
-                            # iteration:
-                            key_prefix = orig_inp_src.task_source_type.name.lower()
-                            prev_dat_idx_key = f"{key_prefix}s.{inp.typ}"
-                            new_sources = []
-                            for (
-                                tiID,
-                                e_idx,
-                            ), prev_dat_idx in all_new_data_idx.items():
-                                if tiID == orig_inp_src.task_ref:
-                                    # find which element in that task `element`
-                                    # depends on:
-                                    task_i = self.workflow.tasks.get(insert_ID=tiID)
-                                    elem_i_ID = task_i.element_IDs[e_idx]
-                                    src_elem_IDs_all = cache.element_dependents[
-                                        elem_i_ID
-                                    ]
-                                    src_elem_IDs_i = {
-                                        k: v
-                                        for k, v in src_elem_IDs_all.items()
-                                        if cache.elements[k]["task_insert_ID"]
-                                        == task.insert_ID
-                                    }
-
-                                    # filter src_elem_IDs_i for matching element IDs:
-                                    src_elem_IDs_i = [
-                                        i for i in src_elem_IDs_i if i == elem_ID
-                                    ]
-                                    if (
-                                        len(src_elem_IDs_i) == 1
-                                        and src_elem_IDs_i[0] == elem_ID
-                                    ):
-                                        new_sources.append((tiID, e_idx))
-
-                            if is_group:
-                                inp_dat_idx = [
-                                    all_new_data_idx[i][prev_dat_idx_key]
-                                    for i in new_sources
-                                ]
-                            else:
-                                assert len(new_sources) == 1
-                                prev_dat_idx = all_new_data_idx[new_sources[0]]
-                                inp_dat_idx = prev_dat_idx[prev_dat_idx_key]
+                        inp_dat_idx = self.__get_task_index(
+                            task,
+                            orig_inp_src,
+                            cache,
+                            elem_ID,
+                            inp,
+                            inp_key,
+                            parent_loop_indices_,
+                            all_new_data_idx,
+                        )
 
                     if inp_dat_idx is None:
                         raise RuntimeError(
@@ -791,9 +671,8 @@ class WorkflowLoop:
 
                 # add any locally defined sub-parameters:
                 inp_statuses = cache.elements[elem_ID]["input_statuses"]
-                inp_status_inps = set(
-
-                for sub_param_i in sub_params:
+                inp_status_inps = set(f"inputs.{inp}" for inp in inp_statuses)
+                for sub_param_i in inp_status_inps.difference(new_data_idx):
                     sub_param_data_idx_iter_0 = zi_data_idx
                     try:
                         sub_param_data_idx = sub_param_data_idx_iter_0[sub_param_i]
@@ -808,13 +687,11 @@ class WorkflowLoop:
 
                 for out in task.template.all_schema_outputs:
                     path_i = f"outputs.{out.typ}"
-                    p_src = {"type": "EAR_output"}
+                    p_src: ParamSource = {"type": "EAR_output"}
                     new_data_idx[path_i] = self.workflow._add_unset_parameter_data(p_src)
 
-                schema_params = set(
-
-                )
-                all_new_data_idx[(task.insert_ID, elem_idx)] = new_data_idx
+                schema_params = set(i for i in new_data_idx if len(i.split(".")) == 2)
+                all_new_data_idx[task.insert_ID, elem_idx] = new_data_idx
 
                 iter_ID_i = self.workflow._store.add_element_iteration(
                     element_ID=elem_ID,
@@ -835,8 +712,9 @@ class WorkflowLoop:
 
             task.initialise_EARs(iter_IDs=added_iter_IDs)
 
-
-
+        self._increment_pending_added_iters(
+            parent_loop_indices_[p_nm] for p_nm in self.parents
+        )
         self.workflow._store.update_loop_num_iters(
             index=self.index,
             num_added_iters=self.num_added_iterations,
@@ -846,17 +724,185 @@ class WorkflowLoop:
         for child in child_loops[::-1]:
             if child.num_iterations is not None:
                 for _ in range(child.num_iterations - 1):
-                    par_idx = {
-
-                    parent_loop_indices
-
-
-
-
-
-
+                    par_idx = {parent_name: 0 for parent_name in child.parents}
+                    if parent_loop_indices:
+                        par_idx.update(parent_loop_indices)
+                    par_idx[self.name] = cur_loop_idx + 1
+                    child.add_iteration(parent_loop_indices=par_idx, cache=cache)
+
+    def __get_src_ID_and_groups(
+        self, elem_ID: int, iter_dat: IterableParam, inp: SchemaInput, cache: LoopCache
+    ) -> tuple[int, Sequence[int]]:
+        src_elem_IDs = {
+            k: v
+            for k, v in cache.element_dependents[elem_ID].items()
+            if cache.elements[k]["task_insert_ID"] == iter_dat["output_tasks"][-1]
+        }
+        # consider groups
+        single_data = inp.single_labelled_data
+        assert single_data is not None
+        inp_group_name = single_data.get("group")
+        grouped_elems = [
+            src_elem_j_ID
+            for src_elem_j_ID, src_elem_j_dat in src_elem_IDs.items()
+            if any(nm == inp_group_name for nm in src_elem_j_dat["group_names"])
+        ]
+
+        if not grouped_elems and len(src_elem_IDs) > 1:
+            raise NotImplementedError(
+                f"Multiple elements found in the iterable parameter "
+                f"{inp!r}'s latest output task (insert ID: "
+                f"{iter_dat['output_tasks'][-1]}) that can be used "
+                f"to parametrise the next iteration: "
+                f"{list(src_elem_IDs)!r}."
+            )
+
+        elif not src_elem_IDs:
+            # TODO: maybe OK?
+            raise NotImplementedError(
+                f"No elements found in the iterable parameter "
+                f"{inp!r}'s latest output task (insert ID: "
+                f"{iter_dat['output_tasks'][-1]}) that can be used "
+                f"to parametrise the next iteration."
+            )
+
+        return nth_key(src_elem_IDs, 0), grouped_elems
+
+    def __get_looped_index(
+        self,
+        task: WorkflowTask,
+        elem_ID: int,
+        cache: LoopCache,
+        iter_dat: IterableParam,
+        inp: SchemaInput,
+        parent_loops: list[WorkflowLoop],
+        parent_loop_indices: Mapping[str, int],
+        child_loops: list[WorkflowLoop],
+        cur_loop_idx: int,
+    ):
+        # source from final output task of previous iteration, with all parent
+        # loop indices the same as previous iteration, and all child loop indices
+        # maximised:
+
+        # identify element(s) from which this iterable input should be
+        # parametrised:
+        if task.insert_ID == iter_dat["output_tasks"][-1]:
+            src_elem_ID = elem_ID
+            grouped_elems: Sequence[int] = []
+        else:
+            src_elem_ID, grouped_elems = self.__get_src_ID_and_groups(
+                elem_ID, iter_dat, inp, cache
+            )
+
+        child_loop_max_iters: dict[str, int] = {}
+        parent_loop_same_iters = {
+            loop.name: parent_loop_indices[loop.name] for loop in parent_loops
+        }
+        child_iter_parents = {
+            **parent_loop_same_iters,
+            self.name: cur_loop_idx,
+        }
+        for loop in child_loops:
+            i_num_iters = loop.num_added_iterations[
+                tuple(child_iter_parents[j] for j in loop.parents)
+            ]
+            i_max = i_num_iters - 1
+            child_iter_parents[loop.name] = i_max
+            child_loop_max_iters[loop.name] = i_max
+
+        loop_idx_key = LoopIndex(child_loop_max_iters)
+        loop_idx_key.update(parent_loop_same_iters)
+        loop_idx_key[self.name] = cur_loop_idx
+
+        # identify the ElementIteration from which this input should be
+        # parametrised:
+        if grouped_elems:
+            src_data_idx = [
+                cache.data_idx[src_elem_ID][loop_idx_key] for src_elem_ID in grouped_elems
+            ]
+            if not src_data_idx:
+                raise RuntimeError(
+                    f"Could not find a source iteration with loop_idx: "
+                    f"{loop_idx_key!r}."
+                )
+            return [i[f"outputs.{inp.typ}"] for i in src_data_idx]
+        else:
+            return cache.data_idx[src_elem_ID][loop_idx_key][f"outputs.{inp.typ}"]
+
+    def __get_task_index(
+        self,
+        task: WorkflowTask,
+        orig_inp_src: InputSource,
+        cache: LoopCache,
+        elem_ID: int,
+        inp: SchemaInput,
+        inp_key: str,
+        parent_loop_indices: Mapping[str, int],
+        all_new_data_idx: Mapping[tuple[int, int], DataIndex],
+    ) -> int | list[int]:
+        if orig_inp_src.task_ref not in self.task_insert_IDs:
+            # source the data_idx from the iteration with same parent
+            # loop indices as the new iteration to add:
+            src_data_idx = next(
+                di_k
+                for li_k, di_k in cache.data_idx[elem_ID].items()
+                if all(li_k.get(p_k) == p_v for p_k, p_v in parent_loop_indices.items())
+            )
+
+            # could be multiple, but they should all have the same
+            # data index for this parameter:
+            return src_data_idx[inp_key]
+
+        is_group = (
+            inp.single_labelled_data is not None
+            and "group" in inp.single_labelled_data
+            # this input is a group, assume for now all elements
+        )
+
+        # same task/element, but update iteration to the just-added
+        # iteration:
+        assert orig_inp_src.task_source_type is not None
+        key_prefix = orig_inp_src.task_source_type.name.lower()
+        prev_dat_idx_key = f"{key_prefix}s.{inp.typ}"
+        new_sources: list[tuple[int, int]] = []
+        for (tiID, e_idx), _ in all_new_data_idx.items():
+            if tiID == orig_inp_src.task_ref:
+                # find which element in that task `element`
+                # depends on:
+                src_elem_IDs = cache.element_dependents[
+                    self.workflow.tasks.get(insert_ID=tiID).element_IDs[e_idx]
+                ]
+                # filter src_elem_IDs_i for matching element IDs:
+                src_elem_IDs_i = [
+                    k
+                    for k, _v in src_elem_IDs.items()
+                    if cache.elements[k]["task_insert_ID"] == task.insert_ID
+                    and k == elem_ID
+                ]
+
+                if len(src_elem_IDs_i) == 1:
+                    new_sources.append((tiID, e_idx))
+
+        if is_group:
+            # Convert into simple list of indices
+            return list(
+                chain.from_iterable(
+                    self.__as_sequence(all_new_data_idx[src][prev_dat_idx_key])
+                    for src in new_sources
+                )
+            )
+        else:
+            assert len(new_sources) == 1
+            return all_new_data_idx[new_sources[0]][prev_dat_idx_key]
+
+    @staticmethod
+    def __as_sequence(seq: int | Iterable[int]) -> Iterable[int]:
+        if isinstance(seq, int):
+            yield seq
+        else:
+            yield from seq
 
-    def test_termination(self, element_iter):
+    def test_termination(self, element_iter) -> bool:
         """Check if a loop should terminate, given the specified completed element
         iteration."""
         if self.template.termination: