hpcflow-new2 0.2.0a190__py3-none-any.whl → 0.2.0a199__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. hpcflow/__pyinstaller/hook-hpcflow.py +1 -0
  2. hpcflow/_version.py +1 -1
  3. hpcflow/data/scripts/bad_script.py +2 -0
  4. hpcflow/data/scripts/do_nothing.py +2 -0
  5. hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
  6. hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
  7. hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
  8. hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
  9. hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
  10. hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
  11. hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
  12. hpcflow/data/scripts/input_file_generator_basic.py +3 -0
  13. hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
  14. hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
  15. hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
  16. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
  17. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
  18. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
  19. hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
  20. hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
  21. hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
  22. hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
  23. hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
  24. hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
  25. hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
  26. hpcflow/data/scripts/output_file_parser_basic.py +3 -0
  27. hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
  28. hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
  29. hpcflow/data/scripts/script_exit_test.py +5 -0
  30. hpcflow/data/template_components/environments.yaml +1 -1
  31. hpcflow/sdk/__init__.py +5 -0
  32. hpcflow/sdk/app.py +150 -89
  33. hpcflow/sdk/cli.py +263 -84
  34. hpcflow/sdk/cli_common.py +99 -5
  35. hpcflow/sdk/config/callbacks.py +38 -1
  36. hpcflow/sdk/config/config.py +102 -13
  37. hpcflow/sdk/config/errors.py +19 -5
  38. hpcflow/sdk/config/types.py +3 -0
  39. hpcflow/sdk/core/__init__.py +25 -1
  40. hpcflow/sdk/core/actions.py +914 -262
  41. hpcflow/sdk/core/cache.py +76 -34
  42. hpcflow/sdk/core/command_files.py +14 -128
  43. hpcflow/sdk/core/commands.py +35 -6
  44. hpcflow/sdk/core/element.py +122 -50
  45. hpcflow/sdk/core/errors.py +58 -2
  46. hpcflow/sdk/core/execute.py +207 -0
  47. hpcflow/sdk/core/loop.py +408 -50
  48. hpcflow/sdk/core/loop_cache.py +4 -4
  49. hpcflow/sdk/core/parameters.py +382 -37
  50. hpcflow/sdk/core/run_dir_files.py +13 -40
  51. hpcflow/sdk/core/skip_reason.py +7 -0
  52. hpcflow/sdk/core/task.py +119 -30
  53. hpcflow/sdk/core/task_schema.py +68 -0
  54. hpcflow/sdk/core/test_utils.py +66 -27
  55. hpcflow/sdk/core/types.py +54 -1
  56. hpcflow/sdk/core/utils.py +78 -7
  57. hpcflow/sdk/core/workflow.py +1538 -336
  58. hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
  59. hpcflow/sdk/demo/cli.py +7 -0
  60. hpcflow/sdk/helper/cli.py +1 -0
  61. hpcflow/sdk/log.py +42 -15
  62. hpcflow/sdk/persistence/base.py +405 -53
  63. hpcflow/sdk/persistence/json.py +177 -52
  64. hpcflow/sdk/persistence/pending.py +237 -69
  65. hpcflow/sdk/persistence/store_resource.py +3 -2
  66. hpcflow/sdk/persistence/types.py +15 -4
  67. hpcflow/sdk/persistence/zarr.py +928 -81
  68. hpcflow/sdk/submission/jobscript.py +1408 -489
  69. hpcflow/sdk/submission/schedulers/__init__.py +40 -5
  70. hpcflow/sdk/submission/schedulers/direct.py +33 -19
  71. hpcflow/sdk/submission/schedulers/sge.py +51 -16
  72. hpcflow/sdk/submission/schedulers/slurm.py +44 -16
  73. hpcflow/sdk/submission/schedulers/utils.py +7 -2
  74. hpcflow/sdk/submission/shells/base.py +68 -20
  75. hpcflow/sdk/submission/shells/bash.py +222 -129
  76. hpcflow/sdk/submission/shells/powershell.py +200 -150
  77. hpcflow/sdk/submission/submission.py +852 -119
  78. hpcflow/sdk/submission/types.py +18 -21
  79. hpcflow/sdk/typing.py +24 -5
  80. hpcflow/sdk/utils/arrays.py +71 -0
  81. hpcflow/sdk/utils/deferred_file.py +55 -0
  82. hpcflow/sdk/utils/hashing.py +16 -0
  83. hpcflow/sdk/utils/patches.py +12 -0
  84. hpcflow/sdk/utils/strings.py +33 -0
  85. hpcflow/tests/api/test_api.py +32 -0
  86. hpcflow/tests/conftest.py +19 -0
  87. hpcflow/tests/data/multi_path_sequences.yaml +29 -0
  88. hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
  89. hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
  90. hpcflow/tests/scripts/test_input_file_generators.py +282 -0
  91. hpcflow/tests/scripts/test_main_scripts.py +821 -70
  92. hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
  93. hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
  94. hpcflow/tests/shells/wsl/test_wsl_submission.py +6 -0
  95. hpcflow/tests/unit/test_action.py +176 -0
  96. hpcflow/tests/unit/test_app.py +20 -0
  97. hpcflow/tests/unit/test_cache.py +46 -0
  98. hpcflow/tests/unit/test_cli.py +133 -0
  99. hpcflow/tests/unit/test_config.py +122 -1
  100. hpcflow/tests/unit/test_element_iteration.py +47 -0
  101. hpcflow/tests/unit/test_jobscript_unit.py +757 -0
  102. hpcflow/tests/unit/test_loop.py +1332 -27
  103. hpcflow/tests/unit/test_meta_task.py +325 -0
  104. hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
  105. hpcflow/tests/unit/test_parameter.py +13 -0
  106. hpcflow/tests/unit/test_persistence.py +190 -8
  107. hpcflow/tests/unit/test_run.py +109 -3
  108. hpcflow/tests/unit/test_run_directories.py +29 -0
  109. hpcflow/tests/unit/test_shell.py +20 -0
  110. hpcflow/tests/unit/test_submission.py +5 -76
  111. hpcflow/tests/unit/utils/test_arrays.py +40 -0
  112. hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
  113. hpcflow/tests/unit/utils/test_hashing.py +65 -0
  114. hpcflow/tests/unit/utils/test_patches.py +5 -0
  115. hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
  116. hpcflow/tests/workflows/__init__.py +0 -0
  117. hpcflow/tests/workflows/test_directory_structure.py +31 -0
  118. hpcflow/tests/workflows/test_jobscript.py +332 -0
  119. hpcflow/tests/workflows/test_run_status.py +198 -0
  120. hpcflow/tests/workflows/test_skip_downstream.py +696 -0
  121. hpcflow/tests/workflows/test_submission.py +140 -0
  122. hpcflow/tests/workflows/test_workflows.py +142 -2
  123. hpcflow/tests/workflows/test_zip.py +18 -0
  124. hpcflow/viz_demo.ipynb +6587 -3
  125. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +7 -4
  126. hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
  127. hpcflow_new2-0.2.0a190.dist-info/RECORD +0 -165
  128. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
  129. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
  130. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0
hpcflow/sdk/core/run_dir_files.py CHANGED
@@ -3,82 +3,55 @@ Model of files in the run directory.
 """
 
 from __future__ import annotations
-import re
 from typing import Any, TYPE_CHECKING
 from hpcflow.sdk.core.app_aware import AppAware
 from hpcflow.sdk.core.utils import JSONLikeDirSnapShot
 
 if TYPE_CHECKING:
-    from re import Pattern
-    from typing_extensions import ClassVar
     from ..submission.shells.base import Shell
+    from .types import BlockActionKey
 
 
 class RunDirAppFiles(AppAware):
     """A class to encapsulate the naming/recognition of app-created files within run
     directories."""
 
-    __CMD_FILES_RE_PATTERN: ClassVar[Pattern] = re.compile(r"js_\d+_act_\d+\.?\w*")
-
-    @classmethod
-    def get_log_file_name(cls) -> str:
-        """File name for the app log file."""
-        return f"{cls._app.package_name}.log"
-
-    @classmethod
-    def get_std_file_name(cls) -> str:
-        """File name for stdout and stderr streams from the app."""
-        return f"{cls._app.package_name}_std.txt"
-
     @staticmethod
-    def get_run_file_prefix(js_idx: int | str, js_action_idx: int | str) -> str:
+    def get_run_file_prefix(block_act_key: BlockActionKey) -> str:
         """
         Get the common prefix for files associated with a run.
         """
-        return f"js_{js_idx}_act_{js_action_idx}"
+        return f"js_{block_act_key[0]}_block_{block_act_key[1]}_act_{block_act_key[2]}"
 
     @classmethod
-    def get_commands_file_name(
-        cls, js_idx: int | str, js_action_idx: int | str, shell: Shell
-    ) -> str:
+    def get_commands_file_name(cls, block_act_key: BlockActionKey, shell: Shell) -> str:
         """
         Get the name of the file containing commands.
         """
-        return cls.get_run_file_prefix(js_idx, js_action_idx) + shell.JS_EXT
+        return cls.get_run_file_prefix(block_act_key) + shell.JS_EXT
 
     @classmethod
-    def get_run_param_dump_file_prefix(
-        cls, js_idx: int | str, js_action_idx: int | str
-    ) -> str:
+    def get_run_param_dump_file_prefix(cls, block_act_key: BlockActionKey) -> str:
         """Get the prefix to a file in the run directory that the app will dump parameter
         data to."""
-        return cls.get_run_file_prefix(js_idx, js_action_idx) + "_inputs"
+        return cls.get_run_file_prefix(block_act_key) + "_inputs"
 
     @classmethod
-    def get_run_param_load_file_prefix(
-        cls, js_idx: int | str, js_action_idx: int | str
-    ) -> str:
+    def get_run_param_load_file_prefix(cls, block_act_key: BlockActionKey) -> str:
         """Get the prefix to a file in the run directory that the app will load parameter
         data from."""
-        return cls.get_run_file_prefix(js_idx, js_action_idx) + "_outputs"
+        return cls.get_run_file_prefix(block_act_key) + "_outputs"
 
     @classmethod
-    def take_snapshot(cls) -> dict[str, Any]:
+    def take_snapshot(cls, root_path=None) -> dict[str, Any]:
         """
-        Take a :py:class:`JSONLikeDirSnapShot`, and process to ignore files created by
-        the app.
+        Take a :py:class:`JSONLikeDirSnapShot`.
 
         This includes command files that are invoked by jobscripts, the app log file, and
         the app standard out/error file.
         """
         snapshot = JSONLikeDirSnapShot()
-        snapshot.take(".")
-        ss_js = snapshot.to_json_like()
+        snapshot.take(root_path or ".")
+        ss_js = snapshot.to_json_like(use_strings=True)
         ss_js.pop("root_path")  # always the current working directory of the run
-        excluded = {cls.get_log_file_name(), cls.get_std_file_name()}
-        data: dict[str, Any] = ss_js["data"]
-        for filename in tuple(data):
-            if filename in excluded or cls.__CMD_FILES_RE_PATTERN.match(filename):
-                data.pop(filename)
-
         return ss_js
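The run-file prefix now encodes a jobscript block index alongside the jobscript and action indices. A minimal sketch of the resulting names, assuming `BlockActionKey` (defined in `hpcflow/sdk/core/types.py`, not shown in this diff) is a (jobscript, block, action) index triple, as its indexing above suggests:

```python
from typing import Union

# Assumed shape of the alias; only its indexing behaviour is visible in the diff.
BlockActionKey = tuple[Union[int, str], Union[int, str], Union[int, str]]

def run_file_prefix(block_act_key: BlockActionKey) -> str:
    # mirrors the new RunDirAppFiles.get_run_file_prefix
    return f"js_{block_act_key[0]}_block_{block_act_key[1]}_act_{block_act_key[2]}"

print(run_file_prefix((0, 1, 2)))  # js_0_block_1_act_2 (plus e.g. ".sh" for a commands file)
```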
hpcflow/sdk/core/skip_reason.py ADDED
@@ -0,0 +1,7 @@
+import enum
+
+
+class SkipReason(enum.Enum):
+    NOT_SKIPPED = 0
+    UPSTREAM_FAILURE = 1
+    LOOP_TERMINATION = 2
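The new `SkipReason` enum gives each skip state a stable integer value, so it serialises trivially and can be recovered by value. A small round-trip sketch:

```python
import enum

class SkipReason(enum.Enum):
    NOT_SKIPPED = 0
    UPSTREAM_FAILURE = 1
    LOOP_TERMINATION = 2

stored = SkipReason.UPSTREAM_FAILURE.value   # -> 1, cheap to persist
assert SkipReason(stored) is SkipReason.UPSTREAM_FAILURE  # recover by value
```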
hpcflow/sdk/core/task.py CHANGED
@@ -67,12 +67,13 @@ if TYPE_CHECKING:
         InputValue,
         InputSource,
         ValueSequence,
+        MultiPathSequence,
         SchemaInput,
         SchemaOutput,
         ParameterPath,
     )
     from .rule import Rule
-    from .task_schema import TaskObjective, TaskSchema
+    from .task_schema import TaskObjective, TaskSchema, MetaTaskSchema
     from .types import (
         MultiplicityDescriptor,
         RelevantData,
@@ -132,6 +133,8 @@ class ElementSet(JSONLike):
         Input files to the set of elements.
     sequences: list[~hpcflow.app.ValueSequence]
         Input value sequences to parameterise over.
+    multi_path_sequences: list[~hpcflow.app.MultiPathSequence]
+        Multi-path sequences to parameterise over.
     resources: ~hpcflow.app.ResourceList
         Resources to use for the set of elements.
     repeats: list[dict]
@@ -154,9 +157,10 @@ class ElementSet(JSONLike):
         If True, if more than one parameter is sourced from the same task, then allow
         these sources to come from distinct element sub-sets. If False (default),
         only the intersection of element sub-sets for all parameters are included.
-    merge_envs: bool
-        If True, merge ``environments`` into ``resources`` using the "any" scope. If
-        False, ``environments`` are ignored. This is required on first initialisation,
+    is_creation: bool
+        If True, merge ``environments`` into ``resources`` using the "any" scope, and
+        merge sequences belonging to multi-path sequences into the value-sequences list.
+        If False, ``environments`` are ignored. This is required on first initialisation,
         but not on subsequent re-initialisation from a persistent workflow.
     """
 
@@ -188,6 +192,12 @@ class ElementSet(JSONLike):
             is_multiple=True,
             parent_ref="_element_set",
         ),
+        ChildObjectSpec(
+            name="multi_path_sequences",
+            class_name="MultiPathSequence",
+            is_multiple=True,
+            parent_ref="_element_set",
+        ),
         ChildObjectSpec(
             name="input_sources",
             class_name="InputSource",
@@ -207,6 +217,7 @@ class ElementSet(JSONLike):
         inputs: list[InputValue] | dict[str, Any] | None = None,
         input_files: list[InputFile] | None = None,
         sequences: list[ValueSequence] | None = None,
+        multi_path_sequences: list[MultiPathSequence] | None = None,
         resources: Resources = None,
         repeats: list[RepeatsDescriptor] | int | None = None,
         groups: list[ElementGroup] | None = None,
@@ -216,7 +227,7 @@ class ElementSet(JSONLike):
         environments: Mapping[str, Mapping[str, Any]] | None = None,
         sourceable_elem_iters: list[int] | None = None,
         allow_non_coincident_task_sources: bool = False,
-        merge_envs: bool = True,
+        is_creation: bool = True,
     ):
         #: Inputs to the set of elements.
         self.inputs = self.__decode_inputs(inputs or [])
@@ -230,6 +241,8 @@ class ElementSet(JSONLike):
         self.resources = self._app.ResourceList.normalise(resources)
         #: Input value sequences to parameterise over.
         self.sequences = sequences or []
+        #: Input value multi-path sequences to parameterise over.
+        self.multi_path_sequences = multi_path_sequences or []
         #: Input source descriptors.
         self.input_sources = input_sources or {}
         #: How to handle nesting of iterations.
@@ -244,9 +257,11 @@ class ElementSet(JSONLike):
         self.sourceable_elem_iters = sourceable_elem_iters
         #: Whether to allow sources to come from distinct element sub-sets.
         self.allow_non_coincident_task_sources = allow_non_coincident_task_sources
-        #: Whether to merge ``environments`` into ``resources`` using the "any" scope
-        #: on first initialisation.
-        self.merge_envs = merge_envs
+        #: Whether this initialisation is the first for this data (i.e. not a
+        #: reconstruction from persistent workflow data), in which case, we merge
+        #: ``environments`` into ``resources`` using the "any" scope, and merge any multi-
+        #: path sequences into the sequences list.
+        self.is_creation = is_creation
         self.original_input_sources: dict[str, list[InputSource]] | None = None
         self.original_nesting_order: dict[str, float] | None = None
 
@@ -260,16 +275,23 @@ class ElementSet(JSONLike):
         # assigned by WorkflowTask._add_element_set
         self._element_local_idx_range: list[int] | None = None
 
-        # merge `environments` into element set resources (this mutates `resources`, and
-        # should only happen on creation of the element set, not re-initialisation from a
-        # persistent workflow):
-        if self.environments and self.merge_envs:
-            self.resources.merge_one(
-                self._app.ResourceSpec(scope="any", environments=self.environments)
-            )
-            self.merge_envs = False
+        if self.is_creation:
+
+            # merge `environments` into element set resources (this mutates `resources`, and
+            # should only happen on creation of the element set, not re-initialisation from a
+            # persistent workflow):
+            if self.environments:
+                self.resources.merge_one(
+                    self._app.ResourceSpec(scope="any", environments=self.environments)
+                )
+            # note: `env_preset` is merged into resources by the Task init.
 
-        # note: `env_preset` is merged into resources by the Task init.
+            # merge sequences belonging to multi-path sequences into the value-sequences list:
+            if self.multi_path_sequences:
+                for mp_seq in self.multi_path_sequences:
+                    mp_seq._move_to_sequence_list(self.sequences)
+
+            self.is_creation = False
 
     def __deepcopy__(self, memo: dict[int, Any] | None) -> Self:
         dct = self.to_dict()
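The net effect is that all one-shot creation work is now gated behind a single flag. A minimal sketch of the observable behaviour, assuming the standard `hf` app alias used in the hpcflow tests and a hypothetical environment spec:

```python
import hpcflow.app as hf  # assumption: the standard hpcflow app alias

es = hf.ElementSet(
    inputs={"p1": 101},
    environments={"my_env": {"version": "1.0"}},  # hypothetical environment spec
)
# `environments` has been merged into `resources` under the "any" scope, and
# the flag flipped, so re-initialisation from persistent data cannot merge twice:
assert es.is_creation is False
```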
@@ -450,6 +472,7 @@ class ElementSet(JSONLike):
         inputs: list[InputValue] | dict[str, Any] | None = None,
         input_files: list[InputFile] | None = None,
         sequences: list[ValueSequence] | None = None,
+        multi_path_sequences: list[MultiPathSequence] | None = None,
         resources: Resources = None,
         repeats: list[RepeatsDescriptor] | int | None = None,
         groups: list[ElementGroup] | None = None,
@@ -468,6 +491,7 @@ class ElementSet(JSONLike):
             inputs,
             input_files,
             sequences,
+            multi_path_sequences,
             resources,
             repeats,
             groups,
@@ -692,6 +716,9 @@ class Task(JSONLike):
         A list of `InputValue` objects.
     input_files: list[~hpcflow.app.InputFile]
     sequences: list[~hpcflow.app.ValueSequence]
+        Input value sequences to parameterise over.
+    multi_path_sequences: list[~hpcflow.app.MultiPathSequence]
+        Multi-path sequences to parameterise over.
     input_sources: dict[str, ~hpcflow.app.InputSource]
     nesting_order: list
     env_preset: str
@@ -745,6 +772,7 @@ class Task(JSONLike):
         inputs: list[InputValue] | dict[str, Any] | None = None,
         input_files: list[InputFile] | None = None,
         sequences: list[ValueSequence] | None = None,
+        multi_path_sequences: list[MultiPathSequence] | None = None,
         input_sources: dict[str, list[InputSource]] | None = None,
         nesting_order: dict[str, float] | None = None,
         env_preset: str | None = None,
@@ -790,6 +818,7 @@ class Task(JSONLike):
             inputs=inputs,
             input_files=input_files,
             sequences=sequences,
+            multi_path_sequences=multi_path_sequences,
             resources=resources,
             repeats=repeats,
             groups=groups,
@@ -1000,9 +1029,11 @@ class Task(JSONLike):
         )
 
         return [
-            f"{task.name}_{task_name_rep_idx[idx]}"
-            if task_name_rep_idx[idx] > 0
-            else task.name
+            (
+                f"{task.name}_{task_name_rep_idx[idx]}"
+                if task_name_rep_idx[idx] > 0
+                else task.name
+            )
             for idx, task in enumerate(tasks)
         ]
 
@@ -1684,6 +1715,7 @@ class WorkflowTask(AppAware):
         return self._element_IDs + self._pending_element_IDs
 
     @property
+    @TimeIt.decorator
     def num_elements(self) -> int:
         """
         The number of elements associated with this task.
@@ -1891,16 +1923,18 @@ class WorkflowTask(AppAware):
                 input_data_idx[key] = list(seq_dat_ref)
                 sequence_idx[key] = list(range(len(seq_dat_ref)))
                 try:
-                    key_ = key.removeprefix("inputs.")
+                    key_ = key.split("inputs.")[1]
                 except IndexError:
-                    pass
+                    # e.g. "resources."
+                    key_ = ""
                 try:
                     # TODO: wouldn't need to do this if we raise when an ValueSequence is
                     # provided for a parameter whose inputs sources do not include the local
                     # value.
-                    source_idx[key] = [
-                        element_set.input_sources[key_].index(loc_inp_src)
-                    ] * len(seq_dat_ref)
+                    if key_:
+                        source_idx[key] = [
+                            element_set.input_sources[key_].index(loc_inp_src)
+                        ] * len(seq_dat_ref)
                 except ValueError:
                     pass
 
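The switch from `removeprefix` to `split(...)[1]` is behavioural, not cosmetic: `str.removeprefix` never raises, so the old `except IndexError` was unreachable and a non-input key fell through with its full name. A self-contained illustration:

```python
key = "resources.any"

# old behaviour: no exception; key_ silently kept the full key
assert key.removeprefix("inputs.") == "resources.any"

# new behaviour: non-input keys raise IndexError and are mapped to "",
# which the guarded `if key_:` then skips
try:
    key_ = key.split("inputs.")[1]
except IndexError:
    key_ = ""
assert key_ == ""

assert "inputs.p1".split("inputs.")[1] == "p1"  # input keys still work
```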
@@ -2942,7 +2976,7 @@ class WorkflowTask(AppAware):
         return params
 
     @staticmethod
-    def __get_relevant_paths(
+    def _get_relevant_paths(
         data_index: Mapping[str, Any], path: list[str], children_of: str | None = None
     ) -> Mapping[str, RelevantPath]:
         relevant_paths: dict[str, RelevantPath] = {}
@@ -2968,7 +3002,12 @@ class WorkflowTask(AppAware):
         return relevant_paths
 
     def __get_relevant_data_item(
-        self, path: str | None, path_i: str, data_idx_ij: int, raise_on_unset: bool
+        self,
+        path: str | None,
+        path_i: str,
+        data_idx_ij: int,
+        raise_on_unset: bool,
+        len_dat_idx: int = 1,
     ) -> tuple[Any, bool, str | None]:
         if path_i.startswith("repeats."):
             # data is an integer repeats index, rather than a parameter ID:
@@ -3002,6 +3041,13 @@ class WorkflowTask(AppAware):
             data_j = param_j.data
         if raise_on_unset and not is_set_i:
             raise UnsetParameterDataError(path, path_i)
+        if not is_set_i and self.workflow._is_tracking_unset:
+            src_run_id = param_j.source.get("EAR_ID")
+            unset_trackers = self.workflow._tracked_unset
+            assert src_run_id is not None
+            assert unset_trackers is not None
+            unset_trackers[path_i].run_ids.add(src_run_id)
+            unset_trackers[path_i].group_size = len_dat_idx
         return data_j, is_set_i, meth_i
 
     def __get_relevant_data(
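Each `_tracked_unset` entry records which source runs produced unset values and the size of the data-index group they were read from. The tracker class itself is not part of this hunk; a minimal sketch of the interface it must expose, with a hypothetical name:

```python
from dataclasses import dataclass, field

@dataclass
class UnsetTracker:  # hypothetical name; the real class lives elsewhere in the SDK
    run_ids: set[int] = field(default_factory=set)  # EAR IDs that yielded unset data
    group_size: int = 1                             # number of data indices in the group

tracked = {"inputs.p1": UnsetTracker()}
tracked["inputs.p1"].run_ids.add(42)  # as in: unset_trackers[path_i].run_ids.add(src_run_id)
tracked["inputs.p1"].group_size = 3   # as in: unset_trackers[path_i].group_size = len_dat_idx
```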
@@ -3029,7 +3075,7 @@ class WorkflowTask(AppAware):
             is_param_set_i: list[bool] = []
             for data_idx_ij in data_idx_i:
                 data_j, is_set_i, meth_i = self.__get_relevant_data_item(
-                    path, path_i, data_idx_ij, raise_on_unset
+                    path, path_i, data_idx_ij, raise_on_unset, len_dat_idx=len(data_idx_i)
                 )
                 data_i.append(data_j)
                 methods_i.append(meth_i)
@@ -3041,6 +3087,7 @@ class WorkflowTask(AppAware):
                 "is_set": is_param_set_i,
                 "is_multi": True,
             }
+
         if not raise_on_unset:
             to_remove: set[str] = set()
             for key, dat_info in relevant_data.items():
@@ -3229,13 +3276,38 @@ class WorkflowTask(AppAware):
         """Get element data from the persistent store."""
         path_split = [] if not path else path.split(".")
 
-        if not (relevant_paths := self.__get_relevant_paths(data_index, path_split)):
+        if not (relevant_paths := self._get_relevant_paths(data_index, path_split)):
             if raise_on_missing:
                 # TODO: custom exception?
                 raise ValueError(f"Path {path!r} does not exist in the element data.")
             return default
 
         relevant_data_idx = {k: v for k, v in data_index.items() if k in relevant_paths}
+
+        cache = self.workflow._merged_parameters_cache
+        use_cache = (
+            self.workflow._use_merged_parameters_cache
+            and raise_on_missing is False
+            and raise_on_unset is False
+            and default is None  # cannot cache on default value, may not be hashable
+        )
+        add_to_cache = False
+        if use_cache:
+            # generate the key:
+            dat_idx_cache: list[tuple[str, tuple[int, ...] | int]] = []
+            for k, v in sorted(relevant_data_idx.items()):
+                dat_idx_cache.append((k, tuple(v) if isinstance(v, list) else v))
+            cache_key = (path, tuple(dat_idx_cache))
+
+            # check for cache hit:
+            if cache_key in cache:
+                self._app.logger.debug(
+                    f"_get_merged_parameter_data: cache hit with key: {cache_key}"
+                )
+                return cache[cache_key]
+            else:
+                add_to_cache = True
+
         PV_classes = self._paths_to_PV_classes(*relevant_paths, path)
         relevant_data = self.__get_relevant_data(relevant_data_idx, raise_on_unset, path)
 
@@ -3248,7 +3320,7 @@ class WorkflowTask(AppAware):
         except MayNeedObjectError as err:
             path_to_init = err.path
             path_to_init_split = path_to_init.split(".")
-            relevant_paths = self.__get_relevant_paths(data_index, path_to_init_split)
+            relevant_paths = self._get_relevant_paths(data_index, path_to_init_split)
             PV_classes = self._paths_to_PV_classes(*relevant_paths, path_to_init)
             relevant_data_idx = {
                 k: v for k, v in data_index.items() if k in relevant_paths
3296
3368
  raise ValueError(f"Path {path!r} does not exist in the element data.")
3297
3369
  current_val = default
3298
3370
 
3371
+ if add_to_cache:
3372
+ self._app.logger.debug(
3373
+ f"_get_merged_parameter_data: adding to cache with key: {cache_key!r}"
3374
+ )
3375
+ # tuple[str | None, tuple[tuple[str, tuple[int, ...] | int], ...]]
3376
+ # tuple[str | None, tuple[tuple[str, tuple[int, ...] | int], ...]] | None
3377
+ cache[cache_key] = current_val
3378
+
3299
3379
  return current_val
3300
3380
 
3301
3381
 
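The cache key works because every data-index list is converted to a tuple first, making the composite key hashable; that is also why caching is skipped when a `default` is supplied, since a default value may not be hashable. A standalone sketch of the key construction:

```python
relevant_data_idx = {"inputs.p1": [5, 6], "inputs.p2": 7}  # example values
path = "inputs.p1"

dat_idx_cache = [
    (k, tuple(v) if isinstance(v, list) else v)
    for k, v in sorted(relevant_data_idx.items())  # sorted: key order must be stable
]
cache_key = (path, tuple(dat_idx_cache))
hash(cache_key)  # does not raise: tuples of str/int are hashable, unlike lists
```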
@@ -3626,3 +3706,12 @@ class ElementPropagation(AppAware):
 
 #: A task used as a template for other tasks.
 TaskTemplate: TypeAlias = Task
+
+
+class MetaTask(JSONLike):
+    def __init__(self, schema: MetaTaskSchema, tasks: Sequence[Task]):
+        self.schema = schema
+        self.tasks = tasks
+
+        # TODO: validate schema's inputs and outputs are inputs and outputs of `tasks`
+        # schemas
hpcflow/sdk/core/task_schema.py CHANGED
@@ -915,3 +915,71 @@ class TaskSchema(JSONLike):
     def multi_input_types(self) -> list[str]:
         """Get a list of input types that have multiple labels."""
         return [inp.parameter.typ for inp in self.inputs if inp.multiple]
+
+
+class MetaTaskSchema(TaskSchema):
+    """Class to represent a task schema with no actions, that can be used to represent the
+    effect of multiple task schemas.
+
+    Parameters
+    ----------
+    objective:
+        This is a string representing the objective of the task schema.
+    method:
+        An optional string to label the task schema by its method.
+    implementation:
+        An optional string to label the task schema by its implementation.
+    inputs:
+        A list of SchemaInput objects that define the inputs to the task.
+    outputs:
+        A list of SchemaOutput objects that define the outputs of the task.
+    version:
+        The version of this task schema.
+    web_doc:
+        True if this object should be included in the Sphinx documentation
+        (normally only relevant for built-in task schemas). True by default.
+    environment_presets:
+        Information about default execution environments. Can be overridden in specific
+        cases in the concrete tasks.
+    """
+
+    _validation_schema: ClassVar[str] = "task_schema_spec_schema.yaml"
+    _hash_value = None
+    _validate_actions = False
+
+    _child_objects = (
+        ChildObjectSpec(name="objective", class_name="TaskObjective"),
+        ChildObjectSpec(
+            name="inputs",
+            class_name="SchemaInput",
+            is_multiple=True,
+            parent_ref="_task_schema",
+        ),
+        ChildObjectSpec(name="outputs", class_name="SchemaOutput", is_multiple=True),
+    )
+
+    def __init__(
+        self,
+        objective: TaskObjective | str,
+        method: str | None = None,
+        implementation: str | None = None,
+        inputs: list[Parameter | SchemaInput] | None = None,
+        outputs: list[Parameter | SchemaParameter] | None = None,
+        version: str | None = None,
+        web_doc: bool | None = True,
+        environment_presets: Mapping[str, Mapping[str, Mapping[str, Any]]] | None = None,
+        doc: str = "",
+        _hash_value: str | None = None,
+    ):
+        super().__init__(
+            objective=objective,
+            method=method,
+            implementation=implementation,
+            inputs=inputs,
+            outputs=outputs,
+            version=version,
+            web_doc=web_doc,
+            environment_presets=environment_presets,
+            doc=doc,
+            _hash_value=_hash_value,
+        )
hpcflow/sdk/core/test_utils.py CHANGED
@@ -34,46 +34,48 @@ Strs: TypeAlias = "str | tuple[str, ...]"
 def make_schemas(
     *ins_outs: tuple[dict[str, Any], tuple[str, ...]]
     | tuple[dict[str, Any], tuple[str, ...], str]
+    | tuple[dict[str, Any], tuple[str, ...], str, dict[str, Any]]
 ) -> list[TaskSchema]:
     """
     Construct a collection of schemas.
     """
     out: list[TaskSchema] = []
     for idx, info in enumerate(ins_outs):
+        act_kwargs: dict[str, Any] = {}
         if len(info) == 2:
             (ins_i, outs_i) = info
             obj = f"t{idx}"
-        else:
+        elif len(info) == 3:
             (ins_i, outs_i, obj) = info
+        else:
+            (ins_i, outs_i, obj, act_kwargs) = info
 
-        # distribute outputs over stdout, stderr and out file parsers:
-        stdout = None
-        stderr = None
-        out_file_parsers = None
-
-        if outs_i:
-            stdout = f"<<parameter:{outs_i[0]}>>"
-        if len(outs_i) > 1:
-            stderr = f"<<parameter:{outs_i[1]}>>"
-        if len(outs_i) > 2:
-            out_file_parsers = [
-                hf.OutputFileParser(
-                    output=hf.Parameter(out_i),
-                    output_files=[hf.FileSpec(label="file1", name="file1.txt")],
-                )
-                for out_i in outs_i[2:]
-            ]
-        cmd = hf.Command(
-            " ".join(f"echo $((<<parameter:{i}>> + 100))" for i in ins_i),
-            stdout=stdout,
-            stderr=stderr,
-        )
+        # distribute outputs over multiple commands' stdout:
+        cmds_lst = []
+        for out_idx, out_j in enumerate(outs_i):
+            cmd = hf.Command(
+                command=(
+                    "echo $(("
+                    + " + ".join(f"<<parameter:{i}>> + {100 + out_idx}" for i in ins_i)
+                    + "))"
+                ),
+                stdout=f"<<int(parameter:{out_j})>>",
+            )
+            cmds_lst.append(cmd)
+
+        if not outs_i:
+            # no outputs
+            cmds_lst = [
+                hf.Command(
+                    command=(
+                        "echo $(("
+                        + " + ".join(f"<<parameter:{i}>> + 100" for i in ins_i)
+                        + "))"
+                    ),
+                )
+            ]
 
-        act_i = hf.Action(
-            commands=[cmd],
-            output_file_parsers=out_file_parsers,
-            environments=[hf.ActionEnvironment("env_1")],
-        )
+        act_i = hf.Action(commands=cmds_lst, **act_kwargs)
         out.append(
             hf.TaskSchema(
                 objective=obj,
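The optional fourth tuple element is forwarded to `hf.Action` as keyword arguments. For example (`requires_dir` is a valid `Action` kwarg per its use later in this diff):

```python
s0, s1 = make_schemas(
    ({"p1": None}, ("p2",)),  # 2-tuple: objective auto-named "t0"
    ({"p2": None}, ("p3",), "t_custom", {"requires_dir": True}),  # 4-tuple: Action kwargs
)
```

Note that, unlike the old implementation, the rebuilt `make_schemas` no longer attaches an explicit `env_1` action environment or output-file parsers; each output is now captured from its own command's stdout.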
@@ -402,3 +404,40 @@ class P1_parameter_cls(ParameterValue):
             sub_param = None
         obj = cls(a=a, d=d, sub_param=sub_param)
         workflow.set_parameter_value(param_id=param_id, value=obj, commit=True)
+
+
+def make_workflow_to_run_command(
+    command,
+    path,
+    outputs=None,
+    name="w1",
+    overwrite=False,
+    store="zarr",
+    requires_dir=False,
+):
+    """Generate a single-task single-action workflow that runs the specified command,
+    optionally generating some outputs."""
+
+    outputs = outputs or []
+    commands = [hf.Command(command=command)]
+    commands += [
+        hf.Command(command=f'echo "output_{out}"', stdout=f"<<parameter:{out}>>")
+        for out in outputs
+    ]
+    schema = hf.TaskSchema(
+        objective="run_command",
+        outputs=[hf.SchemaOutput(i) for i in outputs],
+        actions=[hf.Action(commands=commands, requires_dir=requires_dir)],
+    )
+    template = {
+        "name": name,
+        "tasks": [hf.Task(schema=schema)],
+    }
+    wk = hf.Workflow.from_template(
+        hf.WorkflowTemplate(**template),
+        path=path,
+        name=name,
+        overwrite=overwrite,
+        store=store,
+    )
+    return wk
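Example usage, e.g. from a pytest test where `tmp_path` provides the workflow path:

```python
wk = make_workflow_to_run_command(
    command="echo hello",
    path=tmp_path,   # pytest fixture (assumption about the call site)
    outputs=["p1"],  # adds a command echoing "output_p1" into parameter p1
    store="json",
)
wk.submit(wait=True)  # assuming the usual Workflow.submit API
```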