hpcflow-new2 0.2.0a190__py3-none-any.whl → 0.2.0a199__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. hpcflow/__pyinstaller/hook-hpcflow.py +1 -0
  2. hpcflow/_version.py +1 -1
  3. hpcflow/data/scripts/bad_script.py +2 -0
  4. hpcflow/data/scripts/do_nothing.py +2 -0
  5. hpcflow/data/scripts/env_specifier_test/input_file_generator_pass_env_spec.py +4 -0
  6. hpcflow/data/scripts/env_specifier_test/main_script_test_pass_env_spec.py +8 -0
  7. hpcflow/data/scripts/env_specifier_test/output_file_parser_pass_env_spec.py +4 -0
  8. hpcflow/data/scripts/env_specifier_test/v1/input_file_generator_basic.py +4 -0
  9. hpcflow/data/scripts/env_specifier_test/v1/main_script_test_direct_in_direct_out.py +7 -0
  10. hpcflow/data/scripts/env_specifier_test/v1/output_file_parser_basic.py +4 -0
  11. hpcflow/data/scripts/env_specifier_test/v2/main_script_test_direct_in_direct_out.py +7 -0
  12. hpcflow/data/scripts/input_file_generator_basic.py +3 -0
  13. hpcflow/data/scripts/input_file_generator_basic_FAIL.py +3 -0
  14. hpcflow/data/scripts/input_file_generator_test_stdout_stderr.py +8 -0
  15. hpcflow/data/scripts/main_script_test_direct_in.py +3 -0
  16. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2.py +6 -0
  17. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed.py +6 -0
  18. hpcflow/data/scripts/main_script_test_direct_in_direct_out_2_fail_allowed_group.py +7 -0
  19. hpcflow/data/scripts/main_script_test_direct_in_direct_out_3.py +6 -0
  20. hpcflow/data/scripts/main_script_test_direct_in_group_direct_out_3.py +6 -0
  21. hpcflow/data/scripts/main_script_test_direct_in_group_one_fail_direct_out_3.py +6 -0
  22. hpcflow/data/scripts/main_script_test_hdf5_in_obj_2.py +12 -0
  23. hpcflow/data/scripts/main_script_test_json_out_FAIL.py +3 -0
  24. hpcflow/data/scripts/main_script_test_shell_env_vars.py +12 -0
  25. hpcflow/data/scripts/main_script_test_std_out_std_err.py +6 -0
  26. hpcflow/data/scripts/output_file_parser_basic.py +3 -0
  27. hpcflow/data/scripts/output_file_parser_basic_FAIL.py +7 -0
  28. hpcflow/data/scripts/output_file_parser_test_stdout_stderr.py +8 -0
  29. hpcflow/data/scripts/script_exit_test.py +5 -0
  30. hpcflow/data/template_components/environments.yaml +1 -1
  31. hpcflow/sdk/__init__.py +5 -0
  32. hpcflow/sdk/app.py +150 -89
  33. hpcflow/sdk/cli.py +263 -84
  34. hpcflow/sdk/cli_common.py +99 -5
  35. hpcflow/sdk/config/callbacks.py +38 -1
  36. hpcflow/sdk/config/config.py +102 -13
  37. hpcflow/sdk/config/errors.py +19 -5
  38. hpcflow/sdk/config/types.py +3 -0
  39. hpcflow/sdk/core/__init__.py +25 -1
  40. hpcflow/sdk/core/actions.py +914 -262
  41. hpcflow/sdk/core/cache.py +76 -34
  42. hpcflow/sdk/core/command_files.py +14 -128
  43. hpcflow/sdk/core/commands.py +35 -6
  44. hpcflow/sdk/core/element.py +122 -50
  45. hpcflow/sdk/core/errors.py +58 -2
  46. hpcflow/sdk/core/execute.py +207 -0
  47. hpcflow/sdk/core/loop.py +408 -50
  48. hpcflow/sdk/core/loop_cache.py +4 -4
  49. hpcflow/sdk/core/parameters.py +382 -37
  50. hpcflow/sdk/core/run_dir_files.py +13 -40
  51. hpcflow/sdk/core/skip_reason.py +7 -0
  52. hpcflow/sdk/core/task.py +119 -30
  53. hpcflow/sdk/core/task_schema.py +68 -0
  54. hpcflow/sdk/core/test_utils.py +66 -27
  55. hpcflow/sdk/core/types.py +54 -1
  56. hpcflow/sdk/core/utils.py +78 -7
  57. hpcflow/sdk/core/workflow.py +1538 -336
  58. hpcflow/sdk/data/workflow_spec_schema.yaml +2 -0
  59. hpcflow/sdk/demo/cli.py +7 -0
  60. hpcflow/sdk/helper/cli.py +1 -0
  61. hpcflow/sdk/log.py +42 -15
  62. hpcflow/sdk/persistence/base.py +405 -53
  63. hpcflow/sdk/persistence/json.py +177 -52
  64. hpcflow/sdk/persistence/pending.py +237 -69
  65. hpcflow/sdk/persistence/store_resource.py +3 -2
  66. hpcflow/sdk/persistence/types.py +15 -4
  67. hpcflow/sdk/persistence/zarr.py +928 -81
  68. hpcflow/sdk/submission/jobscript.py +1408 -489
  69. hpcflow/sdk/submission/schedulers/__init__.py +40 -5
  70. hpcflow/sdk/submission/schedulers/direct.py +33 -19
  71. hpcflow/sdk/submission/schedulers/sge.py +51 -16
  72. hpcflow/sdk/submission/schedulers/slurm.py +44 -16
  73. hpcflow/sdk/submission/schedulers/utils.py +7 -2
  74. hpcflow/sdk/submission/shells/base.py +68 -20
  75. hpcflow/sdk/submission/shells/bash.py +222 -129
  76. hpcflow/sdk/submission/shells/powershell.py +200 -150
  77. hpcflow/sdk/submission/submission.py +852 -119
  78. hpcflow/sdk/submission/types.py +18 -21
  79. hpcflow/sdk/typing.py +24 -5
  80. hpcflow/sdk/utils/arrays.py +71 -0
  81. hpcflow/sdk/utils/deferred_file.py +55 -0
  82. hpcflow/sdk/utils/hashing.py +16 -0
  83. hpcflow/sdk/utils/patches.py +12 -0
  84. hpcflow/sdk/utils/strings.py +33 -0
  85. hpcflow/tests/api/test_api.py +32 -0
  86. hpcflow/tests/conftest.py +19 -0
  87. hpcflow/tests/data/multi_path_sequences.yaml +29 -0
  88. hpcflow/tests/data/workflow_test_run_abort.yaml +34 -35
  89. hpcflow/tests/schedulers/sge/test_sge_submission.py +36 -0
  90. hpcflow/tests/scripts/test_input_file_generators.py +282 -0
  91. hpcflow/tests/scripts/test_main_scripts.py +821 -70
  92. hpcflow/tests/scripts/test_non_snippet_script.py +46 -0
  93. hpcflow/tests/scripts/test_ouput_file_parsers.py +353 -0
  94. hpcflow/tests/shells/wsl/test_wsl_submission.py +6 -0
  95. hpcflow/tests/unit/test_action.py +176 -0
  96. hpcflow/tests/unit/test_app.py +20 -0
  97. hpcflow/tests/unit/test_cache.py +46 -0
  98. hpcflow/tests/unit/test_cli.py +133 -0
  99. hpcflow/tests/unit/test_config.py +122 -1
  100. hpcflow/tests/unit/test_element_iteration.py +47 -0
  101. hpcflow/tests/unit/test_jobscript_unit.py +757 -0
  102. hpcflow/tests/unit/test_loop.py +1332 -27
  103. hpcflow/tests/unit/test_meta_task.py +325 -0
  104. hpcflow/tests/unit/test_multi_path_sequences.py +229 -0
  105. hpcflow/tests/unit/test_parameter.py +13 -0
  106. hpcflow/tests/unit/test_persistence.py +190 -8
  107. hpcflow/tests/unit/test_run.py +109 -3
  108. hpcflow/tests/unit/test_run_directories.py +29 -0
  109. hpcflow/tests/unit/test_shell.py +20 -0
  110. hpcflow/tests/unit/test_submission.py +5 -76
  111. hpcflow/tests/unit/utils/test_arrays.py +40 -0
  112. hpcflow/tests/unit/utils/test_deferred_file_writer.py +34 -0
  113. hpcflow/tests/unit/utils/test_hashing.py +65 -0
  114. hpcflow/tests/unit/utils/test_patches.py +5 -0
  115. hpcflow/tests/unit/utils/test_redirect_std.py +50 -0
  116. hpcflow/tests/workflows/__init__.py +0 -0
  117. hpcflow/tests/workflows/test_directory_structure.py +31 -0
  118. hpcflow/tests/workflows/test_jobscript.py +332 -0
  119. hpcflow/tests/workflows/test_run_status.py +198 -0
  120. hpcflow/tests/workflows/test_skip_downstream.py +696 -0
  121. hpcflow/tests/workflows/test_submission.py +140 -0
  122. hpcflow/tests/workflows/test_workflows.py +142 -2
  123. hpcflow/tests/workflows/test_zip.py +18 -0
  124. hpcflow/viz_demo.ipynb +6587 -3
  125. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/METADATA +7 -4
  126. hpcflow_new2-0.2.0a199.dist-info/RECORD +221 -0
  127. hpcflow_new2-0.2.0a190.dist-info/RECORD +0 -165
  128. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/LICENSE +0 -0
  129. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/WHEEL +0 -0
  130. {hpcflow_new2-0.2.0a190.dist-info → hpcflow_new2-0.2.0a199.dist-info}/entry_points.txt +0 -0
hpcflow/sdk/core/run_dir_files.py CHANGED
@@ -3,82 +3,55 @@ Model of files in the run directory.
 """
 
 from __future__ import annotations
-import re
 from typing import Any, TYPE_CHECKING
 from hpcflow.sdk.core.app_aware import AppAware
 from hpcflow.sdk.core.utils import JSONLikeDirSnapShot
 
 if TYPE_CHECKING:
-    from re import Pattern
-    from typing_extensions import ClassVar
     from ..submission.shells.base import Shell
+    from .types import BlockActionKey
 
 
 class RunDirAppFiles(AppAware):
     """A class to encapsulate the naming/recognition of app-created files within run
     directories."""
 
-    __CMD_FILES_RE_PATTERN: ClassVar[Pattern] = re.compile(r"js_\d+_act_\d+\.?\w*")
-
-    @classmethod
-    def get_log_file_name(cls) -> str:
-        """File name for the app log file."""
-        return f"{cls._app.package_name}.log"
-
-    @classmethod
-    def get_std_file_name(cls) -> str:
-        """File name for stdout and stderr streams from the app."""
-        return f"{cls._app.package_name}_std.txt"
-
     @staticmethod
-    def get_run_file_prefix(js_idx: int | str, js_action_idx: int | str) -> str:
+    def get_run_file_prefix(block_act_key: BlockActionKey) -> str:
         """
         Get the common prefix for files associated with a run.
         """
-        return f"js_{js_idx}_act_{js_action_idx}"
+        return f"js_{block_act_key[0]}_block_{block_act_key[1]}_act_{block_act_key[2]}"
 
     @classmethod
-    def get_commands_file_name(
-        cls, js_idx: int | str, js_action_idx: int | str, shell: Shell
-    ) -> str:
+    def get_commands_file_name(cls, block_act_key: BlockActionKey, shell: Shell) -> str:
         """
         Get the name of the file containing commands.
         """
-        return cls.get_run_file_prefix(js_idx, js_action_idx) + shell.JS_EXT
+        return cls.get_run_file_prefix(block_act_key) + shell.JS_EXT
 
     @classmethod
-    def get_run_param_dump_file_prefix(
-        cls, js_idx: int | str, js_action_idx: int | str
-    ) -> str:
+    def get_run_param_dump_file_prefix(cls, block_act_key: BlockActionKey) -> str:
         """Get the prefix to a file in the run directory that the app will dump parameter
         data to."""
-        return cls.get_run_file_prefix(js_idx, js_action_idx) + "_inputs"
+        return cls.get_run_file_prefix(block_act_key) + "_inputs"
 
     @classmethod
-    def get_run_param_load_file_prefix(
-        cls, js_idx: int | str, js_action_idx: int | str
-    ) -> str:
+    def get_run_param_load_file_prefix(cls, block_act_key: BlockActionKey) -> str:
         """Get the prefix to a file in the run directory that the app will load parameter
         data from."""
-        return cls.get_run_file_prefix(js_idx, js_action_idx) + "_outputs"
+        return cls.get_run_file_prefix(block_act_key) + "_outputs"
 
     @classmethod
-    def take_snapshot(cls) -> dict[str, Any]:
+    def take_snapshot(cls, root_path=None) -> dict[str, Any]:
         """
-        Take a :py:class:`JSONLikeDirSnapShot`, and process to ignore files created by
-        the app.
+        Take a :py:class:`JSONLikeDirSnapShot`.
 
         This includes command files that are invoked by jobscripts, the app log file, and
         the app standard out/error file.
         """
         snapshot = JSONLikeDirSnapShot()
-        snapshot.take(".")
-        ss_js = snapshot.to_json_like()
+        snapshot.take(root_path or ".")
+        ss_js = snapshot.to_json_like(use_strings=True)
         ss_js.pop("root_path")  # always the current working directory of the run
-        excluded = {cls.get_log_file_name(), cls.get_std_file_name()}
-        data: dict[str, Any] = ss_js["data"]
-        for filename in tuple(data):
-            if filename in excluded or cls.__CMD_FILES_RE_PATTERN.match(filename):
-                data.pop(filename)
-
         return ss_js
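The run-file prefix now encodes a jobscript block index alongside the jobscript and action indices. A minimal sketch of the resulting names, assuming `BlockActionKey` (defined in `hpcflow/sdk/core/types.py`, not shown in this diff) is a (jobscript, block, action) index triple, as its indexing above suggests:

```python
from typing import Union

# Assumed shape of the alias; only its indexing behaviour is visible in the diff.
BlockActionKey = tuple[Union[int, str], Union[int, str], Union[int, str]]

def run_file_prefix(block_act_key: BlockActionKey) -> str:
    # mirrors the new RunDirAppFiles.get_run_file_prefix
    return f"js_{block_act_key[0]}_block_{block_act_key[1]}_act_{block_act_key[2]}"

print(run_file_prefix((0, 1, 2)))  # js_0_block_1_act_2 (plus e.g. ".sh" for a commands file)
```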
hpcflow/sdk/core/skip_reason.py ADDED
@@ -0,0 +1,7 @@
+import enum
+
+
+class SkipReason(enum.Enum):
+    NOT_SKIPPED = 0
+    UPSTREAM_FAILURE = 1
+    LOOP_TERMINATION = 2
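The new `SkipReason` enum gives each skip state a stable integer value, so it serialises trivially and can be recovered by value. A small round-trip sketch:

```python
import enum

class SkipReason(enum.Enum):
    NOT_SKIPPED = 0
    UPSTREAM_FAILURE = 1
    LOOP_TERMINATION = 2

stored = SkipReason.UPSTREAM_FAILURE.value   # -> 1, cheap to persist
assert SkipReason(stored) is SkipReason.UPSTREAM_FAILURE  # recover by value
```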
hpcflow/sdk/core/task.py CHANGED
@@ -67,12 +67,13 @@ if TYPE_CHECKING:
         InputValue,
         InputSource,
         ValueSequence,
+        MultiPathSequence,
         SchemaInput,
         SchemaOutput,
         ParameterPath,
     )
     from .rule import Rule
-    from .task_schema import TaskObjective, TaskSchema
+    from .task_schema import TaskObjective, TaskSchema, MetaTaskSchema
     from .types import (
         MultiplicityDescriptor,
         RelevantData,
@@ -132,6 +133,8 @@ class ElementSet(JSONLike):
         Input files to the set of elements.
     sequences: list[~hpcflow.app.ValueSequence]
         Input value sequences to parameterise over.
+    multi_path_sequences: list[~hpcflow.app.MultiPathSequence]
+        Multi-path sequences to parameterise over.
     resources: ~hpcflow.app.ResourceList
         Resources to use for the set of elements.
     repeats: list[dict]
@@ -154,9 +157,10 @@ class ElementSet(JSONLike):
         If True, if more than one parameter is sourced from the same task, then allow
         these sources to come from distinct element sub-sets. If False (default),
         only the intersection of element sub-sets for all parameters are included.
-    merge_envs: bool
-        If True, merge ``environments`` into ``resources`` using the "any" scope. If
-        False, ``environments`` are ignored. This is required on first initialisation,
+    is_creation: bool
+        If True, merge ``environments`` into ``resources`` using the "any" scope, and
+        merge sequences belonging to multi-path sequences into the value-sequences list.
+        If False, ``environments`` are ignored. This is required on first initialisation,
         but not on subsequent re-initialisation from a persistent workflow.
     """
 
@@ -188,6 +192,12 @@ class ElementSet(JSONLike):
             is_multiple=True,
             parent_ref="_element_set",
         ),
+        ChildObjectSpec(
+            name="multi_path_sequences",
+            class_name="MultiPathSequence",
+            is_multiple=True,
+            parent_ref="_element_set",
+        ),
         ChildObjectSpec(
             name="input_sources",
             class_name="InputSource",
@@ -207,6 +217,7 @@ class ElementSet(JSONLike):
         inputs: list[InputValue] | dict[str, Any] | None = None,
         input_files: list[InputFile] | None = None,
         sequences: list[ValueSequence] | None = None,
+        multi_path_sequences: list[MultiPathSequence] | None = None,
         resources: Resources = None,
         repeats: list[RepeatsDescriptor] | int | None = None,
         groups: list[ElementGroup] | None = None,
@@ -216,7 +227,7 @@ class ElementSet(JSONLike):
         environments: Mapping[str, Mapping[str, Any]] | None = None,
         sourceable_elem_iters: list[int] | None = None,
         allow_non_coincident_task_sources: bool = False,
-        merge_envs: bool = True,
+        is_creation: bool = True,
     ):
         #: Inputs to the set of elements.
         self.inputs = self.__decode_inputs(inputs or [])
@@ -230,6 +241,8 @@ class ElementSet(JSONLike):
         self.resources = self._app.ResourceList.normalise(resources)
         #: Input value sequences to parameterise over.
         self.sequences = sequences or []
+        #: Input value multi-path sequences to parameterise over.
+        self.multi_path_sequences = multi_path_sequences or []
         #: Input source descriptors.
         self.input_sources = input_sources or {}
         #: How to handle nesting of iterations.
@@ -244,9 +257,11 @@ class ElementSet(JSONLike):
         self.sourceable_elem_iters = sourceable_elem_iters
         #: Whether to allow sources to come from distinct element sub-sets.
         self.allow_non_coincident_task_sources = allow_non_coincident_task_sources
-        #: Whether to merge ``environments`` into ``resources`` using the "any" scope
-        #: on first initialisation.
-        self.merge_envs = merge_envs
+        #: Whether this initialisation is the first for this data (i.e. not a
+        #: reconstruction from persistent workflow data), in which case, we merge
+        #: ``environments`` into ``resources`` using the "any" scope, and merge any multi-
+        #: path sequences into the sequences list.
+        self.is_creation = is_creation
         self.original_input_sources: dict[str, list[InputSource]] | None = None
         self.original_nesting_order: dict[str, float] | None = None
 
@@ -260,16 +275,23 @@ class ElementSet(JSONLike):
         # assigned by WorkflowTask._add_element_set
         self._element_local_idx_range: list[int] | None = None
 
-        # merge `environments` into element set resources (this mutates `resources`, and
-        # should only happen on creation of the element set, not re-initialisation from a
-        # persistent workflow):
-        if self.environments and self.merge_envs:
-            self.resources.merge_one(
-                self._app.ResourceSpec(scope="any", environments=self.environments)
-            )
-            self.merge_envs = False
+        if self.is_creation:
+
+            # merge `environments` into element set resources (this mutates `resources`, and
+            # should only happen on creation of the element set, not re-initialisation from a
+            # persistent workflow):
+            if self.environments:
+                self.resources.merge_one(
+                    self._app.ResourceSpec(scope="any", environments=self.environments)
+                )
+            # note: `env_preset` is merged into resources by the Task init.
 
-        # note: `env_preset` is merged into resources by the Task init.
+            # merge sequences belonging to multi-path sequences into the value-sequences list:
+            if self.multi_path_sequences:
+                for mp_seq in self.multi_path_sequences:
+                    mp_seq._move_to_sequence_list(self.sequences)
+
+            self.is_creation = False
 
     def __deepcopy__(self, memo: dict[int, Any] | None) -> Self:
         dct = self.to_dict()
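The net effect is that all one-shot creation work is now gated behind a single flag. A minimal sketch of the observable behaviour, assuming the standard `hf` app alias used in the hpcflow tests and a hypothetical environment spec:

```python
import hpcflow.app as hf  # assumption: the standard hpcflow app alias

es = hf.ElementSet(
    inputs={"p1": 101},
    environments={"my_env": {"version": "1.0"}},  # hypothetical environment spec
)
# `environments` has been merged into `resources` under the "any" scope, and
# the flag flipped, so re-initialisation from persistent data cannot merge twice:
assert es.is_creation is False
```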
@@ -450,6 +472,7 @@ class ElementSet(JSONLike):
         inputs: list[InputValue] | dict[str, Any] | None = None,
         input_files: list[InputFile] | None = None,
         sequences: list[ValueSequence] | None = None,
+        multi_path_sequences: list[MultiPathSequence] | None = None,
         resources: Resources = None,
         repeats: list[RepeatsDescriptor] | int | None = None,
         groups: list[ElementGroup] | None = None,
@@ -468,6 +491,7 @@ class ElementSet(JSONLike):
             inputs,
             input_files,
             sequences,
+            multi_path_sequences,
             resources,
             repeats,
             groups,
@@ -692,6 +716,9 @@ class Task(JSONLike):
         A list of `InputValue` objects.
     input_files: list[~hpcflow.app.InputFile]
     sequences: list[~hpcflow.app.ValueSequence]
+        Input value sequences to parameterise over.
+    multi_path_sequences: list[~hpcflow.app.MultiPathSequence]
+        Multi-path sequences to parameterise over.
     input_sources: dict[str, ~hpcflow.app.InputSource]
     nesting_order: list
     env_preset: str
@@ -745,6 +772,7 @@ class Task(JSONLike):
         inputs: list[InputValue] | dict[str, Any] | None = None,
         input_files: list[InputFile] | None = None,
         sequences: list[ValueSequence] | None = None,
+        multi_path_sequences: list[MultiPathSequence] | None = None,
         input_sources: dict[str, list[InputSource]] | None = None,
         nesting_order: dict[str, float] | None = None,
         env_preset: str | None = None,
@@ -790,6 +818,7 @@ class Task(JSONLike):
             inputs=inputs,
             input_files=input_files,
             sequences=sequences,
+            multi_path_sequences=multi_path_sequences,
             resources=resources,
             repeats=repeats,
             groups=groups,
@@ -1000,9 +1029,11 @@ class Task(JSONLike):
         )
 
         return [
-            f"{task.name}_{task_name_rep_idx[idx]}"
-            if task_name_rep_idx[idx] > 0
-            else task.name
+            (
+                f"{task.name}_{task_name_rep_idx[idx]}"
+                if task_name_rep_idx[idx] > 0
+                else task.name
+            )
             for idx, task in enumerate(tasks)
         ]
 
@@ -1684,6 +1715,7 @@ class WorkflowTask(AppAware):
         return self._element_IDs + self._pending_element_IDs
 
     @property
+    @TimeIt.decorator
     def num_elements(self) -> int:
         """
         The number of elements associated with this task.
@@ -1891,16 +1923,18 @@ class WorkflowTask(AppAware):
                 input_data_idx[key] = list(seq_dat_ref)
                 sequence_idx[key] = list(range(len(seq_dat_ref)))
                 try:
-                    key_ = key.removeprefix("inputs.")
+                    key_ = key.split("inputs.")[1]
                 except IndexError:
-                    pass
+                    # e.g. "resources."
+                    key_ = ""
                 try:
                     # TODO: wouldn't need to do this if we raise when an ValueSequence is
                     # provided for a parameter whose inputs sources do not include the local
                     # value.
-                    source_idx[key] = [
-                        element_set.input_sources[key_].index(loc_inp_src)
-                    ] * len(seq_dat_ref)
+                    if key_:
+                        source_idx[key] = [
+                            element_set.input_sources[key_].index(loc_inp_src)
+                        ] * len(seq_dat_ref)
                 except ValueError:
                     pass
 
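The switch from `removeprefix` to `split(...)[1]` is behavioural, not cosmetic: `str.removeprefix` never raises, so the old `except IndexError` was unreachable and a non-input key fell through with its full name. A self-contained illustration:

```python
key = "resources.any"

# old behaviour: no exception; key_ silently kept the full key
assert key.removeprefix("inputs.") == "resources.any"

# new behaviour: non-input keys raise IndexError and are mapped to "",
# which the guarded `if key_:` then skips
try:
    key_ = key.split("inputs.")[1]
except IndexError:
    key_ = ""
assert key_ == ""

assert "inputs.p1".split("inputs.")[1] == "p1"  # input keys still work
```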
@@ -2942,7 +2976,7 @@ class WorkflowTask(AppAware):
         return params
 
     @staticmethod
-    def __get_relevant_paths(
+    def _get_relevant_paths(
         data_index: Mapping[str, Any], path: list[str], children_of: str | None = None
     ) -> Mapping[str, RelevantPath]:
         relevant_paths: dict[str, RelevantPath] = {}
@@ -2968,7 +3002,12 @@ class WorkflowTask(AppAware):
         return relevant_paths
 
     def __get_relevant_data_item(
-        self, path: str | None, path_i: str, data_idx_ij: int, raise_on_unset: bool
+        self,
+        path: str | None,
+        path_i: str,
+        data_idx_ij: int,
+        raise_on_unset: bool,
+        len_dat_idx: int = 1,
     ) -> tuple[Any, bool, str | None]:
         if path_i.startswith("repeats."):
             # data is an integer repeats index, rather than a parameter ID:
@@ -3002,6 +3041,13 @@ class WorkflowTask(AppAware):
             data_j = param_j.data
         if raise_on_unset and not is_set_i:
             raise UnsetParameterDataError(path, path_i)
+        if not is_set_i and self.workflow._is_tracking_unset:
+            src_run_id = param_j.source.get("EAR_ID")
+            unset_trackers = self.workflow._tracked_unset
+            assert src_run_id is not None
+            assert unset_trackers is not None
+            unset_trackers[path_i].run_ids.add(src_run_id)
+            unset_trackers[path_i].group_size = len_dat_idx
         return data_j, is_set_i, meth_i
 
     def __get_relevant_data(
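Each `_tracked_unset` entry records which source runs produced unset values and the size of the data-index group they were read from. The tracker class itself is not part of this hunk; a minimal sketch of the interface it must expose, with a hypothetical name:

```python
from dataclasses import dataclass, field

@dataclass
class UnsetTracker:  # hypothetical name; the real class lives elsewhere in the SDK
    run_ids: set[int] = field(default_factory=set)  # EAR IDs that yielded unset data
    group_size: int = 1                             # number of data indices in the group

tracked = {"inputs.p1": UnsetTracker()}
tracked["inputs.p1"].run_ids.add(42)  # as in: unset_trackers[path_i].run_ids.add(src_run_id)
tracked["inputs.p1"].group_size = 3   # as in: unset_trackers[path_i].group_size = len_dat_idx
```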
@@ -3029,7 +3075,7 @@ class WorkflowTask(AppAware):
             is_param_set_i: list[bool] = []
             for data_idx_ij in data_idx_i:
                 data_j, is_set_i, meth_i = self.__get_relevant_data_item(
-                    path, path_i, data_idx_ij, raise_on_unset
+                    path, path_i, data_idx_ij, raise_on_unset, len_dat_idx=len(data_idx_i)
                 )
                 data_i.append(data_j)
                 methods_i.append(meth_i)
@@ -3041,6 +3087,7 @@ class WorkflowTask(AppAware):
                 "is_set": is_param_set_i,
                 "is_multi": True,
             }
+
         if not raise_on_unset:
             to_remove: set[str] = set()
             for key, dat_info in relevant_data.items():
@@ -3229,13 +3276,38 @@ class WorkflowTask(AppAware):
         """Get element data from the persistent store."""
         path_split = [] if not path else path.split(".")
 
-        if not (relevant_paths := self.__get_relevant_paths(data_index, path_split)):
+        if not (relevant_paths := self._get_relevant_paths(data_index, path_split)):
             if raise_on_missing:
                 # TODO: custom exception?
                 raise ValueError(f"Path {path!r} does not exist in the element data.")
             return default
 
         relevant_data_idx = {k: v for k, v in data_index.items() if k in relevant_paths}
+
+        cache = self.workflow._merged_parameters_cache
+        use_cache = (
+            self.workflow._use_merged_parameters_cache
+            and raise_on_missing is False
+            and raise_on_unset is False
+            and default is None  # cannot cache on default value, may not be hashable
+        )
+        add_to_cache = False
+        if use_cache:
+            # generate the key:
+            dat_idx_cache: list[tuple[str, tuple[int, ...] | int]] = []
+            for k, v in sorted(relevant_data_idx.items()):
+                dat_idx_cache.append((k, tuple(v) if isinstance(v, list) else v))
+            cache_key = (path, tuple(dat_idx_cache))
+
+            # check for cache hit:
+            if cache_key in cache:
+                self._app.logger.debug(
+                    f"_get_merged_parameter_data: cache hit with key: {cache_key}"
+                )
+                return cache[cache_key]
+            else:
+                add_to_cache = True
+
         PV_classes = self._paths_to_PV_classes(*relevant_paths, path)
         relevant_data = self.__get_relevant_data(relevant_data_idx, raise_on_unset, path)
 
@@ -3248,7 +3320,7 @@ class WorkflowTask(AppAware):
         except MayNeedObjectError as err:
             path_to_init = err.path
             path_to_init_split = path_to_init.split(".")
-            relevant_paths = self.__get_relevant_paths(data_index, path_to_init_split)
+            relevant_paths = self._get_relevant_paths(data_index, path_to_init_split)
             PV_classes = self._paths_to_PV_classes(*relevant_paths, path_to_init)
             relevant_data_idx = {
                 k: v for k, v in data_index.items() if k in relevant_paths
3296
3368
  raise ValueError(f"Path {path!r} does not exist in the element data.")
3297
3369
  current_val = default
3298
3370
 
3371
+ if add_to_cache:
3372
+ self._app.logger.debug(
3373
+ f"_get_merged_parameter_data: adding to cache with key: {cache_key!r}"
3374
+ )
3375
+ # tuple[str | None, tuple[tuple[str, tuple[int, ...] | int], ...]]
3376
+ # tuple[str | None, tuple[tuple[str, tuple[int, ...] | int], ...]] | None
3377
+ cache[cache_key] = current_val
3378
+
3299
3379
  return current_val
3300
3380
 
3301
3381
 
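The cache key works because every data-index list is converted to a tuple first, making the composite key hashable; that is also why caching is skipped when a `default` is supplied, since a default value may not be hashable. A standalone sketch of the key construction:

```python
relevant_data_idx = {"inputs.p1": [5, 6], "inputs.p2": 7}  # example values
path = "inputs.p1"

dat_idx_cache = [
    (k, tuple(v) if isinstance(v, list) else v)
    for k, v in sorted(relevant_data_idx.items())  # sorted: key order must be stable
]
cache_key = (path, tuple(dat_idx_cache))
hash(cache_key)  # does not raise: tuples of str/int are hashable, unlike lists
```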
@@ -3626,3 +3706,12 @@ class ElementPropagation(AppAware):
 
 #: A task used as a template for other tasks.
 TaskTemplate: TypeAlias = Task
+
+
+class MetaTask(JSONLike):
+    def __init__(self, schema: MetaTaskSchema, tasks: Sequence[Task]):
+        self.schema = schema
+        self.tasks = tasks
+
+        # TODO: validate schema's inputs and outputs are inputs and outputs of `tasks`
+        # schemas
hpcflow/sdk/core/task_schema.py CHANGED
@@ -915,3 +915,71 @@ class TaskSchema(JSONLike):
     def multi_input_types(self) -> list[str]:
         """Get a list of input types that have multiple labels."""
         return [inp.parameter.typ for inp in self.inputs if inp.multiple]
+
+
+class MetaTaskSchema(TaskSchema):
+    """Class to represent a task schema with no actions, that can be used to represent the
+    effect of multiple task schemas.
+
+    Parameters
+    ----------
+    objective:
+        This is a string representing the objective of the task schema.
+    method:
+        An optional string to label the task schema by its method.
+    implementation:
+        An optional string to label the task schema by its implementation.
+    inputs:
+        A list of SchemaInput objects that define the inputs to the task.
+    outputs:
+        A list of SchemaOutput objects that define the outputs of the task.
+    version:
+        The version of this task schema.
+    web_doc:
+        True if this object should be included in the Sphinx documentation
+        (normally only relevant for built-in task schemas). True by default.
+    environment_presets:
+        Information about default execution environments. Can be overridden in specific
+        cases in the concrete tasks.
+    """
+
+    _validation_schema: ClassVar[str] = "task_schema_spec_schema.yaml"
+    _hash_value = None
+    _validate_actions = False
+
+    _child_objects = (
+        ChildObjectSpec(name="objective", class_name="TaskObjective"),
+        ChildObjectSpec(
+            name="inputs",
+            class_name="SchemaInput",
+            is_multiple=True,
+            parent_ref="_task_schema",
+        ),
+        ChildObjectSpec(name="outputs", class_name="SchemaOutput", is_multiple=True),
+    )
+
+    def __init__(
+        self,
+        objective: TaskObjective | str,
+        method: str | None = None,
+        implementation: str | None = None,
+        inputs: list[Parameter | SchemaInput] | None = None,
+        outputs: list[Parameter | SchemaParameter] | None = None,
+        version: str | None = None,
+        web_doc: bool | None = True,
+        environment_presets: Mapping[str, Mapping[str, Mapping[str, Any]]] | None = None,
+        doc: str = "",
+        _hash_value: str | None = None,
+    ):
+        super().__init__(
+            objective=objective,
+            method=method,
+            implementation=implementation,
+            inputs=inputs,
+            outputs=outputs,
+            version=version,
+            web_doc=web_doc,
+            environment_presets=environment_presets,
+            doc=doc,
+            _hash_value=_hash_value,
+        )
hpcflow/sdk/core/test_utils.py CHANGED
@@ -34,46 +34,48 @@ Strs: TypeAlias = "str | tuple[str, ...]"
 def make_schemas(
     *ins_outs: tuple[dict[str, Any], tuple[str, ...]]
     | tuple[dict[str, Any], tuple[str, ...], str]
+    | tuple[dict[str, Any], tuple[str, ...], str, dict[str, Any]]
 ) -> list[TaskSchema]:
     """
     Construct a collection of schemas.
     """
     out: list[TaskSchema] = []
     for idx, info in enumerate(ins_outs):
+        act_kwargs: dict[str, Any] = {}
         if len(info) == 2:
             (ins_i, outs_i) = info
             obj = f"t{idx}"
-        else:
+        elif len(info) == 3:
             (ins_i, outs_i, obj) = info
+        else:
+            (ins_i, outs_i, obj, act_kwargs) = info
 
-        # distribute outputs over stdout, stderr and out file parsers:
-        stdout = None
-        stderr = None
-        out_file_parsers = None
-
-        if outs_i:
-            stdout = f"<<parameter:{outs_i[0]}>>"
-        if len(outs_i) > 1:
-            stderr = f"<<parameter:{outs_i[1]}>>"
-        if len(outs_i) > 2:
-            out_file_parsers = [
-                hf.OutputFileParser(
-                    output=hf.Parameter(out_i),
-                    output_files=[hf.FileSpec(label="file1", name="file1.txt")],
-                )
-                for out_i in outs_i[2:]
-            ]
-        cmd = hf.Command(
-            " ".join(f"echo $((<<parameter:{i}>> + 100))" for i in ins_i),
-            stdout=stdout,
-            stderr=stderr,
-        )
+        # distribute outputs over multiple commands' stdout:
+        cmds_lst = []
+        for out_idx, out_j in enumerate(outs_i):
+            cmd = hf.Command(
+                command=(
+                    "echo $(("
+                    + " + ".join(f"<<parameter:{i}>> + {100 + out_idx}" for i in ins_i)
+                    + "))"
+                ),
+                stdout=f"<<int(parameter:{out_j})>>",
+            )
+            cmds_lst.append(cmd)
+
+        if not outs_i:
+            # no outputs
+            cmds_lst = [
+                hf.Command(
+                    command=(
+                        "echo $(("
+                        + " + ".join(f"<<parameter:{i}>> + 100" for i in ins_i)
+                        + "))"
+                    ),
+                )
+            ]
 
-        act_i = hf.Action(
-            commands=[cmd],
-            output_file_parsers=out_file_parsers,
-            environments=[hf.ActionEnvironment("env_1")],
-        )
+        act_i = hf.Action(commands=cmds_lst, **act_kwargs)
         out.append(
             hf.TaskSchema(
                 objective=obj,
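The optional fourth tuple element is forwarded to `hf.Action` as keyword arguments. For example (`requires_dir` is a valid `Action` kwarg per its use later in this diff):

```python
s0, s1 = make_schemas(
    ({"p1": None}, ("p2",)),  # 2-tuple: objective auto-named "t0"
    ({"p2": None}, ("p3",), "t_custom", {"requires_dir": True}),  # 4-tuple: Action kwargs
)
```

Note that, unlike the old implementation, the rebuilt `make_schemas` no longer attaches an explicit `env_1` action environment or output-file parsers; each output is now captured from its own command's stdout.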
@@ -402,3 +404,40 @@ class P1_parameter_cls(ParameterValue):
             sub_param = None
         obj = cls(a=a, d=d, sub_param=sub_param)
         workflow.set_parameter_value(param_id=param_id, value=obj, commit=True)
+
+
+def make_workflow_to_run_command(
+    command,
+    path,
+    outputs=None,
+    name="w1",
+    overwrite=False,
+    store="zarr",
+    requires_dir=False,
+):
+    """Generate a single-task single-action workflow that runs the specified command,
+    optionally generating some outputs."""
+
+    outputs = outputs or []
+    commands = [hf.Command(command=command)]
+    commands += [
+        hf.Command(command=f'echo "output_{out}"', stdout=f"<<parameter:{out}>>")
+        for out in outputs
+    ]
+    schema = hf.TaskSchema(
+        objective="run_command",
+        outputs=[hf.SchemaOutput(i) for i in outputs],
+        actions=[hf.Action(commands=commands, requires_dir=requires_dir)],
+    )
+    template = {
+        "name": name,
+        "tasks": [hf.Task(schema=schema)],
+    }
+    wk = hf.Workflow.from_template(
+        hf.WorkflowTemplate(**template),
+        path=path,
+        name=name,
+        overwrite=overwrite,
+        store=store,
+    )
+    return wk
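Example usage, e.g. from a pytest test where `tmp_path` provides the workflow path:

```python
wk = make_workflow_to_run_command(
    command="echo hello",
    path=tmp_path,   # pytest fixture (assumption about the call site)
    outputs=["p1"],  # adds a command echoing "output_p1" into parameter p1
    store="json",
)
wk.submit(wait=True)  # assuming the usual Workflow.submit API
```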